devinet_ioctl(): take copyin/copyout to caller
[muen/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/* Read attribute @attr from the devconf_dflt set of namespace @net. */
#define IPV4_DEVCONF_DFLT(net, attr)					\
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102 };
103
104 #define IN4_ADDR_HSIZE_SHIFT    8
105 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
106
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111         u32 val = (__force u32) addr ^ net_hash_mix(net);
112
113         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118         u32 hash = inet_addr_hash(net, ifa->ifa_local);
119
120         ASSERT_RTNL();
121         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126         ASSERT_RTNL();
127         hlist_del_init_rcu(&ifa->hash);
128 }
129
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         ifa = inet_lookup_ifaddr_rcu(net, addr);
145         if (!ifa) {
146                 struct flowi4 fl4 = { .daddr = addr };
147                 struct fib_result res = { 0 };
148                 struct fib_table *local;
149
150                 /* Fallback to FIB local table so that communication
151                  * over loopback subnets work.
152                  */
153                 local = fib_get_table(net, RT_TABLE_LOCAL);
154                 if (local &&
155                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
156                     res.type == RTN_LOCAL)
157                         result = FIB_RES_DEV(res);
158         } else {
159                 result = ifa->ifa_dev->dev;
160         }
161         if (result && devref)
162                 dev_hold(result);
163         rcu_read_unlock();
164         return result;
165 }
166 EXPORT_SYMBOL(__ip_dev_find);
167
168 /* called under RCU lock */
169 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
170 {
171         u32 hash = inet_addr_hash(net, addr);
172         struct in_ifaddr *ifa;
173
174         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
175                 if (ifa->ifa_local == addr &&
176                     net_eq(dev_net(ifa->ifa_dev->dev), net))
177                         return ifa;
178
179         return NULL;
180 }
181
182 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
183
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
186 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
187                          int destroy);
188 #ifdef CONFIG_SYSCTL
189 static int devinet_sysctl_register(struct in_device *idev);
190 static void devinet_sysctl_unregister(struct in_device *idev);
191 #else
192 static int devinet_sysctl_register(struct in_device *idev)
193 {
194         return 0;
195 }
196 static void devinet_sysctl_unregister(struct in_device *idev)
197 {
198 }
199 #endif
200
201 /* Locks all the inet devices. */
202
203 static struct in_ifaddr *inet_alloc_ifa(void)
204 {
205         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
206 }
207
208 static void inet_rcu_free_ifa(struct rcu_head *head)
209 {
210         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
211         if (ifa->ifa_dev)
212                 in_dev_put(ifa->ifa_dev);
213         kfree(ifa);
214 }
215
216 static void inet_free_ifa(struct in_ifaddr *ifa)
217 {
218         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
219 }
220
221 void in_dev_finish_destroy(struct in_device *idev)
222 {
223         struct net_device *dev = idev->dev;
224
225         WARN_ON(idev->ifa_list);
226         WARN_ON(idev->mc_list);
227         kfree(rcu_dereference_protected(idev->mc_hash, 1));
228 #ifdef NET_REFCNT_DEBUG
229         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
230 #endif
231         dev_put(dev);
232         if (!idev->dead)
233                 pr_err("Freeing alive in_device %p\n", idev);
234         else
235                 kfree(idev);
236 }
237 EXPORT_SYMBOL(in_dev_finish_destroy);
238
239 static struct in_device *inetdev_init(struct net_device *dev)
240 {
241         struct in_device *in_dev;
242         int err = -ENOMEM;
243
244         ASSERT_RTNL();
245
246         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
247         if (!in_dev)
248                 goto out;
249         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
250                         sizeof(in_dev->cnf));
251         in_dev->cnf.sysctl = NULL;
252         in_dev->dev = dev;
253         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
254         if (!in_dev->arp_parms)
255                 goto out_kfree;
256         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
257                 dev_disable_lro(dev);
258         /* Reference in_dev->dev */
259         dev_hold(dev);
260         /* Account for reference dev->ip_ptr (below) */
261         refcount_set(&in_dev->refcnt, 1);
262
263         err = devinet_sysctl_register(in_dev);
264         if (err) {
265                 in_dev->dead = 1;
266                 in_dev_put(in_dev);
267                 in_dev = NULL;
268                 goto out;
269         }
270         ip_mc_init_dev(in_dev);
271         if (dev->flags & IFF_UP)
272                 ip_mc_up(in_dev);
273
274         /* we can receive as soon as ip_ptr is set -- do this last */
275         rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277         return in_dev ?: ERR_PTR(err);
278 out_kfree:
279         kfree(in_dev);
280         in_dev = NULL;
281         goto out;
282 }
283
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286         struct in_device *idev = container_of(head, struct in_device, rcu_head);
287         in_dev_put(idev);
288 }
289
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292         struct in_ifaddr *ifa;
293         struct net_device *dev;
294
295         ASSERT_RTNL();
296
297         dev = in_dev->dev;
298
299         in_dev->dead = 1;
300
301         ip_mc_destroy_dev(in_dev);
302
303         while ((ifa = in_dev->ifa_list) != NULL) {
304                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305                 inet_free_ifa(ifa);
306         }
307
308         RCU_INIT_POINTER(dev->ip_ptr, NULL);
309
310         devinet_sysctl_unregister(in_dev);
311         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312         arp_ifdown(dev);
313
314         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319         rcu_read_lock();
320         for_primary_ifa(in_dev) {
321                 if (inet_ifa_match(a, ifa)) {
322                         if (!b || inet_ifa_match(b, ifa)) {
323                                 rcu_read_unlock();
324                                 return 1;
325                         }
326                 }
327         } endfor_ifa(in_dev);
328         rcu_read_unlock();
329         return 0;
330 }
331
/*
 * Remove the address that *ifap points at from in_dev's address list and
 * announce the removal.  Asserts that RTNL is held.
 *
 * in_dev:      device whose ifa_list holds the address
 * ifap:        location of the list link that currently points at the
 *              address to remove; it is rewritten to skip that address
 * destroy:     when non-zero the removed address is handed to
 *              inet_free_ifa() at the end
 * nlh, portid: passed through unchanged to every rtmsg_ifa() call
 *
 * If the device is not dead and the removed address is a primary, the
 * secondaries that follow it with the same mask and matching prefix are
 * either deleted as well or, when IN_DEV_PROMOTE_SECONDARIES() is set,
 * the first of them is promoted to primary.
 */
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333                          int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335         struct in_ifaddr *promote = NULL;
336         struct in_ifaddr *ifa, *ifa1 = *ifap;
337         struct in_ifaddr *last_prim = in_dev->ifa_list;
338         struct in_ifaddr *prev_prom = NULL;
339         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340
341         ASSERT_RTNL();
342
/* A dead device skips all secondary handling and is only unlinked. */
343         if (in_dev->dead)
344                 goto no_promotions;
345
346         /* 1. Deleting primary ifaddr forces deletion all secondaries
347          * unless alias promotion is set
348          **/
349
350         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352
353                 while ((ifa = *ifap1) != NULL) {
/* Track the latest primary after ifa1 whose scope value is >= ifa1's. */
354                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355                             ifa1->ifa_scope <= ifa->ifa_scope)
356                                 last_prim = ifa;
357
/* Skip entries that are not secondaries of ifa1's subnet; prev_prom
 * remembers the most recently skipped entry.
 */
358                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359                             ifa1->ifa_mask != ifa->ifa_mask ||
360                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
361                                 ifap1 = &ifa->ifa_next;
362                                 prev_prom = ifa;
363                                 continue;
364                         }
365
/* Without promotion each matching secondary is unlinked, announced and
 * freed; with promotion the first match is kept and the scan stops.
 */
366                         if (!do_promote) {
367                                 inet_hash_remove(ifa);
368                                 *ifap1 = ifa->ifa_next;
369
370                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371                                 blocking_notifier_call_chain(&inetaddr_chain,
372                                                 NETDEV_DOWN, ifa);
373                                 inet_free_ifa(ifa);
374                         } else {
375                                 promote = ifa;
376                                 break;
377                         }
378                 }
379         }
380
381         /* On promotion all secondaries from subnet are changing
382          * the primary IP, we must remove all their routes silently
383          * and later to add them back with new prefsrc. Do this
384          * while all addresses are on the device list.
385          */
386         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387                 if (ifa1->ifa_mask == ifa->ifa_mask &&
388                     inet_ifa_match(ifa1->ifa_address, ifa))
389                         fib_del_ifaddr(ifa, ifa1);
390         }
391
392 no_promotions:
393         /* 2. Unlink it */
394
395         *ifap = ifa1->ifa_next;
396         inet_hash_remove(ifa1);
397
398         /* 3. Announce address deletion */
399
400         /* Send message first, then call notifier.
401            At first sight, FIB update triggered by notifier
402            will refer to already deleted ifaddr, that could confuse
403            netlink listeners. It is not true: look, gated sees
404            that route deleted and if it still thinks that ifaddr
405            is valid, it will try to restore deleted routes... Grr.
406            So that, this order is correct.
407          */
408         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410
411         if (promote) {
/* Saved before relinking: the scan for remaining secondaries starts here. */
412                 struct in_ifaddr *next_sec = promote->ifa_next;
413
/* If entries were skipped before promote was found, move promote so that
 * it directly follows last_prim.
 */
414                 if (prev_prom) {
415                         prev_prom->ifa_next = promote->ifa_next;
416                         promote->ifa_next = last_prim->ifa_next;
417                         last_prim->ifa_next = promote;
418                 }
419
420                 promote->ifa_flags &= ~IFA_F_SECONDARY;
421                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422                 blocking_notifier_call_chain(&inetaddr_chain,
423                                 NETDEV_UP, promote);
/* Re-add the routes of the remaining secondaries of this subnet. */
424                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425                         if (ifa1->ifa_mask != ifa->ifa_mask ||
426                             !inet_ifa_match(ifa1->ifa_address, ifa))
427                                         continue;
428                         fib_add_ifaddr(ifa);
429                 }
430
431         }
432         if (destroy)
433                 inet_free_ifa(ifa1);
434 }
435
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437                          int destroy)
438 {
439         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441
/* Delayed work item that runs check_lifetime(); declared ahead of use. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
445
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447                              u32 portid, struct netlink_ext_ack *extack)
448 {
449         struct in_device *in_dev = ifa->ifa_dev;
450         struct in_ifaddr *ifa1, **ifap, **last_primary;
451         struct in_validator_info ivi;
452         int ret;
453
454         ASSERT_RTNL();
455
456         if (!ifa->ifa_local) {
457                 inet_free_ifa(ifa);
458                 return 0;
459         }
460
461         ifa->ifa_flags &= ~IFA_F_SECONDARY;
462         last_primary = &in_dev->ifa_list;
463
464         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
465              ifap = &ifa1->ifa_next) {
466                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
467                     ifa->ifa_scope <= ifa1->ifa_scope)
468                         last_primary = &ifa1->ifa_next;
469                 if (ifa1->ifa_mask == ifa->ifa_mask &&
470                     inet_ifa_match(ifa1->ifa_address, ifa)) {
471                         if (ifa1->ifa_local == ifa->ifa_local) {
472                                 inet_free_ifa(ifa);
473                                 return -EEXIST;
474                         }
475                         if (ifa1->ifa_scope != ifa->ifa_scope) {
476                                 inet_free_ifa(ifa);
477                                 return -EINVAL;
478                         }
479                         ifa->ifa_flags |= IFA_F_SECONDARY;
480                 }
481         }
482
483         /* Allow any devices that wish to register ifaddr validtors to weigh
484          * in now, before changes are committed.  The rntl lock is serializing
485          * access here, so the state should not change between a validator call
486          * and a final notify on commit.  This isn't invoked on promotion under
487          * the assumption that validators are checking the address itself, and
488          * not the flags.
489          */
490         ivi.ivi_addr = ifa->ifa_address;
491         ivi.ivi_dev = ifa->ifa_dev;
492         ivi.extack = extack;
493         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
494                                            NETDEV_UP, &ivi);
495         ret = notifier_to_errno(ret);
496         if (ret) {
497                 inet_free_ifa(ifa);
498                 return ret;
499         }
500
501         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
502                 prandom_seed((__force u32) ifa->ifa_local);
503                 ifap = last_primary;
504         }
505
506         ifa->ifa_next = *ifap;
507         *ifap = ifa;
508
509         inet_hash_insert(dev_net(in_dev->dev), ifa);
510
511         cancel_delayed_work(&check_lifetime_work);
512         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
513
514         /* Send message first, then call notifier.
515            Notifier will trigger FIB update, so that
516            listeners of netlink will know about new ifaddr */
517         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
518         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
519
520         return 0;
521 }
522
523 static int inet_insert_ifa(struct in_ifaddr *ifa)
524 {
525         return __inet_insert_ifa(ifa, NULL, 0, NULL);
526 }
527
528 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
529 {
530         struct in_device *in_dev = __in_dev_get_rtnl(dev);
531
532         ASSERT_RTNL();
533
534         if (!in_dev) {
535                 inet_free_ifa(ifa);
536                 return -ENOBUFS;
537         }
538         ipv4_devconf_setall(in_dev);
539         neigh_parms_data_state_setall(in_dev->arp_parms);
540         if (ifa->ifa_dev != in_dev) {
541                 WARN_ON(ifa->ifa_dev);
542                 in_dev_hold(in_dev);
543                 ifa->ifa_dev = in_dev;
544         }
545         if (ipv4_is_loopback(ifa->ifa_local))
546                 ifa->ifa_scope = RT_SCOPE_HOST;
547         return inet_insert_ifa(ifa);
548 }
549
550 /* Caller must hold RCU or RTNL :
551  * We dont take a reference on found in_device
552  */
553 struct in_device *inetdev_by_index(struct net *net, int ifindex)
554 {
555         struct net_device *dev;
556         struct in_device *in_dev = NULL;
557
558         rcu_read_lock();
559         dev = dev_get_by_index_rcu(net, ifindex);
560         if (dev)
561                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
562         rcu_read_unlock();
563         return in_dev;
564 }
565 EXPORT_SYMBOL(inetdev_by_index);
566
567 /* Called only from RTNL semaphored context. No locks. */
568
569 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
570                                     __be32 mask)
571 {
572         ASSERT_RTNL();
573
574         for_primary_ifa(in_dev) {
575                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
576                         return ifa;
577         } endfor_ifa(in_dev);
578         return NULL;
579 }
580
581 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
582 {
583         struct ip_mreqn mreq = {
584                 .imr_multiaddr.s_addr = ifa->ifa_address,
585                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
586         };
587         int ret;
588
589         ASSERT_RTNL();
590
591         lock_sock(sk);
592         if (join)
593                 ret = ip_mc_join_group(sk, &mreq);
594         else
595                 ret = ip_mc_leave_group(sk, &mreq);
596         release_sock(sk);
597
598         return ret;
599 }
600
601 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
602                             struct netlink_ext_ack *extack)
603 {
604         struct net *net = sock_net(skb->sk);
605         struct nlattr *tb[IFA_MAX+1];
606         struct in_device *in_dev;
607         struct ifaddrmsg *ifm;
608         struct in_ifaddr *ifa, **ifap;
609         int err = -EINVAL;
610
611         ASSERT_RTNL();
612
613         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
614                           extack);
615         if (err < 0)
616                 goto errout;
617
618         ifm = nlmsg_data(nlh);
619         in_dev = inetdev_by_index(net, ifm->ifa_index);
620         if (!in_dev) {
621                 err = -ENODEV;
622                 goto errout;
623         }
624
625         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
626              ifap = &ifa->ifa_next) {
627                 if (tb[IFA_LOCAL] &&
628                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
629                         continue;
630
631                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
632                         continue;
633
634                 if (tb[IFA_ADDRESS] &&
635                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
636                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
637                         continue;
638
639                 if (ipv4_is_multicast(ifa->ifa_address))
640                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
641                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
642                 return 0;
643         }
644
645         err = -EADDRNOTAVAIL;
646 errout:
647         return err;
648 }
649
650 #define INFINITY_LIFE_TIME      0xFFFFFFFF
651
652 static void check_lifetime(struct work_struct *work)
653 {
654         unsigned long now, next, next_sec, next_sched;
655         struct in_ifaddr *ifa;
656         struct hlist_node *n;
657         int i;
658
659         now = jiffies;
660         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
661
662         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
663                 bool change_needed = false;
664
665                 rcu_read_lock();
666                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
667                         unsigned long age;
668
669                         if (ifa->ifa_flags & IFA_F_PERMANENT)
670                                 continue;
671
672                         /* We try to batch several events at once. */
673                         age = (now - ifa->ifa_tstamp +
674                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
675
676                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
677                             age >= ifa->ifa_valid_lft) {
678                                 change_needed = true;
679                         } else if (ifa->ifa_preferred_lft ==
680                                    INFINITY_LIFE_TIME) {
681                                 continue;
682                         } else if (age >= ifa->ifa_preferred_lft) {
683                                 if (time_before(ifa->ifa_tstamp +
684                                                 ifa->ifa_valid_lft * HZ, next))
685                                         next = ifa->ifa_tstamp +
686                                                ifa->ifa_valid_lft * HZ;
687
688                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
689                                         change_needed = true;
690                         } else if (time_before(ifa->ifa_tstamp +
691                                                ifa->ifa_preferred_lft * HZ,
692                                                next)) {
693                                 next = ifa->ifa_tstamp +
694                                        ifa->ifa_preferred_lft * HZ;
695                         }
696                 }
697                 rcu_read_unlock();
698                 if (!change_needed)
699                         continue;
700                 rtnl_lock();
701                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
702                         unsigned long age;
703
704                         if (ifa->ifa_flags & IFA_F_PERMANENT)
705                                 continue;
706
707                         /* We try to batch several events at once. */
708                         age = (now - ifa->ifa_tstamp +
709                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
710
711                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
712                             age >= ifa->ifa_valid_lft) {
713                                 struct in_ifaddr **ifap;
714
715                                 for (ifap = &ifa->ifa_dev->ifa_list;
716                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
717                                         if (*ifap == ifa) {
718                                                 inet_del_ifa(ifa->ifa_dev,
719                                                              ifap, 1);
720                                                 break;
721                                         }
722                                 }
723                         } else if (ifa->ifa_preferred_lft !=
724                                    INFINITY_LIFE_TIME &&
725                                    age >= ifa->ifa_preferred_lft &&
726                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
727                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
728                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
729                         }
730                 }
731                 rtnl_unlock();
732         }
733
734         next_sec = round_jiffies_up(next);
735         next_sched = next;
736
737         /* If rounded timeout is accurate enough, accept it. */
738         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
739                 next_sched = next_sec;
740
741         now = jiffies;
742         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
743         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
744                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
745
746         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
747                         next_sched - now);
748 }
749
750 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
751                              __u32 prefered_lft)
752 {
753         unsigned long timeout;
754
755         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
756
757         timeout = addrconf_timeout_fixup(valid_lft, HZ);
758         if (addrconf_finite_timeout(timeout))
759                 ifa->ifa_valid_lft = timeout;
760         else
761                 ifa->ifa_flags |= IFA_F_PERMANENT;
762
763         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
764         if (addrconf_finite_timeout(timeout)) {
765                 if (timeout == 0)
766                         ifa->ifa_flags |= IFA_F_DEPRECATED;
767                 ifa->ifa_preferred_lft = timeout;
768         }
769         ifa->ifa_tstamp = jiffies;
770         if (!ifa->ifa_cstamp)
771                 ifa->ifa_cstamp = ifa->ifa_tstamp;
772 }
773
774 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
775                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
776 {
777         struct nlattr *tb[IFA_MAX+1];
778         struct in_ifaddr *ifa;
779         struct ifaddrmsg *ifm;
780         struct net_device *dev;
781         struct in_device *in_dev;
782         int err;
783
784         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
785                           NULL);
786         if (err < 0)
787                 goto errout;
788
789         ifm = nlmsg_data(nlh);
790         err = -EINVAL;
791         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
792                 goto errout;
793
794         dev = __dev_get_by_index(net, ifm->ifa_index);
795         err = -ENODEV;
796         if (!dev)
797                 goto errout;
798
799         in_dev = __in_dev_get_rtnl(dev);
800         err = -ENOBUFS;
801         if (!in_dev)
802                 goto errout;
803
804         ifa = inet_alloc_ifa();
805         if (!ifa)
806                 /*
807                  * A potential indev allocation can be left alive, it stays
808                  * assigned to its device and is destroy with it.
809                  */
810                 goto errout;
811
812         ipv4_devconf_setall(in_dev);
813         neigh_parms_data_state_setall(in_dev->arp_parms);
814         in_dev_hold(in_dev);
815
816         if (!tb[IFA_ADDRESS])
817                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
818
819         INIT_HLIST_NODE(&ifa->hash);
820         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
821         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
822         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
823                                          ifm->ifa_flags;
824         ifa->ifa_scope = ifm->ifa_scope;
825         ifa->ifa_dev = in_dev;
826
827         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
828         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
829
830         if (tb[IFA_BROADCAST])
831                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
832
833         if (tb[IFA_LABEL])
834                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
835         else
836                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837
838         if (tb[IFA_CACHEINFO]) {
839                 struct ifa_cacheinfo *ci;
840
841                 ci = nla_data(tb[IFA_CACHEINFO]);
842                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
843                         err = -EINVAL;
844                         goto errout_free;
845                 }
846                 *pvalid_lft = ci->ifa_valid;
847                 *pprefered_lft = ci->ifa_prefered;
848         }
849
850         return ifa;
851
852 errout_free:
853         inet_free_ifa(ifa);
854 errout:
855         return ERR_PTR(err);
856 }
857
858 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
859 {
860         struct in_device *in_dev = ifa->ifa_dev;
861         struct in_ifaddr *ifa1, **ifap;
862
863         if (!ifa->ifa_local)
864                 return NULL;
865
866         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
867              ifap = &ifa1->ifa_next) {
868                 if (ifa1->ifa_mask == ifa->ifa_mask &&
869                     inet_ifa_match(ifa1->ifa_address, ifa) &&
870                     ifa1->ifa_local == ifa->ifa_local)
871                         return ifa1;
872         }
873         return NULL;
874 }
875
876 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
877                             struct netlink_ext_ack *extack)
878 {
879         struct net *net = sock_net(skb->sk);
880         struct in_ifaddr *ifa;
881         struct in_ifaddr *ifa_existing;
882         __u32 valid_lft = INFINITY_LIFE_TIME;
883         __u32 prefered_lft = INFINITY_LIFE_TIME;
884
885         ASSERT_RTNL();
886
887         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
888         if (IS_ERR(ifa))
889                 return PTR_ERR(ifa);
890
891         ifa_existing = find_matching_ifa(ifa);
892         if (!ifa_existing) {
893                 /* It would be best to check for !NLM_F_CREATE here but
894                  * userspace already relies on not having to provide this.
895                  */
896                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
897                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
898                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
899                                                true, ifa);
900
901                         if (ret < 0) {
902                                 inet_free_ifa(ifa);
903                                 return ret;
904                         }
905                 }
906                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
907                                          extack);
908         } else {
909                 inet_free_ifa(ifa);
910
911                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
912                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
913                         return -EEXIST;
914                 ifa = ifa_existing;
915                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
916                 cancel_delayed_work(&check_lifetime_work);
917                 queue_delayed_work(system_power_efficient_wq,
918                                 &check_lifetime_work, 0);
919                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
920         }
921         return 0;
922 }
923
924 /*
925  *      Determine a default network mask, based on the IP address.
926  */
927
928 static int inet_abc_len(__be32 addr)
929 {
930         int rc = -1;    /* Something else, probably a multicast. */
931
932         if (ipv4_is_zeronet(addr))
933                 rc = 0;
934         else {
935                 __u32 haddr = ntohl(addr);
936
937                 if (IN_CLASSA(haddr))
938                         rc = 8;
939                 else if (IN_CLASSB(haddr))
940                         rc = 16;
941                 else if (IN_CLASSC(haddr))
942                         rc = 24;
943         }
944
945         return rc;
946 }
947
948
949 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
950 {
951         struct sockaddr_in sin_orig;
952         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
953         struct in_device *in_dev;
954         struct in_ifaddr **ifap = NULL;
955         struct in_ifaddr *ifa = NULL;
956         struct net_device *dev;
957         char *colon;
958         int ret = -EFAULT;
959         int tryaddrmatch = 0;
960
961         ifr->ifr_name[IFNAMSIZ - 1] = 0;
962
963         /* save original address for comparison */
964         memcpy(&sin_orig, sin, sizeof(*sin));
965
966         colon = strchr(ifr->ifr_name, ':');
967         if (colon)
968                 *colon = 0;
969
970         dev_load(net, ifr->ifr_name);
971
972         switch (cmd) {
973         case SIOCGIFADDR:       /* Get interface address */
974         case SIOCGIFBRDADDR:    /* Get the broadcast address */
975         case SIOCGIFDSTADDR:    /* Get the destination address */
976         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
977                 /* Note that these ioctls will not sleep,
978                    so that we do not impose a lock.
979                    One day we will be forced to put shlock here (I mean SMP)
980                  */
981                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
982                 memset(sin, 0, sizeof(*sin));
983                 sin->sin_family = AF_INET;
984                 break;
985
986         case SIOCSIFFLAGS:
987                 ret = -EPERM;
988                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
989                         goto out;
990                 break;
991         case SIOCSIFADDR:       /* Set interface address (and family) */
992         case SIOCSIFBRDADDR:    /* Set the broadcast address */
993         case SIOCSIFDSTADDR:    /* Set the destination address */
994         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
995                 ret = -EPERM;
996                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
997                         goto out;
998                 ret = -EINVAL;
999                 if (sin->sin_family != AF_INET)
1000                         goto out;
1001                 break;
1002         default:
1003                 ret = -EINVAL;
1004                 goto out;
1005         }
1006
1007         rtnl_lock();
1008
1009         ret = -ENODEV;
1010         dev = __dev_get_by_name(net, ifr->ifr_name);
1011         if (!dev)
1012                 goto done;
1013
1014         if (colon)
1015                 *colon = ':';
1016
1017         in_dev = __in_dev_get_rtnl(dev);
1018         if (in_dev) {
1019                 if (tryaddrmatch) {
1020                         /* Matthias Andree */
1021                         /* compare label and address (4.4BSD style) */
1022                         /* note: we only do this for a limited set of ioctls
1023                            and only if the original address family was AF_INET.
1024                            This is checked above. */
1025                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1026                              ifap = &ifa->ifa_next) {
1027                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1028                                     sin_orig.sin_addr.s_addr ==
1029                                                         ifa->ifa_local) {
1030                                         break; /* found */
1031                                 }
1032                         }
1033                 }
1034                 /* we didn't get a match, maybe the application is
1035                    4.3BSD-style and passed in junk so we fall back to
1036                    comparing just the label */
1037                 if (!ifa) {
1038                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1039                              ifap = &ifa->ifa_next)
1040                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1041                                         break;
1042                 }
1043         }
1044
1045         ret = -EADDRNOTAVAIL;
1046         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1047                 goto done;
1048
1049         switch (cmd) {
1050         case SIOCGIFADDR:       /* Get interface address */
1051                 sin->sin_addr.s_addr = ifa->ifa_local;
1052                 break;
1053
1054         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1055                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1056                 break;
1057
1058         case SIOCGIFDSTADDR:    /* Get the destination address */
1059                 sin->sin_addr.s_addr = ifa->ifa_address;
1060                 break;
1061
1062         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1063                 sin->sin_addr.s_addr = ifa->ifa_mask;
1064                 break;
1065
1066         case SIOCSIFFLAGS:
1067                 if (colon) {
1068                         ret = -EADDRNOTAVAIL;
1069                         if (!ifa)
1070                                 break;
1071                         ret = 0;
1072                         if (!(ifr->ifr_flags & IFF_UP))
1073                                 inet_del_ifa(in_dev, ifap, 1);
1074                         break;
1075                 }
1076                 ret = dev_change_flags(dev, ifr->ifr_flags);
1077                 break;
1078
1079         case SIOCSIFADDR:       /* Set interface address (and family) */
1080                 ret = -EINVAL;
1081                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1082                         break;
1083
1084                 if (!ifa) {
1085                         ret = -ENOBUFS;
1086                         ifa = inet_alloc_ifa();
1087                         if (!ifa)
1088                                 break;
1089                         INIT_HLIST_NODE(&ifa->hash);
1090                         if (colon)
1091                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1092                         else
1093                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1094                 } else {
1095                         ret = 0;
1096                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1097                                 break;
1098                         inet_del_ifa(in_dev, ifap, 0);
1099                         ifa->ifa_broadcast = 0;
1100                         ifa->ifa_scope = 0;
1101                 }
1102
1103                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1104
1105                 if (!(dev->flags & IFF_POINTOPOINT)) {
1106                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1107                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1108                         if ((dev->flags & IFF_BROADCAST) &&
1109                             ifa->ifa_prefixlen < 31)
1110                                 ifa->ifa_broadcast = ifa->ifa_address |
1111                                                      ~ifa->ifa_mask;
1112                 } else {
1113                         ifa->ifa_prefixlen = 32;
1114                         ifa->ifa_mask = inet_make_mask(32);
1115                 }
1116                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1117                 ret = inet_set_ifa(dev, ifa);
1118                 break;
1119
1120         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1121                 ret = 0;
1122                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1123                         inet_del_ifa(in_dev, ifap, 0);
1124                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1125                         inet_insert_ifa(ifa);
1126                 }
1127                 break;
1128
1129         case SIOCSIFDSTADDR:    /* Set the destination address */
1130                 ret = 0;
1131                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1132                         break;
1133                 ret = -EINVAL;
1134                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1135                         break;
1136                 ret = 0;
1137                 inet_del_ifa(in_dev, ifap, 0);
1138                 ifa->ifa_address = sin->sin_addr.s_addr;
1139                 inet_insert_ifa(ifa);
1140                 break;
1141
1142         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1143
1144                 /*
1145                  *      The mask we set must be legal.
1146                  */
1147                 ret = -EINVAL;
1148                 if (bad_mask(sin->sin_addr.s_addr, 0))
1149                         break;
1150                 ret = 0;
1151                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1152                         __be32 old_mask = ifa->ifa_mask;
1153                         inet_del_ifa(in_dev, ifap, 0);
1154                         ifa->ifa_mask = sin->sin_addr.s_addr;
1155                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1156
1157                         /* See if current broadcast address matches
1158                          * with current netmask, then recalculate
1159                          * the broadcast address. Otherwise it's a
1160                          * funny address, so don't touch it since
1161                          * the user seems to know what (s)he's doing...
1162                          */
1163                         if ((dev->flags & IFF_BROADCAST) &&
1164                             (ifa->ifa_prefixlen < 31) &&
1165                             (ifa->ifa_broadcast ==
1166                              (ifa->ifa_local|~old_mask))) {
1167                                 ifa->ifa_broadcast = (ifa->ifa_local |
1168                                                       ~sin->sin_addr.s_addr);
1169                         }
1170                         inet_insert_ifa(ifa);
1171                 }
1172                 break;
1173         }
1174 done:
1175         rtnl_unlock();
1176 out:
1177         return ret;
1178 }
1179
1180 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1181 {
1182         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1183         struct in_ifaddr *ifa;
1184         struct ifreq ifr;
1185         int done = 0;
1186
1187         if (WARN_ON(size > sizeof(struct ifreq)))
1188                 goto out;
1189
1190         if (!in_dev)
1191                 goto out;
1192
1193         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1194                 if (!buf) {
1195                         done += size;
1196                         continue;
1197                 }
1198                 if (len < size)
1199                         break;
1200                 memset(&ifr, 0, sizeof(struct ifreq));
1201                 strcpy(ifr.ifr_name, ifa->ifa_label);
1202
1203                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1204                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1205                                                                 ifa->ifa_local;
1206
1207                 if (copy_to_user(buf + done, &ifr, size)) {
1208                         done = -EFAULT;
1209                         break;
1210                 }
1211                 len  -= size;
1212                 done += size;
1213         }
1214 out:
1215         return done;
1216 }
1217
1218 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1219                                  int scope)
1220 {
1221         for_primary_ifa(in_dev) {
1222                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1223                     ifa->ifa_scope <= scope)
1224                         return ifa->ifa_local;
1225         } endfor_ifa(in_dev);
1226
1227         return 0;
1228 }
1229
1230 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1231 {
1232         __be32 addr = 0;
1233         struct in_device *in_dev;
1234         struct net *net = dev_net(dev);
1235         int master_idx;
1236
1237         rcu_read_lock();
1238         in_dev = __in_dev_get_rcu(dev);
1239         if (!in_dev)
1240                 goto no_in_dev;
1241
1242         for_primary_ifa(in_dev) {
1243                 if (ifa->ifa_scope > scope)
1244                         continue;
1245                 if (!dst || inet_ifa_match(dst, ifa)) {
1246                         addr = ifa->ifa_local;
1247                         break;
1248                 }
1249                 if (!addr)
1250                         addr = ifa->ifa_local;
1251         } endfor_ifa(in_dev);
1252
1253         if (addr)
1254                 goto out_unlock;
1255 no_in_dev:
1256         master_idx = l3mdev_master_ifindex_rcu(dev);
1257
1258         /* For VRFs, the VRF device takes the place of the loopback device,
1259          * with addresses on it being preferred.  Note in such cases the
1260          * loopback device will be among the devices that fail the master_idx
1261          * equality check in the loop below.
1262          */
1263         if (master_idx &&
1264             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1265             (in_dev = __in_dev_get_rcu(dev))) {
1266                 addr = in_dev_select_addr(in_dev, scope);
1267                 if (addr)
1268                         goto out_unlock;
1269         }
1270
1271         /* Not loopback addresses on loopback should be preferred
1272            in this case. It is important that lo is the first interface
1273            in dev_base list.
1274          */
1275         for_each_netdev_rcu(net, dev) {
1276                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1277                         continue;
1278
1279                 in_dev = __in_dev_get_rcu(dev);
1280                 if (!in_dev)
1281                         continue;
1282
1283                 addr = in_dev_select_addr(in_dev, scope);
1284                 if (addr)
1285                         goto out_unlock;
1286         }
1287 out_unlock:
1288         rcu_read_unlock();
1289         return addr;
1290 }
1291 EXPORT_SYMBOL(inet_select_addr);
1292
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *  - addr: the first address whose ifa_local equals @local (any address
 *    when @local is 0) and whose scope value does not exceed @scope;
 *  - same: whether some address has been seen for which inet_ifa_match()
 *    accepts both @local and @dst (a zero @local / @dst always passes).
 *
 * Returns addr if the "same" condition was satisfied, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* Pick the candidate address once; if the subnet condition
                 * was already met on an earlier entry we are done.
                 */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* With an explicit local address, or with
                                 * no dst constraint, the candidate stands.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
1329
1330 /*
1331  * Confirm that local IP address exists using wildcards:
1332  * - net: netns to check, cannot be NULL
1333  * - in_dev: only on this interface, NULL=any interface
1334  * - dst: only in the same subnet as dst, 0=any dst
1335  * - local: address, 0=autoselect the local address
1336  * - scope: maximum allowed scope value for the local address
1337  */
1338 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1339                          __be32 dst, __be32 local, int scope)
1340 {
1341         __be32 addr = 0;
1342         struct net_device *dev;
1343
1344         if (in_dev)
1345                 return confirm_addr_indev(in_dev, dst, local, scope);
1346
1347         rcu_read_lock();
1348         for_each_netdev_rcu(net, dev) {
1349                 in_dev = __in_dev_get_rcu(dev);
1350                 if (in_dev) {
1351                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1352                         if (addr)
1353                                 break;
1354                 }
1355         }
1356         rcu_read_unlock();
1357
1358         return addr;
1359 }
1360 EXPORT_SYMBOL(inet_confirm_addr);
1361
1362 /*
1363  *      Device notifier
1364  */
1365
1366 int register_inetaddr_notifier(struct notifier_block *nb)
1367 {
1368         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1369 }
1370 EXPORT_SYMBOL(register_inetaddr_notifier);
1371
1372 int unregister_inetaddr_notifier(struct notifier_block *nb)
1373 {
1374         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1375 }
1376 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1377
1378 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1379 {
1380         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1381 }
1382 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1383
1384 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1385 {
1386         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1387             nb);
1388 }
1389 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1390
1391 /* Rename ifa_labels for a device name change. Make some effort to preserve
1392  * existing alias numbering and to create unique labels if possible.
1393 */
1394 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1395 {
1396         struct in_ifaddr *ifa;
1397         int named = 0;
1398
1399         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1400                 char old[IFNAMSIZ], *dot;
1401
1402                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1403                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1404                 if (named++ == 0)
1405                         goto skip;
1406                 dot = strchr(old, ':');
1407                 if (!dot) {
1408                         sprintf(old, ":%d", named);
1409                         dot = old;
1410                 }
1411                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1412                         strcat(ifa->ifa_label, dot);
1413                 else
1414                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1415 skip:
1416                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1417         }
1418 }
1419
1420 static bool inetdev_valid_mtu(unsigned int mtu)
1421 {
1422         return mtu >= IPV4_MIN_MTU;
1423 }
1424
1425 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1426                                         struct in_device *in_dev)
1427
1428 {
1429         struct in_ifaddr *ifa;
1430
1431         for (ifa = in_dev->ifa_list; ifa;
1432              ifa = ifa->ifa_next) {
1433                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1434                          ifa->ifa_local, dev,
1435                          ifa->ifa_local, NULL,
1436                          dev->dev_addr, NULL);
1437         }
1438 }
1439
1440 /* Called only under RTNL semaphore */
1441
1442 static int inetdev_event(struct notifier_block *this, unsigned long event,
1443                          void *ptr)
1444 {
1445         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1446         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1447
1448         ASSERT_RTNL();
1449
1450         if (!in_dev) {
1451                 if (event == NETDEV_REGISTER) {
1452                         in_dev = inetdev_init(dev);
1453                         if (IS_ERR(in_dev))
1454                                 return notifier_from_errno(PTR_ERR(in_dev));
1455                         if (dev->flags & IFF_LOOPBACK) {
1456                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1457                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1458                         }
1459                 } else if (event == NETDEV_CHANGEMTU) {
1460                         /* Re-enabling IP */
1461                         if (inetdev_valid_mtu(dev->mtu))
1462                                 in_dev = inetdev_init(dev);
1463                 }
1464                 goto out;
1465         }
1466
1467         switch (event) {
1468         case NETDEV_REGISTER:
1469                 pr_debug("%s: bug\n", __func__);
1470                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1471                 break;
1472         case NETDEV_UP:
1473                 if (!inetdev_valid_mtu(dev->mtu))
1474                         break;
1475                 if (dev->flags & IFF_LOOPBACK) {
1476                         struct in_ifaddr *ifa = inet_alloc_ifa();
1477
1478                         if (ifa) {
1479                                 INIT_HLIST_NODE(&ifa->hash);
1480                                 ifa->ifa_local =
1481                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1482                                 ifa->ifa_prefixlen = 8;
1483                                 ifa->ifa_mask = inet_make_mask(8);
1484                                 in_dev_hold(in_dev);
1485                                 ifa->ifa_dev = in_dev;
1486                                 ifa->ifa_scope = RT_SCOPE_HOST;
1487                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1488                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1489                                                  INFINITY_LIFE_TIME);
1490                                 ipv4_devconf_setall(in_dev);
1491                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1492                                 inet_insert_ifa(ifa);
1493                         }
1494                 }
1495                 ip_mc_up(in_dev);
1496                 /* fall through */
1497         case NETDEV_CHANGEADDR:
1498                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1499                         break;
1500                 /* fall through */
1501         case NETDEV_NOTIFY_PEERS:
1502                 /* Send gratuitous ARP to notify of link change */
1503                 inetdev_send_gratuitous_arp(dev, in_dev);
1504                 break;
1505         case NETDEV_DOWN:
1506                 ip_mc_down(in_dev);
1507                 break;
1508         case NETDEV_PRE_TYPE_CHANGE:
1509                 ip_mc_unmap(in_dev);
1510                 break;
1511         case NETDEV_POST_TYPE_CHANGE:
1512                 ip_mc_remap(in_dev);
1513                 break;
1514         case NETDEV_CHANGEMTU:
1515                 if (inetdev_valid_mtu(dev->mtu))
1516                         break;
1517                 /* disable IP when MTU is not enough */
1518                 /* fall through */
1519         case NETDEV_UNREGISTER:
1520                 inetdev_destroy(in_dev);
1521                 break;
1522         case NETDEV_CHANGENAME:
1523                 /* Do not notify about label change, this event is
1524                  * not interesting to applications using netlink.
1525                  */
1526                 inetdev_changename(dev, in_dev);
1527
1528                 devinet_sysctl_unregister(in_dev);
1529                 devinet_sysctl_register(in_dev);
1530                 break;
1531         }
1532 out:
1533         return NOTIFY_DONE;
1534 }
1535
/* Notifier block whose callback is inetdev_event(); where it gets
 * registered is outside this part of the file.
 */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1539
1540 static size_t inet_nlmsg_size(void)
1541 {
1542         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1543                + nla_total_size(4) /* IFA_ADDRESS */
1544                + nla_total_size(4) /* IFA_LOCAL */
1545                + nla_total_size(4) /* IFA_BROADCAST */
1546                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1547                + nla_total_size(4)  /* IFA_FLAGS */
1548                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1549 }
1550
1551 static inline u32 cstamp_delta(unsigned long cstamp)
1552 {
1553         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1554 }
1555
1556 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1557                          unsigned long tstamp, u32 preferred, u32 valid)
1558 {
1559         struct ifa_cacheinfo ci;
1560
1561         ci.cstamp = cstamp_delta(cstamp);
1562         ci.tstamp = cstamp_delta(tstamp);
1563         ci.ifa_prefered = preferred;
1564         ci.ifa_valid = valid;
1565
1566         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1567 }
1568
1569 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1570                             u32 portid, u32 seq, int event, unsigned int flags)
1571 {
1572         struct ifaddrmsg *ifm;
1573         struct nlmsghdr  *nlh;
1574         u32 preferred, valid;
1575
1576         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1577         if (!nlh)
1578                 return -EMSGSIZE;
1579
1580         ifm = nlmsg_data(nlh);
1581         ifm->ifa_family = AF_INET;
1582         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1583         ifm->ifa_flags = ifa->ifa_flags;
1584         ifm->ifa_scope = ifa->ifa_scope;
1585         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1586
1587         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1588                 preferred = ifa->ifa_preferred_lft;
1589                 valid = ifa->ifa_valid_lft;
1590                 if (preferred != INFINITY_LIFE_TIME) {
1591                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1592
1593                         if (preferred > tval)
1594                                 preferred -= tval;
1595                         else
1596                                 preferred = 0;
1597                         if (valid != INFINITY_LIFE_TIME) {
1598                                 if (valid > tval)
1599                                         valid -= tval;
1600                                 else
1601                                         valid = 0;
1602                         }
1603                 }
1604         } else {
1605                 preferred = INFINITY_LIFE_TIME;
1606                 valid = INFINITY_LIFE_TIME;
1607         }
1608         if ((ifa->ifa_address &&
1609              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1610             (ifa->ifa_local &&
1611              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1612             (ifa->ifa_broadcast &&
1613              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1614             (ifa->ifa_label[0] &&
1615              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1616             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1617             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1618                           preferred, valid))
1619                 goto nla_put_failure;
1620
1621         nlmsg_end(skb, nlh);
1622         return 0;
1623
1624 nla_put_failure:
1625         nlmsg_cancel(skb, nlh);
1626         return -EMSGSIZE;
1627 }
1628
/*
 * Netlink dump callback: emit one RTM_NEWADDR (NLM_F_MULTI) message per
 * IPv4 address of every device in the namespace of the requesting socket.
 *
 * The dump is resumable.  cb->args[] holds the position to continue from:
 *   args[0] - bucket in net->dev_index_head
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 * When inet_fill_ifaddr() fails the current position is stored and the
 * function returns; it always returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;

        /* Pick up where the previous invocation stopped. */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* After the first bucket, every device in a bucket is visited
         * (s_idx is reset by the loop increment).
         */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                /* Sequence value used by nl_dump_check_consistent() below. */
                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
                          net->dev_base_seq;
                hlist_for_each_entry_rcu(dev, head, index_hlist) {
                        /* Skip devices already dumped in an earlier pass. */
                        if (idx < s_idx)
                                goto cont;
                        /* Past the resume device: start at its first address. */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                if (ip_idx < s_ip_idx)
                                        continue;
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).portid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) < 0) {
                                        /* Could not add this address:
                                         * save the position and stop.
                                         */
                                        rcu_read_unlock();
                                        goto done;
                                }
                                nl_dump_check_consistent(cb, nlmsg_hdr(skb));
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        /* Record the position for the next invocation. */
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1685
1686 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1687                       u32 portid)
1688 {
1689         struct sk_buff *skb;
1690         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1691         int err = -ENOBUFS;
1692         struct net *net;
1693
1694         net = dev_net(ifa->ifa_dev->dev);
1695         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1696         if (!skb)
1697                 goto errout;
1698
1699         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1700         if (err < 0) {
1701                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1702                 WARN_ON(err == -EMSGSIZE);
1703                 kfree_skb(skb);
1704                 goto errout;
1705         }
1706         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1707         return;
1708 errout:
1709         if (err < 0)
1710                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1711 }
1712
1713 static size_t inet_get_link_af_size(const struct net_device *dev,
1714                                     u32 ext_filter_mask)
1715 {
1716         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1717
1718         if (!in_dev)
1719                 return 0;
1720
1721         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1722 }
1723
1724 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1725                              u32 ext_filter_mask)
1726 {
1727         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1728         struct nlattr *nla;
1729         int i;
1730
1731         if (!in_dev)
1732                 return -ENODATA;
1733
1734         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1735         if (!nla)
1736                 return -EMSGSIZE;
1737
1738         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1739                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1740
1741         return 0;
1742 }
1743
1744 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1745         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1746 };
1747
1748 static int inet_validate_link_af(const struct net_device *dev,
1749                                  const struct nlattr *nla)
1750 {
1751         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1752         int err, rem;
1753
1754         if (dev && !__in_dev_get_rcu(dev))
1755                 return -EAFNOSUPPORT;
1756
1757         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1758         if (err < 0)
1759                 return err;
1760
1761         if (tb[IFLA_INET_CONF]) {
1762                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1763                         int cfgid = nla_type(a);
1764
1765                         if (nla_len(a) < 4)
1766                                 return -EINVAL;
1767
1768                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1769                                 return -EINVAL;
1770                 }
1771         }
1772
1773         return 0;
1774 }
1775
1776 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1777 {
1778         struct in_device *in_dev = __in_dev_get_rcu(dev);
1779         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1780         int rem;
1781
1782         if (!in_dev)
1783                 return -EAFNOSUPPORT;
1784
1785         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1786                 BUG();
1787
1788         if (tb[IFLA_INET_CONF]) {
1789                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1790                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1791         }
1792
1793         return 0;
1794 }
1795
1796 static int inet_netconf_msgsize_devconf(int type)
1797 {
1798         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1799                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1800         bool all = false;
1801
1802         if (type == NETCONFA_ALL)
1803                 all = true;
1804
1805         if (all || type == NETCONFA_FORWARDING)
1806                 size += nla_total_size(4);
1807         if (all || type == NETCONFA_RP_FILTER)
1808                 size += nla_total_size(4);
1809         if (all || type == NETCONFA_MC_FORWARDING)
1810                 size += nla_total_size(4);
1811         if (all || type == NETCONFA_PROXY_NEIGH)
1812                 size += nla_total_size(4);
1813         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1814                 size += nla_total_size(4);
1815
1816         return size;
1817 }
1818
1819 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1820                                      struct ipv4_devconf *devconf, u32 portid,
1821                                      u32 seq, int event, unsigned int flags,
1822                                      int type)
1823 {
1824         struct nlmsghdr  *nlh;
1825         struct netconfmsg *ncm;
1826         bool all = false;
1827
1828         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1829                         flags);
1830         if (!nlh)
1831                 return -EMSGSIZE;
1832
1833         if (type == NETCONFA_ALL)
1834                 all = true;
1835
1836         ncm = nlmsg_data(nlh);
1837         ncm->ncm_family = AF_INET;
1838
1839         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1840                 goto nla_put_failure;
1841
1842         if (!devconf)
1843                 goto out;
1844
1845         if ((all || type == NETCONFA_FORWARDING) &&
1846             nla_put_s32(skb, NETCONFA_FORWARDING,
1847                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1848                 goto nla_put_failure;
1849         if ((all || type == NETCONFA_RP_FILTER) &&
1850             nla_put_s32(skb, NETCONFA_RP_FILTER,
1851                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1852                 goto nla_put_failure;
1853         if ((all || type == NETCONFA_MC_FORWARDING) &&
1854             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1855                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1856                 goto nla_put_failure;
1857         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1858             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1859                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1860                 goto nla_put_failure;
1861         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1862             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1863                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1864                 goto nla_put_failure;
1865
1866 out:
1867         nlmsg_end(skb, nlh);
1868         return 0;
1869
1870 nla_put_failure:
1871         nlmsg_cancel(skb, nlh);
1872         return -EMSGSIZE;
1873 }
1874
1875 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1876                                  int ifindex, struct ipv4_devconf *devconf)
1877 {
1878         struct sk_buff *skb;
1879         int err = -ENOBUFS;
1880
1881         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1882         if (!skb)
1883                 goto errout;
1884
1885         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1886                                         event, 0, type);
1887         if (err < 0) {
1888                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1889                 WARN_ON(err == -EMSGSIZE);
1890                 kfree_skb(skb);
1891                 goto errout;
1892         }
1893         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1894         return;
1895 errout:
1896         if (err < 0)
1897                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1898 }
1899
1900 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1901         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1902         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1903         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1904         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1905         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1906 };
1907
1908 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1909                                     struct nlmsghdr *nlh,
1910                                     struct netlink_ext_ack *extack)
1911 {
1912         struct net *net = sock_net(in_skb->sk);
1913         struct nlattr *tb[NETCONFA_MAX+1];
1914         struct netconfmsg *ncm;
1915         struct sk_buff *skb;
1916         struct ipv4_devconf *devconf;
1917         struct in_device *in_dev;
1918         struct net_device *dev;
1919         int ifindex;
1920         int err;
1921
1922         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1923                           devconf_ipv4_policy, extack);
1924         if (err < 0)
1925                 goto errout;
1926
1927         err = -EINVAL;
1928         if (!tb[NETCONFA_IFINDEX])
1929                 goto errout;
1930
1931         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1932         switch (ifindex) {
1933         case NETCONFA_IFINDEX_ALL:
1934                 devconf = net->ipv4.devconf_all;
1935                 break;
1936         case NETCONFA_IFINDEX_DEFAULT:
1937                 devconf = net->ipv4.devconf_dflt;
1938                 break;
1939         default:
1940                 dev = __dev_get_by_index(net, ifindex);
1941                 if (!dev)
1942                         goto errout;
1943                 in_dev = __in_dev_get_rtnl(dev);
1944                 if (!in_dev)
1945                         goto errout;
1946                 devconf = &in_dev->cnf;
1947                 break;
1948         }
1949
1950         err = -ENOBUFS;
1951         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1952         if (!skb)
1953                 goto errout;
1954
1955         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1956                                         NETLINK_CB(in_skb).portid,
1957                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1958                                         NETCONFA_ALL);
1959         if (err < 0) {
1960                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1961                 WARN_ON(err == -EMSGSIZE);
1962                 kfree_skb(skb);
1963                 goto errout;
1964         }
1965         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1966 errout:
1967         return err;
1968 }
1969
1970 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1971                                      struct netlink_callback *cb)
1972 {
1973         struct net *net = sock_net(skb->sk);
1974         int h, s_h;
1975         int idx, s_idx;
1976         struct net_device *dev;
1977         struct in_device *in_dev;
1978         struct hlist_head *head;
1979
1980         s_h = cb->args[0];
1981         s_idx = idx = cb->args[1];
1982
1983         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1984                 idx = 0;
1985                 head = &net->dev_index_head[h];
1986                 rcu_read_lock();
1987                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1988                           net->dev_base_seq;
1989                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1990                         if (idx < s_idx)
1991                                 goto cont;
1992                         in_dev = __in_dev_get_rcu(dev);
1993                         if (!in_dev)
1994                                 goto cont;
1995
1996                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1997                                                       &in_dev->cnf,
1998                                                       NETLINK_CB(cb->skb).portid,
1999                                                       cb->nlh->nlmsg_seq,
2000                                                       RTM_NEWNETCONF,
2001                                                       NLM_F_MULTI,
2002                                                       NETCONFA_ALL) < 0) {
2003                                 rcu_read_unlock();
2004                                 goto done;
2005                         }
2006                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2007 cont:
2008                         idx++;
2009                 }
2010                 rcu_read_unlock();
2011         }
2012         if (h == NETDEV_HASHENTRIES) {
2013                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2014                                               net->ipv4.devconf_all,
2015                                               NETLINK_CB(cb->skb).portid,
2016                                               cb->nlh->nlmsg_seq,
2017                                               RTM_NEWNETCONF, NLM_F_MULTI,
2018                                               NETCONFA_ALL) < 0)
2019                         goto done;
2020                 else
2021                         h++;
2022         }
2023         if (h == NETDEV_HASHENTRIES + 1) {
2024                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2025                                               net->ipv4.devconf_dflt,
2026                                               NETLINK_CB(cb->skb).portid,
2027                                               cb->nlh->nlmsg_seq,
2028                                               RTM_NEWNETCONF, NLM_F_MULTI,
2029                                               NETCONFA_ALL) < 0)
2030                         goto done;
2031                 else
2032                         h++;
2033         }
2034 done:
2035         cb->args[0] = h;
2036         cb->args[1] = idx;
2037
2038         return skb->len;
2039 }
2040
2041 #ifdef CONFIG_SYSCTL
2042
2043 static void devinet_copy_dflt_conf(struct net *net, int i)
2044 {
2045         struct net_device *dev;
2046
2047         rcu_read_lock();
2048         for_each_netdev_rcu(net, dev) {
2049                 struct in_device *in_dev;
2050
2051                 in_dev = __in_dev_get_rcu(dev);
2052                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2053                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2054         }
2055         rcu_read_unlock();
2056 }
2057
2058 /* called with RTNL locked */
2059 static void inet_forward_change(struct net *net)
2060 {
2061         struct net_device *dev;
2062         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2063
2064         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2065         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2066         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2067                                     NETCONFA_FORWARDING,
2068                                     NETCONFA_IFINDEX_ALL,
2069                                     net->ipv4.devconf_all);
2070         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2071                                     NETCONFA_FORWARDING,
2072                                     NETCONFA_IFINDEX_DEFAULT,
2073                                     net->ipv4.devconf_dflt);
2074
2075         for_each_netdev(net, dev) {
2076                 struct in_device *in_dev;
2077
2078                 if (on)
2079                         dev_disable_lro(dev);
2080
2081                 in_dev = __in_dev_get_rtnl(dev);
2082                 if (in_dev) {
2083                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2084                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2085                                                     NETCONFA_FORWARDING,
2086                                                     dev->ifindex, &in_dev->cnf);
2087                 }
2088         }
2089 }
2090
2091 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2092 {
2093         if (cnf == net->ipv4.devconf_dflt)
2094                 return NETCONFA_IFINDEX_DEFAULT;
2095         else if (cnf == net->ipv4.devconf_all)
2096                 return NETCONFA_IFINDEX_ALL;
2097         else {
2098                 struct in_device *idev
2099                         = container_of(cnf, struct in_device, cnf);
2100                 return idev->dev->ifindex;
2101         }
2102 }
2103
2104 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2105                              void __user *buffer,
2106                              size_t *lenp, loff_t *ppos)
2107 {
2108         int old_value = *(int *)ctl->data;
2109         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2110         int new_value = *(int *)ctl->data;
2111
2112         if (write) {
2113                 struct ipv4_devconf *cnf = ctl->extra1;
2114                 struct net *net = ctl->extra2;
2115                 int i = (int *)ctl->data - cnf->data;
2116                 int ifindex;
2117
2118                 set_bit(i, cnf->state);
2119
2120                 if (cnf == net->ipv4.devconf_dflt)
2121                         devinet_copy_dflt_conf(net, i);
2122                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2123                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2124                         if ((new_value == 0) && (old_value != 0))
2125                                 rt_cache_flush(net);
2126
2127                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2128                     new_value != old_value) {
2129                         ifindex = devinet_conf_ifindex(net, cnf);
2130                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2131                                                     NETCONFA_RP_FILTER,
2132                                                     ifindex, cnf);
2133                 }
2134                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2135                     new_value != old_value) {
2136                         ifindex = devinet_conf_ifindex(net, cnf);
2137                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2138                                                     NETCONFA_PROXY_NEIGH,
2139                                                     ifindex, cnf);
2140                 }
2141                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2142                     new_value != old_value) {
2143                         ifindex = devinet_conf_ifindex(net, cnf);
2144                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2145                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2146                                                     ifindex, cnf);
2147                 }
2148         }
2149
2150         return ret;
2151 }
2152
2153 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2154                                   void __user *buffer,
2155                                   size_t *lenp, loff_t *ppos)
2156 {
2157         int *valp = ctl->data;
2158         int val = *valp;
2159         loff_t pos = *ppos;
2160         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2161
2162         if (write && *valp != val) {
2163                 struct net *net = ctl->extra2;
2164
2165                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2166                         if (!rtnl_trylock()) {
2167                                 /* Restore the original values before restarting */
2168                                 *valp = val;
2169                                 *ppos = pos;
2170                                 return restart_syscall();
2171                         }
2172                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2173                                 inet_forward_change(net);
2174                         } else {
2175                                 struct ipv4_devconf *cnf = ctl->extra1;
2176                                 struct in_device *idev =
2177                                         container_of(cnf, struct in_device, cnf);
2178                                 if (*valp)
2179                                         dev_disable_lro(idev->dev);
2180                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2181                                                             NETCONFA_FORWARDING,
2182                                                             idev->dev->ifindex,
2183                                                             cnf);
2184                         }
2185                         rtnl_unlock();
2186                         rt_cache_flush(net);
2187                 } else
2188                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2189                                                     NETCONFA_FORWARDING,
2190                                                     NETCONFA_IFINDEX_DEFAULT,
2191                                                     net->ipv4.devconf_dflt);
2192         }
2193
2194         return ret;
2195 }
2196
2197 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2198                                 void __user *buffer,
2199                                 size_t *lenp, loff_t *ppos)
2200 {
2201         int *valp = ctl->data;
2202         int val = *valp;
2203         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2204         struct net *net = ctl->extra2;
2205
2206         if (write && *valp != val)
2207                 rt_cache_flush(net);
2208
2209         return ret;
2210 }
2211
/*
 * Build one ctl_table initializer for devconf attribute IPV4_DEVCONF_<attr>.
 * .data points at slot (IPV4_DEVCONF_<attr> - 1) of ipv4_devconf.data[]
 * and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry served by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2234
2235 static struct devinet_sysctl_table {
2236         struct ctl_table_header *sysctl_header;
2237         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2238 } devinet_sysctl = {
2239         .devinet_vars = {
2240                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2241                                              devinet_sysctl_forward),
2242                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2243
2244                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2245                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2246                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2247                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2248                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2249                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2250                                         "accept_source_route"),
2251                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2252                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2253                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2254                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2255                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2256                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2257                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2258                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2259                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2260                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2261                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2262                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2263                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2264                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2265                                         "force_igmp_version"),
2266                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2267                                         "igmpv2_unsolicited_report_interval"),
2268                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2269                                         "igmpv3_unsolicited_report_interval"),
2270                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2271                                         "ignore_routes_with_linkdown"),
2272                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2273                                         "drop_gratuitous_arp"),
2274
2275                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2276                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2277                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2278                                               "promote_secondaries"),
2279                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2280                                               "route_localnet"),
2281                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2282                                               "drop_unicast_in_l2_multicast"),
2283         },
2284 };
2285
2286 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2287                                      int ifindex, struct ipv4_devconf *p)
2288 {
2289         int i;
2290         struct devinet_sysctl_table *t;
2291         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2292
2293         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2294         if (!t)
2295                 goto out;
2296
2297         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2298                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2299                 t->devinet_vars[i].extra1 = p;
2300                 t->devinet_vars[i].extra2 = net;
2301         }
2302
2303         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2304
2305         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2306         if (!t->sysctl_header)
2307                 goto free;
2308
2309         p->sysctl = t;
2310
2311         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2312                                     ifindex, p);
2313         return 0;
2314
2315 free:
2316         kfree(t);
2317 out:
2318         return -ENOBUFS;
2319 }
2320
2321 static void __devinet_sysctl_unregister(struct net *net,
2322                                         struct ipv4_devconf *cnf, int ifindex)
2323 {
2324         struct devinet_sysctl_table *t = cnf->sysctl;
2325
2326         if (t) {
2327                 cnf->sysctl = NULL;
2328                 unregister_net_sysctl_table(t->sysctl_header);
2329                 kfree(t);
2330         }
2331
2332         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2333 }
2334
2335 static int devinet_sysctl_register(struct in_device *idev)
2336 {
2337         int err;
2338
2339         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2340                 return -EINVAL;
2341
2342         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2343         if (err)
2344                 return err;
2345         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2346                                         idev->dev->ifindex, &idev->cnf);
2347         if (err)
2348                 neigh_sysctl_unregister(idev->arp_parms);
2349         return err;
2350 }
2351
2352 static void devinet_sysctl_unregister(struct in_device *idev)
2353 {
2354         struct net *net = dev_net(idev->dev);
2355
2356         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2357         neigh_sysctl_unregister(idev->arp_parms);
2358 }
2359
2360 static struct ctl_table ctl_forward_entry[] = {
2361         {
2362                 .procname       = "ip_forward",
2363                 .data           = &ipv4_devconf.data[
2364                                         IPV4_DEVCONF_FORWARDING - 1],
2365                 .maxlen         = sizeof(int),
2366                 .mode           = 0644,
2367                 .proc_handler   = devinet_sysctl_forward,
2368                 .extra1         = &ipv4_devconf,
2369                 .extra2         = &init_net,
2370         },
2371         { },
2372 };
2373 #endif
2374
2375 static __net_init int devinet_init_net(struct net *net)
2376 {
2377         int err;
2378         struct ipv4_devconf *all, *dflt;
2379 #ifdef CONFIG_SYSCTL
2380         struct ctl_table *tbl = ctl_forward_entry;
2381         struct ctl_table_header *forw_hdr;
2382 #endif
2383
2384         err = -ENOMEM;
2385         all = &ipv4_devconf;
2386         dflt = &ipv4_devconf_dflt;
2387
2388         if (!net_eq(net, &init_net)) {
2389                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2390                 if (!all)
2391                         goto err_alloc_all;
2392
2393                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2394                 if (!dflt)
2395                         goto err_alloc_dflt;
2396
2397 #ifdef CONFIG_SYSCTL
2398                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2399                 if (!tbl)
2400                         goto err_alloc_ctl;
2401
2402                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2403                 tbl[0].extra1 = all;
2404                 tbl[0].extra2 = net;
2405 #endif
2406         }
2407
2408 #ifdef CONFIG_SYSCTL
2409         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2410         if (err < 0)
2411                 goto err_reg_all;
2412
2413         err = __devinet_sysctl_register(net, "default",
2414                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2415         if (err < 0)
2416                 goto err_reg_dflt;
2417
2418         err = -ENOMEM;
2419         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2420         if (!forw_hdr)
2421                 goto err_reg_ctl;
2422         net->ipv4.forw_hdr = forw_hdr;
2423 #endif
2424
2425         net->ipv4.devconf_all = all;
2426         net->ipv4.devconf_dflt = dflt;
2427         return 0;
2428
2429 #ifdef CONFIG_SYSCTL
2430 err_reg_ctl:
2431         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2432 err_reg_dflt:
2433         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2434 err_reg_all:
2435         if (tbl != ctl_forward_entry)
2436                 kfree(tbl);
2437 err_alloc_ctl:
2438 #endif
2439         if (dflt != &ipv4_devconf_dflt)
2440                 kfree(dflt);
2441 err_alloc_dflt:
2442         if (all != &ipv4_devconf)
2443                 kfree(all);
2444 err_alloc_all:
2445         return err;
2446 }
2447
2448 static __net_exit void devinet_exit_net(struct net *net)
2449 {
2450 #ifdef CONFIG_SYSCTL
2451         struct ctl_table *tbl;
2452
2453         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2454         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2455         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2456                                     NETCONFA_IFINDEX_DEFAULT);
2457         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2458                                     NETCONFA_IFINDEX_ALL);
2459         kfree(tbl);
2460 #endif
2461         kfree(net->ipv4.devconf_dflt);
2462         kfree(net->ipv4.devconf_all);
2463 }
2464
2465 static __net_initdata struct pernet_operations devinet_ops = {
2466         .init = devinet_init_net,
2467         .exit = devinet_exit_net,
2468 };
2469
2470 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2471         .family           = AF_INET,
2472         .fill_link_af     = inet_fill_link_af,
2473         .get_link_af_size = inet_get_link_af_size,
2474         .validate_link_af = inet_validate_link_af,
2475         .set_link_af      = inet_set_link_af,
2476 };
2477
2478 void __init devinet_init(void)
2479 {
2480         int i;
2481
2482         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2483                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2484
2485         register_pernet_subsys(&devinet_ops);
2486
2487         register_gifconf(PF_INET, inet_gifconf);
2488         register_netdevice_notifier(&ip_netdev_notifier);
2489
2490         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2491
2492         rtnl_af_register(&inet_af_ops);
2493
2494         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2495         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2496         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2497         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2498                       inet_netconf_dump_devconf, 0);
2499 }