1771549d24385f3372852b6732a081c4328ceb0e
[muen/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
/*
 * File-scope ipv4_devconf instance with its initial values.
 * Entries of .data are indexed by (IPV4_DEVCONF_<attr> - 1); every entry
 * not listed here is zero-initialized. The IGMP unsolicited report
 * intervals are expressed in milliseconds.
 */
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
/*
 * File-scope ipv4_devconf_dflt instance with its initial values.
 * Same layout as ipv4_devconf in this file (index is IPV4_DEVCONF_<attr> - 1,
 * unlisted entries are zero); the only difference in the listed values is
 * that ACCEPT_SOURCE_ROUTE is additionally set to 1 here.
 */
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/*
 * Read attribute @attr from the devconf_dflt block that namespace @net
 * points to, via IPV4_DEVCONF().
 *
 * @net is parenthesized so that an expression argument (for example
 * "p + 1") is dereferenced as a whole rather than being split by
 * operator precedence.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
/*
 * Netlink attribute policy for the IFA_* attributes of IPv4 address
 * messages. It is handed to nlmsg_parse() by inet_rtm_deladdr() and
 * rtm_to_ifaddr() in this file. Addresses and flags are 32-bit values,
 * the label is a string of at most IFNAMSIZ - 1 bytes, and the cache info
 * attribute is sized after struct ifa_cacheinfo.
 */
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102 };
103
/*
 * Hash table of configured IPv4 addresses (struct in_ifaddr, linked through
 * its ->hash member). The bucket is chosen by inet_addr_hash() from the
 * local address; there are 1 << IN4_ADDR_HSIZE_SHIFT (256) buckets.
 * The table is a single file-scope array, so lookups compare the namespace
 * explicitly (see inet_lookup_ifaddr_rcu()).
 */
104 #define IN4_ADDR_HSIZE_SHIFT    8
105 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
106
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111         u32 val = (__force u32) addr ^ net_hash_mix(net);
112
113         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118         u32 hash = inet_addr_hash(net, ifa->ifa_local);
119
120         ASSERT_RTNL();
121         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126         ASSERT_RTNL();
127         hlist_del_init_rcu(&ifa->hash);
128 }
129
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         ifa = inet_lookup_ifaddr_rcu(net, addr);
145         if (!ifa) {
146                 struct flowi4 fl4 = { .daddr = addr };
147                 struct fib_result res = { 0 };
148                 struct fib_table *local;
149
150                 /* Fallback to FIB local table so that communication
151                  * over loopback subnets work.
152                  */
153                 local = fib_get_table(net, RT_TABLE_LOCAL);
154                 if (local &&
155                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
156                     res.type == RTN_LOCAL)
157                         result = FIB_RES_DEV(res);
158         } else {
159                 result = ifa->ifa_dev->dev;
160         }
161         if (result && devref)
162                 dev_hold(result);
163         rcu_read_unlock();
164         return result;
165 }
166 EXPORT_SYMBOL(__ip_dev_find);
167
168 /* called under RCU lock */
169 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
170 {
171         u32 hash = inet_addr_hash(net, addr);
172         struct in_ifaddr *ifa;
173
174         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
175                 if (ifa->ifa_local == addr &&
176                     net_eq(dev_net(ifa->ifa_dev->dev), net))
177                         return ifa;
178
179         return NULL;
180 }
181
/* Forward declaration: used by the add/delete paths below to announce changes. */
182 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
183
/*
 * inetaddr_chain is called in this file with NETDEV_UP / NETDEV_DOWN and
 * the affected in_ifaddr; inetaddr_validator_chain is called with
 * NETDEV_UP and a struct in_validator_info before a new address is
 * committed (see __inet_insert_ifa()).
 */
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
186 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
187                          int destroy);
/*
 * Without CONFIG_SYSCTL the per-device sysctl hooks are stubs:
 * registration reports success (0) and unregistration does nothing.
 */
188 #ifdef CONFIG_SYSCTL
189 static int devinet_sysctl_register(struct in_device *idev);
190 static void devinet_sysctl_unregister(struct in_device *idev);
191 #else
192 static int devinet_sysctl_register(struct in_device *idev)
193 {
194         return 0;
195 }
196 static void devinet_sysctl_unregister(struct in_device *idev)
197 {
198 }
199 #endif
200
201 /* Locks all the inet devices. */
202
203 static struct in_ifaddr *inet_alloc_ifa(void)
204 {
205         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
206 }
207
208 static void inet_rcu_free_ifa(struct rcu_head *head)
209 {
210         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
211         if (ifa->ifa_dev)
212                 in_dev_put(ifa->ifa_dev);
213         kfree(ifa);
214 }
215
216 static void inet_free_ifa(struct in_ifaddr *ifa)
217 {
218         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
219 }
220
221 void in_dev_finish_destroy(struct in_device *idev)
222 {
223         struct net_device *dev = idev->dev;
224
225         WARN_ON(idev->ifa_list);
226         WARN_ON(idev->mc_list);
227         kfree(rcu_dereference_protected(idev->mc_hash, 1));
228 #ifdef NET_REFCNT_DEBUG
229         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
230 #endif
231         dev_put(dev);
232         if (!idev->dead)
233                 pr_err("Freeing alive in_device %p\n", idev);
234         else
235                 kfree(idev);
236 }
237 EXPORT_SYMBOL(in_dev_finish_destroy);
238
239 static struct in_device *inetdev_init(struct net_device *dev)
240 {
241         struct in_device *in_dev;
242         int err = -ENOMEM;
243
244         ASSERT_RTNL();
245
246         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
247         if (!in_dev)
248                 goto out;
249         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
250                         sizeof(in_dev->cnf));
251         in_dev->cnf.sysctl = NULL;
252         in_dev->dev = dev;
253         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
254         if (!in_dev->arp_parms)
255                 goto out_kfree;
256         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
257                 dev_disable_lro(dev);
258         /* Reference in_dev->dev */
259         dev_hold(dev);
260         /* Account for reference dev->ip_ptr (below) */
261         refcount_set(&in_dev->refcnt, 1);
262
263         err = devinet_sysctl_register(in_dev);
264         if (err) {
265                 in_dev->dead = 1;
266                 in_dev_put(in_dev);
267                 in_dev = NULL;
268                 goto out;
269         }
270         ip_mc_init_dev(in_dev);
271         if (dev->flags & IFF_UP)
272                 ip_mc_up(in_dev);
273
274         /* we can receive as soon as ip_ptr is set -- do this last */
275         rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277         return in_dev ?: ERR_PTR(err);
278 out_kfree:
279         kfree(in_dev);
280         in_dev = NULL;
281         goto out;
282 }
283
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286         struct in_device *idev = container_of(head, struct in_device, rcu_head);
287         in_dev_put(idev);
288 }
289
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292         struct in_ifaddr *ifa;
293         struct net_device *dev;
294
295         ASSERT_RTNL();
296
297         dev = in_dev->dev;
298
299         in_dev->dead = 1;
300
301         ip_mc_destroy_dev(in_dev);
302
303         while ((ifa = in_dev->ifa_list) != NULL) {
304                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305                 inet_free_ifa(ifa);
306         }
307
308         RCU_INIT_POINTER(dev->ip_ptr, NULL);
309
310         devinet_sysctl_unregister(in_dev);
311         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312         arp_ifdown(dev);
313
314         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319         rcu_read_lock();
320         for_primary_ifa(in_dev) {
321                 if (inet_ifa_match(a, ifa)) {
322                         if (!b || inet_ifa_match(b, ifa)) {
323                                 rcu_read_unlock();
324                                 return 1;
325                         }
326                 }
327         } endfor_ifa(in_dev);
328         rcu_read_unlock();
329         return 0;
330 }
331
/*
 * Remove the address that *@ifap points to from @in_dev and announce it.
 *
 * @in_dev:  device owning the address list
 * @ifap:    list link (ifa_list or a predecessor's ifa_next) that currently
 *           points at the address to delete
 * @destroy: when non-zero the deleted address is passed to inet_free_ifa()
 * @nlh, @portid: handed through to rtmsg_ifa()
 *
 * If the deleted address is a primary one, every secondary with the same
 * mask that matches its subnet is either deleted as well, or - when
 * IN_DEV_PROMOTE_SECONDARIES(in_dev) is set - the first such secondary is
 * promoted to primary and the routes of the remaining ones are re-added.
 * For a device already marked dead the secondary handling is skipped.
 *
 * Must be called with RTNL held.
 */
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333                          int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335         struct in_ifaddr *promote = NULL;
336         struct in_ifaddr *ifa, *ifa1 = *ifap;
337         struct in_ifaddr *last_prim = in_dev->ifa_list;
338         struct in_ifaddr *prev_prom = NULL;
339         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340
341         ASSERT_RTNL();
342
343         if (in_dev->dead)
344                 goto no_promotions;
345
346         /* 1. Deleting primary ifaddr forces deletion all secondaries
347          * unless alias promotion is set
348          **/
349
350         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352
353                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary whose scope is not below ifa1's. */
354                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355                             ifa1->ifa_scope <= ifa->ifa_scope)
356                                 last_prim = ifa;
357
                        /* Skip entries that are not secondaries of ifa1's subnet. */
358                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359                             ifa1->ifa_mask != ifa->ifa_mask ||
360                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
361                                 ifap1 = &ifa->ifa_next;
362                                 prev_prom = ifa;
363                                 continue;
364                         }
365
366                         if (!do_promote) {
367                                 inet_hash_remove(ifa);
368                                 *ifap1 = ifa->ifa_next;
369
370                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371                                 blocking_notifier_call_chain(&inetaddr_chain,
372                                                 NETDEV_DOWN, ifa);
373                                 inet_free_ifa(ifa);
374                         } else {
375                                 promote = ifa;
376                                 break;
377                         }
378                 }
379         }
380
381         /* On promotion all secondaries from subnet are changing
382          * the primary IP, we must remove all their routes silently
383          * and later to add them back with new prefsrc. Do this
384          * while all addresses are on the device list.
385          */
386         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387                 if (ifa1->ifa_mask == ifa->ifa_mask &&
388                     inet_ifa_match(ifa1->ifa_address, ifa))
389                         fib_del_ifaddr(ifa, ifa1);
390         }
391
392 no_promotions:
393         /* 2. Unlink it */
394
395         *ifap = ifa1->ifa_next;
396         inet_hash_remove(ifa1);
397
398         /* 3. Announce address deletion */
399
400         /* Send message first, then call notifier.
401            At first sight, FIB update triggered by notifier
402            will refer to already deleted ifaddr, that could confuse
403            netlink listeners. It is not true: look, gated sees
404            that route deleted and if it still thinks that ifaddr
405            is valid, it will try to restore deleted routes... Grr.
406            So that, this order is correct.
407          */
408         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410
411         if (promote) {
412                 struct in_ifaddr *next_sec = promote->ifa_next;
413
                /* Relink the promoted address directly behind last_prim. */
414                 if (prev_prom) {
415                         prev_prom->ifa_next = promote->ifa_next;
416                         promote->ifa_next = last_prim->ifa_next;
417                         last_prim->ifa_next = promote;
418                 }
419
420                 promote->ifa_flags &= ~IFA_F_SECONDARY;
421                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422                 blocking_notifier_call_chain(&inetaddr_chain,
423                                 NETDEV_UP, promote);
                /* Re-add routes for the remaining addresses of the subnet. */
424                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425                         if (ifa1->ifa_mask != ifa->ifa_mask ||
426                             !inet_ifa_match(ifa1->ifa_address, ifa))
427                                         continue;
428                         fib_add_ifaddr(ifa);
429                 }
430
431         }
432         if (destroy)
433                 inet_free_ifa(ifa1);
434 }
435
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437                          int destroy)
438 {
439         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441
/*
 * Delayed work item that runs check_lifetime(). It is queued from
 * __inet_insert_ifa() when an address is added and re-queued by
 * check_lifetime() itself at the end of every run.
 */
442 static void check_lifetime(struct work_struct *work);
443
444 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
445
/*
 * Insert @ifa into the address list of its device (ifa->ifa_dev), hash it
 * and announce it.
 *
 * @ifa:    address to insert; on every failure path, and when ifa_local is
 *          zero, it is released with inet_free_ifa()
 * @nlh, @portid: handed through to rtmsg_ifa()
 * @extack: passed to the address validators via struct in_validator_info
 *
 * An address that shares mask and subnet with an existing entry becomes a
 * secondary and is appended at the end of the list; otherwise it is a
 * primary and is linked in at the position tracked by last_primary.
 *
 * Returns 0 on success (also when ifa_local is zero and nothing is
 * inserted), -EEXIST if the same local address already exists in the
 * subnet, -EINVAL if the scope differs from the existing subnet entry, or
 * the errno reported by the validator chain.
 *
 * Must be called with RTNL held.
 */
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447                              u32 portid, struct netlink_ext_ack *extack)
448 {
449         struct in_device *in_dev = ifa->ifa_dev;
450         struct in_ifaddr *ifa1, **ifap, **last_primary;
451         struct in_validator_info ivi;
452         int ret;
453
454         ASSERT_RTNL();
455
456         if (!ifa->ifa_local) {
457                 inet_free_ifa(ifa);
458                 return 0;
459         }
460
461         ifa->ifa_flags &= ~IFA_F_SECONDARY;
462         last_primary = &in_dev->ifa_list;
463
        /* After this loop ifap points at the terminating NULL link. */
464         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
465              ifap = &ifa1->ifa_next) {
466                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
467                     ifa->ifa_scope <= ifa1->ifa_scope)
468                         last_primary = &ifa1->ifa_next;
469                 if (ifa1->ifa_mask == ifa->ifa_mask &&
470                     inet_ifa_match(ifa1->ifa_address, ifa)) {
471                         if (ifa1->ifa_local == ifa->ifa_local) {
472                                 inet_free_ifa(ifa);
473                                 return -EEXIST;
474                         }
475                         if (ifa1->ifa_scope != ifa->ifa_scope) {
476                                 inet_free_ifa(ifa);
477                                 return -EINVAL;
478                         }
479                         ifa->ifa_flags |= IFA_F_SECONDARY;
480                 }
481         }
482
483         /* Allow any devices that wish to register ifaddr validators to weigh
484          * in now, before changes are committed.  The rtnl lock is serializing
485          * access here, so the state should not change between a validator call
486          * and a final notify on commit.  This isn't invoked on promotion under
487          * the assumption that validators are checking the address itself, and
488          * not the flags.
489          */
490         ivi.ivi_addr = ifa->ifa_address;
491         ivi.ivi_dev = ifa->ifa_dev;
492         ivi.extack = extack;
493         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
494                                            NETDEV_UP, &ivi);
495         ret = notifier_to_errno(ret);
496         if (ret) {
497                 inet_free_ifa(ifa);
498                 return ret;
499         }
500
        /* Primaries go to the last_primary position, secondaries to the tail. */
501         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
502                 prandom_seed((__force u32) ifa->ifa_local);
503                 ifap = last_primary;
504         }
505
506         ifa->ifa_next = *ifap;
507         *ifap = ifa;
508
509         inet_hash_insert(dev_net(in_dev->dev), ifa);
510
        /* Run the lifetime check right away (delay 0) for the new address. */
511         cancel_delayed_work(&check_lifetime_work);
512         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
513
514         /* Send message first, then call notifier.
515            Notifier will trigger FIB update, so that
516            listeners of netlink will know about new ifaddr */
517         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
518         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
519
520         return 0;
521 }
522
523 static int inet_insert_ifa(struct in_ifaddr *ifa)
524 {
525         return __inet_insert_ifa(ifa, NULL, 0, NULL);
526 }
527
528 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
529 {
530         struct in_device *in_dev = __in_dev_get_rtnl(dev);
531
532         ASSERT_RTNL();
533
534         if (!in_dev) {
535                 inet_free_ifa(ifa);
536                 return -ENOBUFS;
537         }
538         ipv4_devconf_setall(in_dev);
539         neigh_parms_data_state_setall(in_dev->arp_parms);
540         if (ifa->ifa_dev != in_dev) {
541                 WARN_ON(ifa->ifa_dev);
542                 in_dev_hold(in_dev);
543                 ifa->ifa_dev = in_dev;
544         }
545         if (ipv4_is_loopback(ifa->ifa_local))
546                 ifa->ifa_scope = RT_SCOPE_HOST;
547         return inet_insert_ifa(ifa);
548 }
549
550 /* Caller must hold RCU or RTNL :
551  * We dont take a reference on found in_device
552  */
553 struct in_device *inetdev_by_index(struct net *net, int ifindex)
554 {
555         struct net_device *dev;
556         struct in_device *in_dev = NULL;
557
558         rcu_read_lock();
559         dev = dev_get_by_index_rcu(net, ifindex);
560         if (dev)
561                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
562         rcu_read_unlock();
563         return in_dev;
564 }
565 EXPORT_SYMBOL(inetdev_by_index);
566
567 /* Called only from RTNL semaphored context. No locks. */
568
569 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
570                                     __be32 mask)
571 {
572         ASSERT_RTNL();
573
574         for_primary_ifa(in_dev) {
575                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
576                         return ifa;
577         } endfor_ifa(in_dev);
578         return NULL;
579 }
580
581 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
582 {
583         struct ip_mreqn mreq = {
584                 .imr_multiaddr.s_addr = ifa->ifa_address,
585                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
586         };
587         int ret;
588
589         ASSERT_RTNL();
590
591         lock_sock(sk);
592         if (join)
593                 ret = ip_mc_join_group(sk, &mreq);
594         else
595                 ret = ip_mc_leave_group(sk, &mreq);
596         release_sock(sk);
597
598         return ret;
599 }
600
601 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
602                             struct netlink_ext_ack *extack)
603 {
604         struct net *net = sock_net(skb->sk);
605         struct nlattr *tb[IFA_MAX+1];
606         struct in_device *in_dev;
607         struct ifaddrmsg *ifm;
608         struct in_ifaddr *ifa, **ifap;
609         int err = -EINVAL;
610
611         ASSERT_RTNL();
612
613         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
614                           extack);
615         if (err < 0)
616                 goto errout;
617
618         ifm = nlmsg_data(nlh);
619         in_dev = inetdev_by_index(net, ifm->ifa_index);
620         if (!in_dev) {
621                 err = -ENODEV;
622                 goto errout;
623         }
624
625         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
626              ifap = &ifa->ifa_next) {
627                 if (tb[IFA_LOCAL] &&
628                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
629                         continue;
630
631                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
632                         continue;
633
634                 if (tb[IFA_ADDRESS] &&
635                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
636                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
637                         continue;
638
639                 if (ipv4_is_multicast(ifa->ifa_address))
640                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
641                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
642                 return 0;
643         }
644
645         err = -EADDRNOTAVAIL;
646 errout:
647         return err;
648 }
649
/*
 * Lifetime value that stands for "no expiry": check_lifetime() does not
 * apply its age comparison to a valid/preferred lifetime equal to this
 * value, and inet_rtm_newaddr() uses it as the initial value for both
 * lifetimes.
 */
650 #define INFINITY_LIFE_TIME      0xFFFFFFFF
651
/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every bucket of inet_addr_lst is scanned twice at most: first under the
 * RCU read lock only, to find out whether any non-permanent address has
 * expired or needs to be marked deprecated and to compute the next wake-up
 * time; then, only if a change is needed, again under RTNL where the
 * conditions are re-evaluated and expired addresses are deleted or the
 * IFA_F_DEPRECATED flag is set and announced.
 *
 * At the end the work re-queues itself for the computed time, but never
 * sooner than ADDRCONF_TIMER_FUZZ_MAX jiffies from now.
 */
652 static void check_lifetime(struct work_struct *work)
653 {
654         unsigned long now, next, next_sec, next_sched;
655         struct in_ifaddr *ifa;
656         struct hlist_node *n;
657         int i;
658
659         now = jiffies;
660         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
661
662         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
663                 bool change_needed = false;
664
                /* Pass 1: read-only scan of this bucket under RCU. */
665                 rcu_read_lock();
666                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
667                         unsigned long age;
668
669                         if (ifa->ifa_flags & IFA_F_PERMANENT)
670                                 continue;
671
672                         /* We try to batch several events at once. */
673                         age = (now - ifa->ifa_tstamp +
674                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
675
676                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
677                             age >= ifa->ifa_valid_lft) {
678                                 change_needed = true;
679                         } else if (ifa->ifa_preferred_lft ==
680                                    INFINITY_LIFE_TIME) {
681                                 continue;
682                         } else if (age >= ifa->ifa_preferred_lft) {
                                /* Past preferred lifetime: next event is the valid-lifetime expiry. */
683                                 if (time_before(ifa->ifa_tstamp +
684                                                 ifa->ifa_valid_lft * HZ, next))
685                                         next = ifa->ifa_tstamp +
686                                                ifa->ifa_valid_lft * HZ;
687
688                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
689                                         change_needed = true;
690                         } else if (time_before(ifa->ifa_tstamp +
691                                                ifa->ifa_preferred_lft * HZ,
692                                                next)) {
693                                 next = ifa->ifa_tstamp +
694                                        ifa->ifa_preferred_lft * HZ;
695                         }
696                 }
697                 rcu_read_unlock();
698                 if (!change_needed)
699                         continue;
                /* Pass 2: re-evaluate and apply the changes under RTNL. */
700                 rtnl_lock();
701                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
702                         unsigned long age;
703
704                         if (ifa->ifa_flags & IFA_F_PERMANENT)
705                                 continue;
706
707                         /* We try to batch several events at once. */
708                         age = (now - ifa->ifa_tstamp +
709                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
710
711                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
712                             age >= ifa->ifa_valid_lft) {
713                                 struct in_ifaddr **ifap;
714
                                /* Find the list link pointing at ifa; inet_del_ifa() needs it. */
715                                 for (ifap = &ifa->ifa_dev->ifa_list;
716                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
717                                         if (*ifap == ifa) {
718                                                 inet_del_ifa(ifa->ifa_dev,
719                                                              ifap, 1);
720                                                 break;
721                                         }
722                                 }
723                         } else if (ifa->ifa_preferred_lft !=
724                                    INFINITY_LIFE_TIME &&
725                                    age >= ifa->ifa_preferred_lft &&
726                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
727                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
728                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
729                         }
730                 }
731                 rtnl_unlock();
732         }
733
734         next_sec = round_jiffies_up(next);
735         next_sched = next;
736
737         /* If rounded timeout is accurate enough, accept it. */
738         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
739                 next_sched = next_sec;
740
741         now = jiffies;
742         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
743         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
744                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
745
746         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
747                         next_sched - now);
748 }
749
750 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
751                              __u32 prefered_lft)
752 {
753         unsigned long timeout;
754
755         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
756
757         timeout = addrconf_timeout_fixup(valid_lft, HZ);
758         if (addrconf_finite_timeout(timeout))
759                 ifa->ifa_valid_lft = timeout;
760         else
761                 ifa->ifa_flags |= IFA_F_PERMANENT;
762
763         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
764         if (addrconf_finite_timeout(timeout)) {
765                 if (timeout == 0)
766                         ifa->ifa_flags |= IFA_F_DEPRECATED;
767                 ifa->ifa_preferred_lft = timeout;
768         }
769         ifa->ifa_tstamp = jiffies;
770         if (!ifa->ifa_cstamp)
771                 ifa->ifa_cstamp = ifa->ifa_tstamp;
772 }
773
774 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
775                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
776 {
777         struct nlattr *tb[IFA_MAX+1];
778         struct in_ifaddr *ifa;
779         struct ifaddrmsg *ifm;
780         struct net_device *dev;
781         struct in_device *in_dev;
782         int err;
783
784         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
785                           NULL);
786         if (err < 0)
787                 goto errout;
788
789         ifm = nlmsg_data(nlh);
790         err = -EINVAL;
791         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
792                 goto errout;
793
794         dev = __dev_get_by_index(net, ifm->ifa_index);
795         err = -ENODEV;
796         if (!dev)
797                 goto errout;
798
799         in_dev = __in_dev_get_rtnl(dev);
800         err = -ENOBUFS;
801         if (!in_dev)
802                 goto errout;
803
804         ifa = inet_alloc_ifa();
805         if (!ifa)
806                 /*
807                  * A potential indev allocation can be left alive, it stays
808                  * assigned to its device and is destroy with it.
809                  */
810                 goto errout;
811
812         ipv4_devconf_setall(in_dev);
813         neigh_parms_data_state_setall(in_dev->arp_parms);
814         in_dev_hold(in_dev);
815
816         if (!tb[IFA_ADDRESS])
817                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
818
819         INIT_HLIST_NODE(&ifa->hash);
820         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
821         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
822         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
823                                          ifm->ifa_flags;
824         ifa->ifa_scope = ifm->ifa_scope;
825         ifa->ifa_dev = in_dev;
826
827         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
828         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
829
830         if (tb[IFA_BROADCAST])
831                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
832
833         if (tb[IFA_LABEL])
834                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
835         else
836                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837
838         if (tb[IFA_CACHEINFO]) {
839                 struct ifa_cacheinfo *ci;
840
841                 ci = nla_data(tb[IFA_CACHEINFO]);
842                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
843                         err = -EINVAL;
844                         goto errout_free;
845                 }
846                 *pvalid_lft = ci->ifa_valid;
847                 *pprefered_lft = ci->ifa_prefered;
848         }
849
850         return ifa;
851
852 errout_free:
853         inet_free_ifa(ifa);
854 errout:
855         return ERR_PTR(err);
856 }
857
858 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
859 {
860         struct in_device *in_dev = ifa->ifa_dev;
861         struct in_ifaddr *ifa1, **ifap;
862
863         if (!ifa->ifa_local)
864                 return NULL;
865
866         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
867              ifap = &ifa1->ifa_next) {
868                 if (ifa1->ifa_mask == ifa->ifa_mask &&
869                     inet_ifa_match(ifa1->ifa_address, ifa) &&
870                     ifa1->ifa_local == ifa->ifa_local)
871                         return ifa1;
872         }
873         return NULL;
874 }
875
876 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
877                             struct netlink_ext_ack *extack)
878 {
879         struct net *net = sock_net(skb->sk);
880         struct in_ifaddr *ifa;
881         struct in_ifaddr *ifa_existing;
882         __u32 valid_lft = INFINITY_LIFE_TIME;
883         __u32 prefered_lft = INFINITY_LIFE_TIME;
884
885         ASSERT_RTNL();
886
887         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
888         if (IS_ERR(ifa))
889                 return PTR_ERR(ifa);
890
891         ifa_existing = find_matching_ifa(ifa);
892         if (!ifa_existing) {
893                 /* It would be best to check for !NLM_F_CREATE here but
894                  * userspace already relies on not having to provide this.
895                  */
896                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
897                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
898                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
899                                                true, ifa);
900
901                         if (ret < 0) {
902                                 inet_free_ifa(ifa);
903                                 return ret;
904                         }
905                 }
906                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
907                                          extack);
908         } else {
909                 inet_free_ifa(ifa);
910
911                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
912                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
913                         return -EEXIST;
914                 ifa = ifa_existing;
915                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
916                 cancel_delayed_work(&check_lifetime_work);
917                 queue_delayed_work(system_power_efficient_wq,
918                                 &check_lifetime_work, 0);
919                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
920         }
921         return 0;
922 }
923
924 /*
925  *      Determine a default network mask, based on the IP address.
926  */
927
928 static int inet_abc_len(__be32 addr)
929 {
930         int rc = -1;    /* Something else, probably a multicast. */
931
932         if (ipv4_is_zeronet(addr))
933                 rc = 0;
934         else {
935                 __u32 haddr = ntohl(addr);
936
937                 if (IN_CLASSA(haddr))
938                         rc = 8;
939                 else if (IN_CLASSB(haddr))
940                         rc = 16;
941                 else if (IN_CLASSC(haddr))
942                         rc = 24;
943         }
944
945         return rc;
946 }
947
948
/*
 * devinet_ioctl - handle the IPv4 interface address ioctls
 * @net: network namespace in which the interface name is resolved
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; any other value is rejected with -EINVAL
 * @arg: user pointer to a struct ifreq; it is read on entry and, for the
 *       four SIOCGIF* commands, written back on success
 *
 * The function runs in three stages:
 *  1. copy the request in and do the per-command permission and address
 *     family checks (no lock held);
 *  2. under RTNL, resolve the device and the address entry the request
 *     refers to, matching on label (and, for the get commands with an
 *     AF_INET address supplied, on label plus local address first);
 *  3. under RTNL, perform the command.
 *
 * Returns 0 on success or a negative errno: -EFAULT if the ifreq cannot be
 * copied, -EPERM without CAP_NET_ADMIN for the set commands, -EINVAL for a
 * bad command, family, address or mask, -ENODEV if the device does not
 * exist, -EADDRNOTAVAIL if no matching address exists, -ENOBUFS if a new
 * address cannot be allocated, or whatever dev_change_flags() or
 * inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
        struct ifreq ifr;
        struct sockaddr_in sin_orig;
        /* sin aliases the address field inside ifr, so writes go to the reply */
        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
        struct in_device *in_dev;
        /* ifap, when set, points at the list link that references ifa */
        struct in_ifaddr **ifap = NULL;
        struct in_ifaddr *ifa = NULL;
        struct net_device *dev;
        char *colon;
        int ret = -EFAULT;
        int tryaddrmatch = 0;

        /*
         *      Fetch the caller's info block into kernel space
         */

        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                goto out;
        /* never trust user space to have terminated the name */
        ifr.ifr_name[IFNAMSIZ - 1] = 0;

        /* save original address for comparison */
        memcpy(&sin_orig, sin, sizeof(*sin));

        /*
         * Temporarily cut an alias label ("name:suffix") at the colon so
         * that the device is looked up by its base name; the colon is put
         * back once the device has been found.
         */
        colon = strchr(ifr.ifr_name, ':');
        if (colon)
                *colon = 0;

        /* NOTE(review): presumably gives a module providing the device a
         * chance to load - confirm against dev_load().
         */
        dev_load(net, ifr.ifr_name);

        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
        case SIOCGIFBRDADDR:    /* Get the broadcast address */
        case SIOCGIFDSTADDR:    /* Get the destination address */
        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                /* Note that these ioctls will not sleep,
                   so that we do not impose a lock.
                   One day we will be forced to put shlock here (I mean SMP)
                 */
                /* only try label+address matching if the caller passed AF_INET */
                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                /* start the reply from a clean AF_INET sockaddr */
                memset(sin, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                break;

        case SIOCSIFFLAGS:
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                break;
        case SIOCSIFADDR:       /* Set interface address (and family) */
        case SIOCSIFBRDADDR:    /* Set the broadcast address */
        case SIOCSIFDSTADDR:    /* Set the destination address */
        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                /* the supplied address must be an IPv4 one */
                ret = -EINVAL;
                if (sin->sin_family != AF_INET)
                        goto out;
                break;
        default:
                ret = -EINVAL;
                goto out;
        }

        rtnl_lock();

        ret = -ENODEV;
        dev = __dev_get_by_name(net, ifr.ifr_name);
        if (!dev)
                goto done;

        /* restore the full label for the comparisons below */
        if (colon)
                *colon = ':';

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                if (tryaddrmatch) {
                        /* Matthias Andree */
                        /* compare label and address (4.4BSD style) */
                        /* note: we only do this for a limited set of ioctls
                           and only if the original address family was AF_INET.
                           This is checked above. */
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next) {
                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
                                    sin_orig.sin_addr.s_addr ==
                                                        ifa->ifa_local) {
                                        break; /* found */
                                }
                        }
                }
                /* we didn't get a match, maybe the application is
                   4.3BSD-style and passed in junk so we fall back to
                   comparing just the label */
                if (!ifa) {
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next)
                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
                                        break;
                }
        }

        /*
         * Every command except SIOCSIFADDR (which may create the address)
         * and SIOCSIFFLAGS (which may act on the device itself) needs an
         * existing address entry.
         */
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;

        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
                sin->sin_addr.s_addr = ifa->ifa_local;
                goto rarok;

        case SIOCGIFBRDADDR:    /* Get the broadcast address */
                sin->sin_addr.s_addr = ifa->ifa_broadcast;
                goto rarok;

        case SIOCGIFDSTADDR:    /* Get the destination address */
                sin->sin_addr.s_addr = ifa->ifa_address;
                goto rarok;

        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                sin->sin_addr.s_addr = ifa->ifa_mask;
                goto rarok;

        case SIOCSIFFLAGS:
                if (colon) {
                        /*
                         * The request named an alias label: clearing IFF_UP
                         * deletes that one address, any other flag value
                         * leaves it untouched.
                         */
                        ret = -EADDRNOTAVAIL;
                        if (!ifa)
                                break;
                        ret = 0;
                        if (!(ifr.ifr_flags & IFF_UP))
                                inet_del_ifa(in_dev, ifap, 1);
                        break;
                }
                /* no alias: the flags apply to the device as a whole */
                ret = dev_change_flags(dev, ifr.ifr_flags);
                break;

        case SIOCSIFADDR:       /* Set interface address (and family) */
                /* reject addresses that have no classful default prefix */
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;

                if (!ifa) {
                        /* no entry carries this label yet: create one */
                        ret = -ENOBUFS;
                        ifa = inet_alloc_ifa();
                        if (!ifa)
                                break;
                        INIT_HLIST_NODE(&ifa->hash);
                        if (colon)
                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
                        else
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                } else {
                        /* setting the address it already has is a no-op */
                        ret = 0;
                        if (ifa->ifa_local == sin->sin_addr.s_addr)
                                break;
                        /*
                         * Take the entry off the list and recycle it below.
                         * NOTE(review): the final 0 presumably asks
                         * inet_del_ifa() not to free the entry - confirm.
                         */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
                        ifa->ifa_scope = 0;
                }

                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

                if (!(dev->flags & IFF_POINTOPOINT)) {
                        /* classful default mask, plus a derived broadcast
                         * address on broadcast devices with a prefix < 31
                         */
                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
                        if ((dev->flags & IFF_BROADCAST) &&
                            ifa->ifa_prefixlen < 31)
                                ifa->ifa_broadcast = ifa->ifa_address |
                                                     ~ifa->ifa_mask;
                } else {
                        /* point-to-point devices get a host (/32) address */
                        ifa->ifa_prefixlen = 32;
                        ifa->ifa_mask = inet_make_mask(32);
                }
                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
                ret = inet_set_ifa(dev, ifa);
                break;

        case SIOCSIFBRDADDR:    /* Set the broadcast address */
                ret = 0;
                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* unlink, modify, then insert the same entry again */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        inet_insert_ifa(ifa);
                }
                break;

        case SIOCSIFDSTADDR:    /* Set the destination address */
                ret = 0;
                if (ifa->ifa_address == sin->sin_addr.s_addr)
                        break;
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;
                ret = 0;
                /* unlink, modify, then insert the same entry again */
                inet_del_ifa(in_dev, ifap, 0);
                ifa->ifa_address = sin->sin_addr.s_addr;
                inet_insert_ifa(ifa);
                break;

        case SIOCSIFNETMASK:    /* Set the netmask for the interface */

                /*
                 *      The mask we set must be legal.
                 */
                ret = -EINVAL;
                if (bad_mask(sin->sin_addr.s_addr, 0))
                        break;
                ret = 0;
                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                        __be32 old_mask = ifa->ifa_mask;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

                        /* See if current broadcast address matches
                         * with current netmask, then recalculate
                         * the broadcast address. Otherwise it's a
                         * funny address, so don't touch it since
                         * the user seems to know what (s)he's doing...
                         */
                        if ((dev->flags & IFF_BROADCAST) &&
                            (ifa->ifa_prefixlen < 31) &&
                            (ifa->ifa_broadcast ==
                             (ifa->ifa_local|~old_mask))) {
                                ifa->ifa_broadcast = (ifa->ifa_local |
                                                      ~sin->sin_addr.s_addr);
                        }
                        inet_insert_ifa(ifa);
                }
                break;
        }
done:
        rtnl_unlock();
out:
        return ret;
rarok:
        /* get commands: drop RTNL first, then copy the reply to user space */
        rtnl_unlock();
        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
        goto out;
}
1190
1191 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1192 {
1193         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1194         struct in_ifaddr *ifa;
1195         struct ifreq ifr;
1196         int done = 0;
1197
1198         if (WARN_ON(size > sizeof(struct ifreq)))
1199                 goto out;
1200
1201         if (!in_dev)
1202                 goto out;
1203
1204         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1205                 if (!buf) {
1206                         done += size;
1207                         continue;
1208                 }
1209                 if (len < size)
1210                         break;
1211                 memset(&ifr, 0, sizeof(struct ifreq));
1212                 strcpy(ifr.ifr_name, ifa->ifa_label);
1213
1214                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1215                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1216                                                                 ifa->ifa_local;
1217
1218                 if (copy_to_user(buf + done, &ifr, size)) {
1219                         done = -EFAULT;
1220                         break;
1221                 }
1222                 len  -= size;
1223                 done += size;
1224         }
1225 out:
1226         return done;
1227 }
1228
1229 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1230                                  int scope)
1231 {
1232         for_primary_ifa(in_dev) {
1233                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1234                     ifa->ifa_scope <= scope)
1235                         return ifa->ifa_local;
1236         } endfor_ifa(in_dev);
1237
1238         return 0;
1239 }
1240
1241 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1242 {
1243         __be32 addr = 0;
1244         struct in_device *in_dev;
1245         struct net *net = dev_net(dev);
1246         int master_idx;
1247
1248         rcu_read_lock();
1249         in_dev = __in_dev_get_rcu(dev);
1250         if (!in_dev)
1251                 goto no_in_dev;
1252
1253         for_primary_ifa(in_dev) {
1254                 if (ifa->ifa_scope > scope)
1255                         continue;
1256                 if (!dst || inet_ifa_match(dst, ifa)) {
1257                         addr = ifa->ifa_local;
1258                         break;
1259                 }
1260                 if (!addr)
1261                         addr = ifa->ifa_local;
1262         } endfor_ifa(in_dev);
1263
1264         if (addr)
1265                 goto out_unlock;
1266 no_in_dev:
1267         master_idx = l3mdev_master_ifindex_rcu(dev);
1268
1269         /* For VRFs, the VRF device takes the place of the loopback device,
1270          * with addresses on it being preferred.  Note in such cases the
1271          * loopback device will be among the devices that fail the master_idx
1272          * equality check in the loop below.
1273          */
1274         if (master_idx &&
1275             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1276             (in_dev = __in_dev_get_rcu(dev))) {
1277                 addr = in_dev_select_addr(in_dev, scope);
1278                 if (addr)
1279                         goto out_unlock;
1280         }
1281
1282         /* Not loopback addresses on loopback should be preferred
1283            in this case. It is important that lo is the first interface
1284            in dev_base list.
1285          */
1286         for_each_netdev_rcu(net, dev) {
1287                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1288                         continue;
1289
1290                 in_dev = __in_dev_get_rcu(dev);
1291                 if (!in_dev)
1292                         continue;
1293
1294                 addr = in_dev_select_addr(in_dev, scope);
1295                 if (addr)
1296                         goto out_unlock;
1297         }
1298 out_unlock:
1299         rcu_read_unlock();
1300         return addr;
1301 }
1302 EXPORT_SYMBOL(inet_select_addr);
1303
/*
 * Scan the addresses of @in_dev for a local address that satisfies the
 * wildcard query (@dst, @local, @scope).  A zero @dst or @local acts as a
 * wildcard.
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - the first ifa_local with ifa_scope <= @scope that equals @local
 *          (or the first such address at all when @local is 0);
 *   same - whether an entry was seen whose subnet, per inet_ifa_match(),
 *          contains @local and @dst (each test is skipped for a zero value).
 *
 * Returns addr when a "same" entry was found as well, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* first candidate address that fits local and scope */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* a matching subnet was already seen: done */
                        if (same)
                                break;
                }
                if (!same) {
                        /* does this entry's subnet cover local and dst? */
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* an explicit local, or no dst constraint,
                                 * needs no further checking
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        /* an address alone is not enough; a matching subnet is required too */
        return same ? addr : 0;
}
1340
1341 /*
1342  * Confirm that local IP address exists using wildcards:
1343  * - net: netns to check, cannot be NULL
1344  * - in_dev: only on this interface, NULL=any interface
1345  * - dst: only in the same subnet as dst, 0=any dst
1346  * - local: address, 0=autoselect the local address
1347  * - scope: maximum allowed scope value for the local address
1348  */
1349 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1350                          __be32 dst, __be32 local, int scope)
1351 {
1352         __be32 addr = 0;
1353         struct net_device *dev;
1354
1355         if (in_dev)
1356                 return confirm_addr_indev(in_dev, dst, local, scope);
1357
1358         rcu_read_lock();
1359         for_each_netdev_rcu(net, dev) {
1360                 in_dev = __in_dev_get_rcu(dev);
1361                 if (in_dev) {
1362                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1363                         if (addr)
1364                                 break;
1365                 }
1366         }
1367         rcu_read_unlock();
1368
1369         return addr;
1370 }
1371 EXPORT_SYMBOL(inet_confirm_addr);
1372
1373 /*
1374  *      Device notifier
1375  */
1376
/*
 * Add @nb to the blocking notifier chain inetaddr_chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1382
/*
 * Remove @nb from the blocking notifier chain inetaddr_chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1388
/*
 * Add @nb to the blocking notifier chain inetaddr_validator_chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1394
/*
 * Remove @nb from the blocking notifier chain inetaddr_validator_chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
            nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1401
1402 /* Rename ifa_labels for a device name change. Make some effort to preserve
1403  * existing alias numbering and to create unique labels if possible.
1404 */
1405 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1406 {
1407         struct in_ifaddr *ifa;
1408         int named = 0;
1409
1410         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1411                 char old[IFNAMSIZ], *dot;
1412
1413                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1414                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1415                 if (named++ == 0)
1416                         goto skip;
1417                 dot = strchr(old, ':');
1418                 if (!dot) {
1419                         sprintf(old, ":%d", named);
1420                         dot = old;
1421                 }
1422                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1423                         strcat(ifa->ifa_label, dot);
1424                 else
1425                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1426 skip:
1427                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1428         }
1429 }
1430
1431 static bool inetdev_valid_mtu(unsigned int mtu)
1432 {
1433         return mtu >= IPV4_MIN_MTU;
1434 }
1435
1436 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1437                                         struct in_device *in_dev)
1438
1439 {
1440         struct in_ifaddr *ifa;
1441
1442         for (ifa = in_dev->ifa_list; ifa;
1443              ifa = ifa->ifa_next) {
1444                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1445                          ifa->ifa_local, dev,
1446                          ifa->ifa_local, NULL,
1447                          dev->dev_addr, NULL);
1448         }
1449 }
1450
/*
 * Netdevice notifier callback that keeps the IPv4 per-device state
 * (struct in_device) in step with device events.
 *
 * Called only under RTNL semaphore.
 *
 * @this:  the notifier block (unused here)
 * @event: the NETDEV_* event
 * @ptr:   notifier info from which the affected net_device is extracted
 *
 * Returns NOTIFY_DONE, except when creating the in_device on
 * NETDEV_REGISTER fails, in which case the error is reported through
 * notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /*
         * No IPv4 state yet: only NETDEV_REGISTER and NETDEV_CHANGEMTU can
         * create it, every other event is ignored.
         */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (IS_ERR(in_dev))
                                return notifier_from_errno(PTR_ERR(in_dev));
                        /* loopback devices get NOXFRM and NOPOLICY set */
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        /* note: the result of inetdev_init() is not checked here */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* a registering device should not already have an in_device */
                pr_debug("%s: bug\n", __func__);
                RCU_INIT_POINTER(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                if (dev->flags & IFF_LOOPBACK) {
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        /*
                         * Give the loopback device INADDR_LOOPBACK with an
                         * 8-bit prefix, host scope and infinite lifetimes.
                         * An allocation failure is silently tolerated.
                         */
                        if (ifa) {
                                INIT_HLIST_NODE(&ifa->hash);
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                /* the new address holds a reference on in_dev */
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
                                                 INFINITY_LIFE_TIME);
                                ipv4_devconf_setall(in_dev);
                                neigh_parms_data_state_setall(in_dev->arp_parms);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* announce via ARP only when IN_DEV_ARP_NOTIFY() says so */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                /* a still-valid MTU needs no action */
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 */
                inetdev_changename(dev, in_dev);

                /* re-register the sysctl entries after the rename */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1546
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1550
1551 static size_t inet_nlmsg_size(void)
1552 {
1553         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1554                + nla_total_size(4) /* IFA_ADDRESS */
1555                + nla_total_size(4) /* IFA_LOCAL */
1556                + nla_total_size(4) /* IFA_BROADCAST */
1557                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1558                + nla_total_size(4)  /* IFA_FLAGS */
1559                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1560 }
1561
1562 static inline u32 cstamp_delta(unsigned long cstamp)
1563 {
1564         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1565 }
1566
1567 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1568                          unsigned long tstamp, u32 preferred, u32 valid)
1569 {
1570         struct ifa_cacheinfo ci;
1571
1572         ci.cstamp = cstamp_delta(cstamp);
1573         ci.tstamp = cstamp_delta(tstamp);
1574         ci.ifa_prefered = preferred;
1575         ci.ifa_valid = valid;
1576
1577         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1578 }
1579
1580 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1581                             u32 portid, u32 seq, int event, unsigned int flags)
1582 {
1583         struct ifaddrmsg *ifm;
1584         struct nlmsghdr  *nlh;
1585         u32 preferred, valid;
1586
1587         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1588         if (!nlh)
1589                 return -EMSGSIZE;
1590
1591         ifm = nlmsg_data(nlh);
1592         ifm->ifa_family = AF_INET;
1593         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1594         ifm->ifa_flags = ifa->ifa_flags;
1595         ifm->ifa_scope = ifa->ifa_scope;
1596         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1597
1598         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1599                 preferred = ifa->ifa_preferred_lft;
1600                 valid = ifa->ifa_valid_lft;
1601                 if (preferred != INFINITY_LIFE_TIME) {
1602                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1603
1604                         if (preferred > tval)
1605                                 preferred -= tval;
1606                         else
1607                                 preferred = 0;
1608                         if (valid != INFINITY_LIFE_TIME) {
1609                                 if (valid > tval)
1610                                         valid -= tval;
1611                                 else
1612                                         valid = 0;
1613                         }
1614                 }
1615         } else {
1616                 preferred = INFINITY_LIFE_TIME;
1617                 valid = INFINITY_LIFE_TIME;
1618         }
1619         if ((ifa->ifa_address &&
1620              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1621             (ifa->ifa_local &&
1622              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1623             (ifa->ifa_broadcast &&
1624              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1625             (ifa->ifa_label[0] &&
1626              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1627             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1628             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1629                           preferred, valid))
1630                 goto nla_put_failure;
1631
1632         nlmsg_end(skb, nlh);
1633         return 0;
1634
1635 nla_put_failure:
1636         nlmsg_cancel(skb, nlh);
1637         return -EMSGSIZE;
1638 }
1639
1640 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1641 {
1642         struct net *net = sock_net(skb->sk);
1643         int h, s_h;
1644         int idx, s_idx;
1645         int ip_idx, s_ip_idx;
1646         struct net_device *dev;
1647         struct in_device *in_dev;
1648         struct in_ifaddr *ifa;
1649         struct hlist_head *head;
1650
1651         s_h = cb->args[0];
1652         s_idx = idx = cb->args[1];
1653         s_ip_idx = ip_idx = cb->args[2];
1654
1655         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1656                 idx = 0;
1657                 head = &net->dev_index_head[h];
1658                 rcu_read_lock();
1659                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1660                           net->dev_base_seq;
1661                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1662                         if (idx < s_idx)
1663                                 goto cont;
1664                         if (h > s_h || idx > s_idx)
1665                                 s_ip_idx = 0;
1666                         in_dev = __in_dev_get_rcu(dev);
1667                         if (!in_dev)
1668                                 goto cont;
1669
1670                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1671                              ifa = ifa->ifa_next, ip_idx++) {
1672                                 if (ip_idx < s_ip_idx)
1673                                         continue;
1674                                 if (inet_fill_ifaddr(skb, ifa,
1675                                              NETLINK_CB(cb->skb).portid,
1676                                              cb->nlh->nlmsg_seq,
1677                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1678                                         rcu_read_unlock();
1679                                         goto done;
1680                                 }
1681                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1682                         }
1683 cont:
1684                         idx++;
1685                 }
1686                 rcu_read_unlock();
1687         }
1688
1689 done:
1690         cb->args[0] = h;
1691         cb->args[1] = idx;
1692         cb->args[2] = ip_idx;
1693
1694         return skb->len;
1695 }
1696
1697 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1698                       u32 portid)
1699 {
1700         struct sk_buff *skb;
1701         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1702         int err = -ENOBUFS;
1703         struct net *net;
1704
1705         net = dev_net(ifa->ifa_dev->dev);
1706         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1707         if (!skb)
1708                 goto errout;
1709
1710         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1711         if (err < 0) {
1712                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1713                 WARN_ON(err == -EMSGSIZE);
1714                 kfree_skb(skb);
1715                 goto errout;
1716         }
1717         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1718         return;
1719 errout:
1720         if (err < 0)
1721                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1722 }
1723
1724 static size_t inet_get_link_af_size(const struct net_device *dev,
1725                                     u32 ext_filter_mask)
1726 {
1727         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1728
1729         if (!in_dev)
1730                 return 0;
1731
1732         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1733 }
1734
1735 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1736                              u32 ext_filter_mask)
1737 {
1738         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1739         struct nlattr *nla;
1740         int i;
1741
1742         if (!in_dev)
1743                 return -ENODATA;
1744
1745         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1746         if (!nla)
1747                 return -EMSGSIZE;
1748
1749         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1750                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1751
1752         return 0;
1753 }
1754
/*
 * Parse policy for the AF_INET link attribute nest, used by
 * inet_validate_link_af(): IFLA_INET_CONF must itself be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1758
1759 static int inet_validate_link_af(const struct net_device *dev,
1760                                  const struct nlattr *nla)
1761 {
1762         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1763         int err, rem;
1764
1765         if (dev && !__in_dev_get_rcu(dev))
1766                 return -EAFNOSUPPORT;
1767
1768         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1769         if (err < 0)
1770                 return err;
1771
1772         if (tb[IFLA_INET_CONF]) {
1773                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1774                         int cfgid = nla_type(a);
1775
1776                         if (nla_len(a) < 4)
1777                                 return -EINVAL;
1778
1779                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1780                                 return -EINVAL;
1781                 }
1782         }
1783
1784         return 0;
1785 }
1786
1787 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1788 {
1789         struct in_device *in_dev = __in_dev_get_rcu(dev);
1790         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1791         int rem;
1792
1793         if (!in_dev)
1794                 return -EAFNOSUPPORT;
1795
1796         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1797                 BUG();
1798
1799         if (tb[IFLA_INET_CONF]) {
1800                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1801                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1802         }
1803
1804         return 0;
1805 }
1806
1807 static int inet_netconf_msgsize_devconf(int type)
1808 {
1809         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1810                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1811         bool all = false;
1812
1813         if (type == NETCONFA_ALL)
1814                 all = true;
1815
1816         if (all || type == NETCONFA_FORWARDING)
1817                 size += nla_total_size(4);
1818         if (all || type == NETCONFA_RP_FILTER)
1819                 size += nla_total_size(4);
1820         if (all || type == NETCONFA_MC_FORWARDING)
1821                 size += nla_total_size(4);
1822         if (all || type == NETCONFA_PROXY_NEIGH)
1823                 size += nla_total_size(4);
1824         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1825                 size += nla_total_size(4);
1826
1827         return size;
1828 }
1829
1830 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1831                                      struct ipv4_devconf *devconf, u32 portid,
1832                                      u32 seq, int event, unsigned int flags,
1833                                      int type)
1834 {
1835         struct nlmsghdr  *nlh;
1836         struct netconfmsg *ncm;
1837         bool all = false;
1838
1839         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1840                         flags);
1841         if (!nlh)
1842                 return -EMSGSIZE;
1843
1844         if (type == NETCONFA_ALL)
1845                 all = true;
1846
1847         ncm = nlmsg_data(nlh);
1848         ncm->ncm_family = AF_INET;
1849
1850         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1851                 goto nla_put_failure;
1852
1853         if (!devconf)
1854                 goto out;
1855
1856         if ((all || type == NETCONFA_FORWARDING) &&
1857             nla_put_s32(skb, NETCONFA_FORWARDING,
1858                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1859                 goto nla_put_failure;
1860         if ((all || type == NETCONFA_RP_FILTER) &&
1861             nla_put_s32(skb, NETCONFA_RP_FILTER,
1862                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1863                 goto nla_put_failure;
1864         if ((all || type == NETCONFA_MC_FORWARDING) &&
1865             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1866                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1867                 goto nla_put_failure;
1868         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1869             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1870                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1871                 goto nla_put_failure;
1872         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1873             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1874                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1875                 goto nla_put_failure;
1876
1877 out:
1878         nlmsg_end(skb, nlh);
1879         return 0;
1880
1881 nla_put_failure:
1882         nlmsg_cancel(skb, nlh);
1883         return -EMSGSIZE;
1884 }
1885
1886 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1887                                  int ifindex, struct ipv4_devconf *devconf)
1888 {
1889         struct sk_buff *skb;
1890         int err = -ENOBUFS;
1891
1892         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1893         if (!skb)
1894                 goto errout;
1895
1896         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1897                                         event, 0, type);
1898         if (err < 0) {
1899                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1900                 WARN_ON(err == -EMSGSIZE);
1901                 kfree_skb(skb);
1902                 goto errout;
1903         }
1904         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1905         return;
1906 errout:
1907         if (err < 0)
1908                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1909 }
1910
/*
 * Attribute policy handed to nlmsg_parse() in inet_netconf_get_devconf().
 * Each listed attribute is declared with a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
        [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
};
1918
1919 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1920                                     struct nlmsghdr *nlh,
1921                                     struct netlink_ext_ack *extack)
1922 {
1923         struct net *net = sock_net(in_skb->sk);
1924         struct nlattr *tb[NETCONFA_MAX+1];
1925         struct netconfmsg *ncm;
1926         struct sk_buff *skb;
1927         struct ipv4_devconf *devconf;
1928         struct in_device *in_dev;
1929         struct net_device *dev;
1930         int ifindex;
1931         int err;
1932
1933         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1934                           devconf_ipv4_policy, extack);
1935         if (err < 0)
1936                 goto errout;
1937
1938         err = -EINVAL;
1939         if (!tb[NETCONFA_IFINDEX])
1940                 goto errout;
1941
1942         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1943         switch (ifindex) {
1944         case NETCONFA_IFINDEX_ALL:
1945                 devconf = net->ipv4.devconf_all;
1946                 break;
1947         case NETCONFA_IFINDEX_DEFAULT:
1948                 devconf = net->ipv4.devconf_dflt;
1949                 break;
1950         default:
1951                 dev = __dev_get_by_index(net, ifindex);
1952                 if (!dev)
1953                         goto errout;
1954                 in_dev = __in_dev_get_rtnl(dev);
1955                 if (!in_dev)
1956                         goto errout;
1957                 devconf = &in_dev->cnf;
1958                 break;
1959         }
1960
1961         err = -ENOBUFS;
1962         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1963         if (!skb)
1964                 goto errout;
1965
1966         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1967                                         NETLINK_CB(in_skb).portid,
1968                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1969                                         NETCONFA_ALL);
1970         if (err < 0) {
1971                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1972                 WARN_ON(err == -EMSGSIZE);
1973                 kfree_skb(skb);
1974                 goto errout;
1975         }
1976         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1977 errout:
1978         return err;
1979 }
1980
1981 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1982                                      struct netlink_callback *cb)
1983 {
1984         struct net *net = sock_net(skb->sk);
1985         int h, s_h;
1986         int idx, s_idx;
1987         struct net_device *dev;
1988         struct in_device *in_dev;
1989         struct hlist_head *head;
1990
1991         s_h = cb->args[0];
1992         s_idx = idx = cb->args[1];
1993
1994         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1995                 idx = 0;
1996                 head = &net->dev_index_head[h];
1997                 rcu_read_lock();
1998                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1999                           net->dev_base_seq;
2000                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2001                         if (idx < s_idx)
2002                                 goto cont;
2003                         in_dev = __in_dev_get_rcu(dev);
2004                         if (!in_dev)
2005                                 goto cont;
2006
2007                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2008                                                       &in_dev->cnf,
2009                                                       NETLINK_CB(cb->skb).portid,
2010                                                       cb->nlh->nlmsg_seq,
2011                                                       RTM_NEWNETCONF,
2012                                                       NLM_F_MULTI,
2013                                                       NETCONFA_ALL) < 0) {
2014                                 rcu_read_unlock();
2015                                 goto done;
2016                         }
2017                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2018 cont:
2019                         idx++;
2020                 }
2021                 rcu_read_unlock();
2022         }
2023         if (h == NETDEV_HASHENTRIES) {
2024                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2025                                               net->ipv4.devconf_all,
2026                                               NETLINK_CB(cb->skb).portid,
2027                                               cb->nlh->nlmsg_seq,
2028                                               RTM_NEWNETCONF, NLM_F_MULTI,
2029                                               NETCONFA_ALL) < 0)
2030                         goto done;
2031                 else
2032                         h++;
2033         }
2034         if (h == NETDEV_HASHENTRIES + 1) {
2035                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2036                                               net->ipv4.devconf_dflt,
2037                                               NETLINK_CB(cb->skb).portid,
2038                                               cb->nlh->nlmsg_seq,
2039                                               RTM_NEWNETCONF, NLM_F_MULTI,
2040                                               NETCONFA_ALL) < 0)
2041                         goto done;
2042                 else
2043                         h++;
2044         }
2045 done:
2046         cb->args[0] = h;
2047         cb->args[1] = idx;
2048
2049         return skb->len;
2050 }
2051
2052 #ifdef CONFIG_SYSCTL
2053
2054 static void devinet_copy_dflt_conf(struct net *net, int i)
2055 {
2056         struct net_device *dev;
2057
2058         rcu_read_lock();
2059         for_each_netdev_rcu(net, dev) {
2060                 struct in_device *in_dev;
2061
2062                 in_dev = __in_dev_get_rcu(dev);
2063                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2064                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2065         }
2066         rcu_read_unlock();
2067 }
2068
2069 /* called with RTNL locked */
2070 static void inet_forward_change(struct net *net)
2071 {
2072         struct net_device *dev;
2073         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2074
2075         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2076         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2077         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2078                                     NETCONFA_FORWARDING,
2079                                     NETCONFA_IFINDEX_ALL,
2080                                     net->ipv4.devconf_all);
2081         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2082                                     NETCONFA_FORWARDING,
2083                                     NETCONFA_IFINDEX_DEFAULT,
2084                                     net->ipv4.devconf_dflt);
2085
2086         for_each_netdev(net, dev) {
2087                 struct in_device *in_dev;
2088
2089                 if (on)
2090                         dev_disable_lro(dev);
2091
2092                 in_dev = __in_dev_get_rtnl(dev);
2093                 if (in_dev) {
2094                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2095                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2096                                                     NETCONFA_FORWARDING,
2097                                                     dev->ifindex, &in_dev->cnf);
2098                 }
2099         }
2100 }
2101
2102 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2103 {
2104         if (cnf == net->ipv4.devconf_dflt)
2105                 return NETCONFA_IFINDEX_DEFAULT;
2106         else if (cnf == net->ipv4.devconf_all)
2107                 return NETCONFA_IFINDEX_ALL;
2108         else {
2109                 struct in_device *idev
2110                         = container_of(cnf, struct in_device, cnf);
2111                 return idev->dev->ifindex;
2112         }
2113 }
2114
2115 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2116                              void __user *buffer,
2117                              size_t *lenp, loff_t *ppos)
2118 {
2119         int old_value = *(int *)ctl->data;
2120         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2121         int new_value = *(int *)ctl->data;
2122
2123         if (write) {
2124                 struct ipv4_devconf *cnf = ctl->extra1;
2125                 struct net *net = ctl->extra2;
2126                 int i = (int *)ctl->data - cnf->data;
2127                 int ifindex;
2128
2129                 set_bit(i, cnf->state);
2130
2131                 if (cnf == net->ipv4.devconf_dflt)
2132                         devinet_copy_dflt_conf(net, i);
2133                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2134                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2135                         if ((new_value == 0) && (old_value != 0))
2136                                 rt_cache_flush(net);
2137
2138                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2139                     new_value != old_value) {
2140                         ifindex = devinet_conf_ifindex(net, cnf);
2141                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2142                                                     NETCONFA_RP_FILTER,
2143                                                     ifindex, cnf);
2144                 }
2145                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2146                     new_value != old_value) {
2147                         ifindex = devinet_conf_ifindex(net, cnf);
2148                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2149                                                     NETCONFA_PROXY_NEIGH,
2150                                                     ifindex, cnf);
2151                 }
2152                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2153                     new_value != old_value) {
2154                         ifindex = devinet_conf_ifindex(net, cnf);
2155                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2156                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2157                                                     ifindex, cnf);
2158                 }
2159         }
2160
2161         return ret;
2162 }
2163
2164 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2165                                   void __user *buffer,
2166                                   size_t *lenp, loff_t *ppos)
2167 {
2168         int *valp = ctl->data;
2169         int val = *valp;
2170         loff_t pos = *ppos;
2171         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2172
2173         if (write && *valp != val) {
2174                 struct net *net = ctl->extra2;
2175
2176                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2177                         if (!rtnl_trylock()) {
2178                                 /* Restore the original values before restarting */
2179                                 *valp = val;
2180                                 *ppos = pos;
2181                                 return restart_syscall();
2182                         }
2183                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2184                                 inet_forward_change(net);
2185                         } else {
2186                                 struct ipv4_devconf *cnf = ctl->extra1;
2187                                 struct in_device *idev =
2188                                         container_of(cnf, struct in_device, cnf);
2189                                 if (*valp)
2190                                         dev_disable_lro(idev->dev);
2191                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2192                                                             NETCONFA_FORWARDING,
2193                                                             idev->dev->ifindex,
2194                                                             cnf);
2195                         }
2196                         rtnl_unlock();
2197                         rt_cache_flush(net);
2198                 } else
2199                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2200                                                     NETCONFA_FORWARDING,
2201                                                     NETCONFA_IFINDEX_DEFAULT,
2202                                                     net->ipv4.devconf_dflt);
2203         }
2204
2205         return ret;
2206 }
2207
2208 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2209                                 void __user *buffer,
2210                                 size_t *lenp, loff_t *ppos)
2211 {
2212         int *valp = ctl->data;
2213         int val = *valp;
2214         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2215         struct net *net = ctl->extra2;
2216
2217         if (write && *valp != val)
2218                 rt_cache_flush(net);
2219
2220         return ret;
2221 }
2222
/*
 * Build one ctl_table initializer.  .data points at slot
 * IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf.data array and
 * .extra1 at ipv4_devconf itself; @mval is the file mode and @proc the
 * handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2245
/*
 * Template for the per-configuration sysctl tables.  Every entry refers to
 * the global ipv4_devconf; __devinet_sysctl_register() duplicates this
 * structure and re-points the copy at the configuration being registered.
 * sysctl_header holds the handle returned by register_net_sysctl() for a
 * registered copy.
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
        .devinet_vars = {
                /* Forwarding has its own handler. */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                /* The only entry declared read-only. */
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                /* Read-write entries handled by devinet_conf_proc(). */
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
                DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
                                        "force_igmp_version"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv2_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv3_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
                                        "ignore_routes_with_linkdown"),
                DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
                                        "drop_gratuitous_arp"),

                /* Entries handled by ipv4_doint_and_flush(). */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
                                              "route_localnet"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
                                              "drop_unicast_in_l2_multicast"),
        },
};
2296
2297 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2298                                      int ifindex, struct ipv4_devconf *p)
2299 {
2300         int i;
2301         struct devinet_sysctl_table *t;
2302         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2303
2304         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2305         if (!t)
2306                 goto out;
2307
2308         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2309                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2310                 t->devinet_vars[i].extra1 = p;
2311                 t->devinet_vars[i].extra2 = net;
2312         }
2313
2314         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2315
2316         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2317         if (!t->sysctl_header)
2318                 goto free;
2319
2320         p->sysctl = t;
2321
2322         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2323                                     ifindex, p);
2324         return 0;
2325
2326 free:
2327         kfree(t);
2328 out:
2329         return -ENOBUFS;
2330 }
2331
2332 static void __devinet_sysctl_unregister(struct net *net,
2333                                         struct ipv4_devconf *cnf, int ifindex)
2334 {
2335         struct devinet_sysctl_table *t = cnf->sysctl;
2336
2337         if (t) {
2338                 cnf->sysctl = NULL;
2339                 unregister_net_sysctl_table(t->sysctl_header);
2340                 kfree(t);
2341         }
2342
2343         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2344 }
2345
2346 static int devinet_sysctl_register(struct in_device *idev)
2347 {
2348         int err;
2349
2350         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2351                 return -EINVAL;
2352
2353         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2354         if (err)
2355                 return err;
2356         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2357                                         idev->dev->ifindex, &idev->cnf);
2358         if (err)
2359                 neigh_sysctl_unregister(idev->arp_parms);
2360         return err;
2361 }
2362
2363 static void devinet_sysctl_unregister(struct in_device *idev)
2364 {
2365         struct net *net = dev_net(idev->dev);
2366
2367         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2368         neigh_sysctl_unregister(idev->arp_parms);
2369 }
2370
/*
 * Table holding the "ip_forward" sysctl entry.  As written here it refers
 * to the FORWARDING slot of the global ipv4_devconf and to init_net, and is
 * served by devinet_sysctl_forward().
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
2384 #endif
2385
/*
 * Per-namespace setup of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt blocks (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table) directly.  Any
 * other namespace gets kmemdup() copies of them, with the copied
 * "ip_forward" entry re-pointed at the namespace's own "all" block.
 *
 * With CONFIG_SYSCTL the "all" and "default" configuration tables and the
 * "net/ipv4" forwarding table are then registered.
 *
 * Returns 0 on success, -ENOMEM if an allocation or the "net/ipv4"
 * registration fails, or the error from __devinet_sysctl_register().
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Namespaces other than init_net work on private copies. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (!all)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (!dflt)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (!tbl)
                        goto err_alloc_ctl;

                /* Re-point the copied "ip_forward" entry at this namespace. */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default",
                                        NETCONFA_IFINDEX_DEFAULT, dflt);
        if (err < 0)
                goto err_reg_dflt;

        /* register_net_sysctl() returns no error code; report -ENOMEM. */
        err = -ENOMEM;
        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
        if (!forw_hdr)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /*
         * Error unwinding: each label undoes the step that succeeded just
         * before the failing one and falls through to the earlier ones.
         * The static init_net objects are never freed.
         */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
        __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
2458
2459 static __net_exit void devinet_exit_net(struct net *net)
2460 {
2461 #ifdef CONFIG_SYSCTL
2462         struct ctl_table *tbl;
2463
2464         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2465         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2466         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2467                                     NETCONFA_IFINDEX_DEFAULT);
2468         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2469                                     NETCONFA_IFINDEX_ALL);
2470         kfree(tbl);
2471 #endif
2472         kfree(net->ipv4.devconf_dflt);
2473         kfree(net->ipv4.devconf_all);
2474 }
2475
2476 static __net_initdata struct pernet_operations devinet_ops = {
2477         .init = devinet_init_net,
2478         .exit = devinet_exit_net,
2479 };
2480
2481 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2482         .family           = AF_INET,
2483         .fill_link_af     = inet_fill_link_af,
2484         .get_link_af_size = inet_get_link_af_size,
2485         .validate_link_af = inet_validate_link_af,
2486         .set_link_af      = inet_set_link_af,
2487 };
2488
2489 void __init devinet_init(void)
2490 {
2491         int i;
2492
2493         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2494                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2495
2496         register_pernet_subsys(&devinet_ops);
2497
2498         register_gifconf(PF_INET, inet_gifconf);
2499         register_netdevice_notifier(&ip_netdev_notifier);
2500
2501         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2502
2503         rtnl_af_register(&inet_af_ops);
2504
2505         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2506         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2507         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2508         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2509                       inet_netconf_dump_devconf, 0);
2510 }