net/ipv4: Move loop over addresses on a device into in_dev_dump_addr
[muen/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/*
 * Read setting @attr from the per-namespace default devconf of @net.
 * @net is parenthesised so that any pointer expression (for example
 * "p + 1" or "cond ? a : b") can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
103         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
104 };
105
106 struct inet_fill_args {
107         u32 portid;
108         u32 seq;
109         int event;
110         unsigned int flags;
111         int netnsid;
112 };
113
114 #define IN4_ADDR_HSIZE_SHIFT    8
115 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
116
117 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
118
119 static u32 inet_addr_hash(const struct net *net, __be32 addr)
120 {
121         u32 val = (__force u32) addr ^ net_hash_mix(net);
122
123         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
124 }
125
126 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
127 {
128         u32 hash = inet_addr_hash(net, ifa->ifa_local);
129
130         ASSERT_RTNL();
131         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
132 }
133
134 static void inet_hash_remove(struct in_ifaddr *ifa)
135 {
136         ASSERT_RTNL();
137         hlist_del_init_rcu(&ifa->hash);
138 }
139
140 /**
141  * __ip_dev_find - find the first device with a given source address.
142  * @net: the net namespace
143  * @addr: the source address
144  * @devref: if true, take a reference on the found device
145  *
146  * If a caller uses devref=false, it should be protected by RCU, or RTNL
147  */
148 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
149 {
150         struct net_device *result = NULL;
151         struct in_ifaddr *ifa;
152
153         rcu_read_lock();
154         ifa = inet_lookup_ifaddr_rcu(net, addr);
155         if (!ifa) {
156                 struct flowi4 fl4 = { .daddr = addr };
157                 struct fib_result res = { 0 };
158                 struct fib_table *local;
159
160                 /* Fallback to FIB local table so that communication
161                  * over loopback subnets work.
162                  */
163                 local = fib_get_table(net, RT_TABLE_LOCAL);
164                 if (local &&
165                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166                     res.type == RTN_LOCAL)
167                         result = FIB_RES_DEV(res);
168         } else {
169                 result = ifa->ifa_dev->dev;
170         }
171         if (result && devref)
172                 dev_hold(result);
173         rcu_read_unlock();
174         return result;
175 }
176 EXPORT_SYMBOL(__ip_dev_find);
177
178 /* called under RCU lock */
179 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
180 {
181         u32 hash = inet_addr_hash(net, addr);
182         struct in_ifaddr *ifa;
183
184         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
185                 if (ifa->ifa_local == addr &&
186                     net_eq(dev_net(ifa->ifa_dev->dev), net))
187                         return ifa;
188
189         return NULL;
190 }
191
192 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
193
194 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
195 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
196 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
197                          int destroy);
#ifndef CONFIG_SYSCTL
/* Without sysctl support, registration trivially succeeds ... */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}

/* ... and there is nothing to unregister. */
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#else
/* The real implementations are defined later in this file. */
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#endif
210
211 /* Locks all the inet devices. */
212
213 static struct in_ifaddr *inet_alloc_ifa(void)
214 {
215         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
216 }
217
218 static void inet_rcu_free_ifa(struct rcu_head *head)
219 {
220         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
221         if (ifa->ifa_dev)
222                 in_dev_put(ifa->ifa_dev);
223         kfree(ifa);
224 }
225
226 static void inet_free_ifa(struct in_ifaddr *ifa)
227 {
228         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
229 }
230
231 void in_dev_finish_destroy(struct in_device *idev)
232 {
233         struct net_device *dev = idev->dev;
234
235         WARN_ON(idev->ifa_list);
236         WARN_ON(idev->mc_list);
237         kfree(rcu_dereference_protected(idev->mc_hash, 1));
238 #ifdef NET_REFCNT_DEBUG
239         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
240 #endif
241         dev_put(dev);
242         if (!idev->dead)
243                 pr_err("Freeing alive in_device %p\n", idev);
244         else
245                 kfree(idev);
246 }
247 EXPORT_SYMBOL(in_dev_finish_destroy);
248
249 static struct in_device *inetdev_init(struct net_device *dev)
250 {
251         struct in_device *in_dev;
252         int err = -ENOMEM;
253
254         ASSERT_RTNL();
255
256         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
257         if (!in_dev)
258                 goto out;
259         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
260                         sizeof(in_dev->cnf));
261         in_dev->cnf.sysctl = NULL;
262         in_dev->dev = dev;
263         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
264         if (!in_dev->arp_parms)
265                 goto out_kfree;
266         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
267                 dev_disable_lro(dev);
268         /* Reference in_dev->dev */
269         dev_hold(dev);
270         /* Account for reference dev->ip_ptr (below) */
271         refcount_set(&in_dev->refcnt, 1);
272
273         err = devinet_sysctl_register(in_dev);
274         if (err) {
275                 in_dev->dead = 1;
276                 in_dev_put(in_dev);
277                 in_dev = NULL;
278                 goto out;
279         }
280         ip_mc_init_dev(in_dev);
281         if (dev->flags & IFF_UP)
282                 ip_mc_up(in_dev);
283
284         /* we can receive as soon as ip_ptr is set -- do this last */
285         rcu_assign_pointer(dev->ip_ptr, in_dev);
286 out:
287         return in_dev ?: ERR_PTR(err);
288 out_kfree:
289         kfree(in_dev);
290         in_dev = NULL;
291         goto out;
292 }
293
294 static void in_dev_rcu_put(struct rcu_head *head)
295 {
296         struct in_device *idev = container_of(head, struct in_device, rcu_head);
297         in_dev_put(idev);
298 }
299
300 static void inetdev_destroy(struct in_device *in_dev)
301 {
302         struct in_ifaddr *ifa;
303         struct net_device *dev;
304
305         ASSERT_RTNL();
306
307         dev = in_dev->dev;
308
309         in_dev->dead = 1;
310
311         ip_mc_destroy_dev(in_dev);
312
313         while ((ifa = in_dev->ifa_list) != NULL) {
314                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
315                 inet_free_ifa(ifa);
316         }
317
318         RCU_INIT_POINTER(dev->ip_ptr, NULL);
319
320         devinet_sysctl_unregister(in_dev);
321         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
322         arp_ifdown(dev);
323
324         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
325 }
326
327 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
328 {
329         rcu_read_lock();
330         for_primary_ifa(in_dev) {
331                 if (inet_ifa_match(a, ifa)) {
332                         if (!b || inet_ifa_match(b, ifa)) {
333                                 rcu_read_unlock();
334                                 return 1;
335                         }
336                 }
337         } endfor_ifa(in_dev);
338         rcu_read_unlock();
339         return 0;
340 }
341
342 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
343                          int destroy, struct nlmsghdr *nlh, u32 portid)
344 {
345         struct in_ifaddr *promote = NULL;
346         struct in_ifaddr *ifa, *ifa1 = *ifap;
347         struct in_ifaddr *last_prim = in_dev->ifa_list;
348         struct in_ifaddr *prev_prom = NULL;
349         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
350
351         ASSERT_RTNL();
352
353         if (in_dev->dead)
354                 goto no_promotions;
355
356         /* 1. Deleting primary ifaddr forces deletion all secondaries
357          * unless alias promotion is set
358          **/
359
360         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
361                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
362
363                 while ((ifa = *ifap1) != NULL) {
364                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
365                             ifa1->ifa_scope <= ifa->ifa_scope)
366                                 last_prim = ifa;
367
368                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
369                             ifa1->ifa_mask != ifa->ifa_mask ||
370                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
371                                 ifap1 = &ifa->ifa_next;
372                                 prev_prom = ifa;
373                                 continue;
374                         }
375
376                         if (!do_promote) {
377                                 inet_hash_remove(ifa);
378                                 *ifap1 = ifa->ifa_next;
379
380                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
381                                 blocking_notifier_call_chain(&inetaddr_chain,
382                                                 NETDEV_DOWN, ifa);
383                                 inet_free_ifa(ifa);
384                         } else {
385                                 promote = ifa;
386                                 break;
387                         }
388                 }
389         }
390
391         /* On promotion all secondaries from subnet are changing
392          * the primary IP, we must remove all their routes silently
393          * and later to add them back with new prefsrc. Do this
394          * while all addresses are on the device list.
395          */
396         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
397                 if (ifa1->ifa_mask == ifa->ifa_mask &&
398                     inet_ifa_match(ifa1->ifa_address, ifa))
399                         fib_del_ifaddr(ifa, ifa1);
400         }
401
402 no_promotions:
403         /* 2. Unlink it */
404
405         *ifap = ifa1->ifa_next;
406         inet_hash_remove(ifa1);
407
408         /* 3. Announce address deletion */
409
410         /* Send message first, then call notifier.
411            At first sight, FIB update triggered by notifier
412            will refer to already deleted ifaddr, that could confuse
413            netlink listeners. It is not true: look, gated sees
414            that route deleted and if it still thinks that ifaddr
415            is valid, it will try to restore deleted routes... Grr.
416            So that, this order is correct.
417          */
418         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
419         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
420
421         if (promote) {
422                 struct in_ifaddr *next_sec = promote->ifa_next;
423
424                 if (prev_prom) {
425                         prev_prom->ifa_next = promote->ifa_next;
426                         promote->ifa_next = last_prim->ifa_next;
427                         last_prim->ifa_next = promote;
428                 }
429
430                 promote->ifa_flags &= ~IFA_F_SECONDARY;
431                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
432                 blocking_notifier_call_chain(&inetaddr_chain,
433                                 NETDEV_UP, promote);
434                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
435                         if (ifa1->ifa_mask != ifa->ifa_mask ||
436                             !inet_ifa_match(ifa1->ifa_address, ifa))
437                                         continue;
438                         fib_add_ifaddr(ifa);
439                 }
440
441         }
442         if (destroy)
443                 inet_free_ifa(ifa1);
444 }
445
446 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
447                          int destroy)
448 {
449         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
450 }
451
/* Delayed work item that runs check_lifetime(), which is defined below. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
455
456 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
457                              u32 portid, struct netlink_ext_ack *extack)
458 {
459         struct in_device *in_dev = ifa->ifa_dev;
460         struct in_ifaddr *ifa1, **ifap, **last_primary;
461         struct in_validator_info ivi;
462         int ret;
463
464         ASSERT_RTNL();
465
466         if (!ifa->ifa_local) {
467                 inet_free_ifa(ifa);
468                 return 0;
469         }
470
471         ifa->ifa_flags &= ~IFA_F_SECONDARY;
472         last_primary = &in_dev->ifa_list;
473
474         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
475              ifap = &ifa1->ifa_next) {
476                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
477                     ifa->ifa_scope <= ifa1->ifa_scope)
478                         last_primary = &ifa1->ifa_next;
479                 if (ifa1->ifa_mask == ifa->ifa_mask &&
480                     inet_ifa_match(ifa1->ifa_address, ifa)) {
481                         if (ifa1->ifa_local == ifa->ifa_local) {
482                                 inet_free_ifa(ifa);
483                                 return -EEXIST;
484                         }
485                         if (ifa1->ifa_scope != ifa->ifa_scope) {
486                                 inet_free_ifa(ifa);
487                                 return -EINVAL;
488                         }
489                         ifa->ifa_flags |= IFA_F_SECONDARY;
490                 }
491         }
492
493         /* Allow any devices that wish to register ifaddr validtors to weigh
494          * in now, before changes are committed.  The rntl lock is serializing
495          * access here, so the state should not change between a validator call
496          * and a final notify on commit.  This isn't invoked on promotion under
497          * the assumption that validators are checking the address itself, and
498          * not the flags.
499          */
500         ivi.ivi_addr = ifa->ifa_address;
501         ivi.ivi_dev = ifa->ifa_dev;
502         ivi.extack = extack;
503         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
504                                            NETDEV_UP, &ivi);
505         ret = notifier_to_errno(ret);
506         if (ret) {
507                 inet_free_ifa(ifa);
508                 return ret;
509         }
510
511         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
512                 prandom_seed((__force u32) ifa->ifa_local);
513                 ifap = last_primary;
514         }
515
516         ifa->ifa_next = *ifap;
517         *ifap = ifa;
518
519         inet_hash_insert(dev_net(in_dev->dev), ifa);
520
521         cancel_delayed_work(&check_lifetime_work);
522         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
523
524         /* Send message first, then call notifier.
525            Notifier will trigger FIB update, so that
526            listeners of netlink will know about new ifaddr */
527         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
528         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
529
530         return 0;
531 }
532
533 static int inet_insert_ifa(struct in_ifaddr *ifa)
534 {
535         return __inet_insert_ifa(ifa, NULL, 0, NULL);
536 }
537
538 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
539 {
540         struct in_device *in_dev = __in_dev_get_rtnl(dev);
541
542         ASSERT_RTNL();
543
544         if (!in_dev) {
545                 inet_free_ifa(ifa);
546                 return -ENOBUFS;
547         }
548         ipv4_devconf_setall(in_dev);
549         neigh_parms_data_state_setall(in_dev->arp_parms);
550         if (ifa->ifa_dev != in_dev) {
551                 WARN_ON(ifa->ifa_dev);
552                 in_dev_hold(in_dev);
553                 ifa->ifa_dev = in_dev;
554         }
555         if (ipv4_is_loopback(ifa->ifa_local))
556                 ifa->ifa_scope = RT_SCOPE_HOST;
557         return inet_insert_ifa(ifa);
558 }
559
560 /* Caller must hold RCU or RTNL :
561  * We dont take a reference on found in_device
562  */
563 struct in_device *inetdev_by_index(struct net *net, int ifindex)
564 {
565         struct net_device *dev;
566         struct in_device *in_dev = NULL;
567
568         rcu_read_lock();
569         dev = dev_get_by_index_rcu(net, ifindex);
570         if (dev)
571                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
572         rcu_read_unlock();
573         return in_dev;
574 }
575 EXPORT_SYMBOL(inetdev_by_index);
576
577 /* Called only from RTNL semaphored context. No locks. */
578
579 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
580                                     __be32 mask)
581 {
582         ASSERT_RTNL();
583
584         for_primary_ifa(in_dev) {
585                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
586                         return ifa;
587         } endfor_ifa(in_dev);
588         return NULL;
589 }
590
591 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
592 {
593         struct ip_mreqn mreq = {
594                 .imr_multiaddr.s_addr = ifa->ifa_address,
595                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
596         };
597         int ret;
598
599         ASSERT_RTNL();
600
601         lock_sock(sk);
602         if (join)
603                 ret = ip_mc_join_group(sk, &mreq);
604         else
605                 ret = ip_mc_leave_group(sk, &mreq);
606         release_sock(sk);
607
608         return ret;
609 }
610
611 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
612                             struct netlink_ext_ack *extack)
613 {
614         struct net *net = sock_net(skb->sk);
615         struct nlattr *tb[IFA_MAX+1];
616         struct in_device *in_dev;
617         struct ifaddrmsg *ifm;
618         struct in_ifaddr *ifa, **ifap;
619         int err = -EINVAL;
620
621         ASSERT_RTNL();
622
623         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
624                           extack);
625         if (err < 0)
626                 goto errout;
627
628         ifm = nlmsg_data(nlh);
629         in_dev = inetdev_by_index(net, ifm->ifa_index);
630         if (!in_dev) {
631                 err = -ENODEV;
632                 goto errout;
633         }
634
635         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
636              ifap = &ifa->ifa_next) {
637                 if (tb[IFA_LOCAL] &&
638                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
639                         continue;
640
641                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
642                         continue;
643
644                 if (tb[IFA_ADDRESS] &&
645                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
646                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
647                         continue;
648
649                 if (ipv4_is_multicast(ifa->ifa_address))
650                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
651                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
652                 return 0;
653         }
654
655         err = -EADDRNOTAVAIL;
656 errout:
657         return err;
658 }
659
/* Lifetime value meaning "never expires" (all 32 bits set). */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
661
662 static void check_lifetime(struct work_struct *work)
663 {
664         unsigned long now, next, next_sec, next_sched;
665         struct in_ifaddr *ifa;
666         struct hlist_node *n;
667         int i;
668
669         now = jiffies;
670         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
671
672         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
673                 bool change_needed = false;
674
675                 rcu_read_lock();
676                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
677                         unsigned long age;
678
679                         if (ifa->ifa_flags & IFA_F_PERMANENT)
680                                 continue;
681
682                         /* We try to batch several events at once. */
683                         age = (now - ifa->ifa_tstamp +
684                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
685
686                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
687                             age >= ifa->ifa_valid_lft) {
688                                 change_needed = true;
689                         } else if (ifa->ifa_preferred_lft ==
690                                    INFINITY_LIFE_TIME) {
691                                 continue;
692                         } else if (age >= ifa->ifa_preferred_lft) {
693                                 if (time_before(ifa->ifa_tstamp +
694                                                 ifa->ifa_valid_lft * HZ, next))
695                                         next = ifa->ifa_tstamp +
696                                                ifa->ifa_valid_lft * HZ;
697
698                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
699                                         change_needed = true;
700                         } else if (time_before(ifa->ifa_tstamp +
701                                                ifa->ifa_preferred_lft * HZ,
702                                                next)) {
703                                 next = ifa->ifa_tstamp +
704                                        ifa->ifa_preferred_lft * HZ;
705                         }
706                 }
707                 rcu_read_unlock();
708                 if (!change_needed)
709                         continue;
710                 rtnl_lock();
711                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
712                         unsigned long age;
713
714                         if (ifa->ifa_flags & IFA_F_PERMANENT)
715                                 continue;
716
717                         /* We try to batch several events at once. */
718                         age = (now - ifa->ifa_tstamp +
719                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
720
721                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
722                             age >= ifa->ifa_valid_lft) {
723                                 struct in_ifaddr **ifap;
724
725                                 for (ifap = &ifa->ifa_dev->ifa_list;
726                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
727                                         if (*ifap == ifa) {
728                                                 inet_del_ifa(ifa->ifa_dev,
729                                                              ifap, 1);
730                                                 break;
731                                         }
732                                 }
733                         } else if (ifa->ifa_preferred_lft !=
734                                    INFINITY_LIFE_TIME &&
735                                    age >= ifa->ifa_preferred_lft &&
736                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
737                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
738                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
739                         }
740                 }
741                 rtnl_unlock();
742         }
743
744         next_sec = round_jiffies_up(next);
745         next_sched = next;
746
747         /* If rounded timeout is accurate enough, accept it. */
748         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
749                 next_sched = next_sec;
750
751         now = jiffies;
752         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
753         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
754                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
755
756         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
757                         next_sched - now);
758 }
759
760 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
761                              __u32 prefered_lft)
762 {
763         unsigned long timeout;
764
765         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
766
767         timeout = addrconf_timeout_fixup(valid_lft, HZ);
768         if (addrconf_finite_timeout(timeout))
769                 ifa->ifa_valid_lft = timeout;
770         else
771                 ifa->ifa_flags |= IFA_F_PERMANENT;
772
773         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
774         if (addrconf_finite_timeout(timeout)) {
775                 if (timeout == 0)
776                         ifa->ifa_flags |= IFA_F_DEPRECATED;
777                 ifa->ifa_preferred_lft = timeout;
778         }
779         ifa->ifa_tstamp = jiffies;
780         if (!ifa->ifa_cstamp)
781                 ifa->ifa_cstamp = ifa->ifa_tstamp;
782 }
783
784 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
785                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
786                                        struct netlink_ext_ack *extack)
787 {
788         struct nlattr *tb[IFA_MAX+1];
789         struct in_ifaddr *ifa;
790         struct ifaddrmsg *ifm;
791         struct net_device *dev;
792         struct in_device *in_dev;
793         int err;
794
795         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
796                           extack);
797         if (err < 0)
798                 goto errout;
799
800         ifm = nlmsg_data(nlh);
801         err = -EINVAL;
802         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
803                 goto errout;
804
805         dev = __dev_get_by_index(net, ifm->ifa_index);
806         err = -ENODEV;
807         if (!dev)
808                 goto errout;
809
810         in_dev = __in_dev_get_rtnl(dev);
811         err = -ENOBUFS;
812         if (!in_dev)
813                 goto errout;
814
815         ifa = inet_alloc_ifa();
816         if (!ifa)
817                 /*
818                  * A potential indev allocation can be left alive, it stays
819                  * assigned to its device and is destroy with it.
820                  */
821                 goto errout;
822
823         ipv4_devconf_setall(in_dev);
824         neigh_parms_data_state_setall(in_dev->arp_parms);
825         in_dev_hold(in_dev);
826
827         if (!tb[IFA_ADDRESS])
828                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
829
830         INIT_HLIST_NODE(&ifa->hash);
831         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
832         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
833         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
834                                          ifm->ifa_flags;
835         ifa->ifa_scope = ifm->ifa_scope;
836         ifa->ifa_dev = in_dev;
837
838         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
839         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
840
841         if (tb[IFA_BROADCAST])
842                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
843
844         if (tb[IFA_LABEL])
845                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
846         else
847                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
848
849         if (tb[IFA_RT_PRIORITY])
850                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
851
852         if (tb[IFA_CACHEINFO]) {
853                 struct ifa_cacheinfo *ci;
854
855                 ci = nla_data(tb[IFA_CACHEINFO]);
856                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
857                         err = -EINVAL;
858                         goto errout_free;
859                 }
860                 *pvalid_lft = ci->ifa_valid;
861                 *pprefered_lft = ci->ifa_prefered;
862         }
863
864         return ifa;
865
866 errout_free:
867         inet_free_ifa(ifa);
868 errout:
869         return ERR_PTR(err);
870 }
871
872 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
873 {
874         struct in_device *in_dev = ifa->ifa_dev;
875         struct in_ifaddr *ifa1, **ifap;
876
877         if (!ifa->ifa_local)
878                 return NULL;
879
880         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
881              ifap = &ifa1->ifa_next) {
882                 if (ifa1->ifa_mask == ifa->ifa_mask &&
883                     inet_ifa_match(ifa1->ifa_address, ifa) &&
884                     ifa1->ifa_local == ifa->ifa_local)
885                         return ifa1;
886         }
887         return NULL;
888 }
889
890 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
891                             struct netlink_ext_ack *extack)
892 {
893         struct net *net = sock_net(skb->sk);
894         struct in_ifaddr *ifa;
895         struct in_ifaddr *ifa_existing;
896         __u32 valid_lft = INFINITY_LIFE_TIME;
897         __u32 prefered_lft = INFINITY_LIFE_TIME;
898
899         ASSERT_RTNL();
900
901         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
902         if (IS_ERR(ifa))
903                 return PTR_ERR(ifa);
904
905         ifa_existing = find_matching_ifa(ifa);
906         if (!ifa_existing) {
907                 /* It would be best to check for !NLM_F_CREATE here but
908                  * userspace already relies on not having to provide this.
909                  */
910                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
911                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
912                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
913                                                true, ifa);
914
915                         if (ret < 0) {
916                                 inet_free_ifa(ifa);
917                                 return ret;
918                         }
919                 }
920                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
921                                          extack);
922         } else {
923                 u32 new_metric = ifa->ifa_rt_priority;
924
925                 inet_free_ifa(ifa);
926
927                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
928                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
929                         return -EEXIST;
930                 ifa = ifa_existing;
931
932                 if (ifa->ifa_rt_priority != new_metric) {
933                         fib_modify_prefix_metric(ifa, new_metric);
934                         ifa->ifa_rt_priority = new_metric;
935                 }
936
937                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
938                 cancel_delayed_work(&check_lifetime_work);
939                 queue_delayed_work(system_power_efficient_wq,
940                                 &check_lifetime_work, 0);
941                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
942         }
943         return 0;
944 }
945
946 /*
947  *      Determine a default network mask, based on the IP address.
948  */
949
950 static int inet_abc_len(__be32 addr)
951 {
952         int rc = -1;    /* Something else, probably a multicast. */
953
954         if (ipv4_is_zeronet(addr))
955                 rc = 0;
956         else {
957                 __u32 haddr = ntohl(addr);
958
959                 if (IN_CLASSA(haddr))
960                         rc = 8;
961                 else if (IN_CLASSB(haddr))
962                         rc = 16;
963                 else if (IN_CLASSC(haddr))
964                         rc = 24;
965         }
966
967         return rc;
968 }
969
970
/*
 * devinet_ioctl - service the IPv4 per-interface address and flag ioctls.
 * @net: namespace used for the device lookup and the capability check
 * @cmd: SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK}, SIOCSIF{ADDR,BRDADDR,
 *       DSTADDR,NETMASK} or SIOCSIFFLAGS; any other value gives -EINVAL
 * @ifr: request; ifr_name selects the device (optionally "name:alias"),
 *       ifr_addr is read/written as a struct sockaddr_in, and ifr_flags
 *       is used by SIOCSIFFLAGS
 *
 * The SIOCSIF* commands require CAP_NET_ADMIN in net->user_ns.  Device
 * lookup and all address changes run between rtnl_lock()/rtnl_unlock().
 *
 * Returns 0 on success, otherwise a negative errno: -EPERM, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or the result of dev_change_flags()
 * or inet_set_ifa().
 */
971 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
972 {
973         struct sockaddr_in sin_orig;
974         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
975         struct in_device *in_dev;
976         struct in_ifaddr **ifap = NULL;
977         struct in_ifaddr *ifa = NULL;
978         struct net_device *dev;
979         char *colon;
980         int ret = -EFAULT;
981         int tryaddrmatch = 0;
982
            /* Force termination so the string helpers below stay in bounds. */
983         ifr->ifr_name[IFNAMSIZ - 1] = 0;
984
985         /* save original address for comparison */
986         memcpy(&sin_orig, sin, sizeof(*sin));
987
            /*
             * Temporarily cut off an ":alias" suffix so that dev_load() and
             * the device lookup see the bare device name.  The colon is put
             * back once the device has been found; it is not restored on the
             * early-exit paths before that point.
             */
988         colon = strchr(ifr->ifr_name, ':');
989         if (colon)
990                 *colon = 0;
991
992         dev_load(net, ifr->ifr_name);
993
            /* First pass: per-command validation and permission checks. */
994         switch (cmd) {
995         case SIOCGIFADDR:       /* Get interface address */
996         case SIOCGIFBRDADDR:    /* Get the broadcast address */
997         case SIOCGIFDSTADDR:    /* Get the destination address */
998         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
999                 /* Note that these ioctls will not sleep,
1000                    so that we do not impose a lock.
1001                    One day we will be forced to put shlock here (I mean SMP)
1002                  */
1003                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
                     /* The reply is built in place over the request address. */
1004                 memset(sin, 0, sizeof(*sin));
1005                 sin->sin_family = AF_INET;
1006                 break;
1007
1008         case SIOCSIFFLAGS:
1009                 ret = -EPERM;
1010                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1011                         goto out;
1012                 break;
1013         case SIOCSIFADDR:       /* Set interface address (and family) */
1014         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1015         case SIOCSIFDSTADDR:    /* Set the destination address */
1016         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1017                 ret = -EPERM;
1018                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1019                         goto out;
1020                 ret = -EINVAL;
1021                 if (sin->sin_family != AF_INET)
1022                         goto out;
1023                 break;
1024         default:
1025                 ret = -EINVAL;
1026                 goto out;
1027         }
1028
1029         rtnl_lock();
1030
1031         ret = -ENODEV;
1032         dev = __dev_get_by_name(net, ifr->ifr_name);
1033         if (!dev)
1034                 goto done;
1035
             /* Restore the full label; it is compared against ifa_label below. */
1036         if (colon)
1037                 *colon = ':';
1038
             /*
              * Locate the address entry the request refers to.  On a hit, ifa
              * points at the entry and ifap at the link that references it,
              * which is what inet_del_ifa() is handed later.
              */
1039         in_dev = __in_dev_get_rtnl(dev);
1040         if (in_dev) {
1041                 if (tryaddrmatch) {
1042                         /* Matthias Andree */
1043                         /* compare label and address (4.4BSD style) */
1044                         /* note: we only do this for a limited set of ioctls
1045                            and only if the original address family was AF_INET.
1046                            This is checked above. */
1047                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1048                              ifap = &ifa->ifa_next) {
1049                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1050                                     sin_orig.sin_addr.s_addr ==
1051                                                         ifa->ifa_local) {
1052                                         break; /* found */
1053                                 }
1054                         }
1055                 }
1056                 /* we didn't get a match, maybe the application is
1057                    4.3BSD-style and passed in junk so we fall back to
1058                    comparing just the label */
1059                 if (!ifa) {
1060                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1061                              ifap = &ifa->ifa_next)
1062                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1063                                         break;
1064                 }
1065         }
1066
             /*
              * Only SIOCSIFADDR (which can create an address) and SIOCSIFFLAGS
              * may proceed without an existing address entry.
              */
1067         ret = -EADDRNOTAVAIL;
1068         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1069                 goto done;
1070
             /* Second pass: perform the command. */
1071         switch (cmd) {
1072         case SIOCGIFADDR:       /* Get interface address */
1073                 ret = 0;
1074                 sin->sin_addr.s_addr = ifa->ifa_local;
1075                 break;
1076
1077         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1078                 ret = 0;
1079                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1080                 break;
1081
1082         case SIOCGIFDSTADDR:    /* Get the destination address */
1083                 ret = 0;
1084                 sin->sin_addr.s_addr = ifa->ifa_address;
1085                 break;
1086
1087         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1088                 ret = 0;
1089                 sin->sin_addr.s_addr = ifa->ifa_mask;
1090                 break;
1091
1092         case SIOCSIFFLAGS:
                     /*
                      * With an alias label the request targets that one
                      * address: clearing IFF_UP deletes it, and the device
                      * flags themselves are left alone.
                      */
1093                 if (colon) {
1094                         ret = -EADDRNOTAVAIL;
1095                         if (!ifa)
1096                                 break;
1097                         ret = 0;
1098                         if (!(ifr->ifr_flags & IFF_UP))
1099                                 inet_del_ifa(in_dev, ifap, 1);
1100                         break;
1101                 }
1102                 ret = dev_change_flags(dev, ifr->ifr_flags);
1103                 break;
1104
1105         case SIOCSIFADDR:       /* Set interface address (and family) */
1106                 ret = -EINVAL;
1107                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1108                         break;
1109
1110                 if (!ifa) {
                             /* No entry yet: allocate one and label it. */
1111                         ret = -ENOBUFS;
1112                         ifa = inet_alloc_ifa();
1113                         if (!ifa)
1114                                 break;
1115                         INIT_HLIST_NODE(&ifa->hash);
1116                         if (colon)
1117                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1118                         else
1119                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1120                 } else {
                             /*
                              * Existing entry: nothing to do if the address is
                              * unchanged; otherwise unlink it (third argument 0,
                              * unlike the SIOCSIFFLAGS case) so the same object
                              * can be refilled and handed to inet_set_ifa().
                              */
1121                         ret = 0;
1122                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1123                                 break;
1124                         inet_del_ifa(in_dev, ifap, 0);
1125                         ifa->ifa_broadcast = 0;
1126                         ifa->ifa_scope = 0;
1127                 }
1128
1129                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1130
                     /*
                      * Non point-to-point devices get the classful default
                      * prefix, point-to-point devices a /32.
                      */
1131                 if (!(dev->flags & IFF_POINTOPOINT)) {
1132                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1133                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1134                         if ((dev->flags & IFF_BROADCAST) &&
1135                             ifa->ifa_prefixlen < 31)
1136                                 ifa->ifa_broadcast = ifa->ifa_address |
1137                                                      ~ifa->ifa_mask;
1138                 } else {
1139                         ifa->ifa_prefixlen = 32;
1140                         ifa->ifa_mask = inet_make_mask(32);
1141                 }
1142                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1143                 ret = inet_set_ifa(dev, ifa);
1144                 break;
1145
1146         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1147                 ret = 0;
1148                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                             /* Unlink, modify, re-insert. */
1149                         inet_del_ifa(in_dev, ifap, 0);
1150                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1151                         inet_insert_ifa(ifa);
1152                 }
1153                 break;
1154
1155         case SIOCSIFDSTADDR:    /* Set the destination address */
1156                 ret = 0;
1157                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1158                         break;
1159                 ret = -EINVAL;
1160                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1161                         break;
1162                 ret = 0;
1163                 inet_del_ifa(in_dev, ifap, 0);
1164                 ifa->ifa_address = sin->sin_addr.s_addr;
1165                 inet_insert_ifa(ifa);
1166                 break;
1167
1168         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1169
1170                 /*
1171                  *      The mask we set must be legal.
1172                  */
1173                 ret = -EINVAL;
1174                 if (bad_mask(sin->sin_addr.s_addr, 0))
1175                         break;
1176                 ret = 0;
1177                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1178                         __be32 old_mask = ifa->ifa_mask;
1179                         inet_del_ifa(in_dev, ifap, 0);
1180                         ifa->ifa_mask = sin->sin_addr.s_addr;
1181                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1182
1183                         /* See if current broadcast address matches
1184                          * with current netmask, then recalculate
1185                          * the broadcast address. Otherwise it's a
1186                          * funny address, so don't touch it since
1187                          * the user seems to know what (s)he's doing...
1188                          */
1189                         if ((dev->flags & IFF_BROADCAST) &&
1190                             (ifa->ifa_prefixlen < 31) &&
1191                             (ifa->ifa_broadcast ==
1192                              (ifa->ifa_local|~old_mask))) {
1193                                 ifa->ifa_broadcast = (ifa->ifa_local |
1194                                                       ~sin->sin_addr.s_addr);
1195                         }
1196                         inet_insert_ifa(ifa);
1197                 }
1198                 break;
1199         }
1200 done:
1201         rtnl_unlock();
1202 out:
1203         return ret;
1204 }
1205
1206 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1207 {
1208         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1209         struct in_ifaddr *ifa;
1210         struct ifreq ifr;
1211         int done = 0;
1212
1213         if (WARN_ON(size > sizeof(struct ifreq)))
1214                 goto out;
1215
1216         if (!in_dev)
1217                 goto out;
1218
1219         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1220                 if (!buf) {
1221                         done += size;
1222                         continue;
1223                 }
1224                 if (len < size)
1225                         break;
1226                 memset(&ifr, 0, sizeof(struct ifreq));
1227                 strcpy(ifr.ifr_name, ifa->ifa_label);
1228
1229                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1230                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1231                                                                 ifa->ifa_local;
1232
1233                 if (copy_to_user(buf + done, &ifr, size)) {
1234                         done = -EFAULT;
1235                         break;
1236                 }
1237                 len  -= size;
1238                 done += size;
1239         }
1240 out:
1241         return done;
1242 }
1243
1244 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1245                                  int scope)
1246 {
1247         for_primary_ifa(in_dev) {
1248                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1249                     ifa->ifa_scope <= scope)
1250                         return ifa->ifa_local;
1251         } endfor_ifa(in_dev);
1252
1253         return 0;
1254 }
1255
1256 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1257 {
1258         __be32 addr = 0;
1259         struct in_device *in_dev;
1260         struct net *net = dev_net(dev);
1261         int master_idx;
1262
1263         rcu_read_lock();
1264         in_dev = __in_dev_get_rcu(dev);
1265         if (!in_dev)
1266                 goto no_in_dev;
1267
1268         for_primary_ifa(in_dev) {
1269                 if (ifa->ifa_scope > scope)
1270                         continue;
1271                 if (!dst || inet_ifa_match(dst, ifa)) {
1272                         addr = ifa->ifa_local;
1273                         break;
1274                 }
1275                 if (!addr)
1276                         addr = ifa->ifa_local;
1277         } endfor_ifa(in_dev);
1278
1279         if (addr)
1280                 goto out_unlock;
1281 no_in_dev:
1282         master_idx = l3mdev_master_ifindex_rcu(dev);
1283
1284         /* For VRFs, the VRF device takes the place of the loopback device,
1285          * with addresses on it being preferred.  Note in such cases the
1286          * loopback device will be among the devices that fail the master_idx
1287          * equality check in the loop below.
1288          */
1289         if (master_idx &&
1290             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1291             (in_dev = __in_dev_get_rcu(dev))) {
1292                 addr = in_dev_select_addr(in_dev, scope);
1293                 if (addr)
1294                         goto out_unlock;
1295         }
1296
1297         /* Not loopback addresses on loopback should be preferred
1298            in this case. It is important that lo is the first interface
1299            in dev_base list.
1300          */
1301         for_each_netdev_rcu(net, dev) {
1302                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1303                         continue;
1304
1305                 in_dev = __in_dev_get_rcu(dev);
1306                 if (!in_dev)
1307                         continue;
1308
1309                 addr = in_dev_select_addr(in_dev, scope);
1310                 if (addr)
1311                         goto out_unlock;
1312         }
1313 out_unlock:
1314         rcu_read_unlock();
1315         return addr;
1316 }
1317 EXPORT_SYMBOL(inet_select_addr);
1318
/*
 * Per-device worker for inet_confirm_addr(): walk every address on @in_dev
 * and decide whether a suitable local address exists.
 *
 * Two pieces of state are tracked across the walk:
 *   addr - the first ifa_local that equals @local (or any address when
 *          @local is 0) and whose scope is <= @scope;
 *   same - set once an address is found whose subnet contains @local and
 *          @dst according to inet_ifa_match(); a zero @local or @dst
 *          counts as a match.
 *
 * Returns addr if "same" ended up set, otherwise 0.
 */
1319 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1320                               __be32 local, int scope)
1321 {
1322         int same = 0;
1323         __be32 addr = 0;
1324
1325         for_ifa(in_dev) {
                     /* Latch the first acceptable local address. */
1326                 if (!addr &&
1327                     (local == ifa->ifa_local || !local) &&
1328                     ifa->ifa_scope <= scope) {
1329                         addr = ifa->ifa_local;
                             /* Subnet already confirmed earlier: done. */
1330                         if (same)
1331                                 break;
1332                 }
1333                 if (!same) {
1334                         same = (!local || inet_ifa_match(local, ifa)) &&
1335                                 (!dst || inet_ifa_match(dst, ifa));
1336                         if (same && addr) {
                                     /*
                                      * An explicit @local, or no @dst
                                      * constraint, needs no further checking.
                                      */
1337                                 if (local || !dst)
1338                                         break;
1339                                 /* Is the selected addr into dst subnet? */
1340                                 if (inet_ifa_match(addr, ifa))
1341                                         break;
1342                                 /* No, then can we use new local src? */
1343                                 if (ifa->ifa_scope <= scope) {
1344                                         addr = ifa->ifa_local;
1345                                         break;
1346                                 }
1347                                 /* search for large dst subnet for addr */
1348                                 same = 0;
1349                         }
1350                 }
1351         } endfor_ifa(in_dev);
1352
1353         return same ? addr : 0;
1354 }
1355
1356 /*
1357  * Confirm that local IP address exists using wildcards:
1358  * - net: netns to check, cannot be NULL
1359  * - in_dev: only on this interface, NULL=any interface
1360  * - dst: only in the same subnet as dst, 0=any dst
1361  * - local: address, 0=autoselect the local address
1362  * - scope: maximum allowed scope value for the local address
1363  */
1364 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1365                          __be32 dst, __be32 local, int scope)
1366 {
1367         __be32 addr = 0;
1368         struct net_device *dev;
1369
1370         if (in_dev)
1371                 return confirm_addr_indev(in_dev, dst, local, scope);
1372
1373         rcu_read_lock();
1374         for_each_netdev_rcu(net, dev) {
1375                 in_dev = __in_dev_get_rcu(dev);
1376                 if (in_dev) {
1377                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1378                         if (addr)
1379                                 break;
1380                 }
1381         }
1382         rcu_read_unlock();
1383
1384         return addr;
1385 }
1386 EXPORT_SYMBOL(inet_confirm_addr);
1387
1388 /*
1389  *      Device notifier
1390  */
1391
1392 int register_inetaddr_notifier(struct notifier_block *nb)
1393 {
1394         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1395 }
1396 EXPORT_SYMBOL(register_inetaddr_notifier);
1397
1398 int unregister_inetaddr_notifier(struct notifier_block *nb)
1399 {
1400         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1401 }
1402 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1403
1404 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1405 {
1406         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1407 }
1408 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1409
1410 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1411 {
1412         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1413             nb);
1414 }
1415 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1416
1417 /* Rename ifa_labels for a device name change. Make some effort to preserve
1418  * existing alias numbering and to create unique labels if possible.
1419 */
1420 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1421 {
1422         struct in_ifaddr *ifa;
1423         int named = 0;
1424
1425         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1426                 char old[IFNAMSIZ], *dot;
1427
1428                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1429                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1430                 if (named++ == 0)
1431                         goto skip;
1432                 dot = strchr(old, ':');
1433                 if (!dot) {
1434                         sprintf(old, ":%d", named);
1435                         dot = old;
1436                 }
1437                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1438                         strcat(ifa->ifa_label, dot);
1439                 else
1440                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1441 skip:
1442                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1443         }
1444 }
1445
1446 static bool inetdev_valid_mtu(unsigned int mtu)
1447 {
1448         return mtu >= IPV4_MIN_MTU;
1449 }
1450
1451 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1452                                         struct in_device *in_dev)
1453
1454 {
1455         struct in_ifaddr *ifa;
1456
1457         for (ifa = in_dev->ifa_list; ifa;
1458              ifa = ifa->ifa_next) {
1459                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1460                          ifa->ifa_local, dev,
1461                          ifa->ifa_local, NULL,
1462                          dev->dev_addr, NULL);
1463         }
1464 }
1465
1466 /* Called only under RTNL semaphore */
1467
/*
 * inetdev_event - netdevice notifier callback for the IPv4 layer.
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Creates, updates or destroys the device's in_device in response to
 * device events.  Requires RTNL (ASSERT_RTNL()).
 *
 * Returns NOTIFY_DONE, except that a failing inetdev_init() on
 * NETDEV_REGISTER is reported through notifier_from_errno().
 */
1468 static int inetdev_event(struct notifier_block *this, unsigned long event,
1469                          void *ptr)
1470 {
1471         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1472         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1473
1474         ASSERT_RTNL();
1475
             /*
              * No IPv4 state on this device yet: only REGISTER and CHANGEMTU
              * (to a valid MTU) create it; every other event is ignored.
              */
1476         if (!in_dev) {
1477                 if (event == NETDEV_REGISTER) {
1478                         in_dev = inetdev_init(dev);
1479                         if (IS_ERR(in_dev))
1480                                 return notifier_from_errno(PTR_ERR(in_dev));
1481                         if (dev->flags & IFF_LOOPBACK) {
1482                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1483                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1484                         }
1485                 } else if (event == NETDEV_CHANGEMTU) {
1486                         /* Re-enabling IP */
                             /* The result of inetdev_init() is not checked here. */
1487                         if (inetdev_valid_mtu(dev->mtu))
1488                                 in_dev = inetdev_init(dev);
1489                 }
1490                 goto out;
1491         }
1492
1493         switch (event) {
1494         case NETDEV_REGISTER:
                     /* REGISTER with an in_device already attached is unexpected. */
1495                 pr_debug("%s: bug\n", __func__);
1496                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1497                 break;
1498         case NETDEV_UP:
1499                 if (!inetdev_valid_mtu(dev->mtu))
1500                         break;
                     /*
                      * A loopback device coming up gets INADDR_LOOPBACK/8 with
                      * host scope and infinite lifetimes.  Allocation failure
                      * is silently tolerated.
                      */
1501                 if (dev->flags & IFF_LOOPBACK) {
1502                         struct in_ifaddr *ifa = inet_alloc_ifa();
1503
1504                         if (ifa) {
1505                                 INIT_HLIST_NODE(&ifa->hash);
1506                                 ifa->ifa_local =
1507                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1508                                 ifa->ifa_prefixlen = 8;
1509                                 ifa->ifa_mask = inet_make_mask(8);
1510                                 in_dev_hold(in_dev);
1511                                 ifa->ifa_dev = in_dev;
1512                                 ifa->ifa_scope = RT_SCOPE_HOST;
1513                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1514                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1515                                                  INFINITY_LIFE_TIME);
1516                                 ipv4_devconf_setall(in_dev);
1517                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1518                                 inet_insert_ifa(ifa);
1519                         }
1520                 }
1521                 ip_mc_up(in_dev);
1522                 /* fall through */
1523         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR announce only if arp_notify is enabled. */
1524                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1525                         break;
1526                 /* fall through */
1527         case NETDEV_NOTIFY_PEERS:
1528                 /* Send gratuitous ARP to notify of link change */
1529                 inetdev_send_gratuitous_arp(dev, in_dev);
1530                 break;
1531         case NETDEV_DOWN:
1532                 ip_mc_down(in_dev);
1533                 break;
1534         case NETDEV_PRE_TYPE_CHANGE:
1535                 ip_mc_unmap(in_dev);
1536                 break;
1537         case NETDEV_POST_TYPE_CHANGE:
1538                 ip_mc_remap(in_dev);
1539                 break;
1540         case NETDEV_CHANGEMTU:
1541                 if (inetdev_valid_mtu(dev->mtu))
1542                         break;
1543                 /* disable IP when MTU is not enough */
1544                 /* fall through */
1545         case NETDEV_UNREGISTER:
1546                 inetdev_destroy(in_dev);
1547                 break;
1548         case NETDEV_CHANGENAME:
1549                 /* Do not notify about label change, this event is
1550                  * not interesting to applications using netlink.
1551                  */
                     /*
                      * NOTE(review): inetdev_changename() in this file calls
                      * rtmsg_ifa(RTM_NEWADDR, ...) for each address, which
                      * looks at odds with the comment above - confirm.
                      */
1552                 inetdev_changename(dev, in_dev);
1553
                     /* Re-register sysctls so they pick up the new name - TODO confirm. */
1554                 devinet_sysctl_unregister(in_dev);
1555                 devinet_sysctl_register(in_dev);
1556                 break;
1557         }
1558 out:
1559         return NOTIFY_DONE;
1560 }
1561
/* Notifier block whose callback is inetdev_event(). */
1562 static struct notifier_block ip_netdev_notifier = {
1563         .notifier_call = inetdev_event,
1564 };
1565
1566 static size_t inet_nlmsg_size(void)
1567 {
1568         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1569                + nla_total_size(4) /* IFA_ADDRESS */
1570                + nla_total_size(4) /* IFA_LOCAL */
1571                + nla_total_size(4) /* IFA_BROADCAST */
1572                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1573                + nla_total_size(4)  /* IFA_FLAGS */
1574                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1575                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1576 }
1577
1578 static inline u32 cstamp_delta(unsigned long cstamp)
1579 {
1580         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1581 }
1582
1583 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1584                          unsigned long tstamp, u32 preferred, u32 valid)
1585 {
1586         struct ifa_cacheinfo ci;
1587
1588         ci.cstamp = cstamp_delta(cstamp);
1589         ci.tstamp = cstamp_delta(tstamp);
1590         ci.ifa_prefered = preferred;
1591         ci.ifa_valid = valid;
1592
1593         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1594 }
1595
1596 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1597                             struct inet_fill_args *args)
1598 {
1599         struct ifaddrmsg *ifm;
1600         struct nlmsghdr  *nlh;
1601         u32 preferred, valid;
1602
1603         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1604                         args->flags);
1605         if (!nlh)
1606                 return -EMSGSIZE;
1607
1608         ifm = nlmsg_data(nlh);
1609         ifm->ifa_family = AF_INET;
1610         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1611         ifm->ifa_flags = ifa->ifa_flags;
1612         ifm->ifa_scope = ifa->ifa_scope;
1613         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1614
1615         if (args->netnsid >= 0 &&
1616             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1617                 goto nla_put_failure;
1618
1619         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1620                 preferred = ifa->ifa_preferred_lft;
1621                 valid = ifa->ifa_valid_lft;
1622                 if (preferred != INFINITY_LIFE_TIME) {
1623                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1624
1625                         if (preferred > tval)
1626                                 preferred -= tval;
1627                         else
1628                                 preferred = 0;
1629                         if (valid != INFINITY_LIFE_TIME) {
1630                                 if (valid > tval)
1631                                         valid -= tval;
1632                                 else
1633                                         valid = 0;
1634                         }
1635                 }
1636         } else {
1637                 preferred = INFINITY_LIFE_TIME;
1638                 valid = INFINITY_LIFE_TIME;
1639         }
1640         if ((ifa->ifa_address &&
1641              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1642             (ifa->ifa_local &&
1643              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1644             (ifa->ifa_broadcast &&
1645              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1646             (ifa->ifa_label[0] &&
1647              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1648             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1649             (ifa->ifa_rt_priority &&
1650              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1651             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1652                           preferred, valid))
1653                 goto nla_put_failure;
1654
1655         nlmsg_end(skb, nlh);
1656         return 0;
1657
1658 nla_put_failure:
1659         nlmsg_cancel(skb, nlh);
1660         return -EMSGSIZE;
1661 }
1662
1663 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1664                                       struct inet_fill_args *fillargs,
1665                                       struct net **tgt_net, struct sock *sk,
1666                                       struct netlink_ext_ack *extack)
1667 {
1668         struct nlattr *tb[IFA_MAX+1];
1669         struct ifaddrmsg *ifm;
1670         int err, i;
1671
1672         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1673                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1674                 return -EINVAL;
1675         }
1676
1677         ifm = nlmsg_data(nlh);
1678         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1679                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1680                 return -EINVAL;
1681         }
1682         if (ifm->ifa_index) {
1683                 NL_SET_ERR_MSG(extack, "ipv4: Filter by device index not supported for address dump");
1684                 return -EINVAL;
1685         }
1686
1687         err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1688                                  ifa_ipv4_policy, extack);
1689         if (err < 0)
1690                 return err;
1691
1692         for (i = 0; i <= IFA_MAX; ++i) {
1693                 if (!tb[i])
1694                         continue;
1695
1696                 if (i == IFA_TARGET_NETNSID) {
1697                         struct net *net;
1698
1699                         fillargs->netnsid = nla_get_s32(tb[i]);
1700
1701                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1702                         if (IS_ERR(net)) {
1703                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1704                                 return PTR_ERR(net);
1705                         }
1706                         *tgt_net = net;
1707                 } else {
1708                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1709                         return -EINVAL;
1710                 }
1711         }
1712
1713         return 0;
1714 }
1715
1716 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1717                             struct netlink_callback *cb, int s_ip_idx,
1718                             struct inet_fill_args *fillargs)
1719 {
1720         struct in_ifaddr *ifa;
1721         int ip_idx = 0;
1722         int err;
1723
1724         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1725                 if (ip_idx < s_ip_idx)
1726                         continue;
1727
1728                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1729                 if (err < 0)
1730                         goto done;
1731
1732                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1733         }
1734         err = 0;
1735
1736 done:
1737         cb->args[2] = ip_idx;
1738
1739         return err;
1740 }
1741
1742 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1743 {
1744         const struct nlmsghdr *nlh = cb->nlh;
1745         struct inet_fill_args fillargs = {
1746                 .portid = NETLINK_CB(cb->skb).portid,
1747                 .seq = nlh->nlmsg_seq,
1748                 .event = RTM_NEWADDR,
1749                 .flags = NLM_F_MULTI,
1750                 .netnsid = -1,
1751         };
1752         struct net *net = sock_net(skb->sk);
1753         struct net *tgt_net = net;
1754         int h, s_h;
1755         int idx, s_idx;
1756         int s_ip_idx;
1757         struct net_device *dev;
1758         struct in_device *in_dev;
1759         struct hlist_head *head;
1760         int err;
1761
1762         s_h = cb->args[0];
1763         s_idx = idx = cb->args[1];
1764         s_ip_idx = cb->args[2];
1765
1766         if (cb->strict_check) {
1767                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1768                                                  skb->sk, cb->extack);
1769                 if (err < 0)
1770                         return err;
1771         }
1772
1773         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1774                 idx = 0;
1775                 head = &tgt_net->dev_index_head[h];
1776                 rcu_read_lock();
1777                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1778                           tgt_net->dev_base_seq;
1779                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1780                         if (idx < s_idx)
1781                                 goto cont;
1782                         if (h > s_h || idx > s_idx)
1783                                 s_ip_idx = 0;
1784                         in_dev = __in_dev_get_rcu(dev);
1785                         if (!in_dev)
1786                                 goto cont;
1787
1788                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1789                                                &fillargs);
1790                         if (err < 0) {
1791                                 rcu_read_unlock();
1792                                 goto done;
1793                         }
1794 cont:
1795                         idx++;
1796                 }
1797                 rcu_read_unlock();
1798         }
1799
1800 done:
1801         cb->args[0] = h;
1802         cb->args[1] = idx;
1803         if (fillargs.netnsid >= 0)
1804                 put_net(tgt_net);
1805
1806         return skb->len;
1807 }
1808
1809 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1810                       u32 portid)
1811 {
1812         struct inet_fill_args fillargs = {
1813                 .portid = portid,
1814                 .seq = nlh ? nlh->nlmsg_seq : 0,
1815                 .event = event,
1816                 .flags = 0,
1817                 .netnsid = -1,
1818         };
1819         struct sk_buff *skb;
1820         int err = -ENOBUFS;
1821         struct net *net;
1822
1823         net = dev_net(ifa->ifa_dev->dev);
1824         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1825         if (!skb)
1826                 goto errout;
1827
1828         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1829         if (err < 0) {
1830                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1831                 WARN_ON(err == -EMSGSIZE);
1832                 kfree_skb(skb);
1833                 goto errout;
1834         }
1835         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1836         return;
1837 errout:
1838         if (err < 0)
1839                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1840 }
1841
1842 static size_t inet_get_link_af_size(const struct net_device *dev,
1843                                     u32 ext_filter_mask)
1844 {
1845         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1846
1847         if (!in_dev)
1848                 return 0;
1849
1850         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1851 }
1852
1853 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1854                              u32 ext_filter_mask)
1855 {
1856         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1857         struct nlattr *nla;
1858         int i;
1859
1860         if (!in_dev)
1861                 return -ENODATA;
1862
1863         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1864         if (!nla)
1865                 return -EMSGSIZE;
1866
1867         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1868                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1869
1870         return 0;
1871 }
1872
1873 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1874         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1875 };
1876
1877 static int inet_validate_link_af(const struct net_device *dev,
1878                                  const struct nlattr *nla)
1879 {
1880         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1881         int err, rem;
1882
1883         if (dev && !__in_dev_get_rcu(dev))
1884                 return -EAFNOSUPPORT;
1885
1886         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1887         if (err < 0)
1888                 return err;
1889
1890         if (tb[IFLA_INET_CONF]) {
1891                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1892                         int cfgid = nla_type(a);
1893
1894                         if (nla_len(a) < 4)
1895                                 return -EINVAL;
1896
1897                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1898                                 return -EINVAL;
1899                 }
1900         }
1901
1902         return 0;
1903 }
1904
1905 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1906 {
1907         struct in_device *in_dev = __in_dev_get_rcu(dev);
1908         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1909         int rem;
1910
1911         if (!in_dev)
1912                 return -EAFNOSUPPORT;
1913
1914         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1915                 BUG();
1916
1917         if (tb[IFLA_INET_CONF]) {
1918                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1919                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1920         }
1921
1922         return 0;
1923 }
1924
1925 static int inet_netconf_msgsize_devconf(int type)
1926 {
1927         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1928                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1929         bool all = false;
1930
1931         if (type == NETCONFA_ALL)
1932                 all = true;
1933
1934         if (all || type == NETCONFA_FORWARDING)
1935                 size += nla_total_size(4);
1936         if (all || type == NETCONFA_RP_FILTER)
1937                 size += nla_total_size(4);
1938         if (all || type == NETCONFA_MC_FORWARDING)
1939                 size += nla_total_size(4);
1940         if (all || type == NETCONFA_BC_FORWARDING)
1941                 size += nla_total_size(4);
1942         if (all || type == NETCONFA_PROXY_NEIGH)
1943                 size += nla_total_size(4);
1944         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1945                 size += nla_total_size(4);
1946
1947         return size;
1948 }
1949
1950 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1951                                      struct ipv4_devconf *devconf, u32 portid,
1952                                      u32 seq, int event, unsigned int flags,
1953                                      int type)
1954 {
1955         struct nlmsghdr  *nlh;
1956         struct netconfmsg *ncm;
1957         bool all = false;
1958
1959         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1960                         flags);
1961         if (!nlh)
1962                 return -EMSGSIZE;
1963
1964         if (type == NETCONFA_ALL)
1965                 all = true;
1966
1967         ncm = nlmsg_data(nlh);
1968         ncm->ncm_family = AF_INET;
1969
1970         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1971                 goto nla_put_failure;
1972
1973         if (!devconf)
1974                 goto out;
1975
1976         if ((all || type == NETCONFA_FORWARDING) &&
1977             nla_put_s32(skb, NETCONFA_FORWARDING,
1978                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1979                 goto nla_put_failure;
1980         if ((all || type == NETCONFA_RP_FILTER) &&
1981             nla_put_s32(skb, NETCONFA_RP_FILTER,
1982                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1983                 goto nla_put_failure;
1984         if ((all || type == NETCONFA_MC_FORWARDING) &&
1985             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1986                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1987                 goto nla_put_failure;
1988         if ((all || type == NETCONFA_BC_FORWARDING) &&
1989             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
1990                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
1991                 goto nla_put_failure;
1992         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1993             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1994                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1995                 goto nla_put_failure;
1996         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1997             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1998                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1999                 goto nla_put_failure;
2000
2001 out:
2002         nlmsg_end(skb, nlh);
2003         return 0;
2004
2005 nla_put_failure:
2006         nlmsg_cancel(skb, nlh);
2007         return -EMSGSIZE;
2008 }
2009
2010 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2011                                  int ifindex, struct ipv4_devconf *devconf)
2012 {
2013         struct sk_buff *skb;
2014         int err = -ENOBUFS;
2015
2016         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2017         if (!skb)
2018                 goto errout;
2019
2020         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2021                                         event, 0, type);
2022         if (err < 0) {
2023                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2024                 WARN_ON(err == -EMSGSIZE);
2025                 kfree_skb(skb);
2026                 goto errout;
2027         }
2028         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2029         return;
2030 errout:
2031         if (err < 0)
2032                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2033 }
2034
2035 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2036         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2037         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2038         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2039         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2040         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2041 };
2042
2043 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2044                                     struct nlmsghdr *nlh,
2045                                     struct netlink_ext_ack *extack)
2046 {
2047         struct net *net = sock_net(in_skb->sk);
2048         struct nlattr *tb[NETCONFA_MAX+1];
2049         struct netconfmsg *ncm;
2050         struct sk_buff *skb;
2051         struct ipv4_devconf *devconf;
2052         struct in_device *in_dev;
2053         struct net_device *dev;
2054         int ifindex;
2055         int err;
2056
2057         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2058                           devconf_ipv4_policy, extack);
2059         if (err < 0)
2060                 goto errout;
2061
2062         err = -EINVAL;
2063         if (!tb[NETCONFA_IFINDEX])
2064                 goto errout;
2065
2066         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2067         switch (ifindex) {
2068         case NETCONFA_IFINDEX_ALL:
2069                 devconf = net->ipv4.devconf_all;
2070                 break;
2071         case NETCONFA_IFINDEX_DEFAULT:
2072                 devconf = net->ipv4.devconf_dflt;
2073                 break;
2074         default:
2075                 dev = __dev_get_by_index(net, ifindex);
2076                 if (!dev)
2077                         goto errout;
2078                 in_dev = __in_dev_get_rtnl(dev);
2079                 if (!in_dev)
2080                         goto errout;
2081                 devconf = &in_dev->cnf;
2082                 break;
2083         }
2084
2085         err = -ENOBUFS;
2086         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2087         if (!skb)
2088                 goto errout;
2089
2090         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2091                                         NETLINK_CB(in_skb).portid,
2092                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2093                                         NETCONFA_ALL);
2094         if (err < 0) {
2095                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2096                 WARN_ON(err == -EMSGSIZE);
2097                 kfree_skb(skb);
2098                 goto errout;
2099         }
2100         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2101 errout:
2102         return err;
2103 }
2104
2105 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2106                                      struct netlink_callback *cb)
2107 {
2108         const struct nlmsghdr *nlh = cb->nlh;
2109         struct net *net = sock_net(skb->sk);
2110         int h, s_h;
2111         int idx, s_idx;
2112         struct net_device *dev;
2113         struct in_device *in_dev;
2114         struct hlist_head *head;
2115
2116         if (cb->strict_check) {
2117                 struct netlink_ext_ack *extack = cb->extack;
2118                 struct netconfmsg *ncm;
2119
2120                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2121                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2122                         return -EINVAL;
2123                 }
2124
2125                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2126                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2127                         return -EINVAL;
2128                 }
2129         }
2130
2131         s_h = cb->args[0];
2132         s_idx = idx = cb->args[1];
2133
2134         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2135                 idx = 0;
2136                 head = &net->dev_index_head[h];
2137                 rcu_read_lock();
2138                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2139                           net->dev_base_seq;
2140                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2141                         if (idx < s_idx)
2142                                 goto cont;
2143                         in_dev = __in_dev_get_rcu(dev);
2144                         if (!in_dev)
2145                                 goto cont;
2146
2147                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2148                                                       &in_dev->cnf,
2149                                                       NETLINK_CB(cb->skb).portid,
2150                                                       nlh->nlmsg_seq,
2151                                                       RTM_NEWNETCONF,
2152                                                       NLM_F_MULTI,
2153                                                       NETCONFA_ALL) < 0) {
2154                                 rcu_read_unlock();
2155                                 goto done;
2156                         }
2157                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2158 cont:
2159                         idx++;
2160                 }
2161                 rcu_read_unlock();
2162         }
2163         if (h == NETDEV_HASHENTRIES) {
2164                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2165                                               net->ipv4.devconf_all,
2166                                               NETLINK_CB(cb->skb).portid,
2167                                               nlh->nlmsg_seq,
2168                                               RTM_NEWNETCONF, NLM_F_MULTI,
2169                                               NETCONFA_ALL) < 0)
2170                         goto done;
2171                 else
2172                         h++;
2173         }
2174         if (h == NETDEV_HASHENTRIES + 1) {
2175                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2176                                               net->ipv4.devconf_dflt,
2177                                               NETLINK_CB(cb->skb).portid,
2178                                               nlh->nlmsg_seq,
2179                                               RTM_NEWNETCONF, NLM_F_MULTI,
2180                                               NETCONFA_ALL) < 0)
2181                         goto done;
2182                 else
2183                         h++;
2184         }
2185 done:
2186         cb->args[0] = h;
2187         cb->args[1] = idx;
2188
2189         return skb->len;
2190 }
2191
2192 #ifdef CONFIG_SYSCTL
2193
2194 static void devinet_copy_dflt_conf(struct net *net, int i)
2195 {
2196         struct net_device *dev;
2197
2198         rcu_read_lock();
2199         for_each_netdev_rcu(net, dev) {
2200                 struct in_device *in_dev;
2201
2202                 in_dev = __in_dev_get_rcu(dev);
2203                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2204                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2205         }
2206         rcu_read_unlock();
2207 }
2208
2209 /* called with RTNL locked */
2210 static void inet_forward_change(struct net *net)
2211 {
2212         struct net_device *dev;
2213         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2214
2215         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2216         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2217         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2218                                     NETCONFA_FORWARDING,
2219                                     NETCONFA_IFINDEX_ALL,
2220                                     net->ipv4.devconf_all);
2221         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2222                                     NETCONFA_FORWARDING,
2223                                     NETCONFA_IFINDEX_DEFAULT,
2224                                     net->ipv4.devconf_dflt);
2225
2226         for_each_netdev(net, dev) {
2227                 struct in_device *in_dev;
2228
2229                 if (on)
2230                         dev_disable_lro(dev);
2231
2232                 in_dev = __in_dev_get_rtnl(dev);
2233                 if (in_dev) {
2234                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2235                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2236                                                     NETCONFA_FORWARDING,
2237                                                     dev->ifindex, &in_dev->cnf);
2238                 }
2239         }
2240 }
2241
2242 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2243 {
2244         if (cnf == net->ipv4.devconf_dflt)
2245                 return NETCONFA_IFINDEX_DEFAULT;
2246         else if (cnf == net->ipv4.devconf_all)
2247                 return NETCONFA_IFINDEX_ALL;
2248         else {
2249                 struct in_device *idev
2250                         = container_of(cnf, struct in_device, cnf);
2251                 return idev->dev->ifindex;
2252         }
2253 }
2254
2255 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2256                              void __user *buffer,
2257                              size_t *lenp, loff_t *ppos)
2258 {
2259         int old_value = *(int *)ctl->data;
2260         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2261         int new_value = *(int *)ctl->data;
2262
2263         if (write) {
2264                 struct ipv4_devconf *cnf = ctl->extra1;
2265                 struct net *net = ctl->extra2;
2266                 int i = (int *)ctl->data - cnf->data;
2267                 int ifindex;
2268
2269                 set_bit(i, cnf->state);
2270
2271                 if (cnf == net->ipv4.devconf_dflt)
2272                         devinet_copy_dflt_conf(net, i);
2273                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2274                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2275                         if ((new_value == 0) && (old_value != 0))
2276                                 rt_cache_flush(net);
2277
2278                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2279                     new_value != old_value)
2280                         rt_cache_flush(net);
2281
2282                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2283                     new_value != old_value) {
2284                         ifindex = devinet_conf_ifindex(net, cnf);
2285                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2286                                                     NETCONFA_RP_FILTER,
2287                                                     ifindex, cnf);
2288                 }
2289                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2290                     new_value != old_value) {
2291                         ifindex = devinet_conf_ifindex(net, cnf);
2292                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2293                                                     NETCONFA_PROXY_NEIGH,
2294                                                     ifindex, cnf);
2295                 }
2296                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2297                     new_value != old_value) {
2298                         ifindex = devinet_conf_ifindex(net, cnf);
2299                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2300                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2301                                                     ifindex, cnf);
2302                 }
2303         }
2304
2305         return ret;
2306 }
2307
2308 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2309                                   void __user *buffer,
2310                                   size_t *lenp, loff_t *ppos)
2311 {
2312         int *valp = ctl->data;
2313         int val = *valp;
2314         loff_t pos = *ppos;
2315         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2316
2317         if (write && *valp != val) {
2318                 struct net *net = ctl->extra2;
2319
2320                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2321                         if (!rtnl_trylock()) {
2322                                 /* Restore the original values before restarting */
2323                                 *valp = val;
2324                                 *ppos = pos;
2325                                 return restart_syscall();
2326                         }
2327                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2328                                 inet_forward_change(net);
2329                         } else {
2330                                 struct ipv4_devconf *cnf = ctl->extra1;
2331                                 struct in_device *idev =
2332                                         container_of(cnf, struct in_device, cnf);
2333                                 if (*valp)
2334                                         dev_disable_lro(idev->dev);
2335                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2336                                                             NETCONFA_FORWARDING,
2337                                                             idev->dev->ifindex,
2338                                                             cnf);
2339                         }
2340                         rtnl_unlock();
2341                         rt_cache_flush(net);
2342                 } else
2343                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344                                                     NETCONFA_FORWARDING,
2345                                                     NETCONFA_IFINDEX_DEFAULT,
2346                                                     net->ipv4.devconf_dflt);
2347         }
2348
2349         return ret;
2350 }
2351
2352 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2353                                 void __user *buffer,
2354                                 size_t *lenp, loff_t *ppos)
2355 {
2356         int *valp = ctl->data;
2357         int val = *valp;
2358         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2359         struct net *net = ctl->extra2;
2360
2361         if (write && *valp != val)
2362                 rt_cache_flush(net);
2363
2364         return ret;
2365 }
2366
/*
 * Initializer for one ctl_table entry backed by the IPV4_DEVCONF_<id> slot
 * of the global ipv4_devconf template (slots are numbered from 1, hence
 * the "- 1").
 */
#define DEVINET_SYSCTL_ENTRY(id, fname, perm, handler) \
	{ \
		.procname	= fname, \
		.mode		= perm, \
		.maxlen		= sizeof(int), \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## id - 1], \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= handler, \
	}

/* Writable (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(id, fname, handler) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, handler)

/* Writable (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0444, devinet_conf_proc)

/* Writable (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, ipv4_doint_and_flush)
2389
/*
 * Template sysctl table for the per-devconf entries.
 *
 * Every entry is built by one of the DEVINET_SYSCTL_*_ENTRY() macros, so its
 * .data points into the global ipv4_devconf and .extra1 at ipv4_devconf
 * itself.  The array is sized __IPV4_DEVCONF_MAX; slots without an
 * initializer stay zeroed.
 */
2390 static struct devinet_sysctl_table {
2391         struct ctl_table_header *sysctl_header;
2392         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2393 } devinet_sysctl = {
2394         .devinet_vars = {
                 /* "forwarding" has its own handler, devinet_sysctl_forward(). */
2395                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2396                                              devinet_sysctl_forward),
                 /* mc_forwarding is the only read-only (0444) entry. */
2397                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2398                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2399
                 /* Plain read-write entries handled by devinet_conf_proc(). */
2400                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2401                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2402                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2403                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2404                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2405                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2406                                         "accept_source_route"),
2407                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2408                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2409                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2410                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2411                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2412                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2413                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2414                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2415                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2416                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2417                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2418                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2419                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2420                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2421                                         "force_igmp_version"),
2422                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2423                                         "igmpv2_unsolicited_report_interval"),
2424                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2425                                         "igmpv3_unsolicited_report_interval"),
2426                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2427                                         "ignore_routes_with_linkdown"),
2428                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2429                                         "drop_gratuitous_arp"),
2430
                 /* Entries handled by ipv4_doint_and_flush(). */
2431                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2432                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2433                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2434                                               "promote_secondaries"),
2435                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2436                                               "route_localnet"),
2437                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2438                                               "drop_unicast_in_l2_multicast"),
2439         },
2440 };
2441
2442 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2443                                      int ifindex, struct ipv4_devconf *p)
2444 {
2445         int i;
2446         struct devinet_sysctl_table *t;
2447         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2448
2449         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2450         if (!t)
2451                 goto out;
2452
2453         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2454                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2455                 t->devinet_vars[i].extra1 = p;
2456                 t->devinet_vars[i].extra2 = net;
2457         }
2458
2459         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2460
2461         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2462         if (!t->sysctl_header)
2463                 goto free;
2464
2465         p->sysctl = t;
2466
2467         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2468                                     ifindex, p);
2469         return 0;
2470
2471 free:
2472         kfree(t);
2473 out:
2474         return -ENOBUFS;
2475 }
2476
2477 static void __devinet_sysctl_unregister(struct net *net,
2478                                         struct ipv4_devconf *cnf, int ifindex)
2479 {
2480         struct devinet_sysctl_table *t = cnf->sysctl;
2481
2482         if (t) {
2483                 cnf->sysctl = NULL;
2484                 unregister_net_sysctl_table(t->sysctl_header);
2485                 kfree(t);
2486         }
2487
2488         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2489 }
2490
2491 static int devinet_sysctl_register(struct in_device *idev)
2492 {
2493         int err;
2494
2495         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2496                 return -EINVAL;
2497
2498         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2499         if (err)
2500                 return err;
2501         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2502                                         idev->dev->ifindex, &idev->cnf);
2503         if (err)
2504                 neigh_sysctl_unregister(idev->arp_parms);
2505         return err;
2506 }
2507
2508 static void devinet_sysctl_unregister(struct in_device *idev)
2509 {
2510         struct net *net = dev_net(idev->dev);
2511
2512         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2513         neigh_sysctl_unregister(idev->arp_parms);
2514 }
2515
/*
 * Sysctl table holding the single "ip_forward" entry, which
 * devinet_init_net() registers under "net/ipv4".
 *
 * As written here the entry refers to the FORWARDING slot of the global
 * ipv4_devconf and to init_net.  For any other namespace
 * devinet_init_net() duplicates the table and repoints data, extra1 and
 * extra2 at that namespace's own objects.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
2529 #endif
2530
2531 static __net_init int devinet_init_net(struct net *net)
2532 {
2533         int err;
2534         struct ipv4_devconf *all, *dflt;
2535 #ifdef CONFIG_SYSCTL
2536         struct ctl_table *tbl = ctl_forward_entry;
2537         struct ctl_table_header *forw_hdr;
2538 #endif
2539
2540         err = -ENOMEM;
2541         all = &ipv4_devconf;
2542         dflt = &ipv4_devconf_dflt;
2543
2544         if (!net_eq(net, &init_net)) {
2545                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2546                 if (!all)
2547                         goto err_alloc_all;
2548
2549                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2550                 if (!dflt)
2551                         goto err_alloc_dflt;
2552
2553 #ifdef CONFIG_SYSCTL
2554                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2555                 if (!tbl)
2556                         goto err_alloc_ctl;
2557
2558                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2559                 tbl[0].extra1 = all;
2560                 tbl[0].extra2 = net;
2561 #endif
2562         }
2563
2564 #ifdef CONFIG_SYSCTL
2565         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2566         if (err < 0)
2567                 goto err_reg_all;
2568
2569         err = __devinet_sysctl_register(net, "default",
2570                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2571         if (err < 0)
2572                 goto err_reg_dflt;
2573
2574         err = -ENOMEM;
2575         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2576         if (!forw_hdr)
2577                 goto err_reg_ctl;
2578         net->ipv4.forw_hdr = forw_hdr;
2579 #endif
2580
2581         net->ipv4.devconf_all = all;
2582         net->ipv4.devconf_dflt = dflt;
2583         return 0;
2584
2585 #ifdef CONFIG_SYSCTL
2586 err_reg_ctl:
2587         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2588 err_reg_dflt:
2589         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2590 err_reg_all:
2591         if (tbl != ctl_forward_entry)
2592                 kfree(tbl);
2593 err_alloc_ctl:
2594 #endif
2595         if (dflt != &ipv4_devconf_dflt)
2596                 kfree(dflt);
2597 err_alloc_dflt:
2598         if (all != &ipv4_devconf)
2599                 kfree(all);
2600 err_alloc_all:
2601         return err;
2602 }
2603
2604 static __net_exit void devinet_exit_net(struct net *net)
2605 {
2606 #ifdef CONFIG_SYSCTL
2607         struct ctl_table *tbl;
2608
2609         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2610         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2611         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2612                                     NETCONFA_IFINDEX_DEFAULT);
2613         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2614                                     NETCONFA_IFINDEX_ALL);
2615         kfree(tbl);
2616 #endif
2617         kfree(net->ipv4.devconf_dflt);
2618         kfree(net->ipv4.devconf_all);
2619 }
2620
/*
 * Per-network-namespace init/exit hooks for this file; handed to
 * register_pernet_subsys() by devinet_init().
 */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
2625
/*
 * rtnetlink address-family operations for AF_INET; handed to
 * rtnl_af_register() by devinet_init().
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
2633
2634 void __init devinet_init(void)
2635 {
2636         int i;
2637
2638         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2639                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2640
2641         register_pernet_subsys(&devinet_ops);
2642
2643         register_gifconf(PF_INET, inet_gifconf);
2644         register_netdevice_notifier(&ip_netdev_notifier);
2645
2646         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2647
2648         rtnl_af_register(&inet_af_ops);
2649
2650         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2651         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2652         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2653         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2654                       inet_netconf_dump_devconf, 0);
2655 }