d122ebbe5980139f892431af0eb98fa514f0a448
[muen/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
/*
 * Statically initialised IPv4 device configuration.
 *
 * Slots of .data are indexed by (IPV4_DEVCONF_<name> - 1); every slot not
 * named below starts out as zero.  The two IGMP unsolicited-report
 * intervals are expressed in milliseconds.
 *
 * NOTE(review): presumably the template for the per-namespace "all"
 * settings -- the point of use is outside this part of the file, confirm.
 */
static struct ipv4_devconf ipv4_devconf = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
        },
};
79
/*
 * Statically initialised "default" IPv4 device configuration.
 *
 * Same layout as ipv4_devconf above (slot = IPV4_DEVCONF_<name> - 1), with
 * one extra non-zero entry: ACCEPT_SOURCE_ROUTE is enabled here.
 *
 * NOTE(review): inetdev_init() copies net->ipv4.devconf_dflt into each new
 * in_device; this object is presumably what that pointer refers to for the
 * initial namespace -- the assignment is not visible here, confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
        },
};
91
/*
 * Read attribute @attr from the default devconf of namespace @net.
 *
 * @net is parenthesised so that an argument which is itself an expression
 * (e.g. "nets + 1") binds correctly before the "->" is applied.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
/*
 * Netlink attribute policy for IPv4 address messages.  It is the policy
 * passed to nlmsg_parse() by the RTM address handlers in this file.
 *
 * IFA_LABEL is a string of at most IFNAMSIZ - 1 bytes.  IFA_CACHEINFO has
 * no explicit .type, only a length of sizeof(struct ifa_cacheinfo).
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
        [IFA_LOCAL]             = { .type = NLA_U32 },
        [IFA_ADDRESS]           = { .type = NLA_U32 },
        [IFA_BROADCAST]         = { .type = NLA_U32 },
        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
        [IFA_FLAGS]             = { .type = NLA_U32 },
        [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
        [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
};
105
/*
 * Parameter bundle; not referenced in the visible part of this file.
 *
 * NOTE(review): judging by the field names this is presumably consumed by
 * the netlink message fill/dump code further down -- confirm there.
 */
struct inet_fill_args {
        u32 portid;             /* presumably the netlink port id of the requester */
        u32 seq;                /* presumably the netlink sequence number */
        int event;              /* presumably the RTM_* message type */
        unsigned int flags;     /* presumably NLM_F_* flags */
        int netnsid;            /* presumably the target netns id -- TODO confirm */
};
113
/*
 * Hash table of in_ifaddr entries, keyed by inet_addr_hash() (local
 * address mixed with the namespace).  It has 1 << IN4_ADDR_HSIZE_SHIFT
 * buckets and is a single file-scope table; lookups tell namespaces apart
 * with net_eq().  Entries are linked through in_ifaddr::hash with the RCU
 * hlist helpers.
 */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
118
119 static u32 inet_addr_hash(const struct net *net, __be32 addr)
120 {
121         u32 val = (__force u32) addr ^ net_hash_mix(net);
122
123         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
124 }
125
126 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
127 {
128         u32 hash = inet_addr_hash(net, ifa->ifa_local);
129
130         ASSERT_RTNL();
131         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
132 }
133
/*
 * Unlink @ifa from the address hash table.  Must be called with RTNL held.
 * Uses the "del_init" variant, so the node is left re-initialised.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
        ASSERT_RTNL();
        hlist_del_init_rcu(&ifa->hash);
}
139
140 /**
141  * __ip_dev_find - find the first device with a given source address.
142  * @net: the net namespace
143  * @addr: the source address
144  * @devref: if true, take a reference on the found device
145  *
146  * If a caller uses devref=false, it should be protected by RCU, or RTNL
147  */
148 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
149 {
150         struct net_device *result = NULL;
151         struct in_ifaddr *ifa;
152
153         rcu_read_lock();
154         ifa = inet_lookup_ifaddr_rcu(net, addr);
155         if (!ifa) {
156                 struct flowi4 fl4 = { .daddr = addr };
157                 struct fib_result res = { 0 };
158                 struct fib_table *local;
159
160                 /* Fallback to FIB local table so that communication
161                  * over loopback subnets work.
162                  */
163                 local = fib_get_table(net, RT_TABLE_LOCAL);
164                 if (local &&
165                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166                     res.type == RTN_LOCAL)
167                         result = FIB_RES_DEV(res);
168         } else {
169                 result = ifa->ifa_dev->dev;
170         }
171         if (result && devref)
172                 dev_hold(result);
173         rcu_read_unlock();
174         return result;
175 }
176 EXPORT_SYMBOL(__ip_dev_find);
177
178 /* called under RCU lock */
179 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
180 {
181         u32 hash = inet_addr_hash(net, addr);
182         struct in_ifaddr *ifa;
183
184         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
185                 if (ifa->ifa_local == addr &&
186                     net_eq(dev_net(ifa->ifa_dev->dev), net))
187                         return ifa;
188
189         return NULL;
190 }
191
/* Defined later in this file; used here to announce address changes. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * inetaddr_chain: called with NETDEV_UP / NETDEV_DOWN and the in_ifaddr
 * when an address has been inserted or removed.
 * inetaddr_validator_chain: called with NETDEV_UP and a struct
 * in_validator_info before a new address is committed.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
/* Forward declaration; inetdev_destroy() needs it before the definition. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy);
/*
 * Per-device sysctl hooks.  With CONFIG_SYSCTL they are only declared here
 * (the definitions are not in this part of the file); without it they
 * collapse to no-op stubs, and registration always reports success.
 */
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static int devinet_sysctl_register(struct in_device *idev)
{
        return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
210
211 /* Locks all the inet devices. */
212
213 static struct in_ifaddr *inet_alloc_ifa(void)
214 {
215         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
216 }
217
218 static void inet_rcu_free_ifa(struct rcu_head *head)
219 {
220         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
221         if (ifa->ifa_dev)
222                 in_dev_put(ifa->ifa_dev);
223         kfree(ifa);
224 }
225
/*
 * Release @ifa.  The actual free is deferred through call_rcu() and is
 * performed by inet_rcu_free_ifa().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
230
231 void in_dev_finish_destroy(struct in_device *idev)
232 {
233         struct net_device *dev = idev->dev;
234
235         WARN_ON(idev->ifa_list);
236         WARN_ON(idev->mc_list);
237         kfree(rcu_dereference_protected(idev->mc_hash, 1));
238 #ifdef NET_REFCNT_DEBUG
239         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
240 #endif
241         dev_put(dev);
242         if (!idev->dead)
243                 pr_err("Freeing alive in_device %p\n", idev);
244         else
245                 kfree(idev);
246 }
247 EXPORT_SYMBOL(in_dev_finish_destroy);
248
249 static struct in_device *inetdev_init(struct net_device *dev)
250 {
251         struct in_device *in_dev;
252         int err = -ENOMEM;
253
254         ASSERT_RTNL();
255
256         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
257         if (!in_dev)
258                 goto out;
259         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
260                         sizeof(in_dev->cnf));
261         in_dev->cnf.sysctl = NULL;
262         in_dev->dev = dev;
263         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
264         if (!in_dev->arp_parms)
265                 goto out_kfree;
266         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
267                 dev_disable_lro(dev);
268         /* Reference in_dev->dev */
269         dev_hold(dev);
270         /* Account for reference dev->ip_ptr (below) */
271         refcount_set(&in_dev->refcnt, 1);
272
273         err = devinet_sysctl_register(in_dev);
274         if (err) {
275                 in_dev->dead = 1;
276                 in_dev_put(in_dev);
277                 in_dev = NULL;
278                 goto out;
279         }
280         ip_mc_init_dev(in_dev);
281         if (dev->flags & IFF_UP)
282                 ip_mc_up(in_dev);
283
284         /* we can receive as soon as ip_ptr is set -- do this last */
285         rcu_assign_pointer(dev->ip_ptr, in_dev);
286 out:
287         return in_dev ?: ERR_PTR(err);
288 out_kfree:
289         kfree(in_dev);
290         in_dev = NULL;
291         goto out;
292 }
293
294 static void in_dev_rcu_put(struct rcu_head *head)
295 {
296         struct in_device *idev = container_of(head, struct in_device, rcu_head);
297         in_dev_put(idev);
298 }
299
/*
 * inetdev_destroy - tear down the IPv4 state attached to a device.
 *
 * Marks the in_device dead, destroys its multicast state, removes every
 * address, detaches it from dev->ip_ptr and releases sysctl and neighbour
 * resources.  The final reference drop is deferred through call_rcu().
 * Must be called with RTNL held.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
        struct in_ifaddr *ifa;
        struct net_device *dev;

        ASSERT_RTNL();

        dev = in_dev->dev;

        /* Set before the address loop: __inet_del_ifa() skips all
         * secondary-address handling and promotion for a dead device.
         */
        in_dev->dead = 1;

        ip_mc_destroy_dev(in_dev);

        /* Always delete the list head.  destroy == 0 is passed, so each
         * address is freed explicitly right after it has been unlinked.
         */
        while ((ifa = in_dev->ifa_list) != NULL) {
                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
                inet_free_ifa(ifa);
        }

        RCU_INIT_POINTER(dev->ip_ptr, NULL);

        devinet_sysctl_unregister(in_dev);
        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
        arp_ifdown(dev);

        /* Drop the reference via in_dev_rcu_put() from an RCU callback. */
        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
326
327 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
328 {
329         rcu_read_lock();
330         for_primary_ifa(in_dev) {
331                 if (inet_ifa_match(a, ifa)) {
332                         if (!b || inet_ifa_match(b, ifa)) {
333                                 rcu_read_unlock();
334                                 return 1;
335                         }
336                 }
337         } endfor_ifa(in_dev);
338         rcu_read_unlock();
339         return 0;
340 }
341
/*
 * __inet_del_ifa - unlink one address from a device and announce it.
 * @in_dev:  device owning the address
 * @ifap:    the list link (in_dev->ifa_list or some ->ifa_next) that
 *           currently points at the address to delete
 * @destroy: if non-zero the address is handed to inet_free_ifa() at the
 *           end, otherwise the caller keeps ownership
 * @nlh:     passed through to rtmsg_ifa()
 * @portid:  passed through to rtmsg_ifa()
 *
 * When a primary address is deleted on a live device, its secondaries
 * (same mask, same subnet) are either deleted as well or, if secondary
 * promotion is enabled, the first of them is promoted to primary.
 * Must be called with RTNL held.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy, struct nlmsghdr *nlh, u32 portid)
{
        /* first matching secondary, when promotion is enabled */
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1 = *ifap;
        /* last primary seen whose scope is >= the deleted one's scope */
        struct in_ifaddr *last_prim = in_dev->ifa_list;
        /* last entry skipped after ifa1 before reaching the candidate */
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        if (in_dev->dead)
                goto no_promotions;

        /* 1. Deleting primary ifaddr forces deletion of all secondaries
         * unless alias promotion is set
         **/

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr **ifap1 = &ifa1->ifa_next;

                while ((ifa = *ifap1) != NULL) {
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;

                        /* Not a secondary of ifa1's subnet: step over it. */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                /* Delete the secondary; ifap1 is not
                                 * advanced because *ifap1 now already
                                 * points at the next entry.
                                 */
                                inet_hash_remove(ifa);
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                promote = ifa;
                                break;
                        }
                }
        }

        /* On promotion all secondaries from subnet are changing
         * the primary IP, we must remove all their routes silently
         * and later to add them back with new prefsrc. Do this
         * while all addresses are on the device list.
         */
        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa))
                        fib_del_ifaddr(ifa, ifa1);
        }

no_promotions:
        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;
        inet_hash_remove(ifa1);

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {
                /* Remembered before the list is re-linked below. */
                struct in_ifaddr *next_sec = promote->ifa_next;

                /* Move the promoted entry right behind last_prim.  With
                 * prev_prom == NULL it directly followed ifa1 and is left
                 * where it is.
                 */
                if (prev_prom) {
                        prev_prom->ifa_next = promote->ifa_next;
                        promote->ifa_next = last_prim->ifa_next;
                        last_prim->ifa_next = promote;
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* Re-add the remaining secondaries of the same subnet. */
                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
445
/*
 * Convenience wrapper around __inet_del_ifa() for deletions that were not
 * triggered by a netlink request: no nlmsghdr, portid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy)
{
        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
451
/* Forward declaration: the work item below needs the handler's address. */
static void check_lifetime(struct work_struct *work);

/*
 * Delayed work running check_lifetime().  It is (re)queued when an address
 * is inserted and by check_lifetime() itself.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
455
/*
 * __inet_insert_ifa - add an address to its device's address list.
 * @ifa:    the address to insert; ifa->ifa_dev must already be set
 * @nlh:    passed through to rtmsg_ifa()
 * @portid: passed through to rtmsg_ifa()
 * @extack: made available to the validator chain
 *
 * Decides whether the address is primary or secondary, lets the validator
 * chain veto it, links it into the list and the hash table, kicks the
 * lifetime worker and announces the new address.  On every failure path
 * @ifa is released with inet_free_ifa().  Must be called with RTNL held.
 *
 * Returns 0 on success -- and also when ifa_local is zero, in which case
 * the address is simply freed -- -EEXIST if the same local address already
 * exists in the subnet, -EINVAL if the scope differs from an existing
 * address of the same subnet, or the errno supplied by a validator.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
                             u32 portid, struct netlink_ext_ack *extack)
{
        struct in_device *in_dev = ifa->ifa_dev;
        struct in_ifaddr *ifa1, **ifap, **last_primary;
        struct in_validator_info ivi;
        int ret;

        ASSERT_RTNL();

        if (!ifa->ifa_local) {
                inet_free_ifa(ifa);
                return 0;
        }

        /* Start out as primary; the scan below may flip it to secondary. */
        ifa->ifa_flags &= ~IFA_F_SECONDARY;
        last_primary = &in_dev->ifa_list;

        /* After this loop ifap refers to the terminating NULL link, i.e.
         * the append position at the tail of the list.
         */
        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
             ifap = &ifa1->ifa_next) {
                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
                    ifa->ifa_scope <= ifa1->ifa_scope)
                        last_primary = &ifa1->ifa_next;
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa)) {
                        if (ifa1->ifa_local == ifa->ifa_local) {
                                inet_free_ifa(ifa);
                                return -EEXIST;
                        }
                        if (ifa1->ifa_scope != ifa->ifa_scope) {
                                inet_free_ifa(ifa);
                                return -EINVAL;
                        }
                        ifa->ifa_flags |= IFA_F_SECONDARY;
                }
        }

        /* Allow any devices that wish to register ifaddr validators to weigh
         * in now, before changes are committed.  The rtnl lock is serializing
         * access here, so the state should not change between a validator call
         * and a final notify on commit.  This isn't invoked on promotion under
         * the assumption that validators are checking the address itself, and
         * not the flags.
         */
        ivi.ivi_addr = ifa->ifa_address;
        ivi.ivi_dev = ifa->ifa_dev;
        ivi.extack = extack;
        ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
                                           NETDEV_UP, &ivi);
        ret = notifier_to_errno(ret);
        if (ret) {
                inet_free_ifa(ifa);
                return ret;
        }

        /* Primaries go behind the last suitable primary, secondaries are
         * appended at the tail (ifap as left by the loop above).
         */
        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
                prandom_seed((__force u32) ifa->ifa_local);
                ifap = last_primary;
        }

        ifa->ifa_next = *ifap;
        *ifap = ifa;

        inet_hash_insert(dev_net(in_dev->dev), ifa);

        /* Re-run the lifetime check immediately (delay 0). */
        cancel_delayed_work(&check_lifetime_work);
        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

        /* Send message first, then call notifier.
           Notifier will trigger FIB update, so that
           listeners of netlink will know about new ifaddr */
        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

        return 0;
}
532
/*
 * Convenience wrapper around __inet_insert_ifa() for insertions that were
 * not triggered by a netlink request: no nlmsghdr, portid 0, no extack.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
        return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
537
538 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
539 {
540         struct in_device *in_dev = __in_dev_get_rtnl(dev);
541
542         ASSERT_RTNL();
543
544         if (!in_dev) {
545                 inet_free_ifa(ifa);
546                 return -ENOBUFS;
547         }
548         ipv4_devconf_setall(in_dev);
549         neigh_parms_data_state_setall(in_dev->arp_parms);
550         if (ifa->ifa_dev != in_dev) {
551                 WARN_ON(ifa->ifa_dev);
552                 in_dev_hold(in_dev);
553                 ifa->ifa_dev = in_dev;
554         }
555         if (ipv4_is_loopback(ifa->ifa_local))
556                 ifa->ifa_scope = RT_SCOPE_HOST;
557         return inet_insert_ifa(ifa);
558 }
559
560 /* Caller must hold RCU or RTNL :
561  * We dont take a reference on found in_device
562  */
563 struct in_device *inetdev_by_index(struct net *net, int ifindex)
564 {
565         struct net_device *dev;
566         struct in_device *in_dev = NULL;
567
568         rcu_read_lock();
569         dev = dev_get_by_index_rcu(net, ifindex);
570         if (dev)
571                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
572         rcu_read_unlock();
573         return in_dev;
574 }
575 EXPORT_SYMBOL(inetdev_by_index);
576
577 /* Called only from RTNL semaphored context. No locks. */
578
579 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
580                                     __be32 mask)
581 {
582         ASSERT_RTNL();
583
584         for_primary_ifa(in_dev) {
585                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
586                         return ifa;
587         } endfor_ifa(in_dev);
588         return NULL;
589 }
590
591 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
592 {
593         struct ip_mreqn mreq = {
594                 .imr_multiaddr.s_addr = ifa->ifa_address,
595                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
596         };
597         int ret;
598
599         ASSERT_RTNL();
600
601         lock_sock(sk);
602         if (join)
603                 ret = ip_mc_join_group(sk, &mreq);
604         else
605                 ret = ip_mc_leave_group(sk, &mreq);
606         release_sock(sk);
607
608         return ret;
609 }
610
611 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
612                             struct netlink_ext_ack *extack)
613 {
614         struct net *net = sock_net(skb->sk);
615         struct nlattr *tb[IFA_MAX+1];
616         struct in_device *in_dev;
617         struct ifaddrmsg *ifm;
618         struct in_ifaddr *ifa, **ifap;
619         int err = -EINVAL;
620
621         ASSERT_RTNL();
622
623         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
624                           extack);
625         if (err < 0)
626                 goto errout;
627
628         ifm = nlmsg_data(nlh);
629         in_dev = inetdev_by_index(net, ifm->ifa_index);
630         if (!in_dev) {
631                 err = -ENODEV;
632                 goto errout;
633         }
634
635         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
636              ifap = &ifa->ifa_next) {
637                 if (tb[IFA_LOCAL] &&
638                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
639                         continue;
640
641                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
642                         continue;
643
644                 if (tb[IFA_ADDRESS] &&
645                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
646                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
647                         continue;
648
649                 if (ipv4_is_multicast(ifa->ifa_address))
650                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
651                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
652                 return 0;
653         }
654
655         err = -EADDRNOTAVAIL;
656 errout:
657         return err;
658 }
659
/* Lifetime value meaning "never expires". */
#define INFINITY_LIFE_TIME      0xFFFFFFFF

/*
 * check_lifetime - delayed-work handler enforcing address lifetimes.
 *
 * Every hash bucket is scanned in up to two passes.  The first pass runs
 * under RCU only: it decides whether any address in the bucket needs to be
 * changed and tracks the earliest upcoming expiry in "next".  Only when a
 * change is needed is the bucket walked a second time under rtnl_lock(),
 * where addresses past their valid lifetime are deleted and addresses past
 * their preferred lifetime are flagged IFA_F_DEPRECATED (with an
 * RTM_NEWADDR announcement).  At the end the work re-queues itself.
 */
static void check_lifetime(struct work_struct *work)
{
        unsigned long now, next, next_sec, next_sched;
        struct in_ifaddr *ifa;
        struct hlist_node *n;
        int i;

        now = jiffies;
        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                bool change_needed = false;

                /* Pass 1: read-only scan, no RTNL. */
                rcu_read_lock();
                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        /* age is in seconds (jiffies delta divided by HZ). */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime is over: must be deleted. */
                                change_needed = true;
                        } else if (ifa->ifa_preferred_lft ==
                                   INFINITY_LIFE_TIME) {
                                continue;
                        } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime is over; the next
                                 * event for this address is its expiry.
                                 */
                                if (time_before(ifa->ifa_tstamp +
                                                ifa->ifa_valid_lft * HZ, next))
                                        next = ifa->ifa_tstamp +
                                               ifa->ifa_valid_lft * HZ;

                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
                                        change_needed = true;
                        } else if (time_before(ifa->ifa_tstamp +
                                               ifa->ifa_preferred_lft * HZ,
                                               next)) {
                                /* Still preferred; next event is the end
                                 * of its preferred lifetime.
                                 */
                                next = ifa->ifa_tstamp +
                                       ifa->ifa_preferred_lft * HZ;
                        }
                }
                rcu_read_unlock();
                if (!change_needed)
                        continue;
                /* Pass 2: apply the changes under RTNL.  The _safe
                 * iterator is used because entries may be unlinked.
                 */
                rtnl_lock();
                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                struct in_ifaddr **ifap;

                                /* inet_del_ifa() needs the list link that
                                 * points at ifa, so search the device
                                 * list for it.
                                 */
                                for (ifap = &ifa->ifa_dev->ifa_list;
                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
                                        if (*ifap == ifa) {
                                                inet_del_ifa(ifa->ifa_dev,
                                                             ifap, 1);
                                                break;
                                        }
                                }
                        } else if (ifa->ifa_preferred_lft !=
                                   INFINITY_LIFE_TIME &&
                                   age >= ifa->ifa_preferred_lft &&
                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                ifa->ifa_flags |= IFA_F_DEPRECATED;
                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
                        }
                }
                rtnl_unlock();
        }

        next_sec = round_jiffies_up(next);
        next_sched = next;

        /* If rounded timeout is accurate enough, accept it. */
        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
                next_sched = next_sec;

        now = jiffies;
        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
                        next_sched - now);
}
759
760 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
761                              __u32 prefered_lft)
762 {
763         unsigned long timeout;
764
765         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
766
767         timeout = addrconf_timeout_fixup(valid_lft, HZ);
768         if (addrconf_finite_timeout(timeout))
769                 ifa->ifa_valid_lft = timeout;
770         else
771                 ifa->ifa_flags |= IFA_F_PERMANENT;
772
773         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
774         if (addrconf_finite_timeout(timeout)) {
775                 if (timeout == 0)
776                         ifa->ifa_flags |= IFA_F_DEPRECATED;
777                 ifa->ifa_preferred_lft = timeout;
778         }
779         ifa->ifa_tstamp = jiffies;
780         if (!ifa->ifa_cstamp)
781                 ifa->ifa_cstamp = ifa->ifa_tstamp;
782 }
783
/*
 * rtm_to_ifaddr - build an in_ifaddr from an RTM address netlink message.
 * @net:           namespace in which ifa_index is resolved
 * @nlh:           the netlink message
 * @pvalid_lft:    written only if IFA_CACHEINFO is present
 * @pprefered_lft: written only if IFA_CACHEINFO is present
 * @extack:        passed to nlmsg_parse()
 *
 * The returned address holds a reference on its in_device but has not
 * been inserted anywhere yet.
 *
 * Returns the new address or an ERR_PTR(): the nlmsg_parse() error,
 * -EINVAL (prefix length > 32, IFA_LOCAL missing, or an inconsistent
 * IFA_CACHEINFO), -ENODEV (no such interface) or -ENOBUFS (interface has
 * no in_device; also what is returned when the allocation fails, since
 * err is not changed in between).
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
                                       __u32 *pvalid_lft, __u32 *pprefered_lft,
                                       struct netlink_ext_ack *extack)
{
        struct nlattr *tb[IFA_MAX+1];
        struct in_ifaddr *ifa;
        struct ifaddrmsg *ifm;
        struct net_device *dev;
        struct in_device *in_dev;
        int err;

        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
                          extack);
        if (err < 0)
                goto errout;

        ifm = nlmsg_data(nlh);
        err = -EINVAL;
        if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
                goto errout;

        dev = __dev_get_by_index(net, ifm->ifa_index);
        err = -ENODEV;
        if (!dev)
                goto errout;

        in_dev = __in_dev_get_rtnl(dev);
        err = -ENOBUFS;
        if (!in_dev)
                goto errout;

        ifa = inet_alloc_ifa();
        if (!ifa)
                /*
                 * A potential indev allocation can be left alive, it stays
                 * assigned to its device and is destroyed with it.
                 */
                goto errout;

        ipv4_devconf_setall(in_dev);
        neigh_parms_data_state_setall(in_dev->arp_parms);
        /* Reference owned by ifa->ifa_dev (assigned below). */
        in_dev_hold(in_dev);

        /* Without an explicit IFA_ADDRESS the local address is used. */
        if (!tb[IFA_ADDRESS])
                tb[IFA_ADDRESS] = tb[IFA_LOCAL];

        INIT_HLIST_NODE(&ifa->hash);
        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
        /* The IFA_FLAGS attribute takes precedence over the header field. */
        ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
                                         ifm->ifa_flags;
        ifa->ifa_scope = ifm->ifa_scope;
        ifa->ifa_dev = in_dev;

        ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
        ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

        if (tb[IFA_BROADCAST])
                ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

        /* The label defaults to the device name. */
        if (tb[IFA_LABEL])
                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
        else
                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

        if (tb[IFA_RT_PRIORITY])
                ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);

        if (tb[IFA_CACHEINFO]) {
                struct ifa_cacheinfo *ci;

                ci = nla_data(tb[IFA_CACHEINFO]);
                /* Reject a zero valid lifetime and preferred > valid. */
                if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
                        err = -EINVAL;
                        goto errout_free;
                }
                *pvalid_lft = ci->ifa_valid;
                *pprefered_lft = ci->ifa_prefered;
        }

        return ifa;

errout_free:
        /* The in_device reference is dropped by the deferred free. */
        inet_free_ifa(ifa);
errout:
        return ERR_PTR(err);
}
871
872 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
873 {
874         struct in_device *in_dev = ifa->ifa_dev;
875         struct in_ifaddr *ifa1, **ifap;
876
877         if (!ifa->ifa_local)
878                 return NULL;
879
880         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
881              ifap = &ifa1->ifa_next) {
882                 if (ifa1->ifa_mask == ifa->ifa_mask &&
883                     inet_ifa_match(ifa1->ifa_address, ifa) &&
884                     ifa1->ifa_local == ifa->ifa_local)
885                         return ifa1;
886         }
887         return NULL;
888 }
889
890 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
891                             struct netlink_ext_ack *extack)
892 {
893         struct net *net = sock_net(skb->sk);
894         struct in_ifaddr *ifa;
895         struct in_ifaddr *ifa_existing;
896         __u32 valid_lft = INFINITY_LIFE_TIME;
897         __u32 prefered_lft = INFINITY_LIFE_TIME;
898
899         ASSERT_RTNL();
900
901         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
902         if (IS_ERR(ifa))
903                 return PTR_ERR(ifa);
904
905         ifa_existing = find_matching_ifa(ifa);
906         if (!ifa_existing) {
907                 /* It would be best to check for !NLM_F_CREATE here but
908                  * userspace already relies on not having to provide this.
909                  */
910                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
911                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
912                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
913                                                true, ifa);
914
915                         if (ret < 0) {
916                                 inet_free_ifa(ifa);
917                                 return ret;
918                         }
919                 }
920                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
921                                          extack);
922         } else {
923                 u32 new_metric = ifa->ifa_rt_priority;
924
925                 inet_free_ifa(ifa);
926
927                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
928                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
929                         return -EEXIST;
930                 ifa = ifa_existing;
931
932                 if (ifa->ifa_rt_priority != new_metric) {
933                         fib_modify_prefix_metric(ifa, new_metric);
934                         ifa->ifa_rt_priority = new_metric;
935                 }
936
937                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
938                 cancel_delayed_work(&check_lifetime_work);
939                 queue_delayed_work(system_power_efficient_wq,
940                                 &check_lifetime_work, 0);
941                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
942         }
943         return 0;
944 }
945
946 /*
947  *      Determine a default network mask, based on the IP address.
948  */
949
950 static int inet_abc_len(__be32 addr)
951 {
952         int rc = -1;    /* Something else, probably a multicast. */
953
954         if (ipv4_is_zeronet(addr))
955                 rc = 0;
956         else {
957                 __u32 haddr = ntohl(addr);
958
959                 if (IN_CLASSA(haddr))
960                         rc = 8;
961                 else if (IN_CLASSB(haddr))
962                         rc = 16;
963                 else if (IN_CLASSC(haddr))
964                         rc = 24;
965         }
966
967         return rc;
968 }
969
970
/*
 * devinet_ioctl - serve the IPv4 interface address ioctls.
 *
 * net: namespace used for the device lookup and the capability checks
 * cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *      SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *      SIOCSIFNETMASK; any other value is rejected with -EINVAL
 * ifr: request.  ifr_name names the device or a "name:alias" label,
 *      ifr_addr carries the address (input for the set commands, output
 *      for the get commands) and ifr_flags is read by SIOCSIFFLAGS.
 *
 * The address entry to operate on is looked up by label on the device's
 * ifa_list; for the get commands a label + local address match is tried
 * first when the caller passed an AF_INET address.
 *
 * Returns 0 on success, otherwise a negative errno set in this function
 * (-EPERM, -EINVAL, -ENODEV, -EADDRNOTAVAIL, -ENOBUFS) or the result of
 * dev_change_flags() / inet_set_ifa().
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
        struct sockaddr_in sin_orig;
        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
        struct in_device *in_dev;
        struct in_ifaddr **ifap = NULL;
        struct in_ifaddr *ifa = NULL;
        struct net_device *dev;
        char *colon;
        int ret = -EFAULT;
        int tryaddrmatch = 0;

        /* Make sure the name is NUL terminated before using string helpers. */
        ifr->ifr_name[IFNAMSIZ - 1] = 0;

        /* save original address for comparison */
        memcpy(&sin_orig, sin, sizeof(*sin));

        /* Temporarily cut off an alias suffix (":...") so that the device
         * lookups below see the bare device name.  The colon is put back
         * once the device has been found.
         */
        colon = strchr(ifr->ifr_name, ':');
        if (colon)
                *colon = 0;

        /* NOTE(review): presumably tries to load a module providing the
         * named device - confirm against dev_load().
         */
        dev_load(net, ifr->ifr_name);

        /* First pass over cmd: permission and argument checks only. */
        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
        case SIOCGIFBRDADDR:    /* Get the broadcast address */
        case SIOCGIFDSTADDR:    /* Get the destination address */
        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                /* Note that these ioctls will not sleep,
                   so that we do not impose a lock.
                   One day we will be forced to put shlock here (I mean SMP)

                   NOTE(review): rtnl_lock() is nevertheless taken below
                   for every command, including these.
                 */
                /* Only trust the caller supplied address for matching when
                 * it was tagged AF_INET; then prepare sin for the reply.
                 */
                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                memset(sin, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                break;

        case SIOCSIFFLAGS:
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                break;
        case SIOCSIFADDR:       /* Set interface address (and family) */
        case SIOCSIFBRDADDR:    /* Set the broadcast address */
        case SIOCSIFDSTADDR:    /* Set the destination address */
        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                ret = -EINVAL;
                if (sin->sin_family != AF_INET)
                        goto out;
                break;
        default:
                ret = -EINVAL;
                goto out;
        }

        rtnl_lock();

        ret = -ENODEV;
        dev = __dev_get_by_name(net, ifr->ifr_name);
        if (!dev)
                goto done;

        /* Restore the full "name:alias" label for the comparisons below. */
        if (colon)
                *colon = ':';

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                if (tryaddrmatch) {
                        /* Matthias Andree */
                        /* compare label and address (4.4BSD style) */
                        /* note: we only do this for a limited set of ioctls
                           and only if the original address family was AF_INET.
                           This is checked above. */
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next) {
                                if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
                                    sin_orig.sin_addr.s_addr ==
                                                        ifa->ifa_local) {
                                        break; /* found */
                                }
                        }
                }
                /* we didn't get a match, maybe the application is
                   4.3BSD-style and passed in junk so we fall back to
                   comparing just the label */
                if (!ifa) {
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next)
                                if (!strcmp(ifr->ifr_name, ifa->ifa_label))
                                        break;
                }
        }

        /* Every command except SIOCSIFADDR (which may create the entry) and
         * SIOCSIFFLAGS (which may act on the device itself) needs an
         * existing address entry.
         */
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;

        /* Second pass over cmd: perform the operation. */
        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_local;
                break;

        case SIOCGIFBRDADDR:    /* Get the broadcast address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_broadcast;
                break;

        case SIOCGIFDSTADDR:    /* Get the destination address */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_address;
                break;

        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                ret = 0;
                sin->sin_addr.s_addr = ifa->ifa_mask;
                break;

        case SIOCSIFFLAGS:
                /* With an alias label the request targets that address:
                 * clearing IFF_UP deletes it, anything else is a no-op.
                 * Without an alias the flags are applied to the device.
                 */
                if (colon) {
                        ret = -EADDRNOTAVAIL;
                        if (!ifa)
                                break;
                        ret = 0;
                        if (!(ifr->ifr_flags & IFF_UP))
                                inet_del_ifa(in_dev, ifap, 1);
                        break;
                }
                ret = dev_change_flags(dev, ifr->ifr_flags);
                break;

        case SIOCSIFADDR:       /* Set interface address (and family) */
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;

                if (!ifa) {
                        /* No entry with this label yet: allocate one. */
                        ret = -ENOBUFS;
                        ifa = inet_alloc_ifa();
                        if (!ifa)
                                break;
                        INIT_HLIST_NODE(&ifa->hash);
                        if (colon)
                                memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
                        else
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                } else {
                        /* Existing entry: nothing to do if the address is
                         * unchanged, otherwise unlink it (third argument 0)
                         * so it can be reconfigured and set again below.
                         */
                        ret = 0;
                        if (ifa->ifa_local == sin->sin_addr.s_addr)
                                break;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
                        ifa->ifa_scope = 0;
                }

                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

                /* Classful default mask (and broadcast address on broadcast
                 * capable devices), or a /32 on point-to-point devices.
                 */
                if (!(dev->flags & IFF_POINTOPOINT)) {
                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
                        if ((dev->flags & IFF_BROADCAST) &&
                            ifa->ifa_prefixlen < 31)
                                ifa->ifa_broadcast = ifa->ifa_address |
                                                     ~ifa->ifa_mask;
                } else {
                        ifa->ifa_prefixlen = 32;
                        ifa->ifa_mask = inet_make_mask(32);
                }
                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
                ret = inet_set_ifa(dev, ifa);
                break;

        case SIOCSIFBRDADDR:    /* Set the broadcast address */
                ret = 0;
                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* Unlink, modify, re-insert. */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        inet_insert_ifa(ifa);
                }
                break;

        case SIOCSIFDSTADDR:    /* Set the destination address */
                ret = 0;
                if (ifa->ifa_address == sin->sin_addr.s_addr)
                        break;
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;
                ret = 0;
                inet_del_ifa(in_dev, ifap, 0);
                ifa->ifa_address = sin->sin_addr.s_addr;
                inet_insert_ifa(ifa);
                break;

        case SIOCSIFNETMASK:    /* Set the netmask for the interface */

                /*
                 *      The mask we set must be legal.
                 */
                ret = -EINVAL;
                if (bad_mask(sin->sin_addr.s_addr, 0))
                        break;
                ret = 0;
                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                        __be32 old_mask = ifa->ifa_mask;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

                        /* See if current broadcast address matches
                         * with current netmask, then recalculate
                         * the broadcast address. Otherwise it's a
                         * funny address, so don't touch it since
                         * the user seems to know what (s)he's doing...
                         */
                        if ((dev->flags & IFF_BROADCAST) &&
                            (ifa->ifa_prefixlen < 31) &&
                            (ifa->ifa_broadcast ==
                             (ifa->ifa_local|~old_mask))) {
                                ifa->ifa_broadcast = (ifa->ifa_local |
                                                      ~sin->sin_addr.s_addr);
                        }
                        inet_insert_ifa(ifa);
                }
                break;
        }
done:
        rtnl_unlock();
out:
        return ret;
}
1205
1206 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1207 {
1208         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1209         struct in_ifaddr *ifa;
1210         struct ifreq ifr;
1211         int done = 0;
1212
1213         if (WARN_ON(size > sizeof(struct ifreq)))
1214                 goto out;
1215
1216         if (!in_dev)
1217                 goto out;
1218
1219         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1220                 if (!buf) {
1221                         done += size;
1222                         continue;
1223                 }
1224                 if (len < size)
1225                         break;
1226                 memset(&ifr, 0, sizeof(struct ifreq));
1227                 strcpy(ifr.ifr_name, ifa->ifa_label);
1228
1229                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1230                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1231                                                                 ifa->ifa_local;
1232
1233                 if (copy_to_user(buf + done, &ifr, size)) {
1234                         done = -EFAULT;
1235                         break;
1236                 }
1237                 len  -= size;
1238                 done += size;
1239         }
1240 out:
1241         return done;
1242 }
1243
1244 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1245                                  int scope)
1246 {
1247         for_primary_ifa(in_dev) {
1248                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1249                     ifa->ifa_scope <= scope)
1250                         return ifa->ifa_local;
1251         } endfor_ifa(in_dev);
1252
1253         return 0;
1254 }
1255
1256 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1257 {
1258         __be32 addr = 0;
1259         struct in_device *in_dev;
1260         struct net *net = dev_net(dev);
1261         int master_idx;
1262
1263         rcu_read_lock();
1264         in_dev = __in_dev_get_rcu(dev);
1265         if (!in_dev)
1266                 goto no_in_dev;
1267
1268         for_primary_ifa(in_dev) {
1269                 if (ifa->ifa_scope > scope)
1270                         continue;
1271                 if (!dst || inet_ifa_match(dst, ifa)) {
1272                         addr = ifa->ifa_local;
1273                         break;
1274                 }
1275                 if (!addr)
1276                         addr = ifa->ifa_local;
1277         } endfor_ifa(in_dev);
1278
1279         if (addr)
1280                 goto out_unlock;
1281 no_in_dev:
1282         master_idx = l3mdev_master_ifindex_rcu(dev);
1283
1284         /* For VRFs, the VRF device takes the place of the loopback device,
1285          * with addresses on it being preferred.  Note in such cases the
1286          * loopback device will be among the devices that fail the master_idx
1287          * equality check in the loop below.
1288          */
1289         if (master_idx &&
1290             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1291             (in_dev = __in_dev_get_rcu(dev))) {
1292                 addr = in_dev_select_addr(in_dev, scope);
1293                 if (addr)
1294                         goto out_unlock;
1295         }
1296
1297         /* Not loopback addresses on loopback should be preferred
1298            in this case. It is important that lo is the first interface
1299            in dev_base list.
1300          */
1301         for_each_netdev_rcu(net, dev) {
1302                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1303                         continue;
1304
1305                 in_dev = __in_dev_get_rcu(dev);
1306                 if (!in_dev)
1307                         continue;
1308
1309                 addr = in_dev_select_addr(in_dev, scope);
1310                 if (addr)
1311                         goto out_unlock;
1312         }
1313 out_unlock:
1314         rcu_read_unlock();
1315         return addr;
1316 }
1317 EXPORT_SYMBOL(inet_select_addr);
1318
1319 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1320                               __be32 local, int scope)
1321 {
1322         int same = 0;
1323         __be32 addr = 0;
1324
1325         for_ifa(in_dev) {
1326                 if (!addr &&
1327                     (local == ifa->ifa_local || !local) &&
1328                     ifa->ifa_scope <= scope) {
1329                         addr = ifa->ifa_local;
1330                         if (same)
1331                                 break;
1332                 }
1333                 if (!same) {
1334                         same = (!local || inet_ifa_match(local, ifa)) &&
1335                                 (!dst || inet_ifa_match(dst, ifa));
1336                         if (same && addr) {
1337                                 if (local || !dst)
1338                                         break;
1339                                 /* Is the selected addr into dst subnet? */
1340                                 if (inet_ifa_match(addr, ifa))
1341                                         break;
1342                                 /* No, then can we use new local src? */
1343                                 if (ifa->ifa_scope <= scope) {
1344                                         addr = ifa->ifa_local;
1345                                         break;
1346                                 }
1347                                 /* search for large dst subnet for addr */
1348                                 same = 0;
1349                         }
1350                 }
1351         } endfor_ifa(in_dev);
1352
1353         return same ? addr : 0;
1354 }
1355
1356 /*
1357  * Confirm that local IP address exists using wildcards:
1358  * - net: netns to check, cannot be NULL
1359  * - in_dev: only on this interface, NULL=any interface
1360  * - dst: only in the same subnet as dst, 0=any dst
1361  * - local: address, 0=autoselect the local address
1362  * - scope: maximum allowed scope value for the local address
1363  */
1364 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1365                          __be32 dst, __be32 local, int scope)
1366 {
1367         __be32 addr = 0;
1368         struct net_device *dev;
1369
1370         if (in_dev)
1371                 return confirm_addr_indev(in_dev, dst, local, scope);
1372
1373         rcu_read_lock();
1374         for_each_netdev_rcu(net, dev) {
1375                 in_dev = __in_dev_get_rcu(dev);
1376                 if (in_dev) {
1377                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1378                         if (addr)
1379                                 break;
1380                 }
1381         }
1382         rcu_read_unlock();
1383
1384         return addr;
1385 }
1386 EXPORT_SYMBOL(inet_confirm_addr);
1387
1388 /*
1389  *      Device notifier
1390  */
1391
/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1397
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1403
/*
 * Add @nb to the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1409
/*
 * Remove @nb from the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
            nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1416
1417 /* Rename ifa_labels for a device name change. Make some effort to preserve
1418  * existing alias numbering and to create unique labels if possible.
1419 */
1420 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1421 {
1422         struct in_ifaddr *ifa;
1423         int named = 0;
1424
1425         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1426                 char old[IFNAMSIZ], *dot;
1427
1428                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1429                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1430                 if (named++ == 0)
1431                         goto skip;
1432                 dot = strchr(old, ':');
1433                 if (!dot) {
1434                         sprintf(old, ":%d", named);
1435                         dot = old;
1436                 }
1437                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1438                         strcat(ifa->ifa_label, dot);
1439                 else
1440                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1441 skip:
1442                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1443         }
1444 }
1445
/* An MTU is usable for IPv4 only if it is at least IPV4_MIN_MTU. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        return mtu >= IPV4_MIN_MTU;
}
1450
1451 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1452                                         struct in_device *in_dev)
1453
1454 {
1455         struct in_ifaddr *ifa;
1456
1457         for (ifa = in_dev->ifa_list; ifa;
1458              ifa = ifa->ifa_next) {
1459                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1460                          ifa->ifa_local, dev,
1461                          ifa->ifa_local, NULL,
1462                          dev->dev_addr, NULL);
1463         }
1464 }
1465
1466 /* Called only under RTNL semaphore */
1467
1468 static int inetdev_event(struct notifier_block *this, unsigned long event,
1469                          void *ptr)
1470 {
1471         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1472         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1473
1474         ASSERT_RTNL();
1475
1476         if (!in_dev) {
1477                 if (event == NETDEV_REGISTER) {
1478                         in_dev = inetdev_init(dev);
1479                         if (IS_ERR(in_dev))
1480                                 return notifier_from_errno(PTR_ERR(in_dev));
1481                         if (dev->flags & IFF_LOOPBACK) {
1482                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1483                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1484                         }
1485                 } else if (event == NETDEV_CHANGEMTU) {
1486                         /* Re-enabling IP */
1487                         if (inetdev_valid_mtu(dev->mtu))
1488                                 in_dev = inetdev_init(dev);
1489                 }
1490                 goto out;
1491         }
1492
1493         switch (event) {
1494         case NETDEV_REGISTER:
1495                 pr_debug("%s: bug\n", __func__);
1496                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1497                 break;
1498         case NETDEV_UP:
1499                 if (!inetdev_valid_mtu(dev->mtu))
1500                         break;
1501                 if (dev->flags & IFF_LOOPBACK) {
1502                         struct in_ifaddr *ifa = inet_alloc_ifa();
1503
1504                         if (ifa) {
1505                                 INIT_HLIST_NODE(&ifa->hash);
1506                                 ifa->ifa_local =
1507                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1508                                 ifa->ifa_prefixlen = 8;
1509                                 ifa->ifa_mask = inet_make_mask(8);
1510                                 in_dev_hold(in_dev);
1511                                 ifa->ifa_dev = in_dev;
1512                                 ifa->ifa_scope = RT_SCOPE_HOST;
1513                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1514                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1515                                                  INFINITY_LIFE_TIME);
1516                                 ipv4_devconf_setall(in_dev);
1517                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1518                                 inet_insert_ifa(ifa);
1519                         }
1520                 }
1521                 ip_mc_up(in_dev);
1522                 /* fall through */
1523         case NETDEV_CHANGEADDR:
1524                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1525                         break;
1526                 /* fall through */
1527         case NETDEV_NOTIFY_PEERS:
1528                 /* Send gratuitous ARP to notify of link change */
1529                 inetdev_send_gratuitous_arp(dev, in_dev);
1530                 break;
1531         case NETDEV_DOWN:
1532                 ip_mc_down(in_dev);
1533                 break;
1534         case NETDEV_PRE_TYPE_CHANGE:
1535                 ip_mc_unmap(in_dev);
1536                 break;
1537         case NETDEV_POST_TYPE_CHANGE:
1538                 ip_mc_remap(in_dev);
1539                 break;
1540         case NETDEV_CHANGEMTU:
1541                 if (inetdev_valid_mtu(dev->mtu))
1542                         break;
1543                 /* disable IP when MTU is not enough */
1544                 /* fall through */
1545         case NETDEV_UNREGISTER:
1546                 inetdev_destroy(in_dev);
1547                 break;
1548         case NETDEV_CHANGENAME:
1549                 /* Do not notify about label change, this event is
1550                  * not interesting to applications using netlink.
1551                  */
1552                 inetdev_changename(dev, in_dev);
1553
1554                 devinet_sysctl_unregister(in_dev);
1555                 devinet_sysctl_register(in_dev);
1556                 break;
1557         }
1558 out:
1559         return NOTIFY_DONE;
1560 }
1561
/*
 * Notifier block that dispatches to inetdev_event().
 * NOTE(review): presumably registered on the netdevice notifier chain
 * elsewhere in this file - confirm at the registration site.
 */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1565
1566 static size_t inet_nlmsg_size(void)
1567 {
1568         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1569                + nla_total_size(4) /* IFA_ADDRESS */
1570                + nla_total_size(4) /* IFA_LOCAL */
1571                + nla_total_size(4) /* IFA_BROADCAST */
1572                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1573                + nla_total_size(4)  /* IFA_FLAGS */
1574                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1575                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1576 }
1577
1578 static inline u32 cstamp_delta(unsigned long cstamp)
1579 {
1580         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1581 }
1582
1583 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1584                          unsigned long tstamp, u32 preferred, u32 valid)
1585 {
1586         struct ifa_cacheinfo ci;
1587
1588         ci.cstamp = cstamp_delta(cstamp);
1589         ci.tstamp = cstamp_delta(tstamp);
1590         ci.ifa_prefered = preferred;
1591         ci.ifa_valid = valid;
1592
1593         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1594 }
1595
1596 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1597                             struct inet_fill_args *args)
1598 {
1599         struct ifaddrmsg *ifm;
1600         struct nlmsghdr  *nlh;
1601         u32 preferred, valid;
1602
1603         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1604                         args->flags);
1605         if (!nlh)
1606                 return -EMSGSIZE;
1607
1608         ifm = nlmsg_data(nlh);
1609         ifm->ifa_family = AF_INET;
1610         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1611         ifm->ifa_flags = ifa->ifa_flags;
1612         ifm->ifa_scope = ifa->ifa_scope;
1613         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1614
1615         if (args->netnsid >= 0 &&
1616             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1617                 goto nla_put_failure;
1618
1619         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1620                 preferred = ifa->ifa_preferred_lft;
1621                 valid = ifa->ifa_valid_lft;
1622                 if (preferred != INFINITY_LIFE_TIME) {
1623                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1624
1625                         if (preferred > tval)
1626                                 preferred -= tval;
1627                         else
1628                                 preferred = 0;
1629                         if (valid != INFINITY_LIFE_TIME) {
1630                                 if (valid > tval)
1631                                         valid -= tval;
1632                                 else
1633                                         valid = 0;
1634                         }
1635                 }
1636         } else {
1637                 preferred = INFINITY_LIFE_TIME;
1638                 valid = INFINITY_LIFE_TIME;
1639         }
1640         if ((ifa->ifa_address &&
1641              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1642             (ifa->ifa_local &&
1643              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1644             (ifa->ifa_broadcast &&
1645              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1646             (ifa->ifa_label[0] &&
1647              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1648             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1649             (ifa->ifa_rt_priority &&
1650              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1651             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1652                           preferred, valid))
1653                 goto nla_put_failure;
1654
1655         nlmsg_end(skb, nlh);
1656         return 0;
1657
1658 nla_put_failure:
1659         nlmsg_cancel(skb, nlh);
1660         return -EMSGSIZE;
1661 }
1662
1663 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1664                                       struct inet_fill_args *fillargs,
1665                                       struct net **tgt_net, struct sock *sk,
1666                                       struct netlink_ext_ack *extack)
1667 {
1668         struct nlattr *tb[IFA_MAX+1];
1669         struct ifaddrmsg *ifm;
1670         int err, i;
1671
1672         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1673                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1674                 return -EINVAL;
1675         }
1676
1677         ifm = nlmsg_data(nlh);
1678         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1679                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1680                 return -EINVAL;
1681         }
1682         if (ifm->ifa_index) {
1683                 NL_SET_ERR_MSG(extack, "ipv4: Filter by device index not supported for address dump");
1684                 return -EINVAL;
1685         }
1686
1687         err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1688                                  ifa_ipv4_policy, extack);
1689         if (err < 0)
1690                 return err;
1691
1692         for (i = 0; i <= IFA_MAX; ++i) {
1693                 if (!tb[i])
1694                         continue;
1695
1696                 if (i == IFA_TARGET_NETNSID) {
1697                         struct net *net;
1698
1699                         fillargs->netnsid = nla_get_s32(tb[i]);
1700
1701                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1702                         if (IS_ERR(net)) {
1703                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1704                                 return PTR_ERR(net);
1705                         }
1706                         *tgt_net = net;
1707                 } else {
1708                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1709                         return -EINVAL;
1710                 }
1711         }
1712
1713         return 0;
1714 }
1715
1716 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1717 {
1718         const struct nlmsghdr *nlh = cb->nlh;
1719         struct inet_fill_args fillargs = {
1720                 .portid = NETLINK_CB(cb->skb).portid,
1721                 .seq = nlh->nlmsg_seq,
1722                 .event = RTM_NEWADDR,
1723                 .flags = NLM_F_MULTI,
1724                 .netnsid = -1,
1725         };
1726         struct net *net = sock_net(skb->sk);
1727         struct net *tgt_net = net;
1728         int h, s_h;
1729         int idx, s_idx;
1730         int ip_idx, s_ip_idx;
1731         struct net_device *dev;
1732         struct in_device *in_dev;
1733         struct in_ifaddr *ifa;
1734         struct hlist_head *head;
1735
1736         s_h = cb->args[0];
1737         s_idx = idx = cb->args[1];
1738         s_ip_idx = ip_idx = cb->args[2];
1739
1740         if (cb->strict_check) {
1741                 int err;
1742
1743                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1744                                                  skb->sk, cb->extack);
1745                 if (err < 0)
1746                         return err;
1747         }
1748
1749         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1750                 idx = 0;
1751                 head = &tgt_net->dev_index_head[h];
1752                 rcu_read_lock();
1753                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1754                           tgt_net->dev_base_seq;
1755                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1756                         if (idx < s_idx)
1757                                 goto cont;
1758                         if (h > s_h || idx > s_idx)
1759                                 s_ip_idx = 0;
1760                         in_dev = __in_dev_get_rcu(dev);
1761                         if (!in_dev)
1762                                 goto cont;
1763
1764                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1765                              ifa = ifa->ifa_next, ip_idx++) {
1766                                 if (ip_idx < s_ip_idx)
1767                                         continue;
1768                                 if (inet_fill_ifaddr(skb, ifa, &fillargs) < 0) {
1769                                         rcu_read_unlock();
1770                                         goto done;
1771                                 }
1772                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1773                         }
1774 cont:
1775                         idx++;
1776                 }
1777                 rcu_read_unlock();
1778         }
1779
1780 done:
1781         cb->args[0] = h;
1782         cb->args[1] = idx;
1783         cb->args[2] = ip_idx;
1784         if (fillargs.netnsid >= 0)
1785                 put_net(tgt_net);
1786
1787         return skb->len;
1788 }
1789
1790 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1791                       u32 portid)
1792 {
1793         struct inet_fill_args fillargs = {
1794                 .portid = portid,
1795                 .seq = nlh ? nlh->nlmsg_seq : 0,
1796                 .event = event,
1797                 .flags = 0,
1798                 .netnsid = -1,
1799         };
1800         struct sk_buff *skb;
1801         int err = -ENOBUFS;
1802         struct net *net;
1803
1804         net = dev_net(ifa->ifa_dev->dev);
1805         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1806         if (!skb)
1807                 goto errout;
1808
1809         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1810         if (err < 0) {
1811                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1812                 WARN_ON(err == -EMSGSIZE);
1813                 kfree_skb(skb);
1814                 goto errout;
1815         }
1816         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1817         return;
1818 errout:
1819         if (err < 0)
1820                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1821 }
1822
1823 static size_t inet_get_link_af_size(const struct net_device *dev,
1824                                     u32 ext_filter_mask)
1825 {
1826         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1827
1828         if (!in_dev)
1829                 return 0;
1830
1831         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1832 }
1833
1834 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1835                              u32 ext_filter_mask)
1836 {
1837         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1838         struct nlattr *nla;
1839         int i;
1840
1841         if (!in_dev)
1842                 return -ENODATA;
1843
1844         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1845         if (!nla)
1846                 return -EMSGSIZE;
1847
1848         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1849                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1850
1851         return 0;
1852 }
1853
1854 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1855         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1856 };
1857
1858 static int inet_validate_link_af(const struct net_device *dev,
1859                                  const struct nlattr *nla)
1860 {
1861         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1862         int err, rem;
1863
1864         if (dev && !__in_dev_get_rcu(dev))
1865                 return -EAFNOSUPPORT;
1866
1867         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1868         if (err < 0)
1869                 return err;
1870
1871         if (tb[IFLA_INET_CONF]) {
1872                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1873                         int cfgid = nla_type(a);
1874
1875                         if (nla_len(a) < 4)
1876                                 return -EINVAL;
1877
1878                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1879                                 return -EINVAL;
1880                 }
1881         }
1882
1883         return 0;
1884 }
1885
1886 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1887 {
1888         struct in_device *in_dev = __in_dev_get_rcu(dev);
1889         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1890         int rem;
1891
1892         if (!in_dev)
1893                 return -EAFNOSUPPORT;
1894
1895         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1896                 BUG();
1897
1898         if (tb[IFLA_INET_CONF]) {
1899                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1900                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1901         }
1902
1903         return 0;
1904 }
1905
1906 static int inet_netconf_msgsize_devconf(int type)
1907 {
1908         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1909                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1910         bool all = false;
1911
1912         if (type == NETCONFA_ALL)
1913                 all = true;
1914
1915         if (all || type == NETCONFA_FORWARDING)
1916                 size += nla_total_size(4);
1917         if (all || type == NETCONFA_RP_FILTER)
1918                 size += nla_total_size(4);
1919         if (all || type == NETCONFA_MC_FORWARDING)
1920                 size += nla_total_size(4);
1921         if (all || type == NETCONFA_BC_FORWARDING)
1922                 size += nla_total_size(4);
1923         if (all || type == NETCONFA_PROXY_NEIGH)
1924                 size += nla_total_size(4);
1925         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1926                 size += nla_total_size(4);
1927
1928         return size;
1929 }
1930
1931 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1932                                      struct ipv4_devconf *devconf, u32 portid,
1933                                      u32 seq, int event, unsigned int flags,
1934                                      int type)
1935 {
1936         struct nlmsghdr  *nlh;
1937         struct netconfmsg *ncm;
1938         bool all = false;
1939
1940         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1941                         flags);
1942         if (!nlh)
1943                 return -EMSGSIZE;
1944
1945         if (type == NETCONFA_ALL)
1946                 all = true;
1947
1948         ncm = nlmsg_data(nlh);
1949         ncm->ncm_family = AF_INET;
1950
1951         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1952                 goto nla_put_failure;
1953
1954         if (!devconf)
1955                 goto out;
1956
1957         if ((all || type == NETCONFA_FORWARDING) &&
1958             nla_put_s32(skb, NETCONFA_FORWARDING,
1959                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1960                 goto nla_put_failure;
1961         if ((all || type == NETCONFA_RP_FILTER) &&
1962             nla_put_s32(skb, NETCONFA_RP_FILTER,
1963                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1964                 goto nla_put_failure;
1965         if ((all || type == NETCONFA_MC_FORWARDING) &&
1966             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1967                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1968                 goto nla_put_failure;
1969         if ((all || type == NETCONFA_BC_FORWARDING) &&
1970             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
1971                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
1972                 goto nla_put_failure;
1973         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1974             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1975                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1976                 goto nla_put_failure;
1977         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1978             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1979                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1980                 goto nla_put_failure;
1981
1982 out:
1983         nlmsg_end(skb, nlh);
1984         return 0;
1985
1986 nla_put_failure:
1987         nlmsg_cancel(skb, nlh);
1988         return -EMSGSIZE;
1989 }
1990
1991 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1992                                  int ifindex, struct ipv4_devconf *devconf)
1993 {
1994         struct sk_buff *skb;
1995         int err = -ENOBUFS;
1996
1997         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1998         if (!skb)
1999                 goto errout;
2000
2001         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2002                                         event, 0, type);
2003         if (err < 0) {
2004                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2005                 WARN_ON(err == -EMSGSIZE);
2006                 kfree_skb(skb);
2007                 goto errout;
2008         }
2009         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2010         return;
2011 errout:
2012         if (err < 0)
2013                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2014 }
2015
2016 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2017         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2018         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2019         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2020         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2021         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2022 };
2023
2024 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2025                                     struct nlmsghdr *nlh,
2026                                     struct netlink_ext_ack *extack)
2027 {
2028         struct net *net = sock_net(in_skb->sk);
2029         struct nlattr *tb[NETCONFA_MAX+1];
2030         struct netconfmsg *ncm;
2031         struct sk_buff *skb;
2032         struct ipv4_devconf *devconf;
2033         struct in_device *in_dev;
2034         struct net_device *dev;
2035         int ifindex;
2036         int err;
2037
2038         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2039                           devconf_ipv4_policy, extack);
2040         if (err < 0)
2041                 goto errout;
2042
2043         err = -EINVAL;
2044         if (!tb[NETCONFA_IFINDEX])
2045                 goto errout;
2046
2047         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2048         switch (ifindex) {
2049         case NETCONFA_IFINDEX_ALL:
2050                 devconf = net->ipv4.devconf_all;
2051                 break;
2052         case NETCONFA_IFINDEX_DEFAULT:
2053                 devconf = net->ipv4.devconf_dflt;
2054                 break;
2055         default:
2056                 dev = __dev_get_by_index(net, ifindex);
2057                 if (!dev)
2058                         goto errout;
2059                 in_dev = __in_dev_get_rtnl(dev);
2060                 if (!in_dev)
2061                         goto errout;
2062                 devconf = &in_dev->cnf;
2063                 break;
2064         }
2065
2066         err = -ENOBUFS;
2067         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2068         if (!skb)
2069                 goto errout;
2070
2071         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2072                                         NETLINK_CB(in_skb).portid,
2073                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2074                                         NETCONFA_ALL);
2075         if (err < 0) {
2076                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2077                 WARN_ON(err == -EMSGSIZE);
2078                 kfree_skb(skb);
2079                 goto errout;
2080         }
2081         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2082 errout:
2083         return err;
2084 }
2085
2086 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2087                                      struct netlink_callback *cb)
2088 {
2089         const struct nlmsghdr *nlh = cb->nlh;
2090         struct net *net = sock_net(skb->sk);
2091         int h, s_h;
2092         int idx, s_idx;
2093         struct net_device *dev;
2094         struct in_device *in_dev;
2095         struct hlist_head *head;
2096
2097         if (cb->strict_check) {
2098                 struct netlink_ext_ack *extack = cb->extack;
2099                 struct netconfmsg *ncm;
2100
2101                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2102                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2103                         return -EINVAL;
2104                 }
2105
2106                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2107                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2108                         return -EINVAL;
2109                 }
2110         }
2111
2112         s_h = cb->args[0];
2113         s_idx = idx = cb->args[1];
2114
2115         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2116                 idx = 0;
2117                 head = &net->dev_index_head[h];
2118                 rcu_read_lock();
2119                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2120                           net->dev_base_seq;
2121                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2122                         if (idx < s_idx)
2123                                 goto cont;
2124                         in_dev = __in_dev_get_rcu(dev);
2125                         if (!in_dev)
2126                                 goto cont;
2127
2128                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2129                                                       &in_dev->cnf,
2130                                                       NETLINK_CB(cb->skb).portid,
2131                                                       nlh->nlmsg_seq,
2132                                                       RTM_NEWNETCONF,
2133                                                       NLM_F_MULTI,
2134                                                       NETCONFA_ALL) < 0) {
2135                                 rcu_read_unlock();
2136                                 goto done;
2137                         }
2138                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2139 cont:
2140                         idx++;
2141                 }
2142                 rcu_read_unlock();
2143         }
2144         if (h == NETDEV_HASHENTRIES) {
2145                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2146                                               net->ipv4.devconf_all,
2147                                               NETLINK_CB(cb->skb).portid,
2148                                               nlh->nlmsg_seq,
2149                                               RTM_NEWNETCONF, NLM_F_MULTI,
2150                                               NETCONFA_ALL) < 0)
2151                         goto done;
2152                 else
2153                         h++;
2154         }
2155         if (h == NETDEV_HASHENTRIES + 1) {
2156                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2157                                               net->ipv4.devconf_dflt,
2158                                               NETLINK_CB(cb->skb).portid,
2159                                               nlh->nlmsg_seq,
2160                                               RTM_NEWNETCONF, NLM_F_MULTI,
2161                                               NETCONFA_ALL) < 0)
2162                         goto done;
2163                 else
2164                         h++;
2165         }
2166 done:
2167         cb->args[0] = h;
2168         cb->args[1] = idx;
2169
2170         return skb->len;
2171 }
2172
2173 #ifdef CONFIG_SYSCTL
2174
2175 static void devinet_copy_dflt_conf(struct net *net, int i)
2176 {
2177         struct net_device *dev;
2178
2179         rcu_read_lock();
2180         for_each_netdev_rcu(net, dev) {
2181                 struct in_device *in_dev;
2182
2183                 in_dev = __in_dev_get_rcu(dev);
2184                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2185                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2186         }
2187         rcu_read_unlock();
2188 }
2189
2190 /* called with RTNL locked */
2191 static void inet_forward_change(struct net *net)
2192 {
2193         struct net_device *dev;
2194         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2195
2196         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2197         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2198         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2199                                     NETCONFA_FORWARDING,
2200                                     NETCONFA_IFINDEX_ALL,
2201                                     net->ipv4.devconf_all);
2202         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2203                                     NETCONFA_FORWARDING,
2204                                     NETCONFA_IFINDEX_DEFAULT,
2205                                     net->ipv4.devconf_dflt);
2206
2207         for_each_netdev(net, dev) {
2208                 struct in_device *in_dev;
2209
2210                 if (on)
2211                         dev_disable_lro(dev);
2212
2213                 in_dev = __in_dev_get_rtnl(dev);
2214                 if (in_dev) {
2215                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2216                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2217                                                     NETCONFA_FORWARDING,
2218                                                     dev->ifindex, &in_dev->cnf);
2219                 }
2220         }
2221 }
2222
2223 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2224 {
2225         if (cnf == net->ipv4.devconf_dflt)
2226                 return NETCONFA_IFINDEX_DEFAULT;
2227         else if (cnf == net->ipv4.devconf_all)
2228                 return NETCONFA_IFINDEX_ALL;
2229         else {
2230                 struct in_device *idev
2231                         = container_of(cnf, struct in_device, cnf);
2232                 return idev->dev->ifindex;
2233         }
2234 }
2235
2236 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2237                              void __user *buffer,
2238                              size_t *lenp, loff_t *ppos)
2239 {
2240         int old_value = *(int *)ctl->data;
2241         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2242         int new_value = *(int *)ctl->data;
2243
2244         if (write) {
2245                 struct ipv4_devconf *cnf = ctl->extra1;
2246                 struct net *net = ctl->extra2;
2247                 int i = (int *)ctl->data - cnf->data;
2248                 int ifindex;
2249
2250                 set_bit(i, cnf->state);
2251
2252                 if (cnf == net->ipv4.devconf_dflt)
2253                         devinet_copy_dflt_conf(net, i);
2254                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2255                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2256                         if ((new_value == 0) && (old_value != 0))
2257                                 rt_cache_flush(net);
2258
2259                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2260                     new_value != old_value)
2261                         rt_cache_flush(net);
2262
2263                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2264                     new_value != old_value) {
2265                         ifindex = devinet_conf_ifindex(net, cnf);
2266                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2267                                                     NETCONFA_RP_FILTER,
2268                                                     ifindex, cnf);
2269                 }
2270                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2271                     new_value != old_value) {
2272                         ifindex = devinet_conf_ifindex(net, cnf);
2273                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2274                                                     NETCONFA_PROXY_NEIGH,
2275                                                     ifindex, cnf);
2276                 }
2277                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2278                     new_value != old_value) {
2279                         ifindex = devinet_conf_ifindex(net, cnf);
2280                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2281                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2282                                                     ifindex, cnf);
2283                 }
2284         }
2285
2286         return ret;
2287 }
2288
2289 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2290                                   void __user *buffer,
2291                                   size_t *lenp, loff_t *ppos)
2292 {
2293         int *valp = ctl->data;
2294         int val = *valp;
2295         loff_t pos = *ppos;
2296         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2297
2298         if (write && *valp != val) {
2299                 struct net *net = ctl->extra2;
2300
2301                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2302                         if (!rtnl_trylock()) {
2303                                 /* Restore the original values before restarting */
2304                                 *valp = val;
2305                                 *ppos = pos;
2306                                 return restart_syscall();
2307                         }
2308                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2309                                 inet_forward_change(net);
2310                         } else {
2311                                 struct ipv4_devconf *cnf = ctl->extra1;
2312                                 struct in_device *idev =
2313                                         container_of(cnf, struct in_device, cnf);
2314                                 if (*valp)
2315                                         dev_disable_lro(idev->dev);
2316                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2317                                                             NETCONFA_FORWARDING,
2318                                                             idev->dev->ifindex,
2319                                                             cnf);
2320                         }
2321                         rtnl_unlock();
2322                         rt_cache_flush(net);
2323                 } else
2324                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2325                                                     NETCONFA_FORWARDING,
2326                                                     NETCONFA_IFINDEX_DEFAULT,
2327                                                     net->ipv4.devconf_dflt);
2328         }
2329
2330         return ret;
2331 }
2332
2333 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2334                                 void __user *buffer,
2335                                 size_t *lenp, loff_t *ppos)
2336 {
2337         int *valp = ctl->data;
2338         int val = *valp;
2339         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2340         struct net *net = ctl->extra2;
2341
2342         if (write && *valp != val)
2343                 rt_cache_flush(net);
2344
2345         return ret;
2346 }
2347
/*
 * Initializer for one ctl_table entry backed by the IPV4_DEVCONF_<attr>
 * slot of the global ipv4_devconf: .data points at that slot (slot index
 * is IPV4_DEVCONF_<attr> - 1) and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2370
2371 static struct devinet_sysctl_table {
2372         struct ctl_table_header *sysctl_header;
2373         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2374 } devinet_sysctl = {
2375         .devinet_vars = {
2376                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2377                                              devinet_sysctl_forward),
2378                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2379                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2380
2381                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2382                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2383                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2384                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2385                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2386                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2387                                         "accept_source_route"),
2388                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2389                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2390                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2391                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2392                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2393                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2394                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2395                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2396                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2397                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2398                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2399                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2400                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2401                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2402                                         "force_igmp_version"),
2403                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2404                                         "igmpv2_unsolicited_report_interval"),
2405                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2406                                         "igmpv3_unsolicited_report_interval"),
2407                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2408                                         "ignore_routes_with_linkdown"),
2409                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2410                                         "drop_gratuitous_arp"),
2411
2412                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2413                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2414                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2415                                               "promote_secondaries"),
2416                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2417                                               "route_localnet"),
2418                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2419                                               "drop_unicast_in_l2_multicast"),
2420         },
2421 };
2422
2423 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2424                                      int ifindex, struct ipv4_devconf *p)
2425 {
2426         int i;
2427         struct devinet_sysctl_table *t;
2428         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2429
2430         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2431         if (!t)
2432                 goto out;
2433
2434         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2435                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2436                 t->devinet_vars[i].extra1 = p;
2437                 t->devinet_vars[i].extra2 = net;
2438         }
2439
2440         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2441
2442         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2443         if (!t->sysctl_header)
2444                 goto free;
2445
2446         p->sysctl = t;
2447
2448         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2449                                     ifindex, p);
2450         return 0;
2451
2452 free:
2453         kfree(t);
2454 out:
2455         return -ENOBUFS;
2456 }
2457
2458 static void __devinet_sysctl_unregister(struct net *net,
2459                                         struct ipv4_devconf *cnf, int ifindex)
2460 {
2461         struct devinet_sysctl_table *t = cnf->sysctl;
2462
2463         if (t) {
2464                 cnf->sysctl = NULL;
2465                 unregister_net_sysctl_table(t->sysctl_header);
2466                 kfree(t);
2467         }
2468
2469         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2470 }
2471
2472 static int devinet_sysctl_register(struct in_device *idev)
2473 {
2474         int err;
2475
2476         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2477                 return -EINVAL;
2478
2479         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2480         if (err)
2481                 return err;
2482         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2483                                         idev->dev->ifindex, &idev->cnf);
2484         if (err)
2485                 neigh_sysctl_unregister(idev->arp_parms);
2486         return err;
2487 }
2488
2489 static void devinet_sysctl_unregister(struct in_device *idev)
2490 {
2491         struct net *net = dev_net(idev->dev);
2492
2493         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2494         neigh_sysctl_unregister(idev->arp_parms);
2495 }
2496
2497 static struct ctl_table ctl_forward_entry[] = {
2498         {
2499                 .procname       = "ip_forward",
2500                 .data           = &ipv4_devconf.data[
2501                                         IPV4_DEVCONF_FORWARDING - 1],
2502                 .maxlen         = sizeof(int),
2503                 .mode           = 0644,
2504                 .proc_handler   = devinet_sysctl_forward,
2505                 .extra1         = &ipv4_devconf,
2506                 .extra2         = &init_net,
2507         },
2508         { },
2509 };
2510 #endif
2511
2512 static __net_init int devinet_init_net(struct net *net)
2513 {
2514         int err;
2515         struct ipv4_devconf *all, *dflt;
2516 #ifdef CONFIG_SYSCTL
2517         struct ctl_table *tbl = ctl_forward_entry;
2518         struct ctl_table_header *forw_hdr;
2519 #endif
2520
2521         err = -ENOMEM;
2522         all = &ipv4_devconf;
2523         dflt = &ipv4_devconf_dflt;
2524
2525         if (!net_eq(net, &init_net)) {
2526                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2527                 if (!all)
2528                         goto err_alloc_all;
2529
2530                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2531                 if (!dflt)
2532                         goto err_alloc_dflt;
2533
2534 #ifdef CONFIG_SYSCTL
2535                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2536                 if (!tbl)
2537                         goto err_alloc_ctl;
2538
2539                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2540                 tbl[0].extra1 = all;
2541                 tbl[0].extra2 = net;
2542 #endif
2543         }
2544
2545 #ifdef CONFIG_SYSCTL
2546         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2547         if (err < 0)
2548                 goto err_reg_all;
2549
2550         err = __devinet_sysctl_register(net, "default",
2551                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2552         if (err < 0)
2553                 goto err_reg_dflt;
2554
2555         err = -ENOMEM;
2556         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2557         if (!forw_hdr)
2558                 goto err_reg_ctl;
2559         net->ipv4.forw_hdr = forw_hdr;
2560 #endif
2561
2562         net->ipv4.devconf_all = all;
2563         net->ipv4.devconf_dflt = dflt;
2564         return 0;
2565
2566 #ifdef CONFIG_SYSCTL
2567 err_reg_ctl:
2568         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2569 err_reg_dflt:
2570         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2571 err_reg_all:
2572         if (tbl != ctl_forward_entry)
2573                 kfree(tbl);
2574 err_alloc_ctl:
2575 #endif
2576         if (dflt != &ipv4_devconf_dflt)
2577                 kfree(dflt);
2578 err_alloc_dflt:
2579         if (all != &ipv4_devconf)
2580                 kfree(all);
2581 err_alloc_all:
2582         return err;
2583 }
2584
2585 static __net_exit void devinet_exit_net(struct net *net)
2586 {
2587 #ifdef CONFIG_SYSCTL
2588         struct ctl_table *tbl;
2589
2590         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2591         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2592         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2593                                     NETCONFA_IFINDEX_DEFAULT);
2594         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2595                                     NETCONFA_IFINDEX_ALL);
2596         kfree(tbl);
2597 #endif
2598         kfree(net->ipv4.devconf_dflt);
2599         kfree(net->ipv4.devconf_all);
2600 }
2601
2602 static __net_initdata struct pernet_operations devinet_ops = {
2603         .init = devinet_init_net,
2604         .exit = devinet_exit_net,
2605 };
2606
2607 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2608         .family           = AF_INET,
2609         .fill_link_af     = inet_fill_link_af,
2610         .get_link_af_size = inet_get_link_af_size,
2611         .validate_link_af = inet_validate_link_af,
2612         .set_link_af      = inet_set_link_af,
2613 };
2614
2615 void __init devinet_init(void)
2616 {
2617         int i;
2618
2619         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2620                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2621
2622         register_pernet_subsys(&devinet_ops);
2623
2624         register_gifconf(PF_INET, inet_gifconf);
2625         register_netdevice_notifier(&ip_netdev_notifier);
2626
2627         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2628
2629         rtnl_af_register(&inet_af_ops);
2630
2631         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2632         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2633         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2634         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2635                       inet_netconf_dump_devconf, 0);
2636 }