x86/speculation/l1tf: Exempt zeroed PTEs from inversion
[muen/linux.git] / ipc / shm.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * linux/ipc/shm.c
4  * Copyright (C) 1992, 1993 Krishna Balasubramanian
5  *       Many improvements/fixes by Bruno Haible.
6  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
7  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
8  *
9  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
10  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
11  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
12  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
13  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
14  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
15  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
16  *
17  * support for audit of ipc object properties and permission changes
18  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19  *
20  * namespaces support
21  * OpenVZ, SWsoft Inc.
22  * Pavel Emelianov <xemul@openvz.org>
23  *
24  * Better ipc lock (kern_ipc_perm.lock) handling
25  * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
26  */
27
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/hugetlb.h>
31 #include <linux/shm.h>
32 #include <linux/init.h>
33 #include <linux/file.h>
34 #include <linux/mman.h>
35 #include <linux/shmem_fs.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/audit.h>
39 #include <linux/capability.h>
40 #include <linux/ptrace.h>
41 #include <linux/seq_file.h>
42 #include <linux/rwsem.h>
43 #include <linux/nsproxy.h>
44 #include <linux/mount.h>
45 #include <linux/ipc_namespace.h>
46 #include <linux/rhashtable.h>
47
48 #include <linux/uaccess.h>
49
50 #include "util.h"
51
52 struct shmid_kernel /* private to the kernel */
53 {
54         struct kern_ipc_perm    shm_perm;
55         struct file             *shm_file;
56         unsigned long           shm_nattch;
57         unsigned long           shm_segsz;
58         time64_t                shm_atim;
59         time64_t                shm_dtim;
60         time64_t                shm_ctim;
61         struct pid              *shm_cprid;
62         struct pid              *shm_lprid;
63         struct user_struct      *mlock_user;
64
65         /* The task created the shm object.  NULL if the task is dead. */
66         struct task_struct      *shm_creator;
67         struct list_head        shm_clist;      /* list by creator */
68 } __randomize_layout;
69
/*
 * Flags kept in the upper byte of shm_perm.mode.
 */
/* Segment will be destroyed on last detach. */
#define SHM_DEST        01000
/* Segment will not be swapped. */
#define SHM_LOCKED      02000
73
/*
 * Per-open state hung off file->private_data of the shm wrapper file
 * (see the shm_file_data() accessor).
 */
struct shm_file_data {
        /* IPC id used to look the segment up again via shm_lock(). */
        int id;
        /* Namespace the id lives in; released with put_ipc_ns() in shm_release(). */
        struct ipc_namespace *ns;
        /* Backing file whose f_op/mmap the wrapper forwards to. */
        struct file *file;
        /* vm_ops installed by the backing file's mmap; shm_vm_ops delegates to them. */
        const struct vm_operations_struct *vm_ops;
};
80
81 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
82
83 static const struct file_operations shm_file_operations;
84 static const struct vm_operations_struct shm_vm_ops;
85
86 #define shm_ids(ns)     ((ns)->ids[IPC_SHM_IDS])
87
88 #define shm_unlock(shp)                 \
89         ipc_unlock(&(shp)->shm_perm)
90
/* Forward declarations for helpers used before their definition. */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
/* /proc/sysvipc/shm row formatter, registered from shm_init(). */
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
98
99 int shm_init_ns(struct ipc_namespace *ns)
100 {
101         ns->shm_ctlmax = SHMMAX;
102         ns->shm_ctlall = SHMALL;
103         ns->shm_ctlmni = SHMMNI;
104         ns->shm_rmid_forced = 0;
105         ns->shm_tot = 0;
106         return ipc_init_ids(&shm_ids(ns));
107 }
108
109 /*
110  * Called with shm_ids.rwsem (writer) and the shp structure locked.
111  * Only shm_ids.rwsem remains locked on exit.
112  */
113 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
114 {
115         struct shmid_kernel *shp;
116
117         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
118
119         if (shp->shm_nattch) {
120                 shp->shm_perm.mode |= SHM_DEST;
121                 /* Do not find it any more */
122                 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
123                 shm_unlock(shp);
124         } else
125                 shm_destroy(ns, shp);
126 }
127
#ifdef CONFIG_IPC_NS
/*
 * shm_exit_ns - release the shm state of an IPC namespace
 * @ns: namespace being torn down
 *
 * Runs do_shm_rmid() on every remaining segment via free_ipcs(), then
 * destroys the idr and the key hash table of the shm id table.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
        free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
        idr_destroy(&shm_ids(ns).ipcs_idr);
        rhashtable_destroy(&shm_ids(ns).key_ht);
}
#endif
136
137 static int __init ipc_ns_init(void)
138 {
139         const int err = shm_init_ns(&init_ipc_ns);
140         WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
141         return err;
142 }
143
144 pure_initcall(ipc_ns_init);
145
146 void __init shm_init(void)
147 {
148         ipc_init_proc_interface("sysvipc/shm",
149 #if BITS_PER_LONG <= 32
150                                 "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
151 #else
152                                 "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
153 #endif
154                                 IPC_SHM_IDS, sysvipc_shm_proc_show);
155 }
156
157 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
158 {
159         struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
160
161         if (IS_ERR(ipcp))
162                 return ERR_CAST(ipcp);
163
164         return container_of(ipcp, struct shmid_kernel, shm_perm);
165 }
166
167 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
168 {
169         struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
170
171         if (IS_ERR(ipcp))
172                 return ERR_CAST(ipcp);
173
174         return container_of(ipcp, struct shmid_kernel, shm_perm);
175 }
176
177 /*
178  * shm_lock_(check_) routines are called in the paths where the rwsem
179  * is not necessarily held.
180  */
181 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
182 {
183         struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
184
185         /*
186          * Callers of shm_lock() must validate the status of the returned ipc
187          * object pointer (as returned by ipc_lock()), and error out as
188          * appropriate.
189          */
190         if (IS_ERR(ipcp))
191                 return (void *)ipcp;
192         return container_of(ipcp, struct shmid_kernel, shm_perm);
193 }
194
195 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
196 {
197         rcu_read_lock();
198         ipc_lock_object(&ipcp->shm_perm);
199 }
200
201 static void shm_rcu_free(struct rcu_head *head)
202 {
203         struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
204                                                         rcu);
205         struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
206                                                         shm_perm);
207         security_shm_free(&shp->shm_perm);
208         kvfree(shp);
209 }
210
211 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
212 {
213         list_del(&s->shm_clist);
214         ipc_rmid(&shm_ids(ns), &s->shm_perm);
215 }
216
217
218 static int __shm_open(struct vm_area_struct *vma)
219 {
220         struct file *file = vma->vm_file;
221         struct shm_file_data *sfd = shm_file_data(file);
222         struct shmid_kernel *shp;
223
224         shp = shm_lock(sfd->ns, sfd->id);
225
226         if (IS_ERR(shp))
227                 return PTR_ERR(shp);
228
229         if (shp->shm_file != sfd->file) {
230                 /* ID was reused */
231                 shm_unlock(shp);
232                 return -EINVAL;
233         }
234
235         shp->shm_atim = ktime_get_real_seconds();
236         ipc_update_pid(&shp->shm_lprid, task_tgid(current));
237         shp->shm_nattch++;
238         shm_unlock(shp);
239         return 0;
240 }
241
/*
 * vm_ops->open hook.  This is called by fork, once for every shm attach.
 */
static void shm_open(struct vm_area_struct *vma)
{
        int ret;

        ret = __shm_open(vma);

        /*
         * A failure means we raced in the idr lookup or with
         * shm_destroy().  Either way, the ID is busted.
         */
        WARN_ON_ONCE(ret);
}
252
253 /*
254  * shm_destroy - free the struct shmid_kernel
255  *
256  * @ns: namespace
257  * @shp: struct to free
258  *
259  * It has to be called with shp and shm_ids.rwsem (writer) locked,
260  * but returns with shp unlocked and freed.
261  */
262 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
263 {
264         struct file *shm_file;
265
266         shm_file = shp->shm_file;
267         shp->shm_file = NULL;
268         ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
269         shm_rmid(ns, shp);
270         shm_unlock(shp);
271         if (!is_file_hugepages(shm_file))
272                 shmem_lock(shm_file, 0, shp->mlock_user);
273         else if (shp->mlock_user)
274                 user_shm_unlock(i_size_read(file_inode(shm_file)),
275                                 shp->mlock_user);
276         fput(shm_file);
277         ipc_update_pid(&shp->shm_cprid, NULL);
278         ipc_update_pid(&shp->shm_lprid, NULL);
279         ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
280 }
281
282 /*
283  * shm_may_destroy - identifies whether shm segment should be destroyed now
284  *
285  * Returns true if and only if there are no active users of the segment and
286  * one of the following is true:
287  *
288  * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
289  *
290  * 2) sysctl kernel.shm_rmid_forced is set to 1.
291  */
292 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
293 {
294         return (shp->shm_nattch == 0) &&
295                (ns->shm_rmid_forced ||
296                 (shp->shm_perm.mode & SHM_DEST));
297 }
298
299 /*
300  * remove the attach descriptor vma.
301  * free memory for segment if it is marked destroyed.
302  * The descriptor has already been removed from the current->mm->mmap list
303  * and will later be kfree()d.
304  */
305 static void shm_close(struct vm_area_struct *vma)
306 {
307         struct file *file = vma->vm_file;
308         struct shm_file_data *sfd = shm_file_data(file);
309         struct shmid_kernel *shp;
310         struct ipc_namespace *ns = sfd->ns;
311
312         down_write(&shm_ids(ns).rwsem);
313         /* remove from the list of attaches of the shm segment */
314         shp = shm_lock(ns, sfd->id);
315
316         /*
317          * We raced in the idr lookup or with shm_destroy().
318          * Either way, the ID is busted.
319          */
320         if (WARN_ON_ONCE(IS_ERR(shp)))
321                 goto done; /* no-op */
322
323         ipc_update_pid(&shp->shm_lprid, task_tgid(current));
324         shp->shm_dtim = ktime_get_real_seconds();
325         shp->shm_nattch--;
326         if (shm_may_destroy(ns, shp))
327                 shm_destroy(ns, shp);
328         else
329                 shm_unlock(shp);
330 done:
331         up_write(&shm_ids(ns).rwsem);
332 }
333
334 /* Called with ns->shm_ids(ns).rwsem locked */
335 static int shm_try_destroy_orphaned(int id, void *p, void *data)
336 {
337         struct ipc_namespace *ns = data;
338         struct kern_ipc_perm *ipcp = p;
339         struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
340
341         /*
342          * We want to destroy segments without users and with already
343          * exit'ed originating process.
344          *
345          * As shp->* are changed under rwsem, it's safe to skip shp locking.
346          */
347         if (shp->shm_creator != NULL)
348                 return 0;
349
350         if (shm_may_destroy(ns, shp)) {
351                 shm_lock_by_ptr(shp);
352                 shm_destroy(ns, shp);
353         }
354         return 0;
355 }
356
357 void shm_destroy_orphaned(struct ipc_namespace *ns)
358 {
359         down_write(&shm_ids(ns).rwsem);
360         if (shm_ids(ns).in_use)
361                 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
362         up_write(&shm_ids(ns).rwsem);
363 }
364
365 /* Locking assumes this will only be called with task == current */
366 void exit_shm(struct task_struct *task)
367 {
368         struct ipc_namespace *ns = task->nsproxy->ipc_ns;
369         struct shmid_kernel *shp, *n;
370
371         if (list_empty(&task->sysvshm.shm_clist))
372                 return;
373
374         /*
375          * If kernel.shm_rmid_forced is not set then only keep track of
376          * which shmids are orphaned, so that a later set of the sysctl
377          * can clean them up.
378          */
379         if (!ns->shm_rmid_forced) {
380                 down_read(&shm_ids(ns).rwsem);
381                 list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
382                         shp->shm_creator = NULL;
383                 /*
384                  * Only under read lock but we are only called on current
385                  * so no entry on the list will be shared.
386                  */
387                 list_del(&task->sysvshm.shm_clist);
388                 up_read(&shm_ids(ns).rwsem);
389                 return;
390         }
391
392         /*
393          * Destroy all already created segments, that were not yet mapped,
394          * and mark any mapped as orphan to cover the sysctl toggling.
395          * Destroy is skipped if shm_may_destroy() returns false.
396          */
397         down_write(&shm_ids(ns).rwsem);
398         list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
399                 shp->shm_creator = NULL;
400
401                 if (shm_may_destroy(ns, shp)) {
402                         shm_lock_by_ptr(shp);
403                         shm_destroy(ns, shp);
404                 }
405         }
406
407         /* Remove the list head from any segments still attached. */
408         list_del(&task->sysvshm.shm_clist);
409         up_write(&shm_ids(ns).rwsem);
410 }
411
412 static vm_fault_t shm_fault(struct vm_fault *vmf)
413 {
414         struct file *file = vmf->vma->vm_file;
415         struct shm_file_data *sfd = shm_file_data(file);
416
417         return sfd->vm_ops->fault(vmf);
418 }
419
420 static int shm_split(struct vm_area_struct *vma, unsigned long addr)
421 {
422         struct file *file = vma->vm_file;
423         struct shm_file_data *sfd = shm_file_data(file);
424
425         if (sfd->vm_ops->split)
426                 return sfd->vm_ops->split(vma, addr);
427
428         return 0;
429 }
430
431 static unsigned long shm_pagesize(struct vm_area_struct *vma)
432 {
433         struct file *file = vma->vm_file;
434         struct shm_file_data *sfd = shm_file_data(file);
435
436         if (sfd->vm_ops->pagesize)
437                 return sfd->vm_ops->pagesize(vma);
438
439         return PAGE_SIZE;
440 }
441
442 #ifdef CONFIG_NUMA
443 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
444 {
445         struct file *file = vma->vm_file;
446         struct shm_file_data *sfd = shm_file_data(file);
447         int err = 0;
448
449         if (sfd->vm_ops->set_policy)
450                 err = sfd->vm_ops->set_policy(vma, new);
451         return err;
452 }
453
454 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
455                                         unsigned long addr)
456 {
457         struct file *file = vma->vm_file;
458         struct shm_file_data *sfd = shm_file_data(file);
459         struct mempolicy *pol = NULL;
460
461         if (sfd->vm_ops->get_policy)
462                 pol = sfd->vm_ops->get_policy(vma, addr);
463         else if (vma->vm_policy)
464                 pol = vma->vm_policy;
465
466         return pol;
467 }
468 #endif
469
470 static int shm_mmap(struct file *file, struct vm_area_struct *vma)
471 {
472         struct shm_file_data *sfd = shm_file_data(file);
473         int ret;
474
475         /*
476          * In case of remap_file_pages() emulation, the file can represent an
477          * IPC ID that was removed, and possibly even reused by another shm
478          * segment already.  Propagate this case as an error to caller.
479          */
480         ret = __shm_open(vma);
481         if (ret)
482                 return ret;
483
484         ret = call_mmap(sfd->file, vma);
485         if (ret) {
486                 shm_close(vma);
487                 return ret;
488         }
489         sfd->vm_ops = vma->vm_ops;
490 #ifdef CONFIG_MMU
491         WARN_ON(!sfd->vm_ops->fault);
492 #endif
493         vma->vm_ops = &shm_vm_ops;
494         return 0;
495 }
496
497 static int shm_release(struct inode *ino, struct file *file)
498 {
499         struct shm_file_data *sfd = shm_file_data(file);
500
501         put_ipc_ns(sfd->ns);
502         fput(sfd->file);
503         shm_file_data(file) = NULL;
504         kfree(sfd);
505         return 0;
506 }
507
508 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
509 {
510         struct shm_file_data *sfd = shm_file_data(file);
511
512         if (!sfd->file->f_op->fsync)
513                 return -EINVAL;
514         return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
515 }
516
517 static long shm_fallocate(struct file *file, int mode, loff_t offset,
518                           loff_t len)
519 {
520         struct shm_file_data *sfd = shm_file_data(file);
521
522         if (!sfd->file->f_op->fallocate)
523                 return -EOPNOTSUPP;
524         return sfd->file->f_op->fallocate(file, mode, offset, len);
525 }
526
527 static unsigned long shm_get_unmapped_area(struct file *file,
528         unsigned long addr, unsigned long len, unsigned long pgoff,
529         unsigned long flags)
530 {
531         struct shm_file_data *sfd = shm_file_data(file);
532
533         return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
534                                                 pgoff, flags);
535 }
536
537 static const struct file_operations shm_file_operations = {
538         .mmap           = shm_mmap,
539         .fsync          = shm_fsync,
540         .release        = shm_release,
541         .get_unmapped_area      = shm_get_unmapped_area,
542         .llseek         = noop_llseek,
543         .fallocate      = shm_fallocate,
544 };
545
546 /*
547  * shm_file_operations_huge is now identical to shm_file_operations,
548  * but we keep it distinct for the sake of is_file_shm_hugepages().
549  */
550 static const struct file_operations shm_file_operations_huge = {
551         .mmap           = shm_mmap,
552         .fsync          = shm_fsync,
553         .release        = shm_release,
554         .get_unmapped_area      = shm_get_unmapped_area,
555         .llseek         = noop_llseek,
556         .fallocate      = shm_fallocate,
557 };
558
559 bool is_file_shm_hugepages(struct file *file)
560 {
561         return file->f_op == &shm_file_operations_huge;
562 }
563
564 static const struct vm_operations_struct shm_vm_ops = {
565         .open   = shm_open,     /* callback for a new vm-area open */
566         .close  = shm_close,    /* callback for when the vm-area is released */
567         .fault  = shm_fault,
568         .split  = shm_split,
569         .pagesize = shm_pagesize,
570 #if defined(CONFIG_NUMA)
571         .set_policy = shm_set_policy,
572         .get_policy = shm_get_policy,
573 #endif
574 };
575
576 /**
577  * newseg - Create a new shared memory segment
578  * @ns: namespace
579  * @params: ptr to the structure that contains key, size and shmflg
580  *
581  * Called with shm_ids.rwsem held as a writer.
582  */
583 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
584 {
585         key_t key = params->key;
586         int shmflg = params->flg;
587         size_t size = params->u.size;
588         int error;
589         struct shmid_kernel *shp;
590         size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
591         struct file *file;
592         char name[13];
593         vm_flags_t acctflag = 0;
594
595         if (size < SHMMIN || size > ns->shm_ctlmax)
596                 return -EINVAL;
597
598         if (numpages << PAGE_SHIFT < size)
599                 return -ENOSPC;
600
601         if (ns->shm_tot + numpages < ns->shm_tot ||
602                         ns->shm_tot + numpages > ns->shm_ctlall)
603                 return -ENOSPC;
604
605         shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
606         if (unlikely(!shp))
607                 return -ENOMEM;
608
609         shp->shm_perm.key = key;
610         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
611         shp->mlock_user = NULL;
612
613         shp->shm_perm.security = NULL;
614         error = security_shm_alloc(&shp->shm_perm);
615         if (error) {
616                 kvfree(shp);
617                 return error;
618         }
619
620         sprintf(name, "SYSV%08x", key);
621         if (shmflg & SHM_HUGETLB) {
622                 struct hstate *hs;
623                 size_t hugesize;
624
625                 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
626                 if (!hs) {
627                         error = -EINVAL;
628                         goto no_file;
629                 }
630                 hugesize = ALIGN(size, huge_page_size(hs));
631
632                 /* hugetlb_file_setup applies strict accounting */
633                 if (shmflg & SHM_NORESERVE)
634                         acctflag = VM_NORESERVE;
635                 file = hugetlb_file_setup(name, hugesize, acctflag,
636                                   &shp->mlock_user, HUGETLB_SHMFS_INODE,
637                                 (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
638         } else {
639                 /*
640                  * Do not allow no accounting for OVERCOMMIT_NEVER, even
641                  * if it's asked for.
642                  */
643                 if  ((shmflg & SHM_NORESERVE) &&
644                                 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
645                         acctflag = VM_NORESERVE;
646                 file = shmem_kernel_file_setup(name, size, acctflag);
647         }
648         error = PTR_ERR(file);
649         if (IS_ERR(file))
650                 goto no_file;
651
652         shp->shm_cprid = get_pid(task_tgid(current));
653         shp->shm_lprid = NULL;
654         shp->shm_atim = shp->shm_dtim = 0;
655         shp->shm_ctim = ktime_get_real_seconds();
656         shp->shm_segsz = size;
657         shp->shm_nattch = 0;
658         shp->shm_file = file;
659         shp->shm_creator = current;
660
661         /* ipc_addid() locks shp upon success. */
662         error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
663         if (error < 0)
664                 goto no_id;
665
666         list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
667
668         /*
669          * shmid gets reported as "inode#" in /proc/pid/maps.
670          * proc-ps tools use this. Changing this will break them.
671          */
672         file_inode(file)->i_ino = shp->shm_perm.id;
673
674         ns->shm_tot += numpages;
675         error = shp->shm_perm.id;
676
677         ipc_unlock_object(&shp->shm_perm);
678         rcu_read_unlock();
679         return error;
680
681 no_id:
682         ipc_update_pid(&shp->shm_cprid, NULL);
683         ipc_update_pid(&shp->shm_lprid, NULL);
684         if (is_file_hugepages(file) && shp->mlock_user)
685                 user_shm_unlock(size, shp->mlock_user);
686         fput(file);
687 no_file:
688         call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
689         return error;
690 }
691
692 /*
693  * Called with shm_ids.rwsem and ipcp locked.
694  */
695 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
696                                 struct ipc_params *params)
697 {
698         struct shmid_kernel *shp;
699
700         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
701         if (shp->shm_segsz < params->u.size)
702                 return -EINVAL;
703
704         return 0;
705 }
706
707 long ksys_shmget(key_t key, size_t size, int shmflg)
708 {
709         struct ipc_namespace *ns;
710         static const struct ipc_ops shm_ops = {
711                 .getnew = newseg,
712                 .associate = security_shm_associate,
713                 .more_checks = shm_more_checks,
714         };
715         struct ipc_params shm_params;
716
717         ns = current->nsproxy->ipc_ns;
718
719         shm_params.key = key;
720         shm_params.flg = shmflg;
721         shm_params.u.size = size;
722
723         return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
724 }
725
726 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
727 {
728         return ksys_shmget(key, size, shmflg);
729 }
730
731 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
732 {
733         switch (version) {
734         case IPC_64:
735                 return copy_to_user(buf, in, sizeof(*in));
736         case IPC_OLD:
737             {
738                 struct shmid_ds out;
739
740                 memset(&out, 0, sizeof(out));
741                 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
742                 out.shm_segsz   = in->shm_segsz;
743                 out.shm_atime   = in->shm_atime;
744                 out.shm_dtime   = in->shm_dtime;
745                 out.shm_ctime   = in->shm_ctime;
746                 out.shm_cpid    = in->shm_cpid;
747                 out.shm_lpid    = in->shm_lpid;
748                 out.shm_nattch  = in->shm_nattch;
749
750                 return copy_to_user(buf, &out, sizeof(out));
751             }
752         default:
753                 return -EINVAL;
754         }
755 }
756
757 static inline unsigned long
758 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
759 {
760         switch (version) {
761         case IPC_64:
762                 if (copy_from_user(out, buf, sizeof(*out)))
763                         return -EFAULT;
764                 return 0;
765         case IPC_OLD:
766             {
767                 struct shmid_ds tbuf_old;
768
769                 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
770                         return -EFAULT;
771
772                 out->shm_perm.uid       = tbuf_old.shm_perm.uid;
773                 out->shm_perm.gid       = tbuf_old.shm_perm.gid;
774                 out->shm_perm.mode      = tbuf_old.shm_perm.mode;
775
776                 return 0;
777             }
778         default:
779                 return -EINVAL;
780         }
781 }
782
783 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
784 {
785         switch (version) {
786         case IPC_64:
787                 return copy_to_user(buf, in, sizeof(*in));
788         case IPC_OLD:
789             {
790                 struct shminfo out;
791
792                 if (in->shmmax > INT_MAX)
793                         out.shmmax = INT_MAX;
794                 else
795                         out.shmmax = (int)in->shmmax;
796
797                 out.shmmin      = in->shmmin;
798                 out.shmmni      = in->shmmni;
799                 out.shmseg      = in->shmseg;
800                 out.shmall      = in->shmall;
801
802                 return copy_to_user(buf, &out, sizeof(out));
803             }
804         default:
805                 return -EINVAL;
806         }
807 }
808
809 /*
810  * Calculate and add used RSS and swap pages of a shm.
811  * Called with shm_ids.rwsem held as a reader
812  */
813 static void shm_add_rss_swap(struct shmid_kernel *shp,
814         unsigned long *rss_add, unsigned long *swp_add)
815 {
816         struct inode *inode;
817
818         inode = file_inode(shp->shm_file);
819
820         if (is_file_hugepages(shp->shm_file)) {
821                 struct address_space *mapping = inode->i_mapping;
822                 struct hstate *h = hstate_file(shp->shm_file);
823                 *rss_add += pages_per_huge_page(h) * mapping->nrpages;
824         } else {
825 #ifdef CONFIG_SHMEM
826                 struct shmem_inode_info *info = SHMEM_I(inode);
827
828                 spin_lock_irq(&info->lock);
829                 *rss_add += inode->i_mapping->nrpages;
830                 *swp_add += info->swapped;
831                 spin_unlock_irq(&info->lock);
832 #else
833                 *rss_add += inode->i_mapping->nrpages;
834 #endif
835         }
836 }
837
838 /*
839  * Called with shm_ids.rwsem held as a reader
840  */
841 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
842                 unsigned long *swp)
843 {
844         int next_id;
845         int total, in_use;
846
847         *rss = 0;
848         *swp = 0;
849
850         in_use = shm_ids(ns).in_use;
851
852         for (total = 0, next_id = 0; total < in_use; next_id++) {
853                 struct kern_ipc_perm *ipc;
854                 struct shmid_kernel *shp;
855
856                 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
857                 if (ipc == NULL)
858                         continue;
859                 shp = container_of(ipc, struct shmid_kernel, shm_perm);
860
861                 shm_add_rss_swap(shp, rss, swp);
862
863                 total++;
864         }
865 }
866
867 /*
868  * This function handles some shmctl commands which require the rwsem
869  * to be held in write mode.
870  * NOTE: no locks must be held, the rwsem is taken inside this function.
871  */
872 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
873                        struct shmid64_ds *shmid64)
874 {
875         struct kern_ipc_perm *ipcp;
876         struct shmid_kernel *shp;
877         int err;
878
879         down_write(&shm_ids(ns).rwsem);
880         rcu_read_lock();
881
882         ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
883                                       &shmid64->shm_perm, 0);
884         if (IS_ERR(ipcp)) {
885                 err = PTR_ERR(ipcp);
886                 goto out_unlock1;
887         }
888
889         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
890
891         err = security_shm_shmctl(&shp->shm_perm, cmd);
892         if (err)
893                 goto out_unlock1;
894
895         switch (cmd) {
896         case IPC_RMID:
897                 ipc_lock_object(&shp->shm_perm);
898                 /* do_shm_rmid unlocks the ipc object and rcu */
899                 do_shm_rmid(ns, ipcp);
900                 goto out_up;
901         case IPC_SET:
902                 ipc_lock_object(&shp->shm_perm);
903                 err = ipc_update_perm(&shmid64->shm_perm, ipcp);
904                 if (err)
905                         goto out_unlock0;
906                 shp->shm_ctim = ktime_get_real_seconds();
907                 break;
908         default:
909                 err = -EINVAL;
910                 goto out_unlock1;
911         }
912
913 out_unlock0:
914         ipc_unlock_object(&shp->shm_perm);
915 out_unlock1:
916         rcu_read_unlock();
917 out_up:
918         up_write(&shm_ids(ns).rwsem);
919         return err;
920 }
921
922 static int shmctl_ipc_info(struct ipc_namespace *ns,
923                            struct shminfo64 *shminfo)
924 {
925         int err = security_shm_shmctl(NULL, IPC_INFO);
926         if (!err) {
927                 memset(shminfo, 0, sizeof(*shminfo));
928                 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
929                 shminfo->shmmax = ns->shm_ctlmax;
930                 shminfo->shmall = ns->shm_ctlall;
931                 shminfo->shmmin = SHMMIN;
932                 down_read(&shm_ids(ns).rwsem);
933                 err = ipc_get_maxid(&shm_ids(ns));
934                 up_read(&shm_ids(ns).rwsem);
935                 if (err < 0)
936                         err = 0;
937         }
938         return err;
939 }
940
941 static int shmctl_shm_info(struct ipc_namespace *ns,
942                            struct shm_info *shm_info)
943 {
944         int err = security_shm_shmctl(NULL, SHM_INFO);
945         if (!err) {
946                 memset(shm_info, 0, sizeof(*shm_info));
947                 down_read(&shm_ids(ns).rwsem);
948                 shm_info->used_ids = shm_ids(ns).in_use;
949                 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
950                 shm_info->shm_tot = ns->shm_tot;
951                 shm_info->swap_attempts = 0;
952                 shm_info->swap_successes = 0;
953                 err = ipc_get_maxid(&shm_ids(ns));
954                 up_read(&shm_ids(ns).rwsem);
955                 if (err < 0)
956                         err = 0;
957         }
958         return err;
959 }
960
961 static int shmctl_stat(struct ipc_namespace *ns, int shmid,
962                         int cmd, struct shmid64_ds *tbuf)
963 {
964         struct shmid_kernel *shp;
965         int id = 0;
966         int err;
967
968         memset(tbuf, 0, sizeof(*tbuf));
969
970         rcu_read_lock();
971         if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
972                 shp = shm_obtain_object(ns, shmid);
973                 if (IS_ERR(shp)) {
974                         err = PTR_ERR(shp);
975                         goto out_unlock;
976                 }
977                 id = shp->shm_perm.id;
978         } else { /* IPC_STAT */
979                 shp = shm_obtain_object_check(ns, shmid);
980                 if (IS_ERR(shp)) {
981                         err = PTR_ERR(shp);
982                         goto out_unlock;
983                 }
984         }
985
986         /*
987          * Semantically SHM_STAT_ANY ought to be identical to
988          * that functionality provided by the /proc/sysvipc/
989          * interface. As such, only audit these calls and
990          * do not do traditional S_IRUGO permission checks on
991          * the ipc object.
992          */
993         if (cmd == SHM_STAT_ANY)
994                 audit_ipc_obj(&shp->shm_perm);
995         else {
996                 err = -EACCES;
997                 if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
998                         goto out_unlock;
999         }
1000
1001         err = security_shm_shmctl(&shp->shm_perm, cmd);
1002         if (err)
1003                 goto out_unlock;
1004
1005         ipc_lock_object(&shp->shm_perm);
1006
1007         if (!ipc_valid_object(&shp->shm_perm)) {
1008                 ipc_unlock_object(&shp->shm_perm);
1009                 err = -EIDRM;
1010                 goto out_unlock;
1011         }
1012
1013         kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
1014         tbuf->shm_segsz = shp->shm_segsz;
1015         tbuf->shm_atime = shp->shm_atim;
1016         tbuf->shm_dtime = shp->shm_dtim;
1017         tbuf->shm_ctime = shp->shm_ctim;
1018 #ifndef CONFIG_64BIT
1019         tbuf->shm_atime_high = shp->shm_atim >> 32;
1020         tbuf->shm_dtime_high = shp->shm_dtim >> 32;
1021         tbuf->shm_ctime_high = shp->shm_ctim >> 32;
1022 #endif
1023         tbuf->shm_cpid  = pid_vnr(shp->shm_cprid);
1024         tbuf->shm_lpid  = pid_vnr(shp->shm_lprid);
1025         tbuf->shm_nattch = shp->shm_nattch;
1026
1027         ipc_unlock_object(&shp->shm_perm);
1028         rcu_read_unlock();
1029         return id;
1030
1031 out_unlock:
1032         rcu_read_unlock();
1033         return err;
1034 }
1035
1036 static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
1037 {
1038         struct shmid_kernel *shp;
1039         struct file *shm_file;
1040         int err;
1041
1042         rcu_read_lock();
1043         shp = shm_obtain_object_check(ns, shmid);
1044         if (IS_ERR(shp)) {
1045                 err = PTR_ERR(shp);
1046                 goto out_unlock1;
1047         }
1048
1049         audit_ipc_obj(&(shp->shm_perm));
1050         err = security_shm_shmctl(&shp->shm_perm, cmd);
1051         if (err)
1052                 goto out_unlock1;
1053
1054         ipc_lock_object(&shp->shm_perm);
1055
1056         /* check if shm_destroy() is tearing down shp */
1057         if (!ipc_valid_object(&shp->shm_perm)) {
1058                 err = -EIDRM;
1059                 goto out_unlock0;
1060         }
1061
1062         if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
1063                 kuid_t euid = current_euid();
1064
1065                 if (!uid_eq(euid, shp->shm_perm.uid) &&
1066                     !uid_eq(euid, shp->shm_perm.cuid)) {
1067                         err = -EPERM;
1068                         goto out_unlock0;
1069                 }
1070                 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
1071                         err = -EPERM;
1072                         goto out_unlock0;
1073                 }
1074         }
1075
1076         shm_file = shp->shm_file;
1077         if (is_file_hugepages(shm_file))
1078                 goto out_unlock0;
1079
1080         if (cmd == SHM_LOCK) {
1081                 struct user_struct *user = current_user();
1082
1083                 err = shmem_lock(shm_file, 1, user);
1084                 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1085                         shp->shm_perm.mode |= SHM_LOCKED;
1086                         shp->mlock_user = user;
1087                 }
1088                 goto out_unlock0;
1089         }
1090
1091         /* SHM_UNLOCK */
1092         if (!(shp->shm_perm.mode & SHM_LOCKED))
1093                 goto out_unlock0;
1094         shmem_lock(shm_file, 0, shp->mlock_user);
1095         shp->shm_perm.mode &= ~SHM_LOCKED;
1096         shp->mlock_user = NULL;
1097         get_file(shm_file);
1098         ipc_unlock_object(&shp->shm_perm);
1099         rcu_read_unlock();
1100         shmem_unlock_mapping(shm_file->f_mapping);
1101
1102         fput(shm_file);
1103         return err;
1104
1105 out_unlock0:
1106         ipc_unlock_object(&shp->shm_perm);
1107 out_unlock1:
1108         rcu_read_unlock();
1109         return err;
1110 }
1111
1112 long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
1113 {
1114         int err, version;
1115         struct ipc_namespace *ns;
1116         struct shmid64_ds sem64;
1117
1118         if (cmd < 0 || shmid < 0)
1119                 return -EINVAL;
1120
1121         version = ipc_parse_version(&cmd);
1122         ns = current->nsproxy->ipc_ns;
1123
1124         switch (cmd) {
1125         case IPC_INFO: {
1126                 struct shminfo64 shminfo;
1127                 err = shmctl_ipc_info(ns, &shminfo);
1128                 if (err < 0)
1129                         return err;
1130                 if (copy_shminfo_to_user(buf, &shminfo, version))
1131                         err = -EFAULT;
1132                 return err;
1133         }
1134         case SHM_INFO: {
1135                 struct shm_info shm_info;
1136                 err = shmctl_shm_info(ns, &shm_info);
1137                 if (err < 0)
1138                         return err;
1139                 if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1140                         err = -EFAULT;
1141                 return err;
1142         }
1143         case SHM_STAT:
1144         case SHM_STAT_ANY:
1145         case IPC_STAT: {
1146                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1147                 if (err < 0)
1148                         return err;
1149                 if (copy_shmid_to_user(buf, &sem64, version))
1150                         err = -EFAULT;
1151                 return err;
1152         }
1153         case IPC_SET:
1154                 if (copy_shmid_from_user(&sem64, buf, version))
1155                         return -EFAULT;
1156                 /* fallthru */
1157         case IPC_RMID:
1158                 return shmctl_down(ns, shmid, cmd, &sem64);
1159         case SHM_LOCK:
1160         case SHM_UNLOCK:
1161                 return shmctl_do_lock(ns, shmid, cmd);
1162         default:
1163                 return -EINVAL;
1164         }
1165 }
1166
1167 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1168 {
1169         return ksys_shmctl(shmid, cmd, buf);
1170 }
1171
1172 #ifdef CONFIG_COMPAT
1173
1174 struct compat_shmid_ds {
1175         struct compat_ipc_perm shm_perm;
1176         int shm_segsz;
1177         compat_time_t shm_atime;
1178         compat_time_t shm_dtime;
1179         compat_time_t shm_ctime;
1180         compat_ipc_pid_t shm_cpid;
1181         compat_ipc_pid_t shm_lpid;
1182         unsigned short shm_nattch;
1183         unsigned short shm_unused;
1184         compat_uptr_t shm_unused2;
1185         compat_uptr_t shm_unused3;
1186 };
1187
1188 struct compat_shminfo64 {
1189         compat_ulong_t shmmax;
1190         compat_ulong_t shmmin;
1191         compat_ulong_t shmmni;
1192         compat_ulong_t shmseg;
1193         compat_ulong_t shmall;
1194         compat_ulong_t __unused1;
1195         compat_ulong_t __unused2;
1196         compat_ulong_t __unused3;
1197         compat_ulong_t __unused4;
1198 };
1199
1200 struct compat_shm_info {
1201         compat_int_t used_ids;
1202         compat_ulong_t shm_tot, shm_rss, shm_swp;
1203         compat_ulong_t swap_attempts, swap_successes;
1204 };
1205
1206 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1207                                         int version)
1208 {
1209         if (in->shmmax > INT_MAX)
1210                 in->shmmax = INT_MAX;
1211         if (version == IPC_64) {
1212                 struct compat_shminfo64 info;
1213                 memset(&info, 0, sizeof(info));
1214                 info.shmmax = in->shmmax;
1215                 info.shmmin = in->shmmin;
1216                 info.shmmni = in->shmmni;
1217                 info.shmseg = in->shmseg;
1218                 info.shmall = in->shmall;
1219                 return copy_to_user(buf, &info, sizeof(info));
1220         } else {
1221                 struct shminfo info;
1222                 memset(&info, 0, sizeof(info));
1223                 info.shmmax = in->shmmax;
1224                 info.shmmin = in->shmmin;
1225                 info.shmmni = in->shmmni;
1226                 info.shmseg = in->shmseg;
1227                 info.shmall = in->shmall;
1228                 return copy_to_user(buf, &info, sizeof(info));
1229         }
1230 }
1231
1232 static int put_compat_shm_info(struct shm_info *ip,
1233                                 struct compat_shm_info __user *uip)
1234 {
1235         struct compat_shm_info info;
1236
1237         memset(&info, 0, sizeof(info));
1238         info.used_ids = ip->used_ids;
1239         info.shm_tot = ip->shm_tot;
1240         info.shm_rss = ip->shm_rss;
1241         info.shm_swp = ip->shm_swp;
1242         info.swap_attempts = ip->swap_attempts;
1243         info.swap_successes = ip->swap_successes;
1244         return copy_to_user(uip, &info, sizeof(info));
1245 }
1246
1247 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
1248                                         int version)
1249 {
1250         if (version == IPC_64) {
1251                 struct compat_shmid64_ds v;
1252                 memset(&v, 0, sizeof(v));
1253                 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
1254                 v.shm_atime      = lower_32_bits(in->shm_atime);
1255                 v.shm_atime_high = upper_32_bits(in->shm_atime);
1256                 v.shm_dtime      = lower_32_bits(in->shm_dtime);
1257                 v.shm_dtime_high = upper_32_bits(in->shm_dtime);
1258                 v.shm_ctime      = lower_32_bits(in->shm_ctime);
1259                 v.shm_ctime_high = upper_32_bits(in->shm_ctime);
1260                 v.shm_segsz = in->shm_segsz;
1261                 v.shm_nattch = in->shm_nattch;
1262                 v.shm_cpid = in->shm_cpid;
1263                 v.shm_lpid = in->shm_lpid;
1264                 return copy_to_user(buf, &v, sizeof(v));
1265         } else {
1266                 struct compat_shmid_ds v;
1267                 memset(&v, 0, sizeof(v));
1268                 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
1269                 v.shm_perm.key = in->shm_perm.key;
1270                 v.shm_atime = in->shm_atime;
1271                 v.shm_dtime = in->shm_dtime;
1272                 v.shm_ctime = in->shm_ctime;
1273                 v.shm_segsz = in->shm_segsz;
1274                 v.shm_nattch = in->shm_nattch;
1275                 v.shm_cpid = in->shm_cpid;
1276                 v.shm_lpid = in->shm_lpid;
1277                 return copy_to_user(buf, &v, sizeof(v));
1278         }
1279 }
1280
1281 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1282                                         int version)
1283 {
1284         memset(out, 0, sizeof(*out));
1285         if (version == IPC_64) {
1286                 struct compat_shmid64_ds __user *p = buf;
1287                 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1288         } else {
1289                 struct compat_shmid_ds __user *p = buf;
1290                 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1291         }
1292 }
1293
1294 long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
1295 {
1296         struct ipc_namespace *ns;
1297         struct shmid64_ds sem64;
1298         int version = compat_ipc_parse_version(&cmd);
1299         int err;
1300
1301         ns = current->nsproxy->ipc_ns;
1302
1303         if (cmd < 0 || shmid < 0)
1304                 return -EINVAL;
1305
1306         switch (cmd) {
1307         case IPC_INFO: {
1308                 struct shminfo64 shminfo;
1309                 err = shmctl_ipc_info(ns, &shminfo);
1310                 if (err < 0)
1311                         return err;
1312                 if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1313                         err = -EFAULT;
1314                 return err;
1315         }
1316         case SHM_INFO: {
1317                 struct shm_info shm_info;
1318                 err = shmctl_shm_info(ns, &shm_info);
1319                 if (err < 0)
1320                         return err;
1321                 if (put_compat_shm_info(&shm_info, uptr))
1322                         err = -EFAULT;
1323                 return err;
1324         }
1325         case IPC_STAT:
1326         case SHM_STAT_ANY:
1327         case SHM_STAT:
1328                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1329                 if (err < 0)
1330                         return err;
1331                 if (copy_compat_shmid_to_user(uptr, &sem64, version))
1332                         err = -EFAULT;
1333                 return err;
1334
1335         case IPC_SET:
1336                 if (copy_compat_shmid_from_user(&sem64, uptr, version))
1337                         return -EFAULT;
1338                 /* fallthru */
1339         case IPC_RMID:
1340                 return shmctl_down(ns, shmid, cmd, &sem64);
1341         case SHM_LOCK:
1342         case SHM_UNLOCK:
1343                 return shmctl_do_lock(ns, shmid, cmd);
1344                 break;
1345         default:
1346                 return -EINVAL;
1347         }
1348         return err;
1349 }
1350
1351 COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
1352 {
1353         return compat_ksys_shmctl(shmid, cmd, uptr);
1354 }
1355 #endif
1356
1357 /*
1358  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1359  *
1360  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1361  * "raddr" thing points to kernel space, and there has to be a wrapper around
1362  * this.
1363  */
1364 long do_shmat(int shmid, char __user *shmaddr, int shmflg,
1365               ulong *raddr, unsigned long shmlba)
1366 {
1367         struct shmid_kernel *shp;
1368         unsigned long addr = (unsigned long)shmaddr;
1369         unsigned long size;
1370         struct file *file, *base;
1371         int    err;
1372         unsigned long flags = MAP_SHARED;
1373         unsigned long prot;
1374         int acc_mode;
1375         struct ipc_namespace *ns;
1376         struct shm_file_data *sfd;
1377         int f_flags;
1378         unsigned long populate = 0;
1379
1380         err = -EINVAL;
1381         if (shmid < 0)
1382                 goto out;
1383
1384         if (addr) {
1385                 if (addr & (shmlba - 1)) {
1386                         if (shmflg & SHM_RND) {
1387                                 addr &= ~(shmlba - 1);  /* round down */
1388
1389                                 /*
1390                                  * Ensure that the round-down is non-nil
1391                                  * when remapping. This can happen for
1392                                  * cases when addr < shmlba.
1393                                  */
1394                                 if (!addr && (shmflg & SHM_REMAP))
1395                                         goto out;
1396                         } else
1397 #ifndef __ARCH_FORCE_SHMLBA
1398                                 if (addr & ~PAGE_MASK)
1399 #endif
1400                                         goto out;
1401                 }
1402
1403                 flags |= MAP_FIXED;
1404         } else if ((shmflg & SHM_REMAP))
1405                 goto out;
1406
1407         if (shmflg & SHM_RDONLY) {
1408                 prot = PROT_READ;
1409                 acc_mode = S_IRUGO;
1410                 f_flags = O_RDONLY;
1411         } else {
1412                 prot = PROT_READ | PROT_WRITE;
1413                 acc_mode = S_IRUGO | S_IWUGO;
1414                 f_flags = O_RDWR;
1415         }
1416         if (shmflg & SHM_EXEC) {
1417                 prot |= PROT_EXEC;
1418                 acc_mode |= S_IXUGO;
1419         }
1420
1421         /*
1422          * We cannot rely on the fs check since SYSV IPC does have an
1423          * additional creator id...
1424          */
1425         ns = current->nsproxy->ipc_ns;
1426         rcu_read_lock();
1427         shp = shm_obtain_object_check(ns, shmid);
1428         if (IS_ERR(shp)) {
1429                 err = PTR_ERR(shp);
1430                 goto out_unlock;
1431         }
1432
1433         err = -EACCES;
1434         if (ipcperms(ns, &shp->shm_perm, acc_mode))
1435                 goto out_unlock;
1436
1437         err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
1438         if (err)
1439                 goto out_unlock;
1440
1441         ipc_lock_object(&shp->shm_perm);
1442
1443         /* check if shm_destroy() is tearing down shp */
1444         if (!ipc_valid_object(&shp->shm_perm)) {
1445                 ipc_unlock_object(&shp->shm_perm);
1446                 err = -EIDRM;
1447                 goto out_unlock;
1448         }
1449
1450         /*
1451          * We need to take a reference to the real shm file to prevent the
1452          * pointer from becoming stale in cases where the lifetime of the outer
1453          * file extends beyond that of the shm segment.  It's not usually
1454          * possible, but it can happen during remap_file_pages() emulation as
1455          * that unmaps the memory, then does ->mmap() via file reference only.
1456          * We'll deny the ->mmap() if the shm segment was since removed, but to
1457          * detect shm ID reuse we need to compare the file pointers.
1458          */
1459         base = get_file(shp->shm_file);
1460         shp->shm_nattch++;
1461         size = i_size_read(file_inode(base));
1462         ipc_unlock_object(&shp->shm_perm);
1463         rcu_read_unlock();
1464
1465         err = -ENOMEM;
1466         sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1467         if (!sfd) {
1468                 fput(base);
1469                 goto out_nattch;
1470         }
1471
1472         file = alloc_file_clone(base, f_flags,
1473                           is_file_hugepages(base) ?
1474                                 &shm_file_operations_huge :
1475                                 &shm_file_operations);
1476         err = PTR_ERR(file);
1477         if (IS_ERR(file)) {
1478                 kfree(sfd);
1479                 fput(base);
1480                 goto out_nattch;
1481         }
1482
1483         sfd->id = shp->shm_perm.id;
1484         sfd->ns = get_ipc_ns(ns);
1485         sfd->file = base;
1486         sfd->vm_ops = NULL;
1487         file->private_data = sfd;
1488
1489         err = security_mmap_file(file, prot, flags);
1490         if (err)
1491                 goto out_fput;
1492
1493         if (down_write_killable(&current->mm->mmap_sem)) {
1494                 err = -EINTR;
1495                 goto out_fput;
1496         }
1497
1498         if (addr && !(shmflg & SHM_REMAP)) {
1499                 err = -EINVAL;
1500                 if (addr + size < addr)
1501                         goto invalid;
1502
1503                 if (find_vma_intersection(current->mm, addr, addr + size))
1504                         goto invalid;
1505         }
1506
1507         addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
1508         *raddr = addr;
1509         err = 0;
1510         if (IS_ERR_VALUE(addr))
1511                 err = (long)addr;
1512 invalid:
1513         up_write(&current->mm->mmap_sem);
1514         if (populate)
1515                 mm_populate(addr, populate);
1516
1517 out_fput:
1518         fput(file);
1519
1520 out_nattch:
1521         down_write(&shm_ids(ns).rwsem);
1522         shp = shm_lock(ns, shmid);
1523         shp->shm_nattch--;
1524         if (shm_may_destroy(ns, shp))
1525                 shm_destroy(ns, shp);
1526         else
1527                 shm_unlock(shp);
1528         up_write(&shm_ids(ns).rwsem);
1529         return err;
1530
1531 out_unlock:
1532         rcu_read_unlock();
1533 out:
1534         return err;
1535 }
1536
1537 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1538 {
1539         unsigned long ret;
1540         long err;
1541
1542         err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1543         if (err)
1544                 return err;
1545         force_successful_syscall_return();
1546         return (long)ret;
1547 }
1548
1549 #ifdef CONFIG_COMPAT
1550
1551 #ifndef COMPAT_SHMLBA
1552 #define COMPAT_SHMLBA   SHMLBA
1553 #endif
1554
1555 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1556 {
1557         unsigned long ret;
1558         long err;
1559
1560         err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1561         if (err)
1562                 return err;
1563         force_successful_syscall_return();
1564         return (long)ret;
1565 }
1566 #endif
1567
1568 /*
1569  * detach and kill segment if marked destroyed.
1570  * The work is done in shm_close.
1571  */
1572 long ksys_shmdt(char __user *shmaddr)
1573 {
1574         struct mm_struct *mm = current->mm;
1575         struct vm_area_struct *vma;
1576         unsigned long addr = (unsigned long)shmaddr;
1577         int retval = -EINVAL;
1578 #ifdef CONFIG_MMU
1579         loff_t size = 0;
1580         struct file *file;
1581         struct vm_area_struct *next;
1582 #endif
1583
1584         if (addr & ~PAGE_MASK)
1585                 return retval;
1586
1587         if (down_write_killable(&mm->mmap_sem))
1588                 return -EINTR;
1589
1590         /*
1591          * This function tries to be smart and unmap shm segments that
1592          * were modified by partial mlock or munmap calls:
1593          * - It first determines the size of the shm segment that should be
1594          *   unmapped: It searches for a vma that is backed by shm and that
1595          *   started at address shmaddr. It records it's size and then unmaps
1596          *   it.
1597          * - Then it unmaps all shm vmas that started at shmaddr and that
1598          *   are within the initially determined size and that are from the
1599          *   same shm segment from which we determined the size.
1600          * Errors from do_munmap are ignored: the function only fails if
1601          * it's called with invalid parameters or if it's called to unmap
1602          * a part of a vma. Both calls in this function are for full vmas,
1603          * the parameters are directly copied from the vma itself and always
1604          * valid - therefore do_munmap cannot fail. (famous last words?)
1605          */
1606         /*
1607          * If it had been mremap()'d, the starting address would not
1608          * match the usual checks anyway. So assume all vma's are
1609          * above the starting address given.
1610          */
1611         vma = find_vma(mm, addr);
1612
1613 #ifdef CONFIG_MMU
1614         while (vma) {
1615                 next = vma->vm_next;
1616
1617                 /*
1618                  * Check if the starting address would match, i.e. it's
1619                  * a fragment created by mprotect() and/or munmap(), or it
1620                  * otherwise it starts at this address with no hassles.
1621                  */
1622                 if ((vma->vm_ops == &shm_vm_ops) &&
1623                         (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1624
1625                         /*
1626                          * Record the file of the shm segment being
1627                          * unmapped.  With mremap(), someone could place
1628                          * page from another segment but with equal offsets
1629                          * in the range we are unmapping.
1630                          */
1631                         file = vma->vm_file;
1632                         size = i_size_read(file_inode(vma->vm_file));
1633                         do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1634                         /*
1635                          * We discovered the size of the shm segment, so
1636                          * break out of here and fall through to the next
1637                          * loop that uses the size information to stop
1638                          * searching for matching vma's.
1639                          */
1640                         retval = 0;
1641                         vma = next;
1642                         break;
1643                 }
1644                 vma = next;
1645         }
1646
1647         /*
1648          * We need look no further than the maximum address a fragment
1649          * could possibly have landed at. Also cast things to loff_t to
1650          * prevent overflows and make comparisons vs. equal-width types.
1651          */
1652         size = PAGE_ALIGN(size);
1653         while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1654                 next = vma->vm_next;
1655
1656                 /* finding a matching vma now does not alter retval */
1657                 if ((vma->vm_ops == &shm_vm_ops) &&
1658                     ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
1659                     (vma->vm_file == file))
1660                         do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1661                 vma = next;
1662         }
1663
1664 #else   /* CONFIG_MMU */
1665         /* under NOMMU conditions, the exact address to be destroyed must be
1666          * given
1667          */
1668         if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1669                 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1670                 retval = 0;
1671         }
1672
1673 #endif
1674
1675         up_write(&mm->mmap_sem);
1676         return retval;
1677 }
1678
1679 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1680 {
1681         return ksys_shmdt(shmaddr);
1682 }
1683
1684 #ifdef CONFIG_PROC_FS
1685 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1686 {
1687         struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1688         struct user_namespace *user_ns = seq_user_ns(s);
1689         struct kern_ipc_perm *ipcp = it;
1690         struct shmid_kernel *shp;
1691         unsigned long rss = 0, swp = 0;
1692
1693         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1694         shm_add_rss_swap(shp, &rss, &swp);
1695
1696 #if BITS_PER_LONG <= 32
1697 #define SIZE_SPEC "%10lu"
1698 #else
1699 #define SIZE_SPEC "%21lu"
1700 #endif
1701
1702         seq_printf(s,
1703                    "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1704                    "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
1705                    SIZE_SPEC " " SIZE_SPEC "\n",
1706                    shp->shm_perm.key,
1707                    shp->shm_perm.id,
1708                    shp->shm_perm.mode,
1709                    shp->shm_segsz,
1710                    pid_nr_ns(shp->shm_cprid, pid_ns),
1711                    pid_nr_ns(shp->shm_lprid, pid_ns),
1712                    shp->shm_nattch,
1713                    from_kuid_munged(user_ns, shp->shm_perm.uid),
1714                    from_kgid_munged(user_ns, shp->shm_perm.gid),
1715                    from_kuid_munged(user_ns, shp->shm_perm.cuid),
1716                    from_kgid_munged(user_ns, shp->shm_perm.cgid),
1717                    shp->shm_atim,
1718                    shp->shm_dtim,
1719                    shp->shm_ctim,
1720                    rss * PAGE_SIZE,
1721                    swp * PAGE_SIZE);
1722
1723         return 0;
1724 }
1725 #endif