Merge tag 'stackleak-v4.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[muen/linux.git] / kernel / fork.c
index 47911e4..07cddff 100644
@@ -224,9 +224,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
                return s->addr;
        }
 
+       /*
+        * Allocated stacks are cached and later reused by new threads,
+        * so memcg accounting is performed manually on assigning/releasing
+        * stacks to tasks. Drop __GFP_ACCOUNT.
+        */
        stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
                                     VMALLOC_START, VMALLOC_END,
-                                    THREADINFO_GFP,
+                                    THREADINFO_GFP & ~__GFP_ACCOUNT,
                                     PAGE_KERNEL,
                                     0, node, __builtin_return_address(0));
 
@@ -249,9 +254,19 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 static inline void free_thread_stack(struct task_struct *tsk)
 {
 #ifdef CONFIG_VMAP_STACK
-       if (task_stack_vm_area(tsk)) {
+       struct vm_struct *vm = task_stack_vm_area(tsk);
+
+       if (vm) {
                int i;
 
+               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+                       mod_memcg_page_state(vm->pages[i],
+                                            MEMCG_KERNEL_STACK_KB,
+                                            -(int)(PAGE_SIZE / 1024));
+
+                       memcg_kmem_uncharge(vm->pages[i], 0);
+               }
+
                for (i = 0; i < NR_CACHED_STACKS; i++) {
                        if (this_cpu_cmpxchg(cached_stacks[i],
                                        NULL, tsk->stack_vm_area) != NULL)
@@ -352,10 +367,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
                                            NR_KERNEL_STACK_KB,
                                            PAGE_SIZE / 1024 * account);
                }
-
-               /* All stack pages belong to the same memcg. */
-               mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB,
-                                    account * (THREAD_SIZE / 1024));
        } else {
                /*
                 * All stack pages are in the same zone and belong to the
@@ -371,6 +382,35 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
        }
 }
 
+static int memcg_charge_kernel_stack(struct task_struct *tsk)
+{
+#ifdef CONFIG_VMAP_STACK
+       struct vm_struct *vm = task_stack_vm_area(tsk);
+       int ret;
+
+       if (vm) {
+               int i;
+
+               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+                       /*
+                        * If memcg_kmem_charge() fails, page->mem_cgroup
+                        * pointer is NULL, and both memcg_kmem_uncharge()
+                        * and mod_memcg_page_state() in free_thread_stack()
+                        * will ignore this page. So it's safe.
+                        */
+                       ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0);
+                       if (ret)
+                               return ret;
+
+                       mod_memcg_page_state(vm->pages[i],
+                                            MEMCG_KERNEL_STACK_KB,
+                                            PAGE_SIZE / 1024);
+               }
+       }
+#endif
+       return 0;
+}
+
 static void release_task_stack(struct task_struct *tsk)
 {
        if (WARN_ON(tsk->state != TASK_DEAD))
@@ -551,8 +591,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        goto out;
        }
        /* a new mm has just been created */
-       arch_dup_mmap(oldmm, mm);
-       retval = 0;
+       retval = arch_dup_mmap(oldmm, mm);
 out:
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
@@ -809,6 +848,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
        if (!stack)
                goto free_tsk;
 
+       if (memcg_charge_kernel_stack(tsk))
+               goto free_stack;
+
        stack_vm_area = task_stack_vm_area(tsk);
 
        err = arch_dup_task_struct(tsk, orig);
@@ -1781,6 +1823,10 @@ static __latent_entropy struct task_struct *copy_process(
 
        p->default_timer_slack_ns = current->timer_slack_ns;
 
+#ifdef CONFIG_PSI
+       p->psi_flags = 0;
+#endif
+
        task_io_accounting_init(&p->ioac);
        acct_clear_integrals(p);