mm/oom_kill.c: add tracepoints for oom reaper-related events
author Roman Gushchin <guro@fb.com>
Mon, 10 Jul 2017 22:49:05 +0000 (15:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Jul 2017 23:32:32 +0000 (16:32 -0700)
During the debugging of the problem described in
https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa in
https://lkml.org/lkml/2017/5/19/383 , I've found that the existing debug
output is not really useful for understanding issues related to the oom
reaper.

So I assume that adding some tracepoints might help with debugging
similar issues.

Trace the following events:
 1) a process is marked as an oom victim,
 2) a process is added to the oom reaper list,
 3) the oom reaper starts reaping a process's mm,
 4) the oom reaper finishes reaping,
 5) the oom reaper skips reaping.

How does it work in practice? Below is an example which shows how the
problem mentioned above can be found: one process is added twice to the
oom_reaper list:

  $ cd /sys/kernel/debug/tracing
  $ echo "oom:mark_victim" > set_event
  $ echo "oom:wake_reaper" >> set_event
  $ echo "oom:skip_task_reaping" >> set_event
  $ echo "oom:start_task_reaping" >> set_event
  $ echo "oom:finish_task_reaping" >> set_event
  $ cat trace_pipe
          allocate-502   [001] ....    91.836405: mark_victim: pid=502
          allocate-502   [001] .N..    91.837356: wake_reaper: pid=502
          allocate-502   [000] .N..    91.871149: wake_reaper: pid=502
        oom_reaper-23    [000] ....    91.871177: start_task_reaping: pid=502
        oom_reaper-23    [000] .N..    91.879511: finish_task_reaping: pid=502
        oom_reaper-23    [000] ....    91.879580: skip_task_reaping: pid=502

Link: http://lkml.kernel.org/r/20170530185231.GA13412@castle
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/trace/events/oom.h
mm/oom_kill.c

index 38baeb27221a98163d29321af6ef23acca65672a..c3c19d47ae5e3a6c0616b5206f659a9640986aa5 100644 (file)
@@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
                        __entry->wmark_check)
 );
 
                        __entry->wmark_check)
 );
 
+TRACE_EVENT(mark_victim,       /* oom:mark_victim - emitted at the end of mark_oom_victim() */
+       TP_PROTO(int pid),      /* pid: the call site passes tsk->pid */
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __field(int, pid)       /* the only field stored in the trace record */
+       ),
+
+       TP_fast_assign(
+               __entry->pid = pid;
+       ),
+
+       TP_printk("pid=%d", __entry->pid)       /* shown as "mark_victim: pid=<n>" in trace_pipe */
+);
+
+TRACE_EVENT(wake_reaper,       /* oom:wake_reaper - emitted by wake_oom_reaper() after the task is put on oom_reaper_list */
+       TP_PROTO(int pid),      /* pid: the call site passes tsk->pid */
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __field(int, pid)       /* the only field stored in the trace record */
+       ),
+
+       TP_fast_assign(
+               __entry->pid = pid;
+       ),
+
+       TP_printk("pid=%d", __entry->pid)       /* shown as "wake_reaper: pid=<n>" in trace_pipe */
+);
+
+TRACE_EVENT(start_task_reaping,        /* oom:start_task_reaping - emitted in __oom_reap_task_mm() once mmget_not_zero() has succeeded */
+       TP_PROTO(int pid),      /* pid: the call site passes tsk->pid */
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __field(int, pid)       /* the only field stored in the trace record */
+       ),
+
+       TP_fast_assign(
+               __entry->pid = pid;
+       ),
+
+       TP_printk("pid=%d", __entry->pid)       /* shown as "start_task_reaping: pid=<n>" in trace_pipe */
+);
+
+TRACE_EVENT(finish_task_reaping,       /* oom:finish_task_reaping - emitted in __oom_reap_task_mm() right after mmput_async(mm) */
+       TP_PROTO(int pid),      /* pid: the call site passes tsk->pid */
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __field(int, pid)       /* the only field stored in the trace record */
+       ),
+
+       TP_fast_assign(
+               __entry->pid = pid;
+       ),
+
+       TP_printk("pid=%d", __entry->pid)       /* shown as "finish_task_reaping: pid=<n>" in trace_pipe */
+);
+
+TRACE_EVENT(skip_task_reaping, /* oom:skip_task_reaping - emitted in __oom_reap_task_mm() when down_read_trylock() or mmget_not_zero() fails */
+       TP_PROTO(int pid),      /* pid: the call sites pass tsk->pid */
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __field(int, pid)       /* the only field stored in the trace record */
+       ),
+
+       TP_fast_assign(
+               __entry->pid = pid;
+       ),
+
+       TP_printk("pid=%d", __entry->pid)       /* shown as "skip_task_reaping: pid=<n>" in trace_pipe */
+);
+
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 
index 0e2c925e7826fe302e5db43c5940237f1c6eb0d6..9e8b4f030c1c43cb92da706306e1b9390658af7b 100644 (file)
@@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
        if (!down_read_trylock(&mm->mmap_sem)) {
                ret = false;
 
        if (!down_read_trylock(&mm->mmap_sem)) {
                ret = false;
+               trace_skip_task_reaping(tsk->pid);
                goto unlock_oom;
        }
 
                goto unlock_oom;
        }
 
@@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
         */
        if (!mmget_not_zero(mm)) {
                up_read(&mm->mmap_sem);
         */
        if (!mmget_not_zero(mm)) {
                up_read(&mm->mmap_sem);
+               trace_skip_task_reaping(tsk->pid);
                goto unlock_oom;
        }
 
                goto unlock_oom;
        }
 
+       trace_start_task_reaping(tsk->pid);
+
        /*
         * Tell all users of get_user/copy_from_user etc... that the content
         * is no longer stable. No barriers really needed because unmapping
        /*
         * Tell all users of get_user/copy_from_user etc... that the content
         * is no longer stable. No barriers really needed because unmapping
@@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
         * put the oom_reaper out of the way.
         */
        mmput_async(mm);
         * put the oom_reaper out of the way.
         */
        mmput_async(mm);
+       trace_finish_task_reaping(tsk->pid);
 unlock_oom:
        mutex_unlock(&oom_lock);
        return ret;
 unlock_oom:
        mutex_unlock(&oom_lock);
        return ret;
@@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
        tsk->oom_reaper_list = oom_reaper_list;
        oom_reaper_list = tsk;
        spin_unlock(&oom_reaper_lock);
        tsk->oom_reaper_list = oom_reaper_list;
        oom_reaper_list = tsk;
        spin_unlock(&oom_reaper_lock);
+       trace_wake_reaper(tsk->pid);
        wake_up(&oom_reaper_wait);
 }
 
        wake_up(&oom_reaper_wait);
 }
 
@@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
         */
        __thaw_task(tsk);
        atomic_inc(&oom_victims);
         */
        __thaw_task(tsk);
        atomic_inc(&oom_victims);
+       trace_mark_victim(tsk->pid);
 }
 
 /**
 }
 
 /**