Merge tag 'trace-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
[muen/linux.git] / kernel / events / uprobes.c
index 2bf792d..96d4bee 100644 (file)
@@ -73,6 +73,7 @@ struct uprobe {
        struct uprobe_consumer  *consumers;
        struct inode            *inode;         /* Also hold a ref to inode */
        loff_t                  offset;
+       loff_t                  ref_ctr_offset;
        unsigned long           flags;
 
        /*
@@ -88,6 +89,15 @@ struct uprobe {
        struct arch_uprobe      arch;
 };
 
+/*
+ * A pending reference counter increment. update_ref_ctr() queues one of
+ * these when it is asked to increment the counter of @uprobe in @mm but
+ * finds no vma in @mm that can hold the counter. delayed_ref_ctr_inc()
+ * applies the increment and frees the entry once a suitable vma shows up.
+ */
+struct delayed_uprobe {
+       struct list_head list;          /* link in delayed_uprobe_list */
+       struct uprobe *uprobe;          /* probe whose counter is pending */
+       struct mm_struct *mm;           /* address space the increment is for */
+};
+
+/*
+ * delayed_uprobe_lock is taken by update_ref_ctr(), delayed_ref_ctr_inc()
+ * and uprobe_clear_state() around walks and modifications of
+ * delayed_uprobe_list.
+ */
+static DEFINE_MUTEX(delayed_uprobe_lock);
+static LIST_HEAD(delayed_uprobe_list);
+
 /*
  * Execute out of line area: anonymous executable mapping installed
  * by the probed task to execute the copy of the original instruction
@@ -282,6 +292,166 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
        return 1;
 }
 
+/*
+ * Look up the delayed_uprobe_list entry queued for the (@uprobe, @mm) pair.
+ * Returns the matching entry, or NULL when none is queued.
+ */
+static struct delayed_uprobe *
+delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
+{
+       struct delayed_uprobe *entry;
+
+       list_for_each_entry(entry, &delayed_uprobe_list, list) {
+               if (entry->uprobe != uprobe)
+                       continue;
+               if (entry->mm == mm)
+                       return entry;
+       }
+
+       return NULL;
+}
+
+/*
+ * Queue a pending ref_ctr increment for (@uprobe, @mm) unless one is already
+ * queued. Returns 0 on success (including the already-queued case) and
+ * -ENOMEM if the list entry cannot be allocated.
+ */
+static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
+{
+       struct delayed_uprobe *entry;
+
+       /* Nothing to do when this pair is already on the list. */
+       if (delayed_uprobe_check(uprobe, mm) != NULL)
+               return 0;
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (entry == NULL)
+               return -ENOMEM;
+
+       entry->mm = mm;
+       entry->uprobe = uprobe;
+       list_add(&entry->list, &delayed_uprobe_list);
+
+       return 0;
+}
+
+/* Unlink @du from its list and free it; a NULL @du only triggers WARN_ON(). */
+static void delayed_uprobe_delete(struct delayed_uprobe *du)
+{
+       if (!WARN_ON(!du)) {
+               list_del(&du->list);
+               kfree(du);
+       }
+}
+
+/*
+ * Delete every entry of delayed_uprobe_list that matches the given filter.
+ * A NULL @uprobe or a NULL @mm matches any value of that field; when both
+ * are NULL nothing is removed.
+ */
+static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
+{
+       struct delayed_uprobe *entry, *next;
+
+       if (!uprobe && !mm)
+               return;
+
+       /* The _safe iterator is needed because matching entries are freed. */
+       list_for_each_entry_safe(entry, next, &delayed_uprobe_list, list) {
+               bool uprobe_match = !uprobe || entry->uprobe == uprobe;
+               bool mm_match = !mm || entry->mm == mm;
+
+               if (uprobe_match && mm_match)
+                       delayed_uprobe_delete(entry);
+       }
+}
+
+/*
+ * Tell whether @vma can hold the reference counter of @uprobe: the uprobe
+ * must have a non-zero ref_ctr_offset, and @vma must be a writable,
+ * non-shared mapping of the uprobe's inode whose range covers the address
+ * the counter's offset translates to.
+ */
+static bool valid_ref_ctr_vma(struct uprobe *uprobe,
+                             struct vm_area_struct *vma)
+{
+       unsigned long rc_vaddr;
+
+       if (!uprobe->ref_ctr_offset)
+               return false;
+       if (!vma->vm_file || file_inode(vma->vm_file) != uprobe->inode)
+               return false;
+       if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) != VM_WRITE)
+               return false;
+
+       rc_vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset);
+       return rc_vaddr >= vma->vm_start && rc_vaddr < vma->vm_end;
+}
+
+/*
+ * Walk the vma list of @mm and return the first vma accepted by
+ * valid_ref_ctr_vma() for @uprobe, or NULL if there is none.
+ */
+static struct vm_area_struct *
+find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
+{
+       struct vm_area_struct *vma = mm->mmap;
+
+       while (vma && !valid_ref_ctr_vma(uprobe, vma))
+               vma = vma->vm_next;
+
+       return vma;
+}
+
+/*
+ * Add @d to the short-sized reference counter at user address @vaddr of @mm.
+ *
+ * Returns 0 on success; -EINVAL if @vaddr or @d is zero, or if the update
+ * would make the counter negative; -EBUSY if get_user_pages_remote()
+ * returned 0; otherwise the error returned by get_user_pages_remote().
+ */
+static int
+__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
+{
+       struct vm_area_struct *vma;
+       struct page *page;
+       short *ctr;
+       void *kaddr;
+       int ret;
+
+       if (!vaddr || !d)
+               return -EINVAL;
+
+       ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+                       FOLL_WRITE, &page, &vma, NULL);
+       /*
+        * Exactly one page was requested, so anything but a positive return
+        * is a failure; a return of 0 carries no errno and becomes -EBUSY.
+        */
+       if (unlikely(ret == 0))
+               return -EBUSY;
+       if (unlikely(ret < 0))
+               return ret;
+
+       kaddr = kmap_atomic(page);
+       ctr = kaddr + (vaddr & ~PAGE_MASK);
+
+       if (likely(*ctr + d >= 0)) {
+               *ctr += d;
+               ret = 0;
+       } else {
+               pr_warn("ref_ctr going negative. vaddr: 0x%lx, "
+                       "curr val: %d, delta: %d\n", vaddr, *ctr, d);
+               ret = -EINVAL;
+       }
+
+       kunmap_atomic(kaddr);
+       put_page(page);
+       return ret;
+}
+
+/*
+ * Log a warning that changing the reference counter of @uprobe in @mm failed.
+ * A positive @d is reported as an increment, anything else as a decrement.
+ */
+static void update_ref_ctr_warn(struct uprobe *uprobe,
+                               struct mm_struct *mm, short d)
+{
+       const char *op = d > 0 ? "increment" : "decrement";
+       unsigned long long offset = uprobe->offset;
+       unsigned long long rc_offset = uprobe->ref_ctr_offset;
+
+       pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
+               "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
+               op, uprobe->inode->i_ino, offset, rc_offset, mm);
+}
+
+/*
+ * Change the reference counter of @uprobe in @mm by @d.
+ *
+ * If a vma holding the counter is found, the counter is updated in place and
+ * a failure is logged. An increment (@d > 0) then returns that result. If no
+ * such vma is found, an increment is queued on delayed_uprobe_list instead.
+ * Any other @d also drops queued entries for (@uprobe, @mm).
+ *
+ * Returns the result of __update_ref_ctr() when it ran and was not followed
+ * by a queueing attempt, the result of delayed_uprobe_add() when the
+ * increment was queued, and 0 otherwise.
+ */
+static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
+                         short d)
+{
+       struct vm_area_struct *rc_vma = find_ref_ctr_vma(uprobe, mm);
+       bool inc = d > 0;
+       unsigned long rc_vaddr;
+       int ret = 0;
+
+       if (rc_vma) {
+               rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset);
+               ret = __update_ref_ctr(mm, rc_vaddr, d);
+               if (ret)
+                       update_ref_ctr_warn(uprobe, mm, d);
+
+               /* An increment is finished (or has failed) at this point. */
+               if (inc)
+                       return ret;
+       }
+
+       /*
+        * Either no vma holds the counter yet, or this is not an increment:
+        * queue the increment for later, or drop queued entries.
+        */
+       mutex_lock(&delayed_uprobe_lock);
+       if (inc)
+               ret = delayed_uprobe_add(uprobe, mm);
+       else
+               delayed_uprobe_remove(uprobe, mm);
+       mutex_unlock(&delayed_uprobe_lock);
+
+       return ret;
+}
+
 /*
  * NOTE:
  * Expect the breakpoint instruction to be the smallest size instruction for
@@ -302,9 +472,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
                        unsigned long vaddr, uprobe_opcode_t opcode)
 {
+       struct uprobe *uprobe;
        struct page *old_page, *new_page;
        struct vm_area_struct *vma;
-       int ret;
+       int ret, is_register, ref_ctr_updated = 0;
+
+       is_register = is_swbp_insn(&opcode);
+       uprobe = container_of(auprobe, struct uprobe, arch);
 
 retry:
        /* Read the page with vaddr into memory */
@@ -317,6 +491,15 @@ retry:
        if (ret <= 0)
                goto put_old;
 
+       /* We are going to replace instruction, update ref_ctr. */
+       if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
+               ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
+               if (ret)
+                       goto put_old;
+
+               ref_ctr_updated = 1;
+       }
+
        ret = anon_vma_prepare(vma);
        if (ret)
                goto put_old;
@@ -337,6 +520,11 @@ put_old:
 
        if (unlikely(ret == -EAGAIN))
                goto retry;
+
+       /* Revert back reference counter if instruction update failed. */
+       if (ret && is_register && ref_ctr_updated)
+               update_ref_ctr(uprobe, mm, -1);
+
        return ret;
 }
 
@@ -378,8 +566,15 @@ static struct uprobe *get_uprobe(struct uprobe *uprobe)
 
+/*
+ * Drop one reference to @uprobe. When the last reference goes away, purge
+ * the entries still queued for it on delayed_uprobe_list and free it.
+ */
 static void put_uprobe(struct uprobe *uprobe)
 {
-       if (atomic_dec_and_test(&uprobe->ref))
+       if (atomic_dec_and_test(&uprobe->ref)) {
+               /*
+                * If application munmap(exec_vma) before uprobe_unregister()
+                * gets called, we don't get a chance to remove uprobe from
+                * delayed_uprobe_list from remove_breakpoint(). Do it here.
+                *
+                * The list is also walked and modified by update_ref_ctr(),
+                * delayed_ref_ctr_inc() and uprobe_clear_state(), all under
+                * delayed_uprobe_lock, so the lock must be held here too;
+                * otherwise this removal can race with them and corrupt
+                * the list or free an entry another walker is using.
+                */
+               mutex_lock(&delayed_uprobe_lock);
+               delayed_uprobe_remove(uprobe, NULL);
+               mutex_unlock(&delayed_uprobe_lock);
                kfree(uprobe);
+       }
 }
 
 static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -484,7 +679,18 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
        return u;
 }
 
-static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
+/*
+ * Log a ref_ctr_offset mismatch between @cur_uprobe (reported as "old") and
+ * @uprobe (reported as "new"). The inode number and offset in the message
+ * are taken from @uprobe.
+ */
+static void
+ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
+{
+       unsigned long long offset = uprobe->offset;
+       unsigned long long old_rc = cur_uprobe->ref_ctr_offset;
+       unsigned long long new_rc = uprobe->ref_ctr_offset;
+
+       pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx "
+               "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n",
+               uprobe->inode->i_ino, offset, old_rc, new_rc);
+}
+
+static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
+                                  loff_t ref_ctr_offset)
 {
        struct uprobe *uprobe, *cur_uprobe;
 
@@ -494,6 +700,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 
        uprobe->inode = inode;
        uprobe->offset = offset;
+       uprobe->ref_ctr_offset = ref_ctr_offset;
        init_rwsem(&uprobe->register_rwsem);
        init_rwsem(&uprobe->consumer_rwsem);
 
@@ -501,6 +708,12 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
        cur_uprobe = insert_uprobe(uprobe);
        /* a uprobe exists for this inode:offset combination */
        if (cur_uprobe) {
+               if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) {
+                       ref_ctr_mismatch_warn(cur_uprobe, uprobe);
+                       put_uprobe(cur_uprobe);
+                       kfree(uprobe);
+                       return ERR_PTR(-EINVAL);
+               }
                kfree(uprobe);
                uprobe = cur_uprobe;
        }
@@ -895,7 +1108,7 @@ EXPORT_SYMBOL_GPL(uprobe_unregister);
  * else return 0 (success)
  */
 static int __uprobe_register(struct inode *inode, loff_t offset,
-                            struct uprobe_consumer *uc)
+                            loff_t ref_ctr_offset, struct uprobe_consumer *uc)
 {
        struct uprobe *uprobe;
        int ret;
@@ -912,9 +1125,12 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
                return -EINVAL;
 
  retry:
-       uprobe = alloc_uprobe(inode, offset);
+       uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
        if (!uprobe)
                return -ENOMEM;
+       if (IS_ERR(uprobe))
+               return PTR_ERR(uprobe);
+
        /*
         * We can race with uprobe_unregister()->delete_uprobe().
         * Check uprobe_is_active() and retry if it is false.
@@ -938,10 +1154,17 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
 int uprobe_register(struct inode *inode, loff_t offset,
                     struct uprobe_consumer *uc)
 {
-       return __uprobe_register(inode, offset, uc);
+       /* A ref_ctr_offset of 0 registers the probe without a reference counter. */
+       return __uprobe_register(inode, offset, 0, uc);
 }
 EXPORT_SYMBOL_GPL(uprobe_register);
 
+/*
+ * uprobe_register_refctr - register a probe that has a reference counter.
+ * @inode: the file in which the probe has to be placed.
+ * @offset: offset from the start of the file.
+ * @ref_ctr_offset: offset of the reference counter within the same file;
+ *     0 means the probe has no reference counter.
+ * @uc: consumer passed through to __uprobe_register().
+ *
+ * The counter is read and written as a short through a mapping of a single
+ * page (see __update_ref_ctr()), so an offset that is not aligned to
+ * sizeof(short) could make that access run past the end of the mapped page.
+ * Such offsets are rejected up front.
+ *
+ * Return: -EINVAL if @ref_ctr_offset is not aligned to sizeof(short),
+ * otherwise the result of __uprobe_register().
+ */
+int uprobe_register_refctr(struct inode *inode, loff_t offset,
+                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
+{
+       if (ref_ctr_offset & (loff_t)(sizeof(short) - 1))
+               return -EINVAL;
+
+       return __uprobe_register(inode, offset, ref_ctr_offset, uc);
+}
+EXPORT_SYMBOL_GPL(uprobe_register_refctr);
+
 /*
  * uprobe_apply - unregister an already registered probe.
  * @inode: the file in which the probe has to be removed.
@@ -1060,6 +1283,35 @@ static void build_probe_list(struct inode *inode,
        spin_unlock(&uprobes_treelock);
 }
 
+/*
+ * @vma contains reference counter, not the probed instruction.
+ *
+ * Apply every increment queued on delayed_uprobe_list for @vma's mm whose
+ * counter can live in @vma. Each matching entry is removed from the list
+ * whether or not its update succeeded. Returns 0 if all updates succeeded,
+ * otherwise the error of the first one that failed.
+ */
+static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct delayed_uprobe *entry, *next;
+       int first_err = 0;
+
+       mutex_lock(&delayed_uprobe_lock);
+       list_for_each_entry_safe(entry, next, &delayed_uprobe_list, list) {
+               unsigned long rc_vaddr;
+               int ret;
+
+               if (entry->mm != mm)
+                       continue;
+               if (!valid_ref_ctr_vma(entry->uprobe, vma))
+                       continue;
+
+               rc_vaddr = offset_to_vaddr(vma, entry->uprobe->ref_ctr_offset);
+               ret = __update_ref_ctr(mm, rc_vaddr, 1);
+               if (ret) {
+                       update_ref_ctr_warn(entry->uprobe, mm, 1);
+                       if (!first_err)
+                               first_err = ret;
+               }
+
+               /* The entry is consumed even when the update failed. */
+               delayed_uprobe_delete(entry);
+       }
+       mutex_unlock(&delayed_uprobe_lock);
+
+       return first_err;
+}
+
 /*
  * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
  *
@@ -1072,7 +1324,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
        struct uprobe *uprobe, *u;
        struct inode *inode;
 
-       if (no_uprobe_events() || !valid_vma(vma, true))
+       if (no_uprobe_events())
+               return 0;
+
+       if (vma->vm_file &&
+           (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+           test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
+               delayed_ref_ctr_inc(vma);
+
+       if (!valid_vma(vma, true))
                return 0;
 
        inode = file_inode(vma->vm_file);
@@ -1246,6 +1506,10 @@ void uprobe_clear_state(struct mm_struct *mm)
 {
        struct xol_area *area = mm->uprobes_state.xol_area;
 
+       mutex_lock(&delayed_uprobe_lock);
+       delayed_uprobe_remove(NULL, mm);
+       mutex_unlock(&delayed_uprobe_lock);
+
        if (!area)
                return;