Merge tag 'arch-removal' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm...
diff --git a/mm/percpu.c b/mm/percpu.c
index 79e3549cab0f40f30916499f2acca7622379fdc5..0b6480979ac78147d4a3fe33d8aded2d5e9a1a75 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
 * Allocate @size bytes.  If @size is no larger than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used.  The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags.  The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
        if (WARN_ON_ONCE(!slab_is_available()))
                return NULL;
 
        if (size <= PAGE_SIZE)
-               return kzalloc(size, GFP_KERNEL);
+               return kzalloc(size, gfp);
        else
-               return vzalloc(size);
+               return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
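
For illustration, a hedged caller sketch (hypothetical, not part of the diff): both size classes honor the same whitelisted flags and return zeroed memory; only the backing allocator differs.

	/* Hypothetical caller sketch: 64 bytes takes the kzalloc() path,
	 * 8 pages takes the __vmalloc() path; both honor @gfp and zero
	 * the returned memory. */
	static void pcpu_mem_zalloc_demo(void)
	{
		void *small = pcpu_mem_zalloc(64, GFP_KERNEL | __GFP_NOWARN);
		void *large = pcpu_mem_zalloc(8 * PAGE_SIZE,
					      GFP_KERNEL | __GFP_NOWARN);

		pcpu_mem_free(large);	/* picks vfree() for vmalloc addresses */
		pcpu_mem_free(small);	/* and kfree() otherwise */
	}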
 
 /**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
        return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
        struct pcpu_chunk *chunk;
        int region_bits;
 
-       chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+       chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
        if (!chunk)
                return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
        region_bits = pcpu_chunk_map_bits(chunk);
 
        chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-                                          sizeof(chunk->alloc_map[0]));
+                                          sizeof(chunk->alloc_map[0]), gfp);
        if (!chunk->alloc_map)
                goto alloc_map_fail;
 
        chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-                                          sizeof(chunk->bound_map[0]));
+                                          sizeof(chunk->bound_map[0]), gfp);
        if (!chunk->bound_map)
                goto bound_map_fail;
 
        chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-                                          sizeof(chunk->md_blocks[0]));
+                                          sizeof(chunk->md_blocks[0]), gfp);
        if (!chunk->md_blocks)
                goto md_blocks_fail;
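
The goto targets sit just past the end of this hunk; for context, the in-tree unwind (a reconstruction, not part of the diff) frees the partially built chunk in reverse order of allocation:

	md_blocks_fail:
		pcpu_mem_free(chunk->bound_map);
	bound_map_fail:
		pcpu_mem_free(chunk->alloc_map);
	alloc_map_fail:
		pcpu_mem_free(chunk);

		return NULL;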
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
 * pcpu_addr_to_page           - translate address to the backing struct page
  * pcpu_verify_alloc_info      - check alloc_info is acceptable during init
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+                              int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+                                 int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
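
These hooks are supplied by either mm/percpu-vm.c or mm/percpu-km.c. As a simplified sketch of how the new gfp parameter threads through the chunk constructor (modeled loosely on the percpu-km variant; error paths and page bookkeeping elided):

	static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
	{
		const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
		struct pcpu_chunk *chunk;
		struct page *pages;

		chunk = pcpu_alloc_chunk(gfp);	/* chunk metadata honors @gfp */
		if (!chunk)
			return NULL;

		/* backing pages come from the same whitelisted flags */
		pages = alloc_pages(gfp, order_base_2(nr_pages));
		if (!pages) {
			pcpu_free_chunk(chunk);
			return NULL;
		}
		/* ... record pages in the chunk and set chunk->base_addr ... */
		return chunk;
	}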
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                                 gfp_t gfp)
 {
+       /* whitelisted flags that can be passed to the backing allocators */
+       gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
        bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
        bool do_warn = !(gfp & __GFP_NOWARN);
        static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                return NULL;
        }
 
-       if (!is_atomic)
-               mutex_lock(&pcpu_alloc_mutex);
+       if (!is_atomic) {
+               /*
+                * pcpu_balance_workfn() allocates memory under this mutex,
+                * and it may wait for memory reclaim. Allow the current
+                * task to become an OOM victim in case of memory pressure.
+                */
+               if (gfp & __GFP_NOFAIL)
+                       mutex_lock(&pcpu_alloc_mutex);
+               else if (mutex_lock_killable(&pcpu_alloc_mutex))
+                       return NULL;
+       }
 
        spin_lock_irqsave(&pcpu_lock, flags);
 
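From a caller's perspective, only GFP_KERNEL, __GFP_NORETRY, and __GFP_NOWARN survive the mask and reach the backing allocators; the remaining bits only influence pcpu_alloc() itself. A hedged usage sketch (counter_init() is hypothetical):

	/* Hypothetical caller: a per-CPU counter that prefers failing fast
	 * under memory pressure over triggering the OOM killer. */
	static int __percpu *counter_init(void)
	{
		return alloc_percpu_gfp(int,
					GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
	}
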
@@ -1421,7 +1434,7 @@ restart:
        }
 
        if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-               chunk = pcpu_create_chunk();
+               chunk = pcpu_create_chunk(pcpu_gfp);
                if (!chunk) {
                        err = "failed to allocate new chunk";
                        goto fail;
@@ -1450,7 +1463,7 @@ area_found:
                                           page_start, page_end) {
                        WARN_ON(chunk->immutable);
 
-                       ret = pcpu_populate_chunk(chunk, rs, re);
+                       ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
                        spin_lock_irqsave(&pcpu_lock, flags);
                        if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one.  This is also
+ * responsible for maintaining the pool of empty populated pages.  However,
+ * it is possible that this is called when physical memory is scarce,
+ * causing the OOM killer to be triggered.  We should avoid triggering
+ * the OOM killer until an actual allocation fails, as it is possible
+ * that requests can still be serviced from already backed regions.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+       /* gfp flags passed to underlying allocators */
+       const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
        LIST_HEAD(to_free);
        struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
        struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
                        spin_unlock_irq(&pcpu_lock);
                }
                pcpu_destroy_chunk(chunk);
+               cond_resched();
        }
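
(The cond_resched() above is also why this diff adds the linux/sched.h include at the top.) The workfn's flag combination is a common kernel pattern for opportunistic allocations; a hedged standalone illustration (try_grab_page() is hypothetical):

	/* Hypothetical illustration of the balance path's gfp choice:
	 * GFP_KERNEL permits reclaim, __GFP_NORETRY bails out rather than
	 * retrying or invoking the OOM killer, and __GFP_NOWARN suppresses
	 * the failure splat since failure here is expected and benign. */
	static struct page *try_grab_page(void)
	{
		return alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
	}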
 
        /*
@@ -1645,7 +1666,7 @@ retry_pop:
                                           chunk->nr_pages) {
                        int nr = min(re - rs, nr_to_pop);
 
-                       ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+                       ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
                        if (!ret) {
                                nr_to_pop -= nr;
                                spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
 
        if (nr_to_pop) {
                /* ran out of chunks to populate, create a new one and retry */
-               chunk = pcpu_create_chunk();
+               chunk = pcpu_create_chunk(gfp);
                if (chunk) {
                        spin_lock_irq(&pcpu_lock);
                        pcpu_chunk_relocate(chunk, -1);
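
For context, the nr_to_pop budget consumed above is derived from the empty populated-page watermarks a few lines before this hunk; a hedged condensation (pcpu_pop_target() is hypothetical, PCPU_EMPTY_POP_PAGES_HIGH is the in-tree threshold):

	/* Hypothetical condensation of the retry_pop setup above: keep the
	 * pool of empty populated pages topped up to the HIGH watermark,
	 * creating a brand new chunk only when existing chunks have no
	 * unpopulated regions left. */
	static int pcpu_pop_target(int nr_empty_pop_pages)
	{
		if (nr_empty_pop_pages >= PCPU_EMPTY_POP_PAGES_HIGH)
			return 0;
		return PCPU_EMPTY_POP_PAGES_HIGH - nr_empty_pop_pages;
	}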