99bfb192803ffcbc22299503a48b6ae84f962f91
[muen/linux.git] / arch / x86 / mm / dump_pagetables.c
1 /*
2  * Debug helper to dump the current kernel pagetables of the system
3  * so that we can see what the various memory ranges are set to.
4  *
5  * (C) Copyright 2008 Intel Corporation
6  *
7  * Author: Arjan van de Ven <arjan@linux.intel.com>
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; version 2
12  * of the License.
13  */
14
15 #include <linux/debugfs.h>
16 #include <linux/mm.h>
17 #include <linux/module.h>
18 #include <linux/seq_file.h>
19
20 #include <asm/pgtable.h>
21
/*
 * The dumper groups pagetable entries of the same type into one, and for
 * that it needs to keep some state when walking, and flush this state
 * when a "break" in the continuity is found.
 */
struct pg_state {
        /* Level of the run being collected; 0 until the first entry is noted */
        int level;
        /* Protection bits shared by every entry of the current run */
        pgprot_t current_prot;
        /* Address at which the current run started */
        unsigned long start_address;
        /* Address of the entry the walker is visiting right now */
        unsigned long current_address;
        /* Address space marker the current run is reported under */
        const struct addr_marker *marker;
        /* Runs flushed (printed or skipped) since the current marker began */
        unsigned long lines;
        /* Send output through printk() instead of the seq_file */
        bool to_dmesg;
        /* Warn about, and count, runs that are both writable and executable */
        bool check_wx;
        /* Number of pages counted as writable and executable (see check_wx) */
        unsigned long wx_pages;
};
38
/*
 * A named boundary in the address space. A "---[ name ]---" header is
 * printed when the walk reaches start_address.
 */
struct addr_marker {
        /* First address belonging to the region */
        unsigned long start_address;
        /* Text printed in the region header; NULL in the end-of-list entry */
        const char *name;
        /* Maximum number of lines printed for the region; 0 means no limit */
        unsigned long max_lines;
};
44
/*
 * Indices for address_markers; keep sync'd w/ address_markers below.
 *
 * Every conditionally compiled row of address_markers needs an
 * enumerator under the same #ifdef here, otherwise all following
 * indices point at the wrong row. The table has a CONFIG_EFI row
 * between the ESPfix area and the high kernel mapping, so it gets
 * an index (EFI_END_NR) as well.
 */
enum address_markers_idx {
        USER_SPACE_NR = 0,
#ifdef CONFIG_X86_64
        KERNEL_SPACE_NR,
        LOW_KERNEL_NR,
        VMALLOC_START_NR,
        VMEMMAP_START_NR,
# ifdef CONFIG_X86_ESPFIX64
        ESPFIX_START_NR,
# endif
# ifdef CONFIG_EFI
        EFI_END_NR,
# endif
        HIGH_KERNEL_NR,
        MODULES_VADDR_NR,
        MODULES_END_NR,
#else
        KERNEL_SPACE_NR,
        VMALLOC_START_NR,
        VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
        PKMAP_BASE_NR,
# endif
        FIXADDR_START_NR,
#endif
};
69
70 /* Address space markers hints */
71 static struct addr_marker address_markers[] = {
72         { 0, "User Space" },
73 #ifdef CONFIG_X86_64
74         { 0x8000000000000000UL, "Kernel Space" },
75         { PAGE_OFFSET,          "Low Kernel Mapping" },
76         { VMALLOC_START,        "vmalloc() Area" },
77         { VMEMMAP_START,        "Vmemmap" },
78 # ifdef CONFIG_X86_ESPFIX64
79         { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
80 # endif
81 # ifdef CONFIG_EFI
82         { EFI_VA_END,           "EFI Runtime Services" },
83 # endif
84         { __START_KERNEL_map,   "High Kernel Mapping" },
85         { MODULES_VADDR,        "Modules" },
86         { MODULES_END,          "End Modules" },
87 #else
88         { PAGE_OFFSET,          "Kernel Mapping" },
89         { 0/* VMALLOC_START */, "vmalloc() Area" },
90         { 0/*VMALLOC_END*/,     "vmalloc() End" },
91 # ifdef CONFIG_HIGHMEM
92         { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
93 # endif
94         { 0/*FIXADDR_START*/,   "Fixmap Area" },
95 #endif
96         { -1, NULL }            /* End of list */
97 };
98
/*
 * Multipliers for offsets within the PTEs: the walkers compute the
 * address of entry i in a table as base + i * <LEVEL>_MULT, so each
 * value is the amount of address space stepped over per entry at
 * that level.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
104
/*
 * Start a new output line: through printk() at KERN_INFO when to_dmesg
 * is set, otherwise into the seq_file m. With to_dmesg clear and m
 * NULL nothing is emitted.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)           \
({                                                              \
        if (to_dmesg) {                                         \
                printk(KERN_INFO fmt, ##args);                  \
        } else if (m) {                                         \
                seq_printf(m, fmt, ##args);                     \
        }                                                       \
})
113
/*
 * Continue the current output line: through printk() with KERN_CONT
 * when to_dmesg is set, otherwise into the seq_file m. With to_dmesg
 * clear and m NULL nothing is emitted.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)          \
({                                                              \
        if (to_dmesg) {                                         \
                printk(KERN_CONT fmt, ##args);                  \
        } else if (m) {                                         \
                seq_printf(m, fmt, ##args);                     \
        }                                                       \
})
122
123 /*
124  * Print a readable form of a pgprot_t to the seq_file
125  */
126 static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
127 {
128         pgprotval_t pr = pgprot_val(prot);
129         static const char * const level_name[] =
130                 { "cr3", "pgd", "pud", "pmd", "pte" };
131
132         if (!pgprot_val(prot)) {
133                 /* Not present */
134                 pt_dump_cont_printf(m, dmsg, "                              ");
135         } else {
136                 if (pr & _PAGE_USER)
137                         pt_dump_cont_printf(m, dmsg, "USR ");
138                 else
139                         pt_dump_cont_printf(m, dmsg, "    ");
140                 if (pr & _PAGE_RW)
141                         pt_dump_cont_printf(m, dmsg, "RW ");
142                 else
143                         pt_dump_cont_printf(m, dmsg, "ro ");
144                 if (pr & _PAGE_PWT)
145                         pt_dump_cont_printf(m, dmsg, "PWT ");
146                 else
147                         pt_dump_cont_printf(m, dmsg, "    ");
148                 if (pr & _PAGE_PCD)
149                         pt_dump_cont_printf(m, dmsg, "PCD ");
150                 else
151                         pt_dump_cont_printf(m, dmsg, "    ");
152
153                 /* Bit 7 has a different meaning on level 3 vs 4 */
154                 if (level <= 3 && pr & _PAGE_PSE)
155                         pt_dump_cont_printf(m, dmsg, "PSE ");
156                 else
157                         pt_dump_cont_printf(m, dmsg, "    ");
158                 if ((level == 4 && pr & _PAGE_PAT) ||
159                     ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
160                         pt_dump_cont_printf(m, dmsg, "PAT ");
161                 else
162                         pt_dump_cont_printf(m, dmsg, "    ");
163                 if (pr & _PAGE_GLOBAL)
164                         pt_dump_cont_printf(m, dmsg, "GLB ");
165                 else
166                         pt_dump_cont_printf(m, dmsg, "    ");
167                 if (pr & _PAGE_NX)
168                         pt_dump_cont_printf(m, dmsg, "NX ");
169                 else
170                         pt_dump_cont_printf(m, dmsg, "x  ");
171         }
172         pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
173 }
174
/*
 * On 64 bits, sign-extend the 48 bit address to 64 bit by moving
 * bit 47 into the sign position and shifting it back arithmetically.
 * On 32 bits the address is returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
        signed long shifted = (signed long)(u << 16);

        return shifted >> 16;
#else
        return u;
#endif
}
186
187 /*
188  * This function gets called on a break in a continuous series
189  * of PTE entries; the next one is different so we need to
190  * print what we collected so far.
191  */
192 static void note_page(struct seq_file *m, struct pg_state *st,
193                       pgprot_t new_prot, int level)
194 {
195         pgprotval_t prot, cur;
196         static const char units[] = "BKMGTPE";
197
198         /*
199          * If we have a "break" in the series, we need to flush the state that
200          * we have now. "break" is either changing perms, levels or
201          * address space marker.
202          */
203         prot = pgprot_val(new_prot);
204         cur = pgprot_val(st->current_prot);
205
206         if (!st->level) {
207                 /* First entry */
208                 st->current_prot = new_prot;
209                 st->level = level;
210                 st->marker = address_markers;
211                 st->lines = 0;
212                 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
213                                    st->marker->name);
214         } else if (prot != cur || level != st->level ||
215                    st->current_address >= st->marker[1].start_address) {
216                 const char *unit = units;
217                 unsigned long delta;
218                 int width = sizeof(unsigned long) * 2;
219                 pgprotval_t pr = pgprot_val(st->current_prot);
220
221                 if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
222                         WARN_ONCE(1,
223                                   "x86/mm: Found insecure W+X mapping at address %p/%pS\n",
224                                   (void *)st->start_address,
225                                   (void *)st->start_address);
226                         st->wx_pages += (st->current_address -
227                                          st->start_address) / PAGE_SIZE;
228                 }
229
230                 /*
231                  * Now print the actual finished series
232                  */
233                 if (!st->marker->max_lines ||
234                     st->lines < st->marker->max_lines) {
235                         pt_dump_seq_printf(m, st->to_dmesg,
236                                            "0x%0*lx-0x%0*lx   ",
237                                            width, st->start_address,
238                                            width, st->current_address);
239
240                         delta = st->current_address - st->start_address;
241                         while (!(delta & 1023) && unit[1]) {
242                                 delta >>= 10;
243                                 unit++;
244                         }
245                         pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
246                                             delta, *unit);
247                         printk_prot(m, st->current_prot, st->level,
248                                     st->to_dmesg);
249                 }
250                 st->lines++;
251
252                 /*
253                  * We print markers for special areas of address space,
254                  * such as the start of vmalloc space etc.
255                  * This helps in the interpretation.
256                  */
257                 if (st->current_address >= st->marker[1].start_address) {
258                         if (st->marker->max_lines &&
259                             st->lines > st->marker->max_lines) {
260                                 unsigned long nskip =
261                                         st->lines - st->marker->max_lines;
262                                 pt_dump_seq_printf(m, st->to_dmesg,
263                                                    "... %lu entr%s skipped ... \n",
264                                                    nskip,
265                                                    nskip == 1 ? "y" : "ies");
266                         }
267                         st->marker++;
268                         st->lines = 0;
269                         pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
270                                            st->marker->name);
271                 }
272
273                 st->start_address = st->current_address;
274                 st->current_prot = new_prot;
275                 st->level = level;
276         }
277 }
278
279 static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
280                                                         unsigned long P)
281 {
282         int i;
283         pte_t *start;
284         pgprotval_t prot;
285
286         start = (pte_t *) pmd_page_vaddr(addr);
287         for (i = 0; i < PTRS_PER_PTE; i++) {
288                 prot = pte_flags(*start);
289                 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
290                 note_page(m, st, __pgprot(prot), 4);
291                 start++;
292         }
293 }
294
#if PTRS_PER_PMD > 1

/*
 * Visit every entry of the PMD table that the PUD entry addr points to.
 * Empty entries are noted with zero protection, large or non-present
 * entries are noted with their own flags, and everything else is
 * descended into. P is the not yet normalized address covered by the
 * table's first entry.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
                                                        unsigned long P)
{
        pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
        pgprotval_t flags;
        int i;

        for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
                st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);

                if (pmd_none(*pmd)) {
                        note_page(m, st, __pgprot(0), 3);
                } else if (pmd_large(*pmd) || !pmd_present(*pmd)) {
                        flags = pmd_flags(*pmd);
                        note_page(m, st, __pgprot(flags), 3);
                } else {
                        walk_pte_level(m, st, *pmd, P + i * PMD_LEVEL_MULT);
                }
        }
}

#else
/* Only one entry per PMD table: treat the PUD entry as the PMD entry */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
326
#if PTRS_PER_PUD > 1

/*
 * Visit every entry of the PUD table that the PGD entry addr points to.
 * Empty entries are noted with zero protection, large or non-present
 * entries are noted with their own flags, and everything else is
 * descended into. P is the not yet normalized address covered by the
 * table's first entry.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
                                                        unsigned long P)
{
        pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
        pgprotval_t flags;
        int i;

        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
                st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);

                if (pud_none(*pud)) {
                        note_page(m, st, __pgprot(0), 2);
                } else if (pud_large(*pud) || !pud_present(*pud)) {
                        flags = pud_flags(*pud);
                        note_page(m, st, __pgprot(flags), 2);
                } else {
                        walk_pmd_level(m, st, *pud, P + i * PUD_LEVEL_MULT);
                }
        }
}

#else
/* Only one entry per PUD table: treat the PGD entry as the PUD entry */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
#endif
360
/*
 * Tell whether top-level page table slot idx belongs to the range
 * reserved for the hypervisor. Always false on 32 bits.
 */
static inline bool is_hypervisor_range(int idx)
{
#ifdef CONFIG_X86_64
        /*
         * ffff800000000000 - ffff87ffffffffff is reserved for
         * the hypervisor: the 16 slots just below __PAGE_OFFSET.
         */
        if (idx < pgd_index(__PAGE_OFFSET) - 16)
                return false;

        return idx < pgd_index(__PAGE_OFFSET);
#else
        return false;
#endif
}
374
375 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
376                                        bool checkwx)
377 {
378 #ifdef CONFIG_X86_64
379         pgd_t *start = (pgd_t *) &init_level4_pgt;
380 #else
381         pgd_t *start = swapper_pg_dir;
382 #endif
383         pgprotval_t prot;
384         int i;
385         struct pg_state st = {};
386
387         if (pgd) {
388                 start = pgd;
389                 st.to_dmesg = true;
390         }
391
392         st.check_wx = checkwx;
393         if (checkwx)
394                 st.wx_pages = 0;
395
396         for (i = 0; i < PTRS_PER_PGD; i++) {
397                 st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
398                 if (!pgd_none(*start) && !is_hypervisor_range(i)) {
399                         if (pgd_large(*start) || !pgd_present(*start)) {
400                                 prot = pgd_flags(*start);
401                                 note_page(m, &st, __pgprot(prot), 1);
402                         } else {
403                                 walk_pud_level(m, &st, *start,
404                                                i * PGD_LEVEL_MULT);
405                         }
406                 } else
407                         note_page(m, &st, __pgprot(0), 1);
408
409                 start++;
410         }
411
412         /* Flush out the last page */
413         st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
414         note_page(m, &st, __pgprot(0), 0);
415         if (!checkwx)
416                 return;
417         if (st.wx_pages)
418                 pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
419                         st.wx_pages);
420         else
421                 pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
422 }
423
/*
 * Dump a page table hierarchy without the W+X check.
 *
 * With pgd == NULL the kernel's own page table is walked and the dump
 * is written to the seq_file m. With a non-NULL pgd that table is
 * walked and the dump goes to the kernel log instead.
 */
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
        ptdump_walk_pgd_level_core(m, pgd, false);
}
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
429
/*
 * Check the kernel's own page table for mappings that are both
 * writable and executable. No seq_file is passed and dmesg output is
 * not selected, so the per-range dump lines are suppressed; only the
 * W+X warning and the pass/fail summary are emitted.
 */
void ptdump_walk_pgd_level_checkwx(void)
{
        ptdump_walk_pgd_level_core(NULL, NULL, true);
}
434
/*
 * Fill in the address_markers start addresses that are left as 0
 * placeholders in the static table on 32 bits. Nothing needs patching
 * in other configurations. Always returns 0.
 */
static int __init pt_dump_init(void)
{
#ifdef CONFIG_X86_32
        /* Not a compile-time constant on x86-32 */
        address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
        address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
        address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
# endif
        address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif

        return 0;
}

/* Register pt_dump_init() as an initcall */
__initcall(pt_dump_init);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");