File: /usr/src/linux/arch/i386/mm/init.c
1 /*
2 * linux/arch/i386/mm/init.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 */
8
9 #include <linux/config.h>
10 #include <linux/signal.h>
11 #include <linux/sched.h>
12 #include <linux/kernel.h>
13 #include <linux/errno.h>
14 #include <linux/string.h>
15 #include <linux/types.h>
16 #include <linux/ptrace.h>
17 #include <linux/mman.h>
18 #include <linux/mm.h>
19 #include <linux/swap.h>
20 #include <linux/smp.h>
21 #include <linux/init.h>
22 #ifdef CONFIG_BLK_DEV_INITRD
23 #include <linux/blk.h>
24 #endif
25 #include <linux/highmem.h>
26 #include <linux/pagemap.h>
27 #include <linux/bootmem.h>
28
29 #include <asm/processor.h>
30 #include <asm/system.h>
31 #include <asm/uaccess.h>
32 #include <asm/pgtable.h>
33 #include <asm/pgalloc.h>
34 #include <asm/dma.h>
35 #include <asm/fixmap.h>
36 #include <asm/e820.h>
37 #include <asm/apic.h>
38 #include <asm/tlb.h>
39
40 mmu_gather_t mmu_gathers[NR_CPUS];
41 unsigned long highstart_pfn, highend_pfn;
42 static unsigned long totalram_pages;
43 static unsigned long totalhigh_pages;
44
45 int do_check_pgt_cache(int low, int high)
46 {
47 int freed = 0;
48 if(pgtable_cache_size > high) {
49 do {
50 if (pgd_quicklist) {
51 free_pgd_slow(get_pgd_fast());
52 freed++;
53 }
54 if (pmd_quicklist) {
55 pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
56 freed++;
57 }
58 if (pte_quicklist) {
59 pte_free_slow(pte_alloc_one_fast(NULL, 0));
60 freed++;
61 }
62 } while(pgtable_cache_size > low);
63 }
64 return freed;
65 }
66
/*
 * NOTE: pagetable_init() allocates all the fixmap page tables contiguously
 * in physical memory, so the location of the first kmap pte can be cached
 * once and the others reached from it without walking the pgd every time.
 */

#if CONFIG_HIGHMEM
/* Both are filled in by kmap_init(). */
pte_t *kmap_pte;	/* pte of the fixmap slot FIX_KMAP_BEGIN */
pgprot_t kmap_prot;	/* page protection recorded for kmaps */

/* Walk the kernel page tables (pgd -> pmd -> pte) down to the pte of vaddr. */
#define kmap_get_fixmap_pte(vaddr) \
	pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))

/*
 * Record the pte that maps the first kmap fixmap slot in kmap_pte and
 * set kmap_prot to PAGE_KERNEL.
 */
void __init kmap_init(void)
{
	unsigned long first_slot = __fix_to_virt(FIX_KMAP_BEGIN);

	/* cache the first kmap pte */
	kmap_pte = kmap_get_fixmap_pte(first_slot);
	kmap_prot = PAGE_KERNEL;
}
#endif /* CONFIG_HIGHMEM */
91
92 void show_mem(void)
93 {
94 int i, total = 0, reserved = 0;
95 int shared = 0, cached = 0;
96 int highmem = 0;
97
98 printk("Mem-info:\n");
99 show_free_areas();
100 printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
101 i = max_mapnr;
102 while (i-- > 0) {
103 total++;
104 if (PageHighMem(mem_map+i))
105 highmem++;
106 if (PageReserved(mem_map+i))
107 reserved++;
108 else if (PageSwapCache(mem_map+i))
109 cached++;
110 else if (page_count(mem_map+i))
111 shared += page_count(mem_map+i) - 1;
112 }
113 printk("%d pages of RAM\n", total);
114 printk("%d pages of HIGHMEM\n",highmem);
115 printk("%d reserved pages\n",reserved);
116 printk("%d pages shared\n",shared);
117 printk("%d pages swap cached\n",cached);
118 printk("%ld pages in page table cache\n",pgtable_cache_size);
119 show_buffers();
120 }
121
/*
 * References to section boundaries.  This file only ever takes the
 * addresses of these symbols (presumably they are placed by the linker
 * script - confirm against the build).
 */
extern char _text;
extern char _etext;
extern char _edata;
extern char __bss_start;
extern char _end;
extern char __init_begin;
extern char __init_end;
126
127 static inline void set_pte_phys (unsigned long vaddr,
128 unsigned long phys, pgprot_t flags)
129 {
130 pgprot_t prot;
131 pgd_t *pgd;
132 pmd_t *pmd;
133 pte_t *pte;
134
135 pgd = swapper_pg_dir + __pgd_offset(vaddr);
136 if (pgd_none(*pgd)) {
137 printk("PAE BUG #00!\n");
138 return;
139 }
140 pmd = pmd_offset(pgd, vaddr);
141 if (pmd_none(*pmd)) {
142 printk("PAE BUG #01!\n");
143 return;
144 }
145 pte = pte_offset(pmd, vaddr);
146 if (pte_val(*pte))
147 pte_ERROR(*pte);
148 pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
149 set_pte(pte, mk_pte_phys(phys, prot));
150
151 /*
152 * It's enough to flush this one mapping.
153 * (PGE mappings get flushed as well)
154 */
155 __flush_tlb_one(vaddr);
156 }
157
158 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
159 {
160 unsigned long address = __fix_to_virt(idx);
161
162 if (idx >= __end_of_fixed_addresses) {
163 printk("Invalid __set_fixmap\n");
164 return;
165 }
166 set_pte_phys(address, phys, flags);
167 }
168
169 static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
170 {
171 pgd_t *pgd;
172 pmd_t *pmd;
173 pte_t *pte;
174 int i, j;
175 unsigned long vaddr;
176
177 vaddr = start;
178 i = __pgd_offset(vaddr);
179 j = __pmd_offset(vaddr);
180 pgd = pgd_base + i;
181
182 for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
183 #if CONFIG_X86_PAE
184 if (pgd_none(*pgd)) {
185 pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
186 set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
187 if (pmd != pmd_offset(pgd, 0))
188 printk("PAE BUG #02!\n");
189 }
190 pmd = pmd_offset(pgd, vaddr);
191 #else
192 pmd = (pmd_t *)pgd;
193 #endif
194 for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
195 if (pmd_none(*pmd)) {
196 pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
197 set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
198 if (pte != pte_offset(pmd, 0))
199 BUG();
200 }
201 vaddr += PMD_SIZE;
202 }
203 j = 0;
204 }
205 }
206
207 static void __init pagetable_init (void)
208 {
209 unsigned long vaddr, end;
210 pgd_t *pgd, *pgd_base;
211 int i, j, k;
212 pmd_t *pmd;
213 pte_t *pte, *pte_base;
214
215 /*
216 * This can be zero as well - no problem, in that case we exit
217 * the loops anyway due to the PTRS_PER_* conditions.
218 */
219 end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
220
221 pgd_base = swapper_pg_dir;
222 #if CONFIG_X86_PAE
223 for (i = 0; i < PTRS_PER_PGD; i++)
224 set_pgd(pgd_base + i, __pgd(1 + __pa(empty_zero_page)));
225 #endif
226 i = __pgd_offset(PAGE_OFFSET);
227 pgd = pgd_base + i;
228
229 for (; i < PTRS_PER_PGD; pgd++, i++) {
230 vaddr = i*PGDIR_SIZE;
231 if (end && (vaddr >= end))
232 break;
233 #if CONFIG_X86_PAE
234 pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
235 set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
236 #else
237 pmd = (pmd_t *)pgd;
238 #endif
239 if (pmd != pmd_offset(pgd, 0))
240 BUG();
241 for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
242 vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
243 if (end && (vaddr >= end))
244 break;
245 if (cpu_has_pse) {
246 unsigned long __pe;
247
248 set_in_cr4(X86_CR4_PSE);
249 boot_cpu_data.wp_works_ok = 1;
250 __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
251 /* Make it "global" too if supported */
252 if (cpu_has_pge) {
253 set_in_cr4(X86_CR4_PGE);
254 __pe += _PAGE_GLOBAL;
255 }
256 set_pmd(pmd, __pmd(__pe));
257 continue;
258 }
259
260 pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
261
262 for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
263 vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
264 if (end && (vaddr >= end))
265 break;
266 *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
267 }
268 set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
269 if (pte_base != pte_offset(pmd, 0))
270 BUG();
271
272 }
273 }
274
275 /*
276 * Fixed mappings, only the page table structure has to be
277 * created - mappings will be set by set_fixmap():
278 */
279 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
280 fixrange_init(vaddr, 0, pgd_base);
281
282 #if CONFIG_HIGHMEM
283 /*
284 * Permanent kmaps:
285 */
286 vaddr = PKMAP_BASE;
287 fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
288
289 pgd = swapper_pg_dir + __pgd_offset(vaddr);
290 pmd = pmd_offset(pgd, vaddr);
291 pte = pte_offset(pmd, vaddr);
292 pkmap_page_table = pte;
293 #endif
294
295 #if CONFIG_X86_PAE
296 /*
297 * Add low memory identity-mappings - SMP needs it when
298 * starting up on an AP from real-mode. In the non-PAE
299 * case we already have these mappings through head.S.
300 * All user-space mappings are explicitly cleared after
301 * SMP startup.
302 */
303 pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
304 #endif
305 }
306
307 void __init zap_low_mappings (void)
308 {
309 int i;
310 /*
311 * Zap initial low-memory mappings.
312 *
313 * Note that "pgd_clear()" doesn't do it for
314 * us, because pgd_clear() is a no-op on i386.
315 */
316 for (i = 0; i < USER_PTRS_PER_PGD; i++)
317 #if CONFIG_X86_PAE
318 set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
319 #else
320 set_pgd(swapper_pg_dir+i, __pgd(0));
321 #endif
322 flush_tlb_all();
323 }
324
325 /*
326 * paging_init() sets up the page tables - note that the first 8MB are
327 * already mapped by head.S.
328 *
329 * This routines also unmaps the page at virtual kernel address 0, so
330 * that we can trap those pesky NULL-reference errors in the kernel.
331 */
332 void __init paging_init(void)
333 {
334 pagetable_init();
335
336 __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));
337
338 #if CONFIG_X86_PAE
339 /*
340 * We will bail out later - printk doesnt work right now so
341 * the user would just see a hanging kernel.
342 */
343 if (cpu_has_pae)
344 set_in_cr4(X86_CR4_PAE);
345 #endif
346
347 __flush_tlb_all();
348
349 #ifdef CONFIG_HIGHMEM
350 kmap_init();
351 #endif
352 {
353 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
354 unsigned int max_dma, high, low;
355
356 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
357 low = max_low_pfn;
358 high = highend_pfn;
359
360 if (low < max_dma)
361 zones_size[ZONE_DMA] = low;
362 else {
363 zones_size[ZONE_DMA] = max_dma;
364 zones_size[ZONE_NORMAL] = low - max_dma;
365 #ifdef CONFIG_HIGHMEM
366 zones_size[ZONE_HIGHMEM] = high - low;
367 #endif
368 }
369 free_area_init(zones_size);
370 }
371 return;
372 }
373
374 /*
375 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
376 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
377 * before and after the test are here to work-around some nasty CPU bugs.
378 */
379
380 /*
381 * This function cannot be __init, since exceptions don't work in that
382 * section.
383 */
384 static int do_test_wp_bit(unsigned long vaddr);
385
386 void __init test_wp_bit(void)
387 {
388 /*
389 * Ok, all PSE-capable CPUs are definitely handling the WP bit right.
390 */
391 const unsigned long vaddr = PAGE_OFFSET;
392 pgd_t *pgd;
393 pmd_t *pmd;
394 pte_t *pte, old_pte;
395
396 printk("Checking if this processor honours the WP bit even in supervisor mode... ");
397
398 pgd = swapper_pg_dir + __pgd_offset(vaddr);
399 pmd = pmd_offset(pgd, vaddr);
400 pte = pte_offset(pmd, vaddr);
401 old_pte = *pte;
402 *pte = mk_pte_phys(0, PAGE_READONLY);
403 local_flush_tlb();
404
405 boot_cpu_data.wp_works_ok = do_test_wp_bit(vaddr);
406
407 *pte = old_pte;
408 local_flush_tlb();
409
410 if (!boot_cpu_data.wp_works_ok) {
411 printk("No.\n");
412 #ifdef CONFIG_X86_WP_WORKS_OK
413 panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
414 #endif
415 } else {
416 printk("Ok.\n");
417 }
418 }
419
420 static inline int page_is_ram (unsigned long pagenr)
421 {
422 int i;
423
424 for (i = 0; i < e820.nr_map; i++) {
425 unsigned long addr, end;
426
427 if (e820.map[i].type != E820_RAM) /* not usable memory */
428 continue;
429 /*
430 * !!!FIXME!!! Some BIOSen report areas as RAM that
431 * are not. Notably the 640->1Mb area. We need a sanity
432 * check here.
433 */
434 addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
435 end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
436 if ((pagenr >= addr) && (pagenr < end))
437 return 1;
438 }
439 return 0;
440 }
441
442 void __init mem_init(void)
443 {
444 int codesize, reservedpages, datasize, initsize;
445 int tmp;
446
447 if (!mem_map)
448 BUG();
449
450 #ifdef CONFIG_HIGHMEM
451 highmem_start_page = mem_map + highstart_pfn;
452 max_mapnr = num_physpages = highend_pfn;
453 #else
454 max_mapnr = num_physpages = max_low_pfn;
455 #endif
456 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
457
458 /* clear the zero-page */
459 memset(empty_zero_page, 0, PAGE_SIZE);
460
461 /* this will put all low memory onto the freelists */
462 totalram_pages += free_all_bootmem();
463
464 reservedpages = 0;
465 for (tmp = 0; tmp < max_low_pfn; tmp++)
466 /*
467 * Only count reserved RAM pages
468 */
469 if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
470 reservedpages++;
471 #ifdef CONFIG_HIGHMEM
472 for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
473 struct page *page = mem_map + tmp;
474
475 if (!page_is_ram(tmp)) {
476 SetPageReserved(page);
477 continue;
478 }
479 ClearPageReserved(page);
480 set_bit(PG_highmem, &page->flags);
481 atomic_set(&page->count, 1);
482 __free_page(page);
483 totalhigh_pages++;
484 }
485 totalram_pages += totalhigh_pages;
486 #endif
487 codesize = (unsigned long) &_etext - (unsigned long) &_text;
488 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
489 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
490
491 printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
492 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
493 max_mapnr << (PAGE_SHIFT-10),
494 codesize >> 10,
495 reservedpages << (PAGE_SHIFT-10),
496 datasize >> 10,
497 initsize >> 10,
498 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
499 );
500
501 #if CONFIG_X86_PAE
502 if (!cpu_has_pae)
503 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
504 #endif
505 if (boot_cpu_data.wp_works_ok < 0)
506 test_wp_bit();
507
508 /*
509 * Subtle. SMP is doing it's boot stuff late (because it has to
510 * fork idle threads) - but it also needs low mappings for the
511 * protected-mode entry to work. We zap these entries only after
512 * the WP-bit has been tested.
513 */
514 #ifndef CONFIG_SMP
515 zap_low_mappings();
516 #endif
517
518 }
519
/*
 * Put this after the callers, so that it cannot be inlined.
 *
 * Try to write one byte to 'vaddr' and report whether the write faulted.
 *
 * The asm loads the byte at vaddr into a scratch register, then (label 1)
 * stores it back unchanged.  The result register starts out as 1 and is
 * zeroed by the xorl that follows the store.  The __ex_table entry pairs
 * label 1 with label 2, so - by the kernel's exception-table convention -
 * a fault on the store resumes at label 2 and skips the xorl.
 *
 * Returns 1 if the store faulted (write protection was enforced), 0 if it
 * went through.
 */
static int do_test_wp_bit(unsigned long vaddr)
{
	char scratch;
	int wp_ok;

	__asm__ __volatile__(
		" movb %0,%1 \n"
		"1: movb %1,%0 \n"
		" xorl %2,%2 \n"
		"2: \n"
		".section __ex_table,\"a\"\n"
		" .align 4 \n"
		" .long 1b,2b \n"
		".previous \n"
		:"=m" (*(char *) vaddr),
		 "=q" (scratch),
		 "=r" (wp_ok)
		:"2" (1)
		:"memory");

	return wp_ok;
}
543
544 void free_initmem(void)
545 {
546 unsigned long addr;
547
548 addr = (unsigned long)(&__init_begin);
549 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
550 ClearPageReserved(virt_to_page(addr));
551 set_page_count(virt_to_page(addr), 1);
552 free_page(addr);
553 totalram_pages++;
554 }
555 printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
556 }
557
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * Give the pages of the virtual range [start, end) back to the page
 * allocator, one PAGE_SIZE step at a time, and add them to totalram_pages.
 * The size is reported in kilobytes first.  An empty range (start >= end)
 * does nothing and prints nothing.
 */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start >= end)
		return;

	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);

	while (start < end) {
		struct page *page = virt_to_page(start);

		ClearPageReserved(page);
		set_page_count(page, 1);
		free_page(start);
		totalram_pages++;
		start += PAGE_SIZE;
	}
}
#endif
571
572 void si_meminfo(struct sysinfo *val)
573 {
574 val->totalram = totalram_pages;
575 val->sharedram = 0;
576 val->freeram = nr_free_pages();
577 val->bufferram = atomic_read(&buffermem_pages);
578 val->totalhigh = totalhigh_pages;
579 val->freehigh = nr_free_highpages();
580 val->mem_unit = PAGE_SIZE;
581 return;
582 }
583