File: /usr/src/linux/arch/i386/mm/init.c

1     /*
2      *  linux/arch/i386/mm/init.c
3      *
4      *  Copyright (C) 1995  Linus Torvalds
5      *
6      *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7      */
8     
9     #include <linux/config.h>
10     #include <linux/signal.h>
11     #include <linux/sched.h>
12     #include <linux/kernel.h>
13     #include <linux/errno.h>
14     #include <linux/string.h>
15     #include <linux/types.h>
16     #include <linux/ptrace.h>
17     #include <linux/mman.h>
18     #include <linux/mm.h>
19     #include <linux/swap.h>
20     #include <linux/smp.h>
21     #include <linux/init.h>
22     #ifdef CONFIG_BLK_DEV_INITRD
23     #include <linux/blk.h>
24     #endif
25     #include <linux/highmem.h>
26     #include <linux/pagemap.h>
27     #include <linux/bootmem.h>
28     
29     #include <asm/processor.h>
30     #include <asm/system.h>
31     #include <asm/uaccess.h>
32     #include <asm/pgtable.h>
33     #include <asm/pgalloc.h>
34     #include <asm/dma.h>
35     #include <asm/fixmap.h>
36     #include <asm/e820.h>
37     #include <asm/apic.h>
38     #include <asm/tlb.h>
39     
40     mmu_gather_t mmu_gathers[NR_CPUS];
41     unsigned long highstart_pfn, highend_pfn;
42     static unsigned long totalram_pages;
43     static unsigned long totalhigh_pages;
44     
45     int do_check_pgt_cache(int low, int high)
46     {
47     	int freed = 0;
48     	if(pgtable_cache_size > high) {
49     		do {
50     			if (pgd_quicklist) {
51     				free_pgd_slow(get_pgd_fast());
52     				freed++;
53     			}
54     			if (pmd_quicklist) {
55     				pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
56     				freed++;
57     			}
58     			if (pte_quicklist) {
59     				pte_free_slow(pte_alloc_one_fast(NULL, 0));
60     				freed++;
61     			}
62     		} while(pgtable_cache_size > low);
63     	}
64     	return freed;
65     }
66     
/*
 * NOTE: pagetable_init() allocates all the fixmap page tables contiguously
 * in physical memory, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

#if CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

/* Walk the kernel page tables (pgd -> pmd -> pte) for a fixmap address. */
#define kmap_get_fixmap_pte(vaddr)					\
	pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))

/*
 * Record the pte that maps the FIX_KMAP_BEGIN fixmap slot in kmap_pte and
 * set kmap_prot to PAGE_KERNEL.
 */
void __init kmap_init(void)
{
	kmap_prot = PAGE_KERNEL;

	/* cache the first kmap pte */
	kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN));
}
#endif /* CONFIG_HIGHMEM */
91     
92     void show_mem(void)
93     {
94     	int i, total = 0, reserved = 0;
95     	int shared = 0, cached = 0;
96     	int highmem = 0;
97     
98     	printk("Mem-info:\n");
99     	show_free_areas();
100     	printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
101     	i = max_mapnr;
102     	while (i-- > 0) {
103     		total++;
104     		if (PageHighMem(mem_map+i))
105     			highmem++;
106     		if (PageReserved(mem_map+i))
107     			reserved++;
108     		else if (PageSwapCache(mem_map+i))
109     			cached++;
110     		else if (page_count(mem_map+i))
111     			shared += page_count(mem_map+i) - 1;
112     	}
113     	printk("%d pages of RAM\n", total);
114     	printk("%d pages of HIGHMEM\n",highmem);
115     	printk("%d reserved pages\n",reserved);
116     	printk("%d pages shared\n",shared);
117     	printk("%d pages swap cached\n",cached);
118     	printk("%ld pages in page table cache\n",pgtable_cache_size);
119     	show_buffers();
120     }
121     
/*
 * References to section boundaries.  The ones this file uses (_text,
 * _etext, _edata, __init_begin, __init_end) are only ever referenced by
 * address, to size or walk a region of the kernel image.
 */
extern char _text;
extern char _etext;
extern char _edata;
extern char __bss_start;
extern char _end;
extern char __init_begin;
extern char __init_end;
126     
127     static inline void set_pte_phys (unsigned long vaddr,
128     			unsigned long phys, pgprot_t flags)
129     {
130     	pgprot_t prot;
131     	pgd_t *pgd;
132     	pmd_t *pmd;
133     	pte_t *pte;
134     
135     	pgd = swapper_pg_dir + __pgd_offset(vaddr);
136     	if (pgd_none(*pgd)) {
137     		printk("PAE BUG #00!\n");
138     		return;
139     	}
140     	pmd = pmd_offset(pgd, vaddr);
141     	if (pmd_none(*pmd)) {
142     		printk("PAE BUG #01!\n");
143     		return;
144     	}
145     	pte = pte_offset(pmd, vaddr);
146     	if (pte_val(*pte))
147     		pte_ERROR(*pte);
148     	pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
149     	set_pte(pte, mk_pte_phys(phys, prot));
150     
151     	/*
152     	 * It's enough to flush this one mapping.
153     	 * (PGE mappings get flushed as well)
154     	 */
155     	__flush_tlb_one(vaddr);
156     }
157     
158     void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
159     {
160     	unsigned long address = __fix_to_virt(idx);
161     
162     	if (idx >= __end_of_fixed_addresses) {
163     		printk("Invalid __set_fixmap\n");
164     		return;
165     	}
166     	set_pte_phys(address, phys, flags);
167     }
168     
169     static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
170     {
171     	pgd_t *pgd;
172     	pmd_t *pmd;
173     	pte_t *pte;
174     	int i, j;
175     	unsigned long vaddr;
176     
177     	vaddr = start;
178     	i = __pgd_offset(vaddr);
179     	j = __pmd_offset(vaddr);
180     	pgd = pgd_base + i;
181     
182     	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
183     #if CONFIG_X86_PAE
184     		if (pgd_none(*pgd)) {
185     			pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
186     			set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
187     			if (pmd != pmd_offset(pgd, 0))
188     				printk("PAE BUG #02!\n");
189     		}
190     		pmd = pmd_offset(pgd, vaddr);
191     #else
192     		pmd = (pmd_t *)pgd;
193     #endif
194     		for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
195     			if (pmd_none(*pmd)) {
196     				pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
197     				set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
198     				if (pte != pte_offset(pmd, 0))
199     					BUG();
200     			}
201     			vaddr += PMD_SIZE;
202     		}
203     		j = 0;
204     	}
205     }
206     
207     static void __init pagetable_init (void)
208     {
209     	unsigned long vaddr, end;
210     	pgd_t *pgd, *pgd_base;
211     	int i, j, k;
212     	pmd_t *pmd;
213     	pte_t *pte, *pte_base;
214     
215     	/*
216     	 * This can be zero as well - no problem, in that case we exit
217     	 * the loops anyway due to the PTRS_PER_* conditions.
218     	 */
219     	end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
220     
221     	pgd_base = swapper_pg_dir;
222     #if CONFIG_X86_PAE
223     	for (i = 0; i < PTRS_PER_PGD; i++)
224     		set_pgd(pgd_base + i, __pgd(1 + __pa(empty_zero_page)));
225     #endif
226     	i = __pgd_offset(PAGE_OFFSET);
227     	pgd = pgd_base + i;
228     
229     	for (; i < PTRS_PER_PGD; pgd++, i++) {
230     		vaddr = i*PGDIR_SIZE;
231     		if (end && (vaddr >= end))
232     			break;
233     #if CONFIG_X86_PAE
234     		pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
235     		set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
236     #else
237     		pmd = (pmd_t *)pgd;
238     #endif
239     		if (pmd != pmd_offset(pgd, 0))
240     			BUG();
241     		for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
242     			vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
243     			if (end && (vaddr >= end))
244     				break;
245     			if (cpu_has_pse) {
246     				unsigned long __pe;
247     
248     				set_in_cr4(X86_CR4_PSE);
249     				boot_cpu_data.wp_works_ok = 1;
250     				__pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
251     				/* Make it "global" too if supported */
252     				if (cpu_has_pge) {
253     					set_in_cr4(X86_CR4_PGE);
254     					__pe += _PAGE_GLOBAL;
255     				}
256     				set_pmd(pmd, __pmd(__pe));
257     				continue;
258     			}
259     
260     			pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
261     
262     			for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
263     				vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
264     				if (end && (vaddr >= end))
265     					break;
266     				*pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
267     			}
268     			set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
269     			if (pte_base != pte_offset(pmd, 0))
270     				BUG();
271     
272     		}
273     	}
274     
275     	/*
276     	 * Fixed mappings, only the page table structure has to be
277     	 * created - mappings will be set by set_fixmap():
278     	 */
279     	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
280     	fixrange_init(vaddr, 0, pgd_base);
281     
282     #if CONFIG_HIGHMEM
283     	/*
284     	 * Permanent kmaps:
285     	 */
286     	vaddr = PKMAP_BASE;
287     	fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
288     
289     	pgd = swapper_pg_dir + __pgd_offset(vaddr);
290     	pmd = pmd_offset(pgd, vaddr);
291     	pte = pte_offset(pmd, vaddr);
292     	pkmap_page_table = pte;
293     #endif
294     
295     #if CONFIG_X86_PAE
296     	/*
297     	 * Add low memory identity-mappings - SMP needs it when
298     	 * starting up on an AP from real-mode. In the non-PAE
299     	 * case we already have these mappings through head.S.
300     	 * All user-space mappings are explicitly cleared after
301     	 * SMP startup.
302     	 */
303     	pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
304     #endif
305     }
306     
307     void __init zap_low_mappings (void)
308     {
309     	int i;
310     	/*
311     	 * Zap initial low-memory mappings.
312     	 *
313     	 * Note that "pgd_clear()" doesn't do it for
314     	 * us, because pgd_clear() is a no-op on i386.
315     	 */
316     	for (i = 0; i < USER_PTRS_PER_PGD; i++)
317     #if CONFIG_X86_PAE
318     		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
319     #else
320     		set_pgd(swapper_pg_dir+i, __pgd(0));
321     #endif
322     	flush_tlb_all();
323     }
324     
325     /*
326      * paging_init() sets up the page tables - note that the first 8MB are
327      * already mapped by head.S.
328      *
329      * This routines also unmaps the page at virtual kernel address 0, so
330      * that we can trap those pesky NULL-reference errors in the kernel.
331      */
332     void __init paging_init(void)
333     {
334     	pagetable_init();
335     
336     	__asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));
337     
338     #if CONFIG_X86_PAE
339     	/*
340     	 * We will bail out later - printk doesnt work right now so
341     	 * the user would just see a hanging kernel.
342     	 */
343     	if (cpu_has_pae)
344     		set_in_cr4(X86_CR4_PAE);
345     #endif
346     
347     	__flush_tlb_all();
348     
349     #ifdef CONFIG_HIGHMEM
350     	kmap_init();
351     #endif
352     	{
353     		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
354     		unsigned int max_dma, high, low;
355     
356     		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
357     		low = max_low_pfn;
358     		high = highend_pfn;
359     
360     		if (low < max_dma)
361     			zones_size[ZONE_DMA] = low;
362     		else {
363     			zones_size[ZONE_DMA] = max_dma;
364     			zones_size[ZONE_NORMAL] = low - max_dma;
365     #ifdef CONFIG_HIGHMEM
366     			zones_size[ZONE_HIGHMEM] = high - low;
367     #endif
368     		}
369     		free_area_init(zones_size);
370     	}
371     	return;
372     }
373     
374     /*
375      * Test if the WP bit works in supervisor mode. It isn't supported on 386's
376      * and also on some strange 486's (NexGen etc.). All 586+'s are OK. The jumps
377      * before and after the test are here to work-around some nasty CPU bugs.
378      */
379     
380     /*
381      * This function cannot be __init, since exceptions don't work in that
382      * section.
383      */
384     static int do_test_wp_bit(unsigned long vaddr);
385     
386     void __init test_wp_bit(void)
387     {
388     /*
389      * Ok, all PSE-capable CPUs are definitely handling the WP bit right.
390      */
391     	const unsigned long vaddr = PAGE_OFFSET;
392     	pgd_t *pgd;
393     	pmd_t *pmd;
394     	pte_t *pte, old_pte;
395     
396     	printk("Checking if this processor honours the WP bit even in supervisor mode... ");
397     
398     	pgd = swapper_pg_dir + __pgd_offset(vaddr);
399     	pmd = pmd_offset(pgd, vaddr);
400     	pte = pte_offset(pmd, vaddr);
401     	old_pte = *pte;
402     	*pte = mk_pte_phys(0, PAGE_READONLY);
403     	local_flush_tlb();
404     
405     	boot_cpu_data.wp_works_ok = do_test_wp_bit(vaddr);
406     
407     	*pte = old_pte;
408     	local_flush_tlb();
409     
410     	if (!boot_cpu_data.wp_works_ok) {
411     		printk("No.\n");
412     #ifdef CONFIG_X86_WP_WORKS_OK
413     		panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
414     #endif
415     	} else {
416     		printk("Ok.\n");
417     	}
418     }
419     
420     static inline int page_is_ram (unsigned long pagenr)
421     {
422     	int i;
423     
424     	for (i = 0; i < e820.nr_map; i++) {
425     		unsigned long addr, end;
426     
427     		if (e820.map[i].type != E820_RAM)	/* not usable memory */
428     			continue;
429     		/*
430     		 *	!!!FIXME!!! Some BIOSen report areas as RAM that
431     		 *	are not. Notably the 640->1Mb area. We need a sanity
432     		 *	check here.
433     		 */
434     		addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
435     		end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
436     		if  ((pagenr >= addr) && (pagenr < end))
437     			return 1;
438     	}
439     	return 0;
440     }
441     
442     void __init mem_init(void)
443     {
444     	int codesize, reservedpages, datasize, initsize;
445     	int tmp;
446     
447     	if (!mem_map)
448     		BUG();
449     
450     #ifdef CONFIG_HIGHMEM
451     	highmem_start_page = mem_map + highstart_pfn;
452     	max_mapnr = num_physpages = highend_pfn;
453     #else
454     	max_mapnr = num_physpages = max_low_pfn;
455     #endif
456     	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
457     
458     	/* clear the zero-page */
459     	memset(empty_zero_page, 0, PAGE_SIZE);
460     
461     	/* this will put all low memory onto the freelists */
462     	totalram_pages += free_all_bootmem();
463     
464     	reservedpages = 0;
465     	for (tmp = 0; tmp < max_low_pfn; tmp++)
466     		/*
467     		 * Only count reserved RAM pages
468     		 */
469     		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
470     			reservedpages++;
471     #ifdef CONFIG_HIGHMEM
472     	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
473     		struct page *page = mem_map + tmp;
474     
475     		if (!page_is_ram(tmp)) {
476     			SetPageReserved(page);
477     			continue;
478     		}
479     		ClearPageReserved(page);
480     		set_bit(PG_highmem, &page->flags);
481     		atomic_set(&page->count, 1);
482     		__free_page(page);
483     		totalhigh_pages++;
484     	}
485     	totalram_pages += totalhigh_pages;
486     #endif
487     	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
488     	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
489     	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
490     
491     	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
492     		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
493     		max_mapnr << (PAGE_SHIFT-10),
494     		codesize >> 10,
495     		reservedpages << (PAGE_SHIFT-10),
496     		datasize >> 10,
497     		initsize >> 10,
498     		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
499     	       );
500     
501     #if CONFIG_X86_PAE
502     	if (!cpu_has_pae)
503     		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
504     #endif
505     	if (boot_cpu_data.wp_works_ok < 0)
506     		test_wp_bit();
507     
508     	/*
509     	 * Subtle. SMP is doing it's boot stuff late (because it has to
510     	 * fork idle threads) - but it also needs low mappings for the
511     	 * protected-mode entry to work. We zap these entries only after
512     	 * the WP-bit has been tested.
513     	 */
514     #ifndef CONFIG_SMP
515     	zap_low_mappings();
516     #endif
517     
518     }
519     
/*
 * Put this after the callers, so that it cannot be inlined.
 *
 * Read the byte at @vaddr and store the same byte back.  The result
 * operand starts out as 1 and is cleared by the xorl that follows the
 * store.  The __ex_table entry pairs label 1 (the store) with label 2
 * (just past the xorl), so - by the kernel's exception-table convention -
 * a fault on the store resumes at 2 and the 1 survives.
 *
 * Returns 1 if the store faulted, 0 if it went through.
 */
static int do_test_wp_bit(unsigned long vaddr)
{
	char scratch;
	int faulted;

	__asm__ __volatile__(
		"	movb %0,%1	\n"
		"1:	movb %1,%0	\n"
		"	xorl %2,%2	\n"
		"2:			\n"
		".section __ex_table,\"a\"\n"
		"	.align 4	\n"
		"	.long 1b,2b	\n"
		".previous		\n"
		:"=m" (*(char *) vaddr),
		 "=q" (scratch),
		 "=r" (faulted)
		:"2" (1)
		:"memory");

	return faulted;
}
543     
544     void free_initmem(void)
545     {
546     	unsigned long addr;
547     
548     	addr = (unsigned long)(&__init_begin);
549     	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
550     		ClearPageReserved(virt_to_page(addr));
551     		set_page_count(virt_to_page(addr), 1);
552     		free_page(addr);
553     		totalram_pages++;
554     	}
555     	printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
556     }
557     
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * Release the pages in the virtual range [start, end) to the page
 * allocator, one PAGE_SIZE step at a time, counting each in
 * totalram_pages.  Nothing is printed or freed when the range is empty.
 */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	if (start >= end)
		return;

	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);

	do {
		struct page *page = virt_to_page(start);

		ClearPageReserved(page);
		set_page_count(page, 1);
		free_page(start);
		totalram_pages++;

		start += PAGE_SIZE;
	} while (start < end);
}
#endif
571     
572     void si_meminfo(struct sysinfo *val)
573     {
574     	val->totalram = totalram_pages;
575     	val->sharedram = 0;
576     	val->freeram = nr_free_pages();
577     	val->bufferram = atomic_read(&buffermem_pages);
578     	val->totalhigh = totalhigh_pages;
579     	val->freehigh = nr_free_highpages();
580     	val->mem_unit = PAGE_SIZE;
581     	return;
582     }
583