File: /usr/src/linux/arch/ia64/mm/init.c

1     /*
2      * Initialize MMU support.
3      *
4      * Copyright (C) 1998-2001 Hewlett-Packard Co
5      * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
6      */
7     #include <linux/config.h>
8     #include <linux/kernel.h>
9     #include <linux/init.h>
10     
11     #include <linux/bootmem.h>
12     #include <linux/mm.h>
13     #include <linux/reboot.h>
14     #include <linux/slab.h>
15     #include <linux/swap.h>
16     
17     #include <asm/bitops.h>
18     #include <asm/dma.h>
19     #include <asm/efi.h>
20     #include <asm/ia32.h>
21     #include <asm/io.h>
22     #include <asm/machvec.h>
23     #include <asm/pgalloc.h>
24     #include <asm/sal.h>
25     #include <asm/system.h>
26     #include <asm/uaccess.h>
27     #include <asm/tlb.h>
28     
/* One mmu_gather_t slot per possible CPU (array is sized by NR_CPUS). */
mmu_gather_t mmu_gathers[NR_CPUS];

/*
 * References to section boundaries.  Only their addresses are used in
 * this file: to size the code/data/init regions in mem_init() and to
 * walk the init region in free_initmem().
 */
extern char _stext, _etext, _edata, __init_begin, __init_end;

/* Defined outside this file; called as the last step of ia64_mmu_init(). */
extern void ia64_tlb_init (void);

/*
 * Kernel virtual address 4GB above PAGE_OFFSET.  paging_init() converts
 * it to a page frame number to decide how large ZONE_DMA is.
 */
unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;

/*
 * Running count of RAM pages; incremented by mem_init(), free_initmem()
 * and free_initrd_mem(), and reported through si_meminfo().
 */
static unsigned long totalram_pages;
39     
40     int
41     do_check_pgt_cache (int low, int high)
42     {
43     	int freed = 0;
44     
45     	if (pgtable_cache_size > high) {
46     		do {
47     			if (pgd_quicklist)
48     				free_page((unsigned long)pgd_alloc_one_fast(0)), ++freed;
49     			if (pmd_quicklist)
50     				free_page((unsigned long)pmd_alloc_one_fast(0, 0)), ++freed;
51     			if (pte_quicklist)
52     				free_page((unsigned long)pte_alloc_one_fast(0, 0)), ++freed;
53     		} while (pgtable_cache_size > low);
54     	}
55     	return freed;
56     }
57     
58     /*
59      * This performs some platform-dependent address space initialization.
60      * On IA-64, we want to setup the VM area for the register backing
61      * store (which grows upwards) and install the gateway page which is
62      * used for signal trampolines, etc.
63      */
64     void
65     ia64_init_addr_space (void)
66     {
67     	struct vm_area_struct *vma;
68     
69     	/*
70     	 * If we're out of memory and kmem_cache_alloc() returns NULL,
71     	 * we simply ignore the problem.  When the process attempts to
72     	 * write to the register backing store for the first time, it
73     	 * will get a SEGFAULT in this case.
74     	 */
75     	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
76     	if (vma) {
77     		vma->vm_mm = current->mm;
78     		vma->vm_start = IA64_RBS_BOT;
79     		vma->vm_end = vma->vm_start + PAGE_SIZE;
80     		vma->vm_page_prot = PAGE_COPY;
81     		vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
82     		vma->vm_ops = NULL;
83     		vma->vm_pgoff = 0;
84     		vma->vm_file = NULL;
85     		vma->vm_private_data = NULL;
86     		insert_vm_struct(current->mm, vma);
87     	}
88     }
89     
90     void
91     free_initmem (void)
92     {
93     	unsigned long addr;
94     
95     	addr = (unsigned long) &__init_begin;
96     	for (; addr < (unsigned long) &__init_end; addr += PAGE_SIZE) {
97     		clear_bit(PG_reserved, &virt_to_page(addr)->flags);
98     		set_page_count(virt_to_page(addr), 1);
99     		free_page(addr);
100     		++totalram_pages;
101     	}
102     	printk ("Freeing unused kernel memory: %ldkB freed\n",
103     		(&__init_end - &__init_begin) >> 10);
104     }
105     
106     void
107     free_initrd_mem(unsigned long start, unsigned long end)
108     {
109     	/*
110     	 * EFI uses 4KB pages while the kernel can use 4KB  or bigger.
111     	 * Thus EFI and the kernel may have different page sizes. It is
112     	 * therefore possible to have the initrd share the same page as
113     	 * the end of the kernel (given current setup).
114     	 *
115     	 * To avoid freeing/using the wrong page (kernel sized) we:
116     	 *	- align up the beginning of initrd
117     	 *	- align down the end of initrd
118     	 *
119     	 *  |             |
120     	 *  |=============| a000
121     	 *  |             |
122     	 *  |             |
123     	 *  |             | 9000
124     	 *  |/////////////|
125     	 *  |/////////////|
126     	 *  |=============| 8000
127     	 *  |///INITRD////|
128     	 *  |/////////////|
129     	 *  |/////////////| 7000
130     	 *  |             |
131     	 *  |KKKKKKKKKKKKK|
132     	 *  |=============| 6000
133     	 *  |KKKKKKKKKKKKK|
134     	 *  |KKKKKKKKKKKKK|
135     	 *  K=kernel using 8KB pages
136     	 *
137     	 * In this example, we must free page 8000 ONLY. So we must align up
138     	 * initrd_start and keep initrd_end as is.
139     	 */
140     	start = PAGE_ALIGN(start);
141     	end = end & PAGE_MASK;
142     
143     	if (start < end)
144     		printk ("Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);
145     
146     	for (; start < end; start += PAGE_SIZE) {
147     		if (!VALID_PAGE(virt_to_page(start)))
148     			continue;
149     		clear_bit(PG_reserved, &virt_to_page(start)->flags);
150     		set_page_count(virt_to_page(start), 1);
151     		free_page(start);
152     		++totalram_pages;
153     	}
154     }
155     
156     void
157     si_meminfo (struct sysinfo *val)
158     {
159     	val->totalram = totalram_pages;
160     	val->sharedram = 0;
161     	val->freeram = nr_free_pages();
162     	val->bufferram = atomic_read(&buffermem_pages);
163     	val->totalhigh = 0;
164     	val->freehigh = 0;
165     	val->mem_unit = PAGE_SIZE;
166     	return;
167     }
168     
169     void
170     show_mem (void)
171     {
172     	int i, total = 0, reserved = 0;
173     	int shared = 0, cached = 0;
174     
175     	printk("Mem-info:\n");
176     	show_free_areas();
177     	printk("Free swap:       %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
178     	i = max_mapnr;
179     	while (i-- > 0) {
180     		total++;
181     		if (PageReserved(mem_map+i))
182     			reserved++;
183     		else if (PageSwapCache(mem_map+i))
184     			cached++;
185     		else if (page_count(mem_map + i))
186     			shared += page_count(mem_map + i) - 1;
187     	}
188     	printk("%d pages of RAM\n", total);
189     	printk("%d reserved pages\n", reserved);
190     	printk("%d pages shared\n", shared);
191     	printk("%d pages swap cached\n", cached);
192     	printk("%ld pages in page table cache\n", pgtable_cache_size);
193     	show_buffers();
194     }
195     
196     /*
197      * This is like put_dirty_page() but installs a clean page with PAGE_GATE protection
198      * (execute-only, typically).
199      */
200     struct page *
201     put_gate_page (struct page *page, unsigned long address)
202     {
203     	pgd_t *pgd;
204     	pmd_t *pmd;
205     	pte_t *pte;
206     
207     	if (!PageReserved(page))
208     		printk("put_gate_page: gate page at 0x%p not in reserved memory\n",
209     		       page_address(page));
210     
211     	pgd = pgd_offset_k(address);		/* note: this is NOT pgd_offset()! */
212     
213     	spin_lock(&init_mm.page_table_lock);
214     	{
215     		pmd = pmd_alloc(&init_mm, pgd, address);
216     		if (!pmd)
217     			goto out;
218     		pte = pte_alloc(&init_mm, pmd, address);
219     		if (!pte)
220     			goto out;
221     		if (!pte_none(*pte)) {
222     			pte_ERROR(*pte);
223     			goto out;
224     		}
225     		flush_page_to_ram(page);
226     		set_pte(pte, mk_pte(page, PAGE_GATE));
227     	}
228       out:	spin_unlock(&init_mm.page_table_lock);
229     	/* no need for flush_tlb */
230     	return page;
231     }
232     
233     void __init
234     ia64_mmu_init (void *my_cpu_data)
235     {
236     	unsigned long flags, rid, pta, impl_va_bits;
237     	extern void __init tlb_init (void);
238     #ifdef CONFIG_DISABLE_VHPT
239     #	define VHPT_ENABLE_BIT	0
240     #else
241     #	define VHPT_ENABLE_BIT	1
242     #endif
243     
244     	/*
245     	 * Set up the kernel identity mapping for regions 6 and 5.  The mapping for region
246     	 * 7 is setup up in _start().
247     	 */
248     	ia64_clear_ic(flags);
249     
250     	rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET);
251     	ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (KERNEL_PG_SHIFT << 2));
252     
253     	rid = ia64_rid(IA64_REGION_ID_KERNEL, VMALLOC_START);
254     	ia64_set_rr(VMALLOC_START, (rid << 8) | (PAGE_SHIFT << 2) | 1);
255     
256     	/* ensure rr6 is up-to-date before inserting the PERCPU_ADDR translation: */
257     	ia64_srlz_d();
258     
259     	ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
260     		 pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL)), PAGE_SHIFT);
261     
262     	__restore_flags(flags);
263     	ia64_srlz_i();
264     
265     	/*
266     	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
267     	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
268     	 * virtual address space are implemented but if we pick a large enough page size
269     	 * (e.g., 64KB), the mapped address space is big enough that it will overlap with
270     	 * VMLPT.  I assume that once we run on machines big enough to warrant 64KB pages,
271     	 * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
272     	 * problem in practice.  Alternatively, we could truncate the top of the mapped
273     	 * address space to not permit mappings that would overlap with the VMLPT.
274     	 * --davidm 00/12/06
275     	 */
276     #	define pte_bits			3
277     #	define mapped_space_bits	(3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
278     	/*
279     	 * The virtual page table has to cover the entire implemented address space within
280     	 * a region even though not all of this space may be mappable.  The reason for
281     	 * this is that the Access bit and Dirty bit fault handlers perform
282     	 * non-speculative accesses to the virtual page table, so the address range of the
283     	 * virtual page table itself needs to be covered by virtual page table.
284     	 */
285     #	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
286     #	define POW2(n)			(1ULL << (n))
287     
288     	impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
289     
290     	if (impl_va_bits < 51 || impl_va_bits > 61)
291     		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
292     
293     	/* place the VMLPT at the end of each page-table mapped region: */
294     	pta = POW2(61) - POW2(vmlpt_bits);
295     
296     	if (POW2(mapped_space_bits) >= pta)
297     		panic("mm/init: overlap between virtually mapped linear page table and "
298     		      "mapped kernel space!");
299     	/*
300     	 * Set the (virtually mapped linear) page table address.  Bit
301     	 * 8 selects between the short and long format, bits 2-7 the
302     	 * size of the table, and bit 0 whether the VHPT walker is
303     	 * enabled.
304     	 */
305     	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
306     
307     	ia64_tlb_init();
308     }
309     
310     /*
311      * Set up the page tables.
312      */
313     void
314     paging_init (void)
315     {
316     	unsigned long max_dma, zones_size[MAX_NR_ZONES];
317     
318     	clear_page((void *) ZERO_PAGE_ADDR);
319     
320     	/* initialize mem_map[] */
321     
322     	memset(zones_size, 0, sizeof(zones_size));
323     
324     	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
325     	if (max_low_pfn < max_dma)
326     		zones_size[ZONE_DMA] = max_low_pfn;
327     	else {
328     		zones_size[ZONE_DMA] = max_dma;
329     		zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
330     	}
331     	free_area_init(zones_size);
332     }
333     
334     static int
335     count_pages (u64 start, u64 end, void *arg)
336     {
337     	unsigned long *count = arg;
338     
339     	*count += (end - start) >> PAGE_SHIFT;
340     	return 0;
341     }
342     
343     static int
344     count_reserved_pages (u64 start, u64 end, void *arg)
345     {
346     	unsigned long num_reserved = 0;
347     	unsigned long *count = arg;
348     	struct page *pg;
349     
350     	for (pg = virt_to_page(start); pg < virt_to_page(end); ++pg)
351     		if (PageReserved(pg))
352     			++num_reserved;
353     	*count += num_reserved;
354     	return 0;
355     }
356     
357     void
358     mem_init (void)
359     {
360     	extern char __start_gate_section[];
361     	long reserved_pages, codesize, datasize, initsize;
362     	unsigned long num_pgt_pages;
363     
364     #ifdef CONFIG_PCI
365     	/*
366     	 * This needs to be called _after_ the command line has been parsed but _before_
367     	 * any drivers that may need the PCI DMA interface are initialized or bootmem has
368     	 * been freed.
369     	 */
370     	platform_pci_dma_init();
371     #endif
372     
373     	if (!mem_map)
374     		BUG();
375     
376     	num_physpages = 0;
377     	efi_memmap_walk(count_pages, &num_physpages);
378     
379     	max_mapnr = max_low_pfn;
380     	high_memory = __va(max_low_pfn * PAGE_SIZE);
381     
382     	totalram_pages += free_all_bootmem();
383     
384     	reserved_pages = 0;
385     	efi_memmap_walk(count_reserved_pages, &reserved_pages);
386     
387     	codesize =  (unsigned long) &_etext - (unsigned long) &_stext;
388     	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
389     	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
390     
391     	printk("Memory: %luk/%luk available (%luk code, %luk reserved, %luk data, %luk init)\n",
392     	       (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
393     	       max_mapnr << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10),
394     	       datasize >> 10, initsize >> 10);
395     
396     	/*
397     	 * Allow for enough (cached) page table pages so that we can map the entire memory
398     	 * at least once.  Each task also needs a couple of page tables pages, so add in a
399     	 * fudge factor for that (don't use "threads-max" here; that would be wrong!).
400     	 * Don't allow the cache to be more than 10% of total memory, though.
401     	 */
402     #	define NUM_TASKS	500	/* typical number of tasks */
403     	num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
404     	if (num_pgt_pages > nr_free_pages() / 10)
405     		num_pgt_pages = nr_free_pages() / 10;
406     	if (num_pgt_pages > pgt_cache_water[1])
407     		pgt_cache_water[1] = num_pgt_pages;
408     
409     	/* install the gate page in the global page table: */
410     	put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR);
411     
412     #ifdef CONFIG_IA32_SUPPORT
413     	ia32_gdt_init();
414     #endif
415     }
416