File: /usr/src/linux/arch/ia64/mm/init.c
1 /*
2 * Initialize MMU support.
3 *
4 * Copyright (C) 1998-2001 Hewlett-Packard Co
5 * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7 #include <linux/config.h>
8 #include <linux/kernel.h>
9 #include <linux/init.h>
10
11 #include <linux/bootmem.h>
12 #include <linux/mm.h>
13 #include <linux/reboot.h>
14 #include <linux/slab.h>
15 #include <linux/swap.h>
16
17 #include <asm/bitops.h>
18 #include <asm/dma.h>
19 #include <asm/efi.h>
20 #include <asm/ia32.h>
21 #include <asm/io.h>
22 #include <asm/machvec.h>
23 #include <asm/pgalloc.h>
24 #include <asm/sal.h>
25 #include <asm/system.h>
26 #include <asm/uaccess.h>
27 #include <asm/tlb.h>
28
29 mmu_gather_t mmu_gathers[NR_CPUS];
30
31 /* References to section boundaries: */
32 extern char _stext, _etext, _edata, __init_begin, __init_end;
33
34 extern void ia64_tlb_init (void);
35
36 unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
37
38 static unsigned long totalram_pages;
39
40 int
41 do_check_pgt_cache (int low, int high)
42 {
43 int freed = 0;
44
45 if (pgtable_cache_size > high) {
46 do {
47 if (pgd_quicklist)
48 free_page((unsigned long)pgd_alloc_one_fast(0)), ++freed;
49 if (pmd_quicklist)
50 free_page((unsigned long)pmd_alloc_one_fast(0, 0)), ++freed;
51 if (pte_quicklist)
52 free_page((unsigned long)pte_alloc_one_fast(0, 0)), ++freed;
53 } while (pgtable_cache_size > low);
54 }
55 return freed;
56 }
57
58 /*
59 * This performs some platform-dependent address space initialization.
60 * On IA-64, we want to setup the VM area for the register backing
61 * store (which grows upwards) and install the gateway page which is
62 * used for signal trampolines, etc.
63 */
64 void
65 ia64_init_addr_space (void)
66 {
67 struct vm_area_struct *vma;
68
69 /*
70 * If we're out of memory and kmem_cache_alloc() returns NULL,
71 * we simply ignore the problem. When the process attempts to
72 * write to the register backing store for the first time, it
73 * will get a SEGFAULT in this case.
74 */
75 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
76 if (vma) {
77 vma->vm_mm = current->mm;
78 vma->vm_start = IA64_RBS_BOT;
79 vma->vm_end = vma->vm_start + PAGE_SIZE;
80 vma->vm_page_prot = PAGE_COPY;
81 vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
82 vma->vm_ops = NULL;
83 vma->vm_pgoff = 0;
84 vma->vm_file = NULL;
85 vma->vm_private_data = NULL;
86 insert_vm_struct(current->mm, vma);
87 }
88 }
89
90 void
91 free_initmem (void)
92 {
93 unsigned long addr;
94
95 addr = (unsigned long) &__init_begin;
96 for (; addr < (unsigned long) &__init_end; addr += PAGE_SIZE) {
97 clear_bit(PG_reserved, &virt_to_page(addr)->flags);
98 set_page_count(virt_to_page(addr), 1);
99 free_page(addr);
100 ++totalram_pages;
101 }
102 printk ("Freeing unused kernel memory: %ldkB freed\n",
103 (&__init_end - &__init_begin) >> 10);
104 }
105
106 void
107 free_initrd_mem(unsigned long start, unsigned long end)
108 {
109 /*
110 * EFI uses 4KB pages while the kernel can use 4KB or bigger.
111 * Thus EFI and the kernel may have different page sizes. It is
112 * therefore possible to have the initrd share the same page as
113 * the end of the kernel (given current setup).
114 *
115 * To avoid freeing/using the wrong page (kernel sized) we:
116 * - align up the beginning of initrd
117 * - align down the end of initrd
118 *
119 * | |
120 * |=============| a000
121 * | |
122 * | |
123 * | | 9000
124 * |/////////////|
125 * |/////////////|
126 * |=============| 8000
127 * |///INITRD////|
128 * |/////////////|
129 * |/////////////| 7000
130 * | |
131 * |KKKKKKKKKKKKK|
132 * |=============| 6000
133 * |KKKKKKKKKKKKK|
134 * |KKKKKKKKKKKKK|
135 * K=kernel using 8KB pages
136 *
137 * In this example, we must free page 8000 ONLY. So we must align up
138 * initrd_start and keep initrd_end as is.
139 */
140 start = PAGE_ALIGN(start);
141 end = end & PAGE_MASK;
142
143 if (start < end)
144 printk ("Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);
145
146 for (; start < end; start += PAGE_SIZE) {
147 if (!VALID_PAGE(virt_to_page(start)))
148 continue;
149 clear_bit(PG_reserved, &virt_to_page(start)->flags);
150 set_page_count(virt_to_page(start), 1);
151 free_page(start);
152 ++totalram_pages;
153 }
154 }
155
156 void
157 si_meminfo (struct sysinfo *val)
158 {
159 val->totalram = totalram_pages;
160 val->sharedram = 0;
161 val->freeram = nr_free_pages();
162 val->bufferram = atomic_read(&buffermem_pages);
163 val->totalhigh = 0;
164 val->freehigh = 0;
165 val->mem_unit = PAGE_SIZE;
166 return;
167 }
168
169 void
170 show_mem (void)
171 {
172 int i, total = 0, reserved = 0;
173 int shared = 0, cached = 0;
174
175 printk("Mem-info:\n");
176 show_free_areas();
177 printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
178 i = max_mapnr;
179 while (i-- > 0) {
180 total++;
181 if (PageReserved(mem_map+i))
182 reserved++;
183 else if (PageSwapCache(mem_map+i))
184 cached++;
185 else if (page_count(mem_map + i))
186 shared += page_count(mem_map + i) - 1;
187 }
188 printk("%d pages of RAM\n", total);
189 printk("%d reserved pages\n", reserved);
190 printk("%d pages shared\n", shared);
191 printk("%d pages swap cached\n", cached);
192 printk("%ld pages in page table cache\n", pgtable_cache_size);
193 show_buffers();
194 }
195
196 /*
197 * This is like put_dirty_page() but installs a clean page with PAGE_GATE protection
198 * (execute-only, typically).
199 */
200 struct page *
201 put_gate_page (struct page *page, unsigned long address)
202 {
203 pgd_t *pgd;
204 pmd_t *pmd;
205 pte_t *pte;
206
207 if (!PageReserved(page))
208 printk("put_gate_page: gate page at 0x%p not in reserved memory\n",
209 page_address(page));
210
211 pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */
212
213 spin_lock(&init_mm.page_table_lock);
214 {
215 pmd = pmd_alloc(&init_mm, pgd, address);
216 if (!pmd)
217 goto out;
218 pte = pte_alloc(&init_mm, pmd, address);
219 if (!pte)
220 goto out;
221 if (!pte_none(*pte)) {
222 pte_ERROR(*pte);
223 goto out;
224 }
225 flush_page_to_ram(page);
226 set_pte(pte, mk_pte(page, PAGE_GATE));
227 }
228 out: spin_unlock(&init_mm.page_table_lock);
229 /* no need for flush_tlb */
230 return page;
231 }
232
233 void __init
234 ia64_mmu_init (void *my_cpu_data)
235 {
236 unsigned long flags, rid, pta, impl_va_bits;
237 extern void __init tlb_init (void);
238 #ifdef CONFIG_DISABLE_VHPT
239 # define VHPT_ENABLE_BIT 0
240 #else
241 # define VHPT_ENABLE_BIT 1
242 #endif
243
244 /*
245 * Set up the kernel identity mapping for regions 6 and 5. The mapping for region
246 * 7 is setup up in _start().
247 */
248 ia64_clear_ic(flags);
249
250 rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET);
251 ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (KERNEL_PG_SHIFT << 2));
252
253 rid = ia64_rid(IA64_REGION_ID_KERNEL, VMALLOC_START);
254 ia64_set_rr(VMALLOC_START, (rid << 8) | (PAGE_SHIFT << 2) | 1);
255
256 /* ensure rr6 is up-to-date before inserting the PERCPU_ADDR translation: */
257 ia64_srlz_d();
258
259 ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
260 pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL)), PAGE_SHIFT);
261
262 __restore_flags(flags);
263 ia64_srlz_i();
264
265 /*
266 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
267 * address space. The IA-64 architecture guarantees that at least 50 bits of
268 * virtual address space are implemented but if we pick a large enough page size
269 * (e.g., 64KB), the mapped address space is big enough that it will overlap with
270 * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages,
271 * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
272 * problem in practice. Alternatively, we could truncate the top of the mapped
273 * address space to not permit mappings that would overlap with the VMLPT.
274 * --davidm 00/12/06
275 */
276 # define pte_bits 3
277 # define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
278 /*
279 * The virtual page table has to cover the entire implemented address space within
280 * a region even though not all of this space may be mappable. The reason for
281 * this is that the Access bit and Dirty bit fault handlers perform
282 * non-speculative accesses to the virtual page table, so the address range of the
283 * virtual page table itself needs to be covered by virtual page table.
284 */
285 # define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
286 # define POW2(n) (1ULL << (n))
287
288 impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
289
290 if (impl_va_bits < 51 || impl_va_bits > 61)
291 panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
292
293 /* place the VMLPT at the end of each page-table mapped region: */
294 pta = POW2(61) - POW2(vmlpt_bits);
295
296 if (POW2(mapped_space_bits) >= pta)
297 panic("mm/init: overlap between virtually mapped linear page table and "
298 "mapped kernel space!");
299 /*
300 * Set the (virtually mapped linear) page table address. Bit
301 * 8 selects between the short and long format, bits 2-7 the
302 * size of the table, and bit 0 whether the VHPT walker is
303 * enabled.
304 */
305 ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
306
307 ia64_tlb_init();
308 }
309
310 /*
311 * Set up the page tables.
312 */
313 void
314 paging_init (void)
315 {
316 unsigned long max_dma, zones_size[MAX_NR_ZONES];
317
318 clear_page((void *) ZERO_PAGE_ADDR);
319
320 /* initialize mem_map[] */
321
322 memset(zones_size, 0, sizeof(zones_size));
323
324 max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
325 if (max_low_pfn < max_dma)
326 zones_size[ZONE_DMA] = max_low_pfn;
327 else {
328 zones_size[ZONE_DMA] = max_dma;
329 zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
330 }
331 free_area_init(zones_size);
332 }
333
334 static int
335 count_pages (u64 start, u64 end, void *arg)
336 {
337 unsigned long *count = arg;
338
339 *count += (end - start) >> PAGE_SHIFT;
340 return 0;
341 }
342
343 static int
344 count_reserved_pages (u64 start, u64 end, void *arg)
345 {
346 unsigned long num_reserved = 0;
347 unsigned long *count = arg;
348 struct page *pg;
349
350 for (pg = virt_to_page(start); pg < virt_to_page(end); ++pg)
351 if (PageReserved(pg))
352 ++num_reserved;
353 *count += num_reserved;
354 return 0;
355 }
356
357 void
358 mem_init (void)
359 {
360 extern char __start_gate_section[];
361 long reserved_pages, codesize, datasize, initsize;
362 unsigned long num_pgt_pages;
363
364 #ifdef CONFIG_PCI
365 /*
366 * This needs to be called _after_ the command line has been parsed but _before_
367 * any drivers that may need the PCI DMA interface are initialized or bootmem has
368 * been freed.
369 */
370 platform_pci_dma_init();
371 #endif
372
373 if (!mem_map)
374 BUG();
375
376 num_physpages = 0;
377 efi_memmap_walk(count_pages, &num_physpages);
378
379 max_mapnr = max_low_pfn;
380 high_memory = __va(max_low_pfn * PAGE_SIZE);
381
382 totalram_pages += free_all_bootmem();
383
384 reserved_pages = 0;
385 efi_memmap_walk(count_reserved_pages, &reserved_pages);
386
387 codesize = (unsigned long) &_etext - (unsigned long) &_stext;
388 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
389 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
390
391 printk("Memory: %luk/%luk available (%luk code, %luk reserved, %luk data, %luk init)\n",
392 (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
393 max_mapnr << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10),
394 datasize >> 10, initsize >> 10);
395
396 /*
397 * Allow for enough (cached) page table pages so that we can map the entire memory
398 * at least once. Each task also needs a couple of page tables pages, so add in a
399 * fudge factor for that (don't use "threads-max" here; that would be wrong!).
400 * Don't allow the cache to be more than 10% of total memory, though.
401 */
402 # define NUM_TASKS 500 /* typical number of tasks */
403 num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
404 if (num_pgt_pages > nr_free_pages() / 10)
405 num_pgt_pages = nr_free_pages() / 10;
406 if (num_pgt_pages > pgt_cache_water[1])
407 pgt_cache_water[1] = num_pgt_pages;
408
409 /* install the gate page in the global page table: */
410 put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR);
411
412 #ifdef CONFIG_IA32_SUPPORT
413 ia32_gdt_init();
414 #endif
415 }
416