File: /usr/src/linux/include/asm-ia64/pgtable.h
1 #ifndef _ASM_IA64_PGTABLE_H
2 #define _ASM_IA64_PGTABLE_H
3
4 /*
5 * This file contains the functions and defines necessary to modify and use
6 * the IA-64 page table tree.
7 *
8 * This hopefully works with any (fixed) IA-64 page-size, as defined
9 * in <asm/page.h> (currently 8192).
10 *
11 * Copyright (C) 1998-2001 Hewlett-Packard Co
12 * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
13 */
14
15 #include <linux/config.h>
16
17 #include <asm/mman.h>
18 #include <asm/page.h>
19 #include <asm/processor.h>
20 #include <asm/system.h>
21 #include <asm/types.h>
22
#define IA64_MAX_PHYS_BITS	50	/* max. number of physical address bits (architected) */

/*
 * First, define the various bits in a PTE.  Note that the PTE format
 * matches the VHPT short format, the first doubleword of the VHPT long
 * format, and the first doubleword of the TLB insertion format.
 */
#define _PAGE_P_BIT		0
#define _PAGE_A_BIT		5
#define _PAGE_D_BIT		6

#define _PAGE_P			(1 << _PAGE_P_BIT)	/* page present bit */
#define _PAGE_MA_WB		(0x0 << 2)	/* write back memory attribute */
#define _PAGE_MA_UC		(0x4 << 2)	/* uncacheable memory attribute */
#define _PAGE_MA_UCE		(0x5 << 2)	/* UC exported attribute */
#define _PAGE_MA_WC		(0x6 << 2)	/* write coalescing memory attribute */
#define _PAGE_MA_NAT		(0x7 << 2)	/* not-a-thing attribute */
#define _PAGE_MA_MASK		(0x7 << 2)
#define _PAGE_PL_0		(0 << 7)	/* privilege level 0 (kernel) */
#define _PAGE_PL_1		(1 << 7)	/* privilege level 1 (unused) */
#define _PAGE_PL_2		(2 << 7)	/* privilege level 2 (unused) */
#define _PAGE_PL_3		(3 << 7)	/* privilege level 3 (user) */
#define _PAGE_PL_MASK		(3 << 7)
#define _PAGE_AR_R		(0 << 9)	/* read only */
#define _PAGE_AR_RX		(1 << 9)	/* read & execute */
#define _PAGE_AR_RW		(2 << 9)	/* read & write */
#define _PAGE_AR_RWX		(3 << 9)	/* read, write & execute */
#define _PAGE_AR_R_RW		(4 << 9)	/* read / read & write */
#define _PAGE_AR_RX_RWX		(5 << 9)	/* read & exec / read, write & exec */
#define _PAGE_AR_RWX_RW		(6 << 9)	/* read, write & exec / read & write */
#define _PAGE_AR_X_RX		(7 << 9)	/* exec & promote / read & exec */
#define _PAGE_AR_MASK		(7 << 9)
#define _PAGE_AR_SHIFT		9
#define _PAGE_A			(1 << _PAGE_A_BIT)	/* page accessed bit */
#define _PAGE_D			(1 << _PAGE_D_BIT)	/* page dirty bit */
#define _PAGE_PPN_MASK		(((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL)
#define _PAGE_ED		(__IA64_UL(1) << 52)	/* exception deferral */
#define _PAGE_PROTNONE		(__IA64_UL(1) << 63)

/* Bits of a PTE that pte_modify() carries over when the protection changes. */
#define _PFN_MASK		_PAGE_PPN_MASK
#define _PAGE_CHG_MASK		(_PFN_MASK | _PAGE_A | _PAGE_D)

/* Supported page sizes, expressed as log2 of the size in bytes. */
#define _PAGE_SIZE_4K	12
#define _PAGE_SIZE_8K	13
#define _PAGE_SIZE_16K	14
#define _PAGE_SIZE_64K	16
#define _PAGE_SIZE_256K	18
#define _PAGE_SIZE_1M	20
#define _PAGE_SIZE_4M	22
#define _PAGE_SIZE_16M	24
#define _PAGE_SIZE_64M	26
#define _PAGE_SIZE_256M	28

/*
 * Frequently used attribute combinations.  Every expansion is fully
 * parenthesized so that the macros remain correct when combined with
 * operators that bind tighter than `|' (for example
 * `__DIRTY_BITS_NO_ED & mask' or `__ACCESS_BITS == value').
 */
#define __ACCESS_BITS		(_PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB)
#define __DIRTY_BITS_NO_ED	(_PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB)
#define __DIRTY_BITS		(_PAGE_ED | __DIRTY_BITS_NO_ED)
79
/*
 * First level of the page-table tree (the page directory).
 *
 * PGDIR_SHIFT is the log2 of the address range that one first-level
 * entry maps: the page offset bits plus the index bits of the two
 * lower levels, each of which contributes PAGE_SHIFT-3 bits.
 */
#define PGDIR_SHIFT		(PAGE_SHIFT + 2 * (PAGE_SHIFT - 3))
#define PGDIR_SIZE		(__IA64_UL(1) << PGDIR_SHIFT)
#define PGDIR_MASK		(~(PGDIR_SIZE - 1))
#define PTRS_PER_PGD		(__IA64_UL(1) << (PAGE_SHIFT - 3))
#define USER_PTRS_PER_PGD	(5 * PTRS_PER_PGD / 8)	/* regions 0-4 are user regions */
#define FIRST_USER_PGD_NR	0
91
/*
 * Second level of the page-table tree.
 *
 * PMD_SHIFT is the log2 of the address range that one second-level
 * entry maps: the page offset bits plus the PAGE_SHIFT-3 index bits
 * of the third level.
 */
#define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT - 3))
#define PMD_SIZE	(__IA64_UL(1) << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))
#define PTRS_PER_PMD	(__IA64_UL(1) << (PAGE_SHIFT - 3))
102
/*
 * Third level of the page-table tree: number of PTEs held by one
 * third-level table (PAGE_SIZE / 8).
 */
#define PTRS_PER_PTE	(__IA64_UL(1) << (PAGE_SHIFT - 3))
107
/*
 * All the normal masks have the "page accessed" bits on, as any time
 * they are used, the page is accessed.  They are cleared only by the
 * page-out routines.  On the other hand, we do NOT turn on the
 * execute bit on pages that are mapped writable.  For those pages, we
 * turn on the X bit only when the program attempts to actually
 * execute code in such a page (it's a "lazy execute bit", if you
 * will).  This lets us reduce the amount of i-cache flushing we have
 * to do for data pages such as stack and heap pages.
 */
#define PAGE_NONE	__pgprot(_PAGE_PROTNONE | _PAGE_A)
#define PAGE_SHARED	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW)
#define PAGE_READONLY	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
#define PAGE_COPY	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
#define PAGE_GATE	__pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
#define PAGE_KERNEL	__pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX)
124
125 # ifndef __ASSEMBLY__
126
127 #include <asm/bitops.h>
128 #include <asm/mmu_context.h>
129 #include <asm/processor.h>
130
/*
 * Next come the mappings that determine how mmap() protection bits
 * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented.  The
 * three digits in each name are the requested exec/write/read bits,
 * in that order.  The __P entries are used for private mappings and
 * the __S entries for mappings created with MAP_SHARED.  A write to a
 * private mapping is resolved by copy-on-write, which is why none of
 * the __P entries grants write access up front.
 */
	/*         xwr */
#define __P000	PAGE_NONE
#define __P001	PAGE_READONLY
#define __P010	PAGE_READONLY	/* write to priv pg -> copy & make writable */
#define __P011	PAGE_READONLY	/* ditto */
#define __P100	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
#define __P101	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
#define __P110	PAGE_COPY
#define __P111	PAGE_COPY

#define __S000	PAGE_NONE
#define __S001	PAGE_READONLY
#define __S010	PAGE_SHARED	/* we don't have (and don't need) write-only */
#define __S011	PAGE_SHARED
#define __S100	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
#define __S101	__pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
#define __S110	PAGE_SHARED	/* writable, so mapped read/write only (no X bit) */
#define __S111	PAGE_SHARED	/* ditto */
157
/*
 * Report a corrupt page-table entry of the given level via printk(),
 * tagged with the source location of the caller and the raw entry value.
 */
#define pgd_ERROR(e) \
	printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
#define pmd_ERROR(e) \
	printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pte_ERROR(e) \
	printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
161
162
163 /*
164 * Some definitions to translate between mem_map, PTEs, and page
165 * addresses:
166 */
167
/*
 * Map a pointer to a mem_map[] entry to the kernel virtual address of
 * the page it describes.  The address is read from the entry's
 * `virtual' field.
 */
#define page_address(page)	((page)->virtual)
173
174 /* Quick test to see if ADDR is a (potentially) valid physical address. */
175 static inline long
176 ia64_phys_addr_valid (unsigned long addr)
177 {
178 return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
179 }
180
/*
 * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
 * memory.  For the return value to be meaningful, ADDR must be >=
 * PAGE_OFFSET.  This operation can be relatively expensive (e.g.,
 * require a hash-, or multi-level tree-lookup or something of that
 * sort) but it guarantees to return TRUE only if accessing the page
 * at that address does not cause an error.  Note that there may be
 * addresses for which kern_addr_valid() returns FALSE even though an
 * access would not cause an error (e.g., this is typically true for
 * memory mapped I/O regions).
 *
 * XXX Need to implement this for IA-64.  Until then every address is
 * reported as valid and ADDR is not even evaluated.
 */
#define kern_addr_valid(addr)	(1)
195
196 /*
197 * Now come the defines and routines to manage and access the three-level
198 * page table.
199 */
200
/*
 * On some architectures, special things need to be done when setting
 * the PTE in a page table.  Nothing special needs to be done on IA-64:
 * the new value is simply stored through PTEP.
 */
#define set_pte(ptep, pteval)	(*(ptep) = (pteval))
206
/* Region layout: a region spans 2^61 bytes; RGN_KERNEL is region number 7. */
#define RGN_SIZE	(1UL << 61)
#define RGN_MAP_LIMIT	((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE)	/* per region addr limit */
#define RGN_KERNEL	7

/*
 * Bounds of the vmalloc area.  It begins three pages above
 * 0xa000000000000000 and ends 2^(4*PAGE_SHIFT - 9) bytes above that
 * same base address.
 */
#define VMALLOC_START		(0xa000000000000000 + 3*PAGE_SIZE)
#define VMALLOC_VMADDR(x)	((unsigned long)(x))
#define VMALLOC_END		(0xa000000000000000 + (1UL << (4*PAGE_SHIFT - 9)))
214
/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */

/*
 * Build a PTE for PAGE (a pointer into mem_map[]) with protection
 * PGPROT.  The index of PAGE within mem_map[] is shifted up by
 * PAGE_SHIFT and combined with the protection bits.
 *
 * PAGE is parenthesized in the expansion so that an argument which is
 * itself an expression (e.g. a conditional) is subtracted from as a
 * whole instead of being split by operator precedence.
 */
#define mk_pte(page,pgprot)								\
({											\
	pte_t __pte;									\
											\
	pte_val(__pte) = (((page) - mem_map) << PAGE_SHIFT) | pgprot_val(pgprot);	\
	__pte;										\
})
226
/*
 * This takes a physical page address that is used by the remapping
 * functions and adds the protection bits of PGPROT to it.
 *
 * PHYSPAGE is parenthesized in the expansion so that an argument built
 * from lower-precedence operators (`|', `?:', shifts, ...) is added to
 * as a whole.
 */
#define mk_pte_phys(physpage, pgprot) \
({ pte_t __pte; pte_val(__pte) = (physpage) + pgprot_val(pgprot); __pte; })
230
/*
 * Replace the protection of _PTE with NEWPROT.  Only the bits in
 * _PAGE_CHG_MASK are carried over from the old entry; everything else
 * comes from NEWPROT.
 */
#define pte_modify(_pte, newprot) \
	(__pte((pte_val(_pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)))

/* Thin aliases for mk_pte(); page_pte() uses an all-zero protection. */
#define page_pte_prot(page,prot)	mk_pte(page, prot)
#define page_pte(page)			page_pte_prot(page, __pgprot(0))
236
/*
 * Basic PTE predicates.  An entry is "none" when it is all zeroes and
 * "present" when either the present bit or _PAGE_PROTNONE is set (note
 * that pte_present() yields the masked bits, not a normalized 0/1).
 */
#define pte_none(pte)		(pte_val(pte) == 0UL)
#define pte_present(pte)	(pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE))
#define pte_clear(pte)		(pte_val(*(pte)) = 0UL)
/* pte_page() returns the "struct page *" corresponding to the PTE: */
#define pte_page(pte)		(mem_map + (unsigned long) ((pte_val(pte) & _PFN_MASK) >> PAGE_SHIFT))
242
/*
 * Second-level entry helpers.  An entry is present when it is
 * non-zero, and bad when its value fails ia64_phys_addr_valid().
 * pmd_page() yields the kernel virtual address (as an unsigned long)
 * of the table the entry points to.
 */
#define pmd_none(pmd)		(pmd_val(pmd) == 0UL)
#define pmd_bad(pmd)		(!ia64_phys_addr_valid(pmd_val(pmd)))
#define pmd_present(pmd)	(pmd_val(pmd) != 0UL)
#define pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0UL)
#define pmd_page(pmd)		((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
248
/*
 * First-level entry helpers.  An entry is present when it is
 * non-zero, and bad when its value fails ia64_phys_addr_valid().
 * pgd_page() yields the kernel virtual address (as an unsigned long)
 * of the table the entry points to.
 */
#define pgd_none(pgd)		(pgd_val(pgd) == 0UL)
#define pgd_bad(pgd)		(!ia64_phys_addr_valid(pgd_val(pgd)))
#define pgd_present(pgd)	(pgd_val(pgd) != 0UL)
#define pgd_clear(pgdp)		(pgd_val(*(pgdp)) = 0UL)
#define pgd_page(pgd)		((unsigned long) __va(pgd_val(pgd) & _PFN_MASK))
254
/*
 * The following have defined behavior only if pte_present() is true.
 *
 * pte_read() and pte_write() classify the 3-bit access-rights field:
 * values 0..5 count as readable and values 2..6 as writable (the
 * unsigned subtraction in pte_write() turns that range test into a
 * single compare).  pte_exec() tests the bit that _PAGE_AR_RX sets.
 */
#define pte_read(pte)		(((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) < 6)
#define pte_write(pte)		((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4)
#define pte_exec(pte)		((pte_val(pte) & _PAGE_AR_RX) != 0)
#define pte_dirty(pte)		((pte_val(pte) & _PAGE_D) != 0)
#define pte_young(pte)		((pte_val(pte) & _PAGE_A) != 0)

/*
 * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the
 * 2nd bit in the access rights:
 */
#define pte_wrprotect(pte)	(__pte(pte_val(pte) & ~_PAGE_AR_RW))
#define pte_mkwrite(pte)	(__pte(pte_val(pte) | _PAGE_AR_RW))
#define pte_mkexec(pte)		(__pte(pte_val(pte) | _PAGE_AR_RX))
#define pte_mkold(pte)		(__pte(pte_val(pte) & ~_PAGE_A))
#define pte_mkyoung(pte)	(__pte(pte_val(pte) | _PAGE_A))
#define pte_mkclean(pte)	(__pte(pte_val(pte) & ~_PAGE_D))
#define pte_mkdirty(pte)	(__pte(pte_val(pte) | _PAGE_D))
274
/*
 * Macro to mark a page protection value as "uncacheable": the memory
 * attribute field is replaced by _PAGE_MA_UC and all other bits are
 * kept.  Note that "protection" is really a misnomer here as the
 * protection value contains the memory attribute bits, dirty bits,
 * and various other bits as well.
 */
#define pgprot_noncached(prot) \
	__pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC)
282
/*
 * Macro to mark a page protection value as "write-combining": the
 * memory attribute field is replaced by _PAGE_MA_WC and all other bits
 * are kept.  Note that "protection" is really a misnomer here as the
 * protection value contains the memory attribute bits, dirty bits, and
 * various other bits as well.  Accesses through a write-combining
 * translation bypass the caches, but do allow for consecutive writes
 * to be combined into single (but larger) write transactions.
 *
 * With CONFIG_MCKINLEY_A0_SPECIFIC the protection value is passed
 * through unchanged.  The argument is parenthesized in that variant
 * too, so the macro expands to a single primary expression in both
 * configurations.
 */
#ifdef CONFIG_MCKINLEY_A0_SPECIFIC
# define pgprot_writecombine(prot)	(prot)
#else
# define pgprot_writecombine(prot)	__pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC)
#endif
296
297 /*
298 * Return the region index for virtual address ADDRESS.
299 */
300 static inline unsigned long
301 rgn_index (unsigned long address)
302 {
303 ia64_va a;
304
305 a.l = address;
306 return a.f.reg;
307 }
308
309 /*
310 * Return the region offset for virtual address ADDRESS.
311 */
312 static inline unsigned long
313 rgn_offset (unsigned long address)
314 {
315 ia64_va a;
316
317 a.l = address;
318 return a.f.off;
319 }
320
321 static inline unsigned long
322 pgd_index (unsigned long address)
323 {
324 unsigned long region = address >> 61;
325 unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1);
326
327 return (region << (PAGE_SHIFT - 6)) | l1index;
328 }
329
330 /* The offset in the 1-level directory is given by the 3 region bits
331 (61..63) and the seven level-1 bits (33-39). */
332 static inline pgd_t*
333 pgd_offset (struct mm_struct *mm, unsigned long address)
334 {
335 return mm->pgd + pgd_index(address);
336 }
337
/*
 * In the kernel's mapped region we have a full 43 bit space available
 * and completely ignore the region number (since we know it's in
 * region number 5).  The index is taken straight from the address
 * bits above PGDIR_SHIFT and applied to init_mm's page directory.
 */
#define pgd_offset_k(addr) \
	(init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))
342
/*
 * Find the second-level entry for ADDR in the table that the
 * first-level entry *DIR points to.
 */
#define pmd_offset(dir,addr) \
	((pmd_t *) pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
346
/*
 * Find the third-level entry (the PTE) for ADDR in the table that the
 * second-level entry *DIR points to.
 */
#define pte_offset(dir,addr) \
	((pte_t *) pmd_page(*(dir)) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
350
/* Atomic versions of some of the PTE manipulations: */
352
353 static inline int
354 ptep_test_and_clear_young (pte_t *ptep)
355 {
356 #ifdef CONFIG_SMP
357 return test_and_clear_bit(_PAGE_A_BIT, ptep);
358 #else
359 pte_t pte = *ptep;
360 if (!pte_young(pte))
361 return 0;
362 set_pte(ptep, pte_mkold(pte));
363 return 1;
364 #endif
365 }
366
367 static inline int
368 ptep_test_and_clear_dirty (pte_t *ptep)
369 {
370 #ifdef CONFIG_SMP
371 return test_and_clear_bit(_PAGE_D_BIT, ptep);
372 #else
373 pte_t pte = *ptep;
374 if (!pte_dirty(pte))
375 return 0;
376 set_pte(ptep, pte_mkclean(pte));
377 return 1;
378 #endif
379 }
380
381 static inline pte_t
382 ptep_get_and_clear (pte_t *ptep)
383 {
384 #ifdef CONFIG_SMP
385 return __pte(xchg((long *) ptep, 0));
386 #else
387 pte_t pte = *ptep;
388 pte_clear(ptep);
389 return pte;
390 #endif
391 }
392
393 static inline void
394 ptep_set_wrprotect (pte_t *ptep)
395 {
396 #ifdef CONFIG_SMP
397 unsigned long new, old;
398
399 do {
400 old = pte_val(*ptep);
401 new = pte_val(pte_wrprotect(__pte (old)));
402 } while (cmpxchg((unsigned long *) ptep, old, new) != old);
403 #else
404 pte_t old_pte = *ptep;
405 set_pte(ptep, pte_wrprotect(old_pte));
406 #endif
407 }
408
409 static inline void
410 ptep_mkdirty (pte_t *ptep)
411 {
412 #ifdef CONFIG_SMP
413 set_bit(_PAGE_D_BIT, ptep);
414 #else
415 pte_t old_pte = *ptep;
416 set_pte(ptep, pte_mkdirty(old_pte));
417 #endif
418 }
419
420 static inline int
421 pte_same (pte_t a, pte_t b)
422 {
423 return pte_val(a) == pte_val(b);
424 }
425
/*
 * Helpers to check the type of access that triggered a page fault.
 */

/*
 * Return non-zero (the masked bit, 0x2) if ACCESS_TYPE has the write
 * bit set, 0 otherwise.
 */
static inline int
is_write_access (int access_type)
{
	const int write_bit = 0x2;

	return access_type & write_bit;
}
435
/*
 * Return non-zero (the masked bit, 0x4) if ACCESS_TYPE has the execute
 * bit set, 0 otherwise.
 */
static inline int
is_exec_access (int access_type)
{
	const int exec_bit = 0x4;

	return access_type & exec_bit;
}
441
442 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
443 extern void paging_init (void);
444
/*
 * Swap entry encoding.  Bit 0 is left clear, the type occupies the
 * eight bits 1..8 and the offset starts at bit 9.  SWP_OFFSET() shifts
 * left by one first, which discards bit 63 before the offset is
 * extracted.  A swap entry and a PTE convert into each other without
 * changing the value.
 */
#define SWP_TYPE(entry)		(((entry).val >> 1) & 0xff)
#define SWP_OFFSET(entry)	(((entry).val << 1) >> 10)
#define SWP_ENTRY(type,offset)	((swp_entry_t) { ((type) << 1) | ((long) (offset) << 9) })
#define pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define swp_entry_to_pte(x)	((pte_t) { (x).val })
450
/*
 * Needs to be defined here and not in linux/mm.h, as it is arch
 * dependent.  Always evaluates to 0 here; PAGE is not evaluated.
 */
#define PageSkip(page)	(0)
453
/* I/O remapping is a plain alias for remap_page_range. */
#define io_remap_page_range	remap_page_range	/* XXX is this right? */
455
456 /*
457 * ZERO_PAGE is a global shared page that is always zero: used
458 * for zero-mapped memory areas etc..
459 */
460 extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
461 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
462
/*
 * We provide our own get_unmapped_area to cope with VA holes for
 * userland; this flag announces that to the rest of the kernel.
 */
#define HAVE_ARCH_UNMAPPED_AREA
465
466 # endif /* !__ASSEMBLY__ */
467
/*
 * Identity-mapped regions use a large page size.  KERNEL_PG_SHIFT and
 * KERNEL_PG_SIZE describe that (64MB) page, and KERNEL_PG_NUM is the
 * number of the (large) page frame that maps the kernel, counted from
 * PAGE_OFFSET.
 */
#define KERNEL_PG_SHIFT		_PAGE_SIZE_64M
#define KERNEL_PG_SIZE		(1 << KERNEL_PG_SHIFT)
#define KERNEL_PG_NUM		((KERNEL_START - PAGE_OFFSET) / KERNEL_PG_SIZE)
475
476 #endif /* _ASM_IA64_PGTABLE_H */
477