File: /usr/src/linux/include/asm-ia64/sn/nodepda.h
1 /* $Id$
2 *
3 * This file is subject to the terms and conditions of the GNU General Public
4 * License. See the file "COPYING" in the main directory of this archive
5 * for more details.
6 *
7 * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc.
8 * Copyright (C) 2000 by Colin Ngam
9 */
10 #ifndef _ASM_SN_NODEPDA_H
11 #define _ASM_SN_NODEPDA_H
12
13 #ifdef __cplusplus
14 extern "C" {
15 #endif
16
17 #include <linux/config.h>
18
19 #include <asm/sn/agent.h>
20 #include <asm/sn/intr.h>
21 #include <asm/sn/router.h>
22 #include <asm/sn/synergy.h>
23 /* #include <SN/klkernvars.h> */
24 #ifdef LATER
25 typedef struct module_s module_t; /* Avoids sys/SN/module.h */
26 #else
27 #include <asm/sn/module.h>
28 #endif
29 /* #include <SN/slotnum.h> */
30
31 /*
32 * NUMA Node-Specific Data structures are defined in this file.
33 * In particular, this is the location of the node PDA.
34 * A pointer to the right node PDA is saved in each CPU PDA.
35 */
36
37 /*
38 * Subnode PDA structures. Each node needs a few data structures that
39 * correspond to the PIs on the HUB chip that supports the node.
40 *
41 * WARNING!!!! 6.5.x compatibility requirements prevent us from
42 * changing or reordering fields in the following structure for IP27.
43 * It is essential that the data mappings not change for IP27 platforms.
44 * It is OK to add fields that are IP35 specific if they are under #ifdef IP35.
45 */
46 struct subnodepda_s {
47 intr_vecblk_t intr_dispatch0;
48 intr_vecblk_t intr_dispatch1;
49 uint64_t next_prof_timeout;
50 int prof_count;
51 };
52
53
54 typedef struct subnodepda_s subnode_pda_t;
55
56
/*
 * Opaque forward declarations: this header only ever stores pointers
 * to these types, so their full definitions are not needed here.
 */
struct ptpool_s;

#ifdef CONFIG_IA64_SGI_SYNERGY_PERF
struct synergy_perf_s;
#endif /* CONFIG_IA64_SGI_SYNERGY_PERF */
62
63
64 /*
65 * Node-specific data structure.
66 *
67 * One of these structures is allocated on each node of a NUMA system.
68 * Non-NUMA systems are considered to be systems with one node, and
69 * hence there will be one of this structure for the entire system.
70 *
71 * This structure provides a convenient way of keeping together
72 * all per-node data structures.
73 */
74
75
76
#if defined(LATER)
/*
 * Head of the per-node list of sanon pages.  An instance of this type
 * is embedded in the nodepda; it pairs the list head with the lock
 * that goes with it.  The anon manager has the details.
 */
typedef struct {
	lock_t	sal_lock;	/* lock paired with the list below */
	plist_t	sal_listhead;	/* queue head for this node's sanon pages */
} sanon_list_head_t;
#endif /* LATER */
88 struct nodepda_s {
89
90 #ifdef NUMA_BASE
91
92 /*
93 * Pointer to this node's copy of Nodepdaindr
94 */
95 struct nodepda_s **pernode_pdaindr;
96
97 /*
98 * Data used for migration control
99 */
100 struct migr_control_data_s *mcd;
101
102 /*
103 * Data used for replication control
104 */
105 struct repl_control_data_s *rcd;
106
107 /*
108 * Numa statistics
109 */
110 struct numa_stats_s *numa_stats;
111
112 /*
113 * Load distribution
114 */
115 uint memfit_assign;
116
117 /*
118 * New extended memory reference counters
119 */
120 void *migr_refcnt_counterbase;
121 void *migr_refcnt_counterbuffer;
122 size_t migr_refcnt_cbsize;
123 int migr_refcnt_numsets;
124
125 /*
126 * mem_tick quiescing lock
127 */
128 uint mem_tick_lock;
129
130 /*
131 * Migration candidate set
132 * by migration prologue intr handler
133 */
134 uint64_t migr_candidate;
135
136 /*
137 * Each node gets its own syswait counter to remove contention
138 * on the global one.
139 */
140 #ifdef LATER
141 struct syswait syswait;
142 #endif
143
144 #endif /* NUMA_BASE */
145 /*
146 * Node-specific Zone structures.
147 */
148 #ifdef LATER
149 zoneset_element_t node_zones;
150 pg_data_t node_pg_data; /* VM page data structures */
151 plist_t error_discard_plist;
152 #endif
153 uint error_discard_count;
154 uint error_page_count;
155 uint error_cleaned_count;
156 spinlock_t error_discard_lock;
157
158 /* Information needed for SN Hub chip interrupt handling. */
159 subnode_pda_t snpda[NUM_SUBNODES];
160 /* Distributed kernel support */
161 #ifdef LATER
162 kern_vars_t kern_vars;
163 #endif
164 /* Vector operation support */
165 /* Change this to a sleep lock? */
166 spinlock_t vector_lock;
167 /* State of the vector unit for this node */
168 char vector_unit_busy;
169 cpuid_t node_first_cpu; /* Starting cpu number for node */
170 ushort node_num_cpus; /* Number of cpus present */
171
172 /* node utlbmiss info */
173 spinlock_t node_utlbswitchlock;
174 volatile cpumask_t node_utlbmiss_flush;
175 volatile signed char node_need_utlbmiss_patch;
176 volatile char node_utlbmiss_patched;
177 nodepda_router_info_t *npda_rip_first;
178 nodepda_router_info_t **npda_rip_last;
179 int dependent_routers;
180
181 #if defined(CONFIG_IA64_SGI_SYNERGY_PERF)
182 int synergy_perf_enabled;
183 int synergy_perf_freq;
184 spinlock_t synergy_perf_lock;
185 uint64_t synergy_inactive_intervals;
186 uint64_t synergy_active_intervals;
187 struct synergy_perf_s *synergy_perf_data;
188 struct synergy_perf_s *synergy_perf_first; /* reporting consistency .. */
189 #endif /* CONFIG_IA64_SGI_SYNERGY_PERF */
190
191 devfs_handle_t xbow_vhdl;
192 nasid_t xbow_peer; /* NASID of our peer hub on xbow */
193 struct semaphore xbow_sema; /* Sema for xbow synchronization */
194 slotid_t slotdesc;
195 moduleid_t module_id; /* Module ID (redundant local copy) */
196 module_t *module; /* Pointer to containing module */
197 int hub_chip_rev; /* Rev of my Hub chip */
198 char nasid_mask[NASID_MASK_BYTES];
199 /* Need a copy of the nasid mask
200 * on every node */
201 xwidgetnum_t basew_id;
202 devfs_handle_t basew_xc;
203 spinlock_t fprom_lock;
204 char ni_error_print; /* For printing ni error state
205 * only once during system panic
206 */
207 #ifdef LATER
208 md_perf_monitor_t node_md_perfmon;
209 hubstat_t hubstats;
210 int hubticks;
211 sbe_info_t *sbe_info; /* ECC single-bit error statistics */
212 #endif /* LATER */
213 int huberror_ticks;
214
215 router_queue_t *visited_router_q;
216 router_queue_t *bfs_router_q;
217 /* Used for router traversal */
218 #if defined (CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC)
219 router_map_ent_t router_map[MAX_RTR_BREADTH];
220 #endif
221 int num_routers; /* Total routers in the system */
222
223 char membank_flavor;
224 /* Indicates what sort of memory
225 * banks are present on this node
226 */
227
228 char *hwg_node_name; /* hwgraph node name */
229
230 struct widget_info_t *widget_info; /* Node as xtalk widget */
231 devfs_handle_t node_vertex; /* Hwgraph vertex for this node */
232
233 void *pdinfo; /* Platform-dependent per-node info */
234 uint64_t *dump_stack; /* Dump stack during nmi handling */
235 int dump_count; /* To allow only one cpu-per-node */
236 #ifdef LATER
237 io_perf_monitor_t node_io_perfmon;
238 #endif
239
240 /*
241 * Each node gets its own pdcount counter to remove contention
242 * on the global one.
243 */
244
245 int pdcount; /* count of pdinserted pages */
246
247 #ifdef NUMA_BASE
248 void *cached_global_pool; /* pointer to cached vmpool */
249 #endif /* NUMA_BASE */
250
251 #ifdef LATER
252 sanon_list_head_t sanon_list_head; /* head for sanon pages */
253 #endif
254 #ifdef NUMA_BASE
255 struct ptpool_s *ptpool; /* ptpool for this node */
256 #endif /* NUMA_BASE */
257
258 /*
259 * The BTEs on this node are shared by the local cpus
260 */
261 #if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC)
262 #ifdef LATER
263 bteinfo_t *node_bte_info[BTES_PER_NODE];
264 #endif
265 #endif
266 };
267
268 typedef struct nodepda_s nodepda_t;
269
270
/* Module ID and slot descriptor recorded in the node PDA of a compact node id. */
#define NODE_MODULEID(cnode)	(NODEPDA(cnode)->module_id)
#define NODE_SLOTID(cnode)	(NODEPDA(cnode)->slotdesc)
273
274 #ifdef NUMA_BASE
275 /*
276 * Access Functions for node PDA.
277 * Since there is one nodepda for each node, we need a convenient mechanism
278 * to access these nodepdas without cluttering code with #ifdefs.
279 * The next set of definitions provides this.
280 * Routines are expected to use
281 *
282 * nodepda -> to access PDA for the node on which code is running
283 * subnodepda -> to access subnode PDA for the node on which code is running
284 *
285 * NODEPDA(x) -> to access node PDA for cnodeid 'x'
286 * SUBNODEPDA(x,s) -> to access subnode PDA for cnodeid/slice 'x'
287 */
288
289 #ifdef LATER
290 #define nodepda private.p_nodepda /* Ptr to this node's PDA */
291 #if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC
292 #define subnodepda private.p_subnodepda /* Ptr to this node's subnode PDA */
293 #endif
294
295 #else
296 /*
297 * Until we have a shared node local area defined, do it this way ..
298 * like in Caliase space. See above.
299 */
300 extern nodepda_t *nodepda;
301 extern subnode_pda_t *subnodepda;
302 #endif
303
304 /*
305 * Nodepdaindr[]
306 * This is a private data structure for use only in early initialization.
307 * All users of nodepda should use the macro NODEPDA(nodenum) to get
308 * the suitable nodepda structure.
309 * This macro has the advantage of not requiring #ifdefs for NUMA and
310 * non-NUMA code.
311 */
312 extern nodepda_t *Nodepdaindr[];
313 /*
314 * NODEPDA_GLOBAL(x) macro should ONLY be used during early initialization.
315 * Once meminit is complete, NODEPDA(x) is ready to use.
316 * During early init, the system fills up Nodepdaindr. By the time we
317 * are in meminit(), all nodepdas are initialized, and hence
318 * we can fill up the node_pdaindr array in each nodepda structure.
319 */
320 #define NODEPDA_GLOBAL(x) Nodepdaindr[x]
321
322 /*
323 * Returns a pointer to a given node's nodepda.
324 */
325 #define NODEPDA(x) (nodepda->pernode_pdaindr[x])
326
/*
 * Address of a subnode PDA.
 *
 *   SUBNODEPDA(cnode, subnode) - the node is named by compact node id
 *   SNPDA(npda, subnode)       - the node is named by nodepda pointer
 */
#define SUBNODEPDA(cnode, subnode)	(&NODEPDA(cnode)->snpda[subnode])
#define SNPDA(npda, subnode)		(&(npda)->snpda[subnode])
334
/*
 * Address of the error_stamp[] slot for 'cpu' in the nodepda of 'node'.
 * NOTE(review): struct nodepda_s as declared in this header has no
 * error_stamp member, so this macro cannot expand successfully as
 * things stand -- confirm whether the member or the macro is stale.
 */
#define NODEPDA_ERROR_FOOTPRINT(node, cpu)	\
	(&(NODEPDA(node)->error_stamp[cpu]))

/*
 * Addresses of the per-node MD and IO performance monitors.  The
 * members they reference are only compiled into struct nodepda_s under
 * LATER, so the accessors are offered under the same condition.
 */
#ifdef LATER
#define NODEPDA_MDP_MON(node)	(&(NODEPDA(node)->node_md_perfmon))
#define NODEPDA_IOP_MON(node)	(&(NODEPDA(node)->node_io_perfmon))
#endif /* LATER */
339
340 /*
341 * Macros to access data structures inside nodepda
342 */
343 #if NUMA_MIGR_CONTROL
344 #define NODEPDA_MCD(node) (NODEPDA(node)->mcd)
345 #endif /* NUMA_MIGR_CONTROL */
346
347 #if NUMA_REPL_CONTROL
348 #define NODEPDA_RCD(node) (NODEPDA(node)->rcd)
349 #endif /* NUMA_REPL_CONTROL */
350
351 #if (NUMA_MIGR_CONTROL || NUMA_REPL_CONTROL)
352 #define NODEPDA_LRS(node) (NODEPDA(node)->lrs)
353 #endif /* (NUMA_MIGR_CONTROL || NUMA_REPL_CONTROL) */
354
355 /*
356 * Exported functions
357 */
358 extern nodepda_t *nodepda_alloc(void);
359
360 #else /* !NUMA_BASE */
361 /*
362 * For a single-node system we will just have one global nodepda pointer
363 * allocated at startup. The global nodepda will point to this nodepda
364 * structure.
365 */
366 extern nodepda_t *Nodepdaindr;
367
368 /*
369 * On non-NUMA systems, NODEPDA_GLOBAL and NODEPDA macros collapse to
370 * be the same.
371 */
372 #define NODEPDA_GLOBAL(x) Nodepdaindr
373
374 /*
375 * Returns a pointer to a given node's nodepda.
376 */
377 #define NODEPDA(x) Nodepdaindr
378
379 /*
380 * nodepda can also be defined as private.p_nodepda.
381 * But on non-NUMA systems, there is only one nodepda, and there is
382 * no reason to go through the PDA to access this pointer.
383 * Hence nodepda aliases to the global nodepda directly.
384 *
385 * Routines should use nodepda to access the local node's PDA.
386 */
387 #define nodepda (Nodepdaindr)
388
389 #endif /* NUMA_BASE */
390
/* Quickly convert a compact node ID into a hwgraph vertex. */
#define cnodeid_to_vertex(cnodeid)	(NODEPDA(cnodeid)->node_vertex)


/*
 * True when the compact node id is CNODEID_NONE or the node it names
 * has no cpus (CNODE_NUM_CPUS() == 0), i.e. all of its cpus are
 * disabled.
 *
 * The argument is parenthesized so that expressions such as "a & b" or
 * "c ? x : y" are compared as a whole.  It is still evaluated twice, so
 * do not pass an expression with side effects.
 */
#define is_headless_node(_cnode)	(((_cnode) == CNODEID_NONE) || \
					 (CNODE_NUM_CPUS(_cnode) == 0))

/*
 * Same check, starting from a node vertex handle: the handle is mapped
 * to a compact node id with nodevertex_to_cnodeid() first.
 */
#define is_headless_node_vertex(_nodevhdl) \
	is_headless_node(nodevertex_to_cnodeid(_nodevhdl))
405
406 #ifdef __cplusplus
407 }
408 #endif
409
#ifdef NUMA_BASE
/*
 * To remove contention on the global syswait counter each node will have
 * its own.  Each clock tick the clock cpu will re-calculate the global
 * syswait counter by summing from each of the nodes.  The other cpus will
 * continue to read the global one during their clock ticks.  This does
 * present a problem when a thread increments the count on one node and wakes
 * up on a different node and decrements it there.  Eventually the count could
 * overflow if this happens continually for a long period.  To prevent this
 * second_thread() periodically preserves the current syswait state and
 * resets the counters.
 *
 * NOTE(review): the nodepda 'syswait' member is only declared when LATER
 * is also defined -- confirm these are not used without it.
 */
#define ADD_SYSWAIT(_field)	atomicAddInt(&nodepda->syswait._field, 1)
#define SUB_SYSWAIT(_field)	atomicAddInt(&nodepda->syswait._field, -1)
#else
/*
 * Non-NUMA: operate on the global syswait counter directly, asserting
 * first that the field is non-negative before an increment and
 * positive before a decrement.
 *
 * The bodies are wrapped in do { } while (0) so that each macro acts as
 * one statement and stays correct in an unbraced if/else.
 */
#define ADD_SYSWAIT(_field)				\
	do {						\
		ASSERT(syswait._field >= 0);		\
		atomicAddInt(&syswait._field, 1);	\
	} while (0)
#define SUB_SYSWAIT(_field)				\
	do {						\
		ASSERT(syswait._field > 0);		\
		atomicAddInt(&syswait._field, -1);	\
	} while (0)
#endif /* NUMA_BASE */
436
/*
 * ADD_PDCOUNT(_n): add _n to the pdcount counter and, when _n is
 * positive and pdflag is still clear, set pdflag to 1.
 *
 * pdcount is another global that gets a per-node copy to remove
 * contention (same idea as the per-node syswait counter): with
 * NUMA_BASE the local node's nodepda->pdcount is updated, otherwise the
 * global pdcount is.
 *
 * The body is wrapped in do { } while (0) so the macro acts as one
 * statement.  _n is parenthesized but still evaluated twice; do not
 * pass an expression with side effects.
 */
#ifdef NUMA_BASE
#define ADD_PDCOUNT(_n)					\
	do {						\
		atomicAddInt(&nodepda->pdcount, (_n));	\
		if ((_n) > 0 && !pdflag)		\
			pdflag = 1;			\
	} while (0)
#else
#define ADD_PDCOUNT(_n)					\
	do {						\
		/* check the counter's value, not its address */ \
		ASSERT(pdcount >= 0);			\
		atomicAddInt(&pdcount, (_n));		\
		if ((_n) > 0 && !pdflag)		\
			pdflag = 1;			\
	} while (0)
#endif /* NUMA_BASE */
457
458 #endif /* _ASM_SN_NODEPDA_H */
459