File: /usr/src/linux/include/asm-ia64/sn/nodepda.h

1     /* $Id$
2      *
3      * This file is subject to the terms and conditions of the GNU General Public
4      * License.  See the file "COPYING" in the main directory of this archive
5      * for more details.
6      *
7      * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc.
8      * Copyright (C) 2000 by Colin Ngam
9      */
10     #ifndef _ASM_SN_NODEPDA_H
11     #define _ASM_SN_NODEPDA_H
12     
13     #ifdef	__cplusplus
14     extern "C" {
15     #endif
16     
17     #include <linux/config.h>
18     
19     #include <asm/sn/agent.h>
20     #include <asm/sn/intr.h>
21     #include <asm/sn/router.h>
22     #include <asm/sn/synergy.h>
23     /* #include <SN/klkernvars.h> */
24     #ifdef LATER
25     typedef struct module_s module_t;       /* Avoids sys/SN/module.h */
26     #else
27     #include <asm/sn/module.h>
28     #endif
29     /* #include <SN/slotnum.h> */
30     
31     /*
32      * NUMA Node-Specific Data structures are defined in this file.
33      * In particular, this is the location of the node PDA.
34      * A pointer to the right node PDA is saved in each CPU PDA.
35      */
36     
37     /*
38      * Subnode PDA structures. Each node needs a few data structures that 
39      * correspond to the PIs on the HUB chip that supports the node.
40      *
41      * WARNING!!!! 6.5.x compatibility requirements prevent us from
42      * changing or reordering fields in the following structure for IP27.
43      * It is essential that the data mappings not change for IP27 platforms.
44      * It is OK to add fields that are IP35 specific if they are under #ifdef IP35.
45      */
46     struct subnodepda_s {	/* per-subnode data; embedded in nodepda_s as snpda[NUM_SUBNODES] */
47     	intr_vecblk_t	intr_dispatch0;	/* NOTE(review): presumably the first of two interrupt dispatch blocks - confirm in asm/sn/intr.h */
48     	intr_vecblk_t	intr_dispatch1;	/* NOTE(review): presumably the second interrupt dispatch block - confirm */
49     	uint64_t	next_prof_timeout;	/* NOTE(review): looks like profiling timer state - units not visible here */
50     	int		prof_count;	/* NOTE(review): looks like a profiling counter - confirm against users */
51     };	/* layout is frozen for IP27: do not change or reorder the fields above */
52     
53     
54     typedef struct subnodepda_s subnode_pda_t;	/* type of nodepda_s.snpda[] elements and of the subnodepda pointer */
55     
56     
/*
 * Incomplete types: struct nodepda_s only holds pointers to these, so a
 * forward declaration is sufficient here.
 */
struct ptpool_s;

#ifdef CONFIG_IA64_SGI_SYNERGY_PERF
struct synergy_perf_s;
#endif	/* CONFIG_IA64_SGI_SYNERGY_PERF */
62     
63     
64     /*
65      * Node-specific data structure.
66      *
67      * One of these structures is allocated on each node of a NUMA system.
68      * Non-NUMA systems are considered to be systems with one node, and
69      * hence there will be one of this structure for the entire system.
70      *
71      * This structure provides a convenient way of keeping together 
72      * all per-node data structures. 
73      */
74     
75     
76     
#if defined(LATER)
/*
 * Head of the per-node sanon page list: a lock together with the queue
 * head for the sanon pages that belong to the node.  An instance is
 * embedded in struct nodepda_s (that member is also compiled only when
 * LATER is defined).  See the anon manager for more details.
 */
typedef struct {
	lock_t	sal_lock;
	plist_t	sal_listhead;
} sanon_list_head_t;
#endif	/* LATER */
88     struct nodepda_s {	/* per-node data area; reached through nodepda (local node) or NODEPDA(x) */
89     
90     #ifdef	NUMA_BASE
91     
92     	/* 
93     	 * Pointer to this node's copy of Nodepdaindr 
94     	 */
95     	struct nodepda_s	**pernode_pdaindr; 	/* indexed by NODEPDA(x) and SUBNODEPDA(x,sn) */
96     
97     	/*
98              * Data used for migration control
99              */
100     	struct migr_control_data_s *mcd; 	/* returned by NODEPDA_MCD() */
101     
102     	/*
103              * Data used for replication control
104              */
105     	struct repl_control_data_s *rcd;	/* returned by NODEPDA_RCD() */
106     
107             /*
108              * Numa statistics
109              */
110     	struct numa_stats_s *numa_stats;
111     
112             /*
113              * Load distribution
114              */
115             uint memfit_assign;
116     
117             /*
118              * New extended memory reference counters
119              */
120             void *migr_refcnt_counterbase;
121             void *migr_refcnt_counterbuffer;
122             size_t migr_refcnt_cbsize;
123             int  migr_refcnt_numsets;
124     
125             /*
126              * mem_tick quiescing lock
127              */
128             uint mem_tick_lock;
129     
130             /*
131              * Migration candidate set
132              * by migration prologue intr handler
133              */
134             uint64_t migr_candidate;
135     
136     	/*
137     	 * Each node gets its own syswait counter to remove contention
138     	 * on the global one.
139     	 */
140     #ifdef	LATER	/* the NUMA_BASE ADD_SYSWAIT()/SUB_SYSWAIT() macros dereference this member */
141     	struct syswait syswait;
142     #endif
143     
144     #endif	/* NUMA_BASE */
145     	/*
146     	 * Node-specific Zone structures.
147     	 */
148     #ifdef LATER
149     	zoneset_element_t	node_zones;
150     	pg_data_t	node_pg_data;	/* VM page data structures */ 
151     	plist_t	error_discard_plist;
152     #endif
153     	uint		error_discard_count;
154     	uint		error_page_count;
155     	uint		error_cleaned_count;
156     	spinlock_t	error_discard_lock;	/* NOTE(review): presumably guards the error_* fields above - confirm */
157     
158     	/* Information needed for SN Hub chip interrupt handling. */
159     	subnode_pda_t	snpda[NUM_SUBNODES];	/* one entry per subnode; see SNPDA() / SUBNODEPDA() */
160     	/* Distributed kernel support */
161     #ifdef	LATER
162     	kern_vars_t	kern_vars;
163     #endif
164     	/* Vector operation support */
165     	/* Change this to a sleep lock? */
166     	spinlock_t	vector_lock;
167     	/* State of the vector unit for this node */
168     	char		vector_unit_busy;
169     	cpuid_t         node_first_cpu; /* Starting cpu number for node */
170     	ushort          node_num_cpus;  /* Number of cpus present       */
171     
172     	/* node utlbmiss info */
173       	spinlock_t		node_utlbswitchlock;
174     	volatile cpumask_t	node_utlbmiss_flush;
175     	volatile signed char	node_need_utlbmiss_patch;
176     	volatile char		node_utlbmiss_patched;
177     	nodepda_router_info_t	*npda_rip_first;	/* NOTE(review): presumably head of this node's router-info list - confirm */
178     	nodepda_router_info_t	**npda_rip_last;	/* NOTE(review): presumably tail link of that list - confirm */
179     	int		dependent_routers;
180     
181     #if defined(CONFIG_IA64_SGI_SYNERGY_PERF)
182     	int			synergy_perf_enabled;
183             int       		synergy_perf_freq;
184     	spinlock_t		synergy_perf_lock;
185             uint64_t       		synergy_inactive_intervals;
186             uint64_t       		synergy_active_intervals;
187             struct synergy_perf_s	*synergy_perf_data;
188             struct synergy_perf_s	*synergy_perf_first; /* reporting consistency .. */
189     #endif /* CONFIG_IA64_SGI_SYNERGY_PERF */
190     
191     	devfs_handle_t 	xbow_vhdl;
192     	nasid_t		xbow_peer;	/* NASID of our peer hub on xbow */
193     	struct semaphore xbow_sema;	/* Sema for xbow synchronization */
194     	slotid_t	slotdesc;	/* read by NODE_SLOTID() */
195     	moduleid_t	module_id;	/* Module ID (redundant local copy) */
196     	module_t	*module;	/* Pointer to containing module */
197     	int		hub_chip_rev;	/* Rev of my Hub chip */
198     	char		nasid_mask[NASID_MASK_BYTES];
199     					/* Need a copy of the nasid mask
200     					 * on every node */
201     	xwidgetnum_t 	basew_id;
202     	devfs_handle_t 	basew_xc;
203     	spinlock_t	fprom_lock;
204     	char		ni_error_print; /* For printing ni error state
205     					 * only once during system panic
206     					 */
207     #ifdef	LATER
208     	md_perf_monitor_t node_md_perfmon;	/* address taken by NODEPDA_MDP_MON(); exists only under LATER */
209     	hubstat_t	hubstats;
210     	int		hubticks;
211     	sbe_info_t	*sbe_info;	/* ECC single-bit error statistics */
212     #endif	/* LATER */
213     	int		huberror_ticks;
214     
215     	router_queue_t  *visited_router_q;
216     	router_queue_t	*bfs_router_q; 
217     					/* Used for router traversal */
218     #if defined (CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC)
219     	router_map_ent_t router_map[MAX_RTR_BREADTH];
220     #endif
221     	int		num_routers;	/* Total routers in the system */
222     
223             char		membank_flavor;
224     	                                /* Indicates what sort of memory 
225     					 * banks are present on this node
226     					 */
227     	
228     	char		*hwg_node_name;	/* hwgraph node name */
229     
230     	struct widget_info_t *widget_info;	/* Node as xtalk widget */
231     	devfs_handle_t	node_vertex;	/* Hwgraph vertex for this node; returned by cnodeid_to_vertex() */
232     
233     	void 		*pdinfo;	/* Platform-dependent per-node info */
234     	uint64_t	*dump_stack;	/* Dump stack during nmi handling */
235     	int		dump_count;	/* To allow only one cpu-per-node */
236     #ifdef	LATER
237     	io_perf_monitor_t node_io_perfmon;	/* address taken by NODEPDA_IOP_MON(); exists only under LATER */
238     #endif
239     
240     	/*
241     	 * Each node gets its own pdcount counter to remove contention
242     	 * on the global one.
243     	 */
244     
245     	int pdcount;			/* count of pdinserted pages; updated by the NUMA_BASE ADD_PDCOUNT() */
246     
247     #ifdef	NUMA_BASE
248     	void		*cached_global_pool;	/* pointer to cached vmpool */
249     #endif /* NUMA_BASE */
250     
251     #ifdef	LATER
252     	sanon_list_head_t sanon_list_head;	/* head for sanon pages */	
253     #endif
254     #ifdef	NUMA_BASE
255     	struct ptpool_s	*ptpool;	/* ptpool for this node */
256     #endif /* NUMA_BASE */
257     
258     	/*
259     	 * The BTEs on this node are shared by the local cpus
260     	 */
261     #if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC)
262     #ifdef	LATER
263     	bteinfo_t	*node_bte_info[BTES_PER_NODE];
264     #endif
265     #endif
266     };	/* NOTE(review): no error_stamp or lrs member is declared above, yet NODEPDA_ERROR_FOOTPRINT() and NODEPDA_LRS() name them */
267     
268     typedef struct nodepda_s nodepda_t;	/* element type behind Nodepdaindr and nodepda */
269     
270     
/*
 * Module id and slot descriptor stored in a node's PDA.  The argument is
 * handed unchanged to NODEPDA().
 */
#define NODE_MODULEID(node)	(NODEPDA(node)->module_id)
#define NODE_SLOTID(node)	(NODEPDA(node)->slotdesc)
273     
274     #ifdef	NUMA_BASE
275     /*
276      * Access Functions for node PDA.
277      * Since there is one nodepda for each node, we need a convenient mechanism
278      * to access these nodepdas without cluttering code with #ifdefs.
279      * The next set of definitions provides this.
280      * Routines are expected to use 
281      *
282      *	nodepda		-> to access PDA for the node on which code is running
283      *	subnodepda	-> to access subnode PDA for the node on which code is running
284      *
285      *	NODEPDA(x)	-> to access node PDA for cnodeid 'x'
286      *	SUBNODEPDA(x,s)	-> to access subnode PDA for cnodeid/slice 'x'
287      */
288     
289     #ifdef	LATER
290     #define	nodepda		private.p_nodepda	/* Ptr to this node's PDA */
291     #if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC
292     #define subnodepda	private.p_subnodepda	/* Ptr to this node's subnode PDA */
293     #endif
294     
295     #else
296     /*
297      * Until we have a shared node local area defined, do it this way ..
298      * like in Caliase space.  See above.
299      */
300     extern nodepda_t        *nodepda;
301     extern subnode_pda_t	*subnodepda;
302     #endif
303     
304     /* 
305      * Nodepdaindr[]
306      * This is a private data structure for use only in early initialization.
307      * All users of nodepda should use the macro NODEPDA(nodenum) to get
308      * the suitable nodepda structure.
309      * This macro has the advantage of not requiring #ifdefs for NUMA and
310      * non-NUMA code.
311      */
312     extern nodepda_t	*Nodepdaindr[]; 
313     /*
314      * NODEPDA_GLOBAL(x) macro should ONLY be used during early initialization.
315      * Once meminit is complete, NODEPDA(x) is ready to use.
316      * During early init, the system fills up Nodepdaindr.  By the time we
317      * are in meminit(), all nodepdas are initialized, and hence
318      * we can fill up the node_pdaindr array in each nodepda structure.
319      */
320     #define	NODEPDA_GLOBAL(x)	Nodepdaindr[x]
321     
322     /*
323      * Returns a pointer to a given node's nodepda.
324      */
325     #define	NODEPDA(x)		(nodepda->pernode_pdaindr[x])
326     
327     /*
328      * Returns a pointer to a given node/slice's subnodepda.
329      *	SUBNODEPDA(cnode, subnode) - uses cnode as first arg
330      *	SNPDA(npda, subnode)	   - uses pointer to nodepda as first arg
331      */
332     #define	SUBNODEPDA(x,sn)	(&nodepda->pernode_pdaindr[x]->snpda[sn])
333     #define	SNPDA(npda,sn)		(&(npda)->snpda[sn])
334     
335     #define NODEPDA_ERROR_FOOTPRINT(node, cpu) \
336                        (&(NODEPDA(node)->error_stamp[cpu]))
337     #define NODEPDA_MDP_MON(node)	(&(NODEPDA(node)->node_md_perfmon))
338     #define NODEPDA_IOP_MON(node)	(&(NODEPDA(node)->node_io_perfmon))
339     
340     /*
341      * Macros to access data structures inside nodepda 
342      */
343     #if NUMA_MIGR_CONTROL
344     #define NODEPDA_MCD(node) (NODEPDA(node)->mcd)
345     #endif /* NUMA_MIGR_CONTROL */
346     
347     #if NUMA_REPL_CONTROL
348     #define NODEPDA_RCD(node) (NODEPDA(node)->rcd)
349     #endif /* NUMA_REPL_CONTROL */
350     
351     #if (NUMA_MIGR_CONTROL || NUMA_REPL_CONTROL)
352     #define NODEPDA_LRS(node) (NODEPDA(node)->lrs)
353     #endif /* (NUMA_MIGR_CONTROL || NUMA_REPL_CONTROL) */
354     
355     /* 
356      * Exported functions
357      */
358     extern nodepda_t *nodepda_alloc(void);
359     
360     #else	/* !NUMA_BASE */
361     /*
362      * For a single-node system we will just have one global nodepda pointer
363      * allocated at startup.  The global nodepda will point to this nodepda 
364      * structure.
365      */
366     extern nodepda_t	*Nodepdaindr; 
367     
368     /*
369      * On non-NUMA systems, NODEPDA_GLOBAL and NODEPDA macros collapse to
370      * be the same.
371      */
372     #define	NODEPDA_GLOBAL(x)	Nodepdaindr
373     
374     /*
375      * Returns a pointer to a given node's nodepda.
376      */
377     #define	NODEPDA(x)	Nodepdaindr
378     
379     /*
380      * nodepda can also be defined as private.p_nodepda.
381      * But on non-NUMA systems, there is only one nodepda, and there is
382      * no reason to go through the PDA to access this pointer.
383      * Hence nodepda aliases to the global nodepda directly.
384      *
385      * Routines should use nodepda to access the local node's PDA.
386      */
387     #define	nodepda		(Nodepdaindr)
388     
389     #endif	/* NUMA_BASE */
390     
/* Quickly convert a compact node ID into a hwgraph vertex */
#define cnodeid_to_vertex(cnodeid) (NODEPDA(cnodeid)->node_vertex)


/*
 * Check if, given a compact node id, the corresponding node has all its
 * cpus disabled.  True when the id is CNODEID_NONE or when
 * CNODE_NUM_CPUS() reports zero cpus for it.
 *
 * The argument is parenthesized so that a compound expression (for
 * example "a ? b : c") is compared as a whole.  It may be evaluated
 * twice, so do not pass an expression with side effects.
 */
#define is_headless_node(_cnode)	(((_cnode) == CNODEID_NONE) || \
					 (CNODE_NUM_CPUS(_cnode) == 0))

/*
 * Check if, given a node vertex handle, the corresponding node has all
 * its cpus disabled.  The handle is converted with
 * nodevertex_to_cnodeid() and then checked by is_headless_node().
 */
#define is_headless_node_vertex(_nodevhdl) \
			is_headless_node(nodevertex_to_cnodeid(_nodevhdl))
405     
406     #ifdef	__cplusplus
407     }
408     #endif
409     
#ifdef NUMA_BASE
/*
 * To remove contention on the global syswait counter each node will have
 * its own.  Each clock tick the clock cpu will re-calculate the global
 * syswait counter by summing from each of the nodes.  The other cpus will
 * continue to read the global one during their clock ticks.   This does
 * present a problem when a thread increments the count on one node and wakes
 * up on a different node and decrements it there.  Eventually the count could
 * overflow if this happens continually for a long period.  To prevent this
 * second_thread() periodically preserves the current syswait state and
 * resets the counters.
 *
 * _field names a member of the node's syswait structure.  Both macros
 * expand to a single atomicAddInt() call on the local node's counter.
 */
#define ADD_SYSWAIT(_field)	atomicAddInt(&nodepda->syswait._field, 1)
#define SUB_SYSWAIT(_field)	atomicAddInt(&nodepda->syswait._field, -1)
#else
/*
 * Non-NUMA: operate on the single global syswait structure, asserting
 * that the counter is in range before it is changed.
 *
 * The bodies are wrapped in do { } while (0) so that each macro is
 * exactly one statement and "if (c) ADD_SYSWAIT(f); else ..." compiles.
 * As with the NUMA_BASE form, callers supply the trailing semicolon.
 */
#define ADD_SYSWAIT(_field)				\
do {							\
	ASSERT(syswait._field >= 0);			\
	atomicAddInt(&syswait._field, 1);		\
} while (0)
#define SUB_SYSWAIT(_field)				\
do {							\
	ASSERT(syswait._field > 0);			\
	atomicAddInt(&syswait._field, -1);		\
} while (0)
#endif /* NUMA_BASE */
436     
#ifdef NUMA_BASE
/*
 * Another global variable to remove contention from: pdcount.
 * Each node keeps its own count in its nodepda.
 *
 * ADD_PDCOUNT(_n) atomically adds _n to the local node's pdcount and
 * sets pdflag when a positive amount was added and the flag was clear.
 * _n is parenthesized at every use and the body is a do { } while (0)
 * statement, so the macro is safe in if/else and with compound arguments.
 * _n is evaluated more than once.
 */
#define ADD_PDCOUNT(_n)					\
do {							\
	atomicAddInt(&nodepda->pdcount, (_n));		\
	if ((_n) > 0 && !pdflag)			\
		pdflag = 1;				\
} while (0)
#else
/*
 * Non-NUMA: same operation on the single global pdcount.
 *
 * The assertion checks the counter VALUE; the earlier form compared the
 * counter's address against zero, which can never fail.
 */
#define ADD_PDCOUNT(_n)					\
do {							\
	ASSERT(pdcount >= 0);				\
	atomicAddInt(&pdcount, (_n));			\
	if ((_n) > 0 && !pdflag)			\
		pdflag = 1;				\
} while (0)
#endif /* NUMA_BASE */
457     
458     #endif /* _ASM_SN_NODEPDA_H */
459