File: /usr/src/linux/arch/ia64/kernel/smpboot.c

1     /*
2      * SMP boot-related support
3      *
4      * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com>
5      *
6      * 01/05/16 Rohit Seth <rohit.seth@intel.com>	Moved SMP booting functions from smp.c to here.
7      * 01/04/27 David Mosberger <davidm@hpl.hp.com>	Added ITC synching code.
8      */
9     
10     
11     #define __KERNEL_SYSCALLS__
12     
13     #include <linux/config.h>
14     
15     #include <linux/bootmem.h>
16     #include <linux/delay.h>
17     #include <linux/init.h>
18     #include <linux/interrupt.h>
19     #include <linux/irq.h>
20     #include <linux/kernel.h>
21     #include <linux/kernel_stat.h>
22     #include <linux/mm.h>
23     #include <linux/smp.h>
24     #include <linux/smp_lock.h>
25     #include <linux/spinlock.h>
26     
27     #include <asm/atomic.h>
28     #include <asm/bitops.h>
29     #include <asm/cache.h>
30     #include <asm/current.h>
31     #include <asm/delay.h>
32     #include <asm/efi.h>
33     #include <asm/io.h>
34     #include <asm/irq.h>
35     #include <asm/machvec.h>
36     #include <asm/page.h>
37     #include <asm/pgalloc.h>
38     #include <asm/pgtable.h>
39     #include <asm/processor.h>
40     #include <asm/ptrace.h>
41     #include <asm/sal.h>
42     #include <asm/system.h>
43     #include <asm/unistd.h>
44     
/*
 * Debug tracing for the SMP boot path.  When SMP_DEBUG evaluates to non-zero,
 * Dprintk() forwards its arguments to printk(); otherwise the call compiles
 * away.
 */
#if SMP_DEBUG
#define Dprintk(x...)  printk(x)
#else
/*
 * Expand to one complete (empty) statement rather than to nothing, so that
 * "Dprintk(...);" is always exactly one statement: safe as the sole body of
 * an if/else and a compile error if the trailing semicolon is forgotten.
 */
#define Dprintk(x...)  do { } while (0)
#endif
50     
51     
52     /*
53      * ITC synchronization related stuff:
54      */
55     #define MASTER	0
56     #define SLAVE	(SMP_CACHE_BYTES/8)
57     
58     #define NUM_ROUNDS	64	/* magic value */
59     #define NUM_ITERS	5	/* likewise */
60     
61     static spinlock_t itc_sync_lock = SPIN_LOCK_UNLOCKED;
62     static volatile unsigned long go[SLAVE + 1];
63     
64     #define DEBUG_ITC_SYNC	0
65     
66     extern void __init calibrate_delay(void);
67     extern void start_ap(void);
68     
69     int cpucount;
70     
71     /* Setup configured maximum number of CPUs to activate */
72     static int max_cpus = -1;
73     
74     /* Total count of live CPUs */
75     int smp_num_cpus = 1;
76     
77     /* Bitmask of currently online CPUs */
78     volatile unsigned long cpu_online_map;
79     
80     /* which logical CPU number maps to which physical CPU (SAPIC ID) */
81     volatile int ia64_cpu_to_sapicid[NR_CPUS];
82     
83     static volatile unsigned long cpu_callin_map;
84     
85     struct smp_boot_data smp_boot_data __initdata;
86     
87     /* Set when the idlers are all forked */
88     volatile int smp_threads_ready;
89     
90     unsigned long ap_wakeup_vector = -1; /* External interrupt vector used to wake up APs */
91     
92     char __initdata no_int_routing;
93     
94     unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
95     
96     /*
97      * Setup routine for controlling SMP activation
98      *
99      * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
100      * activation entirely (the MPS table probe still happens, though).
101      *
102      * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
103      * greater than 0, limits the maximum number of CPUs activated in
104      * SMP mode to <NUM>.
105      */
106     
107     static int __init
108     nosmp (char *str)
109     {
110     	max_cpus = 0;
111     	return 1;
112     }
113     
114     __setup("nosmp", nosmp);
115     
116     static int __init
117     maxcpus (char *str)
118     {
119     	get_option(&str, &max_cpus);
120     	return 1;
121     }
122     
123     __setup("maxcpus=", maxcpus);
124     
125     static int __init
126     nointroute (char *str)
127     {
128     	no_int_routing = 1;
129     	return 1;
130     }
131     
132     __setup("nointroute", nointroute);
133     
134     void
135     sync_master (void *arg)
136     {
137     	unsigned long flags, i;
138     
139     	go[MASTER] = 0;
140     
141     	local_irq_save(flags);
142     	{
143     		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
144     			while (!go[MASTER]);
145     			go[MASTER] = 0;
146     			go[SLAVE] = ia64_get_itc();
147     		}
148     	}
149     	local_irq_restore(flags);
150     }
151     
152     /*
153      * Return the number of cycles by which our itc differs from the itc on the master
154      * (time-keeper) CPU.  A positive number indicates our itc is ahead of the master,
155      * negative that it is behind.
156      */
157     static inline long
158     get_delta (long *rt, long *master)
159     {
160     	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
161     	unsigned long tcenter, t0, t1, tm;
162     	long i;
163     
164     	for (i = 0; i < NUM_ITERS; ++i) {
165     		t0 = ia64_get_itc();
166     		go[MASTER] = 1;
167     		while (!(tm = go[SLAVE]));
168     		go[SLAVE] = 0;
169     		t1 = ia64_get_itc();
170     
171     		if (t1 - t0 < best_t1 - best_t0)
172     			best_t0 = t0, best_t1 = t1, best_tm = tm;
173     	}
174     
175     	*rt = best_t1 - best_t0;
176     	*master = best_tm - best_t0;
177     
178     	/* average best_t0 and best_t1 without overflow: */
179     	tcenter = (best_t0/2 + best_t1/2);
180     	if (best_t0 % 2 + best_t1 % 2 == 2)
181     		++tcenter;
182     	return tcenter - best_tm;
183     }
184     
185     /*
186      * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
187      * (normally the time-keeper CPU).  We use a closed loop to eliminate the possibility of
188      * unaccounted-for errors (such as getting a machine check in the middle of a calibration
189      * step).  The basic idea is for the slave to ask the master what itc value it has and to
190      * read its own itc before and after the master responds.  Each iteration gives us three
191      * timestamps:
192      *
193      *	slave		master
194      *
195      *	t0 ---\
196      *             ---\
197      *		   --->
198      *			tm
199      *		   /---
200      *	       /---
201      *	t1 <---
202      *
203      *
204      * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
205      * and t1.  If we achieve this, the clocks are synchronized provided the interconnect
206      * between the slave and the master is symmetric.  Even if the interconnect were
207      * asymmetric, we would still know that the synchronization error is smaller than the
208      * roundtrip latency (t0 - t1).
209      *
210      * When the interconnect is quiet and symmetric, this lets us synchronize the itc to
211      * within one or two cycles.  However, we can only *guarantee* that the synchronization is
212      * accurate to within a round-trip time, which is typically in the range of several
213      * hundred cycles (e.g., ~500 cycles).  In practice, this means that the itc's are usually
214      * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
215      * than half a micro second or so.
216      */
217     void
218     ia64_sync_itc (unsigned int master)
219     {
220     	long i, delta, adj, adjust_latency = 0, done = 0;
221     	unsigned long flags, rt, master_time_stamp, bound;
222     #if DEBUG_ITC_SYNC
223     	struct {
224     		long rt;	/* roundtrip time */
225     		long master;	/* master's timestamp */
226     		long diff;	/* difference between midpoint and master's timestamp */
227     		long lat;	/* estimate of itc adjustment latency */
228     	} t[NUM_ROUNDS];
229     #endif
230     
231     	go[MASTER] = 1;
232     
233     	if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
234     		printk("sync_itc: failed to get attention of CPU %u!\n", master);
235     		return;
236     	}
237     
238     	while (go[MASTER]);	/* wait for master to be ready */
239     
240     	spin_lock_irqsave(&itc_sync_lock, flags);
241     	{
242     		for (i = 0; i < NUM_ROUNDS; ++i) {
243     			delta = get_delta(&rt, &master_time_stamp);
244     			if (delta == 0) {
245     				done = 1;	/* let's lock on to this... */
246     				bound = rt;
247     			}
248     
249     			if (!done) {
250     				if (i > 0) {
251     					adjust_latency += -delta;
252     					adj = -delta + adjust_latency/4;
253     				} else
254     					adj = -delta;
255     
256     				ia64_set_itc(ia64_get_itc() + adj);
257     			}
258     #if DEBUG_ITC_SYNC
259     			t[i].rt = rt;
260     			t[i].master = master_time_stamp;
261     			t[i].diff = delta;
262     			t[i].lat = adjust_latency/4;
263     #endif
264     		}
265     	}
266     	spin_unlock_irqrestore(&itc_sync_lock, flags);
267     
268     #if DEBUG_ITC_SYNC
269     	for (i = 0; i < NUM_ROUNDS; ++i)
270     		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
271     		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
272     #endif
273     
274     	printk("CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, maxerr %lu cycles)\n",
275     	       smp_processor_id(), master, delta, rt);
276     }
277     
278     /*
279      * Ideally sets up per-cpu profiling hooks.  Doesn't do much now...
280      */
281     static inline void __init
282     smp_setup_percpu_timer (void)
283     {
284     	local_cpu_data->prof_counter = 1;
285     	local_cpu_data->prof_multiplier = 1;
286     }
287     
288     /*
289      * Architecture specific routine called by the kernel just before init is
290      * fired off. This allows the BP to have everything in order [we hope].
291      * At the end of this all the APs will hit the system scheduling and off
292      * we go. Each AP will jump through the kernel
293      * init into idle(). At this point the scheduler will one day take over
294      * and give them jobs to do. smp_callin is a standard routine
295      * we use to track CPUs as they power up.
296      */
297     
298     static volatile atomic_t smp_commenced = ATOMIC_INIT(0);
299     
300     void __init
301     smp_commence (void)
302     {
303     	/*
304     	 * Lets the callins below out of their loop.
305     	 */
306     	Dprintk("Setting commenced=1, go go go\n");
307     
308     	wmb();
309     	atomic_set(&smp_commenced,1);
310     }
311     
312     
313     void __init
314     smp_callin (void)
315     {
316     	int cpuid, phys_id;
317     	extern void ia64_init_itm(void);
318     
319     #ifdef CONFIG_PERFMON
320     	extern void perfmon_init_percpu(void);
321     #endif
322     
323     	cpuid = smp_processor_id();
324     	phys_id = hard_smp_processor_id();
325     
326     	if (test_and_set_bit(cpuid, &cpu_online_map)) {
327     		printk("huh, phys CPU#0x%x, CPU#0x%x already present??\n", 
328     					phys_id, cpuid);
329     		BUG();
330     	}
331     
332     	smp_setup_percpu_timer();
333     
334     	/*
335     	 * Synchronize the ITC with the BP
336     	 */
337     	Dprintk("Going to syncup ITC with BP.\n");
338     
339     	ia64_sync_itc(0);
340     	/*
341     	 * Get our bogomips.
342     	 */
343     	ia64_init_itm();
344     #ifdef CONFIG_PERFMON
345     	perfmon_init_percpu();
346     #endif
347     
348     	local_irq_enable();
349     	calibrate_delay();
350     	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
351     	/*
352     	 * Allow the master to continue.
353     	 */
354     	set_bit(cpuid, &cpu_callin_map);
355     	Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
356     }
357     
358     
359     /*
360      * Activate a secondary processor.  head.S calls this.
361      */
362     int __init
363     start_secondary (void *unused)
364     {
365     	extern int cpu_idle (void);
366     
367     	efi_map_pal_code();
368     	cpu_init();
369     	smp_callin();
370     	Dprintk("CPU %d is set to go. \n", smp_processor_id());
371     	while (!atomic_read(&smp_commenced))
372     		;
373     
374     	Dprintk("CPU %d is starting idle. \n", smp_processor_id());
375     	return cpu_idle();
376     }
377     
378     static int __init
379     fork_by_hand (void)
380     {
381     	/*
382     	 * don't care about the eip and regs settings since
383     	 * we'll never reschedule the forked task.
384     	 */
385     	return do_fork(CLONE_VM|CLONE_PID, 0, 0, 0);
386     }
387     
388     static void __init
389     do_boot_cpu (int sapicid)
390     {
391     	struct task_struct *idle;
392     	int timeout, cpu;
393     
394     	cpu = ++cpucount;
395     	/*
396     	 * We can't use kernel_thread since we must avoid to
397     	 * reschedule the child.
398     	 */
399     	if (fork_by_hand() < 0)
400     		panic("failed fork for CPU %d", cpu);
401     
402     	/*
403     	 * We remove it from the pidhash and the runqueue
404     	 * once we got the process:
405     	 */
406     	idle = init_task.prev_task;
407     	if (!idle)
408     		panic("No idle process for CPU %d", cpu);
409     
410     	idle->processor = cpu;
411     	ia64_cpu_to_sapicid[cpu] = sapicid;
412     	idle->has_cpu = 1; /* we schedule the first task manually */
413     
414     	del_from_runqueue(idle);
415     	unhash_process(idle);
416     	init_tasks[cpu] = idle;
417     
418     	Dprintk("Sending Wakeup Vector to AP 0x%x/0x%x.\n", cpu, sapicid);
419     
420     	platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
421     
422     	/*
423     	 * Wait 10s total for the AP to start
424     	 */
425     	Dprintk("Waiting on callin_map ...");
426     	for (timeout = 0; timeout < 100000; timeout++) {
427     		Dprintk(".");
428     		if (test_bit(cpu, &cpu_callin_map))
429     			break;  /* It has booted */
430     		udelay(100);
431     	}
432     	Dprintk("\n");
433     
434     	if (test_bit(cpu, &cpu_callin_map)) {
435     		/* number CPUs logically, starting from 1 (BSP is 0) */
436     		printk("CPU%d: ", cpu);
437     		/*print_cpu_info(&cpu_data[cpu]); */
438     		printk("CPU has booted.\n");
439     	} else {
440     		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
441     		ia64_cpu_to_sapicid[cpu] = -1;
442     		cpucount--;
443     	}
444     }
445     
446     /*
447      * Cycle through the APs sending Wakeup IPIs to boot each.
448      */
449     void __init
450     smp_boot_cpus (void)
451     {
452     	int sapicid, cpu;
453     	int boot_cpu_id = hard_smp_processor_id();
454     
455     	/*
456     	 * Initialize the logical to physical CPU number mapping
457     	 * and the per-CPU profiling counter/multiplier
458     	 */
459     
460     	for (cpu = 0; cpu < NR_CPUS; cpu++)
461     		ia64_cpu_to_sapicid[cpu] = -1;
462     	smp_setup_percpu_timer();
463     
464     	/*
465     	* We have the boot CPU online for sure.
466     	*/
467     	set_bit(0, &cpu_online_map);
468     	set_bit(0, &cpu_callin_map);
469     
470     	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
471     	ia64_cpu_to_sapicid[0] = boot_cpu_id;
472     
473     	printk("Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);
474     
475     	global_irq_holder = 0;
476     	current->processor = 0;
477     	init_idle();
478     
479     	/*
480     	 * If SMP should be disabled, then really disable it!
481     	 */
482     	if (!max_cpus || (max_cpus < -1)) {
483     		printk(KERN_INFO "SMP mode deactivated.\n");
484     		cpu_online_map =  1;
485     		smp_num_cpus = 1;
486     		goto smp_done;
487     	}
488     	if  (max_cpus != -1)
489     		printk (KERN_INFO "Limiting CPUs to %d\n", max_cpus);
490     
491     	if (smp_boot_data.cpu_count > 1) {
492     
493     		printk(KERN_INFO "SMP: starting up secondaries.\n");
494     
495     		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) {
496     			/*
497     			 * Don't even attempt to start the boot CPU!
498     			 */
499     			sapicid = smp_boot_data.cpu_phys_id[cpu];
500     			if ((sapicid == -1) || (sapicid == hard_smp_processor_id()))
501     				continue;
502     
503     			if ((max_cpus > 0) && (cpucount + 1 >= max_cpus))
504     				break;
505     
506     			do_boot_cpu(sapicid);
507     
508     			/*
509     			 * Make sure we unmap all failed CPUs
510     			 */
511     			if (ia64_cpu_to_sapicid[cpu] == -1)
512     				printk("phys CPU#%d not responding - cannot use it.\n", cpu);
513     		}
514     
515     		smp_num_cpus = cpucount + 1;
516     
517     		/*
518     		 * Allow the user to impress friends.
519     		 */
520     
521     		printk("Before bogomips.\n");
522     		if (!cpucount) {
523     			printk(KERN_ERR "Error: only one processor found.\n");
524     		} else {
525     			unsigned long bogosum = 0;
526       			for (cpu = 0; cpu < NR_CPUS; cpu++)
527     				if (cpu_online_map & (1<<cpu))
528     					bogosum += cpu_data(cpu)->loops_per_jiffy;
529     
530     			printk(KERN_INFO"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
531     			       cpucount + 1, bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
532     		}
533     	}
534       smp_done:
535     	;
536     }
537     
538     /*
539      * Assume that CPU's have been discovered by some platform-dependant interface.  For
540      * SoftSDV/Lion, that would be ACPI.
541      *
542      * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
543      */
544     void __init
545     init_smp_config(void)
546     {
547     	struct fptr {
548     		unsigned long fp;
549     		unsigned long gp;
550     	} *ap_startup;
551     	long sal_ret;
552     
553     	/* Tell SAL where to drop the AP's.  */
554     	ap_startup = (struct fptr *) start_ap;
555     	sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
556     				       __pa(ap_startup->fp), __pa(ap_startup->gp), 0, 0, 0, 0);
557     	if (sal_ret < 0) {
558     		printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n     Forcing UP mode\n",
559     		       ia64_sal_strerror(sal_ret));
560     		max_cpus = 0;
561     		smp_num_cpus = 1;
562     	}
563     }
564