File: /usr/src/linux/arch/ia64/kernel/smpboot.c
1 /*
2 * SMP boot-related support
3 *
4 * Copyright (C) 2001 David Mosberger-Tang <davidm@hpl.hp.com>
5 *
6 * 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here.
7 * 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code.
8 */
9
10
11 #define __KERNEL_SYSCALLS__
12
13 #include <linux/config.h>
14
15 #include <linux/bootmem.h>
16 #include <linux/delay.h>
17 #include <linux/init.h>
18 #include <linux/interrupt.h>
19 #include <linux/irq.h>
20 #include <linux/kernel.h>
21 #include <linux/kernel_stat.h>
22 #include <linux/mm.h>
23 #include <linux/smp.h>
24 #include <linux/smp_lock.h>
25 #include <linux/spinlock.h>
26
27 #include <asm/atomic.h>
28 #include <asm/bitops.h>
29 #include <asm/cache.h>
30 #include <asm/current.h>
31 #include <asm/delay.h>
32 #include <asm/efi.h>
33 #include <asm/io.h>
34 #include <asm/irq.h>
35 #include <asm/machvec.h>
36 #include <asm/page.h>
37 #include <asm/pgalloc.h>
38 #include <asm/pgtable.h>
39 #include <asm/processor.h>
40 #include <asm/ptrace.h>
41 #include <asm/sal.h>
42 #include <asm/system.h>
43 #include <asm/unistd.h>
44
/*
 * Debug tracing: Dprintk() forwards its arguments to printk() when SMP_DEBUG
 * evaluates to non-zero and expands to nothing otherwise.
 */
#if !SMP_DEBUG
# define Dprintk(x...)
#else
# define Dprintk(x...)	printk(x)
#endif
50
51
52 /*
53 * ITC synchronization related stuff:
54 */
55 #define MASTER 0
56 #define SLAVE (SMP_CACHE_BYTES/8)
57
58 #define NUM_ROUNDS 64 /* magic value */
59 #define NUM_ITERS 5 /* likewise */
60
61 static spinlock_t itc_sync_lock = SPIN_LOCK_UNLOCKED;
62 static volatile unsigned long go[SLAVE + 1];
63
64 #define DEBUG_ITC_SYNC 0
65
66 extern void __init calibrate_delay(void);
67 extern void start_ap(void);
68
69 int cpucount;
70
71 /* Setup configured maximum number of CPUs to activate */
72 static int max_cpus = -1;
73
74 /* Total count of live CPUs */
75 int smp_num_cpus = 1;
76
77 /* Bitmask of currently online CPUs */
78 volatile unsigned long cpu_online_map;
79
80 /* which logical CPU number maps to which CPU (physical APIC ID) */
81 volatile int ia64_cpu_to_sapicid[NR_CPUS];
82
83 static volatile unsigned long cpu_callin_map;
84
85 struct smp_boot_data smp_boot_data __initdata;
86
87 /* Set when the idlers are all forked */
88 volatile int smp_threads_ready;
89
90 unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */
91
92 char __initdata no_int_routing;
93
94 unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
95
96 /*
97 * Setup routine for controlling SMP activation
98 *
99 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
100 * activation entirely (the MPS table probe still happens, though).
101 *
102 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
103 * greater than 0, limits the maximum number of CPUs activated in
104 * SMP mode to <NUM>.
105 */
106
107 static int __init
108 nosmp (char *str)
109 {
110 max_cpus = 0;
111 return 1;
112 }
113
114 __setup("nosmp", nosmp);
115
116 static int __init
117 maxcpus (char *str)
118 {
119 get_option(&str, &max_cpus);
120 return 1;
121 }
122
123 __setup("maxcpus=", maxcpus);
124
125 static int __init
126 nointroute (char *str)
127 {
128 no_int_routing = 1;
129 return 1;
130 }
131
132 __setup("nointroute", nointroute);
133
134 void
135 sync_master (void *arg)
136 {
137 unsigned long flags, i;
138
139 go[MASTER] = 0;
140
141 local_irq_save(flags);
142 {
143 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
144 while (!go[MASTER]);
145 go[MASTER] = 0;
146 go[SLAVE] = ia64_get_itc();
147 }
148 }
149 local_irq_restore(flags);
150 }
151
152 /*
153 * Return the number of cycles by which our itc differs from the itc on the master
154 * (time-keeper) CPU. A positive number indicates our itc is ahead of the master,
155 * negative that it is behind.
156 */
157 static inline long
158 get_delta (long *rt, long *master)
159 {
160 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
161 unsigned long tcenter, t0, t1, tm;
162 long i;
163
164 for (i = 0; i < NUM_ITERS; ++i) {
165 t0 = ia64_get_itc();
166 go[MASTER] = 1;
167 while (!(tm = go[SLAVE]));
168 go[SLAVE] = 0;
169 t1 = ia64_get_itc();
170
171 if (t1 - t0 < best_t1 - best_t0)
172 best_t0 = t0, best_t1 = t1, best_tm = tm;
173 }
174
175 *rt = best_t1 - best_t0;
176 *master = best_tm - best_t0;
177
178 /* average best_t0 and best_t1 without overflow: */
179 tcenter = (best_t0/2 + best_t1/2);
180 if (best_t0 % 2 + best_t1 % 2 == 2)
181 ++tcenter;
182 return tcenter - best_tm;
183 }
184
185 /*
186 * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
187 * (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of
188 * unaccounted-for errors (such as getting a machine check in the middle of a calibration
189 * step). The basic idea is for the slave to ask the master what itc value it has and to
190 * read its own itc before and after the master responds. Each iteration gives us three
191 * timestamps:
192 *
193 * slave master
194 *
195 * t0 ---\
196 * ---\
197 * --->
198 * tm
199 * /---
200 * /---
201 * t1 <---
202 *
203 *
204 * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
205 * and t1. If we achieve this, the clocks are synchronized provided the interconnect
206 * between the slave and the master is symmetric. Even if the interconnect were
207 * asymmetric, we would still know that the synchronization error is smaller than the
208 * roundtrip latency (t0 - t1).
209 *
210 * When the interconnect is quiet and symmetric, this lets us synchronize the itc to
211 * within one or two cycles. However, we can only *guarantee* that the synchronization is
212 * accurate to within a round-trip time, which is typically in the range of several
213 * hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually
214 * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
215 * than half a micro second or so.
216 */
217 void
218 ia64_sync_itc (unsigned int master)
219 {
220 long i, delta, adj, adjust_latency = 0, done = 0;
221 unsigned long flags, rt, master_time_stamp, bound;
222 #if DEBUG_ITC_SYNC
223 struct {
224 long rt; /* roundtrip time */
225 long master; /* master's timestamp */
226 long diff; /* difference between midpoint and master's timestamp */
227 long lat; /* estimate of itc adjustment latency */
228 } t[NUM_ROUNDS];
229 #endif
230
231 go[MASTER] = 1;
232
233 if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
234 printk("sync_itc: failed to get attention of CPU %u!\n", master);
235 return;
236 }
237
238 while (go[MASTER]); /* wait for master to be ready */
239
240 spin_lock_irqsave(&itc_sync_lock, flags);
241 {
242 for (i = 0; i < NUM_ROUNDS; ++i) {
243 delta = get_delta(&rt, &master_time_stamp);
244 if (delta == 0) {
245 done = 1; /* let's lock on to this... */
246 bound = rt;
247 }
248
249 if (!done) {
250 if (i > 0) {
251 adjust_latency += -delta;
252 adj = -delta + adjust_latency/4;
253 } else
254 adj = -delta;
255
256 ia64_set_itc(ia64_get_itc() + adj);
257 }
258 #if DEBUG_ITC_SYNC
259 t[i].rt = rt;
260 t[i].master = master_time_stamp;
261 t[i].diff = delta;
262 t[i].lat = adjust_latency/4;
263 #endif
264 }
265 }
266 spin_unlock_irqrestore(&itc_sync_lock, flags);
267
268 #if DEBUG_ITC_SYNC
269 for (i = 0; i < NUM_ROUNDS; ++i)
270 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
271 t[i].rt, t[i].master, t[i].diff, t[i].lat);
272 #endif
273
274 printk("CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, maxerr %lu cycles)\n",
275 smp_processor_id(), master, delta, rt);
276 }
277
278 /*
279 * Ideally sets up per-cpu profiling hooks. Doesn't do much now...
280 */
281 static inline void __init
282 smp_setup_percpu_timer (void)
283 {
284 local_cpu_data->prof_counter = 1;
285 local_cpu_data->prof_multiplier = 1;
286 }
287
288 /*
289 * Architecture specific routine called by the kernel just before init is
290 * fired off. This allows the BP to have everything in order [we hope].
291 * At the end of this all the APs will hit the system scheduling and off
292 * we go. Each AP will jump through the kernel
293 * init into idle(). At this point the scheduler will one day take over
294 * and give them jobs to do. smp_callin is a standard routine
295 * we use to track CPUs as they power up.
296 */
297
298 static volatile atomic_t smp_commenced = ATOMIC_INIT(0);
299
300 void __init
301 smp_commence (void)
302 {
303 /*
304 * Lets the callins below out of their loop.
305 */
306 Dprintk("Setting commenced=1, go go go\n");
307
308 wmb();
309 atomic_set(&smp_commenced,1);
310 }
311
312
313 void __init
314 smp_callin (void)
315 {
316 int cpuid, phys_id;
317 extern void ia64_init_itm(void);
318
319 #ifdef CONFIG_PERFMON
320 extern void perfmon_init_percpu(void);
321 #endif
322
323 cpuid = smp_processor_id();
324 phys_id = hard_smp_processor_id();
325
326 if (test_and_set_bit(cpuid, &cpu_online_map)) {
327 printk("huh, phys CPU#0x%x, CPU#0x%x already present??\n",
328 phys_id, cpuid);
329 BUG();
330 }
331
332 smp_setup_percpu_timer();
333
334 /*
335 * Synchronize the ITC with the BP
336 */
337 Dprintk("Going to syncup ITC with BP.\n");
338
339 ia64_sync_itc(0);
340 /*
341 * Get our bogomips.
342 */
343 ia64_init_itm();
344 #ifdef CONFIG_PERFMON
345 perfmon_init_percpu();
346 #endif
347
348 local_irq_enable();
349 calibrate_delay();
350 local_cpu_data->loops_per_jiffy = loops_per_jiffy;
351 /*
352 * Allow the master to continue.
353 */
354 set_bit(cpuid, &cpu_callin_map);
355 Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
356 }
357
358
359 /*
360 * Activate a secondary processor. head.S calls this.
361 */
362 int __init
363 start_secondary (void *unused)
364 {
365 extern int cpu_idle (void);
366
367 efi_map_pal_code();
368 cpu_init();
369 smp_callin();
370 Dprintk("CPU %d is set to go. \n", smp_processor_id());
371 while (!atomic_read(&smp_commenced))
372 ;
373
374 Dprintk("CPU %d is starting idle. \n", smp_processor_id());
375 return cpu_idle();
376 }
377
378 static int __init
379 fork_by_hand (void)
380 {
381 /*
382 * don't care about the eip and regs settings since
383 * we'll never reschedule the forked task.
384 */
385 return do_fork(CLONE_VM|CLONE_PID, 0, 0, 0);
386 }
387
388 static void __init
389 do_boot_cpu (int sapicid)
390 {
391 struct task_struct *idle;
392 int timeout, cpu;
393
394 cpu = ++cpucount;
395 /*
396 * We can't use kernel_thread since we must avoid to
397 * reschedule the child.
398 */
399 if (fork_by_hand() < 0)
400 panic("failed fork for CPU %d", cpu);
401
402 /*
403 * We remove it from the pidhash and the runqueue
404 * once we got the process:
405 */
406 idle = init_task.prev_task;
407 if (!idle)
408 panic("No idle process for CPU %d", cpu);
409
410 idle->processor = cpu;
411 ia64_cpu_to_sapicid[cpu] = sapicid;
412 idle->has_cpu = 1; /* we schedule the first task manually */
413
414 del_from_runqueue(idle);
415 unhash_process(idle);
416 init_tasks[cpu] = idle;
417
418 Dprintk("Sending Wakeup Vector to AP 0x%x/0x%x.\n", cpu, sapicid);
419
420 platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
421
422 /*
423 * Wait 10s total for the AP to start
424 */
425 Dprintk("Waiting on callin_map ...");
426 for (timeout = 0; timeout < 100000; timeout++) {
427 Dprintk(".");
428 if (test_bit(cpu, &cpu_callin_map))
429 break; /* It has booted */
430 udelay(100);
431 }
432 Dprintk("\n");
433
434 if (test_bit(cpu, &cpu_callin_map)) {
435 /* number CPUs logically, starting from 1 (BSP is 0) */
436 printk("CPU%d: ", cpu);
437 /*print_cpu_info(&cpu_data[cpu]); */
438 printk("CPU has booted.\n");
439 } else {
440 printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
441 ia64_cpu_to_sapicid[cpu] = -1;
442 cpucount--;
443 }
444 }
445
446 /*
447 * Cycle through the APs sending Wakeup IPIs to boot each.
448 */
449 void __init
450 smp_boot_cpus (void)
451 {
452 int sapicid, cpu;
453 int boot_cpu_id = hard_smp_processor_id();
454
455 /*
456 * Initialize the logical to physical CPU number mapping
457 * and the per-CPU profiling counter/multiplier
458 */
459
460 for (cpu = 0; cpu < NR_CPUS; cpu++)
461 ia64_cpu_to_sapicid[cpu] = -1;
462 smp_setup_percpu_timer();
463
464 /*
465 * We have the boot CPU online for sure.
466 */
467 set_bit(0, &cpu_online_map);
468 set_bit(0, &cpu_callin_map);
469
470 local_cpu_data->loops_per_jiffy = loops_per_jiffy;
471 ia64_cpu_to_sapicid[0] = boot_cpu_id;
472
473 printk("Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);
474
475 global_irq_holder = 0;
476 current->processor = 0;
477 init_idle();
478
479 /*
480 * If SMP should be disabled, then really disable it!
481 */
482 if (!max_cpus || (max_cpus < -1)) {
483 printk(KERN_INFO "SMP mode deactivated.\n");
484 cpu_online_map = 1;
485 smp_num_cpus = 1;
486 goto smp_done;
487 }
488 if (max_cpus != -1)
489 printk (KERN_INFO "Limiting CPUs to %d\n", max_cpus);
490
491 if (smp_boot_data.cpu_count > 1) {
492
493 printk(KERN_INFO "SMP: starting up secondaries.\n");
494
495 for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) {
496 /*
497 * Don't even attempt to start the boot CPU!
498 */
499 sapicid = smp_boot_data.cpu_phys_id[cpu];
500 if ((sapicid == -1) || (sapicid == hard_smp_processor_id()))
501 continue;
502
503 if ((max_cpus > 0) && (cpucount + 1 >= max_cpus))
504 break;
505
506 do_boot_cpu(sapicid);
507
508 /*
509 * Make sure we unmap all failed CPUs
510 */
511 if (ia64_cpu_to_sapicid[cpu] == -1)
512 printk("phys CPU#%d not responding - cannot use it.\n", cpu);
513 }
514
515 smp_num_cpus = cpucount + 1;
516
517 /*
518 * Allow the user to impress friends.
519 */
520
521 printk("Before bogomips.\n");
522 if (!cpucount) {
523 printk(KERN_ERR "Error: only one processor found.\n");
524 } else {
525 unsigned long bogosum = 0;
526 for (cpu = 0; cpu < NR_CPUS; cpu++)
527 if (cpu_online_map & (1<<cpu))
528 bogosum += cpu_data(cpu)->loops_per_jiffy;
529
530 printk(KERN_INFO"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
531 cpucount + 1, bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
532 }
533 }
534 smp_done:
535 ;
536 }
537
538 /*
539 * Assume that CPU's have been discovered by some platform-dependant interface. For
540 * SoftSDV/Lion, that would be ACPI.
541 *
542 * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
543 */
544 void __init
545 init_smp_config(void)
546 {
547 struct fptr {
548 unsigned long fp;
549 unsigned long gp;
550 } *ap_startup;
551 long sal_ret;
552
553 /* Tell SAL where to drop the AP's. */
554 ap_startup = (struct fptr *) start_ap;
555 sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
556 __pa(ap_startup->fp), __pa(ap_startup->gp), 0, 0, 0, 0);
557 if (sal_ret < 0) {
558 printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n Forcing UP mode\n",
559 ia64_sal_strerror(sal_ret));
560 max_cpus = 0;
561 smp_num_cpus = 1;
562 }
563 }
564