File: /usr/src/linux/arch/i386/kernel/smpboot.c
1 /*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 *
7 * Much of the core SMP work is based on previous work by Thomas Radke, to
8 * whom a great many thanks are extended.
9 *
10 * Thanks to Intel for making available several different Pentium,
11 * Pentium Pro and Pentium-II/Xeon MP machines.
12 * Original development of Linux SMP code supported by Caldera.
13 *
14 * This code is released under the GNU General Public License version 2 or
15 * later.
16 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIP report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 */
33
34 #include <linux/config.h>
35 #include <linux/init.h>
36
37 #include <linux/mm.h>
38 #include <linux/kernel_stat.h>
39 #include <linux/smp_lock.h>
40 #include <linux/irq.h>
41 #include <linux/bootmem.h>
42
43 #include <linux/delay.h>
44 #include <linux/mc146818rtc.h>
45 #include <asm/mtrr.h>
46 #include <asm/pgalloc.h>
47
48 /* Set if we find a B stepping CPU */
49 static int smp_b_stepping;
50
51 /* Setup configured maximum number of CPUs to activate */
52 static int max_cpus = -1;
53
54 /* Total count of live CPUs */
55 int smp_num_cpus = 1;
56
57 /* Bitmask of currently online CPUs */
58 unsigned long cpu_online_map;
59
60 /* which CPU (physical APIC ID) maps to which logical CPU number */
61 volatile int x86_apicid_to_cpu[NR_CPUS];
62 /* which logical CPU number maps to which CPU (physical APIC ID) */
63 volatile int x86_cpu_to_apicid[NR_CPUS];
64
65 static volatile unsigned long cpu_callin_map;
66 static volatile unsigned long cpu_callout_map;
67
68 /* Per CPU bogomips and other parameters */
69 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
70
71 /* Set when the idlers are all forked */
72 int smp_threads_ready;
73
74 /*
75 * Setup routine for controlling SMP activation
76 *
77 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
78 * activation entirely (the MPS table probe still happens, though).
79 *
80 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
81 * greater than 0, limits the maximum number of CPUs activated in
82 * SMP mode to <NUM>.
83 */
84
85 static int __init nosmp(char *str)
86 {
87 max_cpus = 0;
88 return 1;
89 }
90
91 __setup("nosmp", nosmp);
92
93 static int __init maxcpus(char *str)
94 {
95 get_option(&str, &max_cpus);
96 return 1;
97 }
98
99 __setup("maxcpus=", maxcpus);
100
101 /*
102 * Trampoline 80x86 program as an array.
103 */
104
105 extern unsigned char trampoline_data [];
106 extern unsigned char trampoline_end [];
107 static unsigned char *trampoline_base;
108
109 /*
110 * Currently trivial. Write the real->protected mode
111 * bootstrap into the page concerned. The caller
112 * has made sure it's suitably aligned.
113 */
114
115 static unsigned long __init setup_trampoline(void)
116 {
117 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
118 return virt_to_phys(trampoline_base);
119 }
120
121 /*
122 * We are called very early to get the low memory for the
123 * SMP bootup trampoline page.
124 */
125 void __init smp_alloc_memory(void)
126 {
127 trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
128 /*
129 * Has to be in very low memory so we can execute
130 * real-mode AP code.
131 */
132 if (__pa(trampoline_base) >= 0x9F000)
133 BUG();
134 }
135
136 /*
137 * The bootstrap kernel entry code has set these up. Save them for
138 * a given CPU
139 */
140
141 void __init smp_store_cpu_info(int id)
142 {
143 struct cpuinfo_x86 *c = cpu_data + id;
144
145 *c = boot_cpu_data;
146 c->pte_quick = 0;
147 c->pmd_quick = 0;
148 c->pgd_quick = 0;
149 c->pgtable_cache_sz = 0;
150 identify_cpu(c);
151 /*
152 * Mask B, Pentium, but not Pentium MMX
153 */
154 if (c->x86_vendor == X86_VENDOR_INTEL &&
155 c->x86 == 5 &&
156 c->x86_mask >= 1 && c->x86_mask <= 4 &&
157 c->x86_model <= 3)
158 /*
159 * Remember we have B step Pentia with bugs
160 */
161 smp_b_stepping = 1;
162 }
163
164 /*
165 * Architecture specific routine called by the kernel just before init is
166 * fired off. This allows the BP to have everything in order [we hope].
167 * At the end of this all the APs will hit the system scheduling and off
168 * we go. Each AP will load the system gdt's and jump through the kernel
169 * init into idle(). At this point the scheduler will one day take over
170 * and give them jobs to do. smp_callin is a standard routine
171 * we use to track CPUs as they power up.
172 */
173
174 static atomic_t smp_commenced = ATOMIC_INIT(0);
175
176 void __init smp_commence(void)
177 {
178 /*
179 * Lets the callins below out of their loop.
180 */
181 Dprintk("Setting commenced=1, go go go\n");
182
183 wmb();
184 atomic_set(&smp_commenced,1);
185 }
186
187 /*
188 * TSC synchronization.
189 *
190 * We first check wether all CPUs have their TSC's synchronized,
191 * then we print a warning if not, and always resync.
192 */
193
194 static atomic_t tsc_start_flag = ATOMIC_INIT(0);
195 static atomic_t tsc_count_start = ATOMIC_INIT(0);
196 static atomic_t tsc_count_stop = ATOMIC_INIT(0);
197 static unsigned long long tsc_values[NR_CPUS];
198
199 #define NR_LOOPS 5
200
201 extern unsigned long fast_gettimeoffset_quotient;
202
/*
 * Exact unsigned division of a 64-bit dividend by a word-sized divisor,
 * done as plain binary long division (shift and subtract) so that no
 * 64-bit divide instruction or helper is required.  Not terribly
 * optimized (always 64 iterations) but we need it at boot time only
 * anyway.
 *
 * a  - dividend
 * b0 - divisor; must be non-zero (a zero divisor does not trap here,
 *      the loop simply yields an all-ones quotient)
 *
 * Returns floor(a / b0).
 *
 * The previous implementation summed separately truncated partial
 * quotients, so the result could come out too small, and one of its
 * intermediate products could overflow 32 bits.
 */

static unsigned long long div64 (unsigned long long a, unsigned long b0)
{
	unsigned long long quot = 0;
	unsigned long long rem = 0;
	int bit;

	for (bit = 63; bit >= 0; bit--) {
		/*
		 * Top bit about to be shifted out of the remainder: when
		 * set, the doubled remainder is larger than any divisor.
		 */
		int carry = (int)(rem >> 63);

		rem = (rem << 1) | ((a >> bit) & 1);
		if (carry || rem >= b0) {
			/* modular subtraction is still exact when carry is set */
			rem -= b0;
			quot |= 1ULL << bit;
		}
	}

	return quot;
}
230
231 static void __init synchronize_tsc_bp (void)
232 {
233 int i;
234 unsigned long long t0;
235 unsigned long long sum, avg;
236 long long delta;
237 unsigned long one_usec;
238 int buggy = 0;
239
240 printk("checking TSC synchronization across CPUs: ");
241
242 one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2);
243
244 atomic_set(&tsc_start_flag, 1);
245 wmb();
246
247 /*
248 * We loop a few times to get a primed instruction cache,
249 * then the last pass is more or less synchronized and
250 * the BP and APs set their cycle counters to zero all at
251 * once. This reduces the chance of having random offsets
252 * between the processors, and guarantees that the maximum
253 * delay between the cycle counters is never bigger than
254 * the latency of information-passing (cachelines) between
255 * two CPUs.
256 */
257 for (i = 0; i < NR_LOOPS; i++) {
258 /*
259 * all APs synchronize but they loop on '== num_cpus'
260 */
261 while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
262 atomic_set(&tsc_count_stop, 0);
263 wmb();
264 /*
265 * this lets the APs save their current TSC:
266 */
267 atomic_inc(&tsc_count_start);
268
269 rdtscll(tsc_values[smp_processor_id()]);
270 /*
271 * We clear the TSC in the last loop:
272 */
273 if (i == NR_LOOPS-1)
274 write_tsc(0, 0);
275
276 /*
277 * Wait for all APs to leave the synchronization point:
278 */
279 while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
280 atomic_set(&tsc_count_start, 0);
281 wmb();
282 atomic_inc(&tsc_count_stop);
283 }
284
285 sum = 0;
286 for (i = 0; i < smp_num_cpus; i++) {
287 t0 = tsc_values[i];
288 sum += t0;
289 }
290 avg = div64(sum, smp_num_cpus);
291
292 sum = 0;
293 for (i = 0; i < smp_num_cpus; i++) {
294 delta = tsc_values[i] - avg;
295 if (delta < 0)
296 delta = -delta;
297 /*
298 * We report bigger than 2 microseconds clock differences.
299 */
300 if (delta > 2*one_usec) {
301 long realdelta;
302 if (!buggy) {
303 buggy = 1;
304 printk("\n");
305 }
306 realdelta = div64(delta, one_usec);
307 if (tsc_values[i] < avg)
308 realdelta = -realdelta;
309
310 printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
311 i, realdelta);
312 }
313
314 sum += delta;
315 }
316 if (!buggy)
317 printk("passed.\n");
318 }
319
320 static void __init synchronize_tsc_ap (void)
321 {
322 int i;
323
324 /*
325 * smp_num_cpus is not necessarily known at the time
326 * this gets called, so we first wait for the BP to
327 * finish SMP initialization:
328 */
329 while (!atomic_read(&tsc_start_flag)) mb();
330
331 for (i = 0; i < NR_LOOPS; i++) {
332 atomic_inc(&tsc_count_start);
333 while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
334
335 rdtscll(tsc_values[smp_processor_id()]);
336 if (i == NR_LOOPS-1)
337 write_tsc(0, 0);
338
339 atomic_inc(&tsc_count_stop);
340 while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
341 }
342 }
343 #undef NR_LOOPS
344
345 extern void calibrate_delay(void);
346
347 static atomic_t init_deasserted;
348
349 void __init smp_callin(void)
350 {
351 int cpuid, phys_id;
352 unsigned long timeout;
353
354 /*
355 * If waken up by an INIT in an 82489DX configuration
356 * we may get here before an INIT-deassert IPI reaches
357 * our local APIC. We have to wait for the IPI or we'll
358 * lock up on an APIC access.
359 */
360 while (!atomic_read(&init_deasserted));
361
362 /*
363 * (This works even if the APIC is not enabled.)
364 */
365 phys_id = GET_APIC_ID(apic_read(APIC_ID));
366 cpuid = current->processor;
367 if (test_and_set_bit(cpuid, &cpu_online_map)) {
368 printk("huh, phys CPU#%d, CPU#%d already present??\n",
369 phys_id, cpuid);
370 BUG();
371 }
372 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
373
374 /*
375 * STARTUP IPIs are fragile beasts as they might sometimes
376 * trigger some glue motherboard logic. Complete APIC bus
377 * silence for 1 second, this overestimates the time the
378 * boot CPU is spending to send the up to 2 STARTUP IPIs
379 * by a factor of two. This should be enough.
380 */
381
382 /*
383 * Waiting 2s total for startup (udelay is not yet working)
384 */
385 timeout = jiffies + 2*HZ;
386 while (time_before(jiffies, timeout)) {
387 /*
388 * Has the boot CPU finished it's STARTUP sequence?
389 */
390 if (test_bit(cpuid, &cpu_callout_map))
391 break;
392 rep_nop();
393 }
394
395 if (!time_before(jiffies, timeout)) {
396 printk("BUG: CPU%d started up but did not get a callout!\n",
397 cpuid);
398 BUG();
399 }
400
401 /*
402 * the boot CPU has finished the init stage and is spinning
403 * on callin_map until we finish. We are free to set up this
404 * CPU, first the APIC. (this is probably redundant on most
405 * boards)
406 */
407
408 Dprintk("CALLIN, before setup_local_APIC().\n");
409 setup_local_APIC();
410
411 sti();
412
413 #ifdef CONFIG_MTRR
414 /*
415 * Must be done before calibration delay is computed
416 */
417 mtrr_init_secondary_cpu ();
418 #endif
419 /*
420 * Get our bogomips.
421 */
422 calibrate_delay();
423 Dprintk("Stack at about %p\n",&cpuid);
424
425 /*
426 * Save our processor parameters
427 */
428 smp_store_cpu_info(cpuid);
429
430 /*
431 * Allow the master to continue.
432 */
433 set_bit(cpuid, &cpu_callin_map);
434
435 /*
436 * Synchronize the TSC with the BP
437 */
438 if (cpu_has_tsc)
439 synchronize_tsc_ap();
440 }
441
442 int cpucount;
443
444 extern int cpu_idle(void);
445
446 /*
447 * Activate a secondary processor.
448 */
449 int __init start_secondary(void *unused)
450 {
451 /*
452 * Dont put anything before smp_callin(), SMP
453 * booting is too fragile that we want to limit the
454 * things done here to the most necessary things.
455 */
456 cpu_init();
457 smp_callin();
458 while (!atomic_read(&smp_commenced))
459 rep_nop();
460 /*
461 * low-memory mappings have been cleared, flush them from
462 * the local TLBs too.
463 */
464 local_flush_tlb();
465
466 return cpu_idle();
467 }
468
469 /*
470 * Everything has been set up for the secondary
471 * CPUs - they just need to reload everything
472 * from the task structure
473 * This function must not return.
474 */
475 void __init initialize_secondary(void)
476 {
477 /*
478 * We don't actually need to load the full TSS,
479 * basically just the stack pointer and the eip.
480 */
481
482 asm volatile(
483 "movl %0,%%esp\n\t"
484 "jmp *%1"
485 :
486 :"r" (current->thread.esp),"r" (current->thread.eip));
487 }
488
489 extern struct {
490 void * esp;
491 unsigned short ss;
492 } stack_start;
493
494 static int __init fork_by_hand(void)
495 {
496 struct pt_regs regs;
497 /*
498 * don't care about the eip and regs settings since
499 * we'll never reschedule the forked task.
500 */
501 return do_fork(CLONE_VM|CLONE_PID, 0, ®s, 0);
502 }
503
#if APIC_DEBUG
/*
 * Debugging aid: issue a remote read for the ID, VERSION and SPIV
 * registers of the local APIC with the given ID and print what comes
 * back, or "failed" when the read does not complete as valid.
 */
static inline void inquire_remote_apic(int apicid)
{
	const struct {
		int reg;
		char *name;
	} probes[] = {
		{ APIC_ID >> 4,   "ID" },
		{ APIC_LVR >> 4,  "VERSION" },
		{ APIC_SPIV >> 4, "SPIV" },
	};
	unsigned int idx;
	int timeout, status;

	printk("Inquiring remote APIC #%d...\n", apicid);

	for (idx = 0; idx < sizeof(probes) / sizeof(probes[0]); idx++) {
		printk("... APIC #%d %s: ", apicid, probes[idx].name);

		/* the ICR has to be idle before a new command is written */
		apic_wait_icr_idle();

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | probes[idx].reg);

		/* poll in 100us steps while the remote read is in progress */
		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		if (status == APIC_ICR_RR_VALID) {
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
		} else {
			printk("failed\n");
		}
	}
}
#endif
541
542 static void __init do_boot_cpu (int apicid)
543 {
544 struct task_struct *idle;
545 unsigned long send_status, accept_status, boot_status, maxlvt;
546 int timeout, num_starts, j, cpu;
547 unsigned long start_eip;
548
549 cpu = ++cpucount;
550 /*
551 * We can't use kernel_thread since we must avoid to
552 * reschedule the child.
553 */
554 if (fork_by_hand() < 0)
555 panic("failed fork for CPU %d", cpu);
556
557 /*
558 * We remove it from the pidhash and the runqueue
559 * once we got the process:
560 */
561 idle = init_task.prev_task;
562 if (!idle)
563 panic("No idle process for CPU %d", cpu);
564
565 idle->processor = cpu;
566 x86_cpu_to_apicid[cpu] = apicid;
567 x86_apicid_to_cpu[apicid] = cpu;
568 idle->has_cpu = 1; /* we schedule the first task manually */
569 idle->thread.eip = (unsigned long) start_secondary;
570
571 del_from_runqueue(idle);
572 unhash_process(idle);
573 init_tasks[cpu] = idle;
574
575 /* start_eip had better be page-aligned! */
576 start_eip = setup_trampoline();
577
578 /* So we see what's up */
579 printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
580 stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
581
582 /*
583 * This grunge runs the startup process for
584 * the targeted processor.
585 */
586
587 atomic_set(&init_deasserted, 0);
588
589 Dprintk("Setting warm reset code and vector.\n");
590
591 CMOS_WRITE(0xa, 0xf);
592 local_flush_tlb();
593 Dprintk("1.\n");
594 *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
595 Dprintk("2.\n");
596 *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
597 Dprintk("3.\n");
598
599 /*
600 * Be paranoid about clearing APIC errors.
601 */
602 if (APIC_INTEGRATED(apic_version[apicid])) {
603 apic_read_around(APIC_SPIV);
604 apic_write(APIC_ESR, 0);
605 apic_read(APIC_ESR);
606 }
607
608 /*
609 * Status is now clean
610 */
611 send_status = 0;
612 accept_status = 0;
613 boot_status = 0;
614
615 /*
616 * Starting actual IPI sequence...
617 */
618
619 Dprintk("Asserting INIT.\n");
620
621 /*
622 * Turn INIT on target chip
623 */
624 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
625
626 /*
627 * Send IPI
628 */
629 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
630 | APIC_DM_INIT);
631
632 Dprintk("Waiting for send to finish...\n");
633 timeout = 0;
634 do {
635 Dprintk("+");
636 udelay(100);
637 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
638 } while (send_status && (timeout++ < 1000));
639
640 mdelay(10);
641
642 Dprintk("Deasserting INIT.\n");
643
644 /* Target chip */
645 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
646
647 /* Send IPI */
648 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
649
650 Dprintk("Waiting for send to finish...\n");
651 timeout = 0;
652 do {
653 Dprintk("+");
654 udelay(100);
655 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
656 } while (send_status && (timeout++ < 1000));
657
658 atomic_set(&init_deasserted, 1);
659
660 /*
661 * Should we send STARTUP IPIs ?
662 *
663 * Determine this based on the APIC version.
664 * If we don't have an integrated APIC, don't
665 * send the STARTUP IPIs.
666 */
667 if (APIC_INTEGRATED(apic_version[apicid]))
668 num_starts = 2;
669 else
670 num_starts = 0;
671
672 /*
673 * Run STARTUP IPI loop.
674 */
675 Dprintk("#startup loops: %d.\n", num_starts);
676
677 maxlvt = get_maxlvt();
678
679 for (j = 1; j <= num_starts; j++) {
680 Dprintk("Sending STARTUP #%d.\n",j);
681 apic_read_around(APIC_SPIV);
682 apic_write(APIC_ESR, 0);
683 apic_read(APIC_ESR);
684 Dprintk("After apic_write.\n");
685
686 /*
687 * STARTUP IPI
688 */
689
690 /* Target chip */
691 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
692
693 /* Boot on the stack */
694 /* Kick the second */
695 apic_write_around(APIC_ICR, APIC_DM_STARTUP
696 | (start_eip >> 12));
697
698 /*
699 * Give the other CPU some time to accept the IPI.
700 */
701 udelay(300);
702
703 Dprintk("Startup point 1.\n");
704
705 Dprintk("Waiting for send to finish...\n");
706 timeout = 0;
707 do {
708 Dprintk("+");
709 udelay(100);
710 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
711 } while (send_status && (timeout++ < 1000));
712
713 /*
714 * Give the other CPU some time to accept the IPI.
715 */
716 udelay(200);
717 /*
718 * Due to the Pentium erratum 3AP.
719 */
720 if (maxlvt > 3) {
721 apic_read_around(APIC_SPIV);
722 apic_write(APIC_ESR, 0);
723 }
724 accept_status = (apic_read(APIC_ESR) & 0xEF);
725 if (send_status || accept_status)
726 break;
727 }
728 Dprintk("After Startup.\n");
729
730 if (send_status)
731 printk("APIC never delivered???\n");
732 if (accept_status)
733 printk("APIC delivery error (%lx).\n", accept_status);
734
735 if (!send_status && !accept_status) {
736 /*
737 * allow APs to start initializing.
738 */
739 Dprintk("Before Callout %d.\n", cpu);
740 set_bit(cpu, &cpu_callout_map);
741 Dprintk("After Callout %d.\n", cpu);
742
743 /*
744 * Wait 5s total for a response
745 */
746 for (timeout = 0; timeout < 50000; timeout++) {
747 if (test_bit(cpu, &cpu_callin_map))
748 break; /* It has booted */
749 udelay(100);
750 }
751
752 if (test_bit(cpu, &cpu_callin_map)) {
753 /* number CPUs logically, starting from 1 (BSP is 0) */
754 Dprintk("OK.\n");
755 printk("CPU%d: ", cpu);
756 print_cpu_info(&cpu_data[cpu]);
757 Dprintk("CPU has booted.\n");
758 } else {
759 boot_status = 1;
760 if (*((volatile unsigned char *)phys_to_virt(8192))
761 == 0xA5)
762 /* trampoline started but...? */
763 printk("Stuck ??\n");
764 else
765 /* trampoline code not run */
766 printk("Not responding.\n");
767 #if APIC_DEBUG
768 inquire_remote_apic(apicid);
769 #endif
770 }
771 }
772 if (send_status || accept_status || boot_status) {
773 x86_cpu_to_apicid[cpu] = -1;
774 x86_apicid_to_cpu[apicid] = -1;
775 cpucount--;
776 }
777
778 /* mark "stuck" area as not stuck */
779 *((volatile unsigned long *)phys_to_virt(8192)) = 0;
780 }
781
782 cycles_t cacheflush_time;
783
784 static void smp_tune_scheduling (void)
785 {
786 unsigned long cachesize; /* kB */
787 unsigned long bandwidth = 350; /* MB/s */
788 /*
789 * Rough estimation for SMP scheduling, this is the number of
790 * cycles it takes for a fully memory-limited process to flush
791 * the SMP-local cache.
792 *
793 * (For a P5 this pretty much means we will choose another idle
794 * CPU almost always at wakeup time (this is due to the small
795 * L1 cache), on PIIs it's around 50-100 usecs, depending on
796 * the cache size)
797 */
798
799 if (!cpu_khz) {
800 /*
801 * this basically disables processor-affinity
802 * scheduling on SMP without a TSC.
803 */
804 cacheflush_time = 0;
805 return;
806 } else {
807 cachesize = boot_cpu_data.x86_cache_size;
808 if (cachesize == -1) {
809 cachesize = 16; /* Pentiums, 2x8kB cache */
810 bandwidth = 100;
811 }
812
813 cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
814 }
815
816 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
817 (long)cacheflush_time/(cpu_khz/1000),
818 ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
819 }
820
821 /*
822 * Cycle through the processors sending APIC IPIs to boot each.
823 */
824
825 extern int prof_multiplier[NR_CPUS];
826 extern int prof_old_multiplier[NR_CPUS];
827 extern int prof_counter[NR_CPUS];
828
829 void __init smp_boot_cpus(void)
830 {
831 int apicid, cpu;
832
833 #ifdef CONFIG_MTRR
834 /* Must be done before other processors booted */
835 mtrr_init_boot_cpu ();
836 #endif
837 /*
838 * Initialize the logical to physical CPU number mapping
839 * and the per-CPU profiling counter/multiplier
840 */
841
842 for (apicid = 0; apicid < NR_CPUS; apicid++) {
843 x86_apicid_to_cpu[apicid] = -1;
844 prof_counter[apicid] = 1;
845 prof_old_multiplier[apicid] = 1;
846 prof_multiplier[apicid] = 1;
847 }
848
849 /*
850 * Setup boot CPU information
851 */
852 smp_store_cpu_info(0); /* Final full version of the data */
853 printk("CPU%d: ", 0);
854 print_cpu_info(&cpu_data[0]);
855
856 /*
857 * We have the boot CPU online for sure.
858 */
859 set_bit(0, &cpu_online_map);
860 x86_apicid_to_cpu[boot_cpu_id] = 0;
861 x86_cpu_to_apicid[0] = boot_cpu_id;
862 global_irq_holder = 0;
863 current->processor = 0;
864 init_idle();
865 smp_tune_scheduling();
866
867 /*
868 * If we couldnt find an SMP configuration at boot time,
869 * get out of here now!
870 */
871 if (!smp_found_config) {
872 printk(KERN_NOTICE "SMP motherboard not detected.\n");
873 #ifndef CONFIG_VISWS
874 io_apic_irqs = 0;
875 #endif
876 cpu_online_map = phys_cpu_present_map = 1;
877 smp_num_cpus = 1;
878 if (APIC_init_uniprocessor())
879 printk(KERN_NOTICE "Local APIC not detected."
880 " Using dummy APIC emulation.\n");
881 goto smp_done;
882 }
883
884 /*
885 * Should not be necessary because the MP table should list the boot
886 * CPU too, but we do it for the sake of robustness anyway.
887 */
888 if (!test_bit(boot_cpu_id, &phys_cpu_present_map)) {
889 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
890 boot_cpu_id);
891 phys_cpu_present_map |= (1 << hard_smp_processor_id());
892 }
893
894 /*
895 * If we couldn't find a local APIC, then get out of here now!
896 */
897 if (APIC_INTEGRATED(apic_version[boot_cpu_id]) &&
898 !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
899 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
900 boot_cpu_id);
901 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
902 #ifndef CONFIG_VISWS
903 io_apic_irqs = 0;
904 #endif
905 cpu_online_map = phys_cpu_present_map = 1;
906 smp_num_cpus = 1;
907 goto smp_done;
908 }
909
910 verify_local_APIC();
911
912 /*
913 * If SMP should be disabled, then really disable it!
914 */
915 if (!max_cpus) {
916 smp_found_config = 0;
917 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
918 #ifndef CONFIG_VISWS
919 io_apic_irqs = 0;
920 #endif
921 cpu_online_map = phys_cpu_present_map = 1;
922 smp_num_cpus = 1;
923 goto smp_done;
924 }
925
926 connect_bsp_APIC();
927 setup_local_APIC();
928
929 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
930 BUG();
931
932 /*
933 * Now scan the CPU present map and fire up the other CPUs.
934 */
935 Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
936
937 for (apicid = 0; apicid < NR_CPUS; apicid++) {
938 /*
939 * Don't even attempt to start the boot CPU!
940 */
941 if (apicid == boot_cpu_id)
942 continue;
943
944 if (!(phys_cpu_present_map & (1 << apicid)))
945 continue;
946 if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
947 continue;
948
949 do_boot_cpu(apicid);
950
951 /*
952 * Make sure we unmap all failed CPUs
953 */
954 if ((x86_apicid_to_cpu[apicid] == -1) &&
955 (phys_cpu_present_map & (1 << apicid)))
956 printk("phys CPU #%d not responding - cannot use it.\n",apicid);
957 }
958
959 /*
960 * Cleanup possible dangling ends...
961 */
962 #ifndef CONFIG_VISWS
963 {
964 /*
965 * Install writable page 0 entry to set BIOS data area.
966 */
967 local_flush_tlb();
968
969 /*
970 * Paranoid: Set warm reset code and vector here back
971 * to default values.
972 */
973 CMOS_WRITE(0, 0xf);
974
975 *((volatile long *) phys_to_virt(0x467)) = 0;
976 }
977 #endif
978
979 /*
980 * Allow the user to impress friends.
981 */
982
983 Dprintk("Before bogomips.\n");
984 if (!cpucount) {
985 printk(KERN_ERR "Error: only one processor found.\n");
986 } else {
987 unsigned long bogosum = 0;
988 for (cpu = 0; cpu < NR_CPUS; cpu++)
989 if (cpu_online_map & (1<<cpu))
990 bogosum += cpu_data[cpu].loops_per_jiffy;
991 printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
992 cpucount+1,
993 bogosum/(500000/HZ),
994 (bogosum/(5000/HZ))%100);
995 Dprintk("Before bogocount - setting activated=1.\n");
996 }
997 smp_num_cpus = cpucount + 1;
998
999 if (smp_b_stepping)
1000 printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
1001 Dprintk("Boot done.\n");
1002
1003 #ifndef CONFIG_VISWS
1004 /*
1005 * Here we can be sure that there is an IO-APIC in the system. Let's
1006 * go and set it up:
1007 */
1008 if (!skip_ioapic_setup && nr_ioapics)
1009 setup_IO_APIC();
1010 #endif
1011
1012 /*
1013 * Set up all local APIC timers in the system:
1014 */
1015 setup_APIC_clocks();
1016
1017 /*
1018 * Synchronize the TSC with the AP
1019 */
1020 if (cpu_has_tsc && cpucount)
1021 synchronize_tsc_bp();
1022
1023 smp_done:
1024 zap_low_mappings();
1025 }
1026