File: /usr/src/linux/arch/i386/kernel/process.c
1 /*
2 * linux/arch/i386/kernel/process.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 */
9
10 /*
11 * This file handles the architecture-dependent parts of process handling..
12 */
13
14 #define __KERNEL_SYSCALLS__
15 #include <stdarg.h>
16
17 #include <linux/errno.h>
18 #include <linux/sched.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/smp.h>
22 #include <linux/smp_lock.h>
23 #include <linux/stddef.h>
24 #include <linux/unistd.h>
25 #include <linux/ptrace.h>
26 #include <linux/slab.h>
27 #include <linux/vmalloc.h>
28 #include <linux/user.h>
29 #include <linux/a.out.h>
30 #include <linux/interrupt.h>
31 #include <linux/config.h>
32 #include <linux/delay.h>
33 #include <linux/reboot.h>
34 #include <linux/init.h>
35 #include <linux/mc146818rtc.h>
36
37 #include <asm/uaccess.h>
38 #include <asm/pgtable.h>
39 #include <asm/system.h>
40 #include <asm/io.h>
41 #include <asm/ldt.h>
42 #include <asm/processor.h>
43 #include <asm/i387.h>
44 #include <asm/desc.h>
45 #include <asm/mmu_context.h>
46 #ifdef CONFIG_MATH_EMULATION
47 #include <asm/math_emu.h>
48 #endif
49
50 #include <linux/irq.h>
51
52 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
53
54 int hlt_counter;
55
56 /*
57 * Powermanagement idle function, if any..
58 */
59 void (*pm_idle)(void);
60
61 /*
62 * Power off function, if any
63 */
64 void (*pm_power_off)(void);
65
66 void disable_hlt(void)
67 {
68 hlt_counter++;
69 }
70
71 void enable_hlt(void)
72 {
73 hlt_counter--;
74 }
75
76 /*
77 * We use this if we don't have any better
78 * idle routine..
79 */
80 static void default_idle(void)
81 {
82 if (current_cpu_data.hlt_works_ok && !hlt_counter) {
83 __cli();
84 if (!current->need_resched)
85 safe_halt();
86 else
87 __sti();
88 }
89 }
90
91 /*
92 * On SMP it's slightly faster (but much more power-consuming!)
93 * to poll the ->need_resched flag instead of waiting for the
94 * cross-CPU IPI to arrive. Use this option with caution.
95 */
96 static void poll_idle (void)
97 {
98 int oldval;
99
100 __sti();
101
102 /*
103 * Deal with another CPU just having chosen a thread to
104 * run here:
105 */
106 oldval = xchg(¤t->need_resched, -1);
107
108 if (!oldval)
109 asm volatile(
110 "2:"
111 "cmpl $-1, %0;"
112 "rep; nop;"
113 "je 2b;"
114 : :"m" (current->need_resched));
115 }
116
117 /*
118 * The idle thread. There's no useful work to be
119 * done, so just try to conserve power and have a
120 * low exit latency (ie sit in a loop waiting for
121 * somebody to say that they'd like to reschedule)
122 */
123 void cpu_idle (void)
124 {
125 /* endless idle loop with no priority at all */
126 init_idle();
127 current->nice = 20;
128 current->counter = -100;
129
130 while (1) {
131 void (*idle)(void) = pm_idle;
132 if (!idle)
133 idle = default_idle;
134 while (!current->need_resched)
135 idle();
136 schedule();
137 check_pgt_cache();
138 }
139 }
140
141 static int __init idle_setup (char *str)
142 {
143 if (!strncmp(str, "poll", 4)) {
144 printk("using polling idle threads.\n");
145 pm_idle = poll_idle;
146 }
147
148 return 1;
149 }
150
151 __setup("idle=", idle_setup);
152
/* All-zero IDT descriptor; machine_restart() loads it to force a triple fault. */
static long no_idt[2];

/* Value stored at absolute address 0x472 on reboot (0x1234 = warm, 0 = cold). */
static int reboot_mode;

/* Non-zero: reboot by jumping through the BIOS instead of a hard reset. */
int reboot_thru_bios;

#ifdef CONFIG_SMP
/* Set by "reboot=s[N]": the reboot has to be carried out by one chosen CPU. */
int reboot_smp = 0;

/* CPU selected by "reboot=sN", or -1 when none was given. */
static int reboot_cpu = -1;

/* shamelessly grabbed from lib/vsprintf.c for readability */
#define is_digit(c)	((c) >= '0' && (c) <= '9')
#endif
163 static int __init reboot_setup(char *str)
164 {
165 while(1) {
166 switch (*str) {
167 case 'w': /* "warm" reboot (no memory testing etc) */
168 reboot_mode = 0x1234;
169 break;
170 case 'c': /* "cold" reboot (with memory testing etc) */
171 reboot_mode = 0x0;
172 break;
173 case 'b': /* "bios" reboot by jumping through the BIOS */
174 reboot_thru_bios = 1;
175 break;
176 case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
177 reboot_thru_bios = 0;
178 break;
179 #ifdef CONFIG_SMP
180 case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
181 reboot_smp = 1;
182 if (is_digit(*(str+1))) {
183 reboot_cpu = (int) (*(str+1) - '0');
184 if (is_digit(*(str+2)))
185 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
186 }
187 /* we will leave sorting out the final value
188 when we are ready to reboot, since we might not
189 have set up boot_cpu_id or smp_num_cpu */
190 break;
191 #endif
192 }
193 if((str = strchr(str,',')) != NULL)
194 str++;
195 else
196 break;
197 }
198 return 1;
199 }
200
201 __setup("reboot=", reboot_setup);
202
/* The following code and data reboots the machine by switching to real
   mode and jumping to the BIOS reset entry point, as if the CPU has
   really been reset.  The previous version asked the keyboard
   controller to pulse the CPU reset line, which is more thorough, but
   doesn't work with at least one type of 486 motherboard.  It is easy
   to stop this code working; hence the copious comments. */

/* Descriptors loaded by machine_real_restart() just before leaving
   protected mode. */
static unsigned long long
real_mode_gdt_entries [3] =
{
	0x0000000000000000ULL,	/* Null descriptor */
	0x00009a000000ffffULL,	/* 16-bit real-mode 64k code at 0x00000000 */
	0x000092000100ffffULL	/* 16-bit real-mode 64k data at 0x00000100 */
};

/* Operands for lgdt/lidt: a 16-bit limit followed by the table base. */
static struct
{
	unsigned short       size __attribute__ ((packed));
	unsigned long long * base __attribute__ ((packed));
}
real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
real_mode_idt = { 0x3ff, 0 };
225
/* This is 16-bit protected mode code to disable paging and the cache,
   switch to real mode and jump to the BIOS reset code.

   The instruction that switches to real mode by writing to CR0 must be
   followed immediately by a far jump instruction, which sets CS to a
   valid value for real mode, and flushes the prefetch queue to avoid
   running instructions that have already been decoded in protected
   mode.

   Clears all the flags except ET, especially PG (paging), PE
   (protected-mode enable) and TS (task switch for coprocessor state
   save).  Flushes the TLB after paging has been disabled.  Sets CD and
   NW, to disable the cache on a 486, and invalidates the cache.  This
   is more like the state of a 486 after reset.  I don't know if
   something else should be done for other chips.

   More could be done here to set up the registers as if a CPU reset had
   occurred; hopefully real BIOSs don't assume much. */

static unsigned char real_mode_switch [] =
{
	0x66, 0x0f, 0x20, 0xc0,			/*    movl  %cr0,%eax        */
	0x66, 0x83, 0xe0, 0x11,			/*    andl  $0x00000011,%eax */
	0x66, 0x0d, 0x00, 0x00, 0x00, 0x60,	/*    orl   $0x60000000,%eax */
	0x66, 0x0f, 0x22, 0xc0,			/*    movl  %eax,%cr0        */
	0x66, 0x0f, 0x22, 0xd8,			/*    movl  %eax,%cr3        */
	0x66, 0x0f, 0x20, 0xc3,			/*    movl  %cr0,%ebx        */
	0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60,	/*    andl  $0x60000000,%ebx */
	0x74, 0x02,				/*    jz    f                */
	0x0f, 0x08,				/*    invd                   */
	0x24, 0x10,				/* f: andb  $0x10,al         */
	0x66, 0x0f, 0x22, 0xc0			/*    movl  %eax,%cr0        */
};

/* Default tail passed to machine_real_restart() by machine_restart():
   a far jump to the BIOS reset entry point. */
static unsigned char jump_to_bios [] =
{
	0xea, 0x00, 0x00, 0xff, 0xff		/*    ljmp  $0xffff,$0x0000  */
};
263
/*
 * Poll I/O port 0x64 until bit 1 (mask 0x02) reads as clear, giving
 * up silently after 0x10000 reads.  (Port 0x64 is presumably the
 * keyboard controller status register - machine_restart() writes
 * its reset command to the same port.)
 */
static inline void kb_wait(void)
{
	int tries = 0;

	while (tries < 0x10000) {
		if (!(inb_p(0x64) & 0x02))
			break;
		tries++;
	}
}
272
273 /*
274 * Switch to real mode and then execute the code
275 * specified by the code and length parameters.
276 * We assume that length will aways be less that 100!
277 */
278 void machine_real_restart(unsigned char *code, int length)
279 {
280 unsigned long flags;
281
282 cli();
283
284 /* Write zero to CMOS register number 0x0f, which the BIOS POST
285 routine will recognize as telling it to do a proper reboot. (Well
286 that's what this book in front of me says -- it may only apply to
287 the Phoenix BIOS though, it's not clear). At the same time,
288 disable NMIs by setting the top bit in the CMOS address register,
289 as we're about to do peculiar things to the CPU. I'm not sure if
290 `outb_p' is needed instead of just `outb'. Use it to be on the
291 safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
292 */
293
294 spin_lock_irqsave(&rtc_lock, flags);
295 CMOS_WRITE(0x00, 0x8f);
296 spin_unlock_irqrestore(&rtc_lock, flags);
297
298 /* Remap the kernel at virtual address zero, as well as offset zero
299 from the kernel segment. This assumes the kernel segment starts at
300 virtual address PAGE_OFFSET. */
301
302 memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
303 sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
304
305 /* Make sure the first page is mapped to the start of physical memory.
306 It is normally not mapped, to trap kernel NULL pointer dereferences. */
307
308 pg0[0] = _PAGE_RW | _PAGE_PRESENT;
309
310 /*
311 * Use `swapper_pg_dir' as our page directory.
312 */
313 asm volatile("movl %0,%%cr3": :"r" (__pa(swapper_pg_dir)));
314
315 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
316 this on booting to tell it to "Bypass memory test (also warm
317 boot)". This seems like a fairly standard thing that gets set by
318 REBOOT.COM programs, and the previous reset routine did this
319 too. */
320
321 *((unsigned short *)0x472) = reboot_mode;
322
323 /* For the switch to real mode, copy some code to low memory. It has
324 to be in the first 64k because it is running in 16-bit mode, and it
325 has to have the same physical and virtual address, because it turns
326 off paging. Copy it near the end of the first page, out of the way
327 of BIOS variables. */
328
329 memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100),
330 real_mode_switch, sizeof (real_mode_switch));
331 memcpy ((void *) (0x1000 - 100), code, length);
332
333 /* Set up the IDT for real mode. */
334
335 __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));
336
337 /* Set up a GDT from which we can load segment descriptors for real
338 mode. The GDT is not used in real mode; it is just needed here to
339 prepare the descriptors. */
340
341 __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));
342
343 /* Load the data segment registers, and thus the descriptors ready for
344 real mode. The base address of each segment is 0x100, 16 times the
345 selector value being loaded here. This is so that the segment
346 registers don't have to be reloaded after switching to real mode:
347 the values are consistent for real mode operation already. */
348
349 __asm__ __volatile__ ("movl $0x0010,%%eax\n"
350 "\tmovl %%eax,%%ds\n"
351 "\tmovl %%eax,%%es\n"
352 "\tmovl %%eax,%%fs\n"
353 "\tmovl %%eax,%%gs\n"
354 "\tmovl %%eax,%%ss" : : : "eax");
355
356 /* Jump to the 16-bit code that we copied earlier. It disables paging
357 and the cache, switches to real mode, and jumps to the BIOS reset
358 entry point. */
359
360 __asm__ __volatile__ ("ljmp $0x0008,%0"
361 :
362 : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
363 }
364
365 void machine_restart(char * __unused)
366 {
367 #if CONFIG_SMP
368 int cpuid;
369
370 cpuid = GET_APIC_ID(apic_read(APIC_ID));
371
372 if (reboot_smp) {
373
374 /* check to see if reboot_cpu is valid
375 if its not, default to the BSP */
376 if ((reboot_cpu == -1) ||
377 (reboot_cpu > (NR_CPUS -1)) ||
378 !(phys_cpu_present_map & (1<<cpuid)))
379 reboot_cpu = boot_cpu_id;
380
381 reboot_smp = 0; /* use this as a flag to only go through this once*/
382 /* re-run this function on the other CPUs
383 it will fall though this section since we have
384 cleared reboot_smp, and do the reboot if it is the
385 correct CPU, otherwise it halts. */
386 if (reboot_cpu != cpuid)
387 smp_call_function((void *)machine_restart , NULL, 1, 0);
388 }
389
390 /* if reboot_cpu is still -1, then we want a tradional reboot,
391 and if we are not running on the reboot_cpu,, halt */
392 if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
393 for (;;)
394 __asm__ __volatile__ ("hlt");
395 }
396 /*
397 * Stop all CPUs and turn off local APICs and the IO-APIC, so
398 * other OSs see a clean IRQ state.
399 */
400 smp_send_stop();
401 disable_IO_APIC();
402 #endif
403
404 if(!reboot_thru_bios) {
405 /* rebooting needs to touch the page at absolute addr 0 */
406 *((unsigned short *)__va(0x472)) = reboot_mode;
407 for (;;) {
408 int i;
409 for (i=0; i<100; i++) {
410 kb_wait();
411 udelay(50);
412 outb(0xfe,0x64); /* pulse reset low */
413 udelay(50);
414 }
415 /* That didn't work - force a triple fault.. */
416 __asm__ __volatile__("lidt %0": :"m" (no_idt));
417 __asm__ __volatile__("int3");
418 }
419 }
420
421 machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
422 }
423
/*
 * Architecture hook for halting the machine.  Intentionally empty
 * here: it returns immediately without doing anything.
 */
void machine_halt(void)
{
	/* nothing to do */
}
427
428 void machine_power_off(void)
429 {
430 if (pm_power_off)
431 pm_power_off();
432 }
433
434 extern void show_trace(unsigned long* esp);
435
436 void show_regs(struct pt_regs * regs)
437 {
438 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
439
440 printk("\n");
441 printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
442 printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id());
443 if (regs->xcs & 3)
444 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
445 printk(" EFLAGS: %08lx\n",regs->eflags);
446 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
447 regs->eax,regs->ebx,regs->ecx,regs->edx);
448 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
449 regs->esi, regs->edi, regs->ebp);
450 printk(" DS: %04x ES: %04x\n",
451 0xffff & regs->xds,0xffff & regs->xes);
452
453 __asm__("movl %%cr0, %0": "=r" (cr0));
454 __asm__("movl %%cr2, %0": "=r" (cr2));
455 __asm__("movl %%cr3, %0": "=r" (cr3));
456 /* This could fault if %cr4 does not exist */
457 __asm__("1: movl %%cr4, %0 \n"
458 "2: \n"
459 ".section __ex_table,\"a\" \n"
460 ".long 1b,2b \n"
461 ".previous \n"
462 : "=r" (cr4): "0" (0));
463 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
464 show_trace(®s->esp);
465 }
466
467 /*
468 * No need to lock the MM as we are the last user
469 */
470 void release_segments(struct mm_struct *mm)
471 {
472 void * ldt = mm->context.segments;
473
474 /*
475 * free the LDT
476 */
477 if (ldt) {
478 mm->context.segments = NULL;
479 clear_LDT();
480 vfree(ldt);
481 }
482 }
483
484 /*
485 * Create a kernel thread
486 */
487 int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
488 {
489 long retval, d0;
490
491 __asm__ __volatile__(
492 "movl %%esp,%%esi\n\t"
493 "int $0x80\n\t" /* Linux/i386 system call */
494 "cmpl %%esp,%%esi\n\t" /* child or parent? */
495 "je 1f\n\t" /* parent - jump */
496 /* Load the argument into eax, and push it. That way, it does
497 * not matter whether the called function is compiled with
498 * -mregparm or not. */
499 "movl %4,%%eax\n\t"
500 "pushl %%eax\n\t"
501 "call *%5\n\t" /* call fn */
502 "movl %3,%0\n\t" /* exit */
503 "int $0x80\n"
504 "1:\t"
505 :"=&a" (retval), "=&S" (d0)
506 :"0" (__NR_clone), "i" (__NR_exit),
507 "r" (arg), "r" (fn),
508 "b" (flags | CLONE_VM)
509 : "memory");
510 return retval;
511 }
512
/*
 * Free current thread data structures etc.
 *
 * Nothing needs releasing here, so the body is empty.
 */
void exit_thread(void)
{
	/* nothing to do ... */
}
520
521 void flush_thread(void)
522 {
523 struct task_struct *tsk = current;
524
525 memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
526 /*
527 * Forget coprocessor state..
528 */
529 clear_fpu(tsk);
530 tsk->used_math = 0;
531 }
532
533 void release_thread(struct task_struct *dead_task)
534 {
535 if (dead_task->mm) {
536 void * ldt = dead_task->mm->context.segments;
537
538 // temporary debugging check
539 if (ldt) {
540 printk("WARNING: dead process %8s still has LDT? <%p>\n",
541 dead_task->comm, ldt);
542 BUG();
543 }
544 }
545 }
546
547 /*
548 * we do not have to muck with descriptors here, that is
549 * done in switch_mm() as needed.
550 */
551 void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
552 {
553 struct mm_struct * old_mm;
554 void *old_ldt, *ldt;
555
556 ldt = NULL;
557 old_mm = current->mm;
558 if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
559 /*
560 * Completely new LDT, we initialize it from the parent:
561 */
562 ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
563 if (!ldt)
564 printk(KERN_WARNING "ldt allocation failed\n");
565 else
566 memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
567 }
568 new_mm->context.segments = ldt;
569 new_mm->context.cpuvalid = ~0UL; /* valid on all CPU's - they can't have stale data */
570 }
571
/*
 * Save a segment: store the current value of segment register
 * `seg' into `value', which is written through an int-sized
 * memory operand.
 */
#define savesegment(seg,value) \
	asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
577
578 int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
579 unsigned long unused,
580 struct task_struct * p, struct pt_regs * regs)
581 {
582 struct pt_regs * childregs;
583
584 childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
585 struct_cpy(childregs, regs);
586 childregs->eax = 0;
587 childregs->esp = esp;
588
589 p->thread.esp = (unsigned long) childregs;
590 p->thread.esp0 = (unsigned long) (childregs+1);
591
592 p->thread.eip = (unsigned long) ret_from_fork;
593
594 savesegment(fs,p->thread.fs);
595 savesegment(gs,p->thread.gs);
596
597 unlazy_fpu(current);
598 struct_cpy(&p->thread.i387, ¤t->thread.i387);
599
600 return 0;
601 }
602
603 /*
604 * fill in the user structure for a core dump..
605 */
606 void dump_thread(struct pt_regs * regs, struct user * dump)
607 {
608 int i;
609
610 /* changed the size calculations - should hopefully work better. lbt */
611 dump->magic = CMAGIC;
612 dump->start_code = 0;
613 dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
614 dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
615 dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
616 dump->u_dsize -= dump->u_tsize;
617 dump->u_ssize = 0;
618 for (i = 0; i < 8; i++)
619 dump->u_debugreg[i] = current->thread.debugreg[i];
620
621 if (dump->start_stack < TASK_SIZE)
622 dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
623
624 dump->regs.ebx = regs->ebx;
625 dump->regs.ecx = regs->ecx;
626 dump->regs.edx = regs->edx;
627 dump->regs.esi = regs->esi;
628 dump->regs.edi = regs->edi;
629 dump->regs.ebp = regs->ebp;
630 dump->regs.eax = regs->eax;
631 dump->regs.ds = regs->xds;
632 dump->regs.es = regs->xes;
633 savesegment(fs,dump->regs.fs);
634 savesegment(gs,dump->regs.gs);
635 dump->regs.orig_eax = regs->orig_eax;
636 dump->regs.eip = regs->eip;
637 dump->regs.cs = regs->xcs;
638 dump->regs.eflags = regs->eflags;
639 dump->regs.esp = regs->esp;
640 dump->regs.ss = regs->xss;
641
642 dump->u_fpvalid = dump_fpu (regs, &dump->i387);
643 }
644
/*
 * This special macro can be used to load a debugging register:
 * it moves thread->debugreg[register] into %db<register>.
 * `register' must be a literal number, as it is pasted into the
 * instruction text.
 */
#define loaddebug(thread,register) \
		__asm__("movl %0,%%db" #register  \
			: /* no output */ \
			:"r" (thread->debugreg[register]))
652
653 /*
654 * switch_to(x,yn) should switch tasks from x to y.
655 *
656 * We fsave/fwait so that an exception goes off at the right time
657 * (as a call from the fsave or fwait in effect) rather than to
658 * the wrong process. Lazy FP saving no longer makes any sense
659 * with modern CPU's, and this simplifies a lot of things (SMP
660 * and UP become the same).
661 *
662 * NOTE! We used to use the x86 hardware context switching. The
663 * reason for not using it any more becomes apparent when you
664 * try to recover gracefully from saved state that is no longer
665 * valid (stale segment register values in particular). With the
666 * hardware task-switch, there is no way to fix up bad state in
667 * a reasonable manner.
668 *
669 * The fact that Intel documents the hardware task-switching to
670 * be slow is a fairly red herring - this code is not noticeably
671 * faster. However, there _is_ some room for improvement here,
672 * so the performance issues may eventually be a valid point.
673 * More important, however, is the fact that this allows us much
674 * more flexibility.
675 */
676 void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
677 {
678 struct thread_struct *prev = &prev_p->thread,
679 *next = &next_p->thread;
680 struct tss_struct *tss = init_tss + smp_processor_id();
681
682 unlazy_fpu(prev_p);
683
684 /*
685 * Reload esp0, LDT and the page table pointer:
686 */
687 tss->esp0 = next->esp0;
688
689 /*
690 * Save away %fs and %gs. No need to save %es and %ds, as
691 * those are always kernel segments while inside the kernel.
692 */
693 asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
694 asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
695
696 /*
697 * Restore %fs and %gs.
698 */
699 loadsegment(fs, next->fs);
700 loadsegment(gs, next->gs);
701
702 /*
703 * Now maybe reload the debug registers
704 */
705 if (next->debugreg[7]){
706 loaddebug(next, 0);
707 loaddebug(next, 1);
708 loaddebug(next, 2);
709 loaddebug(next, 3);
710 /* no 4 and 5 */
711 loaddebug(next, 6);
712 loaddebug(next, 7);
713 }
714
715 if (prev->ioperm || next->ioperm) {
716 if (next->ioperm) {
717 /*
718 * 4 cachelines copy ... not good, but not that
719 * bad either. Anyone got something better?
720 * This only affects processes which use ioperm().
721 * [Putting the TSSs into 4k-tlb mapped regions
722 * and playing VM tricks to switch the IO bitmap
723 * is not really acceptable.]
724 */
725 memcpy(tss->io_bitmap, next->io_bitmap,
726 IO_BITMAP_SIZE*sizeof(unsigned long));
727 tss->bitmap = IO_BITMAP_OFFSET;
728 } else
729 /*
730 * a bitmap offset pointing outside of the TSS limit
731 * causes a nicely controllable SIGSEGV if a process
732 * tries to use a port IO instruction. The first
733 * sys_ioperm() call sets up the bitmap properly.
734 */
735 tss->bitmap = INVALID_IO_BITMAP_OFFSET;
736 }
737 }
738
739 asmlinkage int sys_fork(struct pt_regs regs)
740 {
741 return do_fork(SIGCHLD, regs.esp, ®s, 0);
742 }
743
744 asmlinkage int sys_clone(struct pt_regs regs)
745 {
746 unsigned long clone_flags;
747 unsigned long newsp;
748
749 clone_flags = regs.ebx;
750 newsp = regs.ecx;
751 if (!newsp)
752 newsp = regs.esp;
753 return do_fork(clone_flags, newsp, ®s, 0);
754 }
755
756 /*
757 * This is trivial, and on the face of it looks like it
758 * could equally well be done in user mode.
759 *
760 * Not so, for quite unobvious reasons - register pressure.
761 * In user mode vfork() cannot have a stack frame, and if
762 * done by calling the "clone()" system call directly, you
763 * do not have enough call-clobbered registers to hold all
764 * the information you need.
765 */
766 asmlinkage int sys_vfork(struct pt_regs regs)
767 {
768 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0);
769 }
770
771 /*
772 * sys_execve() executes a new program.
773 */
774 asmlinkage int sys_execve(struct pt_regs regs)
775 {
776 int error;
777 char * filename;
778
779 filename = getname((char *) regs.ebx);
780 error = PTR_ERR(filename);
781 if (IS_ERR(filename))
782 goto out;
783 error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, ®s);
784 if (error == 0)
785 current->ptrace &= ~PT_DTRACE;
786 putname(filename);
787 out:
788 return error;
789 }
790
791 /*
792 * These bracket the sleeping functions..
793 */
794 extern void scheduling_functions_start_here(void);
795 extern void scheduling_functions_end_here(void);
796 #define first_sched ((unsigned long) scheduling_functions_start_here)
797 #define last_sched ((unsigned long) scheduling_functions_end_here)
798
799 unsigned long get_wchan(struct task_struct *p)
800 {
801 unsigned long ebp, esp, eip;
802 unsigned long stack_page;
803 int count = 0;
804 if (!p || p == current || p->state == TASK_RUNNING)
805 return 0;
806 stack_page = (unsigned long)p;
807 esp = p->thread.esp;
808 if (!stack_page || esp < stack_page || esp > 8188+stack_page)
809 return 0;
810 /* include/asm-i386/system.h:switch_to() pushes ebp last. */
811 ebp = *(unsigned long *) esp;
812 do {
813 if (ebp < stack_page || ebp > 8184+stack_page)
814 return 0;
815 eip = *(unsigned long *) (ebp+4);
816 if (eip < first_sched || eip >= last_sched)
817 return eip;
818 ebp = *(unsigned long *) ebp;
819 } while (count++ < 16);
820 return 0;
821 }
822 #undef last_sched
823 #undef first_sched
824