Diffstat (limited to 'x86_64/locore.S')
-rw-r--r-- | x86_64/locore.S | 1640 |
1 files changed, 1640 insertions, 0 deletions
diff --git a/x86_64/locore.S b/x86_64/locore.S
new file mode 100644
index 0000000..25dc15d
--- /dev/null
+++ b/x86_64/locore.S
@@ -0,0 +1,1640 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993,1992,1991,1990 Carnegie Mellon University
+ * Copyright (c) 1991 IBM Corporation
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation,
+ * and that the name IBM not be used in advertising or publicity
+ * pertaining to distribution of the software without specific, written
+ * prior permission.
+ *
+ * CARNEGIE MELLON AND IBM ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON AND IBM DISCLAIM ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <mach/machine/asm.h>
+#include <mach/machine/eflags.h>
+#include <i386/i386/proc_reg.h>
+#include <i386/i386/trap.h>
+#include <i386/i386/seg.h>
+#include <i386/i386/gdt.h>
+#include <i386/i386/ldt.h>
+#include <i386/i386/msr.h>
+#include <i386/i386/i386asm.h>
+#include <i386/i386/cpu_number.h>
+#include <i386/i386/xen.h>
+
+
+/*
+ * Helpers for thread state as saved in the pcb area, during trap or irq handling
+ */
+#define pusha \
+	pushq	%rax		;\
+	pushq	%rcx		;\
+	pushq	%rdx		;\
+	pushq	%rbx		;\
+	subq	$8,%rsp		;\
+	pushq	%rbp		;\
+	pushq	%rsi		;\
+	pushq	%rdi		;\
+	pushq	%r8		;\
+	pushq	%r9		;\
+	pushq	%r10		;\
+	pushq	%r11		;\
+	pushq	%r12		;\
+	pushq	%r13		;\
+	pushq	%r14		;\
+	pushq	%r15
+
+#define popa \
+	popq	%r15		;\
+	popq	%r14		;\
+	popq	%r13		;\
+	popq	%r12		;\
+	popq	%r11		;\
+	popq	%r10		;\
+	popq	%r9		;\
+	popq	%r8		;\
+	popq	%rdi		;\
+	popq	%rsi		;\
+	popq	%rbp		;\
+	addq	$8,%rsp		;\
+	popq	%rbx		;\
+	popq	%rdx		;\
+	popq	%rcx		;\
+	popq	%rax
+
+#define PUSH_REGS_ISR \
+	pushq	%rcx		;\
+	pushq	%rdx		;\
+	pushq	%rsi		;\
+	pushq	%rdi		;\
+	pushq	%r8		;\
+	pushq	%r9		;\
+	pushq	%r10		;\
+	pushq	%r11
+
+#define PUSH_AREGS_ISR \
+	pushq	%rax		;\
+	PUSH_REGS_ISR
+
+
+#define POP_REGS_ISR \
+	popq	%r11		;\
+	popq	%r10		;\
+	popq	%r9		;\
+	popq	%r8		;\
+	popq	%rdi		;\
+	popq	%rsi		;\
+	popq	%rdx		;\
+	popq	%rcx
+
+#define POP_AREGS_ISR \
+	POP_REGS_ISR		;\
+	popq	%rax
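The push order above is what later gives the R_* offsets their meaning: pushq grows the stack downward, so the last register pushed ends up at the lowest address. A rough C overlay of the frame the pusha macro builds (field names are illustrative only; the real layout is the i386_saved_state/pcb definitions in the gnumach headers):

	#include <stdint.h>

	/* Hypothetical mirror of the pusha macro above: %r15, pushed
	 * last, is the first field when a struct is overlaid on the
	 * final %rsp. */
	struct pusha_frame_sketch {
		uint64_t r15, r14, r13, r12, r11, r10, r9, r8;
		uint64_t rdi, rsi, rbp;
		uint64_t pad;	/* hole left by the "subq $8,%rsp" */
		uint64_t rbx, rdx, rcx, rax;
	};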
+/*
+ * Note that we have to load the kernel segment registers even if this
+ * is a trap from the kernel, because the kernel uses user segment
+ * registers for copyin/copyout.
+ * (XXX Would it be smarter just to use fs or gs for that?)
+ */
+#ifdef USER32
+#define PUSH_SEGMENTS(reg) \
+	movq	%ds,reg		;\
+	pushq	reg		;\
+	movq	%es,reg		;\
+	pushq	reg		;\
+	pushq	%fs		;\
+	pushq	%gs
+#else
+#define PUSH_SEGMENTS(reg)
+#endif
+
+#ifdef USER32
+#define POP_SEGMENTS(reg) \
+	popq	%gs		;\
+	popq	%fs		;\
+	popq	reg		;\
+	movq	reg,%es		;\
+	popq	reg		;\
+	movq	reg,%ds
+#else
+#define POP_SEGMENTS(reg)
+#endif
+
+#ifdef USER32
+#define PUSH_SEGMENTS_ISR(reg) \
+	movq	%ds,reg		;\
+	pushq	reg		;\
+	movq	%es,reg		;\
+	pushq	reg		;\
+	pushq	%fs		;\
+	pushq	%gs
+#else
+#define PUSH_SEGMENTS_ISR(reg)
+#endif
+
+#ifdef USER32
+#define POP_SEGMENTS_ISR(reg) \
+	popq	%gs		;\
+	popq	%fs		;\
+	popq	reg		;\
+	movq	reg,%es		;\
+	popq	reg		;\
+	movq	reg,%ds
+#else
+#define POP_SEGMENTS_ISR(reg)
+#endif
+
+#ifdef USER32
+#define SET_KERNEL_SEGMENTS(reg) \
+	mov	%ss,reg		/* switch to kernel segments */ ;\
+	mov	reg,%ds		/* (same as kernel stack segment) */ ;\
+	mov	reg,%es		;\
+	mov	reg,%fs		;\
+	mov	$(PERCPU_DS),reg	;\
+	mov	reg,%gs
+#else
+#define SET_KERNEL_SEGMENTS(reg)
+#endif
+
+/*
+ * Fault recovery.
+ */
+#define RECOVER_TABLE_START \
+	.text	2		;\
+DATA(recover_table)		;\
+	.text
+
+#define RECOVER(addr) \
+	.text	2		;\
+	.quad	9f		;\
+	.quad	addr		;\
+	.text			;\
+9:
+
+#define RECOVER_TABLE_END \
+	.text	2		;\
+	.globl	EXT(recover_table_end)	;\
+LEXT(recover_table_end)			;\
+	.text
+
+/*
+ * Retry table for certain successful faults.
+ */
+#define RETRY_TABLE_START \
+	.text	3		;\
+DATA(retry_table)		;\
+	.text
+
+#define RETRY(addr) \
+	.text	3		;\
+	.quad	9f		;\
+	.quad	addr		;\
+	.text			;\
+9:
+
+#define RETRY_TABLE_END \
+	.text	3		;\
+	.globl	EXT(retry_table_end)	;\
+LEXT(retry_table_end)			;\
+	.text
+
+/*
+ * Allocate recovery and retry tables.
+ */
+	RECOVER_TABLE_START
+	RETRY_TABLE_START
+
+/*
+ * Timing routines.
+ */
+#if	STAT_TIME
+
+#define	TIME_TRAP_UENTRY
+#define	TIME_TRAP_SENTRY
+#define	TIME_TRAP_UEXIT
+#define	TIME_INT_ENTRY
+#define	TIME_INT_EXIT
+
+#else	/* microsecond timing */
+
+/*
+ * Microsecond timing.
+ * Assumes a free-running microsecond counter.
+ * no TIMER_MAX check needed.
+ */
+
+/*
+ * There is only one current time-stamp per CPU, since only
+ * the time-stamp in the current timer is used.
+ * To save time, we allocate the current time-stamps here.
+ */
+	.comm	EXT(current_tstamp), 4*NCPUS
+
+/*
+ * Update time on user trap entry.
+ * 11 instructions (including cli on entry)
+ * Assumes CPU number in %edx.
+ * Uses %eax, %ebx, %ecx.
+ */
+#define	TIME_TRAP_UENTRY \
+	pushf			/* Save flags */	;\
+	cli			/* block interrupts */	;\
+	movl	VA_ETC,%ebx	/* get timer value */	;\
+	movl	CX(EXT(current_tstamp),%rdx),%ecx	/* get old time stamp */;\
+	movl	%ebx,CX(EXT(current_tstamp),%rdx)	/* set new time stamp */;\
+	subl	%ecx,%ebx	/* elapsed = new-old */	;\
+	movl	CX(EXT(current_timer),%rdx),%ecx	/* get current timer */	;\
+	addl	%ebx,LOW_BITS(%ecx)	/* add to low bits */	;\
+	jns	0f			/* if overflow, */	;\
+	call	timer_normalize		/* normalize timer */	;\
+0:	addl	$(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx		;\
+				/* switch to sys timer */;\
+	movl	%ecx,CX(EXT(current_timer),%rdx)	/* make it current */	;\
+	popf			/* allow interrupts */
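In C pseudocode, TIME_TRAP_UENTRY performs roughly the following bookkeeping (a sketch only, assuming the free-running 32-bit counter described above; the array and function names are illustrative). In the assembly, the final switch is done by adding the TH_SYSTEM_TIMER-TH_USER_TIMER offset to the same thread's timer pointer rather than passing a second pointer:

	#include <stdint.h>

	struct timer_sketch { uint32_t low_bits; /* high bits live elsewhere */ };

	extern uint32_t read_etc(void);		/* stands in for the VA_ETC load */
	extern uint32_t current_tstamp_sk[];	/* per-CPU last time stamp */
	extern struct timer_sketch *current_timer_sk[];
	extern void timer_normalize_sk(struct timer_sketch *);

	static void time_trap_uentry_sketch(int cpu, struct timer_sketch *sys_timer)
	{
		uint32_t now = read_etc();
		uint32_t elapsed = now - current_tstamp_sk[cpu];

		current_tstamp_sk[cpu] = now;
		current_timer_sk[cpu]->low_bits += elapsed;
		if ((int32_t)current_timer_sk[cpu]->low_bits < 0)	/* the "jns 0f" test */
			timer_normalize_sk(current_timer_sk[cpu]);
		current_timer_sk[cpu] = sys_timer;	/* charge system time from here on */
	}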
+/*
+ * Update time on system call entry.
+ * 11 instructions (including cli on entry)
+ * Assumes CPU number in %edx.
+ * Uses %ebx, %ecx.
+ * Same as TIME_TRAP_UENTRY, but preserves %eax.
+ */
+#define	TIME_TRAP_SENTRY \
+	pushf			/* Save flags */	;\
+	cli			/* block interrupts */	;\
+	movl	VA_ETC,%ebx	/* get timer value */	;\
+	movl	CX(EXT(current_tstamp),%rdx),%ecx	/* get old time stamp */;\
+	movl	%ebx,CX(EXT(current_tstamp),%rdx)	/* set new time stamp */;\
+	subl	%ecx,%ebx	/* elapsed = new-old */	;\
+	movl	CX(EXT(current_timer),%rdx),%ecx	/* get current timer */	;\
+	addl	%ebx,LOW_BITS(%ecx)	/* add to low bits */	;\
+	jns	0f			/* if overflow, */	;\
+	pushq	%rax			/* save %rax */		;\
+	call	timer_normalize		/* normalize timer */	;\
+	popq	%rax			/* restore %rax */	;\
+0:	addl	$(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx		;\
+				/* switch to sys timer */;\
+	movl	%ecx,CX(EXT(current_timer),%rdx)	/* make it current */	;\
+	popf			/* allow interrupts */
+
+/*
+ * update time on user trap exit.
+ * 10 instructions.
+ * Assumes CPU number in %edx.
+ * Uses %ebx, %ecx.
+ */
+#define	TIME_TRAP_UEXIT \
+	cli			/* block interrupts */	;\
+	movl	VA_ETC,%ebx	/* get timer */		;\
+	movl	CX(EXT(current_tstamp),%rdx),%ecx	/* get old time stamp */;\
+	movl	%ebx,CX(EXT(current_tstamp),%rdx)	/* set new time stamp */;\
+	subl	%ecx,%ebx	/* elapsed = new-old */	;\
+	movl	CX(EXT(current_timer),%rdx),%ecx	/* get current timer */	;\
+	addl	%ebx,LOW_BITS(%ecx)	/* add to low bits */	;\
+	jns	0f			/* if overflow, */	;\
+	call	timer_normalize		/* normalize timer */	;\
+0:	addl	$(TH_USER_TIMER-TH_SYSTEM_TIMER),%ecx		;\
+				/* switch to user timer */;\
+	movl	%ecx,CX(EXT(current_timer),%rdx)	/* make it current */
+
+/*
+ * update time on interrupt entry.
+ * 9 instructions.
+ * Assumes CPU number in %edx.
+ * Leaves old timer in %ebx.
+ * Uses %ecx.
+ */
+#define	TIME_INT_ENTRY \
+	movl	VA_ETC,%ecx	/* get timer */		;\
+	movl	CX(EXT(current_tstamp),%rdx),%ebx	/* get old time stamp */;\
+	movl	%ecx,CX(EXT(current_tstamp),%rdx)	/* set new time stamp */;\
+	subl	%ebx,%ecx	/* elapsed = new-old */	;\
+	movl	CX(EXT(current_timer),%rdx),%ebx	/* get current timer */	;\
+	addl	%ecx,LOW_BITS(%ebx)	/* add to low bits */	;\
+	leal	CX(0,%rdx),%ecx	/* timer is 16 bytes */	;\
+	lea	CX(EXT(kernel_timer),%rdx),%ecx	/* get interrupt timer*/;\
+	movl	%ecx,CX(EXT(current_timer),%rdx)	/* set timer */
+
+/*
+ * update time on interrupt exit.
+ * 11 instructions
+ * Assumes CPU number in %edx, old timer in %ebx.
+ * Uses %eax, %ecx.
+ */
+#define	TIME_INT_EXIT \
+	movl	VA_ETC,%eax	/* get timer */		;\
+	movl	CX(EXT(current_tstamp),%rdx),%ecx	/* get old time stamp */;\
+	movl	%eax,CX(EXT(current_tstamp),%rdx)	/* set new time stamp */;\
+	subl	%ecx,%eax	/* elapsed = new-old */	;\
+	movl	CX(EXT(current_timer),%rdx),%ecx	/* get current timer */	;\
+	addl	%eax,LOW_BITS(%ecx)	/* add to low bits */	;\
+	jns	0f			/* if overflow, */	;\
+	call	timer_normalize		/* normalize timer */	;\
+0:	testb	$0x80,LOW_BITS+3(%ebx)	/* old timer overflow? */;\
+	jz	0f			/* if overflow, */	;\
+	movl	%ebx,%ecx		/* get old timer */	;\
+	call	timer_normalize		/* normalize timer */	;\
+0:	movl	%ebx,CX(EXT(current_timer),%rdx)	/* set timer */
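timer_normalize, defined next, carries the overflowed low word into the high words. In C terms (a sketch; the real TIMER_HIGH_UNIT value comes from the timer headers, the constant here is a stand-in):

	#include <stdint.h>

	#define TIMER_HIGH_UNIT_SK 0x40000000u	/* assumption, for illustration only */

	struct full_timer_sketch {
		uint32_t low_bits;
		uint32_t high_bits;
		uint32_t high_bits_check;
	};

	static void timer_normalize_sketch(struct full_timer_sketch *t)
	{
		uint32_t carry = t->low_bits / TIMER_HIGH_UNIT_SK;

		/* Same order as the assembly: the check word is bumped
		 * first, then the remainder is stored, then the real
		 * high word. */
		t->high_bits_check += carry;
		t->low_bits        %= TIMER_HIGH_UNIT_SK;
		t->high_bits       += carry;
	}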
+/*
+ * Normalize timer in ecx.
+ * Preserves edx; clobbers eax.
+ */
+	.align	2
+timer_high_unit:
+	.long	TIMER_HIGH_UNIT		/* div has no immediate opnd */
+
+timer_normalize:
+	pushq	%rdx			/* save register */
+	xorl	%edx,%edx		/* clear divisor high */
+	movl	LOW_BITS(%ecx),%eax	/* get divisor low */
+	divl	timer_high_unit,%eax	/* quotient in eax */
+					/* remainder in edx */
+	addl	%eax,HIGH_BITS_CHECK(%ecx) /* add high_inc to check */
+	movl	%edx,LOW_BITS(%ecx)	/* remainder to low_bits */
+	addl	%eax,HIGH_BITS(%ecx)	/* add high_inc to high bits */
+	popq	%rdx			/* restore register */
+	ret
+
+/*
+ * Switch to a new timer.
+ */
+ENTRY(timer_switch)
+	CPU_NUMBER(%edx)		/* get this CPU */
+	movl	VA_ETC,%ecx		/* get timer */
+	movl	CX(EXT(current_tstamp),%rdx),%eax	/* get old time stamp */
+	movl	%ecx,CX(EXT(current_tstamp),%rdx)	/* set new time stamp */
+	subl	%ecx,%eax		/* elapsed = new - old */
+	movl	CX(EXT(current_timer),%rdx),%ecx	/* get current timer */
+	addl	%eax,LOW_BITS(%ecx)	/* add to low bits */
+	jns	0f			/* if overflow, */
+	call	timer_normalize		/* normalize timer */
+0:
+	movl	S_ARG0,%ecx		/* get new timer */
+	movl	%ecx,CX(EXT(current_timer),%rdx)	/* set timer */
+	ret
+
+/*
+ * Initialize the first timer for a CPU.
+ */
+ENTRY(start_timer)
+	CPU_NUMBER(%edx)		/* get this CPU */
+	movl	VA_ETC,%ecx		/* get timer */
+	movl	%ecx,CX(EXT(current_tstamp),%rdx)	/* set initial time stamp */
+	movl	S_ARG0,%ecx		/* get timer */
+	movl	%ecx,CX(EXT(current_timer),%rdx)	/* set initial timer */
+	ret
+
+#endif	/* accurate timing */
+
+/**/
+
+/*
+ * Trap/interrupt entry points.
+ *
+ * All traps must create the i386_saved_state struct on the stack on
+ * entry. Note that:
+ * - CR2 is only used if the trap is a page fault
+ * - user_rsp/user_ss are only used if entering from user space
+ * - v86_regs are used only from V86 threads
+ *   (TODO check if V86 is still used with USER32)
+ *
+ * Depending on the CPL before entry, the stack might be switched or not;
+ * if entering from user-space the CPU loads TSS->RSP0 in RSP,
+ * otherwise RSP is unchanged. After this, the cpu pushes
+ * SS/RSP/RFLAGS/CS/RIP and optionally ErrorCode and executes the handler.
+ */
+
+/* Try to save/show some information when a double fault happens
+ * We can't recover to a working state, so if we have a debugger wait for it,
+ * otherwise reset */
+ENTRY(t_dbl_fault)
+	INT_FIX
+	cli			/* disable interrupts that might corrupt the state*/
+	pusha
+	movq	%cr2,%rax
+	movq	%rax,R_CR2-R_R15(%rsp)	/* CR2 might contain the faulting address */
+	subq	$48,%rsp	// FIXME remove when segments are cleaned up
+	movq	%rsp,%rdi	/* pass the saved state */
+	call	handle_double_fault
+	jmp	cpu_shutdown	/* reset */
+END(t_dbl_fault)
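For reference, this is the frame the CPU itself builds on a 64-bit exception entry, which the offsets in the handlers below index into; the `testq $2` checks test bit 1 of the saved CS to detect a non-kernel ring, and the extra trap-code word pushed by t_gen_prot/t_segnp shifts everything up by 8. Standard x86-64 layout; the struct is only a sketch, not a gnumach type:

	#include <stdint.h>

	/* What RSP points at inside a handler, lowest address first,
	 * after the optional error code and the five CPU-pushed words. */
	struct hw_exception_frame_sketch {
		uint64_t error_code;	/* only for vectors that push one */
		uint64_t rip;
		uint64_t cs;		/* low two bits = CPL of interrupted code */
		uint64_t rflags;
		uint64_t rsp;
		uint64_t ss;
	};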
+/*
+ * General protection or segment-not-present fault.
+ * Check for a GP/NP fault in the kernel_return
+ * sequence; if there, report it as a GP/NP fault on the user's instruction.
+ *
+ * rsp->	 0:	trap code (NP or GP)
+ *		 8:	segment number in error
+ *		16:	eip
+ *		24:	cs
+ *		32:	eflags
+ *		40:	old registers (trap is from kernel)
+ */
+ENTRY(t_gen_prot)
+	INT_FIX
+	pushq	$(T_GENERAL_PROTECTION)	/* indicate fault type */
+	jmp	trap_check_kernel_exit	/* check for kernel exit sequence */
+
+ENTRY(t_segnp)
+	INT_FIX
+	pushq	$(T_SEGMENT_NOT_PRESENT)
+					/* indicate fault type */
+
+trap_check_kernel_exit:
+#ifdef USER32
+	testq	$(EFL_VM),32(%rsp)	/* is trap from V86 mode? */
+	jnz	EXT(alltraps)		/* isn`t kernel trap if so */
+#endif
+	/* Note: handling KERNEL_RING value by hand */
+	testq	$2,24(%rsp)		/* is trap from kernel mode? */
+	jnz	EXT(alltraps)		/* if so: */
+					/* check for the kernel exit sequence */
+	cmpq	$_kret_iret,16(%rsp)	/* on IRET? */
+	je	fault_iret
+#ifdef USER32
+	cmpq	$_kret_popl_ds,16(%rsp)	/* popping DS? */
+	je	fault_popl_ds
+	cmpq	$_kret_popl_es,16(%rsp)	/* popping ES? */
+	je	fault_popl_es
+	cmpq	$_kret_popl_fs,16(%rsp)	/* popping FS? */
+	je	fault_popl_fs
+	cmpq	$_kret_popl_gs,16(%rsp)	/* popping GS? */
+	je	fault_popl_gs
+#endif
+take_fault:				/* if none of the above: */
+	jmp	EXT(alltraps)		/* treat as normal trap. */
+
+/*
+ * GP/NP fault on IRET: CS or SS is in error.
+ * All registers contain the user's values.
+ *
+ * on SP is
+ *	 0	trap number
+ *	 8	errcode
+ *	16	eip
+ *	24	cs		--> trapno
+ *	32	efl		--> errcode
+ *	40	user eip
+ *	48	user cs
+ *	56	user eflags
+ *	64	user rsp
+ *	72	user ss
+ */
+fault_iret:
+	movq	%rax,16(%rsp)		/* save eax (we don`t need saved eip) */
+	popq	%rax			/* get trap number */
+	movq	%rax,24-8(%rsp)		/* put in user trap number */
+	popq	%rax			/* get error code */
+	movq	%rax,32-16(%rsp)	/* put in user errcode */
+	popq	%rax			/* restore eax */
+	jmp	EXT(alltraps)		/* take fault */
+
+#ifdef USER32
+/*
+ * Fault restoring a segment register.  The user's registers are still
+ * saved on the stack.  The offending segment register has not been
+ * popped.
+ */
+fault_popl_ds:
+	popq	%rax			/* get trap number */
+	popq	%rdx			/* get error code */
+	addq	$24,%rsp		/* pop stack to user regs */
+	jmp	push_es			/* (DS on top of stack) */
+fault_popl_es:
+	popq	%rax			/* get trap number */
+	popq	%rdx			/* get error code */
+	addq	$24,%rsp		/* pop stack to user regs */
+	jmp	push_fs			/* (ES on top of stack) */
+fault_popl_fs:
+	popq	%rax			/* get trap number */
+	popq	%rdx			/* get error code */
+	addq	$24,%rsp		/* pop stack to user regs */
+	jmp	push_gs			/* (FS on top of stack) */
+fault_popl_gs:
+	popq	%rax			/* get trap number */
+	popq	%rdx			/* get error code */
+	addq	$24,%rsp		/* pop stack to user regs */
+	jmp	push_segregs		/* (GS on top of stack) */
+
+push_es:
+	movq	%es,%rcx
+	pushq	%rcx			/* restore es, */
+push_fs:
+	pushq	%fs			/* restore fs, */
+push_gs:
+	pushq	%gs			/* restore gs. */
+push_gsbase:
+	pushq	$0
+	pushq	$0
+#endif
+push_segregs:
+	movq	%rax,R_TRAPNO(%rsp)	/* set trap number */
+	movq	%rdx,R_ERR(%rsp)	/* set error code */
+	jmp	trap_set_segs		/* take trap */
+
+/*
+ * Debug trap.  Check for single-stepping across system call into
+ * kernel.  If this is the case, taking the debug trap has turned
+ * off single-stepping - save the flags register with the trace
+ * bit set.
+ */
+ENTRY(t_debug)
+	INT_FIX
+#ifdef USER32
+	testq	$(EFL_VM),16(%rsp)	/* is trap from V86 mode? */
+	jnz	0f			/* isn`t kernel trap if so */
+#endif
+	/* Note: handling KERNEL_RING value by hand */
+	testq	$2,8(%rsp)		/* is trap from kernel mode? */
+	jnz	0f			/* if so: */
+#ifdef USER32
+	cmpq	$syscall_entry,(%rsp)	/* system call entry? */
+	jne	0f			/* if so: */
+					/* flags are sitting where syscall */
+					/* wants them */
+	addq	$32,%rsp		/* remove eip/cs */
+	jmp	syscall_entry_2		/* continue system call entry */
+#else
+	// TODO: implement the 64-bit case
+	ud2
+#endif
+0:	pushq	$0			/* otherwise: */
+	pushq	$(T_DEBUG)		/* handle as normal */
+	jmp	EXT(alltraps)		/* debug fault */
+/*
+ * Page fault traps save cr2.
+ */
+ENTRY(t_page_fault)
+	INT_FIX
+	pushq	$(T_PAGE_FAULT)		/* mark a page fault trap */
+	pusha				/* save the general registers */
+#ifdef	MACH_XEN
+	movq	%ss:hyp_shared_info+CR2,%rax
+#else	/* MACH_XEN */
+	movq	%cr2,%rax		/* get the faulting address */
+#endif	/* MACH_XEN */
+	movq	%rax,R_CR2-R_R15(%rsp)	/* save in rsp save slot */
+	jmp	trap_push_segs		/* continue fault */
+
+/*
+ * All 'exceptions' enter here with:
+ *	rsp->	trap number
+ *		error code
+ *		old eip
+ *		old cs
+ *		old eflags
+ *		old rsp		if trapped from user
+ *		old ss		if trapped from user
+ */
+ENTRY(alltraps)
+	pusha				/* save the general registers */
+trap_push_segs:
+	PUSH_SEGMENTS(%rax)		/* and the segment registers */
+	SET_KERNEL_SEGMENTS(%rax)	/* switch to kernel data segment */
+trap_set_segs:
+	cld				/* clear direction flag */
+#ifdef USER32
+	testl	$(EFL_VM),R_EFLAGS(%rsp) /* in V86 mode? */
+	jnz	trap_from_user		/* user mode trap if so */
+#endif
+	/* Note: handling KERNEL_RING value by hand */
+	testb	$2,R_CS(%rsp)		/* user mode trap? */
+	jz	trap_from_kernel	/* kernel trap if not */
+trap_from_user:
+
+	CPU_NUMBER(%edx)
+	TIME_TRAP_UENTRY
+
+	movq	CX(EXT(kernel_stack),%rdx),%rbx
+	xchgq	%rbx,%rsp		/* switch to kernel stack */
+					/* user regs pointer already set */
+_take_trap:
+	movq	%rbx,%rdi		/* pass register save area to trap */
+	call	EXT(user_trap)		/* call user trap routine */
+#ifdef USER32
+	orq	%rax,%rax		/* emulated syscall? */
+	jz	1f			/* no, just return */
+	movq	R_EAX(%rbx),%rax	/* yes, get syscall number */
+	jmp	syscall_entry_3		/* and emulate it */
+#endif
+1:
+	movq	(%rsp),%rsp		/* switch back to PCB stack */
+
+/*
+ * Return from trap or system call, checking for ASTs.
+ * On PCB stack.
+ */
+
+_return_from_trap:
+	CPU_NUMBER(%edx)
+	cmpl	$0,CX(EXT(need_ast),%rdx)
+	jz	_return_to_user		/* if we need an AST: */
+
+	movq	CX(EXT(kernel_stack),%rdx),%rsp
+					/* switch to kernel stack */
+	call	EXT(i386_astintr)	/* take the AST */
+	popq	%rsp			/* switch back to PCB stack */
+	jmp	_return_from_trap	/* and check again (rare) */
+					/* ASTs after this point will */
+					/* have to wait */
+
+_return_to_user:
+	TIME_TRAP_UEXIT
+
+/*
+ * Return from kernel mode to interrupted thread.
+ */
+
+_return_from_kernel:
+#ifdef USER32
+_kret_popl_gs:
+	popq	%gs			/* restore segment registers */
+_kret_popl_fs:
+	popq	%fs
+_kret_popl_es:
+	popq	%rax
+	movq	%rax,%es
+_kret_popl_ds:
+	popq	%rax
+	movq	%rax,%ds
+#endif
+	popa				/* restore general registers */
+	addq	$16,%rsp		/* discard trap number and error code */
+_kret_iret:
+	iretq				/* return from interrupt */
+
+
+/*
+ * Trap from kernel mode.  No need to switch stacks.
+ */
+trap_from_kernel:
+#if	MACH_KDB || MACH_TTD
+	movq	%rsp,%rbx		/* save current stack */
+	movq	%rsp,%rdx		/* on an interrupt stack? */
+
+	CPU_NUMBER(%ecx)
+	and	$(~(INTSTACK_SIZE-1)),%rdx
+	cmpq	CX(EXT(int_stack_base),%rcx),%rdx
+	je	1f			/* OK if so */
+
+	movl	%ecx,%edx
+	cmpq	CX(EXT(kernel_stack),%rdx),%rsp
+					/* already on kernel stack? */
+	ja	0f
+	cmpq	MY(ACTIVE_STACK),%rsp
+	ja	1f			/* switch if not */
+0:
+	movq	CX(EXT(kernel_stack),%rdx),%rsp
+1:
+	pushq	%rbx			/* save old stack */
+	movq	%rbx,%rdi		/* pass as parameter */
+	call	EXT(kernel_trap)	/* to kernel trap routine */
+
+	popq	%rsp			/* return to old stack */
+#else	/* MACH_KDB || MACH_TTD */
+
+	movq	%rsp,%rdi		/* pass parameter */
+	call	EXT(kernel_trap)	/* to kernel trap routine */
+
+#endif	/* MACH_KDB || MACH_TTD */
+
+	jmp	_return_from_kernel
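_return_from_trap above is effectively this loop (C pseudocode with illustrative names; the real check uses the per-CPU need_ast array and runs i386_astintr on the kernel stack):

	extern int need_ast_sk[];
	extern void i386_astintr(void);

	static void return_from_trap_sketch(int cpu)
	{
		/* Re-check after every AST: handling one may block, and
		 * another may have been posted in the meantime. */
		while (need_ast_sk[cpu])
			i386_astintr();
		/* ...then fall into _return_to_user and iretq */
	}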
+/*
+ * Called as a function, makes the current thread
+ * return from the kernel as if from an exception.
+ */
+
+ENTRY(thread_exception_return)
+ENTRY(thread_bootstrap_return)
+	movq	%rsp,%rcx		/* get kernel stack */
+	or	$(KERNEL_STACK_SIZE-1),%rcx
+	movq	-7-IKS_SIZE(%rcx),%rsp	/* switch back to PCB stack */
+	jmp	_return_from_trap
+
+/*
+ * Called as a function, makes the current thread
+ * return from the kernel as if from a syscall.
+ * Takes the syscall's return code as an argument.
+ */
+
+ENTRY(thread_syscall_return)
+	movq	S_ARG0,%rax		/* get return value */
+	movq	%rsp,%rcx		/* get kernel stack */
+	or	$(KERNEL_STACK_SIZE-1),%rcx
+	movq	-7-IKS_SIZE(%rcx),%rsp	/* switch back to PCB stack */
+	movq	%rax,R_EAX(%rsp)	/* save return value */
+	jmp	_return_from_trap
+
+ENTRY(call_continuation)
+	movq	S_ARG0,%rax		/* get continuation */
+	movq	%rsp,%rcx		/* get kernel stack */
+	or	$(KERNEL_STACK_SIZE-1),%rcx
+	addq	$(-7-IKS_SIZE),%rcx
+	movq	%rcx,%rsp		/* pop the stack */
+	xorq	%rbp,%rbp		/* zero frame pointer */
+	pushq	$0			/* Dummy return address */
+	jmp	*%rax			/* goto continuation */
+
+/* IOAPIC has 24 interrupts, put spurious in the same array */
+
+#define INTERRUPT(n) \
+	.data	2	;\
+	.quad	0f	;\
+	.text		;\
+	P2ALIGN(TEXT_ALIGN) ;\
+0:	;\
+	INT_FIX	;\
+	pushq	%rax	;\
+	movq	$(n),%rax	;\
+	jmp	EXT(all_intrs)
+
+	.data	2
+DATA(int_entry_table)
+	.text
+/* Legacy APIC interrupts or PIC interrupts */
+INTERRUPT(0)
+INTERRUPT(1)
+INTERRUPT(2)
+INTERRUPT(3)
+INTERRUPT(4)
+INTERRUPT(5)
+INTERRUPT(6)
+INTERRUPT(7)
+INTERRUPT(8)
+INTERRUPT(9)
+INTERRUPT(10)
+INTERRUPT(11)
+INTERRUPT(12)
+INTERRUPT(13)
+INTERRUPT(14)
+INTERRUPT(15)
+#ifdef APIC
+/* APIC PCI interrupts PIRQ A-H */
+INTERRUPT(16)
+INTERRUPT(17)
+INTERRUPT(18)
+INTERRUPT(19)
+INTERRUPT(20)
+INTERRUPT(21)
+INTERRUPT(22)
+INTERRUPT(23)
+#endif
+#if NCPUS > 1
+INTERRUPT(CALL_AST_CHECK)
+INTERRUPT(CALL_PMAP_UPDATE)
+#endif
+#ifdef APIC
+/* Spurious interrupt, set irq number to vect number */
+INTERRUPT(255)
+#endif
+
+/* XXX handle NMI - at least print a warning like Linux does.  */
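Each INTERRUPT(n) expansion above emits a small text stub plus one pointer in int_entry_table, so boot code can wire IDT entries to the stubs. Conceptually (illustrative C; the actual table is assembled into the .data section and the gate-setup helper here is hypothetical):

	typedef void (*intr_stub_sk)(void);

	/* 16 legacy PIC vectors, 8 more PIRQ entries with APIC, plus the
	 * SMP IPIs and the spurious vector, in the order laid out above. */
	extern intr_stub_sk int_entry_table_sk[];

	extern void idt_set_gate_sk(int vector, intr_stub_sk stub);

	static void wire_irqs_sketch(int base_vector, int count)
	{
		for (int i = 0; i < count; i++)
			idt_set_gate_sk(base_vector + i, int_entry_table_sk[i]);
	}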
+/*
+ * All interrupts enter here. The cpu might have loaded a new RSP,
+ * depending on the previous CPL, as in alltraps.
+ * Old %eax on stack, interrupt number in %eax; we need to fill the remaining
+ * fields of struct i386_interrupt_state, which might be in the pcb or in the
+ * interrupt stack.
+ */
+ENTRY(all_intrs)
+	PUSH_REGS_ISR			/* save registers */
+	cld				/* clear direction flag */
+
+	PUSH_SEGMENTS_ISR(%rdx)		/* save segment registers */
+
+	CPU_NUMBER_NO_GS(%ecx)
+	movq	%rsp,%rdx		/* on an interrupt stack? */
+	and	$(~(INTSTACK_SIZE-1)),%rdx
+	cmpq	%ss:CX(EXT(int_stack_base),%rcx),%rdx
+	je	int_from_intstack	/* if not: */
+
+	SET_KERNEL_SEGMENTS(%rdx)	/* switch to kernel segments */
+
+	CPU_NUMBER(%edx)
+
+	movq	CX(EXT(int_stack_top),%rdx),%rcx
+
+	xchgq	%rcx,%rsp		/* switch to interrupt stack */
+
+#if	STAT_TIME
+	pushq	%rcx			/* save pointer to old stack */
+#else
+	pushq	%rbx			/* save %ebx - out of the way */
+					/* so stack looks the same */
+	pushq	%rcx			/* save pointer to old stack */
+	TIME_INT_ENTRY			/* do timing */
+#endif
+
+#ifdef MACH_LDEBUG
+	incl	CX(EXT(in_interrupt),%rdx)
+#endif
+
+	call	EXT(interrupt)		/* call generic interrupt routine */
+	.globl	EXT(return_to_iret)	/* ( label for kdb_kintr and hardclock */
+LEXT(return_to_iret)			/* to find the return from calling interrupt) */
+
+	CPU_NUMBER(%edx)
+#ifdef MACH_LDEBUG
+	decl	CX(EXT(in_interrupt),%rdx)
+#endif
+
+#if	STAT_TIME
+#else
+	TIME_INT_EXIT			/* do timing */
+	movq	8(%rsp),%rbx		/* restore the extra reg we saved */
+#endif
+
+	popq	%rsp			/* switch back to old stack */
+
+#ifdef USER32
+	testl	$(EFL_VM),I_EFL(%rsp)	/* if in V86 */
+	jnz	0f			/* or */
+#endif
+	/* Note: handling KERNEL_RING value by hand */
+	testb	$2,I_CS(%rsp)		/* user mode, */
+	jz	1f			/* check for ASTs */
+0:
+	cmpq	$0,CX(EXT(need_ast),%rdx)
+	jnz	ast_from_interrupt	/* take it if so */
+1:
+	POP_SEGMENTS_ISR(%rdx)		/* restore segment regs */
+	POP_AREGS_ISR			/* restore registers */
+
+	iretq				/* return to caller */
+
+int_from_intstack:
+	CPU_NUMBER_NO_GS(%edx)
+	cmpq	CX(EXT(int_stack_base),%rdx),%rsp /* seemingly looping? */
+	jb	stack_overflowed	/* if not: */
+	call	EXT(interrupt)		/* call interrupt routine */
+_return_to_iret_i:			/* ( label for kdb_kintr) */
+	POP_SEGMENTS_ISR(%rdx)
+	POP_AREGS_ISR			/* restore registers */
+					/* no ASTs */
+
+	iretq
+
+stack_overflowed:
+	ud2
+
+/*
+ * Take an AST from an interrupt.
+ * On PCB stack.
+ * sp->	gs	-> edx
+ *	fs	-> ecx
+ *	es	-> eax
+ *	ds	-> trapno
+ *	edx	-> code
+ *	ecx
+ *	eax
+ *	eip
+ *	cs
+ *	efl
+ *	rsp
+ *	ss
+ */
+ast_from_interrupt:
+	POP_SEGMENTS_ISR(%rdx)		/* restore all registers ... */
+	POP_AREGS_ISR
+	pushq	$0			/* zero code */
+	pushq	$0			/* zero trap number */
+	pusha				/* save general registers */
+	PUSH_SEGMENTS_ISR(%rdx)		/* save segment registers */
+	SET_KERNEL_SEGMENTS(%rdx)	/* switch to kernel segments */
+	CPU_NUMBER(%edx)
+	TIME_TRAP_UENTRY
+
+	movq	CX(EXT(kernel_stack),%rdx),%rsp
+					/* switch to kernel stack */
+	call	EXT(i386_astintr)	/* take the AST */
+	popq	%rsp			/* back to PCB stack */
+	jmp	_return_from_trap	/* return */
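The "on an interrupt stack?" test in all_intrs above relies purely on alignment: interrupt stacks are INTSTACK_SIZE-aligned, so masking the low bits off RSP recovers the stack base. As a sketch (assuming, as the masking already implies, that INTSTACK_SIZE is a power of two):

	#include <stdint.h>

	extern uintptr_t int_stack_base_sk[];	/* per-CPU interrupt stack base */

	static int on_interrupt_stack_sketch(int cpu, uintptr_t rsp,
					     uintptr_t intstack_size)
	{
		return (rsp & ~(intstack_size - 1)) == int_stack_base_sk[cpu];
	}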
+#if	MACH_KDB
+/*
+ * kdb_kintr:	enter kdb from keyboard interrupt.
+ * Chase down the stack frames until we find one whose return
+ * address is the interrupt handler.   At that point, we have:
+ *
+ * frame->	saved %rbp
+ *		return address in interrupt handler
+ *		saved SPL
+ *		saved IRQ
+ *		return address == return_to_iret_i
+ *		saved %r11
+ *		saved %r10
+ *		saved %r9
+ *		saved %r8
+ *		saved %rdx
+ *		saved %rcx
+ *		saved %rax
+ *		saved %rip
+ *		saved %cs
+ *		saved %rfl
+ *
+ * OR:
+ * frame->	saved %rbp
+ *		return address in interrupt handler
+ *		return address == return_to_iret
+ *		pointer to save area on old stack
+ *	      [ saved %ebx, if accurate timing ]
+ *
+ * old stack:	saved %gs
+ *		saved %fs
+ *		saved %es
+ *		saved %ds
+ *		saved %r11
+ *		saved %r10
+ *		saved %r9
+ *		saved %r8
+ *		saved %rdi
+ *		saved %rsi
+ *		saved %rdx
+ *		saved %rcx
+ *		saved %eax
+ *		saved %rip
+ *		saved %cs
+ *		saved %rfl
+ *
+ * Call kdb, passing it that register save area.
+ */
+
+#define	RET_OFFSET	32
+
+
+ENTRY(kdb_kintr)
+	movq	%rbp,%rax		/* save caller`s frame pointer */
+	movq	$EXT(return_to_iret),%rcx /* interrupt return address 1 */
+	movq	$_return_to_iret_i,%rdx	/* interrupt return address 2 */
+
+0:	cmpq	RET_OFFSET(%rax),%rcx	/* does this frame return to */
+					/* interrupt handler (1)? */
+	je	1f
+	cmpq	RET_OFFSET(%rax),%rdx	/* interrupt handler (2)? */
+	je	2f			/* if not: */
+	movq	(%rax),%rax		/* try next frame */
+	testq	%rax,%rax
+	jnz	0b
+	ud2				/* oops, didn't find frame, fix me :/ */
+
+1:	movq	$kdb_from_iret,RET_OFFSET(%rax)
+	ret				/* returns to kernel/user stack */
+
+2:	movq	$kdb_from_iret_i,RET_OFFSET(%rax)
+					/* returns to interrupt stack */
+	ret
+
+/*
+ * On return from keyboard interrupt, we will execute
+ * kdb_from_iret_i
+ *	if returning to an interrupt on the interrupt stack
+ * kdb_from_iret
+ *	if returning to an interrupt on the user or kernel stack
+ */
+kdb_from_iret:
+					/* save regs in known locations */
+#if	STAT_TIME
+	pushq	%rbx			/* caller`s %ebx is in reg */
+#else
+	movq	8(%rsp),%rax		/* get caller`s %ebx */
+	pushq	%rax			/* push on stack */
+#endif
+	pushq	%rbp
+	movq	%rsp,%rdi		/* pass regs */
+	call	EXT(kdb_kentry)		/* to kdb */
+	popq	%rbp
+#if	STAT_TIME
+	popq	%rbx
+#else
+	popq	%rax
+	movq	%rax,8(%rsp)
+#endif
+	jmp	EXT(return_to_iret)	/* normal interrupt return */
+
+kdb_from_iret_i:			/* on interrupt stack */
+	pop	%rdx			/* restore saved registers */
+	pop	%rcx
+	pop	%rax
+	pushq	$0			/* zero error code */
+	pushq	$0			/* zero trap number */
+	pusha				/* save general registers */
+	PUSH_SEGMENTS(%rdx)		/* save segment registers */
+	movq	%rsp,%rdx		/* pass regs, */
+	movq	$0,%rsi			/* code, */
+	movq	$-1,%rdi		/* type to kdb */
+	call	EXT(kdb_trap)
+	POP_SEGMENTS(%rdx)		/* restore segment registers */
+	popa				/* restore general registers */
+	addq	$16,%rsp
+
+// TODO: test it before dropping ud2
+movq (%rsp),%rax
+ud2
+	iretq
+
+#endif	/* MACH_KDB */
+#if	MACH_TTD
+/*
+ * Same code as that above for the keyboard entry into kdb.
+ */
+ENTRY(kttd_intr)
+// TODO: test it before dropping ud2
+ud2
+	movq	%rbp,%rax		/* save caller`s frame pointer */
+	movq	$EXT(return_to_iret),%rcx /* interrupt return address 1 */
+	movq	$_return_to_iret_i,%rdx	/* interrupt return address 2 */
+
+0:	cmpq	32(%rax),%rcx		/* does this frame return to */
+					/* interrupt handler (1)? */
+	je	1f
+	cmpq	32(%rax),%rdx		/* interrupt handler (2)? */
+	je	2f			/* if not: */
+	movq	(%rax),%rax		/* try next frame */
+	jmp	0b
+
+1:	movq	$ttd_from_iret,32(%rax)	/* returns to kernel/user stack */
+	ret
+
+2:	movq	$ttd_from_iret_i,32(%rax)
+					/* returns to interrupt stack */
+	ret
+
+/*
+ * On return from keyboard interrupt, we will execute
+ * ttd_from_iret_i
+ *	if returning to an interrupt on the interrupt stack
+ * ttd_from_iret
+ *	if returning to an interrupt on the user or kernel stack
+ */
+ttd_from_iret:
+					/* save regs in known locations */
+#if	STAT_TIME
+	pushq	%rbx			/* caller`s %ebx is in reg */
+#else
+	movq	8(%rsp),%rax		/* get caller`s %ebx */
+	pushq	%rax			/* push on stack */
+#endif
+	pushq	%rbp
+	pushq	%rsi
+	pushq	%rdi
+	movq	%rsp,%rdi		/* pass regs */
+	call	_kttd_netentry		/* to kdb */
+	popq	%rdi			/* restore registers */
+	popq	%rsi
+	popq	%rbp
+#if	STAT_TIME
+	popq	%rbx
+#else
+	popq	%rax
+	movq	%rax,8(%rsp)
+#endif
+	jmp	EXT(return_to_iret)	/* normal interrupt return */
+
+ttd_from_iret_i:			/* on interrupt stack */
+	pop	%rdx			/* restore saved registers */
+	pop	%rcx
+	pop	%rax
+	pushq	$0			/* zero error code */
+	pushq	$0			/* zero trap number */
+	pusha				/* save general registers */
+	PUSH_SEGMENTS_ISR(%rdx)		/* save segment registers */
+	ud2				// TEST it
+	movq	%rsp,%rdx		/* pass regs, */
+	movq	$0,%rsi			/* code, */
+	movq	$-1,%rdi		/* type to kdb */
+	call	_kttd_trap
+	POP_SEGMENTS_ISR(%rdx)		/* restore segment registers */
+	popa				/* restore general registers */
+	addq	$16,%rsp
+
+// TODO: test it before dropping ud2
+movq (%rsp),%rax
+ud2
+	iretq
+
+#endif	/* MACH_TTD */
+
+#ifdef USER32
+/*
+ * System call enters through a call gate.  Flags are not saved -
+ * we must shuffle stack to look like trap save area.
+ *
+ * rsp->	old eip
+ *		old cs
+ *		old rsp
+ *		old ss
+ *
+ * eax contains system call number.
+ */
+ENTRY(syscall)
+syscall_entry:
+	pushf				/* save flags as soon as possible */
+syscall_entry_2:
+	cld				/* clear direction flag */
+
+	pushq	%rax			/* save system call number */
+	pushq	$0			/* clear trap number slot */
+
+	pusha				/* save the general registers */
+	PUSH_SEGMENTS(%rdx)		/* and the segment registers */
+	SET_KERNEL_SEGMENTS(%rdx)	/* switch to kernel data segment */
+
+/*
+ * Shuffle eflags,eip,cs into proper places
+ */
+
+	movq	R_EIP(%rsp),%rbx	/* eflags are in EIP slot */
+	movq	R_CS(%rsp),%rcx		/* eip is in CS slot */
+	movq	R_EFLAGS(%rsp),%rdx	/* cs is in EFLAGS slot */
+	movq	%rcx,R_EIP(%rsp)	/* fix eip */
+	movq	%rdx,R_CS(%rsp)		/* fix cs */
+	movq	%rbx,R_EFLAGS(%rsp)	/* fix eflags */
+
+	CPU_NUMBER_NO_STACK(%edx)
+	TIME_TRAP_SENTRY
+
+	movq	CX(EXT(kernel_stack),%rdx),%rbx
+					/* get current kernel stack */
+	xchgq	%rbx,%rsp		/* switch stacks - %ebx points to */
+					/* user registers. */
+					/* user regs pointer already set */
+
+/*
+ * Check for MACH or emulated system call
+ */
+syscall_entry_3:
+	movq	MY(ACTIVE_THREAD),%rdx
+					/* point to current thread */
+	movq	TH_TASK(%rdx),%rdx	/* point to task */
+	movq	TASK_EMUL(%rdx),%rdx	/* get emulation vector */
+	orq	%rdx,%rdx		/* if none, */
+	je	syscall_native		/* do native system call */
+	movq	%rax,%rcx		/* copy system call number */
+	subq	DISP_MIN(%rdx),%rcx	/* get displacement into syscall */
+					/* vector table */
+	jl	syscall_native		/* too low - native system call */
+	cmpq	DISP_COUNT(%rdx),%rcx	/* check range */
+	jnl	syscall_native		/* too high - native system call */
+	movq	DISP_VECTOR(%rdx,%rcx,4),%rdx
+					/* get the emulation vector */
+	orq	%rdx,%rdx		/* emulated system call if not zero */
+	jnz	syscall_emul
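The dispatch at syscall_entry_3 above amounts to the following range check on the task's emulation vector (C pseudocode; the struct mirrors the TASK_EMUL/DISP_* assembler offsets but is only a sketch, with 32-bit vector slots matching the USER32 scale-4 indexing):

	#include <stdint.h>

	struct eml_dispatch_sketch {
		long     disp_min;	/* lowest emulated syscall number */
		long     disp_count;	/* number of vector slots */
		uint32_t disp_vector[];	/* user handler addresses; 0 = native */
	};

	static uint32_t find_emulation_sketch(struct eml_dispatch_sketch *eml,
					      long nr)
	{
		if (eml == 0)
			return 0;		/* no vector: native Mach call */
		long idx = nr - eml->disp_min;
		if (idx < 0 || idx >= eml->disp_count)
			return 0;		/* out of range: native call */
		return eml->disp_vector[idx];	/* nonzero: emulate in user space */
	}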
+/*
+ * Native system call.
+ */
+syscall_native:
+	negl	%eax			/* get system call number */
+	jl	mach_call_range		/* out of range if it was positive */
+	cmpl	EXT(mach_trap_count),%eax /* check system call table bounds */
+	jg	mach_call_range		/* error if out of range */
+#if 0 /* debug hack to show the syscall number on the screen */
+	movb	%al,%dl
+	shrb	$4,%dl
+	orb	$0x30,%dl
+	movb	$0x0f,%dh
+	movw	%dx,0xb800a
+	movb	%al,%dl
+	andb	$0xf,%dl
+	orb	$0x30,%dl
+	movb	$0xf,%dh
+	movw	%dx,0xb800c
+#endif
+	shll	$5,%eax			/* manual indexing of mach_trap_t */
+	xorq	%r10,%r10
+	mov	EXT(mach_trap_table)(%rax),%r10
+					/* get number of arguments */
+	andq	%r10,%r10
+	jz	mach_call_call		/* skip argument copy if none */
+
+	movq	$USER_DS,%rdx		/* use user data segment for accesses */
+	mov	%dx,%fs
+	movq	%rsp,%r11		/* save kernel ESP for error recovery */
+
+	movq	R_UESP(%rbx),%rbp	/* get user stack pointer */
+	addq	$4,%rbp			/* Skip user return address */
+
+#define PARAM(reg,ereg) \
+	xorq	%reg,%reg		;\
+	RECOVER(mach_call_addr_push)	\
+	movl	%fs:(%rbp),%ereg	/* 1st parameter */ ;\
+	addq	$4,%rbp			;\
+	dec	%r10			;\
+	jz	mach_call_call
+
+	PARAM(rdi,edi)			/* 1st parameter */
+	PARAM(rsi,esi)			/* 2nd parameter */
+	PARAM(rdx,edx)			/* 3rd parameter */
+	PARAM(rcx,ecx)			/* 4th parameter */
+	PARAM(r8,r8d)			/* 5th parameter */
+	PARAM(r9,r9d)			/* 6th parameter */
+
+	lea	(%rbp,%r10,4),%rbp	/* point past last argument */
+	xorq	%r12,%r12
+
+0:	subq	$4,%rbp
+	RECOVER(mach_call_addr_push)
+	movl	%fs:(%rbp),%r12d
+	pushq	%r12			/* push argument on stack */
+	dec	%r10
+	jnz	0b			/* loop for all arguments */
+
+mach_call_call:
+
+#ifdef DEBUG
+	testb	$0xff,EXT(syscall_trace)
+	jz	0f
+	movq	%rax,%rdi
+	call	EXT(syscall_trace_print)
+	/* will return with syscallofs still (or again) in eax */
+0:
+#endif /* DEBUG */
+	call	*EXT(mach_trap_table)+8(%rax)
+					/* call procedure */
+	movq	%rsp,%rcx		/* get kernel stack */
+	or	$(KERNEL_STACK_SIZE-1),%rcx
+	movq	-7-IKS_SIZE(%rcx),%rsp	/* switch back to PCB stack */
+	movq	%rax,R_EAX(%rsp)	/* save return value */
+	jmp	_return_from_trap	/* return to user */
+
+/*
+ * Address out of range.  Change to page fault.
+ * %rbp holds failing address.
+ */
+mach_call_addr_push:
+	movq	%r11,%rsp		/* clean parameters from stack */
+mach_call_addr:
+	movq	%rbp,R_CR2(%rbx)	/* set fault address */
+	movq	$(T_PAGE_FAULT),R_TRAPNO(%rbx)
+					/* set page-fault trap */
+	movq	$(T_PF_USER),R_ERR(%rbx)
+					/* set error code - read user space */
+	jmp	_take_trap		/* treat as a trap */
+
+/*
+ * System call out of range.  Treat as invalid-instruction trap.
+ * (? general protection?)
+ */
+mach_call_range:
+	movq	$(T_INVALID_OPCODE),R_TRAPNO(%rbx)
+					/* set invalid-operation trap */
+	movq	$0,R_ERR(%rbx)		/* clear error code */
+	jmp	_take_trap		/* treat as a trap */
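The PARAM macro and the push loop above fetch the 32-bit user arguments with fault recovery; stripped of the RECOVER machinery, the logic is (a sketch; a copyin-style helper stands in for the recovery labels):

	#include <stdint.h>

	extern int copyin_sk(const void *uaddr, void *kaddr, unsigned len);

	/* USER32 ABI: arguments are 4-byte words on the user stack, just
	 * above the user return address; a faulting access becomes a
	 * T_PAGE_FAULT report against the bad address. */
	static int fetch_user_args_sketch(uint32_t user_esp, uint32_t *args,
					  int nargs)
	{
		uint32_t uaddr = user_esp + 4;	/* skip user return address */

		for (int i = 0; i < nargs; i++, uaddr += 4)
			if (copyin_sk((void *)(uintptr_t)uaddr, &args[i], 4))
				return -1;	/* -> mach_call_addr path */
		return 0;
	}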
+/*
+ * User space emulation of system calls.
+ * edx - user address to handle syscall
+ *
+ * User stack will become:
+ * ursp->	eflags
+ *		eip
+ * eax still contains syscall number.
+ */
+syscall_emul:
+	movq	$USER_DS,%rdi		/* use user data segment for accesses */
+	mov	%di,%fs
+
+/* XXX what about write-protected pages? */
+	movq	R_UESP(%rbx),%rdi	/* get user stack pointer */
+	subq	$16,%rdi		/* push space for new arguments */
+	movq	R_EFLAGS(%rbx),%rax	/* move flags */
+	RECOVER(syscall_addr)
+	movl	%eax,%fs:0(%rdi)	/* to user stack */
+	movl	R_EIP(%rbx),%eax	/* move eip */
+	RECOVER(syscall_addr)
+	movl	%eax,%fs:4(%rdi)	/* to user stack */
+	movq	%rdi,R_UESP(%rbx)	/* set new user stack pointer */
+	movq	%rdx,R_EIP(%rbx)	/* change return address to trap */
+	movq	%rbx,%rsp		/* back to PCB stack */
+// TODO: test it before dropping ud2
+ud2
+	jmp	_return_from_trap	/* return to user */
+
+/*
+ * Address error - address is in %edi.
+ */
+syscall_addr:
+	movq	%rdi,R_CR2(%rbx)	/* set fault address */
+	movq	$(T_PAGE_FAULT),R_TRAPNO(%rbx)
+					/* set page-fault trap */
+	movq	$(T_PF_USER),R_ERR(%rbx)
+					/* set error code - read user space */
+	jmp	_take_trap		/* treat as a trap */
+END(syscall)
+
+#else /* USER32 */
+
+/* Entry point for 64-bit syscalls.
+ * On entry we're still on the user stack, so better not use it. Instead we
+ * save the thread state immediately in thread->pcb->iss, then try to invoke
+ * the syscall.
+ * Note: emulated syscalls seem to not be used anymore in GNU/Hurd, so they
+ * are not handled here.
+ * TODO:
+   - for now we assume the return address is canonical, but apparently there
+     can be cases where it's not (see how Linux handles this). Does it apply
+     here?
+   - check that the case where a task is suspended, and later returns via
+     iretq from return_from_trap, works fine in all combinations
+ */
+ENTRY(syscall64)
+	/* RFLAGS[32:63] are reserved, so combine syscall num (32 bit) and
+	 * eflags in RAX to allow using r11 as temporary register
+	 */
+	shlq	$32,%r11
+	shlq	$32,%rax	/* make sure bits 32:63 of %rax are zero */
+	shrq	$32,%rax
+	or	%r11,%rax
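The four instructions above free R11 by parking the user EFLAGS in the high half of RAX, which the sign-extension below later masks off again; numerically (illustrative C):

	#include <stdint.h>

	static uint64_t pack_nr_and_flags_sketch(uint32_t syscall_nr,
						 uint64_t r11_rflags)
	{
		/* RFLAGS bits 32..63 are reserved, so the low 32 bits of
		 * R11 carry everything worth keeping. */
		return ((uint64_t)(uint32_t)r11_rflags << 32) | syscall_nr;
	}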
+	/* Save thread state in pcb->iss, as on exception entry.
+	 * Since this is triggered synchronously from userspace, we could
+	 * save only the callee-preserved status according to the C ABI,
+	 * plus RIP and EFLAGS for sysret
+	 */
+	movq	MY(ACTIVE_THREAD),%r11	/* point to current thread */
+	movq	TH_PCB(%r11),%r11	/* point to pcb */
+	addq	$ PCB_ISS,%r11		/* point to saved state */
+
+	mov	%rsp,R_UESP(%r11)	/* callee-preserved register */
+	mov	%rcx,R_EIP(%r11)	/* syscall places user RIP in RCX */
+	mov	%rbx,R_EBX(%r11)	/* callee-preserved register */
+	mov	%rax,%rbx		/* Now we can unpack eflags again */
+	shr	$32,%rbx
+	mov	%rbx,R_EFLAGS(%r11)	/* ... and save them in pcb as well */
+	mov	%rbp,R_EBP(%r11)	/* callee-preserved register */
+	mov	%r12,R_R12(%r11)	/* callee-preserved register */
+	mov	%r13,R_R13(%r11)	/* callee-preserved register */
+	mov	%r14,R_R14(%r11)	/* callee-preserved register */
+	mov	%r15,R_R15(%r11)	/* callee-preserved register */
+
+	/* Save syscall number and args for SYSCALL_EXAMINE/MSG_EXAMINE in glibc.
+	 * Note: syscall number is only 32 bit, in EAX, so we sign-extend it in
+	 * RAX to mask the EFLAGS bits.
+	 */
+	cdqe				/* sign-extend EAX in RAX */
+	mov	%rax,R_EAX(%r11)	/* syscall number */
+	mov	%rdi,R_EDI(%r11)	/* syscall arg0 */
+	mov	%rsi,R_ESI(%r11)	/* syscall arg1 */
+	mov	%rdx,R_EDX(%r11)	/* syscall arg2 */
+	mov	%r10,R_R10(%r11)	/* syscall arg3 */
+	mov	%r8,R_R8(%r11)		/* syscall arg4 */
+	mov	%r9,R_R9(%r11)		/* syscall arg5 */
+
+	mov	%r11,%rbx		/* prepare for error handling */
+	mov	%r10,%rcx		/* fix arg3 location according to C ABI */
+
+	/* switch to kernel stack, then we can enable interrupts */
+	CPU_NUMBER_NO_STACK(%r11d)
+	movq	CX(EXT(kernel_stack),%r11),%rsp
+	sti
+
+	/* Now we have saved state and args 1-6 are in place.
+	 * Before invoking the syscall we do some bounds checking and,
+	 * if we have more than 6 arguments, we need to copy the
+	 * remaining ones to the kernel stack, handling page faults when
+	 * accessing the user stack.
+	 */
+	negl	%eax			/* get system call number */
+	jl	_syscall64_range	/* out of range if it was positive */
+	cmpl	EXT(mach_trap_count),%eax /* check system call table bounds */
+	jg	_syscall64_range	/* error if out of range */
+	shll	$5,%eax			/* manual indexing of mach_trap_t */
+
+	/* check if we need to place some arguments on the stack */
+_syscall64_args_stack:
+	mov	EXT(mach_trap_table)(%rax),%r10	/* get number of arguments */
+	subq	$6,%r10			/* the first 6 args are already in place */
+	jle	_syscall64_call		/* skip argument copy if num args <= 6 */
+
+	movq	R_UESP(%rbx),%r11	/* get user stack pointer */
+	addq	$8,%r11			/* Skip user return address */
+
+	lea	(%r11,%r10,8),%r11	/* point past last argument */
+
+0:	subq	$8,%r11
+	RECOVER(_syscall64_addr_push)
+	mov	(%r11),%r12
+	pushq	%r12			/* push argument on stack */
+	dec	%r10
+	jnz	0b			/* loop for all remaining arguments */
+
+_syscall64_call:
+	call	*EXT(mach_trap_table)+8(%rax)	/* call procedure */
+
+_syscall64_check_for_ast:
+	/* Check for ast. */
+	CPU_NUMBER_NO_GS(%r11d)
+	cmpl	$0,CX(EXT(need_ast),%r11)
+	jz	_syscall64_restore_state
+
+	/* Save the syscall return value, both on our stack, for the case
+	 * i386_astintr returns normally, and in the PCB stack, in case it
+	 * instead calls thread_block(thread_exception_return).
+	 */
+	pushq	%rax			/* save the return value on our stack */
+	pushq	$0			/* dummy value to keep the stack aligned */
+
+	/* Find the PCB stack. */
+	movq	%rsp,%rcx
+	or	$(KERNEL_STACK_SIZE-1),%rcx
+	movq	-7-IKS_SIZE(%rcx),%rcx
+
+	movq	%rax,R_EAX(%rcx)	/* save the return value in the PCB stack */
+	call	EXT(i386_astintr)
+	popq	%rax
+	popq	%rax			/* restore the return value */
+	jmp	_syscall64_check_for_ast /* check again */
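For calls with more than six arguments, the loop above copies the extras from the user stack; in C terms (a sketch; fault recovery again stands in for the RECOVER entry, and the assembly copies from the last argument down so the words end up in call order on the kernel stack):

	#include <stdint.h>

	extern int copyin_sk(const void *uaddr, void *kaddr, unsigned len);

	static int fetch_extra_args_sketch(uint64_t user_rsp, uint64_t *kargs,
					   int nargs)
	{
		int extra = nargs - 6;	/* args 0..5 arrived in registers */

		for (int i = 0; i < extra; i++) {
			/* +8 skips the user return address */
			uint64_t uaddr = user_rsp + 8 + 8 * (uint64_t)i;
			if (copyin_sk((void *)uaddr, &kargs[i], 8))
				return -1;	/* -> _syscall64_addr_push path */
		}
		return 0;
	}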
+_syscall64_restore_state:
+	/* Restore thread state and return to user using sysret. */
+	cli			/* block interrupts when using the user stack in kernel space */
+	movq	MY(ACTIVE_THREAD),%r11	/* point to current thread */
+	movq	TH_PCB(%r11),%r11	/* point to pcb */
+	addq	$ PCB_ISS,%r11		/* point to saved state */
+
+	/* Restore syscall args. Note: we can't restore the syscall number in
+	 * RAX because it needs to hold the return value.*/
+	mov	R_EDI(%r11),%rdi	/* syscall arg0 */
+	mov	R_ESI(%r11),%rsi	/* syscall arg1 */
+	mov	R_EDX(%r11),%rdx	/* syscall arg2 */
+	mov	R_R10(%r11),%r10	/* syscall arg3 */
+	mov	R_R8(%r11),%r8		/* syscall arg4 */
+	mov	R_R9(%r11),%r9		/* syscall arg5 */
+
+	mov	R_UESP(%r11),%rsp	/* callee-preserved register,
+					 * also switch back to user stack */
+	mov	R_EIP(%r11),%rcx	/* sysret convention */
+	mov	R_EBX(%r11),%rbx	/* callee-preserved register */
+	mov	R_EBP(%r11),%rbp	/* callee-preserved register */
+	mov	R_R12(%r11),%r12	/* callee-preserved register */
+	mov	R_R13(%r11),%r13	/* callee-preserved register */
+	mov	R_R14(%r11),%r14	/* callee-preserved register */
+	mov	R_R15(%r11),%r15	/* callee-preserved register */
+	mov	R_EFLAGS(%r11),%r11	/* sysret convention */
+
+	sysretq		/* fast return to user-space, the thread didn't block */
+
+/* Error handling fragments, from here we jump directly to the trap handler */
+_syscall64_addr_push:
+	movq	%r11,R_CR2(%rbx)	/* set fault address */
+	movq	$(T_PAGE_FAULT),R_TRAPNO(%rbx)	/* set page-fault trap */
+	movq	$(T_PF_USER),R_ERR(%rbx) /* set error code - read user space */
+	jmp	_take_trap		/* treat as a trap */
+
+_syscall64_range:
+	movq	$(T_INVALID_OPCODE),R_TRAPNO(%rbx)
+					/* set invalid-operation trap */
+	movq	$0,R_ERR(%rbx)		/* clear error code */
+	jmp	_take_trap		/* treat as a trap */
+
+END(syscall64)
+#endif /* USER32 */
+
+	.data
+DATA(cpu_features)
+DATA(cpu_features_edx)
+	.long	0
+DATA(cpu_features_ecx)
+	.long	0
+	.text
+
+/* Discover what kind of cpu we have; return the family number
+   (3, 4, 5, 6, for 386, 486, 586, 686 respectively). */
+ENTRY(discover_x86_cpu_type)
+	/* We are a modern enough processor to have the CPUID instruction;
+	   use it to find out what we are. */
+	movl	$1,%eax			/* Fetch CPU type info ... */
+	cpuid				/* ... into eax */
+	movl	%ecx,cpu_features_ecx	/* Keep a copy */
+	movl	%edx,cpu_features_edx	/* Keep a copy */
+	shrl	$8,%eax			/* Slide family bits down */
+	andl	$15,%eax		/* And select them */
+	ret				/* And return */
+
+
+/**/
+/*
+ * Utility routines.
+ */
+
+ENTRY(copyin)
+	xchgq	%rsi,%rdi		/* Get user source and kernel destination */
+
+copyin_remainder:
+	/*cld*/				/* count up: default mode in all GCC code */
+	movq	%rdx,%rcx		/* move by longwords first */
+	shrq	$3,%rcx
+	RECOVER(copyin_fail)
+	rep
+	movsq				/* move longwords */
+	movq	%rdx,%rcx		/* now move remaining bytes */
+	andq	$7,%rcx
+	RECOVER(copyin_fail)
+	rep
+	movsb
+	xorq	%rax,%rax		/* return 0 for success */
+
+copyin_ret:
+	ret				/* and return */
+
+copyin_fail:
+	movq	$1,%rax			/* return 1 for failure */
+	jmp	copyin_ret		/* pop frame and return */
+
+bogus:
+	ud2
+
+ENTRY(copyout)
+	xchgq	%rsi,%rdi		/* Get user source and kernel destination */
+
+copyout_remainder:
+	movq	%rdx,%rax		/* use count */
+	/*cld*/				/* count up: always this way in GCC code */
+	movq	%rax,%rcx		/* move by longwords first */
+	shrq	$3,%rcx
+	RECOVER(copyout_fail)
+	rep
+	movsq
+	movq	%rax,%rcx		/* now move remaining bytes */
+	andq	$7,%rcx
+	RECOVER(copyout_fail)
+	rep
+	movsb				/* move */
+	xorq	%rax,%rax		/* return 0 for success */
+
+copyout_ret:
+	ret				/* and return */
+
+copyout_fail:
+	movq	$1,%rax			/* return 1 for failure */
+	jmp	copyout_ret		/* pop frame and return */
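copyin and copyout above return 0 on success and 1 when a user access faults; the RECOVER entries are what let the page-fault handler land on the *_fail labels instead of panicking. Typical kernel-side use looks like this (a sketch of the calling convention only):

	#include <stddef.h>

	extern int copyin(const void *userbuf, void *kernelbuf, size_t nbytes);
	extern int copyout(const void *kernelbuf, void *userbuf, size_t nbytes);

	/* Illustrative helper: fetch one word from user space. */
	static int get_user_word_sketch(const void *uaddr, long *out)
	{
		return copyin(uaddr, out, sizeof *out) ? 1 : 0;	/* 1 = fault */
	}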
+/*
+ * int inst_fetch(int eip, int cs);
+ *
+ * Fetch instruction byte.  Return -1 if invalid address.
+ */
+ENTRY(inst_fetch)
+	movq	S_ARG1, %rax		/* get segment */
+	movw	%ax,%fs			/* into FS */
+	movq	S_ARG0, %rax		/* get offset */
+	RETRY(EXT(inst_fetch))		/* re-load FS on retry */
+	RECOVER(_inst_fetch_fault)
+	movzbq	%fs:(%rax),%rax		/* load instruction byte */
+	ret
+
+_inst_fetch_fault:
+	movq	$-1,%rax		/* return -1 if error */
+	ret
+
+
+/*
+ * Done with recovery and retry tables.
+ */
+	RECOVER_TABLE_END
+	RETRY_TABLE_END
+
+
+
+/*
+ * cpu_shutdown()
+ * Force reboot
+ */
+null_idt:
+	.space	8 * 32
+
+null_idtr:
+	.word	8 * 32 - 1
+	.quad	null_idt
+
+Entry(cpu_shutdown)
+	lidt	null_idtr		/* disable the interrupt handler */
+	xor	%rcx,%rcx		/* generate a divide by zero */
+	div	%rcx,%rax		/* reboot now */
+	ret				/* this will "never" be executed */