From 5e0b8d508ed51004bd836384293be00950ee62c9 Mon Sep 17 00:00:00 2001 From: Pasha Date: Tue, 20 Feb 2024 18:49:50 +0000 Subject: init gnumach copy --- x86_64/Makefrag.am | 245 +++++++ x86_64/_setjmp.S | 65 ++ x86_64/boothdr.S | 222 ++++++ x86_64/configfrag.ac | 63 ++ x86_64/copy_user.c | 613 ++++++++++++++++ x86_64/cswitch.S | 148 ++++ x86_64/debug_trace.S | 56 ++ x86_64/idt_inittab.S | 148 ++++ x86_64/include/mach/x86_64 | 1 + x86_64/include/syscall_sw.h | 40 ++ x86_64/interrupt.S | 140 ++++ x86_64/kdasm.S | 133 ++++ x86_64/ldscript | 227 ++++++ x86_64/locore.S | 1640 +++++++++++++++++++++++++++++++++++++++++++ x86_64/spl.S | 265 +++++++ x86_64/x86_64 | 1 + x86_64/xen_boothdr.S | 190 +++++ x86_64/xen_locore.S | 146 ++++ 18 files changed, 4343 insertions(+) create mode 100644 x86_64/Makefrag.am create mode 100644 x86_64/_setjmp.S create mode 100644 x86_64/boothdr.S create mode 100644 x86_64/configfrag.ac create mode 100644 x86_64/copy_user.c create mode 100644 x86_64/cswitch.S create mode 100644 x86_64/debug_trace.S create mode 100644 x86_64/idt_inittab.S create mode 120000 x86_64/include/mach/x86_64 create mode 100644 x86_64/include/syscall_sw.h create mode 100644 x86_64/interrupt.S create mode 100644 x86_64/kdasm.S create mode 100644 x86_64/ldscript create mode 100644 x86_64/locore.S create mode 100644 x86_64/spl.S create mode 120000 x86_64/x86_64 create mode 100644 x86_64/xen_boothdr.S create mode 100644 x86_64/xen_locore.S (limited to 'x86_64') diff --git a/x86_64/Makefrag.am b/x86_64/Makefrag.am new file mode 100644 index 0000000..b0bc45c --- /dev/null +++ b/x86_64/Makefrag.am @@ -0,0 +1,245 @@ +# Makefile fragment for x86_64. + +# Copyright (C) 1997, 1999, 2006, 2007 Free Software Foundation, Inc. + +# Permission to use, copy, modify and distribute this software and its +# documentation is hereby granted, provided that both the copyright +# notice and this permission notice appear in all copies of the +# software, derivative works or modified versions, and any portions +# thereof, and that both notices appear in supporting documentation. +# +# THE FREE SOFTWARE FOUNDATION ALLOWS FREE USE OF THIS SOFTWARE IN ITS +# "AS IS" CONDITION. THE FREE SOFTWARE FOUNDATION DISCLAIMS ANY +# LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE +# USE OF THIS SOFTWARE. + +# +# Building a distribution. +# +EXTRA_DIST += \ + x86_64/ldscript \ + x86_64/x86_64 \ + x86_64/include/mach/x86_64 + +if HOST_x86_64 + +# +# Source files for any x86_64 kernel. 
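+# Most of these are shared with the i386 port; the x86_64/ directory
+# mainly provides the low-level assembly (boot, interrupts, context
+# switch, trap entry) and the user/kernel message copy routines.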
+# + +libkernel_a_SOURCES += \ + i386/i386at/acpi_parse_apic.h \ + i386/i386at/acpi_parse_apic.c \ + i386/i386at/autoconf.c \ + i386/i386at/autoconf.h \ + i386/i386at/biosmem.c \ + i386/i386at/biosmem.h \ + i386/i386at/conf.c \ + i386/i386at/cons_conf.c \ + i386/i386at/elf.h \ + i386/i386at/idt.h \ + i386/i386at/model_dep.c \ + i386/i386at/model_dep.h \ + i386/include/mach/sa/stdarg.h + +if PLATFORM_at +libkernel_a_SOURCES += \ + x86_64/boothdr.S \ + i386/i386at/com.c \ + i386/i386at/com.h \ + i386/i386at/comreg.h \ + i386/i386at/cram.h \ + i386/i386at/disk.h \ + i386/i386at/i8250.h \ + i386/i386at/immc.c \ + i386/i386at/int_init.c \ + i386/i386at/int_init.h \ + x86_64/interrupt.S \ + i386/i386at/kd.c \ + i386/i386at/kd.h \ + i386/i386at/kd_event.c \ + i386/i386at/kd_event.h \ + i386/i386at/kd_queue.c \ + i386/i386at/kd_queue.h \ + i386/i386at/kd_mouse.c \ + i386/i386at/kd_mouse.h \ + x86_64/kdasm.S \ + i386/i386at/kdsoft.h \ + i386/i386at/mem.c \ + i386/i386at/mem.h \ + i386/i386at/rtc.c \ + i386/i386at/rtc.h +endif + +# +# `lpr' device support. +# + +if enable_lpr +libkernel_a_SOURCES += \ + i386/i386at/lpr.c \ + i386/i386at/lpr.h +endif + + +# +# Further source files for any x86_64 kernel. +# + +libkernel_a_SOURCES += \ + i386/i386/percpu.h \ + i386/i386/percpu.c \ + x86_64/cswitch.S \ + x86_64/copy_user.c \ + x86_64/debug_trace.S \ + x86_64/idt_inittab.S \ + x86_64/locore.S \ + x86_64/spl.S + +if PLATFORM_at +libkernel_a_SOURCES += \ + i386/i386/apic.h \ + i386/i386/apic.c \ + i386/i386/hardclock.c \ + i386/i386/hardclock.h \ + i386/i386/irq.c \ + i386/i386/irq.h \ + i386/i386/msr.h \ + i386/i386/pit.c \ + i386/i386/pit.h + +if enable_apic +libkernel_a_SOURCES += \ + i386/i386at/ioapic.c +else +libkernel_a_SOURCES += \ + i386/i386/pic.c \ + i386/i386/pic.h \ + i386/i386at/pic_isa.c +endif +endif + +# +# KDB support. +# + +if enable_kdb +libkernel_a_SOURCES += \ + x86_64/_setjmp.S +endif + + +# +# Files from the generic sources that we want. +# + +libkernel_a_SOURCES += \ + chips/busses.c \ + chips/busses.h \ + device/cirbuf.c + +# +# Automatically generated source files. +# +# See Makerules.mig.am. +# + +nodist_lib_dep_tr_for_defs_a_SOURCES += \ + i386/i386/mach_i386.server.defs.c +nodist_libkernel_a_SOURCES += \ + i386/i386/mach_i386.server.h \ + i386/i386/mach_i386.server.c \ + i386/i386/mach_i386.server.msgids +# i386/i386/mach_i386.server.defs + +nodist_libkernel_a_SOURCES += \ + i386/i386/i386asm.h + +# +# Architecture specialities. +# + +AM_CPPFLAGS += \ + -I$(top_srcdir)/i386 \ + -I$(top_srcdir)/i386/i386 \ + -I$(top_srcdir)/i386/include/mach/sa + +AM_CFLAGS += \ + -mno-red-zone \ + -mcmodel=kernel \ + -mno-3dnow \ + -mno-mmx \ + -mno-sse \ + -mno-sse2 + +# +# Installation. 
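+# The mach/x86_64 headers installed below are mostly the i386 ones reused
+# as-is; only syscall_sw.h differs between the 32-bit and 64-bit user ABIs.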
+# + +include_mach_x86_64dir = $(includedir)/mach/x86_64 +include_mach_x86_64_HEADERS = \ + i386/include/mach/i386/asm.h \ + i386/include/mach/i386/boolean.h \ + i386/include/mach/i386/eflags.h \ + i386/include/mach/i386/exception.h \ + i386/include/mach/i386/fp_reg.h \ + i386/include/mach/i386/ioccom.h \ + i386/include/mach/i386/kern_return.h \ + i386/include/mach/i386/mach_i386.defs \ + i386/include/mach/i386/mach_i386_types.h \ + i386/include/mach/i386/machine_types.defs \ + i386/include/mach/i386/multiboot.h \ + i386/include/mach/i386/thread_status.h \ + i386/include/mach/i386/trap.h \ + i386/include/mach/i386/vm_param.h \ + i386/include/mach/i386/vm_types.h + +if enable_user32 +include_mach_x86_64_HEADERS += i386/include/mach/i386/syscall_sw.h +else +include_mach_x86_64_HEADERS += x86_64/include/syscall_sw.h +endif + +# +# Platform specific parts. +# + +KERNEL_MAP_BASE=0xffffffff80000000 + +if PLATFORM_at +# For now simply keep all the kernel virtual space in the last 2G. +# We could use a more elaborate schema if needed (e.g. reserving a +# larger area for directmap or the kernel heap)), I think only the +# test/bss/data sections need to be placed here kere because of +# -mcmodel=kernel +gnumach_LINKFLAGS += \ + --defsym _START_MAP=$(_START_MAP) \ + --defsym _START=$(_START_MAP) \ + --defsym KERNEL_MAP_SHIFT=$(KERNEL_MAP_BASE) \ + -z max-page-size=0x1000 \ + -T '$(srcdir)'/x86_64/ldscript + +AM_CCASFLAGS += \ + -Ii386 +endif + +if PLATFORM_xen +libkernel_a_SOURCES += \ + x86_64/xen_locore.S \ + x86_64/xen_boothdr.S \ + i386/xen/xen.c \ + i386/i386/xen.h + +gnumach_LINKFLAGS += \ + --defsym _START_MAP=$(KERNEL_MAP_BASE) \ + --defsym _START=$(KERNEL_MAP_BASE) \ + --defsym KERNEL_MAP_SHIFT=0 \ + -T '$(srcdir)'/x86_64/ldscript +endif + +AM_CFLAGS += -D_START_MAP=$(_START_MAP) \ + -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE) +AM_CCASFLAGS += -D_START_MAP=$(_START_MAP) \ + -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE) + +endif # HOST_x86_64 diff --git a/x86_64/_setjmp.S b/x86_64/_setjmp.S new file mode 100644 index 0000000..5714f43 --- /dev/null +++ b/x86_64/_setjmp.S @@ -0,0 +1,65 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * C library -- _setjmp, _longjmp + * + * _longjmp(a,v) + * will generate a "return(v)" from + * the last call to + * _setjmp(a) + * by restoring registers from the stack, + * The previous signal state is NOT restored. 
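+ *
+ * The jmp_buf used here holds 8 quadwords (64 bytes):
+ * %rbx, %rbp, %r12-%r15, the caller's %rsp and its return address,
+ * in that order.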
+ * + */ + +#include + +ENTRY(_setjmp) + movq %rbx,0(%rdi) + movq %rbp,8(%rdi) /* save frame pointer of caller */ + movq %r12,16(%rdi) + movq %r13,24(%rdi) + movq %r14,32(%rdi) + movq %r15,40(%rdi) + popq %rdx + movq %rsp,48(%rdi) /* save stack pointer of caller */ + movq %rdx,56(%rdi) /* save pc of caller */ + xorq %rax,%rax + jmp *%rdx + +ENTRY(_longjmp) + movq %rsi,%rax /* return(v) */ + movq 0(%rdi),%rbx + movq 8(%rdi),%rbp + movq 16(%rdi),%r12 + movq 24(%rdi),%r13 + movq 32(%rdi),%r14 + movq 40(%rdi),%r15 + movq 48(%rdi),%rsp + orq %rax,%rax + jnz 0f + incq %rax +0: jmp *56(%rdi) /* done, return.... */ diff --git a/x86_64/boothdr.S b/x86_64/boothdr.S new file mode 100644 index 0000000..0ab9bd5 --- /dev/null +++ b/x86_64/boothdr.S @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2022 Free Software Foundation + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the program ; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include + +#include +#include +#include + /* + * This section will be put first into .boot. See also x86_64/ldscript. + */ + .section .boot.text,"ax" + /* We should never be entered this way. */ + .globl boot_start +boot_start: + + .code32 + jmp boot_entry + + /* MultiBoot header - see multiboot.h. */ +#define MULTIBOOT_MAGIC 0x1BADB002 +#define MULTIBOOT_FLAGS 0x00000003 + P2ALIGN(2) +boot_hdr: + .long MULTIBOOT_MAGIC + .long MULTIBOOT_FLAGS + /* + * The next item here is the checksum. + * XX this works OK until we need at least the 30th bit. + */ + .long - (MULTIBOOT_MAGIC+MULTIBOOT_FLAGS) + + .global _start +_start: +boot_entry: + /* + * Prepare minimal page mapping to jump to 64 bit and to C code. + * The first 4GB is identity mapped, and the first 2GB are re-mapped + * to high addresses at KERNEL_MAP_BASE + */ + + movl $p3table,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p4table) + /* + * Fill 4 entries in L3 table to cover the whole 32-bit 4GB address + * space. Part of it might be remapped later if the kernel is mapped + * below 4G. + */ + movl $p2table,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p3table) + movl $p2table1,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p3table + 8) + movl $p2table2,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p3table + 16) + movl $p2table3,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p3table + 24) + /* point each page table level two entry to a page */ + mov $0,%ecx +.map_p2_table: + mov $0x200000,%eax // 2MiB page, should be always available + mul %ecx + or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4k + mov %eax,p2table(,%ecx,8) + inc %ecx + cmp $2048,%ecx // 512 entries per table, map 4 L2 tables + jne .map_p2_table + + /* + * KERNEL_MAP_BASE must me aligned to 2GB. + * Depending on kernel starting address, we might need to add another + * entry in the L4 table (controlling 512 GB chunks). In any case, we + * add two entries in L3 table to make sure we map 2GB for the kernel. 
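+ * For example, with the default KERNEL_MAP_BASE of 0xffffffff80000000,
+ * (KERNEL_MAP_BASE >> 39) & 0x1FF selects PML4 slot 511 and
+ * (KERNEL_MAP_BASE >> 30) & 0x1FF selects PDPT slot 510, i.e. the
+ * topmost 2GB of the virtual address space.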
+ * Note that this may override part of the mapping create above. + */ +.kernel_map: +#if KERNEL_MAP_BASE >= (1U << 39) + movl $p3ktable,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p4table + (8 * ((KERNEL_MAP_BASE >> 39) & 0x1FF))) // select 512G block + movl $p2ktable1,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p3ktable + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block + movl $p2ktable2,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p3ktable + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block +#else + movl $p2ktable1,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p3table + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block + movl $p2ktable2,%eax + or $(PTE_V|PTE_W),%eax + movl %eax,(p3table + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block +#endif + + mov $0,%ecx +.map_p2k_table: + mov $0x200000,%eax // 2MiB page, should be always available + mul %ecx + or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4K + mov %eax,p2ktable1(,%ecx,8) + inc %ecx + cmp $1024,%ecx // 512 entries per table, map 2 L2 tables + jne .map_p2k_table + +switch64: + /* + * Jump to 64 bit mode, we have to + * - enable PAE + * - enable long mode + * - enable paging and load the tables filled above in CR3 + * - jump to a 64-bit code segment + */ + mov %cr4,%eax + or $CR4_PAE,%eax + mov %eax,%cr4 + mov $0xC0000080,%ecx // select EFER register + rdmsr + or $(1 << 8),%eax // long mode enable bit + wrmsr + mov $p4table,%eax + mov %eax,%cr3 + mov %cr0,%eax + or $CR0_PG,%eax + or $CR0_WP,%eax + mov %eax,%cr0 + + lgdt gdt64pointer + movw $0,%ax + movw %ax,%fs + movw %ax,%gs + movw $16,%ax + movw %ax,%ds + movw %ax,%es + movw %ax,%ss + ljmp $8,$boot_entry64 + + .code64 + +boot_entry64: + /* Switch to our own interrupt stack. */ + movq $solid_intstack+INTSTACK_SIZE-16, %rax + andq $(~15),%rax + movq %rax,%rsp + + /* Reset EFLAGS to a known state. */ + pushq $0 + popf + /* save multiboot info for later */ + movq %rbx,%r8 + + /* Fix ifunc entries */ + movq $__rela_iplt_start,%rsi + movq $__rela_iplt_end,%rdi +iplt_cont: + cmpq %rdi,%rsi + jae iplt_done + movq (%rsi),%rbx /* r_offset */ + movb 4(%rsi),%al /* info */ + cmpb $42,%al /* IRELATIVE */ + jnz iplt_next + call *(%ebx) /* call ifunc */ + movq %rax,(%rbx) /* fixed address */ +iplt_next: + addq $8,%rsi + jmp iplt_cont +iplt_done: + + /* restore multiboot info */ + movq %r8,%rdi + /* Jump into C code. */ + call EXT(c_boot_entry) + /* not reached */ + nop + + .code32 + .section .boot.data + .align 4096 +#define SEG_ACCESS_OFS 40 +#define SEG_GRANULARITY_OFS 52 +gdt64: + .quad 0 +gdt64code: + .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_CODE_R << SEG_ACCESS_OFS) | (SZ_64 << SEG_GRANULARITY_OFS) +gdt64data: + .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_DATA_W << SEG_ACCESS_OFS) +gdt64end: + .skip (4096 - (gdt64end - gdt64)) +gdt64pointer: + .word gdt64end - gdt64 - 1 + .quad gdt64 + + .section .boot.data + .align 4096 +p4table: .space 4096 +p3table: .space 4096 +p2table: .space 4096 +p2table1: .space 4096 +p2table2: .space 4096 +p2table3: .space 4096 +p3ktable: .space 4096 +p2ktable1: .space 4096 +p2ktable2: .space 4096 diff --git a/x86_64/configfrag.ac b/x86_64/configfrag.ac new file mode 100644 index 0000000..f119a9a --- /dev/null +++ b/x86_64/configfrag.ac @@ -0,0 +1,63 @@ +dnl Configure fragment for x86_64. + +dnl Copyright (C) 1999, 2004, 2006, 2007, 2008 Free Software Foundation, Inc. 
+ +dnl Permission to use, copy, modify and distribute this software and its +dnl documentation is hereby granted, provided that both the copyright +dnl notice and this permission notice appear in all copies of the +dnl software, derivative works or modified versions, and any portions +dnl thereof, and that both notices appear in supporting documentation. +dnl +dnl THE FREE SOFTWARE FOUNDATION ALLOWS FREE USE OF THIS SOFTWARE IN ITS +dnl "AS IS" CONDITION. THE FREE SOFTWARE FOUNDATION DISCLAIMS ANY +dnl LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE +dnl USE OF THIS SOFTWARE. + +# +# Definitions. +# + +[case $host_cpu in + x86_64)] + AM_CONDITIONAL([HOST_x86_64], [true]) + + # Some of the x86_64-specific code checks for these. + AC_DEFINE([__ELF__], [1], [__ELF__]) + + # Determines the size of the CPU cache line. + AC_DEFINE([CPU_L1_SHIFT], [6], [CPU_L1_SHIFT]) + + [if test x"$enable_user32" = xyes ; then + user32_cpu=i686 + fi] + + [# Does the architecture provide machine-specific interfaces? + mach_machine_routines=1 + + enable_pae=yes;; + *)] + AM_CONDITIONAL([HOST_x86_64], [false])[;; +esac + +case $host_platform in + at)] + AM_CONDITIONAL([PLATFORM_at], [true])[;; + *)] + AM_CONDITIONAL([PLATFORM_at], [false])[;; +esac] + +[case $host_platform:$host_cpu in + at:x86_64)] + # should be 4, but we do not support shared IRQ for these + ncom=2 + nlpr=1 + AC_DEFINE([ATX86_64], [1], [ATX86_64])[;; + xen:x86_64)] + AC_DEFINE([ATX86_64], [1], [ATX86_64])[;; + *) + :;; +esac] + +dnl Local Variables: +dnl mode: autoconf +dnl End: diff --git a/x86_64/copy_user.c b/x86_64/copy_user.c new file mode 100644 index 0000000..c6e125d --- /dev/null +++ b/x86_64/copy_user.c @@ -0,0 +1,613 @@ +/* + * Copyright (C) 2023 Free Software Foundation + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the program ; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include + +#include +#include + +#include + + +/* Mach field descriptors measure size in bits */ +#define descsize_to_bytes(n) (n / 8) +#define bytes_to_descsize(n) (n * 8) + +#ifdef USER32 +/* Versions of mach_msg_type_t and mach_msg_type_long that are expected from the 32 bit userland. */ +typedef struct { + unsigned int msgt_name : 8, + msgt_size : 8, + msgt_number : 12, + msgt_inline : 1, + msgt_longform : 1, + msgt_deallocate : 1, + msgt_unused : 1; +} mach_msg_user_type_t; +_Static_assert(sizeof(mach_msg_user_type_t) == 4); + +typedef struct { + mach_msg_user_type_t msgtl_header; + unsigned short msgtl_name; + unsigned short msgtl_size; + natural_t msgtl_number; +} mach_msg_user_type_long_t; +_Static_assert(sizeof(mach_msg_user_type_long_t) == 12); +#else +typedef mach_msg_type_t mach_msg_user_type_t; +typedef mach_msg_type_long_t mach_msg_user_type_long_t; +#endif /* USER32 */ + +/* +* Helper to unpack the relevant fields of a msg type; the fields are different +* depending on whether is long form or not. 
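+* The short form packs name, size and number as bitfields in the
+* mach_msg_type_t header itself, while the long form carries them in
+* separate wider fields, so the amount of data consumed from the
+* message (both kernel-side and user-side) differs between the two cases.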
+.*/ +static inline void unpack_msg_type(vm_offset_t addr, + mach_msg_type_name_t *name, + mach_msg_type_size_t *size, + mach_msg_type_number_t *number, + boolean_t *is_inline, + vm_size_t *user_amount, + vm_size_t *kernel_amount) +{ + mach_msg_type_t* kmt = (mach_msg_type_t*)addr; + *is_inline = kmt->msgt_inline; + if (kmt->msgt_longform) + { + mach_msg_type_long_t* kmtl = (mach_msg_type_long_t*)addr; + *name = kmtl->msgtl_name; + *size = kmtl->msgtl_size; + *number = kmtl->msgtl_number; + *kernel_amount = sizeof(mach_msg_type_long_t); + *user_amount = sizeof(mach_msg_user_type_long_t); + } + else + { + *name = kmt->msgt_name; + *size = kmt->msgt_size; + *number = kmt->msgt_number; + *kernel_amount = sizeof(mach_msg_type_t); + *user_amount = sizeof(mach_msg_user_type_t); + } +} + +#ifdef USER32 +static inline void mach_msg_user_type_to_kernel(const mach_msg_user_type_t *u, + mach_msg_type_t* k) { + k->msgt_name = u->msgt_name; + k->msgt_size = u->msgt_size; + k->msgt_number = u->msgt_number; + k->msgt_inline = u->msgt_inline; + k->msgt_longform = u->msgt_longform; + k->msgt_deallocate = u->msgt_deallocate; + k->msgt_unused = 0; +} + +static inline void mach_msg_user_type_to_kernel_long(const mach_msg_user_type_long_t *u, + mach_msg_type_long_t* k) { + const mach_msg_type_long_t kernel = { + .msgtl_header = { + .msgt_name = u->msgtl_name, + .msgt_size = u->msgtl_size, + .msgt_number = u->msgtl_number, + .msgt_inline = u->msgtl_header.msgt_inline, + .msgt_longform = u->msgtl_header.msgt_longform, + .msgt_deallocate = u->msgtl_header.msgt_deallocate, + .msgt_unused = 0 + } + }; + *k = kernel; +} + +static inline void mach_msg_kernel_type_to_user(const mach_msg_type_t *k, + mach_msg_user_type_t *u) { + u->msgt_name = k->msgt_name; + u->msgt_size = k->msgt_size; + u->msgt_number = k->msgt_number; + u->msgt_inline = k->msgt_inline; + u->msgt_longform = k->msgt_longform; + u->msgt_deallocate = k->msgt_deallocate; + u->msgt_unused = 0; +} + +static inline void mach_msg_kernel_type_to_user_long(const mach_msg_type_long_t *k, + mach_msg_user_type_long_t *u) { + const mach_msg_user_type_long_t user = { + .msgtl_header = { + .msgt_name = 0, + .msgt_size = 0, + .msgt_number = 0, + .msgt_inline = k->msgtl_header.msgt_inline, + .msgt_longform = k->msgtl_header.msgt_longform, + .msgt_deallocate = k->msgtl_header.msgt_deallocate, + .msgt_unused = 0 + }, + .msgtl_name = k->msgtl_header.msgt_name, + .msgtl_size = k->msgtl_header.msgt_size, + .msgtl_number = k->msgtl_header.msgt_number + }; + *u = user; +} +#endif + +static inline int copyin_mach_msg_type(const rpc_vm_offset_t *uaddr, mach_msg_type_t *kaddr) { +#ifdef USER32 + mach_msg_user_type_t user; + int ret = copyin(uaddr, &user, sizeof(mach_msg_user_type_t)); + if (ret) { + return ret; + } + mach_msg_user_type_to_kernel(&user, kaddr); + return 0; +#else + return copyin(uaddr, kaddr, sizeof(mach_msg_type_t)); +#endif +} + +static inline int copyout_mach_msg_type(const mach_msg_type_t *kaddr, rpc_vm_offset_t *uaddr) { +#ifdef USER32 + mach_msg_user_type_t user; + mach_msg_kernel_type_to_user(kaddr, &user); + return copyout(&user, uaddr, sizeof(mach_msg_user_type_t)); +#else + return copyout(kaddr, uaddr, sizeof(mach_msg_type_t)); +#endif +} + +static inline int copyin_mach_msg_type_long(const rpc_vm_offset_t *uaddr, mach_msg_type_long_t *kaddr) { +#ifdef USER32 + mach_msg_user_type_long_t user; + int ret = copyin(uaddr, &user, sizeof(mach_msg_user_type_long_t)); + if (ret) + return ret; + mach_msg_user_type_to_kernel_long(&user, kaddr); + return 0; 
+#else + return copyin(uaddr, kaddr, sizeof(mach_msg_type_long_t)); +#endif +} + +static inline int copyout_mach_msg_type_long(const mach_msg_type_long_t *kaddr, rpc_vm_offset_t *uaddr) { +#ifdef USER32 + mach_msg_user_type_long_t user; + mach_msg_kernel_type_to_user_long(kaddr, &user); + return copyout(&user, uaddr, sizeof(mach_msg_user_type_long_t)); +#else + return copyout(kaddr, uaddr, sizeof(mach_msg_type_long_t)); +#endif +} + +/* Optimized version of unpack_msg_type(), including proper copyin() */ +static inline int copyin_unpack_msg_type(vm_offset_t uaddr, + vm_offset_t kaddr, + mach_msg_type_name_t *name, + mach_msg_type_size_t *size, + mach_msg_type_number_t *number, + boolean_t *is_inline, + vm_size_t *user_amount, + vm_size_t *kernel_amount) +{ + mach_msg_type_t *kmt = (mach_msg_type_t*)kaddr; + if (copyin_mach_msg_type((void *)uaddr, kmt)) + return 1; + *is_inline = kmt->msgt_inline; + if (kmt->msgt_longform) + { + mach_msg_type_long_t* kmtl = (mach_msg_type_long_t*)kaddr; + if (copyin_mach_msg_type_long((void *)uaddr, kmtl)) + return 1; + *name = kmtl->msgtl_name; + *size = kmtl->msgtl_size; + *number = kmtl->msgtl_number; + *user_amount = sizeof(mach_msg_user_type_long_t); + *kernel_amount = sizeof(mach_msg_type_long_t); + } + else + { + *name = kmt->msgt_name; + *size = kmt->msgt_size; + *number = kmt->msgt_number; + *user_amount = sizeof(mach_msg_user_type_t); + *kernel_amount = sizeof(mach_msg_type_t); + } + return 0; +} + +/* + * The msg type has a different size field depending on whether is long or not, + * and we also need to convert from bytes to bits + */ +static inline void adjust_msg_type_size(vm_offset_t addr, int amount) +{ + mach_msg_type_t* kmt = (mach_msg_type_t*)addr; + if (kmt->msgt_longform) + { + mach_msg_type_long_t* kmtl = (mach_msg_type_long_t*)addr; + kmtl->msgtl_size += bytes_to_descsize(amount); + } + else + { + kmt->msgt_size += bytes_to_descsize(amount); + } +} + +/* Optimized version of unpack_msg_type(), including proper copyout() */ +static inline int copyout_unpack_msg_type(vm_offset_t kaddr, + vm_offset_t uaddr, + mach_msg_type_name_t *name, + mach_msg_type_size_t *size, + mach_msg_type_number_t *number, + boolean_t *is_inline, + vm_size_t *user_amount, + vm_size_t *kernel_amount) +{ + mach_msg_type_t *kmt = (mach_msg_type_t*)kaddr; + *is_inline = kmt->msgt_inline; + if (kmt->msgt_longform) + { + mach_msg_type_long_t* kmtl = (mach_msg_type_long_t*)kaddr; + mach_msg_type_size_t orig_size = kmtl->msgtl_size; + int ret; + + if (MACH_MSG_TYPE_PORT_ANY(kmtl->msgtl_name)) { +#ifdef USER32 + kmtl->msgtl_size = bytes_to_descsize(sizeof(mach_port_name_t)); +#else + /* 64 bit ABI uses mach_port_name_inlined_t for inlined ports. */ + if (!kmt->msgt_inline) + kmtl->msgtl_size = bytes_to_descsize(sizeof(mach_port_name_t)); +#endif + } + ret = copyout_mach_msg_type_long(kmtl, (void*)uaddr); + kmtl->msgtl_size = orig_size; + if (ret) + return 1; + + *name = kmtl->msgtl_name; + *size = kmtl->msgtl_size; + *number = kmtl->msgtl_number; + *user_amount = sizeof(mach_msg_user_type_long_t); + *kernel_amount = sizeof(mach_msg_type_long_t); + } + else + { + mach_msg_type_size_t orig_size = kmt->msgt_size; + int ret; + + if (MACH_MSG_TYPE_PORT_ANY(kmt->msgt_name)) { +#ifdef USER32 + kmt->msgt_size = bytes_to_descsize(sizeof(mach_port_name_t)); +#else + /* 64 bit ABI uses mach_port_name_inlined_t for inlined ports. 
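+           Only the out-of-line case below is rewritten to the 4-byte
+           mach_port_name_t size; inline ports keep the
+           mach_port_name_inlined_t element size.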
*/ + if (!kmt->msgt_inline) + kmt->msgt_size = bytes_to_descsize(sizeof(mach_port_name_t)); +#endif + } + ret = copyout_mach_msg_type(kmt, (void *)uaddr); + kmt->msgt_size = orig_size; + if (ret) + return 1; + + *name = kmt->msgt_name; + *size = kmt->msgt_size; + *number = kmt->msgt_number; + *user_amount = sizeof(mach_msg_user_type_t); + *kernel_amount = sizeof(mach_msg_type_t); + } + return 0; +} + +#ifdef USER32 +/* + * Compute the user-space size of a message still in the kernel when processing + * messages from 32bit userland. + * The message may be originating from userspace (in which case we could + * optimize this by keeping the usize around) or from kernel space (we could + * optimize if the message structure is fixed and known in advance). + * For now just handle the most general case, iterating over the msg body. + */ +size_t msg_usize(const mach_msg_header_t *kmsg) +{ + size_t ksize = kmsg->msgh_size; + size_t usize = sizeof(mach_msg_user_header_t); + if (ksize > sizeof(mach_msg_header_t)) + { + // iterate over body compute the user-space message size + vm_offset_t saddr, eaddr; + saddr = (vm_offset_t)(kmsg + 1); + eaddr = saddr + ksize - sizeof(mach_msg_header_t); + while (saddr < (eaddr - sizeof(mach_msg_type_t))) + { + vm_size_t user_amount, kernel_amount; + mach_msg_type_name_t name; + mach_msg_type_size_t size; + mach_msg_type_number_t number; + boolean_t is_inline; + unpack_msg_type(saddr, &name, &size, &number, &is_inline, &user_amount, &kernel_amount); + saddr += kernel_amount; + saddr = mach_msg_kernel_align(saddr); + usize += user_amount; + usize = mach_msg_user_align(usize); + + if (is_inline) + { + if (MACH_MSG_TYPE_PORT_ANY(name)) + { + const vm_size_t length = sizeof(mach_port_t) * number; + saddr += length; + usize += sizeof(mach_port_name_t) * number; + } + else + { + size_t n = descsize_to_bytes(size); + saddr += n*number; + usize += n*number; + } + } + else + { + // advance one pointer + saddr += sizeof(vm_offset_t); + usize += sizeof(rpc_vm_offset_t); + } + saddr = mach_msg_kernel_align(saddr); + usize = mach_msg_user_align(usize); + } + } + return usize; +} +#endif /* USER32 */ + +/* + * Expand the msg header and, if required, the msg body (ports, pointers) + * + * To not make the code too complicated, we use the fact that some fields of + * mach_msg_header have the same size in the kernel and user variant (basically + * all fields except ports and addresses) +*/ +int copyinmsg (const void *userbuf, void *kernelbuf, const size_t usize, const size_t ksize) +{ + const mach_msg_user_header_t *umsg = userbuf; + mach_msg_header_t *kmsg = kernelbuf; + +#ifdef USER32 + if (copyin(&umsg->msgh_bits, &kmsg->msgh_bits, sizeof(kmsg->msgh_bits))) + return 1; + /* kmsg->msgh_size is filled in later */ + if (copyin_port(&umsg->msgh_remote_port, &kmsg->msgh_remote_port)) + return 1; + if (copyin_port(&umsg->msgh_local_port, &kmsg->msgh_local_port)) + return 1; + if (copyin(&umsg->msgh_seqno, &kmsg->msgh_seqno, + sizeof(kmsg->msgh_seqno) + sizeof(kmsg->msgh_id))) + return 1; +#else + /* The 64 bit interface ensures the header is the same size, so it does not need any resizing. 
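+     Only the port fields are masked below, because userland stores
+     32-bit port names in the 64-bit port fields.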
*/ + _Static_assert(sizeof(mach_msg_header_t) == sizeof(mach_msg_user_header_t), + "mach_msg_header_t and mach_msg_user_header_t expected to be of the same size"); + if (copyin(umsg, kmsg, sizeof(mach_msg_header_t))) + return 1; + kmsg->msgh_remote_port &= 0xFFFFFFFF; // FIXME: still have port names here + kmsg->msgh_local_port &= 0xFFFFFFFF; // also, this assumes little-endian +#endif + + vm_offset_t usaddr, ueaddr, ksaddr; + ksaddr = (vm_offset_t)(kmsg + 1); + usaddr = (vm_offset_t)(umsg + 1); + ueaddr = (vm_offset_t)umsg + usize; + + _Static_assert(!mach_msg_user_is_misaligned(sizeof(mach_msg_user_header_t)), + "mach_msg_user_header_t needs to be MACH_MSG_USER_ALIGNMENT aligned."); + + if (usize > sizeof(mach_msg_user_header_t)) + { + /* check we have at least space for an empty descryptor */ + while (usaddr <= (ueaddr - sizeof(mach_msg_user_type_t))) + { + vm_size_t user_amount, kernel_amount; + mach_msg_type_name_t name; + mach_msg_type_size_t size; + mach_msg_type_number_t number; + boolean_t is_inline; + if (copyin_unpack_msg_type(usaddr, ksaddr, &name, &size, &number, + &is_inline, &user_amount, &kernel_amount)) + return 1; + + // keep a reference to the current field descriptor, we + // might need to adjust it later depending on the type + vm_offset_t ktaddr = ksaddr; + usaddr += user_amount; + usaddr = mach_msg_user_align(usaddr); + ksaddr += kernel_amount; + ksaddr = mach_msg_kernel_align(ksaddr); + + if (is_inline) + { + if (MACH_MSG_TYPE_PORT_ANY(name)) + { +#ifdef USER32 + if (size != bytes_to_descsize(sizeof(mach_port_name_t))) + return 1; + if ((usaddr + sizeof(mach_port_name_t)*number) > ueaddr) + return 1; + adjust_msg_type_size(ktaddr, sizeof(mach_port_t) - sizeof(mach_port_name_t)); + for (int i=0; i ueaddr) + return 1; + if (copyin((void*)usaddr, (void*)ksaddr, length)) + return 1; + usaddr += length; + ksaddr += length; +#endif + } + else + { + // type that doesn't need change + size_t n = descsize_to_bytes(size); + if ((usaddr + n*number) > ueaddr) + return 1; + if (copyin((void*)usaddr, (void*)ksaddr, n*number)) + return 1; + usaddr += n*number; + ksaddr += n*number; + } + } + else + { + if ((usaddr + sizeof(rpc_vm_offset_t)) > ueaddr) + return 1; + + /* out-of-line port arrays are always arrays of mach_port_name_t (4 bytes) + * and are expanded in ipc_kmsg_copyin_body() */ + if (MACH_MSG_TYPE_PORT_ANY(name)) { + if (size != bytes_to_descsize(sizeof(mach_port_name_t))) + return 1; + adjust_msg_type_size(ktaddr, sizeof(mach_port_t) - sizeof(mach_port_name_t)); + } + + if (copyin_address((rpc_vm_offset_t*)usaddr, (vm_offset_t*)ksaddr)) + return 1; + // Advance one pointer. + ksaddr += sizeof(vm_offset_t); + usaddr += sizeof(rpc_vm_offset_t); + } + // Note that we have to align because mach_port_name_t might not align + // with the required user alignment. 
+ usaddr = mach_msg_user_align(usaddr); + ksaddr = mach_msg_kernel_align(ksaddr); + } + } + + kmsg->msgh_size = sizeof(mach_msg_header_t) + ksaddr - (vm_offset_t)(kmsg + 1); + assert(kmsg->msgh_size <= ksize); +#ifndef USER32 + if (kmsg->msgh_size != usize) + return 1; +#endif + return 0; +} + +int copyoutmsg (const void *kernelbuf, void *userbuf, const size_t ksize) +{ + const mach_msg_header_t *kmsg = kernelbuf; + mach_msg_user_header_t *umsg = userbuf; +#ifdef USER32 + if (copyout(&kmsg->msgh_bits, &umsg->msgh_bits, sizeof(kmsg->msgh_bits))) + return 1; + /* umsg->msgh_size is filled in later */ + if (copyout_port(&kmsg->msgh_remote_port, &umsg->msgh_remote_port)) + return 1; + if (copyout_port(&kmsg->msgh_local_port, &umsg->msgh_local_port)) + return 1; + if (copyout(&kmsg->msgh_seqno, &umsg->msgh_seqno, + sizeof(kmsg->msgh_seqno) + sizeof(kmsg->msgh_id))) + return 1; +#else + if (copyout(kmsg, umsg, sizeof(mach_msg_header_t))) + return 1; +#endif /* USER32 */ + + vm_offset_t ksaddr, keaddr, usaddr; + ksaddr = (vm_offset_t)(kmsg + 1); + usaddr = (vm_offset_t)(umsg + 1); + keaddr = ksaddr + ksize - sizeof(mach_msg_header_t); + + if (ksize > sizeof(mach_msg_header_t)) + { + while (ksaddr < keaddr) + { + vm_size_t user_amount, kernel_amount; + mach_msg_type_name_t name; + mach_msg_type_size_t size; + mach_msg_type_number_t number; + boolean_t is_inline; + if (copyout_unpack_msg_type(ksaddr, usaddr, &name, &size, &number, + &is_inline, &user_amount, &kernel_amount)) + return 1; + usaddr += user_amount; + usaddr = mach_msg_user_align(usaddr); + ksaddr += kernel_amount; + ksaddr = mach_msg_kernel_align(ksaddr); + + if (is_inline) + { + if (MACH_MSG_TYPE_PORT_ANY(name)) + { +#ifdef USER32 + for (int i=0; imsgh_size, sizeof(umsg->msgh_size))) + return 1; +#ifndef USER32 + if (usize != ksize) + return 1; +#endif + + return 0; + +} diff --git a/x86_64/cswitch.S b/x86_64/cswitch.S new file mode 100644 index 0000000..9c4640f --- /dev/null +++ b/x86_64/cswitch.S @@ -0,0 +1,148 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include + +#include +#include +#include + +/* + * Context switch routines for x86_64. 
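+ *
+ * Load_context installs a thread's saved kernel stack state without
+ * saving anything; Switch_context first saves the outgoing thread's
+ * callee-saved registers, stack pointer and return PC into its stack,
+ * then resumes the incoming thread the same way.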
+ */ + +ENTRY(Load_context) + movq S_ARG0,%rcx /* get thread */ + movq TH_KERNEL_STACK(%rcx),%rcx /* get kernel stack */ + lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%rcx),%rdx + /* point to stack top */ + CPU_NUMBER(%eax) + movq %rcx,MY(ACTIVE_STACK) /* store stack address */ + movq %rdx,CX(EXT(kernel_stack),%rax) /* store stack top */ + +/* XXX complete */ + + movq KSS_ESP(%rcx),%rsp /* switch stacks */ + movq KSS_EBP(%rcx),%rbp /* restore registers */ + movq KSS_EBX(%rcx),%rbx + movq KSS_R12(%rcx),%r12 + movq KSS_R13(%rcx),%r13 + movq KSS_R14(%rcx),%r14 + movq KSS_R15(%rcx),%r15 + xorq %rax,%rax /* return zero (no old thread) */ + jmp *KSS_EIP(%rcx) /* resume thread */ + +/* + * This really only has to save registers + * when there is no explicit continuation. + */ + +ENTRY(Switch_context) + movq MY(ACTIVE_STACK),%rcx /* get old kernel stack */ + + movq %r12,KSS_R12(%rcx) /* save registers */ + movq %r13,KSS_R13(%rcx) + movq %r14,KSS_R14(%rcx) + movq %r15,KSS_R15(%rcx) + movq %rbx,KSS_EBX(%rcx) + movq %rbp,KSS_EBP(%rcx) + popq KSS_EIP(%rcx) /* save return PC */ + movq %rsp,KSS_ESP(%rcx) /* save SP */ + + movq S_ARG0,%rax /* get old thread */ + movq %rcx,TH_KERNEL_STACK(%rax) /* save old stack */ + movq S_ARG1,%rbx /* get continuation */ + movq %rbx,TH_SWAP_FUNC(%rax) /* save continuation */ + + movq S_ARG2,%rsi /* get new thread */ + + movq TH_KERNEL_STACK(%rsi),%rcx /* get its kernel stack */ + lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%rcx),%rbx + /* point to stack top */ + + CPU_NUMBER(%eax) + movq %rsi,MY(ACTIVE_THREAD) /* new thread is active */ + movq %rcx,MY(ACTIVE_STACK) /* set current stack */ + movq %rbx,CX(EXT(kernel_stack),%rax) /* set stack top */ + + movq KSS_ESP(%rcx),%rsp /* switch stacks */ + movq KSS_EBP(%rcx),%rbp /* restore registers */ + movq KSS_EBX(%rcx),%rbx + movq KSS_R12(%rcx),%r12 + movq KSS_R13(%rcx),%r13 + movq KSS_R14(%rcx),%r14 + movq KSS_R15(%rcx),%r15 + jmp *KSS_EIP(%rcx) /* return old thread */ + +ENTRY(Thread_continue) + movq %rax,%rdi /* push the thread argument */ + xorq %rbp,%rbp /* zero frame pointer */ + call *%rbx /* call real continuation */ + +#if NCPUS > 1 +/* + * void switch_to_shutdown_context(thread_t thread, + * void (*routine)(processor_t), + * processor_t processor) + * + * saves the kernel context of the thread, + * switches to the interrupt stack, + * continues the thread (with thread_continue), + * then runs routine on the interrupt stack. 
+ * + * Assumes that the thread is a kernel thread (thus + * has no FPU state) + */ +ENTRY(switch_to_shutdown_context) +ud2 + movq MY(ACTIVE_STACK),%rcx /* get old kernel stack */ + movq %r12,KSS_R12(%rcx) /* save registers */ + movq %r13,KSS_R13(%rcx) + movq %r14,KSS_R14(%rcx) + movq %r15,KSS_R15(%rcx) + movq %rbx,KSS_EBX(%rcx) + movq %rbp,KSS_EBP(%rcx) + popq KSS_EIP(%rcx) /* save return PC */ + movq %rsp,KSS_ESP(%rcx) /* save SP */ + + movq S_ARG0,%rax /* get old thread */ + movq %rcx,TH_KERNEL_STACK(%rax) /* save old stack */ + movq $0,TH_SWAP_FUNC(%rax) /* clear continuation */ + movq S_ARG1,%rbx /* get routine to run next */ + movq S_ARG2,%rsi /* get its argument */ + + CPU_NUMBER(%ecx) + movq CX(EXT(int_stack_base),%rcx),%rcx /* point to its interrupt stack */ + lea INTSTACK_SIZE(%rcx),%rsp /* switch to it (top) */ + + movq %rax,%rdi /* push thread */ + call EXT(thread_dispatch) /* reschedule thread */ + + movq %rsi,%rdi /* push argument */ + call *%rbx /* call routine to run */ + hlt /* (should never return) */ + +#endif /* NCPUS > 1 */ diff --git a/x86_64/debug_trace.S b/x86_64/debug_trace.S new file mode 100644 index 0000000..7bed5cc --- /dev/null +++ b/x86_64/debug_trace.S @@ -0,0 +1,56 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory at the University of Utah (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ + +#ifdef DEBUG + +#include +#include + +#include "debug.h" + + .text +ENTRY(_debug_trace) + pushf + cli + pushq %rax + pushq %rbx + .byte 0x36 /* SS: bug in gas? */ + movl %ss:EXT(debug_trace_pos),%eax + movq S_ARG0,%rbx + movq %rbx,%ss:EXT(debug_trace_buf)(,%eax,16) + movl S_ARG1,%ebx + movl %ebx,%ss:EXT(debug_trace_buf)+8(,%eax,16) + incl %eax + andl $DEBUG_TRACE_LEN-1,%eax + .byte 0x36 /* SS: bug in gas? */ + movl %eax,%ss:EXT(debug_trace_pos) + popq %rbx + popq %rax + popf + ret + +#endif /* DEBUG */ + +/* XXX gas bug? need at least one symbol... */ +foo: + diff --git a/x86_64/idt_inittab.S b/x86_64/idt_inittab.S new file mode 100644 index 0000000..3a205ae --- /dev/null +++ b/x86_64/idt_inittab.S @@ -0,0 +1,148 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +#include + +#include +#include + +#include + +/* We'll be using macros to fill in a table in data hunk 2 + while writing trap entrypoint routines at the same time. + Here's the header that comes before everything else. */ + .data 2 +ENTRY(idt_inittab) + .text + +/* + * Interrupt descriptor table and code vectors for it. + */ +#ifdef MACH_PV_DESCRIPTORS +#define IDT_ENTRY(n,entry,type,ist) \ + .data 2 ;\ + .byte n ;\ + .byte (((type)&ACC_PL)>>5)|((((type)&(ACC_TYPE|ACC_A))==ACC_INTR_GATE)<<2) ;\ + .word FLAT_KERNEL_CS ;\ + .word ist ;\ + .word 0 /*pad*/ ;\ + .quad entry ;\ + .text +#else /* MACH_PV_DESCRIPTORS */ +#define IDT_ENTRY(n,entry,type,ist) \ + .data 2 ;\ + .quad entry ;\ + .word n ;\ + .word type ;\ + .word ist ;\ + .word 0 /*pad*/ ;\ + .text +#endif /* MACH_PV_DESCRIPTORS */ + +/* + * No error code. Clear error code and push trap number. + */ +#define EXCEPTION(n,name) \ + IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_TRAP_GATE, 0);\ +ENTRY(name) ;\ + INT_FIX ;\ + pushq $(0) ;\ + pushq $(n) ;\ + jmp EXT(alltraps) + +/* + * User-accessible exception. Otherwise, same as above. + */ +#define EXCEP_USR(n,name) \ + IDT_ENTRY(n,EXT(name),ACC_PL_U|ACC_TRAP_GATE, 0);\ +ENTRY(name) ;\ + INT_FIX ;\ + pushq $(0) ;\ + pushq $(n) ;\ + jmp EXT(alltraps) + +/* + * Error code has been pushed. Just push trap number. + */ +#define EXCEP_ERR(n,name) \ + IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_INTR_GATE, 0);\ +ENTRY(name) ;\ + INT_FIX ;\ + pushq $(n) ;\ + jmp EXT(alltraps) + +/* + * Special interrupt code: dispatches to a unique entrypoint, + * not defined automatically here. + */ +#define EXCEP_SPC(n,name, ist) \ + IDT_ENTRY(n,EXT(name),ACC_PL_K|ACC_TRAP_GATE, ist) + + +EXCEPTION(0x00,t_zero_div) +EXCEP_SPC(0x01,t_debug, 0) +/* skip NMI interrupt - let more specific code figure that out. 
*/ +EXCEP_USR(0x03,t_int3) +EXCEP_USR(0x04,t_into) +EXCEP_USR(0x05,t_bounds) +EXCEPTION(0x06,t_invop) +EXCEPTION(0x07,t_nofpu) +EXCEP_SPC(0x08,t_dbl_fault, 1) +EXCEPTION(0x09,a_fpu_over) +EXCEPTION(0x0a,a_inv_tss) +EXCEP_SPC(0x0b,t_segnp, 0) +EXCEP_ERR(0x0c,t_stack_fault) +EXCEP_SPC(0x0d,t_gen_prot, 0) +EXCEP_SPC(0x0e,t_page_fault, 0) +#ifdef MACH_PV_DESCRIPTORS +EXCEP_ERR(0x0f,t_trap_0f) +#else +EXCEPTION(0x0f,t_trap_0f) +#endif +EXCEPTION(0x10,t_fpu_err) +EXCEPTION(0x11,t_trap_11) +EXCEPTION(0x12,t_trap_12) +EXCEPTION(0x13,t_trap_13) +EXCEPTION(0x14,t_trap_14) +EXCEPTION(0x15,t_trap_15) +EXCEPTION(0x16,t_trap_16) +EXCEPTION(0x17,t_trap_17) +EXCEPTION(0x18,t_trap_18) +EXCEPTION(0x19,t_trap_19) +EXCEPTION(0x1a,t_trap_1a) +EXCEPTION(0x1b,t_trap_1b) +EXCEPTION(0x1c,t_trap_1c) +EXCEPTION(0x1d,t_trap_1d) +EXCEPTION(0x1e,t_trap_1e) +EXCEPTION(0x1f,t_trap_1f) + +/* Terminator */ + .data 2 + .long 0 +#ifdef MACH_PV_DESCRIPTORS + .long 0 + .quad 0 +#endif /* MACH_PV_DESCRIPTORS */ + diff --git a/x86_64/include/mach/x86_64 b/x86_64/include/mach/x86_64 new file mode 120000 index 0000000..698e9fb --- /dev/null +++ b/x86_64/include/mach/x86_64 @@ -0,0 +1 @@ +../../../i386/include/mach/i386 \ No newline at end of file diff --git a/x86_64/include/syscall_sw.h b/x86_64/include/syscall_sw.h new file mode 100644 index 0000000..4e03f28 --- /dev/null +++ b/x86_64/include/syscall_sw.h @@ -0,0 +1,40 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _MACH_X86_64_SYSCALL_SW_H_ +#define _MACH_X86_64_SYSCALL_SW_H_ + +#include + +#define kernel_trap(trap_name,trap_number,number_args) \ +ENTRY(trap_name) \ + movq $ trap_number,%rax; \ + movq %rcx,%r10; \ + syscall; \ + ret; \ +END(trap_name) + +#endif /* _MACH_X86_64_SYSCALL_SW_H_ */ diff --git a/x86_64/interrupt.S b/x86_64/interrupt.S new file mode 100644 index 0000000..6fb7772 --- /dev/null +++ b/x86_64/interrupt.S @@ -0,0 +1,140 @@ +/* + * Copyright (c) 1995 Shantanu Goel + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE AUTHOR ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. THE AUTHOR DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ */ + +#include + +#include +#ifdef APIC +# include +#else +# include +#endif +#include + +#define READ_ISR (OCW_TEMPLATE|READ_NEXT_RD|READ_IS_ONRD) + +/* + * Generic interrupt handler. + * + * On entry, %eax contains the irq number. + * + * Note: kdb_kintr needs to know our stack usage + */ + +#define S_REGS 24(%rsp) +#define S_RET 16(%rsp) +#define S_IRQ 8(%rsp) +#define S_IPL 0(%rsp) + +ENTRY(interrupt) +#ifdef APIC + cmpl $255,%eax /* was this a spurious intr? */ + jne 1f + ret /* if so, just return */ +1: +#endif + subq $16,%rsp /* Two local variables */ + movl %eax,S_IRQ /* save irq number */ + + call spl7 /* set ipl */ + movl %eax,S_IPL /* save previous ipl */ + + movl S_IRQ,%ecx /* restore irq number */ + +#if NCPUS > 1 + cmpl $CALL_PMAP_UPDATE,%ecx /* was this a SMP pmap_update request? */ + je _call_single + + cmpl $CALL_AST_CHECK,%ecx /* was this a SMP remote -> local ast request? */ + je _call_local_ast +#endif + +#ifndef APIC + movl $1,%eax + shll %cl,%eax /* get corresponding IRQ mask */ + orl EXT(curr_pic_mask),%eax /* add current mask */ + + cmpl $8,%ecx /* do we need to ack slave? */ + jl 1f /* no, only master */ + + /* EOI on slave */ + movb %ah,%al + outb %al,$(PIC_SLAVE_OCW) /* mask slave out */ + + movb $(SPECIFIC_EOI),%al /* specific EOI for this irq */ + andb $7,%cl /* irq number for the slave */ + orb %cl,%al /* combine them */ + outb %al,$(PIC_SLAVE_ICW) /* ack interrupt to slave */ + + movb $(SPECIFIC_EOI + I_AM_SLAVE_2),%al /* specific master EOI for cascaded slave */ + outb %al,$(PIC_MASTER_ICW) /* ack interrupt to master */ + + movl EXT(curr_pic_mask),%eax /* restore original mask */ + movb %ah,%al + outb %al,$(PIC_SLAVE_OCW) /* unmask slave */ + jmp 2f + +1: + /* EOI on master */ + outb %al,$(PIC_MASTER_OCW) /* mask master out */ + + movb $(SPECIFIC_EOI),%al /* specific EOI for this irq */ + orb %cl,%al /* combine with irq number */ + outb %al,$(PIC_MASTER_ICW) /* ack interrupt to master */ + + movl EXT(curr_pic_mask),%eax /* restore original mask */ + outb %al,$(PIC_MASTER_OCW) /* unmask master */ +2: +#else + movl %ecx,%edi /* load irq number as 1st arg */ + call EXT(ioapic_irq_eoi) /* ioapic irq specific EOI */ +#endif + + ; + movq S_IPL,S_ARG1 /* previous ipl as 2nd arg */ + + ; + movq S_RET,S_ARG2 /* return address as 3th arg */ + + ; + movq S_REGS,S_ARG3 /* address of interrupted registers as 4th arg */ + + movl S_IRQ,%eax /* copy irq number */ + shll $2,%eax /* irq * 4 */ + movl EXT(iunit)(%rax),%edi /* get device unit number as 1st arg */ + + shll $1,%eax /* irq * 8 */ + call *EXT(ivect)(%rax) /* call interrupt handler */ + +_completed: + movl S_IPL,%edi /* restore previous ipl */ + call splx_cli /* restore previous ipl */ + + addq $16,%rsp /* pop local variables */ + ret + +#if NCPUS > 1 +_call_single: + call EXT(lapic_eoi) /* lapic EOI before the handler to allow extra update */ + call EXT(pmap_update_interrupt) + jmp _completed + +_call_local_ast: + call EXT(lapic_eoi) /* lapic EOI */ + call EXT(ast_check) /* AST check on this cpu */ + jmp _completed + +#endif +END(interrupt) diff --git a/x86_64/kdasm.S b/x86_64/kdasm.S new file mode 100644 index 0000000..e1acf39 --- /dev/null +++ b/x86_64/kdasm.S @@ -0,0 +1,133 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Some inline code to speed up major block copies to and from the + * screen buffer. + * + * Copyright Ing. C. Olivetti & C. S.p.A. 1988, 1989. + * All rights reserved. + * + * orc!eugene 28 Oct 1988 + * + */ +/* + Copyright 1988, 1989 by Olivetti Advanced Technology Center, Inc., +Cupertino, California. + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software and +its documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appears in all +copies and that both the copyright notice and this permission notice +appear in supporting documentation, and that the name of Olivetti +not be used in advertising or publicity pertaining to distribution +of the software without specific, written prior permission. + + OLIVETTI DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, +IN NO EVENT SHALL OLIVETTI BE LIABLE FOR ANY SPECIAL, INDIRECT, OR +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, +NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUR OF OR IN CONNECTION +WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +/* $ Header: $ */ + + +#include + +/* + * Function: kd_slmwd() + * + * This function "slams" a word (char/attr) into the screen memory using + * a block fill operation on the 386. 
+ * + */ + +#define start B_ARG0 +#define count B_ARG1 +#define value %dx // B_ARG2 + +ENTRY(kd_slmwd) + pushq %rbp + movq %rsp, %rbp + + # start already in %rdi + movq count, %rcx + movw value, %ax + cld + rep + stosw + + leave + ret +#undef start +#undef count +#undef value + +/* + * "slam up" + */ + +#define from B_ARG0 +#define to B_ARG1 +#define count %edx // B_ARG2 +ENTRY(kd_slmscu) + pushq %rbp + movq %rsp, %rbp + + xchgq %rsi, %rdi + movl count, %ecx + cmpq %rdi, %rsi + cld + rep + movsw + + leave + ret + +/* + * "slam down" + */ +ENTRY(kd_slmscd) + pushq %rbp + movq %rsp, %rbp + + xchgq %rsi, %rdi + movl count, %ecx + cmpq %rdi, %rsi + std + rep + movsw + cld + + leave + ret +#undef from +#undef to +#undef count diff --git a/x86_64/ldscript b/x86_64/ldscript new file mode 100644 index 0000000..67703b4 --- /dev/null +++ b/x86_64/ldscript @@ -0,0 +1,227 @@ +/* Default linker script, for normal executables */ +OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", + "elf64-x86-64") +OUTPUT_ARCH(i386:x86-64) +ENTRY(boot_start) +SECTIONS +{ + /* + * There are specific requirements about entry points, so we have it + * configurable via `_START': `.text' will begin there and `.text.start' will + * be first in there. See also `i386/i386at/boothdr.S' and + * `gnumach_LINKFLAGS' in `i386/Makefrag.am'. + */ + + . = _START; + .boot : AT(_START_MAP) + { + *(.boot.text) + *(.boot.data) + } =0x90909090 + + . += KERNEL_MAP_SHIFT; + _start = .; + .text : AT(((ADDR(.text)) - KERNEL_MAP_SHIFT)) + { + *(.text*) + *(.text .stub .text.* .gnu.linkonce.t.*) + *(.text.unlikely .text.*_unlikely) + KEEP (*(.text.*personality*)) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + } =0x90909090 + .init : AT(((ADDR(.init)) - KERNEL_MAP_SHIFT)) + { + KEEP (*(.init)) + } =0x90909090 + .fini : AT(((ADDR(.fini)) - KERNEL_MAP_SHIFT)) + { + KEEP (*(.fini)) + } =0x90909090 + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Read-only sections, merged into text segment: */ + PROVIDE (__executable_start = .); + .interp : { *(.interp) } + .note.gnu.build-id : { *(.note.gnu.build-id) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .rela.init : { *(.rela.init) } + .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) } + .rela.fini : { *(.rela.fini) } + .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) } + .rela.data.rel.ro : { *(.rela.data.rel.ro* .rela.gnu.linkonce.d.rel.ro.*) } + .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) } + .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) } + .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) } + .rela.ctors : { *(.rela.ctors) } + .rela.dtors : { *(.rela.dtors) } + .rela.got : { *(.rela.got) } + .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } + .rela.ldata : { *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) } + .rela.lbss : { *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) } + .rela.lrodata : { *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) } + .rela.ifunc : { *(.rela.ifunc) } + .rela.plt : + { + *(.rela.plt) + PROVIDE_HIDDEN (__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE_HIDDEN (__rela_iplt_end = .); + } + .plt : { *(.plt) *(.iplt) } + .rodata : AT(((ADDR(.rodata)) - KERNEL_MAP_SHIFT)) { *(.rodata .rodata.* .gnu.linkonce.r.*) } + 
.rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) } + .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) } + .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table + .gcc_except_table.*) } + /* Adjust the address for the data segment. We want to adjust up to + the same address within the page on the next page up. */ + . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE)); + /* Exception handling */ + .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) } + .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } + /* Thread Local Storage sections */ + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + .preinit_array : + { + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + } + .init_array : + { + PROVIDE_HIDDEN (__init_array_start = .); + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + } + .fini_array : + { + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + } + .ctors : + { + /* gcc uses crtbegin.o to find the start of + the constructors, so we make sure it is + first. Because this is a wildcard, it + doesn't matter if the user does not + actually link against crtbegin.o; the + linker won't look for a file to match a + wildcard. The wildcard also means that it + doesn't matter which directory crtbegin.o + is in. */ + KEEP (*crtbegin.o(.ctors)) + KEEP (*crtbegin?.o(.ctors)) + /* We don't want to include the .ctor section from + the crtend.o file until after the sorted ctors. + The .ctor section from the crtend file contains the + end of ctors marker and it must be last */ + KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + } + .dtors : + { + KEEP (*crtbegin.o(.dtors)) + KEEP (*crtbegin?.o(.dtors)) + KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + } + .jcr : { KEEP (*(.jcr)) } + .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro* .gnu.linkonce.d.rel.ro.*) } + .dynamic : { *(.dynamic) } + .got : { *(.got) *(.igot) } + . = DATA_SEGMENT_RELRO_END (24, .); + .got.plt : { *(.got.plt) *(.igot.plt) } + .data : AT(((ADDR(.data)) - KERNEL_MAP_SHIFT)) + { + *(.data .data.* .gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + } + .data1 : { *(.data1) } + _edata = .; PROVIDE (edata = .); + __bss_start = .; + .bss : AT(((ADDR(.bss)) - KERNEL_MAP_SHIFT)) + { + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(COMMON) + /* Align here to ensure that the .bss section occupies space up to + _end. Align after .bss to ensure correct alignment even if the + .bss section disappears because there are no input sections. + FIXME: Why do we need it? When there is no .bss section, we don't + pad the .data section. */ + . = ALIGN(. != 0 ? 64 / 8 : 1); + } + .lbss : + { + *(.dynlbss) + *(.lbss .lbss.* .gnu.linkonce.lb.*) + *(LARGE_COMMON) + } + . = ALIGN(64 / 8); + .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : + { + *(.lrodata .lrodata.* .gnu.linkonce.lr.*) + } + .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : + { + *(.ldata .ldata.* .gnu.linkonce.l.*) + . = ALIGN(. != 0 ? 64 / 8 : 1); + } + . 
= ALIGN(64 / 8); + _end = .; PROVIDE (end = .); + . = DATA_SEGMENT_END (.); + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) } +} diff --git a/x86_64/locore.S b/x86_64/locore.S new file mode 100644 index 0000000..25dc15d --- /dev/null +++ b/x86_64/locore.S @@ -0,0 +1,1640 @@ +/* + * Mach Operating System + * Copyright (c) 1993,1992,1991,1990 Carnegie Mellon University + * Copyright (c) 1991 IBM Corporation + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, + * and that the nema IBM not be used in advertising or publicity + * pertaining to distribution of the software without specific, written + * prior permission. + * + * CARNEGIE MELLON AND IBM ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON AND IBM DISCLAIM ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * Helpers for thread state as saved in the pcb area, during trap or irq handling + */ +#define pusha \ + pushq %rax ;\ + pushq %rcx ;\ + pushq %rdx ;\ + pushq %rbx ;\ + subq $8,%rsp ;\ + pushq %rbp ;\ + pushq %rsi ;\ + pushq %rdi ;\ + pushq %r8 ;\ + pushq %r9 ;\ + pushq %r10 ;\ + pushq %r11 ;\ + pushq %r12 ;\ + pushq %r13 ;\ + pushq %r14 ;\ + pushq %r15 + +#define popa \ + popq %r15 ;\ + popq %r14 ;\ + popq %r13 ;\ + popq %r12 ;\ + popq %r11 ;\ + popq %r10 ;\ + popq %r9 ;\ + popq %r8 ;\ + popq %rdi ;\ + popq %rsi ;\ + popq %rbp ;\ + addq $8,%rsp ;\ + popq %rbx ;\ + popq %rdx ;\ + popq %rcx ;\ + popq %rax + +#define PUSH_REGS_ISR \ + pushq %rcx ;\ + pushq %rdx ;\ + pushq %rsi ;\ + pushq %rdi ;\ + pushq %r8 ;\ + pushq %r9 ;\ + pushq %r10 ;\ + pushq %r11 + +#define PUSH_AREGS_ISR \ + pushq %rax ;\ + PUSH_REGS_ISR + + +#define POP_REGS_ISR \ + popq %r11 ;\ + popq %r10 ;\ + popq %r9 ;\ + popq %r8 ;\ + popq %rdi ;\ + popq %rsi ;\ + popq %rdx ;\ + popq %rcx + +#define POP_AREGS_ISR \ + POP_REGS_ISR ;\ + popq %rax + +/* + * Note that we have to load the kernel segment registers even if this + * is a trap from the kernel, because the kernel uses user segment + * registers for copyin/copyout. + * (XXX Would it be smarter just to use fs or gs for that?) + */ +#ifdef USER32 +#define PUSH_SEGMENTS(reg) \ + movq %ds,reg ;\ + pushq reg ;\ + movq %es,reg ;\ + pushq reg ;\ + pushq %fs ;\ + pushq %gs +#else +#define PUSH_SEGMENTS(reg) +#endif + +#ifdef USER32 +#define POP_SEGMENTS(reg) \ + popq %gs ;\ + popq %fs ;\ + popq reg ;\ + movq reg,%es ;\ + popq reg ;\ + movq reg,%ds +#else +#define POP_SEGMENTS(reg) +#endif + +#ifdef USER32 +#define PUSH_SEGMENTS_ISR(reg) \ + movq %ds,reg ;\ + pushq reg ;\ + movq %es,reg ;\ + pushq reg ;\ + pushq %fs ;\ + pushq %gs +#else +#define PUSH_SEGMENTS_ISR(reg) +#endif + +#ifdef USER32 +#define POP_SEGMENTS_ISR(reg) \ + popq %gs ;\ + popq %fs ;\ + popq reg ;\ + movq reg,%es ;\ + popq reg ;\ + movq reg,%ds +#else +#define POP_SEGMENTS_ISR(reg) +#endif + +#ifdef USER32 +#define SET_KERNEL_SEGMENTS(reg) \ + mov %ss,reg /* switch to kernel segments */ ;\ + mov reg,%ds /* (same as kernel stack segment) */ ;\ + mov reg,%es ;\ + mov reg,%fs ;\ + mov $(PERCPU_DS),reg ;\ + mov reg,%gs +#else +#define SET_KERNEL_SEGMENTS(reg) +#endif + +/* + * Fault recovery. + */ +#define RECOVER_TABLE_START \ + .text 2 ;\ +DATA(recover_table) ;\ + .text + +#define RECOVER(addr) \ + .text 2 ;\ + .quad 9f ;\ + .quad addr ;\ + .text ;\ +9: + +#define RECOVER_TABLE_END \ + .text 2 ;\ + .globl EXT(recover_table_end) ;\ +LEXT(recover_table_end) ;\ + .text + +/* + * Retry table for certain successful faults. + */ +#define RETRY_TABLE_START \ + .text 3 ;\ +DATA(retry_table) ;\ + .text + +#define RETRY(addr) \ + .text 3 ;\ + .quad 9f ;\ + .quad addr ;\ + .text ;\ +9: + +#define RETRY_TABLE_END \ + .text 3 ;\ + .globl EXT(retry_table_end) ;\ +LEXT(retry_table_end) ;\ + .text + +/* + * Allocate recovery and retry tables. + */ + RECOVER_TABLE_START + RETRY_TABLE_START + +/* + * Timing routines. + */ +#if STAT_TIME + +#define TIME_TRAP_UENTRY +#define TIME_TRAP_SENTRY +#define TIME_TRAP_UEXIT +#define TIME_INT_ENTRY +#define TIME_INT_EXIT + +#else /* microsecond timing */ + +/* + * Microsecond timing. + * Assumes a free-running microsecond counter. + * no TIMER_MAX check needed. 
+ */ + +/* + * There is only one current time-stamp per CPU, since only + * the time-stamp in the current timer is used. + * To save time, we allocate the current time-stamps here. + */ + .comm EXT(current_tstamp), 4*NCPUS + +/* + * Update time on user trap entry. + * 11 instructions (including cli on entry) + * Assumes CPU number in %edx. + * Uses %eax, %ebx, %ecx. + */ +#define TIME_TRAP_UENTRY \ + pushf /* Save flags */ ;\ + cli /* block interrupts */ ;\ + movl VA_ETC,%ebx /* get timer value */ ;\ + movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\ + movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\ + subl %ecx,%ebx /* elapsed = new-old */ ;\ + movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\ + addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\ + jns 0f /* if overflow, */ ;\ + call timer_normalize /* normalize timer */ ;\ +0: addl $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx ;\ + /* switch to sys timer */;\ + movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ ;\ + popf /* allow interrupts */ + +/* + * Update time on system call entry. + * 11 instructions (including cli on entry) + * Assumes CPU number in %edx. + * Uses %ebx, %ecx. + * Same as TIME_TRAP_UENTRY, but preserves %eax. + */ +#define TIME_TRAP_SENTRY \ + pushf /* Save flags */ ;\ + cli /* block interrupts */ ;\ + movl VA_ETC,%ebx /* get timer value */ ;\ + movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\ + movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\ + subl %ecx,%ebx /* elapsed = new-old */ ;\ + movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\ + addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\ + jns 0f /* if overflow, */ ;\ + pushq %rax /* save %rax */ ;\ + call timer_normalize /* normalize timer */ ;\ + popq %rax /* restore %rax */ ;\ +0: addl $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx ;\ + /* switch to sys timer */;\ + movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ ;\ + popf /* allow interrupts */ + +/* + * update time on user trap exit. + * 10 instructions. + * Assumes CPU number in %edx. + * Uses %ebx, %ecx. + */ +#define TIME_TRAP_UEXIT \ + cli /* block interrupts */ ;\ + movl VA_ETC,%ebx /* get timer */ ;\ + movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\ + movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\ + subl %ecx,%ebx /* elapsed = new-old */ ;\ + movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\ + addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\ + jns 0f /* if overflow, */ ;\ + call timer_normalize /* normalize timer */ ;\ +0: addl $(TH_USER_TIMER-TH_SYSTEM_TIMER),%ecx ;\ + /* switch to user timer */;\ + movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ + +/* + * update time on interrupt entry. + * 9 instructions. + * Assumes CPU number in %edx. + * Leaves old timer in %ebx. + * Uses %ecx. + */ +#define TIME_INT_ENTRY \ + movl VA_ETC,%ecx /* get timer */ ;\ + movl CX(EXT(current_tstamp),%rdx),%ebx /* get old time stamp */;\ + movl %ecx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\ + subl %ebx,%ecx /* elapsed = new-old */ ;\ + movl CX(EXT(current_timer),%rdx),%ebx /* get current timer */ ;\ + addl %ecx,LOW_BITS(%ebx) /* add to low bits */ ;\ + leal CX(0,%rdx),%ecx /* timer is 16 bytes */ ;\ + lea CX(EXT(kernel_timer),%rdx),%ecx /* get interrupt timer*/;\ + movl %ecx,CX(EXT(current_timer),%rdx) /* set timer */ + +/* + * update time on interrupt exit. + * 11 instructions + * Assumes CPU number in %edx, old timer in %ebx. 
+ * Uses %eax, %ecx. + */ +#define TIME_INT_EXIT \ + movl VA_ETC,%eax /* get timer */ ;\ + movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\ + movl %eax,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\ + subl %ecx,%eax /* elapsed = new-old */ ;\ + movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\ + addl %eax,LOW_BITS(%ecx) /* add to low bits */ ;\ + jns 0f /* if overflow, */ ;\ + call timer_normalize /* normalize timer */ ;\ +0: testb $0x80,LOW_BITS+3(%ebx) /* old timer overflow? */;\ + jz 0f /* if overflow, */ ;\ + movl %ebx,%ecx /* get old timer */ ;\ + call timer_normalize /* normalize timer */ ;\ +0: movl %ebx,CX(EXT(current_timer),%rdx) /* set timer */ + + +/* + * Normalize timer in ecx. + * Preserves edx; clobbers eax. + */ + .align 2 +timer_high_unit: + .long TIMER_HIGH_UNIT /* div has no immediate opnd */ + +timer_normalize: + pushq %rdx /* save register */ + xorl %edx,%edx /* clear divisor high */ + movl LOW_BITS(%ecx),%eax /* get divisor low */ + divl timer_high_unit,%eax /* quotient in eax */ + /* remainder in edx */ + addl %eax,HIGH_BITS_CHECK(%ecx) /* add high_inc to check */ + movl %edx,LOW_BITS(%ecx) /* remainder to low_bits */ + addl %eax,HIGH_BITS(%ecx) /* add high_inc to high bits */ + popq %rdx /* restore register */ + ret + +/* + * Switch to a new timer. + */ +ENTRY(timer_switch) + CPU_NUMBER(%edx) /* get this CPU */ + movl VA_ETC,%ecx /* get timer */ + movl CX(EXT(current_tstamp),%rdx),%eax /* get old time stamp */ + movl %ecx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */ + subl %ecx,%eax /* elapsed = new - old */ + movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ + addl %eax,LOW_BITS(%ecx) /* add to low bits */ + jns 0f /* if overflow, */ + call timer_normalize /* normalize timer */ +0: + movl S_ARG0,%ecx /* get new timer */ + movl %ecx,CX(EXT(current_timer),%rdx) /* set timer */ + ret + +/* + * Initialize the first timer for a CPU. + */ +ENTRY(start_timer) + CPU_NUMBER(%edx) /* get this CPU */ + movl VA_ETC,%ecx /* get timer */ + movl %ecx,CX(EXT(current_tstamp),%rdx) /* set initial time stamp */ + movl S_ARG0,%ecx /* get timer */ + movl %ecx,CX(EXT(current_timer),%rdx) /* set initial timer */ + ret + +#endif /* accurate timing */ + +/* */ + +/* + * Trap/interrupt entry points. + * + * All traps must create the i386_saved_state struct on the stack on + * entry. Note that: + * - CR2 is only used if the trap is a page fault + * - user_rsp/user_ss are only used if entering from user space + * - v86_regs are used only from V86 threads + * (TODO check if V86 is still used with USER32) + * + * Depending the CPL before entry, the stack might be switched or not; + * if entering from user-space the CPU loads TSS->RSP0 in RSP, + * otherwise RSP is unchanged. After this, the cpu pushes + * SS/RSP/RFLAFS/CS/RIP and optionally ErrorCode and executes the handler. + */ + +/* Try to save/show some information when a double fault happens + * We can't recover to a working state, so if we have a debugger wait for it, + * otherwise reset */ +ENTRY(t_dbl_fault) + INT_FIX + cli /* disable interrupts that might corrupt the state*/ + pusha + movq %cr2,%rax + movq %rax,R_CR2-R_R15(%rsp) /* CR2 might contain the faulting address */ + subq $48,%rsp // FIXME remove when segments are cleaned up + movq %rsp,%rdi /* pass the saved state */ + call handle_double_fault + jmp cpu_shutdown /* reset */ +END(t_dbl_fault) + +/* + * General protection or segment-not-present fault. 
+ * Check for a GP/NP fault in the kernel_return + * sequence; if there, report it as a GP/NP fault on the user's instruction. + * + * rsp-> 0: trap code (NP or GP) + * 8: segment number in error + * 16 eip + * 24 cs + * 32 eflags + * 40 old registers (trap is from kernel) + */ +ENTRY(t_gen_prot) + INT_FIX + pushq $(T_GENERAL_PROTECTION) /* indicate fault type */ + jmp trap_check_kernel_exit /* check for kernel exit sequence */ + +ENTRY(t_segnp) + INT_FIX + pushq $(T_SEGMENT_NOT_PRESENT) + /* indicate fault type */ + +trap_check_kernel_exit: +#ifdef USER32 + testq $(EFL_VM),32(%rsp) /* is trap from V86 mode? */ + jnz EXT(alltraps) /* isn`t kernel trap if so */ +#endif + /* Note: handling KERNEL_RING value by hand */ + testq $2,24(%rsp) /* is trap from kernel mode? */ + jnz EXT(alltraps) /* if so: */ + /* check for the kernel exit sequence */ + cmpq $_kret_iret,16(%rsp) /* on IRET? */ + je fault_iret +#ifdef USER32 + cmpq $_kret_popl_ds,16(%rsp) /* popping DS? */ + je fault_popl_ds + cmpq $_kret_popl_es,16(%rsp) /* popping ES? */ + je fault_popl_es + cmpq $_kret_popl_fs,16(%rsp) /* popping FS? */ + je fault_popl_fs + cmpq $_kret_popl_gs,16(%rsp) /* popping GS? */ + je fault_popl_gs +#endif +take_fault: /* if none of the above: */ + jmp EXT(alltraps) /* treat as normal trap. */ + +/* + * GP/NP fault on IRET: CS or SS is in error. + * All registers contain the user's values. + * + * on SP is + * 0 trap number + * 8 errcode + * 16 eip + * 24 cs --> trapno + * 32 efl --> errcode + * 40 user eip + * 48 user cs + * 56 user eflags + * 64 user rsp + * 72 user ss + */ +fault_iret: + movq %rax,16(%rsp) /* save eax (we don`t need saved eip) */ + popq %rax /* get trap number */ + movq %rax,24-8(%rsp) /* put in user trap number */ + popq %rax /* get error code */ + movq %rax,32-16(%rsp) /* put in user errcode */ + popq %rax /* restore eax */ + jmp EXT(alltraps) /* take fault */ + +#ifdef USER32 +/* + * Fault restoring a segment register. The user's registers are still + * saved on the stack. The offending segment register has not been + * popped. + */ +fault_popl_ds: + popq %rax /* get trap number */ + popq %rdx /* get error code */ + addq $24,%rsp /* pop stack to user regs */ + jmp push_es /* (DS on top of stack) */ +fault_popl_es: + popq %rax /* get trap number */ + popq %rdx /* get error code */ + addq $24,%rsp /* pop stack to user regs */ + jmp push_fs /* (ES on top of stack) */ +fault_popl_fs: + popq %rax /* get trap number */ + popq %rdx /* get error code */ + addq $24,%rsp /* pop stack to user regs */ + jmp push_gs /* (FS on top of stack) */ +fault_popl_gs: + popq %rax /* get trap number */ + popq %rdx /* get error code */ + addq $24,%rsp /* pop stack to user regs */ + jmp push_segregs /* (GS on top of stack) */ + +push_es: + movq %es,%rcx + pushq %rcx /* restore es, */ +push_fs: + pushq %fs /* restore fs, */ +push_gs: + pushq %gs /* restore gs. */ +push_gsbase: + pushq $0 + pushq $0 +#endif +push_segregs: + movq %rax,R_TRAPNO(%rsp) /* set trap number */ + movq %rdx,R_ERR(%rsp) /* set error code */ + jmp trap_set_segs /* take trap */ + +/* + * Debug trap. Check for single-stepping across system call into + * kernel. If this is the case, taking the debug trap has turned + * off single-stepping - save the flags register with the trace + * bit set. + */ +ENTRY(t_debug) + INT_FIX +#ifdef USER32 + testq $(EFL_VM),16(%rsp) /* is trap from V86 mode? */ + jnz 0f /* isn`t kernel trap if so */ +#endif + /* Note: handling KERNEL_RING value by hand */ + testq $2,8(%rsp) /* is trap from kernel mode? 
*/ + jnz 0f /* if so: */ +#ifdef USER32 + cmpq $syscall_entry,(%rsp) /* system call entry? */ + jne 0f /* if so: */ + /* flags are sitting where syscall */ + /* wants them */ + addq $32,%rsp /* remove eip/cs */ + jmp syscall_entry_2 /* continue system call entry */ +#else + // TODO: implement the 64-bit case + ud2 +#endif +0: pushq $0 /* otherwise: */ + pushq $(T_DEBUG) /* handle as normal */ + jmp EXT(alltraps) /* debug fault */ + +/* + * Page fault traps save cr2. + */ +ENTRY(t_page_fault) + INT_FIX + pushq $(T_PAGE_FAULT) /* mark a page fault trap */ + pusha /* save the general registers */ +#ifdef MACH_XEN + movq %ss:hyp_shared_info+CR2,%rax +#else /* MACH_XEN */ + movq %cr2,%rax /* get the faulting address */ +#endif /* MACH_XEN */ + movq %rax,R_CR2-R_R15(%rsp) /* save in rsp save slot */ + jmp trap_push_segs /* continue fault */ + +/* + * All 'exceptions' enter here with: + * rsp-> trap number + * error code + * old eip + * old cs + * old eflags + * old rsp if trapped from user + * old ss if trapped from user + */ +ENTRY(alltraps) + pusha /* save the general registers */ +trap_push_segs: + PUSH_SEGMENTS(%rax) /* and the segment registers */ + SET_KERNEL_SEGMENTS(%rax) /* switch to kernel data segment */ +trap_set_segs: + cld /* clear direction flag */ +#ifdef USER32 + testl $(EFL_VM),R_EFLAGS(%rsp) /* in V86 mode? */ + jnz trap_from_user /* user mode trap if so */ +#endif + /* Note: handling KERNEL_RING value by hand */ + testb $2,R_CS(%rsp) /* user mode trap? */ + jz trap_from_kernel /* kernel trap if not */ +trap_from_user: + + CPU_NUMBER(%edx) + TIME_TRAP_UENTRY + + movq CX(EXT(kernel_stack),%rdx),%rbx + xchgq %rbx,%rsp /* switch to kernel stack */ + /* user regs pointer already set */ +_take_trap: + movq %rbx,%rdi /* pass register save area to trap */ + call EXT(user_trap) /* call user trap routine */ +#ifdef USER32 + orq %rax,%rax /* emulated syscall? */ + jz 1f /* no, just return */ + movq R_EAX(%rbx),%rax /* yes, get syscall number */ + jmp syscall_entry_3 /* and emulate it */ +#endif +1: + movq (%rsp),%rsp /* switch back to PCB stack */ + +/* + * Return from trap or system call, checking for ASTs. + * On PCB stack. + */ + +_return_from_trap: + CPU_NUMBER(%edx) + cmpl $0,CX(EXT(need_ast),%rdx) + jz _return_to_user /* if we need an AST: */ + + movq CX(EXT(kernel_stack),%rdx),%rsp + /* switch to kernel stack */ + call EXT(i386_astintr) /* take the AST */ + popq %rsp /* switch back to PCB stack */ + jmp _return_from_trap /* and check again (rare) */ + /* ASTs after this point will */ + /* have to wait */ + +_return_to_user: + TIME_TRAP_UEXIT + +/* + * Return from kernel mode to interrupted thread. + */ + +_return_from_kernel: +#ifdef USER32 +_kret_popl_gs: + popq %gs /* restore segment registers */ +_kret_popl_fs: + popq %fs +_kret_popl_es: + popq %rax + movq %rax,%es +_kret_popl_ds: + popq %rax + movq %rax,%ds +#endif + popa /* restore general registers */ + addq $16,%rsp /* discard trap number and error code */ +_kret_iret: + iretq /* return from interrupt */ + + +/* + * Trap from kernel mode. No need to switch stacks. + */ +trap_from_kernel: +#if MACH_KDB || MACH_TTD + movq %rsp,%rbx /* save current stack */ + movq %rsp,%rdx /* on an interrupt stack? */ + + CPU_NUMBER(%ecx) + and $(~(INTSTACK_SIZE-1)),%rdx + cmpq CX(EXT(int_stack_base),%rcx),%rdx + je 1f /* OK if so */ + + movl %ecx,%edx + cmpq CX(EXT(kernel_stack),%rdx),%rsp + /* already on kernel stack? 
*/ + ja 0f + cmpq MY(ACTIVE_STACK),%rsp + ja 1f /* switch if not */ +0: + movq CX(EXT(kernel_stack),%rdx),%rsp +1: + pushq %rbx /* save old stack */ + movq %rbx,%rdi /* pass as parameter */ + call EXT(kernel_trap) /* to kernel trap routine */ + + popq %rsp /* return to old stack */ +#else /* MACH_KDB || MACH_TTD */ + + movq %rsp,%rdi /* pass parameter */ + call EXT(kernel_trap) /* to kernel trap routine */ + +#endif /* MACH_KDB || MACH_TTD */ + + jmp _return_from_kernel + + +/* + * Called as a function, makes the current thread + * return from the kernel as if from an exception. + */ + +ENTRY(thread_exception_return) +ENTRY(thread_bootstrap_return) + movq %rsp,%rcx /* get kernel stack */ + or $(KERNEL_STACK_SIZE-1),%rcx + movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */ + jmp _return_from_trap + +/* + * Called as a function, makes the current thread + * return from the kernel as if from a syscall. + * Takes the syscall's return code as an argument. + */ + +ENTRY(thread_syscall_return) + movq S_ARG0,%rax /* get return value */ + movq %rsp,%rcx /* get kernel stack */ + or $(KERNEL_STACK_SIZE-1),%rcx + movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */ + movq %rax,R_EAX(%rsp) /* save return value */ + jmp _return_from_trap + +ENTRY(call_continuation) + movq S_ARG0,%rax /* get continuation */ + movq %rsp,%rcx /* get kernel stack */ + or $(KERNEL_STACK_SIZE-1),%rcx + addq $(-7-IKS_SIZE),%rcx + movq %rcx,%rsp /* pop the stack */ + xorq %rbp,%rbp /* zero frame pointer */ + pushq $0 /* Dummy return address */ + jmp *%rax /* goto continuation */ + +/* IOAPIC has 24 interrupts, put spurious in the same array */ + +#define INTERRUPT(n) \ + .data 2 ;\ + .quad 0f ;\ + .text ;\ + P2ALIGN(TEXT_ALIGN) ;\ +0: ;\ + INT_FIX ;\ + pushq %rax ;\ + movq $(n),%rax ;\ + jmp EXT(all_intrs) + + .data 2 +DATA(int_entry_table) + .text +/* Legacy APIC interrupts or PIC interrupts */ +INTERRUPT(0) +INTERRUPT(1) +INTERRUPT(2) +INTERRUPT(3) +INTERRUPT(4) +INTERRUPT(5) +INTERRUPT(6) +INTERRUPT(7) +INTERRUPT(8) +INTERRUPT(9) +INTERRUPT(10) +INTERRUPT(11) +INTERRUPT(12) +INTERRUPT(13) +INTERRUPT(14) +INTERRUPT(15) +#ifdef APIC +/* APIC PCI interrupts PIRQ A-H */ +INTERRUPT(16) +INTERRUPT(17) +INTERRUPT(18) +INTERRUPT(19) +INTERRUPT(20) +INTERRUPT(21) +INTERRUPT(22) +INTERRUPT(23) +#endif +#if NCPUS > 1 +INTERRUPT(CALL_AST_CHECK) +INTERRUPT(CALL_PMAP_UPDATE) +#endif +#ifdef APIC +/* Spurious interrupt, set irq number to vect number */ +INTERRUPT(255) +#endif + +/* XXX handle NMI - at least print a warning like Linux does. */ + +/* + * All interrupts enter here. The cpu might have loaded a new RSP, + * depending on the previous CPL, as in alltraps. + * Old %eax on stack, interrupt number in %eax; we need to fill the remaining + * fields of struct i386_interrupt_state, which might be in the pcb or in the + * interrupt stack. + */ +ENTRY(all_intrs) + PUSH_REGS_ISR /* save registers */ + cld /* clear direction flag */ + + PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */ + + CPU_NUMBER_NO_GS(%ecx) + movq %rsp,%rdx /* on an interrupt stack? 
*/ + and $(~(INTSTACK_SIZE-1)),%rdx + cmpq %ss:CX(EXT(int_stack_base),%rcx),%rdx + je int_from_intstack /* if not: */ + + SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel segments */ + + CPU_NUMBER(%edx) + + movq CX(EXT(int_stack_top),%rdx),%rcx + + xchgq %rcx,%rsp /* switch to interrupt stack */ + +#if STAT_TIME + pushq %rcx /* save pointer to old stack */ +#else + pushq %rbx /* save %ebx - out of the way */ + /* so stack looks the same */ + pushq %rcx /* save pointer to old stack */ + TIME_INT_ENTRY /* do timing */ +#endif + +#ifdef MACH_LDEBUG + incl CX(EXT(in_interrupt),%rdx) +#endif + + call EXT(interrupt) /* call generic interrupt routine */ + .globl EXT(return_to_iret) /* ( label for kdb_kintr and hardclock */ +LEXT(return_to_iret) /* to find the return from calling interrupt) */ + + CPU_NUMBER(%edx) +#ifdef MACH_LDEBUG + decl CX(EXT(in_interrupt),%rdx) +#endif + +#if STAT_TIME +#else + TIME_INT_EXIT /* do timing */ + movq 8(%rsp),%rbx /* restore the extra reg we saved */ +#endif + + popq %rsp /* switch back to old stack */ + +#ifdef USER32 + testl $(EFL_VM),I_EFL(%rsp) /* if in V86 */ + jnz 0f /* or */ +#endif + /* Note: handling KERNEL_RING value by hand */ + testb $2,I_CS(%rsp) /* user mode, */ + jz 1f /* check for ASTs */ +0: + cmpq $0,CX(EXT(need_ast),%rdx) + jnz ast_from_interrupt /* take it if so */ +1: + POP_SEGMENTS_ISR(%rdx) /* restore segment regs */ + POP_AREGS_ISR /* restore registers */ + + iretq /* return to caller */ + +int_from_intstack: + CPU_NUMBER_NO_GS(%edx) + cmpq CX(EXT(int_stack_base),%rdx),%rsp /* seemingly looping? */ + jb stack_overflowed /* if not: */ + call EXT(interrupt) /* call interrupt routine */ +_return_to_iret_i: /* ( label for kdb_kintr) */ + POP_SEGMENTS_ISR(%rdx) + POP_AREGS_ISR /* restore registers */ + /* no ASTs */ + + iretq + +stack_overflowed: + ud2 + +/* + * Take an AST from an interrupt. + * On PCB stack. + * sp-> gs -> edx + * fs -> ecx + * es -> eax + * ds -> trapno + * edx -> code + * ecx + * eax + * eip + * cs + * efl + * rsp + * ss + */ +ast_from_interrupt: + POP_SEGMENTS_ISR(%rdx) /* restore all registers ... */ + POP_AREGS_ISR + pushq $0 /* zero code */ + pushq $0 /* zero trap number */ + pusha /* save general registers */ + PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */ + SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel segments */ + CPU_NUMBER(%edx) + TIME_TRAP_UENTRY + + movq CX(EXT(kernel_stack),%rdx),%rsp + /* switch to kernel stack */ + call EXT(i386_astintr) /* take the AST */ + popq %rsp /* back to PCB stack */ + jmp _return_from_trap /* return */ + +#if MACH_KDB +/* + * kdb_kintr: enter kdb from keyboard interrupt. + * Chase down the stack frames until we find one whose return + * address is the interrupt handler. At that point, we have: + * + * frame-> saved %rbp + * return address in interrupt handler + * saved SPL + * saved IRQ + * return address == return_to_iret_i + * saved %r11 + * saved %r10 + * saved %r9 + * saved %r8 + * saved %rdx + * saved %rcx + * saved %rax + * saved %rip + * saved %cs + * saved %rfl + * + * OR: + * frame-> saved %rbp + * return address in interrupt handler + * return address == return_to_iret + * pointer to save area on old stack + * [ saved %ebx, if accurate timing ] + * + * old stack: saved %gs + * saved %fs + * saved %es + * saved %ds + * saved %r11 + * saved %r10 + * saved %r9 + * saved %r8 + * saved %rdi + * saved %rsi + * saved %rdx + * saved %rcx + * saved %eax + * saved %rip + * saved %cs + * saved %rfl + * + * Call kdb, passing it that register save area. 
+ */ + +#define RET_OFFSET 32 + + +ENTRY(kdb_kintr) + movq %rbp,%rax /* save caller`s frame pointer */ + movq $EXT(return_to_iret),%rcx /* interrupt return address 1 */ + movq $_return_to_iret_i,%rdx /* interrupt return address 2 */ + +0: cmpq RET_OFFSET(%rax),%rcx /* does this frame return to */ + /* interrupt handler (1)? */ + je 1f + cmpq RET_OFFSET(%rax),%rdx /* interrupt handler (2)? */ + je 2f /* if not: */ + movq (%rax),%rax /* try next frame */ + testq %rax,%rax + jnz 0b + ud2 /* oops, didn't find frame, fix me :/ */ + +1: movq $kdb_from_iret,RET_OFFSET(%rax) + ret /* returns to kernel/user stack */ + +2: movq $kdb_from_iret_i,RET_OFFSET(%rax) + /* returns to interrupt stack */ + ret + +/* + * On return from keyboard interrupt, we will execute + * kdb_from_iret_i + * if returning to an interrupt on the interrupt stack + * kdb_from_iret + * if returning to an interrupt on the user or kernel stack + */ +kdb_from_iret: + /* save regs in known locations */ +#if STAT_TIME + pushq %rbx /* caller`s %ebx is in reg */ +#else + movq 8(%rsp),%rax /* get caller`s %ebx */ + pushq %rax /* push on stack */ +#endif + pushq %rbp + movq %rsp,%rdi /* pass regs */ + call EXT(kdb_kentry) /* to kdb */ + popq %rbp +#if STAT_TIME + popq %rbx +#else + popq %rax + movq %rax,8(%rsp) +#endif + jmp EXT(return_to_iret) /* normal interrupt return */ + +kdb_from_iret_i: /* on interrupt stack */ + pop %rdx /* restore saved registers */ + pop %rcx + pop %rax + pushq $0 /* zero error code */ + pushq $0 /* zero trap number */ + pusha /* save general registers */ + PUSH_SEGMENTS(%rdx) /* save segment registers */ + movq %rsp,%rdx /* pass regs, */ + movq $0,%rsi /* code, */ + movq $-1,%rdi /* type to kdb */ + call EXT(kdb_trap) + POP_SEGMENTS(%rdx) /* restore segment registers */ + popa /* restore general registers */ + addq $16,%rsp + +// TODO: test it before dropping ud2 +movq (%rsp),%rax +ud2 + iretq + +#endif /* MACH_KDB */ + +#if MACH_TTD +/* + * Same code as that above for the keyboard entry into kdb. + */ +ENTRY(kttd_intr) +// TODO: test it before dropping ud2 +ud2 + movq %rbp,%rax /* save caller`s frame pointer */ + movq $EXT(return_to_iret),%rcx /* interrupt return address 1 */ + movq $_return_to_iret_i,%rdx /* interrupt return address 2 */ + +0: cmpq 32(%rax),%rcx /* does this frame return to */ + /* interrupt handler (1)? */ + je 1f + cmpq 32(%rax),%rdx /* interrupt handler (2)? 
*/ + je 2f /* if not: */ + movq (%rax),%rax /* try next frame */ + jmp 0b + +1: movq $ttd_from_iret,32(%rax) /* returns to kernel/user stack */ + ret + +2: movq $ttd_from_iret_i,32(%rax) + /* returns to interrupt stack */ + ret + +/* + * On return from keyboard interrupt, we will execute + * ttd_from_iret_i + * if returning to an interrupt on the interrupt stack + * ttd_from_iret + * if returning to an interrupt on the user or kernel stack + */ +ttd_from_iret: + /* save regs in known locations */ +#if STAT_TIME + pushq %rbx /* caller`s %ebx is in reg */ +#else + movq 8(%rsp),%rax /* get caller`s %ebx */ + pushq %rax /* push on stack */ +#endif + pushq %rbp + pushq %rsi + pushq %rdi + movq %rsp,%rdi /* pass regs */ + call _kttd_netentry /* to kdb */ + popq %rdi /* restore registers */ + popq %rsi + popq %rbp +#if STAT_TIME + popq %rbx +#else + popq %rax + movq %rax,8(%rsp) +#endif + jmp EXT(return_to_iret) /* normal interrupt return */ + +ttd_from_iret_i: /* on interrupt stack */ + pop %rdx /* restore saved registers */ + pop %rcx + pop %rax + pushq $0 /* zero error code */ + pushq $0 /* zero trap number */ + pusha /* save general registers */ + PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */ + ud2 // TEST it + movq %rsp,%rdx /* pass regs, */ + movq $0,%rsi /* code, */ + movq $-1,%rdi /* type to kdb */ + call _kttd_trap + POP_SEGMENTS_ISR(%rdx) /* restore segment registers */ + popa /* restore general registers */ + addq $16,%rsp + +// TODO: test it before dropping ud2 +movq (%rsp),%rax +ud2 + iretq + +#endif /* MACH_TTD */ + +#ifdef USER32 +/* + * System call enters through a call gate. Flags are not saved - + * we must shuffle stack to look like trap save area. + * + * rsp-> old eip + * old cs + * old rsp + * old ss + * + * eax contains system call number. + */ +ENTRY(syscall) +syscall_entry: + pushf /* save flags as soon as possible */ +syscall_entry_2: + cld /* clear direction flag */ + + pushq %rax /* save system call number */ + pushq $0 /* clear trap number slot */ + + pusha /* save the general registers */ + PUSH_SEGMENTS(%rdx) /* and the segment registers */ + SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel data segment */ + +/* + * Shuffle eflags,eip,cs into proper places + */ + + movq R_EIP(%rsp),%rbx /* eflags are in EIP slot */ + movq R_CS(%rsp),%rcx /* eip is in CS slot */ + movq R_EFLAGS(%rsp),%rdx /* cs is in EFLAGS slot */ + movq %rcx,R_EIP(%rsp) /* fix eip */ + movq %rdx,R_CS(%rsp) /* fix cs */ + movq %rbx,R_EFLAGS(%rsp) /* fix eflags */ + + CPU_NUMBER_NO_STACK(%edx) + TIME_TRAP_SENTRY + + movq CX(EXT(kernel_stack),%rdx),%rbx + /* get current kernel stack */ + xchgq %rbx,%rsp /* switch stacks - %ebx points to */ + /* user registers. */ + /* user regs pointer already set */ + +/* + * Check for MACH or emulated system call + */ +syscall_entry_3: + movq MY(ACTIVE_THREAD),%rdx + /* point to current thread */ + movq TH_TASK(%rdx),%rdx /* point to task */ + movq TASK_EMUL(%rdx),%rdx /* get emulation vector */ + orq %rdx,%rdx /* if none, */ + je syscall_native /* do native system call */ + movq %rax,%rcx /* copy system call number */ + subq DISP_MIN(%rdx),%rcx /* get displacement into syscall */ + /* vector table */ + jl syscall_native /* too low - native system call */ + cmpq DISP_COUNT(%rdx),%rcx /* check range */ + jnl syscall_native /* too high - native system call */ + movq DISP_VECTOR(%rdx,%rcx,4),%rdx + /* get the emulation vector */ + orq %rdx,%rdx /* emulated system call if not zero */ + jnz syscall_emul + +/* + * Native system call. 
+ */ +syscall_native: + negl %eax /* get system call number */ + jl mach_call_range /* out of range if it was positive */ + cmpl EXT(mach_trap_count),%eax /* check system call table bounds */ + jg mach_call_range /* error if out of range */ +#if 0 /* debug hack to show the syscall number on the screen */ + movb %al,%dl + shrb $4,%dl + orb $0x30,%dl + movb $0x0f,%dh + movw %dx,0xb800a + movb %al,%dl + andb $0xf,%dl + orb $0x30,%dl + movb $0xf,%dh + movw %dx,0xb800c +#endif + shll $5,%eax /* manual indexing of mach_trap_t */ + xorq %r10,%r10 + mov EXT(mach_trap_table)(%rax),%r10 + /* get number of arguments */ + andq %r10,%r10 + jz mach_call_call /* skip argument copy if none */ + + movq $USER_DS,%rdx /* use user data segment for accesses */ + mov %dx,%fs + movq %rsp,%r11 /* save kernel ESP for error recovery */ + + movq R_UESP(%rbx),%rbp /* get user stack pointer */ + addq $4,%rbp /* Skip user return address */ + +#define PARAM(reg,ereg) \ + xorq %reg,%reg ;\ + RECOVER(mach_call_addr_push) \ + movl %fs:(%rbp),%ereg /* 1st parameter */ ;\ + addq $4,%rbp ;\ + dec %r10 ;\ + jz mach_call_call + + PARAM(rdi,edi) /* 1st parameter */ + PARAM(rsi,esi) /* 2nd parameter */ + PARAM(rdx,edx) /* 3rd parameter */ + PARAM(rcx,ecx) /* 4th parameter */ + PARAM(r8,r8d) /* 5th parameter */ + PARAM(r9,r9d) /* 6th parameter */ + + lea (%rbp,%r10,4),%rbp /* point past last argument */ + xorq %r12,%r12 + +0: subq $4,%rbp + RECOVER(mach_call_addr_push) + movl %fs:(%rbp),%r12d + pushq %r12 /* push argument on stack */ + dec %r10 + jnz 0b /* loop for all arguments */ + +mach_call_call: + +#ifdef DEBUG + testb $0xff,EXT(syscall_trace) + jz 0f + movq %rax,%rdi + call EXT(syscall_trace_print) + /* will return with syscallofs still (or again) in eax */ +0: +#endif /* DEBUG */ + call *EXT(mach_trap_table)+8(%rax) /* call procedure */ + movq %rsp,%rcx /* get kernel stack */ + or $(KERNEL_STACK_SIZE-1),%rcx + movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */ + movq %rax,R_EAX(%rsp) /* save return value */ + jmp _return_from_trap /* return to user */ + +/* + * Address out of range. Change to page fault. + * %rbp holds failing address. + */ +mach_call_addr_push: + movq %r11,%rsp /* clean parameters from stack */ +mach_call_addr: + movq %rbp,R_CR2(%rbx) /* set fault address */ + movq $(T_PAGE_FAULT),R_TRAPNO(%rbx) + /* set page-fault trap */ + movq $(T_PF_USER),R_ERR(%rbx) + /* set error code - read user space */ + jmp _take_trap /* treat as a trap */ + +/* + * System call out of range. Treat as invalid-instruction trap. + * (? general protection?) + */ +mach_call_range: + movq $(T_INVALID_OPCODE),R_TRAPNO(%rbx) + /* set invalid-operation trap */ + movq $0,R_ERR(%rbx) /* clear error code */ + jmp _take_trap /* treat as a trap */ + +/* + * User space emulation of system calls. + * edx - user address to handle syscall + * + * User stack will become: + * ursp-> eflags + * eip + * eax still contains syscall number. + */ +syscall_emul: + movq $USER_DS,%rdi /* use user data segment for accesses */ + mov %di,%fs + +/* XXX what about write-protected pages? 
*/ + movq R_UESP(%rbx),%rdi /* get user stack pointer */ + subq $16,%rdi /* push space for new arguments */ + movq R_EFLAGS(%rbx),%rax /* move flags */ + RECOVER(syscall_addr) + movl %eax,%fs:0(%rdi) /* to user stack */ + movl R_EIP(%rbx),%eax /* move eip */ + RECOVER(syscall_addr) + movl %eax,%fs:4(%rdi) /* to user stack */ + movq %rdi,R_UESP(%rbx) /* set new user stack pointer */ + movq %rdx,R_EIP(%rbx) /* change return address to trap */ + movq %rbx,%rsp /* back to PCB stack */ +// TODO: test it before dropping ud2 +ud2 + jmp _return_from_trap /* return to user */ + +/* + * Address error - address is in %edi. + */ +syscall_addr: + movq %rdi,R_CR2(%rbx) /* set fault address */ + movq $(T_PAGE_FAULT),R_TRAPNO(%rbx) + /* set page-fault trap */ + movq $(T_PF_USER),R_ERR(%rbx) + /* set error code - read user space */ + jmp _take_trap /* treat as a trap */ +END(syscall) + +#else /* USER32 */ + +/* Entry point for 64-bit syscalls. + * On entry we're still on the user stack, so better not use it. Instead we + * save the thread state immediately in thread->pcb->iss, then try to invoke + * the syscall. + * Note: emulated syscalls seem to not be used anymore in GNU/Hurd, so they + * are not handled here. + * TODO: + - for now we assume the return address is canonical, but apparently there + can be cases where it's not (see how Linux handles this). Does it apply + here? + - check that the case where a task is suspended, and later returns via + iretq from return_from_trap, works fine in all combinations + */ +ENTRY(syscall64) + /* RFLAGS[32:63] are reserved, so combine syscall num (32 bit) and + * eflags in RAX to allow using r11 as temporary register + */ + shlq $32,%r11 + shlq $32,%rax /* make sure bits 32:63 of %rax are zero */ + shrq $32,%rax + or %r11,%rax + + /* Save thread state in pcb->iss, as on exception entry. + * Since this is triggered synchronously from userspace, we could + * save only the callee-preserved status according to the C ABI, + * plus RIP and EFLAGS for sysret + */ + movq MY(ACTIVE_THREAD),%r11 /* point to current thread */ + movq TH_PCB(%r11),%r11 /* point to pcb */ + addq $ PCB_ISS,%r11 /* point to saved state */ + + mov %rsp,R_UESP(%r11) /* callee-preserved register */ + mov %rcx,R_EIP(%r11) /* syscall places user RIP in RCX */ + mov %rbx,R_EBX(%r11) /* callee-preserved register */ + mov %rax,%rbx /* Now we can unpack eflags again */ + shr $32,%rbx + mov %rbx,R_EFLAGS(%r11) /* ... and save them in pcb as well */ + mov %rbp,R_EBP(%r11) /* callee-preserved register */ + mov %r12,R_R12(%r11) /* callee-preserved register */ + mov %r13,R_R13(%r11) /* callee-preserved register */ + mov %r14,R_R14(%r11) /* callee-preserved register */ + mov %r15,R_R15(%r11) /* callee-preserved register */ + + /* Save syscall number and args for SYSCALL_EXAMINE/MSG_EXAMINE in glibc. + * Note: syscall number is only 32 bit, in EAX, so we sign-extend it in + * RAX to mask the EFLAGS bits. 
+ */ + cdqe /* sign-extend EAX in RAX */ + mov %rax,R_EAX(%r11) /* syscall number */ + mov %rdi,R_EDI(%r11) /* syscall arg0 */ + mov %rsi,R_ESI(%r11) /* syscall arg1 */ + mov %rdx,R_EDX(%r11) /* syscall arg2 */ + mov %r10,R_R10(%r11) /* syscall arg3 */ + mov %r8,R_R8(%r11) /* syscall arg4 */ + mov %r9,R_R9(%r11) /* syscall arg5 */ + + mov %r11,%rbx /* prepare for error handling */ + mov %r10,%rcx /* fix arg3 location according to C ABI */ + + /* switch to kernel stack, then we can enable interrupts */ + CPU_NUMBER_NO_STACK(%r11d) + movq CX(EXT(kernel_stack),%r11),%rsp + sti + + /* Now we have saved state and args 1-6 are in place. + * Before invoking the syscall we do some bound checking and, + * if we have more that 6 arguments, we need to copy the + * remaining ones to the kernel stack, handling page faults when + * accessing the user stack. + */ + negl %eax /* get system call number */ + jl _syscall64_range /* out of range if it was positive */ + cmpl EXT(mach_trap_count),%eax /* check system call table bounds */ + jg _syscall64_range /* error if out of range */ + shll $5,%eax /* manual indexing of mach_trap_t */ + + /* check if we need to place some arguments on the stack */ +_syscall64_args_stack: + mov EXT(mach_trap_table)(%rax),%r10 /* get number of arguments */ + subq $6,%r10 /* the first 6 args are already in place */ + jle _syscall64_call /* skip argument copy if num args <= 6 */ + + movq R_UESP(%rbx),%r11 /* get user stack pointer */ + addq $8,%r11 /* Skip user return address */ + + lea (%r11,%r10,8),%r11 /* point past last argument */ + +0: subq $8,%r11 + RECOVER(_syscall64_addr_push) + mov (%r11),%r12 + pushq %r12 /* push argument on stack */ + dec %r10 + jnz 0b /* loop for all remaining arguments */ + +_syscall64_call: + call *EXT(mach_trap_table)+8(%rax) /* call procedure */ + +_syscall64_check_for_ast: + /* Check for ast. */ + CPU_NUMBER_NO_GS(%r11d) + cmpl $0,CX(EXT(need_ast),%r11) + jz _syscall64_restore_state + + /* Save the syscall return value, both on our stack, for the case + * i386_astintr returns normally, and in the PCB stack, in case it + * instead calls thread_block(thread_exception_return). + */ + pushq %rax /* save the return value on our stack */ + pushq $0 /* dummy value to keep the stack aligned */ + + /* Find the PCB stack. */ + movq %rsp,%rcx + or $(KERNEL_STACK_SIZE-1),%rcx + movq -7-IKS_SIZE(%rcx),%rcx + + movq %rax,R_EAX(%rcx) /* save the return value in the PCB stack */ + call EXT(i386_astintr) + popq %rax + popq %rax /* restore the return value */ + jmp _syscall64_check_for_ast /* check again */ + +_syscall64_restore_state: + /* Restore thread state and return to user using sysret. */ + cli /* block interrupts when using the user stack in kernel space */ + movq MY(ACTIVE_THREAD),%r11 /* point to current thread */ + movq TH_PCB(%r11),%r11 /* point to pcb */ + addq $ PCB_ISS,%r11 /* point to saved state */ + + /* Restore syscall args. 
Note: we can't restore the syscall number in + * RAX because it needs to hold the return value.*/ + mov R_EDI(%r11),%rdi /* syscall arg0 */ + mov R_ESI(%r11),%rsi /* syscall arg1 */ + mov R_EDX(%r11),%rdx /* syscall arg2 */ + mov R_R10(%r11),%r10 /* syscall arg3 */ + mov R_R8(%r11),%r8 /* syscall arg4 */ + mov R_R9(%r11),%r9 /* syscall arg5 */ + + mov R_UESP(%r11),%rsp /* callee-preserved register, + * also switch back to user stack */ + mov R_EIP(%r11),%rcx /* sysret convention */ + mov R_EBX(%r11),%rbx /* callee-preserved register */ + mov R_EBP(%r11),%rbp /* callee-preserved register */ + mov R_R12(%r11),%r12 /* callee-preserved register */ + mov R_R13(%r11),%r13 /* callee-preserved register */ + mov R_R14(%r11),%r14 /* callee-preserved register */ + mov R_R15(%r11),%r15 /* callee-preserved register */ + mov R_EFLAGS(%r11),%r11 /* sysret convention */ + + sysretq /* fast return to user-space, the thread didn't block */ + +/* Error handling fragments, from here we jump directly to the trap handler */ +_syscall64_addr_push: + movq %r11,R_CR2(%rbx) /* set fault address */ + movq $(T_PAGE_FAULT),R_TRAPNO(%rbx) /* set page-fault trap */ + movq $(T_PF_USER),R_ERR(%rbx) /* set error code - read user space */ + jmp _take_trap /* treat as a trap */ + +_syscall64_range: + movq $(T_INVALID_OPCODE),R_TRAPNO(%rbx) + /* set invalid-operation trap */ + movq $0,R_ERR(%rbx) /* clear error code */ + jmp _take_trap /* treat as a trap */ + +END(syscall64) +#endif /* USER32 */ + + .data +DATA(cpu_features) +DATA(cpu_features_edx) + .long 0 +DATA(cpu_features_ecx) + .long 0 + .text + +/* Discover what kind of cpu we have; return the family number + (3, 4, 5, 6, for 386, 486, 586, 686 respectively). */ +ENTRY(discover_x86_cpu_type) + /* We are a modern enough processor to have the CPUID instruction; + use it to find out what we are. */ + movl $1,%eax /* Fetch CPU type info ... */ + cpuid /* ... into eax */ + movl %ecx,cpu_features_ecx /* Keep a copy */ + movl %edx,cpu_features_edx /* Keep a copy */ + shrl $8,%eax /* Slide family bits down */ + andl $15,%eax /* And select them */ + ret /* And return */ + + +/* */ +/* + * Utility routines. + */ + +ENTRY(copyin) + xchgq %rsi,%rdi /* Get user source and kernel destination */ + +copyin_remainder: + /*cld*/ /* count up: default mode in all GCC code */ + movq %rdx,%rcx /* move by longwords first */ + shrq $3,%rcx + RECOVER(copyin_fail) + rep + movsq /* move longwords */ + movq %rdx,%rcx /* now move remaining bytes */ + andq $7,%rcx + RECOVER(copyin_fail) + rep + movsb + xorq %rax,%rax /* return 0 for success */ + +copyin_ret: + ret /* and return */ + +copyin_fail: + movq $1,%rax /* return 1 for failure */ + jmp copyin_ret /* pop frame and return */ + +bogus: + ud2 + +ENTRY(copyout) + xchgq %rsi,%rdi /* Get user source and kernel destination */ + +copyout_remainder: + movq %rdx,%rax /* use count */ + /*cld*/ /* count up: always this way in GCC code */ + movq %rax,%rcx /* move by longwords first */ + shrq $3,%rcx + RECOVER(copyout_fail) + rep + movsq + movq %rax,%rcx /* now move remaining bytes */ + andq $7,%rcx + RECOVER(copyout_fail) + rep + movsb /* move */ + xorq %rax,%rax /* return 0 for success */ + +copyout_ret: + ret /* and return */ + +copyout_fail: + movq $1,%rax /* return 1 for failure */ + jmp copyout_ret /* pop frame and return */ + +/* + * int inst_fetch(int eip, int cs); + * + * Fetch instruction byte. Return -1 if invalid address. 
+ */ +ENTRY(inst_fetch) + movq S_ARG1, %rax /* get segment */ + movw %ax,%fs /* into FS */ + movq S_ARG0, %rax /* get offset */ + RETRY(EXT(inst_fetch)) /* re-load FS on retry */ + RECOVER(_inst_fetch_fault) + movzbq %fs:(%rax),%rax /* load instruction byte */ + ret + +_inst_fetch_fault: + movq $-1,%rax /* return -1 if error */ + ret + + +/* + * Done with recovery and retry tables. + */ + RECOVER_TABLE_END + RETRY_TABLE_END + + + +/* + * cpu_shutdown() + * Force reboot + */ +null_idt: + .space 8 * 32 + +null_idtr: + .word 8 * 32 - 1 + .quad null_idt + +Entry(cpu_shutdown) + lidt null_idtr /* disable the interrupt handler */ + xor %rcx,%rcx /* generate a divide by zero */ + div %rcx,%rax /* reboot now */ + ret /* this will "never" be executed */ diff --git a/x86_64/spl.S b/x86_64/spl.S new file mode 100644 index 0000000..80c65c1 --- /dev/null +++ b/x86_64/spl.S @@ -0,0 +1,265 @@ +/* + * Copyright (c) 1995 Shantanu Goel + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE AUTHOR ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. THE AUTHOR DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + */ + +/* + * spl routines for the i386at. + */ + +#include +#include +#include +#include +#include + +#if NCPUS > 1 +#define mb lock; addl $0,(%esp) +#else +#define mb +#endif + +/* + * Program XEN evt masks from %eax. + */ +#define XEN_SETMASK() \ + pushq %rbx; \ + movl %eax,%ebx; \ + xchgl %eax,hyp_shared_info+EVTMASK; \ + notl %ebx; \ + andl %eax,%ebx; /* Get unmasked events */ \ + testl hyp_shared_info+PENDING, %ebx; \ + popq %rbx; \ + jz 9f; /* Check whether there was some pending */ \ +lock orl $1,hyp_shared_info+CPU_PENDING_SEL; /* Yes, activate it */ \ + movb $1,hyp_shared_info+CPU_PENDING; \ +9: + +ENTRY(spl0) + mb; + CPU_NUMBER(%edx) + movl CX(EXT(curr_ipl),%rdx),%eax /* save current ipl */ + pushq %rax + cli /* disable interrupts */ +#ifdef LINUX_DEV + movl EXT(bh_active),%eax + /* get pending mask */ + andl EXT(bh_mask),%eax /* any pending unmasked interrupts? */ + jz 1f /* no, skip */ + call EXT(spl1) /* block further interrupts */ + incl EXT(intr_count) /* set interrupt flag */ + call EXT(linux_soft_intr) /* go handle interrupt */ + decl EXT(intr_count) /* decrement interrupt flag */ + cli /* disable interrupts */ +1: +#endif + cmpl $0,softclkpending /* softclock pending? */ + je 1f /* no, skip */ + movl $0,softclkpending /* clear flag */ + call EXT(spl1) /* block further interrupts */ +#ifdef LINUX_DEV + incl EXT(intr_count) /* set interrupt flag */ +#endif + call EXT(softclock) /* go handle interrupt */ +#ifdef LINUX_DEV + decl EXT(intr_count) /* decrement interrupt flag */ +#endif + cli /* disable interrupts */ +1: + CPU_NUMBER(%edx) + cmpl $(SPL0),CX(EXT(curr_ipl),%rdx) /* are we at spl0? 
*/ + je 1f /* yes, all done */ + movl $(SPL0),CX(EXT(curr_ipl),%rdx) /* set ipl */ +#ifdef MACH_XEN + movl EXT(int_mask)+SPL0*4,%eax + /* get xen mask */ + XEN_SETMASK() /* program xen evts */ +#endif +1: + sti /* enable interrupts */ + popq %rax /* return previous mask */ + ret + + +/* + * Historically, SETIPL(level) was called + * for spl levels 1-6, now we have combined + * all the intermediate levels into the highest level + * such that interrupts are either on or off, + * since modern hardware can handle it. + * This simplifies the interrupt handling + * especially for the linux drivers. + */ +Entry(splsoftclock) +ENTRY(spl1) +ENTRY(spl2) +ENTRY(spl3) +Entry(splnet) +Entry(splhdw) +ENTRY(spl4) +Entry(splbio) +Entry(spldcm) +ENTRY(spl5) +Entry(spltty) +Entry(splimp) +Entry(splvm) +ENTRY(spl6) +Entry(splclock) +Entry(splsched) +Entry(splhigh) +Entry(splhi) +ENTRY(spl7) + mb; + /* just clear IF */ + cli + CPU_NUMBER(%edx) + movl $SPL7,%eax + xchgl CX(EXT(curr_ipl),%rdx),%eax + ret + +ENTRY(splx) + movq S_ARG0,%rdx /* get ipl */ + CPU_NUMBER(%eax) +#if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN) + /* First make sure that if we're exitting from ipl7, IF is still cleared */ + cmpl $SPL7,CX(EXT(curr_ipl),%rax) /* from ipl7? */ + jne 0f + pushfq + popq %rax + testl $0x200,%eax /* IF? */ + jz 0f + int3 /* Oops, interrupts got enabled?! */ + +0: +#endif /* (MACH_KDB || MACH_TTD) && !MACH_XEN */ + testl %edx,%edx /* spl0? */ + jz EXT(spl0) /* yes, handle specially */ + CPU_NUMBER(%eax) + cmpl CX(EXT(curr_ipl),%rax),%edx /* same ipl as current? */ + jne spl /* no */ + cmpl $SPL7,%edx /* spl7? */ + je 1f /* to ipl7, don't enable interrupts */ + sti /* ensure interrupts are enabled */ +1: + movl %edx,%eax /* return previous ipl */ + ret + +/* + * Like splx() but returns with interrupts disabled and does + * not return the previous ipl. This should only be called + * when returning from an interrupt. + */ + .align TEXT_ALIGN + .globl splx_cli +splx_cli: + movq S_ARG0,%rdx /* get ipl */ + cli /* disable interrupts */ + testl %edx,%edx /* spl0? */ + jnz 2f /* no, skip */ +#ifdef LINUX_DEV + movl EXT(bh_active),%eax + /* get pending mask */ + andl EXT(bh_mask),%eax /* any pending unmasked interrupts? */ + jz 1f /* no, skip */ + call EXT(spl1) /* block further interrupts */ + incl EXT(intr_count) /* set interrupt flag */ + call EXT(linux_soft_intr) /* go handle interrupt */ + decl EXT(intr_count) /* decrement interrupt flag */ + cli /* disable interrupts */ +1: +#endif + cmpl $0,softclkpending /* softclock pending? */ + je 1f /* no, skip */ + movl $0,softclkpending /* clear flag */ + call EXT(spl1) /* block further interrupts */ +#ifdef LINUX_DEV + incl EXT(intr_count) /* set interrupt flag */ +#endif + call EXT(softclock) /* go handle interrupt */ +#ifdef LINUX_DEV + decl EXT(intr_count) /* decrement interrupt flag */ +#endif + cli /* disable interrupts */ +1: + xorl %edx,%edx /* edx = ipl 0 */ +2: + CPU_NUMBER(%eax) + cmpl CX(EXT(curr_ipl),%rax),%edx /* same ipl as current? */ + je 1f /* yes, all done */ + movl %edx,CX(EXT(curr_ipl),%rax) /* set ipl */ +#ifdef MACH_XEN + movl EXT(int_mask),%eax + movl (%eax,%edx,4),%eax + /* get int mask */ + XEN_SETMASK() /* program xen evts with new mask */ +#endif +1: + ret + +/* + * NOTE: This routine must *not* use %ecx, otherwise + * the interrupt code will break. 
+ */ + .align TEXT_ALIGN + .globl spl +spl: + CPU_NUMBER(%eax) +#if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN) + /* First make sure that if we're exitting from ipl7, IF is still cleared */ + cmpl $SPL7,CX(EXT(curr_ipl),%rax) /* from ipl7? */ + jne 0f + pushfq + popq %rax + testl $0x200,%eax /* IF? */ + jz 0f + int3 /* Oops, interrupts got enabled?! */ + +0: +#endif /* (MACH_KDB || MACH_TTD) && !MACH_XEN */ + cmpl $SPL7,%edx /* spl7? */ + je EXT(spl7) /* yes, handle specially */ +#ifdef MACH_XEN + movl EXT(int_mask),%eax + movl (%eax,%edx,4),%eax + /* get int mask */ +#endif + cli /* disable interrupts */ + CPU_NUMBER(%eax) + xchgl CX(EXT(curr_ipl),%rax),%edx /* set ipl */ +#ifdef MACH_XEN + XEN_SETMASK() /* program PICs with new mask */ +#endif + sti /* enable interrupts */ + movl %edx,%eax /* return previous ipl */ + ret + +ENTRY(sploff) + pushfq + popq %rax + cli + ret + +ENTRY(splon) + pushq S_ARG0 + popfq + ret + + .data + .align DATA_ALIGN +softclkpending: + .long 0 + .text + +ENTRY(setsoftclock) + incl softclkpending + ret diff --git a/x86_64/x86_64 b/x86_64/x86_64 new file mode 120000 index 0000000..ee8aacf --- /dev/null +++ b/x86_64/x86_64 @@ -0,0 +1 @@ +../i386/i386 \ No newline at end of file diff --git a/x86_64/xen_boothdr.S b/x86_64/xen_boothdr.S new file mode 100644 index 0000000..da40a5c --- /dev/null +++ b/x86_64/xen_boothdr.S @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2006-2011 Free Software Foundation + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the program ; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include + +.section __xen_guest + .ascii "GUEST_OS=GNU Mach" + .ascii ",GUEST_VERSION=1.3" + .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0x40000000" + .ascii ",ELF_PADDR_OFFSET=0x40000000" + .ascii ",HYPERCALL_PAGE=0x2" + .ascii ",LOADER=generic" +#ifndef MACH_PSEUDO_PHYS + .ascii ",FEATURES=!auto_translated_physmap" +#endif +#ifndef MACH_PV_PAGETABLES + .ascii "|!writable_page_tables" +#endif /* MACH_PV_PAGETABLES */ +#ifndef MACH_PV_DESCRIPTORS + .ascii "|!writable_descriptor_tables" +#endif /* MACH_PV_DESCRIPTORS */ + .byte 0 + +/* Macro taken from linux/include/linux/elfnote.h */ +#define ELFNOTE(name, type, desctype, descdata) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz "name" ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection ; + + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "GNU Mach") + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "1.3") + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, _START) + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, _START) + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, start) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypcalls) + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "" +#ifndef MACH_PSEUDO_PHYS + "!auto_translated_physmap" +#endif +#ifndef MACH_PV_PAGETABLES + "|!writable_page_tables" +#endif /* MACH_PV_PAGETABLES */ +#ifndef MACH_PV_DESCRIPTORS + "|!writable_descriptor_tables" +#endif /* MACH_PV_DESCRIPTORS */ + ) + +#include + +#include + + .text + .globl gdt, ldt + .globl start, _start, gdt +start: +_start: + + /* Switch to our own interrupt stack. */ + movq $(_intstack+INTSTACK_SIZE),%rax + andq $(~15),%rax + movq %rax,%rsp + + /* Reset EFLAGS to a known state. */ + pushq $0 + popf + + /* Push the start_info pointer to be the argument. */ + movabs $KERNELBASE,%rax + subq %rax,%rsi + movq %rsi,%r8 + + /* Fix ifunc entries */ + movq $__rela_iplt_start,%rsi + movq $__rela_iplt_end,%rdi +iplt_cont: + cmpq %rdi,%rsi + jae iplt_done + movq (%rsi),%rbx /* r_offset */ + movb 4(%rsi),%al /* info */ + cmpb $42,%al /* IRELATIVE */ + jnz iplt_next + call *(%ebx) /* call ifunc */ + movq %rax,(%rbx) /* fixed address */ +iplt_next: + addq $8,%rsi + jmp iplt_cont +iplt_done: + + movq %r8,%rdi + /* Jump into C code. */ + call EXT(c_boot_entry) + +/* Those need to be aligned on page boundaries. 
*/ +.global hyp_shared_info, hypcalls + + .org (start + 0x1000) +hyp_shared_info: + .org hyp_shared_info + 0x1000 + +/* Labels just for debuggers */ +#define hypcall(name, n) \ + .org hypcalls + n*32 ; \ +.globl __hyp_##name ; \ +__hyp_##name: + +hypcalls: + hypcall(set_trap_table, 0) + hypcall(mmu_update, 1) + hypcall(set_gdt, 2) + hypcall(stack_switch, 3) + hypcall(set_callbacks, 4) + hypcall(fpu_taskswitch, 5) + hypcall(sched_op_compat, 6) + hypcall(platform_op, 7) + hypcall(set_debugreg, 8) + hypcall(get_debugreg, 9) + hypcall(update_descriptor, 10) + hypcall(memory_op, 12) + hypcall(multicall, 13) + hypcall(update_va_mapping, 14) + hypcall(set_timer_op, 15) + hypcall(event_channel_op_compat, 16) + hypcall(xen_version, 17) + hypcall(console_io, 18) + hypcall(physdev_op_compat, 19) + hypcall(grant_table_op, 20) + hypcall(vm_assist, 21) + hypcall(update_va_mapping_otherdomain, 22) + hypcall(iret, 23) + hypcall(vcpu_op, 24) + hypcall(set_segment_base, 25) + hypcall(mmuext_op, 26) + hypcall(acm_op, 27) + hypcall(nmi_op, 28) + hypcall(sched_op, 29) + hypcall(callback_op, 30) + hypcall(xenoprof_op, 31) + hypcall(event_channel_op, 32) + hypcall(physdev_op, 33) + hypcall(hvm_op, 34) + hypcall(sysctl, 35) + hypcall(domctl, 36) + hypcall(kexec_op, 37) + + hypcall(arch_0, 48) + hypcall(arch_1, 49) + hypcall(arch_2, 50) + hypcall(arch_3, 51) + hypcall(arch_4, 52) + hypcall(arch_5, 53) + hypcall(arch_6, 54) + hypcall(arch_7, 55) + + .org hypcalls + 0x1000 + +gdt: + .org gdt + 0x1000 + +ldt: + .org ldt + 0x1000 + +stack: + .comm _intstack,INTSTACK_SIZE + .comm _eintstack,0 + diff --git a/x86_64/xen_locore.S b/x86_64/xen_locore.S new file mode 100644 index 0000000..967c890 --- /dev/null +++ b/x86_64/xen_locore.S @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2006-2009 Free Software Foundation + * + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with the program ; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include + +#include +#include +#include + + .data 2 +int_active: + .long 0 + + + .text + .globl hyp_callback, hyp_failsafe_callback + P2ALIGN(TEXT_ALIGN) +hyp_callback: + popq %rcx + popq %r11 + pushq %rax + jmp EXT(all_intrs) + +ENTRY(interrupt) + incl int_active /* currently handling interrupts */ + call EXT(hyp_c_callback) /* call generic interrupt routine */ + decl int_active /* stopped handling interrupts */ + sti + ret + +/* FIXME: if we're _very_ unlucky, we may be re-interrupted, filling stack + * + * Far from trivial, see mini-os. That said, maybe we could just, before poping + * everything (which is _not_ destructive), save sp into a known place and use + * it+jmp back? + * + * Mmm, there seems to be an iret hypcall that does exactly what we want: + * perform iret, and if IF is set, clear the interrupt mask. + */ + +/* Pfff, we have to check pending interrupts ourselves. Some other DomUs just make an hypercall for retriggering the irq. 
Not sure it's really easier/faster */ +ENTRY(hyp_sti) + pushq %rbp + movq %rsp, %rbp +_hyp_sti: + movb $0,hyp_shared_info+CPU_CLI /* Enable interrupts */ + cmpl $0,int_active /* Check whether we were already checking pending interrupts */ + jz 0f + popq %rbp + ret /* Already active, just return */ +0: + /* Not active, check pending interrupts by hand */ + /* no memory barrier needed on x86 */ + cmpb $0,hyp_shared_info+CPU_PENDING + jne 0f + popq %rbp + ret +0: + movb $0xff,hyp_shared_info+CPU_CLI +1: + pushq %rax + pushq %rcx + pushq %rdx + pushq %rdi + pushq %rsi + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r11 + incl int_active /* currently handling interrupts */ + + xorq %rdi,%rdi + xorq %rsi,%rsi + call EXT(hyp_c_callback) + + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rsi + popq %rdi + popq %rdx + popq %rcx + popq %rax + decl int_active /* stopped handling interrupts */ + cmpb $0,hyp_shared_info+CPU_PENDING + jne 1b + jmp _hyp_sti + +/* Hypervisor failed to reload segments. Dump them. */ +hyp_failsafe_callback: +ud2 +#if 1 +/* TODO: FIXME */ + /* load sane segments */ + mov %ss, %ax +#if 0 + mov %ax, %ds + mov %ax, %es +#endif + mov %ax, %fs + mov %ax, %gs + movq %rsp, %rdi + call EXT(hyp_failsafe_c_callback) +#else + popq %rdx + movq %rdx,%ds + popq %rdx + movq %rdx,%es + popq %fs + popq %gs + +movq (%rsp),%rax +ud2 + iretq +#endif + +#undef iretq +ENTRY(hyp_iretq) + testb $2,1*8(%rsp) + jnz slow + /* There is no ring1 on x86_64, we have to force ring 3 */ + orb $3,1*8(%rsp) + orb $3,4*8(%rsp) + iretq + +slow: +/* There is no ring 1/2 on x86_64, so going back to user needs to go through + * hypervisor */ + pushq $0 + jmp __hyp_iret -- cgit v1.2.1
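
A note on the fault-recovery tables built by the RECOVER()/RECOVER_TABLE_START macros in locore.S above: each RECOVER() entry pairs the address of a potentially faulting instruction with the address to resume at, and copyin/copyout as well as the syscall argument-copy loops rely on the kernel page-fault path consulting that table. The C sketch below shows how such a lookup can work; the struct layout and the helper's name are illustrative assumptions (the real declarations live in the i386 trap code), with unsigned long standing in for vm_offset_t.

struct recovery {
	unsigned long fault_addr;	/* address of the instruction that may fault */
	unsigned long recover_addr;	/* label to resume at, e.g. copyin_fail */
};

/* Bounded by the recover_table / recover_table_end labels emitted above. */
extern struct recovery recover_table[], recover_table_end[];

/* Hypothetical helper: called from the kernel page-fault path with the
 * saved instruction pointer; returns 1 and rewrites it if a fixup exists. */
static int recover_from_fault(unsigned long *pc)
{
	struct recovery *rp;

	for (rp = recover_table; rp < recover_table_end; rp++)
		if (rp->fault_addr == *pc) {
			*pc = rp->recover_addr;
			return 1;
		}
	return 0;	/* no entry: treat as a genuine kernel fault */
}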
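
discover_x86_cpu_type in locore.S executes CPUID leaf 1, stashes the feature words in cpu_features_ecx/cpu_features_edx, and returns the family field from bits 8-11 of EAX. A minimal C equivalent of that computation, for readers who prefer it spelled out (the cpuid() wrapper here is a local helper written for this sketch, not something provided by the patch):

#include <stdint.h>

static inline void cpuid(uint32_t leaf,
			 uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	__asm__ volatile ("cpuid"
			  : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
			  : "a" (leaf));
}

uint32_t discover_family(void)
{
	uint32_t a, b, c, d;

	cpuid(1, &a, &b, &c, &d);
	/* c and d correspond to cpu_features_ecx / cpu_features_edx. */
	return (a >> 8) & 15;	/* 3, 4, 5, 6 for 386, 486, 586, 686 */
}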
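
The spl entry points in spl.S collapse the historical levels 1-6 into a single interrupts-off level (SPL7), as the comment there explains, but from C the expected usage remains the classic raise/restore pairing. This sketch assumes the usual Mach declarations (spl_t, splhigh, splx) from the machine spl headers rather than anything introduced by this patch:

typedef unsigned long spl_t;		/* assumption: matches the machine spl header */

extern spl_t splhigh(void);		/* aliases spl7: cli + record the old ipl */
extern spl_t splx(spl_t s);		/* restore ipl; the asm returns the one replaced */

void update_shared_counter(unsigned long *counter)
{
	spl_t s = splhigh();		/* block interrupts around the update */
	(*counter)++;			/* data also touched by interrupt handlers */
	(void) splx(s);			/* drop back to the previous level */
}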