path: root/riscv/intel
Diffstat (limited to 'riscv/intel')
-rw-r--r--   riscv/intel/pmap.c          3322
-rw-r--r--   riscv/intel/pmap.h           574
-rw-r--r--   riscv/intel/pmap.h~          574
-rw-r--r--   riscv/intel/read_fault.c     178
-rw-r--r--   riscv/intel/read_fault.h      35
5 files changed, 4683 insertions, 0 deletions
diff --git a/riscv/intel/pmap.c b/riscv/intel/pmap.c
new file mode 100644
index 0000000..f2122a1
--- /dev/null
+++ b/riscv/intel/pmap.c
@@ -0,0 +1,3322 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: pmap.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * (These guys wrote the Vax version)
+ *
+ * Physical Map management code for the Intel i386 and i486.
+ *
+ * Manages physical address maps.
+ *
+ * In addition to hardware address maps, this
+ * module is called upon to provide software-use-only
+ * maps which may or may not be stored in the same
+ * form as hardware maps. These pseudo-maps are
+ * used to store intermediate results from copy
+ * operations to and from address spaces.
+ *
+ * Since the information managed by this module is
+ * also stored by the logical address mapping module,
+ * this module may throw away valid virtual-to-physical
+ * mappings at almost any time. However, invalidations
+ * of virtual-to-physical mappings must be done as
+ * requested.
+ *
+ * In order to cope with hardware architectures which
+ * make virtual-to-physical map invalidates expensive,
+ * this module may delay invalidate or reduce-protection
+ * operations until such time as they are actually
+ * necessary. This module is given full information as
+ * to which processors are currently using which maps,
+ * and to when physical maps must be made correct.
+ */
+
+#include <string.h>
+
+#include <mach/machine/vm_types.h>
+
+#include <mach/boolean.h>
+#include <kern/debug.h>
+#include <kern/printf.h>
+#include <kern/thread.h>
+#include <kern/slab.h>
+
+#include <kern/lock.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <i386/vm_param.h>
+#include <mach/vm_prot.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_user.h>
+
+#include <mach/machine/vm_param.h>
+#include <mach/xen.h>
+#include <machine/thread.h>
+#include <i386/cpu_number.h>
+#include <i386/proc_reg.h>
+#include <i386/locore.h>
+#include <i386/model_dep.h>
+#include <i386/spl.h>
+#include <i386at/biosmem.h>
+#include <i386at/model_dep.h>
+
+#if NCPUS > 1
+#include <i386/mp_desc.h>
+#endif
+
+#include <ddb/db_output.h>
+#include <machine/db_machdep.h>
+
+#ifdef MACH_PSEUDO_PHYS
+#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = pte_entry?pa_to_ma(pte_entry):0;
+#else /* MACH_PSEUDO_PHYS */
+#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
+#endif /* MACH_PSEUDO_PHYS */
+
+/*
+ * Private data structures.
+ */
+
+/*
+ * For each vm_page_t, there is a list of all currently
+ * valid virtual mappings of that page. An entry is
+ * a pv_entry_t; the list is the pv_table.
+ */
+
+typedef struct pv_entry {
+ struct pv_entry *next; /* next pv_entry */
+ pmap_t pmap; /* pmap where mapping lies */
+ vm_offset_t va; /* virtual address for mapping */
+} *pv_entry_t;
+
+#define PV_ENTRY_NULL ((pv_entry_t) 0)
+
+pv_entry_t pv_head_table; /* array of entries, one per page */
+
+/*
+ * pv_list entries are kept on a list that can only be accessed
+ * with the pmap system locked (at SPLVM, not in the cpus_active set).
+ * The list is refilled from the pv_list_cache if it becomes empty.
+ */
+pv_entry_t pv_free_list; /* free list at SPLVM */
+def_simple_lock_data(static, pv_free_list_lock)
+
+#define PV_ALLOC(pv_e) { \
+ simple_lock(&pv_free_list_lock); \
+ if ((pv_e = pv_free_list) != 0) { \
+ pv_free_list = pv_e->next; \
+ } \
+ simple_unlock(&pv_free_list_lock); \
+}
+
+#define PV_FREE(pv_e) { \
+ simple_lock(&pv_free_list_lock); \
+ pv_e->next = pv_free_list; \
+ pv_free_list = pv_e; \
+ simple_unlock(&pv_free_list_lock); \
+}
+
+struct kmem_cache pv_list_cache; /* cache of pv_entry structures */
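+
+/*
+ * Illustrative sketch (not compiled): one way a caller can combine
+ * PV_ALLOC with the pv_list_cache when the SPLVM free list runs dry,
+ * dropping the pmap locks around the cache allocation before retrying.
+ * The helper name, its arguments and the use of the read-lock macros
+ * defined further down are assumptions for illustration only.
+ */
+#if 0
+static pv_entry_t
+example_pv_alloc(pmap_t pmap, int spl)
+{
+	pv_entry_t pv_e;
+
+	PV_ALLOC(pv_e);			/* try the SPLVM free list first */
+	if (pv_e == PV_ENTRY_NULL) {
+		/* Free list empty: refill from the cache with locks dropped. */
+		PMAP_READ_UNLOCK(pmap, spl);
+		pv_e = (pv_entry_t) kmem_cache_alloc(&pv_list_cache);
+		PMAP_READ_LOCK(pmap, spl);
+	}
+	return pv_e;
+}
+#endif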
+
+/*
+ * Each entry in the pv_head_table is locked by a bit in the
+ * pv_lock_table. The lock bits are accessed by the physical
+ * address of the page they lock.
+ */
+
+char *pv_lock_table; /* pointer to array of bits */
+#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+
+/* Has pmap_init completed? */
+boolean_t pmap_initialized = FALSE;
+
+/*
+ * Range of kernel virtual addresses available for kernel memory mapping.
+ * Does not include the virtual addresses used to map physical memory 1-1.
+ * Initialized by pmap_bootstrap.
+ */
+vm_offset_t kernel_virtual_start;
+vm_offset_t kernel_virtual_end;
+
+/*
+ * Index into pv_head table, its lock bits, and the modify/reference
+ * bits.
+ */
+#define pa_index(pa) vm_page_table_index(pa)
+
+#define pai_to_pvh(pai) (&pv_head_table[pai])
+#define lock_pvh_pai(pai) (bit_lock(pai, pv_lock_table))
+#define unlock_pvh_pai(pai) (bit_unlock(pai, pv_lock_table))
+
+/*
+ * Array of physical page attributes for managed pages.
+ * One byte per physical page.
+ */
+char *pmap_phys_attributes;
+
+/*
+ * Physical page attributes. Copy bits from PTE definition.
+ */
+#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
+#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
+
+/*
+ * Amount of virtual memory mapped by one
+ * page-directory entry.
+ */
+#define PDE_MAPPED_SIZE (pdenum2lin(1))
+
+/*
+ * We allocate page table pages directly from the VM system
+ * through this object. It maps physical memory.
+ */
+vm_object_t pmap_object = VM_OBJECT_NULL;
+
+/*
+ * Locking and TLB invalidation
+ */
+
+/*
+ * Locking Protocols:
+ *
+ * There are two structures in the pmap module that need locking:
+ * the pmaps themselves, and the per-page pv_lists (which are locked
+ * by locking the pv_lock_table entry that corresponds to the pv_head
+ * for the list in question.) Most routines want to lock a pmap and
+ * then do operations in it that require pv_list locking -- however
+ * pmap_remove_all and pmap_copy_on_write operate on a physical page
+ * basis and want to do the locking in the reverse order, i.e. lock
+ * a pv_list and then go through all the pmaps referenced by that list.
+ * To protect against deadlock between these two cases, the pmap_lock
+ * is used. There are three different locking protocols as a result:
+ *
+ * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
+ * the pmap.
+ *
+ * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
+ * lock on the pmap_lock (shared read), then lock the pmap
+ * and finally the pv_lists as needed [i.e. pmap lock before
+ * pv_list lock.]
+ *
+ * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
+ * Get a write lock on the pmap_lock (exclusive write); this
+ * also guarantees exclusive access to the pv_lists. Lock the
+ * pmaps as needed.
+ *
+ * At no time may any routine hold more than one pmap lock or more than
+ * one pv_list lock. Because interrupt level routines can allocate
+ * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
+ * kernel_pmap can only be held at splvm.
+ */
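+
+/*
+ * Illustrative sketch (not compiled) of locking protocol 2 above: take
+ * the shared read lock and the pmap lock first, and only then the
+ * per-page pv_list lock. The helper name and arguments are hypothetical.
+ */
+#if 0
+static void
+example_protocol_2(pmap_t pmap, phys_addr_t pa)
+{
+	int spl;
+	unsigned long pai;
+
+	PMAP_READ_LOCK(pmap, spl);	/* splvm + read lock + pmap lock */
+	pai = pa_index(pa);
+	LOCK_PVH(pai);			/* the pv_list lock is taken last */
+	/* ... modify the mapping and its pv_list entry here ... */
+	UNLOCK_PVH(pai);
+	PMAP_READ_UNLOCK(pmap, spl);
+}
+#endif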
+
+#if NCPUS > 1
+/*
+ * We raise the interrupt level to splvm, to block interprocessor
+ * interrupts during pmap operations. We must take the CPU out of
+ * the cpus_active set while interrupts are blocked.
+ */
+#define SPLVM(spl) { \
+ spl = splvm(); \
+ i_bit_clear(cpu_number(), &cpus_active); \
+}
+
+#define SPLX(spl) { \
+ i_bit_set(cpu_number(), &cpus_active); \
+ splx(spl); \
+}
+
+/*
+ * Lock on pmap system
+ */
+lock_data_t pmap_system_lock;
+
+#define PMAP_READ_LOCK(pmap, spl) { \
+ SPLVM(spl); \
+ lock_read(&pmap_system_lock); \
+ simple_lock(&(pmap)->lock); \
+}
+
+#define PMAP_WRITE_LOCK(spl) { \
+ SPLVM(spl); \
+ lock_write(&pmap_system_lock); \
+}
+
+#define PMAP_READ_UNLOCK(pmap, spl) { \
+ simple_unlock(&(pmap)->lock); \
+ lock_read_done(&pmap_system_lock); \
+ SPLX(spl); \
+}
+
+#define PMAP_WRITE_UNLOCK(spl) { \
+ lock_write_done(&pmap_system_lock); \
+ SPLX(spl); \
+}
+
+#define PMAP_WRITE_TO_READ_LOCK(pmap) { \
+ simple_lock(&(pmap)->lock); \
+ lock_write_to_read(&pmap_system_lock); \
+}
+
+#define LOCK_PVH(index) (lock_pvh_pai(index))
+
+#define UNLOCK_PVH(index) (unlock_pvh_pai(index))
+
+#define PMAP_UPDATE_TLBS(pmap, s, e) \
+{ \
+ cpu_set cpu_mask = 1 << cpu_number(); \
+ cpu_set users; \
+ \
+ /* Since the pmap is locked, other updates are locked */ \
+ /* out, and any pmap_activate has finished. */ \
+ \
+ /* find other cpus using the pmap */ \
+ users = (pmap)->cpus_using & ~cpu_mask; \
+ if (users) { \
+ /* signal them, and wait for them to finish */ \
+ /* using the pmap */ \
+ signal_cpus(users, (pmap), (s), (e)); \
+ while ((pmap)->cpus_using & cpus_active & ~cpu_mask) \
+ cpu_pause(); \
+ } \
+ \
+ /* invalidate our own TLB if pmap is in use */ \
+ if ((pmap)->cpus_using & cpu_mask) { \
+ INVALIDATE_TLB((pmap), (s), (e)); \
+ } \
+}
+
+#else /* NCPUS > 1 */
+
+#define SPLVM(spl) ((void)(spl))
+#define SPLX(spl) ((void)(spl))
+
+#define PMAP_READ_LOCK(pmap, spl) SPLVM(spl)
+#define PMAP_WRITE_LOCK(spl) SPLVM(spl)
+#define PMAP_READ_UNLOCK(pmap, spl) SPLX(spl)
+#define PMAP_WRITE_UNLOCK(spl) SPLX(spl)
+#define PMAP_WRITE_TO_READ_LOCK(pmap)
+
+#define LOCK_PVH(index)
+#define UNLOCK_PVH(index)
+
+#define PMAP_UPDATE_TLBS(pmap, s, e) { \
+ /* invalidate our own TLB if pmap is in use */ \
+ if ((pmap)->cpus_using) { \
+ INVALIDATE_TLB((pmap), (s), (e)); \
+ } \
+}
+
+#endif /* NCPUS > 1 */
+
+#ifdef MACH_PV_PAGETABLES
+#define INVALIDATE_TLB(pmap, s, e) do { \
+ if (__builtin_constant_p((e) - (s)) \
+ && (e) - (s) == PAGE_SIZE) \
+ hyp_invlpg((pmap) == kernel_pmap ? kvtolin(s) : (s)); \
+ else \
+ hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL); \
+} while(0)
+#else /* MACH_PV_PAGETABLES */
+/* It is hard to know when a TLB flush becomes less expensive than a bunch of
+ * invlpgs. But it surely is more expensive than just one invlpg. */
+#define INVALIDATE_TLB(pmap, s, e) do { \
+ if (__builtin_constant_p((e) - (s)) \
+ && (e) - (s) == PAGE_SIZE) \
+ invlpg_linear((pmap) == kernel_pmap ? kvtolin(s) : (s)); \
+ else \
+ flush_tlb(); \
+} while (0)
+#endif /* MACH_PV_PAGETABLES */
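+
+/*
+ * Illustrative sketch (not compiled) of the pattern these macros support
+ * throughout this file: edit the ptes under the pmap lock, then shoot
+ * down stale TLB entries for exactly the range that changed. The helper
+ * name is hypothetical.
+ */
+#if 0
+static void
+example_clear_one_pte(pmap_t pmap, vm_offset_t va, pt_entry_t *pte)
+{
+	WRITE_PTE(pte, 0);
+	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+}
+#endif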
+
+
+#if NCPUS > 1
+/*
+ * Structures to keep track of pending TLB invalidations
+ */
+
+#define UPDATE_LIST_SIZE 4
+
+struct pmap_update_item {
+ pmap_t pmap; /* pmap to invalidate */
+ vm_offset_t start; /* start address to invalidate */
+ vm_offset_t end; /* end address to invalidate */
+} ;
+
+typedef struct pmap_update_item *pmap_update_item_t;
+
+/*
+ * List of pmap updates. If the list overflows,
+ * the last entry is changed to invalidate all.
+ */
+struct pmap_update_list {
+ decl_simple_lock_data(, lock)
+ int count;
+ struct pmap_update_item item[UPDATE_LIST_SIZE];
+} ;
+typedef struct pmap_update_list *pmap_update_list_t;
+
+struct pmap_update_list cpu_update_list[NCPUS];
+
+cpu_set cpus_active;
+cpu_set cpus_idle;
+volatile
+boolean_t cpu_update_needed[NCPUS];
+
+#endif /* NCPUS > 1 */
+
+/*
+ * Other useful macros.
+ */
+#define current_pmap() (vm_map_pmap(current_thread()->task->map))
+#define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
+
+struct pmap kernel_pmap_store;
+pmap_t kernel_pmap;
+
+struct kmem_cache pmap_cache; /* cache of pmap structures */
+struct kmem_cache pt_cache; /* cache of page tables */
+struct kmem_cache pd_cache; /* cache of page directories */
+#if PAE
+struct kmem_cache pdpt_cache; /* cache of page directory pointer tables */
+#ifdef __x86_64__
+struct kmem_cache l4_cache; /* cache of L4 tables */
+#endif /* __x86_64__ */
+#endif /* PAE */
+
+boolean_t pmap_debug = FALSE; /* flag for debugging prints */
+
+#if 0
+int ptes_per_vm_page; /* number of hardware ptes needed
+ to map one VM page. */
+#else
+#define ptes_per_vm_page 1
+#endif
+
+unsigned int inuse_ptepages_count = 0; /* debugging */
+
+/*
+ * Pointer to the basic page directory for the kernel.
+ * Initialized by pmap_bootstrap().
+ */
+pt_entry_t *kernel_page_dir;
+
+/*
+ * Per-CPU slots for temporary physical page mappings, to allow for
+ * physical-to-physical transfers.
+ */
+static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS * NCPUS];
+#define MAPWINDOW_SIZE (PMAP_NMAPWINDOWS * NCPUS * PAGE_SIZE)
+
+#ifdef __x86_64__
+static inline pt_entry_t *
+pmap_l4base(const pmap_t pmap, vm_offset_t lin_addr)
+{
+ return &pmap->l4base[lin2l4num(lin_addr)];
+}
+#endif
+
+#ifdef PAE
+static inline pt_entry_t *
+pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr)
+{
+ pt_entry_t *pdp_table;
+#ifdef __x86_64__
+ pt_entry_t *l4_table;
+ l4_table = pmap_l4base(pmap, lin_addr);
+ if (l4_table == PT_ENTRY_NULL)
+ return(PT_ENTRY_NULL);
+ pt_entry_t pdp = *l4_table;
+ if ((pdp & INTEL_PTE_VALID) == 0)
+ return PT_ENTRY_NULL;
+ pdp_table = (pt_entry_t *) ptetokv(pdp);
+#else /* __x86_64__ */
+ pdp_table = pmap->pdpbase;
+#endif /* __x86_64__ */
+ return &pdp_table[lin2pdpnum(lin_addr)];
+}
+#endif
+
+static inline pt_entry_t *
+pmap_pde(const pmap_t pmap, vm_offset_t addr)
+{
+ pt_entry_t *page_dir;
+ if (pmap == kernel_pmap)
+ addr = kvtolin(addr);
+#if PAE
+ pt_entry_t *pdp_table;
+ pdp_table = pmap_ptp(pmap, addr);
+ if (pdp_table == PT_ENTRY_NULL)
+ return(PT_ENTRY_NULL);
+ pt_entry_t pde = *pdp_table;
+ if ((pde & INTEL_PTE_VALID) == 0)
+ return PT_ENTRY_NULL;
+ page_dir = (pt_entry_t *) ptetokv(pde);
+#else /* PAE */
+ page_dir = pmap->dirbase;
+#endif /* PAE */
+ return &page_dir[lin2pdenum(addr)];
+}
+
+/*
+ * Given an offset and a map, compute the address of the
+ * pte. If the address is invalid with respect to the map
+ * then PT_ENTRY_NULL is returned (and the map may need to grow).
+ *
+ * This is only used internally.
+ */
+pt_entry_t *
+pmap_pte(const pmap_t pmap, vm_offset_t addr)
+{
+ pt_entry_t *ptp;
+ pt_entry_t pte;
+
+#ifdef __x86_64__
+ if (pmap->l4base == 0)
+ return(PT_ENTRY_NULL);
+#elif PAE
+ if (pmap->pdpbase == 0)
+ return(PT_ENTRY_NULL);
+#else
+ if (pmap->dirbase == 0)
+ return(PT_ENTRY_NULL);
+#endif
+ ptp = pmap_pde(pmap, addr);
+ if (ptp == 0)
+ return(PT_ENTRY_NULL);
+ pte = *ptp;
+ if ((pte & INTEL_PTE_VALID) == 0)
+ return(PT_ENTRY_NULL);
+ ptp = (pt_entry_t *)ptetokv(pte);
+ return(&ptp[ptenum(addr)]);
+}
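+
+/*
+ * Illustrative sketch (not compiled): using pmap_pte() to translate a
+ * virtual address to a physical address, assuming the caller already
+ * holds the appropriate pmap lock. The helper name is hypothetical.
+ */
+#if 0
+static phys_addr_t
+example_virt_to_phys(const pmap_t pmap, vm_offset_t va)
+{
+	pt_entry_t *pte = pmap_pte(pmap, va);
+
+	if (pte == PT_ENTRY_NULL || !(*pte & INTEL_PTE_VALID))
+		return 0;
+	return pte_to_pa(*pte) + (va & (INTEL_PGBYTES - 1));
+}
+#endif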
+
+#define DEBUG_PTE_PAGE 0
+
+#if DEBUG_PTE_PAGE
+void ptep_check(ptep_t ptep)
+{
+ pt_entry_t *pte, *epte;
+ int ctu, ctw;
+
+ /* check the use and wired counts */
+ if (ptep == PTE_PAGE_NULL)
+ return;
+ pte = pmap_pte(ptep->pmap, ptep->va);
+ epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
+ ctu = 0;
+ ctw = 0;
+ while (pte < epte) {
+ if (pte->pfn != 0) {
+ ctu++;
+ if (pte->wired)
+ ctw++;
+ }
+ pte += ptes_per_vm_page;
+ }
+
+ if (ctu != ptep->use_count || ctw != ptep->wired_count) {
+ printf("use %d wired %d - actual use %d wired %d\n",
+ ptep->use_count, ptep->wired_count, ctu, ctw);
+ panic("pte count");
+ }
+}
+#endif /* DEBUG_PTE_PAGE */
+
+/*
+ * Back-door routine for mapping kernel VM at initialization.
+ * Useful for mapping memory outside the range of direct mapped
+ * physical memory (i.e., devices).
+ */
+vm_offset_t pmap_map_bd(
+ vm_offset_t virt,
+ phys_addr_t start,
+ phys_addr_t end,
+ vm_prot_t prot)
+{
+ pt_entry_t template;
+ pt_entry_t *pte;
+ int spl;
+#ifdef MACH_PV_PAGETABLES
+ int n, i = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_PV_PAGETABLES */
+
+ template = pa_to_pte(start)
+ | INTEL_PTE_NCACHE|INTEL_PTE_WTHRU
+ | INTEL_PTE_VALID;
+ if (CPU_HAS_FEATURE(CPU_FEATURE_PGE))
+ template |= INTEL_PTE_GLOBAL;
+ if (prot & VM_PROT_WRITE)
+ template |= INTEL_PTE_WRITE;
+
+ PMAP_READ_LOCK(kernel_pmap, spl);
+ while (start < end) {
+ pte = pmap_pte(kernel_pmap, virt);
+ if (pte == PT_ENTRY_NULL)
+ panic("pmap_map_bd: Invalid kernel address\n");
+#ifdef MACH_PV_PAGETABLES
+ update[i].ptr = kv_to_ma(pte);
+ update[i].val = pa_to_ma(template);
+ i++;
+ if (i == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_map_bd\n");
+ i = 0;
+ }
+#else /* MACH_PV_PAGETABLES */
+ WRITE_PTE(pte, template)
+#endif /* MACH_PV_PAGETABLES */
+ pte_increment_pa(template);
+ virt += PAGE_SIZE;
+ start += PAGE_SIZE;
+ }
+#ifdef MACH_PV_PAGETABLES
+ if (i > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_map_bd");
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_map_bd\n");
+#endif /* MACH_PV_PAGETABLES */
+ PMAP_READ_UNLOCK(kernel_pmap, spl);
+ return(virt);
+}
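+
+/*
+ * Illustrative sketch (not compiled): a hypothetical early-boot caller
+ * using pmap_map_bd() to map one page of a memory-mapped device
+ * uncached; the physical address below is only an example.
+ */
+#if 0
+static vm_offset_t
+example_map_device(vm_offset_t va)
+{
+	return pmap_map_bd(va, 0xfee00000ULL, 0xfee00000ULL + PAGE_SIZE,
+			   VM_PROT_READ | VM_PROT_WRITE);
+}
+#endif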
+
+#ifdef PAE
+static void pmap_bootstrap_pae(void)
+{
+ vm_offset_t addr;
+ pt_entry_t *pdp_kernel;
+
+#ifdef __x86_64__
+#ifdef MACH_HYP
+ kernel_pmap->user_l4base = NULL;
+ kernel_pmap->user_pdpbase = NULL;
+#endif
+ kernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page());
+ memset(kernel_pmap->l4base, 0, INTEL_PGBYTES);
+#else
+ const int PDPNUM_KERNEL = PDPNUM;
+#endif /* x86_64 */
+
+ init_alloc_aligned(PDPNUM_KERNEL * INTEL_PGBYTES, &addr);
+ kernel_page_dir = (pt_entry_t*)phystokv(addr);
+ memset(kernel_page_dir, 0, PDPNUM_KERNEL * INTEL_PGBYTES);
+
+ pdp_kernel = (pt_entry_t*)phystokv(pmap_grab_page());
+ memset(pdp_kernel, 0, INTEL_PGBYTES);
+ for (int i = 0; i < PDPNUM_KERNEL; i++) {
+ int pdp_index = i;
+#ifdef __x86_64__
+ pdp_index += lin2pdpnum(VM_MIN_KERNEL_ADDRESS);
+#endif
+ WRITE_PTE(&pdp_kernel[pdp_index],
+ pa_to_pte(_kvtophys((void *) kernel_page_dir
+ + i * INTEL_PGBYTES))
+ | INTEL_PTE_VALID
+#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES)
+ | INTEL_PTE_WRITE
+#endif
+ );
+ }
+
+#ifdef __x86_64__
+ /* only fill the kernel pdpte during bootstrap */
+ WRITE_PTE(&kernel_pmap->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
+ pa_to_pte(_kvtophys(pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+#ifdef MACH_PV_PAGETABLES
+ pmap_set_page_readonly_init(kernel_pmap->l4base);
+#endif /* MACH_PV_PAGETABLES */
+#else /* x86_64 */
+ kernel_pmap->pdpbase = pdp_kernel;
+#endif /* x86_64 */
+}
+#endif /* PAE */
+
+#ifdef MACH_PV_PAGETABLES
+#ifdef PAE
+#define NSUP_L1 4
+#else
+#define NSUP_L1 1
+#endif
+static void pmap_bootstrap_xen(pt_entry_t *l1_map[NSUP_L1])
+{
+ /* We don't actually deal with the CR3 register content at all */
+ hyp_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3);
+ /*
+ * Xen may only provide as few as 512KB extra bootstrap linear memory,
+ * which is far from enough to map all available memory, so we need to
+ * map more bootstrap linear memory. We here map 1 (resp. 4 for PAE)
+ * other L1 table(s), thus 4MiB extra memory (resp. 8MiB), which is
+ * enough for a pagetable mapping 4GiB.
+ */
+ vm_offset_t la;
+ int n_l1map;
+ for (n_l1map = 0, la = VM_MIN_KERNEL_ADDRESS; la >= VM_MIN_KERNEL_ADDRESS; la += NPTES * PAGE_SIZE) {
+ pt_entry_t *base = (pt_entry_t*) boot_info.pt_base;
+#ifdef PAE
+#ifdef __x86_64__
+ base = (pt_entry_t*) ptetokv(base[0]);
+#endif /* x86_64 */
+ pt_entry_t *l2_map = (pt_entry_t*) ptetokv(base[lin2pdpnum(la)]);
+#else /* PAE */
+ pt_entry_t *l2_map = base;
+#endif /* PAE */
+ /* Like lin2pdenum, but works with non-contiguous boot L3 */
+ l2_map += (la >> PDESHIFT) & PDEMASK;
+ if (!(*l2_map & INTEL_PTE_VALID)) {
+ struct mmu_update update;
+ unsigned j, n;
+
+ l1_map[n_l1map] = (pt_entry_t*) phystokv(pmap_grab_page());
+ for (j = 0; j < NPTES; j++)
+ l1_map[n_l1map][j] = (((pt_entry_t)pfn_to_mfn(lin2pdenum(la - VM_MIN_KERNEL_ADDRESS) * NPTES + j)) << PAGE_SHIFT) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ pmap_set_page_readonly_init(l1_map[n_l1map]);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (l1_map[n_l1map])))
+ panic("couldn't pin page %p(%lx)", l1_map[n_l1map], (vm_offset_t) kv_to_ma (l1_map[n_l1map]));
+ update.ptr = kv_to_ma(l2_map);
+ update.val = kv_to_ma(l1_map[n_l1map]) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ hyp_mmu_update(kv_to_la(&update), 1, kv_to_la(&n), DOMID_SELF);
+ if (n != 1)
+ panic("couldn't complete bootstrap map");
+ /* added the last L1 table, can stop */
+ if (++n_l1map >= NSUP_L1)
+ break;
+ }
+ }
+}
+#endif /* MACH_PV_PAGETABLES */
+
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ * Allocate the kernel page directory and page tables,
+ * and direct-map all physical memory.
+ * Called with mapping off.
+ */
+void pmap_bootstrap(void)
+{
+ /*
+ * Mapping is turned off; we must reference only physical addresses.
+ * The load image of the system is to be mapped 1-1 physical = virtual.
+ */
+
+ /*
+ * Set ptes_per_vm_page for general use.
+ */
+#if 0
+ ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES;
+#endif
+
+ /*
+ * The kernel's pmap is statically allocated so we don't
+ * have to use pmap_create, which is unlikely to work
+ * correctly at this part of the boot sequence.
+ */
+
+ kernel_pmap = &kernel_pmap_store;
+
+#if NCPUS > 1
+ lock_init(&pmap_system_lock, FALSE); /* NOT a sleep lock */
+#endif /* NCPUS > 1 */
+
+ simple_lock_init(&kernel_pmap->lock);
+
+ kernel_pmap->ref_count = 1;
+
+ /*
+ * Determine the kernel virtual address range.
+ * It starts at the end of the physical memory
+ * mapped into the kernel address space,
+ * and extends to a stupid arbitrary limit beyond that.
+ */
+ kernel_virtual_start = phystokv(biosmem_directmap_end());
+ kernel_virtual_end = kernel_virtual_start + VM_KERNEL_MAP_SIZE;
+
+ if (kernel_virtual_end < kernel_virtual_start
+ || kernel_virtual_end > VM_MAX_KERNEL_ADDRESS - PAGE_SIZE)
+ kernel_virtual_end = VM_MAX_KERNEL_ADDRESS - PAGE_SIZE;
+
+ /*
+ * Allocate and clear a kernel page directory.
+ */
+ /* Note: the initial Xen mapping includes at least 512kB of free,
+ * already-mapped pages. We use those for directly building our
+ * linear mapping. */
+#if PAE
+ pmap_bootstrap_pae();
+#else /* PAE */
+ kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(pmap_grab_page());
+ {
+ unsigned i;
+ for (i = 0; i < NPDES; i++)
+ kernel_page_dir[i] = 0;
+ }
+#endif /* PAE */
+
+#ifdef MACH_PV_PAGETABLES
+ pt_entry_t *l1_map[NSUP_L1];
+ pmap_bootstrap_xen(l1_map);
+#endif /* MACH_PV_PAGETABLES */
+
+ /*
+ * Allocate and set up the kernel page tables.
+ */
+ {
+ vm_offset_t va;
+ pt_entry_t global = CPU_HAS_FEATURE(CPU_FEATURE_PGE) ? INTEL_PTE_GLOBAL : 0;
+
+ /*
+ * Map virtual memory for all directly mappable physical memory, 1-1.
+ * Make any mappings that lie completely within the kernel's text
+ * segment read-only.
+ *
+ * Also allocate some additional all-null page tables afterwards
+ * for kernel virtual memory allocation,
+ * because this PMAP module is too stupid
+ * to allocate new kernel page tables later.
+ * XX fix this
+ */
+ for (va = phystokv(0); va >= phystokv(0) && va < kernel_virtual_end; )
+ {
+ pt_entry_t *pde = kernel_page_dir + lin2pdenum_cont(kvtolin(va));
+ pt_entry_t *ptable = (pt_entry_t*)phystokv(pmap_grab_page());
+ pt_entry_t *pte;
+
+ /* Initialize the page directory entry. */
+ WRITE_PTE(pde, pa_to_pte((vm_offset_t)_kvtophys(ptable))
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+
+ /* Initialize the page table. */
+ for (pte = ptable; (va < phystokv(biosmem_directmap_end())) && (pte < ptable+NPTES); pte++)
+ {
+ if ((pte - ptable) < ptenum(va))
+ {
+ WRITE_PTE(pte, 0);
+ }
+ else
+#ifdef MACH_PV_PAGETABLES
+ if (va == (vm_offset_t) &hyp_shared_info)
+ {
+ *pte = boot_info.shared_info | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ va += INTEL_PGBYTES;
+ }
+ else
+#endif /* MACH_PV_PAGETABLES */
+ {
+ extern char _start[], etext[];
+
+ if (((va >= (vm_offset_t) _start)
+ && (va + INTEL_PGBYTES <= (vm_offset_t)etext))
+#ifdef MACH_PV_PAGETABLES
+ || (va >= (vm_offset_t) boot_info.pt_base
+ && (va + INTEL_PGBYTES <=
+ (vm_offset_t) ptable + INTEL_PGBYTES))
+#endif /* MACH_PV_PAGETABLES */
+ )
+ {
+ WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
+ | INTEL_PTE_VALID | global);
+ }
+ else
+ {
+#ifdef MACH_PV_PAGETABLES
+ /* Keep supplementary L1 pages read-only */
+ int i;
+ for (i = 0; i < NSUP_L1; i++)
+ if (va == (vm_offset_t) l1_map[i]) {
+ WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
+ | INTEL_PTE_VALID | global);
+ break;
+ }
+ if (i == NSUP_L1)
+#endif /* MACH_PV_PAGETABLES */
+ WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE | global)
+
+ }
+ va += INTEL_PGBYTES;
+ }
+ }
+ for (; pte < ptable+NPTES; pte++)
+ {
+ if (va >= kernel_virtual_end - MAPWINDOW_SIZE && va < kernel_virtual_end)
+ {
+ pmap_mapwindow_t *win = &mapwindows[atop(va - (kernel_virtual_end - MAPWINDOW_SIZE))];
+ win->entry = pte;
+ win->vaddr = va;
+ }
+ WRITE_PTE(pte, 0);
+ va += INTEL_PGBYTES;
+ }
+#ifdef MACH_PV_PAGETABLES
+ pmap_set_page_readonly_init(ptable);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (ptable)))
+ panic("couldn't pin page %p(%lx)\n", ptable, (vm_offset_t) kv_to_ma (ptable));
+#endif /* MACH_PV_PAGETABLES */
+ }
+ }
+
+ /* Architecture-specific code will turn on paging
+ soon after we return from here. */
+}
+
+#ifdef MACH_PV_PAGETABLES
+/* These are only required because of Xen security policies */
+
+/* Set back a page read write */
+void pmap_set_page_readwrite(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+ phys_addr_t paddr = kvtophys(vaddr);
+ vm_offset_t canon_vaddr = phystokv(paddr);
+ if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE))
+ panic("couldn't set hiMMU readwrite for addr %lx(%lx)\n", vaddr, (vm_offset_t) pa_to_ma (paddr));
+ if (canon_vaddr != vaddr)
+ if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE))
+ panic("couldn't set hiMMU readwrite for paddr %lx(%lx)\n", canon_vaddr, (vm_offset_t) pa_to_ma (paddr));
+}
+
+/* Set a page read only (so as to pin it for instance) */
+void pmap_set_page_readonly(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+ phys_addr_t paddr = kvtophys(vaddr);
+ vm_offset_t canon_vaddr = phystokv(paddr);
+ if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID) {
+ if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set hiMMU readonly for vaddr %lx(%lx)\n", vaddr, (vm_offset_t) pa_to_ma (paddr));
+ }
+ if (canon_vaddr != vaddr &&
+ *pmap_pde(kernel_pmap, canon_vaddr) & INTEL_PTE_VALID) {
+ if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set hiMMU readonly for vaddr %lx canon_vaddr %lx paddr %lx (%lx)\n", vaddr, canon_vaddr, paddr, (vm_offset_t) pa_to_ma (paddr));
+ }
+}
+
+/* This needs to be called instead of pmap_set_page_readonly as long as CR3
+ * still points to the bootstrap dirbase, to also fix the bootstrap table. */
+void pmap_set_page_readonly_init(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+#if PAE
+ pt_entry_t *pdpbase = (void*) boot_info.pt_base;
+#ifdef __x86_64__
+ pdpbase = (pt_entry_t *) ptetokv(pdpbase[lin2l4num(vaddr)]);
+#endif
+ /* The bootstrap table does not necessarily use contiguous pages for the pde tables */
+ pt_entry_t *dirbase = (void*) ptetokv(pdpbase[lin2pdpnum(vaddr)]);
+#else
+ pt_entry_t *dirbase = (void*) boot_info.pt_base;
+#endif
+ pt_entry_t *pte = &dirbase[lin2pdenum(vaddr) & PTEMASK];
+ /* Modify our future kernel map (can't use update_va_mapping for this)... */
+ if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID) {
+ if (!hyp_mmu_update_la (kvtolin(vaddr), pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID))
+ panic("couldn't set hiMMU readonly for vaddr %lx(%lx)\n", vaddr, (vm_offset_t) kv_to_ma (vaddr));
+ }
+ /* ... and the bootstrap map. */
+ if (*pte & INTEL_PTE_VALID) {
+ if (hyp_do_update_va_mapping (vaddr, pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set MMU readonly for vaddr %lx(%lx)\n", vaddr, (vm_offset_t) kv_to_ma (vaddr));
+ }
+}
+
+void pmap_clear_bootstrap_pagetable(pt_entry_t *base) {
+ unsigned i;
+ pt_entry_t *dir;
+ vm_offset_t va = 0;
+#ifdef __x86_64__
+ int l4i, l3i;
+#else
+#if PAE
+ unsigned j;
+#endif /* PAE */
+#endif
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(base)))
+ panic("pmap_clear_bootstrap_pagetable: couldn't unpin page %p(%lx)\n", base, (vm_offset_t) kv_to_ma(base));
+#ifdef __x86_64__
+ /* 4-level page table */
+ for (l4i = 0; l4i < NPTES && va < HYP_VIRT_START && va < 0x0000800000000000UL; l4i++) {
+ pt_entry_t l4e = base[l4i];
+ pt_entry_t *l3;
+ if (!(l4e & INTEL_PTE_VALID)) {
+ va += NPTES * NPTES * NPTES * INTEL_PGBYTES;
+ continue;
+ }
+ l3 = (pt_entry_t *) ptetokv(l4e);
+
+ for (l3i = 0; l3i < NPTES && va < HYP_VIRT_START; l3i++) {
+ pt_entry_t l3e = l3[l3i];
+ if (!(l3e & INTEL_PTE_VALID)) {
+ va += NPTES * NPTES * INTEL_PGBYTES;
+ continue;
+ }
+ dir = (pt_entry_t *) ptetokv(l3e);
+#else
+#if PAE
+ /* 3-level page table */
+ for (j = 0; j < PDPNUM && va < HYP_VIRT_START; j++)
+ {
+ pt_entry_t pdpe = base[j];
+ if (!(pdpe & INTEL_PTE_VALID)) {
+ va += NPTES * NPTES * INTEL_PGBYTES;
+ continue;
+ }
+ dir = (pt_entry_t *) ptetokv(pdpe);
+#else /* PAE */
+ /* 2-level page table */
+ dir = base;
+#endif /* PAE */
+#endif
+ for (i = 0; i < NPTES && va < HYP_VIRT_START; i++) {
+ pt_entry_t pde = dir[i];
+ unsigned long pfn = atop(pte_to_pa(pde));
+ void *pgt = (void*) phystokv(ptoa(pfn));
+ if (pde & INTEL_PTE_VALID)
+ hyp_free_page(pfn, pgt);
+ va += NPTES * INTEL_PGBYTES;
+ }
+#ifndef __x86_64__
+#if PAE
+ hyp_free_page(atop(_kvtophys(dir)), dir);
+ }
+#endif /* PAE */
+#else
+ hyp_free_page(atop(_kvtophys(dir)), dir);
+ }
+ hyp_free_page(atop(_kvtophys(l3)), l3);
+ }
+#endif
+ hyp_free_page(atop(_kvtophys(base)), base);
+}
+#endif /* MACH_PV_PAGETABLES */
+
+/*
+ * Create a temporary mapping for a given physical entry
+ *
+ * This can be used to access physical pages which are not mapped 1:1 by
+ * phystokv().
+ */
+pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry)
+{
+ pmap_mapwindow_t *map;
+ int cpu = cpu_number();
+
+ assert(entry != 0);
+
+ /* Find an empty one. */
+ for (map = &mapwindows[cpu * PMAP_NMAPWINDOWS]; map < &mapwindows[(cpu+1) * PMAP_NMAPWINDOWS]; map++)
+ if (!(*map->entry))
+ break;
+ assert(map < &mapwindows[(cpu+1) * PMAP_NMAPWINDOWS]);
+
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(map->entry), pa_to_ma(entry)))
+ panic("pmap_get_mapwindow");
+#else /* MACH_PV_PAGETABLES */
+ WRITE_PTE(map->entry, entry);
+#endif /* MACH_PV_PAGETABLES */
+ INVALIDATE_TLB(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE);
+ return map;
+}
+
+/*
+ * Destroy a temporary mapping for a physical entry
+ */
+void pmap_put_mapwindow(pmap_mapwindow_t *map)
+{
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(map->entry), 0))
+ panic("pmap_put_mapwindow");
+#else /* MACH_PV_PAGETABLES */
+ WRITE_PTE(map->entry, 0);
+#endif /* MACH_PV_PAGETABLES */
+ INVALIDATE_TLB(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE);
+}
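+
+/*
+ * Illustrative sketch (not compiled): copying one physical page that is
+ * not covered by the direct map, using a temporary map window. The
+ * helper name and the exact pte bits passed in are assumptions.
+ */
+#if 0
+static void
+example_copy_from_phys(phys_addr_t src_pa, void *dst)
+{
+	pmap_mapwindow_t *win;
+
+	win = pmap_get_mapwindow(pa_to_pte(src_pa) | INTEL_PTE_VALID);
+	memcpy(dst, (void *) win->vaddr, PAGE_SIZE);
+	pmap_put_mapwindow(win);
+}
+#endif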
+
+void pmap_virtual_space(
+ vm_offset_t *startp,
+ vm_offset_t *endp)
+{
+ *startp = kernel_virtual_start;
+ *endp = kernel_virtual_end - MAPWINDOW_SIZE;
+}
+
+/*
+ * Initialize the pmap module.
+ * Called by vm_init, to initialize any structures that the pmap
+ * system needs to map virtual memory.
+ */
+void pmap_init(void)
+{
+ unsigned long npages;
+ vm_offset_t addr;
+ vm_size_t s;
+#if NCPUS > 1
+ int i;
+#endif /* NCPUS > 1 */
+
+ /*
+ * Allocate memory for the pv_head_table and its lock bits,
+ * the modify bit array, and the pte_page table.
+ */
+
+ npages = vm_page_table_size();
+ s = (vm_size_t) (sizeof(struct pv_entry) * npages
+ + pv_lock_table_size(npages)
+ + npages);
+
+ s = round_page(s);
+ if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
+ panic("pmap_init");
+ memset((void *) addr, 0, s);
+
+ /*
+ * Allocate the structures first to preserve word-alignment.
+ */
+ pv_head_table = (pv_entry_t) addr;
+ addr = (vm_offset_t) (pv_head_table + npages);
+
+ pv_lock_table = (char *) addr;
+ addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
+
+ pmap_phys_attributes = (char *) addr;
+
+ /*
+ * Create the cache of physical maps,
+ * and of the physical-to-virtual entries.
+ */
+ s = (vm_size_t) sizeof(struct pmap);
+ kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, 0);
+ kmem_cache_init(&pt_cache, "pmap_L1",
+ INTEL_PGBYTES, INTEL_PGBYTES, NULL,
+ KMEM_CACHE_PHYSMEM);
+ kmem_cache_init(&pd_cache, "pmap_L2",
+ INTEL_PGBYTES, INTEL_PGBYTES, NULL,
+ KMEM_CACHE_PHYSMEM);
+#if PAE
+ kmem_cache_init(&pdpt_cache, "pmap_L3",
+ INTEL_PGBYTES, INTEL_PGBYTES, NULL,
+ KMEM_CACHE_PHYSMEM);
+#ifdef __x86_64__
+ kmem_cache_init(&l4_cache, "pmap_L4",
+ INTEL_PGBYTES, INTEL_PGBYTES, NULL,
+ KMEM_CACHE_PHYSMEM);
+#endif /* __x86_64__ */
+#endif /* PAE */
+ s = (vm_size_t) sizeof(struct pv_entry);
+ kmem_cache_init(&pv_list_cache, "pv_entry", s, 0, NULL, 0);
+
+#if NCPUS > 1
+ /*
+ * Set up the pmap request lists
+ */
+ for (i = 0; i < NCPUS; i++) {
+ pmap_update_list_t up = &cpu_update_list[i];
+
+ simple_lock_init(&up->lock);
+ up->count = 0;
+ }
+#endif /* NCPUS > 1 */
+
+ /*
+ * Indicate that the PMAP module is now fully initialized.
+ */
+ pmap_initialized = TRUE;
+}
+
+static inline boolean_t
+valid_page(phys_addr_t addr)
+{
+ struct vm_page *p;
+
+ if (!pmap_initialized)
+ return FALSE;
+
+ p = vm_page_lookup_pa(addr);
+ return (p != NULL);
+}
+
+/*
+ * Routine: pmap_page_table_page_alloc
+ *
+ * Allocates a new physical page to be used as a page-table page.
+ *
+ * Must be called with the pmap system and the pmap unlocked,
+ * since these must be unlocked to use vm_page_grab.
+ */
+static vm_offset_t
+pmap_page_table_page_alloc(void)
+{
+ vm_page_t m;
+ phys_addr_t pa;
+
+ check_simple_locks();
+
+ /*
+ * We cannot allocate the pmap_object in pmap_init,
+ * because it is called before the cache package is up.
+ * Allocate it now if it is missing.
+ */
+ if (pmap_object == VM_OBJECT_NULL)
+ pmap_object = vm_object_allocate(vm_page_table_size() * PAGE_SIZE);
+
+ /*
+ * Allocate a VM page for the level 2 page table entries.
+ */
+ while ((m = vm_page_grab(VM_PAGE_DIRECTMAP)) == VM_PAGE_NULL)
+ VM_PAGE_WAIT((void (*)()) 0);
+
+ /*
+ * Map the page to its physical address so that it
+ * can be found later.
+ */
+ pa = m->phys_addr;
+ assert(pa == (vm_offset_t) pa);
+ vm_object_lock(pmap_object);
+ vm_page_insert(m, pmap_object, pa);
+ vm_page_lock_queues();
+ vm_page_wire(m);
+ inuse_ptepages_count++;
+ vm_page_unlock_queues();
+ vm_object_unlock(pmap_object);
+
+ /*
+ * Zero the page.
+ */
+ memset((void *)phystokv(pa), 0, PAGE_SIZE);
+
+ return pa;
+}
+
+#ifdef MACH_XEN
+void pmap_map_mfn(void *_addr, unsigned long mfn) {
+ vm_offset_t addr = (vm_offset_t) _addr;
+ pt_entry_t *pte, *pdp;
+ vm_offset_t ptp;
+ pt_entry_t ma = ((pt_entry_t) mfn) << PAGE_SHIFT;
+
+ /* Add a ptp if none exist yet for this pte */
+ if ((pte = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL) {
+ ptp = phystokv(pmap_page_table_page_alloc());
+#ifdef MACH_PV_PAGETABLES
+ pmap_set_page_readonly((void*) ptp);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, pa_to_mfn(ptp)))
+ panic("couldn't pin page %lx(%lx)\n",ptp,(vm_offset_t) kv_to_ma(ptp));
+#endif /* MACH_PV_PAGETABLES */
+ pdp = pmap_pde(kernel_pmap, addr);
+
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(pdp),
+ pa_to_pte(kv_to_ma(ptp)) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pde %llx(%lx) to %lx(%lx)\n",__FILE__,__LINE__,kvtophys((vm_offset_t)pdp),(vm_offset_t) kv_to_ma(pdp), ptp, (vm_offset_t) pa_to_ma(ptp));
+#else /* MACH_PV_PAGETABLES */
+ *pdp = pa_to_pte(kvtophys(ptp)) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE;
+#endif /* MACH_PV_PAGETABLES */
+ pte = pmap_pte(kernel_pmap, addr);
+ }
+
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), ma | INTEL_PTE_VALID | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pte %p(%lx) to %llx(%llx)\n",__FILE__,__LINE__,pte,(vm_offset_t) kv_to_ma(pte), ma, ma_to_pa(ma));
+#else /* MACH_PV_PAGETABLES */
+ /* Note: in this case, mfn is actually a pfn. */
+ WRITE_PTE(pte, ma | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+#endif /* MACH_PV_PAGETABLES */
+}
+#endif /* MACH_XEN */
+
+/*
+ * Deallocate a page-table page.
+ * The page-table page must have all mappings removed,
+ * and be removed from its page directory.
+ */
+static void
+pmap_page_table_page_dealloc(vm_offset_t pa)
+{
+ vm_page_t m;
+
+ vm_object_lock(pmap_object);
+ m = vm_page_lookup(pmap_object, pa);
+ vm_page_lock_queues();
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
+ panic("couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa));
+ pmap_set_page_readwrite((void*) phystokv(pa));
+#endif /* MACH_PV_PAGETABLES */
+ vm_page_free(m);
+ inuse_ptepages_count--;
+ vm_page_unlock_queues();
+ vm_object_unlock(pmap_object);
+}
+
+/*
+ * Create and return a physical map.
+ *
+ * If the size specified for the map
+ * is zero, the map is an actual physical
+ * map, and may be referenced by the
+ * hardware.
+ *
+ * If the size specified is non-zero,
+ * the map will be used in software only, and
+ * is bounded by that size.
+ */
+pmap_t pmap_create(vm_size_t size)
+{
+#ifdef __x86_64__
+ // needs to be reworked if we want to dynamically allocate PDPs for kernel
+ const int PDPNUM = PDPNUM_KERNEL;
+#endif
+ pt_entry_t *page_dir[PDPNUM];
+ int i;
+ pmap_t p;
+ pmap_statistics_t stats;
+
+ /*
+ * A software use-only map doesn't even need a map.
+ */
+
+ if (size != 0) {
+ return(PMAP_NULL);
+ }
+
+/*
+ * Allocate a pmap struct from the pmap_cache. Then allocate
+ * the page descriptor table.
+ */
+
+ p = (pmap_t) kmem_cache_alloc(&pmap_cache);
+ if (p == PMAP_NULL)
+ return PMAP_NULL;
+
+ for (i = 0; i < PDPNUM; i++) {
+ page_dir[i] = (pt_entry_t *) kmem_cache_alloc(&pd_cache);
+ if (page_dir[i] == NULL) {
+ i -= 1;
+ while (i >= 0) {
+ kmem_cache_free(&pd_cache,
+ (vm_address_t) page_dir[i]);
+ i -= 1;
+ }
+ kmem_cache_free(&pmap_cache, (vm_address_t) p);
+ return PMAP_NULL;
+ }
+ memcpy(page_dir[i],
+ (void *) kernel_page_dir + i * INTEL_PGBYTES,
+ INTEL_PGBYTES);
+ }
+
+#ifdef LINUX_DEV
+#if VM_MIN_KERNEL_ADDRESS != 0
+ /* Do not map BIOS in user tasks */
+ page_dir
+#if PAE
+ [lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)]
+#else
+ [0]
+#endif
+ [lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)]
+ = 0;
+#endif
+#endif /* LINUX_DEV */
+
+#ifdef MACH_PV_PAGETABLES
+ {
+ for (i = 0; i < PDPNUM; i++)
+ pmap_set_page_readonly((void *) page_dir[i]);
+ }
+#endif /* MACH_PV_PAGETABLES */
+
+#if PAE
+ pt_entry_t *pdp_kernel = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
+ if (pdp_kernel == NULL) {
+ for (i = 0; i < PDPNUM; i++)
+ kmem_cache_free(&pd_cache, (vm_address_t) page_dir[i]);
+ kmem_cache_free(&pmap_cache, (vm_address_t) p);
+ return PMAP_NULL;
+ }
+
+ memset(pdp_kernel, 0, INTEL_PGBYTES);
+ {
+ for (i = 0; i < PDPNUM; i++) {
+ int pdp_index = i;
+#ifdef __x86_64__
+ pdp_index += lin2pdpnum(VM_MIN_KERNEL_ADDRESS);
+#endif
+ WRITE_PTE(&pdp_kernel[pdp_index],
+ pa_to_pte(kvtophys((vm_offset_t) page_dir[i]))
+ | INTEL_PTE_VALID
+#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES)
+ | INTEL_PTE_WRITE
+#ifdef __x86_64__
+ | INTEL_PTE_USER
+#endif /* __x86_64__ */
+#endif
+ );
+ }
+ }
+#ifdef __x86_64__
+ p->l4base = (pt_entry_t *) kmem_cache_alloc(&l4_cache);
+ if (p->l4base == NULL)
+ panic("pmap_create");
+ memset(p->l4base, 0, INTEL_PGBYTES);
+ WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
+ pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+#ifdef MACH_PV_PAGETABLES
+ // FIXME: use kmem_cache_alloc instead
+ if (kmem_alloc_wired(kernel_map,
+ (vm_offset_t *)&p->user_pdpbase, INTEL_PGBYTES)
+ != KERN_SUCCESS)
+ panic("pmap_create");
+ memset(p->user_pdpbase, 0, INTEL_PGBYTES);
+ {
+ int i;
+ for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++)
+ WRITE_PTE(&p->user_pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) page_dir[i])) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+ }
+ // FIXME: use kmem_cache_alloc instead
+ if (kmem_alloc_wired(kernel_map,
+ (vm_offset_t *)&p->user_l4base, INTEL_PGBYTES)
+ != KERN_SUCCESS)
+ panic("pmap_create");
+ memset(p->user_l4base, 0, INTEL_PGBYTES);
+ WRITE_PTE(&p->user_l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->user_pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+#endif /* MACH_PV_PAGETABLES */
+#else /* _x86_64 */
+ p->pdpbase = pdp_kernel;
+#endif /* _x86_64 */
+#ifdef MACH_PV_PAGETABLES
+#ifdef __x86_64__
+ pmap_set_page_readonly(p->l4base);
+ pmap_set_page_readonly(p->user_l4base);
+ pmap_set_page_readonly(p->user_pdpbase);
+#else
+ pmap_set_page_readonly(p->pdpbase);
+#endif
+#endif /* MACH_PV_PAGETABLES */
+#else /* PAE */
+ p->dirbase = page_dir[0];
+#endif /* PAE */
+
+ p->ref_count = 1;
+
+ simple_lock_init(&p->lock);
+ p->cpus_using = 0;
+
+ /*
+ * Initialize statistics.
+ */
+
+ stats = &p->stats;
+ stats->resident_count = 0;
+ stats->wired_count = 0;
+
+ return(p);
+}
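+
+/*
+ * Illustrative sketch (not compiled): creating and releasing a hardware
+ * pmap. A size of 0 requests a real, hardware-referenced map, as the
+ * comment above pmap_create() describes. The helper name is
+ * hypothetical.
+ */
+#if 0
+static pmap_t
+example_new_task_pmap(void)
+{
+	pmap_t p = pmap_create(0);
+
+	if (p == PMAP_NULL)
+		panic("no memory for pmap");
+	/* ... populate with pmap_enter(), release later with pmap_destroy(p) */
+	return p;
+}
+#endif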
+
+/*
+ * Retire the given physical map from service.
+ * Should only be called if the map contains
+ * no valid mappings.
+ */
+
+void pmap_destroy(pmap_t p)
+{
+ int c, s;
+
+ if (p == PMAP_NULL)
+ return;
+
+ SPLVM(s);
+ simple_lock(&p->lock);
+ c = --p->ref_count;
+ simple_unlock(&p->lock);
+ SPLX(s);
+
+ if (c != 0) {
+ return; /* still in use */
+ }
+
+ /*
+ * Free the page table tree.
+ */
+#if PAE
+#ifdef __x86_64__
+ for (int l4i = 0; l4i < NPTES; l4i++) {
+ pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
+ if (!(pdp & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
+#else /* __x86_64__ */
+ pt_entry_t *pdpbase = p->pdpbase;
+#endif /* __x86_64__ */
+ for (int l3i = 0; l3i < NPTES; l3i++) {
+ pt_entry_t pde = (pt_entry_t) pdpbase[l3i];
+ if (!(pde & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
+ if (
+#ifdef __x86_64__
+ l4i < lin2l4num(VM_MAX_USER_ADDRESS) ||
+ (l4i == lin2l4num(VM_MAX_USER_ADDRESS) && l3i < lin2pdpnum(VM_MAX_USER_ADDRESS))
+#else /* __x86_64__ */
+ l3i < lin2pdpnum(VM_MAX_USER_ADDRESS)
+#endif /* __x86_64__ */
+ )
+ for (int l2i = 0; l2i < NPTES; l2i++)
+#else /* PAE */
+ pt_entry_t *pdebase = p->dirbase;
+ for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++)
+#endif /* PAE */
+ {
+ pt_entry_t pte = (pt_entry_t) pdebase[l2i];
+ if (!(pte & INTEL_PTE_VALID))
+ continue;
+ kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
+ }
+ kmem_cache_free(&pd_cache, (vm_offset_t)pdebase);
+#if PAE
+ }
+ kmem_cache_free(&pdpt_cache, (vm_offset_t)pdpbase);
+#ifdef __x86_64__
+ }
+ kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base);
+#endif /* __x86_64__ */
+#endif /* PAE */
+
+ /* Finally, free the pmap itself */
+ kmem_cache_free(&pmap_cache, (vm_offset_t) p);
+}
+
+/*
+ * Add a reference to the specified pmap.
+ */
+
+void pmap_reference(pmap_t p)
+{
+ int s;
+ if (p != PMAP_NULL) {
+ SPLVM(s);
+ simple_lock(&p->lock);
+ p->ref_count++;
+ simple_unlock(&p->lock);
+ SPLX(s);
+ }
+}
+
+/*
+ * Remove a range of hardware page-table entries.
+ * The entries given are the first (inclusive)
+ * and last (exclusive) entries for the VM pages.
+ * The virtual address is the va for the first pte.
+ *
+ * The pmap must be locked.
+ * If the pmap is not the kernel pmap, the range must lie
+ * entirely within one pte-page. This is NOT checked.
+ * Assumes that the pte-page exists.
+ */
+
+static
+void pmap_remove_range(
+ pmap_t pmap,
+ vm_offset_t va,
+ pt_entry_t *spte,
+ pt_entry_t *epte)
+{
+ pt_entry_t *cpte;
+ unsigned long num_removed, num_unwired;
+ unsigned long pai;
+ phys_addr_t pa;
+#ifdef MACH_PV_PAGETABLES
+ int n, ii = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_PV_PAGETABLES */
+
+ if (pmap == kernel_pmap && (va < kernel_virtual_start || va + (epte-spte)*PAGE_SIZE > kernel_virtual_end))
+ panic("pmap_remove_range(%lx-%lx) falls in physical memory area!\n", (unsigned long) va, (unsigned long) va + (epte-spte)*PAGE_SIZE);
+
+#if DEBUG_PTE_PAGE
+ if (pmap != kernel_pmap)
+ ptep_check(get_pte_page(spte));
+#endif /* DEBUG_PTE_PAGE */
+ num_removed = 0;
+ num_unwired = 0;
+
+ for (cpte = spte; cpte < epte;
+ cpte += ptes_per_vm_page, va += PAGE_SIZE) {
+
+ if (*cpte == 0)
+ continue;
+
+ assert(*cpte & INTEL_PTE_VALID);
+
+ pa = pte_to_pa(*cpte);
+
+ num_removed++;
+ if (*cpte & INTEL_PTE_WIRED)
+ num_unwired++;
+
+ if (!valid_page(pa)) {
+
+ /*
+ * Outside range of managed physical memory.
+ * Just remove the mappings.
+ */
+ int i = ptes_per_vm_page;
+ pt_entry_t *lpte = cpte;
+ do {
+#ifdef MACH_PV_PAGETABLES
+ update[ii].ptr = kv_to_ma(lpte);
+ update[ii].val = 0;
+ ii++;
+ if (ii == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+ ii = 0;
+ }
+#else /* MACH_PV_PAGETABLES */
+ *lpte = 0;
+#endif /* MACH_PV_PAGETABLES */
+ lpte++;
+ } while (--i > 0);
+ continue;
+ }
+
+ pai = pa_index(pa);
+ LOCK_PVH(pai);
+
+ /*
+ * Get the modify and reference bits.
+ */
+ {
+ int i;
+ pt_entry_t *lpte;
+
+ i = ptes_per_vm_page;
+ lpte = cpte;
+ do {
+ pmap_phys_attributes[pai] |=
+ *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
+#ifdef MACH_PV_PAGETABLES
+ update[ii].ptr = kv_to_ma(lpte);
+ update[ii].val = 0;
+ ii++;
+ if (ii == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+ ii = 0;
+ }
+#else /* MACH_PV_PAGETABLES */
+ *lpte = 0;
+#endif /* MACH_PV_PAGETABLES */
+ lpte++;
+ } while (--i > 0);
+ }
+
+ /*
+ * Remove the mapping from the pvlist for
+ * this physical page.
+ */
+ {
+ pv_entry_t pv_h, prev, cur;
+
+ pv_h = pai_to_pvh(pai);
+ if (pv_h->pmap == PMAP_NULL) {
+ panic("pmap_remove: null pv_list for pai %lx at va %lx!", pai, (unsigned long) va);
+ }
+ if (pv_h->va == va && pv_h->pmap == pmap) {
+ /*
+ * Header is the pv_entry. Copy the next one
+ * to header and free the next one (we cannot
+ * free the header)
+ */
+ cur = pv_h->next;
+ if (cur != PV_ENTRY_NULL) {
+ *pv_h = *cur;
+ PV_FREE(cur);
+ }
+ else {
+ pv_h->pmap = PMAP_NULL;
+ }
+ }
+ else {
+ cur = pv_h;
+ do {
+ prev = cur;
+ if ((cur = prev->next) == PV_ENTRY_NULL) {
+ panic("pmap-remove: mapping not in pv_list!");
+ }
+ } while (cur->va != va || cur->pmap != pmap);
+ prev->next = cur->next;
+ PV_FREE(cur);
+ }
+ UNLOCK_PVH(pai);
+ }
+ }
+
+#ifdef MACH_PV_PAGETABLES
+ if (ii > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_remove_range");
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+#endif /* MACH_PV_PAGETABLES */
+
+ /*
+ * Update the counts
+ */
+ pmap->stats.resident_count -= num_removed;
+ pmap->stats.wired_count -= num_unwired;
+}
+
+/*
+ * Remove the given range of addresses
+ * from the specified map.
+ *
+ * It is assumed that the start and end are properly
+ * rounded to the hardware page size.
+ */
+
+void pmap_remove(
+ pmap_t map,
+ vm_offset_t s,
+ vm_offset_t e)
+{
+ int spl;
+ pt_entry_t *spte, *epte;
+ vm_offset_t l;
+ vm_offset_t _s = s;
+
+ if (map == PMAP_NULL)
+ return;
+
+ PMAP_READ_LOCK(map, spl);
+
+ while (s < e) {
+ pt_entry_t *pde = pmap_pde(map, s);
+
+ l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
+ if (l > e || l < s)
+ l = e;
+ if (pde && (*pde & INTEL_PTE_VALID)) {
+ spte = (pt_entry_t *)ptetokv(*pde);
+ spte = &spte[ptenum(s)];
+ epte = &spte[intel_btop(l-s)];
+ pmap_remove_range(map, s, spte, epte);
+ }
+ s = l;
+ }
+ PMAP_UPDATE_TLBS(map, _s, e);
+
+ PMAP_READ_UNLOCK(map, spl);
+}
+
+/*
+ * Routine: pmap_page_protect
+ *
+ * Function:
+ * Lower the permission for all mappings to a given
+ * page.
+ */
+void pmap_page_protect(
+ phys_addr_t phys,
+ vm_prot_t prot)
+{
+ pv_entry_t pv_h, prev;
+ pv_entry_t pv_e;
+ pt_entry_t *pte;
+ unsigned long pai;
+ pmap_t pmap;
+ int spl;
+ boolean_t remove;
+
+ assert(phys != vm_page_fictitious_addr);
+ if (!valid_page(phys)) {
+ /*
+ * Not a managed page.
+ */
+ return;
+ }
+
+ /*
+ * Determine the new protection.
+ */
+ switch (prot) {
+ case VM_PROT_READ:
+ case VM_PROT_READ|VM_PROT_EXECUTE:
+ remove = FALSE;
+ break;
+ case VM_PROT_ALL:
+ return; /* nothing to do */
+ default:
+ remove = TRUE;
+ break;
+ }
+
+ /*
+ * Lock the pmap system first, since we will be changing
+ * several pmaps.
+ */
+
+ PMAP_WRITE_LOCK(spl);
+
+ pai = pa_index(phys);
+ pv_h = pai_to_pvh(pai);
+
+ /*
+ * Walk down PV list, changing or removing all mappings.
+ * We do not have to lock the pv_list because we have
+ * the entire pmap system locked.
+ */
+ if (pv_h->pmap != PMAP_NULL) {
+
+ prev = pv_e = pv_h;
+ do {
+ vm_offset_t va;
+
+ pmap = pv_e->pmap;
+ /*
+ * Lock the pmap to block pmap_extract and similar routines.
+ */
+ simple_lock(&pmap->lock);
+
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
+
+ /*
+ * Consistency checks.
+ */
+ assert(*pte & INTEL_PTE_VALID);
+ assert(pte_to_pa(*pte) == phys);
+
+ /*
+ * Remove the mapping if new protection is NONE
+ * or if write-protecting a kernel mapping.
+ */
+ if (remove || pmap == kernel_pmap) {
+ /*
+ * Remove the mapping, collecting any modify bits.
+ */
+
+ if (*pte & INTEL_PTE_WIRED) {
+ pmap->stats.wired_count--;
+ }
+
+ {
+ int i = ptes_per_vm_page;
+
+ do {
+ pmap_phys_attributes[pai] |=
+ *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(pte++), 0))
+ panic("%s:%d could not clear pte %p\n",__FILE__,__LINE__,pte-1);
+#else /* MACH_PV_PAGETABLES */
+ *pte++ = 0;
+#endif /* MACH_PV_PAGETABLES */
+ } while (--i > 0);
+ }
+
+ pmap->stats.resident_count--;
+
+ /*
+ * Remove the pv_entry.
+ */
+ if (pv_e == pv_h) {
+ /*
+ * Fix up head later.
+ */
+ pv_h->pmap = PMAP_NULL;
+ }
+ else {
+ /*
+ * Delete this entry.
+ */
+ prev->next = pv_e->next;
+ PV_FREE(pv_e);
+ }
+ }
+ else {
+ /*
+ * Write-protect.
+ */
+ int i = ptes_per_vm_page;
+
+ do {
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WRITE))
+ panic("%s:%d could not disable write on pte %p\n",__FILE__,__LINE__,pte);
+#else /* MACH_PV_PAGETABLES */
+ *pte &= ~INTEL_PTE_WRITE;
+#endif /* MACH_PV_PAGETABLES */
+ pte++;
+ } while (--i > 0);
+
+ /*
+ * Advance prev.
+ */
+ prev = pv_e;
+ }
+ PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+
+ simple_unlock(&pmap->lock);
+
+ } while ((pv_e = prev->next) != PV_ENTRY_NULL);
+
+ /*
+ * If pv_head mapping was removed, fix it up.
+ */
+ if (pv_h->pmap == PMAP_NULL) {
+ pv_e = pv_h->next;
+ if (pv_e != PV_ENTRY_NULL) {
+ *pv_h = *pv_e;
+ PV_FREE(pv_e);
+ }
+ }
+ }
+
+ PMAP_WRITE_UNLOCK(spl);
+}
+
+/*
+ * Set the physical protection on the
+ * specified range of this map as requested.
+ * Will not increase permissions.
+ */
+void pmap_protect(
+ pmap_t map,
+ vm_offset_t s,
+ vm_offset_t e,
+ vm_prot_t prot)
+{
+ pt_entry_t *spte, *epte;
+ vm_offset_t l;
+ int spl;
+ vm_offset_t _s = s;
+
+ if (map == PMAP_NULL)
+ return;
+
+ /*
+ * Determine the new protection.
+ */
+ switch (prot) {
+ case VM_PROT_READ:
+ case VM_PROT_READ|VM_PROT_EXECUTE:
+ break;
+ case VM_PROT_READ|VM_PROT_WRITE:
+ case VM_PROT_ALL:
+ return; /* nothing to do */
+ default:
+ pmap_remove(map, s, e);
+ return;
+ }
+
+#if !(__i486__ || __i586__ || __i686__)
+ /*
+ * If write-protecting in the kernel pmap,
+ * remove the mappings; the i386 ignores
+ * the write-permission bit in kernel mode.
+ */
+ if (map == kernel_pmap) {
+ pmap_remove(map, s, e);
+ return;
+ }
+#endif
+
+ SPLVM(spl);
+ simple_lock(&map->lock);
+
+ while (s < e) {
+ pt_entry_t *pde = pmap_pde(map, s);
+
+ l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
+ if (l > e || l < s)
+ l = e;
+ if (pde && (*pde & INTEL_PTE_VALID)) {
+ spte = (pt_entry_t *)ptetokv(*pde);
+ spte = &spte[ptenum(s)];
+ epte = &spte[intel_btop(l-s)];
+
+#ifdef MACH_PV_PAGETABLES
+ int n, i = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_PV_PAGETABLES */
+
+ while (spte < epte) {
+ if (*spte & INTEL_PTE_VALID) {
+#ifdef MACH_PV_PAGETABLES
+ update[i].ptr = kv_to_ma(spte);
+ update[i].val = *spte & ~INTEL_PTE_WRITE;
+ i++;
+ if (i == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_protect\n");
+ i = 0;
+ }
+#else /* MACH_PV_PAGETABLES */
+ *spte &= ~INTEL_PTE_WRITE;
+#endif /* MACH_PV_PAGETABLES */
+ }
+ spte++;
+ }
+#ifdef MACH_PV_PAGETABLES
+ if (i > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_protect");
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_protect\n");
+#endif /* MACH_PV_PAGETABLES */
+ }
+ s = l;
+ }
+ PMAP_UPDATE_TLBS(map, _s, e);
+
+ simple_unlock(&map->lock);
+ SPLX(spl);
+}
+
+typedef pt_entry_t* (*pmap_level_getter_t)(const pmap_t pmap, vm_offset_t addr);
+/*
+ * Expand one single level of the page table tree.
+ */
+static inline pt_entry_t* pmap_expand_level(pmap_t pmap, vm_offset_t v, int spl,
+ pmap_level_getter_t pmap_level,
+ pmap_level_getter_t pmap_level_upper,
+ int n_per_vm_page,
+ struct kmem_cache *cache)
+{
+ pt_entry_t *pte;
+
+ /*
+ * Expand pmap to include this pte. Assume that
+ * pmap is always expanded to include enough hardware
+ * pages to map one VM page.
+ */
+ while ((pte = pmap_level(pmap, v)) == PT_ENTRY_NULL) {
+ /*
+ * Need to allocate a new page-table page.
+ */
+ vm_offset_t ptp;
+ pt_entry_t *pdp;
+ int i;
+
+ if (pmap == kernel_pmap) {
+ /*
+ * Would have to enter the new page-table page in
+ * EVERY pmap.
+ */
+ panic("pmap_expand kernel pmap to %#zx", v);
+ }
+
+ /*
+ * Unlock the pmap and allocate a new page-table page.
+ */
+ PMAP_READ_UNLOCK(pmap, spl);
+
+ while (!(ptp = kmem_cache_alloc(cache)))
+ VM_PAGE_WAIT((void (*)()) 0);
+ memset((void *)ptp, 0, PAGE_SIZE);
+
+ /*
+ * Re-lock the pmap and check that another thread has
+ * not already allocated the page-table page. If it
+ * has, discard the new page-table page (and try
+ * again to make sure).
+ */
+ PMAP_READ_LOCK(pmap, spl);
+
+ if (pmap_level(pmap, v) != PT_ENTRY_NULL) {
+ /*
+ * Oops...
+ */
+ PMAP_READ_UNLOCK(pmap, spl);
+ kmem_cache_free(cache, ptp);
+ PMAP_READ_LOCK(pmap, spl);
+ continue;
+ }
+
+ /*
+ * Enter the new page table page in the page directory.
+ */
+ i = n_per_vm_page;
+ pdp = pmap_level_upper(pmap, v);
+ do {
+#ifdef MACH_PV_PAGETABLES
+ pmap_set_page_readonly((void *) ptp);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn(ptp)))
+ panic("couldn't pin page %lx(%lx)\n",ptp,(vm_offset_t) kv_to_ma(ptp));
+ if (!hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdp)),
+ pa_to_pte(pa_to_ma(kvtophys(ptp))) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pde %p(%llx,%lx) to %lx(%llx,%lx) %lx\n",__FILE__,__LINE__, pdp, kvtophys((vm_offset_t)pdp), (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)pdp)), ptp, kvtophys(ptp), (vm_offset_t) pa_to_ma(kvtophys(ptp)), (vm_offset_t) pa_to_pte(kv_to_ma(ptp)));
+#else /* MACH_PV_PAGETABLES */
+ *pdp = pa_to_pte(kvtophys(ptp)) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE;
+#endif /* MACH_PV_PAGETABLES */
+ pdp++; /* Note: This is safe b/c we stay in one page. */
+ ptp += INTEL_PGBYTES;
+ } while (--i > 0);
+
+ /*
+ * Now, get the address of the page-table entry.
+ */
+ continue;
+ }
+ return pte;
+}
+
+/*
+ * Expand, if required, the PMAP to include the virtual address V.
+ * PMAP needs to be locked, and it will be still locked on return. It
+ * can temporarily unlock the PMAP, during allocation or deallocation
+ * of physical pages.
+ */
+static inline pt_entry_t* pmap_expand(pmap_t pmap, vm_offset_t v, int spl)
+{
+#ifdef PAE
+#ifdef __x86_64__
+ pmap_expand_level(pmap, v, spl, pmap_ptp, pmap_l4base, 1, &pdpt_cache);
+#endif /* __x86_64__ */
+ pmap_expand_level(pmap, v, spl, pmap_pde, pmap_ptp, 1, &pd_cache);
+#endif /* PAE */
+ return pmap_expand_level(pmap, v, spl, pmap_pte, pmap_pde, ptes_per_vm_page, &pt_cache);
+}
+
+/*
+ * Insert the given physical page (p) at
+ * the specified virtual address (v) in the
+ * target physical map with the protection requested.
+ *
+ * If specified, the page will be wired down, meaning
+ * that the related pte can not be reclaimed.
+ *
+ * NB: This is the only routine which MAY NOT lazy-evaluate
+ * or lose information. That is, this routine must actually
+ * insert this page into the given map NOW.
+ */
+void pmap_enter(
+ pmap_t pmap,
+ vm_offset_t v,
+ phys_addr_t pa,
+ vm_prot_t prot,
+ boolean_t wired)
+{
+ boolean_t is_physmem;
+ pt_entry_t *pte;
+ pv_entry_t pv_h;
+ unsigned long i, pai;
+ pv_entry_t pv_e;
+ pt_entry_t template;
+ int spl;
+ phys_addr_t old_pa;
+
+ assert(pa != vm_page_fictitious_addr);
+ if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa);
+ if (pmap == PMAP_NULL)
+ return;
+
+ if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
+ panic("pmap_enter(%lx, %llx) falls in physical memory area!\n", (unsigned long) v, (unsigned long long) pa);
+#if !(__i486__ || __i586__ || __i686__)
+ if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
+ && !wired /* hack for io_wire */ ) {
+ /*
+ * Because the 386 ignores write protection in kernel mode,
+ * we cannot enter a read-only kernel mapping, and must
+ * remove an existing mapping if changing it.
+ */
+ PMAP_READ_LOCK(pmap, spl);
+
+ pte = pmap_pte(pmap, v);
+ if (pte != PT_ENTRY_NULL && *pte != 0) {
+ /*
+ * Invalidate the translation buffer,
+ * then remove the mapping.
+ */
+ pmap_remove_range(pmap, v, pte,
+ pte + ptes_per_vm_page);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ }
+ PMAP_READ_UNLOCK(pmap, spl);
+ return;
+ }
+#endif
+
+ /*
+ * Must allocate a new pvlist entry while we're unlocked;
+ * Allocating may cause pageout (which will lock the pmap system).
+ * If we determine we need a pvlist entry, we will unlock
+	 * and allocate one.  Then we will retry, throwing away
+ * the allocated entry later (if we no longer need it).
+ */
+ pv_e = PV_ENTRY_NULL;
+Retry:
+ PMAP_READ_LOCK(pmap, spl);
+
+ pte = pmap_expand(pmap, v, spl);
+
+ if (vm_page_ready())
+ is_physmem = (vm_page_lookup_pa(pa) != NULL);
+ else
+ is_physmem = (pa < biosmem_directmap_end());
+
+ /*
+ * Special case if the physical page is already mapped
+ * at this address.
+ */
+ old_pa = pte_to_pa(*pte);
+ if (*pte && old_pa == pa) {
+ /*
+ * May be changing its wired attribute or protection
+ */
+
+ if (wired && !(*pte & INTEL_PTE_WIRED))
+ pmap->stats.wired_count++;
+ else if (!wired && (*pte & INTEL_PTE_WIRED))
+ pmap->stats.wired_count--;
+
+ template = pa_to_pte(pa) | INTEL_PTE_VALID;
+ if (pmap != kernel_pmap)
+ template |= INTEL_PTE_USER;
+ if (prot & VM_PROT_WRITE)
+ template |= INTEL_PTE_WRITE;
+ if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486
+ && !is_physmem)
+ template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU;
+ if (wired)
+ template |= INTEL_PTE_WIRED;
+ i = ptes_per_vm_page;
+ do {
+ if (*pte & INTEL_PTE_MOD)
+ template |= INTEL_PTE_MOD;
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template)))
+ panic("%s:%d could not set pte %p to %llx\n",__FILE__,__LINE__,pte,template);
+#else /* MACH_PV_PAGETABLES */
+ WRITE_PTE(pte, template)
+#endif /* MACH_PV_PAGETABLES */
+ pte++;
+ pte_increment_pa(template);
+ } while (--i > 0);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ }
+ else {
+
+ /*
+ * Remove old mapping from the PV list if necessary.
+ */
+ if (*pte) {
+ /*
+ * Don't free the pte page if removing last
+ * mapping - we will immediately replace it.
+ */
+ pmap_remove_range(pmap, v, pte,
+ pte + ptes_per_vm_page);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ }
+
+ if (valid_page(pa)) {
+
+ /*
+ * Enter the mapping in the PV list for this
+ * physical page.
+ */
+
+ pai = pa_index(pa);
+ LOCK_PVH(pai);
+ pv_h = pai_to_pvh(pai);
+
+ if (pv_h->pmap == PMAP_NULL) {
+ /*
+ * No mappings yet
+ */
+ pv_h->va = v;
+ pv_h->pmap = pmap;
+ pv_h->next = PV_ENTRY_NULL;
+ }
+ else {
+#if DEBUG
+ {
+ /* check that this mapping is not already there */
+ pv_entry_t e = pv_h;
+ while (e != PV_ENTRY_NULL) {
+ if (e->pmap == pmap && e->va == v)
+ panic("pmap_enter: already in pv_list");
+ e = e->next;
+ }
+ }
+#endif /* DEBUG */
+
+ /*
+ * Add new pv_entry after header.
+ */
+ if (pv_e == PV_ENTRY_NULL) {
+ PV_ALLOC(pv_e);
+ if (pv_e == PV_ENTRY_NULL) {
+ UNLOCK_PVH(pai);
+ PMAP_READ_UNLOCK(pmap, spl);
+
+ /*
+ * Refill from cache.
+ */
+ pv_e = (pv_entry_t) kmem_cache_alloc(&pv_list_cache);
+ goto Retry;
+ }
+ }
+ pv_e->va = v;
+ pv_e->pmap = pmap;
+ pv_e->next = pv_h->next;
+ pv_h->next = pv_e;
+ /*
+ * Remember that we used the pvlist entry.
+ */
+ pv_e = PV_ENTRY_NULL;
+ }
+ UNLOCK_PVH(pai);
+ }
+
+ /*
+ * And count the mapping.
+ */
+
+ pmap->stats.resident_count++;
+ if (wired)
+ pmap->stats.wired_count++;
+
+ /*
+ * Build a template to speed up entering -
+ * only the pfn changes.
+ */
+ template = pa_to_pte(pa) | INTEL_PTE_VALID;
+ if (pmap != kernel_pmap)
+ template |= INTEL_PTE_USER;
+ if (prot & VM_PROT_WRITE)
+ template |= INTEL_PTE_WRITE;
+ if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486
+ && !is_physmem)
+ template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU;
+ if (wired)
+ template |= INTEL_PTE_WIRED;
+ i = ptes_per_vm_page;
+ do {
+#ifdef MACH_PV_PAGETABLES
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template))))
+ panic("%s:%d could not set pte %p to %llx\n",__FILE__,__LINE__,pte,template);
+#else /* MACH_PV_PAGETABLES */
+ WRITE_PTE(pte, template)
+#endif /* MACH_PV_PAGETABLES */
+ pte++;
+ pte_increment_pa(template);
+ } while (--i > 0);
+ }
+
+ if (pv_e != PV_ENTRY_NULL) {
+ PV_FREE(pv_e);
+ }
+
+ PMAP_READ_UNLOCK(pmap, spl);
+}
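+
+/*
+ * Usage sketch (illustrative only, not part of this file's interface):
+ * a typical caller, such as the VM fault handler, would do roughly
+ *
+ *	pmap_enter(map->pmap, va, m->phys_addr, prot, FALSE);
+ *
+ * where `m' is assumed to be the vm_page backing `va'.  pmap_enter
+ * itself only needs a pmap, a page-aligned virtual address, a physical
+ * address, the protection, and the wired flag.
+ */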
+
+/*
+ * Routine: pmap_change_wiring
+ * Function: Change the wiring attribute for a map/virtual-address
+ * pair.
+ * In/out conditions:
+ * The mapping must already exist in the pmap.
+ */
+void pmap_change_wiring(
+ pmap_t map,
+ vm_offset_t v,
+ boolean_t wired)
+{
+ pt_entry_t *pte;
+ int i;
+ int spl;
+
+ /*
+ * We must grab the pmap system lock because we may
+ * change a pte_page queue.
+ */
+ PMAP_READ_LOCK(map, spl);
+
+ if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
+ panic("pmap_change_wiring: pte missing");
+
+ if (wired && !(*pte & INTEL_PTE_WIRED)) {
+ /*
+ * wiring down mapping
+ */
+ map->stats.wired_count++;
+ i = ptes_per_vm_page;
+ do {
+ *pte++ |= INTEL_PTE_WIRED;
+ } while (--i > 0);
+ }
+ else if (!wired && (*pte & INTEL_PTE_WIRED)) {
+ /*
+ * unwiring mapping
+ */
+ map->stats.wired_count--;
+ i = ptes_per_vm_page;
+ do {
+#ifdef MACH_PV_PAGETABLES
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WIRED)))
+				panic("%s:%d could not unwire pte %p\n",__FILE__,__LINE__,pte);
+#else /* MACH_PV_PAGETABLES */
+ *pte &= ~INTEL_PTE_WIRED;
+#endif /* MACH_PV_PAGETABLES */
+ pte++;
+ } while (--i > 0);
+ }
+
+ PMAP_READ_UNLOCK(map, spl);
+}
+
+/*
+ * Routine: pmap_extract
+ * Function:
+ * Extract the physical page address associated
+ * with the given map/virtual_address pair.
+ */
+
+phys_addr_t pmap_extract(
+ pmap_t pmap,
+ vm_offset_t va)
+{
+ pt_entry_t *pte;
+ phys_addr_t pa;
+ int spl;
+
+ SPLVM(spl);
+ simple_lock(&pmap->lock);
+ if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
+ pa = 0;
+ else if (!(*pte & INTEL_PTE_VALID))
+ pa = 0;
+ else
+ pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
+ simple_unlock(&pmap->lock);
+ SPLX(spl);
+ return(pa);
+}
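+
+/*
+ * Usage sketch (illustrative only): pmap_extract() is the reverse
+ * lookup for callers that only hold a virtual address, e.g.
+ *
+ *	phys_addr_t pa = pmap_extract(kernel_pmap, va);
+ *
+ * where a result of 0 means "no valid translation".  Callers that
+ * could legitimately see physical page 0 must inspect the pte
+ * themselves, since 0 doubles as the failure value here.
+ */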
+
+/*
+ * Copy the range specified by src_addr/len
+ * from the source map to the range dst_addr/len
+ * in the destination map.
+ *
+ * This routine is only advisory and need not do anything.
+ */
+#if 0
+void pmap_copy(
+ pmap_t dst_pmap,
+ pmap_t src_pmap,
+ vm_offset_t dst_addr,
+ vm_size_t len,
+ vm_offset_t src_addr)
+{
+}
+#endif /* 0 */
+
+/*
+ * Routine: pmap_collect
+ * Function:
+ * Garbage collects the physical map system for
+ * pages which are no longer used.
+ * Success need not be guaranteed -- that is, there
+ * may well be pages which are not referenced, but
+ * others may be collected.
+ * Usage:
+ * Called by the pageout daemon when pages are scarce.
+ */
+void pmap_collect(pmap_t p)
+{
+ pt_entry_t *ptp;
+ pt_entry_t *eptp;
+ phys_addr_t pa;
+ int spl, wired;
+
+ if (p == PMAP_NULL)
+ return;
+
+ if (p == kernel_pmap)
+ return;
+
+ /*
+ * Free the page table tree.
+ */
+ PMAP_READ_LOCK(p, spl);
+#if PAE
+#ifdef __x86_64__
+ for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) {
+ pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
+ if (!(pdp & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
+ for (int l3i = 0; l3i < NPTES; l3i++)
+#else /* __x86_64__ */
+ pt_entry_t *pdpbase = p->pdpbase;
+ for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++)
+#endif /* __x86_64__ */
+ {
+ pt_entry_t pde = (pt_entry_t ) pdpbase[l3i];
+ if (!(pde & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
+ for (int l2i = 0; l2i < NPTES; l2i++)
+#else /* PAE */
+ pt_entry_t *pdebase = p->dirbase;
+ for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++)
+#endif /* PAE */
+ {
+ pt_entry_t pte = (pt_entry_t) pdebase[l2i];
+ if (!(pte & INTEL_PTE_VALID))
+ continue;
+
+ pa = pte_to_pa(pte);
+ ptp = (pt_entry_t *)phystokv(pa);
+ eptp = ptp + NPTES*ptes_per_vm_page;
+
+ /*
+ * If the pte page has any wired mappings, we cannot
+ * free it.
+ */
+ wired = 0;
+ {
+ pt_entry_t *ptep;
+ for (ptep = ptp; ptep < eptp; ptep++) {
+ if (*ptep & INTEL_PTE_WIRED) {
+ wired = 1;
+ break;
+ }
+ }
+ }
+ if (!wired) {
+ /*
+ * Remove the virtual addresses mapped by this pte page.
+ */
+ { /*XXX big hack*/
+ vm_offset_t va = pagenum2lin(l4i, l3i, l2i, 0);
+ if (p == kernel_pmap)
+ va = lintokv(va);
+ pmap_remove_range(p, va, ptp, eptp);
+ }
+
+ /*
+ * Invalidate the page directory pointer.
+ */
+ {
+ int i = ptes_per_vm_page;
+ pt_entry_t *pdep = &pdebase[l2i];
+ do {
+#ifdef MACH_PV_PAGETABLES
+ unsigned long pte = *pdep;
+ void *ptable = (void*) ptetokv(pte);
+ if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
+ panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
+ panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
+ pmap_set_page_readwrite(ptable);
+#else /* MACH_PV_PAGETABLES */
+ *pdep++ = 0;
+#endif /* MACH_PV_PAGETABLES */
+ } while (--i > 0);
+ }
+
+ PMAP_READ_UNLOCK(p, spl);
+
+ /*
+ * And free the pte page itself.
+ */
+ kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
+
+ PMAP_READ_LOCK(p, spl);
+
+ }
+ }
+#if PAE
+	/* TODO check l2 */
+ }
+#ifdef __x86_64__
+	/* TODO check l3 */
+ }
+#endif /* __x86_64__ */
+#endif /* PAE */
+
+ PMAP_UPDATE_TLBS(p, VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
+
+ PMAP_READ_UNLOCK(p, spl);
+ return;
+
+}
+
+#if MACH_KDB
+/*
+ * Routine: pmap_whatis
+ * Function:
+ * Check whether this address is within a pmap
+ * Usage:
+ * Called from debugger
+ */
+int pmap_whatis(pmap_t p, vm_offset_t a)
+{
+ pt_entry_t *ptp;
+ phys_addr_t pa;
+ int spl;
+ int ret = 0;
+
+ if (p == PMAP_NULL)
+ return 0;
+
+ PMAP_READ_LOCK(p, spl);
+#if PAE
+#ifdef __x86_64__
+ if (a >= (vm_offset_t) p->l4base && a < (vm_offset_t) (&p->l4base[NPTES])) {
+ db_printf("L4 for pmap %p\n", p);
+ ret = 1;
+ }
+ for (int l4i = 0; l4i < NPTES; l4i++) {
+ pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
+ if (!(pdp & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
+#else /* __x86_64__ */
+ int l4i = 0;
+ pt_entry_t *pdpbase = p->pdpbase;
+#endif /* __x86_64__ */
+ if (a >= (vm_offset_t) pdpbase && a < (vm_offset_t) (&pdpbase[NPTES])) {
+ db_printf("PDP %d for pmap %p\n", l4i, p);
+ ret = 1;
+ }
+ for (int l3i = 0; l3i < NPTES; l3i++)
+ {
+ pt_entry_t pde = (pt_entry_t ) pdpbase[l3i];
+ if (!(pde & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
+#else /* PAE */
+ int l4i = 0, l3i = 0;
+ pt_entry_t *pdebase = p->dirbase;
+#endif /* PAE */
+ if (a >= (vm_offset_t) pdebase && a < (vm_offset_t) (&pdebase[NPTES])) {
+ db_printf("PDE %d %d for pmap %p\n", l4i, l3i, p);
+ ret = 1;
+ }
+ for (int l2i = 0; l2i < NPTES; l2i++)
+ {
+ pt_entry_t pte = (pt_entry_t) pdebase[l2i];
+ if (!(pte & INTEL_PTE_VALID))
+ continue;
+
+ pa = pte_to_pa(pte);
+ ptp = (pt_entry_t *)phystokv(pa);
+
+ if (a >= (vm_offset_t) ptp && a < (vm_offset_t) (&ptp[NPTES*ptes_per_vm_page])) {
+ db_printf("PTP %d %d %d for pmap %p\n", l4i, l3i, l2i, p);
+ ret = 1;
+ }
+ }
+#if PAE
+ }
+#ifdef __x86_64__
+ }
+#endif /* __x86_64__ */
+#endif /* PAE */
+ PMAP_READ_UNLOCK(p, spl);
+
+ if (p == kernel_pmap) {
+ phys_addr_t pa;
+ if (DB_VALID_KERN_ADDR(a))
+ pa = kvtophys(a);
+ else
+ pa = pmap_extract(current_task()->map->pmap, a);
+
+ if (valid_page(pa)) {
+ unsigned long pai;
+ pv_entry_t pv_h;
+
+ pai = pa_index(pa);
+ for (pv_h = pai_to_pvh(pai);
+ pv_h && pv_h->pmap;
+ pv_h = pv_h->next)
+ db_printf("pmap %p at %llx\n", pv_h->pmap, pv_h->va);
+ }
+ }
+
+ return ret;
+}
+#endif /* MACH_KDB */
+
+/*
+ * Routine: pmap_activate
+ * Function:
+ * Binds the given physical map to the given
+ * processor, and returns a hardware map description.
+ */
+#if 0
+void pmap_activate(pmap_t my_pmap, thread_t th, int my_cpu)
+{
+ PMAP_ACTIVATE(my_pmap, th, my_cpu);
+}
+#endif /* 0 */
+
+/*
+ * Routine: pmap_deactivate
+ * Function:
+ * Indicates that the given physical map is no longer
+ * in use on the specified processor. (This is a macro
+ * in pmap.h)
+ */
+#if 0
+void pmap_deactivate(pmap_t pmap, thread_t th, int which_cpu)
+{
+ PMAP_DEACTIVATE(pmap, th, which_cpu);
+}
+#endif /* 0 */
+
+/*
+ * Routine: pmap_kernel
+ * Function:
+ * Returns the physical map handle for the kernel.
+ */
+#if 0
+pmap_t pmap_kernel()
+{
+ return (kernel_pmap);
+}
+#endif /* 0 */
+
+/*
+ * pmap_zero_page zeros the specified (machine independent) page.
+ * See machine/phys.c or machine/phys.s for implementation.
+ */
+#if 0
+void pmap_zero_page(vm_offset_t phys)
+{
+ int i;
+
+ assert(phys != vm_page_fictitious_addr);
+ i = PAGE_SIZE / INTEL_PGBYTES;
+ phys = intel_pfn(phys);
+
+ while (i--)
+ zero_phys(phys++);
+}
+#endif /* 0 */
+
+/*
+ * pmap_copy_page copies the specified (machine independent) page.
+ * See machine/phys.c or machine/phys.s for implementation.
+ */
+#if 0
+void pmap_copy_page(vm_offset_t src, vm_offset_t dst)
+{
+ int i;
+
+ assert(src != vm_page_fictitious_addr);
+ assert(dst != vm_page_fictitious_addr);
+ i = PAGE_SIZE / INTEL_PGBYTES;
+
+ while (i--) {
+ copy_phys(intel_pfn(src), intel_pfn(dst));
+ src += INTEL_PGBYTES;
+ dst += INTEL_PGBYTES;
+ }
+}
+#endif /* 0 */
+
+/*
+ * Routine: pmap_pageable
+ * Function:
+ * Make the specified pages (by pmap, offset)
+ * pageable (or not) as requested.
+ *
+ * A page which is not pageable may not take
+ * a fault; therefore, its page table entry
+ * must remain valid for the duration.
+ *
+ * This routine is merely advisory; pmap_enter
+ * will specify that these pages are to be wired
+ * down (or not) as appropriate.
+ */
+void
+pmap_pageable(
+ pmap_t pmap,
+ vm_offset_t start,
+ vm_offset_t end,
+ boolean_t pageable)
+{
+}
+
+/*
+ * Clear specified attribute bits.
+ */
+static void
+phys_attribute_clear(
+ phys_addr_t phys,
+ int bits)
+{
+ pv_entry_t pv_h;
+ pv_entry_t pv_e;
+ pt_entry_t *pte;
+ unsigned long pai;
+ pmap_t pmap;
+ int spl;
+
+ assert(phys != vm_page_fictitious_addr);
+ if (!valid_page(phys)) {
+ /*
+ * Not a managed page.
+ */
+ return;
+ }
+
+ /*
+ * Lock the pmap system first, since we will be changing
+ * several pmaps.
+ */
+
+ PMAP_WRITE_LOCK(spl);
+
+ pai = pa_index(phys);
+ pv_h = pai_to_pvh(pai);
+
+ /*
+ * Walk down PV list, clearing all modify or reference bits.
+ * We do not have to lock the pv_list because we have
+ * the entire pmap system locked.
+ */
+ if (pv_h->pmap != PMAP_NULL) {
+ /*
+ * There are some mappings.
+ */
+ for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
+ vm_offset_t va;
+
+ pmap = pv_e->pmap;
+ /*
+ * Lock the pmap to block pmap_extract and similar routines.
+ */
+ simple_lock(&pmap->lock);
+
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
+
+ /*
+ * Consistency checks.
+ */
+ assert(*pte & INTEL_PTE_VALID);
+ assert(pte_to_pa(*pte) == phys);
+
+ /*
+ * Clear modify or reference bits.
+ */
+ {
+ int i = ptes_per_vm_page;
+ do {
+#ifdef MACH_PV_PAGETABLES
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~bits)))
+ panic("%s:%d could not clear bits %x from pte %p\n",__FILE__,__LINE__,bits,pte);
+#else /* MACH_PV_PAGETABLES */
+ *pte &= ~bits;
+#endif /* MACH_PV_PAGETABLES */
+ } while (--i > 0);
+ }
+ PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+ simple_unlock(&pmap->lock);
+ }
+ }
+
+ pmap_phys_attributes[pai] &= ~bits;
+
+ PMAP_WRITE_UNLOCK(spl);
+}
+
+/*
+ * Check specified attribute bits.
+ */
+static boolean_t
+phys_attribute_test(
+ phys_addr_t phys,
+ int bits)
+{
+ pv_entry_t pv_h;
+ pv_entry_t pv_e;
+ pt_entry_t *pte;
+ unsigned long pai;
+ pmap_t pmap;
+ int spl;
+
+ assert(phys != vm_page_fictitious_addr);
+ if (!valid_page(phys)) {
+ /*
+ * Not a managed page.
+ */
+ return (FALSE);
+ }
+
+ /*
+ * Lock the pmap system first, since we will be checking
+ * several pmaps.
+ */
+
+ PMAP_WRITE_LOCK(spl);
+
+ pai = pa_index(phys);
+ pv_h = pai_to_pvh(pai);
+
+ if (pmap_phys_attributes[pai] & bits) {
+ PMAP_WRITE_UNLOCK(spl);
+ return (TRUE);
+ }
+
+ /*
+ * Walk down PV list, checking all mappings.
+ * We do not have to lock the pv_list because we have
+ * the entire pmap system locked.
+ */
+ if (pv_h->pmap != PMAP_NULL) {
+ /*
+ * There are some mappings.
+ */
+ for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
+
+ pmap = pv_e->pmap;
+ /*
+ * Lock the pmap to block pmap_extract and similar routines.
+ */
+ simple_lock(&pmap->lock);
+
+ {
+ vm_offset_t va;
+
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
+
+ /*
+ * Consistency checks.
+ */
+ assert(*pte & INTEL_PTE_VALID);
+ assert(pte_to_pa(*pte) == phys);
+ }
+
+ /*
+ * Check modify or reference bits.
+ */
+ {
+ int i = ptes_per_vm_page;
+
+ do {
+ if (*pte & bits) {
+ simple_unlock(&pmap->lock);
+ PMAP_WRITE_UNLOCK(spl);
+ return (TRUE);
+ }
+ } while (--i > 0);
+ }
+ simple_unlock(&pmap->lock);
+ }
+ }
+ PMAP_WRITE_UNLOCK(spl);
+ return (FALSE);
+}
+
+/*
+ * Clear the modify bits on the specified physical page.
+ */
+
+void pmap_clear_modify(phys_addr_t phys)
+{
+ phys_attribute_clear(phys, PHYS_MODIFIED);
+}
+
+/*
+ * pmap_is_modified:
+ *
+ * Return whether or not the specified physical page is modified
+ * by any physical maps.
+ */
+
+boolean_t pmap_is_modified(phys_addr_t phys)
+{
+ return (phys_attribute_test(phys, PHYS_MODIFIED));
+}
+
+/*
+ * pmap_clear_reference:
+ *
+ * Clear the reference bit on the specified physical page.
+ */
+
+void pmap_clear_reference(phys_addr_t phys)
+{
+ phys_attribute_clear(phys, PHYS_REFERENCED);
+}
+
+/*
+ * pmap_is_referenced:
+ *
+ * Return whether or not the specified physical page is referenced
+ * by any physical maps.
+ */
+
+boolean_t pmap_is_referenced(phys_addr_t phys)
+{
+ return (phys_attribute_test(phys, PHYS_REFERENCED));
+}
+
+#if NCPUS > 1
+/*
+* TLB Coherence Code (TLB "shootdown" code)
+*
+* Threads that belong to the same task share the same address space and
+* hence share a pmap. However, they may run on distinct cpus and thus
+* have distinct TLBs that cache page table entries. In order to guarantee
+* the TLBs are consistent, whenever a pmap is changed, all threads that
+* are active in that pmap must have their TLB updated. To keep track of
+* this information, the set of cpus that are currently using a pmap is
+* maintained within each pmap structure (cpus_using). Pmap_activate() and
+* pmap_deactivate() add and remove, respectively, a cpu from this set.
+* Since the TLBs are not addressable over the bus, each processor must
+* flush its own TLB; a processor that needs to invalidate another TLB
+* needs to interrupt the processor that owns that TLB to signal the
+* update.
+*
+* Whenever a pmap is updated, the lock on that pmap is locked, and all
+* cpus using the pmap are signaled to invalidate. All threads that need
+* to activate a pmap must wait for the lock to clear to await any updates
+* in progress before using the pmap. They must ACQUIRE the lock to add
+* their cpu to the cpus_using set. An implicit assumption made
+* throughout the TLB code is that all kernel code that runs at or higher
+* than splvm blocks out update interrupts, and that such code does not
+* touch pageable pages.
+*
+* A shootdown interrupt serves another function besides signaling a
+* processor to invalidate. The interrupt routine (pmap_update_interrupt)
+* waits for both the pmap lock and the kernel pmap lock to clear,
+* preventing user code from making implicit pmap updates while the
+* sending processor is performing its update. (This could happen via a
+* user data write reference that turns on the modify bit in the page
+* table). It must wait for any kernel updates that may have started
+* concurrently with a user pmap update because the IPC code
+* changes mappings.
+* Spinning on the VALUES of the locks is sufficient (rather than
+* having to acquire the locks) because any updates that occur subsequent
+* to finding the lock unlocked will be signaled via another interrupt.
+* (This assumes the interrupt is cleared before the low level interrupt code
+* calls pmap_update_interrupt()).
+*
+* The signaling processor must wait for any implicit updates in progress
+* to terminate before continuing with its update. Thus it must wait for an
+* acknowledgement of the interrupt from each processor for which such
+* references could be made. For maintaining this information, a set
+* cpus_active is used. A cpu is in this set if and only if it can
+* use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
+* this set; when all such cpus are removed, it is safe to update.
+*
+* Before attempting to acquire the update lock on a pmap, a cpu (A) must
+* be at least at the priority of the interprocessor interrupt
+* (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
+* kernel update; it would spin forever in pmap_update_interrupt() trying
+* to acquire the user pmap lock it had already acquired. Furthermore A
+* must remove itself from cpus_active. Otherwise, another cpu holding
+* the lock (B) could be in the process of sending an update signal to A,
+* and thus be waiting for A to remove itself from cpus_active. If A is
+* spinning on the lock at raised priority, this will never happen and a deadlock
+* will result.
+*/
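+
+/*
+ * A rough sketch of the handshake described above (illustrative only):
+ *
+ *  initiator (pmap lock held):
+ *	signal_cpus(pmap->cpus_using, pmap, start, end);
+ *	    ... queues the range on each target cpu and interrupts it ...
+ *	wait for the signaled cpus to leave cpus_active;
+ *	invalidate the local TLB.
+ *
+ *  responder (pmap_update_interrupt):
+ *	leave cpus_active;
+ *	spin until the user and kernel pmap locks are clear;
+ *	process_pmap_updates(my_pmap);
+ *	rejoin cpus_active.
+ */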
+
+/*
+ * Signal another CPU that it must flush its TLB
+ */
+void signal_cpus(
+ cpu_set use_list,
+ pmap_t pmap,
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ int which_cpu, j;
+ pmap_update_list_t update_list_p;
+
+ while ((which_cpu = __builtin_ffs(use_list)) != 0) {
+ which_cpu -= 1; /* convert to 0 origin */
+
+ update_list_p = &cpu_update_list[which_cpu];
+ simple_lock(&update_list_p->lock);
+
+ j = update_list_p->count;
+ if (j >= UPDATE_LIST_SIZE) {
+ /*
+ * list overflowed. Change last item to
+ * indicate overflow.
+ */
+ update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
+ update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_USER_ADDRESS;
+ update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
+ }
+ else {
+ update_list_p->item[j].pmap = pmap;
+ update_list_p->item[j].start = start;
+ update_list_p->item[j].end = end;
+ update_list_p->count = j+1;
+ }
+ cpu_update_needed[which_cpu] = TRUE;
+ simple_unlock(&update_list_p->lock);
+
+ __sync_synchronize();
+ if (((cpus_idle & (1 << which_cpu)) == 0))
+ interrupt_processor(which_cpu);
+ use_list &= ~(1 << which_cpu);
+ }
+}
+
+void process_pmap_updates(pmap_t my_pmap)
+{
+ int my_cpu = cpu_number();
+ pmap_update_list_t update_list_p;
+ int j;
+ pmap_t pmap;
+
+ update_list_p = &cpu_update_list[my_cpu];
+ simple_lock(&update_list_p->lock);
+
+ for (j = 0; j < update_list_p->count; j++) {
+ pmap = update_list_p->item[j].pmap;
+ if (pmap == my_pmap ||
+ pmap == kernel_pmap) {
+
+ INVALIDATE_TLB(pmap,
+ update_list_p->item[j].start,
+ update_list_p->item[j].end);
+ }
+ }
+ update_list_p->count = 0;
+ cpu_update_needed[my_cpu] = FALSE;
+ simple_unlock(&update_list_p->lock);
+}
+
+/*
+ * Interrupt routine for TBIA requested from other processor.
+ */
+void pmap_update_interrupt(void)
+{
+ int my_cpu;
+ pmap_t my_pmap;
+ int s;
+
+ my_cpu = cpu_number();
+
+ /*
+ * Exit now if we're idle. We'll pick up the update request
+ * when we go active, and we must not put ourselves back in
+ * the active set because we'll never process the interrupt
+ * while we're idle (thus hanging the system).
+ */
+ if (cpus_idle & (1 << my_cpu))
+ return;
+
+ if (current_thread() == THREAD_NULL)
+ my_pmap = kernel_pmap;
+ else {
+ my_pmap = current_pmap();
+ if (!pmap_in_use(my_pmap, my_cpu))
+ my_pmap = kernel_pmap;
+ }
+
+ /*
+ * Raise spl to splvm (above splip) to block out pmap_extract
+ * from IO code (which would put this cpu back in the active
+ * set).
+ */
+ s = splvm();
+
+ do {
+
+ /*
+ * Indicate that we're not using either user or kernel
+ * pmap.
+ */
+ i_bit_clear(my_cpu, &cpus_active);
+
+ /*
+ * Wait for any pmap updates in progress, on either user
+ * or kernel pmap.
+ */
+ while (my_pmap->lock.lock_data ||
+ kernel_pmap->lock.lock_data)
+ cpu_pause();
+
+ process_pmap_updates(my_pmap);
+
+ i_bit_set(my_cpu, &cpus_active);
+
+ } while (cpu_update_needed[my_cpu]);
+
+ splx(s);
+}
+#else /* NCPUS > 1 */
+/*
+ * Dummy routine to satisfy external reference.
+ */
+void pmap_update_interrupt(void)
+{
+ /* should never be called. */
+}
+#endif /* NCPUS > 1 */
+
+#if defined(__i386__) || defined (__x86_64__)
+/* Unmap page 0 to trap NULL references. */
+void
+pmap_unmap_page_zero (void)
+{
+ int *pte;
+
+ printf("Unmapping the zero page. Some BIOS functions may not be working any more.\n");
+ pte = (int *) pmap_pte (kernel_pmap, 0);
+ if (!pte)
+ return;
+ assert (pte);
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), 0))
+ printf("couldn't unmap page 0\n");
+#else /* MACH_PV_PAGETABLES */
+ *pte = 0;
+ INVALIDATE_TLB(kernel_pmap, 0, PAGE_SIZE);
+#endif /* MACH_PV_PAGETABLES */
+}
+#endif /* __i386__ || __x86_64__ */
+
+void
+pmap_make_temporary_mapping(void)
+{
+ int i;
+ /*
+ * We'll have to temporarily install a direct mapping
+ * between physical memory and low linear memory,
+ * until we start using our new kernel segment descriptors.
+ */
+#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+ vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS;
+ if ((vm_offset_t)(-delta) < delta)
+ delta = (vm_offset_t)(-delta);
+ int nb_direct = delta >> PDESHIFT;
+ for (i = 0; i < nb_direct; i++)
+ kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] =
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i];
+#endif
+
+#ifdef LINUX_DEV
+ /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */
+#if VM_MIN_KERNEL_ADDRESS != 0
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] =
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
+#endif
+#endif /* LINUX_DEV */
+
+#ifdef MACH_PV_PAGETABLES
+#ifndef __x86_64__
+ const int PDPNUM_KERNEL = PDPNUM;
+#endif
+ for (i = 0; i < PDPNUM_KERNEL; i++)
+ pmap_set_page_readonly_init((void*) kernel_page_dir + i * INTEL_PGBYTES);
+#if PAE
+#ifndef __x86_64__
+ pmap_set_page_readonly_init(kernel_pmap->pdpbase);
+#endif
+#endif /* PAE */
+#endif /* MACH_PV_PAGETABLES */
+
+ pmap_set_page_dir();
+}
+
+void
+pmap_set_page_dir(void)
+{
+#if PAE
+#ifdef __x86_64__
+ set_cr3((unsigned long)_kvtophys(kernel_pmap->l4base));
+#else
+ set_cr3((unsigned long)_kvtophys(kernel_pmap->pdpbase));
+#endif
+#ifndef MACH_HYP
+ if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE))
+ panic("CPU doesn't have support for PAE.");
+ set_cr4(get_cr4() | CR4_PAE);
+#endif /* MACH_HYP */
+#else
+ set_cr3((unsigned long)_kvtophys(kernel_page_dir));
+#endif /* PAE */
+}
+
+void
+pmap_remove_temporary_mapping(void)
+{
+#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+ int i;
+ vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS;
+ if ((vm_offset_t)(-delta) < delta)
+ delta = (vm_offset_t)(-delta);
+ int nb_direct = delta >> PDESHIFT;
+ /* Get rid of the temporary direct mapping and flush it out of the TLB. */
+ for (i = 0 ; i < nb_direct; i++) {
+#ifdef MACH_XEN
+#ifdef MACH_PSEUDO_PHYS
+ if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS) + i]), 0))
+#else /* MACH_PSEUDO_PHYS */
+ if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i * INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL))
+#endif /* MACH_PSEUDO_PHYS */
+ printf("couldn't unmap frame %d\n", i);
+#else /* MACH_XEN */
+ kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = 0;
+#endif /* MACH_XEN */
+ }
+#endif
+
+#ifdef LINUX_DEV
+ /* Keep BIOS memory mapped */
+#if VM_MIN_KERNEL_ADDRESS != 0
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] =
+ kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)];
+#endif
+#endif /* LINUX_DEV */
+
+ /* Not used after boot, better give it back. */
+#ifdef MACH_XEN
+ hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS);
+#endif /* MACH_XEN */
+
+ flush_tlb();
+}
diff --git a/riscv/intel/pmap.h b/riscv/intel/pmap.h
new file mode 100644
index 0000000..d745aad
--- /dev/null
+++ b/riscv/intel/pmap.h
@@ -0,0 +1,574 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: pmap.h
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Machine-dependent structures for the physical map module.
+ */
+
+#ifndef _PMAP_MACHINE_
+#define _PMAP_MACHINE_ 1
+
+#ifndef __ASSEMBLER__
+
+#include <kern/lock.h>
+#include <mach/machine/vm_param.h>
+#include <mach/vm_statistics.h>
+#include <mach/kern_return.h>
+#include <mach/vm_prot.h>
+#include <riscv/proc_reg.h>
+
+/*
+ * Define the generic in terms of the specific
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#define INTEL_PGBYTES I386_PGBYTES
+#define INTEL_PGSHIFT I386_PGSHIFT
+#define intel_btop(x) i386_btop(x)
+#define intel_ptob(x) i386_ptob(x)
+#define intel_round_page(x) i386_round_page(x)
+#define intel_trunc_page(x) i386_trunc_page(x)
+#define trunc_intel_to_vm(x) trunc_i386_to_vm(x)
+#define round_intel_to_vm(x) round_i386_to_vm(x)
+#define vm_to_intel(x) vm_to_i386(x)
+#endif /* __i386__ || __x86_64__ */
+
+/*
+ * i386/i486 Page Table Entry
+ */
+
+typedef phys_addr_t pt_entry_t;
+#define PT_ENTRY_NULL ((pt_entry_t *) 0)
+
+#endif /* __ASSEMBLER__ */
+
+#define INTEL_OFFMASK 0xfff /* offset within page */
+#if PAE
+#ifdef __x86_64__
+#define L4SHIFT 39 /* L4 shift */
+#define L4MASK 0x1ff /* mask for L4 index */
+#define PDPNUM_KERNEL (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1)
+#define PDPMASK 0x1ff /* mask for page directory pointer index */
+#else /* __x86_64__ */
+#define PDPNUM 4 /* number of page directory pointers */
+#define PDPMASK 3 /* mask for page directory pointer index */
+#endif /* __x86_64__ */
+#define PDPSHIFT 30 /* page directory pointer */
+#define PDESHIFT 21 /* page descriptor shift */
+#define PDEMASK 0x1ff /* mask for page descriptor index */
+#define PTESHIFT 12 /* page table shift */
+#define PTEMASK 0x1ff /* mask for page table index */
+#else /* PAE */
+#define PDPNUM 1 /* number of page directory pointers */
+#define PDESHIFT 22 /* page descriptor shift */
+#define PDEMASK 0x3ff /* mask for page descriptor index */
+#define PTESHIFT 12 /* page table shift */
+#define PTEMASK 0x3ff /* mask for page table index */
+#endif /* PAE */
+
+/*
+ * Convert linear offset to L4 pointer index
+ */
+#ifdef __x86_64__
+#define lin2l4num(a) (((a) >> L4SHIFT) & L4MASK)
+#endif
+
+/*
+ * Convert linear offset to page descriptor index
+ */
+#define lin2pdenum(a) (((a) >> PDESHIFT) & PDEMASK)
+
+#if PAE
+/* Special version that assumes contiguous page directories; it also
+   includes the page directory pointer table index. */
+#ifdef __x86_64__
+#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x3ff)
+#else
+#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x7ff)
+#endif
+#else
+#define lin2pdenum_cont(a) lin2pdenum(a)
+#endif
+
+/*
+ * Convert linear offset to page directory pointer index
+ */
+#if PAE
+#define lin2pdpnum(a) (((a) >> PDPSHIFT) & PDPMASK)
+#endif
+
+/*
+ * Convert page descriptor index to linear address
+ */
+#define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT)
+
+#if PAE
+#ifdef __x86_64__
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l4num) << L4SHIFT) + \
+ ((vm_offset_t)(l3num) << PDPSHIFT) + \
+ ((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#else /* __x86_64__ */
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l3num) << PDPSHIFT) + \
+ ((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#endif
+#else /* PAE */
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#endif
+
+
+/*
+ * Convert linear offset to page table index
+ */
+#define ptenum(a) (((a) >> PTESHIFT) & PTEMASK)
+
+#define NPTES (intel_ptob(1)/sizeof(pt_entry_t))
+#define NPDES (PDPNUM * (intel_ptob(1)/sizeof(pt_entry_t)))
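+
+/*
+ * Worked example (non-PAE layout, illustrative only): with PDESHIFT 22,
+ * PTESHIFT 12 and 10-bit masks, the linear address 0x00803204 splits as
+ *
+ *	lin2pdenum(0x00803204) == 2		(bits 31..22)
+ *	ptenum(0x00803204)     == 3		(bits 21..12)
+ *	0x00803204 & INTEL_OFFMASK == 0x204	(bits 11..0)
+ *
+ * and pagenum2lin(0, 0, 2, 3) == 0x00803000 reconstructs the page base.
+ */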
+
+/*
+ * Hardware pte bit definitions (to be used directly on the ptes
+ * without using the bit fields).
+ */
+
+#define INTEL_PTE_VALID 0x00000001
+#define INTEL_PTE_WRITE 0x00000002
+#define INTEL_PTE_USER 0x00000004
+#define INTEL_PTE_WTHRU 0x00000008
+#define INTEL_PTE_NCACHE 0x00000010
+#define INTEL_PTE_REF 0x00000020
+#define INTEL_PTE_MOD 0x00000040
+#define INTEL_PTE_PS 0x00000080
+#ifdef MACH_PV_PAGETABLES
+/* Not supported */
+#define INTEL_PTE_GLOBAL 0x00000000
+#else /* MACH_PV_PAGETABLES */
+#define INTEL_PTE_GLOBAL 0x00000100
+#endif /* MACH_PV_PAGETABLES */
+#define INTEL_PTE_WIRED 0x00000200
+#ifdef PAE
+#ifdef __x86_64__
+#define INTEL_PTE_PFN 0xfffffffffffff000ULL
+#else /* __x86_64__ */
+#define INTEL_PTE_PFN 0x00007ffffffff000ULL
+#endif/* __x86_64__ */
+#else
+#define INTEL_PTE_PFN 0xfffff000
+#endif
+
+#define pa_to_pte(a) ((a) & INTEL_PTE_PFN)
+#ifdef MACH_PSEUDO_PHYS
+#define pte_to_pa(p) ma_to_pa((p) & INTEL_PTE_PFN)
+#else /* MACH_PSEUDO_PHYS */
+#define pte_to_pa(p) ((p) & INTEL_PTE_PFN)
+#endif /* MACH_PSEUDO_PHYS */
+#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1)
+
+/*
+ * Convert page table entry to kernel virtual address
+ */
+#define ptetokv(a) (phystokv(pte_to_pa(a)))
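+
+/*
+ * Illustrative sketch: pmap_enter() builds its hardware entries from
+ * these macros and bits, roughly
+ *
+ *	pt_entry_t e = pa_to_pte(pa) | INTEL_PTE_VALID
+ *				     | INTEL_PTE_USER
+ *				     | INTEL_PTE_WRITE;
+ *
+ * and pte_to_pa(e) recovers the (page-aligned) physical address.  This
+ * is only a sketch of the template construction; see pmap_enter() in
+ * pmap.c for the full set of flags actually used.
+ */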
+
+#ifndef __ASSEMBLER__
+typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */
+ /* changed by other processors */
+
+struct pmap {
+#if ! PAE
+ pt_entry_t *dirbase; /* page directory table */
+#else /* PAE */
+#ifdef __x86_64__
+ pt_entry_t *l4base; /* l4 table */
+#ifdef MACH_HYP
+ pt_entry_t *user_l4base; /* Userland l4 table */
+	pt_entry_t *user_pdpbase;	/* Userland pdp table */
+#endif /* MACH_HYP */
+#else /* x86_64 */
+ pt_entry_t *pdpbase; /* page directory pointer table */
+#endif /* x86_64 */
+#endif /* PAE */
+ int ref_count; /* reference count */
+ decl_simple_lock_data(,lock)
+ /* lock on map */
+ struct pmap_statistics stats; /* map statistics */
+ cpu_set cpus_using; /* bitmap of cpus using pmap */
+};
+
+typedef struct pmap *pmap_t;
+
+#define PMAP_NULL ((pmap_t) 0)
+
+#ifdef MACH_PV_PAGETABLES
+extern void pmap_set_page_readwrite(void *addr);
+extern void pmap_set_page_readonly(void *addr);
+extern void pmap_set_page_readonly_init(void *addr);
+extern void pmap_map_mfn(void *addr, unsigned long mfn);
+extern void pmap_clear_bootstrap_pagetable(pt_entry_t *addr);
+#endif /* MACH_PV_PAGETABLES */
+
+#if PAE
+#ifdef __x86_64__
+/* TODO: support PCID */
+#ifdef MACH_HYP
+#define set_pmap(pmap) \
+ MACRO_BEGIN \
+ set_cr3(kvtophys((vm_offset_t)(pmap)->l4base)); \
+ if (pmap->user_l4base) \
+ if (!hyp_set_user_cr3(kvtophys((vm_offset_t)(pmap)->user_l4base))) \
+ panic("set_user_cr3"); \
+ MACRO_END
+#else /* MACH_HYP */
+#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->l4base))
+#endif /* MACH_HYP */
+#else /* x86_64 */
+#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->pdpbase))
+#endif /* x86_64 */
+#else /* PAE */
+#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->dirbase))
+#endif /* PAE */
+
+typedef struct {
+ pt_entry_t *entry;
+ vm_offset_t vaddr;
+} pmap_mapwindow_t;
+
+extern pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry);
+extern void pmap_put_mapwindow(pmap_mapwindow_t *map);
+
+#define PMAP_NMAPWINDOWS 2 /* Per CPU */
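+
+/*
+ * Usage sketch (an assumption about typical callers, not a definition):
+ * temporary access to an arbitrary physical page goes through a map
+ * window, roughly
+ *
+ *	pmap_mapwindow_t *w;
+ *	w = pmap_get_mapwindow(pa_to_pte(pa) | INTEL_PTE_VALID
+ *					     | INTEL_PTE_WRITE);
+ *	memset((void *) w->vaddr, 0, PAGE_SIZE);
+ *	pmap_put_mapwindow(w);
+ */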
+
+#if NCPUS > 1
+/*
+ * List of cpus that are actively using mapped memory. Any
+ * pmap update operation must wait for all cpus in this list.
+ * Update operations must still be queued to cpus not in this
+ * list.
+ */
+extern cpu_set cpus_active;
+
+/*
+ * List of cpus that are idle, but still operating, and will want
+ * to see any kernel pmap updates when they become active.
+ */
+extern cpu_set cpus_idle;
+
+/*
+ * Quick test for pmap update requests.
+ */
+extern volatile
+boolean_t cpu_update_needed[NCPUS];
+
+/*
+ * External declarations for PMAP_ACTIVATE.
+ */
+
+void process_pmap_updates(pmap_t);
+extern pmap_t kernel_pmap;
+
+#endif /* NCPUS > 1 */
+
+void pmap_update_interrupt(void);
+
+/*
+ * Machine dependent routines that are used only for i386/i486.
+ */
+
+pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr);
+
+/*
+ * Macros for speed.
+ */
+
+#if NCPUS > 1
+
+/*
+ * For multiple CPUS, PMAP_ACTIVATE and PMAP_DEACTIVATE must manage
+ * fields to control TLB invalidation on other CPUS.
+ */
+
+#define PMAP_ACTIVATE_KERNEL(my_cpu) { \
+ \
+ /* \
+ * Let pmap updates proceed while we wait for this pmap. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_active); \
+ \
+ /* \
+ * Lock the pmap to put this cpu in its active set. \
+ * Wait for updates here. \
+ */ \
+ simple_lock(&kernel_pmap->lock); \
+ \
+ /* \
+ * Process invalidate requests for the kernel pmap. \
+ */ \
+ if (cpu_update_needed[(my_cpu)]) \
+ process_pmap_updates(kernel_pmap); \
+ \
+ /* \
+ * Mark that this cpu is using the pmap. \
+ */ \
+ i_bit_set((my_cpu), &kernel_pmap->cpus_using); \
+ \
+ /* \
+ * Mark this cpu active - IPL will be lowered by \
+ * load_context(). \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ \
+ simple_unlock(&kernel_pmap->lock); \
+}
+
+#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \
+ /* \
+ * Mark pmap no longer in use by this cpu even if \
+ * pmap is locked against updates. \
+ */ \
+ i_bit_clear((my_cpu), &kernel_pmap->cpus_using); \
+}
+
+#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \
+ pmap_t tpmap = (pmap); \
+ \
+ if (tpmap == kernel_pmap) { \
+ /* \
+ * If this is the kernel pmap, switch to its page tables. \
+ */ \
+ set_pmap(tpmap); \
+ } \
+ else { \
+ /* \
+ * Let pmap updates proceed while we wait for this pmap. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_active); \
+ \
+ /* \
+ * Lock the pmap to put this cpu in its active set. \
+ * Wait for updates here. \
+ */ \
+ simple_lock(&tpmap->lock); \
+ \
+ /* \
+ * No need to invalidate the TLB - the entire user pmap \
+ * will be invalidated by reloading dirbase. \
+ */ \
+ set_pmap(tpmap); \
+ \
+ /* \
+ * Mark that this cpu is using the pmap. \
+ */ \
+ i_bit_set((my_cpu), &tpmap->cpus_using); \
+ \
+ /* \
+ * Mark this cpu active - IPL will be lowered by \
+ * load_context(). \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ \
+ simple_unlock(&tpmap->lock); \
+ } \
+}
+
+#define PMAP_DEACTIVATE_USER(pmap, thread, my_cpu) { \
+ pmap_t tpmap = (pmap); \
+ \
+ /* \
+ * Do nothing if this is the kernel pmap. \
+ */ \
+ if (tpmap != kernel_pmap) { \
+ /* \
+ * Mark pmap no longer in use by this cpu even if \
+ * pmap is locked against updates. \
+ */ \
+ i_bit_clear((my_cpu), &(pmap)->cpus_using); \
+ } \
+}
+
+#define MARK_CPU_IDLE(my_cpu) { \
+ /* \
+ * Mark this cpu idle, and remove it from the active set, \
+ * since it is not actively using any pmap. Signal_cpus \
+ * will notice that it is idle, and avoid signaling it, \
+ * but will queue the update request for when the cpu \
+ * becomes active. \
+ */ \
+ int s = splvm(); \
+ i_bit_set((my_cpu), &cpus_idle); \
+ i_bit_clear((my_cpu), &cpus_active); \
+ splx(s); \
+}
+
+#define MARK_CPU_ACTIVE(my_cpu) { \
+ \
+ int s = splvm(); \
+ /* \
+ * If a kernel_pmap update was requested while this cpu \
+ * was idle, process it as if we got the interrupt. \
+ * Before doing so, remove this cpu from the idle set. \
+ * Since we do not grab any pmap locks while we flush \
+ * our TLB, another cpu may start an update operation \
+ * before we finish. Removing this cpu from the idle \
+ * set assures that we will receive another update \
+ * interrupt if this happens. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_idle); \
+ __sync_synchronize(); \
+ \
+ if (cpu_update_needed[(my_cpu)]) \
+ pmap_update_interrupt(); \
+ \
+ /* \
+ * Mark that this cpu is now active. \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ splx(s); \
+}
+
+#else /* NCPUS > 1 */
+
+/*
+ * With only one CPU, we just have to indicate whether the pmap is
+ * in use.
+ */
+
+#define PMAP_ACTIVATE_KERNEL(my_cpu) { \
+ (void) (my_cpu); \
+ kernel_pmap->cpus_using = TRUE; \
+}
+
+#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \
+ (void) (my_cpu); \
+ kernel_pmap->cpus_using = FALSE; \
+}
+
+#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \
+ pmap_t tpmap = (pmap); \
+ (void) (th); \
+ (void) (my_cpu); \
+ \
+ set_pmap(tpmap); \
+ if (tpmap != kernel_pmap) { \
+ tpmap->cpus_using = TRUE; \
+ } \
+}
+
+#define PMAP_DEACTIVATE_USER(pmap, thread, cpu) { \
+ (void) (thread); \
+ (void) (cpu); \
+ if ((pmap) != kernel_pmap) \
+ (pmap)->cpus_using = FALSE; \
+}
+
+#endif /* NCPUS > 1 */
+
+#define PMAP_CONTEXT(pmap, thread)
+
+#define pmap_kernel() (kernel_pmap)
+#define pmap_resident_count(pmap) ((pmap)->stats.resident_count)
+#define pmap_phys_address(frame) ((intel_ptob((phys_addr_t) frame)))
+#define pmap_phys_to_frame(phys) ((int) (intel_btop(phys)))
+#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
+#define pmap_attribute(pmap,addr,size,attr,value) \
+ (KERN_INVALID_ADDRESS)
+
+extern pt_entry_t *kernel_page_dir;
+
+extern vm_offset_t kernel_virtual_start;
+extern vm_offset_t kernel_virtual_end;
+
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ * Allocate the kernel page directory and page tables,
+ * and direct-map all physical memory.
+ * Called with mapping off.
+ */
+extern void pmap_bootstrap(void);
+
+extern void pmap_set_page_dir(void);
+extern void pmap_make_temporary_mapping(void);
+extern void pmap_remove_temporary_mapping(void);
+
+extern void pmap_unmap_page_zero (void);
+
+/*
+ * pmap_zero_page zeros the specified (machine independent) page.
+ */
+extern void pmap_zero_page (phys_addr_t);
+
+/*
+ * pmap_copy_page copies the specified (machine independent) pages.
+ */
+extern void pmap_copy_page (phys_addr_t, phys_addr_t);
+
+/*
+ * copy_to_phys(src_addr_v, dst_addr_p, count)
+ *
+ * Copy virtual memory to physical memory
+ */
+extern void
+copy_to_phys(
+ vm_offset_t src_addr_v,
+ phys_addr_t dst_addr_p,
+ int count);
+
+/*
+ * copy_from_phys(src_addr_p, dst_addr_v, count)
+ *
+ * Copy physical memory to virtual memory. The virtual memory
+ * is assumed to be present (e.g. the buffer pool).
+ */
+extern void
+copy_from_phys(
+ phys_addr_t src_addr_p,
+ vm_offset_t dst_addr_v,
+ int count);
+
+/*
+ * kvtophys(addr)
+ *
+ * Convert a kernel virtual address to a physical address
+ */
+extern phys_addr_t kvtophys (vm_offset_t);
+
+#if NCPUS > 1
+void signal_cpus(
+ cpu_set use_list,
+ pmap_t pmap,
+ vm_offset_t start,
+ vm_offset_t end);
+#endif /* NCPUS > 1 */
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _PMAP_MACHINE_ */
diff --git a/riscv/intel/pmap.h~ b/riscv/intel/pmap.h~
new file mode 100644
index 0000000..8b0eba0
--- /dev/null
+++ b/riscv/intel/pmap.h~
@@ -0,0 +1,574 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: pmap.h
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Machine-dependent structures for the physical map module.
+ */
+
+#ifndef _PMAP_MACHINE_
+#define _PMAP_MACHINE_ 1
+
+#ifndef __ASSEMBLER__
+
+#include <kern/lock.h>
+#include <mach/machine/vm_param.h>
+#include <mach/vm_statistics.h>
+#include <mach/kern_return.h>
+#include <mach/vm_prot.h>
+#include <i386/proc_reg.h>
+
+/*
+ * Define the generic in terms of the specific
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#define INTEL_PGBYTES I386_PGBYTES
+#define INTEL_PGSHIFT I386_PGSHIFT
+#define intel_btop(x) i386_btop(x)
+#define intel_ptob(x) i386_ptob(x)
+#define intel_round_page(x) i386_round_page(x)
+#define intel_trunc_page(x) i386_trunc_page(x)
+#define trunc_intel_to_vm(x) trunc_i386_to_vm(x)
+#define round_intel_to_vm(x) round_i386_to_vm(x)
+#define vm_to_intel(x) vm_to_i386(x)
+#endif /* __i386__ */
+
+/*
+ * i386/i486 Page Table Entry
+ */
+
+typedef phys_addr_t pt_entry_t;
+#define PT_ENTRY_NULL ((pt_entry_t *) 0)
+
+#endif /* __ASSEMBLER__ */
+
+#define INTEL_OFFMASK 0xfff /* offset within page */
+#if PAE
+#ifdef __x86_64__
+#define L4SHIFT 39 /* L4 shift */
+#define L4MASK 0x1ff /* mask for L4 index */
+#define PDPNUM_KERNEL (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1)
+#define PDPMASK 0x1ff /* mask for page directory pointer index */
+#else /* __x86_64__ */
+#define PDPNUM 4 /* number of page directory pointers */
+#define PDPMASK 3 /* mask for page directory pointer index */
+#endif /* __x86_64__ */
+#define PDPSHIFT 30 /* page directory pointer */
+#define PDESHIFT 21 /* page descriptor shift */
+#define PDEMASK 0x1ff /* mask for page descriptor index */
+#define PTESHIFT 12 /* page table shift */
+#define PTEMASK 0x1ff /* mask for page table index */
+#else /* PAE */
+#define PDPNUM 1 /* number of page directory pointers */
+#define PDESHIFT 22 /* page descriptor shift */
+#define PDEMASK 0x3ff /* mask for page descriptor index */
+#define PTESHIFT 12 /* page table shift */
+#define PTEMASK 0x3ff /* mask for page table index */
+#endif /* PAE */
+
+/*
+ * Convert linear offset to L4 pointer index
+ */
+#ifdef __x86_64__
+#define lin2l4num(a) (((a) >> L4SHIFT) & L4MASK)
+#endif
+
+/*
+ * Convert linear offset to page descriptor index
+ */
+#define lin2pdenum(a) (((a) >> PDESHIFT) & PDEMASK)
+
+#if PAE
+/* Special version assuming contiguous page directories. Making it
+ include the page directory pointer table index too. */
+#ifdef __x86_64__
+#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x3ff)
+#else
+#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x7ff)
+#endif
+#else
+#define lin2pdenum_cont(a) lin2pdenum(a)
+#endif
+
+/*
+ * Convert linear offset to page directory pointer index
+ */
+#if PAE
+#define lin2pdpnum(a) (((a) >> PDPSHIFT) & PDPMASK)
+#endif
+
+/*
+ * Convert page descriptor index to linear address
+ */
+#define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT)
+
+#if PAE
+#ifdef __x86_64__
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l4num) << L4SHIFT) + \
+ ((vm_offset_t)(l3num) << PDPSHIFT) + \
+ ((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#else /* __x86_64__ */
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l3num) << PDPSHIFT) + \
+ ((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#endif
+#else /* PAE */
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#endif
+
+
+/*
+ * Convert linear offset to page table index
+ */
+#define ptenum(a) (((a) >> PTESHIFT) & PTEMASK)
+
+#define NPTES (intel_ptob(1)/sizeof(pt_entry_t))
+#define NPDES (PDPNUM * (intel_ptob(1)/sizeof(pt_entry_t)))
+
+/*
+ * Hardware pte bit definitions (to be used directly on the ptes
+ * without using the bit fields).
+ */
+
+#define INTEL_PTE_VALID 0x00000001
+#define INTEL_PTE_WRITE 0x00000002
+#define INTEL_PTE_USER 0x00000004
+#define INTEL_PTE_WTHRU 0x00000008
+#define INTEL_PTE_NCACHE 0x00000010
+#define INTEL_PTE_REF 0x00000020
+#define INTEL_PTE_MOD 0x00000040
+#define INTEL_PTE_PS 0x00000080
+#ifdef MACH_PV_PAGETABLES
+/* Not supported */
+#define INTEL_PTE_GLOBAL 0x00000000
+#else /* MACH_PV_PAGETABLES */
+#define INTEL_PTE_GLOBAL 0x00000100
+#endif /* MACH_PV_PAGETABLES */
+#define INTEL_PTE_WIRED 0x00000200
+#ifdef PAE
+#ifdef __x86_64__
+#define INTEL_PTE_PFN 0xfffffffffffff000ULL
+#else /* __x86_64__ */
+#define INTEL_PTE_PFN 0x00007ffffffff000ULL
+#endif/* __x86_64__ */
+#else
+#define INTEL_PTE_PFN 0xfffff000
+#endif
+
+#define pa_to_pte(a) ((a) & INTEL_PTE_PFN)
+#ifdef MACH_PSEUDO_PHYS
+#define pte_to_pa(p) ma_to_pa((p) & INTEL_PTE_PFN)
+#else /* MACH_PSEUDO_PHYS */
+#define pte_to_pa(p) ((p) & INTEL_PTE_PFN)
+#endif /* MACH_PSEUDO_PHYS */
+#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1)
+
+/*
+ * Convert page table entry to kernel virtual address
+ */
+#define ptetokv(a) (phystokv(pte_to_pa(a)))
+
+#ifndef __ASSEMBLER__
+typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */
+ /* changed by other processors */
+
+struct pmap {
+#if ! PAE
+ pt_entry_t *dirbase; /* page directory table */
+#else /* PAE */
+#ifdef __x86_64__
+ pt_entry_t *l4base; /* l4 table */
+#ifdef MACH_HYP
+ pt_entry_t *user_l4base; /* Userland l4 table */
+ pt_entry_t *user_pdpbase; /* Userland l4 table */
+#endif /* MACH_HYP */
+#else /* x86_64 */
+ pt_entry_t *pdpbase; /* page directory pointer table */
+#endif /* x86_64 */
+#endif /* PAE */
+ int ref_count; /* reference count */
+ decl_simple_lock_data(,lock)
+ /* lock on map */
+ struct pmap_statistics stats; /* map statistics */
+ cpu_set cpus_using; /* bitmap of cpus using pmap */
+};
+
+typedef struct pmap *pmap_t;
+
+#define PMAP_NULL ((pmap_t) 0)
+
+#ifdef MACH_PV_PAGETABLES
+extern void pmap_set_page_readwrite(void *addr);
+extern void pmap_set_page_readonly(void *addr);
+extern void pmap_set_page_readonly_init(void *addr);
+extern void pmap_map_mfn(void *addr, unsigned long mfn);
+extern void pmap_clear_bootstrap_pagetable(pt_entry_t *addr);
+#endif /* MACH_PV_PAGETABLES */
+
+#if PAE
+#ifdef __x86_64__
+/* TODO: support PCID */
+#ifdef MACH_HYP
+#define set_pmap(pmap) \
+ MACRO_BEGIN \
+ set_cr3(kvtophys((vm_offset_t)(pmap)->l4base)); \
+ if (pmap->user_l4base) \
+ if (!hyp_set_user_cr3(kvtophys((vm_offset_t)(pmap)->user_l4base))) \
+ panic("set_user_cr3"); \
+ MACRO_END
+#else /* MACH_HYP */
+#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->l4base))
+#endif /* MACH_HYP */
+#else /* x86_64 */
+#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->pdpbase))
+#endif /* x86_64 */
+#else /* PAE */
+#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->dirbase))
+#endif /* PAE */
+
+typedef struct {
+ pt_entry_t *entry;
+ vm_offset_t vaddr;
+} pmap_mapwindow_t;
+
+extern pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry);
+extern void pmap_put_mapwindow(pmap_mapwindow_t *map);
+
+#define PMAP_NMAPWINDOWS 2 /* Per CPU */
+
+#if NCPUS > 1
+/*
+ * List of cpus that are actively using mapped memory. Any
+ * pmap update operation must wait for all cpus in this list.
+ * Update operations must still be queued to cpus not in this
+ * list.
+ */
+extern cpu_set cpus_active;
+
+/*
+ * List of cpus that are idle, but still operating, and will want
+ * to see any kernel pmap updates when they become active.
+ */
+extern cpu_set cpus_idle;
+
+/*
+ * Quick test for pmap update requests.
+ */
+extern volatile
+boolean_t cpu_update_needed[NCPUS];
+
+/*
+ * External declarations for PMAP_ACTIVATE.
+ */
+
+void process_pmap_updates(pmap_t);
+extern pmap_t kernel_pmap;
+
+#endif /* NCPUS > 1 */
+
+void pmap_update_interrupt(void);
+
+/*
+ * Machine dependent routines that are used only for i386/i486.
+ */
+
+pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr);
+
+/*
+ * Macros for speed.
+ */
+
+#if NCPUS > 1
+
+/*
+ * For multiple CPUS, PMAP_ACTIVATE and PMAP_DEACTIVATE must manage
+ * fields to control TLB invalidation on other CPUS.
+ */
+
+#define PMAP_ACTIVATE_KERNEL(my_cpu) { \
+ \
+ /* \
+ * Let pmap updates proceed while we wait for this pmap. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_active); \
+ \
+ /* \
+ * Lock the pmap to put this cpu in its active set. \
+ * Wait for updates here. \
+ */ \
+ simple_lock(&kernel_pmap->lock); \
+ \
+ /* \
+ * Process invalidate requests for the kernel pmap. \
+ */ \
+ if (cpu_update_needed[(my_cpu)]) \
+ process_pmap_updates(kernel_pmap); \
+ \
+ /* \
+ * Mark that this cpu is using the pmap. \
+ */ \
+ i_bit_set((my_cpu), &kernel_pmap->cpus_using); \
+ \
+ /* \
+ * Mark this cpu active - IPL will be lowered by \
+ * load_context(). \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ \
+ simple_unlock(&kernel_pmap->lock); \
+}
+
+#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \
+ /* \
+ * Mark pmap no longer in use by this cpu even if \
+ * pmap is locked against updates. \
+ */ \
+ i_bit_clear((my_cpu), &kernel_pmap->cpus_using); \
+}
+
+#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \
+ pmap_t tpmap = (pmap); \
+ \
+ if (tpmap == kernel_pmap) { \
+ /* \
+ * If this is the kernel pmap, switch to its page tables. \
+ */ \
+ set_pmap(tpmap); \
+ } \
+ else { \
+ /* \
+ * Let pmap updates proceed while we wait for this pmap. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_active); \
+ \
+ /* \
+ * Lock the pmap to put this cpu in its active set. \
+ * Wait for updates here. \
+ */ \
+ simple_lock(&tpmap->lock); \
+ \
+ /* \
+ * No need to invalidate the TLB - the entire user pmap \
+ * will be invalidated by reloading dirbase. \
+ */ \
+ set_pmap(tpmap); \
+ \
+ /* \
+ * Mark that this cpu is using the pmap. \
+ */ \
+ i_bit_set((my_cpu), &tpmap->cpus_using); \
+ \
+ /* \
+ * Mark this cpu active - IPL will be lowered by \
+ * load_context(). \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ \
+ simple_unlock(&tpmap->lock); \
+ } \
+}
+
+#define PMAP_DEACTIVATE_USER(pmap, thread, my_cpu) { \
+ pmap_t tpmap = (pmap); \
+ \
+ /* \
+ * Do nothing if this is the kernel pmap. \
+ */ \
+ if (tpmap != kernel_pmap) { \
+ /* \
+ * Mark pmap no longer in use by this cpu even if \
+ * pmap is locked against updates. \
+ */ \
+ i_bit_clear((my_cpu), &(pmap)->cpus_using); \
+ } \
+}
+
+#define MARK_CPU_IDLE(my_cpu) { \
+ /* \
+ * Mark this cpu idle, and remove it from the active set, \
+ * since it is not actively using any pmap. Signal_cpus \
+ * will notice that it is idle, and avoid signaling it, \
+ * but will queue the update request for when the cpu \
+ * becomes active. \
+ */ \
+ int s = splvm(); \
+ i_bit_set((my_cpu), &cpus_idle); \
+ i_bit_clear((my_cpu), &cpus_active); \
+ splx(s); \
+}
+
+#define MARK_CPU_ACTIVE(my_cpu) { \
+ \
+ int s = splvm(); \
+ /* \
+ * If a kernel_pmap update was requested while this cpu \
+ * was idle, process it as if we got the interrupt. \
+ * Before doing so, remove this cpu from the idle set. \
+ * Since we do not grab any pmap locks while we flush \
+ * our TLB, another cpu may start an update operation \
+ * before we finish. Removing this cpu from the idle \
+ * set assures that we will receive another update \
+ * interrupt if this happens. \
+ */ \
+ i_bit_clear((my_cpu), &cpus_idle); \
+ __sync_synchronize(); \
+ \
+ if (cpu_update_needed[(my_cpu)]) \
+ pmap_update_interrupt(); \
+ \
+ /* \
+ * Mark that this cpu is now active. \
+ */ \
+ i_bit_set((my_cpu), &cpus_active); \
+ splx(s); \
+}
+
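+/*
+ * Illustrative sketch (not part of the original source): the idle
+ * loop is expected to bracket its wait-for-work section with
+ * MARK_CPU_IDLE/MARK_CPU_ACTIVE so that signal_cpus() can skip idle
+ * processors and queue their updates instead.  Both helper names
+ * below (cpu_has_work and machine_wait) are hypothetical.
+ */
+#if 0 /* illustrative example, not compiled */
+static void
+example_idle_loop(int my_cpu)
+{
+ for (;;) {
+ MARK_CPU_IDLE(my_cpu);
+ while (!cpu_has_work(my_cpu)) /* hypothetical predicate */
+ machine_wait(); /* hypothetical: e.g. hlt */
+ MARK_CPU_ACTIVE(my_cpu);
+ /* ... pick up and run a thread ... */
+ }
+}
+#endif /* illustrative example */
+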
+#else /* NCPUS > 1 */
+
+/*
+ * With only one CPU, we just have to indicate whether the pmap is
+ * in use.
+ */
+
+#define PMAP_ACTIVATE_KERNEL(my_cpu) { \
+ (void) (my_cpu); \
+ kernel_pmap->cpus_using = TRUE; \
+}
+
+#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \
+ (void) (my_cpu); \
+ kernel_pmap->cpus_using = FALSE; \
+}
+
+#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \
+ pmap_t tpmap = (pmap); \
+ (void) (th); \
+ (void) (my_cpu); \
+ \
+ set_pmap(tpmap); \
+ if (tpmap != kernel_pmap) { \
+ tpmap->cpus_using = TRUE; \
+ } \
+}
+
+#define PMAP_DEACTIVATE_USER(pmap, thread, cpu) { \
+ (void) (thread); \
+ (void) (cpu); \
+ if ((pmap) != kernel_pmap) \
+ (pmap)->cpus_using = FALSE; \
+}
+
+#endif /* NCPUS > 1 */
+
+#define PMAP_CONTEXT(pmap, thread)
+
+#define pmap_kernel() (kernel_pmap)
+#define pmap_resident_count(pmap) ((pmap)->stats.resident_count)
+#define pmap_phys_address(frame) (intel_ptob((phys_addr_t) (frame)))
+#define pmap_phys_to_frame(phys) ((int) (intel_btop(phys)))
+#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
+#define pmap_attribute(pmap,addr,size,attr,value) \
+ (KERN_INVALID_ADDRESS)
+
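+/*
+ * Illustrative sketch (not part of the original source):
+ * pmap_phys_address() and pmap_phys_to_frame() are inverses built on
+ * intel_ptob()/intel_btop(), and pmap_resident_count() simply reads
+ * the statistics kept by this module.
+ */
+#if 0 /* illustrative example, not compiled */
+static boolean_t
+example_frame_round_trip(unsigned int frame)
+{
+ phys_addr_t pa = pmap_phys_address(frame);
+
+ /* Converting a frame number to a physical address and back
+  * yields the original frame number. */
+ return pmap_phys_to_frame(pa) == (int) frame;
+}
+#endif /* illustrative example */
+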
+extern pt_entry_t *kernel_page_dir;
+
+extern vm_offset_t kernel_virtual_start;
+extern vm_offset_t kernel_virtual_end;
+
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ * Allocate the kernel page directory and page tables,
+ * and direct-map all physical memory.
+ * Called with mapping off.
+ */
+extern void pmap_bootstrap(void);
+
+extern void pmap_set_page_dir(void);
+extern void pmap_make_temporary_mapping(void);
+extern void pmap_remove_temporary_mapping(void);
+
+extern void pmap_unmap_page_zero (void);
+
+/*
+ * pmap_zero_page zeros the specified (machine independent) page.
+ */
+extern void pmap_zero_page (phys_addr_t);
+
+/*
+ * pmap_copy_page copies the specified (machine independent) pages.
+ */
+extern void pmap_copy_page (phys_addr_t, phys_addr_t);
+
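+/*
+ * Illustrative sketch (not part of the original source): both routines
+ * take physical addresses, so a caller working from vm_page structures
+ * first extracts each page's physical address.  The phys_addr field
+ * name follows vm/vm_page.h and is used here as an assumption.
+ */
+#if 0 /* illustrative example, not compiled */
+static void
+example_copy_vm_pages(vm_page_t src, vm_page_t dst)
+{
+ pmap_copy_page(src->phys_addr, dst->phys_addr);
+}
+#endif /* illustrative example */
+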
+/*
+ * copy_to_phys(src_addr_v, dst_addr_p, count)
+ *
+ * Copy virtual memory to physical memory
+ */
+extern void
+copy_to_phys(
+ vm_offset_t src_addr_v,
+ phys_addr_t dst_addr_p,
+ int count);
+
+/*
+ * copy_from_phys(src_addr_p, dst_addr_v, count)
+ *
+ * Copy physical memory to virtual memory. The virtual memory
+ * is assumed to be present (e.g. the buffer pool).
+ */
+extern void
+copy_from_phys(
+ phys_addr_t src_addr_p,
+ vm_offset_t dst_addr_v,
+ int count);
+
+/*
+ * kvtophys(addr)
+ *
+ * Convert a kernel virtual address to a physical address
+ */
+extern phys_addr_t kvtophys (vm_offset_t);
+
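+/*
+ * Illustrative sketch (not part of the original source): kvtophys()
+ * yields the physical address behind a mapped kernel buffer, which
+ * can then be handed to copy_to_phys()/copy_from_phys().  The count
+ * arguments are byte counts, and the destination buffer is assumed
+ * to be physically contiguous for the length of the copy.
+ */
+#if 0 /* illustrative example, not compiled */
+static void
+example_copy_between_kernel_buffers(
+ vm_offset_t src_kva,
+ vm_offset_t dst_kva,
+ int nbytes)
+{
+ phys_addr_t dst_pa = kvtophys(dst_kva);
+
+ /* Copy nbytes from the source virtual buffer into the
+  * physical memory backing the destination buffer. */
+ copy_to_phys(src_kva, dst_pa, nbytes);
+}
+#endif /* illustrative example */
+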
+#if NCPUS > 1
+void signal_cpus(
+ cpu_set use_list,
+ pmap_t pmap,
+ vm_offset_t start,
+ vm_offset_t end);
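+
+/*
+ * Illustrative sketch (not part of the original source): after
+ * changing page-table entries, pmap code signals the other CPUs that
+ * are currently using the pmap so they flush the affected range.
+ * The caller is assumed to hold the pmap lock.
+ */
+#if 0 /* illustrative example, not compiled */
+static void
+example_flush_range(pmap_t pmap, vm_offset_t start, vm_offset_t end)
+{
+ signal_cpus(pmap->cpus_using, pmap, start, end);
+}
+#endif /* illustrative example */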
+#endif /* NCPUS > 1 */
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _PMAP_MACHINE_ */
diff --git a/riscv/intel/read_fault.c b/riscv/intel/read_fault.c
new file mode 100644
index 0000000..0b79e3d
--- /dev/null
+++ b/riscv/intel/read_fault.c
@@ -0,0 +1,178 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <vm/vm_fault.h>
+#include <mach/kern_return.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+
+#include <kern/macros.h>
+
+#if !(__i486__ || __i586__ || __i686__)
+/*
+ * Expansion of vm_fault for read faults taken in kernel mode.
+ * Must enter the mapping as writable, since the i386 ignores
+ * write protection in kernel mode.  (On the i486 and later,
+ * the CR0 WP bit makes supervisor-mode writes honor page
+ * protection, so this path is compiled out by the test above.)
+ */
+kern_return_t
+intel_read_fault(
+ vm_map_t map,
+ vm_offset_t vaddr)
+{
+ vm_map_version_t version; /* Map version for
+ verification */
+ vm_object_t object; /* Top-level object */
+ vm_offset_t offset; /* Top-level offset */
+ vm_prot_t prot; /* Protection for mapping */
+ vm_page_t result_page; /* Result of vm_fault_page */
+ vm_page_t top_page; /* Placeholder page */
+ boolean_t wired; /* Is map region wired? */
+ kern_return_t result;
+ vm_page_t m;
+
+ RetryFault:
+
+ /*
+ * Find the backing store object and offset into it
+ * to begin search.
+ */
+ result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version,
+ &object, &offset, &prot, &wired);
+ if (result != KERN_SUCCESS)
+ return (result);
+
+ /*
+ * Make a reference to this object to prevent its
+ * disposal while we are playing with it.
+ */
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ vm_object_paging_begin(object);
+
+ result = vm_fault_page(object, offset, VM_PROT_READ, FALSE, TRUE,
+ &prot, &result_page, &top_page,
+ FALSE, (void (*)()) 0);
+
+ if (result != VM_FAULT_SUCCESS) {
+ vm_object_deallocate(object);
+
+ switch (result) {
+ case VM_FAULT_RETRY:
+ goto RetryFault;
+ case VM_FAULT_INTERRUPTED:
+ return (KERN_SUCCESS);
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetryFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetryFault;
+ case VM_FAULT_MEMORY_ERROR:
+ return (KERN_MEMORY_ERROR);
+ }
+ }
+
+ m = result_page;
+
+ /*
+ * How to clean up the result of vm_fault_page. This
+ * happens whether the mapping is entered or not.
+ */
+
+#define UNLOCK_AND_DEALLOCATE \
+ MACRO_BEGIN \
+ vm_fault_cleanup(m->object, top_page); \
+ vm_object_deallocate(object); \
+ MACRO_END
+
+ /*
+ * What to do with the resulting page from vm_fault_page
+ * if it doesn't get entered into the physical map:
+ */
+
+#define RELEASE_PAGE(m) \
+ MACRO_BEGIN \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ if (!m->active && !m->inactive) \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+ /*
+ * We must verify that the maps have not changed.
+ */
+ vm_object_unlock(m->object);
+ while (!vm_map_verify(map, &version)) {
+ vm_object_t retry_object;
+ vm_offset_t retry_offset;
+ vm_prot_t retry_prot;
+
+ result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version,
+ &retry_object, &retry_offset, &retry_prot,
+ &wired);
+ if (result != KERN_SUCCESS) {
+ vm_object_lock(m->object);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ return (result);
+ }
+
+ vm_object_unlock(retry_object);
+
+ if (retry_object != object || retry_offset != offset) {
+ vm_object_lock(m->object);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+ }
+
+ /*
+ * Put the page in the physical map.
+ */
+ PMAP_ENTER(map->pmap, vaddr, m, VM_PROT_READ|VM_PROT_WRITE, wired);
+
+ vm_object_lock(m->object);
+ vm_page_lock_queues();
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ m->reference = TRUE;
+ vm_page_unlock_queues();
+
+ vm_map_verify_done(map, &version);
+ PAGE_WAKEUP_DONE(m);
+
+ UNLOCK_AND_DEALLOCATE;
+
+#undef UNLOCK_AND_DEALLOCATE
+#undef RELEASE_PAGE
+
+ return (KERN_SUCCESS);
+}
+#endif
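+
+/*
+ * Illustrative sketch (not part of the original source): the kernel
+ * page-fault handler is expected to route read faults through
+ * intel_read_fault() and write faults through vm_fault().  The
+ * is_write test and function name below are hypothetical, and
+ * vm_fault()'s argument list is assumed to match vm/vm_fault.h.
+ */
+#if 0 /* illustrative example, not compiled */
+static kern_return_t
+example_kernel_fault(vm_map_t map, vm_offset_t vaddr, boolean_t is_write)
+{
+ if (!is_write)
+ return intel_read_fault(map, vaddr);
+
+ return vm_fault(map, trunc_page(vaddr),
+ VM_PROT_READ | VM_PROT_WRITE,
+ FALSE, FALSE, (void (*)()) 0);
+}
+#endif /* illustrative example */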
diff --git a/riscv/intel/read_fault.h b/riscv/intel/read_fault.h
new file mode 100644
index 0000000..8aa3f03
--- /dev/null
+++ b/riscv/intel/read_fault.h
@@ -0,0 +1,35 @@
+/*
+ * Kernel read_fault on i386 functions
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Kernel read_fault on i386 functions.
+ */
+
+#ifndef _READ_FAULT_H_
+#define _READ_FAULT_H_
+
+#include <mach/std_types.h>
+
+extern kern_return_t intel_read_fault(
+ vm_map_t map,
+ vm_offset_t vaddr);
+
+#endif /* _READ_FAULT_H_ */