Diffstat (limited to 'riscv/intel')
-rw-r--r-- | riscv/intel/pmap.c       | 3322
-rw-r--r-- | riscv/intel/pmap.h       |  574
-rw-r--r-- | riscv/intel/pmap.h~      |  574
-rw-r--r-- | riscv/intel/read_fault.c |  178
-rw-r--r-- | riscv/intel/read_fault.h |   35
5 files changed, 4683 insertions, 0 deletions
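The bulk of this change imports the Mach i386 pmap into riscv/intel. The central data structure the new pmap.c carries over is the per-physical-page pv_entry chain (pv_head_table), which records every (pmap, virtual address) pair currently mapping a managed page and is what pmap_remove_range and pmap_page_protect walk and edit. As an orientation aid only, the standalone sketch below shows the shape of that reverse-mapping list and the header-vs-interior removal cases; the types are simplified (no locking, no real pmap struct) and it is an illustration, not code taken from the diff that follows.

/* Illustration only: the pv_entry reverse-mapping list used by pmap.c.
 * One list head per managed physical page; each node names a
 * (pmap, virtual address) pair that currently maps the page. */
#include <stddef.h>

typedef unsigned long vm_offset_t;
typedef struct pmap *pmap_t;            /* opaque in this sketch */

struct pv_entry {
	struct pv_entry *next;          /* next mapping of the same page */
	pmap_t           pmap;          /* address space holding the mapping */
	vm_offset_t      va;            /* virtual address within that pmap */
};

/* Remove the (pmap, va) mapping from one page's pv list, mirroring the
 * two cases pmap_remove_range() handles: the mapping may live in the
 * list header itself (which cannot be freed) or in an interior node. */
void pv_remove(struct pv_entry *pv_h, pmap_t pmap, vm_offset_t va)
{
	if (pv_h->pmap == pmap && pv_h->va == va) {
		struct pv_entry *cur = pv_h->next;
		if (cur != NULL)
			*pv_h = *cur;   /* copy next node into header; caller frees cur */
		else
			pv_h->pmap = NULL;      /* list is now empty */
		return;
	}
	for (struct pv_entry *prev = pv_h; prev->next != NULL; prev = prev->next) {
		if (prev->next->pmap == pmap && prev->next->va == va) {
			prev->next = prev->next->next;  /* unlink; caller frees the node */
			return;
		}
	}
}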
diff --git a/riscv/intel/pmap.c b/riscv/intel/pmap.c new file mode 100644 index 0000000..f2122a1 --- /dev/null +++ b/riscv/intel/pmap.c @@ -0,0 +1,3322 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: pmap.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * (These guys wrote the Vax version) + * + * Physical Map management code for Intel i386, and i486. + * + * Manages physical address maps. + * + * In addition to hardware address maps, this + * module is called upon to provide software-use-only + * maps which may or may not be stored in the same + * form as hardware maps. These pseudo-maps are + * used to store intermediate results from copy + * operations to and from address spaces. + * + * Since the information managed by this module is + * also stored by the logical address mapping module, + * this module may throw away valid virtual-to-physical + * mappings at almost any time. However, invalidations + * of virtual-to-physical mappings must be done as + * requested. + * + * In order to cope with hardware architectures which + * make virtual-to-physical map invalidates expensive, + * this module may delay invalidate or reduced protection + * operations until such time as they are actually + * necessary. This module is given full information as + * to which processors are currently using which maps, + * and to when physical maps must be made correct. + */ + +#include <string.h> + +#include <mach/machine/vm_types.h> + +#include <mach/boolean.h> +#include <kern/debug.h> +#include <kern/printf.h> +#include <kern/thread.h> +#include <kern/slab.h> + +#include <kern/lock.h> + +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <i386/vm_param.h> +#include <mach/vm_prot.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_user.h> + +#include <mach/machine/vm_param.h> +#include <mach/xen.h> +#include <machine/thread.h> +#include <i386/cpu_number.h> +#include <i386/proc_reg.h> +#include <i386/locore.h> +#include <i386/model_dep.h> +#include <i386/spl.h> +#include <i386at/biosmem.h> +#include <i386at/model_dep.h> + +#if NCPUS > 1 +#include <i386/mp_desc.h> +#endif + +#include <ddb/db_output.h> +#include <machine/db_machdep.h> + +#ifdef MACH_PSEUDO_PHYS +#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = pte_entry?pa_to_ma(pte_entry):0; +#else /* MACH_PSEUDO_PHYS */ +#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry); +#endif /* MACH_PSEUDO_PHYS */ + +/* + * Private data structures. 
+ */ + +/* + * For each vm_page_t, there is a list of all currently + * valid virtual mappings of that page. An entry is + * a pv_entry_t; the list is the pv_table. + */ + +typedef struct pv_entry { + struct pv_entry *next; /* next pv_entry */ + pmap_t pmap; /* pmap where mapping lies */ + vm_offset_t va; /* virtual address for mapping */ +} *pv_entry_t; + +#define PV_ENTRY_NULL ((pv_entry_t) 0) + +pv_entry_t pv_head_table; /* array of entries, one per page */ + +/* + * pv_list entries are kept on a list that can only be accessed + * with the pmap system locked (at SPLVM, not in the cpus_active set). + * The list is refilled from the pv_list_cache if it becomes empty. + */ +pv_entry_t pv_free_list; /* free list at SPLVM */ +def_simple_lock_data(static, pv_free_list_lock) + +#define PV_ALLOC(pv_e) { \ + simple_lock(&pv_free_list_lock); \ + if ((pv_e = pv_free_list) != 0) { \ + pv_free_list = pv_e->next; \ + } \ + simple_unlock(&pv_free_list_lock); \ +} + +#define PV_FREE(pv_e) { \ + simple_lock(&pv_free_list_lock); \ + pv_e->next = pv_free_list; \ + pv_free_list = pv_e; \ + simple_unlock(&pv_free_list_lock); \ +} + +struct kmem_cache pv_list_cache; /* cache of pv_entry structures */ + +/* + * Each entry in the pv_head_table is locked by a bit in the + * pv_lock_table. The lock bits are accessed by the physical + * address of the page they lock. + */ + +char *pv_lock_table; /* pointer to array of bits */ +#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE) + +/* Has pmap_init completed? */ +boolean_t pmap_initialized = FALSE; + +/* + * Range of kernel virtual addresses available for kernel memory mapping. + * Does not include the virtual addresses used to map physical memory 1-1. + * Initialized by pmap_bootstrap. + */ +vm_offset_t kernel_virtual_start; +vm_offset_t kernel_virtual_end; + +/* + * Index into pv_head table, its lock bits, and the modify/reference + * bits. + */ +#define pa_index(pa) vm_page_table_index(pa) + +#define pai_to_pvh(pai) (&pv_head_table[pai]) +#define lock_pvh_pai(pai) (bit_lock(pai, pv_lock_table)) +#define unlock_pvh_pai(pai) (bit_unlock(pai, pv_lock_table)) + +/* + * Array of physical page attributes for managed pages. + * One byte per physical page. + */ +char *pmap_phys_attributes; + +/* + * Physical page attributes. Copy bits from PTE definition. + */ +#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */ +#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */ + +/* + * Amount of virtual memory mapped by one + * page-directory entry. + */ +#define PDE_MAPPED_SIZE (pdenum2lin(1)) + +/* + * We allocate page table pages directly from the VM system + * through this object. It maps physical memory. + */ +vm_object_t pmap_object = VM_OBJECT_NULL; + +/* + * Locking and TLB invalidation + */ + +/* + * Locking Protocols: + * + * There are two structures in the pmap module that need locking: + * the pmaps themselves, and the per-page pv_lists (which are locked + * by locking the pv_lock_table entry that corresponds to the pv_head + * for the list in question.) Most routines want to lock a pmap and + * then do operations in it that require pv_list locking -- however + * pmap_remove_all and pmap_copy_on_write operate on a physical page + * basis and want to do the locking in the reverse order, i.e. lock + * a pv_list and then go through all the pmaps referenced by that list. + * To protect against deadlock between these two cases, the pmap_lock + * is used. There are three different locking protocols as a result: + * + * 1. 
pmap operations only (pmap_extract, pmap_access, ...) Lock only + * the pmap. + * + * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read + * lock on the pmap_lock (shared read), then lock the pmap + * and finally the pv_lists as needed [i.e. pmap lock before + * pv_list lock.] + * + * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...) + * Get a write lock on the pmap_lock (exclusive write); this + * also guaranteees exclusive access to the pv_lists. Lock the + * pmaps as needed. + * + * At no time may any routine hold more than one pmap lock or more than + * one pv_list lock. Because interrupt level routines can allocate + * mbufs and cause pmap_enter's, the pmap_lock and the lock on the + * kernel_pmap can only be held at splvm. + */ + +#if NCPUS > 1 +/* + * We raise the interrupt level to splvm, to block interprocessor + * interrupts during pmap operations. We must take the CPU out of + * the cpus_active set while interrupts are blocked. + */ +#define SPLVM(spl) { \ + spl = splvm(); \ + i_bit_clear(cpu_number(), &cpus_active); \ +} + +#define SPLX(spl) { \ + i_bit_set(cpu_number(), &cpus_active); \ + splx(spl); \ +} + +/* + * Lock on pmap system + */ +lock_data_t pmap_system_lock; + +#define PMAP_READ_LOCK(pmap, spl) { \ + SPLVM(spl); \ + lock_read(&pmap_system_lock); \ + simple_lock(&(pmap)->lock); \ +} + +#define PMAP_WRITE_LOCK(spl) { \ + SPLVM(spl); \ + lock_write(&pmap_system_lock); \ +} + +#define PMAP_READ_UNLOCK(pmap, spl) { \ + simple_unlock(&(pmap)->lock); \ + lock_read_done(&pmap_system_lock); \ + SPLX(spl); \ +} + +#define PMAP_WRITE_UNLOCK(spl) { \ + lock_write_done(&pmap_system_lock); \ + SPLX(spl); \ +} + +#define PMAP_WRITE_TO_READ_LOCK(pmap) { \ + simple_lock(&(pmap)->lock); \ + lock_write_to_read(&pmap_system_lock); \ +} + +#define LOCK_PVH(index) (lock_pvh_pai(index)) + +#define UNLOCK_PVH(index) (unlock_pvh_pai(index)) + +#define PMAP_UPDATE_TLBS(pmap, s, e) \ +{ \ + cpu_set cpu_mask = 1 << cpu_number(); \ + cpu_set users; \ + \ + /* Since the pmap is locked, other updates are locked */ \ + /* out, and any pmap_activate has finished. */ \ + \ + /* find other cpus using the pmap */ \ + users = (pmap)->cpus_using & ~cpu_mask; \ + if (users) { \ + /* signal them, and wait for them to finish */ \ + /* using the pmap */ \ + signal_cpus(users, (pmap), (s), (e)); \ + while ((pmap)->cpus_using & cpus_active & ~cpu_mask) \ + cpu_pause(); \ + } \ + \ + /* invalidate our own TLB if pmap is in use */ \ + if ((pmap)->cpus_using & cpu_mask) { \ + INVALIDATE_TLB((pmap), (s), (e)); \ + } \ +} + +#else /* NCPUS > 1 */ + +#define SPLVM(spl) ((void)(spl)) +#define SPLX(spl) ((void)(spl)) + +#define PMAP_READ_LOCK(pmap, spl) SPLVM(spl) +#define PMAP_WRITE_LOCK(spl) SPLVM(spl) +#define PMAP_READ_UNLOCK(pmap, spl) SPLX(spl) +#define PMAP_WRITE_UNLOCK(spl) SPLX(spl) +#define PMAP_WRITE_TO_READ_LOCK(pmap) + +#define LOCK_PVH(index) +#define UNLOCK_PVH(index) + +#define PMAP_UPDATE_TLBS(pmap, s, e) { \ + /* invalidate our own TLB if pmap is in use */ \ + if ((pmap)->cpus_using) { \ + INVALIDATE_TLB((pmap), (s), (e)); \ + } \ +} + +#endif /* NCPUS > 1 */ + +#ifdef MACH_PV_PAGETABLES +#define INVALIDATE_TLB(pmap, s, e) do { \ + if (__builtin_constant_p((e) - (s)) \ + && (e) - (s) == PAGE_SIZE) \ + hyp_invlpg((pmap) == kernel_pmap ? kvtolin(s) : (s)); \ + else \ + hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL); \ +} while(0) +#else /* MACH_PV_PAGETABLES */ +/* It is hard to know when a TLB flush becomes less expensive than a bunch of + * invlpgs. 
But it surely is more expensive than just one invlpg. */ +#define INVALIDATE_TLB(pmap, s, e) do { \ + if (__builtin_constant_p((e) - (s)) \ + && (e) - (s) == PAGE_SIZE) \ + invlpg_linear((pmap) == kernel_pmap ? kvtolin(s) : (s)); \ + else \ + flush_tlb(); \ +} while (0) +#endif /* MACH_PV_PAGETABLES */ + + +#if NCPUS > 1 +/* + * Structures to keep track of pending TLB invalidations + */ + +#define UPDATE_LIST_SIZE 4 + +struct pmap_update_item { + pmap_t pmap; /* pmap to invalidate */ + vm_offset_t start; /* start address to invalidate */ + vm_offset_t end; /* end address to invalidate */ +} ; + +typedef struct pmap_update_item *pmap_update_item_t; + +/* + * List of pmap updates. If the list overflows, + * the last entry is changed to invalidate all. + */ +struct pmap_update_list { + decl_simple_lock_data(, lock) + int count; + struct pmap_update_item item[UPDATE_LIST_SIZE]; +} ; +typedef struct pmap_update_list *pmap_update_list_t; + +struct pmap_update_list cpu_update_list[NCPUS]; + +cpu_set cpus_active; +cpu_set cpus_idle; +volatile +boolean_t cpu_update_needed[NCPUS]; + +#endif /* NCPUS > 1 */ + +/* + * Other useful macros. + */ +#define current_pmap() (vm_map_pmap(current_thread()->task->map)) +#define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0) + +struct pmap kernel_pmap_store; +pmap_t kernel_pmap; + +struct kmem_cache pmap_cache; /* cache of pmap structures */ +struct kmem_cache pt_cache; /* cache of page tables */ +struct kmem_cache pd_cache; /* cache of page directories */ +#if PAE +struct kmem_cache pdpt_cache; /* cache of page directory pointer tables */ +#ifdef __x86_64__ +struct kmem_cache l4_cache; /* cache of L4 tables */ +#endif /* __x86_64__ */ +#endif /* PAE */ + +boolean_t pmap_debug = FALSE; /* flag for debugging prints */ + +#if 0 +int ptes_per_vm_page; /* number of hardware ptes needed + to map one VM page. */ +#else +#define ptes_per_vm_page 1 +#endif + +unsigned int inuse_ptepages_count = 0; /* debugging */ + +/* + * Pointer to the basic page directory for the kernel. + * Initialized by pmap_bootstrap(). + */ +pt_entry_t *kernel_page_dir; + +/* + * Two slots for temporary physical page mapping, to allow for + * physical-to-physical transfers. 
+ */ +static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS * NCPUS]; +#define MAPWINDOW_SIZE (PMAP_NMAPWINDOWS * NCPUS * PAGE_SIZE) + +#ifdef __x86_64__ +static inline pt_entry_t * +pmap_l4base(const pmap_t pmap, vm_offset_t lin_addr) +{ + return &pmap->l4base[lin2l4num(lin_addr)]; +} +#endif + +#ifdef PAE +static inline pt_entry_t * +pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr) +{ + pt_entry_t *pdp_table; +#ifdef __x86_64__ + pt_entry_t *l4_table; + l4_table = pmap_l4base(pmap, lin_addr); + if (l4_table == PT_ENTRY_NULL) + return(PT_ENTRY_NULL); + pt_entry_t pdp = *l4_table; + if ((pdp & INTEL_PTE_VALID) == 0) + return PT_ENTRY_NULL; + pdp_table = (pt_entry_t *) ptetokv(pdp); +#else /* __x86_64__ */ + pdp_table = pmap->pdpbase; +#endif /* __x86_64__ */ + return &pdp_table[lin2pdpnum(lin_addr)]; +} +#endif + +static inline pt_entry_t * +pmap_pde(const pmap_t pmap, vm_offset_t addr) +{ + pt_entry_t *page_dir; + if (pmap == kernel_pmap) + addr = kvtolin(addr); +#if PAE + pt_entry_t *pdp_table; + pdp_table = pmap_ptp(pmap, addr); + if (pdp_table == PT_ENTRY_NULL) + return(PT_ENTRY_NULL); + pt_entry_t pde = *pdp_table; + if ((pde & INTEL_PTE_VALID) == 0) + return PT_ENTRY_NULL; + page_dir = (pt_entry_t *) ptetokv(pde); +#else /* PAE */ + page_dir = pmap->dirbase; +#endif /* PAE */ + return &page_dir[lin2pdenum(addr)]; +} + +/* + * Given an offset and a map, compute the address of the + * pte. If the address is invalid with respect to the map + * then PT_ENTRY_NULL is returned (and the map may need to grow). + * + * This is only used internally. + */ +pt_entry_t * +pmap_pte(const pmap_t pmap, vm_offset_t addr) +{ + pt_entry_t *ptp; + pt_entry_t pte; + +#ifdef __x86_64__ + if (pmap->l4base == 0) + return(PT_ENTRY_NULL); +#elif PAE + if (pmap->pdpbase == 0) + return(PT_ENTRY_NULL); +#else + if (pmap->dirbase == 0) + return(PT_ENTRY_NULL); +#endif + ptp = pmap_pde(pmap, addr); + if (ptp == 0) + return(PT_ENTRY_NULL); + pte = *ptp; + if ((pte & INTEL_PTE_VALID) == 0) + return(PT_ENTRY_NULL); + ptp = (pt_entry_t *)ptetokv(pte); + return(&ptp[ptenum(addr)]); +} + +#define DEBUG_PTE_PAGE 0 + +#if DEBUG_PTE_PAGE +void ptep_check(ptep_t ptep) +{ + pt_entry_t *pte, *epte; + int ctu, ctw; + + /* check the use and wired counts */ + if (ptep == PTE_PAGE_NULL) + return; + pte = pmap_pte(ptep->pmap, ptep->va); + epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t); + ctu = 0; + ctw = 0; + while (pte < epte) { + if (pte->pfn != 0) { + ctu++; + if (pte->wired) + ctw++; + } + pte += ptes_per_vm_page; + } + + if (ctu != ptep->use_count || ctw != ptep->wired_count) { + printf("use %d wired %d - actual use %d wired %d\n", + ptep->use_count, ptep->wired_count, ctu, ctw); + panic("pte count"); + } +} +#endif /* DEBUG_PTE_PAGE */ + +/* + * Back-door routine for mapping kernel VM at initialization. + * Useful for mapping memory outside the range of direct mapped + * physical memory (i.e., devices). 
+ */ +vm_offset_t pmap_map_bd( + vm_offset_t virt, + phys_addr_t start, + phys_addr_t end, + vm_prot_t prot) +{ + pt_entry_t template; + pt_entry_t *pte; + int spl; +#ifdef MACH_PV_PAGETABLES + int n, i = 0; + struct mmu_update update[HYP_BATCH_MMU_UPDATES]; +#endif /* MACH_PV_PAGETABLES */ + + template = pa_to_pte(start) + | INTEL_PTE_NCACHE|INTEL_PTE_WTHRU + | INTEL_PTE_VALID; + if (CPU_HAS_FEATURE(CPU_FEATURE_PGE)) + template |= INTEL_PTE_GLOBAL; + if (prot & VM_PROT_WRITE) + template |= INTEL_PTE_WRITE; + + PMAP_READ_LOCK(kernel_pmap, spl); + while (start < end) { + pte = pmap_pte(kernel_pmap, virt); + if (pte == PT_ENTRY_NULL) + panic("pmap_map_bd: Invalid kernel address\n"); +#ifdef MACH_PV_PAGETABLES + update[i].ptr = kv_to_ma(pte); + update[i].val = pa_to_ma(template); + i++; + if (i == HYP_BATCH_MMU_UPDATES) { + hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF); + if (n != i) + panic("couldn't pmap_map_bd\n"); + i = 0; + } +#else /* MACH_PV_PAGETABLES */ + WRITE_PTE(pte, template) +#endif /* MACH_PV_PAGETABLES */ + pte_increment_pa(template); + virt += PAGE_SIZE; + start += PAGE_SIZE; + } +#ifdef MACH_PV_PAGETABLES + if (i > HYP_BATCH_MMU_UPDATES) + panic("overflowed array in pmap_map_bd"); + hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF); + if (n != i) + panic("couldn't pmap_map_bd\n"); +#endif /* MACH_PV_PAGETABLES */ + PMAP_READ_UNLOCK(kernel_pmap, spl); + return(virt); +} + +#ifdef PAE +static void pmap_bootstrap_pae(void) +{ + vm_offset_t addr; + pt_entry_t *pdp_kernel; + +#ifdef __x86_64__ +#ifdef MACH_HYP + kernel_pmap->user_l4base = NULL; + kernel_pmap->user_pdpbase = NULL; +#endif + kernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page()); + memset(kernel_pmap->l4base, 0, INTEL_PGBYTES); +#else + const int PDPNUM_KERNEL = PDPNUM; +#endif /* x86_64 */ + + init_alloc_aligned(PDPNUM_KERNEL * INTEL_PGBYTES, &addr); + kernel_page_dir = (pt_entry_t*)phystokv(addr); + memset(kernel_page_dir, 0, PDPNUM_KERNEL * INTEL_PGBYTES); + + pdp_kernel = (pt_entry_t*)phystokv(pmap_grab_page()); + memset(pdp_kernel, 0, INTEL_PGBYTES); + for (int i = 0; i < PDPNUM_KERNEL; i++) { + int pdp_index = i; +#ifdef __x86_64__ + pdp_index += lin2pdpnum(VM_MIN_KERNEL_ADDRESS); +#endif + WRITE_PTE(&pdp_kernel[pdp_index], + pa_to_pte(_kvtophys((void *) kernel_page_dir + + i * INTEL_PGBYTES)) + | INTEL_PTE_VALID +#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES) + | INTEL_PTE_WRITE +#endif + ); + } + +#ifdef __x86_64__ + /* only fill the kernel pdpte during bootstrap */ + WRITE_PTE(&kernel_pmap->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)], + pa_to_pte(_kvtophys(pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); +#ifdef MACH_PV_PAGETABLES + pmap_set_page_readonly_init(kernel_pmap->l4base); +#endif /* MACH_PV_PAGETABLES */ +#else /* x86_64 */ + kernel_pmap->pdpbase = pdp_kernel; +#endif /* x86_64 */ +} +#endif /* PAE */ + +#ifdef MACH_PV_PAGETABLES +#ifdef PAE +#define NSUP_L1 4 +#else +#define NSUP_L1 1 +#endif +static void pmap_bootstrap_xen(pt_entry_t *l1_map[NSUP_L1]) +{ + /* We don't actually deal with the CR3 register content at all */ + hyp_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); + /* + * Xen may only provide as few as 512KB extra bootstrap linear memory, + * which is far from enough to map all available memory, so we need to + * map more bootstrap linear memory. We here map 1 (resp. 4 for PAE) + * other L1 table(s), thus 4MiB extra memory (resp. 8MiB), which is + * enough for a pagetable mapping 4GiB. 
+ */ + vm_offset_t la; + int n_l1map; + for (n_l1map = 0, la = VM_MIN_KERNEL_ADDRESS; la >= VM_MIN_KERNEL_ADDRESS; la += NPTES * PAGE_SIZE) { + pt_entry_t *base = (pt_entry_t*) boot_info.pt_base; +#ifdef PAE +#ifdef __x86_64__ + base = (pt_entry_t*) ptetokv(base[0]); +#endif /* x86_64 */ + pt_entry_t *l2_map = (pt_entry_t*) ptetokv(base[lin2pdpnum(la)]); +#else /* PAE */ + pt_entry_t *l2_map = base; +#endif /* PAE */ + /* Like lin2pdenum, but works with non-contiguous boot L3 */ + l2_map += (la >> PDESHIFT) & PDEMASK; + if (!(*l2_map & INTEL_PTE_VALID)) { + struct mmu_update update; + unsigned j, n; + + l1_map[n_l1map] = (pt_entry_t*) phystokv(pmap_grab_page()); + for (j = 0; j < NPTES; j++) + l1_map[n_l1map][j] = (((pt_entry_t)pfn_to_mfn(lin2pdenum(la - VM_MIN_KERNEL_ADDRESS) * NPTES + j)) << PAGE_SHIFT) | INTEL_PTE_VALID | INTEL_PTE_WRITE; + pmap_set_page_readonly_init(l1_map[n_l1map]); + if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (l1_map[n_l1map]))) + panic("couldn't pin page %p(%lx)", l1_map[n_l1map], (vm_offset_t) kv_to_ma (l1_map[n_l1map])); + update.ptr = kv_to_ma(l2_map); + update.val = kv_to_ma(l1_map[n_l1map]) | INTEL_PTE_VALID | INTEL_PTE_WRITE; + hyp_mmu_update(kv_to_la(&update), 1, kv_to_la(&n), DOMID_SELF); + if (n != 1) + panic("couldn't complete bootstrap map"); + /* added the last L1 table, can stop */ + if (++n_l1map >= NSUP_L1) + break; + } + } +} +#endif /* MACH_PV_PAGETABLES */ + +/* + * Bootstrap the system enough to run with virtual memory. + * Allocate the kernel page directory and page tables, + * and direct-map all physical memory. + * Called with mapping off. + */ +void pmap_bootstrap(void) +{ + /* + * Mapping is turned off; we must reference only physical addresses. + * The load image of the system is to be mapped 1-1 physical = virtual. + */ + + /* + * Set ptes_per_vm_page for general use. + */ +#if 0 + ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES; +#endif + + /* + * The kernel's pmap is statically allocated so we don't + * have to use pmap_create, which is unlikely to work + * correctly at this part of the boot sequence. + */ + + kernel_pmap = &kernel_pmap_store; + +#if NCPUS > 1 + lock_init(&pmap_system_lock, FALSE); /* NOT a sleep lock */ +#endif /* NCPUS > 1 */ + + simple_lock_init(&kernel_pmap->lock); + + kernel_pmap->ref_count = 1; + + /* + * Determine the kernel virtual address range. + * It starts at the end of the physical memory + * mapped into the kernel address space, + * and extends to a stupid arbitrary limit beyond that. + */ + kernel_virtual_start = phystokv(biosmem_directmap_end()); + kernel_virtual_end = kernel_virtual_start + VM_KERNEL_MAP_SIZE; + + if (kernel_virtual_end < kernel_virtual_start + || kernel_virtual_end > VM_MAX_KERNEL_ADDRESS - PAGE_SIZE) + kernel_virtual_end = VM_MAX_KERNEL_ADDRESS - PAGE_SIZE; + + /* + * Allocate and clear a kernel page directory. + */ + /* Note: initial Xen mapping holds at least 512kB free mapped page. + * We use that for directly building our linear mapping. */ +#if PAE + pmap_bootstrap_pae(); +#else /* PAE */ + kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(pmap_grab_page()); + { + unsigned i; + for (i = 0; i < NPDES; i++) + kernel_page_dir[i] = 0; + } +#endif /* PAE */ + +#ifdef MACH_PV_PAGETABLES + pt_entry_t *l1_map[NSUP_L1]; + pmap_bootstrap_xen(l1_map); +#endif /* MACH_PV_PAGETABLES */ + + /* + * Allocate and set up the kernel page tables. + */ + { + vm_offset_t va; + pt_entry_t global = CPU_HAS_FEATURE(CPU_FEATURE_PGE) ? 
INTEL_PTE_GLOBAL : 0; + + /* + * Map virtual memory for all directly mappable physical memory, 1-1, + * Make any mappings completely in the kernel's text segment read-only. + * + * Also allocate some additional all-null page tables afterwards + * for kernel virtual memory allocation, + * because this PMAP module is too stupid + * to allocate new kernel page tables later. + * XX fix this + */ + for (va = phystokv(0); va >= phystokv(0) && va < kernel_virtual_end; ) + { + pt_entry_t *pde = kernel_page_dir + lin2pdenum_cont(kvtolin(va)); + pt_entry_t *ptable = (pt_entry_t*)phystokv(pmap_grab_page()); + pt_entry_t *pte; + + /* Initialize the page directory entry. */ + WRITE_PTE(pde, pa_to_pte((vm_offset_t)_kvtophys(ptable)) + | INTEL_PTE_VALID | INTEL_PTE_WRITE); + + /* Initialize the page table. */ + for (pte = ptable; (va < phystokv(biosmem_directmap_end())) && (pte < ptable+NPTES); pte++) + { + if ((pte - ptable) < ptenum(va)) + { + WRITE_PTE(pte, 0); + } + else +#ifdef MACH_PV_PAGETABLES + if (va == (vm_offset_t) &hyp_shared_info) + { + *pte = boot_info.shared_info | INTEL_PTE_VALID | INTEL_PTE_WRITE; + va += INTEL_PGBYTES; + } + else +#endif /* MACH_PV_PAGETABLES */ + { + extern char _start[], etext[]; + + if (((va >= (vm_offset_t) _start) + && (va + INTEL_PGBYTES <= (vm_offset_t)etext)) +#ifdef MACH_PV_PAGETABLES + || (va >= (vm_offset_t) boot_info.pt_base + && (va + INTEL_PGBYTES <= + (vm_offset_t) ptable + INTEL_PGBYTES)) +#endif /* MACH_PV_PAGETABLES */ + ) + { + WRITE_PTE(pte, pa_to_pte(_kvtophys(va)) + | INTEL_PTE_VALID | global); + } + else + { +#ifdef MACH_PV_PAGETABLES + /* Keep supplementary L1 pages read-only */ + int i; + for (i = 0; i < NSUP_L1; i++) + if (va == (vm_offset_t) l1_map[i]) { + WRITE_PTE(pte, pa_to_pte(_kvtophys(va)) + | INTEL_PTE_VALID | global); + break; + } + if (i == NSUP_L1) +#endif /* MACH_PV_PAGETABLES */ + WRITE_PTE(pte, pa_to_pte(_kvtophys(va)) + | INTEL_PTE_VALID | INTEL_PTE_WRITE | global) + + } + va += INTEL_PGBYTES; + } + } + for (; pte < ptable+NPTES; pte++) + { + if (va >= kernel_virtual_end - MAPWINDOW_SIZE && va < kernel_virtual_end) + { + pmap_mapwindow_t *win = &mapwindows[atop(va - (kernel_virtual_end - MAPWINDOW_SIZE))]; + win->entry = pte; + win->vaddr = va; + } + WRITE_PTE(pte, 0); + va += INTEL_PGBYTES; + } +#ifdef MACH_PV_PAGETABLES + pmap_set_page_readonly_init(ptable); + if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (ptable))) + panic("couldn't pin page %p(%lx)\n", ptable, (vm_offset_t) kv_to_ma (ptable)); +#endif /* MACH_PV_PAGETABLES */ + } + } + + /* Architecture-specific code will turn on paging + soon after we return from here. 
*/ +} + +#ifdef MACH_PV_PAGETABLES +/* These are only required because of Xen security policies */ + +/* Set back a page read write */ +void pmap_set_page_readwrite(void *_vaddr) { + vm_offset_t vaddr = (vm_offset_t) _vaddr; + phys_addr_t paddr = kvtophys(vaddr); + vm_offset_t canon_vaddr = phystokv(paddr); + if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE)) + panic("couldn't set hiMMU readwrite for addr %lx(%lx)\n", vaddr, (vm_offset_t) pa_to_ma (paddr)); + if (canon_vaddr != vaddr) + if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE)) + panic("couldn't set hiMMU readwrite for paddr %lx(%lx)\n", canon_vaddr, (vm_offset_t) pa_to_ma (paddr)); +} + +/* Set a page read only (so as to pin it for instance) */ +void pmap_set_page_readonly(void *_vaddr) { + vm_offset_t vaddr = (vm_offset_t) _vaddr; + phys_addr_t paddr = kvtophys(vaddr); + vm_offset_t canon_vaddr = phystokv(paddr); + if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID) { + if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE)) + panic("couldn't set hiMMU readonly for vaddr %lx(%lx)\n", vaddr, (vm_offset_t) pa_to_ma (paddr)); + } + if (canon_vaddr != vaddr && + *pmap_pde(kernel_pmap, canon_vaddr) & INTEL_PTE_VALID) { + if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE)) + panic("couldn't set hiMMU readonly for vaddr %lx canon_vaddr %lx paddr %lx (%lx)\n", vaddr, canon_vaddr, paddr, (vm_offset_t) pa_to_ma (paddr)); + } +} + +/* This needs to be called instead of pmap_set_page_readonly as long as RC3 + * still points to the bootstrap dirbase, to also fix the bootstrap table. */ +void pmap_set_page_readonly_init(void *_vaddr) { + vm_offset_t vaddr = (vm_offset_t) _vaddr; +#if PAE + pt_entry_t *pdpbase = (void*) boot_info.pt_base; +#ifdef __x86_64__ + pdpbase = (pt_entry_t *) ptetokv(pdpbase[lin2l4num(vaddr)]); +#endif + /* The bootstrap table does not necessarily use contiguous pages for the pde tables */ + pt_entry_t *dirbase = (void*) ptetokv(pdpbase[lin2pdpnum(vaddr)]); +#else + pt_entry_t *dirbase = (void*) boot_info.pt_base; +#endif + pt_entry_t *pte = &dirbase[lin2pdenum(vaddr) & PTEMASK]; + /* Modify our future kernel map (can't use update_va_mapping for this)... */ + if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID) { + if (!hyp_mmu_update_la (kvtolin(vaddr), pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID)) + panic("couldn't set hiMMU readonly for vaddr %lx(%lx)\n", vaddr, (vm_offset_t) kv_to_ma (vaddr)); + } + /* ... and the bootstrap map. 
*/ + if (*pte & INTEL_PTE_VALID) { + if (hyp_do_update_va_mapping (vaddr, pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID, UVMF_NONE)) + panic("couldn't set MMU readonly for vaddr %lx(%lx)\n", vaddr, (vm_offset_t) kv_to_ma (vaddr)); + } +} + +void pmap_clear_bootstrap_pagetable(pt_entry_t *base) { + unsigned i; + pt_entry_t *dir; + vm_offset_t va = 0; +#ifdef __x86_64__ + int l4i, l3i; +#else +#if PAE + unsigned j; +#endif /* PAE */ +#endif + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(base))) + panic("pmap_clear_bootstrap_pagetable: couldn't unpin page %p(%lx)\n", base, (vm_offset_t) kv_to_ma(base)); +#ifdef __x86_64__ + /* 4-level page table */ + for (l4i = 0; l4i < NPTES && va < HYP_VIRT_START && va < 0x0000800000000000UL; l4i++) { + pt_entry_t l4e = base[l4i]; + pt_entry_t *l3; + if (!(l4e & INTEL_PTE_VALID)) { + va += NPTES * NPTES * NPTES * INTEL_PGBYTES; + continue; + } + l3 = (pt_entry_t *) ptetokv(l4e); + + for (l3i = 0; l3i < NPTES && va < HYP_VIRT_START; l3i++) { + pt_entry_t l3e = l3[l3i]; + if (!(l3e & INTEL_PTE_VALID)) { + va += NPTES * NPTES * INTEL_PGBYTES; + continue; + } + dir = (pt_entry_t *) ptetokv(l3e); +#else +#if PAE + /* 3-level page table */ + for (j = 0; j < PDPNUM && va < HYP_VIRT_START; j++) + { + pt_entry_t pdpe = base[j]; + if (!(pdpe & INTEL_PTE_VALID)) { + va += NPTES * NPTES * INTEL_PGBYTES; + continue; + } + dir = (pt_entry_t *) ptetokv(pdpe); +#else /* PAE */ + /* 2-level page table */ + dir = base; +#endif /* PAE */ +#endif + for (i = 0; i < NPTES && va < HYP_VIRT_START; i++) { + pt_entry_t pde = dir[i]; + unsigned long pfn = atop(pte_to_pa(pde)); + void *pgt = (void*) phystokv(ptoa(pfn)); + if (pde & INTEL_PTE_VALID) + hyp_free_page(pfn, pgt); + va += NPTES * INTEL_PGBYTES; + } +#ifndef __x86_64__ +#if PAE + hyp_free_page(atop(_kvtophys(dir)), dir); + } +#endif /* PAE */ +#else + hyp_free_page(atop(_kvtophys(dir)), dir); + } + hyp_free_page(atop(_kvtophys(l3)), l3); + } +#endif + hyp_free_page(atop(_kvtophys(base)), base); +} +#endif /* MACH_PV_PAGETABLES */ + +/* + * Create a temporary mapping for a given physical entry + * + * This can be used to access physical pages which are not mapped 1:1 by + * phystokv(). + */ +pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry) +{ + pmap_mapwindow_t *map; + int cpu = cpu_number(); + + assert(entry != 0); + + /* Find an empty one. */ + for (map = &mapwindows[cpu * PMAP_NMAPWINDOWS]; map < &mapwindows[(cpu+1) * PMAP_NMAPWINDOWS]; map++) + if (!(*map->entry)) + break; + assert(map < &mapwindows[(cpu+1) * PMAP_NMAPWINDOWS]); + +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmu_update_pte(kv_to_ma(map->entry), pa_to_ma(entry))) + panic("pmap_get_mapwindow"); +#else /* MACH_PV_PAGETABLES */ + WRITE_PTE(map->entry, entry); +#endif /* MACH_PV_PAGETABLES */ + INVALIDATE_TLB(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE); + return map; +} + +/* + * Destroy a temporary mapping for a physical entry + */ +void pmap_put_mapwindow(pmap_mapwindow_t *map) +{ +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmu_update_pte(kv_to_ma(map->entry), 0)) + panic("pmap_put_mapwindow"); +#else /* MACH_PV_PAGETABLES */ + WRITE_PTE(map->entry, 0); +#endif /* MACH_PV_PAGETABLES */ + INVALIDATE_TLB(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE); +} + +void pmap_virtual_space( + vm_offset_t *startp, + vm_offset_t *endp) +{ + *startp = kernel_virtual_start; + *endp = kernel_virtual_end - MAPWINDOW_SIZE; +} + +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. 
+ */ +void pmap_init(void) +{ + unsigned long npages; + vm_offset_t addr; + vm_size_t s; +#if NCPUS > 1 + int i; +#endif /* NCPUS > 1 */ + + /* + * Allocate memory for the pv_head_table and its lock bits, + * the modify bit array, and the pte_page table. + */ + + npages = vm_page_table_size(); + s = (vm_size_t) (sizeof(struct pv_entry) * npages + + pv_lock_table_size(npages) + + npages); + + s = round_page(s); + if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS) + panic("pmap_init"); + memset((void *) addr, 0, s); + + /* + * Allocate the structures first to preserve word-alignment. + */ + pv_head_table = (pv_entry_t) addr; + addr = (vm_offset_t) (pv_head_table + npages); + + pv_lock_table = (char *) addr; + addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages)); + + pmap_phys_attributes = (char *) addr; + + /* + * Create the cache of physical maps, + * and of the physical-to-virtual entries. + */ + s = (vm_size_t) sizeof(struct pmap); + kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, 0); + kmem_cache_init(&pt_cache, "pmap_L1", + INTEL_PGBYTES, INTEL_PGBYTES, NULL, + KMEM_CACHE_PHYSMEM); + kmem_cache_init(&pd_cache, "pmap_L2", + INTEL_PGBYTES, INTEL_PGBYTES, NULL, + KMEM_CACHE_PHYSMEM); +#if PAE + kmem_cache_init(&pdpt_cache, "pmap_L3", + INTEL_PGBYTES, INTEL_PGBYTES, NULL, + KMEM_CACHE_PHYSMEM); +#ifdef __x86_64__ + kmem_cache_init(&l4_cache, "pmap_L4", + INTEL_PGBYTES, INTEL_PGBYTES, NULL, + KMEM_CACHE_PHYSMEM); +#endif /* __x86_64__ */ +#endif /* PAE */ + s = (vm_size_t) sizeof(struct pv_entry); + kmem_cache_init(&pv_list_cache, "pv_entry", s, 0, NULL, 0); + +#if NCPUS > 1 + /* + * Set up the pmap request lists + */ + for (i = 0; i < NCPUS; i++) { + pmap_update_list_t up = &cpu_update_list[i]; + + simple_lock_init(&up->lock); + up->count = 0; + } +#endif /* NCPUS > 1 */ + + /* + * Indicate that the PMAP module is now fully initialized. + */ + pmap_initialized = TRUE; +} + +static inline boolean_t +valid_page(phys_addr_t addr) +{ + struct vm_page *p; + + if (!pmap_initialized) + return FALSE; + + p = vm_page_lookup_pa(addr); + return (p != NULL); +} + +/* + * Routine: pmap_page_table_page_alloc + * + * Allocates a new physical page to be used as a page-table page. + * + * Must be called with the pmap system and the pmap unlocked, + * since these must be unlocked to use vm_page_grab. + */ +static vm_offset_t +pmap_page_table_page_alloc(void) +{ + vm_page_t m; + phys_addr_t pa; + + check_simple_locks(); + + /* + * We cannot allocate the pmap_object in pmap_init, + * because it is called before the cache package is up. + * Allocate it now if it is missing. + */ + if (pmap_object == VM_OBJECT_NULL) + pmap_object = vm_object_allocate(vm_page_table_size() * PAGE_SIZE); + + /* + * Allocate a VM page for the level 2 page table entries. + */ + while ((m = vm_page_grab(VM_PAGE_DIRECTMAP)) == VM_PAGE_NULL) + VM_PAGE_WAIT((void (*)()) 0); + + /* + * Map the page to its physical address so that it + * can be found later. + */ + pa = m->phys_addr; + assert(pa == (vm_offset_t) pa); + vm_object_lock(pmap_object); + vm_page_insert(m, pmap_object, pa); + vm_page_lock_queues(); + vm_page_wire(m); + inuse_ptepages_count++; + vm_page_unlock_queues(); + vm_object_unlock(pmap_object); + + /* + * Zero the page. 
+ */ + memset((void *)phystokv(pa), 0, PAGE_SIZE); + + return pa; +} + +#ifdef MACH_XEN +void pmap_map_mfn(void *_addr, unsigned long mfn) { + vm_offset_t addr = (vm_offset_t) _addr; + pt_entry_t *pte, *pdp; + vm_offset_t ptp; + pt_entry_t ma = ((pt_entry_t) mfn) << PAGE_SHIFT; + + /* Add a ptp if none exist yet for this pte */ + if ((pte = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL) { + ptp = phystokv(pmap_page_table_page_alloc()); +#ifdef MACH_PV_PAGETABLES + pmap_set_page_readonly((void*) ptp); + if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, pa_to_mfn(ptp))) + panic("couldn't pin page %lx(%lx)\n",ptp,(vm_offset_t) kv_to_ma(ptp)); +#endif /* MACH_PV_PAGETABLES */ + pdp = pmap_pde(kernel_pmap, addr); + +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmu_update_pte(kv_to_ma(pdp), + pa_to_pte(kv_to_ma(ptp)) | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE)) + panic("%s:%d could not set pde %llx(%lx) to %lx(%lx)\n",__FILE__,__LINE__,kvtophys((vm_offset_t)pdp),(vm_offset_t) kv_to_ma(pdp), ptp, (vm_offset_t) pa_to_ma(ptp)); +#else /* MACH_PV_PAGETABLES */ + *pdp = pa_to_pte(kvtophys(ptp)) | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE; +#endif /* MACH_PV_PAGETABLES */ + pte = pmap_pte(kernel_pmap, addr); + } + +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmu_update_pte(kv_to_ma(pte), ma | INTEL_PTE_VALID | INTEL_PTE_WRITE)) + panic("%s:%d could not set pte %p(%lx) to %llx(%llx)\n",__FILE__,__LINE__,pte,(vm_offset_t) kv_to_ma(pte), ma, ma_to_pa(ma)); +#else /* MACH_PV_PAGETABLES */ + /* Note: in this case, mfn is actually a pfn. */ + WRITE_PTE(pte, ma | INTEL_PTE_VALID | INTEL_PTE_WRITE); +#endif /* MACH_PV_PAGETABLES */ +} +#endif /* MACH_XEN */ + +/* + * Deallocate a page-table page. + * The page-table page must have all mappings removed, + * and be removed from its page directory. + */ +static void +pmap_page_table_page_dealloc(vm_offset_t pa) +{ + vm_page_t m; + + vm_object_lock(pmap_object); + m = vm_page_lookup(pmap_object, pa); + vm_page_lock_queues(); +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa))) + panic("couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa)); + pmap_set_page_readwrite((void*) phystokv(pa)); +#endif /* MACH_PV_PAGETABLES */ + vm_page_free(m); + inuse_ptepages_count--; + vm_page_unlock_queues(); + vm_object_unlock(pmap_object); +} + +/* + * Create and return a physical map. + * + * If the size specified for the map + * is zero, the map is an actual physical + * map, and may be referenced by the + * hardware. + * + * If the size specified is non-zero, + * the map will be used in software only, and + * is bounded by that size. + */ +pmap_t pmap_create(vm_size_t size) +{ +#ifdef __x86_64__ + // needs to be reworked if we want to dynamically allocate PDPs for kernel + const int PDPNUM = PDPNUM_KERNEL; +#endif + pt_entry_t *page_dir[PDPNUM]; + int i; + pmap_t p; + pmap_statistics_t stats; + + /* + * A software use-only map doesn't even need a map. + */ + + if (size != 0) { + return(PMAP_NULL); + } + +/* + * Allocate a pmap struct from the pmap_cache. Then allocate + * the page descriptor table. 
+ */ + + p = (pmap_t) kmem_cache_alloc(&pmap_cache); + if (p == PMAP_NULL) + return PMAP_NULL; + + for (i = 0; i < PDPNUM; i++) { + page_dir[i] = (pt_entry_t *) kmem_cache_alloc(&pd_cache); + if (page_dir[i] == NULL) { + i -= 1; + while (i >= 0) { + kmem_cache_free(&pd_cache, + (vm_address_t) page_dir[i]); + i -= 1; + } + kmem_cache_free(&pmap_cache, (vm_address_t) p); + return PMAP_NULL; + } + memcpy(page_dir[i], + (void *) kernel_page_dir + i * INTEL_PGBYTES, + INTEL_PGBYTES); + } + +#ifdef LINUX_DEV +#if VM_MIN_KERNEL_ADDRESS != 0 + /* Do not map BIOS in user tasks */ + page_dir +#if PAE + [lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] +#else + [0] +#endif + [lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] + = 0; +#endif +#endif /* LINUX_DEV */ + +#ifdef MACH_PV_PAGETABLES + { + for (i = 0; i < PDPNUM; i++) + pmap_set_page_readonly((void *) page_dir[i]); + } +#endif /* MACH_PV_PAGETABLES */ + +#if PAE + pt_entry_t *pdp_kernel = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache); + if (pdp_kernel == NULL) { + for (i = 0; i < PDPNUM; i++) + kmem_cache_free(&pd_cache, (vm_address_t) page_dir[i]); + kmem_cache_free(&pmap_cache, (vm_address_t) p); + return PMAP_NULL; + } + + memset(pdp_kernel, 0, INTEL_PGBYTES); + { + for (i = 0; i < PDPNUM; i++) { + int pdp_index = i; +#ifdef __x86_64__ + pdp_index += lin2pdpnum(VM_MIN_KERNEL_ADDRESS); +#endif + WRITE_PTE(&pdp_kernel[pdp_index], + pa_to_pte(kvtophys((vm_offset_t) page_dir[i])) + | INTEL_PTE_VALID +#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES) + | INTEL_PTE_WRITE +#ifdef __x86_64__ + | INTEL_PTE_USER +#endif /* __x86_64__ */ +#endif + ); + } + } +#ifdef __x86_64__ + p->l4base = (pt_entry_t *) kmem_cache_alloc(&l4_cache); + if (p->l4base == NULL) + panic("pmap_create"); + memset(p->l4base, 0, INTEL_PGBYTES); + WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)], + pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); +#ifdef MACH_PV_PAGETABLES + // FIXME: use kmem_cache_alloc instead + if (kmem_alloc_wired(kernel_map, + (vm_offset_t *)&p->user_pdpbase, INTEL_PGBYTES) + != KERN_SUCCESS) + panic("pmap_create"); + memset(p->user_pdpbase, 0, INTEL_PGBYTES); + { + int i; + for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) + WRITE_PTE(&p->user_pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) page_dir[i])) | INTEL_PTE_VALID | INTEL_PTE_WRITE); + } + // FIXME: use kmem_cache_alloc instead + if (kmem_alloc_wired(kernel_map, + (vm_offset_t *)&p->user_l4base, INTEL_PGBYTES) + != KERN_SUCCESS) + panic("pmap_create"); + memset(p->user_l4base, 0, INTEL_PGBYTES); + WRITE_PTE(&p->user_l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->user_pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); +#endif /* MACH_PV_PAGETABLES */ +#else /* _x86_64 */ + p->pdpbase = pdp_kernel; +#endif /* _x86_64 */ +#ifdef MACH_PV_PAGETABLES +#ifdef __x86_64__ + pmap_set_page_readonly(p->l4base); + pmap_set_page_readonly(p->user_l4base); + pmap_set_page_readonly(p->user_pdpbase); +#else + pmap_set_page_readonly(p->pdpbase); +#endif +#endif /* MACH_PV_PAGETABLES */ +#else /* PAE */ + p->dirbase = page_dir[0]; +#endif /* PAE */ + + p->ref_count = 1; + + simple_lock_init(&p->lock); + p->cpus_using = 0; + + /* + * Initialize statistics. + */ + + stats = &p->stats; + stats->resident_count = 0; + stats->wired_count = 0; + + return(p); +} + +/* + * Retire the given physical map from service. + * Should only be called if the map contains + * no valid mappings. 
+ */ + +void pmap_destroy(pmap_t p) +{ + int c, s; + + if (p == PMAP_NULL) + return; + + SPLVM(s); + simple_lock(&p->lock); + c = --p->ref_count; + simple_unlock(&p->lock); + SPLX(s); + + if (c != 0) { + return; /* still in use */ + } + + /* + * Free the page table tree. + */ +#if PAE +#ifdef __x86_64__ + for (int l4i = 0; l4i < NPTES; l4i++) { + pt_entry_t pdp = (pt_entry_t) p->l4base[l4i]; + if (!(pdp & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp); +#else /* __x86_64__ */ + pt_entry_t *pdpbase = p->pdpbase; +#endif /* __x86_64__ */ + for (int l3i = 0; l3i < NPTES; l3i++) { + pt_entry_t pde = (pt_entry_t) pdpbase[l3i]; + if (!(pde & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde); + if ( +#ifdef __x86_64__ + l4i < lin2l4num(VM_MAX_USER_ADDRESS) || + (l4i == lin2l4num(VM_MAX_USER_ADDRESS) && l3i < lin2pdpnum(VM_MAX_USER_ADDRESS)) +#else /* __x86_64__ */ + l3i < lin2pdpnum(VM_MAX_USER_ADDRESS) +#endif /* __x86_64__ */ + ) + for (int l2i = 0; l2i < NPTES; l2i++) +#else /* PAE */ + pt_entry_t *pdebase = p->dirbase; + for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) +#endif /* PAE */ + { + pt_entry_t pte = (pt_entry_t) pdebase[l2i]; + if (!(pte & INTEL_PTE_VALID)) + continue; + kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte)); + } + kmem_cache_free(&pd_cache, (vm_offset_t)pdebase); +#if PAE + } + kmem_cache_free(&pdpt_cache, (vm_offset_t)pdpbase); +#ifdef __x86_64__ + } + kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base); +#endif /* __x86_64__ */ +#endif /* PAE */ + + /* Finally, free the pmap itself */ + kmem_cache_free(&pmap_cache, (vm_offset_t) p); +} + +/* + * Add a reference to the specified pmap. + */ + +void pmap_reference(pmap_t p) +{ + int s; + if (p != PMAP_NULL) { + SPLVM(s); + simple_lock(&p->lock); + p->ref_count++; + simple_unlock(&p->lock); + SPLX(s); + } +} + +/* + * Remove a range of hardware page-table entries. + * The entries given are the first (inclusive) + * and last (exclusive) entries for the VM pages. + * The virtual address is the va for the first pte. + * + * The pmap must be locked. + * If the pmap is not the kernel pmap, the range must lie + * entirely within one pte-page. This is NOT checked. + * Assumes that the pte-page exists. + */ + +static +void pmap_remove_range( + pmap_t pmap, + vm_offset_t va, + pt_entry_t *spte, + pt_entry_t *epte) +{ + pt_entry_t *cpte; + unsigned long num_removed, num_unwired; + unsigned long pai; + phys_addr_t pa; +#ifdef MACH_PV_PAGETABLES + int n, ii = 0; + struct mmu_update update[HYP_BATCH_MMU_UPDATES]; +#endif /* MACH_PV_PAGETABLES */ + + if (pmap == kernel_pmap && (va < kernel_virtual_start || va + (epte-spte)*PAGE_SIZE > kernel_virtual_end)) + panic("pmap_remove_range(%lx-%lx) falls in physical memory area!\n", (unsigned long) va, (unsigned long) va + (epte-spte)*PAGE_SIZE); + +#if DEBUG_PTE_PAGE + if (pmap != kernel_pmap) + ptep_check(get_pte_page(spte)); +#endif /* DEBUG_PTE_PAGE */ + num_removed = 0; + num_unwired = 0; + + for (cpte = spte; cpte < epte; + cpte += ptes_per_vm_page, va += PAGE_SIZE) { + + if (*cpte == 0) + continue; + + assert(*cpte & INTEL_PTE_VALID); + + pa = pte_to_pa(*cpte); + + num_removed++; + if (*cpte & INTEL_PTE_WIRED) + num_unwired++; + + if (!valid_page(pa)) { + + /* + * Outside range of managed physical memory. + * Just remove the mappings. 
+ */ + int i = ptes_per_vm_page; + pt_entry_t *lpte = cpte; + do { +#ifdef MACH_PV_PAGETABLES + update[ii].ptr = kv_to_ma(lpte); + update[ii].val = 0; + ii++; + if (ii == HYP_BATCH_MMU_UPDATES) { + hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF); + if (n != ii) + panic("couldn't pmap_remove_range\n"); + ii = 0; + } +#else /* MACH_PV_PAGETABLES */ + *lpte = 0; +#endif /* MACH_PV_PAGETABLES */ + lpte++; + } while (--i > 0); + continue; + } + + pai = pa_index(pa); + LOCK_PVH(pai); + + /* + * Get the modify and reference bits. + */ + { + int i; + pt_entry_t *lpte; + + i = ptes_per_vm_page; + lpte = cpte; + do { + pmap_phys_attributes[pai] |= + *lpte & (PHYS_MODIFIED|PHYS_REFERENCED); +#ifdef MACH_PV_PAGETABLES + update[ii].ptr = kv_to_ma(lpte); + update[ii].val = 0; + ii++; + if (ii == HYP_BATCH_MMU_UPDATES) { + hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF); + if (n != ii) + panic("couldn't pmap_remove_range\n"); + ii = 0; + } +#else /* MACH_PV_PAGETABLES */ + *lpte = 0; +#endif /* MACH_PV_PAGETABLES */ + lpte++; + } while (--i > 0); + } + + /* + * Remove the mapping from the pvlist for + * this physical page. + */ + { + pv_entry_t pv_h, prev, cur; + + pv_h = pai_to_pvh(pai); + if (pv_h->pmap == PMAP_NULL) { + panic("pmap_remove: null pv_list for pai %lx at va %lx!", pai, (unsigned long) va); + } + if (pv_h->va == va && pv_h->pmap == pmap) { + /* + * Header is the pv_entry. Copy the next one + * to header and free the next one (we cannot + * free the header) + */ + cur = pv_h->next; + if (cur != PV_ENTRY_NULL) { + *pv_h = *cur; + PV_FREE(cur); + } + else { + pv_h->pmap = PMAP_NULL; + } + } + else { + cur = pv_h; + do { + prev = cur; + if ((cur = prev->next) == PV_ENTRY_NULL) { + panic("pmap-remove: mapping not in pv_list!"); + } + } while (cur->va != va || cur->pmap != pmap); + prev->next = cur->next; + PV_FREE(cur); + } + UNLOCK_PVH(pai); + } + } + +#ifdef MACH_PV_PAGETABLES + if (ii > HYP_BATCH_MMU_UPDATES) + panic("overflowed array in pmap_remove_range"); + hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF); + if (n != ii) + panic("couldn't pmap_remove_range\n"); +#endif /* MACH_PV_PAGETABLES */ + + /* + * Update the counts + */ + pmap->stats.resident_count -= num_removed; + pmap->stats.wired_count -= num_unwired; +} + +/* + * Remove the given range of addresses + * from the specified map. + * + * It is assumed that the start and end are properly + * rounded to the hardware page size. + */ + +void pmap_remove( + pmap_t map, + vm_offset_t s, + vm_offset_t e) +{ + int spl; + pt_entry_t *spte, *epte; + vm_offset_t l; + vm_offset_t _s = s; + + if (map == PMAP_NULL) + return; + + PMAP_READ_LOCK(map, spl); + + while (s < e) { + pt_entry_t *pde = pmap_pde(map, s); + + l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); + if (l > e || l < s) + l = e; + if (pde && (*pde & INTEL_PTE_VALID)) { + spte = (pt_entry_t *)ptetokv(*pde); + spte = &spte[ptenum(s)]; + epte = &spte[intel_btop(l-s)]; + pmap_remove_range(map, s, spte, epte); + } + s = l; + } + PMAP_UPDATE_TLBS(map, _s, e); + + PMAP_READ_UNLOCK(map, spl); +} + +/* + * Routine: pmap_page_protect + * + * Function: + * Lower the permission for all mappings to a given + * page. + */ +void pmap_page_protect( + phys_addr_t phys, + vm_prot_t prot) +{ + pv_entry_t pv_h, prev; + pv_entry_t pv_e; + pt_entry_t *pte; + unsigned long pai; + pmap_t pmap; + int spl; + boolean_t remove; + + assert(phys != vm_page_fictitious_addr); + if (!valid_page(phys)) { + /* + * Not a managed page. 
+ */ + return; + } + + /* + * Determine the new protection. + */ + switch (prot) { + case VM_PROT_READ: + case VM_PROT_READ|VM_PROT_EXECUTE: + remove = FALSE; + break; + case VM_PROT_ALL: + return; /* nothing to do */ + default: + remove = TRUE; + break; + } + + /* + * Lock the pmap system first, since we will be changing + * several pmaps. + */ + + PMAP_WRITE_LOCK(spl); + + pai = pa_index(phys); + pv_h = pai_to_pvh(pai); + + /* + * Walk down PV list, changing or removing all mappings. + * We do not have to lock the pv_list because we have + * the entire pmap system locked. + */ + if (pv_h->pmap != PMAP_NULL) { + + prev = pv_e = pv_h; + do { + vm_offset_t va; + + pmap = pv_e->pmap; + /* + * Lock the pmap to block pmap_extract and similar routines. + */ + simple_lock(&pmap->lock); + + va = pv_e->va; + pte = pmap_pte(pmap, va); + + /* + * Consistency checks. + */ + assert(*pte & INTEL_PTE_VALID); + assert(pte_to_pa(*pte) == phys); + + /* + * Remove the mapping if new protection is NONE + * or if write-protecting a kernel mapping. + */ + if (remove || pmap == kernel_pmap) { + /* + * Remove the mapping, collecting any modify bits. + */ + + if (*pte & INTEL_PTE_WIRED) { + pmap->stats.wired_count--; + } + + { + int i = ptes_per_vm_page; + + do { + pmap_phys_attributes[pai] |= + *pte & (PHYS_MODIFIED|PHYS_REFERENCED); +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmu_update_pte(kv_to_ma(pte++), 0)) + panic("%s:%d could not clear pte %p\n",__FILE__,__LINE__,pte-1); +#else /* MACH_PV_PAGETABLES */ + *pte++ = 0; +#endif /* MACH_PV_PAGETABLES */ + } while (--i > 0); + } + + pmap->stats.resident_count--; + + /* + * Remove the pv_entry. + */ + if (pv_e == pv_h) { + /* + * Fix up head later. + */ + pv_h->pmap = PMAP_NULL; + } + else { + /* + * Delete this entry. + */ + prev->next = pv_e->next; + PV_FREE(pv_e); + } + } + else { + /* + * Write-protect. + */ + int i = ptes_per_vm_page; + + do { +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WRITE)) + panic("%s:%d could not disable write on pte %p\n",__FILE__,__LINE__,pte); +#else /* MACH_PV_PAGETABLES */ + *pte &= ~INTEL_PTE_WRITE; +#endif /* MACH_PV_PAGETABLES */ + pte++; + } while (--i > 0); + + /* + * Advance prev. + */ + prev = pv_e; + } + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); + + simple_unlock(&pmap->lock); + + } while ((pv_e = prev->next) != PV_ENTRY_NULL); + + /* + * If pv_head mapping was removed, fix it up. + */ + if (pv_h->pmap == PMAP_NULL) { + pv_e = pv_h->next; + if (pv_e != PV_ENTRY_NULL) { + *pv_h = *pv_e; + PV_FREE(pv_e); + } + } + } + + PMAP_WRITE_UNLOCK(spl); +} + +/* + * Set the physical protection on the + * specified range of this map as requested. + * Will not increase permissions. + */ +void pmap_protect( + pmap_t map, + vm_offset_t s, + vm_offset_t e, + vm_prot_t prot) +{ + pt_entry_t *spte, *epte; + vm_offset_t l; + int spl; + vm_offset_t _s = s; + + if (map == PMAP_NULL) + return; + + /* + * Determine the new protection. + */ + switch (prot) { + case VM_PROT_READ: + case VM_PROT_READ|VM_PROT_EXECUTE: + break; + case VM_PROT_READ|VM_PROT_WRITE: + case VM_PROT_ALL: + return; /* nothing to do */ + default: + pmap_remove(map, s, e); + return; + } + +#if !(__i486__ || __i586__ || __i686__) + /* + * If write-protecting in the kernel pmap, + * remove the mappings; the i386 ignores + * the write-permission bit in kernel mode. 
+ */ + if (map == kernel_pmap) { + pmap_remove(map, s, e); + return; + } +#endif + + SPLVM(spl); + simple_lock(&map->lock); + + while (s < e) { + pt_entry_t *pde = pde = pmap_pde(map, s); + + l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); + if (l > e || l < s) + l = e; + if (pde && (*pde & INTEL_PTE_VALID)) { + spte = (pt_entry_t *)ptetokv(*pde); + spte = &spte[ptenum(s)]; + epte = &spte[intel_btop(l-s)]; + +#ifdef MACH_PV_PAGETABLES + int n, i = 0; + struct mmu_update update[HYP_BATCH_MMU_UPDATES]; +#endif /* MACH_PV_PAGETABLES */ + + while (spte < epte) { + if (*spte & INTEL_PTE_VALID) { +#ifdef MACH_PV_PAGETABLES + update[i].ptr = kv_to_ma(spte); + update[i].val = *spte & ~INTEL_PTE_WRITE; + i++; + if (i == HYP_BATCH_MMU_UPDATES) { + hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF); + if (n != i) + panic("couldn't pmap_protect\n"); + i = 0; + } +#else /* MACH_PV_PAGETABLES */ + *spte &= ~INTEL_PTE_WRITE; +#endif /* MACH_PV_PAGETABLES */ + } + spte++; + } +#ifdef MACH_PV_PAGETABLES + if (i > HYP_BATCH_MMU_UPDATES) + panic("overflowed array in pmap_protect"); + hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF); + if (n != i) + panic("couldn't pmap_protect\n"); +#endif /* MACH_PV_PAGETABLES */ + } + s = l; + } + PMAP_UPDATE_TLBS(map, _s, e); + + simple_unlock(&map->lock); + SPLX(spl); +} + +typedef pt_entry_t* (*pmap_level_getter_t)(const pmap_t pmap, vm_offset_t addr); +/* +* Expand one single level of the page table tree +*/ +static inline pt_entry_t* pmap_expand_level(pmap_t pmap, vm_offset_t v, int spl, + pmap_level_getter_t pmap_level, + pmap_level_getter_t pmap_level_upper, + int n_per_vm_page, + struct kmem_cache *cache) +{ + pt_entry_t *pte; + + /* + * Expand pmap to include this pte. Assume that + * pmap is always expanded to include enough hardware + * pages to map one VM page. + */ + while ((pte = pmap_level(pmap, v)) == PT_ENTRY_NULL) { + /* + * Need to allocate a new page-table page. + */ + vm_offset_t ptp; + pt_entry_t *pdp; + int i; + + if (pmap == kernel_pmap) { + /* + * Would have to enter the new page-table page in + * EVERY pmap. + */ + panic("pmap_expand kernel pmap to %#zx", v); + } + + /* + * Unlock the pmap and allocate a new page-table page. + */ + PMAP_READ_UNLOCK(pmap, spl); + + while (!(ptp = kmem_cache_alloc(cache))) + VM_PAGE_WAIT((void (*)()) 0); + memset((void *)ptp, 0, PAGE_SIZE); + + /* + * Re-lock the pmap and check that another thread has + * not already allocated the page-table page. If it + * has, discard the new page-table page (and try + * again to make sure). + */ + PMAP_READ_LOCK(pmap, spl); + + if (pmap_level(pmap, v) != PT_ENTRY_NULL) { + /* + * Oops... + */ + PMAP_READ_UNLOCK(pmap, spl); + kmem_cache_free(cache, ptp); + PMAP_READ_LOCK(pmap, spl); + continue; + } + + /* + * Enter the new page table page in the page directory. 
+ */ + i = n_per_vm_page; + pdp = pmap_level_upper(pmap, v); + do { +#ifdef MACH_PV_PAGETABLES + pmap_set_page_readonly((void *) ptp); + if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn(ptp))) + panic("couldn't pin page %lx(%lx)\n",ptp,(vm_offset_t) kv_to_ma(ptp)); + if (!hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdp)), + pa_to_pte(pa_to_ma(kvtophys(ptp))) | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE)) + panic("%s:%d could not set pde %p(%llx,%lx) to %lx(%llx,%lx) %lx\n",__FILE__,__LINE__, pdp, kvtophys((vm_offset_t)pdp), (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)pdp)), ptp, kvtophys(ptp), (vm_offset_t) pa_to_ma(kvtophys(ptp)), (vm_offset_t) pa_to_pte(kv_to_ma(ptp))); +#else /* MACH_PV_PAGETABLES */ + *pdp = pa_to_pte(kvtophys(ptp)) | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE; +#endif /* MACH_PV_PAGETABLES */ + pdp++; /* Note: This is safe b/c we stay in one page. */ + ptp += INTEL_PGBYTES; + } while (--i > 0); + + /* + * Now, get the address of the page-table entry. + */ + continue; + } + return pte; +} + +/* + * Expand, if required, the PMAP to include the virtual address V. + * PMAP needs to be locked, and it will be still locked on return. It + * can temporarily unlock the PMAP, during allocation or deallocation + * of physical pages. + */ +static inline pt_entry_t* pmap_expand(pmap_t pmap, vm_offset_t v, int spl) +{ +#ifdef PAE +#ifdef __x86_64__ + pmap_expand_level(pmap, v, spl, pmap_ptp, pmap_l4base, 1, &pdpt_cache); +#endif /* __x86_64__ */ + pmap_expand_level(pmap, v, spl, pmap_pde, pmap_ptp, 1, &pd_cache); +#endif /* PAE */ + return pmap_expand_level(pmap, v, spl, pmap_pte, pmap_pde, ptes_per_vm_page, &pt_cache); +} + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. + * + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. + */ +void pmap_enter( + pmap_t pmap, + vm_offset_t v, + phys_addr_t pa, + vm_prot_t prot, + boolean_t wired) +{ + boolean_t is_physmem; + pt_entry_t *pte; + pv_entry_t pv_h; + unsigned long i, pai; + pv_entry_t pv_e; + pt_entry_t template; + int spl; + phys_addr_t old_pa; + + assert(pa != vm_page_fictitious_addr); + if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa); + if (pmap == PMAP_NULL) + return; + + if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end)) + panic("pmap_enter(%lx, %llx) falls in physical memory area!\n", (unsigned long) v, (unsigned long long) pa); +#if !(__i486__ || __i586__ || __i686__) + if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0 + && !wired /* hack for io_wire */ ) { + /* + * Because the 386 ignores write protection in kernel mode, + * we cannot enter a read-only kernel mapping, and must + * remove an existing mapping if changing it. + */ + PMAP_READ_LOCK(pmap, spl); + + pte = pmap_pte(pmap, v); + if (pte != PT_ENTRY_NULL && *pte != 0) { + /* + * Invalidate the translation buffer, + * then remove the mapping. + */ + pmap_remove_range(pmap, v, pte, + pte + ptes_per_vm_page); + PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); + } + PMAP_READ_UNLOCK(pmap, spl); + return; + } +#endif + + /* + * Must allocate a new pvlist entry while we're unlocked; + * Allocating may cause pageout (which will lock the pmap system). 
+ * If we determine we need a pvlist entry, we will unlock
+ * and allocate one. Then we will retry, throwing away
+ * the allocated entry later (if we no longer need it).
+ */
+ pv_e = PV_ENTRY_NULL;
+Retry:
+ PMAP_READ_LOCK(pmap, spl);
+
+ pte = pmap_expand(pmap, v, spl);
+
+ if (vm_page_ready())
+ is_physmem = (vm_page_lookup_pa(pa) != NULL);
+ else
+ is_physmem = (pa < biosmem_directmap_end());
+
+ /*
+ * Special case if the physical page is already mapped
+ * at this address.
+ */
+ old_pa = pte_to_pa(*pte);
+ if (*pte && old_pa == pa) {
+ /*
+ * May be changing its wired attribute or protection
+ */
+
+ if (wired && !(*pte & INTEL_PTE_WIRED))
+ pmap->stats.wired_count++;
+ else if (!wired && (*pte & INTEL_PTE_WIRED))
+ pmap->stats.wired_count--;
+
+ template = pa_to_pte(pa) | INTEL_PTE_VALID;
+ if (pmap != kernel_pmap)
+ template |= INTEL_PTE_USER;
+ if (prot & VM_PROT_WRITE)
+ template |= INTEL_PTE_WRITE;
+ if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486
+ && !is_physmem)
+ template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU;
+ if (wired)
+ template |= INTEL_PTE_WIRED;
+ i = ptes_per_vm_page;
+ do {
+ if (*pte & INTEL_PTE_MOD)
+ template |= INTEL_PTE_MOD;
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template)))
+ panic("%s:%d could not set pte %p to %llx\n",__FILE__,__LINE__,pte,template);
+#else /* MACH_PV_PAGETABLES */
+ WRITE_PTE(pte, template)
+#endif /* MACH_PV_PAGETABLES */
+ pte++;
+ pte_increment_pa(template);
+ } while (--i > 0);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ }
+ else {
+
+ /*
+ * Remove old mapping from the PV list if necessary.
+ */
+ if (*pte) {
+ /*
+ * Don't free the pte page if removing last
+ * mapping - we will immediately replace it.
+ */
+ pmap_remove_range(pmap, v, pte,
+ pte + ptes_per_vm_page);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ }
+
+ if (valid_page(pa)) {
+
+ /*
+ * Enter the mapping in the PV list for this
+ * physical page.
+ */
+
+ pai = pa_index(pa);
+ LOCK_PVH(pai);
+ pv_h = pai_to_pvh(pai);
+
+ if (pv_h->pmap == PMAP_NULL) {
+ /*
+ * No mappings yet
+ */
+ pv_h->va = v;
+ pv_h->pmap = pmap;
+ pv_h->next = PV_ENTRY_NULL;
+ }
+ else {
+#if DEBUG
+ {
+ /* check that this mapping is not already there */
+ pv_entry_t e = pv_h;
+ while (e != PV_ENTRY_NULL) {
+ if (e->pmap == pmap && e->va == v)
+ panic("pmap_enter: already in pv_list");
+ e = e->next;
+ }
+ }
+#endif /* DEBUG */
+
+ /*
+ * Add new pv_entry after header.
+ */
+ if (pv_e == PV_ENTRY_NULL) {
+ PV_ALLOC(pv_e);
+ if (pv_e == PV_ENTRY_NULL) {
+ UNLOCK_PVH(pai);
+ PMAP_READ_UNLOCK(pmap, spl);
+
+ /*
+ * Refill from cache.
+ */
+ pv_e = (pv_entry_t) kmem_cache_alloc(&pv_list_cache);
+ goto Retry;
+ }
+ }
+ pv_e->va = v;
+ pv_e->pmap = pmap;
+ pv_e->next = pv_h->next;
+ pv_h->next = pv_e;
+ /*
+ * Remember that we used the pvlist entry.
+ */
+ pv_e = PV_ENTRY_NULL;
+ }
+ UNLOCK_PVH(pai);
+ }
+
+ /*
+ * And count the mapping.
+ */
+
+ pmap->stats.resident_count++;
+ if (wired)
+ pmap->stats.wired_count++;
+
+ /*
+ * Build a template to speed up entering -
+ * only the pfn changes.
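+ * (Roughly: template = pa_to_pte(pa) | INTEL_PTE_VALID, plus
+ * INTEL_PTE_USER for user pmaps, INTEL_PTE_WRITE for writable
+ * mappings, INTEL_PTE_NCACHE|INTEL_PTE_WTHRU for non-RAM pages and
+ * INTEL_PTE_WIRED for wired ones; pte_increment_pa() then advances
+ * only the frame number between consecutive hardware ptes.)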
+ */ + template = pa_to_pte(pa) | INTEL_PTE_VALID; + if (pmap != kernel_pmap) + template |= INTEL_PTE_USER; + if (prot & VM_PROT_WRITE) + template |= INTEL_PTE_WRITE; + if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486 + && !is_physmem) + template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU; + if (wired) + template |= INTEL_PTE_WIRED; + i = ptes_per_vm_page; + do { +#ifdef MACH_PV_PAGETABLES + if (!(hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template)))) + panic("%s:%d could not set pte %p to %llx\n",__FILE__,__LINE__,pte,template); +#else /* MACH_PV_PAGETABLES */ + WRITE_PTE(pte, template) +#endif /* MACH_PV_PAGETABLES */ + pte++; + pte_increment_pa(template); + } while (--i > 0); + } + + if (pv_e != PV_ENTRY_NULL) { + PV_FREE(pv_e); + } + + PMAP_READ_UNLOCK(pmap, spl); +} + +/* + * Routine: pmap_change_wiring + * Function: Change the wiring attribute for a map/virtual-address + * pair. + * In/out conditions: + * The mapping must already exist in the pmap. + */ +void pmap_change_wiring( + pmap_t map, + vm_offset_t v, + boolean_t wired) +{ + pt_entry_t *pte; + int i; + int spl; + + /* + * We must grab the pmap system lock because we may + * change a pte_page queue. + */ + PMAP_READ_LOCK(map, spl); + + if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL) + panic("pmap_change_wiring: pte missing"); + + if (wired && !(*pte & INTEL_PTE_WIRED)) { + /* + * wiring down mapping + */ + map->stats.wired_count++; + i = ptes_per_vm_page; + do { + *pte++ |= INTEL_PTE_WIRED; + } while (--i > 0); + } + else if (!wired && (*pte & INTEL_PTE_WIRED)) { + /* + * unwiring mapping + */ + map->stats.wired_count--; + i = ptes_per_vm_page; + do { +#ifdef MACH_PV_PAGETABLES + if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WIRED))) + panic("%s:%d could not wire down pte %p\n",__FILE__,__LINE__,pte); +#else /* MACH_PV_PAGETABLES */ + *pte &= ~INTEL_PTE_WIRED; +#endif /* MACH_PV_PAGETABLES */ + pte++; + } while (--i > 0); + } + + PMAP_READ_UNLOCK(map, spl); +} + +/* + * Routine: pmap_extract + * Function: + * Extract the physical page address associated + * with the given map/virtual_address pair. + */ + +phys_addr_t pmap_extract( + pmap_t pmap, + vm_offset_t va) +{ + pt_entry_t *pte; + phys_addr_t pa; + int spl; + + SPLVM(spl); + simple_lock(&pmap->lock); + if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL) + pa = 0; + else if (!(*pte & INTEL_PTE_VALID)) + pa = 0; + else + pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK); + simple_unlock(&pmap->lock); + SPLX(spl); + return(pa); +} + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + */ +#if 0 +void pmap_copy( + pmap_t dst_pmap, + pmap_t src_pmap, + vm_offset_t dst_addr, + vm_size_t len, + vm_offset_t src_addr) +{ +} +#endif /* 0 */ + +/* + * Routine: pmap_collect + * Function: + * Garbage collects the physical map system for + * pages which are no longer used. + * Success need not be guaranteed -- that is, there + * may well be pages which are not referenced, but + * others may be collected. + * Usage: + * Called by the pageout daemon when pages are scarce. + */ +void pmap_collect(pmap_t p) +{ + pt_entry_t *ptp; + pt_entry_t *eptp; + phys_addr_t pa; + int spl, wired; + + if (p == PMAP_NULL) + return; + + if (p == kernel_pmap) + return; + + /* + * Free the page table tree. 
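+ * The walk below descends whatever levels are configured (L4 and
+ * PDP only with __x86_64__/PAE) and frees any user page-table page
+ * with no wired entries, after removing the mappings it still holds.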
+ */ + PMAP_READ_LOCK(p, spl); +#if PAE +#ifdef __x86_64__ + for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) { + pt_entry_t pdp = (pt_entry_t) p->l4base[l4i]; + if (!(pdp & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp); + for (int l3i = 0; l3i < NPTES; l3i++) +#else /* __x86_64__ */ + pt_entry_t *pdpbase = p->pdpbase; + for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) +#endif /* __x86_64__ */ + { + pt_entry_t pde = (pt_entry_t ) pdpbase[l3i]; + if (!(pde & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde); + for (int l2i = 0; l2i < NPTES; l2i++) +#else /* PAE */ + pt_entry_t *pdebase = p->dirbase; + for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) +#endif /* PAE */ + { + pt_entry_t pte = (pt_entry_t) pdebase[l2i]; + if (!(pte & INTEL_PTE_VALID)) + continue; + + pa = pte_to_pa(pte); + ptp = (pt_entry_t *)phystokv(pa); + eptp = ptp + NPTES*ptes_per_vm_page; + + /* + * If the pte page has any wired mappings, we cannot + * free it. + */ + wired = 0; + { + pt_entry_t *ptep; + for (ptep = ptp; ptep < eptp; ptep++) { + if (*ptep & INTEL_PTE_WIRED) { + wired = 1; + break; + } + } + } + if (!wired) { + /* + * Remove the virtual addresses mapped by this pte page. + */ + { /*XXX big hack*/ + vm_offset_t va = pagenum2lin(l4i, l3i, l2i, 0); + if (p == kernel_pmap) + va = lintokv(va); + pmap_remove_range(p, va, ptp, eptp); + } + + /* + * Invalidate the page directory pointer. + */ + { + int i = ptes_per_vm_page; + pt_entry_t *pdep = &pdebase[l2i]; + do { +#ifdef MACH_PV_PAGETABLES + unsigned long pte = *pdep; + void *ptable = (void*) ptetokv(pte); + if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0))) + panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1); + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable))) + panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable))); + pmap_set_page_readwrite(ptable); +#else /* MACH_PV_PAGETABLES */ + *pdep++ = 0; +#endif /* MACH_PV_PAGETABLES */ + } while (--i > 0); + } + + PMAP_READ_UNLOCK(p, spl); + + /* + * And free the pte page itself. 
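+ * (The read lock was dropped just above so that the cache free below
+ * may block; it is re-taken before the directory walk continues.)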
+ */ + kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte)); + + PMAP_READ_LOCK(p, spl); + + } + } +#if PAE + // TODO check l2 + } +#ifdef __x86_64__ + // TODO check l3 + } +#endif /* __x86_64__ */ +#endif /* PAE */ + + PMAP_UPDATE_TLBS(p, VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS); + + PMAP_READ_UNLOCK(p, spl); + return; + +} + +#if MACH_KDB +/* + * Routine: pmap_whatis + * Function: + * Check whether this address is within a pmap + * Usage: + * Called from debugger + */ +int pmap_whatis(pmap_t p, vm_offset_t a) +{ + pt_entry_t *ptp; + phys_addr_t pa; + int spl; + int ret = 0; + + if (p == PMAP_NULL) + return 0; + + PMAP_READ_LOCK(p, spl); +#if PAE +#ifdef __x86_64__ + if (a >= (vm_offset_t) p->l4base && a < (vm_offset_t) (&p->l4base[NPTES])) { + db_printf("L4 for pmap %p\n", p); + ret = 1; + } + for (int l4i = 0; l4i < NPTES; l4i++) { + pt_entry_t pdp = (pt_entry_t) p->l4base[l4i]; + if (!(pdp & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp); +#else /* __x86_64__ */ + int l4i = 0; + pt_entry_t *pdpbase = p->pdpbase; +#endif /* __x86_64__ */ + if (a >= (vm_offset_t) pdpbase && a < (vm_offset_t) (&pdpbase[NPTES])) { + db_printf("PDP %d for pmap %p\n", l4i, p); + ret = 1; + } + for (int l3i = 0; l3i < NPTES; l3i++) + { + pt_entry_t pde = (pt_entry_t ) pdpbase[l3i]; + if (!(pde & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde); +#else /* PAE */ + int l4i = 0, l3i = 0; + pt_entry_t *pdebase = p->dirbase; +#endif /* PAE */ + if (a >= (vm_offset_t) pdebase && a < (vm_offset_t) (&pdebase[NPTES])) { + db_printf("PDE %d %d for pmap %p\n", l4i, l3i, p); + ret = 1; + } + for (int l2i = 0; l2i < NPTES; l2i++) + { + pt_entry_t pte = (pt_entry_t) pdebase[l2i]; + if (!(pte & INTEL_PTE_VALID)) + continue; + + pa = pte_to_pa(pte); + ptp = (pt_entry_t *)phystokv(pa); + + if (a >= (vm_offset_t) ptp && a < (vm_offset_t) (&ptp[NPTES*ptes_per_vm_page])) { + db_printf("PTP %d %d %d for pmap %p\n", l4i, l3i, l2i, p); + ret = 1; + } + } +#if PAE + } +#ifdef __x86_64__ + } +#endif /* __x86_64__ */ +#endif /* PAE */ + PMAP_READ_UNLOCK(p, spl); + + if (p == kernel_pmap) { + phys_addr_t pa; + if (DB_VALID_KERN_ADDR(a)) + pa = kvtophys(a); + else + pa = pmap_extract(current_task()->map->pmap, a); + + if (valid_page(pa)) { + unsigned long pai; + pv_entry_t pv_h; + + pai = pa_index(pa); + for (pv_h = pai_to_pvh(pai); + pv_h && pv_h->pmap; + pv_h = pv_h->next) + db_printf("pmap %p at %llx\n", pv_h->pmap, pv_h->va); + } + } + + return ret; +} +#endif /* MACH_KDB */ + +/* + * Routine: pmap_activate + * Function: + * Binds the given physical map to the given + * processor, and returns a hardware map description. + */ +#if 0 +void pmap_activate(pmap_t my_pmap, thread_t th, int my_cpu) +{ + PMAP_ACTIVATE(my_pmap, th, my_cpu); +} +#endif /* 0 */ + +/* + * Routine: pmap_deactivate + * Function: + * Indicates that the given physical map is no longer + * in use on the specified processor. (This is a macro + * in pmap.h) + */ +#if 0 +void pmap_deactivate(pmap_t pmap, thread_t th, int which_cpu) +{ + PMAP_DEACTIVATE(pmap, th, which_cpu); +} +#endif /* 0 */ + +/* + * Routine: pmap_kernel + * Function: + * Returns the physical map handle for the kernel. + */ +#if 0 +pmap_t pmap_kernel() +{ + return (kernel_pmap); +} +#endif /* 0 */ + +/* + * pmap_zero_page zeros the specified (machine independent) page. + * See machine/phys.c or machine/phys.s for implementation. 
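+ * The #if 0 body below is kept only as a sketch: it would zero each
+ * of the PAGE_SIZE / INTEL_PGBYTES hardware frames making up one VM
+ * page.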
+ */ +#if 0 +pmap_zero_page(vm_offset_t phys) +{ + int i; + + assert(phys != vm_page_fictitious_addr); + i = PAGE_SIZE / INTEL_PGBYTES; + phys = intel_pfn(phys); + + while (i--) + zero_phys(phys++); +} +#endif /* 0 */ + +/* + * pmap_copy_page copies the specified (machine independent) page. + * See machine/phys.c or machine/phys.s for implementation. + */ +#if 0 +pmap_copy_page(vm_offset_t src, vm_offset_t dst) +{ + int i; + + assert(src != vm_page_fictitious_addr); + assert(dst != vm_page_fictitious_addr); + i = PAGE_SIZE / INTEL_PGBYTES; + + while (i--) { + copy_phys(intel_pfn(src), intel_pfn(dst)); + src += INTEL_PGBYTES; + dst += INTEL_PGBYTES; + } +} +#endif /* 0 */ + +/* + * Routine: pmap_pageable + * Function: + * Make the specified pages (by pmap, offset) + * pageable (or not) as requested. + * + * A page which is not pageable may not take + * a fault; therefore, its page table entry + * must remain valid for the duration. + * + * This routine is merely advisory; pmap_enter + * will specify that these pages are to be wired + * down (or not) as appropriate. + */ +void +pmap_pageable( + pmap_t pmap, + vm_offset_t start, + vm_offset_t end, + boolean_t pageable) +{ +} + +/* + * Clear specified attribute bits. + */ +static void +phys_attribute_clear( + phys_addr_t phys, + int bits) +{ + pv_entry_t pv_h; + pv_entry_t pv_e; + pt_entry_t *pte; + unsigned long pai; + pmap_t pmap; + int spl; + + assert(phys != vm_page_fictitious_addr); + if (!valid_page(phys)) { + /* + * Not a managed page. + */ + return; + } + + /* + * Lock the pmap system first, since we will be changing + * several pmaps. + */ + + PMAP_WRITE_LOCK(spl); + + pai = pa_index(phys); + pv_h = pai_to_pvh(pai); + + /* + * Walk down PV list, clearing all modify or reference bits. + * We do not have to lock the pv_list because we have + * the entire pmap system locked. + */ + if (pv_h->pmap != PMAP_NULL) { + /* + * There are some mappings. + */ + for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) { + vm_offset_t va; + + pmap = pv_e->pmap; + /* + * Lock the pmap to block pmap_extract and similar routines. + */ + simple_lock(&pmap->lock); + + va = pv_e->va; + pte = pmap_pte(pmap, va); + + /* + * Consistency checks. + */ + assert(*pte & INTEL_PTE_VALID); + assert(pte_to_pa(*pte) == phys); + + /* + * Clear modify or reference bits. + */ + { + int i = ptes_per_vm_page; + do { +#ifdef MACH_PV_PAGETABLES + if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~bits))) + panic("%s:%d could not clear bits %x from pte %p\n",__FILE__,__LINE__,bits,pte); +#else /* MACH_PV_PAGETABLES */ + *pte &= ~bits; +#endif /* MACH_PV_PAGETABLES */ + } while (--i > 0); + } + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); + simple_unlock(&pmap->lock); + } + } + + pmap_phys_attributes[pai] &= ~bits; + + PMAP_WRITE_UNLOCK(spl); +} + +/* + * Check specified attribute bits. + */ +static boolean_t +phys_attribute_test( + phys_addr_t phys, + int bits) +{ + pv_entry_t pv_h; + pv_entry_t pv_e; + pt_entry_t *pte; + unsigned long pai; + pmap_t pmap; + int spl; + + assert(phys != vm_page_fictitious_addr); + if (!valid_page(phys)) { + /* + * Not a managed page. + */ + return (FALSE); + } + + /* + * Lock the pmap system first, since we will be checking + * several pmaps. + */ + + PMAP_WRITE_LOCK(spl); + + pai = pa_index(phys); + pv_h = pai_to_pvh(pai); + + if (pmap_phys_attributes[pai] & bits) { + PMAP_WRITE_UNLOCK(spl); + return (TRUE); + } + + /* + * Walk down PV list, checking all mappings. 
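+ * (If the bit was already cached in pmap_phys_attributes[] we have
+ * returned TRUE above without walking.)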
+ * We do not have to lock the pv_list because we have
+ * the entire pmap system locked.
+ */
+ if (pv_h->pmap != PMAP_NULL) {
+ /*
+ * There are some mappings.
+ */
+ for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
+
+ pmap = pv_e->pmap;
+ /*
+ * Lock the pmap to block pmap_extract and similar routines.
+ */
+ simple_lock(&pmap->lock);
+
+ {
+ vm_offset_t va;
+
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
+
+ /*
+ * Consistency checks.
+ */
+ assert(*pte & INTEL_PTE_VALID);
+ assert(pte_to_pa(*pte) == phys);
+ }
+
+ /*
+ * Check modify or reference bits.
+ */
+ {
+ int i = ptes_per_vm_page;
+
+ do {
+ if (*pte & bits) {
+ simple_unlock(&pmap->lock);
+ PMAP_WRITE_UNLOCK(spl);
+ return (TRUE);
+ }
+ } while (--i > 0);
+ }
+ simple_unlock(&pmap->lock);
+ }
+ }
+ PMAP_WRITE_UNLOCK(spl);
+ return (FALSE);
+}
+
+/*
+ * Clear the modify bits on the specified physical page.
+ */
+
+void pmap_clear_modify(phys_addr_t phys)
+{
+ phys_attribute_clear(phys, PHYS_MODIFIED);
+}
+
+/*
+ * pmap_is_modified:
+ *
+ * Return whether or not the specified physical page is modified
+ * by any physical maps.
+ */
+
+boolean_t pmap_is_modified(phys_addr_t phys)
+{
+ return (phys_attribute_test(phys, PHYS_MODIFIED));
+}
+
+/*
+ * pmap_clear_reference:
+ *
+ * Clear the reference bit on the specified physical page.
+ */
+
+void pmap_clear_reference(phys_addr_t phys)
+{
+ phys_attribute_clear(phys, PHYS_REFERENCED);
+}
+
+/*
+ * pmap_is_referenced:
+ *
+ * Return whether or not the specified physical page is referenced
+ * by any physical maps.
+ */
+
+boolean_t pmap_is_referenced(phys_addr_t phys)
+{
+ return (phys_attribute_test(phys, PHYS_REFERENCED));
+}
+
+#if NCPUS > 1
+/*
+* TLB Coherence Code (TLB "shootdown" code)
+*
+* Threads that belong to the same task share the same address space and
+* hence share a pmap. However, they may run on distinct cpus and thus
+* have distinct TLBs that cache page table entries. In order to guarantee
+* the TLBs are consistent, whenever a pmap is changed, all threads that
+* are active in that pmap must have their TLB updated. To keep track of
+* this information, the set of cpus that are currently using a pmap is
+* maintained within each pmap structure (cpus_using). Pmap_activate() and
+* pmap_deactivate add and remove, respectively, a cpu from this set.
+* Since the TLBs are not addressable over the bus, each processor must
+* flush its own TLB; a processor that needs to invalidate another TLB
+* needs to interrupt the processor that owns that TLB to signal the
+* update.
+*
+* Whenever a pmap is updated, the lock on that pmap is locked, and all
+* cpus using the pmap are signaled to invalidate. All threads that need
+* to activate a pmap must wait for the lock to clear to await any updates
+* in progress before using the pmap. They must ACQUIRE the lock to add
+* their cpu to the cpus_using set. An implicit assumption made
+* throughout the TLB code is that all kernel code that runs at or higher
+* than splvm blocks out update interrupts, and that such code does not
+* touch pageable pages.
+*
+* A shootdown interrupt serves another function besides signaling a
+* processor to invalidate. The interrupt routine (pmap_update_interrupt)
+* waits for both the pmap lock (and the kernel pmap lock) to clear,
+* preventing user code from making implicit pmap updates while the
+* sending processor is performing its update. (This could happen via a
+* user data write reference that turns on the modify bit in the page
+* table). 
It must wait for any kernel updates that may have started +* concurrently with a user pmap update because the IPC code +* changes mappings. +* Spinning on the VALUES of the locks is sufficient (rather than +* having to acquire the locks) because any updates that occur subsequent +* to finding the lock unlocked will be signaled via another interrupt. +* (This assumes the interrupt is cleared before the low level interrupt code +* calls pmap_update_interrupt()). +* +* The signaling processor must wait for any implicit updates in progress +* to terminate before continuing with its update. Thus it must wait for an +* acknowledgement of the interrupt from each processor for which such +* references could be made. For maintaining this information, a set +* cpus_active is used. A cpu is in this set if and only if it can +* use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from +* this set; when all such cpus are removed, it is safe to update. +* +* Before attempting to acquire the update lock on a pmap, a cpu (A) must +* be at least at the priority of the interprocessor interrupt +* (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a +* kernel update; it would spin forever in pmap_update_interrupt() trying +* to acquire the user pmap lock it had already acquired. Furthermore A +* must remove itself from cpus_active. Otherwise, another cpu holding +* the lock (B) could be in the process of sending an update signal to A, +* and thus be waiting for A to remove itself from cpus_active. If A is +* spinning on the lock at priority this will never happen and a deadlock +* will result. +*/ + +/* + * Signal another CPU that it must flush its TLB + */ +void signal_cpus( + cpu_set use_list, + pmap_t pmap, + vm_offset_t start, + vm_offset_t end) +{ + int which_cpu, j; + pmap_update_list_t update_list_p; + + while ((which_cpu = __builtin_ffs(use_list)) != 0) { + which_cpu -= 1; /* convert to 0 origin */ + + update_list_p = &cpu_update_list[which_cpu]; + simple_lock(&update_list_p->lock); + + j = update_list_p->count; + if (j >= UPDATE_LIST_SIZE) { + /* + * list overflowed. Change last item to + * indicate overflow. + */ + update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap; + update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_USER_ADDRESS; + update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS; + } + else { + update_list_p->item[j].pmap = pmap; + update_list_p->item[j].start = start; + update_list_p->item[j].end = end; + update_list_p->count = j+1; + } + cpu_update_needed[which_cpu] = TRUE; + simple_unlock(&update_list_p->lock); + + __sync_synchronize(); + if (((cpus_idle & (1 << which_cpu)) == 0)) + interrupt_processor(which_cpu); + use_list &= ~(1 << which_cpu); + } +} + +void process_pmap_updates(pmap_t my_pmap) +{ + int my_cpu = cpu_number(); + pmap_update_list_t update_list_p; + int j; + pmap_t pmap; + + update_list_p = &cpu_update_list[my_cpu]; + simple_lock(&update_list_p->lock); + + for (j = 0; j < update_list_p->count; j++) { + pmap = update_list_p->item[j].pmap; + if (pmap == my_pmap || + pmap == kernel_pmap) { + + INVALIDATE_TLB(pmap, + update_list_p->item[j].start, + update_list_p->item[j].end); + } + } + update_list_p->count = 0; + cpu_update_needed[my_cpu] = FALSE; + simple_unlock(&update_list_p->lock); +} + +/* + * Interrupt routine for TBIA requested from other processor. + */ +void pmap_update_interrupt(void) +{ + int my_cpu; + pmap_t my_pmap; + int s; + + my_cpu = cpu_number(); + + /* + * Exit now if we're idle. 
We'll pick up the update request + * when we go active, and we must not put ourselves back in + * the active set because we'll never process the interrupt + * while we're idle (thus hanging the system). + */ + if (cpus_idle & (1 << my_cpu)) + return; + + if (current_thread() == THREAD_NULL) + my_pmap = kernel_pmap; + else { + my_pmap = current_pmap(); + if (!pmap_in_use(my_pmap, my_cpu)) + my_pmap = kernel_pmap; + } + + /* + * Raise spl to splvm (above splip) to block out pmap_extract + * from IO code (which would put this cpu back in the active + * set). + */ + s = splvm(); + + do { + + /* + * Indicate that we're not using either user or kernel + * pmap. + */ + i_bit_clear(my_cpu, &cpus_active); + + /* + * Wait for any pmap updates in progress, on either user + * or kernel pmap. + */ + while (my_pmap->lock.lock_data || + kernel_pmap->lock.lock_data) + cpu_pause(); + + process_pmap_updates(my_pmap); + + i_bit_set(my_cpu, &cpus_active); + + } while (cpu_update_needed[my_cpu]); + + splx(s); +} +#else /* NCPUS > 1 */ +/* + * Dummy routine to satisfy external reference. + */ +void pmap_update_interrupt(void) +{ + /* should never be called. */ +} +#endif /* NCPUS > 1 */ + +#if defined(__i386__) || defined (__x86_64__) +/* Unmap page 0 to trap NULL references. */ +void +pmap_unmap_page_zero (void) +{ + int *pte; + + printf("Unmapping the zero page. Some BIOS functions may not be working any more.\n"); + pte = (int *) pmap_pte (kernel_pmap, 0); + if (!pte) + return; + assert (pte); +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmu_update_pte(kv_to_ma(pte), 0)) + printf("couldn't unmap page 0\n"); +#else /* MACH_PV_PAGETABLES */ + *pte = 0; + INVALIDATE_TLB(kernel_pmap, 0, PAGE_SIZE); +#endif /* MACH_PV_PAGETABLES */ +} +#endif /* __i386__ */ + +void +pmap_make_temporary_mapping(void) +{ + int i; + /* + * We'll have to temporarily install a direct mapping + * between physical memory and low linear memory, + * until we start using our new kernel segment descriptors. 
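+ * Concretely, the loop below aliases nb_direct page directory
+ * entries of the LINEAR_MIN_KERNEL_ADDRESS range at the
+ * INIT_VM_MIN_KERNEL_ADDRESS slots, so both ranges point at the same
+ * page tables until pmap_remove_temporary_mapping() undoes it.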
+ */ +#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS + vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS; + if ((vm_offset_t)(-delta) < delta) + delta = (vm_offset_t)(-delta); + int nb_direct = delta >> PDESHIFT; + for (i = 0; i < nb_direct; i++) + kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i]; +#endif + +#ifdef LINUX_DEV + /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */ +#if VM_MIN_KERNEL_ADDRESS != 0 + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; +#endif +#endif /* LINUX_DEV */ + +#ifdef MACH_PV_PAGETABLES +#ifndef __x86_64__ + const int PDPNUM_KERNEL = PDPNUM; +#endif + for (i = 0; i < PDPNUM_KERNEL; i++) + pmap_set_page_readonly_init((void*) kernel_page_dir + i * INTEL_PGBYTES); +#if PAE +#ifndef __x86_64__ + pmap_set_page_readonly_init(kernel_pmap->pdpbase); +#endif +#endif /* PAE */ +#endif /* MACH_PV_PAGETABLES */ + + pmap_set_page_dir(); +} + +void +pmap_set_page_dir(void) +{ +#if PAE +#ifdef __x86_64__ + set_cr3((unsigned long)_kvtophys(kernel_pmap->l4base)); +#else + set_cr3((unsigned long)_kvtophys(kernel_pmap->pdpbase)); +#endif +#ifndef MACH_HYP + if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE)) + panic("CPU doesn't have support for PAE."); + set_cr4(get_cr4() | CR4_PAE); +#endif /* MACH_HYP */ +#else + set_cr3((unsigned long)_kvtophys(kernel_page_dir)); +#endif /* PAE */ +} + +void +pmap_remove_temporary_mapping(void) +{ +#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS + int i; + vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS; + if ((vm_offset_t)(-delta) < delta) + delta = (vm_offset_t)(-delta); + int nb_direct = delta >> PDESHIFT; + /* Get rid of the temporary direct mapping and flush it out of the TLB. */ + for (i = 0 ; i < nb_direct; i++) { +#ifdef MACH_XEN +#ifdef MACH_PSEUDO_PHYS + if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS) + i]), 0)) +#else /* MACH_PSEUDO_PHYS */ + if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i * INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL)) +#endif /* MACH_PSEUDO_PHYS */ + printf("couldn't unmap frame %d\n", i); +#else /* MACH_XEN */ + kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = 0; +#endif /* MACH_XEN */ + } +#endif + +#ifdef LINUX_DEV + /* Keep BIOS memory mapped */ +#if VM_MIN_KERNEL_ADDRESS != 0 + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = + kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; +#endif +#endif /* LINUX_DEV */ + + /* Not used after boot, better give it back. */ +#ifdef MACH_XEN + hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS); +#endif /* MACH_XEN */ + + flush_tlb(); +} diff --git a/riscv/intel/pmap.h b/riscv/intel/pmap.h new file mode 100644 index 0000000..d745aad --- /dev/null +++ b/riscv/intel/pmap.h @@ -0,0 +1,574 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: pmap.h + * + * Authors: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Machine-dependent structures for the physical map module. + */ + +#ifndef _PMAP_MACHINE_ +#define _PMAP_MACHINE_ 1 + +#ifndef __ASSEMBLER__ + +#include <kern/lock.h> +#include <mach/machine/vm_param.h> +#include <mach/vm_statistics.h> +#include <mach/kern_return.h> +#include <mach/vm_prot.h> +#include <riscv/proc_reg.h> + +/* + * Define the generic in terms of the specific + */ + +#if defined(__i386__) || defined(__x86_64__) +#define INTEL_PGBYTES I386_PGBYTES +#define INTEL_PGSHIFT I386_PGSHIFT +#define intel_btop(x) i386_btop(x) +#define intel_ptob(x) i386_ptob(x) +#define intel_round_page(x) i386_round_page(x) +#define intel_trunc_page(x) i386_trunc_page(x) +#define trunc_intel_to_vm(x) trunc_i386_to_vm(x) +#define round_intel_to_vm(x) round_i386_to_vm(x) +#define vm_to_intel(x) vm_to_i386(x) +#endif /* __i386__ */ + +/* + * i386/i486 Page Table Entry + */ + +typedef phys_addr_t pt_entry_t; +#define PT_ENTRY_NULL ((pt_entry_t *) 0) + +#endif /* __ASSEMBLER__ */ + +#define INTEL_OFFMASK 0xfff /* offset within page */ +#if PAE +#ifdef __x86_64__ +#define L4SHIFT 39 /* L4 shift */ +#define L4MASK 0x1ff /* mask for L4 index */ +#define PDPNUM_KERNEL (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1) +#define PDPMASK 0x1ff /* mask for page directory pointer index */ +#else /* __x86_64__ */ +#define PDPNUM 4 /* number of page directory pointers */ +#define PDPMASK 3 /* mask for page directory pointer index */ +#endif /* __x86_64__ */ +#define PDPSHIFT 30 /* page directory pointer */ +#define PDESHIFT 21 /* page descriptor shift */ +#define PDEMASK 0x1ff /* mask for page descriptor index */ +#define PTESHIFT 12 /* page table shift */ +#define PTEMASK 0x1ff /* mask for page table index */ +#else /* PAE */ +#define PDPNUM 1 /* number of page directory pointers */ +#define PDESHIFT 22 /* page descriptor shift */ +#define PDEMASK 0x3ff /* mask for page descriptor index */ +#define PTESHIFT 12 /* page table shift */ +#define PTEMASK 0x3ff /* mask for page table index */ +#endif /* PAE */ + +/* + * Convert linear offset to L4 pointer index + */ +#ifdef __x86_64__ +#define lin2l4num(a) (((a) >> L4SHIFT) & L4MASK) +#endif + +/* + * Convert linear offset to page descriptor index + */ +#define lin2pdenum(a) (((a) >> PDESHIFT) & PDEMASK) + +#if PAE +/* Special version assuming contiguous page directories. Making it + include the page directory pointer table index too. 
*/ +#ifdef __x86_64__ +#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x3ff) +#else +#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x7ff) +#endif +#else +#define lin2pdenum_cont(a) lin2pdenum(a) +#endif + +/* + * Convert linear offset to page directory pointer index + */ +#if PAE +#define lin2pdpnum(a) (((a) >> PDPSHIFT) & PDPMASK) +#endif + +/* + * Convert page descriptor index to linear address + */ +#define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT) + +#if PAE +#ifdef __x86_64__ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l4num) << L4SHIFT) + \ + ((vm_offset_t)(l3num) << PDPSHIFT) + \ + ((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#else /* __x86_64__ */ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l3num) << PDPSHIFT) + \ + ((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#endif +#else /* PAE */ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#endif + + +/* + * Convert linear offset to page table index + */ +#define ptenum(a) (((a) >> PTESHIFT) & PTEMASK) + +#define NPTES (intel_ptob(1)/sizeof(pt_entry_t)) +#define NPDES (PDPNUM * (intel_ptob(1)/sizeof(pt_entry_t))) + +/* + * Hardware pte bit definitions (to be used directly on the ptes + * without using the bit fields). + */ + +#define INTEL_PTE_VALID 0x00000001 +#define INTEL_PTE_WRITE 0x00000002 +#define INTEL_PTE_USER 0x00000004 +#define INTEL_PTE_WTHRU 0x00000008 +#define INTEL_PTE_NCACHE 0x00000010 +#define INTEL_PTE_REF 0x00000020 +#define INTEL_PTE_MOD 0x00000040 +#define INTEL_PTE_PS 0x00000080 +#ifdef MACH_PV_PAGETABLES +/* Not supported */ +#define INTEL_PTE_GLOBAL 0x00000000 +#else /* MACH_PV_PAGETABLES */ +#define INTEL_PTE_GLOBAL 0x00000100 +#endif /* MACH_PV_PAGETABLES */ +#define INTEL_PTE_WIRED 0x00000200 +#ifdef PAE +#ifdef __x86_64__ +#define INTEL_PTE_PFN 0xfffffffffffff000ULL +#else /* __x86_64__ */ +#define INTEL_PTE_PFN 0x00007ffffffff000ULL +#endif/* __x86_64__ */ +#else +#define INTEL_PTE_PFN 0xfffff000 +#endif + +#define pa_to_pte(a) ((a) & INTEL_PTE_PFN) +#ifdef MACH_PSEUDO_PHYS +#define pte_to_pa(p) ma_to_pa((p) & INTEL_PTE_PFN) +#else /* MACH_PSEUDO_PHYS */ +#define pte_to_pa(p) ((p) & INTEL_PTE_PFN) +#endif /* MACH_PSEUDO_PHYS */ +#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1) + +/* + * Convert page table entry to kernel virtual address + */ +#define ptetokv(a) (phystokv(pte_to_pa(a))) + +#ifndef __ASSEMBLER__ +typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */ + /* changed by other processors */ + +struct pmap { +#if ! 
PAE + pt_entry_t *dirbase; /* page directory table */ +#else /* PAE */ +#ifdef __x86_64__ + pt_entry_t *l4base; /* l4 table */ +#ifdef MACH_HYP + pt_entry_t *user_l4base; /* Userland l4 table */ + pt_entry_t *user_pdpbase; /* Userland l4 table */ +#endif /* MACH_HYP */ +#else /* x86_64 */ + pt_entry_t *pdpbase; /* page directory pointer table */ +#endif /* x86_64 */ +#endif /* PAE */ + int ref_count; /* reference count */ + decl_simple_lock_data(,lock) + /* lock on map */ + struct pmap_statistics stats; /* map statistics */ + cpu_set cpus_using; /* bitmap of cpus using pmap */ +}; + +typedef struct pmap *pmap_t; + +#define PMAP_NULL ((pmap_t) 0) + +#ifdef MACH_PV_PAGETABLES +extern void pmap_set_page_readwrite(void *addr); +extern void pmap_set_page_readonly(void *addr); +extern void pmap_set_page_readonly_init(void *addr); +extern void pmap_map_mfn(void *addr, unsigned long mfn); +extern void pmap_clear_bootstrap_pagetable(pt_entry_t *addr); +#endif /* MACH_PV_PAGETABLES */ + +#if PAE +#ifdef __x86_64__ +/* TODO: support PCID */ +#ifdef MACH_HYP +#define set_pmap(pmap) \ + MACRO_BEGIN \ + set_cr3(kvtophys((vm_offset_t)(pmap)->l4base)); \ + if (pmap->user_l4base) \ + if (!hyp_set_user_cr3(kvtophys((vm_offset_t)(pmap)->user_l4base))) \ + panic("set_user_cr3"); \ + MACRO_END +#else /* MACH_HYP */ +#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->l4base)) +#endif /* MACH_HYP */ +#else /* x86_64 */ +#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->pdpbase)) +#endif /* x86_64 */ +#else /* PAE */ +#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->dirbase)) +#endif /* PAE */ + +typedef struct { + pt_entry_t *entry; + vm_offset_t vaddr; +} pmap_mapwindow_t; + +extern pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry); +extern void pmap_put_mapwindow(pmap_mapwindow_t *map); + +#define PMAP_NMAPWINDOWS 2 /* Per CPU */ + +#if NCPUS > 1 +/* + * List of cpus that are actively using mapped memory. Any + * pmap update operation must wait for all cpus in this list. + * Update operations must still be queued to cpus not in this + * list. + */ +extern cpu_set cpus_active; + +/* + * List of cpus that are idle, but still operating, and will want + * to see any kernel pmap updates when they become active. + */ +extern cpu_set cpus_idle; + +/* + * Quick test for pmap update requests. + */ +extern volatile +boolean_t cpu_update_needed[NCPUS]; + +/* + * External declarations for PMAP_ACTIVATE. + */ + +void process_pmap_updates(pmap_t); +extern pmap_t kernel_pmap; + +#endif /* NCPUS > 1 */ + +void pmap_update_interrupt(void); + +/* + * Machine dependent routines that are used only for i386/i486. + */ + +pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr); + +/* + * Macros for speed. + */ + +#if NCPUS > 1 + +/* + * For multiple CPUS, PMAP_ACTIVATE and PMAP_DEACTIVATE must manage + * fields to control TLB invalidation on other CPUS. + */ + +#define PMAP_ACTIVATE_KERNEL(my_cpu) { \ + \ + /* \ + * Let pmap updates proceed while we wait for this pmap. \ + */ \ + i_bit_clear((my_cpu), &cpus_active); \ + \ + /* \ + * Lock the pmap to put this cpu in its active set. \ + * Wait for updates here. \ + */ \ + simple_lock(&kernel_pmap->lock); \ + \ + /* \ + * Process invalidate requests for the kernel pmap. \ + */ \ + if (cpu_update_needed[(my_cpu)]) \ + process_pmap_updates(kernel_pmap); \ + \ + /* \ + * Mark that this cpu is using the pmap. \ + */ \ + i_bit_set((my_cpu), &kernel_pmap->cpus_using); \ + \ + /* \ + * Mark this cpu active - IPL will be lowered by \ + * load_context(). 
\ + */ \ + i_bit_set((my_cpu), &cpus_active); \ + \ + simple_unlock(&kernel_pmap->lock); \ +} + +#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \ + /* \ + * Mark pmap no longer in use by this cpu even if \ + * pmap is locked against updates. \ + */ \ + i_bit_clear((my_cpu), &kernel_pmap->cpus_using); \ +} + +#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \ + pmap_t tpmap = (pmap); \ + \ + if (tpmap == kernel_pmap) { \ + /* \ + * If this is the kernel pmap, switch to its page tables. \ + */ \ + set_pmap(tpmap); \ + } \ + else { \ + /* \ + * Let pmap updates proceed while we wait for this pmap. \ + */ \ + i_bit_clear((my_cpu), &cpus_active); \ + \ + /* \ + * Lock the pmap to put this cpu in its active set. \ + * Wait for updates here. \ + */ \ + simple_lock(&tpmap->lock); \ + \ + /* \ + * No need to invalidate the TLB - the entire user pmap \ + * will be invalidated by reloading dirbase. \ + */ \ + set_pmap(tpmap); \ + \ + /* \ + * Mark that this cpu is using the pmap. \ + */ \ + i_bit_set((my_cpu), &tpmap->cpus_using); \ + \ + /* \ + * Mark this cpu active - IPL will be lowered by \ + * load_context(). \ + */ \ + i_bit_set((my_cpu), &cpus_active); \ + \ + simple_unlock(&tpmap->lock); \ + } \ +} + +#define PMAP_DEACTIVATE_USER(pmap, thread, my_cpu) { \ + pmap_t tpmap = (pmap); \ + \ + /* \ + * Do nothing if this is the kernel pmap. \ + */ \ + if (tpmap != kernel_pmap) { \ + /* \ + * Mark pmap no longer in use by this cpu even if \ + * pmap is locked against updates. \ + */ \ + i_bit_clear((my_cpu), &(pmap)->cpus_using); \ + } \ +} + +#define MARK_CPU_IDLE(my_cpu) { \ + /* \ + * Mark this cpu idle, and remove it from the active set, \ + * since it is not actively using any pmap. Signal_cpus \ + * will notice that it is idle, and avoid signaling it, \ + * but will queue the update request for when the cpu \ + * becomes active. \ + */ \ + int s = splvm(); \ + i_bit_set((my_cpu), &cpus_idle); \ + i_bit_clear((my_cpu), &cpus_active); \ + splx(s); \ +} + +#define MARK_CPU_ACTIVE(my_cpu) { \ + \ + int s = splvm(); \ + /* \ + * If a kernel_pmap update was requested while this cpu \ + * was idle, process it as if we got the interrupt. \ + * Before doing so, remove this cpu from the idle set. \ + * Since we do not grab any pmap locks while we flush \ + * our TLB, another cpu may start an update operation \ + * before we finish. Removing this cpu from the idle \ + * set assures that we will receive another update \ + * interrupt if this happens. \ + */ \ + i_bit_clear((my_cpu), &cpus_idle); \ + __sync_synchronize(); \ + \ + if (cpu_update_needed[(my_cpu)]) \ + pmap_update_interrupt(); \ + \ + /* \ + * Mark that this cpu is now active. \ + */ \ + i_bit_set((my_cpu), &cpus_active); \ + splx(s); \ +} + +#else /* NCPUS > 1 */ + +/* + * With only one CPU, we just have to indicate whether the pmap is + * in use. 
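+ * (cpus_using then degenerates into a plain TRUE/FALSE flag.)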
+ */ + +#define PMAP_ACTIVATE_KERNEL(my_cpu) { \ + (void) (my_cpu); \ + kernel_pmap->cpus_using = TRUE; \ +} + +#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \ + (void) (my_cpu); \ + kernel_pmap->cpus_using = FALSE; \ +} + +#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \ + pmap_t tpmap = (pmap); \ + (void) (th); \ + (void) (my_cpu); \ + \ + set_pmap(tpmap); \ + if (tpmap != kernel_pmap) { \ + tpmap->cpus_using = TRUE; \ + } \ +} + +#define PMAP_DEACTIVATE_USER(pmap, thread, cpu) { \ + (void) (thread); \ + (void) (cpu); \ + if ((pmap) != kernel_pmap) \ + (pmap)->cpus_using = FALSE; \ +} + +#endif /* NCPUS > 1 */ + +#define PMAP_CONTEXT(pmap, thread) + +#define pmap_kernel() (kernel_pmap) +#define pmap_resident_count(pmap) ((pmap)->stats.resident_count) +#define pmap_phys_address(frame) ((intel_ptob((phys_addr_t) frame))) +#define pmap_phys_to_frame(phys) ((int) (intel_btop(phys))) +#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr) +#define pmap_attribute(pmap,addr,size,attr,value) \ + (KERN_INVALID_ADDRESS) + +extern pt_entry_t *kernel_page_dir; + +extern vm_offset_t kernel_virtual_start; +extern vm_offset_t kernel_virtual_end; + +/* + * Bootstrap the system enough to run with virtual memory. + * Allocate the kernel page directory and page tables, + * and direct-map all physical memory. + * Called with mapping off. + */ +extern void pmap_bootstrap(void); + +extern void pmap_set_page_dir(void); +extern void pmap_make_temporary_mapping(void); +extern void pmap_remove_temporary_mapping(void); + +extern void pmap_unmap_page_zero (void); + +/* + * pmap_zero_page zeros the specified (machine independent) page. + */ +extern void pmap_zero_page (phys_addr_t); + +/* + * pmap_copy_page copies the specified (machine independent) pages. + */ +extern void pmap_copy_page (phys_addr_t, phys_addr_t); + +/* + * copy_to_phys(src_addr_v, dst_addr_p, count) + * + * Copy virtual memory to physical memory + */ +extern void +copy_to_phys( + vm_offset_t src_addr_v, + phys_addr_t dst_addr_p, + int count); + +/* + * copy_from_phys(src_addr_p, dst_addr_v, count) + * + * Copy physical memory to virtual memory. The virtual memory + * is assumed to be present (e.g. the buffer pool). + */ +extern void +copy_from_phys( + phys_addr_t src_addr_p, + vm_offset_t dst_addr_v, + int count); + +/* + * kvtophys(addr) + * + * Convert a kernel virtual address to a physical address + */ +extern phys_addr_t kvtophys (vm_offset_t); + +#if NCPUS > 1 +void signal_cpus( + cpu_set use_list, + pmap_t pmap, + vm_offset_t start, + vm_offset_t end); +#endif /* NCPUS > 1 */ + +#endif /* __ASSEMBLER__ */ + +#endif /* _PMAP_MACHINE_ */ diff --git a/riscv/intel/pmap.h~ b/riscv/intel/pmap.h~ new file mode 100644 index 0000000..8b0eba0 --- /dev/null +++ b/riscv/intel/pmap.h~ @@ -0,0 +1,574 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: pmap.h + * + * Authors: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Machine-dependent structures for the physical map module. + */ + +#ifndef _PMAP_MACHINE_ +#define _PMAP_MACHINE_ 1 + +#ifndef __ASSEMBLER__ + +#include <kern/lock.h> +#include <mach/machine/vm_param.h> +#include <mach/vm_statistics.h> +#include <mach/kern_return.h> +#include <mach/vm_prot.h> +#include <i386/proc_reg.h> + +/* + * Define the generic in terms of the specific + */ + +#if defined(__i386__) || defined(__x86_64__) +#define INTEL_PGBYTES I386_PGBYTES +#define INTEL_PGSHIFT I386_PGSHIFT +#define intel_btop(x) i386_btop(x) +#define intel_ptob(x) i386_ptob(x) +#define intel_round_page(x) i386_round_page(x) +#define intel_trunc_page(x) i386_trunc_page(x) +#define trunc_intel_to_vm(x) trunc_i386_to_vm(x) +#define round_intel_to_vm(x) round_i386_to_vm(x) +#define vm_to_intel(x) vm_to_i386(x) +#endif /* __i386__ */ + +/* + * i386/i486 Page Table Entry + */ + +typedef phys_addr_t pt_entry_t; +#define PT_ENTRY_NULL ((pt_entry_t *) 0) + +#endif /* __ASSEMBLER__ */ + +#define INTEL_OFFMASK 0xfff /* offset within page */ +#if PAE +#ifdef __x86_64__ +#define L4SHIFT 39 /* L4 shift */ +#define L4MASK 0x1ff /* mask for L4 index */ +#define PDPNUM_KERNEL (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1) +#define PDPMASK 0x1ff /* mask for page directory pointer index */ +#else /* __x86_64__ */ +#define PDPNUM 4 /* number of page directory pointers */ +#define PDPMASK 3 /* mask for page directory pointer index */ +#endif /* __x86_64__ */ +#define PDPSHIFT 30 /* page directory pointer */ +#define PDESHIFT 21 /* page descriptor shift */ +#define PDEMASK 0x1ff /* mask for page descriptor index */ +#define PTESHIFT 12 /* page table shift */ +#define PTEMASK 0x1ff /* mask for page table index */ +#else /* PAE */ +#define PDPNUM 1 /* number of page directory pointers */ +#define PDESHIFT 22 /* page descriptor shift */ +#define PDEMASK 0x3ff /* mask for page descriptor index */ +#define PTESHIFT 12 /* page table shift */ +#define PTEMASK 0x3ff /* mask for page table index */ +#endif /* PAE */ + +/* + * Convert linear offset to L4 pointer index + */ +#ifdef __x86_64__ +#define lin2l4num(a) (((a) >> L4SHIFT) & L4MASK) +#endif + +/* + * Convert linear offset to page descriptor index + */ +#define lin2pdenum(a) (((a) >> PDESHIFT) & PDEMASK) + +#if PAE +/* Special version assuming contiguous page directories. Making it + include the page directory pointer table index too. 
*/ +#ifdef __x86_64__ +#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x3ff) +#else +#define lin2pdenum_cont(a) (((a) >> PDESHIFT) & 0x7ff) +#endif +#else +#define lin2pdenum_cont(a) lin2pdenum(a) +#endif + +/* + * Convert linear offset to page directory pointer index + */ +#if PAE +#define lin2pdpnum(a) (((a) >> PDPSHIFT) & PDPMASK) +#endif + +/* + * Convert page descriptor index to linear address + */ +#define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT) + +#if PAE +#ifdef __x86_64__ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l4num) << L4SHIFT) + \ + ((vm_offset_t)(l3num) << PDPSHIFT) + \ + ((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#else /* __x86_64__ */ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l3num) << PDPSHIFT) + \ + ((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#endif +#else /* PAE */ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#endif + + +/* + * Convert linear offset to page table index + */ +#define ptenum(a) (((a) >> PTESHIFT) & PTEMASK) + +#define NPTES (intel_ptob(1)/sizeof(pt_entry_t)) +#define NPDES (PDPNUM * (intel_ptob(1)/sizeof(pt_entry_t))) + +/* + * Hardware pte bit definitions (to be used directly on the ptes + * without using the bit fields). + */ + +#define INTEL_PTE_VALID 0x00000001 +#define INTEL_PTE_WRITE 0x00000002 +#define INTEL_PTE_USER 0x00000004 +#define INTEL_PTE_WTHRU 0x00000008 +#define INTEL_PTE_NCACHE 0x00000010 +#define INTEL_PTE_REF 0x00000020 +#define INTEL_PTE_MOD 0x00000040 +#define INTEL_PTE_PS 0x00000080 +#ifdef MACH_PV_PAGETABLES +/* Not supported */ +#define INTEL_PTE_GLOBAL 0x00000000 +#else /* MACH_PV_PAGETABLES */ +#define INTEL_PTE_GLOBAL 0x00000100 +#endif /* MACH_PV_PAGETABLES */ +#define INTEL_PTE_WIRED 0x00000200 +#ifdef PAE +#ifdef __x86_64__ +#define INTEL_PTE_PFN 0xfffffffffffff000ULL +#else /* __x86_64__ */ +#define INTEL_PTE_PFN 0x00007ffffffff000ULL +#endif/* __x86_64__ */ +#else +#define INTEL_PTE_PFN 0xfffff000 +#endif + +#define pa_to_pte(a) ((a) & INTEL_PTE_PFN) +#ifdef MACH_PSEUDO_PHYS +#define pte_to_pa(p) ma_to_pa((p) & INTEL_PTE_PFN) +#else /* MACH_PSEUDO_PHYS */ +#define pte_to_pa(p) ((p) & INTEL_PTE_PFN) +#endif /* MACH_PSEUDO_PHYS */ +#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1) + +/* + * Convert page table entry to kernel virtual address + */ +#define ptetokv(a) (phystokv(pte_to_pa(a))) + +#ifndef __ASSEMBLER__ +typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */ + /* changed by other processors */ + +struct pmap { +#if ! 
PAE + pt_entry_t *dirbase; /* page directory table */ +#else /* PAE */ +#ifdef __x86_64__ + pt_entry_t *l4base; /* l4 table */ +#ifdef MACH_HYP + pt_entry_t *user_l4base; /* Userland l4 table */ + pt_entry_t *user_pdpbase; /* Userland l4 table */ +#endif /* MACH_HYP */ +#else /* x86_64 */ + pt_entry_t *pdpbase; /* page directory pointer table */ +#endif /* x86_64 */ +#endif /* PAE */ + int ref_count; /* reference count */ + decl_simple_lock_data(,lock) + /* lock on map */ + struct pmap_statistics stats; /* map statistics */ + cpu_set cpus_using; /* bitmap of cpus using pmap */ +}; + +typedef struct pmap *pmap_t; + +#define PMAP_NULL ((pmap_t) 0) + +#ifdef MACH_PV_PAGETABLES +extern void pmap_set_page_readwrite(void *addr); +extern void pmap_set_page_readonly(void *addr); +extern void pmap_set_page_readonly_init(void *addr); +extern void pmap_map_mfn(void *addr, unsigned long mfn); +extern void pmap_clear_bootstrap_pagetable(pt_entry_t *addr); +#endif /* MACH_PV_PAGETABLES */ + +#if PAE +#ifdef __x86_64__ +/* TODO: support PCID */ +#ifdef MACH_HYP +#define set_pmap(pmap) \ + MACRO_BEGIN \ + set_cr3(kvtophys((vm_offset_t)(pmap)->l4base)); \ + if (pmap->user_l4base) \ + if (!hyp_set_user_cr3(kvtophys((vm_offset_t)(pmap)->user_l4base))) \ + panic("set_user_cr3"); \ + MACRO_END +#else /* MACH_HYP */ +#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->l4base)) +#endif /* MACH_HYP */ +#else /* x86_64 */ +#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->pdpbase)) +#endif /* x86_64 */ +#else /* PAE */ +#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->dirbase)) +#endif /* PAE */ + +typedef struct { + pt_entry_t *entry; + vm_offset_t vaddr; +} pmap_mapwindow_t; + +extern pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry); +extern void pmap_put_mapwindow(pmap_mapwindow_t *map); + +#define PMAP_NMAPWINDOWS 2 /* Per CPU */ + +#if NCPUS > 1 +/* + * List of cpus that are actively using mapped memory. Any + * pmap update operation must wait for all cpus in this list. + * Update operations must still be queued to cpus not in this + * list. + */ +extern cpu_set cpus_active; + +/* + * List of cpus that are idle, but still operating, and will want + * to see any kernel pmap updates when they become active. + */ +extern cpu_set cpus_idle; + +/* + * Quick test for pmap update requests. + */ +extern volatile +boolean_t cpu_update_needed[NCPUS]; + +/* + * External declarations for PMAP_ACTIVATE. + */ + +void process_pmap_updates(pmap_t); +extern pmap_t kernel_pmap; + +#endif /* NCPUS > 1 */ + +void pmap_update_interrupt(void); + +/* + * Machine dependent routines that are used only for i386/i486. + */ + +pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr); + +/* + * Macros for speed. + */ + +#if NCPUS > 1 + +/* + * For multiple CPUS, PMAP_ACTIVATE and PMAP_DEACTIVATE must manage + * fields to control TLB invalidation on other CPUS. + */ + +#define PMAP_ACTIVATE_KERNEL(my_cpu) { \ + \ + /* \ + * Let pmap updates proceed while we wait for this pmap. \ + */ \ + i_bit_clear((my_cpu), &cpus_active); \ + \ + /* \ + * Lock the pmap to put this cpu in its active set. \ + * Wait for updates here. \ + */ \ + simple_lock(&kernel_pmap->lock); \ + \ + /* \ + * Process invalidate requests for the kernel pmap. \ + */ \ + if (cpu_update_needed[(my_cpu)]) \ + process_pmap_updates(kernel_pmap); \ + \ + /* \ + * Mark that this cpu is using the pmap. \ + */ \ + i_bit_set((my_cpu), &kernel_pmap->cpus_using); \ + \ + /* \ + * Mark this cpu active - IPL will be lowered by \ + * load_context(). 
\ + */ \ + i_bit_set((my_cpu), &cpus_active); \ + \ + simple_unlock(&kernel_pmap->lock); \ +} + +#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \ + /* \ + * Mark pmap no longer in use by this cpu even if \ + * pmap is locked against updates. \ + */ \ + i_bit_clear((my_cpu), &kernel_pmap->cpus_using); \ +} + +#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \ + pmap_t tpmap = (pmap); \ + \ + if (tpmap == kernel_pmap) { \ + /* \ + * If this is the kernel pmap, switch to its page tables. \ + */ \ + set_pmap(tpmap); \ + } \ + else { \ + /* \ + * Let pmap updates proceed while we wait for this pmap. \ + */ \ + i_bit_clear((my_cpu), &cpus_active); \ + \ + /* \ + * Lock the pmap to put this cpu in its active set. \ + * Wait for updates here. \ + */ \ + simple_lock(&tpmap->lock); \ + \ + /* \ + * No need to invalidate the TLB - the entire user pmap \ + * will be invalidated by reloading dirbase. \ + */ \ + set_pmap(tpmap); \ + \ + /* \ + * Mark that this cpu is using the pmap. \ + */ \ + i_bit_set((my_cpu), &tpmap->cpus_using); \ + \ + /* \ + * Mark this cpu active - IPL will be lowered by \ + * load_context(). \ + */ \ + i_bit_set((my_cpu), &cpus_active); \ + \ + simple_unlock(&tpmap->lock); \ + } \ +} + +#define PMAP_DEACTIVATE_USER(pmap, thread, my_cpu) { \ + pmap_t tpmap = (pmap); \ + \ + /* \ + * Do nothing if this is the kernel pmap. \ + */ \ + if (tpmap != kernel_pmap) { \ + /* \ + * Mark pmap no longer in use by this cpu even if \ + * pmap is locked against updates. \ + */ \ + i_bit_clear((my_cpu), &(pmap)->cpus_using); \ + } \ +} + +#define MARK_CPU_IDLE(my_cpu) { \ + /* \ + * Mark this cpu idle, and remove it from the active set, \ + * since it is not actively using any pmap. Signal_cpus \ + * will notice that it is idle, and avoid signaling it, \ + * but will queue the update request for when the cpu \ + * becomes active. \ + */ \ + int s = splvm(); \ + i_bit_set((my_cpu), &cpus_idle); \ + i_bit_clear((my_cpu), &cpus_active); \ + splx(s); \ +} + +#define MARK_CPU_ACTIVE(my_cpu) { \ + \ + int s = splvm(); \ + /* \ + * If a kernel_pmap update was requested while this cpu \ + * was idle, process it as if we got the interrupt. \ + * Before doing so, remove this cpu from the idle set. \ + * Since we do not grab any pmap locks while we flush \ + * our TLB, another cpu may start an update operation \ + * before we finish. Removing this cpu from the idle \ + * set assures that we will receive another update \ + * interrupt if this happens. \ + */ \ + i_bit_clear((my_cpu), &cpus_idle); \ + __sync_synchronize(); \ + \ + if (cpu_update_needed[(my_cpu)]) \ + pmap_update_interrupt(); \ + \ + /* \ + * Mark that this cpu is now active. \ + */ \ + i_bit_set((my_cpu), &cpus_active); \ + splx(s); \ +} + +#else /* NCPUS > 1 */ + +/* + * With only one CPU, we just have to indicate whether the pmap is + * in use. 
+ */ + +#define PMAP_ACTIVATE_KERNEL(my_cpu) { \ + (void) (my_cpu); \ + kernel_pmap->cpus_using = TRUE; \ +} + +#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \ + (void) (my_cpu); \ + kernel_pmap->cpus_using = FALSE; \ +} + +#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \ + pmap_t tpmap = (pmap); \ + (void) (th); \ + (void) (my_cpu); \ + \ + set_pmap(tpmap); \ + if (tpmap != kernel_pmap) { \ + tpmap->cpus_using = TRUE; \ + } \ +} + +#define PMAP_DEACTIVATE_USER(pmap, thread, cpu) { \ + (void) (thread); \ + (void) (cpu); \ + if ((pmap) != kernel_pmap) \ + (pmap)->cpus_using = FALSE; \ +} + +#endif /* NCPUS > 1 */ + +#define PMAP_CONTEXT(pmap, thread) + +#define pmap_kernel() (kernel_pmap) +#define pmap_resident_count(pmap) ((pmap)->stats.resident_count) +#define pmap_phys_address(frame) ((intel_ptob((phys_addr_t) frame))) +#define pmap_phys_to_frame(phys) ((int) (intel_btop(phys))) +#define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr) +#define pmap_attribute(pmap,addr,size,attr,value) \ + (KERN_INVALID_ADDRESS) + +extern pt_entry_t *kernel_page_dir; + +extern vm_offset_t kernel_virtual_start; +extern vm_offset_t kernel_virtual_end; + +/* + * Bootstrap the system enough to run with virtual memory. + * Allocate the kernel page directory and page tables, + * and direct-map all physical memory. + * Called with mapping off. + */ +extern void pmap_bootstrap(void); + +extern void pmap_set_page_dir(void); +extern void pmap_make_temporary_mapping(void); +extern void pmap_remove_temporary_mapping(void); + +extern void pmap_unmap_page_zero (void); + +/* + * pmap_zero_page zeros the specified (machine independent) page. + */ +extern void pmap_zero_page (phys_addr_t); + +/* + * pmap_copy_page copies the specified (machine independent) pages. + */ +extern void pmap_copy_page (phys_addr_t, phys_addr_t); + +/* + * copy_to_phys(src_addr_v, dst_addr_p, count) + * + * Copy virtual memory to physical memory + */ +extern void +copy_to_phys( + vm_offset_t src_addr_v, + phys_addr_t dst_addr_p, + int count); + +/* + * copy_from_phys(src_addr_p, dst_addr_v, count) + * + * Copy physical memory to virtual memory. The virtual memory + * is assumed to be present (e.g. the buffer pool). + */ +extern void +copy_from_phys( + phys_addr_t src_addr_p, + vm_offset_t dst_addr_v, + int count); + +/* + * kvtophys(addr) + * + * Convert a kernel virtual address to a physical address + */ +extern phys_addr_t kvtophys (vm_offset_t); + +#if NCPUS > 1 +void signal_cpus( + cpu_set use_list, + pmap_t pmap, + vm_offset_t start, + vm_offset_t end); +#endif /* NCPUS > 1 */ + +#endif /* __ASSEMBLER__ */ + +#endif /* _PMAP_MACHINE_ */ diff --git a/riscv/intel/read_fault.c b/riscv/intel/read_fault.c new file mode 100644 index 0000000..0b79e3d --- /dev/null +++ b/riscv/intel/read_fault.c @@ -0,0 +1,178 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <vm/vm_fault.h> +#include <mach/kern_return.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/pmap.h> + +#include <kern/macros.h> + +#if !(__i486__ || __i586__ || __i686__) +/* + * Expansion of vm_fault for read fault in kernel mode. + * Must enter the mapping as writable, since the i386 + * ignores write protection in kernel mode. + */ +kern_return_t +intel_read_fault( + vm_map_t map, + vm_offset_t vaddr) +{ + vm_map_version_t version; /* Map version for + verification */ + vm_object_t object; /* Top-level object */ + vm_offset_t offset; /* Top-level offset */ + vm_prot_t prot; /* Protection for mapping */ + vm_page_t result_page; /* Result of vm_fault_page */ + vm_page_t top_page; /* Placeholder page */ + boolean_t wired; /* Is map region wired? */ + kern_return_t result; + vm_page_t m; + + RetryFault: + + /* + * Find the backing store object and offset into it + * to begin search. + */ + result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version, + &object, &offset, &prot, &wired); + if (result != KERN_SUCCESS) + return (result); + + /* + * Make a reference to this object to prevent its + * disposal while we are playing with it. + */ + assert(object->ref_count > 0); + object->ref_count++; + vm_object_paging_begin(object); + + result = vm_fault_page(object, offset, VM_PROT_READ, FALSE, TRUE, + &prot, &result_page, &top_page, + FALSE, (void (*)()) 0); + + if (result != VM_FAULT_SUCCESS) { + vm_object_deallocate(object); + + switch (result) { + case VM_FAULT_RETRY: + goto RetryFault; + case VM_FAULT_INTERRUPTED: + return (KERN_SUCCESS); + case VM_FAULT_MEMORY_SHORTAGE: + VM_PAGE_WAIT((void (*)()) 0); + goto RetryFault; + case VM_FAULT_FICTITIOUS_SHORTAGE: + vm_page_more_fictitious(); + goto RetryFault; + case VM_FAULT_MEMORY_ERROR: + return (KERN_MEMORY_ERROR); + } + } + + m = result_page; + + /* + * How to clean up the result of vm_fault_page. This + * happens whether the mapping is entered or not. + */ + +#define UNLOCK_AND_DEALLOCATE \ + MACRO_BEGIN \ + vm_fault_cleanup(m->object, top_page); \ + vm_object_deallocate(object); \ + MACRO_END + + /* + * What to do with the resulting page from vm_fault_page + * if it doesn't get entered into the physical map: + */ + +#define RELEASE_PAGE(m) \ + MACRO_BEGIN \ + PAGE_WAKEUP_DONE(m); \ + vm_page_lock_queues(); \ + if (!m->active && !m->inactive) \ + vm_page_activate(m); \ + vm_page_unlock_queues(); \ + MACRO_END + + /* + * We must verify that the maps have not changed. + */ + vm_object_unlock(m->object); + while (!vm_map_verify(map, &version)) { + vm_object_t retry_object; + vm_offset_t retry_offset; + vm_prot_t retry_prot; + + result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &version, + &retry_object, &retry_offset, &retry_prot, + &wired); + if (result != KERN_SUCCESS) { + vm_object_lock(m->object); + RELEASE_PAGE(m); + UNLOCK_AND_DEALLOCATE; + return (result); + } + + vm_object_unlock(retry_object); + + if (retry_object != object || retry_offset != offset) { + vm_object_lock(m->object); + RELEASE_PAGE(m); + UNLOCK_AND_DEALLOCATE; + goto RetryFault; + } + } + + /* + * Put the page in the physical map. 
+ */ + PMAP_ENTER(map->pmap, vaddr, m, VM_PROT_READ|VM_PROT_WRITE, wired); + + vm_object_lock(m->object); + vm_page_lock_queues(); + if (!m->active && !m->inactive) + vm_page_activate(m); + m->reference = TRUE; + vm_page_unlock_queues(); + + vm_map_verify_done(map, &version); + PAGE_WAKEUP_DONE(m); + + UNLOCK_AND_DEALLOCATE; + +#undef UNLOCK_AND_DEALLOCATE +#undef RELEASE_PAGE + + return (KERN_SUCCESS); +} +#endif diff --git a/riscv/intel/read_fault.h b/riscv/intel/read_fault.h new file mode 100644 index 0000000..8aa3f03 --- /dev/null +++ b/riscv/intel/read_fault.h @@ -0,0 +1,35 @@ +/* + * Kernel read_fault on i386 functions + * Copyright (C) 2008 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Barry deFreese. + */ +/* + * Kernel read_fault on i386 functions. + * + */ + +#ifndef _READ_FAULT_H_ +#define _READ_FAULT_H_ + +#include <mach/std_types.h> + +extern kern_return_t intel_read_fault( + vm_map_t map, + vm_offset_t vaddr); + +#endif /* _READ_FAULT_H_ */ |
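
For context on how this entry point is used: intel_read_fault() is meant to be called from the kernel's page-fault path when a read fault is taken in kernel mode, so that the page is entered writable up front (a plain i386 does not honour write protection in supervisor mode, so a read-only kernel mapping could not be trusted anyway). The following is a minimal sketch of how a fault handler might dispatch to it; the example_kernel_page_fault() entry point and the fault_was_write flag (derived from the hardware error code) are assumptions made for illustration and are not part of this diff.

/*
 * Minimal sketch (illustration only): a hypothetical kernel page-fault
 * dispatcher that routes read faults through intel_read_fault().
 * example_kernel_page_fault() and fault_was_write are assumed names;
 * they do not exist in this tree.
 */
#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <mach/vm_prot.h>
#include <vm/vm_map.h>
#include <vm/vm_fault.h>

#include "read_fault.h"

static kern_return_t
example_kernel_page_fault(
	vm_map_t	fault_map,	/* map covering the faulting address */
	vm_offset_t	fault_addr,	/* faulting virtual address */
	boolean_t	fault_was_write)	/* from the page-fault error code */
{
	if (!fault_was_write) {
		/*
		 * Kernel-mode read fault: enter the mapping through
		 * intel_read_fault(), which maps the page read/write,
		 * since the CPU would not fault on a later kernel write
		 * to a read-only page anyway.
		 */
		return intel_read_fault(fault_map, fault_addr);
	}

	/*
	 * Write faults take the generic VM fault path: no change of
	 * wiring, not resuming an earlier fault, no continuation.
	 */
	return vm_fault(fault_map, fault_addr,
			VM_PROT_READ | VM_PROT_WRITE,
			FALSE, FALSE, (void (*)()) 0);
}

The #if !(__i486__ || __i586__ || __i686__) guard around intel_read_fault() fits this picture: the i486 and later processors honour the CR0 WP bit, so supervisor-mode writes do respect write protection there and the generic vm_fault() path can handle kernel read faults as well; only the original i386 needs the special case.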