Diffstat (limited to 'vm')
-rw-r--r--  vm/memory_object.c            1090
-rw-r--r--  vm/memory_object.h              39
-rw-r--r--  vm/memory_object_default.cli    28
-rw-r--r--  vm/memory_object_proxy.c       228
-rw-r--r--  vm/memory_object_proxy.h        39
-rw-r--r--  vm/memory_object_user.cli       28
-rw-r--r--  vm/pmap.h                      241
-rw-r--r--  vm/vm_debug.c                  548
-rw-r--r--  vm/vm_external.c               151
-rw-r--r--  vm/vm_external.h                95
-rw-r--r--  vm/vm_fault.c                 2136
-rw-r--r--  vm/vm_fault.h                   81
-rw-r--r--  vm/vm_init.c                    88
-rw-r--r--  vm/vm_init.h                    25
-rw-r--r--  vm/vm_kern.c                  1099
-rw-r--r--  vm/vm_kern.h                   100
-rw-r--r--  vm/vm_map.c                   5237
-rw-r--r--  vm/vm_map.h                    585
-rw-r--r--  vm/vm_object.c                2994
-rw-r--r--  vm/vm_object.h                 415
-rw-r--r--  vm/vm_page.c                  2164
-rw-r--r--  vm/vm_page.h                   567
-rw-r--r--  vm/vm_pageout.c                515
-rw-r--r--  vm/vm_pageout.h                 53
-rw-r--r--  vm/vm_print.h                   41
-rw-r--r--  vm/vm_resident.c              1116
-rw-r--r--  vm/vm_resident.h                45
-rw-r--r--  vm/vm_types.h                   42
-rw-r--r--  vm/vm_user.c                   803
-rw-r--r--  vm/vm_user.h                    60
30 files changed, 20653 insertions, 0 deletions
diff --git a/vm/memory_object.c b/vm/memory_object.c
new file mode 100644
index 0000000..1ea5956
--- /dev/null
+++ b/vm/memory_object.c
@@ -0,0 +1,1090 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/memory_object.c
+ * Author: Michael Wayne Young
+ *
+ * External memory management interface control functions.
+ */
+
+/*
+ * Interface dependencies:
+ */
+
+#include <mach/std_types.h> /* For pointer_t */
+#include <mach/mach_types.h>
+
+#include <mach/kern_return.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <mach/memory_object.h>
+#include <mach/boolean.h>
+#include <mach/vm_prot.h>
+#include <mach/message.h>
+
+#include <vm/memory_object_user.user.h>
+#include <vm/memory_object_default.user.h>
+
+/*
+ * Implementation dependencies:
+ */
+#include <vm/memory_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/pmap.h> /* For copy_to_phys, pmap_clear_modify */
+#include <kern/debug.h> /* For panic() */
+#include <kern/thread.h> /* For current_thread() */
+#include <kern/host.h>
+#include <kern/mach.server.h> /* For rpc prototypes */
+#include <vm/vm_kern.h> /* For kernel_map, vm_move */
+#include <vm/vm_map.h> /* For vm_map_pageable */
+#include <ipc/ipc_port.h>
+
+#if MACH_PAGEMAP
+#include <vm/vm_external.h>
+#endif /* MACH_PAGEMAP */
+
+typedef int memory_object_lock_result_t; /* moved from below */
+
+
+ipc_port_t memory_manager_default = IP_NULL;
+def_simple_lock_data(static,memory_manager_default_lock)
+
+/*
+ * Important note:
+ * All of these routines gain a reference to the
+ * object (first argument) as part of the automatic
+ * argument conversion. Explicit deallocation is necessary.
+ */
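+/*
+ * A minimal sketch of the resulting convention: every exit path,
+ * including early error returns, must drop the converted reference,
+ * as in
+ *
+ *	if (lock_value & ~VM_PROT_ALL) {
+ *		vm_object_deallocate(object);
+ *		return KERN_INVALID_ARGUMENT;
+ *	}
+ */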
+
+kern_return_t memory_object_data_supply(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_offset_t vm_data_copy,
+ unsigned int data_cnt,
+ vm_prot_t lock_value,
+ boolean_t precious,
+ ipc_port_t reply_to,
+ mach_msg_type_name_t reply_to_type)
+{
+ kern_return_t result = KERN_SUCCESS;
+ vm_offset_t error_offset = 0;
+ vm_page_t m;
+ vm_page_t data_m;
+ vm_size_t original_length;
+ vm_offset_t original_offset;
+ vm_page_t *page_list;
+ boolean_t was_absent;
+ vm_map_copy_t data_copy = (vm_map_copy_t)vm_data_copy;
+ vm_map_copy_t orig_copy = data_copy;
+
+ /*
+ * Look for bogus arguments
+ */
+
+ if (object == VM_OBJECT_NULL) {
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ if (lock_value & ~VM_PROT_ALL) {
+ vm_object_deallocate(object);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ if ((data_cnt % PAGE_SIZE) != 0) {
+ vm_object_deallocate(object);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ /*
+ * Adjust the offset from the memory object to the offset
+ * within the vm_object.
+ */
+
+ original_length = data_cnt;
+ original_offset = offset;
+
+ assert(data_copy->type == VM_MAP_COPY_PAGE_LIST);
+ page_list = &data_copy->cpy_page_list[0];
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ offset -= object->paging_offset;
+
+ /*
+ * Loop over copy stealing pages for pagein.
+ */
+
+ for (; data_cnt > 0 ; data_cnt -= PAGE_SIZE, offset += PAGE_SIZE) {
+
+ assert(data_copy->cpy_npages > 0);
+ data_m = *page_list;
+
+ if (data_m == VM_PAGE_NULL || data_m->tabled ||
+ data_m->error || data_m->absent || data_m->fictitious) {
+
+ panic("Data_supply: bad page");
+ }
+
+ /*
+ * Look up target page and check its state.
+ */
+
+retry_lookup:
+ m = vm_page_lookup(object,offset);
+ if (m == VM_PAGE_NULL) {
+ was_absent = FALSE;
+ }
+ else {
+ if (m->absent && m->busy) {
+
+ /*
+ * Page was requested. Free the busy
+ * page waiting for it. Insertion
+ * of new page happens below.
+ */
+
+ VM_PAGE_FREE(m);
+ was_absent = TRUE;
+ }
+ else {
+
+ /*
+ * Have to wait for page that is busy and
+ * not absent. This is probably going to
+ * be an error, but go back and check.
+ */
+ if (m->busy) {
+ PAGE_ASSERT_WAIT(m, FALSE);
+ vm_object_unlock(object);
+ thread_block((void (*)()) 0);
+ vm_object_lock(object);
+ goto retry_lookup;
+ }
+
+ /*
+ * Page already present; error.
+ * This is an error if data is precious.
+ */
+ result = KERN_MEMORY_PRESENT;
+ error_offset = offset + object->paging_offset;
+
+ break;
+ }
+ }
+
+ /*
+ * Ok to pagein page. Target object now has no page
+ * at offset. Set the page parameters, then drop
+ * in new page and set up pageout state. Object is
+ * still locked here.
+ *
+ * Must clear busy bit in page before inserting it.
+ * Ok to skip wakeup logic because nobody else
+ * can possibly know about this page.
+ */
+
+ data_m->busy = FALSE;
+ data_m->dirty = FALSE;
+ pmap_clear_modify(data_m->phys_addr);
+
+ data_m->page_lock = lock_value;
+ data_m->unlock_request = VM_PROT_NONE;
+ data_m->precious = precious;
+
+ vm_page_lock_queues();
+ vm_page_insert(data_m, object, offset);
+
+ if (was_absent)
+ vm_page_activate(data_m);
+ else
+ vm_page_deactivate(data_m);
+
+ vm_page_unlock_queues();
+
+ /*
+ * Null out this page list entry, and advance to next
+ * page.
+ */
+
+ *page_list++ = VM_PAGE_NULL;
+
+ if (--(data_copy->cpy_npages) == 0 &&
+ vm_map_copy_has_cont(data_copy)) {
+ vm_map_copy_t new_copy;
+
+ vm_object_unlock(object);
+
+ vm_map_copy_invoke_cont(data_copy, &new_copy, &result);
+
+ if (result == KERN_SUCCESS) {
+
+ /*
+ * Consume on success requires that
+ * we keep the original vm_map_copy
+ * around in case something fails.
+ * Free the old copy if it's not the original
+ */
+ if (data_copy != orig_copy) {
+ vm_map_copy_discard(data_copy);
+ }
+
+ if ((data_copy = new_copy) != VM_MAP_COPY_NULL)
+ page_list = &data_copy->cpy_page_list[0];
+
+ vm_object_lock(object);
+ }
+ else {
+ vm_object_lock(object);
+ error_offset = offset + object->paging_offset +
+ PAGE_SIZE;
+ break;
+ }
+ }
+ }
+
+ /*
+ * Send reply if one was requested.
+ */
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ if (vm_map_copy_has_cont(data_copy))
+ vm_map_copy_abort_cont(data_copy);
+
+ if (IP_VALID(reply_to)) {
+ memory_object_supply_completed(
+ reply_to, reply_to_type,
+ object->pager_request,
+ original_offset,
+ original_length,
+ result,
+ error_offset);
+ }
+
+ vm_object_deallocate(object);
+
+ /*
+	 * Consume on success: The final data copy must be
+	 * discarded if it is not the original. The original
+ * gets discarded only if this routine succeeds.
+ */
+ if (data_copy != orig_copy)
+ vm_map_copy_discard(data_copy);
+ if (result == KERN_SUCCESS)
+ vm_map_copy_discard(orig_copy);
+
+
+ return(result);
+}
+
+kern_return_t memory_object_data_error(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size,
+ kern_return_t error_value)
+{
+ if (object == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if (size != round_page(size))
+ return(KERN_INVALID_ARGUMENT);
+
+ vm_object_lock(object);
+ offset -= object->paging_offset;
+
+ while (size != 0) {
+ vm_page_t m;
+
+ m = vm_page_lookup(object, offset);
+ if ((m != VM_PAGE_NULL) && m->busy && m->absent) {
+ m->error = TRUE;
+ m->absent = FALSE;
+ vm_object_absent_release(object);
+
+ PAGE_WAKEUP_DONE(m);
+
+ vm_page_lock_queues();
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+ }
+
+ size -= PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+ vm_object_unlock(object);
+
+ vm_object_deallocate(object);
+ return(KERN_SUCCESS);
+}
+
+kern_return_t memory_object_data_unavailable(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size)
+{
+#if MACH_PAGEMAP
+ vm_external_t existence_info = VM_EXTERNAL_NULL;
+#endif /* MACH_PAGEMAP */
+
+ if (object == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if (size != round_page(size))
+ return(KERN_INVALID_ARGUMENT);
+
+#if MACH_PAGEMAP
+ if ((offset == 0) && (size > VM_EXTERNAL_LARGE_SIZE) &&
+ (object->existence_info == VM_EXTERNAL_NULL)) {
+ existence_info = vm_external_create(VM_EXTERNAL_SMALL_SIZE);
+ }
+#endif /* MACH_PAGEMAP */
+
+ vm_object_lock(object);
+#if MACH_PAGEMAP
+ if (existence_info != VM_EXTERNAL_NULL) {
+ object->existence_info = existence_info;
+ }
+ if ((offset == 0) && (size > VM_EXTERNAL_LARGE_SIZE)) {
+ vm_object_unlock(object);
+ vm_object_deallocate(object);
+ return(KERN_SUCCESS);
+ }
+#endif /* MACH_PAGEMAP */
+ offset -= object->paging_offset;
+
+ while (size != 0) {
+ vm_page_t m;
+
+ /*
+ * We're looking for pages that are both busy and
+ * absent (waiting to be filled), converting them
+ * to just absent.
+ *
+ * Pages that are just busy can be ignored entirely.
+ */
+
+ m = vm_page_lookup(object, offset);
+ if ((m != VM_PAGE_NULL) && m->busy && m->absent) {
+ PAGE_WAKEUP_DONE(m);
+
+ vm_page_lock_queues();
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+ }
+ size -= PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+
+ vm_object_unlock(object);
+
+ vm_object_deallocate(object);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Routine: memory_object_lock_page
+ *
+ * Description:
+ * Perform the appropriate lock operations on the
+ * given page. See the description of
+ * "memory_object_lock_request" for the meanings
+ * of the arguments.
+ *
+ * Returns an indication that the operation
+ * completed, blocked, or that the page must
+ * be cleaned.
+ */
+
+#define MEMORY_OBJECT_LOCK_RESULT_DONE 0
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK 1
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN 2
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN 3
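+/*
+ * A minimal sketch of the expected dispatch on these results; the
+ * authoritative version is the switch statement in
+ * memory_object_lock_request() below.
+ *
+ *	switch (memory_object_lock_page(m, should_return, should_flush, prot)) {
+ *	case MEMORY_OBJECT_LOCK_RESULT_DONE:
+ *		break;
+ *	case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
+ *		PAGE_ASSERT_WAIT(m, FALSE);
+ *		vm_object_unlock(object);
+ *		thread_block((void (*)()) 0);
+ *		vm_object_lock(object);
+ *		continue;
+ *	case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
+ *	case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
+ *		m->busy = TRUE;
+ *		break;
+ *	}
+ */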
+
+static memory_object_lock_result_t memory_object_lock_page(
+ vm_page_t m,
+ memory_object_return_t should_return,
+ boolean_t should_flush,
+ vm_prot_t prot)
+{
+ /*
+ * Don't worry about pages for which the kernel
+ * does not have any data.
+ */
+
+ if (m->absent)
+ return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+
+ /*
+ * If we cannot change access to the page,
+ * either because a mapping is in progress
+ * (busy page) or because a mapping has been
+ * wired, then give up.
+ */
+
+ if (m->busy)
+ return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+
+ assert(!m->fictitious);
+
+ if (m->wire_count != 0) {
+ /*
+ * If no change would take place
+ * anyway, return successfully.
+ *
+ * No change means:
+ * Not flushing AND
+ * No change to page lock [2 checks] AND
+ * Don't need to send page to manager
+ *
+ * Don't need to send page to manager means:
+ * No clean or return request OR (
+ * Page is not dirty [2 checks] AND (
+ * Page is not precious OR
+ * No request to return precious pages ))
+ *
+ * Now isn't that straightforward and obvious ?? ;-)
+ *
+ * XXX This doesn't handle sending a copy of a wired
+ * XXX page to the pager, but that will require some
+ * XXX significant surgery.
+ */
+
+ if (!should_flush &&
+ ((m->page_lock == prot) || (prot == VM_PROT_NO_CHANGE)) &&
+ ((should_return == MEMORY_OBJECT_RETURN_NONE) ||
+ (!m->dirty && !pmap_is_modified(m->phys_addr) &&
+ (!m->precious ||
+ should_return != MEMORY_OBJECT_RETURN_ALL)))) {
+ /*
+ * Restart page unlock requests,
+ * even though no change took place.
+ * [Memory managers may be expecting
+ * to see new requests.]
+ */
+ m->unlock_request = VM_PROT_NONE;
+ PAGE_WAKEUP(m);
+
+ return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+ }
+
+ return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+ }
+
+ /*
+ * If the page is to be flushed, allow
+ * that to be done as part of the protection.
+ */
+
+ if (should_flush)
+ prot = VM_PROT_ALL;
+
+ /*
+ * Set the page lock.
+ *
+ * If we are decreasing permission, do it now;
+ * let the fault handler take care of increases
+ * (pmap_page_protect may not increase protection).
+ */
+
+ if (prot != VM_PROT_NO_CHANGE) {
+ if ((m->page_lock ^ prot) & prot) {
+ pmap_page_protect(m->phys_addr, VM_PROT_ALL & ~prot);
+ }
+ m->page_lock = prot;
+
+ /*
+ * Restart any past unlock requests, even if no
+ * change resulted. If the manager explicitly
+ * requested no protection change, then it is assumed
+ * to be remembering past requests.
+ */
+
+ m->unlock_request = VM_PROT_NONE;
+ PAGE_WAKEUP(m);
+ }
+
+ /*
+ * Handle cleaning.
+ */
+
+ if (should_return != MEMORY_OBJECT_RETURN_NONE) {
+ /*
+ * Check whether the page is dirty. If
+ * write permission has not been removed,
+ * this may have unpredictable results.
+ */
+
+ if (!m->dirty)
+ m->dirty = pmap_is_modified(m->phys_addr);
+
+ if (m->dirty || (m->precious &&
+ should_return == MEMORY_OBJECT_RETURN_ALL)) {
+ /*
+ * If we weren't planning
+ * to flush the page anyway,
+ * we may need to remove the
+ * page from the pageout
+ * system and from physical
+ * maps now.
+ */
+
+ vm_page_lock_queues();
+ VM_PAGE_QUEUES_REMOVE(m);
+ vm_page_unlock_queues();
+
+ if (!should_flush)
+ pmap_page_protect(m->phys_addr,
+ VM_PROT_NONE);
+
+ /*
+ * Cleaning a page will cause
+ * it to be flushed.
+ */
+
+ if (m->dirty)
+ return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
+ else
+ return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
+ }
+ }
+
+ /*
+ * Handle flushing
+ */
+
+ if (should_flush) {
+ VM_PAGE_FREE(m);
+ } else {
+ extern boolean_t vm_page_deactivate_hint;
+
+ /*
+ * XXX Make clean but not flush a paging hint,
+ * and deactivate the pages. This is a hack
+ * because it overloads flush/clean with
+ * implementation-dependent meaning. This only
+ * happens to pages that are already clean.
+ */
+
+ if (vm_page_deactivate_hint &&
+ (should_return != MEMORY_OBJECT_RETURN_NONE)) {
+ vm_page_lock_queues();
+ vm_page_deactivate(m);
+ vm_page_unlock_queues();
+ }
+ }
+
+ return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+}
+
+/*
+ * Routine: memory_object_lock_request [user interface]
+ *
+ * Description:
+ * Control use of the data associated with the given
+ * memory object. For each page in the given range,
+ * perform the following operations, in order:
+ * 1) restrict access to the page (disallow
+ * forms specified by "prot");
+ * 2) return data to the manager (if "should_return"
+ * is RETURN_DIRTY and the page is dirty, or
+ * "should_return" is RETURN_ALL and the page
+ * is either dirty or precious); and,
+ * 3) flush the cached copy (if "should_flush"
+ * is asserted).
+ * The set of pages is defined by a starting offset
+ * ("offset") and size ("size"). Only pages with the
+ * same page alignment as the starting offset are
+ * considered.
+ *
+ * A single acknowledgement is sent (to the "reply_to"
+ * port) when these actions are complete. If successful,
+ * the naked send right for reply_to is consumed.
+ */
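+/*
+ * A minimal usage sketch, assuming the MIG-generated user stub for
+ * this routine: a memory manager cleans all dirty pages in the first
+ * megabyte of its object, without flushing them or changing
+ * protection, by calling on the memory control port it received in
+ * memory_object_init():
+ *
+ *	kr = memory_object_lock_request(memory_control,
+ *					0, 1024 * 1024,
+ *					MEMORY_OBJECT_RETURN_DIRTY,
+ *					FALSE,
+ *					VM_PROT_NO_CHANGE,
+ *					reply_port);
+ */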
+
+kern_return_t
+memory_object_lock_request(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size,
+ memory_object_return_t should_return,
+ boolean_t should_flush,
+ vm_prot_t prot,
+ ipc_port_t reply_to,
+ mach_msg_type_name_t reply_to_type)
+{
+ vm_page_t m;
+ vm_offset_t original_offset = offset;
+ vm_size_t original_size = size;
+ vm_offset_t paging_offset = 0;
+ vm_object_t new_object = VM_OBJECT_NULL;
+ vm_offset_t new_offset = 0;
+ vm_offset_t last_offset = offset;
+ int page_lock_result;
+ int pageout_action = 0; /* '=0' to quiet lint */
+
+#define DATA_WRITE_MAX 32
+ vm_page_t holding_pages[DATA_WRITE_MAX];
+
+ /*
+ * Check for bogus arguments.
+ */
+ if (object == VM_OBJECT_NULL ||
+ ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE))
+ return (KERN_INVALID_ARGUMENT);
+
+ size = round_page(size);
+
+ /*
+ * Lock the object, and acquire a paging reference to
+ * prevent the memory_object and control ports from
+ * being destroyed.
+ */
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ offset -= object->paging_offset;
+
+ /*
+ * To avoid blocking while scanning for pages, save
+ * dirty pages to be cleaned all at once.
+ *
+ * XXXO A similar strategy could be used to limit the
+ * number of times that a scan must be restarted for
+ * other reasons. Those pages that would require blocking
+ * could be temporarily collected in another list, or
+ * their offsets could be recorded in a small array.
+ */
+
+ /*
+ * XXX NOTE: May want to consider converting this to a page list
+ * XXX vm_map_copy interface. Need to understand object
+ * XXX coalescing implications before doing so.
+ */
+
+#define PAGEOUT_PAGES \
+MACRO_BEGIN \
+ vm_map_copy_t copy; \
+ unsigned i; \
+ vm_page_t hp; \
+ \
+ vm_object_unlock(object); \
+ \
+ (void) vm_map_copyin_object(new_object, 0, new_offset, &copy); \
+ \
+ (void) memory_object_data_return( \
+ object->pager, \
+ object->pager_request, \
+ paging_offset, \
+ (pointer_t) copy, \
+ new_offset, \
+ (pageout_action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \
+ !should_flush); \
+ \
+ vm_object_lock(object); \
+ \
+ for (i = 0; i < atop(new_offset); i++) { \
+ hp = holding_pages[i]; \
+ if (hp != VM_PAGE_NULL) \
+ VM_PAGE_FREE(hp); \
+ } \
+ \
+ new_object = VM_OBJECT_NULL; \
+MACRO_END
+
+ for (;
+ size != 0;
+ size -= PAGE_SIZE, offset += PAGE_SIZE)
+ {
+ /*
+ * Limit the number of pages to be cleaned at once.
+ */
+ if (new_object != VM_OBJECT_NULL &&
+ new_offset >= PAGE_SIZE * DATA_WRITE_MAX)
+ {
+ PAGEOUT_PAGES;
+ }
+
+ while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
+ switch ((page_lock_result = memory_object_lock_page(m,
+ should_return,
+ should_flush,
+ prot)))
+ {
+ case MEMORY_OBJECT_LOCK_RESULT_DONE:
+ /*
+ * End of a cluster of dirty pages.
+ */
+ if (new_object != VM_OBJECT_NULL) {
+ PAGEOUT_PAGES;
+ continue;
+ }
+ break;
+
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
+ /*
+ * Since it is necessary to block,
+ * clean any dirty pages now.
+ */
+ if (new_object != VM_OBJECT_NULL) {
+ PAGEOUT_PAGES;
+ continue;
+ }
+
+ PAGE_ASSERT_WAIT(m, FALSE);
+ vm_object_unlock(object);
+ thread_block((void (*)()) 0);
+ vm_object_lock(object);
+ continue;
+
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
+ case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
+ /*
+ * The clean and return cases are similar.
+ *
+ * Mark the page busy since we unlock the
+ * object below.
+ */
+ m->busy = TRUE;
+
+ /*
+ * if this would form a discontiguous block,
+ * clean the old pages and start anew.
+ *
+ * NOTE: The first time through here, new_object
+ * is null, hiding the fact that pageout_action
+ * is not initialized.
+ */
+ if (new_object != VM_OBJECT_NULL &&
+ (last_offset != offset ||
+ pageout_action != page_lock_result)) {
+ PAGEOUT_PAGES;
+ }
+
+ vm_object_unlock(object);
+
+ /*
+ * If we have not already allocated an object
+ * for a range of pages to be written, do so
+ * now.
+ */
+ if (new_object == VM_OBJECT_NULL) {
+ new_object = vm_object_allocate(original_size);
+ new_offset = 0;
+ paging_offset = m->offset +
+ object->paging_offset;
+ pageout_action = page_lock_result;
+ }
+
+ /*
+ * Move or copy the dirty page into the
+ * new object.
+ */
+ m = vm_pageout_setup(m,
+ m->offset + object->paging_offset,
+ new_object,
+ new_offset,
+ should_flush);
+
+ /*
+ * Save the holding page if there is one.
+ */
+ holding_pages[atop(new_offset)] = m;
+ new_offset += PAGE_SIZE;
+ last_offset = offset + PAGE_SIZE;
+
+ vm_object_lock(object);
+ break;
+ }
+ break;
+ }
+ }
+
+ /*
+ * We have completed the scan for applicable pages.
+ * Clean any pages that have been saved.
+ */
+ if (new_object != VM_OBJECT_NULL) {
+ PAGEOUT_PAGES;
+ }
+
+ if (IP_VALID(reply_to)) {
+ vm_object_unlock(object);
+
+ /* consumes our naked send-once/send right for reply_to */
+ (void) memory_object_lock_completed(reply_to, reply_to_type,
+ object->pager_request, original_offset, original_size);
+
+ vm_object_lock(object);
+ }
+
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ vm_object_deallocate(object);
+
+ return (KERN_SUCCESS);
+}
+
+static kern_return_t
+memory_object_set_attributes_common(
+ vm_object_t object,
+ boolean_t may_cache,
+ memory_object_copy_strategy_t copy_strategy)
+{
+ if (object == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ /*
+ * Verify the attributes of importance
+ */
+
+ switch(copy_strategy) {
+ case MEMORY_OBJECT_COPY_NONE:
+ case MEMORY_OBJECT_COPY_CALL:
+ case MEMORY_OBJECT_COPY_DELAY:
+ case MEMORY_OBJECT_COPY_TEMPORARY:
+ break;
+ default:
+ vm_object_deallocate(object);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ if (may_cache)
+ may_cache = TRUE;
+
+ vm_object_lock(object);
+
+ /*
+ * Wake up anyone waiting for the ready attribute
+ * to become asserted.
+ */
+
+ if (!object->pager_ready) {
+ vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
+ }
+
+ /*
+ * Copy the attributes
+ */
+
+ object->can_persist = may_cache;
+ object->pager_ready = TRUE;
+ if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
+ object->temporary = TRUE;
+ } else {
+ object->copy_strategy = copy_strategy;
+ }
+
+ vm_object_unlock(object);
+
+ vm_object_deallocate(object);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * XXX rpd claims that reply_to could be obviated in favor of a client
+ * XXX stub that made change_attributes an RPC. Need investigation.
+ */
+
+kern_return_t memory_object_change_attributes(
+ vm_object_t object,
+ boolean_t may_cache,
+ memory_object_copy_strategy_t copy_strategy,
+ ipc_port_t reply_to,
+ mach_msg_type_name_t reply_to_type)
+{
+ kern_return_t result;
+
+ /*
+ * Do the work and throw away our object reference. It
+ * is important that the object reference be deallocated
+ * BEFORE sending the reply. The whole point of the reply
+ * is that it shows up after the terminate message that
+ * may be generated by setting the object uncacheable.
+ *
+ * XXX may_cache may become a tri-valued variable to handle
+ * XXX uncache if not in use.
+ */
+ result = memory_object_set_attributes_common(object, may_cache,
+ copy_strategy);
+
+ if (IP_VALID(reply_to)) {
+
+ /* consumes our naked send-once/send right for reply_to */
+ (void) memory_object_change_completed(reply_to, reply_to_type,
+ may_cache, copy_strategy);
+
+ }
+
+ return(result);
+}
+
+kern_return_t memory_object_ready(
+ vm_object_t object,
+ boolean_t may_cache,
+ memory_object_copy_strategy_t copy_strategy)
+{
+ return memory_object_set_attributes_common(object, may_cache,
+ copy_strategy);
+}
+
+kern_return_t memory_object_get_attributes(
+ vm_object_t object,
+ boolean_t *object_ready,
+ boolean_t *may_cache,
+ memory_object_copy_strategy_t *copy_strategy)
+{
+ if (object == VM_OBJECT_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ vm_object_lock(object);
+ *may_cache = object->can_persist;
+ *object_ready = object->pager_ready;
+ *copy_strategy = object->copy_strategy;
+ vm_object_unlock(object);
+
+ vm_object_deallocate(object);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * If successful, consumes the supplied naked send right.
+ */
+kern_return_t vm_set_default_memory_manager(
+ const host_t host,
+ ipc_port_t *default_manager)
+{
+ ipc_port_t current_manager;
+ ipc_port_t new_manager;
+ ipc_port_t returned_manager;
+
+ if (host == HOST_NULL)
+ return(KERN_INVALID_HOST);
+
+ new_manager = *default_manager;
+ simple_lock(&memory_manager_default_lock);
+ current_manager = memory_manager_default;
+
+ if (new_manager == IP_NULL) {
+ /*
+ * Retrieve the current value.
+ */
+
+ returned_manager = ipc_port_copy_send(current_manager);
+ } else {
+ /*
+ * Retrieve the current value,
+ * and replace it with the supplied value.
+ * We consume the supplied naked send right.
+ */
+
+ returned_manager = current_manager;
+ memory_manager_default = new_manager;
+
+ /*
+ * In case anyone's been waiting for a memory
+ * manager to be established, wake them up.
+ */
+
+ thread_wakeup((event_t) &memory_manager_default);
+ }
+
+ simple_unlock(&memory_manager_default_lock);
+
+ *default_manager = returned_manager;
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Routine: memory_manager_default_reference
+ * Purpose:
+ * Returns a naked send right for the default
+ * memory manager. The returned right is always
+ * valid (not IP_NULL or IP_DEAD).
+ */
+
+ipc_port_t memory_manager_default_reference(void)
+{
+ ipc_port_t current_manager;
+
+ simple_lock(&memory_manager_default_lock);
+
+ while (current_manager = ipc_port_copy_send(memory_manager_default),
+ !IP_VALID(current_manager)) {
+ thread_sleep((event_t) &memory_manager_default,
+ simple_lock_addr(memory_manager_default_lock),
+ FALSE);
+ simple_lock(&memory_manager_default_lock);
+ }
+
+ simple_unlock(&memory_manager_default_lock);
+
+ return current_manager;
+}
+
+/*
+ * Routine: memory_manager_default_port
+ * Purpose:
+ * Returns true if the receiver for the port
+ * is the default memory manager.
+ *
+ * This is a hack to let ds_read_done
+ * know when it should keep memory wired.
+ */
+
+boolean_t memory_manager_default_port(const ipc_port_t port)
+{
+ ipc_port_t current;
+ boolean_t result;
+
+ simple_lock(&memory_manager_default_lock);
+ current = memory_manager_default;
+ if (IP_VALID(current)) {
+ /*
+ * There is no point in bothering to lock
+ * both ports, which would be painful to do.
+ * If the receive rights are moving around,
+ * we might be inaccurate.
+ */
+
+ result = port->ip_receiver == current->ip_receiver;
+ } else
+ result = FALSE;
+ simple_unlock(&memory_manager_default_lock);
+
+ return result;
+}
+
+void memory_manager_default_init(void)
+{
+ memory_manager_default = IP_NULL;
+ simple_lock_init(&memory_manager_default_lock);
+}
diff --git a/vm/memory_object.h b/vm/memory_object.h
new file mode 100644
index 0000000..ee0c963
--- /dev/null
+++ b/vm/memory_object.h
@@ -0,0 +1,39 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#ifndef _VM_MEMORY_OBJECT_H_
+#define _VM_MEMORY_OBJECT_H_
+
+#include <mach/boolean.h>
+#include <ipc/ipc_types.h>
+
+extern ipc_port_t memory_manager_default_reference(void);
+extern boolean_t memory_manager_default_port(ipc_port_t);
+extern void memory_manager_default_init(void);
+
+extern ipc_port_t memory_manager_default;
+
+#endif /* _VM_MEMORY_OBJECT_H_ */
diff --git a/vm/memory_object_default.cli b/vm/memory_object_default.cli
new file mode 100644
index 0000000..998a986
--- /dev/null
+++ b/vm/memory_object_default.cli
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 1994 The University of Utah and
+ * the Computer Systems Laboratory at the University of Utah (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the
+ * Computer Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ *
+ * Author: Bryan Ford, University of Utah CSL
+ */
+/* This is a client presentation file. */
+
+#define KERNEL_USER 1
+#define SEQNOS 1
+
+#include <mach/memory_object_default.defs>
diff --git a/vm/memory_object_proxy.c b/vm/memory_object_proxy.c
new file mode 100644
index 0000000..5724349
--- /dev/null
+++ b/vm/memory_object_proxy.c
@@ -0,0 +1,228 @@
+/* memory_object_proxy.c - Proxy memory objects for Mach.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+ Written by Marcus Brinkmann.
+
+ This file is part of GNU Mach.
+
+ GNU Mach is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GNU Mach is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+/* A proxy memory object is a kernel port that can be used like a real
+ memory object in a vm_map call, except that the current and maximum
+ protection are restricted to the proxy object's maximum protection
+ at the time the mapping is established. The kernel port will hold
+ a reference to the real memory object for the life time of the
+ proxy object.
+
+ Note that we don't need to do any reference counting on the proxy
+ object. Our caller will hold a reference to the proxy object when
+ looking it up, and is expected to acquire its own reference to the
+ real memory object if needed before releasing the reference to the
+ proxy object.
+
+ The user provided real memory object and the maximum protection are
+ not checked for validity. The maximum protection is only used as a
+ mask, and the memory object is validated at the time the mapping is
+ established. */
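+/* A minimal usage sketch, assuming the user-level stubs generated from
+   the corresponding .defs files: a task creates a read-only proxy for a
+   memory object it holds and hands that out instead of the real object.
+
+	vm_offset_t offset = 0, start = 0;
+	vm_size_t len = vm_page_size;
+	mach_port_t proxy;
+
+	kr = memory_object_create_proxy (mach_task_self (), VM_PROT_READ,
+					 &object, 1, &offset, 1,
+					 &start, 1, &len, 1, &proxy);
+
+   A holder of PROXY can then pass it to vm_map, but never gets more
+   than VM_PROT_READ access to the underlying object.  */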
+
+#include <mach/port.h>
+#include <mach/kern_return.h>
+#include <mach/notify.h>
+#include <mach/vm_prot.h>
+#include <kern/printf.h>
+#include <kern/slab.h>
+#include <kern/mach4.server.h>
+#include <ipc/ipc_port.h>
+#include <ipc/ipc_space.h>
+
+#include <vm/memory_object_proxy.h>
+
+/* The cache which holds our proxy memory objects. */
+static struct kmem_cache memory_object_proxy_cache;
+
+struct memory_object_proxy
+{
+ struct ipc_port *port;
+
+ ipc_port_t object;
+ ipc_port_t notify;
+ vm_prot_t max_protection;
+ vm_offset_t start;
+ vm_offset_t len;
+};
+typedef struct memory_object_proxy *memory_object_proxy_t;
+
+
+void
+memory_object_proxy_init (void)
+{
+ kmem_cache_init (&memory_object_proxy_cache, "memory_object_proxy",
+ sizeof (struct memory_object_proxy), 0, NULL, 0);
+}
+
+/* Lookup a proxy memory object by its port. */
+static memory_object_proxy_t
+memory_object_proxy_port_lookup (ipc_port_t port)
+{
+ memory_object_proxy_t proxy;
+
+ if (!IP_VALID(port))
+ return 0;
+
+ ip_lock (port);
+ if (ip_active (port) && (ip_kotype (port) == IKOT_PAGER_PROXY))
+ proxy = (memory_object_proxy_t) port->ip_kobject;
+ else
+ proxy = 0;
+ ip_unlock (port);
+ return proxy;
+}
+
+
+/* Process a no-sender notification for the proxy memory object
+ port. */
+boolean_t
+memory_object_proxy_notify (mach_msg_header_t *msg)
+{
+ if (msg->msgh_id == MACH_NOTIFY_NO_SENDERS)
+ {
+ memory_object_proxy_t proxy;
+ mach_no_senders_notification_t *ns;
+
+ ns = (mach_no_senders_notification_t *) msg;
+
+ proxy = (memory_object_proxy_t)
+ ((ipc_port_t) ns->not_header.msgh_remote_port)->ip_kobject;
+ if (!proxy)
+ return FALSE;
+ if ((ipc_port_t) ns->not_header.msgh_remote_port != proxy->notify)
+ return FALSE;
+
+ ipc_port_release_send (proxy->object);
+
+ ipc_kobject_set (proxy->port, IKO_NULL, IKOT_NONE);
+ ipc_port_dealloc_kernel (proxy->port);
+ ipc_kobject_set (proxy->notify, IKO_NULL, IKOT_NONE);
+ ipc_port_dealloc_kernel (proxy->notify);
+
+ kmem_cache_free (&memory_object_proxy_cache, (vm_offset_t) proxy);
+
+ return TRUE;
+ }
+
+ printf ("memory_object_proxy_notify: strange notification %d\n",
+ msg->msgh_id);
+ return FALSE;
+}
+
+
+/* Create a new proxy memory object from [START;START+LEN) in the
+ given OBJECT at OFFSET in the new object with the maximum
+ protection MAX_PROTECTION and return it in *PORT. */
+kern_return_t
+memory_object_create_proxy (ipc_space_t space, vm_prot_t max_protection,
+ ipc_port_t *object, natural_t object_count,
+ rpc_vm_offset_t *offset, natural_t offset_count,
+ rpc_vm_offset_t *start, natural_t start_count,
+ rpc_vm_size_t *len, natural_t len_count,
+ ipc_port_t *port)
+{
+ memory_object_proxy_t proxy;
+ ipc_port_t notify;
+
+ if (space == IS_NULL)
+ return KERN_INVALID_TASK;
+
+ if (offset_count != object_count || start_count != object_count
+ || len_count != object_count)
+ return KERN_INVALID_ARGUMENT;
+
+ /* FIXME: Support more than one memory object. */
+ if (object_count != 1)
+ return KERN_INVALID_ARGUMENT;
+
+ if (!IP_VALID(object[0]))
+ return KERN_INVALID_NAME;
+
+ /* FIXME: Support a different offset from 0. */
+ if (offset[0] != 0)
+ return KERN_INVALID_ARGUMENT;
+
+ if (start[0] + len[0] < start[0])
+ return KERN_INVALID_ARGUMENT;
+
+ proxy = (memory_object_proxy_t) kmem_cache_alloc (&memory_object_proxy_cache);
+
+ /* Allocate port, keeping a reference for it. */
+ proxy->port = ipc_port_alloc_kernel ();
+ if (proxy->port == IP_NULL)
+ {
+ kmem_cache_free (&memory_object_proxy_cache, (vm_offset_t) proxy);
+ return KERN_RESOURCE_SHORTAGE;
+ }
+ /* Associate the port with the proxy memory object. */
+ ipc_kobject_set (proxy->port, (ipc_kobject_t) proxy, IKOT_PAGER_PROXY);
+
+ /* Request no-senders notifications on the port. */
+ proxy->notify = ipc_port_alloc_kernel ();
+ ipc_kobject_set (proxy->notify, (ipc_kobject_t) proxy, IKOT_PAGER_PROXY);
+ notify = ipc_port_make_sonce (proxy->notify);
+ ip_lock (proxy->port);
+ ipc_port_nsrequest (proxy->port, 1, notify, &notify);
+ assert (notify == IP_NULL);
+
+ /* Consumes the port right */
+ proxy->object = object[0];
+ proxy->max_protection = max_protection;
+ proxy->start = start[0];
+ proxy->len = len[0];
+
+ *port = ipc_port_make_send (proxy->port);
+ return KERN_SUCCESS;
+}
+
+/* Lookup the real memory object and maximum protection for the proxy
+ memory object port PORT, for which the caller holds a reference.
+ *OBJECT is only guaranteed to be valid as long as the caller holds
+ the reference to PORT (unless the caller acquires its own reference
+ to it). If PORT is not a proxy memory object, return
+ KERN_INVALID_ARGUMENT. */
+kern_return_t
+memory_object_proxy_lookup (ipc_port_t port, ipc_port_t *object,
+ vm_prot_t *max_protection, vm_offset_t *start,
+ vm_offset_t *len)
+{
+ memory_object_proxy_t proxy;
+
+ proxy = memory_object_proxy_port_lookup (port);
+ if (!proxy)
+ return KERN_INVALID_ARGUMENT;
+
+ *max_protection = proxy->max_protection;
+ *start = 0;
+ *len = (vm_offset_t) ~0;
+
+ do
+ {
+ *object = proxy->object;
+ if (proxy->len <= *start)
+ *len = 0;
+ else
+ *len = MIN(*len, proxy->len - *start);
+ *start += proxy->start;
+ }
+ while ((proxy = memory_object_proxy_port_lookup (proxy->object)));
+
+ return KERN_SUCCESS;
+}
diff --git a/vm/memory_object_proxy.h b/vm/memory_object_proxy.h
new file mode 100644
index 0000000..8b3f202
--- /dev/null
+++ b/vm/memory_object_proxy.h
@@ -0,0 +1,39 @@
+/* memory_object_proxy.h - Proxy memory objects for Mach.
+ Copyright (C) 2005, 2011 Free Software Foundation, Inc.
+ Written by Marcus Brinkmann.
+
+ This file is part of GNU Mach.
+
+ GNU Mach is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GNU Mach is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#ifndef _VM_MEMORY_OBJECT_PROXY_H_
+#define _VM_MEMORY_OBJECT_PROXY_H_
+
+#include <ipc/ipc_types.h>
+#include <mach/boolean.h>
+#include <mach/machine/kern_return.h>
+#include <mach/machine/vm_types.h>
+#include <mach/message.h>
+#include <mach/vm_prot.h>
+
+extern void memory_object_proxy_init (void);
+extern boolean_t memory_object_proxy_notify (mach_msg_header_t *msg);
+extern kern_return_t memory_object_proxy_lookup (ipc_port_t port,
+ ipc_port_t *object,
+ vm_prot_t *max_protection,
+ vm_offset_t *start,
+ vm_offset_t *len);
+
+#endif /* _VM_MEMORY_OBJECT_PROXY_H_ */
diff --git a/vm/memory_object_user.cli b/vm/memory_object_user.cli
new file mode 100644
index 0000000..2bba41f
--- /dev/null
+++ b/vm/memory_object_user.cli
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 1994 The University of Utah and
+ * the Computer Systems Laboratory at the University of Utah (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the
+ * Computer Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ *
+ * Author: Bryan Ford, University of Utah CSL
+ */
+/* This is a client presentation file. */
+
+#define KERNEL_USER 1
+#define SEQNOS 1
+
+#include <mach/memory_object.defs>
diff --git a/vm/pmap.h b/vm/pmap.h
new file mode 100644
index 0000000..aca9ada
--- /dev/null
+++ b/vm/pmap.h
@@ -0,0 +1,241 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/pmap.h
+ * Author: Avadis Tevanian, Jr.
+ * Date: 1985
+ *
+ * Machine address mapping definitions -- machine-independent
+ * section. [For machine-dependent section, see "machine/pmap.h".]
+ */
+
+#ifndef _VM_PMAP_H_
+#define _VM_PMAP_H_
+
+#include <machine/pmap.h>
+#include <mach/machine/vm_types.h>
+#include <mach/vm_prot.h>
+#include <mach/boolean.h>
+#include <kern/thread.h>
+
+/*
+ * The following is a description of the interface to the
+ * machine-dependent "physical map" data structure. The module
+ * must provide a "pmap_t" data type that represents the
+ * set of valid virtual-to-physical addresses for one user
+ * address space. [The kernel address space is represented
+ * by a distinguished "pmap_t".] The routines described manage
+ * this type, install and update virtual-to-physical mappings,
+ * and perform operations on physical addresses common to
+ * many address spaces.
+ */
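+/*
+ *	A minimal sketch of how the machine-independent VM code drives
+ *	this interface over the life of an address space:
+ *
+ *	pmap_t pmap = pmap_create(0);
+ *	pmap_enter(pmap, va, pa, VM_PROT_READ | VM_PROT_WRITE, FALSE);
+ *	pmap_protect(pmap, va, va + PAGE_SIZE, VM_PROT_READ);
+ *	pmap_remove(pmap, va, va + PAGE_SIZE);
+ *	pmap_destroy(pmap);
+ */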
+
+/*
+ * Routines used for initialization.
+ * There is traditionally also a pmap_bootstrap,
+ * used very early by machine-dependent code,
+ * but it is not part of the interface.
+ */
+
+/* During VM initialization, steal a chunk of memory. */
+extern vm_offset_t pmap_steal_memory(vm_size_t);
+/* Initialization, after kernel runs in virtual memory. */
+extern void pmap_init(void);
+
+#ifndef MACHINE_PAGES
+/*
+ * If machine/pmap.h defines MACHINE_PAGES, it must implement
+ * the above functions. The pmap module has complete control.
+ * Otherwise, it must implement
+ * pmap_virtual_space
+ * pmap_init
+ * and vm/vm_resident.c implements pmap_steal_memory using
+ * pmap_virtual_space and pmap_enter.
+ */
+
+/* During VM initialization, report virtual space available for the kernel. */
+extern void pmap_virtual_space(vm_offset_t *, vm_offset_t *);
+#endif /* MACHINE_PAGES */
+
+/*
+ * Routines to manage the physical map data structure.
+ */
+
+/* Create a pmap_t. */
+pmap_t pmap_create(vm_size_t size);
+
+/* Return the kernel's pmap_t. */
+#ifndef pmap_kernel
+extern pmap_t pmap_kernel(void);
+#endif /* pmap_kernel */
+
+/* Gain and release a reference. */
+extern void pmap_reference(pmap_t pmap);
+extern void pmap_destroy(pmap_t pmap);
+
+/* Enter a mapping */
+extern void pmap_enter(pmap_t pmap, vm_offset_t va, phys_addr_t pa,
+ vm_prot_t prot, boolean_t wired);
+
+
+/*
+ * Routines that operate on ranges of virtual addresses.
+ */
+
+/* Remove mappings. */
+void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+
+/* Change protections. */
+void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot);
+
+/*
+ * Routines to set up hardware state for physical maps to be used.
+ */
+/* Prepare pmap_t to run on a given processor. */
+extern void pmap_activate(pmap_t, thread_t, int);
+/* Release pmap_t from use on processor. */
+extern void pmap_deactivate(pmap_t, thread_t, int);
+
+
+/*
+ * Routines that operate on physical addresses.
+ */
+
+/* Restrict access to page. */
+void pmap_page_protect(phys_addr_t pa, vm_prot_t prot);
+
+/*
+ * Routines to manage reference/modify bits based on
+ * physical addresses, simulating them if not provided
+ * by the hardware.
+ */
+
+/* Clear reference bit */
+void pmap_clear_reference(phys_addr_t pa);
+
+/* Return reference bit */
+#ifndef pmap_is_referenced
+boolean_t pmap_is_referenced(phys_addr_t pa);
+#endif /* pmap_is_referenced */
+
+/* Clear modify bit */
+void pmap_clear_modify(phys_addr_t pa);
+
+/* Return modify bit */
+boolean_t pmap_is_modified(phys_addr_t pa);
+
+/*
+ * Sundry required routines
+ */
+/* Return a virtual-to-physical mapping, if possible. */
+extern phys_addr_t pmap_extract(pmap_t, vm_offset_t);
+/* Perform garbage collection, if any. */
+extern void pmap_collect(pmap_t);
+
+/* Lookup an address. */
+int pmap_whatis(pmap_t, vm_offset_t);
+
+/* Specify pageability. */
+extern void pmap_change_wiring(pmap_t, vm_offset_t, boolean_t);
+
+/*
+ * Optional routines
+ */
+#ifndef pmap_copy
+/* Copy range of mappings, if desired. */
+extern void pmap_copy(pmap_t, pmap_t, vm_offset_t, vm_size_t,
+ vm_offset_t);
+#endif /* pmap_copy */
+#ifndef pmap_attribute
+/* Get/Set special memory attributes. */
+extern kern_return_t pmap_attribute(void);
+#endif /* pmap_attribute */
+
+/*
+ * Grab a physical page:
+ * the standard memory allocation mechanism
+ * during system initialization.
+ */
+extern vm_offset_t pmap_grab_page (void);
+
+/*
+ * Make the specified pages (by pmap, offset)
+ * pageable (or not) as requested.
+ */
+extern void pmap_pageable(
+ pmap_t pmap,
+ vm_offset_t start,
+ vm_offset_t end,
+ boolean_t pageable);
+
+/*
+ * Back-door routine for mapping kernel VM at initialization.
+ * Useful for mapping memory outside the range of direct mapped
+ * physical memory (i.e., devices).
+ */
+extern vm_offset_t pmap_map_bd(
+ vm_offset_t virt,
+ phys_addr_t start,
+ phys_addr_t end,
+ vm_prot_t prot);
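+/*
+ * A minimal sketch, with a hypothetical device address: machine-dependent
+ * startup code might map one page of a device into the kernel map with
+ *
+ *	vm_offset_t va = pmap_map_bd(virt, 0xfee00000,
+ *				     0xfee00000 + PAGE_SIZE,
+ *				     VM_PROT_READ | VM_PROT_WRITE);
+ */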
+
+/*
+ * Routines defined as macros.
+ */
+#ifndef PMAP_ACTIVATE_USER
+#define PMAP_ACTIVATE_USER(pmap, thread, cpu) { \
+ if ((pmap) != kernel_pmap) \
+ PMAP_ACTIVATE(pmap, thread, cpu); \
+}
+#endif /* PMAP_ACTIVATE_USER */
+
+#ifndef PMAP_DEACTIVATE_USER
+#define PMAP_DEACTIVATE_USER(pmap, thread, cpu) { \
+ if ((pmap) != kernel_pmap) \
+ PMAP_DEACTIVATE(pmap, thread, cpu); \
+}
+#endif /* PMAP_DEACTIVATE_USER */
+
+#ifndef PMAP_ACTIVATE_KERNEL
+#define PMAP_ACTIVATE_KERNEL(cpu) \
+ PMAP_ACTIVATE(kernel_pmap, THREAD_NULL, cpu)
+#endif /* PMAP_ACTIVATE_KERNEL */
+
+#ifndef PMAP_DEACTIVATE_KERNEL
+#define PMAP_DEACTIVATE_KERNEL(cpu) \
+ PMAP_DEACTIVATE(kernel_pmap, THREAD_NULL, cpu)
+#endif /* PMAP_DEACTIVATE_KERNEL */
+
+/*
+ * Exported data structures
+ */
+
+extern pmap_t kernel_pmap; /* The kernel's map */
+
+#endif /* _VM_PMAP_H_ */
diff --git a/vm/vm_debug.c b/vm/vm_debug.c
new file mode 100644
index 0000000..b0dace8
--- /dev/null
+++ b/vm/vm_debug.c
@@ -0,0 +1,548 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_debug.c.
+ * Author: Rich Draves
+ * Date: March, 1990
+ *
+ * Exported kernel calls. See mach_debug/mach_debug.defs.
+ */
+
+#include <string.h>
+
+#include <kern/debug.h>
+#include <kern/thread.h>
+#include <mach/kern_return.h>
+#include <mach/machine/vm_types.h>
+#include <mach/memory_object.h>
+#include <mach/vm_prot.h>
+#include <mach/vm_inherit.h>
+#include <mach/vm_param.h>
+#include <mach_debug/vm_info.h>
+#include <mach_debug/hash_info.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <kern/mach_debug.server.h>
+#include <kern/task.h>
+#include <kern/host.h>
+#include <kern/printf.h>
+#include <ipc/ipc_port.h>
+
+
+#if MACH_VM_DEBUG
+
+/*
+ * Routine: vm_object_real_name
+ * Purpose:
+ * Convert a VM object to a name port.
+ * Conditions:
+ * Takes object and port locks.
+ * Returns:
+ * A naked send right for the object's name port,
+ * or IP_NULL if the object or its name port is null.
+ */
+
+static ipc_port_t
+vm_object_real_name(vm_object_t object)
+{
+ ipc_port_t port = IP_NULL;
+
+ if (object != VM_OBJECT_NULL) {
+ vm_object_lock(object);
+ if (object->pager_name != IP_NULL)
+ port = ipc_port_make_send(object->pager_name);
+ vm_object_unlock(object);
+ }
+
+ return port;
+}
+
+/*
+ * Routine: mach_vm_region_info [kernel call]
+ * Purpose:
+ * Retrieve information about a VM region,
+ * including info about the object chain.
+ * Conditions:
+ * Nothing locked.
+ * Returns:
+ * KERN_SUCCESS Retrieve region/object info.
+ * KERN_INVALID_TASK The map is null.
+ * KERN_NO_SPACE There is no entry at/after the address.
+ */
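+/*
+ *	A minimal usage sketch, assuming the mach_debug user stub for
+ *	this call: a debugging client walks a task's address space by
+ *	advancing to vri_end after each successful query.
+ *
+ *	vm_region_info_t region;
+ *	mach_port_t object_name;
+ *
+ *	kr = mach_vm_region_info(task, address, &region, &object_name);
+ *	if (kr == KERN_SUCCESS)
+ *		address = region.vri_end;
+ */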
+
+kern_return_t
+mach_vm_region_info(
+ vm_map_t map,
+ vm_offset_t address,
+ vm_region_info_t *regionp,
+ ipc_port_t *portp)
+{
+ vm_map_t cmap; /* current map in traversal */
+ vm_map_t nmap; /* next map to look at */
+ vm_map_entry_t entry; /* entry in current map */
+ vm_object_t object;
+
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ /* find the entry containing (or following) the address */
+
+ vm_map_lock_read(map);
+ for (cmap = map;;) {
+ /* cmap is read-locked */
+
+ if (!vm_map_lookup_entry(cmap, address, &entry)) {
+ entry = entry->vme_next;
+ if (entry == vm_map_to_entry(cmap)) {
+ if (map == cmap) {
+ vm_map_unlock_read(cmap);
+ return KERN_NO_SPACE;
+ }
+
+ /* back out to top-level & skip this submap */
+
+ address = vm_map_max(cmap);
+ vm_map_unlock_read(cmap);
+ vm_map_lock_read(map);
+ cmap = map;
+ continue;
+ }
+ }
+
+ if (entry->is_sub_map) {
+ /* move down to the sub map */
+
+ nmap = entry->object.sub_map;
+ vm_map_lock_read(nmap);
+ vm_map_unlock_read(cmap);
+ cmap = nmap;
+ continue;
+ } else {
+ break;
+ }
+ /*NOTREACHED*/
+ }
+
+
+ assert(entry->vme_start < entry->vme_end);
+
+ regionp->vri_start = entry->vme_start;
+ regionp->vri_end = entry->vme_end;
+
+ /* attributes from the real entry */
+
+ regionp->vri_protection = entry->protection;
+ regionp->vri_max_protection = entry->max_protection;
+ regionp->vri_inheritance = entry->inheritance;
+ regionp->vri_wired_count = !!entry->wired_count; /* Doesn't stack */
+ regionp->vri_user_wired_count = regionp->vri_wired_count; /* Obsolete */
+
+ object = entry->object.vm_object;
+ *portp = vm_object_real_name(object);
+ regionp->vri_object = (vm_offset_t) object;
+ regionp->vri_offset = entry->offset;
+ regionp->vri_needs_copy = entry->needs_copy;
+
+ regionp->vri_sharing = entry->is_shared;
+
+ vm_map_unlock_read(cmap);
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: mach_vm_object_info [kernel call]
+ * Purpose:
+ * Retrieve information about a VM object.
+ * Conditions:
+ * Nothing locked.
+ * Returns:
+ * KERN_SUCCESS Retrieved object info.
+ * KERN_INVALID_ARGUMENT The object is null.
+ */
+
+kern_return_t
+mach_vm_object_info(
+ vm_object_t object,
+ vm_object_info_t *infop,
+ ipc_port_t *shadowp,
+ ipc_port_t *copyp)
+{
+ vm_object_info_t info;
+ vm_object_info_state_t state;
+ ipc_port_t shadow, copy;
+
+ if (object == VM_OBJECT_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ /*
+ * Because of lock-ordering/deadlock considerations,
+ * we can't use vm_object_real_name for the copy object.
+ */
+
+ retry:
+ vm_object_lock(object);
+ copy = IP_NULL;
+ if (object->copy != VM_OBJECT_NULL) {
+ if (!vm_object_lock_try(object->copy)) {
+ vm_object_unlock(object);
+ simple_lock_pause(); /* wait a bit */
+ goto retry;
+ }
+
+ if (object->copy->pager_name != IP_NULL)
+ copy = ipc_port_make_send(object->copy->pager_name);
+ vm_object_unlock(object->copy);
+ }
+ shadow = vm_object_real_name(object->shadow);
+
+ info.voi_object = (vm_offset_t) object;
+ info.voi_pagesize = PAGE_SIZE;
+ info.voi_size = object->size;
+ info.voi_ref_count = object->ref_count;
+ info.voi_resident_page_count = object->resident_page_count;
+ info.voi_absent_count = object->absent_count;
+ info.voi_copy = (vm_offset_t) object->copy;
+ info.voi_shadow = (vm_offset_t) object->shadow;
+ info.voi_shadow_offset = object->shadow_offset;
+ info.voi_paging_offset = object->paging_offset;
+ info.voi_copy_strategy = object->copy_strategy;
+ info.voi_last_alloc = object->last_alloc;
+ info.voi_paging_in_progress = object->paging_in_progress;
+
+ state = 0;
+ if (object->pager_created)
+ state |= VOI_STATE_PAGER_CREATED;
+ if (object->pager_initialized)
+ state |= VOI_STATE_PAGER_INITIALIZED;
+ if (object->pager_ready)
+ state |= VOI_STATE_PAGER_READY;
+ if (object->can_persist)
+ state |= VOI_STATE_CAN_PERSIST;
+ if (object->internal)
+ state |= VOI_STATE_INTERNAL;
+ if (object->temporary)
+ state |= VOI_STATE_TEMPORARY;
+ if (object->alive)
+ state |= VOI_STATE_ALIVE;
+ if (object->lock_in_progress)
+ state |= VOI_STATE_LOCK_IN_PROGRESS;
+ if (object->lock_restart)
+ state |= VOI_STATE_LOCK_RESTART;
+ info.voi_state = state;
+ vm_object_unlock(object);
+
+ *infop = info;
+ *shadowp = shadow;
+ *copyp = copy;
+ return KERN_SUCCESS;
+}
+
+#define VPI_STATE_NODATA (VPI_STATE_BUSY|VPI_STATE_FICTITIOUS| \
+ VPI_STATE_PRIVATE|VPI_STATE_ABSENT)
+
+/*
+ *	Routine:	mach_vm_object_pages/mach_vm_object_pages_phys [kernel call]
+ * Purpose:
+ * Retrieve information about the pages in a VM object.
+ * Conditions:
+ * Nothing locked. Obeys CountInOut protocol.
+ * Returns:
+ * KERN_SUCCESS Retrieved object info.
+ * KERN_INVALID_ARGUMENT The object is null.
+ * KERN_RESOURCE_SHORTAGE Couldn't allocate memory.
+ */
+
+static kern_return_t
+_mach_vm_object_pages(
+ vm_object_t object,
+ void* *pagesp,
+ natural_t *countp,
+ int phys)
+{
+ vm_size_t size;
+ vm_offset_t addr;
+ void *pages;
+ unsigned int potential, actual, count;
+ vm_page_t p;
+ kern_return_t kr;
+
+ if (object == VM_OBJECT_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ /* start with in-line memory */
+
+ pages = *pagesp;
+ potential = *countp;
+
+ for (size = 0;;) {
+ vm_object_lock(object);
+ actual = object->resident_page_count;
+ if (actual <= potential)
+ break;
+ vm_object_unlock(object);
+
+ if (pages != *pagesp)
+ kmem_free(ipc_kernel_map, addr, size);
+
+ if (phys)
+ size = round_page(actual * sizeof(vm_page_phys_info_t));
+ else
+ size = round_page(actual * sizeof(vm_page_info_t));
+ kr = kmem_alloc(ipc_kernel_map, &addr, size);
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ pages = (void *) addr;
+ if (phys)
+ potential = size / sizeof(vm_page_phys_info_t);
+ else
+ potential = size / sizeof(vm_page_info_t);
+ }
+ /* object is locked, we have enough wired memory */
+
+ count = 0;
+ queue_iterate(&object->memq, p, vm_page_t, listq) {
+ vm_page_info_t *info = NULL;
+ vm_page_phys_info_t *info_phys = NULL;
+
+ if (phys)
+ info_phys = pages + count * sizeof(*info_phys);
+ else
+ info = pages + count * sizeof(*info);
+ count++;
+
+ vm_page_info_state_t state = 0;
+
+ if (phys) {
+ info_phys->vpi_offset = p->offset;
+ if (p->phys_addr != (typeof(info_phys->vpi_phys_addr)) p->phys_addr)
+ printf("warning: physical address overflow in mach_vm_object_pages!!\n");
+ info_phys->vpi_phys_addr = p->phys_addr;
+ info_phys->vpi_wire_count = p->wire_count;
+ info_phys->vpi_page_lock = p->page_lock;
+ info_phys->vpi_unlock_request = p->unlock_request;
+ } else {
+ info->vpi_offset = p->offset;
+ if (p->phys_addr != (typeof(info->vpi_phys_addr)) p->phys_addr)
+ printf("warning: physical address overflow in mach_vm_object_pages!!\n");
+ info->vpi_phys_addr = p->phys_addr;
+ info->vpi_wire_count = p->wire_count;
+ info->vpi_page_lock = p->page_lock;
+ info->vpi_unlock_request = p->unlock_request;
+ }
+
+ if (p->busy)
+ state |= VPI_STATE_BUSY;
+ if (p->wanted)
+ state |= VPI_STATE_WANTED;
+ if (p->tabled)
+ state |= VPI_STATE_TABLED;
+ if (p->fictitious)
+ state |= VPI_STATE_FICTITIOUS;
+ if (p->private)
+ state |= VPI_STATE_PRIVATE;
+ if (p->absent)
+ state |= VPI_STATE_ABSENT;
+ if (p->error)
+ state |= VPI_STATE_ERROR;
+ if (p->dirty)
+ state |= VPI_STATE_DIRTY;
+ if (p->precious)
+ state |= VPI_STATE_PRECIOUS;
+ if (p->overwriting)
+ state |= VPI_STATE_OVERWRITING;
+
+ if (((state & (VPI_STATE_NODATA|VPI_STATE_DIRTY)) == 0) &&
+ pmap_is_modified(p->phys_addr)) {
+ state |= VPI_STATE_DIRTY;
+ p->dirty = TRUE;
+ }
+
+ vm_page_lock_queues();
+ if (p->inactive)
+ state |= VPI_STATE_INACTIVE;
+ if (p->active)
+ state |= VPI_STATE_ACTIVE;
+ if (p->laundry)
+ state |= VPI_STATE_LAUNDRY;
+ if (p->free)
+ state |= VPI_STATE_FREE;
+ if (p->reference)
+ state |= VPI_STATE_REFERENCE;
+
+ if (((state & (VPI_STATE_NODATA|VPI_STATE_REFERENCE)) == 0) &&
+ pmap_is_referenced(p->phys_addr)) {
+ state |= VPI_STATE_REFERENCE;
+ p->reference = TRUE;
+ }
+ vm_page_unlock_queues();
+
+ if (phys)
+ info_phys->vpi_state = state;
+ else
+ info->vpi_state = state;
+ }
+
+ if (object->resident_page_count != count)
+ panic("mach_vm_object_pages");
+ vm_object_unlock(object);
+
+ if (pages == *pagesp) {
+ /* data fit in-line; nothing to deallocate */
+
+ *countp = actual;
+ } else if (actual == 0) {
+ kmem_free(ipc_kernel_map, addr, size);
+
+ *countp = 0;
+ } else {
+ vm_size_t size_used, rsize_used;
+ vm_map_copy_t copy;
+
+ /* kmem_alloc doesn't zero memory */
+
+ if (phys)
+ size_used = actual * sizeof(vm_page_phys_info_t);
+ else
+ size_used = actual * sizeof(vm_page_info_t);
+ rsize_used = round_page(size_used);
+
+ if (rsize_used != size)
+ kmem_free(ipc_kernel_map,
+ addr + rsize_used, size - rsize_used);
+
+ if (size_used != rsize_used)
+ memset((void *) (addr + size_used), 0,
+ rsize_used - size_used);
+
+ kr = vm_map_copyin(ipc_kernel_map, addr, rsize_used,
+ TRUE, &copy);
+ assert(kr == KERN_SUCCESS);
+
+ *pagesp = (void *) copy;
+ *countp = actual;
+ }
+
+ return KERN_SUCCESS;
+}
+
+kern_return_t
+mach_vm_object_pages(
+ vm_object_t object,
+ vm_page_info_array_t *pagesp,
+ natural_t *countp)
+{
+ return _mach_vm_object_pages(object, (void**) pagesp, countp, 0);
+}
+
+kern_return_t
+mach_vm_object_pages_phys(
+ vm_object_t object,
+ vm_page_phys_info_array_t *pagesp,
+ natural_t *countp)
+{
+ return _mach_vm_object_pages(object, (void**) pagesp, countp, 1);
+}
+
+#endif /* MACH_VM_DEBUG */
+
+/*
+ * Routine: host_virtual_physical_table_info
+ * Purpose:
+ * Return information about the VP table.
+ * Conditions:
+ * Nothing locked. Obeys CountInOut protocol.
+ * Returns:
+ * KERN_SUCCESS Returned information.
+ * KERN_INVALID_HOST The host is null.
+ * KERN_RESOURCE_SHORTAGE Couldn't allocate memory.
+ */
+
+kern_return_t
+host_virtual_physical_table_info(const host_t host,
+ hash_info_bucket_array_t *infop, natural_t *countp)
+{
+ vm_offset_t addr;
+ vm_size_t size = 0;/* '=0' to quiet gcc warnings */
+ hash_info_bucket_t *info;
+ unsigned int potential, actual;
+ kern_return_t kr;
+
+ if (host == HOST_NULL)
+ return KERN_INVALID_HOST;
+
+ /* start with in-line data */
+
+ info = *infop;
+ potential = *countp;
+
+ for (;;) {
+ actual = vm_page_info(info, potential);
+ if (actual <= potential)
+ break;
+
+ /* allocate more memory */
+
+ if (info != *infop)
+ kmem_free(ipc_kernel_map, addr, size);
+
+ size = round_page(actual * sizeof *info);
+ kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size);
+ if (kr != KERN_SUCCESS)
+ return KERN_RESOURCE_SHORTAGE;
+
+ info = (hash_info_bucket_t *) addr;
+ potential = size/sizeof *info;
+ }
+
+ if (info == *infop) {
+ /* data fit in-line; nothing to deallocate */
+
+ *countp = actual;
+ } else if (actual == 0) {
+ kmem_free(ipc_kernel_map, addr, size);
+
+ *countp = 0;
+ } else {
+ vm_map_copy_t copy;
+ vm_size_t used;
+
+ used = round_page(actual * sizeof *info);
+
+ if (used != size)
+ kmem_free(ipc_kernel_map, addr + used, size - used);
+
+ kr = vm_map_copyin(ipc_kernel_map, addr, used,
+ TRUE, &copy);
+ assert(kr == KERN_SUCCESS);
+
+ *infop = (hash_info_bucket_t *) copy;
+ *countp = actual;
+ }
+
+ return KERN_SUCCESS;
+}
diff --git a/vm/vm_external.c b/vm/vm_external.c
new file mode 100644
index 0000000..99f4b9c
--- /dev/null
+++ b/vm/vm_external.c
@@ -0,0 +1,151 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * This module maintains information about the presence of
+ * pages not in memory. Since an external memory object
+ * must maintain a complete knowledge of its contents, this
+ * information takes the form of hints.
+ */
+
+#include <mach/boolean.h>
+#include <kern/slab.h>
+#include <vm/vm_external.h>
+#include <mach/vm_param.h>
+#include <kern/assert.h>
+#include <string.h>
+
+
+
+boolean_t vm_external_unsafe = FALSE;
+
+struct kmem_cache vm_external_cache;
+
+/*
+ * The implementation uses bit arrays to record whether
+ * a page has been written to external storage. For
+ * convenience, these bit arrays come in two sizes
+ * (measured in bytes).
+ */
+
+#define SMALL_SIZE (VM_EXTERNAL_SMALL_SIZE/8)
+#define LARGE_SIZE (VM_EXTERNAL_LARGE_SIZE/8)
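+
+/*
+ *	For example, assuming 4 KiB pages: a small map (SMALL_SIZE = 16
+ *	bytes) covers 128 pages, i.e. 512 KiB of object; a large map
+ *	(LARGE_SIZE = 1024 bytes) covers 8192 pages, i.e. 32 MiB.  An
+ *	8 MiB object needs (atop(8 MiB) + 7) >> 3 = 256 bytes of bitmap,
+ *	so it gets a large map.
+ */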
+
+struct kmem_cache vm_object_small_existence_map_cache;
+struct kmem_cache vm_object_large_existence_map_cache;
+
+
+vm_external_t vm_external_create(vm_offset_t size)
+{
+ vm_external_t result;
+ vm_size_t bytes;
+
+ result = (vm_external_t) kmem_cache_alloc(&vm_external_cache);
+ result->existence_map = (char *) 0;
+
+ bytes = (atop(size) + 07) >> 3;
+ if (bytes <= SMALL_SIZE) {
+ result->existence_map =
+ (char *) kmem_cache_alloc(&vm_object_small_existence_map_cache);
+ result->existence_size = SMALL_SIZE;
+ } else {
+ result->existence_map =
+ (char *) kmem_cache_alloc(&vm_object_large_existence_map_cache);
+ result->existence_size = LARGE_SIZE;
+ }
+ memset (result->existence_map, 0, result->existence_size);
+ return(result);
+}
+
+void vm_external_destroy(vm_external_t e)
+{
+ if (e == VM_EXTERNAL_NULL)
+ return;
+
+ if (e->existence_map != (char *) 0) {
+ if (e->existence_size <= SMALL_SIZE) {
+ kmem_cache_free(&vm_object_small_existence_map_cache,
+ (vm_offset_t) e->existence_map);
+ } else {
+ kmem_cache_free(&vm_object_large_existence_map_cache,
+ (vm_offset_t) e->existence_map);
+ }
+ }
+ kmem_cache_free(&vm_external_cache, (vm_offset_t) e);
+}
+
+vm_external_state_t _vm_external_state_get(const vm_external_t e,
+ vm_offset_t offset)
+{
+	unsigned int	bit, byte;
+
+ if (vm_external_unsafe ||
+ (e == VM_EXTERNAL_NULL) ||
+ (e->existence_map == (char *) 0))
+ return(VM_EXTERNAL_STATE_UNKNOWN);
+
+ bit = atop(offset);
+ byte = bit >> 3;
+ if (byte >= e->existence_size) return (VM_EXTERNAL_STATE_UNKNOWN);
+ return( (e->existence_map[byte] & (1 << (bit & 07))) ?
+ VM_EXTERNAL_STATE_EXISTS : VM_EXTERNAL_STATE_ABSENT );
+}
+
+void vm_external_state_set(
+ vm_external_t e,
+ vm_offset_t offset,
+ vm_external_state_t state)
+{
+	unsigned int	bit, byte;
+
+ if ((e == VM_EXTERNAL_NULL) || (e->existence_map == (char *) 0))
+ return;
+
+ if (state != VM_EXTERNAL_STATE_EXISTS)
+ return;
+
+ bit = atop(offset);
+ byte = bit >> 3;
+ if (byte >= e->existence_size) return;
+ e->existence_map[byte] |= (1 << (bit & 07));
+}
+
+void vm_external_module_initialize(void)
+{
+ vm_size_t size = (vm_size_t) sizeof(struct vm_external);
+
+ kmem_cache_init(&vm_external_cache, "vm_external", size, 0,
+ NULL, 0);
+
+ kmem_cache_init(&vm_object_small_existence_map_cache,
+ "small_existence_map", SMALL_SIZE, 0,
+ NULL, 0);
+
+ kmem_cache_init(&vm_object_large_existence_map_cache,
+ "large_existence_map", LARGE_SIZE, 0,
+ NULL, 0);
+}
diff --git a/vm/vm_external.h b/vm/vm_external.h
new file mode 100644
index 0000000..4e44ddf
--- /dev/null
+++ b/vm/vm_external.h
@@ -0,0 +1,95 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#ifndef _VM_VM_EXTERNAL_H_
+#define _VM_VM_EXTERNAL_H_
+
+/*
+ * External page management hint technology
+ *
+ * The data structure exported by this module maintains
+ * a (potentially incomplete) map of the pages written
+ * to external storage for a range of virtual memory.
+ */
+
+/*
+ * The data structure representing the state of pages
+ * on external storage.
+ */
+
+typedef struct vm_external {
+	int		existence_size;	/* Size of the following bitmap,
+					 * in bytes */
+ char *existence_map; /* A bitmap of pages that have
+ * been written to backing
+ * storage.
+ */
+#if 0
+ /* XXX: Currently, existence_count is not used. I guess it
+ could be useful to get rid of the map if the count drops to
+ zero. */
+ int existence_count;/* Number of bits turned on in
+ * existence_map.
+ */
+#endif
+} *vm_external_t;
+
+#define VM_EXTERNAL_NULL ((vm_external_t) 0)
+
+#define VM_EXTERNAL_SMALL_SIZE 128
+#define VM_EXTERNAL_LARGE_SIZE 8192
+
+/*
+ * The states that may be recorded for a page of external storage.
+ */
+
+typedef int vm_external_state_t;
+#define VM_EXTERNAL_STATE_EXISTS 1
+#define VM_EXTERNAL_STATE_UNKNOWN 2
+#define VM_EXTERNAL_STATE_ABSENT 3
+
+
+/*
+ * Routines exported by this module.
+ */
+
+/* Initialize the module */
+extern void vm_external_module_initialize(void);
+/* Create a vm_external_t */
+extern vm_external_t vm_external_create(vm_offset_t);
+/* Destroy one */
+extern void vm_external_destroy(vm_external_t);
+
+/* Set state of a page. */
+extern void vm_external_state_set(vm_external_t, vm_offset_t,
+ vm_external_state_t);
+/* Retrieve the state for a given page, if known. */
+#define vm_external_state_get(e,offset) (((e) != VM_EXTERNAL_NULL) ? \
+ _vm_external_state_get(e, offset) : \
+ VM_EXTERNAL_STATE_UNKNOWN)
+/* HIDDEN routine */
+extern vm_external_state_t _vm_external_state_get(vm_external_t, vm_offset_t);
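+
+/*
+ *	Typical use: with MACH_PAGEMAP configured, the fault handler
+ *	consults vm_external_state_get() on an object's existence_info
+ *	and skips asking the pager for pages already recorded as
+ *	VM_EXTERNAL_STATE_ABSENT.
+ */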
+
+#endif /* _VM_VM_EXTERNAL_H_ */
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
new file mode 100644
index 0000000..c6e2800
--- /dev/null
+++ b/vm/vm_fault.c
@@ -0,0 +1,2136 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1994,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm_fault.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Page fault handling module.
+ */
+
+#include <kern/printf.h>
+#include <vm/vm_fault.h>
+#include <mach/kern_return.h>
+#include <mach/message.h> /* for error codes */
+#include <kern/counters.h>
+#include <kern/debug.h>
+#include <kern/thread.h>
+#include <kern/sched_prim.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/pmap.h>
+#include <mach/vm_statistics.h>
+#include <vm/vm_pageout.h>
+#include <mach/vm_param.h>
+#include <mach/memory_object.h>
+#include <vm/memory_object_user.user.h>
+ /* For memory_object_data_{request,unlock} */
+#include <kern/macros.h>
+#include <kern/slab.h>
+
+#if MACH_PCSAMPLE
+#include <kern/pc_sample.h>
+#endif
+
+
+
+/*
+ * State needed by vm_fault_continue.
+ * This is a little hefty to drop directly
+ * into the thread structure.
+ */
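+/*
+ *	Roughly, the vmf_* fields are saved by vm_fault itself and the
+ *	vmfp_* fields by vm_fault_page, in each case just before blocking
+ *	with a continuation.
+ */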
+typedef struct vm_fault_state {
+ struct vm_map *vmf_map;
+ vm_offset_t vmf_vaddr;
+ vm_prot_t vmf_fault_type;
+ boolean_t vmf_change_wiring;
+ vm_fault_continuation_t vmf_continuation;
+ vm_map_version_t vmf_version;
+ boolean_t vmf_wired;
+ struct vm_object *vmf_object;
+ vm_offset_t vmf_offset;
+ vm_prot_t vmf_prot;
+
+ boolean_t vmfp_backoff;
+ struct vm_object *vmfp_object;
+ vm_offset_t vmfp_offset;
+ struct vm_page *vmfp_first_m;
+ vm_prot_t vmfp_access;
+} vm_fault_state_t;
+
+struct kmem_cache vm_fault_state_cache;
+
+int vm_object_absent_max = 50;
+
+boolean_t vm_fault_dirty_handling = FALSE;
+boolean_t vm_fault_interruptible = TRUE;
+
+boolean_t software_reference_bits = TRUE;
+
+#if MACH_KDB
+extern struct db_watchpoint *db_watchpoint_list;
+#endif /* MACH_KDB */
+
+/*
+ * Routine: vm_fault_init
+ * Purpose:
+ * Initialize our private data structures.
+ */
+void vm_fault_init(void)
+{
+ kmem_cache_init(&vm_fault_state_cache, "vm_fault_state",
+ sizeof(vm_fault_state_t), 0, NULL, 0);
+}
+
+/*
+ * Routine: vm_fault_cleanup
+ * Purpose:
+ * Clean up the result of vm_fault_page.
+ * Results:
+ * The paging reference for "object" is released.
+ * "object" is unlocked.
+ * If "top_page" is not null, "top_page" is
+ * freed and the paging reference for the object
+ * containing it is released.
+ *
+ * In/out conditions:
+ * "object" must be locked.
+ */
+void
+vm_fault_cleanup(
+ vm_object_t object,
+ vm_page_t top_page)
+{
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ if (top_page != VM_PAGE_NULL) {
+ object = top_page->object;
+ vm_object_lock(object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ }
+}
+
+
+#if MACH_PCSAMPLE
+/*
+ * Do PC sampling on current thread, assuming
+ * that it is the thread taking this page fault.
+ *
+ * Must check for THREAD_NULL, since faults
+ * can occur before threads are running.
+ */
+
+#define vm_stat_sample(flavor) \
+ MACRO_BEGIN \
+ thread_t _thread_ = current_thread(); \
+ \
+ if (_thread_ != THREAD_NULL) \
+ take_pc_sample_macro(_thread_, (flavor), 1, 0); \
+ MACRO_END
+
+#else
+#define vm_stat_sample(x)
+#endif /* MACH_PCSAMPLE */
+
+
+
+/*
+ * Routine: vm_fault_page
+ * Purpose:
+ * Find the resident page for the virtual memory
+ * specified by the given virtual memory object
+ * and offset.
+ * Additional arguments:
+ *		The required permissions for the page are given
+ * in "fault_type". Desired permissions are included
+ * in "protection".
+ *
+ * If the desired page is known to be resident (for
+ * example, because it was previously wired down), asserting
+ *		the "must_be_resident" parameter will speed the search.
+ *
+ * If the operation can be interrupted (by thread_abort
+ * or thread_terminate), then the "interruptible"
+ * parameter should be asserted.
+ *
+ * Results:
+ * The page containing the proper data is returned
+ * in "result_page".
+ *
+ * In/out conditions:
+ * The source object must be locked and referenced,
+ * and must donate one paging reference. The reference
+ * is not affected. The paging reference and lock are
+ * consumed.
+ *
+ * If the call succeeds, the object in which "result_page"
+ * resides is left locked and holding a paging reference.
+ * If this is not the original object, a busy page in the
+ * original object is returned in "top_page", to prevent other
+ * callers from pursuing this same data, along with a paging
+ * reference for the original object. The "top_page" should
+ * be destroyed when this guarantee is no longer required.
+ * The "result_page" is also left busy. It is not removed
+ * from the pageout queues.
+ */
+vm_fault_return_t vm_fault_page(
+ /* Arguments: */
+ vm_object_t first_object, /* Object to begin search */
+ vm_offset_t first_offset, /* Offset into object */
+ vm_prot_t fault_type, /* What access is requested */
+ boolean_t must_be_resident,/* Must page be resident? */
+ boolean_t interruptible, /* May fault be interrupted? */
+ /* Modifies in place: */
+ vm_prot_t *protection, /* Protection for mapping */
+ /* Returns: */
+ vm_page_t *result_page, /* Page found, if successful */
+ vm_page_t *top_page, /* Page in top object, if
+ * not result_page.
+ */
+ /* More arguments: */
+ boolean_t resume, /* We are restarting. */
+ continuation_t continuation) /* Continuation for blocking. */
+{
+ vm_page_t m;
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_page_t first_m;
+ vm_object_t next_object;
+ vm_object_t copy_object;
+ boolean_t look_for_page;
+ vm_prot_t access_required;
+
+ if (resume) {
+ vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ if (state->vmfp_backoff)
+ goto after_block_and_backoff;
+
+ object = state->vmfp_object;
+ offset = state->vmfp_offset;
+ first_m = state->vmfp_first_m;
+ access_required = state->vmfp_access;
+ goto after_thread_block;
+ }
+
+ vm_stat_sample(SAMPLED_PC_VM_FAULTS_ANY);
+ vm_stat.faults++; /* needs lock XXX */
+ current_task()->faults++;
+
+/*
+ * Recovery actions
+ */
+#define RELEASE_PAGE(m) \
+ MACRO_BEGIN \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ if (!m->active && !m->inactive) \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+ if (vm_fault_dirty_handling
+#if MACH_KDB
+ /*
+ * If there are watchpoints set, then
+ * we don't want to give away write permission
+ * on a read fault. Make the task write fault,
+ * so that the watchpoint code notices the access.
+ */
+ || db_watchpoint_list
+#endif /* MACH_KDB */
+ ) {
+ /*
+ * If we aren't asking for write permission,
+ * then don't give it away. We're using write
+ * faults to set the dirty bit.
+ */
+ if (!(fault_type & VM_PROT_WRITE))
+ *protection &= ~VM_PROT_WRITE;
+ }
+
+ if (!vm_fault_interruptible)
+ interruptible = FALSE;
+
+ /*
+ * INVARIANTS (through entire routine):
+ *
+ * 1) At all times, we must either have the object
+ * lock or a busy page in some object to prevent
+ * some other thread from trying to bring in
+ * the same page.
+ *
+ * Note that we cannot hold any locks during the
+ * pager access or when waiting for memory, so
+ * we use a busy page then.
+ *
+ * Note also that we aren't as concerned about more than
+ * one thread attempting to memory_object_data_unlock
+ * the same page at once, so we don't hold the page
+ * as busy then, but do record the highest unlock
+ * value so far. [Unlock requests may also be delivered
+ * out of order.]
+ *
+ * 2) To prevent another thread from racing us down the
+ * shadow chain and entering a new page in the top
+ * object before we do, we must keep a busy page in
+ * the top object while following the shadow chain.
+ *
+ * 3) We must increment paging_in_progress on any object
+ * for which we have a busy page, to prevent
+ * vm_object_collapse from removing the busy page
+ * without our noticing.
+ *
+ * 4) We leave busy pages on the pageout queues.
+ * If the pageout daemon comes across a busy page,
+ * it will remove the page from the pageout queues.
+ */
+
+ /*
+ * Search for the page at object/offset.
+ */
+
+ object = first_object;
+ offset = first_offset;
+ first_m = VM_PAGE_NULL;
+ access_required = fault_type;
+
+ /*
+ * See whether this page is resident
+ */
+
+ while (TRUE) {
+ m = vm_page_lookup(object, offset);
+ if (m != VM_PAGE_NULL) {
+ /*
+ * If the page is being brought in,
+ * wait for it and then retry.
+ *
+ * A possible optimization: if the page
+ * is known to be resident, we can ignore
+ * pages that are absent (regardless of
+ * whether they're busy).
+ */
+
+ if (m->busy) {
+ kern_return_t wait_result;
+
+ PAGE_ASSERT_WAIT(m, interruptible);
+ vm_object_unlock(object);
+ if (continuation != thread_no_continuation) {
+ vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case
+ * thread_block discards
+ * our kernel stack.
+ */
+
+ state->vmfp_backoff = FALSE;
+ state->vmfp_object = object;
+ state->vmfp_offset = offset;
+ state->vmfp_first_m = first_m;
+ state->vmfp_access =
+ access_required;
+ state->vmf_prot = *protection;
+
+ counter(c_vm_fault_page_block_busy_user++);
+ thread_block(continuation);
+ } else
+ {
+ counter(c_vm_fault_page_block_busy_kernel++);
+ thread_block((void (*)()) 0);
+ }
+ after_thread_block:
+ wait_result = current_thread()->wait_result;
+ vm_object_lock(object);
+ if (wait_result != THREAD_AWAKENED) {
+ vm_fault_cleanup(object, first_m);
+ if (wait_result == THREAD_RESTART)
+ return(VM_FAULT_RETRY);
+ else
+ return(VM_FAULT_INTERRUPTED);
+ }
+ continue;
+ }
+
+ /*
+ * If the page is in error, give up now.
+ */
+
+ if (m->error) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_ERROR);
+ }
+
+ /*
+ * If the page isn't busy, but is absent,
+ * then it was deemed "unavailable".
+ */
+
+ if (m->absent) {
+ /*
+ * Remove the non-existent page (unless it's
+ * in the top object) and move on down to the
+ * next object (if there is one).
+ */
+
+ offset += object->shadow_offset;
+ access_required = VM_PROT_READ;
+ next_object = object->shadow;
+ if (next_object == VM_OBJECT_NULL) {
+ vm_page_t real_m;
+
+ assert(!must_be_resident);
+
+ /*
+ * Absent page at bottom of shadow
+ * chain; zero fill the page we left
+ * busy in the first object, and flush
+ * the absent page. But first we
+ * need to allocate a real page.
+ */
+
+ real_m = vm_page_grab(VM_PAGE_HIGHMEM);
+ if (real_m == VM_PAGE_NULL) {
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ if (object != first_object) {
+ VM_PAGE_FREE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ object = first_object;
+ offset = first_offset;
+ m = first_m;
+ first_m = VM_PAGE_NULL;
+ vm_object_lock(object);
+ }
+
+ VM_PAGE_FREE(m);
+ assert(real_m->busy);
+ vm_page_lock_queues();
+ vm_page_insert(real_m, object, offset);
+ vm_page_unlock_queues();
+ m = real_m;
+
+ /*
+ * Drop the lock while zero filling
+ * page. Then break because this
+ * is the page we wanted. Checking
+ * the page lock is a waste of time;
+ * this page was either absent or
+ * newly allocated -- in both cases
+ * it can't be page locked by a pager.
+ */
+ vm_object_unlock(object);
+
+ vm_page_zero_fill(m);
+
+ vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
+
+ vm_stat.zero_fill_count++;
+ current_task()->zero_fills++;
+ vm_object_lock(object);
+ pmap_clear_modify(m->phys_addr);
+ break;
+ } else {
+ if (must_be_resident) {
+ vm_object_paging_end(object);
+ } else if (object != first_object) {
+ vm_object_paging_end(object);
+ VM_PAGE_FREE(m);
+ } else {
+ first_m = m;
+ m->absent = FALSE;
+ vm_object_absent_release(object);
+ m->busy = TRUE;
+
+ vm_page_lock_queues();
+ VM_PAGE_QUEUES_REMOVE(m);
+ vm_page_unlock_queues();
+ }
+ vm_object_lock(next_object);
+ vm_object_unlock(object);
+ object = next_object;
+ vm_object_paging_begin(object);
+ continue;
+ }
+ }
+
+ /*
+ * If the desired access to this page has
+ * been locked out, request that it be unlocked.
+ */
+
+ if (access_required & m->page_lock) {
+ if ((access_required & m->unlock_request) != access_required) {
+ vm_prot_t new_unlock_request;
+ kern_return_t rc;
+
+ if (!object->pager_ready) {
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ goto block_and_backoff;
+ }
+
+ new_unlock_request = m->unlock_request =
+ (access_required | m->unlock_request);
+ vm_object_unlock(object);
+ if ((rc = memory_object_data_unlock(
+ object->pager,
+ object->pager_request,
+ offset + object->paging_offset,
+ PAGE_SIZE,
+ new_unlock_request))
+ != KERN_SUCCESS) {
+ printf("vm_fault: memory_object_data_unlock failed\n");
+ vm_object_lock(object);
+ vm_fault_cleanup(object, first_m);
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+ vm_object_lock(object);
+ continue;
+ }
+
+ PAGE_ASSERT_WAIT(m, interruptible);
+ goto block_and_backoff;
+ }
+
+ /*
+ * We mark the page busy and leave it on
+ * the pageout queues. If the pageout
+			 *	daemon comes across it, then it will
+ * remove the page.
+ */
+
+ if (!software_reference_bits) {
+ vm_page_lock_queues();
+ if (m->inactive) {
+ vm_stat_sample(SAMPLED_PC_VM_REACTIVATION_FAULTS);
+ vm_stat.reactivations++;
+ current_task()->reactivations++;
+ }
+
+ VM_PAGE_QUEUES_REMOVE(m);
+ vm_page_unlock_queues();
+ }
+
+ assert(!m->busy);
+ m->busy = TRUE;
+ assert(!m->absent);
+ break;
+ }
+
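+		/*
+		 *	Ask the pager for the page only if a pager has been
+		 *	created and (with MACH_PAGEMAP) the existence map does
+		 *	not already record the page as absent; for a known-
+		 *	absent page we simply continue down the shadow chain,
+		 *	zero-filling at the bottom if nothing supplies it.
+		 */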
+ look_for_page =
+ (object->pager_created)
+#if MACH_PAGEMAP
+ && (vm_external_state_get(object->existence_info, offset + object->paging_offset) !=
+ VM_EXTERNAL_STATE_ABSENT)
+#endif /* MACH_PAGEMAP */
+ ;
+
+ if ((look_for_page || (object == first_object))
+ && !must_be_resident) {
+ /*
+ * Allocate a new page for this object/offset
+ * pair.
+ */
+
+ m = vm_page_grab_fictitious();
+ if (m == VM_PAGE_NULL) {
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_FICTITIOUS_SHORTAGE);
+ }
+
+ vm_page_lock_queues();
+ vm_page_insert(m, object, offset);
+ vm_page_unlock_queues();
+ }
+
+ if (look_for_page && !must_be_resident) {
+ kern_return_t rc;
+
+ /*
+ * If the memory manager is not ready, we
+ * cannot make requests.
+ */
+ if (!object->pager_ready) {
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ VM_PAGE_FREE(m);
+ goto block_and_backoff;
+ }
+
+ if (object->internal) {
+ /*
+ * Requests to the default pager
+ * must reserve a real page in advance,
+			 *	because the pager's data-provided call
+			 *	won't block waiting for pages.
+ */
+
+ if (m->fictitious && !vm_page_convert(&m)) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+ } else if (object->absent_count >
+ vm_object_absent_max) {
+ /*
+ * If there are too many outstanding page
+ * requests pending on this object, we
+ * wait for them to be resolved now.
+ */
+
+ vm_object_absent_assert_wait(object, interruptible);
+ VM_PAGE_FREE(m);
+ goto block_and_backoff;
+ }
+
+ /*
+ * Indicate that the page is waiting for data
+ * from the memory manager.
+ */
+
+ m->absent = TRUE;
+ object->absent_count++;
+
+ /*
+ * We have a busy page, so we can
+ * release the object lock.
+ */
+ vm_object_unlock(object);
+
+ /*
+ * Call the memory manager to retrieve the data.
+ */
+
+ vm_stat.pageins++;
+ vm_stat_sample(SAMPLED_PC_VM_PAGEIN_FAULTS);
+ current_task()->pageins++;
+
+ if ((rc = memory_object_data_request(object->pager,
+ object->pager_request,
+ m->offset + object->paging_offset,
+ PAGE_SIZE, access_required)) != KERN_SUCCESS) {
+ if (object->pager && rc != MACH_SEND_INTERRUPTED)
+ printf("%s(0x%p, 0x%p, 0x%zx, 0x%x, 0x%x) failed, %x\n",
+ "memory_object_data_request",
+ object->pager,
+ object->pager_request,
+ m->offset + object->paging_offset,
+ PAGE_SIZE, access_required, rc);
+ /*
+ * Don't want to leave a busy page around,
+ * but the data request may have blocked,
+ * so check if it's still there and busy.
+ */
+ vm_object_lock(object);
+ if (m == vm_page_lookup(object,offset) &&
+ m->absent && m->busy)
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, first_m);
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+
+ /*
+ * Retry with same object/offset, since new data may
+ * be in a different page (i.e., m is meaningless at
+ * this point).
+ */
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * For the XP system, the only case in which we get here is if
+	 * the object has no pager (or we are unwiring).  If the pager doesn't
+	 * have the page, this is handled in the m->absent case above
+ * (and if you change things here you should look above).
+ */
+ if (object == first_object)
+ first_m = m;
+ else
+ {
+ assert(m == VM_PAGE_NULL);
+ }
+
+ /*
+ * Move on to the next object. Lock the next
+ * object before unlocking the current one.
+ */
+ access_required = VM_PROT_READ;
+
+ offset += object->shadow_offset;
+ next_object = object->shadow;
+ if (next_object == VM_OBJECT_NULL) {
+ assert(!must_be_resident);
+
+ /*
+ * If there's no object left, fill the page
+ * in the top object with zeros. But first we
+ * need to allocate a real page.
+ */
+
+ if (object != first_object) {
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ object = first_object;
+ offset = first_offset;
+ vm_object_lock(object);
+ }
+
+ m = first_m;
+ assert(m->object == object);
+ first_m = VM_PAGE_NULL;
+
+ if (m->fictitious && !vm_page_convert(&m)) {
+ VM_PAGE_FREE(m);
+ vm_fault_cleanup(object, VM_PAGE_NULL);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ vm_object_unlock(object);
+ vm_page_zero_fill(m);
+ vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
+ vm_stat.zero_fill_count++;
+ current_task()->zero_fills++;
+ vm_object_lock(object);
+ pmap_clear_modify(m->phys_addr);
+ break;
+ }
+ else {
+ vm_object_lock(next_object);
+ if ((object != first_object) || must_be_resident)
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ object = next_object;
+ vm_object_paging_begin(object);
+ }
+ }
+
+ /*
+ * PAGE HAS BEEN FOUND.
+ *
+ * This page (m) is:
+ * busy, so that we can play with it;
+ * not absent, so that nobody else will fill it;
+ * possibly eligible for pageout;
+ *
+ * The top-level page (first_m) is:
+ * VM_PAGE_NULL if the page was found in the
+ * top-level object;
+ * busy, not absent, and ineligible for pageout.
+ *
+ * The current object (object) is locked. A paging
+ * reference is held for the current and top-level
+ * objects.
+ */
+
+ assert(m->busy && !m->absent);
+ assert((first_m == VM_PAGE_NULL) ||
+ (first_m->busy && !first_m->absent &&
+ !first_m->active && !first_m->inactive));
+
+ /*
+ * If the page is being written, but isn't
+ * already owned by the top-level object,
+ * we have to copy it into a new page owned
+ * by the top-level object.
+ */
+
+ if (object != first_object) {
+ /*
+ * We only really need to copy if we
+ * want to write it.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+ vm_page_t copy_m;
+
+ assert(!must_be_resident);
+
+ /*
+ * If we try to collapse first_object at this
+ * point, we may deadlock when we try to get
+ * the lock on an intermediate object (since we
+ * have the bottom object locked). We can't
+ * unlock the bottom object, because the page
+ * we found may move (by collapse) if we do.
+ *
+ * Instead, we first copy the page. Then, when
+ * we have no more use for the bottom object,
+ * we unlock it and try to collapse.
+ *
+ * Note that we copy the page even if we didn't
+ * need to... that's the breaks.
+ */
+
+ /*
+ * Allocate a page for the copy
+ */
+ copy_m = vm_page_grab(VM_PAGE_HIGHMEM);
+ if (copy_m == VM_PAGE_NULL) {
+ RELEASE_PAGE(m);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ vm_object_unlock(object);
+ vm_page_copy(m, copy_m);
+ vm_object_lock(object);
+
+ /*
+ * If another map is truly sharing this
+ * page with us, we have to flush all
+ * uses of the original page, since we
+ * can't distinguish those which want the
+ * original from those which need the
+ * new copy.
+ *
+ * XXXO If we know that only one map has
+ * access to this page, then we could
+ * avoid the pmap_page_protect() call.
+ */
+
+ vm_page_lock_queues();
+ vm_page_deactivate(m);
+ pmap_page_protect(m->phys_addr, VM_PROT_NONE);
+ vm_page_unlock_queues();
+
+ /*
+ * We no longer need the old page or object.
+ */
+
+ PAGE_WAKEUP_DONE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ vm_stat.cow_faults++;
+ vm_stat_sample(SAMPLED_PC_VM_COW_FAULTS);
+ current_task()->cow_faults++;
+ object = first_object;
+ offset = first_offset;
+
+ vm_object_lock(object);
+ VM_PAGE_FREE(first_m);
+ first_m = VM_PAGE_NULL;
+ assert(copy_m->busy);
+ vm_page_lock_queues();
+ vm_page_insert(copy_m, object, offset);
+ vm_page_unlock_queues();
+ m = copy_m;
+
+ /*
+ * Now that we've gotten the copy out of the
+ * way, let's try to collapse the top object.
+ * But we have to play ugly games with
+ * paging_in_progress to do that...
+ */
+
+ vm_object_paging_end(object);
+ vm_object_collapse(object);
+ vm_object_paging_begin(object);
+ }
+ else {
+ *protection &= (~VM_PROT_WRITE);
+ }
+ }
+
+ /*
+ * Now check whether the page needs to be pushed into the
+ * copy object. The use of asymmetric copy on write for
+ * shared temporary objects means that we may do two copies to
+ * satisfy the fault; one above to get the page from a
+ * shadowed object, and one here to push it into the copy.
+ */
+
+ while ((copy_object = first_object->copy) != VM_OBJECT_NULL) {
+ vm_offset_t copy_offset;
+ vm_page_t copy_m;
+
+ /*
+ * If the page is being written, but hasn't been
+ * copied to the copy-object, we have to copy it there.
+ */
+
+ if ((fault_type & VM_PROT_WRITE) == 0) {
+ *protection &= ~VM_PROT_WRITE;
+ break;
+ }
+
+ /*
+ * If the page was guaranteed to be resident,
+ * we must have already performed the copy.
+ */
+
+ if (must_be_resident)
+ break;
+
+ /*
+ * Try to get the lock on the copy_object.
+ */
+ if (!vm_object_lock_try(copy_object)) {
+ vm_object_unlock(object);
+
+ simple_lock_pause(); /* wait a bit */
+
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * Make another reference to the copy-object,
+ * to keep it from disappearing during the
+ * copy.
+ */
+ assert(copy_object->ref_count > 0);
+ copy_object->ref_count++;
+
+ /*
+ * Does the page exist in the copy?
+ */
+ copy_offset = first_offset - copy_object->shadow_offset;
+ copy_m = vm_page_lookup(copy_object, copy_offset);
+ if (copy_m != VM_PAGE_NULL) {
+ if (copy_m->busy) {
+ /*
+ * If the page is being brought
+ * in, wait for it and then retry.
+ */
+ PAGE_ASSERT_WAIT(copy_m, interruptible);
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+ goto block_and_backoff;
+ }
+ }
+ else {
+ /*
+ * Allocate a page for the copy
+ */
+ copy_m = vm_page_alloc(copy_object, copy_offset);
+ if (copy_m == VM_PAGE_NULL) {
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+ vm_fault_cleanup(object, first_m);
+ return(VM_FAULT_MEMORY_SHORTAGE);
+ }
+
+ /*
+ * Must copy page into copy-object.
+ */
+
+ vm_page_copy(m, copy_m);
+
+ /*
+ * If the old page was in use by any users
+ * of the copy-object, it must be removed
+ * from all pmaps. (We can't know which
+ * pmaps use it.)
+ */
+
+ vm_page_lock_queues();
+ pmap_page_protect(m->phys_addr, VM_PROT_NONE);
+ copy_m->dirty = TRUE;
+ vm_page_unlock_queues();
+
+ /*
+ * If there's a pager, then immediately
+ * page out this page, using the "initialize"
+ * option. Else, we use the copy.
+ */
+
+ if (!copy_object->pager_created) {
+ vm_page_lock_queues();
+ vm_page_activate(copy_m);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(copy_m);
+ } else {
+ /*
+ * The page is already ready for pageout:
+ * not on pageout queues and busy.
+ * Unlock everything except the
+ * copy_object itself.
+ */
+
+ vm_object_unlock(object);
+
+ /*
+ * Write the page to the copy-object,
+ * flushing it from the kernel.
+ */
+
+ vm_pageout_page(copy_m, TRUE, TRUE);
+
+ /*
+ * Since the pageout may have
+ * temporarily dropped the
+ * copy_object's lock, we
+ * check whether we'll have
+ * to deallocate the hard way.
+ */
+
+ if ((copy_object->shadow != object) ||
+ (copy_object->ref_count == 1)) {
+ vm_object_unlock(copy_object);
+ vm_object_deallocate(copy_object);
+ vm_object_lock(object);
+ continue;
+ }
+
+ /*
+ * Pick back up the old object's
+ * lock. [It is safe to do so,
+ * since it must be deeper in the
+ * object tree.]
+ */
+
+ vm_object_lock(object);
+ }
+
+ /*
+ * Because we're pushing a page upward
+ * in the object tree, we must restart
+ * any faults that are waiting here.
+ * [Note that this is an expansion of
+ * PAGE_WAKEUP that uses the THREAD_RESTART
+ * wait result]. Can't turn off the page's
+ * busy bit because we're not done with it.
+ */
+
+ if (m->wanted) {
+ m->wanted = FALSE;
+ thread_wakeup_with_result((event_t) m,
+ THREAD_RESTART);
+ }
+ }
+
+ /*
+ * The reference count on copy_object must be
+ * at least 2: one for our extra reference,
+ * and at least one from the outside world
+ * (we checked that when we last locked
+ * copy_object).
+ */
+ copy_object->ref_count--;
+ assert(copy_object->ref_count > 0);
+ vm_object_unlock(copy_object);
+
+ break;
+ }
+
+ *result_page = m;
+ *top_page = first_m;
+
+ /*
+ * If the page can be written, assume that it will be.
+	 *	[Earlier, we restricted the permission to allow write
+ * access only if the fault so required, so we don't
+ * mark read-only data as dirty.]
+ */
+
+ if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
+ m->dirty = TRUE;
+
+ return(VM_FAULT_SUCCESS);
+
+ block_and_backoff:
+ vm_fault_cleanup(object, first_m);
+
+ if (continuation != thread_no_continuation) {
+ vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case we must restart.
+ */
+
+ state->vmfp_backoff = TRUE;
+ state->vmf_prot = *protection;
+
+ counter(c_vm_fault_page_block_backoff_user++);
+ thread_block(continuation);
+ } else
+ {
+ counter(c_vm_fault_page_block_backoff_kernel++);
+ thread_block((void (*)()) 0);
+ }
+ after_block_and_backoff:
+ if (current_thread()->wait_result == THREAD_AWAKENED)
+ return VM_FAULT_RETRY;
+ else
+ return VM_FAULT_INTERRUPTED;
+
+#undef RELEASE_PAGE
+}
+
+/*
+ * Routine: vm_fault
+ * Purpose:
+ * Handle page faults, including pseudo-faults
+ * used to change the wiring status of pages.
+ * Returns:
+ * If an explicit (expression) continuation is supplied,
+ * then we call the continuation instead of returning.
+ * Implementation:
+ * Explicit continuations make this a little icky,
+ * because it hasn't been rewritten to embrace CPS.
+ * Instead, we have resume arguments for vm_fault and
+ *		vm_fault_page, to let the fault computation continue.
+ *
+ * vm_fault and vm_fault_page save mucho state
+ * in the moral equivalent of a closure. The state
+ * structure is allocated when first entering vm_fault
+ * and deallocated when leaving vm_fault.
+ */
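+
+/*
+ *	In outline: vm_fault allocates a vm_fault_state_t and hangs it
+ *	off current_thread()->ith_other.  When vm_fault_page must block
+ *	and a continuation was supplied, it saves its state there and
+ *	blocks with that continuation; vm_fault_continue then re-enters
+ *	vm_fault with resume == TRUE, which restores the saved state and
+ *	resumes vm_fault_page where it left off.
+ */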
+
+static void
+vm_fault_continue(void)
+{
+ vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ (void) vm_fault(state->vmf_map,
+ state->vmf_vaddr,
+ state->vmf_fault_type,
+ state->vmf_change_wiring,
+ TRUE, state->vmf_continuation);
+ /*NOTREACHED*/
+}
+
+kern_return_t vm_fault(
+ vm_map_t map,
+ vm_offset_t vaddr,
+ vm_prot_t fault_type,
+ boolean_t change_wiring,
+ boolean_t resume,
+ vm_fault_continuation_t continuation)
+{
+	vm_map_version_t	version;	/* Map version for verification */
+ boolean_t wired; /* Should mapping be wired down? */
+ vm_object_t object; /* Top-level object */
+ vm_offset_t offset; /* Top-level offset */
+ vm_prot_t prot; /* Protection for mapping */
+ vm_object_t old_copy_object; /* Saved copy object */
+ vm_page_t result_page; /* Result of vm_fault_page */
+ vm_page_t top_page; /* Placeholder page */
+ kern_return_t kr;
+
+ vm_page_t m; /* Fast access to result_page */
+
+ if (resume) {
+ vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Retrieve cached variables and
+ * continue vm_fault_page.
+ */
+
+ object = state->vmf_object;
+ if (object == VM_OBJECT_NULL)
+ goto RetryFault;
+ version = state->vmf_version;
+ wired = state->vmf_wired;
+ offset = state->vmf_offset;
+ prot = state->vmf_prot;
+
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ TRUE, vm_fault_continue);
+ goto after_vm_fault_page;
+ }
+
+ if (continuation != vm_fault_no_continuation) {
+ /*
+ * We will probably need to save state.
+ */
+
+ char * state;
+
+ /*
+ * if this assignment stmt is written as
+ * 'active_threads[cpu_number()] = kmem_cache_alloc()',
+ * cpu_number may be evaluated before kmem_cache_alloc;
+ * if kmem_cache_alloc blocks, cpu_number will be wrong
+ */
+
+ state = (char *) kmem_cache_alloc(&vm_fault_state_cache);
+ current_thread()->ith_other = state;
+
+ }
+
+ RetryFault: ;
+
+ /*
+ * Find the backing store object and offset into
+ * it to begin the search.
+ */
+
+ if ((kr = vm_map_lookup(&map, vaddr, fault_type, &version,
+ &object, &offset,
+ &prot, &wired)) != KERN_SUCCESS) {
+ goto done;
+ }
+
+ /*
+ * If the page is wired, we must fault for the current protection
+ * value, to avoid further faults.
+ */
+
+ if (wired)
+ fault_type = prot;
+
+ /*
+ * Make a reference to this object to
+ * prevent its disposal while we are messing with
+ * it. Once we have the reference, the map is free
+ * to be diddled. Since objects reference their
+ * shadows (and copies), they will stay around as well.
+ */
+
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ vm_object_paging_begin(object);
+
+ if (continuation != vm_fault_no_continuation) {
+ vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables, in case vm_fault_page discards
+ * our kernel stack and we have to restart.
+ */
+
+ state->vmf_map = map;
+ state->vmf_vaddr = vaddr;
+ state->vmf_fault_type = fault_type;
+ state->vmf_change_wiring = change_wiring;
+ state->vmf_continuation = continuation;
+
+ state->vmf_version = version;
+ state->vmf_wired = wired;
+ state->vmf_object = object;
+ state->vmf_offset = offset;
+ state->vmf_prot = prot;
+
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ FALSE, vm_fault_continue);
+ } else
+ {
+ kr = vm_fault_page(object, offset, fault_type,
+ (change_wiring && !wired), !change_wiring,
+ &prot, &result_page, &top_page,
+ FALSE, (void (*)()) 0);
+ }
+ after_vm_fault_page:
+
+ /*
+ * If we didn't succeed, lose the object reference immediately.
+ */
+
+ if (kr != VM_FAULT_SUCCESS)
+ vm_object_deallocate(object);
+
+ /*
+ * See why we failed, and take corrective action.
+ */
+
+ switch (kr) {
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetryFault;
+ case VM_FAULT_INTERRUPTED:
+ kr = KERN_SUCCESS;
+ goto done;
+ case VM_FAULT_MEMORY_SHORTAGE:
+ if (continuation != vm_fault_no_continuation) {
+ vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ /*
+ * Save variables in case VM_PAGE_WAIT
+ * discards our kernel stack.
+ */
+
+ state->vmf_map = map;
+ state->vmf_vaddr = vaddr;
+ state->vmf_fault_type = fault_type;
+ state->vmf_change_wiring = change_wiring;
+ state->vmf_continuation = continuation;
+ state->vmf_object = VM_OBJECT_NULL;
+
+ VM_PAGE_WAIT(vm_fault_continue);
+ } else
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetryFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetryFault;
+ case VM_FAULT_MEMORY_ERROR:
+ kr = KERN_MEMORY_ERROR;
+ goto done;
+ }
+
+ m = result_page;
+
+ assert((change_wiring && !wired) ?
+ (top_page == VM_PAGE_NULL) :
+ ((top_page == VM_PAGE_NULL) == (m->object == object)));
+
+ /*
+ * How to clean up the result of vm_fault_page. This
+ * happens whether the mapping is entered or not.
+ */
+
+#define UNLOCK_AND_DEALLOCATE \
+ MACRO_BEGIN \
+ vm_fault_cleanup(m->object, top_page); \
+ vm_object_deallocate(object); \
+ MACRO_END
+
+ /*
+ * What to do with the resulting page from vm_fault_page
+ * if it doesn't get entered into the physical map:
+ */
+
+#define RELEASE_PAGE(m) \
+ MACRO_BEGIN \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ if (!m->active && !m->inactive) \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+ /*
+ * We must verify that the maps have not changed
+ * since our last lookup.
+ */
+
+ old_copy_object = m->object->copy;
+
+ vm_object_unlock(m->object);
+ while (!vm_map_verify(map, &version)) {
+ vm_object_t retry_object;
+ vm_offset_t retry_offset;
+ vm_prot_t retry_prot;
+
+ /*
+ * To avoid trying to write_lock the map while another
+ * thread has it read_locked (in vm_map_pageable), we
+ * do not try for write permission. If the page is
+ * still writable, we will get write permission. If it
+ * is not, or has been marked needs_copy, we enter the
+ * mapping without write permission, and will merely
+ * take another fault.
+ */
+ kr = vm_map_lookup(&map, vaddr,
+ fault_type & ~VM_PROT_WRITE, &version,
+ &retry_object, &retry_offset, &retry_prot,
+ &wired);
+
+ if (kr != KERN_SUCCESS) {
+ vm_object_lock(m->object);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto done;
+ }
+
+ vm_object_unlock(retry_object);
+ vm_object_lock(m->object);
+
+ if ((retry_object != object) ||
+ (retry_offset != offset)) {
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+
+ /*
+ * Check whether the protection has changed or the object
+ * has been copied while we left the map unlocked.
+ */
+ prot &= retry_prot;
+ vm_object_unlock(m->object);
+ }
+ vm_object_lock(m->object);
+
+ /*
+ * If the copy object changed while the top-level object
+ * was unlocked, then we must take away write permission.
+ */
+
+ if (m->object->copy != old_copy_object)
+ prot &= ~VM_PROT_WRITE;
+
+ /*
+ * If we want to wire down this page, but no longer have
+ * adequate permissions, we must start all over.
+ */
+
+ if (wired && (prot != fault_type)) {
+ vm_map_verify_done(map, &version);
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+
+ /*
+ * It's critically important that a wired-down page be faulted
+ * only once in each map for which it is wired.
+ */
+
+ vm_object_unlock(m->object);
+
+ /*
+ * Put this page into the physical map.
+ * We had to do the unlock above because pmap_enter
+ * may cause other faults. The page may be on
+ * the pageout queues. If the pageout daemon comes
+ * across the page, it will remove it from the queues.
+ */
+
+ PMAP_ENTER(map->pmap, vaddr, m, prot, wired);
+
+ /*
+ * If the page is not wired down and isn't already
+ * on a pageout queue, then put it where the
+ * pageout daemon can find it.
+ */
+ vm_object_lock(m->object);
+ vm_page_lock_queues();
+ if (change_wiring) {
+ if (wired)
+ vm_page_wire(m);
+ else
+ vm_page_unwire(m);
+ } else if (software_reference_bits) {
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ m->reference = TRUE;
+ } else {
+ vm_page_activate(m);
+ }
+ vm_page_unlock_queues();
+
+ /*
+ * Unlock everything, and return
+ */
+
+ vm_map_verify_done(map, &version);
+ PAGE_WAKEUP_DONE(m);
+ kr = KERN_SUCCESS;
+ UNLOCK_AND_DEALLOCATE;
+
+#undef UNLOCK_AND_DEALLOCATE
+#undef RELEASE_PAGE
+
+ done:
+ if (continuation != vm_fault_no_continuation) {
+ vm_fault_state_t *state =
+ (vm_fault_state_t *) current_thread()->ith_other;
+
+ kmem_cache_free(&vm_fault_state_cache, (vm_offset_t) state);
+ (*continuation)(kr);
+ /*NOTREACHED*/
+ }
+
+ return(kr);
+}
+
+/*
+ * vm_fault_wire:
+ *
+ * Wire down a range of virtual addresses in a map.
+ */
+void vm_fault_wire(
+ vm_map_t map,
+ vm_map_entry_t entry)
+{
+
+ vm_offset_t va;
+ pmap_t pmap;
+ vm_offset_t end_addr = entry->vme_end;
+
+ pmap = vm_map_pmap(map);
+
+ /*
+ * Inform the physical mapping system that the
+ * range of addresses may not fault, so that
+ * page tables and such can be locked down as well.
+ */
+
+ pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
+
+ /*
+ * We simulate a fault to get the page and enter it
+ * in the physical map.
+ */
+
+ for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
+ if (vm_fault_wire_fast(map, va, entry) != KERN_SUCCESS)
+ (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
+ FALSE, (void (*)()) 0);
+ }
+}
+
+/*
+ * vm_fault_unwire:
+ *
+ * Unwire a range of virtual addresses in a map.
+ */
+void vm_fault_unwire(
+ vm_map_t map,
+ vm_map_entry_t entry)
+{
+ vm_offset_t va;
+ pmap_t pmap;
+ vm_offset_t end_addr = entry->vme_end;
+ vm_object_t object;
+
+ pmap = vm_map_pmap(map);
+
+ object = (entry->is_sub_map)
+ ? VM_OBJECT_NULL : entry->object.vm_object;
+
+ /*
+ * Since the pages are wired down, we must be able to
+ * get their mappings from the physical map system.
+ */
+
+ for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
+ pmap_change_wiring(pmap, va, FALSE);
+
+ if (object == VM_OBJECT_NULL) {
+ vm_map_lock_set_recursive(map);
+ (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
+ FALSE, (void (*)()) 0);
+ vm_map_lock_clear_recursive(map);
+ } else {
+ vm_prot_t prot;
+ vm_page_t result_page;
+ vm_page_t top_page;
+ vm_fault_return_t result;
+
+ do {
+ prot = VM_PROT_NONE;
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+ result = vm_fault_page(object,
+ entry->offset +
+ (va - entry->vme_start),
+ VM_PROT_NONE, TRUE,
+ FALSE, &prot,
+ &result_page,
+ &top_page,
+ FALSE, (void (*)()) 0);
+ } while (result == VM_FAULT_RETRY);
+
+ if (result != VM_FAULT_SUCCESS)
+ panic("vm_fault_unwire: failure");
+
+ vm_page_lock_queues();
+ vm_page_unwire(result_page);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(result_page);
+
+ vm_fault_cleanup(result_page->object, top_page);
+ }
+ }
+
+ /*
+ * Inform the physical mapping system that the range
+ * of addresses may fault, so that page tables and
+ * such may be unwired themselves.
+ */
+
+ pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
+}
+
+/*
+ * vm_fault_wire_fast:
+ *
+ * Handle common case of a wire down page fault at the given address.
+ * If successful, the page is inserted into the associated physical map.
+ * The map entry is passed in to avoid the overhead of a map lookup.
+ *
+ * NOTE: the given address should be truncated to the
+ * proper page address.
+ *
+ * KERN_SUCCESS is returned if the page fault is handled; otherwise,
+ * a standard error specifying why the fault is fatal is returned.
+ *
+ * The map in question must be referenced, and remains so.
+ * Caller has a read lock on the map.
+ *
+ * This is a stripped version of vm_fault() for wiring pages. Anything
+ * other than the common case will return KERN_FAILURE, and the caller
+ * is expected to call vm_fault().
+ */
+kern_return_t vm_fault_wire_fast(
+ vm_map_t map,
+ vm_offset_t va,
+ vm_map_entry_t entry)
+{
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_page_t m;
+ vm_prot_t prot;
+
+ vm_stat.faults++; /* needs lock XXX */
+ current_task()->faults++;
+/*
+ * Recovery actions
+ */
+
+#undef RELEASE_PAGE
+#define RELEASE_PAGE(m) { \
+ PAGE_WAKEUP_DONE(m); \
+ vm_page_lock_queues(); \
+ vm_page_unwire(m); \
+ vm_page_unlock_queues(); \
+}
+
+
+#undef UNLOCK_THINGS
+#define UNLOCK_THINGS { \
+ object->paging_in_progress--; \
+ vm_object_unlock(object); \
+}
+
+#undef UNLOCK_AND_DEALLOCATE
+#define UNLOCK_AND_DEALLOCATE { \
+ UNLOCK_THINGS; \
+ vm_object_deallocate(object); \
+}
+/*
+ * Give up and have caller do things the hard way.
+ */
+
+#define GIVE_UP { \
+ UNLOCK_AND_DEALLOCATE; \
+ return(KERN_FAILURE); \
+}
+
+
+ /*
+ * If this entry is not directly to a vm_object, bail out.
+ */
+ if (entry->is_sub_map)
+ return(KERN_FAILURE);
+
+ /*
+ * Find the backing store object and offset into it.
+ */
+
+ object = entry->object.vm_object;
+ offset = (va - entry->vme_start) + entry->offset;
+ prot = entry->protection;
+
+ /*
+ * Make a reference to this object to prevent its
+ * disposal while we are messing with it.
+ */
+
+ vm_object_lock(object);
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ object->paging_in_progress++;
+
+ /*
+ * INVARIANTS (through entire routine):
+ *
+ * 1) At all times, we must either have the object
+ * lock or a busy page in some object to prevent
+ * some other thread from trying to bring in
+ * the same page.
+ *
+ * 2) Once we have a busy page, we must remove it from
+ * the pageout queues, so that the pageout daemon
+ * will not grab it away.
+ *
+ */
+
+ /*
+ * Look for page in top-level object. If it's not there or
+ * there's something going on, give up.
+ */
+ m = vm_page_lookup(object, offset);
+ if ((m == VM_PAGE_NULL) || (m->error) ||
+ (m->busy) || (m->absent) || (prot & m->page_lock)) {
+ GIVE_UP;
+ }
+
+ /*
+ * Wire the page down now. All bail outs beyond this
+ * point must unwire the page.
+ */
+
+ vm_page_lock_queues();
+ vm_page_wire(m);
+ vm_page_unlock_queues();
+
+ /*
+ * Mark page busy for other threads.
+ */
+ assert(!m->busy);
+ m->busy = TRUE;
+ assert(!m->absent);
+
+ /*
+ * Give up if the page is being written and there's a copy object
+ */
+ if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
+ RELEASE_PAGE(m);
+ GIVE_UP;
+ }
+
+ /*
+ * Put this page into the physical map.
+ * We have to unlock the object because pmap_enter
+ * may cause other faults.
+ */
+ vm_object_unlock(object);
+
+ PMAP_ENTER(map->pmap, va, m, prot, TRUE);
+
+ /*
+ * Must relock object so that paging_in_progress can be cleared.
+ */
+ vm_object_lock(object);
+
+ /*
+ * Unlock everything, and return
+ */
+
+ PAGE_WAKEUP_DONE(m);
+ UNLOCK_AND_DEALLOCATE;
+
+ return(KERN_SUCCESS);
+
+}
+
+/*
+ * Routine: vm_fault_copy_cleanup
+ * Purpose:
+ * Release a page used by vm_fault_copy.
+ */
+
+static void vm_fault_copy_cleanup(
+ vm_page_t page,
+ vm_page_t top_page)
+{
+ vm_object_t object = page->object;
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(page);
+ vm_page_lock_queues();
+ if (!page->active && !page->inactive)
+ vm_page_activate(page);
+ vm_page_unlock_queues();
+ vm_fault_cleanup(object, top_page);
+}
+
+/*
+ * Routine: vm_fault_copy
+ *
+ * Purpose:
+ * Copy pages from one virtual memory object to another --
+ * neither the source nor destination pages need be resident.
+ *
+ * Before actually copying a page, the version associated with
+ *	the destination address map will be verified.
+ *
+ * In/out conditions:
+ * The caller must hold a reference, but not a lock, to
+ * each of the source and destination objects and to the
+ * destination map.
+ *
+ * Results:
+ * Returns KERN_SUCCESS if no errors were encountered in
+ * reading or writing the data. Returns KERN_INTERRUPTED if
+ * the operation was interrupted (only possible if the
+ * "interruptible" argument is asserted). Other return values
+ * indicate a permanent error in copying the data.
+ *
+ * The actual amount of data copied will be returned in the
+ * "copy_size" argument. In the event that the destination map
+ * verification failed, this amount may be less than the amount
+ * requested.
+ */
+kern_return_t vm_fault_copy(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t *src_size, /* INOUT */
+ vm_object_t dst_object,
+ vm_offset_t dst_offset,
+ vm_map_t dst_map,
+ vm_map_version_t *dst_version,
+ boolean_t interruptible)
+{
+ vm_page_t result_page;
+ vm_prot_t prot;
+
+ vm_page_t src_page;
+ vm_page_t src_top_page;
+
+ vm_page_t dst_page;
+ vm_page_t dst_top_page;
+
+ vm_size_t amount_done;
+ vm_object_t old_copy_object;
+
+#define RETURN(x) \
+ MACRO_BEGIN \
+ *src_size = amount_done; \
+ MACRO_RETURN(x); \
+ MACRO_END
+
+ amount_done = 0;
+ do { /* while (amount_done != *src_size) */
+
+ RetrySourceFault: ;
+
+ if (src_object == VM_OBJECT_NULL) {
+ /*
+ * No source object. We will just
+ * zero-fill the page in dst_object.
+ */
+
+ src_page = VM_PAGE_NULL;
+ } else {
+ prot = VM_PROT_READ;
+
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+
+ switch (vm_fault_page(src_object, src_offset,
+ VM_PROT_READ, FALSE, interruptible,
+ &prot, &result_page, &src_top_page,
+ FALSE, (void (*)()) 0)) {
+
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetrySourceFault;
+ case VM_FAULT_INTERRUPTED:
+ RETURN(MACH_SEND_INTERRUPTED);
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetrySourceFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetrySourceFault;
+ case VM_FAULT_MEMORY_ERROR:
+ return(KERN_MEMORY_ERROR);
+ }
+
+ src_page = result_page;
+
+ assert((src_top_page == VM_PAGE_NULL) ==
+ (src_page->object == src_object));
+
+ assert ((prot & VM_PROT_READ) != VM_PROT_NONE);
+
+ vm_object_unlock(src_page->object);
+ }
+
+ RetryDestinationFault: ;
+
+ prot = VM_PROT_WRITE;
+
+ vm_object_lock(dst_object);
+ vm_object_paging_begin(dst_object);
+
+ switch (vm_fault_page(dst_object, dst_offset, VM_PROT_WRITE,
+ FALSE, FALSE /* interruptible */,
+ &prot, &result_page, &dst_top_page,
+ FALSE, (void (*)()) 0)) {
+
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_RETRY:
+ goto RetryDestinationFault;
+ case VM_FAULT_INTERRUPTED:
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page,
+ src_top_page);
+ RETURN(MACH_SEND_INTERRUPTED);
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ goto RetryDestinationFault;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ goto RetryDestinationFault;
+ case VM_FAULT_MEMORY_ERROR:
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page,
+ src_top_page);
+ return(KERN_MEMORY_ERROR);
+ }
+ assert ((prot & VM_PROT_WRITE) != VM_PROT_NONE);
+
+ dst_page = result_page;
+
+ old_copy_object = dst_page->object->copy;
+
+ vm_object_unlock(dst_page->object);
+
+ if (!vm_map_verify(dst_map, dst_version)) {
+
+ BailOut: ;
+
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page, src_top_page);
+ vm_fault_copy_cleanup(dst_page, dst_top_page);
+ break;
+ }
+
+
+ vm_object_lock(dst_page->object);
+ if (dst_page->object->copy != old_copy_object) {
+ vm_object_unlock(dst_page->object);
+ vm_map_verify_done(dst_map, dst_version);
+ goto BailOut;
+ }
+ vm_object_unlock(dst_page->object);
+
+ /*
+ * Copy the page, and note that it is dirty
+ * immediately.
+ */
+
+ if (src_page == VM_PAGE_NULL)
+ vm_page_zero_fill(dst_page);
+ else
+ vm_page_copy(src_page, dst_page);
+ dst_page->dirty = TRUE;
+
+ /*
+ * Unlock everything, and return
+ */
+
+ vm_map_verify_done(dst_map, dst_version);
+
+ if (src_page != VM_PAGE_NULL)
+ vm_fault_copy_cleanup(src_page, src_top_page);
+ vm_fault_copy_cleanup(dst_page, dst_top_page);
+
+ amount_done += PAGE_SIZE;
+ src_offset += PAGE_SIZE;
+ dst_offset += PAGE_SIZE;
+
+ } while (amount_done != *src_size);
+
+ RETURN(KERN_SUCCESS);
+#undef RETURN
+
+ /*NOTREACHED*/
+}
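
A minimal caller sketch for the routine above, assuming the caller already holds references (not locks) on both objects and a map version from an earlier map lookup; the helper name and its error handling are illustrative, not part of this change:

static kern_return_t
copy_region_sketch(
	vm_object_t	src_object,
	vm_offset_t	src_offset,
	vm_object_t	dst_object,
	vm_offset_t	dst_offset,
	vm_map_t	dst_map,
	vm_map_version_t *version,
	vm_size_t	len)
{
	vm_size_t	done = len;	/* IN: requested; OUT: actually copied */
	kern_return_t	kr;

	kr = vm_fault_copy(src_object, src_offset, &done,
			   dst_object, dst_offset, dst_map, version,
			   FALSE);		/* not interruptible */
	if (kr == KERN_SUCCESS && done < len) {
		/* The destination map changed while copying: per the
		 * "Results" note above, re-lookup the map and retry the
		 * remaining len - done bytes. */
	}
	return kr;
}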
+
+
+
+
+
+#ifdef notdef
+
+/*
+ * Routine: vm_fault_page_overwrite
+ *
+ * Description:
+ * A form of vm_fault_page that assumes that the
+ * resulting page will be overwritten in its entirety,
+ * making it unnecessary to obtain the correct *contents*
+ * of the page.
+ *
+ * Implementation:
+ * XXX Untested. Also unused. Eventually, this technology
+ * could be used in vm_fault_copy() to advantage.
+ */
+vm_fault_return_t vm_fault_page_overwrite(
+ vm_object_t dst_object,
+ vm_offset_t dst_offset,
+ vm_page_t *result_page) /* OUT */
+{
+ vm_page_t dst_page;
+
+#define interruptible FALSE /* XXX */
+
+ while (TRUE) {
+ /*
+ * Look for a page at this offset
+ */
+
+ while ((dst_page = vm_page_lookup(dst_object, dst_offset))
+ == VM_PAGE_NULL) {
+ /*
+ * No page, no problem... just allocate one.
+ */
+
+ dst_page = vm_page_alloc(dst_object, dst_offset);
+ if (dst_page == VM_PAGE_NULL) {
+ vm_object_unlock(dst_object);
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(dst_object);
+ continue;
+ }
+
+ /*
+ * Pretend that the memory manager
+ * write-protected the page.
+ *
+ * Note that we will be asking for write
+ * permission without asking for the data
+ * first.
+ */
+
+ dst_page->overwriting = TRUE;
+ dst_page->page_lock = VM_PROT_WRITE;
+ dst_page->absent = TRUE;
+ dst_object->absent_count++;
+
+ break;
+
+ /*
+ * When we bail out, we might have to throw
+ * away the page created here.
+ */
+
+#define DISCARD_PAGE \
+ MACRO_BEGIN \
+ vm_object_lock(dst_object); \
+ dst_page = vm_page_lookup(dst_object, dst_offset); \
+ if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
+ VM_PAGE_FREE(dst_page); \
+ vm_object_unlock(dst_object); \
+ MACRO_END
+ }
+
+ /*
+ * If the page is write-protected...
+ */
+
+ if (dst_page->page_lock & VM_PROT_WRITE) {
+ /*
+ * ... and an unlock request hasn't been sent
+ */
+
+ if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
+ vm_prot_t u;
+ kern_return_t rc;
+
+ /*
+ * ... then send one now.
+ */
+
+ if (!dst_object->pager_ready) {
+ vm_object_assert_wait(dst_object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ vm_object_unlock(dst_object);
+ thread_block((void (*)()) 0);
+ if (current_thread()->wait_result !=
+ THREAD_AWAKENED) {
+ DISCARD_PAGE;
+ return(VM_FAULT_INTERRUPTED);
+ }
+ continue;
+ }
+
+ u = dst_page->unlock_request |= VM_PROT_WRITE;
+ vm_object_unlock(dst_object);
+
+ if ((rc = memory_object_data_unlock(
+ dst_object->pager,
+ dst_object->pager_request,
+ dst_offset + dst_object->paging_offset,
+ PAGE_SIZE,
+ u)) != KERN_SUCCESS) {
+ printf("vm_object_overwrite: memory_object_data_unlock failed\n");
+ DISCARD_PAGE;
+ return((rc == MACH_SEND_INTERRUPTED) ?
+ VM_FAULT_INTERRUPTED :
+ VM_FAULT_MEMORY_ERROR);
+ }
+ vm_object_lock(dst_object);
+ continue;
+ }
+
+ /* ... fall through to wait below */
+ } else {
+ /*
+ * If the page isn't being used for other
+ * purposes, then we're done.
+ */
+ if ( ! (dst_page->busy || dst_page->absent || dst_page->error) )
+ break;
+ }
+
+ PAGE_ASSERT_WAIT(dst_page, interruptible);
+ vm_object_unlock(dst_object);
+ thread_block((void (*)()) 0);
+ if (current_thread()->wait_result != THREAD_AWAKENED) {
+ DISCARD_PAGE;
+ return(VM_FAULT_INTERRUPTED);
+ }
+ }
+
+ *result_page = dst_page;
+ return(VM_FAULT_SUCCESS);
+
+#undef interruptible
+#undef DISCARD_PAGE
+}
+
+#endif /* notdef */
diff --git a/vm/vm_fault.h b/vm/vm_fault.h
new file mode 100644
index 0000000..ae692b1
--- /dev/null
+++ b/vm/vm_fault.h
@@ -0,0 +1,81 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_fault.h
+ *
+ * Page fault handling module declarations.
+ */
+
+#ifndef _VM_VM_FAULT_H_
+#define _VM_VM_FAULT_H_
+
+#include <mach/kern_return.h>
+#include <mach/vm_prot.h>
+#include <vm/vm_map.h>
+#include <vm/vm_types.h>
+
+/*
+ * Page fault handling based on vm_object only.
+ */
+
+typedef kern_return_t vm_fault_return_t;
+#define VM_FAULT_SUCCESS 0
+#define VM_FAULT_RETRY 1
+#define VM_FAULT_INTERRUPTED 2
+#define VM_FAULT_MEMORY_SHORTAGE 3
+#define VM_FAULT_FICTITIOUS_SHORTAGE 4
+#define VM_FAULT_MEMORY_ERROR 5
+
+typedef void (*vm_fault_continuation_t)(kern_return_t);
+#define vm_fault_no_continuation ((vm_fault_continuation_t)0)
+
+extern void vm_fault_init(void);
+extern vm_fault_return_t vm_fault_page(vm_object_t, vm_offset_t, vm_prot_t,
+ boolean_t, boolean_t, vm_prot_t *,
+ vm_page_t *, vm_page_t *, boolean_t,
+ continuation_t);
+
+extern void vm_fault_cleanup(vm_object_t, vm_page_t);
+/*
+ * Page fault handling based on vm_map (or entries therein)
+ */
+
+extern kern_return_t vm_fault(vm_map_t, vm_offset_t, vm_prot_t, boolean_t,
+ boolean_t, vm_fault_continuation_t);
+extern void vm_fault_wire(vm_map_t, vm_map_entry_t);
+extern void vm_fault_unwire(vm_map_t, vm_map_entry_t);
+
+/* Copy pages from one object to another. */
+extern kern_return_t vm_fault_copy(vm_object_t, vm_offset_t, vm_size_t *,
+ vm_object_t, vm_offset_t, vm_map_t,
+ vm_map_version_t *, boolean_t);
+
+kern_return_t vm_fault_wire_fast(
+ vm_map_t map,
+ vm_offset_t va,
+ vm_map_entry_t entry);
+
+#endif /* _VM_VM_FAULT_H_ */
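
The return codes above are consumed with a retry loop; the switch in vm_fault_copy() (vm_fault.c, earlier in this diff) is the canonical pattern. A condensed, hypothetical helper showing the same idiom for a read fault:

static kern_return_t
fault_in_one_page(
	vm_object_t	object,
	vm_offset_t	offset,
	vm_page_t	*pagep,		/* OUT: busy page */
	vm_page_t	*top_pagep)	/* OUT: for later vm_fault_cleanup() */
{
	vm_prot_t	prot;

    Retry:
	prot = VM_PROT_READ;
	vm_object_lock(object);
	vm_object_paging_begin(object);

	switch (vm_fault_page(object, offset, VM_PROT_READ,
			      FALSE, FALSE,	/* not interruptible */
			      &prot, pagep, top_pagep,
			      FALSE, (void (*)()) 0)) {
	case VM_FAULT_SUCCESS:
		/* Page is returned busy with its object locked; release
		 * it later the way vm_fault_copy_cleanup() does. */
		vm_object_unlock((*pagep)->object);
		return KERN_SUCCESS;
	case VM_FAULT_RETRY:
		goto Retry;
	case VM_FAULT_MEMORY_SHORTAGE:
		VM_PAGE_WAIT((void (*)()) 0);
		goto Retry;
	case VM_FAULT_FICTITIOUS_SHORTAGE:
		vm_page_more_fictitious();
		goto Retry;
	case VM_FAULT_INTERRUPTED:
		return MACH_SEND_INTERRUPTED;
	case VM_FAULT_MEMORY_ERROR:
	default:
		return KERN_MEMORY_ERROR;
	}
}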
diff --git a/vm/vm_init.c b/vm/vm_init.c
new file mode 100644
index 0000000..593af11
--- /dev/null
+++ b/vm/vm_init.c
@@ -0,0 +1,88 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_init.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Initialize the Virtual Memory subsystem.
+ */
+
+#include <mach/machine/vm_types.h>
+#include <kern/slab.h>
+#include <kern/kalloc.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_init.h>
+#include <vm/vm_object.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+#include <vm/memory_object.h>
+#include <vm/memory_object_proxy.h>
+
+
+/*
+ * vm_mem_bootstrap initializes the virtual memory system.
+ * This is done only by the first cpu up.
+ */
+
+void vm_mem_bootstrap(void)
+{
+ vm_offset_t start, end;
+
+ /*
+ * Initializes resident memory structures.
+ * From here on, all physical memory is accounted for,
+ * and we use only virtual addresses.
+ */
+
+ vm_page_bootstrap(&start, &end);
+
+ /*
+ * Initialize other VM packages
+ */
+
+ slab_bootstrap();
+ vm_object_bootstrap();
+ vm_map_init();
+ kmem_init(start, end);
+ pmap_init();
+ slab_init();
+ kalloc_init();
+ vm_fault_init();
+ vm_page_module_init();
+ memory_manager_default_init();
+}
+
+void vm_mem_init(void)
+{
+ vm_object_init();
+ memory_object_proxy_init();
+ vm_page_info_all();
+}
diff --git a/vm/vm_init.h b/vm/vm_init.h
new file mode 100644
index 0000000..42ef48b
--- /dev/null
+++ b/vm/vm_init.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2013 Free Software Foundation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _VM_VM_INIT_H_
+#define _VM_VM_INIT_H_
+
+extern void vm_mem_init(void);
+extern void vm_mem_bootstrap(void);
+
+#endif /* _VM_VM_INIT_H_ */
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
new file mode 100644
index 0000000..51223d9
--- /dev/null
+++ b/vm/vm_kern.c
@@ -0,0 +1,1099 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_kern.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Kernel memory management.
+ */
+
+#include <string.h>
+
+#include <mach/kern_return.h>
+#include <machine/locore.h>
+#include <machine/vm_param.h>
+#include <kern/assert.h>
+#include <kern/debug.h>
+#include <kern/lock.h>
+#include <kern/slab.h>
+#include <kern/thread.h>
+#include <kern/printf.h>
+#include <vm/pmap.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+
+
+/*
+ * Variables exported by this module.
+ */
+
+static struct vm_map kernel_map_store;
+vm_map_t kernel_map = &kernel_map_store;
+vm_map_t kernel_pageable_map;
+
+/*
+ * projected_buffer_allocate
+ *
+ * Allocate a wired-down buffer shared between kernel and user task.
+ * Fresh, zero-filled memory is allocated.
+ * If persistence is false, this buffer can only be deallocated from
+ * user task using projected_buffer_deallocate, and deallocation
+ * from user task also deallocates the buffer from the kernel map.
+ * projected_buffer_collect is called from vm_map_deallocate to
+ * automatically deallocate projected buffers on task_deallocate.
+ * Sharing with more than one user task is achieved by using
+ * projected_buffer_map for the second and subsequent tasks.
+ * The user is precluded from manipulating the VM entry of this buffer
+ * (i.e. changing protection, inheritance or machine attributes).
+ */
+
+kern_return_t
+projected_buffer_allocate(
+ vm_map_t map,
+ vm_size_t size,
+ int persistence,
+ vm_offset_t *kernel_p,
+ vm_offset_t *user_p,
+ vm_prot_t protection,
+ vm_inherit_t inheritance) /*Currently only VM_INHERIT_NONE supported*/
+{
+ vm_object_t object;
+ vm_map_entry_t u_entry, k_entry;
+ vm_offset_t addr;
+ phys_addr_t physical_addr;
+ vm_size_t r_size;
+ kern_return_t kr;
+
+ if (map == VM_MAP_NULL || map == kernel_map)
+ return(KERN_INVALID_ARGUMENT);
+
+ /*
+ * Allocate a new object.
+ */
+
+ size = round_page(size);
+ object = vm_object_allocate(size);
+
+ vm_map_lock(kernel_map);
+ kr = vm_map_find_entry(kernel_map, &addr, size, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &k_entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(kernel_map);
+ vm_object_deallocate(object);
+ return kr;
+ }
+
+ k_entry->object.vm_object = object;
+ if (!persistence)
+ k_entry->projected_on = (vm_map_entry_t) -1;
+ /*Mark entry so as to automatically deallocate it when
+ last corresponding user entry is deallocated*/
+ vm_map_unlock(kernel_map);
+ *kernel_p = addr;
+
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &addr, size, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &u_entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ vm_map_lock(kernel_map);
+ vm_map_entry_delete(kernel_map, k_entry);
+ vm_map_unlock(kernel_map);
+ vm_object_deallocate(object);
+ return kr;
+ }
+
+ u_entry->object.vm_object = object;
+ vm_object_reference(object);
+ u_entry->projected_on = k_entry;
+ /*Creates coupling with kernel mapping of the buffer, and
+ also guarantees that user cannot directly manipulate
+ buffer VM entry*/
+ u_entry->protection = protection;
+ u_entry->max_protection = protection;
+ u_entry->inheritance = inheritance;
+ vm_map_unlock(map);
+ *user_p = addr;
+
+ /*
+ * Allocate wired-down memory in the object,
+ * and enter it in the kernel pmap.
+ */
+ kmem_alloc_pages(object, 0,
+ *kernel_p, *kernel_p + size,
+ VM_PROT_READ | VM_PROT_WRITE);
+ memset((void*) *kernel_p, 0, size); /*Zero fill*/
+
+ /* Set up physical mappings for user pmap */
+
+ pmap_pageable(map->pmap, *user_p, *user_p + size, FALSE);
+ for (r_size = 0; r_size < size; r_size += PAGE_SIZE) {
+ physical_addr = pmap_extract(kernel_pmap, *kernel_p + r_size);
+ pmap_enter(map->pmap, *user_p + r_size, physical_addr,
+ protection, TRUE);
+ }
+
+ return(KERN_SUCCESS);
+}
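
A hypothetical use of the routine above, sketching how a driver might share one wired page with a user task; task_map stands for the target task's map and all names here are illustrative:

static kern_return_t
share_page_with_task(
	vm_map_t	task_map,
	vm_offset_t	*kaddrp,	/* OUT: kernel address */
	vm_offset_t	*uaddrp)	/* OUT: user address */
{
	/* Non-persistent: deallocating the user mapping later with
	 * projected_buffer_deallocate also drops the kernel mapping. */
	return projected_buffer_allocate(task_map, PAGE_SIZE,
					 0,		/* persistence */
					 kaddrp, uaddrp,
					 VM_PROT_READ | VM_PROT_WRITE,
					 VM_INHERIT_NONE);
}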
+
+
+/*
+ * projected_buffer_map
+ *
+ * Map an area of kernel memory onto a task's address space.
+ * No new memory is allocated; the area must previously exist in the
+ * kernel memory map.
+ */
+
+kern_return_t
+projected_buffer_map(
+ vm_map_t map,
+ vm_offset_t kernel_addr,
+ vm_size_t size,
+ vm_offset_t *user_p,
+ vm_prot_t protection,
+ vm_inherit_t inheritance) /*Currently only VM_INHERIT_NONE supported*/
+{
+ vm_map_entry_t u_entry, k_entry;
+ vm_offset_t user_addr;
+ phys_addr_t physical_addr;
+ vm_size_t r_size;
+ kern_return_t kr;
+
+ /*
+ * Find entry in kernel map
+ */
+
+ size = round_page(size);
+ if (map == VM_MAP_NULL || map == kernel_map ||
+ !vm_map_lookup_entry(kernel_map, kernel_addr, &k_entry) ||
+ kernel_addr + size > k_entry->vme_end)
+ return(KERN_INVALID_ARGUMENT);
+
+
+ /*
+ * Create entry in user task
+ */
+
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &user_addr, size, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &u_entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return kr;
+ }
+
+ u_entry->object.vm_object = k_entry->object.vm_object;
+ vm_object_reference(k_entry->object.vm_object);
+ u_entry->offset = kernel_addr - k_entry->vme_start + k_entry->offset;
+ u_entry->projected_on = k_entry;
+ /*Creates coupling with kernel mapping of the buffer, and
+ also guarantees that user cannot directly manipulate
+ buffer VM entry*/
+ u_entry->protection = protection;
+ u_entry->max_protection = protection;
+ u_entry->inheritance = inheritance;
+ u_entry->wired_count = k_entry->wired_count;
+ vm_map_unlock(map);
+ *user_p = user_addr;
+
+ /* Set up physical mappings for user pmap */
+
+ pmap_pageable(map->pmap, user_addr, user_addr + size,
+ !k_entry->wired_count);
+ for (r_size = 0; r_size < size; r_size += PAGE_SIZE) {
+ physical_addr = pmap_extract(kernel_pmap, kernel_addr + r_size);
+ pmap_enter(map->pmap, user_addr + r_size, physical_addr,
+ protection, k_entry->wired_count);
+ }
+
+ return(KERN_SUCCESS);
+}
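
And, per the note above projected_buffer_allocate that additional tasks share the buffer through this routine, a second task could be given a read-only view like this (again a sketch; names are illustrative):

static kern_return_t
map_buffer_into_second_task(
	vm_map_t	task2_map,
	vm_offset_t	kernel_addr,	/* from projected_buffer_allocate */
	vm_size_t	size,
	vm_offset_t	*uaddrp)
{
	return projected_buffer_map(task2_map, kernel_addr, size, uaddrp,
				    VM_PROT_READ, VM_INHERIT_NONE);
}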
+
+
+/*
+ * projected_buffer_deallocate
+ *
+ * Unmap projected buffer from task's address space.
+ * May also unmap buffer from kernel map, if buffer is not
+ * persistent and only the kernel reference remains.
+ */
+
+kern_return_t
+projected_buffer_deallocate(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ vm_map_entry_t entry, k_entry;
+
+ if (map == VM_MAP_NULL || map == kernel_map)
+ return KERN_INVALID_ARGUMENT;
+
+ vm_map_lock(map);
+ if (!vm_map_lookup_entry(map, start, &entry) ||
+ end > entry->vme_end ||
+ /*Check corresponding kernel entry*/
+ (k_entry = entry->projected_on) == 0) {
+ vm_map_unlock(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ /*Prepare for deallocation*/
+ if (entry->vme_start < start)
+ _vm_map_clip_start(&map->hdr, entry, start, 1);
+ if (entry->vme_end > end)
+ _vm_map_clip_end(&map->hdr, entry, end, 1);
+ if (map->first_free == entry) /*Adjust first_free hint*/
+ map->first_free = entry->vme_prev;
+ entry->projected_on = 0; /*Needed to allow deletion*/
+ entry->wired_count = 0; /*Avoid unwire fault*/
+ vm_map_entry_delete(map, entry);
+ vm_map_unlock(map);
+
+ /*Check if the buffer is not persistent and only the
+ kernel mapping remains, and if so delete it*/
+ vm_map_lock(kernel_map);
+ if (k_entry->projected_on == (vm_map_entry_t) -1 &&
+ k_entry->object.vm_object->ref_count == 1) {
+ if (kernel_map->first_free == k_entry)
+ kernel_map->first_free = k_entry->vme_prev;
+ k_entry->projected_on = 0; /*Allow unwire fault*/
+ vm_map_entry_delete(kernel_map, k_entry);
+ }
+ vm_map_unlock(kernel_map);
+ return(KERN_SUCCESS);
+}
+
+
+/*
+ * projected_buffer_collect
+ *
+ * Unmap all projected buffers from task's address space.
+ */
+
+kern_return_t
+projected_buffer_collect(vm_map_t map)
+{
+ vm_map_entry_t entry, next;
+
+ if (map == VM_MAP_NULL || map == kernel_map)
+ return(KERN_INVALID_ARGUMENT);
+
+ for (entry = vm_map_first_entry(map);
+ entry != vm_map_to_entry(map);
+ entry = next) {
+ next = entry->vme_next;
+ if (entry->projected_on != 0)
+ projected_buffer_deallocate(map, entry->vme_start, entry->vme_end);
+ }
+ return(KERN_SUCCESS);
+}
+
+
+/*
+ * projected_buffer_in_range
+ *
+ * Verifies whether a projected buffer exists in the address range
+ * given.
+ */
+
+boolean_t
+projected_buffer_in_range(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ vm_map_entry_t entry;
+
+ if (map == VM_MAP_NULL || map == kernel_map)
+ return(FALSE);
+
+ /*Find first entry*/
+ if (!vm_map_lookup_entry(map, start, &entry))
+ entry = entry->vme_next;
+
+ while (entry != vm_map_to_entry(map) && entry->projected_on == 0 &&
+ entry->vme_start <= end) {
+ entry = entry->vme_next;
+ }
+ return(entry != vm_map_to_entry(map) && entry->vme_start <= end);
+}
+
+
+/*
+ * kmem_alloc:
+ *
+ * Allocate wired-down memory in the kernel's address map
+ * or a submap. The memory is not zero-filled.
+ */
+
+kern_return_t
+kmem_alloc(
+ vm_map_t map,
+ vm_offset_t *addrp,
+ vm_size_t size)
+{
+ vm_object_t object;
+ vm_map_entry_t entry;
+ vm_offset_t addr;
+ unsigned int attempts;
+ kern_return_t kr;
+
+ /*
+ * Allocate a new object. We must do this before locking
+ * the map, lest we risk deadlock with the default pager:
+ * device_read_alloc uses kmem_alloc,
+ * which tries to allocate an object,
+ * which uses kmem_alloc_wired to get memory,
+ * which blocks for pages.
+ * then the default pager needs to read a block
+ * to process a memory_object_data_write,
+ * and device_read_alloc calls kmem_alloc
+ * and deadlocks on the map lock.
+ */
+
+ size = round_page(size);
+ object = vm_object_allocate(size);
+
+ attempts = 0;
+
+retry:
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &addr, size, (vm_offset_t) 0,
+ VM_OBJECT_NULL, &entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+
+ if (attempts == 0) {
+ attempts++;
+ slab_collect();
+ goto retry;
+ }
+
+ printf_once("no more room for kmem_alloc in %p (%s)\n",
+ map, map->name);
+ vm_object_deallocate(object);
+ return kr;
+ }
+
+ entry->object.vm_object = object;
+ entry->offset = 0;
+
+ /*
+ * Since we have not given out this address yet,
+ * it is safe to unlock the map.
+ */
+ vm_map_unlock(map);
+
+ /*
+ * Allocate wired-down memory in the kernel_object,
+ * for this entry, and enter it in the kernel pmap.
+ */
+ kmem_alloc_pages(object, 0,
+ addr, addr + size,
+ VM_PROT_DEFAULT);
+
+ /*
+ * Return the memory, not zeroed.
+ */
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
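
A minimal sketch of the allocate/use/free cycle with the routine above; since kmem_alloc() does not zero the pages, the caller clears them if it needs to:

static void
kmem_alloc_sketch(void)
{
	vm_offset_t	buf;

	if (kmem_alloc(kernel_map, &buf, 2 * PAGE_SIZE) != KERN_SUCCESS)
		return;

	memset((void *) buf, 0, 2 * PAGE_SIZE);	/* not zero-filled for us */
	/* ... use buf ... */
	kmem_free(kernel_map, buf, 2 * PAGE_SIZE);
}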
+
+/*
+ * kmem_valloc:
+ *
+ * Allocate addressing space in the kernel's address map
+ *	or a submap. The addressing space does not map anything.
+ */
+
+kern_return_t
+kmem_valloc(
+ vm_map_t map,
+ vm_offset_t *addrp,
+ vm_size_t size)
+{
+ vm_map_entry_t entry;
+ vm_offset_t offset;
+ vm_offset_t addr;
+ unsigned int attempts;
+ kern_return_t kr;
+
+ /*
+ * Use the kernel object for wired-down kernel pages.
+ * Assume that no region of the kernel object is
+ * referenced more than once. We want vm_map_find_entry
+ * to extend an existing entry if possible.
+ */
+
+ size = round_page(size);
+ attempts = 0;
+
+retry:
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &addr, size, (vm_offset_t) 0,
+ kernel_object, &entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+
+ if (attempts == 0) {
+ attempts++;
+ slab_collect();
+ goto retry;
+ }
+
+ printf_once("no more room for kmem_valloc in %p (%s)\n",
+ map, map->name);
+ return kr;
+ }
+
+ /*
+ * Since we didn't know where the new region would
+ * start, we couldn't supply the correct offset into
+ * the kernel object. We only initialize the entry
+ * if we aren't extending an existing entry.
+ */
+
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+ vm_object_reference(kernel_object);
+
+ entry->object.vm_object = kernel_object;
+ entry->offset = offset;
+ }
+
+ /*
+ * Since we have not given out this address yet,
+ * it is safe to unlock the map.
+ */
+ vm_map_unlock(map);
+
+ /*
+ * Return the memory, not mapped.
+ */
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
+
+/*
+ * kmem_alloc_wired:
+ *
+ * Allocate wired-down memory in the kernel's address map
+ * or a submap. The memory is not zero-filled.
+ *
+ * The memory is allocated in the kernel_object.
+ * It may not be copied with vm_map_copy.
+ */
+
+kern_return_t
+kmem_alloc_wired(
+ vm_map_t map,
+ vm_offset_t *addrp,
+ vm_size_t size)
+{
+ vm_offset_t offset;
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ kr = kmem_valloc(map, &addr, size);
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+
+ /*
+ * Allocate wired-down memory in the kernel_object,
+ * for this entry, and enter it in the kernel pmap.
+ */
+ kmem_alloc_pages(kernel_object, offset,
+ addr, addr + size,
+ VM_PROT_DEFAULT);
+
+ /*
+ * Return the memory, not zeroed.
+ */
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
+
+/*
+ * kmem_alloc_aligned:
+ *
+ * Like kmem_alloc_wired, except that the memory is aligned.
+ * The size should be a power-of-2.
+ */
+
+kern_return_t
+kmem_alloc_aligned(
+ vm_map_t map,
+ vm_offset_t *addrp,
+ vm_size_t size)
+{
+ vm_map_entry_t entry;
+ vm_offset_t offset;
+ vm_offset_t addr;
+ unsigned int attempts;
+ kern_return_t kr;
+
+ if ((size & (size - 1)) != 0)
+ panic("kmem_alloc_aligned");
+
+ /*
+ * Use the kernel object for wired-down kernel pages.
+ * Assume that no region of the kernel object is
+ * referenced more than once. We want vm_map_find_entry
+ * to extend an existing entry if possible.
+ */
+
+ size = round_page(size);
+ attempts = 0;
+
+retry:
+ vm_map_lock(map);
+ kr = vm_map_find_entry(map, &addr, size, size - 1,
+ kernel_object, &entry);
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(map);
+
+ if (attempts == 0) {
+ attempts++;
+ slab_collect();
+ goto retry;
+ }
+
+ printf_once("no more room for kmem_alloc_aligned in %p (%s)\n",
+ map, map->name);
+ return kr;
+ }
+
+ /*
+ * Since we didn't know where the new region would
+ * start, we couldn't supply the correct offset into
+ * the kernel object. We only initialize the entry
+ * if we aren't extending an existing entry.
+ */
+
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+ vm_object_reference(kernel_object);
+
+ entry->object.vm_object = kernel_object;
+ entry->offset = offset;
+ }
+
+ /*
+ * Since we have not given out this address yet,
+ * it is safe to unlock the map.
+ */
+ vm_map_unlock(map);
+
+ /*
+ * Allocate wired-down memory in the kernel_object,
+ * for this entry, and enter it in the kernel pmap.
+ */
+ kmem_alloc_pages(kernel_object, offset,
+ addr, addr + size,
+ VM_PROT_DEFAULT);
+
+ /*
+ * Return the memory, not zeroed.
+ */
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
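
A short sketch of the routine above: the size must be a power of two (it is also used as the alignment mask), otherwise the routine panics.

static kern_return_t
alloc_aligned_sketch(vm_offset_t *addrp)
{
	vm_size_t	size = 4 * PAGE_SIZE;	/* power of two */

	assert((size & (size - 1)) == 0);
	return kmem_alloc_aligned(kernel_map, addrp, size);
}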
+
+/*
+ * kmem_map_aligned_table: map a table or structure in a virtual memory page
+ * Align the table initial address with the page initial address.
+ *
+ * Parameters:
+ * phys_address: physical address, the start address of the table.
+ * size: size of the table.
+ * mode: access mode. VM_PROT_READ for read, VM_PROT_WRITE for write.
+ *
+ * Returns the mapped virtual address on success, NULL on failure.
+ */
+
+void*
+kmem_map_aligned_table(
+ phys_addr_t phys_address,
+ vm_size_t size,
+ int mode)
+{
+ vm_offset_t virt_addr;
+ kern_return_t ret;
+ phys_addr_t into_page = phys_address % PAGE_SIZE;
+ phys_addr_t nearest_page = phys_address - into_page;
+
+ size += into_page;
+
+ ret = kmem_alloc_wired(kernel_map, &virt_addr,
+ round_page(size));
+
+ if (ret != KERN_SUCCESS)
+ return NULL;
+
+ (void) pmap_map_bd(virt_addr, nearest_page,
+ nearest_page + round_page(size), mode);
+
+ /* XXX remember mapping somewhere so we can free it? */
+
+ return (void *) (virt_addr + into_page);
+}
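
To make the arithmetic above concrete: with 4 KB pages, a hypothetical table at physical address 0x12340234 gives into_page = 0x234 and nearest_page = 0x12340000; one page is mapped and the returned pointer is the new virtual page base plus 0x234. A trivial wrapper sketch:

static void *
map_firmware_table_sketch(phys_addr_t table_phys, vm_size_t table_size)
{
	/* Read-only mapping; returns NULL if kmem_alloc_wired failed. */
	return kmem_map_aligned_table(table_phys, table_size, VM_PROT_READ);
}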
+
+/*
+ * kmem_alloc_pageable:
+ *
+ * Allocate pageable memory in the kernel's address map.
+ */
+
+kern_return_t
+kmem_alloc_pageable(
+ vm_map_t map,
+ vm_offset_t *addrp,
+ vm_size_t size)
+{
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ addr = vm_map_min(map);
+ kr = vm_map_enter(map, &addr, round_page(size),
+ (vm_offset_t) 0, TRUE,
+ VM_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+ if (kr != KERN_SUCCESS) {
+ printf_once("no more room for kmem_alloc_pageable in %p (%s)\n",
+ map, map->name);
+ return kr;
+ }
+
+ *addrp = addr;
+ return KERN_SUCCESS;
+}
+
+/*
+ * kmem_free:
+ *
+ * Release a region of kernel virtual memory allocated
+ * with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable,
+ * and return the physical pages associated with that region.
+ */
+
+void
+kmem_free(
+ vm_map_t map,
+ vm_offset_t addr,
+ vm_size_t size)
+{
+ kern_return_t kr;
+
+ kr = vm_map_remove(map, trunc_page(addr), round_page(addr + size));
+ if (kr != KERN_SUCCESS)
+ panic("kmem_free");
+}
+
+/*
+ * Allocate new wired pages in an object.
+ * The object is assumed to be mapped into the kernel map or
+ * a submap.
+ */
+void
+kmem_alloc_pages(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_prot_t protection)
+{
+ /*
+ * Mark the pmap region as not pageable.
+ */
+ pmap_pageable(kernel_pmap, start, end, FALSE);
+
+ while (start < end) {
+ vm_page_t mem;
+
+ vm_object_lock(object);
+
+ /*
+ * Allocate a page
+ */
+ while ((mem = vm_page_alloc(object, offset))
+ == VM_PAGE_NULL) {
+ vm_object_unlock(object);
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(object);
+ }
+
+ /*
+ * Wire it down
+ */
+ vm_page_lock_queues();
+ vm_page_wire(mem);
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ /*
+ * Enter it in the kernel pmap
+ */
+ PMAP_ENTER(kernel_pmap, start, mem,
+ protection, TRUE);
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(mem);
+ vm_object_unlock(object);
+
+ start += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+}
+
+/*
+ * Remap wired pages in an object into a new region.
+ * The object is assumed to be mapped into the kernel map or
+ * a submap.
+ */
+void
+kmem_remap_pages(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_prot_t protection)
+{
+ /*
+ * Mark the pmap region as not pageable.
+ */
+ pmap_pageable(kernel_pmap, start, end, FALSE);
+
+ while (start < end) {
+ vm_page_t mem;
+
+ vm_object_lock(object);
+
+ /*
+ * Find a page
+ */
+ if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
+ panic("kmem_remap_pages");
+
+ /*
+ * Wire it down (again)
+ */
+ vm_page_lock_queues();
+ vm_page_wire(mem);
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ /*
+ * Enter it in the kernel pmap. The page isn't busy,
+ * but this shouldn't be a problem because it is wired.
+ */
+ PMAP_ENTER(kernel_pmap, start, mem,
+ protection, TRUE);
+
+ start += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+}
+
+/*
+ * kmem_submap:
+ *
+ * Initializes a map to manage a subrange
+ * of the kernel virtual address space.
+ *
+ * Arguments are as follows:
+ *
+ * map Map to initialize
+ * parent Map to take range from
+ * size Size of range to find
+ * min, max Returned endpoints of map
+ */
+
+void
+kmem_submap(
+ vm_map_t map,
+ vm_map_t parent,
+ vm_offset_t *min,
+ vm_offset_t *max,
+ vm_size_t size)
+{
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ size = round_page(size);
+
+ /*
+ * Need reference on submap object because it is internal
+ * to the vm_system. vm_object_enter will never be called
+ * on it (usual source of reference for vm_map_enter).
+ */
+ vm_object_reference(vm_submap_object);
+
+ addr = vm_map_min(parent);
+ kr = vm_map_enter(parent, &addr, size,
+ (vm_offset_t) 0, TRUE,
+ vm_submap_object, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+ if (kr != KERN_SUCCESS)
+ panic("kmem_submap");
+
+ pmap_reference(vm_map_pmap(parent));
+ vm_map_setup(map, vm_map_pmap(parent), addr, addr + size);
+ kr = vm_map_submap(parent, addr, addr + size, map);
+ if (kr != KERN_SUCCESS)
+ panic("kmem_submap");
+
+ *min = addr;
+ *max = addr + size;
+}
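
A sketch of how a subsystem map is typically carved out of the kernel map with the routine above; the map storage and the 1 MB size here are illustrative:

static struct vm_map example_submap_store;	/* hypothetical */

static void
submap_setup_sketch(void)
{
	vm_offset_t	min, max;

	kmem_submap(&example_submap_store, kernel_map, &min, &max,
		    1024 * 1024);
	/* example_submap_store now manages [min, max) of kernel VA. */
}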
+
+/*
+ * kmem_init:
+ *
+ * Initialize the kernel's virtual memory map, taking
+ * into account all memory allocated up to this time.
+ */
+void kmem_init(
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ vm_map_setup(kernel_map, pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end);
+
+ /*
+ * Reserve virtual memory allocated up to this time.
+ */
+ if (start != VM_MIN_KERNEL_ADDRESS) {
+ kern_return_t rc;
+ vm_offset_t addr = VM_MIN_KERNEL_ADDRESS;
+ rc = vm_map_enter(kernel_map,
+ &addr, start - VM_MIN_KERNEL_ADDRESS,
+ (vm_offset_t) 0, TRUE,
+ VM_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
+ if (rc)
+ panic("vm_map_enter failed (%d)\n", rc);
+ }
+}
+
+/*
+ * New and improved IO wiring support.
+ */
+
+/*
+ * kmem_io_map_copyout:
+ *
+ * Establish temporary mapping in designated map for the memory
+ * passed in. Memory format must be a page_list vm_map_copy.
+ * Mapping is READ-ONLY.
+ */
+
+kern_return_t
+kmem_io_map_copyout(
+ vm_map_t map,
+ vm_offset_t *addr, /* actual addr of data */
+ vm_offset_t *alloc_addr, /* page aligned addr */
+ vm_size_t *alloc_size, /* size allocated */
+ vm_map_copy_t copy,
+ vm_size_t min_size) /* Do at least this much */
+{
+ vm_offset_t myaddr, offset;
+ vm_size_t mysize, copy_size;
+ kern_return_t ret;
+ vm_page_t *page_list;
+ vm_map_copy_t new_copy;
+ int i;
+
+ assert(copy->type == VM_MAP_COPY_PAGE_LIST);
+ assert(min_size != 0);
+
+ /*
+ * Figure out the size in vm pages.
+ */
+ min_size += copy->offset - trunc_page(copy->offset);
+ min_size = round_page(min_size);
+ mysize = round_page(copy->offset + copy->size) -
+ trunc_page(copy->offset);
+
+ /*
+ * If total size is larger than one page list and
+ * we don't have to do more than one page list, then
+ * only do one page list.
+ *
+ * XXX Could be much smarter about this ... like trimming length
+ * XXX if we need more than one page list but not all of them.
+ */
+
+ copy_size = ptoa(copy->cpy_npages);
+ if (mysize > copy_size && copy_size > min_size)
+ mysize = copy_size;
+
+ /*
+ * Allocate some address space in the map (must be kernel
+ * space).
+ */
+ myaddr = vm_map_min(map);
+ ret = vm_map_enter(map, &myaddr, mysize,
+ (vm_offset_t) 0, TRUE,
+ VM_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+
+ if (ret != KERN_SUCCESS)
+ return(ret);
+
+ /*
+ * Tell the pmap module that this will be wired, and
+ * enter the mappings.
+ */
+ pmap_pageable(vm_map_pmap(map), myaddr, myaddr + mysize, TRUE);
+
+ *addr = myaddr + (copy->offset - trunc_page(copy->offset));
+ *alloc_addr = myaddr;
+ *alloc_size = mysize;
+
+ offset = myaddr;
+ page_list = &copy->cpy_page_list[0];
+ while (TRUE) {
+ for ( i = 0; i < copy->cpy_npages; i++, offset += PAGE_SIZE) {
+ PMAP_ENTER(vm_map_pmap(map), offset, *page_list,
+ VM_PROT_READ, TRUE);
+ page_list++;
+ }
+
+ if (offset == (myaddr + mysize))
+ break;
+
+ /*
+ * Onward to the next page_list. The extend_cont
+ * leaves the current page list's pages alone;
+ * they'll be cleaned up at discard. Reset this
+ * copy's continuation to discard the next one.
+ */
+ vm_map_copy_invoke_extend_cont(copy, &new_copy, &ret);
+
+ if (ret != KERN_SUCCESS) {
+ kmem_io_map_deallocate(map, myaddr, mysize);
+ return(ret);
+ }
+ copy->cpy_cont = vm_map_copy_discard_cont;
+ copy->cpy_cont_args = (vm_map_copyin_args_t)new_copy;
+ copy = new_copy;
+ page_list = &copy->cpy_page_list[0];
+ }
+
+ return(ret);
+}
+
+/*
+ * kmem_io_map_deallocate:
+ *
+ * Get rid of the mapping established by kmem_io_map_copyout.
+ * Assumes that addr and size have been rounded to page boundaries.
+ * (e.g., the alloc_addr and alloc_size returned by kmem_io_map_copyout)
+ */
+
+void
+kmem_io_map_deallocate(
+ vm_map_t map,
+ vm_offset_t addr,
+ vm_size_t size)
+{
+ /*
+ * Remove the mappings. The pmap_remove is needed.
+ */
+
+ pmap_remove(vm_map_pmap(map), addr, addr + size);
+ vm_map_remove(map, addr, addr + size);
+}
+
+/*
+ * Routine: copyinmap
+ * Purpose:
+ * Like copyin, except that fromaddr is an address
+ * in the specified VM map. This implementation
+ * is incomplete; it handles the current user map
+ * and the kernel map/submaps.
+ */
+
+int copyinmap(
+ vm_map_t map,
+ char *fromaddr,
+ char *toaddr,
+ int length)
+{
+ if (vm_map_pmap(map) == kernel_pmap) {
+ /* assume a correct copy */
+ memcpy(toaddr, fromaddr, length);
+ return 0;
+ }
+
+ if (current_map() == map)
+ return copyin( fromaddr, toaddr, length);
+
+ return 1;
+}
+
+/*
+ * Routine: copyoutmap
+ * Purpose:
+ * Like copyout, except that toaddr is an address
+ * in the specified VM map. This implementation
+ * is incomplete; it handles the current user map
+ * and the kernel map/submaps.
+ */
+
+int copyoutmap(
+ vm_map_t map,
+ char *fromaddr,
+ char *toaddr,
+ int length)
+{
+ if (vm_map_pmap(map) == kernel_pmap) {
+ /* assume a correct copy */
+ memcpy(toaddr, fromaddr, length);
+ return 0;
+ }
+
+ if (current_map() == map)
+ return copyout(fromaddr, toaddr, length);
+
+ return 1;
+}
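
A small sketch of the two helpers above, pushing a value to an address that lives in some task's map; it returns 0 on success and 1 when the map is neither the kernel map nor the current user map:

static int
copyout_int_sketch(vm_map_t map, char *dst)
{
	int	value = 42;		/* hypothetical payload */

	return copyoutmap(map, (char *) &value, dst, sizeof value);
}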
diff --git a/vm/vm_kern.h b/vm/vm_kern.h
new file mode 100644
index 0000000..13115ff
--- /dev/null
+++ b/vm/vm_kern.h
@@ -0,0 +1,100 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_kern.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Kernel memory management definitions.
+ */
+
+#ifndef _VM_VM_KERN_H_
+#define _VM_VM_KERN_H_
+
+#include <mach/kern_return.h>
+#include <vm/vm_map.h>
+
+extern kern_return_t projected_buffer_allocate(vm_map_t, vm_size_t, int,
+ vm_offset_t *, vm_offset_t *,
+ vm_prot_t, vm_inherit_t);
+extern kern_return_t projected_buffer_deallocate(vm_map_t, vm_offset_t,
+ vm_offset_t);
+extern kern_return_t projected_buffer_map(vm_map_t, vm_offset_t, vm_size_t,
+ vm_offset_t *, vm_prot_t,
+ vm_inherit_t);
+extern kern_return_t projected_buffer_collect(vm_map_t);
+
+extern void kmem_init(vm_offset_t, vm_offset_t);
+
+extern kern_return_t kmem_alloc(vm_map_t, vm_offset_t *, vm_size_t);
+extern kern_return_t kmem_alloc_pageable(vm_map_t, vm_offset_t *,
+ vm_size_t);
+extern kern_return_t kmem_valloc(vm_map_t, vm_offset_t *, vm_size_t);
+extern kern_return_t kmem_alloc_wired(vm_map_t, vm_offset_t *, vm_size_t);
+extern kern_return_t kmem_alloc_aligned(vm_map_t, vm_offset_t *, vm_size_t);
+extern void* kmem_map_aligned_table(phys_addr_t, vm_size_t, int);
+
+extern void kmem_free(vm_map_t, vm_offset_t, vm_size_t);
+
+extern void kmem_submap(vm_map_t, vm_map_t, vm_offset_t *,
+ vm_offset_t *, vm_size_t);
+
+extern kern_return_t kmem_io_map_copyout(vm_map_t, vm_offset_t *,
+ vm_offset_t *, vm_size_t *,
+ vm_map_copy_t, vm_size_t);
+extern void kmem_io_map_deallocate(vm_map_t, vm_offset_t,
+ vm_size_t);
+
+extern int
+copyinmap (vm_map_t map, char *fromaddr, char *toaddr, int length);
+
+extern int
+copyoutmap (vm_map_t map, char *fromaddr, char *toaddr, int length);
+
+extern vm_map_t kernel_map;
+extern vm_map_t kernel_pageable_map;
+extern vm_map_t ipc_kernel_map;
+
+extern boolean_t projected_buffer_in_range(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end);
+
+extern void kmem_alloc_pages(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_prot_t protection);
+
+extern void kmem_remap_pages(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_prot_t protection);
+
+#endif /* _VM_VM_KERN_H_ */
diff --git a/vm/vm_map.c b/vm/vm_map.c
new file mode 100644
index 0000000..e454bb2
--- /dev/null
+++ b/vm/vm_map.c
@@ -0,0 +1,5237 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_map.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Virtual memory mapping module.
+ */
+
+#include <kern/printf.h>
+#include <mach/kern_return.h>
+#include <mach/port.h>
+#include <mach/vm_attributes.h>
+#include <mach/vm_param.h>
+#include <mach/vm_wire.h>
+#include <kern/assert.h>
+#include <kern/debug.h>
+#include <kern/kalloc.h>
+#include <kern/mach.server.h>
+#include <kern/list.h>
+#include <kern/rbtree.h>
+#include <kern/slab.h>
+#include <kern/mach4.server.h>
+#include <vm/pmap.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_resident.h>
+#include <vm/vm_kern.h>
+#include <vm/memory_object_proxy.h>
+#include <ipc/ipc_port.h>
+#include <string.h>
+
+#if MACH_KDB
+#include <ddb/db_output.h>
+#include <vm/vm_print.h>
+#endif /* MACH_KDB */
+
+/*
+ * Macros to copy a vm_map_entry. We must be careful to correctly
+ * manage the wired page count. vm_map_entry_copy() creates a new
+ * map entry to the same memory - the wired count in the new entry
+ * must be set to zero. vm_map_entry_copy_full() creates a new
+ * entry that is identical to the old entry. This preserves the
+ * wire count; it's used for map splitting and cache changing in
+ * vm_map_copyout.
+ */
+#define vm_map_entry_copy(NEW,OLD) \
+MACRO_BEGIN \
+ *(NEW) = *(OLD); \
+ (NEW)->is_shared = FALSE; \
+ (NEW)->needs_wakeup = FALSE; \
+ (NEW)->in_transition = FALSE; \
+ (NEW)->wired_count = 0; \
+ (NEW)->wired_access = VM_PROT_NONE; \
+MACRO_END
+
+#define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
+
+/*
+ * Virtual memory maps provide for the mapping, protection,
+ * and sharing of virtual memory objects. In addition,
+ * this module provides for an efficient virtual copy of
+ * memory from one map to another.
+ *
+ * Synchronization is required prior to most operations.
+ *
+ * Maps consist of an ordered doubly-linked list of simple
+ * entries; a hint and a red-black tree are used to speed up lookups.
+ *
+ * Sharing maps have been deleted from this version of Mach.
+ * All shared objects are now mapped directly into the respective
+ * maps. This requires a change in the copy on write strategy;
+ * the asymmetric (delayed) strategy is used for shared temporary
+ * objects instead of the symmetric (shadow) strategy. This is
+ * selected by the (new) use_shared_copy bit in the object. See
+ * vm_object_copy_temporary in vm_object.c for details. All maps
+ * are now "top level" maps (either task map, kernel map or submap
+ * of the kernel map).
+ *
+ * Since portions of maps are specified by start/end addresses,
+ * which may not align with existing map entries, all
+ * routines merely "clip" entries to these start/end values.
+ * [That is, an entry is split into two, bordering at a
+ * start or end value.] Note that these clippings may not
+ * always be necessary (as the two resulting entries are then
+ * not changed); however, the clipping is done for convenience.
+ * The entries can later be "glued back together" (coalesced).
+ *
+ * The symmetric (shadow) copy strategy implements virtual copy
+ * by copying VM object references from one map to
+ * another, and then marking both regions as copy-on-write.
+ * It is important to note that only one writeable reference
+ * to a VM object region exists in any map when this strategy
+ * is used -- this means that shadow object creation can be
+ * delayed until a write operation occurs. The asymmetric (delayed)
+ * strategy allows multiple maps to have writeable references to
+ * the same region of a vm object, and hence cannot delay creating
+ * its copy objects. See vm_object_copy_temporary() in vm_object.c.
+ * Copying of permanent objects is completely different; see
+ * vm_object_copy_strategically() in vm_object.c.
+ */
+
+struct kmem_cache vm_map_cache; /* cache for vm_map structures */
+struct kmem_cache vm_map_entry_cache; /* cache for vm_map_entry structures */
+struct kmem_cache vm_map_copy_cache; /* cache for vm_map_copy structures */
+
+/*
+ * Placeholder object for submap operations. This object is dropped
+ * into the range by a call to vm_map_find, and removed when
+ * vm_map_submap creates the submap.
+ */
+
+static struct vm_object vm_submap_object_store;
+vm_object_t vm_submap_object = &vm_submap_object_store;
+
+/*
+ * vm_map_init:
+ *
+ * Initialize the vm_map module. Must be called before
+ * any other vm_map routines.
+ *
+ * Map and entry structures are allocated from caches -- we must
+ * initialize those caches.
+ *
+ * There are two caches of interest:
+ *
+ * vm_map_cache: used to allocate maps.
+ * vm_map_entry_cache: used to allocate map entries.
+ *
+ * We make sure the map entry cache allocates memory directly from the
+ * physical allocator to avoid recursion with this module.
+ */
+
+void vm_map_init(void)
+{
+ kmem_cache_init(&vm_map_cache, "vm_map", sizeof(struct vm_map), 0,
+ NULL, 0);
+ kmem_cache_init(&vm_map_entry_cache, "vm_map_entry",
+ sizeof(struct vm_map_entry), 0, NULL,
+ KMEM_CACHE_NOOFFSLAB | KMEM_CACHE_PHYSMEM);
+ kmem_cache_init(&vm_map_copy_cache, "vm_map_copy",
+ sizeof(struct vm_map_copy), 0, NULL, 0);
+
+ /*
+ * Submap object is initialized by vm_object_init.
+ */
+}
+
+void vm_map_setup(
+ vm_map_t map,
+ pmap_t pmap,
+ vm_offset_t min,
+ vm_offset_t max)
+{
+ vm_map_first_entry(map) = vm_map_to_entry(map);
+ vm_map_last_entry(map) = vm_map_to_entry(map);
+ map->hdr.nentries = 0;
+ rbtree_init(&map->hdr.tree);
+ rbtree_init(&map->hdr.gap_tree);
+
+ map->size = 0;
+ map->size_wired = 0;
+ map->ref_count = 1;
+ map->pmap = pmap;
+ map->min_offset = min;
+ map->max_offset = max;
+ map->wiring_required = FALSE;
+ map->wait_for_space = FALSE;
+ map->first_free = vm_map_to_entry(map);
+ map->hint = vm_map_to_entry(map);
+ map->name = NULL;
+ vm_map_lock_init(map);
+ simple_lock_init(&map->ref_lock);
+ simple_lock_init(&map->hint_lock);
+}
+
+/*
+ * vm_map_create:
+ *
+ * Creates and returns a new empty VM map with
+ * the given physical map structure, and having
+ * the given lower and upper address bounds.
+ */
+vm_map_t vm_map_create(
+ pmap_t pmap,
+ vm_offset_t min,
+ vm_offset_t max)
+{
+ vm_map_t result;
+
+ result = (vm_map_t) kmem_cache_alloc(&vm_map_cache);
+ if (result == VM_MAP_NULL)
+ return VM_MAP_NULL;
+
+ vm_map_setup(result, pmap, min, max);
+
+ return(result);
+}
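
A sketch of how a fresh task address space might be created with the routine above; pmap_create() and the VM_MIN_ADDRESS/VM_MAX_ADDRESS bounds are machine-dependent and are shown here as assumptions:

static vm_map_t
new_task_map_sketch(void)
{
	return vm_map_create(pmap_create(0),
			     round_page(VM_MIN_ADDRESS),
			     trunc_page(VM_MAX_ADDRESS));
}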
+
+void vm_map_lock(struct vm_map *map)
+{
+ lock_write(&map->lock);
+
+ /*
+ * XXX Memory allocation may occur while a map is locked,
+ * for example when clipping entries. If the system is running
+ * low on memory, allocating may block until pages are
+ * available. But if a map used by the default pager is
+ * kept locked, a deadlock occurs.
+ *
+ * This workaround temporarily elevates the current thread
+ * VM privileges to avoid that particular deadlock, and does
+ * so regardless of the map for convenience, and because it's
+ * currently impossible to predict which map the default pager
+ * may depend on.
+ *
+ * This workaround isn't reliable, and only makes exhaustion
+ * less likely. In particular pageout may cause lots of data
+ * to be passed between the kernel and the pagers, often
+ * in the form of large copy maps. Making the minimum
+ * number of pages depend on the total number of pages
+ * should make exhaustion even less likely.
+ */
+
+ if (current_thread()) {
+ current_thread()->vm_privilege++;
+ assert(current_thread()->vm_privilege != 0);
+ }
+
+ map->timestamp++;
+}
+
+void vm_map_unlock(struct vm_map *map)
+{
+ if (current_thread()) {
+ current_thread()->vm_privilege--;
+ }
+
+ lock_write_done(&map->lock);
+}
+
+/*
+ * vm_map_entry_create: [ internal use only ]
+ *
+ * Allocates a VM map entry for insertion in the
+ * given map (or map copy). No fields are filled.
+ */
+#define vm_map_entry_create(map) \
+ _vm_map_entry_create(&(map)->hdr)
+
+#define vm_map_copy_entry_create(copy) \
+ _vm_map_entry_create(&(copy)->cpy_hdr)
+
+static vm_map_entry_t
+_vm_map_entry_create(const struct vm_map_header *map_header)
+{
+ vm_map_entry_t entry;
+
+ entry = (vm_map_entry_t) kmem_cache_alloc(&vm_map_entry_cache);
+ if (entry == VM_MAP_ENTRY_NULL)
+ panic("vm_map_entry_create");
+
+ return(entry);
+}
+
+/*
+ * vm_map_entry_dispose: [ internal use only ]
+ *
+ * Inverse of vm_map_entry_create.
+ */
+#define vm_map_entry_dispose(map, entry) \
+ _vm_map_entry_dispose(&(map)->hdr, (entry))
+
+#define vm_map_copy_entry_dispose(copy, entry) \
+	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
+
+static void
+_vm_map_entry_dispose(const struct vm_map_header *map_header,
+ vm_map_entry_t entry)
+{
+ (void)map_header;
+
+ kmem_cache_free(&vm_map_entry_cache, (vm_offset_t) entry);
+}
+
+/*
+ * Red-black tree lookup/insert comparison functions
+ */
+static inline int vm_map_entry_cmp_lookup(vm_offset_t addr,
+ const struct rbtree_node *node)
+{
+ struct vm_map_entry *entry;
+
+ entry = rbtree_entry(node, struct vm_map_entry, tree_node);
+
+ if (addr < entry->vme_start)
+ return -1;
+ else if (addr < entry->vme_end)
+ return 0;
+ else
+ return 1;
+}
+
+static inline int vm_map_entry_cmp_insert(const struct rbtree_node *a,
+ const struct rbtree_node *b)
+{
+ struct vm_map_entry *entry;
+
+ entry = rbtree_entry(a, struct vm_map_entry, tree_node);
+ return vm_map_entry_cmp_lookup(entry->vme_start, b);
+}
+
+/*
+ * Gap management functions
+ */
+static inline int vm_map_entry_gap_cmp_lookup(vm_size_t gap_size,
+ const struct rbtree_node *node)
+{
+ struct vm_map_entry *entry;
+
+ entry = rbtree_entry(node, struct vm_map_entry, gap_node);
+
+ if (gap_size < entry->gap_size)
+ return -1;
+ else if (gap_size == entry->gap_size)
+ return 0;
+ else
+ return 1;
+}
+
+static inline int vm_map_entry_gap_cmp_insert(const struct rbtree_node *a,
+ const struct rbtree_node *b)
+{
+ struct vm_map_entry *entry;
+
+ entry = rbtree_entry(a, struct vm_map_entry, gap_node);
+ return vm_map_entry_gap_cmp_lookup(entry->gap_size, b);
+}
+
+static int
+vm_map_gap_valid(struct vm_map_header *hdr, struct vm_map_entry *entry)
+{
+ return entry != (struct vm_map_entry *)&hdr->links;
+}
+
+static void
+vm_map_gap_compute(struct vm_map_header *hdr, struct vm_map_entry *entry)
+{
+ struct vm_map_entry *next;
+
+ next = entry->vme_next;
+
+ if (vm_map_gap_valid(hdr, next)) {
+ entry->gap_size = next->vme_start - entry->vme_end;
+ } else {
+ entry->gap_size = hdr->vme_end - entry->vme_end;
+ }
+}
+
+static void
+vm_map_gap_insert_single(struct vm_map_header *hdr, struct vm_map_entry *entry)
+{
+ struct vm_map_entry *tmp;
+ struct rbtree_node *node;
+ unsigned long slot;
+
+ if (!vm_map_gap_valid(hdr, entry)) {
+ return;
+ }
+
+ vm_map_gap_compute(hdr, entry);
+
+ if (entry->gap_size == 0) {
+ return;
+ }
+
+ node = rbtree_lookup_slot(&hdr->gap_tree, entry->gap_size,
+ vm_map_entry_gap_cmp_lookup, slot);
+
+ if (node == NULL) {
+ rbtree_insert_slot(&hdr->gap_tree, slot, &entry->gap_node);
+ list_init(&entry->gap_list);
+ entry->in_gap_tree = 1;
+ } else {
+ tmp = rbtree_entry(node, struct vm_map_entry, gap_node);
+ list_insert_tail(&tmp->gap_list, &entry->gap_list);
+ entry->in_gap_tree = 0;
+ }
+}
+
+static void
+vm_map_gap_remove_single(struct vm_map_header *hdr, struct vm_map_entry *entry)
+{
+ struct vm_map_entry *tmp;
+
+ if (!vm_map_gap_valid(hdr, entry)) {
+ return;
+ }
+
+ if (entry->gap_size == 0) {
+ return;
+ }
+
+ if (!entry->in_gap_tree) {
+ list_remove(&entry->gap_list);
+ return;
+ }
+
+ rbtree_remove(&hdr->gap_tree, &entry->gap_node);
+
+ if (list_empty(&entry->gap_list)) {
+ return;
+ }
+
+ tmp = list_first_entry(&entry->gap_list, struct vm_map_entry, gap_list);
+ assert(tmp->gap_size == entry->gap_size);
+ list_remove(&tmp->gap_list);
+ list_set_head(&tmp->gap_list, &entry->gap_list);
+ assert(!tmp->in_gap_tree);
+ rbtree_insert(&hdr->gap_tree, &tmp->gap_node,
+ vm_map_entry_gap_cmp_insert);
+ tmp->in_gap_tree = 1;
+}
+
+static void
+vm_map_gap_update(struct vm_map_header *hdr, struct vm_map_entry *entry)
+{
+ vm_map_gap_remove_single(hdr, entry);
+ vm_map_gap_insert_single(hdr, entry);
+}
+
+static void
+vm_map_gap_insert(struct vm_map_header *hdr, struct vm_map_entry *entry)
+{
+ vm_map_gap_remove_single(hdr, entry->vme_prev);
+ vm_map_gap_insert_single(hdr, entry->vme_prev);
+ vm_map_gap_insert_single(hdr, entry);
+}
+
+static void
+vm_map_gap_remove(struct vm_map_header *hdr, struct vm_map_entry *entry)
+{
+ vm_map_gap_remove_single(hdr, entry);
+ vm_map_gap_remove_single(hdr, entry->vme_prev);
+ vm_map_gap_insert_single(hdr, entry->vme_prev);
+}
+
+/*
+ * vm_map_entry_{un,}link:
+ *
+ * Insert/remove entries from maps (or map copies).
+ *
+ * The start and end addresses of the entries must be properly set
+ * before using these macros.
+ */
+#define vm_map_entry_link(map, after_where, entry) \
+ _vm_map_entry_link(&(map)->hdr, after_where, entry, 1)
+
+#define vm_map_copy_entry_link(copy, after_where, entry) \
+ _vm_map_entry_link(&(copy)->cpy_hdr, after_where, entry, 0)
+
+#define _vm_map_entry_link(hdr, after_where, entry, link_gap) \
+ MACRO_BEGIN \
+ (hdr)->nentries++; \
+ (entry)->vme_prev = (after_where); \
+ (entry)->vme_next = (after_where)->vme_next; \
+ (entry)->vme_prev->vme_next = \
+ (entry)->vme_next->vme_prev = (entry); \
+ rbtree_insert(&(hdr)->tree, &(entry)->tree_node, \
+ vm_map_entry_cmp_insert); \
+ if (link_gap) \
+ vm_map_gap_insert((hdr), (entry)); \
+ MACRO_END
+
+#define vm_map_entry_unlink(map, entry) \
+ _vm_map_entry_unlink(&(map)->hdr, entry, 1)
+
+#define vm_map_copy_entry_unlink(copy, entry) \
+ _vm_map_entry_unlink(&(copy)->cpy_hdr, entry, 0)
+
+#define _vm_map_entry_unlink(hdr, entry, unlink_gap) \
+ MACRO_BEGIN \
+ (hdr)->nentries--; \
+ (entry)->vme_next->vme_prev = (entry)->vme_prev; \
+ (entry)->vme_prev->vme_next = (entry)->vme_next; \
+ rbtree_remove(&(hdr)->tree, &(entry)->tree_node); \
+ if (unlink_gap) \
+ vm_map_gap_remove((hdr), (entry)); \
+ MACRO_END
+
+/*
+ * vm_map_reference:
+ *
+ * Creates another valid reference to the given map.
+ *
+ */
+void vm_map_reference(vm_map_t map)
+{
+ if (map == VM_MAP_NULL)
+ return;
+
+ simple_lock(&map->ref_lock);
+ map->ref_count++;
+ simple_unlock(&map->ref_lock);
+}
+
+/*
+ * vm_map_deallocate:
+ *
+ * Removes a reference from the specified map,
+ * destroying it if no references remain.
+ * The map should not be locked.
+ */
+void vm_map_deallocate(vm_map_t map)
+{
+ int c;
+
+ if (map == VM_MAP_NULL)
+ return;
+
+ simple_lock(&map->ref_lock);
+ c = --map->ref_count;
+ simple_unlock(&map->ref_lock);
+
+ if (c > 0) {
+ return;
+ }
+
+ projected_buffer_collect(map);
+ (void) vm_map_delete(map, map->min_offset, map->max_offset);
+
+ pmap_destroy(map->pmap);
+
+ kmem_cache_free(&vm_map_cache, (vm_offset_t) map);
+}
+
+/*
+ * SAVE_HINT:
+ *
+ * Saves the specified entry as the hint for
+ * future lookups. Performs necessary interlocks.
+ */
+#define SAVE_HINT(map,value) \
+ simple_lock(&(map)->hint_lock); \
+ (map)->hint = (value); \
+ simple_unlock(&(map)->hint_lock);
+
+/*
+ * vm_map_lookup_entry: [ internal use only ]
+ *
+ * Finds the map entry containing (or
+ * immediately preceding) the specified address
+ * in the given map; the entry is returned
+ * in the "entry" parameter. The boolean
+ * result indicates whether the address is
+ * actually contained in the map.
+ */
+boolean_t vm_map_lookup_entry(
+ vm_map_t map,
+ vm_offset_t address,
+ vm_map_entry_t *entry) /* OUT */
+{
+ struct rbtree_node *node;
+ vm_map_entry_t hint;
+
+ /*
+ * First, make a quick check to see if we are already
+ * looking at the entry we want (which is often the case).
+ */
+
+ simple_lock(&map->hint_lock);
+ hint = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ if ((hint != vm_map_to_entry(map)) && (address >= hint->vme_start)) {
+ if (address < hint->vme_end) {
+ *entry = hint;
+ return(TRUE);
+ } else {
+ vm_map_entry_t next = hint->vme_next;
+
+ if ((next == vm_map_to_entry(map))
+ || (address < next->vme_start)) {
+ *entry = hint;
+ return(FALSE);
+ }
+ }
+ }
+
+ /*
+ * If the hint didn't help, use the red-black tree.
+ */
+
+ node = rbtree_lookup_nearest(&map->hdr.tree, address,
+ vm_map_entry_cmp_lookup, RBTREE_LEFT);
+
+ if (node == NULL) {
+ *entry = vm_map_to_entry(map);
+ SAVE_HINT(map, *entry);
+ return(FALSE);
+ } else {
+ *entry = rbtree_entry(node, struct vm_map_entry, tree_node);
+ SAVE_HINT(map, *entry);
+ return((address < (*entry)->vme_end) ? TRUE : FALSE);
+ }
+}
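+
+/*
+ * Illustrative use of vm_map_lookup_entry (a sketch mirroring the pattern
+ * of several routines below): callers usually want the entry containing an
+ * address, or the first entry after it when the address falls in a hole.
+ *
+ *	vm_map_entry_t entry;
+ *
+ *	if (vm_map_lookup_entry(map, start, &entry))
+ *		vm_map_clip_start(map, entry, start);
+ *	else
+ *		entry = entry->vme_next;
+ */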
+
+/*
+ * Find a range of available space from the specified map.
+ *
+ * If successful, this function returns the map entry immediately preceding
+ * the range, and writes the range start address in startp. If the map
+ * contains no entry, the entry returned points to the map header.
+ * On failure, NULL is returned.
+ *
+ * If map_locked is TRUE, the map is assumed to be already locked, and this
+ * function will not wait for more space in case of failure. Otherwise, this
+ * function locks the map itself, and may sleep waiting for space if the map
+ * allows it (wait_for_space).
+ */
+static struct vm_map_entry *
+vm_map_find_entry_anywhere(struct vm_map *map,
+ vm_size_t size,
+ vm_offset_t mask,
+ boolean_t map_locked,
+ vm_offset_t *startp)
+{
+ struct vm_map_entry *entry;
+ struct rbtree_node *node;
+ vm_size_t max_size;
+ vm_offset_t start, end;
+ vm_offset_t max;
+
+ assert(size != 0);
+
+ max = map->max_offset;
+ if (((mask + 1) & mask) != 0) {
+ /* We have high bits in addition to the low bits */
+
+ int first0 = __builtin_ffs(~mask); /* First zero after low bits */
+ vm_offset_t lowmask = (1UL << (first0-1)) - 1; /* low bits */
+ vm_offset_t himask = mask - lowmask; /* high bits */
+ int second1 = __builtin_ffs(himask); /* First one after low bits */
+
+ max = 1UL << (second1-1);
+
+ if (himask + max != 0) {
+ /* high bits do not continue up to the end */
+ printf("invalid mask %zx\n", mask);
+ return NULL;
+ }
+
+ mask = lowmask;
+ }
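+
+	/*
+	 * Worked example of the decomposition above (illustration only,
+	 * assuming 32-bit addresses): for mask = 0xc0000fff,
+	 *	~mask   = 0x3ffff000, so first0 = 13
+	 *	lowmask = 0x00000fff (alignment bits)
+	 *	himask  = 0xc0000000, so second1 = 31
+	 *	max     = 0x40000000
+	 * himask + max wraps to 0, so the mask is accepted: the allocation
+	 * must be 4 KiB aligned and end at or below 1 GiB.
+	 */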
+
+ if (!map_locked) {
+ vm_map_lock(map);
+ }
+
+restart:
+ if (map->hdr.nentries == 0) {
+ entry = vm_map_to_entry(map);
+ start = (map->min_offset + mask) & ~mask;
+ end = start + size;
+
+ if ((start < map->min_offset) || (end <= start) || (end > max)) {
+ goto error;
+ }
+
+ *startp = start;
+ return entry;
+ }
+
+ entry = map->first_free;
+
+ if (entry != vm_map_to_entry(map)) {
+ start = (entry->vme_end + mask) & ~mask;
+ end = start + size;
+
+ if ((start >= entry->vme_end)
+ && (end > start)
+ && (end <= max)
+ && (end <= (entry->vme_end + entry->gap_size))) {
+ *startp = start;
+ return entry;
+ }
+ }
+
+ max_size = size + mask;
+
+ if (max_size < size) {
+ printf("max_size %zd got smaller than size %zd with mask %zd\n",
+ max_size, size, mask);
+ goto error;
+ }
+
+ node = rbtree_lookup_nearest(&map->hdr.gap_tree, max_size,
+ vm_map_entry_gap_cmp_lookup, RBTREE_RIGHT);
+
+ if (node == NULL) {
+ if (map_locked || !map->wait_for_space) {
+ goto error;
+ }
+
+ assert_wait((event_t)map, TRUE);
+ vm_map_unlock(map);
+ thread_block(NULL);
+ vm_map_lock(map);
+ goto restart;
+ }
+
+ entry = rbtree_entry(node, struct vm_map_entry, gap_node);
+ assert(entry->in_gap_tree);
+
+ if (!list_empty(&entry->gap_list)) {
+ entry = list_last_entry(&entry->gap_list,
+ struct vm_map_entry, gap_list);
+ }
+
+ assert(entry->gap_size >= max_size);
+ start = (entry->vme_end + mask) & ~mask;
+ assert(start >= entry->vme_end);
+ end = start + size;
+ assert(end > start);
+ assert(end <= (entry->vme_end + entry->gap_size));
+ if (end > max) {
+ /* Does not respect the allowed maximum */
+ printf("%zx does not respect %zx\n", end, max);
+ return NULL;
+ }
+ *startp = start;
+ return entry;
+
+error:
+ printf("no more room in %p (%s)\n", map, map->name);
+ return NULL;
+}
+
+/*
+ * Routine: vm_map_find_entry
+ * Purpose:
+ * Allocate a range in the specified virtual address map,
+ * returning the entry allocated for that range.
+ * Used by kmem_alloc, etc. Returns wired entries.
+ *
+ * The map must be locked.
+ *
+ * If an entry is allocated, the object/offset fields
+ * are initialized to zero. If an object is supplied,
+ * then an existing entry may be extended.
+ */
+kern_return_t vm_map_find_entry(
+ vm_map_t map,
+ vm_offset_t *address, /* OUT */
+ vm_size_t size,
+ vm_offset_t mask,
+ vm_object_t object,
+ vm_map_entry_t *o_entry) /* OUT */
+{
+ vm_map_entry_t entry, new_entry;
+ vm_offset_t start;
+ vm_offset_t end;
+
+ entry = vm_map_find_entry_anywhere(map, size, mask, TRUE, &start);
+
+ if (entry == NULL) {
+ return KERN_NO_SPACE;
+ }
+
+ end = start + size;
+
+ /*
+ * At this point,
+ * "start" and "end" should define the endpoints of the
+ * available new range, and
+ * "entry" should refer to the region before the new
+ * range, and
+ *
+ * the map should be locked.
+ */
+
+ *address = start;
+
+ /*
+ * See whether we can avoid creating a new entry by
+ * extending one of our neighbors. [So far, we only attempt to
+ * extend from below.]
+ */
+
+ if ((object != VM_OBJECT_NULL) &&
+ (entry != vm_map_to_entry(map)) &&
+ (entry->vme_end == start) &&
+ (!entry->is_shared) &&
+ (!entry->is_sub_map) &&
+ (entry->object.vm_object == object) &&
+ (entry->needs_copy == FALSE) &&
+ (entry->inheritance == VM_INHERIT_DEFAULT) &&
+ (entry->protection == VM_PROT_DEFAULT) &&
+ (entry->max_protection == VM_PROT_ALL) &&
+ (entry->wired_count != 0) &&
+ (entry->projected_on == 0)) {
+ /*
+ * Because this is a special case,
+ * we don't need to use vm_object_coalesce.
+ */
+
+ entry->vme_end = end;
+ vm_map_gap_update(&map->hdr, entry);
+ new_entry = entry;
+ } else {
+ new_entry = vm_map_entry_create(map);
+
+ new_entry->vme_start = start;
+ new_entry->vme_end = end;
+
+ new_entry->is_shared = FALSE;
+ new_entry->is_sub_map = FALSE;
+ new_entry->object.vm_object = VM_OBJECT_NULL;
+ new_entry->offset = (vm_offset_t) 0;
+
+ new_entry->needs_copy = FALSE;
+
+ new_entry->inheritance = VM_INHERIT_DEFAULT;
+ new_entry->protection = VM_PROT_DEFAULT;
+ new_entry->max_protection = VM_PROT_ALL;
+ new_entry->wired_count = 1;
+ new_entry->wired_access = VM_PROT_DEFAULT;
+
+ new_entry->in_transition = FALSE;
+ new_entry->needs_wakeup = FALSE;
+ new_entry->projected_on = 0;
+
+ /*
+ * Insert the new entry into the list
+ */
+
+ vm_map_entry_link(map, entry, new_entry);
+ }
+
+ map->size += size;
+
+ /*
+ * Update the free space hint and the lookup hint
+ */
+
+ map->first_free = new_entry;
+ SAVE_HINT(map, new_entry);
+
+ *o_entry = new_entry;
+ return(KERN_SUCCESS);
+}
+
+boolean_t vm_map_pmap_enter_print = FALSE;
+boolean_t vm_map_pmap_enter_enable = FALSE;
+
+/*
+ * Routine: vm_map_pmap_enter
+ *
+ * Description:
+ * Force pages from the specified object to be entered into
+ * the pmap at the specified address if they are present.
+ * As soon as a page is not found in the object, the scan ends.
+ *
+ * Returns:
+ * Nothing.
+ *
+ * In/out conditions:
+ * The source map should not be locked on entry.
+ */
+static void
+vm_map_pmap_enter(
+ vm_map_t map,
+ vm_offset_t addr,
+ vm_offset_t end_addr,
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_prot_t protection)
+{
+ while (addr < end_addr) {
+ vm_page_t m;
+
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+
+ m = vm_page_lookup(object, offset);
+ if (m == VM_PAGE_NULL || m->absent) {
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ return;
+ }
+
+ if (vm_map_pmap_enter_print) {
+ printf("vm_map_pmap_enter:");
+ printf("map: %p, addr: %zx, object: %p, offset: %zx\n",
+ map, addr, object, offset);
+ }
+
+ m->busy = TRUE;
+ vm_object_unlock(object);
+
+ PMAP_ENTER(map->pmap, addr, m,
+ protection, FALSE);
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(m);
+ vm_page_lock_queues();
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ offset += PAGE_SIZE;
+ addr += PAGE_SIZE;
+ }
+}
+
+/*
+ * Routine: vm_map_enter
+ *
+ * Description:
+ * Allocate a range in the specified virtual address map.
+ * The resulting range will refer to memory defined by
+ * the given memory object and offset into that object.
+ *
+ * Arguments are as defined in the vm_map call.
+ */
+kern_return_t vm_map_enter(
+ vm_map_t map,
+ vm_offset_t *address, /* IN/OUT */
+ vm_size_t size,
+ vm_offset_t mask,
+ boolean_t anywhere,
+ vm_object_t object,
+ vm_offset_t offset,
+ boolean_t needs_copy,
+ vm_prot_t cur_protection,
+ vm_prot_t max_protection,
+ vm_inherit_t inheritance)
+{
+ vm_map_entry_t entry;
+ vm_map_entry_t next_entry;
+ vm_offset_t start;
+ vm_offset_t end;
+ kern_return_t result = KERN_SUCCESS;
+
+#define RETURN(value) { result = value; goto BailOut; }
+
+ if (size == 0)
+ return KERN_INVALID_ARGUMENT;
+
+ start = *address;
+
+ if (anywhere) {
+ entry = vm_map_find_entry_anywhere(map, size, mask, FALSE, &start);
+
+ if (entry == NULL) {
+ RETURN(KERN_NO_SPACE);
+ }
+
+ end = start + size;
+ *address = start;
+ next_entry = entry->vme_next;
+ } else {
+ vm_map_entry_t temp_entry;
+
+ /*
+ * Verify that:
+ * the address doesn't itself violate
+ * the mask requirement.
+ */
+
+ if ((start & mask) != 0)
+ return(KERN_NO_SPACE);
+
+ vm_map_lock(map);
+
+ /*
+ * ... the address is within bounds
+ */
+
+ end = start + size;
+
+ if ((start < map->min_offset) ||
+ (end > map->max_offset) ||
+ (start >= end)) {
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+
+ /*
+ * ... the starting address isn't allocated
+ */
+
+ if (vm_map_lookup_entry(map, start, &temp_entry))
+ RETURN(KERN_NO_SPACE);
+
+ entry = temp_entry;
+ next_entry = entry->vme_next;
+
+ /*
+ * ... the next region doesn't overlap the
+ * end point.
+ */
+
+ if ((next_entry != vm_map_to_entry(map)) &&
+ (next_entry->vme_start < end))
+ RETURN(KERN_NO_SPACE);
+ }
+
+ /*
+ * At this point,
+ * "start" and "end" should define the endpoints of the
+ * available new range, and
+ * "entry" should refer to the region before the new
+ * range, and
+ *
+ * the map should be locked.
+ */
+
+ /*
+ * See whether we can avoid creating a new entry (and object) by
+ * extending one of our neighbors.
+ */
+
+ if ((entry != vm_map_to_entry(map)) &&
+ (entry->vme_end == start) &&
+ (!entry->is_shared) &&
+ (!entry->is_sub_map) &&
+ (entry->inheritance == inheritance) &&
+ (entry->protection == cur_protection) &&
+ (entry->max_protection == max_protection) &&
+ (entry->wired_count == 0) &&
+ (entry->projected_on == 0)) {
+ if (vm_object_coalesce(entry->object.vm_object,
+ object,
+ entry->offset,
+ offset,
+ (vm_size_t)(entry->vme_end - entry->vme_start),
+ size,
+ &entry->object.vm_object,
+ &entry->offset)) {
+
+ /*
+ * Coalesced the two objects - can extend
+ * the previous map entry to include the
+ * new range.
+ */
+ map->size += size;
+ entry->vme_end = end;
+ vm_map_gap_update(&map->hdr, entry);
+ /*
+ * Now that we did, perhaps we could simplify
+ * things even further by coalescing the next
+ * entry into the one we just extended.
+ */
+ vm_map_coalesce_entry(map, next_entry);
+ RETURN(KERN_SUCCESS);
+ }
+ }
+ if ((next_entry != vm_map_to_entry(map)) &&
+ (next_entry->vme_start == end) &&
+ (!next_entry->is_shared) &&
+ (!next_entry->is_sub_map) &&
+ (next_entry->inheritance == inheritance) &&
+ (next_entry->protection == cur_protection) &&
+ (next_entry->max_protection == max_protection) &&
+ (next_entry->wired_count == 0) &&
+ (next_entry->projected_on == 0)) {
+ if (vm_object_coalesce(object,
+ next_entry->object.vm_object,
+ offset,
+ next_entry->offset,
+ size,
+ (vm_size_t)(next_entry->vme_end - next_entry->vme_start),
+ &next_entry->object.vm_object,
+ &next_entry->offset)) {
+
+ /*
+ * Coalesced the two objects - can extend
+ * the next map entry to include the
+ * new range.
+ */
+ map->size += size;
+ next_entry->vme_start = start;
+ vm_map_gap_update(&map->hdr, entry);
+ /*
+ * Now that we did, perhaps we could simplify
+ * things even further by coalescing the
+ * entry into the previous one.
+ */
+ vm_map_coalesce_entry(map, next_entry);
+ RETURN(KERN_SUCCESS);
+ }
+ }
+
+ /*
+ * Create a new entry
+ */
+
+ /**/ {
+ vm_map_entry_t new_entry;
+
+ new_entry = vm_map_entry_create(map);
+
+ new_entry->vme_start = start;
+ new_entry->vme_end = end;
+
+ new_entry->is_shared = FALSE;
+ new_entry->is_sub_map = FALSE;
+ new_entry->object.vm_object = object;
+ new_entry->offset = offset;
+
+ new_entry->needs_copy = needs_copy;
+
+ new_entry->inheritance = inheritance;
+ new_entry->protection = cur_protection;
+ new_entry->max_protection = max_protection;
+ new_entry->wired_count = 0;
+ new_entry->wired_access = VM_PROT_NONE;
+
+ new_entry->in_transition = FALSE;
+ new_entry->needs_wakeup = FALSE;
+ new_entry->projected_on = 0;
+
+ /*
+ * Insert the new entry into the list
+ */
+
+ vm_map_entry_link(map, entry, new_entry);
+ map->size += size;
+
+ /*
+ * Update the free space hint and the lookup hint
+ */
+
+ if ((map->first_free == entry) &&
+ ((entry == vm_map_to_entry(map) ? map->min_offset : entry->vme_end)
+ >= new_entry->vme_start))
+ map->first_free = new_entry;
+
+ SAVE_HINT(map, new_entry);
+
+ if (map->wiring_required) {
+ /* Returns with the map read-locked if successful */
+ result = vm_map_pageable(map, start, end, cur_protection, FALSE, FALSE);
+
+ if (result != KERN_SUCCESS) {
+ RETURN(KERN_SUCCESS);
+ }
+ }
+
+ vm_map_unlock(map);
+
+ if ((object != VM_OBJECT_NULL) &&
+ (vm_map_pmap_enter_enable) &&
+ (!anywhere) &&
+ (!needs_copy) &&
+ (size < (128*1024))) {
+ vm_map_pmap_enter(map, start, end,
+ object, offset, cur_protection);
+ }
+
+ return(result);
+ /**/ }
+
+ BailOut: ;
+
+ vm_map_unlock(map);
+ return(result);
+
+#undef RETURN
+}
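+
+/*
+ * Example use of vm_map_enter (a non-authoritative sketch): allocating
+ * "size" bytes of anonymous, zero-fill memory anywhere in a map, roughly
+ * what the simple allocation paths reduce to.
+ *
+ *	vm_offset_t addr = 0;
+ *	kern_return_t kr;
+ *
+ *	kr = vm_map_enter(map, &addr, size, (vm_offset_t) 0, TRUE,
+ *			  VM_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+ *			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+ */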
+
+/*
+ * vm_map_clip_start: [ internal use only ]
+ *
+ * Asserts that the given entry begins at or after
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ */
+#define vm_map_clip_start(map, entry, startaddr) \
+ MACRO_BEGIN \
+ if ((startaddr) > (entry)->vme_start) \
+ _vm_map_clip_start(&(map)->hdr,(entry),(startaddr),1); \
+ MACRO_END
+
+#define vm_map_copy_clip_start(copy, entry, startaddr) \
+ MACRO_BEGIN \
+ if ((startaddr) > (entry)->vme_start) \
+ _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr),0); \
+ MACRO_END
+
+/*
+ * This routine is called only when it is known that
+ * the entry must be split.
+ */
+void _vm_map_clip_start(
+ struct vm_map_header *map_header,
+ vm_map_entry_t entry,
+ vm_offset_t start,
+ boolean_t link_gap)
+{
+ vm_map_entry_t new_entry;
+
+ /*
+ * Split off the front portion --
+ * note that we must insert the new
+ * entry BEFORE this one, so that
+ * this entry has the specified starting
+ * address.
+ */
+
+ new_entry = _vm_map_entry_create(map_header);
+ vm_map_entry_copy_full(new_entry, entry);
+
+ new_entry->vme_end = start;
+ entry->offset += (start - entry->vme_start);
+ entry->vme_start = start;
+
+ _vm_map_entry_link(map_header, entry->vme_prev, new_entry, link_gap);
+
+ if (entry->is_sub_map)
+ vm_map_reference(new_entry->object.sub_map);
+ else
+ vm_object_reference(new_entry->object.vm_object);
+}
+
+/*
+ * vm_map_clip_end: [ internal use only ]
+ *
+ * Asserts that the given entry ends at or before
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ */
+#define vm_map_clip_end(map, entry, endaddr) \
+ MACRO_BEGIN \
+ if ((endaddr) < (entry)->vme_end) \
+ _vm_map_clip_end(&(map)->hdr,(entry),(endaddr),1); \
+ MACRO_END
+
+#define vm_map_copy_clip_end(copy, entry, endaddr) \
+ MACRO_BEGIN \
+ if ((endaddr) < (entry)->vme_end) \
+ _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr),0); \
+ MACRO_END
+
+/*
+ * This routine is called only when it is known that
+ * the entry must be split.
+ */
+void _vm_map_clip_end(
+ struct vm_map_header *map_header,
+ vm_map_entry_t entry,
+ vm_offset_t end,
+ boolean_t link_gap)
+{
+ vm_map_entry_t new_entry;
+
+ /*
+ * Create a new entry and insert it
+ * AFTER the specified entry
+ */
+
+ new_entry = _vm_map_entry_create(map_header);
+ vm_map_entry_copy_full(new_entry, entry);
+
+ new_entry->vme_start = entry->vme_end = end;
+ new_entry->offset += (end - entry->vme_start);
+
+ _vm_map_entry_link(map_header, entry, new_entry, link_gap);
+
+ if (entry->is_sub_map)
+ vm_map_reference(new_entry->object.sub_map);
+ else
+ vm_object_reference(new_entry->object.vm_object);
+}
+
+/*
+ * VM_MAP_RANGE_CHECK: [ internal use only ]
+ *
+ * Asserts that the starting and ending region
+ * addresses fall within the valid range of the map.
+ */
+#define VM_MAP_RANGE_CHECK(map, start, end) \
+ { \
+ if (start < vm_map_min(map)) \
+ start = vm_map_min(map); \
+ if (end > vm_map_max(map)) \
+ end = vm_map_max(map); \
+ if (start > end) \
+ start = end; \
+ }
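+
+/*
+ * Note (descriptive only): VM_MAP_RANGE_CHECK silently clamps the range to
+ * the map's valid bounds; a range entirely outside the map degenerates to
+ * start == end, so the callers below end up operating on an empty range
+ * rather than reporting an error.
+ */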
+
+/*
+ * vm_map_submap: [ kernel use only ]
+ *
+ * Mark the given range as handled by a subordinate map.
+ *
+ * This range must have been created with vm_map_find using
+ * the vm_submap_object, and no other operations may have been
+ * performed on this range prior to calling vm_map_submap.
+ *
+ * Only a limited number of operations can be performed
+ * within this range after calling vm_map_submap:
+ * vm_fault
+ * [Don't try vm_map_copyin!]
+ *
+ * To remove a submapping, one must first remove the
+ * range from the superior map, and then destroy the
+ * submap (if desired). [Better yet, don't try it.]
+ */
+kern_return_t vm_map_submap(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_map_t submap)
+{
+ vm_map_entry_t entry;
+ kern_return_t result = KERN_INVALID_ARGUMENT;
+ vm_object_t object;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &entry)) {
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = entry->vme_next;
+
+ vm_map_clip_end(map, entry, end);
+
+ if ((entry->vme_start == start) && (entry->vme_end == end) &&
+ (!entry->is_sub_map) &&
+ ((object = entry->object.vm_object) == vm_submap_object) &&
+ (object->resident_page_count == 0) &&
+ (object->copy == VM_OBJECT_NULL) &&
+ (object->shadow == VM_OBJECT_NULL) &&
+ (!object->pager_created)) {
+ entry->object.vm_object = VM_OBJECT_NULL;
+ vm_object_deallocate(object);
+ entry->is_sub_map = TRUE;
+ vm_map_reference(entry->object.sub_map = submap);
+ result = KERN_SUCCESS;
+ }
+ vm_map_unlock(map);
+
+ return(result);
+}
+
+static void
+vm_map_entry_inc_wired(vm_map_t map, vm_map_entry_t entry)
+{
+ /*
+ * This member is a counter to indicate whether an entry
+ * should be faulted in (first time it is wired, wired_count
+ * goes from 0 to 1) or not (other times, wired_count goes
+ * from 1 to 2 or remains 2).
+ */
+ if (entry->wired_count > 1) {
+ return;
+ }
+
+ if (entry->wired_count == 0) {
+ map->size_wired += entry->vme_end - entry->vme_start;
+ }
+
+ entry->wired_count++;
+}
+
+static void
+vm_map_entry_reset_wired(vm_map_t map, vm_map_entry_t entry)
+{
+ if (entry->wired_count != 0) {
+ map->size_wired -= entry->vme_end - entry->vme_start;
+ entry->wired_count = 0;
+ }
+}
+
+/*
+ * vm_map_pageable_scan: scan entries and update wiring as appropriate
+ *
+ * This function is used by the VM system after either the wiring
+ * access or protection of a mapping changes. It scans part or
+ * all of the entries of a map, and either wires, unwires, or skips
+ * entries depending on their state.
+ *
+ * The map must be locked. If wiring faults are performed, the lock
+ * is downgraded to a read lock. The caller should always consider
+ * the map read locked on return.
+ */
+static void
+vm_map_pageable_scan(struct vm_map *map,
+ struct vm_map_entry *start,
+ struct vm_map_entry *end)
+{
+ struct vm_map_entry *entry;
+ boolean_t do_wire_faults;
+
+ /*
+ * Pass 1. Update counters and prepare wiring faults.
+ */
+
+ do_wire_faults = FALSE;
+
+ for (entry = start; entry != end; entry = entry->vme_next) {
+
+ /*
+ * Unwiring.
+ *
+ * Note that unwiring faults can be performed while
+ * holding a write lock on the map. A wiring fault
+ * can only be done with a read lock.
+ */
+
+ if (entry->wired_access == VM_PROT_NONE) {
+ if (entry->wired_count != 0) {
+ vm_map_entry_reset_wired(map, entry);
+ vm_fault_unwire(map, entry);
+ }
+
+ continue;
+ }
+
+ /*
+ * Wiring.
+ */
+
+ if (entry->protection == VM_PROT_NONE) {
+
+ /*
+ * Make sure entries that cannot be accessed
+ * because of their protection aren't wired.
+ */
+
+ if (entry->wired_count == 0) {
+ continue;
+ }
+
+ /*
+ * This normally occurs after changing the protection of
+ * a wired region to VM_PROT_NONE.
+ */
+ vm_map_entry_reset_wired(map, entry);
+ vm_fault_unwire(map, entry);
+ continue;
+ }
+
+ /*
+ * We must do this in two passes:
+ *
+ * 1. Holding the write lock, we create any shadow
+ * or zero-fill objects that need to be created.
+ * Then we increment the wiring count.
+ *
+ * 2. We downgrade to a read lock, and call
+ * vm_fault_wire to fault in the pages for any
+ * newly wired area (wired_count is 1).
+ *
+ * Downgrading to a read lock for vm_fault_wire avoids
+ * a possible deadlock with another thread that may have
+ * faulted on one of the pages to be wired (it would mark
+ * the page busy, blocking us, then in turn block on the
+ * map lock that we hold). Because of problems in the
+ * recursive lock package, we cannot upgrade to a write
+ * lock in vm_map_lookup. Thus, any actions that require
+ * the write lock must be done beforehand. Because we
+ * keep the read lock on the map, the copy-on-write
+ * status of the entries we modify here cannot change.
+ */
+
+ if (entry->wired_count == 0) {
+ /*
+ * Perform actions of vm_map_lookup that need
+ * the write lock on the map: create a shadow
+ * object for a copy-on-write region, or an
+ * object for a zero-fill region.
+ */
+ if (entry->needs_copy &&
+ ((entry->protection & VM_PROT_WRITE) != 0)) {
+ vm_object_shadow(&entry->object.vm_object,
+ &entry->offset,
+ (vm_size_t)(entry->vme_end
+ - entry->vme_start));
+ entry->needs_copy = FALSE;
+ }
+
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+ entry->object.vm_object =
+ vm_object_allocate(
+ (vm_size_t)(entry->vme_end
+ - entry->vme_start));
+ entry->offset = (vm_offset_t)0;
+ }
+ }
+
+ vm_map_entry_inc_wired(map, entry);
+
+ if (entry->wired_count == 1) {
+ do_wire_faults = TRUE;
+ }
+ }
+
+ /*
+ * Pass 2. Trigger wiring faults.
+ */
+
+ if (!do_wire_faults) {
+ return;
+ }
+
+ /*
+ * HACK HACK HACK HACK
+ *
+ * If we are wiring in the kernel map or a submap of it,
+ * unlock the map to avoid deadlocks. We trust that the
+ * kernel threads are well-behaved, and therefore will
+ * not do anything destructive to this region of the map
+ * while we have it unlocked. We cannot trust user threads
+ * to do the same.
+ *
+ * HACK HACK HACK HACK
+ */
+ if (vm_map_pmap(map) == kernel_pmap) {
+ vm_map_unlock(map); /* trust me ... */
+ } else {
+ vm_map_lock_set_recursive(map);
+ vm_map_lock_write_to_read(map);
+ }
+
+ for (entry = start; entry != end; entry = entry->vme_next) {
+ /*
+ * The wiring count can only be 1 if it was
+ * incremented by this function right before
+ * downgrading the lock.
+ */
+ if (entry->wired_count == 1) {
+ /*
+ * XXX This assumes that the faults always succeed.
+ */
+ vm_fault_wire(map, entry);
+ }
+ }
+
+ if (vm_map_pmap(map) == kernel_pmap) {
+ vm_map_lock(map);
+ } else {
+ vm_map_lock_clear_recursive(map);
+ }
+}
+
+/*
+ * vm_map_protect:
+ *
+ * Sets the protection of the specified address
+ * region in the target map. If "set_max" is
+ * specified, the maximum protection is to be set;
+ * otherwise, only the current protection is affected.
+ */
+kern_return_t vm_map_protect(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_prot_t new_prot,
+ boolean_t set_max)
+{
+ vm_map_entry_t current;
+ vm_map_entry_t entry;
+ vm_map_entry_t next;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &entry)) {
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = entry->vme_next;
+
+ /*
+ * Make a first pass to check for protection
+ * violations.
+ */
+
+ current = entry;
+ while ((current != vm_map_to_entry(map)) &&
+ (current->vme_start < end)) {
+
+ if (current->is_sub_map) {
+ vm_map_unlock(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ if ((new_prot & (VM_PROT_NOTIFY | current->max_protection))
+ != new_prot) {
+ vm_map_unlock(map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
+ current = current->vme_next;
+ }
+
+ /*
+ * Go back and fix up protections.
+ * [Note that clipping is not necessary the second time.]
+ */
+
+ current = entry;
+
+ while ((current != vm_map_to_entry(map)) &&
+ (current->vme_start < end)) {
+
+ vm_prot_t old_prot;
+
+ vm_map_clip_end(map, current, end);
+
+ old_prot = current->protection;
+ if (set_max)
+ current->protection =
+ (current->max_protection = new_prot) &
+ old_prot;
+ else
+ current->protection = new_prot;
+
+ /*
+ * Make sure the new protection doesn't conflict
+ * with the desired wired access if any.
+ */
+
+ if ((current->protection != VM_PROT_NONE) &&
+ (current->wired_access != VM_PROT_NONE ||
+ map->wiring_required)) {
+ current->wired_access = current->protection;
+ }
+
+ /*
+ * Update physical map if necessary.
+ */
+
+ if (current->protection != old_prot) {
+ pmap_protect(map->pmap, current->vme_start,
+ current->vme_end,
+ current->protection);
+ }
+
+ next = current->vme_next;
+ vm_map_coalesce_entry(map, current);
+ current = next;
+ }
+
+ next = current->vme_next;
+ if (vm_map_coalesce_entry(map, current))
+ current = next;
+
+ /* Returns with the map read-locked if successful */
+ vm_map_pageable_scan(map, entry, current);
+
+ vm_map_unlock(map);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_inherit:
+ *
+ * Sets the inheritance of the specified address
+ * range in the target map. Inheritance
+ * affects how the map will be shared with
+ * child maps at the time of vm_map_fork.
+ */
+kern_return_t vm_map_inherit(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_inherit_t new_inheritance)
+{
+ vm_map_entry_t entry;
+ vm_map_entry_t temp_entry;
+ vm_map_entry_t next;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &temp_entry)) {
+ entry = temp_entry;
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = temp_entry->vme_next;
+
+ while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
+ vm_map_clip_end(map, entry, end);
+
+ entry->inheritance = new_inheritance;
+
+ next = entry->vme_next;
+ vm_map_coalesce_entry(map, entry);
+ entry = next;
+ }
+
+ vm_map_coalesce_entry(map, entry);
+
+ vm_map_unlock(map);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_pageable:
+ *
+ * Sets the pageability of the specified address
+ * range in the target map. Regions specified
+ * as not pageable require locked-down physical
+ * memory and physical page maps. access_type indicates
+ * types of accesses that must not generate page faults.
+ * This is checked against protection of memory being locked-down.
+ * access_type of VM_PROT_NONE makes memory pageable.
+ *
+ * If lock_map is TRUE, the map is locked and unlocked
+ * by this function. Otherwise, it is assumed the caller
+ * already holds the lock, in which case the function
+ * returns with the lock downgraded to a read lock if successful.
+ *
+ * If check_range is TRUE, this function fails if it finds
+ * holes or protection mismatches in the specified range.
+ *
+ * A reference must remain to the map throughout the call.
+ */
+
+kern_return_t vm_map_pageable(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_prot_t access_type,
+ boolean_t lock_map,
+ boolean_t check_range)
+{
+ vm_map_entry_t entry;
+ vm_map_entry_t start_entry;
+ vm_map_entry_t end_entry;
+
+ if (lock_map) {
+ vm_map_lock(map);
+ }
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (!vm_map_lookup_entry(map, start, &start_entry)) {
+ /*
+ * Start address is not in map; this is fatal.
+ */
+ if (lock_map) {
+ vm_map_unlock(map);
+ }
+
+ return KERN_NO_SPACE;
+ }
+
+ /*
+ * Pass 1. Clip entries, check for holes and protection mismatches
+ * if requested.
+ */
+
+ vm_map_clip_start(map, start_entry, start);
+
+ for (entry = start_entry;
+ (entry != vm_map_to_entry(map)) &&
+ (entry->vme_start < end);
+ entry = entry->vme_next) {
+ vm_map_clip_end(map, entry, end);
+
+ if (check_range &&
+ (((entry->vme_end < end) &&
+ ((entry->vme_next == vm_map_to_entry(map)) ||
+ (entry->vme_next->vme_start > entry->vme_end))) ||
+ ((entry->protection & access_type) != access_type))) {
+ if (lock_map) {
+ vm_map_unlock(map);
+ }
+
+ return KERN_NO_SPACE;
+ }
+ }
+
+ end_entry = entry;
+
+ /*
+ * Pass 2. Set the desired wired access.
+ */
+
+ for (entry = start_entry; entry != end_entry; entry = entry->vme_next) {
+ entry->wired_access = access_type;
+ }
+
+ /* Returns with the map read-locked */
+ vm_map_pageable_scan(map, start_entry, end_entry);
+
+ if (lock_map) {
+ vm_map_unlock(map);
+ }
+
+ return(KERN_SUCCESS);
+}
+
+/* Update pageability of all the memory currently in the map.
+ * The map must be locked, and protection mismatch will not be checked, see
+ * vm_map_pageable().
+ */
+static kern_return_t
+vm_map_pageable_current(vm_map_t map, vm_prot_t access_type)
+{
+ struct rbtree_node *node;
+ vm_offset_t min_address, max_address;
+
+ node = rbtree_first(&map->hdr.tree);
+ min_address = rbtree_entry(node, struct vm_map_entry,
+ tree_node)->vme_start;
+
+ node = rbtree_last(&map->hdr.tree);
+ max_address = rbtree_entry(node, struct vm_map_entry,
+ tree_node)->vme_end;
+
+ /* Returns with the map read-locked if successful */
+ return vm_map_pageable(map, min_address, max_address,access_type,
+ FALSE, FALSE);
+}
+
+
+/*
+ * vm_map_pageable_all:
+ *
+ * Sets the pageability of an entire map. If the VM_WIRE_CURRENT
+ * flag is set, then all current mappings are locked down. If the
+ * VM_WIRE_FUTURE flag is set, then all mappings created after the
+ * call returns are locked down. If no flags are passed
+ * (i.e. VM_WIRE_NONE), all mappings become pageable again, and
+ * future mappings aren't automatically locked down any more.
+ *
+ * The access type of the mappings matches their current protection.
+ * Null mappings (with protection VM_PROT_NONE) are updated to track
+ * that they should be wired in case they become accessible.
+ */
+kern_return_t
+vm_map_pageable_all(struct vm_map *map, vm_wire_t flags)
+{
+ boolean_t wiring_required;
+ kern_return_t kr;
+
+ if ((flags & ~VM_WIRE_ALL) != 0) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ vm_map_lock(map);
+
+ if (flags == VM_WIRE_NONE) {
+ map->wiring_required = FALSE;
+
+ /* Returns with the map read-locked if successful */
+ kr = vm_map_pageable_current(map, VM_PROT_NONE);
+ vm_map_unlock(map);
+ return kr;
+ }
+
+ wiring_required = map->wiring_required;
+
+ if (flags & VM_WIRE_FUTURE) {
+ map->wiring_required = TRUE;
+ }
+
+ if (flags & VM_WIRE_CURRENT) {
+ /* Returns with the map read-locked if successful */
+ kr = vm_map_pageable_current(map, VM_PROT_READ | VM_PROT_WRITE);
+
+ if (kr != KERN_SUCCESS) {
+ if (flags & VM_WIRE_FUTURE) {
+ map->wiring_required = wiring_required;
+ }
+
+ vm_map_unlock(map);
+ return kr;
+ }
+ }
+
+ vm_map_unlock(map);
+
+ return KERN_SUCCESS;
+}
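+
+/*
+ * Illustrative calls (a sketch, not exercised here): wiring down everything
+ * a task currently maps plus anything it maps later, and undoing it again.
+ *
+ *	kr = vm_map_pageable_all(map, VM_WIRE_CURRENT | VM_WIRE_FUTURE);
+ *	...
+ *	kr = vm_map_pageable_all(map, VM_WIRE_NONE);
+ */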
+
+/*
+ * vm_map_entry_delete: [ internal use only ]
+ *
+ * Deallocate the given entry from the target map.
+ */
+void vm_map_entry_delete(
+ vm_map_t map,
+ vm_map_entry_t entry)
+{
+ vm_offset_t s, e;
+ vm_size_t size;
+ vm_object_t object;
+ extern vm_object_t kernel_object;
+
+ s = entry->vme_start;
+ e = entry->vme_end;
+ size = e - s;
+
+ /*Check if projected buffer*/
+ if (map != kernel_map && entry->projected_on != 0) {
+ /*Check if projected kernel entry is persistent;
+ may only manipulate directly if it is*/
+ if (entry->projected_on->projected_on == 0)
+ entry->wired_count = 0; /*Avoid unwire fault*/
+ else
+ return;
+ }
+
+ /*
+ * Get the object. Null objects cannot have pmap entries.
+ */
+
+ if ((object = entry->object.vm_object) != VM_OBJECT_NULL) {
+
+ /*
+ * Unwire before removing addresses from the pmap;
+ * otherwise, unwiring will put the entries back in
+ * the pmap.
+ */
+
+ if (entry->wired_count != 0) {
+ vm_map_entry_reset_wired(map, entry);
+ vm_fault_unwire(map, entry);
+ }
+
+ /*
+ * If the object is shared, we must remove
+ * *all* references to this data, since we can't
+ * find all of the physical maps which are sharing
+ * it.
+ */
+
+ if (object == kernel_object) {
+ vm_object_lock(object);
+ vm_object_page_remove(object, entry->offset,
+ entry->offset + size);
+ vm_object_unlock(object);
+ } else if (entry->is_shared) {
+ vm_object_pmap_remove(object,
+ entry->offset,
+ entry->offset + size);
+ } else {
+ pmap_remove(map->pmap, s, e);
+ /*
+ * If this object has no pager and our
+ * reference to the object is the only
+ * one, we can release the deleted pages
+ * now.
+ */
+ vm_object_lock(object);
+ if ((!object->pager_created) &&
+ (object->ref_count == 1) &&
+ (object->paging_in_progress == 0)) {
+ vm_object_page_remove(object,
+ entry->offset,
+ entry->offset + size);
+ }
+ vm_object_unlock(object);
+ }
+ }
+
+ /*
+ * Deallocate the object only after removing all
+ * pmap entries pointing to its pages.
+ */
+
+ if (entry->is_sub_map)
+ vm_map_deallocate(entry->object.sub_map);
+ else
+ vm_object_deallocate(entry->object.vm_object);
+
+ vm_map_entry_unlink(map, entry);
+ map->size -= size;
+
+ vm_map_entry_dispose(map, entry);
+}
+
+/*
+ * vm_map_delete: [ internal use only ]
+ *
+ * Deallocates the given address range from the target
+ * map.
+ */
+
+kern_return_t vm_map_delete(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ vm_map_entry_t entry;
+ vm_map_entry_t first_entry;
+
+ if (map->pmap == kernel_pmap && (start < kernel_virtual_start || end > kernel_virtual_end))
+ panic("vm_map_delete(%lx-%lx) falls in physical memory area!\n", (unsigned long) start, (unsigned long) end);
+
+ /*
+ * Find the start of the region, and clip it
+ */
+
+ if (!vm_map_lookup_entry(map, start, &first_entry))
+ entry = first_entry->vme_next;
+ else {
+ entry = first_entry;
+ vm_map_clip_start(map, entry, start);
+
+ /*
+ * Fix the lookup hint now, rather than each
+ * time through the loop.
+ */
+
+ SAVE_HINT(map, entry->vme_prev);
+ }
+
+ /*
+ * Save the free space hint
+ */
+
+ if (map->first_free->vme_start >= start)
+ map->first_free = entry->vme_prev;
+
+ /*
+ * Step through all entries in this region
+ */
+
+ while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
+ vm_map_entry_t next;
+
+ vm_map_clip_end(map, entry, end);
+
+ /*
+ * If the entry is in transition, we must wait
+ * for it to exit that state. It could be clipped
+ * while we leave the map unlocked.
+ */
+ if(entry->in_transition) {
+ /*
+ * Say that we are waiting, and wait for entry.
+ */
+ entry->needs_wakeup = TRUE;
+ vm_map_entry_wait(map, FALSE);
+ vm_map_lock(map);
+
+ /*
+ * The entry could have been clipped or it
+ * may not exist anymore. Look it up again.
+ */
+ if(!vm_map_lookup_entry(map, start, &entry)) {
+ entry = entry->vme_next;
+ }
+ continue;
+ }
+
+ next = entry->vme_next;
+
+ vm_map_entry_delete(map, entry);
+ entry = next;
+ }
+
+ if (map->wait_for_space)
+ thread_wakeup((event_t) map);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_remove:
+ *
+ * Remove the given address range from the target map.
+ * This is the exported form of vm_map_delete.
+ */
+kern_return_t vm_map_remove(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ kern_return_t result;
+
+ vm_map_lock(map);
+ VM_MAP_RANGE_CHECK(map, start, end);
+ result = vm_map_delete(map, start, end);
+ vm_map_unlock(map);
+
+ return(result);
+}
+
+
+/*
+ * vm_map_copy_steal_pages:
+ *
+ * Steal all the pages from a vm_map_copy page_list by copying ones
+ * that have not already been stolen.
+ */
+static void
+vm_map_copy_steal_pages(vm_map_copy_t copy)
+{
+ vm_page_t m, new_m;
+ int i;
+ vm_object_t object;
+
+ for (i = 0; i < copy->cpy_npages; i++) {
+
+ /*
+ * If the page is not tabled, then it's already stolen.
+ */
+ m = copy->cpy_page_list[i];
+ if (!m->tabled)
+ continue;
+
+ /*
+ * Page was not stolen, get a new
+ * one and do the copy now.
+ */
+ while ((new_m = vm_page_grab(VM_PAGE_HIGHMEM)) == VM_PAGE_NULL) {
+ VM_PAGE_WAIT((void(*)()) 0);
+ }
+
+ vm_page_copy(m, new_m);
+
+ object = m->object;
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ copy->cpy_page_list[i] = new_m;
+ }
+}
+
+/*
+ * vm_map_copy_page_discard:
+ *
+ * Get rid of the pages in a page_list copy. If the pages are
+ * stolen, they are freed. If the pages are not stolen, they
+ * are unbusied, and associated state is cleaned up.
+ */
+void vm_map_copy_page_discard(vm_map_copy_t copy)
+{
+ while (copy->cpy_npages > 0) {
+ vm_page_t m;
+
+ if((m = copy->cpy_page_list[--(copy->cpy_npages)]) !=
+ VM_PAGE_NULL) {
+
+ /*
+ * If it's not in the table, then it's
+ * a stolen page that goes back
+ * to the free list. Else it belongs
+ * to some object, and we hold a
+ * paging reference on that object.
+ */
+ if (!m->tabled) {
+ VM_PAGE_FREE(m);
+ }
+ else {
+ vm_object_t object;
+
+ object = m->object;
+
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ if (!m->active && !m->inactive)
+ vm_page_activate(m);
+ vm_page_unlock_queues();
+
+ PAGE_WAKEUP_DONE(m);
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+ }
+ }
+ }
+}
+
+/*
+ * Routine: vm_map_copy_discard
+ *
+ * Description:
+ * Dispose of a map copy object (returned by
+ * vm_map_copyin).
+ */
+void
+vm_map_copy_discard(vm_map_copy_t copy)
+{
+free_next_copy:
+ if (copy == VM_MAP_COPY_NULL)
+ return;
+
+ switch (copy->type) {
+ case VM_MAP_COPY_ENTRY_LIST:
+ while (vm_map_copy_first_entry(copy) !=
+ vm_map_copy_to_entry(copy)) {
+ vm_map_entry_t entry = vm_map_copy_first_entry(copy);
+
+ vm_map_copy_entry_unlink(copy, entry);
+ vm_object_deallocate(entry->object.vm_object);
+ vm_map_copy_entry_dispose(copy, entry);
+ }
+ break;
+ case VM_MAP_COPY_OBJECT:
+ vm_object_deallocate(copy->cpy_object);
+ break;
+ case VM_MAP_COPY_PAGE_LIST:
+
+ /*
+ * To clean this up, we have to unbusy all the pages
+ * and release the paging references in their objects.
+ */
+ if (copy->cpy_npages > 0)
+ vm_map_copy_page_discard(copy);
+
+ /*
+ * If there's a continuation, abort it. The
+ * abort routine releases any storage.
+ */
+ if (vm_map_copy_has_cont(copy)) {
+
+ /*
+ * Special case: recognize
+ * vm_map_copy_discard_cont and optimize
+ * here to avoid tail recursion.
+ */
+ if (copy->cpy_cont == vm_map_copy_discard_cont) {
+ vm_map_copy_t new_copy;
+
+ new_copy = (vm_map_copy_t) copy->cpy_cont_args;
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy);
+ copy = new_copy;
+ goto free_next_copy;
+ }
+ else {
+ vm_map_copy_abort_cont(copy);
+ }
+ }
+
+ break;
+ }
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy);
+}
+
+/*
+ * Routine: vm_map_copy_copy
+ *
+ * Description:
+ * Move the information in a map copy object to
+ * a new map copy object, leaving the old one
+ * empty.
+ *
+ * This is used by kernel routines that need
+ * to look at out-of-line data (in copyin form)
+ * before deciding whether to return SUCCESS.
+ * If the routine returns FAILURE, the original
+ * copy object will be deallocated; therefore,
+ * these routines must make a copy of the copy
+ * object and leave the original empty so that
+ * deallocation will not fail.
+ */
+vm_map_copy_t
+vm_map_copy_copy(vm_map_copy_t copy)
+{
+ vm_map_copy_t new_copy;
+
+ if (copy == VM_MAP_COPY_NULL)
+ return VM_MAP_COPY_NULL;
+
+ /*
+ * Allocate a new copy object, and copy the information
+ * from the old one into it.
+ */
+
+ new_copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
+ *new_copy = *copy;
+
+ if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
+ /*
+ * The links in the entry chain must be
+ * changed to point to the new copy object.
+ */
+ vm_map_copy_first_entry(copy)->vme_prev
+ = vm_map_copy_to_entry(new_copy);
+ vm_map_copy_last_entry(copy)->vme_next
+ = vm_map_copy_to_entry(new_copy);
+ }
+
+ /*
+ * Change the old copy object into one that contains
+ * nothing to be deallocated.
+ */
+ copy->type = VM_MAP_COPY_OBJECT;
+ copy->cpy_object = VM_OBJECT_NULL;
+
+ /*
+ * Return the new object.
+ */
+ return new_copy;
+}
+
+/*
+ * Routine: vm_map_copy_discard_cont
+ *
+ * Description:
+ * A version of vm_map_copy_discard that can be called
+ * as a continuation from a vm_map_copy page list.
+ */
+kern_return_t vm_map_copy_discard_cont(
+ vm_map_copyin_args_t cont_args,
+ vm_map_copy_t *copy_result) /* OUT */
+{
+ vm_map_copy_discard((vm_map_copy_t) cont_args);
+ if (copy_result != (vm_map_copy_t *)0)
+ *copy_result = VM_MAP_COPY_NULL;
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Routine: vm_map_copy_overwrite
+ *
+ * Description:
+ * Copy the memory described by the map copy
+ * object (copy; returned by vm_map_copyin) onto
+ * the specified destination region (dst_map, dst_addr).
+ * The destination must be writeable.
+ *
+ * Unlike vm_map_copyout, this routine actually
+ * writes over previously-mapped memory. If the
+ * previous mapping was to a permanent (user-supplied)
+ * memory object, it is preserved.
+ *
+ * The attributes (protection and inheritance) of the
+ * destination region are preserved.
+ *
+ * If successful, consumes the copy object.
+ * Otherwise, the caller is responsible for it.
+ *
+ * Implementation notes:
+ * To overwrite temporary virtual memory, it is
+ * sufficient to remove the previous mapping and insert
+ * the new copy. This replacement is done either on
+ * the whole region (if no permanent virtual memory
+ * objects are embedded in the destination region) or
+ * in individual map entries.
+ *
+ * To overwrite permanent virtual memory, it is
+ * necessary to copy each page, as the external
+ * memory management interface currently does not
+ * provide any optimizations.
+ *
+ * Once a page of permanent memory has been overwritten,
+ * it is impossible to interrupt this function; otherwise,
+ * the call would be neither atomic nor location-independent.
+ * The kernel-state portion of a user thread must be
+ * interruptible.
+ *
+ * It may be expensive to forward all requests that might
+ * overwrite permanent memory (vm_write, vm_copy) to
+ * uninterruptible kernel threads. This routine may be
+ * called by interruptible threads; however, success is
+ * not guaranteed -- if the request cannot be performed
+ * atomically and interruptibly, an error indication is
+ * returned.
+ */
+kern_return_t vm_map_copy_overwrite(
+ vm_map_t dst_map,
+ vm_offset_t dst_addr,
+ vm_map_copy_t copy,
+ boolean_t interruptible)
+{
+ vm_size_t size;
+ vm_offset_t start;
+ vm_map_entry_t tmp_entry;
+ vm_map_entry_t entry;
+
+ boolean_t contains_permanent_objects = FALSE;
+
+ interruptible = FALSE; /* XXX */
+
+ /*
+ * Check for null copy object.
+ */
+
+ if (copy == VM_MAP_COPY_NULL)
+ return(KERN_SUCCESS);
+
+ /*
+ * Only works for entry lists at the moment. Will
+ * support page lists LATER.
+ */
+
+ assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
+
+ /*
+ * Currently this routine only handles page-aligned
+ * regions. Eventually, it should handle misalignments
+ * by actually copying pages.
+ */
+
+ if (!page_aligned(copy->offset) ||
+ !page_aligned(copy->size) ||
+ !page_aligned(dst_addr))
+ return(KERN_INVALID_ARGUMENT);
+
+ size = copy->size;
+
+ if (size == 0) {
+ vm_map_copy_discard(copy);
+ return(KERN_SUCCESS);
+ }
+
+ /*
+ * Verify that the destination is all writeable
+ * initially.
+ */
+start_pass_1:
+ vm_map_lock(dst_map);
+ if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ vm_map_clip_start(dst_map, tmp_entry, dst_addr);
+ for (entry = tmp_entry;;) {
+ vm_size_t sub_size = (entry->vme_end - entry->vme_start);
+ vm_map_entry_t next = entry->vme_next;
+
+ if ( ! (entry->protection & VM_PROT_WRITE)) {
+ vm_map_unlock(dst_map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * If the entry is in transition, we must wait
+ * for it to exit that state. Anything could happen
+ * when we unlock the map, so start over.
+ */
+ if (entry->in_transition) {
+
+ /*
+ * Say that we are waiting, and wait for entry.
+ */
+ entry->needs_wakeup = TRUE;
+ vm_map_entry_wait(dst_map, FALSE);
+
+ goto start_pass_1;
+ }
+
+ if (size <= sub_size)
+ break;
+
+ if ((next == vm_map_to_entry(dst_map)) ||
+ (next->vme_start != entry->vme_end)) {
+ vm_map_unlock(dst_map);
+ return(KERN_INVALID_ADDRESS);
+ }
+
+
+ /*
+ * Check for permanent objects in the destination.
+ */
+
+ if ((entry->object.vm_object != VM_OBJECT_NULL) &&
+ !entry->object.vm_object->temporary)
+ contains_permanent_objects = TRUE;
+
+ size -= sub_size;
+ entry = next;
+ }
+
+ /*
+ * If there are permanent objects in the destination, then
+ * the copy cannot be interrupted.
+ */
+
+ if (interruptible && contains_permanent_objects) {
+ vm_map_unlock(dst_map);
+ return(KERN_FAILURE); /* XXX */
+ }
+
+ /*
+ * XXXO If there are no permanent objects in the destination,
+ * XXXO and the destination map entry is not shared,
+ * XXXO then the map entries can be deleted and replaced
+ * XXXO with those from the copy. The following code is the
+ * XXXO basic idea of what to do, but there are lots of annoying
+ * XXXO little details about getting protection and inheritance
+ * XXXO right. Should add protection, inheritance, and sharing checks
+ * XXXO to the above pass and make sure that no wiring is involved.
+ */
+/*
+ * if (!contains_permanent_objects) {
+ *
+ * *
+ * * Run over copy and adjust entries. Steal code
+ * * from vm_map_copyout() to do this.
+ * *
+ *
+ * tmp_entry = tmp_entry->vme_prev;
+ * vm_map_delete(dst_map, dst_addr, dst_addr + copy->size);
+ * vm_map_copy_insert(dst_map, tmp_entry, copy);
+ *
+ * vm_map_unlock(dst_map);
+ * vm_map_copy_discard(copy);
+ * }
+ */
+ /*
+ *
+ * Make a second pass, overwriting the data
+ * At the beginning of each loop iteration,
+ * the next entry to be overwritten is "tmp_entry"
+ * (initially, the value returned from the lookup above),
+ * and the starting address expected in that entry
+ * is "start".
+ */
+
+ start = dst_addr;
+
+ while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
+ vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
+ vm_size_t copy_size = (copy_entry->vme_end - copy_entry->vme_start);
+ vm_object_t object;
+
+ entry = tmp_entry;
+ size = (entry->vme_end - entry->vme_start);
+ /*
+ * Make sure that no holes popped up in the
+ * address map, and that the protection is
+ * still valid, in case the map was unlocked
+ * earlier.
+ */
+
+ if (entry->vme_start != start) {
+ vm_map_unlock(dst_map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ assert(entry != vm_map_to_entry(dst_map));
+
+ /*
+ * Check protection again
+ */
+
+ if ( ! (entry->protection & VM_PROT_WRITE)) {
+ vm_map_unlock(dst_map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * Adjust to source size first
+ */
+
+ if (copy_size < size) {
+ vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
+ size = copy_size;
+ }
+
+ /*
+ * Adjust to destination size
+ */
+
+ if (size < copy_size) {
+ vm_map_copy_clip_end(copy, copy_entry,
+ copy_entry->vme_start + size);
+ copy_size = size;
+ }
+
+ assert((entry->vme_end - entry->vme_start) == size);
+ assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
+ assert((copy_entry->vme_end - copy_entry->vme_start) == size);
+
+ /*
+ * If the destination contains temporary unshared memory,
+ * we can perform the copy by throwing it away and
+ * installing the source data.
+ */
+
+ object = entry->object.vm_object;
+ if (!entry->is_shared &&
+ ((object == VM_OBJECT_NULL) || object->temporary)) {
+ vm_object_t old_object = entry->object.vm_object;
+ vm_offset_t old_offset = entry->offset;
+
+ entry->object = copy_entry->object;
+ entry->offset = copy_entry->offset;
+ entry->needs_copy = copy_entry->needs_copy;
+ vm_map_entry_reset_wired(dst_map, entry);
+
+ vm_map_copy_entry_unlink(copy, copy_entry);
+ vm_map_copy_entry_dispose(copy, copy_entry);
+
+ vm_object_pmap_protect(
+ old_object,
+ old_offset,
+ size,
+ dst_map->pmap,
+ tmp_entry->vme_start,
+ VM_PROT_NONE);
+
+ vm_object_deallocate(old_object);
+
+ /*
+ * Set up for the next iteration. The map
+ * has not been unlocked, so the next
+ * address should be at the end of this
+ * entry, and the next map entry should be
+ * the one following it.
+ */
+
+ start = tmp_entry->vme_end;
+ tmp_entry = tmp_entry->vme_next;
+ } else {
+ vm_map_version_t version;
+ vm_object_t dst_object = entry->object.vm_object;
+ vm_offset_t dst_offset = entry->offset;
+ kern_return_t r;
+
+ /*
+ * Take an object reference, and record
+ * the map version information so that the
+ * map can be safely unlocked.
+ */
+
+ vm_object_reference(dst_object);
+
+ version.main_timestamp = dst_map->timestamp;
+
+ vm_map_unlock(dst_map);
+
+ /*
+ * Copy as much as possible in one pass
+ */
+
+ copy_size = size;
+ r = vm_fault_copy(
+ copy_entry->object.vm_object,
+ copy_entry->offset,
+ &copy_size,
+ dst_object,
+ dst_offset,
+ dst_map,
+ &version,
+ FALSE /* XXX interruptible */ );
+
+ /*
+ * Release the object reference
+ */
+
+ vm_object_deallocate(dst_object);
+
+ /*
+ * If a hard error occurred, return it now
+ */
+
+ if (r != KERN_SUCCESS)
+ return(r);
+
+ if (copy_size != 0) {
+ /*
+ * Dispose of the copied region
+ */
+
+ vm_map_copy_clip_end(copy, copy_entry,
+ copy_entry->vme_start + copy_size);
+ vm_map_copy_entry_unlink(copy, copy_entry);
+ vm_object_deallocate(copy_entry->object.vm_object);
+ vm_map_copy_entry_dispose(copy, copy_entry);
+ }
+
+ /*
+ * Pick up in the destination map where we left off.
+ *
+ * Use the version information to avoid a lookup
+ * in the normal case.
+ */
+
+ start += copy_size;
+ vm_map_lock(dst_map);
+ if ((version.main_timestamp + 1) == dst_map->timestamp) {
+ /* We can safely use saved tmp_entry value */
+
+ vm_map_clip_end(dst_map, tmp_entry, start);
+ tmp_entry = tmp_entry->vme_next;
+ } else {
+ /* Must do lookup of tmp_entry */
+
+ if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
+ vm_map_unlock(dst_map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ vm_map_clip_start(dst_map, tmp_entry, start);
+ }
+ }
+
+ }
+ vm_map_unlock(dst_map);
+
+ /*
+ * Throw away the vm_map_copy object
+ */
+ vm_map_copy_discard(copy);
+
+ return(KERN_SUCCESS);
+}
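+
+/*
+ * Sketch of the intended pairing (assumes the usual vm_map_copyin interface;
+ * not a verbatim caller from this tree): the copy object produced by
+ * vm_map_copyin is consumed on success, and remains the caller's
+ * responsibility on failure.
+ *
+ *	vm_map_copy_t copy;
+ *
+ *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
+ *	if (kr != KERN_SUCCESS)
+ *		return kr;
+ *	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
+ *	if (kr != KERN_SUCCESS)
+ *		vm_map_copy_discard(copy);
+ */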
+
+/*
+ * Routine: vm_map_copy_insert
+ *
+ * Description:
+ * Link a copy chain ("copy") into a map at the
+ * specified location (after "where").
+ * Side effects:
+ * The copy chain is destroyed.
+ */
+static void
+vm_map_copy_insert(struct vm_map *map, struct vm_map_entry *where,
+ struct vm_map_copy *copy)
+{
+ struct vm_map_entry *entry;
+
+ assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
+
+ for (;;) {
+ entry = vm_map_copy_first_entry(copy);
+
+ if (entry == vm_map_copy_to_entry(copy)) {
+ break;
+ }
+
+ /*
+ * TODO Turn copy maps into their own type so they don't
+ * use any of the tree operations.
+ */
+ vm_map_copy_entry_unlink(copy, entry);
+ vm_map_entry_link(map, where, entry);
+ where = entry;
+ }
+
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t)copy);
+}
+
+/*
+ * Routine: vm_map_copyout
+ *
+ * Description:
+ * Copy out a copy chain ("copy") into newly-allocated
+ * space in the destination map.
+ *
+ * If successful, consumes the copy object.
+ * Otherwise, the caller is responsible for it.
+ */
+kern_return_t vm_map_copyout(
+ vm_map_t dst_map,
+ vm_offset_t *dst_addr, /* OUT */
+ vm_map_copy_t copy)
+{
+ vm_size_t size;
+ vm_size_t adjustment;
+ vm_offset_t start;
+ vm_offset_t vm_copy_start;
+ vm_map_entry_t last;
+ vm_map_entry_t entry;
+ kern_return_t kr;
+
+ /*
+ * Check for null copy object.
+ */
+
+ if (copy == VM_MAP_COPY_NULL) {
+ *dst_addr = 0;
+ return(KERN_SUCCESS);
+ }
+
+ /*
+ * Check for special copy object, created
+ * by vm_map_copyin_object.
+ */
+
+ if (copy->type == VM_MAP_COPY_OBJECT) {
+ vm_object_t object = copy->cpy_object;
+ vm_size_t offset = copy->offset;
+ vm_size_t tmp_size = copy->size;
+
+ *dst_addr = 0;
+ kr = vm_map_enter(dst_map, dst_addr, tmp_size,
+ (vm_offset_t) 0, TRUE,
+ object, offset, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
+ if (kr != KERN_SUCCESS)
+ return(kr);
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy);
+ return(KERN_SUCCESS);
+ }
+
+ if (copy->type == VM_MAP_COPY_PAGE_LIST)
+ return(vm_map_copyout_page_list(dst_map, dst_addr, copy));
+
+ /*
+ * Find space for the data
+ */
+
+ vm_copy_start = trunc_page(copy->offset);
+ size = round_page(copy->offset + copy->size) - vm_copy_start;
+ last = vm_map_find_entry_anywhere(dst_map, size, 0, FALSE, &start);
+
+ if (last == NULL) {
+ vm_map_unlock(dst_map);
+ return KERN_NO_SPACE;
+ }
+
+ /*
+ * Adjust the addresses in the copy chain, and
+ * reset the region attributes.
+ */
+
+ adjustment = start - vm_copy_start;
+ for (entry = vm_map_copy_first_entry(copy);
+ entry != vm_map_copy_to_entry(copy);
+ entry = entry->vme_next) {
+ entry->vme_start += adjustment;
+ entry->vme_end += adjustment;
+
+ /*
+ * XXX There is no need to update the gap tree here.
+ * See vm_map_copy_insert.
+ */
+
+ entry->inheritance = VM_INHERIT_DEFAULT;
+ entry->protection = VM_PROT_DEFAULT;
+ entry->max_protection = VM_PROT_ALL;
+ entry->projected_on = 0;
+
+ /*
+ * If the entry is now wired,
+ * map the pages into the destination map.
+ */
+ if (entry->wired_count != 0) {
+ vm_offset_t va;
+ vm_offset_t offset;
+ vm_object_t object;
+
+ object = entry->object.vm_object;
+ offset = entry->offset;
+ va = entry->vme_start;
+
+ pmap_pageable(dst_map->pmap,
+ entry->vme_start,
+ entry->vme_end,
+ TRUE);
+
+ while (va < entry->vme_end) {
+ vm_page_t m;
+
+ /*
+ * Look up the page in the object.
+ * Assert that the page will be found in the
+ * top object:
+ * either
+ * the object was newly created by
+ * vm_object_copy_slowly, and has
+ * copies of all of the pages from
+ * the source object
+ * or
+ * the object was moved from the old
+ * map entry; because the old map
+ * entry was wired, all of the pages
+ * were in the top-level object.
+ * (XXX not true if we wire pages for
+ * reading)
+ */
+ vm_object_lock(object);
+ vm_object_paging_begin(object);
+
+ m = vm_page_lookup(object, offset);
+ if (m == VM_PAGE_NULL || m->wire_count == 0 ||
+ m->absent)
+ panic("vm_map_copyout: wiring %p", m);
+
+ m->busy = TRUE;
+ vm_object_unlock(object);
+
+ PMAP_ENTER(dst_map->pmap, va, m,
+ entry->protection, TRUE);
+
+ vm_object_lock(object);
+ PAGE_WAKEUP_DONE(m);
+ /* the page is wired, so we don't have to activate */
+ vm_object_paging_end(object);
+ vm_object_unlock(object);
+
+ offset += PAGE_SIZE;
+ va += PAGE_SIZE;
+ }
+ }
+
+
+ }
+
+ /*
+ * Correct the page alignment for the result
+ */
+
+ *dst_addr = start + (copy->offset - vm_copy_start);
+
+ /*
+ * Update the hints and the map size
+ */
+
+ if (dst_map->first_free == last)
+ dst_map->first_free = vm_map_copy_last_entry(copy);
+ SAVE_HINT(dst_map, vm_map_copy_last_entry(copy));
+
+ dst_map->size += size;
+
+ /*
+ * Link in the copy
+ */
+
+ vm_map_copy_insert(dst_map, last, copy);
+
+ if (dst_map->wiring_required) {
+ /* Returns with the map read-locked if successful */
+ kr = vm_map_pageable(dst_map, start, start + size,
+ VM_PROT_READ | VM_PROT_WRITE,
+ FALSE, FALSE);
+
+ if (kr != KERN_SUCCESS) {
+ vm_map_unlock(dst_map);
+ return kr;
+ }
+ }
+
+ vm_map_unlock(dst_map);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ *	vm_map_copyout_page_list:
+ *
+ *	Version of vm_map_copyout() for page list vm map copies.
+ */
+kern_return_t vm_map_copyout_page_list(
+ vm_map_t dst_map,
+ vm_offset_t *dst_addr, /* OUT */
+ vm_map_copy_t copy)
+{
+ vm_size_t size;
+ vm_offset_t start;
+ vm_offset_t end;
+ vm_offset_t offset;
+ vm_map_entry_t last;
+ vm_object_t object;
+ vm_page_t *page_list, m;
+ vm_map_entry_t entry;
+ vm_offset_t old_last_offset;
+ boolean_t cont_invoked, needs_wakeup = FALSE;
+ kern_return_t result = KERN_SUCCESS;
+ vm_map_copy_t orig_copy;
+ vm_offset_t dst_offset;
+ boolean_t must_wire;
+
+ /*
+ * Make sure the pages are stolen, because we are
+ * going to put them in a new object. Assume that
+	 *	all pages are identical to the first in this regard.
+ */
+
+ page_list = &copy->cpy_page_list[0];
+ if ((*page_list)->tabled)
+ vm_map_copy_steal_pages(copy);
+
+ /*
+ * Find space for the data
+ */
+
+ size = round_page(copy->offset + copy->size) -
+ trunc_page(copy->offset);
+
+ vm_map_lock(dst_map);
+
+ last = vm_map_find_entry_anywhere(dst_map, size, 0, TRUE, &start);
+
+ if (last == NULL) {
+ vm_map_unlock(dst_map);
+ return KERN_NO_SPACE;
+ }
+
+ end = start + size;
+
+ must_wire = dst_map->wiring_required;
+
+ /*
+ * See whether we can avoid creating a new entry (and object) by
+ * extending one of our neighbors. [So far, we only attempt to
+ * extend from below.]
+ *
+ * The code path below here is a bit twisted. If any of the
+ * extension checks fails, we branch to create_object. If
+ * it all works, we fall out the bottom and goto insert_pages.
+ */
+ if (last == vm_map_to_entry(dst_map) ||
+ last->vme_end != start ||
+ last->is_shared != FALSE ||
+ last->is_sub_map != FALSE ||
+ last->inheritance != VM_INHERIT_DEFAULT ||
+ last->protection != VM_PROT_DEFAULT ||
+ last->max_protection != VM_PROT_ALL ||
+ (must_wire ? (last->wired_count == 0)
+ : (last->wired_count != 0))) {
+ goto create_object;
+ }
+
+ /*
+ * If this entry needs an object, make one.
+ */
+ if (last->object.vm_object == VM_OBJECT_NULL) {
+ object = vm_object_allocate(
+ (vm_size_t)(last->vme_end - last->vme_start + size));
+ last->object.vm_object = object;
+ last->offset = 0;
+ vm_object_lock(object);
+ }
+ else {
+ vm_offset_t prev_offset = last->offset;
+ vm_size_t prev_size = start - last->vme_start;
+ vm_size_t new_size;
+
+ /*
+ * This is basically vm_object_coalesce.
+ */
+
+ object = last->object.vm_object;
+ vm_object_lock(object);
+
+ /*
+ * Try to collapse the object first
+ */
+ vm_object_collapse(object);
+
+ /*
+		 *	Can't coalesce if the object's pages, though not
+		 *	mapped through last, may be in use anyway:
+ * . more than one reference
+ * . paged out
+ * . shadows another object
+ * . has a copy elsewhere
+ * . paging references (pages might be in page-list)
+ */
+
+ if ((object->ref_count > 1) ||
+ object->pager_created ||
+ (object->shadow != VM_OBJECT_NULL) ||
+ (object->copy != VM_OBJECT_NULL) ||
+ (object->paging_in_progress != 0)) {
+ vm_object_unlock(object);
+ goto create_object;
+ }
+
+ /*
+ * Extend the object if necessary. Don't have to call
+ * vm_object_page_remove because the pages aren't mapped,
+ * and vm_page_replace will free up any old ones it encounters.
+ */
+ new_size = prev_offset + prev_size + size;
+ if (new_size > object->size)
+ object->size = new_size;
+ }
+
+ /*
+ * Coalesced the two objects - can extend
+ * the previous map entry to include the
+ * new range.
+ */
+ dst_map->size += size;
+ last->vme_end = end;
+ vm_map_gap_update(&dst_map->hdr, last);
+
+ SAVE_HINT(dst_map, last);
+
+ goto insert_pages;
+
+create_object:
+
+ /*
+ * Create object
+ */
+ object = vm_object_allocate(size);
+
+ /*
+ * Create entry
+ */
+
+ entry = vm_map_entry_create(dst_map);
+
+ entry->object.vm_object = object;
+ entry->offset = 0;
+
+ entry->is_shared = FALSE;
+ entry->is_sub_map = FALSE;
+ entry->needs_copy = FALSE;
+ entry->wired_count = 0;
+
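+	/*
+	 *	If the destination map requires wiring, account for the
+	 *	wiring on the new entry now; the pages themselves are
+	 *	wired and entered into the pmap in the insert_pages
+	 *	loop below.
+	 */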
+ if (must_wire) {
+ vm_map_entry_inc_wired(dst_map, entry);
+ entry->wired_access = VM_PROT_DEFAULT;
+ } else {
+ entry->wired_access = VM_PROT_NONE;
+ }
+
+ entry->in_transition = TRUE;
+ entry->needs_wakeup = FALSE;
+
+ entry->vme_start = start;
+ entry->vme_end = start + size;
+
+ entry->inheritance = VM_INHERIT_DEFAULT;
+ entry->protection = VM_PROT_DEFAULT;
+ entry->max_protection = VM_PROT_ALL;
+ entry->projected_on = 0;
+
+ vm_object_lock(object);
+
+ /*
+ * Update the hints and the map size
+ */
+ if (dst_map->first_free == last) {
+ dst_map->first_free = entry;
+ }
+ SAVE_HINT(dst_map, entry);
+ dst_map->size += size;
+
+ /*
+ * Link in the entry
+ */
+ vm_map_entry_link(dst_map, last, entry);
+ last = entry;
+
+ /*
+ * Transfer pages into new object.
+ * Scan page list in vm_map_copy.
+ */
+insert_pages:
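+	/*
+	 *	dst_offset is the data's offset within its first page;
+	 *	it is added back to the page-aligned start when computing
+	 *	*dst_addr, so the result keeps the caller's sub-page
+	 *	alignment.
+	 */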
+ dst_offset = copy->offset & PAGE_MASK;
+ cont_invoked = FALSE;
+ orig_copy = copy;
+ last->in_transition = TRUE;
+ old_last_offset = last->offset
+ + (start - last->vme_start);
+
+ vm_page_lock_queues();
+
+ for (offset = 0; offset < size; offset += PAGE_SIZE) {
+ m = *page_list;
+ assert(m && !m->tabled);
+
+ /*
+ * Must clear busy bit in page before inserting it.
+ * Ok to skip wakeup logic because nobody else
+ * can possibly know about this page.
+ * The page is dirty in its new object.
+ */
+
+ assert(!m->wanted);
+
+ m->busy = FALSE;
+ m->dirty = TRUE;
+ vm_page_replace(m, object, old_last_offset + offset);
+ if (must_wire) {
+ vm_page_wire(m);
+ PMAP_ENTER(dst_map->pmap,
+ last->vme_start + m->offset - last->offset,
+ m, last->protection, TRUE);
+ } else {
+ vm_page_activate(m);
+ }
+
+ *page_list++ = VM_PAGE_NULL;
+ if (--(copy->cpy_npages) == 0 &&
+ vm_map_copy_has_cont(copy)) {
+ vm_map_copy_t new_copy;
+
+ /*
+ * Ok to unlock map because entry is
+ * marked in_transition.
+ */
+ cont_invoked = TRUE;
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+ vm_map_unlock(dst_map);
+ vm_map_copy_invoke_cont(copy, &new_copy, &result);
+
+ if (result == KERN_SUCCESS) {
+
+ /*
+ * If we got back a copy with real pages,
+ * steal them now. Either all of the
+ * pages in the list are tabled or none
+ * of them are; mixtures are not possible.
+ *
+ * Save original copy for consume on
+ * success logic at end of routine.
+ */
+ if (copy != orig_copy)
+ vm_map_copy_discard(copy);
+
+ if ((copy = new_copy) != VM_MAP_COPY_NULL) {
+ page_list = &copy->cpy_page_list[0];
+ if ((*page_list)->tabled)
+ vm_map_copy_steal_pages(copy);
+ }
+ }
+ else {
+ /*
+ * Continuation failed.
+ */
+ vm_map_lock(dst_map);
+ goto error;
+ }
+
+ vm_map_lock(dst_map);
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ }
+ }
+
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ *dst_addr = start + dst_offset;
+
+ /*
+ * Clear the in transition bits. This is easy if we
+ * didn't have a continuation.
+ */
+error:
+ if (!cont_invoked) {
+ /*
+ * We didn't unlock the map, so nobody could
+ * be waiting.
+ */
+ last->in_transition = FALSE;
+ assert(!last->needs_wakeup);
+ needs_wakeup = FALSE;
+ }
+ else {
+ if (!vm_map_lookup_entry(dst_map, start, &entry))
+ panic("vm_map_copyout_page_list: missing entry");
+
+ /*
+ * Clear transition bit for all constituent entries that
+ * were in the original entry. Also check for waiters.
+ */
+ while((entry != vm_map_to_entry(dst_map)) &&
+ (entry->vme_start < end)) {
+ assert(entry->in_transition);
+ entry->in_transition = FALSE;
+ if(entry->needs_wakeup) {
+ entry->needs_wakeup = FALSE;
+ needs_wakeup = TRUE;
+ }
+ entry = entry->vme_next;
+ }
+ }
+
+ if (result != KERN_SUCCESS)
+ vm_map_delete(dst_map, start, end);
+
+ vm_map_unlock(dst_map);
+
+ if (needs_wakeup)
+ vm_map_entry_wakeup(dst_map);
+
+ /*
+ * Consume on success logic.
+ */
+ if (copy != orig_copy) {
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy);
+ }
+ if (result == KERN_SUCCESS) {
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) orig_copy);
+ }
+
+ return(result);
+}
+
+/*
+ * Routine: vm_map_copyin
+ *
+ * Description:
+ * Copy the specified region (src_addr, len) from the
+ * source address space (src_map), possibly removing
+ * the region from the source address space (src_destroy).
+ *
+ * Returns:
+ * A vm_map_copy_t object (copy_result), suitable for
+ * insertion into another address space (using vm_map_copyout),
+ * copying over another address space region (using
+ * vm_map_copy_overwrite). If the copy is unused, it
+ * should be destroyed (using vm_map_copy_discard).
+ *
+ * In/out conditions:
+ * The source map should not be locked on entry.
+ */
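+/*
+ *	A minimal usage sketch (assuming a caller that already holds
+ *	references to both maps; error handling abbreviated):
+ *
+ *		vm_map_copy_t	copy;
+ *		vm_offset_t	dst_addr;
+ *		kern_return_t	kr;
+ *
+ *		kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
+ *		if (kr != KERN_SUCCESS)
+ *			return kr;
+ *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
+ *		if (kr != KERN_SUCCESS)
+ *			vm_map_copy_discard(copy);	-- copy was not consumed
+ */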
+kern_return_t vm_map_copyin(
+ vm_map_t src_map,
+ vm_offset_t src_addr,
+ vm_size_t len,
+ boolean_t src_destroy,
+ vm_map_copy_t *copy_result) /* OUT */
+{
+ vm_map_entry_t tmp_entry; /* Result of last map lookup --
+ * in multi-level lookup, this
+ * entry contains the actual
+ * vm_object/offset.
+ */
+
+ vm_offset_t src_start; /* Start of current entry --
+ * where copy is taking place now
+ */
+ vm_offset_t src_end; /* End of entire region to be
+ * copied */
+
+ vm_map_copy_t copy; /* Resulting copy */
+
+ /*
+ * Check for copies of zero bytes.
+ */
+
+ if (len == 0) {
+ *copy_result = VM_MAP_COPY_NULL;
+ return(KERN_SUCCESS);
+ }
+
+ /*
+ * Check that the end address doesn't overflow
+ */
+
+ if ((src_addr + len) <= src_addr) {
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /*
+ * Compute start and end of region
+ */
+
+ src_start = trunc_page(src_addr);
+ src_end = round_page(src_addr + len);
+
+ /*
+ * XXX VM maps shouldn't end at maximum address
+ */
+
+ if (src_end == 0) {
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /*
+ * Allocate a header element for the list.
+ *
+ * Use the start and end in the header to
+ * remember the endpoints prior to rounding.
+ */
+
+ copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
+ vm_map_copy_first_entry(copy) =
+ vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
+ copy->type = VM_MAP_COPY_ENTRY_LIST;
+ copy->cpy_hdr.nentries = 0;
+ rbtree_init(&copy->cpy_hdr.tree);
+ rbtree_init(&copy->cpy_hdr.gap_tree);
+
+ copy->offset = src_addr;
+ copy->size = len;
+
+#define RETURN(x) \
+ MACRO_BEGIN \
+ vm_map_unlock(src_map); \
+ vm_map_copy_discard(copy); \
+ MACRO_RETURN(x); \
+ MACRO_END
+
+ /*
+ * Find the beginning of the region.
+ */
+
+ vm_map_lock(src_map);
+
+ if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
+ RETURN(KERN_INVALID_ADDRESS);
+ vm_map_clip_start(src_map, tmp_entry, src_start);
+
+ /*
+ * Go through entries until we get to the end.
+ */
+
+ while (TRUE) {
+ vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
+ vm_size_t src_size; /* Size of source
+ * map entry (in both
+ * maps)
+ */
+
+ vm_object_t src_object; /* Object to copy */
+ vm_offset_t src_offset;
+
+ boolean_t src_needs_copy; /* Should source map
+ * be made read-only
+ * for copy-on-write?
+ */
+
+ vm_map_entry_t new_entry; /* Map entry for copy */
+ boolean_t new_entry_needs_copy; /* Will new entry be COW? */
+
+ boolean_t was_wired; /* Was source wired? */
+ vm_map_version_t version; /* Version before locks
+ * dropped to make copy
+ */
+
+ /*
+ * Verify that the region can be read.
+ */
+
+ if (! (src_entry->protection & VM_PROT_READ))
+ RETURN(KERN_PROTECTION_FAILURE);
+
+ /*
+ * Clip against the endpoints of the entire region.
+ */
+
+ vm_map_clip_end(src_map, src_entry, src_end);
+
+ src_size = src_entry->vme_end - src_start;
+ src_object = src_entry->object.vm_object;
+ src_offset = src_entry->offset;
+ was_wired = (src_entry->wired_count != 0);
+
+ /*
+ * Create a new address map entry to
+ * hold the result. Fill in the fields from
+ * the appropriate source entries.
+ */
+
+ new_entry = vm_map_copy_entry_create(copy);
+ vm_map_entry_copy(new_entry, src_entry);
+
+ /*
+ * Attempt non-blocking copy-on-write optimizations.
+ */
+
+ if (src_destroy &&
+ (src_object == VM_OBJECT_NULL ||
+ (src_object->temporary && !src_object->use_shared_copy)))
+ {
+ /*
+ * If we are destroying the source, and the object
+ * is temporary, and not shared writable,
+ * we can move the object reference
+ * from the source to the copy. The copy is
+ * copy-on-write only if the source is.
+ * We make another reference to the object, because
+ * destroying the source entry will deallocate it.
+ */
+ vm_object_reference(src_object);
+
+ /*
+			 *	Copy is always unwired.  vm_map_entry_copy
+			 *	set its wired count to zero.
+ */
+
+ goto CopySuccessful;
+ }
+
+ if (!was_wired &&
+ vm_object_copy_temporary(
+ &new_entry->object.vm_object,
+ &new_entry->offset,
+ &src_needs_copy,
+ &new_entry_needs_copy)) {
+
+ new_entry->needs_copy = new_entry_needs_copy;
+
+ /*
+ * Handle copy-on-write obligations
+ */
+
+ if (src_needs_copy && !tmp_entry->needs_copy) {
+ vm_object_pmap_protect(
+ src_object,
+ src_offset,
+ src_size,
+ (src_entry->is_shared ? PMAP_NULL
+ : src_map->pmap),
+ src_entry->vme_start,
+ src_entry->protection &
+ ~VM_PROT_WRITE);
+
+ tmp_entry->needs_copy = TRUE;
+ }
+
+ /*
+ * The map has never been unlocked, so it's safe to
+ * move to the next entry rather than doing another
+ * lookup.
+ */
+
+ goto CopySuccessful;
+ }
+
+ new_entry->needs_copy = FALSE;
+
+ /*
+ * Take an object reference, so that we may
+ * release the map lock(s).
+ */
+
+ assert(src_object != VM_OBJECT_NULL);
+ vm_object_reference(src_object);
+
+ /*
+ * Record the timestamp for later verification.
+ * Unlock the map.
+ */
+
+ version.main_timestamp = src_map->timestamp;
+ vm_map_unlock(src_map);
+
+ /*
+ * Perform the copy
+ */
+
+ if (was_wired) {
+ vm_object_lock(src_object);
+ (void) vm_object_copy_slowly(
+ src_object,
+ src_offset,
+ src_size,
+ FALSE,
+ &new_entry->object.vm_object);
+ new_entry->offset = 0;
+ new_entry->needs_copy = FALSE;
+ } else {
+ kern_return_t result;
+
+ result = vm_object_copy_strategically(src_object,
+ src_offset,
+ src_size,
+ &new_entry->object.vm_object,
+ &new_entry->offset,
+ &new_entry_needs_copy);
+
+ new_entry->needs_copy = new_entry_needs_copy;
+
+ if (result != KERN_SUCCESS) {
+ vm_map_copy_entry_dispose(copy, new_entry);
+
+ vm_map_lock(src_map);
+ RETURN(result);
+ }
+
+ }
+
+ /*
+ * Throw away the extra reference
+ */
+
+ vm_object_deallocate(src_object);
+
+ /*
+ * Verify that the map has not substantially
+ * changed while the copy was being made.
+ */
+
+ vm_map_lock(src_map); /* Increments timestamp once! */
+
+ if ((version.main_timestamp + 1) == src_map->timestamp)
+ goto CopySuccessful;
+
+ /*
+ * Simple version comparison failed.
+ *
+ * Retry the lookup and verify that the
+ * same object/offset are still present.
+ *
+ * [Note: a memory manager that colludes with
+ * the calling task can detect that we have
+ * cheated. While the map was unlocked, the
+ * mapping could have been changed and restored.]
+ */
+
+ if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
+ vm_map_copy_entry_dispose(copy, new_entry);
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+
+ src_entry = tmp_entry;
+ vm_map_clip_start(src_map, src_entry, src_start);
+
+ if ((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE)
+ goto VerificationFailed;
+
+ if (src_entry->vme_end < new_entry->vme_end)
+ src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
+
+ if ((src_entry->object.vm_object != src_object) ||
+ (src_entry->offset != src_offset) ) {
+
+ /*
+ * Verification failed.
+ *
+ * Start over with this top-level entry.
+ */
+
+ VerificationFailed: ;
+
+ vm_object_deallocate(new_entry->object.vm_object);
+ vm_map_copy_entry_dispose(copy, new_entry);
+ tmp_entry = src_entry;
+ continue;
+ }
+
+ /*
+ * Verification succeeded.
+ */
+
+ CopySuccessful: ;
+
+ /*
+ * Link in the new copy entry.
+ */
+
+ vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
+ new_entry);
+
+ /*
+ * Determine whether the entire region
+ * has been copied.
+ */
+ src_start = new_entry->vme_end;
+ if ((src_start >= src_end) && (src_end != 0))
+ break;
+
+ /*
+ * Verify that there are no gaps in the region
+ */
+
+ tmp_entry = src_entry->vme_next;
+ if (tmp_entry->vme_start != src_start)
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+
+ /*
+ * If the source should be destroyed, do it now, since the
+ * copy was successful.
+ */
+ if (src_destroy)
+ (void) vm_map_delete(src_map, trunc_page(src_addr), src_end);
+
+ vm_map_unlock(src_map);
+
+ *copy_result = copy;
+ return(KERN_SUCCESS);
+
+#undef RETURN
+}
+
+/*
+ * vm_map_copyin_object:
+ *
+ * Create a copy object from an object.
+ * Our caller donates an object reference.
+ */
+
+kern_return_t vm_map_copyin_object(
+ vm_object_t object,
+ vm_offset_t offset, /* offset of region in object */
+ vm_size_t size, /* size of region in object */
+ vm_map_copy_t *copy_result) /* OUT */
+{
+ vm_map_copy_t copy; /* Resulting copy */
+
+ /*
+ * We drop the object into a special copy object
+ * that contains the object directly. These copy objects
+	 *	that contains the object directly.  These copy objects
+	 *	are distinguished by their VM_MAP_COPY_OBJECT type (their
+	 *	entry links are null).
+
+ copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
+ vm_map_copy_first_entry(copy) =
+ vm_map_copy_last_entry(copy) = VM_MAP_ENTRY_NULL;
+ copy->type = VM_MAP_COPY_OBJECT;
+ copy->cpy_object = object;
+ copy->offset = offset;
+ copy->size = size;
+
+ *copy_result = copy;
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_copyin_page_list_cont:
+ *
+ * Continuation routine for vm_map_copyin_page_list.
+ *
+ * If vm_map_copyin_page_list can't fit the entire vm range
+ * into a single page list object, it creates a continuation.
+ * When the target of the operation has used the pages in the
+ * initial page list, it invokes the continuation, which calls
+ *	this routine.  If an error happens, the continuation is aborted;
+ *	the abort is signalled by calling this routine with a null
+ *	copy_result.  To avoid deadlocks, the
+ * pages are discarded from the initial page list before invoking
+ * the continuation.
+ *
+ * NOTE: This is not the same sort of continuation used by
+ * the scheduler.
+ */
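+/*
+ *	Consumer-side sketch (a simplification of the loop in
+ *	vm_map_copyout_page_list above; COPY is assumed to come from
+ *	vm_map_copyin_page_list):
+ *
+ *		... use copy->cpy_page_list[0 .. copy->cpy_npages - 1] ...
+ *		if (vm_map_copy_has_cont(copy)) {
+ *			vm_map_copy_invoke_cont(copy, &new_copy, &kr);
+ *			if (kr == KERN_SUCCESS)
+ *				copy = new_copy;	-- may carry another cont
+ *			else
+ *				... handle the error ...
+ *		}
+ */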
+
+static kern_return_t vm_map_copyin_page_list_cont(
+ vm_map_copyin_args_t cont_args,
+ vm_map_copy_t *copy_result) /* OUT */
+{
+ kern_return_t result = 0; /* '=0' to quiet gcc warnings */
+ boolean_t do_abort, src_destroy, src_destroy_only;
+
+ /*
+ * Check for cases that only require memory destruction.
+ */
+ do_abort = (copy_result == (vm_map_copy_t *) 0);
+ src_destroy = (cont_args->destroy_len != (vm_size_t) 0);
+ src_destroy_only = (cont_args->src_len == (vm_size_t) 0);
+
+ if (do_abort || src_destroy_only) {
+ if (src_destroy)
+ result = vm_map_remove(cont_args->map,
+ cont_args->destroy_addr,
+ cont_args->destroy_addr + cont_args->destroy_len);
+ if (!do_abort)
+ *copy_result = VM_MAP_COPY_NULL;
+ }
+ else {
+ result = vm_map_copyin_page_list(cont_args->map,
+ cont_args->src_addr, cont_args->src_len, src_destroy,
+ cont_args->steal_pages, copy_result, TRUE);
+
+ if (src_destroy && !cont_args->steal_pages &&
+ vm_map_copy_has_cont(*copy_result)) {
+ vm_map_copyin_args_t new_args;
+ /*
+ * Transfer old destroy info.
+ */
+ new_args = (vm_map_copyin_args_t)
+ (*copy_result)->cpy_cont_args;
+ new_args->destroy_addr = cont_args->destroy_addr;
+ new_args->destroy_len = cont_args->destroy_len;
+ }
+ }
+
+ vm_map_deallocate(cont_args->map);
+ kfree((vm_offset_t)cont_args, sizeof(vm_map_copyin_args_data_t));
+
+ return(result);
+}
+
+/*
+ * vm_map_copyin_page_list:
+ *
+ * This is a variant of vm_map_copyin that copies in a list of pages.
+ * If steal_pages is TRUE, the pages are only in the returned list.
+ * If steal_pages is FALSE, the pages are busy and still in their
+ * objects. A continuation may be returned if not all the pages fit:
+ * the recipient of this copy_result must be prepared to deal with it.
+ */
+
+kern_return_t vm_map_copyin_page_list(
+ vm_map_t src_map,
+ vm_offset_t src_addr,
+ vm_size_t len,
+ boolean_t src_destroy,
+ boolean_t steal_pages,
+ vm_map_copy_t *copy_result, /* OUT */
+ boolean_t is_cont)
+{
+ vm_map_entry_t src_entry;
+ vm_page_t m;
+ vm_offset_t src_start;
+ vm_offset_t src_end;
+ vm_size_t src_size;
+ vm_object_t src_object;
+ vm_offset_t src_offset;
+ vm_offset_t src_last_offset;
+ vm_map_copy_t copy; /* Resulting copy */
+ kern_return_t result = KERN_SUCCESS;
+ boolean_t need_map_lookup;
+ vm_map_copyin_args_t cont_args;
+
+ /*
+ * If steal_pages is FALSE, this leaves busy pages in
+ * the object. A continuation must be used if src_destroy
+ * is true in this case (!steal_pages && src_destroy).
+ *
+ * XXX Still have a more general problem of what happens
+ * XXX if the same page occurs twice in a list. Deadlock
+ * XXX can happen if vm_fault_page was called. A
+ * XXX possible solution is to use a continuation if vm_fault_page
+ * XXX is called and we cross a map entry boundary.
+ */
+
+ /*
+ * Check for copies of zero bytes.
+ */
+
+ if (len == 0) {
+ *copy_result = VM_MAP_COPY_NULL;
+ return(KERN_SUCCESS);
+ }
+
+ /*
+ * Check that the end address doesn't overflow
+ */
+
+ if ((src_addr + len) <= src_addr) {
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /*
+ * Compute start and end of region
+ */
+
+ src_start = trunc_page(src_addr);
+ src_end = round_page(src_addr + len);
+
+ /*
+ * XXX VM maps shouldn't end at maximum address
+ */
+
+ if (src_end == 0) {
+ return KERN_INVALID_ADDRESS;
+ }
+
+ /*
+ * Allocate a header element for the page list.
+ *
+ * Record original offset and size, as caller may not
+ * be page-aligned.
+ */
+
+ copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
+ copy->type = VM_MAP_COPY_PAGE_LIST;
+ copy->cpy_npages = 0;
+ copy->offset = src_addr;
+ copy->size = len;
+ copy->cpy_cont = ((kern_return_t (*)()) 0);
+ copy->cpy_cont_args = VM_MAP_COPYIN_ARGS_NULL;
+
+ /*
+ * Find the beginning of the region.
+ */
+
+do_map_lookup:
+
+ vm_map_lock(src_map);
+
+ if (!vm_map_lookup_entry(src_map, src_start, &src_entry)) {
+ result = KERN_INVALID_ADDRESS;
+ goto error;
+ }
+ need_map_lookup = FALSE;
+
+ /*
+ * Go through entries until we get to the end.
+ */
+
+ while (TRUE) {
+
+ if (! (src_entry->protection & VM_PROT_READ)) {
+ result = KERN_PROTECTION_FAILURE;
+ goto error;
+ }
+
+ if (src_end > src_entry->vme_end)
+ src_size = src_entry->vme_end - src_start;
+ else
+ src_size = src_end - src_start;
+
+ src_object = src_entry->object.vm_object;
+ src_offset = src_entry->offset +
+ (src_start - src_entry->vme_start);
+
+ /*
+ * If src_object is NULL, allocate it now;
+ * we're going to fault on it shortly.
+ */
+ if (src_object == VM_OBJECT_NULL) {
+ src_object = vm_object_allocate((vm_size_t)
+ src_entry->vme_end -
+ src_entry->vme_start);
+ src_entry->object.vm_object = src_object;
+ }
+
+ /*
+ * Iterate over pages. Fault in ones that aren't present.
+ */
+ src_last_offset = src_offset + src_size;
+ for (; (src_offset < src_last_offset && !need_map_lookup);
+ src_offset += PAGE_SIZE, src_start += PAGE_SIZE) {
+
+ if (copy->cpy_npages == VM_MAP_COPY_PAGE_LIST_MAX) {
+make_continuation:
+ /*
+ * At this point we have the max number of
+ * pages busy for this thread that we're
+ * willing to allow. Stop here and record
+ * arguments for the remainder. Note:
+ * this means that this routine isn't atomic,
+ * but that's the breaks. Note that only
+ * the first vm_map_copy_t that comes back
+ * from this routine has the right offset
+ * and size; those from continuations are
+ * page rounded, and short by the amount
+ * already done.
+ *
+ * Reset src_end so the src_destroy
+ * code at the bottom doesn't do
+ * something stupid.
+ */
+
+ cont_args = (vm_map_copyin_args_t)
+ kalloc(sizeof(vm_map_copyin_args_data_t));
+ cont_args->map = src_map;
+ vm_map_reference(src_map);
+ cont_args->src_addr = src_start;
+ cont_args->src_len = len - (src_start - src_addr);
+ if (src_destroy) {
+ cont_args->destroy_addr = cont_args->src_addr;
+ cont_args->destroy_len = cont_args->src_len;
+ }
+ else {
+ cont_args->destroy_addr = (vm_offset_t) 0;
+				cont_args->destroy_len = (vm_size_t) 0;
+ }
+ cont_args->steal_pages = steal_pages;
+
+ copy->cpy_cont_args = cont_args;
+ copy->cpy_cont = vm_map_copyin_page_list_cont;
+
+ src_end = src_start;
+ vm_map_clip_end(src_map, src_entry, src_end);
+ break;
+ }
+
+ /*
+ * Try to find the page of data.
+ */
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+ if (((m = vm_page_lookup(src_object, src_offset)) !=
+ VM_PAGE_NULL) && !m->busy && !m->fictitious &&
+ !m->absent && !m->error) {
+
+ /*
+ * This is the page. Mark it busy
+ * and keep the paging reference on
+ * the object whilst we do our thing.
+ */
+ m->busy = TRUE;
+
+ /*
+ * Also write-protect the page, so
+				 * that the map's owner cannot change
+				 * the data. The busy bit will prevent
+				 * faults on the page from succeeding
+				 * until the copy is released; after
+				 * that, the page can be re-entered
+				 * as writable, since we didn't alter
+				 * the map entry. This scheme is a
+				 * cheap copy-on-write.
+				 *
+				 * Don't forget the protection and
+				 * the page_lock value!
+				 *
+				 * If the source is being destroyed
+				 * AND not shared writable, we don't
+ * have to protect the page, since
+ * we will destroy the (only)
+ * writable mapping later.
+ */
+ if (!src_destroy ||
+ src_object->use_shared_copy)
+ {
+ pmap_page_protect(m->phys_addr,
+ src_entry->protection
+ & ~m->page_lock
+ & ~VM_PROT_WRITE);
+ }
+
+ }
+ else {
+ vm_prot_t result_prot;
+ vm_page_t top_page;
+ kern_return_t kr;
+
+ /*
+ * Have to fault the page in; must
+ * unlock the map to do so. While
+				 * the map is unlocked, anything can
+				 * happen, so we must look up the map
+				 * entry again before continuing.
+ */
+ vm_map_unlock(src_map);
+ need_map_lookup = TRUE;
+retry:
+ result_prot = VM_PROT_READ;
+
+ kr = vm_fault_page(src_object, src_offset,
+ VM_PROT_READ, FALSE, FALSE,
+ &result_prot, &m, &top_page,
+ FALSE, (void (*)()) 0);
+ /*
+ * Cope with what happened.
+ */
+ switch (kr) {
+ case VM_FAULT_SUCCESS:
+ break;
+ case VM_FAULT_INTERRUPTED: /* ??? */
+ case VM_FAULT_RETRY:
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+ goto retry;
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+ goto retry;
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ vm_object_lock(src_object);
+ vm_object_paging_begin(src_object);
+ goto retry;
+ case VM_FAULT_MEMORY_ERROR:
+ /*
+ * Something broke. If this
+ * is a continuation, return
+ * a partial result if possible,
+ * else fail the whole thing.
+ * In the continuation case, the
+ * next continuation call will
+ * get this error if it persists.
+ */
+ vm_map_lock(src_map);
+ if (is_cont &&
+ copy->cpy_npages != 0)
+ goto make_continuation;
+
+ result = KERN_MEMORY_ERROR;
+ goto error;
+ }
+
+ if (top_page != VM_PAGE_NULL) {
+ vm_object_lock(src_object);
+ VM_PAGE_FREE(top_page);
+ vm_object_paging_end(src_object);
+ vm_object_unlock(src_object);
+ }
+
+ /*
+ * We do not need to write-protect
+ * the page, since it cannot have
+ * been in the pmap (and we did not
+ * enter it above). The busy bit
+ * will protect the page from being
+ * entered as writable until it is
+ * unlocked.
+ */
+
+ }
+
+ /*
+ * The page is busy, its object is locked, and
+ * we have a paging reference on it. Either
+ * the map is locked, or need_map_lookup is
+ * TRUE.
+ *
+ * Put the page in the page list.
+ */
+ copy->cpy_page_list[copy->cpy_npages++] = m;
+ vm_object_unlock(m->object);
+ }
+
+ /*
+		 * Determine whether the entire region
+ * has been copied.
+ */
+ if (src_start >= src_end && src_end != 0) {
+ if (need_map_lookup)
+ vm_map_lock(src_map);
+ break;
+ }
+
+ /*
+ * If need_map_lookup is TRUE, have to start over with
+ * another map lookup. Note that we dropped the map
+ * lock (to call vm_fault_page) above only in this case.
+ */
+ if (need_map_lookup)
+ goto do_map_lookup;
+
+ /*
+ * Verify that there are no gaps in the region
+ */
+
+ src_start = src_entry->vme_end;
+ src_entry = src_entry->vme_next;
+ if (src_entry->vme_start != src_start) {
+ result = KERN_INVALID_ADDRESS;
+ goto error;
+ }
+ }
+
+ /*
+ * If steal_pages is true, make sure all
+	 *	pages in the copy are not in any object.
+ * We try to remove them from the original
+ * object, but we may have to copy them.
+ *
+ * At this point every page in the list is busy
+ * and holds a paging reference to its object.
+ * When we're done stealing, every page is busy,
+ * and in no object (m->tabled == FALSE).
+ */
+ src_start = trunc_page(src_addr);
+ if (steal_pages) {
+ int i;
+ vm_offset_t unwire_end;
+
+ unwire_end = src_start;
+ for (i = 0; i < copy->cpy_npages; i++) {
+
+ /*
+ * Remove the page from its object if it
+ * can be stolen. It can be stolen if:
+ *
+ * (1) The source is being destroyed,
+ * the object is temporary, and
+ * not shared.
+ * (2) The page is not precious.
+ *
+ * The not shared check consists of two
+ * parts: (a) there are no objects that
+ * shadow this object. (b) it is not the
+ * object in any shared map entries (i.e.,
+ * use_shared_copy is not set).
+ *
+ * The first check (a) means that we can't
+ * steal pages from objects that are not
+ * at the top of their shadow chains. This
+ * should not be a frequent occurrence.
+ *
+ * Stealing wired pages requires telling the
+ * pmap module to let go of them.
+ *
+ * NOTE: stealing clean pages from objects
+ * whose mappings survive requires a call to
+ * the pmap module. Maybe later.
+ */
+ m = copy->cpy_page_list[i];
+ src_object = m->object;
+ vm_object_lock(src_object);
+
+ if (src_destroy &&
+ src_object->temporary &&
+ (!src_object->shadowed) &&
+ (!src_object->use_shared_copy) &&
+ !m->precious) {
+ vm_offset_t page_vaddr;
+
+ page_vaddr = src_start + (i * PAGE_SIZE);
+ if (m->wire_count > 0) {
+
+ assert(m->wire_count == 1);
+ /*
+ * In order to steal a wired
+ * page, we have to unwire it
+ * first. We do this inline
+ * here because we have the page.
+ *
+ * Step 1: Unwire the map entry.
+ * Also tell the pmap module
+ * that this piece of the
+ * pmap is pageable.
+ */
+ vm_object_unlock(src_object);
+ if (page_vaddr >= unwire_end) {
+ if (!vm_map_lookup_entry(src_map,
+ page_vaddr, &src_entry))
+ panic("vm_map_copyin_page_list: missing wired map entry");
+
+ vm_map_clip_start(src_map, src_entry,
+ page_vaddr);
+ vm_map_clip_end(src_map, src_entry,
+ src_start + src_size);
+
+ assert(src_entry->wired_count > 0);
+ vm_map_entry_reset_wired(src_map, src_entry);
+ unwire_end = src_entry->vme_end;
+ pmap_pageable(vm_map_pmap(src_map),
+ page_vaddr, unwire_end, TRUE);
+ }
+
+ /*
+ * Step 2: Unwire the page.
+ * pmap_remove handles this for us.
+ */
+ vm_object_lock(src_object);
+ }
+
+ /*
+ * Don't need to remove the mapping;
+ * vm_map_delete will handle it.
+ *
+ * Steal the page. Setting the wire count
+ * to zero is vm_page_unwire without
+ * activating the page.
+ */
+ vm_page_lock_queues();
+ vm_page_remove(m);
+ if (m->wire_count > 0) {
+ m->wire_count = 0;
+ vm_page_wire_count--;
+ } else {
+ VM_PAGE_QUEUES_REMOVE(m);
+ }
+ vm_page_unlock_queues();
+ }
+ else {
+ /*
+				 * Have to copy this page.  The map must be
+				 * unlocked while copying, which means no
+				 * further page stealing; so just copy all
+				 * the remaining pages.
+ */
+ vm_object_unlock(src_object);
+ vm_map_unlock(src_map);
+
+ vm_map_copy_steal_pages(copy);
+
+ vm_map_lock(src_map);
+ break;
+ }
+
+ vm_object_paging_end(src_object);
+ vm_object_unlock(src_object);
+ }
+
+ /*
+ * If the source should be destroyed, do it now, since the
+ * copy was successful.
+ */
+
+ if (src_destroy) {
+ (void) vm_map_delete(src_map, src_start, src_end);
+ }
+ }
+ else {
+ /*
+ * !steal_pages leaves busy pages in the map.
+ * This will cause src_destroy to hang. Use
+ * a continuation to prevent this.
+ */
+ if (src_destroy && !vm_map_copy_has_cont(copy)) {
+ cont_args = (vm_map_copyin_args_t)
+ kalloc(sizeof(vm_map_copyin_args_data_t));
+ vm_map_reference(src_map);
+ cont_args->map = src_map;
+ cont_args->src_addr = (vm_offset_t) 0;
+ cont_args->src_len = (vm_size_t) 0;
+ cont_args->destroy_addr = src_start;
+ cont_args->destroy_len = src_end - src_start;
+ cont_args->steal_pages = FALSE;
+
+ copy->cpy_cont_args = cont_args;
+ copy->cpy_cont = vm_map_copyin_page_list_cont;
+ }
+
+ }
+
+ vm_map_unlock(src_map);
+
+ *copy_result = copy;
+ return(result);
+
+error:
+ vm_map_unlock(src_map);
+ vm_map_copy_discard(copy);
+ return(result);
+}
+
+/*
+ * vm_map_fork:
+ *
+ * Create and return a new map based on the old
+ * map, according to the inheritance values on the
+ * regions in that map.
+ *
+ * The source map must not be locked.
+ */
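+/*
+ *	Sketch of the intended call (the task-creation path is assumed
+ *	to be the caller; parent_task is illustrative):
+ *
+ *		new_map = vm_map_fork(parent_task->map);
+ *		if (new_map == VM_MAP_NULL)
+ *			... no memory for the new pmap or map ...
+ */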
+vm_map_t vm_map_fork(vm_map_t old_map)
+{
+ vm_map_t new_map;
+ vm_map_entry_t old_entry;
+ vm_map_entry_t new_entry;
+ pmap_t new_pmap = pmap_create((vm_size_t) 0);
+ vm_size_t new_size = 0;
+ vm_size_t entry_size;
+ vm_object_t object;
+
+ if (new_pmap == PMAP_NULL)
+ return VM_MAP_NULL;
+
+ vm_map_lock(old_map);
+
+ new_map = vm_map_create(new_pmap,
+ old_map->min_offset,
+ old_map->max_offset);
+ if (new_map == VM_MAP_NULL) {
+ pmap_destroy(new_pmap);
+ return VM_MAP_NULL;
+ }
+
+ for (
+ old_entry = vm_map_first_entry(old_map);
+ old_entry != vm_map_to_entry(old_map);
+ ) {
+ if (old_entry->is_sub_map)
+ panic("vm_map_fork: encountered a submap");
+
+ entry_size = (old_entry->vme_end - old_entry->vme_start);
+
+ switch (old_entry->inheritance) {
+ case VM_INHERIT_NONE:
+ break;
+
+ case VM_INHERIT_SHARE:
+ /*
+ * New sharing code. New map entry
+ * references original object. Temporary
+ * objects use asynchronous copy algorithm for
+ * future copies. First make sure we have
+ * the right object. If we need a shadow,
+ * or someone else already has one, then
+ * make a new shadow and share it.
+ */
+
+ object = old_entry->object.vm_object;
+ if (object == VM_OBJECT_NULL) {
+ object = vm_object_allocate(
+ (vm_size_t)(old_entry->vme_end -
+ old_entry->vme_start));
+ old_entry->offset = 0;
+ old_entry->object.vm_object = object;
+ assert(!old_entry->needs_copy);
+ }
+ else if (old_entry->needs_copy || object->shadowed ||
+ (object->temporary && !old_entry->is_shared &&
+ object->size > (vm_size_t)(old_entry->vme_end -
+ old_entry->vme_start))) {
+
+ assert(object->temporary);
+ assert(!(object->shadowed && old_entry->is_shared));
+ vm_object_shadow(
+ &old_entry->object.vm_object,
+ &old_entry->offset,
+ (vm_size_t) (old_entry->vme_end -
+ old_entry->vme_start));
+
+ /*
+ * If we're making a shadow for other than
+ * copy on write reasons, then we have
+ * to remove write permission.
+ */
+
+ if (!old_entry->needs_copy &&
+ (old_entry->protection & VM_PROT_WRITE)) {
+ pmap_protect(vm_map_pmap(old_map),
+ old_entry->vme_start,
+ old_entry->vme_end,
+ old_entry->protection &
+ ~VM_PROT_WRITE);
+ }
+ old_entry->needs_copy = FALSE;
+ object = old_entry->object.vm_object;
+ }
+
+ /*
+ * Set use_shared_copy to indicate that
+			 *	object must use shared (delayed) copy-on-
+			 *	write.  This is ignored for permanent objects.
+			 *	Bump the reference count for the new entry.
+ */
+
+ vm_object_lock(object);
+ object->use_shared_copy = TRUE;
+ object->ref_count++;
+ vm_object_unlock(object);
+
+ new_entry = vm_map_entry_create(new_map);
+
+ if (old_entry->projected_on != 0) {
+ /*
+ * If entry is projected buffer, clone the
+ * entry exactly.
+ */
+
+ vm_map_entry_copy_full(new_entry, old_entry);
+
+ } else {
+ /*
+ * Clone the entry, using object ref from above.
+ * Mark both entries as shared.
+ */
+
+ vm_map_entry_copy(new_entry, old_entry);
+ old_entry->is_shared = TRUE;
+ new_entry->is_shared = TRUE;
+ }
+
+ /*
+ * Insert the entry into the new map -- we
+ * know we're inserting at the end of the new
+ * map.
+ */
+
+ vm_map_entry_link(
+ new_map,
+ vm_map_last_entry(new_map),
+ new_entry);
+
+ /*
+ * Update the physical map
+ */
+
+ pmap_copy(new_map->pmap, old_map->pmap,
+ new_entry->vme_start,
+ entry_size,
+ old_entry->vme_start);
+
+ new_size += entry_size;
+ break;
+
+ case VM_INHERIT_COPY:
+ if (old_entry->wired_count == 0) {
+ boolean_t src_needs_copy;
+ boolean_t new_entry_needs_copy;
+
+ new_entry = vm_map_entry_create(new_map);
+ vm_map_entry_copy(new_entry, old_entry);
+
+ if (vm_object_copy_temporary(
+ &new_entry->object.vm_object,
+ &new_entry->offset,
+ &src_needs_copy,
+ &new_entry_needs_copy)) {
+
+ /*
+ * Handle copy-on-write obligations
+ */
+
+ if (src_needs_copy && !old_entry->needs_copy) {
+ vm_object_pmap_protect(
+ old_entry->object.vm_object,
+ old_entry->offset,
+ entry_size,
+ (old_entry->is_shared ?
+ PMAP_NULL :
+ old_map->pmap),
+ old_entry->vme_start,
+ old_entry->protection &
+ ~VM_PROT_WRITE);
+
+ old_entry->needs_copy = TRUE;
+ }
+
+ new_entry->needs_copy = new_entry_needs_copy;
+
+ /*
+ * Insert the entry at the end
+ * of the map.
+ */
+
+ vm_map_entry_link(new_map,
+ vm_map_last_entry(new_map),
+ new_entry);
+
+ new_size += entry_size;
+ break;
+ }
+
+ vm_map_entry_dispose(new_map, new_entry);
+ }
+
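+			/*
+			 *	Either the entry is wired or the cheap
+			 *	temporary-object copy above could not be
+			 *	used; fall back to a full vm_map_copyin
+			 *	of this range.
+			 */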
+ /* INNER BLOCK (copy cannot be optimized) */ {
+
+ vm_offset_t start = old_entry->vme_start;
+ vm_map_copy_t copy;
+ vm_map_entry_t last = vm_map_last_entry(new_map);
+
+ vm_map_unlock(old_map);
+ if (vm_map_copyin(old_map,
+ start,
+ entry_size,
+ FALSE,
+ &copy)
+ != KERN_SUCCESS) {
+ vm_map_lock(old_map);
+ if (!vm_map_lookup_entry(old_map, start, &last))
+ last = last->vme_next;
+ old_entry = last;
+ /*
+				 * For some error returns, we want to
+ * skip to the next element.
+ */
+
+ continue;
+ }
+
+ /*
+ * Insert the copy into the new map
+ */
+
+ vm_map_copy_insert(new_map, last, copy);
+ new_size += entry_size;
+
+ /*
+ * Pick up the traversal at the end of
+ * the copied region.
+ */
+
+ vm_map_lock(old_map);
+ start += entry_size;
+ if (!vm_map_lookup_entry(old_map, start, &last))
+ last = last->vme_next;
+ else
+ vm_map_clip_start(old_map, last, start);
+ old_entry = last;
+
+ continue;
+ /* INNER BLOCK (copy cannot be optimized) */ }
+ }
+ old_entry = old_entry->vme_next;
+ }
+
+ new_map->size = new_size;
+ vm_map_unlock(old_map);
+
+ return(new_map);
+}
+
+/*
+ * vm_map_lookup:
+ *
+ * Finds the VM object, offset, and
+ * protection for a given virtual address in the
+ * specified map, assuming a page fault of the
+ * type specified.
+ *
+ * Returns the (object, offset, protection) for
+ * this address, whether it is wired down, and whether
+ * this map has the only reference to the data in question.
+ * In order to later verify this lookup, a "version"
+ * is returned.
+ *
+ * The map should not be locked; it will not be
+ * locked on exit. In order to guarantee the
+ * existence of the returned object, it is returned
+ * locked.
+ *
+ * If a lookup is requested with "write protection"
+ * specified, the map may be changed to perform virtual
+ * copying operations, although the data referenced will
+ * remain the same.
+ */
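+/*
+ *	A minimal sketch of the lookup/verify protocol (the fault
+ *	handler is the expected caller; locking details elided;
+ *	vm_map_verify_done is the unlock macro declared in vm_map.h):
+ *
+ *		kr = vm_map_lookup(&map, vaddr, fault_type, &version,
+ *				   &object, &offset, &prot, &wired);
+ *		... resolve the fault; map locks may be dropped ...
+ *		if (!vm_map_verify(map, &version))
+ *			... map changed: redo the lookup ...
+ *		else
+ *			vm_map_verify_done(map, &version);
+ */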
+kern_return_t vm_map_lookup(
+ vm_map_t *var_map, /* IN/OUT */
+ vm_offset_t vaddr,
+ vm_prot_t fault_type,
+
+ vm_map_version_t *out_version, /* OUT */
+ vm_object_t *object, /* OUT */
+ vm_offset_t *offset, /* OUT */
+ vm_prot_t *out_prot, /* OUT */
+ boolean_t *wired) /* OUT */
+{
+ vm_map_entry_t entry;
+ vm_map_t map = *var_map;
+ vm_prot_t prot;
+
+ RetryLookup: ;
+
+ /*
+ * Lookup the faulting address.
+ */
+
+ vm_map_lock_read(map);
+
+#define RETURN(why) \
+ { \
+ vm_map_unlock_read(map); \
+ return(why); \
+ }
+
+ /*
+	 *	If the map has an interesting hint, try it before calling
+	 *	the full-blown lookup routine.
+ */
+
+ simple_lock(&map->hint_lock);
+ entry = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ if ((entry == vm_map_to_entry(map)) ||
+ (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
+ vm_map_entry_t tmp_entry;
+
+ /*
+ * Entry was either not a valid hint, or the vaddr
+ * was not contained in the entry, so do a full lookup.
+ */
+ if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
+ RETURN(KERN_INVALID_ADDRESS);
+
+ entry = tmp_entry;
+ }
+
+ /*
+ * Handle submaps.
+ */
+
+ if (entry->is_sub_map) {
+ vm_map_t old_map = map;
+
+ *var_map = map = entry->object.sub_map;
+ vm_map_unlock_read(old_map);
+ goto RetryLookup;
+ }
+
+ /*
+ * Check whether this task is allowed to have
+ * this page.
+ */
+
+ prot = entry->protection;
+
+ if ((fault_type & (prot)) != fault_type) {
+ if ((prot & VM_PROT_NOTIFY) && (fault_type & VM_PROT_WRITE)) {
+ RETURN(KERN_WRITE_PROTECTION_FAILURE);
+ } else {
+ RETURN(KERN_PROTECTION_FAILURE);
+ }
+ }
+
+ /*
+ * If this page is not pageable, we have to get
+ * it for all possible accesses.
+ */
+
+ if ((*wired = (entry->wired_count != 0)))
+ prot = fault_type = entry->protection;
+
+ /*
+ * If the entry was copy-on-write, we either ...
+ */
+
+ if (entry->needs_copy) {
+ /*
+ * If we want to write the page, we may as well
+ * handle that now since we've got the map locked.
+ *
+ * If we don't need to write the page, we just
+ * demote the permissions allowed.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+ /*
+ * Make a new object, and place it in the
+ * object chain. Note that no new references
+ * have appeared -- one just moved from the
+ * map to the new object.
+ */
+
+ if (vm_map_lock_read_to_write(map)) {
+ goto RetryLookup;
+ }
+ map->timestamp++;
+
+ vm_object_shadow(
+ &entry->object.vm_object,
+ &entry->offset,
+ (vm_size_t) (entry->vme_end - entry->vme_start));
+
+ entry->needs_copy = FALSE;
+
+ vm_map_lock_write_to_read(map);
+ }
+ else {
+ /*
+ * We're attempting to read a copy-on-write
+ * page -- don't allow writes.
+ */
+
+ prot &= (~VM_PROT_WRITE);
+ }
+ }
+
+ /*
+ * Create an object if necessary.
+ */
+ if (entry->object.vm_object == VM_OBJECT_NULL) {
+
+ if (vm_map_lock_read_to_write(map)) {
+ goto RetryLookup;
+ }
+
+ entry->object.vm_object = vm_object_allocate(
+ (vm_size_t)(entry->vme_end - entry->vme_start));
+ entry->offset = 0;
+ vm_map_lock_write_to_read(map);
+ }
+
+ /*
+ * Return the object/offset from this entry. If the entry
+ * was copy-on-write or empty, it has been fixed up. Also
+ * return the protection.
+ */
+
+ *offset = (vaddr - entry->vme_start) + entry->offset;
+ *object = entry->object.vm_object;
+ *out_prot = prot;
+
+ /*
+ * Lock the object to prevent it from disappearing
+ */
+
+ vm_object_lock(*object);
+
+ /*
+ * Save the version number and unlock the map.
+ */
+
+ out_version->main_timestamp = map->timestamp;
+
+ RETURN(KERN_SUCCESS);
+
+#undef RETURN
+}
+
+/*
+ * vm_map_verify:
+ *
+ * Verifies that the map in question has not changed
+ * since the given version. If successful, the map
+ * will not change until vm_map_verify_done() is called.
+ */
+boolean_t vm_map_verify(
+ vm_map_t map,
+ vm_map_version_t *version) /* REF */
+{
+ boolean_t result;
+
+ vm_map_lock_read(map);
+ result = (map->timestamp == version->main_timestamp);
+
+ if (!result)
+ vm_map_unlock_read(map);
+
+ return(result);
+}
+
+/*
+ * vm_map_verify_done:
+ *
+ * Releases locks acquired by a vm_map_verify.
+ *
+ * This is now a macro in vm/vm_map.h. It does a
+ * vm_map_unlock_read on the map.
+ */
+
+/*
+ * vm_region:
+ *
+ * User call to obtain information about a region in
+ * a task's address map.
+ */
+
+kern_return_t vm_region(
+ vm_map_t map,
+ vm_offset_t *address, /* IN/OUT */
+ vm_size_t *size, /* OUT */
+ vm_prot_t *protection, /* OUT */
+ vm_prot_t *max_protection, /* OUT */
+ vm_inherit_t *inheritance, /* OUT */
+ boolean_t *is_shared, /* OUT */
+ ipc_port_t *object_name, /* OUT */
+ vm_offset_t *offset_in_object) /* OUT */
+{
+ vm_map_entry_t tmp_entry;
+ vm_map_entry_t entry;
+ vm_offset_t tmp_offset;
+ vm_offset_t start;
+
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ start = *address;
+
+ vm_map_lock_read(map);
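+	/*
+	 *	If no entry contains the given address, report on the next
+	 *	region above it; vm_map_lookup_entry left tmp_entry at the
+	 *	entry that precedes the hole.
+	 */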
+ if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
+ if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
+ vm_map_unlock_read(map);
+ return(KERN_NO_SPACE);
+ }
+ } else {
+ entry = tmp_entry;
+ }
+
+ start = entry->vme_start;
+ *protection = entry->protection;
+ *max_protection = entry->max_protection;
+ *inheritance = entry->inheritance;
+ *address = start;
+ *size = (entry->vme_end - start);
+
+ tmp_offset = entry->offset;
+
+ if (entry->is_sub_map) {
+ *is_shared = FALSE;
+ *object_name = IP_NULL;
+ *offset_in_object = tmp_offset;
+ } else {
+ *is_shared = entry->is_shared;
+ *object_name = vm_object_name(entry->object.vm_object);
+ *offset_in_object = tmp_offset;
+ }
+
+ vm_map_unlock_read(map);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_region_create_proxy:
+ *
+ * Gets a proxy to the region that ADDRESS belongs to, starting at the
+ *	region start, with MAX_PROTECTION and LEN limited by those of the region,
+ * and returns it in *PORT.
+ */
+kern_return_t
+vm_region_create_proxy (task_t task, vm_address_t address,
+ vm_prot_t max_protection, vm_size_t len,
+ ipc_port_t *port)
+{
+ kern_return_t ret;
+ vm_map_entry_t entry, tmp_entry;
+ vm_object_t object;
+ rpc_vm_offset_t rpc_offset, rpc_start;
+ rpc_vm_size_t rpc_len = (rpc_vm_size_t) len;
+ ipc_port_t pager;
+
+ if (task == TASK_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ vm_map_lock_read(task->map);
+ if (!vm_map_lookup_entry(task->map, address, &tmp_entry)) {
+ if ((entry = tmp_entry->vme_next) == vm_map_to_entry(task->map)) {
+ vm_map_unlock_read(task->map);
+ return(KERN_NO_SPACE);
+ }
+ } else {
+ entry = tmp_entry;
+ }
+
+ if (entry->is_sub_map) {
+ vm_map_unlock_read(task->map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ /* Limit the allowed protection and range to the entry ones */
+ if (len > entry->vme_end - entry->vme_start) {
+ vm_map_unlock_read(task->map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ max_protection &= entry->max_protection;
+
+ object = entry->object.vm_object;
+ vm_object_lock(object);
+ /* Create a pager in case this is an internal object that does
+ not yet have one. */
+ vm_object_pager_create(object);
+ pager = ipc_port_copy_send(object->pager);
+ vm_object_unlock(object);
+
+ rpc_start = (address - entry->vme_start) + entry->offset;
+ rpc_offset = 0;
+
+ vm_map_unlock_read(task->map);
+
+ ret = memory_object_create_proxy(task->itk_space, max_protection,
+ &pager, 1,
+ &rpc_offset, 1,
+ &rpc_start, 1,
+ &rpc_len, 1, port);
+ if (ret)
+ ipc_port_release_send(pager);
+
+ return ret;
+}
+
+/*
+ * Routine: vm_map_coalesce_entry
+ * Purpose:
+ *		Try to coalesce an entry with the preceding entry in the map.
+ * Conditions:
+ * The map is locked. If coalesced, the entry is destroyed
+ * by the call.
+ * Returns:
+ * Whether the entry was coalesced.
+ */
+boolean_t
+vm_map_coalesce_entry(
+ vm_map_t map,
+ vm_map_entry_t entry)
+{
+ vm_map_entry_t prev = entry->vme_prev;
+ vm_size_t prev_size;
+ vm_size_t entry_size;
+
+ /*
+ * Check the basic conditions for coalescing the two entries.
+ */
+ if ((entry == vm_map_to_entry(map)) ||
+ (prev == vm_map_to_entry(map)) ||
+ (prev->vme_end != entry->vme_start) ||
+ (prev->is_shared || entry->is_shared) ||
+ (prev->is_sub_map || entry->is_sub_map) ||
+ (prev->inheritance != entry->inheritance) ||
+ (prev->protection != entry->protection) ||
+ (prev->max_protection != entry->max_protection) ||
+ (prev->needs_copy != entry->needs_copy) ||
+ (prev->in_transition || entry->in_transition) ||
+ (prev->wired_count != entry->wired_count) ||
+ (prev->projected_on != 0) ||
+ (entry->projected_on != 0))
+ return FALSE;
+
+ prev_size = prev->vme_end - prev->vme_start;
+ entry_size = entry->vme_end - entry->vme_start;
+ assert(prev->gap_size == 0);
+
+ /*
+ * See if we can coalesce the two objects.
+ */
+ if (!vm_object_coalesce(prev->object.vm_object,
+ entry->object.vm_object,
+ prev->offset,
+ entry->offset,
+ prev_size,
+ entry_size,
+ &prev->object.vm_object,
+ &prev->offset))
+ return FALSE;
+
+ /*
+ * Update the hints.
+ */
+ if (map->hint == entry)
+ SAVE_HINT(map, prev);
+ if (map->first_free == entry)
+ map->first_free = prev;
+
+ /*
+ * Get rid of the entry without changing any wirings or the pmap,
+ * and without altering map->size.
+ */
+ prev->vme_end = entry->vme_end;
+ vm_map_entry_unlink(map, entry);
+ vm_map_entry_dispose(map, entry);
+
+ return TRUE;
+}
+
+/*
+ * Routine: vm_map_machine_attribute
+ * Purpose:
+ * Provide machine-specific attributes to mappings,
+ *		such as cacheability etc., for machines that provide
+ * them. NUMA architectures and machines with big/strange
+ * caches will use this.
+ * Note:
+ *	Responsibilities for locking and checking are handled here;
+ *	everything else is in the pmap module.  If any non-volatile
+ * information must be kept, the pmap module should handle
+ * it itself. [This assumes that attributes do not
+ * need to be inherited, which seems ok to me]
+ */
+kern_return_t vm_map_machine_attribute(
+ vm_map_t map,
+ vm_offset_t address,
+ vm_size_t size,
+ vm_machine_attribute_t attribute,
+ vm_machine_attribute_val_t* value) /* IN/OUT */
+{
+ kern_return_t ret;
+
+ if (address < vm_map_min(map) ||
+ (address + size) > vm_map_max(map))
+ return KERN_INVALID_ARGUMENT;
+
+ vm_map_lock(map);
+
+ ret = pmap_attribute(map->pmap, address, size, attribute, value);
+
+ vm_map_unlock(map);
+
+ return ret;
+}
+
+/*
+ * Routine: vm_map_msync
+ * Purpose:
+ *		Synchronize pages of the given map out to their memory
+ * manager, if any.
+ */
+kern_return_t vm_map_msync(
+ vm_map_t map,
+ vm_offset_t address,
+ vm_size_t size,
+ vm_sync_t sync_flags)
+{
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ if ((sync_flags & (VM_SYNC_ASYNCHRONOUS | VM_SYNC_SYNCHRONOUS)) ==
+ (VM_SYNC_ASYNCHRONOUS | VM_SYNC_SYNCHRONOUS))
+ return KERN_INVALID_ARGUMENT;
+
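+	/*
+	 *	Round the request out to whole pages before checking for
+	 *	an empty range.
+	 */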
+ size = round_page(address + size) - trunc_page(address);
+ address = trunc_page(address);
+
+ if (size == 0)
+ return KERN_SUCCESS;
+
+ /* TODO */
+
+ return KERN_INVALID_ARGUMENT;
+}
+
+
+
+#if MACH_KDB
+
+#define printf kdbprintf
+
+/*
+ * vm_map_print: [ debug ]
+ */
+void vm_map_print(db_expr_t addr, boolean_t have_addr, db_expr_t count, const char *modif)
+{
+ vm_map_t map;
+ vm_map_entry_t entry;
+
+ if (!have_addr)
+ map = current_thread()->task->map;
+ else
+ map = (vm_map_t)addr;
+
+ iprintf("Map 0x%X: name=\"%s\", pmap=0x%X,",
+ (vm_offset_t) map, map->name, (vm_offset_t) (map->pmap));
+ printf("ref=%d,nentries=%d\n", map->ref_count, map->hdr.nentries);
+	printf("size=%lu,resident=%lu,wired=%lu\n", map->size,
+ pmap_resident_count(map->pmap) * PAGE_SIZE, map->size_wired);
+ printf("version=%d\n", map->timestamp);
+ indent += 1;
+ for (entry = vm_map_first_entry(map);
+ entry != vm_map_to_entry(map);
+ entry = entry->vme_next) {
+ static char *inheritance_name[3] = { "share", "copy", "none"};
+
+ iprintf("map entry 0x%X: ", (vm_offset_t) entry);
+ printf("start=0x%X, end=0x%X\n",
+ (vm_offset_t) entry->vme_start, (vm_offset_t) entry->vme_end);
+ iprintf("prot=%X/%X/%s, ",
+ entry->protection,
+ entry->max_protection,
+ inheritance_name[entry->inheritance]);
+ if (entry->wired_count != 0) {
+ printf("wired, ");
+ }
+ if (entry->in_transition) {
+ printf("in transition");
+ if (entry->needs_wakeup)
+ printf("(wake request)");
+ printf(", ");
+ }
+ if (entry->is_sub_map) {
+ printf("submap=0x%X, offset=0x%X\n",
+ (vm_offset_t) entry->object.sub_map,
+ (vm_offset_t) entry->offset);
+ } else {
+ printf("object=0x%X, offset=0x%X",
+ (vm_offset_t) entry->object.vm_object,
+ (vm_offset_t) entry->offset);
+ if (entry->is_shared)
+ printf(", shared");
+ if (entry->needs_copy)
+ printf(", copy needed");
+ printf("\n");
+
+ if ((entry->vme_prev == vm_map_to_entry(map)) ||
+ (entry->vme_prev->object.vm_object != entry->object.vm_object)) {
+ indent += 1;
+ vm_object_print(entry->object.vm_object);
+ indent -= 1;
+ }
+ }
+ }
+ indent -= 1;
+}
+
+/*
+ * Routine: vm_map_copy_print
+ * Purpose:
+ * Pretty-print a copy object for ddb.
+ */
+
+void vm_map_copy_print(const vm_map_copy_t copy)
+{
+ int i, npages;
+
+ printf("copy object 0x%x\n", copy);
+
+ indent += 1;
+
+ iprintf("type=%d", copy->type);
+ switch (copy->type) {
+ case VM_MAP_COPY_ENTRY_LIST:
+ printf("[entry_list]");
+ break;
+
+ case VM_MAP_COPY_OBJECT:
+ printf("[object]");
+ break;
+
+ case VM_MAP_COPY_PAGE_LIST:
+ printf("[page_list]");
+ break;
+
+ default:
+ printf("[bad type]");
+ break;
+ }
+ printf(", offset=0x%x", copy->offset);
+ printf(", size=0x%x\n", copy->size);
+
+ switch (copy->type) {
+ case VM_MAP_COPY_ENTRY_LIST:
+ /* XXX add stuff here */
+ break;
+
+ case VM_MAP_COPY_OBJECT:
+ iprintf("object=0x%x\n", copy->cpy_object);
+ break;
+
+ case VM_MAP_COPY_PAGE_LIST:
+ iprintf("npages=%d", copy->cpy_npages);
+ printf(", cont=%x", copy->cpy_cont);
+ printf(", cont_args=%x\n", copy->cpy_cont_args);
+ if (copy->cpy_npages < 0) {
+ npages = 0;
+ } else if (copy->cpy_npages > VM_MAP_COPY_PAGE_LIST_MAX) {
+ npages = VM_MAP_COPY_PAGE_LIST_MAX;
+ } else {
+ npages = copy->cpy_npages;
+ }
+ iprintf("copy->cpy_page_list[0..%d] = {", npages);
+ for (i = 0; i < npages - 1; i++) {
+ printf("0x%x, ", copy->cpy_page_list[i]);
+ }
+ if (npages > 0) {
+ printf("0x%x", copy->cpy_page_list[npages - 1]);
+ }
+ printf("}\n");
+ break;
+ }
+
+ indent -= 1;
+}
+#endif /* MACH_KDB */
diff --git a/vm/vm_map.h b/vm/vm_map.h
new file mode 100644
index 0000000..a4949e4
--- /dev/null
+++ b/vm/vm_map.h
@@ -0,0 +1,585 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_map.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Virtual memory map module definitions.
+ *
+ * Contributors:
+ * avie, dlb, mwyoung
+ */
+
+#ifndef _VM_VM_MAP_H_
+#define _VM_VM_MAP_H_
+
+#include <mach/kern_return.h>
+#include <mach/boolean.h>
+#include <mach/machine/vm_types.h>
+#include <mach/vm_attributes.h>
+#include <mach/vm_prot.h>
+#include <mach/vm_inherit.h>
+#include <mach/vm_wire.h>
+#include <mach/vm_sync.h>
+#include <vm/pmap.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_types.h>
+#include <kern/list.h>
+#include <kern/lock.h>
+#include <kern/rbtree.h>
+#include <kern/macros.h>
+
+/* TODO: make it dynamic */
+#define KENTRY_DATA_SIZE (256*PAGE_SIZE)
+
+/*
+ * Types defined:
+ *
+ * vm_map_entry_t an entry in an address map.
+ * vm_map_version_t a timestamp of a map, for use with vm_map_lookup
+ * vm_map_copy_t represents memory copied from an address map,
+ * used for inter-map copy operations
+ */
+
+/*
+ * Type: vm_map_object_t [internal use only]
+ *
+ * Description:
+ * The target of an address mapping, either a virtual
+ * memory object or a sub map (of the kernel map).
+ */
+typedef union vm_map_object {
+	struct vm_object	*vm_object;	/* virtual memory object */
+ struct vm_map *sub_map; /* belongs to another map */
+} vm_map_object_t;
+
+/*
+ * Type: vm_map_entry_t [internal use only]
+ *
+ * Description:
+ * A single mapping within an address map.
+ *
+ * Implementation:
+ * Address map entries consist of start and end addresses,
+ * a VM object (or sub map) and offset into that object,
+ * and user-exported inheritance and protection information.
+ * Control information for virtual copy operations is also
+ * stored in the address map entry.
+ */
+struct vm_map_links {
+ struct vm_map_entry *prev; /* previous entry */
+ struct vm_map_entry *next; /* next entry */
+ vm_offset_t start; /* start address */
+ vm_offset_t end; /* end address */
+};
+
+struct vm_map_entry {
+ struct vm_map_links links; /* links to other entries */
+#define vme_prev links.prev
+#define vme_next links.next
+#define vme_start links.start
+#define vme_end links.end
+ struct rbtree_node tree_node; /* links to other entries in tree */
+ struct rbtree_node gap_node; /* links to other entries in gap tree */
+ struct list gap_list; /* links to other entries with
+ the same gap size */
+ vm_size_t gap_size; /* size of available memory
+ following this entry */
+ union vm_map_object object; /* object I point to */
+ vm_offset_t offset; /* offset into object */
+ unsigned int
+ /* boolean_t */ in_gap_tree:1, /* entry is in the gap tree if true,
+ or linked to other entries with
+ the same gap size if false */
+ /* boolean_t */ is_shared:1, /* region is shared */
+ /* boolean_t */ is_sub_map:1, /* Is "object" a submap? */
+ /* boolean_t */ in_transition:1, /* Entry being changed */
+ /* boolean_t */ needs_wakeup:1, /* Waiters on in_transition */
+ /* Only used when object is a vm_object: */
+ /* boolean_t */ needs_copy:1; /* does object need to be copied */
+
+ /* Only in task maps: */
+ vm_prot_t protection; /* protection code */
+ vm_prot_t max_protection; /* maximum protection */
+ vm_inherit_t inheritance; /* inheritance */
+ unsigned short wired_count; /* can be paged if = 0 */
+ vm_prot_t wired_access; /* wiring access types, as accepted
+ by vm_map_pageable; used on wiring
+ scans when protection != VM_PROT_NONE */
+ struct vm_map_entry *projected_on; /* 0 for normal map entry
+ or persistent kernel map projected buffer entry;
+ -1 for non-persistent kernel map projected buffer entry;
+ pointer to corresponding kernel map entry for user map
+ projected buffer entry */
+};
+
+typedef struct vm_map_entry *vm_map_entry_t;
+
+#define VM_MAP_ENTRY_NULL ((vm_map_entry_t) 0)
+
+/*
+ * Type: struct vm_map_header
+ *
+ * Description:
+ * Header for a vm_map and a vm_map_copy.
+ */
+struct vm_map_header {
+ struct vm_map_links links; /* first, last, min, max */
+ struct rbtree tree; /* Sorted tree of entries */
+ struct rbtree gap_tree; /* Sorted tree of gap lists
+ for allocations */
+ int nentries; /* Number of entries */
+};
+
+/*
+ * Type: vm_map_t [exported; contents invisible]
+ *
+ * Description:
+ * An address map -- a directory relating valid
+ * regions of a task's address space to the corresponding
+ * virtual memory objects.
+ *
+ * Implementation:
+ * Maps are doubly-linked lists of map entries, sorted
+ * by address. They're also contained in a red-black tree.
+ * One hint is used to start searches again at the last
+ * successful search, insertion, or removal. If the hint
+ * lookup failed (i.e. the hint didn't refer to the requested
+ * entry), a BST lookup is performed. Another hint is used to
+ * quickly find free space.
+ */
+struct vm_map {
+ lock_data_t lock; /* Lock for map data */
+ struct vm_map_header hdr; /* Map entry header */
+#define min_offset hdr.links.start /* start of range */
+#define max_offset hdr.links.end /* end of range */
+ pmap_t pmap; /* Physical map */
+ vm_size_t size; /* virtual size */
+ vm_size_t size_wired; /* wired size */
+ int ref_count; /* Reference count */
+ decl_simple_lock_data(, ref_lock) /* Lock for ref_count field */
+ vm_map_entry_t hint; /* hint for quick lookups */
+ decl_simple_lock_data(, hint_lock) /* lock for hint storage */
+ vm_map_entry_t first_free; /* First free space hint */
+
+ /* Flags */
+ unsigned int wait_for_space:1, /* Should callers wait
+ for space? */
+ /* boolean_t */ wiring_required:1; /* New mappings are wired? */
+
+ unsigned int timestamp; /* Version number */
+
+ const char *name; /* Associated name */
+};
+
+#define vm_map_to_entry(map) ((struct vm_map_entry *) &(map)->hdr.links)
+#define vm_map_first_entry(map) ((map)->hdr.links.next)
+#define vm_map_last_entry(map) ((map)->hdr.links.prev)
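+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): walking the sorted entry list of a map under the read lock,
+ * using the accessor macros above.  "map" and "inspect" are placeholders.
+ */
+#if 0	/* example only -- not compiled */
+	vm_map_entry_t	entry;
+
+	vm_map_lock_read(map);
+	for (entry = vm_map_first_entry(map);
+	     entry != vm_map_to_entry(map);
+	     entry = entry->vme_next) {
+		/* [vme_start, vme_end) maps entry->object at entry->offset */
+		inspect(entry->vme_start, entry->vme_end);
+	}
+	vm_map_unlock_read(map);
+#endif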
+
+/*
+ * Type: vm_map_version_t [exported; contents invisible]
+ *
+ * Description:
+ * Map versions may be used to quickly validate a previous
+ * lookup operation.
+ *
+ * Usage note:
+ * Because they are bulky objects, map versions are usually
+ * passed by reference.
+ *
+ * Implementation:
+ * Just a timestamp for the main map.
+ */
+typedef struct vm_map_version {
+ unsigned int main_timestamp;
+} vm_map_version_t;
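+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): the intended lookup/verify pattern.  "map" and "addr" are
+ * placeholders.  vm_map_verify is assumed to return TRUE with the map
+ * read-locked when the timestamp still matches, and FALSE (map
+ * unlocked) when the map has changed.
+ */
+#if 0	/* example only -- not compiled */
+	vm_map_version_t	version;
+	vm_object_t		object;
+	vm_offset_t		offset;
+	vm_prot_t		prot;
+	boolean_t		wired;
+	kern_return_t		kr;
+
+    retry:
+	kr = vm_map_lookup(&map, addr, VM_PROT_READ, &version,
+			   &object, &offset, &prot, &wired);
+	if (kr != KERN_SUCCESS)
+		return kr;
+	/* ... long operation that drops all map locks ... */
+	if (!vm_map_verify(map, &version))
+		goto retry;		/* map changed underneath us */
+	/* ... commit the result while the map cannot change ... */
+	vm_map_verify_done(map, &version);
+#endif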
+
+/*
+ * Type: vm_map_copy_t [exported; contents invisible]
+ *
+ * Description:
+ * A map copy object represents a region of virtual memory
+ * that has been copied from an address map but is still
+ * in transit.
+ *
+ * A map copy object may only be used by a single thread
+ * at a time.
+ *
+ * Implementation:
+ * There are three formats for map copy objects.
+ * The first is very similar to the main
+ * address map in structure, and as a result, some
+ * of the internal maintenance functions/macros can
+ * be used with either address maps or map copy objects.
+ *
+ * An entry-list copy object contains a header "links"
+ * entry onto which the other entries that represent
+ * the region are chained.
+ *
+ * The second format is a single vm object. This is used
+ * primarily in the pageout path. The third format is a
+ * list of vm pages. An optional continuation provides
+ * a hook to be called to obtain more of the memory,
+ * or perform other operations. The continuation takes two
+ * arguments: a saved arg buffer and a pointer through which
+ * a new vm_map_copy is returned; passing a null result
+ * pointer requests an abort.
+ */
+
+#define VM_MAP_COPY_PAGE_LIST_MAX 64
+
+struct vm_map_copy;
+struct vm_map_copyin_args_data;
+typedef kern_return_t (*vm_map_copy_cont_fn)(struct vm_map_copyin_args_data*, struct vm_map_copy**);
+
+typedef struct vm_map_copy {
+ int type;
+#define VM_MAP_COPY_ENTRY_LIST 1
+#define VM_MAP_COPY_OBJECT 2
+#define VM_MAP_COPY_PAGE_LIST 3
+ vm_offset_t offset;
+ vm_size_t size;
+ union {
+ struct vm_map_header hdr; /* ENTRY_LIST */
+ struct { /* OBJECT */
+ vm_object_t object;
+ } c_o;
+ struct { /* PAGE_LIST */
+ vm_page_t page_list[VM_MAP_COPY_PAGE_LIST_MAX];
+ int npages;
+ vm_map_copy_cont_fn cont;
+ struct vm_map_copyin_args_data* cont_args;
+ } c_p;
+ } c_u;
+} *vm_map_copy_t;
+
+#define cpy_hdr c_u.hdr
+
+#define cpy_object c_u.c_o.object
+
+#define cpy_page_list c_u.c_p.page_list
+#define cpy_npages c_u.c_p.npages
+#define cpy_cont c_u.c_p.cont
+#define cpy_cont_args c_u.c_p.cont_args
+
+#define VM_MAP_COPY_NULL ((vm_map_copy_t) 0)
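+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): consumers of a vm_map_copy_t dispatch on its format.
+ * "dst_map", "dst_addr", "copy" and "kr" are placeholders.
+ */
+#if 0	/* example only -- not compiled */
+	switch (copy->type) {
+	case VM_MAP_COPY_ENTRY_LIST:
+		kr = vm_map_copyout(dst_map, &dst_addr, copy);
+		break;
+	case VM_MAP_COPY_OBJECT:
+		/*
+		 * A single backing object in copy->cpy_object,
+		 * used mainly in the pageout path.
+		 */
+		break;
+	case VM_MAP_COPY_PAGE_LIST:
+		/* copy->cpy_npages pages in copy->cpy_page_list[] */
+		kr = vm_map_copyout_page_list(dst_map, &dst_addr, copy);
+		break;
+	}
+#endif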
+
+/*
+ * Useful macros for entry list copy objects
+ */
+
+#define vm_map_copy_to_entry(copy) \
+ ((struct vm_map_entry *) &(copy)->cpy_hdr.links)
+#define vm_map_copy_first_entry(copy) \
+ ((copy)->cpy_hdr.links.next)
+#define vm_map_copy_last_entry(copy) \
+ ((copy)->cpy_hdr.links.prev)
+
+/*
+ * Continuation macros for page list copy objects
+ */
+
+#define vm_map_copy_invoke_cont(old_copy, new_copy, result) \
+MACRO_BEGIN \
+ vm_map_copy_page_discard(old_copy); \
+ *result = (*((old_copy)->cpy_cont))((old_copy)->cpy_cont_args, \
+ new_copy); \
+ (old_copy)->cpy_cont = (kern_return_t (*)()) 0; \
+MACRO_END
+
+#define vm_map_copy_invoke_extend_cont(old_copy, new_copy, result) \
+MACRO_BEGIN \
+ *result = (*((old_copy)->cpy_cont))((old_copy)->cpy_cont_args, \
+ new_copy); \
+ (old_copy)->cpy_cont = (kern_return_t (*)()) 0; \
+MACRO_END
+
+#define vm_map_copy_abort_cont(old_copy) \
+MACRO_BEGIN \
+ vm_map_copy_page_discard(old_copy); \
+ (*((old_copy)->cpy_cont))((old_copy)->cpy_cont_args, \
+ (vm_map_copy_t *) 0); \
+ (old_copy)->cpy_cont = (kern_return_t (*)()) 0; \
+ (old_copy)->cpy_cont_args = VM_MAP_COPYIN_ARGS_NULL; \
+MACRO_END
+
+#define vm_map_copy_has_cont(copy) \
+ (((copy)->cpy_cont) != (kern_return_t (*)()) 0)
+
+/*
+ * Continuation structures for vm_map_copyin_page_list.
+ */
+
+typedef struct vm_map_copyin_args_data {
+ vm_map_t map;
+ vm_offset_t src_addr;
+ vm_size_t src_len;
+ vm_offset_t destroy_addr;
+ vm_size_t destroy_len;
+ boolean_t steal_pages;
+} vm_map_copyin_args_data_t, *vm_map_copyin_args_t;
+
+#define VM_MAP_COPYIN_ARGS_NULL ((vm_map_copyin_args_t) 0)
+
+/*
+ * Macros: vm_map_lock, etc. [internal use only]
+ * Description:
+ * Perform locking on the data portion of a map.
+ */
+
+#define vm_map_lock_init(map) \
+MACRO_BEGIN \
+ lock_init(&(map)->lock, TRUE); \
+ (map)->timestamp = 0; \
+MACRO_END
+
+void vm_map_lock(struct vm_map *map);
+void vm_map_unlock(struct vm_map *map);
+
+#define vm_map_lock_read(map) lock_read(&(map)->lock)
+#define vm_map_unlock_read(map) lock_read_done(&(map)->lock)
+#define vm_map_lock_write_to_read(map) \
+ lock_write_to_read(&(map)->lock)
+#define vm_map_lock_read_to_write(map) \
+ (lock_read_to_write(&(map)->lock) || (((map)->timestamp++), 0))
+#define vm_map_lock_set_recursive(map) \
+ lock_set_recursive(&(map)->lock)
+#define vm_map_lock_clear_recursive(map) \
+ lock_clear_recursive(&(map)->lock)
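+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): upgrading from a read to a write lock on "map" (a
+ * placeholder).  This assumes the usual Mach convention that
+ * lock_read_to_write returns TRUE when the upgrade fails, in which
+ * case the read lock has already been dropped.
+ */
+#if 0	/* example only -- not compiled */
+	vm_map_lock_read(map);
+	/* ... read-only inspection of the map ... */
+	if (vm_map_lock_read_to_write(map)) {
+		/* upgrade failed: retake the lock and revalidate */
+		vm_map_lock(map);
+		/* ... redo any lookups made above ... */
+	}
+	/* ... modify the map ... */
+	vm_map_unlock(map);
+#endif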
+
+/*
+ * Exported procedures that operate on vm_map_t.
+ */
+
+/* Initialize the module */
+extern void vm_map_init(void);
+
+/* Initialize an empty map */
+extern void vm_map_setup(vm_map_t, pmap_t, vm_offset_t, vm_offset_t);
+/* Create an empty map */
+extern vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t);
+/* Create a map in the image of an existing map */
+extern vm_map_t vm_map_fork(vm_map_t);
+
+/* Gain a reference to an existing map */
+extern void vm_map_reference(vm_map_t);
+/* Lose a reference */
+extern void vm_map_deallocate(vm_map_t);
+
+/* Enter a mapping */
+extern kern_return_t vm_map_enter(vm_map_t, vm_offset_t *, vm_size_t,
+ vm_offset_t, boolean_t, vm_object_t,
+ vm_offset_t, boolean_t, vm_prot_t,
+ vm_prot_t, vm_inherit_t);
+/* Enter a mapping primitive */
+extern kern_return_t vm_map_find_entry(vm_map_t, vm_offset_t *, vm_size_t,
+ vm_offset_t, vm_object_t,
+ vm_map_entry_t *);
+/* Deallocate a region */
+extern kern_return_t vm_map_remove(vm_map_t, vm_offset_t, vm_offset_t);
+/* Change protection */
+extern kern_return_t vm_map_protect(vm_map_t, vm_offset_t, vm_offset_t,
+ vm_prot_t, boolean_t);
+/* Change inheritance */
+extern kern_return_t vm_map_inherit(vm_map_t, vm_offset_t, vm_offset_t,
+ vm_inherit_t);
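+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): establishing a mapping of "object" anywhere in "map"
+ * ("map", "object", "offset" and "size" are placeholders).  The
+ * parameter meanings noted below (alignment mask, anywhere,
+ * needs_copy, set_max) follow the classic Mach interpretation and
+ * are assumptions here.
+ */
+#if 0	/* example only -- not compiled */
+	vm_offset_t	addr = 0;
+	kern_return_t	kr;
+
+	kr = vm_map_enter(map, &addr, size,
+			  (vm_offset_t) 0,		/* alignment mask */
+			  TRUE,				/* anywhere */
+			  object, offset,
+			  FALSE,			/* needs_copy */
+			  VM_PROT_READ | VM_PROT_WRITE,	/* cur protection */
+			  VM_PROT_ALL,			/* max protection */
+			  VM_INHERIT_DEFAULT);
+	if (kr == KERN_SUCCESS)
+		/* later: drop write permission on the range */
+		kr = vm_map_protect(map, addr, addr + size,
+				    VM_PROT_READ, FALSE /* set_max */);
+#endif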
+
+/* Look up an address */
+extern kern_return_t vm_map_lookup(vm_map_t *, vm_offset_t, vm_prot_t,
+ vm_map_version_t *, vm_object_t *,
+ vm_offset_t *, vm_prot_t *, boolean_t *);
+/* Find a map entry */
+extern boolean_t vm_map_lookup_entry(vm_map_t, vm_offset_t,
+ vm_map_entry_t *);
+/* Verify that a previous lookup is still valid */
+extern boolean_t vm_map_verify(vm_map_t, vm_map_version_t *);
+/* vm_map_verify_done is now a macro -- see below */
+/* Make a copy of a region */
+extern kern_return_t vm_map_copyin(vm_map_t, vm_offset_t, vm_size_t,
+ boolean_t, vm_map_copy_t *);
+/* Make a copy of a region using a page list copy */
+extern kern_return_t vm_map_copyin_page_list(vm_map_t, vm_offset_t,
+ vm_size_t, boolean_t,
+ boolean_t, vm_map_copy_t *,
+ boolean_t);
+/* Place a copy into a map */
+extern kern_return_t vm_map_copyout(vm_map_t, vm_offset_t *, vm_map_copy_t);
+/* Overwrite existing memory with a copy */
+extern kern_return_t vm_map_copy_overwrite(vm_map_t, vm_offset_t,
+ vm_map_copy_t, boolean_t);
+/* Discard a copy without using it */
+extern void vm_map_copy_discard(vm_map_copy_t);
+extern void vm_map_copy_page_discard(vm_map_copy_t);
+extern vm_map_copy_t vm_map_copy_copy(vm_map_copy_t);
+/* Page list continuation version of previous */
+extern kern_return_t vm_map_copy_discard_cont(vm_map_copyin_args_t,
+ vm_map_copy_t *);
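+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): moving a region between two maps with an entry-list copy.
+ * "src_map", "src_addr", "size" and "dst_map" are placeholders; the
+ * FALSE argument is taken to be src_destroy (source preserved).
+ */
+#if 0	/* example only -- not compiled */
+	vm_map_copy_t	copy;
+	vm_offset_t	dst_addr;
+	kern_return_t	kr;
+
+	kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
+	if (kr != KERN_SUCCESS)
+		return kr;
+
+	kr = vm_map_copyout(dst_map, &dst_addr, copy);
+	if (kr != KERN_SUCCESS)
+		vm_map_copy_discard(copy);	/* copy was never consumed */
+#endif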
+
+extern boolean_t vm_map_coalesce_entry(vm_map_t, vm_map_entry_t);
+
+/* Add or remove machine-dependent attributes from map regions */
+extern kern_return_t vm_map_machine_attribute(vm_map_t, vm_offset_t,
+ vm_size_t,
+ vm_machine_attribute_t,
+ vm_machine_attribute_val_t *);
+
+extern kern_return_t vm_map_msync(vm_map_t,
+ vm_offset_t, vm_size_t, vm_sync_t);
+
+/* Delete entry from map */
+extern void vm_map_entry_delete(vm_map_t, vm_map_entry_t);
+
+kern_return_t vm_map_delete(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end);
+
+kern_return_t vm_map_copyout_page_list(
+ vm_map_t dst_map,
+ vm_offset_t *dst_addr, /* OUT */
+ vm_map_copy_t copy);
+
+void vm_map_copy_page_discard (vm_map_copy_t copy);
+
+boolean_t vm_map_lookup_entry(
+ vm_map_t map,
+ vm_offset_t address,
+ vm_map_entry_t *entry); /* OUT */
+
+static inline void vm_map_set_name(vm_map_t map, const char *name)
+{
+ map->name = name;
+}
+
+
+/*
+ * Functions implemented as macros
+ */
+#define vm_map_min(map) ((map)->min_offset)
+ /* Lowest valid address in
+ * a map */
+
+#define vm_map_max(map) ((map)->max_offset)
+ /* Highest valid address */
+
+#define vm_map_pmap(map) ((map)->pmap)
+ /* Physical map associated
+ * with this address map */
+
+#define vm_map_verify_done(map, version) (vm_map_unlock_read(map))
+ /* Operation that required
+ * a verified lookup is
+ * now complete */
+/*
+ * Pageability functions.
+ */
+extern kern_return_t vm_map_pageable(vm_map_t, vm_offset_t, vm_offset_t,
+ vm_prot_t, boolean_t, boolean_t);
+
+extern kern_return_t vm_map_pageable_all(vm_map_t, vm_wire_t);
+
+/*
+ * Submap object. Must be used to create memory to be put
+ * in a submap by vm_map_submap.
+ */
+extern vm_object_t vm_submap_object;
+
+/*
+ * vm_map_copyin_object:
+ *
+ * Create a copy object from an object.
+ * Our caller donates an object reference.
+ */
+extern kern_return_t vm_map_copyin_object(
+ vm_object_t object,
+ vm_offset_t offset, /* offset of region in object */
+ vm_size_t size, /* size of region in object */
+ vm_map_copy_t *copy_result); /* OUT */
+
+/*
+ * vm_map_submap: [ kernel use only ]
+ *
+ * Mark the given range as handled by a subordinate map.
+ *
+ * This range must have been created with vm_map_find using
+ * the vm_submap_object, and no other operations may have been
+ * performed on this range prior to calling vm_map_submap.
+ *
+ * Only a limited number of operations can be performed
+ * within this range after calling vm_map_submap:
+ * vm_fault
+ * [Don't try vm_map_copyin!]
+ *
+ * To remove a submapping, one must first remove the
+ * range from the superior map, and then destroy the
+ * submap (if desired). [Better yet, don't try it.]
+ */
+extern kern_return_t vm_map_submap(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_map_t submap);
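+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): roughly the sequence used to carve [start, end) of a
+ * parent map out into a submap (compare kmem_suballoc in
+ * vm/vm_kern.c).  "parent", "start" and "end" are placeholders;
+ * error handling is elided and the reference/offset conventions
+ * shown are assumptions.
+ */
+#if 0	/* example only -- not compiled */
+	vm_map_t	submap;
+	kern_return_t	kr;
+
+	vm_object_reference(vm_submap_object);
+	kr = vm_map_enter(parent, &start, end - start,
+			  (vm_offset_t) 0, FALSE,	/* at a fixed address */
+			  vm_submap_object, start, FALSE,
+			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+	if (kr == KERN_SUCCESS) {
+		submap = vm_map_create(vm_map_pmap(parent), start, end);
+		kr = vm_map_submap(parent, start, end, submap);
+	}
+#endif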
+
+/*
+ * Wait and wakeup macros for in_transition map entries.
+ */
+#define vm_map_entry_wait(map, interruptible) \
+ MACRO_BEGIN \
+ assert_wait((event_t)&(map)->hdr, interruptible); \
+ vm_map_unlock(map); \
+ thread_block((void (*)()) 0); \
+ MACRO_END
+
+#define vm_map_entry_wakeup(map) thread_wakeup((event_t)&(map)->hdr)
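+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): a typical wait loop for an entry that is in transition.
+ * "map", "address" and "entry" are placeholders.  The entry must be
+ * looked up again after each wakeup, since it may have been clipped
+ * or removed while the map was unlocked.
+ */
+#if 0	/* example only -- not compiled */
+	vm_map_lock(map);
+	while (vm_map_lookup_entry(map, address, &entry) &&
+	       entry->in_transition) {
+		entry->needs_wakeup = TRUE;
+		vm_map_entry_wait(map, FALSE);	/* drops the map lock */
+		vm_map_lock(map);
+	}
+	/* ... operate on the entry; when clearing in_transition: ... */
+	if (entry->needs_wakeup)
+		vm_map_entry_wakeup(map);
+#endif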
+
+/*
+ * This routine is called only when it is known that
+ * the entry must be split.
+ */
+extern void _vm_map_clip_start(
+ struct vm_map_header *map_header,
+ vm_map_entry_t entry,
+ vm_offset_t start,
+ boolean_t link_gap);
+
+/*
+ * vm_map_clip_end: [ internal use only ]
+ *
+ * Asserts that the given entry ends at or before
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ */
+void _vm_map_clip_end(
+ struct vm_map_header *map_header,
+ vm_map_entry_t entry,
+ vm_offset_t end,
+ boolean_t link_gap);
+
+#endif /* _VM_VM_MAP_H_ */
diff --git a/vm/vm_object.c b/vm/vm_object.c
new file mode 100644
index 0000000..c238cce
--- /dev/null
+++ b/vm/vm_object.c
@@ -0,0 +1,2994 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_object.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Virtual memory object module.
+ */
+
+#include <kern/printf.h>
+#include <string.h>
+
+#include <mach/memory_object.h>
+#include <vm/memory_object_default.user.h>
+#include <vm/memory_object_user.user.h>
+#include <machine/vm_param.h>
+#include <ipc/ipc_port.h>
+#include <ipc/ipc_space.h>
+#include <kern/assert.h>
+#include <kern/debug.h>
+#include <kern/mach.server.h>
+#include <kern/lock.h>
+#include <kern/queue.h>
+#include <kern/xpr.h>
+#include <kern/slab.h>
+#include <vm/memory_object.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#if MACH_KDB
+#include <ddb/db_output.h>
+#endif /* MACH_KDB */
+
+void memory_object_release(
+ ipc_port_t pager,
+ pager_request_t pager_request,
+ ipc_port_t pager_name); /* forward */
+
+/*
+ * Virtual memory objects maintain the actual data
+ * associated with allocated virtual memory. A given
+ * page of memory exists within exactly one object.
+ *
+ * An object is only deallocated when all "references"
+ * are given up. Only one "reference" to a given
+ * region of an object should be writeable.
+ *
+ * Associated with each object is a list of all resident
+ * memory pages belonging to that object; this list is
+ * maintained by the "vm_page" module, but locked by the object's
+ * lock.
+ *
+ * Each object also records the memory object port
+ * that is used by the kernel to request and write
+ * back data (the memory object port, field "pager"),
+ * and the ports provided to the memory manager, the server that
+ * manages that data, to return data and control its
+ * use (the memory object control port, field "pager_request")
+ * and for naming (the memory object name port, field "pager_name").
+ *
+ * Virtual memory objects are allocated to provide
+ * zero-filled memory (vm_allocate) or map a user-defined
+ * memory object into a virtual address space (vm_map).
+ *
+ * Virtual memory objects that refer to a user-defined
+ * memory object are called "permanent", because all changes
+ * made in virtual memory are reflected back to the
+ * memory manager, which may then store them permanently.
+ * Other virtual memory objects are called "temporary",
+ * meaning that changes need be written back only when
+ * necessary to reclaim pages, and that storage associated
+ * with the object can be discarded once it is no longer
+ * mapped.
+ *
+ * A permanent memory object may be mapped into more
+ * than one virtual address space. Moreover, two threads
+ * may attempt to make the first mapping of a memory
+ * object concurrently. Only one thread is allowed to
+ * complete this mapping; all others wait until the
+ * "pager_initialized" field is asserted, indicating
+ * that the first thread has initialized all of the
+ * necessary fields in the virtual memory object structure.
+ *
+ * The kernel relies on a *default memory manager* to
+ * provide backing storage for the zero-filled virtual
+ * memory objects. The memory object ports associated
+ * with these temporary virtual memory objects are only
+ * generated and passed to the default memory manager
+ * when it becomes necessary. Virtual memory objects
+ * that depend on the default memory manager are called
+ * "internal". The "pager_created" field is provided to
+ * indicate whether these ports have ever been allocated.
+ *
+ * The kernel may also create virtual memory objects to
+ * hold changed pages after a copy-on-write operation.
+ * In this case, the virtual memory object (and its
+ * backing storage -- its memory object) only contain
+ * those pages that have been changed. The "shadow"
+ * field refers to the virtual memory object that contains
+ * the remainder of the contents. The "shadow_offset"
+ * field indicates where in the "shadow" these contents begin.
+ * The "copy" field refers to a virtual memory object
+ * to which changed pages must be copied before changing
+ * this object, in order to implement another form
+ * of copy-on-write optimization.
+ *
+ * The virtual memory object structure also records
+ * the attributes associated with its memory object.
+ * The "pager_ready", "can_persist" and "copy_strategy"
+ * fields represent those attributes. The "cached_list"
+ * field is used in the implementation of the persistence
+ * attribute.
+ *
+ * ZZZ Continue this comment.
+ */
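+/*
+ * Illustrative sketch (editorial addition, not part of the original
+ * source): how the "shadow"/"shadow_offset" fields are meant to be
+ * walked when looking for a resident page.  "top_object" and
+ * "top_offset" are placeholders; the real logic, including pager
+ * requests and copy handling, lives in vm_fault_page.
+ */
+#if 0	/* example only -- not compiled */
+	vm_object_t	object = top_object;	/* locked by the caller */
+	vm_offset_t	offset = top_offset;
+	vm_page_t	m;
+
+	while ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
+		vm_object_t shadow = object->shadow;
+
+		if (shadow == VM_OBJECT_NULL)
+			break;		/* zero fill, or ask the pager */
+		offset += object->shadow_offset;
+		vm_object_lock(shadow);
+		vm_object_unlock(object);
+		object = shadow;
+	}
+#endif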
+
+struct kmem_cache vm_object_cache; /* vm backing store cache */
+
+/*
+ * All wired-down kernel memory belongs to a single virtual
+ * memory object (kernel_object) to avoid wasting data structures.
+ */
+static struct vm_object kernel_object_store;
+vm_object_t kernel_object = &kernel_object_store;
+
+/*
+ * Virtual memory objects that are not referenced by
+ * any address maps, but that are allowed to persist
+ * (an attribute specified by the associated memory manager),
+ * are kept in a queue (vm_object_cached_list).
+ *
+ * When an object from this queue is referenced again,
+ * for example to make another address space mapping,
+ * it must be removed from the queue. That is, the
+ * queue contains *only* objects with zero references.
+ *
+ * The kernel may choose to terminate objects from this
+ * queue in order to reclaim storage. The current policy
+ * is to let memory pressure dynamically adjust the number
+ * of unreferenced objects. The pageout daemon attempts to
+ * collect objects after removing pages from them.
+ *
+ * A simple lock (accessed by routines
+ * vm_object_cache_{lock,lock_try,unlock}) governs the
+ * object cache. It must be held when objects are
+ * added to or removed from the cache (in vm_object_terminate).
+ * The routines that acquire a reference to a virtual
+ * memory object based on one of the memory object ports
+ * must also lock the cache.
+ *
+ * Ideally, the object cache should be more isolated
+ * from the reference mechanism, so that the lock need
+ * not be held to make simple references.
+ */
+queue_head_t vm_object_cached_list;
+
+def_simple_lock_data(static,vm_object_cached_lock_data)
+
+#define vm_object_cache_lock() \
+ simple_lock(&vm_object_cached_lock_data)
+#define vm_object_cache_lock_try() \
+ simple_lock_try(&vm_object_cached_lock_data)
+#define vm_object_cache_unlock() \
+ simple_unlock(&vm_object_cached_lock_data)
+
+/*
+ * Number of physical pages referenced by cached objects.
+ * This counter is protected by its own lock to work around
+ * lock ordering issues.
+ */
+int vm_object_cached_pages;
+
+def_simple_lock_data(static,vm_object_cached_pages_lock_data)
+
+/*
+ * Virtual memory objects are initialized from
+ * a template (see vm_object_allocate).
+ *
+ * When adding a new field to the virtual memory
+ * object structure, be sure to add initialization
+ * (see vm_object_init).
+ */
+struct vm_object vm_object_template;
+
+/*
+ * vm_object_allocate:
+ *
+ * Returns a new object with the given size.
+ */
+
+static void _vm_object_setup(
+ vm_object_t object,
+ vm_size_t size)
+{
+ *object = vm_object_template;
+ queue_init(&object->memq);
+ vm_object_lock_init(object);
+ object->size = size;
+}
+
+static vm_object_t _vm_object_allocate(
+ vm_size_t size)
+{
+ vm_object_t object;
+
+ object = (vm_object_t) kmem_cache_alloc(&vm_object_cache);
+ if (!object)
+ return 0;
+
+ _vm_object_setup(object, size);
+
+ return object;
+}
+
+vm_object_t vm_object_allocate(
+ vm_size_t size)
+{
+ vm_object_t object;
+ ipc_port_t port;
+
+ object = _vm_object_allocate(size);
+ if (object == 0)
+ panic("vm_object_allocate");
+ port = ipc_port_alloc_kernel();
+ if (port == IP_NULL)
+ panic("vm_object_allocate");
+ object->pager_name = port;
+ ipc_kobject_set(port, (ipc_kobject_t) object, IKOT_PAGING_NAME);
+
+ return object;
+}
+
+/*
+ * vm_object_bootstrap:
+ *
+ * Initialize the VM objects module.
+ */
+void vm_object_bootstrap(void)
+{
+ kmem_cache_init(&vm_object_cache, "vm_object",
+ sizeof(struct vm_object), 0, NULL, 0);
+
+ queue_init(&vm_object_cached_list);
+ simple_lock_init(&vm_object_cached_lock_data);
+
+ /*
+ * Fill in a template object, for quick initialization
+ */
+
+ vm_object_template.ref_count = 1;
+ vm_object_template.size = 0;
+ vm_object_template.resident_page_count = 0;
+ vm_object_template.copy = VM_OBJECT_NULL;
+ vm_object_template.shadow = VM_OBJECT_NULL;
+ vm_object_template.shadow_offset = (vm_offset_t) 0;
+
+ vm_object_template.pager = IP_NULL;
+ vm_object_template.paging_offset = 0;
+ vm_object_template.pager_request = PAGER_REQUEST_NULL;
+ vm_object_template.pager_name = IP_NULL;
+
+ vm_object_template.pager_created = FALSE;
+ vm_object_template.pager_initialized = FALSE;
+ vm_object_template.pager_ready = FALSE;
+
+ vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_NONE;
+ /* ignored if temporary, will be reset before
+ * permanent object becomes ready */
+ vm_object_template.use_shared_copy = FALSE;
+ vm_object_template.shadowed = FALSE;
+
+ vm_object_template.absent_count = 0;
+ vm_object_template.all_wanted = 0; /* all bits FALSE */
+
+ vm_object_template.paging_in_progress = 0;
+ vm_object_template.used_for_pageout = FALSE;
+ vm_object_template.can_persist = FALSE;
+ vm_object_template.cached = FALSE;
+ vm_object_template.internal = TRUE;
+ vm_object_template.temporary = TRUE;
+ vm_object_template.alive = TRUE;
+ vm_object_template.lock_in_progress = FALSE;
+ vm_object_template.lock_restart = FALSE;
+ vm_object_template.last_alloc = (vm_offset_t) 0;
+
+#if MACH_PAGEMAP
+ vm_object_template.existence_info = VM_EXTERNAL_NULL;
+#endif /* MACH_PAGEMAP */
+
+ /*
+ * Initialize the "kernel object"
+ */
+
+ _vm_object_setup(kernel_object,
+ VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
+
+ /*
+ * Initialize the "submap object". Make it as large as the
+ * kernel object so that no limit is imposed on submap sizes.
+ */
+
+ _vm_object_setup(vm_submap_object,
+ VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
+
+#if MACH_PAGEMAP
+ vm_external_module_initialize();
+#endif /* MACH_PAGEMAP */
+}
+
+void vm_object_init(void)
+{
+ /*
+ * Finish initializing the kernel object.
+ * The submap object doesn't need a name port.
+ */
+
+ kernel_object->pager_name = ipc_port_alloc_kernel();
+ ipc_kobject_set(kernel_object->pager_name,
+ (ipc_kobject_t) kernel_object,
+ IKOT_PAGING_NAME);
+}
+
+/*
+ * Object cache management functions.
+ *
+ * Both the cache and the object must be locked
+ * before calling these functions.
+ */
+
+static void vm_object_cache_add(
+ vm_object_t object)
+{
+ assert(!object->cached);
+ queue_enter(&vm_object_cached_list, object, vm_object_t, cached_list);
+ object->cached = TRUE;
+}
+
+static void vm_object_cache_remove(
+ vm_object_t object)
+{
+ assert(object->cached);
+ queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list);
+ object->cached = FALSE;
+}
+
+void vm_object_collect(
+ vm_object_t object)
+{
+ vm_object_unlock(object);
+
+ /*
+ * The cache lock must be acquired in the proper order.
+ */
+
+ vm_object_cache_lock();
+ vm_object_lock(object);
+
+ /*
+ * If the object was referenced while the lock was
+ * dropped, cancel the termination.
+ */
+
+ if (!vm_object_collectable(object)) {
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ return;
+ }
+
+ vm_object_cache_remove(object);
+ vm_object_terminate(object);
+}
+
+/*
+ * vm_object_reference:
+ *
+ * Gets another reference to the given object.
+ */
+void vm_object_reference(
+ vm_object_t object)
+{
+ if (object == VM_OBJECT_NULL)
+ return;
+
+ vm_object_lock(object);
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ vm_object_unlock(object);
+}
+
+/*
+ * vm_object_deallocate:
+ *
+ * Release a reference to the specified object,
+ * gained either through a vm_object_allocate
+ * or a vm_object_reference call. When all references
+ * are gone, storage associated with this object
+ * may be relinquished.
+ *
+ * No object may be locked.
+ */
+void vm_object_deallocate(
+ vm_object_t object)
+{
+ vm_object_t temp;
+
+ while (object != VM_OBJECT_NULL) {
+
+ /*
+ * The cache holds a reference (uncounted) to
+ * the object; we must lock it before removing
+ * the object.
+ */
+
+ vm_object_cache_lock();
+
+ /*
+ * Lose the reference
+ */
+ vm_object_lock(object);
+ if (--(object->ref_count) > 0) {
+
+ /*
+ * If there are still references, then
+ * we are done.
+ */
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ return;
+ }
+
+ /*
+ * See whether this object can persist. If so, enter
+ * it in the cache.
+ */
+ if (object->can_persist && (object->resident_page_count > 0)) {
+ vm_object_cache_add(object);
+ vm_object_cache_unlock();
+ vm_object_unlock(object);
+ return;
+ }
+
+ if (object->pager_created &&
+ !object->pager_initialized) {
+
+ /*
+ * Have to wait for initialization.
+ * Put reference back and retry
+ * when it's initialized.
+ */
+
+ object->ref_count++;
+ vm_object_assert_wait(object,
+ VM_OBJECT_EVENT_INITIALIZED, FALSE);
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ thread_block((void (*)()) 0);
+ continue;
+ }
+
+ /*
+ * Take the reference to the shadow object
+ * out of the object to be destroyed.
+ */
+
+ temp = object->shadow;
+
+ /*
+ * Destroy the object; the cache lock will
+ * be released in the process.
+ */
+
+ vm_object_terminate(object);
+
+ /*
+ * Deallocate the reference to the shadow
+ * by continuing the loop with that object
+ * in place of the original.
+ */
+
+ object = temp;
+ }
+}
+
+/*
+ * Routine: vm_object_terminate
+ * Purpose:
+ * Free all resources associated with a vm_object.
+ * In/out conditions:
+ * Upon entry, the object and the cache must be locked,
+ * and the object must have no references.
+ *
+ * The shadow object reference is left alone.
+ *
+ * Upon exit, the cache will be unlocked, and the
+ * object will cease to exist.
+ */
+void vm_object_terminate(
+ vm_object_t object)
+{
+ vm_page_t p;
+ vm_object_t shadow_object;
+
+ /*
+ * Make sure the object isn't already being terminated
+ */
+
+ assert(object->alive);
+ object->alive = FALSE;
+
+ /*
+ * Make sure no one can look us up now.
+ */
+
+ vm_object_remove(object);
+ vm_object_cache_unlock();
+
+ /*
+ * Detach the object from its shadow if we are the shadow's
+ * copy.
+ */
+ if ((shadow_object = object->shadow) != VM_OBJECT_NULL) {
+ vm_object_lock(shadow_object);
+ assert((shadow_object->copy == object) ||
+ (shadow_object->copy == VM_OBJECT_NULL));
+ shadow_object->copy = VM_OBJECT_NULL;
+ vm_object_unlock(shadow_object);
+ }
+
+ /*
+ * The pageout daemon might be playing with our pages.
+ * Now that the object is dead, it won't touch any more
+ * pages, but some pages might already be on their way out.
+ * Hence, we wait until the active paging activities have ceased.
+ */
+
+ vm_object_paging_wait(object, FALSE);
+
+ /*
+ * Clean or free the pages, as appropriate.
+ * It is possible for us to find busy/absent pages,
+ * if some faults on this object were aborted.
+ */
+
+ if ((object->temporary) || (object->pager == IP_NULL)) {
+ while (!queue_empty(&object->memq)) {
+ p = (vm_page_t) queue_first(&object->memq);
+
+ VM_PAGE_CHECK(p);
+
+ VM_PAGE_FREE(p);
+ }
+ } else while (!queue_empty(&object->memq)) {
+ p = (vm_page_t) queue_first(&object->memq);
+
+ VM_PAGE_CHECK(p);
+
+ vm_page_lock_queues();
+ VM_PAGE_QUEUES_REMOVE(p);
+ vm_page_unlock_queues();
+
+ if (p->absent || p->private) {
+
+ /*
+ * For private pages, VM_PAGE_FREE just
+ * leaves the page structure around for
+ * its owner to clean up. For absent
+ * pages, the structure is returned to
+ * the appropriate pool.
+ */
+
+ goto free_page;
+ }
+
+ if (!p->dirty)
+ p->dirty = pmap_is_modified(p->phys_addr);
+
+ if (p->dirty || p->precious) {
+ p->busy = TRUE;
+ vm_pageout_page(p, FALSE, TRUE); /* flush page */
+ } else {
+ free_page:
+ VM_PAGE_FREE(p);
+ }
+ }
+
+ assert(object->ref_count == 0);
+ assert(object->paging_in_progress == 0);
+ assert(!object->cached);
+
+ if (!object->internal) {
+ assert(object->resident_page_count == 0);
+
+ vm_page_lock_queues();
+ vm_object_external_count--;
+ vm_page_unlock_queues();
+ }
+
+ /*
+ * Throw away port rights... note that they may
+ * already have been thrown away (by vm_object_destroy
+ * or memory_object_destroy).
+ *
+ * Instead of destroying the control and name ports,
+ * we send all rights off to the memory manager instead,
+ * using memory_object_terminate.
+ */
+
+ vm_object_unlock(object);
+
+ if (object->pager != IP_NULL) {
+ /* consumes our rights for pager, pager_request, pager_name */
+ memory_object_release(object->pager,
+ object->pager_request,
+ object->pager_name);
+ } else if (object->pager_name != IP_NULL) {
+ /* consumes our right for pager_name */
+ ipc_port_dealloc_kernel(object->pager_name);
+ }
+
+#if MACH_PAGEMAP
+ vm_external_destroy(object->existence_info);
+#endif /* MACH_PAGEMAP */
+
+ /*
+ * Free the space for the object.
+ */
+
+ kmem_cache_free(&vm_object_cache, (vm_offset_t) object);
+}
+
+/*
+ * Routine: vm_object_pager_wakeup
+ * Purpose: Wake up anyone waiting for IKOT_PAGER_TERMINATING
+ */
+
+void
+vm_object_pager_wakeup(
+ ipc_port_t pager)
+{
+ boolean_t someone_waiting;
+
+ /*
+ * If anyone was waiting for the memory_object_terminate
+ * to be queued, wake them up now.
+ */
+ vm_object_cache_lock();
+ assert(ip_kotype(pager) == IKOT_PAGER_TERMINATING);
+ someone_waiting = (pager->ip_kobject != IKO_NULL);
+ if (ip_active(pager))
+ ipc_kobject_set(pager, IKO_NULL, IKOT_NONE);
+ vm_object_cache_unlock();
+ if (someone_waiting) {
+ thread_wakeup((event_t) pager);
+ }
+}
+
+/*
+ * Routine: memory_object_release
+ * Purpose: Terminate the pager and release port rights,
+ * just like memory_object_terminate, except
+ * that we wake up anyone blocked in vm_object_enter
+ * waiting for termination message to be queued
+ * before calling memory_object_init.
+ */
+void memory_object_release(
+ ipc_port_t pager,
+ pager_request_t pager_request,
+ ipc_port_t pager_name)
+{
+
+ /*
+ * Keep a reference to pager port;
+ * the terminate might otherwise release all references.
+ */
+ ip_reference(pager);
+
+ /*
+ * Terminate the pager.
+ */
+ (void) memory_object_terminate(pager, pager_request, pager_name);
+
+ /*
+ * Wakeup anyone waiting for this terminate
+ */
+ vm_object_pager_wakeup(pager);
+
+ /*
+ * Release reference to pager port.
+ */
+ ip_release(pager);
+}
+
+/*
+ * Routine: vm_object_abort_activity [internal use only]
+ * Purpose:
+ * Abort paging requests pending on this object.
+ * In/out conditions:
+ * The object is locked on entry and exit.
+ */
+static void vm_object_abort_activity(
+ vm_object_t object)
+{
+ vm_page_t p;
+ vm_page_t next;
+
+ /*
+ * Abort all activity that would be waiting
+ * for a result on this memory object.
+ *
+ * We could also choose to destroy all pages
+ * that we have in memory for this object, but
+ * we don't.
+ */
+
+ p = (vm_page_t) queue_first(&object->memq);
+ while (!queue_end(&object->memq, (queue_entry_t) p)) {
+ next = (vm_page_t) queue_next(&p->listq);
+
+ /*
+ * If it's being paged in, destroy it.
+ * If an unlock has been requested, start it again.
+ */
+
+ if (p->busy && p->absent) {
+ VM_PAGE_FREE(p);
+ }
+ else {
+ if (p->unlock_request != VM_PROT_NONE)
+ p->unlock_request = VM_PROT_NONE;
+ PAGE_WAKEUP(p);
+ }
+
+ p = next;
+ }
+
+ /*
+ * Wake up threads waiting for the memory object to
+ * become ready.
+ */
+
+ object->pager_ready = TRUE;
+ vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
+}
+
+/*
+ * Routine: memory_object_destroy [user interface]
+ * Purpose:
+ * Shut down a memory object, despite the
+ * presence of address map (or other) references
+ * to the vm_object.
+ * Note:
+ * This routine may be called either from the user interface,
+ * or from port destruction handling (via vm_object_destroy).
+ */
+kern_return_t memory_object_destroy(
+ vm_object_t object,
+ kern_return_t reason)
+{
+ ipc_port_t old_object, old_name;
+ pager_request_t old_control;
+
+ if (object == VM_OBJECT_NULL)
+ return KERN_SUCCESS;
+
+ /*
+ * Remove the port associations immediately.
+ *
+ * This will prevent the memory manager from further
+ * meddling. [If it wanted to flush data or make
+ * other changes, it should have done so before performing
+ * the destroy call.]
+ */
+
+ vm_object_cache_lock();
+ vm_object_lock(object);
+ vm_object_remove(object);
+ object->can_persist = FALSE;
+ vm_object_cache_unlock();
+
+ /*
+ * Rip out the ports from the vm_object now... this
+ * will prevent new memory_object calls from succeeding.
+ */
+
+ old_object = object->pager;
+ object->pager = IP_NULL;
+
+ old_control = object->pager_request;
+ object->pager_request = PAGER_REQUEST_NULL;
+
+ old_name = object->pager_name;
+ object->pager_name = IP_NULL;
+
+
+ /*
+ * Wait for existing paging activity (that might
+ * have the old ports) to subside.
+ */
+
+ vm_object_paging_wait(object, FALSE);
+ vm_object_unlock(object);
+
+ /*
+ * Shut down the ports now.
+ *
+ * [Paging operations may be proceeding concurrently --
+ * they'll get the null values established above.]
+ */
+
+ if (old_object != IP_NULL) {
+ /* consumes our rights for object, control, name */
+ memory_object_release(old_object, old_control,
+ old_name);
+ } else if (old_name != IP_NULL) {
+ /* consumes our right for name */
+		ipc_port_dealloc_kernel(old_name);
+ }
+
+ /*
+ * Lose the reference that was donated for this routine
+ */
+
+ vm_object_deallocate(object);
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: vm_object_pmap_protect
+ *
+ * Purpose:
+ * Reduces the permission for all physical
+ * pages in the specified object range.
+ *
+ * If removing write permission only, it is
+ * sufficient to protect only the pages in
+ * the top-level object; only those pages may
+ * have write permission.
+ *
+ * If removing all access, we must follow the
+ * shadow chain from the top-level object to
+ * remove access to all pages in shadowed objects.
+ *
+ * The object must *not* be locked. The object must
+ * be temporary/internal.
+ *
+ * If pmap is not NULL, this routine assumes that
+ * the only mappings for the pages are in that
+ * pmap.
+ */
+boolean_t vm_object_pmap_protect_by_page = FALSE;
+
+void vm_object_pmap_protect(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size,
+ pmap_t pmap,
+ vm_offset_t pmap_start,
+ vm_prot_t prot)
+{
+ if (object == VM_OBJECT_NULL)
+ return;
+
+ vm_object_lock(object);
+
+ assert(object->temporary && object->internal);
+
+ while (TRUE) {
+ if (object->resident_page_count > atop(size) / 2 &&
+ pmap != PMAP_NULL) {
+ vm_object_unlock(object);
+ pmap_protect(pmap, pmap_start, pmap_start + size, prot);
+ return;
+ }
+
+ {
+ vm_page_t p;
+ vm_offset_t end;
+
+ end = offset + size;
+
+ queue_iterate(&object->memq, p, vm_page_t, listq) {
+ if (!p->fictitious &&
+ (offset <= p->offset) &&
+ (p->offset < end)) {
+ if ((pmap == PMAP_NULL) ||
+ vm_object_pmap_protect_by_page) {
+ pmap_page_protect(p->phys_addr,
+ prot & ~p->page_lock);
+ } else {
+ vm_offset_t start =
+ pmap_start +
+ (p->offset - offset);
+
+ pmap_protect(pmap,
+ start,
+ start + PAGE_SIZE,
+ prot);
+ }
+ }
+ }
+ }
+
+ if (prot == VM_PROT_NONE) {
+ /*
+ * Must follow shadow chain to remove access
+ * to pages in shadowed objects.
+ */
+ vm_object_t next_object;
+
+ next_object = object->shadow;
+ if (next_object != VM_OBJECT_NULL) {
+ offset += object->shadow_offset;
+ vm_object_lock(next_object);
+ vm_object_unlock(object);
+ object = next_object;
+ }
+ else {
+ /*
+ * End of chain - we are done.
+ */
+ break;
+ }
+ }
+ else {
+ /*
+ * Pages in shadowed objects may never have
+ * write permission - we may stop here.
+ */
+ break;
+ }
+ }
+
+ vm_object_unlock(object);
+}
+
+/*
+ * vm_object_pmap_remove:
+ *
+ * Removes all physical pages in the specified
+ * object range from all physical maps.
+ *
+ * The object must *not* be locked.
+ */
+void vm_object_pmap_remove(
+ vm_object_t object,
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ vm_page_t p;
+
+ if (object == VM_OBJECT_NULL)
+ return;
+
+ vm_object_lock(object);
+ queue_iterate(&object->memq, p, vm_page_t, listq) {
+ if (!p->fictitious &&
+ (start <= p->offset) &&
+ (p->offset < end))
+ pmap_page_protect(p->phys_addr, VM_PROT_NONE);
+ }
+ vm_object_unlock(object);
+}
+
+/*
+ * Routine: vm_object_copy_slowly
+ *
+ * Description:
+ * Copy the specified range of the source
+ * virtual memory object without using
+ * protection-based optimizations (such
+ * as copy-on-write). The pages in the
+ * region are actually copied.
+ *
+ * In/out conditions:
+ * The caller must hold a reference and a lock
+ * for the source virtual memory object. The source
+ * object will be returned *unlocked*.
+ *
+ * Results:
+ * If the copy is completed successfully, KERN_SUCCESS is
+ * returned. If the caller asserted the interruptible
+ * argument, and an interruption occurred while waiting
+ * for a user-generated event, MACH_SEND_INTERRUPTED is
+ * returned. Other values may be returned to indicate
+ * hard errors during the copy operation.
+ *
+ * A new virtual memory object is returned in a
+ * parameter (_result_object). The contents of this
+ * new object, starting at a zero offset, are a copy
+ * of the source memory region. In the event of
+ * an error, this parameter will contain the value
+ * VM_OBJECT_NULL.
+ */
+kern_return_t vm_object_copy_slowly(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ boolean_t interruptible,
+ vm_object_t *_result_object) /* OUT */
+{
+ vm_object_t new_object;
+ vm_offset_t new_offset;
+
+ if (size == 0) {
+ vm_object_unlock(src_object);
+ *_result_object = VM_OBJECT_NULL;
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ /*
+ * Prevent destruction of the source object while we copy.
+ */
+
+ assert(src_object->ref_count > 0);
+ src_object->ref_count++;
+ vm_object_unlock(src_object);
+
+ /*
+ * Create a new object to hold the copied pages.
+ * A few notes:
+ * We fill the new object starting at offset 0,
+ * regardless of the input offset.
+ * We don't bother to lock the new object within
+ * this routine, since we have the only reference.
+ */
+
+ new_object = vm_object_allocate(size);
+ new_offset = 0;
+
+ assert(size == trunc_page(size)); /* Will the loop terminate? */
+
+ for ( ;
+ size != 0 ;
+ src_offset += PAGE_SIZE, new_offset += PAGE_SIZE, size -= PAGE_SIZE
+ ) {
+ vm_page_t new_page;
+ vm_fault_return_t result;
+
+ while ((new_page = vm_page_alloc(new_object, new_offset))
+ == VM_PAGE_NULL) {
+ VM_PAGE_WAIT((void (*)()) 0);
+ }
+
+ do {
+ vm_prot_t prot = VM_PROT_READ;
+ vm_page_t _result_page;
+ vm_page_t top_page;
+ vm_page_t result_page;
+
+ vm_object_lock(src_object);
+ src_object->paging_in_progress++;
+
+ result = vm_fault_page(src_object, src_offset,
+ VM_PROT_READ, FALSE, interruptible,
+ &prot, &_result_page, &top_page,
+ FALSE, (void (*)()) 0);
+
+ switch(result) {
+ case VM_FAULT_SUCCESS:
+ result_page = _result_page;
+
+ /*
+ * We don't need to hold the object
+ * lock -- the busy page will be enough.
+ * [We don't care about picking up any
+ * new modifications.]
+ *
+ * Copy the page to the new object.
+ *
+ * POLICY DECISION:
+ * If result_page is clean,
+ * we could steal it instead
+ * of copying.
+ */
+
+ vm_object_unlock(result_page->object);
+ vm_page_copy(result_page, new_page);
+
+ /*
+ * Let go of both pages (make them
+ * not busy, perform wakeup, activate).
+ */
+
+ new_page->busy = FALSE;
+ new_page->dirty = TRUE;
+ vm_object_lock(result_page->object);
+ PAGE_WAKEUP_DONE(result_page);
+
+ vm_page_lock_queues();
+ if (!result_page->active &&
+ !result_page->inactive)
+ vm_page_activate(result_page);
+ vm_page_activate(new_page);
+ vm_page_unlock_queues();
+
+ /*
+ * Release paging references and
+ * top-level placeholder page, if any.
+ */
+
+ vm_fault_cleanup(result_page->object,
+ top_page);
+
+ break;
+
+ case VM_FAULT_RETRY:
+ break;
+
+ case VM_FAULT_MEMORY_SHORTAGE:
+ VM_PAGE_WAIT((void (*)()) 0);
+ break;
+
+ case VM_FAULT_FICTITIOUS_SHORTAGE:
+ vm_page_more_fictitious();
+ break;
+
+ case VM_FAULT_INTERRUPTED:
+ vm_page_free(new_page);
+ vm_object_deallocate(new_object);
+ vm_object_deallocate(src_object);
+ *_result_object = VM_OBJECT_NULL;
+ return MACH_SEND_INTERRUPTED;
+
+ case VM_FAULT_MEMORY_ERROR:
+ /*
+ * A policy choice:
+ * (a) ignore pages that we can't
+ * copy
+ * (b) return the null object if
+ * any page fails [chosen]
+ */
+
+ vm_page_free(new_page);
+ vm_object_deallocate(new_object);
+ vm_object_deallocate(src_object);
+ *_result_object = VM_OBJECT_NULL;
+ return KERN_MEMORY_ERROR;
+ }
+ } while (result != VM_FAULT_SUCCESS);
+ }
+
+ /*
+ * Lose the extra reference, and return our object.
+ */
+
+ vm_object_deallocate(src_object);
+ *_result_object = new_object;
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: vm_object_copy_temporary
+ *
+ * Purpose:
+ * Copy the specified range of the source virtual
+ * memory object, if it can be done without blocking.
+ *
+ * Results:
+ * If the copy is successful, the copy is returned in
+ * the arguments; otherwise, the arguments are not
+ * affected.
+ *
+ * In/out conditions:
+ * The object should be unlocked on entry and exit.
+ */
+
+boolean_t vm_object_copy_temporary(
+ vm_object_t *_object, /* INOUT */
+ vm_offset_t *_offset, /* INOUT */
+ boolean_t *_src_needs_copy, /* OUT */
+ boolean_t *_dst_needs_copy) /* OUT */
+{
+ vm_object_t object = *_object;
+
+ if (object == VM_OBJECT_NULL) {
+ *_src_needs_copy = FALSE;
+ *_dst_needs_copy = FALSE;
+ return TRUE;
+ }
+
+ /*
+ * If the object is temporary, we can perform
+ * a symmetric copy-on-write without asking.
+ */
+
+ vm_object_lock(object);
+ if (object->temporary) {
+
+ /*
+ * Shared objects use delayed copy
+ */
+ if (object->use_shared_copy) {
+
+ /*
+ * Asymmetric copy strategy. Destination
+ * must be copied (to allow copy object reuse).
+ * Source is unaffected.
+ */
+ vm_object_unlock(object);
+ object = vm_object_copy_delayed(object);
+ *_object = object;
+ *_src_needs_copy = FALSE;
+ *_dst_needs_copy = TRUE;
+ return TRUE;
+ }
+
+ /*
+ * Make another reference to the object.
+ *
+ * Leave object/offset unchanged.
+ */
+
+ assert(object->ref_count > 0);
+ object->ref_count++;
+ object->shadowed = TRUE;
+ vm_object_unlock(object);
+
+ /*
+ * Both source and destination must make
+ * shadows, and the source must be made
+ * read-only if not already.
+ */
+
+ *_src_needs_copy = TRUE;
+ *_dst_needs_copy = TRUE;
+ return TRUE;
+ }
+
+ if (object->pager_ready &&
+ (object->copy_strategy == MEMORY_OBJECT_COPY_DELAY)) {
+ /* XXX Do something intelligent (see temporary code above) */
+ }
+ vm_object_unlock(object);
+
+ return FALSE;
+}
+
+/*
+ * Routine: vm_object_copy_call [internal]
+ *
+ * Description:
+ * Copy the specified (src_offset, size) portion
+ * of the source object (src_object), using the
+ * user-managed copy algorithm.
+ *
+ * In/out conditions:
+ * The source object must be locked on entry. It
+ * will be *unlocked* on exit.
+ *
+ * Results:
+ * If the copy is successful, KERN_SUCCESS is returned.
+ * This routine is interruptible; if a wait for
+ * a user-generated event is interrupted, MACH_SEND_INTERRUPTED
+ * is returned. Other return values indicate hard errors
+ * in creating the user-managed memory object for the copy.
+ *
+ * A new object that represents the copied virtual
+ * memory is returned in a parameter (*_result_object).
+ * If the return value indicates an error, this parameter
+ * is not valid.
+ */
+static kern_return_t vm_object_copy_call(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ vm_object_t *_result_object) /* OUT */
+{
+ vm_offset_t src_end = src_offset + size;
+ ipc_port_t new_memory_object;
+ vm_object_t new_object;
+ vm_page_t p;
+
+ /*
+ * Create a memory object port to be associated
+ * with this new vm_object.
+ *
+ * Since the kernel has the only rights to this
+ * port, we need not hold the cache lock.
+ *
+ * Since we have the only object reference, we
+ * need not be worried about collapse operations.
+ *
+ */
+
+ new_memory_object = ipc_port_alloc_kernel();
+ if (new_memory_object == IP_NULL)
+ return KERN_RESOURCE_SHORTAGE;
+
+ /*
+ * Set the backing object for the new
+ * temporary object.
+ */
+
+ assert(src_object->ref_count > 0);
+ src_object->ref_count++;
+ vm_object_paging_begin(src_object);
+ vm_object_unlock(src_object);
+
+ /* we hold a naked receive right for new_memory_object */
+ (void) ipc_port_make_send(new_memory_object);
+ /* now we also hold a naked send right for new_memory_object */
+
+ /*
+ * Let the memory manager know that a copy operation
+ * is in progress. Note that we're using the old
+ * memory object's ports (for which we're holding
+ * a paging reference)... the memory manager cannot
+ * yet affect the new memory object.
+ */
+
+ (void) memory_object_copy(src_object->pager,
+ src_object->pager_request,
+ src_offset, size,
+ new_memory_object);
+ /* no longer hold the naked receive right for new_memory_object */
+
+ vm_object_lock(src_object);
+ vm_object_paging_end(src_object);
+
+ /*
+ * Remove write access from all of the pages of
+ * the old memory object that we can.
+ */
+
+ queue_iterate(&src_object->memq, p, vm_page_t, listq) {
+ if (!p->fictitious &&
+ (src_offset <= p->offset) &&
+ (p->offset < src_end) &&
+ !(p->page_lock & VM_PROT_WRITE)) {
+ p->page_lock |= VM_PROT_WRITE;
+ pmap_page_protect(p->phys_addr, VM_PROT_ALL & ~p->page_lock);
+ }
+ }
+
+ vm_object_unlock(src_object);
+
+ /*
+ * Initialize the rest of the paging stuff
+ */
+
+ new_object = vm_object_enter(new_memory_object, size, FALSE);
+ assert(new_object);
+ new_object->shadow = src_object;
+ new_object->shadow_offset = src_offset;
+
+ /*
+ * Drop the reference for new_memory_object taken above.
+ */
+
+ ipc_port_release_send(new_memory_object);
+ /* no longer hold the naked send right for new_memory_object */
+
+ *_result_object = new_object;
+ return KERN_SUCCESS;
+}
+
+/*
+ * Routine: vm_object_copy_delayed [internal]
+ *
+ * Description:
+ * Copy the specified virtual memory object, using
+ * the asymmetric copy-on-write algorithm.
+ *
+ * In/out conditions:
+ * The object must be unlocked on entry.
+ *
+ * This routine will not block waiting for user-generated
+ * events. It is not interruptible.
+ */
+vm_object_t vm_object_copy_delayed(
+ vm_object_t src_object)
+{
+ vm_object_t new_copy;
+ vm_object_t old_copy;
+ vm_page_t p;
+
+ /*
+ * The user-level memory manager wants to see
+ * all of the changes to this object, but it
+ * has promised not to make any changes on its own.
+ *
+ * Perform an asymmetric copy-on-write, as follows:
+ * Create a new object, called a "copy object"
+ * to hold pages modified by the new mapping
+ * (i.e., the copy, not the original mapping).
+ * Record the original object as the backing
+ * object for the copy object. If the
+ * original mapping does not change a page,
+ * it may be used read-only by the copy.
+ * Record the copy object in the original
+ * object. When the original mapping causes
+ * a page to be modified, it must be copied
+ * to a new page that is "pushed" to the
+ * copy object.
+ * Mark the new mapping (the copy object)
+ * copy-on-write. This makes the copy
+ * object itself read-only, allowing it
+ * to be reused if the original mapping
+ * makes no changes, and simplifying the
+ * synchronization required in the "push"
+ * operation described above.
+ *
+ * The copy-on-write is said to be asymmetric because
+ * the original object is *not* marked copy-on-write.
+ * A copied page is pushed to the copy object, regardless of
+ * which party attempted to modify the page.
+ *
+ * Repeated asymmetric copy operations may be done.
+ * If the original object has not been changed since
+ * the last copy, its copy object can be reused.
+ * Otherwise, a new copy object can be inserted
+ * between the original object and its previous
+ * copy object. Since any copy object is read-only,
+ * this cannot affect the contents of the previous copy
+ * object.
+ *
+ * Note that a copy object is higher in the object
+ * tree than the original object; therefore, use of
+ * the copy object recorded in the original object
+ * must be done carefully, to avoid deadlock.
+ */
+
+ /*
+ * Allocate a new copy object before locking, even
+ * though we may not need it later.
+ */
+
+ new_copy = vm_object_allocate(src_object->size);
+
+ vm_object_lock(src_object);
+
+ /*
+ * See whether we can reuse the result of a previous
+ * copy operation.
+ */
+ Retry:
+ old_copy = src_object->copy;
+ if (old_copy != VM_OBJECT_NULL) {
+ /*
+ * Try to get the locks (out of order)
+ */
+ if (!vm_object_lock_try(old_copy)) {
+ vm_object_unlock(src_object);
+
+ simple_lock_pause(); /* wait a bit */
+
+ vm_object_lock(src_object);
+ goto Retry;
+ }
+
+ /*
+ * Determine whether the old copy object has
+ * been modified.
+ */
+
+ if (old_copy->resident_page_count == 0 &&
+ !old_copy->pager_created) {
+ /*
+ * It has not been modified.
+ *
+ * Return another reference to
+ * the existing copy-object.
+ */
+ assert(old_copy->ref_count > 0);
+ old_copy->ref_count++;
+ vm_object_unlock(old_copy);
+ vm_object_unlock(src_object);
+
+ vm_object_deallocate(new_copy);
+
+ return old_copy;
+ }
+
+ /*
+ * The copy-object is always made large enough to
+ * completely shadow the original object, since
+ * it may have several users who want to shadow
+ * the original object at different points.
+ */
+
+ assert((old_copy->shadow == src_object) &&
+ (old_copy->shadow_offset == (vm_offset_t) 0));
+
+ /*
+ * Make the old copy-object shadow the new one.
+ * It will receive no more pages from the original
+ * object.
+ */
+
+ src_object->ref_count--; /* remove ref. from old_copy */
+ assert(src_object->ref_count > 0);
+ old_copy->shadow = new_copy;
+ assert(new_copy->ref_count > 0);
+ new_copy->ref_count++;
+ vm_object_unlock(old_copy); /* done with old_copy */
+ }
+
+ /*
+ * Point the new copy at the existing object.
+ */
+
+ new_copy->shadow = src_object;
+ new_copy->shadow_offset = 0;
+ new_copy->shadowed = TRUE; /* caller must set needs_copy */
+ assert(src_object->ref_count > 0);
+ src_object->ref_count++;
+ src_object->copy = new_copy;
+
+ /*
+ * Mark all pages of the existing object copy-on-write.
+ * This object may have a shadow chain below it, but
+ * those pages will already be marked copy-on-write.
+ */
+
+ queue_iterate(&src_object->memq, p, vm_page_t, listq) {
+ if (!p->fictitious)
+ pmap_page_protect(p->phys_addr,
+ (VM_PROT_ALL & ~VM_PROT_WRITE &
+ ~p->page_lock));
+ }
+
+ vm_object_unlock(src_object);
+
+ return new_copy;
+}
+
+/*
+ * Routine: vm_object_copy_strategically
+ *
+ * Purpose:
+ * Perform a copy according to the source object's
+ * declared strategy. This operation may block,
+ * and may be interrupted.
+ */
+kern_return_t vm_object_copy_strategically(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ vm_object_t *dst_object, /* OUT */
+ vm_offset_t *dst_offset, /* OUT */
+ boolean_t *dst_needs_copy) /* OUT */
+{
+ kern_return_t result = KERN_SUCCESS; /* to quiet gcc warnings */
+ boolean_t interruptible = TRUE; /* XXX */
+
+ assert(src_object != VM_OBJECT_NULL);
+
+ vm_object_lock(src_object);
+
+ /* XXX assert(!src_object->temporary); JSB FIXME */
+
+ /*
+ * The copy strategy is only valid if the memory manager
+ * is "ready".
+ */
+
+ while (!src_object->pager_ready) {
+ vm_object_wait( src_object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ interruptible);
+ if (interruptible &&
+ (current_thread()->wait_result != THREAD_AWAKENED)) {
+ *dst_object = VM_OBJECT_NULL;
+ *dst_offset = 0;
+ *dst_needs_copy = FALSE;
+ return MACH_SEND_INTERRUPTED;
+ }
+ vm_object_lock(src_object);
+ }
+
+ /*
+ * The object may be temporary (even though it is external).
+ * If so, do a symmetric copy.
+ */
+
+ if (src_object->temporary) {
+ /*
+ * XXX
+ * This does not count as intelligent!
+ * This buys us the object->temporary optimizations,
+ * but we aren't using a symmetric copy,
+ * which may confuse the vm code. The correct thing
+ * to do here is to figure out what to call to get
+ * a temporary shadowing set up.
+ */
+ src_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
+ }
+
+ /*
+ * The object is permanent. Use the appropriate copy strategy.
+ */
+
+ switch (src_object->copy_strategy) {
+ case MEMORY_OBJECT_COPY_NONE:
+ if ((result = vm_object_copy_slowly(
+ src_object,
+ src_offset,
+ size,
+ interruptible,
+ dst_object))
+ == KERN_SUCCESS) {
+ *dst_offset = 0;
+ *dst_needs_copy = FALSE;
+ }
+ break;
+
+ case MEMORY_OBJECT_COPY_CALL:
+ if ((result = vm_object_copy_call(
+ src_object,
+ src_offset,
+ size,
+ dst_object))
+ == KERN_SUCCESS) {
+ *dst_offset = 0;
+ *dst_needs_copy = FALSE;
+ }
+ break;
+
+ case MEMORY_OBJECT_COPY_DELAY:
+ vm_object_unlock(src_object);
+ *dst_object = vm_object_copy_delayed(src_object);
+ *dst_offset = src_offset;
+ *dst_needs_copy = TRUE;
+
+ result = KERN_SUCCESS;
+ break;
+ }
+
+ return result;
+}
+
+/*
+ * vm_object_shadow:
+ *
+ * Create a new object which is backed by the
+ * specified existing object range. The source
+ * object reference is deallocated.
+ *
+ * The new object and offset into that object
+ * are returned in the source parameters.
+ */
+
+void vm_object_shadow(
+ vm_object_t *object, /* IN/OUT */
+ vm_offset_t *offset, /* IN/OUT */
+ vm_size_t length)
+{
+ vm_object_t source;
+ vm_object_t result;
+
+ source = *object;
+
+ /*
+ * Allocate a new object with the given length
+ */
+
+ if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
+ panic("vm_object_shadow: no object for shadowing");
+
+ /*
+ * The new object shadows the source object, adding
+ * a reference to it. Our caller changes his reference
+ * to point to the new object, removing a reference to
+ * the source object. Net result: no change of reference
+ * count.
+ */
+ result->shadow = source;
+
+ /*
+ * Store the offset into the source object,
+ * and fix up the offset into the new object.
+ */
+
+ result->shadow_offset = *offset;
+
+ /*
+	 * Return the new object and offset.
+ */
+
+ *offset = 0;
+ *object = result;
+}
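+
+/*
+ *	Illustrative sketch (not part of the original source) of how a
+ *	caller might use vm_object_shadow; "entry" is a hypothetical map
+ *	entry that holds a reference to its object:
+ *
+ *		vm_object_t	obj = entry->object;
+ *		vm_offset_t	off = entry->offset;
+ *
+ *		vm_object_shadow(&obj, &off, entry->size);
+ *		entry->object = obj;	(obj now shadows the old object)
+ *		entry->offset = off;	(off has been reset to 0)
+ */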
+
+/*
+ * The relationship between vm_object structures and
+ * the memory_object ports requires careful synchronization.
+ *
+ * All associations are created by vm_object_enter. All three
+ * port fields are filled in, as follows:
+ * pager: the memory_object port itself, supplied by
+ * the user requesting a mapping (or the kernel,
+ * when initializing internal objects); the
+ * kernel simulates holding send rights by keeping
+ * a port reference;
+ * pager_request:
+ * pager_name:
+ * the memory object control and name ports,
+ * created by the kernel; the kernel holds
+ * receive (and ownership) rights to these
+ * ports, but no other references.
+ * All of the ports are referenced by their global names.
+ *
+ * When initialization is complete, the "initialized" field
+ * is asserted. Other mappings using a particular memory object,
+ * and any references to the vm_object gained through the
+ * port association must wait for this initialization to occur.
+ *
+ * In order to allow the memory manager to set attributes before
+ * requests (notably virtual copy operations, but also data or
+ * unlock requests) are made, a "ready" attribute is made available.
+ * Only the memory manager may affect the value of this attribute.
+ * Its value does not affect critical kernel functions, such as
+ * internal object initialization or destruction. [Furthermore,
+ * memory objects created by the kernel are assumed to be ready
+ * immediately; the default memory manager need not explicitly
+ * set the "ready" attribute.]
+ *
+ * [Both the "initialized" and "ready" attribute wait conditions
+ * use the "pager" field as the wait event.]
+ *
+ * The port associations can be broken down by any of the
+ * following routines:
+ * vm_object_terminate:
+ * No references to the vm_object remain, and
+ * the object cannot (or will not) be cached.
+ *		This is the normal case, and is performed
+ *		even if one of the other cases has already
+ *		taken place.
+ * vm_object_destroy:
+ * The memory_object port has been destroyed,
+ * meaning that the kernel cannot flush dirty
+ * pages or request new data or unlock existing
+ * data.
+ * memory_object_destroy:
+ * The memory manager has requested that the
+ * kernel relinquish rights to the memory object
+ * port. [The memory manager may not want to
+ * destroy the port, but may wish to refuse or
+ * tear down existing memory mappings.]
+ * Each routine that breaks an association must break all of
+ * them at once. At some later time, that routine must clear
+ * the vm_object port fields and release the port rights.
+ * [Furthermore, each routine must cope with the simultaneous
+ * or previous operations of the others.]
+ *
+ * In addition to the lock on the object, the vm_object_cache_lock
+ * governs the port associations. References gained through the
+ * port association require use of the cache lock.
+ *
+ * Because the port fields may be cleared spontaneously, they
+ * cannot be used to determine whether a memory object has
+ * ever been associated with a particular vm_object. [This
+ * knowledge is important to the shadow object mechanism.]
+ * For this reason, an additional "created" attribute is
+ * provided.
+ *
+ * During various paging operations, the port values found in the
+ * vm_object must be valid. To prevent these port rights from being
+ * released, and to prevent the port associations from changing
+ * (other than being removed, i.e., made null), routines may use
+ * the vm_object_paging_begin/end routines [actually, macros].
+ * The implementation uses the "paging_in_progress" and "wanted" fields.
+ * [Operations that alter the validity of the port values include the
+ * termination routines and vm_object_collapse.]
+ */
+
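+/*
+ *	Illustrative summary (not part of the original source) of the
+ *	port associations described above, in terms of the kernel object
+ *	types used by the routines below:
+ *
+ *		object->pager		IKOT_PAGER		send right held
+ *		object->pager_request	IKOT_PAGING_REQUEST	receive right held
+ *		object->pager_name	IKOT_PAGING_NAME	receive right held
+ */
+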
+vm_object_t vm_object_lookup(
+ ipc_port_t port)
+{
+ vm_object_t object = VM_OBJECT_NULL;
+
+ if (IP_VALID(port)) {
+ ip_lock(port);
+ if (ip_active(port) &&
+ (ip_kotype(port) == IKOT_PAGING_REQUEST)) {
+ vm_object_cache_lock();
+ object = (vm_object_t) port->ip_kobject;
+ vm_object_lock(object);
+
+ assert(object->alive);
+
+ if (object->ref_count == 0)
+ vm_object_cache_remove(object);
+
+ object->ref_count++;
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ }
+ ip_unlock(port);
+ }
+
+ return object;
+}
+
+vm_object_t vm_object_lookup_name(
+ ipc_port_t port)
+{
+ vm_object_t object = VM_OBJECT_NULL;
+
+ if (IP_VALID(port)) {
+ ip_lock(port);
+ if (ip_active(port) &&
+ (ip_kotype(port) == IKOT_PAGING_NAME)) {
+ vm_object_cache_lock();
+ object = (vm_object_t) port->ip_kobject;
+ vm_object_lock(object);
+
+ assert(object->alive);
+
+ if (object->ref_count == 0)
+ vm_object_cache_remove(object);
+
+ object->ref_count++;
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ }
+ ip_unlock(port);
+ }
+
+ return object;
+}
+
+void vm_object_destroy(
+ ipc_port_t pager)
+{
+ vm_object_t object;
+ pager_request_t old_request;
+ ipc_port_t old_name;
+
+ /*
+ * Perform essentially the same operations as in vm_object_lookup,
+ * except that this time we look up based on the memory_object
+ * port, not the control port.
+ */
+ vm_object_cache_lock();
+ if (ip_kotype(pager) != IKOT_PAGER) {
+ vm_object_cache_unlock();
+ return;
+ }
+
+ object = (vm_object_t) pager->ip_kobject;
+ vm_object_lock(object);
+ if (object->ref_count == 0)
+ vm_object_cache_remove(object);
+ object->ref_count++;
+
+ object->can_persist = FALSE;
+
+ assert(object->pager == pager);
+
+ /*
+ * Remove the port associations.
+ *
+ * Note that the memory_object itself is dead, so
+ * we don't bother with it.
+ */
+
+ object->pager = IP_NULL;
+ vm_object_remove(object);
+
+ old_request = object->pager_request;
+ object->pager_request = PAGER_REQUEST_NULL;
+
+ old_name = object->pager_name;
+ object->pager_name = IP_NULL;
+
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+
+ /*
+ * Clean up the port references. Note that there's no
+ * point in trying the memory_object_terminate call
+ * because the memory_object itself is dead.
+ */
+
+ ipc_port_release_send(pager);
+ if (old_request != IP_NULL)
+ ipc_port_dealloc_kernel(old_request);
+ if (old_name != IP_NULL)
+ ipc_port_dealloc_kernel(old_name);
+
+ /*
+ * Restart pending page requests
+ */
+
+ vm_object_abort_activity(object);
+
+ /*
+ * Lose the object reference.
+ */
+
+ vm_object_deallocate(object);
+}
+
+/*
+ * Routine: vm_object_enter
+ * Purpose:
+ * Find a VM object corresponding to the given
+ * pager; if no such object exists, create one,
+ * and initialize the pager.
+ */
+vm_object_t vm_object_enter(
+ ipc_port_t pager,
+ vm_size_t size,
+ boolean_t internal)
+{
+ vm_object_t object;
+ vm_object_t new_object;
+ boolean_t must_init;
+ ipc_kobject_type_t po;
+
+restart:
+ if (!IP_VALID(pager))
+ return vm_object_allocate(size);
+
+ new_object = VM_OBJECT_NULL;
+ must_init = FALSE;
+
+ /*
+ * Look for an object associated with this port.
+ */
+
+ vm_object_cache_lock();
+ for (;;) {
+ po = ip_kotype(pager);
+
+ /*
+ * If a previous object is being terminated,
+ * we must wait for the termination message
+ * to be queued.
+ *
+ * We set kobject to a non-null value to let the
+ * terminator know that someone is waiting.
+ * Among the possibilities is that the port
+ * could die while we're waiting. Must restart
+ * instead of continuing the loop.
+ */
+
+ if (po == IKOT_PAGER_TERMINATING) {
+ pager->ip_kobject = (ipc_kobject_t) pager;
+ assert_wait((event_t) pager, FALSE);
+ vm_object_cache_unlock();
+ thread_block((void (*)()) 0);
+ goto restart;
+ }
+
+ /*
+ * Bail if there is already a kobject associated
+ * with the pager port.
+ */
+ if (po != IKOT_NONE) {
+ break;
+ }
+
+ /*
+ * We must unlock to create a new object;
+ * if we do so, we must try the lookup again.
+ */
+
+ if (new_object == VM_OBJECT_NULL) {
+ vm_object_cache_unlock();
+ new_object = vm_object_allocate(size);
+ vm_object_cache_lock();
+ } else {
+ /*
+ * Lookup failed twice, and we have something
+ * to insert; set the object.
+ */
+
+ ipc_kobject_set(pager,
+ (ipc_kobject_t) new_object,
+ IKOT_PAGER);
+ new_object = VM_OBJECT_NULL;
+ must_init = TRUE;
+ }
+ }
+
+ if (internal)
+ must_init = TRUE;
+
+ /*
+ * It's only good if it's a VM object!
+ */
+
+ object = (po == IKOT_PAGER) ? (vm_object_t) pager->ip_kobject
+ : VM_OBJECT_NULL;
+
+ if ((object != VM_OBJECT_NULL) && !must_init) {
+ vm_object_lock(object);
+ if (object->ref_count == 0)
+ vm_object_cache_remove(object);
+ object->ref_count++;
+ vm_object_unlock(object);
+
+ vm_stat.hits++;
+ }
+ assert((object == VM_OBJECT_NULL) || (object->ref_count > 0) ||
+ ((object->paging_in_progress != 0) && internal));
+
+ vm_stat.lookups++;
+
+ vm_object_cache_unlock();
+
+ /*
+ * If we raced to create a vm_object but lost, let's
+ * throw away ours.
+ */
+
+ if (new_object != VM_OBJECT_NULL)
+ vm_object_deallocate(new_object);
+
+ if (object == VM_OBJECT_NULL)
+ return(object);
+
+ if (must_init) {
+ /*
+ * Copy the naked send right we were given.
+ */
+
+ pager = ipc_port_copy_send(pager);
+ if (!IP_VALID(pager))
+ panic("vm_object_enter: port died"); /* XXX */
+
+ object->pager_created = TRUE;
+ object->pager = pager;
+
+ /*
+ * Allocate request port.
+ */
+
+ object->pager_request = ipc_port_alloc_kernel();
+ if (object->pager_request == IP_NULL)
+ panic("vm_object_enter: pager request alloc");
+
+ ipc_kobject_set(object->pager_request,
+ (ipc_kobject_t) object,
+ IKOT_PAGING_REQUEST);
+
+ /*
+ * Let the pager know we're using it.
+ */
+
+ if (internal) {
+ /* acquire a naked send right for the DMM */
+ ipc_port_t DMM = memory_manager_default_reference();
+
+ /* mark the object internal */
+ object->internal = TRUE;
+ assert(object->temporary);
+
+ /* default-pager objects are ready immediately */
+ object->pager_ready = TRUE;
+
+ /* consumes the naked send right for DMM */
+ (void) memory_object_create(DMM,
+ pager,
+ object->size,
+ object->pager_request,
+ object->pager_name,
+ PAGE_SIZE);
+ } else {
+ /* the object is external and not temporary */
+ object->internal = FALSE;
+ object->temporary = FALSE;
+
+ assert(object->resident_page_count == 0);
+ vm_object_external_count++;
+
+ /* user pager objects are not ready until marked so */
+ object->pager_ready = FALSE;
+
+ (void) memory_object_init(pager,
+ object->pager_request,
+ object->pager_name,
+ PAGE_SIZE);
+
+ }
+
+ vm_object_lock(object);
+ object->pager_initialized = TRUE;
+
+ vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
+ } else {
+ vm_object_lock(object);
+ }
+ /*
+ * [At this point, the object must be locked]
+ */
+
+ /*
+ * Wait for the work above to be done by the first
+ * thread to map this object.
+ */
+
+ while (!object->pager_initialized) {
+ vm_object_wait( object,
+ VM_OBJECT_EVENT_INITIALIZED,
+ FALSE);
+ vm_object_lock(object);
+ }
+ vm_object_unlock(object);
+
+ return object;
+}
+
+/*
+ * Routine: vm_object_pager_create
+ * Purpose:
+ * Create a memory object for an internal object.
+ * In/out conditions:
+ * The object is locked on entry and exit;
+ * it may be unlocked within this call.
+ * Limitations:
+ * Only one thread may be performing a
+ * vm_object_pager_create on an object at
+ * a time. Presumably, only the pageout
+ * daemon will be using this routine.
+ */
+void vm_object_pager_create(
+ vm_object_t object)
+{
+ ipc_port_t pager;
+
+ if (object->pager_created) {
+ /*
+ * Someone else got to it first...
+ * wait for them to finish initializing
+ */
+
+ while (!object->pager_initialized) {
+ vm_object_wait( object,
+ VM_OBJECT_EVENT_PAGER_READY,
+ FALSE);
+ vm_object_lock(object);
+ }
+ return;
+ }
+
+ /*
+ * Indicate that a memory object has been assigned
+ * before dropping the lock, to prevent a race.
+ */
+
+ object->pager_created = TRUE;
+
+ /*
+ * Prevent collapse or termination by
+ * holding a paging reference
+ */
+
+ vm_object_paging_begin(object);
+ vm_object_unlock(object);
+
+#if MACH_PAGEMAP
+ object->existence_info = vm_external_create(
+ object->size +
+ object->paging_offset);
+ assert((object->size + object->paging_offset) >=
+ object->size);
+#endif /* MACH_PAGEMAP */
+
+ /*
+ * Create the pager, and associate with it
+ * this object.
+ *
+ * Note that we only make the port association
+ * so that vm_object_enter can properly look up
+ * the object to complete the initialization...
+ * we do not expect any user to ever map this
+ * object.
+ *
+ * Since the kernel has the only rights to the
+ * port, it's safe to install the association
+ * without holding the cache lock.
+ */
+
+ pager = ipc_port_alloc_kernel();
+ if (pager == IP_NULL)
+ panic("vm_object_pager_create: allocate pager port");
+
+ (void) ipc_port_make_send(pager);
+ ipc_kobject_set(pager, (ipc_kobject_t) object, IKOT_PAGER);
+
+ /*
+ * Initialize the rest of the paging stuff
+ */
+
+ if (vm_object_enter(pager, object->size, TRUE) != object)
+ panic("vm_object_pager_create: mismatch");
+
+ /*
+ * Drop the naked send right taken above.
+ */
+
+ ipc_port_release_send(pager);
+
+ /*
+ * Release the paging reference
+ */
+
+ vm_object_lock(object);
+ vm_object_paging_end(object);
+}
+
+/*
+ * Routine: vm_object_remove
+ * Purpose:
+ * Eliminate the pager/object association
+ * for this pager.
+ * Conditions:
+ * The object cache must be locked.
+ */
+void vm_object_remove(
+ vm_object_t object)
+{
+ ipc_port_t port;
+
+ if ((port = object->pager) != IP_NULL) {
+ if (ip_kotype(port) == IKOT_PAGER)
+ ipc_kobject_set(port, IKO_NULL,
+ IKOT_PAGER_TERMINATING);
+ else if (ip_kotype(port) != IKOT_NONE)
+ panic("vm_object_remove: bad object port");
+ }
+ if ((port = object->pager_request) != IP_NULL) {
+ if (ip_kotype(port) == IKOT_PAGING_REQUEST)
+ ipc_kobject_set(port, IKO_NULL, IKOT_NONE);
+ else if (ip_kotype(port) != IKOT_NONE)
+ panic("vm_object_remove: bad request port");
+ }
+ if ((port = object->pager_name) != IP_NULL) {
+ if (ip_kotype(port) == IKOT_PAGING_NAME)
+ ipc_kobject_set(port, IKO_NULL, IKOT_NONE);
+ else if (ip_kotype(port) != IKOT_NONE)
+ panic("vm_object_remove: bad name port");
+ }
+}
+
+/*
+ * Global variables for vm_object_collapse():
+ *
+ * Counts for normal collapses and bypasses.
+ * Debugging variables, to watch or disable collapse.
+ */
+long object_collapses = 0;
+long object_bypasses = 0;
+
+int vm_object_collapse_debug = 0;
+boolean_t vm_object_collapse_allowed = TRUE;
+boolean_t vm_object_collapse_bypass_allowed = TRUE;
+
+/*
+ * vm_object_collapse:
+ *
+ * Collapse an object with the object backing it.
+ * Pages in the backing object are moved into the
+ * parent, and the backing object is deallocated.
+ *
+ * Requires that the object be locked and the page
+ * queues be unlocked. May unlock/relock the object,
+ * so the caller should hold a reference for the object.
+ */
+void vm_object_collapse(
+ vm_object_t object)
+{
+ vm_object_t backing_object;
+ vm_offset_t backing_offset;
+ vm_size_t size;
+ vm_offset_t new_offset;
+ vm_page_t p, pp;
+ ipc_port_t old_name_port;
+
+ if (!vm_object_collapse_allowed)
+ return;
+
+ while (TRUE) {
+ /*
+ * Verify that the conditions are right for collapse:
+ *
+ * The object exists and no pages in it are currently
+ * being paged out (or have ever been paged out).
+ *
+ * This check is probably overkill -- if a memory
+ * object has not been created, the fault handler
+ * shouldn't release the object lock while paging
+ * is in progress or absent pages exist.
+ */
+ if (object == VM_OBJECT_NULL ||
+ object->pager_created ||
+ object->paging_in_progress != 0 ||
+ object->absent_count != 0)
+ return;
+
+ /*
+ * There is a backing object, and
+ */
+
+ if ((backing_object = object->shadow) == VM_OBJECT_NULL)
+ return;
+
+ vm_object_lock(backing_object);
+ /*
+ * ...
+ * The backing object is not read_only,
+ * and no pages in the backing object are
+ * currently being paged out.
+ * The backing object is internal.
+ *
+ * XXX It may be sufficient for the backing
+ * XXX object to be temporary.
+ */
+
+ if (!backing_object->internal ||
+ backing_object->paging_in_progress != 0) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * The backing object can't be a copy-object:
+ * the shadow_offset for the copy-object must stay
+ * as 0. Furthermore (for the 'we have all the
+ * pages' case), if we bypass backing_object and
+ * just shadow the next object in the chain, old
+ * pages from that object would then have to be copied
+ * BOTH into the (former) backing_object and into the
+ * parent object.
+ */
+ if (backing_object->shadow != VM_OBJECT_NULL &&
+ backing_object->shadow->copy != VM_OBJECT_NULL) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * We know that we can either collapse the backing
+ * object (if the parent is the only reference to
+ * it) or (perhaps) remove the parent's reference
+ * to it.
+ */
+
+ backing_offset = object->shadow_offset;
+ size = object->size;
+
+ /*
+ * If there is exactly one reference to the backing
+ * object, we can collapse it into the parent.
+ */
+
+ if (backing_object->ref_count == 1) {
+ if (!vm_object_cache_lock_try()) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * We can collapse the backing object.
+ *
+ * Move all in-memory pages from backing_object
+ * to the parent. Pages that have been paged out
+ * will be overwritten by any of the parent's
+ * pages that shadow them.
+ */
+
+ while (!queue_empty(&backing_object->memq)) {
+
+ p = (vm_page_t)
+ queue_first(&backing_object->memq);
+
+ new_offset = (p->offset - backing_offset);
+
+ assert(!p->busy || p->absent);
+
+ /*
+ * If the parent has a page here, or if
+ * this page falls outside the parent,
+ * dispose of it.
+ *
+ * Otherwise, move it as planned.
+ */
+
+ if (p->offset < backing_offset ||
+ new_offset >= size) {
+ VM_PAGE_FREE(p);
+ } else {
+ pp = vm_page_lookup(object, new_offset);
+ if (pp != VM_PAGE_NULL && !pp->absent) {
+ /*
+ * Parent object has a real page.
+ * Throw away the backing object's
+ * page.
+ */
+ VM_PAGE_FREE(p);
+ }
+ else {
+ assert(pp == VM_PAGE_NULL || !
+ "vm_object_collapse: bad case");
+
+ /*
+ * Parent now has no page.
+ * Move the backing object's page up.
+ */
+ vm_page_rename(p, object, new_offset);
+ }
+ }
+ }
+
+ /*
+ * Move the pager from backing_object to object.
+ *
+ * XXX We're only using part of the paging space
+ * for keeps now... we ought to discard the
+ * unused portion.
+ */
+
+ switch (vm_object_collapse_debug) {
+ case 0:
+ break;
+ case 1:
+ if ((backing_object->pager == IP_NULL) &&
+ (backing_object->pager_request ==
+ PAGER_REQUEST_NULL))
+ break;
+ /* Fall through to... */
+
+ default:
+ printf("vm_object_collapse: %p (pager %p, request %p) up to %p\n",
+ backing_object, backing_object->pager, backing_object->pager_request,
+ object);
+ if (vm_object_collapse_debug > 2)
+ SoftDebugger("vm_object_collapse");
+ }
+
+ object->pager = backing_object->pager;
+ if (object->pager != IP_NULL)
+ ipc_kobject_set(object->pager,
+ (ipc_kobject_t) object,
+ IKOT_PAGER);
+ object->pager_initialized = backing_object->pager_initialized;
+ object->pager_ready = backing_object->pager_ready;
+ object->pager_created = backing_object->pager_created;
+
+ object->pager_request = backing_object->pager_request;
+ if (object->pager_request != IP_NULL)
+ ipc_kobject_set(object->pager_request,
+ (ipc_kobject_t) object,
+ IKOT_PAGING_REQUEST);
+ old_name_port = object->pager_name;
+ if (old_name_port != IP_NULL)
+ ipc_kobject_set(old_name_port,
+ IKO_NULL, IKOT_NONE);
+ object->pager_name = backing_object->pager_name;
+ if (object->pager_name != IP_NULL)
+ ipc_kobject_set(object->pager_name,
+ (ipc_kobject_t) object,
+ IKOT_PAGING_NAME);
+
+ vm_object_cache_unlock();
+
+ /*
+ * If there is no pager, leave paging-offset alone.
+ */
+ if (object->pager != IP_NULL)
+ object->paging_offset =
+ backing_object->paging_offset +
+ backing_offset;
+
+#if MACH_PAGEMAP
+ assert(object->existence_info == VM_EXTERNAL_NULL);
+ object->existence_info = backing_object->existence_info;
+#endif /* MACH_PAGEMAP */
+
+ /*
+ * Object now shadows whatever backing_object did.
+ * Note that the reference to backing_object->shadow
+ * moves from within backing_object to within object.
+ */
+
+ object->shadow = backing_object->shadow;
+ object->shadow_offset += backing_object->shadow_offset;
+ if (object->shadow != VM_OBJECT_NULL &&
+ object->shadow->copy != VM_OBJECT_NULL) {
+ panic("vm_object_collapse: we collapsed a copy-object!");
+ }
+ /*
+ * Discard backing_object.
+ *
+ * Since the backing object has no pages, no
+ * pager left, and no object references within it,
+ * all that is necessary is to dispose of it.
+ */
+
+ assert(
+ (backing_object->ref_count == 1) &&
+ (backing_object->resident_page_count == 0) &&
+ (backing_object->paging_in_progress == 0)
+ );
+
+ assert(backing_object->alive);
+ assert(!backing_object->cached);
+ backing_object->alive = FALSE;
+ vm_object_unlock(backing_object);
+
+ vm_object_unlock(object);
+ if (old_name_port != IP_NULL)
+ ipc_port_dealloc_kernel(old_name_port);
+ kmem_cache_free(&vm_object_cache, (vm_offset_t) backing_object);
+ vm_object_lock(object);
+
+ object_collapses++;
+ }
+ else {
+ if (!vm_object_collapse_bypass_allowed) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * If all of the pages in the backing object are
+ * shadowed by the parent object, the parent
+ * object no longer has to shadow the backing
+ * object; it can shadow the next one in the
+ * chain.
+ *
+ * The backing object must not be paged out - we'd
+ * have to check all of the paged-out pages, as
+ * well.
+ */
+
+ if (backing_object->pager_created) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * Should have a check for a 'small' number
+ * of pages here.
+ */
+
+ queue_iterate(&backing_object->memq, p,
+ vm_page_t, listq)
+ {
+ new_offset = (p->offset - backing_offset);
+
+ /*
+ * If the parent has a page here, or if
+ * this page falls outside the parent,
+ * keep going.
+ *
+ * Otherwise, the backing_object must be
+ * left in the chain.
+ */
+
+ if (p->offset >= backing_offset &&
+ new_offset <= size &&
+ (pp = vm_page_lookup(object, new_offset))
+ == VM_PAGE_NULL) {
+ /*
+ * Page still needed.
+ * Can't go any further.
+ */
+ vm_object_unlock(backing_object);
+ return;
+ }
+ }
+
+ /*
+ * Make the parent shadow the next object
+ * in the chain. Deallocating backing_object
+ * will not remove it, since its reference
+ * count is at least 2.
+ */
+
+ vm_object_reference(object->shadow = backing_object->shadow);
+ object->shadow_offset += backing_object->shadow_offset;
+
+ /*
+ * Backing object might have had a copy pointer
+ * to us. If it did, clear it.
+ */
+ if (backing_object->copy == object)
+ backing_object->copy = VM_OBJECT_NULL;
+
+ /*
+ * Drop the reference count on backing_object.
+ * Since its ref_count was at least 2, it
+ * will not vanish; so we don't need to call
+ * vm_object_deallocate.
+ */
+ backing_object->ref_count--;
+ assert(backing_object->ref_count > 0);
+ vm_object_unlock(backing_object);
+
+ object_bypasses ++;
+
+ }
+
+ /*
+ * Try again with this object's new backing object.
+ */
+ }
+}
+
+/*
+ * Routine: vm_object_page_remove: [internal]
+ * Purpose:
+ * Removes all physical pages in the specified
+ * object range from the object's list of pages.
+ *
+ * In/out conditions:
+ * The object must be locked.
+ */
+unsigned int vm_object_page_remove_lookup = 0;
+unsigned int vm_object_page_remove_iterate = 0;
+
+void vm_object_page_remove(
+ vm_object_t object,
+ vm_offset_t start,
+ vm_offset_t end)
+{
+ vm_page_t p, next;
+
+ /*
+ * One and two page removals are most popular.
+ * The factor of 16 here is somewhat arbitrary.
+ * It balances vm_object_lookup vs iteration.
+ */
+
+ if (atop(end - start) < object->resident_page_count/16) {
+ vm_object_page_remove_lookup++;
+
+ for (; start < end; start += PAGE_SIZE) {
+ p = vm_page_lookup(object, start);
+ if (p != VM_PAGE_NULL) {
+ if (!p->fictitious)
+ pmap_page_protect(p->phys_addr,
+ VM_PROT_NONE);
+ VM_PAGE_FREE(p);
+ }
+ }
+ } else {
+ vm_object_page_remove_iterate++;
+
+ p = (vm_page_t) queue_first(&object->memq);
+ while (!queue_end(&object->memq, (queue_entry_t) p)) {
+ next = (vm_page_t) queue_next(&p->listq);
+ if ((start <= p->offset) && (p->offset < end)) {
+ if (!p->fictitious)
+ pmap_page_protect(p->phys_addr,
+ VM_PROT_NONE);
+ VM_PAGE_FREE(p);
+ }
+ p = next;
+ }
+ }
+}
+
+/*
+ * Routine: vm_object_coalesce
+ * Purpose:
+ * Tries to coalesce two objects backing up adjoining
+ * regions of memory into a single object.
+ *
+ * NOTE: Only works at the moment if one of the objects
+ * is NULL or if the objects are the same - otherwise,
+ * which object do we lock first?
+ * Returns:
+ *	TRUE	if the objects have been coalesced.
+ *	FALSE	if the objects could not be coalesced.
+ * Parameters:
+ * prev_object First object to coalesce
+ * prev_offset Offset into prev_object
+ *	next_object	Second object to coalesce
+ * next_offset Offset into next_object
+ *
+ * prev_size Size of reference to prev_object
+ * next_size Size of reference to next_object
+ *
+ *	new_object	Resulting coalesced object
+ * new_offset Offset into the resulting object
+ * Conditions:
+ * The objects must *not* be locked.
+ *
+ * If the objects are coalesced successfully, the caller's
+ * references for both objects are consumed, and the caller
+ * gains a reference for the new object.
+ */
+
+boolean_t vm_object_coalesce(
+ vm_object_t prev_object,
+ vm_object_t next_object,
+ vm_offset_t prev_offset,
+ vm_offset_t next_offset,
+ vm_size_t prev_size,
+ vm_size_t next_size,
+ vm_object_t *new_object, /* OUT */
+ vm_offset_t *new_offset) /* OUT */
+{
+ vm_object_t object;
+ vm_size_t newsize;
+
+ if (prev_object == next_object) {
+ /*
+ * If neither object actually exists,
+ * the offsets don't matter.
+ */
+ if (prev_object == VM_OBJECT_NULL) {
+ *new_object = VM_OBJECT_NULL;
+ *new_offset = 0;
+ return TRUE;
+ }
+
+ if (prev_offset + prev_size == next_offset) {
+ *new_object = prev_object;
+ *new_offset = prev_offset;
+ /*
+ * Deallocate one of the two references.
+ */
+ vm_object_deallocate(prev_object);
+ return TRUE;
+ }
+
+ return FALSE;
+ }
+
+ if (next_object != VM_OBJECT_NULL) {
+ /*
+ * Don't know how to merge two different
+ * objects yet.
+ */
+ if (prev_object != VM_OBJECT_NULL)
+ return FALSE;
+
+ object = next_object;
+ } else {
+ object = prev_object;
+ }
+
+ vm_object_lock(object);
+
+ /*
+ * Try to collapse the object first
+ */
+ vm_object_collapse(object);
+
+ /*
+ * Can't coalesce if pages not mapped to
+ * the object may be in use anyway:
+ * . more than one reference
+ * . paged out
+ * . shadows another object
+ * . has a copy elsewhere
+ * . paging references (pages might be in page-list)
+ */
+
+ if ((object->ref_count > 1) ||
+ object->pager_created ||
+ object->used_for_pageout ||
+ (object->shadow != VM_OBJECT_NULL) ||
+ (object->copy != VM_OBJECT_NULL) ||
+ (object->paging_in_progress != 0)) {
+ vm_object_unlock(object);
+ return FALSE;
+ }
+
+ if (object == prev_object) {
+ /*
+ * Remove any pages that may still be in
+ * the object from a previous deallocation.
+ */
+ vm_object_page_remove(object,
+ prev_offset + prev_size,
+ prev_offset + prev_size + next_size);
+ /*
+ * Extend the object if necessary.
+ */
+ newsize = prev_offset + prev_size + next_size;
+ if (newsize > object->size)
+ object->size = newsize;
+
+ *new_offset = prev_offset;
+ } else {
+ /*
+ * Check if we have enough space in the object
+ * offset space to insert the new mapping before
+ * the existing one.
+ */
+ if (next_offset < prev_size) {
+ vm_object_unlock(object);
+ return FALSE;
+ }
+ /*
+ * Remove any pages that may still be in
+ * the object from a previous deallocation.
+ */
+ vm_object_page_remove(object,
+ next_offset - prev_size,
+ next_offset);
+
+ *new_offset = next_offset - prev_size;
+ }
+
+ vm_object_unlock(object);
+ *new_object = object;
+ return TRUE;
+}
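+
+/*
+ *	Illustrative sketch (not part of the original source): a typical
+ *	use is growing an existing allocation, where the new range has no
+ *	object yet (next_object is VM_OBJECT_NULL).  The prev_* and
+ *	new_size names below are hypothetical:
+ *
+ *		vm_object_t	obj;
+ *		vm_offset_t	off;
+ *
+ *		if (vm_object_coalesce(prev_object, VM_OBJECT_NULL,
+ *				       prev_offset, 0,
+ *				       prev_size, new_size,
+ *				       &obj, &off)) {
+ *			(prev_object now also covers the new range,
+ *			 which starts at offset off within it)
+ *		}
+ */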
+
+vm_object_t vm_object_request_object(
+ ipc_port_t p)
+{
+ return vm_object_lookup(p);
+}
+
+/*
+ * Routine: vm_object_name
+ * Purpose:
+ * Returns a naked send right to the "name" port associated
+ * with this object.
+ */
+ipc_port_t vm_object_name(
+ vm_object_t object)
+{
+ ipc_port_t p;
+
+ if (object == VM_OBJECT_NULL)
+ return IP_NULL;
+
+ vm_object_lock(object);
+
+ while (object->shadow != VM_OBJECT_NULL) {
+ vm_object_t new_object = object->shadow;
+ vm_object_lock(new_object);
+ vm_object_unlock(object);
+ object = new_object;
+ }
+
+ p = object->pager_name;
+ if (p != IP_NULL)
+ p = ipc_port_make_send(p);
+ vm_object_unlock(object);
+
+ return p;
+}
+
+/*
+ * Attach a set of physical pages to an object, so that they can
+ * be mapped by mapping the object. Typically used to map IO memory.
+ *
+ * The mapping function and its private data are used to obtain the
+ * physical addresses for each page to be mapped.
+ */
+kern_return_t
+vm_object_page_map(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size,
+ phys_addr_t (*map_fn)(void *, vm_offset_t),
+ void * map_fn_data) /* private to map_fn */
+{
+ int num_pages;
+ int i;
+ vm_page_t m;
+ vm_page_t old_page;
+ phys_addr_t addr;
+
+ num_pages = atop(size);
+
+ for (i = 0; i < num_pages; i++, offset += PAGE_SIZE) {
+
+ addr = (*map_fn)(map_fn_data, offset);
+ if (addr == vm_page_fictitious_addr)
+ return KERN_NO_ACCESS;
+
+ while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
+ vm_page_more_fictitious();
+
+ vm_object_lock(object);
+ if ((old_page = vm_page_lookup(object, offset))
+ != VM_PAGE_NULL)
+ {
+ VM_PAGE_FREE(old_page);
+ }
+
+ vm_page_init(m);
+ m->phys_addr = addr;
+		m->private = TRUE;		/* don't free page */
+ m->wire_count = 1;
+ vm_page_lock_queues();
+ vm_page_insert(m, object, offset);
+ vm_page_unlock_queues();
+
+ PAGE_WAKEUP_DONE(m);
+ vm_object_unlock(object);
+ }
+ return KERN_SUCCESS;
+}
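+
+/*
+ *	Illustrative sketch (not part of the original source) of a mapping
+ *	function for a physically contiguous device region; the names are
+ *	hypothetical.  The map function is only called from within
+ *	vm_object_page_map, so passing the address of a local is safe:
+ *
+ *		static phys_addr_t
+ *		device_map_fn(void *data, vm_offset_t offset)
+ *		{
+ *			return *(phys_addr_t *) data + offset;
+ *		}
+ *
+ *		phys_addr_t base = device_base;
+ *		kr = vm_object_page_map(object, 0, device_size,
+ *					device_map_fn, &base);
+ */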
+
+
+#if MACH_KDB
+#include <vm/vm_print.h>
+#define printf kdbprintf
+
+boolean_t vm_object_print_pages = FALSE;
+
+/*
+ * vm_object_print: [ debug ]
+ */
+void vm_object_print(
+ vm_object_t object)
+{
+ vm_page_t p;
+
+ int count;
+
+ if (object == VM_OBJECT_NULL)
+ return;
+
+ iprintf("Object 0x%X: size=0x%X, %d references",
+ (vm_offset_t) object, (vm_offset_t) object->size,
+ object->ref_count);
+ printf("\n");
+ iprintf("%lu resident pages,", object->resident_page_count);
+ printf(" %d absent pages,", object->absent_count);
+ printf(" %d paging ops\n", object->paging_in_progress);
+ indent += 1;
+ iprintf("memory object=0x%X (offset=0x%X),",
+ (vm_offset_t) object->pager, (vm_offset_t) object->paging_offset);
+ printf("control=0x%X, name=0x%X\n",
+ (vm_offset_t) object->pager_request, (vm_offset_t) object->pager_name);
+ iprintf("%s%s",
+ object->pager_ready ? " ready" : "",
+ object->pager_created ? " created" : "");
+ printf("%s,%s ",
+ object->pager_initialized ? "" : "uninitialized",
+ object->temporary ? "temporary" : "permanent");
+ printf("%s%s,",
+ object->internal ? "internal" : "external",
+ object->can_persist ? " cacheable" : "");
+ printf("copy_strategy=%d\n", (vm_offset_t)object->copy_strategy);
+ iprintf("shadow=0x%X (offset=0x%X),",
+ (vm_offset_t) object->shadow, (vm_offset_t) object->shadow_offset);
+ printf("copy=0x%X\n", (vm_offset_t) object->copy);
+
+ indent += 1;
+
+ if (vm_object_print_pages) {
+ count = 0;
+ p = (vm_page_t) queue_first(&object->memq);
+ while (!queue_end(&object->memq, (queue_entry_t) p)) {
+ if (count == 0) iprintf("memory:=");
+ else if (count == 4) {printf("\n"); iprintf(" ..."); count = 0;}
+ else printf(",");
+ count++;
+
+ printf("(off=0x%X,page=0x%X)", p->offset, (vm_offset_t) p);
+ p = (vm_page_t) queue_next(&p->listq);
+ }
+ if (count != 0)
+ printf("\n");
+ }
+ indent -= 2;
+}
+
+#endif /* MACH_KDB */
diff --git a/vm/vm_object.h b/vm/vm_object.h
new file mode 100644
index 0000000..9c17541
--- /dev/null
+++ b/vm/vm_object.h
@@ -0,0 +1,415 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm_object.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Virtual memory object module definitions.
+ */
+
+#ifndef _VM_VM_OBJECT_H_
+#define _VM_VM_OBJECT_H_
+
+#include <sys/types.h>
+#include <mach/kern_return.h>
+#include <mach/boolean.h>
+#include <mach/memory_object.h>
+#include <mach/port.h>
+#include <mach/vm_prot.h>
+#include <mach/machine/vm_types.h>
+#include <kern/queue.h>
+#include <kern/lock.h>
+#include <kern/assert.h>
+#include <kern/debug.h>
+#include <kern/macros.h>
+#include <vm/pmap.h>
+#include <ipc/ipc_types.h>
+
+#if MACH_PAGEMAP
+#include <vm/vm_external.h>
+#endif /* MACH_PAGEMAP */
+
+typedef struct ipc_port * pager_request_t;
+#define PAGER_REQUEST_NULL ((pager_request_t) 0)
+
+/*
+ * We use "struct ipc_port *" instead of "ipc_port_t"
+ * to avoid include file circularities.
+ */
+
+struct vm_object {
+ queue_head_t memq; /* Resident memory */
+ decl_simple_lock_data(, Lock) /* Synchronization */
+#if VM_OBJECT_DEBUG
+ thread_t LockHolder; /* Thread holding Lock */
+#endif /* VM_OBJECT_DEBUG */
+ vm_size_t size; /* Object size (only valid
+ * if internal)
+ */
+
+ int ref_count; /* Number of references */
+ unsigned long resident_page_count;
+ /* number of resident pages */
+
+ struct vm_object *copy; /* Object that should receive
+ * a copy of my changed pages
+ */
+ struct vm_object *shadow; /* My shadow */
+ vm_offset_t shadow_offset; /* Offset into shadow */
+
+ struct ipc_port *pager; /* Where to get data */
+ vm_offset_t paging_offset; /* Offset into memory object */
+ pager_request_t pager_request; /* Where data comes back */
+ struct ipc_port *pager_name; /* How to identify region */
+
+ memory_object_copy_strategy_t
+ copy_strategy; /* How to handle data copy */
+
+ unsigned int
+ absent_count; /* The number of pages that
+ * have been requested but
+ * not filled. That is, the
+ * number of pages for which
+ * the "absent" attribute is
+ * asserted.
+ */
+
+ unsigned int /* boolean_t array */
+ all_wanted; /* Bit array of "want to be
+ * awakened" notations. See
+ * VM_OBJECT_EVENT_* items
+ * below
+ */
+
+ unsigned int
+ paging_in_progress:16,
+ /* The memory object ports are
+ * being used (e.g., for pagein
+ * or pageout) -- don't change any
+ * of these fields (i.e., don't
+ * collapse, destroy or terminate)
+ */
+ /* boolean_t */ used_for_pageout:1,/* The object carries data sent to
+ * a memory manager, which signals
+ * it's done by releasing memory.
+ * This flag prevents coalescing so
+ * that unmapping memory immediately
+ * results in object termination.
+ */
+ /* boolean_t */ pager_created:1,/* Has pager ever been created? */
+ /* boolean_t */ pager_initialized:1,/* Are fields ready to use? */
+ /* boolean_t */ pager_ready:1, /* Will manager take requests? */
+
+ /* boolean_t */ can_persist:1, /* The kernel may keep the data
+ * for this object (and rights to
+ * the memory object) after all
+ * address map references are
+ * deallocated?
+ */
+ /* boolean_t */ internal:1, /* Created by the kernel (and
+ * therefore, managed by the
+					 * default memory manager)
+ */
+ /* boolean_t */ temporary:1, /* Permanent objects may be changed
+ * externally by the memory manager,
+ * and changes made in memory must
+ * be reflected back to the memory
+ * manager. Temporary objects lack
+ * both of these characteristics.
+ */
+ /* boolean_t */ alive:1, /* Not yet terminated (debug) */
+ /* boolean_t */ lock_in_progress : 1,
+ /* Is a multi-page lock
+ * request in progress?
+ */
+ /* boolean_t */ lock_restart : 1,
+ /* Should lock request in
+ * progress restart search?
+ */
+ /* boolean_t */ use_shared_copy : 1,/* Use shared (i.e.,
+ * delayed) copy on write */
+ /* boolean_t */ shadowed: 1, /* Shadow may exist */
+
+ /* boolean_t */ cached: 1; /* Object is cached */
+ queue_chain_t cached_list; /* Attachment point for the list
+ * of objects cached as a result
+ * of their can_persist value
+ */
+ vm_offset_t last_alloc; /* last allocation offset */
+#if MACH_PAGEMAP
+ vm_external_t existence_info;
+#endif /* MACH_PAGEMAP */
+};
+
+extern
+vm_object_t kernel_object; /* the single kernel object */
+
+/*
+ * Declare procedures that operate on VM objects.
+ */
+
+extern void vm_object_bootstrap(void);
+extern void vm_object_init(void);
+extern void vm_object_collect(vm_object_t);
+extern void vm_object_terminate(vm_object_t);
+extern vm_object_t vm_object_allocate(vm_size_t);
+extern void vm_object_reference(vm_object_t);
+extern void vm_object_deallocate(vm_object_t);
+extern void vm_object_pmap_protect(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size,
+ pmap_t pmap,
+ vm_offset_t pmap_start,
+ vm_prot_t prot);
+extern void vm_object_pmap_remove(
+ vm_object_t object,
+ vm_offset_t start,
+ vm_offset_t end);
+extern void vm_object_page_remove(
+ vm_object_t object,
+ vm_offset_t start,
+ vm_offset_t end);
+extern void vm_object_shadow(
+ vm_object_t *object, /* in/out */
+ vm_offset_t *offset, /* in/out */
+ vm_size_t length);
+extern void vm_object_collapse(vm_object_t);
+extern vm_object_t vm_object_lookup(struct ipc_port *);
+extern vm_object_t vm_object_lookup_name(struct ipc_port *);
+extern struct ipc_port *vm_object_name(vm_object_t);
+extern void vm_object_remove(vm_object_t);
+
+extern boolean_t vm_object_copy_temporary(
+ vm_object_t *_object, /* in/out */
+ vm_offset_t *_offset, /* in/out */
+ boolean_t *_src_needs_copy, /* out */
+ boolean_t *_dst_needs_copy); /* out */
+extern kern_return_t vm_object_copy_strategically(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ vm_object_t *dst_object, /* out */
+ vm_offset_t *dst_offset, /* out */
+ boolean_t *dst_needs_copy); /* out */
+extern kern_return_t vm_object_copy_slowly(
+ vm_object_t src_object,
+ vm_offset_t src_offset,
+ vm_size_t size,
+ boolean_t interruptible,
+ vm_object_t *_result_object); /* out */
+
+extern vm_object_t vm_object_enter(
+ struct ipc_port *pager,
+ vm_size_t size,
+ boolean_t internal);
+extern void vm_object_pager_create(
+ vm_object_t object);
+extern void vm_object_destroy(
+ struct ipc_port *pager);
+
+extern kern_return_t vm_object_page_map(
+ vm_object_t,
+ vm_offset_t,
+ vm_size_t,
+ phys_addr_t (*)(void *, vm_offset_t),
+ void *);
+
+extern vm_object_t vm_object_request_object(struct ipc_port *);
+
+extern boolean_t vm_object_coalesce(
+ vm_object_t prev_object,
+ vm_object_t next_object,
+ vm_offset_t prev_offset,
+ vm_offset_t next_offset,
+ vm_size_t prev_size,
+ vm_size_t next_size,
+ vm_object_t *new_object, /* OUT */
+ vm_offset_t *new_offset); /* OUT */
+
+extern void vm_object_pager_wakeup(ipc_port_t pager);
+
+void memory_object_release(
+ ipc_port_t pager,
+ pager_request_t pager_request,
+ ipc_port_t pager_name);
+
+void vm_object_deactivate_pages(vm_object_t);
+
+vm_object_t vm_object_copy_delayed(
+ vm_object_t src_object);
+
+/*
+ * Event waiting handling
+ */
+
+#define VM_OBJECT_EVENT_INITIALIZED 0
+#define VM_OBJECT_EVENT_PAGER_READY 1
+#define VM_OBJECT_EVENT_PAGING_IN_PROGRESS 2
+#define VM_OBJECT_EVENT_ABSENT_COUNT 3
+#define VM_OBJECT_EVENT_LOCK_IN_PROGRESS 4
+
+#define vm_object_wait(object, event, interruptible) \
+ MACRO_BEGIN \
+ (object)->all_wanted |= 1 << (event); \
+ vm_object_sleep(((vm_offset_t) object) + (event), \
+ (object), \
+ (interruptible)); \
+ MACRO_END
+
+#define vm_object_assert_wait(object, event, interruptible) \
+ MACRO_BEGIN \
+ (object)->all_wanted |= 1 << (event); \
+ assert_wait((event_t)(((vm_offset_t) object) + (event)), (interruptible)); \
+ MACRO_END
+
+#define vm_object_wakeup(object, event) \
+ MACRO_BEGIN \
+ if ((object)->all_wanted & (1 << (event))) \
+ thread_wakeup((event_t)(((vm_offset_t) object) + (event))); \
+ (object)->all_wanted &= ~(1 << (event)); \
+ MACRO_END
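+
+/*
+ * Illustrative sketch (not part of the original source) of the
+ * wait/wakeup handshake these macros implement.  The waiter must hold
+ * the object lock; vm_object_wait records interest in the event and
+ * sleeps, releasing the lock, so the condition is re-checked after
+ * relocking:
+ *
+ *	vm_object_lock(object);
+ *	while (!object->pager_initialized) {
+ *		vm_object_wait(object, VM_OBJECT_EVENT_INITIALIZED, FALSE);
+ *		vm_object_lock(object);
+ *	}
+ *
+ * The initializing thread, holding the object lock, sets the flag and
+ * calls vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED).
+ */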
+
+/*
+ * Routines implemented as macros
+ */
+
+#define vm_object_collectable(object) \
+ (((object)->ref_count == 0) \
+ && ((object)->resident_page_count == 0))
+
+#define vm_object_paging_begin(object) \
+ ((object)->paging_in_progress++)
+
+#define vm_object_paging_end(object) \
+ MACRO_BEGIN \
+ assert((object)->paging_in_progress != 0); \
+ if (--(object)->paging_in_progress == 0) { \
+ vm_object_wakeup(object, \
+ VM_OBJECT_EVENT_PAGING_IN_PROGRESS); \
+ } \
+ MACRO_END
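+
+/*
+ * Illustrative sketch (not part of the original source): a paging
+ * reference brackets any use of the object's port values and keeps the
+ * object from being collapsed or terminated in the meantime (see the
+ * discussion in vm/vm_object.c), e.g.:
+ *
+ *	vm_object_lock(object);
+ *	vm_object_paging_begin(object);
+ *	vm_object_unlock(object);
+ *
+ *	... issue requests through object->pager ...
+ *
+ *	vm_object_lock(object);
+ *	vm_object_paging_end(object);
+ *	vm_object_unlock(object);
+ */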
+
+#define vm_object_paging_wait(object, interruptible) \
+ MACRO_BEGIN \
+ while ((object)->paging_in_progress != 0) { \
+ vm_object_wait( (object), \
+ VM_OBJECT_EVENT_PAGING_IN_PROGRESS, \
+ (interruptible)); \
+ vm_object_lock(object); \
+ \
+ /*XXX if ((interruptible) && */ \
+ /*XXX (current_thread()->wait_result != THREAD_AWAKENED))*/ \
+ /*XXX break; */ \
+ } \
+ MACRO_END
+
+#define vm_object_absent_assert_wait(object, interruptible) \
+ MACRO_BEGIN \
+ vm_object_assert_wait( (object), \
+ VM_OBJECT_EVENT_ABSENT_COUNT, \
+ (interruptible)); \
+ MACRO_END
+
+
+#define vm_object_absent_release(object) \
+ MACRO_BEGIN \
+ (object)->absent_count--; \
+ vm_object_wakeup((object), \
+ VM_OBJECT_EVENT_ABSENT_COUNT); \
+ MACRO_END
+
+/*
+ * Object locking macros (with and without debugging)
+ */
+
+#if VM_OBJECT_DEBUG
+#define vm_object_lock_init(object) \
+MACRO_BEGIN \
+ simple_lock_init(&(object)->Lock); \
+ (object)->LockHolder = 0; \
+MACRO_END
+#define vm_object_lock(object) \
+MACRO_BEGIN \
+ simple_lock(&(object)->Lock); \
+ (object)->LockHolder = current_thread(); \
+MACRO_END
+#define vm_object_unlock(object) \
+MACRO_BEGIN \
+ if ((object)->LockHolder != current_thread()) \
+ panic("vm_object_unlock 0x%x", (object)); \
+ (object)->LockHolder = 0; \
+ simple_unlock(&(object)->Lock); \
+MACRO_END
+#define vm_object_lock_try(object) \
+ (simple_lock_try(&(object)->Lock) \
+ ? ( ((object)->LockHolder = current_thread()) , TRUE) \
+ : FALSE)
+#define vm_object_sleep(event, object, interruptible) \
+MACRO_BEGIN \
+ if ((object)->LockHolder != current_thread()) \
+ panic("vm_object_sleep %#x", (object)); \
+ (object)->LockHolder = 0; \
+ thread_sleep((event_t)(event), simple_lock_addr((object)->Lock), \
+ (interruptible)); \
+MACRO_END
+#define vm_object_lock_taken(object) \
+ ((object)->LockHolder == current_thread())
+#else /* VM_OBJECT_DEBUG */
+#define vm_object_lock_init(object) simple_lock_init(&(object)->Lock)
+#define vm_object_lock(object) simple_lock(&(object)->Lock)
+#define vm_object_unlock(object) simple_unlock(&(object)->Lock)
+#define vm_object_lock_try(object) simple_lock_try(&(object)->Lock)
+#define vm_object_sleep(event, object, interruptible) \
+ thread_sleep((event_t)(event), simple_lock_addr((object)->Lock), \
+ (interruptible))
+#define vm_object_lock_taken(object) simple_lock_taken(&(object)->Lock)
+#endif /* VM_OBJECT_DEBUG */
+
+/*
+ * Page cache accounting.
+ *
+ * The page queues must be locked when changing these counters.
+ */
+extern int vm_object_external_count;
+extern int vm_object_external_pages;
+
+/* Add a reference to a locked VM object. */
+static inline int
+vm_object_reference_locked (vm_object_t obj)
+{
+ return (++obj->ref_count);
+}
+
+/* Remove a reference from a locked VM object. */
+static inline int
+vm_object_unreference_locked (vm_object_t obj)
+{
+ return (--obj->ref_count);
+}
+
+#endif /* _VM_VM_OBJECT_H_ */
diff --git a/vm/vm_page.c b/vm/vm_page.c
new file mode 100644
index 0000000..04decbb
--- /dev/null
+++ b/vm/vm_page.c
@@ -0,0 +1,2164 @@
+/*
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation uses the binary buddy system to manage its heap.
+ * Descriptions of the buddy system can be found in the following works :
+ * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
+ * - "Dynamic Storage Allocation: A Survey and Critical Review",
+ * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
+ *
+ * In addition, this allocator uses per-CPU pools of pages for order 0
+ * (i.e. single page) allocations. These pools act as caches (but are named
+ * differently to avoid confusion with CPU caches) that reduce contention on
+ * multiprocessor systems. When a pool is empty and cannot provide a page,
+ * it is filled by transferring multiple pages from the backend buddy system.
+ * The symmetric case is handled likewise.
+ *
+ * TODO Limit number of dirty pages, block allocations above a top limit.
+ */
+
+#include <string.h>
+#include <kern/assert.h>
+#include <kern/counters.h>
+#include <kern/cpu_number.h>
+#include <kern/debug.h>
+#include <kern/list.h>
+#include <kern/lock.h>
+#include <kern/macros.h>
+#include <kern/printf.h>
+#include <kern/thread.h>
+#include <mach/vm_param.h>
+#include <machine/pmap.h>
+#include <sys/types.h>
+#include <vm/memory_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#define DEBUG 0
+
+#define __init
+#define __initdata
+#define __read_mostly
+
+#define thread_pin()
+#define thread_unpin()
+
+/*
+ * Number of free block lists per segment.
+ */
+#define VM_PAGE_NR_FREE_LISTS 11
+
+/*
+ * The size of a CPU pool is computed by dividing the number of pages in its
+ * containing segment by this value.
+ */
+#define VM_PAGE_CPU_POOL_RATIO 1024
+
+/*
+ * Maximum number of pages in a CPU pool.
+ */
+#define VM_PAGE_CPU_POOL_MAX_SIZE 128
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define VM_PAGE_CPU_POOL_TRANSFER_RATIO 2
+
+/*
+ * Per-processor cache of pages.
+ */
+struct vm_page_cpu_pool {
+ simple_lock_data_t lock;
+ int size;
+ int transfer_size;
+ int nr_pages;
+ struct list pages;
+} __aligned(CPU_L1_SIZE);
+
+/*
+ * Special order value for pages that aren't in a free list. Such pages are
+ * either allocated, or part of a free block of pages but not the head page.
+ */
+#define VM_PAGE_ORDER_UNLISTED (VM_PAGE_NR_FREE_LISTS + 1)
+
+/*
+ * Doubly-linked list of free blocks.
+ */
+struct vm_page_free_list {
+ unsigned long size;
+ struct list blocks;
+};
+
+/*
+ * XXX Because of a potential deadlock involving the default pager (see
+ * vm_map_lock()), it's currently impossible to reliably determine the
+ * minimum number of free pages required for successful pageout. Since
+ * that process is dependent on the amount of physical memory, we scale
+ * the minimum number of free pages from it, in the hope that memory
+ * exhaustion happens as rarely as possible...
+ */
+
+/*
+ * Ratio used to compute the minimum number of pages in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN_NUM 5
+#define VM_PAGE_SEG_THRESHOLD_MIN_DENOM 100
+
+/*
+ * Number of pages reserved for privileged allocations in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN 500
+
+/*
+ * Ratio used to compute the threshold below which pageout is started.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW_NUM 6
+#define VM_PAGE_SEG_THRESHOLD_LOW_DENOM 100
+
+/*
+ * Minimum value the low threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW 600
+
+#if VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN
+#error VM_PAGE_SEG_THRESHOLD_LOW invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN */
+
+/*
+ * Ratio used to compute the threshold above which pageout is stopped.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH_NUM 10
+#define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM 100
+
+/*
+ * Minimum value the high threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH 1000
+
+#if VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW
+#error VM_PAGE_SEG_THRESHOLD_HIGH invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW */
+
+/*
+ * Minimum number of pages allowed for a segment.
+ */
+#define VM_PAGE_SEG_MIN_PAGES 2000
+
+#if VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH
+#error VM_PAGE_SEG_MIN_PAGES invalid
+#endif /* VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH */
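+
+/*
+ * Illustrative example (not part of the original source), assuming each
+ * threshold is the larger of its ratio-derived value and its fixed
+ * minimum: a segment of 100000 pages would get free page thresholds of
+ * roughly 5000 (min), 6000 (low) and 10000 (high).
+ */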
+
+/*
+ * Ratio used to compute the threshold of active pages beyond which
+ * to refill the inactive queue.
+ */
+#define VM_PAGE_HIGH_ACTIVE_PAGE_NUM 1
+#define VM_PAGE_HIGH_ACTIVE_PAGE_DENOM 3
+
+/*
+ * Page cache queue.
+ *
+ * XXX The current implementation hardcodes a preference to evict external
+ * pages first and keep internal ones as much as possible. This is because
+ * the Hurd default pager implementation suffers from bugs that can easily
+ * cause the system to freeze.
+ */
+struct vm_page_queue {
+ struct list internal_pages;
+ struct list external_pages;
+};
+
+/*
+ * Segment name buffer size.
+ */
+#define VM_PAGE_NAME_SIZE 16
+
+/*
+ * Segment of contiguous memory.
+ *
+ * XXX Per-segment locking is probably useless, since one or both of the
+ * page queues lock and the free page queue lock is held on any access.
+ * However it should first be made clear which lock protects access to
+ * which members of a segment.
+ */
+struct vm_page_seg {
+ struct vm_page_cpu_pool cpu_pools[NCPUS];
+
+ phys_addr_t start;
+ phys_addr_t end;
+ struct vm_page *pages;
+ struct vm_page *pages_end;
+ simple_lock_data_t lock;
+ struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
+ unsigned long nr_free_pages;
+
+ /* Free memory thresholds */
+ unsigned long min_free_pages; /* Privileged allocations only */
+ unsigned long low_free_pages; /* Pageout daemon starts scanning */
+ unsigned long high_free_pages; /* Pageout daemon stops scanning,
+ unprivileged allocations resume */
+
+ /* Page cache related data */
+ struct vm_page_queue active_pages;
+ unsigned long nr_active_pages;
+ unsigned long high_active_pages;
+ struct vm_page_queue inactive_pages;
+ unsigned long nr_inactive_pages;
+};
+
+/*
+ * Bootstrap information about a segment.
+ */
+struct vm_page_boot_seg {
+ phys_addr_t start;
+ phys_addr_t end;
+ boolean_t heap_present;
+ phys_addr_t avail_start;
+ phys_addr_t avail_end;
+};
+
+static int vm_page_is_ready __read_mostly;
+
+/*
+ * Segment table.
+ *
+ * The system supports a maximum of 4 segments :
+ * - DMA: suitable for DMA
+ * - DMA32: suitable for DMA when devices support 32-bits addressing
+ * - DIRECTMAP: direct physical mapping, allows direct access from
+ * the kernel with a simple offset translation
+ * - HIGHMEM: must be mapped before it can be accessed
+ *
+ * Segments are ordered by priority, 0 being the lowest priority. Their
+ * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM or
+ * DMA < DIRECTMAP < DMA32 < HIGHMEM.
+ * Some segments may actually be aliases for others, e.g. if DMA is always
+ * possible from the direct physical mapping, DMA and DMA32 are aliases for
+ * DIRECTMAP, in which case the segment table contains DIRECTMAP and HIGHMEM
+ * only.
+ */
+static struct vm_page_seg vm_page_segs[VM_PAGE_MAX_SEGS];
+
+/*
+ * Bootstrap segment table.
+ */
+static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
+
+/*
+ * Number of loaded segments.
+ */
+static unsigned int vm_page_segs_size __read_mostly;
+
+/*
+ * If true, unprivileged allocations are blocked, disregarding any other
+ * condition.
+ *
+ * This variable is also used to resume clients once pages are available.
+ *
+ * The free page queue lock must be held when accessing this variable.
+ */
+static boolean_t vm_page_alloc_paused;
+
+static void __init
+vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
+{
+ memset(page, 0, sizeof(*page));
+ vm_page_init(page); /* vm_resident members */
+ page->type = VM_PT_RESERVED;
+ page->seg_index = seg_index;
+ page->order = VM_PAGE_ORDER_UNLISTED;
+ page->priv = NULL;
+ page->phys_addr = pa;
+}
+
+void
+vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
+{
+ unsigned int i, nr_pages;
+
+ nr_pages = 1 << order;
+
+ for (i = 0; i < nr_pages; i++)
+ page[i].type = type;
+}
+
+static boolean_t
+vm_page_pageable(const struct vm_page *page)
+{
+ return (page->object != NULL)
+ && (page->wire_count == 0)
+ && (page->active || page->inactive);
+}
+
+static boolean_t
+vm_page_can_move(const struct vm_page *page)
+{
+ /*
+ * This function is called on pages pulled from the page queues,
+ * implying they're pageable, which is why the wire count isn't
+ * checked here.
+ */
+
+ return !page->busy
+ && !page->wanted
+ && !page->absent
+ && page->object->alive;
+}
+
+static void
+vm_page_remove_mappings(struct vm_page *page)
+{
+ page->busy = TRUE;
+ pmap_page_protect(page->phys_addr, VM_PROT_NONE);
+
+ if (!page->dirty) {
+ page->dirty = pmap_is_modified(page->phys_addr);
+ }
+}
+
+static void __init
+vm_page_free_list_init(struct vm_page_free_list *free_list)
+{
+ free_list->size = 0;
+ list_init(&free_list->blocks);
+}
+
+static inline void
+vm_page_free_list_insert(struct vm_page_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+ free_list->size++;
+ list_insert_head(&free_list->blocks, &page->node);
+}
+
+static inline void
+vm_page_free_list_remove(struct vm_page_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(page->order != VM_PAGE_ORDER_UNLISTED);
+
+ free_list->size--;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
+{
+    struct vm_page_free_list *free_list = free_list; /* self-init silences a spurious "uninitialized" warning */
+ struct vm_page *page, *buddy;
+ unsigned int i;
+
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ if (vm_page_alloc_paused && current_thread()
+ && !current_thread()->vm_privilege) {
+ return NULL;
+ } else if (seg->nr_free_pages <= seg->low_free_pages) {
+ vm_pageout_start();
+
+ if ((seg->nr_free_pages <= seg->min_free_pages)
+ && current_thread() && !current_thread()->vm_privilege) {
+ vm_page_alloc_paused = TRUE;
+ return NULL;
+ }
+ }
+
+ for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
+ free_list = &seg->free_lists[i];
+
+ if (free_list->size != 0)
+ break;
+ }
+
+ if (i == VM_PAGE_NR_FREE_LISTS)
+ return NULL;
+
+ page = list_first_entry(&free_list->blocks, struct vm_page, node);
+ vm_page_free_list_remove(free_list, page);
+ page->order = VM_PAGE_ORDER_UNLISTED;
+
+ while (i > order) {
+ i--;
+ buddy = &page[1 << i];
+ vm_page_free_list_insert(&seg->free_lists[i], buddy);
+ buddy->order = i;
+ }
+
+ seg->nr_free_pages -= (1 << order);
+
+ if (seg->nr_free_pages < seg->min_free_pages) {
+ vm_page_alloc_paused = TRUE;
+ }
+
+ return page;
+}
+
+static void
+vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_page *buddy;
+ phys_addr_t pa, buddy_pa;
+ unsigned int nr_pages;
+
+ assert(page >= seg->pages);
+ assert(page < seg->pages_end);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ nr_pages = (1 << order);
+ pa = page->phys_addr;
+
+ while (order < (VM_PAGE_NR_FREE_LISTS - 1)) {
+ buddy_pa = pa ^ vm_page_ptoa(1ULL << order);
+
+ if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
+ break;
+
+ buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
+
+ if (buddy->order != order)
+ break;
+
+ vm_page_free_list_remove(&seg->free_lists[order], buddy);
+ buddy->order = VM_PAGE_ORDER_UNLISTED;
+ order++;
+ pa &= -vm_page_ptoa(1ULL << order);
+ page = &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ vm_page_free_list_insert(&seg->free_lists[order], page);
+ page->order = order;
+ seg->nr_free_pages += nr_pages;
+}
+
+static void __init
+vm_page_cpu_pool_init(struct vm_page_cpu_pool *cpu_pool, int size)
+{
+ simple_lock_init(&cpu_pool->lock);
+ cpu_pool->size = size;
+ cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1)
+ / VM_PAGE_CPU_POOL_TRANSFER_RATIO;
+ cpu_pool->nr_pages = 0;
+ list_init(&cpu_pool->pages);
+}
+
+static inline struct vm_page_cpu_pool *
+vm_page_cpu_pool_get(struct vm_page_seg *seg)
+{
+ return &seg->cpu_pools[cpu_number()];
+}
+
+static inline struct vm_page *
+vm_page_cpu_pool_pop(struct vm_page_cpu_pool *cpu_pool)
+{
+ struct vm_page *page;
+
+ assert(cpu_pool->nr_pages != 0);
+ cpu_pool->nr_pages--;
+ page = list_first_entry(&cpu_pool->pages, struct vm_page, node);
+ list_remove(&page->node);
+ return page;
+}
+
+static inline void
+vm_page_cpu_pool_push(struct vm_page_cpu_pool *cpu_pool, struct vm_page *page)
+{
+ assert(cpu_pool->nr_pages < cpu_pool->size);
+ cpu_pool->nr_pages++;
+ list_insert_head(&cpu_pool->pages, &page->node);
+}
+
+static int
+vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool,
+ struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == 0);
+
+ simple_lock(&seg->lock);
+
+ for (i = 0; i < cpu_pool->transfer_size; i++) {
+ page = vm_page_seg_alloc_from_buddy(seg, 0);
+
+ if (page == NULL)
+ break;
+
+ vm_page_cpu_pool_push(cpu_pool, page);
+ }
+
+ simple_unlock(&seg->lock);
+
+ return i;
+}
+
+static void
+vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
+ struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == cpu_pool->size);
+
+ simple_lock(&seg->lock);
+
+ for (i = cpu_pool->transfer_size; i > 0; i--) {
+ page = vm_page_cpu_pool_pop(cpu_pool);
+ vm_page_seg_free_to_buddy(seg, page, 0);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+static void
+vm_page_queue_init(struct vm_page_queue *queue)
+{
+ list_init(&queue->internal_pages);
+ list_init(&queue->external_pages);
+}
+
+static void
+vm_page_queue_push(struct vm_page_queue *queue, struct vm_page *page)
+{
+ if (page->external) {
+ list_insert_tail(&queue->external_pages, &page->node);
+ } else {
+ list_insert_tail(&queue->internal_pages, &page->node);
+ }
+}
+
+static void
+vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page)
+{
+ (void)queue;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only)
+{
+ struct vm_page *page;
+
+ if (!list_empty(&queue->external_pages)) {
+ page = list_first_entry(&queue->external_pages, struct vm_page, node);
+ return page;
+ }
+
+ if (!external_only && !list_empty(&queue->internal_pages)) {
+ page = list_first_entry(&queue->internal_pages, struct vm_page, node);
+ return page;
+ }
+
+ return NULL;
+}
+
+static struct vm_page_seg *
+vm_page_seg_get(unsigned short index)
+{
+ assert(index < vm_page_segs_size);
+ return &vm_page_segs[index];
+}
+
+static unsigned int
+vm_page_seg_index(const struct vm_page_seg *seg)
+{
+ unsigned int index;
+
+ index = seg - vm_page_segs;
+ assert(index < vm_page_segs_size);
+ return index;
+}
+
+static phys_addr_t __init
+vm_page_seg_size(struct vm_page_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+static int __init
+vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
+{
+ phys_addr_t size;
+
+ size = vm_page_atop(vm_page_seg_size(seg)) / VM_PAGE_CPU_POOL_RATIO;
+
+ if (size == 0)
+ size = 1;
+ else if (size > VM_PAGE_CPU_POOL_MAX_SIZE)
+ size = VM_PAGE_CPU_POOL_MAX_SIZE;
+
+ return size;
+}
+
+static void __init
+vm_page_seg_compute_pageout_thresholds(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = vm_page_atop(vm_page_seg_size(seg));
+
+ if (nr_pages < VM_PAGE_SEG_MIN_PAGES) {
+ panic("vm_page: segment too small");
+ }
+
+ seg->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM
+ / VM_PAGE_SEG_THRESHOLD_MIN_DENOM;
+
+ if (seg->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN) {
+ seg->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN;
+ }
+
+ seg->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM
+ / VM_PAGE_SEG_THRESHOLD_LOW_DENOM;
+
+ if (seg->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW) {
+ seg->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW;
+ }
+
+ seg->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM
+ / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM;
+
+ if (seg->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH) {
+ seg->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH;
+ }
+}
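+
+/*
+ * Worked example with hypothetical ratios (the actual
+ * VM_PAGE_SEG_THRESHOLD_* constants are defined elsewhere): for a segment
+ * of 262144 pages (1 GiB with 4 KiB pages), ratios of 5/1000, 30/1000 and
+ * 60/1000 would yield min_free_pages = 1310, low_free_pages = 7864 and
+ * high_free_pages = 15728, each raised to its corresponding
+ * VM_PAGE_SEG_THRESHOLD_* floor when the computed value is smaller.
+ */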
+
+static void __init
+vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
+ struct vm_page *pages)
+{
+ phys_addr_t pa;
+ int pool_size;
+ unsigned int i;
+
+ seg->start = start;
+ seg->end = end;
+ pool_size = vm_page_seg_compute_pool_size(seg);
+
+ for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
+ vm_page_cpu_pool_init(&seg->cpu_pools[i], pool_size);
+
+ seg->pages = pages;
+ seg->pages_end = pages + vm_page_atop(vm_page_seg_size(seg));
+ simple_lock_init(&seg->lock);
+
+ for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
+ vm_page_free_list_init(&seg->free_lists[i]);
+
+ seg->nr_free_pages = 0;
+
+ vm_page_seg_compute_pageout_thresholds(seg);
+
+ vm_page_queue_init(&seg->active_pages);
+ seg->nr_active_pages = 0;
+ vm_page_queue_init(&seg->inactive_pages);
+ seg->nr_inactive_pages = 0;
+
+ i = vm_page_seg_index(seg);
+
+ for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
+ vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa);
+}
+
+static struct vm_page *
+vm_page_seg_alloc(struct vm_page_seg *seg, unsigned int order,
+ unsigned short type)
+{
+ struct vm_page_cpu_pool *cpu_pool;
+ struct vm_page *page;
+ int filled;
+
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ if (order == 0) {
+ thread_pin();
+ cpu_pool = vm_page_cpu_pool_get(seg);
+ simple_lock(&cpu_pool->lock);
+
+ if (cpu_pool->nr_pages == 0) {
+ filled = vm_page_cpu_pool_fill(cpu_pool, seg);
+
+ if (!filled) {
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+ return NULL;
+ }
+ }
+
+ page = vm_page_cpu_pool_pop(cpu_pool);
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+ } else {
+ simple_lock(&seg->lock);
+ page = vm_page_seg_alloc_from_buddy(seg, order);
+ simple_unlock(&seg->lock);
+
+ if (page == NULL)
+ return NULL;
+ }
+
+ assert(page->type == VM_PT_FREE);
+ vm_page_set_type(page, order, type);
+ return page;
+}
+
+static void
+vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_page_cpu_pool *cpu_pool;
+
+ assert(page->type != VM_PT_FREE);
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ vm_page_set_type(page, order, VM_PT_FREE);
+
+ if (order == 0) {
+ thread_pin();
+ cpu_pool = vm_page_cpu_pool_get(seg);
+ simple_lock(&cpu_pool->lock);
+
+ if (cpu_pool->nr_pages == cpu_pool->size)
+ vm_page_cpu_pool_drain(cpu_pool, seg);
+
+ vm_page_cpu_pool_push(cpu_pool, page);
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+ } else {
+ simple_lock(&seg->lock);
+ vm_page_seg_free_to_buddy(seg, page, order);
+ simple_unlock(&seg->lock);
+ }
+}
+
+static void
+vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->active = TRUE;
+ page->reference = TRUE;
+ vm_page_queue_push(&seg->active_pages, page);
+ seg->nr_active_pages++;
+ vm_page_active_count++;
+}
+
+static void
+vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && page->active && !page->inactive);
+ page->active = FALSE;
+ vm_page_queue_remove(&seg->active_pages, page);
+ seg->nr_active_pages--;
+ vm_page_active_count--;
+}
+
+static void
+vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->inactive = TRUE;
+ vm_page_queue_push(&seg->inactive_pages, page);
+ seg->nr_inactive_pages++;
+ vm_page_inactive_count++;
+}
+
+static void
+vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && page->inactive);
+ page->inactive = FALSE;
+ vm_page_queue_remove(&seg->inactive_pages, page);
+ seg->nr_inactive_pages--;
+ vm_page_inactive_count--;
+}
+
+/*
+ * Attempt to pull an active page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only)
+{
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->active_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_active_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_active_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_active_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attempt to pull an inactive page.
+ *
+ * If successful, the object containing the page is locked.
+ *
+ * XXX See vm_page_seg_pull_active_page (duplicated code).
+ */
+static struct vm_page *
+vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only)
+{
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->inactive_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_inactive_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_inactive_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attempt to pull a page cache page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_cache_page(struct vm_page_seg *seg,
+ boolean_t external_only,
+ boolean_t *was_active)
+{
+ struct vm_page *page;
+
+ page = vm_page_seg_pull_inactive_page(seg, external_only);
+
+ if (page != NULL) {
+ *was_active = FALSE;
+ return page;
+ }
+
+ page = vm_page_seg_pull_active_page(seg, external_only);
+
+ if (page != NULL) {
+ *was_active = TRUE;
+ return page;
+ }
+
+ return NULL;
+}
+
+static boolean_t
+vm_page_seg_page_available(const struct vm_page_seg *seg)
+{
+ return (seg->nr_free_pages > seg->high_free_pages);
+}
+
+static boolean_t
+vm_page_seg_usable(const struct vm_page_seg *seg)
+{
+ if ((seg->nr_active_pages + seg->nr_inactive_pages) == 0) {
+ /* Nothing to page out, assume segment is usable */
+ return TRUE;
+ }
+
+ return (seg->nr_free_pages >= seg->high_free_pages);
+}
+
+static void
+vm_page_seg_double_lock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ assert(seg1 != seg2);
+
+ if (seg1 < seg2) {
+ simple_lock(&seg1->lock);
+ simple_lock(&seg2->lock);
+ } else {
+ simple_lock(&seg2->lock);
+ simple_lock(&seg1->lock);
+ }
+}
+
+static void
+vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ simple_unlock(&seg1->lock);
+ simple_unlock(&seg2->lock);
+}
+
+/*
+ * Attempt to balance a segment by moving one page to another segment.
+ *
+ * Return TRUE if a page was actually moved.
+ */
+static boolean_t
+vm_page_seg_balance_page(struct vm_page_seg *seg,
+ struct vm_page_seg *remote_seg)
+{
+ struct vm_page *src, *dest;
+ vm_object_t object;
+ vm_offset_t offset;
+ boolean_t was_active;
+
+ vm_page_lock_queues();
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_seg_double_lock(seg, remote_seg);
+
+ if (vm_page_seg_usable(seg)
+ || !vm_page_seg_page_available(remote_seg)) {
+ goto error;
+ }
+
+ src = vm_page_seg_pull_cache_page(seg, FALSE, &was_active);
+
+ if (src == NULL) {
+ goto error;
+ }
+
+ assert(src->object != NULL);
+ assert(!src->fictitious && !src->private);
+ assert(src->wire_count == 0);
+ assert(src->type != VM_PT_FREE);
+ assert(src->order == VM_PAGE_ORDER_UNLISTED);
+
+ dest = vm_page_seg_alloc_from_buddy(remote_seg, 0);
+ assert(dest != NULL);
+
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (!was_active && !src->reference && pmap_is_referenced(src->phys_addr)) {
+ src->reference = TRUE;
+ }
+
+ object = src->object;
+ offset = src->offset;
+ vm_page_remove(src);
+
+ vm_page_remove_mappings(src);
+
+ vm_page_set_type(dest, 0, src->type);
+ memcpy(&dest->vm_page_header, &src->vm_page_header,
+ VM_PAGE_BODY_SIZE);
+ vm_page_copy(src, dest);
+
+ if (!src->dirty) {
+ pmap_clear_modify(dest->phys_addr);
+ }
+
+ dest->busy = FALSE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_init(src);
+ src->free = TRUE;
+ simple_lock(&seg->lock);
+ vm_page_set_type(src, 0, VM_PT_FREE);
+ vm_page_seg_free_to_buddy(seg, src, 0);
+ simple_unlock(&seg->lock);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ vm_object_lock(object);
+ vm_page_insert(dest, object, offset);
+ vm_object_unlock(object);
+
+ if (was_active) {
+ vm_page_activate(dest);
+ } else {
+ vm_page_deactivate(dest);
+ }
+
+ vm_page_unlock_queues();
+
+ return TRUE;
+
+error:
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+ vm_page_unlock_queues();
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_balance(struct vm_page_seg *seg)
+{
+ struct vm_page_seg *remote_seg;
+ unsigned int i;
+ boolean_t balanced;
+
+ /*
+ * It's important here that pages are moved to lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ remote_seg = vm_page_seg_get(i);
+
+ if (remote_seg == seg) {
+ continue;
+ }
+
+ balanced = vm_page_seg_balance_page(seg, remote_seg);
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_evict(struct vm_page_seg *seg, boolean_t external_only,
+ boolean_t alloc_paused)
+{
+ struct vm_page *page;
+ boolean_t reclaim, double_paging;
+ vm_object_t object;
+ boolean_t was_active;
+
+ page = NULL;
+ object = NULL;
+ double_paging = FALSE;
+
+restart:
+ vm_page_lock_queues();
+ simple_lock(&seg->lock);
+
+ if (page != NULL) {
+ vm_object_lock(page->object);
+ } else {
+ page = vm_page_seg_pull_cache_page(seg, external_only, &was_active);
+
+ if (page == NULL) {
+ goto out;
+ }
+ }
+
+ assert(page->object != NULL);
+ assert(!page->fictitious && !page->private);
+ assert(page->wire_count == 0);
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+ object = page->object;
+
+ if (!was_active
+ && (page->reference || pmap_is_referenced(page->phys_addr))) {
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ vm_object_unlock(object);
+ vm_stat.reactivations++;
+ current_task()->reactivations++;
+ vm_page_unlock_queues();
+ page = NULL;
+ goto restart;
+ }
+
+ vm_page_remove_mappings(page);
+
+ if (!page->dirty && !page->precious) {
+ reclaim = TRUE;
+ goto out;
+ }
+
+ reclaim = FALSE;
+
+ /*
+ * If we are very low on memory, then we can't rely on an external
+ * pager to clean a dirty page, because external pagers are not
+ * vm-privileged.
+ *
+ * The laundry bit tells vm_pageout_setup not to do any special
+ * processing of this page since it's immediately going to be
+ * double paged out to the default pager. The laundry bit is
+ * reset and the page is inserted into an internal object by
+ * vm_pageout_setup before the second double paging pass.
+ *
+ * There is one important special case: the default pager can
+ * back external memory objects. When receiving the first
+ * pageout request, where the page is no longer present, a
+ * fault could occur, during which the map would be locked.
+ * This fault would cause a new paging request to the default
+ * pager. Receiving that request would deadlock when trying to
+ * lock the map again. Instead, the page isn't double paged
+ * and vm_pageout_setup wires the page down, trusting the
+ * default pager as for internal pages.
+ */
+
+ assert(!page->laundry);
+ assert(!(double_paging && page->external));
+
+ if (object->internal || !alloc_paused ||
+ memory_manager_default_port(object->pager)) {
+ double_paging = FALSE;
+ } else {
+ double_paging = page->laundry = TRUE;
+ }
+
+out:
+ simple_unlock(&seg->lock);
+
+ if (object == NULL) {
+ vm_page_unlock_queues();
+ return FALSE;
+ }
+
+ if (reclaim) {
+ vm_page_free(page);
+ vm_page_unlock_queues();
+
+ if (vm_object_collectable(object)) {
+ vm_object_collect(object);
+ } else {
+ vm_object_unlock(object);
+ }
+
+ return TRUE;
+ }
+
+ vm_page_unlock_queues();
+
+ /*
+ * If there is no memory object for the page, create one and hand it
+ * to the default pager. First try to collapse, so we don't create
+ * one unnecessarily.
+ */
+
+ if (!object->pager_initialized) {
+ vm_object_collapse(object);
+ }
+
+ if (!object->pager_initialized) {
+ vm_object_pager_create(object);
+ }
+
+ if (!object->pager_initialized) {
+ panic("vm_page_seg_evict");
+ }
+
+ vm_pageout_page(page, FALSE, TRUE); /* flush it */
+ vm_object_unlock(object);
+
+ if (double_paging) {
+ goto restart;
+ }
+
+ return TRUE;
+}
+
+static void
+vm_page_seg_compute_high_active_page(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = seg->nr_active_pages + seg->nr_inactive_pages;
+ seg->high_active_pages = nr_pages * VM_PAGE_HIGH_ACTIVE_PAGE_NUM
+ / VM_PAGE_HIGH_ACTIVE_PAGE_DENOM;
+}
+
+static void
+vm_page_seg_refill_inactive(struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+
+ simple_lock(&seg->lock);
+
+ vm_page_seg_compute_high_active_page(seg);
+
+ while (seg->nr_active_pages > seg->high_active_pages) {
+ page = vm_page_seg_pull_active_page(seg, FALSE);
+
+ if (page == NULL) {
+ break;
+ }
+
+ page->reference = FALSE;
+ pmap_clear_reference(page->phys_addr);
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+void __init
+vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+ struct vm_page_boot_seg *seg;
+
+ assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+ assert(start < end);
+ assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
+
+ seg = &vm_page_boot_segs[seg_index];
+ seg->start = start;
+ seg->end = end;
+ seg->heap_present = FALSE;
+
+#if DEBUG
+ printf("vm_page: load: %s: %llx:%llx\n",
+ vm_page_seg_name(seg_index),
+ (unsigned long long)start, (unsigned long long)end);
+#endif
+
+ vm_page_segs_size++;
+}
+
+void
+vm_page_load_heap(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+ struct vm_page_boot_seg *seg;
+
+ assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+
+ seg = &vm_page_boot_segs[seg_index];
+
+ assert(seg->start <= start);
+ assert(end <= seg->end);
+
+ seg->avail_start = start;
+ seg->avail_end = end;
+ seg->heap_present = TRUE;
+
+#if DEBUG
+ printf("vm_page: heap: %s: %llx:%llx\n",
+ vm_page_seg_name(seg_index),
+ (unsigned long long)start, (unsigned long long)end);
+#endif
+}
+
+int
+vm_page_ready(void)
+{
+ return vm_page_is_ready;
+}
+
+static unsigned int
+vm_page_select_alloc_seg(unsigned int selector)
+{
+ unsigned int seg_index;
+
+ switch (selector) {
+ case VM_PAGE_SEL_DMA:
+ seg_index = VM_PAGE_SEG_DMA;
+ break;
+ case VM_PAGE_SEL_DMA32:
+ seg_index = VM_PAGE_SEG_DMA32;
+ break;
+ case VM_PAGE_SEL_DIRECTMAP:
+ seg_index = VM_PAGE_SEG_DIRECTMAP;
+ break;
+ case VM_PAGE_SEL_HIGHMEM:
+ seg_index = VM_PAGE_SEG_HIGHMEM;
+ break;
+ default:
+ panic("vm_page: invalid selector");
+ }
+
+ return MIN(vm_page_segs_size - 1, seg_index);
+}
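+
+/*
+ * Example of the clamp above: on a configuration where only two segments
+ * were loaded (vm_page_segs_size == 2), a selector mapping to a higher
+ * segment index is clamped to index 1, so allocation simply starts from
+ * the highest segment actually present and vm_page_alloc_pa() falls back
+ * downward from there.
+ */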
+
+static int __init
+vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
+{
+ return (seg->end != 0);
+}
+
+static void __init
+vm_page_check_boot_segs(void)
+{
+ unsigned int i;
+ int expect_loaded;
+
+ if (vm_page_segs_size == 0)
+ panic("vm_page: no physical memory loaded");
+
+ for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
+ expect_loaded = (i < vm_page_segs_size);
+
+ if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded)
+ continue;
+
+ panic("vm_page: invalid boot segment table");
+ }
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
+{
+ return seg->avail_end - seg->avail_start;
+}
+
+phys_addr_t __init
+vm_page_bootalloc(size_t size)
+{
+ struct vm_page_boot_seg *seg;
+ phys_addr_t pa;
+ unsigned int i;
+
+ for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
+ i < vm_page_segs_size;
+ i--) {
+ seg = &vm_page_boot_segs[i];
+
+ if (size <= vm_page_boot_seg_avail_size(seg)) {
+ pa = seg->avail_start;
+ seg->avail_start += vm_page_round(size);
+ return pa;
+ }
+ }
+
+ panic("vm_page: no physical memory available");
+}
+
+void __init
+vm_page_setup(void)
+{
+ struct vm_page_boot_seg *boot_seg;
+ struct vm_page_seg *seg;
+ struct vm_page *table, *page, *end;
+ size_t nr_pages, table_size;
+ unsigned long va;
+ unsigned int i;
+ phys_addr_t pa;
+
+ vm_page_check_boot_segs();
+
+ /*
+ * Compute the page table size.
+ */
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++)
+ nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
+
+ table_size = vm_page_round(nr_pages * sizeof(struct vm_page));
+ printf("vm_page: page table size: %lu entries (%luk)\n", nr_pages,
+ table_size >> 10);
+ table = (struct vm_page *)pmap_steal_memory(table_size);
+ va = (unsigned long)table;
+
+ /*
+ * Initialize the segments, associating them with the page table. When
+ * a segment is initialized, all of its pages start out marked as
+ * allocated (reserved). The available pages are then released, which
+ * populates the free lists.
+ */
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+ boot_seg = &vm_page_boot_segs[i];
+ vm_page_seg_init(seg, boot_seg->start, boot_seg->end, table);
+ page = seg->pages + vm_page_atop(boot_seg->avail_start
+ - boot_seg->start);
+ end = seg->pages + vm_page_atop(boot_seg->avail_end
+ - boot_seg->start);
+
+ while (page < end) {
+ page->type = VM_PT_FREE;
+ vm_page_seg_free_to_buddy(seg, page, 0);
+ page++;
+ }
+
+ table += vm_page_atop(vm_page_seg_size(seg));
+ }
+
+ while (va < (unsigned long)table) {
+ pa = pmap_extract(kernel_pmap, va);
+ page = vm_page_lookup_pa(pa);
+ assert((page != NULL) && (page->type == VM_PT_RESERVED));
+ page->type = VM_PT_TABLE;
+ va += PAGE_SIZE;
+ }
+
+ vm_page_is_ready = 1;
+}
+
+void __init
+vm_page_manage(struct vm_page *page)
+{
+ assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
+ assert(page->type == VM_PT_RESERVED);
+
+ vm_page_set_type(page, 0, VM_PT_FREE);
+ vm_page_seg_free_to_buddy(&vm_page_segs[page->seg_index], page, 0);
+}
+
+struct vm_page *
+vm_page_lookup_pa(phys_addr_t pa)
+{
+ struct vm_page_seg *seg;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((pa >= seg->start) && (pa < seg->end))
+ return &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ return NULL;
+}
+
+static struct vm_page_seg *
+vm_page_lookup_seg(const struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((page->phys_addr >= seg->start) && (page->phys_addr < seg->end)) {
+ return seg;
+ }
+ }
+
+ return NULL;
+}
+
+void vm_page_check(const struct vm_page *page)
+{
+ if (page->fictitious) {
+ if (page->private) {
+ panic("vm_page: page both fictitious and private");
+ }
+
+ if (page->phys_addr != vm_page_fictitious_addr) {
+ panic("vm_page: invalid fictitious page");
+ }
+ } else {
+ struct vm_page_seg *seg;
+
+ if (page->phys_addr == vm_page_fictitious_addr) {
+ panic("vm_page: real page has fictitious address");
+ }
+
+ seg = vm_page_lookup_seg(page);
+
+ if (seg == NULL) {
+ if (!page->private) {
+ panic("vm_page: page claims it's managed but not in any segment");
+ }
+ } else {
+ if (page->private) {
+ struct vm_page *real_page;
+
+ if (vm_page_pageable(page)) {
+ panic("vm_page: private page is pageable");
+ }
+
+ real_page = vm_page_lookup_pa(page->phys_addr);
+
+ if (vm_page_pageable(real_page)) {
+ panic("vm_page: page underlying private page is pageable");
+ }
+
+ if ((real_page->type == VM_PT_FREE)
+ || (real_page->order != VM_PAGE_ORDER_UNLISTED)) {
+ panic("vm_page: page underlying private pagei is free");
+ }
+ } else {
+ unsigned int index;
+
+ index = vm_page_seg_index(seg);
+
+ if (index != page->seg_index) {
+ panic("vm_page: page segment mismatch");
+ }
+ }
+ }
+ }
+}
+
+struct vm_page *
+vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
+{
+ struct vm_page *page;
+ unsigned int i;
+
+ for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
+ page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
+
+ if (page != NULL)
+ return page;
+ }
+
+ if (!current_thread() || current_thread()->vm_privilege)
+ panic("vm_page: privileged thread unable to allocate page");
+
+ return NULL;
+}
+
+void
+vm_page_free_pa(struct vm_page *page, unsigned int order)
+{
+ assert(page != NULL);
+ assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
+
+ vm_page_seg_free(&vm_page_segs[page->seg_index], page, order);
+}
+
+const char *
+vm_page_seg_name(unsigned int seg_index)
+{
+ /* Don't use a switch statement since segments can be aliased */
+ if (seg_index == VM_PAGE_SEG_HIGHMEM)
+ return "HIGHMEM";
+ else if (seg_index == VM_PAGE_SEG_DIRECTMAP)
+ return "DIRECTMAP";
+ else if (seg_index == VM_PAGE_SEG_DMA32)
+ return "DMA32";
+ else if (seg_index == VM_PAGE_SEG_DMA)
+ return "DMA";
+ else
+ panic("vm_page: invalid segment index");
+}
+
+void
+vm_page_info_all(void)
+{
+ struct vm_page_seg *seg;
+ unsigned long pages;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+ pages = (unsigned long)(seg->pages_end - seg->pages);
+ printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
+ vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT),
+ seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
+ printf("vm_page: %s: min:%lu low:%lu high:%lu\n",
+ vm_page_seg_name(vm_page_seg_index(seg)),
+ seg->min_free_pages, seg->low_free_pages, seg->high_free_pages);
+ }
+}
+
+phys_addr_t
+vm_page_seg_end(unsigned int selector)
+{
+ return vm_page_segs[vm_page_select_alloc_seg(selector)].end;
+}
+
+static unsigned long
+vm_page_boot_table_size(void)
+{
+ unsigned long nr_pages;
+ unsigned int i;
+
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
+ }
+
+ return nr_pages;
+}
+
+unsigned long
+vm_page_table_size(void)
+{
+ unsigned long nr_pages;
+ unsigned int i;
+
+ if (!vm_page_is_ready) {
+ return vm_page_boot_table_size();
+ }
+
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ nr_pages += vm_page_atop(vm_page_seg_size(&vm_page_segs[i]));
+ }
+
+ return nr_pages;
+}
+
+unsigned long
+vm_page_table_index(phys_addr_t pa)
+{
+ struct vm_page_seg *seg;
+ unsigned long index;
+ unsigned int i;
+
+ index = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((pa >= seg->start) && (pa < seg->end)) {
+ return index + vm_page_atop(pa - seg->start);
+ }
+
+ index += vm_page_atop(vm_page_seg_size(seg));
+ }
+
+ panic("vm_page: invalid physical address");
+}
+
+phys_addr_t
+vm_page_mem_size(void)
+{
+ phys_addr_t total;
+ unsigned int i;
+
+ total = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ total += vm_page_seg_size(&vm_page_segs[i]);
+ }
+
+ return total;
+}
+
+unsigned long
+vm_page_mem_free(void)
+{
+ unsigned long total;
+ unsigned int i;
+
+ total = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ total += vm_page_segs[i].nr_free_pages;
+ }
+
+ return total;
+}
+
+/*
+ * Mark this page as wired down by yet another map, removing it
+ * from paging queues as necessary.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_wire(struct vm_page *page)
+{
+ VM_PAGE_CHECK(page);
+
+ if (page->wire_count == 0) {
+ vm_page_queues_remove(page);
+
+ if (!page->private && !page->fictitious) {
+ vm_page_wire_count++;
+ }
+ }
+
+ page->wire_count++;
+}
+
+/*
+ * Release one wiring of this page, potentially enabling it to be paged again.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_unwire(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ assert(page->wire_count != 0);
+ page->wire_count--;
+
+ if ((page->wire_count != 0)
+ || page->fictitious
+ || page->private) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+
+ vm_page_wire_count--;
+}
+
+/*
+ * Returns the given page to the inactive list, indicating that
+ * no physical maps have access to this page.
+ * [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_deactivate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * This page is no longer very interesting. If it was
+ * interesting (active or inactive/referenced), then we
+ * clear the reference bit and (re)enter it in the
+ * inactive queue. Note wired pages should not have
+ * their reference bit cleared.
+ */
+
+ if (page->active || (page->inactive && page->reference)) {
+ if (!page->fictitious && !page->private && !page->absent) {
+ pmap_clear_reference(page->phys_addr);
+ }
+
+ page->reference = FALSE;
+ vm_page_queues_remove(page);
+ }
+
+ if ((page->wire_count == 0) && !page->fictitious
+ && !page->private && !page->inactive) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_inactive_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+/*
+ * Put the specified page on the active list (if appropriate).
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_activate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * Unconditionally remove so that, even if the page was already
+ * active, it gets back to the end of the active queue.
+ */
+ vm_page_queues_remove(page);
+
+ if ((page->wire_count == 0) && !page->fictitious && !page->private) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ if (page->active)
+ panic("vm_page_activate: already active");
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+void
+vm_page_queues_remove(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ assert(!page->active || !page->inactive);
+
+ if (!page->active && !page->inactive) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+
+ if (page->active) {
+ vm_page_seg_remove_active_page(seg, page);
+ } else {
+ vm_page_seg_remove_inactive_page(seg, page);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+/*
+ * Check whether segments are all usable for unprivileged allocations.
+ *
+ * If all segments are usable, resume pending unprivileged allocations
+ * and return TRUE.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+static boolean_t
+vm_page_check_usable(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t usable;
+ unsigned int i;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ usable = vm_page_seg_usable(seg);
+ simple_unlock(&seg->lock);
+
+ if (!usable) {
+ return FALSE;
+ }
+ }
+
+ vm_page_external_laundry_count = -1;
+ vm_page_alloc_paused = FALSE;
+ thread_wakeup(&vm_page_alloc_paused);
+ return TRUE;
+}
+
+static boolean_t
+vm_page_may_balance(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t page_available;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ page_available = vm_page_seg_page_available(seg);
+ simple_unlock(&seg->lock);
+
+ if (page_available) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_balance_once(void)
+{
+ boolean_t balanced;
+ unsigned int i;
+
+ /*
+ * It's important here that pages are moved from higher priority
+ * segments first.
+ */
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ balanced = vm_page_seg_balance(vm_page_seg_get(i));
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+boolean_t
+vm_page_balance(void)
+{
+ boolean_t balanced;
+
+ while (vm_page_may_balance()) {
+ balanced = vm_page_balance_once();
+
+ if (!balanced) {
+ break;
+ }
+ }
+
+ return vm_page_check_usable();
+}
+
+static boolean_t
+vm_page_evict_once(boolean_t external_only, boolean_t alloc_paused)
+{
+ boolean_t evicted;
+ unsigned int i;
+
+ /*
+ * It's important here that pages are evicted from lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ evicted = vm_page_seg_evict(vm_page_seg_get(i),
+ external_only, alloc_paused);
+
+ if (evicted) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+#define VM_PAGE_MAX_LAUNDRY 5
+#define VM_PAGE_MAX_EVICTIONS 5
+
+boolean_t
+vm_page_evict(boolean_t *should_wait)
+{
+ boolean_t pause, evicted, external_only, alloc_paused;
+ unsigned int i;
+
+ *should_wait = TRUE;
+ external_only = TRUE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_external_laundry_count = 0;
+ alloc_paused = vm_page_alloc_paused;
+ simple_unlock(&vm_page_queue_free_lock);
+
+again:
+ vm_page_lock_queues();
+ pause = (vm_page_laundry_count >= VM_PAGE_MAX_LAUNDRY);
+ vm_page_unlock_queues();
+
+ if (pause) {
+ simple_lock(&vm_page_queue_free_lock);
+ return FALSE;
+ }
+
+ for (i = 0; i < VM_PAGE_MAX_EVICTIONS; i++) {
+ evicted = vm_page_evict_once(external_only, alloc_paused);
+
+ if (!evicted) {
+ break;
+ }
+ }
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ /*
+ * Keep in mind eviction may not cause pageouts, since non-precious
+ * clean pages are simply released.
+ */
+ if ((vm_page_laundry_count == 0) && (vm_page_external_laundry_count == 0)) {
+ /*
+ * No pageout, but some clean pages were freed. Start a complete
+ * scan again without waiting.
+ */
+ if (evicted) {
+ *should_wait = FALSE;
+ return FALSE;
+ }
+
+ /*
+ * Eviction failed, consider pages from internal objects on the
+ * next attempt.
+ */
+ if (external_only) {
+ simple_unlock(&vm_page_queue_free_lock);
+ external_only = FALSE;
+ goto again;
+ }
+
+ /*
+ * TODO Find out what could cause this and how to deal with it.
+ * This will likely require an out-of-memory killer.
+ */
+
+ {
+ static boolean_t warned = FALSE;
+
+ if (!warned) {
+ printf("vm_page warning: unable to recycle any page\n");
+ warned = TRUE;
+ }
+ }
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return vm_page_check_usable();
+}
+
+void
+vm_page_refill_inactive(void)
+{
+ unsigned int i;
+
+ vm_page_lock_queues();
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ vm_page_seg_refill_inactive(vm_page_seg_get(i));
+ }
+
+ vm_page_unlock_queues();
+}
+
+void
+vm_page_wait(void (*continuation)(void))
+{
+ assert(!current_thread()->vm_privilege);
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ if (!vm_page_alloc_paused) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return;
+ }
+
+ assert_wait(&vm_page_alloc_paused, FALSE);
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (continuation != 0) {
+ counter(c_vm_page_wait_block_user++);
+ thread_block(continuation);
+ } else {
+ counter(c_vm_page_wait_block_kernel++);
+ thread_block((void (*)(void)) 0);
+ }
+}
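+
+/*
+ * Typical caller pattern (sketch; see e.g. vm_pageout_setup): retry an
+ * allocation until a page becomes available, blocking while allocations
+ * are paused:
+ *
+ *   for (;;) {
+ *       vm_object_lock(object);
+ *       m = vm_page_alloc(object, offset);
+ *       vm_object_unlock(object);
+ *
+ *       if (m != VM_PAGE_NULL)
+ *           break;
+ *
+ *       VM_PAGE_WAIT(NULL);
+ *   }
+ */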
+
+#if MACH_KDB
+#include <ddb/db_output.h>
+#define PAGES_PER_MB ((1<<20) / PAGE_SIZE)
+void db_show_vmstat(void)
+{
+ integer_t free_count = vm_page_mem_free();
+ unsigned i;
+
+ db_printf("%-20s %10uM\n", "size:",
+ (free_count + vm_page_active_count +
+ vm_page_inactive_count + vm_page_wire_count)
+ / PAGES_PER_MB);
+
+ db_printf("%-20s %10uM\n", "free:",
+ free_count / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "active:",
+ vm_page_active_count / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "inactive:",
+ vm_page_inactive_count / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "wired:",
+ vm_page_wire_count / PAGES_PER_MB);
+
+ db_printf("%-20s %10uM\n", "zero filled:",
+ vm_stat.zero_fill_count / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "reactivated:",
+ vm_stat.reactivations / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "pageins:",
+ vm_stat.pageins / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "pageouts:",
+ vm_stat.pageouts / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "page faults:",
+ vm_stat.faults / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "cow faults:",
+ vm_stat.cow_faults / PAGES_PER_MB);
+ db_printf("%-20s %10u%\n", "memobj hit ratio:",
+ (vm_stat.hits * 100) / vm_stat.lookups);
+
+ db_printf("%-20s %10u%\n", "cached_memobjs",
+ vm_object_external_count);
+ db_printf("%-20s %10uM\n", "cache",
+ vm_object_external_pages / PAGES_PER_MB);
+
+ for (i = 0; i < vm_page_segs_size; i++)
+ {
+ db_printf("\nSegment %s:\n", vm_page_seg_name(i));
+ db_printf("%-20s %10uM\n", "size:",
+ vm_page_seg_size(&vm_page_segs[i]) >> 20);
+ db_printf("%-20s %10uM\n", "free:",
+ vm_page_segs[i].nr_free_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "min_free:",
+ vm_page_segs[i].min_free_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "low_free:",
+ vm_page_segs[i].low_free_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "high_free:",
+ vm_page_segs[i].high_free_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "active:",
+ vm_page_segs[i].nr_active_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "high active:",
+ vm_page_segs[i].high_active_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "inactive:",
+ vm_page_segs[i].nr_inactive_pages / PAGES_PER_MB);
+ }
+}
+#endif /* MACH_KDB */
diff --git a/vm/vm_page.h b/vm/vm_page.h
new file mode 100644
index 0000000..3be75f1
--- /dev/null
+++ b/vm/vm_page.h
@@ -0,0 +1,567 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_page.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * Resident memory system definitions.
+ */
+
+#ifndef _VM_VM_PAGE_H_
+#define _VM_VM_PAGE_H_
+
+#include <mach/boolean.h>
+#include <mach/vm_prot.h>
+#include <machine/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_types.h>
+#include <kern/queue.h>
+#include <kern/list.h>
+#include <kern/lock.h>
+#include <kern/log2.h>
+
+#include <kern/macros.h>
+#include <kern/sched_prim.h> /* definitions of wait/wakeup */
+
+#if MACH_VM_DEBUG
+#include <mach_debug/hash_info.h>
+#endif
+
+/*
+ * Management of resident (logical) pages.
+ *
+ * A small structure is kept for each resident
+ * page, indexed by page number. Each structure
+ * is an element of several lists:
+ *
+ * A hash table bucket used to quickly
+ * perform object/offset lookups
+ *
+ * A list of all pages for a given object,
+ * so they can be quickly deactivated at
+ * time of deallocation.
+ *
+ * An ordered list of pages due for pageout.
+ *
+ * In addition, the structure contains the object
+ * and offset to which this page belongs (for pageout),
+ * and sundry status bits.
+ *
+ * Fields in this structure are locked either by the lock on the
+ * object that the page belongs to (O) or by the lock on the page
+ * queues (P). [Some fields require that both locks be held to
+ * change that field; holding either lock is sufficient to read.]
+ */
+
+struct vm_page {
+ struct list node; /* page queues or free list (P) */
+ void *priv;
+
+ /*
+ * This member is used throughout the code and may only change for
+ * fictitious pages.
+ */
+ phys_addr_t phys_addr;
+
+ queue_chain_t listq; /* all pages in same object (O) */
+ struct vm_page *next; /* VP bucket link (O) */
+
+ /* We use an empty struct as the delimiter. */
+ struct {} vm_page_header;
+
+ vm_object_t object; /* which object am I in (O,P) */
+ vm_offset_t offset; /* offset into that object (O,P) */
+
+ unsigned int wire_count:15, /* how many wired down maps use me?
+ (O&P) */
+ /* boolean_t */ inactive:1, /* page is in inactive list (P) */
+ active:1, /* page is in active list (P) */
+ laundry:1, /* page is being cleaned now (P)*/
+ external_laundry:1, /* same as laundry for external pagers (P)*/
+ free:1, /* page is on free list (P) */
+ reference:1, /* page has been used (P) */
+ external:1, /* page in external object (P) */
+ busy:1, /* page is in transit (O) */
+ wanted:1, /* someone is waiting for page (O) */
+ tabled:1, /* page is in VP table (O) */
+ fictitious:1, /* Physical page doesn't exist (O) */
+ private:1, /* Page should not be returned to
+ * the free list (O) */
+ absent:1, /* Data has been requested, but is
+ * not yet available (O) */
+ error:1, /* Data manager was unable to provide
+ * data due to error (O) */
+ dirty:1, /* Page must be cleaned (O) */
+ precious:1, /* Page is precious; data must be
+ * returned even if clean (O) */
+ overwriting:1; /* Request to unlock has been made
+ * without having data. (O)
+ * [See vm_object_overwrite] */
+
+ vm_prot_t page_lock:3; /* Uses prohibited by data manager (O) */
+ vm_prot_t unlock_request:3; /* Outstanding unlock request (O) */
+
+ struct {} vm_page_footer;
+
+ unsigned short type:2;
+ unsigned short seg_index:2;
+ unsigned short order:4;
+};
+
+#define VM_PAGE_BODY_SIZE \
+ (offsetof(struct vm_page, vm_page_footer) \
+ - offsetof(struct vm_page, vm_page_header))
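+
+/*
+ * VM_PAGE_BODY_SIZE covers the fields declared between vm_page_header and
+ * vm_page_footer (object, offset, the flag bits and the lock fields), so
+ * copying that region, as done when migrating a page between segments,
+ * transfers the logical page state without touching the physical address,
+ * queue linkage or allocator bookkeeping fields.
+ */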
+
+/*
+ * For debugging, this macro can be defined to perform
+ * some useful check on a page structure.
+ */
+
+#define VM_PAGE_CHECK(mem) vm_page_check(mem)
+
+void vm_page_check(const struct vm_page *page);
+
+/*
+ * Each pageable resident page falls into one of three lists:
+ *
+ * free
+ * Available for allocation now.
+ * inactive
+ * Not referenced in any map, but still has an
+ * object/offset-page mapping, and may be dirty.
+ * This is the list of pages that should be
+ * paged out next.
+ * active
+ * A list of pages which have been placed in
+ * at least one physical map. This list is
+ * ordered, in LRU-like fashion.
+ */
+
+#define VM_PAGE_DMA 0x01
+#if defined(VM_PAGE_DMA32_LIMIT) && VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+#define VM_PAGE_DIRECTMAP 0x02
+#define VM_PAGE_DMA32 0x04
+#else
+#define VM_PAGE_DMA32 0x02
+#define VM_PAGE_DIRECTMAP 0x04
+#endif
+#define VM_PAGE_HIGHMEM 0x08
+
+extern
+int vm_page_fictitious_count;/* How many fictitious pages are free? */
+extern
+int vm_page_active_count; /* How many pages are active? */
+extern
+int vm_page_inactive_count; /* How many pages are inactive? */
+extern
+int vm_page_wire_count; /* How many pages are wired? */
+extern
+int vm_page_laundry_count; /* How many pages are being laundered? */
+extern
+int vm_page_external_laundry_count; /* How many external pages are being paged out? */
+
+decl_simple_lock_data(extern,vm_page_queue_lock)/* lock on active and inactive
+ page queues */
+decl_simple_lock_data(extern,vm_page_queue_free_lock)
+ /* lock on free page queue */
+
+extern phys_addr_t vm_page_fictitious_addr;
+ /* (fake) phys_addr of fictitious pages */
+
+extern void vm_page_bootstrap(
+ vm_offset_t *startp,
+ vm_offset_t *endp);
+extern void vm_page_module_init(void);
+
+extern vm_page_t vm_page_lookup(
+ vm_object_t object,
+ vm_offset_t offset);
+extern vm_page_t vm_page_grab_fictitious(void);
+extern boolean_t vm_page_convert(vm_page_t *);
+extern void vm_page_more_fictitious(void);
+extern vm_page_t vm_page_grab(unsigned flags);
+extern void vm_page_release(vm_page_t, boolean_t, boolean_t);
+extern phys_addr_t vm_page_grab_phys_addr(void);
+extern vm_page_t vm_page_grab_contig(vm_size_t, unsigned int);
+extern void vm_page_free_contig(vm_page_t, vm_size_t);
+extern void vm_page_wait(void (*)(void));
+extern vm_page_t vm_page_alloc(
+ vm_object_t object,
+ vm_offset_t offset);
+extern void vm_page_init(
+ vm_page_t mem);
+extern void vm_page_free(vm_page_t);
+extern void vm_page_activate(vm_page_t);
+extern void vm_page_deactivate(vm_page_t);
+extern void vm_page_rename(
+ vm_page_t mem,
+ vm_object_t new_object,
+ vm_offset_t new_offset);
+extern void vm_page_insert(
+ vm_page_t mem,
+ vm_object_t object,
+ vm_offset_t offset);
+extern void vm_page_remove(
+ vm_page_t mem);
+
+extern void vm_page_zero_fill(vm_page_t);
+extern void vm_page_copy(vm_page_t src_m, vm_page_t dest_m);
+
+extern void vm_page_wire(vm_page_t);
+extern void vm_page_unwire(vm_page_t);
+
+#if MACH_VM_DEBUG
+extern unsigned int vm_page_info(
+ hash_info_bucket_t *info,
+ unsigned int count);
+#endif
+
+/*
+ * Functions implemented as macros
+ */
+
+#define PAGE_ASSERT_WAIT(m, interruptible) \
+ MACRO_BEGIN \
+ (m)->wanted = TRUE; \
+ assert_wait((event_t) (m), (interruptible)); \
+ MACRO_END
+
+#define PAGE_WAKEUP_DONE(m) \
+ MACRO_BEGIN \
+ (m)->busy = FALSE; \
+ if ((m)->wanted) { \
+ (m)->wanted = FALSE; \
+ thread_wakeup(((event_t) m)); \
+ } \
+ MACRO_END
+
+#define PAGE_WAKEUP(m) \
+ MACRO_BEGIN \
+ if ((m)->wanted) { \
+ (m)->wanted = FALSE; \
+ thread_wakeup((event_t) (m)); \
+ } \
+ MACRO_END
+
+#define VM_PAGE_FREE(p) \
+ MACRO_BEGIN \
+ vm_page_lock_queues(); \
+ vm_page_free(p); \
+ vm_page_unlock_queues(); \
+ MACRO_END
+
+/*
+ * Macro to be used in place of pmap_enter()
+ */
+
+#define PMAP_ENTER(pmap, virtual_address, page, protection, wired) \
+ MACRO_BEGIN \
+ pmap_enter( \
+ (pmap), \
+ (virtual_address), \
+ (page)->phys_addr, \
+ (protection) & ~(page)->page_lock, \
+ (wired) \
+ ); \
+ MACRO_END
+
+#define VM_PAGE_WAIT(continuation) vm_page_wait(continuation)
+
+#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock)
+#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock)
+
+#define VM_PAGE_QUEUES_REMOVE(mem) vm_page_queues_remove(mem)
+
+/*
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Physical page management.
+ */
+
+/*
+ * Address/page conversion and rounding macros. These are macros rather
+ * than inline functions so they can be applied to both virtual and
+ * physical addresses, which may not have the same type width.
+ */
+#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT)
+#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
+#define vm_page_trunc(addr) P2ALIGN(addr, PAGE_SIZE)
+#define vm_page_round(addr) P2ROUND(addr, PAGE_SIZE)
+#define vm_page_aligned(addr) P2ALIGNED(addr, PAGE_SIZE)
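+
+/*
+ * Example, assuming PAGE_SIZE == 4096 (PAGE_SHIFT == 12):
+ *
+ *   vm_page_atop(0x5000) == 5          vm_page_ptoa(5) == 0x5000
+ *   vm_page_trunc(0x5123) == 0x5000    vm_page_round(0x5123) == 0x6000
+ *   vm_page_aligned(0x5000) is true    vm_page_aligned(0x5123) is false
+ */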
+
+/*
+ * Segment selectors.
+ *
+ * Selector-to-segment-list translation table :
+ * DMA DMA
+ * if 32bit PAE
+ * DIRECTMAP DMA32 DMA
+ * DMA32 DMA32 DIRECTMAP DMA
+ * HIGHMEM HIGHMEM DMA32 DIRECTMAP DMA
+ * else
+ * DMA32 DMA32 DMA
+ * DIRECTMAP DIRECTMAP DMA32 DMA
+ * HIGHMEM HIGHMEM DIRECTMAP DMA32 DMA
+ * endif
+ */
+#define VM_PAGE_SEL_DMA 0
+#if defined(VM_PAGE_DMA32_LIMIT) && VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+#define VM_PAGE_SEL_DIRECTMAP 1
+#define VM_PAGE_SEL_DMA32 2
+#else
+#define VM_PAGE_SEL_DMA32 1
+#define VM_PAGE_SEL_DIRECTMAP 2
+#endif
+#define VM_PAGE_SEL_HIGHMEM 3
+
+/*
+ * Page usage types.
+ */
+#define VM_PT_FREE 0 /* Page unused */
+#define VM_PT_RESERVED 1 /* Page reserved at boot time */
+#define VM_PT_TABLE 2 /* Page is part of the page table */
+#define VM_PT_KERNEL 3 /* Type for generic kernel allocations */
+
+static inline unsigned short
+vm_page_type(const struct vm_page *page)
+{
+ return page->type;
+}
+
+void vm_page_set_type(struct vm_page *page, unsigned int order,
+ unsigned short type);
+
+static inline unsigned int
+vm_page_order(size_t size)
+{
+ return iorder2(vm_page_atop(vm_page_round(size)));
+}
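+
+/*
+ * Example, assuming PAGE_SIZE == 4096 and that iorder2() rounds up to the
+ * next power of two: vm_page_order(4096) == 0, vm_page_order(8192) == 1
+ * and vm_page_order(12288) == 2, since a 3-page request must be served
+ * from a power-of-two block of 4 pages.
+ */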
+
+static inline phys_addr_t
+vm_page_to_pa(const struct vm_page *page)
+{
+ return page->phys_addr;
+}
+
+/*
+ * Associate private data with a page.
+ */
+static inline void
+vm_page_set_priv(struct vm_page *page, void *priv)
+{
+ page->priv = priv;
+}
+
+static inline void *
+vm_page_get_priv(const struct vm_page *page)
+{
+ return page->priv;
+}
+
+/*
+ * Load physical memory into the vm_page module at boot time.
+ *
+ * All addresses must be page-aligned. Segments can be loaded in any order.
+ */
+void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end);
+
+/*
+ * Load available physical memory into the vm_page module at boot time.
+ *
+ * The segment referred to must have been loaded with vm_page_load
+ * before loading its heap.
+ */
+void vm_page_load_heap(unsigned int seg_index, phys_addr_t start,
+ phys_addr_t end);
+
+/*
+ * Return true if the vm_page module is completely initialized, false
+ * otherwise, in which case only vm_page_bootalloc() can be used for
+ * allocations.
+ */
+int vm_page_ready(void);
+
+/*
+ * Early allocation function.
+ *
+ * This function is used by the vm_resident module to implement
+ * pmap_steal_memory. It can be used after physical segments have been loaded
+ * and before the vm_page module is initialized.
+ */
+phys_addr_t vm_page_bootalloc(size_t size);
+
+/*
+ * Set up the vm_page module.
+ *
+ * Architecture-specific code must have loaded segments before calling this
+ * function. Segments must comply with the selector-to-segment-list table,
+ * e.g. HIGHMEM is loaded if and only if DIRECTMAP, DMA32 and DMA are loaded,
+ * notwithstanding segment aliasing.
+ *
+ * Once this function returns, the vm_page module is ready, and normal
+ * allocation functions can be used.
+ */
+void vm_page_setup(void);
+
+/*
+ * Make the given page managed by the vm_page module.
+ *
+ * If additional memory can be made usable after the VM system is initialized,
+ * it should be reported through this function.
+ */
+void vm_page_manage(struct vm_page *page);
+
+/*
+ * Return the page descriptor for the given physical address.
+ */
+struct vm_page * vm_page_lookup_pa(phys_addr_t pa);
+
+/*
+ * Allocate a block of 2^order physical pages.
+ *
+ * The selector is used to determine the segments from which allocation can
+ * be attempted.
+ *
+ * This function should only be used by the vm_resident module.
+ */
+struct vm_page * vm_page_alloc_pa(unsigned int order, unsigned int selector,
+ unsigned short type);
+
+/*
+ * Release a block of 2^order physical pages.
+ *
+ * This function should only be used by the vm_resident module.
+ */
+void vm_page_free_pa(struct vm_page *page, unsigned int order);
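+
+/*
+ * Usage sketch (illustrative; in-tree callers live in the vm_resident
+ * module): allocate and release an order-1 block (2 contiguous pages)
+ * for kernel use from the direct-mapped segments:
+ *
+ *   struct vm_page *pages;
+ *
+ *   pages = vm_page_alloc_pa(1, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL);
+ *
+ *   if (pages != NULL) {
+ *       ... use vm_page_to_pa(pages) ...
+ *       vm_page_free_pa(pages, 1);
+ *   }
+ */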
+
+/*
+ * Return the name of the given segment.
+ */
+const char * vm_page_seg_name(unsigned int seg_index);
+
+/*
+ * Display internal information about the module.
+ */
+void vm_page_info_all(void);
+
+/*
+ * Return the maximum physical address for a given segment selector.
+ */
+phys_addr_t vm_page_seg_end(unsigned int selector);
+
+/*
+ * Return the total number of physical pages.
+ */
+unsigned long vm_page_table_size(void);
+
+/*
+ * Return the index of a page in the page table.
+ */
+unsigned long vm_page_table_index(phys_addr_t pa);
+
+/*
+ * Return the total amount of physical memory.
+ */
+phys_addr_t vm_page_mem_size(void);
+
+/*
+ * Return the number of free (unused) pages.
+ *
+ * XXX This currently relies on the kernel being non-preemptible and
+ * uniprocessor.
+ */
+unsigned long vm_page_mem_free(void);
+
+/*
+ * Remove the given page from any page queue it might be in.
+ */
+void vm_page_queues_remove(struct vm_page *page);
+
+/*
+ * Balance physical pages among segments.
+ *
+ * This function should be called first by the pageout daemon
+ * on memory pressure, since balancing alone may free enough pages,
+ * making any other operation, let alone cache shrinking,
+ * unnecessary.
+ *
+ * Return TRUE if balancing made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_balance(void);
+
+/*
+ * Evict physical pages.
+ *
+ * This function should be called by the pageout daemon after balancing
+ * the segments and shrinking kernel caches.
+ *
+ * Return TRUE if eviction made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * Otherwise, report whether the pageout daemon should wait (some pages
+ * have been paged out) or not (only clean pages have been released).
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_evict(boolean_t *should_wait);
+
+/*
+ * Turn active pages into inactive ones for second-chance LRU
+ * approximation.
+ *
+ * This function should be called by the pageout daemon on memory pressure,
+ * i.e. right before evicting pages.
+ *
+ * XXX This is probably not the best strategy, compared to keeping the
+ * active/inactive ratio in check at all times, but this means less
+ * frequent refills.
+ */
+void vm_page_refill_inactive(void);
+
+/*
+ * Print vmstat information
+ */
+void db_show_vmstat(void);
+
+#endif /* _VM_VM_PAGE_H_ */
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
new file mode 100644
index 0000000..e2f4cf2
--- /dev/null
+++ b/vm/vm_pageout.c
@@ -0,0 +1,515 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_pageout.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1985
+ *
+ * The proverbial page-out daemon.
+ */
+
+#include <device/net_io.h>
+#include <mach/mach_types.h>
+#include <mach/memory_object.h>
+#include <vm/memory_object_default.user.h>
+#include <vm/memory_object_user.user.h>
+#include <mach/vm_param.h>
+#include <mach/vm_statistics.h>
+#include <kern/counters.h>
+#include <kern/debug.h>
+#include <kern/slab.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/printf.h>
+#include <vm/memory_object.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <machine/locore.h>
+
+#define DEBUG 0
+
+/*
+ * Maximum delay, in milliseconds, between two pageout scans.
+ */
+#define VM_PAGEOUT_TIMEOUT 50
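+
+/*
+ * The timeout is converted to clock ticks when it is armed below, as
+ * thread_set_timeout(VM_PAGEOUT_TIMEOUT * hz / 1000).  For example,
+ * assuming hz == 100 ticks per second, this gives 50 * 100 / 1000 = 5
+ * ticks; the hz value is only illustrative.
+ */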
+
+/*
+ * Event placeholder for pageout requests, synchronized with
+ * the free page queue lock.
+ */
+static int vm_pageout_requested;
+
+/*
+ * Event placeholder for pageout throttling, synchronized with
+ * the free page queue lock.
+ */
+static int vm_pageout_continue;
+
+/*
+ * Routine: vm_pageout_setup
+ * Purpose:
+ * Set up a page for pageout.
+ *
+ * Move or copy the page to a new object, as part
+ * of which it will be sent to its memory manager
+ *		in a memory_object_data_return or memory_object_data_initialize
+ * message.
+ *
+ * The "paging_offset" argument specifies the offset
+ * of the page within its external memory object.
+ *
+ * The "new_object" and "new_offset" arguments
+ * indicate where the page should be moved.
+ *
+ * The "flush" argument specifies whether the page
+ * should be flushed from its object. If not, a
+ * copy of the page is moved to the new object.
+ *
+ * In/Out conditions:
+ * The page in question must not be on any pageout queues,
+ * and must be busy. The object to which it belongs
+ * must be unlocked, and the caller must hold a paging
+ * reference to it. The new_object must not be locked.
+ *
+ * If the page is flushed from its original object,
+ * this routine returns a pointer to a place-holder page,
+ * inserted at the same offset, to block out-of-order
+ * requests for the page. The place-holder page must
+ * be freed after the data_return or initialize message
+ * has been sent. If the page is copied,
+ * the holding page is VM_PAGE_NULL.
+ *
+ * The original page is put on a paging queue and marked
+ * not busy on exit.
+ */
+vm_page_t
+vm_pageout_setup(
+ vm_page_t m,
+ vm_offset_t paging_offset,
+ vm_object_t new_object,
+ vm_offset_t new_offset,
+ boolean_t flush)
+{
+ vm_object_t old_object = m->object;
+	vm_page_t	holding_page = 0;	/* '=0' to quiet gcc warnings */
+ vm_page_t new_m;
+
+ assert(m->busy && !m->absent && !m->fictitious);
+
+ /*
+ * If we are not flushing the page, allocate a
+ * page in the object.
+ */
+ if (!flush) {
+ for (;;) {
+ vm_object_lock(new_object);
+ new_m = vm_page_alloc(new_object, new_offset);
+ vm_object_unlock(new_object);
+
+ if (new_m != VM_PAGE_NULL) {
+ break;
+ }
+
+ VM_PAGE_WAIT(NULL);
+ }
+ }
+
+ if (flush) {
+ /*
+ * Create a place-holder page where the old one was,
+ * to prevent anyone from attempting to page in this
+		 *	page while we're unlocked.
+ */
+ while ((holding_page = vm_page_grab_fictitious())
+ == VM_PAGE_NULL)
+ vm_page_more_fictitious();
+
+ vm_object_lock(old_object);
+ vm_page_lock_queues();
+ vm_page_remove(m);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP_DONE(m);
+
+ vm_page_lock_queues();
+ vm_page_insert(holding_page, old_object, m->offset);
+ vm_page_unlock_queues();
+
+ /*
+ * Record that this page has been written out
+ */
+#if MACH_PAGEMAP
+ vm_external_state_set(old_object->existence_info,
+ paging_offset,
+ VM_EXTERNAL_STATE_EXISTS);
+#endif /* MACH_PAGEMAP */
+
+ vm_object_unlock(old_object);
+
+ vm_object_lock(new_object);
+
+ /*
+ * Move this page into the new object
+ */
+
+ vm_page_lock_queues();
+ vm_page_insert(m, new_object, new_offset);
+ vm_page_unlock_queues();
+
+ m->dirty = TRUE;
+ m->precious = FALSE;
+ m->page_lock = VM_PROT_NONE;
+ m->unlock_request = VM_PROT_NONE;
+ }
+ else {
+ /*
+ * Copy the data into the new page,
+ * and mark the new page as clean.
+ */
+ vm_page_copy(m, new_m);
+
+ vm_object_lock(old_object);
+ m->dirty = FALSE;
+ pmap_clear_modify(m->phys_addr);
+
+ /*
+ * Deactivate old page.
+ */
+ vm_page_lock_queues();
+ vm_page_deactivate(m);
+ vm_page_unlock_queues();
+
+ PAGE_WAKEUP_DONE(m);
+
+ /*
+ * Record that this page has been written out
+ */
+
+#if MACH_PAGEMAP
+ vm_external_state_set(old_object->existence_info,
+ paging_offset,
+ VM_EXTERNAL_STATE_EXISTS);
+#endif /* MACH_PAGEMAP */
+
+ vm_object_unlock(old_object);
+
+ vm_object_lock(new_object);
+
+ /*
+ * Use the new page below.
+ */
+ m = new_m;
+ m->dirty = TRUE;
+ assert(!m->precious);
+ PAGE_WAKEUP_DONE(m);
+ }
+
+ /*
+ * Make the old page eligible for replacement again; if a
+ * user-supplied memory manager fails to release the page,
+ * it will be paged out again to the default memory manager.
+ *
+ * Note that pages written to the default memory manager
+ * must be wired down -- in return, it guarantees to free
+ * this page, rather than reusing it.
+ */
+
+ vm_page_lock_queues();
+ vm_stat.pageouts++;
+ if (m->laundry) {
+
+ /*
+ * The caller is telling us that it is going to
+ * immediately double page this page to the default
+ * pager.
+ */
+
+ assert(!old_object->internal);
+ m->laundry = FALSE;
+ } else if (old_object->internal ||
+ memory_manager_default_port(old_object->pager)) {
+ m->laundry = TRUE;
+ vm_page_laundry_count++;
+
+ vm_page_wire(m);
+ } else {
+ m->external_laundry = TRUE;
+
+ /*
+ * If vm_page_external_laundry_count is negative,
+ * the pageout daemon isn't expecting to be
+ * notified.
+ */
+
+ if (vm_page_external_laundry_count >= 0) {
+ vm_page_external_laundry_count++;
+ }
+
+ vm_page_activate(m);
+ }
+ vm_page_unlock_queues();
+
+ /*
+ * Since IPC operations may block, we drop locks now.
+ * [The placeholder page is busy, and we still have
+ * paging_in_progress incremented.]
+ */
+
+ vm_object_unlock(new_object);
+
+ /*
+ * Return the placeholder page to simplify cleanup.
+ */
+ return (flush ? holding_page : VM_PAGE_NULL);
+}
+
+/*
+ * Routine: vm_pageout_page
+ * Purpose:
+ * Causes the specified page to be written back to
+ * the appropriate memory object.
+ *
+ * The "initial" argument specifies whether this
+ * data is an initialization only, and should use
+ * memory_object_data_initialize instead of
+ * memory_object_data_return.
+ *
+ * The "flush" argument specifies whether the page
+ * should be flushed from the object. If not, a
+ * copy of the data is sent to the memory object.
+ *
+ * In/out conditions:
+ * The page in question must not be on any pageout queues.
+ * The object to which it belongs must be locked.
+ * Implementation:
+ * Move this page to a completely new object, if flushing;
+ * copy to a new page in a new object, if not.
+ */
+void
+vm_pageout_page(
+ vm_page_t m,
+ boolean_t initial,
+ boolean_t flush)
+{
+ vm_map_copy_t copy;
+ vm_object_t old_object;
+ vm_object_t new_object;
+ vm_page_t holding_page;
+ vm_offset_t paging_offset;
+ kern_return_t rc;
+ boolean_t precious_clean;
+
+ assert(m->busy);
+
+ /*
+ * Cleaning but not flushing a clean precious page is a
+ * no-op. Remember whether page is clean and precious now
+ * because vm_pageout_setup will mark it dirty and not precious.
+ *
+ * XXX Check if precious_clean && !flush can really happen.
+ */
+ precious_clean = (!m->dirty) && m->precious;
+ if (precious_clean && !flush) {
+ PAGE_WAKEUP_DONE(m);
+ return;
+ }
+
+ /*
+ * Verify that we really want to clean this page.
+ */
+ if (m->absent || m->error || (!m->dirty && !m->precious)) {
+ VM_PAGE_FREE(m);
+ return;
+ }
+
+ /*
+ * Create a paging reference to let us play with the object.
+ */
+ old_object = m->object;
+ paging_offset = m->offset + old_object->paging_offset;
+ vm_object_paging_begin(old_object);
+ vm_object_unlock(old_object);
+
+ /*
+ * Allocate a new object into which we can put the page.
+ */
+ new_object = vm_object_allocate(PAGE_SIZE);
+ new_object->used_for_pageout = TRUE;
+
+ /*
+ * Move the page into the new object.
+ */
+ holding_page = vm_pageout_setup(m,
+ paging_offset,
+ new_object,
+ 0, /* new offset */
+ flush); /* flush */
+
+ rc = vm_map_copyin_object(new_object, 0, PAGE_SIZE, &copy);
+ assert(rc == KERN_SUCCESS);
+
+ if (initial) {
+ rc = memory_object_data_initialize(
+ old_object->pager,
+ old_object->pager_request,
+ paging_offset, (pointer_t) copy, PAGE_SIZE);
+ }
+ else {
+ rc = memory_object_data_return(
+ old_object->pager,
+ old_object->pager_request,
+ paging_offset, (pointer_t) copy, PAGE_SIZE,
+ !precious_clean, !flush);
+ }
+
+ if (rc != KERN_SUCCESS)
+ vm_map_copy_discard(copy);
+
+ /*
+ * Clean up.
+ */
+ vm_object_lock(old_object);
+ if (holding_page != VM_PAGE_NULL)
+ VM_PAGE_FREE(holding_page);
+ vm_object_paging_end(old_object);
+}
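+
+/*
+ * Illustrative sketch, an assumption rather than code from this file:
+ * a typical caller pushing a resident dirty page back to its pager.
+ * The object is assumed locked; "object" and "offset" are placeholders.
+ *
+ *	vm_page_t m = vm_page_lookup(object, offset);
+ *
+ *	if (m != VM_PAGE_NULL && !m->busy && m->dirty) {
+ *		vm_page_lock_queues();
+ *		VM_PAGE_QUEUES_REMOVE(m);
+ *		vm_page_unlock_queues();
+ *		m->busy = TRUE;
+ *		vm_pageout_page(m, FALSE, TRUE);	// return and flush
+ *	}
+ */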
+
+/*
+ * vm_pageout_scan does the dirty work for the pageout daemon.
+ *
+ * Return TRUE if the pageout daemon is done for now, FALSE otherwise,
+ * in which case should_wait indicates whether the pageout daemon
+ * should wait to allow pagers to keep up.
+ *
+ * It returns with vm_page_queue_free_lock held.
+ */
+
+static boolean_t vm_pageout_scan(boolean_t *should_wait)
+{
+ boolean_t done;
+
+ /*
+ * Try balancing pages among segments first, since this
+ * may be enough to resume unprivileged allocations.
+ */
+
+ /* This function returns with vm_page_queue_free_lock held */
+ done = vm_page_balance();
+
+ if (done) {
+ return TRUE;
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * Balancing is not enough. Shrink caches and scan pages
+ * for eviction.
+ */
+
+ stack_collect();
+ net_kmsg_collect();
+ consider_task_collect();
+ if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
+ pointless to call consider_thread_collect. */
+ consider_thread_collect();
+
+ /*
+ * slab_collect should be last, because the other operations
+ * might return memory to caches.
+ */
+ slab_collect();
+
+ vm_page_refill_inactive();
+
+ /* This function returns with vm_page_queue_free_lock held */
+ return vm_page_evict(should_wait);
+}
+
+void vm_pageout(void)
+{
+ boolean_t done, should_wait;
+
+ current_thread()->vm_privilege = 1;
+ stack_privilege(current_thread());
+ thread_set_own_priority(0);
+
+ for (;;) {
+ done = vm_pageout_scan(&should_wait);
+ /* we hold vm_page_queue_free_lock now */
+
+ if (done) {
+ thread_sleep(&vm_pageout_requested,
+ simple_lock_addr(vm_page_queue_free_lock),
+ FALSE);
+ } else if (should_wait) {
+ assert_wait(&vm_pageout_continue, FALSE);
+ thread_set_timeout(VM_PAGEOUT_TIMEOUT * hz / 1000);
+ simple_unlock(&vm_page_queue_free_lock);
+ thread_block(NULL);
+
+#if DEBUG
+ if (current_thread()->wait_result != THREAD_AWAKENED) {
+ printf("vm_pageout: timeout,"
+ " vm_page_laundry_count:%d"
+ " vm_page_external_laundry_count:%d\n",
+ vm_page_laundry_count,
+ vm_page_external_laundry_count);
+ }
+#endif
+ } else {
+ simple_unlock(&vm_page_queue_free_lock);
+ }
+ }
+}
+
+/*
+ * Start pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_start(void)
+{
+ if (!current_thread())
+ return;
+
+ thread_wakeup_one(&vm_pageout_requested);
+}
+
+/*
+ * Resume pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_resume(void)
+{
+ thread_wakeup_one(&vm_pageout_continue);
+}
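+
+/*
+ * Illustrative sketch, not part of the original file: both wakeups above
+ * are meant to be issued with the free page queue lock held, e.g. from a
+ * path that detects memory pressure:
+ *
+ *	simple_lock(&vm_page_queue_free_lock);
+ *	// ... free memory found to be below the pageout threshold ...
+ *	vm_pageout_start();
+ *	simple_unlock(&vm_page_queue_free_lock);
+ */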
diff --git a/vm/vm_pageout.h b/vm/vm_pageout.h
new file mode 100644
index 0000000..6ddd821
--- /dev/null
+++ b/vm/vm_pageout.h
@@ -0,0 +1,53 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_pageout.h
+ * Author: Avadis Tevanian, Jr.
+ * Date: 1986
+ *
+ * Declarations for the pageout daemon interface.
+ */
+
+#ifndef _VM_VM_PAGEOUT_H_
+#define _VM_VM_PAGEOUT_H_
+
+#include <vm/vm_page.h>
+
+/*
+ * Exported routines.
+ */
+
+extern vm_page_t vm_pageout_setup(vm_page_t, vm_offset_t, vm_object_t,
+ vm_offset_t, boolean_t);
+extern void vm_pageout_page(vm_page_t, boolean_t, boolean_t);
+
+extern void vm_pageout(void) __attribute__((noreturn));
+
+extern void vm_pageout_start(void);
+
+extern void vm_pageout_resume(void);
+
+#endif /* _VM_VM_PAGEOUT_H_ */
diff --git a/vm/vm_print.h b/vm/vm_print.h
new file mode 100644
index 0000000..8a36d75
--- /dev/null
+++ b/vm/vm_print.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2013 Free Software Foundation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef VM_PRINT_H
+#define VM_PRINT_H
+
+#include <vm/vm_map.h>
+#include <machine/db_machdep.h>
+
+/* Debugging: print a map */
+extern void vm_map_print(db_expr_t addr, boolean_t have_addr,
+ db_expr_t count, const char *modif);
+
+/* Pretty-print a copy object for ddb. */
+extern void vm_map_copy_print(const vm_map_copy_t);
+
+#include <vm/vm_object.h>
+
+extern void vm_object_print(vm_object_t);
+
+#include <vm/vm_page.h>
+
+extern void vm_page_print(const vm_page_t);
+
+#endif /* VM_PRINT_H */
+
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
new file mode 100644
index 0000000..3f0cc90
--- /dev/null
+++ b/vm/vm_resident.c
@@ -0,0 +1,1116 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
+ * Copyright (c) 1993,1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_resident.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Resident memory management module.
+ */
+
+#include <kern/printf.h>
+#include <string.h>
+
+#include <mach/vm_prot.h>
+#include <kern/counters.h>
+#include <kern/debug.h>
+#include <kern/list.h>
+#include <kern/sched_prim.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <mach/vm_statistics.h>
+#include <machine/vm_param.h>
+#include <kern/xpr.h>
+#include <kern/slab.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_resident.h>
+
+#if MACH_VM_DEBUG
+#include <mach/kern_return.h>
+#include <mach_debug/hash_info.h>
+#include <vm/vm_user.h>
+#endif
+
+#if MACH_KDB
+#include <ddb/db_output.h>
+#include <vm/vm_print.h>
+#endif /* MACH_KDB */
+
+
+/*
+ * Associated with each page of user-allocatable memory is a
+ * page structure.
+ */
+
+/*
+ * These variables record the values returned by vm_page_bootstrap,
+ * for debugging purposes. The implementation of pmap_steal_memory
+ * here also uses them internally.
+ */
+
+vm_offset_t virtual_space_start;
+vm_offset_t virtual_space_end;
+
+/*
+ * The vm_page_lookup() routine, which provides for fast
+ * (virtual memory object, offset) to page lookup, employs
+ * the following hash table. The vm_page_{insert,remove}
+ * routines install and remove associations in the table.
+ * [This table is often called the virtual-to-physical,
+ * or VP, table.]
+ */
+typedef struct {
+ decl_simple_lock_data(,lock)
+ vm_page_t pages;
+} vm_page_bucket_t;
+
+vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
+unsigned long vm_page_bucket_count = 0; /* How big is array? */
+unsigned long vm_page_hash_mask; /* Mask for hash function */
+
+static struct list vm_page_queue_fictitious;
+def_simple_lock_data(,vm_page_queue_free_lock)
+int vm_page_fictitious_count;
+int vm_object_external_count;
+int vm_object_external_pages;
+
+/*
+ * Occasionally, the virtual memory system uses
+ * resident page structures that do not refer to
+ * real pages, for example to leave a page with
+ * important state information in the VP table.
+ *
+ * These page structures are allocated the way
+ * most other kernel structures are.
+ */
+struct kmem_cache vm_page_cache;
+
+/*
+ * Fictitious pages don't have a physical address,
+ * but we must initialize phys_addr to something.
+ * For debugging, this should be a strange value
+ * that the pmap module can recognize in assertions.
+ */
+phys_addr_t vm_page_fictitious_addr = (phys_addr_t) -1;
+
+/*
+ * Resident page structures are also chained on
+ * queues that are used by the page replacement
+ * system (pageout daemon). These queues are
+ * defined here, but are shared by the pageout
+ * module.
+ */
+def_simple_lock_data(,vm_page_queue_lock)
+int vm_page_active_count;
+int vm_page_inactive_count;
+int vm_page_wire_count;
+
+/*
+ * Several page replacement parameters are also
+ * shared with this module, so that page allocation
+ * (done here in vm_page_alloc) can trigger the
+ * pageout daemon.
+ */
+int vm_page_laundry_count = 0;
+int vm_page_external_laundry_count = 0;
+
+
+/*
+ * The VM system has a couple of heuristics for deciding
+ * that pages are "uninteresting" and should be placed
+ * on the inactive queue as likely candidates for replacement.
+ * These variables let the heuristics be controlled at run-time
+ * to make experimentation easier.
+ */
+
+boolean_t vm_page_deactivate_behind = TRUE;
+boolean_t vm_page_deactivate_hint = TRUE;
+
+/*
+ * vm_page_bootstrap:
+ *
+ * Initializes the resident memory module.
+ *
+ * Allocates memory for the page cells, and
+ * for the object/offset-to-page hash table headers.
+ * Each page cell is initialized and placed on the free list.
+ * Returns the range of available kernel virtual memory.
+ */
+
+void vm_page_bootstrap(
+ vm_offset_t *startp,
+ vm_offset_t *endp)
+{
+ int i;
+
+ /*
+ * Initialize the page queues.
+ */
+
+ simple_lock_init(&vm_page_queue_free_lock);
+ simple_lock_init(&vm_page_queue_lock);
+
+ list_init(&vm_page_queue_fictitious);
+
+ /*
+ * Allocate (and initialize) the virtual-to-physical
+ * table hash buckets.
+ *
+ * The number of buckets should be a power of two to
+ * get a good hash function. The following computation
+ * chooses the first power of two that is greater
+ * than the number of physical pages in the system.
+ */
+
+ if (vm_page_bucket_count == 0) {
+ unsigned long npages = vm_page_table_size();
+
+ vm_page_bucket_count = 1;
+ while (vm_page_bucket_count < npages)
+ vm_page_bucket_count <<= 1;
+ }
+
+ vm_page_hash_mask = vm_page_bucket_count - 1;
+
+ if (vm_page_hash_mask & vm_page_bucket_count)
+ printf("vm_page_bootstrap: WARNING -- strange page hash\n");
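+
+	/*
+	 * Worked example with illustrative figures: for roughly 130,000
+	 * physical pages, the loop above picks vm_page_bucket_count =
+	 * 131072 (2^17), so vm_page_hash_mask = 0x1ffff and the sanity
+	 * check above stays silent.
+	 */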
+
+ vm_page_buckets = (vm_page_bucket_t *)
+ pmap_steal_memory(vm_page_bucket_count *
+ sizeof(vm_page_bucket_t));
+
+ for (i = 0; i < vm_page_bucket_count; i++) {
+ vm_page_bucket_t *bucket = &vm_page_buckets[i];
+
+ bucket->pages = VM_PAGE_NULL;
+ simple_lock_init(&bucket->lock);
+ }
+
+ vm_page_setup();
+
+ virtual_space_start = round_page(virtual_space_start);
+ virtual_space_end = trunc_page(virtual_space_end);
+
+ *startp = virtual_space_start;
+ *endp = virtual_space_end;
+}
+
+#ifndef MACHINE_PAGES
+/*
+ * We implement pmap_steal_memory with the help
+ * of two simpler functions, pmap_virtual_space and vm_page_bootalloc.
+ */
+
+vm_offset_t pmap_steal_memory(
+ vm_size_t size)
+{
+ vm_offset_t addr, vaddr;
+ phys_addr_t paddr;
+
+ size = round_page(size);
+
+ /*
+ * If this is the first call to pmap_steal_memory,
+ * we have to initialize ourself.
+ */
+
+ if (virtual_space_start == virtual_space_end) {
+ pmap_virtual_space(&virtual_space_start, &virtual_space_end);
+
+ /*
+ * The initial values must be aligned properly, and
+ * we don't trust the pmap module to do it right.
+ */
+
+ virtual_space_start = round_page(virtual_space_start);
+ virtual_space_end = trunc_page(virtual_space_end);
+ }
+
+ /*
+ * Allocate virtual memory for this request.
+ */
+
+ addr = virtual_space_start;
+ virtual_space_start += size;
+
+ /*
+ * Allocate and map physical pages to back new virtual pages.
+ */
+
+ for (vaddr = round_page(addr);
+ vaddr < addr + size;
+ vaddr += PAGE_SIZE) {
+ paddr = vm_page_bootalloc(PAGE_SIZE);
+
+ /*
+ * XXX Logically, these mappings should be wired,
+ * but some pmap modules barf if they are.
+ */
+
+ pmap_enter(kernel_pmap, vaddr, paddr,
+ VM_PROT_READ|VM_PROT_WRITE, FALSE);
+ }
+
+ return addr;
+}
+#endif /* MACHINE_PAGES */
+
+/*
+ * Routine: vm_page_module_init
+ * Purpose:
+ * Second initialization pass, to be done after
+ * the basic VM system is ready.
+ */
+void vm_page_module_init(void)
+{
+ kmem_cache_init(&vm_page_cache, "vm_page", sizeof(struct vm_page), 0,
+ NULL, 0);
+}
+
+/*
+ * vm_page_hash:
+ *
+ * Distributes the object/offset key pair among hash buckets.
+ *
+ * NOTE: To get a good hash function, the bucket count should
+ * be a power of two.
+ */
+#define vm_page_hash(object, offset) \
+ (((unsigned int)(vm_offset_t)object + (unsigned int)atop(offset)) \
+ & vm_page_hash_mask)
+
+/*
+ * vm_page_insert: [ internal use only ]
+ *
+ * Inserts the given mem entry into the object/object-page
+ * table and object list.
+ *
+ * The object and page must be locked.
+ * The free page queue must not be locked.
+ */
+
+void vm_page_insert(
+ vm_page_t mem,
+ vm_object_t object,
+ vm_offset_t offset)
+{
+ vm_page_bucket_t *bucket;
+
+ VM_PAGE_CHECK(mem);
+
+ assert(!mem->active && !mem->inactive);
+ assert(!mem->external);
+
+ if (!object->internal) {
+ mem->external = TRUE;
+ vm_object_external_pages++;
+ }
+
+ if (mem->tabled)
+ panic("vm_page_insert");
+
+ /*
+ * Record the object/offset pair in this page
+ */
+
+ mem->object = object;
+ mem->offset = offset;
+
+ /*
+	 * Insert it into the object/offset hash table
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+ simple_lock(&bucket->lock);
+ mem->next = bucket->pages;
+ bucket->pages = mem;
+ simple_unlock(&bucket->lock);
+
+ /*
+ * Now link into the object's list of backed pages.
+ */
+
+ queue_enter(&object->memq, mem, vm_page_t, listq);
+ mem->tabled = TRUE;
+
+ /*
+ * Show that the object has one more resident page.
+ */
+
+ object->resident_page_count++;
+ assert(object->resident_page_count != 0);
+
+ /*
+ * Detect sequential access and inactivate previous page.
+ * We ignore busy pages.
+ */
+
+ if (vm_page_deactivate_behind &&
+ (offset == object->last_alloc + PAGE_SIZE)) {
+ vm_page_t last_mem;
+
+ last_mem = vm_page_lookup(object, object->last_alloc);
+ if ((last_mem != VM_PAGE_NULL) && !last_mem->busy)
+ vm_page_deactivate(last_mem);
+ }
+ object->last_alloc = offset;
+}
+
+/*
+ * vm_page_replace:
+ *
+ * Exactly like vm_page_insert, except that we first
+ * remove any existing page at the given offset in object
+ * and we don't do deactivate-behind.
+ *
+ * The object and page must be locked.
+ * The free page queue must not be locked.
+ */
+
+void vm_page_replace(
+ vm_page_t mem,
+ vm_object_t object,
+ vm_offset_t offset)
+{
+ vm_page_bucket_t *bucket;
+
+ VM_PAGE_CHECK(mem);
+
+ assert(!mem->active && !mem->inactive);
+ assert(!mem->external);
+
+ if (!object->internal) {
+ mem->external = TRUE;
+ vm_object_external_pages++;
+ }
+
+ if (mem->tabled)
+ panic("vm_page_replace");
+
+ /*
+ * Record the object/offset pair in this page
+ */
+
+ mem->object = object;
+ mem->offset = offset;
+
+ /*
+	 * Insert it into the object/offset hash table,
+ * replacing any page that might have been there.
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+ simple_lock(&bucket->lock);
+ if (bucket->pages) {
+ vm_page_t *mp = &bucket->pages;
+ vm_page_t m = *mp;
+ do {
+ if (m->object == object && m->offset == offset) {
+ /*
+ * Remove page from bucket and from object,
+ * and return it to the free list.
+ */
+ *mp = m->next;
+ queue_remove(&object->memq, m, vm_page_t,
+ listq);
+ m->tabled = FALSE;
+ object->resident_page_count--;
+ VM_PAGE_QUEUES_REMOVE(m);
+
+ if (m->external) {
+ m->external = FALSE;
+ vm_object_external_pages--;
+ }
+
+ /*
+ * Return page to the free list.
+ * Note the page is not tabled now, so this
+ * won't self-deadlock on the bucket lock.
+ */
+
+ vm_page_free(m);
+ break;
+ }
+ mp = &m->next;
+ } while ((m = *mp) != 0);
+ mem->next = bucket->pages;
+ } else {
+ mem->next = VM_PAGE_NULL;
+ }
+ bucket->pages = mem;
+ simple_unlock(&bucket->lock);
+
+ /*
+ * Now link into the object's list of backed pages.
+ */
+
+ queue_enter(&object->memq, mem, vm_page_t, listq);
+ mem->tabled = TRUE;
+
+ /*
+ * And show that the object has one more resident
+ * page.
+ */
+
+ object->resident_page_count++;
+ assert(object->resident_page_count != 0);
+}
+
+/*
+ * vm_page_remove: [ internal use only ]
+ *
+ * Removes the given mem entry from the object/offset-page
+ * table, the object page list, and the page queues.
+ *
+ * The object and page must be locked.
+ * The free page queue must not be locked.
+ */
+
+void vm_page_remove(
+ vm_page_t mem)
+{
+ vm_page_bucket_t *bucket;
+ vm_page_t this;
+
+ assert(mem->tabled);
+ VM_PAGE_CHECK(mem);
+
+ /*
+	 * Remove from the object/offset hash table
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
+ simple_lock(&bucket->lock);
+ if ((this = bucket->pages) == mem) {
+ /* optimize for common case */
+
+ bucket->pages = mem->next;
+ } else {
+ vm_page_t *prev;
+
+ for (prev = &this->next;
+ (this = *prev) != mem;
+ prev = &this->next)
+ continue;
+ *prev = this->next;
+ }
+ simple_unlock(&bucket->lock);
+
+ /*
+ * Now remove from the object's list of backed pages.
+ */
+
+ queue_remove(&mem->object->memq, mem, vm_page_t, listq);
+
+ /*
+ * And show that the object has one fewer resident
+ * page.
+ */
+
+ mem->object->resident_page_count--;
+
+ mem->tabled = FALSE;
+
+ VM_PAGE_QUEUES_REMOVE(mem);
+
+ if (mem->external) {
+ mem->external = FALSE;
+ vm_object_external_pages--;
+ }
+}
+
+/*
+ * vm_page_lookup:
+ *
+ * Returns the page associated with the object/offset
+ * pair specified; if none is found, VM_PAGE_NULL is returned.
+ *
+ * The object must be locked. No side effects.
+ */
+
+vm_page_t vm_page_lookup(
+ vm_object_t object,
+ vm_offset_t offset)
+{
+ vm_page_t mem;
+ vm_page_bucket_t *bucket;
+
+ /*
+ * Search the hash table for this object/offset pair
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+
+ simple_lock(&bucket->lock);
+ for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
+ VM_PAGE_CHECK(mem);
+ if ((mem->object == object) && (mem->offset == offset))
+ break;
+ }
+ simple_unlock(&bucket->lock);
+ return mem;
+}
+
+/*
+ * vm_page_rename:
+ *
+ * Move the given memory entry from its
+ * current object to the specified target object/offset.
+ *
+ * The object must be locked.
+ */
+void vm_page_rename(
+ vm_page_t mem,
+ vm_object_t new_object,
+ vm_offset_t new_offset)
+{
+ /*
+ * Changes to mem->object require the page lock because
+ * the pageout daemon uses that lock to get the object.
+ */
+
+ vm_page_lock_queues();
+ vm_page_remove(mem);
+ vm_page_insert(mem, new_object, new_offset);
+ vm_page_unlock_queues();
+}
+
+static void vm_page_init_template(vm_page_t m)
+{
+ m->object = VM_OBJECT_NULL; /* reset later */
+ m->offset = 0; /* reset later */
+ m->wire_count = 0;
+
+ m->inactive = FALSE;
+ m->active = FALSE;
+ m->laundry = FALSE;
+ m->external_laundry = FALSE;
+ m->free = FALSE;
+ m->external = FALSE;
+
+ m->busy = TRUE;
+ m->wanted = FALSE;
+ m->tabled = FALSE;
+ m->fictitious = FALSE;
+ m->private = FALSE;
+ m->absent = FALSE;
+ m->error = FALSE;
+ m->dirty = FALSE;
+ m->precious = FALSE;
+ m->reference = FALSE;
+
+ m->page_lock = VM_PROT_NONE;
+ m->unlock_request = VM_PROT_NONE;
+}
+
+/*
+ * vm_page_init:
+ *
+ * Initialize the fields in a new page.
+ * This takes a structure with random values and initializes it
+ * so that it can be given to vm_page_release or vm_page_insert.
+ */
+void vm_page_init(
+ vm_page_t mem)
+{
+ vm_page_init_template(mem);
+}
+
+/*
+ * vm_page_grab_fictitious:
+ *
+ * Remove a fictitious page from the free list.
+ * Returns VM_PAGE_NULL if there are no free pages.
+ */
+
+vm_page_t vm_page_grab_fictitious(void)
+{
+ vm_page_t m;
+
+ simple_lock(&vm_page_queue_free_lock);
+ if (list_empty(&vm_page_queue_fictitious)) {
+ m = VM_PAGE_NULL;
+ } else {
+ m = list_first_entry(&vm_page_queue_fictitious,
+ struct vm_page, node);
+ assert(m->fictitious);
+ list_remove(&m->node);
+ m->free = FALSE;
+ vm_page_fictitious_count--;
+ }
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return m;
+}
+
+/*
+ * vm_page_release_fictitious:
+ *
+ * Release a fictitious page to the free list.
+ */
+
+static void vm_page_release_fictitious(
+ vm_page_t m)
+{
+ simple_lock(&vm_page_queue_free_lock);
+ if (m->free)
+ panic("vm_page_release_fictitious");
+ m->free = TRUE;
+ list_insert_head(&vm_page_queue_fictitious, &m->node);
+ vm_page_fictitious_count++;
+ simple_unlock(&vm_page_queue_free_lock);
+}
+
+/*
+ * vm_page_more_fictitious:
+ *
+ * Add more fictitious pages to the free list.
+ * Allowed to block.
+ */
+
+int vm_page_fictitious_quantum = 5;
+
+void vm_page_more_fictitious(void)
+{
+ vm_page_t m;
+ int i;
+
+ for (i = 0; i < vm_page_fictitious_quantum; i++) {
+ m = (vm_page_t) kmem_cache_alloc(&vm_page_cache);
+ if (m == VM_PAGE_NULL)
+ panic("vm_page_more_fictitious");
+
+ vm_page_init(m);
+ m->phys_addr = vm_page_fictitious_addr;
+ m->fictitious = TRUE;
+ vm_page_release_fictitious(m);
+ }
+}
+
+/*
+ * vm_page_convert:
+ *
+ * Attempt to convert a fictitious page into a real page.
+ *
+ * The object referenced by *MP must be locked.
+ */
+
+boolean_t vm_page_convert(struct vm_page **mp)
+{
+ struct vm_page *real_m, *fict_m;
+ vm_object_t object;
+ vm_offset_t offset;
+
+ fict_m = *mp;
+
+ assert(fict_m->fictitious);
+ assert(fict_m->phys_addr == vm_page_fictitious_addr);
+ assert(!fict_m->active);
+ assert(!fict_m->inactive);
+
+ real_m = vm_page_grab(VM_PAGE_HIGHMEM);
+ if (real_m == VM_PAGE_NULL)
+ return FALSE;
+
+ object = fict_m->object;
+ offset = fict_m->offset;
+ vm_page_remove(fict_m);
+
+ memcpy(&real_m->vm_page_header,
+ &fict_m->vm_page_header,
+ VM_PAGE_BODY_SIZE);
+ real_m->fictitious = FALSE;
+
+ vm_page_insert(real_m, object, offset);
+
+ assert(real_m->phys_addr != vm_page_fictitious_addr);
+ assert(fict_m->fictitious);
+ assert(fict_m->phys_addr == vm_page_fictitious_addr);
+
+ vm_page_release_fictitious(fict_m);
+ *mp = real_m;
+ return TRUE;
+}
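+
+/*
+ * Illustrative sketch, modeled on a fault-handling caller (an assumption,
+ * not code from this file): converting a busy fictitious placeholder page
+ * with its object locked.  "m" is a placeholder name.
+ *
+ *	if (m->fictitious && !vm_page_convert(&m)) {
+ *		// no real page available; drop the placeholder and
+ *		// let the caller back out and retry later
+ *		VM_PAGE_FREE(m);
+ *	}
+ */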
+
+/*
+ * vm_page_grab:
+ *
+ * Remove a page from the free list.
+ * Returns VM_PAGE_NULL if the free list is too small.
+ *
+ * FLAGS specify which constraint should be enforced for the allocated
+ * addresses.
+ */
+
+vm_page_t vm_page_grab(unsigned flags)
+{
+ unsigned selector;
+ vm_page_t mem;
+
+ if (flags & VM_PAGE_HIGHMEM)
+ selector = VM_PAGE_SEL_HIGHMEM;
+#if defined(VM_PAGE_DMA32_LIMIT) && VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+ else if (flags & VM_PAGE_DMA32)
+ selector = VM_PAGE_SEL_DMA32;
+#endif
+ else if (flags & VM_PAGE_DIRECTMAP)
+ selector = VM_PAGE_SEL_DIRECTMAP;
+#if defined(VM_PAGE_DMA32_LIMIT) && VM_PAGE_DMA32_LIMIT <= VM_PAGE_DIRECTMAP_LIMIT
+ else if (flags & VM_PAGE_DMA32)
+ selector = VM_PAGE_SEL_DMA32;
+#endif
+ else
+ selector = VM_PAGE_SEL_DMA;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ /*
+ * XXX Mach has many modules that merely assume memory is
+ * directly mapped in kernel space. Instead of updating all
+ * users, we assume those which need specific physical memory
+ * properties will wire down their pages, either because
+ * they can't be paged (not part of an object), or with
+ * explicit VM calls. The strategy is then to let memory
+ * pressure balance the physical segments with pageable pages.
+ */
+ mem = vm_page_alloc_pa(0, selector, VM_PT_KERNEL);
+
+ if (mem == NULL) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return NULL;
+ }
+
+ mem->free = FALSE;
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return mem;
+}
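+
+/*
+ * Illustrative sketch, not part of the original file: grabbing a page
+ * that must be directly mapped, waiting for the pageout daemon when the
+ * free list is too small.
+ *
+ *	vm_page_t m;
+ *
+ *	while ((m = vm_page_grab(VM_PAGE_DIRECTMAP)) == VM_PAGE_NULL)
+ *		VM_PAGE_WAIT(NULL);
+ */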
+
+phys_addr_t vm_page_grab_phys_addr(void)
+{
+ vm_page_t p = vm_page_grab(VM_PAGE_DIRECTMAP);
+ if (p == VM_PAGE_NULL)
+ return -1;
+ else
+ return p->phys_addr;
+}
+
+/*
+ * vm_page_release:
+ *
+ * Return a page to the free list.
+ */
+
+void vm_page_release(
+ vm_page_t mem,
+ boolean_t laundry,
+ boolean_t external_laundry)
+{
+ simple_lock(&vm_page_queue_free_lock);
+ if (mem->free)
+ panic("vm_page_release");
+ mem->free = TRUE;
+ vm_page_free_pa(mem, 0);
+ if (laundry) {
+ vm_page_laundry_count--;
+
+ if (vm_page_laundry_count == 0) {
+ vm_pageout_resume();
+ }
+ }
+ if (external_laundry) {
+
+ /*
+ * If vm_page_external_laundry_count is negative,
+ * the pageout daemon isn't expecting to be
+ * notified.
+ */
+
+ if (vm_page_external_laundry_count > 0) {
+ vm_page_external_laundry_count--;
+
+ if (vm_page_external_laundry_count == 0) {
+ vm_pageout_resume();
+ }
+ }
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+}
+
+/*
+ * vm_page_grab_contig:
+ *
+ * Remove a block of contiguous pages from the free list.
+ * Returns VM_PAGE_NULL if the request fails.
+ */
+
+vm_page_t vm_page_grab_contig(
+ vm_size_t size,
+ unsigned int selector)
+{
+ unsigned int i, order, nr_pages;
+ vm_page_t mem;
+
+ order = vm_page_order(size);
+ nr_pages = 1 << order;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ /* TODO Allow caller to pass type */
+ mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL);
+
+ if (mem == NULL) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return NULL;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ mem[i].free = FALSE;
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return mem;
+}
+
+/*
+ * vm_page_free_contig:
+ *
+ * Return a block of contiguous pages to the free list.
+ */
+
+void vm_page_free_contig(vm_page_t mem, vm_size_t size)
+{
+ unsigned int i, order, nr_pages;
+
+ order = vm_page_order(size);
+ nr_pages = 1 << order;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < nr_pages; i++) {
+ if (mem[i].free)
+ panic("vm_page_free_contig");
+
+ mem[i].free = TRUE;
+ }
+
+ vm_page_free_pa(mem, order);
+
+ simple_unlock(&vm_page_queue_free_lock);
+}
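+
+/*
+ * Illustrative sketch, an assumption rather than code from this file:
+ * allocating and releasing a physically contiguous four-page block from
+ * the DMA segment, e.g. for a legacy device buffer.
+ *
+ *	vm_page_t pages = vm_page_grab_contig(4 * PAGE_SIZE, VM_PAGE_SEL_DMA);
+ *
+ *	if (pages != VM_PAGE_NULL) {
+ *		phys_addr_t pa = pages->phys_addr;	// contiguous block base
+ *		// ... hand pa to the device ...
+ *		vm_page_free_contig(pages, 4 * PAGE_SIZE);
+ *	}
+ */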
+
+/*
+ * vm_page_alloc:
+ *
+ * Allocate and return a memory cell associated
+ * with this VM object/offset pair.
+ *
+ * Object must be locked.
+ */
+
+vm_page_t vm_page_alloc(
+ vm_object_t object,
+ vm_offset_t offset)
+{
+ vm_page_t mem;
+
+ mem = vm_page_grab(VM_PAGE_HIGHMEM);
+ if (mem == VM_PAGE_NULL)
+ return VM_PAGE_NULL;
+
+ vm_page_lock_queues();
+ vm_page_insert(mem, object, offset);
+ vm_page_unlock_queues();
+
+ return mem;
+}
+
+/*
+ * vm_page_free:
+ *
+ * Returns the given page to the free list,
+ * disassociating it with any VM object.
+ *
+ * Object and page queues must be locked prior to entry.
+ */
+void vm_page_free(
+ vm_page_t mem)
+{
+ if (mem->free)
+ panic("vm_page_free");
+
+ if (mem->tabled) {
+ vm_page_remove(mem);
+ }
+
+ assert(!mem->active && !mem->inactive);
+
+ if (mem->wire_count != 0) {
+ if (!mem->private && !mem->fictitious)
+ vm_page_wire_count--;
+ mem->wire_count = 0;
+ }
+
+ PAGE_WAKEUP_DONE(mem);
+
+ if (mem->absent)
+ vm_object_absent_release(mem->object);
+
+ /*
+ * XXX The calls to vm_page_init here are
+ * really overkill.
+ */
+
+ if (mem->private || mem->fictitious) {
+ vm_page_init(mem);
+ mem->phys_addr = vm_page_fictitious_addr;
+ mem->fictitious = TRUE;
+ vm_page_release_fictitious(mem);
+ } else {
+ boolean_t laundry = mem->laundry;
+ boolean_t external_laundry = mem->external_laundry;
+ vm_page_init(mem);
+ vm_page_release(mem, laundry, external_laundry);
+ }
+}
+
+/*
+ * vm_page_zero_fill:
+ *
+ * Zero-fill the specified page.
+ */
+void vm_page_zero_fill(
+ vm_page_t m)
+{
+ VM_PAGE_CHECK(m);
+
+ pmap_zero_page(m->phys_addr);
+}
+
+/*
+ * vm_page_copy:
+ *
+ * Copy one page to another
+ */
+
+void vm_page_copy(
+ vm_page_t src_m,
+ vm_page_t dest_m)
+{
+ VM_PAGE_CHECK(src_m);
+ VM_PAGE_CHECK(dest_m);
+
+ pmap_copy_page(src_m->phys_addr, dest_m->phys_addr);
+}
+
+#if MACH_VM_DEBUG
+/*
+ * Routine: vm_page_info
+ * Purpose:
+ * Return information about the global VP table.
+ * Fills the buffer with as much information as possible
+ * and returns the desired size of the buffer.
+ * Conditions:
+ * Nothing locked. The caller should provide
+ * possibly-pageable memory.
+ */
+
+unsigned int
+vm_page_info(
+ hash_info_bucket_t *info,
+ unsigned int count)
+{
+ int i;
+
+ if (vm_page_bucket_count < count)
+ count = vm_page_bucket_count;
+
+ for (i = 0; i < count; i++) {
+ vm_page_bucket_t *bucket = &vm_page_buckets[i];
+ unsigned int bucket_count = 0;
+ vm_page_t m;
+
+ simple_lock(&bucket->lock);
+ for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
+ bucket_count++;
+ simple_unlock(&bucket->lock);
+
+ /* don't touch pageable memory while holding locks */
+ info[i].hib_count = bucket_count;
+ }
+
+ return vm_page_bucket_count;
+}
+#endif /* MACH_VM_DEBUG */
+
+
+#if MACH_KDB
+#define printf kdbprintf
+
+/*
+ * Routine: vm_page_print [exported]
+ */
+void vm_page_print(const vm_page_t p)
+{
+ iprintf("Page 0x%X: object 0x%X,", (vm_offset_t) p, (vm_offset_t) p->object);
+	printf(" offset 0x%X,", p->offset);
+	printf(" wire_count %d,", p->wire_count);
+	printf(" %s",
+		(p->active ? "active" : (p->inactive ? "inactive" : "loose")));
+	printf("%s",
+		(p->free ? " free" : ""));
+	printf("%s",
+		(p->laundry ? " laundry" : ""));
+ printf("%s",
+ (p->dirty ? "dirty" : "clean"));
+ printf("%s",
+ (p->busy ? " busy" : ""));
+ printf("%s",
+ (p->absent ? " absent" : ""));
+ printf("%s",
+ (p->error ? " error" : ""));
+ printf("%s",
+ (p->fictitious ? " fictitious" : ""));
+ printf("%s",
+ (p->private ? " private" : ""));
+ printf("%s",
+ (p->wanted ? " wanted" : ""));
+ printf("%s,",
+ (p->tabled ? "" : "not_tabled"));
+ printf("phys_addr = 0x%X, lock = 0x%X, unlock_request = 0x%X\n",
+ p->phys_addr,
+ (vm_offset_t) p->page_lock,
+ (vm_offset_t) p->unlock_request);
+}
+#endif /* MACH_KDB */
diff --git a/vm/vm_resident.h b/vm/vm_resident.h
new file mode 100644
index 0000000..e8bf681
--- /dev/null
+++ b/vm/vm_resident.h
@@ -0,0 +1,45 @@
+/*
+ * Resident memory management module functions.
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Resident memory management module functions.
+ *
+ */
+
+#ifndef _VM_RESIDENT_H_
+#define _VM_RESIDENT_H_
+
+#include <mach/std_types.h>
+
+/*
+ * vm_page_replace:
+ *
+ * Exactly like vm_page_insert, except that we first
+ * remove any existing page at the given offset in object
+ * and we don't do deactivate-behind.
+ *
+ * The object and page must be locked.
+ */
+extern void vm_page_replace (
+ vm_page_t mem,
+ vm_object_t object,
+ vm_offset_t offset);
+
+#endif /* _VM_RESIDENT_H_ */
diff --git a/vm/vm_types.h b/vm/vm_types.h
new file mode 100644
index 0000000..f64ebee
--- /dev/null
+++ b/vm/vm_types.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2007 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Written by Thomas Schwinge.
+ */
+
+#ifndef VM_VM_TYPES_H
+#define VM_VM_TYPES_H
+
+/*
+ * Types defined:
+ *
+ * vm_map_t the high-level address map data structure.
+ * vm_object_t Virtual memory object.
+ * vm_page_t See `vm/vm_page.h'.
+ */
+
+typedef struct vm_map *vm_map_t;
+#define VM_MAP_NULL ((vm_map_t) 0)
+
+typedef struct vm_object *vm_object_t;
+#define VM_OBJECT_NULL ((vm_object_t) 0)
+
+typedef struct vm_page *vm_page_t;
+#define VM_PAGE_NULL ((vm_page_t) 0)
+
+
+#endif /* VM_VM_TYPES_H */
diff --git a/vm/vm_user.c b/vm/vm_user.c
new file mode 100644
index 0000000..868230a
--- /dev/null
+++ b/vm/vm_user.c
@@ -0,0 +1,803 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_user.c
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * User-exported virtual memory functions.
+ */
+
+#include <mach/boolean.h>
+#include <mach/kern_return.h>
+#include <mach/mach_types.h> /* to get vm_address_t */
+#include <mach/memory_object.h>
+#include <mach/std_types.h> /* to get pointer_t */
+#include <mach/vm_attributes.h>
+#include <mach/vm_param.h>
+#include <mach/vm_statistics.h>
+#include <mach/vm_cache_statistics.h>
+#include <mach/vm_sync.h>
+#include <kern/gnumach.server.h>
+#include <kern/host.h>
+#include <kern/mach.server.h>
+#include <kern/mach_host.server.h>
+#include <kern/task.h>
+#include <vm/vm_fault.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/memory_object_proxy.h>
+#include <vm/vm_page.h>
+
+
+
+vm_statistics_data_t vm_stat;
+
+/*
+ *	vm_allocate allocates "zero fill" memory in the specified
+ *	map.
+ */
+kern_return_t vm_allocate(
+ vm_map_t map,
+ vm_offset_t *addr,
+ vm_size_t size,
+ boolean_t anywhere)
+{
+ kern_return_t result;
+
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+ if (size == 0) {
+ *addr = 0;
+ return(KERN_SUCCESS);
+ }
+
+ if (anywhere)
+ *addr = vm_map_min(map);
+ else
+ *addr = trunc_page(*addr);
+ size = round_page(size);
+
+ result = vm_map_enter(
+ map,
+ addr,
+ size,
+ (vm_offset_t)0,
+ anywhere,
+ VM_OBJECT_NULL,
+ (vm_offset_t)0,
+ FALSE,
+ VM_PROT_DEFAULT,
+ VM_PROT_ALL,
+ VM_INHERIT_DEFAULT);
+
+ return(result);
+}
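+
+/*
+ * Illustrative sketch, an assumption about typical use: the same routine
+ * as reached from a user task through the MIG-generated stub.
+ * mach_task_self() and vm_page_size come from the user-side Mach headers.
+ *
+ *	vm_address_t addr;
+ *	kern_return_t kr;
+ *
+ *	kr = vm_allocate(mach_task_self(), &addr, 2 * vm_page_size, TRUE);
+ *	if (kr == KERN_SUCCESS) {
+ *		// ... use the zero-filled region at addr ...
+ *		(void) vm_deallocate(mach_task_self(), addr, 2 * vm_page_size);
+ *	}
+ */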
+
+/*
+ * vm_deallocate deallocates the specified range of addresses in the
+ * specified address map.
+ */
+kern_return_t vm_deallocate(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_size_t size)
+{
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if (size == (vm_offset_t) 0)
+ return(KERN_SUCCESS);
+
+ return(vm_map_remove(map, trunc_page(start), round_page(start+size)));
+}
+
+/*
+ * vm_inherit sets the inheritance of the specified range in the
+ * specified map.
+ */
+kern_return_t vm_inherit(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_size_t size,
+ vm_inherit_t new_inheritance)
+{
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ switch (new_inheritance) {
+ case VM_INHERIT_NONE:
+ case VM_INHERIT_COPY:
+ case VM_INHERIT_SHARE:
+ break;
+ default:
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ /*Check if range includes projected buffer;
+ user is not allowed direct manipulation in that case*/
+ if (projected_buffer_in_range(map, start, start+size))
+ return(KERN_INVALID_ARGUMENT);
+
+ return(vm_map_inherit(map,
+ trunc_page(start),
+ round_page(start+size),
+ new_inheritance));
+}
+
+/*
+ * vm_protect sets the protection of the specified range in the
+ * specified map.
+ */
+
+kern_return_t vm_protect(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_size_t size,
+ boolean_t set_maximum,
+ vm_prot_t new_protection)
+{
+ if ((map == VM_MAP_NULL) ||
+ (new_protection & ~(VM_PROT_ALL|VM_PROT_NOTIFY)))
+ return(KERN_INVALID_ARGUMENT);
+
+ /*Check if range includes projected buffer;
+ user is not allowed direct manipulation in that case*/
+ if (projected_buffer_in_range(map, start, start+size))
+ return(KERN_INVALID_ARGUMENT);
+
+ return(vm_map_protect(map,
+ trunc_page(start),
+ round_page(start+size),
+ new_protection,
+ set_maximum));
+}
+
+kern_return_t vm_statistics(
+ vm_map_t map,
+ vm_statistics_data_t *stat)
+{
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ *stat = vm_stat;
+
+ stat->pagesize = PAGE_SIZE;
+ stat->free_count = vm_page_mem_free();
+ stat->active_count = vm_page_active_count;
+ stat->inactive_count = vm_page_inactive_count;
+ stat->wire_count = vm_page_wire_count;
+
+ return(KERN_SUCCESS);
+}
+
+kern_return_t vm_cache_statistics(
+ vm_map_t map,
+ vm_cache_statistics_data_t *stats)
+{
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ stats->cache_object_count = vm_object_external_count;
+ stats->cache_count = vm_object_external_pages;
+
+ /* XXX Not implemented yet */
+ stats->active_tmp_count = 0;
+ stats->inactive_tmp_count = 0;
+ stats->active_perm_count = 0;
+ stats->inactive_perm_count = 0;
+ stats->dirty_count = 0;
+ stats->laundry_count = 0;
+ stats->writeback_count = 0;
+ stats->slab_count = 0;
+ stats->slab_reclaim_count = 0;
+ return KERN_SUCCESS;
+}
+
+/*
+ * Handle machine-specific attributes for a mapping, such
+ * as cachability, migrability, etc.
+ */
+kern_return_t vm_machine_attribute(
+ vm_map_t map,
+ vm_address_t address,
+ vm_size_t size,
+ vm_machine_attribute_t attribute,
+ vm_machine_attribute_val_t* value) /* IN/OUT */
+{
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ /*Check if range includes projected buffer;
+ user is not allowed direct manipulation in that case*/
+ if (projected_buffer_in_range(map, address, address+size))
+ return(KERN_INVALID_ARGUMENT);
+
+ return vm_map_machine_attribute(map, address, size, attribute, value);
+}
+
+kern_return_t vm_read(
+ vm_map_t map,
+ vm_address_t address,
+ vm_size_t size,
+ pointer_t *data,
+ mach_msg_type_number_t *data_size)
+{
+ kern_return_t error;
+ vm_map_copy_t ipc_address;
+
+ if (map == VM_MAP_NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if ((error = vm_map_copyin(map,
+ address,
+ size,
+ FALSE, /* src_destroy */
+ &ipc_address)) == KERN_SUCCESS) {
+ *data = (pointer_t) ipc_address;
+ *data_size = size;
+ }
+ return(error);
+}
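+
+/*
+ * Illustrative sketch, an assumption about typical user-side use: the
+ * data comes back as an out-of-line region mapped into the caller, which
+ * must deallocate it when done.  "task" and "addr" are placeholders.
+ *
+ *	pointer_t data;
+ *	mach_msg_type_number_t count;
+ *
+ *	kr = vm_read(task, addr, vm_page_size, &data, &count);
+ *	if (kr == KERN_SUCCESS) {
+ *		// ... inspect the copy at data ...
+ *		(void) vm_deallocate(mach_task_self(), (vm_address_t) data, count);
+ *	}
+ */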
+
+kern_return_t vm_write(
+ vm_map_t map,
+ vm_address_t address,
+ pointer_t data,
+ mach_msg_type_number_t size)
+{
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ return vm_map_copy_overwrite(map, address, (vm_map_copy_t) data,
+ FALSE /* interruptible XXX */);
+}
+
+kern_return_t vm_copy(
+ vm_map_t map,
+ vm_address_t source_address,
+ vm_size_t size,
+ vm_address_t dest_address)
+{
+ vm_map_copy_t copy;
+ kern_return_t kr;
+
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ kr = vm_map_copyin(map, source_address, size,
+ FALSE, &copy);
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ kr = vm_map_copy_overwrite(map, dest_address, copy,
+ FALSE /* interruptible XXX */);
+ if (kr != KERN_SUCCESS) {
+ vm_map_copy_discard(copy);
+ return kr;
+ }
+
+ return KERN_SUCCESS;
+}
+
+
+/*
+ * Routine: vm_map
+ */
+kern_return_t vm_map(
+ vm_map_t target_map,
+ vm_offset_t *address,
+ vm_size_t size,
+ vm_offset_t mask,
+ boolean_t anywhere,
+ ipc_port_t memory_object,
+ vm_offset_t offset,
+ boolean_t copy,
+ vm_prot_t cur_protection,
+ vm_prot_t max_protection,
+ vm_inherit_t inheritance)
+{
+ vm_object_t object;
+ kern_return_t result;
+
+ if ((target_map == VM_MAP_NULL) ||
+ (cur_protection & ~VM_PROT_ALL) ||
+ (max_protection & ~VM_PROT_ALL))
+ return(KERN_INVALID_ARGUMENT);
+
+ switch (inheritance) {
+ case VM_INHERIT_NONE:
+ case VM_INHERIT_COPY:
+ case VM_INHERIT_SHARE:
+ break;
+ default:
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ if (size == 0)
+ return KERN_INVALID_ARGUMENT;
+
+#ifdef USER32
+ if (mask & 0x80000000)
+ mask |= 0xffffffff00000000;
+#endif
+
+ *address = trunc_page(*address);
+ size = round_page(size);
+
+ if (!IP_VALID(memory_object)) {
+ object = VM_OBJECT_NULL;
+ offset = 0;
+ copy = FALSE;
+ } else if ((object = vm_object_enter(memory_object, size, FALSE))
+ == VM_OBJECT_NULL)
+ {
+ ipc_port_t real_memobj;
+ vm_prot_t prot;
+ vm_offset_t start;
+ vm_offset_t len;
+
+ result = memory_object_proxy_lookup (memory_object, &real_memobj,
+ &prot, &start, &len);
+ if (result != KERN_SUCCESS)
+ return result;
+
+ if (!copy)
+ {
+ /* Reduce the allowed access to the memory object. */
+ max_protection &= prot;
+ cur_protection &= prot;
+ }
+ else
+ {
+ /* Disallow making a copy unless the proxy allows reading. */
+ if (!(prot & VM_PROT_READ))
+ return KERN_PROTECTION_FAILURE;
+ }
+
+ /* Reduce the allowed range */
+ if ((start + offset + size) > (start + len))
+ return KERN_INVALID_ARGUMENT;
+
+ offset += start;
+
+ if ((object = vm_object_enter(real_memobj, size, FALSE))
+ == VM_OBJECT_NULL)
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ /*
+ * Perform the copy if requested
+ */
+
+ if (copy) {
+ vm_object_t new_object;
+ vm_offset_t new_offset;
+
+ result = vm_object_copy_strategically(object, offset, size,
+ &new_object, &new_offset,
+ &copy);
+
+ /*
+ * Throw away the reference to the
+ * original object, as it won't be mapped.
+ */
+
+ vm_object_deallocate(object);
+
+ if (result != KERN_SUCCESS)
+ return (result);
+
+ object = new_object;
+ offset = new_offset;
+ }
+
+ if ((result = vm_map_enter(target_map,
+ address, size, mask, anywhere,
+ object, offset,
+ copy,
+ cur_protection, max_protection, inheritance
+ )) != KERN_SUCCESS)
+ vm_object_deallocate(object);
+ return(result);
+}
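+
+/*
+ * Illustrative sketch, an assumption about typical use: mapping a memory
+ * object (possibly a proxy, cf. memory_object_proxy_lookup above)
+ * read-only anywhere in a task's map.  "memobj" is a placeholder port.
+ *
+ *	vm_offset_t addr = 0;
+ *
+ *	kr = vm_map(target_map, &addr, size, 0, TRUE,	// anywhere
+ *		    memobj, 0, FALSE,			// no copy
+ *		    VM_PROT_READ, VM_PROT_READ,
+ *		    VM_INHERIT_NONE);
+ */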
+
+/*
+ * Specify that the range of the virtual address space
+ * of the target task must not cause page faults for
+ * the indicated accesses.
+ *
+ * [ To unwire the pages, specify VM_PROT_NONE. ]
+ */
+kern_return_t vm_wire(const ipc_port_t port,
+ vm_map_t map,
+ vm_offset_t start,
+ vm_size_t size,
+ vm_prot_t access)
+{
+ boolean_t priv;
+
+ if (!IP_VALID(port))
+ return KERN_INVALID_HOST;
+
+ ip_lock(port);
+ if (!ip_active(port) ||
+ (ip_kotype(port) != IKOT_HOST_PRIV
+ && ip_kotype(port) != IKOT_HOST))
+ {
+ ip_unlock(port);
+ return KERN_INVALID_HOST;
+ }
+
+ priv = ip_kotype(port) == IKOT_HOST_PRIV;
+ ip_unlock(port);
+
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ if (access & ~VM_PROT_ALL)
+ return KERN_INVALID_ARGUMENT;
+
+ /*Check if range includes projected buffer;
+ user is not allowed direct manipulation in that case*/
+ if (projected_buffer_in_range(map, start, start+size))
+ return(KERN_INVALID_ARGUMENT);
+
+ /* TODO: make it tunable */
+ if (!priv && access != VM_PROT_NONE && map->size_wired + size > (8<<20))
+ return KERN_NO_ACCESS;
+
+ return vm_map_pageable(map, trunc_page(start), round_page(start+size),
+ access, TRUE, TRUE);
+}
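+
+/*
+ * Illustrative sketch, an assumption about typical use: wiring a buffer
+ * for read/write with the privileged host port, then unwiring it with
+ * VM_PROT_NONE as noted above.  "host_priv" and "buf" are placeholders.
+ *
+ *	kr = vm_wire(host_priv, map, buf, buf_size,
+ *		     VM_PROT_READ | VM_PROT_WRITE);
+ *	// ... perform I/O on the wired buffer ...
+ *	kr = vm_wire(host_priv, map, buf, buf_size, VM_PROT_NONE);
+ */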
+
+kern_return_t vm_wire_all(const ipc_port_t port, vm_map_t map, vm_wire_t flags)
+{
+ if (!IP_VALID(port))
+ return KERN_INVALID_HOST;
+
+ ip_lock(port);
+
+ if (!ip_active(port)
+ || (ip_kotype(port) != IKOT_HOST_PRIV)) {
+ ip_unlock(port);
+ return KERN_INVALID_HOST;
+ }
+
+ ip_unlock(port);
+
+ if (map == VM_MAP_NULL) {
+ return KERN_INVALID_TASK;
+ }
+
+ if (flags & ~VM_WIRE_ALL) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ /*Check if range includes projected buffer;
+ user is not allowed direct manipulation in that case*/
+ if (projected_buffer_in_range(map, map->min_offset, map->max_offset)) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
+ return vm_map_pageable_all(map, flags);
+}
+
+/*
+ * vm_object_sync synchronizes out pages from the memory object to its
+ * memory manager, if any.
+ */
+kern_return_t vm_object_sync(
+ vm_object_t object,
+ vm_offset_t offset,
+ vm_size_t size,
+ boolean_t should_flush,
+ boolean_t should_return,
+ boolean_t should_iosync)
+{
+ if (object == VM_OBJECT_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+	/* FIXME: we should introduce an internal function, e.g.
+	   vm_object_update, rather than calling memory_object_lock_request. */
+ vm_object_reference(object);
+
+ /* This is already always synchronous for now. */
+ (void) should_iosync;
+
+ size = round_page(offset + size) - trunc_page(offset);
+ offset = trunc_page(offset);
+
+ return memory_object_lock_request(object, offset, size,
+ should_return ?
+ MEMORY_OBJECT_RETURN_ALL :
+ MEMORY_OBJECT_RETURN_NONE,
+ should_flush,
+ VM_PROT_NO_CHANGE,
+ NULL, 0);
+}
+
+/*
+ * vm_msync synchronizes out pages from the map to their memory manager,
+ * if any.
+ */
+kern_return_t vm_msync(
+ vm_map_t map,
+ vm_address_t address,
+ vm_size_t size,
+ vm_sync_t sync_flags)
+{
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ return vm_map_msync(map, (vm_offset_t) address, size, sync_flags);
+}
+
+/*
+ * vm_allocate_contiguous allocates "zero fill" physical memory and maps
+ * it into the specified map.
+ */
+/* TODO: respect physical alignment (palign)
+ * and minimum physical address (pmin)
+ */
+kern_return_t vm_allocate_contiguous(
+ host_t host_priv,
+ vm_map_t map,
+ vm_address_t *result_vaddr,
+ rpc_phys_addr_t *result_paddr,
+ vm_size_t size,
+ rpc_phys_addr_t pmin,
+ rpc_phys_addr_t pmax,
+ rpc_phys_addr_t palign)
+{
+ vm_size_t alloc_size;
+ unsigned int npages;
+ unsigned int i;
+ unsigned int order;
+ unsigned int selector;
+ vm_page_t pages;
+ vm_object_t object;
+ kern_return_t kr;
+ vm_address_t vaddr;
+
+ if (host_priv == HOST_NULL)
+ return KERN_INVALID_HOST;
+
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ /* FIXME */
+ if (pmin != 0)
+ return KERN_INVALID_ARGUMENT;
+
+ if (palign == 0)
+ palign = PAGE_SIZE;
+
+ /* FIXME: Allows some small alignments less than page size */
+ if ((palign < PAGE_SIZE) && (PAGE_SIZE % palign == 0))
+ palign = PAGE_SIZE;
+
+ /* FIXME */
+ if (palign != PAGE_SIZE)
+ return KERN_INVALID_ARGUMENT;
+
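+	/*
+	 * Map pmax onto a page segment selector: each branch below picks
+	 * the selector whose limit bracket contains pmax, so the allocator
+	 * only considers physical segments the caller can address.  The
+	 * #ifdefs cover configurations where the DMA32 segment sits either
+	 * below or above the direct-mapped segment.
+	 */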
+ selector = VM_PAGE_SEL_DMA;
+ if (pmax > VM_PAGE_DMA_LIMIT)
+#ifdef VM_PAGE_DMA32_LIMIT
+#if VM_PAGE_DMA32_LIMIT < VM_PAGE_DIRECTMAP_LIMIT
+ if (pmax <= VM_PAGE_DMA32_LIMIT)
+ selector = VM_PAGE_SEL_DMA32;
+ if (pmax > VM_PAGE_DMA32_LIMIT)
+#endif
+#endif
+ if (pmax <= VM_PAGE_DIRECTMAP_LIMIT)
+ selector = VM_PAGE_SEL_DIRECTMAP;
+ if (pmax > VM_PAGE_DIRECTMAP_LIMIT)
+#ifdef VM_PAGE_DMA32_LIMIT
+#if VM_PAGE_DMA32_LIMIT > VM_PAGE_DIRECTMAP_LIMIT
+ if (pmax <= VM_PAGE_DMA32_LIMIT)
+ selector = VM_PAGE_SEL_DMA32;
+ if (pmax > VM_PAGE_DMA32_LIMIT)
+#endif
+#endif
+ if (pmax <= VM_PAGE_HIGHMEM_LIMIT)
+ selector = VM_PAGE_SEL_HIGHMEM;
+
+ size = vm_page_round(size);
+
+ if (size == 0)
+ return KERN_INVALID_ARGUMENT;
+
+ object = vm_object_allocate(size);
+
+ if (object == NULL)
+ return KERN_RESOURCE_SHORTAGE;
+
+ /*
+ * XXX The page allocator returns blocks with a power-of-two size.
+ * The requested size may not be a power-of-two, requiring some
+ * work to release back the pages that aren't needed.
+ */
+ order = vm_page_order(size);
+ alloc_size = (1 << (order + PAGE_SHIFT));
+ npages = vm_page_atop(alloc_size);
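+	/*
+	 * Example: with 4 KiB pages, a 20 KiB (5-page) request yields
+	 * order 3, i.e. an 8-page block; the 3 trailing pages are released
+	 * further down once the first vm_page_atop(size) pages have been
+	 * inserted and wired.
+	 */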
+
+ pages = vm_page_grab_contig(alloc_size, selector);
+
+ if (pages == NULL) {
+ vm_object_deallocate(object);
+ return KERN_RESOURCE_SHORTAGE;
+ }
+
+ vm_object_lock(object);
+ vm_page_lock_queues();
+
+ for (i = 0; i < vm_page_atop(size); i++) {
+ /*
+ * XXX We can safely handle contiguous pages as an array,
+ * but this relies on knowing the implementation of the
+ * page allocator.
+ */
+ pages[i].busy = FALSE;
+ vm_page_insert(&pages[i], object, vm_page_ptoa(i));
+ vm_page_wire(&pages[i]);
+ }
+
+ vm_page_unlock_queues();
+ vm_object_unlock(object);
+
+ for (i = vm_page_atop(size); i < npages; i++) {
+ vm_page_release(&pages[i], FALSE, FALSE);
+ }
+
+ vaddr = 0;
+ kr = vm_map_enter(map, &vaddr, size, 0, TRUE, object, 0, FALSE,
+ VM_PROT_READ | VM_PROT_WRITE,
+ VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_DEFAULT);
+
+ if (kr != KERN_SUCCESS) {
+ vm_object_deallocate(object);
+ return kr;
+ }
+
+ kr = vm_map_pageable(map, vaddr, vaddr + size,
+ VM_PROT_READ | VM_PROT_WRITE,
+ TRUE, TRUE);
+
+ if (kr != KERN_SUCCESS) {
+ vm_map_remove(map, vaddr, vaddr + size);
+ return kr;
+ }
+
+ *result_vaddr = vaddr;
+ *result_paddr = pages->phys_addr;
+
+ assert(*result_paddr >= pmin);
+ assert(*result_paddr + size <= pmax);
+
+ return KERN_SUCCESS;
+}
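+
+/*
+ * Illustrative sketch, not part of this file: a user-space driver
+ * allocating a small DMA buffer below 4 GiB, assuming the
+ * vm_allocate_contiguous RPC stub mirrors the kernel signature above
+ * (with the task port standing in for the map).  As noted in the FIXMEs,
+ * pmin must currently be 0 and palign is limited to 0 or PAGE_SIZE.
+ *
+ *	kern_return_t
+ *	alloc_dma_buffer(mach_port_t host_priv, vm_address_t *vaddr,
+ *			 rpc_phys_addr_t *paddr)
+ *	{
+ *		return vm_allocate_contiguous(host_priv, mach_task_self(),
+ *					      vaddr, paddr, 64 * 1024,
+ *					      0, 0x100000000ULL, PAGE_SIZE);
+ *	}
+ */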
+
+/*
+ *	vm_pages_phys returns the physical addresses of the resident pages
+ *	backing a region of memory.
+ */
+kern_return_t vm_pages_phys(
+ host_t host,
+ vm_map_t map,
+ vm_address_t address,
+ vm_size_t size,
+ rpc_phys_addr_array_t *pagespp,
+ mach_msg_type_number_t *countp)
+{
+ if (host == HOST_NULL)
+ return KERN_INVALID_HOST;
+ if (map == VM_MAP_NULL)
+ return KERN_INVALID_TASK;
+
+ if (!page_aligned(address))
+ return KERN_INVALID_ARGUMENT;
+ if (!page_aligned(size))
+ return KERN_INVALID_ARGUMENT;
+
+ mach_msg_type_number_t count = atop(size), cur;
+ rpc_phys_addr_array_t pagesp = *pagespp;
+ kern_return_t kr;
+
+ if (*countp < count) {
+ vm_offset_t allocated;
+		/* Avoid faults while we hold VM locks */
+ kr = kmem_alloc(ipc_kernel_map, &allocated,
+ count * sizeof(pagesp[0]));
+ if (kr != KERN_SUCCESS)
+ return KERN_RESOURCE_SHORTAGE;
+ pagesp = (rpc_phys_addr_array_t) allocated;
+ }
+
+ for (cur = 0; cur < count; cur++) {
+ vm_map_t cmap; /* current map in traversal */
+ rpc_phys_addr_t paddr;
+ vm_map_entry_t entry; /* entry in current map */
+
+ /* find the entry containing (or following) the address */
+ vm_map_lock_read(map);
+ for (cmap = map;;) {
+ /* cmap is read-locked */
+
+ if (!vm_map_lookup_entry(cmap, address, &entry)) {
+ entry = VM_MAP_ENTRY_NULL;
+ break;
+ }
+
+ if (entry->is_sub_map) {
+ /* move down to the sub map */
+
+ vm_map_t nmap = entry->object.sub_map;
+ vm_map_lock_read(nmap);
+ vm_map_unlock_read(cmap);
+ cmap = nmap;
+ continue;
+ } else {
+ /* Found it */
+ break;
+ }
+ /*NOTREACHED*/
+ }
+
+ paddr = 0;
+ if (entry) {
+ vm_offset_t offset = address - entry->vme_start + entry->offset;
+ vm_object_t object = entry->object.vm_object;
+
+ if (object) {
+ vm_object_lock(object);
+ vm_page_t page = vm_page_lookup(object, offset);
+ if (page) {
+ if (page->phys_addr != (typeof(pagesp[cur])) page->phys_addr)
+ printf("warning: physical address overflow in vm_pages_phys!!\n");
+ else
+ paddr = page->phys_addr;
+ }
+ vm_object_unlock(object);
+ }
+ }
+ vm_map_unlock_read(cmap);
+ pagesp[cur] = paddr;
+
+ address += PAGE_SIZE;
+ }
+
+ if (pagesp != *pagespp) {
+ vm_map_copy_t copy;
+ kr = vm_map_copyin(ipc_kernel_map, (vm_offset_t) pagesp,
+ count * sizeof(pagesp[0]), TRUE, &copy);
+ assert(kr == KERN_SUCCESS);
+ *pagespp = (rpc_phys_addr_array_t) copy;
+ }
+
+ *countp = count;
+
+ return KERN_SUCCESS;
+}
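+
+/*
+ * Illustrative sketch, not part of this file: looking up the physical
+ * addresses behind a page-aligned buffer, assuming the vm_pages_phys RPC
+ * stub mirrors the kernel signature above (with the task port standing in
+ * for the map).  Pages that are not resident come back as 0.
+ *
+ *	kern_return_t
+ *	buffer_phys(void *buf, vm_size_t len,
+ *		    rpc_phys_addr_array_t *pas, mach_msg_type_number_t *n)
+ *	{
+ *		*pas = NULL;
+ *		*n = 0;
+ *		return vm_pages_phys(mach_host_self(), mach_task_self(),
+ *				     (vm_address_t) buf, len, pas, n);
+ *	}
+ */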
diff --git a/vm/vm_user.h b/vm/vm_user.h
new file mode 100644
index 0000000..c6f20a8
--- /dev/null
+++ b/vm/vm_user.h
@@ -0,0 +1,60 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: vm/vm_user.h
+ * Author: Avadis Tevanian, Jr., Michael Wayne Young
+ * Date: 1986
+ *
+ * Declarations of user-visible virtual address space
+ * management functionality.
+ */
+
+#ifndef _VM_VM_USER_H_
+#define _VM_VM_USER_H_
+
+#include <mach/kern_return.h>
+#include <mach/std_types.h>
+#include <mach/mach_types.h>
+
+extern kern_return_t vm_allocate(vm_map_t, vm_offset_t *, vm_size_t,
+ boolean_t);
+extern kern_return_t vm_deallocate(vm_map_t, vm_offset_t, vm_size_t);
+extern kern_return_t vm_inherit(vm_map_t, vm_offset_t, vm_size_t,
+ vm_inherit_t);
+extern kern_return_t vm_protect(vm_map_t, vm_offset_t, vm_size_t, boolean_t,
+ vm_prot_t);
+extern kern_return_t vm_statistics(vm_map_t, vm_statistics_data_t *);
+extern kern_return_t vm_cache_statistics(vm_map_t, vm_cache_statistics_data_t *);
+extern kern_return_t vm_read(vm_map_t, vm_address_t, vm_size_t, pointer_t *,
+ vm_size_t *);
+extern kern_return_t vm_write(vm_map_t, vm_address_t, pointer_t, vm_size_t);
+extern kern_return_t vm_copy(vm_map_t, vm_address_t, vm_size_t,
+ vm_address_t);
+extern kern_return_t vm_map(vm_map_t, vm_offset_t *, vm_size_t, vm_offset_t,
+ boolean_t, ipc_port_t, vm_offset_t, boolean_t,
+ vm_prot_t, vm_prot_t, vm_inherit_t);
+
+#endif /* _VM_VM_USER_H_ */