path: root/vm/vm_page.c
Diffstat (limited to 'vm/vm_page.c')
-rw-r--r--  vm/vm_page.c  2164
1 file changed, 2164 insertions, 0 deletions
diff --git a/vm/vm_page.c b/vm/vm_page.c
new file mode 100644
index 0000000..04decbb
--- /dev/null
+++ b/vm/vm_page.c
@@ -0,0 +1,2164 @@
+/*
+ * Copyright (c) 2010-2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation uses the binary buddy system to manage its heap.
+ * Descriptions of the buddy system can be found in the following works:
+ * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
+ * - "Dynamic Storage Allocation: A Survey and Critical Review",
+ * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
+ *
+ * In addition, this allocator uses per-CPU pools of pages for order 0
+ * (i.e. single page) allocations. These pools act as caches (but are named
+ * differently to avoid confusion with CPU caches) that reduce contention on
+ * multiprocessor systems. When a pool is empty and cannot provide a page,
+ * it is filled by transferring multiple pages from the backend buddy system.
+ * The symmetric case, a full pool, is handled likewise, by draining
+ * pages back to the buddy system.
+ *
+ * TODO Limit number of dirty pages, block allocations above a top limit.
+ */
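As context for the code below: the allocator maintains VM_PAGE_NR_FREE_LISTS (11) free lists, one per block order, where an order-n block covers 2^n contiguous pages. A minimal user-space sketch of the resulting block sizes, assuming 4 KiB pages (an assumption, not taken from this file):

    /* Sketch: block sizes of an 11-order buddy system with 4 KiB pages
     * (assumed size). Order 0 is a single page, order 10 is 4 MiB. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long page_size = 4096; /* assumed */
        unsigned int order;

        for (order = 0; order < 11; order++)
            printf("order %2u: %4lu pages, %8lu KiB\n",
                   order, 1UL << order, (page_size << order) >> 10);

        return 0;
    }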
+
+#include <string.h>
+#include <kern/assert.h>
+#include <kern/counters.h>
+#include <kern/cpu_number.h>
+#include <kern/debug.h>
+#include <kern/list.h>
+#include <kern/lock.h>
+#include <kern/macros.h>
+#include <kern/printf.h>
+#include <kern/thread.h>
+#include <mach/vm_param.h>
+#include <machine/pmap.h>
+#include <sys/types.h>
+#include <vm/memory_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#define DEBUG 0
+
+#define __init
+#define __initdata
+#define __read_mostly
+
+#define thread_pin()
+#define thread_unpin()
+
+/*
+ * Number of free block lists per segment.
+ */
+#define VM_PAGE_NR_FREE_LISTS 11
+
+/*
+ * The size of a CPU pool is computed by dividing the number of pages in its
+ * containing segment by this value.
+ */
+#define VM_PAGE_CPU_POOL_RATIO 1024
+
+/*
+ * Maximum number of pages in a CPU pool.
+ */
+#define VM_PAGE_CPU_POOL_MAX_SIZE 128
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define VM_PAGE_CPU_POOL_TRANSFER_RATIO 2
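For a feel of these ratios, consider a hypothetical 256 MiB segment of 4 KiB pages, i.e. 65536 pages: the per-CPU pool then holds 64 pages and transfers 32 at a time. A sketch of the arithmetic, mirroring vm_page_seg_compute_pool_size() and vm_page_cpu_pool_init() further below:

    /* Worked example of the pool sizing ratios above; the 256 MiB
     * segment and the 4 KiB page size are hypothetical. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long nr_pages;
        int size, transfer;

        nr_pages = (256UL << 20) / 4096;    /* 65536 pages */
        size = nr_pages / 1024;             /* VM_PAGE_CPU_POOL_RATIO */

        if (size == 0)
            size = 1;
        else if (size > 128)                /* VM_PAGE_CPU_POOL_MAX_SIZE */
            size = 128;

        /* VM_PAGE_CPU_POOL_TRANSFER_RATIO, rounded up */
        transfer = (size + 2 - 1) / 2;

        printf("pool: %d pages, transfer: %d pages\n", size, transfer);
        return 0;                           /* prints 64 and 32 */
    }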
+
+/*
+ * Per-processor cache of pages.
+ */
+struct vm_page_cpu_pool {
+ simple_lock_data_t lock;
+ int size;
+ int transfer_size;
+ int nr_pages;
+ struct list pages;
+} __aligned(CPU_L1_SIZE);
+
+/*
+ * Special order value for pages that aren't in a free list. Such pages are
+ * either allocated, or part of a free block of pages but not the head page.
+ */
+#define VM_PAGE_ORDER_UNLISTED (VM_PAGE_NR_FREE_LISTS + 1)
+
+/*
+ * Doubly-linked list of free blocks.
+ */
+struct vm_page_free_list {
+ unsigned long size;
+ struct list blocks;
+};
+
+/*
+ * XXX Because of a potential deadlock involving the default pager (see
+ * vm_map_lock()), it's currently impossible to reliably determine the
+ * minimum number of free pages required for successful pageout. Since
+ * that process depends on the amount of physical memory, we scale
+ * the minimum number of free pages with it, in the hope that memory
+ * exhaustion happens as rarely as possible...
+ */
+
+/*
+ * Ratio used to compute the minimum number of free pages in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN_NUM 5
+#define VM_PAGE_SEG_THRESHOLD_MIN_DENOM 100
+
+/*
+ * Number of pages reserved for privileged allocations in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN 500
+
+/*
+ * Ratio used to compute the threshold below which pageout is started.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW_NUM 6
+#define VM_PAGE_SEG_THRESHOLD_LOW_DENOM 100
+
+/*
+ * Minimum value the low threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW 600
+
+#if VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN
+#error VM_PAGE_SEG_THRESHOLD_LOW invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN */
+
+/*
+ * Ratio used to compute the threshold above which pageout is stopped.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH_NUM 10
+#define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM 100
+
+/*
+ * Minimum value the high threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH 1000
+
+#if VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW
+#error VM_PAGE_SEG_THRESHOLD_HIGH invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW */
+
+/*
+ * Minimum number of pages allowed for a segment.
+ */
+#define VM_PAGE_SEG_MIN_PAGES 2000
+
+#if VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH
+#error VM_PAGE_SEG_MIN_PAGES invalid
+#endif /* VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH */
+
+/*
+ * Ratio used to compute the threshold of active pages beyond which
+ * to refill the inactive queue.
+ */
+#define VM_PAGE_HIGH_ACTIVE_PAGE_NUM 1
+#define VM_PAGE_HIGH_ACTIVE_PAGE_DENOM 3
+
+/*
+ * Page cache queue.
+ *
+ * XXX The current implementation hardcodes a preference to evict external
+ * pages first and keep internal ones as much as possible. This is because
+ * the Hurd default pager implementation suffers from bugs that can easily
+ * cause the system to freeze.
+ */
+struct vm_page_queue {
+ struct list internal_pages;
+ struct list external_pages;
+};
+
+/*
+ * Segment name buffer size.
+ */
+#define VM_PAGE_NAME_SIZE 16
+
+/*
+ * Segment of contiguous memory.
+ *
+ * XXX Per-segment locking is probably useless, since at least one of the
+ * page queues lock and the free page queue lock is held on any access.
+ * However it should first be made clear which lock protects access to
+ * which members of a segment.
+ */
+struct vm_page_seg {
+ struct vm_page_cpu_pool cpu_pools[NCPUS];
+
+ phys_addr_t start;
+ phys_addr_t end;
+ struct vm_page *pages;
+ struct vm_page *pages_end;
+ simple_lock_data_t lock;
+ struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
+ unsigned long nr_free_pages;
+
+ /* Free memory thresholds */
+ unsigned long min_free_pages; /* Privileged allocations only */
+ unsigned long low_free_pages; /* Pageout daemon starts scanning */
+ unsigned long high_free_pages; /* Pageout daemon stops scanning,
+ unprivileged allocations resume */
+
+ /* Page cache related data */
+ struct vm_page_queue active_pages;
+ unsigned long nr_active_pages;
+ unsigned long high_active_pages;
+ struct vm_page_queue inactive_pages;
+ unsigned long nr_inactive_pages;
+};
+
+/*
+ * Bootstrap information about a segment.
+ */
+struct vm_page_boot_seg {
+ phys_addr_t start;
+ phys_addr_t end;
+ boolean_t heap_present;
+ phys_addr_t avail_start;
+ phys_addr_t avail_end;
+};
+
+static int vm_page_is_ready __read_mostly;
+
+/*
+ * Segment table.
+ *
+ * The system supports a maximum of 4 segments:
+ * - DMA: suitable for DMA
+ * - DMA32: suitable for DMA when devices support 32-bit addressing
+ * - DIRECTMAP: direct physical mapping, allows direct access from
+ * the kernel with a simple offset translation
+ * - HIGHMEM: must be mapped before it can be accessed
+ *
+ * Segments are ordered by priority, 0 being the lowest priority. Their
+ * relative priorities are DMA < DMA32 < DIRECTMAP < HIGHMEM or
+ * DMA < DIRECTMAP < DMA32 < HIGHMEM.
+ * Some segments may actually be aliases for others, e.g. if DMA is always
+ * possible from the direct physical mapping, DMA and DMA32 are aliases for
+ * DIRECTMAP, in which case the segment table contains DIRECTMAP and HIGHMEM
+ * only.
+ */
+static struct vm_page_seg vm_page_segs[VM_PAGE_MAX_SEGS];
+
+/*
+ * Bootstrap segment table.
+ */
+static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
+
+/*
+ * Number of loaded segments.
+ */
+static unsigned int vm_page_segs_size __read_mostly;
+
+/*
+ * If true, unprivileged allocations are blocked, disregarding any other
+ * condition.
+ *
+ * This variable is also used to resume clients once pages are available.
+ *
+ * The free page queue lock must be held when accessing this variable.
+ */
+static boolean_t vm_page_alloc_paused;
+
+static void __init
+vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
+{
+ memset(page, 0, sizeof(*page));
+ vm_page_init(page); /* vm_resident members */
+ page->type = VM_PT_RESERVED;
+ page->seg_index = seg_index;
+ page->order = VM_PAGE_ORDER_UNLISTED;
+ page->priv = NULL;
+ page->phys_addr = pa;
+}
+
+void
+vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
+{
+ unsigned int i, nr_pages;
+
+ nr_pages = 1 << order;
+
+ for (i = 0; i < nr_pages; i++)
+ page[i].type = type;
+}
+
+static boolean_t
+vm_page_pageable(const struct vm_page *page)
+{
+ return (page->object != NULL)
+ && (page->wire_count == 0)
+ && (page->active || page->inactive);
+}
+
+static boolean_t
+vm_page_can_move(const struct vm_page *page)
+{
+ /*
+ * This function is called on pages pulled from the page queues,
+ * implying they're pageable, which is why the wire count isn't
+ * checked here.
+ */
+
+ return !page->busy
+ && !page->wanted
+ && !page->absent
+ && page->object->alive;
+}
+
+static void
+vm_page_remove_mappings(struct vm_page *page)
+{
+ page->busy = TRUE;
+ pmap_page_protect(page->phys_addr, VM_PROT_NONE);
+
+ if (!page->dirty) {
+ page->dirty = pmap_is_modified(page->phys_addr);
+ }
+}
+
+static void __init
+vm_page_free_list_init(struct vm_page_free_list *free_list)
+{
+ free_list->size = 0;
+ list_init(&free_list->blocks);
+}
+
+static inline void
+vm_page_free_list_insert(struct vm_page_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+ free_list->size++;
+ list_insert_head(&free_list->blocks, &page->node);
+}
+
+static inline void
+vm_page_free_list_remove(struct vm_page_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(page->order != VM_PAGE_ORDER_UNLISTED);
+
+ free_list->size--;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
+{
+ struct vm_page_free_list *free_list = free_list; /* silence uninitialized warning */
+ struct vm_page *page, *buddy;
+ unsigned int i;
+
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ if (vm_page_alloc_paused && current_thread()
+ && !current_thread()->vm_privilege) {
+ return NULL;
+ } else if (seg->nr_free_pages <= seg->low_free_pages) {
+ vm_pageout_start();
+
+ if ((seg->nr_free_pages <= seg->min_free_pages)
+ && current_thread() && !current_thread()->vm_privilege) {
+ vm_page_alloc_paused = TRUE;
+ return NULL;
+ }
+ }
+
+ for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
+ free_list = &seg->free_lists[i];
+
+ if (free_list->size != 0)
+ break;
+ }
+
+ if (i == VM_PAGE_NR_FREE_LISTS)
+ return NULL;
+
+ page = list_first_entry(&free_list->blocks, struct vm_page, node);
+ vm_page_free_list_remove(free_list, page);
+ page->order = VM_PAGE_ORDER_UNLISTED;
+
+ while (i > order) {
+ i--;
+ buddy = &page[1 << i];
+ vm_page_free_list_insert(&seg->free_lists[i], buddy);
+ buddy->order = i;
+ }
+
+ seg->nr_free_pages -= (1 << order);
+
+ if (seg->nr_free_pages < seg->min_free_pages) {
+ vm_page_alloc_paused = TRUE;
+ }
+
+ return page;
+}
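The loop above implements the classic buddy split: when only a larger block is available, it is halved repeatedly, the upper half of each split being returned to the free list one order below, until a block of the requested order remains. A standalone sketch of the offsets involved, assuming the block starts at page index 0 (hypothetical):

    /* Sketch: splitting an order-3 block (8 pages) down to order 0.
     * Each iteration frees the upper half, &page[1 << i], at order i. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int i, order, block_order;

        order = 0;
        block_order = 3;

        for (i = block_order; i > order; ) {
            i--;
            printf("freed buddy at page offset %u, order %u\n",
                   1U << i, i);
        }

        /* prints offsets 4 (order 2), 2 (order 1), 1 (order 0);
           the order-0 block at offset 0 is the one returned */
        return 0;
    }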
+
+static void
+vm_page_seg_free_to_buddy(struct vm_page_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_page *buddy;
+ phys_addr_t pa, buddy_pa;
+ unsigned int nr_pages;
+
+ assert(page >= seg->pages);
+ assert(page < seg->pages_end);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ nr_pages = (1 << order);
+ pa = page->phys_addr;
+
+ while (order < (VM_PAGE_NR_FREE_LISTS - 1)) {
+ buddy_pa = pa ^ vm_page_ptoa(1ULL << order);
+
+ if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
+ break;
+
+ buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
+
+ if (buddy->order != order)
+ break;
+
+ vm_page_free_list_remove(&seg->free_lists[order], buddy);
+ buddy->order = VM_PAGE_ORDER_UNLISTED;
+ order++;
+ pa &= -vm_page_ptoa(1ULL << order);
+ page = &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ vm_page_free_list_insert(&seg->free_lists[order], page);
+ page->order = order;
+ seg->nr_free_pages += nr_pages;
+}
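The merge loop relies on the buddy property: two free order-n blocks can coalesce exactly when their physical addresses differ only in bit n of the page-aligned offset, so the buddy is found by XORing the address with the block size, and the merged block starts at the pair's common, more-aligned address. A user-space sketch of that arithmetic, assuming 4 KiB pages and a segment based at physical address 0 (both assumptions):

    /* Sketch of the buddy address arithmetic used above; the page size
     * and addresses are hypothetical. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long long pa, buddy_pa, page_size;
        unsigned int order;

        page_size = 4096;
        pa = 0x3000;                        /* order-0 block at page 3 */

        for (order = 0; order < 2; order++) {
            buddy_pa = pa ^ (page_size << order);
            printf("order %u: block %#llx, buddy %#llx\n",
                   order, pa, buddy_pa);
            /* align to the merged (order + 1) block */
            pa &= ~((page_size << (order + 1)) - 1);
        }

        /* order 0: block 0x3000, buddy 0x2000 -> merged block 0x2000
           order 1: block 0x2000, buddy 0x0000 -> merged block 0x0000 */
        return 0;
    }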
+
+static void __init
+vm_page_cpu_pool_init(struct vm_page_cpu_pool *cpu_pool, int size)
+{
+ simple_lock_init(&cpu_pool->lock);
+ cpu_pool->size = size;
+ cpu_pool->transfer_size = (size + VM_PAGE_CPU_POOL_TRANSFER_RATIO - 1)
+ / VM_PAGE_CPU_POOL_TRANSFER_RATIO;
+ cpu_pool->nr_pages = 0;
+ list_init(&cpu_pool->pages);
+}
+
+static inline struct vm_page_cpu_pool *
+vm_page_cpu_pool_get(struct vm_page_seg *seg)
+{
+ return &seg->cpu_pools[cpu_number()];
+}
+
+static inline struct vm_page *
+vm_page_cpu_pool_pop(struct vm_page_cpu_pool *cpu_pool)
+{
+ struct vm_page *page;
+
+ assert(cpu_pool->nr_pages != 0);
+ cpu_pool->nr_pages--;
+ page = list_first_entry(&cpu_pool->pages, struct vm_page, node);
+ list_remove(&page->node);
+ return page;
+}
+
+static inline void
+vm_page_cpu_pool_push(struct vm_page_cpu_pool *cpu_pool, struct vm_page *page)
+{
+ assert(cpu_pool->nr_pages < cpu_pool->size);
+ cpu_pool->nr_pages++;
+ list_insert_head(&cpu_pool->pages, &page->node);
+}
+
+static int
+vm_page_cpu_pool_fill(struct vm_page_cpu_pool *cpu_pool,
+ struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == 0);
+
+ simple_lock(&seg->lock);
+
+ for (i = 0; i < cpu_pool->transfer_size; i++) {
+ page = vm_page_seg_alloc_from_buddy(seg, 0);
+
+ if (page == NULL)
+ break;
+
+ vm_page_cpu_pool_push(cpu_pool, page);
+ }
+
+ simple_unlock(&seg->lock);
+
+ return i;
+}
+
+static void
+vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
+ struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == cpu_pool->size);
+
+ simple_lock(&seg->lock);
+
+ for (i = cpu_pool->transfer_size; i > 0; i--) {
+ page = vm_page_cpu_pool_pop(cpu_pool);
+ vm_page_seg_free_to_buddy(seg, page, 0);
+ }
+
+ simple_unlock(&seg->lock);
+}
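Because fill and drain move transfer_size pages (half the pool) at once, a CPU that keeps allocating or freeing single pages touches the shared buddy lists only once per batch instead of on every operation. A toy model of that hysteresis, counting backend visits for a run of order-0 allocations (illustrative numbers):

    /* Toy model: backend refills needed by an initially empty pool of
     * size 64 (transfer size 32) serving 1000 single-page allocations. */
    #include <stdio.h>

    int main(void)
    {
        int pool, transfer, refills, n;

        pool = 0;
        transfer = 32;
        refills = 0;

        for (n = 0; n < 1000; n++) {
            if (pool == 0) {                /* vm_page_cpu_pool_fill() */
                pool += transfer;
                refills++;
            }
            pool--;                         /* vm_page_cpu_pool_pop() */
        }

        printf("1000 allocations, %d refills\n", refills); /* 32 */
        return 0;
    }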
+
+static void
+vm_page_queue_init(struct vm_page_queue *queue)
+{
+ list_init(&queue->internal_pages);
+ list_init(&queue->external_pages);
+}
+
+static void
+vm_page_queue_push(struct vm_page_queue *queue, struct vm_page *page)
+{
+ if (page->external) {
+ list_insert_tail(&queue->external_pages, &page->node);
+ } else {
+ list_insert_tail(&queue->internal_pages, &page->node);
+ }
+}
+
+static void
+vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page)
+{
+ (void)queue;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only)
+{
+ struct vm_page *page;
+
+ if (!list_empty(&queue->external_pages)) {
+ page = list_first_entry(&queue->external_pages, struct vm_page, node);
+ return page;
+ }
+
+ if (!external_only && !list_empty(&queue->internal_pages)) {
+ page = list_first_entry(&queue->internal_pages, struct vm_page, node);
+ return page;
+ }
+
+ return NULL;
+}
+
+static struct vm_page_seg *
+vm_page_seg_get(unsigned short index)
+{
+ assert(index < vm_page_segs_size);
+ return &vm_page_segs[index];
+}
+
+static unsigned int
+vm_page_seg_index(const struct vm_page_seg *seg)
+{
+ unsigned int index;
+
+ index = seg - vm_page_segs;
+ assert(index < vm_page_segs_size);
+ return index;
+}
+
+static phys_addr_t __init
+vm_page_seg_size(struct vm_page_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+static int __init
+vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
+{
+ phys_addr_t size;
+
+ size = vm_page_atop(vm_page_seg_size(seg)) / VM_PAGE_CPU_POOL_RATIO;
+
+ if (size == 0)
+ size = 1;
+ else if (size > VM_PAGE_CPU_POOL_MAX_SIZE)
+ size = VM_PAGE_CPU_POOL_MAX_SIZE;
+
+ return size;
+}
+
+static void __init
+vm_page_seg_compute_pageout_thresholds(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = vm_page_atop(vm_page_seg_size(seg));
+
+ if (nr_pages < VM_PAGE_SEG_MIN_PAGES) {
+ panic("vm_page: segment too small");
+ }
+
+ seg->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM
+ / VM_PAGE_SEG_THRESHOLD_MIN_DENOM;
+
+ if (seg->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN) {
+ seg->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN;
+ }
+
+ seg->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM
+ / VM_PAGE_SEG_THRESHOLD_LOW_DENOM;
+
+ if (seg->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW) {
+ seg->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW;
+ }
+
+ seg->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM
+ / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM;
+
+ if (seg->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH) {
+ seg->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH;
+ }
+}
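Continuing the hypothetical 65536-page segment used earlier, these ratios yield min_free_pages = 65536 * 5 / 100 = 3276, low_free_pages = 65536 * 6 / 100 = 3932 and high_free_pages = 65536 * 10 / 100 = 6553, all above their respective floors of 500, 600 and 1000 pages: pageout begins once free pages drop to 3932 or below, only privileged threads may allocate at or below 3276, and scanning stops once free pages climb back to 6553.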
+
+static void __init
+vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
+ struct vm_page *pages)
+{
+ phys_addr_t pa;
+ int pool_size;
+ unsigned int i;
+
+ seg->start = start;
+ seg->end = end;
+ pool_size = vm_page_seg_compute_pool_size(seg);
+
+ for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
+ vm_page_cpu_pool_init(&seg->cpu_pools[i], pool_size);
+
+ seg->pages = pages;
+ seg->pages_end = pages + vm_page_atop(vm_page_seg_size(seg));
+ simple_lock_init(&seg->lock);
+
+ for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
+ vm_page_free_list_init(&seg->free_lists[i]);
+
+ seg->nr_free_pages = 0;
+
+ vm_page_seg_compute_pageout_thresholds(seg);
+
+ vm_page_queue_init(&seg->active_pages);
+ seg->nr_active_pages = 0;
+ vm_page_queue_init(&seg->inactive_pages);
+ seg->nr_inactive_pages = 0;
+
+ i = vm_page_seg_index(seg);
+
+ for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
+ vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa);
+}
+
+static struct vm_page *
+vm_page_seg_alloc(struct vm_page_seg *seg, unsigned int order,
+ unsigned short type)
+{
+ struct vm_page_cpu_pool *cpu_pool;
+ struct vm_page *page;
+ int filled;
+
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ if (order == 0) {
+ thread_pin();
+ cpu_pool = vm_page_cpu_pool_get(seg);
+ simple_lock(&cpu_pool->lock);
+
+ if (cpu_pool->nr_pages == 0) {
+ filled = vm_page_cpu_pool_fill(cpu_pool, seg);
+
+ if (!filled) {
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+ return NULL;
+ }
+ }
+
+ page = vm_page_cpu_pool_pop(cpu_pool);
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+ } else {
+ simple_lock(&seg->lock);
+ page = vm_page_seg_alloc_from_buddy(seg, order);
+ simple_unlock(&seg->lock);
+
+ if (page == NULL)
+ return NULL;
+ }
+
+ assert(page->type == VM_PT_FREE);
+ vm_page_set_type(page, order, type);
+ return page;
+}
+
+static void
+vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_page_cpu_pool *cpu_pool;
+
+ assert(page->type != VM_PT_FREE);
+ assert(order < VM_PAGE_NR_FREE_LISTS);
+
+ vm_page_set_type(page, order, VM_PT_FREE);
+
+ if (order == 0) {
+ thread_pin();
+ cpu_pool = vm_page_cpu_pool_get(seg);
+ simple_lock(&cpu_pool->lock);
+
+ if (cpu_pool->nr_pages == cpu_pool->size)
+ vm_page_cpu_pool_drain(cpu_pool, seg);
+
+ vm_page_cpu_pool_push(cpu_pool, page);
+ simple_unlock(&cpu_pool->lock);
+ thread_unpin();
+ } else {
+ simple_lock(&seg->lock);
+ vm_page_seg_free_to_buddy(seg, page, order);
+ simple_unlock(&seg->lock);
+ }
+}
+
+static void
+vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->active = TRUE;
+ page->reference = TRUE;
+ vm_page_queue_push(&seg->active_pages, page);
+ seg->nr_active_pages++;
+ vm_page_active_count++;
+}
+
+static void
+vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && page->active && !page->inactive);
+ page->active = FALSE;
+ vm_page_queue_remove(&seg->active_pages, page);
+ seg->nr_active_pages--;
+ vm_page_active_count--;
+}
+
+static void
+vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->inactive = TRUE;
+ vm_page_queue_push(&seg->inactive_pages, page);
+ seg->nr_inactive_pages++;
+ vm_page_inactive_count++;
+}
+
+static void
+vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && page->inactive);
+ page->inactive = FALSE;
+ vm_page_queue_remove(&seg->inactive_pages, page);
+ seg->nr_inactive_pages--;
+ vm_page_inactive_count--;
+}
+
+/*
+ * Attempt to pull an active page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only)
+{
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->active_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_active_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_active_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_active_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attempt to pull an inactive page.
+ *
+ * If successful, the object containing the page is locked.
+ *
+ * XXX See vm_page_seg_pull_active_page (duplicated code).
+ */
+static struct vm_page *
+vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only)
+{
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->inactive_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_inactive_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_inactive_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attempt to pull a page cache page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_cache_page(struct vm_page_seg *seg,
+ boolean_t external_only,
+ boolean_t *was_active)
+{
+ struct vm_page *page;
+
+ page = vm_page_seg_pull_inactive_page(seg, external_only);
+
+ if (page != NULL) {
+ *was_active = FALSE;
+ return page;
+ }
+
+ page = vm_page_seg_pull_active_page(seg, external_only);
+
+ if (page != NULL) {
+ *was_active = TRUE;
+ return page;
+ }
+
+ return NULL;
+}
+
+static boolean_t
+vm_page_seg_page_available(const struct vm_page_seg *seg)
+{
+ return (seg->nr_free_pages > seg->high_free_pages);
+}
+
+static boolean_t
+vm_page_seg_usable(const struct vm_page_seg *seg)
+{
+ if ((seg->nr_active_pages + seg->nr_inactive_pages) == 0) {
+ /* Nothing to page out, assume segment is usable */
+ return TRUE;
+ }
+
+ return (seg->nr_free_pages >= seg->high_free_pages);
+}
+
+static void
+vm_page_seg_double_lock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ assert(seg1 != seg2);
+
+ if (seg1 < seg2) {
+ simple_lock(&seg1->lock);
+ simple_lock(&seg2->lock);
+ } else {
+ simple_lock(&seg2->lock);
+ simple_lock(&seg1->lock);
+ }
+}
+
+static void
+vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ simple_unlock(&seg1->lock);
+ simple_unlock(&seg2->lock);
+}
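vm_page_seg_double_lock() imposes a global order, by ascending address, on lock acquisition: any two threads that need the same pair of segment locks take them in the same order regardless of argument order, so neither can hold one lock while waiting for the other. A generic user-space sketch of the same discipline, with POSIX mutexes standing in for simple locks (illustrative only):

    /* Sketch: address-ordered acquisition of two locks, mirroring
     * vm_page_seg_double_lock() above. */
    #include <pthread.h>
    #include <stdint.h>

    static void
    double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
    {
        if ((uintptr_t)a < (uintptr_t)b) {
            pthread_mutex_lock(a);
            pthread_mutex_lock(b);
        } else {
            pthread_mutex_lock(b);
            pthread_mutex_lock(a);
        }
    }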
+
+/*
+ * Attempt to balance a segment by moving one page to another segment.
+ *
+ * Return TRUE if a page was actually moved.
+ */
+static boolean_t
+vm_page_seg_balance_page(struct vm_page_seg *seg,
+ struct vm_page_seg *remote_seg)
+{
+ struct vm_page *src, *dest;
+ vm_object_t object;
+ vm_offset_t offset;
+ boolean_t was_active;
+
+ vm_page_lock_queues();
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_seg_double_lock(seg, remote_seg);
+
+ if (vm_page_seg_usable(seg)
+ || !vm_page_seg_page_available(remote_seg)) {
+ goto error;
+ }
+
+ src = vm_page_seg_pull_cache_page(seg, FALSE, &was_active);
+
+ if (src == NULL) {
+ goto error;
+ }
+
+ assert(src->object != NULL);
+ assert(!src->fictitious && !src->private);
+ assert(src->wire_count == 0);
+ assert(src->type != VM_PT_FREE);
+ assert(src->order == VM_PAGE_ORDER_UNLISTED);
+
+ dest = vm_page_seg_alloc_from_buddy(remote_seg, 0);
+ assert(dest != NULL);
+
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (!was_active && !src->reference && pmap_is_referenced(src->phys_addr)) {
+ src->reference = TRUE;
+ }
+
+ object = src->object;
+ offset = src->offset;
+ vm_page_remove(src);
+
+ vm_page_remove_mappings(src);
+
+ vm_page_set_type(dest, 0, src->type);
+ memcpy(&dest->vm_page_header, &src->vm_page_header,
+ VM_PAGE_BODY_SIZE);
+ vm_page_copy(src, dest);
+
+ if (!src->dirty) {
+ pmap_clear_modify(dest->phys_addr);
+ }
+
+ dest->busy = FALSE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_init(src);
+ src->free = TRUE;
+ simple_lock(&seg->lock);
+ vm_page_set_type(src, 0, VM_PT_FREE);
+ vm_page_seg_free_to_buddy(seg, src, 0);
+ simple_unlock(&seg->lock);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ vm_object_lock(object);
+ vm_page_insert(dest, object, offset);
+ vm_object_unlock(object);
+
+ if (was_active) {
+ vm_page_activate(dest);
+ } else {
+ vm_page_deactivate(dest);
+ }
+
+ vm_page_unlock_queues();
+
+ return TRUE;
+
+error:
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+ vm_page_unlock_queues();
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_balance(struct vm_page_seg *seg)
+{
+ struct vm_page_seg *remote_seg;
+ unsigned int i;
+ boolean_t balanced;
+
+ /*
+ * It's important here that pages are moved to lower priority
+ * segments first.
+ */
+
+ /* i is unsigned: decrementing past 0 wraps, ending the loop */
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ remote_seg = vm_page_seg_get(i);
+
+ if (remote_seg == seg) {
+ continue;
+ }
+
+ balanced = vm_page_seg_balance_page(seg, remote_seg);
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_evict(struct vm_page_seg *seg, boolean_t external_only,
+ boolean_t alloc_paused)
+{
+ struct vm_page *page;
+ boolean_t reclaim, double_paging;
+ vm_object_t object;
+ boolean_t was_active;
+
+ page = NULL;
+ object = NULL;
+ double_paging = FALSE;
+
+restart:
+ vm_page_lock_queues();
+ simple_lock(&seg->lock);
+
+ if (page != NULL) {
+ vm_object_lock(page->object);
+ } else {
+ page = vm_page_seg_pull_cache_page(seg, external_only, &was_active);
+
+ if (page == NULL) {
+ goto out;
+ }
+ }
+
+ assert(page->object != NULL);
+ assert(!page->fictitious && !page->private);
+ assert(page->wire_count == 0);
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+ object = page->object;
+
+ if (!was_active
+ && (page->reference || pmap_is_referenced(page->phys_addr))) {
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ vm_object_unlock(object);
+ vm_stat.reactivations++;
+ current_task()->reactivations++;
+ vm_page_unlock_queues();
+ page = NULL;
+ goto restart;
+ }
+
+ vm_page_remove_mappings(page);
+
+ if (!page->dirty && !page->precious) {
+ reclaim = TRUE;
+ goto out;
+ }
+
+ reclaim = FALSE;
+
+ /*
+ * If we are very low on memory, then we can't rely on an external
+ * pager to clean a dirty page, because external pagers are not
+ * vm-privileged.
+ *
+ * The laundry bit tells vm_pageout_setup not to do any special
+ * processing of this page since it's immediately going to be
+ * double paged out to the default pager. The laundry bit is
+ * reset and the page is inserted into an internal object by
+ * vm_pageout_setup before the second double paging pass.
+ *
+ * There is one important special case: the default pager can
+ * back external memory objects. When receiving the first
+ * pageout request, where the page is no longer present, a
+ * fault could occur, during which the map would be locked.
+ * This fault would cause a new paging request to the default
+ * pager. Receiving that request would deadlock when trying to
+ * lock the map again. Instead, the page isn't double paged
+ * and vm_pageout_setup wires the page down, trusting the
+ * default pager as for internal pages.
+ */
+
+ assert(!page->laundry);
+ assert(!(double_paging && page->external));
+
+ if (object->internal || !alloc_paused ||
+ memory_manager_default_port(object->pager)) {
+ double_paging = FALSE;
+ } else {
+ double_paging = page->laundry = TRUE;
+ }
+
+out:
+ simple_unlock(&seg->lock);
+
+ if (object == NULL) {
+ vm_page_unlock_queues();
+ return FALSE;
+ }
+
+ if (reclaim) {
+ vm_page_free(page);
+ vm_page_unlock_queues();
+
+ if (vm_object_collectable(object)) {
+ vm_object_collect(object);
+ } else {
+ vm_object_unlock(object);
+ }
+
+ return TRUE;
+ }
+
+ vm_page_unlock_queues();
+
+ /*
+ * If there is no memory object for the page, create one and hand it
+ * to the default pager. First try to collapse, so we don't create
+ * one unnecessarily.
+ */
+
+ if (!object->pager_initialized) {
+ vm_object_collapse(object);
+ }
+
+ if (!object->pager_initialized) {
+ vm_object_pager_create(object);
+ }
+
+ if (!object->pager_initialized) {
+ panic("vm_page_seg_evict");
+ }
+
+ vm_pageout_page(page, FALSE, TRUE); /* flush it */
+ vm_object_unlock(object);
+
+ if (double_paging) {
+ goto restart;
+ }
+
+ return TRUE;
+}
+
+static void
+vm_page_seg_compute_high_active_page(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = seg->nr_active_pages + seg->nr_inactive_pages;
+ seg->high_active_pages = nr_pages * VM_PAGE_HIGH_ACTIVE_PAGE_NUM
+ / VM_PAGE_HIGH_ACTIVE_PAGE_DENOM;
+}
+
+static void
+vm_page_seg_refill_inactive(struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+
+ simple_lock(&seg->lock);
+
+ vm_page_seg_compute_high_active_page(seg);
+
+ while (seg->nr_active_pages > seg->high_active_pages) {
+ page = vm_page_seg_pull_active_page(seg, FALSE);
+
+ if (page == NULL) {
+ break;
+ }
+
+ page->reference = FALSE;
+ pmap_clear_reference(page->phys_addr);
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+void __init
+vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+ struct vm_page_boot_seg *seg;
+
+ assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+ assert(start < end);
+ assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
+
+ seg = &vm_page_boot_segs[seg_index];
+ seg->start = start;
+ seg->end = end;
+ seg->heap_present = FALSE;
+
+#if DEBUG
+ printf("vm_page: load: %s: %llx:%llx\n",
+ vm_page_seg_name(seg_index),
+ (unsigned long long)start, (unsigned long long)end);
+#endif
+
+ vm_page_segs_size++;
+}
+
+void
+vm_page_load_heap(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+ struct vm_page_boot_seg *seg;
+
+ assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+
+ seg = &vm_page_boot_segs[seg_index];
+
+ assert(seg->start <= start);
+ assert(end <= seg->end);
+
+ seg->avail_start = start;
+ seg->avail_end = end;
+ seg->heap_present = TRUE;
+
+#if DEBUG
+ printf("vm_page: heap: %s: %llx:%llx\n",
+ vm_page_seg_name(seg_index),
+ (unsigned long long)start, (unsigned long long)end);
+#endif
+}
+
+int
+vm_page_ready(void)
+{
+ return vm_page_is_ready;
+}
+
+static unsigned int
+vm_page_select_alloc_seg(unsigned int selector)
+{
+ unsigned int seg_index;
+
+ switch (selector) {
+ case VM_PAGE_SEL_DMA:
+ seg_index = VM_PAGE_SEG_DMA;
+ break;
+ case VM_PAGE_SEL_DMA32:
+ seg_index = VM_PAGE_SEG_DMA32;
+ break;
+ case VM_PAGE_SEL_DIRECTMAP:
+ seg_index = VM_PAGE_SEG_DIRECTMAP;
+ break;
+ case VM_PAGE_SEL_HIGHMEM:
+ seg_index = VM_PAGE_SEG_HIGHMEM;
+ break;
+ default:
+ panic("vm_page: invalid selector");
+ }
+
+ return MIN(vm_page_segs_size - 1, seg_index);
+}
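The MIN() at the end clamps the selected index to the highest loaded segment, which is how the aliasing described in the segment table comment works: assuming the VM_PAGE_SEG_* indices follow the priority order given there, a configuration where only DIRECTMAP and HIGHMEM are loaded (vm_page_segs_size == 2) maps VM_PAGE_SEL_DMA and VM_PAGE_SEL_DMA32 to segment 0 (DIRECTMAP), while VM_PAGE_SEL_HIGHMEM still reaches segment 1.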
+
+static int __init
+vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
+{
+ return (seg->end != 0);
+}
+
+static void __init
+vm_page_check_boot_segs(void)
+{
+ unsigned int i;
+ int expect_loaded;
+
+ if (vm_page_segs_size == 0)
+ panic("vm_page: no physical memory loaded");
+
+ for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
+ expect_loaded = (i < vm_page_segs_size);
+
+ if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded)
+ continue;
+
+ panic("vm_page: invalid boot segment table");
+ }
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
+{
+ return seg->avail_end - seg->avail_start;
+}
+
+phys_addr_t __init
+vm_page_bootalloc(size_t size)
+{
+ struct vm_page_boot_seg *seg;
+ phys_addr_t pa;
+ unsigned int i;
+
+ /* Downward scan; unsigned wrap-around past 0 ends the loop */
+ for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
+ i < vm_page_segs_size;
+ i--) {
+ seg = &vm_page_boot_segs[i];
+
+ if (size <= vm_page_boot_seg_avail_size(seg)) {
+ pa = seg->avail_start;
+ seg->avail_start += vm_page_round(size);
+ return pa;
+ }
+ }
+
+ panic("vm_page: no physical memory available");
+}
+
+void __init
+vm_page_setup(void)
+{
+ struct vm_page_boot_seg *boot_seg;
+ struct vm_page_seg *seg;
+ struct vm_page *table, *page, *end;
+ size_t nr_pages, table_size;
+ unsigned long va;
+ unsigned int i;
+ phys_addr_t pa;
+
+ vm_page_check_boot_segs();
+
+ /*
+ * Compute the page table size.
+ */
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++)
+ nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
+
+ table_size = vm_page_round(nr_pages * sizeof(struct vm_page));
+ printf("vm_page: page table size: %lu entries (%luk)\n", nr_pages,
+ table_size >> 10);
+ table = (struct vm_page *)pmap_steal_memory(table_size);
+ va = (unsigned long)table;
+
+ /*
+ * Initialize the segments, associating them to the page table. When
+ * the segments are initialized, all their pages are set allocated.
+ * Pages are then released, which populates the free lists.
+ */
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+ boot_seg = &vm_page_boot_segs[i];
+ vm_page_seg_init(seg, boot_seg->start, boot_seg->end, table);
+ page = seg->pages + vm_page_atop(boot_seg->avail_start
+ - boot_seg->start);
+ end = seg->pages + vm_page_atop(boot_seg->avail_end
+ - boot_seg->start);
+
+ while (page < end) {
+ page->type = VM_PT_FREE;
+ vm_page_seg_free_to_buddy(seg, page, 0);
+ page++;
+ }
+
+ table += vm_page_atop(vm_page_seg_size(seg));
+ }
+
+ while (va < (unsigned long)table) {
+ pa = pmap_extract(kernel_pmap, va);
+ page = vm_page_lookup_pa(pa);
+ assert((page != NULL) && (page->type == VM_PT_RESERVED));
+ page->type = VM_PT_TABLE;
+ va += PAGE_SIZE;
+ }
+
+ vm_page_is_ready = 1;
+}
+
+void __init
+vm_page_manage(struct vm_page *page)
+{
+ assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
+ assert(page->type == VM_PT_RESERVED);
+
+ vm_page_set_type(page, 0, VM_PT_FREE);
+ vm_page_seg_free_to_buddy(&vm_page_segs[page->seg_index], page, 0);
+}
+
+struct vm_page *
+vm_page_lookup_pa(phys_addr_t pa)
+{
+ struct vm_page_seg *seg;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((pa >= seg->start) && (pa < seg->end))
+ return &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ return NULL;
+}
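The lookup is a linear scan over the (at most VM_PAGE_MAX_SEGS) segments, followed by constant-time indexing into the segment's page descriptor array. A standalone sketch of the index computation, with hypothetical segment bounds and a 4 KiB page size:

    /* Sketch: physical address to page descriptor index, assuming a
     * segment spanning [0x100000, 0x500000) and 4 KiB pages. */
    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long long start, end, pa;

        start = 0x100000;
        end = 0x500000;
        pa = 0x123000;

        assert(pa >= start && pa < end);
        printf("descriptor index: %llu\n", (pa - start) / 4096); /* 35 */
        return 0;
    }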
+
+static struct vm_page_seg *
+vm_page_lookup_seg(const struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((page->phys_addr >= seg->start) && (page->phys_addr < seg->end)) {
+ return seg;
+ }
+ }
+
+ return NULL;
+}
+
+void vm_page_check(const struct vm_page *page)
+{
+ if (page->fictitious) {
+ if (page->private) {
+ panic("vm_page: page both fictitious and private");
+ }
+
+ if (page->phys_addr != vm_page_fictitious_addr) {
+ panic("vm_page: invalid fictitious page");
+ }
+ } else {
+ struct vm_page_seg *seg;
+
+ if (page->phys_addr == vm_page_fictitious_addr) {
+ panic("vm_page: real page has fictitious address");
+ }
+
+ seg = vm_page_lookup_seg(page);
+
+ if (seg == NULL) {
+ if (!page->private) {
+ panic("vm_page: page claims it's managed but not in any segment");
+ }
+ } else {
+ if (page->private) {
+ struct vm_page *real_page;
+
+ if (vm_page_pageable(page)) {
+ panic("vm_page: private page is pageable");
+ }
+
+ real_page = vm_page_lookup_pa(page->phys_addr);
+
+ if (vm_page_pageable(real_page)) {
+ panic("vm_page: page underlying private page is pageable");
+ }
+
+ if ((real_page->type == VM_PT_FREE)
+ || (real_page->order != VM_PAGE_ORDER_UNLISTED)) {
+ panic("vm_page: page underlying private pagei is free");
+ }
+ } else {
+ unsigned int index;
+
+ index = vm_page_seg_index(seg);
+
+ if (index != page->seg_index) {
+ panic("vm_page: page segment mismatch");
+ }
+ }
+ }
+ }
+}
+
+struct vm_page *
+vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
+{
+ struct vm_page *page;
+ unsigned int i;
+
+ for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
+ page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
+
+ if (page != NULL)
+ return page;
+ }
+
+ if (!current_thread() || current_thread()->vm_privilege)
+ panic("vm_page: privileged thread unable to allocate page");
+
+ return NULL;
+}
+
+void
+vm_page_free_pa(struct vm_page *page, unsigned int order)
+{
+ assert(page != NULL);
+ assert(page->seg_index < ARRAY_SIZE(vm_page_segs));
+
+ vm_page_seg_free(&vm_page_segs[page->seg_index], page, order);
+}
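Taken together, vm_page_alloc_pa() and vm_page_free_pa() form the low-level interface: the caller names a block order, the most constrained acceptable segment, and a page type, and must free with the same order it allocated. A hedged usage fragment, not from this file; VM_PT_KERNEL is assumed to be one of the VM_PT_* type constants from vm/vm_page.h:

    /* Sketch: allocate and release an order-2 (4-page) block from the
     * direct-mapped segment. VM_PT_KERNEL is an assumed type constant. */
    struct vm_page *block;

    block = vm_page_alloc_pa(2, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL);

    if (block != NULL) {
        /* ... use physical memory at block->phys_addr ... */
        vm_page_free_pa(block, 2);      /* same order as the allocation */
    }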
+
+const char *
+vm_page_seg_name(unsigned int seg_index)
+{
+ /* Don't use a switch statement since segments can be aliased */
+ if (seg_index == VM_PAGE_SEG_HIGHMEM)
+ return "HIGHMEM";
+ else if (seg_index == VM_PAGE_SEG_DIRECTMAP)
+ return "DIRECTMAP";
+ else if (seg_index == VM_PAGE_SEG_DMA32)
+ return "DMA32";
+ else if (seg_index == VM_PAGE_SEG_DMA)
+ return "DMA";
+ else
+ panic("vm_page: invalid segment index");
+}
+
+void
+vm_page_info_all(void)
+{
+ struct vm_page_seg *seg;
+ unsigned long pages;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+ pages = (unsigned long)(seg->pages_end - seg->pages);
+ printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
+ vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT),
+ seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
+ printf("vm_page: %s: min:%lu low:%lu high:%lu\n",
+ vm_page_seg_name(vm_page_seg_index(seg)),
+ seg->min_free_pages, seg->low_free_pages, seg->high_free_pages);
+ }
+}
+
+phys_addr_t
+vm_page_seg_end(unsigned int selector)
+{
+ return vm_page_segs[vm_page_select_alloc_seg(selector)].end;
+}
+
+static unsigned long
+vm_page_boot_table_size(void)
+{
+ unsigned long nr_pages;
+ unsigned int i;
+
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
+ }
+
+ return nr_pages;
+}
+
+unsigned long
+vm_page_table_size(void)
+{
+ unsigned long nr_pages;
+ unsigned int i;
+
+ if (!vm_page_is_ready) {
+ return vm_page_boot_table_size();
+ }
+
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ nr_pages += vm_page_atop(vm_page_seg_size(&vm_page_segs[i]));
+ }
+
+ return nr_pages;
+}
+
+unsigned long
+vm_page_table_index(phys_addr_t pa)
+{
+ struct vm_page_seg *seg;
+ unsigned long index;
+ unsigned int i;
+
+ index = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((pa >= seg->start) && (pa < seg->end)) {
+ return index + vm_page_atop(pa - seg->start);
+ }
+
+ index += vm_page_atop(vm_page_seg_size(seg));
+ }
+
+ panic("vm_page: invalid physical address");
+}
+
+phys_addr_t
+vm_page_mem_size(void)
+{
+ phys_addr_t total;
+ unsigned int i;
+
+ total = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ total += vm_page_seg_size(&vm_page_segs[i]);
+ }
+
+ return total;
+}
+
+unsigned long
+vm_page_mem_free(void)
+{
+ unsigned long total;
+ unsigned int i;
+
+ total = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ total += vm_page_segs[i].nr_free_pages;
+ }
+
+ return total;
+}
+
+/*
+ * Mark this page as wired down by yet another map, removing it
+ * from paging queues as necessary.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_wire(struct vm_page *page)
+{
+ VM_PAGE_CHECK(page);
+
+ if (page->wire_count == 0) {
+ vm_page_queues_remove(page);
+
+ if (!page->private && !page->fictitious) {
+ vm_page_wire_count++;
+ }
+ }
+
+ page->wire_count++;
+}
+
+/*
+ * Release one wiring of this page, potentially enabling it to be paged again.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_unwire(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ assert(page->wire_count != 0);
+ page->wire_count--;
+
+ if ((page->wire_count != 0)
+ || page->fictitious
+ || page->private) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+
+ vm_page_wire_count--;
+}
+
+/*
+ * Returns the given page to the inactive list, indicating that
+ * no physical maps have access to this page.
+ * [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_deactivate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * This page is no longer very interesting. If it was
+ * interesting (active or inactive/referenced), then we
+ * clear the reference bit and (re)enter it in the
+ * inactive queue. Note wired pages should not have
+ * their reference bit cleared.
+ */
+
+ if (page->active || (page->inactive && page->reference)) {
+ if (!page->fictitious && !page->private && !page->absent) {
+ pmap_clear_reference(page->phys_addr);
+ }
+
+ page->reference = FALSE;
+ vm_page_queues_remove(page);
+ }
+
+ if ((page->wire_count == 0) && !page->fictitious
+ && !page->private && !page->inactive) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_inactive_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+/*
+ * Put the specified page on the active list (if appropriate).
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_activate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * Unconditionally remove so that, even if the page was already
+ * active, it gets back to the end of the active queue.
+ */
+ vm_page_queues_remove(page);
+
+ if ((page->wire_count == 0) && !page->fictitious && !page->private) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ if (page->active)
+ panic("vm_page_activate: already active");
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+void
+vm_page_queues_remove(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ assert(!page->active || !page->inactive);
+
+ if (!page->active && !page->inactive) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+
+ if (page->active) {
+ vm_page_seg_remove_active_page(seg, page);
+ } else {
+ vm_page_seg_remove_inactive_page(seg, page);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+/*
+ * Check whether segments are all usable for unprivileged allocations.
+ *
+ * If all segments are usable, resume pending unprivileged allocations
+ * and return TRUE.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+static boolean_t
+vm_page_check_usable(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t usable;
+ unsigned int i;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ usable = vm_page_seg_usable(seg);
+ simple_unlock(&seg->lock);
+
+ if (!usable) {
+ return FALSE;
+ }
+ }
+
+ vm_page_external_laundry_count = -1;
+ vm_page_alloc_paused = FALSE;
+ thread_wakeup(&vm_page_alloc_paused);
+ return TRUE;
+}
+
+static boolean_t
+vm_page_may_balance(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t page_available;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ page_available = vm_page_seg_page_available(seg);
+ simple_unlock(&seg->lock);
+
+ if (page_available) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_balance_once(void)
+{
+ boolean_t balanced;
+ unsigned int i;
+
+ /*
+ * It's important here that pages are moved from higher priority
+ * segments first.
+ */
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ balanced = vm_page_seg_balance(vm_page_seg_get(i));
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+boolean_t
+vm_page_balance(void)
+{
+ boolean_t balanced;
+
+ while (vm_page_may_balance()) {
+ balanced = vm_page_balance_once();
+
+ if (!balanced) {
+ break;
+ }
+ }
+
+ return vm_page_check_usable();
+}
+
+static boolean_t
+vm_page_evict_once(boolean_t external_only, boolean_t alloc_paused)
+{
+ boolean_t evicted;
+ unsigned int i;
+
+ /*
+ * It's important here that pages are evicted from lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ evicted = vm_page_seg_evict(vm_page_seg_get(i),
+ external_only, alloc_paused);
+
+ if (evicted) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+#define VM_PAGE_MAX_LAUNDRY 5
+#define VM_PAGE_MAX_EVICTIONS 5
+
+boolean_t
+vm_page_evict(boolean_t *should_wait)
+{
+ boolean_t pause, evicted, external_only, alloc_paused;
+ unsigned int i;
+
+ *should_wait = TRUE;
+ external_only = TRUE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_external_laundry_count = 0;
+ alloc_paused = vm_page_alloc_paused;
+ simple_unlock(&vm_page_queue_free_lock);
+
+again:
+ vm_page_lock_queues();
+ pause = (vm_page_laundry_count >= VM_PAGE_MAX_LAUNDRY);
+ vm_page_unlock_queues();
+
+ if (pause) {
+ simple_lock(&vm_page_queue_free_lock);
+ return FALSE;
+ }
+
+ for (i = 0; i < VM_PAGE_MAX_EVICTIONS; i++) {
+ evicted = vm_page_evict_once(external_only, alloc_paused);
+
+ if (!evicted) {
+ break;
+ }
+ }
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ /*
+ * Keep in mind eviction may not cause pageouts, since non-precious
+ * clean pages are simply released.
+ */
+ if ((vm_page_laundry_count == 0) && (vm_page_external_laundry_count == 0)) {
+ /*
+ * No pageout, but some clean pages were freed. Start a complete
+ * scan again without waiting.
+ */
+ if (evicted) {
+ *should_wait = FALSE;
+ return FALSE;
+ }
+
+ /*
+ * Eviction failed, consider pages from internal objects on the
+ * next attempt.
+ */
+ if (external_only) {
+ simple_unlock(&vm_page_queue_free_lock);
+ external_only = FALSE;
+ goto again;
+ }
+
+ /*
+ * TODO Find out what could cause this and how to deal with it.
+ * This will likely require an out-of-memory killer.
+ */
+
+ {
+ static boolean_t warned = FALSE;
+
+ if (!warned) {
+ printf("vm_page warning: unable to recycle any page\n");
+ warned = TRUE;
+ }
+ }
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return vm_page_check_usable();
+}
+
+void
+vm_page_refill_inactive(void)
+{
+ unsigned int i;
+
+ vm_page_lock_queues();
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ vm_page_seg_refill_inactive(vm_page_seg_get(i));
+ }
+
+ vm_page_unlock_queues();
+}
+
+void
+vm_page_wait(void (*continuation)(void))
+{
+ assert(!current_thread()->vm_privilege);
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ if (!vm_page_alloc_paused) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return;
+ }
+
+ assert_wait(&vm_page_alloc_paused, FALSE);
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (continuation != 0) {
+ counter(c_vm_page_wait_block_user++);
+ thread_block(continuation);
+ } else {
+ counter(c_vm_page_wait_block_kernel++);
+ thread_block((void (*)(void)) 0);
+ }
+}
+
+#if MACH_KDB
+#include <ddb/db_output.h>
+#define PAGES_PER_MB ((1<<20) / PAGE_SIZE)
+void db_show_vmstat(void)
+{
+ integer_t free_count = vm_page_mem_free();
+ unsigned i;
+
+ db_printf("%-20s %10uM\n", "size:",
+ (free_count + vm_page_active_count +
+ vm_page_inactive_count + vm_page_wire_count)
+ / PAGES_PER_MB);
+
+ db_printf("%-20s %10uM\n", "free:",
+ free_count / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "active:",
+ vm_page_active_count / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "inactive:",
+ vm_page_inactive_count / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "wired:",
+ vm_page_wire_count / PAGES_PER_MB);
+
+ db_printf("%-20s %10uM\n", "zero filled:",
+ vm_stat.zero_fill_count / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "reactivated:",
+ vm_stat.reactivations / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "pageins:",
+ vm_stat.pageins / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "pageouts:",
+ vm_stat.pageouts / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "page faults:",
+ vm_stat.faults / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "cow faults:",
+ vm_stat.cow_faults / PAGES_PER_MB);
+ db_printf("%-20s %10u%\n", "memobj hit ratio:",
+ (vm_stat.hits * 100) / vm_stat.lookups);
+
+ db_printf("%-20s %10u%\n", "cached_memobjs",
+ vm_object_external_count);
+ db_printf("%-20s %10uM\n", "cache",
+ vm_object_external_pages / PAGES_PER_MB);
+
+ for (i = 0; i < vm_page_segs_size; i++)
+ {
+ db_printf("\nSegment %s:\n", vm_page_seg_name(i));
+ db_printf("%-20s %10uM\n", "size:",
+ vm_page_seg_size(&vm_page_segs[i]) >> 20);
+ db_printf("%-20s %10uM\n", "free:",
+ vm_page_segs[i].nr_free_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "min_free:",
+ vm_page_segs[i].min_free_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "low_free:",
+ vm_page_segs[i].low_free_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "high_free:",
+ vm_page_segs[i].high_free_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "active:",
+ vm_page_segs[i].nr_active_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "high active:",
+ vm_page_segs[i].high_active_pages / PAGES_PER_MB);
+ db_printf("%-20s %10uM\n", "inactive:",
+ vm_page_segs[i].nr_inactive_pages / PAGES_PER_MB);
+ }
+}
+#endif /* MACH_KDB */