author    | Pasha <pasha@member.fsf.org> | 2024-02-20 18:49:50 +0000
committer | Pasha <pasha@member.fsf.org> | 2024-02-20 18:49:50 +0000
commit    | 5e0b8d508ed51004bd836384293be00950ee62c9 (patch)
tree      | e3f16b1aa8b7177032ce3ec429fbad2b1d92a876 /kern
init gnumach copy
Diffstat (limited to 'kern')
104 files changed, 32427 insertions, 0 deletions
diff --git a/kern/.gitignore b/kern/.gitignore new file mode 100644 index 0000000..72bccc6 --- /dev/null +++ b/kern/.gitignore @@ -0,0 +1,2 @@ +exc.none.defs.c +exc.none.msgids diff --git a/kern/act.c b/kern/act.c new file mode 100644 index 0000000..3819ef3 --- /dev/null +++ b/kern/act.c @@ -0,0 +1,1118 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: act.c + * + * Activation management routines + * + */ + +#ifdef MIGRATING_THREADS + +#include <string.h> + +#include <mach/kern_return.h> +#include <mach/alert.h> +#include <kern/slab.h> +#include <kern/thread.h> +#include <kern/task.h> +#include <kern/debug.h> +#include <kern/act.h> +#include <kern/current.h> +#include "ipc_target.h" + +static void special_handler(ReturnHandler *rh, struct Act *act); + +#ifdef ACT_STATIC_KLUDGE +#undef ACT_STATIC_KLUDGE +#define ACT_STATIC_KLUDGE 300 +#endif + +#ifndef ACT_STATIC_KLUDGE +static struct kmem_cache act_cache; +#else +static Act *act_freelist; +static Act free_acts[ACT_STATIC_KLUDGE]; +#endif + +/* This is a rather special activation + which resides at the top and bottom of every thread. + When the last "real" activation on a thread is destroyed, + the null_act on the bottom gets invoked, destroying the thread. + At the top, the null_act acts as an "invalid" cached activation, + which will always fail the cached-activation test on RPC paths. + + As you might expect, most of its members have no particular value. + alerts is zero. */ +Act null_act; + +void +global_act_init(void) +{ +#ifndef ACT_STATIC_KLUDGE + kmem_cache_init(&act_cache, "Act", sizeof(struct Act), 0, + NULL, 0); +#else + int i; + +printf("activations: [%x-%x]\n", &free_acts[0], &free_acts[ACT_STATIC_KLUDGE]); + act_freelist = &free_acts[0]; + free_acts[0].ipt_next = 0; + for (i = 1; i < ACT_STATIC_KLUDGE; i++) { + free_acts[i].ipt_next = act_freelist; + act_freelist = &free_acts[i]; + } + /* XXX simple_lock_init(&act_freelist->lock); */ +#endif + +#if 0 + simple_lock_init(&null_act.lock); + refcount_init(&null_act.ref_count, 1); +#endif + + act_machine_init(); +} + +/* Create a new activation in a specific task. 
+ Locking: Task */ +kern_return_t act_create(task_t task, vm_offset_t user_stack, + vm_offset_t user_rbuf, vm_size_t user_rbuf_size, + struct Act **new_act) +{ + Act *act; + +#ifndef ACT_STATIC_KLUDGE + act = (Act*)kmem_cache_alloc(&act_cache); + if (act == 0) + return(KERN_RESOURCE_SHORTAGE); +#else + /* XXX ipt_lock(act_freelist); */ + act = act_freelist; + if (act == 0) panic("out of activations"); + act_freelist = act->ipt_next; + /* XXX ipt_unlock(act_freelist); */ + act->ipt_next = 0; +#endif + memset(act, 0, sizeof(*act)); /*XXX shouldn't be needed */ + +#ifdef DEBUG + act->lower = act->higher = 0; +#endif + + /* Start with one reference for being active, another for the caller */ + simple_lock_init(&act->lock); + refcount_init(&act->ref_count, 2); + + /* Latch onto the task. */ + act->task = task; + task_reference(task); + + /* Other simple setup */ + act->ipt = 0; + act->thread = 0; + act->suspend_count = 0; + act->active = 1; + act->handlers = 0; + + /* The special_handler will always be last on the returnhandlers list. */ + act->special_handler.next = 0; + act->special_handler.handler = special_handler; + + ipc_act_init(task, act); + act_machine_create(task, act, user_stack, user_rbuf, user_rbuf_size); + + task_lock(task); + + /* Chain the act onto the task's list */ + act->task_links.next = task->acts.next; + act->task_links.prev = &task->acts; + task->acts.next->prev = &act->task_links; + task->acts.next = &act->task_links; + task->act_count++; + + task_unlock(task); + + *new_act = act; + return KERN_SUCCESS; +} + +/* This is called when an act's ref_count drops to zero. + This can only happen when thread is zero (not in use), + ipt is zero (not attached to any ipt), + and active is false (terminated). */ +static void act_free(Act *inc) +{ + act_machine_destroy(inc); + ipc_act_destroy(inc); + + /* Drop the task reference. */ + task_deallocate(inc->task); + + /* Put the act back on the act cache */ +#ifndef ACT_STATIC_KLUDGE + kmem_cache_free(&act_cache, (vm_offset_t)inc); +#else + /* XXX ipt_lock(act_freelist); */ + inc->ipt_next = act_freelist; + act_freelist = inc; + /* XXX ipt_unlock(act_freelist); */ +#endif +} + +void act_deallocate(Act *inc) +{ + refcount_drop(&inc->ref_count, act_free(inc)); +} + +/* Attach an act to the top of a thread ("push the stack"). + The thread must be either the current one or a brand-new one. + Assumes the act is active but not in use. + Assumes that if it is attached to an ipt (i.e. the ipt pointer is nonzero), + the act has already been taken off the ipt's list. + + Already locked: cur_thread, act */ +void act_attach(Act *act, thread_t thread, unsigned init_alert_mask) +{ + Act *lower; + + act->thread = thread; + + /* The thread holds a reference to the activation while using it. */ + refcount_take(&act->ref_count); + + /* XXX detach any cached activations from above the target */ + + /* Chain the act onto the thread's act stack. */ + lower = thread->top_act; + act->lower = lower; + lower->higher = act; + thread->top_act = act; + + act->alert_mask = init_alert_mask; + act->alerts = lower->alerts & init_alert_mask; +} + +/* Remove the current act from the top of the current thread ("pop the stack"). + Return it to the ipt it lives on, if any. 
+ Locking: Thread > Act(not on ipt) > ipc_target */ +void act_detach(Act *cur_act) +{ + thread_t cur_thread = cur_act->thread; + + thread_lock(cur_thread); + act_lock(cur_act); + + /* Unlink the act from the thread's act stack */ + cur_thread->top_act = cur_act->lower; + cur_act->thread = 0; +#ifdef DEBUG + cur_act->lower = cur_act->higher = 0; +#endif + + thread_unlock(cur_thread); + + /* Return it to the ipt's list */ + if (cur_act->ipt) + { + ipt_lock(cur_act->ipt); + cur_act->ipt_next = cur_act->ipt->ipt_acts; + cur_act->ipt->ipt_acts = cur_act; + ipt_unlock(cur_act->ipt); +#if 0 + printf(" return to ipt %x\n", cur_act->ipt); +#endif + } + + act_unlock(cur_act); + + /* Drop the act reference taken for being in use. */ + refcount_drop(&cur_act->ref_count, act_free(cur_act)); +} + + + +/*** Activation control support routines ***/ + +/* This is called by system-dependent code + when it detects that act->handlers is non-null + while returning into user mode. + Activations linked onto an ipt always have null act->handlers, + so RPC entry paths need not check it. + + Locking: Act */ +void act_execute_returnhandlers(void) +{ + Act *act = current_act(); + +#if 0 + printf("execute_returnhandlers\n"); +#endif + while (1) { + ReturnHandler *rh; + + /* Grab the next returnhandler */ + act_lock(act); + rh = act->handlers; + if (!rh) { + act_unlock(act); + return; + } + act->handlers = rh->next; + act_unlock(act); + + /* Execute it */ + (*rh->handler)(rh, act); + } +} + +/* Try to nudge an act into executing its returnhandler chain. + Ensures that the activation will execute its returnhandlers + before it next executes any of its user-level code. + Also ensures that it is safe to break the thread's activation chain + immediately above this activation, + by rolling out of any outstanding two-way-optimized RPC. + + The target activation is not necessarily active + or even in use by a thread. + If it isn't, this routine does nothing. + + Already locked: Act */ +static void act_nudge(struct Act *act) +{ + /* If it's suspended, wake it up. */ + thread_wakeup(&act->suspend_count); + + /* Do a machine-dependent low-level nudge. + If we're on a multiprocessor, + this may mean sending an interprocessor interrupt. + In any case, it means rolling out of two-way-optimized RPC paths. */ + act_machine_nudge(act); +} + +/* Install the special returnhandler that handles suspension and termination, + if it hasn't been installed already. + + Already locked: Act */ +static void install_special_handler(struct Act *act) +{ + ReturnHandler **rh; + + /* The work handler must always be the last ReturnHandler on the list, + because it can do tricky things like detach the act. */ + for (rh = &act->handlers; *rh; rh = &(*rh)->next); + if (rh != &act->special_handler.next) { + *rh = &act->special_handler; + } + + /* Nudge the target activation, + to ensure that it will see the returnhandler we're adding. */ + act_nudge(act); +} + +/* Locking: Act */ +static void special_handler(ReturnHandler *rh, struct Act *cur_act) +{ + retry: + + act_lock(cur_act); + + /* If someone has killed this invocation, + invoke the return path with a terminated exception. */ + if (!cur_act->active) { + act_unlock(cur_act); + act_machine_return(KERN_TERMINATED); + /* XXX should just set the activation's reentry_routine + and then return from special_handler(). + The magic reentry_routine should just pop its own activation + and chain to the reentry_routine of the _lower_ activation. 
+ If that lower activation is the null_act, + the thread will then be terminated. */ + } + + /* If we're suspended, go to sleep and wait for someone to wake us up. */ + if (cur_act->suspend_count) { + act_unlock(cur_act); + /* XXX mp unsafe */ + thread_wait((int)&cur_act->suspend_count, FALSE); + + act_lock(cur_act); + + /* If we're still (or again) suspended, + go to sleep again after executing any new returnhandlers that may have appeared. */ + if (cur_act->suspend_count) + install_special_handler(cur_act); + } + + act_unlock(cur_act); +} + +#if 0 /************************ OLD SEMI-OBSOLETE CODE *********************/ +static __dead void act_throughcall_return(Act *act) +{ + /* Done - destroy the act and return */ + act_detach(act); + act_terminate(act); + act_deallocate(act); + + /* XXX */ + thread_terminate_self(); +} + +__dead void act_throughcall(task_t task, void (*infunc)()) +{ + thread_t thread = current_thread(); + Act *act; + ReturnHandler rh; + int rc; + + rc = act_create(task, 0, 0, 0, &act); + if (rc) return rc; + + act->return_routine = act_throughcall_return; + + thread_lock(thread); + act_lock(act); + + act_attach(thread, act, 0); + + rh.handler = infunc; + rh.next = act->handlers; + act->handlers = &rh; + + act_unlock(act); + thread_unlock(thread); + + /* Call through the act into the returnhandler list */ + act_machine_throughcall(act); +} + + +/* Grab an act from the specified pool, to pass to act_upcall. + Returns with the act locked, since it's in an inconsistent state + (not on its ipt but not on a thread either). + Returns null if no acts are available on the ipt. + + Locking: ipc_target > Act(on ipt) */ +Act *act_grab(struct ipc_target *ipt) +{ + Act *act; + + ipt_lock(ipt); + + retry: + + /* Pull an act off the ipt's list. */ + act = ipt->acts; + if (!act) + goto none_avail; + ipt->acts = act->ipt_next; + + act_lock(act); + + /* If it's been terminated, drop it and get another one. */ + if (!act->active) { +#if 0 + printf("dropping terminated act %08x\n", act); +#endif + /* XXX ipt_deallocate(ipt); */ + act->ipt = 0; + act_unlock(act); + act_deallocate(act); + goto retry; + } + +none_avail: + ipt_unlock(ipt); + + return act; +} + +/* Try to make an upcall with an act on the specified ipt. + If the ipt is empty, returns KERN_RESOURCE_SHORTAGE. XXX??? + + Locking: ipc_target > Act > Thread */ +kern_return_t act_upcall(struct Act *act, unsigned init_alert_mask, + vm_offset_t user_entrypoint, vm_offset_t user_data) +{ + thread_t cur_thread = current_thread(); + int rc; + + /* XXX locking */ + + act_attach(cur_thread, act, init_alert_mask); + + /* Make the upcall into the destination task */ + rc = act_machine_upcall(act, user_entrypoint, user_data); + + /* Done - detach the act and return */ + act_detach(act); + + return rc; +} +#endif /************************ END OF OLD SEMI-OBSOLETE CODE *********************/ + + + + +/*** Act service routines ***/ + +/* Lock this act and its current thread. + We can only find the thread from the act + and the thread must be locked before the act, + requiring a little icky juggling. + + If the thread is not currently on any thread, + returns with only the act locked. + + Note that this routine is not called on any performance-critical path. + It is only for explicit act operations + which don't happen often. 
+ + Locking: Thread > Act */ +static thread_t act_lock_thread(Act *act) +{ + thread_t thread; + + retry: + + /* Find the thread */ + act_lock(act); + thread = act->thread; + if (thread == 0) + { + act_unlock(act); + return 0; + } + thread_reference(thread); + act_unlock(act); + + /* Lock the thread and re-lock the act, + and make sure the thread didn't change. */ + thread_lock(thread); + act_lock(act); + if (act->thread != thread) + { + act_unlock(act); + thread_unlock(thread); + thread_deallocate(thread); + goto retry; + } + + thread_deallocate(thread); + + return thread; +} + +/* Already locked: act->task + Locking: Task > Act */ +kern_return_t act_terminate_task_locked(struct Act *act) +{ + act_lock(act); + + if (act->active) + { + /* Unlink the act from the task's act list, + so it doesn't appear in calls to task_acts and such. + The act still keeps its ref on the task, however, + until it loses all its own references and is freed. */ + act->task_links.next->prev = act->task_links.prev; + act->task_links.prev->next = act->task_links.next; + act->task->act_count--; + + /* Remove it from any ipc_target. XXX is this right? */ + act_set_target(act, 0); + + /* This will allow no more control operations on this act. */ + act->active = 0; + + /* When the special_handler gets executed, + it will see the terminated condition and exit immediately. */ + install_special_handler(act); + + /* Drop the act reference taken for being active. + (There is still at least one reference left: the one we were passed.) */ + act_deallocate(act); + } + + act_unlock(act); + + return KERN_SUCCESS; +} + +/* Locking: Task > Act */ +kern_return_t act_terminate(struct Act *act) +{ + task_t task = act->task; + kern_return_t rc; + + /* act->task never changes, + so we can read it before locking the act. */ + task_lock(act->task); + + rc = act_terminate_task_locked(act); + + task_unlock(act->task); + + return rc; +} + +/* If this Act is on a Thread and is not the topmost, + yank it and everything below it off of the thread's stack + and put it all on a new thread forked from the original one. + May fail due to resource shortage, but can always be retried. + + Locking: Thread > Act */ +kern_return_t act_yank(Act *act) +{ + thread_t thread = act_lock_thread(act); + +#if 0 + printf("act_yank inc %08x thread %08x\n", act, thread); +#endif + if (thread) + { + if (thread->top_act != act) + { + printf("detaching act %08x from thread %08x\n", act, thread); + + /* Nudge the activation into a clean point for detachment. */ + act_nudge(act); + + /* Now detach the activation + and give the orphan its own flow of control. */ + /*XXX*/ + } + + thread_unlock(thread); + } + act_unlock(act); + + /* Ask the thread to return as quickly as possible, + because its results are now useless. */ + act_abort(act); + + return KERN_SUCCESS; +} + +/* Assign an activation to a specific ipc_target. + Fails if the activation is already assigned to another pool. + If ipt == 0, we remove the from its ipt. + + Locking: Act(not on ipt) > ipc_target > Act(on ipt) */ +kern_return_t act_set_target(Act *act, struct ipc_target *ipt) +{ + act_lock(act); + + if (ipt == 0) + { + Act **lact; + + ipt = act->ipt; + if (ipt == 0) + return; + + /* XXX This is a violation of the locking order. 
*/ + ipt_lock(ipt); + for (lact = &ipt->ipt_acts; *lact; lact = &((*lact)->ipt_next)) + if (act == *lact) + { + *lact = act->ipt_next; + break; + } + ipt_unlock(ipt); + + act->ipt = 0; + /* XXX ipt_deallocate(ipt); */ + act_deallocate(act); + return; + } + if (act->ipt != ipt) + { + if (act->ipt != 0) + { + act_unlock(act); + return KERN_FAILURE; /*XXX*/ + } + act->ipt = ipt; + ipt->ipt_type |= IPT_TYPE_MIGRATE_RPC; + + /* They get references to each other. */ + act_reference(act); + ipt_reference(ipt); + + /* If it is available, + add it to the ipt's available-activation list. */ + if ((act->thread == 0) && (act->suspend_count == 0)) + { + ipt_lock(ipt); + act->ipt_next = ipt->ipt_acts; + act->ipt->ipt_acts = act; + ipt_unlock(ipt); + } + } + act_unlock(act); + + return KERN_SUCCESS; +} + +/* Register an alert from this activation. + Each set bit is propagated upward from (but not including) this activation, + until the top of the chain is reached or the bit is masked. + + Locking: Thread > Act */ +kern_return_t act_alert(struct Act *act, unsigned alerts) +{ + thread_t thread = act_lock_thread(act); + +#if 0 + printf("act_alert %08x: %08x\n", act, alerts); +#endif + if (thread) + { + struct Act *act_up = act; + while ((alerts) && (act_up != thread->top_act)) + { + act_up = act_up->higher; + alerts &= act_up->alert_mask; + act_up->alerts |= alerts; + } + + /* XXX If we reach the top, and it is blocked in glue code, do something. */ + + thread_unlock(thread); + } + act_unlock(act); + + return KERN_SUCCESS; +} + +/* Locking: Thread > Act */ +kern_return_t act_abort(struct Act *act) +{ + return act_alert(act, ALERT_ABORT_STRONG); +} + +/* Locking: Thread > Act */ +kern_return_t act_abort_safely(struct Act *act) +{ + return act_alert(act, ALERT_ABORT_SAFE); +} + +/* Locking: Thread > Act */ +kern_return_t act_alert_mask(struct Act *act, unsigned alert_mask) +{ + panic("act_alert_mask\n"); + return KERN_SUCCESS; +} + +/* Locking: Thread > Act */ +kern_return_t act_suspend(struct Act *act) +{ + thread_t thread = act_lock_thread(act); + kern_return_t rc = KERN_SUCCESS; + +#if 0 + printf("act_suspend %08x\n", act); +#endif + if (act->active) + { + if (act->suspend_count++ == 0) + { + /* XXX remove from ipt */ + install_special_handler(act); + act_nudge(act); + } + } + else + rc = KERN_TERMINATED; + + if (thread) + thread_unlock(thread); + act_unlock(act); + + return rc; +} + +/* Locking: Act */ +kern_return_t act_resume(struct Act *act) +{ +#if 0 + printf("act_resume %08x from %d\n", act, act->suspend_count); +#endif + + act_lock(act); + if (!act->active) + { + act_unlock(act); + return KERN_TERMINATED; + } + + if (act->suspend_count > 0) { + if (--act->suspend_count == 0) { + thread_wakeup(&act->suspend_count); + /* XXX return to ipt */ + } + } + + act_unlock(act); + + return KERN_SUCCESS; +} + +typedef struct GetSetState { + struct ReturnHandler rh; + int flavor; + void *state; + int *pcount; + int result; +} GetSetState; + +/* Locking: Thread */ +kern_return_t get_set_state(struct Act *act, int flavor, void *state, int *pcount, + void (*handler)(ReturnHandler *rh, struct Act *act)) +{ + GetSetState gss; + + /* Initialize a small parameter structure */ + gss.rh.handler = handler; + gss.flavor = flavor; + gss.state = state; + gss.pcount = pcount; + + /* Add it to the act's return handler list */ + act_lock(act); + gss.rh.next = act->handlers; + act->handlers = &gss.rh; + + act_nudge(act); + + act_unlock(act); + /* XXX mp unsafe */ + thread_wait((int)&gss, 0); /* XXX could be interruptible */ + + 
return gss.result; +} + +static void get_state_handler(ReturnHandler *rh, struct Act *act) +{ + GetSetState *gss = (GetSetState*)rh; + + gss->result = act_machine_get_state(act, gss->flavor, gss->state, gss->pcount); + thread_wakeup((int)gss); +} + +/* Locking: Thread */ +kern_return_t act_get_state(struct Act *act, int flavor, natural_t *state, natural_t *pcount) +{ + return get_set_state(act, flavor, state, pcount, get_state_handler); +} + +static void set_state_handler(ReturnHandler *rh, struct Act *act) +{ + GetSetState *gss = (GetSetState*)rh; + + gss->result = act_machine_set_state(act, gss->flavor, gss->state, *gss->pcount); + thread_wakeup((int)gss); +} + +/* Locking: Thread */ +kern_return_t act_set_state(struct Act *act, int flavor, natural_t *state, natural_t count) +{ + return get_set_state(act, flavor, state, &count, set_state_handler); +} + + + +/*** backward compatibility hacks ***/ + +#include <mach/thread_info.h> +#include <mach/thread_special_ports.h> +#include <ipc/ipc_port.h> + +kern_return_t act_thread_info(Act *act, int flavor, + thread_info_t thread_info_out, unsigned *thread_info_count) +{ + return thread_info(act->thread, flavor, thread_info_out, thread_info_count); +} + +kern_return_t +act_thread_assign(Act *act, processor_set_t new_pset) +{ + return thread_assign(act->thread, new_pset); +} + +kern_return_t +act_thread_assign_default(Act *act) +{ + return thread_assign_default(act->thread); +} + +kern_return_t +act_thread_get_assignment(Act *act, processor_set_t *pset) +{ + return thread_get_assignment(act->thread, pset); +} + +kern_return_t +act_thread_priority(Act *act, int priority, boolean_t set_max) +{ + return thread_priority(act->thread, priority, set_max); +} + +kern_return_t +act_thread_max_priority(Act *act, processor_set_t *pset, int max_priority) +{ + return thread_max_priority(act->thread, pset, max_priority); +} + +kern_return_t +act_thread_policy(Act *act, int policy, int data) +{ + return thread_policy(act->thread, policy, data); +} + +kern_return_t +act_thread_wire(struct host *host, Act *act, boolean_t wired) +{ + return thread_wire(host, act->thread, wired); +} + +kern_return_t +act_thread_depress_abort(Act *act) +{ + return thread_depress_abort(act->thread); +} + +/* + * Routine: act_get_special_port [kernel call] + * Purpose: + * Clones a send right for one of the thread's + * special ports. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Extracted a send right. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +act_get_special_port(Act *act, int which, ipc_port_t *portp) +{ + ipc_port_t *whichp; + ipc_port_t port; + +#if 0 + printf("act_get_special_port\n"); +#endif + if (act == 0) + return KERN_INVALID_ARGUMENT; + + switch (which) { + case THREAD_KERNEL_PORT: + whichp = &act->self_port; + break; + + case THREAD_EXCEPTION_PORT: + whichp = &act->exception_port; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + thread_lock(act->thread); + + if (act->self_port == IP_NULL) { + thread_unlock(act->thread); + return KERN_FAILURE; + } + + port = ipc_port_copy_send(*whichp); + thread_unlock(act->thread); + + *portp = port; + return KERN_SUCCESS; +} + +/* + * Routine: act_set_special_port [kernel call] + * Purpose: + * Changes one of the thread's special ports, + * setting it to the supplied send right. + * Conditions: + * Nothing locked. If successful, consumes + * the supplied send right. 
+ * Returns: + * KERN_SUCCESS Changed the special port. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +act_set_special_port(Act *act, int which, ipc_port_t port) +{ + ipc_port_t *whichp; + ipc_port_t old; + +#if 0 + printf("act_set_special_port\n"); +#endif + if (act == 0) + return KERN_INVALID_ARGUMENT; + + switch (which) { + case THREAD_KERNEL_PORT: + whichp = &act->self_port; + break; + + case THREAD_EXCEPTION_PORT: + whichp = &act->exception_port; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + thread_lock(act->thread); + if (act->self_port == IP_NULL) { + thread_unlock(act->thread); + return KERN_FAILURE; + } + + old = *whichp; + *whichp = port; + thread_unlock(act->thread); + + if (IP_VALID(old)) + ipc_port_release_send(old); + return KERN_SUCCESS; +} + +/* + * XXX lame, non-blocking ways to get/set state. + * Return thread's machine-dependent state. + */ +kern_return_t +act_get_state_immediate( + Act *act, + int flavor, + void *old_state, /* pointer to OUT array */ + unsigned int *old_state_count) /*IN/OUT*/ +{ + kern_return_t ret; + + act_lock(act); + /* not the top activation, return current state */ + if (act->thread && act->thread->top_act != act) { + ret = act_machine_get_state(act, flavor, + old_state, old_state_count); + act_unlock(act); + return ret; + } + act_unlock(act); + + /* not sure this makes sense */ + return act_get_state(act, flavor, old_state, old_state_count); +} + +/* + * Change thread's machine-dependent state. + */ +kern_return_t +act_set_state_immediate( + Act *act, + int flavor, + void *new_state, + unsigned int new_state_count) +{ + kern_return_t ret; + + act_lock(act); + /* not the top activation, set it now */ + if (act->thread && act->thread->top_act != act) { + ret = act_machine_set_state(act, flavor, + new_state, new_state_count); + act_unlock(act); + return ret; + } + act_unlock(act); + + /* not sure this makes sense */ + return act_set_state(act, flavor, new_state, new_state_count); +} + +void act_count(void) +{ + int i; + Act *act; + static int amin = ACT_STATIC_KLUDGE; + + i = 0; + for (act = act_freelist; act; act = act->ipt_next) + i++; + if (i < amin) + amin = i; + printf("%d of %d activations in use, %d max\n", + ACT_STATIC_KLUDGE-i, ACT_STATIC_KLUDGE, ACT_STATIC_KLUDGE-amin); +} + +void dump_act(act) + Act *act; +{ + act_count(); + kact_count(); + while (act) { + printf("%08.8x: thread=%x, task=%x, hi=%x, lo=%x, ref=%x\n", + act, act->thread, act->task, + act->higher, act->lower, act->ref_count); + printf("\talerts=%x, mask=%x, susp=%x, active=%x\n", + act->alerts, act->alert_mask, + act->suspend_count, act->active); + machine_dump_act(&act->mact); + if (act == act->lower) + break; + act = act->lower; + } +} + +#ifdef ACTWATCH +Act * +get_next_act(int sp) +{ + static int i; + Act *act; + + while (1) { + if (i == ACT_STATIC_KLUDGE) { + i = 0; + return 0; + } + act = &free_acts[i]; + i++; + if (act->mact.space == sp) + return act; + } +} +#endif /* ACTWATCH */ + +#endif /* MIGRATING_THREADS */ diff --git a/kern/act.h b/kern/act.h new file mode 100644 index 0000000..f46f53a --- /dev/null +++ b/kern/act.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: act.h + * + * This defines the Act structure, + * which is the kernel representation of a user-space activation. + * + */ + +#ifndef _KERN_ACT_H_ +#define _KERN_ACT_H_ + +#ifdef MIGRATING_THREADS + +#ifndef __dead /* XXX */ +#define __dead +#endif + +#include <mach/vm_param.h> +#include <mach/port.h> +#include <kern/lock.h> +#include <kern/refcount.h> +#include <kern/queue.h> + +struct task; +struct thread; +struct Act; + + +struct ReturnHandler { + struct ReturnHandler *next; + void (*handler)(struct ReturnHandler *rh, struct Act *act); +}; +typedef struct ReturnHandler ReturnHandler; + + + +struct Act { + + /*** Task linkage ***/ + + /* Links for task's circular list of activations. + The activation is only on the task's activation list while active. + Must be first. */ + queue_chain_t task_links; + + /* Reference to the task this activation is in. + This is constant as long as the activation is allocated. */ + struct task *task; + + + + /*** Machine-dependent state ***/ + /* XXX should be first to allow maximum flexibility to MD code */ + MachineAct mact; + + + + /*** Consistency ***/ + RefCount ref_count; + decl_simple_lock_data(,lock) + + + + /*** ipc_target-related stuff ***/ + + /* ActPool this activation normally lives on, zero if none. + The activation and actpool hold references to each other as long as this is nonzero + (even when the activation isn't actually on the actpool's list). */ + struct ipc_target *ipt; + + /* Link on the ipt's list of activations. + The activation is only actually on the ipt's list (and hence this is valid) + when we're not in use (thread == 0) and not suspended (suspend_count == 0). */ + struct Act *ipt_next; + + + + /*** Thread linkage ***/ + + /* Thread this activation is in, zero if not in use. + The thread holds a reference on the activation while this is nonzero. */ + struct thread *thread; + + /* The rest in this section is only valid when thread is nonzero. */ + + /* Next higher and next lower activation on the thread's activation stack. + For a topmost activation or the null_act, higher is undefined. + The bottommost activation is always the null_act. */ + struct Act *higher, *lower; + + /* Alert bits pending at this activation; + some of them may have propagated from lower activations. */ + unsigned alerts; + + /* Mask of alert bits to be allowed to pass through from lower levels. */ + unsigned alert_mask; + + + + /*** Control information ***/ + + /* Number of outstanding suspensions on this activation. */ + int suspend_count; + + /* This is normally true, but is set to false when the activation is terminated. 
*/ + int active; + + /* Chain of return handlers to be called + before the thread is allowed to return to this invocation */ + ReturnHandler *handlers; + + /* A special ReturnHandler attached to the above chain to handle suspension and such */ + ReturnHandler special_handler; + + + + /* Special ports attached to this activation */ + struct ipc_port *self; /* not a right, doesn't hold ref */ + struct ipc_port *self_port; /* a send right */ + struct ipc_port *exception_port; /* a send right */ + struct ipc_port *syscall_port; /* a send right */ +}; +typedef struct Act Act; +typedef struct Act *act_t; +typedef mach_port_t *act_array_t; + +#define ACT_NULL ((Act*)0) + + +/* Exported to world */ +kern_return_t act_create(struct task *task, vm_offset_t user_stack, vm_offset_t user_rbuf, vm_size_t user_rbuf_size, struct Act **new_act); +kern_return_t act_alert_mask(struct Act *act, unsigned alert_mask); +kern_return_t act_alert(struct Act *act, unsigned alerts); +kern_return_t act_abort(struct Act *act); +kern_return_t act_abort_safely(struct Act *act); +kern_return_t act_terminate(struct Act *act); +kern_return_t act_suspend(struct Act *act); +kern_return_t act_resume(struct Act *act); +kern_return_t act_get_state(struct Act *act, int flavor, + natural_t *state, natural_t *pcount); +kern_return_t act_set_state(struct Act *act, int flavor, + natural_t *state, natural_t count); + +#define act_lock(act) simple_lock(&(act)->lock) +#define act_unlock(act) simple_unlock(&(act)->lock) + +#define act_reference(act) refcount_take(&(act)->ref_count) +void act_deallocate(struct Act *act); + +/* Exported to startup.c */ +void act_init(void); + +/* Exported to task.c */ +kern_return_t act_terminate_task_locked(struct Act *act); + +/* Exported to thread.c */ +extern Act null_act; + +/* Exported to machine-dependent activation code */ +void act_execute_returnhandlers(void); + + + +/* System-dependent functions */ +kern_return_t act_machine_create(struct task *task, Act *inc, vm_offset_t user_stack, vm_offset_t user_rbuf, vm_size_t user_rbuf_size); +void act_machine_destroy(Act *inc); +kern_return_t act_machine_set_state(Act *inc, int flavor, int *tstate, unsigned count); +kern_return_t act_machine_get_state(Act *inc, int flavor, int *tstate, unsigned *count); + + + +#endif /* MIGRATING_THREADS */ +#endif /* _KERN_ACT_H_ */ diff --git a/kern/assert.h b/kern/assert.h new file mode 100644 index 0000000..fed2a20 --- /dev/null +++ b/kern/assert.h @@ -0,0 +1,54 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#ifndef _KERN_ASSERT_H_ +#define _KERN_ASSERT_H_ + +/* assert.h 4.2 85/01/21 */ + +#include <kern/macros.h> + +#ifndef NDEBUG +#define MACH_ASSERT 1 +#endif + +#if MACH_ASSERT +extern void Assert(const char *exp, const char *filename, int line, + const char *fun) __attribute__ ((noreturn)); + +#define assert(ex) \ + (likely(ex) \ + ? (void) (0) \ + : Assert (#ex, __FILE__, __LINE__, __FUNCTION__)) + +#define assert_static(x) assert(x) + +#else /* MACH_ASSERT */ +#define assert(ex) +#define assert_static(ex) +#endif /* MACH_ASSERT */ + +#endif /* _KERN_ASSERT_H_ */ diff --git a/kern/ast.c b/kern/ast.c new file mode 100644 index 0000000..8c514b3 --- /dev/null +++ b/kern/ast.c @@ -0,0 +1,235 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * + * This file contains routines to check whether an ast is needed. + * + * ast_check() - check whether ast is needed for interrupt or context + * switch. Usually called by clock interrupt handler. + * + */ + +#include <kern/ast.h> +#include <kern/counters.h> +#include <kern/debug.h> +#include "cpu_number.h" +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> +#include <kern/processor.h> +#include <device/net_io.h> + +#include <machine/machspl.h> /* for splsched */ + +#if MACH_FIXPRI +#include <mach/policy.h> +#endif /* MACH_FIXPRI */ + + +volatile ast_t need_ast[NCPUS]; + +void +ast_init(void) +{ +#ifndef MACHINE_AST + int i; + + for (i=0; i<NCPUS; i++) + need_ast[i] = 0; +#endif /* MACHINE_AST */ +} + +void +ast_taken(void) +{ + thread_t self = current_thread(); + ast_t reasons; + + /* + * Interrupts are still disabled. + * We must clear need_ast and then enable interrupts. + */ + + reasons = need_ast[cpu_number()]; + need_ast[cpu_number()] = AST_ZILCH; + (void) spl0(); + + /* + * These actions must not block. + */ + + if (reasons & AST_NETWORK) + net_ast(); + + /* + * Make darn sure that we don't call thread_halt_self + * or thread_block from the idle thread. + */ + + if (self != current_processor()->idle_thread) { +#ifndef MIGRATING_THREADS + while (thread_should_halt(self)) + thread_halt_self(thread_exception_return); +#endif + + /* + * One of the previous actions might well have + * woken a high-priority thread, so we use + * csw_needed in addition to AST_BLOCK. 
+ */ + + if ((reasons & AST_BLOCK) || + csw_needed(self, current_processor())) { + counter(c_ast_taken_block++); + thread_block(thread_exception_return); + } + } +} + +void +ast_check(void) +{ + int mycpu = cpu_number(); + processor_t myprocessor; + thread_t thread = current_thread(); + run_queue_t rq; + spl_t s = splsched(); + + /* + * Check processor state for ast conditions. + */ + myprocessor = cpu_to_processor(mycpu); + switch(myprocessor->state) { + case PROCESSOR_OFF_LINE: + case PROCESSOR_IDLE: + case PROCESSOR_DISPATCHING: + /* + * No ast. + */ + break; + +#if NCPUS > 1 + case PROCESSOR_ASSIGN: + case PROCESSOR_SHUTDOWN: + /* + * Need ast to force action thread onto processor. + * + * XXX Should check if action thread is already there. + */ + ast_on(mycpu, AST_BLOCK); + break; +#endif /* NCPUS > 1 */ + + case PROCESSOR_RUNNING: + + /* + * Propagate thread ast to processor. If we already + * need an ast, don't look for more reasons. + */ + ast_propagate(thread, mycpu); + if (ast_needed(mycpu)) + break; + + /* + * Context switch check. The csw_needed macro isn't + * used here because the rq->low hint may be wrong, + * and fixing it here avoids an extra ast. + * First check the easy cases. + */ + if (thread->state & TH_SUSP || myprocessor->runq.count > 0) { + ast_on(mycpu, AST_BLOCK); + break; + } + + /* + * Update lazy evaluated runq->low if only timesharing. + */ +#if MACH_FIXPRI + if (myprocessor->processor_set->policies & POLICY_FIXEDPRI) { + if (csw_needed(thread,myprocessor)) { + ast_on(mycpu, AST_BLOCK); + break; + } + else { + /* + * For fixed priority threads, set first_quantum + * so entire new quantum is used. + */ + if (thread->policy == POLICY_FIXEDPRI) + myprocessor->first_quantum = TRUE; + } + } + else { +#endif /* MACH_FIXPRI */ + rq = &(myprocessor->processor_set->runq); + if (!(myprocessor->first_quantum) && (rq->count > 0)) { + queue_t q; + /* + * This is not the first quantum, and there may + * be something in the processor_set runq. + * Check whether low hint is accurate. + */ + q = rq->runq + *(volatile int *)&rq->low; + if (queue_empty(q)) { + int i; + + /* + * Need to recheck and possibly update hint. + */ + runq_lock(rq); + q = rq->runq + rq->low; + if (rq->count > 0) { + for (i = rq->low; i < NRQS; i++) { + if(!(queue_empty(q))) + break; + q++; + } + rq->low = i; + } + runq_unlock(rq); + } + + if (rq->low <= thread->sched_pri) { + ast_on(mycpu, AST_BLOCK); + break; + } + } +#if MACH_FIXPRI + } +#endif /* MACH_FIXPRI */ + break; + + default: + panic("ast_check: Bad processor state (cpu %d processor %p) state: %d", + mycpu, myprocessor, myprocessor->state); + } + + (void) splx(s); +} diff --git a/kern/ast.h b/kern/ast.h new file mode 100644 index 0000000..aded167 --- /dev/null +++ b/kern/ast.h @@ -0,0 +1,139 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * kern/ast.h: Definitions for Asynchronous System Traps. + */ + +#ifndef _KERN_AST_H_ +#define _KERN_AST_H_ + +/* + * A CPU takes an AST when it is about to return to user code. + * Instead of going back to user code, it calls ast_taken. + * Machine-dependent code is responsible for maintaining + * a set of reasons for an AST, and passing this set to ast_taken. + */ + +#include <kern/kern_types.h> +#include <kern/macros.h> +#include <machine/ast.h> + +/* + * Bits for reasons + */ + +#define AST_ZILCH 0x0 +#define AST_HALT 0x1 +#define AST_TERMINATE 0x2 +#define AST_BLOCK 0x4 +#define AST_NETWORK 0x8 +#define AST_NETIPC 0x10 + +#define AST_SCHEDULING (AST_HALT|AST_TERMINATE|AST_BLOCK) + +/* + * Per-thread ASTs are reset at context-switch time. + * machine/ast.h can define MACHINE_AST_PER_THREAD. + */ + +#ifndef MACHINE_AST_PER_THREAD +#define MACHINE_AST_PER_THREAD 0 +#endif + +#define AST_PER_THREAD (AST_HALT | AST_TERMINATE | MACHINE_AST_PER_THREAD) + +typedef unsigned long ast_t; + +extern volatile ast_t need_ast[NCPUS]; + +#ifdef MACHINE_AST +/* + * machine/ast.h is responsible for defining aston and astoff. + */ +#else /* MACHINE_AST */ + +#define aston(mycpu) +#define astoff(mycpu) + +#endif /* MACHINE_AST */ + +extern void ast_taken(void); + +/* + * ast_needed, ast_on, ast_off, ast_context, and ast_propagate + * assume splsched. mycpu is always cpu_number(). It is an + * argument in case cpu_number() is expensive. + */ + +#define ast_needed(mycpu) need_ast[mycpu] + +#define ast_on(mycpu, reasons) \ +MACRO_BEGIN \ + if ((need_ast[mycpu] |= (reasons)) != AST_ZILCH) \ + { aston(mycpu); } \ +MACRO_END + +#define ast_off(mycpu, reasons) \ +MACRO_BEGIN \ + if ((need_ast[mycpu] &= ~(reasons)) == AST_ZILCH) \ + { astoff(mycpu); } \ +MACRO_END + +#define ast_propagate(thread, mycpu) ast_on((mycpu), (thread)->ast) + +#define ast_context(thread, mycpu) \ +MACRO_BEGIN \ + if ((need_ast[mycpu] = \ + (need_ast[mycpu] &~ AST_PER_THREAD) | (thread)->ast) \ + != AST_ZILCH) \ + { aston(mycpu); } \ + else \ + { astoff(mycpu); } \ +MACRO_END + + +#define thread_ast_set(thread, reason) (thread)->ast |= (reason) +#define thread_ast_clear(thread, reason) (thread)->ast &= ~(reason) +#define thread_ast_clear_all(thread) (thread)->ast = AST_ZILCH + +/* + * NOTE: if thread is the current thread, thread_ast_set should + * be followed by ast_propagate(). + */ + +extern void ast_init (void); + +extern void ast_check (void); + +#if NCPUS > 1 +extern void init_ast_check(const processor_t processor); +extern void cause_ast_check(const processor_t processor); +#endif + +#endif /* _KERN_AST_H_ */ diff --git a/kern/atomic.h b/kern/atomic.h new file mode 100644 index 0000000..00da164 --- /dev/null +++ b/kern/atomic.h @@ -0,0 +1,54 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + Contributed by Agustina Arzille <avarzille@riseup.net>, 2017. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either + version 2 of the license, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this program; if not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _KERN_ATOMIC_H_ +#define _KERN_ATOMIC_H_ 1 + +/* Atomically compare *PTR with EXP and set it to NVAL if they're equal. + * Evaluates to a boolean, indicating whether the comparison was successful.*/ +#define __atomic_cas_helper(ptr, exp, nval, mo) \ + ({ \ + typeof(exp) __e = (exp); \ + __atomic_compare_exchange_n ((ptr), &__e, (nval), 0, \ + __ATOMIC_##mo, __ATOMIC_RELAXED); \ + }) + +#define atomic_cas_acq(ptr, exp, nval) \ + __atomic_cas_helper (ptr, exp, nval, ACQUIRE) + +#define atomic_cas_rel(ptr, exp, nval) \ + __atomic_cas_helper (ptr, exp, nval, RELEASE) + +#define atomic_cas_seq(ptr, exp, nval) \ + __atomic_cas_helper (ptr, exp, nval, SEQ_CST) + +/* Atomically exchange the value of *PTR with VAL, evaluating to + * its previous value. */ +#define __atomic_swap_helper(ptr, val, mo) \ + __atomic_exchange_n ((ptr), (val), __ATOMIC_##mo) + +#define atomic_swap_acq(ptr, val) \ + __atomic_swap_helper (ptr, val, ACQUIRE) + +#define atomic_swap_rel(ptr, val) \ + __atomic_swap_helper (ptr, val, RELEASE) + +#define atomic_swap_seq(ptr, val) \ + __atomic_swap_helper (ptr, val, SEQ_CST) + +#endif diff --git a/kern/boot_script.c b/kern/boot_script.c new file mode 100644 index 0000000..07ce4b3 --- /dev/null +++ b/kern/boot_script.c @@ -0,0 +1,791 @@ +/* Boot script parser for Mach. */ + +/* Written by Shantanu Goel (goel@cs.columbia.edu). */ + +#include <mach/mach_types.h> +#include <string.h> +#include <kern/printf.h> +#include "boot_script.h" +#include "bootstrap.h" + + +/* This structure describes a symbol. */ +struct sym +{ + /* Symbol name. */ + const char *name; + + /* Type of value returned by function. */ + int type; + + /* Symbol value. */ + long val; + + /* For function symbols; type of value returned by function. */ + int ret_type; + + /* For function symbols; if set, execute function at the time + of command execution, not during parsing. A function with + this field set must also have `no_arg' set. Also, the function's + `val' argument will always be NULL. */ + int run_on_exec; +}; + +/* Additional values symbols can take. + These are only used internally. */ +#define VAL_SYM 10 /* symbol table entry */ +#define VAL_FUNC 11 /* function pointer */ + +/* This structure describes an argument. */ +struct arg +{ + /* Argument text copied verbatim. 0 if none. */ + char *text; + + /* Type of value assigned. 0 if none. */ + int type; + + /* Argument value. */ + long val; +}; + +/* List of commands. */ +static struct cmd **cmds = 0; + +/* Amount allocated for `cmds'. */ +static int cmds_alloc = 0; + +/* Next available slot in `cmds'. */ +static int cmds_index = 0; + +/* Symbol table. */ +static struct sym **symtab = 0; + +/* Amount allocated for `symtab'. */ +static int symtab_alloc = 0; + +/* Next available slot in `symtab'. */ +static int symtab_index = 0; + +/* Create a task and suspend it. 
*/ +static int +create_task (struct cmd *cmd, long *val) +{ + int err = boot_script_task_create (cmd); + *val = (long) cmd->task; + return err; +} + +/* Resume a task. */ +static int +resume_task (struct cmd *cmd, const long *val) +{ + return boot_script_task_resume (cmd); +} + +/* Resume a task when the user hits return. */ +static int +prompt_resume_task (struct cmd *cmd, const long *val) +{ + return boot_script_prompt_task_resume (cmd); +} + +/* List of builtin symbols. */ +static struct sym builtin_symbols[] = +{ + { "task-create", VAL_FUNC, (long) create_task, VAL_TASK, 0 }, + { "task-resume", VAL_FUNC, (long) resume_task, VAL_NONE, 1 }, + { "prompt-task-resume", VAL_FUNC, (long) prompt_resume_task, VAL_NONE, 1 }, +}; +#define NUM_BUILTIN (sizeof (builtin_symbols) / sizeof (builtin_symbols[0])) + +/* Free CMD and all storage associated with it. + If ABORTING is set, terminate the task associated with CMD, + otherwise just deallocate the send right. */ +static void +free_cmd (struct cmd *cmd, int aborting) +{ + if (cmd->task) + boot_script_free_task (cmd->task, aborting); + if (cmd->args) + { + int i; + for (i = 0; i < cmd->args_index; i++) + boot_script_free (cmd->args[i], sizeof *cmd->args[i]); + boot_script_free (cmd->args, sizeof cmd->args[0] * cmd->args_alloc); + } + if (cmd->exec_funcs) + boot_script_free (cmd->exec_funcs, + sizeof cmd->exec_funcs[0] * cmd->exec_funcs_alloc); + boot_script_free (cmd, sizeof *cmd); +} + +/* Free all storage allocated by the parser. + If ABORTING is set, terminate all tasks. */ +static void +cleanup (int aborting) +{ + int i; + + for (i = 0; i < cmds_index; i++) + free_cmd (cmds[i], aborting); + boot_script_free (cmds, sizeof cmds[0] * cmds_alloc); + cmds = 0; + cmds_index = cmds_alloc = 0; + + for (i = 0; i < symtab_index; i++) + boot_script_free (symtab[i], sizeof *symtab[i]); + boot_script_free (symtab, sizeof symtab[0] * symtab_alloc); + symtab = 0; + symtab_index = symtab_alloc = 0; +} + +/* Add PTR to the list of pointers PTR_LIST, which + currently has ALLOC amount of space allocated to it, and + whose next available slot is INDEX. If more space + needs to to allocated, INCR is the amount by which + to increase it. Return 0 on success, non-zero otherwise. */ +static int +add_list (void *ptr, void ***ptr_list, int *alloc, int *index, int incr) +{ + if (*index == *alloc) + { + void **p; + + *alloc += incr; + p = boot_script_malloc (*alloc * sizeof (void *)); + if (! p) + { + *alloc -= incr; + return 1; + } + if (*ptr_list) + { + memcpy (p, *ptr_list, *index * sizeof (void *)); + boot_script_free (*ptr_list, (*alloc - incr) * sizeof (void *)); + } + *ptr_list = p; + } + *(*ptr_list + *index) = ptr; + *index += 1; + return 0; +} + +/* Create an argument with TEXT, value type TYPE, and value VAL. + Add the argument to the argument list of CMD. */ +static struct arg * +add_arg (struct cmd *cmd, char *text, int type, long val) +{ + struct arg *arg; + + arg = boot_script_malloc (sizeof (struct arg)); + if (arg) + { + arg->text = text; + arg->type = type; + arg->val = val; + if (add_list (arg, (void ***) &cmd->args, + &cmd->args_alloc, &cmd->args_index, 5)) + { + boot_script_free (arg, sizeof *arg); + return 0; + } + } + return arg; +} + +/* Search for the symbol NAME in the symbol table. */ +static struct sym * +sym_lookup (const char *name) +{ + int i; + + for (i = 0; i < symtab_index; i++) + if (! strcmp (name, symtab[i]->name)) + return symtab[i]; + return 0; +} + +/* Create an entry for symbol NAME in the symbol table. 
*/ +static struct sym * +sym_enter (const char *name) +{ + struct sym *sym; + + sym = boot_script_malloc (sizeof (struct sym)); + if (sym) + { + memset (sym, 0, sizeof (struct sym)); + sym->name = name; + if (add_list (sym, (void ***) &symtab, &symtab_alloc, &symtab_index, 20)) + { + boot_script_free (sym, sizeof *sym); + return 0; + } + } + return sym; +} + +/* Parse the command line CMDLINE. */ +int +boot_script_parse_line (void *hook, char *cmdline) +{ + char *p, *q; + int error; + struct cmd *cmd; + struct arg *arg; + + /* Extract command name. Ignore line if it lacks a command. */ + for (p = cmdline; *p == ' ' || *p == '\t'; p++) + ; + if (*p == '#') + /* Ignore comment line. */ + return 0; + +#if 0 + if (*p && *p != ' ' && *p != '\t' && *p != '\n') + { + printf ("(bootstrap): %s\n", cmdline); + } +#endif + + for (q = p; *q && *q != ' ' && *q != '\t' && *q != '\n'; q++) + ; + if (p == q) + return 0; + + *q = '\0'; + + /* Allocate a command structure. */ + cmd = boot_script_malloc (sizeof (struct cmd)); + if (! cmd) + return BOOT_SCRIPT_NOMEM; + memset (cmd, 0, sizeof (struct cmd)); + cmd->hook = hook; + cmd->path = p; + p = q + 1; + + for (arg = 0;;) + { + if (! arg) + { + /* Skip whitespace. */ + while (*p == ' ' || *p == '\t') + p++; + + /* End of command line. */ + if (! *p || *p == '\n') + { + /* Add command to list. */ + if (add_list (cmd, (void ***) &cmds, + &cmds_alloc, &cmds_index, 10)) + { + error = BOOT_SCRIPT_NOMEM; + goto bad; + } + return 0; + } + } + + /* Look for a symbol. */ + if (arg || (*p == '$' && (*(p + 1) == '{' || *(p + 1) == '('))) + { + char end_char = (*(p + 1) == '{') ? '}' : ')'; + struct sym *sym = 0; + + for (p += 2;;) + { + char c; + unsigned i; + int type; + long val; + struct sym *s; + + /* Parse symbol name. */ + for (q = p; *q && *q != '\n' && *q != end_char && *q != '='; q++) + ; + if (p == q || ! *q || *q == '\n' + || (end_char == '}' && *q != '}')) + { + error = BOOT_SCRIPT_SYNTAX_ERROR; + goto bad; + } + c = *q; + *q = '\0'; + + /* See if this is a builtin symbol. */ + for (i = 0; i < NUM_BUILTIN; i++) + if (! strcmp (p, builtin_symbols[i].name)) + break; + + if (i < NUM_BUILTIN) + s = &builtin_symbols[i]; + else + { + /* Look up symbol in symbol table. + If no entry exists, create one. */ + s = sym_lookup (p); + if (! s) + { + s = sym_enter (p); + if (! s) + { + error = BOOT_SCRIPT_NOMEM; + goto bad; + } + } + } + + /* Only values are allowed in ${...} constructs. */ + if (end_char == '}' && s->type == VAL_FUNC) + return BOOT_SCRIPT_INVALID_SYM; + + /* Check that assignment is valid. */ + if (c == '=' && s->type == VAL_FUNC) + { + error = BOOT_SCRIPT_INVALID_ASG; + goto bad; + } + + /* For function symbols, execute the function. */ + if (s->type == VAL_FUNC) + { + if (! s->run_on_exec) + { + (error + = ((*((int (*) (struct cmd *, long *)) s->val)) + (cmd, &val))); + if (error) + goto bad; + type = s->ret_type; + } + else + { + if (add_list (s, (void ***) &cmd->exec_funcs, + &cmd->exec_funcs_alloc, + &cmd->exec_funcs_index, 5)) + { + error = BOOT_SCRIPT_NOMEM; + goto bad; + } + type = VAL_NONE; + goto out; + } + } + else if (s->type == VAL_NONE) + { + type = VAL_SYM; + val = (long) s; + } + else + { + type = s->type; + val = s->val; + } + + if (sym) + { + sym->type = type; + sym->val = val; + } + else if (arg) + { + arg->type = type; + arg->val = val; + } + + out: + p = q + 1; + if (c == end_char) + { + /* Create an argument if necessary. + We create an argument if the symbol appears + in the expression by itself. 
+ + NOTE: This is temporary till the boot filesystem + servers support arguments. When that happens, + symbol values will only be printed if they're + associated with an argument. */ + if (! arg && end_char == '}') + { + if (! add_arg (cmd, 0, type, val)) + { + error = BOOT_SCRIPT_NOMEM; + goto bad; + } + } + arg = 0; + break; + } + if (s->type != VAL_FUNC) + sym = s; + } + } + else + { + char c; + + /* Command argument; just copy the text. */ + for (q = p;; q++) + { + if (! *q || *q == ' ' || *q == '\t' || *q == '\n') + break; + if (*q == '$' && *(q + 1) == '{') + break; + } + c = *q; + *q = '\0'; + + /* Add argument to list. */ + arg = add_arg (cmd, p, VAL_NONE, 0); + if (! arg) + { + error = BOOT_SCRIPT_NOMEM; + goto bad; + } + if (c == '$') + p = q; + else + { + if (c) + p = q + 1; + else + p = q; + arg = 0; + } + } + } + + + bad: + free_cmd (cmd, 1); + cleanup (1); + return error; +} + +/* Ensure that the command line buffer can accommodate LEN bytes of space. */ +#define CHECK_CMDLINE_LEN(len) \ +{ \ + if (cmdline_alloc - cmdline_index < len) \ + { \ + char *ptr; \ + int alloc, i; \ + alloc = cmdline_alloc + len - (cmdline_alloc - cmdline_index) + 100; \ + ptr = boot_script_malloc (alloc); \ + if (! ptr) \ + { \ + error = BOOT_SCRIPT_NOMEM; \ + goto done; \ + } \ + memcpy (ptr, cmdline, cmdline_index); \ + for (i = 0; i < argc; ++i) \ + argv[i] = ptr + (argv[i] - cmdline); \ + boot_script_free (cmdline, cmdline_alloc); \ + cmdline = ptr; \ + cmdline_alloc = alloc; \ + } \ +} + +/* Execute commands previously parsed. */ +int +boot_script_exec (void) +{ + int cmd_index; + + for (cmd_index = 0; cmd_index < cmds_index; cmd_index++) + { + char **argv, *cmdline; + int i, argc, cmdline_alloc; + int cmdline_index, error, arg_index; + struct cmd *cmd = cmds[cmd_index]; + + /* Skip command if it doesn't have an associated task. */ + if (cmd->task == 0) + continue; + + /* Allocate a command line and copy command name. */ + cmdline_index = strlen (cmd->path) + 1; + cmdline_alloc = cmdline_index + 100; + cmdline = boot_script_malloc (cmdline_alloc); + if (! cmdline) + { + cleanup (1); + return BOOT_SCRIPT_NOMEM; + } + memcpy (cmdline, cmd->path, cmdline_index); + + /* Allocate argument vector. */ + argv = boot_script_malloc (sizeof (char *) * (cmd->args_index + 2)); + if (! argv) + { + boot_script_free (cmdline, cmdline_alloc); + cleanup (1); + return BOOT_SCRIPT_NOMEM; + } + argv[0] = cmdline; + argc = 1; + + /* Build arguments. */ + for (arg_index = 0; arg_index < cmd->args_index; arg_index++) + { + struct arg *arg = cmd->args[arg_index]; + + /* Copy argument text. */ + if (arg->text) + { + int len = strlen (arg->text); + + if (arg->type == VAL_NONE) + len++; + CHECK_CMDLINE_LEN (len); + memcpy (cmdline + cmdline_index, arg->text, len); + argv[argc++] = &cmdline[cmdline_index]; + cmdline_index += len; + } + + /* Add value of any symbol associated with this argument. */ + if (arg->type != VAL_NONE) + { + char *p, buf[50]; + int len; + mach_port_name_t name; + + if (arg->type == VAL_SYM) + { + struct sym *sym = (struct sym *) arg->val; + + /* Resolve symbol value. */ + while (sym->type == VAL_SYM) + sym = (struct sym *) sym->val; + if (sym->type == VAL_NONE) + { + error = BOOT_SCRIPT_UNDEF_SYM; + printf("bootstrap script missing symbol '%s'\n", sym->name); + goto done; + } + arg->type = sym->type; + arg->val = sym->val; + } + + /* Print argument value. 
*/ + switch (arg->type) + { + case VAL_STR: + p = (char *) arg->val; + len = strlen (p); + break; + + case VAL_TASK: + case VAL_PORT: + if (arg->type == VAL_TASK) + /* Insert send right to task port. */ + error = boot_script_insert_task_port + (cmd, (task_t) arg->val, &name); + else + /* Insert send right. */ + error = boot_script_insert_right (cmd, + (mach_port_t) arg->val, + &name); + if (error) + goto done; + + i = name; + p = buf + sizeof (buf); + len = 0; + do + { + *--p = i % 10 + '0'; + len++; + } + while (i /= 10); + break; + + default: + error = BOOT_SCRIPT_BAD_TYPE; + goto done; + } + len++; + CHECK_CMDLINE_LEN (len); + memcpy (cmdline + cmdline_index, p, len - 1); + *(cmdline + cmdline_index + len - 1) = '\0'; + if (! arg->text) + argv[argc++] = &cmdline[cmdline_index]; + cmdline_index += len; + } + } + + /* Terminate argument vector. */ + argv[argc] = 0; + + /* Execute the command. */ + if (boot_script_exec_cmd (cmd->hook, cmd->task, cmd->path, + argc, argv, cmdline, cmdline_index)) + { + error = BOOT_SCRIPT_EXEC_ERROR; + goto done; + } + + error = 0; + + done: + boot_script_free (cmdline, cmdline_alloc); + boot_script_free (argv, sizeof (char *) * (cmd->args_index + 2)); + if (error) + { + cleanup (1); + return error; + } + } + + for (cmd_index = 0; cmd_index < cmds_index; cmd_index++) + { + int i; + struct cmd *cmd = cmds[cmd_index]; + + /* Execute functions that want to be run on exec. */ + for (i = 0; i < cmd->exec_funcs_index; i++) + { + struct sym *sym = cmd->exec_funcs[i]; + int error = ((*((int (*) (struct cmd *, int *)) sym->val)) + (cmd, 0)); + if (error) + { + cleanup (1); + return error; + } + } + } + + cleanup (0); + return 0; +} + +/* Create an entry for the variable NAME with TYPE and value VAL, + in the symbol table. */ +int +boot_script_set_variable (const char *name, int type, long val) +{ + struct sym *sym = sym_enter (name); + + if (sym) + { + sym->type = type; + sym->val = val; + } + return sym ? 0 : 1; +} + + +/* Define the function NAME, which will return type RET_TYPE. */ +int +boot_script_define_function (const char *name, int ret_type, + int (*func) (const struct cmd *cmd, int *val)) +{ + struct sym *sym = sym_enter (name); + + if (sym) + { + sym->type = VAL_FUNC; + sym->val = (long) func; + sym->ret_type = ret_type; + sym->run_on_exec = ret_type == VAL_NONE; + } + return sym ? 0 : 1; +} + + +/* Return a string describing ERR. 
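A minimal sketch of how an embedder might use the two entry points above (the port variable and the announce function are hypothetical; in this kernel the real registrations happen in kern/bootstrap.c and through the builtin_symbols table):

static mach_port_t example_port;	/* hypothetical port to expose as ${example-port} */

/* Runs at exec time because it is registered with ret_type VAL_NONE,
   so VAL is unused; returning non-zero aborts boot_script_exec.  */
static int
example_announce (const struct cmd *cmd, int *val)
{
  printf ("about to start %s\n", cmd->path);
  return 0;
}

static void
example_register (void)	/* hypothetical */
{
  if (boot_script_set_variable ("example-port", VAL_PORT, (long) example_port))
    panic ("cannot set boot-script variable example-port");
  if (boot_script_define_function ("example-announce", VAL_NONE, example_announce))
    panic ("cannot define boot-script function example-announce");
}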
*/ +char * +boot_script_error_string (int err) +{ + switch (err) + { + case BOOT_SCRIPT_NOMEM: + return "no memory"; + + case BOOT_SCRIPT_SYNTAX_ERROR: + return "syntax error"; + + case BOOT_SCRIPT_INVALID_ASG: + return "invalid variable in assignment"; + + case BOOT_SCRIPT_MACH_ERROR: + return "mach error"; + + case BOOT_SCRIPT_UNDEF_SYM: + return "undefined symbol"; + + case BOOT_SCRIPT_EXEC_ERROR: + return "exec error"; + + case BOOT_SCRIPT_INVALID_SYM: + return "invalid variable in expression"; + + case BOOT_SCRIPT_BAD_TYPE: + return "invalid value type"; + } + return 0; +} + +#ifdef BOOT_SCRIPT_TEST +#include <stdio.h> + +int +boot_script_exec_cmd (void *hook, + mach_port_t task, char *path, int argc, + char **argv, char *strings, int stringlen) +{ + int i; + + printf ("port = %d: ", (int) task); + for (i = 0; i < argc; i++) + printf ("%s ", argv[i]); + printf ("\n"); + return 0; +} + +void +main (int argc, char **argv) +{ + char buf[500], *p; + int len; + FILE *fp; + mach_port_name_t host_port, device_port; + + if (argc < 2) + { + fprintf (stderr, "Usage: %s <script>\n", argv[0]); + exit (1); + } + fp = fopen (argv[1], "r"); + if (! fp) + { + fprintf (stderr, "Can't open %s\n", argv[1]); + exit (1); + } + host_port = 1; + device_port = 2; + boot_script_set_variable ("host-port", VAL_PORT, (int) host_port); + boot_script_set_variable ("device-port", VAL_PORT, (int) device_port); + boot_script_set_variable ("root-device", VAL_STR, (int) "hd0a"); + boot_script_set_variable ("boot-args", VAL_STR, (int) "-ad"); + p = buf; + len = sizeof (buf); + while (fgets (p, len, fp)) + { + int i, err; + + i = strlen (p) + 1; + err = boot_script_parse_line (0, p); + if (err) + { + fprintf (stderr, "error %s\n", boot_script_error_string (err)); + exit (1); + } + p += i; + len -= i; + } + boot_script_exec (); + exit (0); +} +#endif /* BOOT_SCRIPT_TEST */ diff --git a/kern/boot_script.h b/kern/boot_script.h new file mode 100644 index 0000000..d1f968d --- /dev/null +++ b/kern/boot_script.h @@ -0,0 +1,111 @@ +/* Definitions for boot script parser for Mach. */ + +#ifndef _boot_script_h +#define _boot_script_h + +/* Written by Shantanu Goel (goel@cs.columbia.edu). */ + +/* Error codes returned by boot_script_parse_line() + and boot_script_exec_cmd(). */ +#define BOOT_SCRIPT_NOMEM 1 +#define BOOT_SCRIPT_SYNTAX_ERROR 2 +#define BOOT_SCRIPT_INVALID_ASG 3 +#define BOOT_SCRIPT_MACH_ERROR 4 +#define BOOT_SCRIPT_UNDEF_SYM 5 +#define BOOT_SCRIPT_EXEC_ERROR 6 +#define BOOT_SCRIPT_INVALID_SYM 7 +#define BOOT_SCRIPT_BAD_TYPE 8 + +/* Legal values for argument `type' to function + boot_script_set_variable and boot_script_define_function. */ +#define VAL_NONE 0 /* none -- function runs at exec time */ +#define VAL_STR 1 /* string */ +#define VAL_PORT 2 /* port */ +#define VAL_TASK 3 /* task port */ + +/* This structure describes a command. */ +struct cmd +{ + /* Cookie passed in to boot_script_parse_line. */ + void *hook; + + /* Path of executable. */ + char *path; + + /* Task port. */ + task_t task; + + /* Argument list. */ + struct arg **args; + + /* Amount allocated for `args'. */ + int args_alloc; + + /* Next available slot in `args'. */ + int args_index; + + /* List of functions that want to be run on command execution. */ + struct sym **exec_funcs; + + /* Amount allocated for `exec_funcs'. */ + int exec_funcs_alloc; + + /* Next available slot in `exec_funcs'. */ + int exec_funcs_index; +}; + + +/* The user must define these functions, we work like malloc and free. 
 */
+void *boot_script_malloc (unsigned int);
+void boot_script_free (void *, unsigned int);
+
+/* The user must define this function. Load the image of the
+ executable specified by PATH in TASK. Create a thread
+ in TASK and point it at the executable's entry point. Initialize
+ TASK's stack with argument vector ARGV of length ARGC whose
+ strings are STRINGS. STRINGS has length STRINGLEN.
+ Return 0 for success, non-zero otherwise. */
+int boot_script_exec_cmd (void *hook,
+ task_t task, char *path, int argc,
+ char **argv, char *strings, int stringlen);
+
+/* The user must define these functions to perform the corresponding
+ Mach task manipulations. */
+int boot_script_task_create (struct cmd *); /* task_create + task_suspend */
+int boot_script_task_resume (struct cmd *);
+int boot_script_prompt_task_resume (struct cmd *);
+int boot_script_insert_right (struct cmd *, mach_port_t, mach_port_name_t *namep);
+int boot_script_insert_task_port (struct cmd *, task_t, mach_port_name_t *namep);
+
+/* The user must define this function to clean up the `task_t'
+ returned by boot_script_task_create. */
+void boot_script_free_task (task_t task, int aborting);
+
+
+/* Parse the command line LINE. This causes the command line to be
+ converted into an internal format. Returns 0 for success, non-zero
+ otherwise.
+
+ NOTE: The parser writes into the line so it must not be a string constant.
+ It is also the responsibility of the caller not to deallocate the line
+ across calls to the parser. */
+int boot_script_parse_line (void *hook, char *cmdline);
+
+/* Execute the command lines previously parsed.
+ Returns 0 for success, non-zero otherwise. */
+int boot_script_exec (void);
+
+/* Create an entry in the symbol table for variable NAME,
+ whose type is TYPE and value is VAL. Returns 0 on success,
+ non-zero otherwise. */
+int boot_script_set_variable (const char *name, int type, long val);
+
+/* Define the function NAME, which will return type RET_TYPE. */
+int boot_script_define_function (const char *name, int ret_type,
+ int (*func) (const struct cmd *cmd, int *val));
+
+/* Returns a string describing the error ERR. */
+char *boot_script_error_string (int err);
+
+
+#endif /* _boot_script_h */
diff --git a/kern/bootstrap.c b/kern/bootstrap.c
new file mode 100644
index 0000000..49358ac
--- /dev/null
+++ b/kern/bootstrap.c
@@ -0,0 +1,918 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1992-1989 Carnegie Mellon University.
+ * Copyright (c) 1995-1993 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */ +/* + * Bootstrap the various built-in servers. + */ + +#include <alloca.h> +#include <string.h> + +#include <mach/port.h> +#include <mach/message.h> +#include <machine/locore.h> +#include <machine/vm_param.h> +#include <machine/pcb.h> +#include <ipc/ipc_port.h> +#include <ipc/mach_port.server.h> +#include <kern/bootstrap.h> +#include <kern/debug.h> +#include <kern/host.h> +#include <kern/printf.h> +#include <kern/kalloc.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/lock.h> +#include <vm/vm_kern.h> +#include <vm/vm_user.h> +#include <vm/pmap.h> +#include <device/device_port.h> + +#if MACH_KDB +#include <machine/db_machdep.h> +#include <ddb/db_sym.h> +#endif + +#if OSKIT_MACH +#include <stddef.h> +#include <oskit/machine/base_multiboot.h> +#include <oskit/exec/exec.h> +#include <oskit/c/stdio.h> +#define safe_gets(s, n) fgets((s),(n),stdin) +#else +#include <mach/machine/multiboot.h> +#include <mach/exec/exec.h> +#ifdef MACH_XEN +#include <mach/xen.h> +extern struct start_info boot_info; /* XXX put this in a header! */ +#else /* MACH_XEN */ +extern struct multiboot_raw_info boot_info; /* XXX put this in a header! */ +#endif /* MACH_XEN */ +#endif + +#include "boot_script.h" + + +static mach_port_name_t boot_device_port; /* local name */ +static mach_port_name_t boot_host_port; /* local name */ + +extern char *kernel_cmdline; + +static void user_bootstrap(void); /* forward */ +static void user_bootstrap_compat(void); /* forward */ +static void bootstrap_exec_compat(void *exec_data); /* forward */ +static void get_compat_strings(char *flags_str, char *root_str); /* forward */ + +static mach_port_name_t +task_insert_send_right( + task_t task, + ipc_port_t port) +{ + mach_port_name_t name; + + for (name = 1;; name++) { + kern_return_t kr; + + kr = mach_port_insert_right(task->itk_space, name, + port, MACH_MSG_TYPE_PORT_SEND); + if (kr == KERN_SUCCESS) + break; + assert(kr == KERN_NAME_EXISTS); + } + + return name; +} + +static void +free_bootstrap_pages(phys_addr_t start, phys_addr_t end) +{ + struct vm_page *page; + + while (start < end) + { + page = vm_page_lookup_pa(start); + assert(page != NULL); + vm_page_manage(page); + start += PAGE_SIZE; + } +} + +void bootstrap_create(void) +{ + int compat; + unsigned n = 0; +#ifdef MACH_XEN +#ifdef __x86_64__ // 32_ON_64 actually + struct multiboot32_module *bmods32 = (struct multiboot32_module *) + boot_info.mod_start; + struct multiboot_module *bmods; + if (bmods32) { + int i; + for (n = 0; bmods32[n].mod_start; n++) + ; + bmods = alloca(n * sizeof(*bmods)); + for (i = 0; i < n ; i++) + { + bmods[i].mod_start = kvtophys(bmods32[i].mod_start + (vm_offset_t) bmods32); + bmods[i].mod_end = kvtophys(bmods32[i].mod_end + (vm_offset_t) bmods32); + bmods[i].string = kvtophys(bmods32[i].string + (vm_offset_t) bmods32); + } + } +#else + struct multiboot_module *bmods = (struct multiboot_module *) + boot_info.mod_start; + if (bmods) + for (n = 0; bmods[n].mod_start; n++) { + bmods[n].mod_start = kvtophys(bmods[n].mod_start + (vm_offset_t) bmods); + bmods[n].mod_end = kvtophys(bmods[n].mod_end + (vm_offset_t) bmods); + bmods[n].string = kvtophys(bmods[n].string + (vm_offset_t) bmods); + } +#endif + boot_info.mods_count = n; + boot_info.flags |= MULTIBOOT_MODS; +#else /* MACH_XEN */ +#ifdef __x86_64__ + struct multiboot_raw_module *bmods32 = ((struct multiboot_raw_module *) + phystokv(boot_info.mods_addr)); + struct multiboot_module *bmods=NULL; + if (bmods32) + { + int i; + bmods = alloca(boot_info.mods_count * sizeof(*bmods)); + 
for (i=0; i<boot_info.mods_count; i++) + { + bmods[i].mod_start = bmods32[i].mod_start; + bmods[i].mod_end = bmods32[i].mod_end; + bmods[i].string = bmods32[i].string; + } + } +#else + struct multiboot_module *bmods = ((struct multiboot_module *) + phystokv(boot_info.mods_addr)); +#endif +#endif /* MACH_XEN */ + if (!(boot_info.flags & MULTIBOOT_MODS) + || (boot_info.mods_count == 0)) + panic ("No bootstrap code loaded with the kernel!"); + + compat = boot_info.mods_count == 1; + if (compat) + { + char *p = strchr((char*)phystokv(bmods[0].string), ' '); + if (p != 0) + do + ++p; + while (*p == ' ' || *p == '\n'); + compat = p == 0 || *p == '\0'; + } + + if (compat) + { + printf("Loading single multiboot module in compat mode: %s\n", + (char*)phystokv(bmods[0].string)); + bootstrap_exec_compat(&bmods[0]); + } + else + { + unsigned i; + int losers; + + /* Initialize boot script variables. We leak these send rights. */ + losers = boot_script_set_variable + ("host-port", VAL_PORT, + (long) realhost.host_priv_self); + if (losers) + panic ("cannot set boot-script variable host-port: %s", + boot_script_error_string (losers)); + losers = boot_script_set_variable + ("device-port", VAL_PORT, + (long) master_device_port); + if (losers) + panic ("cannot set boot-script variable device-port: %s", + boot_script_error_string (losers)); + losers = boot_script_set_variable + ("kernel-task", VAL_PORT, + (long) kernel_task->itk_self); + if (losers) + panic ("cannot set boot-script variable kernel-task: %s", + boot_script_error_string (losers)); + + losers = boot_script_set_variable ("kernel-command-line", VAL_STR, + (long) kernel_cmdline); + if (losers) + panic ("cannot set boot-script variable %s: %s", + "kernel-command-line", boot_script_error_string (losers)); + + { + /* Set the same boot script variables that the old Hurd's + serverboot did, so an old Hurd and boot script previously + used with serverboot can be used directly with this kernel. */ + + char *flag_string = alloca(1024); + char *root_string = alloca(1024); + + /* + * Get the (compatibility) boot flags and root name strings. + */ + get_compat_strings(flag_string, root_string); + + losers = boot_script_set_variable ("boot-args", VAL_STR, + (long) flag_string); + if (losers) + panic ("cannot set boot-script variable %s: %s", + "boot-args", boot_script_error_string (losers)); + losers = boot_script_set_variable ("root-device", VAL_STR, + (long) root_string); + if (losers) + panic ("cannot set boot-script variable %s: %s", + "root-device", boot_script_error_string (losers)); + } + +#if OSKIT_MACH + { + /* The oskit's "environ" array contains all the words from + the multiboot command line that looked like VAR=VAL. + We set each of these as boot-script variables, which + can be used for things like ${root}. */ + + extern char **environ; + char **ep; + for (ep = environ; *ep != 0; ++ep) + { + size_t len = strlen (*ep) + 1; + char *var = memcpy (alloca (len), *ep, len); + char *val = strchr (var, '='); + *val++ = '\0'; + losers = boot_script_set_variable (var, VAL_STR, (long) val); + if (losers) + panic ("cannot set boot-script variable %s: %s", + var, boot_script_error_string (losers)); + } + } +#else /* GNUmach, not oskit-mach */ + { + /* Turn each `FOO=BAR' word in the command line into a boot script + variable ${FOO} with value BAR. This matches what we get from + oskit's environ in the oskit-mach case (above). 
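For example, with a hypothetical multiboot command line of

   /boot/gnumach.gz console=com0 root=device:hd0s1

the loop below defines ${console} with value "com0" and ${root} with value "device:hd0s1", while words without an `=' (such as the kernel path itself) are simply skipped, so a later boot script line can refer to ${root} directly.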
*/ + + int len = strlen (kernel_cmdline) + 1; + char *s = memcpy (alloca (len), kernel_cmdline, len); + char *word; + while ((word = strsep (&s, " \t")) != 0) + { + char *eq = strchr (word, '='); + if (eq == 0) + continue; + *eq++ = '\0'; + losers = boot_script_set_variable (word, VAL_STR, (long) eq); + if (losers) + panic ("cannot set boot-script variable %s: %s", + word, boot_script_error_string (losers)); + } + } +#endif + + for (i = 0; i < boot_info.mods_count; ++i) + { + int err; + char *line = (char*)phystokv(bmods[i].string); + printf ("module %d: %s\n", i, line); + err = boot_script_parse_line (&bmods[i], line); + if (err) + { + printf ("\n\tERROR: %s", boot_script_error_string (err)); + ++losers; + } + } + printf ("%d multiboot modules\n", i); + if (losers) + panic ("%d of %d boot script commands could not be parsed", + losers, boot_info.mods_count); + losers = boot_script_exec (); + if (losers) + panic ("ERROR in executing boot script: %s", + boot_script_error_string (losers)); + } + /* XXX we could free the memory used + by the boot loader's descriptors and such. */ + for (n = 0; n < boot_info.mods_count; n++) + free_bootstrap_pages(bmods[n].mod_start, bmods[n].mod_end); +} + +static void +bootstrap_exec_compat(void *e) +{ + task_t bootstrap_task; + thread_t bootstrap_thread; + + /* + * Create the bootstrap task. + */ + + (void) task_create(TASK_NULL, FALSE, &bootstrap_task); + (void) thread_create(bootstrap_task, &bootstrap_thread); + + /* + * Insert send rights to the master host and device ports. + */ + + boot_host_port = + task_insert_send_right(bootstrap_task, + ipc_port_make_send(realhost.host_priv_self)); + + boot_device_port = + task_insert_send_right(bootstrap_task, + ipc_port_make_send(master_device_port)); + + /* + * Start the bootstrap thread. + */ + bootstrap_thread->saved.other = e; + thread_start(bootstrap_thread, user_bootstrap_compat); + (void) thread_resume(bootstrap_thread); +} + +/* + * The following code runs as the kernel mode portion of the + * first user thread. + */ + +/* + * Convert an unsigned integer to its decimal representation. + */ +static void +itoa( + char *str, + vm_size_t num) +{ + char buf[sizeof(vm_size_t)*2+3]; + char *np; + + np = buf + sizeof(buf); + *--np = 0; + + do { + *--np = '0' + num % 10; + num /= 10; + } while (num != 0); + + strcpy(str, np); +} + +/* + * Collect the boot flags into a single argument string, + * for compatibility with existing bootstrap and startup code. + * Format as a standard flag argument: '-qsdn...' + */ +static void get_compat_strings(char *flags_str, char *root_str) +{ + char *ip, *cp; + + strcpy (root_str, "UNKNOWN"); + + cp = flags_str; + *cp++ = '-'; + + for (ip = kernel_cmdline; *ip; ) + { + if (*ip == ' ') + { + ip++; + } + else if (*ip == '-') + { + ip++; + while (*ip > ' ') + *cp++ = *ip++; + } + else if (strncmp(ip, "root=", 5) == 0) + { + char *rp = root_str; + + ip += 5; + if (strncmp(ip, "/dev/", 5) == 0) + ip += 5; + while (*ip > ' ') + *rp++ = *ip++; + *rp = '\0'; + } + else + { + while (*ip > ' ') + ip++; + } + } + + if (cp == &flags_str[1]) /* no flags */ + *cp++ = 'x'; + *cp = '\0'; +} + +#if 0 +/* + * Copy boot_data (executable) to the user portion of this task. 
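To make the compatibility parsing above concrete: given a hypothetical command line of "gnumach -s root=/dev/hd0a", get_compat_strings() yields flags_str = "-s" and root_str = "hd0a" (the "/dev/" prefix is stripped); if no flag or root= words are present at all, it falls back to "-x" and "UNKNOWN".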
+ */ +static boolean_t load_protect_text = TRUE; +#if MACH_KDB + /* if set, fault in the text segment */ +static boolean_t load_fault_in_text = TRUE; +#endif + +static vm_offset_t +boot_map( + void * data, /* private data */ + vm_offset_t offset) /* offset to map */ +{ + vm_offset_t start_offset = (vm_offset_t) data; + + return pmap_extract(kernel_pmap, start_offset + offset); +} + + +#if BOOTSTRAP_SYMBOLS +static boolean_t load_bootstrap_symbols = TRUE; +#else +static boolean_t load_bootstrap_symbols = FALSE; +#endif +#endif + + + +static int +boot_read(void *handle, vm_offset_t file_ofs, void *buf, vm_size_t size, + vm_size_t *out_actual) +{ + struct multiboot_module *mod = handle; + + if (mod->mod_start + file_ofs + size > mod->mod_end) + return -1; + + memcpy(buf, (const char*) phystokv (mod->mod_start) + file_ofs, size); + *out_actual = size; + return 0; +} + +static int +read_exec(void *handle, vm_offset_t file_ofs, vm_size_t file_size, + vm_offset_t mem_addr, vm_size_t mem_size, + exec_sectype_t sec_type) +{ + struct multiboot_module *mod = handle; + + vm_map_t user_map = current_task()->map; + vm_offset_t start_page, end_page; + vm_prot_t mem_prot = sec_type & EXEC_SECTYPE_PROT_MASK; + int err; + + if (mod->mod_start + file_ofs + file_size > mod->mod_end) + return -1; + + if (!(sec_type & EXEC_SECTYPE_ALLOC)) + return 0; + + assert(mem_size > 0); + assert(mem_size >= file_size); + + start_page = trunc_page(mem_addr); + end_page = round_page(mem_addr + mem_size); + +#if 0 + printf("reading bootstrap section %08x-%08x-%08x prot %d pages %08x-%08x\n", + mem_addr, mem_addr+file_size, mem_addr+mem_size, mem_prot, start_page, end_page); +#endif + + err = vm_allocate(user_map, &start_page, end_page - start_page, FALSE); + assert(err == 0); + assert(start_page == trunc_page(mem_addr)); + + if (file_size > 0) + { + err = copyout((char *)phystokv (mod->mod_start) + file_ofs, + (void *)mem_addr, file_size); + assert(err == 0); + } + + if (mem_prot != VM_PROT_ALL) + { + err = vm_protect(user_map, start_page, end_page - start_page, FALSE, mem_prot); + assert(err == 0); + } + + return 0; +} + +static void copy_bootstrap(void *e, exec_info_t *boot_exec_info) +{ + /* vm_map_t user_map = current_task()->map; */ + int err; + + if ((err = exec_load(boot_read, read_exec, e, boot_exec_info))) + panic("Cannot load user-bootstrap image: error code %d", err); + +#if MACH_KDB + /* + * Enter the bootstrap symbol table. + */ + +#if 0 /*XXX*/ + if (load_bootstrap_symbols) + (void) X_db_sym_init( + (char*) boot_start+lp->sym_offset, + (char*) boot_start+lp->sym_offset+lp->sym_size, + "bootstrap", + (char *) user_map); +#endif + +#if 0 /*XXX*/ + if (load_fault_in_text) + { + vm_offset_t lenp = round_page(lp->text_start+lp->text_size) - + trunc_page(lp->text_start); + vm_offset_t i = 0; + + while (i < lenp) + { + vm_fault(user_map, text_page_start +i, + load_protect_text ? + VM_PROT_READ|VM_PROT_EXECUTE : + VM_PROT_READ|VM_PROT_EXECUTE | VM_PROT_WRITE, + 0,0,0); + i = round_page (i+1); + } + } +#endif +#endif /* MACH_KDB */ +} + +/* + * Allocate the stack, and build the argument list. + */ +static void +build_args_and_stack(struct exec_info *boot_exec_info, + char **argv, char **envp) +{ + vm_offset_t stack_base; + vm_size_t stack_size; + char * arg_ptr; + long arg_count, envc; + int arg_len; + char * arg_pos; + int arg_item_len; + char * string_pos; + rpc_vm_offset_t zero = 0; + int i; + +#define STACK_SIZE (2*64*1024) + + /* + * Calculate the size of the argument list. 
+ */ + arg_len = 0; + arg_count = 0; + while (argv[arg_count] != 0) { + arg_ptr = argv[arg_count++]; + arg_len += strlen(arg_ptr) + 1; + } + envc = 0; + if (envp != 0) + while (envp[envc] != 0) + arg_len += strlen (envp[envc++]) + 1; + + /* + * Add space for: + * arg count + * pointers to arguments + * trailing 0 pointer + * pointers to environment variables + * trailing 0 pointer + */ + arg_len += (sizeof(rpc_vm_offset_t) + + (arg_count + 1 + envc + 1) * sizeof(rpc_vm_offset_t)); + + /* + * Allocate the stack. + */ + stack_size = round_page(STACK_SIZE); + stack_base = user_stack_low(stack_size); + + (void) vm_allocate(current_task()->map, + &stack_base, + stack_size, + FALSE); + + arg_pos = (char *) + set_user_regs(stack_base, stack_size, boot_exec_info, arg_len); + + /* + * Start the strings after the arg-count and pointers + */ + string_pos = (arg_pos + + sizeof(rpc_vm_offset_t) + + (arg_count + 1 + envc + 1) * sizeof(rpc_vm_offset_t)); + + /* + * first the argument count + */ + (void) copyout(&arg_count, + arg_pos, + sizeof(rpc_vm_offset_t)); + arg_pos += sizeof(rpc_vm_offset_t); + + /* + * Then the strings and string pointers for each argument + */ + for (i = 0; i < arg_count; ++i) { + rpc_vm_offset_t pos = convert_vm_to_user((vm_offset_t) string_pos); + arg_ptr = argv[i]; + arg_item_len = strlen(arg_ptr) + 1; /* include trailing 0 */ + + /* set string pointer */ + (void) copyout(&pos, arg_pos, sizeof (rpc_vm_offset_t)); + arg_pos += sizeof(rpc_vm_offset_t); + + /* copy string */ + (void) copyout(arg_ptr, string_pos, arg_item_len); + string_pos += arg_item_len; + } + + /* + * Null terminator for argv. + */ + (void) copyout(&zero, arg_pos, sizeof(rpc_vm_offset_t)); + arg_pos += sizeof(rpc_vm_offset_t); + + /* + * Then the strings and string pointers for each environment variable + */ + for (i = 0; i < envc; ++i) { + rpc_vm_offset_t pos = convert_vm_to_user((vm_offset_t) string_pos); + arg_ptr = envp[i]; + arg_item_len = strlen(arg_ptr) + 1; /* include trailing 0 */ + + /* set string pointer */ + (void) copyout(&pos, arg_pos, sizeof (rpc_vm_offset_t)); + arg_pos += sizeof(rpc_vm_offset_t); + + /* copy string */ + (void) copyout(arg_ptr, string_pos, arg_item_len); + string_pos += arg_item_len; + } + + /* + * Null terminator for envp. + */ + (void) copyout(&zero, arg_pos, sizeof(rpc_vm_offset_t)); +} + + +static void +user_bootstrap_compat(void) +{ + exec_info_t boot_exec_info; + + char host_string[12]; + char device_string[12]; + char flag_string[1024]; + char root_string[1024]; + + /* + * Copy the bootstrap code from boot_exec into the user task. + */ + copy_bootstrap(current_thread()->saved.other, &boot_exec_info); + + /* + * Convert the host and device ports to strings, + * to put in the argument list. + */ + itoa(host_string, boot_host_port); + itoa(device_string, boot_device_port); + + /* + * Get the (compatibility) boot flags and root name strings. + */ + get_compat_strings(flag_string, root_string); + + /* + * Build the argument list and insert in the user task. 
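For reference, the block that build_args_and_stack() lays out on the new user stack has roughly this shape, each slot being an rpc_vm_offset_t and the string pointers already converted with convert_vm_to_user():

   argc
   argv[0] ... argv[argc-1]
   0
   envp[0] ... envp[envc-1]
   0
   argument and environment strings, NUL-terminated, back to back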
+ * Argument list is + * "bootstrap -<boothowto> <host_port> <device_port> <root_name>" + +$0 ${boot-args} ${host-port} ${device-port} ${root-device} $(task-create) $(task-resume) + + */ + { + char *argv[] = { "bootstrap", + flag_string, + host_string, + device_string, + root_string, + 0 }; + char *envp[] = { 0, 0 }; + if (kernel_cmdline[0] != '\0') + { + static const char cmdline_var[] = "MULTIBOOT_CMDLINE="; + envp[0] = alloca (sizeof cmdline_var + strlen (kernel_cmdline)); + memcpy (envp[0], cmdline_var, sizeof cmdline_var - 1); + strcpy (envp[0] + sizeof cmdline_var - 1, kernel_cmdline); + } + build_args_and_stack(&boot_exec_info, argv, envp); + } + + /* + * Exit to user thread. + */ + thread_bootstrap_return(); + /*NOTREACHED*/ +} + + +struct user_bootstrap_info +{ + struct multiboot_module *mod; + char **argv; + int done; + decl_simple_lock_data(,lock) +}; + +int +boot_script_exec_cmd (void *hook, task_t task, char *path, int argc, + char **argv, char *strings, int stringlen) +{ + struct multiboot_module *mod = hook; + + int err; + + if (task != MACH_PORT_NULL) + { + thread_t thread; + struct user_bootstrap_info info = { mod, argv, 0, }; + simple_lock_init (&info.lock); + + err = thread_create ((task_t)task, &thread); + assert(err == 0); + simple_lock (&info.lock); + thread->saved.other = &info; + thread_start (thread, user_bootstrap); + err = thread_resume (thread); + assert(err == 0); + + /* We need to synchronize with the new thread and block this + main thread until it has finished referring to our local state. */ + while (! info.done) + { + thread_sleep ((event_t) &info, simple_lock_addr(info.lock), FALSE); + simple_lock (&info.lock); + } + simple_unlock (&info.lock); + thread_deallocate (thread); + printf ("\n"); + } + + return 0; +} + +static void user_bootstrap(void) +{ + struct user_bootstrap_info *info = current_thread()->saved.other; + exec_info_t boot_exec_info; + int err; + char **av; + + /* Load this task up from the executable file in the module. */ + err = exec_load(boot_read, read_exec, info->mod, &boot_exec_info); + if (err) + panic ("Cannot load user executable module (error code %d): %s", + err, info->argv[0]); + + printf ("task loaded:"); + + /* Set up the stack with arguments. */ + build_args_and_stack(&boot_exec_info, info->argv, 0); + + for (av = info->argv; *av != 0; ++av) + printf (" %s", *av); + + task_suspend (current_task()); + + /* Tell the bootstrap thread running boot_script_exec_cmd + that we are done looking at INFO. */ + simple_lock (&info->lock); + assert (!info->done); + info->done = 1; + simple_unlock (&info->lock); + thread_wakeup ((event_t) info); + + /* + * Exit to user thread. + */ + thread_bootstrap_return(); + /*NOTREACHED*/ +} + + + +void * +boot_script_malloc (unsigned int size) +{ + return (void *) kalloc (size); +} + +void +boot_script_free (void *ptr, unsigned int size) +{ + kfree ((vm_offset_t)ptr, size); +} + +int +boot_script_task_create (struct cmd *cmd) +{ + kern_return_t rc = task_create_kernel(TASK_NULL, FALSE, &cmd->task); + if (rc) + { + printf("boot_script_task_create failed with %x\n", rc); + return BOOT_SCRIPT_MACH_ERROR; + } + task_set_name(cmd->task, cmd->path); + return 0; +} + +int +boot_script_task_resume (struct cmd *cmd) +{ + kern_return_t rc = task_resume (cmd->task); + if (rc) + { + printf("boot_script_task_resume failed with %x\n", rc); + return BOOT_SCRIPT_MACH_ERROR; + } + printf ("\nstart %s: ", cmd->path); + return 0; +} + +int +boot_script_prompt_task_resume (struct cmd *cmd) +{ +#if ! 
MACH_KDB + char xx[5]; +#endif + + printf ("Pausing for %s...\n", cmd->path); + +#if ! MACH_KDB + printf ("Hit <return> to resume bootstrap."); + safe_gets (xx, sizeof xx); +#else + SoftDebugger("Hit `c<return>' to resume bootstrap."); +#endif + + return boot_script_task_resume (cmd); +} + +void +boot_script_free_task (task_t task, int aborting) +{ + if (aborting) + task_terminate (task); + task_deallocate (task); +} + +int +boot_script_insert_right (struct cmd *cmd, mach_port_t port, mach_port_name_t *name) +{ + *name = task_insert_send_right (cmd->task, + ipc_port_make_send((ipc_port_t) port)); + return 0; +} + +int +boot_script_insert_task_port (struct cmd *cmd, task_t task, mach_port_name_t *name) +{ + *name = task_insert_send_right (cmd->task, + ipc_port_make_send(task->itk_sself)); + return 0; +} diff --git a/kern/bootstrap.h b/kern/bootstrap.h new file mode 100644 index 0000000..309a63f --- /dev/null +++ b/kern/bootstrap.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2013 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _KERN_BOOTSTRAP_H_ +#define _KERN_BOOTSTRAP_H_ + +#include <kern/boot_script.h> + +void bootstrap_create(void); + +#endif /* _KERN_BOOTSTRAP_H_ */ diff --git a/kern/counters.c b/kern/counters.c new file mode 100644 index 0000000..0a0665b --- /dev/null +++ b/kern/counters.c @@ -0,0 +1,82 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <kern/counters.h> + +/* + * We explicitly initialize the counters to make + * them contiguous in the kernel's data space. + * This makes them easier to examine with ddb. 
+ */ + +#if MACH_COUNTERS +mach_counter_t c_thread_invoke_hits = 0; +mach_counter_t c_thread_invoke_misses = 0; +mach_counter_t c_thread_invoke_csw = 0; +mach_counter_t c_thread_handoff_hits = 0; +mach_counter_t c_thread_handoff_misses = 0; +mach_counter_t c_threads_current = 0; +mach_counter_t c_threads_max = 0; +mach_counter_t c_threads_min = 0; +mach_counter_t c_threads_total = 0; +mach_counter_t c_stacks_current = 0; +mach_counter_t c_stacks_max = 0; +mach_counter_t c_stacks_min = 0; +mach_counter_t c_stacks_total = 0; +mach_counter_t c_stack_alloc_hits = 0; +mach_counter_t c_stack_alloc_misses = 0; +mach_counter_t c_stack_alloc_max = 0; +mach_counter_t c_clock_ticks = 0; +mach_counter_t c_ipc_mqueue_send_block = 0; +mach_counter_t c_ipc_mqueue_receive_block_user = 0; +mach_counter_t c_ipc_mqueue_receive_block_kernel = 0; +mach_counter_t c_mach_msg_trap_block_fast = 0; +mach_counter_t c_mach_msg_trap_block_slow = 0; +mach_counter_t c_mach_msg_trap_block_exc = 0; +mach_counter_t c_exception_raise_block = 0; +mach_counter_t c_swtch_block = 0; +mach_counter_t c_swtch_pri_block = 0; +mach_counter_t c_thread_switch_block = 0; +mach_counter_t c_thread_switch_handoff = 0; +mach_counter_t c_ast_taken_block = 0; +mach_counter_t c_thread_halt_self_block = 0; +mach_counter_t c_vm_fault_page_block_busy_user = 0; +mach_counter_t c_vm_fault_page_block_busy_kernel = 0; +mach_counter_t c_vm_fault_page_block_backoff_user = 0; +mach_counter_t c_vm_fault_page_block_backoff_kernel = 0; +mach_counter_t c_vm_page_wait_block_user = 0; +mach_counter_t c_vm_page_wait_block_kernel = 0; +mach_counter_t c_vm_pageout_block = 0; +mach_counter_t c_vm_pageout_scan_block = 0; +mach_counter_t c_idle_thread_block = 0; +mach_counter_t c_idle_thread_handoff = 0; +mach_counter_t c_sched_thread_block = 0; +mach_counter_t c_io_done_thread_block = 0; +mach_counter_t c_net_thread_block = 0; +mach_counter_t c_reaper_thread_block = 0; +mach_counter_t c_swapin_thread_block = 0; +mach_counter_t c_action_thread_block = 0; +#endif /* MACH_COUNTERS */ diff --git a/kern/counters.h b/kern/counters.h new file mode 100644 index 0000000..aa1e739 --- /dev/null +++ b/kern/counters.h @@ -0,0 +1,107 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_COUNTERS_ +#define _KERN_COUNTERS_ + +/* + * We can count various interesting events and paths. + * + * Use counter() to change the counters, eg: + * counter(c_idle_thread_block++); + * Use counter_always() for non-conditional counters. 
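A short illustration of the two macros described above (which counters are bumped here is incidental):

	counter(c_idle_thread_block++);		/* compiled away unless MACH_COUNTERS is set */
	counter_always(c_threads_current++);	/* always compiled in */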
+ */ + +#define counter_always(code) code + +#if MACH_COUNTERS + +#define counter(code) counter_always(code) + +#else /* MACH_COUNTERS */ + +#define counter(code) + +#endif /* MACH_COUNTERS */ + +/* + * We define the counters with individual integers, + * instead of a big structure, so that ddb + * will know the addresses of the counters. + */ + +typedef unsigned int mach_counter_t; + +#if MACH_COUNTERS +extern mach_counter_t c_thread_invoke_hits; +extern mach_counter_t c_thread_invoke_misses; +extern mach_counter_t c_thread_invoke_csw; +extern mach_counter_t c_thread_handoff_hits; +extern mach_counter_t c_thread_handoff_misses; +extern mach_counter_t c_threads_current; +extern mach_counter_t c_threads_max; +extern mach_counter_t c_threads_min; +extern mach_counter_t c_threads_total; +extern mach_counter_t c_stacks_current; +extern mach_counter_t c_stacks_max; +extern mach_counter_t c_stacks_min; +extern mach_counter_t c_stacks_total; +extern mach_counter_t c_stack_alloc_hits; +extern mach_counter_t c_stack_alloc_misses; +extern mach_counter_t c_stack_alloc_max; +extern mach_counter_t c_clock_ticks; +extern mach_counter_t c_ipc_mqueue_send_block; +extern mach_counter_t c_ipc_mqueue_receive_block_user; +extern mach_counter_t c_ipc_mqueue_receive_block_kernel; +extern mach_counter_t c_mach_msg_trap_block_fast; +extern mach_counter_t c_mach_msg_trap_block_slow; +extern mach_counter_t c_mach_msg_trap_block_exc; +extern mach_counter_t c_exception_raise_block; +extern mach_counter_t c_swtch_block; +extern mach_counter_t c_swtch_pri_block; +extern mach_counter_t c_thread_switch_block; +extern mach_counter_t c_thread_switch_handoff; +extern mach_counter_t c_ast_taken_block; +extern mach_counter_t c_thread_halt_self_block; +extern mach_counter_t c_vm_fault_page_block_busy_user; +extern mach_counter_t c_vm_fault_page_block_busy_kernel; +extern mach_counter_t c_vm_fault_page_block_backoff_user; +extern mach_counter_t c_vm_fault_page_block_backoff_kernel; +extern mach_counter_t c_vm_page_wait_block_user; +extern mach_counter_t c_vm_page_wait_block_kernel; +extern mach_counter_t c_vm_pageout_block; +extern mach_counter_t c_vm_pageout_scan_block; +extern mach_counter_t c_idle_thread_block; +extern mach_counter_t c_idle_thread_handoff; +extern mach_counter_t c_sched_thread_block; +extern mach_counter_t c_io_done_thread_block; +extern mach_counter_t c_net_thread_block; +extern mach_counter_t c_reaper_thread_block; +extern mach_counter_t c_swapin_thread_block; +extern mach_counter_t c_action_thread_block; +#endif /* MACH_COUNTERS */ + +#endif /* _KERN_COUNTERS_ */ diff --git a/kern/cpu_number.h b/kern/cpu_number.h new file mode 100644 index 0000000..1abe3db --- /dev/null +++ b/kern/cpu_number.h @@ -0,0 +1,47 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_CPU_NUMBER_H_ +#define _KERN_CPU_NUMBER_H_ + +#include <machine/cpu_number.h> + +/* + * Definitions for cpu identification in multi-processors. + */ + +extern int master_cpu; /* 'master' processor - keeps time */ + +#if (NCPUS == 1) + /* cpu number is always 0 on a single processor system */ +#define cpu_number() (0) +#define cpu_number_slow() (0) + +#endif /* NCPUS == 1 */ + +#define CPU_L1_SIZE (1 << CPU_L1_SHIFT) + +#endif /* _KERN_CPU_NUMBER_H_ */ diff --git a/kern/debug.c b/kern/debug.c new file mode 100644 index 0000000..eec2f14 --- /dev/null +++ b/kern/debug.c @@ -0,0 +1,207 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include <mach/xen.h> + +#include <kern/printf.h> +#include <stdarg.h> + +#include "cpu_number.h" +#include <kern/lock.h> +#include <kern/thread.h> + +#include <kern/debug.h> + +#include <machine/loose_ends.h> +#include <machine/model_dep.h> + +#include <device/cons.h> + +#if NCPUS>1 +simple_lock_irq_data_t Assert_print_lock; /* uninited, we take our chances */ +#endif + +static void +do_cnputc(char c, vm_offset_t offset) +{ + cnputc(c); +} + +void +Assert(const char *exp, const char *file, int line, const char *fun) +{ +#if NCPUS > 1 + spl_t s = simple_lock_irq(&Assert_print_lock); + printf("{cpu%d} %s:%d: %s: Assertion `%s' failed.", + cpu_number(), file, line, fun, exp); + simple_unlock_irq(s, &Assert_print_lock); +#else + printf("%s:%d: %s: Assertion `%s' failed.", + file, line, fun, exp); +#endif + + Debugger("assertion failure"); +} + +void SoftDebugger(const char *message) +{ + printf("Debugger invoked: %s\n", message); + +#if !MACH_KDB + printf("But no debugger, continuing.\n"); + return; +#endif + +#if defined(vax) || defined(PC532) + asm("bpt"); +#endif /* vax */ + +#ifdef sun3 + current_thread()->pcb->flag |= TRACE_KDB; + asm("orw #0x00008000,sr"); +#endif /* sun3 */ +#ifdef sun4 + current_thread()->pcb->pcb_flag |= TRACE_KDB; + asm("ta 0x81"); +#endif /* sun4 */ + +#if defined(mips ) || defined(i860) || defined(alpha) + gimmeabreak(); +#endif + +#if defined(__i386__) || defined(__x86_64__) + asm("int3"); +#endif +} + +void Debugger(const char *message) +{ +#if !MACH_KDB + panic("Debugger invoked, but there isn't one!"); +#endif + + SoftDebugger(message); + + panic("Debugger returned!"); +} + +/* Be prepared to panic anytime, + even before panic_init() gets called from the "normal" place in kern/startup.c. + (panic_init() still needs to be called from there + to make sure we get initialized before starting multiple processors.) */ +def_simple_lock_irq_data(static, panic_lock) + +const char *panicstr; +int paniccpu; + +void +panic_init(void) +{ +} + +#if ! MACH_KBD +extern boolean_t reboot_on_panic; +#endif + +/*VARARGS1*/ +void +Panic(const char *file, int line, const char *fun, const char *s, ...) +{ + va_list listp; + spl_t spl; + + panic_init(); + + spl = simple_lock_irq(&panic_lock); + if (panicstr) { + if (cpu_number() != paniccpu) { + simple_unlock_irq(spl, &panic_lock); + halt_cpu(); + /* NOTREACHED */ + } + } + else { + panicstr = s; + paniccpu = cpu_number(); + } + simple_unlock_irq(spl, &panic_lock); + printf("panic "); +#if NCPUS > 1 + printf("{cpu%d} ", paniccpu); +#endif + printf("%s:%d: %s: ",file, line, fun); + va_start(listp, s); + _doprnt(s, listp, do_cnputc, 16, 0); + va_end(listp); + printf("\n"); + +#if MACH_KDB + Debugger("panic"); +#else +# ifdef MACH_HYP + hyp_crash(); +# else + /* Give the user time to see the message */ + { + int i = 1000; /* seconds */ + while (i--) + delay (1000000); /* microseconds */ + } + + halt_all_cpus (reboot_on_panic); +# endif /* MACH_HYP */ +#endif +} + +/* + * We'd like to use BSD's log routines here... + */ +/*VARARGS2*/ +void +log(int level, const char *fmt, ...) +{ + va_list listp; + + va_start(listp, fmt); + _doprnt(fmt, listp, do_cnputc, 16, 0); + va_end(listp); +} + +/* GCC references this for stack protection. 
*/ +unsigned char __stack_chk_guard [ sizeof (vm_offset_t) ] = +{ + [ sizeof (vm_offset_t) - 3 ] = '\r', + [ sizeof (vm_offset_t) - 2 ] = '\n', + [ sizeof (vm_offset_t) - 1 ] = 0xff, +}; + +void __stack_chk_fail (void); + +void +__stack_chk_fail (void) +{ + panic("stack smashing detected"); +} diff --git a/kern/debug.h b/kern/debug.h new file mode 100644 index 0000000..1a5cd07 --- /dev/null +++ b/kern/debug.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: debug.h + * Author: Bryan Ford + * + * This file contains definitions for kernel debugging, + * which are compiled in on the DEBUG symbol. + * + */ +#ifndef _mach_debug__debug_ +#define _mach_debug__debug_ + +#include <kern/assert.h> /*XXX*/ + +#ifndef NDEBUG + +#define here() printf("@ %s:%d\n", __FILE__, __LINE__) +#define message(args) ({ printf("@ %s:%d: ", __FILE__, __LINE__); printf args; printf("\n"); }) + +#define otsan() panic("%s:%d: off the straight and narrow!", __FILE__, __LINE__) + +#define struct_id_decl unsigned struct_id; +#define struct_id_init(p,id) ((p)->struct_id = (id)) +#define struct_id_denit(p) ((p)->struct_id = 0) +#define struct_id_verify(p,id) \ + ({ if ((p)->struct_id != (id)) \ + panic("%s:%d: "#p" (%08x) struct_id should be "#id" (%08x), is %08x\n", \ + __FILE__, __LINE__, (p), (id), (p->struct_id)); \ + }) + +#else /* NDEBUG */ + +#define otsan() + +#define struct_id_decl +#define struct_id_init(p,id) +#define struct_id_denit(p) +#define struct_id_verify(p,id) + +#endif /* NDEBUG */ + +extern void log (int level, const char *fmt, ...); + +extern void panic_init(void); +extern void Panic (const char *file, int line, const char *fun, + const char *s, ...) + __attribute__ ((noreturn, format (printf, 4, 5))); +#define panic(s, ...) \ + Panic (__FILE__, __LINE__, __FUNCTION__, s, ##__VA_ARGS__) + +extern void SoftDebugger (const char *message); +extern void Debugger (const char *message) __attribute__ ((noreturn)); + +#endif /* _mach_debug__debug_ */ diff --git a/kern/elf-load.c b/kern/elf-load.c new file mode 100644 index 0000000..ce86327 --- /dev/null +++ b/kern/elf-load.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 1995, 1994, 1993, 1992, 1991, 1990 + * Open Software Foundation, Inc. 
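The struct_id_* macros in debug.h above are meant to be used along these lines (the structure and magic value are made up for illustration):

#define FOO_STRUCT_ID	0x464f4f21	/* hypothetical magic number */

struct foo {
	struct_id_decl			/* becomes an `unsigned struct_id;' field unless NDEBUG */
	int data;
};

void foo_init (struct foo *f)
{
	struct_id_init (f, FOO_STRUCT_ID);
}

void foo_use (struct foo *f)
{
	struct_id_verify (f, FOO_STRUCT_ID);	/* panics if the magic does not match */
	f->data++;
}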
+ * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appears in all copies and + * that both the copyright notice and this permission notice appear in + * supporting documentation, and that the name of ("OSF") or Open Software + * Foundation not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior permission. + * + * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL OSF BE LIABLE FOR ANY + * SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * ACTION OF CONTRACT, NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE + */ +/* + * OSF Research Institute MK6.1 (unencumbered) 1/31/1995 + */ + +#include <alloca.h> +#include <mach/machine/vm_types.h> +#include <mach/exec/elf.h> +#include <mach/exec/exec.h> + +int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec, + void *handle, exec_info_t *out_info) +{ + vm_size_t actual; + Elf_Ehdr x; + Elf_Phdr *phdr, *ph; + vm_size_t phsize; + int i; + int result; + vm_offset_t loadbase = 0; + + /* Read the ELF header. */ + if ((result = (*read)(handle, 0, &x, sizeof(x), &actual)) != 0) + return result; + if (actual < sizeof(x)) + return EX_NOT_EXECUTABLE; + + if ((x.e_ident[EI_MAG0] != ELFMAG0) || + (x.e_ident[EI_MAG1] != ELFMAG1) || + (x.e_ident[EI_MAG2] != ELFMAG2) || + (x.e_ident[EI_MAG3] != ELFMAG3)) + return EX_NOT_EXECUTABLE; + + /* Make sure the file is of the right architecture. */ + if ((x.e_ident[EI_CLASS] != MY_ELF_CLASS) || + (x.e_ident[EI_DATA] != MY_EI_DATA) || + (x.e_machine != MY_E_MACHINE)) + return EX_WRONG_ARCH; + + /* Leave room for mmaps etc. before PIE binaries. + * Could add address randomization here. */ + if (x.e_type == ET_DYN || x.e_type == ET_REL) + loadbase = 128 << 20; + + /* XXX others */ + out_info->entry = (vm_offset_t) x.e_entry + loadbase; + + phsize = x.e_phnum * x.e_phentsize; + phdr = (Elf_Phdr *)alloca(phsize); + + result = (*read)(handle, x.e_phoff, phdr, phsize, &actual); + if (result) + return result; + if (actual < phsize) + return EX_CORRUPT; + + for (i = 0; i < x.e_phnum; i++) + { + ph = (Elf_Phdr *)((vm_offset_t)phdr + i * x.e_phentsize); + if (ph->p_type == PT_LOAD) + { + exec_sectype_t type = EXEC_SECTYPE_ALLOC | + EXEC_SECTYPE_LOAD; + if (ph->p_flags & PF_R) type |= EXEC_SECTYPE_READ; + if (ph->p_flags & PF_W) type |= EXEC_SECTYPE_WRITE; + if (ph->p_flags & PF_X) type |= EXEC_SECTYPE_EXECUTE; + + result = (*read_exec)(handle, + ph->p_offset, ph->p_filesz, + ph->p_vaddr + loadbase, ph->p_memsz, type); + if (result) + return result; + } + } + + return 0; +} + diff --git a/kern/eventcount.c b/kern/eventcount.c new file mode 100644 index 0000000..1cbc15a --- /dev/null +++ b/kern/eventcount.c @@ -0,0 +1,361 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. 
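Two details of exec_load() above, spelled out: the PIE load base of 128 << 20 is 0x8000000, i.e. ET_DYN and ET_REL images are shifted up by 128 MiB so the low part of the task's address space stays free for mappings; and the read/read_exec callbacks are exactly the boot_read()/read_exec() pair defined earlier in kern/bootstrap.c, so loading a boot module is just a call of the form exec_load (boot_read, read_exec, mod, &boot_exec_info), as copy_bootstrap() and user_bootstrap() do.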
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: eventcount.c + * Author: Alessandro Forin + * Date: 10/91 + * + * Eventcounters, for user-level drivers synchronization + * + */ + +#include <kern/printf.h> +#include <string.h> + +#include <mach/machine.h> +#include <kern/ast.h> +#include <kern/debug.h> +#include "cpu_number.h" +#include <kern/lock.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> + +#include <machine/machspl.h> /* For def'n of splsched() */ + +#include <kern/eventcount.h> + +#define MAX_EVCS 10 /* xxx for now */ +evc_t all_eventcounters[MAX_EVCS]; + +/* + * Initialization + */ +void +evc_init(evc_t ev) +{ + int i; + + memset(ev, 0, sizeof(*ev)); + + /* keep track of who is who */ + for (i = 0; i < MAX_EVCS; i++) + if (all_eventcounters[i] == 0) break; + if (i == MAX_EVCS) { + printf("Too many eventcounters\n"); + return; + } + + all_eventcounters[i] = ev; + ev->ev_id = i; + ev->sanity = ev; + ev->waiting_thread = THREAD_NULL; + simple_lock_init(&ev->lock); +} + +/* + * Finalization + */ +void +evc_destroy(evc_t ev) +{ + evc_signal(ev); + ev->sanity = 0; + if (all_eventcounters[ev->ev_id] == ev) + all_eventcounters[ev->ev_id] = 0; + ev->ev_id = -1; +} + +/* + * Thread termination. + * HORRIBLE. This stuff needs to be fixed. 
+ */ +void evc_notify_abort(const thread_t thread) +{ + int i; + evc_t ev; + int s = splsched(); + for (i = 0; i < MAX_EVCS; i++) { + ev = all_eventcounters[i]; + if (ev) { + simple_lock(&ev->lock); + if (ev->waiting_thread == thread) + { + ev->waiting_thread = 0; + /* Removal of a waiting thread has to bump the count by one */ + ev->count++; + } + simple_unlock(&ev->lock); + } + } + splx(s); +} + +/* + * Just so that we return success, and give + * up the stack while blocked + */ +static void __attribute__((noreturn)) +evc_continue(void) +{ + thread_syscall_return(KERN_SUCCESS); + /* NOTREACHED */ +} + +/* + * User-trappable + */ +kern_return_t evc_wait(natural_t ev_id) +{ + spl_t s; + kern_return_t ret; + evc_t ev; + + if ((ev_id >= MAX_EVCS) || + ((ev = all_eventcounters[ev_id]) == 0) || + (ev->ev_id != ev_id) || (ev->sanity != ev)) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + simple_lock(&ev->lock); + /* + * The values assumed by the "count" field are + * as follows: + * 0 At initialization time, and with no + * waiting thread means no events pending; + * with waiting thread means the event + * was signalled and the thread not yet resumed + * -1 no events, there must be a waiting thread + * N>0 no waiting thread means N pending, + * with waiting thread N-1 pending. + * + */ + if (ev->count > 0) { + ev->count--; + ret = KERN_SUCCESS; + } else { + if (ev->waiting_thread == THREAD_NULL) { + ev->count--; + ev->waiting_thread = current_thread(); + assert_wait((event_t) 0, TRUE); /* ifnot race */ + simple_unlock(&ev->lock); + thread_block(evc_continue); + return KERN_SUCCESS; + } + ret = KERN_NO_SPACE; /* XX */ + } + simple_unlock(&ev->lock); + splx(s); + return ret; +} + +/* + * User-trappable + */ +kern_return_t evc_wait_clear(natural_t ev_id) +{ + spl_t s; + evc_t ev; + + if ((ev_id >= MAX_EVCS) || + ((ev = all_eventcounters[ev_id]) == 0) || + (ev->ev_id != ev_id) || (ev->sanity != ev)) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + simple_lock(&ev->lock); + + /* + * The values assumed by the "count" field are + * as follows: + * 0 At initialization time, and with no + * waiting thread means no events pending; + * with waiting thread means the event + * was signalled and the thread not yet resumed + * -1 no events, there must be a waiting thread + * N>0 no waiting thread means N pending, + * with waiting thread N-1 pending. + * + */ + /* + * Note that we always clear count before blocking. + */ + if (ev->waiting_thread == THREAD_NULL) { + ev->count = -1; + ev->waiting_thread = current_thread(); + assert_wait((event_t) 0, TRUE); /* ifnot race */ + simple_unlock(&ev->lock); + thread_block(evc_continue); + /* NOTREACHED */ + } + + simple_unlock(&ev->lock); + splx(s); + return KERN_NO_SPACE; /* XX */ +} + +/* + * Called exclusively from interrupt context + */ +void +evc_signal(evc_t ev) +{ + volatile thread_t thread; + int state; + spl_t s; + if (ev->sanity != ev) + return; + + s = splsched(); + simple_lock(&ev->lock); + ev->count++; + if (thread = ev->waiting_thread, thread != THREAD_NULL) + { + ev->waiting_thread = 0; + +#if (NCPUS > 1) + retry: + while((thread->state & TH_RUN) || thread->lock.lock_data) + cpu_pause(); +#endif + thread_lock(thread); + + /* make thread runnable on this processor */ + /* taken from clear_wait */ + switch ((state = thread->state) & TH_SCHED_STATE) + { + case TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_UNINT: + case TH_WAIT: + /* + * Sleeping and not suspendable - put + * on run queue. 
+ */ + thread->state = (state &~ TH_WAIT) | TH_RUN; +#if NCPUS > 1 + thread_setrun(thread, TRUE); +#else + simpler_thread_setrun(thread, TRUE); +#endif + thread_unlock(thread); + break; + + case TH_RUN | TH_WAIT: +#if (NCPUS > 1) + /* + * Legal on MP: between assert_wait() + * and thread_block(), in evc_wait() above. + * + * Mmm. Maybe don't need now that the while(..) check is + * done before the thread lock is grabbed..... + */ + thread_unlock(thread); + goto retry; +#else + /*FALLTHROUGH*/ +#endif + case TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + + /* + * Either already running, or suspended. + * Just clear the wait. + */ + thread->state = state &~ TH_WAIT; + thread_unlock(thread); + break; + + default: + /* + * Not waiting. + */ + panic("evc_signal.3"); + thread_unlock(thread); + break; + } + } + + simple_unlock(&ev->lock); + splx(s); +} + +#if NCPUS <= 1 +/* + * The scheduler is too messy for my old little brain + */ +void +simpler_thread_setrun( + thread_t th, + boolean_t may_preempt) +{ + struct run_queue *rq; + int whichq; + + /* + * XXX should replace queue with a boolean in this case. + */ + if (default_pset.idle_count > 0) { + processor_t processor; + + processor = (processor_t) queue_first(&default_pset.idle_queue); + queue_remove(&default_pset.idle_queue, processor, + processor_t, processor_queue); + default_pset.idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + return; + } + rq = &(master_processor->runq); + ast_on(cpu_number(), AST_BLOCK); + + whichq = (th)->sched_pri; + runq_lock(rq); /* lock the run queue */ + enqueue_head(&(rq)->runq[whichq], &((th)->links)); + + if (whichq < (rq)->low || (rq)->count == 0) + (rq)->low = whichq; /* minimize */ + (rq)->count++; +#ifdef MIGRATING_THREADS + (th)->shuttle.runq = (rq); +#else + (th)->runq = (rq); +#endif + runq_unlock(rq); + + /* + * Turn off first_quantum to allow context switch. + */ + current_processor()->first_quantum = FALSE; +} +#endif /* NCPUS > 1 */ + diff --git a/kern/eventcount.h b/kern/eventcount.h new file mode 100644 index 0000000..598d7e0 --- /dev/null +++ b/kern/eventcount.h @@ -0,0 +1,66 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ +/* + * File: eventcount.c + * Author: Alessandro Forin + * Date: 10/91 + * + * Eventcounters, for user-level drivers synchronization + * + */ + +#ifndef _KERN_EVENTCOUNT_H_ +#define _KERN_EVENTCOUNT_H_ 1 + +#include <kern/lock.h> + +/* kernel visible only */ + +typedef struct evc { + int count; + thread_t waiting_thread; + natural_t ev_id; + struct evc *sanity; + decl_simple_lock_data(, lock) +} *evc_t; + +extern void evc_init(evc_t ev), + evc_destroy(evc_t ev), + evc_signal(evc_t ev), + evc_notify_abort(thread_t thread); + +/* kernel and user visible */ + +extern kern_return_t evc_wait(natural_t ev_id); +extern kern_return_t evc_wait_clear(natural_t ev_id); + +#if NCPUS <= 1 +void simpler_thread_setrun( + thread_t th, + boolean_t may_preempt); +#endif + +#endif /* _KERN_EVENTCOUNT_H_ */ diff --git a/kern/exc.defs b/kern/exc.defs new file mode 100644 index 0000000..e614fff --- /dev/null +++ b/kern/exc.defs @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2016 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* We use custom functions to send exceptions. These functions can + be found in `exception.c'. We use this file merely to produce the + list of message ids. */ + +#include <mach/exc.defs> diff --git a/kern/exception.c b/kern/exception.c new file mode 100644 index 0000000..15f2970 --- /dev/null +++ b/kern/exception.c @@ -0,0 +1,1023 @@ +/* + * Mach Operating System + * Copyright (c) 1993,1992,1991,1990,1989,1988,1987 Carnegie Mellon University. + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/message.h> +#include <mach/port.h> +#include <mach/mig_errors.h> +#include <machine/locore.h> +#include <ipc/port.h> +#include <ipc/ipc_entry.h> +#include <ipc/ipc_notify.h> +#include <ipc/ipc_object.h> +#include <ipc/ipc_space.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_pset.h> +#include <ipc/mach_msg.h> +#include <ipc/ipc_machdep.h> +#include <kern/counters.h> +#include <kern/debug.h> +#include <kern/ipc_tt.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/processor.h> +#include <kern/printf.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/exception.h> +#include <kern/macros.h> +#include <mach/machine/vm_types.h> + +#if MACH_KDB +#include <machine/trap.h> +#include <ddb/db_output.h> + +boolean_t debug_user_with_kdb = FALSE; +#endif /* MACH_KDB */ + +#ifdef KEEP_STACKS +/* + * Some obsolete architectures don't support kernel stack discarding + * or the thread_exception_return, thread_syscall_return continuations. + * For these architectures, the NOTREACHED comments below are incorrect. + * The exception function is expected to return. + * So the return statements along the slow paths are important. + */ +#endif /* KEEP_STACKS */ + +/* + * Routine: exception + * Purpose: + * The current thread caught an exception. + * We make an up-call to the thread's exception server. + * Conditions: + * Nothing locked and no resources held. + * Called from an exception context, so + * thread_exception_return and thread_kdb_return + * are possible. + * Returns: + * Doesn't return. + */ + +void +exception( + integer_t _exception, + integer_t code, + long_integer_t subcode) +{ + ipc_thread_t self = current_thread(); + ipc_port_t exc_port; + + if (_exception == KERN_SUCCESS) + panic("exception"); + + /* + * Optimized version of retrieve_thread_exception. + */ + + ith_lock(self); + assert(self->ith_self != IP_NULL); + exc_port = self->ith_exception; + if (!IP_VALID(exc_port)) { + ith_unlock(self); + exception_try_task(_exception, code, subcode); + /*NOTREACHED*/ + } + + ip_lock(exc_port); + ith_unlock(self); + if (!ip_active(exc_port)) { + ip_unlock(exc_port); + exception_try_task(_exception, code, subcode); + /*NOTREACHED*/ + } + + /* + * Make a naked send right for the exception port. + */ + + ip_reference(exc_port); + exc_port->ip_srights++; + ip_unlock(exc_port); + + /* + * If this exception port doesn't work, + * we will want to try the task's exception port. + * Indicate this by saving the exception state. + */ + + self->ith_exc = _exception; + self->ith_exc_code = code; + self->ith_exc_subcode = subcode; + + exception_raise(exc_port, + retrieve_thread_self_fast(self), + retrieve_task_self_fast(self->task), + _exception, code, subcode); + /*NOTREACHED*/ +} + +/* + * Routine: exception_try_task + * Purpose: + * The current thread caught an exception. + * We make an up-call to the task's exception server. + * Conditions: + * Nothing locked and no resources held. + * Called from an exception context, so + * thread_exception_return and thread_kdb_return + * are possible. + * Returns: + * Doesn't return. + */ + +void +exception_try_task( + integer_t _exception, + integer_t code, + long_integer_t subcode) +{ + ipc_thread_t self = current_thread(); + task_t task = self->task; + ipc_port_t exc_port; + + /* + * Optimized version of retrieve_task_exception. 
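+ * Look up the task's exception port inline: hold itk_lock just long
+ * enough to check that the port is present and active, then make a
+ * naked send right for it.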
+ */ + + itk_lock(task); + assert(task->itk_self != IP_NULL); + exc_port = task->itk_exception; + if (!IP_VALID(exc_port)) { + itk_unlock(task); + exception_no_server(); + /*NOTREACHED*/ + } + + ip_lock(exc_port); + itk_unlock(task); + if (!ip_active(exc_port)) { + ip_unlock(exc_port); + exception_no_server(); + /*NOTREACHED*/ + } + + /* + * Make a naked send right for the exception port. + */ + + ip_reference(exc_port); + exc_port->ip_srights++; + ip_unlock(exc_port); + + /* + * This is the thread's last chance. + * Clear the saved exception state. + */ + + self->ith_exc = KERN_SUCCESS; + + exception_raise(exc_port, + retrieve_thread_self_fast(self), + retrieve_task_self_fast(task), + _exception, code, subcode); + /*NOTREACHED*/ +} + +/* + * Routine: exception_no_server + * Purpose: + * The current thread took an exception, + * and no exception server took responsibility + * for the exception. So good bye, charlie. + * Conditions: + * Nothing locked and no resources held. + * Called from an exception context, so + * thread_kdb_return is possible. + * Returns: + * Doesn't return. + */ + +void +exception_no_server(void) +{ + ipc_thread_t self = current_thread(); + + /* + * If this thread is being terminated, cooperate. + */ + + while (thread_should_halt(self)) + thread_halt_self(thread_exception_return); + + +#if 0 + if (thread_suspend (self) == KERN_SUCCESS) + thread_exception_return (); +#endif + +#if MACH_KDB + if (debug_user_with_kdb) { + /* + * Debug the exception with kdb. + * If kdb handles the exception, + * then thread_kdb_return won't return. + */ + + db_printf("No exception server, calling kdb...\n"); + thread_kdb_return(); + } +#endif /* MACH_KDB */ + + /* + * All else failed; terminate task. + */ + + (void) task_terminate(self->task); + thread_halt_self(thread_exception_return); + panic("terminating the task didn't kill us"); + /*NOTREACHED*/ +} + +#define MACH_EXCEPTION_ID 2400 /* from mach/exc.defs */ +#define MACH_EXCEPTION_REPLY_ID (MACH_EXCEPTION_ID + 100) + +struct mach_exception { + mach_msg_header_t Head; + mach_msg_type_t threadType; + mach_port_t thread; + mach_msg_type_t taskType; + mach_port_t task; + mach_msg_type_t exceptionType; + integer_t exception; + mach_msg_type_t codeType; + integer_t code; + mach_msg_type_t subcodeType; + rpc_long_integer_t subcode; +}; + +#define INTEGER_T_SIZE_IN_BITS (8 * sizeof(integer_t)) +#define INTEGER_T_TYPE MACH_MSG_TYPE_INTEGER_T +#define RPC_LONG_INTEGER_T_SIZE_IN_BITS (8 * sizeof(rpc_long_integer_t)) +#if defined(__x86_64__) && !defined(USER32) +#define RPC_LONG_INTEGER_T_TYPE MACH_MSG_TYPE_INTEGER_64 +#else +#define RPC_LONG_INTEGER_T_TYPE MACH_MSG_TYPE_INTEGER_32 +#endif + /* in mach/machine/vm_types.h */ + +mach_msg_type_t exc_port_proto = { + .msgt_name = MACH_MSG_TYPE_PORT_SEND, + .msgt_size = PORT_T_SIZE_IN_BITS, + .msgt_number = 1, + .msgt_inline = TRUE, + .msgt_longform = FALSE, + .msgt_deallocate = FALSE, + .msgt_unused = 0 +}; + +mach_msg_type_t exc_code_proto = { + .msgt_name = INTEGER_T_TYPE, + .msgt_size = INTEGER_T_SIZE_IN_BITS, + .msgt_number = 1, + .msgt_inline = TRUE, + .msgt_longform = FALSE, + .msgt_deallocate = FALSE, + .msgt_unused = 0 +}; + +mach_msg_type_t exc_subcode_proto = { + .msgt_name = RPC_LONG_INTEGER_T_TYPE, + .msgt_size = RPC_LONG_INTEGER_T_SIZE_IN_BITS, + .msgt_number = 1, + .msgt_inline = TRUE, + .msgt_longform = FALSE, + .msgt_deallocate = FALSE, + .msgt_unused = 0 +}; + +/* + * Routine: exception_raise + * Purpose: + * Make an exception_raise up-call to an exception server. 
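+ * The fast path hands the synthesized message directly to a thread
+ * already blocked in mach_msg on the destination port; anything that
+ * cannot be handled that way falls through to slow_exception_raise,
+ * which sends through the regular message queue and blocks for the
+ * reply.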
+ * + * dest_port must be a valid naked send right. + * thread_port and task_port are naked send rights. + * All three are always consumed. + * + * self->ith_exc, self->ith_exc_code, self->ith_exc_subcode + * must be appropriately initialized. + * Conditions: + * Nothing locked. We are being called in an exception context, + * so thread_exception_return may be called. + * Returns: + * Doesn't return. + */ + +int exception_raise_misses = 0; + +void +exception_raise( + ipc_port_t dest_port, + ipc_port_t thread_port, + ipc_port_t task_port, + integer_t _exception, + integer_t code, + long_integer_t subcode) +{ + ipc_thread_t self = current_thread(); + ipc_thread_t receiver; + ipc_port_t reply_port; + ipc_mqueue_t dest_mqueue; + ipc_mqueue_t reply_mqueue; + ipc_kmsg_t kmsg; + mach_msg_return_t mr; + + assert(IP_VALID(dest_port)); + + /* + * We will eventually need a message buffer. + * Grab the buffer now, while nothing is locked. + * This buffer will get handed to the exception server, + * and it will give the buffer back with its reply. + */ + + kmsg = ikm_cache_alloc(); + if (kmsg == IKM_NULL) + panic("exception_raise"); + + /* + * We need a reply port for the RPC. + * Check first for a cached port. + */ + + ith_lock(self); + assert(self->ith_self != IP_NULL); + + reply_port = self->ith_rpc_reply; + if (reply_port == IP_NULL) { + ith_unlock(self); + reply_port = ipc_port_alloc_reply(); + ith_lock(self); + if ((reply_port == IP_NULL) || + (self->ith_rpc_reply != IP_NULL)) + panic("exception_raise"); + self->ith_rpc_reply = reply_port; + } + + ip_lock(reply_port); + assert(ip_active(reply_port)); + ith_unlock(self); + + /* + * Make a naked send-once right for the reply port, + * to hand to the exception server. + * Make an extra reference for the reply port, + * to receive on. This protects us against + * mach_msg_abort_rpc. + */ + + reply_port->ip_sorights++; + ip_reference(reply_port); + + ip_reference(reply_port); + self->ith_port = reply_port; + + reply_mqueue = &reply_port->ip_messages; + imq_lock(reply_mqueue); + assert(ipc_kmsg_queue_empty(&reply_mqueue->imq_messages)); + ip_unlock(reply_port); + + /* + * Make sure we can queue to the destination port. + */ + + if (!ip_lock_try(dest_port)) { + imq_unlock(reply_mqueue); + goto slow_exception_raise; + } + + if (!ip_active(dest_port) || + (dest_port->ip_receiver == ipc_space_kernel)) { + imq_unlock(reply_mqueue); + ip_unlock(dest_port); + goto slow_exception_raise; + } + + /* + * Find the destination message queue. + */ + + { + ipc_pset_t dest_pset; + + dest_pset = dest_port->ip_pset; + if (dest_pset == IPS_NULL) + dest_mqueue = &dest_port->ip_messages; + else + dest_mqueue = &dest_pset->ips_messages; + } + + if (!imq_lock_try(dest_mqueue)) { + imq_unlock(reply_mqueue); + ip_unlock(dest_port); + goto slow_exception_raise; + } + + /* + * Safe to unlock dest_port, because we hold + * dest_mqueue locked. We never bother changing + * dest_port->ip_msgcount. 
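+ * On this path the message never sits on the port's message queue
+ * (it is handed directly to the receiver), so maintaining the count
+ * would serve no purpose.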
+ */ + + ip_unlock(dest_port); + + receiver = ipc_thread_queue_first(&dest_mqueue->imq_threads); + if ((receiver == ITH_NULL) || + !((receiver->swap_func == mach_msg_continue) || + ((receiver->swap_func == mach_msg_receive_continue) && + (sizeof(struct mach_exception) <= receiver->ith_msize) && + ((receiver->ith_option & MACH_RCV_NOTIFY) == 0))) || + !thread_handoff(self, exception_raise_continue, receiver)) { + imq_unlock(reply_mqueue); + imq_unlock(dest_mqueue); + goto slow_exception_raise; + } + counter(c_exception_raise_block++); + + assert(current_thread() == receiver); + + /* + * We need to finish preparing self for its + * time asleep in reply_mqueue. self is left + * holding the extra ref for reply_port. + */ + + ipc_thread_enqueue_macro(&reply_mqueue->imq_threads, self); + self->ith_state = MACH_RCV_IN_PROGRESS; + self->ith_msize = MACH_MSG_SIZE_MAX; + imq_unlock(reply_mqueue); + + /* + * Finish extracting receiver from dest_mqueue. + */ + + ipc_thread_rmqueue_first_macro( + &dest_mqueue->imq_threads, receiver); + imq_unlock(dest_mqueue); + + /* + * Release the receiver's reference for his object. + */ + { + ipc_object_t object = receiver->ith_object; + + io_lock(object); + io_release(object); + io_check_unlock(object); + } + + { + struct mach_exception *exc = + (struct mach_exception *) &kmsg->ikm_header; + ipc_space_t space = receiver->task->itk_space; + + /* + * We are running as the receiver now. We hold + * the following resources, which must be consumed: + * kmsg, send-once right for reply_port + * send rights for dest_port, thread_port, task_port + * Synthesize a kmsg for copyout to the receiver. + */ + + exc->Head.msgh_bits = (MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, + MACH_MSG_TYPE_PORT_SEND) | + MACH_MSGH_BITS_COMPLEX); + exc->Head.msgh_size = sizeof *exc; + /* exc->Head.msgh_remote_port later */ + /* exc->Head.msgh_local_port later */ + exc->Head.msgh_seqno = 0; + exc->Head.msgh_id = MACH_EXCEPTION_ID; + exc->threadType = exc_port_proto; + /* exc->thread later */ + exc->taskType = exc_port_proto; + /* exc->task later */ + exc->exceptionType = exc_code_proto; + exc->exception = _exception; + exc->codeType = exc_code_proto; + exc->code = code; + exc->subcodeType = exc_subcode_proto; + exc->subcode = subcode; + + /* + * Check that the receiver can handle the message. + */ + + if (receiver->ith_rcv_size < sizeof(struct mach_exception)) { + /* + * ipc_kmsg_destroy is a handy way to consume + * the resources we hold, but it requires setup. + */ + + exc->Head.msgh_bits = + (MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, + MACH_MSG_TYPE_PORT_SEND_ONCE) | + MACH_MSGH_BITS_COMPLEX); + exc->Head.msgh_remote_port = (mach_port_t) dest_port; + exc->Head.msgh_local_port = (mach_port_t) reply_port; + exc->thread = (mach_port_t) thread_port; + exc->task = (mach_port_t) task_port; + + ipc_kmsg_destroy(kmsg); + thread_syscall_return(MACH_RCV_TOO_LARGE); + /*NOTREACHED*/ + } + + is_write_lock(space); + assert(space->is_active); + + /* + * To do an atomic copyout, need simultaneous + * locks on both ports and the space. + */ + + ip_lock(dest_port); + if (!ip_active(dest_port) || + !ip_lock_try(reply_port)) { + abort_copyout: + ip_unlock(dest_port); + is_write_unlock(space); + + /* + * Oh well, we have to do the header the slow way. + * First make it look like it's in-transit. 
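+ * That is, fill in msgh_bits and the two header ports exactly as a
+ * queued message would carry them, so ipc_kmsg_copyout_header can
+ * treat this kmsg like any other received message.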
+ */ + + exc->Head.msgh_bits = + (MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, + MACH_MSG_TYPE_PORT_SEND_ONCE) | + MACH_MSGH_BITS_COMPLEX); + exc->Head.msgh_remote_port = (mach_port_t) dest_port; + exc->Head.msgh_local_port = (mach_port_t) reply_port; + + mr = ipc_kmsg_copyout_header(&exc->Head, space, + MACH_PORT_NULL); + if (mr == MACH_MSG_SUCCESS) + goto copyout_body; + + /* + * Ack! Prepare for ipc_kmsg_copyout_dest. + * It will consume thread_port and task_port. + */ + + exc->thread = (mach_port_t) thread_port; + exc->task = (mach_port_t) task_port; + + ipc_kmsg_copyout_dest(kmsg, space); + (void) ipc_kmsg_put(receiver->ith_msg, kmsg, + sizeof(mach_msg_header_t)); + thread_syscall_return(mr); + /*NOTREACHED*/ + } + + if (!ip_active(reply_port)) { + ip_unlock(reply_port); + goto abort_copyout; + } + + assert(reply_port->ip_sorights > 0); + ip_unlock(reply_port); + + { + kern_return_t kr; + ipc_entry_t entry; + mach_port_name_t port_name; + + kr = ipc_entry_get (space, &port_name, &entry); + if (kr) + goto abort_copyout; + exc->Head.msgh_remote_port = (mach_port_t) port_name; + { + mach_port_gen_t gen; + + assert((entry->ie_bits &~ IE_BITS_GEN_MASK) == 0); + gen = entry->ie_bits + IE_BITS_GEN_ONE; + + /* optimized ipc_right_copyout */ + + entry->ie_bits = gen | (MACH_PORT_TYPE_SEND_ONCE | 1); + } + + entry->ie_object = (ipc_object_t) reply_port; + is_write_unlock(space); + } + + /* optimized ipc_object_copyout_dest */ + + assert(dest_port->ip_srights > 0); + ip_release(dest_port); + + exc->Head.msgh_local_port = + ((dest_port->ip_receiver == space) ? + dest_port->ip_receiver_name : MACH_PORT_NULL); + + if ((--dest_port->ip_srights == 0) && + (dest_port->ip_nsrequest != IP_NULL)) { + ipc_port_t nsrequest; + mach_port_mscount_t mscount; + + /* a rather rare case */ + + nsrequest = dest_port->ip_nsrequest; + mscount = dest_port->ip_mscount; + dest_port->ip_nsrequest = IP_NULL; + ip_unlock(dest_port); + + ipc_notify_no_senders(nsrequest, mscount); + } else + ip_unlock(dest_port); + + copyout_body: + /* + * Optimized version of ipc_kmsg_copyout_body, + * to handle the two ports in the body. + */ + + mr = (ipc_kmsg_copyout_object_to_port(space, (ipc_object_t) thread_port, + MACH_MSG_TYPE_PORT_SEND, &exc->thread) | + ipc_kmsg_copyout_object_to_port(space, (ipc_object_t) task_port, + MACH_MSG_TYPE_PORT_SEND, &exc->task)); + if (mr != MACH_MSG_SUCCESS) { + (void) ipc_kmsg_put(receiver->ith_msg, kmsg, + kmsg->ikm_header.msgh_size); + thread_syscall_return(mr | MACH_RCV_BODY_ERROR); + /*NOTREACHED*/ + } + } + + /* + * Optimized version of ipc_kmsg_put. + * We must check ikm_cache after copyoutmsg. 
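+ * Copy the message out to the receiver first; only then try to hand
+ * the buffer back to the kmsg cache. If either step fails, fall back
+ * to the ordinary ipc_kmsg_put path.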
+ */ + + ikm_check_initialized(kmsg, kmsg->ikm_size); + assert(kmsg->ikm_size == IKM_SAVED_KMSG_SIZE); + + if (copyoutmsg(&kmsg->ikm_header, receiver->ith_msg, + sizeof(struct mach_exception))) { + mr = ipc_kmsg_put(receiver->ith_msg, kmsg, + kmsg->ikm_header.msgh_size); + thread_syscall_return(mr); + /*NOTREACHED*/ + } + + if (!ikm_cache_free_try(kmsg)) { + mr = ipc_kmsg_put(receiver->ith_msg, kmsg, + kmsg->ikm_header.msgh_size); + thread_syscall_return(mr); + /*NOTREACHED*/ + } + + thread_syscall_return(MACH_MSG_SUCCESS); + /*NOTREACHED*/ +#ifndef __GNUC__ + return; /* help for the compiler */ +#endif + + slow_exception_raise: { + struct mach_exception *exc = + (struct mach_exception *) &kmsg->ikm_header; + ipc_kmsg_t reply_kmsg; + mach_port_seqno_t reply_seqno; + + exception_raise_misses++; + + /* + * We hold the following resources, which must be consumed: + * kmsg, send-once right and ref for reply_port + * send rights for dest_port, thread_port, task_port + * Synthesize a kmsg to send. + */ + + exc->Head.msgh_bits = (MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, + MACH_MSG_TYPE_PORT_SEND_ONCE) | + MACH_MSGH_BITS_COMPLEX); + exc->Head.msgh_size = sizeof *exc; + exc->Head.msgh_remote_port = (mach_port_t) dest_port; + exc->Head.msgh_local_port = (mach_port_t) reply_port; + exc->Head.msgh_seqno = 0; + exc->Head.msgh_id = MACH_EXCEPTION_ID; + exc->threadType = exc_port_proto; + exc->thread = (mach_port_t) thread_port; + exc->taskType = exc_port_proto; + exc->task = (mach_port_t) task_port; + exc->exceptionType = exc_code_proto; + exc->exception = _exception; + exc->codeType = exc_code_proto; + exc->code = code; + exc->subcodeType = exc_subcode_proto; + exc->subcode = subcode; + + ipc_mqueue_send_always(kmsg); + + /* + * We are left with a ref for reply_port, + * which we use to receive the reply message. + */ + + ip_lock(reply_port); + if (!ip_active(reply_port)) { + ip_unlock(reply_port); + exception_raise_continue_slow(MACH_RCV_PORT_DIED, IKM_NULL, /*dummy*/0); + /*NOTREACHED*/ + } + + imq_lock(reply_mqueue); + ip_unlock(reply_port); + + mr = ipc_mqueue_receive(reply_mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + FALSE, exception_raise_continue, + &reply_kmsg, &reply_seqno); + /* reply_mqueue is unlocked */ + + exception_raise_continue_slow(mr, reply_kmsg, reply_seqno); + /*NOTREACHED*/ + } +} + +/* Macro used by MIG to cleanly check the type. */ +#define BAD_TYPECHECK(type, check) unlikely (({\ + union { mach_msg_type_t t; uint32_t w; } _t, _c;\ + _t.t = *(type); _c.t = *(check);_t.w != _c.w; })) + +/* Type descriptor for the return code. */ +mach_msg_type_t exc_RetCode_proto = { + .msgt_name = MACH_MSG_TYPE_INTEGER_32, + .msgt_size = 32, + .msgt_number = 1, + .msgt_inline = TRUE, + .msgt_longform = FALSE, + .msgt_deallocate = FALSE, + .msgt_unused = 0 +}; + +/* + * Routine: exception_parse_reply + * Purpose: + * Parse and consume an exception reply message. + * Conditions: + * The destination port right has already been consumed. + * The message buffer and anything else in it is consumed. + * Returns: + * The reply return code. 
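+ *
+ * A reply that does not match the expected exc reply layout
+ * (header bits, size, msgh_id, RetCode type) is destroyed and
+ * reported as MIG_REPLY_MISMATCH.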
+ */ + +kern_return_t +exception_parse_reply(ipc_kmsg_t kmsg) +{ + mig_reply_header_t *msg = + (mig_reply_header_t *) &kmsg->ikm_header; + kern_return_t kr; + + if ((msg->Head.msgh_bits != + MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, 0)) || + (msg->Head.msgh_size != sizeof *msg) || + (msg->Head.msgh_id != MACH_EXCEPTION_REPLY_ID) || + (BAD_TYPECHECK(&msg->RetCodeType, &exc_RetCode_proto))) { + /* + * Bozo user sent us a misformatted reply. + */ + + kmsg->ikm_header.msgh_remote_port = MACH_PORT_NULL; + ipc_kmsg_destroy(kmsg); + return MIG_REPLY_MISMATCH; + } + + kr = msg->RetCode; + + ikm_cache_free(kmsg); + + return kr; +} + +/* + * Routine: exception_raise_continue + * Purpose: + * Continue after blocking for an exception. + * Conditions: + * Nothing locked. We are running on a new kernel stack, + * with the exception state saved in the thread. From here + * control goes back to user space. + * Returns: + * Doesn't return. + */ + +void +exception_raise_continue(void) +{ + ipc_thread_t self = current_thread(); + ipc_port_t reply_port = self->ith_port; + ipc_mqueue_t reply_mqueue = &reply_port->ip_messages; + ipc_kmsg_t kmsg; + mach_port_seqno_t seqno; + mach_msg_return_t mr; + + mr = ipc_mqueue_receive(reply_mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + TRUE, exception_raise_continue, + &kmsg, &seqno); + /* reply_mqueue is unlocked */ + + exception_raise_continue_slow(mr, kmsg, seqno); + /*NOTREACHED*/ +} + +/* + * Routine: thread_release_and_exception_return + * Purpose: + * Continue after thread was halted. + * Conditions: + * Nothing locked. We are running on a new kernel stack and + * control goes back to thread_exception_return. + * Returns: + * Doesn't return. + */ +static void +thread_release_and_exception_return(void) +{ + ipc_thread_t self = current_thread(); + /* reply port must be released */ + ipc_port_release(self->ith_port); + thread_exception_return(); + /*NOTREACHED*/ +} + +/* + * Routine: exception_raise_continue_slow + * Purpose: + * Continue after finishing an ipc_mqueue_receive + * for an exception reply message. + * Conditions: + * Nothing locked. We hold a ref for reply_port. + * Returns: + * Doesn't return. + */ + +void +exception_raise_continue_slow( + mach_msg_return_t mr, + ipc_kmsg_t kmsg, + mach_port_seqno_t seqno) +{ + ipc_thread_t self = current_thread(); + ipc_port_t reply_port = self->ith_port; + ipc_mqueue_t reply_mqueue = &reply_port->ip_messages; + + while (mr == MACH_RCV_INTERRUPTED) { + /* + * Somebody is trying to force this thread + * to a clean point. We must cooperate + * and then resume the receive. + */ + + while (thread_should_halt(self)) { + /* if thread is about to terminate, release the port */ + if (self->ast & AST_TERMINATE) + ipc_port_release(reply_port); + /* + * Use the continuation to release the port in + * case the thread is about to halt. + */ + thread_halt_self(thread_release_and_exception_return); + } + + ip_lock(reply_port); + if (!ip_active(reply_port)) { + ip_unlock(reply_port); + mr = MACH_RCV_PORT_DIED; + break; + } + + imq_lock(reply_mqueue); + ip_unlock(reply_port); + + mr = ipc_mqueue_receive(reply_mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + FALSE, exception_raise_continue, + &kmsg, &seqno); + /* reply_mqueue is unlocked */ + } + ipc_port_release(reply_port); + + assert((mr == MACH_MSG_SUCCESS) || + (mr == MACH_RCV_PORT_DIED)); + + if (mr == MACH_MSG_SUCCESS) { + /* + * Consume the reply message. 
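+ * Drop the send-once right that came back with the reply and
+ * pull the return code out of the kmsg.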
+ */ + + ipc_port_release_sonce(reply_port); + mr = exception_parse_reply(kmsg); + } + + if ((mr == KERN_SUCCESS) || + (mr == MACH_RCV_PORT_DIED)) { + thread_exception_return(); + /*NOTREACHED*/ + } + + if (self->ith_exc != KERN_SUCCESS) { + exception_try_task(self->ith_exc, + self->ith_exc_code, + self->ith_exc_subcode); + /*NOTREACHED*/ + } + + exception_no_server(); + /*NOTREACHED*/ +} + +/* + * Routine: exception_raise_continue_fast + * Purpose: + * Special-purpose fast continuation for exceptions. + * Conditions: + * reply_port is locked and alive. + * kmsg is our reply message. + * Returns: + * Doesn't return. + */ + +void +exception_raise_continue_fast( + ipc_port_t reply_port, + ipc_kmsg_t kmsg) +{ + ipc_thread_t self = current_thread(); + kern_return_t kr; + + assert(ip_active(reply_port)); + assert(reply_port == self->ith_port); + assert(reply_port == (ipc_port_t) kmsg->ikm_header.msgh_remote_port); + assert(MACH_MSGH_BITS_REMOTE(kmsg->ikm_header.msgh_bits) == + MACH_MSG_TYPE_PORT_SEND_ONCE); + + /* + * Release the send-once right (from the message header) + * and the saved reference (from self->ith_port). + */ + + reply_port->ip_sorights--; + ip_release(reply_port); + ip_release(reply_port); + ip_unlock(reply_port); + + /* + * Consume the reply message. + */ + + kr = exception_parse_reply(kmsg); + if (kr == KERN_SUCCESS) { + thread_exception_return(); + /*NOTREACHED*/ + } + + if (self->ith_exc != KERN_SUCCESS) { + exception_try_task(self->ith_exc, + self->ith_exc_code, + self->ith_exc_subcode); + /*NOTREACHED*/ + } + + exception_no_server(); + /*NOTREACHED*/ +} diff --git a/kern/exception.h b/kern/exception.h new file mode 100644 index 0000000..36138da --- /dev/null +++ b/kern/exception.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2013 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#ifndef _KERN_EXCEPTION_H_ +#define _KERN_EXCEPTION_H_ + +#include <ipc/ipc_types.h> +#include <ipc/ipc_kmsg.h> + +extern void +exception( + integer_t _exception, + integer_t code, + long_integer_t subcode) __attribute__ ((noreturn)); + +extern void +exception_try_task( + integer_t _exception, + integer_t code, + long_integer_t subcode) __attribute__ ((noreturn)); + +extern void +exception_no_server(void) __attribute__ ((noreturn)); + +extern void +exception_raise( + ipc_port_t dest_port, + ipc_port_t thread_port, + ipc_port_t task_port, + integer_t _exception, + integer_t code, + long_integer_t subcode) __attribute__ ((noreturn)); + +extern kern_return_t +exception_parse_reply(ipc_kmsg_t kmsg); + +extern void +exception_raise_continue(void) __attribute__ ((noreturn)); + +extern void +exception_raise_continue_slow( + mach_msg_return_t mr, + ipc_kmsg_t kmsg, + mach_port_seqno_t seqno) __attribute__ ((noreturn)); + +extern void +exception_raise_continue_fast( + ipc_port_t reply_port, + ipc_kmsg_t kmsg) __attribute__ ((noreturn)); + +#endif /* _KERN_EXCEPTION_H_ */ diff --git a/kern/experimental.srv b/kern/experimental.srv new file mode 100644 index 0000000..2ccfd78 --- /dev/null +++ b/kern/experimental.srv @@ -0,0 +1,3 @@ +#define KERNEL_SERVER 1 + +#include <mach/experimental.defs> diff --git a/kern/gnumach.srv b/kern/gnumach.srv new file mode 100644 index 0000000..38bc909 --- /dev/null +++ b/kern/gnumach.srv @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2012 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* This is a server presentation file. */ + +#define KERNEL_SERVER 1 + +#include <mach/gnumach.defs> diff --git a/kern/gsync.c b/kern/gsync.c new file mode 100644 index 0000000..e73a6cf --- /dev/null +++ b/kern/gsync.c @@ -0,0 +1,517 @@ +/* Copyright (C) 2016 Free Software Foundation, Inc. + Contributed by Agustina Arzille <avarzille@riseup.net>, 2016. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either + version 2 of the license, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this program; if not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <kern/gsync.h> +#include <kern/kmutex.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> +#include <kern/list.h> +#include <vm/vm_map.h> +#include <vm/vm_kern.h> + +/* An entry in the global hash table. 
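+ * Each bucket keeps a list of gsync_waiter entries, sorted by key and
+ * protected by a sleeping kmutex.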
*/ +struct gsync_hbucket +{ + struct list entries; + struct kmutex lock; +}; + +/* A key used to uniquely identify an address that a thread is + * waiting on. Its members' values depend on whether said + * address is shared or task-local. Note that different types of keys + * should never compare equal, since a task map should never have + * the same address as a VM object. */ +union gsync_key +{ + struct + { + vm_map_t map; + vm_offset_t addr; + } local; + + struct + { + vm_object_t obj; + vm_offset_t off; + } shared; + + struct + { + unsigned long u; + unsigned long v; + } any; +}; + +/* A thread that is blocked on an address with 'gsync_wait'. */ +struct gsync_waiter +{ + struct list link; + union gsync_key key; + thread_t waiter; +}; + +/* Needed data for temporary mappings. */ +struct vm_args +{ + vm_object_t obj; + vm_offset_t off; +}; + +#define GSYNC_NBUCKETS 512 +static struct gsync_hbucket gsync_buckets[GSYNC_NBUCKETS]; + +void gsync_setup (void) +{ + int i; + for (i = 0; i < GSYNC_NBUCKETS; ++i) + { + list_init (&gsync_buckets[i].entries); + kmutex_init (&gsync_buckets[i].lock); + } +} + +/* Convenience comparison functions for gsync_key's. */ + +static inline int +gsync_key_eq (const union gsync_key *lp, + const union gsync_key *rp) +{ + return (lp->any.u == rp->any.u && lp->any.v == rp->any.v); +} + +static inline int +gsync_key_lt (const union gsync_key *lp, + const union gsync_key *rp) +{ + return (lp->any.u < rp->any.u || + (lp->any.u == rp->any.u && lp->any.v < rp->any.v)); +} + +#define MIX2_LL(x, y) ((((x) << 5) | ((x) >> 27)) ^ (y)) + +static inline unsigned int +gsync_key_hash (const union gsync_key *keyp) +{ + unsigned int ret = sizeof (void *); +#ifndef __LP64__ + ret = MIX2_LL (ret, keyp->any.u); + ret = MIX2_LL (ret, keyp->any.v); +#else + ret = MIX2_LL (ret, keyp->any.u & ~0U); + ret = MIX2_LL (ret, keyp->any.u >> 32); + ret = MIX2_LL (ret, keyp->any.v & ~0U); + ret = MIX2_LL (ret, keyp->any.v >> 32); +#endif + return (ret); +} + +/* Perform a VM lookup for the address in the map. The FLAGS + * parameter is used to specify some attributes for the address, + * such as protection. Place the corresponding VM object/offset pair + * in VAP. Returns 0 if successful, -1 otherwise. */ +static int +probe_address (vm_map_t map, vm_offset_t addr, + int flags, struct vm_args *vap) +{ + vm_prot_t prot = VM_PROT_READ | + ((flags & GSYNC_MUTATE) ? VM_PROT_WRITE : 0); + vm_map_version_t ver; + vm_prot_t rprot; + boolean_t wired_p; + + if (vm_map_lookup (&map, addr, prot, &ver, + &vap->obj, &vap->off, &rprot, &wired_p) != KERN_SUCCESS) + return (-1); + else if ((rprot & prot) != prot) + { + vm_object_unlock (vap->obj); + return (-1); + } + + return (0); +} + +/* Initialize the key with its needed members, depending on whether the + * address is local or shared. Also stores the VM object and offset inside + * the argument VAP for future use. */ +static int +gsync_prepare_key (task_t task, vm_offset_t addr, int flags, + union gsync_key *keyp, struct vm_args *vap) +{ + if (probe_address (task->map, addr, flags, vap) < 0) + return (-1); + else if (flags & GSYNC_SHARED) + { + /* For a shared address, we need the VM object + * and offset as the keys. */ + keyp->shared.obj = vap->obj; + keyp->shared.off = vap->off; + } + else + { + /* Task-local address. The keys are the task's map and + * the virtual address itself. 
*/ + keyp->local.map = task->map; + keyp->local.addr = addr; + } + + return ((int)(gsync_key_hash (keyp) % GSYNC_NBUCKETS)); +} + +static inline struct gsync_waiter* +node_to_waiter (struct list *nodep) +{ + return (list_entry (nodep, struct gsync_waiter, link)); +} + +static inline struct list* +gsync_find_key (const struct list *entries, + const union gsync_key *keyp, int *exactp) +{ + /* Look for a key that matches. We take advantage of the fact + * that the entries are sorted to break out of the loop as + * early as possible. */ + struct list *runp; + list_for_each (entries, runp) + { + struct gsync_waiter *p = node_to_waiter (runp); + if (gsync_key_lt (keyp, &p->key)) + break; + else if (gsync_key_eq (keyp, &p->key)) + { + if (exactp != 0) + *exactp = 1; + break; + } + } + + return (runp); +} + +/* Create a temporary mapping in the kernel.*/ +static inline vm_offset_t +temp_mapping (struct vm_args *vap, vm_offset_t addr, vm_prot_t prot) +{ + vm_offset_t paddr = VM_MIN_KERNEL_ADDRESS; + /* Adjust the offset for addresses that aren't page-aligned. */ + vm_offset_t off = vap->off - (addr - trunc_page (addr)); + + if (vm_map_enter (kernel_map, &paddr, PAGE_SIZE, + 0, TRUE, vap->obj, off, FALSE, prot, VM_PROT_ALL, + VM_INHERIT_DEFAULT) != KERN_SUCCESS) + paddr = 0; + + return (paddr); +} + +kern_return_t gsync_wait (task_t task, vm_offset_t addr, + unsigned int lo, unsigned int hi, natural_t msec, int flags) +{ + if (task == 0) + return (KERN_INVALID_TASK); + else if (addr % sizeof (int) != 0) + return (KERN_INVALID_ADDRESS); + + vm_map_lock_read (task->map); + + struct gsync_waiter w; + struct vm_args va; + boolean_t remote = task != current_task (); + int bucket = gsync_prepare_key (task, addr, flags, &w.key, &va); + + if (bucket < 0) + { + vm_map_unlock_read (task->map); + return (KERN_INVALID_ADDRESS); + } + else if (remote) + /* The VM object is returned locked. However, we are about to acquire + * a sleeping lock for a bucket, so we must not hold any simple + * locks. To prevent this object from going away, we add a reference + * to it when requested. */ + vm_object_reference_locked (va.obj); + + /* We no longer need the lock on the VM object. */ + vm_object_unlock (va.obj); + + struct gsync_hbucket *hbp = gsync_buckets + bucket; + kmutex_lock (&hbp->lock, FALSE); + + /* Before doing any work, check that the expected value(s) + * match the contents of the address. Otherwise, the waiting + * thread could potentially miss a wakeup. */ + + boolean_t equal; + if (! remote) + equal = ((unsigned int *)addr)[0] == lo && + ((flags & GSYNC_QUAD) == 0 || + ((unsigned int *)addr)[1] == hi); + else + { + vm_offset_t paddr = temp_mapping (&va, addr, VM_PROT_READ); + if (unlikely (paddr == 0)) + { + kmutex_unlock (&hbp->lock); + vm_map_unlock_read (task->map); + /* Make sure to remove the reference we added. */ + vm_object_deallocate (va.obj); + return (KERN_MEMORY_FAILURE); + } + + vm_offset_t off = addr & (PAGE_SIZE - 1); + paddr += off; + + equal = ((unsigned int *)paddr)[0] == lo && + ((flags & GSYNC_QUAD) == 0 || + ((unsigned int *)paddr)[1] == hi); + + paddr -= off; + + /* Note that the call to 'vm_map_remove' will unreference + * the VM object, so we don't have to do it ourselves. */ + vm_map_remove (kernel_map, paddr, paddr + PAGE_SIZE); + } + + /* Done with the task's map. */ + vm_map_unlock_read (task->map); + + if (! 
equal) + { + kmutex_unlock (&hbp->lock); + return (KERN_INVALID_ARGUMENT); + } + + /* Look for the first entry in the hash bucket that + * compares strictly greater than this waiter. */ + struct list *runp; + list_for_each (&hbp->entries, runp) + if (gsync_key_lt (&w.key, &node_to_waiter(runp)->key)) + break; + + /* Finally, add ourselves to the list and go to sleep. */ + list_add (runp->prev, runp, &w.link); + w.waiter = current_thread (); + + if (flags & GSYNC_TIMED) + thread_will_wait_with_timeout (w.waiter, msec); + else + thread_will_wait (w.waiter); + + kmutex_unlock (&hbp->lock); + thread_block (thread_no_continuation); + + /* We're back. */ + kern_return_t ret = KERN_SUCCESS; + if (current_thread()->wait_result != THREAD_AWAKENED) + { + /* We were interrupted or timed out. */ + kmutex_lock (&hbp->lock, FALSE); + if (!list_node_unlinked (&w.link)) + list_remove (&w.link); + kmutex_unlock (&hbp->lock); + + /* Map the error code. */ + ret = current_thread()->wait_result == THREAD_INTERRUPTED ? + KERN_INTERRUPTED : KERN_TIMEDOUT; + } + + return (ret); +} + +/* Remove a waiter from the queue, wake it up, and + * return the next node. */ +static inline struct list* +dequeue_waiter (struct list *nodep) +{ + struct list *nextp = list_next (nodep); + list_remove (nodep); + list_node_init (nodep); + clear_wait (node_to_waiter(nodep)->waiter, + THREAD_AWAKENED, FALSE); + return (nextp); +} + +kern_return_t gsync_wake (task_t task, + vm_offset_t addr, unsigned int val, int flags) +{ + if (task == 0) + return (KERN_INVALID_TASK); + else if (addr % sizeof (int) != 0) + return (KERN_INVALID_ADDRESS); + + vm_map_lock_read (task->map); + + union gsync_key key; + struct vm_args va; + int bucket = gsync_prepare_key (task, addr, flags, &key, &va); + + if (bucket < 0) + { + vm_map_unlock_read (task->map); + return (KERN_INVALID_ADDRESS); + } + else if (current_task () != task && (flags & GSYNC_MUTATE) != 0) + /* See above on why we do this. */ + vm_object_reference_locked (va.obj); + + /* Done with the VM object lock. */ + vm_object_unlock (va.obj); + + kern_return_t ret = KERN_INVALID_ARGUMENT; + struct gsync_hbucket *hbp = gsync_buckets + bucket; + + kmutex_lock (&hbp->lock, FALSE); + + if (flags & GSYNC_MUTATE) + { + /* Set the contents of the address to the specified value, + * even if we don't end up waking any threads. Note that + * the buckets' simple locks give us atomicity. 
*/ + + if (task != current_task ()) + { + vm_offset_t paddr = temp_mapping (&va, addr, + VM_PROT_READ | VM_PROT_WRITE); + + if (paddr == 0) + { + kmutex_unlock (&hbp->lock); + vm_map_unlock_read (task->map); + vm_object_deallocate (va.obj); + return (KERN_MEMORY_FAILURE); + } + + addr = paddr + (addr & (PAGE_SIZE - 1)); + } + + *(unsigned int *)addr = val; + if (task != current_task ()) + vm_map_remove (kernel_map, addr, addr + sizeof (int)); + } + + vm_map_unlock_read (task->map); + + int found = 0; + struct list *runp = gsync_find_key (&hbp->entries, &key, &found); + if (found) + { + do + runp = dequeue_waiter (runp); + while ((flags & GSYNC_BROADCAST) && + !list_end (&hbp->entries, runp) && + gsync_key_eq (&node_to_waiter(runp)->key, &key)); + + ret = KERN_SUCCESS; + } + + kmutex_unlock (&hbp->lock); + return (ret); +} + +kern_return_t gsync_requeue (task_t task, vm_offset_t src, + vm_offset_t dst, boolean_t wake_one, int flags) +{ + if (task == 0) + return (KERN_INVALID_TASK); + else if (src % sizeof (int) != 0 || dst % sizeof (int) != 0) + return (KERN_INVALID_ADDRESS); + + union gsync_key src_k, dst_k; + struct vm_args va; + + int src_bkt = gsync_prepare_key (task, src, flags, &src_k, &va); + if (src_bkt < 0) + return (KERN_INVALID_ADDRESS); + + /* Unlock the VM object before the second lookup. */ + vm_object_unlock (va.obj); + + int dst_bkt = gsync_prepare_key (task, dst, flags, &dst_k, &va); + if (dst_bkt < 0) + return (KERN_INVALID_ADDRESS); + + /* We never create any temporary mappings in 'requeue', so we + * can unlock the VM object right now. */ + vm_object_unlock (va.obj); + + /* If we're asked to unconditionally wake up a waiter, then + * we need to remove a maximum of two threads from the queue. */ + unsigned int nw = 1 + wake_one; + struct gsync_hbucket *bp1 = gsync_buckets + src_bkt; + struct gsync_hbucket *bp2 = gsync_buckets + dst_bkt; + + /* Acquire the locks in order, to prevent any potential deadlock. */ + if (bp1 == bp2) + kmutex_lock (&bp1->lock, FALSE); + else if ((unsigned long)bp1 < (unsigned long)bp2) + { + kmutex_lock (&bp1->lock, FALSE); + kmutex_lock (&bp2->lock, FALSE); + } + else + { + kmutex_lock (&bp2->lock, FALSE); + kmutex_lock (&bp1->lock, FALSE); + } + + kern_return_t ret = KERN_SUCCESS; + int exact; + struct list *inp = gsync_find_key (&bp1->entries, &src_k, &exact); + + if (! exact) + /* There are no waiters in the source queue. */ + ret = KERN_INVALID_ARGUMENT; + else + { + struct list *outp = gsync_find_key (&bp2->entries, &dst_k, 0); + + /* We're going to need a node that points one past the + * end of the waiters in the source queue. */ + struct list *endp = inp; + + do + { + /* Modify the keys while iterating. */ + node_to_waiter(endp)->key = dst_k; + endp = list_next (endp); + } + while (((flags & GSYNC_BROADCAST) || --nw != 0) && + !list_end (&bp1->entries, endp) && + gsync_key_eq (&node_to_waiter(endp)->key, &src_k)); + + /* Splice the list by removing waiters from the source queue + * and inserting them into the destination queue. */ + inp->prev->next = endp; + endp->prev->next = outp->next; + endp->prev = inp->prev; + + outp->next = inp; + inp->prev = outp; + + if (wake_one) + (void)dequeue_waiter (inp); + } + + /* Release the locks and we're done.*/ + kmutex_unlock (&bp1->lock); + if (bp1 != bp2) + kmutex_unlock (&bp2->lock); + + return (ret); +} + diff --git a/kern/gsync.h b/kern/gsync.h new file mode 100644 index 0000000..8f69be3 --- /dev/null +++ b/kern/gsync.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2016 Free Software Foundation, Inc. 
+ Contributed by Agustina Arzille <avarzille@riseup.net>, 2016. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either + version 2 of the license, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this program; if not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _KERN_GSYNC_H_ +#define _KERN_GSYNC_H_ 1 + +#define GSYNC_SHARED 0x01 +#define GSYNC_QUAD 0x02 +#define GSYNC_TIMED 0x04 +#define GSYNC_BROADCAST 0x08 +#define GSYNC_MUTATE 0x10 + +#include <mach/mach_types.h> + +void gsync_setup (void); + +kern_return_t gsync_wait (task_t task, vm_offset_t addr, + unsigned int lo, unsigned int hi, natural_t msec, int flags); + +kern_return_t gsync_wake (task_t task, + vm_offset_t addr, unsigned int val, int flags); + +kern_return_t gsync_requeue (task_t task, vm_offset_t src_addr, + vm_offset_t dst_addr, boolean_t wake_one, int flags); + +#endif diff --git a/kern/host.c b/kern/host.c new file mode 100644 index 0000000..6939437 --- /dev/null +++ b/kern/host.c @@ -0,0 +1,389 @@ +/* + * Mach Operating System + * Copyright (c) 1993,1992,1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * host.c + * + * Non-ipc host functions. + */ + +#include <string.h> + +#include <kern/assert.h> +#include <kern/debug.h> +#include <kern/kalloc.h> +#include <kern/host.h> +#include <mach/host_info.h> +#include <mach/kern_return.h> +#include <mach/machine.h> +#include <mach/port.h> +#include <kern/processor.h> +#include <kern/ipc_host.h> +#include <kern/mach_clock.h> +#include <kern/mach_host.server.h> +#include <mach/vm_param.h> + +host_data_t realhost; + +kern_return_t host_processors( + const host_t host, + processor_array_t *processor_list, + natural_t *countp) +{ + unsigned i; + processor_t *tp; + vm_offset_t addr; + unsigned int count; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * Determine how many processors we have. + * (This number shouldn't change.) 
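+ * Count the machine_slot entries marked is_cpu, allocate one
+ * mach_port_t per processor, and convert each processor to a
+ * send right in place before returning the array.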
+ */ + + count = 0; + for (i = 0; i < NCPUS; i++) + if (machine_slot[i].is_cpu) + count++; + + if (count == 0) + panic("host_processors"); + + addr = kalloc((vm_size_t) (count * sizeof(mach_port_t))); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + + tp = (processor_t *) addr; + for (i = 0; i < NCPUS; i++) + if (machine_slot[i].is_cpu) + *tp++ = cpu_to_processor(i); + + *countp = count; + *processor_list = (mach_port_t *) addr; + + /* do the conversion that Mig should handle */ + + tp = (processor_t *) addr; + for (i = 0; i < count; i++) + ((mach_port_t *) tp)[i] = + (mach_port_t)convert_processor_to_port(tp[i]); + + return KERN_SUCCESS; +} + +kern_return_t host_info( + const host_t host, + int flavor, + host_info_t info, + natural_t *count) +{ + integer_t i, *slot_ptr; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + switch(flavor) { + + case HOST_BASIC_INFO: + { + host_basic_info_t basic_info; + + /* + * Basic information about this host. + */ + if (*count < HOST_BASIC_INFO_COUNT) + return KERN_FAILURE; + + basic_info = (host_basic_info_t) info; + + basic_info->max_cpus = machine_info.max_cpus; + basic_info->avail_cpus = machine_info.avail_cpus; + basic_info->memory_size = machine_info.memory_size; + basic_info->cpu_type = + machine_slot[master_processor->slot_num].cpu_type; + basic_info->cpu_subtype = + machine_slot[master_processor->slot_num].cpu_subtype; + + *count = HOST_BASIC_INFO_COUNT; + return KERN_SUCCESS; + } + + case HOST_PROCESSOR_SLOTS: + /* + * Return numbers of slots with active processors + * in them. + */ + if (*count < NCPUS) + return KERN_INVALID_ARGUMENT; + + slot_ptr = (integer_t *)info; + *count = 0; + for (i = 0; i < NCPUS; i++) { + if (machine_slot[i].is_cpu && + machine_slot[i].running) { + *slot_ptr++ = i; + (*count)++; + } + } + return KERN_SUCCESS; + + case HOST_SCHED_INFO: + { + host_sched_info_t sched_info; + extern int min_quantum; + /* minimum quantum, in ticks */ + + /* + * Return scheduler information. + */ + if (*count < HOST_SCHED_INFO_COUNT) + return(KERN_FAILURE); + + sched_info = (host_sched_info_t) info; + + sched_info->min_timeout = tick / 1000; + /* convert microseconds to milliseconds */ + sched_info->min_quantum = min_quantum * tick / 1000; + /* convert ticks to milliseconds */ + + *count = HOST_SCHED_INFO_COUNT; + return KERN_SUCCESS; + } + + case HOST_LOAD_INFO: + { + host_load_info_t load_info; + extern long avenrun[3], mach_factor[3]; + + if (*count < HOST_LOAD_INFO_COUNT) + return KERN_FAILURE; + + load_info = (host_load_info_t) info; + + memcpy(load_info->avenrun, + avenrun, + sizeof avenrun); + memcpy(load_info->mach_factor, + mach_factor, + sizeof mach_factor); + + *count = HOST_LOAD_INFO_COUNT; + return KERN_SUCCESS; + } + + default: + return KERN_INVALID_ARGUMENT; + } +} + +/* + * Return kernel version string (more than you ever + * wanted to know about what version of the kernel this is). + */ + +kern_return_t host_get_kernel_version( + const host_t host, + kernel_version_t out_version) +{ + extern char version[]; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + (void) strncpy(out_version, version, sizeof(kernel_version_t)); + + return KERN_SUCCESS; +} + +#if !defined(__x86_64__) || defined(USER32) +/* Same as above, but does not exist for x86_64. */ +kern_return_t host_kernel_version( + const host_t host, + kernel_version_t out_version) +{ + return host_get_kernel_version(host, out_version); +} +#endif + +/* + * host_processor_sets: + * + * List all processor sets on the host. 
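+ *
+ * The result array is sized under all_psets_lock and reallocated
+ * until it fits; each pset gains a reference that is later consumed
+ * by convert_pset_name_to_port.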
+ */ +#if MACH_HOST +kern_return_t +host_processor_sets( + const host_t host, + processor_set_name_array_t *pset_list, + natural_t *count) +{ + unsigned int actual; /* this many psets */ + processor_set_t pset; + processor_set_t *psets; + int i; + + vm_size_t size; + vm_size_t size_needed; + vm_offset_t addr; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + simple_lock(&all_psets_lock); + actual = all_psets_count; + + /* do we have the memory we need? */ + + size_needed = actual * sizeof(mach_port_t); + if (size_needed <= size) + break; + + /* unlock and allocate more memory */ + simple_unlock(&all_psets_lock); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the all_psets_lock */ + + psets = (processor_set_t *) addr; + + for (i = 0, pset = (processor_set_t) queue_first(&all_psets); + i < actual; + i++, pset = (processor_set_t) queue_next(&pset->all_psets)) { + /* take ref for convert_pset_name_to_port */ + pset_reference(pset); + psets[i] = pset; + } + assert(queue_end(&all_psets, (queue_entry_t) pset)); + + /* can unlock now that we've got the pset refs */ + simple_unlock(&all_psets_lock); + + /* + * Always have default port. + */ + + assert(actual > 0); + + /* if we allocated too much, must copy */ + + if (size_needed < size) { + vm_offset_t newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + for (i = 0; i < actual; i++) + pset_deallocate(psets[i]); + kfree(addr, size); + return KERN_RESOURCE_SHORTAGE; + } + + memcpy((void *) newaddr, (void *) addr, size_needed); + kfree(addr, size); + psets = (processor_set_t *) newaddr; + } + + *pset_list = (mach_port_t *) psets; + *count = actual; + + /* do the conversion that Mig should handle */ + + for (i = 0; i < actual; i++) + ((mach_port_t *) psets)[i] = + (mach_port_t)convert_pset_name_to_port(psets[i]); + + return KERN_SUCCESS; +} +#else /* MACH_HOST */ +/* + * Only one processor set, the default processor set, in this case. + */ +kern_return_t +host_processor_sets( + const host_t host, + processor_set_name_array_t *pset_list, + natural_t *count) +{ + vm_offset_t addr; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * Allocate memory. Can be pageable because it won't be + * touched while holding a lock. + */ + + addr = kalloc((vm_size_t) sizeof(mach_port_t)); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + + /* take for for convert_pset_name_to_port */ + pset_reference(&default_pset); + /* do the conversion that Mig should handle */ + *((mach_port_t *) addr) = + (mach_port_t) convert_pset_name_to_port(&default_pset); + + *pset_list = (mach_port_t *) addr; + *count = 1; + + return KERN_SUCCESS; +} +#endif /* MACH_HOST */ + +/* + * host_processor_set_priv: + * + * Return control port for given processor set. + */ +kern_return_t +host_processor_set_priv( + const host_t host, + processor_set_t pset_name, + processor_set_t *pset) +{ + if ((host == HOST_NULL) || (pset_name == PROCESSOR_SET_NULL)) { + *pset = PROCESSOR_SET_NULL; + return KERN_INVALID_ARGUMENT; + } + + *pset = pset_name; + pset_reference(*pset); + return KERN_SUCCESS; +} diff --git a/kern/host.h b/kern/host.h new file mode 100644 index 0000000..5771da1 --- /dev/null +++ b/kern/host.h @@ -0,0 +1,48 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * kern/host.h + * + * Definitions for host data structures. + * + */ + +#ifndef _KERN_HOST_H_ +#define _KERN_HOST_H_ + +struct host { + struct ipc_port *host_self; + struct ipc_port *host_priv_self; +}; + +typedef struct host *host_t; +typedef struct host host_data_t; + +#define HOST_NULL ((host_t)0) + +extern host_data_t realhost; + +#endif /* _KERN_HOST_H_ */ diff --git a/kern/ipc_host.c b/kern/ipc_host.c new file mode 100644 index 0000000..6b81862 --- /dev/null +++ b/kern/ipc_host.c @@ -0,0 +1,451 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * kern/ipc_host.c + * + * Routines to implement host ports. + */ + +#include <mach/message.h> +#include <kern/debug.h> +#include <kern/host.h> +#include <kern/mach_host.server.h> +#include <kern/processor.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/ipc_host.h> +#include <kern/ipc_kobject.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_space.h> +#include <mach/mach_traps.h> + +#include <machine/machspl.h> /* for spl */ + + + +/* + * ipc_host_init: set up various things. + */ + +void ipc_host_init(void) +{ + ipc_port_t port; + /* + * Allocate and set up the two host ports. 
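+	 *	realhost.host_self is the host name port (IKOT_HOST);
+	 *	realhost.host_priv_self is the privileged host control
+	 *	port (IKOT_HOST_PRIV).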
+ */ + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_host_init"); + + ipc_kobject_set(port, (ipc_kobject_t) &realhost, IKOT_HOST); + realhost.host_self = port; + + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_host_init"); + + ipc_kobject_set(port, (ipc_kobject_t) &realhost, IKOT_HOST_PRIV); + realhost.host_priv_self = port; + + /* + * Set up ipc for default processor set. + */ + ipc_pset_init(&default_pset); + ipc_pset_enable(&default_pset); + + /* + * And for master processor + */ + ipc_processor_init(master_processor); +} + +/* + * Routine: mach_host_self [mach trap] + * Purpose: + * Give the caller send rights for his own host port. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +mach_port_name_t +mach_host_self(void) +{ + ipc_port_t sright; + + sright = ipc_port_make_send(realhost.host_self); + return ipc_port_copyout_send(sright, current_space()); +} + +/* + * ipc_processor_init: + * + * Initialize ipc access to processor by allocating port. + * Enable ipc control of processor by setting port object. + */ + +void +ipc_processor_init( + processor_t processor) +{ + ipc_port_t port; + + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_processor_init"); + processor->processor_self = port; + ipc_kobject_set(port, (ipc_kobject_t) processor, IKOT_PROCESSOR); +} + + +/* + * ipc_pset_init: + * + * Initialize ipc control of a processor set by allocating its ports. + */ + +void +ipc_pset_init( + processor_set_t pset) +{ + ipc_port_t port; + + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_pset_init"); + pset->pset_self = port; + + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_pset_init"); + pset->pset_name_self = port; +} + +/* + * ipc_pset_enable: + * + * Enable ipc access to a processor set. + */ +void +ipc_pset_enable( + processor_set_t pset) +{ + pset_lock(pset); + if (pset->active) { + ipc_kobject_set(pset->pset_self, + (ipc_kobject_t) pset, IKOT_PSET); + ipc_kobject_set(pset->pset_name_self, + (ipc_kobject_t) pset, IKOT_PSET_NAME); + pset_ref_lock(pset); + pset->ref_count += 2; + pset_ref_unlock(pset); + } + pset_unlock(pset); +} + +/* + * ipc_pset_disable: + * + * Disable ipc access to a processor set by clearing the port objects. + * Caller must hold pset lock and a reference to the pset. Ok to + * just decrement pset reference count as a result. + */ +void +ipc_pset_disable( + processor_set_t pset) +{ + ipc_kobject_set(pset->pset_self, IKO_NULL, IKOT_NONE); + ipc_kobject_set(pset->pset_name_self, IKO_NULL, IKOT_NONE); + pset->ref_count -= 2; +} + +/* + * ipc_pset_terminate: + * + * Processor set is dead. Deallocate the ipc control structures. + */ +void +ipc_pset_terminate( + processor_set_t pset) +{ + ipc_port_dealloc_kernel(pset->pset_self); + ipc_port_dealloc_kernel(pset->pset_name_self); +} + +/* + * processor_set_default: + * + * Return ports for manipulating default_processor set. + */ +kern_return_t +processor_set_default( + const host_t host, + processor_set_t *pset) +{ + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + *pset = &default_pset; + pset_reference(*pset); + return KERN_SUCCESS; +} + +/* + * Routine: convert_port_to_host + * Purpose: + * Convert from a port to a host. + * Doesn't consume the port ref; the host produced may be null. + * Conditions: + * Nothing locked. 
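+ *	Note:
+ *		Either the host name port (IKOT_HOST) or the privileged
+ *		host port (IKOT_HOST_PRIV) is accepted here; use
+ *		convert_port_to_host_priv when only the privileged port
+ *		should be allowed.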
+ */ + +host_t +convert_port_to_host( + ipc_port_t port) +{ + host_t host = HOST_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + ((ip_kotype(port) == IKOT_HOST) || + (ip_kotype(port) == IKOT_HOST_PRIV))) + host = (host_t) port->ip_kobject; + ip_unlock(port); + } + + return host; +} + +/* + * Routine: convert_port_to_host_priv + * Purpose: + * Convert from a port to a host. + * Doesn't consume the port ref; the host produced may be null. + * Conditions: + * Nothing locked. + */ + +host_t +convert_port_to_host_priv( + ipc_port_t port) +{ + host_t host = HOST_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_HOST_PRIV)) + host = (host_t) port->ip_kobject; + ip_unlock(port); + } + + return host; +} + +/* + * Routine: convert_port_to_processor + * Purpose: + * Convert from a port to a processor. + * Doesn't consume the port ref; + * the processor produced may be null. + * Conditions: + * Nothing locked. + */ + +processor_t +convert_port_to_processor( + ipc_port_t port) +{ + processor_t processor = PROCESSOR_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_PROCESSOR)) + processor = (processor_t) port->ip_kobject; + ip_unlock(port); + } + + return processor; +} + +/* + * Routine: convert_port_to_pset + * Purpose: + * Convert from a port to a pset. + * Doesn't consume the port ref; produces a pset ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +processor_set_t +convert_port_to_pset( + ipc_port_t port) +{ + processor_set_t pset = PROCESSOR_SET_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_PSET)) { + pset = (processor_set_t) port->ip_kobject; + pset_reference(pset); + } + ip_unlock(port); + } + + return pset; +} + +/* + * Routine: convert_port_to_pset_name + * Purpose: + * Convert from a port to a pset. + * Doesn't consume the port ref; produces a pset ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +processor_set_t +convert_port_to_pset_name( + ipc_port_t port) +{ + processor_set_t pset = PROCESSOR_SET_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + ((ip_kotype(port) == IKOT_PSET) || + (ip_kotype(port) == IKOT_PSET_NAME))) { + pset = (processor_set_t) port->ip_kobject; + pset_reference(pset); + } + ip_unlock(port); + } + + return pset; +} + +/* + * Routine: convert_host_to_port + * Purpose: + * Convert from a host to a port. + * Produces a naked send right which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_host_to_port( + host_t host) +{ + ipc_port_t port; + + port = ipc_port_make_send(host->host_self); + + return port; +} + +/* + * Routine: convert_processor_to_port + * Purpose: + * Convert from a processor to a port. + * Produces a naked send right which is always valid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_processor_to_port(processor_t processor) +{ + ipc_port_t port; + + port = ipc_port_make_send(processor->processor_self); + + return port; +} + +/* + * Routine: convert_pset_to_port + * Purpose: + * Convert from a pset to a port. + * Consumes a pset ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. 
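+ *	Note:
+ *		The pset reference is consumed even when the pset is no
+ *		longer active and IP_NULL is returned.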
+ */ + +ipc_port_t +convert_pset_to_port( + processor_set_t pset) +{ + ipc_port_t port; + + pset_lock(pset); + if (pset->active) + port = ipc_port_make_send(pset->pset_self); + else + port = IP_NULL; + pset_unlock(pset); + + pset_deallocate(pset); + return port; +} + +/* + * Routine: convert_pset_name_to_port + * Purpose: + * Convert from a pset to a port. + * Consumes a pset ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_pset_name_to_port( + processor_set_t pset) +{ + ipc_port_t port; + + pset_lock(pset); + if (pset->active) + port = ipc_port_make_send(pset->pset_name_self); + else + port = IP_NULL; + pset_unlock(pset); + + pset_deallocate(pset); + return port; +} diff --git a/kern/ipc_host.h b/kern/ipc_host.h new file mode 100644 index 0000000..cd2ffaa --- /dev/null +++ b/kern/ipc_host.h @@ -0,0 +1,72 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_IPC_HOST_H_ +#define _KERN_IPC_HOST_H_ + +#include <mach/port.h> +#include <kern/processor.h> + +extern void ipc_host_init(void); + +extern void ipc_processor_init(processor_t); + +extern void ipc_pset_init(processor_set_t); +extern void ipc_pset_enable(processor_set_t); +extern void ipc_pset_disable(processor_set_t); +extern void ipc_pset_terminate(processor_set_t); + +extern struct host * +convert_port_to_host(struct ipc_port *); + +extern struct ipc_port * +convert_host_to_port(struct host *); + +extern struct host * +convert_port_to_host_priv(struct ipc_port *); + +extern processor_t +convert_port_to_processor(struct ipc_port *); + +extern struct ipc_port * +convert_processor_to_port(processor_t); + +extern processor_set_t +convert_port_to_pset(struct ipc_port *); + +extern struct ipc_port * +convert_pset_to_port(processor_set_t); + +extern processor_set_t +convert_port_to_pset_name(struct ipc_port *); + +extern struct ipc_port * +convert_pset_name_to_port(processor_set_t); + +#endif /* _KERN_IPC_HOST_H_ */ diff --git a/kern/ipc_kobject.c b/kern/ipc_kobject.c new file mode 100644 index 0000000..0a81595 --- /dev/null +++ b/kern/ipc_kobject.c @@ -0,0 +1,365 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: kern/ipc_kobject.c + * Author: Rich Draves + * Date: 1989 + * + * Functions for letting a port represent a kernel object. + */ + +#include <kern/debug.h> +#include <kern/printf.h> +#include <mach/port.h> +#include <mach/kern_return.h> +#include <mach/message.h> +#include <mach/mig_errors.h> +#include <mach/notify.h> +#include <kern/ipc_kobject.h> +#include <ipc/ipc_object.h> +#include <ipc/ipc_kmsg.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_thread.h> +#include <vm/vm_object.h> +#include <vm/memory_object_proxy.h> +#include <device/ds_routines.h> + +#include <kern/mach.server.h> +#include <ipc/mach_port.server.h> +#include <kern/mach_host.server.h> +#include <device/device.server.h> +#include <device/device_pager.server.h> +#include <kern/mach4.server.h> +#include <kern/gnumach.server.h> +#include <kern/experimental.server.h> + +#if MACH_DEBUG +#include <kern/mach_debug.server.h> +#endif + +#if MACH_MACHINE_ROUTINES +#include <machine/machine_routines.h> +#include MACHINE_SERVER_HEADER +#endif + + +/* + * Routine: ipc_kobject_server + * Purpose: + * Handle a message sent to the kernel. + * Generates a reply message. + * Conditions: + * Nothing locked. + */ + +ipc_kmsg_t +ipc_kobject_server(ipc_kmsg_t request) +{ + mach_msg_size_t reply_size = ikm_less_overhead(8192); + ipc_kmsg_t reply; + kern_return_t kr; + mig_routine_t routine; + ipc_port_t *destp; + + reply = ikm_alloc(reply_size); + if (reply == IKM_NULL) { + printf("ipc_kobject_server: dropping request\n"); + ipc_kmsg_destroy(request); + return IKM_NULL; + } + ikm_init(reply, reply_size); + + /* + * Initialize reply message. 
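+	 * The reply is addressed to the request's reply port and, by
+	 * MIG convention, carries msgh_id = request msgh_id + 100.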
+ */ + { +#define InP ((mach_msg_header_t *) &request->ikm_header) +#define OutP ((mig_reply_header_t *) &reply->ikm_header) + + static const mach_msg_type_t RetCodeType = { + .msgt_name = MACH_MSG_TYPE_INTEGER_32, + .msgt_size = 32, + .msgt_number = 1, + .msgt_inline = TRUE, + .msgt_longform = FALSE, + .msgt_deallocate = FALSE, + .msgt_unused = 0 + }; + OutP->Head.msgh_bits = + MACH_MSGH_BITS(MACH_MSGH_BITS_LOCAL(InP->msgh_bits), 0); + OutP->Head.msgh_size = sizeof(mig_reply_header_t); + OutP->Head.msgh_remote_port = InP->msgh_local_port; + OutP->Head.msgh_local_port = MACH_PORT_NULL; + OutP->Head.msgh_seqno = 0; + OutP->Head.msgh_id = InP->msgh_id + 100; +#if 0 + if (InP->msgh_id) { + static long _calls; + static struct { long id, count; } _counts[512]; + int i, id; + + id = InP->msgh_id; + for (i = 0; i < 511; i++) { + if (_counts[i].id == 0) { + _counts[i].id = id; + _counts[i].count++; + break; + } + if (_counts[i].id == id) { + _counts[i].count++; + break; + } + } + if (i == 511) { + _counts[i].id = id; + _counts[i].count++; + } + if ((++_calls & 0x7fff) == 0) + for (i = 0; i < 512; i++) { + if (_counts[i].id == 0) + break; + printf("%d: %d\n", + _counts[i].id, _counts[i].count); + } + } +#endif + + OutP->RetCodeType = RetCodeType; + +#undef InP +#undef OutP + } + + /* + * Find the server routine to call, and call it + * to perform the kernel function + */ + { + check_simple_locks(); + if ((routine = mach_server_routine(&request->ikm_header)) != 0 + || (routine = mach_port_server_routine(&request->ikm_header)) != 0 + || (routine = mach_host_server_routine(&request->ikm_header)) != 0 + || (routine = device_server_routine(&request->ikm_header)) != 0 + || (routine = device_pager_server_routine(&request->ikm_header)) != 0 +#if MACH_DEBUG + || (routine = mach_debug_server_routine(&request->ikm_header)) != 0 +#endif /* MACH_DEBUG */ + || (routine = mach4_server_routine(&request->ikm_header)) != 0 + || (routine = gnumach_server_routine(&request->ikm_header)) != 0 + || (routine = experimental_server_routine(&request->ikm_header)) != 0 +#if MACH_MACHINE_ROUTINES + || (routine = MACHINE_SERVER_ROUTINE(&request->ikm_header)) != 0 +#endif /* MACH_MACHINE_ROUTINES */ + ) { + (*routine)(&request->ikm_header, &reply->ikm_header); + kernel_task->messages_received++; + } else { + if (!ipc_kobject_notify(&request->ikm_header, + &reply->ikm_header)) { + ((mig_reply_header_t *) &reply->ikm_header)->RetCode + = MIG_BAD_ID; +#if MACH_IPC_TEST + printf("ipc_kobject_server: bogus kernel message, id=%d\n", + request->ikm_header.msgh_id); +#endif /* MACH_IPC_TEST */ + } else { + kernel_task->messages_received++; + } + } + kernel_task->messages_sent++; + } + check_simple_locks(); + + /* + * Destroy destination. The following code differs from + * ipc_object_destroy in that we release the send-once + * right instead of generating a send-once notification + * (which would bring us here again, creating a loop). + * It also differs in that we only expect send or + * send-once rights, never receive rights. + * + * We set msgh_remote_port to IP_NULL so that the kmsg + * destroy routines don't try to destroy the port twice. 
+ */ + destp = (ipc_port_t *) &request->ikm_header.msgh_remote_port; + switch (MACH_MSGH_BITS_REMOTE(request->ikm_header.msgh_bits)) { + case MACH_MSG_TYPE_PORT_SEND: + ipc_port_release_send(*destp); + break; + + case MACH_MSG_TYPE_PORT_SEND_ONCE: + ipc_port_release_sonce(*destp); + break; + + default: +#if MACH_ASSERT + assert(!"ipc_object_destroy: strange destination rights"); +#else + panic("ipc_object_destroy: strange destination rights"); +#endif + } + *destp = IP_NULL; + + kr = ((mig_reply_header_t *) &reply->ikm_header)->RetCode; + if ((kr == KERN_SUCCESS) || (kr == MIG_NO_REPLY)) { + /* + * The server function is responsible for the contents + * of the message. The reply port right is moved + * to the reply message, and we have deallocated + * the destination port right, so we just need + * to free the kmsg. + */ + + /* like ipc_kmsg_put, but without the copyout */ + + ikm_check_initialized(request, request->ikm_size); + ikm_cache_free(request); + } else { + /* + * The message contents of the request are intact. + * Destroy everything except the reply port right, + * which is needed in the reply message. + */ + + request->ikm_header.msgh_local_port = MACH_PORT_NULL; + ipc_kmsg_destroy(request); + } + + if (kr == MIG_NO_REPLY) { + /* + * The server function will send a reply message + * using the reply port right, which it has saved. + */ + + ikm_free(reply); + return IKM_NULL; + } else if (!IP_VALID((ipc_port_t)reply->ikm_header.msgh_remote_port)) { + /* + * Can't queue the reply message if the destination + * (the reply port) isn't valid. + */ + + ipc_kmsg_destroy(reply); + return IKM_NULL; + } + + return reply; +} + +/* + * Routine: ipc_kobject_set + * Purpose: + * Make a port represent a kernel object of the given type. + * The caller is responsible for handling refs for the + * kernel object, if necessary. + * Conditions: + * Nothing locked. The port must be active. + */ + +void +ipc_kobject_set(ipc_port_t port, ipc_kobject_t kobject, ipc_kobject_type_t type) +{ + ip_lock(port); + assert(ip_active(port)); + port->ip_bits = (port->ip_bits &~ IO_BITS_KOTYPE) | type; + port->ip_kobject = kobject; + ip_unlock(port); +} + +/* + * Routine: ipc_kobject_destroy + * Purpose: + * Release any kernel object resources associated + * with the port, which is being destroyed. + * + * This should only be needed when resources are + * associated with a user's port. In the normal case, + * when the kernel is the receiver, the code calling + * ipc_port_dealloc_kernel should clean up the resources. + * Conditions: + * The port is not locked, but it is dead. + */ + +void +ipc_kobject_destroy( + ipc_port_t port) +{ + switch (ip_kotype(port)) { + case IKOT_PAGER: + vm_object_destroy(port); + break; + + case IKOT_PAGER_TERMINATING: + vm_object_pager_wakeup(port); + break; + + default: +#if MACH_ASSERT + printf("ipc_kobject_destroy: port 0x%p, kobj 0x%zd, type %d\n", + port, port->ip_kobject, ip_kotype(port)); +#endif /* MACH_ASSERT */ + break; + } +} + +/* + * Routine: ipc_kobject_notify + * Purpose: + * Deliver notifications to kobjects that care about them. 
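+ *		Only device ports and memory object proxy ports handle
+ *		notifications at present; for any other kobject type this
+ *		returns FALSE and ipc_kobject_server replies MIG_BAD_ID.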
+ */ + +boolean_t +ipc_kobject_notify(mach_msg_header_t *request_header, + mach_msg_header_t *reply_header) +{ + ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port; + + ((mig_reply_header_t *) reply_header)->RetCode = MIG_NO_REPLY; + switch (request_header->msgh_id) { + case MACH_NOTIFY_PORT_DELETED: + case MACH_NOTIFY_MSG_ACCEPTED: + case MACH_NOTIFY_PORT_DESTROYED: + case MACH_NOTIFY_NO_SENDERS: + case MACH_NOTIFY_SEND_ONCE: + case MACH_NOTIFY_DEAD_NAME: + break; + + default: + return FALSE; + } + switch (ip_kotype(port)) { + case IKOT_DEVICE: + return ds_notify(request_header); + + case IKOT_PAGER_PROXY: + return memory_object_proxy_notify(request_header); + + default: + return FALSE; + } +} diff --git a/kern/ipc_kobject.h b/kern/ipc_kobject.h new file mode 100644 index 0000000..606a66a --- /dev/null +++ b/kern/ipc_kobject.h @@ -0,0 +1,123 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: kern/ipc_kobject.h + * Author: Rich Draves + * Date: 1989 + * + * Declarations for letting a port represent a kernel object. 
+ */ + +#ifndef _KERN_IPC_KOBJECT_H_ +#define _KERN_IPC_KOBJECT_H_ + +#include <mach/machine/vm_types.h> +#include <ipc/ipc_types.h> +#include <ipc/ipc_kmsg.h> + +typedef vm_offset_t ipc_kobject_t; + +#define IKO_NULL ((ipc_kobject_t) 0) + +typedef unsigned int ipc_kobject_type_t; + +#define IKOT_NONE 0 +#define IKOT_THREAD 1 +#define IKOT_TASK 2 +#define IKOT_HOST 3 +#define IKOT_HOST_PRIV 4 +#define IKOT_PROCESSOR 5 +#define IKOT_PSET 6 +#define IKOT_PSET_NAME 7 +#define IKOT_PAGER 8 +#define IKOT_PAGING_REQUEST 9 +#define IKOT_DEVICE 10 +#define IKOT_XMM_OBJECT 11 +#define IKOT_XMM_PAGER 12 +#define IKOT_XMM_KERNEL 13 +#define IKOT_XMM_REPLY 14 +#define IKOT_PAGER_TERMINATING 15 +#define IKOT_PAGING_NAME 16 +#define IKOT_HOST_SECURITY 17 +#define IKOT_LEDGER 18 +#define IKOT_MASTER_DEVICE 19 +#define IKOT_ACT 20 +#define IKOT_SUBSYSTEM 21 +#define IKOT_IO_DONE_QUEUE 22 +#define IKOT_SEMAPHORE 23 +#define IKOT_LOCK_SET 24 +#define IKOT_CLOCK 25 +#define IKOT_CLOCK_CTRL 26 +#define IKOT_PAGER_PROXY 27 + /* << new entries here */ +#define IKOT_UNKNOWN 28 /* magic catchall */ +#define IKOT_MAX_TYPE 29 /* # of IKOT_ types */ + /* Please keep ipc/ipc_object.c:ikot_print_array up to date */ + +#define is_ipc_kobject(ikot) (ikot != IKOT_NONE) + +/* + * Define types of kernel objects that use page lists instead + * of entry lists for copyin of out of line memory. + */ + +#define ipc_kobject_vm_page_list(ikot) \ + ((ikot == IKOT_PAGING_REQUEST) || (ikot == IKOT_DEVICE)) + +#define ipc_kobject_vm_page_steal(ikot) (ikot == IKOT_PAGING_REQUEST) + +/* Initialize kernel server dispatch table */ +/* XXX +extern void mig_init(void); +*/ + +/* Dispatch a kernel server function */ +extern ipc_kmsg_t ipc_kobject_server( + ipc_kmsg_t request); + +/* Make a port represent a kernel object of the given type */ +extern void ipc_kobject_set( + ipc_port_t port, + ipc_kobject_t kobject, + ipc_kobject_type_t type); + +/* Release any kernel object resources associated with a port */ +extern void ipc_kobject_destroy( + ipc_port_t port); + +/* Deliver notifications to kobjects that care about them */ +extern boolean_t ipc_kobject_notify ( + mach_msg_header_t *request_header, + mach_msg_header_t *reply_header); + +#define null_conversion(port) (port) + +#endif /* _KERN_IPC_KOBJECT_H_ */ diff --git a/kern/ipc_mig.c b/kern/ipc_mig.c new file mode 100644 index 0000000..d26d2c6 --- /dev/null +++ b/kern/ipc_mig.c @@ -0,0 +1,984 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include <mach/boolean.h> +#include <mach/port.h> +#include <mach/message.h> +#include <mach/mig_support.h> +#include <mach/thread_status.h> +#include <machine/locore.h> +#include <machine/copy_user.h> +#include <kern/ast.h> +#include <kern/debug.h> +#include <kern/ipc_tt.h> +#include <kern/syscall_subr.h> +#include <kern/thread.h> +#include <kern/task.h> +#include <kern/ipc_kobject.h> +#include <kern/ipc_tt.h> +#include <kern/ipc_mig.h> +#include <vm/vm_map.h> +#include <vm/vm_user.h> +#include <ipc/port.h> +#include <ipc/ipc_kmsg.h> +#include <ipc/ipc_entry.h> +#include <ipc/ipc_object.h> +#include <ipc/ipc_mqueue.h> +#include <ipc/ipc_space.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_pset.h> +#include <ipc/ipc_thread.h> +#include <ipc/mach_port.server.h> +#include <device/dev_hdr.h> +#include <device/device_types.h> +#include <device/ds_routines.h> + +/* + * Routine: mach_msg_send_from_kernel + * Purpose: + * Send a message from the kernel. + * + * This is used by the client side of KernelUser interfaces + * to implement SimpleRoutines. Currently, this includes + * device_reply and memory_object messages. + * Conditions: + * Nothing locked. + * Returns: + * MACH_MSG_SUCCESS Sent the message. + * MACH_SEND_INVALID_DATA Bad destination port. + */ + +mach_msg_return_t +mach_msg_send_from_kernel( + mach_msg_header_t *msg, + mach_msg_size_t send_size) +{ + ipc_kmsg_t kmsg; + mach_msg_return_t mr; + + if (!MACH_PORT_VALID(msg->msgh_remote_port)) + return MACH_SEND_INVALID_DEST; + + mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); + if (mr != MACH_MSG_SUCCESS) + panic("mach_msg_send_from_kernel"); + + ipc_kmsg_copyin_from_kernel(kmsg); + ipc_mqueue_send_always(kmsg); + + return MACH_MSG_SUCCESS; +} + +mach_msg_return_t +mach_msg_rpc_from_kernel(const mach_msg_header_t *msg, + mach_msg_size_t send_size, + mach_msg_size_t reply_size) +{ + panic("mach_msg_rpc_from_kernel"); /*XXX*/ +} + +/* + * Routine: mach_msg_abort_rpc + * Purpose: + * Destroy the thread's ith_rpc_reply port. + * This will interrupt a mach_msg_rpc_from_kernel + * with a MACH_RCV_PORT_DIED return code. + * Conditions: + * Nothing locked. + */ + +void +mach_msg_abort_rpc(ipc_thread_t thread) +{ + ipc_port_t reply = IP_NULL; + + ith_lock(thread); + if (thread->ith_self != IP_NULL) { + reply = thread->ith_rpc_reply; + thread->ith_rpc_reply = IP_NULL; + } + ith_unlock(thread); + + if (reply != IP_NULL) + ipc_port_dealloc_reply(reply); +} + +/* + * Routine: mach_msg + * Purpose: + * Like mach_msg_trap except that message buffers + * live in kernel space. Doesn't handle any options. + * + * This is used by in-kernel server threads to make + * kernel calls, to receive request messages, and + * to send reply messages. + * Conditions: + * Nothing locked. 
+ * Returns: + */ + +mach_msg_return_t +mach_msg( + mach_msg_header_t *msg, + mach_msg_option_t option, + mach_msg_size_t send_size, + mach_msg_size_t rcv_size, + mach_port_name_t rcv_name, + mach_msg_timeout_t time_out, + mach_port_name_t notify) +{ + ipc_space_t space = current_space(); + vm_map_t map = current_map(); + ipc_kmsg_t kmsg; + mach_port_seqno_t seqno; + mach_msg_return_t mr; + + if (option & MACH_SEND_MSG) { + mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); + if (mr != MACH_MSG_SUCCESS) + panic("mach_msg"); + + mr = ipc_kmsg_copyin(kmsg, space, map, MACH_PORT_NULL); + if (mr != MACH_MSG_SUCCESS) { + ikm_free(kmsg); + return mr; + } + + do + mr = ipc_mqueue_send(kmsg, MACH_MSG_OPTION_NONE, + MACH_MSG_TIMEOUT_NONE); + while (mr == MACH_SEND_INTERRUPTED); + assert(mr == MACH_MSG_SUCCESS); + } + + if (option & MACH_RCV_MSG) { + do { + ipc_object_t object; + ipc_mqueue_t mqueue; + + mr = ipc_mqueue_copyin(space, rcv_name, + &mqueue, &object); + if (mr != MACH_MSG_SUCCESS) + return mr; + /* hold ref for object; mqueue is locked */ + + mr = ipc_mqueue_receive(mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + FALSE, IMQ_NULL_CONTINUE, + &kmsg, &seqno); + /* mqueue is unlocked */ + ipc_object_release(object); + } while (mr == MACH_RCV_INTERRUPTED); + if (mr != MACH_MSG_SUCCESS) + return mr; + + kmsg->ikm_header.msgh_seqno = seqno; + + if (rcv_size < msg_usize(&kmsg->ikm_header)) { + ipc_kmsg_copyout_dest(kmsg, space); + ipc_kmsg_put_to_kernel(msg, kmsg, sizeof *msg); + return MACH_RCV_TOO_LARGE; + } + + mr = ipc_kmsg_copyout(kmsg, space, map, MACH_PORT_NULL); + if (mr != MACH_MSG_SUCCESS) { + if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) { + ipc_kmsg_put_to_kernel(msg, kmsg, + kmsg->ikm_header.msgh_size); + } else { + ipc_kmsg_copyout_dest(kmsg, space); + ipc_kmsg_put_to_kernel(msg, kmsg, sizeof *msg); + } + + return mr; + } + + ipc_kmsg_put_to_kernel(msg, kmsg, kmsg->ikm_header.msgh_size); + } + + return MACH_MSG_SUCCESS; +} + +/* + * Routine: mig_get_reply_port + * Purpose: + * Called by client side interfaces living in the kernel + * to get a reply port. This port is used for + * mach_msg() calls which are kernel calls. + */ + +mach_port_name_t +mig_get_reply_port(void) +{ + ipc_thread_t self = current_thread(); + + if (self->ith_mig_reply == MACH_PORT_NULL) + self->ith_mig_reply = mach_reply_port(); + + return self->ith_mig_reply; +} + +/* + * Routine: mig_dealloc_reply_port + * Purpose: + * Called by client side interfaces to get rid of a reply port. + * Shouldn't ever be called inside the kernel, because + * kernel calls shouldn't prompt Mig to call it. + */ + +void +mig_dealloc_reply_port( + mach_port_t reply_port) +{ + panic("mig_dealloc_reply_port"); +} + +/* + * Routine: mig_put_reply_port + * Purpose: + * Called by client side interfaces after each RPC to + * let the client recycle the reply port if it wishes. + */ +void +mig_put_reply_port( + mach_port_t reply_port) +{ +} + +/* + * mig_strncpy.c - by Joshua Block + * + * mig_strncpy -- Bounded string copy. Does what the library routine + * strncpy does: Copies the (null terminated) string in src into dest, + * a buffer of length len. Returns the length of the destination + * string excluding the terminating null. + * + * Parameters: + * + * dest - Pointer to destination buffer. + * + * src - Pointer to source string. + * + * len - Length of destination buffer. 
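+ *
+ * For example, mig_strncpy(buf, "ab", 4) stores "ab" and a
+ * terminating null in buf and returns 2; a source of length len or
+ * greater yields len copied bytes with no terminating null, and len
+ * is returned.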
+ */ +vm_size_t +mig_strncpy(char *dest, const char *src, vm_size_t len) +{ + char *dest_ = dest; + int i; + + if (len <= 0) + return 0; + + for (i = 0; i < len; i++) { + if (! (*dest = *src)) + break; + dest++; + src++; + } + + return dest - dest_; +} + +/* Called by MiG to deallocate memory, which in this case happens + * to be kernel memory. */ +void +mig_deallocate(vm_address_t addr, vm_size_t size) +{ + (void) size; + /* We do the same thing as in ipc_kmsg_clean_body. */ + vm_map_copy_discard((vm_map_copy_t) addr); +} + +#define fast_send_right_lookup(name, port, abort) \ +MACRO_BEGIN \ + ipc_space_t space = current_space(); \ + ipc_entry_t entry; \ + \ + is_read_lock(space); \ + assert(space->is_active); \ + \ + entry = ipc_entry_lookup (space, name); \ + if (entry == IE_NULL) { \ + is_read_unlock (space); \ + abort; \ + } \ + \ + if (IE_BITS_TYPE (entry->ie_bits) != MACH_PORT_TYPE_SEND) { \ + is_read_unlock (space); \ + abort; \ + } \ + \ + port = (ipc_port_t) entry->ie_object; \ + assert(port != IP_NULL); \ + \ + ip_lock(port); \ + /* can safely unlock space now that port is locked */ \ + is_read_unlock(space); \ +MACRO_END + +static device_t +port_name_to_device(mach_port_name_t name) +{ + ipc_port_t port; + device_t device; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + /* + * Now map the port object to a device object. + * This is an inline version of dev_port_lookup(). + */ + if (ip_active(port) && (ip_kotype(port) == IKOT_DEVICE)) { + device = (device_t) port->ip_kobject; + device_reference(device); + ip_unlock(port); + return device; + } + + ip_unlock(port); + return DEVICE_NULL; + + /* + * The slow case. The port wasn't easily accessible. + */ + abort: { + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return DEVICE_NULL; + + device = dev_port_lookup(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + return device; + } +} + +static thread_t +port_name_to_thread(mach_port_name_t name) +{ + ipc_port_t port; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + if (ip_active(port) && + (ip_kotype(port) == IKOT_THREAD)) { + thread_t thread; + + thread = (thread_t) port->ip_kobject; + assert(thread != THREAD_NULL); + + /* thread referencing is a bit complicated, + so don't bother to expand inline */ + thread_reference(thread); + ip_unlock(port); + + return thread; + } + + ip_unlock(port); + return THREAD_NULL; + + abort: { + thread_t thread; + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return THREAD_NULL; + + thread = convert_port_to_thread(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + + return thread; + } +} + +static task_t +port_name_to_task(mach_port_name_t name) +{ + ipc_port_t port; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + task_t task; + + task = (task_t) port->ip_kobject; + assert(task != TASK_NULL); + + task_lock(task); + /* can safely unlock port now that task is locked */ + ip_unlock(port); + + task->ref_count++; + task_unlock(task); + + return task; + } + + ip_unlock(port); + return TASK_NULL; + + abort: { + task_t task; + ipc_port_t kern_port; + kern_return_t kr; + + kr = 
ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return TASK_NULL; + + task = convert_port_to_task(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + + return task; + } +} + +static vm_map_t +port_name_to_map( + mach_port_name_t name) +{ + ipc_port_t port; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + vm_map_t map; + + map = ((task_t) port->ip_kobject)->map; + assert(map != VM_MAP_NULL); + + simple_lock(&map->ref_lock); + /* can safely unlock port now that map is locked */ + ip_unlock(port); + + map->ref_count++; + simple_unlock(&map->ref_lock); + + return map; + } + + ip_unlock(port); + return VM_MAP_NULL; + + abort: { + vm_map_t map; + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return VM_MAP_NULL; + + map = convert_port_to_map(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + + return map; + } +} + +static ipc_space_t +port_name_to_space(mach_port_name_t name) +{ + ipc_port_t port; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + ipc_space_t space; + + space = ((task_t) port->ip_kobject)->itk_space; + assert(space != IS_NULL); + + simple_lock(&space->is_ref_lock_data); + /* can safely unlock port now that space is locked */ + ip_unlock(port); + + space->is_references++; + simple_unlock(&space->is_ref_lock_data); + + return space; + } + + ip_unlock(port); + return IS_NULL; + + abort: { + ipc_space_t space; + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return IS_NULL; + + space = convert_port_to_space(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + + return space; + } +} + +/* + * Things to keep in mind: + * + * The idea here is to duplicate the semantics of the true kernel RPC. + * The destination port/object should be checked first, before anything + * that the user might notice (like ipc_object_copyin). Return + * MACH_SEND_INTERRUPTED if it isn't correct, so that the user stub + * knows to fall back on an RPC. For other return values, it won't + * retry with an RPC. The retry might get a different (incorrect) rc. + * Return values are only set (and should only be set, with copyout) + * on successful calls. 
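+ *
+ *	Concretely, each syscall_* trap below follows the same pattern:
+ *	translate the target port name (port_name_to_map, _task, _space,
+ *	...), return MACH_SEND_INTERRUPTED if that fails so the user stub
+ *	falls back on the RPC, perform the operation, copy any results
+ *	out, and drop the reference taken by the translation.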
+ */ + +kern_return_t +syscall_vm_map( + mach_port_name_t target_map, + rpc_vm_offset_t *address, + rpc_vm_size_t size, + rpc_vm_offset_t mask, + boolean_t anywhere, + mach_port_name_t memory_object, + rpc_vm_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_map_t map; + ipc_port_t port; + vm_offset_t addr; + kern_return_t result; + + map = port_name_to_map(target_map); + if (map == VM_MAP_NULL) + return MACH_SEND_INTERRUPTED; + + if (MACH_PORT_NAME_VALID(memory_object)) { + result = ipc_object_copyin(current_space(), memory_object, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &port); + if (result != KERN_SUCCESS) { + vm_map_deallocate(map); + return result; + } + } else + port = (ipc_port_t)invalid_name_to_port(memory_object); + + copyin_address(address, &addr); + result = vm_map(map, &addr, size, mask, anywhere, + port, offset, copy, + cur_protection, max_protection, inheritance); + if (result == KERN_SUCCESS) + copyout_address(&addr, address); + if (IP_VALID(port)) + ipc_port_release_send(port); + vm_map_deallocate(map); + + return result; +} + +kern_return_t syscall_vm_allocate( + mach_port_name_t target_map, + rpc_vm_offset_t *address, + rpc_vm_size_t size, + boolean_t anywhere) +{ + vm_map_t map; + vm_offset_t addr; + kern_return_t result; + + map = port_name_to_map(target_map); + if (map == VM_MAP_NULL) + return MACH_SEND_INTERRUPTED; + + copyin_address(address, &addr); + result = vm_allocate(map, &addr, size, anywhere); + if (result == KERN_SUCCESS) + copyout_address(&addr, address); + vm_map_deallocate(map); + + return result; +} + +kern_return_t syscall_vm_deallocate( + mach_port_name_t target_map, + rpc_vm_offset_t start, + rpc_vm_size_t size) +{ + vm_map_t map; + kern_return_t result; + + map = port_name_to_map(target_map); + if (map == VM_MAP_NULL) + return MACH_SEND_INTERRUPTED; + + result = vm_deallocate(map, start, size); + vm_map_deallocate(map); + + return result; +} + +kern_return_t syscall_task_create( + mach_port_name_t parent_task, + boolean_t inherit_memory, + mach_port_name_t *child_task) /* OUT */ +{ + task_t t, c; + ipc_port_t port; + mach_port_name_t name; + kern_return_t result; + + t = port_name_to_task(parent_task); + if (t == TASK_NULL) + return MACH_SEND_INTERRUPTED; + + result = task_create(t, inherit_memory, &c); + if (result == KERN_SUCCESS) { + port = (ipc_port_t) convert_task_to_port(c); + /* always returns a name, even for non-success return codes */ + (void) ipc_kmsg_copyout_object(current_space(), + (ipc_object_t) port, + MACH_MSG_TYPE_PORT_SEND, &name); + copyout(&name, child_task, sizeof(mach_port_name_t)); + } + task_deallocate(t); + + return result; +} + +kern_return_t syscall_task_terminate(mach_port_name_t task) +{ + task_t t; + kern_return_t result; + + t = port_name_to_task(task); + if (t == TASK_NULL) + return MACH_SEND_INTERRUPTED; + + result = task_terminate(t); + task_deallocate(t); + + return result; +} + +kern_return_t syscall_task_suspend(mach_port_name_t task) +{ + task_t t; + kern_return_t result; + + t = port_name_to_task(task); + if (t == TASK_NULL) + return MACH_SEND_INTERRUPTED; + + result = task_suspend(t); + task_deallocate(t); + + return result; +} + +kern_return_t syscall_task_set_special_port( + mach_port_name_t task, + int which_port, + mach_port_name_t port_name) +{ + task_t t; + ipc_port_t port; + kern_return_t result; + + t = port_name_to_task(task); + if (t == TASK_NULL) + return MACH_SEND_INTERRUPTED; + + if (MACH_PORT_NAME_VALID(port_name)) 
{ + result = ipc_object_copyin(current_space(), port_name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &port); + if (result != KERN_SUCCESS) { + task_deallocate(t); + return result; + } + } else + port = (ipc_port_t)invalid_name_to_port(port_name); + + result = task_set_special_port(t, which_port, port); + if ((result != KERN_SUCCESS) && IP_VALID(port)) + ipc_port_release_send(port); + task_deallocate(t); + + return result; +} + +kern_return_t +syscall_mach_port_allocate( + mach_port_name_t task, + mach_port_right_t right, + mach_port_name_t *namep) +{ + ipc_space_t space; + mach_port_name_t name; + kern_return_t kr; + + space = port_name_to_space(task); + if (space == IS_NULL) + return MACH_SEND_INTERRUPTED; + + kr = mach_port_allocate(space, right, &name); + if (kr == KERN_SUCCESS) + { + copyout(&name, namep, sizeof(mach_port_name_t)); + } + is_release(space); + + return kr; +} + +kern_return_t +syscall_mach_port_allocate_name( + mach_port_name_t task, + mach_port_right_t right, + mach_port_name_t name) +{ + ipc_space_t space; + kern_return_t kr; + + space = port_name_to_space(task); + if (space == IS_NULL) + return MACH_SEND_INTERRUPTED; + + kr = mach_port_allocate_name(space, right, name); + is_release(space); + + return kr; +} + +kern_return_t +syscall_mach_port_deallocate( + mach_port_name_t task, + mach_port_name_t name) +{ + ipc_space_t space; + kern_return_t kr; + + space = port_name_to_space(task); + if (space == IS_NULL) + return MACH_SEND_INTERRUPTED; + + kr = mach_port_deallocate(space, name); + is_release(space); + + return kr; +} + +kern_return_t +syscall_mach_port_insert_right( + mach_port_name_t task, + mach_port_name_t name, + mach_port_name_t right, + mach_msg_type_name_t rightType) +{ + ipc_space_t space; + ipc_object_t object; + mach_msg_type_name_t newtype; + kern_return_t kr; + + space = port_name_to_space(task); + if (space == IS_NULL) + return MACH_SEND_INTERRUPTED; + + if (!MACH_MSG_TYPE_PORT_ANY(rightType)) { + is_release(space); + return KERN_INVALID_VALUE; + } + + if (MACH_PORT_NAME_VALID(right)) { + kr = ipc_object_copyin(current_space(), right, rightType, + &object); + if (kr != KERN_SUCCESS) { + is_release(space); + return kr; + } + } else + object = (ipc_object_t)invalid_name_to_port(right); + newtype = ipc_object_copyin_type(rightType); + + kr = mach_port_insert_right(space, name, (ipc_port_t) object, newtype); + if ((kr != KERN_SUCCESS) && IO_VALID(object)) + ipc_object_destroy(object, newtype); + is_release(space); + + return kr; +} + +kern_return_t syscall_thread_depress_abort(mach_port_name_t thread) +{ + thread_t t; + kern_return_t result; + + t = port_name_to_thread(thread); + if (t == THREAD_NULL) + return MACH_SEND_INTERRUPTED; + + result = thread_depress_abort(t); + thread_deallocate(t); + + return result; +} + +/* + * Device traps -- these are way experimental. + */ +io_return_t +syscall_device_write_request(mach_port_name_t device_name, + mach_port_name_t reply_name, + dev_mode_t mode, + rpc_recnum_t recnum, + rpc_vm_offset_t data, + rpc_vm_size_t data_count) +{ + device_t dev; + /*ipc_port_t reply_port;*/ + io_return_t res; + + /* + * First try to translate the device name. + * + * If this fails, return KERN_INVALID_CAPABILITY. + * Caller knows that this most likely means that + * device is not local to node and IPC should be used. + * + * If kernel doesn't do device traps, kern_invalid() + * will be called instead of this function which will + * return KERN_INVALID_ARGUMENT. 
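+	 *
+	 * A non-null reply port is rejected below with
+	 * KERN_INVALID_RIGHT, since this trap does not take a
+	 * reply port yet.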
+ */ + dev = port_name_to_device(device_name); + if (dev == DEVICE_NULL) + return KERN_INVALID_CAPABILITY; + + /* + * Translate reply port. + */ + /*if (reply_name == MACH_PORT_NULL) + reply_port = IP_NULL; + */ + if (reply_name != MACH_PORT_NULL) { + /* Homey don't play that. */ + device_deallocate(dev); + return KERN_INVALID_RIGHT; + } + + /* note: doesn't take reply_port arg yet. */ + res = ds_device_write_trap(dev, /*reply_port,*/ + mode, recnum, + data, data_count); + + /* + * Give up reference from port_name_to_device. + */ + device_deallocate(dev); + return res; +} + +io_return_t +syscall_device_writev_request(mach_port_name_t device_name, + mach_port_name_t reply_name, + dev_mode_t mode, + rpc_recnum_t recnum, + rpc_io_buf_vec_t *iovec, + rpc_vm_size_t iocount) +{ + device_t dev; + /*ipc_port_t reply_port;*/ + io_return_t res; + + /* + * First try to translate the device name. + * + * If this fails, return KERN_INVALID_CAPABILITY. + * Caller knows that this most likely means that + * device is not local to node and IPC should be used. + * + * If kernel doesn't do device traps, kern_invalid() + * will be called instead of this function which will + * return KERN_INVALID_ARGUMENT. + */ + dev = port_name_to_device(device_name); + if (dev == DEVICE_NULL) + return KERN_INVALID_CAPABILITY; + + /* + * Translate reply port. + */ + /*if (reply_name == MACH_PORT_NULL) + reply_port = IP_NULL; + */ + if (reply_name != MACH_PORT_NULL) { + /* Homey don't play that. */ + device_deallocate(dev); + return KERN_INVALID_RIGHT; + } + + /* note: doesn't take reply_port arg yet. */ + res = ds_device_writev_trap(dev, /*reply_port,*/ + mode, recnum, + iovec, iocount); + + /* + * Give up reference from port_name_to_device. + */ + device_deallocate(dev); + return res; +} diff --git a/kern/ipc_mig.h b/kern/ipc_mig.h new file mode 100644 index 0000000..422e8d8 --- /dev/null +++ b/kern/ipc_mig.h @@ -0,0 +1,143 @@ +/* + * MIG IPC functions + * Copyright (C) 2008 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Barry deFreese. + */ +/* + * MIG IPC functions. + * + */ + +#ifndef _IPC_MIG_H_ +#define _IPC_MIG_H_ + +#include <mach/std_types.h> +#include <device/device_types.h> +#include <ipc/ipc_thread.h> + +/* + * Routine: mach_msg_send_from_kernel + * Purpose: + * Send a message from the kernel. + * + * This is used by the client side of KernelUser interfaces + * to implement SimpleRoutines. Currently, this includes + * device_reply and memory_object messages. + * Conditions: + * Nothing locked. + * Returns: + * MACH_MSG_SUCCESS Sent the message. + * MACH_SEND_INVALID_DATA Bad destination port. + */ +extern mach_msg_return_t mach_msg_send_from_kernel( + mach_msg_header_t *msg, + mach_msg_size_t send_size); + +/* + * Routine: mach_msg_abort_rpc + * Purpose: + * Destroy the thread's ith_rpc_reply port. 
+ * This will interrupt a mach_msg_rpc_from_kernel + * with a MACH_RCV_PORT_DIED return code. + * Conditions: + * Nothing locked. + */ +extern void mach_msg_abort_rpc (ipc_thread_t); + +extern mach_msg_return_t mach_msg_rpc_from_kernel( + const mach_msg_header_t *msg, + mach_msg_size_t send_size, + mach_msg_size_t reply_size); + +extern kern_return_t syscall_vm_map( + mach_port_name_t target_map, + rpc_vm_offset_t *address, + rpc_vm_size_t size, + rpc_vm_offset_t mask, + boolean_t anywhere, + mach_port_name_t memory_object, + rpc_vm_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance); + +extern kern_return_t syscall_vm_allocate( + mach_port_name_t target_map, + rpc_vm_offset_t *address, + rpc_vm_size_t size, + boolean_t anywhere); + +extern kern_return_t syscall_vm_deallocate( + mach_port_name_t target_map, + rpc_vm_offset_t start, + rpc_vm_size_t size); + +extern kern_return_t syscall_task_create( + mach_port_name_t parent_task, + boolean_t inherit_memory, + mach_port_name_t *child_task); + +extern kern_return_t syscall_task_terminate(mach_port_name_t task); + +extern kern_return_t syscall_task_suspend(mach_port_name_t task); + +extern kern_return_t syscall_task_set_special_port( + mach_port_name_t task, + int which_port, + mach_port_name_t port_name); + +extern kern_return_t syscall_mach_port_allocate( + mach_port_name_t task, + mach_port_right_t right, + mach_port_name_t *namep); + +extern kern_return_t syscall_mach_port_deallocate( + mach_port_name_t task, + mach_port_name_t name); + +extern kern_return_t syscall_mach_port_insert_right( + mach_port_name_t task, + mach_port_name_t name, + mach_port_name_t right, + mach_msg_type_name_t rightType); + +extern kern_return_t syscall_mach_port_allocate_name( + mach_port_name_t task, + mach_port_right_t right, + mach_port_name_t name); + +extern kern_return_t syscall_thread_depress_abort(mach_port_name_t thread); + +extern io_return_t syscall_device_write_request( + mach_port_name_t device_name, + mach_port_name_t reply_name, + dev_mode_t mode, + rpc_recnum_t recnum, + rpc_vm_offset_t data, + rpc_vm_size_t data_count); + +io_return_t syscall_device_writev_request( + mach_port_name_t device_name, + mach_port_name_t reply_name, + dev_mode_t mode, + rpc_recnum_t recnum, + rpc_io_buf_vec_t *iovec, + rpc_vm_size_t iocount); + +#endif /* _IPC_MIG_H_ */ diff --git a/kern/ipc_sched.c b/kern/ipc_sched.c new file mode 100644 index 0000000..4519c65 --- /dev/null +++ b/kern/ipc_sched.c @@ -0,0 +1,283 @@ +/* + * Mach Operating System + * Copyright (c) 1993, 1992,1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach/message.h> +#include <kern/counters.h> +#include "cpu_number.h" +#include <kern/debug.h> +#include <kern/lock.h> +#include <kern/mach_clock.h> +#include <kern/thread.h> +#include <kern/sched_prim.h> +#include <kern/processor.h> +#include <kern/thread_swap.h> +#include <kern/ipc_sched.h> +#include <machine/machspl.h> /* for splsched/splx */ +#include <machine/pmap.h> + + + +/* + * These functions really belong in kern/sched_prim.c. + */ + +/* + * Routine: thread_go + * Purpose: + * Start a thread running. + * Conditions: + * IPC locks may be held. + */ + +void +thread_go( + thread_t thread) +{ + int state; + spl_t s; + + s = splsched(); + thread_lock(thread); + + reset_timeout_check(&thread->timer); + + state = thread->state; + switch (state & TH_SCHED_STATE) { + + case TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_UNINT: + case TH_WAIT: + /* + * Sleeping and not suspendable - put + * on run queue. + */ + thread->state = (state &~ TH_WAIT) | TH_RUN; + thread->wait_result = THREAD_AWAKENED; + thread_setrun(thread, TRUE); + break; + + case TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + /* + * Either already running, or suspended. + */ + thread->state = state & ~TH_WAIT; + thread->wait_result = THREAD_AWAKENED; + break; + + default: + /* + * Not waiting. + */ + break; + } + + thread_unlock(thread); + splx(s); +} + +/* + * Routine: thread_will_wait + * Purpose: + * Assert that the thread intends to block. + */ + +void +thread_will_wait( + thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + + assert(thread->wait_result = -1); /* for later assertions */ + thread->state |= TH_WAIT; + + thread_unlock(thread); + splx(s); +} + +/* + * Routine: thread_will_wait_with_timeout + * Purpose: + * Assert that the thread intends to block, + * with a timeout. + */ + +void +thread_will_wait_with_timeout( + thread_t thread, + mach_msg_timeout_t msecs) +{ + natural_t ticks = convert_ipc_timeout_to_ticks(msecs); + spl_t s; + + s = splsched(); + thread_lock(thread); + + assert(thread->wait_result = -1); /* for later assertions */ + thread->state |= TH_WAIT; + + set_timeout(&thread->timer, ticks); + + thread_unlock(thread); + splx(s); +} + +#if MACH_HOST +#define check_processor_set(thread) \ + (current_processor()->processor_set == (thread)->processor_set) +#else /* MACH_HOST */ +#define check_processor_set(thread) TRUE +#endif /* MACH_HOST */ + +#if NCPUS > 1 +#define check_bound_processor(thread) \ + ((thread)->bound_processor == PROCESSOR_NULL || \ + (thread)->bound_processor == current_processor()) +#else /* NCPUS > 1 */ +#define check_bound_processor(thread) TRUE +#endif /* NCPUS > 1 */ + +/* + * Routine: thread_handoff + * Purpose: + * Switch to a new thread (new), leaving the current + * thread (old) blocked. If successful, moves the + * kernel stack from old to new and returns as the + * new thread. An explicit continuation for the old thread + * must be supplied. + * + * NOTE: Although we wakeup new, we don't set new->wait_result. + * Returns: + * TRUE if the handoff happened. 
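+ *		FALSE if the handoff could not be done: old holds stack
+ *		privilege, new is not simply waiting and swapped, or new
+ *		cannot run on this processor.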
+ */ + +boolean_t +thread_handoff( + thread_t old, + continuation_t continuation, + thread_t new) +{ + spl_t s; + + assert(current_thread() == old); + + /* + * XXX Dubious things here: + * I don't check the idle_count on the processor set. + * No scheduling priority or policy checks. + * I assume the new thread is interruptible. + */ + + s = splsched(); + thread_lock(new); + + /* + * The first thing we must do is check the state + * of the threads, to ensure we can handoff. + * This check uses current_processor()->processor_set, + * which we can read without locking. + */ + + if ((old->stack_privilege == current_stack()) || + (new->state != (TH_WAIT|TH_SWAPPED)) || + !check_processor_set(new) || + !check_bound_processor(new)) { + thread_unlock(new); + (void) splx(s); + + counter(c_thread_handoff_misses++); + return FALSE; + } + + reset_timeout_check(&new->timer); + + new->state = TH_RUN; + thread_unlock(new); + +#if NCPUS > 1 + new->last_processor = current_processor(); +#endif /* NCPUS > 1 */ + + ast_context(new, cpu_number()); + timer_switch(&new->system_timer); + + /* + * stack_handoff is machine-dependent. It does the + * machine-dependent components of a context-switch, like + * changing address spaces. It updates active_thread. + */ + + stack_handoff(old, new); + + /* + * Now we must dispose of the old thread. + * This is like thread_continue, except + * that the old thread isn't waiting yet. + */ + + thread_lock(old); + old->swap_func = continuation; + assert(old->wait_result = -1); /* for later assertions */ + + if (old->state == TH_RUN) { + /* + * This is our fast path. + */ + + old->state = TH_WAIT|TH_SWAPPED; + } + else if (old->state == (TH_RUN|TH_SUSP)) { + /* + * Somebody is trying to suspend the thread. + */ + + old->state = TH_WAIT|TH_SUSP|TH_SWAPPED; + if (old->wake_active) { + /* + * Someone wants to know when the thread + * really stops. + */ + old->wake_active = FALSE; + thread_unlock(old); + thread_wakeup(TH_EV_WAKE_ACTIVE(old)); + goto after_old_thread; + } + } else + panic("thread_handoff"); + + thread_unlock(old); + after_old_thread: + (void) splx(s); + + counter(c_thread_handoff_hits++); + return TRUE; +} diff --git a/kern/ipc_sched.h b/kern/ipc_sched.h new file mode 100644 index 0000000..bdee832 --- /dev/null +++ b/kern/ipc_sched.h @@ -0,0 +1,32 @@ +/* + * Mach Operating System + * Copyright (c) 1992,1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#ifndef _KERN_IPC_SCHED_H_ +#define _KERN_IPC_SCHED_H_ + +#include <kern/sched_prim.h> + +#endif /* _KERN_IPC_SCHED_H_ */ diff --git a/kern/ipc_tt.c b/kern/ipc_tt.c new file mode 100644 index 0000000..7c9a0b8 --- /dev/null +++ b/kern/ipc_tt.c @@ -0,0 +1,1113 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: ipc_tt.c + * Purpose: + * Task and thread related IPC functions. + */ + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/mach_param.h> +#include <mach/mach_traps.h> +#include <mach/task_special_ports.h> +#include <mach/thread_special_ports.h> +#include <vm/vm_kern.h> +#include <kern/debug.h> +#include <kern/kalloc.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/ipc_kobject.h> +#include <kern/ipc_tt.h> +#include <kern/mach.server.h> +#include <ipc/ipc_space.h> +#include <ipc/ipc_table.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_right.h> +#include <ipc/ipc_entry.h> +#include <ipc/ipc_object.h> + + + +/* + * Routine: ipc_task_init + * Purpose: + * Initialize a task's IPC state. + * + * If non-null, some state will be inherited from the parent. + * The parent must be appropriately initialized. + * Conditions: + * Nothing locked. + */ + +void +ipc_task_init( + task_t task, + task_t parent) +{ + ipc_space_t space; + ipc_port_t kport; + kern_return_t kr; + int i; + + + kr = ipc_space_create(&space); + if (kr != KERN_SUCCESS) + panic("ipc_task_init"); + + + kport = ipc_port_alloc_kernel(); + if (kport == IP_NULL) + panic("ipc_task_init"); + + itk_lock_init(task); + task->itk_self = kport; + task->itk_sself = ipc_port_make_send(kport); + task->itk_space = space; + + if (parent == TASK_NULL) { + task->itk_exception = IP_NULL; + task->itk_bootstrap = IP_NULL; + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + task->itk_registered[i] = IP_NULL; + } else { + itk_lock(parent); + assert(parent->itk_self != IP_NULL); + + /* inherit registered ports */ + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + task->itk_registered[i] = + ipc_port_copy_send(parent->itk_registered[i]); + + /* inherit exception and bootstrap ports */ + + task->itk_exception = + ipc_port_copy_send(parent->itk_exception); + task->itk_bootstrap = + ipc_port_copy_send(parent->itk_bootstrap); + + itk_unlock(parent); + } +} + +/* + * Routine: ipc_task_enable + * Purpose: + * Enable a task for IPC access. + * Conditions: + * Nothing locked. 
+ */ + +void +ipc_task_enable( + task_t task) +{ + ipc_port_t kport; + + itk_lock(task); + kport = task->itk_self; + if (kport != IP_NULL) + ipc_kobject_set(kport, (ipc_kobject_t) task, IKOT_TASK); + itk_unlock(task); +} + +/* + * Routine: ipc_task_disable + * Purpose: + * Disable IPC access to a task. + * Conditions: + * Nothing locked. + */ + +void +ipc_task_disable( + task_t task) +{ + ipc_port_t kport; + + itk_lock(task); + kport = task->itk_self; + if (kport != IP_NULL) + ipc_kobject_set(kport, IKO_NULL, IKOT_NONE); + itk_unlock(task); +} + +/* + * Routine: ipc_task_terminate + * Purpose: + * Clean up and destroy a task's IPC state. + * Conditions: + * Nothing locked. The task must be suspended. + * (Or the current thread must be in the task.) + */ + +void +ipc_task_terminate( + task_t task) +{ + ipc_port_t kport; + int i; + + itk_lock(task); + kport = task->itk_self; + + if (kport == IP_NULL) { + /* the task is already terminated (can this happen?) */ + itk_unlock(task); + return; + } + + task->itk_self = IP_NULL; + itk_unlock(task); + + /* release the naked send rights */ + + if (IP_VALID(task->itk_sself)) + ipc_port_release_send(task->itk_sself); + if (IP_VALID(task->itk_exception)) + ipc_port_release_send(task->itk_exception); + if (IP_VALID(task->itk_bootstrap)) + ipc_port_release_send(task->itk_bootstrap); + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + if (IP_VALID(task->itk_registered[i])) + ipc_port_release_send(task->itk_registered[i]); + + /* destroy the space, leaving just a reference for it */ + + ipc_space_destroy(task->itk_space); + + /* destroy the kernel port */ + + ipc_port_dealloc_kernel(kport); +} + +/* + * Routine: ipc_thread_init + * Purpose: + * Initialize a thread's IPC state. + * Conditions: + * Nothing locked. + */ + +void +ipc_thread_init(thread_t thread) +{ + ipc_port_t kport; + + kport = ipc_port_alloc_kernel(); + if (kport == IP_NULL) + panic("ipc_thread_init"); + + ipc_thread_links_init(thread); + ipc_kmsg_queue_init(&thread->ith_messages); + + ith_lock_init(thread); + thread->ith_self = kport; + thread->ith_sself = ipc_port_make_send(kport); + thread->ith_exception = IP_NULL; + + thread->ith_mig_reply = MACH_PORT_NULL; + thread->ith_rpc_reply = IP_NULL; +} + +/* + * Routine: ipc_thread_enable + * Purpose: + * Enable a thread for IPC access. + * Conditions: + * Nothing locked. + */ + +void +ipc_thread_enable(thread_t thread) +{ + ipc_port_t kport; + + ith_lock(thread); + kport = thread->ith_self; + if (kport != IP_NULL) + ipc_kobject_set(kport, (ipc_kobject_t) thread, IKOT_THREAD); + ith_unlock(thread); +} + +/* + * Routine: ipc_thread_disable + * Purpose: + * Disable IPC access to a thread. + * Conditions: + * Nothing locked. + */ + +void +ipc_thread_disable(thread_t thread) +{ + ipc_port_t kport; + + ith_lock(thread); + kport = thread->ith_self; + if (kport != IP_NULL) + ipc_kobject_set(kport, IKO_NULL, IKOT_NONE); + ith_unlock(thread); +} + +/* + * Routine: ipc_thread_terminate + * Purpose: + * Clean up and destroy a thread's IPC state. + * Conditions: + * Nothing locked. The thread must be suspended. + * (Or be the current thread.) + */ + +void +ipc_thread_terminate(thread_t thread) +{ + ipc_port_t kport; + + ith_lock(thread); + kport = thread->ith_self; + + if (kport == IP_NULL) { + /* the thread is already terminated (can this happen?) 
*/ + ith_unlock(thread); + return; + } + + thread->ith_self = IP_NULL; + ith_unlock(thread); + + assert(ipc_kmsg_queue_empty(&thread->ith_messages)); + + /* release the naked send rights */ + + if (IP_VALID(thread->ith_sself)) + ipc_port_release_send(thread->ith_sself); + if (IP_VALID(thread->ith_exception)) + ipc_port_release_send(thread->ith_exception); + + /* destroy the kernel port */ + + ipc_port_dealloc_kernel(kport); +} + +#if 0 +/* + * Routine: retrieve_task_self + * Purpose: + * Return a send right (possibly null/dead) + * for the task's user-visible self port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_task_self(task) + task_t task; +{ + ipc_port_t port; + + assert(task != TASK_NULL); + + itk_lock(task); + if (task->itk_self != IP_NULL) + port = ipc_port_copy_send(task->itk_sself); + else + port = IP_NULL; + itk_unlock(task); + + return port; +} + +/* + * Routine: retrieve_thread_self + * Purpose: + * Return a send right (possibly null/dead) + * for the thread's user-visible self port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_thread_self(thread) + thread_t thread; +{ + ipc_port_t port; + + assert(thread != ITH_NULL); + + ith_lock(thread); + if (thread->ith_self != IP_NULL) + port = ipc_port_copy_send(thread->ith_sself); + else + port = IP_NULL; + ith_unlock(thread); + + return port; +} +#endif /* 0 */ + +/* + * Routine: retrieve_task_self_fast + * Purpose: + * Optimized version of retrieve_task_self, + * that only works for the current task. + * + * Return a send right (possibly null/dead) + * for the task's user-visible self port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_task_self_fast( + task_t task) +{ + ipc_port_t port; + + assert(task == current_task()); + + itk_lock(task); + assert(task->itk_self != IP_NULL); + + if ((port = task->itk_sself) == task->itk_self) { + /* no interposing */ + + ip_lock(port); + assert(ip_active(port)); + ip_reference(port); + port->ip_srights++; + ip_unlock(port); + } else + port = ipc_port_copy_send(port); + itk_unlock(task); + + return port; +} + +/* + * Routine: retrieve_thread_self_fast + * Purpose: + * Optimized version of retrieve_thread_self, + * that only works for the current thread. + * + * Return a send right (possibly null/dead) + * for the thread's user-visible self port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_thread_self_fast(thread_t thread) +{ + ipc_port_t port; + + assert(thread == current_thread()); + + ith_lock(thread); + assert(thread->ith_self != IP_NULL); + + if ((port = thread->ith_sself) == thread->ith_self) { + /* no interposing */ + + ip_lock(port); + assert(ip_active(port)); + ip_reference(port); + port->ip_srights++; + ip_unlock(port); + } else + port = ipc_port_copy_send(port); + ith_unlock(thread); + + return port; +} + +#if 0 +/* + * Routine: retrieve_task_exception + * Purpose: + * Return a send right (possibly null/dead) + * for the task's exception port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_task_exception(task) + task_t task; +{ + ipc_port_t port; + + assert(task != TASK_NULL); + + itk_lock(task); + if (task->itk_self != IP_NULL) + port = ipc_port_copy_send(task->itk_exception); + else + port = IP_NULL; + itk_unlock(task); + + return port; +} + +/* + * Routine: retrieve_thread_exception + * Purpose: + * Return a send right (possibly null/dead) + * for the thread's exception port. + * Conditions: + * Nothing locked. 
+ */ + +ipc_port_t +retrieve_thread_exception(thread) + thread_t thread; +{ + ipc_port_t port; + + assert(thread != ITH_NULL); + + ith_lock(thread); + if (thread->ith_self != IP_NULL) + port = ipc_port_copy_send(thread->ith_exception); + else + port = IP_NULL; + ith_unlock(thread); + + return port; +} +#endif /* 0 */ + +/* + * Routine: mach_task_self [mach trap] + * Purpose: + * Give the caller send rights for his own task port. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +mach_port_name_t +mach_task_self(void) +{ + task_t task = current_task(); + ipc_port_t sright; + + sright = retrieve_task_self_fast(task); + return ipc_port_copyout_send(sright, task->itk_space); +} + +/* + * Routine: mach_thread_self [mach trap] + * Purpose: + * Give the caller send rights for his own thread port. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +mach_port_name_t +mach_thread_self(void) +{ + thread_t thread = current_thread(); + task_t task = thread->task; + ipc_port_t sright; + + sright = retrieve_thread_self_fast(thread); + return ipc_port_copyout_send(sright, task->itk_space); +} + +/* + * Routine: mach_reply_port [mach trap] + * Purpose: + * Allocate a port for the caller. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +mach_port_name_t +mach_reply_port(void) +{ + ipc_port_t port; + mach_port_name_t name; + kern_return_t kr; + + kr = ipc_port_alloc(current_task()->itk_space, &name, &port); + if (kr == KERN_SUCCESS) + ip_unlock(port); + else + name = MACH_PORT_NULL; + + return name; +} + +/* + * Routine: task_get_special_port [kernel call] + * Purpose: + * Clones a send right for one of the task's + * special ports. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Extracted a send right. + * KERN_INVALID_ARGUMENT The task is null. + * KERN_FAILURE The task/space is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +task_get_special_port( + task_t task, + int which, + ipc_port_t *portp) +{ + ipc_port_t *whichp; + ipc_port_t port; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + switch (which) { + case TASK_KERNEL_PORT: + whichp = &task->itk_sself; + break; + + case TASK_EXCEPTION_PORT: + whichp = &task->itk_exception; + break; + + case TASK_BOOTSTRAP_PORT: + whichp = &task->itk_bootstrap; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + return KERN_FAILURE; + } + + port = ipc_port_copy_send(*whichp); + itk_unlock(task); + + *portp = port; + return KERN_SUCCESS; +} + +/* + * Routine: task_set_special_port [kernel call] + * Purpose: + * Changes one of the task's special ports, + * setting it to the supplied send right. + * Conditions: + * Nothing locked. If successful, consumes + * the supplied send right. + * Returns: + * KERN_SUCCESS Changed the special port. + * KERN_INVALID_ARGUMENT The task is null. + * KERN_FAILURE The task/space is dead. + * KERN_INVALID_ARGUMENT Invalid special port. 
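+ *
+ *	Illustrative call (a sketch only; "child" is a task_t and
+ *	"bootstrap" a send right the caller already holds, which is
+ *	consumed on success):
+ *
+ *		kr = task_set_special_port(child, TASK_BOOTSTRAP_PORT,
+ *					   bootstrap);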
+ */ + +kern_return_t +task_set_special_port( + task_t task, + int which, + const ipc_port_t port) +{ + ipc_port_t *whichp; + ipc_port_t old; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + switch (which) { + case TASK_KERNEL_PORT: + whichp = &task->itk_sself; + break; + + case TASK_EXCEPTION_PORT: + whichp = &task->itk_exception; + break; + + case TASK_BOOTSTRAP_PORT: + whichp = &task->itk_bootstrap; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + return KERN_FAILURE; + } + + old = *whichp; + *whichp = port; + itk_unlock(task); + + if (IP_VALID(old)) + ipc_port_release_send(old); + return KERN_SUCCESS; +} + +/* + * Routine: thread_get_special_port [kernel call] + * Purpose: + * Clones a send right for one of the thread's + * special ports. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Extracted a send right. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +thread_get_special_port( + thread_t thread, + int which, + ipc_port_t *portp) +{ + ipc_port_t *whichp; + ipc_port_t port; + + if (thread == ITH_NULL) + return KERN_INVALID_ARGUMENT; + + switch (which) { + case THREAD_KERNEL_PORT: + whichp = &thread->ith_sself; + break; + + case THREAD_EXCEPTION_PORT: + whichp = &thread->ith_exception; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + ith_lock(thread); + if (thread->ith_self == IP_NULL) { + ith_unlock(thread); + return KERN_FAILURE; + } + + port = ipc_port_copy_send(*whichp); + ith_unlock(thread); + + *portp = port; + return KERN_SUCCESS; +} + +/* + * Routine: thread_set_special_port [kernel call] + * Purpose: + * Changes one of the thread's special ports, + * setting it to the supplied send right. + * Conditions: + * Nothing locked. If successful, consumes + * the supplied send right. + * Returns: + * KERN_SUCCESS Changed the special port. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +thread_set_special_port( + thread_t thread, + int which, + ipc_port_t port) +{ + ipc_port_t *whichp; + ipc_port_t old; + + if (thread == ITH_NULL) + return KERN_INVALID_ARGUMENT; + + switch (which) { + case THREAD_KERNEL_PORT: + whichp = &thread->ith_sself; + break; + + case THREAD_EXCEPTION_PORT: + whichp = &thread->ith_exception; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + ith_lock(thread); + if (thread->ith_self == IP_NULL) { + ith_unlock(thread); + return KERN_FAILURE; + } + + old = *whichp; + *whichp = port; + ith_unlock(thread); + + if (IP_VALID(old)) + ipc_port_release_send(old); + return KERN_SUCCESS; +} + +/* + * Routine: mach_ports_register [kernel call] + * Purpose: + * Stash a handful of port send rights in the task. + * Child tasks will inherit these rights, but they + * must use mach_ports_lookup to acquire them. + * + * The rights are supplied in a (wired) kalloc'd segment. + * Rights which aren't supplied are assumed to be null. + * Conditions: + * Nothing locked. If successful, consumes + * the supplied rights and memory. + * Returns: + * KERN_SUCCESS Stashed the port rights. + * KERN_INVALID_ARGUMENT The task is null. + * KERN_INVALID_ARGUMENT The task is dead. + * KERN_INVALID_ARGUMENT Too many port rights supplied. 
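+ *
+ *	Intended pairing, seen from user tasks (sketch only; the
+ *	variable names are illustrative):
+ *
+ *		parent:  mach_ports_register(mach_task_self(), ports, count);
+ *		child:   mach_ports_lookup(mach_task_self(), &ports, &count);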
+ */ + +kern_return_t +mach_ports_register( + task_t task, + mach_port_array_t memory, + mach_msg_type_number_t portsCnt) +{ + ipc_port_t ports[TASK_PORT_REGISTER_MAX]; + unsigned i; + + if ((task == TASK_NULL) || + (portsCnt > TASK_PORT_REGISTER_MAX)) + return KERN_INVALID_ARGUMENT; + + /* + * Pad the port rights with nulls. + */ + + for (i = 0; i < portsCnt; i++) + ports[i] = (ipc_port_t)memory[i]; + for (; i < TASK_PORT_REGISTER_MAX; i++) + ports[i] = IP_NULL; + + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + return KERN_INVALID_ARGUMENT; + } + + /* + * Replace the old send rights with the new. + * Release the old rights after unlocking. + */ + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) { + ipc_port_t old; + + old = task->itk_registered[i]; + task->itk_registered[i] = ports[i]; + ports[i] = old; + } + + itk_unlock(task); + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + if (IP_VALID(ports[i])) + ipc_port_release_send(ports[i]); + + /* + * Now that the operation is known to be successful, + * we can free the memory. + */ + + if (portsCnt != 0) + kfree((vm_offset_t) memory, + (vm_size_t) (portsCnt * sizeof(mach_port_t))); + + return KERN_SUCCESS; +} + +/* + * Routine: mach_ports_lookup [kernel call] + * Purpose: + * Retrieves (clones) the stashed port send rights. + * Conditions: + * Nothing locked. If successful, the caller gets + * rights and memory. + * Returns: + * KERN_SUCCESS Retrieved the send rights. + * KERN_INVALID_ARGUMENT The task is null. + * KERN_INVALID_ARGUMENT The task is dead. + * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. + */ + +kern_return_t +mach_ports_lookup( + task_t task, + mach_port_t **portsp, + mach_msg_type_number_t *portsCnt) +{ + vm_offset_t memory; + vm_size_t size; + ipc_port_t *ports; + int i; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + size = (vm_size_t) (TASK_PORT_REGISTER_MAX * sizeof(ipc_port_t)); + + memory = kalloc(size); + if (memory == 0) + return KERN_RESOURCE_SHORTAGE; + + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + + kfree(memory, size); + return KERN_INVALID_ARGUMENT; + } + + ports = (ipc_port_t *) memory; + + /* + * Clone port rights. Because kalloc'd memory + * is wired, we won't fault while holding the task lock. + */ + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + ports[i] = ipc_port_copy_send(task->itk_registered[i]); + + itk_unlock(task); + + *portsp = (mach_port_t *)ports; + *portsCnt = TASK_PORT_REGISTER_MAX; + return KERN_SUCCESS; +} + +/* + * Routine: convert_port_to_task + * Purpose: + * Convert from a port to a task. + * Doesn't consume the port ref; produces a task ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +task_t +convert_port_to_task( + ipc_port_t port) +{ + task_t task = TASK_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + task = (task_t) port->ip_kobject; + task_reference(task); + } + ip_unlock(port); + } + + return task; +} + +/* + * Routine: convert_port_to_space + * Purpose: + * Convert from a port to a space. + * Doesn't consume the port ref; produces a space ref, + * which may be null. + * Conditions: + * Nothing locked. 
+ */ + +ipc_space_t +convert_port_to_space( + ipc_port_t port) +{ + ipc_space_t space = IS_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + space = ((task_t) port->ip_kobject)->itk_space; + is_reference(space); + } + ip_unlock(port); + } + + return space; +} + +/* + * Routine: convert_port_to_map + * Purpose: + * Convert from a port to a map. + * Doesn't consume the port ref; produces a map ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +vm_map_t +convert_port_to_map(ipc_port_t port) +{ + vm_map_t map = VM_MAP_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + map = ((task_t) port->ip_kobject)->map; + vm_map_reference(map); + } + ip_unlock(port); + } + + return map; +} + +/* + * Routine: convert_port_to_thread + * Purpose: + * Convert from a port to a thread. + * Doesn't consume the port ref; produces a thread ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +thread_t +convert_port_to_thread(ipc_port_t port) +{ + thread_t thread = THREAD_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_THREAD)) { + thread = (thread_t) port->ip_kobject; + thread_reference(thread); + } + ip_unlock(port); + } + + return thread; +} + +/* + * Routine: convert_task_to_port + * Purpose: + * Convert from a task to a port. + * Consumes a task ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_task_to_port(task_t task) +{ + ipc_port_t port; + + itk_lock(task); + if (task->itk_self != IP_NULL) + port = ipc_port_make_send(task->itk_self); + else + port = IP_NULL; + itk_unlock(task); + + task_deallocate(task); + return port; +} + +/* + * Routine: convert_thread_to_port + * Purpose: + * Convert from a thread to a port. + * Consumes a thread ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_thread_to_port(thread_t thread) +{ + ipc_port_t port; + + ith_lock(thread); + if (thread->ith_self != IP_NULL) + port = ipc_port_make_send(thread->ith_self); + else + port = IP_NULL; + ith_unlock(thread); + + thread_deallocate(thread); + return port; +} + +/* + * Routine: space_deallocate + * Purpose: + * Deallocate a space ref produced by convert_port_to_space. + * Conditions: + * Nothing locked. + */ + +void +space_deallocate(ipc_space_t space) +{ + if (space != IS_NULL) + is_release(space); +} diff --git a/kern/ipc_tt.h b/kern/ipc_tt.h new file mode 100644 index 0000000..5c66738 --- /dev/null +++ b/kern/ipc_tt.h @@ -0,0 +1,92 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_IPC_TT_H_ +#define _KERN_IPC_TT_H_ + +#include <mach/boolean.h> +#include <mach/mach_types.h> +#include <mach/port.h> + +extern void ipc_task_init(task_t, task_t); +extern void ipc_task_enable(task_t); +extern void ipc_task_disable(task_t); +extern void ipc_task_terminate(task_t); + +extern void ipc_thread_init(thread_t); +extern void ipc_thread_enable(thread_t); +extern void ipc_thread_disable(thread_t); +extern void ipc_thread_terminate(thread_t); + +extern struct ipc_port * +retrieve_task_self(task_t); + +extern struct ipc_port * +retrieve_task_self_fast(task_t); + +extern struct ipc_port * +retrieve_thread_self(thread_t); + +extern struct ipc_port * +retrieve_thread_self_fast(thread_t); + +extern struct ipc_port * +retrieve_task_exception(task_t); + +extern struct ipc_port * +retrieve_thread_exception(thread_t); + +extern struct task * +convert_port_to_task(struct ipc_port *); + +extern struct ipc_port * +convert_task_to_port(task_t); + +extern void +task_deallocate(task_t); + +extern struct thread * +convert_port_to_thread(struct ipc_port *); + +extern struct ipc_port * +convert_thread_to_port(thread_t); + +extern void +thread_deallocate(thread_t); + +extern struct vm_map * +convert_port_to_map(struct ipc_port *); + +extern struct ipc_space * +convert_port_to_space(struct ipc_port *); + +extern void +space_deallocate(ipc_space_t); + +mach_port_name_t +mach_reply_port (void); + +#endif /* _KERN_IPC_TT_H_ */ diff --git a/kern/kalloc.h b/kern/kalloc.h new file mode 100644 index 0000000..004e3a6 --- /dev/null +++ b/kern/kalloc.h @@ -0,0 +1,38 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#ifndef _KERN_KALLOC_H_ +#define _KERN_KALLOC_H_ + +#include <mach/machine/vm_types.h> +#include <vm/vm_types.h> + +extern vm_offset_t kalloc (vm_size_t size); +extern void kfree (vm_offset_t data, vm_size_t size); + +extern void kalloc_init (void); + +#endif /* _KERN_KALLOC_H_ */ diff --git a/kern/kern_types.h b/kern/kern_types.h new file mode 100644 index 0000000..f715cb1 --- /dev/null +++ b/kern/kern_types.h @@ -0,0 +1,70 @@ +/* + * Mach Operating System + * Copyright (c) 1992 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_KERN_TYPES_H_ +#define _KERN_KERN_TYPES_H_ + +#include <mach/port.h> /* for mach_port_t */ + +/* + * Common kernel type declarations. + * These are handles to opaque data structures defined elsewhere. + * + * These types are recursively included in each other`s definitions. + * This file exists to export the common declarations to each + * of the definitions, and to other files that need only the + * type declarations. + */ + +/* + * Task structure, from kern/task.h + */ +typedef struct task * task_t; +#define TASK_NULL ((task_t) 0) + +typedef mach_port_t * task_array_t; /* should be task_t * */ + +/* + * Thread structure, from kern/thread.h + */ +typedef struct thread * thread_t; +#define THREAD_NULL ((thread_t) 0) + +typedef mach_port_t * thread_array_t; /* should be thread_t * */ + +/* + * Processor structure, from kern/processor.h + */ +typedef struct processor * processor_t; +#define PROCESSOR_NULL ((processor_t) 0) + +/* + * Processor set structure, from kern/processor.h + */ +typedef struct processor_set * processor_set_t; +#define PROCESSOR_SET_NULL ((processor_set_t) 0) + +#endif /* _KERN_KERN_TYPES_H_ */ diff --git a/kern/kmutex.c b/kern/kmutex.c new file mode 100644 index 0000000..5926d1d --- /dev/null +++ b/kern/kmutex.c @@ -0,0 +1,76 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + Contributed by Agustina Arzille <avarzille@riseup.net>, 2017. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either + version 2 of the license, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this program; if not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <kern/kmutex.h> +#include <kern/atomic.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> + +void kmutex_init (struct kmutex *mtxp) +{ + mtxp->state = KMUTEX_AVAIL; + simple_lock_init (&mtxp->lock); +} + +kern_return_t kmutex_lock (struct kmutex *mtxp, boolean_t interruptible) +{ + check_simple_locks (); + + if (atomic_cas_acq (&mtxp->state, KMUTEX_AVAIL, KMUTEX_LOCKED)) + /* Unowned mutex - We're done. */ + return (KERN_SUCCESS); + + /* The mutex is locked. We may have to sleep. */ + simple_lock (&mtxp->lock); + if (atomic_swap_acq (&mtxp->state, KMUTEX_CONTENDED) == KMUTEX_AVAIL) + { + /* The mutex was released in-between. */ + simple_unlock (&mtxp->lock); + return (KERN_SUCCESS); + } + + /* Sleep and check the result value of the waiting, in order to + * inform our caller if we were interrupted or not. Note that + * we don't need to set again the mutex state. The owner will + * handle that in every case. */ + thread_sleep ((event_t)mtxp, (simple_lock_t)&mtxp->lock, interruptible); + return (current_thread()->wait_result == THREAD_AWAKENED ? + KERN_SUCCESS : KERN_INTERRUPTED); +} + +kern_return_t kmutex_trylock (struct kmutex *mtxp) +{ + return (atomic_cas_acq (&mtxp->state, KMUTEX_AVAIL, KMUTEX_LOCKED) ? + KERN_SUCCESS : KERN_FAILURE); +} + +void kmutex_unlock (struct kmutex *mtxp) +{ + if (atomic_cas_rel (&mtxp->state, KMUTEX_LOCKED, KMUTEX_AVAIL)) + /* No waiters - We're done. */ + return; + + simple_lock (&mtxp->lock); + + if (!thread_wakeup_one ((event_t)mtxp)) + /* Any threads that were waiting on this mutex were + * interrupted and left - Reset the mutex state. */ + mtxp->state = KMUTEX_AVAIL; + + simple_unlock (&mtxp->lock); +} diff --git a/kern/kmutex.h b/kern/kmutex.h new file mode 100644 index 0000000..2981515 --- /dev/null +++ b/kern/kmutex.h @@ -0,0 +1,52 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + Contributed by Agustina Arzille <avarzille@riseup.net>, 2017. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either + version 2 of the license, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this program; if not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _KERN_KMUTEX_H_ +#define _KERN_KMUTEX_H_ 1 + +#include <kern/lock.h> +#include <mach/kern_return.h> + +struct kmutex +{ + unsigned int state; + decl_simple_lock_data (, lock) +}; + +/* Possible values for the mutex state. */ +#define KMUTEX_AVAIL 0 +#define KMUTEX_LOCKED 1 +#define KMUTEX_CONTENDED 2 + +/* Initialize mutex in *MTXP. */ +extern void kmutex_init (struct kmutex *mtxp); + +/* Acquire lock MTXP. If INTERRUPTIBLE is true, the sleep may be + * prematurely terminated, in which case the function returns + * KERN_INTERRUPTED. Otherwise, KERN_SUCCESS is returned. */ +extern kern_return_t kmutex_lock (struct kmutex *mtxp, + boolean_t interruptible); + +/* Try to acquire the lock MTXP without sleeping. + * Returns KERN_SUCCESS if successful, KERN_FAILURE otherwise. */ +extern kern_return_t kmutex_trylock (struct kmutex *mtxp); + +/* Unlock the mutex MTXP. 
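+ *
+ * Taken together, a minimal usage sketch of this interface (the
+ * structure and variable names are illustrative only):
+ *
+ *   struct kmutex obj_lock;
+ *
+ *   kmutex_init (&obj_lock);
+ *   ...
+ *   if (kmutex_lock (&obj_lock, TRUE) == KERN_SUCCESS)
+ *     {
+ *       ... touch the data protected by obj_lock ...
+ *       kmutex_unlock (&obj_lock);
+ *     }
+ *   else
+ *     ... the sleep was interrupted, the mutex is not held ...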
*/ +extern void kmutex_unlock (struct kmutex *mtxp); + +#endif diff --git a/kern/list.h b/kern/list.h new file mode 100644 index 0000000..be92762 --- /dev/null +++ b/kern/list.h @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2009, 2010 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Simple doubly-linked list. + */ + +#ifndef _KERN_LIST_H +#define _KERN_LIST_H + +#include <stddef.h> +#include <sys/types.h> +#include <kern/macros.h> + +/* + * Structure used as both head and node. + * + * This implementation relies on using the same type for both heads and nodes. + * + * It is recommended to encode the use of struct list variables in their names, + * e.g. struct list free_list or struct list free_objects is a good hint for a + * list of free objects. A declaration like struct list free_node clearly + * indicates it is used as part of a node in the free list. + */ +struct list { + struct list *prev; + struct list *next; +}; + +/* + * Static list initializer. + */ +#define LIST_INITIALIZER(list) { &(list), &(list) } + +/* + * Initialize a list. + */ +static inline void list_init(struct list *list) +{ + list->prev = list; + list->next = list; +} + +/* + * Initialize a list node. + * + * An entry is in no list when its node members point to NULL. + */ +static inline void list_node_init(struct list *node) +{ + node->prev = NULL; + node->next = NULL; +} + +/* + * Return true if node is in no list. + */ +static inline int list_node_unlinked(const struct list *node) +{ + return node->prev == NULL; +} + +/* + * Macro that evaluates to the address of the structure containing the + * given node based on the given type and member. + */ +#define list_entry(node, type, member) structof(node, type, member) + +/* + * Return the first node of a list. + */ +static inline struct list * list_first(const struct list *list) +{ + return list->next; +} + +/* + * Return the last node of a list. + */ +static inline struct list * list_last(const struct list *list) +{ + return list->prev; +} + +/* + * Return the node next to the given node. + */ +static inline struct list * list_next(const struct list *node) +{ + return node->next; +} + +/* + * Return the node previous to the given node. 
+ */ +static inline struct list * list_prev(const struct list *node) +{ + return node->prev; +} + +/* + * Get the first entry of a list. + */ +#define list_first_entry(list, type, member) \ + list_entry(list_first(list), type, member) + +/* + * Get the last entry of a list. + */ +#define list_last_entry(list, type, member) \ + list_entry(list_last(list), type, member) + +/* + * Return true if node is after the last or before the first node of the list. + */ +static inline int list_end(const struct list *list, const struct list *node) +{ + return list == node; +} + +/* + * Return true if list is empty. + */ +static inline int list_empty(const struct list *list) +{ + return list == list->next; +} + +/* + * Return true if list contains exactly one node. + */ +static inline int list_singular(const struct list *list) +{ + return (list != list->next) && (list->next == list->prev); +} + +/* + * Split list2 by moving its nodes up to (but not including) the given + * node into list1 (which can be in a stale state). + * + * If list2 is empty, or node is list2 or list2->next, nothing is done. + */ +static inline void list_split(struct list *list1, struct list *list2, + struct list *node) +{ + if (list_empty(list2) || (list2->next == node) || list_end(list2, node)) + return; + + list1->next = list2->next; + list1->next->prev = list1; + + list1->prev = node->prev; + node->prev->next = list1; + + list2->next = node; + node->prev = list2; +} + +/* + * Append the nodes of list2 at the end of list1. + * + * After completion, list2 is stale. + */ +static inline void list_concat(struct list *list1, const struct list *list2) +{ + struct list *last1, *first2, *last2; + + if (list_empty(list2)) + return; + + last1 = list1->prev; + first2 = list2->next; + last2 = list2->prev; + + last1->next = first2; + first2->prev = last1; + + last2->next = list1; + list1->prev = last2; +} + +/* + * Set the new head of a list. + * + * This function is an optimized version of : + * list_init(&new_list); + * list_concat(&new_list, &old_list); + * + * After completion, old_head is stale. + */ +static inline void list_set_head(struct list *new_head, + const struct list *old_head) +{ + if (list_empty(old_head)) { + list_init(new_head); + return; + } + + *new_head = *old_head; + new_head->next->prev = new_head; + new_head->prev->next = new_head; +} + +/* + * Add a node between two nodes. + */ +static inline void list_add(struct list *prev, struct list *next, + struct list *node) +{ + next->prev = node; + node->next = next; + + prev->next = node; + node->prev = prev; +} + +/* + * Insert a node at the head of a list. + */ +static inline void list_insert_head(struct list *list, struct list *node) +{ + list_add(list, list->next, node); +} + +/* + * Insert a node at the tail of a list. + */ +static inline void list_insert_tail(struct list *list, struct list *node) +{ + list_add(list->prev, list, node); +} + +/* + * Insert a node before another node. + */ +static inline void list_insert_before(struct list *next, struct list *node) +{ + list_add(next->prev, next, node); +} + +/* + * Insert a node after another node. + */ +static inline void list_insert_after(struct list *prev, struct list *node) +{ + list_add(prev, prev->next, node); +} + +/* + * Remove a node from a list. + * + * After completion, the node is stale. + */ +static inline void list_remove(struct list *node) +{ + node->prev->next = node->next; + node->next->prev = node->prev; +} + +/* + * Forge a loop to process all nodes of a list. 
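+ *
+ * For example (a sketch; the list name follows the naming hint given
+ * at the top of this file):
+ *
+ *   struct list *node;
+ *
+ *   list_for_each(&free_objects, node) {
+ *       ... examine the entry containing node ...
+ *   }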
+ * + * The node must not be altered during the loop. + */ +#define list_for_each(list, node) \ +for (node = list_first(list); \ + !list_end(list, node); \ + node = list_next(node)) + +/* + * Forge a loop to process all nodes of a list. + */ +#define list_for_each_safe(list, node, tmp) \ +for (node = list_first(list), tmp = list_next(node); \ + !list_end(list, node); \ + node = tmp, tmp = list_next(node)) + +/* + * Version of list_for_each() that processes nodes backward. + */ +#define list_for_each_reverse(list, node) \ +for (node = list_last(list); \ + !list_end(list, node); \ + node = list_prev(node)) + +/* + * Version of list_for_each_safe() that processes nodes backward. + */ +#define list_for_each_reverse_safe(list, node, tmp) \ +for (node = list_last(list), tmp = list_prev(node); \ + !list_end(list, node); \ + node = tmp, tmp = list_prev(node)) + +/* + * Forge a loop to process all entries of a list. + * + * The entry node must not be altered during the loop. + */ +#define list_for_each_entry(list, entry, member) \ +for (entry = list_entry(list_first(list), typeof(*entry), member); \ + !list_end(list, &entry->member); \ + entry = list_entry(list_next(&entry->member), typeof(*entry), \ + member)) + +/* + * Forge a loop to process all entries of a list. + */ +#define list_for_each_entry_safe(list, entry, tmp, member) \ +for (entry = list_entry(list_first(list), typeof(*entry), member), \ + tmp = list_entry(list_next(&entry->member), typeof(*entry), \ + member); \ + !list_end(list, &entry->member); \ + entry = tmp, tmp = list_entry(list_next(&entry->member), \ + typeof(*entry), member)) + +/* + * Version of list_for_each_entry() that processes entries backward. + */ +#define list_for_each_entry_reverse(list, entry, member) \ +for (entry = list_entry(list_last(list), typeof(*entry), member); \ + !list_end(list, &entry->member); \ + entry = list_entry(list_prev(&entry->member), typeof(*entry), \ + member)) + +/* + * Version of list_for_each_entry_safe() that processes entries backward. + */ +#define list_for_each_entry_reverse_safe(list, entry, tmp, member) \ +for (entry = list_entry(list_last(list), typeof(*entry), member), \ + tmp = list_entry(list_prev(&entry->member), typeof(*entry), \ + member); \ + !list_end(list, &entry->member); \ + entry = tmp, tmp = list_entry(list_prev(&entry->member), \ + typeof(*entry), member)) + +#endif /* _KERN_LIST_H */ diff --git a/kern/lock.c b/kern/lock.c new file mode 100644 index 0000000..36b6d20 --- /dev/null +++ b/kern/lock.c @@ -0,0 +1,689 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/lock.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Locking primitives implementation + */ + +#include <string.h> + +#include <machine/smp.h> + +#include <kern/debug.h> +#include <kern/lock.h> +#include <kern/thread.h> +#include <kern/sched_prim.h> +#if MACH_KDB +#include <machine/db_machdep.h> +#include <ddb/db_output.h> +#include <ddb/db_sym.h> +#endif + + +#if NCPUS > 1 + +/* + * Module: lock + * Function: + * Provide reader/writer sychronization. + * Implementation: + * Simple interlock on a bit. Readers first interlock, + * increment the reader count, then let go. Writers hold + * the interlock (thus preventing further readers), and + * wait for already-accepted readers to go away. + */ + +/* + * The simple-lock routines are the primitives out of which + * the lock package is built. The implementation is left + * to the machine-dependent code. + */ + +#ifdef notdef +/* + * A sample implementation of simple locks. + * assumes: + * boolean_t test_and_set(boolean_t *) + * indivisibly sets the boolean to TRUE + * and returns its old value + * and that setting a boolean to FALSE is indivisible. + */ +/* + * simple_lock_init initializes a simple lock. A simple lock + * may only be used for exclusive locks. + */ + +void simple_lock_init(simple_lock_t l) +{ + *(boolean_t *)l = FALSE; +} + +void simple_lock(simple_lock_t l) +{ + while (test_and_set((boolean_t *)l)) + cpu_pause(); +} + +void simple_unlock(simple_lock_t l) +{ + *(boolean_t *)l = FALSE; +} + +boolean_t simple_lock_try(simple_lock_t l) +{ + return (!test_and_set((boolean_t *)l)); +} +#endif /* notdef */ +#endif /* NCPUS > 1 */ + +#if NCPUS > 1 +static int lock_wait_time = 100; +#else /* NCPUS > 1 */ + + /* + * It is silly to spin on a uni-processor as if we + * thought something magical would happen to the + * want_write bit while we are executing. + */ +static int lock_wait_time = 0; +#endif /* NCPUS > 1 */ + +#if MACH_SLOCKS && NCPUS == 1 +/* + * This code does not protect simple_locks_taken and simple_locks_info. + * It works despite the fact that interrupt code does use simple locks. + * This is because interrupts use locks in a stack-like manner. + * Each interrupt releases all the locks it acquires, so the data + * structures end up in the same state after the interrupt as before. + * The only precaution necessary is that simple_locks_taken be + * incremented first and decremented last, so that interrupt handlers + * don't over-write active slots in simple_locks_info. + */ + +unsigned int simple_locks_taken = 0; + +#define NSLINFO 1000 /* maximum number of locks held */ + +struct simple_locks_info { + simple_lock_t l; + const char *expr; + const char *loc; +} simple_locks_info[NSLINFO]; + +int do_check_simple_locks = 1; + +void check_simple_locks(void) +{ + assert(! do_check_simple_locks || simple_locks_taken == 0); +} + +void check_simple_locks_enable(void) +{ + do_check_simple_locks = 1; +} + +void check_simple_locks_disable(void) +{ + do_check_simple_locks = 0; +} + +/* Need simple lock sanity checking code if simple locks are being + compiled in, and we are compiling for a uniprocessor. 
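+
+   Each _simple_lock() and _simple_lock_try() below records the lock,
+   the locking expression and the source location in simple_locks_info[];
+   _simple_unlock() removes the entry again, so the first
+   simple_locks_taken slots always describe the simple locks currently
+   held (printed by db_show_all_slocks() further down).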
*/ + +void simple_lock_init( + simple_lock_t l) +{ + l->lock_data = 0; +} + +void _simple_lock( + simple_lock_t l, + const char *expression, + const char *location) +{ + struct simple_locks_info *info; + + assert(l->lock_data == 0); + + l->lock_data = 1; + + info = &simple_locks_info[simple_locks_taken++]; + barrier(); + info->l = l; + info->expr = expression; + info->loc = location; +} + +boolean_t _simple_lock_try( + simple_lock_t l, + const char *expression, + const char *location) +{ + struct simple_locks_info *info; + + if (l->lock_data != 0) + return FALSE; + + l->lock_data = 1; + + info = &simple_locks_info[simple_locks_taken++]; + barrier(); + info->l = l; + info->expr = expression; + info->loc = location; + + return TRUE; +} + +void _simple_unlock( + simple_lock_t l) +{ + assert(l->lock_data != 0); + + l->lock_data = 0; + + if (simple_locks_info[simple_locks_taken-1].l != l) { + unsigned int i = simple_locks_taken; + + /* out-of-order unlocking */ + + do + if (i == 0) + panic("simple_unlock"); + while (simple_locks_info[--i].l != l); + + simple_locks_info[i] = simple_locks_info[simple_locks_taken-1]; + } + barrier(); + simple_locks_taken--; + simple_locks_info[simple_locks_taken] = (struct simple_locks_info) {0}; +} + +#endif /* MACH_SLOCKS && NCPUS == 1 */ + +/* + * Routine: lock_init + * Function: + * Initialize a lock; required before use. + * Note that clients declare the "struct lock" + * variables and then initialize them, rather + * than getting a new one from this module. + */ +void lock_init( + lock_t l, + boolean_t can_sleep) +{ + memset(l, 0, sizeof(lock_data_t)); + simple_lock_init(&l->interlock); + l->want_write = FALSE; + l->want_upgrade = FALSE; + l->read_count = 0; + l->can_sleep = can_sleep; + l->thread = (struct thread *)-1; /* XXX */ + l->recursion_depth = 0; +} + +void lock_sleepable( + lock_t l, + boolean_t can_sleep) +{ + simple_lock(&l->interlock); + l->can_sleep = can_sleep; + simple_unlock(&l->interlock); +} + + +/* + * Sleep locks. These use the same data structure and algorithm + * as the spin locks, but the process sleeps while it is waiting + * for the lock. These work on uniprocessor systems. + */ + +void lock_write( + lock_t l) +{ + int i; + + check_simple_locks(); + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock. + */ + l->recursion_depth++; + simple_unlock(&l->interlock); + return; + } + + /* + * Try to acquire the want_write bit. 
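+	 *
+	 *	While another writer holds it, the loop below first spins
+	 *	for up to lock_wait_time iterations with the interlock
+	 *	dropped, then, if this lock may sleep, blocks until a
+	 *	release wakes us, and re-checks.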
+ */ + while (l->want_write) { + if ((i = lock_wait_time) > 0) { + simple_unlock(&l->interlock); + while (--i > 0 && l->want_write) + cpu_pause(); + simple_lock(&l->interlock); + } + + if (l->can_sleep && l->want_write) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + } + l->want_write = TRUE; + + /* Wait for readers (and upgrades) to finish */ + + while ((l->read_count != 0) || l->want_upgrade) { + if ((i = lock_wait_time) > 0) { + simple_unlock(&l->interlock); + while (--i > 0 && (l->read_count != 0 || + l->want_upgrade)) + cpu_pause(); + simple_lock(&l->interlock); + } + + if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + } +#if MACH_LDEBUG + l->writer = current_thread(); +#endif /* MACH_LDEBUG */ + simple_unlock(&l->interlock); +} + +void lock_done( + lock_t l) +{ + simple_lock(&l->interlock); + + if (l->read_count != 0) + l->read_count--; + else + if (l->recursion_depth != 0) + l->recursion_depth--; + else + if (l->want_upgrade) { + l->want_upgrade = FALSE; +#if MACH_LDEBUG + assert(l->writer == current_thread()); + l->writer = THREAD_NULL; +#endif /* MACH_LDEBUG */ + } else { + l->want_write = FALSE; +#if MACH_LDEBUG + assert(l->writer == current_thread()); + l->writer = THREAD_NULL; +#endif /* MACH_LDEBUG */ + } + + /* + * There is no reason to wakeup a waiting thread + * if the read-count is non-zero. Consider: + * we must be dropping a read lock + * threads are waiting only if one wants a write lock + * if there are still readers, they can't proceed + */ + + if (l->waiting && (l->read_count == 0)) { + l->waiting = FALSE; + thread_wakeup(l); + } + + simple_unlock(&l->interlock); +} + +void lock_read( + lock_t l) +{ + int i; + + check_simple_locks(); + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock. + */ + l->read_count++; + simple_unlock(&l->interlock); + return; + } + + while (l->want_write || l->want_upgrade) { + if ((i = lock_wait_time) > 0) { + simple_unlock(&l->interlock); + while (--i > 0 && (l->want_write || l->want_upgrade)) + cpu_pause(); + simple_lock(&l->interlock); + } + + if (l->can_sleep && (l->want_write || l->want_upgrade)) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + } + + l->read_count++; + simple_unlock(&l->interlock); +} + +/* + * Routine: lock_read_to_write + * Function: + * Improves a read-only lock to one with + * write permission. If another reader has + * already requested an upgrade to a write lock, + * no lock is held upon return. + * + * Returns TRUE if the upgrade *failed*. + */ +boolean_t lock_read_to_write( + lock_t l) +{ + int i; + + check_simple_locks(); + simple_lock(&l->interlock); + + l->read_count--; + + if (l->thread == current_thread()) { + /* + * Recursive lock. + */ + l->recursion_depth++; + simple_unlock(&l->interlock); + return(FALSE); + } + + if (l->want_upgrade) { + /* + * Someone else has requested upgrade. + * Since we've released a read lock, wake + * him up. 
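+		 *
+		 *	We then return TRUE with no lock held at all: the
+		 *	read lock is already given up and the upgrade has
+		 *	failed.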
+ */ + if (l->waiting && (l->read_count == 0)) { + l->waiting = FALSE; + thread_wakeup(l); + } + + simple_unlock(&l->interlock); + return TRUE; + } + + l->want_upgrade = TRUE; + + while (l->read_count != 0) { + if ((i = lock_wait_time) > 0) { + simple_unlock(&l->interlock); + while (--i > 0 && l->read_count != 0) + cpu_pause(); + simple_lock(&l->interlock); + } + + if (l->can_sleep && l->read_count != 0) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + } + +#if MACH_LDEBUG + l->writer = current_thread(); +#endif /* MACH_LDEBUG */ + simple_unlock(&l->interlock); + return FALSE; +} + +void lock_write_to_read( + lock_t l) +{ + simple_lock(&l->interlock); +#if MACH_LDEBUG + assert(l->writer == current_thread()); +#endif /* MACH_LDEBUG */ + + l->read_count++; + if (l->recursion_depth != 0) + l->recursion_depth--; + else + if (l->want_upgrade) + l->want_upgrade = FALSE; + else + l->want_write = FALSE; + + if (l->waiting) { + l->waiting = FALSE; + thread_wakeup(l); + } + +#if MACH_LDEBUG + assert(l->writer == current_thread()); + l->writer = THREAD_NULL; +#endif /* MACH_LDEBUG */ + simple_unlock(&l->interlock); +} + + +/* + * Routine: lock_try_write + * Function: + * Tries to get a write lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t lock_try_write( + lock_t l) +{ + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock + */ + l->recursion_depth++; + simple_unlock(&l->interlock); + return TRUE; + } + + if (l->want_write || l->want_upgrade || l->read_count) { + /* + * Can't get lock. + */ + simple_unlock(&l->interlock); + return FALSE; + } + + /* + * Have lock. + */ + + l->want_write = TRUE; +#if MACH_LDEBUG + l->writer = current_thread(); +#endif /* MACH_LDEBUG */ + simple_unlock(&l->interlock); + return TRUE; +} + +/* + * Routine: lock_try_read + * Function: + * Tries to get a read lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t lock_try_read( + lock_t l) +{ + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock + */ + l->read_count++; + simple_unlock(&l->interlock); + return TRUE; + } + + if (l->want_write || l->want_upgrade) { + simple_unlock(&l->interlock); + return FALSE; + } + + l->read_count++; + simple_unlock(&l->interlock); + return TRUE; +} + +/* + * Routine: lock_try_read_to_write + * Function: + * Improves a read-only lock to one with + * write permission. If another reader has + * already requested an upgrade to a write lock, + * the read lock is still held upon return. + * + * Returns FALSE if the upgrade *failed*. + */ +boolean_t lock_try_read_to_write( + lock_t l) +{ + check_simple_locks(); + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock + */ + l->read_count--; + l->recursion_depth++; + simple_unlock(&l->interlock); + return TRUE; + } + + if (l->want_upgrade) { + simple_unlock(&l->interlock); + return FALSE; + } + l->want_upgrade = TRUE; + l->read_count--; + + while (l->read_count != 0) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + +#if MACH_LDEBUG + l->writer = current_thread(); +#endif /* MACH_LDEBUG */ + simple_unlock(&l->interlock); + return TRUE; +} + +/* + * Allow a process that has a lock for write to acquire it + * recursively (for read, write, or update). 
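+ *
+ *	A hedged usage sketch (l is a sleepable lock the caller wants
+ *	to hold across code that may re-acquire it):
+ *
+ *		lock_write(l);
+ *		lock_set_recursive(l);
+ *		... nested lock_write()/lock_read() calls by this thread
+ *		    are now allowed, each paired with lock_done() ...
+ *		lock_clear_recursive(l);
+ *		lock_done(l);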
+ */ +void lock_set_recursive( + lock_t l) +{ + simple_lock(&l->interlock); +#if MACH_LDEBUG + assert(l->writer == current_thread()); +#endif /* MACH_LDEBUG */ + + if (!l->want_write) { + panic("lock_set_recursive: don't have write lock"); + } + l->thread = current_thread(); + simple_unlock(&l->interlock); +} + +/* + * Prevent a lock from being re-acquired. + */ +void lock_clear_recursive( + lock_t l) +{ + simple_lock(&l->interlock); + if (l->thread != current_thread()) { + panic("lock_clear_recursive: wrong thread"); + } + if (l->recursion_depth == 0) + l->thread = (struct thread *)-1; /* XXX */ + simple_unlock(&l->interlock); +} + +#if MACH_KDB +#if MACH_SLOCKS && NCPUS == 1 +void db_show_all_slocks(void) +{ + int i; + struct simple_locks_info *info; + simple_lock_t l; + + for (i = 0; i < simple_locks_taken; i++) { + info = &simple_locks_info[i]; + db_printf("%d: %s (", i, info->expr); + db_printsym(info->l, DB_STGY_ANY); + db_printf(") locked by %s\n", info->loc); + } +} +#else /* MACH_SLOCKS && NCPUS == 1 */ +void db_show_all_slocks(void) +{ +#if MACH_LOCK_MON + lip(); +#else + db_printf("simple lock info not available\n"); +#endif +} +#endif /* MACH_SLOCKS && NCPUS == 1 */ +#endif /* MACH_KDB */ diff --git a/kern/lock.h b/kern/lock.h new file mode 100644 index 0000000..9d081d3 --- /dev/null +++ b/kern/lock.h @@ -0,0 +1,316 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/lock.h + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Locking primitives definitions + */ + +#ifndef _KERN_LOCK_H_ +#define _KERN_LOCK_H_ + +#include <mach/boolean.h> +#include <mach/machine/vm_types.h> +#include <machine/spl.h> + +/* + * Note: we cannot blindly use simple locks in interrupt handlers, otherwise one + * may try to acquire a lock while already having the lock, thus a deadlock. + * + * When locks are needed in interrupt handlers, the _irq versions of the calls + * should be used, which disable interrupts (by calling splhigh) before acquiring + * the lock, thus preventing the deadlock. They need to be used this way: + * + * spl_t s = simple_lock_irq(&mylock); + * [... critical section] + * simple_unlock_irq(s, &mylock); + * + * To catch faulty code, when MACH_LDEBUG is set we check that non-_irq versions + * are not called while handling an interrupt. + * + * In the following, the _nocheck versions don't check anything, the _irq + * versions disable interrupts, and the pristine versions add a check when + * MACH_LDEBUG is set. 
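[Editor's note] A short sketch, again with a hypothetical table_lock, of the recursive-lock helpers just above, as their semantics read from the code: lock_set_recursive() may only be called while holding the lock for write, and lock_clear_recursive() must be called by the same thread before the final release.

	lock_write(&table_lock);
	lock_set_recursive(&table_lock);	/* this thread may now re-acquire */

	lock_read(&table_lock);			/* recursion: only bumps read_count */
	/* ... call code that takes the lock on its own ... */
	lock_done(&table_lock);			/* undoes the recursive acquisition */

	lock_clear_recursive(&table_lock);
	lock_done(&table_lock);			/* drops the write hold */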
+ */ + +#if NCPUS > 1 +#include <machine/lock.h>/*XXX*/ +#if MACH_LOCK_MON == 0 +#define simple_lock_nocheck _simple_lock +#define simple_lock_try_nocheck _simple_lock_try +#define simple_unlock_nocheck _simple_unlock +#else +#define simple_lock_nocheck simple_lock +#define simple_lock_try_nocheck simple_lock_try +#define simple_unlock_nocheck simple_unlock +#endif +#endif + +#define MACH_SLOCKS NCPUS > 1 + +/* + * A simple spin lock. + */ + +struct slock { + volatile natural_t lock_data; /* in general 1 bit is sufficient */ + struct {} is_a_simple_lock; +}; + +/* + * Used by macros to assert that the given argument is a simple + * lock. + */ +#define simple_lock_assert(l) (void) &(l)->is_a_simple_lock + +typedef struct slock simple_lock_data_t; +typedef struct slock *simple_lock_t; + +#if MACH_SLOCKS +/* + * Use the locks. + */ + +#define decl_simple_lock_data(class,name) \ +class simple_lock_data_t name; +#define def_simple_lock_data(class,name) \ +class simple_lock_data_t name = SIMPLE_LOCK_INITIALIZER(&name); +#define def_simple_lock_irq_data(class,name) \ +class simple_lock_irq_data_t name = { SIMPLE_LOCK_INITIALIZER(&name.lock) }; + +#define simple_lock_addr(lock) (simple_lock_assert(&(lock)), \ + &(lock)) +#define simple_lock_irq_addr(l) (simple_lock_irq_assert(&(l)), \ + &(l)->lock) + +#if (NCPUS > 1) + +/* + * The single-CPU debugging routines are not valid + * on a multiprocessor. + */ +#define simple_lock_taken(lock) (simple_lock_assert(lock), \ + 1) /* always succeeds */ +#define check_simple_locks() +#define check_simple_locks_enable() +#define check_simple_locks_disable() + +#else /* NCPUS > 1 */ +/* + * Use our single-CPU locking test routines. + */ + +extern void simple_lock_init(simple_lock_t); +extern void _simple_lock(simple_lock_t, + const char *, const char *); +extern void _simple_unlock(simple_lock_t); +extern boolean_t _simple_lock_try(simple_lock_t, + const char *, const char *); + +/* We provide simple_lock and simple_lock_try so that we can save the + location. */ +#define XSTR(x) #x +#define STR(x) XSTR(x) +#define LOCATION __FILE__ ":" STR(__LINE__) + +#define simple_lock_nocheck(lock) _simple_lock((lock), #lock, LOCATION) +#define simple_lock_try_nocheck(lock) _simple_lock_try((lock), #lock, LOCATION) +#define simple_unlock_nocheck(lock) _simple_unlock((lock)) + +#define simple_lock_pause() +#define simple_lock_taken(lock) (simple_lock_assert(lock), \ + (lock)->lock_data) + +extern void check_simple_locks(void); +extern void check_simple_locks_enable(void); +extern void check_simple_locks_disable(void); + +#endif /* NCPUS > 1 */ + +#else /* MACH_SLOCKS */ +/* + * Do not allocate storage for locks if not needed. + */ +struct simple_lock_data_empty { struct {} is_a_simple_lock; }; +struct simple_lock_irq_data_empty { struct simple_lock_data_empty slock; }; +#define decl_simple_lock_data(class,name) \ +class struct simple_lock_data_empty name; +#define def_simple_lock_data(class,name) \ +class struct simple_lock_data_empty name; +#define def_simple_lock_irq_data(class,name) \ +class struct simple_lock_irq_data_empty name; +#define simple_lock_addr(lock) (simple_lock_assert(&(lock)), \ + (simple_lock_t)0) +#define simple_lock_irq_addr(lock) (simple_lock_irq_assert(&(lock)), \ + (simple_lock_t)0) + +/* + * No multiprocessor locking is necessary. 
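[Editor's note] A minimal sketch, for a hypothetical foo subsystem, of how the declaration macros above are meant to be used. On a single-CPU MACH_SLOCKS build the lock call below expands (roughly) to _simple_lock(&foo_lock, "&foo_lock", "foo.c:17"), so db_show_all_slocks() can later report which lock was taken and where.

def_simple_lock_data(static, foo_lock)	/* defines and initializes foo_lock */

static unsigned int foo_count;

void foo_bump(void)
{
	simple_lock(&foo_lock);
	foo_count++;
	simple_unlock(&foo_lock);
}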
+ */ +#define simple_lock_init(l) simple_lock_assert(l) +#define simple_lock_nocheck(l) simple_lock_assert(l) +#define simple_unlock_nocheck(l) simple_lock_assert(l) +#define simple_lock_try_nocheck(l) (simple_lock_assert(l), \ + TRUE) /* always succeeds */ +#define simple_lock_taken(l) (simple_lock_assert(l), \ + 1) /* always succeeds */ +#define check_simple_locks() +#define check_simple_locks_enable() +#define check_simple_locks_disable() +#define simple_lock_pause() + +#endif /* MACH_SLOCKS */ + + +#define decl_mutex_data(class,name) decl_simple_lock_data(class,name) +#define def_mutex_data(class,name) def_simple_lock_data(class,name) +#define mutex_try(l) simple_lock_try(l) +#define mutex_lock(l) simple_lock(l) +#define mutex_unlock(l) simple_unlock(l) +#define mutex_init(l) simple_lock_init(l) + + +/* + * The general lock structure. Provides for multiple readers, + * upgrading from read to write, and sleeping until the lock + * can be gained. + * + * On some architectures, assembly language code in the 'inline' + * program fiddles the lock structures. It must be changed in + * concert with the structure layout. + * + * Only the "interlock" field is used for hardware exclusion; + * other fields are modified with normal instructions after + * acquiring the interlock bit. + */ +struct lock { + struct thread *thread; /* Thread that has lock, if + recursive locking allowed */ + unsigned int read_count:16, /* Number of accepted readers */ + /* boolean_t */ want_upgrade:1, /* Read-to-write upgrade waiting */ + /* boolean_t */ want_write:1, /* Writer is waiting, or + locked for write */ + /* boolean_t */ waiting:1, /* Someone is sleeping on lock */ + /* boolean_t */ can_sleep:1, /* Can attempts to lock go to sleep? */ + recursion_depth:12, /* Depth of recursion */ + :0; +#if MACH_LDEBUG + struct thread *writer; +#endif /* MACH_LDEBUG */ + decl_simple_lock_data(,interlock) + /* Hardware interlock field. + Last in the structure so that + field offsets are the same whether + or not it is present. */ +}; + +typedef struct lock lock_data_t; +typedef struct lock *lock_t; + +/* Sleep locks must work even if no multiprocessing */ + +extern void lock_init(lock_t, boolean_t); +extern void lock_sleepable(lock_t, boolean_t); +extern void lock_write(lock_t); +extern void lock_read(lock_t); +extern void lock_done(lock_t); +extern boolean_t lock_read_to_write(lock_t); +extern void lock_write_to_read(lock_t); +extern boolean_t lock_try_write(lock_t); +extern boolean_t lock_try_read(lock_t); +extern boolean_t lock_try_read_to_write(lock_t); + +#define lock_read_done(l) lock_done(l) +#define lock_write_done(l) lock_done(l) + +extern void lock_set_recursive(lock_t); +extern void lock_clear_recursive(lock_t); + +/* Lock debugging support. */ +#if ! MACH_LDEBUG +#define have_read_lock(l) 1 +#define have_write_lock(l) 1 +#define lock_check_no_interrupts() +#else /* MACH_LDEBUG */ +/* XXX: We don't keep track of readers, so this is an approximation. 
*/ +#define have_read_lock(l) ((l)->read_count > 0) +#define have_write_lock(l) ((l)->writer == current_thread()) +extern unsigned long in_interrupt[NCPUS]; +#define lock_check_no_interrupts() assert(!in_interrupt[cpu_number()]) +#endif /* MACH_LDEBUG */ +#define have_lock(l) (have_read_lock(l) || have_write_lock(l)) + +/* These are defined elsewhere with lock monitoring */ +#if MACH_LOCK_MON == 0 +#define simple_lock(l) do { \ + lock_check_no_interrupts(); \ + simple_lock_nocheck(l); \ +} while (0) +#define simple_lock_try(l) ({ \ + lock_check_no_interrupts(); \ + simple_lock_try_nocheck(l); \ +}) +#define simple_unlock(l) do { \ + lock_check_no_interrupts(); \ + simple_unlock_nocheck(l); \ +} while (0) +#endif + +/* _irq variants */ + +struct slock_irq { + struct slock slock; +}; + +#define simple_lock_irq_assert(l) simple_lock_assert(&(l)->slock) + +typedef struct slock_irq simple_lock_irq_data_t; +typedef struct slock_irq *simple_lock_irq_t; + +#define decl_simple_lock_irq_data(class,name) \ +class simple_lock_irq_data_t name; + +#define simple_lock_init_irq(l) simple_lock_init(&(l)->slock) + +#define simple_lock_irq(l) ({ \ + spl_t __s = splhigh(); \ + simple_lock_nocheck(&(l)->slock); \ + __s; \ +}) +#define simple_unlock_irq(s, l) do { \ + simple_unlock_nocheck(&(l)->slock); \ + splx(s); \ +} while (0) + +#if MACH_KDB +extern void db_show_all_slocks(void); +#endif /* MACH_KDB */ + +extern void lip(void); + +#endif /* _KERN_LOCK_H_ */ diff --git a/kern/lock_mon.c b/kern/lock_mon.c new file mode 100644 index 0000000..3ca4592 --- /dev/null +++ b/kern/lock_mon.c @@ -0,0 +1,364 @@ +/* + * Mach Operating System + * Copyright (c) 1990 Carnegie-Mellon University + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ +/* + * Copyright 1990 by Open Software Foundation, + * Grenoble, FRANCE + * + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appears in all copies and + * that both the copyright notice and this permission notice appear in + * supporting documentation, and that the name of OSF or Open Software + * Foundation not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. + * + * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, + * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, + * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Support For MP Debugging + * if MACH_MP_DEBUG is on, we use alternate locking + * routines do detect dealocks + * Support for MP lock monitoring (MACH_LOCK_MON). + * Registers use of locks, contention. 
+ * Depending on hardware also records time spent with locks held + */ + +#include <sys/types.h> +#include <string.h> + +#include <mach/machine/vm_types.h> +#include <mach/boolean.h> +#include <kern/thread.h> +#include <kern/lock.h> +#include <kern/printf.h> +#include <kern/mach_clock.h> +#include <machine/ipl.h> +#include <ddb/db_sym.h> +#include <ddb/db_output.h> + +static void lis(int arg, int abs, int count); + +def_simple_lock_data(, kdb_lock) +def_simple_lock_data(, printf_lock) + +#if NCPUS > 1 && MACH_LOCK_MON +#define TIME_STAMP 1 +typedef unsigned int time_stamp_t; +/* in milliseconds */ +#define time_stamp (elapsed_ticks * 1000 / hz) + +#define LOCK_INFO_MAX (1024*32) +#define LOCK_INFO_HASH_COUNT 1024 +#define LOCK_INFO_PER_BUCKET (LOCK_INFO_MAX/LOCK_INFO_HASH_COUNT) + +#define HASH_LOCK(lock) ((long)lock>>5 & (LOCK_INFO_HASH_COUNT-1)) + +struct lock_info { + unsigned int success; + unsigned int fail; + unsigned int masked; + unsigned int stack; + time_stamp_t time; + decl_simple_lock_data(, *lock) + vm_offset_t caller; +}; + +struct lock_info_bucket { + struct lock_info info[LOCK_INFO_PER_BUCKET]; +}; + +static void print_lock_info(struct lock_info *li); + +struct lock_info_bucket lock_info[LOCK_INFO_HASH_COUNT]; +struct lock_info default_lock_info; +unsigned default_lock_stack = 0; + +extern spl_t curr_ipl[]; + + + +struct lock_info * +locate_lock_info(lock) +decl_simple_lock_data(, **lock) +{ + struct lock_info *li = &(lock_info[HASH_LOCK(*lock)].info[0]); + int i; + + for (i=0; i < LOCK_INFO_PER_BUCKET; i++, li++) + if (li->lock) { + if (li->lock == *lock) + return(li); + } else { + li->lock = *lock; + li->caller = *((vm_offset_t *)lock - 1); + return(li); + } + db_printf("out of lock_info slots\n"); + li = &default_lock_info; + return(li); +} + + +void simple_lock(lock) +decl_simple_lock_data(, *lock) +{ + struct lock_info *li = locate_lock_info(&lock); + int my_cpu = cpu_number(); + + if (current_thread()) + li->stack = current_thread()->lock_stack++; + if (curr_ipl[my_cpu]) + li->masked++; + if (_simple_lock_try(lock)) + li->success++; + else { + _simple_lock(lock); + li->fail++; + } + li->time = time_stamp - li->time; +} + +int simple_lock_try(lock) +decl_simple_lock_data(, *lock) +{ + struct lock_info *li = locate_lock_info(&lock); + int my_cpu = cpu_number(); + + if (curr_ipl[my_cpu]) + li->masked++; + if (_simple_lock_try(lock)) { + li->success++; + li->time = time_stamp - li->time; + if (current_thread()) + li->stack = current_thread()->lock_stack++; + return(1); + } else { + li->fail++; + return(0); + } +} + +void simple_unlock(lock) +decl_simple_lock_data(, *lock) +{ + time_stamp_t stamp = time_stamp; + time_stamp_t *time = &locate_lock_info(&lock)->time; + unsigned *lock_stack; + + *time = stamp - *time; + _simple_unlock(lock); + if (current_thread()) { + lock_stack = ¤t_thread()->lock_stack; + if (*lock_stack) + (*lock_stack)--; + } +} + +void lip(void) { + lis(4, 1, 0); +} + +#define lock_info_sort lis + +static void lock_info_sort(int arg, int abs, int count) +{ + struct lock_info *li, mean; + int bucket = 0; + int i; + unsigned max_val; + unsigned old_val = (unsigned)-1; + struct lock_info *target_li = &lock_info[0].info[0]; + unsigned sum; + unsigned empty, total; + unsigned curval; + + printf("\nSUCCESS FAIL MASKED STACK TIME LOCK/CALLER\n"); + if (!count) + count = 8 ; + while (count && target_li) { + empty = LOCK_INFO_HASH_COUNT; + target_li = 0; + total = 0; + max_val = 0; + mean.success = 0; + mean.fail = 0; + mean.masked = 0; + mean.stack = 0; + 
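[Editor's note] A small illustration of the bucket selection performed by locate_lock_info() above; the lock address is made up. The shift discards alignment bits and the mask keeps LOCK_INFO_HASH_COUNT buckets; collisions fall through to the linear scan inside the bucket.

/* HASH_LOCK(lock) == ((long)lock >> 5) & (LOCK_INFO_HASH_COUNT - 1) */
simple_lock_t lock = (simple_lock_t)0x40102345;		/* hypothetical address */
long bucket = ((long)lock >> 5) & (LOCK_INFO_HASH_COUNT - 1);
/* 0x40102345 >> 5 == 0x0200811A; masked with 0x3FF this is 0x11A, bucket 282. */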
mean.time = 0; + mean.lock = (simple_lock_data_t *) &lock_info; + mean.caller = (vm_offset_t) &lock_info; + for (bucket = 0; bucket < LOCK_INFO_HASH_COUNT; bucket++) { + li = &lock_info[bucket].info[0]; + if (li->lock) + empty--; + for (i= 0; i< LOCK_INFO_PER_BUCKET && li->lock; i++, li++) { + if (li->lock == &kdb_lock || li->lock == &printf_lock) + continue; + total++; + curval = *((int *)li + arg); + sum = li->success + li->fail; + if(!sum && !abs) + continue; + if (!abs) switch(arg) { + case 0: + break; + case 1: + case 2: + curval = (curval*100) / sum; + break; + case 3: + case 4: + curval = curval / sum; + break; + } + if (curval > max_val && curval < old_val) { + max_val = curval; + target_li = li; + } + if (curval == old_val && count != 0) { + print_lock_info(li); + count--; + } + mean.success += li->success; + mean.fail += li->fail; + mean.masked += li->masked; + mean.stack += li->stack; + mean.time += li->time; + } + } + if (target_li) + old_val = max_val; + } + db_printf("\n%d total locks, %d empty buckets", total, empty ); + if (default_lock_info.success) + db_printf(", default: %d", default_lock_info.success + default_lock_info.fail); + db_printf("\n"); + print_lock_info(&mean); +} + +#define lock_info_clear lic + +void lock_info_clear(void) +{ + struct lock_info *li; + int bucket = 0; + int i; + for (bucket = 0; bucket < LOCK_INFO_HASH_COUNT; bucket++) { + li = &lock_info[bucket].info[0]; + for (i= 0; i< LOCK_INFO_PER_BUCKET; i++, li++) { + memset(li, 0, sizeof(struct lock_info)); + } + } + memset(&default_lock_info, 0, sizeof(struct lock_info)); +} + +static void print_lock_info(struct lock_info *li) +{ + db_addr_t off; + int sum = li->success + li->fail; + db_printf("%d %d/%d %d/%d %d/%d %d/%d ", li->success, + li->fail, (li->fail*100)/sum, + li->masked, (li->masked*100)/sum, + li->stack, li->stack/sum, + li->time, li->time/sum); + db_free_symbol(db_search_symbol((db_addr_t) li->lock, 0, &off)); + if (off < 1024) + db_printsym((db_addr_t) li->lock, 0); + else { + db_printsym(li->caller, 0); + db_printf("(%X)", li->lock); + } + db_printf("\n"); +} + +#endif /* NCPUS > 1 && MACH_LOCK_MON */ + +#if TIME_STAMP + +/* + * Measure lock/unlock operations + */ + +void time_lock(int loops) +{ + decl_simple_lock_data(, lock) + time_stamp_t stamp; + int i; + + + if (!loops) + loops = 1000; + simple_lock_init(&lock); + stamp = time_stamp; + for (i = 0; i < loops; i++) { + simple_lock(&lock); + simple_unlock(&lock); + } + stamp = time_stamp - stamp; + db_printf("%d stamps for simple_locks\n", stamp/loops); +#if MACH_LOCK_MON + stamp = time_stamp; + for (i = 0; i < loops; i++) { + _simple_lock(&lock); + _simple_unlock(&lock); + } + stamp = time_stamp - stamp; + db_printf("%d stamps for _simple_locks\n", stamp/loops); +#endif /* MACH_LOCK_MON */ +} +#endif /* TIME_STAMP */ + +#if MACH_MP_DEBUG + +/* + * Arrange in the lock routines to call the following + * routines. 
This way, when locks are free there is no performance + * penalty + */ + +void +retry_simple_lock(lock) +decl_simple_lock_data(, *lock) +{ + count = 0; + + while(!simple_lock_try(lock)) + if (count++ > 1000000 && lock != &kdb_lock) { + if (lock == &printf_lock) + return; + db_printf("cpu %d looping on simple_lock(%x) called by %x\n", + cpu_number(), lock, *(((int *)&lock) -1)); + SoftDebugger("simple_lock timeout"); + count = 0; + } +} + +void +retry_bit_lock(index, addr) +{ + count = 0; + + while(!bit_lock_try(index, addr)) + if (count++ > 1000000) { + db_printf("cpu %d looping on bit_lock(%x, %x) called by %x\n", + cpu_number(), index, addr, *(((int *)&index) -1)); + SoftDebugger("bit_lock timeout"); + count = 0; + } +} +#endif /* MACH_MP_DEBUG */ diff --git a/kern/log2.h b/kern/log2.h new file mode 100644 index 0000000..0e67701 --- /dev/null +++ b/kern/log2.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Integer base 2 logarithm operations. + */ + +#ifndef _KERN_LOG2_H +#define _KERN_LOG2_H + +#include <kern/assert.h> + +#ifdef __LP64__ +#define LONG_BIT 64 +#else /* __LP64__ */ +#define LONG_BIT 32 +#endif /* __LP64__ */ + +static inline unsigned int +ilog2(unsigned long x) +{ + assert(x != 0); + return LONG_BIT - __builtin_clzl(x) - 1; +} + +static inline unsigned int +iorder2(unsigned long size) +{ + assert(size != 0); + + if (size == 1) + return 0; + + return ilog2(size - 1) + 1; +} + +#endif /* _KERN_LOG2_H */ diff --git a/kern/mach.srv b/kern/mach.srv new file mode 100644 index 0000000..b1cec60 --- /dev/null +++ b/kern/mach.srv @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory at the University of Utah (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* This is a server presentation file. 
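[Editor's note] A few sample values for the ilog2()/iorder2() helpers in kern/log2.h above: ilog2() is the floor of the base-2 logarithm, while iorder2() rounds up to the smallest order whose power of two covers the given size.

assert(ilog2(1) == 0);
assert(ilog2(4096) == 12);
assert(ilog2(4097) == 12);	/* floor */
assert(iorder2(4096) == 12);
assert(iorder2(4097) == 13);	/* rounded up to the next power of two */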
*/ + +#define KERNEL_SERVER 1 + +#ifdef MIGRATING_THREADS +#define task_threads task_acts +#define thread_terminate act_terminate +#define thread_set_state act_set_state_immediate +#define thread_get_state act_get_state_immediate +#define thread_info act_thread_info +#define thread_suspend act_suspend +#define thread_resume act_resume +#define thread_abort act_abort +#define thread_set_special_port act_set_special_port +#define thread_get_special_port act_get_special_port +#endif /* MIGRATING_THREADS */ + +#include <mach/mach.defs> diff --git a/kern/mach4.srv b/kern/mach4.srv new file mode 100644 index 0000000..ead5484 --- /dev/null +++ b/kern/mach4.srv @@ -0,0 +1,32 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* This is a server presentation file. */ + +#define KERNEL_SERVER 1 + +#ifdef MIGRATING_THREADS +#define thread_enable_pc_sampling act_enable_pc_sampling +#define thread_disable_pc_sampling act_disable_pc_sampling +#define thread_get_sampled_pcs act_get_sampled_pcs +#endif /* MIGRATING_THREADS */ + +#include <mach/mach4.defs> diff --git a/kern/mach_clock.c b/kern/mach_clock.c new file mode 100644 index 0000000..864704c --- /dev/null +++ b/kern/mach_clock.c @@ -0,0 +1,657 @@ +/* + * Mach Operating System + * Copyright (c) 1994-1988 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: mach_clock.c + * Author: Avadis Tevanian, Jr. + * Date: 1986 + * + * Clock primitives. 
+ */ + +#include <string.h> + +#include <mach/boolean.h> +#include <mach/machine.h> +#include <mach/time_value.h> +#include <mach/vm_param.h> +#include <mach/vm_prot.h> +#include <kern/counters.h> +#include "cpu_number.h" +#include <kern/debug.h> +#include <kern/host.h> +#include <kern/lock.h> +#include <kern/mach_clock.h> +#include <kern/mach_host.server.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> +#include <kern/timer.h> +#include <kern/priority.h> +#include <vm/vm_kern.h> +#include <machine/mach_param.h> /* HZ */ +#include <machine/machspl.h> +#include <machine/model_dep.h> + +#if MACH_PCSAMPLE +#include <kern/pc_sample.h> +#endif + +#define MICROSECONDS_IN_ONE_SECOND 1000000 + +int hz = HZ; /* number of ticks per second */ +int tick = (MICROSECONDS_IN_ONE_SECOND / HZ); /* number of usec per tick */ +time_value64_t time = { 0, 0 }; /* time since bootup (uncorrected) */ +unsigned long elapsed_ticks = 0; /* ticks elapsed since bootup */ + +int timedelta = 0; +int tickdelta = 0; + +#if HZ > 500 +unsigned tickadj = 1; /* can adjust HZ usecs per second */ +#else +unsigned tickadj = 500 / HZ; /* can adjust 100 usecs per second */ +#endif +unsigned bigadj = 1000000; /* adjust 10*tickadj if adjustment + > bigadj */ + +/* + * This update protocol, with a check value, allows + * do { + * secs = mtime->seconds; + * __sync_synchronize(); + * usecs = mtime->microseconds; + * __sync_synchronize(); + * } while (secs != mtime->check_seconds); + * to read the time correctly. + */ + +volatile mapped_time_value_t *mtime = 0; + +#define update_mapped_time(time) \ +MACRO_BEGIN \ + if (mtime != 0) { \ + mtime->check_seconds = (time)->seconds; \ + mtime->check_seconds64 = (time)->seconds; \ + __sync_synchronize(); \ + mtime->microseconds = (time)->nanoseconds / 1000; \ + mtime->time_value.nanoseconds = (time)->nanoseconds; \ + __sync_synchronize(); \ + mtime->seconds = (time)->seconds; \ + mtime->time_value.seconds = (time)->seconds; \ + } \ +MACRO_END + +#define read_mapped_time(time) \ +MACRO_BEGIN \ + do { \ + (time)->seconds = mtime->time_value.seconds; \ + __sync_synchronize(); \ + (time)->nanoseconds = mtime->time_value.nanoseconds; \ + __sync_synchronize(); \ + } while ((time)->seconds != mtime->check_seconds64); \ +MACRO_END + +def_simple_lock_irq_data(static, timer_lock) /* lock for ... */ +timer_elt_data_t timer_head; /* ordered list of timeouts */ + /* (doubles as end-of-list) */ + +/* + * Handle clock interrupts. + * + * The clock interrupt is assumed to be called at a (more or less) + * constant rate. The rate must be identical on all CPUS (XXX - fix). + * + * Usec is the number of microseconds that have elapsed since the + * last clock tick. It may be constant or computed, depending on + * the accuracy of the hardware clock. + * + */ +void clock_interrupt( + int usec, /* microseconds per tick */ + boolean_t usermode, /* executing user code */ + boolean_t basepri, /* at base priority */ + vm_offset_t pc) /* address of interrupted instruction */ +{ + int my_cpu = cpu_number(); + thread_t thread = current_thread(); + + counter(c_clock_ticks++); + counter(c_threads_total += c_threads_current); + counter(c_stacks_total += c_stacks_current); + +#if STAT_TIME + /* + * Increment the thread time, if using + * statistical timing. 
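[Editor's note] A minimal sketch of a reader that follows the update protocol documented above, assuming the caller has the mapped_time_value_t page available; the field names follow the uses in update_mapped_time().

void
read_mapped_clock(const volatile mapped_time_value_t *mt, time_value_t *tv)
{
	do {
		tv->seconds = mt->seconds;
		__sync_synchronize();		/* seconds before microseconds */
		tv->microseconds = mt->microseconds;
		__sync_synchronize();		/* microseconds before the re-check */
	} while (tv->seconds != mt->check_seconds);	/* retry if an update raced us */
}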
+ */ + if (usermode) { + timer_bump(&thread->user_timer, usec); + } + else { + /* Only bump timer if threads are initialized */ + if (thread) + timer_bump(&thread->system_timer, usec); + } +#endif /* STAT_TIME */ + + /* + * Increment the CPU time statistics. + */ + { + int state; + + if (usermode) + state = CPU_STATE_USER; + else if (!cpu_idle(my_cpu)) + state = CPU_STATE_SYSTEM; + else + state = CPU_STATE_IDLE; + + machine_slot[my_cpu].cpu_ticks[state]++; + + /* + * Adjust the thread's priority and check for + * quantum expiration. + */ + + thread_quantum_update(my_cpu, thread, 1, state); + } + +#if MACH_PCSAMPLE + /* + * Take a sample of pc for the user if required. + * This had better be MP safe. It might be interesting + * to keep track of cpu in the sample. + */ +#ifndef MACH_KERNSAMPLE + if (usermode) +#endif + { + if (thread) + take_pc_sample_macro(thread, SAMPLED_PC_PERIODIC, usermode, pc); + } +#endif /* MACH_PCSAMPLE */ + + /* + * Time-of-day and time-out list are updated only + * on the master CPU. + */ + if (my_cpu == master_cpu) { + + spl_t s; + timer_elt_t telt; + boolean_t needsoft = FALSE; + + + /* + * Update the tick count since bootup, and handle + * timeouts. + */ + + s = simple_lock_irq(&timer_lock); + + elapsed_ticks++; + + telt = (timer_elt_t)queue_first(&timer_head.chain); + if (telt->ticks <= elapsed_ticks) + needsoft = TRUE; + simple_unlock_irq(s, &timer_lock); + + /* + * Increment the time-of-day clock. + */ + if (timedelta == 0) { + time_value64_add_nanos(&time, usec * 1000); + } + else { + int delta; + + if (timedelta < 0) { + if (usec > tickdelta) { + delta = usec - tickdelta; + timedelta += tickdelta; + } else { + /* Not enough time has passed, defer overflowing + * correction for later, keep only one microsecond + * delta */ + delta = 1; + timedelta += usec - 1; + } + } + else { + delta = usec + tickdelta; + timedelta -= tickdelta; + } + time_value64_add_nanos(&time, delta * 1000); + } + update_mapped_time(&time); + + /* + * Schedule soft-interrupt for timeout if needed + */ + if (needsoft) { + if (basepri) { + (void) splsoftclock(); + softclock(); + } + else { + setsoftclock(); + } + } + } +} + +/* + * There is a nasty race between softclock and reset_timeout. + * For example, scheduling code looks at timer_set and calls + * reset_timeout, thinking the timer is set. However, softclock + * has already removed the timer but hasn't called thread_timeout + * yet. + * + * Interim solution: We initialize timers after pulling + * them out of the queue, so a race with reset_timeout won't + * hurt. The timeout functions (eg, thread_timeout, + * thread_depress_timeout) check timer_set/depress_priority + * to see if the timer has been cancelled and if so do nothing. + * + * This still isn't correct. For example, softclock pulls a + * timer off the queue, then thread_go resets timer_set (but + * reset_timeout does nothing), then thread_set_timeout puts the + * timer back on the queue and sets timer_set, then + * thread_timeout finally runs and clears timer_set, then + * thread_set_timeout tries to put the timer on the queue again + * and corrupts it. + */ + +void softclock(void) +{ + /* + * Handle timeouts. 
+ */ + spl_t s; + timer_elt_t telt; + void (*fcn)( void * param ); + void *param; + + while (TRUE) { + s = simple_lock_irq(&timer_lock); + telt = (timer_elt_t) queue_first(&timer_head.chain); + if (telt->ticks > elapsed_ticks) { + simple_unlock_irq(s, &timer_lock); + break; + } + fcn = telt->fcn; + param = telt->param; + + remqueue(&timer_head.chain, (queue_entry_t)telt); + telt->set = TELT_UNSET; + simple_unlock_irq(s, &timer_lock); + + assert(fcn != 0); + (*fcn)(param); + } +} + +/* + * Set timeout. + * + * Parameters: + * telt timer element. Function and param are already set. + * interval time-out interval, in hz. + */ +void set_timeout( + timer_elt_t telt, /* already loaded */ + unsigned int interval) +{ + spl_t s; + timer_elt_t next; + + s = simple_lock_irq(&timer_lock); + + interval += elapsed_ticks; + + for (next = (timer_elt_t)queue_first(&timer_head.chain); + ; + next = (timer_elt_t)queue_next((queue_entry_t)next)) { + + if (next->ticks > interval) + break; + } + telt->ticks = interval; + /* + * Insert new timer element before 'next' + * (after 'next'->prev) + */ + insque((queue_entry_t) telt, ((queue_entry_t)next)->prev); + telt->set = TELT_SET; + simple_unlock_irq(s, &timer_lock); +} + +boolean_t reset_timeout(timer_elt_t telt) +{ + spl_t s; + + s = simple_lock_irq(&timer_lock); + if (telt->set) { + remqueue(&timer_head.chain, (queue_entry_t)telt); + telt->set = TELT_UNSET; + simple_unlock_irq(s, &timer_lock); + return TRUE; + } + else { + simple_unlock_irq(s, &timer_lock); + return FALSE; + } +} + +void init_timeout(void) +{ + simple_lock_init_irq(&timer_lock); + queue_init(&timer_head.chain); + timer_head.ticks = ~0; /* MAXUINT - sentinel */ + + elapsed_ticks = 0; +} + +/* + * We record timestamps using the boot-time clock. We keep track of + * the boot-time clock by storing the difference to the real-time + * clock. + */ +struct time_value64 clock_boottime_offset; + +/* + * Update the offset of the boot-time clock from the real-time clock. + * This function must be called when the real-time clock is updated. + * This function must be called at SPLHIGH. + */ +static void +clock_boottime_update(const struct time_value64 *new_time) +{ + struct time_value64 delta = time; + time_value64_sub(&delta, new_time); + time_value64_add(&clock_boottime_offset, &delta); +} + +/* + * Record a timestamp in STAMP. Records values in the boot-time clock + * frame. + */ +void +record_time_stamp(time_value64_t *stamp) +{ + read_mapped_time(stamp); + time_value64_add(stamp, &clock_boottime_offset); +} + +/* + * Read a timestamp in STAMP into RESULT. Returns values in the + * real-time clock frame. + */ +void +read_time_stamp (const time_value64_t *stamp, time_value64_t *result) +{ + *result = *stamp; + time_value64_sub(result, &clock_boottime_offset); +} + + +/* + * Read the time (deprecated version). + */ +kern_return_t +host_get_time(const host_t host, time_value_t *current_time) +{ + if (host == HOST_NULL) + return(KERN_INVALID_HOST); + + time_value64_t current_time64; + read_mapped_time(¤t_time64); + TIME_VALUE64_TO_TIME_VALUE(¤t_time64, current_time); + return (KERN_SUCCESS); +} + +/* + * Read the time. + */ +kern_return_t +host_get_time64(const host_t host, time_value64_t *current_time) +{ + if (host == HOST_NULL) + return(KERN_INVALID_HOST); + + read_mapped_time(current_time); + return (KERN_SUCCESS); +} + +/* + * Set the time. Only available to privileged users. 
+ */ +kern_return_t +host_set_time(const host_t host, time_value_t new_time) +{ + time_value64_t new_time64; + TIME_VALUE_TO_TIME_VALUE64(&new_time, &new_time64); + return host_set_time64(host, new_time64); +} + +kern_return_t +host_set_time64(const host_t host, time_value64_t new_time) +{ + spl_t s; + + if (host == HOST_NULL) + return(KERN_INVALID_HOST); + +#if NCPUS > 1 + /* + * Switch to the master CPU to synchronize correctly. + */ + thread_bind(current_thread(), master_processor); + if (current_processor() != master_processor) + thread_block(thread_no_continuation); +#endif /* NCPUS > 1 */ + + s = splhigh(); + clock_boottime_update(&new_time); + time = new_time; + update_mapped_time(&time); + resettodr(); + splx(s); + +#if NCPUS > 1 + /* + * Switch off the master CPU. + */ + thread_bind(current_thread(), PROCESSOR_NULL); +#endif /* NCPUS > 1 */ + + return(KERN_SUCCESS); +} + +/* + * Adjust the time gradually. + */ +kern_return_t +host_adjust_time( + const host_t host, + time_value_t new_adjustment, + time_value_t *old_adjustment /* OUT */) +{ + time_value64_t old_adjustment64; + time_value64_t new_adjustment64; + kern_return_t ret; + + TIME_VALUE_TO_TIME_VALUE64(&new_adjustment, &new_adjustment64); + ret = host_adjust_time64(host, new_adjustment64, &old_adjustment64); + if (ret == KERN_SUCCESS) { + TIME_VALUE64_TO_TIME_VALUE(&old_adjustment64, old_adjustment); + } + return ret; +} + +/* + * Adjust the time gradually. + */ +kern_return_t +host_adjust_time64( + const host_t host, + time_value64_t new_adjustment, + time_value64_t *old_adjustment /* OUT */) +{ + time_value64_t oadj; + uint64_t ndelta_microseconds; + spl_t s; + + if (host == HOST_NULL) + return (KERN_INVALID_HOST); + + /* Note we only adjust up to microsecond precision */ + ndelta_microseconds = new_adjustment.seconds * MICROSECONDS_IN_ONE_SECOND + + new_adjustment.nanoseconds / 1000; + +#if NCPUS > 1 + thread_bind(current_thread(), master_processor); + if (current_processor() != master_processor) + thread_block(thread_no_continuation); +#endif /* NCPUS > 1 */ + + s = splclock(); + + oadj.seconds = timedelta / MICROSECONDS_IN_ONE_SECOND; + oadj.nanoseconds = (timedelta % MICROSECONDS_IN_ONE_SECOND) * 1000; + + if (timedelta == 0) { + if (ndelta_microseconds > bigadj) + tickdelta = 10 * tickadj; + else + tickdelta = tickadj; + } + /* Make ndelta_microseconds a multiple of tickdelta */ + if (ndelta_microseconds % tickdelta) + ndelta_microseconds = ndelta_microseconds / tickdelta * tickdelta; + + timedelta = ndelta_microseconds; + + splx(s); +#if NCPUS > 1 + thread_bind(current_thread(), PROCESSOR_NULL); +#endif /* NCPUS > 1 */ + + *old_adjustment = oadj; + + return (KERN_SUCCESS); +} + +void mapable_time_init(void) +{ + if (kmem_alloc_wired(kernel_map, (vm_offset_t *) &mtime, PAGE_SIZE) + != KERN_SUCCESS) + panic("mapable_time_init"); + memset((void *) mtime, 0, PAGE_SIZE); + update_mapped_time(&time); +} + +int timeopen(dev_t dev, int flag, io_req_t ior) +{ + return(0); +} +void timeclose(dev_t dev, int flag) +{ + return; +} + +/* + * Compatibility for device drivers. + * New code should use set_timeout/reset_timeout and private timers. + * These code can't use a cache to allocate timers, because + * it can be called from interrupt handlers. + */ + +#define NTIMERS 20 + +timer_elt_data_t timeout_timers[NTIMERS]; + +/* + * Set timeout. + * + * fcn: function to call + * param: parameter to pass to function + * interval: timeout interval, in hz. 
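[Editor's note] A back-of-the-envelope illustration of the pacing that host_adjust_time64() above sets up, assuming HZ = 100 so that tickadj = 500/HZ = 5: a requested slew no larger than bigadj is applied tickadj microseconds per clock tick by clock_interrupt().

unsigned adj_usecs = 10000;			/* request a +10 ms adjustment */
unsigned per_tick = tickadj;			/* 5 us per tick when HZ == 100 */
unsigned ticks_needed = adj_usecs / per_tick;	/* 2000 ticks, about 20 s at 100 Hz */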
+ */ +void timeout( + void (*fcn)(void *param), + void * param, + int interval) +{ + spl_t s; + timer_elt_t elt; + + s = simple_lock_irq(&timer_lock); + for (elt = &timeout_timers[0]; elt < &timeout_timers[NTIMERS]; elt++) + if (elt->set == TELT_UNSET) + break; + if (elt == &timeout_timers[NTIMERS]) + panic("timeout"); + elt->fcn = fcn; + elt->param = param; + elt->set = TELT_ALLOC; + simple_unlock_irq(s, &timer_lock); + + set_timeout(elt, (unsigned int)interval); +} + +/* + * Returns a boolean indicating whether the timeout element was found + * and removed. + */ +boolean_t untimeout(void (*fcn)( void * param ), const void *param) +{ + spl_t s; + timer_elt_t elt; + + s = simple_lock_irq(&timer_lock); + queue_iterate(&timer_head.chain, elt, timer_elt_t, chain) { + + if ((fcn == elt->fcn) && (param == elt->param)) { + /* + * Found it. + */ + remqueue(&timer_head.chain, (queue_entry_t)elt); + elt->set = TELT_UNSET; + + simple_unlock_irq(s, &timer_lock); + return (TRUE); + } + } + simple_unlock_irq(s, &timer_lock); + return (FALSE); +} diff --git a/kern/mach_clock.h b/kern/mach_clock.h new file mode 100644 index 0000000..66903b8 --- /dev/null +++ b/kern/mach_clock.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2006, 2007 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Barry deFreese and others. + */ + +#ifndef _KERN_MACH_CLOCK_H_ +#define _KERN_MACH_CLOCK_H_ + +/* + * Mach time-out and time-of-day facility. + */ + +#include <mach/machine/kern_return.h> +#include <mach/time_value.h> +#include <kern/host.h> +#include <kern/queue.h> +#include <sys/types.h> + +struct io_req; +typedef struct io_req *io_req_t; + + +/* Timers in kernel. */ +extern unsigned long elapsed_ticks; /* number of ticks elapsed since bootup */ +extern int hz; /* number of ticks per second */ +extern int tick; /* number of usec per tick */ + +extern time_value64_t time; /* time since bootup (uncorrected) */ + +typedef void timer_func_t(void *); + +/* Time-out element. */ +struct timer_elt { + queue_chain_t chain; /* chain in order of expiration */ + timer_func_t *fcn; /* function to call */ + void * param; /* with this parameter */ + unsigned long ticks; /* expiration time, in ticks */ + int set; /* unset | set | allocated */ +}; +#define TELT_UNSET 0 /* timer not set */ +#define TELT_SET 1 /* timer set */ +#define TELT_ALLOC 2 /* timer allocated from pool */ + +typedef struct timer_elt timer_elt_data_t; +typedef struct timer_elt *timer_elt_t; + + +extern void clock_interrupt( + int usec, + boolean_t usermode, + boolean_t basepri, + vm_offset_t pc); + +extern void softclock (void); + +/* For `private' timer elements. 
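[Editor's note] A minimal sketch, with hypothetical foo_* names, of both timeout interfaces implemented above: a caller-owned ("private") timer element driven through set_timeout()/reset_timeout(), and the self-allocating compatibility interface timeout()/untimeout(). Intervals are in clock ticks, so hz converts from seconds.

static timer_elt_data_t foo_timer;		/* zeroed, so set == TELT_UNSET */

static void foo_expire(void *arg)
{
	/* Runs from softclock() once the interval has elapsed. */
}

void foo_start(void *arg)
{
	foo_timer.fcn = foo_expire;		/* set_timeout() expects fcn/param loaded */
	foo_timer.param = arg;
	set_timeout(&foo_timer, 2 * hz);	/* fire in about two seconds */
}

void foo_stop(void)
{
	if (!reset_timeout(&foo_timer)) {
		/* Too late: softclock() already pulled it off the queue,
		   so foo_expire() has run or is about to run. */
	}
}

void foo_compat(void *arg)
{
	timeout(foo_expire, arg, 2 * hz);	/* element comes from timeout_timers[] */
	/* ... */
	untimeout(foo_expire, arg);		/* returns TRUE if it was still pending */
}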
*/ +extern void set_timeout( + timer_elt_t telt, + unsigned int interval); +extern boolean_t reset_timeout(timer_elt_t telt); + +#define set_timeout_setup(telt,fcn,param,interval) \ + ((telt)->fcn = (fcn), \ + (telt)->param = (param), \ + (telt)->private = TRUE, \ + set_timeout((telt), (interval))) + +#define reset_timeout_check(t) \ + MACRO_BEGIN \ + if ((t)->set) \ + reset_timeout((t)); \ + MACRO_END + +extern void init_timeout (void); + +/* + * Record a timestamp in STAMP. Records values in the boot-time clock + * frame. + */ +extern void record_time_stamp (time_value64_t *stamp); + +/* + * Read a timestamp in STAMP into RESULT. Returns values in the + * real-time clock frame. + */ +extern void read_time_stamp (const time_value64_t *stamp, time_value64_t *result); + +extern void mapable_time_init (void); + +/* For public timer elements. */ +extern void timeout(timer_func_t *fcn, void *param, int interval); +extern boolean_t untimeout(timer_func_t *fcn, const void *param); + +extern int timeopen(dev_t dev, int flag, io_req_t ior); +extern void timeclose(dev_t dev, int flag); + +#endif /* _KERN_MACH_CLOCK_H_ */ diff --git a/kern/mach_debug.srv b/kern/mach_debug.srv new file mode 100644 index 0000000..c78b9a4 --- /dev/null +++ b/kern/mach_debug.srv @@ -0,0 +1,26 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* This is a server presentation file. */ + +#define KERNEL_SERVER 1 + +#include <mach_debug/mach_debug.defs> diff --git a/kern/mach_factor.c b/kern/mach_factor.c new file mode 100644 index 0000000..debce0b --- /dev/null +++ b/kern/mach_factor.c @@ -0,0 +1,150 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/mach_factor.c + * Author: Avadis Tevanian, Jr. + * Date: 1986 + * + * Compute the Mach Factor. + */ + +#include <mach/machine.h> +#include <mach/processor_info.h> +#include <kern/mach_clock.h> +#include <kern/sched.h> +#include <kern/processor.h> +#include <mach/kern_return.h> +#include <mach/port.h> + +#include "mach_factor.h" + +long avenrun[3] = {0, 0, 0}; +long mach_factor[3] = {0, 0, 0}; + +/* + * Values are scaled by LOAD_SCALE, defined in processor_info.h + */ +static long fract[3] = { + 800, /* (4.0/5.0) 5 second average */ + 966, /* (29.0/30.0) 30 second average */ + 983, /* (59.0/60.) 1 minute average */ +}; + +void compute_mach_factor(void) +{ + processor_set_t pset; + processor_t processor; + int ncpus; + int nthreads; + long factor_now; + long average_now; + long load_now; + + simple_lock(&all_psets_lock); + pset = (processor_set_t) queue_first(&all_psets); + while (!queue_end(&all_psets, (queue_entry_t)pset)) { + + /* + * If no processors, this pset is in suspended animation. + * No load calculations are performed. + */ + pset_lock(pset); + if((ncpus = pset->processor_count) > 0) { + + /* + * Count number of threads. + */ + nthreads = pset->runq.count; + processor = (processor_t) queue_first(&pset->processors); + while (!queue_end(&pset->processors, + (queue_entry_t)processor)) { + nthreads += processor->runq.count; + processor = + (processor_t) queue_next(&processor->processors); + } + + /* + * account for threads on cpus. + */ + nthreads += ncpus - pset->idle_count; + + /* + * The current thread (running this calculation) + * doesn't count; it's always in the default pset. + */ + if (pset == &default_pset) + nthreads -= 1; + + if (nthreads > ncpus) { + factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1); + load_now = (nthreads << SCHED_SHIFT) / ncpus; + } + else { + factor_now = (ncpus - nthreads) * LOAD_SCALE; + load_now = SCHED_SCALE; + } + + /* + * Load average and mach factor calculations for + * those that ask about these things. + */ + + average_now = nthreads * LOAD_SCALE; + + pset->mach_factor = + ((pset->mach_factor << 2) + factor_now)/5; + pset->load_average = + ((pset->load_average << 2) + average_now)/5; + + /* + * And some ugly stuff to keep w happy. + */ + if (pset == &default_pset) { + int i; + + for (i = 0; i < 3; i++) { + mach_factor[i] = ( (mach_factor[i]*fract[i]) + + (factor_now*(LOAD_SCALE-fract[i])) ) + / LOAD_SCALE; + avenrun[i] = ( (avenrun[i]*fract[i]) + + (average_now*(LOAD_SCALE-fract[i])) ) + / LOAD_SCALE; + } + } + + /* + * sched_load is the only thing used by scheduler. + * It is always at least 1 (i.e. SCHED_SCALE). + */ + pset->sched_load = (pset->sched_load + load_now) >> 1; + } + + pset_unlock(pset); + pset = (processor_set_t) queue_next(&pset->all_psets); + } + + simple_unlock(&all_psets_lock); +} diff --git a/kern/mach_factor.h b/kern/mach_factor.h new file mode 100644 index 0000000..0ec64be --- /dev/null +++ b/kern/mach_factor.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2006, 2007 Free Software Foundation, Inc. 
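[Editor's note] A small arithmetic sketch of the exponential averaging done by compute_mach_factor() above, assuming it runs once per second as the 5/30/60-second labels on fract[] suggest. With LOAD_SCALE = 1000, fract[0] = 800 keeps 80% of the previous value at each step, so the 5-second average converges on the instantaneous load over a handful of samples.

long ema = 0;			/* plays the role of avenrun[0], scaled by LOAD_SCALE */
long average_now = 3 * 1000;	/* three runnable threads, scaled */
int i;

for (i = 0; i < 5; i++)
	ema = (ema * 800 + average_now * (1000 - 800)) / 1000;
/* ema takes the values 600, 1080, 1464, 1771, 2016, ... converging toward 3000. */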
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Barry deFreese. + */ +/* + * + */ + +#ifndef _KERN_MACH_FACTOR_H_ +#define _KERN_MACH_FACTOR_H_ + +#include <sys/types.h> + +extern void compute_mach_factor(void); + +#endif /* _KERN_MACH_FACTOR_H_ */ diff --git a/kern/mach_host.srv b/kern/mach_host.srv new file mode 100644 index 0000000..a18ab1c --- /dev/null +++ b/kern/mach_host.srv @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* This is a server presentation file. */ + +#define KERNEL_SERVER 1 + +#ifdef MIGRATING_THREADS +#define thread_assign act_thread_assign +#define thread_assign_default act_thread_assign_default +#define thread_get_assignment act_thread_get_assignment +#define thread_priority act_thread_priority +#define thread_max_priority act_thread_max_priority +#define thread_policy act_thread_policy +#define thread_depress_abort act_thread_depress_abort +#define thread_wire act_thread_wire +#endif /* MIGRATING_THREADS */ + +#include <mach/mach_host.defs> diff --git a/kern/machine.c b/kern/machine.c new file mode 100644 index 0000000..f757d14 --- /dev/null +++ b/kern/machine.c @@ -0,0 +1,672 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/machine.c + * Author: Avadis Tevanian, Jr. + * Date: 1987 + * + * Support for machine independent machine abstraction. + */ + +#include <string.h> +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/mach_types.h> +#include <mach/machine.h> +#include <mach/host_info.h> +#include <kern/counters.h> +#include <kern/debug.h> +#include <kern/ipc_host.h> +#include <kern/host.h> +#include <kern/machine.h> +#include <kern/mach_host.server.h> +#include <kern/lock.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/printf.h> +#include <machine/machspl.h> /* for splsched */ +#include <machine/model_dep.h> +#include <machine/pcb.h> +#include <sys/reboot.h> + + + +/* + * Exported variables: + */ + +struct machine_info machine_info; +struct machine_slot machine_slot[NCPUS]; + +queue_head_t action_queue; /* assign/shutdown queue */ +def_simple_lock_data(,action_lock); + +/* + * cpu_up: + * + * Flag specified cpu as up and running. Called when a processor comes + * online. + */ +void cpu_up(int cpu) +{ + struct machine_slot *ms; + processor_t processor; + spl_t s; + + processor = cpu_to_processor(cpu); + pset_lock(&default_pset); +#if MACH_HOST + pset_lock(slave_pset); +#endif + s = splsched(); + processor_lock(processor); +#if NCPUS > 1 + init_ast_check(processor); +#endif /* NCPUS > 1 */ + ms = &machine_slot[cpu]; + ms->running = TRUE; + machine_info.avail_cpus++; +#if MACH_HOST + if (cpu != 0) + pset_add_processor(slave_pset, processor); + else +#endif + pset_add_processor(&default_pset, processor); + processor->state = PROCESSOR_RUNNING; + processor_unlock(processor); + splx(s); +#if MACH_HOST + pset_unlock(slave_pset); +#endif + pset_unlock(&default_pset); +} + +kern_return_t +host_reboot(const host_t host, int options) +{ + if (host == HOST_NULL) + return (KERN_INVALID_HOST); + + if (options & RB_DEBUGGER) { + Debugger("Debugger"); + } else { +#ifdef parisc +/* XXX this could be made common */ + halt_all_cpus(options); +#else + halt_all_cpus(!(options & RB_HALT)); +#endif + } + return (KERN_SUCCESS); +} + +#if NCPUS > 1 + +/* + * cpu_down: + * + * Flag specified cpu as down. Called when a processor is about to + * go offline. + */ +static void cpu_down(int cpu) +{ + struct machine_slot *ms; + processor_t processor; + spl_t s; + + s = splsched(); + processor = cpu_to_processor(cpu); + processor_lock(processor); + ms = &machine_slot[cpu]; + ms->running = FALSE; + machine_info.avail_cpus--; + /* + * processor has already been removed from pset. + */ + processor->processor_set_next = PROCESSOR_SET_NULL; + processor->state = PROCESSOR_OFF_LINE; + processor_unlock(processor); + splx(s); +} + +/* + * processor_request_action - common internals of processor_assign + * and processor_shutdown. 
If new_pset is null, this is + * a shutdown, else it's an assign and caller must donate + * a reference. + */ +static void +processor_request_action( + processor_t processor, + processor_set_t new_pset) +{ + processor_set_t pset; + + /* + * Processor must be in a processor set. Must lock its idle lock to + * get at processor state. + */ + pset = processor->processor_set; + simple_lock(&pset->idle_lock); + + /* + * If the processor is dispatching, let it finish - it will set its + * state to running very soon. + */ + while (*(volatile int *)&processor->state == PROCESSOR_DISPATCHING) + cpu_pause(); + + /* + * Now lock the action queue and do the dirty work. + */ + simple_lock(&action_lock); + + switch (processor->state) { + case PROCESSOR_IDLE: + /* + * Remove from idle queue. + */ + queue_remove(&pset->idle_queue, processor, processor_t, + processor_queue); + pset->idle_count--; + + /* fall through ... */ + case PROCESSOR_RUNNING: + /* + * Put it on the action queue. + */ + queue_enter(&action_queue, processor, processor_t, + processor_queue); + + /* fall through ... */ + case PROCESSOR_ASSIGN: + /* + * And ask the action_thread to do the work. + */ + + if (new_pset == PROCESSOR_SET_NULL) { + processor->state = PROCESSOR_SHUTDOWN; + } + else { + assert(processor->state != PROCESSOR_ASSIGN); + processor->state = PROCESSOR_ASSIGN; + processor->processor_set_next = new_pset; + } + break; + + default: + printf("state: %d\n", processor->state); + panic("processor_request_action: bad state"); + } + simple_unlock(&action_lock); + simple_unlock(&pset->idle_lock); + + thread_wakeup((event_t)&action_queue); +} + +#if MACH_HOST +/* + * processor_assign() changes the processor set that a processor is + * assigned to. Any previous assignment in progress is overridden. + * Synchronizes with assignment completion if wait is TRUE. + */ +kern_return_t +processor_assign( + processor_t processor, + processor_set_t new_pset, + boolean_t wait) +{ + spl_t s; + + /* + * Check for null arguments. + * XXX Can't assign master processor. + */ + if (processor == PROCESSOR_NULL || new_pset == PROCESSOR_SET_NULL || + processor == master_processor) { + return(KERN_INVALID_ARGUMENT); + } + + /* + * Get pset reference to donate to processor_request_action. + */ + pset_reference(new_pset); + + /* + * Check processor status. + * If shutdown or being shutdown, can`t reassign. + * If being assigned, wait for assignment to finish. + */ +Retry: + s = splsched(); + processor_lock(processor); + if(processor->state == PROCESSOR_OFF_LINE || + processor->state == PROCESSOR_SHUTDOWN) { + /* + * Already shutdown or being shutdown -- Can't reassign. + */ + processor_unlock(processor); + (void) splx(s); + pset_deallocate(new_pset); + return(KERN_FAILURE); + } + + if (processor->state == PROCESSOR_ASSIGN) { + assert_wait((event_t) processor, TRUE); + processor_unlock(processor); + splx(s); + thread_block(thread_no_continuation); + goto Retry; + } + + /* + * Avoid work if processor is already in this processor set. + */ + if (processor->processor_set == new_pset) { + processor_unlock(processor); + (void) splx(s); + /* clean up dangling ref */ + pset_deallocate(new_pset); + return(KERN_SUCCESS); + } + + /* + * OK to start processor assignment. + */ + processor_request_action(processor, new_pset); + + /* + * Synchronization with completion. 
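[Editor's note] A minimal sketch of driving the request/action split described above, with a hypothetical caller that already holds its own references: processor_request_action() only queues the processor on action_queue and wakes the action thread, and passing wait = TRUE makes processor_assign() sleep until the processor has left the PROCESSOR_ASSIGN state.

kern_return_t
move_cpu(processor_t proc, processor_set_t new_pset)
{
	kern_return_t kr;

	kr = processor_assign(proc, new_pset, TRUE);	/* caller's pset reference is untouched */
	if (kr != KERN_SUCCESS)
		return kr;	/* master processor, or it is (being) shut down */
	return KERN_SUCCESS;
}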
+ */ + if (wait) { + while (processor->state == PROCESSOR_ASSIGN || + processor->state == PROCESSOR_SHUTDOWN) { + assert_wait((event_t)processor, TRUE); + processor_unlock(processor); + splx(s); + thread_block(thread_no_continuation); + s = splsched(); + processor_lock(processor); + } + } + processor_unlock(processor); + splx(s); + + return(KERN_SUCCESS); +} + +#else /* MACH_HOST */ + +kern_return_t +processor_assign( + processor_t processor, + processor_set_t new_pset, + boolean_t wait) +{ + return KERN_FAILURE; +} + +#endif /* MACH_HOST */ + +/* + * processor_shutdown() queues a processor up for shutdown. + * Any assignment in progress is overriden. It does not synchronize + * with the shutdown (can be called from interrupt level). + */ +kern_return_t +processor_shutdown(processor_t processor) +{ + spl_t s; + + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + processor_lock(processor); + if(processor->state == PROCESSOR_OFF_LINE || + processor->state == PROCESSOR_SHUTDOWN) { + /* + * Already shutdown or being shutdown -- nothing to do. + */ + processor_unlock(processor); + splx(s); + return(KERN_SUCCESS); + } + + processor_request_action(processor, PROCESSOR_SET_NULL); + processor_unlock(processor); + splx(s); + + return(KERN_SUCCESS); +} + +/* + * processor_doaction actually does the shutdown. The trick here + * is to schedule ourselves onto a cpu and then save our + * context back into the runqs before taking out the cpu. + */ +static void processor_doaction(processor_t processor) +{ + thread_t this_thread; + spl_t s; + processor_set_t pset; +#if MACH_HOST + processor_set_t new_pset; + thread_t thread; + thread_t prev_thread = THREAD_NULL; + boolean_t have_pset_ref = FALSE; +#endif /* MACH_HOST */ + + /* + * Get onto the processor to shutdown + */ + this_thread = current_thread(); + thread_bind(this_thread, processor); + thread_block(thread_no_continuation); + + pset = processor->processor_set; +#if MACH_HOST + /* + * If this is the last processor in the processor_set, + * stop all the threads first. + */ + pset_lock(pset); + if (pset->processor_count == 1) { + /* + * First suspend all of them. + */ + queue_iterate(&pset->threads, thread, thread_t, pset_threads) { + thread_hold(thread); + } + pset->empty = TRUE; + /* + * Now actually stop them. Need a pset reference. + */ + pset->ref_count++; + have_pset_ref = TRUE; + +Restart_thread: + prev_thread = THREAD_NULL; + queue_iterate(&pset->threads, thread, thread_t, pset_threads) { + thread_reference(thread); + pset_unlock(pset); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + + /* + * Only wait for threads still in the pset. + */ + thread_freeze(thread); + if (thread->processor_set != pset) { + /* + * It got away - start over. + */ + thread_unfreeze(thread); + thread_deallocate(thread); + pset_lock(pset); + goto Restart_thread; + } + + (void) thread_dowait(thread, TRUE); + prev_thread = thread; + pset_lock(pset); + thread_unfreeze(prev_thread); + } + } + pset_unlock(pset); + + /* + * At this point, it is ok to remove the processor from the pset. + * We can use processor->processor_set_next without locking the + * processor, since it cannot change while processor->state is + * PROCESSOR_ASSIGN or PROCESSOR_SHUTDOWN. + */ + + new_pset = processor->processor_set_next; + +Restart_pset: + if (new_pset) { + /* + * Reassigning processor. 
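+ *
+ * Two psets have to be locked here.  To avoid deadlocking against
+ * another thread locking the same pair in the opposite order, the
+ * locks are always taken in ascending address order (lower address
+ * first), which is what the comparison just below implements.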
+ */ + + if ((integer_t) pset < (integer_t) new_pset) { + pset_lock(pset); + pset_lock(new_pset); + } + else { + pset_lock(new_pset); + pset_lock(pset); + } + if (!(new_pset->active)) { + pset_unlock(new_pset); + pset_unlock(pset); + pset_deallocate(new_pset); + new_pset = &default_pset; + pset_reference(new_pset); + goto Restart_pset; + } + + /* + * Handle remove last / assign first race. + * Only happens if there is more than one action thread. + */ + while (new_pset->empty && new_pset->processor_count > 0) { + pset_unlock(new_pset); + pset_unlock(pset); + while (*(volatile boolean_t *)&new_pset->empty && + *(volatile int *)&new_pset->processor_count > 0) + /* spin */; + goto Restart_pset; + } + + /* + * Lock the processor. new_pset should not have changed. + */ + s = splsched(); + processor_lock(processor); + assert(processor->processor_set_next == new_pset); + + /* + * Shutdown may have been requested while this assignment + * was in progress. + */ + if (processor->state == PROCESSOR_SHUTDOWN) { + processor->processor_set_next = PROCESSOR_SET_NULL; + pset_unlock(new_pset); + goto shutdown; /* releases pset reference */ + } + + /* + * Do assignment, then wakeup anyone waiting for it. + */ + pset_remove_processor(pset, processor); + pset_unlock(pset); + + pset_add_processor(new_pset, processor); + if (new_pset->empty) { + /* + * Set all the threads loose. + * + * NOTE: this appears to violate the locking + * order, since the processor lock should + * be taken AFTER a thread lock. However, + * thread_setrun (called by thread_release) + * only takes the processor lock if the + * processor is idle. The processor is + * not idle here. + */ + queue_iterate(&new_pset->threads, thread, thread_t, + pset_threads) { + thread_release(thread); + } + new_pset->empty = FALSE; + } + processor->processor_set_next = PROCESSOR_SET_NULL; + processor->state = PROCESSOR_RUNNING; + thread_wakeup((event_t)processor); + processor_unlock(processor); + splx(s); + pset_unlock(new_pset); + + /* + * Clean up dangling references, and release our binding. + */ + pset_deallocate(new_pset); + if (have_pset_ref) + pset_deallocate(pset); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + thread_bind(this_thread, PROCESSOR_NULL); + + thread_block(thread_no_continuation); + return; + } + +#endif /* MACH_HOST */ + + /* + * Do shutdown, make sure we live when processor dies. + */ + if (processor->state != PROCESSOR_SHUTDOWN) { + printf("state: %d\n", processor->state); + panic("action_thread -- bad processor state"); + } + + s = splsched(); + processor_lock(processor); + +#if MACH_HOST + shutdown: +#endif /* MACH_HOST */ + pset_remove_processor(pset, processor); + processor_unlock(processor); + pset_unlock(pset); + splx(s); + + /* + * Clean up dangling references, and release our binding. + */ +#if MACH_HOST + if (new_pset != PROCESSOR_SET_NULL) + pset_deallocate(new_pset); + if (have_pset_ref) + pset_deallocate(pset); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); +#endif /* MACH_HOST */ + + thread_bind(this_thread, PROCESSOR_NULL); + switch_to_shutdown_context(this_thread, + processor_doshutdown, + processor); + +} + +/* + * action_thread() shuts down processors or changes their assignment. 
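+ *
+ * The queue is fed by processor_request_action(), which enqueues the
+ * processor on action_queue and wakes this thread with
+ * thread_wakeup(); the loop below drains the queue, running
+ * processor_doaction() on each entry, and then blocks again via
+ * assert_wait()/thread_block().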
+ */ +void __attribute__((noreturn)) action_thread_continue(void) +{ + processor_t processor; + spl_t s; + + while (TRUE) { + s = splsched(); + simple_lock(&action_lock); + while ( !queue_empty(&action_queue)) { + processor = (processor_t) queue_first(&action_queue); + queue_remove(&action_queue, processor, processor_t, + processor_queue); + simple_unlock(&action_lock); + (void) splx(s); + + processor_doaction(processor); + + s = splsched(); + simple_lock(&action_lock); + } + + assert_wait((event_t) &action_queue, FALSE); + simple_unlock(&action_lock); + (void) splx(s); + counter(c_action_thread_block++); + thread_block(action_thread_continue); + } +} + +void __attribute__((noreturn)) action_thread(void) +{ + action_thread_continue(); + /*NOTREACHED*/ +} + +/* + * Actually do the processor shutdown. This is called at splsched, + * running on the processor's shutdown stack. + */ + +void processor_doshutdown(processor_t processor) +{ + int cpu = processor->slot_num; + + timer_switch(&kernel_timer[cpu]); + + /* + * Ok, now exit this cpu. + */ + PMAP_DEACTIVATE_KERNEL(cpu); +#ifndef MIGRATING_THREADS + percpu_array[cpu].active_thread = THREAD_NULL; +#endif + cpu_down(cpu); + thread_wakeup((event_t)processor); + halt_cpu(); + /* + * The action thread returns to life after the call to + * switch_to_shutdown_context above, on some other cpu. + */ + + /*NOTREACHED*/ +} +#else /* NCPUS > 1 */ + +kern_return_t +processor_assign( + processor_t processor, + processor_set_t new_pset, + boolean_t wait) +{ + return(KERN_FAILURE); +} + +#endif /* NCPUS > 1 */ diff --git a/kern/machine.h b/kern/machine.h new file mode 100644 index 0000000..5c55d2c --- /dev/null +++ b/kern/machine.h @@ -0,0 +1,59 @@ +/* + * Machine abstraction functions + * Copyright (C) 2008 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Barry deFreese. + */ +/* + * Machine abstraction functions. + * + */ + +#ifndef _MACHINE_H_ +#define _MACHINE_H_ + +#include <mach/std_types.h> + +/* + * cpu_up: + * + * Flag specified cpu as up and running. Called when a processor comes + * online. + */ +extern void cpu_up (int); + +/* + * processor_assign() changes the processor set that a processor is + * assigned to. Any previous assignment in progress is overridden. + * Synchronizes with assignment completion if wait is TRUE. + */ +extern kern_return_t processor_assign (processor_t, processor_set_t, boolean_t); + +/* + * processor_shutdown() queues a processor up for shutdown. + * Any assignment in progress is overriden. It does not synchronize + * with the shutdown (can be called from interrupt level). + */ +extern kern_return_t processor_shutdown (processor_t); + +/* + * action_thread() shuts down processors or changes their assignment. 
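+ *
+ * A minimal, hypothetical sketch of taking a CPU offline with these
+ * interfaces (error handling omitted):
+ *
+ *	processor_t p = cpu_to_processor(cpu);
+ *
+ *	if (processor_shutdown(p) == KERN_SUCCESS)
+ *		... the action thread finishes the shutdown
+ *		    asynchronously ...
+ *
+ * processor_assign() is used the same way to move a processor to
+ * another processor set, optionally waiting for completion.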
+ */ +extern void action_thread_continue (void) __attribute__((noreturn)); +extern void action_thread(void) __attribute__((noreturn)); + +#endif /* _MACHINE_H_ */ diff --git a/kern/macros.h b/kern/macros.h new file mode 100644 index 0000000..01deab6 --- /dev/null +++ b/kern/macros.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2009-2015 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Helper macros. + * + * Upstream site with license notes : + * http://git.sceen.net/rbraun/librbraun.git/ + */ + +#ifndef _KERN_MACROS_H +#define _KERN_MACROS_H + +#define MACRO_BEGIN ({ +#define MACRO_END }) +#define MACRO_RETURN if (1) return + +#define __QUOTE(x) #x +#define QUOTE(x) __QUOTE(x) + +#ifdef __ASSEMBLER__ +#define DECL_CONST(x, s) x +#else /* __ASSEMBLER__ */ +#define __DECL_CONST(x, s) x##s +#define DECL_CONST(x, s) __DECL_CONST(x, s) +#endif /* __ASSEMBLER__ */ + +#define STRLEN(x) (sizeof(x) - 1) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) + +#define DIV_CEIL(n, d) (((n) + (d) - 1) / (d)) + +#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0) +#define ISP2(x) P2ALIGNED(x, x) +#define P2ALIGN(x, a) ((x) & -(a)) +#define P2ROUND(x, a) (-(-(x) & -(a))) +#define P2END(x, a) (-(~(x) & -(a))) + +#define structof(ptr, type, member) \ + ((type *)((char *)(ptr) - offsetof(type, member))) + +#define access_once(x) (*(volatile typeof(x) *)&(x)) + +#define alignof(x) __alignof__(x) + +#ifndef likely +#define likely(expr) __builtin_expect(!!(expr), 1) +#endif /* likely */ +#ifndef unlikely +#define unlikely(expr) __builtin_expect(!!(expr), 0) +#endif /* unlikely */ + +#ifndef barrier +#define barrier() asm volatile("" : : : "memory") +#endif /* barrier */ + +#define __noreturn __attribute__((noreturn)) +#define __aligned(x) __attribute__((aligned(x))) +#define __always_inline inline __attribute__((always_inline)) +#ifndef __section +#define __section(x) __attribute__((section(x))) +#endif /* __section */ +#define __packed __attribute__((packed)) +#define __alias(x) __attribute__((alias(x))) + +#define __format_printf(fmt, args) \ + __attribute__((format(printf, fmt, args))) + +#endif /* _KERN_MACROS_H */ diff --git a/kern/pc_sample.c b/kern/pc_sample.c new file mode 100644 index 0000000..497bd89 --- /dev/null +++ b/kern/pc_sample.c @@ -0,0 +1,306 @@ +/* + * Mach Operating System + * Copyright (c) 1993,1992 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include <kern/printf.h> +#include <string.h> + +#include <mach/mach_types.h> /* vm_address_t */ +#include <mach/std_types.h> /* pointer_t */ +#include <mach/pc_sample.h> +#include <machine/trap.h> +#include <kern/kalloc.h> +#include <kern/host.h> +#include <kern/thread.h> +#include <kern/pc_sample.h> +#include <kern/mach4.server.h> +#include <kern/mach_clock.h> + +#if MACH_PCSAMPLE + +#define MAX_PC_SAMPLES 512 + +typedef sampled_pc_t sampled_pcs[MAX_PC_SAMPLES]; + +void take_pc_sample( + const thread_t t, + sample_control_t *cp, + sampled_pc_flavor_t flavor, + boolean_t usermode, + vm_offset_t kern_pc) +{ + vm_offset_t pc; + struct sampled_pc *sample; + + if (usermode) + pc = interrupted_pc(t); + else + pc = kern_pc; + + cp->seqno++; + sample = &((sampled_pc_t *)cp->buffer)[cp->seqno % MAX_PC_SAMPLES]; + sample->id = (rpc_vm_offset_t)(vm_offset_t)t; + sample->pc = (rpc_vm_offset_t)pc; + sample->sampletype = flavor; +} + +kern_return_t +thread_enable_pc_sampling( + thread_t thread, + int *tickp, + sampled_pc_flavor_t flavors) +{ + vm_offset_t buf; + + if (thread == THREAD_NULL) { + return KERN_INVALID_ARGUMENT; + } + if (thread->pc_sample.buffer == 0) { + buf = (vm_offset_t) kalloc(sizeof (sampled_pcs)); + if (buf == 0) { + printf("thread_enable_pc_sampling: kalloc failed\n"); + return KERN_INVALID_ARGUMENT; + } + thread->pc_sample.buffer = buf; + thread->pc_sample.seqno = 0; + } + *tickp = tick; + thread->pc_sample.sampletypes = flavors; + return KERN_SUCCESS; +} + +kern_return_t +task_enable_pc_sampling( + task_t task, + int *tickp, + sampled_pc_flavor_t flavors) +{ + vm_offset_t buf; + + if (task == TASK_NULL) { + return KERN_INVALID_ARGUMENT; + } + if (task->pc_sample.buffer == 0) { + buf = (vm_offset_t) kalloc(sizeof (sampled_pcs)); + if (buf == 0) { + printf("task_enable_pc_sampling: kalloc failed\n"); + return KERN_INVALID_ARGUMENT; + } + task->pc_sample.buffer = buf; + task->pc_sample.seqno = 0; + } + *tickp = tick; + task->pc_sample.sampletypes = flavors; + return KERN_SUCCESS; +} + +kern_return_t +thread_disable_pc_sampling( + thread_t thread, + int *samplecntp) +{ + vm_offset_t buf; + + if (thread == THREAD_NULL) { + return KERN_INVALID_ARGUMENT; + } + if ((buf = thread->pc_sample.buffer) != 0) + kfree(buf, sizeof (sampled_pcs)); + thread->pc_sample.buffer = (vm_offset_t) 0; + thread->pc_sample.seqno = 0; + thread->pc_sample.sampletypes = 0; /* shut off sampling */ + + return KERN_SUCCESS; +} + +kern_return_t +task_disable_pc_sampling( + task_t task, + int *samplecntp) +{ + vm_offset_t buf; + + if (task == TASK_NULL) { + return KERN_INVALID_ARGUMENT; + } + if ((buf = task->pc_sample.buffer) != 0) + kfree(buf, sizeof (sampled_pcs)); + task->pc_sample.buffer = (vm_offset_t) 0; + task->pc_sample.seqno = 0; + task->pc_sample.sampletypes = 0; /* shut off sampling */ + + return KERN_SUCCESS; +} + +static kern_return_t +get_sampled_pcs( + sample_control_t *cp, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + mach_msg_type_number_t *sampled_pcs_cntp) +{ + int nsamples; + sampled_pc_seqno_t seqidx1, seqidx2; + + nsamples = cp->seqno - *seqnop; + seqidx1 = *seqnop % MAX_PC_SAMPLES; /* index of *seqnop */ + seqidx2 = cp->seqno % MAX_PC_SAMPLES; /* index of cp->seqno */ + + if (nsamples > MAX_PC_SAMPLES) { + nsamples = MAX_PC_SAMPLES; + seqidx1 = (seqidx2 + 1) % MAX_PC_SAMPLES; + } + + if (nsamples > 0) { + /* + * Carefully copy sampled_pcs into sampled_pcs_msgbuf IN ORDER. + */ + if (seqidx1 < seqidx2) { + /* + * Simple case: no wraparound. 
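+ * (Sample number n lives in slot n % MAX_PC_SAMPLES, so the nsamples
+ * entries after *seqnop occupy the contiguous slots seqidx1+1 up to
+ * seqidx2.)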
+ * Copy from seqidx1 to seqidx2. + */ + memcpy(sampled_pcs_out, + (sampled_pc_array_t)cp->buffer + seqidx1 + 1, + nsamples * sizeof(sampled_pc_t)); + } else { + /* seqidx1 > seqidx2 -- Handle wraparound. */ + + memcpy(sampled_pcs_out, + (sampled_pc_array_t)cp->buffer + seqidx1 + 1, + (MAX_PC_SAMPLES - seqidx1 - 1) * sizeof(sampled_pc_t)); + + memcpy(sampled_pcs_out + (MAX_PC_SAMPLES - seqidx1 - 1), + (sampled_pc_array_t)cp->buffer, + (seqidx2 + 1) * sizeof(sampled_pc_t)); + } + } else if (nsamples < 0) { + /* Bogus SEQNO supplied. */ + nsamples = 0; + } else { + /* could either be zero because of overflow, or because + * we are being lied to. In either case, return nothing. + * If overflow, only once in a blue moon. If being lied to, + * then we have no obligation to return anything useful anyway. + */ + ; + } + + *sampled_pcs_cntp = nsamples; + *seqnop = cp->seqno; + return KERN_SUCCESS; +} + +kern_return_t +thread_get_sampled_pcs( + thread_t thread, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + mach_msg_type_number_t *sampled_pcs_cntp) +{ + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + if (thread->pc_sample.buffer == 0) + return KERN_FAILURE; + + return get_sampled_pcs(&thread->pc_sample, seqnop, sampled_pcs_out, + sampled_pcs_cntp); +} + +kern_return_t +task_get_sampled_pcs( + task_t task, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + mach_msg_type_number_t *sampled_pcs_cntp) +{ + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + if (task->pc_sample.buffer == 0) + return KERN_FAILURE; + + return get_sampled_pcs(&task->pc_sample, seqnop, sampled_pcs_out, + sampled_pcs_cntp); +} + +#else /* MACH_PCSAMPLE */ + +kern_return_t +thread_enable_pc_sampling( + thread_t thread, + int *tickp, + sampled_pc_flavor_t flavors) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +task_enable_pc_sampling( + task_t task, + int *tickp, + sampled_pc_flavor_t flavors) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +thread_disable_pc_sampling( + thread_t thread, + int *samplecntp) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +task_disable_pc_sampling( + task_t task, + int *samplecntp) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +thread_get_sampled_pcs( + thread_t thread, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + mach_msg_type_number_t *sampled_pcs_cntp) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +task_get_sampled_pcs( + task_t task, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + mach_msg_type_number_t *sampled_pcs_cntp) +{ + return KERN_FAILURE; /* not implemented */ +} + +#endif /* MACH_PCSAMPLE */ diff --git a/kern/pc_sample.h b/kern/pc_sample.h new file mode 100644 index 0000000..04ca667 --- /dev/null +++ b/kern/pc_sample.h @@ -0,0 +1,94 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * HISTORY + * Revision 1.1.1.1 1997/02/25 21:28:25 thomas + * Initial source + * + * Revision 1.1.1.1 1996/10/30 01:38:13 thomas + * Imported from UK22 + * + * Revision 1.1 1994/11/02 02:24:15 law + * Initial revision + * + * Revision 2.2 93/11/17 19:06:01 dbg + * Moved kernel internal definitions here from mach/pc_sample.h. + * [93/09/24 dbg] + * + */ + +/* + * Kernel definitions for PC sampling. + */ +#ifndef _KERN_PC_SAMPLE_H_ +#define _KERN_PC_SAMPLE_H_ + +#include <mach/pc_sample.h> +#include <mach/machine/vm_types.h> +#include <kern/kern_types.h> +#include <kern/macros.h> + +/* + * Control structure for sampling, included in + * threads and tasks. If sampletypes is 0, no + * sampling is done. + */ + +struct sample_control { + vm_offset_t buffer; + unsigned int seqno; + sampled_pc_flavor_t sampletypes; +}; + +typedef struct sample_control sample_control_t; + +/* + * Routines to take PC samples. + */ +extern void take_pc_sample( + thread_t thread, + sample_control_t *cp, + sampled_pc_flavor_t flavor, + boolean_t usermode, + vm_offset_t pc); + +/* + * Macro to do quick flavor check for sampling, + * on both threads and tasks. + */ +#define take_pc_sample_macro(thread, flavor, usermode, pc) \ + MACRO_BEGIN \ + task_t task; \ + \ + if ((thread)->pc_sample.sampletypes & (flavor)) \ + take_pc_sample((thread), &(thread)->pc_sample, (flavor), usermode, pc); \ + \ + task = (thread)->task; \ + if (task->pc_sample.sampletypes & (flavor)) \ + take_pc_sample((thread), &task->pc_sample, (flavor), usermode, pc); \ + MACRO_END + +#endif /* _KERN_PC_SAMPLE_H_ */ diff --git a/kern/printf.c b/kern/printf.c new file mode 100644 index 0000000..cbc27ae --- /dev/null +++ b/kern/printf.c @@ -0,0 +1,656 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Common code for printf et al. + * + * The calling routine typically takes a variable number of arguments, + * and passes the address of the first one. This implementation + * assumes a straightforward, stack implementation, aligned to the + * machine's wordsize. 
Increasing addresses are assumed to point to + * successive arguments (left-to-right), as is the case for a machine + * with a downward-growing stack with arguments pushed right-to-left. + * + * To write, for example, fprintf() using this routine, the code + * + * fprintf(fd, format, args) + * FILE *fd; + * char *format; + * { + * va_list listp; + * va_start(listp, fmt); + * _doprnt(format, &args, fd); + * va_end(listp); + * } + * + * would suffice. (This example does not handle the fprintf's "return + * value" correctly, but who looks at the return value of fprintf + * anyway?) + * + * This version implements the following printf features: + * + * %d decimal conversion + * %u unsigned conversion + * %p pointer address + * %x hexadecimal conversion + * %X hexadecimal conversion with capital letters + * %o octal conversion + * %c character + * %s string + * %m.n field width, precision + * %-m.n left adjustment + * %0m.n zero-padding + * %*.* width and precision taken from arguments + * + * This version does not implement %f, %e, or %g. It accepts, but + * ignores, an `l' as in %ld, %lo, %lx, and %lu, and therefore will not + * work correctly on machines for which sizeof(long) != sizeof(int). + * It does not even parse %D, %O, or %U; you should be using %ld, %o and + * %lu if you mean long conversion. + * + * As mentioned, this version does not return any reasonable value. + * + * Permission is granted to use, modify, or propagate this code as + * long as this notice is incorporated. + * + * Steve Summit 3/25/87 + */ + +/* + * Added formats for decoding device registers: + * + * printf("reg = %b", regval, "<base><arg>*") + * + * where <base> is the output base expressed as a control character: + * i.e. '\10' gives octal, '\20' gives hex. Each <arg> is a sequence of + * characters, the first of which gives the bit number to be inspected + * (origin 1), and the rest (up to a control character (<= 32)) give the + * name of the register. Thus + * printf("reg = %b\n", 3, "\10\2BITTWO\1BITONE") + * would produce + * reg = 3<BITTWO,BITONE> + * + * If the second character in <arg> is also a control character, it + * indicates the last bit of a bit field. In this case, printf will extract + * bits <1> to <2> and print it. Characters following the second control + * character are printed before the bit field. 
+ * printf("reg = %b\n", 0xb, "\10\4\3FIELD1=\2BITTWO\1BITONE") + * would produce + * reg = b<FIELD1=2,BITONE> + */ +/* + * Added for general use: + * # prefix for alternate format: + * 0x (0X) for hex + * leading 0 for octal + * + print '+' if positive + * blank print ' ' if positive + * + * z signed hexadecimal + * r signed, 'radix' + * n unsigned, 'radix' + * + * D,U,O,Z same as corresponding lower-case versions + * (compatibility) + */ + +#include <stdarg.h> +#include <string.h> +#include <device/cons.h> +#include <kern/printf.h> +#include <mach/boolean.h> + + +#define isdigit(d) ((d) >= '0' && (d) <= '9') +#define Ctod(c) ((c) - '0') + +#define MAXBUF (sizeof(long long int) * 8) /* enough for binary */ + + +void printnum( + unsigned long long u, + int base, + void (*putc)( char, vm_offset_t ), + vm_offset_t putc_arg) +{ + char buf[MAXBUF]; /* build number here */ + char * p = &buf[MAXBUF-1]; + static char digs[] = "0123456789abcdef"; + + do { + *p-- = digs[u % base]; + u /= base; + } while (u != 0); + + while (++p != &buf[MAXBUF]) + (*putc)(*p, putc_arg); + +} + +boolean_t _doprnt_truncates = FALSE; + +void _doprnt( + const char *fmt, + va_list argp, + /* character output routine */ + void (*putc)( char, vm_offset_t), + int radix, /* default radix - for '%r' */ + vm_offset_t putc_arg) +{ + int length; + int prec; + boolean_t ladjust; + char padc; + long long n; + unsigned long long u; + int have_long_long; + int plus_sign; + int sign_char; + boolean_t altfmt, truncate; + int base; + char c; + + while ((c = *fmt) != '\0') { + if (c != '%') { + (*putc)(c, putc_arg); + fmt++; + continue; + } + + fmt++; + + length = 0; + prec = -1; + ladjust = FALSE; + padc = ' '; + plus_sign = 0; + sign_char = 0; + altfmt = FALSE; + have_long_long = FALSE; + + while (TRUE) { + c = *fmt; + if (c == '#') { + altfmt = TRUE; + } + else if (c == '-') { + ladjust = TRUE; + } + else if (c == '+') { + plus_sign = '+'; + } + else if (c == ' ') { + if (plus_sign == 0) + plus_sign = ' '; + } + else + break; + fmt++; + } + + if (c == '0') { + padc = '0'; + c = *++fmt; + } + + if (isdigit(c)) { + while(isdigit(c)) { + length = 10 * length + Ctod(c); + c = *++fmt; + } + } + else if (c == '*') { + length = va_arg(argp, int); + c = *++fmt; + if (length < 0) { + ladjust = !ladjust; + length = -length; + } + } + + if (c == '.') { + c = *++fmt; + if (isdigit(c)) { + prec = 0; + while(isdigit(c)) { + prec = 10 * prec + Ctod(c); + c = *++fmt; + } + } + else if (c == '*') { + prec = va_arg(argp, int); + c = *++fmt; + } + } + + if (c == 'l') + c = *++fmt; /* need it if sizeof(int) < sizeof(long) */ + if (c == 'l') { + c = *++fmt; /* handle `long long' */ + have_long_long = TRUE; + } + + truncate = FALSE; + + switch(c) { + case 'b': + case 'B': + { + char *p; + boolean_t any; + int i; + + if (! 
have_long_long) + u = va_arg(argp, unsigned long); + else + u = va_arg(argp, unsigned long long); + p = va_arg(argp, char *); + base = *p++; + printnum(u, base, putc, putc_arg); + + if (u == 0) + break; + + any = FALSE; + while ((i = *p++)) { + /* NOTE: The '32' here is because ascii space */ + if (*p <= 32) { + /* + * Bit field + */ + int j; + if (any) + (*putc)(',', putc_arg); + else { + (*putc)('<', putc_arg); + any = TRUE; + } + j = *p++; + for (; (c = *p) > 32; p++) + (*putc)(c, putc_arg); + printnum((unsigned)( (u>>(j-1)) & ((2<<(i-j))-1)), + base, putc, putc_arg); + } + else if (u & (1<<(i-1))) { + if (any) + (*putc)(',', putc_arg); + else { + (*putc)('<', putc_arg); + any = TRUE; + } + for (; (c = *p) > 32; p++) + (*putc)(c, putc_arg); + } + else { + for (; *p > 32; p++) + continue; + } + } + if (any) + (*putc)('>', putc_arg); + break; + } + + case 'c': + c = va_arg(argp, int); + (*putc)(c, putc_arg); + break; + + case 's': + { + char *p; + char *p2; + + if (prec == -1) + prec = 0x7fffffff; /* MAXINT */ + + p = va_arg(argp, char *); + + if (p == (char *)0) + p = ""; + + if (length > 0 && !ladjust) { + n = 0; + p2 = p; + + for (; *p != '\0' && n < prec; p++) + n++; + + p = p2; + + while (n < length) { + (*putc)(' ', putc_arg); + n++; + } + } + + n = 0; + + while (*p != '\0') { + if (++n > prec) + break; + + (*putc)(*p++, putc_arg); + } + + if (n < length && ladjust) { + while (n < length) { + (*putc)(' ', putc_arg); + n++; + } + } + + break; + } + + case 'o': + truncate = _doprnt_truncates; + case 'O': + base = 8; + goto print_unsigned; + + case 'd': + truncate = _doprnt_truncates; + case 'D': + base = 10; + goto print_signed; + + case 'u': + truncate = _doprnt_truncates; + case 'U': + base = 10; + goto print_unsigned; + + case 'p': + case 'x': + truncate = _doprnt_truncates; + case 'X': + base = 16; + goto print_unsigned; + + case 'z': + truncate = _doprnt_truncates; + case 'Z': + base = 16; + goto print_signed; + + case 'r': + truncate = _doprnt_truncates; + case 'R': + base = radix; + goto print_signed; + + case 'n': + truncate = _doprnt_truncates; + case 'N': + base = radix; + goto print_unsigned; + + print_signed: + if (! have_long_long) + n = va_arg(argp, long); + else + n = va_arg(argp, long long); + if (n >= 0) { + u = n; + sign_char = plus_sign; + } + else { + u = -n; + sign_char = '-'; + } + goto print_num; + + print_unsigned: + if (! 
have_long_long) + u = va_arg(argp, unsigned long); + else + u = va_arg(argp, unsigned long long); + goto print_num; + + print_num: + { + char buf[MAXBUF]; /* build number here */ + char * p = &buf[MAXBUF-1]; + static char digits[] = "0123456789abcdef"; + char *prefix = 0; + + if (truncate) u = (long)((int)(u)); + + if (u != 0 && altfmt) { + if (base == 8) + prefix = "0"; + else if (base == 16) + prefix = "0x"; + } + + do { + *p-- = digits[u % base]; + u /= base; + } while (u != 0); + + length -= (&buf[MAXBUF-1] - p); + if (sign_char) + length--; + if (prefix) + length -= strlen(prefix); + + if (padc == ' ' && !ladjust) { + /* blank padding goes before prefix */ + while (--length >= 0) + (*putc)(' ', putc_arg); + } + if (sign_char) + (*putc)(sign_char, putc_arg); + if (prefix) + while (*prefix) + (*putc)(*prefix++, putc_arg); + if (padc == '0') { + /* zero padding goes after sign and prefix */ + while (--length >= 0) + (*putc)('0', putc_arg); + } + while (++p != &buf[MAXBUF]) + (*putc)(*p, putc_arg); + + if (ladjust) { + while (--length >= 0) + (*putc)(' ', putc_arg); + } + break; + } + + case '\0': + fmt--; + break; + + default: + (*putc)(c, putc_arg); + } + fmt++; + } +} + +/* + * Printing (to console) + */ + +int vprintf(const char *fmt, va_list listp) +{ + _doprnt(fmt, listp, (void (*)( char, vm_offset_t)) cnputc, 16, 0); + return 0; +} + +/*VARARGS1*/ +int printf(const char *fmt, ...) +{ + va_list listp; + va_start(listp, fmt); + vprintf(fmt, listp); + va_end(listp); + return 0; +} + +int indent = 0; + +/* + * Printing (to console) with indentation. + */ +/*VARARGS1*/ +void iprintf(const char *fmt, ...) +{ + va_list listp; + int i; + + for (i = indent; i > 0; ){ + if (i >= 8) { + printf("\t"); + i -= 8; + } + else { + printf(" "); + i--; + } + } + va_start(listp, fmt); + _doprnt(fmt, listp, (void (*)( char, vm_offset_t)) cnputc, 16, 0); + va_end(listp); +} + +/* + * Printing to generic buffer + * Returns #bytes printed. + * Strings are zero-terminated. + */ +static void +sputc( + char c, + vm_offset_t arg) +{ + char **bufp = (char **) arg; + char *p = *bufp; + *p++ = c; + *bufp = p; +} + +int +sprintf(char *buf, const char *fmt, ...) +{ + va_list listp; + char *start = buf; + + va_start(listp, fmt); + _doprnt(fmt, listp, sputc, 16, (vm_offset_t)&buf); + va_end(listp); + + *buf = 0; + return (buf - start); +} + +struct vsnprintf_cookie +{ + char *buf; + int index; + int max_len; +}; + +static void +snputc(char c, vm_offset_t arg) +{ + struct vsnprintf_cookie *cookie = (void *) arg; + + if (cookie->index < cookie->max_len) + cookie->buf[cookie->index ++] = c; +} + +int +vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + struct vsnprintf_cookie cookie + = { .buf = buf, .index = 0, .max_len = size }; + + _doprnt (fmt, args, snputc, 16, (vm_offset_t)&cookie); + cookie.buf[cookie.index] = '\0'; + + return cookie.index; +} + +int +snprintf(char *buf, size_t size, const char *fmt, ...) 
+{ + int written; + va_list listp; + va_start(listp, fmt); + written = vsnprintf(buf, size, fmt, listp); + va_end(listp); + return written; +} + +void safe_gets( + char *str, + int maxlen) +{ + char *lp; + int c; + char *strmax = str + maxlen - 1; /* allow space for trailing 0 */ + + lp = str; + for (;;) { + c = cngetc(); + switch (c) { + case '\n': + case '\r': + printf("\n"); + *lp++ = 0; + return; + + case '\b': + case '#': + case '\177': + if (lp > str) { + printf("\b \b"); + lp--; + } + continue; + + case '@': + case 'u'&037: + lp = str; + printf("\n\r"); + continue; + + default: + if (c >= ' ' && c < '\177') { + if (lp < strmax) { + *lp++ = c; + printf("%c", c); + } + else { + printf("%c", '\007'); /* beep */ + } + } + } + } +} diff --git a/kern/printf.h b/kern/printf.h new file mode 100644 index 0000000..b72640a --- /dev/null +++ b/kern/printf.h @@ -0,0 +1,68 @@ +/* + * Header file for printf type functions. + * Copyright (C) 2006, 2007 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * String handling functions. + * + */ + +#ifndef _MACH_SA_SYS_PRINTF_H_ +#define _MACH_SA_SYS_PRINTF_H_ + +#include <sys/types.h> +#include <stdarg.h> + +extern void _doprnt (const char *fmt, + va_list argp, + void (*putc)(char, vm_offset_t), + int radix, + vm_offset_t putc_arg); + +extern void printnum (unsigned long long u, int base, + void (*putc)(char, vm_offset_t), + vm_offset_t putc_arg); + +extern int sprintf (char *buf, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int snprintf (char *buf, size_t size, const char *fmt, ...) + __attribute__ ((format (printf, 3, 4))); +extern int vsnprintf (char *buf, size_t size, const char *fmt, va_list args) + __attribute__ ((format (printf, 3, 0))); + + +extern int printf (const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +#define printf_once(fmt, ...) \ + MACRO_BEGIN \ + static int __once = 0; \ + if (!__once) { \ + printf(fmt, ##__VA_ARGS__); \ + __once = 1; \ + } \ + MACRO_END + +extern int indent; +extern void iprintf (const char *fmt, ...); + +extern int vprintf(const char *fmt, va_list listp); + +extern void safe_gets (char *str, int maxlen); + +#endif /* _MACH_SA_SYS_PRINTF_H_ */ + diff --git a/kern/priority.c b/kern/priority.c new file mode 100644 index 0000000..fe11d0d --- /dev/null +++ b/kern/priority.c @@ -0,0 +1,223 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: priority.c + * Author: Avadis Tevanian, Jr. + * Date: 1986 + * + * Clock primitives. + */ + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/machine.h> +#include <kern/host.h> +#include <kern/mach_clock.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> +#include <kern/priority.h> +#include <kern/processor.h> +#include <kern/timer.h> +#include <machine/machspl.h> + + + +/* + * USAGE_THRESHOLD is the amount by which usage must change to + * cause a priority shift that moves a thread between run queues. + */ + +#ifdef PRI_SHIFT_2 +#if PRI_SHIFT_2 > 0 +#define USAGE_THRESHOLD (((1 << PRI_SHIFT) + (1 << PRI_SHIFT_2)) << (2 + SCHED_SHIFT)) +#else /* PRI_SHIFT_2 > 0 */ +#define USAGE_THRESHOLD (((1 << PRI_SHIFT) - (1 << -(PRI_SHIFT_2))) << (2 + SCHED_SHIFT)) +#endif /* PRI_SHIFT_2 > 0 */ +#else /* PRI_SHIFT_2 */ +#define USAGE_THRESHOLD (1 << (PRI_SHIFT + 2 + SCHED_SHIFT)) +#endif /* PRI_SHIFT_2 */ + +/* + * thread_quantum_update: + * + * Recalculate the quantum and priority for a thread. + * The number of ticks that has elapsed since we were last called + * is passed as "nticks." + * + * Called only from clock_interrupt(). + */ + +void thread_quantum_update( + int mycpu, + thread_t thread, + int nticks, + int state) +{ + int quantum; + processor_t myprocessor; +#if NCPUS > 1 + processor_set_t pset; +#endif + spl_t s; + + myprocessor = cpu_to_processor(mycpu); +#if NCPUS > 1 + pset = myprocessor->processor_set; + if (pset == 0) { + /* + * Processor is being reassigned. + * Should rewrite processor assignment code to + * block clock interrupts. + */ + return; + } +#endif /* NCPUS > 1 */ + + /* + * Account for thread's utilization of these ticks. + * This assumes that there is *always* a current thread. + * When the processor is idle, it should be the idle thread. + */ + + /* + * Update set_quantum and calculate the current quantum. + */ +#if NCPUS > 1 + pset->set_quantum = pset->machine_quantum[ + ((pset->runq.count > pset->processor_count) ? + pset->processor_count : pset->runq.count)]; + + if (myprocessor->runq.count != 0) + quantum = min_quantum; + else + quantum = pset->set_quantum; +#else /* NCPUS > 1 */ + quantum = min_quantum; + default_pset.set_quantum = quantum; +#endif /* NCPUS > 1 */ + + /* + * Now recompute the priority of the thread if appropriate. + */ + + if (state != CPU_STATE_IDLE) { + myprocessor->quantum -= nticks; +#if NCPUS > 1 + /* + * Runtime quantum adjustment. Use quantum_adj_index + * to avoid synchronizing quantum expirations. 
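+ *
+ * Illustrative numbers only: with min_quantum = 10, a set quantum of
+ * 40 and a 4-processor set, successive adjustments yield quanta of
+ * 10, 20, 30 and 40 ticks as quantum_adj_index cycles through 0..3,
+ * staggering expirations across the processors.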
+ */ + if ((quantum != myprocessor->last_quantum) && + (pset->processor_count > 1)) { + myprocessor->last_quantum = quantum; + simple_lock(&pset->quantum_adj_lock); + quantum = min_quantum + (pset->quantum_adj_index * + (quantum - min_quantum)) / + (pset->processor_count - 1); + if (++(pset->quantum_adj_index) >= + pset->processor_count) + pset->quantum_adj_index = 0; + simple_unlock(&pset->quantum_adj_lock); + } +#endif /* NCPUS > 1 */ + if (myprocessor->quantum <= 0) { + s = splsched(); + thread_lock(thread); + if (thread->sched_stamp != sched_tick) { + update_priority(thread); + } + else { + if ( +#if MACH_FIXPRI + (thread->policy == POLICY_TIMESHARE) && +#endif /* MACH_FIXPRI */ + (thread->depress_priority < 0)) { + thread_timer_delta(thread); + thread->sched_usage += + thread->sched_delta; + thread->sched_delta = 0; + compute_my_priority(thread); + } + } + thread_unlock(thread); + (void) splx(s); + /* + * This quantum is up, give this thread another. + */ + myprocessor->first_quantum = FALSE; +#if MACH_FIXPRI + if (thread->policy == POLICY_TIMESHARE) { +#endif /* MACH_FIXPRI */ + myprocessor->quantum += quantum; +#if MACH_FIXPRI + } + else { + /* + * Fixed priority has per-thread quantum. + * + */ + myprocessor->quantum += thread->sched_data; + } +#endif /* MACH_FIXPRI */ + } + /* + * Recompute priority if appropriate. + */ + else { + s = splsched(); + thread_lock(thread); + if (thread->sched_stamp != sched_tick) { + update_priority(thread); + } + else { + if ( +#if MACH_FIXPRI + (thread->policy == POLICY_TIMESHARE) && +#endif /* MACH_FIXPRI */ + (thread->depress_priority < 0)) { + thread_timer_delta(thread); + if (thread->sched_delta >= USAGE_THRESHOLD) { + thread->sched_usage += + thread->sched_delta; + thread->sched_delta = 0; + compute_my_priority(thread); + } + } + } + thread_unlock(thread); + (void) splx(s); + } + /* + * Check for and schedule ast if needed. + */ + ast_check(); + } +} + diff --git a/kern/priority.h b/kern/priority.h new file mode 100644 index 0000000..2da93eb --- /dev/null +++ b/kern/priority.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2013 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _KERN_PRIORITY_H_ +#define _KERN_PRIORITY_H_ + +extern void thread_quantum_update( + int mycpu, + thread_t thread, + int nticks, + int state); + +#endif /* _KERN_PRIORITY_H_ */ diff --git a/kern/processor.c b/kern/processor.c new file mode 100644 index 0000000..71bbb75 --- /dev/null +++ b/kern/processor.c @@ -0,0 +1,1034 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1988 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * processor.c: processor and processor_set manipulation routines. + */ + +#include <string.h> + +#include <mach/boolean.h> +#include <mach/policy.h> +#include <mach/processor_info.h> +#include <mach/vm_param.h> +#include <kern/cpu_number.h> +#include <kern/debug.h> +#include <kern/kalloc.h> +#include <kern/lock.h> +#include <kern/host.h> +#include <kern/ipc_tt.h> +#include <kern/machine.h> +#include <kern/processor.h> +#include <kern/sched.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/ipc_host.h> +#include <ipc/ipc_port.h> +#include <machine/mp_desc.h> + +#if MACH_HOST +#include <kern/slab.h> +struct kmem_cache pset_cache; +struct processor_set *slave_pset; +#endif /* MACH_HOST */ + + +/* + * Exported variables. + */ +int master_cpu; + +struct processor_set default_pset; + +queue_head_t all_psets; +int all_psets_count; +def_simple_lock_data(, all_psets_lock); + +processor_t master_processor; + +/* + * Bootstrap the processor/pset system so the scheduler can run. + */ +void pset_sys_bootstrap(void) +{ + int i; + + pset_init(&default_pset); + default_pset.empty = FALSE; + for (i = 0; i < NCPUS; i++) { + /* + * Initialize processor data structures. + * Note that cpu_to_processor is processor_ptr. + */ + processor_init(processor_ptr(i), i); + } + master_processor = cpu_to_processor(master_cpu); + queue_init(&all_psets); + simple_lock_init(&all_psets_lock); + queue_enter(&all_psets, &default_pset, processor_set_t, all_psets); + all_psets_count = 1; + default_pset.active = TRUE; + default_pset.empty = FALSE; + + /* + * Note: the default_pset has a max_priority of BASEPRI_USER. + * Internal kernel threads override this in kernel_thread. + */ +} + +#if MACH_HOST +/* + * Rest of pset system initializations. + */ +void pset_sys_init(void) +{ + int i; + processor_t processor; + + /* + * Allocate the cache for processor sets. + */ + kmem_cache_init(&pset_cache, "processor_set", + sizeof(struct processor_set), 0, NULL, 0); + + /* + * Give each processor a control port. + * The master processor already has one. + */ + for (i = 0; i < NCPUS; i++) { + processor = cpu_to_processor(i); + if (processor != master_processor && + machine_slot[i].is_cpu) + { + ipc_processor_init(processor); + } + } + + processor_set_create(&realhost, &slave_pset, &slave_pset); +} +#endif /* MACH_HOST */ + +/* + * Initialize the given processor_set structure. 
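+ *
+ * pset_init() only sets up the in-memory state (run queue, idle
+ * queue, counters and locks); e.g. processor_set_create() below
+ * pairs it with pset_reference() for each out argument and
+ * ipc_pset_init() before the new set is made visible.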
+ */ + +void pset_init( + processor_set_t pset) +{ + int i; + + simple_lock_init(&pset->runq.lock); + pset->runq.low = 0; + pset->runq.count = 0; + for (i = 0; i < NRQS; i++) { + queue_init(&(pset->runq.runq[i])); + } + queue_init(&pset->idle_queue); + pset->idle_count = 0; + simple_lock_init(&pset->idle_lock); + queue_init(&pset->processors); + pset->processor_count = 0; + pset->empty = TRUE; + queue_init(&pset->tasks); + pset->task_count = 0; + queue_init(&pset->threads); + pset->thread_count = 0; + pset->ref_count = 1; + simple_lock_init(&pset->ref_lock); + queue_init(&pset->all_psets); + pset->active = FALSE; + simple_lock_init(&pset->lock); + pset->pset_self = IP_NULL; + pset->pset_name_self = IP_NULL; + pset->max_priority = BASEPRI_USER; +#if MACH_FIXPRI + pset->policies = POLICY_TIMESHARE; +#endif /* MACH_FIXPRI */ + pset->set_quantum = min_quantum; +#if NCPUS > 1 + pset->quantum_adj_index = 0; + simple_lock_init(&pset->quantum_adj_lock); + + for (i = 0; i <= NCPUS; i++) { + pset->machine_quantum[i] = min_quantum; + } +#endif /* NCPUS > 1 */ + pset->mach_factor = 0; + pset->load_average = 0; + pset->sched_load = SCHED_SCALE; /* i.e. 1 */ +} + +/* + * Initialize the given processor structure for the processor in + * the slot specified by slot_num. + */ + +void processor_init( + processor_t pr, + int slot_num) +{ + int i; + + simple_lock_init(&pr->runq.lock); + pr->runq.low = 0; + pr->runq.count = 0; + for (i = 0; i < NRQS; i++) { + queue_init(&(pr->runq.runq[i])); + } + queue_init(&pr->processor_queue); + pr->state = PROCESSOR_OFF_LINE; + pr->next_thread = THREAD_NULL; + pr->idle_thread = THREAD_NULL; + pr->quantum = 0; + pr->first_quantum = FALSE; + pr->last_quantum = 0; + pr->processor_set = PROCESSOR_SET_NULL; + pr->processor_set_next = PROCESSOR_SET_NULL; + queue_init(&pr->processors); + simple_lock_init(&pr->lock); + pr->processor_self = IP_NULL; + pr->slot_num = slot_num; +} + +/* + * pset_remove_processor() removes a processor from a processor_set. + * It can only be called on the current processor. Caller must + * hold lock on current processor and processor set. + */ + +void pset_remove_processor( + processor_set_t pset, + processor_t processor) +{ + if (pset != processor->processor_set) + panic("pset_remove_processor: wrong pset"); + + queue_remove(&pset->processors, processor, processor_t, processors); + processor->processor_set = PROCESSOR_SET_NULL; + pset->processor_count--; + quantum_set(pset); +} + +/* + * pset_add_processor() adds a processor to a processor_set. + * It can only be called on the current processor. Caller must + * hold lock on curent processor and on pset. No reference counting on + * processors. Processor reference to pset is implicit. + */ + +void pset_add_processor( + processor_set_t pset, + processor_t processor) +{ + queue_enter(&pset->processors, processor, processor_t, processors); + processor->processor_set = pset; + pset->processor_count++; + pset->empty = FALSE; + quantum_set(pset); +} + +/* + * pset_remove_task() removes a task from a processor_set. + * Caller must hold locks on pset and task. Pset reference count + * is not decremented; caller must explicitly pset_deallocate. + */ + +void pset_remove_task( + processor_set_t pset, + task_t task) +{ + if (pset != task->processor_set) + return; + + queue_remove(&pset->tasks, task, task_t, pset_tasks); + task->processor_set = PROCESSOR_SET_NULL; + pset->task_count--; +} + +/* + * pset_add_task() adds a task to a processor_set. + * Caller must hold locks on pset and task. 
Pset references to + * tasks are implicit. + */ + +void pset_add_task( + processor_set_t pset, + task_t task) +{ + queue_enter(&pset->tasks, task, task_t, pset_tasks); + task->processor_set = pset; + pset->task_count++; +} + +/* + * pset_remove_thread() removes a thread from a processor_set. + * Caller must hold locks on pset and thread. Pset reference count + * is not decremented; caller must explicitly pset_deallocate. + */ + +void pset_remove_thread( + processor_set_t pset, + thread_t thread) +{ + queue_remove(&pset->threads, thread, thread_t, pset_threads); + thread->processor_set = PROCESSOR_SET_NULL; + pset->thread_count--; +} + +/* + * pset_add_thread() adds a thread to a processor_set. + * Caller must hold locks on pset and thread. Pset references to + * threads are implicit. + */ + +void pset_add_thread( + processor_set_t pset, + thread_t thread) +{ + queue_enter(&pset->threads, thread, thread_t, pset_threads); + thread->processor_set = pset; + pset->thread_count++; +} + +/* + * thread_change_psets() changes the pset of a thread. Caller must + * hold locks on both psets and thread. The old pset must be + * explicitly pset_deallocat()'ed by caller. + */ + +void thread_change_psets( + thread_t thread, + processor_set_t old_pset, + processor_set_t new_pset) +{ + queue_remove(&old_pset->threads, thread, thread_t, pset_threads); + old_pset->thread_count--; + queue_enter(&new_pset->threads, thread, thread_t, pset_threads); + thread->processor_set = new_pset; + new_pset->thread_count++; +} + +/* + * pset_deallocate: + * + * Remove one reference to the processor set. Destroy processor_set + * if this was the last reference. + */ +void pset_deallocate( + processor_set_t pset) +{ + if (pset == PROCESSOR_SET_NULL) + return; + + pset_ref_lock(pset); + if (--pset->ref_count > 0) { + pset_ref_unlock(pset); + return; + } +#if !MACH_HOST + panic("pset_deallocate: default_pset destroyed"); +#endif /* !MACH_HOST */ + +#if MACH_HOST + /* + * Reference count is zero, however the all_psets list + * holds an implicit reference and may make new ones. + * Its lock also dominates the pset lock. To check for this, + * temporarily restore one reference, and then lock the + * other structures in the right order. + */ + pset->ref_count = 1; + pset_ref_unlock(pset); + + simple_lock(&all_psets_lock); + pset_ref_lock(pset); + if (--pset->ref_count > 0) { + /* + * Made an extra reference. + */ + pset_ref_unlock(pset); + simple_unlock(&all_psets_lock); + return; + } + + /* + * Ok to destroy pset. Make a few paranoia checks. + */ + + if ((pset == &default_pset) || (pset->thread_count > 0) || + (pset->task_count > 0) || pset->processor_count > 0) { + panic("pset_deallocate: destroy default or active pset"); + } + /* + * Remove from all_psets queue. + */ + queue_remove(&all_psets, pset, processor_set_t, all_psets); + all_psets_count--; + + pset_ref_unlock(pset); + simple_unlock(&all_psets_lock); + + /* + * That's it, free data structure. + */ + kmem_cache_free(&pset_cache, (vm_offset_t)pset); +#endif /* MACH_HOST */ +} + +/* + * pset_reference: + * + * Add one reference to the processor set. 
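+ *
+ * Every pset_reference() is expected to be balanced later by a
+ * pset_deallocate().  For example, processor_get_assignment() below
+ * takes a reference on behalf of its caller, which is then
+ * responsible for dropping it.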
+ */ +void pset_reference( + processor_set_t pset) +{ + pset_ref_lock(pset); + pset->ref_count++; + pset_ref_unlock(pset); +} + +kern_return_t +processor_info( + processor_t processor, + int flavor, + host_t *host, + processor_info_t info, + natural_t *count) +{ + int slot_num, state; + processor_basic_info_t basic_info; + + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + + if (flavor != PROCESSOR_BASIC_INFO || + *count < PROCESSOR_BASIC_INFO_COUNT) + return KERN_FAILURE; + + basic_info = (processor_basic_info_t) info; + + slot_num = processor->slot_num; + basic_info->cpu_type = machine_slot[slot_num].cpu_type; + basic_info->cpu_subtype = machine_slot[slot_num].cpu_subtype; + state = processor->state; + if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE) + basic_info->running = FALSE; + else + basic_info->running = TRUE; + basic_info->slot_num = slot_num; + if (processor == master_processor) + basic_info->is_master = TRUE; + else + basic_info->is_master = FALSE; + + *count = PROCESSOR_BASIC_INFO_COUNT; + *host = &realhost; + return KERN_SUCCESS; +} + +kern_return_t processor_start( + processor_t processor) +{ + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; +#if NCPUS > 1 + return cpu_start(processor->slot_num); +#else /* NCPUS > 1 */ + return KERN_FAILURE; +#endif /* NCPUS > 1 */ +} + +kern_return_t processor_exit( + processor_t processor) +{ + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + +#if NCPUS > 1 + return processor_shutdown(processor); +#else /* NCPUS > 1 */ + return KERN_FAILURE; +#endif /* NCPUS > 1 */ +} + +kern_return_t +processor_control( + processor_t processor, + processor_info_t info, + natural_t count) +{ + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + +#if NCPUS > 1 + return cpu_control(processor->slot_num, (int *)info, count); +#else /* NCPUS > 1 */ + return KERN_FAILURE; +#endif /* NCPUS > 1 */ +} + +/* + * Precalculate the appropriate system quanta based on load. The + * index into machine_quantum is the number of threads on the + * processor set queue. It is limited to the number of processors in + * the set. + */ + +void quantum_set( + processor_set_t pset) +{ +#if NCPUS > 1 + int i, ncpus; + + ncpus = pset->processor_count; + + for ( i=1 ; i <= ncpus ; i++) { + pset->machine_quantum[i] = + ((min_quantum * ncpus) + (i/2)) / i ; + } + pset->machine_quantum[0] = 2 * pset->machine_quantum[1]; + + i = ((pset->runq.count > pset->processor_count) ? + pset->processor_count : pset->runq.count); + pset->set_quantum = pset->machine_quantum[i]; +#else /* NCPUS > 1 */ + default_pset.set_quantum = min_quantum; +#endif /* NCPUS > 1 */ +} + +#if MACH_HOST +/* + * processor_set_create: + * + * Create and return a new processor set. + */ + +kern_return_t +processor_set_create( + host_t host, + processor_set_t *new_set, + processor_set_t *new_name) +{ + processor_set_t pset; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + pset = (processor_set_t) kmem_cache_alloc(&pset_cache); + pset_init(pset); + pset_reference(pset); /* for new_set out argument */ + pset_reference(pset); /* for new_name out argument */ + ipc_pset_init(pset); + pset->active = TRUE; + + simple_lock(&all_psets_lock); + queue_enter(&all_psets, pset, processor_set_t, all_psets); + all_psets_count++; + simple_unlock(&all_psets_lock); + + ipc_pset_enable(pset); + + *new_set = pset; + *new_name = pset; + return KERN_SUCCESS; +} + +/* + * processor_set_destroy: + * + * destroy a processor set. 
Any tasks, threads or processors + * currently assigned to it are reassigned to the default pset. + */ +kern_return_t processor_set_destroy( + processor_set_t pset) +{ + queue_entry_t elem; + queue_head_t *list; + + if (pset == PROCESSOR_SET_NULL || pset == &default_pset) + return KERN_INVALID_ARGUMENT; + + /* + * Handle multiple termination race. First one through sets + * active to FALSE and disables ipc access. + */ + pset_lock(pset); + if (!(pset->active)) { + pset_unlock(pset); + return KERN_FAILURE; + } + + pset->active = FALSE; + ipc_pset_disable(pset); + + + /* + * Now reassign everything in this set to the default set. + */ + + if (pset->task_count > 0) { + list = &pset->tasks; + while (!queue_empty(list)) { + elem = queue_first(list); + task_reference((task_t) elem); + pset_unlock(pset); + task_assign((task_t) elem, &default_pset, FALSE); + task_deallocate((task_t) elem); + pset_lock(pset); + } + } + + if (pset->thread_count > 0) { + list = &pset->threads; + while (!queue_empty(list)) { + elem = queue_first(list); + thread_reference((thread_t) elem); + pset_unlock(pset); + thread_assign((thread_t) elem, &default_pset); + thread_deallocate((thread_t) elem); + pset_lock(pset); + } + } + + if (pset->processor_count > 0) { + list = &pset->processors; + while(!queue_empty(list)) { + elem = queue_first(list); + pset_unlock(pset); + processor_assign((processor_t) elem, &default_pset, TRUE); + pset_lock(pset); + } + } + + pset_unlock(pset); + + /* + * Destroy ipc state. + */ + ipc_pset_terminate(pset); + + /* + * Deallocate pset's reference to itself. + */ + pset_deallocate(pset); + return KERN_SUCCESS; +} + +#else /* MACH_HOST */ + +kern_return_t +processor_set_create( + host_t host, + processor_set_t *new_set, + processor_set_t *new_name) +{ + return KERN_FAILURE; +} + +kern_return_t processor_set_destroy( + processor_set_t pset) +{ + return KERN_FAILURE; +} + +#endif /* MACH_HOST */ + +kern_return_t +processor_get_assignment( + processor_t processor, + processor_set_t *pset) +{ + int state; + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + + state = processor->state; + if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE) + return KERN_FAILURE; + + *pset = processor->processor_set; + pset_reference(*pset); + return KERN_SUCCESS; +} + +kern_return_t +processor_set_info( + processor_set_t pset, + int flavor, + host_t *host, + processor_set_info_t info, + natural_t *count) +{ + if (pset == PROCESSOR_SET_NULL) + return KERN_INVALID_ARGUMENT; + + if (flavor == PROCESSOR_SET_BASIC_INFO) { + processor_set_basic_info_t basic_info; + + if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) + return KERN_FAILURE; + + basic_info = (processor_set_basic_info_t) info; + + pset_lock(pset); + basic_info->processor_count = pset->processor_count; + basic_info->task_count = pset->task_count; + basic_info->thread_count = pset->thread_count; + basic_info->mach_factor = pset->mach_factor; + basic_info->load_average = pset->load_average; + pset_unlock(pset); + + *count = PROCESSOR_SET_BASIC_INFO_COUNT; + *host = &realhost; + return KERN_SUCCESS; + } + else if (flavor == PROCESSOR_SET_SCHED_INFO) { + processor_set_sched_info_t sched_info; + + if (*count < PROCESSOR_SET_SCHED_INFO_COUNT) + return KERN_FAILURE; + + sched_info = (processor_set_sched_info_t) info; + + pset_lock(pset); +#if MACH_FIXPRI + sched_info->policies = pset->policies; +#else /* MACH_FIXPRI */ + sched_info->policies = POLICY_TIMESHARE; +#endif /* MACH_FIXPRI */ + sched_info->max_priority = pset->max_priority; + 
pset_unlock(pset); + + *count = PROCESSOR_SET_SCHED_INFO_COUNT; + *host = &realhost; + return KERN_SUCCESS; + } + + *host = HOST_NULL; + return KERN_INVALID_ARGUMENT; +} + +/* + * processor_set_max_priority: + * + * Specify max priority permitted on processor set. This affects + * newly created and assigned threads. Optionally change existing + * ones. + */ +kern_return_t +processor_set_max_priority( + processor_set_t pset, + int max_priority, + boolean_t change_threads) +{ + if (pset == PROCESSOR_SET_NULL || invalid_pri(max_priority)) + return KERN_INVALID_ARGUMENT; + + pset_lock(pset); + pset->max_priority = max_priority; + + if (change_threads) { + queue_head_t *list; + thread_t thread; + + list = &pset->threads; + queue_iterate(list, thread, thread_t, pset_threads) { + if (thread->max_priority < max_priority) + thread_max_priority(thread, pset, max_priority); + } + } + + pset_unlock(pset); + + return KERN_SUCCESS; +} + +/* + * processor_set_policy_enable: + * + * Allow indicated policy on processor set. + */ + +kern_return_t +processor_set_policy_enable( + processor_set_t pset, + int policy) +{ + if ((pset == PROCESSOR_SET_NULL) || invalid_policy(policy)) + return KERN_INVALID_ARGUMENT; + +#if MACH_FIXPRI + pset_lock(pset); + pset->policies |= policy; + pset_unlock(pset); + + return KERN_SUCCESS; +#else /* MACH_FIXPRI */ + if (policy == POLICY_TIMESHARE) + return KERN_SUCCESS; + else + return KERN_FAILURE; +#endif /* MACH_FIXPRI */ +} + +/* + * processor_set_policy_disable: + * + * Forbid indicated policy on processor set. Time sharing cannot + * be forbidden. + */ + +kern_return_t +processor_set_policy_disable( + processor_set_t pset, + int policy, + boolean_t change_threads) +{ + if ((pset == PROCESSOR_SET_NULL) || policy == POLICY_TIMESHARE || + invalid_policy(policy)) + return KERN_INVALID_ARGUMENT; + +#if MACH_FIXPRI + pset_lock(pset); + + /* + * Check if policy enabled. Disable if so, then handle + * change_threads. + */ + if (pset->policies & policy) { + pset->policies &= ~policy; + + if (change_threads) { + queue_head_t *list; + thread_t thread; + + list = &pset->threads; + queue_iterate(list, thread, thread_t, pset_threads) { + if (thread->policy == policy) + thread_policy(thread, POLICY_TIMESHARE, 0); + } + } + } + pset_unlock(pset); +#endif /* MACH_FIXPRI */ + + return KERN_SUCCESS; +} + +#define THING_TASK 0 +#define THING_THREAD 1 + +/* + * processor_set_things: + * + * Common internals for processor_set_{threads,tasks} + */ +static kern_return_t +processor_set_things( + processor_set_t pset, + mach_port_t **thing_list, + natural_t *count, + int type) +{ + unsigned int actual; /* this many things */ + unsigned i; + + vm_size_t size, size_needed; + vm_offset_t addr; + + if (pset == PROCESSOR_SET_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + pset_lock(pset); + if (!pset->active) { + pset_unlock(pset); + return KERN_FAILURE; + } + + if (type == THING_TASK) + actual = pset->task_count; + else + actual = pset->thread_count; + + /* do we have the memory we need? 
*/ + + size_needed = actual * sizeof(mach_port_t); + if (size_needed <= size) + break; + + /* unlock the pset and allocate more memory */ + pset_unlock(pset); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the processor_set is locked & active */ + + switch (type) { + case THING_TASK: { + task_t *tasks = (task_t *) addr; + task_t task; + + for (i = 0, task = (task_t) queue_first(&pset->tasks); + i < actual; + i++, task = (task_t) queue_next(&task->pset_tasks)) { + /* take ref for convert_task_to_port */ + task_reference(task); + tasks[i] = task; + } + assert(queue_end(&pset->tasks, (queue_entry_t) task)); + break; + } + + case THING_THREAD: { + thread_t *threads = (thread_t *) addr; + thread_t thread; + + for (i = 0, thread = (thread_t) queue_first(&pset->threads); + i < actual; + i++, + thread = (thread_t) queue_next(&thread->pset_threads)) { + /* take ref for convert_thread_to_port */ + thread_reference(thread); + threads[i] = thread; + } + assert(queue_end(&pset->threads, (queue_entry_t) thread)); + break; + } + } + + /* can unlock processor set now that we have the task/thread refs */ + pset_unlock(pset); + + if (actual == 0) { + /* no things, so return null pointer and deallocate memory */ + *thing_list = 0; + *count = 0; + + if (size != 0) + kfree(addr, size); + } else { + /* if we allocated too much, must copy */ + + if (size_needed < size) { + vm_offset_t newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + switch (type) { + case THING_TASK: { + task_t *tasks = (task_t *) addr; + + for (i = 0; i < actual; i++) + task_deallocate(tasks[i]); + break; + } + + case THING_THREAD: { + thread_t *threads = (thread_t *) addr; + + for (i = 0; i < actual; i++) + thread_deallocate(threads[i]); + break; + } + } + kfree(addr, size); + return KERN_RESOURCE_SHORTAGE; + } + + memcpy((void *) newaddr, (void *) addr, size_needed); + kfree(addr, size); + addr = newaddr; + } + + *thing_list = (mach_port_t *) addr; + *count = actual; + + /* do the conversion that Mig should handle */ + + switch (type) { + case THING_TASK: { + task_t *tasks = (task_t *) addr; + + for (i = 0; i < actual; i++) + ((mach_port_t *) tasks)[i] = + (mach_port_t)convert_task_to_port(tasks[i]); + break; + } + + case THING_THREAD: { + thread_t *threads = (thread_t *) addr; + + for (i = 0; i < actual; i++) + ((mach_port_t *) threads)[i] = + (mach_port_t)convert_thread_to_port(threads[i]); + break; + } + } + } + + return KERN_SUCCESS; +} + + +/* + * processor_set_tasks: + * + * List all tasks in the processor set. + */ +kern_return_t +processor_set_tasks( + processor_set_t pset, + task_array_t *task_list, + natural_t *count) +{ + return processor_set_things(pset, task_list, count, THING_TASK); +} + +/* + * processor_set_threads: + * + * List all threads in the processor set. + */ +kern_return_t +processor_set_threads( + processor_set_t pset, + thread_array_t *thread_list, + natural_t *count) +{ + return processor_set_things(pset, thread_list, count, THING_THREAD); +} diff --git a/kern/processor.h b/kern/processor.h new file mode 100644 index 0000000..747badf --- /dev/null +++ b/kern/processor.h @@ -0,0 +1,326 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. 
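The sizing loop at the top of processor_set_things() above is a common Mach idiom: the element count can change whenever the pset lock is dropped, so allocation happens unlocked and the routine retries until the buffer covers the current count, copying down to a right-sized buffer afterwards if it over-allocated. A stripped-down sketch of just that loop (the copy-out and port conversion are omitted):

    /* Sketch: snapshot a variable-sized, lock-protected count into a
       kalloc'd buffer without holding the lock across kalloc(). */
    vm_size_t    size = 0, size_needed;
    vm_offset_t  addr = 0;
    unsigned int actual;

    for (;;) {
        pset_lock(pset);
        actual = pset->task_count;
        size_needed = actual * sizeof(mach_port_t);
        if (size_needed <= size)
            break;                  /* buffer large enough; keep the lock */

        pset_unlock(pset);
        if (size != 0)
            kfree(addr, size);
        size = size_needed;
        addr = kalloc(size);
        if (addr == 0)
            return KERN_RESOURCE_SHORTAGE;
    }
    /* ...fill in 'actual' entries, then pset_unlock(pset)... */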
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * processor.h: Processor and processor-set definitions. + */ + +#ifndef _KERN_PROCESSOR_H_ +#define _KERN_PROCESSOR_H_ + +/* + * Data structures for managing processors and sets of processors. + */ + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/port.h> +#include <mach/processor_info.h> +#include <kern/lock.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/kern_types.h> +#include <kern/host.h> + +#if NCPUS > 1 +#include <machine/ast_types.h> +#endif /* NCPUS > 1 */ + +struct processor_set { + struct run_queue runq; /* runq for this set */ + queue_head_t idle_queue; /* idle processors */ + int idle_count; /* how many ? */ + decl_simple_lock_data(, idle_lock) /* lock for above, shall be taken at splsched only */ + queue_head_t processors; /* all processors here */ + int processor_count; /* how many ? */ + boolean_t empty; /* true if no processors */ + queue_head_t tasks; /* tasks assigned */ + int task_count; /* how many */ + queue_head_t threads; /* threads in this set */ + int thread_count; /* how many */ + int ref_count; /* structure ref count */ + decl_simple_lock_data(, ref_lock) /* lock for ref count */ + queue_chain_t all_psets; /* link for all_psets */ + boolean_t active; /* is pset in use */ + decl_simple_lock_data(, lock) /* lock for everything else */ + struct ipc_port * pset_self; /* port for operations */ + struct ipc_port * pset_name_self; /* port for information */ + int max_priority; /* maximum priority */ +#if MACH_FIXPRI + int policies; /* bit vector for policies */ +#endif /* MACH_FIXPRI */ + int set_quantum; /* current default quantum */ +#if NCPUS > 1 + int quantum_adj_index; /* runtime quantum adj. */ + decl_simple_lock_data(, quantum_adj_lock) /* lock for above */ + int machine_quantum[NCPUS+1]; /* ditto */ +#endif /* NCPUS > 1 */ + long mach_factor; /* mach_factor */ + long load_average; /* load_average */ + long sched_load; /* load avg for scheduler */ +}; +extern struct processor_set default_pset; +#if MACH_HOST +extern struct processor_set *slave_pset; +#endif + +struct processor { + struct run_queue runq; /* local runq for this processor */ + /* XXX want to do this round robin eventually */ + queue_chain_t processor_queue; /* idle/assign/shutdown queue link */ + int state; /* See below */ + struct thread *next_thread; /* next thread to run if dispatched */ + struct thread *idle_thread; /* this processor's idle thread. 
*/ + int quantum; /* quantum for current thread */ + boolean_t first_quantum; /* first quantum in succession */ + int last_quantum; /* last quantum assigned */ + + processor_set_t processor_set; /* processor set I belong to */ + processor_set_t processor_set_next; /* set I will belong to */ + queue_chain_t processors; /* all processors in set */ + decl_simple_lock_data(, lock) + struct ipc_port *processor_self; /* port for operations */ + int slot_num; /* machine-indep slot number */ +#if NCPUS > 1 + ast_check_t ast_check_data; /* for remote ast_check invocation */ +#endif /* NCPUS > 1 */ + /* punt id data temporarily */ +}; +typedef struct processor Processor; +extern struct processor processor_array[NCPUS]; + +#include <kern/cpu_number.h> +#include <machine/percpu.h> + +/* + * Chain of all processor sets. + */ +extern queue_head_t all_psets; +extern int all_psets_count; +decl_simple_lock_data(extern, all_psets_lock); + +/* + * The lock ordering is: + * + * all_psets_lock + * | + * | + * V + * pset_lock + * | + * +-----------+---------------+-------------------+ + * | | | | + * | | | | + * | | V V + * | | task_lock pset_self->ip_lock + * | | | | + * | | +-----------+---------------+ | + * | | | | | + * | V V V V + * | thread_lock* pset_ref_lock + * | | + * | +-------+ + * | | | + * | | V + * | | runq_lock* + * | | + * V V + * processor_lock* + * | + * | + * V + * pset_idle_lock* + * | + * | + * V + * action_lock* + * + * Locks marked with "*" are taken at splsched. + */ + +/* + * XXX need a pointer to the master processor structure + */ + +extern processor_t master_processor; + +/* + * NOTE: The processor->processor_set link is needed in one of the + * scheduler's critical paths. [Figure out where to look for another + * thread to run on this processor.] It is accessed without locking. + * The following access protocol controls this field. + * + * Read from own processor - just read. + * Read from another processor - lock processor structure during read. + * Write from own processor - lock processor structure during write. + * Write from another processor - NOT PERMITTED. + * + */ + +/* + * Processor state locking: + * + * Values for the processor state are defined below. If the processor + * is off-line or being shutdown, then it is only necessary to lock + * the processor to change its state. Otherwise it is only necessary + * to lock its processor set's idle_lock. Scheduler code will + * typically lock only the idle_lock, but processor manipulation code + * will often lock both. 
+ */ + +#define PROCESSOR_OFF_LINE 0 /* Not in system */ +#define PROCESSOR_RUNNING 1 /* Running normally */ +#define PROCESSOR_IDLE 2 /* idle */ +#define PROCESSOR_DISPATCHING 3 /* dispatching (idle -> running) */ +#define PROCESSOR_ASSIGN 4 /* Assignment is changing */ +#define PROCESSOR_SHUTDOWN 5 /* Being shutdown */ + +#define processor_ptr(i) (&percpu_array[i].processor) +#define cpu_to_processor processor_ptr + +#define current_processor() (percpu_ptr(struct processor, processor)) +#define current_processor_set() (current_processor()->processor_set) + +/* Compatibility -- will go away */ + +#define cpu_state(slot_num) (processor_ptr(slot_num)->state) +#define cpu_idle(slot_num) (cpu_state(slot_num) == PROCESSOR_IDLE) + +/* Useful lock macros */ + +#define pset_lock(pset) simple_lock(&(pset)->lock) +#define pset_unlock(pset) simple_unlock(&(pset)->lock) +#define pset_ref_lock(pset) simple_lock(&(pset)->ref_lock) +#define pset_ref_unlock(pset) simple_unlock(&(pset)->ref_lock) + +/* Shall be taken at splsched only */ +#define processor_lock(pr) simple_lock(&(pr)->lock) +#define processor_unlock(pr) simple_unlock(&(pr)->lock) + +typedef mach_port_t *processor_array_t; +typedef mach_port_t *processor_set_array_t; +typedef mach_port_t *processor_set_name_array_t; + + +/* + * Exported functions + */ + +/* Initialization */ + +#ifdef KERNEL +#if MACH_HOST +extern void pset_sys_init(void); +#endif /* MACH_HOST */ + +/* Pset internal functions */ + +extern void pset_sys_bootstrap(void); +extern void pset_reference(processor_set_t); +extern void pset_deallocate(processor_set_t); +extern void pset_remove_processor(processor_set_t, processor_t); +extern void pset_add_processor(processor_set_t, processor_t); +extern void pset_remove_task(processor_set_t, struct task *); +extern void pset_add_task(processor_set_t, struct task *); +extern void pset_remove_thread(processor_set_t, struct thread *); +extern void pset_add_thread(processor_set_t, struct thread *); +extern void thread_change_psets(struct thread *, + processor_set_t, processor_set_t); + +/* Processor interface */ + +extern kern_return_t processor_get_assignment( + processor_t processor, + processor_set_t *processor_set); + +extern kern_return_t processor_info( + processor_t processor, + int flavor, + host_t * host, + processor_info_t info, + natural_t * count); + +extern kern_return_t processor_start( + processor_t processor); + +extern kern_return_t processor_exit( + processor_t processor); + +extern kern_return_t processor_control( + processor_t processor, + processor_info_t info, + natural_t count); + +/* Pset interface */ + +extern kern_return_t processor_set_create( + host_t host, + processor_set_t *new_set, + processor_set_t *new_name); + +extern kern_return_t processor_set_destroy( + processor_set_t pset); + +extern kern_return_t processor_set_info( + processor_set_t pset, + int flavor, + host_t *host, + processor_set_info_t info, + natural_t *count); + +extern kern_return_t processor_set_max_priority( + processor_set_t pset, + int max_priority, + boolean_t change_threads); + +extern kern_return_t processor_set_policy_enable( + processor_set_t pset, + int policy); + +extern kern_return_t processor_set_policy_disable( + processor_set_t pset, + int policy, + boolean_t change_threads); + +extern kern_return_t processor_set_tasks( + processor_set_t pset, + task_array_t *task_list, + natural_t *count); + +extern kern_return_t processor_set_threads( + processor_set_t pset, + thread_array_t *thread_list, + natural_t *count); +#endif + 
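As a sketch of the access protocol and lock macros above (the helper name is invented, not part of this interface): a processor may read its own processor_set field directly, while reading another processor's assignment takes that processor's lock at splsched.

    /* Hypothetical helper: read the pset assignment of an arbitrary CPU
       while honoring the documented access protocol. */
    static processor_set_t
    pset_assignment_of(processor_t pr)
    {
        processor_set_t pset;
        spl_t           s;

        if (pr == current_processor())
            return pr->processor_set;   /* own CPU: plain read */

        s = splsched();                 /* processor_lock is a splsched lock */
        processor_lock(pr);
        pset = pr->processor_set;
        processor_unlock(pr);
        splx(s);
        return pset;
    }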
+void processor_doshutdown(processor_t processor); +void quantum_set(processor_set_t pset); +void pset_init(processor_set_t pset); +void processor_init(processor_t pr, int slot_num); + +#endif /* _KERN_PROCESSOR_H_ */ diff --git a/kern/profile.c b/kern/profile.c new file mode 100644 index 0000000..4fcd541 --- /dev/null +++ b/kern/profile.c @@ -0,0 +1,408 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Copyright 1991 by Open Software Foundation, + * Grenoble, FRANCE + * + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appears in all copies and + * that both the copyright notice and this permission notice appear in + * supporting documentation, and that the name of OSF or Open Software + * Foundation not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. + * + * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, + * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, + * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#if 0 + +#include <kern/thread.h> +#include <kern/queue.h> +#include <mach/profil.h> +#include <kern/sched_prim.h> +#include <ipc/ipc_space.h> + +extern vm_map_t kernel_map; /* can be discarded, defined in <vm/vm_kern.h> */ + +thread_t profile_thread_id = THREAD_NULL; + + +void profile_thread() +{ + struct message { + mach_msg_header_t head; + mach_msg_type_t type; + int arg[SIZE_PROF_BUFFER+1]; + } msg; + + spl_t s; + buf_to_send_t buf_entry; + queue_entry_t prof_queue_entry; + prof_data_t pbuf; + simple_lock_t lock; + msg_return_t mr; + int j; + + /* Initialise the queue header for the prof_queue */ + mpqueue_init(&prof_queue); + + /* Template initialisation of header and type structures */ + msg.head.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, MACH_MSG_TYPE_MAKE_SEND_ONCE); + msg.head.msgh_size = sizeof(msg); + msg.head.msgh_local_port = MACH_PORT_NULL; + msg.head.msgh_kind = MACH_MSGH_KIND_NORMAL; + msg.head.msgh_id = 666666; + + msg.type.msgt_name = MACH_MSG_TYPE_INTEGER_32; + msg.type.msgt_size = 32; + msg.type.msgt_number = SIZE_PROF_BUFFER+1; + msg.type.msgt_inline = TRUE; + msg.type.msgt_longform = FALSE; + msg.type.msgt_deallocate = FALSE; + msg.type.msgt_unused = 0; + + while (TRUE) { + + /* Dequeue the first buffer. */ + s = splsched(); + mpdequeue_head(&prof_queue, &prof_queue_entry); + splx(s); + + if ((buf_entry = (buf_to_send_t) prof_queue_entry) == NULLBTS) + { + thread_sleep((event_t) profile_thread, lock, TRUE); + if (current_thread()->wait_result != THREAD_AWAKENED) + break; + } + else { + task_t curr_task; + thread_t curr_th; + int *sample; + int curr_buf; + int imax; + + curr_th = (thread_t) buf_entry->thread; + curr_buf = (int) buf_entry->number; + pbuf = curr_th->profil_buffer; + + /* Set the remote port */ + msg.head.msgh_remote_port = (mach_port_t) pbuf->prof_port; + + + sample = pbuf->prof_area[curr_buf].p_zone; + imax = pbuf->prof_area[curr_buf].p_index; + for(j=0 ;j<imax; j++,sample++) + msg.arg[j] = *sample; + + /* Let hardclock() know you've finished the dirty job */ + pbuf->prof_area[curr_buf].p_full = FALSE; + + /* + * Store the number of samples actually sent + * as the last element of the array. + */ + msg.arg[SIZE_PROF_BUFFER] = imax; + + mr = mach_msg(&(msg.head), MACH_SEND_MSG, + sizeof(struct message), 0, + MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + + if (mr != MACH_MSG_SUCCESS) { +printf("profile_thread: mach_msg failed returned %x\n",(int)mr); + } + + if (buf_entry->wakeme) + thread_wakeup((event_t) &buf_entry->wakeme); + kmem_free(kernel_map, (buf_to_send_t) buf_entry, + sizeof(struct buf_to_send)); + + } + + } + /* The profile thread has been signalled to exit. There may still + be sample data queued for us, which we must now throw away. + Once we set profile_thread_id to null, hardclock() will stop + queueing any additional samples, so we do not need to alter + the interrupt level. */ + profile_thread_id = THREAD_NULL; + while (1) { + mpdequeue_head(&prof_queue, &prof_queue_entry); + if ((buf_entry = (buf_to_send_t) prof_queue_entry) == NULLBTS) + break; + if (buf_entry->wakeme) + thread_wakeup((event_t) &buf_entry->wakeme); + kmem_free(kernel_map, (buf_to_send_t) buf_entry, + sizeof(struct buf_to_send)); + } + + thread_halt_self(thread_exception_return); +} + + + +#include <mach/message.h> + +void +send_last_sample_buf(thread_t th) +{ + spl_t s; + buf_to_send_t buf_entry; + vm_offset_t vm_buf_entry; + + if (th->profil_buffer == NULLPBUF) + return; + + /* Ask for the sending of the last PC buffer. 
+ * Make a request to the profile_thread by inserting + * the buffer in the send queue, and wake it up. + * The last buffer must be inserted at the head of the + * send queue, so the profile_thread handles it immediately. + */ + if (kmem_alloc( kernel_map, &vm_buf_entry, + sizeof(struct buf_to_send)) != KERN_SUCCESS) + return; + buf_entry = (buf_to_send_t) vm_buf_entry; + buf_entry->thread = (int *) th; + buf_entry->number = th->profil_buffer->prof_index; + + /* Watch out in case profile thread exits while we are about to + queue data for it. */ + s = splsched(); + if (profile_thread_id != THREAD_NULL) { + simple_lock_t lock; + buf_entry->wakeme = 1; + mpenqueue_tail( &prof_queue, &(buf_entry->list)); + thread_wakeup((event_t) profile_thread); + assert_wait((event_t) &buf_entry->wakeme, TRUE); + splx(s); + thread_block(thread_no_continuation); + } else { + splx(s); + kmem_free(kernel_map, vm_buf_entry, sizeof(struct buf_to_send)); + } +} + +/* + * Profile current thread + */ + +profile(pc) { + + /* Find out which thread has been interrupted. */ + thread_t it_thread = current_thread(); + int inout_val = pc; + buf_to_send_t buf_entry; + vm_offset_t vm_buf_entry; + int *val; + /* + * Test if the current thread is to be sampled + */ + if (it_thread->thread_profiled) { + /* Inserts the PC value in the buffer of the thread */ + set_pbuf_value(it_thread->profil_buffer, &inout_val); + switch(inout_val) { + case 0: + if (profile_thread_id == THREAD_NULL) { + reset_pbuf_area(it_thread->profil_buffer); + } else printf("ERROR : hardclock : full buffer unsent\n"); + break; + case 1: + /* Normal case, value successfully inserted */ + break; + case 2 : + /* + * The value we have just inserted caused the + * buffer to be full, and ready to be sent. + * If profile_thread_id is null, the profile + * thread has been killed. Since this generally + * happens only when the O/S server task of which + * it is a part is killed, it is not a great loss + * to throw away the data. + */ + if (profile_thread_id == THREAD_NULL || + kmem_alloc(kernel_map, + &vm_buf_entry , + sizeof(struct buf_to_send)) != + KERN_SUCCESS) { + reset_pbuf_area(it_thread->profil_buffer); + break; + } + buf_entry = (buf_to_send_t) vm_buf_entry; + buf_entry->thread = (int *)it_thread; + buf_entry->number = + (it_thread->profil_buffer)->prof_index; + mpenqueue_tail(&prof_queue, &(buf_entry->list)); + + /* Switch to another buffer */ + reset_pbuf_area(it_thread->profil_buffer); + + /* Wake up the profile thread */ + if (profile_thread_id != THREAD_NULL) + thread_wakeup((event_t) profile_thread); + break; + + default: + printf("ERROR: profile : unexpected case\n"); + } + } +} + + +/* The task parameter in this and the subsequent routine is needed for + MiG, even though it is not used in the function itself. */ + +kern_return_t +mach_sample_thread (ipc_space_t task, + ipc_object_t reply, + thread_t cur_thread) +{ +/* + * This routine is called every time that a new thread has made + * a request for the sampling service. We must keep track of the + * correspondance between it's identity (cur_thread) and the port + * we are going to use as a reply port to send out the samples resulting + * from its execution. + */ + prof_data_t pbuf; + vm_offset_t vmpbuf; + + if (reply != MACH_PORT_NULL) { + if (cur_thread->thread_profiled && cur_thread->thread_profiled_own) { + if (reply == cur_thread->profil_buffer->prof_port) + return KERN_SUCCESS; + mach_sample_thread(MACH_PORT_NULL, cur_thread); + } + /* Start profiling this thread , do the initialization. 
*/ + alloc_pbuf_area(pbuf, vmpbuf); + if ((cur_thread->profil_buffer = pbuf) == NULLPBUF) { +printf("ERROR:mach_sample_thread:cannot allocate pbuf\n"); + return KERN_RESOURCE_SHORTAGE; + } else { + if (!set_pbuf_nb(pbuf, NB_PROF_BUFFER-1)) { +printf("ERROR:mach_sample_thread:cannot set pbuf_nb\n"); + return KERN_FAILURE; + } + reset_pbuf_area(pbuf); + } + + pbuf->prof_port = reply; + cur_thread->thread_profiled = TRUE; + cur_thread->thread_profiled_own = TRUE; + if (profile_thread_id == THREAD_NULL) + profile_thread_id = kernel_thread(current_task(), profile_thread); + } else { + if (!cur_thread->thread_profiled_own) + cur_thread->thread_profiled = FALSE; + if (!cur_thread->thread_profiled) + return KERN_SUCCESS; + + send_last_sample_buf(cur_thread); + + /* Stop profiling this thread, do the cleanup. */ + + cur_thread->thread_profiled_own = FALSE; + cur_thread->thread_profiled = FALSE; + dealloc_pbuf_area(cur_thread->profil_buffer); + cur_thread->profil_buffer = NULLPBUF; + } + + return KERN_SUCCESS; +} + +kern_return_t +mach_sample_task (ipc_space_t task, ipc_object_t reply, task_t cur_task) +{ + prof_data_t pbuf=cur_task->profil_buffer; + vm_offset_t vmpbuf; + int turnon = (reply != MACH_PORT_NULL); + + if (turnon) { + if (cur_task->task_profiled) { + if (cur_task->profil_buffer->prof_port == reply) + return KERN_SUCCESS; + (void) mach_sample_task(task, MACH_PORT_NULL, cur_task); + } + if (pbuf == NULLPBUF) { + alloc_pbuf_area(pbuf, vmpbuf); + if (pbuf == NULLPBUF) { + return KERN_RESOURCE_SHORTAGE; + } + cur_task->profil_buffer = pbuf; + } + if (!set_pbuf_nb(pbuf, NB_PROF_BUFFER-1)) { + return KERN_FAILURE; + } + reset_pbuf_area(pbuf); + pbuf->prof_port = reply; + } + + if (turnon != cur_task->task_profiled) { + int actual,i,sentone; + thread_t thread; + + if (turnon && profile_thread_id == THREAD_NULL) + profile_thread_id = + kernel_thread(current_task(), profile_thread); + cur_task->task_profiled = turnon; + actual = cur_task->thread_count; + sentone = 0; + for (i=0, thread=(thread_t) queue_first(&cur_task->thread_list); + i < actual; + i++, thread=(thread_t) queue_next(&thread->thread_list)) { + if (!thread->thread_profiled_own) { + thread->thread_profiled = turnon; + if (turnon) + thread->profil_buffer = cur_task->profil_buffer; + else if (!sentone) { + send_last_sample_buf(thread); + sentone = 1; + } + } + } + if (!turnon) { + dealloc_pbuf_area(pbuf); + cur_task->profil_buffer = NULLPBUF; + } + } + + return KERN_SUCCESS; +} + +#endif /* 0 */ diff --git a/kern/queue.c b/kern/queue.c new file mode 100644 index 0000000..f532620 --- /dev/null +++ b/kern/queue.c @@ -0,0 +1,121 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Routines to implement queue package. + */ + +#include <kern/queue.h> + + + +/* + * Insert element at head of queue. + */ +void enqueue_head( + queue_t que, + queue_entry_t elt) +{ + elt->next = que->next; + elt->prev = que; + elt->next->prev = elt; + que->next = elt; +} + +/* + * Insert element at tail of queue. + */ +void enqueue_tail( + queue_t que, + queue_entry_t elt) +{ + elt->next = que; + elt->prev = que->prev; + elt->prev->next = elt; + que->prev = elt; +} + +/* + * Remove and return element at head of queue. + */ +queue_entry_t dequeue_head( + queue_t que) +{ + queue_entry_t elt; + + if (que->next == que) + return((queue_entry_t)0); + + elt = que->next; + elt->next->prev = que; + que->next = elt->next; + return(elt); +} + +/* + * Remove and return element at tail of queue. + */ +queue_entry_t dequeue_tail( + queue_t que) +{ + queue_entry_t elt; + + if (que->prev == que) + return((queue_entry_t)0); + + elt = que->prev; + elt->prev->next = que; + que->prev = elt->prev; + return(elt); +} + +/* + * Remove arbitrary element from queue. + * Does not check whether element is on queue - the world + * will go haywire if it isn't. + */ + +/*ARGSUSED*/ +void remqueue( + queue_t que, + queue_entry_t elt) +{ + elt->next->prev = elt->prev; + elt->prev->next = elt->next; +} + +/* + * Routines to directly imitate the VAX hardware queue + * package. + */ +void insque( + struct queue_entry *entry, + struct queue_entry *pred) +{ + entry->next = pred->next; + entry->prev = pred; + (pred->next)->prev = entry; + pred->next = entry; +} diff --git a/kern/queue.h b/kern/queue.h new file mode 100644 index 0000000..f0b4002 --- /dev/null +++ b/kern/queue.h @@ -0,0 +1,391 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon rights + * to redistribute these changes. + */ +/* + * File: queue.h + * Author: Avadis Tevanian, Jr. + * Date: 1985 + * + * Type definitions for generic queues. + * + */ + +#ifndef _KERN_QUEUE_H_ +#define _KERN_QUEUE_H_ + +#include <kern/lock.h> + +/* + * Queue of abstract objects. Queue is maintained + * within that object. + * + * Supports fast removal from within the queue. 
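A small illustration of the queue.c primitives above (the embedding structure is invented): elements carry their own struct queue_entry, enqueue_tail()/dequeue_head() give FIFO order, and dequeue_head() returns 0 once the queue drains. With these low-level routines the link is the thing being queued, so the cast back to the element only works if the link is the first member.

    #include <kern/queue.h>

    struct pending_io {
        struct queue_entry link;   /* first member, so the cast below is safe */
        int                id;
    };

    static queue_head_t io_queue;

    static void
    io_queue_demo(struct pending_io *a, struct pending_io *b)
    {
        queue_entry_t e;

        queue_init(&io_queue);
        enqueue_tail(&io_queue, &a->link);
        enqueue_tail(&io_queue, &b->link);

        while ((e = dequeue_head(&io_queue)) != 0) {
            struct pending_io *io = (struct pending_io *) e;
            (void) io->id;         /* 'a' drains before 'b': FIFO */
        }
    }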
+ * + * How to declare a queue of elements of type "foo_t": + * In the "*foo_t" type, you must have a field of + * type "queue_chain_t" to hold together this queue. + * There may be more than one chain through a + * "foo_t", for use by different queues. + * + * Declare the queue as a "queue_t" type. + * + * Elements of the queue (of type "foo_t", that is) + * are referred to by reference, and cast to type + * "queue_entry_t" within this module. + */ + +/* + * A generic doubly-linked list (queue). + */ + +struct queue_entry { + struct queue_entry *next; /* next element */ + struct queue_entry *prev; /* previous element */ +}; + +typedef struct queue_entry *queue_t; +typedef struct queue_entry queue_head_t; +typedef struct queue_entry queue_chain_t; +typedef struct queue_entry *queue_entry_t; + +/* + * enqueue puts "elt" on the "queue". + * dequeue returns the first element in the "queue". + * remqueue removes the specified "elt" from the specified "queue". + */ + +#define enqueue(queue,elt) enqueue_tail(queue, elt) +#define dequeue(queue) dequeue_head(queue) + +void enqueue_head(queue_t, queue_entry_t); +void enqueue_tail(queue_t, queue_entry_t); +queue_entry_t dequeue_head(queue_t); +queue_entry_t dequeue_tail(queue_t); +void remqueue(queue_t, queue_entry_t); +void insque(queue_entry_t, queue_entry_t); + +/* + * Macro: queue_assert + * Function: + * Used by macros to assert that the given argument is a + * queue. + */ +#define queue_assert(q) (void) ((void) (q)->next, (q)->prev) + +/* + * Macro: queue_init + * Function: + * Initialize the given queue. + * Header: + * void queue_init(q) + * queue_t q; *MODIFIED* + */ +#define queue_init(q) ((q)->next = (q)->prev = q) + +/* + * Macro: queue_first + * Function: + * Returns the first entry in the queue, + * Header: + * queue_entry_t queue_first(q) + * queue_t q; *IN* + */ +#define queue_first(q) (queue_assert(q), (q)->next) + +/* + * Macro: queue_next + * Function: + * Returns the entry after an item in the queue. + * Header: + * queue_entry_t queue_next(qc) + * queue_t qc; + */ +#define queue_next(qc) (queue_assert(qc), (qc)->next) + +/* + * Macro: queue_last + * Function: + * Returns the last entry in the queue. + * Header: + * queue_entry_t queue_last(q) + * queue_t q; *IN* + */ +#define queue_last(q) (queue_assert(q), (q)->prev) + +/* + * Macro: queue_prev + * Function: + * Returns the entry before an item in the queue. + * Header: + * queue_entry_t queue_prev(qc) + * queue_t qc; + */ +#define queue_prev(qc) (queue_assert(qc), (qc)->prev) + +/* + * Macro: queue_end + * Function: + * Tests whether a new entry is really the end of + * the queue. + * Header: + * boolean_t queue_end(q, qe) + * queue_t q; + * queue_entry_t qe; + */ +#define queue_end(q, qe) (queue_assert(q), queue_assert(qe), \ + (q) == (qe)) + +/* + * Macro: queue_empty + * Function: + * Tests whether a queue is empty. + * Header: + * boolean_t queue_empty(q) + * queue_t q; + */ +#define queue_empty(q) queue_end((q), queue_first(q)) + + +/*----------------------------------------------------------------*/ +/* + * Macros that operate on generic structures. The queue + * chain may be at any location within the structure, and there + * may be more than one chain. + */ + +/* + * Macro: queue_enter + * Function: + * Insert a new element at the tail of the queue. 
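Before the type-generic macros, a quick note on the access macros just defined: queue_first()/queue_next()/queue_end() are enough to walk a queue by hand, which is exactly what queue_iterate() further below expands to. A sketch, assuming an element type foo_t declared as in the introductory comment:

    /* Sketch: hand-rolled traversal, equivalent to queue_iterate(). */
    typedef struct foo {
        int           value;
        queue_chain_t chain;       /* link field, per the comment above */
    } *foo_t;

    void
    foo_walk(queue_t q)
    {
        foo_t elt;

        for (elt = (foo_t) queue_first(q);
             !queue_end(q, (queue_entry_t) elt);
             elt = (foo_t) queue_next(&elt->chain)) {
            /* use elt->value ... */
        }
    }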
+ * Header: + * void queue_enter(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_enter(head, elt, type, field) \ +{ \ + queue_assert(head); \ + queue_assert(&(elt)->field); \ + queue_entry_t prev; \ + \ + prev = (head)->prev; \ + if ((head) == prev) { \ + (head)->next = (queue_entry_t) (elt); \ + } \ + else { \ + ((type)prev)->field.next = (queue_entry_t)(elt);\ + } \ + (elt)->field.prev = prev; \ + (elt)->field.next = head; \ + (head)->prev = (queue_entry_t) elt; \ +} + +/* + * Macro: queue_enter_first + * Function: + * Insert a new element at the head of the queue. + * Header: + * void queue_enter_first(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_enter_first(head, elt, type, field) \ +{ \ + queue_assert(head); \ + queue_assert(&(elt)->field); \ + queue_entry_t next; \ + \ + next = (head)->next; \ + if ((head) == next) { \ + (head)->prev = (queue_entry_t) (elt); \ + } \ + else { \ + ((type)next)->field.prev = (queue_entry_t)(elt);\ + } \ + (elt)->field.next = next; \ + (elt)->field.prev = head; \ + (head)->next = (queue_entry_t) elt; \ +} + +/* + * Macro: queue_field [internal use only] + * Function: + * Find the queue_chain_t (or queue_t) for the + * given element (thing) in the given queue (head) + */ +#define queue_field(head, thing, type, field) \ + (((head) == (thing)) ? (head) : &((type)(thing))->field) + +/* + * Macro: queue_remove + * Function: + * Remove an arbitrary item from the queue. + * Header: + * void queue_remove(q, qe, type, field) + * arguments as in queue_enter + */ +#define queue_remove(head, elt, type, field) \ +{ \ + queue_assert(head); \ + queue_assert(&(elt)->field); \ + queue_entry_t next, prev; \ + \ + next = (elt)->field.next; \ + prev = (elt)->field.prev; \ + \ + if ((head) == next) \ + (head)->prev = prev; \ + else \ + ((type)next)->field.prev = prev; \ + \ + if ((head) == prev) \ + (head)->next = next; \ + else \ + ((type)prev)->field.next = next; \ +} + +/* + * Macro: queue_remove_first + * Function: + * Remove and return the entry at the head of + * the queue. + * Header: + * queue_remove_first(head, entry, type, field) + * entry is returned by reference + */ +#define queue_remove_first(head, entry, type, field) \ +{ \ + queue_assert(head); \ + queue_assert(&(entry)->field); \ + queue_entry_t next; \ + \ + (entry) = (type) ((head)->next); \ + next = (entry)->field.next; \ + \ + if ((head) == next) \ + (head)->prev = (head); \ + else \ + ((type)(next))->field.prev = (head); \ + (head)->next = next; \ +} + +/* + * Macro: queue_remove_last + * Function: + * Remove and return the entry at the tail of + * the queue. 
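Putting queue_enter() and queue_remove() together with the "foo_t" recipe above, a usage sketch in the style of the pset lists in processor.c (the types here are invented stand-ins, and active_widgets is assumed to have been queue_init()'d at startup):

    #include <kern/queue.h>

    typedef struct widget {
        int           value;
        queue_chain_t active_link;     /* chain for the active list */
    } *widget_t;

    static queue_head_t active_widgets;

    void
    widget_activate(widget_t w)
    {
        queue_enter(&active_widgets, w, widget_t, active_link);
    }

    void
    widget_deactivate(widget_t w)
    {
        queue_remove(&active_widgets, w, widget_t, active_link);
    }

    void
    widget_scan(void)
    {
        widget_t w;

        /* queue_iterate() is defined a little further below. */
        queue_iterate(&active_widgets, w, widget_t, active_link) {
            /* use w->value ... */
        }
    }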
+ * Header: + * queue_remove_last(head, entry, type, field) + * entry is returned by reference + */ +#define queue_remove_last(head, entry, type, field) \ +{ \ + queue_assert(head); \ + queue_assert(&(entry)->field); \ + queue_entry_t prev; \ + \ + (entry) = (type) ((head)->prev); \ + prev = (entry)->field.prev; \ + \ + if ((head) == prev) \ + (head)->next = (head); \ + else \ + ((type)(prev))->field.next = (head); \ + (head)->prev = prev; \ +} + +/* + * Macro: queue_assign + */ +#define queue_assign(to, from, type, field) \ +{ \ + queue_assert(&(to)->field); \ + queue_assert(&(from)->field); \ + ((type)((from)->prev))->field.next = (to); \ + ((type)((from)->next))->field.prev = (to); \ + *to = *from; \ +} + +/* + * Macro: queue_iterate + * Function: + * iterate over each item in the queue. + * Generates a 'for' loop, setting elt to + * each item in turn (by reference). + * Header: + * queue_iterate(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_iterate(head, elt, type, field) \ + for ((elt) = (type) queue_first(head); \ + !queue_end((head), (queue_entry_t)(elt)); \ + (elt) = (type) queue_next(&(elt)->field)) + + + +/*----------------------------------------------------------------*/ +/* + * Define macros for queues with locks. + */ +struct mpqueue_head { + struct queue_entry head; /* header for queue */ + struct slock lock; /* lock for queue */ +}; + +typedef struct mpqueue_head mpqueue_head_t; + +#define round_mpq(size) (size) + +#define mpqueue_init(q) \ + { \ + queue_init(&(q)->head); \ + simple_lock_init(&(q)->lock); \ + } + +#define mpenqueue_tail(q, elt) \ + simple_lock(&(q)->lock); \ + enqueue_tail(&(q)->head, elt); \ + simple_unlock(&(q)->lock); + +#define mpdequeue_head(q, elt) \ + simple_lock(&(q)->lock); \ + if (queue_empty(&(q)->head)) \ + *(elt) = 0; \ + else \ + *(elt) = dequeue_head(&(q)->head); \ + simple_unlock(&(q)->lock); + +/* + * Old queue stuff, will go away soon. + */ + +#endif /* _KERN_QUEUE_H_ */ diff --git a/kern/rbtree.c b/kern/rbtree.c new file mode 100644 index 0000000..0f5eb9a --- /dev/null +++ b/kern/rbtree.c @@ -0,0 +1,483 @@ +/* + * Copyright (c) 2010, 2012 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <kern/assert.h> +#include <kern/rbtree.h> +#include <kern/rbtree_i.h> +#include <sys/types.h> + +#define unlikely(expr) __builtin_expect(!!(expr), 0) + +/* + * Return the index of a node in the children array of its parent. + * + * The parent parameter must not be null, and must be the parent of the + * given node. + */ +static inline int rbtree_index(const struct rbtree_node *node, + const struct rbtree_node *parent) +{ + assert(parent != NULL); + assert((node == NULL) || (rbtree_parent(node) == parent)); + + if (parent->children[RBTREE_LEFT] == node) + return RBTREE_LEFT; + + assert(parent->children[RBTREE_RIGHT] == node); + + return RBTREE_RIGHT; +} + +/* + * Return the color of a node. + */ +static inline int rbtree_color(const struct rbtree_node *node) +{ + return node->parent & RBTREE_COLOR_MASK; +} + +/* + * Return true if the node is red. + */ +static inline int rbtree_is_red(const struct rbtree_node *node) +{ + return rbtree_color(node) == RBTREE_COLOR_RED; +} + +/* + * Return true if the node is black. + */ +static inline int rbtree_is_black(const struct rbtree_node *node) +{ + return rbtree_color(node) == RBTREE_COLOR_BLACK; +} + +/* + * Set the parent of a node, retaining its current color. + */ +static inline void rbtree_set_parent(struct rbtree_node *node, + struct rbtree_node *parent) +{ + assert(rbtree_check_alignment(node)); + assert(rbtree_check_alignment(parent)); + + node->parent = (unsigned long)parent | (node->parent & RBTREE_COLOR_MASK); +} + +/* + * Set the color of a node, retaining its current parent. + */ +static inline void rbtree_set_color(struct rbtree_node *node, int color) +{ + assert((color & ~RBTREE_COLOR_MASK) == 0); + node->parent = (node->parent & RBTREE_PARENT_MASK) | color; +} + +/* + * Set the color of a node to red, retaining its current parent. + */ +static inline void rbtree_set_red(struct rbtree_node *node) +{ + rbtree_set_color(node, RBTREE_COLOR_RED); +} + +/* + * Set the color of a node to black, retaining its current parent. + */ +static inline void rbtree_set_black(struct rbtree_node *node) +{ + rbtree_set_color(node, RBTREE_COLOR_BLACK); +} + +/* + * Perform a tree rotation, rooted at the given node. + * + * The direction parameter defines the rotation direction and is either + * RBTREE_LEFT or RBTREE_RIGHT. 
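The accessors above are enough to write a small consistency check, which can be handy when touching the rebalancing code. A debugging sketch (hypothetical, not part of this file): it verifies that no red node has a red child and that every path to a leaf crosses the same number of black nodes, returning that black height.

    /* Debug sketch: returns the black height of the subtree at 'node',
       or -1 if a red-black invariant is violated.  NULL is a black leaf. */
    static int
    rbtree_check_subtree(const struct rbtree_node *node)
    {
        int lh, rh;

        if (node == NULL)
            return 1;

        if (rbtree_is_red(node)
            && ((node->children[RBTREE_LEFT] != NULL
                 && rbtree_is_red(node->children[RBTREE_LEFT]))
                || (node->children[RBTREE_RIGHT] != NULL
                    && rbtree_is_red(node->children[RBTREE_RIGHT]))))
            return -1;              /* red node with a red child */

        lh = rbtree_check_subtree(node->children[RBTREE_LEFT]);
        rh = rbtree_check_subtree(node->children[RBTREE_RIGHT]);

        if (lh < 0 || rh < 0 || lh != rh)
            return -1;              /* unequal black heights */

        return lh + (rbtree_is_black(node) ? 1 : 0);
    }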
+ */ +static void rbtree_rotate(struct rbtree *tree, struct rbtree_node *node, + int direction) +{ + struct rbtree_node *parent, *rnode; + int left, right; + + left = direction; + right = 1 - left; + parent = rbtree_parent(node); + rnode = node->children[right]; + + node->children[right] = rnode->children[left]; + + if (rnode->children[left] != NULL) + rbtree_set_parent(rnode->children[left], node); + + rnode->children[left] = node; + rbtree_set_parent(rnode, parent); + + if (unlikely(parent == NULL)) + tree->root = rnode; + else + parent->children[rbtree_index(node, parent)] = rnode; + + rbtree_set_parent(node, rnode); +} + +void rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent, + int index, struct rbtree_node *node) +{ + struct rbtree_node *grand_parent, *uncle, *tmp; + int left, right; + + assert(rbtree_check_alignment(parent)); + assert(rbtree_check_alignment(node)); + + node->parent = (unsigned long)parent | RBTREE_COLOR_RED; + node->children[RBTREE_LEFT] = NULL; + node->children[RBTREE_RIGHT] = NULL; + + if (unlikely(parent == NULL)) + tree->root = node; + else + parent->children[index] = node; + + for (;;) { + if (parent == NULL) { + rbtree_set_black(node); + break; + } + + if (rbtree_is_black(parent)) + break; + + grand_parent = rbtree_parent(parent); + assert(grand_parent != NULL); + + left = rbtree_index(parent, grand_parent); + right = 1 - left; + + uncle = grand_parent->children[right]; + + /* + * Uncle is red. Flip colors and repeat at grand parent. + */ + if ((uncle != NULL) && rbtree_is_red(uncle)) { + rbtree_set_black(uncle); + rbtree_set_black(parent); + rbtree_set_red(grand_parent); + node = grand_parent; + parent = rbtree_parent(node); + continue; + } + + /* + * Node is the right child of its parent. Rotate left at parent. + */ + if (parent->children[right] == node) { + rbtree_rotate(tree, parent, left); + tmp = node; + node = parent; + parent = tmp; + } + + /* + * Node is the left child of its parent. Handle colors, rotate right + * at grand parent, and leave. + */ + rbtree_set_black(parent); + rbtree_set_red(grand_parent); + rbtree_rotate(tree, grand_parent, right); + break; + } + + assert(rbtree_is_black(tree->root)); +} + +void rbtree_remove(struct rbtree *tree, struct rbtree_node *node) +{ + struct rbtree_node *child, *parent, *brother; + int color, left, right; + + if (node->children[RBTREE_LEFT] == NULL) + child = node->children[RBTREE_RIGHT]; + else if (node->children[RBTREE_RIGHT] == NULL) + child = node->children[RBTREE_LEFT]; + else { + struct rbtree_node *successor; + + /* + * Two-children case: replace the node with its successor. + */ + + successor = node->children[RBTREE_RIGHT]; + + while (successor->children[RBTREE_LEFT] != NULL) + successor = successor->children[RBTREE_LEFT]; + + color = rbtree_color(successor); + child = successor->children[RBTREE_RIGHT]; + parent = rbtree_parent(node); + + if (unlikely(parent == NULL)) + tree->root = successor; + else + parent->children[rbtree_index(node, parent)] = successor; + + parent = rbtree_parent(successor); + + /* + * Set parent directly to keep the original color. 
+ */ + successor->parent = node->parent; + successor->children[RBTREE_LEFT] = node->children[RBTREE_LEFT]; + rbtree_set_parent(successor->children[RBTREE_LEFT], successor); + + if (node == parent) + parent = successor; + else { + successor->children[RBTREE_RIGHT] = node->children[RBTREE_RIGHT]; + rbtree_set_parent(successor->children[RBTREE_RIGHT], successor); + parent->children[RBTREE_LEFT] = child; + + if (child != NULL) + rbtree_set_parent(child, parent); + } + + goto update_color; + } + + /* + * Node has at most one child. + */ + + color = rbtree_color(node); + parent = rbtree_parent(node); + + if (child != NULL) + rbtree_set_parent(child, parent); + + if (unlikely(parent == NULL)) + tree->root = child; + else + parent->children[rbtree_index(node, parent)] = child; + + /* + * The node has been removed, update the colors. The child pointer can + * be null, in which case it is considered a black leaf. + */ +update_color: + if (color == RBTREE_COLOR_RED) + return; + + for (;;) { + if ((child != NULL) && rbtree_is_red(child)) { + rbtree_set_black(child); + break; + } + + if (parent == NULL) + break; + + left = rbtree_index(child, parent); + right = 1 - left; + + brother = parent->children[right]; + + /* + * Brother is red. Recolor and rotate left at parent so that brother + * becomes black. + */ + if (rbtree_is_red(brother)) { + rbtree_set_black(brother); + rbtree_set_red(parent); + rbtree_rotate(tree, parent, left); + brother = parent->children[right]; + } + + /* + * Brother has no red child. Recolor and repeat at parent. + */ + if (((brother->children[RBTREE_LEFT] == NULL) + || rbtree_is_black(brother->children[RBTREE_LEFT])) + && ((brother->children[RBTREE_RIGHT] == NULL) + || rbtree_is_black(brother->children[RBTREE_RIGHT]))) { + rbtree_set_red(brother); + child = parent; + parent = rbtree_parent(child); + continue; + } + + /* + * Brother's right child is black. Recolor and rotate right at brother. + */ + if ((brother->children[right] == NULL) + || rbtree_is_black(brother->children[right])) { + rbtree_set_black(brother->children[left]); + rbtree_set_red(brother); + rbtree_rotate(tree, brother, right); + brother = parent->children[right]; + } + + /* + * Brother's left child is black. Exchange parent and brother colors + * (we already know brother is black), set brother's right child black, + * rotate left at parent and leave. 
+ */ + rbtree_set_color(brother, rbtree_color(parent)); + rbtree_set_black(parent); + rbtree_set_black(brother->children[right]); + rbtree_rotate(tree, parent, left); + break; + } + + assert((tree->root == NULL) || rbtree_is_black(tree->root)); +} + +struct rbtree_node * rbtree_nearest(struct rbtree_node *parent, int index, + int direction) +{ + assert(rbtree_check_index(direction)); + + if (parent == NULL) + return NULL; + + assert(rbtree_check_index(index)); + + if (index != direction) + return parent; + + return rbtree_walk(parent, direction); +} + +struct rbtree_node * rbtree_firstlast(const struct rbtree *tree, int direction) +{ + struct rbtree_node *prev, *cur; + + assert(rbtree_check_index(direction)); + + prev = NULL; + + for (cur = tree->root; cur != NULL; cur = cur->children[direction]) + prev = cur; + + return prev; +} + +struct rbtree_node * rbtree_walk(struct rbtree_node *node, int direction) +{ + int left, right; + + assert(rbtree_check_index(direction)); + + left = direction; + right = 1 - left; + + if (node == NULL) + return NULL; + + if (node->children[left] != NULL) { + node = node->children[left]; + + while (node->children[right] != NULL) + node = node->children[right]; + } else { + struct rbtree_node *parent; + int index; + + for (;;) { + parent = rbtree_parent(node); + + if (parent == NULL) + return NULL; + + index = rbtree_index(node, parent); + node = parent; + + if (index == right) + break; + } + } + + return node; +} + +/* + * Return the left-most deepest child node of the given node. + */ +static struct rbtree_node * rbtree_find_deepest(struct rbtree_node *node) +{ + struct rbtree_node *parent; + + assert(node != NULL); + + for (;;) { + parent = node; + node = node->children[RBTREE_LEFT]; + + if (node == NULL) { + node = parent->children[RBTREE_RIGHT]; + + if (node == NULL) + return parent; + } + } +} + +struct rbtree_node * rbtree_postwalk_deepest(const struct rbtree *tree) +{ + struct rbtree_node *node; + + node = tree->root; + + if (node == NULL) + return NULL; + + return rbtree_find_deepest(node); +} + +struct rbtree_node * rbtree_postwalk_unlink(struct rbtree_node *node) +{ + struct rbtree_node *parent; + int index; + + if (node == NULL) + return NULL; + + assert(node->children[RBTREE_LEFT] == NULL); + assert(node->children[RBTREE_RIGHT] == NULL); + + parent = rbtree_parent(node); + + if (parent == NULL) + return NULL; + + index = rbtree_index(node, parent); + parent->children[index] = NULL; + node = parent->children[RBTREE_RIGHT]; + + if (node == NULL) + return parent; + + return rbtree_find_deepest(node); +} diff --git a/kern/rbtree.h b/kern/rbtree.h new file mode 100644 index 0000000..f885fe7 --- /dev/null +++ b/kern/rbtree.h @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2010, 2011 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Red-black tree. + */ + +#ifndef _KERN_RBTREE_H +#define _KERN_RBTREE_H + +#include <stddef.h> +#include <kern/assert.h> +#include <kern/macros.h> +#include <sys/types.h> + +/* + * Indexes of the left and right nodes in the children array of a node. + */ +#define RBTREE_LEFT 0 +#define RBTREE_RIGHT 1 + +/* + * Red-black node. + */ +struct rbtree_node; + +/* + * Red-black tree. + */ +struct rbtree; + +/* + * Static tree initializer. + */ +#define RBTREE_INITIALIZER { NULL } + +#include "rbtree_i.h" + +/* + * Initialize a tree. + */ +static inline void rbtree_init(struct rbtree *tree) +{ + tree->root = NULL; +} + +/* + * Initialize a node. + * + * A node is in no tree when its parent points to itself. + */ +static inline void rbtree_node_init(struct rbtree_node *node) +{ + assert(rbtree_check_alignment(node)); + + node->parent = (unsigned long)node | RBTREE_COLOR_RED; + node->children[RBTREE_LEFT] = NULL; + node->children[RBTREE_RIGHT] = NULL; +} + +/* + * Return true if node is in no tree. + */ +static inline int rbtree_node_unlinked(const struct rbtree_node *node) +{ + return rbtree_parent(node) == node; +} + +/* + * Macro that evaluates to the address of the structure containing the + * given node based on the given type and member. + */ +#define rbtree_entry(node, type, member) structof(node, type, member) + +/* + * Return true if tree is empty. + */ +static inline int rbtree_empty(const struct rbtree *tree) +{ + return tree->root == NULL; +} + +/* + * Look up a node in a tree. + * + * Note that implementing the lookup algorithm as a macro gives two benefits: + * First, it avoids the overhead of a callback function. Next, the type of the + * cmp_fn parameter isn't rigid. The only guarantee offered by this + * implementation is that the key parameter is the first parameter given to + * cmp_fn. This way, users can pass only the value they need for comparison + * instead of e.g. allocating a full structure on the stack. + * + * See rbtree_insert(). + */ +#define rbtree_lookup(tree, key, cmp_fn) \ +MACRO_BEGIN \ + struct rbtree_node *___cur; \ + int ___diff; \ + \ + ___cur = (tree)->root; \ + \ + while (___cur != NULL) { \ + ___diff = cmp_fn(key, ___cur); \ + \ + if (___diff == 0) \ + break; \ + \ + ___cur = ___cur->children[rbtree_d2i(___diff)]; \ + } \ + \ + ___cur; \ +MACRO_END + +/* + * Look up a node or one of its nearest nodes in a tree. + * + * This macro essentially acts as rbtree_lookup() but if no entry matched + * the key, an additional step is performed to obtain the next or previous + * node, depending on the direction (left or right). + * + * The constraints that apply to the key parameter are the same as for + * rbtree_lookup(). 
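+ *
+ * Illustrative use, assuming an object type and comparison helper that are
+ * not part of this header (struct myobj, myobj_cmp_lookup):
+ *
+ *   static inline int myobj_cmp_lookup(unsigned long key,
+ *                                      struct rbtree_node *node)
+ *   {
+ *       struct myobj *obj = rbtree_entry(node, struct myobj, node);
+ *
+ *       return (key < obj->key) ? -1 : (key > obj->key);
+ *   }
+ *
+ *   node = rbtree_lookup_nearest(&tree, key, myobj_cmp_lookup, RBTREE_RIGHT);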
+ */ +#define rbtree_lookup_nearest(tree, key, cmp_fn, dir) \ +MACRO_BEGIN \ + struct rbtree_node *___cur, *___prev; \ + int ___diff, ___index; \ + \ + ___prev = NULL; \ + ___index = -1; \ + ___cur = (tree)->root; \ + \ + while (___cur != NULL) { \ + ___diff = cmp_fn(key, ___cur); \ + \ + if (___diff == 0) \ + break; \ + \ + ___prev = ___cur; \ + ___index = rbtree_d2i(___diff); \ + ___cur = ___cur->children[___index]; \ + } \ + \ + if (___cur == NULL) \ + ___cur = rbtree_nearest(___prev, ___index, dir); \ + \ + ___cur; \ +MACRO_END + +/* + * Insert a node in a tree. + * + * This macro performs a standard lookup to obtain the insertion point of + * the given node in the tree (it is assumed that the inserted node never + * compares equal to any other entry in the tree) and links the node. It + * then checks red-black rules violations, and rebalances the tree if + * necessary. + * + * Unlike rbtree_lookup(), the cmp_fn parameter must compare two complete + * entries, so it is suggested to use two different comparison inline + * functions, such as myobj_cmp_lookup() and myobj_cmp_insert(). There is no + * guarantee about the order of the nodes given to the comparison function. + * + * See rbtree_lookup(). + */ +#define rbtree_insert(tree, node, cmp_fn) \ +MACRO_BEGIN \ + struct rbtree_node *___cur, *___prev; \ + int ___diff, ___index; \ + \ + ___prev = NULL; \ + ___index = -1; \ + ___cur = (tree)->root; \ + \ + while (___cur != NULL) { \ + ___diff = cmp_fn(node, ___cur); \ + assert(___diff != 0); \ + ___prev = ___cur; \ + ___index = rbtree_d2i(___diff); \ + ___cur = ___cur->children[___index]; \ + } \ + \ + rbtree_insert_rebalance(tree, ___prev, ___index, node); \ +MACRO_END + +/* + * Look up a node/slot pair in a tree. + * + * This macro essentially acts as rbtree_lookup() but in addition to a node, + * it also returns a slot, which identifies an insertion point in the tree. + * If the returned node is null, the slot can be used by rbtree_insert_slot() + * to insert without the overhead of an additional lookup. The slot is a + * simple unsigned long integer. + * + * The constraints that apply to the key parameter are the same as for + * rbtree_lookup(). + */ +#define rbtree_lookup_slot(tree, key, cmp_fn, slot) \ +MACRO_BEGIN \ + struct rbtree_node *___cur, *___prev; \ + int ___diff, ___index; \ + \ + ___prev = NULL; \ + ___index = 0; \ + ___cur = (tree)->root; \ + \ + while (___cur != NULL) { \ + ___diff = cmp_fn(key, ___cur); \ + \ + if (___diff == 0) \ + break; \ + \ + ___prev = ___cur; \ + ___index = rbtree_d2i(___diff); \ + ___cur = ___cur->children[___index]; \ + } \ + \ + (slot) = rbtree_slot(___prev, ___index); \ + ___cur; \ +MACRO_END + +/* + * Insert a node at an insertion point in a tree. + * + * This macro essentially acts as rbtree_insert() except that it doesn't + * obtain the insertion point with a standard lookup. The insertion point + * is obtained by calling rbtree_lookup_slot(). In addition, the new node + * must not compare equal to an existing node in the tree (i.e. the slot + * must denote a null node). + */ +static inline void +rbtree_insert_slot(struct rbtree *tree, unsigned long slot, + struct rbtree_node *node) +{ + struct rbtree_node *parent; + int index; + + parent = rbtree_slot_parent(slot); + index = rbtree_slot_index(slot); + rbtree_insert_rebalance(tree, parent, index, node); +} + +/* + * Remove a node from a tree. + * + * After completion, the node is stale. 
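+ *
+ * Illustrative removal, reusing the assumed myobj_cmp_lookup() helper from
+ * the lookup example above:
+ *
+ *   node = rbtree_lookup(&tree, key, myobj_cmp_lookup);
+ *
+ *   if (node != NULL)
+ *       rbtree_remove(&tree, node);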
+ */ +void rbtree_remove(struct rbtree *tree, struct rbtree_node *node); + +/* + * Return the first node of a tree. + */ +#define rbtree_first(tree) rbtree_firstlast(tree, RBTREE_LEFT) + +/* + * Return the last node of a tree. + */ +#define rbtree_last(tree) rbtree_firstlast(tree, RBTREE_RIGHT) + +/* + * Return the node previous to the given node. + */ +#define rbtree_prev(node) rbtree_walk(node, RBTREE_LEFT) + +/* + * Return the node next to the given node. + */ +#define rbtree_next(node) rbtree_walk(node, RBTREE_RIGHT) + +/* + * Forge a loop to process all nodes of a tree, removing them when visited. + * + * This macro can only be used to destroy a tree, so that the resources used + * by the entries can be released by the user. It basically removes all nodes + * without doing any color checking. + * + * After completion, all nodes and the tree root member are stale. + */ +#define rbtree_for_each_remove(tree, node, tmp) \ +for (node = rbtree_postwalk_deepest(tree), \ + tmp = rbtree_postwalk_unlink(node); \ + node != NULL; \ + node = tmp, tmp = rbtree_postwalk_unlink(node)) + +#endif /* _KERN_RBTREE_H */ diff --git a/kern/rbtree_i.h b/kern/rbtree_i.h new file mode 100644 index 0000000..69dfb9d --- /dev/null +++ b/kern/rbtree_i.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2010, 2011 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _KERN_RBTREE_I_H +#define _KERN_RBTREE_I_H + +#include <kern/assert.h> + +/* + * Red-black node structure. + * + * To reduce the number of branches and the instruction cache footprint, + * the left and right child pointers are stored in an array, and the symmetry + * of most tree operations is exploited by using left/right variables when + * referring to children. + * + * In addition, this implementation assumes that all nodes are 4-byte aligned, + * so that the least significant bit of the parent member can be used to store + * the color of the node. This is true for all modern 32 and 64 bits + * architectures, as long as the nodes aren't embedded in structures with + * special alignment constraints such as member packing. + */ +struct rbtree_node { + unsigned long parent; + struct rbtree_node *children[2]; +}; + +/* + * Red-black tree structure. 
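+ *
+ * The tree is reduced to its root pointer; both RBTREE_INITIALIZER and
+ * rbtree_init() set it to NULL, which denotes an empty tree.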
+ */ +struct rbtree { + struct rbtree_node *root; +}; + +/* + * Masks applied on the parent member of a node to obtain either the + * color or the parent address. + */ +#define RBTREE_COLOR_MASK 0x1UL +#define RBTREE_PARENT_MASK (~0x3UL) + +/* + * Node colors. + */ +#define RBTREE_COLOR_RED 0 +#define RBTREE_COLOR_BLACK 1 + +/* + * Masks applied on slots to obtain either the child index or the parent + * address. + */ +#define RBTREE_SLOT_INDEX_MASK 0x1UL +#define RBTREE_SLOT_PARENT_MASK (~RBTREE_SLOT_INDEX_MASK) + +/* + * Return true if the given pointer is suitably aligned. + */ +static inline int rbtree_check_alignment(const struct rbtree_node *node) +{ + return ((unsigned long)node & (~RBTREE_PARENT_MASK)) == 0; +} + +/* + * Return true if the given index is a valid child index. + */ +static inline int rbtree_check_index(int index) +{ + return index == (index & 1); +} + +/* + * Convert the result of a comparison into an index in the children array + * (0 or 1). + * + * This function is mostly used when looking up a node. + */ +static inline int rbtree_d2i(int diff) +{ + return !(diff <= 0); +} + +/* + * Return the parent of a node. + */ +static inline struct rbtree_node * rbtree_parent(const struct rbtree_node *node) +{ + return (struct rbtree_node *)(node->parent & RBTREE_PARENT_MASK); +} + +/* + * Translate an insertion point into a slot. + */ +static inline unsigned long rbtree_slot(struct rbtree_node *parent, int index) +{ + assert(rbtree_check_alignment(parent)); + assert(rbtree_check_index(index)); + return (unsigned long)parent | index; +} + +/* + * Extract the parent address from a slot. + */ +static inline struct rbtree_node * rbtree_slot_parent(unsigned long slot) +{ + return (struct rbtree_node *)(slot & RBTREE_SLOT_PARENT_MASK); +} + +/* + * Extract the index from a slot. + */ +static inline int rbtree_slot_index(unsigned long slot) +{ + return slot & RBTREE_SLOT_INDEX_MASK; +} + +/* + * Insert a node in a tree, rebalancing it if necessary. + * + * The index parameter is the index in the children array of the parent where + * the new node is to be inserted. It is ignored if the parent is null. + * + * This function is intended to be used by the rbtree_insert() macro only. + */ +void rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent, + int index, struct rbtree_node *node); + +/* + * Return the previous or next node relative to a location in a tree. + * + * The parent and index parameters define the location, which can be empty. + * The direction parameter is either RBTREE_LEFT (to obtain the previous + * node) or RBTREE_RIGHT (to obtain the next one). + */ +struct rbtree_node * rbtree_nearest(struct rbtree_node *parent, int index, + int direction); + +/* + * Return the first or last node of a tree. + * + * The direction parameter is either RBTREE_LEFT (to obtain the first node) + * or RBTREE_RIGHT (to obtain the last one). + */ +struct rbtree_node * rbtree_firstlast(const struct rbtree *tree, int direction); + +/* + * Return the node next to, or previous to the given node. + * + * The direction parameter is either RBTREE_LEFT (to obtain the previous node) + * or RBTREE_RIGHT (to obtain the next one). + */ +struct rbtree_node * rbtree_walk(struct rbtree_node *node, int direction); + +/* + * Return the left-most deepest node of a tree, which is the starting point of + * the postorder traversal performed by rbtree_for_each_remove(). 
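+ *
+ * Illustrative destruction loop built on this traversal, assuming a
+ * myobj_free() helper that is not part of this header:
+ *
+ *   struct rbtree_node *node, *tmp;
+ *
+ *   rbtree_for_each_remove(&tree, node, tmp)
+ *       myobj_free(rbtree_entry(node, struct myobj, node));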
+ */ +struct rbtree_node * rbtree_postwalk_deepest(const struct rbtree *tree); + +/* + * Unlink a node from its tree and return the next (right) node in postorder. + */ +struct rbtree_node * rbtree_postwalk_unlink(struct rbtree_node *node); + +#endif /* _KERN_RBTREE_I_H */ diff --git a/kern/rdxtree.c b/kern/rdxtree.c new file mode 100644 index 0000000..a23d6e7 --- /dev/null +++ b/kern/rdxtree.c @@ -0,0 +1,830 @@ +/* + * Copyright (c) 2011-2015 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Upstream site with license notes : + * http://git.sceen.net/rbraun/librbraun.git/ + */ + +#include <kern/assert.h> +#include <kern/slab.h> +#include <mach/kern_return.h> +#include <stddef.h> +#include <string.h> + +#include "macros.h" +#include "rdxtree.h" +#include "rdxtree_i.h" + +/* XXX */ +#define CHAR_BIT 8U +#define ERR_SUCCESS KERN_SUCCESS +#define ERR_BUSY KERN_INVALID_ARGUMENT +#define ERR_NOMEM KERN_RESOURCE_SHORTAGE + +/* + * Mask applied on an entry to obtain its address. + */ +#define RDXTREE_ENTRY_ADDR_MASK (~0x3UL) + +/* + * Global properties used to shape radix trees. + */ +#define RDXTREE_RADIX 6 +#define RDXTREE_RADIX_SIZE (1UL << RDXTREE_RADIX) +#define RDXTREE_RADIX_MASK (RDXTREE_RADIX_SIZE - 1) + +#if RDXTREE_RADIX < 6 +typedef unsigned long rdxtree_bm_t; +#define rdxtree_ffs(x) __builtin_ffsl(x) +#elif RDXTREE_RADIX == 6 /* RDXTREE_RADIX < 6 */ +typedef unsigned long long rdxtree_bm_t; +#define rdxtree_ffs(x) __builtin_ffsll(x) +#else /* RDXTREE_RADIX < 6 */ +#error "radix too high" +#endif /* RDXTREE_RADIX < 6 */ + +/* + * Allocation bitmap size in bits. + */ +#define RDXTREE_BM_SIZE (sizeof(rdxtree_bm_t) * CHAR_BIT) + +/* + * Empty/full allocation bitmap words. + */ +#define RDXTREE_BM_EMPTY ((rdxtree_bm_t)0) +#define RDXTREE_BM_FULL \ + ((~(rdxtree_bm_t)0) >> (RDXTREE_BM_SIZE - RDXTREE_RADIX_SIZE)) + +/* + * These macros can be replaced by actual functions in an environment + * that provides lockless synchronization such as RCU. + */ +#define llsync_assign_ptr(ptr, value) ((ptr) = (value)) +#define llsync_read_ptr(ptr) (ptr) + +/* + * Radix tree node. + * + * The height of a tree is the number of nodes to traverse until stored + * pointers are reached. 
A height of 0 means the entries of a node (or the + * tree root) directly point to stored pointers. + * + * The index is valid if and only if the parent isn't NULL. + * + * Concerning the allocation bitmap, a bit is set when the node it denotes, + * or one of its children, can be used to allocate an entry. Conversely, a bit + * is clear when the matching node and all of its children have no free entry. + * + * In order to support safe lockless lookups, in particular during a resize, + * each node includes the height of its subtree, which is invariant during + * the entire node lifetime. Since the tree height does vary, it can't be + * used to determine whether the tree root is a node or a stored pointer. + * This implementation assumes that all nodes and stored pointers are at least + * 4-byte aligned, and uses the least significant bit of entries to indicate + * the pointer type. This bit is set for internal nodes, and clear for stored + * pointers so that they can be accessed from slots without conversion. + */ +struct rdxtree_node { + struct rdxtree_node *parent; + unsigned int index; + unsigned int height; + unsigned int nr_entries; + rdxtree_bm_t alloc_bm; + void *entries[RDXTREE_RADIX_SIZE]; +}; + +/* + * We allocate nodes using the slab allocator. + */ +static struct kmem_cache rdxtree_node_cache; + +void +rdxtree_cache_init(void) +{ + kmem_cache_init(&rdxtree_node_cache, "rdxtree_node", + sizeof(struct rdxtree_node), 0, NULL, 0); +} + +#ifdef RDXTREE_ENABLE_NODE_CREATION_FAILURES +unsigned int rdxtree_fail_node_creation_threshold; +unsigned int rdxtree_nr_node_creations; +#endif /* RDXTREE_ENABLE_NODE_CREATION_FAILURES */ + +static inline int +rdxtree_check_alignment(const void *ptr) +{ + return ((unsigned long)ptr & ~RDXTREE_ENTRY_ADDR_MASK) == 0; +} + +static inline void * +rdxtree_entry_addr(void *entry) +{ + return (void *)((unsigned long)entry & RDXTREE_ENTRY_ADDR_MASK); +} + +static inline int +rdxtree_entry_is_node(const void *entry) +{ + return ((unsigned long)entry & 1) != 0; +} + +static inline void * +rdxtree_node_to_entry(struct rdxtree_node *node) +{ + return (void *)((unsigned long)node | 1); +} + +static int +rdxtree_node_create(struct rdxtree_node **nodep, unsigned int height) +{ + struct rdxtree_node *node; + +#ifdef RDXTREE_ENABLE_NODE_CREATION_FAILURES + if (rdxtree_fail_node_creation_threshold != 0) { + rdxtree_nr_node_creations++; + + if (rdxtree_nr_node_creations == rdxtree_fail_node_creation_threshold) + return ERR_NOMEM; + } +#endif /* RDXTREE_ENABLE_NODE_CREATION_FAILURES */ + + node = (struct rdxtree_node *) kmem_cache_alloc(&rdxtree_node_cache); + + if (node == NULL) + return ERR_NOMEM; + + assert(rdxtree_check_alignment(node)); + node->parent = NULL; + node->height = height; + node->nr_entries = 0; + node->alloc_bm = RDXTREE_BM_FULL; + memset(node->entries, 0, sizeof(node->entries)); + *nodep = node; + return 0; +} + +static void +rdxtree_node_schedule_destruction(struct rdxtree_node *node) +{ + /* + * This function is intended to use the appropriate interface to defer + * destruction until all read-side references are dropped in an + * environment that provides lockless synchronization. + * + * Otherwise, it simply "schedules" destruction immediately. 
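+ *
+ * Here the llsync_* macros defined above are plain pointer accesses, so
+ * there is no read-side grace period to wait for and the node is freed
+ * at once.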
+ */ + kmem_cache_free(&rdxtree_node_cache, (vm_offset_t) node); +} + +static inline void +rdxtree_node_link(struct rdxtree_node *node, struct rdxtree_node *parent, + unsigned int index) +{ + node->parent = parent; + node->index = index; +} + +static inline void +rdxtree_node_unlink(struct rdxtree_node *node) +{ + assert(node->parent != NULL); + node->parent = NULL; +} + +static inline int +rdxtree_node_full(struct rdxtree_node *node) +{ + return (node->nr_entries == ARRAY_SIZE(node->entries)); +} + +static inline int +rdxtree_node_empty(struct rdxtree_node *node) +{ + return (node->nr_entries == 0); +} + +static inline void +rdxtree_node_insert(struct rdxtree_node *node, unsigned int index, + void *entry) +{ + assert(index < ARRAY_SIZE(node->entries)); + assert(node->entries[index] == NULL); + + node->nr_entries++; + llsync_assign_ptr(node->entries[index], entry); +} + +static inline void +rdxtree_node_insert_node(struct rdxtree_node *node, unsigned int index, + struct rdxtree_node *child) +{ + rdxtree_node_insert(node, index, rdxtree_node_to_entry(child)); +} + +static inline void +rdxtree_node_remove(struct rdxtree_node *node, unsigned int index) +{ + assert(index < ARRAY_SIZE(node->entries)); + assert(node->entries[index] != NULL); + + node->nr_entries--; + llsync_assign_ptr(node->entries[index], NULL); +} + +static inline void * +rdxtree_node_find(struct rdxtree_node *node, unsigned int *indexp) +{ + unsigned int index; + void *ptr; + + index = *indexp; + + while (index < ARRAY_SIZE(node->entries)) { + ptr = rdxtree_entry_addr(llsync_read_ptr(node->entries[index])); + + if (ptr != NULL) { + *indexp = index; + return ptr; + } + + index++; + } + + return NULL; +} + +static inline void +rdxtree_node_bm_set(struct rdxtree_node *node, unsigned int index) +{ + node->alloc_bm |= (rdxtree_bm_t)1 << index; +} + +static inline void +rdxtree_node_bm_clear(struct rdxtree_node *node, unsigned int index) +{ + node->alloc_bm &= ~((rdxtree_bm_t)1 << index); +} + +static inline int +rdxtree_node_bm_is_set(struct rdxtree_node *node, unsigned int index) +{ + return (node->alloc_bm & ((rdxtree_bm_t)1 << index)); +} + +static inline int +rdxtree_node_bm_empty(struct rdxtree_node *node) +{ + return (node->alloc_bm == RDXTREE_BM_EMPTY); +} + +static inline unsigned int +rdxtree_node_bm_first(struct rdxtree_node *node) +{ + return rdxtree_ffs(node->alloc_bm) - 1; +} + +static inline rdxtree_key_t +rdxtree_max_key(unsigned int height) +{ + size_t shift; + + shift = RDXTREE_RADIX * height; + + if (likely(shift < (sizeof(rdxtree_key_t) * CHAR_BIT))) + return ((rdxtree_key_t)1 << shift) - 1; + else + return ~((rdxtree_key_t)0); +} + +static void +rdxtree_shrink(struct rdxtree *tree) +{ + struct rdxtree_node *node; + void *entry; + + while (tree->height > 0) { + node = rdxtree_entry_addr(tree->root); + + if (node->nr_entries != 1) + break; + + entry = node->entries[0]; + + if (entry == NULL) + break; + + tree->height--; + + if (tree->height > 0) + rdxtree_node_unlink(rdxtree_entry_addr(entry)); + + llsync_assign_ptr(tree->root, entry); + rdxtree_node_schedule_destruction(node); + } +} + +static int +rdxtree_grow(struct rdxtree *tree, rdxtree_key_t key) +{ + struct rdxtree_node *root, *node; + unsigned int new_height; + int error; + + new_height = tree->height + 1; + + while (key > rdxtree_max_key(new_height)) + new_height++; + + if (tree->root == NULL) { + tree->height = new_height; + return ERR_SUCCESS; + } + + root = rdxtree_entry_addr(tree->root); + + do { + error = rdxtree_node_create(&node, tree->height); + 
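+        /*
+         * If a new level cannot be allocated, shrink the tree to drop any
+         * partially added levels and report the error to the caller.
+         */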
+ if (error) { + rdxtree_shrink(tree); + return error; + } + + if (tree->height == 0) + rdxtree_node_bm_clear(node, 0); + else { + rdxtree_node_link(root, node, 0); + + if (rdxtree_node_bm_empty(root)) + rdxtree_node_bm_clear(node, 0); + } + + rdxtree_node_insert(node, 0, tree->root); + tree->height++; + llsync_assign_ptr(tree->root, rdxtree_node_to_entry(node)); + root = node; + } while (new_height > tree->height); + + return ERR_SUCCESS; +} + +static void +rdxtree_cleanup(struct rdxtree *tree, struct rdxtree_node *node) +{ + struct rdxtree_node *prev; + + for (;;) { + if (likely(!rdxtree_node_empty(node))) { + if (unlikely(node->parent == NULL)) + rdxtree_shrink(tree); + + break; + } + + if (node->parent == NULL) { + tree->height = 0; + llsync_assign_ptr(tree->root, NULL); + rdxtree_node_schedule_destruction(node); + break; + } + + prev = node; + node = node->parent; + rdxtree_node_unlink(prev); + rdxtree_node_remove(node, prev->index); + rdxtree_node_schedule_destruction(prev); + } +} + +static void +rdxtree_insert_bm_clear(struct rdxtree_node *node, unsigned int index) +{ + for (;;) { + rdxtree_node_bm_clear(node, index); + + if (!rdxtree_node_full(node) || (node->parent == NULL)) + break; + + index = node->index; + node = node->parent; + } +} + +int +rdxtree_insert_common(struct rdxtree *tree, rdxtree_key_t key, + void *ptr, void ***slotp) +{ + struct rdxtree_node *node, *prev; + unsigned int height, shift, index = index; + int error; + + assert(ptr != NULL); + assert(rdxtree_check_alignment(ptr)); + + if (unlikely(key > rdxtree_max_key(tree->height))) { + error = rdxtree_grow(tree, key); + + if (error) + return error; + } + + height = tree->height; + + if (unlikely(height == 0)) { + if (tree->root != NULL) + return ERR_BUSY; + + llsync_assign_ptr(tree->root, ptr); + + if (slotp != NULL) + *slotp = &tree->root; + + return ERR_SUCCESS; + } + + node = rdxtree_entry_addr(tree->root); + shift = (height - 1) * RDXTREE_RADIX; + prev = NULL; + + do { + if (node == NULL) { + error = rdxtree_node_create(&node, height - 1); + + if (error) { + if (prev == NULL) + tree->height = 0; + else + rdxtree_cleanup(tree, prev); + + return error; + } + + if (prev == NULL) + llsync_assign_ptr(tree->root, rdxtree_node_to_entry(node)); + else { + rdxtree_node_link(node, prev, index); + rdxtree_node_insert_node(prev, index, node); + } + } + + prev = node; + index = (unsigned int)(key >> shift) & RDXTREE_RADIX_MASK; + node = rdxtree_entry_addr(prev->entries[index]); + shift -= RDXTREE_RADIX; + height--; + } while (height > 0); + + if (unlikely(node != NULL)) + return ERR_BUSY; + + rdxtree_node_insert(prev, index, ptr); + rdxtree_insert_bm_clear(prev, index); + + if (slotp != NULL) + *slotp = &prev->entries[index]; + + return ERR_SUCCESS; +} + +int +rdxtree_insert_alloc_common(struct rdxtree *tree, void *ptr, + rdxtree_key_t *keyp, void ***slotp) +{ + struct rdxtree_node *node, *prev; + unsigned int height, shift, index = index; + rdxtree_key_t key; + int error; + + assert(ptr != NULL); + assert(rdxtree_check_alignment(ptr)); + + height = tree->height; + + if (unlikely(height == 0)) { + if (tree->root == NULL) { + llsync_assign_ptr(tree->root, ptr); + *keyp = 0; + + if (slotp != NULL) + *slotp = &tree->root; + + return ERR_SUCCESS; + } + + goto grow; + } + + node = rdxtree_entry_addr(tree->root); + key = 0; + shift = (height - 1) * RDXTREE_RADIX; + prev = NULL; + + do { + if (node == NULL) { + error = rdxtree_node_create(&node, height - 1); + + if (error) { + rdxtree_cleanup(tree, prev); + return error; + } + + 
rdxtree_node_link(node, prev, index); + rdxtree_node_insert_node(prev, index, node); + } + + prev = node; + index = rdxtree_node_bm_first(node); + + if (index == (unsigned int)-1) + goto grow; + + key |= (rdxtree_key_t)index << shift; + node = rdxtree_entry_addr(node->entries[index]); + shift -= RDXTREE_RADIX; + height--; + } while (height > 0); + + rdxtree_node_insert(prev, index, ptr); + rdxtree_insert_bm_clear(prev, index); + + if (slotp != NULL) + *slotp = &prev->entries[index]; + + goto out; + +grow: + key = rdxtree_max_key(height) + 1; + error = rdxtree_insert_common(tree, key, ptr, slotp); + + if (error) + return error; + +out: + *keyp = key; + return ERR_SUCCESS; +} + +static void +rdxtree_remove_bm_set(struct rdxtree_node *node, unsigned int index) +{ + do { + rdxtree_node_bm_set(node, index); + + if (node->parent == NULL) + break; + + index = node->index; + node = node->parent; + } while (!rdxtree_node_bm_is_set(node, index)); +} + +void * +rdxtree_remove(struct rdxtree *tree, rdxtree_key_t key) +{ + struct rdxtree_node *node, *prev; + unsigned int height, shift, index; + + height = tree->height; + + if (unlikely(key > rdxtree_max_key(height))) + return NULL; + + node = rdxtree_entry_addr(tree->root); + + if (unlikely(height == 0)) { + llsync_assign_ptr(tree->root, NULL); + return node; + } + + shift = (height - 1) * RDXTREE_RADIX; + + do { + if (node == NULL) + return NULL; + + prev = node; + index = (unsigned int)(key >> shift) & RDXTREE_RADIX_MASK; + node = rdxtree_entry_addr(node->entries[index]); + shift -= RDXTREE_RADIX; + height--; + } while (height > 0); + + if (node == NULL) + return NULL; + + rdxtree_node_remove(prev, index); + rdxtree_remove_bm_set(prev, index); + rdxtree_cleanup(tree, prev); + return node; +} + +void * +rdxtree_lookup_common(const struct rdxtree *tree, rdxtree_key_t key, + int get_slot) +{ + struct rdxtree_node *node, *prev; + unsigned int height, shift, index; + void *entry; + + entry = llsync_read_ptr(tree->root); + + if (entry == NULL) { + node = NULL; + height = 0; + } else { + node = rdxtree_entry_addr(entry); + height = rdxtree_entry_is_node(entry) ? node->height + 1 : 0; + } + + if (key > rdxtree_max_key(height)) + return NULL; + + if (height == 0) { + if (node == NULL) + return NULL; + + return get_slot ? (void *)&tree->root : node; + } + + shift = (height - 1) * RDXTREE_RADIX; + + do { + if (node == NULL) + return NULL; + + prev = node; + index = (unsigned int)(key >> shift) & RDXTREE_RADIX_MASK; + entry = llsync_read_ptr(node->entries[index]); + node = rdxtree_entry_addr(entry); + shift -= RDXTREE_RADIX; + height--; + } while (height > 0); + + if (node == NULL) + return NULL; + + return get_slot ? 
(void *)&prev->entries[index] : node; +} + +void * +rdxtree_replace_slot(void **slot, void *ptr) +{ + void *old; + + assert(ptr != NULL); + assert(rdxtree_check_alignment(ptr)); + + old = *slot; + assert(old != NULL); + assert(rdxtree_check_alignment(old)); + llsync_assign_ptr(*slot, ptr); + return old; +} + +static void * +rdxtree_walk_next(struct rdxtree *tree, struct rdxtree_iter *iter) +{ + struct rdxtree_node *root, *node, *prev; + unsigned int height, shift, index, orig_index; + rdxtree_key_t key; + void *entry; + + entry = llsync_read_ptr(tree->root); + + if (entry == NULL) + return NULL; + + if (!rdxtree_entry_is_node(entry)) { + if (iter->key != (rdxtree_key_t)-1) + return NULL; + else { + iter->key = 0; + return rdxtree_entry_addr(entry); + } + } + + key = iter->key + 1; + + if ((key == 0) && (iter->node != NULL)) + return NULL; + + root = rdxtree_entry_addr(entry); + +restart: + node = root; + height = root->height + 1; + + if (key > rdxtree_max_key(height)) + return NULL; + + shift = (height - 1) * RDXTREE_RADIX; + + do { + prev = node; + index = (key >> shift) & RDXTREE_RADIX_MASK; + orig_index = index; + node = rdxtree_node_find(node, &index); + + if (node == NULL) { + shift += RDXTREE_RADIX; + key = ((key >> shift) + 1) << shift; + + if (key == 0) + return NULL; + + goto restart; + } + + if (orig_index != index) + key = ((key >> shift) + (index - orig_index)) << shift; + + shift -= RDXTREE_RADIX; + height--; + } while (height > 0); + + iter->node = prev; + iter->key = key; + return node; +} + +void * +rdxtree_walk(struct rdxtree *tree, struct rdxtree_iter *iter) +{ + unsigned int index, orig_index; + void *ptr; + + if (iter->node == NULL) + return rdxtree_walk_next(tree, iter); + + index = (iter->key + 1) & RDXTREE_RADIX_MASK; + + if (index != 0) { + orig_index = index; + ptr = rdxtree_node_find(iter->node, &index); + + if (ptr != NULL) { + iter->key += (index - orig_index) + 1; + return ptr; + } + } + + return rdxtree_walk_next(tree, iter); +} + +void +rdxtree_remove_all(struct rdxtree *tree) +{ + struct rdxtree_node *node, *parent; + struct rdxtree_iter iter; + + if (tree->height == 0) { + if (tree->root != NULL) + llsync_assign_ptr(tree->root, NULL); + + return; + } + + for (;;) { + rdxtree_iter_init(&iter); + rdxtree_walk_next(tree, &iter); + + if (iter.node == NULL) + break; + + node = iter.node; + parent = node->parent; + + if (parent == NULL) + rdxtree_init(tree); + else { + rdxtree_node_remove(parent, node->index); + rdxtree_remove_bm_set(parent, node->index); + rdxtree_cleanup(tree, parent); + node->parent = NULL; + } + + rdxtree_node_schedule_destruction(node); + } +} diff --git a/kern/rdxtree.h b/kern/rdxtree.h new file mode 100644 index 0000000..9892d56 --- /dev/null +++ b/kern/rdxtree.h @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2011-2015 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Radix tree. + * + * In addition to the standard insertion operation, this implementation + * can allocate keys for the caller at insertion time. + * + * Upstream site with license notes : + * http://git.sceen.net/rbraun/librbraun.git/ + */ + +#ifndef _RDXTREE_H +#define _RDXTREE_H + +#include <stddef.h> +#include <stdint.h> + +/* + * Initialize the node cache. + */ +void rdxtree_cache_init(void); + +/* + * This macro selects between 32 or 64-bits (the default) keys. + */ +#if 0 +#define RDXTREE_KEY_32 +#endif + +#ifdef RDXTREE_KEY_32 +typedef uint32_t rdxtree_key_t; +#else /* RDXTREE_KEY_32 */ +typedef uint64_t rdxtree_key_t; +#endif /* RDXTREE_KEY_32 */ + +/* + * Radix tree. + */ +struct rdxtree; + +/* + * Radix tree iterator. + */ +struct rdxtree_iter; + +/* + * Static tree initializer. + */ +#define RDXTREE_INITIALIZER { 0, NULL } + +#include "rdxtree_i.h" + +/* + * Initialize a tree. + */ +static inline void +rdxtree_init(struct rdxtree *tree) +{ + tree->height = 0; + tree->root = NULL; +} + +/* + * Insert a pointer in a tree. + * + * The ptr parameter must not be NULL. + */ +static inline int +rdxtree_insert(struct rdxtree *tree, rdxtree_key_t key, void *ptr) +{ + return rdxtree_insert_common(tree, key, ptr, NULL); +} + +/* + * Insert a pointer in a tree and obtain its slot. + * + * The ptr and slotp parameters must not be NULL. If successful, the slot of + * the newly inserted pointer is stored at the address pointed to by the slotp + * parameter. + */ +static inline int +rdxtree_insert_slot(struct rdxtree *tree, rdxtree_key_t key, + void *ptr, void ***slotp) +{ + return rdxtree_insert_common(tree, key, ptr, slotp); +} + +/* + * Insert a pointer in a tree, for which a new key is allocated. + * + * The ptr and keyp parameters must not be NULL. The newly allocated key is + * stored at the address pointed to by the keyp parameter. + */ +static inline int +rdxtree_insert_alloc(struct rdxtree *tree, void *ptr, rdxtree_key_t *keyp) +{ + return rdxtree_insert_alloc_common(tree, ptr, keyp, NULL); +} + +/* + * Insert a pointer in a tree, for which a new key is allocated, and obtain + * its slot. + * + * The ptr, keyp and slotp parameters must not be NULL. The newly allocated + * key is stored at the address pointed to by the keyp parameter while the + * slot of the inserted pointer is stored at the address pointed to by the + * slotp parameter. + */ +static inline int +rdxtree_insert_alloc_slot(struct rdxtree *tree, void *ptr, + rdxtree_key_t *keyp, void ***slotp) +{ + return rdxtree_insert_alloc_common(tree, ptr, keyp, slotp); +} + +/* + * Remove a pointer from a tree. + * + * The matching pointer is returned if successful, NULL otherwise. + */ +void * rdxtree_remove(struct rdxtree *tree, rdxtree_key_t key); + +/* + * Look up a pointer in a tree. 
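+ *
+ * Minimal pairing with the insertion routines, sketched for illustration
+ * (obj, key and error are caller-side names, not part of this interface):
+ *
+ *   error = rdxtree_insert_alloc(&tree, obj, &key);
+ *   ...
+ *   obj = rdxtree_lookup(&tree, key);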
+ * + * The matching pointer is returned if successful, NULL otherwise. + */ +static inline void * +rdxtree_lookup(const struct rdxtree *tree, rdxtree_key_t key) +{ + return rdxtree_lookup_common(tree, key, 0); +} + +/* + * Look up a slot in a tree. + * + * A slot is a pointer to a stored pointer in a tree. It can be used as + * a placeholder for fast replacements to avoid multiple lookups on the same + * key. + * + * A slot for the matching pointer is returned if successful, NULL otherwise. + * + * See rdxtree_replace_slot(). + */ +static inline void ** +rdxtree_lookup_slot(const struct rdxtree *tree, rdxtree_key_t key) +{ + return rdxtree_lookup_common(tree, key, 1); +} + +/* + * Replace a pointer in a tree. + * + * The ptr parameter must not be NULL. The previous pointer is returned. + * + * See rdxtree_lookup_slot(). + */ +void * rdxtree_replace_slot(void **slot, void *ptr); + +/* + * Forge a loop to process all pointers of a tree. + */ +#define rdxtree_for_each(tree, iter, ptr) \ +for (rdxtree_iter_init(iter), ptr = rdxtree_walk(tree, iter); \ + ptr != NULL; \ + ptr = rdxtree_walk(tree, iter)) + +/* + * Return the key of the current pointer from an iterator. + */ +static inline rdxtree_key_t +rdxtree_iter_key(const struct rdxtree_iter *iter) +{ + return iter->key; +} + +/* + * Remove all pointers from a tree. + * + * The common way to destroy a tree and its pointers is to loop over all + * the pointers using rdxtree_for_each(), freeing them, then call this + * function. + */ +void rdxtree_remove_all(struct rdxtree *tree); + +#endif /* _RDXTREE_H */ diff --git a/kern/rdxtree_i.h b/kern/rdxtree_i.h new file mode 100644 index 0000000..d9a59bf --- /dev/null +++ b/kern/rdxtree_i.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2011-2015 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Upstream site with license notes : + * http://git.sceen.net/rbraun/librbraun.git/ + */ + +#ifndef _RDXTREE_I_H +#define _RDXTREE_I_H + +/* + * Radix tree. + */ +struct rdxtree { + unsigned int height; + void *root; +}; + +/* + * Radix tree iterator. + * + * The node member refers to the node containing the current pointer, if any. + * The key member refers to the current pointer, and is valid if and only if + * rdxtree_walk() has been called at least once on the iterator. 
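+ *
+ * Illustrative traversal with the public rdxtree_for_each() macro, where
+ * consume() stands in for caller code:
+ *
+ *   struct rdxtree_iter iter;
+ *   void *ptr;
+ *
+ *   rdxtree_for_each(&tree, &iter, ptr)
+ *       consume(rdxtree_iter_key(&iter), ptr);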
+ */ +struct rdxtree_iter { + void *node; + rdxtree_key_t key; +}; + +/* + * Initialize an iterator. + */ +static inline void +rdxtree_iter_init(struct rdxtree_iter *iter) +{ + iter->node = NULL; + iter->key = (rdxtree_key_t)-1; +} + +int rdxtree_insert_common(struct rdxtree *tree, rdxtree_key_t key, + void *ptr, void ***slotp); + +int rdxtree_insert_alloc_common(struct rdxtree *tree, void *ptr, + rdxtree_key_t *keyp, void ***slotp); + +void * rdxtree_lookup_common(const struct rdxtree *tree, rdxtree_key_t key, + int get_slot); + +void * rdxtree_walk(struct rdxtree *tree, struct rdxtree_iter *iter); + +#endif /* _RDXTREE_I_H */ diff --git a/kern/refcount.h b/kern/refcount.h new file mode 100644 index 0000000..f32feb8 --- /dev/null +++ b/kern/refcount.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: refcount.h + * + * This defines the system-independent part of the atomic reference count data type. + * + */ + +#ifndef _KERN_REFCOUNT_H_ +#define _KERN_REFCOUNT_H_ + +#include <kern/macros.h> + +/* Unless the above include file specified otherwise, + use the system-independent (unoptimized) atomic reference counter. */ +#ifndef MACHINE_REFCOUNT + +#include <kern/lock.h> + +struct RefCount { + decl_simple_lock_data(,lock) /* lock for reference count */ + int ref_count; /* number of references */ +}; +typedef struct RefCount RefCount; + +#define refcount_init(refcount, refs) \ + MACRO_BEGIN \ + simple_lock_init(&(refcount)->lock); \ + ((refcount)->ref_count = (refs)); \ + MACRO_END + +#define refcount_take(refcount) \ + MACRO_BEGIN \ + simple_lock(&(refcount)->lock); \ + (refcount)->ref_count++; \ + simple_unlock(&(refcount)->lock); \ + MACRO_END + +#define refcount_drop(refcount, func) \ + MACRO_BEGIN \ + int new_value; \ + simple_lock(&(refcount)->lock); \ + new_value = --(refcount)->ref_count; \ + simple_unlock(&(refcount)->lock); \ + if (new_value == 0) { func; } \ + MACRO_END + +#endif /* MACHINE_REFCOUNT */ + +#endif /* _KERN_REFCOUNT_H_ */ diff --git a/kern/sched.h b/kern/sched.h new file mode 100644 index 0000000..d7e74d3 --- /dev/null +++ b/kern/sched.h @@ -0,0 +1,186 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: sched.h + * Author: Avadis Tevanian, Jr. + * Date: 1985 + * + * Header file for scheduler. + * + */ + +#ifndef _KERN_SCHED_H_ +#define _KERN_SCHED_H_ + +#include <kern/queue.h> +#include <kern/lock.h> +#include <kern/kern_types.h> +#include <kern/macros.h> + +#if MACH_FIXPRI +#include <mach/policy.h> +#endif /* MACH_FIXPRI */ + +#if STAT_TIME + +/* + * Statistical timing uses microseconds as timer units. 17 bit shift + * yields priorities. PRI_SHIFT_2 isn't needed. + */ +#define PRI_SHIFT 17 + +#else /* STAT_TIME */ + +/* + * Otherwise machine provides shift(s) based on time units it uses. + */ +#include <machine/sched_param.h> + +#endif /* STAT_TIME */ +#define NRQS 64 /* 64 run queues per cpu */ + +struct run_queue { + queue_head_t runq[NRQS]; /* one for each priority */ + decl_simple_lock_data(, lock) /* one lock for all queues, + shall be taken at splsched + only */ + int low; /* low queue value */ + int count; /* count of threads runable */ +}; + +typedef struct run_queue *run_queue_t; +#define RUN_QUEUE_NULL ((run_queue_t) 0) + +/* Shall be taken at splsched only */ +#ifdef MACH_LDEBUG +#define runq_lock(rq) do { \ + assert_splsched(); \ + simple_lock_nocheck(&(rq)->lock); \ +} while (0) +#define runq_unlock(rq) do { \ + assert_splsched(); \ + simple_unlock_nocheck(&(rq)->lock); \ +} while (0) +#else +#define runq_lock(rq) simple_lock_nocheck(&(rq)->lock) +#define runq_unlock(rq) simple_unlock_nocheck(&(rq)->lock) +#endif + +#if MACH_FIXPRI +/* + * NOTE: For fixed priority threads, first_quantum indicates + * whether context switch at same priority is ok. For timeshareing + * it indicates whether preempt is ok. + */ + +#define csw_needed(thread, processor) ((thread)->state & TH_SUSP || \ + ((processor)->runq.count > 0) || \ + ((thread)->policy == POLICY_TIMESHARE && \ + (processor)->first_quantum == FALSE && \ + (processor)->processor_set->runq.count > 0 && \ + (processor)->processor_set->runq.low <= \ + (thread)->sched_pri) || \ + ((thread)->policy == POLICY_FIXEDPRI && \ + (processor)->processor_set->runq.count > 0 && \ + ((((processor)->first_quantum == FALSE) && \ + ((processor)->processor_set->runq.low <= \ + (thread)->sched_pri)) || \ + ((processor)->processor_set->runq.low < \ + (thread)->sched_pri)))) + +#else /* MACH_FIXPRI */ +#define csw_needed(thread, processor) ((thread)->state & TH_SUSP || \ + ((processor)->runq.count > 0) || \ + ((processor)->first_quantum == FALSE && \ + ((processor)->processor_set->runq.count > 0 && \ + (processor)->processor_set->runq.low <= \ + ((thread)->sched_pri)))) +#endif /* MACH_FIXPRI */ + +/* + * Scheduler routines. 
+ */ + +extern struct run_queue *rem_runq(thread_t); +extern struct thread *choose_thread(processor_t); +extern queue_head_t action_queue; /* assign/shutdown queue */ +decl_simple_lock_data(extern,action_lock); + +extern int min_quantum; /* defines max context switch rate */ +#define MIN_QUANTUM (hz / 33) /* context switch 33 times/second */ + +/* + * Default base priorities for threads. + */ +#define BASEPRI_SYSTEM 6 +#define BASEPRI_USER 25 + +/* + * Macro to check for invalid priorities. + */ + +#define invalid_pri(pri) (((pri) < 0) || ((pri) >= NRQS)) + +/* + * Shift structures for holding update shifts. Actual computation + * is usage = (usage >> shift1) +/- (usage >> abs(shift2)) where the + * +/- is determined by the sign of shift 2. + */ +struct shift { + int shift1; + int shift2; +}; + +typedef struct shift *shift_t, shift_data_t; + +/* + * sched_tick increments once a second. Used to age priorities. + */ + +extern unsigned sched_tick; + +#define SCHED_SCALE 128 +#define SCHED_SHIFT 7 + +/* + * thread_timer_delta macro takes care of both thread timers. + */ + +#define thread_timer_delta(thread) \ +MACRO_BEGIN \ + unsigned delta; \ + \ + delta = 0; \ + TIMER_DELTA((thread)->system_timer, \ + (thread)->system_timer_save, delta); \ + TIMER_DELTA((thread)->user_timer, \ + (thread)->user_timer_save, delta); \ + (thread)->cpu_delta += delta; \ + (thread)->sched_delta += delta * \ + (thread)->processor_set->sched_load; \ +MACRO_END + +#endif /* _KERN_SCHED_H_ */ diff --git a/kern/sched_prim.c b/kern/sched_prim.c new file mode 100644 index 0000000..24f342f --- /dev/null +++ b/kern/sched_prim.c @@ -0,0 +1,2059 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: sched_prim.c + * Author: Avadis Tevanian, Jr. 
+ * Date: 1986 + * + * Scheduling primitives + * + */ + +#include <kern/printf.h> +#include <mach/machine.h> +#include <machine/locore.h> +#include <machine/machspl.h> /* For def'n of splsched() */ +#include <machine/model_dep.h> +#include <kern/ast.h> +#include <kern/counters.h> +#include <kern/cpu_number.h> +#include <kern/debug.h> +#include <kern/lock.h> +#include <kern/mach_clock.h> +#include <kern/mach_factor.h> +#include <kern/macros.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/smp.h> +#include <kern/syscall_subr.h> +#include <kern/thread.h> +#include <kern/thread_swap.h> +#include <vm/pmap.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> + +#if MACH_FIXPRI +#include <mach/policy.h> +#endif /* MACH_FIXPRI */ + +int min_quantum; /* defines max context switch rate */ + +unsigned sched_tick; + +thread_t sched_thread_id; + +timer_elt_data_t recompute_priorities_timer; + +/* + * State machine + * + * states are combinations of: + * R running + * W waiting (or on wait queue) + * S suspended (or will suspend) + * N non-interruptible + * + * init action + * assert_wait thread_block clear_wait suspend resume + * + * R RW, RWN R; setrun - RS - + * RS RWS, RWNS S; wake_active - - R + * RN RWN RN; setrun - RNS - + * RNS RWNS RNS; setrun - - RN + * + * RW W R RWS - + * RWN WN RN RWNS - + * RWS WS; wake_active RS - RW + * RWNS WNS RNS - RWN + * + * W R; setrun WS - + * WN RN; setrun WNS - + * WNS RNS; setrun - WN + * + * S - - R + * WS S - W + * + */ + +/* + * Waiting protocols and implementation: + * + * Each thread may be waiting for exactly one event; this event + * is set using assert_wait(). That thread may be awakened either + * by performing a thread_wakeup_prim() on its event, + * or by directly waking that thread up with clear_wait(). + * + * The implementation of wait events uses a hash table. Each + * bucket is queue of threads having the same hash function + * value; the chain for the queue (linked list) is the run queue + * field. [It is not possible to be waiting and runnable at the + * same time.] + * + * Locks on both the thread and on the hash buckets govern the + * wait event field and the queue chain field. Because wakeup + * operations only have the event as an argument, the event hash + * bucket must be locked before any thread. + * + * Scheduling operations may also occur at interrupt level; therefore, + * interrupts below splsched() must be prevented when holding + * thread or hash bucket locks. + * + * The wait event hash table declarations are as follows: + */ + +#define NUMQUEUES 1031 + +/* Shall be taken at splsched only */ +decl_simple_lock_data(static, wait_lock[NUMQUEUES]) /* Lock for... */ +queue_head_t wait_queue[NUMQUEUES]; + +#ifdef MACH_LDEBUG +#define waitq_lock(wl) do { \ + assert_splsched(); \ + simple_lock_nocheck(wl); \ +} while (0) +#define waitq_unlock(wl) do { \ + assert_splsched(); \ + simple_unlock_nocheck(wl); \ +} while (0) +#else +#define waitq_lock(wl) simple_lock_nocheck(wl) +#define waitq_unlock(wl) simple_unlock_nocheck(wl) +#endif + + +/* NOTE: we want a small positive integer out of this */ +#define wait_hash(event) \ + ((((long)(event) < 0) ? 
~(long)(event) : (long)(event)) % NUMQUEUES) + +static void wait_queue_init(void) +{ + int i; + + for (i = 0; i < NUMQUEUES; i++) { + queue_init(&wait_queue[i]); + simple_lock_init(&wait_lock[i]); + } +} + +void sched_init(void) +{ + recompute_priorities_timer.fcn = recompute_priorities; + recompute_priorities_timer.param = NULL; + + min_quantum = MIN_QUANTUM; + wait_queue_init(); + pset_sys_bootstrap(); /* initialize processor mgmt. */ + queue_init(&action_queue); + simple_lock_init(&action_lock); + sched_tick = 0; + ast_init(); +} + +/* + * Thread timeout routine, called when timer expires. + * Called at splsoftclock. + */ +static void thread_timeout( + void *_thread) +{ + thread_t thread = _thread; + assert(thread->timer.set == TELT_UNSET); + + clear_wait(thread, THREAD_TIMED_OUT, FALSE); +} + +/* + * thread_set_timeout: + * + * Set a timer for the current thread, if the thread + * is ready to wait. Must be called between assert_wait() + * and thread_block(). + */ + +void thread_set_timeout( + int t) /* timeout interval in ticks */ +{ + thread_t thread = current_thread(); + spl_t s; + + s = splsched(); + thread_lock(thread); + if ((thread->state & TH_WAIT) != 0) { + set_timeout(&thread->timer, t); + } + thread_unlock(thread); + splx(s); +} + +/* + * Set up thread timeout element when thread is created. + */ +void thread_timeout_setup( + thread_t thread) +{ + thread->timer.fcn = thread_timeout; + thread->timer.param = thread; + thread->depress_timer.fcn = (void (*)(void*))thread_depress_timeout; + thread->depress_timer.param = thread; +} + +/* + * assert_wait: + * + * Assert that the current thread is about to go to + * sleep until the specified event occurs. + */ +void assert_wait( + event_t event, + boolean_t interruptible) +{ + queue_t q; + int index; + thread_t thread; + decl_simple_lock_data( , *lock); + spl_t s; + + thread = current_thread(); + if (thread->wait_event != 0) { + panic("assert_wait: already asserted event %p\n", + thread->wait_event); + } + s = splsched(); + if (event != 0) { + index = wait_hash(event); + q = &wait_queue[index]; + lock = &wait_lock[index]; + waitq_lock(lock); + thread_lock(thread); + enqueue_tail(q, &(thread->links)); + thread->wait_event = event; + if (interruptible) + thread->state |= TH_WAIT; + else + thread->state |= TH_WAIT | TH_UNINT; + thread_unlock(thread); + waitq_unlock(lock); + } + else { + thread_lock(thread); + if (interruptible) + thread->state |= TH_WAIT; + else + thread->state |= TH_WAIT | TH_UNINT; + thread_unlock(thread); + } + splx(s); +} + +/* + * clear_wait: + * + * Clear the wait condition for the specified thread. Start the thread + * executing if that is appropriate. + * + * parameters: + * thread thread to awaken + * result Wakeup result the thread should see + * interrupt_only Don't wake up the thread if it isn't + * interruptible. + */ +void clear_wait( + thread_t thread, + int result, + boolean_t interrupt_only) +{ + int index; + queue_t q; + decl_simple_lock_data( , *lock); + event_t event; + spl_t s; + + s = splsched(); + thread_lock(thread); + if (interrupt_only && (thread->state & TH_UNINT)) { + /* + * can`t interrupt thread + */ + thread_unlock(thread); + splx(s); + return; + } + + event = thread->wait_event; + if (event != 0) { + thread_unlock(thread); + index = wait_hash(event); + q = &wait_queue[index]; + lock = &wait_lock[index]; + waitq_lock(lock); + /* + * If the thread is still waiting on that event, + * then remove it from the list. 
If it is waiting + * on a different event, or no event at all, then + * someone else did our job for us. + */ + thread_lock(thread); + if (thread->wait_event == event) { + remqueue(q, (queue_entry_t)thread); + thread->wait_event = 0; + event = 0; /* cause to run below */ + } + waitq_unlock(lock); + } + if (event == 0) { + int state = thread->state; + + reset_timeout_check(&thread->timer); + + switch (state & TH_SCHED_STATE) { + case TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_UNINT: + case TH_WAIT: + /* + * Sleeping and not suspendable - put + * on run queue. + */ + thread->state = (state &~ TH_WAIT) | TH_RUN; + thread->wait_result = result; + thread_setrun(thread, TRUE); + break; + + case TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + /* + * Either already running, or suspended. + */ + thread->state = state &~ TH_WAIT; + thread->wait_result = result; + break; + + default: + /* + * Not waiting. + */ + break; + } + } + thread_unlock(thread); + splx(s); +} + +#define state_panic(thread) \ + panic ("thread %p has unexpected state %x (%s%s%s%s%s%s%s%s)", \ + thread, thread->state, \ + thread->state & TH_WAIT ? "TH_WAIT|" : "", \ + thread->state & TH_SUSP ? "TH_SUSP|" : "", \ + thread->state & TH_RUN ? "TH_RUN|" : "", \ + thread->state & TH_UNINT ? "TH_UNINT|" : "", \ + thread->state & TH_HALTED ? "TH_HALTED|" : "", \ + thread->state & TH_IDLE ? "TH_IDLE|" : "", \ + thread->state & TH_SWAPPED ? "TH_SWAPPED|" : "", \ + thread->state & TH_SW_COMING_IN ? "TH_SW_COMING_IN|" : "") + +/* + * thread_wakeup_prim: + * + * Common routine for thread_wakeup, thread_wakeup_with_result, + * and thread_wakeup_one. + * + */ +boolean_t thread_wakeup_prim( + event_t event, + boolean_t one_thread, + int result) +{ + queue_t q; + int index; + boolean_t woke = FALSE; + thread_t thread, next_th; + decl_simple_lock_data( , *lock); + spl_t s; + int state; + + index = wait_hash(event); + q = &wait_queue[index]; + s = splsched(); + lock = &wait_lock[index]; + waitq_lock(lock); + thread = (thread_t) queue_first(q); + while (!queue_end(q, (queue_entry_t)thread)) { + next_th = (thread_t) queue_next((queue_t) thread); + + if (thread->wait_event == event) { + thread_lock(thread); + remqueue(q, (queue_entry_t) thread); + thread->wait_event = 0; + reset_timeout_check(&thread->timer); + + state = thread->state; + switch (state & TH_SCHED_STATE) { + + case TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_UNINT: + case TH_WAIT: + /* + * Sleeping and not suspendable - put + * on run queue. + */ + thread->state = (state &~ TH_WAIT) | TH_RUN; + thread->wait_result = result; + thread_setrun(thread, TRUE); + break; + + case TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + /* + * Either already running, or suspended. + */ + thread->state = state &~ TH_WAIT; + thread->wait_result = result; + break; + + default: + state_panic(thread); + break; + } + thread_unlock(thread); + woke = TRUE; + if (one_thread) + break; + } + thread = next_th; + } + waitq_unlock(lock); + splx(s); + return (woke); +} + +/* + * thread_sleep: + * + * Cause the current thread to wait until the specified event + * occurs. The specified lock is unlocked before releasing + * the cpu. (This is a convenient way to sleep without manually + * calling assert_wait). 
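+ *
+ * A typical pairing, sketched for illustration: the sleeping side checks its
+ * condition under the lock and calls thread_sleep(event, lock, TRUE) when it
+ * must wait; the waking side, after making the condition true, calls
+ * thread_wakeup(event) (or thread_wakeup_one(event)).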
+ * + * Note: if the event may be woken from an interrupt handler, this must be + * called at an spl level that prevents such interrupts. + */ +void thread_sleep( + event_t event, + simple_lock_t lock, + boolean_t interruptible) +{ + assert_wait(event, interruptible); /* assert event */ + simple_unlock(lock); /* release the lock */ + thread_block(thread_no_continuation); /* block ourselves */ +} + +/* + * thread_bind: + * + * Force a thread to execute on the specified processor. + * If the thread is currently executing, it may wait until its + * time slice is up before switching onto the specified processor. + * + * A processor of PROCESSOR_NULL causes the thread to be unbound. + * xxx - DO NOT export this to users. + */ +void thread_bind( + thread_t thread, + processor_t processor) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + thread->bound_processor = processor; + thread_unlock(thread); + (void) splx(s); +} + +/* + * Select a thread for this processor (the current processor) to run. + * May select the current thread. + * Assumes splsched. + */ + +static thread_t thread_select( + processor_t myprocessor) +{ + thread_t thread; + + myprocessor->first_quantum = TRUE; + /* + * Check for obvious simple case; local runq is + * empty and global runq has entry at hint. + */ + if (myprocessor->runq.count > 0) { + thread = choose_thread(myprocessor); + myprocessor->quantum = min_quantum; + } + else { + processor_set_t pset; + +#if MACH_HOST + pset = myprocessor->processor_set; +#else /* MACH_HOST */ + pset = &default_pset; +#endif /* MACH_HOST */ + simple_lock(&pset->runq.lock); +#if DEBUG + checkrq(&pset->runq, "thread_select"); +#endif /* DEBUG */ + if (pset->runq.count == 0) { + /* + * Nothing else runnable. Return if this + * thread is still runnable on this processor. + * Check for priority update if required. + */ + thread = current_thread(); + if ((thread->state == TH_RUN) && +#if MACH_HOST + (thread->processor_set == pset) && +#endif /* MACH_HOST */ + ((thread->bound_processor == PROCESSOR_NULL) || + (thread->bound_processor == myprocessor))) { + + simple_unlock(&pset->runq.lock); + thread_lock(thread); + if (thread->sched_stamp != sched_tick) + update_priority(thread); + thread_unlock(thread); + } + else { + thread = choose_pset_thread(myprocessor, pset); + } + } + else { + queue_t q; + + /* + * If there is a thread at hint, grab it, + * else call choose_pset_thread. + */ + q = pset->runq.runq + pset->runq.low; + + if (queue_empty(q)) { + pset->runq.low++; + thread = choose_pset_thread(myprocessor, pset); + } + else { + thread = (thread_t) dequeue_head(q); + thread->runq = RUN_QUEUE_NULL; + pset->runq.count--; +#if MACH_FIXPRI + /* + * Cannot lazy evaluate pset->runq.low for + * fixed priority policy + */ + if ((pset->runq.count > 0) && + (pset->policies & POLICY_FIXEDPRI)) { + while (queue_empty(q)) { + pset->runq.low++; + q++; + } + } +#endif /* MACH_FIXPRI */ +#if DEBUG + checkrq(&pset->runq, "thread_select: after"); +#endif /* DEBUG */ + simple_unlock(&pset->runq.lock); + } + } + +#if MACH_FIXPRI + if (thread->policy == POLICY_TIMESHARE) { +#endif /* MACH_FIXPRI */ + myprocessor->quantum = pset->set_quantum; +#if MACH_FIXPRI + } + else { + /* + * POLICY_FIXEDPRI + */ + myprocessor->quantum = thread->sched_data; + } +#endif /* MACH_FIXPRI */ + } + + return thread; +} + +/* + * Stop running the current thread and start running the new thread. + * If continuation is non-zero, and the current thread is blocked, + * then it will resume by executing continuation on a new stack. 
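+ * (Aside, added for illustration: a continuation is an ordinary function
+ * of no arguments that never returns.  A blocking caller that supplies
+ * one, e.g. the hypothetical
+ *
+ *	assert_wait((event_t) &some_event, TRUE);
+ *	thread_block(my_continuation);
+ *
+ * allows its kernel stack to be handed off or freed while it sleeps;
+ * when the thread runs again, my_continuation() is invoked on a fresh
+ * stack, so it must recover any state it needs from the thread or task
+ * structures rather than from local variables.)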
+ * Returns TRUE if the hand-off succeeds. + * Assumes splsched. + */ + +boolean_t thread_invoke( + thread_t old_thread, + continuation_t continuation, + thread_t new_thread) +{ + /* + * Check for invoking the same thread. + */ + if (old_thread == new_thread) { + /* + * Mark thread interruptible. + * Run continuation if there is one. + */ + thread_lock(new_thread); + new_thread->state &= ~TH_UNINT; + thread_unlock(new_thread); + thread_wakeup(TH_EV_STATE(new_thread)); + + if (continuation != thread_no_continuation) { + (void) spl0(); + call_continuation(continuation); + /*NOTREACHED*/ + } + return TRUE; + } + + /* + * Check for stack-handoff. + */ + thread_lock(new_thread); + if ((old_thread->stack_privilege != current_stack()) && + (continuation != thread_no_continuation)) + { + switch (new_thread->state & TH_SWAP_STATE) { + case TH_SWAPPED: + + new_thread->state &= ~(TH_SWAPPED | TH_UNINT); + thread_unlock(new_thread); + thread_wakeup(TH_EV_STATE(new_thread)); + +#if NCPUS > 1 + new_thread->last_processor = current_processor(); +#endif /* NCPUS > 1 */ + + /* + * Set up ast context of new thread and + * switch to its timer. + */ + ast_context(new_thread, cpu_number()); + timer_switch(&new_thread->system_timer); + + stack_handoff(old_thread, new_thread); + + /* + * We can dispatch the old thread now. + * This is like thread_dispatch, except + * that the old thread is left swapped + * *without* freeing its stack. + * This path is also much more frequent + * than actual calls to thread_dispatch. + */ + + thread_lock(old_thread); + old_thread->swap_func = continuation; + + switch (old_thread->state) { + case TH_RUN | TH_SUSP: + case TH_RUN | TH_SUSP | TH_HALTED: + case TH_RUN | TH_WAIT | TH_SUSP: + /* + * Suspend the thread + */ + old_thread->state = (old_thread->state & ~TH_RUN) + | TH_SWAPPED; + if (old_thread->wake_active) { + old_thread->wake_active = FALSE; + thread_unlock(old_thread); + thread_wakeup(TH_EV_WAKE_ACTIVE(old_thread)); + + goto after_old_thread; + } + break; + + case TH_RUN | TH_SUSP | TH_UNINT: + case TH_RUN | TH_UNINT: + case TH_RUN: + /* + * We can`t suspend the thread yet, + * or it`s still running. + * Put back on a run queue. + */ + old_thread->state |= TH_SWAPPED; + thread_setrun(old_thread, FALSE); + break; + + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT: + /* + * Waiting, and not suspendable. + */ + old_thread->state = (old_thread->state & ~TH_RUN) + | TH_SWAPPED; + break; + + case TH_RUN | TH_IDLE: + /* + * Drop idle thread -- it is already in + * idle_thread_array. + */ + old_thread->state = TH_RUN | TH_IDLE | TH_SWAPPED; + break; + + default: + state_panic(old_thread); + } + thread_unlock(old_thread); + after_old_thread: + + /* + * call_continuation calls the continuation + * after resetting the current stack pointer + * to recover stack space. If we called + * the continuation directly, we would risk + * running out of stack. + */ + + counter(c_thread_invoke_hits++); + (void) spl0(); + call_continuation(new_thread->swap_func); + /*NOTREACHED*/ + return TRUE; /* help for the compiler */ + + case TH_SW_COMING_IN: + /* + * Waiting for a stack + */ + thread_swapin(new_thread); + thread_unlock(new_thread); + counter(c_thread_invoke_misses++); + return FALSE; + + case 0: + /* + * Already has a stack - can`t handoff. + */ + break; + } + } + + else { + /* + * Check that the thread is swapped-in. 
+ */ + if (new_thread->state & TH_SWAPPED) { + if ((new_thread->state & TH_SW_COMING_IN) || + !stack_alloc_try(new_thread, thread_continue)) + { + thread_swapin(new_thread); + thread_unlock(new_thread); + counter(c_thread_invoke_misses++); + return FALSE; + } + } + } + + new_thread->state &= ~(TH_SWAPPED | TH_UNINT); + thread_unlock(new_thread); + thread_wakeup(TH_EV_STATE(new_thread)); + + /* + * Thread is now interruptible. + */ +#if NCPUS > 1 + new_thread->last_processor = current_processor(); +#endif /* NCPUS > 1 */ + + /* + * Set up ast context of new thread and switch to its timer. + */ + ast_context(new_thread, cpu_number()); + timer_switch(&new_thread->system_timer); + + /* + * switch_context is machine-dependent. It does the + * machine-dependent components of a context-switch, like + * changing address spaces. It updates active_thread. + * It returns only if a continuation is not supplied. + */ + counter(c_thread_invoke_csw++); + old_thread = switch_context(old_thread, continuation, new_thread); + + /* + * We're back. Now old_thread is the thread that resumed + * us, and we have to dispatch it. + */ + thread_dispatch(old_thread); + + return TRUE; +} + +/* + * thread_continue: + * + * Called when the current thread is given a new stack. + * Called at splsched. + */ +void thread_continue( + thread_t old_thread) +{ + continuation_t continuation = current_thread()->swap_func; + + /* + * We must dispatch the old thread and then + * call the current thread's continuation. + * There might not be an old thread, if we are + * the first thread to run on this processor. + */ + + if (old_thread != THREAD_NULL) + thread_dispatch(old_thread); + (void) spl0(); + (*continuation)(); + /*NOTREACHED*/ +} + + +/* + * thread_block: + * + * Block the current thread. If the thread is runnable + * then someone must have woken it up between its request + * to sleep and now. In this case, it goes back on a + * run queue. + * + * If a continuation is specified, then thread_block will + * attempt to discard the thread's kernel stack. When the + * thread resumes, it will execute the continuation function + * on a new kernel stack. + */ + +void thread_block( + continuation_t continuation) +{ + thread_t thread = current_thread(); + processor_t myprocessor = cpu_to_processor(cpu_number()); + thread_t new_thread; + spl_t s; + + check_simple_locks(); + + s = splsched(); + +#if FAST_TAS + { + extern void recover_ras(); + + if (csw_needed(thread, myprocessor)) + recover_ras(thread); + } +#endif /* FAST_TAS */ + + ast_off(cpu_number(), AST_BLOCK); + + do + new_thread = thread_select(myprocessor); + while (!thread_invoke(thread, continuation, new_thread)); + + splx(s); +} + +/* + * thread_run: + * + * Switch directly from the current thread to a specified + * thread. Both the current and new threads must be + * runnable. + * + * If a continuation is specified, then thread_block will + * attempt to discard the current thread's kernel stack. When the + * thread resumes, it will execute the continuation function + * on a new kernel stack. + */ +void thread_run( + continuation_t continuation, + thread_t new_thread) +{ + thread_t thread = current_thread(); + processor_t myprocessor = cpu_to_processor(cpu_number()); + spl_t s; + + check_simple_locks(); + + s = splsched(); + + while (!thread_invoke(thread, continuation, new_thread)) + new_thread = thread_select(myprocessor); + + splx(s); +} + +/* + * Dispatches a running thread that is not on a runq. + * Called at splsched. 
+ */ + +void thread_dispatch( + thread_t thread) +{ + /* + * If we are discarding the thread's stack, we must do it + * before the thread has a chance to run. + */ + + thread_lock(thread); + + if (thread->swap_func != thread_no_continuation) { + assert((thread->state & TH_SWAP_STATE) == 0); + thread->state |= TH_SWAPPED; + stack_free(thread); + } + + switch (thread->state &~ TH_SWAP_STATE) { + case TH_RUN | TH_SUSP: + case TH_RUN | TH_SUSP | TH_HALTED: + case TH_RUN | TH_WAIT | TH_SUSP: + /* + * Suspend the thread + */ + thread->state &= ~TH_RUN; + if (thread->wake_active) { + thread->wake_active = FALSE; + thread_unlock(thread); + thread_wakeup(TH_EV_WAKE_ACTIVE(thread)); + return; + } + break; + + case TH_RUN | TH_SUSP | TH_UNINT: + case TH_RUN | TH_UNINT: + case TH_RUN: + /* + * No reason to stop. Put back on a run queue. + */ + thread_setrun(thread, FALSE); + break; + + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT: + /* + * Waiting, and not suspended. + */ + thread->state &= ~TH_RUN; + break; + + case TH_RUN | TH_IDLE: + /* + * Drop idle thread -- it is already in + * idle_thread_array. + */ + break; + + default: + state_panic(thread); + } + thread_unlock(thread); +} + + +/* + * Define shifts for simulating (5/8)**n + */ + +shift_data_t wait_shift[32] = { + {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7}, + {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13}, + {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18}, + {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}}; + +/* + * do_priority_computation: + * + * Calculate new priority for thread based on its base priority plus + * accumulated usage. PRI_SHIFT and PRI_SHIFT_2 convert from + * usage to priorities. SCHED_SHIFT converts for the scaling + * of the sched_usage field by SCHED_SCALE. This scaling comes + * from the multiplication by sched_load (thread_timer_delta) + * in sched.h. sched_load is calculated as a scaled overload + * factor in compute_mach_factor (mach_factor.c). + */ + +#ifdef PRI_SHIFT_2 +#if PRI_SHIFT_2 > 0 +#define do_priority_computation(th, pri) \ + MACRO_BEGIN \ + (pri) = (th)->priority /* start with base priority */ \ + + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \ + + ((th)->sched_usage >> (PRI_SHIFT_2 + SCHED_SHIFT)); \ + if ((pri) > NRQS - 1) (pri) = NRQS - 1; \ + MACRO_END +#else /* PRI_SHIFT_2 */ +#define do_priority_computation(th, pri) \ + MACRO_BEGIN \ + (pri) = (th)->priority /* start with base priority */ \ + + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \ + - ((th)->sched_usage >> (SCHED_SHIFT - PRI_SHIFT_2)); \ + if ((pri) > NRQS - 1) (pri) = NRQS - 1; \ + MACRO_END +#endif /* PRI_SHIFT_2 */ +#else /* defined(PRI_SHIFT_2) */ +#define do_priority_computation(th, pri) \ + MACRO_BEGIN \ + (pri) = (th)->priority /* start with base priority */ \ + + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)); \ + if ((pri) > NRQS - 1) (pri) = NRQS - 1; \ + MACRO_END +#endif /* defined(PRI_SHIFT_2) */ + +/* + * compute_priority: + * + * Compute the effective priority of the specified thread. + * The effective priority computation is as follows: + * + * Take the base priority for this thread and add + * to it an increment derived from its cpu_usage. + * + * The thread *must* be locked by the caller. 
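+ *
+ * Worked example with assumed numbers (not taken from the original
+ * comment): using the PRI_SHIFT-only form of do_priority_computation
+ * above, a thread with base priority 12 whose
+ * sched_usage >> (PRI_SHIFT + SCHED_SHIFT) evaluates to 5 ends up with
+ * an effective priority of 17; a larger value is a less favoured queue,
+ * and the result is clamped to NRQS - 1.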
+ */ + +void compute_priority( + thread_t thread, + boolean_t resched) +{ + int pri; + +#if MACH_FIXPRI + if (thread->policy == POLICY_TIMESHARE) { +#endif /* MACH_FIXPRI */ + do_priority_computation(thread, pri); + if (thread->depress_priority < 0) + set_pri(thread, pri, resched); + else + thread->depress_priority = pri; +#if MACH_FIXPRI + } + else { + set_pri(thread, thread->priority, resched); + } +#endif /* MACH_FIXPRI */ +} + +/* + * compute_my_priority: + * + * Version of compute priority for current thread or thread + * being manipulated by scheduler (going on or off a runq). + * Only used for priority updates. Policy or priority changes + * must call compute_priority above. Caller must have thread + * locked and know it is timesharing and not depressed. + */ + +void compute_my_priority( + thread_t thread) +{ + int temp_pri; + + do_priority_computation(thread,temp_pri); + thread->sched_pri = temp_pri; +} + +/* + * recompute_priorities: + * + * Update the priorities of all threads periodically. + */ +void recompute_priorities(void *param) +{ + sched_tick++; /* age usage one more time */ + set_timeout(&recompute_priorities_timer, hz); + /* + * Wakeup scheduler thread. + */ + if (sched_thread_id != THREAD_NULL) { + clear_wait(sched_thread_id, THREAD_AWAKENED, FALSE); + } +} + +/* + * update_priority + * + * Cause the priority computation of a thread that has been + * sleeping or suspended to "catch up" with the system. Thread + * *MUST* be locked by caller. If thread is running, then this + * can only be called by the thread on itself. + */ +void update_priority( + thread_t thread) +{ + unsigned int ticks; + shift_t shiftp; + int temp_pri; + + ticks = sched_tick - thread->sched_stamp; + + assert(ticks != 0); + + /* + * If asleep for more than 30 seconds forget all + * cpu_usage, else catch up on missed aging. + * 5/8 ** n is approximated by the two shifts + * in the wait_shift array. + */ + thread->sched_stamp += ticks; + thread_timer_delta(thread); + if (ticks > 30) { + thread->cpu_usage = 0; + thread->sched_usage = 0; + } + else { + thread->cpu_usage += thread->cpu_delta; + thread->sched_usage += thread->sched_delta; + shiftp = &wait_shift[ticks]; + if (shiftp->shift2 > 0) { + thread->cpu_usage = + (thread->cpu_usage >> shiftp->shift1) + + (thread->cpu_usage >> shiftp->shift2); + thread->sched_usage = + (thread->sched_usage >> shiftp->shift1) + + (thread->sched_usage >> shiftp->shift2); + } + else { + thread->cpu_usage = + (thread->cpu_usage >> shiftp->shift1) - + (thread->cpu_usage >> -(shiftp->shift2)); + thread->sched_usage = + (thread->sched_usage >> shiftp->shift1) - + (thread->sched_usage >> -(shiftp->shift2)); + } + } + thread->cpu_delta = 0; + thread->sched_delta = 0; + /* + * Recompute priority if appropriate. + */ + if ( +#if MACH_FIXPRI + (thread->policy == POLICY_TIMESHARE) && +#endif /* MACH_FIXPRI */ + (thread->depress_priority < 0)) { + do_priority_computation(thread, temp_pri); + thread->sched_pri = temp_pri; + } +} + +/* + * run_queue_enqueue macro for thread_setrun(). 
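+ *
+ * (Aside on update_priority() above, added as a worked example: each
+ * wait_shift entry approximates one more factor of 5/8.  For ticks == 1,
+ * {1,3} gives (u >> 1) + (u >> 3) = 0.625 u, i.e. exactly 5/8; for
+ * ticks == 2, {1,-3} gives (u >> 1) - (u >> 3) = 0.375 u, close to
+ * (5/8)^2 ~= 0.39.)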
+ */ +#if DEBUG +#define run_queue_enqueue(rq, th) \ + MACRO_BEGIN \ + unsigned int whichq; \ + \ + whichq = (th)->sched_pri; \ + if (whichq >= NRQS) { \ + printf("thread_setrun: pri too high (%d)\n", (th)->sched_pri); \ + whichq = NRQS - 1; \ + } \ + \ + runq_lock(rq); /* lock the run queue */ \ + checkrq((rq), "thread_setrun: before adding thread"); \ + enqueue_tail(&(rq)->runq[whichq], &((th)->links)); \ + \ + if (whichq < (rq)->low || (rq)->count == 0) \ + (rq)->low = whichq; /* minimize */ \ + \ + (rq)->count++; \ + (th)->runq = (rq); \ + thread_check((th), (rq)); \ + checkrq((rq), "thread_setrun: after adding thread"); \ + runq_unlock(rq); \ + MACRO_END +#else /* DEBUG */ +#define run_queue_enqueue(rq, th) \ + MACRO_BEGIN \ + unsigned int whichq; \ + \ + whichq = (th)->sched_pri; \ + if (whichq >= NRQS) { \ + printf("thread_setrun: pri too high (%d)\n", (th)->sched_pri); \ + whichq = NRQS - 1; \ + } \ + \ + runq_lock(rq); /* lock the run queue */ \ + enqueue_tail(&(rq)->runq[whichq], &((th)->links)); \ + \ + if (whichq < (rq)->low || (rq)->count == 0) \ + (rq)->low = whichq; /* minimize */ \ + \ + (rq)->count++; \ + (th)->runq = (rq); \ + runq_unlock(rq); \ + MACRO_END +#endif /* DEBUG */ +/* + * thread_setrun: + * + * Make thread runnable; dispatch directly onto an idle processor + * if possible. Else put on appropriate run queue (processor + * if bound, else processor set. Caller must have lock on thread. + * This is always called at splsched. + */ + +void thread_setrun( + thread_t th, + boolean_t may_preempt) +{ + processor_t processor; + run_queue_t rq; +#if NCPUS > 1 + processor_set_t pset; +#endif /* NCPUS > 1 */ + + /* + * Update priority if needed. + */ + if (th->sched_stamp != sched_tick) { + update_priority(th); + } + + assert(th->runq == RUN_QUEUE_NULL); + +#if NCPUS > 1 + /* + * Try to dispatch the thread directly onto an idle processor. + */ + if ((processor = th->bound_processor) == PROCESSOR_NULL) { + /* + * Not bound, any processor in the processor set is ok. + */ + pset = th->processor_set; +#if HW_FOOTPRINT + /* + * But first check the last processor it ran on. 
+ */ + processor = th->last_processor; + if (processor->state == PROCESSOR_IDLE) { + processor_lock(processor); + simple_lock(&pset->idle_lock); + if ((processor->state == PROCESSOR_IDLE) +#if MACH_HOST + && (processor->processor_set == pset) +#endif /* MACH_HOST */ + ) { + queue_remove(&pset->idle_queue, processor, + processor_t, processor_queue); + pset->idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + simple_unlock(&pset->idle_lock); + processor_unlock(processor); + if (processor != current_processor()) + cause_ast_check(processor); + return; + } + simple_unlock(&pset->idle_lock); + processor_unlock(processor); + } +#endif /* HW_FOOTPRINT */ + + if (pset->idle_count > 0) { + simple_lock(&pset->idle_lock); + if (pset->idle_count > 0) { + processor = (processor_t) queue_first(&pset->idle_queue); + queue_remove(&(pset->idle_queue), processor, processor_t, + processor_queue); + pset->idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + simple_unlock(&pset->idle_lock); + if (processor != current_processor()) + cause_ast_check(processor); + return; + } + simple_unlock(&pset->idle_lock); + } + rq = &(pset->runq); + run_queue_enqueue(rq,th); + /* + * Preempt check + */ + if (may_preempt && +#if MACH_HOST + (pset == current_processor()->processor_set) && +#endif /* MACH_HOST */ + (current_thread()->sched_pri > th->sched_pri)) { + /* + * Turn off first_quantum to allow csw. + */ + current_processor()->first_quantum = FALSE; + ast_on(cpu_number(), AST_BLOCK); + } + } + else { + /* + * Bound, can only run on bound processor. Have to lock + * processor here because it may not be the current one. + */ + if (processor->state == PROCESSOR_IDLE) { + processor_lock(processor); + pset = processor->processor_set; + simple_lock(&pset->idle_lock); + if (processor->state == PROCESSOR_IDLE) { + queue_remove(&pset->idle_queue, processor, + processor_t, processor_queue); + pset->idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + simple_unlock(&pset->idle_lock); + processor_unlock(processor); + if (processor != current_processor()) + cause_ast_check(processor); + return; + } + simple_unlock(&pset->idle_lock); + processor_unlock(processor); + } + rq = &(processor->runq); + run_queue_enqueue(rq,th); + + /* + * Cause ast on processor if processor is on line. + */ + if (processor == current_processor()) { + ast_on(cpu_number(), AST_BLOCK); + } + else if ((processor->state != PROCESSOR_OFF_LINE)) { + cause_ast_check(processor); + } + } +#else /* NCPUS > 1 */ + /* + * XXX should replace queue with a boolean in this case. + */ + if (default_pset.idle_count > 0) { + processor = (processor_t) queue_first(&default_pset.idle_queue); + queue_remove(&default_pset.idle_queue, processor, + processor_t, processor_queue); + default_pset.idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + return; + } + if (th->bound_processor == PROCESSOR_NULL) { + rq = &(default_pset.runq); + } + else { + rq = &(master_processor->runq); + ast_on(cpu_number(), AST_BLOCK); + } + run_queue_enqueue(rq,th); + + /* + * Preempt check + */ + if (may_preempt && (current_thread()->sched_pri > th->sched_pri)) { + /* + * Turn off first_quantum to allow context switch. + */ + current_processor()->first_quantum = FALSE; + ast_on(cpu_number(), AST_BLOCK); + } +#endif /* NCPUS > 1 */ +} + +/* + * set_pri: + * + * Set the priority of the specified thread to the specified + * priority. 
This may cause the thread to change queues. + * + * The thread *must* be locked by the caller. + */ + +void set_pri( + thread_t th, + int pri, + boolean_t resched) +{ + struct run_queue *rq; + + rq = rem_runq(th); + th->sched_pri = pri; + if (rq != RUN_QUEUE_NULL) { + if (resched) + thread_setrun(th, TRUE); + else + run_queue_enqueue(rq, th); + } +} + +/* + * rem_runq: + * + * Remove a thread from its run queue. + * The run queue that the process was on is returned + * (or RUN_QUEUE_NULL if not on a run queue). Thread *must* be locked + * before calling this routine. Unusual locking protocol on runq + * field in thread structure makes this code interesting; see thread.h. + */ + +struct run_queue *rem_runq( + thread_t th) +{ + struct run_queue *rq; + + rq = th->runq; + /* + * If rq is RUN_QUEUE_NULL, the thread will stay out of the + * run_queues because the caller locked the thread. Otherwise + * the thread is on a runq, but could leave. + */ + if (rq != RUN_QUEUE_NULL) { + runq_lock(rq); +#if DEBUG + checkrq(rq, "rem_runq: at entry"); +#endif /* DEBUG */ + if (rq == th->runq) { + /* + * Thread is in a runq and we have a lock on + * that runq. + */ +#if DEBUG + checkrq(rq, "rem_runq: before removing thread"); + thread_check(th, rq); +#endif /* DEBUG */ + remqueue(&rq->runq[0], (queue_entry_t) th); + rq->count--; +#if DEBUG + checkrq(rq, "rem_runq: after removing thread"); +#endif /* DEBUG */ + th->runq = RUN_QUEUE_NULL; + runq_unlock(rq); + } + else { + /* + * The thread left the runq before we could + * lock the runq. It is not on a runq now, and + * can't move again because this routine's + * caller locked the thread. + */ + runq_unlock(rq); + rq = RUN_QUEUE_NULL; + } + } + + return rq; +} + + +/* + * choose_thread: + * + * Choose a thread to execute. The thread chosen is removed + * from its run queue. Note that this requires only that the runq + * lock be held. + * + * Strategy: + * Check processor runq first; if anything found, run it. + * Else check pset runq; if nothing found, return idle thread. + * + * Second line of strategy is implemented by choose_pset_thread. + * This is only called on processor startup and when thread_block + * thinks there's something in the processor runq. + */ + +thread_t choose_thread( + processor_t myprocessor) +{ + thread_t th; + queue_t q; + run_queue_t runq; + int i; + processor_set_t pset; + + runq = &myprocessor->runq; + + simple_lock(&runq->lock); + if (runq->count > 0) { + q = runq->runq + runq->low; + for (i = runq->low; i < NRQS ; i++, q++) { + if (!queue_empty(q)) { + th = (thread_t) dequeue_head(q); + th->runq = RUN_QUEUE_NULL; + runq->count--; + runq->low = i; + simple_unlock(&runq->lock); + return th; + } + } + panic("choose_thread"); + /*NOTREACHED*/ + } + simple_unlock(&runq->lock); + + pset = myprocessor->processor_set; + + simple_lock(&pset->runq.lock); + return choose_pset_thread(myprocessor,pset); +} + +/* + * choose_pset_thread: choose a thread from processor_set runq or + * set processor idle and choose its idle thread. + * + * Caller must be at splsched and have a lock on the runq. This + * lock is released by this routine. myprocessor is always the current + * processor, and pset must be its processor set. + * This routine chooses and removes a thread from the runq if there + * is one (and returns it), else it sets the processor idle and + * returns its idle thread. 
+ */ + +thread_t choose_pset_thread( + processor_t myprocessor, + processor_set_t pset) +{ + run_queue_t runq; + thread_t th; + queue_t q; + int i; + + runq = &pset->runq; + + if (runq->count > 0) { + q = runq->runq + runq->low; + for (i = runq->low; i < NRQS ; i++, q++) { + if (!queue_empty(q)) { + th = (thread_t) dequeue_head(q); + th->runq = RUN_QUEUE_NULL; + runq->count--; + /* + * For POLICY_FIXEDPRI, runq->low must be + * accurate! + */ +#if MACH_FIXPRI + if ((runq->count > 0) && + (pset->policies & POLICY_FIXEDPRI)) { + while (queue_empty(q)) { + q++; + i++; + } + } +#endif /* MACH_FIXPRI */ + runq->low = i; +#if DEBUG + checkrq(runq, "choose_pset_thread"); +#endif /* DEBUG */ + simple_unlock(&runq->lock); + return th; + } + } + panic("choose_pset_thread"); + /*NOTREACHED*/ + } + simple_unlock(&runq->lock); + + /* + * Nothing is runnable, so set this processor idle if it + * was running. If it was in an assignment or shutdown, + * leave it alone. Return its idle thread. + */ + simple_lock(&pset->idle_lock); + if (myprocessor->state == PROCESSOR_RUNNING) { + myprocessor->state = PROCESSOR_IDLE; + /* + * XXX Until it goes away, put master on end of queue, others + * XXX on front so master gets used last. + */ + if (myprocessor == master_processor) { + queue_enter(&(pset->idle_queue), myprocessor, + processor_t, processor_queue); + } + else { + queue_enter_first(&(pset->idle_queue), myprocessor, + processor_t, processor_queue); + } + + pset->idle_count++; + } + simple_unlock(&pset->idle_lock); + + return myprocessor->idle_thread; +} + +/* + * no_dispatch_count counts number of times processors go non-idle + * without being dispatched. This should be very rare. + */ +int no_dispatch_count = 0; + +/* + * This is the idle thread, which just looks for other threads + * to execute. + */ + +static void __attribute__((noreturn)) idle_thread_continue(void) +{ + processor_t myprocessor; + volatile thread_t *threadp; + volatile int *gcount; + volatile int *lcount; + thread_t new_thread; + int state; + int mycpu; + spl_t s; + + mycpu = cpu_number(); + myprocessor = current_processor(); + threadp = (volatile thread_t *) &myprocessor->next_thread; + lcount = (volatile int *) &myprocessor->runq.count; + + while (TRUE) { +#ifdef MARK_CPU_IDLE + MARK_CPU_IDLE(mycpu); +#endif /* MARK_CPU_IDLE */ + +#if MACH_HOST + gcount = (volatile int *) + &myprocessor->processor_set->runq.count; +#else /* MACH_HOST */ + gcount = (volatile int *) &default_pset.runq.count; +#endif /* MACH_HOST */ + +/* + * This cpu will be dispatched (by thread_setrun) by setting next_thread + * to the value of the thread to run next. Also check runq counts. + */ + while ((*threadp == (volatile thread_t)THREAD_NULL) && + (*gcount == 0) && (*lcount == 0)) { + + /* check for ASTs while we wait */ + + if (need_ast[mycpu] &~ AST_SCHEDULING) { + (void) splsched(); + /* don't allow scheduling ASTs */ + need_ast[mycpu] &= ~AST_SCHEDULING; + ast_taken(); + /* back at spl0 */ + } + + /* + * machine_idle is a machine dependent function, + * to conserve power. + */ +#if POWER_SAVE + machine_idle(mycpu); +#endif /* POWER_SAVE */ + } + +#ifdef MARK_CPU_ACTIVE + MARK_CPU_ACTIVE(mycpu); +#endif /* MARK_CPU_ACTIVE */ + + s = splsched(); + + /* + * This is not a switch statement to avoid the + * bounds checking code in the common case. + */ +retry: + state = myprocessor->state; + if (state == PROCESSOR_DISPATCHING) { + /* + * Commmon case -- cpu dispatched. 
+ */ + new_thread = (thread_t) *threadp; + *threadp = (volatile thread_t) THREAD_NULL; + myprocessor->state = PROCESSOR_RUNNING; + /* + * set up quantum for new thread. + */ +#if MACH_FIXPRI + if (new_thread->policy == POLICY_TIMESHARE) { +#endif /* MACH_FIXPRI */ + /* + * Just use set quantum. No point in + * checking for shorter local runq quantum; + * csw_needed will handle correctly. + */ +#if MACH_HOST + myprocessor->quantum = new_thread-> + processor_set->set_quantum; +#else /* MACH_HOST */ + myprocessor->quantum = + default_pset.set_quantum; +#endif /* MACH_HOST */ + +#if MACH_FIXPRI + } + else { + /* + * POLICY_FIXEDPRI + */ + myprocessor->quantum = new_thread->sched_data; + } +#endif /* MACH_FIXPRI */ + myprocessor->first_quantum = TRUE; + counter(c_idle_thread_handoff++); + thread_run(idle_thread_continue, new_thread); + } + else if (state == PROCESSOR_IDLE) { + processor_set_t pset; + + pset = myprocessor->processor_set; + simple_lock(&pset->idle_lock); + if (myprocessor->state != PROCESSOR_IDLE) { + /* + * Something happened, try again. + */ + simple_unlock(&pset->idle_lock); + goto retry; + } + /* + * Processor was not dispatched (Rare). + * Set it running again. + */ + no_dispatch_count++; + pset->idle_count--; + queue_remove(&pset->idle_queue, myprocessor, + processor_t, processor_queue); + myprocessor->state = PROCESSOR_RUNNING; + simple_unlock(&pset->idle_lock); + counter(c_idle_thread_block++); + thread_block(idle_thread_continue); + } + else if ((state == PROCESSOR_ASSIGN) || + (state == PROCESSOR_SHUTDOWN)) { + /* + * Changing processor sets, or going off-line. + * Release next_thread if there is one. Actual + * thread to run is on a runq. + */ + if ((new_thread = (thread_t)*threadp)!= THREAD_NULL) { + *threadp = (volatile thread_t) THREAD_NULL; + thread_lock(new_thread); + thread_setrun(new_thread, FALSE); + thread_unlock(new_thread); + } + + counter(c_idle_thread_block++); + thread_block(idle_thread_continue); + } + else { + printf(" Bad processor state %d (Cpu %d)\n", + cpu_state(mycpu), mycpu); + panic("idle_thread"); + } + + (void) splx(s); + } +} + +void idle_thread(void) +{ + thread_t self = current_thread(); + spl_t s; + + stack_privilege(self); + + s = splsched(); + self->priority = NRQS-1; + self->sched_pri = NRQS-1; + + /* + * Set the idle flag to indicate that this is an idle thread, + * enter ourselves in the idle array, and thread_block() to get + * out of the run queues (and set the processor idle when we + * run next time). + */ + thread_lock(self); + self->state |= TH_IDLE; + thread_unlock(self); + current_processor()->idle_thread = self; + (void) splx(s); + + counter(c_idle_thread_block++); + thread_block(idle_thread_continue); + idle_thread_continue(); + /*NOTREACHED*/ +} + +/* + * sched_thread: scheduler thread. + * + * This thread handles periodic calculations in the scheduler that + * we don't want to do at interrupt level. This allows us to + * avoid blocking. + */ +static void sched_thread_continue(void) +{ + while (TRUE) { + (void) compute_mach_factor(); + + /* + * Check for stuck threads. This can't be done off of + * the callout queue because it requires operations that + * can't be used from interrupt level. + */ + if (sched_tick & 1) + do_thread_scan(); + + assert_wait((event_t) 0, FALSE); + counter(c_sched_thread_block++); + thread_block(sched_thread_continue); + } +} + +void sched_thread(void) +{ + sched_thread_id = current_thread(); + + /* + * Sleep on event 0, recompute_priorities() will awaken + * us by calling clear_wait(). 
+ */ + assert_wait((event_t) 0, FALSE); + counter(c_sched_thread_block++); + thread_block(sched_thread_continue); + sched_thread_continue(); + /*NOTREACHED*/ +} + +#define MAX_STUCK_THREADS 16 + +/* + * do_thread_scan: scan for stuck threads. A thread is stuck if + * it is runnable but its priority is so low that it has not + * run for several seconds. Its priority should be higher, but + * won't be until it runs and calls update_priority. The scanner + * finds these threads and does the updates. + * + * Scanner runs in two passes. Pass one squirrels likely + * thread ids away in an array, and removes them from the run queue. + * Pass two does the priority updates. This is necessary because + * the run queue lock is required for the candidate scan, but + * cannot be held during updates [set_pri will deadlock]. + * + * Array length should be enough so that restart isn't necessary, + * but restart logic is included. Does not scan processor runqs. + * + */ + +boolean_t do_thread_scan_debug = FALSE; + +thread_t stuck_threads[MAX_STUCK_THREADS]; +int stuck_count = 0; + +/* + * do_runq_scan is the guts of pass 1. It scans a runq for + * stuck threads. A boolean is returned indicating whether + * it ran out of space. + */ + +static boolean_t +do_runq_scan( + run_queue_t runq) +{ + spl_t s; + queue_t q; + thread_t thread; + int count; + + s = splsched(); + simple_lock(&runq->lock); + if((count = runq->count) > 0) { + q = runq->runq + runq->low; + while (count > 0) { + thread = (thread_t) queue_first(q); + while (!queue_end(q, (queue_entry_t) thread)) { + /* + * Get the next thread now, since we may + * remove this thread from the run queue. + */ + thread_t next = (thread_t) queue_next(&thread->links); + + if ((thread->state & TH_SCHED_STATE) == TH_RUN && + sched_tick - thread->sched_stamp > 1) { + /* + * Stuck, save its id for later. + */ + if (stuck_count == MAX_STUCK_THREADS) { + /* + * !@#$% No more room. + */ + simple_unlock(&runq->lock); + splx(s); + return TRUE; + } + /* + * We can`t take the thread_lock here, + * since we already have the runq lock. + * So we can`t grab a reference to the + * thread. However, a thread that is + * in RUN state cannot be deallocated + * until it stops running. If it isn`t + * on the runq, then thread_halt cannot + * see it. So we remove the thread + * from the runq to make it safe. + */ + remqueue(q, (queue_entry_t) thread); + runq->count--; + thread->runq = RUN_QUEUE_NULL; + + stuck_threads[stuck_count++] = thread; +if (do_thread_scan_debug) + printf("do_runq_scan: adding thread %p\n", thread); + } + count--; + thread = next; + } + q++; + } + } + simple_unlock(&runq->lock); + splx(s); + + return FALSE; +} + +void do_thread_scan(void) +{ + spl_t s; + boolean_t restart_needed = 0; + thread_t thread; + int i; +#if MACH_HOST + processor_set_t pset; +#endif /* MACH_HOST */ + + do { +#if MACH_HOST + simple_lock(&all_psets_lock); + queue_iterate(&all_psets, pset, processor_set_t, all_psets) { + if ((restart_needed = do_runq_scan(&pset->runq))) + break; + } + simple_unlock(&all_psets_lock); +#else /* MACH_HOST */ + restart_needed = do_runq_scan(&default_pset.runq); +#endif /* MACH_HOST */ + if (!restart_needed) { + for (i = 0; i < smp_get_numcpus(); i++) { + if ((restart_needed = do_runq_scan(&cpu_to_processor(i)->runq))) + break; + } + } + + /* + * Ok, we now have a collection of candidates -- fix them. 
+ */ + + while (stuck_count > 0) { + thread = stuck_threads[--stuck_count]; + stuck_threads[stuck_count] = THREAD_NULL; + s = splsched(); + thread_lock(thread); + if ((thread->state & TH_SCHED_STATE) == TH_RUN) { + /* + * Do the priority update. Call + * thread_setrun because thread is + * off the run queues. + */ + update_priority(thread); + thread_setrun(thread, TRUE); + } + thread_unlock(thread); + splx(s); + } + } while (restart_needed); +} + +#if DEBUG +void checkrq( + run_queue_t rq, + const char *msg) +{ + queue_t q1; + int i, j; + queue_entry_t e; + int low; + + low = -1; + j = 0; + q1 = rq->runq; + for (i = 0; i < NRQS; i++) { + if (q1->next == q1) { + if (q1->prev != q1) + panic("checkrq: empty at %s", msg); + } + else { + if (low == -1) + low = i; + + for (e = q1->next; e != q1; e = e->next) { + j++; + if (e->next->prev != e) + panic("checkrq-2 at %s", msg); + if (e->prev->next != e) + panic("checkrq-3 at %s", msg); + } + } + q1++; + } + if (j != rq->count) + panic("checkrq: count wrong at %s", msg); + if (rq->count != 0 && low < rq->low) + panic("checkrq: low wrong at %s", msg); +} + +void thread_check( + thread_t th, + run_queue_t rq) +{ + unsigned int whichq; + + whichq = th->sched_pri; + if (whichq >= NRQS) { + printf("thread_check: priority too high\n"); + whichq = NRQS-1; + } + if ((th->links.next == &rq->runq[whichq]) && + (rq->runq[whichq].prev != (queue_entry_t)th)) + panic("thread_check"); +} +#endif /* DEBUG */ diff --git a/kern/sched_prim.h b/kern/sched_prim.h new file mode 100644 index 0000000..c250b22 --- /dev/null +++ b/kern/sched_prim.h @@ -0,0 +1,189 @@ +/* + * Mach Operating System + * Copyright (c) 1992,1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: sched_prim.h + * Author: David Golub + * + * Scheduling primitive definitions file + * + */ + +#ifndef _KERN_SCHED_PRIM_H_ +#define _KERN_SCHED_PRIM_H_ + +#include <mach/boolean.h> +#include <mach/message.h> /* for mach_msg_timeout_t */ +#include <kern/lock.h> +#include <kern/kern_types.h> /* for thread_t */ + +/* + * Possible results of assert_wait - returned in + * current_thread()->wait_result. + */ +#define THREAD_AWAKENED 0 /* normal wakeup */ +#define THREAD_TIMED_OUT 1 /* timeout expired */ +#define THREAD_INTERRUPTED 2 /* interrupted by clear_wait */ +#define THREAD_RESTART 3 /* restart operation entirely */ + +typedef void *event_t; /* wait event */ + +typedef void (*continuation_t)(void); /* continuation */ + +#define thread_no_continuation ((continuation_t) 0) /* no continuation */ + +/* + * Exported interface to sched_prim.c. 
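+ *
+ * A typical wait/wakeup sequence using these primitives (an illustrative
+ * sketch; "my_event" is a hypothetical object used only as a wait address):
+ *
+ *	assert_wait((event_t) &my_event, TRUE);	  -- register interest
+ *	thread_set_timeout(hz);			  -- optional, one second
+ *	thread_block(thread_no_continuation);	  -- actually sleep
+ *	switch (current_thread()->wait_result) { ... }
+ *
+ * while the waking side simply calls thread_wakeup((event_t) &my_event).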
+ */ + +extern void sched_init(void); + +extern void assert_wait( + event_t event, + boolean_t interruptible); +extern void clear_wait( + thread_t thread, + int result, + boolean_t interrupt_only); +extern void thread_sleep( + event_t event, + simple_lock_t lock, + boolean_t interruptible); +extern void thread_wakeup(void); /* for function pointers */ +extern boolean_t thread_wakeup_prim( + event_t event, + boolean_t one_thread, + int result); +extern boolean_t thread_invoke( + thread_t old_thread, + continuation_t continuation, + thread_t new_thread); +extern void thread_block( + continuation_t continuation); +extern void thread_run( + continuation_t continuation, + thread_t new_thread); +extern void thread_set_timeout( + int t); +extern void thread_setrun( + thread_t thread, + boolean_t may_preempt); +extern void thread_dispatch( + thread_t thread); +extern void thread_continue( + thread_t old_thread); +extern void thread_go( + thread_t thread); +extern void thread_will_wait( + thread_t thread); +extern void thread_will_wait_with_timeout( + thread_t thread, + mach_msg_timeout_t msecs); +extern boolean_t thread_handoff( + thread_t old_thread, + continuation_t continuation, + thread_t new_thread); +extern void recompute_priorities(void *param); +extern void update_priority( + thread_t thread); +extern void compute_my_priority( + thread_t thread); +extern void thread_bind( + thread_t thread, + processor_t processor); +extern void compute_priority( + thread_t thread, + boolean_t resched); +extern void thread_timeout_setup( + thread_t thread); + +/* + * Routines defined as macros + */ + +#define thread_wakeup(x) \ + thread_wakeup_prim((x), FALSE, THREAD_AWAKENED) +#define thread_wakeup_with_result(x, z) \ + thread_wakeup_prim((x), FALSE, (z)) +#define thread_wakeup_one(x) \ + thread_wakeup_prim((x), TRUE, THREAD_AWAKENED) + +/* + * Machine-dependent code must define these functions. + */ + +extern void thread_bootstrap_return(void) __attribute__((noreturn)); +extern void thread_exception_return(void) __attribute__((noreturn)); +extern void __attribute__((__noreturn__)) thread_syscall_return(kern_return_t); + +extern thread_t switch_context( + thread_t old_thread, + continuation_t continuation, + thread_t new_thread); +extern void stack_handoff( + thread_t old_thread, + thread_t new_thread); + +/* + * These functions are either defined in kern/thread.c + * via machine-dependent stack_attach and stack_detach functions, + * or are defined directly by machine-dependent code. + */ + +extern kern_return_t stack_alloc( + thread_t thread, + void (*resume)(thread_t)); +extern boolean_t stack_alloc_try( + thread_t thread, + void (*resume)(thread_t)); +extern void stack_free( + thread_t thread); + +/* + * Convert a timeout in milliseconds (mach_msg_timeout_t) + * to a timeout in ticks (for use by set_timeout). + * This conversion rounds UP so that small timeouts + * at least wait for one tick instead of not waiting at all. 
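+ *
+ * For example, assuming hz = 100 (10 ms ticks): a 1 ms timeout becomes
+ * (1 * 100 + 999) / 1000 = 1 tick rather than 0, and a 25 ms timeout
+ * becomes (25 * 100 + 999) / 1000 = 3 ticks, rounded up from 2.5.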
+ */ + +#define convert_ipc_timeout_to_ticks(millis) \ + (((millis) * hz + 999) / 1000) + +void set_pri(thread_t th, int pri, boolean_t resched); +void do_thread_scan(void); +thread_t choose_pset_thread(processor_t myprocessor, processor_set_t pset); + +#if DEBUG +#include <kern/sched.h> /* for run_queue_t */ + +void checkrq(run_queue_t rq, const char *msg); +void thread_check(thread_t th, run_queue_t rq); +#endif /* DEBUG */ + +extern void idle_thread(void) __attribute__((noreturn)); +extern void sched_thread(void); +extern int stuck_count; + +#endif /* _KERN_SCHED_PRIM_H_ */ diff --git a/kern/shuttle.h b/kern/shuttle.h new file mode 100644 index 0000000..0b1c2c5 --- /dev/null +++ b/kern/shuttle.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: shuttle.h + * Author: Bryan Ford + * + * This file contains definitions for shuttles, + * which handle microscheduling for individual threads. + * + */ + +#ifndef _KERN_SHUTTLE_H_ +#define _KERN_SHUTTLE_H_ + +#include <kern/lock.h> + + + +struct Shuttle { + /* XXX must be first in thread */ +/* + * NOTE: The runq field in the thread structure has an unusual + * locking protocol. If its value is RUN_QUEUE_NULL, then it is + * locked by the thread_lock, but if its value is something else + * (i.e. a run_queue) then it is locked by that run_queue's lock. + */ + queue_chain_t links; /* current run queue links */ + run_queue_t runq; /* run queue p is on SEE BELOW */ + + /* Next pointer when on a queue */ + struct Shuttle *next; + + /* Micropriority level */ + int priority; + + /* General-purpose pointer field whose use depends on what the + thread happens to be doing */ + void *message; + + int foobar[1]; +}; +typedef struct Shuttle Shuttle; + + + +/* Exported functions */ + + + +/* Machine-dependent code must define the following functions */ + + + +#endif /* _KERN_SHUTTLE_H_ */ diff --git a/kern/slab.c b/kern/slab.c new file mode 100644 index 0000000..dc44e42 --- /dev/null +++ b/kern/slab.c @@ -0,0 +1,1686 @@ +/* + * Copyright (c) 2011 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * Copyright (c) 2010, 2011 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Object caching and general purpose memory allocator. + * + * This allocator is based on the paper "The Slab Allocator: An Object-Caching + * Kernel Memory Allocator" by Jeff Bonwick. + * + * It allows the allocation of objects (i.e. fixed-size typed buffers) from + * caches and is efficient in both space and time. This implementation follows + * many of the indications from the paper mentioned. The most notable + * differences are outlined below. + * + * The per-cache self-scaling hash table for buffer-to-bufctl conversion, + * described in 3.2.3 "Slab Layout for Large Objects", has been replaced by + * a red-black tree storing slabs, sorted by address. The use of a + * self-balancing tree for buffer-to-slab conversions provides a few advantages + * over a hash table. Unlike a hash table, a BST provides a "lookup nearest" + * operation, so obtaining the slab data (whether it is embedded in the slab or + * off slab) from a buffer address simply consists of a "lookup nearest towards + * 0" tree search. Finally, a self-balancing tree is a true self-scaling data + * structure, whereas a hash table requires periodic maintenance and complete + * resizing, which is expensive. The only drawback is that releasing a buffer + * to the slab layer takes logarithmic time instead of constant time. + * + * This implementation uses per-cpu pools of objects, which service most + * allocation requests. These pools act as caches (but are named differently + * to avoid confusion with CPU caches) that reduce contention on multiprocessor + * systems. When a pool is empty and cannot provide an object, it is filled by + * transferring multiple objects from the slab layer. The symmetric case is + * handled likewise. 
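+ *
+ * Rough shape of the resulting fast paths (an illustrative summary of the
+ * design above, not a quote of the code below):
+ *
+ *	allocation:  lock only the per-CPU pool and pop an object; if the
+ *		     pool is empty, refill it with transfer_size objects
+ *		     from the slab layer, which is where the cache-wide
+ *		     lock is taken;
+ *	free:        push onto the per-CPU pool, spilling a batch back to
+ *		     the slab layer only when the pool is full.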
+ */ + +#include <string.h> +#include <kern/assert.h> +#include <kern/mach_clock.h> +#include <kern/macros.h> +#include <kern/printf.h> +#include <kern/slab.h> +#include <kern/kalloc.h> +#include <kern/cpu_number.h> +#include <kern/mach_debug.server.h> +#include <mach/vm_param.h> +#include <mach/machine/vm_types.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_types.h> +#include <sys/types.h> + +#ifdef MACH_DEBUG +#include <mach_debug/slab_info.h> +#endif + +/* + * Utility macros. + */ +#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0) +#define ISP2(x) P2ALIGNED(x, x) +#define P2ALIGN(x, a) ((x) & -(a)) +#define P2ROUND(x, a) (-(-(x) & -(a))) +#define P2END(x, a) (-(~(x) & -(a))) +#define likely(expr) __builtin_expect(!!(expr), 1) +#define unlikely(expr) __builtin_expect(!!(expr), 0) + +/* + * Minimum required alignment. + */ +#define KMEM_ALIGN_MIN 8 + +/* + * Special buffer size under which slab data is unconditionnally allocated + * from its associated slab. + */ +#define KMEM_BUF_SIZE_THRESHOLD (PAGE_SIZE / 8) + +/* + * Time (in ticks) between two garbage collection operations. + */ +#define KMEM_GC_INTERVAL (5 * hz) + +/* + * The transfer size of a CPU pool is computed by dividing the pool size by + * this value. + */ +#define KMEM_CPU_POOL_TRANSFER_RATIO 2 + +/* + * Redzone guard word. + */ +#ifdef __LP64__ +#if _HOST_BIG_ENDIAN +#define KMEM_REDZONE_WORD 0xfeedfacefeedfaceUL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_REDZONE_WORD 0xcefaedfecefaedfeUL +#endif /* _HOST_BIG_ENDIAN */ +#else /* __LP64__ */ +#if _HOST_BIG_ENDIAN +#define KMEM_REDZONE_WORD 0xfeedfaceUL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_REDZONE_WORD 0xcefaedfeUL +#endif /* _HOST_BIG_ENDIAN */ +#endif /* __LP64__ */ + +/* + * Redzone byte for padding. + */ +#define KMEM_REDZONE_BYTE 0xbb + +/* + * Shift for the first kalloc cache size. + */ +#define KALLOC_FIRST_SHIFT 5 + +/* + * Number of caches backing general purpose allocations. + */ +#define KALLOC_NR_CACHES 13 + +/* + * Values the buftag state member can take. + */ +#ifdef __LP64__ +#if _HOST_BIG_ENDIAN +#define KMEM_BUFTAG_ALLOC 0xa110c8eda110c8edUL +#define KMEM_BUFTAG_FREE 0xf4eeb10cf4eeb10cUL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_BUFTAG_ALLOC 0xedc810a1edc810a1UL +#define KMEM_BUFTAG_FREE 0x0cb1eef40cb1eef4UL +#endif /* _HOST_BIG_ENDIAN */ +#else /* __LP64__ */ +#if _HOST_BIG_ENDIAN +#define KMEM_BUFTAG_ALLOC 0xa110c8edUL +#define KMEM_BUFTAG_FREE 0xf4eeb10cUL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_BUFTAG_ALLOC 0xedc810a1UL +#define KMEM_BUFTAG_FREE 0x0cb1eef4UL +#endif /* _HOST_BIG_ENDIAN */ +#endif /* __LP64__ */ + +/* + * Free and uninitialized patterns. + * + * These values are unconditionnally 64-bit wide since buffers are at least + * 8-byte aligned. + */ +#if _HOST_BIG_ENDIAN +#define KMEM_FREE_PATTERN 0xdeadbeefdeadbeefULL +#define KMEM_UNINIT_PATTERN 0xbaddcafebaddcafeULL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_FREE_PATTERN 0xefbeaddeefbeaddeULL +#define KMEM_UNINIT_PATTERN 0xfecaddbafecaddbaULL +#endif /* _HOST_BIG_ENDIAN */ + +/* + * Cache flags. + * + * The flags don't change once set and can be tested without locking. 
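+ *
+ * (Aside on the power-of-two helpers defined earlier in this file, added
+ * as a worked example with a = 8: P2ALIGN(13, 8) = 13 & -8 = 8 rounds
+ * down, P2ROUND(13, 8) = -(-13 & -8) = 16 rounds up, and
+ * P2END(16, 8) = 24 always advances to the next boundary, even when the
+ * argument is already aligned.)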
+ */ +#define KMEM_CF_SLAB_EXTERNAL 0x01 /* Slab data is off slab */ +#define KMEM_CF_PHYSMEM 0x02 /* Allocate from physical memory */ +#define KMEM_CF_DIRECT 0x04 /* Direct buf-to-slab translation + (implies !KMEM_CF_SLAB_EXTERNAL) */ +#define KMEM_CF_USE_TREE 0x08 /* Use red-black tree to track slab + data */ +#define KMEM_CF_USE_PAGE 0x10 /* Use page private data to track slab + data (implies KMEM_CF_SLAB_EXTERNAL + and KMEM_CF_PHYSMEM) */ +#define KMEM_CF_VERIFY 0x20 /* Debugging facilities enabled + (implies KMEM_CF_USE_TREE) */ + +/* + * Options for kmem_cache_alloc_verify(). + */ +#define KMEM_AV_NOCONSTRUCT 0 +#define KMEM_AV_CONSTRUCT 1 + +/* + * Error codes for kmem_cache_error(). + */ +#define KMEM_ERR_INVALID 0 /* Invalid address being freed */ +#define KMEM_ERR_DOUBLEFREE 1 /* Freeing already free address */ +#define KMEM_ERR_BUFTAG 2 /* Invalid buftag content */ +#define KMEM_ERR_MODIFIED 3 /* Buffer modified while free */ +#define KMEM_ERR_REDZONE 4 /* Redzone violation */ + +#if SLAB_USE_CPU_POOLS +/* + * Available CPU pool types. + * + * For each entry, the CPU pool size applies from the entry buf_size + * (excluded) up to (and including) the buf_size of the preceding entry. + * + * See struct kmem_cpu_pool_type for a description of the values. + */ +static struct kmem_cpu_pool_type kmem_cpu_pool_types[] = { + { 32768, 1, 0, NULL }, + { 4096, 8, CPU_L1_SIZE, NULL }, + { 256, 64, CPU_L1_SIZE, NULL }, + { 0, 128, CPU_L1_SIZE, NULL } +}; + +/* + * Caches where CPU pool arrays are allocated from. + */ +static struct kmem_cache kmem_cpu_array_caches[ARRAY_SIZE(kmem_cpu_pool_types)]; +#endif /* SLAB_USE_CPU_POOLS */ + +/* + * Cache for off slab data. + */ +static struct kmem_cache kmem_slab_cache; + +/* + * General purpose caches array. + */ +static struct kmem_cache kalloc_caches[KALLOC_NR_CACHES]; + +/* + * List of all caches managed by the allocator. + */ +static struct list kmem_cache_list; +static unsigned int kmem_nr_caches; +static simple_lock_data_t __attribute__((used)) kmem_cache_list_lock; + +/* + * Time of the last memory reclaim, in clock ticks. + */ +static unsigned long kmem_gc_last_tick; + +#define kmem_error(format, ...) \ + panic("mem: error: %s(): " format "\n", __func__, \ + ## __VA_ARGS__) + +#define kmem_warn(format, ...) \ + printf("mem: warning: %s(): " format "\n", __func__, \ + ## __VA_ARGS__) + +#define kmem_print(format, ...) 
\ + printf(format "\n", ## __VA_ARGS__) + +static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error, + void *arg); +static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache); +static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf); + +static void * kmem_buf_verify_bytes(void *buf, void *pattern, size_t size) +{ + char *ptr, *pattern_ptr, *end; + + end = buf + size; + + for (ptr = buf, pattern_ptr = pattern; ptr < end; ptr++, pattern_ptr++) + if (*ptr != *pattern_ptr) + return ptr; + + return NULL; +} + +static void * kmem_buf_verify(void *buf, uint64_t pattern, vm_size_t size) +{ + uint64_t *ptr, *end; + + assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t))); + assert(P2ALIGNED(size, sizeof(uint64_t))); + + end = buf + size; + + for (ptr = buf; ptr < end; ptr++) + if (*ptr != pattern) + return kmem_buf_verify_bytes(ptr, &pattern, sizeof(pattern)); + + return NULL; +} + +static void kmem_buf_fill(void *buf, uint64_t pattern, size_t size) +{ + uint64_t *ptr, *end; + + assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t))); + assert(P2ALIGNED(size, sizeof(uint64_t))); + + end = buf + size; + + for (ptr = buf; ptr < end; ptr++) + *ptr = pattern; +} + +static void * kmem_buf_verify_fill(void *buf, uint64_t old, uint64_t new, + size_t size) +{ + uint64_t *ptr, *end; + + assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t))); + assert(P2ALIGNED(size, sizeof(uint64_t))); + + end = buf + size; + + for (ptr = buf; ptr < end; ptr++) { + if (*ptr != old) + return kmem_buf_verify_bytes(ptr, &old, sizeof(old)); + + *ptr = new; + } + + return NULL; +} + +static inline union kmem_bufctl * +kmem_buf_to_bufctl(void *buf, struct kmem_cache *cache) +{ + return (union kmem_bufctl *)(buf + cache->bufctl_dist); +} + +static inline struct kmem_buftag * +kmem_buf_to_buftag(void *buf, struct kmem_cache *cache) +{ + return (struct kmem_buftag *)(buf + cache->buftag_dist); +} + +static inline void * kmem_bufctl_to_buf(union kmem_bufctl *bufctl, + struct kmem_cache *cache) +{ + return (void *)bufctl - cache->bufctl_dist; +} + +static vm_offset_t +kmem_pagealloc_physmem(vm_size_t size) +{ + struct vm_page *page; + + assert(size == PAGE_SIZE); + + for (;;) { + page = vm_page_grab(VM_PAGE_DIRECTMAP); + + if (page != NULL) + break; + + VM_PAGE_WAIT(NULL); + } + + return phystokv(vm_page_to_pa(page)); +} + +static void +kmem_pagefree_physmem(vm_offset_t addr, vm_size_t size) +{ + struct vm_page *page; + + assert(size == PAGE_SIZE); + page = vm_page_lookup_pa(kvtophys(addr)); + assert(page != NULL); + vm_page_release(page, FALSE, FALSE); +} + +static vm_offset_t +kmem_pagealloc_virtual(vm_size_t size, vm_size_t align) +{ + vm_offset_t addr; + kern_return_t kr; + + assert(size > PAGE_SIZE); + size = vm_page_round(size); + + if (align <= PAGE_SIZE) + kr = kmem_alloc_wired(kernel_map, &addr, size); + else + kr = kmem_alloc_aligned(kernel_map, &addr, size); + + if (kr != KERN_SUCCESS) + return 0; + + return addr; +} + +static void +kmem_pagefree_virtual(vm_offset_t addr, vm_size_t size) +{ + if (addr < kernel_virtual_start || addr + size > kernel_virtual_end) + panic("kmem_pagefree_virtual(%lx-%lx) falls in physical memory area!\n", + (unsigned long) addr, (unsigned long) addr + size); + assert(size > PAGE_SIZE); + size = vm_page_round(size); + kmem_free(kernel_map, addr, size); +} + +static vm_offset_t +kmem_pagealloc(vm_size_t size, vm_size_t align, int flags) +{ + assert(align <= size); + return (flags & KMEM_CF_PHYSMEM) + ? 
kmem_pagealloc_physmem(size) + : kmem_pagealloc_virtual(size, align); +} + +static void +kmem_pagefree(vm_offset_t addr, vm_size_t size, int flags) +{ + return (flags & KMEM_CF_PHYSMEM) + ? kmem_pagefree_physmem(addr, size) + : kmem_pagefree_virtual(addr, size); +} + +static void kmem_slab_create_verify(struct kmem_slab *slab, + struct kmem_cache *cache) +{ + struct kmem_buftag *buftag; + size_t buf_size; + unsigned long buffers; + void *buf; + + buf_size = cache->buf_size; + buf = slab->addr; + buftag = kmem_buf_to_buftag(buf, cache); + + for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) { + kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist); + buftag->state = KMEM_BUFTAG_FREE; + buf += buf_size; + buftag = kmem_buf_to_buftag(buf, cache); + } +} + +/* + * Create an empty slab for a cache. + * + * The caller must drop all locks before calling this function. + */ +static struct kmem_slab * kmem_slab_create(struct kmem_cache *cache, + size_t color) +{ + struct kmem_slab *slab; + union kmem_bufctl *bufctl; + size_t buf_size; + unsigned long buffers; + vm_offset_t slab_buf; + + slab_buf = kmem_pagealloc(cache->slab_size, cache->align, cache->flags); + + if (slab_buf == 0) + return NULL; + + if (cache->flags & KMEM_CF_SLAB_EXTERNAL) { + slab = (struct kmem_slab *)kmem_cache_alloc(&kmem_slab_cache); + + if (slab == NULL) { + kmem_pagefree(slab_buf, cache->slab_size, cache->flags); + return NULL; + } + + if (cache->flags & KMEM_CF_USE_PAGE) { + struct vm_page *page; + + page = vm_page_lookup_pa(kvtophys(slab_buf)); + assert(page != NULL); + vm_page_set_priv(page, slab); + } + } else { + slab = (struct kmem_slab *)(slab_buf + cache->slab_size) - 1; + } + + slab->cache = cache; + list_node_init(&slab->list_node); + rbtree_node_init(&slab->tree_node); + slab->nr_refs = 0; + slab->first_free = NULL; + slab->addr = (void *)(slab_buf + color); + + buf_size = cache->buf_size; + bufctl = kmem_buf_to_bufctl(slab->addr, cache); + + for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) { + bufctl->next = slab->first_free; + slab->first_free = bufctl; + bufctl = (union kmem_bufctl *)((void *)bufctl + buf_size); + } + + if (cache->flags & KMEM_CF_VERIFY) + kmem_slab_create_verify(slab, cache); + + return slab; +} + +static void kmem_slab_destroy_verify(struct kmem_slab *slab, + struct kmem_cache *cache) +{ + struct kmem_buftag *buftag; + size_t buf_size; + unsigned long buffers; + void *buf, *addr; + + buf_size = cache->buf_size; + buf = slab->addr; + buftag = kmem_buf_to_buftag(buf, cache); + + for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) { + if (buftag->state != KMEM_BUFTAG_FREE) + kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag); + + addr = kmem_buf_verify(buf, KMEM_FREE_PATTERN, cache->bufctl_dist); + + if (addr != NULL) + kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr); + + buf += buf_size; + buftag = kmem_buf_to_buftag(buf, cache); + } +} + +/* + * Destroy a slab. + * + * The caller must drop all locks before calling this function. 
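+ *
+ * As an illustrative sketch (simplified, not taken from the original
+ * sources), a caller is expected to unlink the slab under the cache
+ * lock first and only destroy it once the lock has been released:
+ *
+ *	simple_lock(&cache->lock);
+ *	list_remove(&slab->list_node);
+ *	simple_unlock(&cache->lock);
+ *	kmem_slab_destroy(slab, cache);
+ *
+ * slab_collect() follows this pattern, with the additional bookkeeping
+ * done in kmem_cache_reap().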
+ */ +static void kmem_slab_destroy(struct kmem_slab *slab, struct kmem_cache *cache) +{ + vm_offset_t slab_buf; + + assert(slab->nr_refs == 0); + assert(slab->first_free != NULL); + + if (cache->flags & KMEM_CF_VERIFY) + kmem_slab_destroy_verify(slab, cache); + + slab_buf = (vm_offset_t)P2ALIGN((unsigned long)slab->addr, PAGE_SIZE); + + if (cache->flags & KMEM_CF_SLAB_EXTERNAL) { + if (cache->flags & KMEM_CF_USE_PAGE) { + struct vm_page *page; + + /* Not strictly needed, but let's increase safety */ + page = vm_page_lookup_pa(kvtophys(slab_buf)); + assert(page != NULL); + vm_page_set_priv(page, NULL); + } + + kmem_cache_free(&kmem_slab_cache, (vm_offset_t)slab); + } + + kmem_pagefree(slab_buf, cache->slab_size, cache->flags); +} + +static inline int kmem_slab_cmp_lookup(const void *addr, + const struct rbtree_node *node) +{ + struct kmem_slab *slab; + + slab = rbtree_entry(node, struct kmem_slab, tree_node); + + if (addr == slab->addr) + return 0; + else if (addr < slab->addr) + return -1; + else + return 1; +} + +static inline int kmem_slab_cmp_insert(const struct rbtree_node *a, + const struct rbtree_node *b) +{ + struct kmem_slab *slab; + + slab = rbtree_entry(a, struct kmem_slab, tree_node); + return kmem_slab_cmp_lookup(slab->addr, b); +} + +#if SLAB_USE_CPU_POOLS +static void kmem_cpu_pool_init(struct kmem_cpu_pool *cpu_pool, + struct kmem_cache *cache) +{ + simple_lock_init(&cpu_pool->lock); + cpu_pool->flags = cache->flags; + cpu_pool->size = 0; + cpu_pool->transfer_size = 0; + cpu_pool->nr_objs = 0; + cpu_pool->array = NULL; +} + +/* + * Return a CPU pool. + * + * This function will generally return the pool matching the CPU running the + * calling thread. Because of context switches and thread migration, the + * caller might be running on another processor after this function returns. + * Although not optimal, this should rarely happen, and it doesn't affect the + * allocator operations in any other way, as CPU pools are always valid, and + * their access is serialized by a lock. + */ +static inline struct kmem_cpu_pool * kmem_cpu_pool_get(struct kmem_cache *cache) +{ + return &cache->cpu_pools[cpu_number()]; +} + +static inline void kmem_cpu_pool_build(struct kmem_cpu_pool *cpu_pool, + struct kmem_cache *cache, void **array) +{ + cpu_pool->size = cache->cpu_pool_type->array_size; + cpu_pool->transfer_size = (cpu_pool->size + + KMEM_CPU_POOL_TRANSFER_RATIO - 1) + / KMEM_CPU_POOL_TRANSFER_RATIO; + cpu_pool->array = array; +} + +static inline void * kmem_cpu_pool_pop(struct kmem_cpu_pool *cpu_pool) +{ + cpu_pool->nr_objs--; + return cpu_pool->array[cpu_pool->nr_objs]; +} + +static inline void kmem_cpu_pool_push(struct kmem_cpu_pool *cpu_pool, void *obj) +{ + cpu_pool->array[cpu_pool->nr_objs] = obj; + cpu_pool->nr_objs++; +} + +static int kmem_cpu_pool_fill(struct kmem_cpu_pool *cpu_pool, + struct kmem_cache *cache) +{ + kmem_cache_ctor_t ctor; + void *buf; + int i; + + ctor = (cpu_pool->flags & KMEM_CF_VERIFY) ? 
NULL : cache->ctor; + + simple_lock(&cache->lock); + + for (i = 0; i < cpu_pool->transfer_size; i++) { + buf = kmem_cache_alloc_from_slab(cache); + + if (buf == NULL) + break; + + if (ctor != NULL) + ctor(buf); + + kmem_cpu_pool_push(cpu_pool, buf); + } + + simple_unlock(&cache->lock); + + return i; +} + +static void kmem_cpu_pool_drain(struct kmem_cpu_pool *cpu_pool, + struct kmem_cache *cache) +{ + void *obj; + int i; + + simple_lock(&cache->lock); + + for (i = cpu_pool->transfer_size; i > 0; i--) { + obj = kmem_cpu_pool_pop(cpu_pool); + kmem_cache_free_to_slab(cache, obj); + } + + simple_unlock(&cache->lock); +} +#endif /* SLAB_USE_CPU_POOLS */ + +static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error, + void *arg) +{ + struct kmem_buftag *buftag; + + kmem_warn("cache: %s, buffer: %p", cache->name, (void *)buf); + + switch(error) { + case KMEM_ERR_INVALID: + kmem_error("freeing invalid address"); + break; + case KMEM_ERR_DOUBLEFREE: + kmem_error("attempting to free the same address twice"); + break; + case KMEM_ERR_BUFTAG: + buftag = arg; + kmem_error("invalid buftag content, buftag state: %p", + (void *)buftag->state); + break; + case KMEM_ERR_MODIFIED: + kmem_error("free buffer modified, fault address: %p, " + "offset in buffer: %td", arg, arg - buf); + break; + case KMEM_ERR_REDZONE: + kmem_error("write beyond end of buffer, fault address: %p, " + "offset in buffer: %td", arg, arg - buf); + break; + default: + kmem_error("unknown error"); + } + + /* + * Never reached. + */ +} + +/* + * Compute properties such as slab size for the given cache. + * + * Once the slab size is known, this function sets the related properties + * (buffers per slab and maximum color). It can also set some KMEM_CF_xxx + * flags depending on the resulting layout. + */ +static void kmem_cache_compute_properties(struct kmem_cache *cache, int flags) +{ + size_t size, waste; + int embed; + + if (cache->buf_size < KMEM_BUF_SIZE_THRESHOLD) + flags |= KMEM_CACHE_NOOFFSLAB; + + cache->slab_size = PAGE_SIZE; + + for (;;) { + if (flags & KMEM_CACHE_NOOFFSLAB) + embed = 1; + else { + waste = cache->slab_size % cache->buf_size; + embed = (sizeof(struct kmem_slab) <= waste); + } + + size = cache->slab_size; + + if (embed) + size -= sizeof(struct kmem_slab); + + if (size >= cache->buf_size) + break; + + cache->slab_size += PAGE_SIZE; + } + + cache->bufs_per_slab = size / cache->buf_size; + cache->color_max = size % cache->buf_size; + + if (cache->color_max >= PAGE_SIZE) + cache->color_max = 0; + + if (!embed) + cache->flags |= KMEM_CF_SLAB_EXTERNAL; + + if ((flags & KMEM_CACHE_PHYSMEM) || (cache->slab_size == PAGE_SIZE)) { + cache->flags |= KMEM_CF_PHYSMEM; + + /* + * Avoid using larger-than-page slabs backed by the direct physical + * mapping to completely prevent physical memory fragmentation from + * making slab allocations fail. 
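+ *
+ * The physmem backend grabs exactly one page at a time from the
+ * direct mapping (see kmem_pagealloc_physmem()), so a larger slab
+ * would require physically contiguous pages, which may be unavailable
+ * even when plenty of individual pages are free. Keeping such slabs
+ * at one page means any free page is enough to grow the cache.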
+ */ + if (cache->slab_size != PAGE_SIZE) + panic("slab: invalid cache parameters"); + } + + if (cache->flags & KMEM_CF_VERIFY) + cache->flags |= KMEM_CF_USE_TREE; + + if (cache->flags & KMEM_CF_SLAB_EXTERNAL) { + if (cache->flags & KMEM_CF_PHYSMEM) + cache->flags |= KMEM_CF_USE_PAGE; + else + cache->flags |= KMEM_CF_USE_TREE; + } else { + if (cache->slab_size == PAGE_SIZE) + cache->flags |= KMEM_CF_DIRECT; + else + cache->flags |= KMEM_CF_USE_TREE; + } +} + +void kmem_cache_init(struct kmem_cache *cache, const char *name, + size_t obj_size, size_t align, + kmem_cache_ctor_t ctor, int flags) +{ +#if SLAB_USE_CPU_POOLS + struct kmem_cpu_pool_type *cpu_pool_type; + size_t i; +#endif /* SLAB_USE_CPU_POOLS */ + size_t buf_size; + + cache->flags = 0; +#if SLAB_VERIFY + if (obj_size < PAGE_SIZE - sizeof(union kmem_bufctl) + sizeof(struct kmem_buftag)) + cache->flags |= KMEM_CF_VERIFY; +#endif /* SLAB_VERIFY */ + + if (flags & KMEM_CACHE_VERIFY) + cache->flags |= KMEM_CF_VERIFY; + + if (align < KMEM_ALIGN_MIN) + align = KMEM_ALIGN_MIN; + + assert(obj_size > 0); + assert(ISP2(align)); + + buf_size = P2ROUND(obj_size, align); + + simple_lock_init(&cache->lock); + list_node_init(&cache->node); + list_init(&cache->partial_slabs); + list_init(&cache->free_slabs); + rbtree_init(&cache->active_slabs); + cache->obj_size = obj_size; + cache->align = align; + cache->buf_size = buf_size; + cache->bufctl_dist = buf_size - sizeof(union kmem_bufctl); + cache->color = 0; + cache->nr_objs = 0; + cache->nr_bufs = 0; + cache->nr_slabs = 0; + cache->nr_free_slabs = 0; + cache->ctor = ctor; + strncpy(cache->name, name, sizeof(cache->name)); + cache->name[sizeof(cache->name) - 1] = '\0'; + cache->buftag_dist = 0; + cache->redzone_pad = 0; + + if (cache->flags & KMEM_CF_VERIFY) { + cache->bufctl_dist = buf_size; + cache->buftag_dist = cache->bufctl_dist + sizeof(union kmem_bufctl); + cache->redzone_pad = cache->bufctl_dist - cache->obj_size; + buf_size += sizeof(union kmem_bufctl) + sizeof(struct kmem_buftag); + buf_size = P2ROUND(buf_size, align); + cache->buf_size = buf_size; + } + + kmem_cache_compute_properties(cache, flags); + +#if SLAB_USE_CPU_POOLS + for (cpu_pool_type = kmem_cpu_pool_types; + buf_size <= cpu_pool_type->buf_size; + cpu_pool_type++); + + cache->cpu_pool_type = cpu_pool_type; + + for (i = 0; i < ARRAY_SIZE(cache->cpu_pools); i++) + kmem_cpu_pool_init(&cache->cpu_pools[i], cache); +#endif /* SLAB_USE_CPU_POOLS */ + + simple_lock(&kmem_cache_list_lock); + list_insert_tail(&kmem_cache_list, &cache->node); + kmem_nr_caches++; + simple_unlock(&kmem_cache_list_lock); +} + +static inline int kmem_cache_empty(struct kmem_cache *cache) +{ + return cache->nr_objs == cache->nr_bufs; +} + +static int kmem_cache_grow(struct kmem_cache *cache) +{ + struct kmem_slab *slab; + size_t color; + int empty; + + simple_lock(&cache->lock); + + if (!kmem_cache_empty(cache)) { + simple_unlock(&cache->lock); + return 1; + } + + color = cache->color; + cache->color += cache->align; + + if (cache->color > cache->color_max) + cache->color = 0; + + simple_unlock(&cache->lock); + + slab = kmem_slab_create(cache, color); + + simple_lock(&cache->lock); + + if (slab != NULL) { + list_insert_head(&cache->free_slabs, &slab->list_node); + cache->nr_bufs += cache->bufs_per_slab; + cache->nr_slabs++; + cache->nr_free_slabs++; + } + + /* + * Even if our slab creation failed, another thread might have succeeded + * in growing the cache. 
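+ *
+ * This is why the return value is recomputed from kmem_cache_empty()
+ * under the cache lock instead of being derived from the local slab
+ * pointer: it only tells the caller whether retrying the slab layer
+ * allocation is worthwhile.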
+ */ + empty = kmem_cache_empty(cache); + + simple_unlock(&cache->lock); + + return !empty; +} + +static void kmem_cache_reap(struct kmem_cache *cache, struct list *dead_slabs) +{ + simple_lock(&cache->lock); + + list_concat(dead_slabs, &cache->free_slabs); + list_init(&cache->free_slabs); + cache->nr_bufs -= cache->bufs_per_slab * cache->nr_free_slabs; + cache->nr_slabs -= cache->nr_free_slabs; + cache->nr_free_slabs = 0; + + simple_unlock(&cache->lock); +} + +/* + * Allocate a raw (unconstructed) buffer from the slab layer of a cache. + * + * The cache must be locked before calling this function. + */ +static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache) +{ + struct kmem_slab *slab; + union kmem_bufctl *bufctl; + + if (!list_empty(&cache->partial_slabs)) + slab = list_first_entry(&cache->partial_slabs, struct kmem_slab, + list_node); + else if (!list_empty(&cache->free_slabs)) + slab = list_first_entry(&cache->free_slabs, struct kmem_slab, + list_node); + else + return NULL; + + bufctl = slab->first_free; + assert(bufctl != NULL); + slab->first_free = bufctl->next; + slab->nr_refs++; + cache->nr_objs++; + + if (slab->nr_refs == cache->bufs_per_slab) { + /* The slab has become complete */ + list_remove(&slab->list_node); + + if (slab->nr_refs == 1) + cache->nr_free_slabs--; + } else if (slab->nr_refs == 1) { + /* + * The slab has become partial. Insert the new slab at the end of + * the list to reduce fragmentation. + */ + list_remove(&slab->list_node); + list_insert_tail(&cache->partial_slabs, &slab->list_node); + cache->nr_free_slabs--; + } + + if ((slab->nr_refs == 1) && (cache->flags & KMEM_CF_USE_TREE)) + rbtree_insert(&cache->active_slabs, &slab->tree_node, + kmem_slab_cmp_insert); + + return kmem_bufctl_to_buf(bufctl, cache); +} + +/* + * Release a buffer to the slab layer of a cache. + * + * The cache must be locked before calling this function. 
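+ *
+ * The owning slab is recovered from the buffer address alone:
+ * directly for KMEM_CF_DIRECT caches (the header is embedded at the
+ * end of the page), through the page private data for
+ * KMEM_CF_USE_PAGE caches, and through the red-black tree of active
+ * slabs otherwise.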
+ */ +static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf) +{ + struct kmem_slab *slab; + union kmem_bufctl *bufctl; + + if (cache->flags & KMEM_CF_DIRECT) { + assert(cache->slab_size == PAGE_SIZE); + slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size) + - 1; + } else if (cache->flags & KMEM_CF_USE_PAGE) { + struct vm_page *page; + + page = vm_page_lookup_pa(kvtophys((vm_offset_t)buf)); + assert(page != NULL); + slab = vm_page_get_priv(page); + } else { + struct rbtree_node *node; + + assert(cache->flags & KMEM_CF_USE_TREE); + node = rbtree_lookup_nearest(&cache->active_slabs, buf, + kmem_slab_cmp_lookup, RBTREE_LEFT); + assert(node != NULL); + slab = rbtree_entry(node, struct kmem_slab, tree_node); + } + + assert((unsigned long)buf >= (unsigned long)slab->addr); + assert(((unsigned long)buf + cache->buf_size) + <= vm_page_trunc((unsigned long)slab->addr + cache->slab_size)); + + assert(slab->nr_refs >= 1); + assert(slab->nr_refs <= cache->bufs_per_slab); + bufctl = kmem_buf_to_bufctl(buf, cache); + bufctl->next = slab->first_free; + slab->first_free = bufctl; + slab->nr_refs--; + cache->nr_objs--; + + if (slab->nr_refs == 0) { + /* The slab has become free */ + + if (cache->flags & KMEM_CF_USE_TREE) + rbtree_remove(&cache->active_slabs, &slab->tree_node); + + if (cache->bufs_per_slab > 1) + list_remove(&slab->list_node); + + list_insert_head(&cache->free_slabs, &slab->list_node); + cache->nr_free_slabs++; + } else if (slab->nr_refs == (cache->bufs_per_slab - 1)) { + /* The slab has become partial */ + list_insert_head(&cache->partial_slabs, &slab->list_node); + } +} + +static void kmem_cache_alloc_verify(struct kmem_cache *cache, void *buf, + int construct) +{ + struct kmem_buftag *buftag; + union kmem_bufctl *bufctl; + void *addr; + + buftag = kmem_buf_to_buftag(buf, cache); + + if (buftag->state != KMEM_BUFTAG_FREE) + kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag); + + addr = kmem_buf_verify_fill(buf, KMEM_FREE_PATTERN, KMEM_UNINIT_PATTERN, + cache->bufctl_dist); + + if (addr != NULL) + kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr); + + addr = buf + cache->obj_size; + memset(addr, KMEM_REDZONE_BYTE, cache->redzone_pad); + + bufctl = kmem_buf_to_bufctl(buf, cache); + bufctl->redzone = KMEM_REDZONE_WORD; + buftag->state = KMEM_BUFTAG_ALLOC; + + if (construct && (cache->ctor != NULL)) + cache->ctor(buf); +} + +vm_offset_t kmem_cache_alloc(struct kmem_cache *cache) +{ + int filled; + void *buf; + +#if SLAB_USE_CPU_POOLS + struct kmem_cpu_pool *cpu_pool; + + cpu_pool = kmem_cpu_pool_get(cache); + + if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL) + goto slab_alloc; + + simple_lock(&cpu_pool->lock); + +fast_alloc: + if (likely(cpu_pool->nr_objs > 0)) { + buf = kmem_cpu_pool_pop(cpu_pool); + simple_unlock(&cpu_pool->lock); + + if (cpu_pool->flags & KMEM_CF_VERIFY) + kmem_cache_alloc_verify(cache, buf, KMEM_AV_CONSTRUCT); + + return (vm_offset_t)buf; + } + + if (cpu_pool->array != NULL) { + filled = kmem_cpu_pool_fill(cpu_pool, cache); + + if (!filled) { + simple_unlock(&cpu_pool->lock); + + filled = kmem_cache_grow(cache); + + if (!filled) + return 0; + + simple_lock(&cpu_pool->lock); + } + + goto fast_alloc; + } + + simple_unlock(&cpu_pool->lock); +#endif /* SLAB_USE_CPU_POOLS */ + +slab_alloc: + simple_lock(&cache->lock); + buf = kmem_cache_alloc_from_slab(cache); + simple_unlock(&cache->lock); + + if (buf == NULL) { + filled = kmem_cache_grow(cache); + + if (!filled) + return 0; + + goto slab_alloc; + } + + if (cache->flags & KMEM_CF_VERIFY) + 
kmem_cache_alloc_verify(cache, buf, KMEM_AV_NOCONSTRUCT); + + if (cache->ctor != NULL) + cache->ctor(buf); + + return (vm_offset_t)buf; +} + +static void kmem_cache_free_verify(struct kmem_cache *cache, void *buf) +{ + struct rbtree_node *node; + struct kmem_buftag *buftag; + struct kmem_slab *slab; + union kmem_bufctl *bufctl; + unsigned char *redzone_byte; + unsigned long slabend; + + assert(cache->flags & KMEM_CF_USE_TREE); + + simple_lock(&cache->lock); + node = rbtree_lookup_nearest(&cache->active_slabs, buf, + kmem_slab_cmp_lookup, RBTREE_LEFT); + simple_unlock(&cache->lock); + + if (node == NULL) + kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); + + slab = rbtree_entry(node, struct kmem_slab, tree_node); + slabend = P2ALIGN((unsigned long)slab->addr + cache->slab_size, PAGE_SIZE); + + if ((unsigned long)buf >= slabend) + kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); + + if ((((unsigned long)buf - (unsigned long)slab->addr) % cache->buf_size) + != 0) + kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); + + /* + * As the buffer address is valid, accessing its buftag is safe. + */ + buftag = kmem_buf_to_buftag(buf, cache); + + if (buftag->state != KMEM_BUFTAG_ALLOC) { + if (buftag->state == KMEM_BUFTAG_FREE) + kmem_cache_error(cache, buf, KMEM_ERR_DOUBLEFREE, NULL); + else + kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag); + } + + redzone_byte = buf + cache->obj_size; + bufctl = kmem_buf_to_bufctl(buf, cache); + + while (redzone_byte < (unsigned char *)bufctl) { + if (*redzone_byte != KMEM_REDZONE_BYTE) + kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); + + redzone_byte++; + } + + if (bufctl->redzone != KMEM_REDZONE_WORD) { + unsigned long word; + + word = KMEM_REDZONE_WORD; + redzone_byte = kmem_buf_verify_bytes(&bufctl->redzone, &word, + sizeof(bufctl->redzone)); + kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); + } + + kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist); + buftag->state = KMEM_BUFTAG_FREE; +} + +void kmem_cache_free(struct kmem_cache *cache, vm_offset_t obj) +{ +#if SLAB_USE_CPU_POOLS + struct kmem_cpu_pool *cpu_pool; + void **array; + + cpu_pool = kmem_cpu_pool_get(cache); + + if (cpu_pool->flags & KMEM_CF_VERIFY) { +#else /* SLAB_USE_CPU_POOLS */ + if (cache->flags & KMEM_CF_VERIFY) { +#endif /* SLAB_USE_CPU_POOLS */ + kmem_cache_free_verify(cache, (void *)obj); + } + +#if SLAB_USE_CPU_POOLS + if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL) + goto slab_free; + + simple_lock(&cpu_pool->lock); + +fast_free: + if (likely(cpu_pool->nr_objs < cpu_pool->size)) { + kmem_cpu_pool_push(cpu_pool, (void *)obj); + simple_unlock(&cpu_pool->lock); + return; + } + + if (cpu_pool->array != NULL) { + kmem_cpu_pool_drain(cpu_pool, cache); + goto fast_free; + } + + simple_unlock(&cpu_pool->lock); + + array = (void *)kmem_cache_alloc(cache->cpu_pool_type->array_cache); + + if (array != NULL) { + simple_lock(&cpu_pool->lock); + + /* + * Another thread may have built the CPU pool while the lock was + * dropped. 
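+ *
+ * In that case the array allocated above is redundant: it is returned
+ * to its array cache and the pool built by the other thread is used
+ * instead.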
+ */ + if (cpu_pool->array != NULL) { + simple_unlock(&cpu_pool->lock); + kmem_cache_free(cache->cpu_pool_type->array_cache, + (vm_offset_t)array); + simple_lock(&cpu_pool->lock); + goto fast_free; + } + + kmem_cpu_pool_build(cpu_pool, cache, array); + goto fast_free; + } + +slab_free: +#endif /* SLAB_USE_CPU_POOLS */ + + simple_lock(&cache->lock); + kmem_cache_free_to_slab(cache, (void *)obj); + simple_unlock(&cache->lock); +} + +void slab_collect(void) +{ + struct kmem_cache *cache; + struct kmem_slab *slab; + struct list dead_slabs; + + if (elapsed_ticks <= (kmem_gc_last_tick + KMEM_GC_INTERVAL)) + return; + + kmem_gc_last_tick = elapsed_ticks; + + list_init(&dead_slabs); + + simple_lock(&kmem_cache_list_lock); + + list_for_each_entry(&kmem_cache_list, cache, node) + kmem_cache_reap(cache, &dead_slabs); + + simple_unlock(&kmem_cache_list_lock); + + while (!list_empty(&dead_slabs)) { + slab = list_first_entry(&dead_slabs, struct kmem_slab, list_node); + list_remove(&slab->list_node); + kmem_slab_destroy(slab, slab->cache); + } +} + +void slab_bootstrap(void) +{ + /* Make sure a bufctl can always be stored in a buffer */ + assert(sizeof(union kmem_bufctl) <= KMEM_ALIGN_MIN); + + list_init(&kmem_cache_list); + simple_lock_init(&kmem_cache_list_lock); +} + +void slab_init(void) +{ +#if SLAB_USE_CPU_POOLS + struct kmem_cpu_pool_type *cpu_pool_type; + char name[KMEM_CACHE_NAME_SIZE]; + size_t i, size; +#endif /* SLAB_USE_CPU_POOLS */ + +#if SLAB_USE_CPU_POOLS + for (i = 0; i < ARRAY_SIZE(kmem_cpu_pool_types); i++) { + cpu_pool_type = &kmem_cpu_pool_types[i]; + cpu_pool_type->array_cache = &kmem_cpu_array_caches[i]; + sprintf(name, "kmem_cpu_array_%d", cpu_pool_type->array_size); + size = sizeof(void *) * cpu_pool_type->array_size; + kmem_cache_init(cpu_pool_type->array_cache, name, size, + cpu_pool_type->array_align, NULL, 0); + } +#endif /* SLAB_USE_CPU_POOLS */ + + /* + * Prevent off slab data for the slab cache to avoid infinite recursion. + */ + kmem_cache_init(&kmem_slab_cache, "kmem_slab", sizeof(struct kmem_slab), + 0, NULL, KMEM_CACHE_NOOFFSLAB); +} + +void kalloc_init(void) +{ + char name[KMEM_CACHE_NAME_SIZE]; + size_t i, size; + + size = 1 << KALLOC_FIRST_SHIFT; + + for (i = 0; i < ARRAY_SIZE(kalloc_caches); i++) { + sprintf(name, "kalloc_%lu", size); + kmem_cache_init(&kalloc_caches[i], name, size, 0, NULL, 0); + size <<= 1; + } +} + +/* + * Return the kalloc cache index matching the given allocation size, which + * must be strictly greater than 0. 
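+ *
+ * Worked example (assuming, purely for illustration, that
+ * KALLOC_FIRST_SHIFT is 5, i.e. the smallest general purpose cache
+ * holds 32-byte buffers): requests of 1..32 bytes map to index 0
+ * (kalloc_32), 33..64 bytes to index 1 (kalloc_64), 65..128 bytes to
+ * index 2 (kalloc_128), and so on, each index selecting the next
+ * power-of-two cache set up by kalloc_init().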
+ */ +static inline size_t kalloc_get_index(unsigned long size) +{ + assert(size != 0); + + size = (size - 1) >> KALLOC_FIRST_SHIFT; + + if (size == 0) + return 0; + else + return (sizeof(long) * 8) - __builtin_clzl(size); +} + +static void kalloc_verify(struct kmem_cache *cache, void *buf, size_t size) +{ + size_t redzone_size; + void *redzone; + + assert(size <= cache->obj_size); + + redzone = buf + size; + redzone_size = cache->obj_size - size; + memset(redzone, KMEM_REDZONE_BYTE, redzone_size); +} + +vm_offset_t kalloc(vm_size_t size) +{ + size_t index; + void *buf; + + if (size == 0) + return 0; + + index = kalloc_get_index(size); + + if (index < ARRAY_SIZE(kalloc_caches)) { + struct kmem_cache *cache; + + cache = &kalloc_caches[index]; + buf = (void *)kmem_cache_alloc(cache); + + if ((buf != 0) && (cache->flags & KMEM_CF_VERIFY)) + kalloc_verify(cache, buf, size); + } else if (size <= PAGE_SIZE) { + buf = (void *)kmem_pagealloc_physmem(PAGE_SIZE); + } else { + buf = (void *)kmem_pagealloc_virtual(size, 0); + } + + return (vm_offset_t)buf; +} + +static void kfree_verify(struct kmem_cache *cache, void *buf, size_t size) +{ + unsigned char *redzone_byte, *redzone_end; + + assert(size <= cache->obj_size); + + redzone_byte = buf + size; + redzone_end = buf + cache->obj_size; + + while (redzone_byte < redzone_end) { + if (*redzone_byte != KMEM_REDZONE_BYTE) + kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); + + redzone_byte++; + } +} + +void kfree(vm_offset_t data, vm_size_t size) +{ + size_t index; + + if ((data == 0) || (size == 0)) + return; + + index = kalloc_get_index(size); + + if (index < ARRAY_SIZE(kalloc_caches)) { + struct kmem_cache *cache; + + cache = &kalloc_caches[index]; + + if (cache->flags & KMEM_CF_VERIFY) + kfree_verify(cache, (void *)data, size); + + kmem_cache_free(cache, data); + } else if (size <= PAGE_SIZE) { + kmem_pagefree_physmem(data, PAGE_SIZE); + } else { + kmem_pagefree_virtual(data, size); + } +} + +static void _slab_info(int (printx)(const char *fmt, ...)) +{ + struct kmem_cache *cache; + vm_size_t mem_usage, mem_reclaimable, mem_total, mem_total_reclaimable; + + mem_total = 0; + mem_total_reclaimable = 0; + + printx("cache obj slab bufs objs bufs" + " total reclaimable\n" + "name flags size size /slab usage count" + " memory memory\n"); + + simple_lock(&kmem_cache_list_lock); + + list_for_each_entry(&kmem_cache_list, cache, node) { + simple_lock(&cache->lock); + + mem_usage = (cache->nr_slabs * cache->slab_size) >> 10; + mem_reclaimable = (cache->nr_free_slabs * cache->slab_size) >> 10; + + printx("%-20s %04x %7lu %3luk %4lu %6lu %6lu %7uk %10uk\n", + cache->name, cache->flags, cache->obj_size, + cache->slab_size >> 10, + cache->bufs_per_slab, cache->nr_objs, cache->nr_bufs, + mem_usage, mem_reclaimable); + + simple_unlock(&cache->lock); + + mem_total += mem_usage; + mem_total_reclaimable += mem_reclaimable; + } + + simple_unlock(&kmem_cache_list_lock); + + printx("total: %uk, reclaimable: %uk\n", + mem_total, mem_total_reclaimable); +} + +void slab_info(void) +{ + _slab_info(printf); +} + +#if MACH_KDB +#include <ddb/db_output.h> + +void db_show_slab_info(void) +{ + _slab_info(db_printf); +} + +void db_whatis_slab(vm_offset_t a) +{ + struct kmem_cache *cache; + int done = 0; + +#ifndef SLAB_VERIFY + db_printf("enabling SLAB_VERIFY is recommended\n"); +#endif + + simple_lock(&kmem_cache_list_lock); + + list_for_each_entry(&kmem_cache_list, cache, node) { + if (a >= (vm_offset_t) cache + && a < (vm_offset_t) cache + sizeof(*cache)) + 
db_printf("Cache %s\n", cache->name); + + simple_lock(&cache->lock); + + if (cache->flags & KMEM_CF_USE_TREE) { + struct rbtree_node *node; + + node = rbtree_lookup_nearest(&cache->active_slabs, (void*) a, + kmem_slab_cmp_lookup, RBTREE_LEFT); + if (node) { + struct kmem_slab *slab; + slab = rbtree_entry(node, struct kmem_slab, tree_node); + if (a >= (vm_offset_t) slab->addr + && a < (vm_offset_t) slab->addr + cache->slab_size) { + db_printf("Allocated from cache %s\n", cache->name); + done = 1; + goto out_cache; + } + } + } + + union kmem_bufctl *free; + struct kmem_slab *slab; + + list_for_each_entry(&cache->partial_slabs, slab, list_node) { + if (a >= (vm_offset_t) slab->addr + && a < (vm_offset_t) slab->addr + cache->slab_size) { + db_printf("In cache %s\n", cache->name); + + for (free = slab->first_free; free; free = free->next) { + void *buf = kmem_bufctl_to_buf(free, cache); + + if (a >= (vm_offset_t) buf + && a < (vm_offset_t) buf + cache->buf_size) { + db_printf(" In free list\n"); + break; + } + } + + done = 1; + goto out_cache; + } + } + + list_for_each_entry(&cache->free_slabs, slab, list_node) { + if (a >= (vm_offset_t) slab->addr + && a < (vm_offset_t) slab->addr + cache->slab_size) { + db_printf("In cache %s\n", cache->name); + + for (free = slab->first_free; free; free = free->next) { + void *buf = kmem_bufctl_to_buf(free, cache); + + if (a >= (vm_offset_t) buf + && a < (vm_offset_t) buf + cache->buf_size) { + db_printf(" In free list\n"); + break; + } + } + + done = 1; + goto out_cache; + } + } + +out_cache: + simple_unlock(&cache->lock); + if (done) + goto out; + } + +out: + simple_unlock(&kmem_cache_list_lock); +} + +#endif /* MACH_KDB */ + +#if MACH_DEBUG +kern_return_t host_slab_info(host_t host, cache_info_array_t *infop, + unsigned int *infoCntp) +{ + struct kmem_cache *cache; + cache_info_t *info; + unsigned int i, nr_caches; + vm_size_t info_size; + kern_return_t kr; + + if (host == HOST_NULL) + return KERN_INVALID_HOST; + + /* Assume the cache list is mostly unaltered once the kernel is ready */ + +retry: + /* Harmless unsynchronized access, real value checked later */ + nr_caches = kmem_nr_caches; + info_size = nr_caches * sizeof(*info); + info = (cache_info_t *)kalloc(info_size); + + if (info == NULL) + return KERN_RESOURCE_SHORTAGE; + + i = 0; + + simple_lock(&kmem_cache_list_lock); + + if (nr_caches != kmem_nr_caches) { + simple_unlock(&kmem_cache_list_lock); + kfree((vm_offset_t)info, info_size); + goto retry; + } + + list_for_each_entry(&kmem_cache_list, cache, node) { + simple_lock(&cache->lock); + info[i].flags = cache->flags; +#if SLAB_USE_CPU_POOLS + info[i].cpu_pool_size = cache->cpu_pool_type->array_size; +#else /* SLAB_USE_CPU_POOLS */ + info[i].cpu_pool_size = 0; +#endif /* SLAB_USE_CPU_POOLS */ + info[i].obj_size = cache->obj_size; + info[i].align = cache->align; + info[i].buf_size = cache->buf_size; + info[i].slab_size = cache->slab_size; + info[i].bufs_per_slab = cache->bufs_per_slab; + info[i].nr_objs = cache->nr_objs; + info[i].nr_bufs = cache->nr_bufs; + info[i].nr_slabs = cache->nr_slabs; + info[i].nr_free_slabs = cache->nr_free_slabs; + strncpy(info[i].name, cache->name, sizeof(info[i].name)); + info[i].name[sizeof(info[i].name) - 1] = '\0'; + simple_unlock(&cache->lock); + + i++; + } + + simple_unlock(&kmem_cache_list_lock); + + if (nr_caches <= *infoCntp) { + memcpy(*infop, info, info_size); + } else { + vm_offset_t info_addr; + vm_size_t total_size; + vm_map_copy_t copy; + + kr = kmem_alloc_pageable(ipc_kernel_map, &info_addr, info_size); 
+ + if (kr != KERN_SUCCESS) + goto out; + + memcpy((char *)info_addr, info, info_size); + total_size = round_page(info_size); + + if (info_size < total_size) + memset((char *)(info_addr + info_size), + 0, total_size - info_size); + + kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size, TRUE, ©); + assert(kr == KERN_SUCCESS); + *infop = (cache_info_t *)copy; + } + + *infoCntp = nr_caches; + kr = KERN_SUCCESS; + +out: + kfree((vm_offset_t)info, info_size); + + return kr; +} +#endif /* MACH_DEBUG */ diff --git a/kern/slab.h b/kern/slab.h new file mode 100644 index 0000000..4d51755 --- /dev/null +++ b/kern/slab.h @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2011 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * Copyright (c) 2010, 2011 Richard Braun. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Object caching memory allocator. + */ + +#ifndef _KERN_SLAB_H +#define _KERN_SLAB_H + +#include <cache.h> +#include <kern/cpu_number.h> +#include <kern/lock.h> +#include <kern/list.h> +#include <kern/rbtree.h> +#include <mach/machine/vm_types.h> +#include <sys/types.h> +#include <vm/vm_types.h> + +struct kmem_cache; + +#if SLAB_USE_CPU_POOLS + +/* + * Per-processor cache of pre-constructed objects. + * + * The flags member is a read-only CPU-local copy of the parent cache flags. + */ +struct kmem_cpu_pool { + simple_lock_data_t lock; + int flags; + int size; + int transfer_size; + int nr_objs; + void **array; +} __attribute__((aligned(CPU_L1_SIZE))); + +/* + * When a cache is created, its CPU pool type is determined from the buffer + * size. 
For small buffer sizes, many objects can be cached in a CPU pool. + * Conversely, for large buffer sizes, this would incur much overhead, so only + * a few objects are stored in a CPU pool. + */ +struct kmem_cpu_pool_type { + size_t buf_size; + int array_size; + size_t array_align; + struct kmem_cache *array_cache; +}; +#endif /* SLAB_USE_CPU_POOLS */ + +/* + * Buffer descriptor. + * + * For normal caches (i.e. without SLAB_CF_VERIFY), bufctls are located at the + * end of (but inside) each buffer. If SLAB_CF_VERIFY is set, bufctls are + * located after each buffer. + * + * When an object is allocated to a client, its bufctl isn't used. This memory + * is instead used for redzoning if cache debugging is in effect. + */ +union kmem_bufctl { + union kmem_bufctl *next; + unsigned long redzone; +}; + +/* + * Buffer tag. + * + * This structure is only used for SLAB_CF_VERIFY caches. It is located after + * the bufctl and includes information about the state of the buffer it + * describes (allocated or not). It should be thought of as a debugging + * extension of the bufctl. + */ +struct kmem_buftag { + unsigned long state; +}; + +/* + * Page-aligned collection of unconstructed buffers. + */ +struct kmem_slab { + struct kmem_cache *cache; + struct list list_node; + struct rbtree_node tree_node; + unsigned long nr_refs; + union kmem_bufctl *first_free; + void *addr; +}; + +/* + * Type for constructor functions. + * + * The pre-constructed state of an object is supposed to include only + * elements such as e.g. linked lists, locks, reference counters. Therefore + * constructors are expected to 1) never fail and 2) not need any + * user-provided data. The first constraint implies that object construction + * never performs dynamic resource allocation, which also means there is no + * need for destructors. + */ +typedef void (*kmem_cache_ctor_t)(void *obj); + +/* + * Cache name buffer size. The size is chosen so that struct + * kmem_cache fits into two cache lines. The size of a cache line on + * a typical CPU is 64 bytes. + */ +#define KMEM_CACHE_NAME_SIZE 24 + +/* + * Cache of objects. + * + * Locking order : cpu_pool -> cache. CPU pools locking is ordered by CPU ID. + * + * Currently, SLAB_USE_CPU_POOLS is not defined. KMEM_CACHE_NAME_SIZE + * is chosen so that the struct fits into two cache lines. The first + * cache line contains all hot fields. + */ +struct kmem_cache { +#if SLAB_USE_CPU_POOLS + /* CPU pool layer */ + struct kmem_cpu_pool cpu_pools[NCPUS]; + struct kmem_cpu_pool_type *cpu_pool_type; +#endif /* SLAB_USE_CPU_POOLS */ + + /* Slab layer */ + simple_lock_data_t lock; + struct list node; /* Cache list linkage */ + struct list partial_slabs; + struct list free_slabs; + struct rbtree active_slabs; + int flags; + size_t bufctl_dist; /* Distance from buffer to bufctl */ + size_t slab_size; + long_natural_t bufs_per_slab; + long_natural_t nr_objs; /* Number of allocated objects */ + long_natural_t nr_free_slabs; + kmem_cache_ctor_t ctor; + /* All fields below are cold */ + size_t obj_size; /* User-provided size */ + /* Assuming ! 
SLAB_USE_CPU_POOLS, here is the cacheline boundary */ + size_t align; + size_t buf_size; /* Aligned object size */ + size_t color; + size_t color_max; + long_natural_t nr_bufs; /* Total number of buffers */ + long_natural_t nr_slabs; + char name[KMEM_CACHE_NAME_SIZE]; + size_t buftag_dist; /* Distance from buffer to buftag */ + size_t redzone_pad; /* Bytes from end of object to redzone word */ +} __cacheline_aligned; + +/* + * Mach-style declarations for struct kmem_cache. + */ +typedef struct kmem_cache *kmem_cache_t; +#define KMEM_CACHE_NULL ((kmem_cache_t) 0) + +/* + * Cache initialization flags. + */ +#define KMEM_CACHE_NOOFFSLAB 0x1 /* Don't allocate external slab data */ +#define KMEM_CACHE_PHYSMEM 0x2 /* Allocate from physical memory */ +#define KMEM_CACHE_VERIFY 0x4 /* Use debugging facilities */ + +/* + * Initialize a cache. + */ +void kmem_cache_init(struct kmem_cache *cache, const char *name, + size_t obj_size, size_t align, + kmem_cache_ctor_t ctor, int flags); + +/* + * Allocate an object from a cache. + */ +vm_offset_t kmem_cache_alloc(struct kmem_cache *cache); + +/* + * Release an object to its cache. + */ +void kmem_cache_free(struct kmem_cache *cache, vm_offset_t obj); + +/* + * Initialize the memory allocator module. + */ +void slab_bootstrap(void); +void slab_init(void); + +/* + * Release free slabs to the VM system. + */ +void slab_collect(void); + +/* + * Display a summary of all kernel caches. + */ +void slab_info(void); + +#if MACH_KDB +void db_show_slab_info(void); +void db_whatis_slab(vm_offset_t addr); +#endif /* MACH_KDB */ + +#endif /* _KERN_SLAB_H */ diff --git a/kern/smp.c b/kern/smp.c new file mode 100644 index 0000000..295f703 --- /dev/null +++ b/kern/smp.c @@ -0,0 +1,49 @@ +/* smp.c - Template for generic SMP controller for Mach. + Copyright (C) 2020 Free Software Foundation, Inc. + Written by Almudena Garcia Jurado-Centurion + + This file is part of GNU Mach. + + GNU Mach is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU Mach is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#include <kern/smp.h> +#include <machine/smp.h> +#include <stdint.h> + +struct smp_data { + uint8_t num_cpus; +} smp_info; + +/* + * smp_set_numcpus: initialize the number of cpus in smp_info structure + */ + +void smp_set_numcpus(uint8_t numcpus) +{ + smp_info.num_cpus = numcpus; +} + +/* + * smp_get_numcpus: returns the number of cpus existing in the machine + */ +uint8_t smp_get_numcpus(void) +{ + uint8_t numcpus = smp_info.num_cpus; + + if (numcpus == 0) + return 1; /* Although SMP doesn't find cpus, always there are at least one. */ + else + return numcpus; +} diff --git a/kern/smp.h b/kern/smp.h new file mode 100644 index 0000000..44e96f3 --- /dev/null +++ b/kern/smp.h @@ -0,0 +1,24 @@ +/* smp.h - Template for generic SMP controller for Mach. Header file + Copyright (C) 2020 Free Software Foundation, Inc. + Written by Almudena Garcia Jurado-Centurion + + This file is part of GNU Mach. 
+ + GNU Mach is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU Mach is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */ + +#include <stdint.h> + +void smp_set_numcpus(uint8_t numcpus); +uint8_t smp_get_numcpus(void); diff --git a/kern/startup.c b/kern/startup.c new file mode 100644 index 0000000..e72cf6f --- /dev/null +++ b/kern/startup.c @@ -0,0 +1,316 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Mach kernel startup. + */ + +#include <string.h> + +#include <mach/boolean.h> +#include <mach/machine.h> +#include <mach/task_special_ports.h> +#include <mach/vm_param.h> +#include <ipc/ipc_init.h> +#include <kern/cpu_number.h> +#include <kern/debug.h> +#include <kern/gsync.h> +#include <kern/machine.h> +#include <kern/mach_factor.h> +#include <kern/mach_clock.h> +#include <kern/processor.h> +#include <kern/rdxtree.h> +#include <kern/sched_prim.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/thread_swap.h> +#include <kern/timer.h> +#include <kern/xpr.h> +#include <kern/bootstrap.h> +#include <kern/startup.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_init.h> +#include <vm/vm_pageout.h> +#include <machine/machspl.h> +#include <machine/pcb.h> +#include <machine/pmap.h> +#include <machine/model_dep.h> +#include <mach/version.h> +#include <device/device_init.h> +#include <device/intr.h> + +#if MACH_KDB +#include <device/cons.h> +#endif /* MACH_KDB */ + +#if ! MACH_KBD +boolean_t reboot_on_panic = TRUE; +#endif + +#if NCPUS > 1 +#include <machine/mp_desc.h> +#include <kern/smp.h> +#include <kern/machine.h> +#endif /* NCPUS > 1 */ + +/* XX */ +extern char *kernel_cmdline; + +/* + * Running in virtual memory, on the interrupt stack. + * Does not return. Dispatches initial thread. + * + * Assumes that master_cpu is set. 
+ */ +void setup_main(void) +{ + thread_t startup_thread; + phys_addr_t memsize; + +#if MACH_KDB + /* + * Cause a breakpoint trap to the debugger before proceeding + * any further if the proper option flag was specified + * on the kernel's command line. + * XXX check for surrounding spaces. + */ + if (strstr(kernel_cmdline, "-d ")) { + cninit(); /* need console for debugger */ + SoftDebugger("init"); + } +#else /* MACH_KDB */ + if (strstr (kernel_cmdline, "-H ")) { + reboot_on_panic = FALSE; + } +#endif /* MACH_KDB */ + + panic_init(); + + sched_init(); + vm_mem_bootstrap(); + rdxtree_cache_init(); + ipc_bootstrap(); + vm_mem_init(); + ipc_init(); + + /* + * As soon as the virtual memory system is up, we record + * that this CPU is using the kernel pmap. + */ + PMAP_ACTIVATE_KERNEL(master_cpu); + + init_timers(); + init_timeout(); + +#if XPR_DEBUG + xprbootstrap(); +#endif /* XPR_DEBUG */ + + machine_init(); + + mapable_time_init(); + + machine_info.max_cpus = NCPUS; + memsize = vm_page_mem_size(); + machine_info.memory_size = memsize; + if (machine_info.memory_size < memsize) + /* Overflow, report at least 4GB */ + machine_info.memory_size = ~0; + machine_info.avail_cpus = 0; + machine_info.major_version = KERNEL_MAJOR_VERSION; + machine_info.minor_version = KERNEL_MINOR_VERSION; + + /* + * Initialize the IPC, task, and thread subsystems. + */ + task_init(); + thread_init(); + swapper_init(); +#if MACH_HOST + pset_sys_init(); +#endif /* MACH_HOST */ + + /* + * Kick off the time-out driven routines by calling + * them the first time. + */ + recompute_priorities(NULL); + compute_mach_factor(); + + gsync_setup (); + + /* + * Create a kernel thread to start the other kernel + * threads. Thread_resume (from kernel_thread) calls + * thread_setrun, which may look at current thread; + * we must avoid this, since there is no current thread. + */ + + /* + * Create the thread, and point it at the routine. + */ + (void) thread_create(kernel_task, &startup_thread); + thread_start(startup_thread, start_kernel_threads); + + /* + * Give it a kernel stack. + */ + thread_doswapin(startup_thread); + + /* + * Pretend it is already running, and resume it. + * Since it looks as if it is running, thread_resume + * will not try to put it on the run queues. + * + * We can do all of this without locking, because nothing + * else is running yet. + */ + startup_thread->state |= TH_RUN; + (void) thread_resume(startup_thread); + + /* + * Start the thread. + */ + cpu_launch_first_thread(startup_thread); + /*NOTREACHED*/ +} + +/* + * Now running in a thread. Create the rest of the kernel threads + * and the bootstrap task. + */ +void start_kernel_threads(void) +{ + int i; + + /* + * Create the idle threads and the other + * service threads. + */ + for (i = 0; i < NCPUS; i++) { + if (machine_slot[i].is_cpu) { + thread_t th; + + (void) thread_create(kernel_task, &th); + thread_bind(th, cpu_to_processor(i)); + thread_start(th, idle_thread); + thread_doswapin(th); + (void) thread_resume(th); + } + } + + (void) kernel_thread(kernel_task, reaper_thread, (char *) 0); + (void) kernel_thread(kernel_task, swapin_thread, (char *) 0); + (void) kernel_thread(kernel_task, sched_thread, (char *) 0); +#ifndef MACH_XEN + (void) kernel_thread(kernel_task, intr_thread, (char *)0); +#endif /* MACH_XEN */ + +#if NCPUS > 1 + /* + * Create the shutdown thread. + */ + (void) kernel_thread(kernel_task, action_thread, (char *) 0); + + /* + * Allow other CPUs to run. 
+ */ + start_other_cpus(); +#endif /* NCPUS > 1 */ + + /* + * Create the device service. + */ + device_service_create(); + + /* + * Initialize kernel task's creation time. + * When we created the kernel task in task_init, the mapped + * time was not yet available. Now, last thing before starting + * the user bootstrap, record the current time as the kernel + * task's creation time. + */ + record_time_stamp (&kernel_task->creation_time); + + /* + * Start the user bootstrap. + */ + bootstrap_create(); + +#if XPR_DEBUG + xprinit(); /* XXX */ +#endif /* XPR_DEBUG */ + + /* + * Become the pageout daemon. + */ + (void) spl0(); + vm_pageout(); + /*NOTREACHED*/ +} + +/* + * Start up the first thread on a CPU. + * First thread is specified for the master CPU. + */ +void cpu_launch_first_thread(thread_t th) +{ + int mycpu; + + mycpu = cpu_number(); + + cpu_up(mycpu); + + start_timer(&kernel_timer[mycpu]); + + /* + * Block all interrupts for choose_thread. + */ + (void) splhigh(); + + if (th == THREAD_NULL) + th = choose_thread(cpu_to_processor(mycpu)); + if (th == THREAD_NULL) + panic("cpu_launch_first_thread"); + + PMAP_ACTIVATE_KERNEL(mycpu); + + percpu_assign(active_thread, th); + percpu_assign(active_stack, th->kernel_stack); + thread_lock(th); + th->state &= ~TH_UNINT; + thread_unlock(th); + timer_switch(&th->system_timer); + + PMAP_ACTIVATE_USER(vm_map_pmap(th->task->map), th, mycpu); + + startrtclock(); /* needs an active thread */ + + load_context(th); + /*NOTREACHED*/ +} diff --git a/kern/startup.h b/kern/startup.h new file mode 100644 index 0000000..d924d15 --- /dev/null +++ b/kern/startup.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2013 Free Software Foundation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _KERN_STARTUP_H_ +#define _KERN_STARTUP_H_ + +#include <kern/thread.h> + +extern void setup_main(void); +void cpu_launch_first_thread(thread_t th); +void start_kernel_threads(void); + +#endif /* _KERN_STARTUP_H_ */ diff --git a/kern/strings.c b/kern/strings.c new file mode 100644 index 0000000..7e7fda0 --- /dev/null +++ b/kern/strings.c @@ -0,0 +1,275 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: strings.c + * Author: Robert V. Baron, Carnegie Mellon University + * Date: ??/92 + * + * String functions. + */ + +#include <string.h> + +#ifdef strcpy +#undef strcmp +#undef strncmp +#undef strcpy +#undef strncpy +#undef strlen +#endif + +/* + * Abstract: + * strcmp (s1, s2) compares the strings "s1" and "s2". + * It returns 0 if the strings are identical. It returns + * > 0 if the first character that differs in the two strings + * is larger in s1 than in s2 or if s1 is longer than s2 and + * the contents are identical up to the length of s2. + * It returns < 0 if the first differing character is smaller + * in s1 than in s2 or if s1 is shorter than s2 and the + * contents are identical up to the length of s1. + */ + +int __attribute__ ((pure)) +strcmp( + const char *s1, + const char *s2) +{ + unsigned int a, b; + + do { + a = *s1++; + b = *s2++; + if (a != b) + return a-b; /* includes case when + 'a' is zero and 'b' is not zero + or vice versa */ + } while (a != '\0'); + + return 0; /* both are zero */ +} + + +/* + * Abstract: + * strncmp (s1, s2, n) compares the strings "s1" and "s2" + * in exactly the same way as strcmp does. Except the + * comparison runs for at most "n" characters. + */ + +int __attribute__ ((pure)) +strncmp( + const char *s1, + const char *s2, + size_t n) +{ + unsigned int a, b; + + while (n != 0) { + a = *s1++; + b = *s2++; + if (a != b) + return a-b; /* includes case when + 'a' is zero and 'b' is not zero + or vice versa */ + if (a == '\0') + return 0; /* both are zero */ + n--; + } + + return 0; +} + + +/* + * Abstract: + * strcpy copies the contents of the string "from" including + * the null terminator to the string "to". A pointer to "to" + * is returned. + */ + +char * +strcpy( + char *to, + const char *from) +{ + char *ret = to; + + while ((*to++ = *from++) != '\0') + continue; + + return ret; +} + +/* + * Abstract: + * strncpy copies "count" characters from the "from" string to + * the "to" string. If "from" contains less than "count" characters + * "to" will be padded with null characters until exactly "count" + * characters have been written. The return value is a pointer + * to the "to" string. + */ + +char * +strncpy( + char *to, + const char *from, + size_t count) +{ + char *ret = to; + + while (count != 0) { + count--; + if ((*to++ = *from++) == '\0') + break; + } + + while (count != 0) { + *to++ = '\0'; + count--; + } + + return ret; +} + +/* + * Abstract: + * strlen returns the number of characters in "string" preceding + * the terminating null character. + */ + +size_t __attribute__ ((pure)) +strlen( + const char *string) +{ + const char *ret = string; + + while (*string++ != '\0') + continue; + + return string - 1 - ret; +} + +/* + * Abstract: + * strchr returns a pointer to the first occurrence of the character + * "c" in the string "s". If "c" is not found, return NULL. + */ +char * +strchr( + const char *s, + int c) +{ + while (*s != c) { + if (*s == '\0') { + return NULL; + } + + s++; + } + + return (char *)s; +} + +/* + * Abstract: + * strsep extracts tokens from strings. If "*sp" is NULL, return NULL + * and do nothing. 
Otherwise, find the first token in string "*sp". + * Tokens are delimited by characters in the string "delim". If no + * delimiter is found, the token is the entire string "*sp", and "*sp" + * is made NULL. Otherwise, overwrite the delimiter with a null byte, + * and make "*sp" point past it. + */ +char * +strsep( + char **sp, + const char *delim) +{ + const char *d; + char *s, *t; + + s = t = *sp; + + if (s == NULL) { + return NULL; + } + + for (;;) { + if (*s == '\0') { + *sp = NULL; + return t; + } + + d = delim; + + for (;;) { + if (*d == '\0') { + break; + } + + if (*d == *s) { + *s = '\0'; + *sp = s + 1; + return t; + } + + d++; + } + + s++; + } +} + +/* + * Abstract: + * strstr returns a pointer to the first occurrence of the substring + * "find" in the string "s". If no substring was found, return NULL. + */ +char * +strstr( + const char *s, + const char *find) +{ + size_t len; + + len = strlen(find); + + if (len == 0) { + return (char *)s; + } + + for (;;) { + if (*s == '\0') { + return NULL; + } + + if (strncmp(s, find, len) == 0) { + return (char *)s; + } + + s++; + } +} diff --git a/kern/syscall_emulation.c b/kern/syscall_emulation.c new file mode 100644 index 0000000..620c235 --- /dev/null +++ b/kern/syscall_emulation.c @@ -0,0 +1,453 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <string.h> + +#include <mach/error.h> +#include <mach/vm_param.h> +#include <kern/syscall_emulation.h> +#include <kern/task.h> +#include <kern/kalloc.h> +#include <kern/mach.server.h> +#include <vm/vm_kern.h> + +/* XXX */ +#define syscall_emulation_sync(task) + + + +/* + * WARNING: + * This code knows that kalloc() allocates memory most efficiently + * in sizes that are powers of 2, and asks for those sizes. + */ + +/* + * Go from number of entries to size of struct eml_dispatch and back. + */ +#define base_size (sizeof(struct eml_dispatch) - sizeof(eml_routine_t)) +#define count_to_size(count) \ + (base_size + sizeof(vm_offset_t) * (count)) + +#define size_to_count(size) \ + ( ((size) - base_size) / sizeof(vm_offset_t) ) + +/* + * eml_init: initialize user space emulation code + */ +void eml_init(void) +{ +} + +/* + * eml_task_reference() [Exported] + * + * Bumps the reference count on the common emulation + * vector. 
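+ *
+ * The child task simply inherits its parent's dispatch vector: when
+ * there is no parent, or the parent has no vector, the child's
+ * eml_dispatch is simply set to EML_DISPATCH_NULL and no reference is
+ * taken.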
+ */ + +void eml_task_reference( + task_t task, + task_t parent) +{ + eml_dispatch_t eml; + + if (parent == TASK_NULL) + eml = EML_DISPATCH_NULL; + else + eml = parent->eml_dispatch; + + if (eml != EML_DISPATCH_NULL) { + simple_lock(&eml->lock); + eml->ref_count++; + simple_unlock(&eml->lock); + } + task->eml_dispatch = eml; +} + + +/* + * eml_task_deallocate() [Exported] + * + * Cleans up after the emulation code when a process exits. + */ + +void eml_task_deallocate(const task_t task) +{ + eml_dispatch_t eml; + + eml = task->eml_dispatch; + if (eml != EML_DISPATCH_NULL) { + int count; + + simple_lock(&eml->lock); + count = --eml->ref_count; + simple_unlock(&eml->lock); + + if (count == 0) + kfree((vm_offset_t)eml, count_to_size(eml->disp_count)); + } +} + +/* + * task_set_emulation_vector: [Server Entry] + * set a list of emulated system calls for this task. + */ +static kern_return_t +task_set_emulation_vector_internal( + task_t task, + int vector_start, + emulation_vector_t emulation_vector, + unsigned int emulation_vector_count) +{ + eml_dispatch_t cur_eml, new_eml, old_eml; + vm_size_t new_size; + int cur_start, cur_end; + int new_start = 0, new_end = 0; + int vector_end; + + if (task == TASK_NULL) + return EML_BAD_TASK; + + vector_end = vector_start + emulation_vector_count; + + /* + * We try to re-use the existing emulation vector + * if possible. We can reuse the vector if it + * is not shared with another task and if it is + * large enough to contain the entries we are + * supplying. + * + * We must grab the lock on the task to check whether + * there is an emulation vector. + * If the vector is shared or not large enough, we + * need to drop the lock and allocate a new emulation + * vector. + * + * While the lock is dropped, the emulation vector + * may be released by all other tasks (giving us + * exclusive use), or may be enlarged by another + * task_set_emulation_vector call. Therefore, + * after allocating the new emulation vector, we + * must grab the lock again to check whether we + * really need the new vector we just allocated. + * + * Since an emulation vector cannot be altered + * if it is in use by more than one task, the + * task lock is sufficient to protect the vector`s + * start, count, and contents. The lock in the + * vector protects only the reference count. + */ + + old_eml = EML_DISPATCH_NULL; /* vector to discard */ + new_eml = EML_DISPATCH_NULL; /* new vector */ + + for (;;) { + /* + * Find the current emulation vector. + * See whether we can overwrite it. + */ + task_lock(task); + cur_eml = task->eml_dispatch; + if (cur_eml != EML_DISPATCH_NULL) { + cur_start = cur_eml->disp_min; + cur_end = cur_eml->disp_count + cur_start; + + simple_lock(&cur_eml->lock); + if (cur_eml->ref_count == 1 && + cur_start <= vector_start && + cur_end >= vector_end) + { + /* + * Can use the existing emulation vector. + * Discard any new one we allocated. + */ + simple_unlock(&cur_eml->lock); + old_eml = new_eml; + break; + } + + if (new_eml != EML_DISPATCH_NULL && + new_start <= cur_start && + new_end >= cur_end) + { + /* + * A new vector was allocated, and it is large enough + * to hold all the entries from the current vector. + * Copy the entries to the new emulation vector, + * deallocate the current one, and use the new one. 
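+ *
+ * For example, if the current vector covers entries [10, 20)
+ * and the caller supplies [5, 15), then new_start = 5 and
+ * new_end = 20; the old entries are copied to offset
+ * cur_start - new_start = 5, and the caller's entries are
+ * later installed at offset vector_start - disp_min = 0.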
+ */ + memcpy(&new_eml->disp_vector[cur_start-new_start], + &cur_eml->disp_vector[0], + cur_eml->disp_count * sizeof(vm_offset_t)); + + if (--cur_eml->ref_count == 0) + old_eml = cur_eml; /* discard old vector */ + simple_unlock(&cur_eml->lock); + + task->eml_dispatch = new_eml; + syscall_emulation_sync(task); + cur_eml = new_eml; + break; + } + simple_unlock(&cur_eml->lock); + + /* + * Need a new emulation vector. + * Ensure it will hold all the entries from + * both the old and new emulation vectors. + */ + new_start = vector_start; + if (new_start > cur_start) + new_start = cur_start; + new_end = vector_end; + if (new_end < cur_end) + new_end = cur_end; + } + else { + /* + * There is no current emulation vector. + * If a new one was allocated, use it. + */ + if (new_eml != EML_DISPATCH_NULL) { + task->eml_dispatch = new_eml; + cur_eml = new_eml; + break; + } + + /* + * Compute the size needed for the new vector. + */ + new_start = vector_start; + new_end = vector_end; + } + + /* + * Have no vector (or one that is no longer large enough). + * Drop all the locks and allocate a new vector. + * Repeat the loop to check whether the old vector was + * changed while we didn`t hold the locks. + */ + + task_unlock(task); + + if (new_eml != EML_DISPATCH_NULL) + kfree((vm_offset_t)new_eml, count_to_size(new_eml->disp_count)); + + new_size = count_to_size(new_end - new_start); + new_eml = (eml_dispatch_t) kalloc(new_size); + + memset(new_eml, 0, new_size); + simple_lock_init(&new_eml->lock); + new_eml->ref_count = 1; + new_eml->disp_min = new_start; + new_eml->disp_count = new_end - new_start; + + continue; + } + + /* + * We have the emulation vector. + * Install the new emulation entries. + */ + memcpy(&cur_eml->disp_vector[vector_start - cur_eml->disp_min], + &emulation_vector[0], + emulation_vector_count * sizeof(vm_offset_t)); + + task_unlock(task); + + /* + * Discard any old emulation vector we don`t need. + */ + if (old_eml) + kfree((vm_offset_t) old_eml, count_to_size(old_eml->disp_count)); + + return KERN_SUCCESS; +} + +/* + * task_set_emulation_vector: [Server Entry] + * + * Set the list of emulated system calls for this task. + * The list is out-of-line. + */ +kern_return_t +task_set_emulation_vector( + task_t task, + int vector_start, + emulation_vector_t emulation_vector, + unsigned int emulation_vector_count) +{ + kern_return_t kr; + vm_offset_t emul_vector_addr; + + if (task == TASK_NULL) + return EML_BAD_TASK; /* XXX sb KERN_INVALID_ARGUMENT */ + + /* + * The emulation vector is really a vm_map_copy_t. + */ + kr = vm_map_copyout(ipc_kernel_map, &emul_vector_addr, + (vm_map_copy_t) emulation_vector); + if (kr != KERN_SUCCESS) + return kr; + + /* + * Do the work. + */ + kr = task_set_emulation_vector_internal( + task, + vector_start, + (emulation_vector_t) emul_vector_addr, + emulation_vector_count); + + /* + * Discard the memory + */ + (void) kmem_free(ipc_kernel_map, + emul_vector_addr, + emulation_vector_count * sizeof(eml_dispatch_t)); + + return kr; +} + +/* + * task_get_emulation_vector: [Server Entry] + * + * Get the list of emulated system calls for this task. + * List is returned out-of-line. 
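+ *
+ * The out-of-line data is handed back as a vm_map_copy_t built by
+ * vm_map_copyin() below, mirroring the vm_map_copyout() with which
+ * task_set_emulation_vector() accepts its argument.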
+ */ +kern_return_t +task_get_emulation_vector( + task_t task, + int *vector_start, /* out */ + emulation_vector_t *emulation_vector, /* out */ + unsigned int *emulation_vector_count) /* out */ +{ + eml_dispatch_t eml; + vm_size_t vector_size, size; + vm_offset_t addr; + + if (task == TASK_NULL) + return EML_BAD_TASK; + + addr = 0; + size = 0; + + for(;;) { + vm_size_t size_needed; + + task_lock(task); + eml = task->eml_dispatch; + if (eml == EML_DISPATCH_NULL) { + task_unlock(task); + if (addr) + (void) kmem_free(ipc_kernel_map, addr, size); + *vector_start = 0; + *emulation_vector = 0; + *emulation_vector_count = 0; + return KERN_SUCCESS; + } + + /* + * Do we have the memory we need? + */ + vector_size = eml->disp_count * sizeof(vm_offset_t); + + size_needed = round_page(vector_size); + if (size_needed <= size) + break; + + /* + * If not, unlock the task and allocate more memory. + */ + task_unlock(task); + + if (size != 0) + kmem_free(ipc_kernel_map, addr, size); + + size = size_needed; + if (kmem_alloc(ipc_kernel_map, &addr, size) != KERN_SUCCESS) + return KERN_RESOURCE_SHORTAGE; + } + + /* + * Copy out the dispatch addresses + */ + *vector_start = eml->disp_min; + *emulation_vector_count = eml->disp_count; + memcpy((void *)addr, + eml->disp_vector, + vector_size); + + /* + * Unlock the task and free any memory we did not need + */ + task_unlock(task); + + { + vm_size_t size_used, size_left; + vm_map_copy_t memory; + + /* + * Free any unused memory beyond the end of the last page used + */ + size_used = round_page(vector_size); + if (size_used != size) + (void) kmem_free(ipc_kernel_map, + addr + size_used, + size - size_used); + + /* + * Zero the remainder of the page being returned. + */ + size_left = size_used - vector_size; + if (size_left > 0) + memset((char *)addr + vector_size, 0, size_left); + + /* + * Make memory into copyin form - this unwires it. + */ + (void) vm_map_copyin(ipc_kernel_map, addr, vector_size, TRUE, &memory); + + *emulation_vector = (emulation_vector_t) memory; + } + + return KERN_SUCCESS; +} + +/* + * task_set_emulation: [Server Entry] + * set up for user space emulation of syscalls within this task. + */ +kern_return_t task_set_emulation( + task_t task, + vm_offset_t routine_entry_pt, + int routine_number) +{ + return task_set_emulation_vector_internal(task, routine_number, + &routine_entry_pt, 1); +} diff --git a/kern/syscall_emulation.h b/kern/syscall_emulation.h new file mode 100644 index 0000000..bf20e44 --- /dev/null +++ b/kern/syscall_emulation.h @@ -0,0 +1,67 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_SYSCALL_EMULATION_H_ +#define _KERN_SYSCALL_EMULATION_H_ + +#ifndef __ASSEMBLER__ +#include <mach/machine/vm_types.h> +#include <kern/lock.h> +#include <kern/task.h> + +typedef vm_offset_t eml_routine_t; + +typedef struct eml_dispatch { + decl_simple_lock_data(, lock) /* lock for reference count */ + int ref_count; /* reference count */ + int disp_count; /* count of entries in vector */ + int disp_min; /* index of lowest entry in vector */ + eml_routine_t disp_vector[1]; /* first entry in array of dispatch */ + /* routines (array has disp_count */ + /* elements) */ +} *eml_dispatch_t; + +typedef vm_offset_t *emulation_vector_t; /* Variable-length array */ + +#define EML_ROUTINE_NULL (eml_routine_t)0 +#define EML_DISPATCH_NULL (eml_dispatch_t)0 + +#define EML_SUCCESS (0) + +#define EML_MOD (err_kern|err_sub(2)) +#define EML_BAD_TASK (EML_MOD|0x0001) +#define EML_BAD_CNT (EML_MOD|0x0002) + +extern void eml_init(void); +extern void eml_task_reference(task_t task, task_t parent); +extern void eml_task_deallocate(task_t task); + +#endif /* __ASSEMBLER__ */ + +#endif /* _KERN_SYSCALL_EMULATION_H_ */ diff --git a/kern/syscall_subr.c b/kern/syscall_subr.c new file mode 100644 index 0000000..0030e02 --- /dev/null +++ b/kern/syscall_subr.c @@ -0,0 +1,386 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach/boolean.h> +#include <mach/thread_switch.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_space.h> +#include <kern/counters.h> +#include <kern/ipc_kobject.h> +#include <kern/mach_clock.h> +#include <kern/printf.h> +#include <kern/processor.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/syscall_subr.h> +#include <kern/ipc_sched.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <machine/machspl.h> /* for splsched */ + +#if MACH_FIXPRI +#include <mach/policy.h> +#endif /* MACH_FIXPRI */ + +/* + * swtch and swtch_pri both attempt to context switch (logic in + * thread_block no-ops the context switch if nothing would happen). 
+ * A boolean is returned that indicates whether there is anything + * else runnable. + * + * This boolean can be used by a thread waiting on a + * lock or condition: If FALSE is returned, the thread is justified + * in becoming a resource hog by continuing to spin because there's + * nothing else useful that the processor could do. If TRUE is + * returned, the thread should make one more check on the + * lock and then be a good citizen and really suspend. + */ +static void swtch_continue(void) +{ + processor_t myprocessor; + + myprocessor = current_processor(); + thread_syscall_return(myprocessor->runq.count > 0 || + myprocessor->processor_set->runq.count > 0); + /*NOTREACHED*/ +} + +boolean_t swtch(void) +{ + processor_t myprocessor; + +#if NCPUS > 1 + myprocessor = current_processor(); + if (myprocessor->runq.count == 0 && + myprocessor->processor_set->runq.count == 0) + return(FALSE); +#endif /* NCPUS > 1 */ + + counter(c_swtch_block++); + thread_block(swtch_continue); + myprocessor = current_processor(); + return(myprocessor->runq.count > 0 || + myprocessor->processor_set->runq.count > 0); +} + +static void swtch_pri_continue(void) +{ + thread_t thread = current_thread(); + processor_t myprocessor; + + if (thread->depress_priority >= 0) + (void) thread_depress_abort(thread); + myprocessor = current_processor(); + thread_syscall_return(myprocessor->runq.count > 0 || + myprocessor->processor_set->runq.count > 0); + /*NOTREACHED*/ +} + +boolean_t swtch_pri(int pri) +{ + thread_t thread = current_thread(); + processor_t myprocessor; + +#if NCPUS > 1 + myprocessor = current_processor(); + if (myprocessor->runq.count == 0 && + myprocessor->processor_set->runq.count == 0) + return(FALSE); +#endif /* NCPUS > 1 */ + + /* + * XXX need to think about depression duration. + * XXX currently using min quantum. + */ + thread_depress_priority(thread, min_quantum); + + counter(c_swtch_pri_block++); + thread_block(swtch_pri_continue); + + if (thread->depress_priority >= 0) + (void) thread_depress_abort(thread); + myprocessor = current_processor(); + return(myprocessor->runq.count > 0 || + myprocessor->processor_set->runq.count > 0); +} + +static void thread_switch_continue(void) +{ + thread_t cur_thread = current_thread(); + + /* + * Restore depressed priority + */ + if (cur_thread->depress_priority >= 0) + (void) thread_depress_abort(cur_thread); + thread_syscall_return(KERN_SUCCESS); + /*NOTREACHED*/ +} + +/* + * thread_switch: + * + * Context switch. User may supply thread hint. + * + * Fixed priority threads that call this get what they asked for + * even if that violates priority order. + */ +kern_return_t thread_switch( + mach_port_name_t thread_name, + int option, + mach_msg_timeout_t option_time) +{ + thread_t cur_thread = current_thread(); + processor_t myprocessor; + ipc_port_t port; + + /* + * Process option. + */ + switch (option) { + case SWITCH_OPTION_NONE: + /* + * Nothing to do. + */ + break; + + case SWITCH_OPTION_DEPRESS: + /* + * Depress priority for given time. + */ + thread_depress_priority(cur_thread, option_time); + break; + + case SWITCH_OPTION_WAIT: + thread_will_wait_with_timeout(cur_thread, option_time); + break; + + default: + return(KERN_INVALID_ARGUMENT); + } + +#ifndef MIGRATING_THREADS /* XXX thread_run defunct */ + /* + * Check and act on thread hint if appropriate. 
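+ *
+ * In outline: the hint names a thread by its port. If that thread
+ * is in the caller's processor set and can be pulled off its run
+ * queue (rem_runq), the processor is handed to it directly via
+ * thread_run(); otherwise we fall through to a plain thread_block().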
+ */ + if ((thread_name != 0) && + (ipc_port_translate_send(cur_thread->task->itk_space, + thread_name, &port) == KERN_SUCCESS)) { + /* port is locked, but it might not be active */ + + /* + * Get corresponding thread. + */ + if (ip_active(port) && (ip_kotype(port) == IKOT_THREAD)) { + thread_t thread; + spl_t s; + + thread = (thread_t) port->ip_kobject; + /* + * Check if the thread is in the right pset. Then + * pull it off its run queue. If it + * doesn't come, then it's not eligible. + */ + s = splsched(); + thread_lock(thread); + if ((thread->processor_set == cur_thread->processor_set) + && (rem_runq(thread) != RUN_QUEUE_NULL)) { + /* + * Hah, got it!! + */ + thread_unlock(thread); + (void) splx(s); + ip_unlock(port); + /* XXX thread might disappear on us now? */ +#if MACH_FIXPRI + if (thread->policy == POLICY_FIXEDPRI) { + myprocessor = current_processor(); + myprocessor->quantum = thread->sched_data; + myprocessor->first_quantum = TRUE; + } +#endif /* MACH_FIXPRI */ + counter(c_thread_switch_handoff++); + thread_run(thread_switch_continue, thread); + /* + * Restore depressed priority + */ + if (cur_thread->depress_priority >= 0) + (void) thread_depress_abort(cur_thread); + + return(KERN_SUCCESS); + } + thread_unlock(thread); + (void) splx(s); + } + ip_unlock(port); + } +#endif /* not MIGRATING_THREADS */ + + /* + * No handoff hint supplied, or hint was wrong. Call thread_block() in + * hopes of running something else. If nothing else is runnable, + * thread_block will detect this. WARNING: thread_switch with no + * option will not do anything useful if the thread calling it is the + * highest priority thread (can easily happen with a collection + * of timesharing threads). + */ +#if NCPUS > 1 + myprocessor = current_processor(); + if (myprocessor->processor_set->runq.count > 0 || + myprocessor->runq.count > 0) +#endif /* NCPUS > 1 */ + { + counter(c_thread_switch_block++); + thread_block(thread_switch_continue); + } + + /* + * Restore depressed priority + */ + if (cur_thread->depress_priority >= 0) + (void) thread_depress_abort(cur_thread); + return(KERN_SUCCESS); +} + +/* + * thread_depress_priority + * + * Depress thread's priority to lowest possible for specified period. + * Intended for use when thread wants a lock but doesn't know which + * other thread is holding it. As with thread_switch, fixed + * priority threads get exactly what they asked for. Users access + * this by the SWITCH_OPTION_DEPRESS option to thread_switch. A Time + * of zero will result in no timeout being scheduled. + */ +void +thread_depress_priority( + thread_t thread, + mach_msg_timeout_t depress_time) +{ + unsigned int ticks; + spl_t s; + + /* convert from milliseconds to ticks */ + ticks = convert_ipc_timeout_to_ticks(depress_time); + + s = splsched(); + thread_lock(thread); + + /* + * If thread is already depressed, override previous depression. + */ + reset_timeout_check(&thread->depress_timer); + + /* + * Save current priority, then set priority and + * sched_pri to their lowest possible values. + */ + thread->depress_priority = thread->priority; + thread->priority = NRQS-1; + thread->sched_pri = NRQS-1; + if (ticks != 0) + set_timeout(&thread->depress_timer, ticks); + + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_depress_timeout: + * + * Timeout routine for priority depression. + */ +void +thread_depress_timeout(thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + + /* + * If we lose a race with thread_depress_abort, + * then depress_priority might be -1. 
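+ * In that case the abort has already restored the priority and the
+ * check below does nothing.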
+ */ + + if (thread->depress_priority >= 0) { + thread->priority = thread->depress_priority; + thread->depress_priority = -1; + compute_priority(thread, FALSE); + } + + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_depress_abort: + * + * Prematurely abort priority depression if there is one. + */ +kern_return_t +thread_depress_abort(thread_t thread) +{ + spl_t s; + + if (thread == THREAD_NULL) + return(KERN_INVALID_ARGUMENT); + + s = splsched(); + thread_lock(thread); + + /* + * Only restore priority if thread is depressed. + */ + if (thread->depress_priority >= 0) { + reset_timeout_check(&thread->depress_timer); + thread->priority = thread->depress_priority; + thread->depress_priority = -1; + compute_priority(thread, FALSE); + } + + thread_unlock(thread); + (void) splx(s); + return(KERN_SUCCESS); +} + +/* + * mach_print + * + * Display a null-terminated character string on the Mach console. + * This system call is meant as a debugging tool useful to circumvent + * messaging altogether. + */ +#ifdef MACH_KDB +void +mach_print(const char *s) +{ + printf("%s", s); +} +#endif /* MACH_KDB */ diff --git a/kern/syscall_subr.h b/kern/syscall_subr.h new file mode 100644 index 0000000..c9a2777 --- /dev/null +++ b/kern/syscall_subr.h @@ -0,0 +1,42 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <sys/types.h> +#include <mach/mach_types.h> +#include <kern/kern_types.h> + +#ifndef _KERN_SYSCALL_SUBR_H_ +#define _KERN_SYSCALL_SUBR_H_ + +extern int swtch(void); +extern int swtch_pri(int); +extern int thread_switch(mach_port_name_t, int, mach_msg_timeout_t); +extern void thread_depress_timeout(thread_t); +extern kern_return_t thread_depress_abort(thread_t); +extern void mach_print(const char *); +extern void thread_depress_priority(thread_t thread, mach_msg_timeout_t depress_time); + +#endif /* _KERN_SYSCALL_SUBR_H_ */ diff --git a/kern/syscall_sw.c b/kern/syscall_sw.c new file mode 100644 index 0000000..4249b71 --- /dev/null +++ b/kern/syscall_sw.c @@ -0,0 +1,224 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach/port.h> +#include <mach/kern_return.h> +#include <kern/debug.h> +#include <kern/syscall_sw.h> + +/* Include declarations of the trap functions. */ +#include <mach/mach_traps.h> +#include <mach/message.h> +#include <kern/syscall_subr.h> +#include <kern/ipc_mig.h> +#include <kern/eventcount.h> +#include <ipc/mach_port.server.h> + + +/* + * To add a new entry: + * Add an "MACH_TRAP(routine, arg count)" to the table below. + * + * Add trap definition to mach/syscall_sw.h and + * recompile user library. + * + * WARNING: If you add a trap which requires more than 7 + * parameters, mach/ca/syscall_sw.h and ca/trap.c both need + * to be modified for it to work successfully on an + * RT. Similarly, mach/mips/syscall_sw.h and mips/locore.s + * need to be modified before it will work on Pmaxen. + * + * WARNING: Don't use numbers 0 through -9. They (along with + * the positive numbers) are reserved for Unix. 
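+ *
+ * For example (names hypothetical), a new two-argument trap could
+ * take over an unused kern_invalid slot, say slot 62:
+ *
+ *	MACH_TRAP(my_new_trap, 2),
+ *
+ * together with the matching user-side definition in
+ * mach/syscall_sw.h.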
+ */ + +boolean_t kern_invalid_debug = FALSE; + +static mach_port_name_t null_port(void) +{ + if (kern_invalid_debug) SoftDebugger("null_port mach trap"); + return(MACH_PORT_NULL); +} + +static kern_return_t kern_invalid(void) +{ + if (kern_invalid_debug) SoftDebugger("kern_invalid mach trap"); + return(KERN_INVALID_ARGUMENT); +} + +mach_trap_t mach_trap_table[] = { + MACH_TRAP(kern_invalid, 0), /* 0 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 1 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 2 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 3 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 4 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 5 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 6 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 7 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 8 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 9 */ /* Unix */ + MACH_TRAP(null_port, 0), /* 10 */ + MACH_TRAP(null_port, 0), /* 11 */ + MACH_TRAP(null_port, 0), /* 12 */ + MACH_TRAP(null_port, 0), /* 13 */ + MACH_TRAP(kern_invalid, 0), /* 14 */ + MACH_TRAP(kern_invalid, 0), /* 15 */ + MACH_TRAP(kern_invalid, 0), /* 16 */ + MACH_TRAP_STACK(evc_wait, 1), /* 17 */ + MACH_TRAP_STACK(evc_wait_clear, 1), /* 18 */ + MACH_TRAP(kern_invalid, 0), /* 19 */ + MACH_TRAP(kern_invalid, 0), /* 20 */ + MACH_TRAP(kern_invalid, 0), /* 21 */ + MACH_TRAP(kern_invalid, 0), /* 22 */ + MACH_TRAP(kern_invalid, 0), /* 23 */ + MACH_TRAP(kern_invalid, 0), /* 24 */ + MACH_TRAP_STACK(mach_msg_trap, 7), /* 25 */ + MACH_TRAP(mach_reply_port, 0), /* 26 */ + MACH_TRAP(mach_thread_self, 0), /* 27 */ + MACH_TRAP(mach_task_self, 0), /* 28 */ + MACH_TRAP(mach_host_self, 0), /* 29 */ +#ifdef MACH_KDB + MACH_TRAP_STACK(mach_print, 1), /* 30 */ +#else /* MACH_KDB */ + MACH_TRAP_STACK(kern_invalid, 0), /* 30 */ +#endif /* MACH_KDB */ + + MACH_TRAP(kern_invalid, 0), /* 31 */ + MACH_TRAP(kern_invalid, 0), /* 32 */ + MACH_TRAP(kern_invalid, 0), /* 33 emul: task_by_pid */ + MACH_TRAP(kern_invalid, 0), /* 34 emul: pid_by_task */ + MACH_TRAP(kern_invalid, 0), /* 35 */ + MACH_TRAP(kern_invalid, 0), /* 36 */ + MACH_TRAP(kern_invalid, 0), /* 37 */ + MACH_TRAP(kern_invalid, 0), /* 38 */ + + MACH_TRAP(syscall_device_writev_request, 6), /* 39 */ + MACH_TRAP(syscall_device_write_request, 6), /* 40 */ + + MACH_TRAP(kern_invalid, 0), /* 41 emul: init_process */ + MACH_TRAP(kern_invalid, 0), /* 42 */ + MACH_TRAP(kern_invalid, 0), /* 43 emul: map_fd */ + MACH_TRAP(kern_invalid, 0), /* 44 emul: rfs_make_symlink */ + MACH_TRAP(kern_invalid, 0), /* 45 */ + MACH_TRAP(kern_invalid, 0), /* 46 */ + MACH_TRAP(kern_invalid, 0), /* 47 */ + MACH_TRAP(kern_invalid, 0), /* 48 */ + MACH_TRAP(kern_invalid, 0), /* 49 */ + + MACH_TRAP(kern_invalid, 0), /* 50 */ + MACH_TRAP(kern_invalid, 0), /* 51 */ + MACH_TRAP(kern_invalid, 0), /* 52 emul: htg_syscall */ + MACH_TRAP(kern_invalid, 0), /* 53 emul: set_ras_address */ + MACH_TRAP(kern_invalid, 0), /* 54 */ + MACH_TRAP(null_port, 0), /* 55 */ + MACH_TRAP(null_port, 0), /* 56 */ + MACH_TRAP(kern_invalid, 0), /* 57 */ + MACH_TRAP(kern_invalid, 0), /* 58 */ + MACH_TRAP_STACK(swtch_pri, 1), /* 59 */ + + MACH_TRAP_STACK(swtch, 0), /* 60 */ + MACH_TRAP_STACK(thread_switch, 3), /* 61 */ + MACH_TRAP(kern_invalid, 0), /* 62 */ + MACH_TRAP(kern_invalid, 0), /* 63 */ + MACH_TRAP(syscall_vm_map, 11), /* 64 */ + MACH_TRAP(syscall_vm_allocate, 4), /* 65 */ + MACH_TRAP(syscall_vm_deallocate, 3), /* 66 */ + MACH_TRAP(kern_invalid, 0), /* 67 */ + MACH_TRAP(syscall_task_create, 3), /* 68 */ + MACH_TRAP(syscall_task_terminate, 1), /* 69 */ + + 
MACH_TRAP(syscall_task_suspend, 1), /* 70 */ + MACH_TRAP(syscall_task_set_special_port, 3), /* 71 */ + MACH_TRAP(syscall_mach_port_allocate, 3), /* 72 */ + MACH_TRAP(syscall_mach_port_deallocate, 2), /* 73 */ + MACH_TRAP(syscall_mach_port_insert_right, 4), /* 74 */ + MACH_TRAP(syscall_mach_port_allocate_name, 3), /* 75 */ + MACH_TRAP(syscall_thread_depress_abort, 1), /* 76 */ + MACH_TRAP(kern_invalid, 0), /* 77 */ + MACH_TRAP(kern_invalid, 0), /* 78 */ + MACH_TRAP(kern_invalid, 0), /* 79 */ + + MACH_TRAP(kern_invalid, 0), /* 80 */ + MACH_TRAP(kern_invalid, 0), /* 81 */ + MACH_TRAP(kern_invalid, 0), /* 82 */ + MACH_TRAP(kern_invalid, 0), /* 83 */ + MACH_TRAP(kern_invalid, 0), /* 84 */ + MACH_TRAP(kern_invalid, 0), /* 85 */ + MACH_TRAP(kern_invalid, 0), /* 86 */ + MACH_TRAP(kern_invalid, 0), /* 87 */ + MACH_TRAP(kern_invalid, 0), /* 88 */ + MACH_TRAP(kern_invalid, 0), /* 89 */ + MACH_TRAP(kern_invalid, 0), /* 90 */ + MACH_TRAP(kern_invalid, 0), /* 91 */ + MACH_TRAP(kern_invalid, 0), /* 92 */ + MACH_TRAP(kern_invalid, 0), /* 93 */ + MACH_TRAP(kern_invalid, 0), /* 94 */ + MACH_TRAP(kern_invalid, 0), /* 95 */ + + MACH_TRAP(kern_invalid, 0), /* 96 */ + MACH_TRAP(kern_invalid, 0), /* 97 */ + + MACH_TRAP(kern_invalid, 0), /* 98 */ + MACH_TRAP(kern_invalid, 0), /* 99 */ + + MACH_TRAP(kern_invalid, 0), /* 100 */ + MACH_TRAP(kern_invalid, 0), /* 101 */ + MACH_TRAP(kern_invalid, 0), /* 102 */ + MACH_TRAP(kern_invalid, 0), /* 103 */ + MACH_TRAP(kern_invalid, 0), /* 104 */ + MACH_TRAP(kern_invalid, 0), /* 105 */ + MACH_TRAP(kern_invalid, 0), /* 106 */ + MACH_TRAP(kern_invalid, 0), /* 107 */ + MACH_TRAP(kern_invalid, 0), /* 108 */ + MACH_TRAP(kern_invalid, 0), /* 109 */ + + MACH_TRAP(kern_invalid, 0), /* 110 */ + MACH_TRAP(kern_invalid, 0), /* 111 */ + MACH_TRAP(kern_invalid, 0), /* 112 */ + MACH_TRAP(kern_invalid, 0), /* 113 */ + MACH_TRAP(kern_invalid, 0), /* 114 */ + MACH_TRAP(kern_invalid, 0), /* 115 */ + MACH_TRAP(kern_invalid, 0), /* 116 */ + MACH_TRAP(kern_invalid, 0), /* 117 */ + MACH_TRAP(kern_invalid, 0), /* 118 */ + MACH_TRAP(kern_invalid, 0), /* 119 */ + + MACH_TRAP(kern_invalid, 0), /* 120 */ + MACH_TRAP(kern_invalid, 0), /* 121 */ + MACH_TRAP(kern_invalid, 0), /* 122 */ + MACH_TRAP(kern_invalid, 0), /* 123 */ + MACH_TRAP(kern_invalid, 0), /* 124 */ + MACH_TRAP(kern_invalid, 0), /* 125 */ + MACH_TRAP(kern_invalid, 0), /* 126 */ + MACH_TRAP(kern_invalid, 0), /* 127 */ + MACH_TRAP(kern_invalid, 0), /* 128 */ + MACH_TRAP(kern_invalid, 0), /* 129 */ +}; + +int mach_trap_count = (sizeof(mach_trap_table) / sizeof(mach_trap_table[0])); diff --git a/kern/syscall_sw.h b/kern/syscall_sw.h new file mode 100644 index 0000000..9e76fc6 --- /dev/null +++ b/kern/syscall_sw.h @@ -0,0 +1,57 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_SYSCALL_SW_H_ +#define _KERN_SYSCALL_SW_H_ + +#include <mach/boolean.h> + +/* + * mach_trap_stack indicates the trap may discard + * its kernel stack. Some architectures may need + * to save more state in the pcb for these traps. + * + * Note: this is indexed manually by locore.S! + */ + +typedef void (*generic_trap_function)(void); + +typedef struct { + int mach_trap_arg_count; + generic_trap_function mach_trap_function; + boolean_t mach_trap_stack; + const char *mach_trap_name; +} mach_trap_t; + +extern mach_trap_t mach_trap_table[]; +extern int mach_trap_count; + +#define MACH_TRAP(name, arg_count) \ + { (arg_count), (generic_trap_function) (name), FALSE, #name } +#define MACH_TRAP_STACK(name, arg_count) \ + { (arg_count), (generic_trap_function) (name), TRUE, #name } + +#endif /* _KERN_SYSCALL_SW_H_ */ diff --git a/kern/task.c b/kern/task.c new file mode 100644 index 0000000..60ab4d7 --- /dev/null +++ b/kern/task.c @@ -0,0 +1,1351 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/task.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub, + * David Black + * + * Task management primitives implementation. + */ + +#include <string.h> + +#include <mach/machine/vm_types.h> +#include <mach/vm_param.h> +#include <mach/task_info.h> +#include <mach/task_special_ports.h> +#include <mach_debug/mach_debug_types.h> +#include <ipc/ipc_space.h> +#include <ipc/ipc_types.h> +#include <kern/debug.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/slab.h> +#include <kern/gnumach.server.h> +#include <kern/kalloc.h> +#include <kern/mach.server.h> +#include <kern/mach_host.server.h> +#include <kern/processor.h> +#include <kern/printf.h> +#include <kern/sched_prim.h> /* for thread_wakeup */ +#include <kern/ipc_tt.h> +#include <kern/syscall_emulation.h> +#include <kern/task_notify.user.h> +#include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */ +#include <machine/machspl.h> /* for splsched */ + +task_t kernel_task = TASK_NULL; +struct kmem_cache task_cache; + +/* Where to send notifications about newly created tasks. 
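+   Set with register_new_task_notification(); when non-null,
+   task_create_kernel() sends a mach_notify_new_task() message on it
+   for each task it creates.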
*/ +ipc_port_t new_task_notification = NULL; + +void task_init(void) +{ + kmem_cache_init(&task_cache, "task", sizeof(struct task), 0, + NULL, 0); + + eml_init(); + machine_task_module_init (); + + /* + * Create the kernel task as the first task. + * Task_create must assign to kernel_task as a side effect, + * for other initialization. (:-() + */ + (void) task_create_kernel(TASK_NULL, FALSE, &kernel_task); + (void) task_set_name(kernel_task, "gnumach"); + vm_map_set_name(kernel_map, kernel_task->name); +} + +kern_return_t task_create( + task_t parent_task, + boolean_t inherit_memory, + task_t *child_task) /* OUT */ +{ + if (parent_task == TASK_NULL) + return KERN_INVALID_TASK; + + return task_create_kernel (parent_task, inherit_memory, + child_task); +} + +kern_return_t +task_create_kernel( + task_t parent_task, + boolean_t inherit_memory, + task_t *child_task) /* OUT */ +{ + task_t new_task; + processor_set_t pset; +#if FAST_TAS + int i; +#endif + + new_task = (task_t) kmem_cache_alloc(&task_cache); + if (new_task == TASK_NULL) + return KERN_RESOURCE_SHORTAGE; + + /* one ref for just being alive; one for our caller */ + new_task->ref_count = 2; + + if (child_task == &kernel_task) { + new_task->map = kernel_map; + } else if (inherit_memory) { + new_task->map = vm_map_fork(parent_task->map); + } else { + pmap_t new_pmap = pmap_create((vm_size_t) 0); + if (new_pmap == PMAP_NULL) + new_task->map = VM_MAP_NULL; + else { + new_task->map = vm_map_create(new_pmap, + round_page(VM_MIN_USER_ADDRESS), + trunc_page(VM_MAX_USER_ADDRESS)); + if (new_task->map == VM_MAP_NULL) + pmap_destroy(new_pmap); + } + } + if (new_task->map == VM_MAP_NULL) { + kmem_cache_free(&task_cache, (vm_address_t) new_task); + return KERN_RESOURCE_SHORTAGE; + } + if (child_task != &kernel_task) + vm_map_set_name(new_task->map, new_task->name); + + simple_lock_init(&new_task->lock); + queue_init(&new_task->thread_list); + new_task->suspend_count = 0; + new_task->active = TRUE; + new_task->user_stop_count = 0; + new_task->thread_count = 0; + new_task->faults = 0; + new_task->zero_fills = 0; + new_task->reactivations = 0; + new_task->pageins = 0; + new_task->cow_faults = 0; + new_task->messages_sent = 0; + new_task->messages_received = 0; + + eml_task_reference(new_task, parent_task); + + ipc_task_init(new_task, parent_task); + machine_task_init (new_task); + + time_value64_init(&new_task->total_user_time); + time_value64_init(&new_task->total_system_time); + + record_time_stamp (&new_task->creation_time); + + if (parent_task != TASK_NULL) { + task_lock(parent_task); + pset = parent_task->processor_set; + if (!pset->active) + pset = &default_pset; + pset_reference(pset); + new_task->priority = parent_task->priority; + task_unlock(parent_task); + } + else { + pset = &default_pset; + pset_reference(pset); + new_task->priority = BASEPRI_USER; + } + pset_lock(pset); + pset_add_task(pset, new_task); + pset_unlock(pset); + + new_task->may_assign = TRUE; + new_task->assign_active = FALSE; + new_task->essential = FALSE; + +#if MACH_PCSAMPLE + new_task->pc_sample.buffer = 0; + new_task->pc_sample.seqno = 0; + new_task->pc_sample.sampletypes = 0; +#endif /* MACH_PCSAMPLE */ + +#if FAST_TAS + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + if (inherit_memory) { + new_task->fast_tas_base[i] = parent_task->fast_tas_base[i]; + new_task->fast_tas_end[i] = parent_task->fast_tas_end[i]; + } else { + new_task->fast_tas_base[i] = (vm_offset_t)0; + new_task->fast_tas_end[i] = (vm_offset_t)0; + } + } +#endif /* FAST_TAS */ + + if (parent_task == 
TASK_NULL) + snprintf (new_task->name, sizeof new_task->name, "%p", + new_task); + else + snprintf (new_task->name, sizeof new_task->name, "(%.*s)", + (int) (sizeof new_task->name - 3), parent_task->name); + + if (new_task_notification != NULL) { + task_reference (new_task); + task_reference (parent_task); + mach_notify_new_task (new_task_notification, + convert_task_to_port (new_task), + parent_task + ? convert_task_to_port (parent_task) + : IP_NULL); + } + + ipc_task_enable(new_task); + + *child_task = new_task; + return KERN_SUCCESS; +} + +/* + * task_deallocate: + * + * Give up a reference to the specified task and destroy it if there + * are no other references left. It is assumed that the current thread + * is never in this task. + */ +void task_deallocate( + task_t task) +{ + int c; + processor_set_t pset; + + if (task == TASK_NULL) + return; + + task_lock(task); + c = --(task->ref_count); + task_unlock(task); + if (c != 0) + return; + + machine_task_terminate (task); + + eml_task_deallocate(task); + + pset = task->processor_set; + pset_lock(pset); + pset_remove_task(pset,task); + pset_unlock(pset); + pset_deallocate(pset); + vm_map_deallocate(task->map); + is_release(task->itk_space); + kmem_cache_free(&task_cache, (vm_offset_t) task); +} + +void task_reference( + task_t task) +{ + if (task == TASK_NULL) + return; + + task_lock(task); + task->ref_count++; + task_unlock(task); +} + +/* + * task_terminate: + * + * Terminate the specified task. See comments on thread_terminate + * (kern/thread.c) about problems with terminating the "current task." + */ +kern_return_t task_terminate( + task_t task) +{ + thread_t thread, cur_thread; + queue_head_t *list; + task_t cur_task; + spl_t s; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + list = &task->thread_list; + cur_task = current_task(); + cur_thread = current_thread(); + + /* + * Deactivate task so that it can't be terminated again, + * and so lengthy operations in progress will abort. + * + * If the current thread is in this task, remove it from + * the task's thread list to keep the thread-termination + * loop simple. + */ + if (task == cur_task) { + task_lock(task); + if (!task->active) { + /* + * Task is already being terminated. + */ + task_unlock(task); + return KERN_FAILURE; + } + /* + * Make sure current thread is not being terminated. + */ + s = splsched(); + thread_lock(cur_thread); + if (!cur_thread->active) { + thread_unlock(cur_thread); + (void) splx(s); + task_unlock(task); + thread_terminate(cur_thread); + return KERN_FAILURE; + } + task_hold_locked(task); + task->active = FALSE; + queue_remove(list, cur_thread, thread_t, thread_list); + thread_unlock(cur_thread); + (void) splx(s); + task_unlock(task); + + /* + * Shut down this thread's ipc now because it must + * be left alone to terminate the task. + */ + ipc_thread_disable(cur_thread); + ipc_thread_terminate(cur_thread); + } + else { + /* + * Lock both current and victim task to check for + * potential deadlock. + */ + if ((vm_offset_t)task < (vm_offset_t)cur_task) { + task_lock(task); + task_lock(cur_task); + } + else { + task_lock(cur_task); + task_lock(task); + } + /* + * Check if current thread or task is being terminated. + */ + s = splsched(); + thread_lock(cur_thread); + if ((!cur_task->active) ||(!cur_thread->active)) { + /* + * Current task or thread is being terminated. 
+ */ + thread_unlock(cur_thread); + (void) splx(s); + task_unlock(task); + task_unlock(cur_task); + thread_terminate(cur_thread); + return KERN_FAILURE; + } + thread_unlock(cur_thread); + (void) splx(s); + task_unlock(cur_task); + + if (!task->active) { + /* + * Task is already being terminated. + */ + task_unlock(task); + return KERN_FAILURE; + } + task_hold_locked(task); + task->active = FALSE; + task_unlock(task); + } + + /* + * Prevent further execution of the task. ipc_task_disable + * prevents further task operations via the task port. + * If this is the current task, the current thread will + * be left running. + */ + (void) task_dowait(task,TRUE); /* may block */ + ipc_task_disable(task); + + /* + * Terminate each thread in the task. + * + * The task_port is closed down, so no more thread_create + * operations can be done. Thread_force_terminate closes the + * thread port for each thread; when that is done, the + * thread will eventually disappear. Thus the loop will + * terminate. Call thread_force_terminate instead of + * thread_terminate to avoid deadlock checks. Need + * to call thread_block() inside loop because some other + * thread (e.g., the reaper) may have to run to get rid + * of all references to the thread; it won't vanish from + * the task's thread list until the last one is gone. + */ + task_lock(task); + while (!queue_empty(list)) { + thread = (thread_t) queue_first(list); + thread_reference(thread); + task_unlock(task); + thread_force_terminate(thread); + thread_deallocate(thread); + thread_block(thread_no_continuation); + task_lock(task); + } + task_unlock(task); + + /* + * Shut down IPC. + */ + ipc_task_terminate(task); + + + /* + * Deallocate the task's reference to itself. + */ + task_deallocate(task); + + /* + * If the current thread is in this task, it has not yet + * been terminated (since it was removed from the task's + * thread-list). Put it back in the thread list (for + * completeness), and terminate it. Since it holds the + * last reference to the task, terminating it will deallocate + * the task. + */ + if (cur_thread->task == task) { + task_lock(task); + s = splsched(); + queue_enter(list, cur_thread, thread_t, thread_list); + (void) splx(s); + task_unlock(task); + (void) thread_terminate(cur_thread); + } + + return KERN_SUCCESS; +} + +/* + * task_hold: + * + * Suspend execution of the specified task. + * This is a recursive-style suspension of the task, a count of + * suspends is maintained. + * + * CONDITIONS: the task is locked and active. + */ +void task_hold_locked( + task_t task) +{ + queue_head_t *list; + thread_t thread, cur_thread; + + assert(task->active); + + cur_thread = current_thread(); + + task->suspend_count++; + + /* + * Iterate through all the threads and hold them. + * Do not hold the current thread if it is within the + * task. + */ + list = &task->thread_list; + queue_iterate(list, thread, thread_t, thread_list) { + if (thread != cur_thread) + thread_hold(thread); + } +} + +/* + * task_hold: + * + * Suspend execution of the specified task. + * This is a recursive-style suspension of the task, a count of + * suspends is maintained. + */ +kern_return_t task_hold( + task_t task) +{ + task_lock(task); + if (!task->active) { + task_unlock(task); + return KERN_FAILURE; + } + + task_hold_locked(task); + + task_unlock(task); + return KERN_SUCCESS; +} + +/* + * task_dowait: + * + * Wait until the task has really been suspended (all of the threads + * are stopped). Skip the current thread if it is within the task. 
+ * + * If task is deactivated while waiting, return a failure code unless + * must_wait is true. + */ +kern_return_t task_dowait( + task_t task, + boolean_t must_wait) +{ + queue_head_t *list; + thread_t thread, cur_thread, prev_thread; + kern_return_t ret = KERN_SUCCESS; + + /* + * Iterate through all the threads. + * While waiting for each thread, we gain a reference to it + * to prevent it from going away on us. This guarantees + * that the "next" thread in the list will be a valid thread. + * + * We depend on the fact that if threads are created while + * we are looping through the threads, they will be held + * automatically. We don't care about threads that get + * deallocated along the way (the reference prevents it + * from happening to the thread we are working with). + * + * If the current thread is in the affected task, it is skipped. + * + * If the task is deactivated before we're done, and we don't + * have to wait for it (must_wait is FALSE), just bail out. + */ + cur_thread = current_thread(); + + list = &task->thread_list; + prev_thread = THREAD_NULL; + task_lock(task); + queue_iterate(list, thread, thread_t, thread_list) { + if (!(task->active) && !(must_wait)) { + ret = KERN_FAILURE; + break; + } + if (thread != cur_thread) { + thread_reference(thread); + task_unlock(task); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + /* may block */ + (void) thread_dowait(thread, TRUE); /* may block */ + prev_thread = thread; + task_lock(task); + } + } + task_unlock(task); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); /* may block */ + return ret; +} + +kern_return_t task_release( + task_t task) +{ + queue_head_t *list; + thread_t thread, next; + + task_lock(task); + if (!task->active) { + task_unlock(task); + return KERN_FAILURE; + } + + task->suspend_count--; + + /* + * Iterate through all the threads and release them + */ + list = &task->thread_list; + thread = (thread_t) queue_first(list); + while (!queue_end(list, (queue_entry_t) thread)) { + next = (thread_t) queue_next(&thread->thread_list); + thread_release(thread); + thread = next; + } + task_unlock(task); + return KERN_SUCCESS; +} + +kern_return_t task_threads( + task_t task, + thread_array_t *thread_list, + natural_t *count) +{ + unsigned int actual; /* this many threads */ + thread_t thread; + thread_t *threads; + unsigned i; + + vm_size_t size, size_needed; + vm_offset_t addr; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + task_lock(task); + if (!task->active) { + task_unlock(task); + return KERN_FAILURE; + } + + actual = task->thread_count; + + /* do we have the memory we need? 
*/ + + size_needed = actual * sizeof(mach_port_t); + if (size_needed <= size) + break; + + /* unlock the task and allocate more memory */ + task_unlock(task); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the task is locked & active */ + + threads = (thread_t *) addr; + + for (i = 0, thread = (thread_t) queue_first(&task->thread_list); + i < actual; + i++, thread = (thread_t) queue_next(&thread->thread_list)) { + /* take ref for convert_thread_to_port */ + thread_reference(thread); + threads[i] = thread; + } + assert(queue_end(&task->thread_list, (queue_entry_t) thread)); + + /* can unlock task now that we've got the thread refs */ + task_unlock(task); + + if (actual == 0) { + /* no threads, so return null pointer and deallocate memory */ + + *thread_list = 0; + *count = 0; + + if (size != 0) + kfree(addr, size); + } else { + /* if we allocated too much, must copy */ + + if (size_needed < size) { + vm_offset_t newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + for (i = 0; i < actual; i++) + thread_deallocate(threads[i]); + kfree(addr, size); + return KERN_RESOURCE_SHORTAGE; + } + + memcpy((void *) newaddr, (void *) addr, size_needed); + kfree(addr, size); + threads = (thread_t *) newaddr; + } + + *thread_list = (mach_port_t *) threads; + *count = actual; + + /* do the conversion that Mig should handle */ + + for (i = 0; i < actual; i++) + ((ipc_port_t *) threads)[i] = + convert_thread_to_port(threads[i]); + } + + return KERN_SUCCESS; +} + +kern_return_t task_suspend( + task_t task) +{ + boolean_t hold; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + hold = FALSE; + task_lock(task); + if ((task->user_stop_count)++ == 0) + hold = TRUE; + task_unlock(task); + + /* + * If the stop count was positive, the task is + * already stopped and we can exit. + */ + if (!hold) { + return KERN_SUCCESS; + } + + /* + * Hold all of the threads in the task, and wait for + * them to stop. If the current thread is within + * this task, hold it separately so that all of the + * other threads can stop first. + */ + + if (task_hold(task) != KERN_SUCCESS) + return KERN_FAILURE; + + if (task_dowait(task, FALSE) != KERN_SUCCESS) + return KERN_FAILURE; + + if (current_task() == task) { + spl_t s; + + thread_hold(current_thread()); + /* + * We want to call thread_block on our way out, + * to stop running. + */ + s = splsched(); + ast_on(cpu_number(), AST_BLOCK); + (void) splx(s); + } + + return KERN_SUCCESS; +} + +kern_return_t task_resume( + task_t task) +{ + boolean_t release; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + release = FALSE; + task_lock(task); + if (task->user_stop_count > 0) { + if (--(task->user_stop_count) == 0) + release = TRUE; + } + else { + task_unlock(task); + return KERN_FAILURE; + } + task_unlock(task); + + /* + * Release the task if necessary. 
+ */ + if (release) + return task_release(task); + + return KERN_SUCCESS; +} + +kern_return_t task_info( + task_t task, + int flavor, + task_info_t task_info_out, /* pointer to OUT array */ + natural_t *task_info_count) /* IN/OUT */ +{ + vm_map_t map; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + switch (flavor) { + case TASK_BASIC_INFO: + { + task_basic_info_t basic_info; + + /* Allow *task_info_count to be smaller than the provided amount + * that does not contain the new time_value64_t fields as some + * callers might not know about them yet. */ + + if (*task_info_count < + TASK_BASIC_INFO_COUNT - 3 * sizeof(time_value64_t)/sizeof(integer_t)) + return KERN_INVALID_ARGUMENT; + + basic_info = (task_basic_info_t) task_info_out; + + map = (task == kernel_task) ? kernel_map : task->map; + + basic_info->virtual_size = map->size; + basic_info->resident_size = pmap_resident_count(map->pmap) + * PAGE_SIZE; + + task_lock(task); + basic_info->base_priority = task->priority; + basic_info->suspend_count = task->user_stop_count; + TIME_VALUE64_TO_TIME_VALUE(&task->total_user_time, + &basic_info->user_time); + TIME_VALUE64_TO_TIME_VALUE(&task->total_system_time, + &basic_info->system_time); + time_value64_t creation_time64; + read_time_stamp(&task->creation_time, &creation_time64); + TIME_VALUE64_TO_TIME_VALUE(&creation_time64, &basic_info->creation_time); + if (*task_info_count == TASK_BASIC_INFO_COUNT) { + /* Copy new time_value64_t fields */ + basic_info->user_time64 = task->total_user_time; + basic_info->system_time64 = task->total_system_time; + basic_info->creation_time64 = creation_time64; + } + task_unlock(task); + + if (*task_info_count > TASK_BASIC_INFO_COUNT) + *task_info_count = TASK_BASIC_INFO_COUNT; + break; + } + + case TASK_EVENTS_INFO: + { + task_events_info_t event_info; + + if (*task_info_count < TASK_EVENTS_INFO_COUNT) { + return KERN_INVALID_ARGUMENT; + } + + event_info = (task_events_info_t) task_info_out; + + task_lock(task); + event_info->faults = task->faults; + event_info->zero_fills = task->zero_fills; + event_info->reactivations = task->reactivations; + event_info->pageins = task->pageins; + event_info->cow_faults = task->cow_faults; + event_info->messages_sent = task->messages_sent; + event_info->messages_received = task->messages_received; + task_unlock(task); + + *task_info_count = TASK_EVENTS_INFO_COUNT; + break; + } + + case TASK_THREAD_TIMES_INFO: + { + task_thread_times_info_t times_info; + thread_t thread; + + /* Callers might not known about time_value64_t fields yet. 
*/ + if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT - (2 * sizeof(time_value64_t)) / sizeof(integer_t)) { + return KERN_INVALID_ARGUMENT; + } + + times_info = (task_thread_times_info_t) task_info_out; + + time_value64_t acc_user_time, acc_system_time; + time_value64_init(&acc_user_time); + time_value64_init(&acc_system_time); + + task_lock(task); + queue_iterate(&task->thread_list, thread, + thread_t, thread_list) + { + time_value64_t user_time, system_time; + spl_t s; + + s = splsched(); + thread_lock(thread); + + thread_read_times(thread, &user_time, &system_time); + + thread_unlock(thread); + splx(s); + + time_value64_add(&acc_user_time, &user_time); + time_value64_add(&acc_system_time, &system_time); + } + task_unlock(task); + TIME_VALUE64_TO_TIME_VALUE(&acc_user_time, ×_info->user_time); + TIME_VALUE64_TO_TIME_VALUE(&acc_system_time, ×_info->system_time); + if (*task_info_count >= TASK_THREAD_TIMES_INFO_COUNT) { + /* Copy new time_value64_t fields */ + times_info->user_time64 = acc_user_time; + times_info->system_time64 = acc_system_time; + } + + if (*task_info_count > TASK_THREAD_TIMES_INFO_COUNT) + *task_info_count = TASK_THREAD_TIMES_INFO_COUNT; + break; + } + + default: + return KERN_INVALID_ARGUMENT; + } + + return KERN_SUCCESS; +} + +#if MACH_HOST +/* + * task_assign: + * + * Change the assigned processor set for the task + */ +kern_return_t +task_assign( + task_t task, + processor_set_t new_pset, + boolean_t assign_threads) +{ + kern_return_t ret = KERN_SUCCESS; + thread_t thread, prev_thread; + queue_head_t *list; + processor_set_t pset; + + if (task == TASK_NULL || new_pset == PROCESSOR_SET_NULL) { + return KERN_INVALID_ARGUMENT; + } + + /* + * Freeze task`s assignment. Prelude to assigning + * task. Only one freeze may be held per task. + */ + + task_lock(task); + while (task->may_assign == FALSE) { + task->assign_active = TRUE; + assert_wait((event_t)&task->assign_active, TRUE); + task_unlock(task); + thread_block(thread_no_continuation); + task_lock(task); + } + + /* + * Avoid work if task already in this processor set. + */ + if (task->processor_set == new_pset) { + /* + * No need for task->assign_active wakeup: + * task->may_assign is still TRUE. + */ + task_unlock(task); + return KERN_SUCCESS; + } + + task->may_assign = FALSE; + task_unlock(task); + + /* + * Safe to get the task`s pset: it cannot change while + * task is frozen. + */ + pset = task->processor_set; + + /* + * Lock both psets now. Use ordering to avoid deadlock. + */ + Restart: + if ((vm_offset_t) pset < (vm_offset_t) new_pset) { + pset_lock(pset); + pset_lock(new_pset); + } + else { + pset_lock(new_pset); + pset_lock(pset); + } + + /* + * Check if new_pset is ok to assign to. If not, + * reassign to default_pset. + */ + if (!new_pset->active) { + pset_unlock(pset); + pset_unlock(new_pset); + new_pset = &default_pset; + goto Restart; + } + + pset_reference(new_pset); + + /* + * Now grab the task lock and move the task. + */ + + task_lock(task); + pset_remove_task(pset, task); + pset_add_task(new_pset, task); + + pset_unlock(pset); + pset_unlock(new_pset); + + if (assign_threads == FALSE) { + /* + * We leave existing threads at their + * old assignments. Unfreeze task`s + * assignment. + */ + task->may_assign = TRUE; + if (task->assign_active) { + task->assign_active = FALSE; + thread_wakeup((event_t) &task->assign_active); + } + task_unlock(task); + pset_deallocate(pset); + return KERN_SUCCESS; + } + + /* + * If current thread is in task, freeze its assignment. 
+ */ + if (current_thread()->task == task) { + task_unlock(task); + thread_freeze(current_thread()); + task_lock(task); + } + + /* + * Iterate down the thread list reassigning all the threads. + * New threads pick up task's new processor set automatically. + * Do current thread last because new pset may be empty. + */ + list = &task->thread_list; + prev_thread = THREAD_NULL; + queue_iterate(list, thread, thread_t, thread_list) { + if (!(task->active)) { + ret = KERN_FAILURE; + break; + } + if (thread != current_thread()) { + thread_reference(thread); + task_unlock(task); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); /* may block */ + thread_assign(thread,new_pset); /* may block */ + prev_thread = thread; + task_lock(task); + } + } + + /* + * Done, wakeup anyone waiting for us. + */ + task->may_assign = TRUE; + if (task->assign_active) { + task->assign_active = FALSE; + thread_wakeup((event_t)&task->assign_active); + } + task_unlock(task); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); /* may block */ + + /* + * Finish assignment of current thread. + */ + if (current_thread()->task == task) + thread_doassign(current_thread(), new_pset, TRUE); + + pset_deallocate(pset); + + return ret; +} +#else /* MACH_HOST */ +/* + * task_assign: + * + * Change the assigned processor set for the task + */ +kern_return_t +task_assign( + task_t task, + processor_set_t new_pset, + boolean_t assign_threads) +{ + return KERN_FAILURE; +} +#endif /* MACH_HOST */ + + +/* + * task_assign_default: + * + * Version of task_assign to assign to default processor set. + */ +kern_return_t +task_assign_default( + task_t task, + boolean_t assign_threads) +{ + return task_assign(task, &default_pset, assign_threads); +} + +/* + * task_get_assignment + * + * Return name of processor set that task is assigned to. + */ +kern_return_t task_get_assignment( + task_t task, + processor_set_t *pset) +{ + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + if (!task->active) + return KERN_FAILURE; + + *pset = task->processor_set; + pset_reference(*pset); + return KERN_SUCCESS; +} + +/* + * task_priority + * + * Set priority of task; used only for newly created threads. + * Optionally change priorities of threads. + */ +kern_return_t +task_priority( + task_t task, + int priority, + boolean_t change_threads) +{ + kern_return_t ret = KERN_SUCCESS; + + if (task == TASK_NULL || invalid_pri(priority)) + return KERN_INVALID_ARGUMENT; + + task_lock(task); + task->priority = priority; + + if (change_threads) { + thread_t thread; + queue_head_t *list; + + list = &task->thread_list; + queue_iterate(list, thread, thread_t, thread_list) { + if (thread_priority(thread, priority, FALSE) + != KERN_SUCCESS) + ret = KERN_FAILURE; + } + } + + task_unlock(task); + return ret; +} + +/* + * task_set_name + * + * Set the name of task TASK to NAME. This is a debugging aid. + * NAME will be used in error messages printed by the kernel. + */ +kern_return_t +task_set_name( + task_t task, + const_kernel_debug_name_t name) +{ + strncpy(task->name, name, sizeof task->name - 1); + task->name[sizeof task->name - 1] = '\0'; + return KERN_SUCCESS; +} + +/* + * task_set_essential + * + * Set whether TASK is an essential task, i.e. the whole system will crash + * if this task crashes. + */ +kern_return_t +task_set_essential( + task_t task, + boolean_t essential) +{ + task->essential = !!essential; + return KERN_SUCCESS; +} + +/* + * task_collect_scan: + * + * Attempt to free resources owned by tasks. 
+ */ + +static void task_collect_scan(void) +{ + task_t task, prev_task; + processor_set_t pset, prev_pset; + + prev_task = TASK_NULL; + prev_pset = PROCESSOR_SET_NULL; + + simple_lock(&all_psets_lock); + queue_iterate(&all_psets, pset, processor_set_t, all_psets) { + pset_lock(pset); + queue_iterate(&pset->tasks, task, task_t, pset_tasks) { + task_reference(task); + pset_reference(pset); + pset_unlock(pset); + simple_unlock(&all_psets_lock); + + machine_task_collect (task); + pmap_collect(task->map->pmap); + + if (prev_task != TASK_NULL) + task_deallocate(prev_task); + prev_task = task; + + if (prev_pset != PROCESSOR_SET_NULL) + pset_deallocate(prev_pset); + prev_pset = pset; + + simple_lock(&all_psets_lock); + pset_lock(pset); + } + pset_unlock(pset); + } + simple_unlock(&all_psets_lock); + + if (prev_task != TASK_NULL) + task_deallocate(prev_task); + if (prev_pset != PROCESSOR_SET_NULL) + pset_deallocate(prev_pset); +} + +boolean_t task_collect_allowed = TRUE; +unsigned task_collect_last_tick = 0; +unsigned task_collect_max_rate = 0; /* in ticks */ + +/* + * consider_task_collect: + * + * Called by the pageout daemon when the system needs more free pages. + */ + +void consider_task_collect(void) +{ + /* + * By default, don't attempt task collection more frequently + * than once a second. + */ + + if (task_collect_max_rate == 0) + task_collect_max_rate = hz; + + if (task_collect_allowed && + (sched_tick > (task_collect_last_tick + + task_collect_max_rate / (hz / 1)))) { + task_collect_last_tick = sched_tick; + task_collect_scan(); + } +} + +kern_return_t +task_ras_control( + task_t task, + vm_offset_t pc, + vm_offset_t endpc, + int flavor) +{ + kern_return_t ret = KERN_FAILURE; + +#if FAST_TAS + int i; + + ret = KERN_SUCCESS; + task_lock(task); + switch (flavor) { + case TASK_RAS_CONTROL_PURGE_ALL: /* remove all RAS */ + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + task->fast_tas_base[i] = task->fast_tas_end[i] = 0; + } + break; + case TASK_RAS_CONTROL_PURGE_ONE: /* remove this RAS, collapse remaining */ + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + if ( (task->fast_tas_base[i] == pc) + && (task->fast_tas_end[i] == endpc)) { + while (i < TASK_FAST_TAS_NRAS-1) { + task->fast_tas_base[i] = task->fast_tas_base[i+1]; + task->fast_tas_end[i] = task->fast_tas_end[i+1]; + i++; + } + task->fast_tas_base[TASK_FAST_TAS_NRAS-1] = 0; + task->fast_tas_end[TASK_FAST_TAS_NRAS-1] = 0; + break; + } + } + if (i == TASK_FAST_TAS_NRAS) { + ret = KERN_INVALID_ADDRESS; + } + break; + case TASK_RAS_CONTROL_PURGE_ALL_AND_INSTALL_ONE: + /* remove all RAS an install this RAS */ + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + task->fast_tas_base[i] = task->fast_tas_end[i] = 0; + } + /* FALL THROUGH */ + case TASK_RAS_CONTROL_INSTALL_ONE: /* install this RAS */ + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + if ( (task->fast_tas_base[i] == pc) + && (task->fast_tas_end[i] == endpc)) { + /* already installed */ + break; + } + if ((task->fast_tas_base[i] == 0) && (task->fast_tas_end[i] == 0)){ + task->fast_tas_base[i] = pc; + task->fast_tas_end[i] = endpc; + break; + } + } + if (i == TASK_FAST_TAS_NRAS) { + ret = KERN_RESOURCE_SHORTAGE; + } + break; + default: ret = KERN_INVALID_VALUE; + break; + } + task_unlock(task); +#endif /* FAST_TAS */ + return ret; +} + +/* + * register_new_task_notification + * + * Register a port to which a notification about newly created + * tasks are sent. 
+ */ +kern_return_t +register_new_task_notification( + const host_t host, + ipc_port_t notification) +{ + if (host == HOST_NULL) + return KERN_INVALID_HOST; + + if (new_task_notification != NULL) + return KERN_NO_ACCESS; + + new_task_notification = notification; + return KERN_SUCCESS; +} diff --git a/kern/task.h b/kern/task.h new file mode 100644 index 0000000..9521e95 --- /dev/null +++ b/kern/task.h @@ -0,0 +1,197 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: task.h + * Author: Avadis Tevanian, Jr. + * + * This file contains the structure definitions for tasks. + * + */ + +#ifndef _KERN_TASK_H_ +#define _KERN_TASK_H_ + +#include <mach/boolean.h> +#include <mach/port.h> +#include <mach/time_value.h> +#include <mach/mach_param.h> +#include <mach/task_info.h> +#include <mach_debug/mach_debug_types.h> +#include <kern/kern_types.h> +#include <kern/lock.h> +#include <kern/queue.h> +#include <kern/pc_sample.h> +#include <kern/processor.h> +#include <kern/syscall_emulation.h> +#include <vm/vm_types.h> +#include <machine/task.h> + +/* + * Task name buffer size. The size is chosen so that struct task fits + * into three cache lines. The size of a cache line on a typical CPU + * is 64 bytes. + */ +#define TASK_NAME_SIZE 32 + +struct task { + /* Synchronization/destruction information */ + decl_simple_lock_data(,lock) /* Task's lock */ + int ref_count; /* Number of references to me */ + + /* Flags */ + unsigned char assign_active; /* waiting for may_assign */ + unsigned char active:1, /* Task has not been terminated */ + /* boolean_t */ may_assign:1, /* can assigned pset be changed? */ + essential:1; /* Is this task essential for the system? 
*/ + + /* Miscellaneous */ + vm_map_t map; /* Address space description */ + queue_chain_t pset_tasks; /* list of tasks assigned to pset */ + int suspend_count; /* Internal scheduling only */ + + /* Thread information */ + queue_head_t thread_list; /* list of threads */ + int thread_count; /* number of threads */ + processor_set_t processor_set; /* processor set for new threads */ + + /* User-visible scheduling information */ + int user_stop_count; /* outstanding stops */ + int priority; /* for new threads */ + + /* Statistics */ + time_value64_t total_user_time; + /* total user time for dead threads */ + time_value64_t total_system_time; + /* total system time for dead threads */ + + time_value64_t creation_time; /* time stamp at creation */ + + /* IPC structures */ + decl_simple_lock_data(, itk_lock_data) + struct ipc_port *itk_self; /* not a right, doesn't hold ref */ + struct ipc_port *itk_sself; /* a send right */ + struct ipc_port *itk_exception; /* a send right */ + struct ipc_port *itk_bootstrap; /* a send right */ + struct ipc_port *itk_registered[TASK_PORT_REGISTER_MAX]; + /* all send rights */ + + struct ipc_space *itk_space; + + /* User space system call emulation support */ + struct eml_dispatch *eml_dispatch; + + sample_control_t pc_sample; + +#if FAST_TAS +#define TASK_FAST_TAS_NRAS 8 + vm_offset_t fast_tas_base[TASK_FAST_TAS_NRAS]; + vm_offset_t fast_tas_end[TASK_FAST_TAS_NRAS]; +#endif /* FAST_TAS */ + + /* Hardware specific data. */ + machine_task_t machine; + + /* Statistics */ + long_natural_t faults; /* page faults counter */ + long_natural_t zero_fills; /* zero fill pages counter */ + long_natural_t reactivations; /* reactivated pages counter */ + long_natural_t pageins; /* actual pageins couter */ + long_natural_t cow_faults; /* copy-on-write faults counter */ + long_natural_t messages_sent; /* messages sent counter */ + long_natural_t messages_received; /* messages received counter */ + + char name[TASK_NAME_SIZE]; +}; + +#define task_lock(task) simple_lock(&(task)->lock) +#define task_unlock(task) simple_unlock(&(task)->lock) + +#define itk_lock_init(task) simple_lock_init(&(task)->itk_lock_data) +#define itk_lock(task) simple_lock(&(task)->itk_lock_data) +#define itk_unlock(task) simple_unlock(&(task)->itk_lock_data) + +/* + * Exported routines/macros + */ + +extern kern_return_t task_create( + task_t parent_task, + boolean_t inherit_memory, + task_t *child_task); +extern kern_return_t task_create_kernel( + task_t parent_task, + boolean_t inherit_memory, + task_t *child_task); +extern kern_return_t task_terminate( + task_t task); +extern kern_return_t task_suspend( + task_t task); +extern kern_return_t task_resume( + task_t task); +extern kern_return_t task_threads( + task_t task, + thread_array_t *thread_list, + natural_t *count); +extern kern_return_t task_info( + task_t task, + int flavor, + task_info_t task_info_out, + natural_t *task_info_count); +extern kern_return_t task_get_special_port( + task_t task, + int which, + struct ipc_port **portp); +extern kern_return_t task_set_special_port( + task_t task, + int which, + struct ipc_port *port); +extern kern_return_t task_assign( + task_t task, + processor_set_t new_pset, + boolean_t assign_threads); +extern kern_return_t task_assign_default( + task_t task, + boolean_t assign_threads); +extern kern_return_t task_set_name( + task_t task, + const_kernel_debug_name_t name); +extern void consider_task_collect(void); + +/* + * Internal only routines + */ + +extern void task_init(void); +extern void 
task_reference(task_t); +extern void task_deallocate(task_t); +extern void task_hold_locked(task_t); +extern kern_return_t task_hold(task_t); +extern kern_return_t task_dowait(task_t, boolean_t); +extern kern_return_t task_release(task_t); + +extern task_t kernel_task; + +#endif /* _KERN_TASK_H_ */ diff --git a/kern/task_notify.cli b/kern/task_notify.cli new file mode 100644 index 0000000..c6c85d9 --- /dev/null +++ b/kern/task_notify.cli @@ -0,0 +1,7 @@ +/* XXX */ + +/* This is a client presentation file. */ + +#define KERNEL_USER 1 + +#include <mach/task_notify.defs> diff --git a/kern/thread.c b/kern/thread.c new file mode 100644 index 0000000..2eab1ca --- /dev/null +++ b/kern/thread.c @@ -0,0 +1,2646 @@ +/* + * Mach Operating System + * Copyright (c) 1994-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/thread.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub + * Date: 1986 + * + * Thread management primitives implementation. 
+ */ + +#include <kern/printf.h> +#include <mach/message.h> +#include <mach/std_types.h> +#include <mach/policy.h> +#include <mach/thread_info.h> +#include <mach/thread_special_ports.h> +#include <mach/thread_status.h> +#include <mach/time_value.h> +#include <mach/vm_prot.h> +#include <mach/vm_inherit.h> +#include <machine/vm_param.h> +#include <kern/ast.h> +#include <kern/counters.h> +#include <kern/debug.h> +#include <kern/eventcount.h> +#include <kern/gnumach.server.h> +#include <kern/ipc_mig.h> +#include <kern/ipc_tt.h> +#include <kern/mach_debug.server.h> +#include <kern/mach_host.server.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/syscall_subr.h> +#include <kern/thread.h> +#include <kern/thread_swap.h> +#include <kern/host.h> +#include <kern/kalloc.h> +#include <kern/slab.h> +#include <kern/smp.h> +#include <kern/mach_clock.h> +#include <string.h> +#include <vm/vm_kern.h> +#include <vm/vm_user.h> +#include <ipc/ipc_kmsg.h> +#include <ipc/ipc_port.h> +#include <ipc/mach_msg.h> +#include <ipc/mach_port.server.h> +#include <machine/machspl.h> /* for splsched */ +#include <machine/pcb.h> +#include <machine/thread.h> /* for MACHINE_STACK */ + +struct kmem_cache thread_cache; +struct kmem_cache thread_stack_cache; + +queue_head_t reaper_queue; +def_simple_lock_data(static, reaper_lock) + +/* private */ +struct thread thread_template; + +#if MACH_DEBUG +#define STACK_MARKER 0xdeadbeefU +boolean_t stack_check_usage = FALSE; +def_simple_lock_data(static, stack_usage_lock) +vm_size_t stack_max_usage = 0; +#endif /* MACH_DEBUG */ + +/* + * Machine-dependent code must define: + * pcb_init + * pcb_terminate + * pcb_collect + * + * The thread->pcb field is reserved for machine-dependent code. + */ + +#ifdef MACHINE_STACK +/* + * Machine-dependent code must define: + * stack_alloc_try + * stack_alloc + * stack_free + * stack_handoff + * stack_collect + * and if MACH_DEBUG: + * stack_statistics + */ +#else /* MACHINE_STACK */ +/* + * We allocate stacks from generic kernel VM. + * Machine-dependent code must define: + * stack_attach + * stack_detach + * stack_handoff + * + * The stack_free_list can only be accessed at splsched, + * because stack_alloc_try/thread_invoke operate at splsched. + */ + +def_simple_lock_data(static, stack_lock_data)/* splsched only */ +#define stack_lock() simple_lock(&stack_lock_data) +#define stack_unlock() simple_unlock(&stack_lock_data) + +vm_offset_t stack_free_list; /* splsched only */ +unsigned int stack_free_count = 0; /* splsched only */ +unsigned int stack_free_limit = 1; /* patchable */ + +/* + * The next field is at the base of the stack, + * so the low end is left unsullied. + */ + +#define stack_next(stack) (*((vm_offset_t *)((stack) + KERNEL_STACK_SIZE) - 1)) + +/* + * stack_alloc_try: + * + * Non-blocking attempt to allocate a kernel stack. + * Called at splsched with the thread locked. + */ + +boolean_t stack_alloc_try( + thread_t thread, + void (*resume)(thread_t)) +{ + vm_offset_t stack; + + stack_lock(); + stack = stack_free_list; + if (stack != 0) { + stack_free_list = stack_next(stack); + stack_free_count--; + } else { + stack = thread->stack_privilege; + } + stack_unlock(); + + if (stack != 0) { + stack_attach(thread, stack, resume); + counter(c_stack_alloc_hits++); + return TRUE; + } else { + counter(c_stack_alloc_misses++); + return FALSE; + } +} + +/* + * stack_alloc: + * + * Allocate a kernel stack for a thread. + * May block. 
+ */ + +kern_return_t stack_alloc( + thread_t thread, + void (*resume)(thread_t)) +{ + vm_offset_t stack; + spl_t s; + + /* + * We first try the free list. It is probably empty, + * or stack_alloc_try would have succeeded, but possibly + * a stack was freed before the swapin thread got to us. + */ + + s = splsched(); + stack_lock(); + stack = stack_free_list; + if (stack != 0) { + stack_free_list = stack_next(stack); + stack_free_count--; + } + stack_unlock(); + (void) splx(s); + + if (stack == 0) { + stack = kmem_cache_alloc(&thread_stack_cache); + assert(stack != 0); +#if MACH_DEBUG + stack_init(stack); +#endif /* MACH_DEBUG */ + } + + stack_attach(thread, stack, resume); + return KERN_SUCCESS; +} + +/* + * stack_free: + * + * Free a thread's kernel stack. + * Called at splsched with the thread locked. + */ + +void stack_free( + thread_t thread) +{ + vm_offset_t stack; + + stack = stack_detach(thread); + + if (stack != thread->stack_privilege) { + stack_lock(); + stack_next(stack) = stack_free_list; + stack_free_list = stack; + stack_free_count += 1; +#if MACH_COUNTERS + if (stack_free_count > c_stack_alloc_max) + c_stack_alloc_max = stack_free_count; +#endif /* MACH_COUNTERS */ + stack_unlock(); + } +} + +/* + * stack_collect: + * + * Free excess kernel stacks. + * May block. + */ + +void stack_collect(void) +{ + vm_offset_t stack; + spl_t s; + + s = splsched(); + stack_lock(); + while (stack_free_count > stack_free_limit) { + stack = stack_free_list; + stack_free_list = stack_next(stack); + stack_free_count--; + stack_unlock(); + (void) splx(s); + +#if MACH_DEBUG + stack_finalize(stack); +#endif /* MACH_DEBUG */ + kmem_cache_free(&thread_stack_cache, stack); + + s = splsched(); + stack_lock(); + } + stack_unlock(); + (void) splx(s); +} +#endif /* MACHINE_STACK */ + +/* + * stack_privilege: + * + * stack_alloc_try on this thread must always succeed. + */ + +void stack_privilege( + thread_t thread) +{ + /* + * This implementation only works for the current thread. + */ + + if (thread != current_thread()) + panic("stack_privilege"); + + if (thread->stack_privilege == 0) + thread->stack_privilege = current_stack(); +} + +void thread_init(void) +{ + kmem_cache_init(&thread_cache, "thread", sizeof(struct thread), 0, + NULL, 0); + /* + * Kernel stacks should be naturally aligned, + * so that it is easy to find the starting/ending + * addresses of a stack given an address in the middle. + */ + kmem_cache_init(&thread_stack_cache, "thread_stack", + KERNEL_STACK_SIZE, KERNEL_STACK_SIZE, + NULL, 0); + + /* + * Fill in a template thread for fast initialization. + * [Fields that must be (or are typically) reset at + * time of creation are so noted.] 
+ */ + + /* thread_template.links (none) */ + thread_template.runq = RUN_QUEUE_NULL; + + /* thread_template.task (later) */ + /* thread_template.thread_list (later) */ + /* thread_template.pset_threads (later) */ + + /* thread_template.lock (later) */ + /* one ref for being alive; one for the guy who creates the thread */ + thread_template.ref_count = 2; + + thread_template.pcb = (pcb_t) 0; /* (reset) */ + thread_template.kernel_stack = (vm_offset_t) 0; + thread_template.stack_privilege = (vm_offset_t) 0; + + thread_template.wait_event = 0; + /* thread_template.suspend_count (later) */ + thread_template.wait_result = KERN_SUCCESS; + thread_template.wake_active = FALSE; + thread_template.state = TH_SUSP | TH_SWAPPED; + thread_template.swap_func = thread_bootstrap_return; + +/* thread_template.priority (later) */ + thread_template.max_priority = BASEPRI_USER; +/* thread_template.sched_pri (later - compute_priority) */ +#if MACH_FIXPRI + thread_template.sched_data = 0; + thread_template.policy = POLICY_TIMESHARE; +#endif /* MACH_FIXPRI */ + thread_template.depress_priority = -1; + thread_template.cpu_usage = 0; + thread_template.sched_usage = 0; + /* thread_template.sched_stamp (later) */ + + thread_template.recover = (vm_offset_t) 0; + thread_template.vm_privilege = 0; + + thread_template.user_stop_count = 1; + + /* thread_template.<IPC structures> (later) */ + + timer_init(&(thread_template.user_timer)); + timer_init(&(thread_template.system_timer)); + thread_template.user_timer_save.low = 0; + thread_template.user_timer_save.high = 0; + thread_template.system_timer_save.low = 0; + thread_template.system_timer_save.high = 0; + thread_template.cpu_delta = 0; + thread_template.sched_delta = 0; + + thread_template.active = FALSE; /* reset */ + thread_template.ast = AST_ZILCH; + + /* thread_template.processor_set (later) */ + thread_template.bound_processor = PROCESSOR_NULL; +#if MACH_HOST + thread_template.may_assign = TRUE; + thread_template.assign_active = FALSE; +#endif /* MACH_HOST */ + +#if NCPUS > 1 + /* thread_template.last_processor (later) */ +#endif /* NCPUS > 1 */ + + /* + * Initialize other data structures used in + * this module. + */ + + queue_init(&reaper_queue); + simple_lock_init(&reaper_lock); + +#ifndef MACHINE_STACK + simple_lock_init(&stack_lock_data); +#endif /* MACHINE_STACK */ + +#if MACH_DEBUG + simple_lock_init(&stack_usage_lock); +#endif /* MACH_DEBUG */ + + /* + * Initialize any machine-dependent + * per-thread structures necessary. + */ + + pcb_module_init(); +} + +kern_return_t thread_create( + task_t parent_task, + thread_t *child_thread) /* OUT */ +{ + thread_t new_thread; + processor_set_t pset; + + if (parent_task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * Allocate a thread and initialize static fields + */ + + new_thread = (thread_t) kmem_cache_alloc(&thread_cache); + + if (new_thread == THREAD_NULL) + return KERN_RESOURCE_SHORTAGE; + + *new_thread = thread_template; + + record_time_stamp (&new_thread->creation_time); + + /* + * Initialize runtime-dependent fields + */ + + new_thread->task = parent_task; + simple_lock_init(&new_thread->lock); + new_thread->sched_stamp = sched_tick; + thread_timeout_setup(new_thread); + + /* + * Create a pcb. The kernel stack is created later, + * when the thread is swapped-in. + */ + pcb_init(parent_task, new_thread); + + ipc_thread_init(new_thread); + + /* + * Find the processor set for the parent task. 
+ */
+ task_lock(parent_task);
+ pset = parent_task->processor_set;
+ pset_reference(pset);
+ task_unlock(parent_task);
+
+ /*
+ * This thread will most probably start working; assume it
+ * will take its share of CPU, to avoid having to find it out
+ * slowly. Decaying will however fix that quickly if it actually
+ * does not work.
+ */
+ new_thread->cpu_usage = TIMER_RATE * SCHED_SCALE /
+ (pset->load_average >= SCHED_SCALE ?
+ pset->load_average : SCHED_SCALE);
+ new_thread->sched_usage = TIMER_RATE * SCHED_SCALE;
+
+ /*
+ * Lock both the processor set and the task,
+ * so that the thread can be added to both
+ * simultaneously. Processor set must be
+ * locked first.
+ */
+
+ Restart:
+ pset_lock(pset);
+ task_lock(parent_task);
+
+ /*
+ * If the task has changed processor sets,
+ * catch up (involves lots of lock juggling).
+ */
+ {
+ processor_set_t cur_pset;
+
+ cur_pset = parent_task->processor_set;
+ if (!cur_pset->active)
+ cur_pset = &default_pset;
+
+ if (cur_pset != pset) {
+ pset_reference(cur_pset);
+ task_unlock(parent_task);
+ pset_unlock(pset);
+ pset_deallocate(pset);
+ pset = cur_pset;
+ goto Restart;
+ }
+ }
+
+ /*
+ * Set the thread`s priority from the pset and task.
+ */
+
+ new_thread->priority = parent_task->priority;
+ if (pset->max_priority > new_thread->max_priority)
+ new_thread->max_priority = pset->max_priority;
+ if (new_thread->max_priority > new_thread->priority)
+ new_thread->priority = new_thread->max_priority;
+ /*
+ * Don't need to lock thread here because it can't
+ * possibly execute and no one else knows about it.
+ */
+ compute_priority(new_thread, TRUE);
+
+ /*
+ * Thread is suspended if the task is. Add 1 to
+ * suspend count since thread is created in suspended
+ * state.
+ */
+ new_thread->suspend_count = parent_task->suspend_count + 1;
+
+ /*
+ * Add the thread to the processor set.
+ * If the pset is empty, suspend the thread again.
+ */
+
+ pset_add_thread(pset, new_thread);
+ if (pset->empty)
+ new_thread->suspend_count++;
+
+#if HW_FOOTPRINT
+ /*
+ * Need to set last_processor, idle processor would be best, but
+ * that requires extra locking nonsense. Go for tail of
+ * processors queue to avoid master.
+ */
+ if (!pset->empty) {
+ new_thread->last_processor =
+ (processor_t)queue_first(&pset->processors);
+ }
+ else {
+ /*
+ * Thread created in empty processor set. Pick
+ * master processor as an acceptable legal value.
+ */
+ new_thread->last_processor = master_processor;
+ }
+#else /* HW_FOOTPRINT */
+ /*
+ * Don't need to initialize because the context switch
+ * code will set it before it can be used.
+ */
+#endif /* HW_FOOTPRINT */
+
+#if MACH_PCSAMPLE
+ new_thread->pc_sample.seqno = 0;
+ new_thread->pc_sample.sampletypes = 0;
+#endif /* MACH_PCSAMPLE */
+
+ new_thread->pc_sample.buffer = 0;
+
+ /* Inherit the task name as the thread name. */
+ memcpy (new_thread->name, parent_task->name, THREAD_NAME_SIZE);
+
+ /*
+ * Add the thread to the task`s list of threads.
+ * The new thread holds another reference to the task.
+ */
+
+ parent_task->ref_count++;
+
+ parent_task->thread_count++;
+ queue_enter(&parent_task->thread_list, new_thread, thread_t,
+ thread_list);
+
+ /*
+ * Finally, mark the thread active.
+ */ + + new_thread->active = TRUE; + + if (!parent_task->active) { + task_unlock(parent_task); + pset_unlock(pset); + (void) thread_terminate(new_thread); + /* release ref we would have given our caller */ + thread_deallocate(new_thread); + return KERN_FAILURE; + } + task_unlock(parent_task); + pset_unlock(pset); + + ipc_thread_enable(new_thread); + + *child_thread = new_thread; + return KERN_SUCCESS; +} + +unsigned int thread_deallocate_stack = 0; + +void thread_deallocate( + thread_t thread) +{ + spl_t s; + task_t task; + processor_set_t pset; + + time_value64_t user_time, system_time; + + if (thread == THREAD_NULL) + return; + + /* + * First, check for new count > 0 (the common case). + * Only the thread needs to be locked. + */ + s = splsched(); + thread_lock(thread); + if (--thread->ref_count > 0) { + thread_unlock(thread); + (void) splx(s); + return; + } + + /* + * Count is zero. However, the task's and processor set's + * thread lists have implicit references to + * the thread, and may make new ones. Their locks also + * dominate the thread lock. To check for this, we + * temporarily restore the one thread reference, unlock + * the thread, and then lock the other structures in + * the proper order. + */ + thread->ref_count = 1; + thread_unlock(thread); + (void) splx(s); + + pset = thread->processor_set; + pset_lock(pset); + +#if MACH_HOST + /* + * The thread might have moved. + */ + while (pset != thread->processor_set) { + pset_unlock(pset); + pset = thread->processor_set; + pset_lock(pset); + } +#endif /* MACH_HOST */ + + task = thread->task; + task_lock(task); + + s = splsched(); + thread_lock(thread); + + if (--thread->ref_count > 0) { + /* + * Task or processor_set made extra reference. + */ + thread_unlock(thread); + (void) splx(s); + task_unlock(task); + pset_unlock(pset); + return; + } + + /* + * Thread has no references - we can remove it. + */ + + /* + * Remove pending timeouts. + */ + reset_timeout_check(&thread->timer); + + reset_timeout_check(&thread->depress_timer); + thread->depress_priority = -1; + + /* + * Accumulate times for dead threads in task. + */ + thread_read_times(thread, &user_time, &system_time); + time_value64_add(&task->total_user_time, &user_time); + time_value64_add(&task->total_system_time, &system_time); + + /* + * Remove thread from task list and processor_set threads list. + */ + task->thread_count--; + queue_remove(&task->thread_list, thread, thread_t, thread_list); + + pset_remove_thread(pset, thread); + + thread_unlock(thread); /* no more references - safe */ + (void) splx(s); + task_unlock(task); + pset_unlock(pset); + pset_deallocate(pset); + + /* + * A couple of quick sanity checks + */ + + if (thread == current_thread()) { + panic("thread deallocating itself"); + } + if ((thread->state & ~(TH_RUN | TH_HALTED | TH_SWAPPED)) != TH_SUSP) + panic("unstopped thread destroyed!"); + + /* + * Deallocate the task reference, since we know the thread + * is not running. + */ + task_deallocate(thread->task); /* may block */ + + /* + * Clean up any machine-dependent resources. 
+ */ + if ((thread->state & TH_SWAPPED) == 0) { + splsched(); + stack_free(thread); + (void) splx(s); + thread_deallocate_stack++; + } + /* + * Rattle the event count machinery (gag) + */ + evc_notify_abort(thread); + + pcb_terminate(thread); + kmem_cache_free(&thread_cache, (vm_offset_t) thread); +} + +void thread_reference( + thread_t thread) +{ + spl_t s; + + if (thread == THREAD_NULL) + return; + + s = splsched(); + thread_lock(thread); + thread->ref_count++; + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_terminate: + * + * Permanently stop execution of the specified thread. + * + * A thread to be terminated must be allowed to clean up any state + * that it has before it exits. The thread is broken out of any + * wait condition that it is in, and signalled to exit. It then + * cleans up its state and calls thread_halt_self on its way out of + * the kernel. The caller waits for the thread to halt, terminates + * its IPC state, and then deallocates it. + * + * If the caller is the current thread, it must still exit the kernel + * to clean up any state (thread and port references, messages, etc). + * When it exits the kernel, it then terminates its IPC state and + * queues itself for the reaper thread, which will wait for the thread + * to stop and then deallocate it. (A thread cannot deallocate itself, + * since it needs a kernel stack to execute.) + */ +kern_return_t thread_terminate( + thread_t thread) +{ + thread_t cur_thread = current_thread(); + task_t cur_task; + spl_t s; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * Break IPC control over the thread. + */ + ipc_thread_disable(thread); + + if (thread == cur_thread) { + + /* + * Current thread will queue itself for reaper when + * exiting kernel. + */ + s = splsched(); + thread_lock(thread); + if (thread->active) { + thread->active = FALSE; + thread_ast_set(thread, AST_TERMINATE); + } + thread_unlock(thread); + ast_on(cpu_number(), AST_TERMINATE); + splx(s); + return KERN_SUCCESS; + } + + /* + * Lock both threads and the current task + * to check termination races and prevent deadlocks. + */ + cur_task = current_task(); + task_lock(cur_task); + s = splsched(); + if ((vm_offset_t)thread < (vm_offset_t)cur_thread) { + thread_lock(thread); + thread_lock(cur_thread); + } + else { + thread_lock(cur_thread); + thread_lock(thread); + } + + /* + * If the current thread is being terminated, help out. + */ + if ((!cur_task->active) || (!cur_thread->active)) { + thread_unlock(cur_thread); + thread_unlock(thread); + (void) splx(s); + task_unlock(cur_task); + thread_terminate(cur_thread); + return KERN_FAILURE; + } + + thread_unlock(cur_thread); + task_unlock(cur_task); + + /* + * Terminate victim thread. + */ + if (!thread->active) { + /* + * Someone else got there first. + */ + thread_unlock(thread); + (void) splx(s); + return KERN_FAILURE; + } + + thread->active = FALSE; + + thread_unlock(thread); + (void) splx(s); + +#if MACH_HOST + /* + * Reassign thread to default pset if needed. + */ + thread_freeze(thread); + if (thread->processor_set != &default_pset) + thread_doassign(thread, &default_pset, FALSE); +#endif /* MACH_HOST */ + + /* + * Halt the victim at the clean point. + */ + (void) thread_halt(thread, TRUE); +#if MACH_HOST + thread_unfreeze(thread); +#endif /* MACH_HOST */ + /* + * Shut down the victims IPC and deallocate its + * reference to itself. 
+ */ + ipc_thread_terminate(thread); + thread_deallocate(thread); + return KERN_SUCCESS; +} + +kern_return_t thread_terminate_release( + thread_t thread, + task_t task, + mach_port_name_t thread_name, + mach_port_name_t reply_port, + vm_offset_t address, + vm_size_t size) +{ + if (task == NULL) + return KERN_INVALID_ARGUMENT; + + if (thread == NULL) + return KERN_INVALID_ARGUMENT; + + mach_port_deallocate(task->itk_space, thread_name); + + if (reply_port != MACH_PORT_NULL) + mach_port_destroy(task->itk_space, reply_port); + + if ((address != 0) || (size != 0)) + vm_deallocate(task->map, address, size); + + return thread_terminate(thread); +} + +/* + * thread_force_terminate: + * + * Version of thread_terminate called by task_terminate. thread is + * not the current thread. task_terminate is the dominant operation, + * so we can force this thread to stop. + */ +void +thread_force_terminate( + thread_t thread) +{ + boolean_t deallocate_here; + spl_t s; + + ipc_thread_disable(thread); + +#if MACH_HOST + /* + * Reassign thread to default pset if needed. + */ + thread_freeze(thread); + if (thread->processor_set != &default_pset) + thread_doassign(thread, &default_pset, FALSE); +#endif /* MACH_HOST */ + + s = splsched(); + thread_lock(thread); + deallocate_here = thread->active; + thread->active = FALSE; + thread_unlock(thread); + (void) splx(s); + + (void) thread_halt(thread, TRUE); + ipc_thread_terminate(thread); + +#if MACH_HOST + thread_unfreeze(thread); +#endif /* MACH_HOST */ + + if (deallocate_here) + thread_deallocate(thread); +} + + +/* + * Halt a thread at a clean point, leaving it suspended. + * + * must_halt indicates whether thread must halt. + * + */ +kern_return_t thread_halt( + thread_t thread, + boolean_t must_halt) +{ + thread_t cur_thread = current_thread(); + kern_return_t ret; + spl_t s; + + if (thread == cur_thread) + panic("thread_halt: trying to halt current thread."); + /* + * If must_halt is FALSE, then a check must be made for + * a cycle of halt operations. + */ + if (!must_halt) { + /* + * Grab both thread locks. + */ + s = splsched(); + if ((vm_offset_t)thread < (vm_offset_t)cur_thread) { + thread_lock(thread); + thread_lock(cur_thread); + } + else { + thread_lock(cur_thread); + thread_lock(thread); + } + + /* + * If target thread is already halted, grab a hold + * on it and return. + */ + if (thread->state & TH_HALTED) { + thread->suspend_count++; + thread_unlock(cur_thread); + thread_unlock(thread); + (void) splx(s); + return KERN_SUCCESS; + } + + /* + * If someone is trying to halt us, we have a potential + * halt cycle. Break the cycle by interrupting anyone + * who is trying to halt us, and causing this operation + * to fail; retry logic will only retry operations + * that cannot deadlock. (If must_halt is TRUE, this + * operation can never cause a deadlock.) + */ + if (cur_thread->ast & AST_HALT) { + thread_wakeup_with_result(TH_EV_WAKE_ACTIVE(cur_thread), + THREAD_INTERRUPTED); + thread_unlock(thread); + thread_unlock(cur_thread); + (void) splx(s); + return KERN_FAILURE; + } + + thread_unlock(cur_thread); + + } + else { + /* + * Lock thread and check whether it is already halted. + */ + s = splsched(); + thread_lock(thread); + if (thread->state & TH_HALTED) { + thread->suspend_count++; + thread_unlock(thread); + (void) splx(s); + return KERN_SUCCESS; + } + } + + /* + * Suspend thread - inline version of thread_hold() because + * thread is already locked. 
+ */ + thread->suspend_count++; + thread->state |= TH_SUSP; + + /* + * If someone else is halting it, wait for that to complete. + * Fail if wait interrupted and must_halt is false. + */ + while ((thread->ast & AST_HALT) && (!(thread->state & TH_HALTED))) { + thread->wake_active = TRUE; + thread_sleep(TH_EV_WAKE_ACTIVE(thread), + simple_lock_addr(thread->lock), TRUE); + + if (thread->state & TH_HALTED) { + (void) splx(s); + return KERN_SUCCESS; + } + if ((current_thread()->wait_result != THREAD_AWAKENED) + && !(must_halt)) { + (void) splx(s); + thread_release(thread); + return KERN_FAILURE; + } + thread_lock(thread); + } + + /* + * Otherwise, have to do it ourselves. + */ + + thread_ast_set(thread, AST_HALT); + + while (TRUE) { + /* + * Wait for thread to stop. + */ + thread_unlock(thread); + (void) splx(s); + + ret = thread_dowait(thread, must_halt); + + /* + * If the dowait failed, so do we. Drop AST_HALT, and + * wake up anyone else who might be waiting for it. + */ + if (ret != KERN_SUCCESS) { + s = splsched(); + thread_lock(thread); + thread_ast_clear(thread, AST_HALT); + thread_wakeup_with_result(TH_EV_WAKE_ACTIVE(thread), + THREAD_INTERRUPTED); + thread_unlock(thread); + (void) splx(s); + + thread_release(thread); + return ret; + } + + /* + * Clear any interruptible wait. + */ + clear_wait(thread, THREAD_INTERRUPTED, TRUE); + + /* + * If the thread's at a clean point, we're done. + * Don't need a lock because it really is stopped. + */ + if (thread->state & TH_HALTED) + return KERN_SUCCESS; + + /* + * If the thread is at a nice continuation, + * or a continuation with a cleanup routine, + * call the cleanup routine. + */ + if ((((thread->swap_func == mach_msg_continue) || + (thread->swap_func == mach_msg_receive_continue)) && + mach_msg_interrupt(thread)) || + (thread->swap_func == thread_exception_return) || + (thread->swap_func == thread_bootstrap_return)) { + s = splsched(); + thread_lock(thread); + thread->state |= TH_HALTED; + thread_ast_clear(thread, AST_HALT); + thread_unlock(thread); + splx(s); + + return KERN_SUCCESS; + } + + /* + * Force the thread to stop at a clean + * point, and arrange to wait for it. + * + * Set it running, so it can notice. Override + * the suspend count. We know that the thread + * is suspended and not waiting. + * + * Since the thread may hit an interruptible wait + * before it reaches a clean point, we must force it + * to wake us up when it does so. This involves some + * trickery: + * We mark the thread SUSPENDED so that thread_block + * will suspend it and wake us up. + * We mark the thread RUNNING so that it will run. + * We mark the thread UN-INTERRUPTIBLE (!) so that + * some other thread trying to halt or suspend it won't + * take it off the run queue before it runs. Since + * dispatching a thread (the tail of thread_invoke) marks + * the thread interruptible, it will stop at the next + * context switch or interruptible wait. + */ + + s = splsched(); + thread_lock(thread); + if ((thread->state & TH_SCHED_STATE) != TH_SUSP) + panic("thread_halt"); + thread->state |= TH_RUN | TH_UNINT; + thread_setrun(thread, FALSE); + + /* + * Continue loop and wait for thread to stop. + */ + } +} + +static void __attribute__((noreturn)) walking_zombie(void) +{ + panic("the zombie walks!"); +} + +/* + * Thread calls this routine on exit from the kernel when it + * notices a halt request. 
+ */ +void thread_halt_self(continuation_t continuation) +{ + thread_t thread = current_thread(); + spl_t s; + + if (thread->ast & AST_TERMINATE) { + /* + * Thread is terminating itself. Shut + * down IPC, then queue it up for the + * reaper thread. + */ + ipc_thread_terminate(thread); + + thread_hold(thread); + + s = splsched(); + simple_lock(&reaper_lock); + enqueue_tail(&reaper_queue, &(thread->links)); + simple_unlock(&reaper_lock); + + thread_lock(thread); + thread->state |= TH_HALTED; + thread_unlock(thread); + (void) splx(s); + + thread_wakeup((event_t)&reaper_queue); + counter(c_thread_halt_self_block++); + thread_block(walking_zombie); + /*NOTREACHED*/ + } else { + /* + * Thread was asked to halt - show that it + * has done so. + */ + s = splsched(); + thread_lock(thread); + thread->state |= TH_HALTED; + thread_ast_clear(thread, AST_HALT); + thread_unlock(thread); + splx(s); + counter(c_thread_halt_self_block++); + thread_block(continuation); + /* + * thread_release resets TH_HALTED. + */ + } +} + +/* + * thread_hold: + * + * Suspend execution of the specified thread. + * This is a recursive-style suspension of the thread, a count of + * suspends is maintained. + */ +void thread_hold( + thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + thread->suspend_count++; + thread->state |= TH_SUSP; + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_dowait: + * + * Wait for a thread to actually enter stopped state. + * + * must_halt argument indicates if this may fail on interruption. + * This is FALSE only if called from thread_abort via thread_halt. + */ +kern_return_t +thread_dowait( + thread_t thread, + boolean_t must_halt) +{ + boolean_t need_wakeup; + kern_return_t ret = KERN_SUCCESS; + spl_t s; + + if (thread == current_thread()) + panic("thread_dowait"); + + /* + * If a thread is not interruptible, it may not be suspended + * until it becomes interruptible. In this case, we wait for + * the thread to stop itself, and indicate that we are waiting + * for it to stop so that it can wake us up when it does stop. + * + * If the thread is interruptible, we may be able to suspend + * it immediately. There are several cases: + * + * 1) The thread is already stopped (trivial) + * 2) The thread is runnable (marked RUN and on a run queue). + * We pull it off the run queue and mark it stopped. + * 3) The thread is running. We wait for it to stop. + */ + + need_wakeup = FALSE; + s = splsched(); + thread_lock(thread); + + for (;;) { + switch (thread->state & TH_SCHED_STATE) { + case TH_SUSP: + case TH_WAIT | TH_SUSP: + /* + * Thread is already suspended, or sleeping in an + * interruptible wait. We win! + */ + break; + + case TH_RUN | TH_SUSP: + /* + * The thread is interruptible. If we can pull + * it off a runq, stop it here. + */ + if (rem_runq(thread) != RUN_QUEUE_NULL) { + thread->state &= ~TH_RUN; + need_wakeup = thread->wake_active; + thread->wake_active = FALSE; + break; + } +#if NCPUS > 1 + /* + * The thread must be running, so make its + * processor execute ast_check(). This + * should cause the thread to take an ast and + * context switch to suspend for us. + */ + cause_ast_check(thread->last_processor); +#endif /* NCPUS > 1 */ + + /* + * Fall through to wait for thread to stop. + */ + + case TH_RUN | TH_SUSP | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_SUSP | TH_UNINT: + /* + * Wait for the thread to stop, or sleep interruptibly + * (thread_block will stop it in the latter case). 
+ * Check for failure if interrupted. + */ + thread->wake_active = TRUE; + thread_sleep(TH_EV_WAKE_ACTIVE(thread), + simple_lock_addr(thread->lock), TRUE); + thread_lock(thread); + if ((current_thread()->wait_result != THREAD_AWAKENED) && + !must_halt) { + ret = KERN_FAILURE; + break; + } + + /* + * Repeat loop to check thread`s state. + */ + continue; + } + /* + * Thread is stopped at this point. + */ + break; + } + + thread_unlock(thread); + (void) splx(s); + + if (need_wakeup) + thread_wakeup(TH_EV_WAKE_ACTIVE(thread)); + + return ret; +} + +void thread_release( + thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + if (--thread->suspend_count == 0) { + thread->state &= ~(TH_SUSP | TH_HALTED); + if ((thread->state & (TH_WAIT | TH_RUN)) == 0) { + /* was only suspended */ + thread->state |= TH_RUN; + thread_setrun(thread, TRUE); + } + } + thread_unlock(thread); + (void) splx(s); +} + +kern_return_t thread_suspend( + thread_t thread) +{ + boolean_t hold; + spl_t spl; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + hold = FALSE; + spl = splsched(); + thread_lock(thread); + /* Wait for thread to get interruptible */ + while (thread->state & TH_UNINT) { + assert_wait(TH_EV_STATE(thread), TRUE); + thread_unlock(thread); + thread_block(thread_no_continuation); + thread_lock(thread); + } + if (thread->user_stop_count++ == 0) { + hold = TRUE; + thread->suspend_count++; + thread->state |= TH_SUSP; + } + thread_unlock(thread); + (void) splx(spl); + + /* + * Now wait for the thread if necessary. + */ + if (hold) { + if (thread == current_thread()) { + /* + * We want to call thread_block on our way out, + * to stop running. + */ + spl = splsched(); + ast_on(cpu_number(), AST_BLOCK); + (void) splx(spl); + } else + (void) thread_dowait(thread, TRUE); + } + return KERN_SUCCESS; +} + + +kern_return_t thread_resume( + thread_t thread) +{ + kern_return_t ret; + spl_t s; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + ret = KERN_SUCCESS; + + s = splsched(); + thread_lock(thread); + if (thread->user_stop_count > 0) { + if (--thread->user_stop_count == 0) { + if (--thread->suspend_count == 0) { + thread->state &= ~(TH_SUSP | TH_HALTED); + if ((thread->state & (TH_WAIT | TH_RUN)) == 0) { + /* was only suspended */ + thread->state |= TH_RUN; + thread_setrun(thread, TRUE); + } + } + } + } + else { + ret = KERN_FAILURE; + } + + thread_unlock(thread); + (void) splx(s); + + return ret; +} + +/* + * Return thread's machine-dependent state. + */ +kern_return_t thread_get_state( + thread_t thread, + int flavor, + thread_state_t old_state, /* pointer to OUT array */ + natural_t *old_state_count) /*IN/OUT*/ +{ + kern_return_t ret; + +#if defined(__i386__) || defined(__x86_64__) + if (flavor == i386_DEBUG_STATE && thread == current_thread()) + /* This state can be obtained directly for the curren thread. */ + return thread_getstatus(thread, flavor, old_state, old_state_count); +#endif + + if (thread == THREAD_NULL || thread == current_thread()) + return KERN_INVALID_ARGUMENT; + + thread_hold(thread); + (void) thread_dowait(thread, TRUE); + + ret = thread_getstatus(thread, flavor, old_state, old_state_count); + + thread_release(thread); + return ret; +} + +/* + * Change thread's machine-dependent state. 
+ */
+kern_return_t thread_set_state(
+ thread_t thread,
+ int flavor,
+ thread_state_t new_state,
+ natural_t new_state_count)
+{
+ kern_return_t ret;
+
+#if defined(__i386__) || defined(__x86_64__)
+ if (flavor == i386_DEBUG_STATE && thread == current_thread())
+ /* This state can be set directly for the current thread. */
+ return thread_setstatus(thread, flavor, new_state, new_state_count);
+ if (flavor == i386_FSGS_BASE_STATE && thread == current_thread())
+ /* This state can be set directly for the current thread. */
+ return thread_setstatus(thread, flavor, new_state, new_state_count);
+#endif
+
+ if (thread == THREAD_NULL || thread == current_thread())
+ return KERN_INVALID_ARGUMENT;
+
+ thread_hold(thread);
+ (void) thread_dowait(thread, TRUE);
+
+ ret = thread_setstatus(thread, flavor, new_state, new_state_count);
+
+ thread_release(thread);
+ return ret;
+}
+
+kern_return_t thread_info(
+ thread_t thread,
+ int flavor,
+ thread_info_t thread_info_out, /* pointer to OUT array */
+ natural_t *thread_info_count) /*IN/OUT*/
+{
+ int state, flags;
+ spl_t s;
+
+ if (thread == THREAD_NULL)
+ return KERN_INVALID_ARGUMENT;
+
+ if (flavor == THREAD_BASIC_INFO) {
+ thread_basic_info_t basic_info;
+
+ /* Allow *thread_info_count to be smaller than the provided amount
+ * that does not contain the new time_value64_t fields as some
+ * callers might not know about them yet. */
+
+ if (*thread_info_count <
+ THREAD_BASIC_INFO_COUNT - 3 * sizeof(time_value64_t)/sizeof(natural_t))
+ return KERN_INVALID_ARGUMENT;
+
+ basic_info = (thread_basic_info_t) thread_info_out;
+
+ s = splsched();
+ thread_lock(thread);
+
+ /*
+ * Update lazy-evaluated scheduler info because someone wants it.
+ */
+ if ((thread->state & TH_RUN) == 0 &&
+ thread->sched_stamp != sched_tick)
+ update_priority(thread);
+
+ /* fill in info */
+
+ time_value64_t user_time, system_time;
+ thread_read_times(thread, &user_time, &system_time);
+ TIME_VALUE64_TO_TIME_VALUE(&user_time, &basic_info->user_time);
+ TIME_VALUE64_TO_TIME_VALUE(&system_time, &basic_info->system_time);
+
+ basic_info->base_priority = thread->priority;
+ basic_info->cur_priority = thread->sched_pri;
+ time_value64_t creation_time;
+ read_time_stamp(&thread->creation_time, &creation_time);
+ TIME_VALUE64_TO_TIME_VALUE(&creation_time, &basic_info->creation_time);
+
+ if (*thread_info_count == THREAD_BASIC_INFO_COUNT) {
+ /* Copy new time_value64_t fields */
+ basic_info->user_time64 = user_time;
+ basic_info->system_time64 = system_time;
+ basic_info->creation_time64 = creation_time;
+ }
+
+ /*
+ * To calculate cpu_usage, first correct for timer rate,
+ * then for 5/8 ageing. The correction factor [3/5] is
+ * (1/(5/8) - 1).
+ */
+ basic_info->cpu_usage = thread->cpu_usage /
+ (TIMER_RATE/TH_USAGE_SCALE);
+ basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
+
+ flags = 0;
+ if (thread->state & TH_SWAPPED)
+ flags |= TH_FLAGS_SWAPPED;
+ if (thread->state & TH_IDLE)
+ flags |= TH_FLAGS_IDLE;
+
+ if (thread->state & TH_HALTED)
+ state = TH_STATE_HALTED;
+ else
+ if (thread->state & TH_RUN)
+ state = TH_STATE_RUNNING;
+ else
+ if (thread->state & TH_UNINT)
+ state = TH_STATE_UNINTERRUPTIBLE;
+ else
+ if (thread->state & TH_SUSP)
+ state = TH_STATE_STOPPED;
+ else
+ if (thread->state & TH_WAIT)
+ state = TH_STATE_WAITING;
+ else
+ state = 0; /* ?
*/ + + basic_info->run_state = state; + basic_info->flags = flags; + basic_info->suspend_count = thread->user_stop_count; + if (state == TH_STATE_RUNNING) + basic_info->sleep_time = 0; + else + basic_info->sleep_time = sched_tick - thread->sched_stamp; + + thread_unlock(thread); + splx(s); + + if (*thread_info_count > THREAD_BASIC_INFO_COUNT) + *thread_info_count = THREAD_BASIC_INFO_COUNT; + return KERN_SUCCESS; + } + else if (flavor == THREAD_SCHED_INFO) { + thread_sched_info_t sched_info; + + /* Allow *thread_info_count to be one smaller than the + usual amount, because last_processor is a + new member that some callers might not know about. */ + if (*thread_info_count < THREAD_SCHED_INFO_COUNT -1) + return KERN_INVALID_ARGUMENT; + + sched_info = (thread_sched_info_t) thread_info_out; + + s = splsched(); + thread_lock(thread); + +#if MACH_FIXPRI + sched_info->policy = thread->policy; + if (thread->policy == POLICY_FIXEDPRI) + sched_info->data = (thread->sched_data * tick)/1000; + else + sched_info->data = 0; + +#else /* MACH_FIXPRI */ + sched_info->policy = POLICY_TIMESHARE; + sched_info->data = 0; +#endif /* MACH_FIXPRI */ + + sched_info->base_priority = thread->priority; + sched_info->max_priority = thread->max_priority; + sched_info->cur_priority = thread->sched_pri; + + sched_info->depressed = (thread->depress_priority >= 0); + sched_info->depress_priority = thread->depress_priority; + +#if NCPUS > 1 + if (thread->last_processor) + sched_info->last_processor = thread->last_processor->slot_num; + else +#endif + sched_info->last_processor = 0; + + thread_unlock(thread); + splx(s); + + *thread_info_count = THREAD_SCHED_INFO_COUNT; + return KERN_SUCCESS; + } + + return KERN_INVALID_ARGUMENT; +} + +kern_return_t thread_abort( + thread_t thread) +{ + if (thread == THREAD_NULL || thread == current_thread()) { + return KERN_INVALID_ARGUMENT; + } + + /* + * + * clear it of an event wait + */ + + evc_notify_abort(thread); + + /* + * Try to force the thread to a clean point + * If the halt operation fails return KERN_ABORTED. + * ipc code will convert this to an ipc interrupted error code. + */ + if (thread_halt(thread, FALSE) != KERN_SUCCESS) + return KERN_ABORTED; + + /* + * If the thread was in an exception, abort that too. + */ + mach_msg_abort_rpc(thread); + + /* + * Then set it going again. + */ + thread_release(thread); + + /* + * Also abort any depression. + */ + if (thread->depress_priority != -1) + thread_depress_abort(thread); + + return KERN_SUCCESS; +} + +/* + * thread_start: + * + * Start a thread at the specified routine. + * The thread must be in a swapped state. + */ + +void +thread_start( + thread_t thread, + continuation_t start) +{ + thread->swap_func = start; +} + +/* + * kernel_thread: + * + * Start up a kernel thread in the specified task. + */ + +thread_t kernel_thread( + task_t task, + continuation_t start, + void * arg) +{ + kern_return_t kr; + thread_t thread; + + kr = thread_create(task, &thread); + if (kr != KERN_SUCCESS) + return THREAD_NULL; + + /* release "extra" ref that thread_create gave us */ + thread_deallocate(thread); + thread_start(thread, start); + thread->ith_other = arg; + + /* + * We ensure that the kernel thread starts with a stack. + * The swapin mechanism might not be operational yet. 
+ */ + thread_doswapin(thread); + thread->max_priority = BASEPRI_SYSTEM; + thread->priority = BASEPRI_SYSTEM; + thread->sched_pri = BASEPRI_SYSTEM; + (void) thread_resume(thread); + return thread; +} + +/* + * reaper_thread: + * + * This kernel thread runs forever looking for threads to destroy + * (when they request that they be destroyed, of course). + */ +static void __attribute__((noreturn)) reaper_thread_continue(void) +{ + for (;;) { + thread_t thread; + spl_t s; + + s = splsched(); + simple_lock(&reaper_lock); + + while ((thread = (thread_t) dequeue_head(&reaper_queue)) + != THREAD_NULL) { + simple_unlock(&reaper_lock); + (void) splx(s); + + (void) thread_dowait(thread, TRUE); /* may block */ + thread_deallocate(thread); /* may block */ + + s = splsched(); + simple_lock(&reaper_lock); + } + + assert_wait((event_t) &reaper_queue, FALSE); + simple_unlock(&reaper_lock); + (void) splx(s); + counter(c_reaper_thread_block++); + thread_block(reaper_thread_continue); + } +} + +void reaper_thread(void) +{ + reaper_thread_continue(); + /*NOTREACHED*/ +} + +#if MACH_HOST +/* + * thread_assign: + * + * Change processor set assignment. + * Caller must hold an extra reference to the thread (if this is + * called directly from the ipc interface, this is an operation + * in progress reference). Caller must hold no locks -- this may block. + */ + +kern_return_t +thread_assign(thread_t thread, + processor_set_t new_pset) +{ + if (thread == THREAD_NULL || new_pset == PROCESSOR_SET_NULL) { + return KERN_INVALID_ARGUMENT; + } + + thread_freeze(thread); + thread_doassign(thread, new_pset, TRUE); + + return KERN_SUCCESS; +} + +/* + * thread_freeze: + * + * Freeze thread's assignment. Prelude to assigning thread. + * Only one freeze may be held per thread. + */ +void +thread_freeze(thread_t thread) +{ + spl_t s; + /* + * Freeze the assignment, deferring to a prior freeze. + */ + s = splsched(); + thread_lock(thread); + while (thread->may_assign == FALSE) { + thread->assign_active = TRUE; + thread_sleep((event_t) &thread->assign_active, + simple_lock_addr(thread->lock), FALSE); + thread_lock(thread); + } + thread->may_assign = FALSE; + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_unfreeze: release freeze on thread's assignment. + */ +void +thread_unfreeze( + thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + thread->may_assign = TRUE; + if (thread->assign_active) { + thread->assign_active = FALSE; + thread_wakeup((event_t)&thread->assign_active); + } + thread_unlock(thread); + splx(s); +} + +/* + * thread_doassign: + * + * Actually do thread assignment. thread_will_assign must have been + * called on the thread. release_freeze argument indicates whether + * to release freeze on thread. + */ + +void +thread_doassign( + thread_t thread, + processor_set_t new_pset, + boolean_t release_freeze) +{ + processor_set_t pset; + boolean_t old_empty, new_empty; + boolean_t recompute_pri = FALSE; + spl_t s; + + /* + * Check for silly no-op. + */ + pset = thread->processor_set; + if (pset == new_pset) { + if (release_freeze) + thread_unfreeze(thread); + return; + } + /* + * Suspend the thread and stop it if it's not the current thread. + */ + thread_hold(thread); + if (thread != current_thread()) + (void) thread_dowait(thread, TRUE); + + /* + * Lock both psets now, use ordering to avoid deadlocks. 
+ */ +Restart: + if ((vm_offset_t)pset < (vm_offset_t)new_pset) { + pset_lock(pset); + pset_lock(new_pset); + } + else { + pset_lock(new_pset); + pset_lock(pset); + } + + /* + * Check if new_pset is ok to assign to. If not, reassign + * to default_pset. + */ + if (!new_pset->active) { + pset_unlock(pset); + pset_unlock(new_pset); + new_pset = &default_pset; + goto Restart; + } + + pset_reference(new_pset); + + /* + * Grab the thread lock and move the thread. + * Then drop the lock on the old pset and the thread's + * reference to it. + */ + s = splsched(); + thread_lock(thread); + + thread_change_psets(thread, pset, new_pset); + + old_empty = pset->empty; + new_empty = new_pset->empty; + + pset_unlock(pset); + + /* + * Reset policy and priorities if needed. + */ +#if MACH_FIXPRI + if ((thread->policy & new_pset->policies) == 0) { + thread->policy = POLICY_TIMESHARE; + recompute_pri = TRUE; + } +#endif /* MACH_FIXPRI */ + + if (thread->max_priority < new_pset->max_priority) { + thread->max_priority = new_pset->max_priority; + if (thread->priority < thread->max_priority) { + thread->priority = thread->max_priority; + recompute_pri = TRUE; + } + else { + if ((thread->depress_priority >= 0) && + (thread->depress_priority < thread->max_priority)) { + thread->depress_priority = thread->max_priority; + } + } + } + + pset_unlock(new_pset); + + if (recompute_pri) + compute_priority(thread, TRUE); + + if (release_freeze) { + thread->may_assign = TRUE; + if (thread->assign_active) { + thread->assign_active = FALSE; + thread_wakeup((event_t)&thread->assign_active); + } + } + + thread_unlock(thread); + splx(s); + + pset_deallocate(pset); + + /* + * Figure out hold status of thread. Threads assigned to empty + * psets must be held. Therefore: + * If old pset was empty release its hold. + * Release our hold from above unless new pset is empty. + */ + + if (old_empty) + thread_release(thread); + if (!new_empty) + thread_release(thread); + + /* + * If current_thread is assigned, context switch to force + * assignment to happen. This also causes hold to take + * effect if the new pset is empty. + */ + if (thread == current_thread()) { + s = splsched(); + ast_on(cpu_number(), AST_BLOCK); + (void) splx(s); + } +} +#else /* MACH_HOST */ +kern_return_t +thread_assign( + thread_t thread, + processor_set_t new_pset) +{ + return KERN_FAILURE; +} +#endif /* MACH_HOST */ + +/* + * thread_assign_default: + * + * Special version of thread_assign for assigning threads to default + * processor set. + */ +kern_return_t +thread_assign_default( + thread_t thread) +{ + return thread_assign(thread, &default_pset); +} + +/* + * thread_get_assignment + * + * Return current assignment for this thread. + */ +kern_return_t thread_get_assignment( + thread_t thread, + processor_set_t *pset) +{ + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + *pset = thread->processor_set; + pset_reference(*pset); + return KERN_SUCCESS; +} + +/* + * thread_priority: + * + * Set priority (and possibly max priority) for thread. + */ +kern_return_t +thread_priority( + thread_t thread, + int priority, + boolean_t set_max) +{ + spl_t s; + kern_return_t ret = KERN_SUCCESS; + + if ((thread == THREAD_NULL) || invalid_pri(priority)) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + thread_lock(thread); + + /* + * Check for violation of max priority + */ + if (priority < thread->max_priority) + ret = KERN_FAILURE; + else { + /* + * Set priorities. If a depression is in progress, + * change the priority to restore. 
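[Editorial sketch] The Restart/pset_lock sequence above encodes a general deadlock-avoidance rule: when two locks of the same kind must be held, take the lower-addressed one first. A generic restatement with a hypothetical struct example_obj:

        #include <kern/lock.h>
        #include <mach/machine/vm_types.h>

        struct example_obj {
                decl_simple_lock_data(,lock)
                /* ... */
        };

        /* Lock A and B in address order so two threads locking the same pair
           in opposite roles cannot deadlock.  The caller must have ruled out
           a == b first (compare the "silly no-op" check in thread_doassign). */
        static void example_lock_pair(struct example_obj *a, struct example_obj *b)
        {
                if ((vm_offset_t) a < (vm_offset_t) b) {
                        simple_lock(&a->lock);
                        simple_lock(&b->lock);
                } else {
                        simple_lock(&b->lock);
                        simple_lock(&a->lock);
                }
        }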
+ */ + if (thread->depress_priority >= 0) + thread->depress_priority = priority; + + else { + thread->priority = priority; + compute_priority(thread, TRUE); + } + + if (set_max) + thread->max_priority = priority; + } + thread_unlock(thread); + (void) splx(s); + + return ret; +} + +/* + * thread_set_own_priority: + * + * Internal use only; sets the priority of the calling thread. + * Will adjust max_priority if necessary. + */ +void +thread_set_own_priority( + int priority) +{ + spl_t s; + thread_t thread = current_thread(); + + s = splsched(); + thread_lock(thread); + + if (priority < thread->max_priority) + thread->max_priority = priority; + thread->priority = priority; + compute_priority(thread, TRUE); + + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_max_priority: + * + * Reset the max priority for a thread. + */ +kern_return_t +thread_max_priority( + thread_t thread, + processor_set_t pset, + int max_priority) +{ + spl_t s; + kern_return_t ret = KERN_SUCCESS; + + if ((thread == THREAD_NULL) || (pset == PROCESSOR_SET_NULL) || + invalid_pri(max_priority)) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + thread_lock(thread); + +#if MACH_HOST + /* + * Check for wrong processor set. + */ + if (pset != thread->processor_set) + ret = KERN_FAILURE; + + else { +#endif /* MACH_HOST */ + thread->max_priority = max_priority; + + /* + * Reset priority if it violates new max priority + */ + if (max_priority > thread->priority) { + thread->priority = max_priority; + + compute_priority(thread, TRUE); + } + else { + if (thread->depress_priority >= 0 && + max_priority > thread->depress_priority) + thread->depress_priority = max_priority; + } +#if MACH_HOST + } +#endif /* MACH_HOST */ + + thread_unlock(thread); + (void) splx(s); + + return ret; +} + +/* + * thread_policy: + * + * Set scheduling policy for thread. + */ +kern_return_t +thread_policy( + thread_t thread, + int policy, + int data) +{ +#if MACH_FIXPRI + kern_return_t ret = KERN_SUCCESS; + int temp; + spl_t s; +#endif /* MACH_FIXPRI */ + + if ((thread == THREAD_NULL) || invalid_policy(policy)) + return KERN_INVALID_ARGUMENT; + +#if MACH_FIXPRI + s = splsched(); + thread_lock(thread); + + /* + * Check if changing policy. + */ + if (policy == thread->policy) { + /* + * Just changing data. This is meaningless for + * timesharing, quantum for fixed priority (but + * has no effect until current quantum runs out). + */ + if (policy == POLICY_FIXEDPRI) { + temp = data * 1000; + if (temp % tick) + temp += tick; + thread->sched_data = temp/tick; + } + } + else { + /* + * Changing policy. Check if new policy is allowed. + */ + if ((thread->processor_set->policies & policy) == 0) + ret = KERN_FAILURE; + else { + /* + * Changing policy. Save data and calculate new + * priority. + */ + thread->policy = policy; + if (policy == POLICY_FIXEDPRI) { + temp = data * 1000; + if (temp % tick) + temp += tick; + thread->sched_data = temp/tick; + } + compute_priority(thread, TRUE); + } + } + thread_unlock(thread); + (void) splx(s); + + return ret; +#else /* MACH_FIXPRI */ + if (policy == POLICY_TIMESHARE) + return KERN_SUCCESS; + else + return KERN_FAILURE; +#endif /* MACH_FIXPRI */ +} + +/* + * thread_wire: + * + * Specify that the target thread must always be able + * to run and to allocate memory. 
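[Editorial worked example] The fixed-priority quantum conversion used by thread_policy() above (and reported back by thread_info()) is easiest to see with numbers; the tick value is an assumption, taken as the clock period in microseconds at 100 Hz.

        /*
         * Requested quantum: data = 15 ms, tick = 10000 usec (assumed).
         *
         *      temp = 15 * 1000 = 15000 usec
         *      15000 % 10000 != 0, so temp += 10000   -> 25000 usec
         *      sched_data = 25000 / 10000             -> 2 ticks
         *
         * i.e. the quantum is rounded up to whole ticks (20 ms here), and
         * thread_info() reports it back as (2 * 10000) / 1000 = 20 ms.
         */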
+ */ +kern_return_t +thread_wire( + host_t host, + thread_t thread, + boolean_t wired) +{ + spl_t s; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * This implementation only works for the current thread. + * See stack_privilege. + */ + if (thread != current_thread()) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + thread_lock(thread); + + if (wired) { + thread->vm_privilege = 1; + stack_privilege(thread); + } + else { + thread->vm_privilege = 0; +/*XXX stack_unprivilege(thread); */ + thread->stack_privilege = 0; + } + + thread_unlock(thread); + splx(s); + + return KERN_SUCCESS; +} + +/* + * thread_collect_scan: + * + * Attempt to free resources owned by threads. + * pcb_collect doesn't do anything yet. + */ + +static void thread_collect_scan(void) +{ + thread_t thread, prev_thread; + processor_set_t pset, prev_pset; + + prev_thread = THREAD_NULL; + prev_pset = PROCESSOR_SET_NULL; + + simple_lock(&all_psets_lock); + queue_iterate(&all_psets, pset, processor_set_t, all_psets) { + pset_lock(pset); + queue_iterate(&pset->threads, thread, thread_t, pset_threads) { + spl_t s = splsched(); + thread_lock(thread); + + /* + * Only collect threads which are + * not runnable and are swapped. + */ + + if ((thread->state & (TH_RUN|TH_SWAPPED)) + == TH_SWAPPED) { + thread->ref_count++; + thread_unlock(thread); + (void) splx(s); + pset->ref_count++; + pset_unlock(pset); + simple_unlock(&all_psets_lock); + + pcb_collect(thread); + + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + prev_thread = thread; + + if (prev_pset != PROCESSOR_SET_NULL) + pset_deallocate(prev_pset); + prev_pset = pset; + + simple_lock(&all_psets_lock); + pset_lock(pset); + } else { + thread_unlock(thread); + (void) splx(s); + } + } + pset_unlock(pset); + } + simple_unlock(&all_psets_lock); + + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + if (prev_pset != PROCESSOR_SET_NULL) + pset_deallocate(prev_pset); +} + +boolean_t thread_collect_allowed = TRUE; +unsigned thread_collect_last_tick = 0; +unsigned thread_collect_max_rate = 0; /* in ticks */ + +/* + * consider_thread_collect: + * + * Called by the pageout daemon when the system needs more free pages. + */ + +void consider_thread_collect(void) +{ + /* + * By default, don't attempt thread collection more frequently + * than once a second. + */ + + if (thread_collect_max_rate == 0) + thread_collect_max_rate = hz; + + if (thread_collect_allowed && + (sched_tick > + (thread_collect_last_tick + + thread_collect_max_rate / (hz / 1)))) { + thread_collect_last_tick = sched_tick; + thread_collect_scan(); + } +} + +#if MACH_DEBUG + +static vm_size_t stack_usage(vm_offset_t stack) +{ + unsigned i; + + for (i = 0; i < KERNEL_STACK_SIZE/sizeof(unsigned int); i++) + if (((unsigned int *)stack)[i] != STACK_MARKER) + break; + + return KERNEL_STACK_SIZE - i * sizeof(unsigned int); +} + +/* + * Machine-dependent code should call stack_init + * before doing its own initialization of the stack. + */ + +void stack_init( + vm_offset_t stack) +{ + if (stack_check_usage) { + unsigned i; + + for (i = 0; i < KERNEL_STACK_SIZE/sizeof(unsigned int); i++) + ((unsigned int *)stack)[i] = STACK_MARKER; + } +} + +/* + * Machine-dependent code should call stack_finalize + * before releasing the stack memory. 
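[Editorial worked example] To make the stack_usage() scan above concrete, assume a 16 KB kernel stack (KERNEL_STACK_SIZE is machine-dependent) growing downward from its high end:

        /*
         * 16 KB stack = 4096 unsigned-int words, all preset to STACK_MARKER
         * by stack_init().  Suppose the scan finds the first overwritten word
         * at index i = 3000: words 0..2999 still hold the marker, so the
         * stack never grew below byte offset 12000, and at most the top
         *
         *      16384 - 3000 * sizeof(unsigned int) = 4384 bytes
         *
         * were ever used.  That is the value stack_usage() returns.
         */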
+ */ + +void stack_finalize( + vm_offset_t stack) +{ + if (stack_check_usage) { + vm_size_t used = stack_usage(stack); + + simple_lock(&stack_usage_lock); + if (used > stack_max_usage) + stack_max_usage = used; + simple_unlock(&stack_usage_lock); + } +} + +#ifndef MACHINE_STACK +/* + * stack_statistics: + * + * Return statistics on cached kernel stacks. + * *maxusagep must be initialized by the caller. + */ + +static void stack_statistics( + natural_t *totalp, + vm_size_t *maxusagep) +{ + spl_t s; + + s = splsched(); + stack_lock(); + if (stack_check_usage) { + vm_offset_t stack; + + /* + * This is pretty expensive to do at splsched, + * but it only happens when someone makes + * a debugging call, so it should be OK. + */ + + for (stack = stack_free_list; stack != 0; + stack = stack_next(stack)) { + vm_size_t usage = stack_usage(stack); + + if (usage > *maxusagep) + *maxusagep = usage; + } + } + + *totalp = stack_free_count; + stack_unlock(); + (void) splx(s); +} +#endif /* MACHINE_STACK */ + +kern_return_t host_stack_usage( + host_t host, + vm_size_t *reservedp, + unsigned int *totalp, + vm_size_t *spacep, + vm_size_t *residentp, + vm_size_t *maxusagep, + vm_offset_t *maxstackp) +{ + natural_t total; + vm_size_t maxusage; + + if (host == HOST_NULL) + return KERN_INVALID_HOST; + + simple_lock(&stack_usage_lock); + maxusage = stack_max_usage; + simple_unlock(&stack_usage_lock); + + stack_statistics(&total, &maxusage); + + *reservedp = 0; + *totalp = total; + *spacep = *residentp = total * round_page(KERNEL_STACK_SIZE); + *maxusagep = maxusage; + *maxstackp = 0; + return KERN_SUCCESS; +} + +kern_return_t processor_set_stack_usage( + processor_set_t pset, + unsigned int *totalp, + vm_size_t *spacep, + vm_size_t *residentp, + vm_size_t *maxusagep, + vm_offset_t *maxstackp) +{ + unsigned int total; + vm_size_t maxusage; + vm_offset_t maxstack; + + thread_t *threads; + thread_t tmp_thread; + + unsigned int actual; /* this many things */ + unsigned int i; + + vm_size_t size, size_needed; + vm_offset_t addr; + + if (pset == PROCESSOR_SET_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + pset_lock(pset); + if (!pset->active) { + pset_unlock(pset); + return KERN_INVALID_ARGUMENT; + } + + actual = pset->thread_count; + + /* do we have the memory we need? */ + + size_needed = actual * sizeof(thread_t); + if (size_needed <= size) + break; + + /* unlock the pset and allocate more memory */ + pset_unlock(pset); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the processor_set is locked & active */ + + threads = (thread_t *) addr; + for (i = 0, tmp_thread = (thread_t) queue_first(&pset->threads); + i < actual; + i++, + tmp_thread = (thread_t) queue_next(&tmp_thread->pset_threads)) { + thread_reference(tmp_thread); + threads[i] = tmp_thread; + } + assert(queue_end(&pset->threads, (queue_entry_t) tmp_thread)); + + /* can unlock processor set now that we have the thread refs */ + pset_unlock(pset); + + /* calculate maxusage and free thread references */ + + total = 0; + maxusage = 0; + maxstack = 0; + for (i = 0; i < actual; i++) { + thread_t thread = threads[i]; + vm_offset_t stack = 0; + + /* + * thread->kernel_stack is only accurate if the + * thread isn't swapped and is not executing. + * + * Of course, we don't have the appropriate locks + * for these shenanigans. 
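[Editorial sketch] The allocate-then-recheck loop in processor_set_stack_usage() above is a reusable pattern for snapshotting a list whose length can change whenever its lock is dropped. A generic restatement with hypothetical names (example_lock, example_count, example_item_t):

        #include <kern/kalloc.h>
        #include <kern/lock.h>

        def_simple_lock_data(static, example_lock)
        typedef struct example_item *example_item_t;
        static unsigned int example_count;

        /* Return a kalloc'd array sized for the count observed while the lock
           is held, leaving the lock held; the caller kfree()s it when done. */
        static vm_offset_t example_snapshot_alloc(vm_size_t *sizep, unsigned int *actualp)
        {
                vm_size_t size = 0, size_needed;
                vm_offset_t addr = 0;
                unsigned int actual;

                for (;;) {
                        simple_lock(&example_lock);
                        actual = example_count;
                        size_needed = actual * sizeof(example_item_t);
                        if (size_needed <= size)
                                break;          /* big enough; keep the lock held */

                        /* Too small (or first pass): drop the lock, grow, retry,
                           since the count may change again while we allocate. */
                        simple_unlock(&example_lock);
                        if (size != 0)
                                kfree(addr, size);
                        size = size_needed;
                        addr = kalloc(size);
                        if (addr == 0)
                                return 0;
                }

                *sizep = size;
                *actualp = actual;
                return addr;
        }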
+ */ + + if ((thread->state & TH_SWAPPED) == 0) { + int cpu; + + stack = thread->kernel_stack; + + for (cpu = 0; cpu < smp_get_numcpus(); cpu++) + if (percpu_array[cpu].active_thread == thread) { + stack = percpu_array[cpu].active_stack; + break; + } + } + + if (stack != 0) { + total++; + + if (stack_check_usage) { + vm_size_t usage = stack_usage(stack); + + if (usage > maxusage) { + maxusage = usage; + maxstack = (vm_offset_t) thread; + } + } + } + + thread_deallocate(thread); + } + + if (size != 0) + kfree(addr, size); + + *totalp = total; + *residentp = *spacep = total * round_page(KERNEL_STACK_SIZE); + *maxusagep = maxusage; + *maxstackp = maxstack; + return KERN_SUCCESS; +} + +/* + * Useful in the debugger: + */ +void +thread_stats(void) +{ + thread_t thread; + int total = 0, rpcreply = 0; + + queue_iterate(&default_pset.threads, thread, thread_t, pset_threads) { + total++; + if (thread->ith_rpc_reply != IP_NULL) + rpcreply++; + } + + printf("%d total threads.\n", total); + printf("%d using rpc_reply.\n", rpcreply); +} +#endif /* MACH_DEBUG */ + +/* + * thread_set_name + * + * Set the name of thread THREAD to NAME. + */ +kern_return_t +thread_set_name( + thread_t thread, + const_kernel_debug_name_t name) +{ + strncpy(thread->name, name, sizeof thread->name - 1); + thread->name[sizeof thread->name - 1] = '\0'; + return KERN_SUCCESS; +} diff --git a/kern/thread.h b/kern/thread.h new file mode 100644 index 0000000..81d3292 --- /dev/null +++ b/kern/thread.h @@ -0,0 +1,437 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: thread.h + * Author: Avadis Tevanian, Jr. + * + * This file contains the structure definitions for threads. + * + */ + +#ifndef _KERN_THREAD_H_ +#define _KERN_THREAD_H_ + +#include <mach/boolean.h> +#include <mach/thread_info.h> +#include <mach/thread_status.h> +#include <mach/machine/vm_types.h> +#include <mach/message.h> +#include <mach/port.h> +#include <mach/vm_prot.h> +#include <kern/ast.h> +#include <kern/mach_clock.h> +#include <kern/queue.h> +#include <kern/pc_sample.h> +#include <kern/processor.h> +#include <kern/sched_prim.h> /* event_t, continuation_t */ +#include <kern/timer.h> +#include <kern/lock.h> +#include <kern/sched.h> +#include <kern/task.h> /* for current_space(), current_map() */ +#include <machine/thread.h> +#include <ipc/ipc_kmsg_queue.h> + +/* + * Thread name buffer size. Use the same size as the task so + * the thread can inherit the task's name. 
+ */ +#define THREAD_NAME_SIZE TASK_NAME_SIZE + +struct thread { + /* Run queues */ + queue_chain_t links; /* current run queue links */ + run_queue_t runq; /* run queue p is on SEE BELOW */ +/* + * NOTE: The runq field in the thread structure has an unusual + * locking protocol. If its value is RUN_QUEUE_NULL, then it is + * locked by the thread_lock, but if its value is something else + * (i.e. a run_queue) then it is locked by that run_queue's lock. + */ + + /* Task information */ + task_t task; /* Task to which I belong */ + queue_chain_t thread_list; /* list of threads in task */ + + /* Flags */ + /* The flags are grouped here, but documented at the original + position. */ + union { + struct { + unsigned state:16; + unsigned wake_active:1; + unsigned active:1; + }; + event_t event_key; +/* These keys can be used with thread_wakeup and friends. */ +#define TH_EV_WAKE_ACTIVE(t) ((event_t) (&(t)->event_key + 0)) +#define TH_EV_STATE(t) ((event_t) (&(t)->event_key + 1)) + }; + + /* Thread bookkeeping */ + queue_chain_t pset_threads; /* list of all threads in proc set*/ + + /* Self-preservation */ + decl_simple_lock_data(,lock) + int ref_count; /* number of references to me */ + + /* Hardware state */ + pcb_t pcb; /* hardware pcb & machine state */ + vm_offset_t kernel_stack; /* accurate only if the thread is + not swapped and not executing */ + vm_offset_t stack_privilege;/* reserved kernel stack */ + + /* Swapping information */ + continuation_t swap_func; /* start here after swapin */ + + /* Blocking information */ + event_t wait_event; /* event we are waiting on */ + int suspend_count; /* internal use only */ + kern_return_t wait_result; /* outcome of wait - + may be examined by this thread + WITHOUT locking */ + /* Defined above */ + /* boolean_t wake_active; someone is waiting for this + thread to become suspended */ + /* int state; Thread state: */ +/* + * Thread states [bits or'ed] + */ +#define TH_WAIT 0x01 /* thread is queued for waiting */ +#define TH_SUSP 0x02 /* thread has been asked to stop */ +#define TH_RUN 0x04 /* thread is running or on runq */ +#define TH_UNINT 0x08 /* thread is waiting uninteruptibly */ +#define TH_HALTED 0x10 /* thread is halted at clean point ? */ + +#define TH_IDLE 0x80 /* thread is an idle thread */ + +#define TH_SCHED_STATE (TH_WAIT|TH_SUSP|TH_RUN|TH_UNINT) + +#define TH_SWAPPED 0x0100 /* thread has no kernel stack */ +#define TH_SW_COMING_IN 0x0200 /* thread is waiting for kernel stack */ + +#define TH_SWAP_STATE (TH_SWAPPED | TH_SW_COMING_IN) + + /* Scheduling information */ + int priority; /* thread's priority */ + int max_priority; /* maximum priority */ + int sched_pri; /* scheduled (computed) priority */ +#if MACH_FIXPRI + int sched_data; /* for use by policy */ + int policy; /* scheduling policy */ +#endif /* MACH_FIXPRI */ + int depress_priority; /* depressed from this priority */ + unsigned int cpu_usage; /* exp. decaying cpu usage [%cpu] */ + unsigned int sched_usage; /* load-weighted cpu usage [sched] */ + unsigned int sched_stamp; /* last time priority was updated */ + + /* VM global variables */ + + vm_offset_t recover; /* page fault recovery (copyin/out) */ + unsigned int vm_privilege; /* Can use reserved memory? 
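[Editorial sketch] The TH_EV_WAKE_ACTIVE/TH_EV_STATE macros above derive two distinct wait-event addresses from a single word of the thread. A hypothetical pairing showing how such a key would be used with the assert_wait/thread_wakeup primitives (neither helper below is part of this commit, and the null continuation passed to thread_block() simply means "resume here"):

        #include <kern/thread.h>
        #include <kern/sched_prim.h>

        /* Block until someone announces a change of THREAD's state word. */
        static void example_wait_for_state_change(thread_t thread)
        {
                assert_wait(TH_EV_STATE(thread), TRUE);  /* interruptible */
                thread_block((continuation_t) 0);
        }

        /* Run by whoever just updated thread->state under the thread lock. */
        static void example_announce_state_change(thread_t thread)
        {
                thread_wakeup(TH_EV_STATE(thread));
        }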
+ Implemented as a counter */ + + /* User-visible scheduling state */ + int user_stop_count; /* outstanding stops */ + + /* IPC data structures */ + struct thread *ith_next, *ith_prev; + mach_msg_return_t ith_state; + union { + mach_msg_size_t msize; /* max size for recvd msg */ + struct ipc_kmsg *kmsg; /* received message */ + } data; + mach_port_seqno_t ith_seqno; /* seqno of recvd message */ + + /* This queue is used only when destroying messages: + it prevents nasty recursion problems when destroying one message + causes other messages to be destroyed. + This queue should always be empty under normal circumstances. + See ipc_kmsg_destroy() for more details. */ + struct ipc_kmsg_queue ith_messages; + + decl_simple_lock_data(, ith_lock_data) + struct ipc_port *ith_self; /* not a right, doesn't hold ref */ + struct ipc_port *ith_sself; /* a send right */ + struct ipc_port *ith_exception; /* a send right */ + + mach_port_name_t ith_mig_reply; /* reply port for mig */ + struct ipc_port *ith_rpc_reply; /* reply port for kernel RPCs */ + + /* State saved when thread's stack is discarded */ + union { + struct { + mach_msg_user_header_t *msg; + mach_msg_option_t option; + mach_msg_size_t rcv_size; + mach_msg_timeout_t timeout; + mach_port_name_t notify; + struct ipc_object *object; + struct ipc_mqueue *mqueue; + } receive; + struct { + struct ipc_port *port; + int exc; + int code; + long subcode; + } exception; + void *other; /* catch-all for other state */ + } saved; + + /* Timing data structures */ + timer_data_t user_timer; /* user mode timer */ + timer_data_t system_timer; /* system mode timer */ + timer_save_data_t user_timer_save; /* saved user timer value */ + timer_save_data_t system_timer_save; /* saved sys timer val. */ + unsigned int cpu_delta; /* cpu usage since last update */ + unsigned int sched_delta; /* weighted cpu usage since update */ + + /* Creation time stamp */ + time_value64_t creation_time; + + /* Time-outs */ + timer_elt_data_t timer; /* timer for thread */ + timer_elt_data_t depress_timer; /* timer for priority depression */ + + /* Ast/Halt data structures */ + /* Defined above */ + /* boolean_t active; how alive is the thread */ + int ast; /* ast's needed. See ast.h */ + + /* Processor data structures */ + processor_set_t processor_set; /* assigned processor set */ + processor_t bound_processor; /* bound to processor ?*/ + + sample_control_t pc_sample; + +#if MACH_HOST + boolean_t may_assign; /* may assignment change? 
*/ + boolean_t assign_active; /* someone waiting for may_assign */ +#endif /* MACH_HOST */ + +#if NCPUS > 1 + processor_t last_processor; /* processor this last ran on */ +#endif /* NCPUS > 1 */ + +#if MACH_LOCK_MON + unsigned lock_stack; +#endif + + char name[THREAD_NAME_SIZE]; +}; + +#include <kern/cpu_number.h> + +/* typedef of thread_t is in kern/kern_types.h */ +typedef struct thread_shuttle *thread_shuttle_t; +#define THREAD_NULL ((thread_t) 0) +#define THREAD_SHUTTLE_NULL ((thread_shuttle_t)0) + +#define ith_msize data.msize +#define ith_kmsg data.kmsg +#define ith_wait_result wait_result + +#define ith_msg saved.receive.msg +#define ith_option saved.receive.option +#define ith_rcv_size saved.receive.rcv_size +#define ith_timeout saved.receive.timeout +#define ith_notify saved.receive.notify +#define ith_object saved.receive.object +#define ith_mqueue saved.receive.mqueue + +#define ith_port saved.exception.port +#define ith_exc saved.exception.exc +#define ith_exc_code saved.exception.code +#define ith_exc_subcode saved.exception.subcode + +#define ith_other saved.other + +#ifndef _KERN_KERN_TYPES_H_ +typedef struct thread *thread_t; + +#define THREAD_NULL ((thread_t) 0) + +typedef mach_port_t *thread_array_t; +#endif /* _KERN_KERN_TYPES_H_ */ + +#ifdef KERNEL +/* + * User routines + */ + +extern kern_return_t thread_create( + task_t parent_task, + thread_t *child_thread); +extern kern_return_t thread_terminate( + thread_t thread); +extern kern_return_t thread_terminate_release( + thread_t thread, + task_t task, + mach_port_name_t thread_name, + mach_port_name_t reply_port, + vm_offset_t address, + vm_size_t size); +extern kern_return_t thread_suspend( + thread_t thread); +extern kern_return_t thread_resume( + thread_t thread); +extern kern_return_t thread_abort( + thread_t thread); +extern void thread_start( + thread_t thread, + continuation_t start); +extern thread_t kernel_thread( + task_t task, + continuation_t start, + void *arg); +extern kern_return_t thread_priority( + thread_t thread, + int priority, + boolean_t set_max); +extern void thread_set_own_priority( + int priority); +extern kern_return_t thread_max_priority( + thread_t thread, + processor_set_t pset, + int max_priority); +extern kern_return_t thread_policy( + thread_t thread, + int policy, + int data); +extern void consider_thread_collect( + void); +extern void stack_privilege( + thread_t thread); +extern kern_return_t thread_get_state( + thread_t thread, + int flavor, + thread_state_t old_state, + natural_t *old_state_count); +extern kern_return_t thread_set_state( + thread_t thread, + int flavor, + thread_state_t new_state, + natural_t new_state_count); +extern kern_return_t thread_get_special_port( + thread_t thread, + int which, + struct ipc_port **portp); +extern kern_return_t thread_set_special_port( + thread_t thread, + int which, + struct ipc_port *port); +extern kern_return_t thread_info( + thread_t thread, + int flavor, + thread_info_t thread_info_out, + natural_t *thread_info_count); +extern kern_return_t thread_assign( + thread_t thread, + processor_set_t new_pset); +extern kern_return_t thread_assign_default( + thread_t thread); +extern void stack_collect(void); +#endif + +/* + * Kernel-only routines + */ + +extern void thread_init(void); +extern void thread_reference(thread_t); +extern void thread_deallocate(thread_t); +extern void thread_hold(thread_t); +extern kern_return_t thread_dowait( + thread_t thread, + boolean_t must_halt); +extern void thread_release(thread_t); +extern kern_return_t 
thread_halt( + thread_t thread, + boolean_t must_halt); +extern void thread_halt_self(continuation_t); +extern void thread_force_terminate(thread_t); +extern thread_t kernel_thread( + task_t task, + void (*start)(void), + void * arg); + +extern void reaper_thread(void) __attribute__((noreturn)); + +#if MACH_HOST +extern void thread_freeze( + thread_t thread); +extern void thread_doassign( + thread_t thread, + processor_set_t new_pset, + boolean_t release_freeze); +extern void thread_unfreeze( + thread_t thread); +#endif /* MACH_HOST */ + +/* + * Macro-defined routines + */ + +#define thread_pcb(th) ((th)->pcb) + +/* Shall be taken at splsched only */ +#ifdef MACH_LDEBUG +#define thread_lock(th) do { \ + assert_splsched(); \ + simple_lock_nocheck(&(th)->lock); \ +} while (0) +#define thread_unlock(th) do { \ + assert_splsched(); \ + simple_unlock_nocheck(&(th)->lock); \ +} while (0) +#else +#define thread_lock(th) simple_lock_nocheck(&(th)->lock) +#define thread_unlock(th) simple_unlock_nocheck(&(th)->lock) +#endif + +#define thread_should_halt(thread) \ + ((thread)->ast & (AST_HALT|AST_TERMINATE)) + +/* + * Machine specific implementations of the current thread macro + * designate this by defining CURRENT_THREAD. + */ +#ifndef CURRENT_THREAD +#define current_thread() (percpu_get(thread_t, active_thread)) +#endif /* CURRENT_THREAD */ + +#define current_stack() (percpu_get(vm_offset_t, active_stack)) + +#define current_task() (current_thread()->task) +#define current_space() (current_task()->itk_space) +#define current_map() (current_task()->map) + +#if MACH_DEBUG +void stack_init(vm_offset_t stack); +void stack_finalize(vm_offset_t stack); +void thread_stats(void); +#endif /* MACH_DEBUG */ + +#endif /* _KERN_THREAD_H_ */ diff --git a/kern/thread_swap.c b/kern/thread_swap.c new file mode 100644 index 0000000..a5fc052 --- /dev/null +++ b/kern/thread_swap.c @@ -0,0 +1,200 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * + * File: kern/thread_swap.c + * Author: Avadis Tevanian, Jr. + * Date: 1987 + * + * Mach thread swapper: + * Find idle threads to swap, freeing up kernel stack resources + * at the expense of allowing them to execute. + * + * Swap in threads that need to be run. This is done here + * by the swapper thread since it cannot be done (in general) + * when the kernel tries to place a thread on a run queue. + * + * Note: The act of swapping a thread in Mach does not mean that + * its memory gets forcibly swapped to secondary storage. 
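[Editorial sketch] The "shall be taken at splsched only" rule attached to thread_lock()/thread_unlock() above is the convention every routine in kern/thread.c follows; a minimal sketch of the canonical sequence, with a made-up helper name:

        #include <kern/thread.h>
        #include <machine/machspl.h>

        /* Hypothetical helper: read a field protected by the thread lock.
           Interrupts that could reschedule us are blocked first, then the
           per-thread simple lock is taken. */
        static int example_read_sched_pri(thread_t thread)
        {
                spl_t s;
                int pri;

                s = splsched();
                thread_lock(thread);

                pri = thread->sched_pri;

                thread_unlock(thread);
                (void) splx(s);
                return pri;
        }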
The memory + * for the task corresponding to a swapped thread is paged out + * through the normal paging mechanism. + * + */ + +#include <ipc/ipc_kmsg.h> +#include <kern/counters.h> +#include <kern/debug.h> +#include <kern/thread.h> +#include <kern/lock.h> +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <mach/vm_param.h> +#include <kern/sched_prim.h> +#include <kern/processor.h> +#include <kern/thread_swap.h> +#include <machine/machspl.h> /* for splsched */ + + + +queue_head_t swapin_queue; +def_simple_lock_data(static, swapper_lock_data) + +#define swapper_lock() simple_lock(&swapper_lock_data) +#define swapper_unlock() simple_unlock(&swapper_lock_data) + +/* + * swapper_init: [exported] + * + * Initialize the swapper module. + */ +void swapper_init(void) +{ + queue_init(&swapin_queue); + simple_lock_init(&swapper_lock_data); +} + +/* + * thread_swapin: [exported] + * + * Place the specified thread in the list of threads to swapin. It + * is assumed that the thread is locked, therefore we are at splsched. + * + * We don't bother with stack_alloc_try to optimize swapin; + * our callers have already tried that route. + */ + +void thread_swapin(thread_t thread) +{ + switch (thread->state & TH_SWAP_STATE) { + case TH_SWAPPED: + /* + * Swapped out - queue for swapin thread. + */ + thread->state = (thread->state & ~TH_SWAP_STATE) + | TH_SW_COMING_IN; + swapper_lock(); + enqueue_tail(&swapin_queue, &(thread->links)); + swapper_unlock(); + thread_wakeup((event_t) &swapin_queue); + break; + + case TH_SW_COMING_IN: + /* + * Already queued for swapin thread, or being + * swapped in. + */ + break; + + default: + /* + * Already swapped in. + */ + panic("thread_swapin"); + } +} + +/* + * thread_doswapin: + * + * Swapin the specified thread, if it should be runnable, then put + * it on a run queue. No locks should be held on entry, as it is + * likely that this routine will sleep (waiting for stack allocation). + */ +kern_return_t thread_doswapin(thread_t thread) +{ + kern_return_t kr; + spl_t s; + + /* + * Allocate the kernel stack. + */ + + kr = stack_alloc(thread, thread_continue); + if (kr != KERN_SUCCESS) + return kr; + + /* + * Place on run queue. + */ + + s = splsched(); + thread_lock(thread); + thread->state &= ~(TH_SWAPPED | TH_SW_COMING_IN); + if (thread->state & TH_RUN) + thread_setrun(thread, TRUE); + thread_unlock(thread); + (void) splx(s); + return KERN_SUCCESS; +} + +/* + * swapin_thread: [exported] + * + * This procedure executes as a kernel thread. Threads that need to + * be swapped in are swapped in by this thread. 
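[Editorial summary] For reference, the swap-state transitions driven by thread_swapin()/thread_doswapin() above, restated compactly (the bit values are defined in kern/thread.h):

        /*
         *   TH_SWAPPED        --thread_swapin()-->   TH_SW_COMING_IN
         *       (no kernel stack; queued on swapin_queue, swapin thread woken)
         *
         *   TH_SW_COMING_IN   --thread_doswapin()--> neither bit set
         *       (stack allocated; thread_setrun() if the thread is also TH_RUN)
         *
         *   neither bit set: the thread already has a kernel stack, and
         *       calling thread_swapin() on it is a bug ("panic: thread_swapin").
         */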
+ */ +static void __attribute__((noreturn)) swapin_thread_continue(void) +{ + for (;;) { + thread_t thread; + spl_t s; + + s = splsched(); + swapper_lock(); + + while ((thread = (thread_t) dequeue_head(&swapin_queue)) + != THREAD_NULL) { + kern_return_t kr; + swapper_unlock(); + (void) splx(s); + + kr = thread_doswapin(thread); /* may block */ + + s = splsched(); + swapper_lock(); + + if (kr != KERN_SUCCESS) { + enqueue_head(&swapin_queue, + (queue_entry_t) thread); + break; + } + } + + assert_wait((event_t) &swapin_queue, FALSE); + swapper_unlock(); + (void) splx(s); + counter(c_swapin_thread_block++); + thread_block(swapin_thread_continue); + } +} + +void swapin_thread(void) +{ + stack_privilege(current_thread()); + + swapin_thread_continue(); + /*NOTREACHED*/ +} diff --git a/kern/thread_swap.h b/kern/thread_swap.h new file mode 100644 index 0000000..d032acc --- /dev/null +++ b/kern/thread_swap.h @@ -0,0 +1,43 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/thread_swap.h + * + * Declarations of thread swapping routines. + */ + +#ifndef _KERN_THREAD_SWAP_H_ +#define _KERN_THREAD_SWAP_H_ + +/* + * exported routines + */ +extern void swapper_init(void); +extern void thread_swapin(thread_t thread); +extern kern_return_t thread_doswapin(thread_t thread); +extern void swapin_thread(void) __attribute__((noreturn)); + +#endif /* _KERN_THREAD_SWAP_H_ */ diff --git a/kern/timer.c b/kern/timer.c new file mode 100644 index 0000000..13dfc20 --- /dev/null +++ b/kern/timer.c @@ -0,0 +1,501 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach/kern_return.h> +#include <mach/port.h> +#include <kern/queue.h> +#include <kern/thread.h> +#include <mach/time_value.h> +#include <kern/timer.h> +#include <kern/cpu_number.h> + +#include <kern/assert.h> +#include <kern/macros.h> + + + +timer_t current_timer[NCPUS]; +timer_data_t kernel_timer[NCPUS]; + +/* + * init_timers initializes all non-thread timers and puts the + * service routine on the callout queue. All timers must be + * serviced by the callout routine once an hour. + */ +void init_timers(void) +{ + int i; + timer_t this_timer; + + /* + * Initialize all the kernel timers and start the one + * for this cpu (master) slaves start theirs later. + */ + this_timer = &kernel_timer[0]; + for ( i=0 ; i<NCPUS ; i++, this_timer++) { + timer_init(this_timer); + current_timer[i] = (timer_t) 0; + } + + start_timer(&kernel_timer[cpu_number()]); +} + +/* + * timer_init initializes a single timer. + */ +void timer_init(timer_t this_timer) +{ + this_timer->low_bits = 0; + this_timer->high_bits = 0; + this_timer->tstamp = 0; + this_timer->high_bits_check = 0; +} + +#if STAT_TIME +#else /* STAT_TIME */ + +#ifdef MACHINE_TIMER_ROUTINES + +/* + * Machine-dependent code implements the timer routines. + */ + +#else /* MACHINE_TIMER_ROUTINES */ + +/* + * start_timer starts the given timer for this cpu. It is called + * exactly once for each cpu during the boot sequence. + */ +void +start_timer(timer_t timer) +{ + timer->tstamp = get_timestamp(); + current_timer[cpu_number()] = timer; +} + +/* + * time_trap_uentry does trap entry timing. Caller must lock out + * interrupts and take a timestamp. ts is a timestamp taken after + * interrupts were locked out. Must only be called if trap was + * from user mode. + */ +void +time_trap_uentry(unsigned ts) +{ + int elapsed; + int mycpu; + timer_t mytimer; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif /* TIMER_MAX */ + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + if (mytimer->low_bits & TIMER_LOW_FULL) { + timer_normalize(mytimer); + } + + /* + * Record new timer. + */ + mytimer = &(current_thread()->system_timer); + current_timer[mycpu] = mytimer; + mytimer->tstamp = ts; +} + +/* + * time_trap_uexit does trap exit timing. Caller must lock out + * interrupts and take a timestamp. ts is a timestamp taken after + * interrupts were locked out. Must only be called if returning to + * user mode. + */ +void +time_trap_uexit(int ts) +{ + int elapsed; + int mycpu; + timer_t mytimer; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif /* TIMER_MAX */ + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + if (mytimer->low_bits & TIMER_LOW_FULL) { + timer_normalize(mytimer); /* SYSTEMMODE */ + } + + mytimer = &(current_thread()->user_timer); + + /* + * Record new timer. 
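[Editorial sketch] Each of the trap/interrupt timing routines in this file repeats the same accumulate-and-stop step on the current timer. The real code keeps it inline; purely as an illustration (and only meaningful in the non-STAT_TIME configuration these routines are compiled under), the step could be written as a helper like this, where ts comes from the machine's get_timestamp():

        #include <kern/timer.h>

        /* Hypothetical helper: charge MYTIMER for the time since its last
           timestamp, then mark it stopped (tstamp = 0). */
        static void example_timer_charge(timer_t mytimer, unsigned ts)
        {
                int elapsed = ts - mytimer->tstamp;

        #ifdef TIMER_MAX
                if (elapsed < 0)                /* timestamp counter wrapped */
                        elapsed += TIMER_MAX;
        #endif  /* TIMER_MAX */

                mytimer->low_bits += elapsed;
                mytimer->tstamp = 0;

                if (mytimer->low_bits & TIMER_LOW_FULL)
                        timer_normalize(mytimer);  /* keep low_bits from overflowing */
        }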
+ */ + current_timer[mycpu] = mytimer; + mytimer->tstamp = ts; +} + +/* + * time_int_entry does interrupt entry timing. Caller must lock out + * interrupts and take a timestamp. ts is a timestamp taken after + * interrupts were locked out. new_timer is the new timer to + * switch to. This routine returns the currently running timer, + * which MUST be pushed onto the stack by the caller, or otherwise + * saved for time_int_exit. + */ +timer_t +time_int_entry( + unsigned ts, + timer_t new_timer) +{ + int elapsed; + int mycpu; + timer_t mytimer; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif /* TIMER_MAX */ + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + /* + * Switch to new timer, and save old one on stack. + */ + new_timer->tstamp = ts; + current_timer[mycpu] = new_timer; + return(mytimer); +} + +/* + * time_int_exit does interrupt exit timing. Caller must lock out + * interrupts and take a timestamp. ts is a timestamp taken after + * interrupts were locked out. old_timer is the timer value pushed + * onto the stack or otherwise saved after time_int_entry returned + * it. + */ +void +time_int_exit( + unsigned ts, + timer_t old_timer) +{ + int elapsed; + int mycpu; + timer_t mytimer; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif /* TIMER_MAX */ + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + /* + * If normalization requested, do it. + */ + if (mytimer->low_bits & TIMER_LOW_FULL) { + timer_normalize(mytimer); + } + if (old_timer->low_bits & TIMER_LOW_FULL) { + timer_normalize(old_timer); + } + + /* + * Start timer that was running before interrupt. + */ + old_timer->tstamp = ts; + current_timer[mycpu] = old_timer; +} + +/* + * timer_switch switches to a new timer. The machine + * dependent routine/macro get_timestamp must return a timestamp. + * Caller must lock out interrupts. + */ +void +timer_switch(timer_t new_timer) +{ + int elapsed; + int mycpu; + timer_t mytimer; + unsigned ts; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + ts = get_timestamp(); + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif /* TIMER_MAX */ + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + /* + * Normalization check + */ + if (mytimer->low_bits & TIMER_LOW_FULL) { + timer_normalize(mytimer); + } + + /* + * Record new timer. + */ + current_timer[mycpu] = new_timer; + new_timer->tstamp = ts; +} + +#endif /* MACHINE_TIMER_ROUTINES */ +#endif /* STAT_TIME */ + +/* + * timer_normalize normalizes the value of a timer. It is + * called only rarely, to make sure low_bits never overflows. + */ +void timer_normalize(timer_t timer) +{ + unsigned int high_increment; + + /* + * Calculate high_increment, then write high check field first + * followed by low and high. timer_grab() reads these fields in + * reverse order so if high and high check match, we know + * that the values read are ok. 
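[Editorial summary] A compact restatement of the reader/writer protocol described in the comment above (the writer is timer_normalize() below, the reader is timer_grab()):

        /*
         *   writer:  high_bits_check += inc;  low_bits %= UNIT;  high_bits += inc;
         *   reader:  h = high_bits;  l = low_bits;  retry while h != high_bits_check;
         *
         * The writer touches the check word first and high_bits last; the
         * reader reads in the opposite order.  So if the reader finally sees
         * h == high_bits_check, no normalization overlapped its reads and
         * (h, l) is a consistent snapshot.  Plain additions to low_bits never
         * modify the high words, so they cannot force a retry.
         */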
+ */ + + high_increment = timer->low_bits/TIMER_HIGH_UNIT; + timer->high_bits_check += high_increment; + __sync_synchronize(); + timer->low_bits %= TIMER_HIGH_UNIT; + __sync_synchronize(); + timer->high_bits += high_increment; +} + +/* + * timer_grab() retrieves the value of a timer. + * + * Critical scheduling code uses TIMER_DELTA macro in timer.h + * (called from thread_timer_delta in sched.h). + * + * Keep coherent with db_time_grab below. + */ + +static void timer_grab( + timer_t timer, + timer_save_t save) +{ +#if MACH_ASSERT + unsigned int passes=0; +#endif + do { + (save)->high = (timer)->high_bits; + __sync_synchronize (); + (save)->low = (timer)->low_bits; + __sync_synchronize (); + /* + * If the timer was normalized while we were doing this, + * the high_bits value read above and the high_bits check + * value will not match because high_bits_check is the first + * field touched by the normalization procedure, and + * high_bits is the last. + * + * Additions to timer only touch low bits and + * are therefore atomic with respect to this. + */ +#if MACH_ASSERT + passes++; + assert((passes < 10000) ? (1) : ((timer->high_bits_check = save->high), 0)); +#endif + } while ( (save)->high != (timer)->high_bits_check); +} + +#define TIMER_TO_TIME_VALUE64(tv, timer) do { \ + (tv)->seconds = (timer)->high + (timer)->low / 1000000; \ + (tv)->nanoseconds = (timer)->low % 1000000 * 1000; \ +} while(0); + +/* + * timer_read reads the value of a timer into a time_value64_t. If the + * timer was modified during the read, retry. The value returned + * is accurate to the last update; time accumulated by a running + * timer since its last timestamp is not included. + */ + +void +timer_read( + timer_t timer, + time_value64_t *tv) +{ + timer_save_data_t temp; + + timer_grab(timer,&temp); + /* + * Normalize the result + */ +#ifdef TIMER_ADJUST + TIMER_ADJUST(&temp); +#endif /* TIMER_ADJUST */ + TIMER_TO_TIME_VALUE64(tv, &temp); +} + +/* + * thread_read_times reads the user and system times from a thread. + * Time accumulated since last timestamp is not included. Should + * be called at splsched() to avoid having user and system times + * be out of step. Doesn't care if caller locked thread. + * + * Needs to be kept coherent with thread_read_times ahead. + */ +void thread_read_times( + thread_t thread, + time_value64_t *user_time_p, + time_value64_t *system_time_p) +{ + timer_read(&thread->user_timer, user_time_p); + timer_read(&thread->system_timer, system_time_p); +} + +#if MACH_DEBUG + +/* + * + * Db_timer_grab(): used by db_thread_read_times. An nonblocking + * version of db_thread_get_times. Keep coherent with timer_grab + * above. + * + */ +static void db_timer_grab( + timer_t timer, + timer_save_t save) +{ + /* Don't worry about coherency */ + + (save)->high = (timer)->high_bits; + (save)->low = (timer)->low_bits; +} + +static void +nonblocking_timer_read( + timer_t timer, + time_value64_t *tv) +{ + timer_save_data_t temp; + + db_timer_grab(timer, &temp); + /* + * Normalize the result + */ +#ifdef TIMER_ADJUST + TIMER_ADJUST(&temp); +#endif /* TIMER_ADJUST */ + TIMER_TO_TIME_VALUE64(tv, &temp); +} + +/* + * Db_thread_read_times: A version of thread_read_times that + * can be called by the debugger. This version does not call + * timer_grab, which can block. Please keep it up to date with + * thread_read_times above. 
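[Editorial worked example] The TIMER_TO_TIME_VALUE64 conversion above, with numbers, assuming the STAT_TIME layout (high_bits in seconds, low_bits in microseconds):

        /*
         *      high = 12, low =  345678   ->  12 s, 345678000 ns
         *      high = 12, low = 1500000   ->  13 s, 500000000 ns
         *
         * The second case is why the division is there: between calls to
         * timer_normalize(), low_bits may hold more than one second's worth
         * of microseconds.
         */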
+ * + */ +void db_thread_read_times( + thread_t thread, + time_value64_t *user_time_p, + time_value64_t *system_time_p) +{ + nonblocking_timer_read(&thread->user_timer, user_time_p); + nonblocking_timer_read(&thread->system_timer, system_time_p); +} +#endif /* MACH_DEBUG */ + +/* + * timer_delta takes the difference of a saved timer value + * and the current one, and updates the saved value to current. + * The difference is returned as a function value. See + * TIMER_DELTA macro (timer.h) for optimization to this. + */ + +unsigned +timer_delta( + timer_t timer, + timer_save_t save) +{ + timer_save_data_t new_save; + unsigned result; + + timer_grab(timer,&new_save); + result = (new_save.high - save->high) * TIMER_HIGH_UNIT + + new_save.low - save->low; + save->high = new_save.high; + save->low = new_save.low; + return(result); +} diff --git a/kern/timer.h b/kern/timer.h new file mode 100644 index 0000000..92259a2 --- /dev/null +++ b/kern/timer.h @@ -0,0 +1,195 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_TIMER_H_ +#define _KERN_TIMER_H_ + +#include <kern/macros.h> + +#if STAT_TIME +/* + * Statistical timer definitions - use microseconds in timer, seconds + * in high unit field. No adjustment needed to convert to time_value64_t + * as a result. Service timers once an hour. + */ + +/* + * TIMER_MAX is needed if a 32-bit rollover timer needs to be adjusted for + * maximum value. + */ +#undef TIMER_MAX + +/* + * TIMER_RATE is the rate of the timer in ticks per second. It is used to + * calculate percent cpu usage. + */ +#define TIMER_RATE 1000000 + +/* + * TIMER_HIGH_UNIT is the unit for high_bits in terms of low_bits. + * Setting it to TIMER_RATE makes the high unit seconds. + */ +#define TIMER_HIGH_UNIT TIMER_RATE + +/* + * TIMER_ADJUST is used to adjust the value of a timer after it has been + * copied into a time_value64_t. No adjustment is needed if high_bits is in + * seconds. + */ +#undef TIMER_ADJUST + +/* + * MACHINE_TIMER_ROUTINES should defined if the timer routines are + * implemented in machine-dependent code (e.g. assembly language). + */ +#undef MACHINE_TIMER_ROUTINES + +#else /* STAT_TIME */ +/* + * Machine dependent definitions based on hardware support. + */ + +#include <machine/timer.h> + +#endif /* STAT_TIME */ + +/* + * Definitions for accurate timers. high_bits_check is a copy of + * high_bits that allows reader to verify that values read are ok. 
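[Editorial worked example] A worked example of timer_delta() above, using TIMER_HIGH_UNIT = 1000000 (the STAT_TIME value defined below in timer.h):

        /*
         *      save  = { high 3, low 900000 }     3.9 s accumulated
         *      timer = { high 4, low 100000 }     4.1 s accumulated
         *
         *      result = (4 - 3) * 1000000 + 100000 - 900000 = 200000 usec
         *
         * and save is advanced to { 4, 100000 }, so the next call measures
         * from this point.
         */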
+ */ + +struct timer { + unsigned low_bits; + unsigned high_bits; + unsigned high_bits_check; + unsigned tstamp; +}; + +typedef struct timer timer_data_t; +typedef struct timer *timer_t; + +/* + * Mask to check if low_bits is in danger of overflowing + */ + +#define TIMER_LOW_FULL 0x80000000U + +/* + * Kernel timers and current timer array. [Exported] + */ + +extern timer_t current_timer[NCPUS]; +extern timer_data_t kernel_timer[NCPUS]; + +/* + * save structure for timer readings. This is used to save timer + * readings for elapsed time computations. + */ + +struct timer_save { + unsigned low; + unsigned high; +}; + +typedef struct timer_save timer_save_data_t, *timer_save_t; + +/* + * Exported kernel interface to timers + */ + +#if STAT_TIME +#define start_timer(timer) +#define timer_switch(timer) +#else /* STAT_TIME */ +extern void start_timer(timer_t); +extern void timer_switch(timer_t); +#endif /* STAT_TIME */ + +extern void timer_read(timer_t, time_value64_t *); +extern void thread_read_times(thread_t, time_value64_t *, time_value64_t *); +extern unsigned timer_delta(timer_t, timer_save_t); +extern void timer_normalize(timer_t); +extern void timer_init(timer_t); + +#if STAT_TIME +/* + * Macro to bump timer values. + */ +#define timer_bump(timer, usec) \ +MACRO_BEGIN \ + (timer)->low_bits += usec; \ + if ((timer)->low_bits & TIMER_LOW_FULL) { \ + timer_normalize(timer); \ + } \ +MACRO_END + +#else /* STAT_TIME */ +/* + * Exported hardware interface to timers + */ +extern void time_trap_uentry(unsigned); +extern void time_trap_uexit(int); +extern timer_t time_int_entry(unsigned, timer_t); +extern void time_int_exit(unsigned, timer_t); +#endif /* STAT_TIME */ + +/* + * TIMER_DELTA finds the difference between a timer and a saved value, + * and updates the saved value. Look at high_bits check field after + * reading low because that's the first written by a normalize + * operation; this isn't necessary for current usage because + * this macro is only used when the timer can't be normalized: + * thread is not running, or running thread calls it on itself at + * splsched(). + */ + +#define TIMER_DELTA(timer, save, result) \ +MACRO_BEGIN \ + unsigned temp; \ + \ + temp = (timer).low_bits; \ + if ((save).high != (timer).high_bits_check) { \ + result += timer_delta(&(timer), &(save)); \ + } \ + else { \ + result += temp - (save).low; \ + (save).low = temp; \ + } \ +MACRO_END + +extern void init_timers(void); + +void timer_init(timer_t this_timer); + +#if MACH_DEBUG +void db_thread_read_times( + thread_t thread, + time_value64_t *user_time_p, + time_value64_t *system_time_p); +#endif + + +#endif /* _KERN_TIMER_H_ */ diff --git a/kern/xpr.c b/kern/xpr.c new file mode 100644 index 0000000..1b551eb --- /dev/null +++ b/kern/xpr.c @@ -0,0 +1,197 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +/* + * xpr silent tracing circular buffer. + */ +#include <string.h> + +#include <kern/debug.h> +#include <kern/xpr.h> +#include <kern/lock.h> +#include "cpu_number.h" +#include <machine/machspl.h> +#include <vm/vm_kern.h> + + +/* + * After a spontaneous reboot, it is desirable to look + * at the old xpr buffer. Assuming xprbootstrap allocates + * the buffer in the same place in physical memory and + * the reboot doesn't clear memory, this should work. + * xprptr will be reset, but the saved value should be OK. + * Just set xprenable false so the buffer isn't overwritten. + */ + +def_simple_lock_data(static, xprlock) + +boolean_t xprenable = TRUE; /* Enable xpr tracing */ +int nxprbufs = 0; /* Number of contiguous xprbufs allocated */ +int xprflags = 0; /* Bit mask of xpr flags enabled */ +struct xprbuf *xprbase; /* Pointer to circular buffer nxprbufs*sizeof(xprbuf)*/ +struct xprbuf *xprptr; /* Currently allocated xprbuf */ +struct xprbuf *xprlast; /* Pointer to end of circular buffer */ + +/*VARARGS1*/ +void xpr( + char *msg, + int arg1, + int arg2, + int arg3, + int arg4, + int arg5) +{ + spl_t s; + struct xprbuf *x; + + /* If we aren't initialized, ignore trace request */ + if (!xprenable || (xprptr == 0)) + return; + /* Guard against all interrupts and allocate next buffer. */ + s = splhigh(); + simple_lock(&xprlock); + x = xprptr++; + if (xprptr >= xprlast) { + /* wrap around */ + xprptr = xprbase; + } + /* Save xprptr in allocated memory. */ + *(struct xprbuf **)xprlast = xprptr; + simple_unlock(&xprlock); + splx(s); + x->msg = msg; + x->arg1 = arg1; + x->arg2 = arg2; + x->arg3 = arg3; + x->arg4 = arg4; + x->arg5 = arg5; + x->timestamp = XPR_TIMESTAMP; + x->cpuinfo = cpu_number(); +} + +void xprbootstrap(void) +{ + vm_offset_t addr; + vm_size_t size; + kern_return_t kr; + + simple_lock_init(&xprlock); + if (nxprbufs == 0) + return; /* assume XPR support not desired */ + + /* leave room at the end for a saved copy of xprptr */ + size = nxprbufs * sizeof(struct xprbuf) + sizeof xprptr; + + kr = kmem_alloc_wired(kernel_map, &addr, size); + if (kr != KERN_SUCCESS) + panic("xprbootstrap"); + + if (xprenable) { + /* + * If xprenable is set (the default) then we zero + * the buffer so xpr_dump doesn't encounter bad pointers. + * If xprenable isn't set, then we preserve + * the original contents of the buffer. This is useful + * if memory survives reboots, so xpr_dump can show + * the previous buffer contents. + */ + + memset((void *) addr, 0, size); + } + + xprbase = (struct xprbuf *) addr; + xprlast = &xprbase[nxprbufs]; + xprptr = xprbase; /* setting xprptr enables tracing */ +} + +int xprinitial = 0; + +void xprinit(void) +{ + xprflags |= xprinitial; +} + +#if MACH_KDB +#include <machine/setjmp.h> +#include <ddb/db_output.h> + +extern jmp_buf_t *db_recover; + +/* + * Print current content of xpr buffers (KDB's sake) + * Use stack order to make it understandable. + * + * Called as "!xpr_dump" this dumps the kernel's xpr buffer. + * Called with arguments, it can dump xpr buffers in user tasks, + * assuming they use the same format as the kernel. 
+ */ +void xpr_dump( + struct xprbuf *base, + int nbufs) +{ + jmp_buf_t db_jmpbuf; + jmp_buf_t *prev; + struct xprbuf *last, *ptr; + struct xprbuf *x; + int i; + spl_t s = s; + + if (base == 0) { + base = xprbase; + nbufs = nxprbufs; + } + + if (nbufs == 0) + return; + + if (base == xprbase) { + s = splhigh(); + simple_lock(&xprlock); + } + + last = base + nbufs; + ptr = * (struct xprbuf **) last; + + prev = db_recover; + if (_setjmp(db_recover = &db_jmpbuf) == 0) + for (x = ptr, i = 0; i < nbufs; i++) { + if (--x < base) + x = last - 1; + + if (x->msg == 0) + break; + + db_printf("<%d:%x:%x> ", x - base, x->cpuinfo, x->timestamp); + db_printf(x->msg, x->arg1,x->arg2,x->arg3,x->arg4,x->arg5); + } + db_recover = prev; + + if (base == xprbase) { + simple_unlock(&xprlock); + (void) splx(s); + } +} +#endif /* MACH_KDB */ diff --git a/kern/xpr.h b/kern/xpr.h new file mode 100644 index 0000000..72f6817 --- /dev/null +++ b/kern/xpr.h @@ -0,0 +1,97 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Include file for xpr circular buffer silent tracing. + * + */ +/* + * If the kernel flag XPRDEBUG is set, the XPR macro is enabled. The + * macro should be invoked something like the following: + * XPR(XPR_SYSCALLS, ("syscall: %d, 0x%x\n", syscallno, arg1); + * which will expand into the following code: + * if (xprflags & XPR_SYSCALLS) + * xpr("syscall: %d, 0x%x\n", syscallno, arg1); + * Xpr will log the pointer to the printf string and up to 6 arguments, + * along with a timestamp and cpuinfo (for multi-processor systems), into + * a circular buffer. The actual printf processing is delayed until after + * the buffer has been collected. It is assumed that the text/data segments + * of the kernel can easily be reconstructed in a post-processor which + * performs the printf processing. + * + * If the XPRDEBUG compilation switch is not set, the XPR macro expands + * to nothing. + */ + +#ifndef _KERN_XPR_H_ +#define _KERN_XPR_H_ + +#ifndef KERNEL +#include <sys/features.h> +#endif /* KERNEL */ + +#include <machine/xpr.h> + +#if XPR_DEBUG + +#define XPR(flags,xprargs) if(xprflags&flags) xpr xprargs + +extern int xprflags; +/* + * flags for message types. 
+ */ +#define XPR_SYSCALLS 0x00000001 +#define XPR_TRAPS 0x00000002 +#define XPR_SCHED 0x00000004 +#define XPR_NPTCP 0x00000008 +#define XPR_NP 0x00000010 +#define XPR_TCP 0x00000020 + +#define XPR_VM_OBJECT (1 << 8) +#define XPR_VM_OBJECT_CACHE (1 << 9) +#define XPR_VM_PAGE (1 << 10) +#define XPR_VM_PAGEOUT (1 << 11) +#define XPR_MEMORY_OBJECT (1 << 12) +#define XPR_VM_FAULT (1 << 13) +#define XPR_INODE_PAGER (1 << 14) +#define XPR_INODE_PAGER_DATA (1 << 15) + +#else /* XPR_DEBUG */ +#define XPR(flags,xprargs) +#endif /* XPR_DEBUG */ + +struct xprbuf { + char *msg; + int arg1,arg2,arg3,arg4,arg5; + int timestamp; + int cpuinfo; +}; + +extern void xpr(char *, int, int, int, int, int); +extern void xpr_dump(struct xprbuf *, int); +extern void xprinit(void); +extern void xprbootstrap(void); + +#endif /* _KERN_XPR_H_ */ |
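[Editorial sketch] To round out the xpr interface above, a small hypothetical trace point. Since the xpr() prototype takes a fixed five int arguments, unused trailing slots are padded with zeros here; enabling the trace also requires nxprbufs to be set before xprbootstrap() runs and the XPR_SYSCALLS bit to reach xprflags (for instance via xprinitial).

        #include <kern/xpr.h>

        /* Hypothetical call site.  With XPR_DEBUG configured and the
           XPR_SYSCALLS bit set in xprflags, this logs the format-string
           pointer and the arguments into the circular buffer; otherwise the
           XPR macro expands to nothing. */
        void example_trace_syscall(int syscallno, int arg1)
        {
                XPR(XPR_SYSCALLS,
                    ("syscall: %d, 0x%x\n", syscallno, arg1, 0, 0, 0));
        }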