Index: bsd/conf/MASTER
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/conf/MASTER,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 MASTER
--- bsd/conf/MASTER	5 Sep 2002 20:29:57 -0000	1.1.1.2
+++ bsd/conf/MASTER	3 Dec 2002 20:03:13 -0000
@@ -252,6 +252,8 @@
 #
 pseudo-device	bpfilter	4	init	bpf_init
 
+pseudo-device	systrace	1	init	systrace_init
+
 #
 #  shim to "linux" mach disk drivers  (mach drivers must also be turned on)
 #
@@ -259,4 +261,3 @@
 #pseudo-device diskshim
 
 pseudo-device	random		1	init	random_init
-
Index: bsd/conf/files
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/conf/files,v
retrieving revision 1.1.1.3
diff -u -r1.1.1.3 files
--- bsd/conf/files	5 Sep 2002 20:29:59 -0000	1.1.1.3
+++ bsd/conf/files	3 Dec 2002 20:03:13 -0000
@@ -60,6 +60,7 @@
 OPTIONS/ktrace				optional ktrace
 OPTIONS/profiling			optional profiling
 OPTIONS/vndevice			optional vndevice
+OPTIONS/systrace			optional systrace
 
 #
 # Network options
@@ -462,6 +463,7 @@
 bsd/kern/kern_subr.c			standard
 bsd/kern/kern_synch.c			standard
 bsd/kern/kern_sysctl.c			standard
+bsd/kern/kern_systrace.c		optional systrace
 bsd/kern/kern_newsysctl.c		standard
 bsd/kern/kern_mib.c			standard
 bsd/kern/sysctl_init.c			standard
Index: bsd/dev/ppc/systemcalls.c
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/dev/ppc/systemcalls.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 systemcalls.c
--- bsd/dev/ppc/systemcalls.c	5 Sep 2002 20:30:13 -0000	1.1.1.2
+++ bsd/dev/ppc/systemcalls.c	3 Dec 2002 20:03:13 -0000
@@ -20,6 +20,8 @@
  * @APPLE_LICENSE_HEADER_END@
  */
 
+#include "systrace.h"
+
 #include <kern/task.h>
 #include <kern/thread.h>
 #include <kern/thread_act.h>
@@ -36,6 +38,7 @@
 #include <sys/errno.h>
 #include <sys/ktrace.h>
 #include <sys/kdebug.h>
+#include <sys/systrace.h>
 
 extern void
 unix_syscall(
@@ -152,7 +155,11 @@
 
 	if (KTRPOINT(proc, KTR_SYSCALL))
 		ktrsyscall(proc, code, callp->sy_narg, uthread->uu_arg, funnel_type);
-
+#if NSYSTRACE > 0
+	if ((proc->p_flag & P_SYSTRACE) && (funnel_type != THR_FUNNEL_NULL))
+		error = systrace_enter(proc, code, uthread->uu_arg);
+	if (!error)
+#endif
 	error = (*(callp->sy_call))(proc, (void *)uthread->uu_arg, &(uthread->uu_rval[0]));
 
 	regs = find_user_regs(thread_act);
@@ -173,7 +180,10 @@
 
 	if (KTRPOINT(proc, KTR_SYSRET))
 		ktrsysret(proc, code, error, uthread->uu_rval[0], funnel_type);
-
+#if NSYSTRACE > 0
+	if ((proc->p_flag & P_SYSTRACE) && (funnel_type != THR_FUNNEL_NULL))
+		systrace_exit(proc, code, uthread->uu_arg, &(uthread->uu_rval[0]), error);
+#endif
 	if(funnel_type == KERNEL_FUNNEL) 
 		 exit_funnel_section(kernel_flock);
 	else if (funnel_type == NETWORK_FUNNEL)
Index: bsd/kern/kern_exec.c
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/kern/kern_exec.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 kern_exec.c
--- bsd/kern/kern_exec.c	5 Sep 2002 20:30:24 -0000	1.1.1.2
+++ bsd/kern/kern_exec.c	3 Dec 2002 20:03:14 -0000
@@ -656,7 +656,8 @@
 	if (load_result.unixproc) {
 		int pathptr;
 		
-		ucp = ucp - nc - NBPW;	/* begining of the STRING AREA */
+		/* beginning of the STRING AREA */
+		ucp = ucp - nc - NBPW - STACKGAPLEN;
 
 		/*
 		 * Support for new app package launching for Mac OS X allocates
Index: bsd/kern/kern_exit.c
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/kern/kern_exit.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 kern_exit.c
--- bsd/kern/kern_exit.c	5 Sep 2002 20:30:24 -0000	1.1.1.2
+++ bsd/kern/kern_exit.c	3 Dec 2002 20:03:14 -0000
@@ -59,6 +59,8 @@
  *
  *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
  */
+
+#include "systrace.h"
  
 #include <machine/reg.h>
 #include <machine/psl.h>
@@ -91,6 +93,7 @@
 #if KTRACE   
 #include <sys/ktrace.h>
 #endif
+#include <sys/systrace.h>
 
 extern char init_task_failure_data[];
 int exit1 __P((struct proc *, int, int *));
@@ -319,6 +322,10 @@
 		p->p_tracep = NULL;
 		vrele(tvp);
 	}
+#endif
+#if NSYSTRACE > 0
+	if (p->p_flag & P_SYSTRACE)
+		systrace_sys_exit(p);
 #endif
 
 	q = p->p_children.lh_first;
Index: bsd/kern/kern_fork.c
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/kern/kern_fork.c,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 kern_fork.c
--- bsd/kern/kern_fork.c	5 Sep 2002 20:30:24 -0000	1.1.1.2
+++ bsd/kern/kern_fork.c	3 Dec 2002 20:03:15 -0000
@@ -60,6 +60,8 @@
  *	@(#)kern_fork.c	8.8 (Berkeley) 2/14/95
  */
 
+#include "systrace.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/filedesc.h>
@@ -74,6 +76,7 @@
 #if KTRACE
 #include <sys/ktrace.h>
 #endif
+#include <sys/systrace.h>
 
 #include <mach/mach_types.h>
 #include <kern/mach_param.h>
@@ -163,6 +166,11 @@
 	newproc->p_flag  |= P_INVFORK;
 	newproc->p_vforkact = cur_act;
 
+#if NSYSTRACE > 0
+	/* Tell systrace what's happening. */
+	if (p->p_flag & P_SYSTRACE)
+		systrace_sys_fork(p, newproc);
+#endif
 	ut->uu_flag |= P_VFORK;
 	ut->uu_proc = newproc;
 	ut->uu_userstate = (void *)act_thread_csave();
@@ -368,6 +376,13 @@
 	LIST_INSERT_HEAD(&allproc, p2, p_list);
 	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
 	TAILQ_INIT(&p2->p_evlist);
+
+#if NSYSTRACE > 0
+	/* Tell systrace what's happening. */
+	if (p1->p_flag & P_SYSTRACE)
+		systrace_sys_fork(p1, p2);
+#endif
+
 	/*
 	 * Make child runnable, set start time.
 	 */
@@ -541,7 +556,6 @@
 	}
 #endif
 	return(p2);
-
 }
 
 #include <kern/zalloc.h>
Index: bsd/sys/file.h
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/sys/file.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 file.h
--- bsd/sys/file.h	5 Sep 2002 20:30:54 -0000	1.1.1.2
+++ bsd/sys/file.h	3 Dec 2002 20:03:16 -0000
@@ -81,6 +81,7 @@
 #define	DTYPE_SOCKET	2	/* communications endpoint */
 #define	DTYPE_PSXSHM	3	/* POSIX Shared memory */
 #define	DTYPE_PSXSEM	4	/* POSIX Semaphores */
+#define DTYPE_SYSTRACE	5	/* Systrace fileops */
 	short	f_type;		/* descriptor type */
 	short	f_count;	/* reference count */
 	short	f_msgcount;	/* references from message queue */
Index: bsd/sys/proc.h
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/sys/proc.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 proc.h
--- bsd/sys/proc.h	5 Sep 2002 20:30:56 -0000	1.1.1.2
+++ bsd/sys/proc.h	3 Dec 2002 20:03:16 -0000
@@ -153,6 +153,8 @@
 	int	p_traceflag;		/* Kernel trace points. */
 	struct	vnode *p_tracep;	/* Trace to vnode. */
 
+	void	*p_systrace;		/* Back pointer to systrace */
+
 	sigset_t p_siglist;		/* DEPRECATED. */
 
 	struct	vnode *p_textvp;	/* Vnode of executable. */
@@ -308,8 +310,10 @@
 /* Should be moved to machine-dependent areas. */
 #define	P_OWEUPC	0x08000	/* Owe process an addupc() call at next ast. */
 
+#define P_SYSTRACE	0x10000	/* Process system call tracing active */
+
 /* XXX Not sure what to do with these, yet. */
-#define	P_FSTRACE	0x10000	/* tracing via file system (elsewhere?) */
+/* #define	P_FSTRACE	0x10000	*/ /* tracing via file system (elsewhere?) */
 #define	P_SSTEP		0x20000	/* process needs single-step fixup ??? */
 
 #define	P_WAITING	0x0040000	/* process has a wait() in progress */
Index: bsd/sys/systm.h
===================================================================
RCS file: /Volumes/src/cvs/od/src/xnu/bsd/sys/systm.h,v
retrieving revision 1.1.1.2
diff -u -r1.1.1.2 systm.h
--- bsd/sys/systm.h	5 Sep 2002 20:30:57 -0000	1.1.1.2
+++ bsd/sys/systm.h	3 Dec 2002 20:03:16 -0000
@@ -142,6 +142,8 @@
 #define getenv_int(a,b) (*b = 0)
 #define	KASSERT(exp,msg)
 
+#define STACKGAPLEN	512
+
 /*
  * General function declarations.
  */
--- /dev/null	Tue Dec  3 14:27:21 2002
+++ bsd/kern/kern_systrace.c	Tue Dec  3 14:39:54 2002
@@ -0,0 +1,1772 @@
+/*	$OpenBSD: systrace.c,v 1.25 2002/11/10 04:34:56 art Exp $	*/
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/thread_act.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/tree.h>
+#include <sys/malloc.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/conf.h>
+#include <miscfs/devfs/devfs.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/filio.h>
+#include <sys/signalvar.h>
+#include <sys/lock.h>
+#include <sys/mount.h>
+#include <sys/systrace.h>
+
+#include <kern/assert.h>
+#include <mach/machine.h>
+#include <mach/mach_types.h>
+#include <mach/vm_param.h>
+#include <kern/task.h>
+#include <vm/vm_kern.h>
+
+caddr_t	stackgap_init(const struct proc *, size_t);
+void	*stackgap_alloc(const struct proc *, caddr_t *, size_t);
+struct proc *systrace_curproc(void);
+/* Character-device entry points for the systrace control device. */
+int	systraceopen(dev_t, int, int, struct proc *);
+int	systraceclose(dev_t, int, int, struct proc *);
+int	systraceread(dev_t, struct uio *, int);
+int	systracewrite(dev_t, struct uio *, int);
+int	systraceioctl(dev_t, u_long, caddr_t, int, struct proc *);
+int	systraceselect(dev_t, int, struct proc *);
+/* Credential helpers, then the fileops entry points (see systracefops). */
+uid_t	systrace_seteuid(struct proc *,  uid_t);
+gid_t	systrace_setegid(struct proc *,  gid_t);
+int	systracef_read(struct file *, struct uio *, struct ucred *, int, struct proc *);
+int	systracef_write(struct file *, struct uio *, struct ucred *, int, struct proc *);
+int	systracef_ioctl(struct file *, u_long, caddr_t, struct proc *p);
+int	systracef_select(struct file *, int, void *, struct proc *);
+int	systracef_close(struct file *, struct proc *);
+
+struct str_policy {
+	TAILQ_ENTRY(str_policy) next;	/* link on the fsystrace policies list */
+
+	int nr;				/* policy number quoted in policy ioctls */
+
+	int refcount;			/* bumped for each process using it */
+
+	int nsysent;			/* bound for indexing sysent[] below */
+	u_char *sysent;			/* per-syscall policy, indexed by code */
+};
+
+#define STR_PROC_ONQUEUE	0x01	/* Cleared when its message is read */
+#define STR_PROC_WAITANSWER	0x02	/* Cleared by systrace_answer() */
+#define STR_PROC_SYSCALLRES	0x04	/* Report syscall result on exit */
+#define STR_PROC_REPORT		0x08	/* Report emulation */
+#define STR_PROC_NEEDSEQNR	0x10	/* Answer must quote seqnr */
+#define STR_PROC_SETEUID	0x20	/* Elevate privileges */
+#define STR_PROC_SETEGID	0x40	/* Elevate group privileges */
+
+struct str_process {
+	TAILQ_ENTRY(str_process) next;	/* list link; presumably fst->processes */
+	TAILQ_ENTRY(str_process) msg_next;	/* link on fst->messages */
+
+	struct proc *proc;	/* the traced process */
+	int forcereport;	/* 0 at entry, -1 call failed, 1 report forced */
+	uid_t olduid;		/* real uid recorded by systrace_enter() */
+	gid_t oldgid;		/* real gid recorded by systrace_enter() */
+
+	pid_t pid;		/* pid used by systrace_find() for lookup */
+
+	struct fsystrace *parent;	/* owning systrace descriptor state */
+	struct str_policy *policy;	/* current policy, NULL means ask */
+
+	struct systrace_replace *replace;	/* pending argument replacement */
+
+	int flags;		/* STR_PROC_* */
+	short answer;		/* policy verdict from systrace_answer() */
+	short error;		/* errno to return when the answer is NEVER */
+	u_int16_t seqnr;	/* expected reply sequence number */
+
+	uid_t seteuid;		/* euid to switch to for one syscall */
+	uid_t saveuid;		/* euid to restore afterwards */
+	gid_t setegid;		/* egid to switch to for one syscall */
+	gid_t savegid;		/* egid to restore afterwards */
+
+	struct str_message msg;	/* message copied out by systracef_read() */
+};
+
+void systrace_lock(void);
+void systrace_unlock(void);
+
+/* Needs to be called with fst locked */
+
+int	systrace_attach(struct fsystrace *, pid_t);
+int	systrace_detach(struct str_process *);
+int	systrace_answer(struct str_process *, struct systrace_answer *);
+int	systrace_io(struct str_process *, struct systrace_io *);
+int	systrace_policy(struct fsystrace *, struct systrace_policy *);
+int	systrace_preprepl(struct str_process *, struct systrace_replace *);
+int	systrace_replace(struct str_process *, size_t, register_t []);
+int	systrace_getcwd(struct fsystrace *, struct str_process *);
+
+int	systrace_processready(struct str_process *);
+struct proc *systrace_find(struct str_process *);
+struct str_process *systrace_findpid(struct fsystrace *fst, pid_t pid);
+void	systrace_wakeup(struct fsystrace *);
+void	systrace_closepolicy(struct fsystrace *, struct str_policy *);
+int	systrace_insert_process(struct fsystrace *, struct proc *);
+struct str_policy *systrace_newpolicy(struct fsystrace *, int);
+int	systrace_msg_child(struct fsystrace *, struct str_process *, pid_t);
+int	systrace_msg_ask(struct fsystrace *, struct str_process *,
+	    int, size_t, register_t []);
+int	systrace_msg_result(struct fsystrace *, struct str_process *,
+	    int, int, size_t, register_t [], register_t []);
+int	systrace_msg_emul(struct fsystrace *, struct str_process *);
+int	systrace_msg_ugid(struct fsystrace *, struct str_process *);
+int	systrace_make_msg(struct str_process *, int, void *, size_t);
+
+#define SYSTRACE_MAJOR	-1	/* kernel picks major */
+
+/*
+ * A struct describing which functions will get invoked for certain
+ * actions.
+ */
+static struct cdevsw systrace_cdevsw =
+{
+	systraceopen,		/* open */
+	systraceclose,		/* close */
+	systraceread,		/* read */
+	systracewrite,		/* write */
+	systraceioctl,			/* ioctl */
+	nulldev,			/* stop */
+	nulldev,			/* reset */
+	NULL,				/* tty's */
+	eno_select,			/* select (systraceselect is not used) */
+	eno_mmap,			/* mmap */
+	eno_strat,			/* strategy */
+	eno_getc,			/* getc */
+	eno_putc,			/* putc */
+	0					/* type */
+};
+
+static struct fileops systracefops = {
+	systracef_read,		/* read */
+	systracef_write,	/* write */
+	systracef_ioctl,	/* ioctl */
+	systracef_select,	/* select */
+	systracef_close		/* close */
+};
+
+static int gSystraceInstalled = 0;	/* set by systrace_init() */
+
+int systrace_debug = 0;			/* nonzero enables DPRINTF output */
+static struct lock__bsd__ systrace_lck;	/* taken by systrace_lock() */
+/* Debug printf: call as DPRINTF(("fmt", args)); expands to a bare if. */
+#define DPRINTF(y)	if (systrace_debug) printf y;
+
+/*
+ * Called to initialize our device,
+ * and to register ourselves with devfs
+ */
+
+void
+systrace_init()
+{
+	int major;
+
+	/* Only the first call does any work. */
+	if (gSystraceInstalled != 0)
+		return;
+	gSystraceInstalled = 1;
+
+	/* Set up the global systrace lock. */
+	lockinit(&systrace_lck, PLOCK, "systrace", 0, 0);
+	/* Register the character device; the kernel picks the major. */
+	major = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
+	if (major >= 0) {
+		/* Publish the "systrace" node in devfs under that major. */
+		devfs_make_node(makedev (major, 0), DEVFS_CHAR,
+			UID_ROOT, GID_WHEEL, 0644, "systrace", 0);
+		return;
+	}
+	printf("systrace_init: failed to allocate a major number!\n");
+	gSystraceInstalled = 0;
+}
+
+/*
+ * Return the BSD process for the calling thread.  A thread whose uthread
+ * has P_VFORK set is resolved with current_proc(); any other thread is
+ * resolved from the BSD info attached to the current task.
+ */
+struct proc *
+systrace_curproc(void)
+{
+	struct uthread *ut;
+
+	ut = get_bsdthread_info(current_act());
+
+	if (ut->uu_flag & P_VFORK)
+		return (current_proc());
+
+	return ((struct proc *)get_bsdtask_info(current_task()));
+}
+
+/* ARGSUSED -- fileops read: copy out the first queued str_message */
+int
+systracef_read(struct file *fp,	struct uio *uio, struct ucred *cred, int flags,
+    struct proc *p)
+{
+	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
+	struct str_process *process;
+	int error = 0;
+	/* A read must request exactly one whole str_message. */
+	if (uio->uio_resid != sizeof(struct str_message))
+		return (EINVAL);
+
+ again:
+	systrace_lock();
+	lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+	systrace_unlock();
+	if ((process = TAILQ_FIRST(&fst->messages)) != NULL) {
+		error = uiomove((caddr_t)&process->msg,
+		    sizeof(struct str_message), uio);
+		if (!error) {
+			TAILQ_REMOVE(&fst->messages, process, msg_next);
+			CLR(process->flags, STR_PROC_ONQUEUE);
+
+			if (SYSTR_MSG_NOPROCESS(process))
+				FREE(process, M_TEMP);
+
+		}
+	} else if (TAILQ_FIRST(&fst->processes) == NULL) {
+		/* EOF situation: no message and no traced process; returns 0 */
+		;
+	} else {
+		if (fp->f_flag & FNONBLOCK)
+			error = EAGAIN;
+		else {
+			lockmgr(&fst->lock, LK_RELEASE, NULL, p);
+			error = tsleep(fst, PWAIT|PCATCH, "systrrd", 0);
+			if (error)
+				goto out;	/* lock was already released above */
+			goto again;
+		}
+
+	}
+
+	lockmgr(&fst->lock, LK_RELEASE, NULL, p);
+ out:
+	return (error);
+}
+
+/* ARGSUSED -- fileops write: not supported, always fails with EIO */
+int
+systracef_write(struct file *fp, struct uio *uio, struct ucred *cred,
+    int flags, struct proc *p)
+{
+	return (EIO);
+}
+
+#define POLICY_VALID(x)	((x) == SYSTR_POLICY_PERMIT || \
+			 (x) == SYSTR_POLICY_ASK || \
+			 (x) == SYSTR_POLICY_NEVER)
+
+/* ARGSUSED -- fileops ioctl: control interface of a systrace descriptor */
+int
+systracef_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
+{
+	int ret = 0;
+	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
+	struct filedesc *fdp;
+	struct str_process *strp;
+	pid_t pid = 0;
+	/* Pass 1: pull the target pid out of the argument; pid 0 is invalid. */
+	switch (cmd) {
+	case FIONBIO:
+	case FIOASYNC:
+		return (0);	/* accepted without taking any action here */
+
+	case STRIOCDETACH:
+	case STRIOCREPORT:
+		pid = *(pid_t *)data;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	case STRIOCANSWER:
+		pid = ((struct systrace_answer *)data)->stra_pid;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	case STRIOCIO:
+		pid = ((struct systrace_io *)data)->strio_pid;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	case STRIOCGETCWD:
+		pid = *(pid_t *)data;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	case STRIOCATTACH:
+	case STRIOCRESCWD:
+	case STRIOCPOLICY:
+		break;		/* no pid lookup needed for these commands */
+	case STRIOCREPLACE:
+		pid = ((struct systrace_replace *)data)->strr_pid;
+		if (!pid)
+			ret = EINVAL;
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+	if (ret)
+		return (ret);
+	/* Pass 2: lock the descriptor, resolve the pid, then dispatch. */
+	systrace_lock();
+	lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+	systrace_unlock();
+	if (pid) {
+		strp = systrace_findpid(fst, pid);
+		if (strp == NULL) {
+			ret = ESRCH;
+			goto unlock;
+		}
+	}
+	/* strp is only set (and only used) by commands that named a pid. */
+	switch (cmd) {
+	case STRIOCATTACH:
+		pid = *(pid_t *)data;
+		if (!pid)
+			ret = EINVAL;
+		else
+			ret = systrace_attach(fst, pid);
+		DPRINTF(("%s: attach to %u: %d\n", __func__, pid, ret));
+		break;
+	case STRIOCDETACH:
+		ret = systrace_detach(strp);
+		break;
+	case STRIOCREPORT:
+		SET(strp->flags, STR_PROC_REPORT);
+		break;
+	case STRIOCANSWER:
+		ret = systrace_answer(strp, (struct systrace_answer *)data);
+		break;
+	case STRIOCIO:
+		ret = systrace_io(strp, (struct systrace_io *)data);
+		break;
+	case STRIOCPOLICY:
+		ret = systrace_policy(fst, (struct systrace_policy *)data);
+		break;
+	case STRIOCREPLACE:
+		ret = systrace_preprepl(strp, (struct systrace_replace *)data);
+		break;
+	case STRIOCRESCWD:
+		if (!fst->fd_pid) {
+			ret = EINVAL;
+			break;
+		}
+		fdp = p->p_fd;
+
+		/* Release cwd from other process */
+		if (fdp->fd_cdir)
+			vrele(fdp->fd_cdir);
+		if (fdp->fd_rdir)
+			vrele(fdp->fd_rdir);
+		/* This restores the cwd we had before */
+		fdp->fd_cdir = fst->fd_cdir;
+		fdp->fd_rdir = fst->fd_rdir;
+		/* Note that we are normal again */
+		fst->fd_pid = 0;
+		fst->fd_cdir = fst->fd_rdir = NULL;
+		break;
+	case STRIOCGETCWD:
+		ret = systrace_getcwd(fst, strp);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+ unlock:
+	lockmgr(&fst->lock, LK_RELEASE, NULL, p);
+	return (ret);
+}
+
+/*
+ * fileops select: FREAD is ready once a message is queued; else selrecord.
+ */
+int
+systracef_select(struct file *fp, int which, void *wql, struct proc *p)
+{
+	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
+	int have_msg;
+
+	if (which != FREAD)
+		return (0);
+	systrace_lock();
+	lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+	systrace_unlock();
+	have_msg = (TAILQ_FIRST(&fst->messages) != NULL);
+	if (have_msg == 0)
+		selrecord(p, &fst->si, wql);
+	lockmgr(&fst->lock, LK_RELEASE, NULL, p);
+	return (have_msg);
+}
+
+/*
+ * Stat on a systrace descriptor is not supported: always EOPNOTSUPP.
+ * This function is not listed in the systracefops table.
+ */
+int
+systracef_stat(struct file *fp, struct stat *sb, struct proc *p)
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * fileops close: tear down a systrace descriptor.  Every traced process is
+ * detached and sent SIGKILL, queued messages and policies are discarded,
+ * the saved directory vnodes are released and the fsystrace is freed.
+ */
+int
+systracef_close(struct file *fp, struct proc *p)
+{
+	struct fsystrace *fst = (struct fsystrace *)fp->f_data;
+	struct str_process *sp;
+	struct str_policy *pol;
+	struct proc *victim;
+
+	systrace_lock();
+	lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+	systrace_unlock();
+
+	/* Detach each traced process, then kill it. */
+	while ((sp = TAILQ_FIRST(&fst->processes)) != NULL) {
+		/* Save the proc first; sp is not touched after the detach. */
+		victim = sp->proc;
+		systrace_detach(sp);
+		psignal(victim, SIGKILL);
+	}
+
+	/* Free any fork and exit messages that are still queued. */
+	while ((sp = TAILQ_FIRST(&fst->messages)) != NULL) {
+		TAILQ_REMOVE(&fst->messages, sp, msg_next);
+		FREE(sp, M_TEMP);
+	}
+
+	/* Drop every policy of this descriptor. */
+	while ((pol = TAILQ_FIRST(&fst->policies)) != NULL)
+		systrace_closepolicy(fst, pol);
+
+	/* Release the saved cwd/root vnodes, if any. */
+	if (fst->fd_cdir != NULL)
+		vrele(fst->fd_cdir);
+	if (fst->fd_rdir != NULL)
+		vrele(fst->fd_rdir);
+	lockmgr(&fst->lock, LK_RELEASE, NULL, p);
+
+	FREE(fp->f_data, M_TEMP); /* was M_XDATA */
+	fp->f_data = NULL;
+
+	return (0);
+}
+
+/* Take the global systrace lock exclusively for the current process. */
+void
+systrace_lock(void)
+{
+	lockmgr(&systrace_lck, LK_EXCLUSIVE, NULL, systrace_curproc());
+}
+
+/* Release the global systrace lock on behalf of the current process. */
+void
+systrace_unlock(void)
+{
+	lockmgr(&systrace_lck, LK_RELEASE, NULL, systrace_curproc());
+}
+
+/*
+ * cdevsw open entry: opening the control device needs no setup, so this
+ * always returns 0.
+ */
+int
+systraceopen(dev_t dev, int flag, int mode, struct proc *p)
+{
+	return (0);
+}
+
+/*
+ * cdevsw close entry: nothing to tear down for the control device, so
+ * this always returns 0.
+ */
+int
+systraceclose(dev_t dev, int flag, int mode, struct proc *p)
+{
+	return (0);
+}
+
+/*
+ * cdevsw read entry: the control device cannot be read; always EIO.
+ */
+int
+systraceread(dev_t dev, struct uio *uio, int ioflag)
+{
+	return (EIO);
+}
+
+/*
+ * cdevsw write entry: the control device cannot be written; always EIO.
+ */
+int
+systracewrite(dev_t dev, struct uio *uio, int ioflag)
+{
+	return (EIO);
+}
+
+/*
+ * cdevsw ioctl entry for the systrace control device.  The only command
+ * is SYSTR_CLONE, which allocates a fresh fsystrace, wraps it in a new
+ * DTYPE_SYSTRACE file and stores the descriptor number in *(int *)data.
+ * Any other command yields EINVAL.
+ */
+int
+systraceioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
+{
+	struct fsystrace *fst = NULL;
+	struct file *fp;
+	int newfd, error;
+
+	if (cmd != SYSTR_CLONE)
+		return (EINVAL);
+
+	MALLOC(fst, struct fsystrace *, sizeof(struct fsystrace),
+	    M_TEMP, M_WAITOK); /* was M_XDATA */
+	memset(fst, 0, sizeof(struct fsystrace));
+	lockinit(&fst->lock, PLOCK, "systrace", 0, 0);
+	TAILQ_INIT(&fst->processes);
+	TAILQ_INIT(&fst->messages);
+	TAILQ_INIT(&fst->policies);
+
+	/* Record the opener's privilege and real ids for later checks. */
+	if (suser(p->p_ucred, &p->p_acflag) == 0)
+		fst->issuser = 1;
+	fst->p_ruid = p->p_cred->p_ruid;
+	fst->p_rgid = p->p_cred->p_rgid;
+
+	error = falloc(p, &fp, &newfd);
+	if (error != 0) {
+		FREE(fst, M_TEMP); /* was M_XDATA */
+		return (error);
+	}
+
+	fp->f_flag = FREAD | FWRITE;
+	fp->f_type = DTYPE_SYSTRACE;
+	fp->f_ops = &systracefops;
+	fp->f_data = (caddr_t) fst;
+	*(int *)data = newfd;
+	/* The file is fully set up; clear UF_RESERVED on its slot. */
+	*fdflags(p, newfd) &= ~UF_RESERVED;
+
+	return (0);
+}
+
+/*
+ * Device-level select stub, always 0; systrace_cdevsw uses eno_select.
+ */
+int
+systraceselect(dev_t dev, int rw, struct proc *p)
+{
+	return (0);
+}
+
+void
+systrace_wakeup(struct fsystrace *fst)
+{
+	wakeup((caddr_t)fst);		/* sleepers in tsleep(fst, ...) */
+	selwakeup(&fst->si);		/* selectors recorded on fst->si */
+}
+
+/*
+ * Look up the process behind strp by pid.  Returns it only if the pid
+ * still maps to the same proc and that proc is still marked P_SYSTRACE;
+ * otherwise returns NULL.
+ */
+struct proc *
+systrace_find(struct str_process *strp)
+{
+	struct proc *found = pfind(strp->pid);
+
+	if (found == NULL || found != strp->proc)
+		return (NULL);
+	if (!ISSET(found->p_flag, P_SYSTRACE))
+		return (NULL);
+	return (found);
+}
+
+/*
+ * Exit hook: queue an exit message (pid -1), detach, clear P_SYSTRACE.
+ */
+void
+systrace_sys_exit(struct proc *proc)
+{
+	struct proc *self = systrace_curproc();
+	struct str_process *strp;
+	struct fsystrace *fst;
+	systrace_lock();
+	strp = proc->p_systrace;
+	if (strp == NULL)
+		systrace_unlock();
+	else {
+		fst = strp->parent;
+		lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, self);
+		systrace_unlock();
+		systrace_msg_child(fst, strp, -1);
+		systrace_detach(strp);
+		lockmgr(&fst->lock, LK_RELEASE, NULL, self);
+	}
+	CLR(proc->p_flag, P_SYSTRACE);
+}
+
+/*
+ * Fork hook: when the parent (oldproc) is traced, start tracing the child
+ * p under the same fsystrace, share the parent's policy with it, and queue
+ * a fork message carrying the child's pid.
+ */
+void
+systrace_sys_fork(struct proc *oldproc, struct proc *p)
+{
+	struct proc *self = systrace_curproc();
+	struct str_process *parent_sp, *child_sp;
+	struct fsystrace *fst;
+
+	systrace_lock();
+	parent_sp = oldproc->p_systrace;
+	if (parent_sp == NULL) {
+		systrace_unlock();
+		return;
+	}
+	fst = parent_sp->parent;
+	lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, self);
+	systrace_unlock();
+	if (systrace_insert_process(fst, p) == 0) {
+		child_sp = systrace_findpid(fst, p->p_pid);
+		if (child_sp == NULL)
+			panic("systrace_sys_fork");
+		child_sp->policy = parent_sp->policy;
+		if (child_sp->policy != NULL)
+			child_sp->policy->refcount++;
+		systrace_msg_child(fst, parent_sp, p->p_pid);
+	}
+	lockmgr(&fst->lock, LK_RELEASE, NULL, self);
+}
+
+int
+systrace_enter(struct proc *p, int code, register_t args[])
+{
+	struct sysent *callp;
+	struct str_process *strp;
+	struct str_policy *strpolicy;
+	struct fsystrace *fst = NULL;
+	struct pcred *pc;
+	int policy, error = 0, maycontrol = 0, issuser = 0;
+	int argsize;
+	/* Entry hook: returns 0 to let the syscall run, otherwise an errno. */
+	if (code < 0 || code >= nsysent)
+		return (EINVAL);
+
+	systrace_lock();
+	strp = p->p_systrace;
+	if (strp == NULL) {
+		systrace_unlock();
+		return (EINVAL);
+	}
+
+	assert(strp->proc == p);
+
+	fst = strp->parent;
+	/* Hand over from the global lock to this descriptor's lock. */
+	lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+	systrace_unlock();
+
+	strp->forcereport = 0;
+
+	/*
+	 * We can not monitor a SUID process unless we are root,
+	 * but we wait until it executes something unprivileged.
+	 * A non-root user may only monitor if the real uid and
+	 * real gid match the monitored process.  Changing the
+	 * uid or gid causes P_SUGID to be set.
+	 */
+	if (fst->issuser) {
+		maycontrol = 1;
+		issuser =1 ;
+	} else if (!(p->p_flag & P_SUGID)) {
+		maycontrol = fst->p_ruid == p->p_cred->p_ruid &&
+		    fst->p_rgid == p->p_cred->p_rgid;
+	}
+
+	if (!maycontrol) {
+		policy = SYSTR_POLICY_PERMIT;
+	} else {
+		/* Find out current policy */
+		if ((strpolicy = strp->policy) == NULL)
+			policy = SYSTR_POLICY_ASK;
+		else {
+			if (code >= strpolicy->nsysent)
+				policy = SYSTR_POLICY_NEVER;
+			else
+				policy = strpolicy->sysent[code];
+		}
+	}
+	/* Size in bytes of the argument block passed to the helpers. */
+	callp = &sysent[code];
+	argsize = callp->sy_narg * sizeof(int);
+
+	switch (policy) {
+	case SYSTR_POLICY_PERMIT:
+		break;
+	case SYSTR_POLICY_ASK:
+		/* Puts the current process to sleep, return unlocked */
+		error = systrace_msg_ask(fst, strp, code, argsize, args);
+
+		/* lock has been released in systrace_msg_ask() */
+		fst = NULL;
+
+		/* We might have detached by now for some reason */
+		if (error)
+			break;
+
+		systrace_lock();
+		if ((strp = p->p_systrace) != NULL) {
+			fst = strp->parent;
+			lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+			systrace_unlock();
+
+			if (strp->answer == SYSTR_POLICY_NEVER) {
+				error = strp->error;
+				if (strp->replace != NULL) {
+					FREE(strp->replace, M_TEMP);/*M_XDATA*/
+					strp->replace = NULL;
+				}
+			} else {
+				/* Replace the arguments if necessary */
+				if (strp->replace != NULL) {
+					error = systrace_replace(strp, argsize, args);
+				}
+			}
+		} else
+			systrace_unlock();
+		break;
+	default:	/* a positive policy value is itself the errno */
+		if (policy > 0)
+			error = policy;
+		else
+			error = EPERM;
+		break;
+	}
+
+	if (fst) {
+		lockmgr(&fst->lock, LK_RELEASE, NULL, p);
+		fst = NULL;
+	}
+	/* Re-check p_systrace: we may have been detached in the meantime. */
+	systrace_lock();
+	if ((strp = p->p_systrace) == NULL)
+		goto out;
+
+	if (error) {
+		strp->forcereport = -1;	/* systrace_exit() returns early on -1 */
+		goto out;
+	}
+	/* Snapshot the real ids; systrace_exit() compares against them. */
+	pc = p->p_cred;
+	strp->olduid = pc->p_ruid;
+	strp->oldgid = pc->p_rgid;
+		
+	/* Elevate privileges as desired */
+	if (issuser) {
+		if (ISSET(strp->flags, STR_PROC_SETEUID))
+			strp->saveuid = systrace_seteuid(p, strp->seteuid);
+		if (ISSET(strp->flags, STR_PROC_SETEGID))
+			strp->savegid = systrace_setegid(p, strp->setegid);
+	} else
+		CLR(strp->flags, STR_PROC_SETEUID|STR_PROC_SETEGID);
+ out:
+	systrace_unlock();
+	return (error);
+}
+
+/*
+ * Syscall-return hook for a traced process: undo any privilege elevation
+ * from systrace_enter(), then queue the emulation, uid/gid-change and
+ * result reports that apply.  No-op if systrace_enter() failed the call.
+ */
+void
+systrace_exit(struct proc *p, register_t code, register_t args[],
+    register_t retval[], int error)
+{
+	struct str_process *strp;
+	struct fsystrace *fst;
+	struct pcred *pc;
+	int argsize;
+
+	systrace_lock();
+	strp = p->p_systrace;
+	if (strp == NULL || strp->forcereport == -1) {
+		systrace_unlock();
+		return;
+	}
+
+	/* Never index sysent[] with a number outside the table. */
+	argsize = (code >= 0 && code < nsysent) ?
+	    sysent[code].sy_narg * sizeof(int) : 0;
+
+	/* Return to old privileges */
+	pc = p->p_cred;
+	if (ISSET(strp->flags, STR_PROC_SETEUID)) {
+		if (pc->pc_ucred->cr_uid == strp->seteuid)
+			systrace_seteuid(p, strp->saveuid);
+		CLR(strp->flags, STR_PROC_SETEUID);
+	}
+	if (ISSET(strp->flags, STR_PROC_SETEGID)) {
+		if (pc->pc_ucred->cr_gid == strp->setegid)
+			systrace_setegid(p, strp->savegid);
+		CLR(strp->flags, STR_PROC_SETEGID);
+	}
+
+	if (p->p_flag & P_SUGID) {
+		if ((fst = strp->parent) == NULL || !fst->issuser) {
+			systrace_unlock();
+			return;
+		}
+	}
+
+	/* See if we should force a report */
+	if (ISSET(strp->flags, STR_PROC_REPORT)) {
+		CLR(strp->flags, STR_PROC_REPORT);
+		strp->forcereport = 1;
+	}
+	if (strp->forcereport) {
+		fst = strp->parent;
+		lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+		systrace_unlock();
+		/* Old policy is without meaning now */
+		if (strp->policy) {
+			systrace_closepolicy(fst, strp->policy);
+			strp->policy = NULL;
+		}
+		systrace_msg_emul(fst, strp);
+	} else
+		systrace_unlock();
+
+	systrace_lock();
+	if ((strp = p->p_systrace) == NULL) {
+		systrace_unlock();
+		return;
+	}
+
+	/* Report if the real uid or gid changed during the call */
+	if (strp->olduid != p->p_cred->p_ruid ||
+	    strp->oldgid != p->p_cred->p_rgid) {
+		fst = strp->parent;
+		lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+		systrace_unlock();
+		systrace_msg_ugid(fst, strp);
+	} else
+		systrace_unlock();
+
+	/* Report result from system call */
+	systrace_lock();
+	if ((strp = p->p_systrace) == NULL) {
+		systrace_unlock();
+		return;
+	}
+	if (ISSET(strp->flags, STR_PROC_SYSCALLRES)) {
+		fst = strp->parent;
+		lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+		systrace_unlock();
+		CLR(strp->flags, STR_PROC_SYSCALLRES);
+		systrace_msg_result(fst, strp, error, code, argsize, args, retval);
+	} else
+		systrace_unlock();
+}
+
+/*
+ * Set the effective uid of p to euid and return the previous one.  The
+ * credential is replaced by a private copy (crcopy) before it is modified,
+ * and the process is flagged P_SUGID.  Nothing changes if euid is current.
+ */
+uid_t
+systrace_seteuid(struct proc *p,  uid_t euid)
+{
+	struct pcred *cred = p->p_cred;
+	const uid_t previous = cred->pc_ucred->cr_uid;
+
+	if (previous != euid) {
+		cred->pc_ucred = crcopy(cred->pc_ucred);
+		cred->pc_ucred->cr_uid = euid;
+		p->p_flag |= P_SUGID;
+	}
+	return (previous);
+}
+
+/*
+ * Set the effective gid of p to egid and return the previous one.  The
+ * credential is replaced by a private copy (crcopy) before it is modified,
+ * and the process is flagged P_SUGID.  Nothing changes if egid is current.
+ */
+gid_t
+systrace_setegid(struct proc *p,  gid_t egid)
+{
+	struct pcred *cred = p->p_cred;
+	const gid_t previous = cred->pc_ucred->cr_gid;
+
+	if (previous != egid) {
+		cred->pc_ucred = crcopy(cred->pc_ucred);
+		cred->pc_ucred->cr_gid = egid;
+		p->p_flag |= P_SUGID;
+	}
+	return (previous);
+}
+
+/*
+ * Apply the monitor's answer to a waiting process and wake it up.
+ * Called with fst locked.
+ */
+int
+systrace_answer(struct str_process *strp, struct systrace_answer *ans)
+{
+	int error;
+
+	DPRINTF(("%s: %u: policy %d\n", __func__,
+	    ans->stra_pid, ans->stra_policy));
+
+	/* The monitor must supply a known policy value ... */
+	if (!POLICY_VALID(ans->stra_policy))
+		return (EINVAL);
+
+	/* ... quote the sequence number we are expecting ... */
+	if (ans->stra_seqnr != strp->seqnr)
+		return (ESRCH);
+
+	/* ... and the process must be ready to take an answer. */
+	error = systrace_processready(strp);
+	if (error != 0)
+		return (error);
+
+	/* Record the verdict; an errno of 0 defaults to EPERM. */
+	strp->answer = ans->stra_policy;
+	strp->error = ans->stra_error;
+	if (strp->error == 0)
+		strp->error = EPERM;
+
+	/* Carry over the optional per-call requests. */
+	if (ISSET(ans->stra_flags, SYSTR_FLAGS_RESULT))
+		SET(strp->flags, STR_PROC_SYSCALLRES);
+	if (ISSET(ans->stra_flags, SYSTR_FLAGS_SETEUID)) {
+		strp->seteuid = ans->stra_seteuid;
+		SET(strp->flags, STR_PROC_SETEUID);
+	}
+	if (ISSET(ans->stra_flags, SYSTR_FLAGS_SETEGID)) {
+		strp->setegid = ans->stra_setegid;
+		SET(strp->flags, STR_PROC_SETEGID);
+	}
+
+	/* Clearing the flag indicates to the process that it woke up */
+	CLR(strp->flags, STR_PROC_WAITANSWER);
+	wakeup(strp);
+
+	return (0);
+}
+
+int
+systrace_policy(struct fsystrace *fst, struct systrace_policy *pol)
+{
+	/* Carry out the policy operation selected by pol->strp_op. */
+	struct str_policy *sp;
+	struct str_process *target;
+
+	if (pol->strp_op == SYSTR_POLICY_NEW) {
+		/* Create a policy; its number is handed back in strp_num */
+		DPRINTF(("%s: new, ents %d\n", __func__,
+			    pol->strp_maxents));
+		if (pol->strp_maxents <= 0 || pol->strp_maxents > 1024)
+			return (EINVAL);
+		sp = systrace_newpolicy(fst, pol->strp_maxents);
+		if (sp == NULL)
+			return (ENOBUFS);
+		pol->strp_num = sp->nr;
+		return (0);
+	}
+
+	if (pol->strp_op == SYSTR_POLICY_ASSIGN) {
+		/* Bind policy strp_num to process strp_pid */
+		DPRINTF(("%s: %d -> pid %d\n", __func__,
+			    pol->strp_num, pol->strp_pid));
+		TAILQ_FOREACH(sp, &fst->policies, next)
+		    if (sp->nr == pol->strp_num)
+			    break;
+		if (sp == NULL)
+			return (EINVAL);
+		target = systrace_findpid(fst, pol->strp_pid);
+		if (target == NULL)
+			return (EINVAL);
+
+		if (target->policy)
+			systrace_closepolicy(fst, target->policy);
+		target->policy = sp;
+		sp->refcount++;
+		return (0);
+	}
+
+	if (pol->strp_op == SYSTR_POLICY_MODIFY) {
+		/* Set entry strp_code of policy strp_num to strp_policy */
+		DPRINTF(("%s: %d: code %d -> policy %d\n", __func__,
+		    pol->strp_num, pol->strp_code, pol->strp_policy));
+		if (!POLICY_VALID(pol->strp_policy))
+			return (EINVAL);
+		TAILQ_FOREACH(sp, &fst->policies, next)
+		    if (sp->nr == pol->strp_num)
+			    break;
+		if (sp == NULL)
+			return (EINVAL);
+		if (pol->strp_code < 0 || pol->strp_code >= sp->nsysent)
+			return (EINVAL);
+		sp->sysent[pol->strp_code] = pol->strp_policy;
+		return (0);
+	}
+	return (EINVAL);
+}
+
+int
+systrace_processready(struct str_process *strp)
+{
+	/*
+	 * Ready means: waiting for an answer, with no message of its own
+	 * still on the queue.  Anything else reports EBUSY.
+	 */
+	if (ISSET(strp->flags, STR_PROC_ONQUEUE) ||
+	    !ISSET(strp->flags, STR_PROC_WAITANSWER))
+		return (EBUSY);
+
+	/* XXX - a p_stat != SSLEEP test was left disabled here, as it is
+	 * unclear how to tell whether a process is sleeping. */
+
+	return (0);
+}
+
+int
+systrace_getcwd(struct fsystrace *fst, struct str_process *strp)
+{
+	/* Adopt the target's cwd and root; the caller's are saved in fst. */
+	struct proc *self = systrace_curproc();
+	struct filedesc *mine, *theirs;
+	int rc;
+
+	DPRINTF(("%s: %d\n", __func__, strp->pid));
+	if ((rc = systrace_processready(strp)) != 0)
+		return (rc);
+
+	mine = self->p_fd;
+	theirs = strp->proc->p_fd;
+	if (mine == NULL || theirs == NULL)
+		return (EINVAL);
+
+	/* Remember what is about to be overwritten, and for which pid */
+	fst->fd_pid = strp->pid;
+	fst->fd_cdir = mine->fd_cdir;
+	fst->fd_rdir = mine->fd_rdir;
+
+	mine->fd_cdir = theirs->fd_cdir;
+	if (mine->fd_cdir != NULL)
+		VREF(mine->fd_cdir);
+	mine->fd_rdir = theirs->fd_rdir;
+	if (mine->fd_rdir != NULL)
+		VREF(mine->fd_rdir);
+	return (0);
+}
+
+int
+systrace_io(struct str_process *strp, struct systrace_io *io)
+{
+	/*
+	 * Transfer up to io->strio_len bytes between the caller's buffer
+	 * strio_addr and offset strio_offs of the traced process.  On
+	 * return strio_len has been reduced by the untransferred residue.
+	 */
+	struct proc *self = systrace_curproc();
+	struct proc *target = strp->proc;
+	struct uio xfer;
+	struct iovec vec;
+	int rc;
+
+	DPRINTF(("%s: %u: %p(%lu)\n", __func__,
+	    io->strio_pid, io->strio_offs, (u_long)io->strio_len));
+
+	if (io->strio_op == SYSTR_READ)
+		xfer.uio_rw = UIO_READ;
+	else if (io->strio_op == SYSTR_WRITE)
+		xfer.uio_rw = UIO_WRITE;
+	else
+		return (EINVAL);
+
+	if ((rc = systrace_processready(strp)) != 0)
+		return (rc);
+
+	vec.iov_base = io->strio_addr;
+	vec.iov_len = io->strio_len;
+
+	xfer.uio_iov = &vec;
+	xfer.uio_iovcnt = 1;
+	xfer.uio_offset = (off_t)(long)io->strio_offs;
+	xfer.uio_resid = io->strio_len;
+	xfer.uio_segflg = UIO_USERSPACE;
+	xfer.uio_procp = self;
+
+	rc = systrace_domem(self, target, NULL, &xfer);
+	/* The length is adjusted even when the transfer reports an error */
+	io->strio_len -= xfer.uio_resid;
+	return (rc);
+}
+
+int
+systrace_attach(struct fsystrace *fst, pid_t pid)
+{
+	/*
+	 * Start tracing process pid on behalf of the calling process.
+	 * Returns 0 on success, ESRCH when pid does not exist, EINVAL,
+	 * EPERM or EBUSY for the refusals listed below, or the error
+	 * reported by suser() or systrace_insert_process().
+	 */
+	struct proc *self = systrace_curproc();
+	struct proc *target;
+	int rc;
+
+	target = pfind(pid);
+	if (target == NULL)
+		return (ESRCH);
+
+	/*
+	 * You can't attach to a process if:
+	 *	(1) it's the process that's doing the attaching,
+	 */
+	if (target->p_pid == self->p_pid)
+		return (EINVAL);
+
+	/*
+	 *	(2) it's a system process
+	 */
+	if (ISSET(target->p_flag, P_SYSTEM))
+		return (EPERM);
+
+	/*
+	 *	(3) it's being traced already
+	 */
+	if (ISSET(target->p_flag, P_SYSTRACE))
+		return (EBUSY);
+
+	/*
+	 *	(4) it's not owned by you, or the last exec
+	 *	    gave us setuid/setgid privs (unless
+	 *	    you're root), or...
+	 *
+	 *      [Note: once P_SUGID gets set in execve(), it stays
+	 *	set until the process does another execve(). Hence
+	 *	this prevents a setuid process which revokes its
+	 *	special privileges using setuid() from being
+	 *	traced. This is good security.]
+	 */
+	if (target->p_cred->p_ruid != self->p_cred->p_ruid ||
+	    ISSET(target->p_flag, P_SUGID)) {
+		/* suser() is consulted only for these two cases */
+		rc = suser(self->p_ucred, &self->p_acflag);
+		if (rc != 0)
+			return (rc);
+	}
+
+	/*
+	 *	(5) ...it's init, which controls the security level
+	 *	    of the entire system, and the system was not
+	 *          compiled with permanently insecure mode turned
+	 *	    on.
+	 */
+	if (target->p_pid == 1 && securelevel > -1)
+		return (EPERM);
+
+	/* Every check passed: register the process with fst */
+	rc = systrace_insert_process(fst, target);
+
+	return (rc);
+}
+
+/*
+ * Prepare to replace arguments: validate the monitor's request and stage
+ * a kernel copy of it (descriptor followed by data) in strp->replace.
+ */
+int
+systrace_preprepl(struct str_process *strp, struct systrace_replace *repl)
+{
+	size_t len;
+	int i, ret = 0;
+
+	ret = systrace_processready(strp);
+	if (ret)
+		return (ret);
+
+	if (strp->replace != NULL) {
+		FREE(strp->replace, M_TEMP); /* M_XDATA */
+		strp->replace = NULL;
+	}
+
+	if (repl->strr_nrepl < 0 || repl->strr_nrepl > SYSTR_MAXARGS)
+		return (EINVAL);
+
+	for (i = 0, len = 0; i < repl->strr_nrepl; i++) {
+		if (repl->strr_offlen[i] == 0)
+			continue;	/* strr_off[i] is an immediate value */
+		/* Bound both terms so that the sums below cannot wrap */
+		if (repl->strr_offlen[i] > 2048 || repl->strr_off[i] > 2048)
+			return (EINVAL);
+		len += repl->strr_offlen[i];
+		if (repl->strr_offlen[i] + repl->strr_off[i] > len)
+			return (EINVAL);
+	}
+
+	/* The total must match the claimed length and respect the maximum */
+	if (repl->strr_len != len || repl->strr_len > 2048)
+		return (EINVAL);
+
+	MALLOC(strp->replace, struct systrace_replace *,
+	    sizeof(struct systrace_replace) + len,
+	    M_TEMP, M_WAITOK); /* M_XDATA */
+
+	memcpy(strp->replace, repl, sizeof(struct systrace_replace));
+	ret = copyin(repl->strr_base, strp->replace + 1, len);
+	if (ret) {
+		FREE(strp->replace, M_TEMP); /* M_XDATA */
+		strp->replace = NULL;
+		return (ret);
+	}
+
+	/* Point strr_base at the kernel copy stored after the descriptor */
+	repl = strp->replace;
+	repl->strr_base = (caddr_t)(repl + 1);
+	return (0);
+}
+
+/*
+ * Replace the arguments with arguments from the monitoring process.
+ * The staged request in strp->replace is freed on every path.
+ */
+int
+systrace_replace(struct str_process *strp, size_t argsize, register_t args[])
+{
+	struct proc *p = strp->proc;
+	struct systrace_replace *repl = strp->replace;
+	caddr_t sg, kdata, udata, kbase, ubase;
+	int i, maxarg, ind, ret = 0;
+
+	maxarg = argsize/sizeof(register_t);
+	sg = stackgap_init(p, 0);
+	if ((ubase = stackgap_alloc(p, &sg, repl->strr_len)) == NULL) {
+		ret = ENOMEM;	/* the stack gap cannot hold strr_len bytes */
+		goto out;
+	}
+	kbase = repl->strr_base;
+	for (i = 0; i < maxarg && i < repl->strr_nrepl; i++) {
+		ind = repl->strr_argind[i];
+		if (ind < 0 || ind >= maxarg) {
+			ret = EINVAL;
+			goto out;
+		}
+		if (repl->strr_offlen[i] == 0) {
+			args[ind] = repl->strr_off[i];	/* immediate value */
+			continue;
+		}
+		kdata = kbase + repl->strr_off[i];
+		udata = ubase + repl->strr_off[i];
+		if (copyout(kdata, udata, repl->strr_offlen[i])) {
+			ret = EINVAL;
+			goto out;
+		}
+		/* Replace the argument with the new address */
+		args[ind] = (register_t)udata;
+	}
+ out:
+	FREE(repl, M_TEMP); /* M_XDATA */
+	strp->replace = NULL;
+	return (ret);
+}
+
+struct str_process *
+systrace_findpid(struct fsystrace *fst, pid_t pid)
+{
+	/*
+	 * Return fst's entry for pid, or NULL when there is none or when
+	 * systrace_find() cannot produce a process for it.
+	 */
+	struct str_process *entry;
+
+	TAILQ_FOREACH(entry, &fst->processes, next) {
+		if (entry->pid == pid)
+			break;
+	}
+	if (entry == NULL || systrace_find(entry) == NULL)
+		return (NULL);
+	return (entry);
+}
+
+int
+systrace_detach(struct str_process *strp)
+{
+	struct proc *proc;
+	struct fsystrace *fst = NULL;
+	int error = 0;
+	/* Stop tracing strp: unhook it from its process and fst, free it. */
+	DPRINTF(("%s: Trying to detach from %d\n", __func__, strp->pid));
+	/* Clear the trace marks on the process, if it can still be found */
+	if ((proc = systrace_find(strp)) != NULL) {
+		CLR(proc->p_flag, P_SYSTRACE);
+		proc->p_systrace = NULL;
+	} else
+		error = ESRCH;
+	/* Release a process that is still sleeping for an answer */
+	if (ISSET(strp->flags, STR_PROC_WAITANSWER)) {
+		CLR(strp->flags, STR_PROC_WAITANSWER);
+		wakeup(strp);
+	}
+
+	fst = strp->parent;
+	systrace_wakeup(fst);
+	/* Unlink from the message queue (if queued) and the process list */
+	if (ISSET(strp->flags, STR_PROC_ONQUEUE))
+		TAILQ_REMOVE(&fst->messages, strp, msg_next);
+
+	TAILQ_REMOVE(&fst->processes, strp, next);
+	fst->nprocesses--;
+	/* Drop the policy reference and any staged argument replacement */
+	if (strp->policy)
+		systrace_closepolicy(fst, strp->policy);
+	if (strp->replace)
+		FREE(strp->replace, M_TEMP); /* M_XDATA */
+	FREE(strp, M_TEMP);
+
+	return (error);	/* ESRCH if the process was not found, else 0 */
+}
+
+void
+systrace_closepolicy(struct fsystrace *fst, struct str_policy *policy)
+{
+	/* Drop one reference; the last one unlinks and frees the policy. */
+	policy->refcount--;
+	if (policy->refcount != 0)
+		return;
+
+	fst->npolicies--;
+	if (policy->nsysent != 0)
+		FREE(policy->sysent, M_TEMP); /* M_XDATA */
+
+	TAILQ_REMOVE(&fst->policies, policy, next);
+	FREE(policy, M_TEMP);
+}
+
+
+int
+systrace_insert_process(struct fsystrace *fst, struct proc *proc)
+{
+	/*
+	 * Create the tracking entry for proc, link it into fst and mark
+	 * proc as traced.  Returns ENOBUFS if no memory is available.
+	 */
+	struct str_process *entry;
+
+	MALLOC(entry, struct str_process *, sizeof(*entry), M_TEMP, M_NOWAIT);
+	if (entry == NULL)
+		return (ENOBUFS);
+	memset(entry, 0, sizeof(*entry));
+
+	entry->pid = proc->p_pid;
+	entry->proc = proc;
+	entry->parent = fst;
+	TAILQ_INSERT_TAIL(&fst->processes, entry, next);
+	fst->nprocesses++;
+	proc->p_systrace = entry;
+	SET(proc->p_flag, P_SYSTRACE);
+	return (0);
+}
+
+struct str_policy *
+systrace_newpolicy(struct fsystrace *fst, int maxents)
+{
+	/*
+	 * Create a policy of maxents entries, each SYSTR_POLICY_ASK, and
+	 * append it to fst->policies.  NULL means over the policy limit
+	 * (unless fst->issuser is set) or out of memory.
+	 */
+	struct str_policy *created;
+	int slot;
+
+	if (!fst->issuser && fst->npolicies > SYSTR_MAX_POLICIES)
+		return (NULL);
+
+	MALLOC(created, struct str_policy *, sizeof(*created), M_TEMP, M_NOWAIT);
+	if (created == NULL)
+		return (NULL);
+	DPRINTF(("%s: allocating %d -> %lu\n", __func__,
+		     maxents, (u_long)maxents * sizeof(int)));
+	memset(created, 0, sizeof(*created));
+
+	MALLOC(created->sysent, u_char *, maxents * sizeof(u_char),
+	    M_TEMP, M_WAITOK); /* M_XDATA */
+	created->nsysent = maxents;
+	for (slot = 0; slot < maxents; slot++)
+		created->sysent[slot] = SYSTR_POLICY_ASK;
+
+	created->refcount = 1;
+	created->nr = fst->npolicynr++;
+	fst->npolicies++;
+	TAILQ_INSERT_TAIL(&fst->policies, created, next);
+	return (created);
+}
+
+int
+systrace_msg_ask(struct fsystrace *fst, struct str_process *strp,
+    int code, size_t argsize, register_t args[])
+{
+	struct str_msg_ask msg_ask;
+	int i;
+	/* Post an "ask" message; zeroed first so no stale stack bytes leak */
+	memset(&msg_ask, 0, sizeof(msg_ask));
+	msg_ask.code = code;
+	msg_ask.argsize = argsize;
+	for (i = 0; i < (argsize/sizeof(register_t)) && i < SYSTR_MAXARGS; i++)
+		msg_ask.args[i] = args[i];
+	return (systrace_make_msg(strp, SYSTR_MSG_ASK,
+		    &msg_ask, sizeof(msg_ask)));
+}
+
+int
+systrace_msg_result(struct fsystrace *fst, struct str_process *strp,
+    int error, int code, size_t argsize, register_t args[], register_t rval[])
+{
+	struct str_msg_ask msg_ask;
+	int i;
+
+	/* Zero first so unused args[] slots carry no stale stack data */
+	memset(&msg_ask, 0, sizeof(msg_ask));
+	msg_ask.code = code;
+	msg_ask.argsize = argsize;
+	msg_ask.result = error;
+	for (i = 0; i < (argsize/sizeof(register_t)) && i < SYSTR_MAXARGS; i++)
+		msg_ask.args[i] = args[i];
+	msg_ask.rval[0] = rval[0];
+	msg_ask.rval[1] = rval[1];
+	return (systrace_make_msg(strp, SYSTR_MSG_RES,
+		    &msg_ask, sizeof(msg_ask)));
+}
+
+int
+systrace_msg_emul(struct fsystrace *fst, struct str_process *strp)
+{
+	/* Post a SYSTR_MSG_EMUL message; the name sent is always "darwin". */
+	struct str_msg_emul payload;
+
+	memcpy(payload.emul, "darwin\0\0", sizeof(payload.emul));
+
+	return (systrace_make_msg(strp, SYSTR_MSG_EMUL,
+		    &payload, sizeof(payload)));
+}
+
+int
+systrace_msg_ugid(struct fsystrace *fst, struct str_process *strp)
+{
+	/* Post a SYSTR_MSG_UGID message with strp's real uid and gid. */
+	struct pcred *cred = strp->proc->p_cred;
+	struct str_msg_ugid ids;
+
+	ids.uid = cred->p_ruid;
+	ids.gid = cred->p_rgid;
+
+	return (systrace_make_msg(strp, SYSTR_MSG_UGID, &ids, sizeof(ids)));
+}
+
+int
+systrace_make_msg(struct str_process *strp, int type, void *data, size_t len)
+{
+	struct str_message *msg = &strp->msg;
+	struct fsystrace *fst = strp->parent;
+	struct proc *p = strp->proc;
+	int st, again;
+	/* Queue one message for strp, then sleep until it has been answered. */
+	do {
+		again = 0;
+		if (ISSET(strp->flags, STR_PROC_ONQUEUE)) {
+			/* We need to wait before we can post this message.
+			 * Multi-threading causes this issue.
+			 */
+			again = 1;
+			goto out;
+		}
+		/* Fill strp's single message slot and stamp a new seqnr */
+		memcpy(&msg->msg_data, data, len);
+		msg->msg_seqnr = ++strp->seqnr;
+		msg->msg_type = type;
+		msg->msg_pid = strp->pid;
+		if (strp->policy)
+			msg->msg_policy = strp->policy->nr;
+		else
+			msg->msg_policy = -1;	/* no policy assigned */
+
+		TAILQ_INSERT_TAIL(&fst->messages, strp, msg_next);
+		SET(strp->flags, STR_PROC_ONQUEUE);
+
+	out:
+		SET(strp->flags, STR_PROC_WAITANSWER);
+		systrace_wakeup(fst);
+		/* Sleep until WAITANSWER is cleared (by an answer or a detach) */
+		while (1) {
+			/* Release the lock - XXX */
+			lockmgr(&fst->lock, LK_RELEASE, NULL, p);
+			st = tsleep(strp, PWAIT | PCATCH, "systrmsg", 0);
+			if (st != 0)
+				return (EINTR);	/* fst->lock already released */
+
+			systrace_lock();
+
+			/* If we detach, then everything is permitted */
+			if ((strp = p->p_systrace) == NULL) {
+				systrace_unlock();
+				return (0);
+			}
+			fst = strp->parent;
+			lockmgr(&fst->lock, LK_EXCLUSIVE, NULL, p);
+			systrace_unlock();
+			/* Still flagged as waiting: not our answer, sleep again */
+			if (!ISSET(strp->flags, STR_PROC_WAITANSWER))
+				break;
+		}
+	
+	} while (again);
+	/* The inner loop exits with fst->lock held; drop it before returning */
+	lockmgr(&fst->lock, LK_RELEASE, NULL, p);
+
+	return (0);
+}
+
+int
+systrace_msg_child(struct fsystrace *fst, struct str_process *strp, pid_t npid)
+{
+	/*
+	 * Queue a SYSTR_MSG_CHILD message announcing npid in the name of
+	 * strp.  The message travels in a freshly allocated, otherwise
+	 * zeroed str_process, so strp's own message slot is left alone.
+	 */
+	struct str_process *carrier;
+	struct str_message *note;
+
+	MALLOC(carrier, struct str_process *, sizeof(*carrier), M_TEMP,
+	    M_WAITOK);
+	memset(carrier, 0, sizeof(*carrier));
+
+	DPRINTF(("%s: %p: pid %d -> pid %d\n", __func__,
+		    carrier, strp->pid, npid));
+
+	note = &carrier->msg;
+	note->msg_type = SYSTR_MSG_CHILD;
+	note->msg_pid = strp->pid;
+	/* -1 stands for "no policy assigned" */
+	note->msg_policy = strp->policy ? strp->policy->nr : -1;
+	note->msg_data.msg_child.new_pid = npid;
+
+	TAILQ_INSERT_TAIL(&fst->messages, carrier, msg_next);
+	systrace_wakeup(fst);
+
+
+	return (0);
+}
+
+/* Simple allocator for the stackgap adapted from NetBSD */
+
+caddr_t
+stackgap_init(const struct proc *p, size_t sz)
+{
+	/* Start (aligned down) of an sz-byte gap just below p->user_stack. */
+	/* XXX - i386 might have sigcode on stack */
+	const unsigned long sigcode_len = 0;
+	if (sz == 0)
+		sz = STACKGAPLEN;	/* zero selects the full gap */
+	if (sz > STACKGAPLEN)
+		panic("size %lu > STACKGAPLEN", (unsigned long)sz);
+	return (caddr_t)(((unsigned long)p->user_stack - sigcode_len - sz)
+	    & ~ALIGNBYTES);
+}
+
+
+void *
+stackgap_alloc(const struct proc *p, caddr_t *sgp, size_t sz)
+{
+	/* Hand out ALIGN(sz) bytes at *sgp and advance it; NULL (and *sgp
+	 * untouched) if that would pass p->user_stack. */
+	caddr_t start = *sgp;
+	caddr_t end;
+	sz = ALIGN(sz);
+	end = start + sz;
+	if (end > (caddr_t)p->user_stack)
+		return NULL;
+	*sgp = end;
+	return (void *)start;
+}
+
+/* User Address Space memory functions */
+
+static int
+systrace_rmem(p, uio)
+	struct proc *p;
+	struct uio *uio;
+{
+	/*
+	 * Read memory of process p into uio, one page at a time: each
+	 * page of p's map is copied into a kernel bounce page and moved
+	 * out with uiomove().  Write requests are rejected with EINVAL.
+	 * Returns 0, an errno from the checks below, EIO when a page
+	 * cannot be copied, or the error reported by uiomove().
+	 */
+	struct proc *curproc = systrace_curproc();
+	vm_offset_t kv_start;		/* kernel bounce page */
+	vm_map_t proc_map;
+	struct task * task;
+	int error = 0;
+
+	if (uio->uio_rw == UIO_WRITE)
+		return (EINVAL);
+
+	/* A caller whose uid differs from p's has to pass suser() */
+	if ((p->p_ucred->cr_uid != curproc->p_ucred->cr_uid)
+		&& suser(curproc->p_ucred, &curproc->p_acflag))
+		return (EPERM);
+
+	task = p->task;
+	if (task == NULL)
+		return (EINVAL);
+
+	/* The reference is held until the final task_deallocate() below */
+	if (!task_reference_try(task))
+		return (EINVAL);
+
+	proc_map = get_task_map(task);
+
+	/* Allocate a single page */
+	if (kmem_alloc(kernel_map, &kv_start, PAGE_SIZE) != KERN_SUCCESS) {
+		task_deallocate(task);
+		return(ENOMEM);
+	}
+	/*
+	 * Only map in one page at a time.  We don't have to, but it
+	 * makes things easier.  This way is trivial - right?
+	 */
+	do {
+		vm_map_copy_t	tmp;
+		vm_offset_t uva;		/* address in p to read from */
+		vm_offset_t pageno;		/* page-aligned base of uva */
+		int page_offset;		/* offset into page */
+		u_int len;
+
+		/* An offset beyond the user stack ends the read, no error */
+		uva = (vm_offset_t) uio->uio_offset;
+		if ((caddr_t)uva > p->user_stack) {
+			error = 0;
+			break;
+		}
+
+		/*
+		 * Get the page base of this segment.
+		 */
+		pageno = trunc_page(uva);
+		page_offset = uva - pageno;
+
+		if(vm_map_copyin(proc_map, pageno, PAGE_SIZE, FALSE, &tmp)
+		    != KERN_SUCCESS) {
+			error = EIO;
+			break;
+		}
+
+		/*
+		 * vm_map_copy_overwrite() consumes the copy object only
+		 * when it succeeds; on failure it is still ours to discard.
+		 */
+		if(vm_map_copy_overwrite(kernel_map, kv_start, tmp, FALSE)
+		    != KERN_SUCCESS) {
+			vm_map_copy_discard(tmp);
+			error = EIO;
+			break;
+		}
+
+		/*
+		 * How many bytes to copy
+		 */
+		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
+
+		/*
+		 * Now do the i/o move.
+		 */
+		error = uiomove((caddr_t)kv_start + page_offset, len, uio);
+
+	} while (error == 0 && uio->uio_resid > 0);
+
+	task_deallocate(task);
+	kmem_free(kernel_map, kv_start, PAGE_SIZE);
+
+	return (error);
+}
+
+/*
+ * Transfer memory of the target process p on behalf of curp.
+ * The data is copied through a kernel page and moved with
+ * uiomove(); only reads are implemented (see systrace_rmem),
+ * and an empty request succeeds immediately.
+ */
+int
+systrace_domem(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int rc = 0;
+
+	if (uio->uio_resid != 0)
+		rc = systrace_rmem(p, uio);
+	return (rc);
+}
--- /dev/null	Tue Dec  3 14:27:21 2002
+++ bsd/sys/systrace.h	Tue Dec  3 12:39:54 2002
@@ -0,0 +1,195 @@
+/*	$OpenBSD: systrace.h,v 1.11 2002/10/25 23:22:58 fgsch Exp $	*/
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYSTRACE_H_
+#define _SYSTRACE_H_
+
+#include <sys/ioccom.h>
+
+#define	SYSTR_CLONE	_IOR('s', 1, int)
+
+#define SYSTR_EMULEN	8	/* sync with sys proc */
+
+struct str_msg_emul {	/* payload of a SYSTR_MSG_EMUL message */
+	char emul[SYSTR_EMULEN];	/* emulation name; this port sends "darwin" */
+};
+
+struct str_msg_ugid {	/* payload of a SYSTR_MSG_UGID message */
+	uid_t uid;	/* real uid of the traced process */
+	gid_t gid;	/* real gid of the traced process */
+};
+
+#define SYSTR_MAX_POLICIES	64	/* limit checked by systrace_newpolicy() */
+#define SYSTR_MAXARGS		64	/* capacity of the per-argument arrays */
+
+struct str_msg_ask {	/* payload of SYSTR_MSG_ASK and SYSTR_MSG_RES */
+	int code;	/* code of the call being reported */
+	int argsize;	/* size of the call's arguments, in bytes */
+	register_t args[SYSTR_MAXARGS];	/* first argsize/sizeof(register_t) args */
+	register_t rval[2];	/* return values; filled for SYSTR_MSG_RES only */
+	int result;	/* error of the call; filled for SYSTR_MSG_RES only */
+};
+
+/* Queued on fork or exit of a process; payload of SYSTR_MSG_CHILD */
+
+struct str_msg_child {
+	pid_t new_pid;	/* pid handed to systrace_msg_child() */
+};
+
+#define SYSTR_MSG_ASK	1	/* struct str_msg_ask, before the call */
+#define SYSTR_MSG_RES	2	/* struct str_msg_ask, with rval[] and result */
+#define SYSTR_MSG_EMUL	3	/* struct str_msg_emul */
+#define SYSTR_MSG_CHILD	4	/* struct str_msg_child */
+#define SYSTR_MSG_UGID	5	/* struct str_msg_ugid */
+
+#define SYSTR_MSG_NOPROCESS(x) \
+	((x)->msg.msg_type == SYSTR_MSG_CHILD)	/* x: entry carrying the message */
+
+struct str_message {
+	int msg_type;		/* one of SYSTR_MSG_* */
+	pid_t msg_pid;		/* pid of the process the message concerns */
+	u_int16_t msg_seqnr;	/* answer has to match seqnr */
+	short msg_policy;	/* number of the assigned policy, -1 if none */
+	union {			/* member in use is selected by msg_type */
+		struct str_msg_emul msg_emul;
+		struct str_msg_ugid msg_ugid;
+		struct str_msg_ask msg_ask;
+		struct str_msg_child msg_child;
+	} msg_data;
+};
+
+struct systrace_answer {	/* argument of STRIOCANSWER */
+	pid_t stra_pid;		/* process being answered */
+	u_int16_t stra_seqnr;	/* must equal the process's current seqnr */
+	short reserved;
+	uid_t stra_seteuid;	/* elevated privileges for system call */
+	uid_t stra_setegid;	/* egid to use; read when SYSTR_FLAGS_SETEGID is set */
+	int stra_policy;	/* verdict; has to pass POLICY_VALID() */
+	int stra_error;		/* error to record; 0 is stored as EPERM */
+	int stra_flags;		/* SYSTR_FLAGS_* bits */
+};
+
+#define SYSTR_READ		1	/* strio_op: mapped to UIO_READ */
+#define SYSTR_WRITE		2	/* strio_op: mapped to UIO_WRITE */
+
+struct systrace_io {	/* argument of STRIOCIO */
+	pid_t strio_pid;	/* traced process to operate on */
+	int strio_op;		/* SYSTR_READ or SYSTR_WRITE */
+	void *strio_offs;	/* offset (address) within the traced process */
+	void *strio_addr;	/* buffer in the calling process */
+	size_t strio_len;	/* in: bytes wanted; out: reduced by the residue */
+};
+
+#define SYSTR_POLICY_NEW	1	/* create a policy with strp_maxents entries */
+#define SYSTR_POLICY_ASSIGN	2	/* bind policy strp_num to process strp_pid */
+#define SYSTR_POLICY_MODIFY	3	/* set entry strp_code to strp_policy */
+
+struct systrace_policy {	/* argument of STRIOCPOLICY */
+	int strp_op;	/* one of SYSTR_POLICY_NEW/ASSIGN/MODIFY */
+	int strp_num;	/* policy number: out for NEW, in for ASSIGN/MODIFY */
+	union {		/* member in use depends on strp_op */
+		struct {	/* read by SYSTR_POLICY_MODIFY */
+			short code;	/* entry index, 0 .. nsysent-1 */
+			short policy;	/* has to pass POLICY_VALID() */
+		} assign;
+		pid_t pid;	/* read by SYSTR_POLICY_ASSIGN */
+		int maxents;	/* read by SYSTR_POLICY_NEW; 1 .. 1024 */
+	} strp_data;
+};
+
+#define strp_pid	strp_data.pid
+#define strp_maxents	strp_data.maxents
+#define strp_code	strp_data.assign.code
+#define strp_policy	strp_data.assign.policy
+
+struct systrace_replace {	/* argument of STRIOCREPLACE */
+	pid_t strr_pid;		/* traced process to operate on */
+	int strr_nrepl;		/* entries used below, 0 .. SYSTR_MAXARGS */
+	caddr_t	strr_base;	/* Base memory */
+	size_t strr_len;	/* Length of memory */
+	int strr_argind[SYSTR_MAXARGS];	/* index of the argument to replace */
+	size_t strr_off[SYSTR_MAXARGS];	/* offset into base; the value itself if offlen is 0 */
+	size_t strr_offlen[SYSTR_MAXARGS];	/* bytes at that offset; sum must equal strr_len */
+};
+
+#define STRIOCATTACH	_IOW('s', 101, pid_t)
+#define STRIOCDETACH	_IOW('s', 102, pid_t)
+#define STRIOCANSWER	_IOW('s', 103, struct systrace_answer)
+#define STRIOCIO	_IOWR('s', 104, struct systrace_io)
+#define STRIOCPOLICY	_IOWR('s', 105, struct systrace_policy)
+#define STRIOCGETCWD	_IOW('s', 106, pid_t)
+#define STRIOCRESCWD	_IO('s', 107)
+#define STRIOCREPORT	_IOW('s', 108, pid_t)
+#define STRIOCREPLACE	_IOW('s', 109, struct systrace_replace)
+
+#define SYSTR_POLICY_ASK	0	/* initial value of every policy entry */
+#define SYSTR_POLICY_PERMIT	1
+#define SYSTR_POLICY_NEVER	2
+
+#define SYSTR_FLAGS_RESULT	0x001	/* stra_flags: sets STR_PROC_SYSCALLRES */
+#define SYSTR_FLAGS_SETEUID	0x002	/* stra_flags: stra_seteuid is valid */
+#define SYSTR_FLAGS_SETEGID	0x004	/* stra_flags: stra_setegid is valid */
+
+#ifdef KERNEL
+struct str_process;
+struct fsystrace {
+	struct lock__bsd__ lock;	/* taken and released with lockmgr() */
+	struct selinfo si;
+
+	TAILQ_HEAD(strprocessq, str_process) processes;	/* traced processes */
+	int nprocesses;		/* number of entries on processes */
+
+	TAILQ_HEAD(strpolicyq, str_policy) policies;	/* policies created here */
+
+	struct strprocessq messages;	/* entries with a message pending */
+
+	int npolicynr;		/* number given to the next new policy */
+	int npolicies;		/* number of entries on policies */
+
+	int issuser;		/* when set, SYSTR_MAX_POLICIES is not enforced */
+	uid_t p_ruid;
+	gid_t p_rgid;
+
+	/* cwd magic */
+	pid_t fd_pid;		/* pid recorded by systrace_getcwd() */
+	struct vnode *fd_cdir;	/* caller's cwd saved by systrace_getcwd() */
+	struct vnode *fd_rdir;	/* caller's root saved by systrace_getcwd() */
+};
+
+/* Internal prototypes */
+
+int systrace_enter(struct proc *, register_t, register_t []);
+void systrace_exit(struct proc *, register_t, register_t [], register_t [], int);
+void systrace_sys_exit(struct proc *);
+void systrace_sys_fork(struct proc *, struct proc *);
+
+#endif /* KERNEL */
+#endif /* _SYSTRACE_H_ */
--- /dev/null	Tue Dec  3 14:27:21 2002
+++ bsd/sys/tree.h	Tue Nov 26 21:43:14 2002
@@ -0,0 +1,678 @@
+/*	$NetBSD: tree.h,v 1.5 2002/11/02 07:35:07 perry Exp $	*/
+/*	$OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $	*/
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_SYS_TREE_H_
+#define	_SYS_TREE_H_
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure.  Every operation
+ * on the tree causes a splay to happen.  The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree.  On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n).  The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n);
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ *	- every search path from the root to a leaf consists of the
+ *	  same number of black nodes,
+ *	- each red node (except for the root) has a black parent,
+ *	- each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type)						\
+struct name {								\
+	struct type *sph_root; /* root of the tree */			\
+}
+
+#define SPLAY_INITIALIZER(root)						\
+	{ NULL }
+
+#define SPLAY_INIT(root) do {						\
+	(root)->sph_root = NULL;					\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type)						\
+struct {								\
+	struct type *spe_left; /* left element */			\
+	struct type *spe_right; /* right element */			\
+}
+
+#define SPLAY_LEFT(elm, field)		(elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field)		(elm)->field.spe_right
+#define SPLAY_ROOT(head)		(head)->sph_root
+#define SPLAY_EMPTY(head)		(SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp holds SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do {			\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field);	\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (/*CONSTCOND*/ 0)
+	
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do {			\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field);	\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do {				\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);		\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do {				\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);	\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do {		\
+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field);	\
+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field);	\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);	\
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp)				\
+void name##_SPLAY(struct name *, struct type *);			\
+void name##_SPLAY_MINMAX(struct name *, int);				\
+struct type *name##_SPLAY_INSERT(struct name *, struct type *);		\
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *);		\
+									\
+/* Finds the node with the same key as elm */				\
+static __inline struct type *						\
+name##_SPLAY_FIND(struct name *head, struct type *elm)			\
+{									\
+	if (SPLAY_EMPTY(head))						\
+		return(NULL);						\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0)				\
+		return (head->sph_root);				\
+	return (NULL);							\
+}									\
+									\
+static __inline struct type *						\
+name##_SPLAY_NEXT(struct name *head, struct type *elm)			\
+{									\
+	name##_SPLAY(head, elm);					\
+	if (SPLAY_RIGHT(elm, field) != NULL) {				\
+		elm = SPLAY_RIGHT(elm, field);				\
+		while (SPLAY_LEFT(elm, field) != NULL) {		\
+			elm = SPLAY_LEFT(elm, field);			\
+		}							\
+	} else								\
+		elm = NULL;						\
+	return (elm);							\
+}									\
+									\
+static __inline struct type *						\
+name##_SPLAY_MIN_MAX(struct name *head, int val)			\
+{									\
+	name##_SPLAY_MINMAX(head, val);					\
+        return (SPLAY_ROOT(head));					\
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp)				\
+struct type *								\
+name##_SPLAY_INSERT(struct name *head, struct type *elm)		\
+{									\
+    if (SPLAY_EMPTY(head)) {						\
+	    SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL;	\
+    } else {								\
+	    int __comp;							\
+	    name##_SPLAY(head, elm);					\
+	    __comp = (cmp)(elm, (head)->sph_root);			\
+	    if(__comp < 0) {						\
+		    SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+		    SPLAY_RIGHT(elm, field) = (head)->sph_root;		\
+		    SPLAY_LEFT((head)->sph_root, field) = NULL;		\
+	    } else if (__comp > 0) {					\
+		    SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+		    SPLAY_LEFT(elm, field) = (head)->sph_root;		\
+		    SPLAY_RIGHT((head)->sph_root, field) = NULL;	\
+	    } else							\
+		    return ((head)->sph_root);				\
+    }									\
+    (head)->sph_root = (elm);						\
+    return (NULL);							\
+}									\
+									\
+struct type *								\
+name##_SPLAY_REMOVE(struct name *head, struct type *elm)		\
+{									\
+	struct type *__tmp;						\
+	if (SPLAY_EMPTY(head))						\
+		return (NULL);						\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0) {			\
+		if (SPLAY_LEFT((head)->sph_root, field) == NULL) {	\
+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+		} else {						\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+			name##_SPLAY(head, elm);			\
+			SPLAY_RIGHT((head)->sph_root, field) = __tmp;	\
+		}							\
+		return (elm);						\
+	}								\
+	return (NULL);							\
+}									\
+									\
+void									\
+name##_SPLAY(struct name *head, struct type *elm)			\
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+	int __comp;							\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node;					\
+\
+	while ((__comp = (cmp)(elm, (head)->sph_root))) {		\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if ((cmp)(elm, __tmp) < 0){			\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if ((cmp)(elm, __tmp) > 0){			\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}									\
+									\
+/* Splay the minimum (__comp < 0) or maximum (__comp > 0) element to	\
+ * the root; head must be non-empty.  __comp == 0 leaves the tree as is.\
+ */									\
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node;					\
+\
+	while (__comp != 0) {	/* 0 used to spin here forever */	\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if (__comp < 0){				\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if (__comp > 0) {				\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}
+
+#define SPLAY_NEGINF	-1
+#define SPLAY_INF	1
+
+#define SPLAY_INSERT(name, x, y)	name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y)	name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y)		name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y)		name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x)		(SPLAY_EMPTY(x) ? NULL	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x)		(SPLAY_EMPTY(x) ? NULL	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head)					\
+	for ((x) = SPLAY_MIN(name, head);				\
+	     (x) != NULL;						\
+	     (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type)						\
+struct name {								\
+	struct type *rbh_root; /* root of the tree */			\
+}
+
+#define RB_INITIALIZER(root)						\
+	{ NULL }
+
+#define RB_INIT(root) do {						\
+	(root)->rbh_root = NULL;					\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_BLACK	0
+#define RB_RED		1
+#define RB_ENTRY(type)							\
+struct {								\
+	struct type *rbe_left;		/* left element */		\
+	struct type *rbe_right;		/* right element */		\
+	struct type *rbe_parent;	/* parent element */		\
+	int rbe_color;			/* node color */		\
+}
+
+#define RB_LEFT(elm, field)		(elm)->field.rbe_left
+#define RB_RIGHT(elm, field)		(elm)->field.rbe_right
+#define RB_PARENT(elm, field)		(elm)->field.rbe_parent
+#define RB_COLOR(elm, field)		(elm)->field.rbe_color
+#define RB_ROOT(head)			(head)->rbh_root
+#define RB_EMPTY(head)			(RB_ROOT(head) == NULL)
+
+#define RB_SET(elm, parent, field) do {					\
+	RB_PARENT(elm, field) = parent;					\
+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;		\
+	RB_COLOR(elm, field) = RB_RED;					\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(black, red, field) do {				\
+	RB_COLOR(black, field) = RB_BLACK;				\
+	RB_COLOR(red, field) = RB_RED;					\
+} while (/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do {			\
+	(tmp) = RB_RIGHT(elm, field);					\
+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) {		\
+		RB_PARENT(RB_LEFT(tmp, field), field) = (elm);		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) {		\
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
+		else							\
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_LEFT(tmp, field) = (elm);					\
+	RB_PARENT(elm, field) = (tmp);					\
+	RB_AUGMENT(tmp);						\
+	if ((RB_PARENT(tmp, field)))					\
+		RB_AUGMENT(RB_PARENT(tmp, field));			\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do {			\
+	(tmp) = RB_LEFT(elm, field);					\
+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) {		\
+		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) {		\
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
+		else							\
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_RIGHT(tmp, field) = (elm);					\
+	RB_PARENT(elm, field) = (tmp);					\
+	RB_AUGMENT(tmp);						\
+	if ((RB_PARENT(tmp, field)))					\
+		RB_AUGMENT(RB_PARENT(tmp, field));			\
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp)				\
+void name##_RB_INSERT_COLOR(struct name *, struct type *);	\
+void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+struct type *name##_RB_REMOVE(struct name *, struct type *);		\
+struct type *name##_RB_INSERT(struct name *, struct type *);		\
+struct type *name##_RB_FIND(struct name *, struct type *);		\
+struct type *name##_RB_NEXT(struct name *, struct type *);		\
+struct type *name##_RB_MINMAX(struct name *, int);			\
+									\
+
+/* Generates the red-black tree functions (insert, remove, find,
+ * next, min/max) for the given tree name and element type.
+ */
+#define RB_GENERATE(name, type, field, cmp)				\
+void									\
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm)		\
+{									\
+	struct type *parent, *gparent, *tmp;				\
+	while ((parent = RB_PARENT(elm, field)) &&			\
+	    RB_COLOR(parent, field) == RB_RED) {			\
+		gparent = RB_PARENT(parent, field);			\
+		if (parent == RB_LEFT(gparent, field)) {		\
+			tmp = RB_RIGHT(gparent, field);			\
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
+				RB_COLOR(tmp, field) = RB_BLACK;	\
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_RIGHT(parent, field) == elm) {		\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(parent, gparent, field);	\
+			RB_ROTATE_RIGHT(head, gparent, tmp, field);	\
+		} else {						\
+			tmp = RB_LEFT(gparent, field);			\
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
+				RB_COLOR(tmp, field) = RB_BLACK;	\
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_LEFT(parent, field) == elm) {		\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(parent, gparent, field);	\
+			RB_ROTATE_LEFT(head, gparent, tmp, field);	\
+		}							\
+	}								\
+	RB_COLOR(head->rbh_root, field) = RB_BLACK;			\
+}									\
+									\
+void									\
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{									\
+	struct type *tmp;						\
+	while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) &&	\
+	    elm != RB_ROOT(head)) {					\
+		if (RB_LEFT(parent, field) == elm) {			\
+			tmp = RB_RIGHT(parent, field);			\
+			if (RB_COLOR(tmp, field) == RB_RED) {		\
+				RB_SET_BLACKRED(tmp, parent, field);	\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = RB_RIGHT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED;		\
+				elm = parent;				\
+				parent = RB_PARENT(elm, field);		\
+			} else {					\
+				if (RB_RIGHT(tmp, field) == NULL ||	\
+				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+					struct type *oleft;		\
+					if ((oleft = RB_LEFT(tmp, field)))\
+						RB_COLOR(oleft, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED;	\
+					RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+					tmp = RB_RIGHT(parent, field);	\
+				}					\
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK;	\
+				if (RB_RIGHT(tmp, field))		\
+					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		} else {						\
+			tmp = RB_LEFT(parent, field);			\
+			if (RB_COLOR(tmp, field) == RB_RED) {		\
+				RB_SET_BLACKRED(tmp, parent, field);	\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = RB_LEFT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL ||		\
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED;		\
+				elm = parent;				\
+				parent = RB_PARENT(elm, field);		\
+			} else {					\
+				if (RB_LEFT(tmp, field) == NULL ||	\
+				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+					struct type *oright;		\
+					if ((oright = RB_RIGHT(tmp, field)))\
+						RB_COLOR(oright, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED;	\
+					RB_ROTATE_LEFT(head, tmp, oright, field);\
+					tmp = RB_LEFT(parent, field);	\
+				}					\
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK;	\
+				if (RB_LEFT(tmp, field))		\
+					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		}							\
+	}								\
+	if (elm)							\
+		RB_COLOR(elm, field) = RB_BLACK;			\
+}									\
+									\
+struct type *								\
+name##_RB_REMOVE(struct name *head, struct type *elm)			\
+{									\
+	struct type *child, *parent, *old = elm;			\
+	int color;							\
+	if (RB_LEFT(elm, field) == NULL)				\
+		child = RB_RIGHT(elm, field);				\
+	else if (RB_RIGHT(elm, field) == NULL)				\
+		child = RB_LEFT(elm, field);				\
+	else {								\
+		struct type *left;					\
+		elm = RB_RIGHT(elm, field);				\
+		while ((left = RB_LEFT(elm, field)))			\
+			elm = left;					\
+		child = RB_RIGHT(elm, field);				\
+		parent = RB_PARENT(elm, field);				\
+		color = RB_COLOR(elm, field);				\
+		if (child)						\
+			RB_PARENT(child, field) = parent;		\
+		if (parent) {						\
+			if (RB_LEFT(parent, field) == elm)		\
+				RB_LEFT(parent, field) = child;		\
+			else						\
+				RB_RIGHT(parent, field) = child;	\
+			RB_AUGMENT(parent);				\
+		} else							\
+			RB_ROOT(head) = child;				\
+		if (RB_PARENT(elm, field) == old)			\
+			parent = elm;					\
+		(elm)->field = (old)->field;				\
+		if (RB_PARENT(old, field)) {				\
+			if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+				RB_LEFT(RB_PARENT(old, field), field) = elm;\
+			else						\
+				RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+			RB_AUGMENT(RB_PARENT(old, field));		\
+		} else							\
+			RB_ROOT(head) = elm;				\
+		RB_PARENT(RB_LEFT(old, field), field) = elm;		\
+		if (RB_RIGHT(old, field))				\
+			RB_PARENT(RB_RIGHT(old, field), field) = elm;	\
+		if (parent) {						\
+			left = parent;					\
+			do {						\
+				RB_AUGMENT(left);			\
+			} while ((left = RB_PARENT(left, field)));	\
+		}							\
+		goto color;						\
+	}								\
+	parent = RB_PARENT(elm, field);					\
+	color = RB_COLOR(elm, field);					\
+	if (child)							\
+		RB_PARENT(child, field) = parent;			\
+	if (parent) {							\
+		if (RB_LEFT(parent, field) == elm)			\
+			RB_LEFT(parent, field) = child;			\
+		else							\
+			RB_RIGHT(parent, field) = child;		\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = child;					\
+color:									\
+	if (color == RB_BLACK)						\
+		name##_RB_REMOVE_COLOR(head, parent, child);		\
+	return (old);							\
+}									\
+									\
+/* Inserts a node into the RB tree */					\
+struct type *								\
+name##_RB_INSERT(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp;						\
+	struct type *parent = NULL;					\
+	int comp = 0;							\
+	tmp = RB_ROOT(head);						\
+	while (tmp) {							\
+		parent = tmp;						\
+		comp = (cmp)(elm, parent);				\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	RB_SET(elm, parent, field);					\
+	if (parent != NULL) {						\
+		if (comp < 0)						\
+			RB_LEFT(parent, field) = elm;			\
+		else							\
+			RB_RIGHT(parent, field) = elm;			\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = elm;					\
+	name##_RB_INSERT_COLOR(head, elm);				\
+	return (NULL);							\
+}									\
+									\
+/* Finds the node whose key compares equal to elm, or NULL if none */	\
+struct type *								\
+name##_RB_FIND(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	int comp;							\
+	while (tmp) {							\
+		comp = (cmp)(elm, tmp);	/* same call form as RB_INSERT */\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	return (NULL);							\
+}									\
+									\
+struct type *								\
+name##_RB_NEXT(struct name *head, struct type *elm)			\
+{									\
+	if (RB_RIGHT(elm, field)) {					\
+		elm = RB_RIGHT(elm, field);				\
+		while (RB_LEFT(elm, field))				\
+			elm = RB_LEFT(elm, field);			\
+	} else {							\
+		if (RB_PARENT(elm, field) &&				\
+		    (elm == RB_LEFT(RB_PARENT(elm, field), field)))	\
+			elm = RB_PARENT(elm, field);			\
+		else {							\
+			while (RB_PARENT(elm, field) &&			\
+			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field);		\
+			elm = RB_PARENT(elm, field);			\
+		}							\
+	}								\
+	return (elm);							\
+}									\
+									\
+struct type *								\
+name##_RB_MINMAX(struct name *head, int val)				\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	struct type *parent = NULL;					\
+	while (tmp) {							\
+		parent = tmp;						\
+		if (val < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else							\
+			tmp = RB_RIGHT(tmp, field);			\
+	}								\
+	return (parent);						\
+}
+
+#define RB_NEGINF	-1
+#define RB_INF	1
+
+#define RB_INSERT(name, x, y)	name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y)	name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y)	name##_RB_FIND(x, y)
+#define RB_NEXT(name, x, y)	name##_RB_NEXT(x, y)
+#define RB_MIN(name, x)		name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x)		name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head)					\
+	for ((x) = RB_MIN(name, head);					\
+	     (x) != NULL;						\
+	     (x) = name##_RB_NEXT(head, x))
+
+#endif	/* _SYS_TREE_H_ */
