summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorFreya Murphy <freya@freyacat.org>2025-03-31 12:41:04 -0400
committerFreya Murphy <freya@freyacat.org>2025-03-31 12:41:04 -0400
commit4dc44e8fce222a21f5eb8b86ecb226855c34999b (patch)
tree83e42cbd2cfde437a35216e9c1ee58c0af995e94 /kernel
parenttrack changes (diff)
downloadcomus-4dc44e8fce222a21f5eb8b86ecb226855c34999b.tar.gz
comus-4dc44e8fce222a21f5eb8b86ecb226855c34999b.tar.bz2
comus-4dc44e8fce222a21f5eb8b86ecb226855c34999b.zip
track upstream
Diffstat (limited to 'kernel')
-rw-r--r--kernel/isrs.S16
-rw-r--r--kernel/kernel.c34
-rw-r--r--kernel/kernel.ld7
-rw-r--r--kernel/procs.c43
-rw-r--r--kernel/startup.S24
-rw-r--r--kernel/syscalls.c2
-rw-r--r--kernel/user.c261
-rw-r--r--kernel/vm.c335
8 files changed, 500 insertions, 222 deletions
diff --git a/kernel/isrs.S b/kernel/isrs.S
index ec539f2..f5fdbca 100644
--- a/kernel/isrs.S
+++ b/kernel/isrs.S
@@ -19,6 +19,8 @@
# .arch i386
#include <bootstrap.h>
+#include <offsets.h>
+#include <vm.h>
/*
** Configuration options - define in Makefile
@@ -100,8 +102,8 @@ isr_save:
**
** Set up parameters for the ISR call.
*/
- movl 52(%esp),%eax // get vector number and error code
- movl 56(%esp),%ebx
+ movl CTX_vector(%esp),%eax // get vector number and error code
+ movl CTX_code(%esp),%ebx
/*
***********************
@@ -120,11 +122,12 @@ isr_save:
// save the context pointer
movl current, %edx
- movl %esp, (%edx)
+ movl %esp, PCB_context(%edx)
// also save the page directory pointer
movl %cr3, %ecx
- movl %ecx, 4(%edx)
+ addl $KERN_BASE, %ecx // convert to a virtual address
+ movl %ecx, PCB_pdir(%edx)
// switch to the system stack
//
@@ -166,8 +169,9 @@ isr_restore:
***********************
*/
movl current, %ebx // return to the user stack
- movl (%ebx), %esp // ESP --> context save area
- movl 4(%ebx), %ecx // page directory pointer
+ movl PCB_context(%ebx), %esp // ESP --> context save area
+ movl PCB_pdir(%ebx), %ecx // page directory pointer
+ subl $KERN_BASE, %ecx // convert to a physical address
movl %ecx, %cr3
// now we're operating with the user process'
diff --git a/kernel/kernel.c b/kernel/kernel.c
index 1b940df..44a8eee 100644
--- a/kernel/kernel.c
+++ b/kernel/kernel.c
@@ -194,11 +194,11 @@ static void stats(int code)
case 'q': // dump the queues
// code to dump out any/all queues
- pcb_queue_dump("R", ready);
- pcb_queue_dump("W", waiting);
- pcb_queue_dump("S", sleeping);
- pcb_queue_dump("Z", zombie);
- pcb_queue_dump("I", sioread);
+ pcb_queue_dump("R", ready, true);
+ pcb_queue_dump("W", waiting, true);
+ pcb_queue_dump("S", sleeping, true);
+ pcb_queue_dump("Z", zombie, true);
+ pcb_queue_dump("I", sioread, true);
break;
case 'r': // print system configuration information
@@ -309,7 +309,7 @@ int main(void)
// report our configuration options
kreport(true);
- delay(DELAY_3_SEC);
+ delay(DELAY_2_SEC);
/*
** Other tasks typically performed here:
@@ -341,10 +341,11 @@ int main(void)
const char *args[2] = { "init", NULL };
// load it
- assert(user_load(prog, init_pcb, args) == SUCCESS);
+ assert(user_load(prog, init_pcb, args, true) == SUCCESS);
// send it on its merry way
schedule(init_pcb);
+ dispatch();
#ifdef TRACE_CX
// if we're using a scrolling region, wait a bit more and then set it up
@@ -366,9 +367,6 @@ int main(void)
"================================================================================");
#endif
- // switch to the "real" kernel page directory
- vm_set_kvm();
-
/*
** END OF TERM-SPECIFIC CODE
**
@@ -380,5 +378,21 @@ int main(void)
sio_enable(SIO_RX);
+ // produce a "system state" report
+ cio_puts("System status: Queues ");
+ pcb_queue_dump("R", ready, true);
+ pcb_queue_dump("W", waiting, true);
+ pcb_queue_dump("S", sleeping, true);
+ pcb_queue_dump("Z", zombie, true);
+ pcb_queue_dump("I", sioread, true);
+ ptable_dump_counts();
+ pcb_dump("Current: ", current, true);
+
+ delay(DELAY_3_SEC);
+
+ vm_print(current->pdir, true, TwoLevel);
+
+ delay(DELAY_3_SEC);
+
return 0;
}
diff --git a/kernel/kernel.ld b/kernel/kernel.ld
index 3167327..83f211c 100644
--- a/kernel/kernel.ld
+++ b/kernel/kernel.ld
@@ -26,6 +26,7 @@ SECTIONS
}
/* Could put STABs here */
+ /*
.stab : {
PROVIDE(__STAB_BEGIN__ = .);
*(.stab);
@@ -36,6 +37,7 @@ SECTIONS
*(.stabstr);
PROVIDE(__STABSTR_END__ = .);
}
+ */
/* Align the data segment at the next page boundary */
. = ALIGN(0x1000);
@@ -65,7 +67,8 @@ SECTIONS
PROVIDE(_end = .);
/DISCARD/ : {
- /* *(.stab .stab_info .stabstr) */
- *(.eh_frame .note.GNU-stack .note.gnu.property .comment)
+ *(.stab .stab_info .stabstr)
+ *(.eh_frame .eh_frame_hdr)
+ *(.note.GNU-stack .note.gnu.property .comment)
}
}
diff --git a/kernel/procs.c b/kernel/procs.c
index 88589d0..20e6784 100644
--- a/kernel/procs.c
+++ b/kernel/procs.c
@@ -74,7 +74,7 @@ uint_t next_pid;
pcb_t *init_pcb;
// table of state name strings
-const char *state_str[N_STATES] = {
+const char state_str[N_STATES][4] = {
[STATE_UNUSED] = "Unu", // "Unused"
[STATE_NEW] = "New",
[STATE_READY] = "Rdy", // "Ready"
@@ -87,16 +87,16 @@ const char *state_str[N_STATES] = {
};
// table of priority name strings
-const char *prio_str[N_PRIOS] = { [PRIO_HIGH] = "High",
- [PRIO_STD] = "User",
- [PRIO_LOW] = "Low ",
- [PRIO_DEFERRED] = "Def " };
+const char prio_str[N_PRIOS][5] = { [PRIO_HIGH] = "High",
+ [PRIO_STD] = "User",
+ [PRIO_LOW] = "Low ",
+ [PRIO_DEFERRED] = "Def " };
// table of queue ordering name strings
-const char *ord_str[N_PRIOS] = { [O_FIFO] = "FIFO",
- [O_PRIO] = "PRIO",
- [O_PID] = "PID ",
- [O_WAKEUP] = "WAKE" };
+const char ord_str[N_PRIOS][5] = { [O_FIFO] = "FIFO",
+ [O_PRIO] = "PRIO",
+ [O_PID] = "PID ",
+ [O_WAKEUP] = "WAKE" };
/*
** PRIVATE FUNCTIONS
@@ -939,7 +939,7 @@ void ctx_dump_all(const char *msg)
}
/**
-** _pcb_dump(msg,pcb)
+** pcb_dump(msg,pcb,all)
**
** Dumps the contents of this PCB to the console
**
@@ -964,7 +964,15 @@ void pcb_dump(const char *msg, register pcb_t *pcb, bool_t all)
}
cio_printf(" %d", pcb->pid);
+
cio_printf(" %s", pcb->state >= N_STATES ? "???" : state_str[pcb->state]);
+#if 0
+ if( pcb->state >= N_STATES ) {
+ cio_puts( " ????" );
+ } else {
+ cio_printf( " %s", state_str[pcb->state] );
+ }
+#endif
if (!all) {
// just printing IDs and states on one line
@@ -974,6 +982,13 @@ void pcb_dump(const char *msg, register pcb_t *pcb, bool_t all)
// now, the rest of the contents
cio_printf(" %s",
pcb->priority >= N_PRIOS ? "???" : prio_str[pcb->priority]);
+#if 0
+ if( pcb->priority >= N_PRIOS ) {
+ cio_puts( " ???" );
+ } else {
+ cio_printf( " %s", prio_str[pcb->priority] );
+ }
+#endif
cio_printf(" ticks %u xit %d wake %08x\n", pcb->ticks, pcb->exit_status,
pcb->wakeup);
@@ -1111,6 +1126,14 @@ void ptable_dump_counts(void)
for (n = 0; n < N_STATES; ++n) {
cio_printf(" %u %s", nstate[n],
state_str[n] != NULL ? state_str[n] : "???");
+#if 0
+ cio_printf( " %u ", nstate[n] );
+ if( state_str[n][0] != '\0' ) {
+ cio_puts( state_str[n] );
+ } else {
+ cio_puts( "???" );
+ }
+#endif
}
cio_putchar('\n');
}
diff --git a/kernel/startup.S b/kernel/startup.S
index 73a081e..94b93b0 100644
--- a/kernel/startup.S
+++ b/kernel/startup.S
@@ -33,18 +33,18 @@
/*
** A symbol for locating the beginning of the code.
*/
- .globl begtext
-
.text
-begtext:
+
+ .globl begtext
+ .globl _start
+_start = V2PNC(begtext)
/*
** The entry point. When we get here, we have just entered protected
** mode, so all the segment registers are incorrect except for CS.
*/
- .globl _start
+begtext:
-_start:
cli /* seems to be reset on entry to p. mode */
movb $NMI_ENABLE, %al /* re-enable NMIs (bootstrap */
outb $CMOS_ADDR /* turned them off) */
@@ -110,10 +110,18 @@ clearbss:
# set the initial frame pointer
xorl %ebp, %ebp
+ # now, jump and switch into using high addresses
+ # we use an indirect jump here because the assembler
+ # would ordinarily generate a PC-relative target
+ # address for the jump, which would not have the
+ # desired effect
+ movl $onward, %eax
+ jmp *%eax
+
+onward:
+
/*
-** Call the system initialization routine, and switch to
-** executing at high addresses. We use an indirect jump
-** here to avoid getting a PC-relative 'jmp' instruction.
+** Call the system initialization routine.
**
** Alternate idea: push the address of isr_restore
** and just do an indirect jump?
diff --git a/kernel/syscalls.c b/kernel/syscalls.c
index 0653c93..92a0a23 100644
--- a/kernel/syscalls.c
+++ b/kernel/syscalls.c
@@ -354,7 +354,7 @@ SYSIMPL(exec)
pcb->pdir = NULL;
// "load" it and set up the VM tables for this process
- int status = user_load(prog, pcb, args);
+ int status = user_load(prog, pcb, args, false);
if (status != SUCCESS) {
RET(pcb) = status;
SYSCALL_EXIT(status);
diff --git a/kernel/user.c b/kernel/user.c
index e9398ff..0e5e186 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -426,44 +426,27 @@ static int read_phdrs(elfhdr_t *hdr, pcb_t *pcb)
** @param pcb Pointer to the PCB for the process
** @param entry Entry point for the new process
** @param args Argument vector to be put in place
+** @param sys Is the argument vector from kernel code?
**
-** @return A pointer to the context_t on the stack, or NULL
+** @return A (user VA) pointer to the context_t on the stack, or NULL
*/
-static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
+static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args,
+ bool_t sys)
{
#if TRACING_USER
- cio_printf("stksetup: pcb %08x, entry %08, args %08x\n", (uint32_t)pcb,
+ cio_printf("stksetup: pcb %08x, entry %08x, args %08x\n", (uint32_t)pcb,
entry, (uint32_t)args);
#endif
/*
- ** First, we need to count the space we'll need for the argument
+ ** First, we need to calculate the space we'll need for the argument
** vector and strings.
- */
-
- int argbytes = 0;
- int argc = 0;
-
- while (args[argc] != NULL) {
- int n = strlen(args[argc]) + 1;
- // can't go over one page in size
- if ((argbytes + n) > SZ_PAGE) {
- // oops - ignore this and any others
- break;
- }
- argbytes += n;
- ++argc;
- }
-
- // Round up the byte count to the next multiple of four.
- argbytes = (argbytes + 3) & MOD4_MASK;
-
- /*
- ** Allocate the arrays. We are safe using dynamic arrays here
- ** because we're using the OS stack, not the user stack.
**
- ** We want the argstrings and argv arrays to contain all zeroes.
- ** The C standard states, in section 6.7.8, that
+ ** Keeping track of kernel vs. user VAs is tricky, so we'll use
+ ** a prefix on variable names: kv_* is a kernel virtual address;
+ ** uv_* is a user virtual address.
+ **
+ ** We rely on the C standard, section 6.7.8, to clear these arrays:
**
** "21 If there are fewer initializers in a brace-enclosed list
** than there are elements or members of an aggregate, or
@@ -472,31 +455,36 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
** the remainder of the aggregate shall be initialized
** implicitly the same as objects that have static storage
** duration."
- **
- ** Sadly, because we're using variable-sized arrays, we can't
- ** rely on this, so we have to call memclr() instead. :-( In
- ** truth, it doesn't really cost us much more time, but it's an
- ** annoyance.
*/
- char argstrings[argbytes];
- char *argv[argc + 1];
+ int argbytes = 0; // total length of arg strings
+ int argc = 0; // number of argv entries
+ const char *kv_strs[N_ARGS] = { 0 }; // converted user arg string pointers
+ int strlengths[N_ARGS] = { 0 }; // length of each string
+ const char *uv_argv[N_ARGS] = { 0 }; // argv pointers
- CLEAR(argstrings);
- CLEAR(argv);
+ /*
+ ** If the argument list given to us came from user code, we need
+ ** to convert its address and the addresses it contains to kernel
+ ** VAs; otherwise, we can use them directly.
+ */
+ char **kv_args = sys ? args : vm_uva2kva(pcb->pdir, (void *)args);
- // Next, duplicate the argument strings, and create pointers to
- // each one in our argv.
- char *tmp = argstrings;
- for (int i = 0; i < argc; ++i) {
- int nb = strlen(args[i]) + 1; // bytes (incl. NUL) in this string
- strcpy(tmp, args[i]); // add to our buffer
- argv[i] = tmp; // remember where it was
- tmp += nb; // move on
+ while (kv_args[argc] != NULL) {
+ kv_strs[argc] = sys ? args[argc] :
+ vm_uva2kva(pcb->pdir, (void *)(kv_args[argc]));
+ strlengths[argc] = strlen(kv_strs[argc]) + 1;
+ // can't go over one page in size
+ if ((argbytes + strlengths[argc]) > SZ_PAGE) {
+ // oops - ignore this and any others
+ break;
+ }
+ argbytes += strlengths[argc];
+ ++argc;
}
- // trailing NULL pointer
- argv[argc] = NULL;
+ // Round up the byte count to the next multiple of four.
+ argbytes = (argbytes + 3) & MOD4_MASK;
/*
** The pages for the stack were cleared when they were allocated,
@@ -524,32 +512,44 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
** see below for more information.
*/
- // Pointer to the last word in stack. We get this from the
- // VM hierarchy. Get the PDE entry for the user address space.
- pde_t stack_pde = pcb->pdir[USER_PDE];
+ /*
+ ** Find the user stack. The PDE entry for user address space points
+ ** to a page table for the first 4MB of the address space, but the
+ ** "pointer" there is a physical frame address.
+ */
+ pde_t *kv_userpt = (pde_t *)P2V(PTE_ADDR(pcb->pdir[USER_PDE]));
+ assert(kv_userpt != NULL);
- // The PDE entry points to the PT, which is an array of PTE. The last
- // two entries are for the stack; pull out the last one.
- pte_t stack_pte = ((pte_t *)(stack_pde & MOD4K_MASK))[USER_STK_PTE2];
+ /*
+ ** The final entries in that PMT are for the pages of the user stack.
+ ** Grab the address of the frame for the last one. (Again, we need
+ ** to convert it to a virtual address we can use.)
+ */
+
+ // the PMT entry for that page
+ pte_t pmt_entry = kv_userpt[USER_STK_LAST_PTE];
+ assert(IS_PRESENT(pmt_entry));
+
+ // kernel VA for the first byte following that page
+ uint8_t *kv_ptr = (uint8_t *)P2V(PTE_ADDR(pmt_entry) + SZ_PAGE);
+ assert(kv_ptr != NULL);
- // OK, now we have the PTE. The frame address of the last page is
- // in this PTE. Find the address immediately after that.
- uint32_t *ptr = (uint32_t *)((uint32_t)(stack_pte & MOD4K_MASK) + SZ_PAGE);
+ // user VA for the first byte following that page
+ uint32_t *uv_ptr = (uint32_t *)(USER_STACK_P2 + SZ_PAGE);
- // Pointer to where the arg strings should be filled in.
- char *strings = (char *)((uint32_t)ptr - argbytes);
+ // Pointers to where the arg strings should be filled in.
+ uint32_t kv_strings = ((uint32_t)kv_ptr) - argbytes;
+ uint32_t uv_strings = ((uint32_t)uv_ptr) - argbytes;
- // back the pointer up to the nearest word boundary; because we're
+ // back the pointers up to the nearest word boundary; because we're
// moving toward location 0, the nearest word boundary is just the
// next smaller address whose low-order two bits are zeroes
- strings = (char *)((uint32_t)strings & MOD4_MASK);
-
- // Copy over the argv strings.
- memcpy((void *)strings, argstrings, argbytes);
+ kv_strings &= MOD4_MASK;
+ uv_strings &= MOD4_MASK;
/*
- ** Next, we need to copy over the argv pointers. Start by
- ** determining where 'argc' should go.
+ ** Next, we need to copy over the data. Start by determining where
+ ** 'argc' should go.
**
** Stack alignment is controlled by the SysV ABI i386 supplement,
** version 1.2 (June 23, 2016), which states in section 2.2.2:
@@ -563,7 +563,7 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
**
** Isn't technical documentation fun? Ultimately, this means that
** the first parameter to main() should be on the stack at an address
- ** that is a multiple of 16.
+ ** that is a multiple of 16. In our case, that is 'argc'.
**
** The space needed for argc, argv, and the argv array itself is
** argc + 3 words (argc+1 for the argv entries, plus one word each
@@ -571,45 +571,66 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
*/
int nwords = argc + 3;
- uint32_t *acptr = ((uint32_t *)strings) - nwords;
+ uint32_t *kv_acptr = ((uint32_t *)kv_strings) - nwords;
+ uint32_t *uv_acptr = ((uint32_t *)uv_strings) - nwords;
- /*
- ** Next, back up until we're at a multiple-of-16 address. Because we
- ** are moving to a lower address, its upper 28 bits are identical to
- ** the address we currently have, so we can do this with a bitwise
- ** AND to just turn off the lower four bits.
- */
+ // back these up to multiple-of-16 addresses for stack alignment
+ kv_acptr = (uint32_t *)(((uint32_t)kv_acptr) & MOD16_MASK);
+ uv_acptr = (uint32_t *)(((uint32_t)uv_acptr) & MOD16_MASK);
- acptr = (uint32_t *)(((uint32_t)acptr) & MOD16_MASK);
+ // the argv location
+ uint32_t *kv_avptr = kv_acptr + 1;
- // copy in 'argc'
- *acptr = argc;
+ // the user address for the first argv entry
+ uint32_t *uv_avptr = uv_acptr + 2;
- // next, 'argv', which follows 'argc'; 'argv' points to the
- // word that follows it in the stack
- uint32_t *avptr = acptr + 2;
- *(acptr + 1) = (uint32_t)avptr;
+ // Copy over the argv strings.
+ for (int i = 0; i < argc; ++i) {
+ // copy the string using kernel addresses
+ strcpy((char *)kv_strings, kv_args[i]);
+
+ // remember the user address where this string went
+ uv_argv[i] = (char *)uv_strings;
+
+ // adjust both string addresses
+ kv_strings += strlengths[i];
+ uv_strings += strlengths[i];
+ }
/*
- ** Next, we copy in all argc+1 pointers.
+ ** Next, we copy in argc, argv, and the pointers. The stack will
+ ** look something like this:
+ **
+ ** kv_avptr
+ ** kv_acptr |
+ ** | |
+ ** v v
+ ** argc argv av[0] av[1] etc NULL str0 str1 etc.
+ ** [....][....][....][....] ... [0000] ... [......0......0.........]
+ ** | ^ | | ^ ^
+ ** | | | | | |
+ ** ------ | ---------------------|-------
+ ** ---------------------------
*/
- // Adjust and copy the string pointers.
- for (int i = 0; i <= argc; ++i) {
- if (argv[i] != NULL) {
- // an actual pointer - adjust it and copy it in
- *avptr = (uint32_t)strings;
- // skip to the next entry in the array
- strings += strlen(argv[i]) + 1;
- } else {
- // end of the line!
- *avptr = NULL;
- }
- ++avptr;
+ // copy in 'argc'
+ *kv_acptr = argc;
+
+ // copy in 'argv'
+ *kv_avptr++ = (uint32_t)uv_avptr;
+
+ // now, the argv entries themselves
+ for (int i = 0; i < argc; ++i) {
+ *kv_avptr++ = (uint32_t)uv_argv[i];
}
+ // and the trailing NULL
+ *kv_avptr = NULL;
+
/*
- ** Now, we need to set up the initial context for the executing
+ ** Almost done!
+ **
+ ** Now we need to set up the initial context for the executing
** process.
**
** When this process is dispatched, the context restore code will
@@ -618,27 +639,34 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
** the interrupt "returns" to the entry point of the process.
*/
- // Locate the context save area on the stack.
- context_t *ctx = ((context_t *)avptr) - 1;
+ // Locate the context save area on the stack by backing up one
+ // "context" from where the argc value is saved
+ context_t *kv_ctx = ((context_t *)kv_acptr) - 1;
+ uint32_t uv_ctx = (uint32_t)(((context_t *)uv_acptr) - 1);
/*
** We cleared the entire stack earlier, so all the context
** fields currently contain zeroes. We now need to fill in
** all the important fields.
+ **
+ ** Note: we don't need to set the ESP value for the process,
+ ** as the 'popa' that restores the general registers doesn't
+ ** actually restore ESP from the context area - it leaves it
+ ** where it winds up.
*/
- ctx->eflags = DEFAULT_EFLAGS; // IE enabled, PPL 0
- ctx->eip = entry; // initial EIP
- ctx->cs = GDT_CODE; // segment registers
- ctx->ss = GDT_STACK;
- ctx->ds = ctx->es = ctx->fs = ctx->gs = GDT_DATA;
+ kv_ctx->eflags = DEFAULT_EFLAGS; // IF enabled, IOPL 0
+ kv_ctx->eip = entry; // initial EIP
+ kv_ctx->cs = GDT_CODE; // segment registers
+ kv_ctx->ss = GDT_STACK;
+ kv_ctx->ds = kv_ctx->es = kv_ctx->fs = kv_ctx->gs = GDT_DATA;
/*
** Return the new context pointer to the caller. It will be our
** caller's responsibility to schedule this process.
*/
- return (ctx);
+ return ((context_t *)uv_ctx);
}
/*
@@ -809,10 +837,11 @@ int user_duplicate(pcb_t *new, pcb_t *old)
** @param ptab A pointer to the program table entry to be loaded
** @param pcb The PCB for the program being loaded
** @param args The argument vector for the program
+** @param sys Is the argument vector from kernel code?
**
** @return the status of the load attempt
*/
-int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
+int user_load(prog_t *ptab, pcb_t *pcb, const char **args, bool_t sys)
{
// NULL pointers are bad!
assert1(ptab != NULL);
@@ -820,7 +849,7 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
assert1(args != NULL);
#if TRACING_USER
- cio_printf("uload: prog '%s' pcb %08x args %08x\n",
+ cio_printf("Uload: prog '%s' pcb %08x args %08x\n",
ptab->name[0] ? ptab->name : "?", (uint32_t)pcb, (uint32_t)args);
#endif
@@ -832,8 +861,16 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
(uint32_t)ptab, ptab->name, ptab->offset, ptab->size,
ptab->flags);
cio_printf(" args %08x:", (uint32_t)args);
- for (int i = 0; args[i] != NULL; ++i) {
- cio_printf(" [%d] %s", i, args[i]);
+ if (sys) {
+ for (int i = 0; args[i] != NULL; ++i) {
+ cio_printf(" [%d] %s", i, args[i]);
+ }
+ } else {
+ char **kv_args = vm_uva2kva(pcb->pdir, args);
+ for (int i = 0; kv_args[i] != NULL; ++i) {
+ cio_printf(" [%d] %s", i,
+ (char *)vm_uva2kva(pcb->pdir, kv_args[i]));
+ }
}
cio_printf("\n pcb %08x (pid %u)\n", (uint32_t)pcb, pcb->pid);
dump_fhdr(hdr);
@@ -853,8 +890,8 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
// read all the program headers
int stat = read_phdrs(hdr, pcb);
if (stat != SUCCESS) {
- // TODO figure out a better way to deal with this
- PANIC(0, "user_load: phdr read failed");
+ cio_printf("Uload: read_phdrs('%s') returned %d\n", ptab->name, stat);
+ PANIC(0, "User_load: phdr read failed");
}
// next, set up the runtime stack - just like setting up loadable
@@ -862,12 +899,12 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
stat =
vm_add(pcb->pdir, true, false, (void *)USER_STACK, SZ_USTACK, NULL, 0);
if (stat != SUCCESS) {
- // TODO yadda yadda...
- PANIC(0, "user_load: vm_add failed");
+ cio_printf("Uload: vm_add('%s') stack returned %d\n", ptab->name, stat);
+ PANIC(0, "user_load: vm_add stack failed");
}
// set up the command-line arguments
- pcb->context = stack_setup(pcb, hdr->e_entry, args);
+ pcb->context = stack_setup(pcb, hdr->e_entry, args, sys);
return SUCCESS;
}
@@ -883,7 +920,7 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
void user_cleanup(pcb_t *pcb)
{
#if TRACING_USER
- cio_printf("uclean: %08x\n", (uint32_t)pcb);
+ cio_printf("Uclean: %08x\n", (uint32_t)pcb);
#endif
if (pcb == NULL) {
diff --git a/kernel/vm.c b/kernel/vm.c
index 7d43bb8..a700bcb 100644
--- a/kernel/vm.c
+++ b/kernel/vm.c
@@ -58,45 +58,155 @@ static void vm_isr(int vector, int code)
}
/**
-** Name: uva2kva
+** Name: ptcount
**
-** Convert a user VA into a kernel address. Works for all addresses -
-** if the address is a page address, the PERMS(va) value will be 0;
-** otherwise, it is the offset into the page.
+** Count the number of each type of entry in a page table.
+** Returns a 32-bit result containing two 16-bit counts:
**
-** @param pdir Pointer to the page directory to examine
-** @param va Virtual address to check
+** Upper half Lower half
+** PDIR: # of 4MB entries # of 'present' entries
+** PMT: zero # of 'present' entries
+**
+** The number of "not present" can be calculated from these.
+**
+** @param ptr Pointer to the page table
+** @param dir Is it a page directory (vs. a page table)?
*/
ATTR_UNUSED
-static void *uva2kva(pde_t *pdir, void *va)
+static uint32_t ptcount(pte_t *ptr, bool_t dir)
{
- // find the PMT entry for this address
- pte_t *pte = vm_getpte(pdir, va, false);
- if (pte == NULL) {
- return NULL;
+ uint16_t n_np = 0, n_p = 0, n_lg = 0; // tallies: not-present, present, and large (4MB) entries
+
+ for (int i = 0; i < N_PTE; ++i) { // visit every slot in the table
+ pde_t entry = *ptr++;
+ if (!IS_PRESENT(entry)) {
+ ++n_np; // tallied but not returned; the caller can derive it from N_PTE
+ continue;
+ }
+ if (dir && IS_LARGE(entry)) { // large pages are only tallied when scanning a directory
+ ++n_lg;
+ } else {
+ ++n_p;
+ }
}
- // get the entry
- pte_t entry = *pte;
+ // pack the result: large-page count in the upper 16 bits, present count in the lower 16 (n_lg will be 0 for PMTs)
+ return (n_lg << 16) | n_p;
+}
- // is this a valid address for the user?
- if (IS_PRESENT(entry)) {
- return NULL;
+// decode a PDE: print its index, raw value, the names of the flag bits that are set, and the address it refers to
+static void pde_prt(uint32_t level, uint32_t i, uint32_t entry)
+{
+ // indent: one indent string per hierarchy level, plus one
+ for (int n = 0; n <= level; ++n)
+ cio_puts(" ");
+ // line header: entry index, then the raw 32-bit entry
+ cio_printf("[%08x] %08x", i, entry);
+ // perms: the first group is only reported for large-page entries
+ if (IS_LARGE(entry)) { // PS is 1
+ if ((entry & PDE_PAT) != 0)
+ cio_puts(" PAT");
+ if ((entry & PDE_G) != 0)
+ cio_puts(" G");
+ cio_puts(" PS");
+ if ((entry & PDE_D) != 0)
+ cio_puts(" D");
}
+ if ((entry & PDE_A) != 0)
+ cio_puts(" A");
+ if ((entry & PDE_PCD) != 0)
+ cio_puts(" CD");
+ if ((entry & PDE_PWT) != 0)
+ cio_puts(" WT");
+ if ((entry & PDE_US) != 0)
+ cio_puts(" U");
+ if ((entry & PDE_RW) != 0)
+ cio_puts(" W");
+ cio_puts((entry & PDE_P) != 0 ? " P" : "!P"); // the present state is always reported, set or not
- // is this a system-only page?
- if (IS_SYSTEM(entry)) {
- return NULL;
+ cio_printf(" --> %s %08x", IS_LARGE(entry) ? "Pg" : "PT", PDE_ADDR(entry)); // target: a large page ("Pg") or a page table ("PT")
+}
+
+// decode a PTE: print its index, raw value, the names of the flag bits that are set, and the frame address it maps
+static void pte_prt(uint32_t level, uint32_t i, uint32_t entry)
+{
+ // indent: one indent string per hierarchy level, plus one
+ for (int n = 0; n <= level; ++n)
+ cio_puts(" ");
+ // line header: entry index, then the raw 32-bit entry
+ cio_printf("[%08x] %08x", i, entry);
+ // perms -- NOTE(review): these are the PDE_* masks applied to a PTE; confirm each bit position (PAT in particular) is the same for a 4KB PTE
+ if ((entry & PDE_G) != 0)
+ cio_puts(" G");
+ if ((entry & PDE_PAT) != 0)
+ cio_puts(" PAT");
+ if ((entry & PDE_D) != 0)
+ cio_puts(" D");
+ if ((entry & PDE_A) != 0)
+ cio_puts(" A");
+ if ((entry & PDE_PCD) != 0)
+ cio_puts(" CD");
+ if ((entry & PDE_PWT) != 0)
+ cio_puts(" WT");
+ if ((entry & PDE_US) != 0)
+ cio_puts(" U");
+ if ((entry & PDE_RW) != 0)
+ cio_puts(" W");
+ cio_puts((entry & PDE_P) != 0 ? " P" : "!P"); // the present state is always reported, set or not
+
+ cio_printf(" --> Pg %08x", PTE_ADDR(entry)); // frame address taken from the entry
+}
+
+/**
+** Name: pdump
+**
+** Recursive helper for table hierarchy dump.
+**
+** @param level Current hierarchy level
+** @param pt Page table to display
+** @param dir Is it a page directory (vs. a page table)?
+** @param mode How to display the entries
+*/
+ATTR_UNUSED
+static void pdump(uint_t level, void *pt, bool_t dir, enum vmmode_e mode)
+{
+ // 'pt' is dereferenced directly, so it must be an address that is
+ // valid in the current (kernel) address space
+ pte_t *ptr = (pte_t *)pt;
+
+ // summary line: table kind, table address, then the entry counts
+ // returned by ptcount() (upper half 4MB entries, lower half present)
+ cio_printf("%s at 0x%08x:", dir ? "PDir" : "PTbl", (uint32_t)pt);
+ uint32_t nums = ptcount(ptr, dir);
+ if (dir) {
+ cio_printf(" %u 4MB", (nums >> 16));
}
+ cio_printf(" %u P %u !P\n", nums & 0xffff,
+ N_PTE - ((nums >> 16) + (nums & 0xffff)));
- // get the physical address
- uint32_t frame = PTE_ADDR(*pte) | PERMS(va);
+ for (uint32_t i = 0; i < (uint32_t)N_PTE; ++i) {
+ pte_t entry = *ptr;
+ if (dir) {
+ // this is a PDIR entry; could be either a 4MB
+ // page, or a PMT pointer
+ if (mode > Simple) {
+ pde_prt(level, i, entry);
+ cio_putchar('\n');
+ // only descend into page tables that actually exist;
+ // the entry holds the table's physical frame address
+ // plus flag bits, so strip the flags and convert the
+ // result to a kernel virtual address before using it
+ if (IS_PRESENT(entry) && !IS_LARGE(entry)) {
+ pdump(level + 1, (void *)P2V(PDE_ADDR(entry)), false, mode);
+ }
+ }
+ } else {
+ // just a PMT entry
+ if (mode > Simple) {
+ pte_prt(level, i, entry);
+ cio_putchar('\n');
+ }
+ }
- return (void *)P2V(frame);
+ // move to the next entry
+ ++ptr;
+ }
}
/**
-** Name: ptdump
+** Name: pmt_dump
**
** Dump the non-zero entries of a page table or directory
**
@@ -105,7 +215,8 @@ static void *uva2kva(pde_t *pdir, void *va)
** @param start First entry to process
** @param num Number of entries to process
*/
-static void ptdump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num)
+ATTR_UNUSED
+static void pmt_dump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num)
{
cio_printf("\n\nP%c dump", dir ? 'D' : 'T');
cio_printf(" of %08x", (uint32_t)pt);
@@ -169,7 +280,23 @@ void vm_init(void)
assert(kpdir != NULL);
#if TRACING_VM
- cio_printf("vm_init: kpdir is %08x\n", kpdir);
+ cio_printf("vm_init: kpdir %08x, adding user pages\n", kpdir);
+#endif
+
+ // add the entries for the user address space
+ for (uint32_t addr = 0; addr < NUM_4MB; addr += SZ_PAGE) {
+ int stat = vm_map(kpdir, (void *)addr, addr, SZ_PAGE, PTE_RW);
+ if (stat != SUCCESS) {
+ cio_printf("vm_init, map %08x->%08x failed, status %d\n", addr,
+ addr, stat);
+ PANIC(0, "vm_init user range map failed");
+ }
+#if TRACING_VM
+ cio_putchar('.');
+#endif
+ }
+#if TRACING_VM
+ cio_puts(" done\n");
#endif
// switch to it
@@ -184,6 +311,44 @@ void vm_init(void)
}
/**
+** Name: vm_uva2kva
+**
+** Convert a user VA into a kernel address. Works for all addresses -
+** if the address is a page address, the low-order twelve bits will be
+** zeroes; otherwise, they are the offset into the page, which is
+** unchanged across the address spaces.
+**
+** @param pdir Pointer to the page directory to examine
+** @param va Virtual address to check
+*/
+void *vm_uva2kva(pde_t *pdir, void *va)
+{
+ // find the PMT entry for this address (do not allocate a page
+ // table if one does not already exist)
+ pte_t *pte = vm_getpte(pdir, va, false);
+ if (pte == NULL) {
+ return NULL;
+ }
+
+ // get the entry
+ pte_t entry = *pte;
+
+ // is this a valid address for the user? an entry that is not
+ // present has no frame behind it, so there is nothing to translate
+ if (!IS_PRESENT(entry)) {
+ return NULL;
+ }
+
+ // is this a system-only page?
+ if (IS_SYSTEM(entry)) {
+ return NULL;
+ }
+
+ // get the physical address: frame address from the entry, combined
+ // with the low-order bits of the original virtual address
+ uint32_t frame = PTE_ADDR(entry) | PERMS(va);
+
+ // hand back the kernel virtual address for that physical location
+ return (void *)P2V(frame);
+}
+
+/**
** Name: vm_pagedup
**
** Duplicate a page of memory
@@ -206,46 +371,45 @@ void *vm_pagedup(void *old)
**
** Duplicate a page directory entry
**
-** @param dst Pointer to where the duplicate should go
-** @param curr Pointer to the entry to be duplicated
+** @param entry The entry to be duplicated
**
-** @return true on success, else false
+** @return the new entry, or -1 on error
*/
-bool_t vm_pdedup(pde_t *dst, pde_t *curr)
+pde_t vm_pdedup(pde_t entry)
{
- assert1(curr != NULL);
- assert1(dst != NULL);
-
#if TRACING_VM
- cio_printf("vm_pdedup dst %08x curr %08x\n", (uint32_t)dst, (uint32_t)curr);
+ cio_printf("vm_pdedup curr %08x\n", (uint32_t)entry);
#endif
- pde_t entry = *curr;
// simplest case
if (!IS_PRESENT(entry)) {
- *dst = 0;
- return true;
+ return 0;
}
- // OK, we have an entry; allocate a page table for it
- pte_t *newtbl = (pte_t *)km_page_alloc();
- if (newtbl == NULL) {
- return false;
+ // is this a large page?
+ if (IS_LARGE(entry)) {
+ // just copy it
+ return entry;
}
- // we could clear the new table, but we'll be assigning to
- // each entry anyway, so we'll save the execution time
+ // OK, we have a 4KB entry; allocate a page table for it
+ pte_t *tblva = (pte_t *)km_page_alloc();
+ if (tblva == NULL) {
+ return (uint32_t)-1;
+ }
- // address of the page table for this directory entry
- pte_t *old = (pte_t *)PDE_ADDR(entry);
+ // make sure the entries are all initially 'not present'
+ memclr(tblva, SZ_PAGE);
- // pointer to the first PTE in the new table
- pte_t *new = newtbl;
+ // VA of the page table for this directory entry
+ pte_t *old = (pte_t *)P2V(PDE_ADDR(entry));
+
+ // pointer to the first PTE in the new table (already a VA)
+ pte_t *new = tblva;
for (int i = 0; i < N_PTE; ++i) {
- if (!IS_PRESENT(*old)) {
- *new = 0;
- } else {
+ // only need to copy 'present' entries
+ if (IS_PRESENT(*old)) {
*new = *old;
}
++old;
@@ -253,10 +417,8 @@ bool_t vm_pdedup(pde_t *dst, pde_t *curr)
}
// replace the page table address
- // upper 22 bits from 'newtbl', lower 12 from '*curr'
- *dst = (pde_t)(PTE_ADDR(newtbl) | PERMS(entry));
-
- return true;
+ // (PA of page table, lower 12 bits from 'entry')
+ return (pde_t)(V2P(PTE_ADDR(tblva)) | PERMS(entry));
}
/**
@@ -282,8 +444,7 @@ pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc)
assert1(pdir != NULL);
// get the PDIR entry for this virtual address
- uint32_t ix = PDIX(va);
- pde_t *pde_ptr = &pdir[ix];
+ pde_t *pde_ptr = &pdir[PDIX(va)];
// is it already set up?
if (IS_PRESENT(*pde_ptr)) {
@@ -319,10 +480,8 @@ pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc)
*pde_ptr = V2P(ptbl) | PDE_P | PDE_RW;
}
- // finally, return a pointer to the entry in the
- // page table for this VA
- ix = PTIX(va);
- return &ptbl[ix];
+ // finally, return a pointer to the entry in the page table for this VA
+ return &ptbl[PTIX(va)];
}
// Set up kernel part of a page table.
@@ -337,7 +496,7 @@ pde_t *vm_mkkvm(void)
}
#if 0 && TRACING_VM
cio_puts( "\nEntering vm_mkkvm\n" );
- ptdump( pdir, true, 0, N_PDE );
+ pmt_dump( pdir, true, 0, N_PDE );
#endif
// clear it out to disable all the entries
@@ -361,8 +520,8 @@ pde_t *vm_mkkvm(void)
}
#if 0 && TRACING_VM
cio_puts( "\nvm_mkkvm() final PD:\n" );
- ptdump( pdir, true, 0, 16 );
- ptdump( pdir, true, 0x200, 16 );
+ pmt_dump( pdir, true, 0, 16 );
+ pmt_dump( pdir, true, 0x200, 16 );
#endif
return pdir;
@@ -382,19 +541,26 @@ pde_t *vm_mkuvm(void)
return NULL;
}
- // iterate through the kernel page directory
- pde_t *curr = kpdir;
- pde_t *dst = new;
- for (int i = 0; i < N_PDE; ++i) {
+ // iterate through the 'system' portions of the kernel
+ // page directory
+ int i = PDIX(KERN_BASE);
+ pde_t *curr = &kpdir[i];
+ pde_t *dst = &new[i];
+ while (i < N_PDE) {
if (*curr != 0) {
// found an active one - duplicate it
- if (!vm_pdedup(dst, curr)) {
+ pde_t entry = vm_pdedup(*curr);
+ if (entry == (uint32_t)-1) {
return NULL;
}
+ *dst = entry;
+ } else {
+ *dst = 0;
}
++curr;
++dst;
+ ++i;
}
return new;
@@ -481,7 +647,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
// figure out where this page will go in the hierarchy
pte_t *pte = vm_getpte(pdir, va, true);
if (pte == NULL) {
- // TODO if i > 0, this isn't the first frame - is
+ // if i > 0, this isn't the first frame - is
// there anything to do about other frames?
// POSSIBLE MEMORY LEAK?
return E_NO_MEMORY;
@@ -490,7 +656,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
// allocate the frame
void *page = km_page_alloc();
if (page == NULL) {
- // TODO same question here
+ // same question here
return E_NO_MEMORY;
}
@@ -498,7 +664,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
memclr(page, SZ_PAGE);
// create the PTE for this frame
- uint32_t entry = (uint32_t)(PTE_ADDR(page) | entrybase);
+ uint32_t entry = (uint32_t)(PTE_ADDR(V2P(page)) | entrybase);
*pte = entry;
// copy data if we need to
@@ -506,7 +672,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
// how much to copy
uint32_t num = bytes > SZ_PAGE ? SZ_PAGE : bytes;
// do it!
- memcpy((void *)page, (void *)data, num);
+ memmove((void *)page, (void *)data, num);
// adjust all the pointers
data += num; // where to continue
bytes -= num; // what's left to copy
@@ -644,7 +810,7 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm)
PDIX(addr), PTIX(addr));
// dump the directory
- ptdump(pdir, true, PDIX(addr), 4);
+ pmt_dump(pdir, true, PDIX(addr), 4);
// find the relevant PDE entry
uint32_t ix = PDIX(va);
@@ -653,7 +819,7 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm)
// round the PMT index down
uint32_t ix2 = PTIX(va) & MOD4_MASK;
// dump the PMT for the relevant directory entry
- ptdump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4);
+ pmt_dump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4);
}
#endif
PANIC(0, "mapping an already-mapped address");
@@ -735,3 +901,26 @@ int vm_uvmdup(pde_t *new, pde_t *old)
return SUCCESS;
}
+
+/**
+** Name: vm_print
+**
+** Print out a paging hierarchy.
+**
+** @param pt Page table to display
+** @param dir Is it a page directory (vs. a page table)?
+** @param mode How to display the entries
+*/
+void vm_print(void *pt, bool_t dir, enum vmmode_e mode)
+{
+ cio_puts("\nVM hierarchy"); // banner; no newline yet, so the next output continues on this line
+ if (pt == NULL) { // nothing to walk
+ cio_puts(" (NULL pointer)\n");
+ return;
+ }
+
+ cio_printf("Starting at 0x%08x (%s):\n", (uint32_t)pt, // NOTE(review): this follows the banner with no separator - confirm that is intended
+ dir ? "PDIR" : "PMT");
+
+ pdump(0, pt, dir, mode); // dump the table starting at level 0
+}