diff options
author | Freya Murphy <freya@freyacat.org> | 2025-03-31 12:41:04 -0400 |
---|---|---|
committer | Freya Murphy <freya@freyacat.org> | 2025-03-31 12:41:04 -0400 |
commit | 4dc44e8fce222a21f5eb8b86ecb226855c34999b (patch) | |
tree | 83e42cbd2cfde437a35216e9c1ee58c0af995e94 /kernel | |
parent | track changes (diff) | |
download | comus-4dc44e8fce222a21f5eb8b86ecb226855c34999b.tar.gz comus-4dc44e8fce222a21f5eb8b86ecb226855c34999b.tar.bz2 comus-4dc44e8fce222a21f5eb8b86ecb226855c34999b.zip |
track upstream
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/isrs.S | 16 | ||||
-rw-r--r-- | kernel/kernel.c | 34 | ||||
-rw-r--r-- | kernel/kernel.ld | 7 | ||||
-rw-r--r-- | kernel/procs.c | 43 | ||||
-rw-r--r-- | kernel/startup.S | 24 | ||||
-rw-r--r-- | kernel/syscalls.c | 2 | ||||
-rw-r--r-- | kernel/user.c | 261 | ||||
-rw-r--r-- | kernel/vm.c | 335 |
8 files changed, 500 insertions, 222 deletions
diff --git a/kernel/isrs.S b/kernel/isrs.S index ec539f2..f5fdbca 100644 --- a/kernel/isrs.S +++ b/kernel/isrs.S @@ -19,6 +19,8 @@ # .arch i386 #include <bootstrap.h> +#include <offsets.h> +#include <vm.h> /* ** Configuration options - define in Makefile @@ -100,8 +102,8 @@ isr_save: ** ** Set up parameters for the ISR call. */ - movl 52(%esp),%eax // get vector number and error code - movl 56(%esp),%ebx + movl CTX_vector(%esp),%eax // get vector number and error code + movl CTX_code(%esp),%ebx /* *********************** @@ -120,11 +122,12 @@ isr_save: // save the context pointer movl current, %edx - movl %esp, (%edx) + movl %esp, PCB_context(%edx) // also save the page directory pointer movl %cr3, %ecx - movl %ecx, 4(%edx) + addl $KERN_BASE, %ecx // convert to a virtual address + movl %ecx, PCB_pdir(%edx) // switch to the system stack // @@ -166,8 +169,9 @@ isr_restore: *********************** */ movl current, %ebx // return to the user stack - movl (%ebx), %esp // ESP --> context save area - movl 4(%ebx), %ecx // page directory pointer + movl PCB_context(%ebx), %esp // ESP --> context save area + movl PCB_pdir(%ebx), %ecx // page directory pointer + subl $KERN_BASE, %ecx // convert to a physical address movl %ecx, %cr3 // now we're operating with the user process' diff --git a/kernel/kernel.c b/kernel/kernel.c index 1b940df..44a8eee 100644 --- a/kernel/kernel.c +++ b/kernel/kernel.c @@ -194,11 +194,11 @@ static void stats(int code) case 'q': // dump the queues // code to dump out any/all queues - pcb_queue_dump("R", ready); - pcb_queue_dump("W", waiting); - pcb_queue_dump("S", sleeping); - pcb_queue_dump("Z", zombie); - pcb_queue_dump("I", sioread); + pcb_queue_dump("R", ready, true); + pcb_queue_dump("W", waiting, true); + pcb_queue_dump("S", sleeping, true); + pcb_queue_dump("Z", zombie, true); + pcb_queue_dump("I", sioread, true); break; case 'r': // print system configuration information @@ -309,7 +309,7 @@ int main(void) // report our configuration options kreport(true); - delay(DELAY_3_SEC); + delay(DELAY_2_SEC); /* ** Other tasks typically performed here: @@ -341,10 +341,11 @@ int main(void) const char *args[2] = { "init", NULL }; // load it - assert(user_load(prog, init_pcb, args) == SUCCESS); + assert(user_load(prog, init_pcb, args, true) == SUCCESS); // send it on its merry way schedule(init_pcb); + dispatch(); #ifdef TRACE_CX // if we're using a scrolling region, wait a bit more and then set it up @@ -366,9 +367,6 @@ int main(void) "================================================================================"); #endif - // switch to the "real" kernel page directory - vm_set_kvm(); - /* ** END OF TERM-SPECIFIC CODE ** @@ -380,5 +378,21 @@ int main(void) sio_enable(SIO_RX); + // produce a "system state" report + cio_puts("System status: Queues "); + pcb_queue_dump("R", ready, true); + pcb_queue_dump("W", waiting, true); + pcb_queue_dump("S", sleeping, true); + pcb_queue_dump("Z", zombie, true); + pcb_queue_dump("I", sioread, true); + ptable_dump_counts(); + pcb_dump("Current: ", current, true); + + delay(DELAY_3_SEC); + + vm_print(current->pdir, true, TwoLevel); + + delay(DELAY_3_SEC); + return 0; } diff --git a/kernel/kernel.ld b/kernel/kernel.ld index 3167327..83f211c 100644 --- a/kernel/kernel.ld +++ b/kernel/kernel.ld @@ -26,6 +26,7 @@ SECTIONS } /* Could put STABs here */ + /* .stab : { PROVIDE(__STAB_BEGIN__ = .); *(.stab); @@ -36,6 +37,7 @@ SECTIONS *(.stabstr); PROVIDE(__STABSTR_END__ = .); } + */ /* Align the data segment at the next page boundary */ . = ALIGN(0x1000); @@ -65,7 +67,8 @@ SECTIONS PROVIDE(_end = .); /DISCARD/ : { - /* *(.stab .stab_info .stabstr) */ - *(.eh_frame .note.GNU-stack .note.gnu.property .comment) + *(.stab .stab_info .stabstr) + *(.eh_frame .eh_frame_hdr) + *(.note.GNU-stack .note.gnu.property .comment) } } diff --git a/kernel/procs.c b/kernel/procs.c index 88589d0..20e6784 100644 --- a/kernel/procs.c +++ b/kernel/procs.c @@ -74,7 +74,7 @@ uint_t next_pid; pcb_t *init_pcb; // table of state name strings -const char *state_str[N_STATES] = { +const char state_str[N_STATES][4] = { [STATE_UNUSED] = "Unu", // "Unused" [STATE_NEW] = "New", [STATE_READY] = "Rdy", // "Ready" @@ -87,16 +87,16 @@ const char *state_str[N_STATES] = { }; // table of priority name strings -const char *prio_str[N_PRIOS] = { [PRIO_HIGH] = "High", - [PRIO_STD] = "User", - [PRIO_LOW] = "Low ", - [PRIO_DEFERRED] = "Def " }; +const char prio_str[N_PRIOS][5] = { [PRIO_HIGH] = "High", + [PRIO_STD] = "User", + [PRIO_LOW] = "Low ", + [PRIO_DEFERRED] = "Def " }; // table of queue ordering name strings -const char *ord_str[N_PRIOS] = { [O_FIFO] = "FIFO", - [O_PRIO] = "PRIO", - [O_PID] = "PID ", - [O_WAKEUP] = "WAKE" }; +const char ord_str[N_PRIOS][5] = { [O_FIFO] = "FIFO", + [O_PRIO] = "PRIO", + [O_PID] = "PID ", + [O_WAKEUP] = "WAKE" }; /* ** PRIVATE FUNCTIONS @@ -939,7 +939,7 @@ void ctx_dump_all(const char *msg) } /** -** _pcb_dump(msg,pcb) +** pcb_dump(msg,pcb,all) ** ** Dumps the contents of this PCB to the console ** @@ -964,7 +964,15 @@ void pcb_dump(const char *msg, register pcb_t *pcb, bool_t all) } cio_printf(" %d", pcb->pid); + cio_printf(" %s", pcb->state >= N_STATES ? "???" : state_str[pcb->state]); +#if 0 + if( pcb->state >= N_STATES ) { + cio_puts( " ????" ); + } else { + cio_printf( " %s", state_str[pcb->state] ); + } +#endif if (!all) { // just printing IDs and states on one line @@ -974,6 +982,13 @@ void pcb_dump(const char *msg, register pcb_t *pcb, bool_t all) // now, the rest of the contents cio_printf(" %s", pcb->priority >= N_PRIOS ? "???" : prio_str[pcb->priority]); +#if 0 + if( pcb->priority >= N_PRIOS ) { + cio_puts( " ???" ); + } else { + cio_printf( " %s", prio_str[pcb->priority] ); + } +#endif cio_printf(" ticks %u xit %d wake %08x\n", pcb->ticks, pcb->exit_status, pcb->wakeup); @@ -1111,6 +1126,14 @@ void ptable_dump_counts(void) for (n = 0; n < N_STATES; ++n) { cio_printf(" %u %s", nstate[n], state_str[n] != NULL ? state_str[n] : "???"); +#if 0 + cio_printf( " %u ", nstate[n] ); + if( state_str[n][0] != '\0' ) { + cio_puts( state_str[n] ); + } else { + cio_puts( "???" ); + } +#endif } cio_putchar('\n'); } diff --git a/kernel/startup.S b/kernel/startup.S index 73a081e..94b93b0 100644 --- a/kernel/startup.S +++ b/kernel/startup.S @@ -33,18 +33,18 @@ /* ** A symbol for locating the beginning of the code. */ - .globl begtext - .text -begtext: + + .globl begtext + .globl _start +_start = V2PNC(begtext) /* ** The entry point. When we get here, we have just entered protected ** mode, so all the segment registers are incorrect except for CS. */ - .globl _start +begtext: -_start: cli /* seems to be reset on entry to p. mode */ movb $NMI_ENABLE, %al /* re-enable NMIs (bootstrap */ outb $CMOS_ADDR /* turned them off) */ @@ -110,10 +110,18 @@ clearbss: # set the initial frame pointer xorl %ebp, %ebp + # now, jump and switch into using high addresses + # we use an indirect jump here because the assembler + # would ordinarily generate a PC-relative target + # address for the jump, which would not have the + # desired effect + movl $onward, %eax + jmp *%eax + +onward: + /* -** Call the system initialization routine, and switch to -** executing at high addresses. We use an indirect jump -** here to avoid getting a PC-relative 'jmp' instruction. +** Call the system initialization routine. ** ** Alternate idea: push the address of isr_restore ** and just do an indirect jump? diff --git a/kernel/syscalls.c b/kernel/syscalls.c index 0653c93..92a0a23 100644 --- a/kernel/syscalls.c +++ b/kernel/syscalls.c @@ -354,7 +354,7 @@ SYSIMPL(exec) pcb->pdir = NULL; // "load" it and set up the VM tables for this process - int status = user_load(prog, pcb, args); + int status = user_load(prog, pcb, args, false); if (status != SUCCESS) { RET(pcb) = status; SYSCALL_EXIT(status); diff --git a/kernel/user.c b/kernel/user.c index e9398ff..0e5e186 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -426,44 +426,27 @@ static int read_phdrs(elfhdr_t *hdr, pcb_t *pcb) ** @param pcb Pointer to the PCB for the process ** @param entry Entry point for the new process ** @param args Argument vector to be put in place +** @param sys Is the argument vector from kernel code? ** -** @return A pointer to the context_t on the stack, or NULL +** @return A (user VA) pointer to the context_t on the stack, or NULL */ -static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) +static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args, + bool_t sys) { #if TRACING_USER - cio_printf("stksetup: pcb %08x, entry %08, args %08x\n", (uint32_t)pcb, + cio_printf("stksetup: pcb %08x, entry %08x, args %08x\n", (uint32_t)pcb, entry, (uint32_t)args); #endif /* - ** First, we need to count the space we'll need for the argument + ** First, we need to calculate the space we'll need for the argument ** vector and strings. - */ - - int argbytes = 0; - int argc = 0; - - while (args[argc] != NULL) { - int n = strlen(args[argc]) + 1; - // can't go over one page in size - if ((argbytes + n) > SZ_PAGE) { - // oops - ignore this and any others - break; - } - argbytes += n; - ++argc; - } - - // Round up the byte count to the next multiple of four. - argbytes = (argbytes + 3) & MOD4_MASK; - - /* - ** Allocate the arrays. We are safe using dynamic arrays here - ** because we're using the OS stack, not the user stack. ** - ** We want the argstrings and argv arrays to contain all zeroes. - ** The C standard states, in section 6.7.8, that + ** Keeping track of kernel vs. user VAs is tricky, so we'll use + ** a prefix on variable names: kv_* is a kernel virtual address; + ** uv_* is a user virtual address. + ** + ** We rely on the C standard, section 6.7.8, to clear these arrays: ** ** "21 If there are fewer initializers in a brace-enclosed list ** than there are elements or members of an aggregate, or @@ -472,31 +455,36 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) ** the remainder of the aggregate shall be initialized ** implicitly the same as objects that have static storage ** duration." - ** - ** Sadly, because we're using variable-sized arrays, we can't - ** rely on this, so we have to call memclr() instead. :-( In - ** truth, it doesn't really cost us much more time, but it's an - ** annoyance. */ - char argstrings[argbytes]; - char *argv[argc + 1]; + int argbytes = 0; // total length of arg strings + int argc = 0; // number of argv entries + const char *kv_strs[N_ARGS] = { 0 }; // converted user arg string pointers + int strlengths[N_ARGS] = { 0 }; // length of each string + const char *uv_argv[N_ARGS] = { 0 }; // argv pointers - CLEAR(argstrings); - CLEAR(argv); + /* + ** IF the argument list given to us came from user code, we need + ** to convert its address and the addresses it contains to kernel + ** VAs; otherwise, we can use them directly. + */ + char **kv_args = sys ? args : vm_uva2kva(pcb->pdir, (void *)args); - // Next, duplicate the argument strings, and create pointers to - // each one in our argv. - char *tmp = argstrings; - for (int i = 0; i < argc; ++i) { - int nb = strlen(args[i]) + 1; // bytes (incl. NUL) in this string - strcpy(tmp, args[i]); // add to our buffer - argv[i] = tmp; // remember where it was - tmp += nb; // move on + while (kv_args[argc] != NULL) { + kv_strs[argc] = sys ? args[argc] : + vm_uva2kva(pcb->pdir, (void *)(kv_args[argc])); + strlengths[argc] = strlen(kv_strs[argc]) + 1; + // can't go over one page in size + if ((argbytes + strlengths[argc]) > SZ_PAGE) { + // oops - ignore this and any others + break; + } + argbytes += strlengths[argc]; + ++argc; } - // trailing NULL pointer - argv[argc] = NULL; + // Round up the byte count to the next multiple of four. + argbytes = (argbytes + 3) & MOD4_MASK; /* ** The pages for the stack were cleared when they were allocated, @@ -524,32 +512,44 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) ** see below for more information. */ - // Pointer to the last word in stack. We get this from the - // VM hierarchy. Get the PDE entry for the user address space. - pde_t stack_pde = pcb->pdir[USER_PDE]; + /* + ** Find the user stack. The PDE entry for user address space points + ** to a page table for the first 4MB of the address space, but the + ** "pointer" there a physical frame address. + */ + pde_t *kv_userpt = (pde_t *)P2V(PTE_ADDR(pcb->pdir[USER_PDE])); + assert(kv_userpt != NULL); - // The PDE entry points to the PT, which is an array of PTE. The last - // two entries are for the stack; pull out the last one. - pte_t stack_pte = ((pte_t *)(stack_pde & MOD4K_MASK))[USER_STK_PTE2]; + /* + ** The final entries in that PMT are for the pages of the user stack. + ** Grab the address of the frame for the last one. (Again, we need + ** to convert it to a virtual address we can use.) + */ + + // the PMT entry for that page + pte_t pmt_entry = kv_userpt[USER_STK_LAST_PTE]; + assert(IS_PRESENT(pmt_entry)); + + // kernel VA for the first byte following that page + uint8_t *kv_ptr = (uint8_t *)P2V(PTE_ADDR(pmt_entry) + SZ_PAGE); + assert(kv_ptr != NULL); - // OK, now we have the PTE. The frame address of the last page is - // in this PTE. Find the address immediately after that. - uint32_t *ptr = (uint32_t *)((uint32_t)(stack_pte & MOD4K_MASK) + SZ_PAGE); + // user VA for the first byte following that page + uint32_t *uv_ptr = (uint32_t *)(USER_STACK_P2 + SZ_PAGE); - // Pointer to where the arg strings should be filled in. - char *strings = (char *)((uint32_t)ptr - argbytes); + // Pointers to where the arg strings should be filled in. + uint32_t kv_strings = ((uint32_t)kv_ptr) - argbytes; + uint32_t uv_strings = ((uint32_t)uv_ptr) - argbytes; - // back the pointer up to the nearest word boundary; because we're + // back the pointers up to the nearest word boundary; because we're // moving toward location 0, the nearest word boundary is just the // next smaller address whose low-order two bits are zeroes - strings = (char *)((uint32_t)strings & MOD4_MASK); - - // Copy over the argv strings. - memcpy((void *)strings, argstrings, argbytes); + kv_strings &= MOD4_MASK; + uv_strings &= MOD4_MASK; /* - ** Next, we need to copy over the argv pointers. Start by - ** determining where 'argc' should go. + ** Next, we need to copy over the data. Start by determining where + ** where 'argc' should go. ** ** Stack alignment is controlled by the SysV ABI i386 supplement, ** version 1.2 (June 23, 2016), which states in section 2.2.2: @@ -563,7 +563,7 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) ** ** Isn't technical documentation fun? Ultimately, this means that ** the first parameter to main() should be on the stack at an address - ** that is a multiple of 16. + ** that is a multiple of 16. In our case, that is 'argc'. ** ** The space needed for argc, argv, and the argv array itself is ** argc + 3 words (argc+1 for the argv entries, plus one word each @@ -571,45 +571,66 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) */ int nwords = argc + 3; - uint32_t *acptr = ((uint32_t *)strings) - nwords; + uint32_t *kv_acptr = ((uint32_t *)kv_strings) - nwords; + uint32_t *uv_acptr = ((uint32_t *)uv_strings) - nwords; - /* - ** Next, back up until we're at a multiple-of-16 address. Because we - ** are moving to a lower address, its upper 28 bits are identical to - ** the address we currently have, so we can do this with a bitwise - ** AND to just turn off the lower four bits. - */ + // back these up to multiple-of-16 addresses for stack alignment + kv_acptr = (uint32_t *)(((uint32_t)kv_acptr) & MOD16_MASK); + uv_acptr = (uint32_t *)(((uint32_t)uv_acptr) & MOD16_MASK); - acptr = (uint32_t *)(((uint32_t)acptr) & MOD16_MASK); + // the argv location + uint32_t *kv_avptr = kv_acptr + 1; - // copy in 'argc' - *acptr = argc; + // the user address for the first argv entry + uint32_t *uv_avptr = uv_acptr + 2; - // next, 'argv', which follows 'argc'; 'argv' points to the - // word that follows it in the stack - uint32_t *avptr = acptr + 2; - *(acptr + 1) = (uint32_t)avptr; + // Copy over the argv strings. + for (int i = 0; i < argc; ++i) { + // copy the string using kernel addresses + strcpy((char *)kv_strings, kv_args[i]); + + // remember the user address where this string went + uv_argv[i] = (char *)uv_strings; + + // adjust both string addresses + kv_strings += strlengths[i]; + uv_strings += strlengths[i]; + } /* - ** Next, we copy in all argc+1 pointers. + ** Next, we copy in argc, argv, and the pointers. The stack will + ** look something like this: + ** + ** kv_avptr + ** kv_acptr | + ** | | + ** v v + ** argc argv av[0] av[1] etc NULL str0 str1 etc. + ** [....][....][....][....] ... [0000] ... [......0......0.........] + ** | ^ | | ^ ^ + ** | | | | | | + ** ------ | ---------------------|------- + ** --------------------------- */ - // Adjust and copy the string pointers. - for (int i = 0; i <= argc; ++i) { - if (argv[i] != NULL) { - // an actual pointer - adjust it and copy it in - *avptr = (uint32_t)strings; - // skip to the next entry in the array - strings += strlen(argv[i]) + 1; - } else { - // end of the line! - *avptr = NULL; - } - ++avptr; + // copy in 'argc' + *kv_acptr = argc; + + // copy in 'argv' + *kv_avptr++ = (uint32_t)uv_avptr; + + // now, the argv entries themselves + for (int i = 0; i < argc; ++i) { + *kv_avptr++ = (uint32_t)uv_argv[i]; } + // and the trailing NULL + *kv_avptr = NULL; + /* - ** Now, we need to set up the initial context for the executing + ** Almost done! + ** + ** Now we need to set up the initial context for the executing ** process. ** ** When this process is dispatched, the context restore code will @@ -618,27 +639,34 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) ** the interrupt "returns" to the entry point of the process. */ - // Locate the context save area on the stack. - context_t *ctx = ((context_t *)avptr) - 1; + // Locate the context save area on the stack by backup up one + // "context" from where the argc value is saved + context_t *kv_ctx = ((context_t *)kv_acptr) - 1; + uint32_t uv_ctx = (uint32_t)(((context_t *)uv_acptr) - 1); /* ** We cleared the entire stack earlier, so all the context ** fields currently contain zeroes. We now need to fill in ** all the important fields. + ** + ** Note: we don't need to set the ESP value for the process, + ** as the 'popa' that restores the general registers doesn't + ** actually restore ESP from the context area - it leaves it + ** where it winds up. */ - ctx->eflags = DEFAULT_EFLAGS; // IE enabled, PPL 0 - ctx->eip = entry; // initial EIP - ctx->cs = GDT_CODE; // segment registers - ctx->ss = GDT_STACK; - ctx->ds = ctx->es = ctx->fs = ctx->gs = GDT_DATA; + kv_ctx->eflags = DEFAULT_EFLAGS; // IF enabled, IOPL 0 + kv_ctx->eip = entry; // initial EIP + kv_ctx->cs = GDT_CODE; // segment registers + kv_ctx->ss = GDT_STACK; + kv_ctx->ds = kv_ctx->es = kv_ctx->fs = kv_ctx->gs = GDT_DATA; /* ** Return the new context pointer to the caller. It will be our ** caller's responsibility to schedule this process. */ - return (ctx); + return ((context_t *)uv_ctx); } /* @@ -809,10 +837,11 @@ int user_duplicate(pcb_t *new, pcb_t *old) ** @param ptab A pointer to the program table entry to be loaded ** @param pcb The PCB for the program being loaded ** @param args The argument vector for the program +** @param sys Is the argument vector from kernel code? ** ** @return the status of the load attempt */ -int user_load(prog_t *ptab, pcb_t *pcb, const char **args) +int user_load(prog_t *ptab, pcb_t *pcb, const char **args, bool_t sys) { // NULL pointers are bad! assert1(ptab != NULL); @@ -820,7 +849,7 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) assert1(args != NULL); #if TRACING_USER - cio_printf("uload: prog '%s' pcb %08x args %08x\n", + cio_printf("Uload: prog '%s' pcb %08x args %08x\n", ptab->name[0] ? ptab->name : "?", (uint32_t)pcb, (uint32_t)args); #endif @@ -832,8 +861,16 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) (uint32_t)ptab, ptab->name, ptab->offset, ptab->size, ptab->flags); cio_printf(" args %08x:", (uint32_t)args); - for (int i = 0; args[i] != NULL; ++i) { - cio_printf(" [%d] %s", i, args[i]); + if (sys) { + for (int i = 0; args[i] != NULL; ++i) { + cio_printf(" [%d] %s", i, args[i]); + } + } else { + char **kv_args = vm_uva2kva(pcb->pdir, args); + for (int i = 0; kv_args[i] != NULL; ++i) { + cio_printf(" [%d] %s", i, + (char *)vm_uva2kva(pcb->pdir, kv_args[i])); + } } cio_printf("\n pcb %08x (pid %u)\n", (uint32_t)pcb, pcb->pid); dump_fhdr(hdr); @@ -853,8 +890,8 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) // read all the program headers int stat = read_phdrs(hdr, pcb); if (stat != SUCCESS) { - // TODO figure out a better way to deal with this - PANIC(0, "user_load: phdr read failed"); + cio_printf("Uload: read_phdrs('%s') returned %d\n", ptab->name, stat); + PANIC(0, "User_load: phdr read failed"); } // next, set up the runtime stack - just like setting up loadable @@ -862,12 +899,12 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) stat = vm_add(pcb->pdir, true, false, (void *)USER_STACK, SZ_USTACK, NULL, 0); if (stat != SUCCESS) { - // TODO yadda yadda... - PANIC(0, "user_load: vm_add failed"); + cio_printf("Uload: vm_add('%s') stack returned %d\n", ptab->name, stat); + PANIC(0, "user_load: vm_add stack failed"); } // set up the command-line arguments - pcb->context = stack_setup(pcb, hdr->e_entry, args); + pcb->context = stack_setup(pcb, hdr->e_entry, args, sys); return SUCCESS; } @@ -883,7 +920,7 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) void user_cleanup(pcb_t *pcb) { #if TRACING_USER - cio_printf("uclean: %08x\n", (uint32_t)pcb); + cio_printf("Uclean: %08x\n", (uint32_t)pcb); #endif if (pcb == NULL) { diff --git a/kernel/vm.c b/kernel/vm.c index 7d43bb8..a700bcb 100644 --- a/kernel/vm.c +++ b/kernel/vm.c @@ -58,45 +58,155 @@ static void vm_isr(int vector, int code) } /** -** Name: uva2kva +** Name: ptcount ** -** Convert a user VA into a kernel address. Works for all addresses - -** if the address is a page address, the PERMS(va) value will be 0; -** otherwise, it is the offset into the page. +** Count the number of each type of entry in a page table. +** Returns a 32-bit result containing two 16-bit counts: ** -** @param pdir Pointer to the page directory to examine -** @param va Virtual address to check +** Upper half Lower half +** PDIR: # of 4MB entries # of 'present' entries +** PMT: zero # of 'present' entries +** +** The number of "not present" can be calculated from these. +** +** @param pt Pointer to the page table +** @param dir Is it a page directory (vs. a page table)? */ ATTR_UNUSED -static void *uva2kva(pde_t *pdir, void *va) +static uint32_t ptcount(pte_t *ptr, bool_t dir) { - // find the PMT entry for this address - pte_t *pte = vm_getpte(pdir, va, false); - if (pte == NULL) { - return NULL; + uint16_t n_np = 0, n_p = 0, n_lg = 0; + + for (int i = 0; i < N_PTE; ++i) { + pde_t entry = *ptr++; + if (!IS_PRESENT(entry)) { + ++n_np; + continue; + } + if (dir && IS_LARGE(entry)) { + ++n_lg; + } else { + ++n_p; + } } - // get the entry - pte_t entry = *pte; + // n_lg will be 0 for PMTs + return (n_lg << 16) | n_p; +} - // is this a valid address for the user? - if (IS_PRESENT(entry)) { - return NULL; +// decode a PDE +static void pde_prt(uint32_t level, uint32_t i, uint32_t entry) +{ + // indent + for (int n = 0; n <= level; ++n) + cio_puts(" "); + // line header + cio_printf("[%08x] %08x", i, entry); + // perms + if (IS_LARGE(entry)) { // PS is 1 + if ((entry & PDE_PAT) != 0) + cio_puts(" PAT"); + if ((entry & PDE_G) != 0) + cio_puts(" G"); + cio_puts(" PS"); + if ((entry & PDE_D) != 0) + cio_puts(" D"); } + if ((entry & PDE_A) != 0) + cio_puts(" A"); + if ((entry & PDE_PCD) != 0) + cio_puts(" CD"); + if ((entry & PDE_PWT) != 0) + cio_puts(" WT"); + if ((entry & PDE_US) != 0) + cio_puts(" U"); + if ((entry & PDE_RW) != 0) + cio_puts(" W"); + cio_puts((entry & PDE_P) != 0 ? " P" : "!P"); - // is this a system-only page? - if (IS_SYSTEM(entry)) { - return NULL; + cio_printf(" --> %s %08x", IS_LARGE(entry) ? "Pg" : "PT", PDE_ADDR(entry)); +} + +// decode a PTE +static void pte_prt(uint32_t level, uint32_t i, uint32_t entry) +{ + // indent + for (int n = 0; n <= level; ++n) + cio_puts(" "); + // line header + cio_printf("[%08x] %08x", i, entry); + // perms + if ((entry & PDE_G) != 0) + cio_puts(" G"); + if ((entry & PDE_PAT) != 0) + cio_puts(" PAT"); + if ((entry & PDE_D) != 0) + cio_puts(" D"); + if ((entry & PDE_A) != 0) + cio_puts(" A"); + if ((entry & PDE_PCD) != 0) + cio_puts(" CD"); + if ((entry & PDE_PWT) != 0) + cio_puts(" WT"); + if ((entry & PDE_US) != 0) + cio_puts(" U"); + if ((entry & PDE_RW) != 0) + cio_puts(" W"); + cio_puts((entry & PDE_P) != 0 ? " P" : "!P"); + + cio_printf(" --> Pg %08x", PTE_ADDR(entry)); +} + +/** +** Name: pdump +** +** Recursive helper for table hierarchy dump. +** +** @param level Current hierarchy level +** @param pt Page table to display +** @param dir Is it a page directory (vs. a page table)? +** @param mode How to display the entries +*/ +ATTR_UNUSED +static void pdump(uint_t level, void *pt, bool_t dir, enum vmmode_e mode) +{ + pte_t *ptr = (pte_t *)pt; + + cio_printf("? at 0x%08x:", dir ? "PDir" : "PTbl", (uint32_t)pt); + uint32_t nums = ptcount(ptr, dir); + if (dir) { + cio_printf(" %u 4MB", (nums >> 16)); } + cio_printf(" %u P %u !P\n", nums & 0xffff, + N_PTE - ((nums >> 16) + (nums & 0xffff))); - // get the physical address - uint32_t frame = PTE_ADDR(*pte) | PERMS(va); + for (uint32_t i = 0; i < (uint32_t)N_PTE; ++i) { + pte_t entry = *ptr; + if (dir) { + // this is a PDIR entry; could be either a 4MB + // page, or a PMT pointer + if (mode > Simple) { + pde_prt(level, i, entry); + cio_putchar('\n'); + if (!IS_LARGE(entry)) { + pdump(level + 1, (void *)*ptr, false, mode); + } + } + } else { + // just a PMT entry + if (mode > Simple) { + pte_prt(level, i, entry); + cio_putchar('\n'); + } + } - return (void *)P2V(frame); + // move to the next entry + ++ptr; + } } /** -** Name: ptdump +** Name: pmt_dump ** ** Dump the non-zero entries of a page table or directory ** @@ -105,7 +215,8 @@ static void *uva2kva(pde_t *pdir, void *va) ** @param start First entry to process ** @param num Number of entries to process */ -static void ptdump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num) +ATTR_UNUSED +static void pmt_dump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num) { cio_printf("\n\nP%c dump", dir ? 'D' : 'T'); cio_printf(" of %08x", (uint32_t)pt); @@ -169,7 +280,23 @@ void vm_init(void) assert(kpdir != NULL); #if TRACING_VM - cio_printf("vm_init: kpdir is %08x\n", kpdir); + cio_printf("vm_init: kpdir %08x, adding user pages\n", kpdir); +#endif + + // add the entries for the user address space + for (uint32_t addr = 0; addr < NUM_4MB; addr += SZ_PAGE) { + int stat = vm_map(kpdir, (void *)addr, addr, SZ_PAGE, PTE_RW); + if (stat != SUCCESS) { + cio_printf("vm_init, map %08x->%08x failed, status %d\n", addr, + addr, stat); + PANIC(0, "vm_init user range map failed"); + } +#if TRACING_VM + cio_putchar('.'); +#endif + } +#if TRACING_VM + cio_puts(" done\n"); #endif // switch to it @@ -184,6 +311,44 @@ void vm_init(void) } /** +** Name: vm_uva2kva +** +** Convert a user VA into a kernel address. Works for all addresses - +** if the address is a page address, the low-order nine bits will be +** zeroes; otherwise, they is the offset into the page, which is +** unchanged within the address spaces. +** +** @param pdir Pointer to the page directory to examine +** @param va Virtual address to check +*/ +void *vm_uva2kva(pde_t *pdir, void *va) +{ + // find the PMT entry for this address + pte_t *pte = vm_getpte(pdir, va, false); + if (pte == NULL) { + return NULL; + } + + // get the entry + pte_t entry = *pte; + + // is this a valid address for the user? + if (IS_PRESENT(entry)) { + return NULL; + } + + // is this a system-only page? + if (IS_SYSTEM(entry)) { + return NULL; + } + + // get the physical address + uint32_t frame = PTE_ADDR(*pte) | PERMS(va); + + return (void *)P2V(frame); +} + +/** ** Name: vm_pagedup ** ** Duplicate a page of memory @@ -206,46 +371,45 @@ void *vm_pagedup(void *old) ** ** Duplicate a page directory entry ** -** @param dst Pointer to where the duplicate should go -** @param curr Pointer to the entry to be duplicated +** @param entry The entry to be duplicated ** -** @return true on success, else false +** @return the new entry, or -1 on error */ -bool_t vm_pdedup(pde_t *dst, pde_t *curr) +pde_t vm_pdedup(pde_t entry) { - assert1(curr != NULL); - assert1(dst != NULL); - #if TRACING_VM - cio_printf("vm_pdedup dst %08x curr %08x\n", (uint32_t)dst, (uint32_t)curr); + cio_printf("vm_pdedup curr %08x\n", (uint32_t)entry); #endif - pde_t entry = *curr; // simplest case if (!IS_PRESENT(entry)) { - *dst = 0; - return true; + return 0; } - // OK, we have an entry; allocate a page table for it - pte_t *newtbl = (pte_t *)km_page_alloc(); - if (newtbl == NULL) { - return false; + // is this a large page? + if (IS_LARGE(entry)) { + // just copy it + return entry; } - // we could clear the new table, but we'll be assigning to - // each entry anyway, so we'll save the execution time + // OK, we have a 4KB entry; allocate a page table for it + pte_t *tblva = (pte_t *)km_page_alloc(); + if (tblva == NULL) { + return (uint32_t)-1; + } - // address of the page table for this directory entry - pte_t *old = (pte_t *)PDE_ADDR(entry); + // make sure the entries are all initially 'not present' + memclr(tblva, SZ_PAGE); - // pointer to the first PTE in the new table - pte_t *new = newtbl; + // VA of the page table for this directory entry + pte_t *old = (pte_t *)P2V(PDE_ADDR(entry)); + + // pointer to the first PTE in the new table (already a VA) + pte_t *new = tblva; for (int i = 0; i < N_PTE; ++i) { - if (!IS_PRESENT(*old)) { - *new = 0; - } else { + // only need to copy 'present' entries + if (IS_PRESENT(*old)) { *new = *old; } ++old; @@ -253,10 +417,8 @@ bool_t vm_pdedup(pde_t *dst, pde_t *curr) } // replace the page table address - // upper 22 bits from 'newtbl', lower 12 from '*curr' - *dst = (pde_t)(PTE_ADDR(newtbl) | PERMS(entry)); - - return true; + // (PA of page table, lower 12 bits from '*curr') + return (pde_t)(V2P(PTE_ADDR(tblva)) | PERMS(entry)); } /** @@ -282,8 +444,7 @@ pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc) assert1(pdir != NULL); // get the PDIR entry for this virtual address - uint32_t ix = PDIX(va); - pde_t *pde_ptr = &pdir[ix]; + pde_t *pde_ptr = &pdir[PDIX(va)]; // is it already set up? if (IS_PRESENT(*pde_ptr)) { @@ -319,10 +480,8 @@ pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc) *pde_ptr = V2P(ptbl) | PDE_P | PDE_RW; } - // finally, return a pointer to the entry in the - // page table for this VA - ix = PTIX(va); - return &ptbl[ix]; + // finally, return a pointer to the entry in the page table for this VA + return &ptbl[PTIX(va)]; } // Set up kernel part of a page table. @@ -337,7 +496,7 @@ pde_t *vm_mkkvm(void) } #if 0 && TRACING_VM cio_puts( "\nEntering vm_mkkvm\n" ); - ptdump( pdir, true, 0, N_PDE ); + pmt_dump( pdir, true, 0, N_PDE ); #endif // clear it out to disable all the entries @@ -361,8 +520,8 @@ pde_t *vm_mkkvm(void) } #if 0 && TRACING_VM cio_puts( "\nvm_mkkvm() final PD:\n" ); - ptdump( pdir, true, 0, 16 ); - ptdump( pdir, true, 0x200, 16 ); + pmt_dump( pdir, true, 0, 16 ); + pmt_dump( pdir, true, 0x200, 16 ); #endif return pdir; @@ -382,19 +541,26 @@ pde_t *vm_mkuvm(void) return NULL; } - // iterate through the kernel page directory - pde_t *curr = kpdir; - pde_t *dst = new; - for (int i = 0; i < N_PDE; ++i) { + // iterate through the 'system' portions of the kernel + // page directory + int i = PDIX(KERN_BASE); + pde_t *curr = &kpdir[i]; + pde_t *dst = &new[i]; + while (i < N_PDE) { if (*curr != 0) { // found an active one - duplicate it - if (!vm_pdedup(dst, curr)) { + pde_t entry = vm_pdedup(*curr); + if (entry == (uint32_t)-1) { return NULL; } + *dst = entry; + } else { + *dst = 0; } ++curr; ++dst; + ++i; } return new; @@ -481,7 +647,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size, // figure out where this page will go in the hierarchy pte_t *pte = vm_getpte(pdir, va, true); if (pte == NULL) { - // TODO if i > 0, this isn't the first frame - is + // if i > 0, this isn't the first frame - is // there anything to do about other frames? // POSSIBLE MEMORY LEAK? return E_NO_MEMORY; @@ -490,7 +656,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size, // allocate the frame void *page = km_page_alloc(); if (page == NULL) { - // TODO same question here + // same question here return E_NO_MEMORY; } @@ -498,7 +664,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size, memclr(page, SZ_PAGE); // create the PTE for this frame - uint32_t entry = (uint32_t)(PTE_ADDR(page) | entrybase); + uint32_t entry = (uint32_t)(PTE_ADDR(V2P(page)) | entrybase); *pte = entry; // copy data if we need to @@ -506,7 +672,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size, // how much to copy uint32_t num = bytes > SZ_PAGE ? SZ_PAGE : bytes; // do it! - memcpy((void *)page, (void *)data, num); + memmove((void *)page, (void *)data, num); // adjust all the pointers data += num; // where to continue bytes -= num; // what's left to copy @@ -644,7 +810,7 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm) PDIX(addr), PTIX(addr)); // dump the directory - ptdump(pdir, true, PDIX(addr), 4); + pmt_dump(pdir, true, PDIX(addr), 4); // find the relevant PDE entry uint32_t ix = PDIX(va); @@ -653,7 +819,7 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm) // round the PMT index down uint32_t ix2 = PTIX(va) & MOD4_MASK; // dump the PMT for the relevant directory entry - ptdump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4); + pmt_dump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4); } #endif PANIC(0, "mapping an already-mapped address"); @@ -735,3 +901,26 @@ int vm_uvmdup(pde_t *new, pde_t *old) return SUCCESS; } + +/** +** Name: vm_print +** +** Print out a paging hierarchy. +** +** @param pt Page table to display +** @param dir Is it a page directory (vs. a page table)? +** @param mode How to display the entries +*/ +void vm_print(void *pt, bool_t dir, enum vmmode_e mode) +{ + cio_puts("\nVM hierarchy"); + if (pt == NULL) { + cio_puts(" (NULL pointer)\n"); + return; + } + + cio_printf("Starting at 0x%08x (%s):\n", (uint32_t)pt, + dir ? "PDIR" : "PMT"); + + pdump(0, pt, dir, mode); +} |