diff --git a/include/defs.h b/include/defs.h index cb29fc3..018b14e 100644 --- a/include/defs.h +++ b/include/defs.h @@ -36,9 +36,6 @@ #define CHAN_CIO 0 #define CHAN_SIO 1 -// maximum allowable number of command-line arguments -#define MAX_ARGS 10 - // sizes of various things #define NUM_1KB 0x00000400 // 2^10 #define NUM_4KB 0x00001000 // 2^12 @@ -46,7 +43,7 @@ #define NUM_4MB 0x00400000 // 2^22 #define NUM_1GB 0x40000000 // 2^30 #define NUM_2GB 0x80000000 // 2^31 -#define NUM_3GB 0xc0000000 +#define NUM_3GB 0xc0000000 // 1GB + 2GB #ifndef ASM_SRC diff --git a/include/kdefs.h b/include/kdefs.h index 80ee3c7..d26fa3d 100644 --- a/include/kdefs.h +++ b/include/kdefs.h @@ -30,6 +30,11 @@ // declarations for modulus checking of (e.g.) sizes and addresses +#define LOW_9_BITS 0x00000fff +#define LOW_22_BITS 0x003fffff +#define HIGH_20_BITS 0xfffff000 +#define HIGH_10_BITS 0xffc00000 + #define MOD4_BITS 0x00000003 #define MOD4_MASK 0xfffffffc #define MOD4_INC 0x00000004 diff --git a/include/offsets.h b/include/offsets.h new file mode 100644 index 0000000..c0cc029 --- /dev/null +++ b/include/offsets.h @@ -0,0 +1,85 @@ +/** +** @file offsets.h +** +** GENERATED AUTOMATICALLY - DO NOT EDIT +** +** Creation date: Mon Mar 31 11:38:04 2025 +** +** This header file contains C Preprocessor macros which expand +** into the byte offsets needed to reach fields within structs +** used in the baseline system. Should those struct declarations +** change, the Offsets program should be modified (if needed), +** recompiled, and re-run to recreate this file. 
+*/ + +#ifndef OFFSETS_H_ +#define OFFSETS_H_ + +// Sizes of basic types + +#define SZ_char 1 +#define SZ_short 2 +#define SZ_int 4 +#define SZ_long 4 +#define SZ_long_long 8 +#define SZ_pointer 4 + +// Sizes of our types + +#define SZ_int8_t 1 +#define SZ_uint8_t 1 +#define SZ_int16_t 2 +#define SZ_uint16_t 2 +#define SZ_int32_t 4 +#define SZ_uint32_t 4 +#define SZ_int64_t 8 +#define SZ_uint64_t 8 +#define SZ_bool_t 1 + +// context_t structure + +#define SZ_CTX 72 + +#define CTX_ss 0 +#define CTX_gs 4 +#define CTX_fs 8 +#define CTX_es 12 +#define CTX_ds 16 +#define CTX_edi 20 +#define CTX_esi 24 +#define CTX_ebp 28 +#define CTX_esp 32 +#define CTX_ebx 36 +#define CTX_edx 40 +#define CTX_ecx 44 +#define CTX_eax 48 +#define CTX_vector 52 +#define CTX_code 56 +#define CTX_eip 60 +#define CTX_cs 64 +#define CTX_eflags 68 + +// section_t structure + +#define SZ_SCT 8 + +#define SCT_length 0 +#define SCT_addr 4 + +// pcb_t structure + +#define SZ_PCB 72 + +#define PCB_context 0 +#define PCB_pdir 4 +#define PCB_sects 8 +#define PCB_next 40 +#define PCB_parent 44 +#define PCB_wakeup 48 +#define PCB_exit_status 52 +#define PCB_pid 56 +#define PCB_state 60 +#define PCB_priority 64 +#define PCB_ticks 68 + +#endif diff --git a/include/params.h b/include/params.h index dabbe8d..7a41e02 100644 --- a/include/params.h +++ b/include/params.h @@ -20,11 +20,13 @@ // Upper bound on the number of simultaneous user-level // processes in the system (completely arbitrary) - #define N_PROCS 25 -// Clock frequency (Hz) +// Limit on the number of entries in argv[], INCLUDING +// the trailing NULL pointer (also completely arbitrary) +#define N_ARGS 10 +// Clock frequency (Hz) #define CLOCK_FREQ 1000 #define TICKS_PER_MS 1 diff --git a/include/procs.h b/include/procs.h index 9e4d8e6..bc5b705 100644 --- a/include/procs.h +++ b/include/procs.h @@ -205,13 +205,13 @@ extern uint_t next_pid; extern pcb_t *init_pcb; // table of state name strings -extern const char *state_str[N_STATES]; +extern 
const char state_str[N_STATES][4]; // table of priority name strings -extern const char *prio_str[N_PRIOS]; +extern const char prio_str[N_PRIOS][5]; // table of queue ordering name strings -extern const char *ord_str[N_ORDERINGS]; +extern const char ord_str[N_ORDERINGS][5]; /* ** Prototypes diff --git a/include/user.h b/include/user.h index 8eef187..672f916 100644 --- a/include/user.h +++ b/include/user.h @@ -118,10 +118,11 @@ int user_duplicate(pcb_t *new, pcb_t *old); ** @param prog A pointer to the program table entry to be loaded ** @param pcb The PCB for the program being loaded ** @param args The argument vector for the program +** @param sys Is the argument vector from kernel code? ** ** @return the status of the load attempt */ -int user_load(prog_t *prog, pcb_t *pcb, const char **args); +int user_load(prog_t *prog, pcb_t *pcb, const char **args, bool_t sys); /** ** Name: user_cleanup diff --git a/include/vm.h b/include/vm.h index 8eac999..dc12568 100644 --- a/include/vm.h +++ b/include/vm.h @@ -64,16 +64,20 @@ */ // user virtual addresses +#define USER_BASE 0x00000000 +#define USER_MAX 0x003fffff #define USER_TEXT 0x00001000 #define USER_STACK 0x003fe000 +#define USER_STACK_P1 USER_STACK +#define USER_STACK_P2 0x003ff000 #define USER_STK_END 0x00400000 // how to find the addresses of the stack pages in the VM hierarchy // user address space is the first 4MB of virtual memory #define USER_PDE 0 -// the stack occupies the last two pages of the address space -#define USER_STK_PTE1 1022 -#define USER_STK_PTE2 1023 +// the stack occupies this range of pages in the user address space +#define USER_STK_FIRST_PTE 1022 +#define USER_STK_LAST_PTE 1023 // some important memory addresses #define KERN_BASE 0x80000000 // start of "kernel" memory @@ -173,7 +177,7 @@ #define IS_USER(entry) (((entry) & PDE_US) != 0) // low-order nine bits of PDEs and PTEs hold "permission" flag bits -#define PERMS_MASK MOD4K_MASK +#define PERMS_MASK MOD4K_BITS // 4KB frame numbers are 20 
bits wide #define FRAME_4K_SHIFT 12 @@ -200,10 +204,14 @@ // everything has nine bits of permission flags #define PERMS(p) (((uint32_t)(p)) & PERMS_MASK) -// extract the table indices from a 32-bit address +// extract the table indices from a 32-bit VA #define PDIX(v) ((((uint32_t)(v)) >> PDIX_SHIFT) & PIX2I_MASK) #define PTIX(v) ((((uint32_t)(v)) >> PTIX_SHIFT) & PIX2I_MASK) +// extract the byte offset from a 32-bit VA +#define OFFSET_4K(v) (((uint32_t)(v)) & MOD4K_BITS) +#define OFFSET_4M(v) (((uint32_t)(v)) & MOD4M_BITS) + /* ** Types */ @@ -215,31 +223,34 @@ // PDE for 4KB pages typedef struct pdek_s { - uint_t p : 1; // present - uint_t rw : 1; // writable - uint_t us : 1; // user/supervisor - uint_t pwt : 1; // cache write-through - uint_t pcd : 1; // cache disable - uint_t a : 1; // accessed - uint_t avl1 : 1; // ignored (available) - uint_t ps : 1; // page size (must be 0) - uint_t avl2 : 4; // ignored (available) - uint_t fa : 20; // frame address + uint_t p : 1; // 0: present + uint_t rw : 1; // 1: writable + uint_t us : 1; // 2: user/supervisor + uint_t pwt : 1; // 3: cache write-through + uint_t pcd : 1; // 4: cache disable + uint_t a : 1; // 5: accessed + uint_t avl1 : 1; // 6: ignored (available) + uint_t ps : 1; // 7: page size (must be 0) + uint_t avl2 : 4; // 11-8: ignored (available) + uint_t fa : 20; // 31-12: frame address } pdek_f_t; // PDE for 4MB pages typedef struct pdem_s { - uint_t p : 1; // present - uint_t rw : 1; // writable - uint_t us : 1; // user/supervisor - uint_t pwt : 1; // cache write-through - uint_t pcd : 1; // cache disable - uint_t a : 1; // accessed - uint_t d : 1; // dirty - uint_t ps : 1; // page size (must be 1) - uint_t g : 1; // global - uint_t avl : 3; // ignored (available) - uint_t fa : 20; // frame address + uint_t p : 1; // 0: present + uint_t rw : 1; // 1: writable + uint_t us : 1; // 2: user/supervisor + uint_t pwt : 1; // 3: cache write-through + uint_t pcd : 1; // 4: cache disable + uint_t a : 1; // 5: 
accessed + uint_t d : 1; // 6: dirty + uint_t ps : 1; // 7: page size (must be 1) + uint_t g : 1; // 8: global + uint_t avl : 3; // 11-9: ignored (available) + uint_t pat : 1; // 12: page attribute table in use + uint_t fa2 : 4; // 16-13: bits 35-32 of frame address (36-bit addrs) + uint_t rsv : 5; // 21-17: reserved - must be zero + uint_t fa : 10; // 31-22: bits 31-22 of frame address } pdem_f_t; // page table entries @@ -249,17 +260,17 @@ typedef struct pdem_s { // broken out into fields typedef struct pte_s { - uint_t p : 1; // present - uint_t rw : 1; // writable - uint_t us : 1; // user/supervisor - uint_t pwt : 1; // cache write-through - uint_t pcd : 1; // cache disable - uint_t a : 1; // accessed - uint_t d : 1; // dirty - uint_t pat : 1; // page attribute table in use - uint_t g : 1; // global - uint_t avl : 3; // ignored (available) - uint_t fa : 20; // frame address + uint_t p : 1; // 0: present + uint_t rw : 1; // 1: writable + uint_t us : 1; // 2: user/supervisor + uint_t pwt : 1; // 3: cache write-through + uint_t pcd : 1; // 4: cache disable + uint_t a : 1; // 5: accessed + uint_t d : 1; // 6: dirty + uint_t pat : 1; // 7: page attribute table in use + uint_t g : 1; // 8: global + uint_t avl : 3; // 11-9: ignored (available) + uint_t fa : 20; // 31-12: frame address } ptef_t; // page fault error code bits @@ -291,6 +302,17 @@ typedef struct mapping_t { uint32_t perm; // access control } mapping_t; +// Modes for dumping out page hierarchies +enum vmmode_e { + Simple = 0, // just count 'present' entries at each level + OneLevel, // top-level only: count entries, decode 'present' + TwoLevel, // count entries & decode at each level + Full // ??? in case we need more? + // sentinel + , + N_VMMODES +}; + /* ** Globals */ @@ -312,6 +334,19 @@ extern pde_t *kpdir; */ void vm_init(void); +/** +** Name: vm_uva2kva +** +** Convert a user VA into a kernel address. 
Works for all addresses - +** if the address is a page address, the low-order twelve bits will be +** zeroes; otherwise, they are the offset into the page, which is +** unchanged within the address spaces. +** +** @param pdir Pointer to the page directory to examine +** @param va Virtual address to check +*/ +void *vm_uva2kva(pde_t *pdir, void *va); + /** ** Name: vm_pagedup ** @@ -323,6 +358,17 @@ void vm_init(void); */ void *vm_pagedup(void *old); +/** +** Name: vm_pdedup +** +** Duplicate a page directory entry +** +** @param entry The entry to be duplicated +** +** @return the new entry, or -1 on error. +*/ +pde_t vm_pdedup(pde_t entry); + /** ** Name: vm_ptdup ** @@ -439,6 +485,17 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm); */ int vm_uvmdup(pde_t *new, pde_t *old); +/** +** Name: vm_print +** +** Print out a paging hierarchy. +** +** @param pt Page table to display +** @param dir Is it a page directory (vs. a page table)? +** @param mode How to display the entries +*/ +void vm_print(void *pt, bool_t dir, enum vmmode_e mode); + #endif /* !ASM_SRC */ #endif diff --git a/kernel/isrs.S b/kernel/isrs.S index ec539f2..f5fdbca 100644 --- a/kernel/isrs.S +++ b/kernel/isrs.S @@ -19,6 +19,8 @@ # .arch i386 #include <bootstrap.h> +#include <offsets.h> +#include <vm.h> /* ** Configuration options - define in Makefile @@ -100,8 +102,8 @@ isr_save: ** ** Set up parameters for the ISR call. 
*/ - movl 52(%esp),%eax // get vector number and error code - movl 56(%esp),%ebx + movl CTX_vector(%esp),%eax // get vector number and error code + movl CTX_code(%esp),%ebx /* *********************** @@ -120,11 +122,12 @@ isr_save: // save the context pointer movl current, %edx - movl %esp, (%edx) + movl %esp, PCB_context(%edx) // also save the page directory pointer movl %cr3, %ecx - movl %ecx, 4(%edx) + addl $KERN_BASE, %ecx // convert to a virtual address + movl %ecx, PCB_pdir(%edx) // switch to the system stack // @@ -166,8 +169,9 @@ isr_restore: *********************** */ movl current, %ebx // return to the user stack - movl (%ebx), %esp // ESP --> context save area - movl 4(%ebx), %ecx // page directory pointer + movl PCB_context(%ebx), %esp // ESP --> context save area + movl PCB_pdir(%ebx), %ecx // page directory pointer + subl $KERN_BASE, %ecx // convert to a physical address movl %ecx, %cr3 // now we're operating with the user process' diff --git a/kernel/kernel.c b/kernel/kernel.c index 1b940df..44a8eee 100644 --- a/kernel/kernel.c +++ b/kernel/kernel.c @@ -194,11 +194,11 @@ static void stats(int code) case 'q': // dump the queues // code to dump out any/all queues - pcb_queue_dump("R", ready); - pcb_queue_dump("W", waiting); - pcb_queue_dump("S", sleeping); - pcb_queue_dump("Z", zombie); - pcb_queue_dump("I", sioread); + pcb_queue_dump("R", ready, true); + pcb_queue_dump("W", waiting, true); + pcb_queue_dump("S", sleeping, true); + pcb_queue_dump("Z", zombie, true); + pcb_queue_dump("I", sioread, true); break; case 'r': // print system configuration information @@ -309,7 +309,7 @@ int main(void) // report our configuration options kreport(true); - delay(DELAY_3_SEC); + delay(DELAY_2_SEC); /* ** Other tasks typically performed here: @@ -341,10 +341,11 @@ int main(void) const char *args[2] = { "init", NULL }; // load it - assert(user_load(prog, init_pcb, args) == SUCCESS); + assert(user_load(prog, init_pcb, args, true) == SUCCESS); // send it on its merry 
way schedule(init_pcb); + dispatch(); #ifdef TRACE_CX // if we're using a scrolling region, wait a bit more and then set it up @@ -366,9 +367,6 @@ int main(void) "================================================================================"); #endif - // switch to the "real" kernel page directory - vm_set_kvm(); - /* ** END OF TERM-SPECIFIC CODE ** @@ -380,5 +378,21 @@ int main(void) sio_enable(SIO_RX); + // produce a "system state" report + cio_puts("System status: Queues "); + pcb_queue_dump("R", ready, true); + pcb_queue_dump("W", waiting, true); + pcb_queue_dump("S", sleeping, true); + pcb_queue_dump("Z", zombie, true); + pcb_queue_dump("I", sioread, true); + ptable_dump_counts(); + pcb_dump("Current: ", current, true); + + delay(DELAY_3_SEC); + + vm_print(current->pdir, true, TwoLevel); + + delay(DELAY_3_SEC); + return 0; } diff --git a/kernel/kernel.ld b/kernel/kernel.ld index 3167327..83f211c 100644 --- a/kernel/kernel.ld +++ b/kernel/kernel.ld @@ -26,6 +26,7 @@ SECTIONS } /* Could put STABs here */ + /* .stab : { PROVIDE(__STAB_BEGIN__ = .); *(.stab); @@ -36,6 +37,7 @@ SECTIONS *(.stabstr); PROVIDE(__STABSTR_END__ = .); } + */ /* Align the data segment at the next page boundary */ . 
= ALIGN(0x1000); @@ -65,7 +67,8 @@ SECTIONS PROVIDE(_end = .); /DISCARD/ : { - /* *(.stab .stab_info .stabstr) */ - *(.eh_frame .note.GNU-stack .note.gnu.property .comment) + *(.stab .stab_info .stabstr) + *(.eh_frame .eh_frame_hdr) + *(.note.GNU-stack .note.gnu.property .comment) } } diff --git a/kernel/procs.c b/kernel/procs.c index 88589d0..20e6784 100644 --- a/kernel/procs.c +++ b/kernel/procs.c @@ -74,7 +74,7 @@ uint_t next_pid; pcb_t *init_pcb; // table of state name strings -const char *state_str[N_STATES] = { +const char state_str[N_STATES][4] = { [STATE_UNUSED] = "Unu", // "Unused" [STATE_NEW] = "New", [STATE_READY] = "Rdy", // "Ready" @@ -87,16 +87,16 @@ const char *state_str[N_STATES] = { }; // table of priority name strings -const char *prio_str[N_PRIOS] = { [PRIO_HIGH] = "High", - [PRIO_STD] = "User", - [PRIO_LOW] = "Low ", - [PRIO_DEFERRED] = "Def " }; +const char prio_str[N_PRIOS][5] = { [PRIO_HIGH] = "High", + [PRIO_STD] = "User", + [PRIO_LOW] = "Low ", + [PRIO_DEFERRED] = "Def " }; // table of queue ordering name strings -const char *ord_str[N_PRIOS] = { [O_FIFO] = "FIFO", - [O_PRIO] = "PRIO", - [O_PID] = "PID ", - [O_WAKEUP] = "WAKE" }; +// (sized by N_ORDERINGS so the definition matches its extern declaration in procs.h) +const char ord_str[N_ORDERINGS][5] = { [O_FIFO] = "FIFO", + [O_PRIO] = "PRIO", + [O_PID] = "PID ", + [O_WAKEUP] = "WAKE" }; /* ** PRIVATE FUNCTIONS @@ -939,7 +939,7 @@ void ctx_dump_all(const char *msg) } /** -** _pcb_dump(msg,pcb) +** pcb_dump(msg,pcb,all) ** ** Dumps the contents of this PCB to the console ** @@ -964,7 +964,15 @@ void pcb_dump(const char *msg, register pcb_t *pcb, bool_t all) } cio_printf(" %d", pcb->pid); + cio_printf(" %s", pcb->state >= N_STATES ? "???" : state_str[pcb->state]); +#if 0 + if( pcb->state >= N_STATES ) { + cio_puts( " ????" 
); + } else { + cio_printf( " %s", state_str[pcb->state] ); + } +#endif if (!all) { // just printing IDs and states on one line @@ -974,6 +982,13 @@ void pcb_dump(const char *msg, register pcb_t *pcb, bool_t all) // now, the rest of the contents cio_printf(" %s", pcb->priority >= N_PRIOS ? "???" : prio_str[pcb->priority]); +#if 0 + if( pcb->priority >= N_PRIOS ) { + cio_puts( " ???" ); + } else { + cio_printf( " %s", prio_str[pcb->priority] ); + } +#endif cio_printf(" ticks %u xit %d wake %08x\n", pcb->ticks, pcb->exit_status, pcb->wakeup); @@ -1111,6 +1126,14 @@ void ptable_dump_counts(void) for (n = 0; n < N_STATES; ++n) { cio_printf(" %u %s", nstate[n], state_str[n] != NULL ? state_str[n] : "???"); +#if 0 + cio_printf( " %u ", nstate[n] ); + if( state_str[n][0] != '\0' ) { + cio_puts( state_str[n] ); + } else { + cio_puts( "???" ); + } +#endif } cio_putchar('\n'); } diff --git a/kernel/startup.S b/kernel/startup.S index 73a081e..94b93b0 100644 --- a/kernel/startup.S +++ b/kernel/startup.S @@ -33,18 +33,18 @@ /* ** A symbol for locating the beginning of the code. */ - .globl begtext - .text -begtext: + + .globl begtext + .globl _start +_start = V2PNC(begtext) /* ** The entry point. When we get here, we have just entered protected ** mode, so all the segment registers are incorrect except for CS. */ - .globl _start +begtext: -_start: cli /* seems to be reset on entry to p. mode */ movb $NMI_ENABLE, %al /* re-enable NMIs (bootstrap */ outb $CMOS_ADDR /* turned them off) */ @@ -110,10 +110,18 @@ clearbss: # set the initial frame pointer xorl %ebp, %ebp + # now, jump and switch into using high addresses + # we use an indirect jump here because the assembler + # would ordinarily generate a PC-relative target + # address for the jump, which would not have the + # desired effect + movl $onward, %eax + jmp *%eax + +onward: + /* -** Call the system initialization routine, and switch to -** executing at high addresses. 
We use an indirect jump -** here to avoid getting a PC-relative 'jmp' instruction. +** Call the system initialization routine. ** ** Alternate idea: push the address of isr_restore ** and just do an indirect jump? diff --git a/kernel/syscalls.c b/kernel/syscalls.c index 0653c93..92a0a23 100644 --- a/kernel/syscalls.c +++ b/kernel/syscalls.c @@ -354,7 +354,7 @@ SYSIMPL(exec) pcb->pdir = NULL; // "load" it and set up the VM tables for this process - int status = user_load(prog, pcb, args); + int status = user_load(prog, pcb, args, false); if (status != SUCCESS) { RET(pcb) = status; SYSCALL_EXIT(status); diff --git a/kernel/user.c b/kernel/user.c index e9398ff..0e5e186 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -426,44 +426,27 @@ static int read_phdrs(elfhdr_t *hdr, pcb_t *pcb) ** @param pcb Pointer to the PCB for the process ** @param entry Entry point for the new process ** @param args Argument vector to be put in place +** @param sys Is the argument vector from kernel code? ** -** @return A pointer to the context_t on the stack, or NULL +** @return A (user VA) pointer to the context_t on the stack, or NULL */ -static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) +static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args, + bool_t sys) { #if TRACING_USER - cio_printf("stksetup: pcb %08x, entry %08, args %08x\n", (uint32_t)pcb, + cio_printf("stksetup: pcb %08x, entry %08x, args %08x\n", (uint32_t)pcb, entry, (uint32_t)args); #endif /* - ** First, we need to count the space we'll need for the argument + ** First, we need to calculate the space we'll need for the argument ** vector and strings. - */ - - int argbytes = 0; - int argc = 0; - - while (args[argc] != NULL) { - int n = strlen(args[argc]) + 1; - // can't go over one page in size - if ((argbytes + n) > SZ_PAGE) { - // oops - ignore this and any others - break; - } - argbytes += n; - ++argc; - } - - // Round up the byte count to the next multiple of four. 
- argbytes = (argbytes + 3) & MOD4_MASK; - - /* - ** Allocate the arrays. We are safe using dynamic arrays here - ** because we're using the OS stack, not the user stack. ** - ** We want the argstrings and argv arrays to contain all zeroes. - ** The C standard states, in section 6.7.8, that + ** Keeping track of kernel vs. user VAs is tricky, so we'll use + ** a prefix on variable names: kv_* is a kernel virtual address; + ** uv_* is a user virtual address. + ** + ** We rely on the C standard, section 6.7.8, to clear these arrays: ** ** "21 If there are fewer initializers in a brace-enclosed list ** than there are elements or members of an aggregate, or @@ -472,31 +455,36 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) ** the remainder of the aggregate shall be initialized ** implicitly the same as objects that have static storage ** duration." - ** - ** Sadly, because we're using variable-sized arrays, we can't - ** rely on this, so we have to call memclr() instead. :-( In - ** truth, it doesn't really cost us much more time, but it's an - ** annoyance. */ - char argstrings[argbytes]; - char *argv[argc + 1]; + int argbytes = 0; // total length of arg strings + int argc = 0; // number of argv entries + const char *kv_strs[N_ARGS] = { 0 }; // converted user arg string pointers + int strlengths[N_ARGS] = { 0 }; // length of each string + const char *uv_argv[N_ARGS] = { 0 }; // argv pointers - CLEAR(argstrings); - CLEAR(argv); + /* + ** IF the argument list given to us came from user code, we need + ** to convert its address and the addresses it contains to kernel + ** VAs; otherwise, we can use them directly. + */ + char **kv_args = sys ? args : vm_uva2kva(pcb->pdir, (void *)args); - // Next, duplicate the argument strings, and create pointers to - // each one in our argv. - char *tmp = argstrings; - for (int i = 0; i < argc; ++i) { - int nb = strlen(args[i]) + 1; // bytes (incl. 
NUL) in this string - strcpy(tmp, args[i]); // add to our buffer - argv[i] = tmp; // remember where it was - tmp += nb; // move on + // stop after N_ARGS-1 strings: kv_strs[], strlengths[] and uv_argv[] + // hold only N_ARGS entries, and argv[] needs room for its trailing NULL + while (argc < (N_ARGS - 1) && kv_args[argc] != NULL) { + kv_strs[argc] = sys ? args[argc] : + vm_uva2kva(pcb->pdir, (void *)(kv_args[argc])); + strlengths[argc] = strlen(kv_strs[argc]) + 1; + // can't go over one page in size + if ((argbytes + strlengths[argc]) > SZ_PAGE) { + // oops - ignore this and any others + break; + } + argbytes += strlengths[argc]; + ++argc; } - // trailing NULL pointer - argv[argc] = NULL; + // Round up the byte count to the next multiple of four. + argbytes = (argbytes + 3) & MOD4_MASK; /* ** The pages for the stack were cleared when they were allocated, @@ -524,32 +512,44 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) ** see below for more information. */ - // Pointer to the last word in stack. We get this from the - // VM hierarchy. Get the PDE entry for the user address space. - pde_t stack_pde = pcb->pdir[USER_PDE]; - - // The PDE entry points to the PT, which is an array of PTE. The last - // two entries are for the stack; pull out the last one. - pte_t stack_pte = ((pte_t *)(stack_pde & MOD4K_MASK))[USER_STK_PTE2]; - - // OK, now we have the PTE. The frame address of the last page is - // in this PTE. Find the address immediately after that. - uint32_t *ptr = (uint32_t *)((uint32_t)(stack_pte & MOD4K_MASK) + SZ_PAGE); - - // Pointer to where the arg strings should be filled in. - char *strings = (char *)((uint32_t)ptr - argbytes); - - // back the pointer up to the nearest word boundary; because we're - // moving toward location 0, the nearest word boundary is just the - // next smaller address whose low-order two bits are zeroes - strings = (char *)((uint32_t)strings & MOD4_MASK); - - // Copy over the argv strings. - memcpy((void *)strings, argstrings, argbytes); + /* + ** Find the user stack. 
The PDE entry for user address space points + ** to a page table for the first 4MB of the address space, but the + ** "pointer" there is a physical frame address. + */ + pde_t *kv_userpt = (pde_t *)P2V(PTE_ADDR(pcb->pdir[USER_PDE])); + assert(kv_userpt != NULL); /* - ** Next, we need to copy over the argv pointers. Start by - ** determining where 'argc' should go. + ** The final entries in that PMT are for the pages of the user stack. + ** Grab the address of the frame for the last one. (Again, we need + ** to convert it to a virtual address we can use.) + */ + + // the PMT entry for that page + pte_t pmt_entry = kv_userpt[USER_STK_LAST_PTE]; + assert(IS_PRESENT(pmt_entry)); + + // kernel VA for the first byte following that page + uint8_t *kv_ptr = (uint8_t *)P2V(PTE_ADDR(pmt_entry) + SZ_PAGE); + assert(kv_ptr != NULL); + + // user VA for the first byte following that page + uint32_t *uv_ptr = (uint32_t *)(USER_STACK_P2 + SZ_PAGE); + + // Pointers to where the arg strings should be filled in. + uint32_t kv_strings = ((uint32_t)kv_ptr) - argbytes; + uint32_t uv_strings = ((uint32_t)uv_ptr) - argbytes; + + // back the pointers up to the nearest word boundary; because we're + // moving toward location 0, the nearest word boundary is just the + // next smaller address whose low-order two bits are zeroes + kv_strings &= MOD4_MASK; + uv_strings &= MOD4_MASK; + + /* + ** Next, we need to copy over the data. Start by determining + ** where 'argc' should go. ** ** Stack alignment is controlled by the SysV ABI i386 supplement, ** version 1.2 (June 23, 2016), which states in section 2.2.2: @@ -563,7 +563,7 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) ** ** Isn't technical documentation fun? Ultimately, this means that ** the first parameter to main() should be on the stack at an address - ** that is a multiple of 16. + ** that is a multiple of 16. In our case, that is 'argc'. 
** ** The space needed for argc, argv, and the argv array itself is ** argc + 3 words (argc+1 for the argv entries, plus one word each @@ -571,45 +571,66 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) */ int nwords = argc + 3; - uint32_t *acptr = ((uint32_t *)strings) - nwords; + uint32_t *kv_acptr = ((uint32_t *)kv_strings) - nwords; + uint32_t *uv_acptr = ((uint32_t *)uv_strings) - nwords; - /* - ** Next, back up until we're at a multiple-of-16 address. Because we - ** are moving to a lower address, its upper 28 bits are identical to - ** the address we currently have, so we can do this with a bitwise - ** AND to just turn off the lower four bits. - */ + // back these up to multiple-of-16 addresses for stack alignment + kv_acptr = (uint32_t *)(((uint32_t)kv_acptr) & MOD16_MASK); + uv_acptr = (uint32_t *)(((uint32_t)uv_acptr) & MOD16_MASK); - acptr = (uint32_t *)(((uint32_t)acptr) & MOD16_MASK); + // the argv location + uint32_t *kv_avptr = kv_acptr + 1; - // copy in 'argc' - *acptr = argc; + // the user address for the first argv entry + uint32_t *uv_avptr = uv_acptr + 2; - // next, 'argv', which follows 'argc'; 'argv' points to the - // word that follows it in the stack - uint32_t *avptr = acptr + 2; - *(acptr + 1) = (uint32_t)avptr; + // Copy over the argv strings. + for (int i = 0; i < argc; ++i) { + // copy the string using kernel addresses: kv_strs[i] is the + // kernel-usable pointer that was measured above, whereas + // kv_args[i] is still a user VA when the vector came from user code + strcpy((char *)kv_strings, kv_strs[i]); - /* - ** Next, we copy in all argc+1 pointers. - */ + // remember the user address where this string went + uv_argv[i] = (char *)uv_strings; - // Adjust and copy the string pointers. - for (int i = 0; i <= argc; ++i) { - if (argv[i] != NULL) { - // an actual pointer - adjust it and copy it in - *avptr = (uint32_t)strings; - // skip to the next entry in the array - strings += strlen(argv[i]) + 1; - } else { - // end of the line! 
- *avptr = NULL; - } - ++avptr; + // adjust both string addresses + kv_strings += strlengths[i]; + uv_strings += strlengths[i]; } /* - ** Now, we need to set up the initial context for the executing + ** Next, we copy in argc, argv, and the pointers. The stack will + ** look something like this: + ** + ** kv_avptr + ** kv_acptr | + ** | | + ** v v + ** argc argv av[0] av[1] etc NULL str0 str1 etc. + ** [....][....][....][....] ... [0000] ... [......0......0.........] + ** | ^ | | ^ ^ + ** | | | | | | + ** ------ | ---------------------|------- + ** --------------------------- + */ + + // copy in 'argc' + *kv_acptr = argc; + + // copy in 'argv' + *kv_avptr++ = (uint32_t)uv_avptr; + + // now, the argv entries themselves + for (int i = 0; i < argc; ++i) { + *kv_avptr++ = (uint32_t)uv_argv[i]; + } + + // and the trailing NULL + *kv_avptr = NULL; + + /* + ** Almost done! + ** + ** Now we need to set up the initial context for the executing ** process. ** ** When this process is dispatched, the context restore code will @@ -618,27 +639,34 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) ** the interrupt "returns" to the entry point of the process. */ - // Locate the context save area on the stack. - context_t *ctx = ((context_t *)avptr) - 1; + // Locate the context save area on the stack by backing up one + // "context" from where the argc value is saved + context_t *kv_ctx = ((context_t *)kv_acptr) - 1; + uint32_t uv_ctx = (uint32_t)(((context_t *)uv_acptr) - 1); /* ** We cleared the entire stack earlier, so all the context ** fields currently contain zeroes. We now need to fill in ** all the important fields. + ** + ** Note: we don't need to set the ESP value for the process, + ** as the 'popa' that restores the general registers doesn't + ** actually restore ESP from the context area - it leaves it + ** where it winds up. 
*/ - ctx->eflags = DEFAULT_EFLAGS; // IE enabled, PPL 0 - ctx->eip = entry; // initial EIP - ctx->cs = GDT_CODE; // segment registers - ctx->ss = GDT_STACK; - ctx->ds = ctx->es = ctx->fs = ctx->gs = GDT_DATA; + kv_ctx->eflags = DEFAULT_EFLAGS; // IF enabled, IOPL 0 + kv_ctx->eip = entry; // initial EIP + kv_ctx->cs = GDT_CODE; // segment registers + kv_ctx->ss = GDT_STACK; + kv_ctx->ds = kv_ctx->es = kv_ctx->fs = kv_ctx->gs = GDT_DATA; /* ** Return the new context pointer to the caller. It will be our ** caller's responsibility to schedule this process. */ - return (ctx); + return ((context_t *)uv_ctx); } /* @@ -809,10 +837,11 @@ int user_duplicate(pcb_t *new, pcb_t *old) ** @param ptab A pointer to the program table entry to be loaded ** @param pcb The PCB for the program being loaded ** @param args The argument vector for the program +** @param sys Is the argument vector from kernel code? ** ** @return the status of the load attempt */ -int user_load(prog_t *ptab, pcb_t *pcb, const char **args) +int user_load(prog_t *ptab, pcb_t *pcb, const char **args, bool_t sys) { // NULL pointers are bad! assert1(ptab != NULL); @@ -820,7 +849,7 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) assert1(args != NULL); #if TRACING_USER - cio_printf("uload: prog '%s' pcb %08x args %08x\n", + cio_printf("Uload: prog '%s' pcb %08x args %08x\n", ptab->name[0] ? 
ptab->name : "?", (uint32_t)pcb, (uint32_t)args); #endif @@ -832,8 +861,16 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) (uint32_t)ptab, ptab->name, ptab->offset, ptab->size, ptab->flags); cio_printf(" args %08x:", (uint32_t)args); - for (int i = 0; args[i] != NULL; ++i) { - cio_printf(" [%d] %s", i, args[i]); + if (sys) { + for (int i = 0; args[i] != NULL; ++i) { + cio_printf(" [%d] %s", i, args[i]); + } + } else { + char **kv_args = vm_uva2kva(pcb->pdir, args); + for (int i = 0; kv_args[i] != NULL; ++i) { + cio_printf(" [%d] %s", i, + (char *)vm_uva2kva(pcb->pdir, kv_args[i])); + } } cio_printf("\n pcb %08x (pid %u)\n", (uint32_t)pcb, pcb->pid); dump_fhdr(hdr); @@ -853,8 +890,8 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) // read all the program headers int stat = read_phdrs(hdr, pcb); if (stat != SUCCESS) { - // TODO figure out a better way to deal with this - PANIC(0, "user_load: phdr read failed"); + cio_printf("Uload: read_phdrs('%s') returned %d\n", ptab->name, stat); + PANIC(0, "User_load: phdr read failed"); } // next, set up the runtime stack - just like setting up loadable @@ -862,12 +899,12 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) stat = vm_add(pcb->pdir, true, false, (void *)USER_STACK, SZ_USTACK, NULL, 0); if (stat != SUCCESS) { - // TODO yadda yadda... 
- PANIC(0, "user_load: vm_add failed"); + cio_printf("Uload: vm_add('%s') stack returned %d\n", ptab->name, stat); + PANIC(0, "user_load: vm_add stack failed"); } // set up the command-line arguments - pcb->context = stack_setup(pcb, hdr->e_entry, args); + pcb->context = stack_setup(pcb, hdr->e_entry, args, sys); return SUCCESS; } @@ -883,7 +920,7 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) void user_cleanup(pcb_t *pcb) { #if TRACING_USER - cio_printf("uclean: %08x\n", (uint32_t)pcb); + cio_printf("Uclean: %08x\n", (uint32_t)pcb); #endif if (pcb == NULL) { diff --git a/kernel/vm.c b/kernel/vm.c index 7d43bb8..a700bcb 100644 --- a/kernel/vm.c +++ b/kernel/vm.c @@ -58,45 +58,155 @@ static void vm_isr(int vector, int code) } /** -** Name: uva2kva +** Name: ptcount ** -** Convert a user VA into a kernel address. Works for all addresses - -** if the address is a page address, the PERMS(va) value will be 0; -** otherwise, it is the offset into the page. +** Count the number of each type of entry in a page table. +** Returns a 32-bit result containing two 16-bit counts: ** -** @param pdir Pointer to the page directory to examine -** @param va Virtual address to check +** Upper half Lower half +** PDIR: # of 4MB entries # of 'present' entries +** PMT: zero # of 'present' entries +** +** The number of "not present" can be calculated from these. +** +** @param pt Pointer to the page table +** @param dir Is it a page directory (vs. a page table)? 
*/ ATTR_UNUSED -static void *uva2kva(pde_t *pdir, void *va) +static uint32_t ptcount(pte_t *ptr, bool_t dir) { - // find the PMT entry for this address - pte_t *pte = vm_getpte(pdir, va, false); - if (pte == NULL) { - return NULL; + uint16_t n_np = 0, n_p = 0, n_lg = 0; + + for (int i = 0; i < N_PTE; ++i) { + pde_t entry = *ptr++; + if (!IS_PRESENT(entry)) { + ++n_np; + continue; + } + if (dir && IS_LARGE(entry)) { + ++n_lg; + } else { + ++n_p; + } } - // get the entry - pte_t entry = *pte; + // n_lg will be 0 for PMTs + return (n_lg << 16) | n_p; +} - // is this a valid address for the user? - if (IS_PRESENT(entry)) { - return NULL; +// decode a PDE +static void pde_prt(uint32_t level, uint32_t i, uint32_t entry) +{ + // indent + for (uint32_t n = 0; n <= level; ++n) + cio_puts(" "); + // line header + cio_printf("[%08x] %08x", i, entry); + // perms + if (IS_LARGE(entry)) { // PS is 1 + if ((entry & PDE_PAT) != 0) + cio_puts(" PAT"); + if ((entry & PDE_G) != 0) + cio_puts(" G"); + cio_puts(" PS"); + if ((entry & PDE_D) != 0) + cio_puts(" D"); } + if ((entry & PDE_A) != 0) + cio_puts(" A"); + if ((entry & PDE_PCD) != 0) + cio_puts(" CD"); + if ((entry & PDE_PWT) != 0) + cio_puts(" WT"); + if ((entry & PDE_US) != 0) + cio_puts(" U"); + if ((entry & PDE_RW) != 0) + cio_puts(" W"); + cio_puts((entry & PDE_P) != 0 ? " P" : "!P"); - // is this a system-only page? - if (IS_SYSTEM(entry)) { - return NULL; - } + cio_printf(" --> %s %08x", IS_LARGE(entry) ? 
"Pg" : "PT", PDE_ADDR(entry)); +} - // get the physical address - uint32_t frame = PTE_ADDR(*pte) | PERMS(va); +// decode a PTE +static void pte_prt(uint32_t level, uint32_t i, uint32_t entry) +{ + // indent + for (uint32_t n = 0; n <= level; ++n) + cio_puts(" "); + // line header + cio_printf("[%08x] %08x", i, entry); + // perms + if ((entry & PDE_G) != 0) + cio_puts(" G"); + if ((entry & PDE_PAT) != 0) + cio_puts(" PAT"); + if ((entry & PDE_D) != 0) + cio_puts(" D"); + if ((entry & PDE_A) != 0) + cio_puts(" A"); + if ((entry & PDE_PCD) != 0) + cio_puts(" CD"); + if ((entry & PDE_PWT) != 0) + cio_puts(" WT"); + if ((entry & PDE_US) != 0) + cio_puts(" U"); + if ((entry & PDE_RW) != 0) + cio_puts(" W"); + cio_puts((entry & PDE_P) != 0 ? " P" : "!P"); - return (void *)P2V(frame); + cio_printf(" --> Pg %08x", PTE_ADDR(entry)); } /** -** Name: ptdump +** Name: pdump +** +** Recursive helper for table hierarchy dump. +** +** @param level Current hierarchy level +** @param pt Page table to display +** @param dir Is it a page directory (vs. a page table)? +** @param mode How to display the entries +*/ +ATTR_UNUSED +static void pdump(uint_t level, void *pt, bool_t dir, enum vmmode_e mode) +{ + pte_t *ptr = (pte_t *)pt; + + cio_printf("%s at 0x%08x:", dir ? 
"PDir" : "PTbl", (uint32_t)pt); + uint32_t nums = ptcount(ptr, dir); + if (dir) { + cio_printf(" %u 4MB", (nums >> 16)); + } + cio_printf(" %u P %u !P\n", nums & 0xffff, + N_PTE - ((nums >> 16) + (nums & 0xffff))); + + for (uint32_t i = 0; i < (uint32_t)N_PTE; ++i) { + pte_t entry = *ptr; + if (dir) { + // this is a PDIR entry; could be either a 4MB + // page, or a PMT pointer + if (mode > Simple) { + pde_prt(level, i, entry); + cio_putchar('\n'); + if (IS_PRESENT(entry) && !IS_LARGE(entry)) { // only present, non-4MB PDEs reference a PMT + pdump(level + 1, (void *)P2V(PDE_ADDR(entry)), false, mode); // PDE holds a PA plus flag bits + } + } + } else { + // just a PMT entry + if (mode > Simple) { + pte_prt(level, i, entry); + cio_putchar('\n'); + } + } + + // move to the next entry + ++ptr; + } +} + +/** +** Name: pmt_dump ** ** Dump the non-zero entries of a page table or directory ** @@ -105,7 +215,8 @@ static void *uva2kva(pde_t *pdir, void *va) ** @param start First entry to process ** @param num Number of entries to process */ -static void ptdump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num) +ATTR_UNUSED +static void pmt_dump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num) { cio_printf("\n\nP%c dump", dir ? 'D' : 'T'); cio_printf(" of %08x", (uint32_t)pt); @@ -169,7 +280,23 @@ void vm_init(void) assert(kpdir != NULL); #if TRACING_VM - cio_printf("vm_init: kpdir is %08x\n", kpdir); + cio_printf("vm_init: kpdir %08x, adding user pages\n", kpdir); +#endif + + // add the entries for the user address space + for (uint32_t addr = 0; addr < NUM_4MB; addr += SZ_PAGE) { + int stat = vm_map(kpdir, (void *)addr, addr, SZ_PAGE, PTE_RW); + if (stat != SUCCESS) { + cio_printf("vm_init, map %08x->%08x failed, status %d\n", addr, + addr, stat); + PANIC(0, "vm_init user range map failed"); + } +#if TRACING_VM + cio_putchar('.'); +#endif + } +#if TRACING_VM + cio_puts(" done\n"); #endif // switch to it @@ -183,6 +310,44 @@ void vm_init(void) install_isr(VEC_PAGE_FAULT, vm_isr); } +/** +** Name: vm_uva2kva +** +** Convert a user VA into a kernel address. 
Works for all addresses - +** if the address is a page address, the low-order twelve bits will be +** zeroes; otherwise, they are the offset into the page, which is +** unchanged within the address spaces. +** +** @param pdir Pointer to the page directory to examine +** @param va Virtual address to check +*/ +void *vm_uva2kva(pde_t *pdir, void *va) +{ + // find the PMT entry for this address + pte_t *pte = vm_getpte(pdir, va, false); + if (pte == NULL) { + return NULL; + } + + // get the entry + pte_t entry = *pte; + + // is this a valid address for the user? + if (!IS_PRESENT(entry)) { + return NULL; + } + + // is this a system-only page? + if (IS_SYSTEM(entry)) { + return NULL; + } + + // get the physical address + uint32_t frame = PTE_ADDR(*pte) | PERMS(va); + + return (void *)P2V(frame); +} + /** ** Name: vm_pagedup ** @@ -206,46 +371,45 @@ void *vm_pagedup(void *old) ** ** Duplicate a page directory entry ** -** @param dst Pointer to where the duplicate should go -** @param curr Pointer to the entry to be duplicated +** @param entry The entry to be duplicated ** -** @return true on success, else false +** @return the new entry, or -1 on error */ -bool_t vm_pdedup(pde_t *dst, pde_t *curr) +pde_t vm_pdedup(pde_t entry) { - assert1(curr != NULL); - assert1(dst != NULL); - #if TRACING_VM - cio_printf("vm_pdedup dst %08x curr %08x\n", (uint32_t)dst, (uint32_t)curr); + cio_printf("vm_pdedup curr %08x\n", (uint32_t)entry); #endif - pde_t entry = *curr; // simplest case if (!IS_PRESENT(entry)) { - *dst = 0; - return true; + return 0; } - // OK, we have an entry; allocate a page table for it - pte_t *newtbl = (pte_t *)km_page_alloc(); - if (newtbl == NULL) { - return false; + // is this a large page? 
+ if (IS_LARGE(entry)) { + // just copy it + return entry; } - // we could clear the new table, but we'll be assigning to - // each entry anyway, so we'll save the execution time + // OK, we have a 4KB entry; allocate a page table for it + pte_t *tblva = (pte_t *)km_page_alloc(); + if (tblva == NULL) { + return (uint32_t)-1; + } - // address of the page table for this directory entry - pte_t *old = (pte_t *)PDE_ADDR(entry); + // make sure the entries are all initially 'not present' + memclr(tblva, SZ_PAGE); - // pointer to the first PTE in the new table - pte_t *new = newtbl; + // VA of the page table for this directory entry + pte_t *old = (pte_t *)P2V(PDE_ADDR(entry)); + + // pointer to the first PTE in the new table (already a VA) + pte_t *new = tblva; for (int i = 0; i < N_PTE; ++i) { - if (!IS_PRESENT(*old)) { - *new = 0; - } else { + // only need to copy 'present' entries + if (IS_PRESENT(*old)) { *new = *old; } ++old; @@ -253,10 +417,8 @@ bool_t vm_pdedup(pde_t *dst, pde_t *curr) } // replace the page table address - // upper 22 bits from 'newtbl', lower 12 from '*curr' - *dst = (pde_t)(PTE_ADDR(newtbl) | PERMS(entry)); - - return true; + // (PA of page table, lower 12 bits from '*curr') + return (pde_t)(V2P(PTE_ADDR(tblva)) | PERMS(entry)); } /** @@ -282,8 +444,7 @@ pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc) assert1(pdir != NULL); // get the PDIR entry for this virtual address - uint32_t ix = PDIX(va); - pde_t *pde_ptr = &pdir[ix]; + pde_t *pde_ptr = &pdir[PDIX(va)]; // is it already set up? if (IS_PRESENT(*pde_ptr)) { @@ -319,10 +480,8 @@ pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc) *pde_ptr = V2P(ptbl) | PDE_P | PDE_RW; } - // finally, return a pointer to the entry in the - // page table for this VA - ix = PTIX(va); - return &ptbl[ix]; + // finally, return a pointer to the entry in the page table for this VA + return &ptbl[PTIX(va)]; } // Set up kernel part of a page table. 
@@ -337,7 +496,7 @@ pde_t *vm_mkkvm(void) } #if 0 && TRACING_VM cio_puts( "\nEntering vm_mkkvm\n" ); - ptdump( pdir, true, 0, N_PDE ); + pmt_dump( pdir, true, 0, N_PDE ); #endif // clear it out to disable all the entries @@ -361,8 +520,8 @@ pde_t *vm_mkkvm(void) } #if 0 && TRACING_VM cio_puts( "\nvm_mkkvm() final PD:\n" ); - ptdump( pdir, true, 0, 16 ); - ptdump( pdir, true, 0x200, 16 ); + pmt_dump( pdir, true, 0, 16 ); + pmt_dump( pdir, true, 0x200, 16 ); #endif return pdir; @@ -382,19 +541,26 @@ pde_t *vm_mkuvm(void) return NULL; } - // iterate through the kernel page directory - pde_t *curr = kpdir; - pde_t *dst = new; - for (int i = 0; i < N_PDE; ++i) { + // iterate through the 'system' portions of the kernel + // page directory + int i = PDIX(KERN_BASE); + pde_t *curr = &kpdir[i]; + pde_t *dst = &new[i]; + while (i < N_PDE) { if (*curr != 0) { // found an active one - duplicate it - if (!vm_pdedup(dst, curr)) { + pde_t entry = vm_pdedup(*curr); + if (entry == (uint32_t)-1) { return NULL; } + *dst = entry; + } else { + *dst = 0; } ++curr; ++dst; + ++i; } return new; @@ -481,7 +647,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size, // figure out where this page will go in the hierarchy pte_t *pte = vm_getpte(pdir, va, true); if (pte == NULL) { - // TODO if i > 0, this isn't the first frame - is + // if i > 0, this isn't the first frame - is // there anything to do about other frames? // POSSIBLE MEMORY LEAK? 
return E_NO_MEMORY; @@ -490,7 +656,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size, // allocate the frame void *page = km_page_alloc(); if (page == NULL) { - // TODO same question here + // same question here return E_NO_MEMORY; } @@ -498,7 +664,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size, memclr(page, SZ_PAGE); // create the PTE for this frame - uint32_t entry = (uint32_t)(PTE_ADDR(page) | entrybase); + uint32_t entry = (uint32_t)(PTE_ADDR(V2P(page)) | entrybase); *pte = entry; // copy data if we need to @@ -506,7 +672,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size, // how much to copy uint32_t num = bytes > SZ_PAGE ? SZ_PAGE : bytes; // do it! - memcpy((void *)page, (void *)data, num); + memmove((void *)page, (void *)data, num); // adjust all the pointers data += num; // where to continue bytes -= num; // what's left to copy @@ -644,7 +810,7 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm) PDIX(addr), PTIX(addr)); // dump the directory - ptdump(pdir, true, PDIX(addr), 4); + pmt_dump(pdir, true, PDIX(addr), 4); // find the relevant PDE entry uint32_t ix = PDIX(va); @@ -653,7 +819,7 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm) // round the PMT index down uint32_t ix2 = PTIX(va) & MOD4_MASK; // dump the PMT for the relevant directory entry - ptdump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4); + pmt_dump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4); } #endif PANIC(0, "mapping an already-mapped address"); @@ -735,3 +901,26 @@ int vm_uvmdup(pde_t *new, pde_t *old) return SUCCESS; } + +/** +** Name: vm_print +** +** Print out a paging hierarchy. +** +** @param pt Page table to display +** @param dir Is it a page directory (vs. a page table)? 
+** @param mode How to display the entries +*/ +void vm_print(void *pt, bool_t dir, enum vmmode_e mode) +{ + cio_puts("\nVM hierarchy"); + if (pt == NULL) { + cio_puts(" (NULL pointer)\n"); + return; + } + + cio_printf("Starting at 0x%08x (%s):\n", (uint32_t)pt, + dir ? "PDIR" : "PMT"); + + pdump(0, pt, dir, mode); +} diff --git a/user/init.c b/user/init.c index 98d9755..be8e393 100644 --- a/user/init.c +++ b/user/init.c @@ -20,7 +20,7 @@ typedef struct proc_s { uint_t pid; // its PID (when spawned) uint8_t e_prio; // process priority char select[3]; // identifying character, NUL, extra - char *args[MAX_ARGS]; // argument vector strings + char *args[N_ARGS]; // argument vector strings } proc_t; /* diff --git a/user/shell.c b/user/shell.c index 628e9b7..f8c13cd 100644 --- a/user/shell.c +++ b/user/shell.c @@ -27,7 +27,7 @@ typedef struct proc_s { uint_t index; // process table index int8_t prio; // process priority char select[3]; // identifying character, NUL, extra - char *args[MAX_ARGS]; // argument vector strings + char *args[N_ARGS]; // argument vector strings } proc_t; /* diff --git a/util/default.ld b/util/default.ld new file mode 100644 index 0000000..0c600e4 --- /dev/null +++ b/util/default.ld @@ -0,0 +1,247 @@ +GNU ld (GNU Binutils for Ubuntu) 2.30 + Supported emulations: + elf_x86_64 + elf32_x86_64 + elf_i386 + elf_iamcu + i386linux + elf_l1om + elf_k1om + i386pep + i386pe +using internal linker script: +================================================== +/* Script for -z combreloc: combine and sort reloc sections */ +/* Copyright (C) 2014-2018 Free Software Foundation, Inc. + Copying and distribution of this script, with or without modification, + are permitted in any medium without royalty provided the copyright + notice and this notice are preserved. 
*/ +OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", + "elf64-x86-64") +OUTPUT_ARCH(i386:x86-64) +ENTRY(_start) +SEARCH_DIR("=/usr/local/lib/x86_64-linux-gnu"); SEARCH_DIR("=/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu64"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib64"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib"); +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS; + .interp : { *(.interp) } + .note.gnu.build-id : { *(.note.gnu.build-id) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .rela.dyn : + { + *(.rela.init) + *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) + *(.rela.fini) + *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) + *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) + *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) + *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) + *(.rela.ctors) + *(.rela.dtors) + *(.rela.got) + *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) + *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) + *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) + *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) + *(.rela.ifunc) + } + .rela.plt : + { + *(.rela.plt) + PROVIDE_HIDDEN (__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE_HIDDEN (__rela_iplt_end = .); + } + .init : + { + KEEP (*(SORT_NONE(.init))) + } + .plt : { *(.plt) *(.iplt) } +.plt.got : { *(.plt.got) } +.plt.sec : { *(.plt.sec) } + .text : + { + *(.text.unlikely .text.*_unlikely .text.unlikely.*) + *(.text.exit 
.text.exit.*) + *(.text.startup .text.startup.*) + *(.text.hot .text.hot.*) + *(.text .stub .text.* .gnu.linkonce.t.*) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + } + .fini : + { + KEEP (*(SORT_NONE(.fini))) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) } + .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) } + .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table + .gcc_except_table.*) } + .gnu_extab : ONLY_IF_RO { *(.gnu_extab*) } + /* These sections are generated by the Sun/Oracle C++ compiler. */ + .exception_ranges : ONLY_IF_RO { *(.exception_ranges + .exception_ranges*) } + /* Adjust the address for the data segment. We want to adjust up to + the same address within the page on the next page up. */ + . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE)); + /* Exception handling */ + .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) } + .gnu_extab : ONLY_IF_RW { *(.gnu_extab) } + .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } + .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) } + /* Thread Local Storage sections */ + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + .preinit_array : + { + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + } + .init_array : + { + PROVIDE_HIDDEN (__init_array_start = .); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors)) + PROVIDE_HIDDEN (__init_array_end = .); + } + .fini_array : + { + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP 
(*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors)) + PROVIDE_HIDDEN (__fini_array_end = .); + } + .ctors : + { + /* gcc uses crtbegin.o to find the start of + the constructors, so we make sure it is + first. Because this is a wildcard, it + doesn't matter if the user does not + actually link against crtbegin.o; the + linker won't look for a file to match a + wildcard. The wildcard also means that it + doesn't matter which directory crtbegin.o + is in. */ + KEEP (*crtbegin.o(.ctors)) + KEEP (*crtbegin?.o(.ctors)) + /* We don't want to include the .ctor section from + the crtend.o file until after the sorted ctors. + The .ctor section from the crtend file contains the + end of ctors marker and it must be last */ + KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + } + .dtors : + { + KEEP (*crtbegin.o(.dtors)) + KEEP (*crtbegin?.o(.dtors)) + KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + } + .jcr : { KEEP (*(.jcr)) } + .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) } + .dynamic : { *(.dynamic) } + .got : { *(.got) *(.igot) } + . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); + .got.plt : { *(.got.plt) *(.igot.plt) } + .data : + { + *(.data .data.* .gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + } + .data1 : { *(.data1) } + _edata = .; PROVIDE (edata = .); + . = .; + __bss_start = .; + .bss : + { + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(COMMON) + /* Align here to ensure that the .bss section occupies space up to + _end. Align after .bss to ensure correct alignment even if the + .bss section disappears because there are no input sections. + FIXME: Why do we need it? When there is no .bss section, we don't + pad the .data section. */ + . = ALIGN(. 
!= 0 ? 64 / 8 : 1); + } + .lbss : + { + *(.dynlbss) + *(.lbss .lbss.* .gnu.linkonce.lb.*) + *(LARGE_COMMON) + } + . = ALIGN(64 / 8); + . = SEGMENT_START("ldata-segment", .); + .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : + { + *(.lrodata .lrodata.* .gnu.linkonce.lr.*) + } + .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : + { + *(.ldata .ldata.* .gnu.linkonce.l.*) + . = ALIGN(. != 0 ? 64 / 8 : 1); + } + . = ALIGN(64 / 8); + _end = .; PROVIDE (end = .); + . = DATA_SEGMENT_END (.); + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF Extension. 
*/ + .debug_macro 0 : { *(.debug_macro) } + .debug_addr 0 : { *(.debug_addr) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) } +} + + +==================================================