/** ** @file user.c ** ** @author CSCI-452 class of 20245 ** ** @brief User-level code manipulation routines */ #define KERNEL_SRC #include <common.h> #include <bootstrap.h> #include <elf.h> #include <user.h> #include <vm.h> /* ** PRIVATE DEFINITIONS */ /* ** PRIVATE DATA TYPES */ /* ** PRIVATE GLOBAL VARIABLES */ /* ** PUBLIC GLOBAL VARIABLES */ /* ** Location of the "user blob" in memory. ** ** These variables are filled in by the code in startup.S using values ** passed to it from the bootstrap. ** ** These are visible so that the startup code can find them. */ uint16_t user_offset; // byte offset from the segment base uint16_t user_segment; // segment base address uint16_t user_sectors; // number of 512-byte sectors it occupies header_t *user_header; // filled in by the user_init routine prog_t *prog_table; // filled in by the user_init routine /* ** PRIVATE FUNCTIONS */ #if TRACING_ELF /* ** This is debugging support code; if not debugging the ELF ** handling code, it won't be compiled into the kernel. */ // buffer used by some of these functions static char ebuf[16]; /* ** File header functions */ // interpret the file class static const char *fh_eclass(e32_si class) { switch (class) { case ELF_CLASS_NONE: return ("None"); break; case ELF_CLASS_32: return ("EC32"); break; case ELF_CLASS_64: return ("EC64"); break; } return ("????"); } // interpret the data encoding static const char *fh_edata(e32_si data) { switch (data) { case ELF_DATA_NONE: return ("Invd"); break; case ELF_DATA_2LSB: return ("2CLE"); break; case ELF_DATA_2MSB: return ("2CBE"); break; } return ("????"); } // interpret the file type static const char *fh_htype(e32_h type) { switch (type) { case ET_NONE: return ("none"); break; case ET_REL: return ("rel"); break; case ET_EXEC: return ("exec"); break; case ET_DYN: return ("dyn"); break; case ET_CORE: return ("core"); break; default: if (type >= ET_LO_OS && type <= ET_HI_OS) return ("OSsp"); else if (type >= ET_LO_CP && type <= ET_HI_CP) return ("CPsp"); } sprint(ebuf, "0x%04x", type); return ((const char *)ebuf); } // interpret the machine type static const char *fh_mtype(e32_h machine) { switch (machine) { case EM_NONE: return ("None"); break; case EM_386: return ("386"); break; case EM_ARM: return ("ARM"); break; case EM_X86_64: return ("AMD64"); break; case EM_AARCH64: return ("AARCH64"); break; case EM_RISCV: return ("RISC-V"); break; } return ("Other"); } // dump the program header static void dump_fhdr(elfhdr_t *hdr) { cio_puts("File header: magic "); for (int i = EI_MAG0; i <= EI_MAG3; ++i) put_char_or_code(hdr->e_ident.bytes[i]); cio_printf(" class %s", fh_eclass(hdr->e_ident.f.class)); cio_printf(" enc %s", fh_edata(hdr->e_ident.f.data)); cio_printf(" ver %u\n", hdr->e_ident.f.version); cio_printf(" type %s", fh_htype(hdr->e_type)); cio_printf(" mach %s", fh_mtype(hdr->e_machine)); cio_printf(" vers %d", hdr->e_version); cio_printf(" entr %08x\n", hdr->e_entry); cio_printf(" phoff %08x", hdr->e_phoff); cio_printf(" shoff %08x", hdr->e_shoff); cio_printf(" flags %08x", (uint32_t)hdr->e_flags); cio_printf(" ehsize %u\n", hdr->e_ehsize); cio_printf(" phentsize %u", hdr->e_phentsize); cio_printf(" phnum %u", hdr->e_phnum); cio_printf(" shentsize %u", hdr->e_shentsize); cio_printf(" shnum %u", hdr->e_shnum); cio_printf(" shstrndx %u\n", hdr->e_shstrndx); } /* ** Program header functions */ // categorize the header type static const char *ph_type(e32_w type) { switch (type) { case PT_NULL: return ("Unused"); break; case PT_LOAD: return ("Load"); break; case PT_DYNAMIC: return ("DLI"); break; case PT_INTERP: return ("Interp"); break; case PT_NOTE: return ("Aux"); break; case PT_SHLIB: return ("RSVD"); break; case PT_PHDR: return ("PTentry"); break; case PT_TLS: return ("TLS"); break; default: if (type >= PT_LO_OS && type <= PT_HI_OS) return ("OSsp"); else if (type >= PT_LO_CP && type <= PT_HI_CP) return ("CPsp"); } sprint(ebuf, "0x%08x", type); return ((const char *)ebuf); } // report the individual flags static void ph_flags(e32_w flags) { if ((flags & PF_R) != 0) cio_putchar('R'); if ((flags & PF_W) != 0) cio_putchar('W'); if ((flags & PF_E) != 0) cio_putchar('X'); } // dump a program header static void dump_phdr(elfproghdr_t *hdr, int n) { cio_printf("Prog header %d, type %s\n", n, ph_type(hdr->p_type)); cio_printf(" offset %08x", hdr->p_offset); cio_printf(" va %08x", hdr->p_va); cio_printf(" pa %08x\n", hdr->p_pa); cio_printf(" filesz %08x", hdr->p_filesz); cio_printf(" memsz %08x", hdr->p_memsz); cio_puts(" flags "); ph_flags(hdr->p_flags); cio_printf(" align %08x", hdr->p_align); cio_putchar('\n'); } /* ** Section header functions */ // interpret the header type static const char *sh_type(e32_w type) { switch (type) { case SHT_NULL: return ("Unused"); break; case SHT_PROGBITS: return ("Progbits"); break; case SHT_SYMTAB: return ("Symtab"); break; case SHT_STRTAB: return ("Strtab"); break; case SHT_RELA: return ("Rela"); break; case SHT_HASH: return ("Hash"); break; case SHT_DYNAMIC: return ("Dynamic"); break; case SHT_NOTE: return ("Note"); break; case SHT_NOBITS: return ("Nobits"); break; case SHT_REL: return ("Rel"); break; case SHT_SHLIB: return ("Shlib"); break; case SHT_DYNSYM: return ("Dynsym"); break; default: if (type >= SHT_LO_CP && type <= SHT_HI_CP) return ("CCsp"); else if (type >= SHT_LO_US && type <= SHT_HI_US) return ("User"); } sprint(ebuf, "0x%08x", type); return ((const char *)ebuf); } // report the various flags static void sh_flags(unsigned int flags) { if ((flags & SHF_WRITE) != 0) cio_putchar('W'); if ((flags & SHF_ALLOC) != 0) cio_putchar('A'); if ((flags & SHF_EXECINSTR) != 0) cio_putchar('X'); if ((flags & SHF_MERGE) != 0) cio_putchar('M'); if ((flags & SHF_STRINGS) != 0) cio_putchar('S'); if ((flags & SHF_INFO_LINK) != 0) cio_putchar('L'); if ((flags & SHF_LINK_ORDER) != 0) cio_putchar('o'); if ((flags & SHF_OS_NONCON) != 0) cio_putchar('n'); if ((flags & SHF_GROUP) != 0) cio_putchar('g'); if ((flags & SHF_TLS) != 0) cio_putchar('t'); } // dump a section header ATTR_UNUSED static void dump_shdr(elfsecthdr_t *hdr, int n) { cio_printf("Sect header %d, type %d (%s), name %s\n", n, hdr->sh_type, sh_type(hdr->sh_type)); cio_printf(" flags %08x ", (uint32_t)hdr->sh_flags); sh_flags(hdr->sh_flags); cio_printf(" addr %08x", hdr->sh_addr); cio_printf(" offset %08x", hdr->sh_offset); cio_printf(" size %08x\n", hdr->sh_size); cio_printf(" link %08x", hdr->sh_link); cio_printf(" info %08x", hdr->sh_info); cio_printf(" align %08x", hdr->sh_addralign); cio_printf(" entsz %08x\n", hdr->sh_entsize); } #endif /** ** read_phdrs(addr,phoff,phentsize,phnum) ** ** Parses the ELF program headers and each segment described into memory. ** ** @param hdr Pointer to the program header ** @param pcb Pointer to the PCB (and its PDE) ** ** @return status of the attempt: ** SUCCESS everything loaded correctly ** E_LOAD_LIMIT more than N_LOADABLE PT_LOAD sections ** other status returned from vm_add() */ static int read_phdrs(elfhdr_t *hdr, pcb_t *pcb) { // sanity check assert1(hdr != NULL); assert2(pcb != NULL); #if TRACING_USER cio_printf("read_phdrs(%08x,%08x)\n", (uint32_t)hdr, (uint32_t)pcb); #endif // iterate through the program headers uint_t nhdrs = hdr->e_phnum; // pointer to the first header table entry elfproghdr_t *curr = (elfproghdr_t *)((uint32_t)hdr + hdr->e_phoff); // process them all int loaded = 0; for (uint_t i = 0; i < nhdrs; ++i, ++curr) { #if TRACING_ELF dump_phdr(curr, i); #endif if (curr->p_type != PT_LOAD) { // not loadable --> we'll skip it continue; } if (loaded >= N_LOADABLE) { #if TRACING_USER cio_puts(" LIMIT\n"); #endif return E_LOAD_LIMIT; } // set a pointer to the bytes within the object file char *data = (char *)(((uint32_t)hdr) + curr->p_offset); #if TRACING_USER cio_printf(" data @ %08x", (uint32_t)data); #endif // copy the pages into memory int stat = vm_add(pcb->pdir, curr->p_flags & PF_W, false, (char *)curr->p_va, curr->p_memsz, data, curr->p_filesz); if (stat != SUCCESS) { // TODO what else should we do here? check for memory leak? return stat; } // set the section table entry in the PCB pcb->sects[loaded].length = curr->p_memsz; pcb->sects[loaded].addr = curr->p_va; #if TRACING_USER cio_printf(" loaded %u @ %08x\n", pcb->sects[loaded].length, pcb->sects[loaded].addr); #endif ++loaded; } return SUCCESS; } /** ** Name: stack_setup ** ** Set up the stack for a new process ** ** @param pcb Pointer to the PCB for the process ** @param entry Entry point for the new process ** @param args Argument vector to be put in place ** ** @return A pointer to the context_t on the stack, or NULL */ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) { /* ** First, we need to count the space we'll need for the argument ** vector and strings. */ int argbytes = 0; int argc = 0; while (args[argc] != NULL) { int n = strlen(args[argc]) + 1; // can't go over one page in size if ((argbytes + n) > SZ_PAGE) { // oops - ignore this and any others break; } argbytes += n; ++argc; } // Round up the byte count to the next multiple of four. argbytes = (argbytes + 3) & MOD4_MASK; /* ** Allocate the arrays. We are safe using dynamic arrays here ** because we're using the OS stack, not the user stack. ** ** We want the argstrings and argv arrays to contain all zeroes. ** The C standard states, in section 6.7.8, that ** ** "21 If there are fewer initializers in a brace-enclosed list ** than there are elements or members of an aggregate, or ** fewer characters in a string literal used to initialize an ** array of known size than there are elements in the array, ** the remainder of the aggregate shall be initialized ** implicitly the same as objects that have static storage ** duration." ** ** Sadly, because we're using variable-sized arrays, we can't ** rely on this, so we have to call memclr() instead. :-( In ** truth, it doesn't really cost us much more time, but it's an ** annoyance. */ char argstrings[argbytes]; char *argv[argc + 1]; CLEAR(argstrings); CLEAR(argv); // Next, duplicate the argument strings, and create pointers to // each one in our argv. char *tmp = argstrings; for (int i = 0; i < argc; ++i) { int nb = strlen(args[i]) + 1; // bytes (incl. NUL) in this string strcpy(tmp, args[i]); // add to our buffer argv[i] = tmp; // remember where it was tmp += nb; // move on } // trailing NULL pointer argv[argc] = NULL; /* ** The pages for the stack were cleared when they were allocated, ** so we don't need to remember to do that. ** ** We reserve one longword at the bottom of the stack to hold a ** pointer to where argv is on the stack. ** ** The user code was linked with a startup function that defines ** the entry point (_start), calls main(), and then calls exit() ** if main() returns. We need to set up the stack this way: ** ** esp -> context <- context save area ** ... <- context save area ** context <- context save area ** entry_pt <- return address for the ISR ** argc <- argument count for main() ** /-> argv <- argv pointer for main() ** | ... <- argv array w/trailing NULL ** | ... <- argv character strings ** \--- ptr <- last word in stack ** ** Stack alignment rules for the SysV ABI i386 supplement dictate that ** the 'argc' parameter must be at an address that is a multiple of 16; ** see below for more information. */ // Pointer to the last word in stack. We get this from the // VM hierarchy. Get the PDE entry for the user address space. pde_t stack_pde = pcb->pdir[USER_PDE]; // The PDE entry points to the PT, which is an array of PTE. The last // two entries are for the stack; pull out the last one. pte_t stack_pte = ((pte_t *)(stack_pde & MOD4K_MASK))[USER_STK_PTE2]; // OK, now we have the PTE. The frame address of the last page is // in this PTE. Find the address immediately after that. uint32_t *ptr = (uint32_t *)((uint32_t)(stack_pte & MOD4K_MASK) + SZ_PAGE); // Pointer to where the arg strings should be filled in. char *strings = (char *)((uint32_t)ptr - argbytes); // back the pointer up to the nearest word boundary; because we're // moving toward location 0, the nearest word boundary is just the // next smaller address whose low-order two bits are zeroes strings = (char *)((uint32_t)strings & MOD4_MASK); // Copy over the argv strings. memcpy((void *)strings, argstrings, argbytes); /* ** Next, we need to copy over the argv pointers. Start by ** determining where 'argc' should go. ** ** Stack alignment is controlled by the SysV ABI i386 supplement, ** version 1.2 (June 23, 2016), which states in section 2.2.2: ** ** "The end of the input argument area shall be aligned on a 16 ** (32 or 64, if __m256 or __m512 is passed on stack) byte boundary. ** In other words, the value (%esp + 4) is always a multiple of 16 ** (32 or 64) when control is transferred to the function entry ** point. The stack pointer, %esp, always points to the end of the ** latest allocated stack frame." ** ** Isn't technical documentation fun? Ultimately, this means that ** the first parameter to main() should be on the stack at an address ** that is a multiple of 16. ** ** The space needed for argc, argv, and the argv array itself is ** argc + 3 words (argc+1 for the argv entries, plus one word each ** for argc and argv). We back up that much from 'strings'. */ int nwords = argc + 3; uint32_t *acptr = ((uint32_t *)strings) - nwords; /* ** Next, back up until we're at a multiple-of-16 address. Because we ** are moving to a lower address, its upper 28 bits are identical to ** the address we currently have, so we can do this with a bitwise ** AND to just turn off the lower four bits. */ acptr = (uint32_t *)(((uint32_t)acptr) & MOD16_MASK); // copy in 'argc' *acptr = argc; // next, 'argv', which follows 'argc'; 'argv' points to the // word that follows it in the stack uint32_t *avptr = acptr + 2; *(acptr + 1) = (uint32_t)avptr; /* ** Next, we copy in all argc+1 pointers. */ // Adjust and copy the string pointers. for (int i = 0; i <= argc; ++i) { if (argv[i] != NULL) { // an actual pointer - adjust it and copy it in *avptr = (uint32_t)strings; // skip to the next entry in the array strings += strlen(argv[i]) + 1; } else { // end of the line! *avptr = NULL; } ++avptr; } /* ** Now, we need to set up the initial context for the executing ** process. ** ** When this process is dispatched, the context restore code will ** pop all the saved context information off the stack, including ** the saved EIP, CS, and EFLAGS. We set those fields up so that ** the interrupt "returns" to the entry point of the process. */ // Locate the context save area on the stack. context_t *ctx = ((context_t *)avptr) - 1; /* ** We cleared the entire stack earlier, so all the context ** fields currently contain zeroes. We now need to fill in ** all the important fields. */ ctx->eflags = DEFAULT_EFLAGS; // IE enabled, PPL 0 ctx->eip = entry; // initial EIP ctx->cs = GDT_CODE; // segment registers ctx->ss = GDT_STACK; ctx->ds = ctx->es = ctx->fs = ctx->gs = GDT_DATA; /* ** Return the new context pointer to the caller. It will be our ** caller's responsibility to schedule this process. */ return (ctx); } /* ** PUBLIC FUNCTIONS */ /** ** Name: user_init ** ** Initializes the user support module. */ void user_init(void) { #if TRACING_INIT cio_puts(" User"); #endif // This is gross, but we need to get this information somehow. // Access the "user blob" data in the second bootstrap sector uint16_t *blobdata = (uint16_t *)USER_BLOB_DATA; user_offset = *blobdata++; user_segment = *blobdata++; user_sectors = *blobdata++; #if TRACING_USER cio_printf("\nUser blob: %u sectors @ %04x:%04x", user_sectors, user_segment, user_offset); #endif // calculate the location of the user blob if (user_sectors > 0) { // calculate the address of the header user_header = (header_t *)(KERN_BASE + ((((uint_t)user_segment) << 4) + ((uint_t)user_offset))); // the program table immediate follows the blob header prog_table = (prog_t *)(user_header + 1); #if TRACING_USER cio_printf(", hdr %08x, %u progs, tbl %08x\n", (uint32_t)user_header, user_header->num, (uint32_t)prog_table); #endif } else { // too bad, so sad! user_header = NULL; prog_table = NULL; #if TRACING_USER cio_putchar('\n'); #endif } } /** ** Name: user_locate ** ** Locates a user program in the user code archive. ** ** @param what The ID of the user program to find ** ** @return pointer to the program table entry in the code archive, or NULL */ prog_t *user_locate(uint_t what) { // no programs if there is no blob! if (user_header == NULL) { return NULL; } // make sure this is a reasonable program to request if (what >= user_header->num) { // no such program! return NULL; } // find the entry in the program table prog_t *prog = &prog_table[what]; // if there are no bytes, it's useless if (prog->size < 1) { return NULL; } // return the program table pointer return prog; } /** ** Name: user_duplicate ** ** Duplicates the memory setup for an existing process. ** ** @param new The PCB for the new copy of the program ** @param old The PCB for the existing the program ** ** @return the status of the duplicate attempt */ int user_duplicate(pcb_t *new, pcb_t *old) { // We need to do a recursive duplication of the process address // space of the current process. First, we create a new user // page directory. Next, we'll duplicate the USER_PDE page // table. Finally, we'll go through that table and duplicate // all the frames. // create the initial VM hierarchy pde_t *pdir = vm_mkuvm(); if (pdir == NULL) { return E_NO_MEMORY; } new->pdir = pdir; // Next, add a USER_PDE page table that's a duplicate of the // current process' page table if (!vm_uvmdup(new->pdir, old->pdir)) { // check for memory leak? return E_NO_MEMORY; } // We don't do copy-on-write, so we must duplicate all the // individual page frames. Iterate through all the user-level // PDE entries, and replace the existing frames with duplicates. // // NOTE: we only deal with pdir[0] here, as we are limiting // the user address space to the first 4MB. If the size of // the address space goes up, this code will need to be // modified to loop over the larger space. // pointer to the PMT for the user pte_t *pt = (pte_t *)(pdir[USER_PDE]); assert(pt != NULL); for (int i = 0; i < N_PTE; ++i) { // get the current entry from the PMT pte_t entry = *pt; // if this entry is present, if (IS_PRESENT(entry)) { // duplicate the frame pointed to by this PTE void *tmp = vm_pagedup((void *)PTE_ADDR(entry)); // replace the old frame number with the new one *pt = (pte_t)(((uint32_t)tmp) | PERMS(entry)); } else { *pt = 0; } ++pt; } return SUCCESS; } /** ** Name: user_load ** ** Loads a user program from the user code archive into memory. ** Allocates all needed frames and sets up the VM tables. ** ** @param ptab A pointer to the program table entry to be loaded ** @param pcb The PCB for the program being loaded ** @param args The argument vector for the program ** ** @return the status of the load attempt */ int user_load(prog_t *ptab, pcb_t *pcb, const char **args) { // NULL pointers are bad! assert1(ptab != NULL); assert1(pcb != NULL); assert1(args != NULL); // locate the ELF binary elfhdr_t *hdr = (elfhdr_t *)((uint32_t)user_header + ptab->offset); #if TRACING_ELF cio_printf("Load: ptab %08x: '%s', off %08x, size %08x, flags %08x\n", (uint32_t)ptab, ptab->name, ptab->offset, ptab->size, ptab->flags); cio_printf(" args %08x:", (uint32_t)args); for (int i = 0; args[i] != NULL; ++i) { cio_printf(" [%d] %s", i, args[i]); } cio_printf("\n pcb %08x (pid %u)\n", (uint32_t)pcb, pcb->pid); dump_fhdr(hdr); #endif // verify the ELF header if (hdr->e_ident.f.magic != ELF_MAGIC) { return E_BAD_PARAM; } // allocate a page directory pcb->pdir = vm_mkuvm(); if (pcb->pdir == NULL) { return E_NO_MEMORY; } // read all the program headers int stat = read_phdrs(hdr, pcb); if (stat != SUCCESS) { // TODO figure out a better way to deal with this PANIC(0, "user_load: phdr read failed"); } // next, set up the runtime stack - just like setting up loadable // sections, except nothing to copy stat = vm_add(pcb->pdir, true, false, (void *)USER_STACK, SZ_USTACK, NULL, 0); if (stat != SUCCESS) { // TODO yadda yadda... PANIC(0, "user_load: vm_add failed"); } // set up the command-line arguments pcb->context = stack_setup(pcb, hdr->e_entry, args); return SUCCESS; } /** ** Name: user_cleanup ** ** "Unloads" a user program. Deallocates all memory frames and ** cleans up the VM structures. ** ** @param pcb The PCB of the program to be unloaded */ void user_cleanup(pcb_t *pcb) { if (pcb == NULL) { // should this be an error? return; } vm_free(pcb->pdir); pcb->pdir = NULL; }