track upstream

This commit is contained in:
Murphy 2025-03-31 12:41:04 -04:00
parent 4bf4659392
commit 4dc44e8fce
Signed by: freya
GPG key ID: 9FBC6FFD6D2DBF17
18 changed files with 952 additions and 280 deletions

View file

@ -36,9 +36,6 @@
#define CHAN_CIO 0 #define CHAN_CIO 0
#define CHAN_SIO 1 #define CHAN_SIO 1
// maximum allowable number of command-line arguments
#define MAX_ARGS 10
// sizes of various things // sizes of various things
#define NUM_1KB 0x00000400 // 2^10 #define NUM_1KB 0x00000400 // 2^10
#define NUM_4KB 0x00001000 // 2^12 #define NUM_4KB 0x00001000 // 2^12
@ -46,7 +43,7 @@
#define NUM_4MB 0x00400000 // 2^22 #define NUM_4MB 0x00400000 // 2^22
#define NUM_1GB 0x40000000 // 2^30 #define NUM_1GB 0x40000000 // 2^30
#define NUM_2GB 0x80000000 // 2^31 #define NUM_2GB 0x80000000 // 2^31
#define NUM_3GB 0xc0000000 #define NUM_3GB 0xc0000000 // 1GB + 2GB
#ifndef ASM_SRC #ifndef ASM_SRC

View file

@ -30,6 +30,11 @@
// declarations for modulus checking of (e.g.) sizes and addresses // declarations for modulus checking of (e.g.) sizes and addresses
// Complementary low/high mask pairs:
//   LOW_9_BITS  | HIGH_20_BITS == 0xffffffff
//   LOW_22_BITS | HIGH_10_BITS == 0xffffffff
// NOTE(review): despite its name, LOW_9_BITS (0x00000fff) has the low
// TWELVE bits set, not nine - confirm whether the name or the value is
// what callers actually rely on before using it as a nine-bit mask.
#define LOW_9_BITS 0x00000fff
#define LOW_22_BITS 0x003fffff
#define HIGH_20_BITS 0xfffff000
#define HIGH_10_BITS 0xffc00000
#define MOD4_BITS 0x00000003 #define MOD4_BITS 0x00000003
#define MOD4_MASK 0xfffffffc #define MOD4_MASK 0xfffffffc
#define MOD4_INC 0x00000004 #define MOD4_INC 0x00000004

85
include/offsets.h Normal file
View file

@ -0,0 +1,85 @@
/**
** @file offsets.h
**
** GENERATED AUTOMATICALLY - DO NOT EDIT
**
** Creation date: Mon Mar 31 11:38:04 2025
**
** This header file contains C Preprocessor macros which expand
** into the byte offsets needed to reach fields within structs
** used in the baseline system. Should those struct declarations
** change, the Offsets program should be modified (if needed),
** recompiled, and re-run to recreate this file.
*/
#ifndef OFFSETS_H_
#define OFFSETS_H_
// Sizes of basic types
#define SZ_char 1
#define SZ_short 2
#define SZ_int 4
#define SZ_long 4
#define SZ_long_long 8
#define SZ_pointer 4
// Sizes of our types
#define SZ_int8_t 1
#define SZ_uint8_t 1
#define SZ_int16_t 2
#define SZ_uint16_t 2
#define SZ_int32_t 4
#define SZ_uint32_t 4
#define SZ_int64_t 8
#define SZ_uint64_t 8
#define SZ_bool_t 1
// context_t structure
#define SZ_CTX 72
#define CTX_ss 0
#define CTX_gs 4
#define CTX_fs 8
#define CTX_es 12
#define CTX_ds 16
#define CTX_edi 20
#define CTX_esi 24
#define CTX_ebp 28
#define CTX_esp 32
#define CTX_ebx 36
#define CTX_edx 40
#define CTX_ecx 44
#define CTX_eax 48
#define CTX_vector 52
#define CTX_code 56
#define CTX_eip 60
#define CTX_cs 64
#define CTX_eflags 68
// section_t structure
#define SZ_SCT 8
#define SCT_length 0
#define SCT_addr 4
// pcb_t structure
#define SZ_PCB 72
#define PCB_context 0
#define PCB_pdir 4
#define PCB_sects 8
#define PCB_next 40
#define PCB_parent 44
#define PCB_wakeup 48
#define PCB_exit_status 52
#define PCB_pid 56
#define PCB_state 60
#define PCB_priority 64
#define PCB_ticks 68
#endif

View file

@ -20,11 +20,13 @@
// Upper bound on the number of simultaneous user-level // Upper bound on the number of simultaneous user-level
// processes in the system (completely arbitrary) // processes in the system (completely arbitrary)
#define N_PROCS 25 #define N_PROCS 25
// Clock frequency (Hz) // Limit on the number of entries in argv[], INCLUDING
// the trailing NULL pointer (also completely arbitrary)
#define N_ARGS 10
// Clock frequency (Hz)
#define CLOCK_FREQ 1000 #define CLOCK_FREQ 1000
#define TICKS_PER_MS 1 #define TICKS_PER_MS 1

View file

@ -205,13 +205,13 @@ extern uint_t next_pid;
extern pcb_t *init_pcb; extern pcb_t *init_pcb;
// table of state name strings // table of state name strings
extern const char *state_str[N_STATES]; extern const char state_str[N_STATES][4];
// table of priority name strings // table of priority name strings
extern const char *prio_str[N_PRIOS]; extern const char prio_str[N_PRIOS][5];
// table of queue ordering name strings // table of queue ordering name strings
extern const char *ord_str[N_ORDERINGS]; extern const char ord_str[N_ORDERINGS][5];
/* /*
** Prototypes ** Prototypes

View file

@ -118,10 +118,11 @@ int user_duplicate(pcb_t *new, pcb_t *old);
** @param prog A pointer to the program table entry to be loaded ** @param prog A pointer to the program table entry to be loaded
** @param pcb The PCB for the program being loaded ** @param pcb The PCB for the program being loaded
** @param args The argument vector for the program ** @param args The argument vector for the program
** @param sys Is the argument vector from kernel code?
** **
** @return the status of the load attempt ** @return the status of the load attempt
*/ */
int user_load(prog_t *prog, pcb_t *pcb, const char **args); int user_load(prog_t *prog, pcb_t *pcb, const char **args, bool_t sys);
/** /**
** Name: user_cleanup ** Name: user_cleanup

View file

@ -64,16 +64,20 @@
*/ */
// user virtual addresses // user virtual addresses
#define USER_BASE 0x00000000
#define USER_MAX 0x003fffff
#define USER_TEXT 0x00001000 #define USER_TEXT 0x00001000
#define USER_STACK 0x003fe000 #define USER_STACK 0x003fe000
#define USER_STACK_P1 USER_STACK
#define USER_STACK_P2 0x003ff000
#define USER_STK_END 0x00400000 #define USER_STK_END 0x00400000
// how to find the addresses of the stack pages in the VM hierarchy // how to find the addresses of the stack pages in the VM hierarchy
// user address space is the first 4MB of virtual memory // user address space is the first 4MB of virtual memory
#define USER_PDE 0 #define USER_PDE 0
// the stack occupies the last two pages of the address space // the stack occupies this range of pages in the user address space
#define USER_STK_PTE1 1022 #define USER_STK_FIRST_PTE 1022
#define USER_STK_PTE2 1023 #define USER_STK_LAST_PTE 1023
// some important memory addresses // some important memory addresses
#define KERN_BASE 0x80000000 // start of "kernel" memory #define KERN_BASE 0x80000000 // start of "kernel" memory
@ -173,7 +177,7 @@
#define IS_USER(entry) (((entry) & PDE_US) != 0) #define IS_USER(entry) (((entry) & PDE_US) != 0)
// low-order nine bits of PDEs and PTEs hold "permission" flag bits // low-order nine bits of PDEs and PTEs hold "permission" flag bits
#define PERMS_MASK MOD4K_MASK #define PERMS_MASK MOD4K_BITS
// 4KB frame numbers are 20 bits wide // 4KB frame numbers are 20 bits wide
#define FRAME_4K_SHIFT 12 #define FRAME_4K_SHIFT 12
@ -200,10 +204,14 @@
// everything has nine bits of permission flags // everything has nine bits of permission flags
#define PERMS(p) (((uint32_t)(p)) & PERMS_MASK) #define PERMS(p) (((uint32_t)(p)) & PERMS_MASK)
// extract the table indices from a 32-bit address // extract the table indices from a 32-bit VA
#define PDIX(v) ((((uint32_t)(v)) >> PDIX_SHIFT) & PIX2I_MASK) #define PDIX(v) ((((uint32_t)(v)) >> PDIX_SHIFT) & PIX2I_MASK)
#define PTIX(v) ((((uint32_t)(v)) >> PTIX_SHIFT) & PIX2I_MASK) #define PTIX(v) ((((uint32_t)(v)) >> PTIX_SHIFT) & PIX2I_MASK)
// extract the byte offset from a 32-bit VA
#define OFFSET_4K(v) (((uint32_t)(v)) & MOD4K_BITS)
#define OFFSET_4M(v) (((uint32_t)(v)) & MOD4M_BITS)
/* /*
** Types ** Types
*/ */
@ -215,31 +223,34 @@
// PDE for 4KB pages // PDE for 4KB pages
typedef struct pdek_s { typedef struct pdek_s {
uint_t p : 1; // present uint_t p : 1; // 0: present
uint_t rw : 1; // writable uint_t rw : 1; // 1: writable
uint_t us : 1; // user/supervisor uint_t us : 1; // 2: user/supervisor
uint_t pwt : 1; // cache write-through uint_t pwt : 1; // 3: cache write-through
uint_t pcd : 1; // cache disable uint_t pcd : 1; // 4: cache disable
uint_t a : 1; // accessed uint_t a : 1; // 5: accessed
uint_t avl1 : 1; // ignored (available) uint_t avl1 : 1; // 6: ignored (available)
uint_t ps : 1; // page size (must be 0) uint_t ps : 1; // 7: page size (must be 0)
uint_t avl2 : 4; // ignored (available) uint_t avl2 : 4; // 11-8: ignored (available)
uint_t fa : 20; // frame address uint_t fa : 20; // 31-12: frame address
} pdek_f_t; } pdek_f_t;
// PDE for 4MB pages // PDE for 4MB pages
typedef struct pdem_s { typedef struct pdem_s {
uint_t p : 1; // present uint_t p : 1; // 0: present
uint_t rw : 1; // writable uint_t rw : 1; // 1: writable
uint_t us : 1; // user/supervisor uint_t us : 1; // 2: user/supervisor
uint_t pwt : 1; // cache write-through uint_t pwt : 1; // 3: cache write-through
uint_t pcd : 1; // cache disable uint_t pcd : 1; // 4: cache disable
uint_t a : 1; // accessed uint_t a : 1; // 5: accessed
uint_t d : 1; // dirty uint_t d : 1; // 6: dirty
uint_t ps : 1; // page size (must be 1) uint_t ps : 1; // 7: page size (must be 1)
uint_t g : 1; // global uint_t g : 1; // 8: global
uint_t avl : 3; // ignored (available) uint_t avl : 3; // 11-9: ignored (available)
uint_t fa : 20; // frame address uint_t pat : 1; // 12: page attribute table in use
uint_t fa2 : 4; // 16-13: bits 35-32 of frame address (36-bit addrs)
uint_t rsv : 5; // 21-17: reserved - must be zero
uint_t fa : 10; // 31-22: bits 31-22 of frame address
} pdem_f_t; } pdem_f_t;
// page table entries // page table entries
@ -249,17 +260,17 @@ typedef struct pdem_s {
// broken out into fields // broken out into fields
typedef struct pte_s { typedef struct pte_s {
uint_t p : 1; // present uint_t p : 1; // 0: present
uint_t rw : 1; // writable uint_t rw : 1; // 1: writable
uint_t us : 1; // user/supervisor uint_t us : 1; // 2: user/supervisor
uint_t pwt : 1; // cache write-through uint_t pwt : 1; // 3: cache write-through
uint_t pcd : 1; // cache disable uint_t pcd : 1; // 4: cache disable
uint_t a : 1; // accessed uint_t a : 1; // 5: accessed
uint_t d : 1; // dirty uint_t d : 1; // 6: dirty
uint_t pat : 1; // page attribute table in use uint_t pat : 1; // 7: page attribute table in use
uint_t g : 1; // global uint_t g : 1; // 8: global
uint_t avl : 3; // ignored (available) uint_t avl : 3; // 11-9: ignored (available)
uint_t fa : 20; // frame address uint_t fa : 20; // 31-12: frame address
} ptef_t; } ptef_t;
// page fault error code bits // page fault error code bits
@ -291,6 +302,17 @@ typedef struct mapping_t {
uint32_t perm; // access control uint32_t perm; // access control
} mapping_t; } mapping_t;
/*
** Display modes used when dumping out a paging hierarchy.
**
** Enumerators are numbered consecutively starting at zero, so the
** final one (N_VMMODES) is the number of modes that precede it.
*/
enum vmmode_e {
	Simple = 0,	// only count the 'present' entries at each level
	OneLevel,	// top level only: count entries, decode 'present' ones
	TwoLevel,	// count and decode entries at each level
	Full,		// ??? reserved in case more detail is ever needed
	N_VMMODES	// sentinel
};
/* /*
** Globals ** Globals
*/ */
@ -312,6 +334,19 @@ extern pde_t *kpdir;
*/ */
void vm_init(void); void vm_init(void);
/**
** Name: vm_uva2kva
**
** Convert a user VA into a kernel address. Works for all addresses -
** if the address is a page address, the low-order twelve bits (the
** byte offset within a 4KB page) will be zeroes; otherwise, they are
** the offset into the page, which is the same in both address spaces.
**
** @param pdir Pointer to the page directory to examine
** @param va User virtual address to convert
**
** @return the kernel address corresponding to 'va'
**         NOTE(review): the result for an address that is not mapped
**         in 'pdir' is not stated here (presumably NULL) - confirm
**         against the implementation before relying on it.
*/
void *vm_uva2kva(pde_t *pdir, void *va);
/** /**
** Name: vm_pagedup ** Name: vm_pagedup
** **
@ -323,6 +358,17 @@ void vm_init(void);
*/ */
void *vm_pagedup(void *old); void *vm_pagedup(void *old);
/**
** Name: vm_pdedup
**
** Duplicate a page directory entry
**
** @param entry The entry to be duplicated
**
** @return the new entry, or -1 on error.
**         NOTE(review): the -1 is returned through pde_t; if pde_t is
**         an unsigned type, callers need to compare against (pde_t)-1
**         rather than testing for a negative value - confirm.
*/
pde_t vm_pdedup(pde_t entry);
/** /**
** Name: vm_ptdup ** Name: vm_ptdup
** **
@ -439,6 +485,17 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm);
*/ */
int vm_uvmdup(pde_t *new, pde_t *old); int vm_uvmdup(pde_t *new, pde_t *old);
/**
** Name: vm_print
**
** Print out a paging hierarchy.
**
** @param pt Table to display (a page directory when 'dir' is true,
**           otherwise a page table)
** @param dir Is it a page directory (vs. a page table)?
** @param mode How to display the entries; one of the vmmode_e values
**             (Simple, OneLevel, TwoLevel, Full)
*/
void vm_print(void *pt, bool_t dir, enum vmmode_e mode);
#endif /* !ASM_SRC */ #endif /* !ASM_SRC */
#endif #endif

View file

@ -19,6 +19,8 @@
# .arch i386 # .arch i386
#include <bootstrap.h> #include <bootstrap.h>
#include <offsets.h>
#include <vm.h>
/* /*
** Configuration options - define in Makefile ** Configuration options - define in Makefile
@ -100,8 +102,8 @@ isr_save:
** **
** Set up parameters for the ISR call. ** Set up parameters for the ISR call.
*/ */
movl 52(%esp),%eax // get vector number and error code movl CTX_vector(%esp),%eax // get vector number and error code
movl 56(%esp),%ebx movl CTX_code(%esp),%ebx
/* /*
*********************** ***********************
@ -120,11 +122,12 @@ isr_save:
// save the context pointer // save the context pointer
movl current, %edx movl current, %edx
movl %esp, (%edx) movl %esp, PCB_context(%edx)
// also save the page directory pointer // also save the page directory pointer
movl %cr3, %ecx movl %cr3, %ecx
movl %ecx, 4(%edx) addl $KERN_BASE, %ecx // convert to a virtual address
movl %ecx, PCB_pdir(%edx)
// switch to the system stack // switch to the system stack
// //
@ -166,8 +169,9 @@ isr_restore:
*********************** ***********************
*/ */
movl current, %ebx // return to the user stack movl current, %ebx // return to the user stack
movl (%ebx), %esp // ESP --> context save area movl PCB_context(%ebx), %esp // ESP --> context save area
movl 4(%ebx), %ecx // page directory pointer movl PCB_pdir(%ebx), %ecx // page directory pointer
subl $KERN_BASE, %ecx // convert to a physical address
movl %ecx, %cr3 movl %ecx, %cr3
// now we're operating with the user process' // now we're operating with the user process'

View file

@ -194,11 +194,11 @@ static void stats(int code)
case 'q': // dump the queues case 'q': // dump the queues
// code to dump out any/all queues // code to dump out any/all queues
pcb_queue_dump("R", ready); pcb_queue_dump("R", ready, true);
pcb_queue_dump("W", waiting); pcb_queue_dump("W", waiting, true);
pcb_queue_dump("S", sleeping); pcb_queue_dump("S", sleeping, true);
pcb_queue_dump("Z", zombie); pcb_queue_dump("Z", zombie, true);
pcb_queue_dump("I", sioread); pcb_queue_dump("I", sioread, true);
break; break;
case 'r': // print system configuration information case 'r': // print system configuration information
@ -309,7 +309,7 @@ int main(void)
// report our configuration options // report our configuration options
kreport(true); kreport(true);
delay(DELAY_3_SEC); delay(DELAY_2_SEC);
/* /*
** Other tasks typically performed here: ** Other tasks typically performed here:
@ -341,10 +341,11 @@ int main(void)
const char *args[2] = { "init", NULL }; const char *args[2] = { "init", NULL };
// load it // load it
assert(user_load(prog, init_pcb, args) == SUCCESS); assert(user_load(prog, init_pcb, args, true) == SUCCESS);
// send it on its merry way // send it on its merry way
schedule(init_pcb); schedule(init_pcb);
dispatch();
#ifdef TRACE_CX #ifdef TRACE_CX
// if we're using a scrolling region, wait a bit more and then set it up // if we're using a scrolling region, wait a bit more and then set it up
@ -366,9 +367,6 @@ int main(void)
"================================================================================"); "================================================================================");
#endif #endif
// switch to the "real" kernel page directory
vm_set_kvm();
/* /*
** END OF TERM-SPECIFIC CODE ** END OF TERM-SPECIFIC CODE
** **
@ -380,5 +378,21 @@ int main(void)
sio_enable(SIO_RX); sio_enable(SIO_RX);
// produce a "system state" report
cio_puts("System status: Queues ");
pcb_queue_dump("R", ready, true);
pcb_queue_dump("W", waiting, true);
pcb_queue_dump("S", sleeping, true);
pcb_queue_dump("Z", zombie, true);
pcb_queue_dump("I", sioread, true);
ptable_dump_counts();
pcb_dump("Current: ", current, true);
delay(DELAY_3_SEC);
vm_print(current->pdir, true, TwoLevel);
delay(DELAY_3_SEC);
return 0; return 0;
} }

View file

@ -26,6 +26,7 @@ SECTIONS
} }
/* Could put STABs here */ /* Could put STABs here */
/*
.stab : { .stab : {
PROVIDE(__STAB_BEGIN__ = .); PROVIDE(__STAB_BEGIN__ = .);
*(.stab); *(.stab);
@ -36,6 +37,7 @@ SECTIONS
*(.stabstr); *(.stabstr);
PROVIDE(__STABSTR_END__ = .); PROVIDE(__STABSTR_END__ = .);
} }
*/
/* Align the data segment at the next page boundary */ /* Align the data segment at the next page boundary */
. = ALIGN(0x1000); . = ALIGN(0x1000);
@ -65,7 +67,8 @@ SECTIONS
PROVIDE(_end = .); PROVIDE(_end = .);
/DISCARD/ : { /DISCARD/ : {
/* *(.stab .stab_info .stabstr) */ *(.stab .stab_info .stabstr)
*(.eh_frame .note.GNU-stack .note.gnu.property .comment) *(.eh_frame .eh_frame_hdr)
*(.note.GNU-stack .note.gnu.property .comment)
} }
} }

View file

@ -74,7 +74,7 @@ uint_t next_pid;
pcb_t *init_pcb; pcb_t *init_pcb;
// table of state name strings // table of state name strings
const char *state_str[N_STATES] = { const char state_str[N_STATES][4] = {
[STATE_UNUSED] = "Unu", // "Unused" [STATE_UNUSED] = "Unu", // "Unused"
[STATE_NEW] = "New", [STATE_NEW] = "New",
[STATE_READY] = "Rdy", // "Ready" [STATE_READY] = "Rdy", // "Ready"
@ -87,16 +87,16 @@ const char *state_str[N_STATES] = {
}; };
// table of priority name strings // table of priority name strings
const char *prio_str[N_PRIOS] = { [PRIO_HIGH] = "High", const char prio_str[N_PRIOS][5] = { [PRIO_HIGH] = "High",
[PRIO_STD] = "User", [PRIO_STD] = "User",
[PRIO_LOW] = "Low ", [PRIO_LOW] = "Low ",
[PRIO_DEFERRED] = "Def " }; [PRIO_DEFERRED] = "Def " };
// table of queue ordering name strings // table of queue ordering name strings
const char *ord_str[N_PRIOS] = { [O_FIFO] = "FIFO", const char ord_str[N_PRIOS][5] = { [O_FIFO] = "FIFO",
[O_PRIO] = "PRIO", [O_PRIO] = "PRIO",
[O_PID] = "PID ", [O_PID] = "PID ",
[O_WAKEUP] = "WAKE" }; [O_WAKEUP] = "WAKE" };
/* /*
** PRIVATE FUNCTIONS ** PRIVATE FUNCTIONS
@ -939,7 +939,7 @@ void ctx_dump_all(const char *msg)
} }
/** /**
** _pcb_dump(msg,pcb) ** pcb_dump(msg,pcb,all)
** **
** Dumps the contents of this PCB to the console ** Dumps the contents of this PCB to the console
** **
@ -964,7 +964,15 @@ void pcb_dump(const char *msg, register pcb_t *pcb, bool_t all)
} }
cio_printf(" %d", pcb->pid); cio_printf(" %d", pcb->pid);
cio_printf(" %s", pcb->state >= N_STATES ? "???" : state_str[pcb->state]); cio_printf(" %s", pcb->state >= N_STATES ? "???" : state_str[pcb->state]);
#if 0
if( pcb->state >= N_STATES ) {
cio_puts( " ????" );
} else {
cio_printf( " %s", state_str[pcb->state] );
}
#endif
if (!all) { if (!all) {
// just printing IDs and states on one line // just printing IDs and states on one line
@ -974,6 +982,13 @@ void pcb_dump(const char *msg, register pcb_t *pcb, bool_t all)
// now, the rest of the contents // now, the rest of the contents
cio_printf(" %s", cio_printf(" %s",
pcb->priority >= N_PRIOS ? "???" : prio_str[pcb->priority]); pcb->priority >= N_PRIOS ? "???" : prio_str[pcb->priority]);
#if 0
if( pcb->priority >= N_PRIOS ) {
cio_puts( " ???" );
} else {
cio_printf( " %s", prio_str[pcb->priority] );
}
#endif
cio_printf(" ticks %u xit %d wake %08x\n", pcb->ticks, pcb->exit_status, cio_printf(" ticks %u xit %d wake %08x\n", pcb->ticks, pcb->exit_status,
pcb->wakeup); pcb->wakeup);
@ -1111,6 +1126,14 @@ void ptable_dump_counts(void)
for (n = 0; n < N_STATES; ++n) { for (n = 0; n < N_STATES; ++n) {
cio_printf(" %u %s", nstate[n], cio_printf(" %u %s", nstate[n],
state_str[n] != NULL ? state_str[n] : "???"); state_str[n] != NULL ? state_str[n] : "???");
#if 0
cio_printf( " %u ", nstate[n] );
if( state_str[n][0] != '\0' ) {
cio_puts( state_str[n] );
} else {
cio_puts( "???" );
}
#endif
} }
cio_putchar('\n'); cio_putchar('\n');
} }

View file

@ -33,18 +33,18 @@
/* /*
** A symbol for locating the beginning of the code. ** A symbol for locating the beginning of the code.
*/ */
.globl begtext
.text .text
begtext:
.globl begtext
.globl _start
_start = V2PNC(begtext)
/* /*
** The entry point. When we get here, we have just entered protected ** The entry point. When we get here, we have just entered protected
** mode, so all the segment registers are incorrect except for CS. ** mode, so all the segment registers are incorrect except for CS.
*/ */
.globl _start begtext:
_start:
cli /* seems to be reset on entry to p. mode */ cli /* seems to be reset on entry to p. mode */
movb $NMI_ENABLE, %al /* re-enable NMIs (bootstrap */ movb $NMI_ENABLE, %al /* re-enable NMIs (bootstrap */
outb $CMOS_ADDR /* turned them off) */ outb $CMOS_ADDR /* turned them off) */
@ -110,10 +110,18 @@ clearbss:
# set the initial frame pointer # set the initial frame pointer
xorl %ebp, %ebp xorl %ebp, %ebp
# now, jump and switch into using high addresses
# we use an indirect jump here because the assembler
# would ordinarily generate a PC-relative target
# address for the jump, which would not have the
# desired effect
movl $onward, %eax
jmp *%eax
onward:
/* /*
** Call the system initialization routine, and switch to ** Call the system initialization routine.
** executing at high addresses. We use an indirect jump
** here to avoid getting a PC-relative 'jmp' instruction.
** **
** Alternate idea: push the address of isr_restore ** Alternate idea: push the address of isr_restore
** and just do an indirect jump? ** and just do an indirect jump?

View file

@ -354,7 +354,7 @@ SYSIMPL(exec)
pcb->pdir = NULL; pcb->pdir = NULL;
// "load" it and set up the VM tables for this process // "load" it and set up the VM tables for this process
int status = user_load(prog, pcb, args); int status = user_load(prog, pcb, args, false);
if (status != SUCCESS) { if (status != SUCCESS) {
RET(pcb) = status; RET(pcb) = status;
SYSCALL_EXIT(status); SYSCALL_EXIT(status);

View file

@ -426,44 +426,27 @@ static int read_phdrs(elfhdr_t *hdr, pcb_t *pcb)
** @param pcb Pointer to the PCB for the process ** @param pcb Pointer to the PCB for the process
** @param entry Entry point for the new process ** @param entry Entry point for the new process
** @param args Argument vector to be put in place ** @param args Argument vector to be put in place
** @param sys Is the argument vector from kernel code?
** **
** @return A pointer to the context_t on the stack, or NULL ** @return A (user VA) pointer to the context_t on the stack, or NULL
*/ */
static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args) static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args,
bool_t sys)
{ {
#if TRACING_USER #if TRACING_USER
cio_printf("stksetup: pcb %08x, entry %08, args %08x\n", (uint32_t)pcb, cio_printf("stksetup: pcb %08x, entry %08x, args %08x\n", (uint32_t)pcb,
entry, (uint32_t)args); entry, (uint32_t)args);
#endif #endif
/* /*
** First, we need to count the space we'll need for the argument ** First, we need to calculate the space we'll need for the argument
** vector and strings. ** vector and strings.
*/
int argbytes = 0;
int argc = 0;
while (args[argc] != NULL) {
int n = strlen(args[argc]) + 1;
// can't go over one page in size
if ((argbytes + n) > SZ_PAGE) {
// oops - ignore this and any others
break;
}
argbytes += n;
++argc;
}
// Round up the byte count to the next multiple of four.
argbytes = (argbytes + 3) & MOD4_MASK;
/*
** Allocate the arrays. We are safe using dynamic arrays here
** because we're using the OS stack, not the user stack.
** **
** We want the argstrings and argv arrays to contain all zeroes. ** Keeping track of kernel vs. user VAs is tricky, so we'll use
** The C standard states, in section 6.7.8, that ** a prefix on variable names: kv_* is a kernel virtual address;
** uv_* is a user virtual address.
**
** We rely on the C standard, section 6.7.8, to clear these arrays:
** **
** "21 If there are fewer initializers in a brace-enclosed list ** "21 If there are fewer initializers in a brace-enclosed list
** than there are elements or members of an aggregate, or ** than there are elements or members of an aggregate, or
@ -472,31 +455,36 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
** the remainder of the aggregate shall be initialized ** the remainder of the aggregate shall be initialized
** implicitly the same as objects that have static storage ** implicitly the same as objects that have static storage
** duration." ** duration."
**
** Sadly, because we're using variable-sized arrays, we can't
** rely on this, so we have to call memclr() instead. :-( In
** truth, it doesn't really cost us much more time, but it's an
** annoyance.
*/ */
char argstrings[argbytes]; int argbytes = 0; // total length of arg strings
char *argv[argc + 1]; int argc = 0; // number of argv entries
const char *kv_strs[N_ARGS] = { 0 }; // converted user arg string pointers
int strlengths[N_ARGS] = { 0 }; // length of each string
const char *uv_argv[N_ARGS] = { 0 }; // argv pointers
CLEAR(argstrings); /*
CLEAR(argv); ** IF the argument list given to us came from user code, we need
** to convert its address and the addresses it contains to kernel
** VAs; otherwise, we can use them directly.
*/
char **kv_args = sys ? args : vm_uva2kva(pcb->pdir, (void *)args);
// Next, duplicate the argument strings, and create pointers to while (kv_args[argc] != NULL) {
// each one in our argv. kv_strs[argc] = sys ? args[argc] :
char *tmp = argstrings; vm_uva2kva(pcb->pdir, (void *)(kv_args[argc]));
for (int i = 0; i < argc; ++i) { strlengths[argc] = strlen(kv_strs[argc]) + 1;
int nb = strlen(args[i]) + 1; // bytes (incl. NUL) in this string // can't go over one page in size
strcpy(tmp, args[i]); // add to our buffer if ((argbytes + strlengths[argc]) > SZ_PAGE) {
argv[i] = tmp; // remember where it was // oops - ignore this and any others
tmp += nb; // move on break;
}
argbytes += strlengths[argc];
++argc;
} }
// trailing NULL pointer // Round up the byte count to the next multiple of four.
argv[argc] = NULL; argbytes = (argbytes + 3) & MOD4_MASK;
/* /*
** The pages for the stack were cleared when they were allocated, ** The pages for the stack were cleared when they were allocated,
@ -524,32 +512,44 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
** see below for more information. ** see below for more information.
*/ */
// Pointer to the last word in stack. We get this from the /*
// VM hierarchy. Get the PDE entry for the user address space. ** Find the user stack. The PDE entry for user address space points
pde_t stack_pde = pcb->pdir[USER_PDE]; ** to a page table for the first 4MB of the address space, but the
** "pointer" there is a physical frame address.
// The PDE entry points to the PT, which is an array of PTE. The last */
// two entries are for the stack; pull out the last one. pde_t *kv_userpt = (pde_t *)P2V(PTE_ADDR(pcb->pdir[USER_PDE]));
pte_t stack_pte = ((pte_t *)(stack_pde & MOD4K_MASK))[USER_STK_PTE2]; assert(kv_userpt != NULL);
// OK, now we have the PTE. The frame address of the last page is
// in this PTE. Find the address immediately after that.
uint32_t *ptr = (uint32_t *)((uint32_t)(stack_pte & MOD4K_MASK) + SZ_PAGE);
// Pointer to where the arg strings should be filled in.
char *strings = (char *)((uint32_t)ptr - argbytes);
// back the pointer up to the nearest word boundary; because we're
// moving toward location 0, the nearest word boundary is just the
// next smaller address whose low-order two bits are zeroes
strings = (char *)((uint32_t)strings & MOD4_MASK);
// Copy over the argv strings.
memcpy((void *)strings, argstrings, argbytes);
/* /*
** Next, we need to copy over the argv pointers. Start by ** The final entries in that PMT are for the pages of the user stack.
** determining where 'argc' should go. ** Grab the address of the frame for the last one. (Again, we need
** to convert it to a virtual address we can use.)
*/
// the PMT entry for that page
pte_t pmt_entry = kv_userpt[USER_STK_LAST_PTE];
assert(IS_PRESENT(pmt_entry));
// kernel VA for the first byte following that page
uint8_t *kv_ptr = (uint8_t *)P2V(PTE_ADDR(pmt_entry) + SZ_PAGE);
assert(kv_ptr != NULL);
// user VA for the first byte following that page
uint32_t *uv_ptr = (uint32_t *)(USER_STACK_P2 + SZ_PAGE);
// Pointers to where the arg strings should be filled in.
uint32_t kv_strings = ((uint32_t)kv_ptr) - argbytes;
uint32_t uv_strings = ((uint32_t)uv_ptr) - argbytes;
// back the pointers up to the nearest word boundary; because we're
// moving toward location 0, the nearest word boundary is just the
// next smaller address whose low-order two bits are zeroes
kv_strings &= MOD4_MASK;
uv_strings &= MOD4_MASK;
/*
** Next, we need to copy over the data. Start by determining where
** 'argc' should go.
** **
** Stack alignment is controlled by the SysV ABI i386 supplement, ** Stack alignment is controlled by the SysV ABI i386 supplement,
** version 1.2 (June 23, 2016), which states in section 2.2.2: ** version 1.2 (June 23, 2016), which states in section 2.2.2:
@ -563,7 +563,7 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
** **
** Isn't technical documentation fun? Ultimately, this means that ** Isn't technical documentation fun? Ultimately, this means that
** the first parameter to main() should be on the stack at an address ** the first parameter to main() should be on the stack at an address
** that is a multiple of 16. ** that is a multiple of 16. In our case, that is 'argc'.
** **
** The space needed for argc, argv, and the argv array itself is ** The space needed for argc, argv, and the argv array itself is
** argc + 3 words (argc+1 for the argv entries, plus one word each ** argc + 3 words (argc+1 for the argv entries, plus one word each
@ -571,45 +571,66 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
*/ */
int nwords = argc + 3; int nwords = argc + 3;
uint32_t *acptr = ((uint32_t *)strings) - nwords; uint32_t *kv_acptr = ((uint32_t *)kv_strings) - nwords;
uint32_t *uv_acptr = ((uint32_t *)uv_strings) - nwords;
/* // back these up to multiple-of-16 addresses for stack alignment
** Next, back up until we're at a multiple-of-16 address. Because we kv_acptr = (uint32_t *)(((uint32_t)kv_acptr) & MOD16_MASK);
** are moving to a lower address, its upper 28 bits are identical to uv_acptr = (uint32_t *)(((uint32_t)uv_acptr) & MOD16_MASK);
** the address we currently have, so we can do this with a bitwise
** AND to just turn off the lower four bits.
*/
acptr = (uint32_t *)(((uint32_t)acptr) & MOD16_MASK); // the argv location
uint32_t *kv_avptr = kv_acptr + 1;
// copy in 'argc' // the user address for the first argv entry
*acptr = argc; uint32_t *uv_avptr = uv_acptr + 2;
// next, 'argv', which follows 'argc'; 'argv' points to the // Copy over the argv strings.
// word that follows it in the stack for (int i = 0; i < argc; ++i) {
uint32_t *avptr = acptr + 2; // copy the string using kernel addresses
*(acptr + 1) = (uint32_t)avptr; strcpy((char *)kv_strings, kv_args[i]);
/* // remember the user address where this string went
** Next, we copy in all argc+1 pointers. uv_argv[i] = (char *)uv_strings;
*/
// Adjust and copy the string pointers. // adjust both string addresses
for (int i = 0; i <= argc; ++i) { kv_strings += strlengths[i];
if (argv[i] != NULL) { uv_strings += strlengths[i];
// an actual pointer - adjust it and copy it in
*avptr = (uint32_t)strings;
// skip to the next entry in the array
strings += strlen(argv[i]) + 1;
} else {
// end of the line!
*avptr = NULL;
}
++avptr;
} }
/* /*
** Now, we need to set up the initial context for the executing ** Next, we copy in argc, argv, and the pointers. The stack will
** look something like this:
**
** kv_avptr
** kv_acptr |
** | |
** v v
** argc argv av[0] av[1] etc NULL str0 str1 etc.
** [....][....][....][....] ... [0000] ... [......0......0.........]
** | ^ | | ^ ^
** | | | | | |
** ------ | ---------------------|-------
** ---------------------------
*/
// copy in 'argc'
*kv_acptr = argc;
// copy in 'argv'
*kv_avptr++ = (uint32_t)uv_avptr;
// now, the argv entries themselves
for (int i = 0; i < argc; ++i) {
*kv_avptr++ = (uint32_t)uv_argv[i];
}
// and the trailing NULL
*kv_avptr = NULL;
/*
** Almost done!
**
** Now we need to set up the initial context for the executing
** process. ** process.
** **
** When this process is dispatched, the context restore code will ** When this process is dispatched, the context restore code will
@ -618,27 +639,34 @@ static context_t *stack_setup(pcb_t *pcb, uint32_t entry, const char **args)
** the interrupt "returns" to the entry point of the process. ** the interrupt "returns" to the entry point of the process.
*/ */
// Locate the context save area on the stack. // Locate the context save area on the stack by backing up one
context_t *ctx = ((context_t *)avptr) - 1; // "context" from where the argc value is saved
context_t *kv_ctx = ((context_t *)kv_acptr) - 1;
uint32_t uv_ctx = (uint32_t)(((context_t *)uv_acptr) - 1);
/* /*
** We cleared the entire stack earlier, so all the context ** We cleared the entire stack earlier, so all the context
** fields currently contain zeroes. We now need to fill in ** fields currently contain zeroes. We now need to fill in
** all the important fields. ** all the important fields.
**
** Note: we don't need to set the ESP value for the process,
** as the 'popa' that restores the general registers doesn't
** actually restore ESP from the context area - it leaves it
** where it winds up.
*/ */
ctx->eflags = DEFAULT_EFLAGS; // IE enabled, PPL 0 kv_ctx->eflags = DEFAULT_EFLAGS; // IF enabled, IOPL 0
ctx->eip = entry; // initial EIP kv_ctx->eip = entry; // initial EIP
ctx->cs = GDT_CODE; // segment registers kv_ctx->cs = GDT_CODE; // segment registers
ctx->ss = GDT_STACK; kv_ctx->ss = GDT_STACK;
ctx->ds = ctx->es = ctx->fs = ctx->gs = GDT_DATA; kv_ctx->ds = kv_ctx->es = kv_ctx->fs = kv_ctx->gs = GDT_DATA;
/* /*
** Return the new context pointer to the caller. It will be our ** Return the new context pointer to the caller. It will be our
** caller's responsibility to schedule this process. ** caller's responsibility to schedule this process.
*/ */
return (ctx); return ((context_t *)uv_ctx);
} }
/* /*
@ -809,10 +837,11 @@ int user_duplicate(pcb_t *new, pcb_t *old)
** @param ptab A pointer to the program table entry to be loaded ** @param ptab A pointer to the program table entry to be loaded
** @param pcb The PCB for the program being loaded ** @param pcb The PCB for the program being loaded
** @param args The argument vector for the program ** @param args The argument vector for the program
** @param sys Is the argument vector from kernel code?
** **
** @return the status of the load attempt ** @return the status of the load attempt
*/ */
int user_load(prog_t *ptab, pcb_t *pcb, const char **args) int user_load(prog_t *ptab, pcb_t *pcb, const char **args, bool_t sys)
{ {
// NULL pointers are bad! // NULL pointers are bad!
assert1(ptab != NULL); assert1(ptab != NULL);
@ -820,7 +849,7 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
assert1(args != NULL); assert1(args != NULL);
#if TRACING_USER #if TRACING_USER
cio_printf("uload: prog '%s' pcb %08x args %08x\n", cio_printf("Uload: prog '%s' pcb %08x args %08x\n",
ptab->name[0] ? ptab->name : "?", (uint32_t)pcb, (uint32_t)args); ptab->name[0] ? ptab->name : "?", (uint32_t)pcb, (uint32_t)args);
#endif #endif
@ -832,8 +861,16 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
(uint32_t)ptab, ptab->name, ptab->offset, ptab->size, (uint32_t)ptab, ptab->name, ptab->offset, ptab->size,
ptab->flags); ptab->flags);
cio_printf(" args %08x:", (uint32_t)args); cio_printf(" args %08x:", (uint32_t)args);
for (int i = 0; args[i] != NULL; ++i) { if (sys) {
cio_printf(" [%d] %s", i, args[i]); for (int i = 0; args[i] != NULL; ++i) {
cio_printf(" [%d] %s", i, args[i]);
}
} else {
char **kv_args = vm_uva2kva(pcb->pdir, args);
for (int i = 0; kv_args[i] != NULL; ++i) {
cio_printf(" [%d] %s", i,
(char *)vm_uva2kva(pcb->pdir, kv_args[i]));
}
} }
cio_printf("\n pcb %08x (pid %u)\n", (uint32_t)pcb, pcb->pid); cio_printf("\n pcb %08x (pid %u)\n", (uint32_t)pcb, pcb->pid);
dump_fhdr(hdr); dump_fhdr(hdr);
@ -853,8 +890,8 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
// read all the program headers // read all the program headers
int stat = read_phdrs(hdr, pcb); int stat = read_phdrs(hdr, pcb);
if (stat != SUCCESS) { if (stat != SUCCESS) {
// TODO figure out a better way to deal with this cio_printf("Uload: read_phdrs('%s') returned %d\n", ptab->name, stat);
PANIC(0, "user_load: phdr read failed"); PANIC(0, "User_load: phdr read failed");
} }
// next, set up the runtime stack - just like setting up loadable // next, set up the runtime stack - just like setting up loadable
@ -862,12 +899,12 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
stat = stat =
vm_add(pcb->pdir, true, false, (void *)USER_STACK, SZ_USTACK, NULL, 0); vm_add(pcb->pdir, true, false, (void *)USER_STACK, SZ_USTACK, NULL, 0);
if (stat != SUCCESS) { if (stat != SUCCESS) {
// TODO yadda yadda... cio_printf("Uload: vm_add('%s') stack returned %d\n", ptab->name, stat);
PANIC(0, "user_load: vm_add failed"); PANIC(0, "user_load: vm_add stack failed");
} }
// set up the command-line arguments // set up the command-line arguments
pcb->context = stack_setup(pcb, hdr->e_entry, args); pcb->context = stack_setup(pcb, hdr->e_entry, args, sys);
return SUCCESS; return SUCCESS;
} }
@ -883,7 +920,7 @@ int user_load(prog_t *ptab, pcb_t *pcb, const char **args)
void user_cleanup(pcb_t *pcb) void user_cleanup(pcb_t *pcb)
{ {
#if TRACING_USER #if TRACING_USER
cio_printf("uclean: %08x\n", (uint32_t)pcb); cio_printf("Uclean: %08x\n", (uint32_t)pcb);
#endif #endif
if (pcb == NULL) { if (pcb == NULL) {

View file

@ -58,45 +58,155 @@ static void vm_isr(int vector, int code)
} }
/** /**
** Name: uva2kva ** Name: ptcount
** **
** Convert a user VA into a kernel address. Works for all addresses - ** Count the number of each type of entry in a page table.
** if the address is a page address, the PERMS(va) value will be 0; ** Returns a 32-bit result containing two 16-bit counts:
** otherwise, it is the offset into the page.
** **
** @param pdir Pointer to the page directory to examine ** Upper half Lower half
** @param va Virtual address to check ** PDIR: # of 4MB entries # of 'present' entries
** PMT: zero # of 'present' entries
**
** The number of "not present" can be calculated from these.
**
** @param pt Pointer to the page table
** @param dir Is it a page directory (vs. a page table)?
*/ */
ATTR_UNUSED ATTR_UNUSED
static void *uva2kva(pde_t *pdir, void *va) static uint32_t ptcount(pte_t *ptr, bool_t dir)
{ {
// find the PMT entry for this address uint16_t n_np = 0, n_p = 0, n_lg = 0;
pte_t *pte = vm_getpte(pdir, va, false);
if (pte == NULL) { for (int i = 0; i < N_PTE; ++i) {
return NULL; pde_t entry = *ptr++;
if (!IS_PRESENT(entry)) {
++n_np;
continue;
}
if (dir && IS_LARGE(entry)) {
++n_lg;
} else {
++n_p;
}
} }
// get the entry // n_lg will be 0 for PMTs
pte_t entry = *pte; return (n_lg << 16) | n_p;
}
// is this a valid address for the user? // decode a PDE
if (IS_PRESENT(entry)) { static void pde_prt(uint32_t level, uint32_t i, uint32_t entry)
return NULL; {
// indent
for (int n = 0; n <= level; ++n)
cio_puts(" ");
// line header
cio_printf("[%08x] %08x", i, entry);
// perms
if (IS_LARGE(entry)) { // PS is 1
if ((entry & PDE_PAT) != 0)
cio_puts(" PAT");
if ((entry & PDE_G) != 0)
cio_puts(" G");
cio_puts(" PS");
if ((entry & PDE_D) != 0)
cio_puts(" D");
} }
if ((entry & PDE_A) != 0)
cio_puts(" A");
if ((entry & PDE_PCD) != 0)
cio_puts(" CD");
if ((entry & PDE_PWT) != 0)
cio_puts(" WT");
if ((entry & PDE_US) != 0)
cio_puts(" U");
if ((entry & PDE_RW) != 0)
cio_puts(" W");
cio_puts((entry & PDE_P) != 0 ? " P" : "!P");
// is this a system-only page? cio_printf(" --> %s %08x", IS_LARGE(entry) ? "Pg" : "PT", PDE_ADDR(entry));
if (IS_SYSTEM(entry)) { }
return NULL;
}
// get the physical address // decode a PTE
uint32_t frame = PTE_ADDR(*pte) | PERMS(va); static void pte_prt(uint32_t level, uint32_t i, uint32_t entry)
{
// indent
for (int n = 0; n <= level; ++n)
cio_puts(" ");
// line header
cio_printf("[%08x] %08x", i, entry);
// perms
if ((entry & PDE_G) != 0)
cio_puts(" G");
if ((entry & PDE_PAT) != 0)
cio_puts(" PAT");
if ((entry & PDE_D) != 0)
cio_puts(" D");
if ((entry & PDE_A) != 0)
cio_puts(" A");
if ((entry & PDE_PCD) != 0)
cio_puts(" CD");
if ((entry & PDE_PWT) != 0)
cio_puts(" WT");
if ((entry & PDE_US) != 0)
cio_puts(" U");
if ((entry & PDE_RW) != 0)
cio_puts(" W");
cio_puts((entry & PDE_P) != 0 ? " P" : "!P");
return (void *)P2V(frame); cio_printf(" --> Pg %08x", PTE_ADDR(entry));
} }
/** /**
** Name: ptdump ** Name: pdump
**
** Recursive helper for table hierarchy dump.
**
** @param level Current hierarchy level
** @param pt Page table to display
** @param dir Is it a page directory (vs. a page table)?
** @param mode How to display the entries
*/
ATTR_UNUSED
static void pdump(uint_t level, void *pt, bool_t dir, enum vmmode_e mode)
{
	// Prints a one-line summary for the table at 'pt', then (when
	// 'mode' is above Simple) one line per entry; page directory
	// entries that refer to a page table are followed by a dump of
	// that table, one level deeper.
	pte_t *ptr = (pte_t *)pt;

	// summary header: which kind of table this is, and where it lives
	cio_printf("%s at 0x%08x:", dir ? "PDir" : "PTbl", (uint32_t)pt);

	// ptcount() packs two 16-bit counts into one word: the number of
	// 4MB entries in the upper half, 'present' entries in the lower
	uint32_t nums = ptcount(ptr, dir);
	uint32_t n_large = nums >> 16;
	uint32_t n_present = nums & 0xffff;

	if (dir) {
		cio_printf(" %u 4MB", n_large);
	}

	// whatever is neither large nor present must be 'not present'
	cio_printf(" %u P %u !P\n", n_present, N_PTE - (n_large + n_present));

	for (uint32_t i = 0; i < (uint32_t)N_PTE; ++i, ++ptr) {
		// nothing is printed per-entry in Simple mode
		if (!(mode > Simple)) {
			continue;
		}

		pte_t entry = *ptr;

		if (!dir) {
			// just a PMT entry
			pte_prt(level, i, entry);
			cio_putchar('\n');
			continue;
		}

		// this is a PDIR entry; could be either a 4MB page,
		// or a PMT pointer
		pde_prt(level, i, entry);
		cio_putchar('\n');

		// Only descend through entries that actually refer to a page
		// table. The entry holds the table's physical address plus
		// flag bits, so strip the flags and convert to a kernel VA
		// before treating it as a pointer.
		if (IS_PRESENT(entry) && !IS_LARGE(entry)) {
			pdump(level + 1, (void *)P2V(PDE_ADDR(entry)), false, mode);
		}
	}
}
/**
** Name: pmt_dump
** **
** Dump the non-zero entries of a page table or directory ** Dump the non-zero entries of a page table or directory
** **
@ -105,7 +215,8 @@ static void *uva2kva(pde_t *pdir, void *va)
** @param start First entry to process ** @param start First entry to process
** @param num Number of entries to process ** @param num Number of entries to process
*/ */
static void ptdump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num) ATTR_UNUSED
static void pmt_dump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num)
{ {
cio_printf("\n\nP%c dump", dir ? 'D' : 'T'); cio_printf("\n\nP%c dump", dir ? 'D' : 'T');
cio_printf(" of %08x", (uint32_t)pt); cio_printf(" of %08x", (uint32_t)pt);
@ -169,7 +280,23 @@ void vm_init(void)
assert(kpdir != NULL); assert(kpdir != NULL);
#if TRACING_VM #if TRACING_VM
cio_printf("vm_init: kpdir is %08x\n", kpdir); cio_printf("vm_init: kpdir %08x, adding user pages\n", kpdir);
#endif
// add the entries for the user address space
for (uint32_t addr = 0; addr < NUM_4MB; addr += SZ_PAGE) {
int stat = vm_map(kpdir, (void *)addr, addr, SZ_PAGE, PTE_RW);
if (stat != SUCCESS) {
cio_printf("vm_init, map %08x->%08x failed, status %d\n", addr,
addr, stat);
PANIC(0, "vm_init user range map failed");
}
#if TRACING_VM
cio_putchar('.');
#endif
}
#if TRACING_VM
cio_puts(" done\n");
#endif #endif
// switch to it // switch to it
@ -183,6 +310,44 @@ void vm_init(void)
install_isr(VEC_PAGE_FAULT, vm_isr); install_isr(VEC_PAGE_FAULT, vm_isr);
} }
/**
** Name: vm_uva2kva
**
** Convert a user VA into a kernel address. Works for all addresses -
** if the address is a page address, the low-order twelve bits will be
** zeroes; otherwise, they are the offset into the page, which is
** unchanged within the address spaces.
**
** @param pdir Pointer to the page directory to examine
** @param va Virtual address to check
**
** @return the corresponding kernel address, or NULL if vm_getpte()
** finds no entry for the address, the entry is not present,
** or the entry is marked as system-only
*/
void *vm_uva2kva(pde_t *pdir, void *va)
{
	// find the PMT entry for this address (lookup only - do not
	// allocate a page table if one isn't there)
	pte_t *pte = vm_getpte(pdir, va, false);
	if (pte == NULL) {
		return NULL;
	}

	// get the entry
	pte_t entry = *pte;

	// is this a valid address for the user? an entry that is not
	// present has no frame behind it, so there is nothing to translate
	if (!IS_PRESENT(entry)) {
		return NULL;
	}

	// is this a system-only page?
	if (IS_SYSTEM(entry)) {
		return NULL;
	}

	// get the physical address: frame address from the entry,
	// offset within the page from the original VA
	uint32_t frame = PTE_ADDR(entry) | PERMS(va);

	return (void *)P2V(frame);
}
/** /**
** Name: vm_pagedup ** Name: vm_pagedup
** **
@ -206,46 +371,45 @@ void *vm_pagedup(void *old)
** **
** Duplicate a page directory entry ** Duplicate a page directory entry
** **
** @param dst Pointer to where the duplicate should go ** @param entry The entry to be duplicated
** @param curr Pointer to the entry to be duplicated
** **
** @return true on success, else false ** @return the new entry, or -1 on error
*/ */
bool_t vm_pdedup(pde_t *dst, pde_t *curr) pde_t vm_pdedup(pde_t entry)
{ {
assert1(curr != NULL);
assert1(dst != NULL);
#if TRACING_VM #if TRACING_VM
cio_printf("vm_pdedup dst %08x curr %08x\n", (uint32_t)dst, (uint32_t)curr); cio_printf("vm_pdedup curr %08x\n", (uint32_t)entry);
#endif #endif
pde_t entry = *curr;
// simplest case // simplest case
if (!IS_PRESENT(entry)) { if (!IS_PRESENT(entry)) {
*dst = 0; return 0;
return true;
} }
// OK, we have an entry; allocate a page table for it // is this a large page?
pte_t *newtbl = (pte_t *)km_page_alloc(); if (IS_LARGE(entry)) {
if (newtbl == NULL) { // just copy it
return false; return entry;
} }
// we could clear the new table, but we'll be assigning to // OK, we have a 4KB entry; allocate a page table for it
// each entry anyway, so we'll save the execution time pte_t *tblva = (pte_t *)km_page_alloc();
if (tblva == NULL) {
return (uint32_t)-1;
}
// address of the page table for this directory entry // make sure the entries are all initially 'not present'
pte_t *old = (pte_t *)PDE_ADDR(entry); memclr(tblva, SZ_PAGE);
// pointer to the first PTE in the new table // VA of the page table for this directory entry
pte_t *new = newtbl; pte_t *old = (pte_t *)P2V(PDE_ADDR(entry));
// pointer to the first PTE in the new table (already a VA)
pte_t *new = tblva;
for (int i = 0; i < N_PTE; ++i) { for (int i = 0; i < N_PTE; ++i) {
if (!IS_PRESENT(*old)) { // only need to copy 'present' entries
*new = 0; if (IS_PRESENT(*old)) {
} else {
*new = *old; *new = *old;
} }
++old; ++old;
@ -253,10 +417,8 @@ bool_t vm_pdedup(pde_t *dst, pde_t *curr)
} }
// replace the page table address // replace the page table address
// upper 22 bits from 'newtbl', lower 12 from '*curr' // (PA of page table, lower 12 bits from '*curr')
*dst = (pde_t)(PTE_ADDR(newtbl) | PERMS(entry)); return (pde_t)(V2P(PTE_ADDR(tblva)) | PERMS(entry));
return true;
} }
/** /**
@ -282,8 +444,7 @@ pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc)
assert1(pdir != NULL); assert1(pdir != NULL);
// get the PDIR entry for this virtual address // get the PDIR entry for this virtual address
uint32_t ix = PDIX(va); pde_t *pde_ptr = &pdir[PDIX(va)];
pde_t *pde_ptr = &pdir[ix];
// is it already set up? // is it already set up?
if (IS_PRESENT(*pde_ptr)) { if (IS_PRESENT(*pde_ptr)) {
@ -319,10 +480,8 @@ pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc)
*pde_ptr = V2P(ptbl) | PDE_P | PDE_RW; *pde_ptr = V2P(ptbl) | PDE_P | PDE_RW;
} }
// finally, return a pointer to the entry in the // finally, return a pointer to the entry in the page table for this VA
// page table for this VA return &ptbl[PTIX(va)];
ix = PTIX(va);
return &ptbl[ix];
} }
// Set up kernel part of a page table. // Set up kernel part of a page table.
@ -337,7 +496,7 @@ pde_t *vm_mkkvm(void)
} }
#if 0 && TRACING_VM #if 0 && TRACING_VM
cio_puts( "\nEntering vm_mkkvm\n" ); cio_puts( "\nEntering vm_mkkvm\n" );
ptdump( pdir, true, 0, N_PDE ); pmt_dump( pdir, true, 0, N_PDE );
#endif #endif
// clear it out to disable all the entries // clear it out to disable all the entries
@ -361,8 +520,8 @@ pde_t *vm_mkkvm(void)
} }
#if 0 && TRACING_VM #if 0 && TRACING_VM
cio_puts( "\nvm_mkkvm() final PD:\n" ); cio_puts( "\nvm_mkkvm() final PD:\n" );
ptdump( pdir, true, 0, 16 ); pmt_dump( pdir, true, 0, 16 );
ptdump( pdir, true, 0x200, 16 ); pmt_dump( pdir, true, 0x200, 16 );
#endif #endif
return pdir; return pdir;
@ -382,19 +541,26 @@ pde_t *vm_mkuvm(void)
return NULL; return NULL;
} }
// iterate through the kernel page directory // iterate through the 'system' portions of the kernel
pde_t *curr = kpdir; // page directory
pde_t *dst = new; int i = PDIX(KERN_BASE);
for (int i = 0; i < N_PDE; ++i) { pde_t *curr = &kpdir[i];
pde_t *dst = &new[i];
while (i < N_PDE) {
if (*curr != 0) { if (*curr != 0) {
// found an active one - duplicate it // found an active one - duplicate it
if (!vm_pdedup(dst, curr)) { pde_t entry = vm_pdedup(*curr);
if (entry == (uint32_t)-1) {
return NULL; return NULL;
} }
*dst = entry;
} else {
*dst = 0;
} }
++curr; ++curr;
++dst; ++dst;
++i;
} }
return new; return new;
@ -481,7 +647,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
// figure out where this page will go in the hierarchy // figure out where this page will go in the hierarchy
pte_t *pte = vm_getpte(pdir, va, true); pte_t *pte = vm_getpte(pdir, va, true);
if (pte == NULL) { if (pte == NULL) {
// TODO if i > 0, this isn't the first frame - is // if i > 0, this isn't the first frame - is
// there anything to do about other frames? // there anything to do about other frames?
// POSSIBLE MEMORY LEAK? // POSSIBLE MEMORY LEAK?
return E_NO_MEMORY; return E_NO_MEMORY;
@ -490,7 +656,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
// allocate the frame // allocate the frame
void *page = km_page_alloc(); void *page = km_page_alloc();
if (page == NULL) { if (page == NULL) {
// TODO same question here // same question here
return E_NO_MEMORY; return E_NO_MEMORY;
} }
@ -498,7 +664,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
memclr(page, SZ_PAGE); memclr(page, SZ_PAGE);
// create the PTE for this frame // create the PTE for this frame
uint32_t entry = (uint32_t)(PTE_ADDR(page) | entrybase); uint32_t entry = (uint32_t)(PTE_ADDR(V2P(page)) | entrybase);
*pte = entry; *pte = entry;
// copy data if we need to // copy data if we need to
@ -506,7 +672,7 @@ int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
// how much to copy // how much to copy
uint32_t num = bytes > SZ_PAGE ? SZ_PAGE : bytes; uint32_t num = bytes > SZ_PAGE ? SZ_PAGE : bytes;
// do it! // do it!
memcpy((void *)page, (void *)data, num); memmove((void *)page, (void *)data, num);
// adjust all the pointers // adjust all the pointers
data += num; // where to continue data += num; // where to continue
bytes -= num; // what's left to copy bytes -= num; // what's left to copy
@ -644,7 +810,7 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm)
PDIX(addr), PTIX(addr)); PDIX(addr), PTIX(addr));
// dump the directory // dump the directory
ptdump(pdir, true, PDIX(addr), 4); pmt_dump(pdir, true, PDIX(addr), 4);
// find the relevant PDE entry // find the relevant PDE entry
uint32_t ix = PDIX(va); uint32_t ix = PDIX(va);
@ -653,7 +819,7 @@ int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm)
// round the PMT index down // round the PMT index down
uint32_t ix2 = PTIX(va) & MOD4_MASK; uint32_t ix2 = PTIX(va) & MOD4_MASK;
// dump the PMT for the relevant directory entry // dump the PMT for the relevant directory entry
ptdump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4); pmt_dump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4);
} }
#endif #endif
PANIC(0, "mapping an already-mapped address"); PANIC(0, "mapping an already-mapped address");
@ -735,3 +901,26 @@ int vm_uvmdup(pde_t *new, pde_t *old)
return SUCCESS; return SUCCESS;
} }
/**
** Name: vm_print
**
** Print out a paging hierarchy. Prints a banner, then hands the
** table to pdump(); a NULL table pointer is reported and nothing
** further is printed.
**
** @param pt Page table to display
** @param dir Is it a page directory (vs. a page table)?
** @param mode How to display the entries
*/
void vm_print(void *pt, bool_t dir, enum vmmode_e mode)
{
	cio_puts("\nVM hierarchy");

	if (pt == NULL) {
		cio_puts(" (NULL pointer)\n");
		return;
	}

	// continues the banner line, so it needs its own leading space
	cio_printf(" starting at 0x%08x (%s):\n", (uint32_t)pt,
		dir ? "PDIR" : "PMT");

	pdump(0, pt, dir, mode);
}

View file

@ -20,7 +20,7 @@ typedef struct proc_s {
uint_t pid; // its PID (when spawned) uint_t pid; // its PID (when spawned)
uint8_t e_prio; // process priority uint8_t e_prio; // process priority
char select[3]; // identifying character, NUL, extra char select[3]; // identifying character, NUL, extra
char *args[MAX_ARGS]; // argument vector strings char *args[N_ARGS]; // argument vector strings
} proc_t; } proc_t;
/* /*

View file

@ -27,7 +27,7 @@ typedef struct proc_s {
uint_t index; // process table index uint_t index; // process table index
int8_t prio; // process priority int8_t prio; // process priority
char select[3]; // identifying character, NUL, extra char select[3]; // identifying character, NUL, extra
char *args[MAX_ARGS]; // argument vector strings char *args[N_ARGS]; // argument vector strings
} proc_t; } proc_t;
/* /*

247
util/default.ld Normal file
View file

@ -0,0 +1,247 @@
GNU ld (GNU Binutils for Ubuntu) 2.30
Supported emulations:
elf_x86_64
elf32_x86_64
elf_i386
elf_iamcu
i386linux
elf_l1om
elf_k1om
i386pep
i386pe
using internal linker script:
==================================================
/* Script for -z combreloc: combine and sort reloc sections */
/* Copyright (C) 2014-2018 Free Software Foundation, Inc.
Copying and distribution of this script, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. */
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
"elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
ENTRY(_start)
SEARCH_DIR("=/usr/local/lib/x86_64-linux-gnu"); SEARCH_DIR("=/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu64"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib64"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib");
SECTIONS
{
/* Read-only sections, merged into text segment: */
PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.dyn :
{
*(.rela.init)
*(.rela.text .rela.text.* .rela.gnu.linkonce.t.*)
*(.rela.fini)
*(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*)
*(.rela.data .rela.data.* .rela.gnu.linkonce.d.*)
*(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*)
*(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*)
*(.rela.ctors)
*(.rela.dtors)
*(.rela.got)
*(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*)
*(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*)
*(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*)
*(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*)
*(.rela.ifunc)
}
.rela.plt :
{
*(.rela.plt)
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.init :
{
KEEP (*(SORT_NONE(.init)))
}
.plt : { *(.plt) *(.iplt) }
.plt.got : { *(.plt.got) }
.plt.sec : { *(.plt.sec) }
.text :
{
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
}
.fini :
{
KEEP (*(SORT_NONE(.fini)))
}
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
.gcc_except_table.*) }
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges
.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
/* Thread Local Storage sections */
.tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
.got : { *(.got) *(.igot) }
. = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .);
.got.plt : { *(.got.plt) *(.igot.plt) }
.data :
{
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
_edata = .; PROVIDE (edata = .);
. = .;
__bss_start = .;
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we don't
pad the .data section. */
. = ALIGN(. != 0 ? 64 / 8 : 1);
}
.lbss :
{
*(.dynlbss)
*(.lbss .lbss.* .gnu.linkonce.lb.*)
*(LARGE_COMMON)
}
. = ALIGN(64 / 8);
. = SEGMENT_START("ldata-segment", .);
.lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
{
*(.lrodata .lrodata.* .gnu.linkonce.lr.*)
}
.ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) :
{
*(.ldata .ldata.* .gnu.linkonce.l.*)
. = ALIGN(. != 0 ? 64 / 8 : 1);
}
. = ALIGN(64 / 8);
_end = .; PROVIDE (end = .);
. = DATA_SEGMENT_END (.);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.debug_addr 0 : { *(.debug_addr) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}
==================================================