diff options
author | Freya Murphy <freya@freyacat.org> | 2025-03-25 17:36:52 -0400 |
---|---|---|
committer | Freya Murphy <freya@freyacat.org> | 2025-03-25 17:38:22 -0400 |
commit | 6af21e6a4f2251e71353562d5df7f376fdffc270 (patch) | |
tree | de20c7afc9878422c81e34f30c6b010075e9e69a /kernel | |
download | comus-6af21e6a4f2251e71353562d5df7f376fdffc270.tar.gz comus-6af21e6a4f2251e71353562d5df7f376fdffc270.tar.bz2 comus-6af21e6a4f2251e71353562d5df7f376fdffc270.zip |
initial checkout from wrc
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Make.mk | 66 | ||||
-rw-r--r-- | kernel/cio.c | 796 | ||||
-rw-r--r-- | kernel/clock.c | 163 | ||||
-rw-r--r-- | kernel/isrs.S | 374 | ||||
-rw-r--r-- | kernel/kernel.c | 381 | ||||
-rw-r--r-- | kernel/kernel.ld | 57 | ||||
-rw-r--r-- | kernel/kmem.c | 681 | ||||
-rw-r--r-- | kernel/list.c | 64 | ||||
-rw-r--r-- | kernel/procs.c | 1136 | ||||
-rw-r--r-- | kernel/sio.c | 694 | ||||
-rw-r--r-- | kernel/startup.S | 153 | ||||
-rw-r--r-- | kernel/support.c | 279 | ||||
-rw-r--r-- | kernel/syscalls.c | 829 | ||||
-rw-r--r-- | kernel/user.c | 774 | ||||
-rw-r--r-- | kernel/vm.c | 585 | ||||
-rw-r--r-- | kernel/vmtables.c | 270 |
16 files changed, 7302 insertions, 0 deletions
diff --git a/kernel/Make.mk b/kernel/Make.mk new file mode 100644 index 0000000..0c9b507 --- /dev/null +++ b/kernel/Make.mk @@ -0,0 +1,66 @@ +# +# Makefile fragment for the kernel components of the system. +# +# Makefile fragment for the kernel component of the system. +# +# THIS IS NOT A COMPLETE Makefile - run GNU make in the top-level +# directory, and this will be pulled in automatically. +# + +SUBDIRS += kernel + +################### +# FILES SECTION # +################### + +BOOT_OBJ := $(patsubst %.c, $(BUILDDIR)/%.o, $(BOOT_SRC)) + +KERN_SRC := kernel/startup.S kernel/isrs.S \ + kernel/cio.c kernel/clock.c kernel/kernel.c kernel/kmem.c \ + kernel/list.c kernel/procs.c kernel/sio.c kernel/support.c \ + kernel/syscalls.c kernel/user.c kernel/vm.c kernel/vmtables.c + +KERN_OBJ := $(patsubst %.c, $(BUILDDIR)/%.o, $(KERN_SRC)) +KERN_OBJ := $(patsubst %.S, $(BUILDDIR)/%.o, $(KERN_OBJ)) + +KCFLAGS := -ggdb +KLDFLAGS := -T kernel/kernel.ld +KLIBS := -lkernel -lcommon + +################### +# RULES SECTION # +################### + +kernel: $(BUILDDIR)/kernel/kernel.b + +$(BUILDDIR)/kernel/%.o: kernel/%.c $(BUILDDIR)/.vars.CFLAGS + @mkdir -p $(@D) + $(CC) $(CFLAGS) $(KCFLAGS) -c -o $@ $< + +$(BUILDDIR)/kernel/%.o: kernel/%.S $(BUILDDIR)/.vars.CFLAGS + @mkdir -p $(@D) + $(CPP) $(CPPFLAGS) -o $(@D)/$*.s $< + $(AS) $(ASFLAGS) $(KCFLAGS) -o $@ $(@D)/$*.s -a=$(@D)/$*.lst + $(RM) -f $(@D)/$*.s + +$(BUILDDIR)/kernel/kernel: $(KERN_OBJ) + @mkdir -p $(@D) + $(LD) $(KLDFLAGS) $(LDFLAGS) -o $@ $(KERN_OBJ) $(KLIBS) + $(OBJDUMP) -S $@ > $@.asm + $(NM) -n $@ > $@.sym + $(READELF) -a $@ > $@.info + +$(BUILDDIR)/kernel/kernel.b: $(BUILDDIR)/kernel/kernel + $(LD) $(LDFLAGS) -o $(BUILDDIR)/kernel/kernel.b -s \ + --oformat binary -Ttext 0x10000 $(BUILDDIR)/kernel/kernel + +# some debugging assist rules +$(BUILDDIR)/kernel/%.i: kernel/%.c $(BUILDDIR)/.vars.CFLAGS + @mkdir -p $(@D) + $(CC) $(CFLAGS) $(KCFLAGS) -E -c $< > $(@D)/$*.i + +$(BUILDDIR)/kernel/%.dat: $(BUILDDIR)/kernel/%.o + @mkdir -p $(@D) + $(OBJCOPY) -S -O binary -j .data $< $@ + hexdump -C $@ > $(@D)/$*.hex + diff --git a/kernel/cio.c b/kernel/cio.c new file mode 100644 index 0000000..cfff543 --- /dev/null +++ b/kernel/cio.c @@ -0,0 +1,796 @@ +/* +** SCCS ID: @(#)cio.c 2.10 1/22/25 +** +** @file cio.c +** +** @author Warren R. Carithers +** +** Based on: c_io.c 1.13 (Ken Reek, Jon Coles, Warren R. Carithers) +** +** Console I/O routines +** +** This module implements a simple set of input and output routines +** for the console screen and keyboard on the machines in the DSL. +** Refer to the header file comments for complete details. +** +** Naming conventions: +** +** Externally-visible functions have names beginning with the +** characters "cio_". +** +*/ + +#include <cio.h> +#include <lib.h> +#include <support.h> +#include <x86/arch.h> +#include <x86/pic.h> +#include <x86/ops.h> + +/* +** Bit masks for the lower five and eight bits of a value +*/ +#define BMASK5 0x1f +#define BMASK8 0xff + +/* +** Video parameters +*/ +#define SCREEN_MIN_X 0 +#define SCREEN_MIN_Y 0 +#define SCREEN_X_SIZE 80 +#define SCREEN_Y_SIZE 25 +#define SCREEN_MAX_X ( SCREEN_X_SIZE - 1 ) +#define SCREEN_MAX_Y ( SCREEN_Y_SIZE - 1 ) + +/* +** Video state +*/ +static unsigned int scroll_min_x, scroll_min_y; +static unsigned int scroll_max_x, scroll_max_y; +static unsigned int curr_x, curr_y; +static unsigned int min_x, min_y; +static unsigned int max_x, max_y; + +// pointer to input notification function +static void (*notify)(int); + +#ifdef SA_DEBUG +#include <stdio.h> +#define cio_putchar putchar +#define cio_puts(x) fputs( x, stdout ) +#endif + + +/* +** VGA definitions. +*/ + +// calculate the memory address of a specific character position +// within VGA memory +#define VIDEO_ADDR(x,y) ( unsigned short * ) \ + ( VID_BASE_ADDR + 2 * ( (y) * SCREEN_X_SIZE + (x) ) ) + +// port addresses +#define VGA_CTRL_IX_ADDR 0x3d4 +# define VGA_CTRL_CUR_HIGH 0x0e // cursor location, high byte +# define VGA_CTRL_CUR_LOW 0x0f // cursor location, low byte +#define VGA_CTRL_IX_DATA 0x3d5 + +// attribute bits +#define VGA_ATT_BBI 0x80 // blink, or background intensity +#define VGA_ATT_BGC 0x70 // background color +#define VGA_ATT_FICS 0x80 // foreground intensity or char font select +#define VGA_ATT_FGC 0x70 // foreground color + +// color selections +#define VGA_BG_BLACK 0x0000 // background colors +#define VGA_BG_BLUE 0x1000 +#define VGA_BG_GREEN 0x2000 +#define VGA_BG_CYAN 0x3000 +#define VGA_BG_RED 0x4000 +#define VGA_BG_MAGENTA 0x5000 +#define VGA_BG_BROWN 0x6000 +#define VGA_BG_WHITE 0x7000 + +#define VGA_FG_BLACK 0x0000 // foreground colors +#define VGA_FG_BLUE 0x0100 +#define VGA_FG_GREEN 0x0200 +#define VGA_FG_CYAN 0x0300 +#define VGA_FG_RED 0x0400 +#define VGA_FG_MAGENTA 0x0500 +#define VGA_FG_BROWN 0x0600 +#define VGA_FG_WHITE 0x0700 + +// color combinations +#define VGA_WHITE_ON_BLACK (VGA_FG_WHITE | VGA_BG_BLACK) +#define VGA_BLACK_ON_WHITE (VGA_FG_BLACK | VGA_BG_WHITE) + +/* +** Internal support routines. +*/ + +/* +** setcursor: set the cursor location (screen coordinates) +*/ +static void setcursor( void ) { + unsigned addr; + unsigned int y = curr_y; + + if( y > scroll_max_y ) { + y = scroll_max_y; + } + + addr = (unsigned)( y * SCREEN_X_SIZE + curr_x ); + + outb( VGA_CTRL_IX_ADDR, VGA_CTRL_CUR_HIGH ); + outb( VGA_CTRL_IX_DATA, ( addr >> 8 ) & BMASK8 ); + outb( VGA_CTRL_IX_ADDR, VGA_CTRL_CUR_LOW ); + outb( VGA_CTRL_IX_DATA, addr & BMASK8 ); +} + +/* +** putchar_at: physical output to the video memory +*/ +static void putchar_at( unsigned int x, unsigned int y, unsigned int c ) { + /* + ** If x or y is too big or small, don't do any output. + */ + if( x <= max_x && y <= max_y ) { + unsigned short *addr = VIDEO_ADDR( x, y ); + + /* + ** The character may have attributes associated with it; if + ** so, use those, otherwise use white on black. + */ + c &= 0xffff; // keep only the lower bytes + if( c > BMASK8 ) { + *addr = (unsigned short)c; + } else { + *addr = (unsigned short)c | VGA_WHITE_ON_BLACK; + } + } +} + +/* +** Globally-visible support routines. +*/ + +/* +** Set the scrolling region +*/ +void cio_setscroll( unsigned int s_min_x, unsigned int s_min_y, + unsigned int s_max_x, unsigned int s_max_y ) { + scroll_min_x = bound( min_x, s_min_x, max_x ); + scroll_min_y = bound( min_y, s_min_y, max_y ); + scroll_max_x = bound( scroll_min_x, s_max_x, max_x ); + scroll_max_y = bound( scroll_min_y, s_max_y, max_y ); + curr_x = scroll_min_x; + curr_y = scroll_min_y; + setcursor(); +} + +/* +** Cursor movement in the scroll region +*/ +void cio_moveto( unsigned int x, unsigned int y ) { + curr_x = bound( scroll_min_x, x + scroll_min_x, scroll_max_x ); + curr_y = bound( scroll_min_y, y + scroll_min_y, scroll_max_y ); + setcursor(); +} + +/* +** The putchar family +*/ +void cio_putchar_at( unsigned int x, unsigned int y, unsigned int c ) { + if( ( c & 0x7f ) == '\n' ) { + unsigned int limit; + + /* + ** If we're in the scroll region, don't let this loop + ** leave it. If we're not in the scroll region, don't + ** let this loop enter it. + */ + if( x > scroll_max_x ) { + limit = max_x; + } + else if( x >= scroll_min_x ) { + limit = scroll_max_x; + } + else { + limit = scroll_min_x - 1; + } + while( x <= limit ) { + putchar_at( x, y, ' ' ); + x += 1; + } + } + else { + putchar_at( x, y, c ); + } +} + +#ifndef SA_DEBUG +void cio_putchar( unsigned int c ) { + /* + ** If we're off the bottom of the screen, scroll the window. + */ + if( curr_y > scroll_max_y ) { + cio_scroll( curr_y - scroll_max_y ); + curr_y = scroll_max_y; + } + + switch( c & BMASK8 ) { + case '\n': + /* + ** Erase to the end of the line, then move to new line + ** (actual scroll is delayed until next output appears). + */ + while( curr_x <= scroll_max_x ) { + putchar_at( curr_x, curr_y, ' ' ); + curr_x += 1; + } + curr_x = scroll_min_x; + curr_y += 1; + break; + + case '\r': + curr_x = scroll_min_x; + break; + + default: + putchar_at( curr_x, curr_y, c ); + curr_x += 1; + if( curr_x > scroll_max_x ) { + curr_x = scroll_min_x; + curr_y += 1; + } + break; + } + setcursor(); +} +#endif + +/* +** The puts family +*/ +void cio_puts_at( unsigned int x, unsigned int y, const char *str ) { + unsigned int ch; + + while( (ch = *str++) != '\0' && x <= max_x ) { + cio_putchar_at( x, y, ch ); + x += 1; + } +} + +#ifndef SA_DEBUG +void cio_puts( const char *str ) { + unsigned int ch; + + while( (ch = *str++) != '\0' ) { + cio_putchar( ch ); + } +} +#endif + +/* +** Write a "sized" buffer (like cio_puts(), but no NUL) +*/ +void cio_write( const char *buf, int length ) { + for( int i = 0; i < length; ++i ) { + cio_putchar( buf[i] ); + } +} + +void cio_clearscroll( void ) { + unsigned int nchars = scroll_max_x - scroll_min_x + 1; + unsigned int l; + unsigned int c; + + for( l = scroll_min_y; l <= scroll_max_y; l += 1 ) { + unsigned short *to = VIDEO_ADDR( scroll_min_x, l ); + + for( c = 0; c < nchars; c += 1 ) { + *to++ = ' ' | 0x0700; + } + } +} + +void cio_clearscreen( void ) { + unsigned short *to = VIDEO_ADDR( min_x, min_y ); + unsigned int nchars = ( max_y - min_y + 1 ) * ( max_x - min_x + 1 ); + + while( nchars > 0 ) { + *to++ = ' ' | 0x0700; + nchars -= 1; + } +} + + +void cio_scroll( unsigned int lines ) { + unsigned short *from; + unsigned short *to; + int nchars = scroll_max_x - scroll_min_x + 1; + int line, c; + + /* + ** If # of lines is the whole scrolling region or more, just clear. + */ + if( lines > scroll_max_y - scroll_min_y ) { + cio_clearscroll(); + curr_x = scroll_min_x; + curr_y = scroll_min_y; + setcursor(); + return; + } + + /* + ** Must copy it line by line. + */ + for( line = scroll_min_y; line <= scroll_max_y - lines; line += 1 ) { + from = VIDEO_ADDR( scroll_min_x, line + lines ); + to = VIDEO_ADDR( scroll_min_x, line ); + for( c = 0; c < nchars; c += 1 ) { + *to++ = *from++; + } + } + + for( ; line <= scroll_max_y; line += 1 ) { + to = VIDEO_ADDR( scroll_min_x, line ); + for( c = 0; c < nchars; c += 1 ) { + *to++ = ' ' | 0x0700; + } + } +} + +static int mypad( int x, int y, int extra, int padchar ) { + while( extra > 0 ) { + if( x != -1 || y != -1 ) { + cio_putchar_at( x, y, padchar ); + x += 1; + } + else { + cio_putchar( padchar ); + } + extra -= 1; + } + return x; +} + +static int mypadstr( int x, int y, char *str, int len, int width, + int leftadjust, int padchar ) { + int extra; + + if( len < 0 ) { + len = strlen( str ); + } + extra = width - len; + if( extra > 0 && !leftadjust ) { + x = mypad( x, y, extra, padchar ); + } + if( x != -1 || y != -1 ) { + cio_puts_at( x, y, str ); + x += len; + } + else { + cio_puts( str ); + } + if( extra > 0 && leftadjust ) { + x = mypad( x, y, extra, padchar ); + } + return x; +} + +static void do_printf( int x, int y, char **f ) { + char *fmt = *f; + int *ap; + char buf[ 12 ]; + char ch; + char *str; + int leftadjust; + int width; + int len; + int padchar; + + /* + ** Get characters from the format string and process them + */ + + ap = (int *)( f + 1 ); + + while( (ch = *fmt++) != '\0' ) { + + /* + ** Is it the start of a format code? + */ + + if( ch == '%' ) { + + /* + ** Yes, get the padding and width options (if there). + ** Alignment must come at the beginning, then fill, + ** then width. + */ + + leftadjust = 0; + padchar = ' '; + width = 0; + + ch = *fmt++; + + if( ch == '-' ) { + leftadjust = 1; + ch = *fmt++; + } + + if( ch == '0' ) { + padchar = '0'; + ch = *fmt++; + } + + while( ch >= '0' && ch <= '9' ) { + width *= 10; + width += ch - '0'; + ch = *fmt++; + } + + /* + ** What data type do we have? + */ + switch( ch ) { + + case 'c': + // ch = *( (int *)ap )++; + ch = *ap++; + buf[ 0 ] = ch; + buf[ 1 ] = '\0'; + x = mypadstr( x, y, buf, 1, width, leftadjust, padchar ); + break; + + case 'd': + // len = cvtdec( buf, *( (int *)ap )++ ); + len = cvtdec( buf, *ap++ ); + x = mypadstr( x, y, buf, len, width, leftadjust, padchar ); + break; + + case 's': + // str = *( (char **)ap )++; + str = (char *) (*ap++); + x = mypadstr( x, y, str, -1, width, leftadjust, padchar ); + break; + + case 'x': + // len = cvthex( buf, *( (int *)ap )++ ); + len = cvthex( buf, *ap++ ); + x = mypadstr( x, y, buf, len, width, leftadjust, padchar ); + break; + + case 'o': + // len = cvtoct( buf, *( (int *)ap )++ ); + len = cvtoct( buf, *ap++ ); + x = mypadstr( x, y, buf, len, width, leftadjust, padchar ); + break; + + case 'u': + len = cvtuns( buf, *ap++ ); + x = mypadstr( x, y, buf, len, width, leftadjust, padchar ); + break; + + } + } else { + + /* + ** No - just print it normally. + */ + + if( x != -1 || y != -1 ) { + cio_putchar_at( x, y, ch ); + switch( ch ) { + case '\n': + y += 1; + /* FALL THRU */ + + case '\r': + x = scroll_min_x; + break; + + default: + x += 1; + } + } + else { + cio_putchar( ch ); + } + } + } +} + +void cio_printf_at( unsigned int x, unsigned int y, char *fmt, ... ) { + do_printf( x, y, &fmt ); +} + +void cio_printf( char *fmt, ... ) { + do_printf( -1, -1, &fmt ); +} + +/* +** These are the "standard" IBM AT "Set 1" keycodes. +*/ + +static unsigned char scan_code[ 2 ][ 128 ] = { + { // unshifted characters +/* 00-07 */ '\377', '\033', '1', '2', '3', '4', '5', '6', +/* 08-0f */ '7', '8', '9', '0', '-', '=', '\b', '\t', +/* 10-17 */ 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', +/* 18-1f */ 'o', 'p', '[', ']', '\n', '\377', 'a', 's', +/* 20-27 */ 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', +/* 28-2f */ '\'', '`', '\377', '\\', 'z', 'x', 'c', 'v', +/* 30-37 */ 'b', 'n', 'm', ',', '.', '/', '\377', '*', +/* 38-3f */ '\377', ' ', '\377', '\377', '\377', '\377', '\377', '\377', +/* 40-47 */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '7', +/* 48-4f */ '8', '9', '-', '4', '5', '6', '+', '1', +/* 50-57 */ '2', '3', '0', '.', '\377', '\377', '\377', '\377', +/* 58-5f */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', +/* 60-67 */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', +/* 68-6f */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', +/* 70-77 */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', +/* 78-7f */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377' + }, + + { // shifted characters +/* 00-07 */ '\377', '\033', '!', '@', '#', '$', '%', '^', +/* 08-0f */ '&', '*', '(', ')', '_', '+', '\b', '\t', +/* 10-17 */ 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', +/* 18-1f */ 'O', 'P', '{', '}', '\n', '\377', 'A', 'S', +/* 20-27 */ 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', +/* 28-2f */ '"', '~', '\377', '|', 'Z', 'X', 'C', 'V', +/* 30-37 */ 'B', 'N', 'M', '<', '>', '?', '\377', '*', +/* 38-3f */ '\377', ' ', '\377', '\377', '\377', '\377', '\377', '\377', +/* 40-47 */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '7', +/* 48-4f */ '8', '9', '-', '4', '5', '6', '+', '1', +/* 50-57 */ '2', '3', '0', '.', '\377', '\377', '\377', '\377', +/* 58-5f */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', +/* 60-67 */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', +/* 68-6f */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', +/* 70-77 */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377', +/* 78-7f */ '\377', '\377', '\377', '\377', '\377', '\377', '\377', '\377' + } +}; + +/* +** Scan code masks +*/ + +// 'release' bit +#define REL_BIT 0x80 +#define CODE_BITS 0x7f + +#define IS_PRESS(c) (((c) & REL_BIT) == 0) +#define IS_RELEASE(c) (((c) & REL_BIT) != 0) + +/* +** Scan codes for some special characters +*/ + +// escape code - followed by another code byte +#define SCAN_ESC 0xe0 + +// shift keys: press, release +#define L_SHIFT_DN 0x2a +#define R_SHIFT_DN 0x36 +#define L_SHIFT_UP 0xaa +#define R_SHIFT_UP 0xb6 + +// control keys +#define L_CTRL_DN 0x1d +#define L_CTRL_UP 0x9d + +/* +** I/O communication constants +*/ +#define KBD_DATA 0x60 +#define KBD_STATUS 0x64 +#define READY 0x1 + +/* +** Circular buffer for input characters. Characters are inserted at +** next_space, and are removed at next_char. Buffer is empty if +** these are equal. +*/ +#define C_BUFSIZE 200 + +static char input_buffer[ C_BUFSIZE ]; +static volatile char *next_char = input_buffer; +static volatile char *next_space = input_buffer; + +static volatile char *increment( volatile char *pointer ) { + if( ++pointer >= input_buffer + C_BUFSIZE ) { + pointer = input_buffer; + } + return pointer; +} + +static int input_scan_code( int code ) { + static int shift = 0; + static int ctrl_mask = BMASK8; + int rval = -1; + + /* + ** Do the shift processing + */ + code &= BMASK8; + switch( code ) { + case L_SHIFT_DN: + case R_SHIFT_DN: + shift = 1; + break; + + case L_SHIFT_UP: + case R_SHIFT_UP: + shift = 0; + break; + + case L_CTRL_DN: + ctrl_mask = BMASK5; + break; + + case L_CTRL_UP: + ctrl_mask = BMASK8; + break; + + default: + /* + ** Process ordinary characters only on the press (to handle + ** autorepeat). Ignore undefined scan codes. + */ + if( IS_PRESS(code) ) { + code = scan_code[ shift ][ (int)code ]; + if( code != '\377' ) { + volatile char *next = increment( next_space ); + + /* + ** Store character only if there's room + */ + rval = code & ctrl_mask; + if( next != next_char ) { + *next_space = code & ctrl_mask; + next_space = next; + } + } + } + } + return( rval ); +} + +static void keyboard_isr( int vector, int code ) { + + int data = inb( KBD_DATA ); + int val = input_scan_code( data ); + + // if there is a notification function, call it + if( val != -1 && notify ) + notify( val ); + + outb( PIC1_CMD, PIC_EOI ); +} + +int cio_getchar( void ) { + char c; + int interrupts_enabled = r_eflags() & EFL_IF; + + while( next_char == next_space ) { + if( !interrupts_enabled ) { + /* + ** Must read the next keystroke ourselves. + */ + while( ( inb( KBD_STATUS ) & READY ) == 0 ) { + ; + } + (void) input_scan_code( inb( KBD_DATA ) ); + } + } + + c = *next_char & BMASK8; + next_char = increment( next_char ); + if( c != EOT ) { + cio_putchar( c ); + } + return c; +} + +int cio_gets( char *buffer, unsigned int size ) { + char ch; + int count = 0; + + while( size > 1 ) { + ch = cio_getchar(); + if( ch == EOT ) { + break; + } + *buffer++ = ch; + count += 1; + size -= 1; + if( ch == '\n' ) { + break; + } + } + *buffer = '\0'; + return count; +} + +int cio_input_queue( void ) { + int n_chars = next_space - next_char; + + if( n_chars < 0 ) { + n_chars += C_BUFSIZE; + } + return n_chars; +} + +/* +** Initialization routines +*/ +void cio_init( void (*fcn)(int) ) { + /* + ** Screen dimensions + */ + min_x = SCREEN_MIN_X; + min_y = SCREEN_MIN_Y; + max_x = SCREEN_MAX_X; + max_y = SCREEN_MAX_Y; + + /* + ** Scrolling region + */ + scroll_min_x = SCREEN_MIN_X; + scroll_min_y = SCREEN_MIN_Y; + scroll_max_x = SCREEN_MAX_X; + scroll_max_y = SCREEN_MAX_Y; + + /* + ** Initial cursor location + */ + curr_y = min_y; + curr_x = min_x; + setcursor(); + + /* + ** Notification function (or NULL) + */ + notify = fcn; + + /* + ** Set up the interrupt handler for the keyboard + */ + install_isr( VEC_KBD, keyboard_isr ); +} + +#ifdef SA_DEBUG +int main() { + cio_printf( "%d\n", 123 ); + cio_printf( "%d\n", -123 ); + cio_printf( "%d\n", 0x7fffffff ); + cio_printf( "%d\n", 0x80000001 ); + cio_printf( "%d\n", 0x80000000 ); + cio_printf( "x%14dy\n", 0x80000000 ); + cio_printf( "x%-14dy\n", 0x80000000 ); + cio_printf( "x%014dy\n", 0x80000000 ); + cio_printf( "x%-014dy\n", 0x80000000 ); + cio_printf( "%s\n", "xyz" ); + cio_printf( "|%10s|\n", "xyz" ); + cio_printf( "|%-10s|\n", "xyz" ); + cio_printf( "%c\n", 'x' ); + cio_printf( "|%4c|\n", 'y' ); + cio_printf( "|%-4c|\n", 'y' ); + cio_printf( "|%04c|\n", 'y' ); + cio_printf( "|%-04c|\n", 'y' ); + cio_printf( "|%3d|\n", 5 ); + cio_printf( "|%3d|\n", 54321 ); + cio_printf( "%x\n", 0x123abc ); + cio_printf( "|%04x|\n", 20 ); + cio_printf( "|%012x|\n", 0xfedcba98 ); + cio_printf( "|%-012x|\n", 0x76543210 ); +} + +int curr_x, curr_y, max_x, max_y; +#endif diff --git a/kernel/clock.c b/kernel/clock.c new file mode 100644 index 0000000..96f71c4 --- /dev/null +++ b/kernel/clock.c @@ -0,0 +1,163 @@ +/** +** @file clock.c +** +** @author CSCI-452 class of 20245 +** +** @brief Clock module implementation +*/ + +#define KERNEL_SRC + +#include <common.h> + +#include <clock.h> +#include <procs.h> + +#include <x86/arch.h> +#include <x86/pic.h> +#include <x86/pit.h> + +/* +** PRIVATE DEFINITIONS +*/ + +/* +** PRIVATE DATA TYPES +*/ + +/* +** PRIVATE GLOBAL VARIABLES +*/ + +// pinwheel control variables +static uint32_t pinwheel; // pinwheel counter +static uint32_t pindex; // index into pinwheel string + +/* +** PUBLIC GLOBAL VARIABLES +*/ + +// current system time +uint32_t system_time; + +/* +** PRIVATE FUNCTIONS +*/ + +/** +** Name: clk_isr +** +** The ISR for the clock +** +** @param vector Vector number for the clock interrupt +** @param code Error code (0 for this interrupt) +*/ +static void clk_isr( int vector, int code ) { + + // spin the pinwheel + + ++pinwheel; + if( pinwheel == (CLOCK_FREQ / 10) ) { + pinwheel = 0; + ++pindex; + cio_putchar_at( 0, 0, "|/-\\"[ pindex & 3 ] ); + } + +#if defined(SYSTEM_STATUS) + // Periodically, dump the queue lengths and the SIO status (along + // with the SIO buffers, if non-empty). + // + // Define the symbol SYSTEM_STATUS with a value equal to the desired + // reporting frequency, in seconds. + + if( (system_time % SEC_TO_TICKS(SYSTEM_STATUS)) == 0 ) { + cio_printf_at( 1, 0, " queues: R[%u] W[%u] S[%u] Z[%u] I[%u] ", + pcb_queue_length(ready), + pcb_queue_length(waiting), + pcb_queue_length(sleeping), + pcb_queue_length(zombie), + pcb_queue_length(sioread) + ); + } +#endif + + // time marches on! + ++system_time; + + // wake up any sleeping processes whose time has come + // + // we give them preference over the current process when + // it is scheduled again + + do { + // if there isn't anyone in the sleep queue, we're done + if( pcb_queue_empty(sleeping) ) { + break; + } + + // peek at the first member of the queue + pcb_t *tmp = pcb_queue_peek( sleeping ); + assert( tmp != NULL ); + + // the sleep queue is sorted in ascending order by wakeup + // time, so we know that the retrieved PCB's wakeup time is + // the earliest of any process on the sleep queue; if that + // time hasn't arrived yet, there's nobody left to awaken + + if( tmp->wakeup > system_time ) { + break; + } + + // OK, we need to wake this process up + assert( pcb_queue_remove(sleeping,&tmp) == SUCCESS ); + schedule( tmp ); + } while( 1 ); + + // next, we decrement the current process' remaining time + current->ticks -= 1; + + // has it expired? + if( current->ticks < 1 ) { + // yes! reschedule it + schedule( current ); + current = NULL; + // and pick a new process + dispatch(); + } + + // tell the PIC we're done + outb( PIC1_CMD, PIC_EOI ); +} + +/* +** PUBLIC FUNCTIONS +*/ + +/** +** Name: clk_init +** +** Initializes the clock module +** +*/ +void clk_init( void ) { + +#if TRACING_INIT + cio_puts( " Clock" ); +#endif + + // start the pinwheel + pinwheel = (CLOCK_FREQ / 10) - 1; + pindex = 0; + + // return to the dawn of time + system_time = 0; + + // configure the clock + uint32_t divisor = PIT_FREQ / CLOCK_FREQ; + outb( PIT_CONTROL_PORT, PIT_0_LOAD | PIT_0_SQUARE ); + outb( PIT_0_PORT, divisor & 0xff ); // LSB of divisor + outb( PIT_0_PORT, (divisor >> 8) & 0xff ); // MSB of divisor + + // register the second-stage ISR + install_isr( VEC_TIMER, clk_isr ); +} diff --git a/kernel/isrs.S b/kernel/isrs.S new file mode 100644 index 0000000..421e6d2 --- /dev/null +++ b/kernel/isrs.S @@ -0,0 +1,374 @@ +/* +** @file isrs.S +** +** @author K. Reek +** @authors Jon Coles, Warren R. Carithers, Margaret Reek +** @author numerous Systems Programming classes +** +** Stubs for ISRs. +** +** This module provides the stubs needed for interrupts to save +** the machine state before calling the ISR. All interrupts have +** their own stub which pushes the interrupt number on the stack. +** This makes it possible for a common ISR to determine which +** interrupted occurred. +*/ + +#define ASM_SRC + + .arch i386 + +#include <bootstrap.h> + +/* +** Configuration options - define in Makefile +** +** TRACE_CX include context restore debugging code +*/ + + .text + +/* +** Macros for the isr stubs. Some interrupts push an error code on +** the stack and others don't; for those that don't we simply push +** a zero so that cleaning up from either type is identical. +** +** Note: these are not marked as global symbols, as they are never +** accessed directly outside of this file. This could be changed +** if need be by adding this line to each macro definition right +** after the #define line: +** +** .global isr_##vector +*/ + +#define ISR(vector) \ +isr_##vector: ; \ + pushl $0 ; \ + pushl $vector ; \ + jmp isr_save + +#define ERR_ISR(vector) \ +isr_##vector: ; \ + pushl $vector ; \ + jmp isr_save + + .globl isr_table + .globl isr_restore + +/* +** This routine saves the machine state, calls the ISR, and then +** restores the machine state and returns from the interrupt. +** +******************************************************************** +******************************************************************** +** NOTE: this code is highly application-specific, and will most ** +** probably require modification to tailor it. ** +** ** +** Examples of mods: switch to/from user stack, context switch ** +** changes, etc. ** +******************************************************************** +******************************************************************** +*/ + +isr_save: + +/* +** Begin by saving the CPU state (except for the FP context information). +** +** At this point, the stack looks like this: +** +** esp -> vector # saved by the entry macro +** error code, or 0 saved by the hardware, or the entry macro +** saved EIP saved by the hardware +** saved CS saved by the hardware +** saved EFLAGS saved by the hardware +*/ + pusha // save E*X, ESP, EBP, ESI, EDI + pushl %ds // save segment registers + pushl %es + pushl %fs + pushl %gs + pushl %ss + +/* +** Stack contents (all 32-bit longwords) and offsets from ESP: +** +** SS GS FS ES DS EDI ESI EBP ESP EBX EDX ECX EAX vec cod EIP CS EFL +** 0 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64 68 +** +** Note that the saved ESP is the contents before the PUSHA. +** +** Set up parameters for the ISR call. +*/ + movl 52(%esp),%eax // get vector number and error code + movl 56(%esp),%ebx + +/* +*********************** +** MOD FOR 20245 ** +*********************** +*/ + +/* +** We need to switch to the system stack. This requires that we save +** the user context pointer into the current PCB, then load ESP with +** the initial system stack pointer. +*/ + + .globl current + .globl kernel_esp + + // save the context pointer + movl current, %edx + movl %esp, (%edx) + + // also save the page directory pointer + movl %cr3, %ecx + movl %ecx, 4(%edx) + + // switch to the system stack + // + // NOTE: this is inherently non-reentrant! If/when the OS + // is converted from monolithic to something that supports + // reentrant or interruptable ISRs, this code will need to + // be changed to support that! + + movl kernel_esp, %esp + + // we don't change CR3 because all the user PDIRs are + // set up with mappings for the OS in the upper half + +/* +*********************** +** END MOD FOR 20245 ** +*********************** +*/ + + pushl %ebx // put them on the top of the stack ... + pushl %eax // ... as parameters for the ISR + +/* +** Call the ISR +*/ + movl isr_table(,%eax,4),%ebx + call *%ebx + addl $8,%esp // pop the two parameters + +/* +** Context restore begins here +*/ + +isr_restore: + +/* +*********************** +** MOD FOR 20245 ** +*********************** +*/ + movl current, %ebx // return to the user stack + movl (%ebx), %esp // ESP --> context save area + movl 4(%ebx), %ecx // page directory pointer + movl %ecx, %cr3 + + // now we're operating with the user process' + // page directory and stack + +/* +*********************** +** END MOD FOR 20245 ** +*********************** +*/ + +#ifdef TRACE_CX +/* +** DEBUGGING CODE PART 1 +** +** This code will execute during each context restore, and +** should be modified to print out whatever debugging information +** is desired. +** +** By default, it prints out the CPU context being restored; it +** relies on the standard save sequence (see above). +*/ + .globl cio_printf_at + + pushl $fmt + pushl $1 + pushl $0 + call cio_printf_at + addl $12,%esp +/* +** END OF DEBUGGING CODE PART 1 +*/ +#endif + +/* +** Restore the context. +*/ + popl %ss // restore the segment registers + popl %gs + popl %fs + popl %es + popl %ds + popa // restore others + addl $8, %esp // discard the error code and vector + iret // and return + +#ifdef TRACE_CX +/* +** DEBUGGING CODE PART 2 +** +** This format string is arranged according to the ordering of values +** in the context save area on the stack. +*/ +fmt: .ascii " ss=%08x gs=%08x fs=%08x es=%08x ds=%08x\n" + .ascii "edi=%08x esi=%08x ebp=%08x esp=%08x ebx=%08x\n" + .ascii "edx=%08x ecx=%08x eax=%08x vec=%08x cod=%08x\n" + .string "eip=%08x cs=%08x efl=%08x\n" + +/* +** END OF DEBUGGING CODE PART 2 +*/ +#endif + +/* +** Here we generate the individual stubs for each interrupt. +*/ +ISR(0x00); ISR(0x01); ISR(0x02); ISR(0x03); +ISR(0x04); ISR(0x05); ISR(0x06); ISR(0x07); +ERR_ISR(0x08); ISR(0x09); ERR_ISR(0x0a); ERR_ISR(0x0b); +ERR_ISR(0x0c); ERR_ISR(0x0d); ERR_ISR(0x0e); ISR(0x0f); +ISR(0x10); ERR_ISR(0x11); ISR(0x12); ISR(0x13); +ISR(0x14); ERR_ISR(0x15); ISR(0x16); ISR(0x17); +ISR(0x18); ISR(0x19); ISR(0x1a); ISR(0x1b); +ISR(0x1c); ISR(0x1d); ISR(0x1e); ISR(0x1f); +ISR(0x20); ISR(0x21); ISR(0x22); ISR(0x23); +ISR(0x24); ISR(0x25); ISR(0x26); ISR(0x27); +ISR(0x28); ISR(0x29); ISR(0x2a); ISR(0x2b); +ISR(0x2c); ISR(0x2d); ISR(0x2e); ISR(0x2f); +ISR(0x30); ISR(0x31); ISR(0x32); ISR(0x33); +ISR(0x34); ISR(0x35); ISR(0x36); ISR(0x37); +ISR(0x38); ISR(0x39); ISR(0x3a); ISR(0x3b); +ISR(0x3c); ISR(0x3d); ISR(0x3e); ISR(0x3f); +ISR(0x40); ISR(0x41); ISR(0x42); ISR(0x43); +ISR(0x44); ISR(0x45); ISR(0x46); ISR(0x47); +ISR(0x48); ISR(0x49); ISR(0x4a); ISR(0x4b); +ISR(0x4c); ISR(0x4d); ISR(0x4e); ISR(0x4f); +ISR(0x50); ISR(0x51); ISR(0x52); ISR(0x53); +ISR(0x54); ISR(0x55); ISR(0x56); ISR(0x57); +ISR(0x58); ISR(0x59); ISR(0x5a); ISR(0x5b); +ISR(0x5c); ISR(0x5d); ISR(0x5e); ISR(0x5f); +ISR(0x60); ISR(0x61); ISR(0x62); ISR(0x63); +ISR(0x64); ISR(0x65); ISR(0x66); ISR(0x67); +ISR(0x68); ISR(0x69); ISR(0x6a); ISR(0x6b); +ISR(0x6c); ISR(0x6d); ISR(0x6e); ISR(0x6f); +ISR(0x70); ISR(0x71); ISR(0x72); ISR(0x73); +ISR(0x74); ISR(0x75); ISR(0x76); ISR(0x77); +ISR(0x78); ISR(0x79); ISR(0x7a); ISR(0x7b); +ISR(0x7c); ISR(0x7d); ISR(0x7e); ISR(0x7f); +ISR(0x80); ISR(0x81); ISR(0x82); ISR(0x83); +ISR(0x84); ISR(0x85); ISR(0x86); ISR(0x87); +ISR(0x88); ISR(0x89); ISR(0x8a); ISR(0x8b); +ISR(0x8c); ISR(0x8d); ISR(0x8e); ISR(0x8f); +ISR(0x90); ISR(0x91); ISR(0x92); ISR(0x93); +ISR(0x94); ISR(0x95); ISR(0x96); ISR(0x97); +ISR(0x98); ISR(0x99); ISR(0x9a); ISR(0x9b); +ISR(0x9c); ISR(0x9d); ISR(0x9e); ISR(0x9f); +ISR(0xa0); ISR(0xa1); ISR(0xa2); ISR(0xa3); +ISR(0xa4); ISR(0xa5); ISR(0xa6); ISR(0xa7); +ISR(0xa8); ISR(0xa9); ISR(0xaa); ISR(0xab); +ISR(0xac); ISR(0xad); ISR(0xae); ISR(0xaf); +ISR(0xb0); ISR(0xb1); ISR(0xb2); ISR(0xb3); +ISR(0xb4); ISR(0xb5); ISR(0xb6); ISR(0xb7); +ISR(0xb8); ISR(0xb9); ISR(0xba); ISR(0xbb); +ISR(0xbc); ISR(0xbd); ISR(0xbe); ISR(0xbf); +ISR(0xc0); ISR(0xc1); ISR(0xc2); ISR(0xc3); +ISR(0xc4); ISR(0xc5); ISR(0xc6); ISR(0xc7); +ISR(0xc8); ISR(0xc9); ISR(0xca); ISR(0xcb); +ISR(0xcc); ISR(0xcd); ISR(0xce); ISR(0xcf); +ISR(0xd0); ISR(0xd1); ISR(0xd2); ISR(0xd3); +ISR(0xd4); ISR(0xd5); ISR(0xd6); ISR(0xd7); +ISR(0xd8); ISR(0xd9); ISR(0xda); ISR(0xdb); +ISR(0xdc); ISR(0xdd); ISR(0xde); ISR(0xdf); +ISR(0xe0); ISR(0xe1); ISR(0xe2); ISR(0xe3); +ISR(0xe4); ISR(0xe5); ISR(0xe6); ISR(0xe7); +ISR(0xe8); ISR(0xe9); ISR(0xea); ISR(0xeb); +ISR(0xec); ISR(0xed); ISR(0xee); ISR(0xef); +ISR(0xf0); ISR(0xf1); ISR(0xf2); ISR(0xf3); +ISR(0xf4); ISR(0xf5); ISR(0xf6); ISR(0xf7); +ISR(0xf8); ISR(0xf9); ISR(0xfa); ISR(0xfb); +ISR(0xfc); ISR(0xfd); ISR(0xfe); ISR(0xff); + + .data + +/* +** This table contains the addresses where each of the preceding +** stubs begins. This information is needed to initialize the +** Interrupt Descriptor Table in support.c +*/ + .globl isr_stub_table +isr_stub_table: + .long isr_0x00, isr_0x01, isr_0x02, isr_0x03 + .long isr_0x04, isr_0x05, isr_0x06, isr_0x07 + .long isr_0x08, isr_0x09, isr_0x0a, isr_0x0b + .long isr_0x0c, isr_0x0d, isr_0x0e, isr_0x0f + .long isr_0x10, isr_0x11, isr_0x12, isr_0x13 + .long isr_0x14, isr_0x15, isr_0x16, isr_0x17 + .long isr_0x18, isr_0x19, isr_0x1a, isr_0x1b + .long isr_0x1c, isr_0x1d, isr_0x1e, isr_0x1f + .long isr_0x20, isr_0x21, isr_0x22, isr_0x23 + .long isr_0x24, isr_0x25, isr_0x26, isr_0x27 + .long isr_0x28, isr_0x29, isr_0x2a, isr_0x2b + .long isr_0x2c, isr_0x2d, isr_0x2e, isr_0x2f + .long isr_0x30, isr_0x31, isr_0x32, isr_0x33 + .long isr_0x34, isr_0x35, isr_0x36, isr_0x37 + .long isr_0x38, isr_0x39, isr_0x3a, isr_0x3b + .long isr_0x3c, isr_0x3d, isr_0x3e, isr_0x3f + .long isr_0x40, isr_0x41, isr_0x42, isr_0x43 + .long isr_0x44, isr_0x45, isr_0x46, isr_0x47 + .long isr_0x48, isr_0x49, isr_0x4a, isr_0x4b + .long isr_0x4c, isr_0x4d, isr_0x4e, isr_0x4f + .long isr_0x50, isr_0x51, isr_0x52, isr_0x53 + .long isr_0x54, isr_0x55, isr_0x56, isr_0x57 + .long isr_0x58, isr_0x59, isr_0x5a, isr_0x5b + .long isr_0x5c, isr_0x5d, isr_0x5e, isr_0x5f + .long isr_0x60, isr_0x61, isr_0x62, isr_0x63 + .long isr_0x64, isr_0x65, isr_0x66, isr_0x67 + .long isr_0x68, isr_0x69, isr_0x6a, isr_0x6b + .long isr_0x6c, isr_0x6d, isr_0x6e, isr_0x6f + .long isr_0x70, isr_0x71, isr_0x72, isr_0x73 + .long isr_0x74, isr_0x75, isr_0x76, isr_0x77 + .long isr_0x78, isr_0x79, isr_0x7a, isr_0x7b + .long isr_0x7c, isr_0x7d, isr_0x7e, isr_0x7f + .long isr_0x80, isr_0x81, isr_0x82, isr_0x83 + .long isr_0x84, isr_0x85, isr_0x86, isr_0x87 + .long isr_0x88, isr_0x89, isr_0x8a, isr_0x8b + .long isr_0x8c, isr_0x8d, isr_0x8e, isr_0x8f + .long isr_0x90, isr_0x91, isr_0x92, isr_0x93 + .long isr_0x94, isr_0x95, isr_0x96, isr_0x97 + .long isr_0x98, isr_0x99, isr_0x9a, isr_0x9b + .long isr_0x9c, isr_0x9d, isr_0x9e, isr_0x9f + .long isr_0xa0, isr_0xa1, isr_0xa2, isr_0xa3 + .long isr_0xa4, isr_0xa5, isr_0xa6, isr_0xa7 + .long isr_0xa8, isr_0xa9, isr_0xaa, isr_0xab + .long isr_0xac, isr_0xad, isr_0xae, isr_0xaf + .long isr_0xb0, isr_0xb1, isr_0xb2, isr_0xb3 + .long isr_0xb4, isr_0xb5, isr_0xb6, isr_0xb7 + .long isr_0xb8, isr_0xb9, isr_0xba, isr_0xbb + .long isr_0xbc, isr_0xbd, isr_0xbe, isr_0xbf + .long isr_0xc0, isr_0xc1, isr_0xc2, isr_0xc3 + .long isr_0xc4, isr_0xc5, isr_0xc6, isr_0xc7 + .long isr_0xc8, isr_0xc9, isr_0xca, isr_0xcb + .long isr_0xcc, isr_0xcd, isr_0xce, isr_0xcf + .long isr_0xd0, isr_0xd1, isr_0xd2, isr_0xd3 + .long isr_0xd4, isr_0xd5, isr_0xd6, isr_0xd7 + .long isr_0xd8, isr_0xd9, isr_0xda, isr_0xdb + .long isr_0xdc, isr_0xdd, isr_0xde, isr_0xdf + .long isr_0xe0, isr_0xe1, isr_0xe2, isr_0xe3 + .long isr_0xe4, isr_0xe5, isr_0xe6, isr_0xe7 + .long isr_0xe8, isr_0xe9, isr_0xea, isr_0xeb + .long isr_0xec, isr_0xed, isr_0xee, isr_0xef + .long isr_0xf0, isr_0xf1, isr_0xf2, isr_0xf3 + .long isr_0xf4, isr_0xf5, isr_0xf6, isr_0xf7 + .long isr_0xf8, isr_0xf9, isr_0xfa, isr_0xfb + .long isr_0xfc, isr_0xfd, isr_0xfe, isr_0xff diff --git a/kernel/kernel.c b/kernel/kernel.c new file mode 100644 index 0000000..53e50a7 --- /dev/null +++ b/kernel/kernel.c @@ -0,0 +1,381 @@ +/** +** @file kernel.c +** +** @author CSCI-452 class of 20245 +** +** @brief Kernel support routines +*/ + +#define KERNEL_SRC + +#include <common.h> +#include <cio.h> +#include <clock.h> +#include <kmem.h> +#include <procs.h> +#include <sio.h> +#include <syscalls.h> +#include <user.h> +#include <userids.h> +#include <vm.h> + +/* +** PRIVATE DEFINITIONS +*/ + +/* +** PRIVATE DATA TYPES +*/ + +/* +** PRIVATE GLOBAL VARIABLES +*/ + +/* +** PUBLIC GLOBAL VARIABLES +*/ + +// character buffers, usable throughout the OS +// nto guaranteed to retain their contents across an exception return +char b256[256]; // primarily used for message creation +char b512[512]; // used by PANIC macro + +/* +** PRIVATE FUNCTIONS +*/ + +/* +** PRIVATE FUNCTIONS +*/ + +/** +** report - report the system configuration +** +** Prints configuration information about the OS on the console monitor. +** +** @param dtrace Decode the TRACE options +*/ +static void kreport( bool_t dtrace ) { + + cio_puts( "\n-------------------------------\n" ); + cio_printf( "Config: N_PROCS = %d", N_PROCS ); + cio_printf( " N_PRIOS = %d", N_PRIOS ); + cio_printf( " N_STATES = %d", N_STATES ); + cio_printf( " CLOCK = %dHz\n", CLOCK_FREQ ); + + // This code is ugly, but it's the simplest way to + // print out the values of compile-time options + // without spending a lot of execution time at it. + + cio_puts( "Options: " +#ifdef RPT_INT_UNEXP + " R-uint" +#endif +#ifdef RPT_INT_MYSTERY + " R-mint" +#endif +#ifdef TRACE_CX + " CX" +#endif +#ifdef CONSOLE_STATS + " Cstats" +#endif + ); // end of cio_puts() call + +#ifdef SANITY + cio_printf( " SANITY = %d", SANITY ); +#endif +#ifdef STATUS + cio_printf( " STATUS = %d", STATUS ); +#endif + +#if TRACE > 0 + cio_printf( " TRACE = 0x%04x\n", TRACE ); + + // decode the trace settings if that was requested + if( TRACING_SOMETHING && dtrace ) { + + // this one is simpler - we rely on string literal + // concatenation in the C compiler to create one + // long string to print out + + cio_puts( "Tracing:" +#if TRACING_PCB + " PCB" +#endif +#if TRACING_STACK + " STK" +#endif +#if TRACING_QUEUE + " QUE" +#endif +#if TRACING_SCHED + " SCHED" +#endif +#if TRACING_SYSCALLS + " SCALL" +#endif +#if TRACING_SYSRETS + " SRET" +#endif +#if TRACING_EXIT + " EXIT" +#endif +#if TRACING_DISPATCH + " DISPATCH" +#endif +#if TRACING_INIT + " INIT" +#endif +#if TRACING_KMEM + " KM" +#endif +#if TRACING_KMEM_FREELIST + " KMFL" +#endif +#if TRACING_KMEM_INIT + " KMIN" +#endif +#if TRACING_SPAWN + " SPAWN" +#endif +#if TRACING_SIO_STAT + " S_STAT" +#endif +#if TRACING_SIO_ISR + " S_ISR" +#endif +#if TRACING_SIO_RD + " S_RD" +#endif +#if TRACING_SIO_WR + " S_WR" +#endif +#if TRACING_USER + " USER" +#endif +#if TRACING_ELF + " ELF" +#endif + ); // end of cio_puts() call + } +#endif /* TRACE > 0 */ + + cio_puts( "\n-------------------------------\n" ); +} + + +#if defined(CONSOLE_STATS) +/** +** stats - callback routine for console statistics +** +** Called by the CIO module when a key is pressed on the +** console keyboard. Depending on the key, it will print +** statistics on the console display, or will cause the +** user shell process to be dispatched. +** +** This code runs as part of the CIO ISR. +*/ +static void stats( int code ) { + + switch( code ) { + + case 'a': // dump the active table + ptable_dump( "\nActive processes", false ); + break; + + case 'c': // dump context info for all active PCBs + ctx_dump_all( "\nContext dump" ); + break; + + case 'p': // dump the active table and all PCBs + ptable_dump( "\nActive processes", true ); + break; + + case 'q': // dump the queues + // code to dump out any/all queues + pcb_queue_dump( "R", ready ); + pcb_queue_dump( "W", waiting ); + pcb_queue_dump( "S", sleeping ); + pcb_queue_dump( "Z", zombie ); + pcb_queue_dump( "I", sioread ); + break; + + case 'r': // print system configuration information + report( true ); + break; + + // ignore CR and LF + case '\r': // FALL THROUGH + case '\n': + break; + + default: + cio_printf( "console: unknown request '0x%02x'\n", code ); + // FALL THROUGH + + case 'h': // help message + cio_puts( "\nCommands:\n" + " a -- dump the active table\n" + " c -- dump contexts for active processes\n" + " h -- this message\n" + " p -- dump the active table and all PCBs\n" + " q -- dump the queues\n" + " r -- print system configuration\n" + ); + break; + } +} +#endif + +/* +** PUBLIC FUNCTIONS +*/ + +/** +** main - system initialization routine +** +** Called by the startup code immediately before returning into the +** first user process. +** +** Making this type 'int' keeps the compiler happy. +*/ +int main( void ) { + + /* + ** BOILERPLATE CODE - taken from basic framework + ** + ** Initialize interrupt stuff. + */ + + init_interrupts(); // IDT and PIC initialization + + /* + ** Console I/O system. + ** + ** Does not depend on the other kernel modules, so we can + ** initialize it before we initialize the kernel memory + ** and queue modules. + */ + +#if defined(CONSOLE_STATS) + cio_init( stats ); +#else + cio_init( NULL ); // no console callback routine +#endif + + cio_clearscreen(); // wipe out whatever is there + + /* + ** TERM-SPECIFIC CODE STARTS HERE + */ + + /* + ** Initialize various OS modules + ** + ** Other modules (clock, SIO, syscall, etc.) are expected to + ** install their own ISRs in their initialization routines. + */ + + cio_puts( "System initialization starting.\n" ); + cio_puts( "-------------------------------\n" ); + + cio_puts( "Modules:" ); + + // call the module initialization functions, being + // careful to follow any module precedence requirements + + km_init(); // MUST BE FIRST +#if TRACING_KMEM || TRACING_KMEM_FREE + delay( DELAY_2_SEC ); // approximately +#endif + + // other module initialization calls here + clk_init(); // clock + pcb_init(); // process (PCBs, queues, scheduler) +#if TRACING_PCB + delay( DELAY_2_SEC ); +#endif + sio_init(); // serial i/o + sys_init(); // system call +#if TRACING_SYSCALLS || TRACING_SYSRETS + delay( DELAY_2_SEC ); +#endif + vm_init(); // virtual memory + user_init(); // user code handling + + cio_puts( "\nModule initialization complete.\n" ); + cio_puts( "-------------------------------\n" ); + + // report our configuration options + kreport( true ); + + delay( DELAY_3_SEC ); + + /* + ** Other tasks typically performed here: + ** + ** Enabling any I/O devices (e.g., SIO xmit/rcv) + */ + + /* + ** Create the initial user process + ** + ** This code is largely stolen from the fork() and exec() + ** implementations in syscalls.c; if those change, this must + ** also change. + */ + + // if we can't get a PCB, there's no use continuing! + assert( pcb_alloc(&init_pcb) == SUCCESS ); + + // fill in the necessary details + init_pcb->pid = PID_INIT; + init_pcb->state = STATE_NEW; + init_pcb->priority = PRIO_HIGH; + + // find the 'init' program + prog_t *prog = user_locate( Init ); + assert( prog != NULL ); + + // command-line arguments for 'init' + const char *args[2] = { "init", NULL }; + + // load it + assert( user_load(prog,init_pcb,args) == SUCCESS ); + + // send it on its merry way + schedule( init_pcb ); + +#ifdef TRACE_CX + // if we're using a scrolling region, wait a bit more and then set it up + delay( DELAY_7_SEC ); + + // define a scrolling region in the top 7 lines of the screen + cio_setscroll( 0, 7, 99, 99 ); + + // clear it + cio_clearscroll(); + + // clear the top line + cio_puts_at( 0, 0, "* " ); + // separator + cio_puts_at( 0, 6, "================================================================================" ); +#endif + + // switch to the "real" kernel page directory + vm_set_kvm(); + + /* + ** END OF TERM-SPECIFIC CODE + ** + ** Finally, report that we're all done. + */ + + cio_puts( "System initialization complete.\n" ); + cio_puts( "-------------------------------\n" ); + + sio_enable( SIO_RX ); + + return 0; +} diff --git a/kernel/kernel.ld b/kernel/kernel.ld new file mode 100644 index 0000000..2007432 --- /dev/null +++ b/kernel/kernel.ld @@ -0,0 +1,57 @@ +/* +** Simple linker script for the 20245 kernel. +*/ + +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) + +SECTIONS +{ + /* Link the kernel at this address. */ + /* Must match what is defined in vm.h! */ + . = 0x80010000; + + .text : AT(0x10000) { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + + /* standard symbols */ + PROVIDE(etext = .); + PROVIDE(_etext = .); + + /* put read-only data next */ + .rodata : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + } + + /* Align the data segment at the next page boundary */ + /* . = ALIGN(0x1000); */ + + PROVIDE(data = .); + PROVIDE(_data = .); + + /* The data segment */ + .data : { + *(.data) + } + + PROVIDE(edata = .); + PROVIDE(_edata = .); + + /* page-align the BSS */ + . = ALIGN(0x1000); + + PROVIDE(__bss_start = .); + + .bss : { + *(.bss) + } + + PROVIDE(end = .); + PROVIDE(_end = .); + + /DISCARD/ : { + *(.stab .stab_info .stabstr .eh_frame .note.GNU-stack .note.gnu.property .comment) + } +} diff --git a/kernel/kmem.c b/kernel/kmem.c new file mode 100644 index 0000000..8777f49 --- /dev/null +++ b/kernel/kmem.c @@ -0,0 +1,681 @@ +/** +** @file kmem.c +** +** @author Warren R. Carithers +** @author Kenneth Reek +** @author 4003-506 class of 20013 +** +** @brief Functions to perform dynamic memory allocation in the OS. +** +** NOTE: these should NOT be called by user processes! +** +** This allocator functions as a simple "slab" allocator; it allows +** allocation of either 4096-byte ("page") or 1024-byte ("slice") +** chunks of memory from the free pool. The free pool is initialized +** using the memory map provided by the BIOS during the boot sequence, +** and contains a series of blocks which are each one page of memory +** (4KB, and aligned at 4KB boundaries); they are held in the free list +** in LIFO order, as all pages are created equal. +** +** Each allocator ("page" and "slice") allocates the first block from +** the appropriate free list. On deallocation, the block is added back +** to the free list. +** +** The "slice" allocator operates by taking blocks from the "page" +** allocator and splitting them into four 1K slices, which it then +** manages. Requests are made for slices one at a time. If the free +** list contains an available slice, it is unlinked and returned; +** otherwise, a page is requested from the page allocator, split into +** slices, and the slices are added to the free list, after which the +** first one is returned. The slice free list is a simple linked list +** of these 1K blocks; because they are all the same size, no ordering +** is done on the free list, and no coalescing is performed. +** +** This could be converted into a bitmap-based allocator pretty easily. +** A 4GB address space contains 2^20 (1,048,576) pages; at one bit per +** page frame, that's 131,072 (2^17) bytes to cover all of the address +** space, and that could be reduced by restricting allocatable space +** to a subset of the 4GB space. +** +** Compilation options: +** +** ALLOC_FAIL_PANIC if an internal slice allocation fails, panic +*/ + +#define KERNEL_SRC + +#include <common.h> + +// all other framework includes are next +#include <lib.h> + +#include <kmem.h> + +#include <list.h> +#include <x86/arch.h> +#include <x86/bios.h> +#include <bootstrap.h> +#include <cio.h> + +/* +** PRIVATE DEFINITIONS +*/ + +// parameters related to word and block sizes + +#define WORD_SIZE sizeof(int) +#define LOG2_OF_WORD_SIZE 2 + +#define LOG2_OF_PAGE_SIZE 12 + +#define LOG2_OF_SLICE_SIZE 10 + +// converters: pages to bytes, bytes to pages + +#define P2B(x) ((x) << LOG2_OF_PAGE_SIZE) +#define B2P(x) ((x) >> LOG2_OF_PAGE_SIZE) + +/* +** Name: adjacent +** +** Arguments: addresses of two blocks +** +** Description: Determines whether the second block immediately +** follows the first one. +*/ +#define adjacent(first,second) \ + ( (void *) (first) + P2B((first)->pages) == (void *) (second) ) + +/* +** PRIVATE DATA TYPES +*/ + +/* +** Memory region information returned by the BIOS +** +** This data consists of a 32-bit integer followed +** by an array of region descriptor structures. +*/ + +// a handy union for playing with 64-bit addresses +typedef union b64_u { + uint32_t part[2]; + uint64_t all; +} b64_t; + +// the halves of a 64-bit address +#define LOW part[0] +#define HIGH part[1] + +// memory region descriptor +typedef struct memregion_s { + b64_t base; // base address + b64_t length; // region length + uint32_t type; // type of region + uint32_t acpi; // ACPI 3.0 info +} __attribute__((__packed__)) region_t; + +/* +** Region types +*/ + +#define REGION_USABLE 1 +#define REGION_RESERVED 2 +#define REGION_ACPI_RECL 3 +#define REGION_ACPI_NVS 4 +#define REGION_BAD 5 + +/* +** ACPI 3.0 bit fields +*/ + +#define REGION_IGNORE 0x01 +#define REGION_NONVOL 0x02 + +/* +** 32-bit and 64-bit address values as 64-bit literals +*/ + +#define ADDR_BIT_32 0x0000000100000000LL +#define ADDR_LOW_HALF 0x00000000ffffffffLL +#define ADDR_HIGH_HALR 0xffffffff00000000LL + +#define ADDR_32_MAX ADDR_LOW_HALF +#define ADDR_64_FIRST ADDR_BIT_32 + +/* +** PRIVATE GLOBAL VARIABLES +*/ + +// freespace pools +static list_t free_pages; +static list_t free_slices; + +// block counts +static uint32_t n_pages; +static uint32_t n_slices; + +// initialization status +static int km_initialized; + +/* +** IMPORTED GLOBAL VARIABLES +*/ + +// this is no longer used; for simple situations, it can be used as +// the KM_LOW_CUTOFF value +// +// extern int _end; // end of the BSS section - provided by the linker + +/* +** FUNCTIONS +*/ + +/* +** FREE LIST MANAGEMENT +*/ + +/** +** Name: add_block +** +** Add a block to the free list. The block will be split into separate +** page-sized fragments which will each be added to the free_pages +** list; each of these will also be modified. +** +** @param[in] base Base address of the block +** @param[in] length Block length, in bytes +*/ +static void add_block( uint32_t base, uint32_t length ) { + + // don't add it if it isn't at least 4K + if( length < SZ_PAGE ) { + return; + } + +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_printf( " add(%08x,%08x): ", base, length ); +#endif + + // verify that the base address is a 4K boundary + if( (base & MOD4K_BITS) != 0 ) { + // nope - how many bytes will we lose from the beginning + uint_t loss = base & MOD4K_BITS; + // adjust the starting address: (n + 4K - 1) / 4K + base = (base + MOD4K_BITS) & MOD4K_MASK; + // adjust the length + length -= loss; + } + + // only want to add multiples of 4K; check the lower bits + if( (length & MOD4K_BITS) != 0 ) { + // round it down to 4K + length &= MOD4K_MASK; + } + + // split the block into pages and add them to the free list + + void *block = (void *) base; + void *blend = (void *) (base + length); + int npages = 0; + +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_printf( "-> base %08x len %08x: ", base, length ); +#endif + + while( block < blend ) { + + // just add to the front of the list + list_add( &free_pages, block ); + ++npages; + + // move to the next block + base += SZ_PAGE; + block = (void *) base; + } + + // add the count to our running total + n_pages += npages; + +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_printf( " -> %d pages\n", npages ); +#endif +} + +/** +** Name: km_init +** +** Find what memory is present on the system and +** construct the list of free memory blocks. +** +** Dependencies: +** Must be called before any other init routine that uses +** dynamic storage is called. +*/ +void km_init( void ) { + int32_t entries; + region_t *region; + +#if TRACING_INIT + // announce that we're starting initialization + cio_puts( " Kmem" ); +#endif + + // initially, nothing in the free lists + free_slices.next = NULL; + free_pages.next = NULL; + n_pages = n_slices = 0; + km_initialized = 0; + + /* + ** We ignore anything below our KM_LOW_CUTOFF address. In theory, + ** we should be able to re-use much of that space; in practice, + ** this is safer. + */ + + // get the list length + entries = *((int32_t *) MMAP_ADDR); + +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_printf( "\nKmem: %d regions\n", entries ); +#endif + + // if there are no entries, we have nothing to do! + if( entries < 1 ) { // note: entries == -1 could occur! + return; + } + + // iterate through the entries, adding things to the freelist + + region = ((region_t *) (MMAP_ADDR + 4)); + + for( int i = 0; i < entries; ++i, ++region ) { + +#if TRACING_KMEM | TRACING_KMEM_INIT + // report this region + cio_printf( "%3d: ", i ); + cio_printf( " B %08x%08x", + region->base.HIGH, region->base.LOW ); + cio_printf( " L %08x%08x", + region->length.HIGH, region->length.LOW ); + cio_printf( " T %08x A %08x", + region->type, region->acpi ); +#endif + + /* + ** Determine whether or not we should ignore this region. + ** + ** We ignore regions for several reasons: + ** + ** ACPI indicates it should be ignored + ** ACPI indicates it's non-volatile memory + ** Region type isn't "usable" + ** Region is above our address limit + ** + ** Currently, only "normal" (type 1) regions are considered + ** "usable" for our purposes. We could potentially expand + ** this to include ACPI "reclaimable" memory. + */ + + // first, check the ACPI one-bit flags + + if( ((region->acpi) & REGION_IGNORE) == 0 ) { +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_puts( " IGN\n" ); +#endif + continue; + } + + if( ((region->acpi) & REGION_NONVOL) != 0 ) { +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_puts( " NVOL\n" ); +#endif + continue; // we'll ignore this, too + } + + // next, the region type + + if( (region->type) != REGION_USABLE ) { +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_puts( " RCLM\n" ); +#endif + continue; // we won't attempt to reclaim ACPI memory (yet) + } + + /* + ** We have a "normal" memory region. We need to verify + ** that it's within our constraints. We won't add anything + ** to the free list if it is: + ** + ** * below our KM_LOW_CUTOFF value + ** * above out KM_HIGH_CUTOFF value. + ** + ** For blocks which straddle one of those limits, we will + ** split it, and only use the portion that's within those + ** bounds. + */ + + // grab the two 64-bit values to simplify things + uint64_t base = region->base.all; + uint64_t length = region->length.all; + uint64_t endpt = base + length; + + // ignore it if it's above our high cutoff point + if( base >= KM_HIGH_CUTOFF || endpt >= KM_HIGH_CUTOFF ) { + + // is the whole thing too high, or just part? + if( base >= KM_HIGH_CUTOFF ) { + // it's all too high! +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_puts( " HIGH\n" ); +#endif + continue; + } + + // some of it is usable - fix the end point + endpt = KM_HIGH_CUTOFF; + } + + // see if it's below our low cutoff point + if( base < KM_LOW_CUTOFF || endpt < KM_LOW_CUTOFF ) { + + // is the whole thing too low, or just part? + if( endpt < KM_LOW_CUTOFF ) { + // it's all below the cutoff! +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_puts( " LOW\n" ); +#endif + continue; + } + + // some of it is usable - reset the base address + base = KM_LOW_CUTOFF; + + // recalculate the length + length = endpt - base; + } + + // we survived the gauntlet - add the new block + // + // we may have changed the base or endpoint, so + // we should recalculate the length + length = endpt - base; + +#if TRACING_KMEM | TRACING_KMEM_INIT + cio_puts( " OK\n" ); +#endif + + uint32_t b32 = base & ADDR_LOW_HALF; + uint32_t l32 = length & ADDR_LOW_HALF; + + add_block( b32, l32 ); + } + + // record the initialization + km_initialized = 1; + +#if TRACING_KMEM | TRACING_KMEM_INIT + delay( DELAY_3_SEC ); +#endif +} + +/** +** Name: km_dump +** +** Dump information about the free lists to the console. By default, +** prints only the list sizes; if 'addrs' is true, also dumps the list +** of page addresses; if 'all' is also true, dumps page addresses and +** slice addresses. +** +** @param addrs Also dump page addresses +** @param both Also dump slice addresses +*/ +void km_dump( bool_t addrs, bool_t both ) { + + // report the sizes + cio_printf( "&free_pages %08x, &free_slices %08x, %u pages, %u slices\n", + (uint32_t) &free_pages, (uint32_t) &free_slices, + n_pages, n_slices ); + + // was that all? + if( !addrs ) { + return; + } + + // dump the addresses of the pages in the free list + uint32_t n = 0; + list_t *block = free_pages.next; + while( block != NULL ) { + if( n && !(n & MOD4_BITS) ) { + // four per line + cio_putchar( '\n' ); + } + cio_printf( " page @ 0x%08x", (uint32_t) block ); + block = block->next; + ++n; + } + + // sanity check - verify that the counts match + if( n != n_pages ) { + sprint( b256, "km_dump: n_pages %u, counted %u!!!\n", + n_pages, n ); + WARNING( b256); + } + + if( !both ) { + return; + } + + // but wait - there's more! + + // also dump the addresses of slices in the slice free list + n = 0; + block = free_slices.next; + while( block != NULL ) { + if( n && !(n & MOD4_BITS) ) { + // four per line + cio_putchar( '\n' ); + } + cio_printf( " slc @ 0x%08x", (uint32_t) block ); + block = block->next; + ++n; + } + + // sanity check - verify that the counts match + if( n != n_slices ) { + sprint( b256, "km_dump: n_slices %u, counted %u!!!\n", + n_slices, n ); + WARNING( b256); + } +} + +/* +** PAGE MANAGEMENT +*/ + +/** +** Name: km_page_alloc +** +** Allocate a page of memory from the free list. +** +** @return a pointer to the beginning of the allocated page, +** or NULL if no memory is available +*/ +void *km_page_alloc( void ) { + + // if km_init() wasn't called first, stop us in our tracks + assert( km_initialized ); + +#if TRACING_KMEM_FREELIST + cio_puts( "KM: pg_alloc()" ); +#endif + + // pointer to the first block + void *page = list_remove( &free_pages ); + + // was a page available? + if( page == NULL ){ + // nope! +#if TRACING_KMEM_FREELIST + cio_puts( " FAIL\n" ); +#endif + return( NULL ); + } + + // fix the count of available pages + --n_pages; + +#if TRACING_KMEM_FREELIST + cio_printf( " -> %08x\n", (uint32_t) page ); +#endif + + return( page ); +} + +/** +** Name: km_page_free +** +** Returns a page to the list of available pages. +** +** @param[in] page Pointer to the page to be returned to the free list +*/ +void km_page_free( void *page ){ + + // verify that km_init() was called first + assert( km_initialized ); + + /* + ** Don't do anything if the address is NULL. + */ + if( page == NULL ){ + return; + } + +#if TRACING_KMEM_FREELIST + cio_printf( "KM: pg_free(%08x)", (uint32_t) page ); +#endif + + /* + ** CRITICAL ASSUMPTION + ** + ** We assume that the block pointer given to us points to a single + ** page-sized block of memory. We make this assumption because we + ** don't track allocation sizes. We can't use the simple "allocate + ** four extra bytes before the returned pointer" scheme to do this + ** because we're managing pages, and the pointers we return must point + ** to page boundaries, so we would wind up allocating an extra page + ** for each allocation. + ** + ** Alternatively, we could keep an array of addresses and block + ** sizes ourselves, but that feels clunky, and would risk running out + ** of table entries if there are lots of allocations (assuming we use + ** a 4KB page to hold the table, at eight bytes per entry we would have + ** 512 entries per page). + ** + ** IF THIS ASSUMPTION CHANGES, THIS CODE MUST BE FIXED!!! + */ + + // link this into the free list + list_add( &free_pages, page ); + + // one more in the pool + ++n_pages; +} + +/* +** SLICE MANAGEMENT +*/ + +/* +** Slices are 1024-byte fragments from pages. We maintain a free list of +** slices for those parts of the OS which don't need full 4096-byte chunks +** of space. +*/ + +/** +** Name: carve_slices +** +** Split an allocated page into four slices and add +** them to the "free slices" list. +** +** @param page Pointer to the page to be carved up +*/ +static void carve_slices( void *page ) { + + // sanity check + assert1( page != NULL ); + + // create the four slices from it + uint8_t *ptr = (uint8_t *) page; + for( int i = 0; i < 4; ++i ) { + km_slice_free( (void *) ptr ); + ptr += SZ_SLICE; + ++n_slices; + } +} + +/** +** Name: km_slice_alloc +** +** Dynamically allocates a slice (1/4 of a page). If no +** memory is available, we return NULL (unless ALLOC_FAIL_PANIC +** was defined, in which case we panic). +** +** @return a pointer to the allocated slice +*/ +void *km_slice_alloc( void ) { + + // verify that km_init() was called first + assert( km_initialized ); + +#if TRACING_KMEM_FREELIST + cio_printf( "KM: sl_alloc()\n" ); +#endif + + // if we are out of slices, create a few more + if( free_slices.next == NULL ) { + void *new = km_page_alloc(); + if( new == NULL ) { + // can't get any more space +#if ALLOC_FAIL_PANIC + PANIC( 0, "slice new alloc failed" ); +#else + return NULL; +#endif + } + carve_slices( new ); + } + + // take the first one from the free list + void *slice = list_remove( &free_slices ); + assert( slice != NULL ); + --n_slices; + + // make it nice and shiny for the caller + memclr( (void *) slice, SZ_SLICE ); + + return( slice ); +} + +/** +** Name: km_slice_free +** +** Returns a slice to the list of available slices. +** +** We make no attempt to merge slices, as we treat them as +** independent blocks of memory (like pages). +** +** @param[in] block Pointer to the slice (1/4 page) to be freed +*/ +void km_slice_free( void *block ) { + + // verify that km_init() was called first + assert( km_initialized ); + +#if TRACING_KMEM_FREELIST + cio_printf( "KM: sl_free(%08x)\n", (uint32_t) block ); +#endif + + // just add it to the front of the free list + list_add( &free_slices, block ); + --n_slices; +} diff --git a/kernel/list.c b/kernel/list.c new file mode 100644 index 0000000..084000a --- /dev/null +++ b/kernel/list.c @@ -0,0 +1,64 @@ +/** +** @file list.c +** +** @author Warren R. Carithers +** +** @brief Support for a basic linked list data type. +** +** This module provides a very basic linked list data structure. +** A list can contain anything that has a pointer field in the first +** four bytes; these routines assume those bytes contain a pointer to +** the following entry in the list, whatever that may be. +*/ + +#define KERNEL_SRC + +#include <common.h> + +#include <list.h> + +/* +** FUNCTIONS +*/ + +/** +** Name: list_add +** +** Add the supplied data to the beginning of the specified list. +** +** @param[in,out] list The address of a list_t variable +** @param[in] data The data to prepend to the list +*/ +void list_add( list_t *list, void *data ) { + + // sanity checks + assert1( list != NULL ); + assert1( data != NULL ); + + list_t *tmp = (list_t *)data; + tmp->next = list->next; + list->next = tmp; +} + +/** +** Name: list_remove +** +** Remove the first entry from a linked list. +** +** @param[in,out] list The address of a list_t variable +** +** @return a pointer to the removed data, or NULL if the list was empty +*/ +void *list_remove( list_t *list ) { + + assert1( list != NULL ); + + list_t *data = list->next; + if( data != NULL ) { + list->next = data->next; + data->next = NULL; + } + + return (void *)data; +} + diff --git a/kernel/procs.c b/kernel/procs.c new file mode 100644 index 0000000..96bb3fd --- /dev/null +++ b/kernel/procs.c @@ -0,0 +1,1136 @@ +/* +** @file procs.c +** +** @author CSCI-452 class of 20245 +** +** @brief Process-related implementations +*/ + +#define KERNEL_SRC + +#include <common.h> + +#include <procs.h> +#include <user.h> + +/* +** PRIVATE DEFINITIONS +*/ + +// determine if a queue is empty; assumes 'q' is a valid pointer +#define PCB_QUEUE_EMPTY(q) ((q)->head == NULL) + +/* +** PRIVATE DATA TYPES +*/ + +/* +** PCB Queue structure +** +** Opaque to the rest of the kernel +** +** Typedef'd in the header: typedef struct pcb_queue_s *pcb_queue_t; +*/ +struct pcb_queue_s { + pcb_t *head; + pcb_t *tail; + enum pcb_queue_order_e order; +}; + +/* +** PRIVATE GLOBAL VARIABLES +*/ + +// collection of queues +static struct pcb_queue_s pcb_freelist_queue; +static struct pcb_queue_s ready_queue; +static struct pcb_queue_s waiting_queue; +static struct pcb_queue_s sleeping_queue; +static struct pcb_queue_s zombie_queue; +static struct pcb_queue_s sioread_queue; + +/* +** PUBLIC GLOBAL VARIABLES +*/ + +// public-facing queue handles +pcb_queue_t pcb_freelist; +pcb_queue_t ready; +pcb_queue_t waiting; +pcb_queue_t sleeping; +pcb_queue_t zombie; +pcb_queue_t sioread; + +// pointer to the currently-running process +pcb_t *current; + +// the process table +pcb_t ptable[N_PROCS]; + +// next available PID +uint_t next_pid; + +// pointer to the PCB for the 'init' process +pcb_t *init_pcb; + +// table of state name strings +const char *state_str[N_STATES] = { + [ STATE_UNUSED ] = "Unu", // "Unused" + [ STATE_NEW ] = "New", + [ STATE_READY ] = "Rdy", // "Ready" + [ STATE_RUNNING ] = "Run", // "Running" + [ STATE_SLEEPING ] = "Slp", // "Sleeping" + [ STATE_BLOCKED ] = "Blk", // "Blocked" + [ STATE_WAITING ] = "Wat", // "Waiting" + [ STATE_KILLED ] = "Kil", // "Killed" + [ STATE_ZOMBIE ] = "Zom" // "Zombie" +}; + +// table of priority name strings +const char *prio_str[N_PRIOS] = { + [ PRIO_HIGH ] = "High", + [ PRIO_STD ] = "User", + [ PRIO_LOW ] = "Low ", + [ PRIO_DEFERRED ] = "Def " +}; + +// table of queue ordering name strings +const char *ord_str[N_PRIOS] = { + [ O_FIFO ] = "FIFO", + [ O_PRIO ] = "PRIO", + [ O_PID ] = "PID ", + [ O_WAKEUP ] = "WAKE" +}; + +/* +** PRIVATE FUNCTIONS +*/ + +/** +** Priority search functions. These are used to traverse a supplied +** queue looking for the queue entry that would precede the supplied +** PCB when that PCB is inserted into the queue. +** +** Variations: +** find_prev_wakeup() compares wakeup times +** find_prev_priority() compares process priorities +** find_prev_pid() compares PIDs +** +** Each assumes the queue should be in ascending order by the specified +** comparison value. +** +** @param[in] queue The queue to search +** @param[in] pcb The PCB to look for +** +** @return a pointer to the predecessor in the queue, or NULL if +** this PCB would be at the beginning of the queue. +*/ +static pcb_t *find_prev_wakeup( pcb_queue_t queue, pcb_t *pcb ) { + + // sanity checks! + assert1( queue != NULL ); + assert1( pcb != NULL ); + + pcb_t *prev = NULL; + pcb_t *curr = queue->head; + + while( curr != NULL && curr->wakeup <= pcb->wakeup ) { + prev = curr; + curr = curr->next; + } + + return prev; +} + +static pcb_t *find_prev_priority( pcb_queue_t queue, pcb_t *pcb ) { + + // sanity checks! + assert1( queue != NULL ); + assert1( pcb != NULL ); + + pcb_t *prev = NULL; + pcb_t *curr = queue->head; + + while( curr != NULL && curr->priority <= pcb->priority ) { + prev = curr; + curr = curr->next; + } + + return prev; +} + +static pcb_t *find_prev_pid( pcb_queue_t queue, pcb_t *pcb ) { + + // sanity checks! + assert1( queue != NULL ); + assert1( pcb != NULL ); + + pcb_t *prev = NULL; + pcb_t *curr = queue->head; + + while( curr != NULL && curr->pid <= pcb->pid ) { + prev = curr; + curr = curr->next; + } + + return prev; +} + +/* +** PUBLIC FUNCTIONS +*/ + +// a macro to simplify queue setup +#define QINIT(q,s) \ + q = &q##_queue; \ + if( pcb_queue_reset(q,s) != SUCCESS ) { \ + PANIC( 0, "pcb_init can't reset " # q ); \ + } + +/** +** Name: pcb_init +** +** Initialization for the Process module. +*/ +void pcb_init( void ) { + +#if TRACING_INIT + cio_puts( " Procs" ); +#endif + + // there is no current process + current = NULL; + + // set up the external links to the queues + QINIT( pcb_freelist, O_FIFO ); + QINIT( ready, O_PRIO ); + QINIT( waiting, O_PID ); + QINIT( sleeping, O_WAKEUP ); + QINIT( zombie, O_PID ); + QINIT( sioread, O_FIFO ); + + /* + ** We statically allocate our PCBs, so we need to add them + ** to the freelist before we can use them. If this changes + ** so that we dynamicallyl allocate PCBs, this step either + ** won't be required, or could be used to pre-allocate some + ** number of PCB structures for future use. + */ + + pcb_t *ptr = ptable; + for( int i = 0; i < N_PROCS; ++i ) { + pcb_free( ptr ); + ++ptr; + } +} + +/** +** Name: pcb_alloc +** +** Allocate a PCB from the list of free PCBs. +** +** @param pcb Pointer to a pcb_t * where the PCB pointer will be returned. +** +** @return status of the allocation attempt +*/ +int pcb_alloc( pcb_t **pcb ) { + + // sanity check! + assert1( pcb != NULL ); + + // remove the first PCB from the free list + pcb_t *tmp; + if( pcb_queue_remove(pcb_freelist,&tmp) != SUCCESS ) { + return E_NO_PCBS; + } + + *pcb = tmp; + return SUCCESS; +} + +/** +** Name: pcb_free +** +** Return a PCB to the list of free PCBs. +** +** @param pcb Pointer to the PCB to be deallocated. +*/ +void pcb_free( pcb_t *pcb ) { + + if( pcb != NULL ) { + // mark the PCB as available + pcb->state = STATE_UNUSED; + + // add it to the free list + int status = pcb_queue_insert( pcb_freelist, pcb ); + + // if that failed, we're in trouble + if( status != SUCCESS ) { + sprint( b256, "pcb_free(0x%08x) status %d", (uint32_t) pcb, + status ); + PANIC( 0, b256 ); + } + } +} + +/** +** Name: pcb_zombify +** +** Turn the indicated process into a Zombie. This function +** does most of the real work for exit() and kill() calls. +** Is also called from the scheduler and dispatcher. +** +** @param pcb Pointer to the newly-undead PCB +*/ +void pcb_zombify( register pcb_t *victim ) { + + // should this be an error? + if( victim == NULL ) { + return; + } + + // every process must have a parent, even if it's 'init' + assert( victim->parent != NULL ); + + /* + ** We need to locate the parent of this process. We also need + ** to reparent any children of this process. We do these in + ** a single loop. + */ + pcb_t *parent = victim->parent; + pcb_t *zchild = NULL; + + // two PIDs we will look for + uint_t vicpid = victim->pid; + + // speed up access to the process table entries + register pcb_t *curr = ptable; + + for( int i = 0; i < N_PROCS; ++i, ++curr ) { + + // make sure this is a valid entry + if( curr->state == STATE_UNUSED ) { + continue; + } + + // if this is our parent, just keep going - we continue + // iterating to find all the children of this process. + if( curr == parent ) { + continue; + } + + if( curr->parent == victim ) { + + // found a child - reparent it + curr->parent = init_pcb; + + // see if this child is already undead + if( curr->state == STATE_ZOMBIE ) { + // if it's already a zombie, remember it, so we + // can pass it on to 'init'; also, if there are + // two or more zombie children, it doesn't matter + // which one we pick here, as the others will be + // collected when 'init' loops + zchild = curr; + } + + } + } + + /* + ** If we found a child that was already terminated, we need to + ** wake up the init process if it's already waiting. + ** + ** Note: we only need to do this for one Zombie child process - + ** init will loop and collect the others after it finishes with + ** this one. + ** + ** Also note: it's possible that the exiting process' parent is + ** also init, which means we're letting one of zombie children + ** of the exiting process be cleaned up by init before the + ** existing process itself is cleaned up by init. This will work, + ** because after init cleans up the zombie, it will loop and + ** call waitpid() again, by which time this exiting process will + ** be marked as a zombie. + */ + if( zchild != NULL && init_pcb->state == STATE_WAITING ) { + + // dequeue the zombie + assert( pcb_queue_remove_this(zombie,zchild) == SUCCESS ); + + assert( pcb_queue_remove_this(waiting,init_pcb) == SUCCESS ); + + // intrinsic return value is the PID + RET(init_pcb) = zchild->pid; + + // may also want to return the exit status + int32_t *ptr = (int32_t *) ARG(init_pcb,2); + + if( ptr != NULL ) { + // ******************************************************** + // ** Potential VM issue here! This code assigns the exit + // ** status into a variable in the parent's address space. + // ** This works in the baseline because we aren't using + // ** any type of memory protection. If address space + // ** separation is implemented, this code will very likely + // ** STOP WORKING, and will need to be fixed. + // ******************************************************** + *ptr = zchild->exit_status; + } + + // all done - schedule 'init', and clean up the zombie + schedule( init_pcb ); + pcb_cleanup( zchild ); + } + + /* + ** Now, deal with the parent of this process. If the parent is + ** already waiting, just wake it up and clean up this process. + ** Otherwise, this process becomes a zombie. + ** + ** Note: if the exiting process' parent is init and we just woke + ** init up to deal with a zombie child of the exiting process, + ** init's status won't be Waiting any more, so we don't have to + ** worry about it being scheduled twice. + */ + + if( parent->state == STATE_WAITING ) { + + // verify that the parent is either waiting for this process + // or is waiting for any of its children + uint32_t target = ARG(parent,1); + + if( target == 0 || target == vicpid ) { + + // the parent is waiting for this child or is waiting + // for any of its children, so we can wake it up. + + // intrinsic return value is the PID + RET(parent) = vicpid; + + // may also want to return the exit status + int32_t *ptr = (int32_t *) ARG(parent,2); + + if( ptr != NULL ) { + // ******************************************************** + // ** Potential VM issue here! This code assigns the exit + // ** status into a variable in the parent's address space. + // ** This works in the baseline because we aren't using + // ** any type of memory protection. If address space + // ** separation is implemented, this code will very likely + // ** STOP WORKING, and will need to be fixed. + // ******************************************************** + *ptr = victim->exit_status; + } + + // all done - schedule the parent, and clean up the zombie + schedule( parent ); + pcb_cleanup( victim ); + + return; + } + } + + /* + ** The parent isn't waiting OR is waiting for a specific child + ** that isn't this exiting process, so we become a Zombie. + ** + ** This code assumes that Zombie processes are *not* in + ** a queue, but instead are just in the process table with + ** a state of 'Zombie'. This simplifies life immensely, + ** because we won't need to dequeue it when it is collected + ** by its parent. + */ + + victim->state = STATE_ZOMBIE; + assert( pcb_queue_insert(zombie,victim) == SUCCESS ); + + /* + ** Note: we don't call _dispatch() here - we leave that for + ** the calling routine, as it's possible we don't need to + ** choose a new current process. + */ +} + +/** +** Name: pcb_cleanup +** +** Reclaim a process' data structures +** +** @param pcb The PCB to reclaim +*/ +void pcb_cleanup( pcb_t *pcb ) { + +#if TRACING_PCB + cio_printf( "** pcb_cleanup(0x%08x)\n", (uint32_t) pcb ); +#endif + + // avoid deallocating a NULL pointer + if( pcb == NULL ) { + // should this be an error? + return; + } + + // we need to release all the VM data structures and frames + user_cleanup( pcb ); + + // release the PCB itself + pcb_free( pcb ); +} + +/** +** Name: pcb_find_pid +** +** Locate the PCB for the process with the specified PID +** +** @param pid The PID to be located +** +** @return Pointer to the PCB, or NULL +*/ +pcb_t *pcb_find_pid( uint_t pid ) { + + // must be a valid PID + if( pid < 1 ) { + return NULL; + } + + // scan the process table + pcb_t *p = ptable; + + for( int i = 0; i < N_PROCS; ++i, ++p ) { + if( p->pid == pid && p->state != STATE_UNUSED ) { + return p; + } + } + + // didn't find it! + return NULL; +} + +/** +** Name: pcb_find_ppid +** +** Locate the PCB for the process with the specified parent +** +** @param pid The PID to be located +** +** @return Pointer to the PCB, or NULL +*/ +pcb_t *pcb_find_ppid( uint_t pid ) { + + // must be a valid PID + if( pid < 1 ) { + return NULL; + } + + // scan the process table + pcb_t *p = ptable; + + for( int i = 0; i < N_PROCS; ++i, ++p ) { + assert1( p->parent != NULL ); + if( p->parent->pid == pid && p->parent->state != STATE_UNUSED ) { + return p; + } + } + + // didn't find it! + return NULL; +} + +/** +** Name: pcb_queue_reset +** +** Initialize a PCB queue. We assume that whatever data may be +** in the queue structure can be overwritten. +** +** @param queue[out] The queue to be initialized +** @param order[in] The desired ordering for the queue +** +** @return status of the init request +*/ +int pcb_queue_reset( pcb_queue_t queue, enum pcb_queue_order_e style ) { + + // sanity check + assert1( queue != NULL ); + + // make sure the style is valid + if( style < O_FIRST_STYLE || style > O_LAST_STYLE ) { + return E_BAD_PARAM; + } + + // reset the queue + queue->head = queue->tail = NULL; + queue->order = style; + + return SUCCESS; +} + +/** +** Name: pcb_queue_empty +** +** Determine whether a queue is empty. Essentially just a wrapper +** for the PCB_QUEUE_EMPTY() macro, for use outside this module. +** +** @param[in] queue The queue to check +** +** @return true if the queue is empty, else false +*/ +bool_t pcb_queue_empty( pcb_queue_t queue ) { + + // if there is no queue, blow up + assert1( queue != NULL ); + + return PCB_QUEUE_EMPTY(queue); +} + +/** +** Name: pcb_queue_length +** +** Return the count of elements in the specified queue. +** +** @param[in] queue The queue to check +** +** @return the count (0 if the queue is empty) +*/ +uint_t pcb_queue_length( const pcb_queue_t queue ) { + + // sanity check + assert1( queue != NULL ); + + // this is pretty simple + register pcb_t *tmp = queue->head; + register int num = 0; + + while( tmp != NULL ) { + ++num; + tmp = tmp->next; + } + + return num; +} + +/** +** Name: pcb_queue_insert +** +** Inserts a PCB into the indicated queue. +** +** @param queue[in,out] The queue to be used +** @param pcb[in] The PCB to be inserted +** +** @return status of the insertion request +*/ +int pcb_queue_insert( pcb_queue_t queue, pcb_t *pcb ) { + + // sanity checks + assert1( queue != NULL ); + assert1( pcb != NULL ); + + // if this PCB is already in a queue, we won't touch it + if( pcb->next != NULL ) { + // what to do? we let the caller decide + return E_BAD_PARAM; + } + + // is the queue empty? + if( queue->head == NULL ) { + queue->head = queue->tail = pcb; + return SUCCESS; + } + assert1( queue->tail != NULL ); + + // no, so we need to search it + pcb_t *prev = NULL; + + // find the predecessor node + switch( queue->order ) { + case O_FIFO: + prev = queue->tail; + break; + case O_PRIO: + prev = find_prev_priority(queue,pcb); + break; + case O_PID: + prev = find_prev_pid(queue,pcb); + break; + case O_WAKEUP: + prev = find_prev_wakeup(queue,pcb); + break; + default: + // do we need something more specific here? + return E_BAD_PARAM; + } + + // OK, we found the predecessor node; time to do the insertion + + if( prev == NULL ) { + + // there is no predecessor, so we're + // inserting at the front of the queue + pcb->next = queue->head; + if( queue->head == NULL ) { + // empty queue!?! - should we panic? + queue->tail = pcb; + } + queue->head = pcb; + + } else if( prev->next == NULL ) { + + // append at end + prev->next = pcb; + queue->tail = pcb; + + } else { + + // insert between prev & prev->next + pcb->next = prev->next; + prev->next = pcb; + + } + + return SUCCESS; +} + +/** +** Name: pcb_queue_remove +** +** Remove the first PCB from the indicated queue. +** +** @param queue[in,out] The queue to be used +** @param pcb[out] Pointer to where the PCB pointer will be saved +** +** @return status of the removal request +*/ +int pcb_queue_remove( pcb_queue_t queue, pcb_t **pcb ) { + + //sanity checks + assert1( queue != NULL ); + assert1( pcb != NULL ); + + // can't get anything if there's nothing to get! + if( PCB_QUEUE_EMPTY(queue) ) { + return E_EMPTY_QUEUE; + } + + // take the first entry from the queue + pcb_t *tmp = queue->head; + queue->head = tmp->next; + + // disconnect it completely + tmp->next = NULL; + + // was this the last thing in the queue? + if( queue->head == NULL ) { + // yes, so clear the tail pointer for consistency + queue->tail = NULL; + } + + // save the pointer + *pcb = tmp; + + return SUCCESS; +} + +/** +** Name: pcb_queue_remove_this +** +** Remove the specified PCB from the indicated queue. +** +** We don't return the removed pointer, because the calling +** routine must already have it (because it was supplied +** to us in the call). +** +** @param queue[in,out] The queue to be used +** @param pcb[in] Pointer to the PCB to be removed +** +** @return status of the removal request +*/ +int pcb_queue_remove_this( pcb_queue_t queue, pcb_t *pcb ) { + + //sanity checks + assert1( queue != NULL ); + assert1( pcb != NULL ); + + // can't get anything if there's nothing to get! + if( PCB_QUEUE_EMPTY(queue) ) { + return E_EMPTY_QUEUE; + } + + // iterate through the queue until we find the desired PCB + pcb_t *prev = NULL; + pcb_t *curr = queue->head; + + while( curr != NULL && curr != pcb ) { + prev = curr; + curr = curr->next; + } + + // case prev curr next interpretation + // ==== ==== ==== ==== ============================ + // 1. 0 0 -- *** CANNOT HAPPEN *** + // 2. 0 !0 0 removing only element + // 3. 0 !0 !0 removing first element + // 4. !0 0 -- *** NOT FOUND *** + // 5. !0 !0 0 removing from end + // 6. !0 !0 !0 removing from middle + + if( curr == NULL ) { + // case 1 + assert( prev != NULL ); + // case 4 + return E_NOT_FOUND; + } + + // connect predecessor to successor + if( prev != NULL ) { + // not the first element + // cases 5 and 6 + prev->next = curr->next; + } else { + // removing first element + // cases 2 and 3 + queue->head = curr->next; + } + + // if this was the last node (cases 2 and 5), + // also need to reset the tail pointer + if( curr->next == NULL ) { + // if this was the only entry (2), prev is NULL, + // so this works for that case, too + queue->tail = prev; + } + + // unlink current from queue + curr->next = NULL; + + // there's a possible consistancy problem here if somehow + // one of the queue pointers is NULL and the other one + // is not NULL + + assert1( + (queue->head == NULL && queue->tail == NULL) || + (queue->head != NULL && queue->tail != NULL) + ); + + return SUCCESS; +} + +/** +** Name: pcb_queue_peek +** +** Return the first PCB from the indicated queue, but don't +** remove it from the queue. +** +** @param queue[in] The queue to be used +** +** @return the PCB poiner, or NULL if the queue is empty +*/ +pcb_t *pcb_queue_peek( const pcb_queue_t queue ) { + + //sanity check + assert1( queue != NULL ); + + // can't get anything if there's nothing to get! + if( PCB_QUEUE_EMPTY(queue) ) { + return NULL; + } + + // just return the first entry from the queue + return queue->head; +} + +/* +** Scheduler routines +*/ + +/** +** schedule(pcb) +** +** Schedule the supplied process +** +** @param pcb Pointer to the PCB of the process to be scheduled +*/ +void schedule( pcb_t *pcb ) { + + // sanity check + assert1( pcb != NULL ); + + // check for a killed process + if( pcb->state == STATE_KILLED ) { + // TODO figure out what to do now + return; + } + + // mark it as ready + pcb->state = STATE_READY; + + // add it to the ready queue + if( pcb_queue_insert(ready,pcb) != SUCCESS ) { + PANIC( 0, "schedule insert fail" ); + } +} + +/** +** dispatch() +** +** Select the next process to receive the CPU +*/ +void dispatch( void ) { + + // verify that there is no current process + assert( current == NULL ); + + // grab whoever is at the head of the queue + int status = pcb_queue_remove( ready, ¤t ); + if( status != SUCCESS ) { + sprint( b256, "dispatch queue remove failed, code %d", status ); + PANIC( 0, b256 ); + } + + // set the process up for success + current->state = STATE_RUNNING; + current->ticks = QUANTUM_STANDARD; +} + + +/* +** Debugging/tracing routines +*/ + +/** +** ctx_dump(msg,context) +** +** Dumps the contents of this process context to the console +** +** @param msg[in] An optional message to print before the dump +** @param c[in] The context to dump out +*/ +void ctx_dump( const char *msg, register context_t *c ) { + + // first, the message (if there is one) + if( msg ) { + cio_puts( msg ); + } + + // the pointer + cio_printf( " @ %08x: ", (uint32_t) c ); + + // if it's NULL, why did you bother calling me? + if( c == NULL ) { + cio_puts( " NULL???\n" ); + return; + } + + // now, the contents + cio_printf( "ss %04x gs %04x fs %04x es %04x ds %04x cs %04x\n", + c->ss & 0xff, c->gs & 0xff, c->fs & 0xff, + c->es & 0xff, c->ds & 0xff, c->cs & 0xff ); + cio_printf( " edi %08x esi %08x ebp %08x esp %08x\n", + c->edi, c->esi, c->ebp, c->esp ); + cio_printf( " ebx %08x edx %08x ecx %08x eax %08x\n", + c->ebx, c->edx, c->ecx, c->eax ); + cio_printf( " vec %08x cod %08x eip %08x eflags %08x\n", + c->vector, c->code, c->eip, c->eflags ); +} + +/** +** ctx_dump_all(msg) +** +** dump the process context for all active processes +** +** @param msg[in] Optional message to print +*/ +void ctx_dump_all( const char *msg ) { + + if( msg != NULL ) { + cio_puts( msg ); + } + + int n = 0; + register pcb_t *pcb = ptable; + for( int i = 0; i < N_PROCS; ++i, ++pcb ) { + if( pcb->state != STATE_UNUSED ) { + ++n; + cio_printf( "%2d(%d): ", n, pcb->pid ); + ctx_dump( NULL, pcb->context ); + } + } +} + +/** +** _pcb_dump(msg,pcb) +** +** Dumps the contents of this PCB to the console +** +** @param msg[in] An optional message to print before the dump +** @param pcb[in] The PCB to dump +** @param all[in] Dump all the contents? +*/ +void pcb_dump( const char *msg, register pcb_t *pcb, bool_t all ) { + + // first, the message (if there is one) + if( msg ) { + cio_puts( msg ); + } + + // the pointer + cio_printf( " @ %08x:", (uint32_t) pcb ); + + // if it's NULL, why did you bother calling me? + if( pcb == NULL ) { + cio_puts( " NULL???\n" ); + return; + } + + cio_printf( " %d", pcb->pid ); + cio_printf( " %s", + pcb->state >= N_STATES ? "???" : state_str[pcb->state] ); + + if( !all ) { + // just printing IDs and states on one line + return; + } + + // now, the rest of the contents + cio_printf( " %s", + pcb->priority >= N_PRIOS ? "???" : prio_str[pcb->priority] ); + + cio_printf( " ticks %u xit %d wake %08x\n", + pcb->ticks, pcb->exit_status, pcb->wakeup ); + + cio_printf( " parent %08x", (uint32_t)pcb->parent ); + if( pcb->parent != NULL ) { + cio_printf( " (%u)", pcb->parent->pid ); + } + + cio_printf( " next %08x context %08x pde %08x", (uint32_t) pcb->next, + (uint32_t) pcb->context, (uint32_t) pcb->pdir ); + + cio_putchar( '\n' ); +} + +/** +** pcb_queue_dump(msg,queue,contents) +** +** Dump the contents of the specified queue to the console +** +** @param msg[in] Optional message to print +** @param queue[in] The queue to dump +** @param contents[in] Also dump (some) contents? +*/ +void pcb_queue_dump( const char *msg, pcb_queue_t queue, bool_t contents ) { + + // report on this queue + cio_printf( "%s: ", msg ); + if( queue == NULL ) { + cio_puts( "NULL???\n" ); + return; + } + + // first, the basic data + cio_printf( "head %08x tail %08x", + (uint32_t) queue->head, (uint32_t) queue->tail ); + + // next, how the queue is ordered + cio_printf( " order %s\n", + queue->order >= N_ORDERINGS ? "????" : ord_str[queue->order] ); + + // if there are members in the queue, dump the first few PIDs + if( contents && queue->head != NULL ) { + cio_puts( " PIDs: " ); + pcb_t *tmp = queue->head; + for( int i = 0; i < 5 && tmp != NULL; ++i, tmp = tmp->next ) { + cio_printf( " [%u]", tmp->pid ); + } + + if( tmp != NULL ) { + cio_puts( " ..." ); + } + + cio_putchar( '\n' ); + } +} + +/** +** ptable_dump(msg,all) +** +** dump the contents of the "active processes" table +** +** @param msg[in] Optional message to print +** @param all[in] Dump all or only part of the relevant data +*/ +void ptable_dump( const char *msg, bool_t all ) { + + if( msg ) { + cio_puts( msg ); + } + cio_putchar( ' ' ); + + int used = 0; + int empty = 0; + + register pcb_t *pcb = ptable; + for( int i = 0; i < N_PROCS; ++i ) { + if( pcb->state == STATE_UNUSED ) { + + // an empty slot + ++empty; + + } else { + + // a non-empty slot + ++used; + + // if not dumping everything, add commas if needed + if( !all && used ) { + cio_putchar( ',' ); + } + + // report the table slot # + cio_printf( " #%d:", i ); + + // and dump the contents + pcb_dump( NULL, pcb, all ); + } + } + + // only need this if we're doing one-line output + if( !all ) { + cio_putchar( '\n' ); + } + + // sanity check - make sure we saw the correct number of table slots + if( (used + empty) != N_PROCS ) { + cio_printf( "Table size %d, used %d + empty %d = %d???\n", + N_PROCS, used, empty, used + empty ); + } +} + +/** +** Name: ptable_dump_counts +** +** Prints basic information about the process table (number of +** entries, number with each process state, etc.). +*/ +void ptable_dump_counts( void ) { + uint_t nstate[N_STATES] = { 0 }; + uint_t unknown = 0; + + int n = 0; + pcb_t *ptr = ptable; + while( n < N_PROCS ) { + if( ptr->state < 0 || ptr->state >= N_STATES ) { + ++unknown; + } else { + ++nstate[ptr->state]; + } + ++n; + ++ptr; + } + + cio_printf( "Ptable: %u ***", unknown ); + for( n = 0; n < N_STATES; ++n ) { + cio_printf( " %u %s", nstate[n], + state_str[n] != NULL ? state_str[n] : "???" ); + } + cio_putchar( '\n' ); +} diff --git a/kernel/sio.c b/kernel/sio.c new file mode 100644 index 0000000..a5c7b75 --- /dev/null +++ b/kernel/sio.c @@ -0,0 +1,694 @@ +/** +** @file sio.c +** +** @author Warren R. Carithers +** +** @brief SIO module +** +** For maximum compatibility from semester to semester, this code uses +** several "stand-in" type names and macros which should be defined +** in the accompanying "compat.h" header file if they're not part of +** the baseline system: +** +** standard-sized integer types: intN_t, uintN_t +** other types: PCBTYPE, QTYPE +** scheduler functions: SCHED, DISPATCH +** queue functions: QCREATE, QLENGTH, QDEQUE +** other functions: SLENGTH +** sio read queue: QNAME +** +** Our SIO scheme is very simple: +** +** Input: We maintain a buffer of incoming characters that haven't +** yet been read by processes. When a character comes in, if +** there is no process waiting for it, it goes in the buffer; +** otherwise, the first waiting process is awakeneda and it +** gets the character. +** +** When a process invokes readch(), if there is a character in +** the input buffer, the process gets it; otherwise, it is +** blocked until input appears +** +** Communication with system calls is via two routines. +** sio_readc() returns the first available character (if +** there is one), resetting the input variables if this was +** the last character in the buffer. If there are no +** characters in the buffer, sio_read() returns a -1 +** (presumably so the requesting process can be blocked). +** +** sio_read() copies the contents of the input buffer into +** a user-supplied buffer. It returns the number of characters +** copied. If there are no characters available, return a -1. +** +** Output: We maintain a buffer of outgoing characters that haven't +** yet been sent to the device, and an indication of whether +** or not we are in the middle of a transmit sequence. When +** an interrupt comes in, if there is another character to +** send we copy it to the transmitter buffer; otherwise, we +** end the transmit sequence. +** +** Communication with user processes is via three functions. +** sio_writec() writes a single character; sio_write() +** writes a sized buffer full of characters; sio_puts() +** prints a NUL-terminated string. If we are in the middle +** of a transmit sequence, all characters will be added +** to the output buffer (from where they will be sent +** automatically); otherwise, we send the first character +** directly, add the rest of the characters (if there are +** any) to the output buffer, and set the "sending" flag +** to indicate that we're expecting a transmitter interrupt. +*/ + +#define KERNEL_SRC + +// this should do all includes required for this OS +#include <compat.h> + +// all other framework includes are next +#include <x86/uart.h> +#include <x86/arch.h> +#include <x86/pic.h> + +#include <sio.h> +#include <lib.h> + +/* +** PRIVATE DEFINITIONS +*/ + +#define BUF_SIZE 2048 + +/* +** PRIVATE GLOBALS +*/ + + // input character buffer +static char inbuffer[ BUF_SIZE ]; +static char *inlast; +static char *innext; +static uint32_t incount; + + // output character buffer +static char outbuffer[ BUF_SIZE ]; +static char *outlast; +static char *outnext; +static uint32_t outcount; + + // output control flag +static int sending; + + // interrupt register status +static uint8_t ier; + +/* +** PUBLIC GLOBAL VARIABLES +*/ + +// queue for read-blocked processes +#ifdef QNAME +QTYPE QNAME; +#endif + +/* +** PRIVATE FUNCTIONS +*/ + +/** +** sio_isr(vector,ecode) +** +** Interrupt handler for the SIO module. Handles all pending +** events (as described by the SIO controller). +** +** @param vector The interrupt vector number for this interrupt +** @param ecode The error code associated with this interrupt +*/ +static void sio_isr( int vector, int ecode ) { + int ch; + +#if TRACING_SIO_ISR + cio_puts( "SIO: int:" ); +#endif + // + // Must process all pending events; loop until the IRR + // says there's nothing else to do. + // + + for(;;) { + + // get the "pending event" indicator + int iir = inb( UA4_IIR ) & UA4_IIR_INT_PRI_MASK; + + // process this event + switch( iir ) { + + case UA4_IIR_LINE_STATUS: + // shouldn't happen, but just in case.... + cio_printf( "** SIO int, LSR = %02x\n", inb(UA4_LSR) ); + break; + + case UA4_IIR_RX: +#if TRACING_SIO_ISR + cio_puts( " RX" ); +#endif + // get the character + ch = inb( UA4_RXD ); + if( ch == '\r' ) { // map CR to LF + ch = '\n'; + } +#if TRACING_SIO_ISR + cio_printf( " ch %02x", ch ); +#endif + +#ifdef QNAME + // + // If there is a waiting process, this must be + // the first input character; give it to that + // process and awaken the process. + // + + if( !QEMPTY(QNAME) ) { + PCBTYPE *pcb; + + QDEQUE( QNAME, pcb ); + // make sure we got a non-NULL result + assert( pcb ); + + // return char via arg #2 and count in EAX + char *buf = (char *) ARG(pcb,2); + *buf = ch & 0xff; + RET(pcb) = 1; + SCHED( pcb ); + + } else { +#endif /* QNAME */ + + // + // Nobody waiting - add to the input buffer + // if there is room, otherwise just ignore it. + // + + if( incount < BUF_SIZE ) { + *inlast++ = ch; + ++incount; + } + +#ifdef QNAME + } +#endif /* QNAME */ + break; + + case UA5_IIR_RX_FIFO: + // shouldn't happen, but just in case.... + ch = inb( UA4_RXD ); + cio_printf( "** SIO FIFO timeout, RXD = %02x\n", ch ); + break; + + case UA4_IIR_TX: +#if TRACING_SIO_ISR + cio_puts( " TX" ); +#endif + // if there is another character, send it + if( sending && outcount > 0 ) { +#if TRACING_SIO_ISR + cio_printf( " ch %02x", *outnext ); +#endif + outb( UA4_TXD, *outnext ); + ++outnext; + // wrap around if necessary + if( outnext >= (outbuffer + BUF_SIZE) ) { + outnext = outbuffer; + } + --outcount; +#if TRACING_SIO_ISR + cio_printf( " (outcount %d)", outcount ); +#endif + } else { +#if TRACING_SIO_ISR + cio_puts( " EOS" ); +#endif + // no more data - reset the output vars + outcount = 0; + outlast = outnext = outbuffer; + sending = 0; + // disable TX interrupts + sio_disable( SIO_TX ); + } + break; + + case UA4_IIR_NO_INT: +#if TRACING_SIO_ISR + cio_puts( " EOI\n" ); +#endif + // nothing to do - tell the PIC we're done + outb( PIC1_CMD, PIC_EOI ); + return; + + case UA4_IIR_MODEM_STATUS: + // shouldn't happen, but just in case.... + cio_printf( "** SIO int, MSR = %02x\n", inb(UA4_MSR) ); + break; + + default: + // uh-oh.... + sprint( b256, "sio isr: IIR %02x\n", ((uint32_t) iir) & 0xff ); + PANIC( 0, b256 ); + } + + } + + // should never reach this point! + assert( false ); +} + +/* +** PUBLIC FUNCTIONS +*/ + +/** +** sio_init() +** +** Initialize the UART chip. +*/ +void sio_init( void ) { + +#if TRACING_INIT + cio_puts( " Sio" ); +#endif + + /* + ** Initialize SIO variables. + */ + + memclr( (void *) inbuffer, sizeof(inbuffer) ); + inlast = innext = inbuffer; + incount = 0; + + memclr( (void *) outbuffer, sizeof(outbuffer) ); + outlast = outnext = outbuffer; + outcount = 0; + sending = 0; + + // queue of read-blocked processes + QCREATE( QNAME ); + + /* + ** Next, initialize the UART. + ** + ** Initialize the FIFOs + ** + ** this is a bizarre little sequence of operations + */ + + outb( UA5_FCR, 0x20 ); + outb( UA5_FCR, UA5_FCR_FIFO_RESET ); // 0x00 + outb( UA5_FCR, UA5_FCR_FIFO_EN ); // 0x01 + outb( UA5_FCR, UA5_FCR_FIFO_EN | UA5_FCR_RXSR ); // 0x03 + outb( UA5_FCR, UA5_FCR_FIFO_EN | UA5_FCR_RXSR | UA5_FCR_TXSR ); // 0x07 + + /* + ** disable interrupts + ** + ** note that we leave them disabled; sio_enable() must be + ** called to switch them back on + */ + + outb( UA4_IER, 0 ); + ier = 0; + + /* + ** select the divisor latch registers and set the data rate + */ + + outb( UA4_LCR, UA4_LCR_DLAB ); + outb( UA4_DLL, BAUD_LOW_BYTE( DL_BAUD_9600 ) ); + outb( UA4_DLM, BAUD_HIGH_BYTE( DL_BAUD_9600 ) ); + + /* + ** deselect the latch registers, by setting the data + ** characteristics in the LCR + */ + + outb( UA4_LCR, UA4_LCR_WLS_8 | UA4_LCR_1_STOP_BIT | UA4_LCR_NO_PARITY ); + + /* + ** Set the ISEN bit to enable the interrupt request signal, + ** and the DTR and RTS bits to enable two-way communication. + */ + + outb( UA4_MCR, UA4_MCR_ISEN | UA4_MCR_DTR | UA4_MCR_RTS ); + + /* + ** Install our ISR + */ + + install_isr( VEC_COM1, sio_isr ); +} + +/** +** sio_enable() +** +** Enable SIO interrupts +** +** usage: uint8_t old = sio_enable( uint8_t which ) +** +** @param which Bit mask indicating which interrupt(s) to enable +** +** @return the prior IER setting +*/ +uint8_t sio_enable( uint8_t which ) { + uint8_t old; + + // remember the current status + + old = ier; + + // figure out what to enable + + if( which & SIO_TX ) { + ier |= UA4_IER_TX_IE; + } + + if( which & SIO_RX ) { + ier |= UA4_IER_RX_IE; + } + + // if there was a change, make it + + if( old != ier ) { + outb( UA4_IER, ier ); + } + + // return the prior settings + + return( old ); +} + +/** +** sio_disable() +** +** Disable SIO interrupts +** +** usage: uint8_t old = sio_disable( uint8_t which ) +** +** @param which Bit mask indicating which interrupt(s) to disable +** +** @return the prior IER setting +*/ +uint8_t sio_disable( uint8_t which ) { + uint8_t old; + + // remember the current status + + old = ier; + + // figure out what to disable + + if( which & SIO_TX ) { + ier &= ~UA4_IER_TX_IE; + } + + if( which & SIO_RX ) { + ier &= ~UA4_IER_RX_IE; + } + + // if there was a change, make it + + if( old != ier ) { + outb( UA4_IER, ier ); + } + + // return the prior settings + + return( old ); +} + +/** +** sio_inq_length() +** +** Get the input queue length +** +** usage: int num = sio_inq_length() +** +** @return the count of characters still in the input queue +*/ +int sio_inq_length( void ) { + return( incount ); +} + +/** +** sio_readc() +** +** Get the next input character +** +** usage: int ch = sio_readc() +** +** @return the next character, or -1 if no character is available +*/ +int sio_readc( void ) { + int ch; + + // assume there is no character available + ch = -1; + + // + // If there is a character, return it + // + + if( incount > 0 ) { + + // take it out of the input buffer + ch = ((int)(*innext++)) & 0xff; + --incount; + + // reset the buffer variables if this was the last one + if( incount < 1 ) { + inlast = innext = inbuffer; + } + + } + + return( ch ); + +} + +/** +** sio_read(buf,length) +** +** Read the entire input buffer into a user buffer of a specified size +** +** usage: int num = sio_read( char *buffer, int length ) +** +** @param buf The destination buffer +** @param length Length of the buffer +** +** @return the number of bytes copied, or 0 if no characters were available +*/ + +int sio_read( char *buf, int length ) { + char *ptr = buf; + int copied = 0; + + // if there are no characters, just return 0 + + if( incount < 1 ) { + return( 0 ); + } + + // + // We have characters. Copy as many of them into the user + // buffer as will fit. + // + + while( incount > 0 && copied < length ) { + *ptr++ = *innext++ & 0xff; + if( innext > (inbuffer + BUF_SIZE) ) { + innext = inbuffer; + } + --incount; + ++copied; + } + + // reset the input buffer if necessary + + if( incount < 1 ) { + inlast = innext = inbuffer; + } + + // return the copy count + + return( copied ); +} + + +/** +** sio_writec( ch ) +** +** Write a character to the serial output +** +** usage: sio_writec( int ch ) +** +** @param ch Character to be written (in the low-order 8 bits) +*/ +void sio_writec( int ch ){ + + + // + // Must do LF -> CRLF mapping + // + + if( ch == '\n' ) { + sio_writec( '\r' ); + } + + // + // If we're currently transmitting, just add this to the buffer + // + + if( sending ) { + *outlast++ = ch; + ++outcount; + return; + } + + // + // Not sending - must prime the pump + // + + sending = 1; + outb( UA4_TXD, ch ); + + // Also must enable transmitter interrupts + + sio_enable( SIO_TX ); + +} + +/** +** sio_write( buffer, length ) +** +** Write a buffer of characters to the serial output +** +** usage: int num = sio_write( const char *buffer, int length ) +** +** @param buffer Buffer containing characters to write +** @param length Number of characters to write +** +** @return the number of characters copied into the SIO output buffer +*/ +int sio_write( const char *buffer, int length ) { + int first = *buffer; + const char *ptr = buffer; + int copied = 0; + + // + // If we are currently sending, we want to append all + // the characters to the output buffer; else, we want + // to append all but the first character, and then use + // sio_writec() to send the first one out. + // + + if( !sending ) { + ptr += 1; + copied++; + } + + while( copied < length && outcount < BUF_SIZE ) { + *outlast++ = *ptr++; + // wrap around if necessary + if( outlast >= (outbuffer + BUF_SIZE) ) { + outlast = outbuffer; + } + ++outcount; + ++copied; + } + + // + // We use sio_writec() to send out the first character, + // as it will correctly set all the other necessary + // variables for us. + // + + if( !sending ) { + sio_writec( first ); + } + + // Return the transfer count + + + return( copied ); + +} + +/** +** sio_puts( buf ) +** +** Write a NUL-terminated buffer of characters to the serial output +** +** usage: int num = sio_puts( const char *buffer ) +** +** @param buffer The buffer containing a NUL-terminated string +** +** @return the count of bytes transferred +*/ +int sio_puts( const char *buffer ) { + int n; // must be outside the loop so we can return it + + n = SLENGTH( buffer ); + sio_write( buffer, n ); + + return( n ); +} + +/** +** sio_dump( full ) +** +** dump the contents of the SIO buffers to the console +** +** usage: sio_dump(true) or sio_dump(false) +** +** @param full Boolean indicating whether or not a "full" dump +** is being requested (which includes the contents +** of the queues) +*/ + +void sio_dump( bool_t full ) { + int n; + char *ptr; + + // dump basic info into the status region + + cio_printf_at( 48, 0, + "SIO: IER %02x (%c%c%c) in %d ot %d", + ((uint32_t)ier) & 0xff, sending ? '*' : '.', + (ier & UA4_IER_TX_IE) ? 'T' : 't', + (ier & UA4_IER_RX_IE) ? 'R' : 'r', + incount, outcount ); + + // if we're not doing a full dump, stop now + + if( !full ) { + return; + } + + // also want the queue contents, but we'll + // dump them into the scrolling region + + if( incount ) { + cio_puts( "SIO input queue: \"" ); + ptr = innext; + for( n = 0; n < incount; ++n ) { + put_char_or_code( *ptr++ ); + } + cio_puts( "\"\n" ); + } + + if( outcount ) { + cio_puts( "SIO output queue: \"" ); + cio_puts( " ot: \"" ); + ptr = outnext; + for( n = 0; n < outcount; ++n ) { + put_char_or_code( *ptr++ ); + } + cio_puts( "\"\n" ); + } +} diff --git a/kernel/startup.S b/kernel/startup.S new file mode 100644 index 0000000..1cae13c --- /dev/null +++ b/kernel/startup.S @@ -0,0 +1,153 @@ +/* +** @file startup.S +** +** @author Jon Coles +** @authors Warren R. Carithers, K. Reek +** +** SP startup code. +** +** This code prepares the various registers for execution of +** the program. It sets up all the segment registers and the +** runtime stack. By the time this code is running, we're in +** protected mode already. +*/ + +#define KERNEL_SRC +#define ASM_SRC + + .arch i386 + +#include <common.h> +#include <bootstrap.h> +#include <x86/arch.h> +#include <x86/bios.h> +#include <vm.h> + +/* +** Configuration options - define in Makefile +** +** CLEAR_BSS include code to clear all BSS space +** OS_CONFIG OS-related (vs. just standalone) variations +*/ + +/* +** A symbol for locating the beginning of the code. +*/ + .globl begtext + + .text +begtext: + +/* +** The entry point. When we get here, we have just entered protected +** mode, so all the segment registers are incorrect except for CS. +*/ + .globl _start + +_start: + cli /* seems to be reset on entry to p. mode */ + movb $NMI_ENABLE, %al /* re-enable NMIs (bootstrap */ + outb $CMOS_ADDR /* turned them off) */ + +/* +** Set the data and stack segment registers (code segment register +** was set by the long jump that switched us into protected mode). +*/ + xorl %eax, %eax /* clear EAX */ + movw $GDT_DATA, %ax /* GDT entry #3 - data segment */ + movw %ax, %ds /* for all four data segment registers */ + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + + movw $GDT_STACK, %ax /* entry #4 is the stack segment */ + movw %ax, %ss + + movl $TARGET_STACK, %esp /* set up the system stack pointer */ + +#ifdef CLEAR_BSS +/* +** Zero the BSS segment +** +** These symbols are defined automatically by the linker, but they're +** defined at their virtual addresses rather than their physical addresses, +** and we haven't enabled paging yet. +*/ + .globl __bss_start, _end + + movl $V2P(__bss_start), %edi +clearbss: + movl $0, (%edi) + addl $4, %edi + cmpl $V2P(_end), %edi + jb clearbss +#endif /* CLEAR_BSS */ + +/* +** Enable paging. We use "large" pages for the initial page directory +** so that a one-level hierarchy will work for us. Once we have set +** up our memory freelist, we'll create a two-level hierarchy using +** "normal" 4KB pages. +*/ + # enable large pages + movl %cr4, %eax + orl $(CR4_PSE), %eax + movl %eax, %cr4 + + # set the page directory + .globl firstpdir + movl $(V2P(firstpdir)+0x1000), %eax + movl %eax, %cr3 + + # turn on paging + movl %cr0, %eax + orl $(CR0_PG), %eax + movl %eax, %cr0 + + # reset our stack pointer + movl $(kstack + SZ_KSTACK), %esp + + # set the initial frame pointer + xorl %ebp, %ebp + +/* +** Call the system initialization routine, and switch to +** executing at high addresses. We use an indirect jump +** here to avoid getting a PC-relative 'jmp' instruction. +** +** Alternate idea: push the address of isr_restore +** and just do an indirect jump? +*/ + .globl main + + movl $main, %eax + call *%eax + +/* +** At this point, main() must have created the first user +** process, and we're ready to shift into user mode. The user +** stack for that process must have the initial context in it; +** we treat this as a "return from interrupt" event, and just +** transfer to the code that restores the user context. +*/ + + .globl isr_restore + jmp isr_restore + + .data + +/* +** Define the kernel stack here, at a multiple-of-16 address +*/ + .p2align 4 + .globl kstack +kstack: .space SZ_KSTACK, 0 + +/* +** Define the initial kernel ESP here, as well. It should point +** to the first byte after the stack. +*/ + + .globl kernel_esp +kernel_esp: + .long kstack + SZ_KSTACK diff --git a/kernel/support.c b/kernel/support.c new file mode 100644 index 0000000..d48ce59 --- /dev/null +++ b/kernel/support.c @@ -0,0 +1,279 @@ +/* +** SCCS ID: @(#)support.c 2.6 1/22/25 +** +** @file support.c +** +** @author 4003-506 class of 20003 +** @authors K. Reek, Warren R. Carithers +** +** Miscellaneous system initialization functions, interrupt +** support routines, and data structures. +*/ + +#include <common.h> + +#include <support.h> +#include <cio.h> +#include <x86/arch.h> +#include <x86/pic.h> +#include <x86/ops.h> +#include <bootstrap.h> +#include <syscalls.h> + +/* +** Global variables and local data types. +*/ + +/* +** This is the table that contains pointers to the C-language ISR for +** each interrupt. These functions are called from the isr stub based +** on the interrupt number. +*/ +void ( *isr_table[ 256 ] )( int vector, int code ); + +/* +** Format of an IDT entry. +*/ +typedef struct { + short offset_15_0; + short segment_selector; + short flags; + short offset_31_16; +} IDT_Gate; + +/* +** LOCAL ROUTINES - not intended to be used outside this module. +*/ + +/** +** unexpected_handler +** +** This routine catches interrupts that we do not expect to ever occur. +** It handles them by (optionally) reporting them and then calling panic(). +** +** @param vector vector number for the interrupt that occurred +** @param code error code, or a dummy value +** +** Does not return. +*/ +#ifdef RPT_INT_UNEXP +/* add any header includes you need here */ +#endif +static void unexpected_handler( int vector, int code ) { +#ifdef RPT_INT_UNEXP + cio_printf( "\n** UNEXPECTED vector %d code %d\n", vector, code ); +#endif + panic( "Unexpected interrupt" ); +} + +/** +** default_handler +** +** Default handler for interrupts we expect may occur but are not +** handling (yet). We just reset the PIC and return. +** +** @param vector vector number for the interrupt that occurred +** @param code error code, or a dummy value +*/ +static void default_handler( int vector, int code ) { +#ifdef RPT_INT_UNEXP + cio_printf( "\n** vector %d code %d\n", vector, code ); +#endif + if( vector >= 0x20 && vector < 0x30 ) { + if( vector > 0x27 ) { + // must also ACK the secondary PIC + outb( PIC2_CMD, PIC_EOI ); + } + outb( PIC1_CMD, PIC_EOI ); + } else { + /* + ** All the "expected" interrupts will be handled by the + ** code above. If we get down here, the isr table may + ** have been corrupted. Print a message and don't return. + */ + panic( "Unexpected \"expected\" interrupt!" ); + } +} + +/** +** mystery_handler +** +** Default handler for the "mystery" interrupt that comes through vector +** 0x27. This is a non-repeatable interrupt whose source has not been +** identified, but it appears to be the famous "spurious level 7 interrupt" +** source. +** +** @param vector vector number for the interrupt that occurred +** @param code error code, or a dummy value +*/ +static void mystery_handler( int vector, int code ) { +#if defined(RPT_INT_MYSTERY) || defined(RPT_INT_UNEXP) + cio_printf( "\nMystery interrupt!\nVector=0x%02x, code=%d\n", + vector, code ); +#endif + outb( PIC1_CMD, PIC_EOI ); +} + +/** +** init_pic +** +** Initialize the 8259 Programmable Interrupt Controller. +*/ +static void init_pic( void ) { + /* + ** ICW1: start the init sequence, update ICW4 + */ + outb( PIC1_CMD, PIC_CW1_INIT | PIC_CW1_NEED4 ); + outb( PIC2_CMD, PIC_CW1_INIT | PIC_CW1_NEED4 ); + + /* + ** ICW2: primary offset of 0x20 in the IDT, secondary offset of 0x28 + */ + outb( PIC1_DATA, PIC1_CW2_VECBASE ); + outb( PIC2_DATA, PIC2_CW2_VECBASE ); + + /* + ** ICW3: secondary attached to line 2 of primary, bit mask is 00000100 + ** secondary id is 2 + */ + outb( PIC1_DATA, PIC1_CW3_SEC_IRQ2 ); + outb( PIC2_DATA, PIC2_CW3_SEC_ID ); + + /* + ** ICW4: want 8086 mode, not 8080/8085 mode + */ + outb( PIC1_DATA, PIC_CW4_PM86 ); + outb( PIC2_DATA, PIC_CW4_PM86 ); + + /* + ** OCW1: allow interrupts on all lines + */ + outb( PIC1_DATA, PIC_MASK_NONE ); + outb( PIC2_DATA, PIC_MASK_NONE ); +} + +/** +** set_idt_entry +** +** Construct an entry in the IDT +** +** @param entry the vector number of the interrupt +** @param handler ISR address to be put into the IDT entry +** +** Note: generally, the handler invoked from the IDT will be a "stub" +** that calls the second-level C handler via the isr_table array. +*/ +static void set_idt_entry( int entry, void ( *handler )( void ) ) { + IDT_Gate *g = (IDT_Gate *)IDT_ADDR + entry; + + g->offset_15_0 = (int)handler & 0xffff; + g->segment_selector = 0x0010; + g->flags = IDT_PRESENT | IDT_DPL_0 | IDT_INT32_GATE; + g->offset_31_16 = (int)handler >> 16 & 0xffff; +} + +/** +** Name: init_idt +** +** Initialize the Interrupt Descriptor Table (IDT). This makes each of +** the entries in the IDT point to the isr stub for that entry, and +** installs a default handler in the handler table. Temporary handlers +** are then installed for those interrupts we may get before a real +** handler is set up. +*/ +static void init_idt( void ) { + int i; + extern void ( *isr_stub_table[ 256 ] )( void ); + + /* + ** Make each IDT entry point to the stub for that vector. Also + ** make each entry in the ISR table point to the default handler. + */ + for ( i=0; i < 256; i++ ) { + set_idt_entry( i, isr_stub_table[ i ] ); + install_isr( i, unexpected_handler ); + } + + /* + ** Install the handlers for interrupts that have (or will have) a + ** specific handler. Comments indicate which module init function + ** will eventually install the "real" handler. + */ + + install_isr( VEC_KBD, default_handler ); // cio_init() + install_isr( VEC_COM1, default_handler ); // sio_init() + install_isr( VEC_TIMER, default_handler ); // clk_init() + install_isr( VEC_SYSCALL, default_handler ); // sys_init() + install_isr( VEC_PAGE_FAULT, default_handler ); // vm_init() + + install_isr( VEC_MYSTERY, mystery_handler ); +} + +/* +** END OF LOCAL ROUTINES. +** +** Full documentation for globally-visible routines is in the corresponding +** header file. +*/ + +/* +** panic +** +** Called when we find an unrecoverable error. +*/ +void panic( char *reason ) { + __asm__( "cli" ); + cio_printf( "\nPANIC: %s\nHalting...", reason ); + for(;;) { + ; + } +} + +/* +** init_interrupts +** +** (Re)initilizes the interrupt system. +*/ +void init_interrupts( void ) { + init_idt(); + init_pic(); +} + +/* +** install_isr +** +** Installs a second-level handler for a specific interrupt. +*/ +void (*install_isr( int vector, + void (*handler)(int,int) ) ) ( int, int ) { + + void ( *old_handler )( int vector, int code ); + + old_handler = isr_table[ vector ]; + isr_table[ vector ] = handler; + return old_handler; +} + +/* +** Name: delay +** +** Notes: The parameter to the delay() function is ambiguous; it +** purports to indicate a delay length, but that isn't really tied +** to any real-world time measurement. +** +** On the original systems we used (dual 500MHz Intel P3 CPUs), each +** "unit" was approximately one tenth of a second, so delay(10) would +** delay for about one second. +** +** On the current machines (Intel Core i5-7500), delay(100) is about +** 2.5 seconds, so each "unit" is roughly 0.025 seconds. +** +** Ultimately, just remember that DELAY VALUES ARE APPROXIMATE AT BEST. +*/ +void delay( int length ) { + + while( --length >= 0 ) { + for( int i = 0; i < 10000000; ++i ) + ; + } +} diff --git a/kernel/syscalls.c b/kernel/syscalls.c new file mode 100644 index 0000000..7176cda --- /dev/null +++ b/kernel/syscalls.c @@ -0,0 +1,829 @@ +/** +** @file syscalls.c +** +** @author CSCI-452 class of 20245 +** +** @brief System call implementations +*/ + +#define KERNEL_SRC + +#include <common.h> + +#include <cio.h> +#include <clock.h> +#include <procs.h> +#include <sio.h> +#include <syscalls.h> +#include <user.h> +#include <vm.h> +#include <x86/pic.h> + +/* +** PRIVATE DEFINITIONS +*/ + +/* +** Macros to simplify tracing a bit +** +** TRACING_SYSCALLS and TRACING_SYSRETS are defined in debug.h, +** controlled by the TRACE ** macro. If not tracing these, SYSCALL_ENTER +** is a no-op, and SYSCALL_EXIT just does a return. +*/ + +#if TRACING_SYSCALLS + +#define SYSCALL_ENTER(x) do { \ + cio_printf( "--> %s, pid %08x", __func__, (uint32_t) (x) ); \ + } while(0) + +#else + +#define SYSCALL_ENTER(x) /* */ + +#endif /* TRACING_SYSCALLS */ + +#if TRACING_SYSRETS + +#define SYSCALL_EXIT(x) do { \ + cio_printf( "<-- %s %08x\n", __func__, (uint32_t) (x) ); \ + return; \ + } while(0) + +#else + +#define SYSCALL_EXIT(x) return + +#endif /* TRACING_SYSRETS */ + +/* +** PRIVATE DATA TYPES +*/ + +/* +** PUBLIC GLOBAL VARIABLES +*/ + +/* +** IMPLEMENTATION FUNCTIONS +*/ + +// a macro to simplify syscall entry point specification +// we don't declare these static because we may want to call +// some of them from other parts of the kernel +#define SYSIMPL(x) void sys_##x( pcb_t * pcb ) + +/* +** Second-level syscall handlers +** +** All have this prototype: +** +** static void sys_NAME( pcb_t *pcb ); +** +** where the parameter 'pcb' is a pointer to the PCB of the process +** making the system call. +** +** Values being returned to the user are placed into the EAX +** field in the context save area for that process. +*/ + +/** +** sys_exit - terminate the calling process +** +** Implements: +** void exit( int32_t status ); +** +** Does not return +*/ +SYSIMPL(exit) { + + // sanity check + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // retrieve the exit status of this process + pcb->exit_status = (int32_t) ARG(pcb,1); + + // now, we need to do the following: + // reparent any children of this process and wake up init if need be + // find this process' parent and wake it up if it's waiting + + pcb_zombify( pcb ); + + // pick a new winner + dispatch(); + + SYSCALL_EXIT( 0 ); +} + +/** +** sys_waitpid - wait for a child process to terminate +** +** Implements: +** int waitpid( uint_t pid, int32_t *status ); +** +** Blocks the calling process until the specified child (or any child) +** of the caller terminates. Intrinsic return is the PID of the child that +** terminated, or an error code; on success, returns the child's termination +** status via 'status' if that pointer is non-NULL. +*/ +SYSIMPL(waitpid) { + + // sanity check + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + /* + ** We need to do two things here: (1) find out whether or + ** not this process has any children in the system, and (2) + ** find out whether the desired child (or any child, if the + ** target PID is 0) has terminated. + ** + ** To do this, we loop until we find a the requested PID or + ** a Zombie child process, or have gone through all of the + ** slots in the process table. + ** + ** If the target PID is 0, we don't care which child process + ** we reap here; there could be several, but we only need to + ** find one. + */ + + // verify that we aren't looking for ourselves! + uint_t target = ARG(pcb,1); + + if( target == pcb->pid ) { + RET(pcb) = E_BAD_PARAM; + SYSCALL_EXIT( E_BAD_PARAM ); + } + + // Good. Now, figure out what we're looking for. + + pcb_t *child = NULL; + + if( target != 0 ) { + + // we're looking for a specific child + child = pcb_find_pid( target ); + + if( child != NULL ) { + + // found the process; is it one of our children: + if( child->parent != pcb ) { + // NO, so we can't wait for it + RET(pcb) = E_BAD_PARAM; + SYSCALL_EXIT( E_BAD_PARAM ); + } + + // yes! is this one ready to be collected? + if( child->state != STATE_ZOMBIE ) { + // no, so we'll have to block for now + child = NULL; + } + + } else { + + // no such child + RET(pcb) = E_BAD_PARAM; + SYSCALL_EXIT( E_BAD_PARAM ); + + } + + } else { + + // looking for any child + + // we need to find a process that is our child + // and has already exited + + child = NULL; + bool_t found = false; + + // unfortunately, we can't stop at the first child, + // so we need to do the iteration ourselves + register pcb_t *curr = ptable; + + for( int i = 0; i < N_PROCS; ++i, ++curr ) { + + if( curr->parent == pcb ) { + + // found one! + found = true; + + // has it already exited? + if( curr->state == STATE_ZOMBIE ) { + // yes, so we're done here + child = curr; + break; + } + } + } + + if( !found ) { + // got through the loop without finding a child! + RET(pcb) = E_NO_CHILDREN; + SYSCALL_EXIT( E_NO_CHILDREN ); + } + + } + + /* + ** At this point, one of these situations is true: + ** + ** * we are looking for a specific child and found it + ** * we are looking for any child and found one + ** + ** Either way, 'child' will be non-NULL if the selected + ** process has already become a Zombie. If that's the + ** case, we collect its status and clean it up; otherwise, + ** we block this process. + */ + + // did we find one to collect? + if( child == NULL ) { + + // no - mark the parent as "Waiting" + pcb->state = STATE_WAITING; + assert( pcb_queue_insert(waiting,pcb) == SUCCESS ); + + // select a new current process + dispatch(); + SYSCALL_EXIT( (uint32_t) current ); + } + + // found a Zombie; collect its information and clean it up + RET(pcb) = child->pid; + + // get "status" pointer from parent + int32_t *stat = (int32_t *) ARG(pcb,2); + + // if stat is NULL, the parent doesn't want the status + if( stat != NULL ) { + // ******************************************************** + // ** Potential VM issue here! This code assigns the exit + // ** status into a variable in the parent's address space. + // ** This works in the baseline because we aren't using + // ** any type of memory protection. If address space + // ** separation is implemented, this code will very likely + // ** STOP WORKING, and will need to be fixed. + // ******************************************************** + *stat = child->exit_status; + } + + // clean up the child + pcb_cleanup( child ); + + SYSCALL_EXIT( RET(pcb) ); +} + +/** +** sys_fork - create a new process +** +** Implements: +** int fork( void ); +** +** Creates a new process that is a duplicate of the calling process. +** Returns the child's PID to the parent, and 0 to the child, on success; +** else, returns an error code to the parent. +*/ +SYSIMPL(fork) { + + // sanity check + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // Make sure there's room for another process! + pcb_t *new; + if( pcb_alloc(&new) != SUCCESS || new == NULL ) { + RET(pcb) = E_NO_PROCS; + SYSCALL_EXIT( RET(pcb) ); + } + + // duplicate the memory image of the parent + int status = user_duplicate( new, pcb ); + if( status != SUCCESS ) { + pcb_free( new ); + RET(pcb) = status; + SYSCALL_EXIT( status ); + } + + // Set the child's identity. + new->pid = next_pid++; + new->parent = pcb; + new->state = STATE_NEW; + + // replicate other things inherited from the parent + new->priority = pcb->priority; + + // Set the return values for the two processes. + RET(pcb) = new->pid; + RET(new) = 0; + + // Schedule the child, and let the parent continue. + schedule( new ); + + SYSCALL_EXIT( new->pid ); +} + +/** +** sys_exec - replace the memory image of a process +** +** Implements: +** void exec( uint_t what, char **args ); +** +** Replaces the memory image of the calling process with that of the +** indicated program. +** +** Returns only on failure. +*/ +SYSIMPL(exec) +{ + // sanity check + assert( pcb != NULL ); + + uint_t what = ARG(pcb,1); + const char **args = (const char **) ARG(pcb,2); + + SYSCALL_ENTER( pcb->pid ); + + // locate the requested program + prog_t *prog = user_locate( what ); + if( prog == NULL ) { + RET(pcb) = E_NOT_FOUND; + SYSCALL_EXIT( E_NOT_FOUND ); + } + + // we have located the program, but before we can load it, + // we need to clean up the existing VM hierarchy + vm_free( pcb->pdir ); + pcb->pdir = NULL; + + // "load" it and set up the VM tables for this process + int status = user_load( prog, pcb, args ); + if( status != SUCCESS ) { + RET(pcb) = status; + SYSCALL_EXIT( status ); + } + + /* + ** Decision: + ** (A) schedule this process and dispatch another, + ** (B) let this one continue in its current time slice + ** (C) reset this one's time slice and let it continue + ** + ** We choose option A. + ** + ** If scheduling the process fails, the exec() has failed. However, + ** all trace of the old process is gone by now, so we can't return + ** an error status to it. + */ + + schedule( pcb ); + + dispatch(); +} + +/** +** sys_read - read into a buffer from an input channel +** +** Implements: +** int read( uint_t chan, void *buffer, uint_t length ); +** +** Reads up to 'length' bytes from 'chan' into 'buffer'. Returns the +** count of bytes actually transferred. +*/ +SYSIMPL(read) { + + // sanity check + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // grab the arguments + uint_t chan = ARG(pcb,1); + char *buf = (char *) ARG(pcb,2); + uint_t len = ARG(pcb,3); + + // if the buffer is of length 0, we're done! + if( len == 0 ) { + RET(pcb) = 0; + SYSCALL_EXIT( 0 ); + } + + // try to get the next character(s) + int n = 0; + + if( chan == CHAN_CIO ) { + + // console input is non-blocking + if( cio_input_queue() < 1 ) { + RET(pcb) = 0; + SYSCALL_EXIT( 0 ); + } + // at least one character + n = cio_gets( buf, len ); + RET(pcb) = n; + SYSCALL_EXIT( n ); + + } else if( chan == CHAN_SIO ) { + + // SIO input is blocking, so if there are no characters + // available, we'll block this process + n = sio_read( buf, len ); + RET(pcb) = n; + SYSCALL_EXIT( n ); + + } + + // bad channel code + RET(pcb) = E_BAD_PARAM; + SYSCALL_EXIT( E_BAD_PARAM ); +} + +/** +** sys_write - write from a buffer to an output channel +** +** Implements: +** int write( uint_t chan, const void *buffer, uint_t length ); +** +** Writes 'length' bytes from 'buffer' to 'chan'. Returns the +** count of bytes actually transferred. +*/ +SYSIMPL(write) { + + // sanity check + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // grab the parameters + uint_t chan = ARG(pcb,1); + char *buf = (char *) ARG(pcb,2); + uint_t length = ARG(pcb,3); + + // this is almost insanely simple, but it does separate the + // low-level device access fromm the higher-level syscall implementation + + // assume we write the indicated amount + int rval = length; + + // simplest case + if( length >= 0 ) { + + if( chan == CHAN_CIO ) { + + cio_write( buf, length ); + + } else if( chan == CHAN_SIO ) { + + sio_write( buf, length ); + + } else { + + rval = E_BAD_CHAN; + + } + + } + + RET(pcb) = rval; + + SYSCALL_EXIT( rval ); +} + +/** +** sys_getpid - returns the PID of the calling process +** +** Implements: +** uint_t getpid( void ); +*/ +SYSIMPL(getpid) { + + // sanity check! + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // return the time + RET(pcb) = pcb->pid; +} + +/** +** sys_getppid - returns the PID of the parent of the calling process +** +** Implements: +** uint_t getppid( void ); +*/ +SYSIMPL(getppid) { + + // sanity check! + assert( pcb != NULL ); + assert( pcb->parent != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // return the time + RET(pcb) = pcb->parent->pid; +} + +/** +** sys_gettime - returns the current system time +** +** Implements: +** uint32_t gettime( void ); +*/ +SYSIMPL(gettime) { + + // sanity check! + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // return the time + RET(pcb) = system_time; +} + +/** +** sys_getprio - the scheduling priority of the calling process +** +** Implements: +** int getprio( void ); +*/ +SYSIMPL(getprio) { + + // sanity check! + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // return the time + RET(pcb) = pcb->priority; +} + +/** +** sys_setprio - sets the scheduling priority of the calling process +** +** Implements: +** int setprio( int new ); +*/ +SYSIMPL(setprio) { + + // sanity check! + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // remember the old priority + int old = pcb->priority; + + // set the priority + pcb->priority = ARG(pcb,1); + + // return the old value + RET(pcb) = old; +} + +/** +** sys_kill - terminate a process with extreme prejudice +** +** Implements: +** int32_t kill( uint_t pid ); +** +** Marks the specified process (or the calling process, if PID is 0) +** as "killed". Returns 0 on success, else an error code. +*/ +SYSIMPL(kill) { + + // sanity check + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // who is the victim? + uint_t pid = ARG(pcb,1); + + // if it's this process, convert this into a call to exit() + if( pid == pcb->pid ) { + pcb->exit_status = EXIT_KILLED; + pcb_zombify( pcb ); + dispatch(); + SYSCALL_EXIT( EXIT_KILLED ); + } + + // must be a valid "ordinary user" PID + // QUESTION: what if it's the idle process? + if( pid < FIRST_USER_PID ) { + RET(pcb) = E_FAILURE; + SYSCALL_EXIT( E_FAILURE ); + } + + // OK, this is an acceptable victim; see if it exists + pcb_t *victim = pcb_find_pid( pid ); + if( victim == NULL ) { + // nope! + RET(pcb) = E_NOT_FOUND; + SYSCALL_EXIT( E_NOT_FOUND ); + } + + // must have a state that is possible + assert( victim->state >= FIRST_VIABLE && victim->state < N_STATES ); + + // how we perform the kill depends on the victim's state + int32_t status = SUCCESS; + + switch( victim->state ) { + + case STATE_KILLED: // FALL THROUGH + case STATE_ZOMBIE: + // you can't kill it if it's already dead + RET(pcb) = SUCCESS; + break; + + case STATE_READY: // FALL THROUGH + case STATE_SLEEPING: // FALL THROUGH + case STATE_BLOCKED: // FALL THROUGH + // here, the process is on a queue somewhere; mark + // it as "killed", and let the scheduler deal with it + victim->state = STATE_KILLED; + RET(pcb) = SUCCESS; + break; + + case STATE_RUNNING: + // we have met the enemy, and it is us! + pcb->exit_status = EXIT_KILLED; + pcb_zombify( pcb ); + status = EXIT_KILLED; + // we need a new current process + dispatch(); + break; + + case STATE_WAITING: + // similar to the 'running' state, but we don't need + // to dispatch a new process + victim->exit_status = EXIT_KILLED; + status = pcb_queue_remove_this( waiting, victim ); + pcb_zombify( victim ); + RET(pcb) = status; + break; + + default: + // this is a really bad potential problem - we have an + // unexpected or bogus process state, but we didn't + // catch that earlier. + sprint( b256, "*** kill(): victim %d, odd state %d\n", + victim->pid, victim->state ); + PANIC( 0, b256 ); + } + + SYSCALL_EXIT( status ); +} + + +/** +** sys_sleep - put the calling process to sleep for some length of time +** +** Implements: +** uint_t sleep( uint_t ms ); +** +** Puts the calling process to sleep for 'ms' milliseconds (or just yields +** the CPU if 'ms' is 0). ** Returns the time the process spent sleeping. +*/ +SYSIMPL(sleep) { + + // sanity check + assert( pcb != NULL ); + + SYSCALL_ENTER( pcb->pid ); + + // get the desired duration + uint_t length = ARG( pcb, 1 ); + + if( length == 0 ) { + + // just yield the CPU + // sleep duration is 0 + RET(pcb) = 0; + + // back on the ready queue + schedule( pcb ); + + } else { + + // sleep for a while + pcb->wakeup = system_time + length; + + if( pcb_queue_insert(sleeping,pcb) != SUCCESS ) { + // something strange is happening + WARNING( "sleep pcb insert failed" ); + // if this is the current process, report an error + if( current == pcb ) { + RET(pcb) = -1; + } + // return without dispatching a new process + return; + } + } + + // only dispatch if the current process called us + if( pcb == current ) { + current = NULL; + dispatch(); + } +} + +/* +** PRIVATE FUNCTIONS GLOBAL VARIABLES +*/ + +/* +** The system call jump table +** +** Initialized using designated initializers to ensure the entries +** are correct even if the syscall code values should happen to change. +** This also makes it easy to add new system call entries, as their +** position in the initialization list is irrelevant. +*/ + +static void (* const syscalls[N_SYSCALLS])( pcb_t * ) = { + [ SYS_exit ] = sys_exit, + [ SYS_waitpid ] = sys_waitpid, + [ SYS_fork ] = sys_fork, + [ SYS_exec ] = sys_exec, + [ SYS_read ] = sys_read, + [ SYS_write ] = sys_write, + [ SYS_getpid ] = sys_getpid, + [ SYS_getppid ] = sys_getppid, + [ SYS_gettime ] = sys_gettime, + [ SYS_getprio ] = sys_getprio, + [ SYS_setprio ] = sys_setprio, + [ SYS_kill ] = sys_kill, + [ SYS_sleep ] = sys_sleep +}; + +/** +** Name: sys_isr +** +** System call ISR +** +** @param vector Vector number for this interrupt +** @param code Error code (0 for this interrupt) +*/ +static void sys_isr( int vector, int code ) { + + // keep the compiler happy + (void) vector; + (void) code; + + // sanity check! + assert( current != NULL ); + assert( current->context != NULL ); + + // retrieve the syscall code + int num = REG( current, eax ); + +#if TRACING_SYSCALLS + cio_printf( "** --> SYS pid %u code %u\n", current->pid, num ); +#endif + + // validate it + if( num < 0 || num >= N_SYSCALLS ) { + // bad syscall number + // could kill it, but we'll just force it to exit + num = SYS_exit; + ARG(current,1) = EXIT_BAD_SYSCALL; + } + + // call the handler + syscalls[num]( current ); + +#if TRACING_SYSCALLS + cio_printf( "** <-- SYS pid %u ret %u\n", current->pid, RET(current) ); +#endif + + // tell the PIC we're done + outb( PIC1_CMD, PIC_EOI ); +} + +/* +** PUBLIC FUNCTIONS +*/ + +/** +** Name: sys_init +** +** Syscall module initialization routine +** +** Dependencies: +** Must be called after cio_init() +*/ +void sys_init( void ) { + +#if TRACING_INIT + cio_puts( " Sys" ); +#endif + + // install the second-stage ISR + install_isr( VEC_SYSCALL, sys_isr ); +} diff --git a/kernel/user.c b/kernel/user.c new file mode 100644 index 0000000..2d32157 --- /dev/null +++ b/kernel/user.c @@ -0,0 +1,774 @@ +/** +** @file user.c +** +** @author CSCI-452 class of 20245 +** +** @brief User-level code manipulation routines +*/ + +#define KERNEL_SRC + +#include <common.h> + +#include <bootstrap.h> +#include <elf.h> +#include <user.h> +#include <vm.h> + +/* +** PRIVATE DEFINITIONS +*/ + +/* +** PRIVATE DATA TYPES +*/ + +/* +** PRIVATE GLOBAL VARIABLES +*/ + +/* +** PUBLIC GLOBAL VARIABLES +*/ + +/* +** Location of the "user blob" in memory. +** +** These variables are filled in by the code in startup.S using values +** passed to it from the bootstrap. +** +** These are visible so that the startup code can find them. +*/ +uint16_t user_offset; // byte offset from the segment base +uint16_t user_segment; // segment base address +uint16_t user_sectors; // number of 512-byte sectors it occupies + +header_t *user_header; // filled in by the user_init routine +prog_t *prog_table; // filled in by the user_init routine + +/* +** PRIVATE FUNCTIONS +*/ + +#if TRACING_ELF + +/* +** This is debugging support code; if not debugging the ELF +** handling code, it won't be compiled into the kernel. +*/ + +// buffer used by some of these functions +static char ebuf[16]; + +/* +** File header functions +*/ + +// interpret the file class +static const char *fh_eclass( e32_si class ) { + switch( class ) { + case ELF_CLASS_NONE: return( "None" ); break; + case ELF_CLASS_32: return( "EC32" ); break; + case ELF_CLASS_64: return( "EC64" ); break; + } + return( "????" ); +} + +// interpret the data encoding +static const char *fh_edata( e32_si data ) { + switch( data ) { + case ELF_DATA_NONE: return( "Invd" ); break; + case ELF_DATA_2LSB: return( "2CLE" ); break; + case ELF_DATA_2MSB: return( "2CBE" ); break; + } + return( "????" ); +} + +// interpret the file type +static const char *fh_htype( e32_h type ) { + switch( type ) { + case ET_NONE: return( "none" ); break; + case ET_REL: return( "rel" ); break; + case ET_EXEC: return( "exec" ); break; + case ET_DYN: return( "dyn" ); break; + case ET_CORE: return( "core" ); break; + default: + if( type >= ET_LO_OS && type <= ET_HI_OS ) + return( "OSsp" ); + else if( type >= ET_LO_CP && type <= ET_HI_CP ) + return( "CPsp" ); + } + sprint( ebuf, "0x%04x", type ); + return( (const char *) ebuf ); +} + +// interpret the machine type +static const char *fh_mtype( e32_h machine ) { + switch( machine ) { + case EM_NONE: return( "None" ); break; + case EM_386: return( "386" ); break; + case EM_ARM: return( "ARM" ); break; + case EM_X86_64: return( "AMD64" ); break; + case EM_AARCH64: return( "AARCH64" ); break; + case EM_RISCV: return( "RISC-V" ); break; + } + return( "Other" ); +} + +// dump the program header +static void dump_fhdr( elfhdr_t *hdr ) { + cio_puts( "File header: magic " ); + for( int i = EI_MAG0; i <= EI_MAG3; ++i ) + put_char_or_code( hdr->e_ident.bytes[i] ); + cio_printf( " class %s", fh_eclass(hdr->e_ident.f.class) ); + cio_printf( " enc %s", fh_edata(hdr->e_ident.f.data) ); + cio_printf( " ver %u\n", hdr->e_ident.f.version ); + cio_printf( " type %s", fh_htype(hdr->e_type) ); + cio_printf( " mach %s", fh_mtype(hdr->e_machine) ); + cio_printf( " vers %d", hdr->e_version ); + cio_printf( " entr %08x\n", hdr->e_entry ); + + cio_printf( " phoff %08x", hdr->e_phoff ); + cio_printf( " shoff %08x", hdr->e_shoff ); + cio_printf( " flags %08x", (uint32_t) hdr->e_flags ); + cio_printf( " ehsize %u\n", hdr->e_ehsize ); + cio_printf( " phentsize %u", hdr->e_phentsize ); + cio_printf( " phnum %u", hdr->e_phnum ); + cio_printf( " shentsize %u", hdr->e_shentsize ); + cio_printf( " shnum %u", hdr->e_shnum ); + cio_printf( " shstrndx %u\n", hdr->e_shstrndx ); +} + +/* +** Program header functions +*/ + +// categorize the header type +static const char *ph_type( e32_w type ) { + switch( type ) { + case PT_NULL: return( "Unused" ); break; + case PT_LOAD: return( "Load" ); break; + case PT_DYNAMIC: return( "DLI" ); break; + case PT_INTERP: return( "Interp" ); break; + case PT_NOTE: return( "Aux" ); break; + case PT_SHLIB: return( "RSVD" ); break; + case PT_PHDR: return( "PTentry" ); break; + case PT_TLS: return( "TLS" ); break; + default: + if( type >= PT_LO_OS && type <= PT_HI_OS ) + return( "OSsp" ); + else if( type >= PT_LO_CP && type <= PT_HI_CP ) + return( "CPsp" ); + } + sprint( ebuf, "0x%08x", type ); + return( (const char *) ebuf ); +} + +// report the individual flags +static void ph_flags( e32_w flags ) { + if( (flags & PF_R) != 0 ) cio_putchar( 'R' ); + if( (flags & PF_W) != 0 ) cio_putchar( 'W' ); + if( (flags & PF_E) != 0 ) cio_putchar( 'X' ); +} + +// dump a program header +static void dump_phdr( elfproghdr_t *hdr, int n ) { + cio_printf( "Prog header %d, type %s\n", n, ph_type(hdr->p_type) ); + cio_printf( " offset %08x", hdr->p_offset ); + cio_printf( " va %08x", hdr->p_va ); + cio_printf( " pa %08x\n", hdr->p_pa ); + cio_printf( " filesz %08x", hdr->p_filesz ); + cio_printf( " memsz %08x", hdr->p_memsz ); + cio_puts( " flags " ); + ph_flags( hdr->p_flags ); + cio_printf( " align %08x", hdr->p_align ); + cio_putchar( '\n' ); +} + +/* +** Section header functions +*/ + +// interpret the header type +static const char *sh_type( e32_w type ) { + switch( type ) { + case SHT_NULL: return( "Unused" ); break; + case SHT_PROGBITS: return( "Progbits" ); break; + case SHT_SYMTAB: return( "Symtab" ); break; + case SHT_STRTAB: return( "Strtab" ); break; + case SHT_RELA: return( "Rela" ); break; + case SHT_HASH: return( "Hash" ); break; + case SHT_DYNAMIC: return( "Dynamic" ); break; + case SHT_NOTE: return( "Note" ); break; + case SHT_NOBITS: return( "Nobits" ); break; + case SHT_REL: return( "Rel" ); break; + case SHT_SHLIB: return( "Shlib" ); break; + case SHT_DYNSYM: return( "Dynsym" ); break; + default: + if( type >= SHT_LO_CP && type <= SHT_HI_CP ) + return( "CCsp" ); + else if( type >= SHT_LO_US && type <= SHT_HI_US ) + return( "User" ); + } + sprint( ebuf, "0x%08x", type ); + return( (const char *) ebuf ); +} + +// report the various flags +static void sh_flags( unsigned int flags ) { + if( (flags & SHF_WRITE) != 0 ) cio_putchar( 'W' ); + if( (flags & SHF_ALLOC) != 0 ) cio_putchar( 'A' ); + if( (flags & SHF_EXECINSTR) != 0 ) cio_putchar( 'X' ); + if( (flags & SHF_MERGE) != 0 ) cio_putchar( 'M' ); + if( (flags & SHF_STRINGS) != 0 ) cio_putchar( 'S' ); + if( (flags & SHF_INFO_LINK) != 0 ) cio_putchar( 'L' ); + if( (flags & SHF_LINK_ORDER) != 0 ) cio_putchar( 'o' ); + if( (flags & SHF_OS_NONCON) != 0 ) cio_putchar( 'n' ); + if( (flags & SHF_GROUP) != 0 ) cio_putchar( 'g' ); + if( (flags & SHF_TLS) != 0 ) cio_putchar( 't' ); +} + +// dump a section header +__attribute__((__unused__)) +static void dump_shdr( elfsecthdr_t *hdr, int n ) { + cio_printf( "Sect header %d, type %d (%s), name %s\n", + n, hdr->sh_type, sh_type(hdr->sh_type) ); + cio_printf( " flags %08x ", (uint32_t) hdr->sh_flags ); + sh_flags( hdr->sh_flags ); + cio_printf( " addr %08x", hdr->sh_addr ); + cio_printf( " offset %08x", hdr->sh_offset ); + cio_printf( " size %08x\n", hdr->sh_size ); + cio_printf( " link %08x", hdr->sh_link ); + cio_printf( " info %08x", hdr->sh_info ); + cio_printf( " align %08x", hdr->sh_addralign ); + cio_printf( " entsz %08x\n", hdr->sh_entsize ); +} +#endif + +/** +** read_phdrs(addr,phoff,phentsize,phnum) +** +** Parses the ELF program headers and each segment described into memory. +** +** @param hdr Pointer to the program header +** @param pcb Pointer to the PCB (and its PDE) +** +** @return status of the attempt: +** SUCCESS everything loaded correctly +** E_LOAD_LIMIT more than N_LOADABLE PT_LOAD sections +** other status returned from vm_add() +*/ +static int read_phdrs( elfhdr_t *hdr, pcb_t *pcb ) { + + // sanity check + assert1( hdr != NULL ); + assert2( pcb != NULL ); + +#if TRACING_USER + cio_printf( "read_phdrs(%08x,%08x)\n", (uint32_t) hdr, (uint32_t) pcb ); +#endif + + // iterate through the program headers + uint_t nhdrs = hdr->e_phnum; + + // pointer to the first header table entry + elfproghdr_t *curr = (elfproghdr_t *) ((uint32_t) hdr + hdr->e_phoff); + + // process them all + int loaded = 0; + for( uint_t i = 0; i < nhdrs; ++i, ++curr ) { + +#if TRACING_ELF + dump_phdr( curr, i ); +#endif + if( curr->p_type != PT_LOAD ) { + // not loadable --> we'll skip it + continue; + } + + if( loaded >= N_LOADABLE ) { +#if TRACING_USER + cio_puts( " LIMIT\n" ); +#endif + return E_LOAD_LIMIT; + } + + // set a pointer to the bytes within the object file + char *data = (char *) (((uint32_t)hdr) + curr->p_offset); +#if TRACING_USER + cio_printf( " data @ %08x", (uint32_t) data ); +#endif + + // copy the pages into memory + int stat = vm_add( pcb->pdir, curr->p_flags & PF_W, false, + (char *) curr->p_va, curr->p_memsz, data, curr->p_filesz ); + if( stat != SUCCESS ) { + // TODO what else should we do here? check for memory leak? + return stat; + } + + // set the section table entry in the PCB + pcb->sects[loaded].length = curr->p_memsz; + pcb->sects[loaded].addr = curr->p_va; +#if TRACING_USER + cio_printf( " loaded %u @ %08x\n", + pcb->sects[loaded].length, pcb->sects[loaded].addr ); +#endif + ++loaded; + } + + return SUCCESS; +} + +/** +** Name: stack_setup +** +** Set up the stack for a new process +** +** @param pcb Pointer to the PCB for the process +** @param entry Entry point for the new process +** @param args Argument vector to be put in place +** +** @return A pointer to the context_t on the stack, or NULL +*/ +static context_t *stack_setup( pcb_t *pcb, uint32_t entry, const char **args ) { + + /* + ** First, we need to count the space we'll need for the argument + ** vector and strings. + */ + + int argbytes = 0; + int argc = 0; + + while( args[argc] != NULL ) { + int n = strlen( args[argc] ) + 1; + // can't go over one page in size + if( (argbytes + n) > SZ_PAGE ) { + // oops - ignore this and any others + break; + } + argbytes += n; + ++argc; + } + + // Round up the byte count to the next multiple of four. + argbytes = (argbytes + 3) & MOD4_MASK; + + /* + ** Allocate the arrays. We are safe using dynamic arrays here + ** because we're using the OS stack, not the user stack. + ** + ** We want the argstrings and argv arrays to contain all zeroes. + ** The C standard states, in section 6.7.8, that + ** + ** "21 If there are fewer initializers in a brace-enclosed list + ** than there are elements or members of an aggregate, or + ** fewer characters in a string literal used to initialize an + ** array of known size than there are elements in the array, + ** the remainder of the aggregate shall be initialized + ** implicitly the same as objects that have static storage + ** duration." + ** + ** Sadly, because we're using variable-sized arrays, we can't + ** rely on this, so we have to call memclr() instead. :-( In + ** truth, it doesn't really cost us much more time, but it's an + ** annoyance. + */ + + char argstrings[ argbytes ]; + char *argv[ argc + 1 ]; + + CLEAR( argstrings ); + CLEAR( argv ); + + // Next, duplicate the argument strings, and create pointers to + // each one in our argv. + char *tmp = argstrings; + for( int i = 0; i < argc; ++i ) { + int nb = strlen(args[i]) + 1; // bytes (incl. NUL) in this string + strcpy( tmp, args[i] ); // add to our buffer + argv[i] = tmp; // remember where it was + tmp += nb; // move on + } + + // trailing NULL pointer + argv[argc] = NULL; + + /* + ** The pages for the stack were cleared when they were allocated, + ** so we don't need to remember to do that. + ** + ** We reserve one longword at the bottom of the stack to hold a + ** pointer to where argv is on the stack. + ** + ** The user code was linked with a startup function that defines + ** the entry point (_start), calls main(), and then calls exit() + ** if main() returns. We need to set up the stack this way: + ** + ** esp -> context <- context save area + ** ... <- context save area + ** context <- context save area + ** entry_pt <- return address for the ISR + ** argc <- argument count for main() + ** /-> argv <- argv pointer for main() + ** | ... <- argv array w/trailing NULL + ** | ... <- argv character strings + ** \--- ptr <- last word in stack + ** + ** Stack alignment rules for the SysV ABI i386 supplement dictate that + ** the 'argc' parameter must be at an address that is a multiple of 16; + ** see below for more information. + */ + + // Pointer to the last word in stack. We get this from the + // VM hierarchy. Get the PDE entry for the user address space. + pde_t stack_pde = pcb->pdir[USER_PDE]; + + // The PDE entry points to the PT, which is an array of PTE. The last + // two entries are for the stack; pull out the last one. + pte_t stack_pte = ((pte_t *)(stack_pde & MOD4K_MASK))[USER_STK_PTE2]; + + // OK, now we have the PTE. The frame address of the last page is + // in this PTE. Find the address immediately after that. + uint32_t *ptr = (uint32_t *) + ((uint32_t)(stack_pte & MOD4K_MASK) + SZ_PAGE); + + // Pointer to where the arg strings should be filled in. + char *strings = (char *) ( (uint32_t) ptr - argbytes ); + + // back the pointer up to the nearest word boundary; because we're + // moving toward location 0, the nearest word boundary is just the + // next smaller address whose low-order two bits are zeroes + strings = (char *) ((uint32_t) strings & MOD4_MASK); + + // Copy over the argv strings. + memcpy( (void *)strings, argstrings, argbytes ); + + /* + ** Next, we need to copy over the argv pointers. Start by + ** determining where 'argc' should go. + ** + ** Stack alignment is controlled by the SysV ABI i386 supplement, + ** version 1.2 (June 23, 2016), which states in section 2.2.2: + ** + ** "The end of the input argument area shall be aligned on a 16 + ** (32 or 64, if __m256 or __m512 is passed on stack) byte boundary. + ** In other words, the value (%esp + 4) is always a multiple of 16 + ** (32 or 64) when control is transferred to the function entry + ** point. The stack pointer, %esp, always points to the end of the + ** latest allocated stack frame." + ** + ** Isn't technical documentation fun? Ultimately, this means that + ** the first parameter to main() should be on the stack at an address + ** that is a multiple of 16. + ** + ** The space needed for argc, argv, and the argv array itself is + ** argc + 3 words (argc+1 for the argv entries, plus one word each + ** for argc and argv). We back up that much from 'strings'. + */ + + int nwords = argc + 3; + uint32_t *acptr = ((uint32_t *) strings) - nwords; + + /* + ** Next, back up until we're at a multiple-of-16 address. Because we + ** are moving to a lower address, its upper 28 bits are identical to + ** the address we currently have, so we can do this with a bitwise + ** AND to just turn off the lower four bits. + */ + + acptr = (uint32_t *) ( ((uint32_t)acptr) & MOD16_MASK ); + + // copy in 'argc' + *acptr = argc; + + // next, 'argv', which follows 'argc'; 'argv' points to the + // word that follows it in the stack + uint32_t *avptr = acptr + 2; + *(acptr+1) = (uint32_t) avptr; + + /* + ** Next, we copy in all argc+1 pointers. + */ + + // Adjust and copy the string pointers. + for( int i = 0; i <= argc; ++i ) { + if( argv[i] != NULL ) { + // an actual pointer - adjust it and copy it in + *avptr = (uint32_t) strings; + // skip to the next entry in the array + strings += strlen(argv[i]) + 1; + } else { + // end of the line! + *avptr = NULL; + } + ++avptr; + } + + /* + ** Now, we need to set up the initial context for the executing + ** process. + ** + ** When this process is dispatched, the context restore code will + ** pop all the saved context information off the stack, including + ** the saved EIP, CS, and EFLAGS. We set those fields up so that + ** the interrupt "returns" to the entry point of the process. + */ + + // Locate the context save area on the stack. + context_t *ctx = ((context_t *) avptr) - 1; + + /* + ** We cleared the entire stack earlier, so all the context + ** fields currently contain zeroes. We now need to fill in + ** all the important fields. + */ + + ctx->eflags = DEFAULT_EFLAGS; // IE enabled, PPL 0 + ctx->eip = entry; // initial EIP + ctx->cs = GDT_CODE; // segment registers + ctx->ss = GDT_STACK; + ctx->ds = ctx->es = ctx->fs = ctx->gs = GDT_DATA; + + /* + ** Return the new context pointer to the caller. It will be our + ** caller's responsibility to schedule this process. + */ + + return( ctx ); +} + +/* +** PUBLIC FUNCTIONS +*/ + +/** +** Name: user_init +** +** Initializes the user support module. +*/ +void user_init( void ) { + +#if TRACING_INIT + cio_puts( " User" ); +#endif + + // This is gross, but we need to get this information somehow. + // Access the "user blob" data in the second bootstrap sector + uint16_t *blobdata = (uint16_t *) USER_BLOB_DATA; + user_offset = *blobdata++; + user_segment = *blobdata++; + user_sectors = *blobdata++; + +#if TRACING_USER + cio_printf( "\nUser blob: %u sectors @ %04x:%04x", user_sectors, + user_segment, user_offset ); +#endif + + // calculate the location of the user blob + if( user_sectors > 0 ) { + + // calculate the address of the header + user_header = (header_t *) + ( KERN_BASE + + ( (((uint_t)user_segment) << 4) + ((uint_t)user_offset) ) + ); + + // the program table immediate follows the blob header + prog_table = (prog_t *) (user_header + 1); + +#if TRACING_USER + cio_printf( ", hdr %08x, %u progs, tbl %08x\n", (uint32_t) user_header, + user_header->num, (uint32_t) prog_table ); +#endif + + } else { + // too bad, so sad! + user_header = NULL; + prog_table = NULL; +#if TRACING_USER + cio_putchar( '\n' ); +#endif + } +} + +/** +** Name: user_locate +** +** Locates a user program in the user code archive. +** +** @param what The ID of the user program to find +** +** @return pointer to the program table entry in the code archive, or NULL +*/ +prog_t *user_locate( uint_t what ) { + + // no programs if there is no blob! + if( user_header == NULL ) { + return NULL; + } + + // make sure this is a reasonable program to request + if( what >= user_header->num ) { + // no such program! + return NULL; + } + + // find the entry in the program table + prog_t *prog = &prog_table[what]; + + // if there are no bytes, it's useless + if( prog->size < 1 ) { + return NULL; + } + + // return the program table pointer + return prog; +} + +/** +** Name: user_duplicate +** +** Duplicates the memory setup for an existing process. +** +** @param new The PCB for the new copy of the program +** @param old The PCB for the existing the program +** +** @return the status of the duplicate attempt +*/ +int user_duplicate( pcb_t *new, pcb_t *old ) { + + // We need to do a recursive duplication of the process address + // space of the current process. First, we create a new user + // page directory. Next, we'll duplicate the USER_PDE page + // table. Finally, we'll go through that table and duplicate + // all the frames. + + // create the initial VM hierarchy + pde_t *pdir = vm_mkuvm(); + if( pdir == NULL ) { + return E_NO_MEMORY; + } + new->pdir = pdir; + + // next, add a USER_PDE page table that's a duplicate of the + // current process' page table + if( !vm_uvmdup(old->pdir,new->pdir) ) { + // check for memory leak? + return E_NO_MEMORY; + } + + // now, iterate through the entries, replacing the frame + // numbers with duplicate frames + // + // NOTE: we only deal with pdir[0] here, as we are limiting + // the user address space to the first 4MB + pte_t *pt = (pte_t *) (pdir[USER_PDE]); + + for( int i = 0; i < N_PTE; ++i ) { + + // if this entry is present, + if( IS_PRESENT(*pt) ) { + + // duplicate the page + void *tmp = vm_pagedup( (void *) (*pt & FRAME_MASK) ); + // replace the old frame number with the new one + *pt = (pte_t) (((uint32_t)tmp) | (*pt & PERM_MASK)); + + } else { + + *pt = 0; + + } + ++pt; + } + + return SUCCESS; +} + +/** +** Name: user_load +** +** Loads a user program from the user code archive into memory. +** Allocates all needed frames and sets up the VM tables. +** +** @param ptab A pointer to the program table entry to be loaded +** @param pcb The PCB for the program being loaded +** @param args The argument vector for the program +** +** @return the status of the load attempt +*/ +int user_load( prog_t *ptab, pcb_t *pcb, const char **args ) { + + // NULL pointers are bad! + assert1( ptab != NULL ); + assert1( pcb != NULL ); + assert1( args != NULL ); + + // locate the ELF binary + elfhdr_t *hdr = (elfhdr_t *) ((uint32_t)user_header + ptab->offset); + +#if TRACING_ELF + cio_printf( "Load: ptab %08x: '%s', off %08x, size %08x, flags %08x\n", + (uint32_t) ptab, ptab->name, ptab->offset, ptab->size, + ptab->flags ); + cio_printf( " args %08x:", (uint32_t) args ); + for( int i = 0; args[i] != NULL; ++i ) { + cio_printf( " [%d] %s", i, args[i] ); + } + cio_printf( "\n pcb %08x (pid %u)\n", (uint32_t) pcb, pcb->pid ); + dump_fhdr( hdr ); +#endif + + // verify the ELF header + if( hdr->e_ident.f.magic != ELF_MAGIC ) { + return E_BAD_PARAM; + } + + // allocate a page directory + pcb->pdir = vm_mkuvm(); + if( pcb->pdir == NULL ) { + return E_NO_MEMORY; + } + + // read all the program headers + int stat = read_phdrs( hdr, pcb ); + if( stat != SUCCESS ) { + // TODO figure out a better way to deal with this + PANIC( 0, "user_load: phdr read failed" ); + } + + // next, set up the runtime stack - just like setting up loadable + // sections, except nothing to copy + stat = vm_add( pcb->pdir, true, false, (void *) USER_STACK, + SZ_USTACK, NULL, 0 ); + if( stat != SUCCESS ) { + // TODO yadda yadda... + PANIC( 0, "user_load: vm_add failed" ); + } + + // set up the command-line arguments + pcb->context = stack_setup( pcb, hdr->e_entry, args ); + + return SUCCESS; +} + +/** +** Name: user_cleanup +** +** "Unloads" a user program. Deallocates all memory frames and +** cleans up the VM structures. +** +** @param pcb The PCB of the program to be unloaded +*/ +void user_cleanup( pcb_t *pcb ) { + + if( pcb == NULL ) { + // should this be an error? + return; + } + + vm_free( pcb->pdir ); + pcb->pdir = NULL; +} diff --git a/kernel/vm.c b/kernel/vm.c new file mode 100644 index 0000000..46c4eab --- /dev/null +++ b/kernel/vm.c @@ -0,0 +1,585 @@ +/** +** @file vm.c +** +** @author CSCI-452 class of 20245 +** +** @brief Kernel VM support +*/ + +#define KERNEL_SRC + +#include <common.h> + +#include <vm.h> +#include <vmtables.h> + +#include <kmem.h> +#include <procs.h> +#include <x86/arch.h> +#include <x86/ops.h> + +/* +** PRIVATE DEFINITIONS +*/ + +/* +** PRIVATE DATA TYPES +*/ + +/* +** PRIVATE GLOBAL VARIABLES +*/ + +/* +** PUBLIC GLOBAL VARIABLES +*/ + +// created page directory for the kernel +pde_t *kpdir; + +/* +** PRIVATE FUNCTIONS +*/ + +/** +** Name: vm_isr +** +** Description: Page fault handler +** +** @param vector Interrupt vector number +** @param code Error code pushed onto the stack +*/ +static void vm_isr( int vector, int code ) { + + // get whatever information we can from the fault + pfec_t fault; + fault.u = (uint32_t) code; + uint32_t addr = r_cr2(); + + // report what we found + sprint( b256, + "** page fault @ 0x%08x %cP %c %cM %cRSV %c %cPK %cSS %cHLAT %cSGZ", + addr, + fault.s.p ? ' ' : '!', + fault.s.w ? 'W' : 'R', + fault.s.us ? 'U' : 'S', + fault.s.rsvd ? ' ' : '!', + fault.s.id ? 'I' : 'D', + fault.s.pk ? ' ' : '!', + fault.s.ss ? ' ' : '!', + fault.s.hlat ? ' ' : '!', + fault.s.sgz ? ' ' : '!' + ); + + // and give up + PANIC( 0, b256 ); +} + +/** +** Name: uva2kva +** +** Convert a user VA into a kernel address +*/ +__attribute__((__unused__)) +static void *uva2kva( pde_t *pdir, void *va ) { + + // find the PMT entry for this address + pte_t *pte = vm_getpte( pdir, va, false ); + if( pte == NULL ) { + return NULL; + } + + // is this a valid address for the user? + if( IS_PRESENT(*pte) ) { + return 0; + } + + if( IS_LARGE(*pte) ) { + return 0; + } + + // get the physical address + uint32_t frame = *pte & FRAME_MASK; // keep the frame address + frame |= ((uint32_t) va) & PERM_MASK; // OR in the lower 12 bits + + return (void *) frame; +} + + +/* +** PUBLIC FUNCTIONS +*/ + +/** +** Name: vm_init +** +** Description: Initialize the VM module +*/ +void vm_init( void ) { + +#if TRACING_INIT + cio_puts( " VM" ); +#endif + + // set up the kernel's page directory + kpdir = vm_mkkvm(); + assert( kpdir != NULL ); + + // install the page fault handler + install_isr( VEC_PAGE_FAULT, vm_isr ); +} + +/** +** Name: vm_pagedup +** +** Duplicate a page of memory +** +** @param old Pointer to the first byte of a page +** +** @return a pointer to the new, duplicate page, or NULL +*/ +void *vm_pagedup( void *old ) { + void *new = (void *) km_page_alloc(); + if( new != NULL ) { + memcpy( new, old, SZ_PAGE ); + } + return new; +} + +/** +** Name: vm_ptdup +** +** Duplicate a page directory entry +** +** @param dst Pointer to where the duplicate should go +** @param curr Pointer to the entry to be duplicated +** +** @return true on success, else false +*/ +bool_t vm_ptdup( pde_t *dst, pde_t *curr ) { + +#if TRACING_VM + cio_printf( "vm_ptdup dst %08x curr %08x\n", + (uint32_t) dst, (uint32_t) curr ); +#endif + // simplest case + if( *curr == 0 ) { + *dst = 0; + return true; + } + + // OK, we have an entry; allocate a page table + pte_t *pt = (pte_t *) km_page_alloc(); + if( pt == NULL ) { + return false; + } + + // pointer to the first PTE in the current table + pte_t *old = (pte_t *) (((uint32_t) *curr) & FRAME_MASK); + // pointer to the first PTE in the new table + pte_t *new = pt; + + for( int i = 0 ; i < N_PTE; ++i ) { + if( IS_PRESENT(*old) ) { + *new = 0; + } else { + *new = *old; + } + ++old; + ++new; + } + + // assign the page table into the new page directory + // upper 22 bits from 'pt', lower 12 from '*curr' + *dst = (pde_t) ( + (((uint32_t)pt) & FRAME_MASK) | + (((uint32_t)(*curr)) & PERM_MASK ) + ); + + return true; +} + +/** +** Name: vm_getpte +** +** Return the address of the PTE corresponding to the virtual address +** 'va' within the address space controlled by 'pgdir'. If there is no +** page table for that VA and 'alloc' is true, create the necessary +** page table entries. +** +** @param pdir Pointer to the page directory to be searched +** @param va The virtual address we're looking for +** @param alloc Should we allocate a page table if there isn't one? +** +** @return A pointer to the page table entry for this VA, or NULL +*/ +pte_t *vm_getpte( pde_t *pdir, const void *va, bool_t alloc ) { + pte_t *ptab; + + // sanity check + assert1( pdir != NULL ); + + // get the PDIR entry for this virtual address + pde_t *pde = &pdir[ PDIX(va) ]; + + // is it already set up? + if( IS_PRESENT(*pde) ) { + + // yes! + ptab = (pte_t*)P2V(PTE_ADDR(*pde)); + + } else { + + // no - should we create it? + if( !alloc ) { + // nope, so just return + return NULL; + } + + // yes - try to allocate a page table + ptab = (pte_t *) km_page_alloc(); + if( ptab == NULL ) { + WARNING( "can't allocate page table" ); + return NULL; + } + + // who knows what was left in this page.... + memclr( ptab, SZ_PAGE ); + + // add this to the page directory + // + // we set this up to allow general access; this could be + // controlled by setting access control in the page table + // entries, if necessary. + *pde = V2P(ptab) | PDE_P | PDE_RW; + } + + // finally, return a pointer to the entry in the + // page table for this VA + return &ptab[ PTIX(va) ]; +} + +// Set up kernel part of a page table. +pde_t *vm_mkkvm( void ) +{ + mapping_t *k; + + // allocate the page directory + pde_t *pdir = km_page_alloc(); + if( pdir == NULL ) { + return NULL; + } + + // clear it out to disable all the entries + memclr( pdir, SZ_PAGE ); + + // map in all the page ranges + k = kmap; + for( int i = 0; i < n_kmap; ++i, ++k ) { + int stat = vm_map( pdir, ((void *)k->va_start), + k->pa_end - k->pa_start, + k->pa_start, k->perm ); + if( stat != SUCCESS ) { + vm_free( pdir ); + return 0; + } + } + + return pdir; +} + +/* +** Creates an initial user VM table hierarchy by copying the +** system entries into a new page directory. +** +** @return a pointer to the new page directory, or NULL +*/ +pde_t *vm_mkuvm( void ) { + + // allocate the directory + pde_t *new = (pde_t *) km_page_alloc(); + if( new == NULL ) { + return NULL; + } + + // iterate through the kernel page directory + pde_t *curr = kpdir; + pde_t *dst = new; + for( int i = 0; i < N_PDE; ++i ) { + + if( *curr != 0 ) { + // found an active one - duplicate it + if( !vm_ptdup(dst,curr) ) { + return NULL; + } + } + + ++curr; + ++dst; + } + + return new; + +} + +/** +** Name: vm_set_kvm +** +** Switch the page table register to the kernel's page directory. +*/ +void vm_set_kvm( void ) { + w_cr3( V2P(kpdir) ); // switch to the kernel page table +} + +/** +** Name: vm_set_uvm +** +** Switch the page table register to the page directory for a user process. +** +** @param p PCB of the process we're switching to +*/ +void vm_set_uvm( pcb_t *p ) { + assert( p != NULL ); + assert( p->pdir != NULL ); + + w_cr3( V2P(p->pdir) ); // switch to process's address space +} + +/** +** Name: vm_add +** +** Add pages to the page hierarchy for a process, copying data into +** them if necessary. +** +** @param pdir Pointer to the page directory to modify +** @param wr "Writable" flag for the PTE +** @param sys "System" flag for the PTE +** @param va Starting VA of the range +** @param size Amount of physical memory to allocate (bytes) +** @param data Pointer to data to copy, or NULL +** @param bytes Number of bytes to copy +** +** @return status of the allocation attempt +*/ +int vm_add( pde_t *pdir, bool_t wr, bool_t sys, + void *va, uint32_t size, char *data, uint32_t bytes ) { + + // how many pages do we need? + uint_t npages = ((size & MOD4K_BITS) ? PGUP(size) : size) >> MOD4K_SHIFT; + + // permission set for the PTEs + uint_t entrybase = PTE_P; + if( wr ) { + entrybase |= PTE_RW; + } + if( sys ) { + entrybase |= PTE_US; + } + +#if TRACING_VM + cio_printf( "vm_add: pdir %08x, %s, va %08x (%u, %u pgs)\n", + (uint32_t) pdir, wr ? "W" : "!W", (uint32_t) va, size ); + cio_printf( " from %08x, %u bytes, perms %08x\n", + (uint32_t) data, bytes, entrybase ); +#endif + + // iterate through the pages + + for( int i = 0; i < npages; ++i ) { + + // figure out where this page will go in the hierarchy + pte_t *pte = vm_getpte( pdir, va, true ); + if( pte == NULL ) { + // TODO if i > 0, this isn't the first frame - is + // there anything to do about other frames? + // POSSIBLE MEMORY LEAK? + return E_NO_MEMORY; + } + + // allocate the frame + void *page = km_page_alloc(); + if( page == NULL ) { + // TODO same question here + return E_NO_MEMORY; + } + + // clear it all out + memclr( page, SZ_PAGE ); + + // create the PTE for this frame + uint32_t entry = (uint32_t) (PTE_ADDR(page) | entrybase); + *pte = entry; + + // copy data if we need to + if( data != NULL && bytes > 0 ) { + // how much to copy + uint_t num = bytes > SZ_PAGE ? SZ_PAGE : bytes; + // do it! + memcpy( (void *)page, (void *)data, num ); + // adjust all the pointers + data += num; // where to continue + bytes -= num; // what's left to copy + } + + // bump the virtual address + va += SZ_PAGE; + } + + return SUCCESS; + +} + +/** +** Name: vm_free +** +** Deallocate a page table hierarchy and all physical memory frames +** in the user portion. +** +** @param pdir Pointer to the page directory +*/ +void vm_free( pde_t *pdir ) { + + // do we have anything to do? + if( pdir == NULL ) { + return; + } + + // iterate through the page directory entries, freeing the + // PMTS and the frames they point to + pde_t *curr = pdir; + for( int i = 0; i < N_PDE; ++i ) { + + // does this entry point to anything useful? + if( IS_PRESENT(*curr) ) { + + // yes - get the PMT pointer + pte_t *pte = (pte_t *) PTE_ADDR(*curr); + + // walk the PMT + for( int j = 0; j < N_PTE; ++j ) { + // does this entry point to a frame? + if( IS_PRESENT(*pte) ) { + // yes - free the frame + km_page_free( (void *) PTE_ADDR(*pte) ); + // mark it so we don't get surprised + *pte = 0; + } + // move on + ++pte; + } + // now, free the PMT itself + km_page_free( (void *) PDE_ADDR(*curr) ); + *curr = 0; + } + + // move to the next entry + ++curr; + } + + // finally, free the PDIR itself + km_page_free( (void *) pdir ); +} + +/* +** Name: vm_map +** +** Create PTEs for virtual addresses starting at 'va' that refer to +** physical addresses in the range [pa, pa+size-1]. We aren't guaranteed +** that va is page-aligned. +** +** @param pdir Page directory for this address space +** @param va The starting virtual address +** @param size Length of the range to be mapped +** @param pa The starting physical address +** @param perm Permission bits for the PTEs +*/ +int vm_map( pde_t *pdir, void *va, uint_t size, uint_t pa, int perm ) { + pte_t *pte; + + // round the VA down to its page boundary + char *addr = (char*)PGDOWN((uint_t)va); + + // round the end of the range down to its page boundary + char *last = (char*)PGDOWN(((uint_t)va) + size - 1); + + for(;;) { + + // get a pointer to the PTE for the current VA + if( (pte = vm_getpte(pdir, addr, 1)) == 0 ) { + // couldn't find it + return E_NO_PTE; + } + + // if this entry has already been mapped, we're in trouble + if( IS_PRESENT(*pte) ) { + PANIC( 0, "mapping an already-mapped address" ); + } + + // ok, set the PTE as requested + *pte = pa | perm | PTE_P; + + // are we done? + if( addr == last ) { + break; + } + + // nope - move to the next page + addr += SZ_PAGE; + pa += SZ_PAGE; + } + return 0; +} + +/** +** Name: vm_uvmdup +** +** Create a duplicate of the user portio of an existing page table +** hierarchy. We assume that the "new" page directory exists and +** the system portions of it should not be touched. +** +** Note: we do not duplicate the frames in the hierarchy - we just +** create a duplicate of the hierarchy itself. This means that we +** now have two sets of page tables that refer to the same user-level +** frames in memory. +** +** @param old Existing page directory +** @param new New page directory +** +** @return status of the duplication attempt +*/ +int vm_uvmdup( pde_t *old, pde_t *new ) { + + if( old == NULL || new == NULL ) { + return E_BAD_PARAM; + } + + // we only want to deal with the "user" half of the address space + for( int i = 0; i < (N_PDE >> 1); ++i ) { + + // is this entry in use? + if( IS_PRESENT(*old) ) { + + // yes. if it points to a 4MB page, we just copy it; + // otherwise, we must duplicate the next level PMT + + *new = *old; // copy the entry + + if( !IS_LARGE(*old) ) { + + // it's a 4KB page, so we need to duplicate the PMT + pte_t *newpmt = (pte_t *) vm_pagedup( (void *) (*old & FRAME_MASK) ); + if( newpmt == NULL ) { + return E_NO_MEMORY; + } + + // create the new PDE entry by replacing the frame # + *new = (pde_t) (((uint32_t)newpmt) | PERMS(*old)); + } + } + + ++old; + ++new; + } + + return SUCCESS; +} diff --git a/kernel/vmtables.c b/kernel/vmtables.c new file mode 100644 index 0000000..306b1f6 --- /dev/null +++ b/kernel/vmtables.c @@ -0,0 +1,270 @@ +/** +** @file vmtables.c +** +** @author CSCI-452 class of 20245 +** +** @brief Kernel VM tables +** +** Compilation options: +** +** MAKE_IDENTITY_MAP Creates a page table that identity-maps the first +** 4MB of main memory. +*/ + +#define KERNEL_SRC + +#include <common.h> + +#include <kmem.h> +#include <procs.h> +#include <vm.h> +#include <x86/arch.h> + +// defined for us by the linker +extern char _data[]; + +/* +** Initial page directory, for when the kernel is starting up +** +** we use large (4MB) pages here to allow us to use a one-level +** paging hierarchy; the kernel will create a new page table +** hierarchy once memory is initialized +** +** We only map the first 2GB of memory, plus a 4MB portion of +** the upper half, which we map to cover the first 4MB of +** memory. +*/ + +// identity-map 4MB page #n +#define L(n) [n] = (pde_t) ( (TO_4MFRAME((n))) | (PDE_P|PDE_RW|PDE_PS) ) + +ALIGN(SZ_PAGE) +pde_t firstpdir[N_PDE] = { + + // Map VA range [0, 2GB] to PA range [0, 2GB] +L(0x000), L(0x001), L(0x002), L(0x003), L(0x004), L(0x005), L(0x006), L(0x007), +L(0x008), L(0x009), L(0x00a), L(0x00b), L(0x00c), L(0x00d), L(0x00e), L(0x00f), +L(0x010), L(0x011), L(0x012), L(0x013), L(0x014), L(0x015), L(0x016), L(0x017), +L(0x018), L(0x019), L(0x01a), L(0x01b), L(0x01c), L(0x01d), L(0x01e), L(0x01f), +L(0x020), L(0x021), L(0x022), L(0x023), L(0x024), L(0x025), L(0x026), L(0x027), +L(0x028), L(0x029), L(0x02a), L(0x02b), L(0x02c), L(0x02d), L(0x02e), L(0x02f), +L(0x030), L(0x031), L(0x032), L(0x033), L(0x034), L(0x035), L(0x036), L(0x037), +L(0x038), L(0x039), L(0x03a), L(0x03b), L(0x03c), L(0x03d), L(0x03e), L(0x03f), +L(0x040), L(0x041), L(0x042), L(0x043), L(0x044), L(0x045), L(0x046), L(0x047), +L(0x048), L(0x049), L(0x04a), L(0x04b), L(0x04c), L(0x04d), L(0x04e), L(0x04f), +L(0x050), L(0x051), L(0x052), L(0x053), L(0x054), L(0x055), L(0x056), L(0x057), +L(0x058), L(0x059), L(0x05a), L(0x05b), L(0x05c), L(0x05d), L(0x05e), L(0x05f), +L(0x060), L(0x061), L(0x062), L(0x063), L(0x064), L(0x065), L(0x066), L(0x067), +L(0x068), L(0x069), L(0x06a), L(0x06b), L(0x06c), L(0x06d), L(0x06e), L(0x06f), +L(0x070), L(0x071), L(0x072), L(0x073), L(0x074), L(0x075), L(0x076), L(0x077), +L(0x078), L(0x079), L(0x07a), L(0x07b), L(0x07c), L(0x07d), L(0x07e), L(0x07f), +L(0x080), L(0x081), L(0x082), L(0x083), L(0x084), L(0x085), L(0x086), L(0x087), +L(0x088), L(0x089), L(0x08a), L(0x08b), L(0x08c), L(0x08d), L(0x08e), L(0x08f), +L(0x090), L(0x091), L(0x092), L(0x093), L(0x094), L(0x095), L(0x096), L(0x097), +L(0x098), L(0x099), L(0x09a), L(0x09b), L(0x09c), L(0x09d), L(0x09e), L(0x09f), +L(0x0a0), L(0x0a1), L(0x0a2), L(0x0a3), L(0x0a4), L(0x0a5), L(0x0a6), L(0x0a7), +L(0x0a8), L(0x0a9), L(0x0aa), L(0x0ab), L(0x0ac), L(0x0ad), L(0x0ae), L(0x0af), +L(0x0b0), L(0x0b1), L(0x0b2), L(0x0b3), L(0x0b4), L(0x0b5), L(0x0b6), L(0x0b7), +L(0x0b8), L(0x0b9), L(0x0ba), L(0x0bb), L(0x0bc), L(0x0bd), L(0x0be), L(0x0bf), +L(0x0c0), L(0x0c1), L(0x0c2), L(0x0c3), L(0x0c4), L(0x0c5), L(0x0c6), L(0x0c7), +L(0x0c8), L(0x0c9), L(0x0ca), L(0x0cb), L(0x0cc), L(0x0cd), L(0x0ce), L(0x0cf), +L(0x0d0), L(0x0d1), L(0x0d2), L(0x0d3), L(0x0d4), L(0x0d5), L(0x0d6), L(0x0d7), +L(0x0d8), L(0x0d9), L(0x0da), L(0x0db), L(0x0dc), L(0x0dd), L(0x0de), L(0x0df), +L(0x0e0), L(0x0e1), L(0x0e2), L(0x0e3), L(0x0e4), L(0x0e5), L(0x0e6), L(0x0e7), +L(0x0e8), L(0x0e9), L(0x0ea), L(0x0eb), L(0x0ec), L(0x0ed), L(0x0ee), L(0x0ef), +L(0x0f0), L(0x0f1), L(0x0f2), L(0x0f3), L(0x0f4), L(0x0f5), L(0x0f6), L(0x0f7), +L(0x0f8), L(0x0f9), L(0x0fa), L(0x0fb), L(0x0fc), L(0x0fd), L(0x0fe), L(0x0ff), +L(0x100), L(0x101), L(0x102), L(0x103), L(0x104), L(0x105), L(0x106), L(0x107), +L(0x108), L(0x109), L(0x10a), L(0x10b), L(0x10c), L(0x10d), L(0x10e), L(0x10f), +L(0x110), L(0x111), L(0x112), L(0x113), L(0x114), L(0x115), L(0x116), L(0x117), +L(0x118), L(0x119), L(0x11a), L(0x11b), L(0x11c), L(0x11d), L(0x11e), L(0x11f), +L(0x120), L(0x121), L(0x122), L(0x123), L(0x124), L(0x125), L(0x126), L(0x127), +L(0x128), L(0x129), L(0x12a), L(0x12b), L(0x12c), L(0x12d), L(0x12e), L(0x12f), +L(0x130), L(0x131), L(0x132), L(0x133), L(0x134), L(0x135), L(0x136), L(0x137), +L(0x138), L(0x139), L(0x13a), L(0x13b), L(0x13c), L(0x13d), L(0x13e), L(0x13f), +L(0x140), L(0x141), L(0x142), L(0x143), L(0x144), L(0x145), L(0x146), L(0x147), +L(0x148), L(0x149), L(0x14a), L(0x14b), L(0x14c), L(0x14d), L(0x14e), L(0x14f), +L(0x150), L(0x151), L(0x152), L(0x153), L(0x154), L(0x155), L(0x156), L(0x157), +L(0x158), L(0x159), L(0x15a), L(0x15b), L(0x15c), L(0x15d), L(0x15e), L(0x15f), +L(0x160), L(0x161), L(0x162), L(0x163), L(0x164), L(0x165), L(0x166), L(0x167), +L(0x168), L(0x169), L(0x16a), L(0x16b), L(0x16c), L(0x16d), L(0x16e), L(0x16f), +L(0x170), L(0x171), L(0x172), L(0x173), L(0x174), L(0x175), L(0x176), L(0x177), +L(0x178), L(0x179), L(0x17a), L(0x17b), L(0x17c), L(0x17d), L(0x17e), L(0x17f), +L(0x180), L(0x181), L(0x182), L(0x183), L(0x184), L(0x185), L(0x186), L(0x187), +L(0x188), L(0x189), L(0x18a), L(0x18b), L(0x18c), L(0x18d), L(0x18e), L(0x18f), +L(0x190), L(0x191), L(0x192), L(0x193), L(0x194), L(0x195), L(0x196), L(0x197), +L(0x198), L(0x199), L(0x19a), L(0x19b), L(0x19c), L(0x19d), L(0x19e), L(0x19f), +L(0x1a0), L(0x1a1), L(0x1a2), L(0x1a3), L(0x1a4), L(0x1a5), L(0x1a6), L(0x1a7), +L(0x1a8), L(0x1a9), L(0x1aa), L(0x1ab), L(0x1ac), L(0x1ad), L(0x1ae), L(0x1af), +L(0x1b0), L(0x1b1), L(0x1b2), L(0x1b3), L(0x1b4), L(0x1b5), L(0x1b6), L(0x1b7), +L(0x1b8), L(0x1b9), L(0x1ba), L(0x1bb), L(0x1bc), L(0x1bd), L(0x1be), L(0x1bf), +L(0x1c0), L(0x1c1), L(0x1c2), L(0x1c3), L(0x1c4), L(0x1c5), L(0x1c6), L(0x1c7), +L(0x1c8), L(0x1c9), L(0x1ca), L(0x1cb), L(0x1cc), L(0x1cd), L(0x1ce), L(0x1cf), +L(0x1d0), L(0x1d1), L(0x1d2), L(0x1d3), L(0x1d4), L(0x1d5), L(0x1d6), L(0x1d7), +L(0x1d8), L(0x1d9), L(0x1da), L(0x1db), L(0x1dc), L(0x1dd), L(0x1de), L(0x1df), +L(0x1e0), L(0x1e1), L(0x1e2), L(0x1e3), L(0x1e4), L(0x1e5), L(0x1e6), L(0x1e7), +L(0x1e8), L(0x1e9), L(0x1ea), L(0x1eb), L(0x1ec), L(0x1ed), L(0x1ee), L(0x1ef), +L(0x1f0), L(0x1f1), L(0x1f2), L(0x1f3), L(0x1f4), L(0x1f5), L(0x1f6), L(0x1f7), +L(0x1f8), L(0x1f9), L(0x1fa), L(0x1fb), L(0x1fc), L(0x1fd), L(0x1fe), L(0x1ff), + + // Map VA range [KERN_BASE, KERN_BASE+4MB] to PA range [0, 4MB] + [PDIX(KERN_BASE)] = (pde_t) (PDE_P | PDE_RW | PDE_PS) +}; + +#ifdef MAKE_IDENTITY_MAP +/* +** "Identity" page map table. +** +** This just maps the first 4MB of physical memory. It is initialized +** in vm_init(). +** +** This could be converted into a 4GB map of 4MB pages by turning on +** the PDE_PS bit in each entry. +*/ + +// identity-map 4KB page #n +#define S(n) [n] = (pte_t) ( (TO_4KFRAME((n))) | (PTE_P|PTE_RW) ) + +pte_t id_map[N_PTE] = { +S(0x000), S(0x001), S(0x002), S(0x003), S(0x004), S(0x005), S(0x006), S(0x007), +S(0x008), S(0x009), S(0x00a), S(0x00b), S(0x00c), S(0x00d), S(0x00e), S(0x00f), +S(0x010), S(0x011), S(0x012), S(0x013), S(0x014), S(0x015), S(0x016), S(0x017), +S(0x018), S(0x019), S(0x01a), S(0x01b), S(0x01c), S(0x01d), S(0x01e), S(0x01f), +S(0x020), S(0x021), S(0x022), S(0x023), S(0x024), S(0x025), S(0x026), S(0x027), +S(0x028), S(0x029), S(0x02a), S(0x02b), S(0x02c), S(0x02d), S(0x02e), S(0x02f), +S(0x030), S(0x031), S(0x032), S(0x033), S(0x034), S(0x035), S(0x036), S(0x037), +S(0x038), S(0x039), S(0x03a), S(0x03b), S(0x03c), S(0x03d), S(0x03e), S(0x03f), +S(0x040), S(0x041), S(0x042), S(0x043), S(0x044), S(0x045), S(0x046), S(0x047), +S(0x048), S(0x049), S(0x04a), S(0x04b), S(0x04c), S(0x04d), S(0x04e), S(0x04f), +S(0x050), S(0x051), S(0x052), S(0x053), S(0x054), S(0x055), S(0x056), S(0x057), +S(0x058), S(0x059), S(0x05a), S(0x05b), S(0x05c), S(0x05d), S(0x05e), S(0x05f), +S(0x060), S(0x061), S(0x062), S(0x063), S(0x064), S(0x065), S(0x066), S(0x067), +S(0x068), S(0x069), S(0x06a), S(0x06b), S(0x06c), S(0x06d), S(0x06e), S(0x06f), +S(0x070), S(0x071), S(0x072), S(0x073), S(0x074), S(0x075), S(0x076), S(0x077), +S(0x078), S(0x079), S(0x07a), S(0x07b), S(0x07c), S(0x07d), S(0x07e), S(0x07f), +S(0x080), S(0x081), S(0x082), S(0x083), S(0x084), S(0x085), S(0x086), S(0x087), +S(0x088), S(0x089), S(0x08a), S(0x08b), S(0x08c), S(0x08d), S(0x08e), S(0x08f), +S(0x090), S(0x091), S(0x092), S(0x093), S(0x094), S(0x095), S(0x096), S(0x097), +S(0x098), S(0x099), S(0x09a), S(0x09b), S(0x09c), S(0x09d), S(0x09e), S(0x09f), +S(0x0a0), S(0x0a1), S(0x0a2), S(0x0a3), S(0x0a4), S(0x0a5), S(0x0a6), S(0x0a7), +S(0x0a8), S(0x0a9), S(0x0aa), S(0x0ab), S(0x0ac), S(0x0ad), S(0x0ae), S(0x0af), +S(0x0b0), S(0x0b1), S(0x0b2), S(0x0b3), S(0x0b4), S(0x0b5), S(0x0b6), S(0x0b7), +S(0x0b8), S(0x0b9), S(0x0ba), S(0x0bb), S(0x0bc), S(0x0bd), S(0x0be), S(0x0bf), +S(0x0c0), S(0x0c1), S(0x0c2), S(0x0c3), S(0x0c4), S(0x0c5), S(0x0c6), S(0x0c7), +S(0x0c8), S(0x0c9), S(0x0ca), S(0x0cb), S(0x0cc), S(0x0cd), S(0x0ce), S(0x0cf), +S(0x0d0), S(0x0d1), S(0x0d2), S(0x0d3), S(0x0d4), S(0x0d5), S(0x0d6), S(0x0d7), +S(0x0d8), S(0x0d9), S(0x0da), S(0x0db), S(0x0dc), S(0x0dd), S(0x0de), S(0x0df), +S(0x0e0), S(0x0e1), S(0x0e2), S(0x0e3), S(0x0e4), S(0x0e5), S(0x0e6), S(0x0e7), +S(0x0e8), S(0x0e9), S(0x0ea), S(0x0eb), S(0x0ec), S(0x0ed), S(0x0ee), S(0x0ef), +S(0x0f0), S(0x0f1), S(0x0f2), S(0x0f3), S(0x0f4), S(0x0f5), S(0x0f6), S(0x0f7), +S(0x0f8), S(0x0f9), S(0x0fa), S(0x0fb), S(0x0fc), S(0x0fd), S(0x0fe), S(0x0ff), +S(0x100), S(0x101), S(0x102), S(0x103), S(0x104), S(0x105), S(0x106), S(0x107), +S(0x108), S(0x109), S(0x10a), S(0x10b), S(0x10c), S(0x10d), S(0x10e), S(0x10f), +S(0x110), S(0x111), S(0x112), S(0x113), S(0x114), S(0x115), S(0x116), S(0x117), +S(0x118), S(0x119), S(0x11a), S(0x11b), S(0x11c), S(0x11d), S(0x11e), S(0x11f), +S(0x120), S(0x121), S(0x122), S(0x123), S(0x124), S(0x125), S(0x126), S(0x127), +S(0x128), S(0x129), S(0x12a), S(0x12b), S(0x12c), S(0x12d), S(0x12e), S(0x12f), +S(0x130), S(0x131), S(0x132), S(0x133), S(0x134), S(0x135), S(0x136), S(0x137), +S(0x138), S(0x139), S(0x13a), S(0x13b), S(0x13c), S(0x13d), S(0x13e), S(0x13f), +S(0x140), S(0x141), S(0x142), S(0x143), S(0x144), S(0x145), S(0x146), S(0x147), +S(0x148), S(0x149), S(0x14a), S(0x14b), S(0x14c), S(0x14d), S(0x14e), S(0x14f), +S(0x150), S(0x151), S(0x152), S(0x153), S(0x154), S(0x155), S(0x156), S(0x157), +S(0x158), S(0x159), S(0x15a), S(0x15b), S(0x15c), S(0x15d), S(0x15e), S(0x15f), +S(0x160), S(0x161), S(0x162), S(0x163), S(0x164), S(0x165), S(0x166), S(0x167), +S(0x168), S(0x169), S(0x16a), S(0x16b), S(0x16c), S(0x16d), S(0x16e), S(0x16f), +S(0x170), S(0x171), S(0x172), S(0x173), S(0x174), S(0x175), S(0x176), S(0x177), +S(0x178), S(0x179), S(0x17a), S(0x17b), S(0x17c), S(0x17d), S(0x17e), S(0x17f), +S(0x180), S(0x181), S(0x182), S(0x183), S(0x184), S(0x185), S(0x186), S(0x187), +S(0x188), S(0x189), S(0x18a), S(0x18b), S(0x18c), S(0x18d), S(0x18e), S(0x18f), +S(0x190), S(0x191), S(0x192), S(0x193), S(0x194), S(0x195), S(0x196), S(0x197), +S(0x198), S(0x199), S(0x19a), S(0x19b), S(0x19c), S(0x19d), S(0x19e), S(0x19f), +S(0x1a0), S(0x1a1), S(0x1a2), S(0x1a3), S(0x1a4), S(0x1a5), S(0x1a6), S(0x1a7), +S(0x1a8), S(0x1a9), S(0x1aa), S(0x1ab), S(0x1ac), S(0x1ad), S(0x1ae), S(0x1af), +S(0x1b0), S(0x1b1), S(0x1b2), S(0x1b3), S(0x1b4), S(0x1b5), S(0x1b6), S(0x1b7), +S(0x1b8), S(0x1b9), S(0x1ba), S(0x1bb), S(0x1bc), S(0x1bd), S(0x1be), S(0x1bf), +S(0x1c0), S(0x1c1), S(0x1c2), S(0x1c3), S(0x1c4), S(0x1c5), S(0x1c6), S(0x1c7), +S(0x1c8), S(0x1c9), S(0x1ca), S(0x1cb), S(0x1cc), S(0x1cd), S(0x1ce), S(0x1cf), +S(0x1d0), S(0x1d1), S(0x1d2), S(0x1d3), S(0x1d4), S(0x1d5), S(0x1d6), S(0x1d7), +S(0x1d8), S(0x1d9), S(0x1da), S(0x1db), S(0x1dc), S(0x1dd), S(0x1de), S(0x1df), +S(0x1e0), S(0x1e1), S(0x1e2), S(0x1e3), S(0x1e4), S(0x1e5), S(0x1e6), S(0x1e7), +S(0x1e8), S(0x1e9), S(0x1ea), S(0x1eb), S(0x1ec), S(0x1ed), S(0x1ee), S(0x1ef), +S(0x1f0), S(0x1f1), S(0x1f2), S(0x1f3), S(0x1f4), S(0x1f5), S(0x1f6), S(0x1f7), +S(0x1f8), S(0x1f9), S(0x1fa), S(0x1fb), S(0x1fc), S(0x1fd), S(0x1fe), S(0x1ff), +S(0x200), S(0x201), S(0x202), S(0x203), S(0x204), S(0x205), S(0x206), S(0x207), +S(0x208), S(0x209), S(0x20a), S(0x20b), S(0x20c), S(0x20d), S(0x20e), S(0x20f), +S(0x210), S(0x211), S(0x212), S(0x213), S(0x214), S(0x215), S(0x216), S(0x217), +S(0x218), S(0x219), S(0x21a), S(0x21b), S(0x21c), S(0x21d), S(0x21e), S(0x21f), +S(0x220), S(0x221), S(0x222), S(0x223), S(0x224), S(0x225), S(0x226), S(0x227), +S(0x228), S(0x229), S(0x22a), S(0x22b), S(0x22c), S(0x22d), S(0x22e), S(0x22f), +S(0x230), S(0x231), S(0x232), S(0x233), S(0x234), S(0x235), S(0x236), S(0x237), +S(0x238), S(0x239), S(0x23a), S(0x23b), S(0x23c), S(0x23d), S(0x23e), S(0x23f), +S(0x240), S(0x241), S(0x242), S(0x243), S(0x244), S(0x245), S(0x246), S(0x247), +S(0x248), S(0x249), S(0x24a), S(0x24b), S(0x24c), S(0x24d), S(0x24e), S(0x24f), +S(0x250), S(0x251), S(0x252), S(0x253), S(0x254), S(0x255), S(0x256), S(0x257), +S(0x258), S(0x259), S(0x25a), S(0x25b), S(0x25c), S(0x25d), S(0x25e), S(0x25f), +S(0x260), S(0x261), S(0x262), S(0x263), S(0x264), S(0x265), S(0x266), S(0x267), +S(0x268), S(0x269), S(0x26a), S(0x26b), S(0x26c), S(0x26d), S(0x26e), S(0x26f), +S(0x270), S(0x271), S(0x272), S(0x273), S(0x274), S(0x275), S(0x276), S(0x277), +S(0x278), S(0x279), S(0x27a), S(0x27b), S(0x27c), S(0x27d), S(0x27e), S(0x27f), +S(0x280), S(0x281), S(0x282), S(0x283), S(0x284), S(0x285), S(0x286), S(0x287), +S(0x288), S(0x289), S(0x28a), S(0x28b), S(0x28c), S(0x28d), S(0x28e), S(0x28f), +S(0x290), S(0x291), S(0x292), S(0x293), S(0x294), S(0x295), S(0x296), S(0x297), +S(0x298), S(0x299), S(0x29a), S(0x29b), S(0x29c), S(0x29d), S(0x29e), S(0x29f), +S(0x2a0), S(0x2a1), S(0x2a2), S(0x2a3), S(0x2a4), S(0x2a5), S(0x2a6), S(0x2a7), +S(0x2a8), S(0x2a9), S(0x2aa), S(0x2ab), S(0x2ac), S(0x2ad), S(0x2ae), S(0x2af), +S(0x2b0), S(0x2b1), S(0x2b2), S(0x2b3), S(0x2b4), S(0x2b5), S(0x2b6), S(0x2b7), +S(0x2b8), S(0x2b9), S(0x2ba), S(0x2bb), S(0x2bc), S(0x2bd), S(0x2be), S(0x2bf), +S(0x2c0), S(0x2c1), S(0x2c2), S(0x2c3), S(0x2c4), S(0x2c5), S(0x2c6), S(0x2c7), +S(0x2c8), S(0x2c9), S(0x2ca), S(0x2cb), S(0x2cc), S(0x2cd), S(0x2ce), S(0x2cf), +S(0x2d0), S(0x2d1), S(0x2d2), S(0x2d3), S(0x2d4), S(0x2d5), S(0x2d6), S(0x2d7), +S(0x2d8), S(0x2d9), S(0x2da), S(0x2db), S(0x2dc), S(0x2dd), S(0x2de), S(0x2df), +S(0x2e0), S(0x2e1), S(0x2e2), S(0x2e3), S(0x2e4), S(0x2e5), S(0x2e6), S(0x2e7), +S(0x2e8), S(0x2e9), S(0x2ea), S(0x2eb), S(0x2ec), S(0x2ed), S(0x2ee), S(0x2ef), +S(0x2f0), S(0x2f1), S(0x2f2), S(0x2f3), S(0x2f4), S(0x2f5), S(0x2f6), S(0x2f7), +S(0x2f8), S(0x2f9), S(0x2fa), S(0x2fb), S(0x2fc), S(0x2fd), S(0x2fe), S(0x2ff), +S(0x300), S(0x301), S(0x302), S(0x303), S(0x304), S(0x305), S(0x306), S(0x307), +S(0x308), S(0x309), S(0x30a), S(0x30b), S(0x30c), S(0x30d), S(0x30e), S(0x30f), +S(0x310), S(0x311), S(0x312), S(0x313), S(0x314), S(0x315), S(0x316), S(0x317), +S(0x318), S(0x319), S(0x31a), S(0x31b), S(0x31c), S(0x31d), S(0x31e), S(0x31f), +S(0x320), S(0x321), S(0x322), S(0x323), S(0x324), S(0x325), S(0x326), S(0x327), +S(0x328), S(0x329), S(0x32a), S(0x32b), S(0x32c), S(0x32d), S(0x32e), S(0x32f), +S(0x330), S(0x331), S(0x332), S(0x333), S(0x334), S(0x335), S(0x336), S(0x337), +S(0x338), S(0x339), S(0x33a), S(0x33b), S(0x33c), S(0x33d), S(0x33e), S(0x33f), +S(0x340), S(0x341), S(0x342), S(0x343), S(0x344), S(0x345), S(0x346), S(0x347), +S(0x348), S(0x349), S(0x34a), S(0x34b), S(0x34c), S(0x34d), S(0x34e), S(0x34f), +S(0x350), S(0x351), S(0x352), S(0x353), S(0x354), S(0x355), S(0x356), S(0x357), +S(0x358), S(0x359), S(0x35a), S(0x35b), S(0x35c), S(0x35d), S(0x35e), S(0x35f), +S(0x360), S(0x361), S(0x362), S(0x363), S(0x364), S(0x365), S(0x366), S(0x367), +S(0x368), S(0x369), S(0x36a), S(0x36b), S(0x36c), S(0x36d), S(0x36e), S(0x36f), +S(0x370), S(0x371), S(0x372), S(0x373), S(0x374), S(0x375), S(0x376), S(0x377), +S(0x378), S(0x379), S(0x37a), S(0x37b), S(0x37c), S(0x37d), S(0x37e), S(0x37f), +S(0x380), S(0x381), S(0x382), S(0x383), S(0x384), S(0x385), S(0x386), S(0x387), +S(0x388), S(0x389), S(0x38a), S(0x38b), S(0x38c), S(0x38d), S(0x38e), S(0x38f), +S(0x390), S(0x391), S(0x392), S(0x393), S(0x394), S(0x395), S(0x396), S(0x397), +S(0x398), S(0x399), S(0x39a), S(0x39b), S(0x39c), S(0x39d), S(0x39e), S(0x39f), +S(0x3a0), S(0x3a1), S(0x3a2), S(0x3a3), S(0x3a4), S(0x3a5), S(0x3a6), S(0x3a7), +S(0x3a8), S(0x3a9), S(0x3aa), S(0x3ab), S(0x3ac), S(0x3ad), S(0x3ae), S(0x3af), +S(0x3b0), S(0x3b1), S(0x3b2), S(0x3b3), S(0x3b4), S(0x3b5), S(0x3b6), S(0x3b7), +S(0x3b8), S(0x3b9), S(0x3ba), S(0x3bb), S(0x3bc), S(0x3bd), S(0x3be), S(0x3bf), +S(0x3c0), S(0x3c1), S(0x3c2), S(0x3c3), S(0x3c4), S(0x3c5), S(0x3c6), S(0x3c7), +S(0x3c8), S(0x3c9), S(0x3ca), S(0x3cb), S(0x3cc), S(0x3cd), S(0x3ce), S(0x3cf), +S(0x3d0), S(0x3d1), S(0x3d2), S(0x3d3), S(0x3d4), S(0x3d5), S(0x3d6), S(0x3d7), +S(0x3d8), S(0x3d9), S(0x3da), S(0x3db), S(0x3dc), S(0x3dd), S(0x3de), S(0x3df), +S(0x3e0), S(0x3e1), S(0x3e2), S(0x3e3), S(0x3e4), S(0x3e5), S(0x3e6), S(0x3e7), +S(0x3e8), S(0x3e9), S(0x3ea), S(0x3eb), S(0x3ec), S(0x3ed), S(0x3ee), S(0x3ef), +S(0x3f0), S(0x3f1), S(0x3f2), S(0x3f3), S(0x3f4), S(0x3f5), S(0x3f6), S(0x3f7), +S(0x3f8), S(0x3f9), S(0x3fa), S(0x3fb), S(0x3fc), S(0x3fd), S(0x3fe), S(0x3ff) +}; +#endif /* MAKE_IDENTITY_MAP */ + +/* +** Kernel address mappings, present in every page table +*/ +mapping_t kmap[] = { + // va pa_start pa_end perms + { KERN_BASE, 0, EXT_BASE, PDE_RW }, + { KERN_VLINK, KERN_PLINK, V2P(_data), 0 }, + { (uint32_t) _data, V2P(_data), KERN_BASE, PDE_RW }, + { DEV_BASE, DEV_BASE, 0, PDE_RW } +}; +const uint_t n_kmap = sizeof(kmap) / sizeof(kmap[0]); |