kern/kernel/vm.c
2025-03-27 17:56:39 -04:00

737 lines
16 KiB
C

/**
** @file vm.c
**
** @author CSCI-452 class of 20245
**
** @brief Kernel VM support
*/
#define KERNEL_SRC
#include <common.h>
#include <vm.h>
#include <vmtables.h>
#include <kmem.h>
#include <procs.h>
#include <x86/arch.h>
#include <x86/ops.h>
/*
** PUBLIC GLOBAL VARIABLES
*/
// created page directory for the kernel
pde_t *kpdir;
/*
** PRIVATE FUNCTIONS
*/
/**
** Name: vm_isr
**
** Description: Page fault handler
**
** @param vector Interrupt vector number
** @param code Error code pushed onto the stack
*/
static void vm_isr(int vector, int code)
{
// get whatever information we can from the fault
pfec_t fault;
fault.u = (uint32_t)code;
uint32_t addr = r_cr2();
// report what we found
sprint(b256,
"** page fault @ 0x%08x %cP %c %cM %cRSV %c %cPK %cSS %cHLAT %cSGZ",
addr, fault.s.p ? ' ' : '!', fault.s.w ? 'W' : 'R',
fault.s.us ? 'U' : 'S', fault.s.rsvd ? ' ' : '!',
fault.s.id ? 'I' : 'D', fault.s.pk ? ' ' : '!',
fault.s.ss ? ' ' : '!', fault.s.hlat ? ' ' : '!',
fault.s.sgz ? ' ' : '!');
// and give up
PANIC(0, b256);
}
/**
** Name: uva2kva
**
** Convert a user VA into a kernel address. Works for all addresses -
** if the address is a page address, the PERMS(va) value will be 0;
** otherwise, it is the offset into the page.
**
** @param pdir Pointer to the page directory to examine
** @param va Virtual address to check
*/
ATTR_UNUSED
static void *uva2kva(pde_t *pdir, void *va)
{
// find the PMT entry for this address
pte_t *pte = vm_getpte(pdir, va, false);
if (pte == NULL) {
return NULL;
}
// get the entry
pte_t entry = *pte;
// is this a valid address for the user?
if (IS_PRESENT(entry)) {
return NULL;
}
// is this a system-only page?
if (IS_SYSTEM(entry)) {
return NULL;
}
// get the physical address
uint32_t frame = PTE_ADDR(*pte) | PERMS(va);
return (void *)P2V(frame);
}
/**
** Name: ptdump
**
** Dump the non-zero entries of a page table or directory
**
** @param pt The page table
** @param dir Is this a page directory?
** @param start First entry to process
** @param num Number of entries to process
*/
static void ptdump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num)
{
cio_printf("\n\nP%c dump", dir ? 'D' : 'T');
cio_printf(" of %08x", (uint32_t)pt);
cio_printf(" [%03x] through [%03x]\n", start, start + num - 1);
uint_t n = 0;
uint_t z = 0;
for (uint_t i = 0; i < num; ++i) {
pte_t entry = pt[start + i];
// four entries per line
if (n && ((n & 0x3) == 0)) {
cio_putchar('\n');
}
if (IS_PRESENT(entry)) {
cio_printf(" %03x", start + i);
if (IS_LARGE(entry)) {
cio_printf(" 8 %05x", GET_4MFRAME(entry) << 10);
} else {
cio_printf(" 4 %05x", GET_4KFRAME(entry));
}
++n;
} else {
++z;
}
// pause after every four lines of output
if (n && ((n & 0xf) == 0)) {
delay(DELAY_2_SEC);
}
}
// partial line?
if ((n & 0x3) != 0) {
cio_putchar('\n');
}
if (z > 0) {
cio_printf(" %u entries were !P\n", z);
}
delay(DELAY_2_SEC);
}
/*
** PUBLIC FUNCTIONS
*/
/**
** Name: vm_init
**
** Description: Initialize the VM module
*/
void vm_init(void)
{
#if TRACING_INIT
cio_puts(" VM");
#endif
// set up the kernel's 4K-page directory
kpdir = vm_mkkvm();
assert(kpdir != NULL);
#if TRACING_VM
cio_printf("vm_init: kpdir is %08x\n", kpdir);
#endif
// switch to it
vm_set_kvm();
#if TRACING_VM
cio_puts("vm_init: running on new kpdir\n");
#endif
// install the page fault handler
install_isr(VEC_PAGE_FAULT, vm_isr);
}
/**
** Name: vm_pagedup
**
** Duplicate a page of memory
**
** @param old Pointer to the first byte of a page
**
** @return a pointer to the new, duplicate page, or NULL
*/
void *vm_pagedup(void *old)
{
void *new = (void *)km_page_alloc();
if (new != NULL) {
blkmov(new, old, SZ_PAGE);
}
return new;
}
/**
** Name: vm_pdedup
**
** Duplicate a page directory entry
**
** @param dst Pointer to where the duplicate should go
** @param curr Pointer to the entry to be duplicated
**
** @return true on success, else false
*/
bool_t vm_pdedup(pde_t *dst, pde_t *curr)
{
assert1(curr != NULL);
assert1(dst != NULL);
#if TRACING_VM
cio_printf("vm_pdedup dst %08x curr %08x\n", (uint32_t)dst, (uint32_t)curr);
#endif
pde_t entry = *curr;
// simplest case
if (!IS_PRESENT(entry)) {
*dst = 0;
return true;
}
// OK, we have an entry; allocate a page table for it
pte_t *newtbl = (pte_t *)km_page_alloc();
if (newtbl == NULL) {
return false;
}
// we could clear the new table, but we'll be assigning to
// each entry anyway, so we'll save the execution time
// address of the page table for this directory entry
pte_t *old = (pte_t *)PDE_ADDR(entry);
// pointer to the first PTE in the new table
pte_t *new = newtbl;
for (int i = 0; i < N_PTE; ++i) {
if (!IS_PRESENT(*old)) {
*new = 0;
} else {
*new = *old;
}
++old;
++new;
}
// replace the page table address
// upper 22 bits from 'newtbl', lower 12 from '*curr'
*dst = (pde_t)(PTE_ADDR(newtbl) | PERMS(entry));
return true;
}
/**
** Name: vm_getpte
**
** Return the address of the PTE corresponding to the virtual address
** 'va' within the address space controlled by 'pgdir'. If there is no
** page table for that VA and 'alloc' is true, create the necessary
** page table entries.
**
** @param pdir Pointer to the page directory to be searched
** @param va The virtual address we're looking for
** @param alloc Should we allocate a page table if there isn't one?
**
** @return A pointer to the page table entry for this VA, or NULL if
** there isn't one and we're not allocating
*/
pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc)
{
pte_t *ptbl;
// sanity check
assert1(pdir != NULL);
// get the PDIR entry for this virtual address
uint32_t ix = PDIX(va);
pde_t *pde_ptr = &pdir[ix];
// is it already set up?
if (IS_PRESENT(*pde_ptr)) {
// yes!
ptbl = (pte_t *)P2V(PTE_ADDR(*pde_ptr));
} else {
// no - should we create it?
if (!alloc) {
// nope, so just return
return NULL;
}
// yes - try to allocate a page table
ptbl = (pte_t *)km_page_alloc();
if (ptbl == NULL) {
WARNING("can't allocate page table");
return NULL;
}
// who knows what was left in this page....
memclr(ptbl, SZ_PAGE);
// add this to the page directory
//
// we set this up to allow general access; this could be
// controlled by setting access control in the page table
// entries, if necessary.
//
// NOTE: the allocator is serving us virtual page addresses,
// so we must convert them to physical addresses for the
// table entries
*pde_ptr = V2P(ptbl) | PDE_P | PDE_RW;
}
// finally, return a pointer to the entry in the
// page table for this VA
ix = PTIX(va);
return &ptbl[ix];
}
// Set up kernel part of a page table.
pde_t *vm_mkkvm(void)
{
mapping_t *k;
// allocate the page directory
pde_t *pdir = km_page_alloc();
if (pdir == NULL) {
return NULL;
}
#if 0 && TRACING_VM
cio_puts( "\nEntering vm_mkkvm\n" );
ptdump( pdir, true, 0, N_PDE );
#endif
// clear it out to disable all the entries
memclr(pdir, SZ_PAGE);
if (P2V(PHYS_TOP) > DEV_BASE) {
cio_printf("PHYS_TOP (%08x -> %08x) > DEV_BASE(%08x)\n", PHYS_TOP,
P2V(PHYS_TOP), DEV_BASE);
PANIC(0, "PHYS_TOP too large");
}
// map in all the page ranges
k = kmap;
for (int i = 0; i < n_kmap; ++i, ++k) {
int stat = vm_map(pdir, ((void *)k->va_start), k->pa_start,
k->pa_end - k->pa_start, k->perm);
if (stat != SUCCESS) {
vm_free(pdir);
return 0;
}
}
#if 0 && TRACING_VM
cio_puts( "\nvm_mkkvm() final PD:\n" );
ptdump( pdir, true, 0, 16 );
ptdump( pdir, true, 0x200, 16 );
#endif
return pdir;
}
/*
** Creates an initial user VM table hierarchy by copying the
** system entries into a new page directory.
**
** @return a pointer to the new page directory, or NULL
*/
pde_t *vm_mkuvm(void)
{
// allocate the directory
pde_t *new = (pde_t *)km_page_alloc();
if (new == NULL) {
return NULL;
}
// iterate through the kernel page directory
pde_t *curr = kpdir;
pde_t *dst = new;
for (int i = 0; i < N_PDE; ++i) {
if (*curr != 0) {
// found an active one - duplicate it
if (!vm_pdedup(dst, curr)) {
return NULL;
}
}
++curr;
++dst;
}
return new;
}
/**
** Name: vm_set_kvm
**
** Switch the page table register to the kernel's page directory.
*/
void vm_set_kvm(void)
{
#if TRACING_VM
cio_puts("Entering vm_set_kvm()\n");
#endif
w_cr3(V2P(kpdir)); // switch to the kernel page table
#if TRACING_VM
cio_puts("Exiting vm_set_kvm()\n");
#endif
}
/**
** Name: vm_set_uvm
**
** Switch the page table register to the page directory for a user process.
**
** @param p PCB of the process we're switching to
*/
void vm_set_uvm(pcb_t *p)
{
#if TRACING_VM
cio_puts("Entering vm_set_uvm()\n");
#endif
assert(p != NULL);
assert(p->pdir != NULL);
w_cr3(V2P(p->pdir)); // switch to process's address space
#if TRACING_VM
cio_puts("Entering vm_set_uvm()\n");
#endif
}
/**
** Name: vm_add
**
** Add pages to the page hierarchy for a process, copying data into
** them if necessary.
**
** @param pdir Pointer to the page directory to modify
** @param wr "Writable" flag for the PTE
** @param sys "System" flag for the PTE
** @param va Starting VA of the range
** @param size Amount of physical memory to allocate (bytes)
** @param data Pointer to data to copy, or NULL
** @param bytes Number of bytes to copy
**
** @return status of the allocation attempt
*/
int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
char *data, uint32_t bytes)
{
// how many pages do we need?
uint32_t npages = ((size & MOD4K_BITS) ? PGUP(size) : size) >> MOD4K_SHIFT;
// permission set for the PTEs
uint32_t entrybase = PTE_P;
if (wr) {
entrybase |= PTE_RW;
}
if (sys) {
entrybase |= PTE_US;
}
#if TRACING_VM
cio_printf("vm_add: pdir %08x, %s, va %08x size %u (%u pgs)\n",
(uint32_t)pdir, wr ? "W" : "!W", (uint32_t)va, size, npages);
cio_printf(" from %08x, %u bytes, perms %08x\n", (uint32_t)data,
bytes, entrybase);
#endif
// iterate through the pages
for (int i = 0; i < npages; ++i) {
// figure out where this page will go in the hierarchy
pte_t *pte = vm_getpte(pdir, va, true);
if (pte == NULL) {
// TODO if i > 0, this isn't the first frame - is
// there anything to do about other frames?
// POSSIBLE MEMORY LEAK?
return E_NO_MEMORY;
}
// allocate the frame
void *page = km_page_alloc();
if (page == NULL) {
// TODO same question here
return E_NO_MEMORY;
}
// clear it all out
memclr(page, SZ_PAGE);
// create the PTE for this frame
uint32_t entry = (uint32_t)(PTE_ADDR(page) | entrybase);
*pte = entry;
// copy data if we need to
if (data != NULL && bytes > 0) {
// how much to copy
uint32_t num = bytes > SZ_PAGE ? SZ_PAGE : bytes;
// do it!
memcpy((void *)page, (void *)data, num);
// adjust all the pointers
data += num; // where to continue
bytes -= num; // what's left to copy
}
// bump the virtual address
va += SZ_PAGE;
}
return SUCCESS;
}
/**
** Name: vm_free
**
** Deallocate a page table hierarchy and all physical memory frames
** in the user portion.
**
** Works only for 4KB pages.
**
** @param pdir Pointer to the page directory
*/
void vm_free(pde_t *pdir)
{
#if TRACING_VM
cio_printf("vm_free(%08x)\n", (uint32_t)pdir);
#endif
// do we have anything to do?
if (pdir == NULL) {
return;
}
// iterate through the page directory entries, freeing the
// PMTS and the frames they point to
pde_t *curr = pdir;
int nf = 0;
int nt = 0;
for (int i = 0; i < N_PDE; ++i) {
// the entry itself
pde_t entry = *curr;
// does this entry point to anything useful?
if (IS_PRESENT(entry)) {
// yes - large pages make us unhappy
assert(!IS_LARGE(entry));
// get the PMT pointer
pte_t *pmt = (pte_t *)PTE_ADDR(entry);
// walk the PMT
for (int j = 0; j < N_PTE; ++j) {
// does this entry point to a frame?
if (IS_PRESENT(*pmt)) {
// yes - free the frame
km_page_free((void *)PTE_ADDR(*pmt));
++nf;
// mark it so we don't get surprised
*pmt = 0;
}
// move on
++pmt;
}
// now, free the PMT itself
km_page_free((void *)PDE_ADDR(entry));
++nt;
*curr = 0;
}
// move to the next entry
++curr;
}
// finally, free the PDIR itself
km_page_free((void *)pdir);
++nt;
#if TRACING_VM
cio_printf("vm_free: %d pages, %d tables\n", nf, nt);
#endif
}
/*
** Name: vm_map
**
** Create PTEs for virtual addresses starting at 'va' that refer to
** physical addresses in the range [pa, pa+size-1]. We aren't guaranteed
** that va is page-aligned.
**
** @param pdir Page directory for this address space
** @param va The starting virtual address
** @param pa The starting physical address
** @param size Length of the range to be mapped
** @param perm Permission bits for the PTEs
**
** @return the status of the mapping attempt
*/
int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm)
{
// round the VA down to its page boundary
char *addr = (char *)PGDOWN((uint32_t)va);
// round the end of the range down to its page boundary
char *last = (char *)PGDOWN(((uint32_t)va) + size - 1);
#if TRACING_VM
cio_printf("vm_map pdir %08x va %08x pa %08x size %08x perm %03x\n",
(uint32_t)pdir, (uint32_t)va, pa, size, perm);
#endif
while (addr <= last) {
// get a pointer to the PTE for the current VA
pte_t *pte = vm_getpte(pdir, addr, true);
if (pte == NULL) {
// couldn't find it
return E_NO_PTE;
}
#if 0 && TRACING_VM
cio_printf( " addr %08x pa %08x last %08x pte %08x *pte %08x\n",
(uint32_t) addr, pa, (uint32_t) last, (uint32_t) pte, *pte
);
#endif
// create the new entry for the page table
pde_t newpte = pa | perm | PTE_P;
// if this entry has already been mapped, we're in trouble
if (IS_PRESENT(*pte)) {
if (*pte != newpte) {
#if TRACING_VM
cio_printf(
"vm_map: va %08x pa %08x pte %08x *pte %08x entry %08x\n",
(uint32_t)va, pa, (uint32_t)pte, (uint32_t)*pte, newpte);
cio_printf(" addr %08x PDIX 0x%x PTIX 0x%x\n", (uint32_t)addr,
PDIX(addr), PTIX(addr));
// dump the directory
ptdump(pdir, true, PDIX(addr), 4);
// find the relevant PDE entry
uint32_t ix = PDIX(va);
pde_t entry = pdir[ix];
if (!IS_LARGE(entry)) {
// round the PMT index down
uint32_t ix2 = PTIX(va) & MOD4_MASK;
// dump the PMT for the relevant directory entry
ptdump((void *)P2V(PDE_ADDR(entry)), false, ix2, 4);
}
#endif
PANIC(0, "mapping an already-mapped address");
}
}
// ok, set the PTE as requested
*pte = newpte;
// nope - move to the next page
addr += SZ_PAGE;
pa += SZ_PAGE;
}
return SUCCESS;
}
/**
** Name: vm_uvmdup
**
** Create a duplicate of the user portio of an existing page table
** hierarchy. We assume that the "new" page directory exists and
** the system portions of it should not be touched.
**
** Note: we do not duplicate the frames in the hierarchy - we just
** create a duplicate of the hierarchy itself. This means that we
** now have two sets of page tables that refer to the same physical
** frames in memory.
**
** @param new New page directory
** @param old Existing page directory
**
** @return status of the duplication attempt
*/
int vm_uvmdup(pde_t *new, pde_t *old)
{
if (old == NULL || new == NULL) {
return E_BAD_PARAM;
}
#if TRACING_VM
cio_printf("vmdup: old %08x new %08x\n", (uint32_t)old, (uint32_t)new);
#endif
// we only want to deal with the "user" half of the address space
for (int i = 0; i < (N_PDE >> 1); ++i) {
// the entry to copy
pde_t entry = *old;
// is this entry in use?
if (IS_PRESENT(entry)) {
// yes. if it points to a 4MB page, we just copy it;
// otherwise, we must duplicate the next level PMT
if (!IS_LARGE(entry)) {
// it's a 4KB page, so we need to duplicate the PMT
pte_t *newpt = (pte_t *)vm_pagedup((void *)PTE_ADDR(entry));
if (newpt == NULL) {
return E_NO_MEMORY;
}
uint32_t perms = PERMS(entry);
// create the new PDE entry by replacing the frame #
entry = ((uint32_t)newpt) | perms;
}
} else {
// not present, so create an empty entry
entry = 0;
}
// send it on its way
*new = entry;
// move on down the line
++old;
++new;
}
return SUCCESS;
}