summaryrefslogtreecommitdiff
path: root/kernel/vm.c
blob: c9b632dc1b1b46ba55080f2f4f9407070b141dcb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
/**
** @file	vm.c
**
** @author	CSCI-452 class of 20245
**
** @brief	Kernel VM support
*/

#define KERNEL_SRC

#include <common.h>

#include <vm.h>
#include <vmtables.h>

#include <kmem.h>
#include <procs.h>
#include <x86/arch.h>
#include <x86/ops.h>

/*
** PUBLIC GLOBAL VARIABLES
*/

// Page directory for the kernel's address space. It is assigned in
// vm_init() from the result of vm_mkkvm(), loaded into CR3 by
// vm_set_kvm(), and used by vm_mkuvm() as the template for new
// directories.
pde_t *kpdir;

/*
** PRIVATE FUNCTIONS
*/

/**
** Name:	vm_isr
**
** Description: Page fault handler
**
** @param vector   Interrupt vector number
** @param code     Error code pushed onto the stack
*/
static void vm_isr(int vector, int code)
{
	// get whatever information we can from the fault
	pfec_t fault;
	fault.u = (uint32_t)code;
	uint32_t addr = r_cr2();

	// report what we found
	sprint(b256,
		   "** page fault @ 0x%08x %cP %c %cM %cRSV %c %cPK %cSS %cHLAT %cSGZ",
		   addr, fault.s.p ? ' ' : '!', fault.s.w ? 'W' : 'R',
		   fault.s.us ? 'U' : 'S', fault.s.rsvd ? ' ' : '!',
		   fault.s.id ? 'I' : 'D', fault.s.pk ? ' ' : '!',
		   fault.s.ss ? ' ' : '!', fault.s.hlat ? ' ' : '!',
		   fault.s.sgz ? ' ' : '!');

	// and give up
	PANIC(0, b256);
}

/**
** Name:    uva2kva
**
** Convert a user VA into a kernel address. Works for all addresses -
** if the address is a page address, the PERMS(va) value will be 0;
** otherwise, it is the offset into the page.
**
** @param pdir  Pointer to the page directory to examine
** @param va    Virtual address to check
**
** @return the kernel address that refers to the same byte as 'va',
**         or NULL if there is no page table for 'va', the page is
**         not present, or the page is system-only
*/
ATTR_UNUSED
static void *uva2kva(pde_t *pdir, void *va)
{
	// find the PMT entry for this address (never allocate here)
	pte_t *pte = vm_getpte(pdir, va, false);
	if (pte == NULL) {
		return NULL;
	}

	// get the entry
	pte_t entry = *pte;

	// an entry that is not present does not map anything,
	// so there is nothing to translate
	if (!IS_PRESENT(entry)) {
		return NULL;
	}

	// is this a system-only page?
	if (IS_SYSTEM(entry)) {
		return NULL;
	}

	// frame address from the PTE, plus the offset within the page
	// (the low-order bits of the VA)
	uint32_t frame = PTE_ADDR(entry) | PERMS(va);

	return (void *)P2V(frame);
}

/**
** Name:	ptdump
**
** Dump the present entries of a page table or directory, four per
** output line, pausing after every four lines of output. A count of
** the entries that were skipped because they are not present is
** printed at the end.
**
** @param pt     The page table
** @param dir    Is this a page directory?
** @param start  First entry to process
** @param num    Number of entries to process
*/
static void ptdump(pte_t *pt, bool_t dir, uint32_t start, uint32_t num)
{
	cio_printf("\n\nP%c dump", dir ? 'D' : 'T');
	cio_printf(" of %08x", (uint32_t)pt);
	cio_printf(" [%03x] through [%03x]\n", start, start + num - 1);

	uint_t n = 0; // entries printed so far
	uint_t z = 0; // entries skipped because they are not present

	for (uint_t i = 0; i < num; ++i) {
		pte_t entry = pt[start + i];

		// Skip non-present entries before doing any line or pause
		// bookkeeping; otherwise every skipped entry that follows a
		// full line would emit another newline (or another pause).
		if (!IS_PRESENT(entry)) {
			++z;
			continue;
		}

		cio_printf(" %03x", start + i);
		// the large-page flag is only meaningful in a directory entry
		if (dir && IS_LARGE(entry)) {
			cio_printf(" 8 %05x", GET_4MFRAME(entry) << 10);
		} else {
			cio_printf(" 4 %05x", GET_4KFRAME(entry));
		}
		++n;

		// four entries per line
		if ((n & 0x3) == 0) {
			cio_putchar('\n');
		}

		// pause after every four lines of output
		if ((n & 0xf) == 0) {
			delay(DELAY_2_SEC);
		}
	}

	// partial line?
	if ((n & 0x3) != 0) {
		cio_putchar('\n');
	}

	if (z > 0) {
		cio_printf(" %u entries were !P\n", z);
	}

	delay(DELAY_2_SEC);
}

/*
** PUBLIC FUNCTIONS
*/

/**
** Name:	vm_init
**
** Description:  Initialize the VM module: build the kernel page
** directory, make it the active one, and register the page fault
** handler.
*/
void vm_init(void)
{
#if TRACING_INIT
	// announce this module in the initialization trace
	cio_puts(" VM");
#endif

	// set up the kernel's 4K-page directory; we cannot continue
	// without one, so failure is fatal
	kpdir = vm_mkkvm();
	assert(kpdir != NULL);

	// switch to it (loads CR3 from kpdir)
	vm_set_kvm();

	// install the page fault handler
	install_isr(VEC_PAGE_FAULT, vm_isr);
}

/**
** Name:	vm_pagedup
**
** Make a copy of one page of memory in a newly allocated page.
**
** @param old  Pointer to the first byte of the page to copy
**
** @return a pointer to the new copy, or NULL if no page could be
**         allocated
*/
void *vm_pagedup(void *old)
{
	void *copy = (void *)km_page_alloc();

	// nothing to copy into if the allocation failed
	if (copy == NULL) {
		return NULL;
	}

	// transfer one full page of data
	blkmov(copy, old, SZ_PAGE);

	return copy;
}

/**
** Name:	vm_pdedup
**
** Duplicate a page directory entry. A non-present entry becomes an
** empty entry; a large-page entry is copied as-is; for any other
** entry a new page table is allocated, the present PTEs are copied
** into it, and the new entry refers to the new table with the same
** permission bits as the original.
**
** Table entries hold physical addresses, while the allocator and the
** kernel work with virtual addresses, so addresses are converted with
** P2V()/V2P() on the way in and out.
**
** @param dst   Pointer to where the duplicate should go
** @param curr  Pointer to the entry to be duplicated
**
** @return true on success, else false
*/
bool_t vm_pdedup(pde_t *dst, pde_t *curr)
{
	assert1(curr != NULL);
	assert1(dst != NULL);

#if TRACING_VM
	cio_printf("vm_pdedup dst %08x curr %08x\n", (uint32_t)dst, (uint32_t)curr);
#endif
	pde_t entry = *curr;

	// simplest case
	if (!IS_PRESENT(entry)) {
		*dst = 0;
		return true;
	}

	// a large-page entry has no second-level table to duplicate
	if (IS_LARGE(entry)) {
		*dst = entry;
		return true;
	}

	// OK, we have an entry; allocate a page table for it
	pte_t *newtbl = (pte_t *)km_page_alloc();
	if (newtbl == NULL) {
		return false;
	}

	// we could clear the new table, but we'll be assigning to
	// each entry anyway, so we'll save the execution time

	// the PDE holds the physical address of the existing page
	// table; convert it so we can read the table
	pte_t *old = (pte_t *)P2V(PDE_ADDR(entry));

	// copy the present entries, and blank out everything else
	for (int i = 0; i < N_PTE; ++i) {
		newtbl[i] = IS_PRESENT(old[i]) ? old[i] : 0;
	}

	// replace the page table address: upper bits are the physical
	// address of 'newtbl', lower bits are the permissions from '*curr'
	*dst = (pde_t)(PDE_ADDR(V2P(newtbl)) | PERMS(entry));

	return true;
}

/**
** Name:	vm_getpte
**
** Return the address of the PTE corresponding to the virtual address
** 'va' within the address space controlled by 'pdir'. If there is no
** page table for that VA and 'alloc' is true, a cleared page table is
** allocated and linked into the directory first.
**
** @param pdir   Pointer to the page directory to be searched
** @param va     The virtual address we're looking for
** @param alloc  Should we allocate a page table if there isn't one?
**
** @return A pointer to the page table entry for this VA, or NULL if
**         there is no page table and either we're not allocating or
**         the allocation failed
*/
pte_t *vm_getpte(pde_t *pdir, const void *va, bool_t alloc)
{
	// sanity check
	assert1(pdir != NULL);

	// the directory slot that covers this virtual address
	pde_t *slot = &pdir[PDIX(va)];

	pte_t *table;

	if (!IS_PRESENT(*slot)) {
		// no page table here yet; are we allowed to make one?
		if (!alloc) {
			return NULL;
		}

		table = (pte_t *)km_page_alloc();
		if (table == NULL) {
			WARNING("can't allocate page table");
			return NULL;
		}

		// the new page may contain stale data; start with every
		// entry disabled
		memclr(table, SZ_PAGE);

		// Link the table into the directory. The allocator hands
		// out virtual addresses, but directory entries must contain
		// physical addresses, hence the conversion.
		//
		// NOTE(review): only the present and writable bits are set
		// here. If user-mode access through this directory entry is
		// intended, the user/supervisor bit would be needed as
		// well - confirm.
		*slot = V2P(table) | PDE_P | PDE_RW;

	} else {
		// the directory entry holds the table's physical address;
		// convert it to something we can dereference
		table = (pte_t *)P2V(PTE_ADDR(*slot));
	}

	// the entry within that table for this VA
	return &table[PTIX(va)];
}

// Set up kernel part of a page table.
pde_t *vm_mkkvm(void)
{
	mapping_t *k;

	// allocate the page directory
	pde_t *pdir = km_page_alloc();
	if (pdir == NULL) {
		return NULL;
	}
#if TRACING_VM
	cio_puts("\nEntering vm_mkkvm\n");
	ptdump(pdir, true, 0, N_PDE);
#endif

	// clear it out to disable all the entries
	memclr(pdir, SZ_PAGE);

	if (P2V(PHYS_TOP) > DEV_BASE) {
		cio_printf("PHYS_TOP (%08x -> %08x) > DEV_BASE(%08x)\n", PHYS_TOP,
				   P2V(PHYS_TOP), DEV_BASE);
		PANIC(0, "PHYS_TOP too large");
	}

	// map in all the page ranges
	k = kmap;
	for (int i = 0; i < n_kmap; ++i, ++k) {
		int stat = vm_map(pdir, ((void *)k->va_start), k->pa_start,
						  k->pa_end - k->pa_start, k->perm);
		if (stat != SUCCESS) {
			vm_free(pdir);
			return 0;
		}
	}
#if TRACING_VM
	cio_puts("\nvm_mkkvm() final PD:\n");
	ptdump(pdir, true, 0, 16);
	ptdump(pdir, true, 0x200, 16);
#endif

	return pdir;
}

/*
** Creates an initial user VM table hierarchy by copying the
** system entries into a new page directory.
**
** @return a pointer to the new page directory, or NULL
*/
pde_t *vm_mkuvm(void)
{
	// allocate the directory
	pde_t *new = (pde_t *)km_page_alloc();
	if (new == NULL) {
		return NULL;
	}

	// iterate through the kernel page directory
	pde_t *curr = kpdir;
	pde_t *dst = new;
	for (int i = 0; i < N_PDE; ++i) {
		if (*curr != 0) {
			// found an active one - duplicate it
			if (!vm_pdedup(dst, curr)) {
				return NULL;
			}
		}

		++curr;
		++dst;
	}

	return new;
}

/**
** Name:	vm_set_kvm
**
** Switch the page table register to the kernel's page directory.
** The register is given the physical address of 'kpdir', obtained
** with V2P().
*/
void vm_set_kvm(void)
{
	w_cr3(V2P(kpdir)); // CR3 takes a physical address
}

/**
** Name:	vm_set_uvm
**
** Switch the page table register to the page directory for a user process.
** Both the PCB pointer and its page directory pointer must be non-NULL;
** this is enforced with assert().
**
** @param p  PCB of the process we're switching to
*/
void vm_set_uvm(pcb_t *p)
{
	// a process without a page directory cannot be switched to
	assert(p != NULL);
	assert(p->pdir != NULL);

	// CR3 takes the physical address of the process's page directory
	w_cr3(V2P(p->pdir));
}

/**
** Name:	vm_add
**
** Add pages to the page hierarchy for a process, copying data into
** them if necessary. Each page is allocated, cleared, entered into
** the hierarchy at the next page-sized step from 'va', and then
** filled from 'data' until 'bytes' bytes have been copied.
**
** If an allocation fails part-way through, the pages that were
** already added remain in the hierarchy.
**
** @param pdir   Pointer to the page directory to modify
** @param wr     "Writable" flag for the PTE
** @param sys    "System" flag for the PTE
** @param va     Starting VA of the range
** @param size   Amount of physical memory to allocate (bytes)
** @param data   Pointer to data to copy, or NULL
** @param bytes  Number of bytes to copy
**
** @return SUCCESS, or E_NO_MEMORY if a page table or frame could
**         not be allocated
*/
int vm_add(pde_t *pdir, bool_t wr, bool_t sys, void *va, uint32_t size,
		   char *data, uint32_t bytes)
{
	// how many pages do we need? (round a partial page up)
	uint32_t npages = ((size & MOD4K_BITS) ? PGUP(size) : size) >> MOD4K_SHIFT;

	// permission set for the PTEs
	uint32_t entrybase = PTE_P;
	if (wr) {
		entrybase |= PTE_RW;
	}
	// NOTE(review): on x86 a set user/supervisor bit makes the page
	// accessible from user mode, so setting PTE_US when 'sys' is true
	// looks inverted - confirm against the callers before changing.
	if (sys) {
		entrybase |= PTE_US;
	}

#if TRACING_VM
	cio_printf("vm_add: pdir %08x, %s, va %08x (%u, %u pgs)\n", (uint32_t)pdir,
			   wr ? "W" : "!W", (uint32_t)va, size, npages);
	cio_printf("        from %08x, %u bytes, perms %08x\n", (uint32_t)data,
			   bytes, entrybase);
#endif

	// byte-granular cursor over the virtual range
	char *addr = (char *)va;

	// iterate through the pages
	for (uint32_t i = 0; i < npages; ++i) {
		// figure out where this page will go in the hierarchy
		pte_t *pte = vm_getpte(pdir, addr, true);
		if (pte == NULL) {
			// TODO if i > 0, this isn't the first frame - is
			// there anything to do about other frames?
			// POSSIBLE MEMORY LEAK?
			return E_NO_MEMORY;
		}

		// allocate the frame
		void *page = km_page_alloc();
		if (page == NULL) {
			// TODO same question here
			return E_NO_MEMORY;
		}

		// clear it all out
		memclr(page, SZ_PAGE);

		// create the PTE for this frame; the allocator gives us a
		// virtual address, but the entry must hold the physical one
		*pte = (pte_t)(PTE_ADDR(V2P(page)) | entrybase);

		// copy data if we need to
		if (data != NULL && bytes > 0) {
			// how much to copy
			uint32_t num = bytes > SZ_PAGE ? SZ_PAGE : bytes;
			// do it!
			memcpy((void *)page, (void *)data, num);
			// adjust all the pointers
			data += num; // where to continue
			bytes -= num; // what's left to copy
		}

		// bump the virtual address
		addr += SZ_PAGE;
	}

	return SUCCESS;
}

/**
** Name:    vm_free
**
** Deallocate a page table hierarchy and all physical memory frames
** in the user portion.
**
** Works only for 4KB pages.
**
** Directory and table entries hold physical addresses; they are
** converted with P2V() before being dereferenced or handed back to
** the allocator, which deals in virtual addresses.
**
** @param pdir  Pointer to the page directory (may be NULL)
*/
void vm_free(pde_t *pdir)
{
	// do we have anything to do?
	if (pdir == NULL) {
		return;
	}

	// iterate through the page directory entries, freeing the
	// PMTS and the frames they point to
	for (int i = 0; i < N_PDE; ++i) {
		// the entry itself
		pde_t entry = pdir[i];

		// does this entry point to anything useful?
		if (!IS_PRESENT(entry)) {
			continue;
		}

		// yes - large pages make us unhappy
		assert(!IS_LARGE(entry));

		// get a usable pointer to the PMT
		pte_t *pmt = (pte_t *)P2V(PDE_ADDR(entry));

		// walk the PMT
		for (int j = 0; j < N_PTE; ++j) {
			// does this entry point to a frame?
			if (IS_PRESENT(pmt[j])) {
				// yes - free the frame
				km_page_free((void *)P2V(PTE_ADDR(pmt[j])));
				// mark it so we don't get surprised
				pmt[j] = 0;
			}
		}

		// now, free the PMT itself
		km_page_free((void *)pmt);
		pdir[i] = 0;
	}

	// finally, free the PDIR itself
	km_page_free((void *)pdir);
}

/**
** Name:	vm_map
**
** Create PTEs for virtual addresses starting at 'va' that refer to
** physical addresses in the range [pa, pa+size-1]. We aren't guaranteed
** that va is page-aligned, so the range is widened to whole pages.
** Page tables are allocated as needed.
**
** Re-mapping an address with an identical entry is harmless; mapping
** an already-mapped address to something different is a fatal error.
**
** @param pdir  Page directory for this address space
** @param va    The starting virtual address
** @param pa    The starting physical address
** @param size  Length of the range to be mapped
** @param perm  Permission bits for the PTEs
**
** @return SUCCESS, or E_NO_PTE if a page table entry could not be
**         obtained
*/
int vm_map(pde_t *pdir, void *va, uint32_t pa, uint32_t size, int perm)
{
	// an empty range needs no entries (and "va + size - 1" would
	// otherwise wrap around)
	if (size == 0) {
		return SUCCESS;
	}

	// round the VA down to its page boundary
	char *addr = (char *)PGDOWN((uint32_t)va);

	// round the end of the range down to its page boundary
	char *last = (char *)PGDOWN(((uint32_t)va) + size - 1);

#if TRACING_VM
	cio_printf("\n\nvm_map pdir %08x va %08x pa %08x size %08x perm %03x\n",
			   (uint32_t)pdir, (uint32_t)va, pa, size, perm);
#endif

	while (addr <= last) {
		// get a pointer to the PTE for the current VA
		pte_t *pte = vm_getpte(pdir, addr, true);
		if (pte == NULL) {
			// couldn't find it
			return E_NO_PTE;
		}
#if TRACING_VM
		cio_printf("  addr %08x pa %08x last %08x pte %08x *pte %08x\n",
				   (uint32_t)addr, pa, (uint32_t)last, (uint32_t)pte, *pte);
#endif

		// create the new entry; only the frame bits of 'pa' are
		// used, so an unaligned 'pa' cannot leak into the flag bits
		pte_t entry = PTE_ADDR(pa) | perm | PTE_P;

		// if this entry has already been mapped, we're in trouble
		if (IS_PRESENT(*pte)) {
			if (*pte != entry) {
#if TRACING_VM
				cio_puts(" ALREADY MAPPED?");
				cio_printf("  PDIX 0x%x PTIX 0x%x\n", PDIX(addr), PTIX(addr));

				// dump the directory
				ptdump(pdir, true, 0, N_PDE);

				// find the PDE for the address that collided
				pde_t pde = pdir[PDIX(addr)];
				if (!IS_LARGE(pde)) {
					// round the PMT index down
					uint32_t ix2 = PTIX(addr) & MOD4_MASK;
					// dump the PMT for the relevant directory entry
					ptdump((void *)P2V(PDE_ADDR(pde)), false, ix2, 8);
				}
#endif

				PANIC(0, "mapping an already-mapped address");
			}
		}

		// ok, set the PTE as requested
		*pte = entry;

		// Stop after the final page. Without this, a range that
		// ends in the topmost page would wrap 'addr' around to 0
		// and the loop would never terminate.
		if (addr == last) {
			break;
		}

		// move to the next page
		addr += SZ_PAGE;
		pa += SZ_PAGE;
	}
	return SUCCESS;
}

/**
** Name:	vm_uvmdup
**
** Create a duplicate of the user portion of an existing page table
** hierarchy. We assume that the "new" page directory exists and
** the system portions of it should not be touched.
**
** Note: we do not duplicate the frames in the hierarchy - we just
** create a duplicate of the hierarchy itself. This means that we
** now have two sets of page tables that refer to the same physical
** frames in memory.
**
** Directory entries hold physical addresses, so the address of an
** existing page table is converted with P2V() before it is copied,
** and the address of the copy is converted with V2P() before it is
** stored.
**
** If an allocation fails part-way through, the entries that were
** already written to 'new' are left in place.
**
** @param old  Existing page directory
** @param new  New page directory
**
** @return SUCCESS, E_BAD_PARAM if either pointer is NULL, or
**         E_NO_MEMORY if a page table could not be duplicated
*/
int vm_uvmdup(pde_t *old, pde_t *new)
{
	if (old == NULL || new == NULL) {
		return E_BAD_PARAM;
	}

	// we only want to deal with the "user" half of the address space
	for (int i = 0; i < (N_PDE >> 1); ++i) {
		// the entry to copy
		pde_t entry = old[i];

		if (!IS_PRESENT(entry)) {
			// not present, so create an empty entry
			entry = 0;

		} else if (!IS_LARGE(entry)) {
			// it refers to a page table (4KB pages), so we must
			// duplicate that table; 4MB entries are copied as-is
			pte_t *oldpt = (pte_t *)P2V(PDE_ADDR(entry));
			pte_t *newpt = (pte_t *)vm_pagedup((void *)oldpt);
			if (newpt == NULL) {
				return E_NO_MEMORY;
			}

			// create the new PDE entry by replacing the frame #
			// and keeping the original permission bits
			entry = PDE_ADDR(V2P(newpt)) | PERMS(entry);
		}

		// send it on its way
		new[i] = entry;
	}

	return SUCCESS;
}