a cow based x86_64 operating system, using limine and stivale2

#include <stivale2.h>
#include "../lib/assert.h"
#include "../lib/string.h"
#include "physical_allocator.h"
#include "paging.h"
#include "vmap.h"
#include "../lib/sprintf.h"
#include "../lib/panic.h"
#include "../lib/logging.h"
#include "../lib/registers.h"
#define CR0_WP (1lu << 16)
#define CR0_PG_BIT (1lu << 31)
#define CR4_PAE_BIT (1lu << 5)
#define CR4_PCIDE (1lu << 17)
#define IA32_EFER_NXE_BIT (1lu << 11)
// size of allocation bulks (in pages)
// the page table buffer is refilled 16 pages at a time;
// in the worst case scenario, a MAX_ALLOC page allocation
// needs 1 pdpt + 1 pd + 9 pt
// = 11 new page table pages
#define MAX_ALLOC 4096
/**
* 4th (lowest) level table (pt) entry
*/
typedef void* pte;
/**
* 3rd level table (pd) entry
*/
typedef pte* pde;
/**
* 2nd level table (pdpt) entry
*/
typedef pde* pdpte;
/**
* 1st (top) level table (pml4) entry
*/
typedef pdpte* pml4e;
#define __page __attribute__((aligned(4096)))
static pml4e pml4[512] __page = {0};
static_assert_equals(sizeof(pml4), 0x1000);
// when set, the alloc_page_table function is allowed
// to refill its buffer on the fly;
// unset it to avoid nasty recursions
static int alloc_page_table_realloc = 1;
static void fill_page_table_allocator_buffer(size_t n);
static void* alloc_page_table(void);
static void internal_map_pages(uint64_t physical_addr,
uint64_t virtual_addr,
size_t count,
uint64_t flags);
// extract the offset of the page table
// from a virtual address
__attribute__((pure))
static uint32_t pt_offset(uint64_t virt) {
return (virt >> 12) & 0x1ff;
}
// extract the offset of the page directory
// from a virtual address
__attribute__((pure))
static uint32_t pd_offset(uint64_t virt) {
return (virt >> 21) & 0x1ff;
}
// extract the offset of the pdp
// from a virtual address
__attribute__((pure))
static uint32_t pdpt_offset(uint64_t virt) {
return (virt >> 30) & 0x1ff;
}
// extract the offset of the pml4 entry
// from a virtual address
__attribute__((pure))
static uint32_t pml4_offset(uint64_t virt) {
return (virt >> 39) & 0x1ff;
}
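// worked example (illustrative address, not taken from this file):
// for virt = 0xffff800000200000,
// pml4_offset(virt) == 256, pdpt_offset(virt) == 0,
// pd_offset(virt) == 1, pt_offset(virt) == 0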
// PWT: page level write-through
// PCD: page level cache disable
static void* create_table_entry(void* entry, uint64_t flags) {
assert_aligned(entry, 0x1000);
return (void*)(flags |
(uint64_t)entry);
}
// extract the pointer from an entry of
// a table structure
static void* extract_pointer(void* c) {
return (void*)(0x000ffffffffff000llu & (uint64_t)c);
}
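// reminder of the x86_64 entry layout these two helpers rely on:
// bits 12..51 hold the physical address of the next structure,
// the low 12 bits hold flags (present, rw, us, pwt, pcd, ...),
// and bit 63 is the execute-disable (XD) bit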
// the current page flags
//static unsigned current_page_flags;
/*
static void physical_allocator_callback_kernel(
uint64_t physical_address,
uint64_t virtual_address,
size_t size) {
assert(is_kernel_data(virtual_address));
(void)(physical_address + virtual_address + size);
}
void physical_allocator_callback_user(
uint64_t physical_address,
uint64_t virtual_address,
size_t size) {
assert(is_user(virtual_address));
(void)(physical_address + virtual_address + size);
}
void physical_allocator_callback_mmio(
uint64_t physical_address,
uint64_t virtual_address,
size_t size) {
assert(is_mmio(virtual_address));
(void)(physical_address + virtual_address + size);
}
*/
// map the physical memory ranges reported by the
// bootloader (including the allocator's own data)
static void map_physical_memory(const struct stivale2_struct_tag_memmap* memmap) {
// as we are not in a callback function,
// we can realloc page tables on the fly
alloc_page_table_realloc = 1;
for(unsigned i = 0; i < memmap->entries; i++) {
const struct stivale2_mmap_entry* e = &memmap->memmap[i];
if(e->type == STIVALE2_MMAP_USABLE
|| e->type == STIVALE2_MMAP_BOOTLOADER_RECLAIMABLE
|| e->type == STIVALE2_MMAP_ACPI_RECLAIMABLE
|| e->type == STIVALE2_MMAP_ACPI_NVS) {
// be inclusive!
uint64_t phys_addr = (uint64_t) (e->base / 0x1000) * 0x1000;
size_t size = (e->length + (e->base - phys_addr) + 0x0fff) / 0x1000;
if(size == 0)
continue;
void* virtual_addr = translate_address((void *)phys_addr);
internal_map_pages(
phys_addr,
(uint64_t)virtual_addr,
size,
PRESENT_ENTRY | PL_XD | PL_RW
);
// use the allocator to allocate page tables
// to map its own data
}
}
}
/*
static void map_allocator_data(void) {
const struct physical_allocator_data_page_entry* entries;
size_t size = 0;
// as we are not in a callback function,
// we can realloc page tables on the fly
alloc_page_table_realloc = 1;
entries = physical_allocator_data_pages(&size);
for(unsigned i = 0; i < size; i++) {
uint64_t phys_addr = (uint64_t) entries[i].physical_address;
uint64_t virtual_addr = TRANSLATED_PHYSICAL_MEMORY_BEGIN | phys_addr;
internal_map_pages(phys_addr, virtual_addr, 1, PRESENT_ENTRY);
// use the allocator to allocate page tables
// to map its own data
}
}
*/
// map the kernel to KERNEL_DATA_BEGIN | kernel_phys_base
static void map_kernel(const struct stivale2_struct_tag_memmap* memmap) {
// count the number of kernel entries
// suppose the first one is the .text section,
// the second one is rodata
// and the third is data+bss
int n = 0;
for(unsigned i = 0; i < memmap->entries; i++) {
const struct stivale2_mmap_entry* e = &memmap->memmap[i];
if(e->type == STIVALE2_MMAP_KERNEL_AND_MODULES) {
// floor the base to one page
uint64_t base = e->base & ~0x0fff;
// ceil the size
size_t size = e->length + (e->base - base);
size = (size+0x0fff) / 0x1000;
uint64_t virtual_addr = base | KERNEL_DATA_BEGIN;
uint64_t flags = PRESENT_ENTRY;
switch (n++)
{
case 0:
/* .text */
break;
case 1:
/* rodata */
flags |= PL_XD;
break;
case 2:
flags |= PL_RW;
/* data+bss */
break;
default:
//modules: do not map in higher half!
flags |= PL_RW;
virtual_addr = base | TRANSLATED_PHYSICAL_MEMORY_BEGIN;
break;
}
//alloc the page table pages
// before doing any allocation
fill_page_table_allocator_buffer(64);
internal_map_pages(base, virtual_addr, size, flags);
}
}
}
// return a non-zero value iff the entry is
// present
static int present_entry(void* entry) {
return (uint64_t)entry & PRESENT_ENTRY;
}
// function for debugging purposes
static inline void print_struct(int level, void** table, uint64_t virt) {
void** addr = translate_address(table);
//if(level > 1)
// return ;
for(int i = 0; i < 512; i++) {
if(present_entry(addr[i])) {
uint64_t v = (virt << 9) | i;
for(int j = 0; j < level; j++)
puts("-");
if(level == 2) {
printf(" %lx -> %lx\n", v << 12, extract_pointer(addr[i]));
}
else {
puts("\n");
print_struct(level+1, extract_pointer(addr[i]), v);
}
}
}
}
void init_paging(const struct stivale2_struct_tag_memmap* memmap) {
// init the highest paging structures.
// no need to use the translate_address macro
// as we are still in the early memory configuration:
// memory is both identity mapped and translated,
// so *x = *translate_address(x)
// initialization: we map some memory regions
// statically: we don't use the pmm to find
// out what is to be mapped.
// we can therefore allocate page tables on
// the fly
alloc_page_table_realloc = 1;
// first, let's create the main memory regions:
// 1st user: 0x0000000000000000 -> 0x0000007fffffffff
// 256th supervisor: 0xffff800000000000 -> 0xffff807fffffffff
// 511th supervisor: 0xffffff8000000000 -> 0xffffffffffffffff
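// (sanity check with the helpers above:
// pml4_offset(0x0000000000000000) == 0,
// pml4_offset(0xffff800000000000) == 256,
// pml4_offset(0xffffff8000000000) == 511)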
pml4[0] = create_table_entry(
alloc_page_table(), // alloc a new page table
// with pmm
PRESENT_ENTRY | PL_US // execute enable, read
| PL_RW // write for all the lower half
// and accessible from userspace
);
// the two higher half memory regions are supervisor only,
// so that no user can access them even though the entries
// stay in the pml4 table
pml4[256] = create_table_entry(
alloc_page_table(), // once again use the pmm
PRESENT_ENTRY | PL_RW // supervisor only
);
// same as above
pml4[511] = create_table_entry(
alloc_page_table(), // once again use the pmm
PRESENT_ENTRY | PL_RW // supervisor only
);
// map all the memory to 0xffff800000000000
map_physical_memory(memmap);
// every time we allocate a page table
// while allocating some memory,
// we need to set this to 0
// in order to avoid awful recursion bugs
alloc_page_table_realloc = 0;
// map the kernel
map_kernel(memmap);
}
void append_paging_initialization(void) {
// enable PAE in cr4
// disable PCIDE
set_cr4((get_cr4() | CR4_PAE_BIT) & ~CR4_PCIDE);
// enable the PG bit
set_cr0(get_cr0() | CR0_PG_BIT | CR0_WP);
// enable NXE bit
write_msr(IA32_EFER_MSR, read_msr(IA32_EFER_MSR) | IA32_EFER_NXE_BIT);
// get the physical address of the pml4 table
uint64_t pml4_lower_half_ptr = early_virtual_to_physical(pml4);
// finally set the pml4 to cr3
_cr3(pml4_lower_half_ptr);
}
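// usage sketch (assumed call order; memmap_tag is an illustrative
// name for the stivale2 memmap tag, not defined in this file):
//   init_paging(memmap_tag);        // build the page tables
//   append_paging_initialization(); // load them into cr3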
// buffer the allocation requests:
// asking for 16 pages per call is
// optimal, as it is the granularity
// of the highest level bitmap
static void* page_table_allocator_buffer[64];
static size_t page_table_allocator_buffer_size = 0;
// callback for the page allocator
static void page_table_allocator_callback(uint64_t phys_addr,
uint64_t virt_addr,
size_t size) {
(void)(size+virt_addr); // the size is always one whatsoever...
page_table_allocator_buffer[page_table_allocator_buffer_size++] = (void*)phys_addr;
}
static void zero_page_table_page(void* physical_address) {
assert_aligned(physical_address, 0x1000);
memset(translate_address(physical_address), 0, 0x1000);
}
// fill the page table allocator buffer
static void fill_page_table_allocator_buffer(size_t n) {
assert(n <= 64);
int to_alloc = n - page_table_allocator_buffer_size;
if(to_alloc < 0)
return;
int old_size = page_table_allocator_buffer_size;
physalloc(to_alloc, 0, page_table_allocator_callback);
for(unsigned i = old_size; i < n; i++)
zero_page_table_page(page_table_allocator_buffer[i]);
page_table_allocator_buffer_size = n;
}
// return a newly allocated zeroed page
static void* alloc_page_table(void) {
if(! page_table_allocator_buffer_size) {
if(!alloc_page_table_realloc)
panic(
"alloc_page_table(): out of buffered pages, unable to allocate "
"page tables"
);
physalloc(16, 0, page_table_allocator_callback);
page_table_allocator_buffer_size = 16;
for(int i = 0; i < 16; i++)
zero_page_table_page(page_table_allocator_buffer[i]);
}
return page_table_allocator_buffer[--page_table_allocator_buffer_size];
}
static void* get_entry_or_allocate(void** restrict table, unsigned index) {
assert(index < 512);
void** virtual_addr_table = translate_address(table);
void* entry = virtual_addr_table[index];
if(!present_entry(entry)) {
void* e = create_table_entry(
alloc_page_table(),
PRESENT_ENTRY | PL_US | PL_RW);
return virtual_addr_table[index] = e;
}
else
return entry;
}
// kernel panic if the entry is not present
static void* get_entry_or_panic(void** restrict table, unsigned index) {
assert(index < 512);
void** virtual_addr_table = translate_address(table);
void* entry = virtual_addr_table[index];
assert(present_entry(entry));
return entry;
}
/**
* this function cannot be called from a callback
* because it would lead to recursion.
* we therefore keep it static (internal)
* and wrap it in a public function
*/
static void internal_map_pages(uint64_t physical_addr,
uint64_t virtual_addr,
size_t count,
uint64_t flags) {
while(count > 0) {
// fetch table indexes
unsigned pml4i = pml4_offset(virtual_addr),
pdpti = pdpt_offset(virtual_addr),
pdi = pd_offset(virtual_addr),
pti = pt_offset(virtual_addr);
assert(pml4i == 0 || pml4i == 511 || pml4i == 256);
// fetch (or create) the intermediate table entries
pml4e restrict pml4entry = extract_pointer(get_entry_or_allocate((void**)pml4, pml4i));
pdpte restrict pdptentry = extract_pointer(get_entry_or_allocate((void**)pml4entry, pdpti));
pde restrict pdentry = extract_pointer(get_entry_or_allocate((void**)pdptentry, pdi));
while(count > 0 && pti < 512) {
// create a new entry
void* e = create_table_entry((void*)physical_addr,flags);
void** entry_ptr = (void**)translate_address(pdentry) + pti;
if(present_entry(*entry_ptr)) {
char buff[256];
sprintf(buff,
"internal_map_pages(...,flags=%lu):\n"
" tried to map physical memory 0x%lx to 0x%lx, but physical memory 0x%lx"
" was already mapped here",
flags, physical_addr, virtual_addr, extract_pointer(*entry_ptr));
panic(buff);
}
*entry_ptr = e;
pti++;
count--;
physical_addr += 0x1000;
virtual_addr += 0x1000;
}
}
}
void remap_pages(void* vaddr_ptr,
size_t count,
uint64_t flags) {
uint64_t virtual_addr = (uint64_t)vaddr_ptr;
while(count > 0) {
// fetch table indexes
unsigned pml4i = pml4_offset(virtual_addr),
pdpti = pdpt_offset(virtual_addr),
pdi = pd_offset(virtual_addr),
pti = pt_offset(virtual_addr);
assert(pml4i == 0 || pml4i == 511 || pml4i == 256);
// fetch (or create) the intermediate table entries
pml4e restrict pml4entry = extract_pointer(get_entry_or_allocate((void**)pml4, pml4i));
pdpte restrict pdptentry = extract_pointer(get_entry_or_allocate((void**)pml4entry, pdpti));
pde restrict pdentry = extract_pointer(get_entry_or_allocate((void**)pdptentry, pdi));
while(count > 0 && pti < 512) {
// create a new entry
void** entry_ptr = (void**)translate_address(pdentry) + pti;
if(!present_entry(*entry_ptr)) {
char buff[256];
sprintf(buff,
"remap_pages(...,flags=%lu):\n"
" tried to remap virtual memory 0x%lx, nothing was mapped there",
flags, virtual_addr
);
panic(buff);
}
// extract the physical page and remake an entry with
// the given flags
void* e = create_table_entry(extract_pointer(*entry_ptr), flags);
*entry_ptr = e;
pti++;
count--;
virtual_addr += 0x1000;
}
}
}
void alloc_pages(void* virtual_addr_begin,
size_t count,
uint64_t flags) {
// don't allow recursion
alloc_page_table_realloc = 0;
void callback(
uint64_t physical_address,
uint64_t virtual_address,
size_t c) {
internal_map_pages(physical_address,
virtual_address,
c,
flags);
};
while(count > 0) {
unsigned size = count;
if(size > MAX_ALLOC)
size = MAX_ALLOC;
fill_page_table_allocator_buffer(16);
physalloc(size, virtual_addr_begin, callback);
count -= size;
virtual_addr_begin += size * 0x1000;
}
}
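// usage sketch for alloc_pages (illustrative values only; the
// virtual address and flag combination below are assumptions,
// not taken from this file): back a 4-page region with fresh
// physical frames
//   alloc_pages((void*)0xffff800040000000, 4,
//               PRESENT_ENTRY | PL_RW | PL_XD);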
void map_pages(uint64_t physical_addr,
uint64_t virtual_addr,
size_t count,
uint64_t flags) {
while(count > 64) {
fill_page_table_allocator_buffer(64);
internal_map_pages(physical_addr, virtual_addr, 64, flags);
count -= 64;
physical_addr += 0x1000 * 64;
virtual_addr += 0x1000 * 64;
}
// count <= 64
fill_page_table_allocator_buffer(64);
internal_map_pages(physical_addr, virtual_addr, count, flags);
}
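// usage sketch for map_pages (illustrative values only; the MMIO
// physical address and the chosen higher half virtual address are
// assumptions, not taken from this file):
//   map_pages(0xfee00000, 0xffffff80fee00000, 1,
//             PRESENT_ENTRY | PL_RW | PL_XD);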
// return 1 iff no entry is present in the range
static inline int is_table_empty(pte* page_table, unsigned begin, unsigned end) {
assert(begin <= end);
assert(end <= 512);
pte* translated = translate_address(page_table);
for(unsigned i = begin; i < end; i++) {
if(present_entry(translated[i]))
return 0;
}
return 1;
}
static pte* get_page_table_or_panic(uint64_t vaddr) {
unsigned pml4i = pml4_offset(vaddr),
pdpti = pdpt_offset(vaddr),
pdi = pd_offset(vaddr);
assert(pml4i == 0 || pml4i == 511 || pml4i == 256);
// those entries should exist
pml4e restrict pml4entry = extract_pointer(
get_entry_or_panic((void**)pml4, pml4i));
pdpte restrict pdptentry = extract_pointer(
get_entry_or_panic((void**)pml4entry, pdpti));
return extract_pointer(get_entry_or_panic((void**)pdptentry, pdi));
}
void unmap_pages(uint64_t virtual_addr, size_t count) {
while(count > 0) {
// fetch table indexes
void** pt = translate_address(
get_page_table_or_panic(virtual_addr));
// unmap multiple pages in the page table
// without recalculating it
unsigned pti = pt_offset(virtual_addr);
// keep track of the first & last unmapped
// entries: within this range we are sure that
// everything is unmapped
unsigned begin = pti;
while(count > 0 && pti < 512) {
void** entry_ptr = pt + pti;
if(!present_entry(*entry_ptr)) {
char buff[256];
sprintf(buff,
"unmap_pages(...):\n"
" tried to unmap virtual memory 0x%lx, which was not mapped",
virtual_addr);
panic(buff);
}
// actually erase the entry
*entry_ptr = 0;
pti++;
count--;
virtual_addr += 0x1000;
}
(void) begin;
/** @TODO free the page table if it is empty */
/*
unsigned end = pti;
// unmap the page map if empty
if(is_table_empty(pdentry, 0, begin) &&
is_table_empty(pdentry, end, 512))
{
// the page table contains no entry
// let's free it
// physfree(pdentry);
}
*/
}
}
uint64_t get_paddr(const void* vaddr) {
unsigned pti = pt_offset((uint64_t)vaddr);
pte entry = get_page_table_or_panic((uint64_t)vaddr)[pti];
assert(present_entry(entry));
uint64_t page_paddr = (uint64_t)extract_pointer(entry);
return page_paddr | ((uint64_t)vaddr & 0x0fff);
}
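// worked example (hypothetical mapping, for illustration only):
// if the page at vaddr 0xffff800000001000 is mapped to the physical
// page 0x5000, then get_paddr((void*)0xffff800000001234) returns 0x5234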