From 9eab0248c429ed9a85b72ab274baad3e04b39f63 Mon Sep 17 00:00:00 2001 From: niku Date: Sat, 12 Aug 2023 23:45:18 +0200 Subject: [PATCH] Initial commit --- .gitignore | 1 + .vscode/launch.json | 45 +++++++++++++++++++++ Cargo.lock | 72 ++++++++++++++++++++++++++++++++++ Cargo.toml | 8 ++++ LICENSE | 21 ++++++++++ README.md | 2 + src/layout.rs | 14 +++++++ src/lib.rs | 5 +++ src/main.rs | 72 ++++++++++++++++++++++++++++++++++ src/pager.rs | 85 ++++++++++++++++++++++++++++++++++++++++ src/row.rs | 71 +++++++++++++++++++++++++++++++++ src/statement.rs | 95 +++++++++++++++++++++++++++++++++++++++++++++ src/table.rs | 38 ++++++++++++++++++ 13 files changed, 529 insertions(+) create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 src/layout.rs create mode 100644 src/lib.rs create mode 100644 src/main.rs create mode 100644 src/pager.rs create mode 100644 src/row.rs create mode 100644 src/statement.rs create mode 100644 src/table.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..3a1b8d8 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,45 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug executable 'dbonk'", + "cargo": { + "args": [ + "build", + "--bin=dbonk", + "--package=dbonk" + ], + "filter": { + "name": "dbonk", + "kind": "bin" + } + }, + "args": ["debug.db"], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug unit tests in executable 'dbonk'", + "cargo": { + "args": [ + "test", + "--no-run", + "--bin=dbonk", + "--package=dbonk" + ], + "filter": { + "name": "dbonk", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + } + ] +} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..f6a61d6 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,72 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anyhow" +version = "1.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" + +[[package]] +name = "dbonk" +version = "0.1.0" +dependencies = [ + "anyhow", + "thiserror", +] + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] 
+ +[[package]] +name = "thiserror" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1660708 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "dbonk" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.72" +thiserror = "1.0.44" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9a9a766 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 niku + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b317136
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# dbonk
+Toy sqlite clone inspired by [Connor Stack's blog series](https://cstack.github.io/db_tutorial/).
diff --git a/src/layout.rs b/src/layout.rs
new file mode 100644
index 0000000..a8de611
--- /dev/null
+++ b/src/layout.rs
@@ -0,0 +1,14 @@
+pub const ID_SIZE: usize = 2; // u16
+pub const USERNAME_SIZE: usize = 32;
+pub const EMAIL_SIZE: usize = 255;
+
+pub const ROW_SIZE: usize = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
+
+pub const ID_OFFSET: usize = 0;
+pub const USERNAME_OFFSET: usize = ID_OFFSET + ID_SIZE;
+pub const EMAIL_OFFSET: usize = USERNAME_OFFSET + USERNAME_SIZE;
+
+pub const TABLE_MAX_PAGES: usize = 100;
+pub const PAGE_SIZE: usize = 4096;
+pub const ROWS_PER_PAGE: usize = PAGE_SIZE / ROW_SIZE;
+pub const TABLE_MAX_ROWS: usize = ROWS_PER_PAGE * TABLE_MAX_PAGES;
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..ae2874a
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,5 @@
+pub mod layout;
+pub mod pager;
+pub mod row;
+pub mod statement;
+pub mod table;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..59542be
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,89 @@
+use anyhow::Result;
+use dbonk::{pager::Pager, statement::*, table::Table};
+use std::{
+    env,
+    io::{stdin, stdout, Write},
+    process,
+};
+
+/// Runs a meta command, i.e. an input line that starts with `.`.
+///
+/// `.exit` flushes the table to disk and ends the process; any other command
+/// is reported as unrecognized.
+fn handle_meta_command(input: &str, table: &mut Table) -> Result<()> {
+    match input.split_whitespace().next() {
+        Some(".exit") => {
+            table.flush()?;
+            process::exit(0);
+        }
+        _ => println!("Unrecognized command '{}'", input),
+    }
+    Ok(())
+}
+
+/// Executes a prepared statement, printing each row a `select` returns.
+fn execute_statement(statement: Statement, table: &mut Table) -> Result<()> {
+    match statement.kind {
+        StatementType::Insert(row) => execute_insert(*row, table),
+        StatementType::Select => {
+            for row in execute_select(table)? {
+                println!("{:?}", row);
+            }
+            Ok(())
+        }
+    }
+}
+
+/// Parses one input line as a statement and executes it.
+fn handle_statement(input: &str, table: &mut Table) -> Result<()> {
+    let statement = prepare_statement(input)?;
+    execute_statement(statement, table)
+}
+
+/// Prompts for one line and stores it in `input` without its line terminator.
+///
+/// Returns `Ok(false)` once standard input has reached end-of-file.
+fn read_input(input: &mut String) -> Result<bool> {
+    print!("sqlite> ");
+    stdout().flush()?;
+    input.clear();
+    if stdin().read_line(input)? == 0 {
+        return Ok(false);
+    }
+    // Strip "\n" or "\r\n"; a blind `pop()` would leave a stray '\r' behind.
+    let kept = input.trim_end_matches(&['\r', '\n'][..]).len();
+    input.truncate(kept);
+    Ok(true)
+}
+
+fn main() -> Result<()> {
+    let args: Vec<String> = env::args().collect();
+    if args.len() < 2 {
+        eprintln!("Must supply a database filename.");
+        process::exit(1);
+    }
+
+    let pager = Pager::new(&args[1])?;
+    let mut table = Table::new(pager);
+
+    let mut input = String::new();
+    while read_input(&mut input)? {
+        let line = input.trim();
+        if line.is_empty() {
+            continue;
+        }
+
+        let outcome = if line.starts_with('.') {
+            handle_meta_command(line, &mut table)
+        } else {
+            handle_statement(line, &mut table)
+        };
+        // Report the failure and keep the session (and unsaved rows) alive.
+        if let Err(err) = outcome {
+            println!("Error: {}", err);
+        }
+    }
+
+    // End of input: persist inserted rows, exactly as `.exit` would.
+    table.flush()
+}
diff --git a/src/pager.rs b/src/pager.rs
new file mode 100644
index 0000000..3b9f5e0
--- /dev/null
+++ b/src/pager.rs
@@ -0,0 +1,134 @@
+use std::{
+    fs::{File, OpenOptions},
+    io::ErrorKind,
+    os::unix::prelude::FileExt,
+};
+
+use crate::layout::*;
+use anyhow::Result;
+use thiserror::Error;
+
+/// One fixed-size, in-memory page of the database file.
+pub type Page = [u8; PAGE_SIZE];
+
+#[derive(Error, Debug)]
+enum PagerError {
+    #[error("Page {0} is out of bounds.")]
+    OutOfBounds(usize),
+    #[error("Read 0 bytes.")]
+    NoBytes,
+}
+
+/// Page cache sitting between the table and the database file.
+pub struct Pager {
+    file: File,
+    /// Pages loaded so far, indexed by page number; `None` means not loaded.
+    pub pages: [Option<Page>; TABLE_MAX_PAGES],
+}
+
+impl Pager {
+    /// Opens the database file at `file_path`, creating it if it is missing.
+    ///
+    /// # Errors
+    ///
+    /// Returns the I/O error when the file cannot be opened or created.
+    pub fn new(file_path: &str) -> Result<Self> {
+        let file = OpenOptions::new()
+            .read(true)
+            .write(true)
+            .create(true)
+            .truncate(false)
+            .open(file_path)?;
+
+        Ok(Self {
+            file,
+            pages: [None; TABLE_MAX_PAGES],
+        })
+    }
+
+    /// Returns the current length of the database file in bytes.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file metadata cannot be read.
+    pub fn file_len(&self) -> usize {
+        let metadata = self.file.metadata().expect("failed to parse metadata");
+        metadata.len() as usize
+    }
+
+    /// Maps a row number to `(page number, byte offset inside that page)`.
+    pub fn row_location(&self, row_num: usize) -> (usize, usize) {
+        let page_num = row_num / ROWS_PER_PAGE;
+        let row_offset = row_num % ROWS_PER_PAGE;
+        let byte_offset = row_offset * ROW_SIZE;
+        (page_num, byte_offset)
+    }
+
+    /// Returns the cached page `page_num`, loading it on first access.
+    ///
+    /// Bytes the file does not hold yet stay zeroed, so a page past the end
+    /// of the file comes back blank instead of failing.
+    ///
+    /// # Errors
+    ///
+    /// Fails when `page_num` is not below `TABLE_MAX_PAGES` or the read fails.
+    pub fn page(&mut self, page_num: usize) -> Result<&mut Page> {
+        // `>=`, not `>`: valid page numbers are `0..TABLE_MAX_PAGES`.
+        if page_num >= TABLE_MAX_PAGES {
+            return Err(PagerError::OutOfBounds(page_num).into());
+        }
+
+        if self.pages[page_num].is_none() {
+            let mut page: Page = [0; PAGE_SIZE];
+            let offset: u64 = (page_num * PAGE_SIZE).try_into()?;
+            self.read_available(&mut page, offset)?;
+            self.pages[page_num] = Some(page);
+        }
+
+        Ok(self.pages[page_num]
+            .as_mut()
+            .expect("page was cached just above"))
+    }
+
+    /// Fills `buf` from the file starting at `offset`, stopping at end-of-file.
+    fn read_available(&self, buf: &mut [u8], offset: u64) -> Result<()> {
+        let mut filled = 0;
+        while filled < buf.len() {
+            // `read_at` may return fewer bytes than requested, so keep going.
+            match self.file.read_at(&mut buf[filled..], offset + filled as u64) {
+                Ok(0) => break,
+                Ok(read) => filled += read,
+                Err(err) if err.kind() == ErrorKind::Interrupted => continue,
+                Err(err) => return Err(err.into()),
+            }
+        }
+        Ok(())
+    }
+
+    /// Writes the whole cached page `page_num` back to the file.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the page is out of range, was never loaded, or the write fails.
+    pub fn flush_page(&mut self, page_num: usize) -> Result<()> {
+        self.flush_page_prefix(page_num, PAGE_SIZE)
+    }
+
+    /// Writes only the first `len` bytes (capped at `PAGE_SIZE`) of cached
+    /// page `page_num` back to the file.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the page is out of range, was never loaded, or the write fails.
+    pub fn flush_page_prefix(&mut self, page_num: usize, len: usize) -> Result<()> {
+        let page = self
+            .pages
+            .get(page_num)
+            .ok_or(PagerError::OutOfBounds(page_num))?
+            .as_ref()
+            .ok_or(PagerError::NoBytes)?;
+        let offset: u64 = (page_num * PAGE_SIZE).try_into()?;
+        self.file.write_all_at(&page[..len.min(PAGE_SIZE)], offset)?;
+        Ok(())
+    }
+}
diff --git a/src/row.rs b/src/row.rs
new file mode 100644
index 0000000..e03a2b0
--- /dev/null
+++ b/src/row.rs
@@ -0,0 +1,85 @@
+use crate::layout::*;
+use core::fmt::Debug;
+
+/// A row in its fixed-size on-disk encoding.
+pub type RowBytes = [u8; ROW_SIZE];
+
+/// One table row: an id plus NUL-padded username and email buffers.
+pub struct Row {
+    id: u16,
+    username: [u8; USERNAME_SIZE],
+    email: [u8; EMAIL_SIZE],
+}
+
+impl Row {
+    /// Builds a row from its three column values.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `username` is longer than `USERNAME_SIZE` bytes or `email` is
+    /// longer than `EMAIL_SIZE` bytes.
+    pub fn new(id: u16, username: &str, email: &str) -> Self {
+        let mut row = Self {
+            id,
+            ..Default::default()
+        };
+
+        row.username[..username.len()].copy_from_slice(username.as_bytes());
+        row.email[..email.len()].copy_from_slice(email.as_bytes());
+
+        row
+    }
+}
+
+impl Default for Row {
+    fn default() -> Self {
+        Self {
+            id: 0,
+            username: [0; USERNAME_SIZE],
+            email: [0; EMAIL_SIZE],
+        }
+    }
+}
+
+impl From<RowBytes> for Row {
+    /// Decodes a row; the id is stored big-endian in the first two bytes.
+    fn from(value: RowBytes) -> Self {
+        let id = u16::from_be_bytes([value[ID_OFFSET], value[ID_OFFSET + 1]]);
+        let mut username = [0; USERNAME_SIZE];
+        username.copy_from_slice(&value[USERNAME_OFFSET..EMAIL_OFFSET]);
+        let mut email = [0; EMAIL_SIZE];
+        email.copy_from_slice(&value[EMAIL_OFFSET..ROW_SIZE]);
+        Self {
+            id,
+            username,
+            email,
+        }
+    }
+}
+
+// `From` rather than a hand-written `Into`: the blanket impl still provides
+// `row.into()`, and no lint has to be silenced.
+impl From<Row> for RowBytes {
+    /// Encodes a row into its on-disk layout.
+    fn from(row: Row) -> Self {
+        let mut bytes = [0u8; ROW_SIZE];
+        bytes[ID_OFFSET..USERNAME_OFFSET].copy_from_slice(&row.id.to_be_bytes());
+        bytes[USERNAME_OFFSET..EMAIL_OFFSET].copy_from_slice(&row.username);
+        bytes[EMAIL_OFFSET..ROW_SIZE].copy_from_slice(&row.email);
+        bytes
+    }
+}
+
+impl Debug for Row {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Rows are read straight from the database file, so tolerate invalid
+        // UTF-8 instead of panicking while printing.
+        let username = String::from_utf8_lossy(&self.username);
+        let email = String::from_utf8_lossy(&self.email);
+        f.debug_struct("Row")
+            .field("id", &self.id)
+            .field("username", &username.trim_end_matches('\0'))
+            .field("email", &email.trim_end_matches('\0'))
+            .finish()
+    }
+}
diff --git a/src/statement.rs b/src/statement.rs
new file mode 100644
index 0000000..2e10c78
--- /dev/null
+++ b/src/statement.rs
@@ -0,0 +1,120 @@
+use crate::{
+    layout::{EMAIL_SIZE, ROW_SIZE, TABLE_MAX_ROWS, USERNAME_SIZE},
+    row::{Row, RowBytes},
+    table::Table,
+};
+use anyhow::Result;
+use thiserror::Error;
+
+type FilteredRows = Vec<Row>;
+
+/// The operation a [`Statement`] performs.
+#[derive(Debug)]
+pub enum StatementType {
+    /// Append the boxed row to the table.
+    Insert(Box<Row>),
+    /// Return every row of the table.
+    Select,
+}
+
+/// A parsed statement, ready to be executed.
+#[derive(Debug)]
+pub struct Statement {
+    pub kind: StatementType,
+}
+
+impl Statement {
+    fn insert(row: Row) -> Self {
+        Self {
+            kind: StatementType::Insert(Box::new(row)),
+        }
+    }
+
+    fn select() -> Self {
+        Self {
+            kind: StatementType::Select,
+        }
+    }
+}
+
+#[derive(Error, Debug)]
+enum PrepareError {
+    #[error("Unrecognized command: '{0}'")]
+    Unrecognized(String),
+    #[error("Invalid syntax")]
+    InvalidSyntax,
+    #[error("String is too long")]
+    StringTooLong,
+}
+
+#[derive(Error, Debug)]
+enum ExecutionError {
+    #[error("Table is full")]
+    TableFull,
+}
+
+/// Parses one line of input into a [`Statement`].
+///
+/// Supported forms are `insert <id> <username> <email>` and `select`.
+///
+/// # Errors
+///
+/// Fails on blank input, an unknown keyword, missing `insert` arguments, an
+/// id that is not a `u16`, or a username/email too long for its column.
+pub fn prepare_statement(input: impl AsRef<str>) -> Result<Statement> {
+    let mut parts = input.as_ref().split_whitespace();
+    match parts.next() {
+        Some("insert") => {
+            let (id, username, email) = match (parts.next(), parts.next(), parts.next()) {
+                (Some(id), Some(username), Some(email)) => (id, username, email),
+                _ => return Err(PrepareError::InvalidSyntax.into()),
+            };
+            // `Row::new` panics on oversized columns, so reject them up front.
+            if username.len() > USERNAME_SIZE || email.len() > EMAIL_SIZE {
+                return Err(PrepareError::StringTooLong.into());
+            }
+            let id: u16 = id.parse()?;
+            Ok(Statement::insert(Row::new(id, username, email)))
+        }
+        Some("select") => Ok(Statement::select()),
+        Some(keyword) => Err(PrepareError::Unrecognized(keyword.to_string()).into()),
+        // Blank input has no keyword; report it instead of panicking.
+        None => Err(PrepareError::InvalidSyntax.into()),
+    }
+}
+
+/// Appends `row` after the last row of `table`.
+///
+/// # Errors
+///
+/// Fails when the table already holds `TABLE_MAX_ROWS` rows or the target
+/// page cannot be loaded.
+pub fn execute_insert(row: Row, table: &mut Table) -> Result<()> {
+    if table.row_count >= TABLE_MAX_ROWS {
+        return Err(ExecutionError::TableFull.into());
+    }
+
+    let bytes: RowBytes = row.into();
+    let (page_num, offset) = table.row_slot(table.row_count);
+    let page = table.pager.page(page_num)?;
+    page[offset..offset + ROW_SIZE].copy_from_slice(&bytes);
+    table.row_count += 1;
+    Ok(())
+}
+
+/// Returns every row of `table` in insertion order.
+///
+/// # Errors
+///
+/// Fails when a page holding rows cannot be loaded.
+pub fn execute_select(table: &mut Table) -> Result<FilteredRows> {
+    let mut rows: FilteredRows = Vec::with_capacity(table.row_count.min(TABLE_MAX_ROWS));
+    for i in 0..table.row_count {
+        let (page_num, offset) = table.row_slot(i);
+        let page = table.pager.page(page_num)?;
+        let row: RowBytes = page[offset..offset + ROW_SIZE].try_into()?;
+        rows.push(row.into());
+    }
+
+    Ok(rows)
+}
diff --git a/src/table.rs b/src/table.rs
new file mode 100644
index 0000000..856095a
--- /dev/null
+++ b/src/table.rs
@@ -0,0 +1,66 @@
+use crate::{layout::*, pager::Pager};
+use anyhow::Result;
+
+/// The database's single table, stored through a [`Pager`].
+pub struct Table {
+    /// Number of rows currently stored.
+    pub row_count: usize,
+    pub pager: Pager,
+}
+
+impl Table {
+    /// Wraps `pager`, deriving the row count from the length of its file.
+    pub fn new(pager: Pager) -> Self {
+        // Rows never straddle pages: a full page holds `ROWS_PER_PAGE` rows
+        // followed by padding, and only the last page may be shorter. Dividing
+        // the whole file length by `ROW_SIZE` would count that padding as rows.
+        let file_len = pager.file_len();
+        let full_pages = file_len / PAGE_SIZE;
+        let tail_rows = (file_len % PAGE_SIZE) / ROW_SIZE;
+        let row_count = full_pages * ROWS_PER_PAGE + tail_rows;
+        Self { row_count, pager }
+    }
+
+    /// Maps a row index to `(page number, byte offset inside that page)`.
+    pub fn row_slot(&self, index: usize) -> (usize, usize) {
+        self.pager.row_location(index)
+    }
+
+    /// Writes every cached page that holds rows back to the database file.
+    ///
+    /// Full pages are written whole; a partially filled last page is written
+    /// only up to its last row, so the file length keeps encoding the row count.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error hit while writing a page.
+    pub fn flush(&mut self) -> Result<()> {
+        let full_pages = self.row_count / ROWS_PER_PAGE;
+        for page_num in 0..full_pages {
+            if self.is_cached(page_num) {
+                self.pager.flush_page(page_num)?;
+            }
+        }
+
+        let tail_rows = self.row_count % ROWS_PER_PAGE;
+        if tail_rows > 0 && self.is_cached(full_pages) {
+            self.pager.flush_page_prefix(full_pages, tail_rows * ROW_SIZE)?;
+        }
+        Ok(())
+    }
+
+    /// Flushes the table to disk.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a page cannot be written; call [`Table::flush`] to handle the
+    /// error instead.
+    pub fn close(&mut self) {
+        self.flush().expect("failed to flush table to disk");
+    }
+
+    /// Reports whether page `page_num` is currently loaded in the pager.
+    fn is_cached(&self, page_num: usize) -> bool {
+        matches!(self.pager.pages.get(page_num), Some(Some(_)))
+    }
+}