diff --git a/fixtures/complex.html b/fixtures/complex.html new file mode 100644 index 0000000..81afbb8 --- /dev/null +++ b/fixtures/complex.html @@ -0,0 +1,13 @@ + + + Document + + + +

Hello

+ + diff --git a/src/css.rs b/src/css.rs new file mode 100644 index 0000000..6e5ac22 --- /dev/null +++ b/src/css.rs @@ -0,0 +1,322 @@ +use core::fmt; + +struct Parser { + pos: usize, + input: String, +} + +impl Parser { + fn eof(&self) -> bool { + self.pos >= self.input.len() + } + + fn consume_while(&mut self, test: F) -> String + where + F: Fn(char) -> bool, + { + let mut result = String::new(); + while !self.eof() && test(self.next_char()) { + result.push(self.consume_char()); + } + return result; + } + + fn next_char(&self) -> char { + self.input[self.pos..].chars().next().unwrap() + } + + fn consume_char(&mut self) -> char { + let mut iter = self.input[self.pos..].char_indices(); + let (_, cur_char) = iter.next().unwrap(); + let (next_pos, _) = iter.next().unwrap_or((1, ' ')); + self.pos += next_pos; + return cur_char; + } + + fn consume_whitespace(&mut self) { + self.consume_while(|c| c.is_whitespace()); + } + + fn parse_single_selector(&mut self) -> SingleSelector { + let mut selector = SingleSelector::default(); + while !self.eof() { + self.consume_whitespace(); + match self.next_char() { + '#' => { + self.consume_char(); + assert!(self.next_char().is_ascii_alphanumeric()); + selector.id = Some(self.parse_identifier()); + } + '.' => { + self.consume_char(); + assert!(self.next_char().is_ascii_alphanumeric()); + selector.classes.push(self.parse_identifier()); + } + '*' => { + self.consume_char(); + assert!(self.next_char().is_ascii_whitespace()); + } + '{' => { + break; + } + c => { + if c.is_ascii_alphanumeric() == false { + break; + } + selector.tag_name = Some(self.parse_identifier()); + } + } + } + return selector; + } + + fn parse_identifier(&mut self) -> String { + self.consume_while(|c| (c.is_ascii_alphanumeric() || c == '-')) + } + + fn parse_rule(&mut self) -> Rule { + Rule { + selectors: self.parse_selectors(), + declarations: self.parse_declarations(), + } + } + + fn parse_selectors(&mut self) -> Vec { + let mut selectors = Vec::new(); + loop { + selectors.push(Selector::Single(self.parse_single_selector())); + self.consume_whitespace(); + match self.next_char() { + ',' => { + self.consume_char(); + self.consume_whitespace(); + } + '{' => break, + c => panic!("Unexpected character {} in selector list", c), + } + } + return selectors; + } + + fn parse_declarations(&mut self) -> Vec { + assert!(self.consume_char() == '{'); + let mut result = Vec::new(); + while !self.eof() { + self.consume_whitespace(); + if self.next_char() == '}' { + self.consume_char(); + break; + } + let identifier = self.parse_identifier(); + self.consume_whitespace(); + assert!(self.consume_char() == ':'); + self.consume_whitespace(); + let value = self.parse_declaration_value(); + result.push(Declaration { + name: identifier, + value: value, + }); + self.consume_whitespace(); + assert!(self.consume_char() == ';'); + } + return result; + } + + fn parse_declaration_value(&mut self) -> Value { + match self.next_char() { + '0'..='9' => self.parse_length(), + '#' => self.parse_color(), + _ => Value::Keyword(self.parse_identifier()), + } + } + + fn parse_hex_pair(&mut self) -> u8 { + let s = &self.input[self.pos..self.pos + 2]; + self.pos += 2; + u8::from_str_radix(s, 16).unwrap() + } + + fn parse_color(&mut self) -> Value { + assert_eq!(self.consume_char(), '#'); + Value::Color(ColorValue::RGBA( + self.parse_hex_pair(), + self.parse_hex_pair(), + self.parse_hex_pair(), + 255, + )) + } + + fn parse_length(&mut self) -> Value { + Value::Length(self.parse_float(), self.parse_unit()) + } + + fn parse_float(&mut self) -> f32 { + let s = self.consume_while(|c| match c { + '0'..='9' | '.' => true, + _ => false, + }); + s.parse().unwrap() + } + + fn parse_unit(&mut self) -> Unit { + match &*self.parse_identifier().to_ascii_lowercase() { + "px" => Unit::Px, + _ => panic!("unrecognized unit"), + } + } + + fn parse_rules(&mut self) -> Vec { + let mut rules = Vec::new(); + while !self.eof() { + rules.push(self.parse_rule()); + self.consume_whitespace(); + } + rules + } +} + +pub fn parse(input: String) -> StyleSheet { + let mut parser = Parser { + pos: 0, + input: input, + }; + StyleSheet { + rules: parser.parse_rules(), + } +} + +#[derive(Default, Debug, Clone)] +struct SingleSelector { + tag_name: Option, + id: Option, + classes: Vec, +} + +#[derive(Debug, Clone)] +enum Selector { + Single(SingleSelector), +} + +impl fmt::Display for Selector { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + Self::Single(selector) => { + if let Some(tag_name) = &selector.tag_name { + write!(f, "{}", tag_name).unwrap(); + } + if selector.classes.len() > 0 { + write!(f, ".{}", selector.classes.join(".")).unwrap(); + } + if let Some(id) = &selector.id { + write!(f, "#{}", id).unwrap(); + } + Ok(()) + } + } + } +} + +#[derive(Debug, Clone)] +enum Value { + Keyword(String), + Length(f32, Unit), + Color(ColorValue), +} + +impl fmt::Display for Value { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + Self::Keyword(keyword) => { + write!(f, "{}", keyword) + } + Self::Color(color) => { + write!(f, "{}", color) + } + Self::Length(amount, unit) => { + write!(f, "{}{}", amount, unit) + } + } + } +} + +#[derive(Debug, Clone)] +enum ColorValue { + RGBA(u8, u8, u8, u8), +} + +impl fmt::Display for ColorValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + Self::RGBA(r, g, b, a) => write!(f, "rgba({}, {}, {}, {})", r, g, b, a), + } + } +} + +#[derive(Debug, Clone)] +enum Unit { + Px, +} + +impl fmt::Display for Unit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + Self::Px => write!(f, "px"), + } + } +} + +#[derive(Debug, Clone)] +struct Declaration { + name: String, + value: Value, +} + +impl fmt::Display for Declaration { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {};", self.name, self.value) + } +} + +#[derive(Debug, Clone)] +struct Rule { + selectors: Vec, + declarations: Vec, +} + +impl fmt::Display for Rule { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let prepadding = " "; + let selectors = self + .selectors + .iter() + .map(|d| format!("{d}")) + .collect::>(); + writeln!(f, "{} {{", selectors.join(", ")).unwrap(); + let declarations = self + .declarations + .iter() + .map(|d| format!("{d}")) + .collect::>(); + writeln!(f, "{}{}", prepadding, declarations.join("")).unwrap(); + writeln!(f, "}}") + } +} + +#[derive(Debug, Clone)] +pub struct StyleSheet { + rules: Vec, +} + +impl fmt::Display for StyleSheet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.rules.len() > 0 { + let rules = self + .rules + .iter() + .map(|r| format!("{r}")) + .collect::>(); + writeln!(f, "{}", rules.join("\n")).unwrap() + } + Ok(()) + } +} diff --git a/src/dom.rs b/src/dom.rs index 233dcdf..b118801 100644 --- a/src/dom.rs +++ b/src/dom.rs @@ -1,13 +1,18 @@ use core::fmt; use std::collections::HashMap; -#[derive(Debug)] +use crate::{ + css::{self, StyleSheet}, + html, +}; + +#[derive(Debug, Clone)] pub enum AttrValue { Text(String), Implicit, } -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct AttrMap(pub HashMap); impl fmt::Display for AttrMap { @@ -24,23 +29,24 @@ impl fmt::Display for AttrMap { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ElementData { tag_name: String, attributes: AttrMap, child_nodes: Vec, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum NodeType { ElementNode(ElementData), TextNode(String), CommentNode(String), + DocumentNode(DocumentData), } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Node { - node_type: NodeType, + pub node_type: NodeType, } impl Node { @@ -72,6 +78,7 @@ impl Node { writeln!(f, "{}{}", prepadding, text).unwrap(); } NodeType::CommentNode(text) => writeln!(f, "{}", prepadding, text).unwrap(), + NodeType::DocumentNode(_) => {} } } } @@ -83,6 +90,30 @@ impl fmt::Display for Node { } } +#[derive(Debug, Clone)] +pub struct DocumentData { + pub root: Box>, + pub stylesheets: Vec, +} + +impl DocumentData { + pub fn load_css(&mut self, styling: String) { + self.stylesheets.push(css::parse(styling)); + } + + pub fn load_document(&mut self, document: String) { + let node = html::parse(document, self); + _ = self.root.insert(node); + } + + pub fn new() -> Self { + Self { + root: Box::new(None), + stylesheets: vec![], + } + } +} + pub fn text(data: String) -> Node { Node { node_type: NodeType::TextNode(data), @@ -104,3 +135,11 @@ pub fn comment(text: String) -> Node { node_type: NodeType::CommentNode(text), } } + +pub fn parse(document: String) -> Node { + let mut context = DocumentData::new(); + context.load_document(document); + Node { + node_type: NodeType::DocumentNode(context), + } +} diff --git a/src/html.rs b/src/html.rs index 88d7266..e219798 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,14 +1,15 @@ #![allow(dead_code)] use std::collections::HashMap; -use crate::dom::{comment, element, text, AttrMap, AttrValue, Node}; +use crate::dom::{comment, element, text, AttrMap, AttrValue, DocumentData, Node, NodeType}; -struct Parser { +struct Parser<'a> { pos: usize, input: String, + context: &'a mut DocumentData, } -impl Parser { +impl Parser<'_> { fn next_char(&self) -> char { self.input[self.pos..].chars().next().unwrap() } @@ -80,6 +81,13 @@ impl Parser { // Contents. let children = self.parse_nodes(); + if tag_name == "style" { + let inner_node = children.first().unwrap(); + if let NodeType::TextNode(styling) = &inner_node.node_type { + self.context.load_css(styling.clone()); + } + } + // Closing tag. assert!(self.consume_char() == '<'); assert!(self.consume_char() == '/'); @@ -159,10 +167,11 @@ impl Parser { } } -pub fn parse(source: String) -> Node { +pub fn parse(source: String, context: &mut DocumentData) -> Node { let mut parser = Parser { pos: 0, input: source, + context: context, }; let mut nodes = parser.parse_nodes(); diff --git a/src/main.rs b/src/main.rs index 305f971..c207624 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,335 +1,19 @@ -use core::fmt; use std::io; +use dom::NodeType; + +mod css; mod dom; mod html; -struct Parser { - pos: usize, - input: String, -} - -impl Parser { - fn eof(&self) -> bool { - self.pos >= self.input.len() - } - - fn consume_while(&mut self, test: F) -> String - where - F: Fn(char) -> bool, - { - let mut result = String::new(); - while !self.eof() && test(self.next_char()) { - result.push(self.consume_char()); - } - return result; - } - - fn next_char(&self) -> char { - self.input[self.pos..].chars().next().unwrap() - } - - fn consume_char(&mut self) -> char { - let mut iter = self.input[self.pos..].char_indices(); - let (_, cur_char) = iter.next().unwrap(); - let (next_pos, _) = iter.next().unwrap_or((1, ' ')); - self.pos += next_pos; - return cur_char; - } - - fn consume_whitespace(&mut self) { - self.consume_while(|c| c.is_whitespace()); - } - - fn parse_single_selector(&mut self) -> SingleSelector { - let mut selector = SingleSelector::default(); - while !self.eof() { - self.consume_whitespace(); - match self.next_char() { - '#' => { - self.consume_char(); - assert!(self.next_char().is_ascii_alphanumeric()); - selector.id = Some(self.parse_identifier()); - } - '.' => { - self.consume_char(); - assert!(self.next_char().is_ascii_alphanumeric()); - selector.classes.push(self.parse_identifier()); - } - '*' => { - self.consume_char(); - assert!(self.next_char().is_ascii_whitespace()); - } - '{' => { - break; - } - c => { - if c.is_ascii_alphanumeric() == false { - break; - } - selector.tag_name = Some(self.parse_identifier()); - } - } - } - return selector; - } - - fn parse_identifier(&mut self) -> String { - self.consume_while(|c| (c.is_ascii_alphanumeric() || c == '-')) - } - - fn parse_rule(&mut self) -> Rule { - Rule { - selectors: self.parse_selectors(), - declarations: self.parse_declarations(), - } - } - - fn parse_selectors(&mut self) -> Vec { - let mut selectors = Vec::new(); - loop { - selectors.push(Selector::Single(self.parse_single_selector())); - self.consume_whitespace(); - match self.next_char() { - ',' => { - self.consume_char(); - self.consume_whitespace(); - } - '{' => break, - c => panic!("Unexpected character {} in selector list", c), - } - } - return selectors; - } - - fn parse_declarations(&mut self) -> Vec { - assert!(self.consume_char() == '{'); - let mut result = Vec::new(); - while !self.eof() { - self.consume_whitespace(); - if self.next_char() == '}' { - self.consume_char(); - break; - } - let identifier = self.parse_identifier(); - self.consume_whitespace(); - assert!(self.consume_char() == ':'); - self.consume_whitespace(); - let value = self.parse_declaration_value(); - result.push(Declaration { - name: identifier, - value: value, - }); - self.consume_whitespace(); - assert!(self.consume_char() == ';'); - } - return result; - } - - fn parse_declaration_value(&mut self) -> Value { - match self.next_char() { - '0'..='9' => self.parse_length(), - '#' => self.parse_color(), - _ => Value::Keyword(self.parse_identifier()), - } - } - - fn parse_hex_pair(&mut self) -> u8 { - let s = &self.input[self.pos..self.pos + 2]; - self.pos += 2; - u8::from_str_radix(s, 16).unwrap() - } - - fn parse_color(&mut self) -> Value { - assert_eq!(self.consume_char(), '#'); - Value::Color(ColorValue::RGBA( - self.parse_hex_pair(), - self.parse_hex_pair(), - self.parse_hex_pair(), - 255, - )) - } - - fn parse_length(&mut self) -> Value { - Value::Length(self.parse_float(), self.parse_unit()) - } - - fn parse_float(&mut self) -> f32 { - let s = self.consume_while(|c| match c { - '0'..='9' | '.' => true, - _ => false, - }); - s.parse().unwrap() - } - - fn parse_unit(&mut self) -> Unit { - match &*self.parse_identifier().to_ascii_lowercase() { - "px" => Unit::Px, - _ => panic!("unrecognized unit"), - } - } - - fn parse_rules(&mut self) -> Vec { - let mut rules = Vec::new(); - while !self.eof() { - rules.push(self.parse_rule()); - self.consume_whitespace(); - } - rules - } -} - -fn parse(input: String) -> StyleSheet { - let mut parser = Parser { - pos: 0, - input: input, - }; - StyleSheet { - rules: parser.parse_rules(), - } -} - -#[derive(Default, Debug)] -struct SingleSelector { - tag_name: Option, - id: Option, - classes: Vec, -} - -#[derive(Debug)] -enum Selector { - Single(SingleSelector), -} - -impl fmt::Display for Selector { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self { - Self::Single(selector) => { - if let Some(tag_name) = &selector.tag_name { - write!(f, "{}", tag_name).unwrap(); - } - if selector.classes.len() > 0 { - write!(f, ".{}", selector.classes.join(".")).unwrap(); - } - if let Some(id) = &selector.id { - write!(f, "#{}", id).unwrap(); - } - Ok(()) - } - } - } -} - -#[derive(Debug)] -enum Value { - Keyword(String), - Length(f32, Unit), - Color(ColorValue), -} - -impl fmt::Display for Value { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self { - Self::Keyword(keyword) => { - write!(f, "{}", keyword) - } - Self::Color(color) => { - write!(f, "{}", color) - } - Self::Length(amount, unit) => { - write!(f, "{}{}", amount, unit) - } - } - } -} - -#[derive(Debug)] -enum ColorValue { - RGBA(u8, u8, u8, u8), -} - -impl fmt::Display for ColorValue { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self { - Self::RGBA(r, g, b, a) => write!(f, "rgba({}, {}, {}, {})", r, g, b, a), - } - } -} - -#[derive(Debug)] -enum Unit { - Px, -} - -impl fmt::Display for Unit { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self { - Self::Px => write!(f, "px"), - } - } -} - -#[derive(Debug)] -struct Declaration { - name: String, - value: Value, -} - -impl fmt::Display for Declaration { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}: {};", self.name, self.value) - } -} - -#[derive(Debug)] -struct Rule { - selectors: Vec, - declarations: Vec, -} - -impl fmt::Display for Rule { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let prepadding = " "; - let selectors = self - .selectors - .iter() - .map(|d| format!("{d}")) - .collect::>(); - writeln!(f, "{} {{", selectors.join(", ")).unwrap(); - let declarations = self - .declarations - .iter() - .map(|d| format!("{d}")) - .collect::>(); - writeln!(f, "{}{}", prepadding, declarations.join("")).unwrap(); - writeln!(f, "}}") - } -} - -#[derive(Debug)] -struct StyleSheet { - rules: Vec, -} - -impl fmt::Display for StyleSheet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.rules.len() > 0 { - let rules = self - .rules - .iter() - .map(|r| format!("{r}")) - .collect::>(); - writeln!(f, "{}", rules.join("\n")).unwrap() - } - Ok(()) - } -} - fn main() { let mut input = String::new(); let stdin = io::stdin(); stdin.read_line(&mut input).unwrap(); - // let nodes = html::parse(input); - let nodes = parse(input); - println!("{nodes}"); + // let input = "Document

Hello

".into(); + + let node = dom::parse(input); + if let NodeType::DocumentNode(data) = node.node_type { + println!("{}", data.stylesheets[0]); + } }