diff --git a/lib/node.rs b/lib/node.rs index 212fb33..bc989e0 100644 --- a/lib/node.rs +++ b/lib/node.rs @@ -2,7 +2,7 @@ pub enum Node { // Structure Nodes Document { children: Vec }, - Heading { level: u8, children: Vec }, + Heading { level: usize, children: Vec }, Paragraph { children: Vec }, List { ordered: bool, children: Vec }, ListItem { children: Vec }, diff --git a/lib/parser.rs b/lib/parser.rs index 4b5fad2..ed93049 100644 --- a/lib/parser.rs +++ b/lib/parser.rs @@ -1,8 +1,11 @@ +use std::iter::Peekable; +use std::str::Chars; + use crate::node::Node; #[derive(Debug)] pub struct Parser { - content: String + content: String, } impl Parser { @@ -11,16 +14,199 @@ impl Parser { /// support operation on all operating systems. pub fn new(content: &str) -> Self { let normalized = content.replace("\r\n", "\n").replace("\r", ""); - Self { content: normalized } + Self { + content: normalized, + } } pub fn parse_document(&self) -> Node { - let chars = self.content.chars().peekable(); + let mut stream = self.content.chars().peekable(); - Node::Document { children: vec![ - Node::Paragraph { children: vec![ - Node::Text { content: chars.collect() }, - ]} - ]} + let mut root = Node::Document { children: vec![] }; + + // Same as !IsEOF from the CPP implementation + while stream.peek().is_some() { + let block = self.parse_block(&mut stream); + if !block.is_empty() { + root.add_child(block); + } + } + + root + } + + /// BUG: USING CLONES IS FUCKED AS HELL, STOP THIS SHIT + fn parse_block(&self, stream: &mut Peekable>) -> Node { + self.consume_whitespace(stream); + + // If we are at the end, return an empty node; it will be ignored + let Some(&c1) = stream.peek() else { + return Node::Text { content: "".into() }; + }; + + // Use a clone to look ahead + let mut clone = stream.clone(); + clone.next(); + + let c2 = clone.next(); + let c3 = clone.next(); + + // Now we can handle numbers from 0 to 99 for ordered lists + match (c1, c2, c3) { + ('#', _, _) => self.parse_heading(stream), + ('*' | '-' | '+', Some(' ' | '\t'), _) => self.parse_list(stream, false), + (d, Some('.'), _) if d.is_ascii_digit() => self.parse_list(stream, true), + (d1, Some(d2), Some('.')) if d1.is_ascii_digit() && d2.is_ascii_digit() => { + self.parse_list(stream, true) + } + ('`', Some('`'), Some('`')) => self.parse_code_block(stream), + ('!', Some('['), _) => self.parse_image(stream), + ('>', _, _) => self.parse_block_quote(stream), + _ => self.parse_paragraph(stream), + } + } + + // --- STRUCTURE PARSING --- + fn parse_paragraph(&self, stream: &mut Peekable>) -> Node { + let children = self.parse_inline(stream); + Node::Paragraph { children } + } + + fn parse_heading(&self, stream: &mut Peekable>) -> Node { + // Consume the hashes to determine the size, then consume the whitespace + let hashes = self.consume_until_char(stream, ' '); + self.consume_whitespace(stream); + + let children = self.parse_inline(stream); + dbg!(&children); + Node::Heading { + level: hashes.len(), + children, + } + } + + fn parse_list(&self, stream: &mut Peekable>, ordered: bool) -> Node { + Node::Text { content: "".into() } + } + + fn parse_block_quote(&self, stream: &mut Peekable>) -> Node { + Node::Text { content: "".into() } + } + + fn parse_code_block(&self, stream: &mut Peekable>) -> Node { + Node::Text { content: "".into() } + } + + fn parse_image(&self, stream: &mut Peekable>) -> Node { + Node::Text { content: "".into() } + } + + // --- INLINE PARSING --- + fn parse_inline(&self, stream: &mut Peekable>) -> Vec { + let mut nodes = vec![]; + let mut str = String::new(); + + // use a clone to allow for peeking ahead + // REMEMBER TO ALSO CONSUME ANYTIME MAIN STREAM IS CONSUMED + let mut clone = stream.clone(); + clone.next(); // Stay one ahead + + while let Some(&c1) = stream.peek() { + let c2 = clone.next(); + let c3 = clone.next(); + + // println!("({}, {}, {})", c1, c2.unwrap_or('~'), c3.unwrap_or('~')); + + match (c1, c2, c3) { + ('\n', _, _) => break, + ('!', Some('['), _) => { /* Image */ } + ('[', _, _) => { /* Link */ } + ('*', Some('*'), Some('*')) => { /* Bold Italic */ } + ('*', Some('*'), _) => { + nodes.push(Node::Text { content: str }); + str = "".into(); + let node = self.parse_bold(stream); + if !node.is_empty() { + nodes.push(node) + } + continue; + } + ('*', _, _) => { /* Italic */ } + ('`', _, _) => { /* Code */ } + + _ => { + // Should we swap '\n' with ' ' + str.push(c1); + stream.next(); + clone.next(); + } + } + } + + // Push final node + if !str.is_empty() { + nodes.push(Node::Text { content: str }); + } + nodes + } + + /// BUG: THIS FUNCTION SHOULD PARSE UNTIL IT FINDS EITHER THE ** OR AN ENDING OF A BLOCK. FOR + /// EXAMPLE: \n\n IS A NEW BLOCK AND THEN IT SHOULD END. BUT IF IT ENDS ON A NEW BLOCK, IT + /// SHOULD RETURN A TEXT NODE, WITH THE ** PREPENDED, SIGNIFYING FAILURE TO COMPLETE THE ENTIRE + /// STRONG BLOCK. + fn parse_bold(&self, stream: &mut Peekable>) -> Node { + let mut str = String::new(); + + stream.next(); + stream.next(); + + let mut clone = stream.clone(); + clone.next(); + + while let Some(&c1) = stream.peek() { + let c2 = clone.peek(); + + match (c1, c2) { + ('\n', Some('\n')) => break, + ('*', Some('*')) => { + stream.next(); + stream.next(); + break; + } + _ => str.push(c1), + } + stream.next(); + clone.next(); + } + + println!("@str '{}'", str); + + Node::Bold { content: str } + } + + // --- HELPER FUNCTIONS --- + fn consume_whitespace(&self, stream: &mut Peekable>) { + while let Some(&c) = stream.peek() { + if !c.is_whitespace() { + break; + } + stream.next(); + } + } + + /// THIS DOES NOT CONSUME THE TARGET, IT STOPS RIGHT BEFORE IT AND RETURNS THE STRING UNTIL + /// ITSELF + fn consume_until_char(&self, stream: &mut Peekable>, target: char) -> String { + let mut out = String::new(); + + while let Some(&c) = stream.peek() { + if c == target { + break; + } + stream.next(); + out.push(c); + } + + out } } diff --git a/src/main.rs b/src/main.rs index e153b78..c63ad3b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use transpiler::filesystem::Filesystem; use transpiler::parser::Parser; pub fn main() -> Result<(), Box> { - let file = Filesystem::read_file("./journal.md"); + let file = Filesystem::read_file("./test.md"); let content; match file { Ok(s) => content = s, diff --git a/test.md b/test.md new file mode 100644 index 0000000..06032f7 --- /dev/null +++ b/test.md @@ -0,0 +1 @@ +# h1, **this** is sick as fuck