use crate::node::Node; #[derive(Debug)] pub struct Parser<'a> { content: &'a str, position: usize, } impl<'a> Parser<'a> { // Content should be normalized before being passed into this function. Since we do not take // ownership here, we cannot mutate it. pub fn new(content: &'a str) -> Self { Self { content, position: 0, } } pub fn parse_document(&mut self) -> Node { let mut root = Node::Document { children: vec![] }; while !self.is_eof() { let block = self.parse_block(); if !block.is_empty() { root.add_child(block); } } root } // BUG: We should be using optional, not blank nodes fn parse_block(&mut self) -> Node { self.consume_whitespace(); // If we are at the end, return an empty node, the caller should know to return it. if self.is_eof() { return Node::Text { content: "".into() }; } let c1 = self.peek(); let c2 = self.peek_nth(1); let c3 = self.peek_nth(2); // TODO: For now we are just implementing paragraphs. So we can start with inline parsing match (c1, c2, c3) { // (Some('#'), _, _) => self.parse_heading(), _ => self.parse_paragraph(), } } fn parse_heading(&mut self) -> Node { Node::Heading { level: 1, children: vec![], } } fn parse_paragraph(&mut self) -> Node { Node::Paragraph { children: self.parse_inline(), } } // --- INLINE PARSING --- fn parse_inline(&mut self) -> Vec { let mut nodes = vec![]; let mut str = "".to_string(); while !self.is_eof() { // c1 stores current char, c2/c3 store future, contextual chars let c1 = self.peek(); let c2 = self.peek_nth(1); let c3 = self.peek_nth(2); // TODO: Need to redesign the nodes // TODO: Support _ AND * match (c1, c2, c3) { (None, _, _) | (Some('\n'), Some('\n'), _) => break, (Some('!'), Some('['), _) => /* parse image */ { continue; } (Some('['), _, _) => /* parse link */ { continue; } (Some('*'), Some('*'), Some('*')) => /* parse bold italic */ { continue; } (Some('*'), Some('*'), _) => /* parse bold */ { continue; } (Some('*'), _, _) => { nodes.push(Node::Text { content: str.clone(), }); str = "".to_string(); let node = self.parse_italic(); if !node.is_empty() { nodes.push(node); } continue; } (Some('`'), _, _) => /* parse code */ { continue; } (Some(c), _, _) => { str.push(c); self.consume(); } } } // TODO: Push text node nodes.push(Node::Text { content: str }); nodes } fn parse_italic(&mut self) -> Node { let mut str = "".to_string(); self.consume(); // Consume the '*' println!("'{}'", self.content); // Use loop instead of 'while !self.is_eof()' so we can make it to the (None, _) case to // exit loop { let c1 = self.peek(); let c2 = self.peek_nth(1); match (c1, c2) { (None, _) | (Some('\n'), None) | (Some('\n'), Some('\n')) => { // In this case, we did not find an ending star, so we should return a normal // node. But we have to add the star back since we consumed it already str.insert(0, '*'); return Node::Text { content: str }; } (Some('*'), _) => { self.consume(); break; } (Some(c), _) => { str.push(c); self.consume(); } } } Node::Italic { children: vec![Node::Text { content: str }], } } // --- HELPERS --- fn is_eof(&self) -> bool { self.position >= self.content.len() } fn peek(&self) -> Option { self.peek_nth(0) } fn peek_nth(&self, n: usize) -> Option { self.content[self.position..].chars().nth(n) } fn consume(&mut self) { self.consume_n(1) } fn consume_n(&mut self, n: usize) { for _ in 0..n { if let Some(c) = self.content[self.position..].chars().next() { self.position += c.len_utf8(); } else { break; } } } fn consume_whitespace(&mut self) { while let Some(c) = self.peek() { if !c.is_whitespace() { break; } self.consume(); } } } #[cfg(test)] mod parser_tests { use super::Parser; #[test] fn test_parse_italic() { // This test only tests the `parse_italic` method, so it is expected that the first // character is a '*', otherwise the first character will be consumed. The `parse_inline` // tests will be able to handle more specific cases. { let s = "*hello world*"; let html = "hello world"; let mut p = Parser::new(s); let node = p.parse_italic(); assert_eq!(node.to_html(), html); } { let s = "*hello* world"; let html = "hello"; let mut p = Parser::new(s); let node = p.parse_italic(); assert_eq!(node.to_html(), html); } { let s = "*hello world"; let html = "*hello world"; let mut p = Parser::new(s); let node = p.parse_italic(); assert_eq!(node.to_html(), html); } { let s = "*hello world\n"; let html = "*hello world"; let mut p = Parser::new(s); let node = p.parse_italic(); assert_eq!(node.to_html(), html); } { let s = "*hello world\n\n"; let html = "*hello world"; let mut p = Parser::new(s); let node = p.parse_italic(); assert_eq!(node.to_html(), html); } { let s = "*hello\n\nworld*"; let html = "*hello"; let mut p = Parser::new(s); let node = p.parse_italic(); assert_eq!(node.to_html(), html); } } }