From 79633bd059175e3a20c9456c9d5254dd56b7da04 Mon Sep 17 00:00:00 2001
From: Hayden Hargreaves <hhargreaves2006@gmail.com>
Date: Thu, 27 Nov 2025 11:37:06 -0700
Subject: [PATCH] (FEAT): parse_italic implemented.

However, I have since realized that the parser should be structured
differently, using mutual recursion: every "inline" node (except the text
node) should hold child nodes.
---
 lib/node.rs   |   2 +
 lib/parser.rs | 328 ++++++++++++++++++++++++++++----------------------
 src/main.rs   |   7 +-
 test.md       |   8 +-
 4 files changed, 197 insertions(+), 148 deletions(-)

diff --git a/lib/node.rs b/lib/node.rs
index bc989e0..8300d75 100644
--- a/lib/node.rs
+++ b/lib/node.rs
@@ -12,6 +12,8 @@ pub enum Node {
 
     // Inline Nodes
     Text { content: String },
+
+    // TODO: These should be structure nodes (with children) so they can be parsed recursively.
     Bold { content: String },
     Italic { content: String },
     BoldItalic { content: String },
diff --git a/lib/parser.rs b/lib/parser.rs
index ed93049..0df320e 100644
--- a/lib/parser.rs
+++ b/lib/parser.rs
@@ -1,32 +1,26 @@
-use std::iter::Peekable;
-use std::str::Chars;
-
 use crate::node::Node;
 
 #[derive(Debug)]
-pub struct Parser {
-    content: String,
+pub struct Parser<'a> {
+    content: &'a str,
+    position: usize,
 }
 
-impl Parser {
-    /// Create a new parser object with the content attached. This does not take ownership of the
-    /// string provided and therefore dies with the string. The input string is normalized to
-    /// support operation on all operating systems.
-    pub fn new(content: &str) -> Self {
-        let normalized = content.replace("\r\n", "\n").replace("\r", "");
+impl<'a> Parser<'a> {
+    // Content should be normalized before being passed into this function. Since we do not take
+    // ownership here, we cannot mutate it.
+    pub fn new(content: &'a str) -> Self {
         Self {
-            content: normalized,
+            content,
+            position: 0,
         }
     }
 
-    pub fn parse_document(&self) -> Node {
-        let mut stream = self.content.chars().peekable();
-
+    pub fn parse_document(&mut self) -> Node {
         let mut root = Node::Document { children: vec![] };
 
-        // Same as !IsEOF from the CPP implementation
-        while stream.peek().is_some() {
-            let block = self.parse_block(&mut stream);
+        while !self.is_eof() {
+            let block = self.parse_block();
             if !block.is_empty() {
                 root.add_child(block);
             }
@@ -35,178 +29,222 @@ impl Parser {
         root
     }
 
-    /// BUG: USING CLONES IS FUCKED AS HELL, STOP THIS SHIT
-    fn parse_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
-        self.consume_whitespace(stream);
+    // BUG: This should return an `Option<Node>` instead of a blank node.
+    fn parse_block(&mut self) -> Node {
+        self.consume_whitespace();
 
-        // If we are at the end, return an empty node; it will be ignored
-        let Some(&c1) = stream.peek() else {
+        // If we are at the end, return an empty node; the caller is expected to discard it.
+        if self.is_eof() {
             return Node::Text { content: "".into() };
-        };
+        }
 
-        // Use a clone to look ahead
-        let mut clone = stream.clone();
-        clone.next();
+        let c1 = self.peek();
+        let c2 = self.peek_nth(1);
+        let c3 = self.peek_nth(2);
 
-        let c2 = clone.next();
-        let c3 = clone.next();
-
-        // Now we can handle numbers from 0 to 99 for ordered lists
+        // TODO: For now we are just implementing paragraphs. So we can start with inline parsing
         match (c1, c2, c3) {
-            ('#', _, _) => self.parse_heading(stream),
-            ('*' | '-' | '+', Some(' ' | '\t'), _) => self.parse_list(stream, false),
-            (d, Some('.'), _) if d.is_ascii_digit() => self.parse_list(stream, true),
-            (d1, Some(d2), Some('.')) if d1.is_ascii_digit() && d2.is_ascii_digit() => {
-                self.parse_list(stream, true)
-            }
-            ('`', Some('`'), Some('`')) => self.parse_code_block(stream),
-            ('!', Some('['), _) => self.parse_image(stream),
-            ('>', _, _) => self.parse_block_quote(stream),
-            _ => self.parse_paragraph(stream),
+            // (Some('#'), _, _) => self.parse_heading(),
+            _ => self.parse_paragraph(),
         }
     }
 
-    // --- STRUCTURE PARSING ---
-    fn parse_paragraph(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
-        let children = self.parse_inline(stream);
-        Node::Paragraph { children }
-    }
-
-    fn parse_heading(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
-        // Consume the hashes to determine the size, then consume the whitespace
-        let hashes = self.consume_until_char(stream, ' ');
-        self.consume_whitespace(stream);
-
-        let children = self.parse_inline(stream);
-        dbg!(&children);
+    fn parse_heading(&mut self) -> Node {
         Node::Heading {
-            level: hashes.len(),
-            children,
+            level: 1,
+            children: vec![],
         }
     }
 
-    fn parse_list(&self, stream: &mut Peekable<Chars<'_>>, ordered: bool) -> Node {
-        Node::Text { content: "".into() }
-    }
-
-    fn parse_block_quote(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
-        Node::Text { content: "".into() }
-    }
-
-    fn parse_code_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
-        Node::Text { content: "".into() }
-    }
-
-    fn parse_image(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
-        Node::Text { content: "".into() }
+    fn parse_paragraph(&mut self) -> Node {
+        Node::Paragraph {
+            children: self.parse_inline(),
+        }
     }
 
     // --- INLINE PARSING ---
-    fn parse_inline(&self, stream: &mut Peekable<Chars<'_>>) -> Vec<Node> {
+    fn parse_inline(&mut self) -> Vec<Node> {
         let mut nodes = vec![];
-        let mut str = String::new();
+        let mut str = "".to_string();
 
-        // use a clone to allow for peeking ahead
-        // REMEMBER TO ALSO CONSUME ANYTIME MAIN STREAM IS CONSUMED
-        let mut clone = stream.clone();
-        clone.next(); // Stay one ahead
-
-        while let Some(&c1) = stream.peek() {
-            let c2 = clone.next();
-            let c3 = clone.next();
-
-            // println!("({}, {}, {})", c1, c2.unwrap_or('~'), c3.unwrap_or('~'));
+        while !self.is_eof() {
+            // c1 stores current char, c2/c3 store future, contextual chars
+            let c1 = self.peek();
+            let c2 = self.peek_nth(1);
+            let c3 = self.peek_nth(2);
 
+            // TODO: Need to redesign the nodes
             match (c1, c2, c3) {
-                ('\n', _, _) => break,
-                ('!', Some('['), _) => { /* Image */ }
-                ('[', _, _) => { /* Link */ }
-                ('*', Some('*'), Some('*')) => { /* Bold Italic */ }
-                ('*', Some('*'), _) => {
-                    nodes.push(Node::Text { content: str });
-                    str = "".into();
-                    let node = self.parse_bold(stream);
+                (None, _, _) | (Some('\n'), Some('\n'), _) => break,
+                (Some('!'), Some('['), _) =>
+                /* parse image */
+                {
+                    continue;
+                }
+                (Some('['), _, _) =>
+                /* parse link */
+                {
+                    continue;
+                }
+                (Some('*'), Some('*'), Some('*')) =>
+                /* parse bold italic */
+                {
+                    continue;
+                }
+                (Some('*'), Some('*'), _) =>
+                /* parse bold */
+                {
+                    continue;
+                }
+                (Some('*'), _, _) => {
+                    nodes.push(Node::Text {
+                        content: str.clone(),
+                    });
+                    str = "".to_string();
+                    let node = self.parse_italic();
                     if !node.is_empty() {
-                        nodes.push(node)
+                        nodes.push(node);
                     }
                     continue;
                 }
-                ('*', _, _) => { /* Italic */ }
-                ('`', _, _) => { /* Code */ }
+                (Some('`'), _, _) =>
+                /* parse code */
+                {
+                    continue;
+                }
 
-                _ => {
-                    // Should we swap '\n' with ' '
-                    str.push(c1);
-                    stream.next();
-                    clone.next();
+                (Some(c), _, _) => {
+                    str.push(c);
+                    self.consume();
                 }
             }
         }
 
-        // Push final node
-        if !str.is_empty() {
-            nodes.push(Node::Text { content: str });
-        }
+        // TODO: Push text node
+        nodes.push(Node::Text { content: str });
         nodes
     }
 
-    /// BUG: THIS FUNCTION SHOULD PARSE UNTIL IT FINDS EITHER THE ** OR AN ENDING OF A BLOCK. FOR
-    /// EXAMPLE: \n\n IS A NEW BLOCK AND THEN IT SHOULD END. BUT IF IT ENDS ON A NEW BLOCK, IT
-    /// SHOULD RETURN A TEXT NODE, WITH THE ** PREPENDED, SIGNIFYING FAILURE TO COMPLETE THE ENTIRE
-    /// STRONG BLOCK.
-    fn parse_bold(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
-        let mut str = String::new();
+    fn parse_italic(&mut self) -> Node {
+        let mut str = "".to_string();
+        self.consume(); // Consume the '*'
 
-        stream.next();
-        stream.next();
+        println!("'{}'", self.content);
 
-        let mut clone = stream.clone();
-        clone.next();
-
-        while let Some(&c1) = stream.peek() {
-            let c2 = clone.peek();
+        // Use loop instead of 'while !self.is_eof()' so we can make it to the (None, _) case to
+        // exit
+        loop {
+            let c1 = self.peek();
+            let c2 = self.peek_nth(1);
 
             match (c1, c2) {
-                ('\n', Some('\n')) => break,
-                ('*', Some('*')) => {
-                    stream.next();
-                    stream.next();
+                (None, _) | (Some('\n'), None) | (Some('\n'), Some('\n')) => {
+                    // In this case, we did not find a closing star, so we return a plain text
+                    // node instead. The opening star was already consumed, so add it back.
+                    str.insert(0, '*');
+                    return Node::Text { content: str };
+                }
+                (Some('*'), _) => {
+                    self.consume();
                     break;
                 }
-                _ => str.push(c1),
+                (Some(c), _) => {
+                    str.push(c);
+                    self.consume();
+                }
             }
-            stream.next();
-            clone.next();
         }
 
-        println!("@str '{}'", str);
-
-        Node::Bold { content: str }
+        Node::Italic { content: str }
     }
 
-    // --- HELPER FUNCTIONS ---
-    fn consume_whitespace(&self, stream: &mut Peekable<Chars<'_>>) {
-        while let Some(&c) = stream.peek() {
+    // --- HELPERS ---
+    fn is_eof(&self) -> bool {
+        self.position >= self.content.len()
+    }
+
+    fn peek(&self) -> Option<char> {
+        self.peek_nth(0)
+    }
+
+    fn peek_nth(&self, n: usize) -> Option<char> {
+        self.content[self.position..].chars().nth(n)
+    }
+
+    fn consume(&mut self) {
+        self.consume_n(1)
+    }
+
+    fn consume_n(&mut self, n: usize) {
+        for _ in 0..n {
+            if let Some(c) = self.content[self.position..].chars().next() {
+                self.position += c.len_utf8();
+            } else {
+                break;
+            }
+        }
+    }
+
+    fn consume_whitespace(&mut self) {
+        while let Some(c) = self.peek() {
             if !c.is_whitespace() {
                 break;
             }
-            stream.next();
+            self.consume();
+        }
+    }
+}
+
+#[cfg(test)]
+mod parser_tests {
+    use super::Parser;
+
+    #[test]
+    fn test_parse_italic() {
+        // This test only exercises the `parse_italic` method, which unconditionally consumes the
+        // first character, so every input is expected to start with a '*'. The `parse_inline`
+        // tests will be able to handle more specific cases.
+        {
+            let s = "*hello world*";
+            let html = "<em>hello world</em>";
+            let mut p = Parser::new(s);
+            let node = p.parse_italic();
+            assert_eq!(node.to_html(), html);
+        }
+        {
+            let s = "*hello* world";
+            let html = "<em>hello</em>";
+            let mut p = Parser::new(s);
+            let node = p.parse_italic();
+            assert_eq!(node.to_html(), html);
+        }
+        {
+            let s = "*hello world";
+            let html = "*hello world";
+            let mut p = Parser::new(s);
+            let node = p.parse_italic();
+            assert_eq!(node.to_html(), html);
+        }
+        {
+            let s = "*hello world\n";
+            let html = "*hello world";
+            let mut p = Parser::new(s);
+            let node = p.parse_italic();
+            assert_eq!(node.to_html(), html);
+        }
+        {
+            let s = "*hello world\n\n";
+            let html = "*hello world";
+            let mut p = Parser::new(s);
+            let node = p.parse_italic();
+            assert_eq!(node.to_html(), html);
+        }
+        {
+            let s = "*hello\n\nworld*";
+            let html = "*hello";
+            let mut p = Parser::new(s);
+            let node = p.parse_italic();
+            assert_eq!(node.to_html(), html);
         }
     }
-
-    /// THIS DOES NOT CONSUME THE TARGET, IT STOPS RIGHT BEFORE IT AND RETURNS THE STRING UNTIL
-    /// ITSELF
-    fn consume_until_char(&self, stream: &mut Peekable<Chars<'_>>, target: char) -> String {
-        let mut out = String::new();
-
-        while let Some(&c) = stream.peek() {
-            if c == target {
-                break;
-            }
-            stream.next();
-            out.push(c);
-        }
-
-        out
-    }
 }
diff --git a/src/main.rs b/src/main.rs
index c63ad3b..7fb3bcb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,13 +3,16 @@ use transpiler::parser::Parser;
 
 pub fn main() -> Result<(), Box<dyn std::error::Error>> {
     let file = Filesystem::read_file("./test.md");
-    let content;
+    let mut content;
     match file {
         Ok(s) => content = s,
         Err(err) => panic!("Failed to read file. {}", err),
     }
 
-    let parser = Parser::new(&content);
+    // Normalize char stream
+    content = content.replace("\r\n", "\n").replace("\r", "");
+
+    let mut parser = Parser::new(&content);
     let node = parser.parse_document();
 
     match Filesystem::write_file("./output.html", &node.to_html()) {
diff --git a/test.md b/test.md
index 06032f7..d11d815 100644
--- a/test.md
+++ b/test.md
@@ -1 +1,7 @@
-# h1, **this** is sick as fuck 
+hello *world*. This is pre*tty* cool
+
+
+What about this
+
+
+This *should have a star