This stuff sucks actually, clones are annoying as hell.

This peekable class might not work, it lowkey sucks.
This commit is contained in:
Hayden Hargreaves 2025-11-25 20:08:13 -07:00
parent bacc17ab7d
commit 3c25e6b9e8
4 changed files with 197 additions and 10 deletions

View File

@ -2,7 +2,7 @@
pub enum Node { pub enum Node {
// Structure Nodes // Structure Nodes
Document { children: Vec<Node> }, Document { children: Vec<Node> },
Heading { level: u8, children: Vec<Node> }, Heading { level: usize, children: Vec<Node> },
Paragraph { children: Vec<Node> }, Paragraph { children: Vec<Node> },
List { ordered: bool, children: Vec<Node> }, List { ordered: bool, children: Vec<Node> },
ListItem { children: Vec<Node> }, ListItem { children: Vec<Node> },

View File

@ -1,8 +1,11 @@
use std::iter::Peekable;
use std::str::Chars;
use crate::node::Node; use crate::node::Node;
#[derive(Debug)] #[derive(Debug)]
pub struct Parser { pub struct Parser {
content: String content: String,
} }
impl Parser { impl Parser {
@ -11,16 +14,199 @@ impl Parser {
/// support operation on all operating systems. /// support operation on all operating systems.
pub fn new(content: &str) -> Self { pub fn new(content: &str) -> Self {
let normalized = content.replace("\r\n", "\n").replace("\r", ""); let normalized = content.replace("\r\n", "\n").replace("\r", "");
Self { content: normalized } Self {
content: normalized,
}
} }
pub fn parse_document(&self) -> Node { pub fn parse_document(&self) -> Node {
let chars = self.content.chars().peekable(); let mut stream = self.content.chars().peekable();
Node::Document { children: vec![ let mut root = Node::Document { children: vec![] };
Node::Paragraph { children: vec![
Node::Text { content: chars.collect() }, // Same as !IsEOF from the CPP implementation
]} while stream.peek().is_some() {
]} let block = self.parse_block(&mut stream);
if !block.is_empty() {
root.add_child(block);
}
}
root
}
/// Parses a single block-level element from the front of `stream`.
///
/// Leading whitespace is skipped first. The kind of block is then decided from the next one to
/// three characters. Those characters are read from a clone of the stream, so nothing is consumed
/// here and the dedicated block parser sees the block from its first character.
///
/// Returns an empty `Node::Text` when the stream is exhausted.
fn parse_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
    self.consume_whitespace(stream);

    // Nothing left to parse: hand back an empty node.
    let first = match stream.peek() {
        Some(&ch) => ch,
        None => return Node::Text { content: String::new() },
    };

    // Look at the two characters after `first` without touching the real stream.
    let mut lookahead = stream.clone().skip(1);
    let second = lookahead.next();
    let third = lookahead.next();

    match first {
        '#' => self.parse_heading(stream),
        '*' | '-' | '+' if matches!(second, Some(' ' | '\t')) => self.parse_list(stream, false),
        // Ordered list markers with one or two digits ("0." through "99.").
        digit
            if digit.is_ascii_digit()
                && (second == Some('.')
                    || (matches!(second, Some(c) if c.is_ascii_digit())
                        && third == Some('.'))) =>
        {
            self.parse_list(stream, true)
        }
        '`' if second == Some('`') && third == Some('`') => self.parse_code_block(stream),
        '!' if second == Some('[') => self.parse_image(stream),
        '>' => self.parse_block_quote(stream),
        _ => self.parse_paragraph(stream),
    }
}
// --- STRUCTURE PARSING ---
/// Wraps the inline nodes parsed from the front of `stream` in a `Node::Paragraph`.
fn parse_paragraph(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
    Node::Paragraph {
        children: self.parse_inline(stream),
    }
}
/// Parses a heading whose first character is `#`.
///
/// The heading level is the number of consecutive `#` characters at the front of `stream`.
/// Spaces and tabs after the hashes are skipped, and the rest of the line is parsed as inline
/// content.
///
/// Only `#` characters count towards the level, so `#title` yields level 1 rather than the
/// length of the whole first word. Newlines are never skipped, so an empty heading (`#` followed
/// directly by a line break) does not absorb the following line.
fn parse_heading(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
    let mut level = 0;
    while stream.next_if_eq(&'#').is_some() {
        level += 1;
    }

    // Skip the separator between the hashes and the title, staying on the current line.
    while stream.next_if(|&c| c == ' ' || c == '\t').is_some() {}

    Node::Heading {
        level,
        children: self.parse_inline(stream),
    }
}
/// Placeholder for list parsing.
///
/// Lists are not implemented yet. Until they are, the current line is consumed and returned
/// as a paragraph of raw text. Consuming the line matters: a block parser that returns without
/// advancing `stream` would make a caller that loops until the stream is empty spin forever.
fn parse_list(&self, stream: &mut Peekable<Chars<'_>>, ordered: bool) -> Node {
    // TODO: build `Node::List` / `Node::ListItem`; `ordered` is unused until then.
    let _ = ordered;
    let line = self.consume_until_char(stream, '\n');
    Node::Paragraph {
        children: vec![Node::Text { content: line }],
    }
}
/// Placeholder for block-quote parsing.
///
/// Block quotes are not implemented yet. Until they are, the current line (including the
/// leading `>`) is consumed and returned as a paragraph of raw text, so that the stream always
/// advances and a caller looping until the stream is empty cannot hang.
fn parse_block_quote(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
    // TODO: parse the quoted content into a dedicated node.
    let line = self.consume_until_char(stream, '\n');
    Node::Paragraph {
        children: vec![Node::Text { content: line }],
    }
}
/// Placeholder for fenced code block parsing.
///
/// Code blocks are not implemented yet. Until they are, the current line (the opening fence)
/// is consumed and returned as a paragraph of raw text, so that the stream always advances and a
/// caller looping until the stream is empty cannot hang. The lines inside the fence are left in
/// `stream`.
fn parse_code_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
    // TODO: consume up to the closing fence and build a dedicated code node.
    let line = self.consume_until_char(stream, '\n');
    Node::Paragraph {
        children: vec![Node::Text { content: line }],
    }
}
/// Placeholder for block-level image parsing.
///
/// Images are not implemented yet. Until they are, the current line is consumed and returned
/// as a paragraph of raw text, so that the stream always advances and a caller looping until the
/// stream is empty cannot hang.
fn parse_image(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
    // TODO: parse the `![alt](url)` form into a dedicated image node.
    let line = self.consume_until_char(stream, '\n');
    Node::Paragraph {
        children: vec![Node::Text { content: line }],
    }
}
// --- INLINE PARSING ---
/// Parses inline content from the front of `stream` up to the end of the current line.
///
/// Plain characters are accumulated into `Node::Text` nodes. A `**` marker hands control to
/// `parse_bold`. Parsing stops in front of the first `'\n'` (which is left in the stream) or at
/// the end of input.
///
/// Images, links, italic, inline code and bold-italic are not implemented yet. Their marker
/// characters are kept as literal text, which guarantees that every loop iteration consumes at
/// least one character.
///
/// Returns the parsed nodes in source order; empty text runs are never emitted.
fn parse_inline(&self, stream: &mut Peekable<Chars<'_>>) -> Vec<Node> {
    let mut nodes = vec![];
    let mut text = String::new();

    while let Some(&c1) = stream.peek() {
        // Take a fresh clone for the lookahead on every iteration. The clone is cheap and,
        // unlike a long-lived second cursor, can never drift out of step with `stream`.
        let mut ahead = stream.clone().skip(1);
        let c2 = ahead.next();
        let c3 = ahead.next();

        match (c1, c2, c3) {
            ('\n', _, _) => break,
            ('*', Some('*'), Some('*')) => {
                // TODO: bold italic. Keep the whole marker literal so that it is not
                // misread as a bold opener on the next iteration.
                for _ in 0..3 {
                    stream.next();
                }
                text.push_str("***");
            }
            ('*', Some('*'), _) => {
                // Flush the text collected so far before the bold node.
                if !text.is_empty() {
                    nodes.push(Node::Text {
                        content: std::mem::take(&mut text),
                    });
                }
                let node = self.parse_bold(stream);
                if !node.is_empty() {
                    nodes.push(node);
                }
            }
            _ => {
                // Plain text, plus the markers of constructs that are still TODO:
                // image (`![`), link (`[`), italic (`*`) and inline code (`` ` ``).
                text.push(c1);
                stream.next();
            }
        }
    }

    // Flush the trailing text run.
    if !text.is_empty() {
        nodes.push(Node::Text { content: text });
    }
    nodes
}
/// Parses a bold span; `stream` must be positioned on the opening `**`.
///
/// Characters are collected until the closing `**`, which is consumed, and the result is
/// returned as `Node::Bold`.
///
/// If a blank line (`"\n\n"`, the start of a new block) or the end of input is reached before a
/// closing `**`, the span is unterminated. In that case the collected characters are returned as
/// `Node::Text` with the opening `**` put back in front, and the blank line is left in the
/// stream.
fn parse_bold(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
    // Skip the opening `**`.
    stream.next();
    stream.next();

    let mut content = String::new();
    while let Some(&c1) = stream.peek() {
        // Character after `c1`, read from a throwaway clone so `stream` is untouched.
        let c2 = stream.clone().nth(1);
        match (c1, c2) {
            // A blank line ends the block before the span was closed.
            ('\n', Some('\n')) => break,
            ('*', Some('*')) => {
                stream.next();
                stream.next();
                return Node::Bold { content };
            }
            _ => {
                content.push(c1);
                stream.next();
            }
        }
    }

    // Unterminated: report what was read as plain text, markers included.
    Node::Text {
        content: format!("**{}", content),
    }
}
// --- HELPER FUNCTIONS ---
/// Advances `stream` past any run of whitespace characters (as defined by
/// `char::is_whitespace`, so line breaks are skipped too). Stops at the first non-whitespace
/// character or at the end of input.
fn consume_whitespace(&self, stream: &mut Peekable<Chars<'_>>) {
    while stream.next_if(|ch| ch.is_whitespace()).is_some() {}
}
/// THIS DOES NOT CONSUME THE TARGET, IT STOPS RIGHT BEFORE IT AND RETURNS THE STRING UNTIL
/// ITSELF
fn consume_until_char(&self, stream: &mut Peekable<Chars<'_>>, target: char) -> String {
let mut out = String::new();
while let Some(&c) = stream.peek() {
if c == target {
break;
}
stream.next();
out.push(c);
}
out
} }
} }

View File

@ -2,7 +2,7 @@ use transpiler::filesystem::Filesystem;
use transpiler::parser::Parser; use transpiler::parser::Parser;
pub fn main() -> Result<(), Box<dyn std::error::Error>> { pub fn main() -> Result<(), Box<dyn std::error::Error>> {
let file = Filesystem::read_file("./journal.md"); let file = Filesystem::read_file("./test.md");
let content; let content;
match file { match file {
Ok(s) => content = s, Ok(s) => content = s,

1
test.md Normal file
View File

@ -0,0 +1 @@
# h1, **this** is sick as fuck