Hayden Hargreaves 9f2595faeb (FIX): Fixed the inline node issue.
This does not include parse fixes, just compile-required fixes.
2025-11-27 11:57:28 -07:00

254 lines
7.0 KiB
Rust

use crate::node::Node;
/// Cursor over a borrowed source string that is turned into a tree of [`Node`]s.
///
/// The parser never copies or mutates the input; it only remembers how far into it
/// parsing has progressed.
#[derive(Debug)]
pub struct Parser<'a> {
/// The complete input text. It is borrowed for the lifetime of the parser.
content: &'a str,
/// Byte offset into `content` of the next unread character. It is advanced by the UTF-8
/// length of each consumed character, so it always sits on a character boundary.
position: usize,
}
impl<'a> Parser<'a> {
    /// Creates a parser positioned at the start of `content`.
    ///
    /// The content should be normalized before being passed in: the parser only borrows
    /// it, so it cannot mutate it.
    pub fn new(content: &'a str) -> Self {
        Self {
            content,
            position: 0,
        }
    }

    /// Parses the remaining input into a `Node::Document`.
    ///
    /// Blocks are parsed one after another until the input is exhausted. Blocks that
    /// report themselves as empty are not added to the document.
    pub fn parse_document(&mut self) -> Node {
        let mut root = Node::Document { children: vec![] };
        while let Some(block) = self.parse_block() {
            if !block.is_empty() {
                root.add_child(block);
            }
        }
        root
    }

    /// Parses the next block-level node.
    ///
    /// Leading whitespace is skipped first. Returns `None` when nothing but whitespace was
    /// left, which tells the caller that the input is exhausted.
    fn parse_block(&mut self) -> Option<Node> {
        self.consume_whitespace();
        if self.is_eof() {
            return None;
        }
        // TODO: For now only paragraphs are implemented. Once other block kinds exist
        // (e.g. a leading '#' dispatching to `parse_heading`), select them here by peeking
        // at the upcoming characters.
        Some(self.parse_paragraph())
    }

    /// Placeholder for heading parsing: returns an empty level-1 heading and consumes
    /// nothing.
    // Not wired into `parse_block` yet, hence the lint exemption.
    #[allow(dead_code)]
    fn parse_heading(&mut self) -> Node {
        Node::Heading {
            level: 1,
            children: vec![],
        }
    }

    /// Parses a paragraph: a run of inline content up to a blank line or the end of input.
    fn parse_paragraph(&mut self) -> Node {
        Node::Paragraph {
            children: self.parse_inline(),
        }
    }

    // --- INLINE PARSING ---

    /// Parses inline content until a blank line (`"\n\n"`) or the end of input.
    ///
    /// The blank line itself is left unconsumed. Plain characters are accumulated into
    /// `Node::Text` nodes, and a single `*` starts an italic span (see `parse_italic`).
    /// Empty text runs are not emitted.
    ///
    /// Images (`![`), links (`[`), code spans (`` ` ``), bold (`**`) and bold-italic (`***`)
    /// are not implemented yet. Their marker characters are kept as literal text, which
    /// guarantees that every loop iteration advances the cursor.
    fn parse_inline(&mut self) -> Vec<Node> {
        let mut nodes = Vec::new();
        let mut text = String::new();
        loop {
            // TODO: Need to redesign the nodes
            // TODO: Support _ AND *
            match (self.peek(), self.peek_nth(1)) {
                (None, _) | (Some('\n'), Some('\n')) => break,
                (Some('*'), Some('*')) => {
                    // TODO: parse bold (`**`) and bold italic (`***`). Until then the whole
                    // run of stars is literal text; consuming the full run keeps a trailing
                    // star from being mistaken for the start of an italic span.
                    while self.peek() == Some('*') {
                        text.push('*');
                        self.consume();
                    }
                }
                (Some('*'), _) => {
                    Self::flush_text(&mut nodes, &mut text);
                    let node = self.parse_italic();
                    if !node.is_empty() {
                        nodes.push(node);
                    }
                }
                // TODO: parse image (`![`), link (`[`) and code (`` ` ``). For now these
                // characters are ordinary text like everything else.
                (Some(c), _) => {
                    text.push(c);
                    self.consume();
                }
            }
        }
        Self::flush_text(&mut nodes, &mut text);
        nodes
    }

    /// Moves any pending text out of `text` into a new `Node::Text` appended to `nodes`.
    ///
    /// Does nothing when `text` is empty, so no empty text nodes are produced.
    fn flush_text(nodes: &mut Vec<Node>, text: &mut String) {
        if !text.is_empty() {
            nodes.push(Node::Text {
                content: std::mem::take(text),
            });
        }
    }

    /// Parses an italic span. The cursor must be on the opening `*`.
    ///
    /// The first character is consumed unconditionally. If a closing `*` is found before
    /// the end of the paragraph, it is consumed and a `Node::Italic` wrapping the enclosed
    /// text is returned. Otherwise the span is unterminated and a `Node::Text` containing
    /// the opening `*` followed by everything read so far is returned; the terminating
    /// newline(s) are left unconsumed.
    fn parse_italic(&mut self) -> Node {
        self.consume(); // Consume the '*'
        let mut text = String::new();
        // A bare `loop` (rather than `while !self.is_eof()`) so that the end-of-input case
        // is handled by the same match arm as the other "unterminated" cases.
        loop {
            match (self.peek(), self.peek_nth(1)) {
                (None, _) | (Some('\n'), None) | (Some('\n'), Some('\n')) => {
                    // No closing star was found, so this is plain text. The opening star
                    // was already consumed and has to be put back in front.
                    text.insert(0, '*');
                    return Node::Text { content: text };
                }
                (Some('*'), _) => {
                    self.consume();
                    return Node::Italic {
                        children: vec![Node::Text { content: text }],
                    };
                }
                (Some(c), _) => {
                    text.push(c);
                    self.consume();
                }
            }
        }
    }

    // --- HELPERS ---

    /// Returns `true` once the cursor has reached the end of the input.
    fn is_eof(&self) -> bool {
        self.position >= self.content.len()
    }

    /// Returns the character under the cursor without consuming it.
    fn peek(&self) -> Option<char> {
        self.peek_nth(0)
    }

    /// Returns the character `n` characters (not bytes) ahead of the cursor, if any.
    fn peek_nth(&self, n: usize) -> Option<char> {
        self.content[self.position..].chars().nth(n)
    }

    /// Advances the cursor past one character.
    fn consume(&mut self) {
        self.consume_n(1)
    }

    /// Advances the cursor past up to `n` characters, stopping early at the end of input.
    fn consume_n(&mut self, n: usize) {
        // `position` is a byte offset, so advance by the encoded width of each character.
        let advance: usize = self.content[self.position..]
            .chars()
            .take(n)
            .map(char::len_utf8)
            .sum();
        self.position += advance;
    }

    /// Advances the cursor past any run of whitespace (including newlines).
    fn consume_whitespace(&mut self) {
        while matches!(self.peek(), Some(c) if c.is_whitespace()) {
            self.consume();
        }
    }
}
#[cfg(test)]
mod parser_tests {
    use super::Parser;

    /// Exercises `parse_italic` in isolation.
    ///
    /// Every input starts with a '*' because `parse_italic` consumes the first character
    /// unconditionally. Inputs with other leading characters belong in the `parse_inline`
    /// tests.
    #[test]
    fn test_parse_italic() {
        // (source, expected HTML of the single node returned by `parse_italic`)
        let cases = [
            // Properly closed spans; anything after the closing star is not part of the node.
            ("*hello world*", "<em>hello world</em>"),
            ("*hello* world", "<em>hello</em>"),
            // Unterminated spans fall back to plain text with the star restored.
            ("*hello world", "*hello world"),
            ("*hello world\n", "*hello world"),
            ("*hello world\n\n", "*hello world"),
            // A blank line ends the span before the closing star is reached.
            ("*hello\n\nworld*", "*hello"),
        ];

        for &(source, expected) in &cases {
            let mut parser = Parser::new(source);
            let rendered = parser.parse_italic().to_html();
            assert_eq!(rendered, expected);
        }
    }
}