Compare commits

...

2 Commits

Author SHA1 Message Date
Hayden Hargreaves
9f2595faeb (FIX): Fixed the inline node issue.
This does not include parse fixes, just compile-required fixes.
2025-11-27 11:57:28 -07:00
Hayden Hargreaves
79633bd059 (FEAT): parse_italic implemented.
But now I have realized that the parser should work differently and with
mutual recursion. The "inline" nodes (except the text node) should all
have children.
2025-11-27 11:37:06 -07:00
4 changed files with 460 additions and 278 deletions

View File

@ -10,11 +10,12 @@ pub enum Node {
BlockQuote { children: Vec<Node> },
Link { href: String, children: Vec<Node> },
Bold { children: Vec<Node> },
Italic { children: Vec<Node> },
BoldItalic { children: Vec<Node> },
// Inline Nodes
Text { content: String },
Bold { content: String },
Italic { content: String },
BoldItalic { content: String },
Code { content: String },
// Special Nodes
@ -62,16 +63,25 @@ impl Node {
let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<blockquote>{}</blockquote>\n", inner)
}
Node::Link { href, children} => {
Node::Link { href, children } => {
let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<a href=\"{}\">{}</a>", href, inner)
}
Node::Bold { children } => {
let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<strong>{}</strong>", inner)
}
Node::Italic { children } => {
let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<em>{}</em>", inner)
}
Node::BoldItalic { children } => {
let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<strong><em>{}</em></strong>", inner)
}
// Inline nodes
Node::Text { content } => format!("{}", content),
Node::Bold { content } => format!("<strong>{}</strong>", content),
Node::Italic { content } => format!("<em>{}</em>", content),
Node::BoldItalic { content } => format!("<strong><em>{}</em></strong>", content),
Node::Code { content } => format!("<code>{}</code>", content),
// Special nodes
@ -88,18 +98,20 @@ impl Node {
Node::Document { children }
| Node::Heading { level: _, children }
| Node::Paragraph { children }
| Node::List { ordered: _, children }
| Node::List {
ordered: _,
children,
}
| Node::ListItem { children }
| Node::CodeBlock { children }
| Node::BlockQuote { children } => children.is_empty(),
| Node::BlockQuote { children }
| Node::Bold { children }
| Node::Italic { children }
| Node::BoldItalic { children } => children.is_empty(),
Node::Link { href, children } => children.is_empty() && href.is_empty(),
// Inline nodes
Node::Text { content }
| Node::Bold { content }
| Node::Italic { content }
| Node::BoldItalic { content }
| Node::Code { content } => content.is_empty(),
Node::Text { content } | Node::Code { content } => content.is_empty(),
// Special rules
Node::Image { src, alt } => src.is_empty() && alt.is_empty(),
@ -114,18 +126,20 @@ impl Node {
Node::Document { children }
| Node::Heading { level: _, children }
| Node::Paragraph { children }
| Node::List { ordered: _, children}
| Node::List {
ordered: _,
children,
}
| Node::ListItem { children }
| Node::CodeBlock { children }
| Node::BlockQuote { children }
| Node::Bold { children }
| Node::Italic { children }
| Node::BoldItalic { children }
| Node::Link { href: _, children } => Some(&children),
// Inline Nodes
Node::Text { content: _ }
| Node::Bold { content: _ }
| Node::Italic { content: _ }
| Node::BoldItalic { content: _ }
| Node::Code { content: _ } => None,
Node::Text { content: _ } | Node::Code { content: _ } => None,
// Special Nodes
Node::Image { src: _, alt: _ } => None,
@ -140,18 +154,22 @@ impl Node {
Node::Document { children }
| Node::Heading { level: _, children }
| Node::Paragraph { children }
| Node::List { ordered: _, children }
| Node::List {
ordered: _,
children,
}
| Node::ListItem { children }
| Node::CodeBlock { children }
| Node::BlockQuote { children }
| Node::Bold { children }
| Node::Italic { children }
| Node::BoldItalic { children }
| Node::Link { href: _, children } => children.push(child),
// Inline Nodes
Node::Text { content: _ }
| Node::Bold { content: _ }
| Node::Italic { content: _ }
| Node::BoldItalic { content: _ }
| Node::Code { content: _ } => panic!("Can't add child to this node type."),
Node::Text { content: _ } | Node::Code { content: _ } => {
panic!("Can't add child to this node type.")
}
// Special Nodes
Node::Image { src: _, alt: _ } => panic!("Can't add child to this node type."),
@ -165,23 +183,45 @@ mod node_tests {
#[test]
fn can_return_html_string_for_structure_nodes() {
let child = Node::Text { content: "CONTENT".into() };
let child = Node::Text {
content: "CONTENT".into(),
};
// Document
{
let node = Node::Document { children: vec![child.clone()] };
let node = Node::Document {
children: vec![child.clone()],
};
let html = "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Document</title>\n</head>\n<body>\nCONTENT</body>\n</html>";
assert_eq!(node.to_html(), html);
}
// Heading
{
let node_h1 = Node::Heading { level: 1, children: vec![child.clone()] };
let node_h2 = Node::Heading { level: 2, children: vec![child.clone()] };
let node_h3 = Node::Heading { level: 3, children: vec![child.clone()] };
let node_h4 = Node::Heading { level: 4, children: vec![child.clone()] };
let node_h5 = Node::Heading { level: 5, children: vec![child.clone()] };
let node_h6 = Node::Heading { level: 6, children: vec![child.clone()] };
let node_h1 = Node::Heading {
level: 1,
children: vec![child.clone()],
};
let node_h2 = Node::Heading {
level: 2,
children: vec![child.clone()],
};
let node_h3 = Node::Heading {
level: 3,
children: vec![child.clone()],
};
let node_h4 = Node::Heading {
level: 4,
children: vec![child.clone()],
};
let node_h5 = Node::Heading {
level: 5,
children: vec![child.clone()],
};
let node_h6 = Node::Heading {
level: 6,
children: vec![child.clone()],
};
let html_h1 = "<h1>CONTENT</h1>\n";
let html_h2 = "<h2>CONTENT</h2>\n";
let html_h3 = "<h3>CONTENT</h3>\n";
@ -198,15 +238,23 @@ mod node_tests {
// Paragraph
{
let node = Node::Paragraph { children: vec![child.clone()] };
let node = Node::Paragraph {
children: vec![child.clone()],
};
let html = "<p>CONTENT</p>\n";
assert_eq!(node.to_html(), html);
}
// List
{
let node_ul = Node::List { ordered: false, children: vec![child.clone()] };
let node_ol = Node::List { ordered: true, children: vec![child.clone()] };
let node_ul = Node::List {
ordered: false,
children: vec![child.clone()],
};
let node_ol = Node::List {
ordered: true,
children: vec![child.clone()],
};
let html_ul = "<ul>CONTENT</ul>\n";
let html_ol = "<ol>CONTENT</ol>\n";
assert_eq!(node_ul.to_html(), html_ul);
@ -215,28 +263,64 @@ mod node_tests {
// List Item
{
let node = Node::ListItem { children: vec![child.clone()] };
let node = Node::ListItem {
children: vec![child.clone()],
};
let html = "<li>CONTENT</li>\n";
assert_eq!(node.to_html(), html);
}
// CodeBlock
{
let node = Node::CodeBlock { children: vec![child.clone()] };
let node = Node::CodeBlock {
children: vec![child.clone()],
};
let html = "<code>CONTENT</code>\n";
assert_eq!(node.to_html(), html);
}
// BlockQuote
{
let node = Node::BlockQuote { children: vec![child.clone()] };
let node = Node::BlockQuote {
children: vec![child.clone()],
};
let html = "<blockquote>CONTENT</blockquote>\n";
assert_eq!(node.to_html(), html);
}
// Bold
{
let node = Node::Bold {
children: vec![child.clone()],
};
let html = "<strong>CONTENT</strong>";
assert_eq!(node.to_html(), html);
}
// Italic
{
let node = Node::Italic {
children: vec![child.clone()],
};
let html = "<em>CONTENT</em>";
assert_eq!(node.to_html(), html);
}
// BoldItalic
{
let node = Node::BoldItalic {
children: vec![child.clone()],
};
let html = "<strong><em>CONTENT</em></strong>";
assert_eq!(node.to_html(), html);
}
// Link
{
let node = Node::Link { href: "HREF".into(), children: vec![child.clone()]};
let node = Node::Link {
href: "HREF".into(),
children: vec![child.clone()],
};
let html = "<a href=\"HREF\">CONTENT</a>";
assert_eq!(node.to_html(), html);
}
@ -246,35 +330,18 @@ mod node_tests {
fn can_return_html_string_for_inline_nodes() {
// Text
{
let node = Node::Text { content: "x".into() };
let node = Node::Text {
content: "x".into(),
};
let html = "x";
assert_eq!(node.to_html(), html);
}
// Bold
{
let node = Node::Bold { content: "x".into() };
let html = "<strong>x</strong>";
assert_eq!(node.to_html(), html);
}
// Italic
{
let node = Node::Italic { content: "x".into() };
let html = "<em>x</em>";
assert_eq!(node.to_html(), html);
}
// BoldItalic
{
let node = Node::BoldItalic { content: "x".into() };
let html = "<strong><em>x</em></strong>";
assert_eq!(node.to_html(), html);
}
// Code
{
let node = Node::Code { content: "x".into() };
let node = Node::Code {
content: "x".into(),
};
let html = "<code>x</code>";
assert_eq!(node.to_html(), html);
}
@ -283,7 +350,10 @@ mod node_tests {
#[test]
fn can_return_html_string_for_special_nodes() {
{
let node = Node::Image { src: "SOURCE".into(), alt: "ALT".into() };
let node = Node::Image {
src: "SOURCE".into(),
alt: "ALT".into(),
};
let html = "<img src=\"SOURCE\" alt=\"ALT\">\n";
assert_eq!(node.to_html(), html);
}
@ -367,9 +437,47 @@ mod node_tests {
assert!(!block_quote_node.is_empty());
assert!(block_quote_node_empty.is_empty());
// Bold
let bold_node = Node::Bold {
children: vec![Node::Text {
content: "bold".into(),
}],
};
let bold_node_empty = Node::Bold { children: vec![] };
assert!(!bold_node.is_empty());
assert!(bold_node_empty.is_empty());
// Italic
let italic_node = Node::Italic {
children: vec![Node::Text {
content: "italic".into(),
}],
};
let italic_node_empty = Node::Italic { children: vec![] };
assert!(!italic_node.is_empty());
assert!(italic_node_empty.is_empty());
// Bold Italic
let bold_italic_node = Node::BoldItalic {
children: vec![Node::Text {
content: "bold italic".into(),
}],
};
let bold_italic_node_empty = Node::BoldItalic { children: vec![] };
assert!(!bold_italic_node.is_empty());
assert!(bold_italic_node_empty.is_empty());
// Link
let link_node = Node::Link { href: "x".into(), children: vec![Node::Text{content: "link".into()}] };
let link_node_empty = Node::Link { href: "".into(), children: vec![] };
let link_node = Node::Link {
href: "x".into(),
children: vec![Node::Text {
content: "link".into(),
}],
};
let link_node_empty = Node::Link {
href: "".into(),
children: vec![],
};
assert!(!link_node.is_empty());
assert!(link_node_empty.is_empty());
}
@ -380,53 +488,31 @@ mod node_tests {
let text_node = Node::Text {
content: "text".into(),
};
let bold_node = Node::Bold {
content: "bold".into(),
};
let italic_node = Node::Italic {
content: "italic".into(),
};
let bold_italic_node = Node::BoldItalic {
content: "both".into(),
};
let code_node = Node::Code {
content: "code".into(),
};
assert!(!text_node.is_empty());
assert!(!bold_node.is_empty());
assert!(!italic_node.is_empty());
assert!(!bold_italic_node.is_empty());
assert!(!code_node.is_empty());
// empty
let text_node_empty = Node::Text {
content: "".into(),
};
let bold_node_empty = Node::Bold {
content: "".into(),
};
let italic_node_empty = Node::Italic {
content: "".into(),
};
let bold_italic_node_empty = Node::BoldItalic {
content: "".into(),
};
let code_node_empty = Node::Code {
content: "".into(),
};
let text_node_empty = Node::Text { content: "".into() };
let code_node_empty = Node::Code { content: "".into() };
assert!(text_node_empty.is_empty());
assert!(bold_node_empty.is_empty());
assert!(italic_node_empty.is_empty());
assert!(bold_italic_node_empty.is_empty());
assert!(code_node_empty.is_empty());
}
#[test]
fn can_return_when_empty_for_special_nodes() {
let image_node = Node::Image { src: "x".into(), alt: "x".into() };
let image_node_empty = Node::Image { src: "".into(), alt: "".into() };
let image_node = Node::Image {
src: "x".into(),
alt: "x".into(),
};
let image_node_empty = Node::Image {
src: "".into(),
alt: "".into(),
};
assert!(!image_node.is_empty());
assert!(image_node_empty.is_empty());
}
@ -491,6 +577,29 @@ mod node_tests {
};
let bq_children = bq.children().expect("BlockQuote should have children");
assert_eq!(bq_children.len(), 1);
// Bold
let bold = Node::Bold {
children: vec![child.clone()],
};
let bold_children = bold.children().expect("BlockQuote should have children");
assert_eq!(bold_children.len(), 1);
// Italic
let italic = Node::Italic {
children: vec![child.clone()],
};
let italic_children = italic.children().expect("BlockQuote should have children");
assert_eq!(italic_children.len(), 1);
// Bold Italic
let bold_italic = Node::BoldItalic {
children: vec![child.clone()],
};
let bold_italic_children = bold_italic
.children()
.expect("BlockQuote should have children");
assert_eq!(bold_italic_children.len(), 1);
}
#[test]
@ -498,23 +607,11 @@ mod node_tests {
let text = Node::Text {
content: "x".into(),
};
let bold = Node::Bold {
content: "x".into(),
};
let italic = Node::Italic {
content: "x".into(),
};
let bolditalic = Node::BoldItalic {
content: "x".into(),
};
let code = Node::Code {
content: "x".into(),
};
assert!(text.children().is_none());
assert!(bold.children().is_none());
assert!(italic.children().is_none());
assert!(bolditalic.children().is_none());
assert!(code.children().is_none());
}
@ -529,7 +626,9 @@ mod node_tests {
#[test]
fn add_child_succeeds_for_structure_nodes() {
let child = Node::Text { content: "x".into() };
let child = Node::Text {
content: "x".into(),
};
// Document
{
@ -541,7 +640,10 @@ mod node_tests {
// Heading
{
let mut node = Node::Heading { level: 1, children: vec![] };
let mut node = Node::Heading {
level: 1,
children: vec![],
};
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "Heading should have 1 child");
@ -557,7 +659,10 @@ mod node_tests {
// List
{
let mut node = Node::List { ordered: false, children: vec![] };
let mut node = Node::List {
ordered: false,
children: vec![],
};
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "List should have 1 child");
@ -586,9 +691,37 @@ mod node_tests {
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "BlockQuote should have 1 child");
}
// Bold
{
let mut node = Node::Bold { children: vec![] };
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "Bold should have 1 child");
}
// Italic
{
let mut node = Node::Italic { children: vec![] };
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "Italic should have 1 child");
}
// Bold Italic
{
let mut node = Node::BoldItalic { children: vec![] };
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "BoldItalic should have 1 child");
}
// Link
{
let mut node = Node::Link { href: "x".into(), children: vec![] };
let mut node = Node::Link {
href: "x".into(),
children: vec![],
};
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "Link should have 1 child");
@ -597,7 +730,7 @@ mod node_tests {
#[test]
#[should_panic(expected = "Can't add child to this node type.")]
fn add_child_panics_for_inline_nodes() {
fn add_child_panics_for_text_node() {
let child = Node::Text {
content: "x".into(),
};
@ -605,29 +738,27 @@ mod node_tests {
let mut text = Node::Text {
content: "x".into(),
};
let mut bold = Node::Bold {
content: "x".into(),
};
let mut italic = Node::Italic {
content: "x".into(),
};
let mut bolditalic = Node::BoldItalic {
text.add_child(child.clone());
}
#[test]
#[should_panic(expected = "Can't add child to this node type.")]
fn add_child_panics_for_code_node() {
let child = Node::Text {
content: "x".into(),
};
let mut code = Node::Code {
content: "x".into(),
};
text.add_child(child.clone());
bold.add_child(child.clone());
italic.add_child(child.clone());
bolditalic.add_child(child.clone());
code.add_child(child.clone());
}
#[test]
#[should_panic(expected = "Can't add child to this node type.")]
fn add_child_panics_for_special_nodes() {
fn add_child_panics_for_image_node() {
let child = Node::Text {
content: "x".into(),
};

View File

@ -1,32 +1,26 @@
use std::iter::Peekable;
use std::str::Chars;
use crate::node::Node;
#[derive(Debug)]
pub struct Parser {
content: String,
pub struct Parser<'a> {
content: &'a str,
position: usize,
}
impl Parser {
/// Create a new parser object with the content attached. This does not take ownership of the
/// string provided and therefore dies with the string. The input string is normalized to
/// support operation on all operating systems.
pub fn new(content: &str) -> Self {
let normalized = content.replace("\r\n", "\n").replace("\r", "");
impl<'a> Parser<'a> {
// Content should be normalized before being passed into this function. Since we do not take
// ownership here, we cannot mutate it.
pub fn new(content: &'a str) -> Self {
Self {
content: normalized,
content,
position: 0,
}
}
pub fn parse_document(&self) -> Node {
let mut stream = self.content.chars().peekable();
pub fn parse_document(&mut self) -> Node {
let mut root = Node::Document { children: vec![] };
// Same as !IsEOF from the CPP implementation
while stream.peek().is_some() {
let block = self.parse_block(&mut stream);
while !self.is_eof() {
let block = self.parse_block();
if !block.is_empty() {
root.add_child(block);
}
@ -35,178 +29,225 @@ impl Parser {
root
}
/// BUG: USING CLONES IS FUCKED AS HELL, STOP THIS SHIT
fn parse_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
self.consume_whitespace(stream);
// BUG: We should be using optional, not blank nodes
fn parse_block(&mut self) -> Node {
self.consume_whitespace();
// If we are at the end, return an empty node; it will be ignored
let Some(&c1) = stream.peek() else {
// If we are at the end, return an empty node; the caller checks `is_empty()` and ignores it.
if self.is_eof() {
return Node::Text { content: "".into() };
};
}
// Use a clone to look ahead
let mut clone = stream.clone();
clone.next();
let c1 = self.peek();
let c2 = self.peek_nth(1);
let c3 = self.peek_nth(2);
let c2 = clone.next();
let c3 = clone.next();
// Now we can handle numbers from 0 to 99 for ordered lists
// TODO: For now we are just implementing paragraphs. So we can start with inline parsing
match (c1, c2, c3) {
('#', _, _) => self.parse_heading(stream),
('*' | '-' | '+', Some(' ' | '\t'), _) => self.parse_list(stream, false),
(d, Some('.'), _) if d.is_ascii_digit() => self.parse_list(stream, true),
(d1, Some(d2), Some('.')) if d1.is_ascii_digit() && d2.is_ascii_digit() => {
self.parse_list(stream, true)
}
('`', Some('`'), Some('`')) => self.parse_code_block(stream),
('!', Some('['), _) => self.parse_image(stream),
('>', _, _) => self.parse_block_quote(stream),
_ => self.parse_paragraph(stream),
// (Some('#'), _, _) => self.parse_heading(),
_ => self.parse_paragraph(),
}
}
// --- STRUCTURE PARSING ---
fn parse_paragraph(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
let children = self.parse_inline(stream);
Node::Paragraph { children }
}
fn parse_heading(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
// Consume the hashes to determine the size, then consume the whitespace
let hashes = self.consume_until_char(stream, ' ');
self.consume_whitespace(stream);
let children = self.parse_inline(stream);
dbg!(&children);
fn parse_heading(&mut self) -> Node {
Node::Heading {
level: hashes.len(),
children,
level: 1,
children: vec![],
}
}
fn parse_list(&self, stream: &mut Peekable<Chars<'_>>, ordered: bool) -> Node {
Node::Text { content: "".into() }
}
fn parse_block_quote(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
Node::Text { content: "".into() }
}
fn parse_code_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
Node::Text { content: "".into() }
}
fn parse_image(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
Node::Text { content: "".into() }
fn parse_paragraph(&mut self) -> Node {
Node::Paragraph {
children: self.parse_inline(),
}
}
// --- INLINE PARSING ---
fn parse_inline(&self, stream: &mut Peekable<Chars<'_>>) -> Vec<Node> {
fn parse_inline(&mut self) -> Vec<Node> {
let mut nodes = vec![];
let mut str = String::new();
let mut str = "".to_string();
// use a clone to allow for peeking ahead
// REMEMBER TO ALSO CONSUME ANYTIME MAIN STREAM IS CONSUMED
let mut clone = stream.clone();
clone.next(); // Stay one ahead
while let Some(&c1) = stream.peek() {
let c2 = clone.next();
let c3 = clone.next();
// println!("({}, {}, {})", c1, c2.unwrap_or('~'), c3.unwrap_or('~'));
while !self.is_eof() {
// c1 stores current char, c2/c3 store future, contextual chars
let c1 = self.peek();
let c2 = self.peek_nth(1);
let c3 = self.peek_nth(2);
// TODO: Need to redesign the nodes
// TODO: Support _ AND *
match (c1, c2, c3) {
('\n', _, _) => break,
('!', Some('['), _) => { /* Image */ }
('[', _, _) => { /* Link */ }
('*', Some('*'), Some('*')) => { /* Bold Italic */ }
('*', Some('*'), _) => {
nodes.push(Node::Text { content: str });
str = "".into();
let node = self.parse_bold(stream);
(None, _, _) | (Some('\n'), Some('\n'), _) => break,
(Some('!'), Some('['), _) =>
/* parse image */
{
continue;
}
(Some('['), _, _) =>
/* parse link */
{
continue;
}
(Some('*'), Some('*'), Some('*')) =>
/* parse bold italic */
{
continue;
}
(Some('*'), Some('*'), _) =>
/* parse bold */
{
continue;
}
(Some('*'), _, _) => {
nodes.push(Node::Text {
content: str.clone(),
});
str = "".to_string();
let node = self.parse_italic();
if !node.is_empty() {
nodes.push(node)
nodes.push(node);
}
continue;
}
('*', _, _) => { /* Italic */ }
('`', _, _) => { /* Code */ }
(Some('`'), _, _) =>
/* parse code */
{
continue;
}
_ => {
// Should we swap '\n' with ' '
str.push(c1);
stream.next();
clone.next();
(Some(c), _, _) => {
str.push(c);
self.consume();
}
}
}
// Push final node
if !str.is_empty() {
nodes.push(Node::Text { content: str });
}
// TODO: Push text node
nodes.push(Node::Text { content: str });
nodes
}
/// BUG: THIS FUNCTION SHOULD PARSE UNTIL IT FINDS EITHER THE ** OR AN ENDING OF A BLOCK. FOR
/// EXAMPLE: \n\n IS A NEW BLOCK AND THEN IT SHOULD END. BUT IF IT ENDS ON A NEW BLOCK, IT
/// SHOULD RETURN A TEXT NODE, WITH THE ** PREPENDED, SIGNIFYING FAILURE TO COMPLETE THE ENTIRE
/// STRONG BLOCK.
fn parse_bold(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
let mut str = String::new();
fn parse_italic(&mut self) -> Node {
let mut str = "".to_string();
self.consume(); // Consume the '*'
stream.next();
stream.next();
println!("'{}'", self.content);
let mut clone = stream.clone();
clone.next();
while let Some(&c1) = stream.peek() {
let c2 = clone.peek();
// Use loop instead of 'while !self.is_eof()' so we can make it to the (None, _) case to
// exit
loop {
let c1 = self.peek();
let c2 = self.peek_nth(1);
match (c1, c2) {
('\n', Some('\n')) => break,
('*', Some('*')) => {
stream.next();
stream.next();
(None, _) | (Some('\n'), None) | (Some('\n'), Some('\n')) => {
// In this case, we did not find an ending star, so we should return a normal
// node. But we have to add the star back since we consumed it already
str.insert(0, '*');
return Node::Text { content: str };
}
(Some('*'), _) => {
self.consume();
break;
}
_ => str.push(c1),
(Some(c), _) => {
str.push(c);
self.consume();
}
}
stream.next();
clone.next();
}
println!("@str '{}'", str);
Node::Bold { content: str }
Node::Italic {
children: vec![Node::Text { content: str }],
}
}
// --- HELPER FUNCTIONS ---
fn consume_whitespace(&self, stream: &mut Peekable<Chars<'_>>) {
while let Some(&c) = stream.peek() {
// --- HELPERS ---
/// Reports whether the cursor has reached (or passed) the end of the input.
///
/// `position` is a byte offset into `content`, so the input is exhausted once no bytes remain
/// beyond it.
fn is_eof(&self) -> bool {
    let remaining = self.content.len().saturating_sub(self.position);
    remaining == 0
}
/// Returns the character under the cursor without consuming it, or `None` when no characters
/// remain.
fn peek(&self) -> Option<char> {
    let rest = &self.content[self.position..];
    rest.chars().next()
}
/// Looks `n` characters ahead of the cursor without consuming anything.
///
/// `n` counts characters, not bytes: `peek_nth(0)` is the character under the cursor. Returns
/// `None` when fewer than `n + 1` characters remain.
fn peek_nth(&self, n: usize) -> Option<char> {
    let rest = &self.content[self.position..];
    let mut upcoming = rest.chars();
    upcoming.nth(n)
}
/// Advances the cursor past a single character; does nothing when no characters remain.
fn consume(&mut self) {
    // The cursor is a byte offset, so step by the character's UTF-8 width.
    if let Some(c) = self.content[self.position..].chars().next() {
        self.position += c.len_utf8();
    }
}
/// Advances the cursor past up to `n` characters, stopping early if the input runs out.
///
/// The cursor is a byte offset, so it moves by the summed UTF-8 width of the characters
/// skipped rather than by `n`.
fn consume_n(&mut self, n: usize) {
    let width: usize = self.content[self.position..]
        .chars()
        .take(n)
        .map(char::len_utf8)
        .sum();
    self.position += width;
}
fn consume_whitespace(&mut self) {
while let Some(c) = self.peek() {
if !c.is_whitespace() {
break;
}
stream.next();
self.consume();
}
}
}
#[cfg(test)]
mod parser_tests {
use super::Parser;
#[test]
fn test_parse_italic() {
    // `parse_italic` is exercised directly, so every input begins with the opening '*' that
    // the method consumes unconditionally. Wider inline behaviour belongs to the
    // `parse_inline` tests.
    //
    // Each case pairs the raw input with the HTML expected from the returned node:
    // closed emphasis renders as `<em>…</em>` (anything after the closing '*' is left
    // unparsed), while emphasis that hits end of input or a block break comes back as plain
    // text with the '*' restored.
    let cases = [
        ("*hello world*", "<em>hello world</em>"),
        ("*hello* world", "<em>hello</em>"),
        ("*hello world", "*hello world"),
        ("*hello world\n", "*hello world"),
        ("*hello world\n\n", "*hello world"),
        ("*hello\n\nworld*", "*hello"),
    ];

    for (input, expected) in cases {
        let mut parser = Parser::new(input);
        let node = parser.parse_italic();
        assert_eq!(node.to_html(), expected);
    }
}
/// THIS DOES NOT CONSUME THE TARGET, IT STOPS RIGHT BEFORE IT AND RETURNS THE STRING UNTIL
/// ITSELF
fn consume_until_char(&self, stream: &mut Peekable<Chars<'_>>, target: char) -> String {
let mut out = String::new();
while let Some(&c) = stream.peek() {
if c == target {
break;
}
stream.next();
out.push(c);
}
out
}
}

View File

@ -3,13 +3,16 @@ use transpiler::parser::Parser;
pub fn main() -> Result<(), Box<dyn std::error::Error>> {
let file = Filesystem::read_file("./test.md");
let content;
let mut content;
match file {
Ok(s) => content = s,
Err(err) => panic!("Failed to read file. {}", err),
}
let parser = Parser::new(&content);
// Normalize char stream
content = content.replace("\r\n", "\n").replace("\r", "");
let mut parser = Parser::new(&content);
let node = parser.parse_document();
match Filesystem::write_file("./output.html", &node.to_html()) {

View File

@ -1 +1,8 @@
# h1, **this** is sick as fuck
hello *world*. This is pre*tty* cool
What about this
This *should have a star