Compare commits

..

No commits in common. "9f2595faeb0ff6dffcd7851125d23fe7417e42a6" and "3c25e6b9e86e5c9081e3d3f0fe07f57dc33d5d8d" have entirely different histories.

4 changed files with 274 additions and 456 deletions

View File

@ -10,12 +10,11 @@ pub enum Node {
BlockQuote { children: Vec<Node> }, BlockQuote { children: Vec<Node> },
Link { href: String, children: Vec<Node> }, Link { href: String, children: Vec<Node> },
Bold { children: Vec<Node> },
Italic { children: Vec<Node> },
BoldItalic { children: Vec<Node> },
// Inline Nodes // Inline Nodes
Text { content: String }, Text { content: String },
Bold { content: String },
Italic { content: String },
BoldItalic { content: String },
Code { content: String }, Code { content: String },
// Special Nodes // Special Nodes
@ -63,25 +62,16 @@ impl Node {
let inner = children.iter().map(|x| x.to_html()).collect::<String>(); let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<blockquote>{}</blockquote>\n", inner) format!("<blockquote>{}</blockquote>\n", inner)
} }
Node::Link { href, children } => { Node::Link { href, children} => {
let inner = children.iter().map(|x| x.to_html()).collect::<String>(); let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<a href=\"{}\">{}</a>", href, inner) format!("<a href=\"{}\">{}</a>", href, inner)
} }
Node::Bold { children } => {
let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<strong>{}</strong>", inner)
}
Node::Italic { children } => {
let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<em>{}</em>", inner)
}
Node::BoldItalic { children } => {
let inner = children.iter().map(|x| x.to_html()).collect::<String>();
format!("<strong><em>{}</em></strong>", inner)
}
// Inline nodes // Inline nodes
Node::Text { content } => format!("{}", content), Node::Text { content } => format!("{}", content),
Node::Bold { content } => format!("<strong>{}</strong>", content),
Node::Italic { content } => format!("<em>{}</em>", content),
Node::BoldItalic { content } => format!("<strong><em>{}</em></strong>", content),
Node::Code { content } => format!("<code>{}</code>", content), Node::Code { content } => format!("<code>{}</code>", content),
// Special nodes // Special nodes
@ -98,20 +88,18 @@ impl Node {
Node::Document { children } Node::Document { children }
| Node::Heading { level: _, children } | Node::Heading { level: _, children }
| Node::Paragraph { children } | Node::Paragraph { children }
| Node::List { | Node::List { ordered: _, children }
ordered: _,
children,
}
| Node::ListItem { children } | Node::ListItem { children }
| Node::CodeBlock { children } | Node::CodeBlock { children }
| Node::BlockQuote { children } | Node::BlockQuote { children } => children.is_empty(),
| Node::Bold { children }
| Node::Italic { children }
| Node::BoldItalic { children } => children.is_empty(),
Node::Link { href, children } => children.is_empty() && href.is_empty(), Node::Link { href, children } => children.is_empty() && href.is_empty(),
// Inline nodes // Inline nodes
Node::Text { content } | Node::Code { content } => content.is_empty(), Node::Text { content }
| Node::Bold { content }
| Node::Italic { content }
| Node::BoldItalic { content }
| Node::Code { content } => content.is_empty(),
// Special rules // Special rules
Node::Image { src, alt } => src.is_empty() && alt.is_empty(), Node::Image { src, alt } => src.is_empty() && alt.is_empty(),
@ -126,20 +114,18 @@ impl Node {
Node::Document { children } Node::Document { children }
| Node::Heading { level: _, children } | Node::Heading { level: _, children }
| Node::Paragraph { children } | Node::Paragraph { children }
| Node::List { | Node::List { ordered: _, children}
ordered: _,
children,
}
| Node::ListItem { children } | Node::ListItem { children }
| Node::CodeBlock { children } | Node::CodeBlock { children }
| Node::BlockQuote { children } | Node::BlockQuote { children }
| Node::Bold { children }
| Node::Italic { children }
| Node::BoldItalic { children }
| Node::Link { href: _, children } => Some(&children), | Node::Link { href: _, children } => Some(&children),
// Inline Nodes // Inline Nodes
Node::Text { content: _ } | Node::Code { content: _ } => None, Node::Text { content: _ }
| Node::Bold { content: _ }
| Node::Italic { content: _ }
| Node::BoldItalic { content: _ }
| Node::Code { content: _ } => None,
// Special Nodes // Special Nodes
Node::Image { src: _, alt: _ } => None, Node::Image { src: _, alt: _ } => None,
@ -154,22 +140,18 @@ impl Node {
Node::Document { children } Node::Document { children }
| Node::Heading { level: _, children } | Node::Heading { level: _, children }
| Node::Paragraph { children } | Node::Paragraph { children }
| Node::List { | Node::List { ordered: _, children }
ordered: _,
children,
}
| Node::ListItem { children } | Node::ListItem { children }
| Node::CodeBlock { children } | Node::CodeBlock { children }
| Node::BlockQuote { children } | Node::BlockQuote { children }
| Node::Bold { children }
| Node::Italic { children }
| Node::BoldItalic { children }
| Node::Link { href: _, children } => children.push(child), | Node::Link { href: _, children } => children.push(child),
// Inline Nodes // Inline Nodes
Node::Text { content: _ } | Node::Code { content: _ } => { Node::Text { content: _ }
panic!("Can't add child to this node type.") | Node::Bold { content: _ }
} | Node::Italic { content: _ }
| Node::BoldItalic { content: _ }
| Node::Code { content: _ } => panic!("Can't add child to this node type."),
// Special Nodes // Special Nodes
Node::Image { src: _, alt: _ } => panic!("Can't add child to this node type."), Node::Image { src: _, alt: _ } => panic!("Can't add child to this node type."),
@ -183,45 +165,23 @@ mod node_tests {
#[test] #[test]
fn can_return_html_string_for_structure_nodes() { fn can_return_html_string_for_structure_nodes() {
let child = Node::Text { let child = Node::Text { content: "CONTENT".into() };
content: "CONTENT".into(),
};
// Document // Document
{ {
let node = Node::Document { let node = Node::Document { children: vec![child.clone()] };
children: vec![child.clone()],
};
let html = "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Document</title>\n</head>\n<body>\nCONTENT</body>\n</html>"; let html = "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Document</title>\n</head>\n<body>\nCONTENT</body>\n</html>";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
// Heading // Heading
{ {
let node_h1 = Node::Heading { let node_h1 = Node::Heading { level: 1, children: vec![child.clone()] };
level: 1, let node_h2 = Node::Heading { level: 2, children: vec![child.clone()] };
children: vec![child.clone()], let node_h3 = Node::Heading { level: 3, children: vec![child.clone()] };
}; let node_h4 = Node::Heading { level: 4, children: vec![child.clone()] };
let node_h2 = Node::Heading { let node_h5 = Node::Heading { level: 5, children: vec![child.clone()] };
level: 2, let node_h6 = Node::Heading { level: 6, children: vec![child.clone()] };
children: vec![child.clone()],
};
let node_h3 = Node::Heading {
level: 3,
children: vec![child.clone()],
};
let node_h4 = Node::Heading {
level: 4,
children: vec![child.clone()],
};
let node_h5 = Node::Heading {
level: 5,
children: vec![child.clone()],
};
let node_h6 = Node::Heading {
level: 6,
children: vec![child.clone()],
};
let html_h1 = "<h1>CONTENT</h1>\n"; let html_h1 = "<h1>CONTENT</h1>\n";
let html_h2 = "<h2>CONTENT</h2>\n"; let html_h2 = "<h2>CONTENT</h2>\n";
let html_h3 = "<h3>CONTENT</h3>\n"; let html_h3 = "<h3>CONTENT</h3>\n";
@ -238,23 +198,15 @@ mod node_tests {
// Paragraph // Paragraph
{ {
let node = Node::Paragraph { let node = Node::Paragraph { children: vec![child.clone()] };
children: vec![child.clone()],
};
let html = "<p>CONTENT</p>\n"; let html = "<p>CONTENT</p>\n";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
// List // List
{ {
let node_ul = Node::List { let node_ul = Node::List { ordered: false, children: vec![child.clone()] };
ordered: false, let node_ol = Node::List { ordered: true, children: vec![child.clone()] };
children: vec![child.clone()],
};
let node_ol = Node::List {
ordered: true,
children: vec![child.clone()],
};
let html_ul = "<ul>CONTENT</ul>\n"; let html_ul = "<ul>CONTENT</ul>\n";
let html_ol = "<ol>CONTENT</ol>\n"; let html_ol = "<ol>CONTENT</ol>\n";
assert_eq!(node_ul.to_html(), html_ul); assert_eq!(node_ul.to_html(), html_ul);
@ -263,64 +215,28 @@ mod node_tests {
// List Item // List Item
{ {
let node = Node::ListItem { let node = Node::ListItem { children: vec![child.clone()] };
children: vec![child.clone()],
};
let html = "<li>CONTENT</li>\n"; let html = "<li>CONTENT</li>\n";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
// CodeBlock // CodeBlock
{ {
let node = Node::CodeBlock { let node = Node::CodeBlock { children: vec![child.clone()] };
children: vec![child.clone()],
};
let html = "<code>CONTENT</code>\n"; let html = "<code>CONTENT</code>\n";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
// BlockQuote // BlockQuote
{ {
let node = Node::BlockQuote { let node = Node::BlockQuote { children: vec![child.clone()] };
children: vec![child.clone()],
};
let html = "<blockquote>CONTENT</blockquote>\n"; let html = "<blockquote>CONTENT</blockquote>\n";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
// Bold
{
let node = Node::Bold {
children: vec![child.clone()],
};
let html = "<strong>CONTENT</strong>";
assert_eq!(node.to_html(), html);
}
// Italic
{
let node = Node::Italic {
children: vec![child.clone()],
};
let html = "<em>CONTENT</em>";
assert_eq!(node.to_html(), html);
}
// BoldItalic
{
let node = Node::BoldItalic {
children: vec![child.clone()],
};
let html = "<strong><em>CONTENT</em></strong>";
assert_eq!(node.to_html(), html);
}
// Link // Link
{ {
let node = Node::Link { let node = Node::Link { href: "HREF".into(), children: vec![child.clone()]};
href: "HREF".into(),
children: vec![child.clone()],
};
let html = "<a href=\"HREF\">CONTENT</a>"; let html = "<a href=\"HREF\">CONTENT</a>";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
@ -330,18 +246,35 @@ mod node_tests {
fn can_return_html_string_for_inline_nodes() { fn can_return_html_string_for_inline_nodes() {
// Text // Text
{ {
let node = Node::Text { let node = Node::Text { content: "x".into() };
content: "x".into(),
};
let html = "x"; let html = "x";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
// Bold
{
let node = Node::Bold { content: "x".into() };
let html = "<strong>x</strong>";
assert_eq!(node.to_html(), html);
}
// Italic
{
let node = Node::Italic { content: "x".into() };
let html = "<em>x</em>";
assert_eq!(node.to_html(), html);
}
// BoldItalic
{
let node = Node::BoldItalic { content: "x".into() };
let html = "<strong><em>x</em></strong>";
assert_eq!(node.to_html(), html);
}
// Code // Code
{ {
let node = Node::Code { let node = Node::Code { content: "x".into() };
content: "x".into(),
};
let html = "<code>x</code>"; let html = "<code>x</code>";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
@ -350,10 +283,7 @@ mod node_tests {
#[test] #[test]
fn can_return_html_string_for_special_nodes() { fn can_return_html_string_for_special_nodes() {
{ {
let node = Node::Image { let node = Node::Image { src: "SOURCE".into(), alt: "ALT".into() };
src: "SOURCE".into(),
alt: "ALT".into(),
};
let html = "<img src=\"SOURCE\" alt=\"ALT\">\n"; let html = "<img src=\"SOURCE\" alt=\"ALT\">\n";
assert_eq!(node.to_html(), html); assert_eq!(node.to_html(), html);
} }
@ -437,47 +367,9 @@ mod node_tests {
assert!(!block_quote_node.is_empty()); assert!(!block_quote_node.is_empty());
assert!(block_quote_node_empty.is_empty()); assert!(block_quote_node_empty.is_empty());
// Bold
let bold_node = Node::Bold {
children: vec![Node::Text {
content: "bold".into(),
}],
};
let bold_node_empty = Node::Bold { children: vec![] };
assert!(!bold_node.is_empty());
assert!(bold_node_empty.is_empty());
// Italic
let italic_node = Node::Italic {
children: vec![Node::Text {
content: "italic".into(),
}],
};
let italic_node_empty = Node::Italic { children: vec![] };
assert!(!italic_node.is_empty());
assert!(italic_node_empty.is_empty());
// Bold Italic
let bold_italic_node = Node::BoldItalic {
children: vec![Node::Text {
content: "bold italic".into(),
}],
};
let bold_italic_node_empty = Node::BoldItalic { children: vec![] };
assert!(!bold_italic_node.is_empty());
assert!(bold_italic_node_empty.is_empty());
// Link // Link
let link_node = Node::Link { let link_node = Node::Link { href: "x".into(), children: vec![Node::Text{content: "link".into()}] };
href: "x".into(), let link_node_empty = Node::Link { href: "".into(), children: vec![] };
children: vec![Node::Text {
content: "link".into(),
}],
};
let link_node_empty = Node::Link {
href: "".into(),
children: vec![],
};
assert!(!link_node.is_empty()); assert!(!link_node.is_empty());
assert!(link_node_empty.is_empty()); assert!(link_node_empty.is_empty());
} }
@ -488,31 +380,53 @@ mod node_tests {
let text_node = Node::Text { let text_node = Node::Text {
content: "text".into(), content: "text".into(),
}; };
let bold_node = Node::Bold {
content: "bold".into(),
};
let italic_node = Node::Italic {
content: "italic".into(),
};
let bold_italic_node = Node::BoldItalic {
content: "both".into(),
};
let code_node = Node::Code { let code_node = Node::Code {
content: "code".into(), content: "code".into(),
}; };
assert!(!text_node.is_empty()); assert!(!text_node.is_empty());
assert!(!bold_node.is_empty());
assert!(!italic_node.is_empty());
assert!(!bold_italic_node.is_empty());
assert!(!code_node.is_empty()); assert!(!code_node.is_empty());
// empty // empty
let text_node_empty = Node::Text { content: "".into() }; let text_node_empty = Node::Text {
let code_node_empty = Node::Code { content: "".into() }; content: "".into(),
};
let bold_node_empty = Node::Bold {
content: "".into(),
};
let italic_node_empty = Node::Italic {
content: "".into(),
};
let bold_italic_node_empty = Node::BoldItalic {
content: "".into(),
};
let code_node_empty = Node::Code {
content: "".into(),
};
assert!(text_node_empty.is_empty()); assert!(text_node_empty.is_empty());
assert!(bold_node_empty.is_empty());
assert!(italic_node_empty.is_empty());
assert!(bold_italic_node_empty.is_empty());
assert!(code_node_empty.is_empty()); assert!(code_node_empty.is_empty());
} }
#[test] #[test]
fn can_return_when_empty_for_special_nodes() { fn can_return_when_empty_for_special_nodes() {
let image_node = Node::Image { let image_node = Node::Image { src: "x".into(), alt: "x".into() };
src: "x".into(), let image_node_empty = Node::Image { src: "".into(), alt: "".into() };
alt: "x".into(),
};
let image_node_empty = Node::Image {
src: "".into(),
alt: "".into(),
};
assert!(!image_node.is_empty()); assert!(!image_node.is_empty());
assert!(image_node_empty.is_empty()); assert!(image_node_empty.is_empty());
} }
@ -577,29 +491,6 @@ mod node_tests {
}; };
let bq_children = bq.children().expect("BlockQuote should have children"); let bq_children = bq.children().expect("BlockQuote should have children");
assert_eq!(bq_children.len(), 1); assert_eq!(bq_children.len(), 1);
// Bold
let bold = Node::Bold {
children: vec![child.clone()],
};
let bold_children = bold.children().expect("BlockQuote should have children");
assert_eq!(bold_children.len(), 1);
// Italic
let italic = Node::Italic {
children: vec![child.clone()],
};
let italic_children = italic.children().expect("BlockQuote should have children");
assert_eq!(italic_children.len(), 1);
// Bold Italic
let bold_italic = Node::BoldItalic {
children: vec![child.clone()],
};
let bold_italic_children = bold_italic
.children()
.expect("BlockQuote should have children");
assert_eq!(bold_italic_children.len(), 1);
} }
#[test] #[test]
@ -607,11 +498,23 @@ mod node_tests {
let text = Node::Text { let text = Node::Text {
content: "x".into(), content: "x".into(),
}; };
let bold = Node::Bold {
content: "x".into(),
};
let italic = Node::Italic {
content: "x".into(),
};
let bolditalic = Node::BoldItalic {
content: "x".into(),
};
let code = Node::Code { let code = Node::Code {
content: "x".into(), content: "x".into(),
}; };
assert!(text.children().is_none()); assert!(text.children().is_none());
assert!(bold.children().is_none());
assert!(italic.children().is_none());
assert!(bolditalic.children().is_none());
assert!(code.children().is_none()); assert!(code.children().is_none());
} }
@ -626,9 +529,7 @@ mod node_tests {
#[test] #[test]
fn add_child_succeeds_for_structure_nodes() { fn add_child_succeeds_for_structure_nodes() {
let child = Node::Text { let child = Node::Text { content: "x".into() };
content: "x".into(),
};
// Document // Document
{ {
@ -640,10 +541,7 @@ mod node_tests {
// Heading // Heading
{ {
let mut node = Node::Heading { let mut node = Node::Heading { level: 1, children: vec![] };
level: 1,
children: vec![],
};
node.add_child(child.clone()); node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0); let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "Heading should have 1 child"); assert_eq!(len, 1, "Heading should have 1 child");
@ -659,10 +557,7 @@ mod node_tests {
// List // List
{ {
let mut node = Node::List { let mut node = Node::List { ordered: false, children: vec![] };
ordered: false,
children: vec![],
};
node.add_child(child.clone()); node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0); let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "List should have 1 child"); assert_eq!(len, 1, "List should have 1 child");
@ -691,37 +586,9 @@ mod node_tests {
let len = node.children().map(|c| c.len()).unwrap_or(0); let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "BlockQuote should have 1 child"); assert_eq!(len, 1, "BlockQuote should have 1 child");
} }
// Bold
{
let mut node = Node::Bold { children: vec![] };
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "Bold should have 1 child");
}
// Italic
{
let mut node = Node::Italic { children: vec![] };
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "Italic should have 1 child");
}
// Bold Italic
{
let mut node = Node::BoldItalic { children: vec![] };
node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "BoldItalic should have 1 child");
}
// Link // Link
{ {
let mut node = Node::Link { let mut node = Node::Link { href: "x".into(), children: vec![] };
href: "x".into(),
children: vec![],
};
node.add_child(child.clone()); node.add_child(child.clone());
let len = node.children().map(|c| c.len()).unwrap_or(0); let len = node.children().map(|c| c.len()).unwrap_or(0);
assert_eq!(len, 1, "Link should have 1 child"); assert_eq!(len, 1, "Link should have 1 child");
@ -730,7 +597,7 @@ mod node_tests {
#[test] #[test]
#[should_panic(expected = "Can't add child to this node type.")] #[should_panic(expected = "Can't add child to this node type.")]
fn add_child_panics_for_text_node() { fn add_child_panics_for_inline_nodes() {
let child = Node::Text { let child = Node::Text {
content: "x".into(), content: "x".into(),
}; };
@ -738,27 +605,29 @@ mod node_tests {
let mut text = Node::Text { let mut text = Node::Text {
content: "x".into(), content: "x".into(),
}; };
let mut bold = Node::Bold {
text.add_child(child.clone()); content: "x".into(),
} };
let mut italic = Node::Italic {
#[test] content: "x".into(),
#[should_panic(expected = "Can't add child to this node type.")] };
fn add_child_panics_for_code_node() { let mut bolditalic = Node::BoldItalic {
let child = Node::Text {
content: "x".into(), content: "x".into(),
}; };
let mut code = Node::Code { let mut code = Node::Code {
content: "x".into(), content: "x".into(),
}; };
text.add_child(child.clone());
bold.add_child(child.clone());
italic.add_child(child.clone());
bolditalic.add_child(child.clone());
code.add_child(child.clone()); code.add_child(child.clone());
} }
#[test] #[test]
#[should_panic(expected = "Can't add child to this node type.")] #[should_panic(expected = "Can't add child to this node type.")]
fn add_child_panics_for_image_node() { fn add_child_panics_for_special_nodes() {
let child = Node::Text { let child = Node::Text {
content: "x".into(), content: "x".into(),
}; };

View File

@ -1,26 +1,32 @@
use std::iter::Peekable;
use std::str::Chars;
use crate::node::Node; use crate::node::Node;
#[derive(Debug)] #[derive(Debug)]
pub struct Parser<'a> { pub struct Parser {
content: &'a str, content: String,
position: usize,
} }
impl<'a> Parser<'a> { impl Parser {
// Content should be normalized before being passed into this function. Since we do not take /// Create a new parser object with the content attached. This does not take ownership of the
// ownership here, we cannot mutate it. /// string provided and therefore dies with the string. The input string is normalized to
pub fn new(content: &'a str) -> Self { /// support operation on all operating systems.
pub fn new(content: &str) -> Self {
let normalized = content.replace("\r\n", "\n").replace("\r", "");
Self { Self {
content, content: normalized,
position: 0,
} }
} }
pub fn parse_document(&mut self) -> Node { pub fn parse_document(&self) -> Node {
let mut stream = self.content.chars().peekable();
let mut root = Node::Document { children: vec![] }; let mut root = Node::Document { children: vec![] };
while !self.is_eof() { // Same as !IsEOF from the CPP implementation
let block = self.parse_block(); while stream.peek().is_some() {
let block = self.parse_block(&mut stream);
if !block.is_empty() { if !block.is_empty() {
root.add_child(block); root.add_child(block);
} }
@ -29,225 +35,178 @@ impl<'a> Parser<'a> {
root root
} }
// BUG: We should be using optional, not blank nodes /// BUG: USING CLONES IS FUCKED AS HELL, STOP THIS SHIT
fn parse_block(&mut self) -> Node { fn parse_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
self.consume_whitespace(); self.consume_whitespace(stream);
// If we are at the end, return an empty node, the caller should know to return it. // If we are at the end, return an empty node; it will be ignored
if self.is_eof() { let Some(&c1) = stream.peek() else {
return Node::Text { content: "".into() }; return Node::Text { content: "".into() };
} };
let c1 = self.peek(); // Use a clone to look ahead
let c2 = self.peek_nth(1); let mut clone = stream.clone();
let c3 = self.peek_nth(2); clone.next();
// TODO: For now we are just implementing paragraphs. So we can start with inline parsing let c2 = clone.next();
let c3 = clone.next();
// Now we can handle numbers from 0 to 99 for ordered lists
match (c1, c2, c3) { match (c1, c2, c3) {
// (Some('#'), _, _) => self.parse_heading(), ('#', _, _) => self.parse_heading(stream),
_ => self.parse_paragraph(), ('*' | '-' | '+', Some(' ' | '\t'), _) => self.parse_list(stream, false),
(d, Some('.'), _) if d.is_ascii_digit() => self.parse_list(stream, true),
(d1, Some(d2), Some('.')) if d1.is_ascii_digit() && d2.is_ascii_digit() => {
self.parse_list(stream, true)
}
('`', Some('`'), Some('`')) => self.parse_code_block(stream),
('!', Some('['), _) => self.parse_image(stream),
('>', _, _) => self.parse_block_quote(stream),
_ => self.parse_paragraph(stream),
} }
} }
fn parse_heading(&mut self) -> Node { // --- STRUCTURE PARSING ---
fn parse_paragraph(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
let children = self.parse_inline(stream);
Node::Paragraph { children }
}
fn parse_heading(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
// Consume the hashes to determine the size, then consume the whitespace
let hashes = self.consume_until_char(stream, ' ');
self.consume_whitespace(stream);
let children = self.parse_inline(stream);
dbg!(&children);
Node::Heading { Node::Heading {
level: 1, level: hashes.len(),
children: vec![], children,
} }
} }
fn parse_paragraph(&mut self) -> Node { fn parse_list(&self, stream: &mut Peekable<Chars<'_>>, ordered: bool) -> Node {
Node::Paragraph { Node::Text { content: "".into() }
children: self.parse_inline(), }
}
fn parse_block_quote(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
Node::Text { content: "".into() }
}
fn parse_code_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
Node::Text { content: "".into() }
}
fn parse_image(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
Node::Text { content: "".into() }
} }
// --- INLINE PARSING --- // --- INLINE PARSING ---
fn parse_inline(&mut self) -> Vec<Node> { fn parse_inline(&self, stream: &mut Peekable<Chars<'_>>) -> Vec<Node> {
let mut nodes = vec![]; let mut nodes = vec![];
let mut str = "".to_string(); let mut str = String::new();
while !self.is_eof() { // use a clone to allow for peeking ahead
// c1 stores current char, c2/c3 store future, contextual chars // REMEMBER TO ALSO CONSUME ANYTIME MAIN STREAM IS CONSUMED
let c1 = self.peek(); let mut clone = stream.clone();
let c2 = self.peek_nth(1); clone.next(); // Stay one ahead
let c3 = self.peek_nth(2);
while let Some(&c1) = stream.peek() {
let c2 = clone.next();
let c3 = clone.next();
// println!("({}, {}, {})", c1, c2.unwrap_or('~'), c3.unwrap_or('~'));
// TODO: Need to redesign the nodes
// TODO: Support _ AND *
match (c1, c2, c3) { match (c1, c2, c3) {
(None, _, _) | (Some('\n'), Some('\n'), _) => break, ('\n', _, _) => break,
(Some('!'), Some('['), _) => ('!', Some('['), _) => { /* Image */ }
/* parse image */ ('[', _, _) => { /* Link */ }
{ ('*', Some('*'), Some('*')) => { /* Bold Italic */ }
continue; ('*', Some('*'), _) => {
} nodes.push(Node::Text { content: str });
(Some('['), _, _) => str = "".into();
/* parse link */ let node = self.parse_bold(stream);
{
continue;
}
(Some('*'), Some('*'), Some('*')) =>
/* parse bold italic */
{
continue;
}
(Some('*'), Some('*'), _) =>
/* parse bold */
{
continue;
}
(Some('*'), _, _) => {
nodes.push(Node::Text {
content: str.clone(),
});
str = "".to_string();
let node = self.parse_italic();
if !node.is_empty() { if !node.is_empty() {
nodes.push(node); nodes.push(node)
} }
continue; continue;
} }
(Some('`'), _, _) => ('*', _, _) => { /* Italic */ }
/* parse code */ ('`', _, _) => { /* Code */ }
{
continue;
}
(Some(c), _, _) => { _ => {
str.push(c); // Should we swap '\n' with ' '
self.consume(); str.push(c1);
stream.next();
clone.next();
} }
} }
} }
// TODO: Push text node // Push final node
nodes.push(Node::Text { content: str }); if !str.is_empty() {
nodes.push(Node::Text { content: str });
}
nodes nodes
} }
fn parse_italic(&mut self) -> Node { /// BUG: THIS FUNCTION SHOULD PARSE UNTIL IT FINDS EITHER THE ** OR AN ENDING OF A BLOCK. FOR
let mut str = "".to_string(); /// EXAMPLE: \n\n IS A NEW BLOCK AND THEN IT SHOULD END. BUT IF IT ENDS ON A NEW BLOCK, IT
self.consume(); // Consume the '*' /// SHOULD RETURN A TEXT NODE, WITH THE ** PREPENDED, SIGNIFYING FAILURE TO COMPLETE THE ENTIRE
/// STRONG BLOCK.
fn parse_bold(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
let mut str = String::new();
println!("'{}'", self.content); stream.next();
stream.next();
// Use loop instead of 'while !self.is_eof()' so we can make it to the (None, _) case to let mut clone = stream.clone();
// exit clone.next();
loop {
let c1 = self.peek(); while let Some(&c1) = stream.peek() {
let c2 = self.peek_nth(1); let c2 = clone.peek();
match (c1, c2) { match (c1, c2) {
(None, _) | (Some('\n'), None) | (Some('\n'), Some('\n')) => { ('\n', Some('\n')) => break,
// In this case, we did not find an ending star, so we should return a normal ('*', Some('*')) => {
// node. But we have to add the star back since we consumed it already stream.next();
str.insert(0, '*'); stream.next();
return Node::Text { content: str };
}
(Some('*'), _) => {
self.consume();
break; break;
} }
(Some(c), _) => { _ => str.push(c1),
str.push(c);
self.consume();
}
} }
stream.next();
clone.next();
} }
Node::Italic { println!("@str '{}'", str);
children: vec![Node::Text { content: str }],
} Node::Bold { content: str }
} }
// --- HELPERS --- // --- HELPER FUNCTIONS ---
fn is_eof(&self) -> bool { fn consume_whitespace(&self, stream: &mut Peekable<Chars<'_>>) {
self.position >= self.content.len() while let Some(&c) = stream.peek() {
}
fn peek(&self) -> Option<char> {
self.peek_nth(0)
}
fn peek_nth(&self, n: usize) -> Option<char> {
self.content[self.position..].chars().nth(n)
}
fn consume(&mut self) {
self.consume_n(1)
}
fn consume_n(&mut self, n: usize) {
for _ in 0..n {
if let Some(c) = self.content[self.position..].chars().next() {
self.position += c.len_utf8();
} else {
break;
}
}
}
fn consume_whitespace(&mut self) {
while let Some(c) = self.peek() {
if !c.is_whitespace() { if !c.is_whitespace() {
break; break;
} }
self.consume(); stream.next();
} }
} }
}
#[cfg(test)] /// THIS DOES NOT CONSUME THE TARGET, IT STOPS RIGHT BEFORE IT AND RETURNS THE STRING UNTIL
mod parser_tests { /// ITSELF
use super::Parser; fn consume_until_char(&self, stream: &mut Peekable<Chars<'_>>, target: char) -> String {
let mut out = String::new();
#[test] while let Some(&c) = stream.peek() {
fn test_parse_italic() { if c == target {
// This test only tests the `parse_italic` method, so it is expected that the first break;
// character is a '*', otherwise the first character will be consumed. The `parse_inline` }
// tests will be able to handle more specific cases. stream.next();
{ out.push(c);
let s = "*hello world*";
let html = "<em>hello world</em>";
let mut p = Parser::new(s);
let node = p.parse_italic();
assert_eq!(node.to_html(), html);
}
{
let s = "*hello* world";
let html = "<em>hello</em>";
let mut p = Parser::new(s);
let node = p.parse_italic();
assert_eq!(node.to_html(), html);
}
{
let s = "*hello world";
let html = "*hello world";
let mut p = Parser::new(s);
let node = p.parse_italic();
assert_eq!(node.to_html(), html);
}
{
let s = "*hello world\n";
let html = "*hello world";
let mut p = Parser::new(s);
let node = p.parse_italic();
assert_eq!(node.to_html(), html);
}
{
let s = "*hello world\n\n";
let html = "*hello world";
let mut p = Parser::new(s);
let node = p.parse_italic();
assert_eq!(node.to_html(), html);
}
{
let s = "*hello\n\nworld*";
let html = "*hello";
let mut p = Parser::new(s);
let node = p.parse_italic();
assert_eq!(node.to_html(), html);
} }
out
} }
} }

View File

@ -3,16 +3,13 @@ use transpiler::parser::Parser;
pub fn main() -> Result<(), Box<dyn std::error::Error>> { pub fn main() -> Result<(), Box<dyn std::error::Error>> {
let file = Filesystem::read_file("./test.md"); let file = Filesystem::read_file("./test.md");
let mut content; let content;
match file { match file {
Ok(s) => content = s, Ok(s) => content = s,
Err(err) => panic!("Failed to read file. {}", err), Err(err) => panic!("Failed to read file. {}", err),
} }
// Normalize char stream let parser = Parser::new(&content);
content = content.replace("\r\n", "\n").replace("\r", "");
let mut parser = Parser::new(&content);
let node = parser.parse_document(); let node = parser.parse_document();
match Filesystem::write_file("./output.html", &node.to_html()) { match Filesystem::write_file("./output.html", &node.to_html()) {

View File

@ -1,8 +1 @@
hello *world*. This is pre*tty* cool # h1, **this** is sick as fuck
What about this
This *should have a star