(FEAT): parse_italic implemented.
However, I have since realized that the parser should be structured differently, using mutual recursion: all "inline" nodes (except the text node) should have children.
This commit is contained in:
parent
3c25e6b9e8
commit
79633bd059
@ -12,6 +12,8 @@ pub enum Node {
|
|||||||
|
|
||||||
// Inline Nodes
|
// Inline Nodes
|
||||||
Text { content: String },
|
Text { content: String },
|
||||||
|
|
||||||
|
// TODO: THESE SHOULD BE STRUCTURE NODES, SO THEY CAN BE CALLED RECURSIVELY
|
||||||
Bold { content: String },
|
Bold { content: String },
|
||||||
Italic { content: String },
|
Italic { content: String },
|
||||||
BoldItalic { content: String },
|
BoldItalic { content: String },
|
||||||
|
|||||||
328
lib/parser.rs
328
lib/parser.rs
@ -1,32 +1,26 @@
|
|||||||
use std::iter::Peekable;
|
|
||||||
use std::str::Chars;
|
|
||||||
|
|
||||||
use crate::node::Node;
|
use crate::node::Node;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Parser {
|
pub struct Parser<'a> {
|
||||||
content: String,
|
content: &'a str,
|
||||||
|
position: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Parser {
|
impl<'a> Parser<'a> {
|
||||||
/// Create a new parser object with the content attached. This does not take ownership of the
|
// Content should be normalized before being passed into this function. Since we do not take
|
||||||
/// string provided and therefore dies with the string. The input string is normalized to
|
// ownership here, we cannot mutate it.
|
||||||
/// support operation on all operating systems.
|
pub fn new(content: &'a str) -> Self {
|
||||||
pub fn new(content: &str) -> Self {
|
|
||||||
let normalized = content.replace("\r\n", "\n").replace("\r", "");
|
|
||||||
Self {
|
Self {
|
||||||
content: normalized,
|
content,
|
||||||
|
position: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_document(&self) -> Node {
|
pub fn parse_document(&mut self) -> Node {
|
||||||
let mut stream = self.content.chars().peekable();
|
|
||||||
|
|
||||||
let mut root = Node::Document { children: vec![] };
|
let mut root = Node::Document { children: vec![] };
|
||||||
|
|
||||||
// Same as !IsEOF from the CPP implementation
|
while !self.is_eof() {
|
||||||
while stream.peek().is_some() {
|
let block = self.parse_block();
|
||||||
let block = self.parse_block(&mut stream);
|
|
||||||
if !block.is_empty() {
|
if !block.is_empty() {
|
||||||
root.add_child(block);
|
root.add_child(block);
|
||||||
}
|
}
|
||||||
@ -35,178 +29,222 @@ impl Parser {
|
|||||||
root
|
root
|
||||||
}
|
}
|
||||||
|
|
||||||
/// BUG: USING CLONES IS FUCKED AS HELL, STOP THIS SHIT
|
// BUG: We should be using optional, not blank nodes
|
||||||
fn parse_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
|
fn parse_block(&mut self) -> Node {
|
||||||
self.consume_whitespace(stream);
|
self.consume_whitespace();
|
||||||
|
|
||||||
// If we are at the end, return an empty node; it will be ignored
|
// If we are at the end, return an empty node; the caller is expected to skip it.
|
||||||
let Some(&c1) = stream.peek() else {
|
if self.is_eof() {
|
||||||
return Node::Text { content: "".into() };
|
return Node::Text { content: "".into() };
|
||||||
};
|
}
|
||||||
|
|
||||||
// Use a clone to look ahead
|
let c1 = self.peek();
|
||||||
let mut clone = stream.clone();
|
let c2 = self.peek_nth(1);
|
||||||
clone.next();
|
let c3 = self.peek_nth(2);
|
||||||
|
|
||||||
let c2 = clone.next();
|
// TODO: For now we are just implementing paragraphs. So we can start with inline parsing
|
||||||
let c3 = clone.next();
|
|
||||||
|
|
||||||
// Now we can handle numbers from 0 to 99 for ordered lists
|
|
||||||
match (c1, c2, c3) {
|
match (c1, c2, c3) {
|
||||||
('#', _, _) => self.parse_heading(stream),
|
// (Some('#'), _, _) => self.parse_heading(),
|
||||||
('*' | '-' | '+', Some(' ' | '\t'), _) => self.parse_list(stream, false),
|
_ => self.parse_paragraph(),
|
||||||
(d, Some('.'), _) if d.is_ascii_digit() => self.parse_list(stream, true),
|
|
||||||
(d1, Some(d2), Some('.')) if d1.is_ascii_digit() && d2.is_ascii_digit() => {
|
|
||||||
self.parse_list(stream, true)
|
|
||||||
}
|
|
||||||
('`', Some('`'), Some('`')) => self.parse_code_block(stream),
|
|
||||||
('!', Some('['), _) => self.parse_image(stream),
|
|
||||||
('>', _, _) => self.parse_block_quote(stream),
|
|
||||||
_ => self.parse_paragraph(stream),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- STRUCTURE PARSING ---
|
fn parse_heading(&mut self) -> Node {
|
||||||
fn parse_paragraph(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
|
|
||||||
let children = self.parse_inline(stream);
|
|
||||||
Node::Paragraph { children }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_heading(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
|
|
||||||
// Consume the hashes to determine the size, then consume the whitespace
|
|
||||||
let hashes = self.consume_until_char(stream, ' ');
|
|
||||||
self.consume_whitespace(stream);
|
|
||||||
|
|
||||||
let children = self.parse_inline(stream);
|
|
||||||
dbg!(&children);
|
|
||||||
Node::Heading {
|
Node::Heading {
|
||||||
level: hashes.len(),
|
level: 1,
|
||||||
children,
|
children: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_list(&self, stream: &mut Peekable<Chars<'_>>, ordered: bool) -> Node {
|
fn parse_paragraph(&mut self) -> Node {
|
||||||
Node::Text { content: "".into() }
|
Node::Paragraph {
|
||||||
}
|
children: self.parse_inline(),
|
||||||
|
}
|
||||||
fn parse_block_quote(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
|
|
||||||
Node::Text { content: "".into() }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_code_block(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
|
|
||||||
Node::Text { content: "".into() }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_image(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
|
|
||||||
Node::Text { content: "".into() }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- INLINE PARSING ---
|
// --- INLINE PARSING ---
|
||||||
fn parse_inline(&self, stream: &mut Peekable<Chars<'_>>) -> Vec<Node> {
|
fn parse_inline(&mut self) -> Vec<Node> {
|
||||||
let mut nodes = vec![];
|
let mut nodes = vec![];
|
||||||
let mut str = String::new();
|
let mut str = "".to_string();
|
||||||
|
|
||||||
// use a clone to allow for peeking ahead
|
while !self.is_eof() {
|
||||||
// REMEMBER TO ALSO CONSUME ANYTIME MAIN STREAM IS CONSUMED
|
// c1 stores current char, c2/c3 store future, contextual chars
|
||||||
let mut clone = stream.clone();
|
let c1 = self.peek();
|
||||||
clone.next(); // Stay one ahead
|
let c2 = self.peek_nth(1);
|
||||||
|
let c3 = self.peek_nth(2);
|
||||||
while let Some(&c1) = stream.peek() {
|
|
||||||
let c2 = clone.next();
|
|
||||||
let c3 = clone.next();
|
|
||||||
|
|
||||||
// println!("({}, {}, {})", c1, c2.unwrap_or('~'), c3.unwrap_or('~'));
|
|
||||||
|
|
||||||
|
// TODO: Need to redesign the nodes
|
||||||
match (c1, c2, c3) {
|
match (c1, c2, c3) {
|
||||||
('\n', _, _) => break,
|
(None, _, _) | (Some('\n'), Some('\n'), _) => break,
|
||||||
('!', Some('['), _) => { /* Image */ }
|
(Some('!'), Some('['), _) =>
|
||||||
('[', _, _) => { /* Link */ }
|
/* parse image */
|
||||||
('*', Some('*'), Some('*')) => { /* Bold Italic */ }
|
{
|
||||||
('*', Some('*'), _) => {
|
continue;
|
||||||
nodes.push(Node::Text { content: str });
|
}
|
||||||
str = "".into();
|
(Some('['), _, _) =>
|
||||||
let node = self.parse_bold(stream);
|
/* parse link */
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
(Some('*'), Some('*'), Some('*')) =>
|
||||||
|
/* parse bold italic */
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
(Some('*'), Some('*'), _) =>
|
||||||
|
/* parse bold */
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
(Some('*'), _, _) => {
|
||||||
|
nodes.push(Node::Text {
|
||||||
|
content: str.clone(),
|
||||||
|
});
|
||||||
|
str = "".to_string();
|
||||||
|
let node = self.parse_italic();
|
||||||
if !node.is_empty() {
|
if !node.is_empty() {
|
||||||
nodes.push(node)
|
nodes.push(node);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
('*', _, _) => { /* Italic */ }
|
(Some('`'), _, _) =>
|
||||||
('`', _, _) => { /* Code */ }
|
/* parse code */
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
_ => {
|
(Some(c), _, _) => {
|
||||||
// Should we swap '\n' with ' '
|
str.push(c);
|
||||||
str.push(c1);
|
self.consume();
|
||||||
stream.next();
|
|
||||||
clone.next();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Push final node
|
// TODO: Push text node
|
||||||
if !str.is_empty() {
|
nodes.push(Node::Text { content: str });
|
||||||
nodes.push(Node::Text { content: str });
|
|
||||||
}
|
|
||||||
nodes
|
nodes
|
||||||
}
|
}
|
||||||
|
|
||||||
/// BUG: THIS FUNCTION SHOULD PARSE UNTIL IT FINDS EITHER THE ** OR AN ENDING OF A BLOCK. FOR
|
fn parse_italic(&mut self) -> Node {
|
||||||
/// EXAMPLE: \n\n IS A NEW BLOCK AND THEN IT SHOULD END. BUT IF IT ENDS ON A NEW BLOCK, IT
|
let mut str = "".to_string();
|
||||||
/// SHOULD RETURN A TEXT NODE, WITH THE ** PREPENDED, SIGNIFYING FAILURE TO COMPLETE THE ENTIRE
|
self.consume(); // Consume the '*'
|
||||||
/// STRONG BLOCK.
|
|
||||||
fn parse_bold(&self, stream: &mut Peekable<Chars<'_>>) -> Node {
|
|
||||||
let mut str = String::new();
|
|
||||||
|
|
||||||
stream.next();
|
println!("'{}'", self.content);
|
||||||
stream.next();
|
|
||||||
|
|
||||||
let mut clone = stream.clone();
|
// Use loop instead of 'while !self.is_eof()' so we can make it to the (None, _) case to
|
||||||
clone.next();
|
// exit
|
||||||
|
loop {
|
||||||
while let Some(&c1) = stream.peek() {
|
let c1 = self.peek();
|
||||||
let c2 = clone.peek();
|
let c2 = self.peek_nth(1);
|
||||||
|
|
||||||
match (c1, c2) {
|
match (c1, c2) {
|
||||||
('\n', Some('\n')) => break,
|
(None, _) | (Some('\n'), None) | (Some('\n'), Some('\n')) => {
|
||||||
('*', Some('*')) => {
|
// In this case, we did not find an ending star, so we should return a normal
|
||||||
stream.next();
|
// node. But we have to add the star back since we consumed it already
|
||||||
stream.next();
|
str.insert(0, '*');
|
||||||
|
return Node::Text { content: str };
|
||||||
|
}
|
||||||
|
(Some('*'), _) => {
|
||||||
|
self.consume();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
_ => str.push(c1),
|
(Some(c), _) => {
|
||||||
|
str.push(c);
|
||||||
|
self.consume();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
stream.next();
|
|
||||||
clone.next();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("@str '{}'", str);
|
Node::Italic { content: str }
|
||||||
|
|
||||||
Node::Bold { content: str }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- HELPER FUNCTIONS ---
|
// --- HELPERS ---
|
||||||
fn consume_whitespace(&self, stream: &mut Peekable<Chars<'_>>) {
|
fn is_eof(&self) -> bool {
|
||||||
while let Some(&c) = stream.peek() {
|
self.position >= self.content.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peek(&self) -> Option<char> {
|
||||||
|
self.peek_nth(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peek_nth(&self, n: usize) -> Option<char> {
|
||||||
|
self.content[self.position..].chars().nth(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume(&mut self) {
|
||||||
|
self.consume_n(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_n(&mut self, n: usize) {
|
||||||
|
for _ in 0..n {
|
||||||
|
if let Some(c) = self.content[self.position..].chars().next() {
|
||||||
|
self.position += c.len_utf8();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_whitespace(&mut self) {
|
||||||
|
while let Some(c) = self.peek() {
|
||||||
if !c.is_whitespace() {
|
if !c.is_whitespace() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
stream.next();
|
self.consume();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod parser_tests {
|
||||||
|
use super::Parser;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_italic() {
|
||||||
|
// This test only tests the `parse_italic` method, so it is expected that the first
|
||||||
|
// character is a '*', otherwise the first character will be consumed. The `parse_inline`
|
||||||
|
// tests will be able to handle more specific cases.
|
||||||
|
{
|
||||||
|
let s = "*hello world*";
|
||||||
|
let html = "<em>hello world</em>";
|
||||||
|
let mut p = Parser::new(s);
|
||||||
|
let node = p.parse_italic();
|
||||||
|
assert_eq!(node.to_html(), html);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let s = "*hello* world";
|
||||||
|
let html = "<em>hello</em>";
|
||||||
|
let mut p = Parser::new(s);
|
||||||
|
let node = p.parse_italic();
|
||||||
|
assert_eq!(node.to_html(), html);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let s = "*hello world";
|
||||||
|
let html = "*hello world";
|
||||||
|
let mut p = Parser::new(s);
|
||||||
|
let node = p.parse_italic();
|
||||||
|
assert_eq!(node.to_html(), html);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let s = "*hello world\n";
|
||||||
|
let html = "*hello world";
|
||||||
|
let mut p = Parser::new(s);
|
||||||
|
let node = p.parse_italic();
|
||||||
|
assert_eq!(node.to_html(), html);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let s = "*hello world\n\n";
|
||||||
|
let html = "*hello world";
|
||||||
|
let mut p = Parser::new(s);
|
||||||
|
let node = p.parse_italic();
|
||||||
|
assert_eq!(node.to_html(), html);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let s = "*hello\n\nworld*";
|
||||||
|
let html = "*hello";
|
||||||
|
let mut p = Parser::new(s);
|
||||||
|
let node = p.parse_italic();
|
||||||
|
assert_eq!(node.to_html(), html);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// THIS DOES NOT CONSUME THE TARGET, IT STOPS RIGHT BEFORE IT AND RETURNS THE STRING UNTIL
|
|
||||||
/// ITSELF
|
|
||||||
fn consume_until_char(&self, stream: &mut Peekable<Chars<'_>>, target: char) -> String {
|
|
||||||
let mut out = String::new();
|
|
||||||
|
|
||||||
while let Some(&c) = stream.peek() {
|
|
||||||
if c == target {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
stream.next();
|
|
||||||
out.push(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
out
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,13 +3,16 @@ use transpiler::parser::Parser;
|
|||||||
|
|
||||||
pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
pub fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let file = Filesystem::read_file("./test.md");
|
let file = Filesystem::read_file("./test.md");
|
||||||
let content;
|
let mut content;
|
||||||
match file {
|
match file {
|
||||||
Ok(s) => content = s,
|
Ok(s) => content = s,
|
||||||
Err(err) => panic!("Failed to read file. {}", err),
|
Err(err) => panic!("Failed to read file. {}", err),
|
||||||
}
|
}
|
||||||
|
|
||||||
let parser = Parser::new(&content);
|
// Normalize char stream
|
||||||
|
content = content.replace("\r\n", "\n").replace("\r", "");
|
||||||
|
|
||||||
|
let mut parser = Parser::new(&content);
|
||||||
let node = parser.parse_document();
|
let node = parser.parse_document();
|
||||||
|
|
||||||
match Filesystem::write_file("./output.html", &node.to_html()) {
|
match Filesystem::write_file("./output.html", &node.to_html()) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user