Compare commits

..

12 Commits

Author SHA1 Message Date
Hayden Hargreaves
7f8c978e56 This is going on hold. It's too complicated for my bad Rust skills. 2025-11-27 13:52:44 -07:00
Hayden Hargreaves
9f2595faeb (FIX): Fixed the inline node issue.
This does not include parse fixes, just compile-required fixes.
2025-11-27 11:57:28 -07:00
Hayden Hargreaves
79633bd059 (FEAT): parse_italic implemented.
But now I have realized that the parser should work differently and with
mutual recursion. The "inline" nodes (except the text node) should all
have children.
2025-11-27 11:37:06 -07:00
Hayden Hargreaves
3c25e6b9e8 This stuff sucks actually, clones are annoying as hell.
This peekable class might not work, it lowkey sucks.
2025-11-25 20:08:13 -07:00
Hayden Hargreaves
bacc17ab7d (TEST): Finished tests for the node type 2025-11-25 16:42:04 -07:00
Hayden Hargreaves
c2c9335f92 (TEST): Working on node tests 2025-11-23 18:28:11 -07:00
Hayden Hargreaves
d43285bd63 (TEST): Started tests on nodes 2025-11-23 17:47:19 -07:00
Hayden Hargreaves
929c569a18 (TEST): Filesystem tests and cleaned up 2025-11-23 17:45:27 -07:00
Hayden Hargreaves
67542b8153 (FEAT): Migrated files using crates 2025-11-23 17:32:40 -07:00
Hayden Hargreaves
ee616a2cc7 (FIX): moving to desktop 2025-11-18 21:05:36 -07:00
Hayden Hargreaves
0d693fc728 Yeah, this, this is magic 2025-11-17 22:36:28 -07:00
Hayden Hargreaves
ed6e217ae8 Holy mother of femboys, I love rust. 2025-11-17 22:33:37 -07:00
15 changed files with 1706 additions and 29 deletions

2
.gitignore vendored
View File

@ -4,3 +4,5 @@
parser parser
/.vscode /.vscode
/*.html /*.html
/target
/target/*

7
Cargo.lock generated Normal file
View File

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "MarkdownToHtmlTranspiler"
version = "0.1.0"

11
Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
[package]
name = "MarkdownToHtmlTranspiler"
version = "0.1.0"
edition = "2024"
[dependencies]
[lib]
name = "transpiler"
path = "lib/mod.rs"

View File

@ -20,6 +20,10 @@
gcc gcc
gdb gdb
stdenv stdenv
rustup
rustc
cargo
]; ];
# Define the shell that will be executed. # Define the shell that will be executed.

49
lib/filesystem.rs Normal file
View File

@ -0,0 +1,49 @@
use std::io::Write;
use std::{fs, io};
use std::path::Path;
/// Stateless namespace for the file helpers used by the transpiler.
pub struct Filesystem;

impl Filesystem {
    /// Reads the entire file at `path` into a `String`.
    ///
    /// The `P: AsRef<Path>` bound lets callers pass `&str`, `String`, `PathBuf` or anything else
    /// that can be borrowed as a `Path`. Any I/O error from the read is returned unchanged.
    pub fn read_file<P: AsRef<Path>>(path: P) -> io::Result<String> {
        let target: &Path = path.as_ref();
        fs::read_to_string(target)
    }

    /// Creates the file at `path` (truncating it if it already exists) and writes `contents`
    /// into it. Errors from either the create or the write step are returned to the caller.
    pub fn write_file<P: AsRef<Path>>(path: P, contents: &str) -> io::Result<()> {
        fs::File::create(path).and_then(|mut file| file.write_all(contents.as_bytes()))
    }
}
#[cfg(test)]
mod filesystem_tests {
    use std::fs;

    use super::Filesystem;

    /// The checked-in fixture must be returned exactly as stored on disk.
    #[test]
    fn reads_file() {
        let text = Filesystem::read_file("./test/filesystem_test.md")
            .unwrap_or_else(|err| unreachable!("{}", err));
        assert_eq!(text, "DO NOT DELETE. Used in filesystem.rs tests.\n");
    }

    /// Whatever `write_file` stores must be readable back through the standard library.
    #[test]
    fn writes_file() {
        let path = String::from("./test/filesystem_test_output.md");
        let content = String::from("TESTING OUTPUT");

        Filesystem::write_file(&path, &content).unwrap_or_else(|err| unreachable!("{}", err));

        let written = fs::read_to_string(&path).unwrap_or_else(|err| unreachable!("{}", err));
        assert_eq!(written, content);
    }
}

3
lib/mod.rs Normal file
View File

@ -0,0 +1,3 @@
pub mod node;
pub mod parser;
pub mod filesystem;

772
lib/node.rs Normal file
View File

@ -0,0 +1,772 @@
/// A node of the document tree that is rendered to HTML by [`Node::to_html`].
///
/// `PartialEq`/`Eq` are derived so that whole trees can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Node {
    // Container nodes: everything here owns a list of child nodes.
    Document { children: Vec<Node> },
    Heading { level: usize, children: Vec<Node> },
    Paragraph { children: Vec<Node> },
    List { ordered: bool, children: Vec<Node> },
    ListItem { children: Vec<Node> },
    CodeBlock { children: Vec<Node> },
    BlockQuote { children: Vec<Node> },
    Link { href: String, children: Vec<Node> },
    Bold { children: Vec<Node> },
    Italic { children: Vec<Node> },
    BoldItalic { children: Vec<Node> },
    // Leaf nodes carrying raw string content.
    Text { content: String },
    Code { content: String },
    // Leaf node described only by attributes.
    Image { src: String, alt: String },
}

impl Node {
    /// Recursively renders this node and all of its descendants as an HTML string.
    ///
    /// Calling it on a `Document` yields a complete page; calling it on any other node yields
    /// just that fragment. No indentation is produced.
    ///
    /// Heading levels are clamped to `1..=6` because HTML defines no other heading tags, and
    /// attribute values (`href`, `src`, `alt`) are escaped so that a `"` or `&` inside them
    /// cannot break out of the attribute. Text and code content is emitted verbatim.
    pub fn to_html(&self) -> String {
        match self {
            // Container nodes
            Node::Document { children } => format!(
                "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Document</title>\n</head>\n<body>\n{}</body>\n</html>",
                Self::render_children(children)
            ),
            Node::Heading { level, children } => {
                let level = (*level).clamp(1, 6);
                format!(
                    "<h{level}>{}</h{level}>\n",
                    Self::render_children(children),
                    level = level
                )
            }
            Node::Paragraph { children } => {
                format!("<p>{}</p>\n", Self::render_children(children))
            }
            Node::List { ordered, children } => {
                let tag = if *ordered { "ol" } else { "ul" };
                format!(
                    "<{tag}>{}</{tag}>\n",
                    Self::render_children(children),
                    tag = tag
                )
            }
            Node::ListItem { children } => {
                format!("<li>{}</li>\n", Self::render_children(children))
            }
            Node::CodeBlock { children } => {
                format!("<code>{}</code>\n", Self::render_children(children))
            }
            Node::BlockQuote { children } => {
                format!(
                    "<blockquote>{}</blockquote>\n",
                    Self::render_children(children)
                )
            }
            Node::Link { href, children } => format!(
                "<a href=\"{}\">{}</a>",
                Self::escape_attribute(href),
                Self::render_children(children)
            ),
            Node::Bold { children } => {
                format!("<strong>{}</strong>", Self::render_children(children))
            }
            Node::Italic { children } => {
                format!("<em>{}</em>", Self::render_children(children))
            }
            Node::BoldItalic { children } => format!(
                "<strong><em>{}</em></strong>",
                Self::render_children(children)
            ),
            // Leaf nodes
            Node::Text { content } => content.clone(),
            Node::Code { content } => format!("<code>{}</code>", content),
            Node::Image { src, alt } => format!(
                "<img src=\"{}\" alt=\"{}\">\n",
                Self::escape_attribute(src),
                Self::escape_attribute(alt)
            ),
        }
    }

    /// Renders every child in order and concatenates the results.
    fn render_children(children: &[Node]) -> String {
        children.iter().map(Node::to_html).collect()
    }

    /// Escapes the characters that are unsafe inside a double-quoted HTML attribute value.
    fn escape_attribute(value: &str) -> String {
        let mut escaped = String::with_capacity(value.len());
        for ch in value.chars() {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '"' => escaped.push_str("&quot;"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    /// Reports whether this node carries no content.
    ///
    /// Container nodes are empty when they have no children (the check is shallow: a child that
    /// is itself empty still counts). A `Link` must additionally have an empty `href`. Leaf
    /// nodes are empty when all of their string fields are empty.
    pub fn is_empty(&self) -> bool {
        match self {
            Node::Document { children }
            | Node::Heading { children, .. }
            | Node::Paragraph { children }
            | Node::List { children, .. }
            | Node::ListItem { children }
            | Node::CodeBlock { children }
            | Node::BlockQuote { children }
            | Node::Bold { children }
            | Node::Italic { children }
            | Node::BoldItalic { children } => children.is_empty(),
            Node::Link { href, children } => children.is_empty() && href.is_empty(),
            Node::Text { content } | Node::Code { content } => content.is_empty(),
            Node::Image { src, alt } => src.is_empty() && alt.is_empty(),
        }
    }

    /// Returns the children of a container node, or `None` for leaf nodes (`Text`, `Code`,
    /// `Image`), which cannot hold children at all.
    pub fn children(&self) -> Option<&[Node]> {
        match self {
            Node::Document { children }
            | Node::Heading { children, .. }
            | Node::Paragraph { children }
            | Node::List { children, .. }
            | Node::ListItem { children }
            | Node::CodeBlock { children }
            | Node::BlockQuote { children }
            | Node::Bold { children }
            | Node::Italic { children }
            | Node::BoldItalic { children }
            | Node::Link { children, .. } => Some(children.as_slice()),
            Node::Text { .. } | Node::Code { .. } | Node::Image { .. } => None,
        }
    }

    /// Appends `child` to the end of this node's children.
    ///
    /// # Panics
    ///
    /// Panics when called on a leaf node (`Text`, `Code` or `Image`).
    pub fn add_child(&mut self, child: Node) {
        match self {
            Node::Document { children }
            | Node::Heading { children, .. }
            | Node::Paragraph { children }
            | Node::List { children, .. }
            | Node::ListItem { children }
            | Node::CodeBlock { children }
            | Node::BlockQuote { children }
            | Node::Bold { children }
            | Node::Italic { children }
            | Node::BoldItalic { children }
            | Node::Link { children, .. } => children.push(child),
            Node::Text { .. } | Node::Code { .. } | Node::Image { .. } => {
                panic!("Can't add child to this node type.")
            }
        }
    }
}
#[cfg(test)]
mod node_tests {
    use super::Node;

    /// Shorthand for a `Node::Text` leaf.
    fn text(content: &str) -> Node {
        Node::Text {
            content: content.into(),
        }
    }

    /// Builds one instance of every child-bearing variant, each owning a copy of `children`,
    /// paired with the variant name used in assertion messages. `href` is only used by `Link`.
    fn containers(href: &str, children: &[Node]) -> Vec<(&'static str, Node)> {
        let kids = || children.to_vec();
        vec![
            ("Document", Node::Document { children: kids() }),
            (
                "Heading",
                Node::Heading {
                    level: 1,
                    children: kids(),
                },
            ),
            ("Paragraph", Node::Paragraph { children: kids() }),
            (
                "List",
                Node::List {
                    ordered: false,
                    children: kids(),
                },
            ),
            ("ListItem", Node::ListItem { children: kids() }),
            ("CodeBlock", Node::CodeBlock { children: kids() }),
            ("BlockQuote", Node::BlockQuote { children: kids() }),
            ("Bold", Node::Bold { children: kids() }),
            ("Italic", Node::Italic { children: kids() }),
            ("BoldItalic", Node::BoldItalic { children: kids() }),
            (
                "Link",
                Node::Link {
                    href: href.into(),
                    children: kids(),
                },
            ),
        ]
    }

    #[test]
    fn can_return_html_string_for_structure_nodes() {
        let child = || vec![text("CONTENT")];

        let mut cases: Vec<(Node, String)> = vec![
            (
                Node::Document { children: child() },
                "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Document</title>\n</head>\n<body>\nCONTENT</body>\n</html>".into(),
            ),
            (
                Node::Paragraph { children: child() },
                "<p>CONTENT</p>\n".into(),
            ),
            (
                Node::List {
                    ordered: false,
                    children: child(),
                },
                "<ul>CONTENT</ul>\n".into(),
            ),
            (
                Node::List {
                    ordered: true,
                    children: child(),
                },
                "<ol>CONTENT</ol>\n".into(),
            ),
            (
                Node::ListItem { children: child() },
                "<li>CONTENT</li>\n".into(),
            ),
            (
                Node::CodeBlock { children: child() },
                "<code>CONTENT</code>\n".into(),
            ),
            (
                Node::BlockQuote { children: child() },
                "<blockquote>CONTENT</blockquote>\n".into(),
            ),
            (
                Node::Bold { children: child() },
                "<strong>CONTENT</strong>".into(),
            ),
            (
                Node::Italic { children: child() },
                "<em>CONTENT</em>".into(),
            ),
            (
                Node::BoldItalic { children: child() },
                "<strong><em>CONTENT</em></strong>".into(),
            ),
            (
                Node::Link {
                    href: "HREF".into(),
                    children: child(),
                },
                "<a href=\"HREF\">CONTENT</a>".into(),
            ),
        ];
        // Every valid heading level maps to the matching <hN> tag.
        for level in 1..=6 {
            cases.push((
                Node::Heading {
                    level,
                    children: child(),
                },
                format!("<h{0}>CONTENT</h{0}>\n", level),
            ));
        }

        for (node, expected) in cases {
            assert_eq!(node.to_html(), expected, "unexpected HTML for {:?}", node);
        }
    }

    #[test]
    fn can_return_html_string_for_inline_nodes() {
        assert_eq!(text("x").to_html(), "x");

        let code = Node::Code {
            content: "x".into(),
        };
        assert_eq!(code.to_html(), "<code>x</code>");
    }

    #[test]
    fn can_return_html_string_for_special_nodes() {
        let image = Node::Image {
            src: "SOURCE".into(),
            alt: "ALT".into(),
        };
        assert_eq!(image.to_html(), "<img src=\"SOURCE\" alt=\"ALT\">\n");
    }

    #[test]
    fn can_return_when_empty_for_structure_nodes() {
        for (name, node) in containers("x", &[text("x")]) {
            assert!(!node.is_empty(), "{} with a child should not be empty", name);
        }
        for (name, node) in containers("", &[]) {
            assert!(node.is_empty(), "{} without children should be empty", name);
        }

        // Emptiness is shallow: an empty child still makes its parent non-empty.
        let holds_empty_child = Node::Document {
            children: vec![Node::Paragraph { children: vec![] }],
        };
        assert!(!holds_empty_child.is_empty());

        // A link with a target but no label still carries content.
        let href_only = Node::Link {
            href: "x".into(),
            children: vec![],
        };
        assert!(!href_only.is_empty());
    }

    #[test]
    fn can_return_when_empty_for_inline_nodes() {
        let code = |content: &str| Node::Code {
            content: content.into(),
        };

        assert!(!text("text").is_empty());
        assert!(!code("code").is_empty());

        assert!(text("").is_empty());
        assert!(code("").is_empty());
    }

    #[test]
    fn can_return_when_empty_for_special_nodes() {
        let image = |value: &str| Node::Image {
            src: value.into(),
            alt: value.into(),
        };

        assert!(!image("x").is_empty());
        assert!(image("").is_empty());
    }

    #[test]
    fn children_returns_some_for_structure_nodes() {
        for (name, node) in containers("x", &[text("x")]) {
            let children = node
                .children()
                .unwrap_or_else(|| panic!("{} should have children", name));
            assert_eq!(children.len(), 1, "{} should expose its single child", name);
        }
    }

    #[test]
    fn children_returns_none_for_inline_nodes() {
        let code = Node::Code {
            content: "x".into(),
        };
        assert!(text("x").children().is_none());
        assert!(code.children().is_none());
    }

    #[test]
    fn children_returns_none_for_special_nodes() {
        let image = Node::Image {
            src: "x".into(),
            alt: "x".into(),
        };
        assert!(image.children().is_none());
    }

    #[test]
    fn add_child_succeeds_for_structure_nodes() {
        for (name, mut node) in containers("x", &[]) {
            node.add_child(text("x"));
            let len = node.children().map_or(0, |c| c.len());
            assert_eq!(len, 1, "{} should have 1 child", name);
        }
    }

    #[test]
    #[should_panic(expected = "Can't add child to this node type.")]
    fn add_child_panics_for_text_node() {
        let mut node = text("x");
        node.add_child(text("x"));
    }

    #[test]
    #[should_panic(expected = "Can't add child to this node type.")]
    fn add_child_panics_for_code_node() {
        let mut node = Node::Code {
            content: "x".into(),
        };
        node.add_child(text("x"));
    }

    #[test]
    #[should_panic(expected = "Can't add child to this node type.")]
    fn add_child_panics_for_image_node() {
        let mut node = Node::Image {
            src: "x".into(),
            alt: "x".into(),
        };
        node.add_child(text("x"));
    }
}

731
lib/parser.rs Normal file
View File

@ -0,0 +1,731 @@
use crate::node::Node;
/// Parser state: a borrowed source string plus a cursor into it.
///
/// The parsing methods build a `Node` tree from `content`.
#[derive(Debug)]
pub struct Parser<'a> {
/// The source text being parsed. It is only borrowed, so the parser never modifies it.
content: &'a str,
/// Cursor into `content`; `new` starts it at 0.
/// NOTE(review): presumably advanced by the `consume*` helpers, which are not visible
/// here; confirm whether it counts bytes or characters.
position: usize,
}
impl<'a> Parser<'a> {
// Content should be normalized before being passed into this function. Since we do not take
// ownership here, we cannot mutate it.
pub fn new(content: &'a str) -> Self {
Self {
content,
position: 0,
}
}
/// Parses the whole input into a `Node::Document`.
///
/// Blocks are read one after another until the end of the input; blocks that come back
/// empty are discarded instead of being attached to the document.
pub fn parse_document(&mut self) -> Node {
    let mut children = Vec::new();

    while !self.is_eof() {
        let block = self.parse_block();
        if block.is_empty() {
            continue;
        }
        children.push(block);
    }

    Node::Document { children }
}
/// Parses the next block-level node after skipping leading whitespace.
///
/// BUG: "nothing left to parse" is signalled with an empty `Text` node rather than an
/// `Option`; the caller is expected to detect and drop it.
fn parse_block(&mut self) -> Node {
    self.consume_whitespace();

    // At the end of the input there is no block to build: hand back the empty sentinel.
    if self.is_eof() {
        return Node::Text {
            content: String::new(),
        };
    }

    let lookahead = (self.peek(), self.peek_nth(1), self.peek_nth(2));

    // TODO: Only paragraphs are implemented for now, so every lookahead falls through to the
    // paragraph parser. Other block kinds get their own arms here.
    match lookahead {
        // (Some('#'), _, _) => self.parse_heading(),
        _ => self.parse_paragraph(),
    }
}
/// Parses one paragraph: leading whitespace is skipped and the inline content that follows
/// becomes the paragraph's children.
fn parse_paragraph(&mut self) -> Node {
    self.consume_whitespace();
    let children = self.parse_inline();
    Node::Paragraph { children }
}
// --- INLINE PARSING ---
/// Parses a run of inline content into a list of nodes.
///
/// The run ends at the end of the input or at a blank line (two consecutive newlines), which
/// is left unconsumed. Plain characters are gathered into `Text` nodes; `**` starts a bold
/// span and a single `*` starts an italic span.
fn parse_inline(&mut self) -> Vec<Node> {
    let mut nodes = Vec::new();
    // Plain characters accumulate here until a delimiter or the end of the run flushes them.
    let mut pending = String::new();

    while !self.is_eof() {
        match (self.peek(), self.peek_nth(1)) {
            (None, _) | (Some('\n'), Some('\n')) => break,
            (Some('*'), next) => {
                // Emit the text gathered so far, reusing the buffer without cloning it.
                if !pending.is_empty() {
                    nodes.push(Node::Text {
                        content: std::mem::take(&mut pending),
                    });
                }
                // `**` (bold) has to be recognised before a lone `*` (italic).
                let emphasis = match next {
                    Some('*') => self.parse_bold(),
                    _ => self.parse_italic(),
                };
                nodes.push(emphasis);
            }
            (Some(c), _) => {
                pending.push(c);
                self.consume();
            }
        }
    }

    if !pending.is_empty() {
        nodes.push(Node::Text { content: pending });
    }
    nodes
}
/// Parses a bold span. The cursor must be on the opening `**`.
///
/// Returns `Node::Bold` when a closing `**` is found before the end of the input or a blank
/// line. A single `*` inside the span starts a nested italic span.
///
/// When the span is never closed the opening `**` is treated as literal text:
/// * if only plain text was collected, everything read is returned as one `Text` node;
/// * if a nested node was completed inside the span, only the literal `**` is returned and the
///   cursor is rewound to just after it, so the caller re-parses the content and the nested
///   node is kept instead of being dropped.
fn parse_bold(&mut self) -> Node {
    self.consume_n(2); // Consume opening '**'

    // Where the span's content begins; used to rewind if the span turns out to be unclosed.
    let content_start = self.position;

    let mut children = Vec::new();
    let mut pending = String::new();

    while !self.is_eof() {
        match (self.peek(), self.peek_nth(1)) {
            (None, _) | (Some('\n'), Some('\n')) => break,
            // Found closing '**'
            (Some('*'), Some('*')) => {
                if !pending.is_empty() {
                    children.push(Node::Text { content: pending });
                }
                self.consume_n(2);
                return Node::Bold { children };
            }
            // Single '*' inside bold (italic)
            (Some('*'), _) => {
                if !pending.is_empty() {
                    children.push(Node::Text {
                        content: std::mem::take(&mut pending),
                    });
                }
                children.push(self.parse_italic());
            }
            (Some(c), _) => {
                pending.push(c);
                self.consume();
            }
        }
    }

    // No closing '**' found.
    if children
        .iter()
        .all(|child| matches!(child, Node::Text { .. }))
    {
        // Only plain text was read: return it as literal text with the '**' prefix.
        let mut literal = String::from("**");
        for child in children {
            if let Node::Text { content } = child {
                literal.push_str(&content);
            }
        }
        literal.push_str(&pending);
        return Node::Text { content: literal };
    }

    // A nested node was completed inside the unclosed span. Flattening to text would lose
    // it, so emit just the literal delimiter and let the caller parse the content again.
    self.position = content_start;
    Node::Text {
        content: "**".into(),
    }
}
/// Parses an italic span. The cursor must be on the opening `*`.
///
/// Returns `Node::Italic` when a closing single `*` is found before the end of the input or a
/// blank line. A `**` inside the span starts a nested bold span.
///
/// When the span is never closed the opening `*` is treated as literal text:
/// * if only plain text was collected, everything read is returned as one `Text` node;
/// * if a nested node was completed inside the span, only the literal `*` is returned and the
///   cursor is rewound to just after it, so the caller re-parses the content and the nested
///   node is kept instead of being dropped.
fn parse_italic(&mut self) -> Node {
    self.consume(); // Consume opening '*'

    // Where the span's content begins; used to rewind if the span turns out to be unclosed.
    let content_start = self.position;

    let mut children = Vec::new();
    let mut pending = String::new();

    while !self.is_eof() {
        match (self.peek(), self.peek_nth(1)) {
            (None, _) | (Some('\n'), Some('\n')) => break,
            // Check for '**' (bold inside italic)
            (Some('*'), Some('*')) => {
                if !pending.is_empty() {
                    children.push(Node::Text {
                        content: std::mem::take(&mut pending),
                    });
                }
                children.push(self.parse_bold());
            }
            // Single '*' - our closing delimiter
            (Some('*'), _) => {
                if !pending.is_empty() {
                    children.push(Node::Text { content: pending });
                }
                self.consume();
                return Node::Italic { children };
            }
            (Some(c), _) => {
                pending.push(c);
                self.consume();
            }
        }
    }

    // No closing '*' found.
    if children
        .iter()
        .all(|child| matches!(child, Node::Text { .. }))
    {
        // Only plain text was read: return it as literal text with the '*' prefix.
        let mut literal = String::from("*");
        for child in children {
            if let Node::Text { content } = child {
                literal.push_str(&content);
            }
        }
        literal.push_str(&pending);
        return Node::Text { content: literal };
    }

    // A nested node was completed inside the unclosed span. Flattening to text would lose
    // it, so emit just the literal delimiter and let the caller parse the content again.
    self.position = content_start;
    Node::Text {
        content: "*".into(),
    }
}
// --- INLINE PARSING ---
// fn parse_inline(&mut self) -> Vec<Node> {
// let mut nodes = vec![];
// let mut str = String::new();
//
// while !self.is_eof() {
// let c1 = self.peek();
// let c2 = self.peek_nth(1);
// let c3 = self.peek_nth(2);
//
// match (c1, c2, c3) {
// // Stop at double newline (paragraph break)
// (None, _, _) | (Some('\n'), Some('\n'), _) => break,
//
// // Check for *** (bold italic) - LONGEST FIRST
// (Some('*'), Some('*'), Some('*')) => {
// if !str.is_empty() {
// nodes.push(Node::Text {
// content: str.clone(),
// });
// str.clear();
// }
// nodes.push(self.parse_bold_italic());
// }
//
// // Check for ** (bold)
// (Some('*'), Some('*'), _) => {
// if !str.is_empty() {
// nodes.push(Node::Text {
// content: str.clone(),
// });
// str.clear();
// }
// nodes.push(self.parse_bold());
// }
//
// // Check for * (italic)
// (Some('*'), _, _) => {
// if !str.is_empty() {
// nodes.push(Node::Text {
// content: str.clone(),
// });
// str.clear();
// }
// nodes.push(self.parse_italic());
// }
//
// // Regular character
// (Some(c), _, _) => {
// str.push(c);
// self.consume();
// }
// }
// }
//
// if !str.is_empty() {
// nodes.push(Node::Text { content: str });
// }
//
// nodes
// }
//
// fn parse_bold_italic(&mut self) -> Node {
// self.consume_n(3); // Consume opening '***'
//
// let mut children = vec![];
// let mut str = String::new();
//
// while !self.is_eof() {
// let c1 = self.peek();
// let c2 = self.peek_nth(1);
// let c3 = self.peek_nth(2);
//
// match (c1, c2, c3) {
// (None, _, _) | (Some('\n'), Some('\n'), _) => break,
//
// // Found closing '***'
// (Some('*'), Some('*'), Some('*')) => {
// if !str.is_empty() {
// children.push(Node::Text { content: str });
// }
// self.consume_n(3);
// return Node::BoldItalic { children };
// }
//
// // Regular character (no nested formatting in bold-italic for simplicity)
// (Some(c), _, _) => {
// str.push(c);
// self.consume();
// }
// }
// }
//
// // No closing '***' found - return as text
// if !str.is_empty() {
// children.push(Node::Text { content: str });
// }
// let mut text = String::from("***");
// for child in children {
// if let Node::Text { content } = child {
// text.push_str(&content);
// }
// }
// Node::Text { content: text }
// }
//
// fn parse_bold(&mut self) -> Node {
// self.consume_n(2); // Consume opening '**'
//
// let mut children = vec![];
// let mut str = String::new();
//
// while !self.is_eof() {
// let c1 = self.peek();
// let c2 = self.peek_nth(1);
// let c3 = self.peek_nth(2);
//
// match (c1, c2, c3) {
// (None, _, _) | (Some('\n'), Some('\n'), _) => break,
//
// // Check for closing '**' but NOT '***'
// (Some('*'), Some('*'), Some('*')) => {
// // This is ***, not our closing **
// // Treat as text (or you could support nested bold-italic)
// str.push('*');
// self.consume();
// }
//
// // Found closing '**'
// (Some('*'), Some('*'), _) => {
// if !str.is_empty() {
// children.push(Node::Text { content: str });
// }
// self.consume_n(2);
// return Node::Bold { children };
// }
//
// // Single '*' inside bold (italic)
// (Some('*'), _, _) => {
// if !str.is_empty() {
// children.push(Node::Text {
// content: str.clone(),
// });
// str.clear();
// }
// children.push(self.parse_italic());
// }
//
// (Some(c), _, _) => {
// str.push(c);
// self.consume();
// }
// }
// }
//
// // No closing '**' found - return as text with '**' prefix
// if !str.is_empty() {
// children.push(Node::Text { content: str });
// }
// let mut text = String::from("**");
// for child in children {
// if let Node::Text { content } = child {
// text.push_str(&content);
// }
// }
// Node::Text { content: text }
// }
//
// fn parse_italic(&mut self) -> Node {
// self.consume(); // Consume opening '*'
//
// let mut children = vec![];
// let mut str = String::new();
//
// while !self.is_eof() {
// let c1 = self.peek();
// let c2 = self.peek_nth(1);
// let c3 = self.peek_nth(2);
//
// match (c1, c2, c3) {
// (None, _, _) | (Some('\n'), Some('\n'), _) => break,
//
// // Check for '***' - not our closing
// (Some('*'), Some('*'), Some('*')) => {
// // Treat as text or handle specially
// str.push('*');
// self.consume();
// }
//
// // Check for '**' (bold inside italic)
// (Some('*'), Some('*'), _) => {
// if !str.is_empty() {
// children.push(Node::Text {
// content: str.clone(),
// });
// str.clear();
// }
// children.push(self.parse_bold());
// }
//
// // Single '*' - our closing delimiter
// (Some('*'), _, _) => {
// if !str.is_empty() {
// children.push(Node::Text { content: str });
// }
// self.consume();
// return Node::Italic { children };
// }
//
// (Some(c), _, _) => {
// str.push(c);
// self.consume();
// }
// }
// }
//
// // No closing '*' found - return as text with '*' prefix
// if !str.is_empty() {
// children.push(Node::Text { content: str });
// }
// let mut text = String::from("*");
// for child in children {
// if let Node::Text { content } = child {
// text.push_str(&content);
// }
// }
// Node::Text { content: text }
// }
//
//
//
//
// fn parse_inline(&mut self) -> Vec<Node> {
// self.parse_inline_until(&[])
// }
//
// fn parse_inline_until(&mut self, chars: &[char]) -> Vec<Node> {
// let mut nodes = vec![];
// let mut str = String::new();
//
// while !self.is_eof() {
// let c1 = self.peek();
// let c2 = self.peek_nth(1);
// let c3 = self.peek_nth(2);
//
// match (c1, c2, c3) {
// // Default stop conditions for all elements: empty or block break
// (None, _, _) | (Some('\n'), Some('\n'), _) => break,
//
// // Parse bold nodes
// (Some('*'), Some('*'), _) => {
// if !str.is_empty() {
// nodes.push(Node::Text {
// content: str.clone(),
// });
// str.clear();
// }
// nodes.push(self.parse_bold());
// }
//
// // Check the dynamic stop conditions
// (Some(c), _, _) if chars.contains(&c) => break,
//
// // Parse italic nodes
// (Some('*'), _, _) => {
// if !str.is_empty() {
// nodes.push(Node::Text {
// content: str.clone(),
// });
// str.clear();
// }
// nodes.push(self.parse_italic());
// }
//
// (Some(c), _, _) => {
// str.push(c);
// self.consume();
// }
// }
// }
//
// // If content remains, push it to the list
// if !str.is_empty() {
// nodes.push(Node::Text { content: str });
// }
//
// nodes
// }
//
// fn parse_bold(&mut self) -> Node {
// self.consume_n(2); // Consume opening '**'
//
// // DON'T pass '*' as stop char - we need to look for '**' specifically
// let mut children = vec![];
// let mut str = String::new();
//
// while !self.is_eof() {
// let c1 = self.peek();
// let c2 = self.peek_nth(1);
//
// match (c1, c2) {
// (None, _) | (Some('\n'), Some('\n')) => break,
//
// // Found closing '**'
// (Some('*'), Some('*')) => {
// if !str.is_empty() {
// children.push(Node::Text { content: str });
// }
// self.consume_n(2);
// return Node::Bold { children };
// }
//
// // Single '*' inside bold (could be italic)
// (Some('*'), _) => {
// if !str.is_empty() {
// children.push(Node::Text {
// content: str.clone(),
// });
// str.clear();
// }
// children.push(self.parse_italic());
// }
//
// (Some(c), _) => {
// str.push(c);
// self.consume();
// }
// }
// }
//
// // No closing '**' found
// if !str.is_empty() {
// children.push(Node::Text { content: str });
// }
// let mut text = String::from("**");
// for child in children {
// if let Node::Text { content } = child {
// text.push_str(&content);
// }
// }
// Node::Text { content: text }
// }
//
// fn parse_italic(&mut self) -> Node {
// self.consume(); // Consume opening '*'
//
// // Parse inline content until we hit closing '*' or end condition
// let children = self.parse_inline_until(&['*', '\n']);
//
// // Check if we found the closing '*'
// if self.peek() == Some('*') {
// self.consume(); // Consume closing '*'
// Node::Italic { children }
// } else {
// // No closing '*' found - return as plain text with the '*' prefix
// let mut text = String::from("*");
// for child in children {
// if let Node::Text { content } = child {
// text.push_str(&content);
// }
// // Note: This is simplified - you'd need to flatten properly
// }
// Node::Text { content: text }
// }
// }
// --- HELPERS ---
/// Reports whether the cursor has reached (or passed) the end of the input.
///
/// `position` is a byte offset (it is advanced by `len_utf8` in `consume_n`), so it is
/// compared against the byte length of `content`.
fn is_eof(&self) -> bool {
    self.content.len() <= self.position
}
/// Returns the character at the cursor without advancing, or `None` when no input remains.
fn peek(&self) -> Option<char> {
    // Equivalent to looking zero characters ahead.
    self.content[self.position..].chars().next()
}
/// Looks `n` characters ahead of the cursor without advancing.
///
/// `n` counts characters, not bytes; `n == 0` is the character at the cursor. Returns `None`
/// when fewer than `n + 1` characters remain.
fn peek_nth(&self, n: usize) -> Option<char> {
    let remaining = &self.content[self.position..];
    remaining.chars().nth(n)
}
/// Advances the cursor past one character; does nothing when no input remains.
fn consume(&mut self) {
    if let Some(ch) = self.content[self.position..].chars().next() {
        // Step by the character's encoded width so the cursor stays on a char boundary.
        self.position += ch.len_utf8();
    }
}
/// Advances the cursor past up to `n` characters, stopping early at the end of the input.
fn consume_n(&mut self, n: usize) {
    // Total encoded width of the next `n` characters (fewer if the input runs out).
    let advance: usize = self.content[self.position..]
        .chars()
        .take(n)
        .map(char::len_utf8)
        .sum();
    self.position += advance;
}
/// Skips every consecutive whitespace character (per `char::is_whitespace`) at the cursor.
///
/// Stops at the first non-whitespace character or at the end of the input.
fn consume_whitespace(&mut self) {
    while matches!(self.peek(), Some(ch) if ch.is_whitespace()) {
        self.consume();
    }
}
}
#[cfg(test)]
mod parser_tests {
    use super::Parser;

    /// Runs `parse_inline` over `input` and checks that it yields exactly the nodes whose
    /// HTML renderings are listed in `expected`, in order.
    fn assert_inline_html(input: &str, expected: &[&str]) {
        let mut parser = Parser::new(input);
        let nodes = parser.parse_inline();
        assert_eq!(nodes.len(), expected.len());
        for (index, want) in expected.iter().enumerate() {
            assert_eq!(nodes[index].to_html(), *want);
        }
    }

    #[test]
    fn test_parse_italic() {
        // Only `parse_italic` is exercised here, so every input is expected to start with a
        // '*'; otherwise the first character would simply be consumed. The `parse_inline`
        // tests cover the more specific cases.
        let cases = [
            // Properly closed italic.
            ("*hello world*", "<em>hello world</em>"),
            // Trailing text after the closing delimiter is not part of the node.
            ("*hello* world", "<em>hello</em>"),
            // Unclosed italic falls back to literal text.
            ("*hello world", "*hello world"),
            // A single newline is kept in the literal fallback.
            ("*hello world\n", "*hello world\n"),
            // A blank line (block break) ends the inline content.
            ("*hello world\n\n", "*hello world"),
            // A block break before the closing delimiter leaves the italic unclosed.
            ("*hello\n\nworld*", "*hello"),
        ];

        for (input, expected) in cases {
            let mut parser = Parser::new(input);
            let node = parser.parse_italic();
            assert_eq!(node.to_html(), expected);
        }
    }

    #[test]
    fn test_nested_bold_and_italics() {
        // Bold wrapping italic via a triple delimiter.
        assert_inline_html("***a***", &["<strong><em>a</em></strong>"]);
        // Bold nested inside italic.
        assert_inline_html("*a **b** c*", &["<em>a <strong>b</strong> c</em>"]);
        // Italic nested inside bold.
        assert_inline_html("**a *b* c**", &["<strong>a <em>b</em> c</strong>"]);
        // Same-kind delimiters do not nest: the inner '*' closes the first italic.
        assert_inline_html("*a *b* c*", &["<em>a </em>", "b", "<em> c</em>"]);
        // Same for bold: the inner '**' closes the first bold.
        assert_inline_html(
            "**a **b** c**",
            &["<strong>a </strong>", "b", "<strong> c</strong>"],
        );
    }
}

99
src/boilerplate.rs Normal file
View File

@ -0,0 +1,99 @@
/// This is an old code example, moving to using enums now.
///
/// Trait-object node model: every node can render itself to HTML and report whether it is
/// empty. Container nodes additionally override [`Node::add_child`] and [`Node::children`].
pub trait Node {
    /// Renders this node as an HTML string.
    fn to_html(&self) -> String;

    /// Reports whether this node is empty.
    fn is_empty(&self) -> bool;

    /// Appends a child node.
    ///
    /// # Panics
    ///
    /// The default implementation always panics; only nodes that can hold children
    /// override it.
    fn add_child(&mut self, _child: Box<dyn Node>) {
        panic!("Cannot add children to this node.")
    }

    /// Returns the node's children, or `None` (the default) for nodes without any.
    fn children(&self) -> Option<&[Box<dyn Node>]> {
        Option::None
    }
}
/// Root node of a document; its `Node::to_html` implementation wraps the rendered children
/// in a complete HTML page.
pub struct DocumentNode {
    /// Child nodes, rendered in order inside `<body>`.
    pub children: Vec<Box<dyn Node>>,
}
/// Heading node, rendered as `<hN>…</hN>` where `N` is `size`.
pub struct HeadingNode {
    /// Heading level interpolated directly into the tag name.
    /// NOTE(review): nothing here restricts this to 1..=6 — confirm the parser guarantees it.
    pub size: u8,
    /// Child nodes, rendered in order inside the heading tag.
    pub children: Vec<Box<dyn Node>>,
}
/// Paragraph node, rendered as `<p>…</p>`.
pub struct ParagraphNode {
    /// Child nodes, rendered in order inside the paragraph tag.
    pub children: Vec<Box<dyn Node>>,
}
impl Node for DocumentNode {
    /// Renders every child in order and embeds the result in a full HTML page skeleton
    /// (doctype, `<head>` with charset/viewport/title, and `<body>`).
    fn to_html(&self) -> String {
        let mut body = String::new();
        for node in &self.children {
            body.push_str(&node.to_html());
        }

        format!(
            "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Document</title>\n</head>\n<body>{}</body>\n</html>",
            body
        )
    }

    /// A document is empty when it has no children.
    fn is_empty(&self) -> bool {
        self.children.is_empty()
    }

    /// Appends `node` as the last child of the document.
    fn add_child(&mut self, node: Box<dyn Node>) {
        self.children.push(node);
    }

    /// Always returns `Some` with the document's children, in insertion order.
    fn children(&self) -> Option<&[Box<dyn Node>]> {
        Some(self.children.as_slice())
    }
}
impl Node for HeadingNode {
    /// Renders every child in order and wraps the result in `<hN>…</hN>`, where `N` is
    /// `self.size`.
    fn to_html(&self) -> String {
        let mut body = String::new();
        for node in &self.children {
            body.push_str(&node.to_html());
        }

        let level = self.size;
        format!("<h{}>{}</h{}>", level, body, level)
    }

    /// A heading is empty when it has no children.
    fn is_empty(&self) -> bool {
        self.children.is_empty()
    }

    /// Appends `node` as the last child of the heading.
    fn add_child(&mut self, node: Box<dyn Node>) {
        self.children.push(node);
    }

    /// Always returns `Some` with the heading's children, in insertion order.
    fn children(&self) -> Option<&[Box<dyn Node>]> {
        Some(self.children.as_slice())
    }
}
impl Node for ParagraphNode {
    /// Renders every child in order and wraps the result in `<p>…</p>`.
    fn to_html(&self) -> String {
        let mut body = String::new();
        for node in &self.children {
            body.push_str(&node.to_html());
        }

        format!("<p>{}</p>", body)
    }

    /// A paragraph is empty when it has no children.
    fn is_empty(&self) -> bool {
        self.children.is_empty()
    }

    /// Appends `node` as the last child of the paragraph.
    fn add_child(&mut self, node: Box<dyn Node>) {
        self.children.push(node);
    }

    /// Always returns `Some` with the paragraph's children, in insertion order.
    fn children(&self) -> Option<&[Box<dyn Node>]> {
        Some(self.children.as_slice())
    }
}

25
src/main.rs Normal file
View File

@ -0,0 +1,25 @@
use transpiler::filesystem::Filesystem;
use transpiler::parser::Parser;
/// Reads `./test.md`, parses it as a document and writes the rendered HTML to
/// `./output.html`.
///
/// # Errors
///
/// Returns an error (instead of panicking) when the input cannot be read or the output
/// cannot be written; the error carries the same message text the panics used to print.
pub fn main() -> Result<(), Box<dyn std::error::Error>> {
    let raw = Filesystem::read_file("./test.md")
        .map_err(|err| format!("Failed to read file. {}", err))?;

    // Normalize line endings: CRLF and lone CR both become LF. A lone CR is a line ending in
    // its own right, so it must be converted rather than dropped, otherwise CR-only files
    // collapse into a single line.
    let content = raw.replace("\r\n", "\n").replace('\r', "\n");

    let mut parser = Parser::new(&content);
    let node = parser.parse_document();

    Filesystem::write_file("./output.html", &node.to_html())
        .map_err(|err| format!("Failed to write to output. {}", err))?;
    println!("Input document parsed and output written.");

    Ok(())
}

1
test.md Normal file
View File

@ -0,0 +1 @@
**hello *world***

1
test/filesystem_test.md Normal file
View File

@ -0,0 +1 @@
DO NOT DELETE. Used in filesystem.rs tests.

View File

@ -0,0 +1 @@
TESTING OUTPUT

View File

@ -1,29 +0,0 @@
hello world
>
>
>
> hello world
> hello world
>
>
>
>
>
>
>
> **a final line**
>
>
>
>
>
> hello
hi mom
> hello world\n>\n>\n>\n> a new line