From 2881897c23ecf72222acdbb629b0d87c88caf08c Mon Sep 17 00:00:00 2001 From: Hayden Hargreaves Date: Tue, 28 Oct 2025 16:12:16 -0700 Subject: [PATCH 1/3] (FEAT): List nodes seem to be working. However, the DRY principals are being screwed around with like they don't exist. Some better architecture needs to be implemented. But that will take place after block code nodes and anchor tags are implemented. I will remain on this branch for those other implementations, for now. --- input.md | 18 +++++++ lib/parser.cpp | 118 +++++++++++++++++++++++++++++++++++++++++- lib/parser.h | 4 +- lib/structureNode.cpp | 10 ++-- 4 files changed, 144 insertions(+), 6 deletions(-) diff --git a/input.md b/input.md index f66ff5f..a4cf29c 100644 --- a/input.md +++ b/input.md @@ -20,3 +20,21 @@ this is too far` # ***This is both!*** ###### This is neither + +- Hello world +- This is a list + + +* this is also a list +* this is still a list + + +1. This list is ordered +2. This is **number two** + +- hello +world + +- hello + +world number two diff --git a/lib/parser.cpp b/lib/parser.cpp index 1d5a7e5..4e8b052 100644 --- a/lib/parser.cpp +++ b/lib/parser.cpp @@ -72,11 +72,30 @@ std::unique_ptr Parser::ParseBlock() { // std::unique_ptr block = std::make_unique(ch); // Consume(); - if (Peek() == '#') { + char c = Peek(); + char c_next = Peek(1); + + // 1. Parse heading + if (c == '#') { return ParseHeading(); } - // this is the default case + // 2. Parser unordered list + if (c == '*' || c == '-' || c == '+') { + // Next character must be space or tab + if (c_next == ' ' || c_next == '\t') { + return ParseList(false); + } + } + + // 3. Parse ordered list + // TODO: This only checks a single digit, should check for 'n' digits + if (std::isdigit(c) && c_next == '.') { + // TODO: Do we need to check for white space? + return ParseList(true); + } + + // 4. Parser paragraph return ParseParagraph(); } @@ -120,6 +139,44 @@ std::unique_ptr Parser::ParseHeading() { return node; } +std::unique_ptr Parser::ParseList(bool ordered) { + auto node = std::make_unique(ordered); + + // Consume the required white space and list char ('* ' or '1.') + while (true) { + + Consume(ordered ? 2 : 1); + ConsumeWhiteSpace(); + + // Parse until either '\n\n' (exit) or the next list element is found ('* ' + // or '1.') If '\n\n', then create a node and exit + auto children = ParseInlineListContent(); + for (auto &child : children) { + node->AddChild(std::move(child)); + } + + char c = Peek(); + char c_next = Peek(1); + + // 2. Parser unordered list + if (c == '*' || c == '-' || c == '+') { + if (c_next == ' ' || c_next == '\t') { + continue; + } + } + + // 3. Parse ordered list + // TODO: This only checks a single digit, should check for 'n' digits + if (std::isdigit(c) && c_next == '.') { + continue; + } + + break; + } + + return node; +}; + vector> Parser::ParseInline() { vector> nodes; string str; @@ -217,6 +274,63 @@ vector> Parser::ParseInlineHeading() { return nodes; } +vector> Parser::ParseInlineListContent() { + vector> nodes; + string str; + + while (!IsEOF()) { + char c = Peek(); + char c_next = Peek(1); + // If this char and next char are both newlines: then we have an empty line, + // we should stop. + if (c == '\n' && Peek(1) == '\n') + break; + + // Check if a list block has been found + if ((c == '*' || c == '-' || c == '+') && (c_next == ' ' || c_next == '\t')) + break; + + if (std::isdigit(c) && c_next == '.') + break; + + if (c == '*' && Peek(1) == '*' && Peek(2) == '*') { + PushTextNode(nodes, str); + auto node = ParseBoldItalic(); + if (!node->IsEmpty()) + nodes.push_back(std::move(node)); + continue; + } else if (c == '*' && Peek(1) == '*') { + PushTextNode(nodes, str); + auto node = ParseBold(); + if (!node->IsEmpty()) + nodes.push_back(std::move(node)); + continue; + } else if (c == '*') { + PushTextNode(nodes, str); + auto node = ParseItalic(); + if (!node->IsEmpty()) + nodes.push_back(std::move(node)); + continue; + } + + if (c == '`') { + PushTextNode(nodes, str); + auto node = ParseCode(); + if (!node->IsEmpty()) + nodes.push_back(std::move(node)); + continue; + } + + // If a newline, use a space instead + str += (c == '\n' ? ' ' : c); + Consume(); + } + + // Push the last node, if the string is not empty + PushTextNode(nodes, str); + return nodes; +} + std::unique_ptr Parser::ParseItalic() { string str; Consume(1); diff --git a/lib/parser.h b/lib/parser.h index 02ccb35..1305c02 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -121,10 +121,12 @@ private: std::unique_ptr ParseParagraph(); std::unique_ptr ParseHeading(); + std::unique_ptr ParseList(bool ordered); vector> ParseInline(); - // The only difference is the exit condition + // The only differences are the exit condition vector> ParseInlineHeading(); + vector> ParseInlineListContent(); void PushTextNode(vector> &nodes, string &str); diff --git a/lib/structureNode.cpp b/lib/structureNode.cpp index 55ac4d1..ca4b00f 100644 --- a/lib/structureNode.cpp +++ b/lib/structureNode.cpp @@ -55,11 +55,15 @@ string ParagraphNode::ToHtml() const { return ss.str(); } -// TODO: Implement string ListNode::ToHtml() const { std::stringstream ss; - ss << (this->ordered ? "
    NOT YET IMPLEMENTED
" - : "
    NOT YET IMPLEMENTED
"); + ss << (this->ordered ? "
    " : "
      ") << "\n"; + + for (const auto &child : this->GetChilren()) { + ss << "
    • " << child->ToHtml() << "
    • " << "\n"; + } + + ss << (this->ordered ? "
" : "") << "\n"; return ss.str(); } -- 2.47.2 From 1c81d2aa41e526870392fd8fd900dbb2e8d0172f Mon Sep 17 00:00:00 2001 From: Hayden Hargreaves Date: Tue, 28 Oct 2025 17:45:40 -0700 Subject: [PATCH 2/3] (FEAT): Includes support for code block segments. This includes the addition of lots of new node elements. --- input.md | 7 +++++++ lib/inlineNode.cpp | 2 ++ lib/inlineNode.h | 13 +++++++++++++ lib/parser.cpp | 39 ++++++++++++++++++++++++++++++++++++++- lib/parser.h | 1 + lib/structureNode.cpp | 13 +++++++++++++ lib/structureNode.h | 15 +++++++++++++++ 7 files changed, 89 insertions(+), 1 deletion(-) diff --git a/input.md b/input.md index a4cf29c..cd608b8 100644 --- a/input.md +++ b/input.md @@ -38,3 +38,10 @@ world - hello world number two + +``` +int x = 5; +int y = 10; + +console.log(x + y); // '15' +``` diff --git a/lib/inlineNode.cpp b/lib/inlineNode.cpp index 625de65..e0cced9 100644 --- a/lib/inlineNode.cpp +++ b/lib/inlineNode.cpp @@ -23,3 +23,5 @@ string BoldItalicNode::ToHtml() const { } string CodeNode::ToHtml() const { return "" + this->content + ""; } + +string RawTextNode::ToHtml() const { return this->content; }; diff --git a/lib/inlineNode.h b/lib/inlineNode.h index af825a7..9866b6e 100644 --- a/lib/inlineNode.h +++ b/lib/inlineNode.h @@ -117,4 +117,17 @@ public: std::string ToHtml() const; }; +/** + * @desc A raw text node. + * + * This node returns only it content, with no formatting at all. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ +class RawTextNode : public InlineNode { +public: + RawTextNode(std::string content) : InlineNode(content) {}; + std::string ToHtml() const; +}; + #endif diff --git a/lib/parser.cpp b/lib/parser.cpp index 4e8b052..f01c505 100644 --- a/lib/parser.cpp +++ b/lib/parser.cpp @@ -4,6 +4,7 @@ #include "structureNode.h" #include #include +#include #include #include @@ -95,7 +96,12 @@ std::unique_ptr Parser::ParseBlock() { return ParseList(true); } - // 4. Parser paragraph + // 4. Parse code block + if (c == '`' && c_next == '`' && Peek(2) == '`') { + return ParseCodeBlock(); + } + + // 5. Parser paragraph return ParseParagraph(); } @@ -177,6 +183,37 @@ std::unique_ptr Parser::ParseList(bool ordered) { return node; }; +std::unique_ptr Parser::ParseCodeBlock() { + auto node = std::make_unique(); + string str; + + // Remove the first three characters, the '```' + Consume(3); + + // Parse text into a single text node until '```' is found, include everything + // else + while (!IsEOF()) { + char c = Peek(); + if (c == '`' && Peek(1) == '`' && Peek(2) == '`') { + Consume(3); + break; + } + + // Swap any '\n' with BR tags, so it will visually break + if (c == '\n') + str += "\n
\n"; + else + str += c; + + Consume(); + } + + auto text_node = std::make_unique(str); + node->AddChild(std::move(text_node)); + + return node; +} + vector> Parser::ParseInline() { vector> nodes; string str; diff --git a/lib/parser.h b/lib/parser.h index 1305c02..ae01214 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -123,6 +123,7 @@ private: std::unique_ptr ParseHeading(); std::unique_ptr ParseList(bool ordered); vector> ParseInline(); + std::unique_ptr ParseCodeBlock(); // The only differences are the exit condition vector> ParseInlineHeading(); diff --git a/lib/structureNode.cpp b/lib/structureNode.cpp index ca4b00f..9516c3b 100644 --- a/lib/structureNode.cpp +++ b/lib/structureNode.cpp @@ -67,3 +67,16 @@ string ListNode::ToHtml() const { ss << (this->ordered ? "" : "") << "\n"; return ss.str(); } + +string CodeBlockNode::ToHtml() const { + std::stringstream ss; + + ss << "\n"; + + for (const auto &child : this->GetChilren()) { + ss << child->ToHtml() << "\n"; + } + + ss << "\n"; + return ss.str(); +} diff --git a/lib/structureNode.h b/lib/structureNode.h index 7e07bf8..d8632d3 100644 --- a/lib/structureNode.h +++ b/lib/structureNode.h @@ -111,4 +111,19 @@ public: std::string ToHtml() const; }; +/** + * @desc A code block container node. + * + * This node is used to wrap a code block node. When three '`' are used a + * code block should be created. This node's children are expected to be simple + * text nodes - containing no formatting at all. Since code blocks are not parsed + * any deeper then their parents. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ +class CodeBlockNode : public StructureNode { +public: + std::string ToHtml() const; +}; + #endif -- 2.47.2 From 6203f9c32567c1829cd6f01331058a547e9777fb Mon Sep 17 00:00:00 2001 From: Hayden Hargreaves Date: Tue, 28 Oct 2025 17:50:55 -0700 Subject: [PATCH 3/3] (FIX): Forgot this somehow --- lib/parser.h | 2 +- lib/structureNode.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/parser.h b/lib/parser.h index ae01214..12ef8b3 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -122,8 +122,8 @@ private: std::unique_ptr ParseParagraph(); std::unique_ptr ParseHeading(); std::unique_ptr ParseList(bool ordered); - vector> ParseInline(); std::unique_ptr ParseCodeBlock(); + vector> ParseInline(); // The only differences are the exit condition vector> ParseInlineHeading(); diff --git a/lib/structureNode.h b/lib/structureNode.h index d8632d3..186268e 100644 --- a/lib/structureNode.h +++ b/lib/structureNode.h @@ -114,10 +114,10 @@ public: /** * @desc A code block container node. * - * This node is used to wrap a code block node. When three '`' are used a + * This node is used to wrap a code block node. When three '`' are used a * code block should be created. This node's children are expected to be simple - * text nodes - containing no formatting at all. Since code blocks are not parsed - * any deeper then their parents. + * text nodes - containing no formatting at all. Since code blocks are not + * parsed any deeper then their parents. * * @author Hayden Hargreaves (hhargreaves2006@gmail.com) */ -- 2.47.2