From 39186fad509697c189d83e3dfa4563ee3241b149 Mon Sep 17 00:00:00 2001 From: Hayden Hargreaves Date: Tue, 14 Oct 2025 13:07:55 -0700 Subject: [PATCH 1/3] (FEAT): Worked on the parser class definition. There are no implementations yet, just a rough outline and some comments. This commit also includes an update to the Makefile to use wild cards to build the `libs`, which will solve the issue with dynamic updating. --- Makefile | 7 +++- lib/parser.cpp | 31 ++++++--------- lib/parser.h | 101 ++++++++++++++++++++++++++++++++++++++++++++----- lib/util.cpp | 24 ++++++++++++ lib/util.h | 10 +++++ src/main.cpp | 6 ++- syntax.md | 2 +- 7 files changed, 148 insertions(+), 33 deletions(-) create mode 100644 lib/util.cpp create mode 100644 lib/util.h diff --git a/Makefile b/Makefile index 1a338d2..e11f80d 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ LIB_DIR = lib # Executable name TARGET = parser +# Automatically find all source files SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp) LIB_FILES := $(wildcard $(LIB_DIR)/*.cpp) ALL_SOURCES = $(SRC_FILES) $(LIB_FILES) @@ -30,10 +31,12 @@ $(BUILD_DIR): $(TARGET): $(OBJECTS) $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@ -$(BUILD_DIR)/main.o: $(SRC_DIR)/main.cpp $(LIB_DIR)/parser.h +# Generic rule for all .cpp files in the src/ directory +$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ -$(BUILD_DIR)/parser.o: $(LIB_DIR)/parser.cpp $(LIB_DIR)/parser.h +# Generic rule for all .cpp files in the lib/ directory +$(BUILD_DIR)/%.o: $(LIB_DIR)/%.cpp $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ test: all diff --git a/lib/parser.cpp b/lib/parser.cpp index 1ab53b2..f2ac975 100644 --- a/lib/parser.cpp +++ b/lib/parser.cpp @@ -1,26 +1,12 @@ #include "./parser.h" +#include "./util.h" #include #include using std::string; -void removeWhitespace(string &input) { - size_t end = input.find_last_not_of(" \t\n\r\f\v"); - if (end != std::string::npos) { - input.erase(end + 1); - } else { - input.clear(); // String contains only whitespace - } - - size_t start = input.find_first_not_of(" \t\n\r\f\v"); - if (start != std::string::npos) { - input.erase(0, start); - } else { - input.clear(); // String contains only whitespace - } -} - Parser::Parser(string input_file_path, string output_file_path) { + // NOTE: Remove any white space AROUND the inputs removeWhitespace(input_file_path); removeWhitespace(output_file_path); @@ -33,11 +19,18 @@ Parser::Parser(string input_file_path, string output_file_path) { // NOTE: If the user does not provide an output file, then we should construct // one using the input file with .md swapped with the extension. if (output_file_path == "") { - std::cout << "CLEANING" << std::endl; int ext_idx = input_file_path.find_last_of('.'); string output_cleaned = input_file_path.substr(0, ext_idx) + ".html"; this->output_file_path = output_cleaned; - } else { - this->output_file_path = output_file_path; + return; } + + this->output_file_path = output_file_path; +} + +void Parser::Inspect() { + std::cout << "std::string input_file_path: " << this->input_file_path + << std::endl; + std::cout << "std::string output_file_path: " << this->output_file_path + << std::endl; } diff --git a/lib/parser.h b/lib/parser.h index e8e8db2..972ccee 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -2,20 +2,103 @@ #define PARSER_H #include +#include #include +using std::string; + +/** + * @brief Markdown parser class. + * + * Converts a Markdown file into an HTML output. This is done using a + * recursive descent parser and converting the Markdown into a DOM tree. + * Once the DOM tree exists, it is converted into an HTML string and + * written to the output file provided. + * + * This class will have a `DOM` and a `DOMParser` which are used in this + * process. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ class Parser { -private: - std::string input_file_path; - std::string output_file_path; - public: - Parser(std::string input_file_path, std::string output_file_path = ""); + Parser(string input_file_path, string output_file_path = ""); - inline void Print() { - std::cout << this->input_file_path << " -> " << this->output_file_path - << std::endl; - } + /** + * @brief Inspect (view) contents of the class. + * + * Print each member of the class in its current state. Used for debugging. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + void Inspect(); + + /** + * + * @brief Parse an entire document. + * + * This function will be called to yield the result. This is the entry point + * to the recursive descent parser. + * + * Currently, there are no parameters, they are still under consideration. + * + * It will be important to remember states between lines. For example, a + * paragraph that spans many lines should be inside the same node. But + * white space causes the node to be broken. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + void ParseDocument(void); + +protected: + /** + * @brief Input file path. + * + * Must be provided by the user. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + string input_file_path; + + /** + * @brief Output file path. + * + * If not provided, will be generated using the `input_file_path` by removing + * the extension and appending `.html`. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + string output_file_path; + + // NOTE: We need a stack, just not sure what goes in it yet + // std::stack stack; + +private: + /** + * @brief Parse a single line. + * + * How does this function work... + * This is where the magic happens. + * + * @param line Target line to parse, as string. + * @return DOMNode, once exists + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + void ParseLine(string line); + + // NOTE: Parser operations, again, abstract, just for brainstorming now + // These should operate on internal state, not lines themselves + void ParseHeader(); + void ParseParagraph(); + void ParseItalic(); + void ParseBold(); + void ParseBoldItalic(); + + // NOTE: Character operations, these are just for brainstorming + char Peek(); + void Consume(); + bool EndOfLine(); }; #endif diff --git a/lib/util.cpp b/lib/util.cpp new file mode 100644 index 0000000..0811048 --- /dev/null +++ b/lib/util.cpp @@ -0,0 +1,24 @@ +#include "./util.h" + +void removeTrailingWhitespace(std::string &input) { + size_t start = input.find_first_not_of(" \t\n\r\f\v"); + if (start != std::string::npos) { + input.erase(0, start); + } else { + input.clear(); + } +} + +void removeLeadingWhitespace(std::string &input) { + size_t end = input.find_last_not_of(" \t\n\r\f\v"); + if (end != std::string::npos) { + input.erase(end + 1); + } else { + input.clear(); + } +} + +void removeWhitespace(std::string &input) { + removeLeadingWhitespace(input); + removeTrailingWhitespace(input); +} diff --git a/lib/util.h b/lib/util.h new file mode 100644 index 0000000..6a3c9ca --- /dev/null +++ b/lib/util.h @@ -0,0 +1,10 @@ +#ifndef UTIL_H +#define UTIL_H + +#include + +void removeTrailingWhitespace(std::string &input); +void removeLeadingWhitespace(std::string &input); +void removeWhitespace(std::string &input); + +#endif diff --git a/src/main.cpp b/src/main.cpp index 032fcd4..358777c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,10 +10,10 @@ int main(int argc, char **argv) { try { if (argc >= 3) { Parser p(argv[1], argv[2]); - p.Print(); + p.Inspect(); } else { Parser p(argv[1]); - p.Print(); + p.Inspect(); } } catch (const std::runtime_error &e) { std::cout << "Caught an error: " << e.what() << std::endl; @@ -21,5 +21,7 @@ int main(int argc, char **argv) { std::cout << "Caught an error: UNKNOWN" << std::endl; } + std::cout << std::endl; + return 0; } diff --git a/syntax.md b/syntax.md index d780cfe..a44c791 100644 --- a/syntax.md +++ b/syntax.md @@ -35,4 +35,4 @@ this is a break, because it ends with two spaces ->

However
this is a b Double returns also -yield line breaks ->

Double returns also
yield line breaks

+yields new paragraphs ->

Double returns also

yields new paragraphs

From d0daf4f598cf719167d86aca51c644da6b62db93 Mon Sep 17 00:00:00 2001 From: Hayden Hargreaves Date: Tue, 14 Oct 2025 15:01:49 -0700 Subject: [PATCH 2/3] (DOC): Added some doc comments to the util class --- lib/util.h | 20 ++++++++++++++++++++ syntax.md | 7 +++++++ 2 files changed, 27 insertions(+) diff --git a/lib/util.h b/lib/util.h index 6a3c9ca..f773d94 100644 --- a/lib/util.h +++ b/lib/util.h @@ -3,8 +3,28 @@ #include +/** + * @brief Remove all white space after the content. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ void removeTrailingWhitespace(std::string &input); + +/** + * @brief Remove all white space before the content. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ void removeLeadingWhitespace(std::string &input); + +/** + * @brief Remove all white space before and after the content. + * + * This uses the removeTrailingWhitespace and the removeLeadingWhitespace + * methods together on the same string. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ void removeWhitespace(std::string &input); #endif diff --git a/syntax.md b/syntax.md index a44c791..d965dfd 100644 --- a/syntax.md +++ b/syntax.md @@ -36,3 +36,10 @@ this is a break, because it ends with two spaces ->

However
this is a b Double returns also yields new paragraphs ->

Double returns also

yields new paragraphs

+ + +*italic* -> italic +**bold** -> bold +***italic bold*** -> italic bold + +hello **world** -> [TextClass: hello, BoldClass: world] From 5fd5822b6bd063e8a5b8cb0e35c3682a93940ad1 Mon Sep 17 00:00:00 2001 From: Hayden Hargreaves Date: Tue, 14 Oct 2025 20:34:45 -0700 Subject: [PATCH 3/3] (FEAT): Began working on node implementations. Drew out an inheritance map to review with the team. --- lib/node.cpp | 21 ++++++++++++++++ lib/node.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 lib/node.cpp create mode 100644 lib/node.h diff --git a/lib/node.cpp b/lib/node.cpp new file mode 100644 index 0000000..0ed30f8 --- /dev/null +++ b/lib/node.cpp @@ -0,0 +1,21 @@ +#include "./node.h" + +#include +#include + +void Node::Inspect(int indent) { + + if (this->children.size() > 0) { + for (int i = 0; i <= indent; i++) { + std::cout << "\t"; + } + std::cout << "std::vector children: " << std::endl; + } + + for (const auto &child : this->children) { + for (int i = 0; i <= indent; i++) { + std::cout << "\t"; + } + child->Inspect(indent + 1); + } +} diff --git a/lib/node.h b/lib/node.h new file mode 100644 index 0000000..47a3918 --- /dev/null +++ b/lib/node.h @@ -0,0 +1,71 @@ +#ifndef NODE_H +#define NODE_H + +#include +#include +#include + +/// NOTE: What the heck are unique pointers (unique_ptrs) +/// They are basically an abstraction over the typical (raw) pointer. +/// They handle ownership and deletion of the pointer for us, to prevent memory +/// leaks. +/// They are pretty easy to use, and only need to exist in places where new ones +/// are created. i.e., functions should not accept unique_ptrs, instead they +/// should accept normal pointers or references. +/// When calling a function that accepts a raw pointer, the unique_ptrs.get() +/// method is required. When calling a function that accepts a reference, the +/// reference operator (*) works perfectly fine. Hence, in this project we will +/// try to avoid using raw pointers, and only use references when needed. + +// NOTE ABC +class Node { +protected: + /** + * @brief List of children nodes. + * + * Most nodes will not have children, but some may, therefore this class must + * have it. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + std::vector> children; + +public: + /** + * @brief Inspect (view) the contents of the Node. + * + * This is a recursive approach to allow for indentation for easier viewing. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + virtual void Inspect(int indent = 0); + + /** + * @brief Return the node as a string. + * + * In this ABC the content is just returned with no modifications. The child + * nodes are expected to modify this behavior. i.e. wrapping in HTML tags. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + virtual std::string ToHtml() const = 0; + + virtual void AddChild(std::unique_ptr child) { + // Move ownership from the existing owner, to this class + this->children.push_back(std::move(child)); + } + + /** + * @brief Return a read-only (const) list of children. + * + * Return our list of unique ptrs, they are const and therefore only have read + * access. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + virtual const std::vector> &GetChilren() const { + return this->children; + } +}; + +#endif