Merge pull request '(FEAT): Worked on the parser class definition.' (#10) from feature/parser-class into main

Reviewed-on: azpect/MarkdownToHtmlCompiler#10
2025-10-15 12:56:46 -07:00 · 2025-10-15 12:56:46 -07:00 · 253ed8dfce
commit 253ed8dfce
parent 6e2dede9a0 5fd5822b6b
9 changed files with 267 additions and 33 deletions
--- a/7
+++ b/7
@ -10,6 +10,7 @@ LIB_DIR = lib
 # Executable name
 TARGET = parser

+# Automatically find all source files
 SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp)
 LIB_FILES := $(wildcard $(LIB_DIR)/*.cpp)
 ALL_SOURCES = $(SRC_FILES) $(LIB_FILES)
@ -30,10 +31,12 @@ $(BUILD_DIR):
 $(TARGET): $(OBJECTS)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@

-$(BUILD_DIR)/main.o: $(SRC_DIR)/main.cpp $(LIB_DIR)/parser.h
+# Generic rule for all .cpp files in the src/ directory
+$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp
 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@

-$(BUILD_DIR)/parser.o: $(LIB_DIR)/parser.cpp $(LIB_DIR)/parser.h
+# Generic rule for all .cpp files in the lib/ directory
+$(BUILD_DIR)/%.o: $(LIB_DIR)/%.cpp
 	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@

 test: all
--- a/lib/node.cpp
+++ b/lib/node.cpp
@ -0,0 +1,21 @@
+#include "./node.h"
+
+#include <iostream>
+#include <memory>
+
+/**
+ * @brief Print the children of this node, indented by nesting depth.
+ *
+ * When the node has children, a header line is written first. Every child is
+ * then inspected one level deeper, with the indentation for that child written
+ * just before the recursive call.
+ *
+ * @param indent Current nesting depth; `indent + 1` tabs are written per entry.
+ */
+void Node::Inspect(int indent) {
+  // Kept as a counting loop so a negative indent simply writes no tabs.
+  const auto write_tabs = [indent]() {
+    for (int depth = 0; depth <= indent; ++depth) {
+      std::cout << "\t";
+    }
+  };
+
+  if (!this->children.empty()) {
+    write_tabs();
+    std::cout << "std::vector<Node> children: " << std::endl;
+  }
+
+  for (const auto &child : this->children) {
+    write_tabs();
+    child->Inspect(indent + 1);
+  }
+}
--- a/lib/node.h
+++ b/lib/node.h
@ -0,0 +1,71 @@
+#ifndef NODE_H
+#define NODE_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+/// NOTE: What the heck are unique pointers (unique_ptrs)
+/// They are basically an abstraction over the typical (raw) pointer.
+/// They handle ownership and deletion of the pointer for us, to prevent memory
+/// leaks.
+/// They are pretty easy to use, and only need to exist in places where new ones
+/// are created or where ownership is handed over (e.g. Node::AddChild). Other
+/// functions should not accept unique_ptrs; instead they should accept normal
+/// pointers or references.
+/// When calling a function that accepts a raw pointer, the unique_ptr's .get()
+/// method is required. When calling a function that accepts a reference, the
+/// dereference operator (*) works perfectly fine. Hence, in this project we
+/// will try to avoid using raw pointers, and only use references when needed.
+
+// NOTE: Node is an abstract base class (ABC); ToHtml() is pure virtual.
+class Node {
+protected:
+  /**
+   * @brief List of children nodes.
+   *
+   * Most nodes will not have children, but some may, therefore this class must
+   * have it. Each child is owned by its parent through a unique_ptr.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  std::vector<std::unique_ptr<Node>> children;
+
+public:
+  Node() = default;
+
+  /**
+   * @brief Virtual destructor.
+   *
+   * Children are held as std::unique_ptr<Node>, so derived nodes are destroyed
+   * through a pointer to this base class. Without a virtual destructor that is
+   * undefined behavior.
+   */
+  virtual ~Node() = default;
+
+  // A node owns its children exclusively, so it cannot be copied. Declaring
+  // the destructor would suppress the implicit moves, so they are restored.
+  Node(const Node &) = delete;
+  Node &operator=(const Node &) = delete;
+  Node(Node &&) = default;
+  Node &operator=(Node &&) = default;
+
+  /**
+   * @brief Inspect (view) the contents of the Node.
+   *
+   * This is a recursive approach to allow for indentation for easier viewing.
+   *
+   * @param indent Current nesting depth, used to indent the output.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  virtual void Inspect(int indent = 0);
+
+  /**
+   * @brief Return the node as an HTML string.
+   *
+   * Pure virtual: this base class provides no implementation, so every
+   * concrete node must supply its own, i.e. wrapping its content in HTML tags.
+   *
+   * @return The HTML produced by the concrete node.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  virtual std::string ToHtml() const = 0;
+
+  /**
+   * @brief Append a child to this node.
+   *
+   * @param child Node to append; this node becomes its owner.
+   */
+  virtual void AddChild(std::unique_ptr<Node> child) {
+    // Move ownership from the existing owner, to this class
+    this->children.push_back(std::move(child));
+  }
+
+  /**
+   * @brief Return a read-only (const) list of children.
+   *
+   * Return our list of unique ptrs, they are const and therefore only have read
+   * access.
+   *
+   * NOTE: The name is misspelled ("Chilren"); it is kept so that existing
+   * callers keep compiling.
+   *
+   * @return Const reference to the children owned by this node.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  virtual const std::vector<std::unique_ptr<Node>> &GetChilren() const {
+    return this->children;
+  }
+};
+
+#endif
--- a/lib/parser.cpp
+++ b/lib/parser.cpp
@ -1,26 +1,12 @@
 #include "./parser.h"
+#include "./util.h"
 #include <cctype>
 #include <stdexcept>

 using std::string;

-void removeWhitespace(string &input) {
-  size_t end = input.find_last_not_of(" \t\n\r\f\v");
-  if (end != std::string::npos) {
-    input.erase(end + 1);
-  } else {
-    input.clear(); // String contains only whitespace
-  }
-
-  size_t start = input.find_first_not_of(" \t\n\r\f\v");
-  if (start != std::string::npos) {
-    input.erase(0, start);
-  } else {
-    input.clear(); // String contains only whitespace
-  }
-}
-
 Parser::Parser(string input_file_path, string output_file_path) {
+  // NOTE: Remove any white space AROUND the inputs
  removeWhitespace(input_file_path);
  removeWhitespace(output_file_path);

@ -33,11 +19,18 @@ Parser::Parser(string input_file_path, string output_file_path) {
  // NOTE: If the user does not provide an output file, then we should construct
  // one using the input file with .md swapped with the extension.
  if (output_file_path == "") {
-    std::cout << "CLEANING" << std::endl;
    int ext_idx = input_file_path.find_last_of('.');
    string output_cleaned = input_file_path.substr(0, ext_idx) + ".html";
    this->output_file_path = output_cleaned;
-  } else {
+    return;
+  }
+
  this->output_file_path = output_file_path;
 }
+
+/**
+ * @brief Dump the parser's members to standard output for debugging.
+ *
+ * Writes the input file path and then the output file path, one per line.
+ */
+void Parser::Inspect() {
+  const std::string &in_path = this->input_file_path;
+  const std::string &out_path = this->output_file_path;
+
+  std::cout << "std::string input_file_path: " << in_path << std::endl;
+  std::cout << "std::string output_file_path: " << out_path << std::endl;
 }
--- a/lib/parser.h
+++ b/lib/parser.h
@ -2,20 +2,103 @@
 #define PARSER_H

 #include <iostream>
+#include <stack>
 #include <string>

+using std::string;
+
+/**
+ * @brief Markdown parser class.
+ *
+ * Converts a Markdown file into an HTML output. This is done using a
+ * recursive descent parser and converting the Markdown into a DOM tree.
+ * Once the DOM tree exists, it is converted into an HTML string and
+ * written to the output file provided.
+ *
+ * This class will have a `DOM` and a `DOMParser` which are used in this
+ * process.
+ *
+ * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+ */
 class Parser {
-private:
-  std::string input_file_path;
-  std::string output_file_path;
-
 public:
-  Parser(std::string input_file_path, std::string output_file_path = "");
+  Parser(string input_file_path, string output_file_path = "");

-  inline void Print() {
-    std::cout << this->input_file_path << " -> " << this->output_file_path
-              << std::endl;
-  }
+  /**
+   * @brief Inspect (view) contents of the class.
+   *
+   * Print each member of the class in its current state. Used for debugging.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  void Inspect();
+
+  /**
+   *
+   * @brief Parse an entire document.
+   *
+   * This function will be called to yield the result. This is the entry point
+   * to the recursive descent parser.
+   *
+   * Currently, there are no parameters, they are still under consideration.
+   *
+   * It will be important to remember states between lines. For example, a
+   * paragraph that spans many lines should be inside the same node. But
+   * white space causes the node to be broken.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  void ParseDocument(void);
+
+protected:
+  /**
+   * @brief Input file path.
+   *
+   * Must be provided by the user.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  string input_file_path;
+
+  /**
+   * @brief Output file path.
+   *
+   * If not provided, will be generated using the `input_file_path` by removing
+   * the extension and appending `.html`.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  string output_file_path;
+
+  // NOTE: We need a stack, just not sure what goes in it yet
+  // std::stack<any> stack;
+
+private:
+  /**
+   * @brief Parse a single line.
+   *
+   * Only the declaration is part of this change; how a line gets parsed is
+   * still undecided.
+   *
+   * @param line Target line to parse, as string.
+   * @return Nothing yet; a DOMNode is planned once that type exists.
+   *
+   * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+   */
+  void ParseLine(string line);
+
+  // NOTE: Parser operations, again, abstract, just for brainstorming now
+  //       These should operate on internal state, not lines themselves
+  void ParseHeader();
+  void ParseParagraph();
+  void ParseItalic();
+  void ParseBold();
+  void ParseBoldItalic();
+
+  // NOTE: Character operations, these are just for brainstorming
+  char Peek();
+  void Consume();
+  bool EndOfLine();
 };

 #endif
--- a/lib/util.cpp
+++ b/lib/util.cpp
@ -0,0 +1,24 @@
+#include "./util.h"
+
+namespace {
+// Characters treated as white space by the helpers below.
+constexpr const char *kWhitespace = " \t\n\r\f\v";
+} // namespace
+
+/**
+ * @brief Remove all white space after the content.
+ *
+ * @param input String to trim in place. A string that is empty or holds only
+ *              white space ends up empty.
+ */
+void removeTrailingWhitespace(std::string &input) {
+  const size_t last = input.find_last_not_of(kWhitespace);
+  if (last != std::string::npos) {
+    input.erase(last + 1);
+  } else {
+    input.clear(); // Empty, or white space only
+  }
+}
+
+/**
+ * @brief Remove all white space before the content.
+ *
+ * @param input String to trim in place. A string that is empty or holds only
+ *              white space ends up empty.
+ */
+void removeLeadingWhitespace(std::string &input) {
+  const size_t first = input.find_first_not_of(kWhitespace);
+  if (first != std::string::npos) {
+    input.erase(0, first);
+  } else {
+    input.clear(); // Empty, or white space only
+  }
+}
+
+/**
+ * @brief Remove all white space before and after the content.
+ *
+ * @param input String to trim in place on both ends.
+ */
+void removeWhitespace(std::string &input) {
+  removeTrailingWhitespace(input);
+  removeLeadingWhitespace(input);
+}
--- a/lib/util.h
+++ b/lib/util.h
@ -0,0 +1,30 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <string>
+
+/**
+ * @brief Remove all white space after the content.
+ *
+ * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+ */
+void removeTrailingWhitespace(std::string &input);
+
+/**
+ * @brief Remove all white space before the content.
+ *
+ * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+ */
+void removeLeadingWhitespace(std::string &input);
+
+/**
+ * @brief Remove all white space before and after the content.
+ *
+ * This uses the removeTrailingWhitespace and the removeLeadingWhitespace
+ * methods together on the same string.
+ *
+ * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
+ */
+void removeWhitespace(std::string &input);
+
+#endif
--- a/src/main.cpp
+++ b/src/main.cpp
@ -10,10 +10,10 @@ int main(int argc, char **argv) {
  try {
    if (argc >= 3) {
      Parser p(argv[1], argv[2]);
-      p.Print();
+      p.Inspect();
    } else {
      Parser p(argv[1]);
-      p.Print();
+      p.Inspect();
    }
  } catch (const std::runtime_error &e) {
    std::cout << "Caught an error: " << e.what() << std::endl;
@ -21,5 +21,7 @@ int main(int argc, char **argv) {
    std::cout << "Caught an error: UNKNOWN" << std::endl;
  }

+  std::cout << std::endl;
+
  return 0;
 }
--- a/syntax.md
+++ b/syntax.md
@ -35,4 +35,11 @@ this is a break, because it ends with two spaces -> <p> However <br> this is a b

 Double returns also

-yield line breaks -> <p> Double returns also <br> yield line breaks </p>
+yields new paragraphs -> <p> Double returns also</p> <p> yields new paragraphs </p>
+
+
+*italic* -> <em>italic</em>
+**bold** -> <strong>bold</strong>
+***italic bold*** -> <strong><em>italic bold</em></strong>
+
+hello **world** -> [TextClass: hello, BoldClass: world]