(FEAT): Worked on the parser class definition. #10

Merged
azpect merged 3 commits from feature/parser-class into main 2025-10-15 12:56:47 -07:00
7 changed files with 148 additions and 33 deletions
Showing only changes of commit 39186fad50 - Show all commits

View File

@ -10,6 +10,7 @@ LIB_DIR = lib
# Executable name # Executable name
TARGET = parser TARGET = parser
# Automatically find all source files
SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp) SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp)
LIB_FILES := $(wildcard $(LIB_DIR)/*.cpp) LIB_FILES := $(wildcard $(LIB_DIR)/*.cpp)
ALL_SOURCES = $(SRC_FILES) $(LIB_FILES) ALL_SOURCES = $(SRC_FILES) $(LIB_FILES)
@ -30,10 +31,12 @@ $(BUILD_DIR):
$(TARGET): $(OBJECTS) $(TARGET): $(OBJECTS)
$(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@ $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@
$(BUILD_DIR)/main.o: $(SRC_DIR)/main.cpp $(LIB_DIR)/parser.h # Generic rule for all .cpp files in the src/ directory
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
$(BUILD_DIR)/parser.o: $(LIB_DIR)/parser.cpp $(LIB_DIR)/parser.h # Generic rule for all .cpp files in the lib/ directory
$(BUILD_DIR)/%.o: $(LIB_DIR)/%.cpp
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
test: all test: all

View File

@ -1,26 +1,12 @@
#include "./parser.h" #include "./parser.h"
#include "./util.h"
#include <cctype> #include <cctype>
#include <stdexcept> #include <stdexcept>
using std::string; using std::string;
/**
 * @brief Trim whitespace from both ends of a string, in place.
 *
 * The characters treated as whitespace are space, tab, newline, carriage
 * return, form feed and vertical tab. A string made up entirely of those
 * characters (or an empty string) ends up empty.
 *
 * @param input String to trim; modified directly.
 */
void removeWhitespace(string &input) {
  const char blanks[] = " \t\n\r\f\v";

  // Nothing but whitespace (or nothing at all): the result is empty.
  const std::string::size_type last_kept = input.find_last_not_of(blanks);
  if (last_kept == std::string::npos) {
    input.clear();
    return;
  }

  // Drop the tail first, then the head. A non-blank character is known to
  // exist at this point, so the forward search always succeeds.
  input.erase(last_kept + 1);
  input.erase(0, input.find_first_not_of(blanks));
}
Parser::Parser(string input_file_path, string output_file_path) { Parser::Parser(string input_file_path, string output_file_path) {
// NOTE: Remove any white space AROUND the inputs
removeWhitespace(input_file_path); removeWhitespace(input_file_path);
removeWhitespace(output_file_path); removeWhitespace(output_file_path);
@ -33,11 +19,18 @@ Parser::Parser(string input_file_path, string output_file_path) {
// NOTE: If the user does not provide an output file, then we should construct // NOTE: If the user does not provide an output file, then we should construct
// one using the input file with .md swapped with the extension. // one using the input file with .md swapped with the extension.
if (output_file_path == "") { if (output_file_path == "") {
std::cout << "CLEANING" << std::endl;
int ext_idx = input_file_path.find_last_of('.'); int ext_idx = input_file_path.find_last_of('.');
string output_cleaned = input_file_path.substr(0, ext_idx) + ".html"; string output_cleaned = input_file_path.substr(0, ext_idx) + ".html";
this->output_file_path = output_cleaned; this->output_file_path = output_cleaned;
} else { return;
this->output_file_path = output_file_path;
} }
this->output_file_path = output_file_path;
}
/**
 * @brief Print the parser's stored file paths.
 *
 * Writes `input_file_path` and then `output_file_path` to standard output,
 * one per line. Each value is prefixed with a label giving the member's type
 * and name. Every line ends with `std::endl`, so the stream is flushed after
 * each one.
 */
void Parser::Inspect() {
std::cout << "std::string input_file_path: " << this->input_file_path
<< std::endl;
std::cout << "std::string output_file_path: " << this->output_file_path
<< std::endl;
} }

View File

@ -2,20 +2,103 @@
#define PARSER_H #define PARSER_H
#include <iostream> #include <iostream>
#include <stack>
#include <string> #include <string>
using std::string;
/**
* @brief Markdown parser class.
*
* Converts a Markdown file into an HTML output. This is done using a
* recursive descent parser and converting the Markdown into a DOM tree.
* Once the DOM tree exists, it is converted into an HTML string and
* written to the output file provided.
*
* This class will have a `DOM` and a `DOMParser` which are used in this
* process.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
class Parser { class Parser {
private:
std::string input_file_path;
std::string output_file_path;
public: public:
Parser(std::string input_file_path, std::string output_file_path = ""); Parser(string input_file_path, string output_file_path = "");
inline void Print() { /**
std::cout << this->input_file_path << " -> " << this->output_file_path * @brief Inspect (view) contents of the class.
<< std::endl; *
} * Print each member of the class in its current state. Used for debugging.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
void Inspect();
/**
*
* @brief Parse an entire document.
*
* This function will be called to yield the result. This is the entry point
* to the recursive descent parser.
*
* Currently, there are no parameters, they are still under consideration.
*
* It will be important to remember states between lines. For example, a
* paragraph that spans many lines should be inside the same node. But
* white space causes the node to be broken.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
void ParseDocument(void);
protected:
/**
* @brief Input file path.
*
* Must be provided by the user.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
string input_file_path;
/**
* @brief Output file path.
*
* If not provided, will be generated using the `input_file_path` by removing
* the extension and appending `.html`.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
string output_file_path;
// NOTE: We need a stack, just not sure what goes in it yet
// std::stack<any> stack;
private:
/**
* @brief Parse a single line.
*
* How does this function work...
* This is where the magic happens.
*
* @param line Target line to parse, as string.
* @return DOMNode, once exists
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
void ParseLine(string line);
// NOTE: Parser operations, again, abstract, just for brainstorming now
// These should operate on internal state, not lines themselves
void ParseHeader();
void ParseParagraph();
void ParseItalic();
void ParseBold();
void ParseBoldItalic();
// NOTE: Character operations, these are just for brainstorming
char Peek();
void Consume();
bool EndOfLine();
}; };
#endif #endif

24
lib/util.cpp Normal file
View File

@ -0,0 +1,24 @@
#include "./util.h"
// Characters treated as whitespace by the trimming helpers below.
static const char kWhitespaceChars[] = " \t\n\r\f\v";

/**
 * @brief Remove whitespace from the END of a string, in place.
 *
 * Previously this function stripped the front of the string (its body was
 * swapped with removeLeadingWhitespace); it now does what its name says.
 *
 * @param input String to trim; modified directly. A string consisting only
 *              of whitespace becomes empty.
 */
void removeTrailingWhitespace(std::string &input) {
  const std::string::size_type last_kept =
      input.find_last_not_of(kWhitespaceChars);
  if (last_kept == std::string::npos) {
    // No non-whitespace character anywhere: nothing worth keeping.
    input.clear();
    return;
  }
  input.erase(last_kept + 1);
}

/**
 * @brief Remove whitespace from the START of a string, in place.
 *
 * Previously this function stripped the back of the string (its body was
 * swapped with removeTrailingWhitespace); it now does what its name says.
 *
 * @param input String to trim; modified directly. A string consisting only
 *              of whitespace becomes empty.
 */
void removeLeadingWhitespace(std::string &input) {
  const std::string::size_type first_kept =
      input.find_first_not_of(kWhitespaceChars);
  if (first_kept == std::string::npos) {
    // No non-whitespace character anywhere: nothing worth keeping.
    input.clear();
    return;
  }
  input.erase(0, first_kept);
}

/**
 * @brief Remove whitespace from both ends of a string, in place.
 *
 * Interior whitespace is left untouched.
 *
 * @param input String to trim; modified directly.
 */
void removeWhitespace(std::string &input) {
  removeLeadingWhitespace(input);
  removeTrailingWhitespace(input);
}

10
lib/util.h Normal file
View File

@ -0,0 +1,10 @@
#ifndef UTIL_H
#define UTIL_H
#include <string>

// String trimming helpers, defined in util.cpp. Each one takes the string by
// non-const reference, edits it in place and returns nothing.

/**
 * @brief One-sided trim; by its name, meant to strip the end of `input`.
 *
 * NOTE(review): confirm the definition in util.cpp trims the side this name
 * promises.
 */
void removeTrailingWhitespace(std::string &input);

/**
 * @brief One-sided trim; by its name, meant to strip the start of `input`.
 *
 * NOTE(review): confirm the definition in util.cpp trims the side this name
 * promises.
 */
void removeLeadingWhitespace(std::string &input);

/**
 * @brief Trim whitespace from both ends of `input`.
 */
void removeWhitespace(std::string &input);
#endif

View File

@ -10,10 +10,10 @@ int main(int argc, char **argv) {
try { try {
if (argc >= 3) { if (argc >= 3) {
Parser p(argv[1], argv[2]); Parser p(argv[1], argv[2]);
p.Print(); p.Inspect();
} else { } else {
Parser p(argv[1]); Parser p(argv[1]);
p.Print(); p.Inspect();
} }
} catch (const std::runtime_error &e) { } catch (const std::runtime_error &e) {
std::cout << "Caught an error: " << e.what() << std::endl; std::cout << "Caught an error: " << e.what() << std::endl;
@ -21,5 +21,7 @@ int main(int argc, char **argv) {
std::cout << "Caught an error: UNKNOWN" << std::endl; std::cout << "Caught an error: UNKNOWN" << std::endl;
} }
std::cout << std::endl;
return 0; return 0;
} }

View File

@ -35,4 +35,4 @@ this is a break, because it ends with two spaces -> <p> However <br> this is a b
Double returns also Double returns also
yield line breaks -> <p> Double returns also <br> yield line breaks </p> yields new paragraphs -> <p> Double returns also</p> <p> yields new paragraphs </p>