Compare commits

..

15 Commits

Author SHA1 Message Date
Hayden Hargreaves
c90e4e8895 Merge branch 'feature/cmdl' 2025-10-28 17:52:25 -07:00
0ef0500fe8 Merge pull request '(FEAT): Heading parser uses a new inline method.' (#25) from feature/heading-inline-content into main
Reviewed-on: azpect/MarkdownToHtmlCompiler#25
2025-10-28 14:55:21 -07:00
Hayden Hargreaves
e6dc318306 (FEAT): Heading parser uses a new inline method.
The only difference between this and the original one is the exit
condition.
2025-10-22 12:03:02 -07:00
d863b93132 Merge pull request '(FEAT): Added support for inline code blocks.' (#23) from feature/inline-code-nodes into main
Reviewed-on: azpect/MarkdownToHtmlCompiler#23
Reviewed-by: shultzp1 <shultzp1@my.erau.edu>
2025-10-21 07:45:52 -07:00
Hayden Hargreaves
f0ab2d9006 (FIX): Cleaned up a little bit, removed some notes. 2025-10-20 12:12:51 -07:00
Hayden Hargreaves
c958536284 Merge branch 'main' into feature/inline-code-nodes 2025-10-20 12:01:50 -07:00
Hayden Hargreaves
d2f0b5451d (DOC): Updated to the input test file. 2025-10-20 12:00:37 -07:00
022b2d495e Merge pull request '(FEAT) Abstracted file system into its own class.' (#20) from feature/file-handler into main
Reviewed-on: azpect/MarkdownToHtmlCompiler#20
2025-10-18 14:17:02 -07:00
Hayden Hargreaves
4b0900e3ae (FEAT): Added support for inline code blocks. 2025-10-17 13:50:29 -07:00
Hayden Hargreaves
2fdab0134a (FIX): Small file updates.
We won't need a stack yet.
2025-10-17 13:15:42 -07:00
Hayden Hargreaves
7587e493d7 (FEAT) Abstracted file system into its own class.
This class is then composed into the parser class and called to write
the expected outputs. This is a huge step towards the final product.
Furthermore, the output is being written to the generated file. Until
the CLI is implemented, this is the best it will do.
2025-10-17 13:08:37 -07:00
cf3b57c8e4 Merge pull request 'FEATURE: Implemented basic parser rules' (#18) from feature/parser-basic-rules into main
Reviewed-on: azpect/MarkdownToHtmlCompiler#18
Reviewed-by: shultzp1 <shultzp1@my.erau.edu>
2025-10-16 18:34:35 -07:00
6fd9b4df52 Merge branch 'main' into feature/parser-basic-rules 2025-10-16 18:30:55 -07:00
Hayden Hargreaves
4e33463863 (FIX): Forgot to call the method itself.
But now this will be supported by both windows and linux.
2025-10-16 17:23:29 -07:00
Hayden Hargreaves
2c1e137157 (FEAT): Parser is working pretty well.
I needed to make this commit so I can test on the windows machine...
2025-10-16 17:19:51 -07:00
12 changed files with 591 additions and 55 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@
/build/* /build/*
parser parser
/.vscode /.vscode
/*.html

View File

@ -1,6 +1,6 @@
# Define the C++ compiler and flags # Define the C++ compiler and flags
CXX = g++ CXX = g++
CXXFLAGS = -Wall -g CXXFLAGS = -Wall -g -fPIE
# Directories # Directories
BUILD_DIR = build BUILD_DIR = build
@ -29,15 +29,15 @@ $(BUILD_DIR):
mkdir -p $(BUILD_DIR) mkdir -p $(BUILD_DIR)
$(TARGET): $(OBJECTS) $(TARGET): $(OBJECTS)
$(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@ $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@ -pie
# Generic rule for all .cpp files in the src/ directory # Generic rule for all .cpp files in the src/ directory
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ -pie
# Generic rule for all .cpp files in the lib/ directory # Generic rule for all .cpp files in the lib/ directory
$(BUILD_DIR)/%.o: $(LIB_DIR)/%.cpp $(BUILD_DIR)/%.o: $(LIB_DIR)/%.cpp
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ -pie
test: all test: all
./$(TARGET) ./$(TARGET)

22
input.md Normal file
View File

@ -0,0 +1,22 @@
hello `world`
This `is also a code block`
hi `mom
hello`
hi `mom
this is too far`
*this is **words***
## **Hello world**
### hello *world*
# ***This is both!***
###### This is neither

62
lib/fileSystem.cpp Normal file
View File

@ -0,0 +1,62 @@
#include "fileSystem.h"
#include "util.h"
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>
/**
 * @brief Build a file system handler from an input and an output path.
 *
 * Both paths are first passed through `removeWhitespace` (declared in
 * util.h; presumably trims the string in place -- confirm against util.h).
 * An empty input path is rejected. When no output path remains after
 * cleaning, one is derived from the input path.
 *
 * @param input_file_path Path of the file to read. Must not be empty.
 * @param output_file_path Path of the file to write. May be empty.
 * @throws std::runtime_error if the cleaned input path is empty.
 */
FileSystem::FileSystem(string input_file_path, string output_file_path) {
  removeWhitespace(input_file_path);
  removeWhitespace(output_file_path);

  if (input_file_path.empty()) {
    throw std::runtime_error("input_file_path cannot be empty");
  }

  this->input_file_path = input_file_path;

  if (output_file_path.empty()) {
    // No usable output path was supplied: derive one from the input path.
    GenerateOutputFilePath();
  } else {
    this->output_file_path = output_file_path;
  }
}
void FileSystem::GenerateOutputFilePath() {
if (this->input_file_path.empty())
throw std::runtime_error("Cannot generate output path from empty input.");
int ext_idx = this->input_file_path.find_last_of('.');
string output_cleaned = this->input_file_path.substr(0, ext_idx) + ".html";
this->output_file_path = output_cleaned;
}
/**
 * @brief Load the whole input file into one string.
 *
 * @return The bytes of the input file, exactly as the stream delivers them.
 * @throws std::runtime_error if no input path is set or the file cannot be
 *         opened.
 */
std::string FileSystem::ReadInputFile() {
  // Without a path there is nothing to open.
  if (this->input_file_path.empty()) {
    throw std::runtime_error("Cannot open file: path was not provided.");
  }

  std::ifstream source(this->input_file_path);
  if (!source.is_open()) {
    throw std::runtime_error("Failed to open input file.");
  }

  // Drain the file's stream buffer into an in-memory stream in one go.
  std::stringstream contents;
  contents << source.rdbuf();
  source.close();

  return contents.str();
}
/**
 * @brief Write `content` to the output file.
 *
 * The file is opened with the default `std::ofstream` mode.
 *
 * @param content Text to write.
 * @throws std::runtime_error if no output path is set or the file cannot be
 *         opened.
 */
void FileSystem::WriteOutputFile(std::string content) {
  // Without a path there is nothing to open.
  if (this->output_file_path.empty()) {
    throw std::runtime_error("Cannot open file: path was not provided.");
  }

  std::ofstream sink(this->output_file_path);
  if (!sink.is_open()) {
    throw std::runtime_error("Failed to open output file.");
  }

  sink << content;
  sink.close();
}

71
lib/fileSystem.h Normal file
View File

@ -0,0 +1,71 @@
#ifndef FILESYSTEM_H
#define FILESYSTEM_H
#include <string>
using std::string;
/**
* @brief File I/O helper holding one input path and one output path.
*
* Reads the input file as a single string and writes a string to the
* output file.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
class FileSystem {
public:
/**
* @brief Construct the handler from an input path and an optional output
* path.
*
* Both paths are cleaned with `removeWhitespace` before use. If the output
* path is empty after cleaning, it is generated from the input path.
*
* @param input_file_path Path of the file to read. Must not be empty.
* @param output_file_path Path of the file to write. Defaults to empty,
* which triggers generation from the input path.
*
* @throws std::runtime_error if the cleaned input path is empty.
*/
FileSystem(string input_file_path, string output_file_path = "");
/**
* @brief Read the input file and return its content.
*
* This method will read the file at the input_file_path and create a single
* output string. Each line will be delimited by either `\n` (Unix) or `\r\n`
* (Windows). If the file path does not exist OR the file fails to open, this
* method will throw a run time error.
*
* @return File contents as a single string.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
string ReadInputFile();
/**
* @brief Write the provided string to the output file.
*
* This method will attempt to open the output file and write the content
* provided to the method in the file. If the file does not exist, it will be
* created. If the file path does not exist OR the file fails to open, this
* method will throw a run time error.
*
* @param content Text to write to the output file.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
void WriteOutputFile(string content);
protected:
/**
* @brief Input file path.
*
* Must be provided by the user.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
string input_file_path;
/**
* @brief Output file path.
*
* If not provided, will be generated using the `input_file_path` by removing
* the extension and appending `.html`.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
string output_file_path;
private:
/**
* @brief Generate an output file path.
*
* If the user does not provide an output file path, this method can be
* used to generate the path. This is done by taking the input path up to
* its last `.` and appending `.html`; the original extension does not have
* to be `.md`.
*
* @throws std::runtime_error if the input path is empty.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
void GenerateOutputFilePath();
};
#endif

View File

@ -21,3 +21,5 @@ string BoldNode::ToHtml() const {
string BoldItalicNode::ToHtml() const { string BoldItalicNode::ToHtml() const {
return "<strong><em>" + this->content + "</em></strong>"; return "<strong><em>" + this->content + "</em></strong>";
} }
namespace {

/**
 * @brief Escape the characters that are significant inside HTML element
 * content.
 *
 * Replaces `&`, `<` and `>` with their entities so the text is rendered
 * literally instead of being interpreted as markup.
 *
 * @param raw Text to escape.
 * @return Escaped copy of `raw`.
 */
string EscapeCodeSpanHtml(const string &raw) {
  string escaped;
  escaped.reserve(raw.size());
  for (const char ch : raw) {
    switch (ch) {
    case '&':
      escaped += "&amp;";
      break;
    case '<':
      escaped += "&lt;";
      break;
    case '>':
      escaped += "&gt;";
      break;
    default:
      escaped += ch;
      break;
    }
  }
  return escaped;
}

} // namespace

/**
 * @brief Render the code span as HTML.
 *
 * The content of a code span is literal text, so it is HTML-escaped before
 * being wrapped in `<code></code>`; otherwise a span such as `<div>` would be
 * emitted as a live tag.
 *
 * @return The escaped content wrapped in `<code></code>`.
 */
string CodeNode::ToHtml() const {
  return "<code>" + EscapeCodeSpanHtml(this->content) + "</code>";
}

View File

@ -2,6 +2,7 @@
#define INLINENODE_H #define INLINENODE_H
#include "node.h" #include "node.h"
#include <iostream>
#include <vector> #include <vector>
/** /**
@ -37,6 +38,15 @@ public:
*/ */
void AddChild(std::unique_ptr<Node> child); void AddChild(std::unique_ptr<Node> child);
/**
* @brief Is the node empty.
*
* This is the same as checking if the nodes content is empty.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
bool IsEmpty() const { return this->content.empty(); };
protected: protected:
std::string content; std::string content;
}; };
@ -94,4 +104,17 @@ public:
std::string ToHtml() const; std::string ToHtml() const;
}; };
/**
* @brief An inline code block node.
*
* This node returns its content wrapped with <code></code> tags.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
class CodeNode : public InlineNode {
public:
// Forwards the raw code text to the InlineNode base as the node content.
CodeNode(std::string content) : InlineNode(content) {};
// Returns the HTML for this code span.
std::string ToHtml() const;
};
#endif #endif

View File

@ -18,7 +18,6 @@
/// try to avoid using raw pointers, and only use references when needed. /// try to avoid using raw pointers, and only use references when needed.
/// Reference: https://www.youtube.com/watch?v=AmjoK55h68Y&t=166s /// Reference: https://www.youtube.com/watch?v=AmjoK55h68Y&t=166s
// NOTE ABC
class Node { class Node {
protected: protected:
/** /**
@ -67,6 +66,15 @@ public:
virtual const std::vector<std::unique_ptr<Node>> &GetChilren() const { virtual const std::vector<std::unique_ptr<Node>> &GetChilren() const {
return this->children; return this->children;
} }
/**
* @brief Is the node empty.
*
* This is done differently for inline nodes and structure nodes.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
virtual bool IsEmpty() const = 0;
}; };
#endif #endif

View File

@ -1,36 +1,334 @@
#include "parser.h" #include "parser.h"
#include "util.h" #include "fileSystem.h"
#include "inlineNode.h"
#include "structureNode.h"
#include <algorithm>
#include <cctype> #include <cctype>
#include <stdexcept> #include <memory>
#include <string>
using std::string; using std::string;
using std::vector;
Parser::Parser(string input_file_path, string output_file_path) { void Parser::Inspect() {
// NOTE: Remove any white space AROUND the inputs std::cerr << "Parser::Inspect() is not yet implemented." << std::endl;
removeWhitespace(input_file_path); }
removeWhitespace(output_file_path);
if (input_file_path == "") { void Parser::NormalizeInputStream() {
throw std::runtime_error("input_file_path cannot be empty"); if (this->content.empty())
return;
size_t pos = 0;
while ((pos = content.find("\r\n", pos)) != string::npos) {
this->content.replace(pos, 2, "\n");
pos++;
} }
this->input_file_path = input_file_path; // NOTE: Remove all occurrences of '\r'
this->content.erase(
std::remove(this->content.begin(), this->content.end(), '\r'),
this->content.end());
}
// NOTE: If the user does not provide an output file, then we should construct void Parser::WriteOutput() {
// one using the input file with .md swapped with the extension. if (this->DOM == nullptr)
if (output_file_path == "") { throw std::runtime_error(
int ext_idx = input_file_path.find_last_of('.'); "Cannot write output, output DOM tree does not exist. Please run the "
string output_cleaned = input_file_path.substr(0, ext_idx) + ".html"; "Parser::ParserDocument method first.");
this->output_file_path = output_cleaned;
this->filesystem.WriteOutputFile(this->DOM->ToHtml());
}
void Parser::ParseDocument() {
try {
this->content = this->filesystem.ReadInputFile();
} catch (const std::runtime_error &e) {
std::cerr << "Caught an error: " << e.what() << std::endl;
return; return;
} }
this->output_file_path = output_file_path; // Remove the windows BS
NormalizeInputStream();
// We need document parent
this->DOM = std::make_unique<DocumentNode>();
while (!IsEOF()) {
// std::cout << Peek(); Consume();
auto block = ParseBlock();
if (block != nullptr)
this->DOM->AddChild(std::move(block));
}
} }
void Parser::Inspect() { // All this does is pick which subparser to call
std::cout << "std::string input_file_path: " << this->input_file_path // Identify which block to parse
<< std::endl; std::unique_ptr<Node> Parser::ParseBlock() {
std::cout << "std::string output_file_path: " << this->output_file_path // Remove whitespace using peek and consume (' ', '\t', '\n')
<< std::endl; ConsumeWhiteSpace();
// NOTE: Simple example
// std::string ch(1, Peek());
// std::unique_ptr<Node> block = std::make_unique<TextNode>(ch);
// Consume();
if (Peek() == '#') {
return ParseHeading();
}
// this is the default case
return ParseParagraph();
}
std::unique_ptr<Node> Parser::ParseParagraph() {
auto node = std::make_unique<ParagraphNode>();
// This should call parse inline
auto text_nodes = ParseInline();
for (auto &text_node : text_nodes) {
node->AddChild(std::move(text_node));
}
if (node->IsEmpty())
return nullptr;
return node;
}
std::unique_ptr<Node> Parser::ParseHeading() {
// Compute the size of the heading
int i = 0;
char c = Peek();
while (c == '#') {
c = Peek(i++);
}
Consume(i - 1);
auto node = std::make_unique<HeadingNode>(i - 1);
ConsumeWhiteSpace();
// This should call parse inline
auto text_nodes = ParseInline();
for (auto &text_node : text_nodes) {
node->AddChild(std::move(text_node));
}
if (node->IsEmpty())
return nullptr;
return node;
}
/**
 * @brief Parse inline content up to the next blank line or end of input.
 *
 * Plain characters are gathered into text nodes, with a single newline
 * replaced by a space. A `*`, `**`, `***` or backtick hands control to the
 * matching sub-parser. Empty inline nodes are dropped. The blank line that
 * ends the run is left unconsumed.
 *
 * @return The inline nodes in source order.
 */
vector<std::unique_ptr<Node>> Parser::ParseInline() {
  vector<std::unique_ptr<Node>> result;
  string pending;

  while (!IsEOF()) {
    const char current = Peek();

    // Two newlines in a row form an empty line, which closes the block.
    if (current == '\n' && Peek(1) == '\n')
      break;

    // Ordinary character: keep collecting plain text.
    if (current != '*' && current != '`') {
      pending += (current == '\n') ? ' ' : current;
      Consume();
      continue;
    }

    // A marker begins here, so flush the text gathered so far.
    PushTextNode(result, pending);

    std::unique_ptr<Node> inline_node;
    if (current == '`')
      inline_node = ParseCode();
    else if (Peek(1) != '*')
      inline_node = ParseItalic();
    else if (Peek(2) != '*')
      inline_node = ParseBold();
    else
      inline_node = ParseBoldItalic();

    if (!inline_node->IsEmpty())
      result.push_back(std::move(inline_node));
  }

  // Flush whatever plain text is left.
  PushTextNode(result, pending);

  return result;
}
/**
 * @brief Parse inline content up to the next newline or end of input.
 *
 * Same marker handling as ParseInline(), but the run stops at the first
 * newline because headings are single-line blocks. The newline itself is
 * left unconsumed.
 *
 * @return The inline nodes in source order.
 */
vector<std::unique_ptr<Node>> Parser::ParseInlineHeading() {
  vector<std::unique_ptr<Node>> collected;
  string literal;

  for (char ch = Peek(); !IsEOF() && ch != '\n'; ch = Peek()) {
    const bool is_star = (ch == '*');

    // Ordinary character: keep collecting plain text. A newline can never
    // reach this point because the loop condition stops on it.
    if (!is_star && ch != '`') {
      literal += ch;
      Consume();
      continue;
    }

    // A marker begins here, so flush the text gathered so far.
    PushTextNode(collected, literal);

    std::unique_ptr<Node> styled;
    if (!is_star)
      styled = ParseCode();
    else if (Peek(1) == '*' && Peek(2) == '*')
      styled = ParseBoldItalic();
    else if (Peek(1) == '*')
      styled = ParseBold();
    else
      styled = ParseItalic();

    if (!styled->IsEmpty())
      collected.push_back(std::move(styled));
  }

  // Flush whatever plain text is left.
  PushTextNode(collected, literal);

  return collected;
}
std::unique_ptr<Node> Parser::ParseItalic() {
string str;
Consume(1);
while (!IsEOF()) {
char c = Peek();
if (c == '\n' && Peek(1) == '\n')
break;
if (c == '*') {
Consume(1);
break;
}
str += c;
Consume();
}
return std::make_unique<ItalicNode>(str);
}
std::unique_ptr<Node> Parser::ParseBold() {
string str;
Consume(2);
while (!IsEOF()) {
char c = Peek();
if (c == '\n' && Peek(1) == '\n')
break;
if (c == '*' && Peek(1) == '*') {
Consume(2);
break;
}
str += c;
Consume();
}
return std::make_unique<BoldNode>(str);
}
std::unique_ptr<Node> Parser::ParseBoldItalic() {
string str;
Consume(3);
while (!IsEOF()) {
char c = Peek();
if (c == '\n' && Peek(1) == '\n')
break;
if (c == '*' && Peek(1) == '*' && Peek(2) == '*') {
Consume(3);
break;
}
str += c;
Consume();
}
return std::make_unique<BoldItalicNode>(str);
}
std::unique_ptr<Node> Parser::ParseCode() {
string str;
Consume(1);
while (!IsEOF()) {
char c = Peek();
if (c == '\n' && Peek(1) == '\n')
break;
if (c == '`') {
Consume(1);
break;
}
str += c;
Consume();
}
return std::make_unique<CodeNode>(str);
}
/**
 * @brief Flush accumulated plain text into the node list.
 *
 * If `str` is non-empty, a TextNode holding it is appended to `nodes`. In
 * every case `str` is empty afterwards.
 *
 * @param nodes List that receives the new text node.
 * @param str Accumulated text; cleared by this call.
 */
void Parser::PushTextNode(vector<std::unique_ptr<Node>> &nodes, string &str) {
  if (str.empty())
    return;

  nodes.push_back(std::make_unique<TextNode>(str));
  str.clear();
}
/**
 * @brief Look at a character without consuming it.
 *
 * @param offset Distance from the current position.
 * @return The character at position + offset, or '\0' when that index lies
 *         past the end of the content.
 */
char Parser::Peek(size_t offset) {
  const size_t target = this->position + offset;
  return target < this->content.length() ? this->content[target] : '\0';
}
/**
 * @brief Advance the read position.
 *
 * @param count Number of characters to skip. No bounds check is performed.
 */
void Parser::Consume(size_t count) {
  this->position = this->position + count;
}
/**
 * @brief Report whether the read position has reached the end of the content.
 *
 * @return true when the position is at or beyond the content length.
 */
bool Parser::IsEOF() {
  return !(this->position < this->content.length());
}
void Parser::ConsumeWhiteSpace() {
char c = Peek();
while (c == ' ' || c == '\t' || c == '\n') {
Consume();
c = Peek();
}
} }

View File

@ -1,11 +1,15 @@
#ifndef PARSER_H #ifndef PARSER_H
#define PARSER_H #define PARSER_H
#include "fileSystem.h"
#include "node.h"
#include <iostream> #include <iostream>
#include <memory>
#include <stack> #include <stack>
#include <string> #include <string>
using std::string; using std::string;
using std::vector;
/** /**
* @brief Markdown parser class. * @brief Markdown parser class.
@ -22,7 +26,8 @@ using std::string;
*/ */
class Parser { class Parser {
public: public:
Parser(string input_file_path, string output_file_path = ""); Parser(string input_file_path, string output_file_path = "")
: filesystem(input_file_path, output_file_path) {};
/** /**
* @brief Inspect (view) contents of the class. * @brief Inspect (view) contents of the class.
@ -34,7 +39,6 @@ public:
void Inspect(); void Inspect();
/** /**
*
* @brief Parse an entire document. * @brief Parse an entire document.
* *
* This function will be called to yield the result. This is the entry point * This function will be called to yield the result. This is the entry point
@ -48,57 +52,92 @@ public:
* *
* @author Hayden Hargreaves (hhargreaves2006@gmail.com) * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/ */
void ParseDocument(void); void ParseDocument();
/**
* @brief Write the output to the file.
*
* Once the tree is generated, this method should be called to actually
* write the output. Having this functionality separate allows for more
* portability.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
void WriteOutput();
protected: protected:
/** /**
* @brief Input file path. * @brief File system module to handle file I/O.
*
* Anything requiring file I/O operations will be handled by this module.
* *
* Must be provided by the user.
* *
* @author Hayden Hargreaves (hhargreaves2006@gmail.com) * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/ */
string input_file_path; FileSystem filesystem;
/** /**
* @brief Output file path. * @brief Parser generated tree.
* *
* If not provided, will be generated using the `input_file_path` by removing * This value will store the root, which is expected to be a DocumentNode.
* the extension and appending `.html`. * This node will mark the start of the tree. The parser will populate this
* tree during the parsing process.
* *
* @author Hayden Hargreaves (hhargreaves2006@gmail.com) * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/ */
string output_file_path; std::unique_ptr<Node> DOM;
// NOTE: We need a stack, just not sure what goes in it yet
// std::stack<any> stack;
private: private:
/** /**
* @brief Parse a single line. * @brief Normalize the input stream.
*
* Replaces all `\r\n` with just `\n` since that is what the parser expects.
* Then removes any left over `\r` elements in the stream. If the stream is
* empty this method does nothing.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
void NormalizeInputStream();
/**
* @brief Parse a single block of content
* *
* How does this function work... * How does this function work...
* This is where the magic happens. * This is where the magic happens.
* *
* @param line Target line to parse, as string. * @return Node, to be appended to the callers children.
* @return DOMNode, once exists
* *
* @author Hayden Hargreaves (hhargreaves2006@gmail.com) * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/ */
void ParseLine(string line); std::unique_ptr<Node> ParseBlock();
// NOTE: Parser operations, again, abstract, just for brainstorming now // Stores index in the string
// These should operate on internal state, not lines themselves size_t position = 0;
void ParseHeader();
void ParseParagraph();
void ParseItalic();
void ParseBold();
void ParseBoldItalic();
// NOTE: Character operations, these are just for brainstorming // Working input content
char Peek(); string content;
void Consume();
bool EndOfLine(); // TODO: Document these methods, no more magic methods :)
std::unique_ptr<Node> ParseParagraph();
std::unique_ptr<Node> ParseHeading();
vector<std::unique_ptr<Node>> ParseInline();
// The only difference is the exit condition
vector<std::unique_ptr<Node>> ParseInlineHeading();
void PushTextNode(vector<std::unique_ptr<Node>> &nodes, string &str);
std::unique_ptr<Node> ParseItalic();
std::unique_ptr<Node> ParseBold();
std::unique_ptr<Node> ParseBoldItalic();
std::unique_ptr<Node> ParseCode();
char Peek(size_t offset = 0);
void Consume(size_t count = 1);
bool IsEOF();
void ConsumeWhiteSpace();
}; };
#endif #endif

View File

@ -2,6 +2,7 @@
#define STRUCTURENODE_H #define STRUCTURENODE_H
#include "node.h" #include "node.h"
#include <iostream>
#include <string> #include <string>
/** /**
@ -23,6 +24,15 @@ public:
* @author Hayden Hargreaves (hhargreaves2006@gmail.com) * @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/ */
virtual std::string ToHtml() const = 0; virtual std::string ToHtml() const = 0;
/**
* @brief Is the node empty.
*
* This is the same as checking if the node has no children.
*
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
*/
bool IsEmpty() const { return this->children.size() == 0; };
}; };
/** /**