diff --git a/lib/parser.cpp b/lib/parser.cpp index 0f563e0..9519d82 100644 --- a/lib/parser.cpp +++ b/lib/parser.cpp @@ -1,9 +1,17 @@ #include "parser.h" +#include "inlineNode.h" +#include "structureNode.h" #include "util.h" +#include #include +#include +#include +#include #include +#include using std::string; +using std::vector; Parser::Parser(string input_file_path, string output_file_path) { // NOTE: Remove any white space AROUND the inputs @@ -34,3 +42,248 @@ void Parser::Inspect() { std::cout << "std::string output_file_path: " << this->output_file_path << std::endl; } + +// replace '\r\n' with '\n' +void Parser::NormalizeInputStream() { + if (this->content.empty()) + return; + + size_t pos = 0; + while ((pos = content.find("\r\n", pos)) != string::npos) { + this->content.replace(pos, 2, "\n"); + pos++; + } + + // NOTE: Remove all occurrences of '\r' + this->content.erase( + std::remove(this->content.begin(), this->content.end(), '\r'), + this->content.end()); +} + +void Parser::ParseDocument() { + // Open the input file + std::ifstream input_file(this->input_file_path); + + if (!input_file.is_open()) { + throw std::runtime_error("Failed to open input file."); + return; + } + + // Read the file into a single string + std::stringstream buffer; + buffer << input_file.rdbuf(); + this->content = buffer.str(); + + input_file.close(); + + // We need document parent + this->DOM = std::make_unique(); + + while (!IsEOF()) { + // std::cout << Peek(); Consume(); + auto block = ParseBlock(); + if (block != nullptr) + this->DOM->AddChild(std::move(block)); + } + + std::cout << this->DOM->ToHtml(); +} + +// All this does is pick which subparser to call +// Identify which block to parse +std::unique_ptr Parser::ParseBlock() { + // Remove whitespace using peek and consume (' ', '\t', '\n') + ConsumeWhiteSpace(); + + // NOTE: Simple example + // std::string ch(1, Peek()); + // std::unique_ptr block = std::make_unique(ch); + // Consume(); + + if (Peek() == '#') { + return ParseHeading(); + } + + // this is the default case + return ParseParagraph(); +} + +std::unique_ptr Parser::ParseParagraph() { + auto node = std::make_unique(); + + // This should call parse inline + auto text_nodes = ParseInline(); + for (auto &text_node : text_nodes) { + node->AddChild(std::move(text_node)); + } + + if (node->GetChilren().size() < 1) + return nullptr; + + return node; +} + +std::unique_ptr Parser::ParseHeading() { + // Compute the size of the heading + int i = 0; + char c = Peek(); + while (c == '#') { + c = Peek(i++); + } + + Consume(i - 1); + auto node = std::make_unique(i - 1); + + ConsumeWhiteSpace(); + + std::string str; + while (!IsEOF()) { + c = Peek(); + // We can stop as soon as we see a new line. Headings are single line blocks + if (c == '\n') + break; + + // If a newline, use a space instead + str += c; + Consume(); + } + + // BUG: Why do we need to check this? + if (str == "") + return nullptr; + + auto text_node = std::make_unique(str); + node->AddChild(std::move(text_node)); + + return node; +} + +vector> Parser::ParseInline() { + vector> nodes; + string str; + + while (!IsEOF()) { + char c = Peek(); + // If this char and next char are both newlines: then we have an empty line, + // we should stop. + if (c == '\n' && Peek(1) == '\n') + break; + + if (c == '*' && Peek(1) == '*' && Peek(2) == '*') { + PushTextNode(nodes, str); + nodes.push_back(std::move(ParseBoldItalic())); + continue; + } else if (c == '*' && Peek(1) == '*') { + PushTextNode(nodes, str); + nodes.push_back(std::move(ParseBold())); + continue; + } else if (c == '*') { + PushTextNode(nodes, str); + nodes.push_back(std::move(ParseItalic())); + continue; + } + + // If a newline, use a space instead + str += (c == '\n' ? ' ' : c); + Consume(); + } + + // Push the last node, if the string is not empty + PushTextNode(nodes, str); + return nodes; +} + +std::unique_ptr Parser::ParseItalic() { + string str; + Consume(1); + + while (!IsEOF()) { + char c = Peek(); + + if (c == '\n' && Peek(1) == '\n') + break; + + if (c == '*') { + Consume(1); + break; + } + + str += c; + Consume(); + } + + return std::make_unique(str); +} + +std::unique_ptr Parser::ParseBold() { + string str; + Consume(2); + + while (!IsEOF()) { + char c = Peek(); + + if (c == '\n' && Peek(1) == '\n') + break; + + if (c == '*' && Peek(1) == '*') { + Consume(2); + break; + } + + str += c; + Consume(); + } + + return std::make_unique(str); +} + +std::unique_ptr Parser::ParseBoldItalic() { + string str; + Consume(3); + + while (!IsEOF()) { + char c = Peek(); + + if (c == '\n' && Peek(1) == '\n') + break; + + if (c == '*' && Peek(1) == '*' && Peek(2) == '*') { + Consume(3); + break; + } + + str += c; + Consume(); + } + + return std::make_unique(str); +} + +void Parser::PushTextNode(vector> &nodes, string &str) { + if (!str.empty()) + nodes.push_back(std::move(std::make_unique(str))); + str = ""; +} + +char Parser::Peek(size_t offset) { + size_t look_ahead_pos = this->position + offset; + + if (look_ahead_pos < this->content.length()) { + return this->content[look_ahead_pos]; + } + + return '\0'; // null if past end +}; + +void Parser::Consume(size_t count) { this->position += count; }; + +bool Parser::IsEOF() { return this->position >= this->content.length(); }; + +void Parser::ConsumeWhiteSpace() { + // TODO: This can be optimized using an accumulator and then consuming + char c = Peek(); + while (c == ' ' || c == '\t' || c == '\n') { + Consume(); + c = Peek(); + } +} diff --git a/lib/parser.h b/lib/parser.h index 74ecfab..ce7590d 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -1,11 +1,14 @@ #ifndef PARSER_H #define PARSER_H +#include "node.h" #include +#include #include #include using std::string; +using std::vector; /** * @brief Markdown parser class. @@ -48,7 +51,7 @@ public: * * @author Hayden Hargreaves (hhargreaves2006@gmail.com) */ - void ParseDocument(void); + void ParseDocument(); protected: /** @@ -70,35 +73,57 @@ protected: */ string output_file_path; + /** + * @brief Parser generated tree. + * + * This value will store the root, which is expected to be a DocumentNode. + * This node will mark the start of the tree. The parser will populate this + * tree during the parsing process. + * + * @author Hayden Hargreaves (hhargreaves2006@gmail.com) + */ + std::unique_ptr DOM; + // NOTE: We need a stack, just not sure what goes in it yet // std::stack stack; private: + // windows... >:( + void NormalizeInputStream(); + /** - * @brief Parse a single line. + * @brief Parse a single block of content * * How does this function work... * This is where the magic happens. * - * @param line Target line to parse, as string. * @return DOMNode, once exists * * @author Hayden Hargreaves (hhargreaves2006@gmail.com) */ - void ParseLine(string line); + std::unique_ptr ParseBlock(); - // NOTE: Parser operations, again, abstract, just for brainstorming now - // These should operate on internal state, not lines themselves - void ParseHeader(); - void ParseParagraph(); - void ParseItalic(); - void ParseBold(); - void ParseBoldItalic(); + // Stores index in the string + size_t position = 0; - // NOTE: Character operations, these are just for brainstorming - char Peek(); - void Consume(); - bool EndOfLine(); + // Working input content + string content; + + std::unique_ptr ParseParagraph(); + std::unique_ptr ParseHeading(); + vector> ParseInline(); + + void PushTextNode(vector> &nodes, string &str); + + std::unique_ptr ParseItalic(); + std::unique_ptr ParseBold(); + std::unique_ptr ParseBoldItalic(); + + char Peek(size_t offset = 0); + void Consume(size_t count = 1); + bool IsEOF(); + + void ConsumeWhiteSpace(); }; #endif diff --git a/lib/watchDog.cpp b/lib/watchDog.cpp index e9bec9e..cbc44bd 100644 --- a/lib/watchDog.cpp +++ b/lib/watchDog.cpp @@ -126,4 +126,4 @@ std::string WatchDog::timePointToString(const fs::file_time_type& timePoint){ std::strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", &localTime); return std::string(buffer); -} \ No newline at end of file +} diff --git a/src/main.cpp b/src/main.cpp index 9a67d41..5fc78ed 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -78,4 +78,10 @@ void test_input(int argc, char **argv) { std::cout << std::endl; } -int main(int argc, char **argv) { test_nodes(); } +int main(int argc, char **argv) { + Parser p("input.md"); + p.ParseDocument(); + + Parser p2("README.md"); + p2.ParseDocument(); +}