Compare commits
15 Commits
ca38b98e8a
...
c90e4e8895
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c90e4e8895 | ||
| 0ef0500fe8 | |||
|
|
e6dc318306 | ||
| d863b93132 | |||
|
|
f0ab2d9006 | ||
|
|
c958536284 | ||
|
|
d2f0b5451d | ||
| 022b2d495e | |||
|
|
4b0900e3ae | ||
|
|
2fdab0134a | ||
|
|
7587e493d7 | ||
| cf3b57c8e4 | |||
| 6fd9b4df52 | |||
|
|
4e33463863 | ||
|
|
2c1e137157 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -3,3 +3,4 @@
|
|||||||
/build/*
|
/build/*
|
||||||
parser
|
parser
|
||||||
/.vscode
|
/.vscode
|
||||||
|
/*.html
|
||||||
|
|||||||
8
Makefile
8
Makefile
@ -1,6 +1,6 @@
|
|||||||
# Define the C++ compiler and flags
|
# Define the C++ compiler and flags
|
||||||
CXX = g++
|
CXX = g++
|
||||||
CXXFLAGS = -Wall -g
|
CXXFLAGS = -Wall -g -fPIE
|
||||||
|
|
||||||
# Directories
|
# Directories
|
||||||
BUILD_DIR = build
|
BUILD_DIR = build
|
||||||
@ -29,15 +29,15 @@ $(BUILD_DIR):
|
|||||||
mkdir -p $(BUILD_DIR)
|
mkdir -p $(BUILD_DIR)
|
||||||
|
|
||||||
$(TARGET): $(OBJECTS)
|
$(TARGET): $(OBJECTS)
|
||||||
$(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@
|
$(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@ -pie
|
||||||
|
|
||||||
# Generic rule for all .cpp files in the src/ directory
|
# Generic rule for all .cpp files in the src/ directory
|
||||||
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp
|
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp
|
||||||
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ -pie
|
||||||
|
|
||||||
# Generic rule for all .cpp files in the lib/ directory
|
# Generic rule for all .cpp files in the lib/ directory
|
||||||
$(BUILD_DIR)/%.o: $(LIB_DIR)/%.cpp
|
$(BUILD_DIR)/%.o: $(LIB_DIR)/%.cpp
|
||||||
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ -pie
|
||||||
|
|
||||||
test: all
|
test: all
|
||||||
./$(TARGET)
|
./$(TARGET)
|
||||||
|
|||||||
22
input.md
Normal file
22
input.md
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
hello `world`
|
||||||
|
|
||||||
|
|
||||||
|
This `is also a code block`
|
||||||
|
|
||||||
|
hi `mom
|
||||||
|
hello`
|
||||||
|
|
||||||
|
hi `mom
|
||||||
|
|
||||||
|
this is too far`
|
||||||
|
|
||||||
|
|
||||||
|
*this is **words***
|
||||||
|
|
||||||
|
## **Hello world**
|
||||||
|
|
||||||
|
### hello *world*
|
||||||
|
|
||||||
|
# ***This is both!***
|
||||||
|
|
||||||
|
###### This is neither
|
||||||
62
lib/fileSystem.cpp
Normal file
62
lib/fileSystem.cpp
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#include "fileSystem.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/**
 * @brief Build a FileSystem from an input path and an optional output path.
 *
 * Both paths are first passed through removeWhitespace(). An empty input path
 * is rejected with std::runtime_error. When the output path is empty, one is
 * produced by GenerateOutputFilePath().
 */
FileSystem::FileSystem(string input_file_path, string output_file_path) {
  removeWhitespace(input_file_path);
  removeWhitespace(output_file_path);

  // The input path is mandatory; nothing else can work without it.
  if (input_file_path.empty()) {
    throw std::runtime_error("input_file_path cannot be empty");
  }

  this->input_file_path = input_file_path;
  this->output_file_path = output_file_path;

  // Fall back to a path derived from the input when none was supplied.
  const bool output_missing = this->output_file_path.empty();
  if (output_missing) {
    GenerateOutputFilePath();
  }
}
|
||||||
|
|
||||||
|
void FileSystem::GenerateOutputFilePath() {
|
||||||
|
if (this->input_file_path.empty())
|
||||||
|
throw std::runtime_error("Cannot generate output path from empty input.");
|
||||||
|
|
||||||
|
int ext_idx = this->input_file_path.find_last_of('.');
|
||||||
|
string output_cleaned = this->input_file_path.substr(0, ext_idx) + ".html";
|
||||||
|
this->output_file_path = output_cleaned;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string FileSystem::ReadInputFile() {
|
||||||
|
// Cannot read file if the path does not exist
|
||||||
|
if (this->input_file_path.empty())
|
||||||
|
throw std::runtime_error("Cannot open file: path was not provided.");
|
||||||
|
|
||||||
|
std::ifstream input_file(this->input_file_path);
|
||||||
|
|
||||||
|
if (!input_file.is_open())
|
||||||
|
throw std::runtime_error("Failed to open input file.");
|
||||||
|
|
||||||
|
// Read the file into a single string using a string stream
|
||||||
|
std::stringstream buffer;
|
||||||
|
buffer << input_file.rdbuf();
|
||||||
|
input_file.close();
|
||||||
|
|
||||||
|
return buffer.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
void FileSystem::WriteOutputFile(std::string content) {
|
||||||
|
// Cannot write to file if the path does not exist
|
||||||
|
if (this->output_file_path.empty())
|
||||||
|
throw std::runtime_error("Cannot open file: path was not provided.");
|
||||||
|
|
||||||
|
std::ofstream output_file(this->output_file_path);
|
||||||
|
|
||||||
|
if (!output_file.is_open())
|
||||||
|
throw std::runtime_error("Failed to open output file.");
|
||||||
|
|
||||||
|
output_file << content;
|
||||||
|
output_file.close();
|
||||||
|
}
|
||||||
71
lib/fileSystem.h
Normal file
71
lib/fileSystem.h
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
#ifndef FILESYSTEM_H
|
||||||
|
#define FILESYSTEM_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
class FileSystem {
|
||||||
|
public:
|
||||||
|
FileSystem(string input_file_path, string output_file_path = "");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Read the input file and return its content.
|
||||||
|
*
|
||||||
|
* This method will read the file at the input_file_path and create a single
|
||||||
|
* output string. Each line will be delimited by either `\n` (Unix) or `\r\n`
|
||||||
|
* (Windows). If the file path does not exist OR the file fails to open, this
|
||||||
|
* method will throw a run time error.
|
||||||
|
*
|
||||||
|
* @return File contents as a single string.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
string ReadInputFile();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Write the provided string to the output file.
|
||||||
|
*
|
||||||
|
* This method will attempt to open the output file and write the content
|
||||||
|
* provided to the method in the file. If the file does not exist, it will be
|
||||||
|
* created. If the file path does not exist OR the file fails to open, this
|
||||||
|
* method will throw a run time error.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
void WriteOutputFile(string content);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/**
|
||||||
|
* @brief Input file path.
|
||||||
|
*
|
||||||
|
* Must be provided by the user.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
string input_file_path;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Output file path.
|
||||||
|
*
|
||||||
|
* If not provided, will be generated using the `input_file_path` by removing
|
||||||
|
* the extension and appending `.html`.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
string output_file_path;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* @brief Generate an output file path.
|
||||||
|
*
|
||||||
|
* If the user does not provide an output file path, this method can be
|
||||||
|
* used to generate the path. This is done by simply swapping the `.md`
|
||||||
|
* with `.html`.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
void GenerateOutputFilePath();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -21,3 +21,5 @@ string BoldNode::ToHtml() const {
|
|||||||
string BoldItalicNode::ToHtml() const {
|
string BoldItalicNode::ToHtml() const {
|
||||||
return "<strong><em>" + this->content + "</em></strong>";
|
return "<strong><em>" + this->content + "</em></strong>";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Renders the node as its content enclosed in <code>...</code>.
// NOTE(review): the content is emitted as-is; confirm whether HTML escaping
// of '<' and '&' is expected to happen before it reaches this node.
string CodeNode::ToHtml() const {
  string html = "<code>";
  html += this->content;
  html += "</code>";
  return html;
}
|
||||||
|
|||||||
@ -2,6 +2,7 @@
|
|||||||
#define INLINENODE_H
|
#define INLINENODE_H
|
||||||
|
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -37,6 +38,15 @@ public:
|
|||||||
*/
|
*/
|
||||||
void AddChild(std::unique_ptr<Node> child);
|
void AddChild(std::unique_ptr<Node> child);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Is the node empty.
|
||||||
|
*
|
||||||
|
* This is the same as checking if the nodes content is empty.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
bool IsEmpty() const { return this->content.empty(); };
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::string content;
|
std::string content;
|
||||||
};
|
};
|
||||||
@ -94,4 +104,17 @@ public:
|
|||||||
std::string ToHtml() const;
|
std::string ToHtml() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @desc An inline code block node.
|
||||||
|
*
|
||||||
|
* This node returns it's content wrapped with <code></code> tags.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
class CodeNode : public InlineNode {
|
||||||
|
public:
|
||||||
|
CodeNode(std::string content) : InlineNode(content) {};
|
||||||
|
std::string ToHtml() const;
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
10
lib/node.h
10
lib/node.h
@ -18,7 +18,6 @@
|
|||||||
/// try to avoid using raw pointers, and only use references when needed.
|
/// try to avoid using raw pointers, and only use references when needed.
|
||||||
/// Reference: https://www.youtube.com/watch?v=AmjoK55h68Y&t=166s
|
/// Reference: https://www.youtube.com/watch?v=AmjoK55h68Y&t=166s
|
||||||
|
|
||||||
// NOTE ABC
|
|
||||||
class Node {
|
class Node {
|
||||||
protected:
|
protected:
|
||||||
/**
|
/**
|
||||||
@ -67,6 +66,15 @@ public:
|
|||||||
virtual const std::vector<std::unique_ptr<Node>> &GetChilren() const {
|
virtual const std::vector<std::unique_ptr<Node>> &GetChilren() const {
|
||||||
return this->children;
|
return this->children;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Is the node empty.
|
||||||
|
*
|
||||||
|
* This is done differently for inline nodes and structure nodes.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
virtual bool IsEmpty() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
342
lib/parser.cpp
342
lib/parser.cpp
@ -1,36 +1,334 @@
|
|||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
#include "util.h"
|
#include "fileSystem.h"
|
||||||
|
#include "inlineNode.h"
|
||||||
|
#include "structureNode.h"
|
||||||
|
#include <algorithm>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <stdexcept>
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
Parser::Parser(string input_file_path, string output_file_path) {
|
void Parser::Inspect() {
|
||||||
// NOTE: Remove any white space AROUND the inputs
|
std::cerr << "Parser::Inspect() is not yet implemented." << std::endl;
|
||||||
removeWhitespace(input_file_path);
|
|
||||||
removeWhitespace(output_file_path);
|
|
||||||
|
|
||||||
if (input_file_path == "") {
|
|
||||||
throw std::runtime_error("input_file_path cannot be empty");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
this->input_file_path = input_file_path;
|
void Parser::NormalizeInputStream() {
|
||||||
|
if (this->content.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
// NOTE: If the user does not provide an output file, then we should construct
|
size_t pos = 0;
|
||||||
// one using the input file with .md swapped with the extension.
|
while ((pos = content.find("\r\n", pos)) != string::npos) {
|
||||||
if (output_file_path == "") {
|
this->content.replace(pos, 2, "\n");
|
||||||
int ext_idx = input_file_path.find_last_of('.');
|
pos++;
|
||||||
string output_cleaned = input_file_path.substr(0, ext_idx) + ".html";
|
}
|
||||||
this->output_file_path = output_cleaned;
|
|
||||||
|
// NOTE: Remove all occurrences of '\r'
|
||||||
|
this->content.erase(
|
||||||
|
std::remove(this->content.begin(), this->content.end(), '\r'),
|
||||||
|
this->content.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Renders the parsed tree to HTML and hands it to the file system module.
//
// Throws std::runtime_error when no tree has been built yet (DOM is null).
// The message previously pointed at a non-existent "Parser::ParserDocument";
// it now names the real entry point, Parser::ParseDocument.
void Parser::WriteOutput() {
  if (this->DOM == nullptr)
    throw std::runtime_error(
        "Cannot write output, output DOM tree does not exist. Please run the "
        "Parser::ParseDocument method first.");

  this->filesystem.WriteOutputFile(this->DOM->ToHtml());
}
|
||||||
|
|
||||||
|
void Parser::ParseDocument() {
|
||||||
|
try {
|
||||||
|
this->content = this->filesystem.ReadInputFile();
|
||||||
|
} catch (const std::runtime_error &e) {
|
||||||
|
std::cerr << "Caught an error: " << e.what() << std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this->output_file_path = output_file_path;
|
// Normalize Windows line endings (\r\n) to Unix line endings (\n)
|
||||||
|
NormalizeInputStream();
|
||||||
|
|
||||||
|
// We need document parent
|
||||||
|
this->DOM = std::make_unique<DocumentNode>();
|
||||||
|
|
||||||
|
while (!IsEOF()) {
|
||||||
|
// std::cout << Peek(); Consume();
|
||||||
|
auto block = ParseBlock();
|
||||||
|
if (block != nullptr)
|
||||||
|
this->DOM->AddChild(std::move(block));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Parser::Inspect() {
|
// All this does is pick which subparser to call
|
||||||
std::cout << "std::string input_file_path: " << this->input_file_path
|
// Identify which block to parse
|
||||||
<< std::endl;
|
std::unique_ptr<Node> Parser::ParseBlock() {
|
||||||
std::cout << "std::string output_file_path: " << this->output_file_path
|
// Remove whitespace using peek and consume (' ', '\t', '\n')
|
||||||
<< std::endl;
|
ConsumeWhiteSpace();
|
||||||
|
|
||||||
|
// NOTE: Simple example
|
||||||
|
// std::string ch(1, Peek());
|
||||||
|
// std::unique_ptr<Node> block = std::make_unique<TextNode>(ch);
|
||||||
|
// Consume();
|
||||||
|
|
||||||
|
if (Peek() == '#') {
|
||||||
|
return ParseHeading();
|
||||||
|
}
|
||||||
|
|
||||||
|
// this is the default case
|
||||||
|
return ParseParagraph();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<Node> Parser::ParseParagraph() {
|
||||||
|
auto node = std::make_unique<ParagraphNode>();
|
||||||
|
|
||||||
|
// This should call parse inline
|
||||||
|
auto text_nodes = ParseInline();
|
||||||
|
for (auto &text_node : text_nodes) {
|
||||||
|
node->AddChild(std::move(text_node));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->IsEmpty())
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<Node> Parser::ParseHeading() {
|
||||||
|
// Compute the size of the heading
|
||||||
|
int i = 0;
|
||||||
|
char c = Peek();
|
||||||
|
while (c == '#') {
|
||||||
|
c = Peek(i++);
|
||||||
|
}
|
||||||
|
|
||||||
|
Consume(i - 1);
|
||||||
|
auto node = std::make_unique<HeadingNode>(i - 1);
|
||||||
|
|
||||||
|
ConsumeWhiteSpace();
|
||||||
|
|
||||||
|
// This should call parse inline
|
||||||
|
auto text_nodes = ParseInline();
|
||||||
|
for (auto &text_node : text_nodes) {
|
||||||
|
node->AddChild(std::move(text_node));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (node->IsEmpty())
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parses inline content up to the next blank line (two consecutive newlines)
// or the end of input, and returns the resulting nodes in document order.
//
// Plain characters are collected into text nodes; a single newline inside
// the run is turned into a space. '*', '**', '***' and '`' hand over to the
// matching sub-parser, and the node it returns is kept unless it is empty.
vector<std::unique_ptr<Node>> Parser::ParseInline() {
  vector<std::unique_ptr<Node>> result;
  string pending;

  // Flush the collected plain text, run one sub-parser, keep a non-empty node.
  const auto emit = [&](std::unique_ptr<Node> (Parser::*sub_parser)()) {
    PushTextNode(result, pending);
    std::unique_ptr<Node> parsed = (this->*sub_parser)();
    if (!parsed->IsEmpty()) {
      result.push_back(std::move(parsed));
    }
  };

  while (!IsEOF()) {
    const char ch = Peek();

    // An empty line ends the inline run.
    if (ch == '\n' && Peek(1) == '\n') {
      break;
    }

    if (ch == '*') {
      // The longest run of stars wins: *** before ** before *.
      const bool second_star = Peek(1) == '*';
      const bool third_star = second_star && Peek(2) == '*';
      if (third_star) {
        emit(&Parser::ParseBoldItalic);
      } else if (second_star) {
        emit(&Parser::ParseBold);
      } else {
        emit(&Parser::ParseItalic);
      }
      continue;
    }

    if (ch == '`') {
      emit(&Parser::ParseCode);
      continue;
    }

    // A lone newline joins two source lines with a space.
    pending += (ch == '\n') ? ' ' : ch;
    Consume();
  }

  // Whatever plain text is left becomes the final text node.
  PushTextNode(result, pending);
  return result;
}
|
||||||
|
|
||||||
|
// Parses the inline content of a heading and returns the resulting nodes in
// document order.
//
// Same rules as ParseInline(), except that parsing stops at the first
// newline, because headings are single-line blocks. The newline itself is
// not consumed.
//
// The old "newline becomes a space" ternary was unreachable here (the loop
// has already stopped on '\n') and has been removed.
//
// NOTE(review): the emphasis/code sub-parsers only stop at a blank line, so
// an unterminated '*' or '`' in a heading can still run past the end of the
// heading line.
vector<std::unique_ptr<Node>> Parser::ParseInlineHeading() {
  vector<std::unique_ptr<Node>> nodes;
  string str;

  while (!IsEOF()) {
    const char c = Peek();

    // End of the heading line.
    if (c == '\n')
      break;

    if (c == '*' || c == '`') {
      // Close the text collected so far before the formatted node.
      PushTextNode(nodes, str);

      std::unique_ptr<Node> node;
      if (c == '`') {
        node = ParseCode();
      } else if (Peek(1) == '*' && Peek(2) == '*') {
        node = ParseBoldItalic();
      } else if (Peek(1) == '*') {
        node = ParseBold();
      } else {
        node = ParseItalic();
      }

      if (!node->IsEmpty())
        nodes.push_back(std::move(node));
      continue;
    }

    str += c;
    Consume();
  }

  // Push the last node, if the string is not empty.
  PushTextNode(nodes, str);
  return nodes;
}
|
||||||
|
|
||||||
|
std::unique_ptr<Node> Parser::ParseItalic() {
|
||||||
|
string str;
|
||||||
|
Consume(1);
|
||||||
|
|
||||||
|
while (!IsEOF()) {
|
||||||
|
char c = Peek();
|
||||||
|
|
||||||
|
if (c == '\n' && Peek(1) == '\n')
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (c == '*') {
|
||||||
|
Consume(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
str += c;
|
||||||
|
Consume();
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_unique<ItalicNode>(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<Node> Parser::ParseBold() {
|
||||||
|
string str;
|
||||||
|
Consume(2);
|
||||||
|
|
||||||
|
while (!IsEOF()) {
|
||||||
|
char c = Peek();
|
||||||
|
|
||||||
|
if (c == '\n' && Peek(1) == '\n')
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (c == '*' && Peek(1) == '*') {
|
||||||
|
Consume(2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
str += c;
|
||||||
|
Consume();
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_unique<BoldNode>(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<Node> Parser::ParseBoldItalic() {
|
||||||
|
string str;
|
||||||
|
Consume(3);
|
||||||
|
|
||||||
|
while (!IsEOF()) {
|
||||||
|
char c = Peek();
|
||||||
|
|
||||||
|
if (c == '\n' && Peek(1) == '\n')
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (c == '*' && Peek(1) == '*' && Peek(2) == '*') {
|
||||||
|
Consume(3);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
str += c;
|
||||||
|
Consume();
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_unique<BoldItalicNode>(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<Node> Parser::ParseCode() {
|
||||||
|
string str;
|
||||||
|
Consume(1);
|
||||||
|
|
||||||
|
while (!IsEOF()) {
|
||||||
|
char c = Peek();
|
||||||
|
|
||||||
|
if (c == '\n' && Peek(1) == '\n')
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (c == '`') {
|
||||||
|
Consume(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
str += c;
|
||||||
|
Consume();
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_unique<CodeNode>(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends a TextNode holding `str` to `nodes` when `str` is not empty, then
// resets `str` so the caller can start collecting the next run of text.
//
// make_unique already yields a temporary, so wrapping it in std::move was
// redundant; clear() replaces the assignment from an empty literal.
void Parser::PushTextNode(vector<std::unique_ptr<Node>> &nodes, string &str) {
  if (!str.empty())
    nodes.push_back(std::make_unique<TextNode>(str));

  // Reset unconditionally, matching the previous behavior.
  str.clear();
}
|
||||||
|
|
||||||
|
char Parser::Peek(size_t offset) {
|
||||||
|
size_t look_ahead_pos = this->position + offset;
|
||||||
|
|
||||||
|
if (look_ahead_pos < this->content.length()) {
|
||||||
|
return this->content[look_ahead_pos];
|
||||||
|
}
|
||||||
|
|
||||||
|
return '\0'; // null if past end
|
||||||
|
};
|
||||||
|
|
||||||
|
// Moves the cursor forward by `count` characters. No bounds check is done
// here.
void Parser::Consume(size_t count) {
  this->position = this->position + count;
}
|
||||||
|
|
||||||
|
// True once the cursor has reached or passed the end of the content.
bool Parser::IsEOF() {
  const bool has_more = this->position < this->content.length();
  return !has_more;
}
|
||||||
|
|
||||||
|
void Parser::ConsumeWhiteSpace() {
|
||||||
|
char c = Peek();
|
||||||
|
while (c == ' ' || c == '\t' || c == '\n') {
|
||||||
|
Consume();
|
||||||
|
c = Peek();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
95
lib/parser.h
95
lib/parser.h
@ -1,11 +1,15 @@
|
|||||||
#ifndef PARSER_H
|
#ifndef PARSER_H
|
||||||
#define PARSER_H
|
#define PARSER_H
|
||||||
|
|
||||||
|
#include "fileSystem.h"
|
||||||
|
#include "node.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
#include <stack>
|
#include <stack>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Markdown parser class.
|
* @brief Markdown parser class.
|
||||||
@ -22,7 +26,8 @@ using std::string;
|
|||||||
*/
|
*/
|
||||||
class Parser {
|
class Parser {
|
||||||
public:
|
public:
|
||||||
Parser(string input_file_path, string output_file_path = "");
|
Parser(string input_file_path, string output_file_path = "")
|
||||||
|
: filesystem(input_file_path, output_file_path) {};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Inspect (view) contents of the class.
|
* @brief Inspect (view) contents of the class.
|
||||||
@ -34,7 +39,6 @@ public:
|
|||||||
void Inspect();
|
void Inspect();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
|
||||||
* @brief Parse an entire document.
|
* @brief Parse an entire document.
|
||||||
*
|
*
|
||||||
* This function will be called to yield the result. This is the entry point
|
* This function will be called to yield the result. This is the entry point
|
||||||
@ -48,57 +52,92 @@ public:
|
|||||||
*
|
*
|
||||||
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
*/
|
*/
|
||||||
void ParseDocument(void);
|
void ParseDocument();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Write the output to the file.
|
||||||
|
*
|
||||||
|
* Once the tree is generated, this method should be called to actually
|
||||||
|
* write the output. Having this functionality separate allows for more
|
||||||
|
* portability.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
void WriteOutput();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/**
|
/**
|
||||||
* @brief Input file path.
|
* @brief File system module to handle file I/O.
|
||||||
|
*
|
||||||
|
* Anything requiring file I/O operations will be handled by this module.
|
||||||
*
|
*
|
||||||
* Must be provided by the user.
|
|
||||||
*
|
*
|
||||||
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
*/
|
*/
|
||||||
string input_file_path;
|
FileSystem filesystem;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Output file path.
|
* @brief Parser generated tree.
|
||||||
*
|
*
|
||||||
* If not provided, will be generated using the `input_file_path` by removing
|
* This value will store the root, which is expected to be a DocumentNode.
|
||||||
* the extension and appending `.html`.
|
* This node will mark the start of the tree. The parser will populate this
|
||||||
|
* tree during the parsing process.
|
||||||
*
|
*
|
||||||
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
*/
|
*/
|
||||||
string output_file_path;
|
std::unique_ptr<Node> DOM;
|
||||||
|
|
||||||
// NOTE: We need a stack, just not sure what goes in it yet
|
|
||||||
// std::stack<any> stack;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
/**
|
||||||
* @brief Parse a single line.
|
* @brief Normalize the input stream.
|
||||||
|
*
|
||||||
|
* Replaces all `\r\n` with just `\n` since that is what the parser expects.
|
||||||
|
* Then removes any left over `\r` elements in the stream. If the stream is
|
||||||
|
* empty this method does nothing.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
void NormalizeInputStream();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Parse a single block of content
|
||||||
*
|
*
|
||||||
* How does this function work...
|
* How does this function work...
|
||||||
* This is where the magic happens.
|
* This is where the magic happens.
|
||||||
*
|
*
|
||||||
* @param line Target line to parse, as string.
|
* @return Node, to be appended to the callers children.
|
||||||
* @return DOMNode, once exists
|
|
||||||
*
|
*
|
||||||
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
*/
|
*/
|
||||||
void ParseLine(string line);
|
std::unique_ptr<Node> ParseBlock();
|
||||||
|
|
||||||
// NOTE: Parser operations, again, abstract, just for brainstorming now
|
// Stores index in the string
|
||||||
// These should operate on internal state, not lines themselves
|
size_t position = 0;
|
||||||
void ParseHeader();
|
|
||||||
void ParseParagraph();
|
|
||||||
void ParseItalic();
|
|
||||||
void ParseBold();
|
|
||||||
void ParseBoldItalic();
|
|
||||||
|
|
||||||
// NOTE: Character operations, these are just for brainstorming
|
// Working input content
|
||||||
char Peek();
|
string content;
|
||||||
void Consume();
|
|
||||||
bool EndOfLine();
|
// TODO: Document these methods, no more magic methods :)
|
||||||
|
|
||||||
|
std::unique_ptr<Node> ParseParagraph();
|
||||||
|
std::unique_ptr<Node> ParseHeading();
|
||||||
|
vector<std::unique_ptr<Node>> ParseInline();
|
||||||
|
|
||||||
|
// The only difference is the exit condition
|
||||||
|
vector<std::unique_ptr<Node>> ParseInlineHeading();
|
||||||
|
|
||||||
|
void PushTextNode(vector<std::unique_ptr<Node>> &nodes, string &str);
|
||||||
|
|
||||||
|
std::unique_ptr<Node> ParseItalic();
|
||||||
|
std::unique_ptr<Node> ParseBold();
|
||||||
|
std::unique_ptr<Node> ParseBoldItalic();
|
||||||
|
std::unique_ptr<Node> ParseCode();
|
||||||
|
|
||||||
|
char Peek(size_t offset = 0);
|
||||||
|
void Consume(size_t count = 1);
|
||||||
|
bool IsEOF();
|
||||||
|
|
||||||
|
void ConsumeWhiteSpace();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -2,6 +2,7 @@
|
|||||||
#define STRUCTURENODE_H
|
#define STRUCTURENODE_H
|
||||||
|
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -23,6 +24,15 @@ public:
|
|||||||
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
*/
|
*/
|
||||||
virtual std::string ToHtml() const = 0;
|
virtual std::string ToHtml() const = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Is the node empty.
|
||||||
|
*
|
||||||
|
* This is the same as checking if the node has no children.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
bool IsEmpty() const { return this->children.size() == 0; };
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user