(FEAT): Worked on the parser class definition. #10
7
Makefile
7
Makefile
@ -10,6 +10,7 @@ LIB_DIR = lib
|
|||||||
# Executable name
|
# Executable name
|
||||||
TARGET = parser
|
TARGET = parser
|
||||||
|
|
||||||
|
# Automatically find all source files
|
||||||
SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp)
|
SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp)
|
||||||
LIB_FILES := $(wildcard $(LIB_DIR)/*.cpp)
|
LIB_FILES := $(wildcard $(LIB_DIR)/*.cpp)
|
||||||
ALL_SOURCES = $(SRC_FILES) $(LIB_FILES)
|
ALL_SOURCES = $(SRC_FILES) $(LIB_FILES)
|
||||||
@ -30,10 +31,12 @@ $(BUILD_DIR):
|
|||||||
$(TARGET): $(OBJECTS)
|
$(TARGET): $(OBJECTS)
|
||||||
$(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@
|
$(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@
|
||||||
|
|
||||||
$(BUILD_DIR)/main.o: $(SRC_DIR)/main.cpp $(LIB_DIR)/parser.h
|
# Generic rule for all .cpp files in the src/ directory
|
||||||
|
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp
|
||||||
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
|
||||||
|
|
||||||
$(BUILD_DIR)/parser.o: $(LIB_DIR)/parser.cpp $(LIB_DIR)/parser.h
|
# Generic rule for all .cpp files in the lib/ directory
|
||||||
|
$(BUILD_DIR)/%.o: $(LIB_DIR)/%.cpp
|
||||||
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
|
||||||
|
|
||||||
test: all
|
test: all
|
||||||
|
|||||||
@ -1,26 +1,12 @@
|
|||||||
#include "./parser.h"
|
#include "./parser.h"
|
||||||
|
#include "./util.h"
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
/**
 * @brief Trim whitespace from both ends of a string, in place.
 *
 * The characters treated as whitespace are space, \t, \n, \r, \f and \v.
 * A string that is empty or holds nothing but whitespace ends up empty.
 *
 * @param input String to trim; modified directly.
 */
void removeWhitespace(string &input) {
  const char *const blanks = " \t\n\r\f\v";

  // No non-blank character at all: nothing is worth keeping.
  const auto first = input.find_first_not_of(blanks);
  if (first == string::npos) {
    input.clear();
    return;
  }

  // A non-blank character exists, so `last` is a valid index here.
  // Cut the tail first so that `first` still points at the same character.
  const auto last = input.find_last_not_of(blanks);
  input.erase(last + 1);
  input.erase(0, first);
}
|
|
||||||
|
|
||||||
Parser::Parser(string input_file_path, string output_file_path) {
|
Parser::Parser(string input_file_path, string output_file_path) {
|
||||||
|
// NOTE: Remove any white space AROUND the inputs
|
||||||
removeWhitespace(input_file_path);
|
removeWhitespace(input_file_path);
|
||||||
removeWhitespace(output_file_path);
|
removeWhitespace(output_file_path);
|
||||||
|
|
||||||
@ -33,11 +19,18 @@ Parser::Parser(string input_file_path, string output_file_path) {
|
|||||||
// NOTE: If the user does not provide an output file, then we should construct
|
// NOTE: If the user does not provide an output file, then we should construct
|
||||||
// one using the input file with .md swapped with the extension.
|
// one from the input file path by swapping its extension for .html.
|
||||||
if (output_file_path == "") {
|
if (output_file_path == "") {
|
||||||
std::cout << "CLEANING" << std::endl;
|
|
||||||
int ext_idx = input_file_path.find_last_of('.');
|
int ext_idx = input_file_path.find_last_of('.');
|
||||||
string output_cleaned = input_file_path.substr(0, ext_idx) + ".html";
|
string output_cleaned = input_file_path.substr(0, ext_idx) + ".html";
|
||||||
this->output_file_path = output_cleaned;
|
this->output_file_path = output_cleaned;
|
||||||
} else {
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
this->output_file_path = output_file_path;
|
this->output_file_path = output_file_path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Parser::Inspect() {
|
||||||
|
std::cout << "std::string input_file_path: " << this->input_file_path
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "std::string output_file_path: " << this->output_file_path
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
|
|||||||
101
lib/parser.h
101
lib/parser.h
@ -2,20 +2,103 @@
|
|||||||
#define PARSER_H
|
#define PARSER_H
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <stack>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Markdown parser class.
|
||||||
|
*
|
||||||
|
* Converts a Markdown file into an HTML output. This is done using a
|
||||||
|
* recursive descent parser and converting the Markdown into a DOM tree.
|
||||||
|
* Once the DOM tree exists, it is converted into an HTML string and
|
||||||
|
* written to the output file provided.
|
||||||
|
*
|
||||||
|
* This class will have a `DOM` and a `DOMParser` which are used in this
|
||||||
|
* process.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
class Parser {
|
class Parser {
|
||||||
private:
|
|
||||||
std::string input_file_path;
|
|
||||||
std::string output_file_path;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Parser(std::string input_file_path, std::string output_file_path = "");
|
Parser(string input_file_path, string output_file_path = "");
|
||||||
|
|
||||||
inline void Print() {
|
/**
|
||||||
std::cout << this->input_file_path << " -> " << this->output_file_path
|
* @brief Inspect (view) contents of the class.
|
||||||
<< std::endl;
|
*
|
||||||
}
|
* Print each member of the class in its current state. Used for debugging.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
void Inspect();
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @brief Parse an entire document.
|
||||||
|
*
|
||||||
|
* This function will be called to yield the result. This is the entry point
|
||||||
|
* to the recursive descent parser.
|
||||||
|
*
|
||||||
|
* Currently, there are no parameters, they are still under consideration.
|
||||||
|
*
|
||||||
|
* It will be important to remember states between lines. For example, a
|
||||||
|
* paragraph that spans many lines should be inside the same node. But
|
||||||
|
* white space causes the node to be broken.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
void ParseDocument(void);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/**
|
||||||
|
* @brief Input file path.
|
||||||
|
*
|
||||||
|
* Must be provided by the user.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
string input_file_path;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Output file path.
|
||||||
|
*
|
||||||
|
* If not provided, will be generated using the `input_file_path` by removing
|
||||||
|
* the extension and appending `.html`.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
string output_file_path;
|
||||||
|
|
||||||
|
// NOTE: We need a stack, just not sure what goes in it yet
|
||||||
|
// std::stack<any> stack;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* @brief Parse a single line.
|
||||||
|
*
|
||||||
|
* How does this function work...
|
||||||
|
* This is where the magic happens.
|
||||||
|
*
|
||||||
|
* @param line Target line to parse, as string.
|
||||||
|
* @return Nothing for now; intended to return a DOMNode once that type exists.
|
||||||
|
*
|
||||||
|
* @author Hayden Hargreaves (hhargreaves2006@gmail.com)
|
||||||
|
*/
|
||||||
|
void ParseLine(string line);
|
||||||
|
|
||||||
|
// NOTE: Parser operations, again, abstract, just for brainstorming now
|
||||||
|
// These should operate on internal state, not lines themselves
|
||||||
|
void ParseHeader();
|
||||||
|
void ParseParagraph();
|
||||||
|
void ParseItalic();
|
||||||
|
void ParseBold();
|
||||||
|
void ParseBoldItalic();
|
||||||
|
|
||||||
|
// NOTE: Character operations, these are just for brainstorming
|
||||||
|
char Peek();
|
||||||
|
void Consume();
|
||||||
|
bool EndOfLine();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
24
lib/util.cpp
Normal file
24
lib/util.cpp
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#include "./util.h"
|
||||||
|
|
||||||
|
/**
 * @brief Strip whitespace from the END of a string, in place.
 *
 * The characters treated as whitespace are space, \t, \n, \r, \f and \v.
 * Leading whitespace is left untouched. A string that is empty or holds
 * nothing but whitespace ends up empty.
 *
 * @param input String to trim; modified directly.
 */
void removeTrailingWhitespace(std::string &input) {
  const std::string::size_type end = input.find_last_not_of(" \t\n\r\f\v");
  if (end == std::string::npos) {
    // Empty or whitespace-only input: there is nothing to keep.
    input.clear();
    return;
  }

  // Drop everything after the last non-whitespace character.
  input.erase(end + 1);
}

/**
 * @brief Strip whitespace from the START of a string, in place.
 *
 * The characters treated as whitespace are space, \t, \n, \r, \f and \v.
 * Trailing whitespace is left untouched. A string that is empty or holds
 * nothing but whitespace ends up empty.
 *
 * @param input String to trim; modified directly.
 */
void removeLeadingWhitespace(std::string &input) {
  const std::string::size_type start = input.find_first_not_of(" \t\n\r\f\v");
  if (start == std::string::npos) {
    // Empty or whitespace-only input: there is nothing to keep.
    input.clear();
    return;
  }

  // Drop everything before the first non-whitespace character.
  input.erase(0, start);
}
|
||||||
|
|
||||||
|
/**
 * @brief Trim whitespace from both ends of a string, in place.
 *
 * The characters treated as whitespace are space, \t, \n, \r, \f and \v.
 * A string that is empty or holds nothing but whitespace ends up empty.
 *
 * @param input String to trim; modified directly.
 */
void removeWhitespace(std::string &input) {
  const char *const kBlanks = " \t\n\r\f\v";

  // Tail first: if no non-blank character exists the result is empty.
  const std::string::size_type last = input.find_last_not_of(kBlanks);
  if (last == std::string::npos) {
    input.clear();
    return;
  }
  input.erase(last + 1);

  // At least one non-blank character remains, so this search cannot fail.
  input.erase(0, input.find_first_not_of(kBlanks));
}
|
||||||
10
lib/util.h
Normal file
10
lib/util.h
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#ifndef UTIL_H
|
||||||
|
#define UTIL_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
void removeTrailingWhitespace(std::string &input);
|
||||||
|
void removeLeadingWhitespace(std::string &input);
|
||||||
|
void removeWhitespace(std::string &input);
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -10,10 +10,10 @@ int main(int argc, char **argv) {
|
|||||||
try {
|
try {
|
||||||
if (argc >= 3) {
|
if (argc >= 3) {
|
||||||
Parser p(argv[1], argv[2]);
|
Parser p(argv[1], argv[2]);
|
||||||
p.Print();
|
p.Inspect();
|
||||||
} else {
|
} else {
|
||||||
Parser p(argv[1]);
|
Parser p(argv[1]);
|
||||||
p.Print();
|
p.Inspect();
|
||||||
}
|
}
|
||||||
} catch (const std::runtime_error &e) {
|
} catch (const std::runtime_error &e) {
|
||||||
std::cout << "Caught an error: " << e.what() << std::endl;
|
std::cout << "Caught an error: " << e.what() << std::endl;
|
||||||
@ -21,5 +21,7 @@ int main(int argc, char **argv) {
|
|||||||
std::cout << "Caught an error: UNKNOWN" << std::endl;
|
std::cout << "Caught an error: UNKNOWN" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -35,4 +35,4 @@ this is a break, because it ends with two spaces -> <p> However <br> this is a b
|
|||||||
|
|
||||||
Double returns also
|
Double returns also
|
||||||
|
|
||||||
yield line breaks -> <p> Double returns also <br> yield line breaks </p>
|
yields new paragraphs -> <p> Double returns also</p> <p> yields new paragraphs </p>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user