diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..91c99c2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.o +/build +/build/* +parser diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1a338d2 --- /dev/null +++ b/Makefile @@ -0,0 +1,46 @@ +# Define the C++ compiler and flags +CXX = g++ +CXXFLAGS = -Wall -g + +# Directories +BUILD_DIR = build +SRC_DIR = src +LIB_DIR = lib + +# Executable name +TARGET = parser + +SRC_FILES := $(wildcard $(SRC_DIR)/*.cpp) +LIB_FILES := $(wildcard $(LIB_DIR)/*.cpp) +ALL_SOURCES = $(SRC_FILES) $(LIB_FILES) + +# Generate object file paths in the build directory +OBJECTS := $(patsubst %.cpp, $(BUILD_DIR)/%.o, $(notdir $(ALL_SOURCES))) + +# Include directories +INCLUDES = -I$(LIB_DIR) -I$(SRC_DIR) + +.PHONY: all clean test + +all: $(BUILD_DIR) $(TARGET) + +$(BUILD_DIR): + mkdir -p $(BUILD_DIR) + +$(TARGET): $(OBJECTS) + $(CXX) $(CXXFLAGS) $(INCLUDES) $^ -o $@ + +$(BUILD_DIR)/main.o: $(SRC_DIR)/main.cpp $(LIB_DIR)/parser.h + $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ + +$(BUILD_DIR)/parser.o: $(LIB_DIR)/parser.cpp $(LIB_DIR)/parser.h + $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ + +test: all + ./$(TARGET) + ./$(TARGET) ' ' + ./$(TARGET) ./test/input.md + ./$(TARGET) ./test/input.md ./test/output.html + +clean: + rm -rf $(BUILD_DIR) $(TARGET) diff --git a/README.md b/README.md index 308db0b..980ee84 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,31 @@ # MarkdownToHtmlCompiler -This compiler will convert a Markdown file into an HTML output. \ No newline at end of file +This compiler will convert a Markdown file into an HTML output. + + + +### Notes + +Recursive Descent Parser: This is the primary algorithm you'll use. It's a top-down parsing technique +where a set of recursive functions "descend" through the grammar of your simple Markdown language. +For example, a parse_document() function would call parse_line(), which in turn might call parse_bold_text() +or parse_italic_text(). 
This method is intuitive and easy to implement for a simple grammar. + +Stack: A stack is essential for handling nested elements. For instance, if you allow bold text inside +italic text (_This is *bold and italic* text_), you can push the _ token onto the stack and then push +the * token. When you encounter the closing *, you check if the top of the stack matches. This ensures +that all tags are correctly opened and closed. Your presentation can visually demonstrate this process +with a stack diagram. + +Hash Map or Map: A hash map (std::unordered_map) or a map (std::map) can be used to efficiently store +and retrieve the HTML equivalent for each Markdown tag. For example, you could map `#` to `

` or "_" +to ``. This provides O(1) average-case lookup time. + +### Targets + +- [-] Convert a .md file to an .html output + +### Reaches +- [ ] Hot reload? + + diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..5cc6d19 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1760284886, + "narHash": "sha256-TK9Kr0BYBQ/1P5kAsnNQhmWWKgmZXwUQr4ZMjCzWf2c=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "cf3f5c4def3c7b5f1fc012b3d839575dbe552d43", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..351e25c --- /dev/null +++ b/flake.nix @@ -0,0 +1,42 @@ +{ + description = "Blank development flake. Adjust as needed"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils, ... 
}:
+    flake-utils.lib.eachDefaultSystem (system:
+      let
+        pkgs = import nixpkgs { inherit system; };
+      in
+      {
+        # Define the development shell.
+        # When you run `nix develop` (or direnv activates), you'll enter this shell.
+        devShells.default = pkgs.mkShell {
+          # List all the development tools you need available in this shell's PATH.
+          packages = with pkgs; [
+            gcc
+            gdb
+            stdenv
+          ];
+
+          # Define the shell that will be executed.
+          # Here, we explicitly use zsh.
+          # Note: pkgs.zsh needs to be included in `packages` or `nativeBuildInputs`
+          # for it to be found in the shell's environment. `inherit pkgs.zsh;` is concise.
+          inherit (pkgs) zsh;
+
+          # Environment variables and commands to run when the shell starts.
+          shellHook = ''
+            # Add any exports, hooks, aliases, or anything else here
+
+            # Exec zsh to replace the current shell process with zsh.
+            # This ensures your prompt and zsh configurations load correctly.
+            exec zsh
+          '';
+        };
+      }
+    );
+}
diff --git a/lib/parser.cpp b/lib/parser.cpp
new file mode 100644
index 0000000..1ab53b2
--- /dev/null
+++ b/lib/parser.cpp
@@ -0,0 +1,64 @@
+#include "./parser.h"
+
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+using std::string;
+
+namespace {
+
+// Characters stripped from both ends of a path argument.
+const char *const kWhitespace = " \t\n\r\f\v";
+
+// Returns `path` with the extension of its last component replaced by
+// ".html". A '.' only counts as an extension separator when it lies inside
+// the file name itself: dots in directory components ("./docs/readme") and a
+// leading dot ("notes/.draft") are left alone and ".html" is appended.
+string withHtmlExtension(const string &path) {
+  const size_t separator = path.find_last_of("/\\");
+  const size_t name_start = (separator == string::npos) ? 0 : separator + 1;
+  const size_t dot = path.find_last_of('.');
+
+  if (dot == string::npos || dot <= name_start) {
+    return path + ".html";
+  }
+  return path.substr(0, dot) + ".html";
+}
+
+} // namespace
+
+// Trims leading and trailing whitespace from `input` in place. A string that
+// is empty or contains only whitespace ends up empty.
+void removeWhitespace(string &input) {
+  const size_t last = input.find_last_not_of(kWhitespace);
+  if (last == string::npos) {
+    input.clear();
+    return;
+  }
+  input.erase(last + 1);
+
+  // At least one non-whitespace character remains, so this cannot be npos.
+  input.erase(0, input.find_first_not_of(kWhitespace));
+}
+
+// Builds a parser for `input_file_path`. Both paths are trimmed first.
+// When `output_file_path` is empty after trimming, it is derived from the
+// input path by swapping its extension for ".html".
+// Throws std::runtime_error if the trimmed input path is empty.
+Parser::Parser(string input_file_path, string output_file_path) {
+  removeWhitespace(input_file_path);
+  removeWhitespace(output_file_path);
+
+  if (input_file_path.empty()) {
+    throw std::runtime_error("input_file_path cannot be empty");
+  }
+
+  if (output_file_path.empty()) {
+    output_file_path = withHtmlExtension(input_file_path);
+  }
+
+  this->input_file_path = std::move(input_file_path);
+  this->output_file_path = std::move(output_file_path);
+}
diff --git a/lib/parser.h b/lib/parser.h
new file mode 100644
index 0000000..e8e8db2
--- /dev/null
+++ b/lib/parser.h
@@ -0,0 +1,25 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include <iostream>
+#include <string>
+
+// Holds the input and output file paths of one conversion.
+class Parser {
+private:
+  std::string input_file_path;
+  std::string output_file_path;
+
+public:
+  // Trims both paths; an empty `output_file_path` is replaced by the input
+  // path with its extension changed to ".html". Throws std::runtime_error
+  // when the trimmed input path is empty.
+  Parser(std::string input_file_path, std::string output_file_path = "");
+
+  // Writes "<input> -> <output>" followed by a newline to stdout.
+  void Print() const {
+    std::cout << input_file_path << " -> " << output_file_path << std::endl;
+  }
+};
+
+#endif
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..032fcd4
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,31 @@
+#include "../lib/parser.h"
+
+#include <exception>
+#include <iostream>
+
+// Entry point: parser <input> [output]
+//
+// NOTE: every path below returns 0, including the usage and error paths.
+// The Makefile `test` target deliberately runs the binary with no argument
+// and with a blank argument, so switching to a non-zero status has to be
+// done together with that target.
+int main(int argc, char **argv) {
+  if (argc <= 1) {
+    const char *program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "parser";
+    std::cerr << "Usage: " << program << " <input.md> [output.html]"
+              << std::endl;
+    return 0; // TODO: Should return 1?
+  }
+
+  try {
+    // An empty output path makes Parser derive one from the input path.
+    Parser p(argv[1], argc >= 3 ? argv[2] : "");
+    p.Print();
+  } catch (const std::exception &e) {
+    std::cerr << "Caught an error: " << e.what() << std::endl;
+  } catch (...) {
+    std::cerr << "Caught an error: UNKNOWN" << std::endl;
+  }
+
+  return 0;
+}
diff --git a/syntax.md b/syntax.md
new file mode 100644
index 0000000..d780cfe
--- /dev/null
+++ b/syntax.md
@@ -0,0 +1,38 @@
+
+Reference [here](https://www.markdownguide.org/basic-syntax/)
+
+Headings, h# tags
+
+
+# Header Level 1 ->

<h1>Content</h1>
+## Header Level 2 -> <h2>Content</h2>
+### Header Level 3 -> <h3>Content</h3>
+#### Header Level 4 -> <h4>Content</h4>
+##### Header Level 5 -> <h5>Content</h5>
+###### Header Level 6 -> <h6>Content</h6>
+ + +Alternate syntax (n number of =/-) + +Header Level 1 ->

<h1>Content</h1>

+================ + + +Header Level 2 ->

<h2>Content</h2>

+---------------- + + +Paragraph tags + +Hello world ->

<p>Hello world</p>

+ +This is also +a paragraph ->

<p>this is also a paragraph regardless</p>

+regardless + +However +this is a break, because it ends with two spaces ->

<p>However<br>this is a break, because it ends with two spaces</p>

+ +Double returns also + +yield line breaks ->

<p>Double returns also<br>yield line breaks</p>

diff --git a/test/input.md b/test/input.md new file mode 100644 index 0000000..03da0ea --- /dev/null +++ b/test/input.md @@ -0,0 +1,17 @@ + + +# Hello world in an h1 tag + + +## This is a h2 tag + + +### h3 + + +#### h4 + +##### h5 + +###### h6 +