From 5c347a95a0e25f9a52688ec81c7052bd131e1d28 Mon Sep 17 00:00:00 2001 From: Joshua Bemenderfer Date: Sun, 19 Feb 2023 17:04:36 -0500 Subject: [PATCH] Implement basic document functions for C API, mostly equivalent to JS ones. --- packages/c/.gitignore | 2 + packages/c/document.h | 212 ++++++++++++++++++++++++++++++++++++++++++ packages/c/parser.h | 10 +- 3 files changed, 218 insertions(+), 6 deletions(-) create mode 100644 packages/c/document.h diff --git a/packages/c/.gitignore b/packages/c/.gitignore index a65dd00..a8a24c4 100644 --- a/packages/c/.gitignore +++ b/packages/c/.gitignore @@ -1 +1,3 @@ test/test-runner +test/document +test/document.c \ No newline at end of file diff --git a/packages/c/document.h b/packages/c/document.h new file mode 100644 index 0000000..6e6998c --- /dev/null +++ b/packages/c/document.h @@ -0,0 +1,212 @@ +#ifndef TERRACE_DOCUMENT_H +#define TERRACE_DOCUMENT_H + +#include "parser.h" + +typedef struct terrace_document_s { + // == Internal State == // + unsigned int _repeatCurrentLine; + // Current line being read + char* _currentLine; + + // == External Information == // + // Embedded line data struct. Holds information about the current parsed line + terrace_linedata_t lineData; + // Custom data passed to the readline function + void* userData; + /** + * Line reader function, provided by the user + * Needed to get the next line inside of `terrace_next(doc)` + * @param {char**} line First argument is a pointer to `_currentLine`, above + * @param {void*} userData Second argument is `userData`, above + * @returns {int} The number of characters read, or -1 if no characters were read. + */ + int (*reader)(char** line, void* userData); +} terrace_document_t; + +/** + * Initialize a Terrace document with indent parameters and the function needed to read lines. + * @param {char} indent The indent character to use. Generally a single space character. 
+ * @param {int (*reader)(char** line, void* userData)} reader A function pointer to a function that reads lines sequentially + * from a user-provided source. Receives a pointer to the document's `_currentLine`, and `userData`, supplied in the next argument. + * @param {void*} userData A user-supplied pointer to any state information needed by their reader function. + * Passed to `reader` each time it is called. + * @returns {terrace_document_t} An initialized document that can now be used for further parsing. + */ +terrace_document_t terrace_create_document(const char indent, int (*reader)(char** line, void* userData), void* userData) { + terrace_document_t document = { + ._repeatCurrentLine = 0, + ._currentLine = 0, + .lineData = terrace_create_line_data(indent), + .reader = reader, + .userData = userData + }; + + return document; +} + +/** + * Returns the number of indent characters of the current line + * + * Given the following document, `terrace_level(doc)` would return 0, 1, 2, and 5 respectively for each line + * + * ```terrace + * block + *  block + *   block + *      block + * ``` + * @returns {unsigned int} The indent level of the current line + */ +unsigned int terrace_level(terrace_document_t* doc) { + return doc->lineData.level; +} + +/** + * Get a string with the current line contents + * If `startOffset` is -1, skips all indent characters by default. Otherwise only skips the amount specified + * + * Given the following document + * + * ```terrace + * root + *     sub-line + * ``` + * `terrace_line(doc, -1)` on the second line returns "sub-line", trimming off the leading indent characters + * `terrace_line(doc, 0)` however, returns "    sub-line", with all four leading spaces + * + * `startOffset`s other than `-1` are primarily used for parsing blocks that have literal indented multi-line text + * + * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed + * @param {int} startOffset How many indent characters to skip before outputting the line contents. 
Pass `-1` to default to the current indent level + * @returns {char*} The line contents starting from `startOffset` + */ +char* terrace_line(terrace_document_t* doc, int startOffset) { + if (startOffset == -1) startOffset = doc->lineData.level; + return doc->_currentLine + startOffset; +} + +/** + * Get the *length* of the first "word" of a line, + * starting from the first non-indent character to the first space or end of the line + * Often used for deciding how to parse a block. + * + * Because C uses NUL-terminated strings, we cannot easily slice a string to return something out of the middle. + * Instead, `terrace_head_length` provides the length of the head portion. + * In combination with `doc->lineData.offsetHead`, you can copy the head section into a new string, + * or use any number of `strn*` C stdlib functions to work with the head section without copying it. + * + * Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser + * + * Given the following line, `terrace_head_length(doc)` returns `5` + * + * ```terrace + * title An Important Document + * ``` + * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed + * @returns {int} The length of the `head` portion (first word) of a line + */ +int terrace_head_length(terrace_document_t* doc) { + return doc->lineData.offsetTail - doc->lineData.offsetHead; +} + +/** + * Get a char pointer to everything following the first "word" of a line, + * starting from the first character after the space at the end of `head` + * + * Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser + * + * Given the following line, `terrace_tail(doc)` returns "An Important Document" + * + * ```terrace + * title An Important Document + * ``` + * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed + * @returns {char*} The remainder of the line following the `head` portion, with no leading space + 
*/ +char* terrace_tail(terrace_document_t* doc) { + return doc->_currentLine + doc->lineData.offsetTail + 1; +} + +/** + * Quickly check if the current line head matches a specified value. Useful in many document-parsing situations. + * + * Given the following line + * + * ```terrace + * title An Important Document + * ``` + * + * `terrace_match(doc, "title")` returns `1` + * `terrace_match(doc, "somethingElse")` returns `0` + * + * @param {const char*} matchHead A string to check against the line `head` for equality + * @returns {char} A byte set to 0 if the head does not match, or 1 if it does match + */ +char terrace_match(terrace_document_t* doc, const char* matchHead) { + // Get a pointer to the start of the head portion of the string. + char* head = doc->_currentLine + doc->lineData.offsetHead; + + int i = 0; + // Loop until we run out of characters in `matchHead`. + while (matchHead[i] != '\0') { + // Return as unmatched if we run out of `head` characters + // or if a character at the same position in both matchHead and head is not identical. + if (head[i] == '\0' || matchHead[i] != head[i]) return 0; + i++; + } + + // If we didn't return inside the while loop, every character of `matchHead` matched the start of `head`, a successful match. + return 1; +} + +/** + * Advances the current position in the terrace document and populates `doc->lineData` + * with the parsed information from that line + * + * Returns `1` after parsing the next line, or `0` upon reaching the end of the document. + * If the `levelScope` parameter is not -1, `terrace_next()` will also return `0` when it encounters a line + * with a level at or below `levelScope`. This allows you to iterate through subsections of a document. + * + * If a lower-level line was encountered, the following call to `next()` will repeat this line again. 
+ * This allows a child loop to look forward, determine that the next line will be outside its purview, + * and return control to the calling loop transparently without additional logic. + * + * Intended to be used inside a while loop to parse a section of a Terrace document. + * + * ```c + * while(terrace_next(doc, -1)) { + * // Do something with each line. + * } + * ``` + * + * @param {int} levelScope If set above -1, `next()` will return `0` when it encounters a line with a level at or below `levelScope` + * @returns {char} Returns `1` after parsing a line, or `0` if the document has ended or a line at or below `levelScope` has been encountered. + */ +char terrace_next(terrace_document_t* doc, int levelScope) { + // Repeat the current line instead of parsing a new one if the previous call to next() + // determined the current line to be out of its scope. + if (doc->_repeatCurrentLine) doc->_repeatCurrentLine = 0; + // Otherwise parse the line normally. + else { + // Load the next line from the line reader. + int chars_read = doc->reader(&doc->_currentLine, doc->userData); + // If there are no more lines, bail out. + if (chars_read == -1) return 0; + + // Populate lineData with parsed information from the current line. + terrace_parse_line(doc->_currentLine, &doc->lineData); + } + + // If we shouldn't be handling this line, make the following call to next() repeat the current line. + // Allows a child loop to look forward, determine that the next line will be outside its purview, + // and return control to the calling loop transparently without additional logic. + if ((int) terrace_level(doc) <= levelScope) { + doc->_repeatCurrentLine = 1; + return 0; + } + + return 1; +} + +#endif \ No newline at end of file diff --git a/packages/c/parser.h b/packages/c/parser.h index 5bd5aea..1a86ad8 100644 --- a/packages/c/parser.h +++ b/packages/c/parser.h @@ -2,7 +2,7 @@ #define TERRACE_PARSER_H // Holds the parsed information from each line. 
-struct terrace_linedata_s { +typedef struct terrace_linedata_s { // Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call. char indent; // How many indent characters are present in the current line before the first non-indent character. @@ -12,11 +12,9 @@ struct terrace_linedata_s { unsigned int offsetHead; // The number of characters before the start of the line's "tail" section. unsigned int offsetTail; -}; +} terrace_linedata_t; -typedef struct terrace_linedata_s terrace_linedata_t; - -terrace_linedata_t terrace_create_line_data(char indent) { +terrace_linedata_t terrace_create_line_data(const char indent) { terrace_linedata_t line_data = { .indent = indent, .level = 0, .offsetHead = 0, .offsetTail = 0 }; return line_data; } @@ -26,7 +24,7 @@ terrace_linedata_t terrace_create_line_data(char indent) { * @param char* line A pointer to the line to parse as a C-style string. Shouldn't end with a newline. * @param terrace_linedata_t* lineData A pointer to the terrace_linedata_t struct to store information about the current line in. */ -void terrace_parse_line(char *line, terrace_linedata_t *lineData) { +void terrace_parse_line(const char *line, terrace_linedata_t *lineData) { // Empty lines are nullptr/0 as they have no characters. (The newline character should be stripped off.) // Special case handling for these allows them to be parsed extra quickly. if (!line) {