Implement basic document functions for C API, mostly equivalent to JS ones.

This commit is contained in:
Joshua Bemenderfer 2023-02-19 17:04:36 -05:00
parent 3f6c475756
commit 5c347a95a0
3 changed files with 218 additions and 6 deletions

View File

@ -1 +1,3 @@
test/test-runner test/test-runner
test/document
test/document.c

212
packages/c/document.h Normal file
View File

@ -0,0 +1,212 @@
#ifndef TERRACE_DOCUMENT_H
#define TERRACE_DOCUMENT_H
#include "parser.h"
/**
 * State for one Terrace document that is being read line by line.
 * Created by `terrace_create_document()` and advanced by `terrace_next()`.
 */
typedef struct terrace_document_s {
// == Internal State == //
// Set to 1 by `terrace_next()` when the line it just read is at or below the requested `levelScope`.
// The following `terrace_next()` call clears the flag and re-evaluates the same line
// instead of asking `reader` for a new one.
unsigned int _repeatCurrentLine;
// Current line being read. Its address is passed to `reader` on every read.
// Initialized to 0 by `terrace_create_document()`, i.e. no line has been read yet.
char* _currentLine;
// == External Information == //
// Embedded line data struct. Holds the parsed information (indent character, level, head and tail offsets)
// for the current line. Refreshed by `terrace_parse_line()` each time `terrace_next()` reads a line.
terrace_linedata_t lineData;
// Custom data passed, unmodified, as the second argument of every `reader` call
void* userData;
/**
* Line reader function, provided by the user
* Called by `terrace_next(doc, levelScope)` whenever it needs the next line
* @param {char**} line First argument is a pointer to `_currentLine`, above
* @param {void*} userData Second argument is `userData`, above
* @returns {int} The number of characters read, or -1 if no characters were read.
* `terrace_next()` treats -1 as the end of the document.
*/
int (*reader)(char** line, void* userData);
} terrace_document_t;
/**
* Initialize a Terrace document with indent parameters and the function neded to read lines.
* @param {char} indent The indent character to use. Generally a single space character.
* @param {int (*reader)(char** line, void* userData)} A function pointer to a function that reads lines sequentially
* from a user-provided source. Receives a pointer to lineData->_currLine, and userData, supplied in the next argument.
* @param {void*} userData A user-supplied pointer to any state information needed by their reader function.
* Passed to `reader`each time it is called.
* @returns {terrace_document_t} An initialized document that can now be used for futher parsing.
*/
terrace_document_t terrace_create_document(const char indent, int (*reader)(char** line, void* userData), void* userData) {
terrace_document_t document = {
._repeatCurrentLine = 0,
._currentLine = 0,
.lineData = terrace_create_line_data(indent),
.reader = reader,
.userData = userData
};
return document;
}
/**
 * Report how many indent characters precede the content of the current line.
 *
 * Given the following document, `terrace_level(doc)` would return 0, 1, 2, and 5 respectively for each line
 *
 * ```terrace
 * block
 *  block
 *   block
 *      block
 * ```
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @returns {unsigned int} The indent level of the current line
 */
unsigned int terrace_level(terrace_document_t* doc) {
  // The level is stored in the embedded line data, so this is a plain field read.
  unsigned int currentLevel = doc->lineData.level;
  return currentLevel;
}
/**
 * Get a string with the current line contents.
 * If `startOffset` is negative (conventionally -1), all indent characters are skipped.
 * Otherwise only the specified number of leading characters is skipped.
 *
 * Given the following document
 *
 * ```terrace
 * root
 *     sub-line
 * ```
 * `terrace_line(doc, -1)` on the second line returns "sub-line", trimming off the leading indent characters
 * `terrace_line(doc, 0)` however, returns "    sub-line", with all four leading spaces
 *
 * Non-negative `startOffset`s are primarily used for parsing blocks that have literal indented multi-line text
 *
 * The returned pointer aliases the document's current line buffer; it is not a copy.
 * If the line is shorter than the requested offset, the result points at the line's terminator
 * (an empty string) rather than past the end of the buffer.
 *
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @param {int} startOffset How many leading characters to skip before the line contents. Pass -1 to use the current indent level
 * @returns {char*} The line contents starting from `startOffset`, or a null pointer if there is no current line
 */
char* terrace_line(terrace_document_t* doc, int startOffset) {
  char* line = doc->_currentLine;

  // There may be no line at all: nothing read yet, or an empty line handed over as a null pointer.
  // Pointer arithmetic on a null pointer is undefined, so hand it back untouched.
  if (!line) return line;

  if (startOffset < 0) startOffset = (int) doc->lineData.level;

  // Advance one character at a time so the result can never move past the terminator.
  while (startOffset > 0 && *line != '\0') {
    ++line;
    --startOffset;
  }

  return line;
}
/**
 * Get the *length* of the first "word" of a line,
 * measured from the first non-indent character to the first space or the end of the line.
 * Often used for deciding how to parse a block.
 *
 * Because C uses NUL-terminated strings, a slice from the middle of a string cannot be returned without copying.
 * Instead, `terrace_head_length` provides the length of the head portion.
 * In combination with `doc->lineData.offsetHead`, you can copy the head section into a new string,
 * or use any number of `strn*` C stdlib functions to work with the head section without copying it.
 *
 * Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser
 *
 * Given the following line, `terrace_head_length(doc)` returns `5`
 *
 * ```terrace
 * title An Important Document
 * ```
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @returns {int} The length of the `head` portion (first word) of a line
 */
int terrace_head_length(terrace_document_t* doc) {
  // The head spans from `offsetHead` up to, but not including, `offsetTail`.
  const unsigned int headStart = doc->lineData.offsetHead;
  const unsigned int headEnd = doc->lineData.offsetTail;
  return headEnd - headStart;
}
/**
 * Get a char pointer to everything following the first "word" of a line,
 * starting from the first character after the space at the end of `head`
 *
 * Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser
 *
 * Given the following line, `terrace_tail(doc)` returns "An Important Document"
 *
 * ```terrace
 * title An Important Document
 * ```
 *
 * If the line consists of a head only (nothing follows the first word), the result points at the
 * line's terminator, i.e. an empty string. The returned pointer aliases the current line buffer.
 *
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @returns {char*} The remainder of the line following the `head` portion, with no leading space,
 *   or a null pointer if there is no current line
 */
char* terrace_tail(terrace_document_t* doc) {
  char* line = doc->_currentLine;

  // No current line (nothing read yet, or an empty line supplied as a null pointer): nothing to offset into.
  if (!line) return line;

  char* tail = line + doc->lineData.offsetTail;

  // `offsetTail` is expected to sit on the separator after the head, or on the terminator when the
  // line has no tail. Only step over a separator; stepping over the terminator would leave the buffer.
  if (*tail != '\0') ++tail;

  return tail;
}
/**
 * Quickly check if the current line head equals a specified value. Useful in many document-parsing situations.
 *
 * The comparison covers the whole head (first word) and nothing else: a value that is only a prefix
 * of the head, or that extends into the tail, does not match.
 *
 * Given the following line
 *
 * ```terrace
 * title An Important Document
 * ```
 *
 * `terrace_match(doc, "title")` returns `1`
 * `terrace_match(doc, "somethingElse")` returns `0`
 * `terrace_match(doc, "tit")` returns `0`
 *
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @param {const char*} matchHead A string to check against the line `head` for equality
 * @returns {char} A byte set to 0 if the head does not match, or 1 if it does match
 */
char terrace_match(terrace_document_t* doc, const char* matchHead) {
  if (!matchHead) return 0;

  // Without a current line the head is empty, so only an empty `matchHead` can match.
  if (!doc->_currentLine) return matchHead[0] == '\0';

  // Get a pointer to the start of the head portion of the string, and the number of characters it spans.
  const char* head = doc->_currentLine + doc->lineData.offsetHead;
  const unsigned int headLength = doc->lineData.offsetTail - doc->lineData.offsetHead;

  // Walk both strings in lockstep, never reading `head` beyond its own length.
  unsigned int i = 0;
  while (i < headLength && matchHead[i] != '\0') {
    if (matchHead[i] != head[i]) return 0;
    i++;
  }

  // Equal only if both were consumed completely: this rejects a `matchHead` that is merely
  // a prefix of the head, as well as one that is longer than the head.
  return i == headLength && matchHead[i] == '\0';
}
/**
 * Advances the current position in the terrace document and populates `doc->lineData`
 * with the parsed information from that line
 *
 * Returns `1` after parsing the next line, or `0` upon reaching the end of the document.
 * If the `levelScope` parameter is not -1, `terrace_next()` will also return `0` when it encounters a line
 * with a level at or below `levelScope`. This allows you to iterate through subsections of a document.
 *
 * When such an out-of-scope line is encountered, the following call to `terrace_next()` evaluates that
 * same line again instead of reading a new one. This allows a child loop to look forward, determine that
 * the next line is outside its purview, and return control to the calling loop without additional logic.
 *
 * Intended to be used inside a while loop to parse a section of a Terrace document.
 *
 * ```c
 * while (terrace_next(doc, -1)) {
 *   // Do something with each line.
 * }
 * ```
 *
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @param {int} levelScope If set above -1, `terrace_next()` will return `0` when it encounters a line with a level at or below `levelScope`
 * @returns {char} Returns `1` after parsing a line, or `0` if the document has ended or a line at or below `levelScope` has been encountered.
 */
char terrace_next(terrace_document_t* doc, int levelScope) {
  if (!doc->_repeatCurrentLine) {
    // Normal path: pull the next line from the user's reader.
    int charsRead = doc->reader(&doc->_currentLine, doc->userData);

    // The reader signals the end of the document with -1.
    if (charsRead == -1) return 0;

    // Refresh lineData with the level and head/tail offsets of the new line.
    terrace_parse_line(doc->_currentLine, &doc->lineData);
  } else {
    // The previous call found this line to be out of its scope and left it for us:
    // consume the pending repeat and evaluate the already-parsed line again.
    doc->_repeatCurrentLine = 0;
  }

  // A line belongs to the caller's scope only if it is nested deeper than `levelScope`.
  const int inScope = (int) doc->lineData.level > levelScope;

  // Out-of-scope lines are kept for the next call so that an enclosing loop gets to handle them.
  if (!inScope) doc->_repeatCurrentLine = 1;

  return inScope ? 1 : 0;
}
#endif

View File

@ -2,7 +2,7 @@
#define TERRACE_PARSER_H #define TERRACE_PARSER_H
// Holds the parsed information from each line. // Holds the parsed information from each line.
struct terrace_linedata_s { typedef struct terrace_linedata_s {
// Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call. // Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
char indent; char indent;
// How many indent characters are present in the current line before the first non-indent character. // How many indent characters are present in the current line before the first non-indent character.
@ -12,11 +12,9 @@ struct terrace_linedata_s {
unsigned int offsetHead; unsigned int offsetHead;
// The number of characters before the start of the line's "tail" section. // The number of characters before the start of the line's "tail" section.
unsigned int offsetTail; unsigned int offsetTail;
}; } terrace_linedata_t;
typedef struct terrace_linedata_s terrace_linedata_t; terrace_linedata_t terrace_create_line_data(const char indent) {
terrace_linedata_t terrace_create_line_data(char indent) {
terrace_linedata_t line_data = { .indent = indent, .level = 0, .offsetHead = 0, .offsetTail = 0 }; terrace_linedata_t line_data = { .indent = indent, .level = 0, .offsetHead = 0, .offsetTail = 0 };
return line_data; return line_data;
} }
@ -26,7 +24,7 @@ terrace_linedata_t terrace_create_line_data(char indent) {
* @param char* line A pointer to the line to parse as a C-style string. Shouldn't end with a newline. * @param char* line A pointer to the line to parse as a C-style string. Shouldn't end with a newline.
* @param terrace_linedata_t* lineData A pointer to the terrace_linedata_t struct to store information about the current line in. * @param terrace_linedata_t* lineData A pointer to the terrace_linedata_t struct to store information about the current line in.
*/ */
void terrace_parse_line(char *line, terrace_linedata_t *lineData) { void terrace_parse_line(const char *line, terrace_linedata_t *lineData) {
// Empty lines are nullptr/0 as they have no characters. (The newline character should be stripped off.) // Empty lines are nullptr/0 as they have no characters. (The newline character should be stripped off.)
// Special case handling for these allows them to be parsed extra quickly. // Special case handling for these allows them to be parsed extra quickly.
if (!line) { if (!line) {