295 lines
12 KiB
Plaintext
295 lines
12 KiB
Plaintext
Heading 2 Document API
|
|
class mt-12
|
|
|
|
Heading 3 terrace_document_t
|
|
class mb-4 mt-12
|
|
Markdown
|
|
Tracks state of a document while being parsed.
|
|
Obtained from [terrace_create_document()](#terrace-create-document) below
|
|
CodeBlock c
|
|
// Type Definition
|
|
typedef struct terrace_document_s {
|
|
// == Internal State == //
|
|
unsigned int _repeatCurrentLine;
|
|
// Current line being read
|
|
char* _currentLine;
|
|
|
|
// == External Information == //
|
|
// Embedded line data struct. Holds information about the current parsed line
|
|
terrace_linedata_t lineData;
|
|
// Custom data passed to the readline function
|
|
void* userData;
|
|
/**
|
|
* Line reader function, provided by the user
|
|
* Needed to get the next line inside of `terrace_next(doc)`
|
|
* @param {char**} line First argument is a pointer to `_currentLine`, above
|
|
* @param {void*} userData Second argument is `userData`, above
|
|
* @returns {int} The number of characters read, or -1 if no characters were read.
|
|
*/
|
|
int (*reader)(char** line, void* userData);
|
|
} terrace_document_t;
|
|
|
|
Heading 3 terrace_create_document()
|
|
class mb-4 mt-12
|
|
Markdown
|
|
| Parameter | Type | Description
|
|
| -------------- | --------------------- | -----------------------------------------------------------------------
|
|
| indent | const char | The indent character to use. Generally a single space character.
|
|
| reader | int (\*reader)(char** line, void* userData) | A function pointer to a function that reads lines sequentially from a user-provided source. Receives a pointer to the document's `_currentLine`, and `userData`, supplied in the next argument.
|
|
| userData | void * | A user-supplied pointer to any state information needed by their reader function. Passed to `reader` each time it is called.
|
|
| **@returns** | [terrace_document_t](#terrace-documentt) | A state struct needed by the convenience functions below.
|
|
|
|
Initializes the state needed for the convenience functions below. Takes a user-supplied `reader` function to read each line from a user-determined source.
|
|
CodeBlock c
|
|
// Call Signature
|
|
terrace_document_t terrace_create_document(const char indent, int (*reader)(char** line, void* userData), void* userData)
|
|
|
|
Heading 3 terrace_next()
|
|
class mb-4 mt-12
|
|
|
|
Markdown
|
|
| Parameter | Type | Description
|
|
| -------------- | --------------------- | -----------------------------------------------------------------------
|
|
| doc | [terrace_document_t*](#terrace-documentt) | A pointer to the current document state struct.
|
|
| levelScope | int | If set above -1, `terrace_next()` will return `0` when it encounters a line with a level at or below `levelScope`
|
|
| **@returns** | char | Returns `1` after parsing a line, or `0` if the document has ended or a line at or below `levelScope` has been encountered.
|
|
|
|
Advances the current position in the terrace document and populates lineData
|
|
with the parsed information from that line.
|
|
|
|
Returns `1` after parsing the next line, or `0` upon reaching the end of the document.
|
|
If the `levelScope` parameter is not -1, `terrace_next()` will also return `0` when it encounters a line
|
|
with a level at or below `levelScope`. This allows you to iterate through subsections of a document.
|
|
|
|
If a lower-level line was encountered, the following call to `terrace_next()` will repeat this line again.
|
|
This allows a child loop to look forward, determine that the next line will be outside its purview,
|
|
and return control to the calling loop transparently without additional logic.
|
|
|
|
Intended to be used inside a while loop to parse a section of a Terrace document.
|
|
|
|
CodeBlock c
|
|
// Call Signature
|
|
char terrace_next(terrace_document_t* doc, int levelScope)
|
|
|
|
// Usage
|
|
while(terrace_next(doc, -1)) {
|
|
// Do something with each line.
|
|
}
|
|
|
|
Heading 3 terrace_level()
|
|
class mb-4 mt-12
|
|
Markdown
|
|
| Parameter | Type | Description
|
|
| -------------- | --------------------- | -----------------------------------------------------------------------
|
|
| doc | [terrace_document_t*](#terrace-documentt) | A pointer to the current document state struct.
|
|
| **@returns** | unsigned int | The indent level of the current line
|
|
|
|
Returns the number of indent characters of the current line.
|
|
|
|
Given the following document, `terrace_level(doc)` would return 0, 1, 2, and 5 respectively for each line.
|
|
CodeBlock terrace
|
|
block
|
|
block
|
|
block
|
|
block
|
|
|
|
CodeBlock c
|
|
// Call Signature
|
|
unsigned int terrace_level(terrace_document_t* doc)
|
|
|
|
// Usage
|
|
while(terrace_next(doc, -1)) {
|
|
printf("Indent Level: %u", terrace_level(doc));
|
|
}
|
|
|
|
Heading 3 terrace_line()
|
|
class mb-4 mt-12
|
|
Markdown
|
|
| Parameter | Type | Description
|
|
| -------------- | --------------------- | -----------------------------------------------------------------------
|
|
| doc | [terrace_document_t*](#terrace-documentt) | A pointer to the current document state struct.
|
|
| startOffset | int | How many indent characters to skip before outputting the line contents. If set to -1, uses the current indent level.
|
|
| **@returns** | char* | The line contents starting from `startOffset`
|
|
|
|
Get a string with the current line contents.
|
|
If `startOffset` is -1, skips all indent characters by default. Otherwise only skips the amount specified.
|
|
|
|
Given the following document
|
|
CodeBlock terrace
|
|
root
|
|
sub-line
|
|
Markdown
|
|
- Calling `terrace_line(doc, -1)` on the second line returns "sub-line", trimming off the leading indent characters.
|
|
- Calling `terrace_line(doc, 0)` however, returns "    sub-line", with all four leading spaces.
|
|
|
|
`startOffset`s other than `-1` are primarily used for parsing blocks that have literal indented multi-line text.
|
|
|
|
CodeBlock c
|
|
// Call Signature
|
|
char* terrace_line(terrace_document_t* doc, int startOffset)
|
|
|
|
// Usage
|
|
while(terrace_next(doc, -1)) {
|
|
printf("Line with indent characters: %s", terrace_line(doc, 0));
|
|
printf("Line without indent characters: %s", terrace_line(doc, -1));
|
|
}
|
|
|
|
Heading 3 terrace_head_length()
|
|
class mb-4 mt-12
|
|
Markdown
|
|
| Parameter | Type | Description
|
|
| -------------- | --------------------- | -----------------------------------------------------------------------
|
|
| doc | [terrace_document_t*](#terrace-documentt) | A pointer to the current document state struct.
|
|
| **@returns** | unsigned int | The length of the `head` portion (first word) of a line
|
|
|
|
Get the *length* of the first "word" of a line,
|
|
starting from the first non-indent character to the first space or end of the line.
|
|
Often used for deciding how to parse a block.
|
|
|
|
Because C uses NUL-terminated strings, we cannot easily slice a string to return something out of the middle.
|
|
Instead, `terrace_head_length()` provides the length of the head portion.
|
|
In combination with `doc->lineData.offsetHead`, you can copy the head section into a new string,
|
|
or use any number of `strn*` C stdlib functions to work with the head section without copying it.
|
|
|
|
Terrace DSLs do not *need* to use head-tail line structure, but support for it is built into the parser.
|
|
|
|
Given the following line, `terrace_head_length(doc)` returns `5`
|
|
CodeBlock terrace
|
|
title An Important Document
|
|
CodeBlock c
|
|
// Call Signature
|
|
unsigned int terrace_head_length(terrace_document_t* doc)
|
|
|
|
// Usage
|
|
while(terrace_next(doc, -1)) {
|
|
printf("Head length: %u", terrace_head_length(doc));
|
|
}
|
|
|
|
Heading 3 terrace_tail()
|
|
class mb-4 mt-12
|
|
Markdown
|
|
| Parameter | Type | Description
|
|
| -------------- | --------------------- | -----------------------------------------------------------------------
|
|
| doc | [terrace_document_t*](#terrace-documentt) | A pointer to the current document state struct.
|
|
| **@returns** | char* | The remainder of the line following the `head` portion, with no leading space.
|
|
|
|
Get a char pointer to everything following the first "word" of a line,
|
|
starting from the first character after the space at the end of `head`.
|
|
|
|
Terrace DSLs do not *need* to use head-tail line structure, but support for it is built into the parser.
|
|
|
|
Given the following line, `terrace_tail(doc)` returns "An Important Document"
|
|
CodeBlock terrace
|
|
title An Important Document
|
|
CodeBlock c
|
|
// Call Signature
|
|
char* terrace_tail(terrace_document_t* doc)
|
|
|
|
// Usage
|
|
while(terrace_next(doc, -1)) {
|
|
printf("Line tail: %s", terrace_tail(doc));
|
|
}
|
|
|
|
Heading 3 terrace_match()
|
|
class mb-4 mt-12
|
|
Markdown
|
|
| Parameter | Type | Description
|
|
| -------------- | --------------------- | -----------------------------------------------------------------------
|
|
| doc | [terrace_document_t*](#terrace-documentt) | A pointer to the current document state struct.
|
|
| matchHead | const char* | A string to check against the line `head` for equality.
|
|
| **@returns** | char | A byte set to `0` if the head does not match, or `1` if it does match.
|
|
|
|
Quickly check if the current line head matches a specified value. Useful in many document-parsing situations.
|
|
|
|
Given the following line:
|
|
CodeBlock terrace
|
|
title An Important Document
|
|
Markdown
|
|
- `terrace_match(doc, "title")` returns `1`
|
|
- `terrace_match(doc, "somethingElse")` returns `0`
|
|
CodeBlock c
|
|
// Call Signature
|
|
char terrace_match(terrace_document_t* doc, const char* matchHead)
|
|
|
|
// Usage
|
|
while(terrace_next(doc, -1)) {
|
|
printf("Does the line start with 'title': %d", terrace_match(doc, "title"));
|
|
}
|
|
|
|
Heading 2 Recipes
|
|
class mt-12
|
|
|
|
Heading 3 Parse a single line
|
|
Markdown
|
|
Parses a single line into `line_data`, then prints the information from `line_data`.
|
|
CodeBlock c
|
|
#include "parser.h"
|
|
|
|
int main(int argc, char *argv[]) {
|
|
char* line = "example line";
|
|
// Create the line_data struct
|
|
terrace_linedata_t line_data;
|
|
// Set the indent character to a space
|
|
line_data.indent = ' ';
|
|
// Populates line_data level, offsetHead, and offsetTail from line
|
|
terrace_parse_line(line, &line_data);
|
|
|
|
printf(
|
|
"level %u | indent %c | offsetHead %u | offsetTail %u\n",
|
|
line_data.level,
|
|
line_data.indent,
|
|
line_data.offsetHead,
|
|
line_data.offsetTail
|
|
);
|
|
|
|
return 0;
|
|
}
|
|
|
|
Heading 3 Parse all lines from stdin
|
|
Markdown
|
|
Reads lines from stdin one-by-one and prints each line's `line_data`.
|
|
CodeBlock c
|
|
#include "parser.h"
|
|
// Depends on several cstdlib functions
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/types.h>
|
|
|
|
int main(int argc, char *argv[]) {
|
|
// Pointer to start of line
|
|
char *line = NULL;
|
|
// Initial size of the buffer to read into
|
|
// getline() will resize as needed
|
|
size_t bufsize = 128;
|
|
// How many characters have been read
|
|
ssize_t chars_read = 0;
|
|
|
|
// Create the line_data struct
|
|
terrace_linedata_t line_data;
|
|
// Set the indent character to a space
|
|
line_data.indent = ' ';
|
|
|
|
while (chars_read = getline(&line, &bufsize, stdin)) {
|
|
// If chars_read is -1, we've reached end of file.
|
|
if (chars_read == -1) break;
|
|
// getline returns lines with a trailing newline
|
|
// terrace_parse_line expects no trailing newline
|
|
// strip it off using strtok()
|
|
// (strtok() overwrites the newline with '\0' in place, so nothing is leaked; note that it returns NULL for an empty line)
|
|
char *terrace_line = strtok(line, "\n");
|
|
terrace_parse_line(terrace_line, &line_data);
|
|
|
|
printf(
|
|
"level %u | indent %c | offsetHead %u | offsetTail %u\n",
|
|
line_data.level,
|
|
line_data.indent,
|
|
line_data.offsetHead,
|
|
line_data.offsetTail
|
|
);
|
|
};
|
|
|
|
// Free the buffer allocated by getline().
|
|
free(line);
|
|
}
|