Compare commits
No commits in common. "5c347a95a0e25f9a52688ec81c7052bd131e1d28" and "38068b24dfb6ea9551c69b1df249fbbb38011e87" have entirely different histories.
5c347a95a0
...
38068b24df
2
packages/c/.gitignore
vendored
2
packages/c/.gitignore
vendored
@ -1,3 +1 @@
|
||||
test/test-runner
|
||||
test/document
|
||||
test/document.c
|
@ -1,212 +0,0 @@
|
||||
#ifndef TERRACE_DOCUMENT_H
|
||||
#define TERRACE_DOCUMENT_H
|
||||
|
||||
#include "parser.h"
|
||||
|
||||
typedef struct terrace_document_s {
|
||||
// == Internal State == //
|
||||
unsigned int _repeatCurrentLine;
|
||||
// Current line being read
|
||||
char* _currentLine;
|
||||
|
||||
// == External Information == //
|
||||
// Embedded line data struct. Holds information about the current parsed line
|
||||
terrace_linedata_t lineData;
|
||||
// Custom data passed to the readline function
|
||||
void* userData;
|
||||
/**
|
||||
* Line reader function, provided by the user
|
||||
* Needed to get the next line inside of `terrace_next(doc)`
|
||||
* @param {char**} line First argument is a pointer to `_currentLine`, above
|
||||
* @param {void*} userData Second argument is `userData`, above
|
||||
* @returns {int} The number of characters read, or -1 if no characters were read.
|
||||
*/
|
||||
int (*reader)(char** line, void* userData);
|
||||
} terrace_document_t;
|
||||
|
||||
/**
|
||||
* Initialize a Terrace document with indent parameters and the function neded to read lines.
|
||||
* @param {char} indent The indent character to use. Generally a single space character.
|
||||
* @param {int (*reader)(char** line, void* userData)} A function pointer to a function that reads lines sequentially
|
||||
* from a user-provided source. Receives a pointer to lineData->_currLine, and userData, supplied in the next argument.
|
||||
* @param {void*} userData A user-supplied pointer to any state information needed by their reader function.
|
||||
* Passed to `reader`each time it is called.
|
||||
* @returns {terrace_document_t} An initialized document that can now be used for futher parsing.
|
||||
*/
|
||||
terrace_document_t terrace_create_document(const char indent, int (*reader)(char** line, void* userData), void* userData) {
|
||||
terrace_document_t document = {
|
||||
._repeatCurrentLine = 0,
|
||||
._currentLine = 0,
|
||||
.lineData = terrace_create_line_data(indent),
|
||||
.reader = reader,
|
||||
.userData = userData
|
||||
};
|
||||
|
||||
return document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of indent characters of the current line
|
||||
*
|
||||
* Given the following document, `terrace_level(doc)` would return 0, 1, 2, and 5 respectively for each line
|
||||
*
|
||||
* ```terrace
|
||||
* block
|
||||
* block
|
||||
* block
|
||||
* block
|
||||
* ```
|
||||
* @returns {unsigned int} The indent level of the current line
|
||||
*/
|
||||
unsigned int terrace_level(terrace_document_t* doc) {
|
||||
return doc->lineData.level;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a string with the current line contents
|
||||
* If `startOffset` is -1, skips all indent characters by default. Otherwise only skips the amount specified
|
||||
*
|
||||
* Given the following document
|
||||
*
|
||||
* ```terrace
|
||||
* root
|
||||
* sub-line
|
||||
* ```
|
||||
* `terrace_line(doc, -1)` on the second line returns "sub-line", trimming off the leading indent characters
|
||||
* `terrace_line(doc, 0)` however, returns " sub-line", with all four leading spaces
|
||||
*
|
||||
* `startOffset`s other than `-1` are primarily used for parsing blocks that have literal indented multi-line text
|
||||
*
|
||||
* @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
|
||||
* @param {int} startOffset How many indent characters to skip before outputting the line contents. Defaults to the current indent level
|
||||
* @returns {char*} The line contents starting from `startOffset`
|
||||
*/
|
||||
char* terrace_line(terrace_document_t* doc, int startOffset) {
|
||||
if (startOffset == -1) startOffset = doc->lineData.level;
|
||||
return doc->_currentLine + startOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the *length* of the first "word" of a line,
|
||||
* starting from the first non-indent character to the first space or end of the line
|
||||
* Often used for deciding how to parse a block.
|
||||
*
|
||||
* Because C uses NULL-terminated strings, we cannot easily slice a string to return something out of the middle.
|
||||
* Instead, `terrace_head_length` provides the length of the head portion.
|
||||
* In combination with `doc->lineData.offsetHead`, you can copy the head section into a new string,
|
||||
* or use any number of `strn*` C stdlib functions to work with the head section without copying it.
|
||||
*
|
||||
* Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser
|
||||
*
|
||||
* Given the following line, `terrace_head_length(doc)` returns `5`
|
||||
*
|
||||
* ```terrace
|
||||
* title An Important Document
|
||||
* ```
|
||||
* @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
|
||||
* @returns {int} The length of the `head` portion (first word) of a line
|
||||
*/
|
||||
int terrace_head_length(terrace_document_t* doc) {
|
||||
return doc->lineData.offsetTail - doc->lineData.offsetHead;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a char pointer to everything following the first "word" of a line,
|
||||
* starting from the first character after the space at the end of `head`
|
||||
*
|
||||
* Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser
|
||||
*
|
||||
* Given the following line, `terrace_tail(doc)` returns "An Important Document"
|
||||
*
|
||||
* ```terrace
|
||||
* title An Important Document
|
||||
* ```
|
||||
* @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
|
||||
* @returns {char*} The remainder of the line following the `head` portion, with no leading space
|
||||
*/
|
||||
char* terrace_tail(terrace_document_t* doc) {
|
||||
return doc->_currentLine + doc->lineData.offsetTail + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Quickly check if the current line head matches a specified value. Useful in many document-parsing situations.
|
||||
*
|
||||
* Given the following line
|
||||
*
|
||||
* ```terrace
|
||||
* title An Important Document
|
||||
* ```
|
||||
*
|
||||
* `terrace_match(doc, "title")` returns `1`
|
||||
* `terrace_match(doc, "somethingElse") returns `0`
|
||||
*
|
||||
* @param {const char*} matchValue A string to check against the line `head` for equality
|
||||
* @returns {char} A byte set to 0 if the head does not match, or 1 if it does match
|
||||
*/
|
||||
char terrace_match(terrace_document_t* doc, const char* matchHead) {
|
||||
// Get a pointer to the start of the head portion of the string.
|
||||
char* head = doc->_currentLine + doc->lineData.offsetHead;
|
||||
|
||||
int i = 0;
|
||||
// Loop until we run out of characters in `matchHead`.
|
||||
while (matchHead[i] != '\0') {
|
||||
// Return as unmatched if we run out of `head` characters
|
||||
// or if a character at the same position in both matchHead and head is not identical.
|
||||
if (head[i] == '\0' || matchHead[i] != head[i]) return 0;
|
||||
i++;
|
||||
}
|
||||
|
||||
// If we didn't return inside the while loop, `matchHead` and `head` are equivalent, a successful match.
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances the current position in the terrace document and populates `doc->lineData`
|
||||
* with the parsed information from that line
|
||||
*
|
||||
* Returns `1` after parsing the next line, or `0` upon reaching the end of the document.
|
||||
* If the `levelScope` parameter is not -1, `terrace_next()` will also return `0` when it encounters a line
|
||||
* with a level at or below `levelScope`. This allows you to iterate through subsections of a document.
|
||||
*
|
||||
* If a lower-level line was encountered, the following call to `next()` will repeat this line again.
|
||||
* This allows a child loop to look forward, determine that the next line will be outside its purview,
|
||||
* and return control to the calling loop transparently without additional logic.
|
||||
*
|
||||
* Intended to be used inside a while loop to parse a section of a Terrace document.
|
||||
*
|
||||
* ```c
|
||||
* while(terrace_next(doc, -1)) {
|
||||
* // Do something with each line.
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* @param {number} levelScope If set above -1, `next()` will return `0` when it encounters a line with a level at or below `levelScope`
|
||||
* @returns {char} Returns `1` after parsing a line, or `0` if the document has ended or a line at or below `levelScope` has been encountered.
|
||||
*/
|
||||
char terrace_next(terrace_document_t* doc, int levelScope) {
|
||||
// Repeat the current line instead of parsing a new one if the previous call to next()
|
||||
// determined the current line to be out of its scope.
|
||||
if (doc->_repeatCurrentLine) doc->_repeatCurrentLine = 0;
|
||||
// Otherwise parse the line normally.
|
||||
else {
|
||||
// Load the next line from the line reader.
|
||||
int chars_read = doc->reader(&doc->_currentLine, doc->userData);
|
||||
// If there are no more lines, bail out.
|
||||
if (chars_read == -1) return 0;
|
||||
|
||||
// Populate lineData with parsed information from the current line.
|
||||
terrace_parse_line(doc->_currentLine, &doc->lineData);
|
||||
}
|
||||
|
||||
// If we shouldn't be handling this line, make the following call to next() repeat the current line.
|
||||
// Allows a child loop to look forward, determine that the next line will be outside its purview,
|
||||
// and return control to the calling loop transparently without additional logic.
|
||||
if ((int) terrace_level(doc) <= levelScope) {
|
||||
doc->_repeatCurrentLine = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
@ -1,49 +1,28 @@
|
||||
#ifndef TERRACE_PARSER_H
|
||||
#define TERRACE_PARSER_H
|
||||
|
||||
// Holds the parsed information from each line.
|
||||
typedef struct terrace_linedata_s {
|
||||
// Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
|
||||
struct terrace_linedata_s {
|
||||
char indent;
|
||||
// How many indent characters are present in the current line before the first non-indent character.
|
||||
unsigned int level;
|
||||
// The number of characters before the start of the line's "head" section.
|
||||
// (Normally the same as `level`)
|
||||
unsigned int offsetHead;
|
||||
// The number of characters before the start of the line's "tail" section.
|
||||
unsigned int offsetTail;
|
||||
} terrace_linedata_t;
|
||||
};
|
||||
|
||||
terrace_linedata_t terrace_create_line_data(const char indent) {
|
||||
terrace_linedata_t line_data = { .indent = indent, .level = 0, .offsetHead = 0, .offsetTail = 0 };
|
||||
return line_data;
|
||||
}
|
||||
typedef struct terrace_linedata_s terrace_linedata_t;
|
||||
|
||||
/**
|
||||
* Core Terrace parser function, sets level, offsetHead, and offsetTail in a lineData struct based on the current line.
|
||||
* @param char* line A pointer to the line to parse as a C-style string. Shouldn't end with a newline.
|
||||
* @param terrace_linedata_t* lineData A pointer to the terrace_linedata_t struct to store information about the current line in.
|
||||
*/
|
||||
void terrace_parse_line(const char *line, terrace_linedata_t *lineData) {
|
||||
// Empty lines are nullptr/0 as they have no characters. (The newline character should be stripped off.)
|
||||
// Special case handling for these allows them to be parsed extra quickly.
|
||||
if (!line) {
|
||||
// Empty lines are treated as having the same level as the previous line, so lineData->level is not updated.
|
||||
void terrace_parse_line(char* line, terrace_linedata_t *lineData) {
|
||||
if (line == 0) {
|
||||
// Reuse lineData->level from previous line.
|
||||
lineData->offsetHead = 0;
|
||||
lineData->offsetTail = 0;
|
||||
} else {
|
||||
// Count the number of indent characters in the current line.
|
||||
unsigned int level = 0;
|
||||
while (line[level] == lineData->indent) ++level;
|
||||
while (line[level] == lineData->indent && level <= lineData->level + 1) ++level;
|
||||
lineData->level = level;
|
||||
|
||||
// Set offsetHead and offsetTail to level to start with.
|
||||
// offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
|
||||
lineData->offsetHead = level;
|
||||
lineData->offsetTail = level;
|
||||
|
||||
// Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
|
||||
while (line[lineData->offsetTail] && line[lineData->offsetTail] != ' ') ++lineData->offsetTail;
|
||||
while (line[lineData->offsetTail] != '\0' && line[lineData->offsetTail] != ' ') ++lineData->offsetTail;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,8 @@ void linedata_basic (char indent) {
|
||||
size_t bufsize = 32;
|
||||
ssize_t c_read = 0;
|
||||
|
||||
terrace_linedata_t line_data = terrace_create_line_data(indent);
|
||||
terrace_linedata_t line_data;
|
||||
line_data.indent = indent;
|
||||
|
||||
while(c_read = getline(&line, &bufsize, stdin)) {
|
||||
if (c_read == -1) break;
|
||||
@ -28,7 +29,8 @@ void linedata_head_tail (char indent) {
|
||||
size_t bufsize = 32;
|
||||
ssize_t c_read = 0;
|
||||
|
||||
terrace_linedata_t line_data = terrace_create_line_data(indent);
|
||||
terrace_linedata_t line_data;
|
||||
line_data.indent = indent;
|
||||
|
||||
char *head;
|
||||
char *tail;
|
||||
|
@ -22,8 +22,7 @@ export type Document = {
|
||||
export function useDocument (reader: Reader, indent: string = ' '): Document {
|
||||
if (indent.length !== 1) throw new Error(`Terrace currently only allows single-character indent strings - you passed "${indent}"`)
|
||||
|
||||
const lineData = createLineData(indent)
|
||||
let currLine = ''
|
||||
const lineData = createLineData('', indent)
|
||||
|
||||
// If `repeatCurrentLine` is `true`, the following call to `next()` will repeat the current line in
|
||||
// the document and set `repeatCurrentLine` back to `false`
|
||||
@ -63,8 +62,8 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
|
||||
if (line == null) return false
|
||||
|
||||
// Populate lineData with parsed information from the current line.
|
||||
currLine = line
|
||||
parseLine(currLine, lineData)
|
||||
lineData.line = line
|
||||
parseLine(lineData)
|
||||
}
|
||||
|
||||
// If we shouldn't be handling this line, make the following call to next() repeat the current line.
|
||||
@ -109,7 +108,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
|
||||
* @param {number} startOffset How many indent characters to skip before outputting the line contents. Defaults to the current indent level
|
||||
* @returns {string} The line contents starting from `startOffset`
|
||||
*/
|
||||
const line = (startOffset: number = lineData.level): string => currLine.slice(startOffset)
|
||||
const line = (startOffset: number = lineData.level): string => lineData.line.slice(startOffset)
|
||||
/**
|
||||
* Get the first "word" of a line, starting from the first non-indent character to the first space or end of the line
|
||||
* Often used for deciding how to parse a block.
|
||||
@ -123,7 +122,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
|
||||
* ```
|
||||
* @returns {string} The `head` portion (first word) of a line
|
||||
*/
|
||||
const head = (): string => currLine.slice(lineData.offsetHead, lineData.offsetTail)
|
||||
const head = (): string => lineData.line.slice(lineData.offsetHead, lineData.offsetTail)
|
||||
/**
|
||||
* Get all text following the first "word" of a line, starting from the first character after the space at the end of `head()`
|
||||
*
|
||||
@ -136,7 +135,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
|
||||
* ```
|
||||
* @returns {string} The remainder of the line following the `head()` portion, with no leading space
|
||||
*/
|
||||
const tail = (): string => currLine.slice(lineData.offsetTail + 1) // Skip the space
|
||||
const tail = (): string => lineData.line.slice(lineData.offsetTail + 1) // Skip the space
|
||||
/**
|
||||
* Quickly check if the current line head matches a specified value
|
||||
*
|
||||
|
253
packages/js/src/parser.test.ts
Normal file
253
packages/js/src/parser.test.ts
Normal file
@ -0,0 +1,253 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { createLineData, parseLine } from './parser'
|
||||
|
||||
// Checks the shape and default values of the object returned by `createLineData()`.
describe(`LineData`, () => {
  it(`is an object`, () => {
    const lineData = createLineData()
    expect(lineData).toBeTypeOf(`object`)
  })

  it(`has five properties`, () => {
    const lineData = createLineData()
    expect(Object.keys(lineData).length).to.equal(5)
  })

  // `createLineData` defaults `line` to an empty string, so assert on `line` itself.
  it(`'line' is a string initialized to an empty string`, () => {
    const lineData = createLineData()
    expect(lineData.line).to.equal(``)
  })

  it(`'level' is an integer initialized to zero`, () => {
    const lineData = createLineData()
    expect(lineData.level).to.equal(0)
  })

  it(`'offsetHead' is an integer initialized to zero`, () => {
    const lineData = createLineData()
    expect(lineData.offsetHead).to.equal(0)
  })

  it(`'offsetTail' is an integer initialized to zero`, () => {
    const lineData = createLineData()
    expect(lineData.offsetTail).to.equal(0)
  })
})
|
||||
|
||||
// Exercises `parseLine(lineData)`: argument validation, indent counting, and head/tail offsets.
describe(`parseLine`, () => {
  it(`Requres 'lineData' to be an object with string line and numeric level properties`, () => {
    // `parseLine` takes `lineData` as its ONLY argument, so each invalid value is passed directly.
    // With a leading string argument, every case would throw on that string and pass vacuously.
    // @ts-ignore
    expect(() => parseLine(0)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
    // @ts-ignore
    expect(() => parseLine([])).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
    // @ts-ignore
    expect(() => parseLine({})).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
    // @ts-ignore
    expect(() => parseLine(null)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
    // @ts-ignore
    expect(() => parseLine(true)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
    // @ts-ignore
    expect(() => parseLine(() => {})).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
    // @ts-ignore
    expect(() => parseLine({ line: '', level: '' })).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
    // `line` and `level` are both valid here, so this object is still rejected, but by the
    // `indent` check that follows, because it carries no indent character.
    // @ts-ignore
    expect(() => parseLine({ line: '', level: 0 })).toThrowError(`'lineData.indent' must be a single-character string`)
  })

  it(`Requres 'indent' to be a single-character string`, () => {
    const lineData = createLineData()
    lineData.line = ``
    // @ts-ignore
    lineData.indent = 0
    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
    // @ts-ignore
    lineData.indent = []
    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
    // @ts-ignore
    lineData.indent = {}
    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
    // @ts-ignore
    lineData.indent = null
    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
    // @ts-ignore
    lineData.indent = true
    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
    // @ts-ignore
    lineData.indent = () => {}
    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
    // Two characters: a string, but longer than the single character allowed.
    lineData.indent = `  `
    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
  })

  it(`Handles a blank line at indent level 0`, () => {
    const line = ``
    const lineData = createLineData(line)
    parseLine(lineData)
    expect(lineData).to.deep.equal({ line, indent: ` `, level: 0, offsetHead: 0, offsetTail: 0 })
  })

  it(`Handles a line with a single space at indent level 1`, () => {
    const line = ` `
    const lineData = createLineData(line)
    parseLine(lineData)
    expect(lineData).to.deep.equal({ line, indent: ` `, level: 1, offsetHead: 1, offsetTail: 1 })
  })

  it(`Handles a line with two spaces`, () => {
    const line = `  `
    const lineData = createLineData(line)
    parseLine(lineData)
    expect(lineData).to.deep.equal({ line, indent: ` `, level: 2, offsetHead: 2, offsetTail: 2 })
  })

  it(`Handles a normal line at indent level 0`, () => {
    const line = `line 1`
    const lineData = createLineData(line)
    parseLine(lineData)
    expect(lineData).to.deep.equal({ line, indent: ` `, level: 0, offsetHead: 0, offsetTail: 4 })
  })

  it(`Handles a normal line at indent level 1`, () => {
    const line = ` line 1`
    const lineData = createLineData(line)
    parseLine(lineData)
    expect(lineData).to.deep.equal({ line, indent: ` `, level: 1, offsetHead: 1, offsetTail: 5 })
  })

  it(`Handles a normal line at indent level 2`, () => {
    const line = `  line 1`
    const lineData = createLineData(line)
    parseLine(lineData)
    expect(lineData).to.deep.equal({ line, indent: ` `, level: 2, offsetHead: 2, offsetTail: 6 })
  })

  it(`Handles a normal line at indent level 1 indented with tabs`, () => {
    const line = `\tline 1`
    const lineData = createLineData(line, `\t`)
    parseLine(lineData)
    expect(lineData).to.deep.equal({ line, indent: `\t`, level: 1, offsetHead: 1, offsetTail: 5 })
  })

  it(`Handles a normal line at indent level 2 indented with tabs`, () => {
    const line = `\t\tline 1`
    const lineData = createLineData(line, `\t`)
    parseLine(lineData)
    expect(lineData).to.deep.equal({ line, indent: `\t`, level: 2, offsetHead: 2, offsetTail: 6})
  })

  it(`Nests a normal line under a preceding normal line`, () => {
    const lines = [
      'line 1',
      ' line 2'
    ]

    // One shared lineData is reused across lines; each result is a snapshot copy.
    const lineData = createLineData()
    const results = lines.map(line => {
      lineData.line = line
      parseLine(lineData)
      return {...lineData}
    })

    expect(results).to.deep.equal([
      { line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
      { line: lines[1], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 }
    ])
  })

  it(`Nests multiple normal line under a preceding normal line`, () => {
    const lines = [
      'line 1',
      ' line 2',
      ' line 3',
      ' line 4',
    ]

    const lineData = createLineData()
    const results = lines.map(line => {
      lineData.line = line
      parseLine(lineData)
      return {...lineData}
    })

    expect(results).to.deep.equal([
      { line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
      { line: lines[1], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 },
      { line: lines[2], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 },
      { line: lines[3], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 }
    ])
  })

  it(`Does not nest an empty line under a preceding normal line`, () => {
    const lines = [
      'line 1',
      ''
    ]

    const lineData = createLineData()
    const results = lines.map(line => {
      lineData.line = line
      parseLine(lineData)
      return {...lineData}
    })

    expect(results).to.deep.equal([
      { line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
      { line: lines[1], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 }
    ])
  })

  it(`Does not nest multiple empty lines under a preceding normal line`, () => {
    const lines = [
      'line 1',
      '',
      '',
      '',
    ]

    const lineData = createLineData()
    const results = lines.map(line => {
      lineData.line = line
      parseLine(lineData)
      return {...lineData}
    })

    expect(results).to.deep.equal([
      { line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
      { line: lines[1], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 },
      { line: lines[2], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 },
      { line: lines[3], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 }
    ])
  })

  it(`Handle head and tail matching for lines with head and tail`, () => {
    const line = ` head tail1 tail2 tail3`
    const lineData = createLineData(line)
    parseLine(lineData)

    // head is [offsetHead, offsetTail); tail starts one past the separating space.
    const head = line.slice(lineData.offsetHead, lineData.offsetTail)
    const tail = line.slice(lineData.offsetTail + 1)
    expect(head).to.equal(`head`)
    expect(tail).to.equal(`tail1 tail2 tail3`)
  })

  it(`Handle head and tail matching for lines with head but no tail`, () => {
    const line = ` head`
    const lineData = createLineData(line)
    parseLine(lineData)

    const head = line.slice(lineData.offsetHead, lineData.offsetTail)
    const tail = line.slice(lineData.offsetTail + 1)
    expect(head).to.equal(`head`)
    expect(tail).to.equal(``)
  })

  it(`Handle head and tail matching for lines with head and trailing space`, () => {
    const line = ` head `
    const lineData = createLineData(line)
    parseLine(lineData)

    const head = line.slice(lineData.offsetHead, lineData.offsetTail)
    const tail = line.slice(lineData.offsetTail + 1)
    expect(head).to.equal(`head`)
    expect(tail).to.equal(``)
  })
})
|
@ -1,54 +1,35 @@
|
||||
// Holds the parsed information from each line.
|
||||
export type LineData = {
|
||||
// Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
|
||||
line: string;
|
||||
indent: string;
|
||||
// How many indent characters are present in the current line before the first non-indent character.
|
||||
level: number;
|
||||
// The number of characters before the start of the line's "head" section.
|
||||
// (Normally the same as `level`)
|
||||
offsetHead: number;
|
||||
// The number of characters before the start of the line's "tail" section.
|
||||
offsetTail: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize a LineData instance with default values.
|
||||
* @param {string} indent The character to use for indenting lines. ONLY ONE CHARACTER IS CURRENTLY PERMITTED.
|
||||
* @returns {LineData} A LineData instance with the specified indent character and all other values initialized to 0.
|
||||
*/
|
||||
export function createLineData(indent: string = ' '): LineData {
|
||||
return { indent, level: 0, offsetHead: 0, offsetTail: 0 }
|
||||
export function createLineData(line: string = '', indent: string = ' '): LineData {
|
||||
return { line, indent, level: 0, offsetHead: 0, offsetTail: 0 }
|
||||
}
|
||||
|
||||
/**
|
||||
* Core Terrace parser function, sets level, offsetHead, and offsetTail in a LineData object based on the current line.
|
||||
* Note that this is a C-style function, lineData is treated as a reference and mutated in-place.
|
||||
* @param {string} line A string containing a line to parse. Shouldn't end with a newline.
|
||||
* @param {LineData} lineData A LineData object to store information about the current line in. **Mutated in-place!**
|
||||
*/
|
||||
export function parseLine(line: string, lineData: LineData) {
|
||||
export function parseLine(lineData: LineData): LineData {
|
||||
if ((typeof lineData !== 'object' || !lineData) || typeof lineData.level !== 'number') throw new Error(`'lineData' must be an object with string line and numeric level properties`)
|
||||
if (typeof lineData.indent !== 'string' || lineData.indent.length === 0 || lineData.indent.length > 1) throw new Error(`'lineData.indent' must be a single-character string`)
|
||||
if (typeof line !== 'string') throw new Error(`'line' must be a string`)
|
||||
if (typeof lineData.line !== 'string') throw new Error(`'lineData.line' must be a string`)
|
||||
|
||||
// Blank lines have no characters, the newline should be stripped off.
|
||||
// Special case handling for these allows them to be parsed quickly.
|
||||
if (!line.length) {
|
||||
// Empty lines are treated as having the same level as the previous line, so lineData.level is not updated.
|
||||
let level = 0
|
||||
|
||||
// Repeat previous level for blank lines.
|
||||
if (!lineData.line.length) {
|
||||
lineData.level = lineData.level
|
||||
lineData.offsetHead = 0
|
||||
lineData.offsetTail = 0
|
||||
} else {
|
||||
// Count the number of indent characters in the current line.
|
||||
let level = 0
|
||||
while (line[level] === lineData.indent) ++level
|
||||
while (lineData.line[level] === lineData.indent && level <= lineData.level + 1) ++level
|
||||
lineData.level = level
|
||||
|
||||
// Set offsetHead and offsetTail to level to start with.
|
||||
// offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
|
||||
lineData.offsetHead = level
|
||||
lineData.offsetTail = level
|
||||
|
||||
// Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
|
||||
while (line[lineData.offsetTail] && line[lineData.offsetTail] !== ' ') ++lineData.offsetTail
|
||||
while (lineData.line[lineData.offsetTail] && lineData.line[lineData.offsetTail] !== ' ') ++lineData.offsetTail
|
||||
}
|
||||
|
||||
return lineData
|
||||
}
|
||||
|
@ -1,28 +1,26 @@
|
||||
import { createLineData, parseLine } from '@terrace-lang/js'
|
||||
import { createLineData, parseLine, useDocument } from '@terrace-lang/js'
|
||||
import { createStdinReader } from '@terrace-lang/js/readers/node-readline'
|
||||
|
||||
const testName = process.argv[2]
|
||||
|
||||
async function linedata_basic(indent) {
|
||||
const lineData = createLineData(indent)
|
||||
const lineData = createLineData('', indent)
|
||||
const next = createStdinReader()
|
||||
|
||||
let line = ''
|
||||
while ((line = await next()) != null) {
|
||||
parseLine(line, lineData)
|
||||
const { level, indent, offsetHead, offsetTail } = lineData
|
||||
while ((lineData.line = await next()) != null) {
|
||||
parseLine(lineData)
|
||||
const { level, indent, offsetHead, offsetTail, line } = lineData
|
||||
console.log(`| level ${level} | indent ${indent} | offsetHead ${offsetHead} | offsetTail ${offsetTail} | line ${line} |`)
|
||||
}
|
||||
}
|
||||
|
||||
async function linedata_head_tail () {
|
||||
const lineData = createLineData()
|
||||
const lineData = createLineData('')
|
||||
const next = createStdinReader()
|
||||
|
||||
let line = ''
|
||||
while ((line = await next()) != null) {
|
||||
parseLine(line, lineData)
|
||||
const { offsetHead, offsetTail } = lineData
|
||||
while ((lineData.line = await next()) != null) {
|
||||
parseLine(lineData)
|
||||
const { level, indent, offsetHead, offsetTail, line } = lineData
|
||||
const head = line.slice(offsetHead, offsetTail)
|
||||
const tail = line.slice(offsetTail + 1)
|
||||
|
||||
|
@ -1,63 +1,35 @@
|
||||
from typing import TypedDict
|
||||
|
||||
# Holds the parsed information from each line.
|
||||
class LineData(TypedDict):
|
||||
# Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
|
||||
line: str
|
||||
indent: str
|
||||
# How many indent characters are present in the current line before the first non-indent character.
|
||||
level: int
|
||||
# The number of characters before the start of the line's "head" section.
|
||||
# (Normally the same as `level`)
|
||||
offsetHead: int
|
||||
# The number of characters before the start of the line's "tail" section.
|
||||
offsetTail: int
|
||||
|
||||
def createLineData(indent: str = ' ') -> LineData:
|
||||
"""
|
||||
Initialize a LineData instance with default values.
|
||||
def createLineData(line: str = '', indent: str = ' ') -> LineData:
|
||||
return { "line": line, "indent": indent, "level": 0, "offsetHead": 0, "offsetTail": 0 }
|
||||
|
||||
Parameters
|
||||
----------
|
||||
indent : str
|
||||
The character to use for indenting lines. ONLY ONE CHARACTER IS CURRENTLY PERMITTED.
|
||||
Returns
|
||||
-------
|
||||
LineData
|
||||
A LineData dict with the specified indent character and all other values initialized to 0.
|
||||
"""
|
||||
return { "indent": indent, "level": 0, "offsetHead": 0, "offsetTail": 0 }
|
||||
def parseLine(lineData: LineData) -> LineData:
|
||||
# if ((typeof lineData !== 'object' || !lineData) || typeof lineData.level !== 'number') throw new Error(`'lineData' must be an object with string line and numeric level properties`)
|
||||
# if (typeof lineData.indent !== 'string' || lineData.indent.length === 0 || lineData.indent.length > 1) throw new Error(`'lineData.indent' must be a single-character string`)
|
||||
# if (typeof lineData.line !== 'string') throw new Error(`'lineData.line' must be a string`)
|
||||
|
||||
def parseLine(line: str, lineData: LineData):
|
||||
"""
|
||||
Core Terrace parser function, sets level, offsetHead, and offsetTail in a LineData object based on the current line.
|
||||
Note that this is a C-style function, lineData is treated as a reference and mutated in-place.
|
||||
level = 0
|
||||
|
||||
Parameters
|
||||
----------
|
||||
line : str
|
||||
A string containing a line to parse. Shouldn't end with a newline.
|
||||
lineData: LineData
|
||||
A LineData dict to store information about the current line in. **Mutated in-place!**
|
||||
"""
|
||||
|
||||
# Blank lines have no characters, the newline should be stripped off.
|
||||
# Special case handling for these allows them to be parsed quickly.
|
||||
if len(line) == 0:
|
||||
# Empty lines are treated as having the same level as the previous line, so lineData.level is not updated.
|
||||
# Repeat previous level for blank lines.
|
||||
if len(lineData['line']) == 0:
|
||||
lineData['level'] = lineData['level']
|
||||
lineData['offsetHead'] = 0
|
||||
lineData['offsetTail'] = 0
|
||||
else:
|
||||
# Count the number of indent characters in the current line.
|
||||
level = 0
|
||||
while level < len(line) and line[level] == lineData['indent']:
|
||||
while level < len(lineData['line']) and lineData['line'][level] == lineData['indent'] and level <= lineData['level'] + 1:
|
||||
level += 1
|
||||
lineData['level'] = level
|
||||
|
||||
# Set offsetHead and offsetTail to level to start with.
|
||||
# offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
|
||||
lineData['offsetHead'] = level
|
||||
lineData['offsetTail'] = level
|
||||
|
||||
# Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
|
||||
while lineData['offsetTail'] < len(line) and line[lineData['offsetTail']] != ' ':
|
||||
while lineData['offsetTail'] < len(lineData['line']) and lineData['line'][lineData['offsetTail']] != ' ':
|
||||
lineData['offsetTail'] += 1
|
||||
|
||||
return lineData
|
||||
|
@ -14,21 +14,33 @@ def next():
|
||||
return line.rstrip('\n') if len(line) > 0 else None
|
||||
|
||||
def linedata_basic (indent):
|
||||
lineData = createLineData(indent)
|
||||
lineData = createLineData('', indent)
|
||||
|
||||
while (line := next()) != None:
|
||||
parseLine(line, lineData)
|
||||
while (l := next()) != None:
|
||||
lineData['line'] = l
|
||||
parseLine(lineData)
|
||||
print("| level {level} | indent {indent} | offsetHead {offsetHead} | offsetTail {offsetTail} | line {line} |".format(
|
||||
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = line
|
||||
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = lineData['line']
|
||||
))
|
||||
|
||||
def linedata_tabs ():
|
||||
lineData = createLineData('', '\t')
|
||||
|
||||
while (l := next()) != None:
|
||||
lineData['line'] = l
|
||||
parseLine(lineData)
|
||||
print("| level {level} | indent {indent} | offsetHead {offsetHead} | offsetTail {offsetTail} | line {line} |".format(
|
||||
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = lineData['line']
|
||||
))
|
||||
|
||||
def linedata_head_tail ():
|
||||
lineData = createLineData()
|
||||
lineData = createLineData('')
|
||||
|
||||
while (line := next()) != None:
|
||||
parseLine(line, lineData)
|
||||
head = line[lineData['offsetHead']:lineData['offsetTail']] if len(line) > lineData['offsetHead'] else ''
|
||||
tail = line[lineData['offsetTail'] + 1:] if len(line) > lineData['offsetTail'] + 1 else ''
|
||||
while (l := next()) != None:
|
||||
lineData['line'] = l
|
||||
parseLine(lineData)
|
||||
head = lineData['line'][lineData['offsetHead']:lineData['offsetTail']] if len(lineData['line']) > lineData['offsetHead'] else ''
|
||||
tail = lineData['line'][lineData['offsetTail'] + 1:] if len(lineData['line']) > lineData['offsetTail'] + 1 else ''
|
||||
|
||||
print("| head {head} | tail {tail} |".format(
|
||||
head = head, tail = tail
|
||||
|
Loading…
x
Reference in New Issue
Block a user