Compare commits

...

2 Commits

Author SHA1 Message Date
Joshua Bemenderfer
5c347a95a0 Implement basic document functions for C API, mostly equivalent to JS ones. 2023-02-19 17:04:36 -05:00
Joshua Bemenderfer
3f6c475756 Cleanup and document core APIs in C, JS, and Python. 2023-02-19 14:53:59 -05:00
10 changed files with 349 additions and 331 deletions

View File

@ -1 +1,3 @@
test/test-runner
test/document
test/document.c

212
packages/c/document.h Normal file
View File

@ -0,0 +1,212 @@
#ifndef TERRACE_DOCUMENT_H
#define TERRACE_DOCUMENT_H
#include "parser.h"
// State for reading a Terrace document one line at a time.
// Bundles the user-supplied line reader, the most recently read line,
// and the parsed information (`lineData`) describing that line.
typedef struct terrace_document_s {
// == Internal State == //
// Non-zero when the next call to `terrace_next()` should re-deliver the current line
// instead of reading a new one. Set by `terrace_next()` when it stops at a line whose
// level is at or below the requested scope, and cleared by the following call.
unsigned int _repeatCurrentLine;
// Current line being read. Starts out as a null pointer until the reader has supplied a line.
char* _currentLine;
// == External Information == //
// Embedded line data struct. Holds information about the current parsed line
terrace_linedata_t lineData;
// Custom data passed to the readline function
void* userData;
/**
* Line reader function, provided by the user
* Needed to get the next line inside of `terrace_next(doc)`
* @param {char**} line First argument is a pointer to `_currentLine`, above
* @param {void*} userData Second argument is `userData`, above
* @returns {int} The number of characters read, or -1 if no characters were read.
* `terrace_next()` treats a return value of -1 as the end of the document.
*/
int (*reader)(char** line, void* userData);
} terrace_document_t;
/**
* Initialize a Terrace document with indent parameters and the function neded to read lines.
* @param {char} indent The indent character to use. Generally a single space character.
* @param {int (*reader)(char** line, void* userData)} A function pointer to a function that reads lines sequentially
* from a user-provided source. Receives a pointer to lineData->_currLine, and userData, supplied in the next argument.
* @param {void*} userData A user-supplied pointer to any state information needed by their reader function.
* Passed to `reader`each time it is called.
* @returns {terrace_document_t} An initialized document that can now be used for futher parsing.
*/
terrace_document_t terrace_create_document(const char indent, int (*reader)(char** line, void* userData), void* userData) {
terrace_document_t document = {
._repeatCurrentLine = 0,
._currentLine = 0,
.lineData = terrace_create_line_data(indent),
.reader = reader,
.userData = userData
};
return document;
}
/**
 * Returns the number of indent characters at the start of the current line.
 *
 * Given the following document, `terrace_level(doc)` would return 0, 1, 2, and 5 respectively for each line
 *
 * ```terrace
 * block
 *  block
 *   block
 *      block
 * ```
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @returns {unsigned int} The indent level of the current line
 */
unsigned int terrace_level(terrace_document_t* doc) {
  // The level is maintained by the line parser; simply report what it stored.
  const unsigned int indentCount = (*doc).lineData.level;
  return indentCount;
}
/**
 * Get a string with the current line contents
 * If `startOffset` is negative (conventionally -1), skips all indent characters. Otherwise only skips the amount specified
 *
 * Given the following document
 *
 * ```terrace
 * root
 *     sub-line
 * ```
 * `terrace_line(doc, -1)` on the second line returns "sub-line", trimming off the leading indent characters
 * `terrace_line(doc, 0)` however, returns "    sub-line", with all four leading spaces
 *
 * `startOffset`s other than `-1` are primarily used for parsing blocks that have literal indented multi-line text
 *
 * The returned pointer aliases the document's current line buffer; it is not a copy.
 * If the line is shorter than `startOffset`, the result is an empty string (a pointer to the
 * line's terminator) rather than a pointer past the end of the buffer.
 *
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @param {int} startOffset How many indent characters to skip before outputting the line contents. Pass -1 to use the current indent level
 * @returns {char*} The line contents starting from `startOffset`, or a null pointer if the document has no current line
 */
char* terrace_line(terrace_document_t* doc, int startOffset) {
  char* line = doc->_currentLine;

  // No current line (nothing read yet, or the reader supplied a null line): there is
  // nothing to offset into, and pointer arithmetic on a null pointer is undefined.
  if (!line) return line;

  // A negative offset selects the default: skip every indent character.
  if (startOffset < 0) startOffset = (int) doc->lineData.level;

  // Step forward one character at a time so that a line shorter than `startOffset`
  // stops at its terminator instead of yielding an out-of-bounds pointer.
  while (startOffset > 0 && *line != '\0') {
    ++line;
    --startOffset;
  }

  return line;
}
/**
 * Get the *length* of the first "word" of a line,
 * measured from the first non-indent character up to the first space or the end of the line.
 * Often used for deciding how to parse a block.
 *
 * Because C uses NULL-terminated strings, a slice from the middle of a string cannot be returned without copying.
 * Instead, `terrace_head_length` provides the length of the head portion.
 * In combination with `doc->lineData.offsetHead`, you can copy the head section into a new string,
 * or use any number of `strn*` C stdlib functions to work with the head section without copying it.
 *
 * Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser
 *
 * Given the following line, `terrace_head_length(doc)` returns `5`
 *
 * ```terrace
 * title An Important Document
 * ```
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @returns {int} The length of the `head` portion (first word) of a line
 */
int terrace_head_length(terrace_document_t* doc) {
  // The head spans [offsetHead, offsetTail) within the current line.
  const unsigned int headStart = doc->lineData.offsetHead;
  const unsigned int headEnd = doc->lineData.offsetTail;
  return headEnd - headStart;
}
/**
 * Get a char pointer to everything following the first "word" of a line,
 * starting from the first character after the space at the end of `head`
 *
 * Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser
 *
 * Given the following line, `terrace_tail(doc)` returns "An Important Document"
 *
 * ```terrace
 * title An Important Document
 * ```
 *
 * When the line has no tail (the head runs to the end of the line), an empty string is returned.
 * The returned pointer aliases the document's current line buffer; it is not a copy.
 *
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @returns {char*} The remainder of the line following the `head` portion, with no leading space,
 *   or a null pointer if the document has no current line
 */
char* terrace_tail(terrace_document_t* doc) {
  char* line = doc->_currentLine;

  // No current line: nothing to offset into (arithmetic on a null pointer is undefined).
  if (!line) return line;

  // `offsetTail` indexes the space separating head and tail when a tail exists,
  // or the string terminator when the head is the last thing on the line.
  char* tail = line + doc->lineData.offsetTail;

  // Only step over the separator. Stepping over the terminator would hand the caller
  // a pointer past the end of the string.
  if (*tail != '\0') ++tail;

  return tail;
}
/**
 * Quickly check if the current line head matches a specified value. Useful in many document-parsing situations.
 *
 * The comparison is for exact equality with the head: the head and `matchHead` must have the
 * same length and the same characters. A value that is only a prefix of the head, or that
 * extends past the head into the tail, does not match.
 *
 * Given the following line
 *
 * ```terrace
 * title An Important Document
 * ```
 *
 * `terrace_match(doc, "title")` returns `1`
 * `terrace_match(doc, "somethingElse")` returns `0`
 * `terrace_match(doc, "ti")` returns `0`
 *
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @param {const char*} matchHead A string to check against the line `head` for equality
 * @returns {char} A byte set to 0 if the head does not match, or 1 if it does match
 */
char terrace_match(terrace_document_t* doc, const char* matchHead) {
  // With no current line the head is empty, so only an empty `matchHead` can equal it.
  if (!doc->_currentLine) return matchHead[0] == '\0';

  // The head occupies [offsetHead, offsetTail) within the current line.
  const char* head = doc->_currentLine + doc->lineData.offsetHead;
  const unsigned int headLength = doc->lineData.offsetTail - doc->lineData.offsetHead;

  unsigned int i = 0;
  for (; i < headLength; ++i) {
    // `matchHead` ended before the head did, or the characters differ: not a match.
    if (matchHead[i] == '\0' || matchHead[i] != head[i]) return 0;
  }

  // Every head character matched. It is only an exact match if `matchHead` ends here too.
  return matchHead[i] == '\0';
}
/**
 * Advances the current position in the terrace document and populates `doc->lineData`
 * with the parsed information from that line
 *
 * Returns `1` after parsing the next line, or `0` upon reaching the end of the document
 * (signalled by the reader returning -1).
 * If the `levelScope` parameter is not -1, `terrace_next()` will also return `0` when it encounters a line
 * with a level at or below `levelScope`. This allows you to iterate through subsections of a document.
 *
 * When a call stops at such an out-of-scope line, the following call to `terrace_next()` delivers
 * that same line again instead of reading a new one.
 * This allows a child loop to look forward, determine that the next line will be outside its purview,
 * and return control to the calling loop transparently without additional logic.
 *
 * Intended to be used inside a while loop to parse a section of a Terrace document.
 *
 * ```c
 * while(terrace_next(doc, -1)) {
 *   // Do something with each line.
 * }
 * ```
 *
 * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
 * @param {int} levelScope If set above -1, `terrace_next()` will return `0` when it encounters a line with a level at or below `levelScope`
 * @returns {char} Returns `1` after parsing a line, or `0` if the document has ended or a line at or below `levelScope` has been encountered.
 */
char terrace_next(terrace_document_t* doc, int levelScope) {
  if (!doc->_repeatCurrentLine) {
    // Normal path: pull the next line from the user's reader.
    const int charsRead = doc->reader(&doc->_currentLine, doc->userData);

    // The reader reports -1 when there are no more lines.
    if (charsRead == -1) return 0;

    // Refresh lineData so it describes the line just read.
    terrace_parse_line(doc->_currentLine, &doc->lineData);
  } else {
    // The previous call stopped on this line because it was out of scope.
    // Deliver it again rather than reading a new one.
    doc->_repeatCurrentLine = 0;
  }

  // A line is in scope only when it is nested deeper than `levelScope`.
  const int inScope = (int) terrace_level(doc) > levelScope;

  // Out of scope: leave the line in place so the caller's enclosing loop sees it next.
  if (!inScope) doc->_repeatCurrentLine = 1;

  return inScope ? 1 : 0;
}
#endif

View File

@ -1,28 +1,49 @@
#ifndef TERRACE_PARSER_H
#define TERRACE_PARSER_H
struct terrace_linedata_s {
// Holds the parsed information from each line.
typedef struct terrace_linedata_s {
// Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
char indent;
// How many indent characters are present in the current line before the first non-indent character.
unsigned int level;
// The number of characters before the start of the line's "head" section.
// (Normally the same as `level`)
unsigned int offsetHead;
// The number of characters before the start of the line's "tail" section.
unsigned int offsetTail;
};
} terrace_linedata_t;
typedef struct terrace_linedata_s terrace_linedata_t;
terrace_linedata_t terrace_create_line_data(const char indent) {
terrace_linedata_t line_data = { .indent = indent, .level = 0, .offsetHead = 0, .offsetTail = 0 };
return line_data;
}
void terrace_parse_line(char* line, terrace_linedata_t *lineData) {
if (line == 0) {
// Reuse lineData->level from previous line.
/**
* Core Terrace parser function, sets level, offsetHead, and offsetTail in a lineData struct based on the current line.
* @param char* line A pointer to the line to parse as a C-style string. Shouldn't end with a newline.
* @param terrace_linedata_t* lineData A pointer to the terrace_linedata_t struct to store information about the current line in.
*/
void terrace_parse_line(const char *line, terrace_linedata_t *lineData) {
// Empty lines are nullptr/0 as they have no characters. (The newline character should be stripped off.)
// Special case handling for these allows them to be parsed extra quickly.
if (!line) {
// Empty lines are treated as having the same level as the previous line, so lineData->line is not updated.
lineData->offsetHead = 0;
lineData->offsetTail = 0;
} else {
// Count the number of indent characters in the current line.
unsigned int level = 0;
while (line[level] == lineData->indent && level <= lineData->level + 1) ++level;
while (line[level] == lineData->indent) ++level;
lineData->level = level;
// Set offsetHead and offsetTail to level to start with.
// offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
lineData->offsetHead = level;
lineData->offsetTail = level;
while (line[lineData->offsetTail] != '\0' && line[lineData->offsetTail] != ' ') ++lineData->offsetTail;
// Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
while (line[lineData->offsetTail] && line[lineData->offsetTail] != ' ') ++lineData->offsetTail;
}
}

View File

@ -9,8 +9,7 @@ void linedata_basic (char indent) {
size_t bufsize = 32;
ssize_t c_read = 0;
terrace_linedata_t line_data;
line_data.indent = indent;
terrace_linedata_t line_data = terrace_create_line_data(indent);
while(c_read = getline(&line, &bufsize, stdin)) {
if (c_read == -1) break;
@ -29,8 +28,7 @@ void linedata_head_tail (char indent) {
size_t bufsize = 32;
ssize_t c_read = 0;
terrace_linedata_t line_data;
line_data.indent = indent;
terrace_linedata_t line_data = terrace_create_line_data(indent);
char *head;
char *tail;

View File

@ -22,7 +22,8 @@ export type Document = {
export function useDocument (reader: Reader, indent: string = ' '): Document {
if (indent.length !== 1) throw new Error(`Terrace currently only allows single-character indent strings - you passed "${indent}"`)
const lineData = createLineData('', indent)
const lineData = createLineData(indent)
let currLine = ''
// If `repeatCurrentLine` is `true`, the following call to `next()` will repeat the current line in
// the document and set `repeatCurrentLine` back to `false`
@ -62,8 +63,8 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
if (line == null) return false
// Populate lineData with parsed information from the current line.
lineData.line = line
parseLine(lineData)
currLine = line
parseLine(currLine, lineData)
}
// If we shouldn't be handling this line, make the following call to next() repeat the current line.
@ -108,7 +109,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
* @param {number} startOffset How many indent characters to skip before outputting the line contents. Defaults to the current indent level
* @returns {string} The line contents starting from `startOffset`
*/
const line = (startOffset: number = lineData.level): string => lineData.line.slice(startOffset)
const line = (startOffset: number = lineData.level): string => currLine.slice(startOffset)
/**
* Get the first "word" of a line, starting from the first non-indent character to the first space or end of the line
* Often used for deciding how to parse a block.
@ -122,7 +123,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
* ```
* @returns {string} The `head` portion (first word) of a line
*/
const head = (): string => lineData.line.slice(lineData.offsetHead, lineData.offsetTail)
const head = (): string => currLine.slice(lineData.offsetHead, lineData.offsetTail)
/**
* Get all text following the first "word" of a line, starting from the first character after the space at the end of `head()`
*
@ -135,7 +136,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
* ```
* @returns {string} The remainder of the line following the `head()` portion, with no leading space
*/
const tail = (): string => lineData.line.slice(lineData.offsetTail + 1) // Skip the space
const tail = (): string => currLine.slice(lineData.offsetTail + 1) // Skip the space
/**
* Quickly check if the current line head matches a specified value
*

View File

@ -1,253 +0,0 @@
import { describe, expect, it } from 'vitest'
import { createLineData, parseLine } from './parser'
// Shape and default values of the object returned by `createLineData()` when called with no arguments.
describe(`LineData`, () => {
  it(`is an object`, () => {
    const lineData = createLineData()
    expect(lineData).toBeTypeOf(`object`)
  })

  // line, indent, level, offsetHead, offsetTail
  it(`has five properties`, () => {
    const lineData = createLineData()
    expect(Object.keys(lineData).length).to.equal(5)
  })

  // Previously this case asserted `lineData.level`, so the `line` property was never checked.
  // `createLineData()` defaults `line` to an empty string.
  it(`'line' is a string initialized to an empty string`, () => {
    const lineData = createLineData()
    expect(lineData.line).to.equal(``)
  })

  it(`'level' is an integer initialized to zero`, () => {
    const lineData = createLineData()
    expect(lineData.level).to.equal(0)
  })

  it(`'offsetHead' is an integer initialized to zero`, () => {
    const lineData = createLineData()
    expect(lineData.offsetHead).to.equal(0)
  })

  it(`'offsetTail' is an integer initialized to zero`, () => {
    const lineData = createLineData()
    expect(lineData.offsetTail).to.equal(0)
  })
})
// Behavioural tests for `parseLine`: argument validation, level counting for space- and
// tab-indented lines, handling of blank lines, and head/tail offset calculation.
// NOTE(review): several string literals below appear to have lost leading/repeated whitespace
// in this view (e.g. the "two spaces" and "indent level 2" cases show the same literal as the
// one-space cases) — confirm against the original file before relying on them.
// NOTE(review): this suite mixes two-argument (`parseLine(``, x)`) and one-argument
// (`parseLine(lineData)`) call shapes — confirm which signature was intended.
describe(`parseLine`, () => {
// Every invalid `lineData` value must be rejected with the same error message.
it(`Requres 'lineData' to be an object with string line and numeric level properties`, () => {
// @ts-ignore
expect(() => parseLine(``, 0)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, [])).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, {})).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, null)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, true)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, () => {})).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, { line: '', level: '' })).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, { line: '', level: 0 })).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
})
// Every non-string `indent`, and any string that is not exactly one character, must be rejected.
it(`Requres 'indent' to be a single-character string`, () => {
const lineData = createLineData()
lineData.line = ``
// @ts-ignore
lineData.indent = 0
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = []
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = {}
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = null
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = true
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = () => {}
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// NOTE(review): presumably a multi-character string in the original; the literal looks
// whitespace-collapsed here — confirm.
lineData.indent = ` `
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
})
// --- Lines made up only of indent characters (or nothing at all) ---
it(`Handles a blank line at indent level 0`, () => {
const line = ``
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 0, offsetHead: 0, offsetTail: 0 })
})
it(`Handles a line with a single space at indent level 1`, () => {
const line = ` `
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 1, offsetHead: 1, offsetTail: 1 })
})
it(`Handles a line with two spaces`, () => {
const line = ` `
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 2, offsetHead: 2, offsetTail: 2 })
})
// --- Space-indented lines with content: level equals the indent count, and offsetTail
// stops at the space following the first word ---
it(`Handles a normal line at indent level 0`, () => {
const line = `line 1`
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 0, offsetHead: 0, offsetTail: 4 })
})
it(`Handles a normal line at indent level 1`, () => {
const line = ` line 1`
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 1, offsetHead: 1, offsetTail: 5 })
})
it(`Handles a normal line at indent level 2`, () => {
const line = ` line 1`
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 2, offsetHead: 2, offsetTail: 6 })
})
// --- Same checks with a tab as the indent character ---
it(`Handles a normal line at indent level 1 indented with tabs`, () => {
const line = `\tline 1`
const lineData = createLineData(line, `\t`)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: `\t`, level: 1, offsetHead: 1, offsetTail: 5 })
})
it(`Handles a normal line at indent level 2 indented with tabs`, () => {
const line = `\t\tline 1`
const lineData = createLineData(line, `\t`)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: `\t`, level: 2, offsetHead: 2, offsetTail: 6})
})
// --- Multi-line sequences: one lineData object is reused across lines, and a snapshot
// copy is taken after each parse so earlier results are not overwritten ---
it(`Nests a normal line under a preceding normal line`, () => {
const lines = [
'line 1',
' line 2'
]
const lineData = createLineData()
const results = lines.map(line => {
lineData.line = line
parseLine(lineData)
return {...lineData}
})
expect(results).to.deep.equal([
{ line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
{ line: lines[1], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 }
])
})
it(`Nests multiple normal line under a preceding normal line`, () => {
const lines = [
'line 1',
' line 2',
' line 3',
' line 4',
]
const lineData = createLineData()
const results = lines.map(line => {
lineData.line = line
parseLine(lineData)
return {...lineData}
})
expect(results).to.deep.equal([
{ line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
{ line: lines[1], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 },
{ line: lines[2], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 },
{ line: lines[3], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 }
])
})
// Blank lines keep the level of the line before them and report zero head/tail offsets.
it(`Does not nest an empty line under a preceding normal line`, () => {
const lines = [
'line 1',
''
]
const lineData = createLineData()
const results = lines.map(line => {
lineData.line = line
parseLine(lineData)
return {...lineData}
})
expect(results).to.deep.equal([
{ line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
{ line: lines[1], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 }
])
})
it(`Does not nest multiple empty lines under a preceding normal line`, () => {
const lines = [
'line 1',
'',
'',
'',
]
const lineData = createLineData()
const results = lines.map(line => {
lineData.line = line
parseLine(lineData)
return {...lineData}
})
expect(results).to.deep.equal([
{ line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
{ line: lines[1], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 },
{ line: lines[2], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 },
{ line: lines[3], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 }
])
})
// --- Head/tail extraction: head is the slice [offsetHead, offsetTail), tail is everything
// after the single separator character at offsetTail ---
it(`Handle head and tail matching for lines with head and tail`, () => {
const line = ` head tail1 tail2 tail3`
const lineData = createLineData(line)
parseLine(lineData)
const head = line.slice(lineData.offsetHead, lineData.offsetTail)
const tail = line.slice(lineData.offsetTail + 1)
expect(head).to.equal(`head`)
expect(tail).to.equal(`tail1 tail2 tail3`)
})
it(`Handle head and tail matching for lines with head but no tail`, () => {
const line = ` head`
const lineData = createLineData(line)
parseLine(lineData)
const head = line.slice(lineData.offsetHead, lineData.offsetTail)
const tail = line.slice(lineData.offsetTail + 1)
expect(head).to.equal(`head`)
expect(tail).to.equal(``)
})
it(`Handle head and tail matching for lines with head and trailing space`, () => {
const line = ` head `
const lineData = createLineData(line)
parseLine(lineData)
const head = line.slice(lineData.offsetHead, lineData.offsetTail)
const tail = line.slice(lineData.offsetTail + 1)
expect(head).to.equal(`head`)
expect(tail).to.equal(``)
})
})

View File

@ -1,35 +1,54 @@
// Holds the parsed information from each line.
export type LineData = {
line: string;
// Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
indent: string;
// How many indent characters are present in the current line before the first non-indent character.
level: number;
// The number of characters before the start of the line's "head" section.
// (Normally the same as `level`)
offsetHead: number;
// The number of characters before the start of the line's "tail" section.
offsetTail: number;
}
export function createLineData(line: string = '', indent: string = ' '): LineData {
return { line, indent, level: 0, offsetHead: 0, offsetTail: 0 }
/**
* Initialize a LineData instance with default values.
* @param {string} indent The character to use for indenting lines. ONLY ONE CHARACTER IS CURRENTLY PERMITTED.
* @returns {LineData} A LineData instance with the specified indent character and all other values initialized to 0.
*/
export function createLineData(indent: string = ' '): LineData {
return { indent, level: 0, offsetHead: 0, offsetTail: 0 }
}
export function parseLine(lineData: LineData): LineData {
/**
* Core Terrace parser function, sets level, offsetHead, and offsetTail in a LineData object based on the current line.
* Note that this is a C-style function, lineData is treated as a reference and mutated in-place.
* @param {string} line A string containing a line to parse. Shouldn't end with a newline.
* @param {LineData} lineData A LineData object to store information about the current line in. **Mutated in-place!**
*/
export function parseLine(line: string, lineData: LineData) {
if ((typeof lineData !== 'object' || !lineData) || typeof lineData.level !== 'number') throw new Error(`'lineData' must be an object with string line and numeric level properties`)
if (typeof lineData.indent !== 'string' || lineData.indent.length === 0 || lineData.indent.length > 1) throw new Error(`'lineData.indent' must be a single-character string`)
if (typeof lineData.line !== 'string') throw new Error(`'lineData.line' must be a string`)
if (typeof line !== 'string') throw new Error(`'line' must be a string`)
let level = 0
// Repeat previous level for blank lines.
if (!lineData.line.length) {
lineData.level = lineData.level
// Blank lines have no characters, the newline should be stripped off.
// Special case handling for these allows them to be parsed quickly.
if (!line.length) {
// Empty lines are treated as having the same level as the previous line, so lineData.line is not updated.
lineData.offsetHead = 0
lineData.offsetTail = 0
} else {
while (lineData.line[level] === lineData.indent && level <= lineData.level + 1) ++level
// Count the number of indent characters in the current line.
let level = 0
while (line[level] === lineData.indent) ++level
lineData.level = level
// Set offsetHead and offsetTail to level to start with.
// offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
lineData.offsetHead = level
lineData.offsetTail = level
while (lineData.line[lineData.offsetTail] && lineData.line[lineData.offsetTail] !== ' ') ++lineData.offsetTail
// Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
while (line[lineData.offsetTail] && line[lineData.offsetTail] !== ' ') ++lineData.offsetTail
}
return lineData
}

View File

@ -1,26 +1,28 @@
import { createLineData, parseLine, useDocument } from '@terrace-lang/js'
import { createLineData, parseLine } from '@terrace-lang/js'
import { createStdinReader } from '@terrace-lang/js/readers/node-readline'
const testName = process.argv[2]
async function linedata_basic(indent) {
const lineData = createLineData('', indent)
const lineData = createLineData(indent)
const next = createStdinReader()
while ((lineData.line = await next()) != null) {
parseLine(lineData)
const { level, indent, offsetHead, offsetTail, line } = lineData
let line = ''
while ((line = await next()) != null) {
parseLine(line, lineData)
const { level, indent, offsetHead, offsetTail } = lineData
console.log(`| level ${level} | indent ${indent} | offsetHead ${offsetHead} | offsetTail ${offsetTail} | line ${line} |`)
}
}
async function linedata_head_tail () {
const lineData = createLineData('')
const lineData = createLineData()
const next = createStdinReader()
while ((lineData.line = await next()) != null) {
parseLine(lineData)
const { level, indent, offsetHead, offsetTail, line } = lineData
let line = ''
while ((line = await next()) != null) {
parseLine(line, lineData)
const { offsetHead, offsetTail } = lineData
const head = line.slice(offsetHead, offsetTail)
const tail = line.slice(offsetTail + 1)

View File

@ -1,35 +1,63 @@
from typing import TypedDict
# Holds the parsed information from each line.
class LineData(TypedDict):
line: str
# Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
indent: str
# How many indent characters are present in the current line before the first non-indent character.
level: int
# The number of characters before the start of the line's "head" section.
# (Normally the same as `level`)
offsetHead: int
# The number of characters before the start of the line's "tail" section.
offsetTail: int
def createLineData(line: str = '', indent: str = ' ') -> LineData:
return { "line": line, "indent": indent, "level": 0, "offsetHead": 0, "offsetTail": 0 }
def createLineData(indent: str = ' ') -> LineData:
"""
Initialize a LineData instance with default values.
def parseLine(lineData: LineData) -> LineData:
# if ((typeof lineData !== 'object' || !lineData) || typeof lineData.level !== 'number') throw new Error(`'lineData' must be an object with string line and numeric level properties`)
# if (typeof lineData.indent !== 'string' || lineData.indent.length === 0 || lineData.indent.length > 1) throw new Error(`'lineData.indent' must be a single-character string`)
# if (typeof lineData.line !== 'string') throw new Error(`'lineData.line' must be a string`)
Parameters
----------
indent : str
The character to use for indenting lines. ONLY ONE CHARACTER IS CURRENTLY PERMITTED.
Returns
-------
LineData
A LineData dict with the specified indent character and all other values initialized to 0.
"""
return { "indent": indent, "level": 0, "offsetHead": 0, "offsetTail": 0 }
level = 0
def parseLine(line: str, lineData: LineData):
"""
Core Terrace parser function, sets level, offsetHead, and offsetTail in a LineData object based on the current line.
Note that this is a C-style function, lineData is treated as a reference and mutated in-place.
# Repeat previous level for blank lines.
if len(lineData['line']) == 0:
lineData['level'] = lineData['level']
Parameters
----------
line : str
A string containing a line to parse. Shouldn't end with a newline.
lineData: LineData
A LineData dict to store information about the current line in. **Mutated in-place!**
"""
# Blank lines have no characters, the newline should be stripped off.
# Special case handling for these allows them to be parsed quickly.
if len(line) == 0:
# Empty lines are treated as having the same level as the previous line, so lineData.line is not updated.
lineData['offsetHead'] = 0
lineData['offsetTail'] = 0
else:
while level < len(lineData['line']) and lineData['line'][level] == lineData['indent'] and level <= lineData['level'] + 1:
# Count the number of indent characters in the current line.
level = 0
while level < len(line) and line[level] == lineData['indent']:
level += 1
lineData['level'] = level
# Set offsetHead and offsetTail to level to start with.
# offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
lineData['offsetHead'] = level
lineData['offsetTail'] = level
while lineData['offsetTail'] < len(lineData['line']) and lineData['line'][lineData['offsetTail']] != ' ':
lineData['offsetTail'] += 1
return lineData
# Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
while lineData['offsetTail'] < len(line) and line[lineData['offsetTail']] != ' ':
lineData['offsetTail'] += 1

View File

@ -14,33 +14,21 @@ def next():
return line.rstrip('\n') if len(line) > 0 else None
def linedata_basic (indent):
lineData = createLineData('', indent)
lineData = createLineData(indent)
while (l := next()) != None:
lineData['line'] = l
parseLine(lineData)
while (line := next()) != None:
parseLine(line, lineData)
print("| level {level} | indent {indent} | offsetHead {offsetHead} | offsetTail {offsetTail} | line {line} |".format(
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = lineData['line']
))
def linedata_tabs ():
lineData = createLineData('', '\t')
while (l := next()) != None:
lineData['line'] = l
parseLine(lineData)
print("| level {level} | indent {indent} | offsetHead {offsetHead} | offsetTail {offsetTail} | line {line} |".format(
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = lineData['line']
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = line
))
def linedata_head_tail ():
lineData = createLineData('')
lineData = createLineData()
while (l := next()) != None:
lineData['line'] = l
parseLine(lineData)
head = lineData['line'][lineData['offsetHead']:lineData['offsetTail']] if len(lineData['line']) > lineData['offsetHead'] else ''
tail = lineData['line'][lineData['offsetTail'] + 1:] if len(lineData['line']) > lineData['offsetTail'] + 1 else ''
while (line := next()) != None:
parseLine(line, lineData)
head = line[lineData['offsetHead']:lineData['offsetTail']] if len(line) > lineData['offsetHead'] else ''
tail = line[lineData['offsetTail'] + 1:] if len(line) > lineData['offsetTail'] + 1 else ''
print("| head {head} | tail {tail} |".format(
head = head, tail = tail