Cleanup and document core APIs in C, JS, and Python.

This commit is contained in:
Joshua Bemenderfer 2023-02-19 14:53:59 -05:00
parent 38068b24df
commit 3f6c475756
8 changed files with 134 additions and 328 deletions

View File

@ -1,28 +1,51 @@
#ifndef TERRACE_PARSER_H
#define TERRACE_PARSER_H
// Holds the parsed information from each line.
// terrace_parse_line writes its results into this struct; `indent` is only read by it,
// while `level`, `offsetHead` and `offsetTail` are overwritten as described below.
struct terrace_linedata_s {
// Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
char indent;
// How many indent characters are present in the current line before the first non-indent character.
// Left untouched by terrace_parse_line when it is given a null line pointer (an empty line),
// so the value from the previously parsed line carries over.
unsigned int level;
// The number of characters before the start of the line's "head" section.
// terrace_parse_line sets this equal to `level`, or to 0 for an empty (null) line.
unsigned int offsetHead;
// The number of characters before the start of the line's "tail" section.
// terrace_parse_line sets this to the index of the first space at or after `offsetHead`,
// or to the index of the terminating '\0' when the line contains no such space.
// Set to 0 for an empty (null) line.
unsigned int offsetTail;
};
// Alias so the struct can be referred to as `terrace_linedata_t` without the `struct` keyword.
typedef struct terrace_linedata_s terrace_linedata_t;
void terrace_parse_line(char* line, terrace_linedata_t *lineData) {
if (line == 0) {
// Reuse lineData->level from previous line.
terrace_linedata_t terrace_create_line_data(char indent) {
terrace_linedata_t line_data = { .indent = indent, .level = 0, .offsetHead = 0, .offsetTail = 0 };
return line_data;
}
/**
* Core Terrace parser function, sets level, offsetHead, and offsetTail in a lineData struct based on the current line.
* @param char* line A pointer to the line to parse as a C-style string. Shouldn't end with a newline.
* @param terrace_linedata_t* lineData A pointer to the terrace_linedata_t struct to store information about the current line in.
*/
void terrace_parse_line(char *line, terrace_linedata_t *lineData) {
// Empty lines are nullptr/0 as they have no characters. (The newline character should be stripped off.)
// Special case handling for these allows them to be parsed extra quickly.
if (!line) {
// Empty lines are treated as having the same level as the previous line, so lineData->level is not updated.
lineData->offsetHead = 0;
lineData->offsetTail = 0;
} else {
// Count the number of indent characters in the current line.
unsigned int level = 0;
while (line[level] == lineData->indent && level <= lineData->level + 1) ++level;
while (line[level] == lineData->indent) ++level;
lineData->level = level;
// Set offsetHead and offsetTail to level to start with.
// offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
lineData->offsetHead = level;
lineData->offsetTail = level;
while (line[lineData->offsetTail] != '\0' && line[lineData->offsetTail] != ' ') ++lineData->offsetTail;
// Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
while (line[lineData->offsetTail] && line[lineData->offsetTail] != ' ') ++lineData->offsetTail;
}
}

View File

@ -9,8 +9,7 @@ void linedata_basic (char indent) {
size_t bufsize = 32;
ssize_t c_read = 0;
terrace_linedata_t line_data;
line_data.indent = indent;
terrace_linedata_t line_data = terrace_create_line_data(indent);
while(c_read = getline(&line, &bufsize, stdin)) {
if (c_read == -1) break;
@ -29,8 +28,7 @@ void linedata_head_tail (char indent) {
size_t bufsize = 32;
ssize_t c_read = 0;
terrace_linedata_t line_data;
line_data.indent = indent;
terrace_linedata_t line_data = terrace_create_line_data(indent);
char *head;
char *tail;

View File

@ -22,7 +22,8 @@ export type Document = {
export function useDocument (reader: Reader, indent: string = ' '): Document {
if (indent.length !== 1) throw new Error(`Terrace currently only allows single-character indent strings - you passed "${indent}"`)
const lineData = createLineData('', indent)
const lineData = createLineData(indent)
let currLine = ''
// If `repeatCurrentLine` is `true`, the following call to `next()` will repeat the current line in
// the document and set `repeatCurrentLine` back to `false`
@ -62,8 +63,8 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
if (line == null) return false
// Populate lineData with parsed information from the current line.
lineData.line = line
parseLine(lineData)
currLine = line
parseLine(currLine, lineData)
}
// If we shouldn't be handling this line, make the following call to next() repeat the current line.
@ -108,7 +109,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
* @param {number} startOffset How many indent characters to skip before outputting the line contents. Defaults to the current indent level
* @returns {string} The line contents starting from `startOffset`
*/
const line = (startOffset: number = lineData.level): string => lineData.line.slice(startOffset)
const line = (startOffset: number = lineData.level): string => currLine.slice(startOffset)
/**
* Get the first "word" of a line, starting from the first non-indent character to the first space or end of the line
* Often used for deciding how to parse a block.
@ -122,7 +123,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
* ```
* @returns {string} The `head` portion (first word) of a line
*/
const head = (): string => lineData.line.slice(lineData.offsetHead, lineData.offsetTail)
const head = (): string => currLine.slice(lineData.offsetHead, lineData.offsetTail)
/**
* Get all text following the first "word" of a line, starting from the first character after the space at the end of `head()`
*
@ -135,7 +136,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
* ```
* @returns {string} The remainder of the line following the `head()` portion, with no leading space
*/
const tail = (): string => lineData.line.slice(lineData.offsetTail + 1) // Skip the space
const tail = (): string => currLine.slice(lineData.offsetTail + 1) // Skip the space
/**
* Quickly check if the current line head matches a specified value
*

View File

@ -1,253 +0,0 @@
import { describe, expect, it } from 'vitest'
import { createLineData, parseLine } from './parser'
describe(`LineData`, () => {
it(`is an object`, () => {
const lineData = createLineData()
expect(lineData).toBeTypeOf(`object`)
})
it(`has five properties`, () => {
const lineData = createLineData()
expect(Object.keys(lineData).length).to.equal(5)
})
it(`'line' is a string|null initialized to null`, () => {
const lineData = createLineData()
expect(lineData.level).to.equal(0)
})
it(`'level' is an integer initialized to zero`, () => {
const lineData = createLineData()
expect(lineData.level).to.equal(0)
})
it(`'offsetHead' is an integer initialized to zero`, () => {
const lineData = createLineData()
expect(lineData.offsetHead).to.equal(0)
})
it(`'offsetTail' is an integer initialized to zero`, () => {
const lineData = createLineData()
expect(lineData.offsetTail).to.equal(0)
})
})
describe(`parseLine`, () => {
it(`Requres 'lineData' to be an object with string line and numeric level properties`, () => {
// @ts-ignore
expect(() => parseLine(``, 0)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, [])).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, {})).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, null)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, true)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, () => {})).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, { line: '', level: '' })).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
// @ts-ignore
expect(() => parseLine(``, { line: '', level: 0 })).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
})
it(`Requres 'indent' to be a single-character string`, () => {
const lineData = createLineData()
lineData.line = ``
// @ts-ignore
lineData.indent = 0
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = []
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = {}
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = null
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = true
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
// @ts-ignore
lineData.indent = () => {}
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
lineData.indent = ` `
expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
})
it(`Handles a blank line at indent level 0`, () => {
const line = ``
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 0, offsetHead: 0, offsetTail: 0 })
})
it(`Handles a line with a single space at indent level 1`, () => {
const line = ` `
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 1, offsetHead: 1, offsetTail: 1 })
})
it(`Handles a line with two spaces`, () => {
const line = ` `
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 2, offsetHead: 2, offsetTail: 2 })
})
it(`Handles a normal line at indent level 0`, () => {
const line = `line 1`
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 0, offsetHead: 0, offsetTail: 4 })
})
it(`Handles a normal line at indent level 1`, () => {
const line = ` line 1`
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 1, offsetHead: 1, offsetTail: 5 })
})
it(`Handles a normal line at indent level 2`, () => {
const line = ` line 1`
const lineData = createLineData(line)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: ` `, level: 2, offsetHead: 2, offsetTail: 6 })
})
it(`Handles a normal line at indent level 1 indented with tabs`, () => {
const line = `\tline 1`
const lineData = createLineData(line, `\t`)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: `\t`, level: 1, offsetHead: 1, offsetTail: 5 })
})
it(`Handles a normal line at indent level 2 indented with tabs`, () => {
const line = `\t\tline 1`
const lineData = createLineData(line, `\t`)
parseLine(lineData)
expect(lineData).to.deep.equal({ line, indent: `\t`, level: 2, offsetHead: 2, offsetTail: 6})
})
it(`Nests a normal line under a preceding normal line`, () => {
const lines = [
'line 1',
' line 2'
]
const lineData = createLineData()
const results = lines.map(line => {
lineData.line = line
parseLine(lineData)
return {...lineData}
})
expect(results).to.deep.equal([
{ line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
{ line: lines[1], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 }
])
})
it(`Nests multiple normal line under a preceding normal line`, () => {
const lines = [
'line 1',
' line 2',
' line 3',
' line 4',
]
const lineData = createLineData()
const results = lines.map(line => {
lineData.line = line
parseLine(lineData)
return {...lineData}
})
expect(results).to.deep.equal([
{ line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
{ line: lines[1], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 },
{ line: lines[2], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 },
{ line: lines[3], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 }
])
})
it(`Does not nest an empty line under a preceding normal line`, () => {
const lines = [
'line 1',
''
]
const lineData = createLineData()
const results = lines.map(line => {
lineData.line = line
parseLine(lineData)
return {...lineData}
})
expect(results).to.deep.equal([
{ line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
{ line: lines[1], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 }
])
})
it(`Does not nest multiple empty lines under a preceding normal line`, () => {
const lines = [
'line 1',
'',
'',
'',
]
const lineData = createLineData()
const results = lines.map(line => {
lineData.line = line
parseLine(lineData)
return {...lineData}
})
expect(results).to.deep.equal([
{ line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
{ line: lines[1], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 },
{ line: lines[2], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 },
{ line: lines[3], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 }
])
})
it(`Handle head and tail matching for lines with head and tail`, () => {
const line = ` head tail1 tail2 tail3`
const lineData = createLineData(line)
parseLine(lineData)
const head = line.slice(lineData.offsetHead, lineData.offsetTail)
const tail = line.slice(lineData.offsetTail + 1)
expect(head).to.equal(`head`)
expect(tail).to.equal(`tail1 tail2 tail3`)
})
it(`Handle head and tail matching for lines with head but no tail`, () => {
const line = ` head`
const lineData = createLineData(line)
parseLine(lineData)
const head = line.slice(lineData.offsetHead, lineData.offsetTail)
const tail = line.slice(lineData.offsetTail + 1)
expect(head).to.equal(`head`)
expect(tail).to.equal(``)
})
it(`Handle head and tail matching for lines with head and trailing space`, () => {
const line = ` head `
const lineData = createLineData(line)
parseLine(lineData)
const head = line.slice(lineData.offsetHead, lineData.offsetTail)
const tail = line.slice(lineData.offsetTail + 1)
expect(head).to.equal(`head`)
expect(tail).to.equal(``)
})
})

View File

@ -1,35 +1,54 @@
// Holds the parsed information from each line.
export type LineData = {
line: string;
// Which character is being used for indentation. Avoids having to specify it on each parseLine call.
indent: string;
// How many indent characters are present in the current line before the first non-indent character.
level: number;
// The number of characters before the start of the line's "head" section.
// (Normally the same as `level`)
offsetHead: number;
// The number of characters before the start of the line's "tail" section.
offsetTail: number;
}
export function createLineData(line: string = '', indent: string = ' '): LineData {
return { line, indent, level: 0, offsetHead: 0, offsetTail: 0 }
/**
* Initialize a LineData instance with default values.
* @param {string} indent The character to use for indenting lines. ONLY ONE CHARACTER IS CURRENTLY PERMITTED.
* @returns {LineData} A LineData instance with the specified indent character and all other values initialized to 0.
*/
export function createLineData(indent: string = ' '): LineData {
return { indent, level: 0, offsetHead: 0, offsetTail: 0 }
}
export function parseLine(lineData: LineData): LineData {
/**
* Core Terrace parser function, sets level, offsetHead, and offsetTail in a LineData object based on the current line.
* Note that this is a C-style function, lineData is treated as a reference and mutated in-place.
* @param {string} line A string containing a line to parse. Shouldn't end with a newline.
* @param {LineData} lineData A LineData object to store information about the current line in. **Mutated in-place!**
*/
export function parseLine(line: string, lineData: LineData) {
if ((typeof lineData !== 'object' || !lineData) || typeof lineData.level !== 'number') throw new Error(`'lineData' must be an object with string line and numeric level properties`)
if (typeof lineData.indent !== 'string' || lineData.indent.length === 0 || lineData.indent.length > 1) throw new Error(`'lineData.indent' must be a single-character string`)
if (typeof lineData.line !== 'string') throw new Error(`'lineData.line' must be a string`)
if (typeof line !== 'string') throw new Error(`'line' must be a string`)
let level = 0
// Repeat previous level for blank lines.
if (!lineData.line.length) {
lineData.level = lineData.level
// Blank lines have no characters, the newline should be stripped off.
// Special case handling for these allows them to be parsed quickly.
if (!line.length) {
// Empty lines are treated as having the same level as the previous line, so lineData.level is not updated.
lineData.offsetHead = 0
lineData.offsetTail = 0
} else {
while (lineData.line[level] === lineData.indent && level <= lineData.level + 1) ++level
// Count the number of indent characters in the current line.
let level = 0
while (line[level] === lineData.indent) ++level
lineData.level = level
// Set offsetHead and offsetTail to level to start with.
// offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
lineData.offsetHead = level
lineData.offsetTail = level
while (lineData.line[lineData.offsetTail] && lineData.line[lineData.offsetTail] !== ' ') ++lineData.offsetTail
// Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
while (line[lineData.offsetTail] && line[lineData.offsetTail] !== ' ') ++lineData.offsetTail
}
return lineData
}

View File

@ -1,26 +1,28 @@
import { createLineData, parseLine, useDocument } from '@terrace-lang/js'
import { createLineData, parseLine } from '@terrace-lang/js'
import { createStdinReader } from '@terrace-lang/js/readers/node-readline'
const testName = process.argv[2]
async function linedata_basic(indent) {
const lineData = createLineData('', indent)
const lineData = createLineData(indent)
const next = createStdinReader()
while ((lineData.line = await next()) != null) {
parseLine(lineData)
const { level, indent, offsetHead, offsetTail, line } = lineData
let line = ''
while ((line = await next()) != null) {
parseLine(line, lineData)
const { level, indent, offsetHead, offsetTail } = lineData
console.log(`| level ${level} | indent ${indent} | offsetHead ${offsetHead} | offsetTail ${offsetTail} | line ${line} |`)
}
}
async function linedata_head_tail () {
const lineData = createLineData('')
const lineData = createLineData()
const next = createStdinReader()
while ((lineData.line = await next()) != null) {
parseLine(lineData)
const { level, indent, offsetHead, offsetTail, line } = lineData
let line = ''
while ((line = await next()) != null) {
parseLine(line, lineData)
const { offsetHead, offsetTail } = lineData
const head = line.slice(offsetHead, offsetTail)
const tail = line.slice(offsetTail + 1)

View File

@ -1,35 +1,63 @@
from typing import TypedDict
# Holds the parsed information from each line.
class LineData(TypedDict):
line: str
# Which character is being used for indentation. Avoids having to specify it on each parseLine call.
indent: str
# How many indent characters are present in the current line before the first non-indent character.
level: int
# The number of characters before the start of the line's "head" section.
# (Normally the same as `level`)
offsetHead: int
# The number of characters before the start of the line's "tail" section.
offsetTail: int
def createLineData(line: str = '', indent: str = ' ') -> LineData:
return { "line": line, "indent": indent, "level": 0, "offsetHead": 0, "offsetTail": 0 }
def createLineData(indent: str = ' ') -> LineData:
"""
Initialize a LineData instance with default values.
def parseLine(lineData: LineData) -> LineData:
# if ((typeof lineData !== 'object' || !lineData) || typeof lineData.level !== 'number') throw new Error(`'lineData' must be an object with string line and numeric level properties`)
# if (typeof lineData.indent !== 'string' || lineData.indent.length === 0 || lineData.indent.length > 1) throw new Error(`'lineData.indent' must be a single-character string`)
# if (typeof lineData.line !== 'string') throw new Error(`'lineData.line' must be a string`)
Parameters
----------
indent : str
The character to use for indenting lines. ONLY ONE CHARACTER IS CURRENTLY PERMITTED.
Returns
-------
LineData
A LineData dict with the specified indent character and all other values initialized to 0.
"""
return { "indent": indent, "level": 0, "offsetHead": 0, "offsetTail": 0 }
level = 0
def parseLine(line: str, lineData: LineData):
"""
Core Terrace parser function, sets level, offsetHead, and offsetTail in a LineData object based on the current line.
Note that this is a C-style function, lineData is treated as a reference and mutated in-place.
# Repeat previous level for blank lines.
if len(lineData['line']) == 0:
lineData['level'] = lineData['level']
Parameters
----------
line : str
A string containing a line to parse. Shouldn't end with a newline.
lineData: LineData
A LineData dict to store information about the current line in. **Mutated in-place!**
"""
# Blank lines have no characters, the newline should be stripped off.
# Special case handling for these allows them to be parsed quickly.
if len(line) == 0:
# Empty lines are treated as having the same level as the previous line, so lineData['level'] is not updated.
lineData['offsetHead'] = 0
lineData['offsetTail'] = 0
else:
while level < len(lineData['line']) and lineData['line'][level] == lineData['indent'] and level <= lineData['level'] + 1:
# Count the number of indent characters in the current line.
level = 0
while level < len(line) and line[level] == lineData['indent']:
level += 1
lineData['level'] = level
# Set offsetHead and offsetTail to level to start with.
# offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
lineData['offsetHead'] = level
lineData['offsetTail'] = level
while lineData['offsetTail'] < len(lineData['line']) and lineData['line'][lineData['offsetTail']] != ' ':
lineData['offsetTail'] += 1
return lineData
# Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
while lineData['offsetTail'] < len(line) and line[lineData['offsetTail']] != ' ':
lineData['offsetTail'] += 1

View File

@ -14,33 +14,21 @@ def next():
return line.rstrip('\n') if len(line) > 0 else None
def linedata_basic (indent):
lineData = createLineData('', indent)
lineData = createLineData(indent)
while (l := next()) != None:
lineData['line'] = l
parseLine(lineData)
while (line := next()) != None:
parseLine(line, lineData)
print("| level {level} | indent {indent} | offsetHead {offsetHead} | offsetTail {offsetTail} | line {line} |".format(
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = lineData['line']
))
def linedata_tabs ():
lineData = createLineData('', '\t')
while (l := next()) != None:
lineData['line'] = l
parseLine(lineData)
print("| level {level} | indent {indent} | offsetHead {offsetHead} | offsetTail {offsetTail} | line {line} |".format(
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = lineData['line']
level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = line
))
def linedata_head_tail ():
lineData = createLineData('')
lineData = createLineData()
while (l := next()) != None:
lineData['line'] = l
parseLine(lineData)
head = lineData['line'][lineData['offsetHead']:lineData['offsetTail']] if len(lineData['line']) > lineData['offsetHead'] else ''
tail = lineData['line'][lineData['offsetTail'] + 1:] if len(lineData['line']) > lineData['offsetTail'] + 1 else ''
while (line := next()) != None:
parseLine(line, lineData)
head = line[lineData['offsetHead']:lineData['offsetTail']] if len(line) > lineData['offsetHead'] else ''
tail = line[lineData['offsetTail'] + 1:] if len(line) > lineData['offsetTail'] + 1 else ''
print("| head {head} | tail {tail} |".format(
head = head, tail = tail