Implement basic document functions for C API, mostly equivalent to JS ones.

Cleanup and document core APIs in C, JS, and Python.
2023-02-19 17:04:36 -05:00 · 2023-02-19 14:53:59 -05:00
10 changed files with 349 additions and 331 deletions
--- a/packages/c/.gitignore
+++ b/packages/c/.gitignore
@@ -1 +1,3 @@
 test/test-runner
+test/document
+test/document.c
--- a/packages/c/document.h
+++ b/packages/c/document.h
@@ -0,0 +1,212 @@
+#ifndef TERRACE_DOCUMENT_H
+#define TERRACE_DOCUMENT_H
+
+#include "parser.h"
+
+typedef struct terrace_document_s {
+  // == Internal State == //
+  unsigned int _repeatCurrentLine; // Non-zero when the next `terrace_next()` call must hand out the current line again instead of reading a new one
+  // Current line being read. `terrace_next()` passes the address of this pointer to `reader`.
+  char* _currentLine;
+
+  // == External Information == //
+  // Embedded line data struct. Holds information about the current parsed line
+  terrace_linedata_t lineData;
+  // Custom data passed to the readline function
+  void* userData;
+  /**
+   * Line reader function, provided by the user
+   * Needed to get the next line inside of `terrace_next(doc)`
+   * @param {char**} line First argument is a pointer to `_currentLine`, above
+   * @param {void*} userData Second argument is `userData`, above
+   * @returns {int} The number of characters read, or -1 when there are no more lines (`terrace_next` treats -1 as the end of the document).
+   */
+  int (*reader)(char** line, void* userData);
+} terrace_document_t;
+
+/**
+ * Initialize a Terrace document with indent parameters and the function needed to read lines.
+ *
+ * Defined `static inline` because this definition lives in a header: without internal linkage,
+ * including the header from more than one translation unit produces duplicate-symbol link errors.
+ *
+ * @param {char} indent The indent character to use. Generally a single space character.
+ * @param {int (*reader)(char** line, void* userData)} reader A function pointer to a function that reads lines sequentially
+ * from a user-provided source. Receives a pointer to `doc->_currentLine`, and userData, supplied in the next argument.
+ * @param {void*} userData A user-supplied pointer to any state information needed by their reader function.
+ * Passed to `reader` each time it is called.
+ * @returns {terrace_document_t} An initialized document that can now be used for further parsing.
+ */
+static inline terrace_document_t terrace_create_document(const char indent, int (*reader)(char** line, void* userData), void* userData) {
+  terrace_document_t document = {
+    ._repeatCurrentLine = 0,
+    // No line has been read yet; the first `terrace_next()` call fills this in through `reader`.
+    ._currentLine = 0,
+    .lineData = terrace_create_line_data(indent),
+    .userData = userData,
+    .reader = reader
+  };
+
+  return document;
+}
+
+/**
+ * Returns the number of indent characters of the current line
+ *
+ * Given the following document, `terrace_level(doc)` would return 0, 1, 2, and 5 respectively for each line
+ *
+ * ```terrace
+ * block
+ *  block
+ *   block
+ *      block
+ * ```
+ *
+ * Defined `static inline` so that including this header from several translation units does not
+ * produce duplicate-symbol link errors.
+ *
+ * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
+ * @returns {unsigned int} The indent level of the current line
+ */
+static inline unsigned int terrace_level(terrace_document_t* doc) {
+  return doc->lineData.level;
+}
+
+/**
+ * Get a string with the current line contents
+ * If `startOffset` is negative (conventionally -1), skips all indent characters by default. Otherwise only skips the amount specified
+ *
+ * Given the following document
+ *
+ * ```terrace
+ * root
+ *     sub-line
+ * ```
+ * `terrace_line(doc, -1)` on the second line returns "sub-line", trimming off the leading indent characters
+ * `terrace_line(doc, 0)` however, returns "    sub-line", with all four leading spaces
+ *
+ * `startOffset`s other than `-1` are primarily used for parsing blocks that have literal indented multi-line text
+ *
+ * The returned pointer never points past the line's terminating NUL: an offset larger than the line
+ * length yields a pointer to the terminator (an empty string).
+ *
+ * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
+ * @param {int} startOffset How many indent characters to skip before outputting the line contents. Defaults to the current indent level
+ * @returns {char*} The line contents starting from `startOffset`, or a null pointer if there is no current line
+ */
+static inline char* terrace_line(terrace_document_t* doc, int startOffset) {
+  char* line = doc->_currentLine;
+  // No line loaded (or the reader supplied a null line): arithmetic on a null pointer is undefined, so hand it back untouched.
+  if (!line) return line;
+
+  if (startOffset < 0) startOffset = (int) doc->lineData.level;
+
+  // Walk forward instead of adding the offset blindly so we can never step over the terminating NUL.
+  int skipped = 0;
+  while (skipped < startOffset && line[skipped] != '\0') skipped++;
+
+  return line + skipped;
+}
+
+/**
+ * Get the *length* of the first "word" of a line,
+ * starting from the first non-indent character to the first space or end of the line
+ * Often used for deciding how to parse a block.
+ *
+ * Because C uses NUL-terminated strings, we cannot easily slice a string to return something out of the middle.
+ * Instead, `terrace_head_length` provides the length of the head portion.
+ * In combination with `doc->lineData.offsetHead`, you can copy the head section into a new string,
+ * or use any number of `strn*` C stdlib functions to work with the head section without copying it.
+ *
+ * Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser
+ *
+ * Given the following line, `terrace_head_length(doc)` returns `5`
+ *
+ * ```terrace
+ *   title An Important Document
+ * ```
+ *
+ * Defined `static inline` so that including this header from several translation units does not
+ * produce duplicate-symbol link errors.
+ *
+ * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
+ * @returns {int} The length of the `head` portion (first word) of a line
+ */
+static inline int terrace_head_length(terrace_document_t* doc) {
+  // Both offsets are unsigned; make the narrowing to the `int` return type explicit.
+  return (int) (doc->lineData.offsetTail - doc->lineData.offsetHead);
+}
+
+/**
+ * Get a char pointer to everything following the first "word" of a line,
+ * starting from the first character after the space at the end of `head`
+ *
+ * Terrace DSLs do not *need* to use head-tail line structure, but support for them is built into the parser
+ *
+ * Given the following line, `terrace_tail(doc)` returns "An Important Document"
+ *
+ * ```terrace
+ *   title An Important Document
+ * ```
+ *
+ * When the line has no tail (the head runs to the end of the line), an empty string is returned.
+ *
+ * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
+ * @returns {char*} The remainder of the line following the `head` portion, with no leading space,
+ * or a null pointer if there is no current line
+ */
+static inline char* terrace_tail(terrace_document_t* doc) {
+  char* line = doc->_currentLine;
+  // No line loaded (or the reader supplied a null line): nothing to point into.
+  if (!line) return line;
+
+  // `offsetTail` indexes either the space separating head from tail, or the terminating NUL when there is no tail.
+  char* tail = line + doc->lineData.offsetTail;
+  // Only skip the separator when it exists; stepping over the NUL would return a pointer past the end of the string.
+  if (*tail != '\0') tail++;
+
+  return tail;
+}
+
+/**
+ * Quickly check if the current line head matches a specified value. Useful in many document-parsing situations.
+ *
+ * Given the following line
+ *
+ * ```terrace
+ * title An Important Document
+ * ```
+ *
+ * `terrace_match(doc, "title")` returns `1`
+ * `terrace_match(doc, "somethingElse")` returns `0`
+ * `terrace_match(doc, "tit")` returns `0`, the whole head must be equal, not just a prefix of it
+ *
+ * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
+ * @param {const char*} matchHead A string to check against the line `head` for equality
+ * @returns {char} A byte set to 0 if the head does not match, or 1 if it does match
+ */
+static inline char terrace_match(terrace_document_t* doc, const char* matchHead) {
+  // A missing line has an empty head, which only equals the empty string.
+  if (!doc->_currentLine) return matchHead[0] == '\0';
+
+  // Get a pointer to the start of the head portion of the string, and how many characters belong to it.
+  const char* head = doc->_currentLine + doc->lineData.offsetHead;
+  unsigned int headLength = doc->lineData.offsetTail - doc->lineData.offsetHead;
+
+  // Advance while both strings still have characters and those characters agree.
+  // Bounding by `headLength` keeps the comparison inside the head instead of running on into the tail.
+  unsigned int i = 0;
+  while (i < headLength && matchHead[i] != '\0' && matchHead[i] == head[i]) i++;
+
+  // Equal only if both were consumed completely: the head has no characters left and neither does `matchHead`.
+  return i == headLength && matchHead[i] == '\0';
+}
+
+/**
+ * Advances the current position in the terrace document and populates `doc->lineData`
+ * with the parsed information from that line
+ *
+ * Returns `1` after parsing the next line, or `0` upon reaching the end of the document.
+ * If the `levelScope` parameter is not -1, `terrace_next()` will also return `0` when it encounters a line
+ * with a level at or below `levelScope`. This allows you to iterate through subsections of a document.
+ *
+ * If a lower-level line was encountered, the following call to `terrace_next()` will repeat this line again.
+ * This allows a child loop to look forward, determine that the next line will be outside its purview,
+ * and return control to the calling loop transparently without additional logic.
+ *
+ * Intended to be used inside a while loop to parse a section of a Terrace document.
+ *
+ * ```c
+ * while(terrace_next(doc, -1)) {
+ *   // Do something with each line.
+ * }
+ * ```
+ *
+ * Defined `static inline` so that including this header from several translation units does not
+ * produce duplicate-symbol link errors.
+ *
+ * @param {terrace_document_t*} doc A pointer to the Terrace document being parsed
+ * @param {int} levelScope If set above -1, `terrace_next()` will return `0` when it encounters a line with a level at or below `levelScope`
+ * @returns {char} Returns `1` after parsing a line, or `0` if the document has ended or a line at or below `levelScope` has been encountered.
+ */
+static inline char terrace_next(terrace_document_t* doc, int levelScope) {
+  if (doc->_repeatCurrentLine) {
+    // The previous call judged the current line to be out of its scope and left it for us:
+    // hand it out again without reading or re-parsing anything.
+    doc->_repeatCurrentLine = 0;
+  } else {
+    // Load the next line from the line reader. The reader reports "no more lines" with -1;
+    // any negative count is treated the same way, since it cannot describe a valid line.
+    if (doc->reader(&doc->_currentLine, doc->userData) < 0) return 0;
+
+    // Populate lineData with parsed information from the current line.
+    terrace_parse_line(doc->_currentLine, &doc->lineData);
+  }
+
+  // If we shouldn't be handling this line, make the following call to terrace_next() repeat the current line.
+  // Allows a child loop to look forward, determine that the next line will be outside its purview,
+  // and return control to the calling loop transparently without additional logic.
+  if ((int) terrace_level(doc) <= levelScope) {
+    doc->_repeatCurrentLine = 1;
+    return 0;
+  }
+
+  return 1;
+}
+
+#endif
--- a/packages/c/parser.h
+++ b/packages/c/parser.h
@@ -1,28 +1,49 @@
 #ifndef TERRACE_PARSER_H
 #define TERRACE_PARSER_H

-struct terrace_linedata_s {
+// Holds the parsed information from each line.
+typedef struct terrace_linedata_s {
+  // Which character is being used for indentation. Avoids having to specify it on each terrace_parse_line call.
  char indent;
+  // How many indent characters are present in the current line before the first non-indent character.
  unsigned int level;
+  // The number of characters before the start of the line's "head" section.
+  // (Normally the same as `level`)
  unsigned int offsetHead;
+  // The number of characters before the start of the line's "tail" section.
  unsigned int offsetTail;
-};
+} terrace_linedata_t;

-typedef struct terrace_linedata_s terrace_linedata_t;
+terrace_linedata_t terrace_create_line_data(const char indent) {
+  terrace_linedata_t line_data = { .indent = indent, .level = 0, .offsetHead = 0, .offsetTail = 0 };
+  return line_data;
+}

-void terrace_parse_line(char* line, terrace_linedata_t *lineData) {
-  if (line == 0) {
-    // Reuse lineData->level from previous line.
+/**
+ * Core Terrace parser function, sets level, offsetHead, and offsetTail in a lineData struct based on the current line.
+ * @param char* line A pointer to the line to parse as a C-style string. Shouldn't end with a newline.
+ * @param terrace_linedata_t* lineData A pointer to the terrace_linedata_t struct to store information about the current line in.
+ */
+void terrace_parse_line(const char *line, terrace_linedata_t *lineData) {
+  // Empty lines are nullptr/0 as they have no characters. (The newline character should be stripped off.)
+  // Special case handling for these allows them to be parsed extra quickly.
+  if (!line) {
+    // Empty lines are treated as having the same level as the previous line, so lineData->level is not updated.
    lineData->offsetHead = 0;
    lineData->offsetTail = 0;
  } else {
+    // Count the number of indent characters in the current line.
    unsigned int level = 0;
-    while (line[level] == lineData->indent && level <= lineData->level + 1) ++level;
+    while (line[level] == lineData->indent) ++level;
    lineData->level = level;
+
+    // Set offsetHead and offsetTail to level to start with.
+    // offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
    lineData->offsetHead = level;
    lineData->offsetTail = level;

-    while (line[lineData->offsetTail] != '\0' && line[lineData->offsetTail] != ' ') ++lineData->offsetTail;
+    // Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
+    while (line[lineData->offsetTail] && line[lineData->offsetTail] != ' ') ++lineData->offsetTail;
  }
 }

--- a/packages/c/test/test-runner.c
+++ b/packages/c/test/test-runner.c
@@ -9,8 +9,7 @@ void linedata_basic (char indent) {
  size_t bufsize = 32;
  ssize_t c_read = 0;

-  terrace_linedata_t line_data;
-  line_data.indent = indent;
+  terrace_linedata_t line_data = terrace_create_line_data(indent);

  while(c_read = getline(&line, &bufsize, stdin)) {
    if (c_read == -1) break;
@@ -29,8 +28,7 @@ void linedata_head_tail (char indent) {
  size_t bufsize = 32;
  ssize_t c_read = 0;

-  terrace_linedata_t line_data;
-  line_data.indent = indent;
+  terrace_linedata_t line_data = terrace_create_line_data(indent);

  char *head;
  char *tail;
--- a/packages/js/src/document.ts
+++ b/packages/js/src/document.ts
@@ -22,7 +22,8 @@ export type Document = {
 export function useDocument (reader: Reader, indent: string = ' '): Document {
  if (indent.length !== 1) throw new Error(`Terrace currently only allows single-character indent strings - you passed "${indent}"`)

-  const lineData = createLineData('', indent)
+  const lineData = createLineData(indent)
+  let currLine = ''

  // If `repeatCurrentLine` is `true`, the following call to `next()` will repeat the current line in
  // the document and set `repeatCurrentLine` back to `false`
@@ -62,8 +63,8 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
      if (line == null) return false

      // Populate lineData with parsed information from the current line.
-      lineData.line = line
-      parseLine(lineData)
+      currLine = line
+      parseLine(currLine, lineData)
    }

    // If we shouldn't be handling this line, make the following call to next() repeat the current line.
@@ -108,7 +109,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
   * @param {number} startOffset How many indent characters to skip before outputting the line contents. Defaults to the current indent level
   * @returns {string} The line contents starting from `startOffset`
   */
-  const line = (startOffset: number = lineData.level): string => lineData.line.slice(startOffset)
+  const line = (startOffset: number = lineData.level): string => currLine.slice(startOffset)
  /**
   * Get the first "word" of a line, starting from the first non-indent character to the first space or end of the line
   * Often used for deciding how to parse a block.
@@ -122,7 +123,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
   * ```
   * @returns {string} The `head` portion (first word) of a line
   */
-  const head = (): string => lineData.line.slice(lineData.offsetHead, lineData.offsetTail)
+  const head = (): string => currLine.slice(lineData.offsetHead, lineData.offsetTail)
  /**
   * Get all text following the first "word" of a line, starting from the first character after the space at the end of `head()`
   *
@@ -135,7 +136,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
   * ```
   * @returns {string} The remainder of the line following the `head()` portion, with no leading space
   */
-  const tail = (): string => lineData.line.slice(lineData.offsetTail + 1) // Skip the space
+  const tail = (): string => currLine.slice(lineData.offsetTail + 1) // Skip the space
  /**
   * Quickly check if the current line head matches a specified value
   *
--- a/packages/js/src/parser.test.ts
+++ b/packages/js/src/parser.test.ts
@@ -1,253 +0,0 @@
-import { describe, expect, it } from 'vitest'
-import { createLineData, parseLine } from './parser'
-
-describe(`LineData`, () => {
-  it(`is an object`, () => {
-    const lineData = createLineData()
-    expect(lineData).toBeTypeOf(`object`)
-  })
-
-  it(`has five properties`, () => {
-    const lineData = createLineData()
-    expect(Object.keys(lineData).length).to.equal(5)
-  })
-
-  it(`'line' is a string|null initialized to null`, () => {
-    const lineData = createLineData()
-    expect(lineData.level).to.equal(0)
-  })
-
-  it(`'level' is an integer initialized to zero`, () => {
-    const lineData = createLineData()
-    expect(lineData.level).to.equal(0)
-  })
-
-  it(`'offsetHead' is an integer initialized to zero`, () => {
-    const lineData = createLineData()
-    expect(lineData.offsetHead).to.equal(0)
-  })
-
-  it(`'offsetTail' is an integer initialized to zero`, () => {
-    const lineData = createLineData()
-    expect(lineData.offsetTail).to.equal(0)
-  })
-})
-
-describe(`parseLine`, () => {
-  it(`Requres 'lineData' to be an object with string line and numeric level properties`, () => {
-    // @ts-ignore
-    expect(() => parseLine(``, 0)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
-    // @ts-ignore
-    expect(() => parseLine(``, [])).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
-    // @ts-ignore
-    expect(() => parseLine(``, {})).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
-    // @ts-ignore
-    expect(() => parseLine(``, null)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
-    // @ts-ignore
-    expect(() => parseLine(``, true)).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
-    // @ts-ignore
-    expect(() => parseLine(``, () => {})).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
-    // @ts-ignore
-    expect(() => parseLine(``, { line: '', level: '' })).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
-    // @ts-ignore
-    expect(() => parseLine(``, { line: '', level: 0 })).toThrowError(`'lineData' must be an object with string line and numeric level properties`)
-  })
-
-  it(`Requres 'indent' to be a single-character string`, () => {
-    const lineData = createLineData()
-    lineData.line = ``
-    // @ts-ignore
-    lineData.indent = 0
-    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
-    // @ts-ignore
-    lineData.indent = []
-    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
-    // @ts-ignore
-    lineData.indent = {}
-    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
-    // @ts-ignore
-    lineData.indent = null
-    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
-    // @ts-ignore
-    lineData.indent = true
-    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
-    // @ts-ignore
-    lineData.indent = () => {}
-    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
-    lineData.indent = `  `
-    expect(() => parseLine(lineData)).toThrowError(`'lineData.indent' must be a single-character string`)
-  })
-
-  it(`Handles a blank line at indent level 0`, () => {
-    const line = ``
-    const lineData = createLineData(line)
-    parseLine(lineData)
-    expect(lineData).to.deep.equal({ line, indent: ` `, level: 0, offsetHead: 0, offsetTail: 0 })
-  })
-
-  it(`Handles a line with a single space at indent level 1`, () => {
-    const line = ` `
-    const lineData = createLineData(line)
-    parseLine(lineData)
-    expect(lineData).to.deep.equal({ line, indent: ` `, level: 1, offsetHead: 1, offsetTail: 1 })
-  })
-
-  it(`Handles a line with two spaces`, () => {
-    const line = `  `
-    const lineData = createLineData(line)
-    parseLine(lineData)
-    expect(lineData).to.deep.equal({ line, indent: ` `, level: 2, offsetHead: 2, offsetTail: 2 })
-  })
-
-  it(`Handles a normal line at indent level 0`, () => {
-    const line = `line 1`
-    const lineData = createLineData(line)
-    parseLine(lineData)
-    expect(lineData).to.deep.equal({ line, indent: ` `, level: 0, offsetHead: 0, offsetTail: 4 })
-  })
-
-  it(`Handles a normal line at indent level 1`, () => {
-    const line = ` line 1`
-    const lineData = createLineData(line)
-    parseLine(lineData)
-    expect(lineData).to.deep.equal({ line, indent: ` `, level: 1, offsetHead: 1, offsetTail: 5 })
-  })
-
-  it(`Handles a normal line at indent level 2`, () => {
-    const line = `  line 1`
-    const lineData = createLineData(line)
-    parseLine(lineData)
-    expect(lineData).to.deep.equal({ line, indent: ` `, level: 2, offsetHead: 2, offsetTail: 6 })
-  })
-
-  it(`Handles a normal line at indent level 1 indented with tabs`, () => {
-    const line = `\tline 1`
-    const lineData = createLineData(line, `\t`)
-    parseLine(lineData)
-    expect(lineData).to.deep.equal({ line, indent: `\t`, level: 1, offsetHead: 1, offsetTail: 5 })
-  })
-
-  it(`Handles a normal line at indent level 2 indented with tabs`, () => {
-    const line = `\t\tline 1`
-    const lineData = createLineData(line, `\t`)
-    parseLine(lineData)
-    expect(lineData).to.deep.equal({ line, indent: `\t`, level: 2, offsetHead: 2, offsetTail:  6})
-  })
-
-  it(`Nests a normal line under a preceding normal line`, () => {
-    const lines = [
-      'line 1',
-      ' line 2'
-    ]
-
-    const lineData = createLineData()
-    const results = lines.map(line => {
-      lineData.line = line
-      parseLine(lineData)
-      return {...lineData}
-    })
-
-    expect(results).to.deep.equal([
-      { line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
-      { line: lines[1], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 }
-    ])
-  })
-
-  it(`Nests multiple normal line under a preceding normal line`, () => {
-    const lines = [
-      'line 1',
-      ' line 2',
-      ' line 3',
-      ' line 4',
-    ]
-
-    const lineData = createLineData()
-    const results = lines.map(line => {
-      lineData.line = line
-      parseLine(lineData)
-      return {...lineData}
-    })
-
-    expect(results).to.deep.equal([
-      { line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
-      { line: lines[1], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 },
-      { line: lines[2], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 },
-      { line: lines[3], indent: ' ', level: 1, offsetHead: 1, offsetTail: 5 }
-    ])
-  })
-
-  it(`Does not nest an empty line under a preceding normal line`, () => {
-    const lines = [
-      'line 1',
-      ''
-    ]
-
-    const lineData = createLineData()
-    const results = lines.map(line => {
-      lineData.line = line
-      parseLine(lineData)
-      return {...lineData}
-    })
-
-    expect(results).to.deep.equal([
-      { line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
-      { line: lines[1], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 }
-    ])
-  })
-
-  it(`Does not nest multiple empty lines under a preceding normal line`, () => {
-    const lines = [
-      'line 1',
-      '',
-      '',
-      '',
-    ]
-
-    const lineData = createLineData()
-    const results = lines.map(line => {
-      lineData.line = line
-      parseLine(lineData)
-      return {...lineData}
-    })
-
-    expect(results).to.deep.equal([
-      { line: lines[0], indent: ' ', level: 0, offsetHead: 0, offsetTail: 4 },
-      { line: lines[1], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 },
-      { line: lines[2], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 },
-      { line: lines[3], indent: ' ', level: 0, offsetHead: 0, offsetTail: 0 }
-    ])
-  })
-
-  it(`Handle head and tail matching for lines with head and tail`, () => {
-    const line = `  head tail1 tail2 tail3`
-    const lineData = createLineData(line)
-    parseLine(lineData)
-
-    const head = line.slice(lineData.offsetHead, lineData.offsetTail)
-    const tail = line.slice(lineData.offsetTail + 1)
-    expect(head).to.equal(`head`)
-    expect(tail).to.equal(`tail1 tail2 tail3`)
-  })
-
-  it(`Handle head and tail matching for lines with head but no tail`, () => {
-    const line = `  head`
-    const lineData = createLineData(line)
-    parseLine(lineData)
-
-    const head = line.slice(lineData.offsetHead, lineData.offsetTail)
-    const tail = line.slice(lineData.offsetTail + 1)
-    expect(head).to.equal(`head`)
-    expect(tail).to.equal(``)
-  })
-
-  it(`Handle head and tail matching for lines with head and trailing space`, () => {
-    const line = `  head `
-    const lineData = createLineData(line)
-    parseLine(lineData)
-
-    const head = line.slice(lineData.offsetHead, lineData.offsetTail)
-    const tail = line.slice(lineData.offsetTail + 1)
-    expect(head).to.equal(`head`)
-    expect(tail).to.equal(``)
-  })
-})
--- a/packages/js/src/parser.ts
+++ b/packages/js/src/parser.ts
@@ -1,35 +1,54 @@
+// Holds the parsed information from each line.
 export type LineData = {
-  line: string;
+  // Which character is being used for indentation. Avoids having to specify it on each parseLine call.
  indent: string;
+  // How many indent characters are present in the current line before the first non-indent character.
  level: number;
+  // The number of characters before the start of the line's "head" section.
+  // (Normally the same as `level`)
  offsetHead: number;
+  // The number of characters before the start of the line's "tail" section.
  offsetTail: number;
 }

-export function createLineData(line: string = '', indent: string = ' '): LineData {
-  return { line, indent, level: 0, offsetHead: 0, offsetTail: 0 }
+/**
+ * Initialize a LineData instance with default values.
+ * @param {string} indent The character to use for indenting lines. ONLY ONE CHARACTER IS CURRENTLY PERMITTED.
+ * @returns {LineData} A LineData instance with the specified indent character and all other values initialized to 0.
+ */
+export function createLineData(indent: string = ' '): LineData {
+  return { indent, level: 0, offsetHead: 0, offsetTail: 0 }
 }

-export function parseLine(lineData: LineData): LineData {
+/**
+ * Core Terrace parser function, sets level, offsetHead, and offsetTail in a LineData object based on the current line.
+ * Note that this is a C-style function, lineData is treated as a reference and mutated in-place.
+ * @param {string} line A string containing a line to parse. Shouldn't end with a newline.
+ * @param {LineData} lineData A LineData object to store information about the current line in. **Mutated in-place!**
+ */
+export function parseLine(line: string, lineData: LineData) {
  if ((typeof lineData !== 'object' || !lineData) || typeof lineData.level !== 'number') throw new Error(`'lineData' must be an object with string line and numeric level properties`)
  if (typeof lineData.indent !== 'string' || lineData.indent.length === 0 || lineData.indent.length > 1) throw new Error(`'lineData.indent' must be a single-character string`)
-  if (typeof lineData.line !== 'string') throw new Error(`'lineData.line' must be a string`)
+  if (typeof line !== 'string') throw new Error(`'line' must be a string`)

-  let level = 0
-
-  // Repeat previous level for blank lines.
-  if (!lineData.line.length) {
-    lineData.level = lineData.level
+  // Blank lines have no characters, the newline should be stripped off.
+  // Special case handling for these allows them to be parsed quickly.
+  if (!line.length) {
+    // Empty lines are treated as having the same level as the previous line, so lineData.level is not updated.
    lineData.offsetHead = 0
    lineData.offsetTail = 0
  } else {
-    while (lineData.line[level] === lineData.indent && level <= lineData.level + 1) ++level
+    // Count the number of indent characters in the current line.
+    let level = 0
+    while (line[level] === lineData.indent) ++level
    lineData.level = level
+
+    // Set offsetHead and offsetTail to level to start with.
+    // offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
    lineData.offsetHead = level
    lineData.offsetTail = level

-    while (lineData.line[lineData.offsetTail] && lineData.line[lineData.offsetTail] !== ' ') ++lineData.offsetTail
+    // Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
+    while (line[lineData.offsetTail] && line[lineData.offsetTail] !== ' ') ++lineData.offsetTail
  }
-
-  return lineData
 }
--- a/packages/js/test/index.js
+++ b/packages/js/test/index.js
@@ -1,26 +1,28 @@
-import { createLineData, parseLine, useDocument } from '@terrace-lang/js'
+import { createLineData, parseLine } from '@terrace-lang/js'
 import { createStdinReader } from '@terrace-lang/js/readers/node-readline'

 const testName = process.argv[2]

 async function linedata_basic(indent) {
-  const lineData = createLineData('', indent)
+  const lineData = createLineData(indent)
  const next = createStdinReader()

-  while ((lineData.line = await next()) != null) {
-    parseLine(lineData)
-    const { level, indent, offsetHead, offsetTail, line } = lineData
+  let line = ''
+  while ((line = await next()) != null) {
+    parseLine(line, lineData)
+    const { level, indent, offsetHead, offsetTail } = lineData
    console.log(`| level ${level} | indent ${indent} | offsetHead ${offsetHead} | offsetTail ${offsetTail} | line ${line} |`)
  }
 }

 async function linedata_head_tail () {
-  const lineData = createLineData('')
+  const lineData = createLineData()
  const next = createStdinReader()

-  while ((lineData.line = await next()) != null) {
-    parseLine(lineData)
-    const { level, indent, offsetHead, offsetTail, line } = lineData
+  let line = ''
+  while ((line = await next()) != null) {
+    parseLine(line, lineData)
+    const { offsetHead, offsetTail } = lineData
    const head = line.slice(offsetHead, offsetTail)
    const tail = line.slice(offsetTail + 1)

--- a/packages/python/parser.py
+++ b/packages/python/parser.py
@@ -1,35 +1,63 @@
 from typing import TypedDict

+# Holds the parsed information from each line.
 class LineData(TypedDict):
-  line: str
+  # Which character is being used for indentation. Avoids having to specify it on each parseLine call.
  indent: str
+  # How many indent characters are present in the current line before the first non-indent character.
  level: int
+  # The number of characters before the start of the line's "head" section.
+  # (Normally the same as `level`)
  offsetHead: int
+  # The number of characters before the start of the line's "tail" section.
  offsetTail: int

-def createLineData(line: str = '', indent: str = ' ') -> LineData:
-  return { "line": line, "indent": indent, "level": 0, "offsetHead": 0, "offsetTail": 0 }
+def createLineData(indent: str = ' ') -> LineData:
+  """
+  Initialize a LineData instance with default values.

-def parseLine(lineData: LineData) -> LineData:
-  # if ((typeof lineData !== 'object' || !lineData) || typeof lineData.level !== 'number') throw new Error(`'lineData' must be an object with string line and numeric level properties`)
-  # if (typeof lineData.indent !== 'string' || lineData.indent.length === 0 || lineData.indent.length > 1) throw new Error(`'lineData.indent' must be a single-character string`)
-  # if (typeof lineData.line !== 'string') throw new Error(`'lineData.line' must be a string`)
+  Parameters
+  ----------
+  indent : str
+    The character to use for indenting lines. ONLY ONE CHARACTER IS CURRENTLY PERMITTED.
+  Returns
+  -------
+  LineData
+    A LineData dict with the specified indent character and all other values initialized to 0.
+  """
+  return { "indent": indent, "level": 0, "offsetHead": 0, "offsetTail": 0 }

-  level = 0
+def parseLine(line: str, lineData: LineData):
+  """
+  Core Terrace parser function, sets level, offsetHead, and offsetTail in a LineData object based on the current line.
+  Note that this is a C-style function, lineData is treated as a reference and mutated in-place.

-  # Repeat previous level for blank lines.
-  if len(lineData['line']) == 0:
-    lineData['level'] = lineData['level']
+  Parameters
+  ----------
+  line : str
+    A string containing a line to parse. Shouldn't end with a newline.
+  lineData: LineData
+    A LineData dict to store information about the current line in. **Mutated in-place!**
+  """
+
+  # Blank lines have no characters, the newline should be stripped off.
+  # Special case handling for these allows them to be parsed quickly.
+  if len(line) == 0:
+    # Empty lines are treated as having the same level as the previous line, so lineData['level'] is not updated.
    lineData['offsetHead'] = 0
    lineData['offsetTail'] = 0
  else:
-    while level < len(lineData['line']) and lineData['line'][level] == lineData['indent'] and level <= lineData['level'] + 1:
+    # Count the number of indent characters in the current line.
+    level = 0
+    while level < len(line) and line[level] == lineData['indent']:
      level += 1
    lineData['level'] = level
+
+    # Set offsetHead and offsetTail to level to start with.
+    # offsetHead should always be equal to level, and offsetTail will always be equal to or greater than level.
    lineData['offsetHead'] = level
    lineData['offsetTail'] = level

-    while lineData['offsetTail'] < len(lineData['line']) and lineData['line'][lineData['offsetTail']] != ' ':
-      lineData['offsetTail'] += 1
-
-  return lineData
+    # Increment offsetTail until we encounter a space character (start of tail) or reach EOL (no tail present).
+    while lineData['offsetTail'] < len(line) and line[lineData['offsetTail']] != ' ':
+      lineData['offsetTail'] += 1
--- a/packages/python/test/index.py
+++ b/packages/python/test/index.py
@@ -14,33 +14,21 @@ def next():
  return line.rstrip('\n') if len(line) > 0 else None

 def linedata_basic (indent):
-  lineData = createLineData('', indent)
+  lineData = createLineData(indent)

-  while (l := next()) != None:
-    lineData['line'] = l
-    parseLine(lineData)
+  while (line := next()) != None:
+    parseLine(line, lineData)
    print("| level {level} | indent {indent} | offsetHead {offsetHead} | offsetTail {offsetTail} | line {line} |".format(
-      level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = lineData['line']
-    ))
-
-def linedata_tabs ():
-  lineData = createLineData('', '\t')
-
-  while (l := next()) != None:
-    lineData['line'] = l
-    parseLine(lineData)
-    print("| level {level} | indent {indent} | offsetHead {offsetHead} | offsetTail {offsetTail} | line {line} |".format(
-      level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = lineData['line']
+      level = lineData['level'], indent = lineData['indent'], offsetHead = lineData['offsetHead'], offsetTail = lineData['offsetTail'], line = line
    ))

 def linedata_head_tail ():
-  lineData = createLineData('')
+  lineData = createLineData()

-  while (l := next()) != None:
-    lineData['line'] = l
-    parseLine(lineData)
-    head = lineData['line'][lineData['offsetHead']:lineData['offsetTail']] if len(lineData['line']) > lineData['offsetHead'] else ''
-    tail = lineData['line'][lineData['offsetTail'] + 1:] if len(lineData['line']) > lineData['offsetTail'] + 1 else ''
+  while (line := next()) != None:
+    parseLine(line, lineData)
+    head = line[lineData['offsetHead']:lineData['offsetTail']] if len(line) > lineData['offsetHead'] else ''
+    tail = line[lineData['offsetTail'] + 1:] if len(line) > lineData['offsetTail'] + 1 else ''

    print("| head {head} | tail {tail} |".format(
      head = head, tail = tail
Author	SHA1	Message	Date
Joshua Bemenderfer	5c347a95a0	Implement basic document functions for C API, mostly equivalent to JS ones.	2023-02-19 17:04:36 -05:00
Joshua Bemenderfer	3f6c475756	Cleanup and document core APIs in C, JS, and Python.	2023-02-19 14:53:59 -05:00