Start cleaning up document and the example parser. I think this is the way to move forward, finally. Next up: DSL on top of functions.

This commit is contained in:
Joshua Bemenderfer 2023-01-30 18:54:13 -05:00
parent b02e742c89
commit 67e7811772
9 changed files with 221 additions and 302 deletions

View File

@ -1,77 +0,0 @@
import { useDocument } from '@terrace/core'
import { createStringReader } from '@terrace/core/readers/js-string'
/**
 * Parses a package-manifest style document into a plain object.
 *
 * Reads the input through `useDocument(createStringReader(lines))` and fills in
 * `name`, `version`, `license`, `exports`, `scripts`, `devDependencies` and
 * `author` as lines with those heads are matched. A field whose head is never
 * matched keeps its initial value of `null`.
 *
 * @param lines Input handed unchanged to `createStringReader`.
 * @returns The populated `structure` object.
 */
export async function parse(lines) {
const { tail, each, match, buildObject } = useDocument(createStringReader(lines))
// Result skeleton: every field starts as null until its line is seen.
const structure = {
name: null,
version: null,
license: null,
exports: null,
scripts: null,
devDependencies: null,
author: null
}
await each(async () => {
// Scalar fields: the value is the trimmed remainder of the line after the head.
// NOTE(review): these are independent `if`s, not an `else if` chain. buildObject()
// advances the reader, so a later match() in the same pass may be inspecting a
// different line than an earlier one. Presumably this is what the FIXME below is
// about - confirm before restructuring or reordering.
if (match('name')) structure.name = tail().trim()
if (match('version')) structure.version = tail().trim()
if (match('license')) structure.license = tail().trim()
// FIXME: Order of operations causes other parts to break if this doesn't run first?!
// With an empty allow-list, the value stored for each matched child line is whatever
// the callback returns: here a { import, require } section.
if (match('exports')) structure.exports = await buildObject([], async () => {
const section = { import: null, require: null }
await each(() => {
if (match('import')) section.import = tail().trim()
if (match('require')) section.require = tail().trim()
// A truthy return asks each() to stop once both entries of the section are known.
if (section.import && section.require) return true
})
return section
})
// No allow-list and no callback: buildObject() is left to its defaults.
if (match('scripts')) structure.scripts = await buildObject()
if (match('devDependencies')) structure.devDependencies = await buildObject()
// NOTE(review): 'name' and 'email' are the accepted keys; '#text' appears to make
// buildObject collect non-matching lines as text - confirm against buildObject.
if (match('author')) structure.author = await buildObject(['name', 'email', '#text'])
// Truthy only once every field has been filled, which ends the top-level each() early.
return structure.name &&
structure.version &&
structure.license &&
structure.exports &&
structure.scripts &&
structure.devDependencies &&
structure.author
})
return structure
}
export async function toArrays(lines) {
const { next, level, line } = useDocument(createStringReader(lines))
const levelTracker = []
function createScope(level, line) {
levelTracker.length = level
const scope = levelTracker[level] = [line, []]
return scope
}
createScope(0, 'root')
// Simple parser that produces canonical array structure for blocks.
while (true) {
// If next() returns true we've ended the document.
if (await next()) break;
// Determine parent for this scope.
const parent = levelTracker[level()]
// If there's no parent, skip this line.
if (!parent) continue
// Create new scope
const scope = createScope(level() + 1, line())
// Add current scope to parent.
parent[1].push(scope)
}
return levelTracker[0]
}

View File

@ -1,40 +1,7 @@
import { parse, toArrays } from './core.js'
import { useDocument } from '@terrace/core'
import { createStringReader } from '@terrace/core/readers/js-string'
const linesArrays = [
`title Example`,
`options`,
` parameter1 30`,
` parameter2 Enim eu id anim minim reprehenderit nostrud eu amet deserunt ea ut do cupidatat ea.`,
`options`,
` parameter1 0`,
` parameter2 Esse incididunt et est adipisicing eiusmod aliqua enim ea aliqua id enim.`,
` deep Enim fugiat do in est commodo culpa dolore.`,
`subsection`,
` position 1`,
` Ea dolore in aliquip fugiat anim adipisicing amet aute tempor et deserunt est duis sint.`,
`subsection 2`,
` position 2`,
` `,
` Aute deserunt incididunt ad in sint adipisicing est officia velit pariatur ipsum deserunt quis nulla.`,
` Ea dolore in aliquip fugiat anim adipisicing amet aute tempor et deserunt est duis sint.`,
`list`,
` - item 1`,
` - item 2`,
`collection`,
` section`,
` lorem ipsum 1`,
` section`,
` lorem ipsum 2`,
`collection2`,
` section`,
` position 3`,
` Laborum aute anim occaecat occaecat pariatur tempor proident magna sit magna non non.`,
` list`,
` 1`,
` 2`
]
const linesParse = [
const lines = [
`name @terrace/core`,
`version 0.0.1`,
`randomthing test`,
@ -75,35 +42,53 @@ const linesParse = [
` way of dealing with this problem.`,
]
// Draft description of the expected shape of the example document: keys are line
// heads, and `children` describes the block nested under that head.
// It is not read by any code in the visible part of this file.
// NOTE(review): the meaning of `count` (including `count: -1`), of the "?" key and
// of "? literal" is not defined anywhere in the visible code - presumably "number
// of occurrences / any number", "any head" and "free text". Confirm once the schema
// is actually consumed.
const schema = {
"name": {count: 1},
"version": {count: 1},
"license": {count: 1},
"exports": {count: 1, children: {
"?": {count: -1, children: {
"import": {count: 1},
"require": {count: 1}
}}
}},
"scripts": {count: 1, children: {
"?": { count: -1 }
}},
"devDependencies": {count: 1, children: {
"?": { count: -1 }
}},
"author": { count: 1, children: {
"name": { count: 1 },
"email": { count: 1 },
"? literal": { count: -1 }
}}
}
async function main() {
const resultArrays = await toArrays(linesArrays)
// console.dir(resultArrays, { depth: null })
const { toArrays } = useDocument(createStringReader(lines))
const resultArrays = await toArrays()
console.dir(resultArrays, { depth: null })
const resultParse = await parse(linesParse)
console.dir(resultParse, { depth: null })
const { tail, each, match, buildObject } = useDocument(createStringReader(lines))
const structure = {
name: null,
version: null,
license: null,
exports: null,
scripts: null,
devDependencies: null,
author: null
}
await each(async () => {
if (match('name')) structure.name = tail().trim()
if (match('version')) structure.version = tail().trim()
if (match('license')) structure.license = tail().trim()
// FIXME: Order of operations causes other parts to break if this doesn't run first?!
if (match('exports')) structure.exports = await buildObject([], async () => {
const section = { import: null, require: null }
await each(() => {
if (match('import')) section.import = tail().trim()
if (match('require')) section.require = tail().trim()
if (section.import && section.require) return true
})
return section
})
if (match('scripts')) structure.scripts = await buildObject()
if (match('devDependencies')) structure.devDependencies = await buildObject()
if (match('author')) structure.author = await buildObject(['name', 'email', '#text'])
return structure.name &&
structure.version &&
structure.license &&
structure.exports &&
structure.scripts &&
structure.devDependencies &&
structure.author
})
console.dir(structure, { depth: null })
}
main()

View File

@ -1 +1 @@
"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const s=require("./parser.cjs");function c(a,u=" "){let r=s.createLineData(null,u);const t={ended:!1,clone(){return c(a.clone(),u)},async next(){if(r.line=await a.next(),r.line===null)return!0;s.parseLine(r)},current(){return t},line(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead)},head(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead,r.offsetTail)},tail(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetTail)},level(){return r.level},async content(e=-1,n=[]){var l;return e===-1&&(e=r.level+1),await t.next()||r.level<e?n:(n.push(((l=r.line)==null?void 0:l.slice(e))||""),t.content(e,n))},match(e){return e===t.head()},async each(e){const n=t.line()!==void 0?t.level():-1;for(;!(await t.next()||t.level()<=n||await e()););},async buildObject(e=[],n){n||(n=()=>t.tail().trim());const i={};return await t.each(async()=>{if(!!t.head()){if(!e.length||e.includes(t.head())){i[t.head()]=await n();return}if(e&&!e.includes(t.head())&&e.includes("#text")){i["#text"]=[t.line(),...await t.content(t.level())];return}}}),i},async seek(e,n=-1){return n===-1&&(n=r.level),await t.next()?!1:t.head()===e?t:r.level<n?!1:t.seek(e,n)}};return t}exports.useDocument=c;
"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const h=require("./parser.cjs");function T(p,b=" "){let c=0;const t=h.createLineData("",b);async function s(){c===0&&(c=1);const e=await p.next();return e===null?(c=2,!0):(t.line=e,h.parseLine(t),!1)}const a=()=>t.level,u=()=>t.line.slice(t.offsetHead),o=()=>t.line.slice(t.offsetHead,t.offsetTail),f=()=>t.line.slice(t.offsetTail),v=e=>e===o();async function d(e){const n=c===0?-1:a();for(;!(await s()||a()<=n||await e()););}async function l(e=-1,n=[u()]){var i;return e===-1&&(e=t.level+1),await s()||t.level<e?n:(n.push(((i=t.line)==null?void 0:i.slice(e))||""),l(e,n))}async function w(e=[],n){const r={};return await d(async()=>{const i=o();if(!!i){if(!e.length||e.includes(i)){r[i]=n?await n():f().trim();return}if(e&&!e.includes(i)&&e.includes("#text")){r["#text"]=await l(a());return}}}),r}async function m(){const e=[["root",[]]];for(;!await s();){const n=a(),r=n+1,i=e[n];if(!i)continue;e.length=r;const x=e[r]=[u(),[]];i[1].push(x)}return e[0]}return{state:c,next:s,line:u,head:o,tail:f,level:a,match:v,each:d,blockAsText:l,buildObject:w,toArrays:m}}exports.useDocument=T;

View File

@ -1,68 +1,64 @@
import { parseLine as f, createLineData as s } from "./parser.js";
function c(a, u = " ") {
let n = s(null, u);
const t = {
ended: !1,
clone() {
return c(a.clone(), u);
},
async next() {
if (n.line = await a.next(), n.line === null)
return !0;
f(n);
},
current() {
return t;
},
line() {
var e;
return (e = n.line) == null ? void 0 : e.slice(n.offsetHead);
},
head() {
var e;
return (e = n.line) == null ? void 0 : e.slice(n.offsetHead, n.offsetTail);
},
tail() {
var e;
return (e = n.line) == null ? void 0 : e.slice(n.offsetTail);
},
level() {
return n.level;
},
async content(e = -1, r = []) {
var l;
return e === -1 && (e = n.level + 1), await t.next() || n.level < e ? r : (r.push(((l = n.line) == null ? void 0 : l.slice(e)) || ""), t.content(e, r));
},
match(e) {
return e === t.head();
},
async each(e) {
const r = t.line() !== void 0 ? t.level() : -1;
for (; !(await t.next() || t.level() <= r || await e()); )
;
},
async buildObject(e = [], r) {
r || (r = () => t.tail().trim());
const i = {};
return await t.each(async () => {
if (!!t.head()) {
if (!e.length || e.includes(t.head())) {
i[t.head()] = await r();
return;
}
if (e && !e.includes(t.head()) && e.includes("#text")) {
i["#text"] = [t.line(), ...await t.content(t.level())];
return;
}
import { parseLine as v, createLineData as T } from "./parser.js";
function y(h, p = " ") {
let r = 0;
const t = T("", p);
async function s() {
r === 0 && (r = 1);
const e = await h.next();
return e === null ? (r = 2, !0) : (t.line = e, v(t), !1);
}
const a = () => t.level, o = () => t.line.slice(t.offsetHead), u = () => t.line.slice(t.offsetHead, t.offsetTail), l = () => t.line.slice(t.offsetTail), w = (e) => e === u();
async function d(e) {
const n = r === 0 ? -1 : a();
for (; !(await s() || a() <= n || await e()); )
;
}
async function f(e = -1, n = [o()]) {
var i;
return e === -1 && (e = t.level + 1), await s() || t.level < e ? n : (n.push(((i = t.line) == null ? void 0 : i.slice(e)) || ""), f(e, n));
}
async function x(e = [], n) {
const c = {};
return await d(async () => {
const i = u();
if (!!i) {
if (!e.length || e.includes(i)) {
c[i] = n ? await n() : l().trim();
return;
}
}), i;
},
async seek(e, r = -1) {
return r === -1 && (r = n.level), await t.next() ? !1 : t.head() === e ? t : n.level < r ? !1 : t.seek(e, r);
if (e && !e.includes(i) && e.includes("#text")) {
c["#text"] = await f(a());
return;
}
}
}), c;
}
async function b() {
const e = [["root", []]];
for (; !await s(); ) {
const n = a(), c = n + 1, i = e[n];
if (!i)
continue;
e.length = c;
const m = e[c] = [o(), []];
i[1].push(m);
}
return e[0];
}
return {
state: r,
next: s,
line: o,
head: u,
tail: l,
level: a,
match: w,
each: d,
blockAsText: f,
buildObject: x,
toArrays: b
};
return t;
}
export {
c as useDocument
y as useDocument
};

View File

@ -1 +1 @@
"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});function f(e=null,r=" "){return{line:e,indent:r,type:0,level:0,offsetHead:0,offsetTail:0}}function o(e){if(typeof e!="object"||!e||typeof e.type!="number"||typeof e.level!="number")throw new Error("'lineData' must be an object with 'line' string, and 'type' and 'level' integer properties");if(typeof e.indent!="string"||e.indent.length===0||e.indent.length>1)throw new Error("'lineData.indent' must be a single-character string");if(typeof e.line!="string")throw new Error("'lineData.line' must be a string");let r=0,t=0;if(!e.line.length)e.type===1&&(t+=1),e.type===0&&(t=e.level),e.type=r,e.level=t,e.offsetHead=0,e.offsetTail=0;else{for(r=1;e.line[t]===e.indent&&t<=e.level+1;)++t;for(e.type=r,e.level=t,e.offsetHead=t,e.offsetTail=t;e.line[e.offsetTail]&&e.line[e.offsetTail]!==" ";)++e.offsetTail}return e}exports.createLineData=f;exports.parseLine=o;
"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});function f(e="",r=" "){return{line:e,indent:r,type:0,level:0,offsetHead:0,offsetTail:0}}function o(e){if(typeof e!="object"||!e||typeof e.type!="number"||typeof e.level!="number")throw new Error("'lineData' must be an object with 'line' string, and 'type' and 'level' integer properties");if(typeof e.indent!="string"||e.indent.length===0||e.indent.length>1)throw new Error("'lineData.indent' must be a single-character string");if(typeof e.line!="string")throw new Error("'lineData.line' must be a string");let r=0,t=0;if(!e.line.length)e.type===1&&(t+=1),e.type===0&&(t=e.level),e.type=r,e.level=t,e.offsetHead=0,e.offsetTail=0;else{for(r=1;e.line[t]===e.indent&&t<=e.level+1;)++t;for(e.type=r,e.level=t,e.offsetHead=t,e.offsetTail=t;e.line[e.offsetTail]&&e.line[e.offsetTail]!==" ";)++e.offsetTail}return e}exports.createLineData=f;exports.parseLine=o;

View File

@ -1,4 +1,4 @@
function t(e = null, r = " ") {
function t(e = "", r = " ") {
return { line: e, indent: r, type: 0, level: 0, offsetHead: 0, offsetTail: 0 };
}
function o(e) {

View File

@ -27,6 +27,7 @@
},
"scripts": {
"test": "vitest ./src",
"dev": "vite build --watch",
"build": "vite build"
},
"devDependencies": {

View File

@ -1,117 +1,131 @@
import type { Reader } from './readers/reader'
import { createLineData, parseLine } from './parser'
/** Read progress of a document created by `useDocument`. */
enum STATE {
// Initial value: next() has not been called yet.
READY = 0,
// Set by the first call to next().
STARTED = 1,
// Set once the reader's next() has resolved to null.
ENDED = 2
}
type Document = {
ended: boolean,
clone: () => Document,
next: () => Promise<Document>
current: () => Document
state: STATE,
next: () => Promise<boolean>
level: () => number,
line: () => string,
head: () => string,
tail: () => string,
content: (contentLevel: number, lines: string[]) => Promise<string>,
match: (matchHead: string) => boolean,
each: (handler: Function) => void,
buildObject: (allowList: Array<string>, handler: Function) => object,
seek: (matchHead: string, contentLevel: number) => Promise<Document|false>
blockAsText: (startLevel: number, lines?: string[]) => Promise<Array<string>>,
buildObject: (allowKeys: Array<string>, processValue?: () => any) => any,
toArrays(): Promise<LineArray>
}
type LineArray = [string, Array<LineArray>];
export function useDocument (reader: Reader, indent: string = ' '): Document {
let lineData = createLineData(null, indent)
let state = STATE.READY
const lineData = createLineData('', indent)
const document = {
ended: false,
async function next() {
if (state === STATE.READY) state = STATE.STARTED
clone() {
return useDocument(reader.clone(), indent)
},
const line = await reader.next()
if (line === null) {
state = STATE.ENDED
return true
}
async next() {
lineData.line = await reader.next()
if (lineData.line === null) return true
else parseLine(lineData)
},
lineData.line = line
parseLine(lineData)
return false
}
current() {
return document
},
const level = () => lineData.level
const line = () => lineData.line.slice(lineData.offsetHead)
const head = () => lineData.line.slice(lineData.offsetHead, lineData.offsetTail)
const tail = () => lineData.line.slice(lineData.offsetTail)
const match = (matchHead: string): boolean => matchHead === head()
line() {
return lineData.line?.slice(lineData.offsetHead)
},
async function each(handler: Function) {
// Set startLevel to -1 if we haven't started parsing the document yet.
// Otherwise we'll break too early, as the default value for doc.level() is 0.
const startLevel = state === STATE.READY ? -1 : level()
head () {
return lineData.line?.slice(lineData.offsetHead, lineData.offsetTail)
},
tail () {
return lineData.line?.slice(lineData.offsetTail)
},
level () {
return lineData.level
},
async content (contentLevel = -1, lines: string[] = []): Promise<Array<string>> {
if (contentLevel === -1) contentLevel = lineData.level + 1
const ended = await document.next()
if (ended) return lines
if (lineData.level < contentLevel) return lines
lines.push(lineData.line?.slice(contentLevel) || '')
return document.content(contentLevel, lines)
},
match(matchHead: string): boolean {
return matchHead === document.head()
},
async each(handler: Function) {
// Set startLevel to -1 if we haven't started parsing the document yet.
// Otherwise we'll break too early, as the default value for doc.level() is 0.
const startLevel = document.line() !== undefined ? document.level() : -1
while(true) {
if (await document.next()) break
if (document.level() <= startLevel) break
if (await handler()) break
}
},
async buildObject(allowList = [], valHandler?: () => any) {
if (!valHandler) valHandler = () => document.tail().trim()
const obj = {}
await document.each(async () => {
if (!document.head()) return
if (!allowList.length || allowList.includes(document.head())) {
obj[document.head()] = await valHandler()
return
}
// Parse unspecified text into an array of lines and save to #text key.
if (allowList && !allowList.includes(document.head()) && allowList.includes('#text')) {
obj['#text'] = [document.line(), ...await document.content(document.level())]
return
}
})
return obj
},
async seek (matchHead: string, contentLevel = -1): Promise<Document|false> {
if (contentLevel === -1) contentLevel = lineData.level
const ended = await document.next()
if (ended) return false
if (document.head() === matchHead) return document
if (lineData.level < contentLevel) return false
return document.seek(matchHead, contentLevel)
while(true) {
if (await next()) break
if (level() <= startLevel) break
if (await handler()) break
}
}
return document
/**
 * Collects the lines of the block that follows the current line.
 *
 * Keeps reading until the document ends or a line with a level below the
 * threshold is read. That terminating line has already been consumed from the
 * reader when this function returns.
 *
 * @param startLevel Minimum level a line must have to belong to the block, and
 *   the offset at which each collected line is sliced. `-1` means "one level
 *   deeper than the current line".
 * @param blockLines Accumulator the lines are appended to; defaults to an array
 *   seeded with the current line.
 * @returns The same `blockLines` array.
 */
async function blockAsText (startLevel: number = -1, blockLines: string[] = [line()]): Promise<Array<string>> {
  const minLevel = startLevel === -1 ? lineData.level + 1 : startLevel

  // next() resolves to true at the end of the document.
  while (!(await next()) && lineData.level >= minLevel) {
    blockLines.push(lineData.line?.slice(minLevel) || '')
  }

  return blockLines
}
/**
 * Builds a plain object from the lines visited by `each()`, using each line's
 * head as the key.
 *
 * @param allowKeys Heads that may become keys. An empty list accepts every head.
 *   If the list contains `'#text'`, a line whose head is not listed is stored
 *   under the `'#text'` key as the result of `blockAsText(level())`, replacing
 *   any earlier `'#text'` value.
 * @param processValue Optional callback producing the value for an accepted
 *   key. Without it the trimmed tail of the line is used.
 * @returns The assembled object.
 */
async function buildObject(allowKeys: Array<string> = [], processValue?: () => any) {
  const result: any = {}

  await each(async () => {
    const key = head()
    // Lines without a head contribute nothing.
    if (!key) return

    const isAccepted = !allowKeys.length || allowKeys.includes(key)
    if (isAccepted) {
      // Default to {head: tail} when no value callback is supplied.
      result[key] = processValue ? await processValue() : tail().trim()
    } else if (allowKeys.includes('#text')) {
      // Unlisted heads are captured as an array of text lines under '#text'.
      // TODO: Rework this. I don't like it at all.
      result['#text'] = await blockAsText(level())
    }
  })

  return result
}
/**
 * Reads the rest of the document into a nested array tree.
 *
 * Every node is a `[text, children]` pair. The returned node is a synthetic
 * `['root', [...]]` holding the level-0 lines.
 *
 * @returns The root node of the tree.
 */
async function toArrays(): Promise<LineArray> {
  const root: LineArray = ['root', []]
  // openScopes[d] is the node that lines at level d attach to; index 0 is the root.
  const openScopes: Array<LineArray> = [root]

  // next() resolves to true once we've reached the end of the document.
  while (!(await next())) {
    const depth = level()
    const parent = openScopes[depth]

    // A line indented deeper than any open scope has no parent: skip it.
    if (!parent) continue

    const node: LineArray = [line(), []]

    // Close every scope deeper than this line, then open the new one.
    openScopes.length = depth + 1
    openScopes.push(node)

    parent[1].push(node)
  }

  return root
}
// Public document API handed back to the caller.
return {
  // `state` is a primitive that next() reassigns. A plain `state,` property would
  // copy its value once, so callers would read STATE.READY forever. The getter
  // always reports the current value.
  get state() {
    return state
  },
  next,
  line,
  head,
  tail,
  level,
  match,
  each,
  blockAsText,
  buildObject,
  toArrays,
}
}

View File

@ -1,5 +1,5 @@
export type LineData = {
line: string|null;
line: string;
indent: string;
type: number;
level: number;
@ -7,7 +7,7 @@ export type LineData = {
offsetTail: number;
}
export function createLineData(line: string|null = null, indent: string = ' '): LineData {
export function createLineData(line: string = '', indent: string = ' '): LineData {
return { line, indent, type: 0, level: 0, offsetHead: 0, offsetTail: 0 }
}