From f54e29acbf08e377fd0fbb576830df5b1c0dd417 Mon Sep 17 00:00:00 2001 From: Joshua Bemenderfer Date: Sat, 4 Feb 2023 09:47:02 -0500 Subject: [PATCH] Remove helper functions in favor of using a smarter next() function in while loops. --- docs/experiments/package.json | 5 +- .../experiments/parsers/{v4 => v5}/example.js | 63 ++++-- packages/js/core/dist/document.cjs | 2 +- packages/js/core/dist/document.js | 103 ++------- packages/js/core/src/document.ts | 210 +++--------------- 5 files changed, 98 insertions(+), 285 deletions(-) rename docs/experiments/parsers/{v4 => v5}/example.js (51%) diff --git a/docs/experiments/package.json b/docs/experiments/package.json index 01f7326..58b0084 100644 --- a/docs/experiments/package.json +++ b/docs/experiments/package.json @@ -1,6 +1,9 @@ { "type": "module", "dependencies": { - "@terrace/core": "0.0.1" + "@terrace/core": "workspace:*" + }, + "scripts": { + "example": "node ./parsers/v5/example.js" } } diff --git a/docs/experiments/parsers/v4/example.js b/docs/experiments/parsers/v5/example.js similarity index 51% rename from docs/experiments/parsers/v4/example.js rename to docs/experiments/parsers/v5/example.js index 920acaa..97e111e 100644 --- a/docs/experiments/parsers/v4/example.js +++ b/docs/experiments/parsers/v5/example.js @@ -70,24 +70,55 @@ const lines = [ async function main() { - const { toLineArray } = useDocument(createStringReader(lines)) - console.dir(await toLineArray(), { depth: null }) + const { head, tail, next, match, level, line } = useDocument(createStringReader(lines)) - const { head, tail, each, match, toArray, toObject } = useDocument(createStringReader(lines)) + const structure = {} - const structure = await toObject({ - 'name': true, - 'version': () => tail().trim(), - 'license': () => tail().trim(), - 'exports': () => toObject({ - '#any': () => toObject({ import: true, require: true }) - }), - 'scripts': () => toObject({ '#any': true }), - 'devDependencies': () => toObject(), - 'authors': () => toArray({ - 'author': () => toObject({ name: true, email: true, '#text': true }) - }), - }) + async function kvObject(handle) { + const obj = {} + const l = level() + while (await next(l)) { + if (!head()) continue + obj[head()] = handle ? await handle(level()) : tail().trim() + } + return obj + } + + while (await next()) { + if (match('name')) structure.name = tail().trim() + if (match('version')) structure.version = tail().trim() + if (match('exports')) structure.exports = await kvObject(async l => { + const obj = {} + while (await next(l)) { + if (match('import')) obj.import = tail().trim() + if (match('require')) obj.require = tail().trim() + } + return obj + }) + + if (match('scripts')) structure.scripts = await kvObject() + if (match('devDependencies')) structure.devDependencies = await kvObject() + + if (match('author')) { + if (!structure.authors) structure.authors = [] + const author = {} + structure.authors.push(author) + + const l = level() + while (await next(l)) { + if (!head()) continue + if (match('name')) author.name = tail().trim() + else if (match('email')) author.email = tail().trim() + else { + if (!author['#text']) author['#text'] = [line()] + // Loop through all remaining lines to avoid re-matching name or email above. + while(await next(l)) { + author['#text'].push(line()) + } + } + } + } + } console.dir(structure, { depth: null }) } diff --git a/packages/js/core/dist/document.cjs b/packages/js/core/dist/document.cjs index 7d96f23..aa9dd5c 100644 --- a/packages/js/core/dist/document.cjs +++ b/packages/js/core/dist/document.cjs @@ -1 +1 @@ -"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const m=require("./parser.cjs");function g(w,b=" "){let c=0;const i=m.createLineData("",b);async function f(){switch(c){case 0:c=1;break;case 2:return c=1,!1}const e=await w.next();return e===null?(c=3,!0):(i.line=e,m.parseLine(i),!1)}const y=()=>c=2,o=()=>i.level,d=()=>i.line.slice(i.offsetHead),u=()=>i.line.slice(i.offsetHead,i.offsetTail),s=()=>i.line.slice(i.offsetTail),j=e=>e===u();async function h(e){const a=c===0?-1:o();for(;;){if(await f())return;if(o()<=a)return y();if(await e())return}}async function p(e=-1,a=[d()]){var n;return e===-1&&(e=o()+1),await f()?a:o(){e[r]===!0&&(t[r]={type:a?"collection":"normal",handle:()=>s().trim()}),typeof e[r]=="function"&&(t[r]={type:a?"collection":"normal",handle:e[r]}),typeof e[r]=="object"&&(t[r]=e[r])}):t={"#any":{type:a?"collection":"normal",handle:()=>s().trim()}},await h(async()=>{const r=u();if(!r)return;const l=t[r]||t["#any"];!l||(l.type==="normal"?n.push(await l.handle()):l.type==="collection"&&n.push({[r]:await l.handle()}))}),n}async function O(e={}){const a={};let n={};return Object.keys(e).length?Object.keys(e).forEach(t=>{t==="#tail"?n[t]={type:"tail",handle:()=>{}}:t==="#text"?n[t]={type:"text",handle:()=>{}}:e[t]===!0?n[t]={type:"normal",handle:()=>s().trim()}:typeof e[t]=="function"?n[t]={type:"normal",handle:e[t]}:typeof e[t]=="object"&&(n[t]=e[t])}):n={"#any":{type:"normal",handle:()=>s().trim()}},n["#tail"]&&(a["#tail"]=s().trim()),await h(async()=>{const t=u();if(!t)return;const r=n[t]||n["#any"]||n["#text"];if(!!r&&(r.type==="normal"?a[t]=await r.handle():r.type==="collection"?(a[t]||(a[t]=[]),a[t].push(await r.handle())):r.type==="text"&&(a["#text"]=await p(o())),n&&Object.keys(n).every(l=>["collection"].includes(n[l].type)?!1:a[l]!==void 0)))return!0}),a}async function v(){const e=[["root",[]]];for(;!await f();){const a=o(),n=a+1,t=e[a];if(!t)continue;e.length=n;const r=e[n]=[d(),[]];t[1].push(r)}return e[0]}return{next:f,line:d,head:u,tail:s,level:o,match:j,each:h,blockAsText:p,toObject:O,toArray:x,toLineArray:v}}exports.useDocument=g; +"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const i=require("./parser.cjs");function f(r,c=" "){const e=i.createLineData("",c);let t=!1;async function o(n=-1){if(t)t=!1;else{const a=await r.next();if(a===null)return!1;e.line=a,i.parseLine(e)}return l()<=n?(t=!0,!1):!0}const l=()=>e.level,u=()=>e.line.slice(e.offsetHead),s=()=>e.line.slice(e.offsetHead,e.offsetTail);return{next:o,level:l,line:u,head:s,tail:()=>e.line.slice(e.offsetTail),match:n=>n===s()}}exports.useDocument=f; diff --git a/packages/js/core/dist/document.js b/packages/js/core/dist/document.js index e954a64..23e1272 100644 --- a/packages/js/core/dist/document.js +++ b/packages/js/core/dist/document.js @@ -1,89 +1,28 @@ -import { parseLine as v, createLineData as H } from "./parser.js"; -function g(y, w = " ") { - let c = 0; - const i = H("", w); - async function s() { - switch (c) { - case 0: - c = 1; - break; - case 2: - return c = 1, !1; +import { parseLine as r, createLineData as u } from "./parser.js"; +function h(s, f = " ") { + const e = u("", f); + let t = !1; + async function c(n = -1) { + if (t) + t = !1; + else { + const i = await s.next(); + if (i === null) + return !1; + e.line = i, r(e); } - const e = await y.next(); - return e === null ? (c = 3, !0) : (i.line = e, v(i), !1); - } - const p = () => c = 2, o = () => i.level, h = () => i.line.slice(i.offsetHead), u = () => i.line.slice(i.offsetHead, i.offsetTail), f = () => i.line.slice(i.offsetTail), b = (e) => e === u(); - async function d(e) { - const a = c === 0 ? -1 : o(); - for (; ; ) { - if (await s()) - return; - if (o() <= a) - return p(); - if (await e()) - return; - } - } - async function m(e = -1, a = [h()]) { - var n; - return e === -1 && (e = o() + 1), await s() ? a : o() < e ? (p(), a) : (a.push(((n = i.line) == null ? void 0 : n.slice(e)) || ""), m(e, a)); - } - async function j(e = {}, a = !1) { - const n = []; - let t = {}; - return Object.keys(e).length ? Object.keys(e).forEach((r) => { - e[r] === !0 && (t[r] = { type: a ? "collection" : "normal", handle: () => f().trim() }), typeof e[r] == "function" && (t[r] = { type: a ? "collection" : "normal", handle: e[r] }), typeof e[r] == "object" && (t[r] = e[r]); - }) : t = { "#any": { type: a ? "collection" : "normal", handle: () => f().trim() } }, await d(async () => { - const r = u(); - if (!r) - return; - const l = t[r] || t["#any"]; - !l || (l.type === "normal" ? n.push(await l.handle()) : l.type === "collection" && n.push({ [r]: await l.handle() })); - }), n; - } - async function x(e = {}) { - const a = {}; - let n = {}; - return Object.keys(e).length ? Object.keys(e).forEach((t) => { - t === "#tail" ? n[t] = { type: "tail", handle: () => { - } } : t === "#text" ? n[t] = { type: "text", handle: () => { - } } : e[t] === !0 ? n[t] = { type: "normal", handle: () => f().trim() } : typeof e[t] == "function" ? n[t] = { type: "normal", handle: e[t] } : typeof e[t] == "object" && (n[t] = e[t]); - }) : n = { "#any": { type: "normal", handle: () => f().trim() } }, n["#tail"] && (a["#tail"] = f().trim()), await d(async () => { - const t = u(); - if (!t) - return; - const r = n[t] || n["#any"] || n["#text"]; - if (!!r && (r.type === "normal" ? a[t] = await r.handle() : r.type === "collection" ? (a[t] || (a[t] = []), a[t].push(await r.handle())) : r.type === "text" && (a["#text"] = await m(o())), n && Object.keys(n).every((l) => ["collection"].includes(n[l].type) ? !1 : a[l] !== void 0))) - return !0; - }), a; - } - async function O() { - const e = [["root", []]]; - for (; !await s(); ) { - const a = o(), n = a + 1, t = e[a]; - if (!t) - continue; - e.length = n; - const r = e[n] = [h(), []]; - t[1].push(r); - } - return e[0]; + return l() <= n ? (t = !0, !1) : !0; } + const l = () => e.level, o = () => e.line.slice(e.offsetHead), a = () => e.line.slice(e.offsetHead, e.offsetTail); return { - next: s, - line: h, - head: u, - tail: f, - level: o, - match: b, - each: d, - blockAsText: m, - toObject: x, - toArray: j, - toLineArray: O + next: c, + level: l, + line: o, + head: a, + tail: () => e.line.slice(e.offsetTail), + match: (n) => n === a() }; } export { - g as useDocument + h as useDocument }; diff --git a/packages/js/core/src/document.ts b/packages/js/core/src/document.ts index 64fab7c..580c915 100644 --- a/packages/js/core/src/document.ts +++ b/packages/js/core/src/document.ts @@ -1,216 +1,56 @@ import type { Reader } from './readers/reader' import { createLineData, parseLine } from './parser' -enum STATE { - READY = 0, - STARTED = 1, - PAUSED = 2, - ENDED = 3 -} - -type Document = { - next: () => Promise +export type Document = { + next: (startLevel?: number) => Promise level: () => number, line: () => string, head: () => string, tail: () => string, - match: (matchHead: string) => boolean, - each: (handler: Function) => void, - blockAsText: (startLevel: number, lines?: string[]) => Promise>, - toArray: (inputMatchers: { [key: string]: Function|boolean|{ type: string, handle: Function } }, collection: boolean) => Promise<[{ [key: string]: any }?]>, - toObject: (matchers?: { [key: string]: Function|boolean }) => { [key: string]: any }, - toLineArray(): Promise + match: (matchHead: string) => boolean } -type LineArray = [string, Array] - export function useDocument (reader: Reader, indent: string = ' '): Document { - let state = STATE.READY const lineData = createLineData('', indent) - async function next() { - switch (state) { - // The initial state change allows us to do some special-case handling for the initial state of lineData. TODO: Should lineData have a special inital state? - case STATE.READY: - state = STATE.STARTED - break - // If we are currently in the "paused" state, repeat the same line instead of reading the next one. - case STATE.PAUSED: - state = STATE.STARTED - return false + let repeat = false + async function next(startLevel: number = -1): Promise { + // Repeat the current line instead of parsing a new one if the previous call to next() + // determined the current line to be out of its scope. + if (repeat) repeat = false + // Otherwise parse the line normally. + else { + const line = await reader.next() + // If there are no more lines, bail out. + if (line === null) return false + + lineData.line = line + parseLine(lineData) } - const line = await reader.next() - if (line === null) { - state = STATE.ENDED - return true + // If we shouldn't be handling this line, make the next call to next() repeat the current line. + // Allows a child loop to look forward, determine that the next line will be outside its purview, + // and return control to the calling loop transparently without additional logic. + if (level() <= startLevel) { + repeat = true + return false } - lineData.line = line - parseLine(lineData) - return false + return true } - // If we pause, the next call to next() will repeat the current line. - // Allows a child loop to look forward, determine that the next line will be outside its purview, - // and return control to the calling loop transparently without additional logic. - const pause = () => state = STATE.PAUSED const level = () => lineData.level const line = () => lineData.line.slice(lineData.offsetHead) const head = () => lineData.line.slice(lineData.offsetHead, lineData.offsetTail) const tail = () => lineData.line.slice(lineData.offsetTail) const match = (matchHead: string): boolean => matchHead === head() - async function each(handler: Function) { - // Set startLevel to -1 if we haven't started parsing the document yet. - // Otherwise we'll break to early, as the default value for doc.level() is 0. - const startLevel = state === STATE.READY ? -1 : level() - - while(true) { - if (await next()) return - // If we've reached the next block outside the level of this one, "pause", so that the next time "next" is called, we repeat the same line. - if (level() <= startLevel) return pause() - // If the handler returns true, exit. - if (await handler()) return - } - } - - async function blockAsText (startLevel: number = -1, blockLines: string[] = [line()]): Promise> { - if (startLevel === -1) startLevel = level() + 1 - - if (await next()) return blockLines - if (level() < startLevel) { pause(); return blockLines } - - blockLines.push(lineData.line?.slice(startLevel) || '') - return blockAsText(startLevel, blockLines) - } - - // Currently a modified copy of toObject. Has lots of room for simplification. - async function toArray (inputMatchers: { [key: string]: Function|boolean|{ type: string, handle: Function } } = {}, collection: boolean = false): Promise<[{ [key: string]: any }?]> { - const arr: [{ [key: string]: any }?] = [] - - let matchers: { [key: string]: {type: string, handle: Function } } = {} - - // Normalize the matchers to an object-based format despite allowing flexible input types for convenience. - // TODO: Decide whether to enforce verbose input once a DSL has been created. - if (!Object.keys(inputMatchers).length) { - // Default matcher - matchers = { '#any': { type: collection ? 'collection' : 'normal', handle: () => tail().trim() } } - } else { - Object.keys(inputMatchers).forEach(key => { - // If a matcher is specified as `true`, treat as a key-value pair where { [head]: tail } - if(inputMatchers[key] === true) matchers[key] = { type: collection ? 'collection' : 'normal', handle: () => tail().trim() } - // If a matcher is specified as a function, treat as a key-value pair where { [head]: handle() } - if (typeof inputMatchers[key] === 'function') matchers[key] = { type: collection ? 'collection' : 'normal', handle: inputMatchers[key] as Function } - // If a matcher is specified as an object, allow customization of the type and handle for various cases. - if (typeof inputMatchers[key] === 'object') matchers[key] = inputMatchers[key] as { type: string, handle: Function } - }) - } - - await each(async () => { - const currHead = head() - if (!currHead) return - - const currMatcher = matchers[currHead] || matchers['#any'] - if (!currMatcher) return - - // Normal - Outputs values directly into the array, removing their keys. - if (currMatcher.type === 'normal') arr.push(await currMatcher.handle()) - // Collection - Outputs values as { head: value } objects into the array, preserving their keys. - else if (currMatcher.type === 'collection') arr.push({ [currHead]: await currMatcher.handle() }) - }) - return arr - } - - async function toObject (inputMatchers: { [key: string]: Function|boolean|{ type: string, handle: Function } } = {}): Promise<{ [key: string]: any }> { - const obj: { [key: string]: any } = {} - - let matchers: { [key: string]: {type: string, handle: Function } } = {} - - // Normalize the matchers to an object-based format despite allowing flexible input types for convenience. - // TODO: Decide whether to enforce verbose input once a DSL has been created. - if (!Object.keys(inputMatchers).length) { - // Default matcher - matchers = { '#any': { type: 'normal', handle: () => tail().trim() } } - } else { - Object.keys(inputMatchers).forEach(key => { - if (key === '#tail') matchers[key] = { type: 'tail', handle: () => {} } - else if (key === '#text') matchers[key] = { type: 'text', handle: () => {} } - // If a matcher is specified as `true`, treat as a key-value pair where { [head]: tail } - else if (inputMatchers[key] === true) matchers[key] = { type: 'normal', handle: () => tail().trim() } - // If a matcher is specified as a function, treat as a key-value pair where { [head]: handle() } - else if (typeof inputMatchers[key] === 'function') matchers[key] = { type: 'normal', handle: inputMatchers[key] as Function } - // If a matcher is specified as an object, allow customization of the type and handle for various cases. - else if (typeof inputMatchers[key] === 'object') matchers[key] = inputMatchers[key] as { type: string, handle: Function } - }) - } - - // TODO: Rework this so it fits bettwer with the model already established here. - if (matchers['#tail']) obj['#tail'] = tail().trim() - - await each(async () => { - const currHead = head() - if (!currHead) return - - const currMatcher = matchers[currHead] || matchers['#any'] || matchers['#text'] - if (!currMatcher) return - - if (currMatcher.type === 'normal') obj[currHead] = await currMatcher.handle() - // Allows matching the same key more than once. - else if (currMatcher.type === 'collection') { - if (!obj[currHead]) obj[currHead] = [] - obj[currHead].push(await currMatcher.handle()) - } - // If matchers[currHead] or matchers[#any] is a function, set object key to its output. - // If we get to this point and matchers[#text] is set, parse all remaining block contents as text. - // TODO: I still don't like this. - else if (currMatcher.type === 'text') obj['#text'] = await blockAsText(level()) - - // Bail early as soon as we know all keys have been matched. - if (matchers && Object.keys(matchers).every(key => { - // If we have any collection keys, we have to continue searching all the way to the end of the current block - // as there may be more than one entry. - if (['collection'].includes(matchers[key].type)) return false - return obj[key] !== undefined - })) return true - }) - return obj - } - - async function toLineArray (): Promise { - const levelTracker: Array = [['root', []]] - - // Simple parser that produces canonical array structure for blocks. - while (true) { - // If next() returns true we've ended the - if (await next()) break - const parentLevel = level() - const scopeLevel = parentLevel + 1 - // Determine parent for this scope. - const parent = levelTracker[parentLevel] - // If there's no parent, skip this line. - if (!parent) continue - - levelTracker.length = scopeLevel - const scope = levelTracker[scopeLevel] = [line(), []] - // Add current scope to parent. - parent[1].push(scope) - } - - return levelTracker[0] - } - return { next, + level, line, head, tail, - level, - match, - each, - blockAsText, - toObject, - toArray, - toLineArray, + match } }