diff --git a/docs/experiments/parsers/v4/example.js b/docs/experiments/parsers/v4/example.js index 5e5b5ed..6e448e4 100644 --- a/docs/experiments/parsers/v4/example.js +++ b/docs/experiments/parsers/v4/example.js @@ -40,52 +40,43 @@ const lines = [ ` Further comments below. As I will now demonstrate, there is no simple`, ` even if embedded`, ` way of dealing with this problem.`, + `` ] +// Schema +// name tail +// version tail +// license tail +// exports object +// #any object +// import tail +// require tail +// scripts object +// #any tail +// devDependencies +// #any tail +// author object +// name tail +// email tail +// #text + + async function main() { - const { toArrays } = useDocument(createStringReader(lines)) - const resultArrays = await toArrays() - console.dir(resultArrays, { depth: null }) + const { toLineArray } = useDocument(createStringReader(lines)) + console.dir(await toLineArray(), { depth: null }) - const { tail, each, match, buildObject } = useDocument(createStringReader(lines)) + const { head, tail, each, match, toObject } = useDocument(createStringReader(lines)) - const structure = { - name: null, - version: null, - license: null, - exports: null, - scripts: null, - devDependencies: null, - author: null - } - - await each(async () => { - if (match('name')) structure.name = tail().trim() - if (match('version')) structure.version = tail().trim() - if (match('license')) structure.license = tail().trim() - // FIXME: Order of operations causes other parts to break if this doesn't run first?! 
- if (match('exports')) structure.exports = await buildObject([], async () => { - const section = { import: null, require: null } - - await each(() => { - if (match('import')) section.import = tail().trim() - if (match('require')) section.require = tail().trim() - if (section.import && section.require) return true - }) - - return section - }) - if (match('scripts')) structure.scripts = await buildObject() - if (match('devDependencies')) structure.devDependencies = await buildObject() - if (match('author')) structure.author = await buildObject(['name', 'email', '#text']) - - return structure.name && - structure.version && - structure.license && - structure.exports && - structure.scripts && - structure.devDependencies && - structure.author + const structure = await toObject({ + 'name': true, + 'version': () => tail().trim(), + 'license': () => tail().trim(), + 'exports': () => toObject({ + '#any': () => toObject({ import: true, require: true }) + }), + 'scripts': () => toObject({ '#any': true }), + 'devDependencies': () => toObject(), + 'author': () => toObject({ name: true, email: true, '#text': true }) }) console.dir(structure, { depth: null }) diff --git a/packages/js/core/dist/document.cjs b/packages/js/core/dist/document.cjs index bf6ea3e..7a41f4c 100644 --- a/packages/js/core/dist/document.cjs +++ b/packages/js/core/dist/document.cjs @@ -1 +1 @@ -"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const h=require("./parser.cjs");function T(p,b=" "){let c=0;const t=h.createLineData("",b);async function s(){c===0&&(c=1);const e=await p.next();return e===null?(c=2,!0):(t.line=e,h.parseLine(t),!1)}const a=()=>t.level,u=()=>t.line.slice(t.offsetHead),o=()=>t.line.slice(t.offsetHead,t.offsetTail),f=()=>t.line.slice(t.offsetTail),v=e=>e===o();async function d(e){const n=c===0?-1:a();for(;!(await s()||a()<=n||await e()););}async function l(e=-1,n=[u()]){var i;return e===-1&&(e=t.level+1),await s()||t.level{const 
i=o();if(!!i){if(!e.length||e.includes(i)){r[i]=n?await n():f().trim();return}if(e&&!e.includes(i)&&e.includes("#text")){r["#text"]=await l(a());return}}}),r}async function m(){const e=[["root",[]]];for(;!await s();){const n=a(),r=n+1,i=e[n];if(!i)continue;e.length=r;const x=e[r]=[u(),[]];i[1].push(x)}return e[0]}return{state:c,next:s,line:u,head:o,tail:f,level:a,match:v,each:d,blockAsText:l,buildObject:w,toArrays:m}}exports.useDocument=T; +"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const d=require("./parser.cjs");function D(v,b=" "){let a=0;const n=d.createLineData("",b);async function u(){switch(a){case 0:a=1;break;case 2:return a=1,!1}const e=await v.next();return e===null?(a=3,!0):(n.line=e,d.parseLine(n),!1)}const y=()=>a=2,r=()=>n.level,s=()=>n.line.slice(n.offsetHead),c=()=>n.line.slice(n.offsetHead,n.offsetTail),l=()=>n.line.slice(n.offsetTail),j=e=>e===c();async function w(e){const t=a===0?-1:r();for(;;){if(await u())return;if(r()<=t)return y();if(await e())return}}async function f(e=-1,t=[s()]){var i;return e===-1&&(e=r()+1),await u()?t:r(){const i=c();if(!i)return;const o=e?e[i]||e["#any"]:null;if(e?o===!0?t[i]=l().trim():o?t[i]=await o():e!=null&&e["#text"]&&(t["#text"]=await f(r())):t[i]=l().trim(),e&&Object.keys(e).every(p=>t[p]!==void 0))return!0}),t}async function T(){const e=[["root",[]]];for(;!await u();){const t=r(),i=t+1,o=e[t];if(!o)continue;e.length=i;const p=e[i]=[s(),[]];o[1].push(p)}return e[0]}return{next:u,line:s,head:c,tail:l,level:r,match:j,each:w,blockAsText:f,toObject:x,toLineArray:T}}exports.useDocument=D; diff --git a/packages/js/core/dist/document.js b/packages/js/core/dist/document.js index e215aa4..dac40b8 100644 --- a/packages/js/core/dist/document.js +++ b/packages/js/core/dist/document.js @@ -1,64 +1,70 @@ -import { parseLine as v, createLineData as T } from "./parser.js"; -function y(h, p = " ") { - let r = 0; - const t = T("", p); - async function s() { - r === 
0 && (r = 1); - const e = await h.next(); - return e === null ? (r = 2, !0) : (t.line = e, v(t), !1); +import { parseLine as j, createLineData as D } from "./parser.js"; +function k(x, d = " ") { + let o = 0; + const n = D("", d); + async function c() { + switch (o) { + case 0: + o = 1; + break; + case 2: + return o = 1, !1; + } + const e = await x.next(); + return e === null ? (o = 3, !0) : (n.line = e, j(n), !1); } - const a = () => t.level, o = () => t.line.slice(t.offsetHead), u = () => t.line.slice(t.offsetHead, t.offsetTail), l = () => t.line.slice(t.offsetTail), w = (e) => e === u(); - async function d(e) { - const n = r === 0 ? -1 : a(); - for (; !(await s() || a() <= n || await e()); ) - ; + const w = () => o = 2, r = () => n.level, u = () => n.line.slice(n.offsetHead), s = () => n.line.slice(n.offsetHead, n.offsetTail), f = () => n.line.slice(n.offsetTail), v = (e) => e === s(); + async function y(e) { + const t = o === 0 ? -1 : r(); + for (; ; ) { + if (await c()) + return; + if (r() <= t) + return w(); + if (await e()) + return; + } } - async function f(e = -1, n = [o()]) { + async function l(e = -1, t = [u()]) { var i; - return e === -1 && (e = t.level + 1), await s() || t.level < e ? n : (n.push(((i = t.line) == null ? void 0 : i.slice(e)) || ""), f(e, n)); + return e === -1 && (e = r() + 1), await c() ? t : r() < e ? (w(), t) : (t.push(((i = n.line) == null ? void 0 : i.slice(e)) || ""), l(e, t)); } - async function x(e = [], n) { - const c = {}; - return await d(async () => { - const i = u(); - if (!!i) { - if (!e.length || e.includes(i)) { - c[i] = n ? await n() : l().trim(); - return; - } - if (e && !e.includes(i) && e.includes("#text")) { - c["#text"] = await f(a()); - return; - } - } - }), c; + async function T(e) { + const t = {}; + return await y(async () => { + const i = s(); + if (!i) + return; + const a = e ? e[i] || e["#any"] : null; + if (e ? a === !0 ? t[i] = f().trim() : a ? 
t[i] = await a() : e != null && e["#text"] && (t["#text"] = await l(r())) : t[i] = f().trim(), e && Object.keys(e).every((p) => t[p] !== void 0)) + return !0; + }), t; } async function b() { const e = [["root", []]]; - for (; !await s(); ) { - const n = a(), c = n + 1, i = e[n]; - if (!i) + for (; !await c(); ) { + const t = r(), i = t + 1, a = e[t]; + if (!a) continue; - e.length = c; - const m = e[c] = [o(), []]; - i[1].push(m); + e.length = i; + const p = e[i] = [u(), []]; + a[1].push(p); } return e[0]; } return { - state: r, - next: s, - line: o, - head: u, - tail: l, - level: a, - match: w, - each: d, - blockAsText: f, - buildObject: x, - toArrays: b + next: c, + line: u, + head: s, + tail: f, + level: r, + match: v, + each: y, + blockAsText: l, + toObject: T, + toLineArray: b }; } export { - y as useDocument + k as useDocument }; diff --git a/packages/js/core/src/document.ts b/packages/js/core/src/document.ts index ddbe6d1..0f01ce0 100644 --- a/packages/js/core/src/document.ts +++ b/packages/js/core/src/document.ts @@ -4,11 +4,11 @@ import { createLineData, parseLine } from './parser' enum STATE { READY = 0, STARTED = 1, - ENDED = 2 + PAUSED = 2, + ENDED = 3 } type Document = { - state: STATE, next: () => Promise level: () => number, line: () => string, @@ -17,18 +17,27 @@ type Document = { match: (matchHead: string) => boolean, each: (handler: Function) => void, blockAsText: (startLevel: number, lines?: string[]) => Promise>, - buildObject: (allowKeys: Array, processValue?: () => any) => any, - toArrays(): Promise + toObject: (matchers?: { [key: string]: Function|boolean }) => { [key: string]: any }, + toLineArray(): Promise } -type LineArray = [string, Array]; +type LineArray = [string, Array] export function useDocument (reader: Reader, indent: string = ' '): Document { let state = STATE.READY const lineData = createLineData('', indent) async function next() { - if (state === STATE.READY) state = STATE.STARTED + switch (state) { + // The initial state 
change allows us to do some special-case handling for the initial state of lineData. TODO: Should lineData have a special initial state? + case STATE.READY: + state = STATE.STARTED + break + // If we are currently in the "paused" state, repeat the same line instead of reading the next one. + case STATE.PAUSED: + state = STATE.STARTED + return false + } const line = await reader.next() if (line === null) { @@ -41,6 +50,10 @@ export function useDocument (reader: Reader, indent: string = ' '): Document { return false } + // If we pause, the next call to next() will repeat the current line. + // Allows a child loop to look forward, determine that the next line will be outside its purview, + // and return control to the calling loop transparently without additional logic. + const pause = () => state = STATE.PAUSED const level = () => lineData.level const line = () => lineData.line.slice(lineData.offsetHead) const head = () => lineData.line.slice(lineData.offsetHead, lineData.offsetTail) @@ -53,52 +66,54 @@ export function useDocument (reader: Reader, indent: string = ' '): Document { const startLevel = state === STATE.READY ? -1 : level() while(true) { - if (await next()) break - if (level() <= startLevel) break - if (await handler()) break + if (await next()) return + // If we've reached the next block outside the level of this one, "pause", so that the next time "next" is called, we repeat the same line. + if (level() <= startLevel) return pause() + // If the handler returns true, exit. 
+ if (await handler()) return } } async function blockAsText (startLevel: number = -1, blockLines: string[] = [line()]): Promise> { - if (startLevel === -1) startLevel = lineData.level + 1 + if (startLevel === -1) startLevel = level() + 1 - const ended = await next() - if (ended) return blockLines - if (lineData.level < startLevel) return blockLines + if (await next()) return blockLines + if (level() < startLevel) { pause(); return blockLines } blockLines.push(lineData.line?.slice(startLevel) || '') return blockAsText(startLevel, blockLines) } - async function buildObject(allowKeys: Array = [], processValue?: () => any) { - const obj: any = {} + async function toObject (matchers?: { [key: string]: Function|boolean }) { + const obj: { [key: string]: any } = {} await each(async () => { const currHead = head() if (!currHead) return - // Set the object key matching the current head if it is allowed, or if no allow list is specified. - if (!allowKeys.length || allowKeys.includes(currHead)) { - // Default to using {head: tail} as the key-value pair if no value handler is specified. - obj[currHead] = processValue ? await processValue() : tail().trim() - return - } - // Parse unspecified text into an array of lines and save to #text key. - // TODO: Rework this. I don't like it at all. - if (allowKeys && !allowKeys.includes(currHead) && allowKeys.includes('#text')) { - obj['#text'] = await blockAsText(level()) - return - } + const propertyMatcher = matchers ? matchers[currHead] || matchers['#any'] : null + // Set the object key matching the current head to tail() if no matchers are specified. + if (!matchers) obj[currHead] = tail().trim() + // Or if matchers[currHead] is `true`, or if #any is true. + else if (propertyMatcher === true) obj[currHead] = tail().trim() + // If matchers[currHead] or matchers[#any] is a function, set object key to its output. 
+ else if (propertyMatcher) obj[currHead] = await propertyMatcher() + // If we get to this point and matchers[#text] is set, parse all remaining block contents as text. + // TODO: I still don't like this. + else if (matchers?.['#text']) obj['#text'] = await blockAsText(level()) + + // Bail early as soon as we know all keys have been matched. + if (matchers && Object.keys(matchers).every(k => obj[k] !== undefined)) return true }) return obj } - async function toArrays(): Promise { + async function toLineArray (): Promise { const levelTracker: Array = [['root', []]] // Simple parser that produces canonical array structure for blocks. while (true) { // If next() returns true we've ended the - if (await next()) break; + if (await next()) break const parentLevel = level() const scopeLevel = parentLevel + 1 // Determine parent for this scope. @@ -116,7 +131,6 @@ export function useDocument (reader: Reader, indent: string = ' '): Document { } return { - state, next, line, head, @@ -125,7 +139,7 @@ export function useDocument (reader: Reader, indent: string = ' '): Document { match, each, blockAsText, - buildObject, - toArrays, + toObject, + toLineArray, } }