diff --git a/docs/experiments/parsers/v4/core.js b/docs/experiments/parsers/v4/core.js deleted file mode 100644 index f2283a7..0000000 --- a/docs/experiments/parsers/v4/core.js +++ /dev/null @@ -1,77 +0,0 @@ -import { useDocument } from '@terrace/core' -import { createStringReader } from '@terrace/core/readers/js-string' - -export async function parse(lines) { - const { tail, each, match, buildObject } = useDocument(createStringReader(lines)) - - const structure = { - name: null, - version: null, - license: null, - exports: null, - scripts: null, - devDependencies: null, - author: null - } - - await each(async () => { - if (match('name')) structure.name = tail().trim() - if (match('version')) structure.version = tail().trim() - if (match('license')) structure.license = tail().trim() - // FIXME: Order of operations causes other parts to break if this doesn't run first?! - if (match('exports')) structure.exports = await buildObject([], async () => { - const section = { import: null, require: null } - - await each(() => { - if (match('import')) section.import = tail().trim() - if (match('require')) section.require = tail().trim() - if (section.import && section.require) return true - }) - - return section - }) - if (match('scripts')) structure.scripts = await buildObject() - if (match('devDependencies')) structure.devDependencies = await buildObject() - if (match('author')) structure.author = await buildObject(['name', 'email', '#text']) - - return structure.name && - structure.version && - structure.license && - structure.exports && - structure.scripts && - structure.devDependencies && - structure.author - }) - - return structure -} - -export async function toArrays(lines) { - const { next, level, line } = useDocument(createStringReader(lines)) - - const levelTracker = [] - - function createScope(level, line) { - levelTracker.length = level - const scope = levelTracker[level] = [line, []] - return scope - } - createScope(0, 'root') - - // Simple parser that produces 
canonical array structure for blocks. - while (true) { - // If next() returns true we've ended the document. - if (await next()) break; - // Determine parent for this scope. - const parent = levelTracker[level()] - // If there's no parent, skip this line. - if (!parent) continue - - // Create new scope - const scope = createScope(level() + 1, line()) - // Add current scope to parent. - parent[1].push(scope) - } - - return levelTracker[0] -} diff --git a/docs/experiments/parsers/v4/example.js b/docs/experiments/parsers/v4/example.js index 8be005d..5e5b5ed 100644 --- a/docs/experiments/parsers/v4/example.js +++ b/docs/experiments/parsers/v4/example.js @@ -1,40 +1,7 @@ -import { parse, toArrays } from './core.js' +import { useDocument } from '@terrace/core' +import { createStringReader } from '@terrace/core/readers/js-string' -const linesArrays = [ - `title Example`, - `options`, - ` parameter1 30`, - ` parameter2 Enim eu id anim minim reprehenderit nostrud eu amet deserunt ea ut do cupidatat ea.`, - `options`, - ` parameter1 0`, - ` parameter2 Esse incididunt et est adipisicing eiusmod aliqua enim ea aliqua id enim.`, - ` deep Enim fugiat do in est commodo culpa dolore.`, - `subsection`, - ` position 1`, - ` Ea dolore in aliquip fugiat anim adipisicing amet aute tempor et deserunt est duis sint.`, - `subsection 2`, - ` position 2`, - ` `, - ` Aute deserunt incididunt ad in sint adipisicing est officia velit pariatur ipsum deserunt quis nulla.`, - ` Ea dolore in aliquip fugiat anim adipisicing amet aute tempor et deserunt est duis sint.`, - `list`, - ` - item 1`, - ` - item 2`, - `collection`, - ` section`, - ` lorem ipsum 1`, - ` section`, - ` lorem ipsum 2`, - `collection2`, - ` section`, - ` position 3`, - ` Laborum aute anim occaecat occaecat pariatur tempor proident magna sit magna non non.`, - ` list`, - ` 1`, - ` 2` -] - -const linesParse = [ +const lines = [ `name @terrace/core`, `version 0.0.1`, `randomthing test`, @@ -75,35 +42,53 @@ const linesParse = [ ` 
way of dealing with this problem.`, ] -const schema = { - "name": {count: 1}, - "version": {count: 1}, - "license": {count: 1}, - "exports": {count: 1, children: { - "?": {count: -1, children: { - "import": {count: 1}, - "require": {count: 1} - }} - }}, - "scripts": {count: 1, children: { - "?": { count: -1 } - }}, - "devDependencies": {count: 1, children: { - "?": { count: -1 } - }}, - "author": { count: 1, children: { - "name": { count: 1 }, - "email": { count: 1 }, - "? literal": { count: -1 } - }} -} - async function main() { - const resultArrays = await toArrays(linesArrays) - // console.dir(resultArrays, { depth: null }) + const { toArrays } = useDocument(createStringReader(lines)) + const resultArrays = await toArrays() + console.dir(resultArrays, { depth: null }) - const resultParse = await parse(linesParse) - console.dir(resultParse, { depth: null }) + const { tail, each, match, buildObject } = useDocument(createStringReader(lines)) + + const structure = { + name: null, + version: null, + license: null, + exports: null, + scripts: null, + devDependencies: null, + author: null + } + + await each(async () => { + if (match('name')) structure.name = tail().trim() + if (match('version')) structure.version = tail().trim() + if (match('license')) structure.license = tail().trim() + // FIXME: Order of operations causes other parts to break if this doesn't run first?! 
+ if (match('exports')) structure.exports = await buildObject([], async () => { + const section = { import: null, require: null } + + await each(() => { + if (match('import')) section.import = tail().trim() + if (match('require')) section.require = tail().trim() + if (section.import && section.require) return true + }) + + return section + }) + if (match('scripts')) structure.scripts = await buildObject() + if (match('devDependencies')) structure.devDependencies = await buildObject() + if (match('author')) structure.author = await buildObject(['name', 'email', '#text']) + + return structure.name && + structure.version && + structure.license && + structure.exports && + structure.scripts && + structure.devDependencies && + structure.author + }) + + console.dir(structure, { depth: null }) } main() diff --git a/packages/js/core/dist/document.cjs b/packages/js/core/dist/document.cjs index d79e3a6..bf6ea3e 100644 --- a/packages/js/core/dist/document.cjs +++ b/packages/js/core/dist/document.cjs @@ -1 +1 @@ -"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const s=require("./parser.cjs");function c(a,u=" "){let r=s.createLineData(null,u);const t={ended:!1,clone(){return c(a.clone(),u)},async next(){if(r.line=await a.next(),r.line===null)return!0;s.parseLine(r)},current(){return t},line(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead)},head(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead,r.offsetTail)},tail(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetTail)},level(){return r.level},async content(e=-1,n=[]){var l;return e===-1&&(e=r.level+1),await t.next()||r.levelt.tail().trim());const i={};return await t.each(async()=>{if(!!t.head()){if(!e.length||e.includes(t.head())){i[t.head()]=await n();return}if(e&&!e.includes(t.head())&&e.includes("#text")){i["#text"]=[t.line(),...await t.content(t.level())];return}}}),i},async seek(e,n=-1){return n===-1&&(n=r.level),await 
t.next()?!1:t.head()===e?t:r.levelt.level,u=()=>t.line.slice(t.offsetHead),o=()=>t.line.slice(t.offsetHead,t.offsetTail),f=()=>t.line.slice(t.offsetTail),v=e=>e===o();async function d(e){const n=c===0?-1:a();for(;!(await s()||a()<=n||await e()););}async function l(e=-1,n=[u()]){var i;return e===-1&&(e=t.level+1),await s()||t.level{const i=o();if(!!i){if(!e.length||e.includes(i)){r[i]=n?await n():f().trim();return}if(e&&!e.includes(i)&&e.includes("#text")){r["#text"]=await l(a());return}}}),r}async function m(){const e=[["root",[]]];for(;!await s();){const n=a(),r=n+1,i=e[n];if(!i)continue;e.length=r;const x=e[r]=[u(),[]];i[1].push(x)}return e[0]}return{state:c,next:s,line:u,head:o,tail:f,level:a,match:v,each:d,blockAsText:l,buildObject:w,toArrays:m}}exports.useDocument=T; diff --git a/packages/js/core/dist/document.js b/packages/js/core/dist/document.js index a9f8db0..e215aa4 100644 --- a/packages/js/core/dist/document.js +++ b/packages/js/core/dist/document.js @@ -1,68 +1,64 @@ -import { parseLine as f, createLineData as s } from "./parser.js"; -function c(a, u = " ") { - let n = s(null, u); - const t = { - ended: !1, - clone() { - return c(a.clone(), u); - }, - async next() { - if (n.line = await a.next(), n.line === null) - return !0; - f(n); - }, - current() { - return t; - }, - line() { - var e; - return (e = n.line) == null ? void 0 : e.slice(n.offsetHead); - }, - head() { - var e; - return (e = n.line) == null ? void 0 : e.slice(n.offsetHead, n.offsetTail); - }, - tail() { - var e; - return (e = n.line) == null ? void 0 : e.slice(n.offsetTail); - }, - level() { - return n.level; - }, - async content(e = -1, r = []) { - var l; - return e === -1 && (e = n.level + 1), await t.next() || n.level < e ? r : (r.push(((l = n.line) == null ? void 0 : l.slice(e)) || ""), t.content(e, r)); - }, - match(e) { - return e === t.head(); - }, - async each(e) { - const r = t.line() !== void 0 ? 
t.level() : -1; - for (; !(await t.next() || t.level() <= r || await e()); ) - ; - }, - async buildObject(e = [], r) { - r || (r = () => t.tail().trim()); - const i = {}; - return await t.each(async () => { - if (!!t.head()) { - if (!e.length || e.includes(t.head())) { - i[t.head()] = await r(); - return; - } - if (e && !e.includes(t.head()) && e.includes("#text")) { - i["#text"] = [t.line(), ...await t.content(t.level())]; - return; - } +import { parseLine as v, createLineData as T } from "./parser.js"; +function y(h, p = " ") { + let r = 0; + const t = T("", p); + async function s() { + r === 0 && (r = 1); + const e = await h.next(); + return e === null ? (r = 2, !0) : (t.line = e, v(t), !1); + } + const a = () => t.level, o = () => t.line.slice(t.offsetHead), u = () => t.line.slice(t.offsetHead, t.offsetTail), l = () => t.line.slice(t.offsetTail), w = (e) => e === u(); + async function d(e) { + const n = r === 0 ? -1 : a(); + for (; !(await s() || a() <= n || await e()); ) + ; + } + async function f(e = -1, n = [o()]) { + var i; + return e === -1 && (e = t.level + 1), await s() || t.level < e ? n : (n.push(((i = t.line) == null ? void 0 : i.slice(e)) || ""), f(e, n)); + } + async function x(e = [], n) { + const c = {}; + return await d(async () => { + const i = u(); + if (!!i) { + if (!e.length || e.includes(i)) { + c[i] = n ? await n() : l().trim(); + return; } - }), i; - }, - async seek(e, r = -1) { - return r === -1 && (r = n.level), await t.next() ? !1 : t.head() === e ? t : n.level < r ? 
!1 : t.seek(e, r); + if (e && !e.includes(i) && e.includes("#text")) { + c["#text"] = await f(a()); + return; + } + } + }), c; + } + async function b() { + const e = [["root", []]]; + for (; !await s(); ) { + const n = a(), c = n + 1, i = e[n]; + if (!i) + continue; + e.length = c; + const m = e[c] = [o(), []]; + i[1].push(m); } + return e[0]; + } + return { + state: r, + next: s, + line: o, + head: u, + tail: l, + level: a, + match: w, + each: d, + blockAsText: f, + buildObject: x, + toArrays: b }; - return t; } export { - c as useDocument + y as useDocument }; diff --git a/packages/js/core/dist/parser.cjs b/packages/js/core/dist/parser.cjs index 7d0ab01..16bf37d 100644 --- a/packages/js/core/dist/parser.cjs +++ b/packages/js/core/dist/parser.cjs @@ -1 +1 @@ -"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});function f(e=null,r=" "){return{line:e,indent:r,type:0,level:0,offsetHead:0,offsetTail:0}}function o(e){if(typeof e!="object"||!e||typeof e.type!="number"||typeof e.level!="number")throw new Error("'lineData' must be an object with 'line' string, and 'type' and 'level' integer properties");if(typeof e.indent!="string"||e.indent.length===0||e.indent.length>1)throw new Error("'lineData.indent' must be a single-character string");if(typeof e.line!="string")throw new Error("'lineData.line' must be a string");let r=0,t=0;if(!e.line.length)e.type===1&&(t+=1),e.type===0&&(t=e.level),e.type=r,e.level=t,e.offsetHead=0,e.offsetTail=0;else{for(r=1;e.line[t]===e.indent&&t<=e.level+1;)++t;for(e.type=r,e.level=t,e.offsetHead=t,e.offsetTail=t;e.line[e.offsetTail]&&e.line[e.offsetTail]!==" ";)++e.offsetTail}return e}exports.createLineData=f;exports.parseLine=o; +"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});function f(e="",r=" "){return{line:e,indent:r,type:0,level:0,offsetHead:0,offsetTail:0}}function o(e){if(typeof e!="object"||!e||typeof 
e.type!="number"||typeof e.level!="number")throw new Error("'lineData' must be an object with 'line' string, and 'type' and 'level' integer properties");if(typeof e.indent!="string"||e.indent.length===0||e.indent.length>1)throw new Error("'lineData.indent' must be a single-character string");if(typeof e.line!="string")throw new Error("'lineData.line' must be a string");let r=0,t=0;if(!e.line.length)e.type===1&&(t+=1),e.type===0&&(t=e.level),e.type=r,e.level=t,e.offsetHead=0,e.offsetTail=0;else{for(r=1;e.line[t]===e.indent&&t<=e.level+1;)++t;for(e.type=r,e.level=t,e.offsetHead=t,e.offsetTail=t;e.line[e.offsetTail]&&e.line[e.offsetTail]!==" ";)++e.offsetTail}return e}exports.createLineData=f;exports.parseLine=o; diff --git a/packages/js/core/dist/parser.js b/packages/js/core/dist/parser.js index 4e8c666..f0ee311 100644 --- a/packages/js/core/dist/parser.js +++ b/packages/js/core/dist/parser.js @@ -1,4 +1,4 @@ -function t(e = null, r = " ") { +function t(e = "", r = " ") { return { line: e, indent: r, type: 0, level: 0, offsetHead: 0, offsetTail: 0 }; } function o(e) { diff --git a/packages/js/core/package.json b/packages/js/core/package.json index 9a3d976..a8a5568 100644 --- a/packages/js/core/package.json +++ b/packages/js/core/package.json @@ -27,6 +27,7 @@ }, "scripts": { "test": "vitest ./src", + "dev": "vite build --watch", "build": "vite build" }, "devDependencies": { diff --git a/packages/js/core/src/document.ts b/packages/js/core/src/document.ts index fd6a218..ddbe6d1 100644 --- a/packages/js/core/src/document.ts +++ b/packages/js/core/src/document.ts @@ -1,117 +1,131 @@ import type { Reader } from './readers/reader' import { createLineData, parseLine } from './parser' +enum STATE { + READY = 0, + STARTED = 1, + ENDED = 2 +} + type Document = { - ended: boolean, - clone: () => Document, - next: () => Promise - current: () => Document + state: STATE, + next: () => Promise + level: () => number, line: () => string, head: () => string, tail: () => string, - 
content: (contentLevel: number, lines: string[]) => Promise, match: (matchHead: string) => boolean, each: (handler: Function) => void, - buildObject: (allowList: Array, handler: Function) => object, - seek: (matchHead: string, contentLevel: number) => Promise + blockAsText: (startLevel: number, lines?: string[]) => Promise>, + buildObject: (allowKeys: Array, processValue?: () => any) => any, + toArrays(): Promise } +type LineArray = [string, Array]; + export function useDocument (reader: Reader, indent: string = ' '): Document { - let lineData = createLineData(null, indent) + let state = STATE.READY + const lineData = createLineData('', indent) - const document = { - ended: false, + async function next() { + if (state === STATE.READY) state = STATE.STARTED - clone() { - return useDocument(reader.clone(), indent) - }, + const line = await reader.next() + if (line === null) { + state = STATE.ENDED + return true + } - async next() { - lineData.line = await reader.next() - if (lineData.line === null) return true - else parseLine(lineData) - }, + lineData.line = line + parseLine(lineData) + return false + } - current() { - return document - }, + const level = () => lineData.level + const line = () => lineData.line.slice(lineData.offsetHead) + const head = () => lineData.line.slice(lineData.offsetHead, lineData.offsetTail) + const tail = () => lineData.line.slice(lineData.offsetTail) + const match = (matchHead: string): boolean => matchHead === head() - line() { - return lineData.line?.slice(lineData.offsetHead) - }, + async function each(handler: Function) { + // Set startLevel to -1 if we haven't started parsing the document yet. + // Otherwise we'll break too early, as the default value for doc.level() is 0. + const startLevel = state === STATE.READY ? 
-1 : level() - head () { - return lineData.line?.slice(lineData.offsetHead, lineData.offsetTail) - }, - - tail () { - return lineData.line?.slice(lineData.offsetTail) - }, - - level () { - return lineData.level - }, - - async content (contentLevel = -1, lines: string[] = []): Promise> { - if (contentLevel === -1) contentLevel = lineData.level + 1 - - const ended = await document.next() - if (ended) return lines - - if (lineData.level < contentLevel) return lines - - lines.push(lineData.line?.slice(contentLevel) || '') - return document.content(contentLevel, lines) - }, - - match(matchHead: string): boolean { - return matchHead === document.head() - }, - - async each(handler: Function) { - // Set startLevel to -1 if we haven't started parsing the document yet. - // Otherwise we'll break to early, as the default value for doc.level() is 0. - const startLevel = document.line() !== undefined ? document.level() : -1 - - while(true) { - if (await document.next()) break - if (document.level() <= startLevel) break - if (await handler()) break - } - }, - - async buildObject(allowList = [], valHandler?: () => any) { - if (!valHandler) valHandler = () => document.tail().trim() - - const obj = {} - await document.each(async () => { - if (!document.head()) return - if (!allowList.length || allowList.includes(document.head())) { - obj[document.head()] = await valHandler() - return - } - - // Parse unspecified text into an array of lines and save to #text key. 
- if (allowList && !allowList.includes(document.head()) && allowList.includes('#text')) { - obj['#text'] = [document.line(), ...await document.content(document.level())] - return - } - }) - return obj - }, - - async seek (matchHead: string, contentLevel = -1): Promise { - if (contentLevel === -1) contentLevel = lineData.level - - const ended = await document.next() - if (ended) return false - - if (document.head() === matchHead) return document - if (lineData.level < contentLevel) return false - - return document.seek(matchHead, contentLevel) + while(true) { + if (await next()) break + if (level() <= startLevel) break + if (await handler()) break } } - return document + async function blockAsText (startLevel: number = -1, blockLines: string[] = [line()]): Promise> { + if (startLevel === -1) startLevel = lineData.level + 1 + + const ended = await next() + if (ended) return blockLines + if (lineData.level < startLevel) return blockLines + + blockLines.push(lineData.line?.slice(startLevel) || '') + return blockAsText(startLevel, blockLines) + } + + async function buildObject(allowKeys: Array = [], processValue?: () => any) { + const obj: any = {} + await each(async () => { + const currHead = head() + if (!currHead) return + // Set the object key matching the current head if it is allowed, or if no allow list is specified. + if (!allowKeys.length || allowKeys.includes(currHead)) { + // Default to using {head: tail} as the key-value pair if no value handler is specified. + obj[currHead] = processValue ? await processValue() : tail().trim() + return + } + + // Parse unspecified text into an array of lines and save to #text key. + // TODO: Rework this. I don't like it at all. 
+ if (allowKeys && !allowKeys.includes(currHead) && allowKeys.includes('#text')) { + obj['#text'] = await blockAsText(level()) + return + } + }) + return obj + } + + async function toArrays(): Promise { + const levelTracker: Array = [['root', []]] + + // Simple parser that produces canonical array structure for blocks. + while (true) { + // If next() returns true we've ended the document. + if (await next()) break; + const parentLevel = level() + const scopeLevel = parentLevel + 1 + // Determine parent for this scope. + const parent = levelTracker[parentLevel] + // If there's no parent, skip this line. + if (!parent) continue + + levelTracker.length = scopeLevel + const scope = levelTracker[scopeLevel] = [line(), []] + // Add current scope to parent. + parent[1].push(scope) + } + + return levelTracker[0] + } + + return { + state, + next, + line, + head, + tail, + level, + match, + each, + blockAsText, + buildObject, + toArrays, + } } diff --git a/packages/js/core/src/parser.ts b/packages/js/core/src/parser.ts index 0522255..f3913ac 100644 --- a/packages/js/core/src/parser.ts +++ b/packages/js/core/src/parser.ts @@ -1,5 +1,5 @@ export type LineData = { - line: string|null; + line: string; indent: string; type: number; level: number; @@ -7,7 +7,7 @@ export type LineData = { offsetTail: number; } -export function createLineData(line: string|null = null, indent: string = ' '): LineData { +export function createLineData(line: string = '', indent: string = ' '): LineData { return { line, indent, type: 0, level: 0, offsetHead: 0, offsetTail: 0 } }