From b87fdfbd83fa94df7bfbcc277e6ecfb60978bd66 Mon Sep 17 00:00:00 2001 From: Joshua Bemenderfer Date: Sun, 29 Jan 2023 17:25:43 -0500 Subject: [PATCH] Start on parser v4. --- docs/experiments/parsers/v3/core.js | 77 ------------------- docs/experiments/parsers/v4/core.js | 77 +++++++++++++++++++ .../experiments/parsers/{v3 => v4}/example.js | 15 ++-- packages/js/core/dist/document.cjs | 2 +- packages/js/core/dist/document.js | 32 +++++--- packages/js/core/src/document.ts | 39 ++++++++++ 6 files changed, 145 insertions(+), 97 deletions(-) delete mode 100644 docs/experiments/parsers/v3/core.js create mode 100644 docs/experiments/parsers/v4/core.js rename docs/experiments/parsers/{v3 => v4}/example.js (89%) diff --git a/docs/experiments/parsers/v3/core.js b/docs/experiments/parsers/v3/core.js deleted file mode 100644 index cca7ee1..0000000 --- a/docs/experiments/parsers/v3/core.js +++ /dev/null @@ -1,77 +0,0 @@ -import { useDocument } from '@terrace/core' -import { createStringReader } from '@terrace/core/readers/js-string' - -export async function parse(lines, rootSchema) { - const doc = useDocument(createStringReader(lines)) - - const levelTracker = [] - - function createScope(level, line, schema = null) { - levelTracker.length = level - const entry = levelTracker[level] = {} - entry.scope = [line, []] - - if (schema) { - entry.schemas = Object.values(schema), - entry.matchers = Object.keys(schema), - entry.counts = Object.values(schema).map(e => e.count) - } - - return entry.scope - } - - createScope(0, 'root', rootSchema) - - // Simpler parsing logic, don't need to worry about schemas, matchers, or counts if no schema is specified. - if (!rootSchema) { - while (true) { - // If doc.next() returns true we've ended the document. - if (await doc.next()) break; - const level = doc.level() - // Determine parent for this scope. - const parent = levelTracker[level].scope - // If there's no parent, skip this line. - if (!parent) continue - - // Create new scope - const scope = createScope(level + 1, doc.line()) - // Add current scope to parent. - parent[1].push(scope) - } - // Full parsing logic - } else { - while (true) { - // If doc.next() returns true we've ended the document. - if (await doc.next()) break; - const level = doc.level() - if (!levelTracker[level]) continue - - // Determine parent for this scope. - const parent = levelTracker[level].scope - const schemas = levelTracker[level].schemas - const matchers = levelTracker[level].matchers - const counts = levelTracker[level].counts - - // Match the head value, or '?' for unspecified lines. - const matchIndex = matchers.findIndex(entry => entry === doc.head() || entry === '?') - - // Handle trailing blocks of text. TODO: Proper trailing. - if (matchIndex === -1 && matchers.includes('? literal')) { - parent[1].push(...(await doc.content(level)).map(e => [e])) - continue - } else if (matchIndex === -1) continue - - // Return if the match has already been "used up" - if (counts[matchIndex] === 0) continue - // "use up" one more match - counts[matchIndex] -= 1 - - const scopeSchema = schemas[matchIndex] - // Create new scope - const scope = createScope(level + 1, doc.line(), scopeSchema?.children) - parent[1].push(scope) - } - } - - return levelTracker[0].scope -} diff --git a/docs/experiments/parsers/v4/core.js b/docs/experiments/parsers/v4/core.js new file mode 100644 index 0000000..80d2ff7 --- /dev/null +++ b/docs/experiments/parsers/v4/core.js @@ -0,0 +1,77 @@ +import { useDocument } from '@terrace/core' +import { createStringReader } from '@terrace/core/readers/js-string' + +export async function parse(lines) { + const { tail, each, match, buildObject } = useDocument(createStringReader(lines)) + + const structure = { + name: null, + version: null, + license: null, + exports: null, + scripts: null, + devDependencies: null, + author: null + } + + await each(async () => { + if (match('name')) structure.name = tail().trim() + if (match('version')) structure.version = tail().trim() + if (match('license')) structure.license = tail().trim() + // FIXME: Order of operations causes other parts to break if this doesn't run first?! + if (match('exports')) structure.exports = await parseObjectKV(null, async () => { + const section = { import: null, require: null } + + await each(() => { + if (match('import')) section.import = tail().trim() + if (match('require')) section.require = tail().trim() + if (section.import && section.require) return true + }) + + return section + }) + if (match('scripts')) structure.scripts = await buildObject() + if (match('devDependencies')) structure.devDependencies = await buildObject() + if (match('author')) structure.author = await buildObject(['name', 'email', '#text']) + + return structure.name && + structure.version && + structure.license && + structure.exports && + structure.scripts && + structure.devDependencies && + structure.author + }) + + return structure +} + +export async function toArrays(lines) { + const { next, level, line } = useDocument(createStringReader(lines)) + + const levelTracker = [] + + function createScope(level, line) { + levelTracker.length = level + const scope = levelTracker[level] = [line, []] + return scope + } + createScope(0, 'root') + + // Simple parser that produces canonical array structure for blocks. + while (true) { + // If next() returns true we've ended the document. + if (await next()) break; + // Determine parent for this scope. + const parent = levelTracker[level()] + // If there's no parent, skip this line. + if (!parent) continue + + // Create new scope + const scope = createScope(level() + 1, line()) + // Add current scope to parent. + parent[1].push(scope) + } + + return levelTracker[0] +} diff --git a/docs/experiments/parsers/v3/example.js b/docs/experiments/parsers/v4/example.js similarity index 89% rename from docs/experiments/parsers/v3/example.js rename to docs/experiments/parsers/v4/example.js index 8637e9d..8be005d 100644 --- a/docs/experiments/parsers/v3/example.js +++ b/docs/experiments/parsers/v4/example.js @@ -1,6 +1,6 @@ -import { parse } from './core.js' +import { parse, toArrays } from './core.js' -const linesFull = [ +const linesArrays = [ `title Example`, `options`, ` parameter1 30`, @@ -34,7 +34,7 @@ const linesFull = [ ` 2` ] -const linesSchema = [ +const linesParse = [ `name @terrace/core`, `version 0.0.1`, `randomthing test`, @@ -71,6 +71,7 @@ const linesSchema = [ ` email josh@thederf.com`, ` `, ` Further comments below. As I will now demonstrate, there is no simple`, + ` even if embedded`, ` way of dealing with this problem.`, ] @@ -98,11 +99,11 @@ const schema = { } async function main() { - const resultFull = await parse(linesFull) - // console.dir(resultFull, { depth: null }) + const resultArrays = await toArrays(linesArrays) + // console.dir(resultArrays, { depth: null }) - const resultSchema = await parse(linesSchema, schema) - console.dir(resultSchema, { depth: null }) + const resultParse = await parse(linesParse) + console.dir(resultParse, { depth: null }) } main() diff --git a/packages/js/core/dist/document.cjs b/packages/js/core/dist/document.cjs index d643bfa..97980ac 100644 --- a/packages/js/core/dist/document.cjs +++ b/packages/js/core/dist/document.cjs @@ -1 +1 @@ -"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const s=require("./parser.cjs");function a(l,u=" "){let r=s.createLineData(null,u);const t={ended:!1,clone(){return a(l.clone(),u)},async next(){if(r.line=await l.next(),r.line===null)return!0;s.parseLine(r)},current(){return t},line(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead)},head(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead,r.offsetTail)},tail(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetTail)},level(){return r.level},async content(e=-1,n=[]){var i;return e===-1&&(e=r.level+1),await t.next()||r.level string, tail: () => string, content: (contentLevel: number, lines: string[]) => Promise, + match: (matchHead: string) => boolean, + each: (handler: Function) => void, + buildObject: (allowList: Array, handler: Function) => object, seek: (matchHead: string, contentLevel: number) => Promise } @@ -61,6 +64,42 @@ export function useDocument (reader: Reader, indent: string = ' '): Document { return document.content(contentLevel, lines) }, + match(matchHead: string): boolean { + return matchHead === document.head() + }, + + async each(handler: Function) { + // Set startLevel to -1 if we haven't started parsing the document yet. + // Otherwise we'll break to early, as the default value for doc.level() is 0. + const startLevel = document.line() !== undefined ? document.level() : -1 + + while(true) { + if (await document.next()) break + if (document.level() <= startLevel) break + if (await handler()) break + } + }, + + async buildObject(allowList = [], valHandler?: () => any) { + if (!valHandler) valHandler = () => document.tail().trim() + + const obj = {} + await document.each(async () => { + if (!document.head()) return + if (!allowList.length || allowList.includes(document.head())) { + obj[document.head()] = await valHandler() + return + } + + // Parse unspecified text into an array of lines and save to #text key. + if (allowList && !allowList.includes(document.head()) && allowList.includes('#text')) { + obj['#text'] = [document.line(), ...await document.content(document.level())] + return + } + }) + return obj + }, + async seek (matchHead: string, contentLevel = -1): Promise { if (contentLevel === -1) contentLevel = lineData.level