From e256dd739787dcd48b3451188eb57f190eaababa Mon Sep 17 00:00:00 2001 From: Joshua Bemenderfer Date: Sat, 28 Jan 2023 22:24:44 -0500 Subject: [PATCH] New implementation of v3, should be more foolproof with simpler schemas. --- docs/experiments/parsers/v3/core.js | 137 +++++++++++++------------ docs/experiments/parsers/v3/example.js | 77 ++++++++++++-- packages/js/core/dist/document.cjs | 3 +- packages/js/core/dist/document.js | 27 +++-- packages/js/core/src/document.ts | 6 +- 5 files changed, 157 insertions(+), 93 deletions(-) diff --git a/docs/experiments/parsers/v3/core.js b/docs/experiments/parsers/v3/core.js index 106abcc..6b30386 100644 --- a/docs/experiments/parsers/v3/core.js +++ b/docs/experiments/parsers/v3/core.js @@ -1,76 +1,77 @@ import { useDocument } from '@terrace/core' import { createStringReader } from '@terrace/core/readers/js-string' -export async function parse(lines, schema) { +export async function parse(lines, rootSchema) { const doc = useDocument(createStringReader(lines)) - function createScope(line, matchers) { - return { - block: [line, []], - handlers: [], - matchers: matchers + const levelTracker = [] + + function createScope(level, line, schema = null) { + levelTracker.length = level + const entry = levelTracker[level] = {} + entry.scope = [line, []] + + if (schema) { + entry.schemas = Object.values(schema), + entry.matchers = Object.keys(schema), + entry.counts = Object.values(schema).map(e => e.count) + } + + return entry.scope + } + + createScope(0, 'root', rootSchema) + + // Simpler parsing logic, don't need to worry about matcherList or the likes. + if (!rootSchema) { + while (true) { + // If doc.next() returns true we've ended the document. + if (await doc.next()) break; + const level = doc.level() + // Determine parent for this scope. + const parent = levelTracker[level].scope + // If there's no parent, skip this line. + if (!parent) continue + + // Create new scope + const scope = createScope(level + 1, doc.line()) + // Add current scope to parent. + parent[1].push(scope) + } + // Full parsing logic + } else { + while (true) { + // If doc.next() returns true we've ended the document. + if (await doc.next()) break; + const level = doc.level() + if (!levelTracker[level]) continue + + // Determine parent for this scope. + const parent = levelTracker[level].scope + const schemas = levelTracker[level].schemas + const matchers = levelTracker[level].matchers + const counts = levelTracker[level].counts + + // Match the head value, or '?' for unspecified lines. + const matchIndex = matchers.findIndex(entry => entry === doc.head() || entry === '?') + + // Handle trailing blocks of text. TODO: Proper trailing. + if (matchIndex === -1 && matchers.includes('? literal')) { + parent[1].push(...(await doc.content(level)).map(e => [e])) + continue + } else if (matchIndex === -1) continue + + // Return if the match has already been "used up" + if (counts[matchIndex] === 0) continue + // "use up" one more match + counts[matchIndex] -= 1 + + const scopeSchema = schemas[matchIndex] + // Create new scope + const scope = createScope(level + 1, doc.line(), scopeSchema?.children) + parent[1].push(scope) } } - const scopes = [createScope('root', { - title ({ addHandler }) { - addHandler(scope => [scope[0].split(' ')[0], scope[0].split(' ').slice(1).join(' ')]) - }, - options ({ addMatcher }) { - addMatcher({ - parameter1({ addHandler }) { - addHandler(scope => [scope[0].split(' ')[0], +scope[0].split(' ').slice(1).join(' ')]) - } - }) - } - })] - - while (true) { - // If doc.next() returns true we've ended the document. - if (await doc.next()) break; - const level = doc.level() + 1 - - // Trigger macros for closed scopes. - for (let i = scopes.length - 1; i >= level; --i) { - const scope = scopes[i] - const parent = scopes[i - 1] - - // Remove empty arrays from scopes without children. - if (scope.block[1] && !scope.block[1].length) scope.block.length = 1 - - // Postprocess scope with relevant macros. - scope.block = scope.handlers.reduce((block, handler) => { - return handler(block) - }, [...scope.block]) - - // Add to parent block. - parent.block[1].push(scope.block) - } - - // Reset scope length to avoid dangling scopes. - scopes.length = level - - // Define current scope - const scope = scopes[level] = createScope(doc.line(), {}) - // Determine parent for this scope. - const parent = scopes[level - 1] - - // Add a postprocess handler for this scope. - // Ie. When we leave the scope, rewrite it using this handler. - function addHandler(handler) { - scope.handlers.push(handler) - } - - // Add matchers for this scope's children. - function addMatcher(definition) { - scope.matchers = definition - } - - // Run matching matchers for this scope. - if (parent.matchers[doc.head()]) { - parent.matchers[doc.head()]({ addHandler, addMatcher }) - } - } - - return scopes[0].block -} \ No newline at end of file + return levelTracker[0].scope +} diff --git a/docs/experiments/parsers/v3/example.js b/docs/experiments/parsers/v3/example.js index e0bde0f..8637e9d 100644 --- a/docs/experiments/parsers/v3/example.js +++ b/docs/experiments/parsers/v3/example.js @@ -1,10 +1,6 @@ import { parse } from './core.js' -const schema = { -} - - -const lines = [ +const linesFull = [ `title Example`, `options`, ` parameter1 30`, @@ -12,11 +8,13 @@ const lines = [ `options`, ` parameter1 0`, ` parameter2 Esse incididunt et est adipisicing eiusmod aliqua enim ea aliqua id enim.`, + ` deep Enim fugiat do in est commodo culpa dolore.`, `subsection`, ` position 1`, ` Ea dolore in aliquip fugiat anim adipisicing amet aute tempor et deserunt est duis sint.`, `subsection 2`, ` position 2`, + ` `, ` Aute deserunt incididunt ad in sint adipisicing est officia velit pariatur ipsum deserunt quis nulla.`, ` Ea dolore in aliquip fugiat anim adipisicing amet aute tempor et deserunt est duis sint.`, `list`, @@ -36,8 +34,75 @@ const lines = [ ` 2` ] +const linesSchema = [ + `name @terrace/core`, + `version 0.0.1`, + `randomthing test`, + `license MIT`, + `license GPL`, + `exports`, + ` .`, + ` import ./dist/index.js`, + ` require ./dist/index.cjs`, + ` ./parser`, + ` import ./dist/parser.js`, + ` require ./dist/parser.cjs`, + ``, + ` ./document`, + ` import ./dist/document.js`, + ` require ./dist/document.cjs`, + ``, + ` ./readers/node-readline`, + ` import ./dist/readers/node-readline.js`, + ` require ./dist/readers/node-readline.cjs`, + ``, + ` ./readers/js-string`, + ` import ./dist/readers/js-string.js`, + ` require ./dist/readers/js-string.cjs`, + `scripts`, + ` test vitest ./src`, + ` build vite build`, + `devDependencies`, + ` vite ^3.2.3`, + ` vitest ^0.24.5`, + ``, + `author`, + ` name Joshua Bemenderfer`, + ` email josh@thederf.com`, + ` `, + ` Further comments below. As I will now demonstrate, there is no simple`, + ` way of dealing with this problem.`, +] + +const schema = { + "name": {count: 1}, + "version": {count: 1}, + "license": {count: 1}, + "exports": {count: 1, children: { + "?": {count: -1, children: { + "import": {count: 1}, + "require": {count: 1} + }} + }}, + "scripts": {count: 1, children: { + "?": { count: -1 } + }}, + "devDependencies": {count: 1, children: { + "?": { count: -1 } + }}, + "author": { count: 1, children: { + "name": { count: 1 }, + "email": { count: 1 }, + "? literal": { count: -1 } + }} +} + async function main() { - console.dir(await parse(lines, schema), { depth: null }) + const resultFull = await parse(linesFull) + // console.dir(resultFull, { depth: null }) + + const resultSchema = await parse(linesSchema, schema) + console.dir(resultSchema, { depth: null }) } main() diff --git a/packages/js/core/dist/document.cjs b/packages/js/core/dist/document.cjs index 5ba5b3d..d643bfa 100644 --- a/packages/js/core/dist/document.cjs +++ b/packages/js/core/dist/document.cjs @@ -1,2 +1 @@ -"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const s=require("./parser.cjs");function a(l,u=" "){let r=s.createLineData(null,u);const t={ended:!1,clone(){return a(l.clone(),u)},async next(){if(r.line=await l.next(),r.line===null)return!0;s.parseLine(r)},current(){return t},line(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead)},head(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead,r.offsetTail)},tail(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetTail)},level(){return r.level},async content(e=-1,n=[]){var i;return e===-1&&(e=r.level+1),await t.next()||r.level { + async content (contentLevel = -1, lines: string[] = []): Promise> { if (contentLevel === -1) contentLevel = lineData.level + 1 const ended = await document.next() - if (ended) return lines.join('\n') + if (ended) return lines - if (lineData.level < contentLevel) return lines.join('\n') + if (lineData.level < contentLevel) return lines lines.push(lineData.line?.slice(contentLevel) || '') return document.content(contentLevel, lines)