Start on parser v4.

This commit is contained in:
Joshua Bemenderfer 2023-01-29 17:25:43 -05:00
parent ef3c59fb74
commit b87fdfbd83
6 changed files with 145 additions and 97 deletions

View File

@ -1,77 +0,0 @@
import { useDocument } from '@terrace/core'
import { createStringReader } from '@terrace/core/readers/js-string'
/**
 * Parses a document into nested `[line, children]` arrays, optionally filtered by a schema.
 *
 * Without `rootSchema`, every line whose parent level is being tracked becomes a scope.
 * With `rootSchema`, a line is kept only when its head matches a key of the schema in
 * effect at its level (or that schema has a '?' key) and that key's `count` is not
 * used up. Unmatched lines under a schema containing a '? literal' key are stored as
 * raw text entries instead.
 *
 * @param lines Source lines handed to `createStringReader`.
 * @param rootSchema Optional schema object; each value may carry `count` and `children`.
 * @returns The root scope: `['root', children]`.
 */
export async function parse(lines, rootSchema) {
  const doc = useDocument(createStringReader(lines))

  // levelTracker[n] is the most recently opened scope that accepts children at level n.
  const levelTracker = []

  function createScope(level, line, schema = null) {
    // Discard any previously tracked scopes at this level and deeper.
    levelTracker.length = level

    const entry = levelTracker[level] = {}
    entry.scope = [line, []]

    if (schema) {
      const definitions = Object.values(schema)
      entry.schemas = definitions
      entry.matchers = Object.keys(schema)
      entry.counts = definitions.map(e => e.count)
    }

    return entry.scope
  }

  createScope(0, 'root', rootSchema)

  // Simpler parsing logic, don't need to worry about schemas, matchers, or counts if no schema is specified.
  if (!rootSchema) {
    while (true) {
      // If doc.next() returns true we've ended the document.
      if (await doc.next()) break;

      const level = doc.level()

      // Determine parent for this scope. Optional chaining matters here: an over-indented
      // line has no tracked entry at its level, and reading `.scope` off `undefined`
      // would throw before the skip below could run.
      const parent = levelTracker[level]?.scope

      // If there's no parent, skip this line.
      if (!parent) continue

      // Create new scope and add it to the parent.
      const scope = createScope(level + 1, doc.line())
      parent[1].push(scope)
    }
  // Full parsing logic
  } else {
    while (true) {
      // If doc.next() returns true we've ended the document.
      if (await doc.next()) break;

      const level = doc.level()
      const tracked = levelTracker[level]
      if (!tracked) continue

      const { scope: parent, schemas, matchers, counts } = tracked

      // A scope created without a child schema has no matchers, so nothing beneath it
      // can match; skip instead of crashing on `matchers.findIndex`.
      if (!matchers) continue

      // Match the head value, or '?' for unspecified lines.
      const matchIndex = matchers.findIndex(entry => entry === doc.head() || entry === '?')

      // Handle trailing blocks of text. TODO: Proper trailing.
      if (matchIndex === -1 && matchers.includes('? literal')) {
        parent[1].push(...(await doc.content(level)).map(e => [e]))
        continue
      } else if (matchIndex === -1) continue

      // Skip if the match has already been "used up"
      if (counts[matchIndex] === 0) continue
      // "use up" one more match
      counts[matchIndex] -= 1

      const scopeSchema = schemas[matchIndex]

      // Create new scope, carrying the matched entry's child schema (if any).
      const scope = createScope(level + 1, doc.line(), scopeSchema?.children)
      parent[1].push(scope)
    }
  }

  return levelTracker[0].scope
}

View File

@ -0,0 +1,77 @@
import { useDocument } from '@terrace/core'
import { createStringReader } from '@terrace/core/readers/js-string'
/**
 * Parses a document with package.json-like fields into a fixed structure.
 *
 * Scalar fields (`name`, `version`, `license`) take the trimmed tail of their line.
 * `exports`, `scripts`, `devDependencies` and `author` are read from their nested
 * lines via `buildObject`. Fields that never appear stay `null`.
 *
 * @param lines Source lines handed to `createStringReader`.
 * @returns The populated structure object.
 */
export async function parse(lines) {
  const { tail, each, match, buildObject } = useDocument(createStringReader(lines))

  const structure = {
    name: null,
    version: null,
    license: null,
    exports: null,
    scripts: null,
    devDependencies: null,
    author: null
  }

  await each(async () => {
    if (match('name')) structure.name = tail().trim()
    if (match('version')) structure.version = tail().trim()
    if (match('license')) structure.license = tail().trim()

    // FIXME: Order of operations causes other parts to break if this doesn't run first?!
    // NOTE(review): probably because each block reader below stops on the first line
    // past its block and leaves that line current for the following `if` checks — confirm.
    //
    // This previously called `parseObjectKV`, which is not defined or imported in this
    // file; `buildObject` is the destructured helper with the matching
    // (allowList, valueHandler) shape. An empty allow list accepts every key (`null`
    // would not trigger buildObject's default parameter).
    if (match('exports')) structure.exports = await buildObject([], async () => {
      const section = { import: null, require: null }
      await each(() => {
        if (match('import')) section.import = tail().trim()
        if (match('require')) section.require = tail().trim()
        // Stop as soon as both entries are known.
        if (section.import && section.require) return true
      })
      return section
    })

    if (match('scripts')) structure.scripts = await buildObject()
    if (match('devDependencies')) structure.devDependencies = await buildObject()
    if (match('author')) structure.author = await buildObject(['name', 'email', '#text'])

    // A truthy return ends the outer iteration once every field has been filled.
    return structure.name &&
      structure.version &&
      structure.license &&
      structure.exports &&
      structure.scripts &&
      structure.devDependencies &&
      structure.author
  })

  return structure
}
/**
 * Converts a document into the canonical nested array structure for blocks:
 * every line becomes `[line, children]`, all hanging off a `['root', children]` node.
 *
 * @param lines Source lines handed to `createStringReader`.
 * @returns The root node.
 */
export async function toArrays(lines) {
  const { next, level, line } = useDocument(createStringReader(lines))

  // openScopes[n] is the latest node able to receive children at indent level n.
  const openScopes = []

  const openScope = (depth, text) => {
    // Truncate first so nodes at this depth and deeper are forgotten.
    openScopes.length = depth
    const node = [text, []]
    openScopes[depth] = node
    return node
  }

  openScope(0, 'root')

  // next() resolves to true once the document has ended.
  while (!(await next())) {
    const depth = level()
    const parent = openScopes[depth]

    // Lines with no tracked parent at their level are dropped.
    if (parent) parent[1].push(openScope(depth + 1, line()))
  }

  return openScopes[0]
}

View File

@ -1,6 +1,6 @@
import { parse } from './core.js' import { parse, toArrays } from './core.js'
const linesFull = [ const linesArrays = [
`title Example`, `title Example`,
`options`, `options`,
` parameter1 30`, ` parameter1 30`,
@ -34,7 +34,7 @@ const linesFull = [
` 2` ` 2`
] ]
const linesSchema = [ const linesParse = [
`name @terrace/core`, `name @terrace/core`,
`version 0.0.1`, `version 0.0.1`,
`randomthing test`, `randomthing test`,
@ -71,6 +71,7 @@ const linesSchema = [
` email josh@thederf.com`, ` email josh@thederf.com`,
` `, ` `,
` Further comments below. As I will now demonstrate, there is no simple`, ` Further comments below. As I will now demonstrate, there is no simple`,
` even if embedded`,
` way of dealing with this problem.`, ` way of dealing with this problem.`,
] ]
@ -98,11 +99,11 @@ const schema = {
} }
async function main() { async function main() {
const resultFull = await parse(linesFull) const resultArrays = await toArrays(linesArrays)
// console.dir(resultFull, { depth: null }) // console.dir(resultArrays, { depth: null })
const resultSchema = await parse(linesSchema, schema) const resultParse = await parse(linesParse)
console.dir(resultSchema, { depth: null }) console.dir(resultParse, { depth: null })
} }
main() main()

View File

@ -1 +1 @@
"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const s=require("./parser.cjs");function a(l,u=" "){let r=s.createLineData(null,u);const t={ended:!1,clone(){return a(l.clone(),u)},async next(){if(r.line=await l.next(),r.line===null)return!0;s.parseLine(r)},current(){return t},line(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead)},head(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead,r.offsetTail)},tail(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetTail)},level(){return r.level},async content(e=-1,n=[]){var i;return e===-1&&(e=r.level+1),await t.next()||r.level<e?n:(n.push(((i=r.line)==null?void 0:i.slice(e))||""),t.content(e,n))},async seek(e,n=-1){return n===-1&&(n=r.level),await t.next()?!1:t.head()===e?t:r.level<n?!1:t.seek(e,n)}};return t}exports.useDocument=a; "use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const u=require("./parser.cjs");function s(l,a=" "){let r=u.createLineData(null,a);const t={ended:!1,clone(){return s(l.clone(),a)},async next(){if(r.line=await l.next(),r.line===null)return!0;u.parseLine(r)},current(){return t},line(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead)},head(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead,r.offsetTail)},tail(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetTail)},level(){return r.level},async content(e=-1,n=[]){var i;return e===-1&&(e=r.level+1),await t.next()||r.level<e?n:(n.push(((i=r.line)==null?void 0:i.slice(e))||""),t.content(e,n))},match(e){return e===t.head()},async each(e){const n=t.line()!==void 0?t.level():-1;for(;!(await t.next()||t.level()<=n||await e()););},async seek(e,n=-1){return n===-1&&(n=r.level),await t.next()?!1:t.head()===e?t:r.level<n?!1:t.seek(e,n)}};return t}exports.useDocument=s;

View File

@ -1,10 +1,10 @@
import { parseLine as f, createLineData as s } from "./parser.js"; import { parseLine as f, createLineData as s } from "./parser.js";
function d(l, i = " ") { function c(l, a = " ") {
let r = s(null, i); let r = s(null, a);
const t = { const n = {
ended: !1, ended: !1,
clone() { clone() {
return d(l.clone(), i); return c(l.clone(), a);
}, },
async next() { async next() {
if (r.line = await l.next(), r.line === null) if (r.line = await l.next(), r.line === null)
@ -12,7 +12,7 @@ function d(l, i = " ") {
f(r); f(r);
}, },
current() { current() {
return t; return n;
}, },
line() { line() {
var e; var e;
@ -29,16 +29,24 @@ function d(l, i = " ") {
level() { level() {
return r.level; return r.level;
}, },
async content(e = -1, n = []) { async content(e = -1, t = []) {
var u; var i;
return e === -1 && (e = r.level + 1), await t.next() || r.level < e ? n : (n.push(((u = r.line) == null ? void 0 : u.slice(e)) || ""), t.content(e, n)); return e === -1 && (e = r.level + 1), await n.next() || r.level < e ? t : (t.push(((i = r.line) == null ? void 0 : i.slice(e)) || ""), n.content(e, t));
}, },
async seek(e, n = -1) { match(e) {
return n === -1 && (n = r.level), await t.next() ? !1 : t.head() === e ? t : r.level < n ? !1 : t.seek(e, n); return e === n.head();
},
async each(e) {
const t = n.line() !== void 0 ? n.level() : -1;
for (; !(await n.next() || n.level() <= t || await e()); )
;
},
async seek(e, t = -1) {
return t === -1 && (t = r.level), await n.next() ? !1 : n.head() === e ? n : r.level < t ? !1 : n.seek(e, t);
} }
}; };
return t; return n;
} }
export { export {
d as useDocument c as useDocument
}; };

View File

@ -10,6 +10,9 @@ type Document = {
head: () => string, head: () => string,
tail: () => string, tail: () => string,
content: (contentLevel: number, lines: string[]) => Promise<string>, content: (contentLevel: number, lines: string[]) => Promise<string>,
match: (matchHead: string) => boolean,
each: (handler: Function) => void,
buildObject: (allowList: Array<string>, handler: Function) => object,
seek: (matchHead: string, contentLevel: number) => Promise<Document|false> seek: (matchHead: string, contentLevel: number) => Promise<Document|false>
} }
@ -61,6 +64,42 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
return document.content(contentLevel, lines) return document.content(contentLevel, lines)
}, },
/** Reports whether the head of the current line is exactly `matchHead`. */
match(matchHead: string): boolean {
  const currentHead = document.head()
  return currentHead === matchHead
},
/**
 * Steps through the lines nested beneath the line that is current when `each` is
 * called, awaiting `handler` for each one.
 *
 * Iteration ends when the document ends, when a line at or above the starting level
 * is reached (that line has already been read at that point), or when `handler`
 * returns a truthy value.
 */
async each(handler: Function) {
  // If no line has been read yet, use -1 as the starting level. Otherwise we'd stop
  // too early, as the default value for document.level() is 0.
  const baseLevel = document.line() === undefined ? -1 : document.level()

  let finished = false
  while (!finished) {
    // Short-circuiting keeps the original order: read a line, check its level, and
    // only then invoke the handler.
    finished = Boolean(
      (await document.next()) ||
      document.level() <= baseLevel ||
      (await handler())
    )
  }
},
/**
 * Builds a plain object from the lines nested beneath the current line, keyed by
 * each line's head.
 *
 * @param allowList Heads to accept. Empty, omitted or `null` accepts every head.
 *   When it contains '#text', a line whose head is not listed is captured, together
 *   with the content that follows it, as an array of lines under the '#text' key.
 * @param valHandler Produces the value for an accepted line; defaults to the
 *   trimmed tail of the line.
 * @returns The assembled object.
 */
async buildObject(allowList = [], valHandler?: () => any) {
  // A default parameter only covers `undefined`; normalise `null` as well so the
  // `.length` / `.includes` calls below cannot throw.
  const allowed = allowList ?? []
  const readValue = valHandler ?? (() => document.tail().trim())
  const obj: Record<string, unknown> = {}

  await document.each(async () => {
    // Capture the key up front: the value handler may advance the document.
    const head = document.head()
    if (!head) return

    if (!allowed.length || allowed.includes(head)) {
      obj[head] = await readValue()
      return
    }

    // Parse unspecified text into an array of lines and save to #text key.
    if (allowed.includes('#text')) {
      obj['#text'] = [document.line(), ...await document.content(document.level())]
    }
  })

  return obj
},
async seek (matchHead: string, contentLevel = -1): Promise<Document|false> { async seek (matchHead: string, contentLevel = -1): Promise<Document|false> {
if (contentLevel === -1) contentLevel = lineData.level if (contentLevel === -1) contentLevel = lineData.level