Start on parser v4.
This commit is contained in:
parent
ef3c59fb74
commit
b87fdfbd83
@ -1,77 +0,0 @@
|
|||||||
import { useDocument } from '@terrace/core'
|
|
||||||
import { createStringReader } from '@terrace/core/readers/js-string'
|
|
||||||
|
|
||||||
/**
 * Parses a Terrace document into nested `[line, children]` scopes under a
 * synthetic `['root', [...]]` scope.
 *
 * @param lines      Input handed to `createStringReader`.
 * @param rootSchema Optional schema object. Its keys are matched against the
 *                   head of each line (`'?'` matches any head, `'? literal'`
 *                   enables capture of trailing text). Each value may carry a
 *                   `count` (how many times the key may match) and a
 *                   `children` schema for the nested level.
 * @returns The root scope, i.e. `['root', childScopes]`.
 */
export async function parse(lines, rootSchema) {
  const doc = useDocument(createStringReader(lines))

  // levelTracker[n] describes the most recently opened scope that may receive
  // lines at indent level n. Index 0 is always the root scope.
  const levelTracker = []

  // Opens a scope at `level`, discarding any deeper scopes that were still
  // open, and records the schema lookup tables when a schema is supplied.
  function createScope(level, line, schema = null) {
    levelTracker.length = level
    const entry = levelTracker[level] = {}
    entry.scope = [line, []]

    if (schema) {
      entry.schemas = Object.values(schema)
      entry.matchers = Object.keys(schema)
      entry.counts = Object.values(schema).map(e => e.count)
    }

    return entry.scope
  }

  createScope(0, 'root', rootSchema)

  // Simpler parsing logic, don't need to worry about schemas, matchers, or counts if no schema is specified.
  if (!rootSchema) {
    while (true) {
      // If doc.next() returns true we've ended the document.
      if (await doc.next()) break
      const level = doc.level()

      // Determine parent for this scope. Optional chaining matters here: a
      // line indented deeper than any open scope has no tracker entry, and
      // must be skipped rather than crash on `undefined.scope`.
      const parent = levelTracker[level]?.scope
      // If there's no parent, skip this line.
      if (!parent) continue

      // Create new scope and add it to its parent.
      const scope = createScope(level + 1, doc.line())
      parent[1].push(scope)
    }
  // Full parsing logic
  } else {
    while (true) {
      // If doc.next() returns true we've ended the document.
      if (await doc.next()) break
      const level = doc.level()

      // Skip lines that have no open parent, and lines whose parent was
      // created without a child schema (it has no matchers to test against).
      const tracked = levelTracker[level]
      if (!tracked || !tracked.matchers) continue

      const { scope: parent, schemas, matchers, counts } = tracked

      // Match the head value, or '?' for unspecified lines.
      const head = doc.head()
      const matchIndex = matchers.findIndex(entry => entry === head || entry === '?')

      if (matchIndex === -1) {
        // Handle trailing blocks of text. TODO: Proper trailing.
        // Each line returned by doc.content() is wrapped in a one-element array.
        if (matchers.includes('? literal')) {
          parent[1].push(...(await doc.content(level)).map(e => [e]))
        }
        continue
      }

      // Skip if the match has already been "used up".
      if (counts[matchIndex] === 0) continue
      // "Use up" one more match.
      counts[matchIndex] -= 1

      // Create new scope, carrying the matched entry's child schema (if any).
      const scopeSchema = schemas[matchIndex]
      const scope = createScope(level + 1, doc.line(), scopeSchema?.children)
      parent[1].push(scope)
    }
  }

  return levelTracker[0].scope
}
|
|
77
docs/experiments/parsers/v4/core.js
Normal file
77
docs/experiments/parsers/v4/core.js
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
import { useDocument } from '@terrace/core'
|
||||||
|
import { createStringReader } from '@terrace/core/readers/js-string'
|
||||||
|
|
||||||
|
/**
 * Parses a package-manifest style Terrace document into a plain object with
 * the keys `name`, `version`, `license`, `exports`, `scripts`,
 * `devDependencies` and `author`. Keys that never appear stay `null`.
 *
 * @param lines Input handed to `createStringReader`.
 * @returns The populated `structure` object.
 */
export async function parse(lines) {
  const { tail, each, match, buildObject } = useDocument(createStringReader(lines))

  const structure = {
    name: null,
    version: null,
    license: null,
    exports: null,
    scripts: null,
    devDependencies: null,
    author: null
  }

  await each(async () => {
    // NOTE: these are deliberately independent `if`s, not an else-if chain.
    // The block readers below advance the document, so by the time one of
    // them returns, the "current line" may already be a later line; the
    // checks that follow get a chance to handle that line in the same pass.
    if (match('name')) structure.name = tail().trim()
    if (match('version')) structure.version = tail().trim()
    if (match('license')) structure.license = tail().trim()

    // FIXME: Order of operations causes other parts to break if this doesn't run first?!
    // Uses buildObject with an empty allow-list (accept every key) and a
    // custom value handler; the previously referenced `parseObjectKV` is not
    // defined or imported anywhere in this module.
    if (match('exports')) structure.exports = await buildObject([], async () => {
      const section = { import: null, require: null }

      await each(() => {
        if (match('import')) section.import = tail().trim()
        if (match('require')) section.require = tail().trim()
        // A truthy return value stops the iteration once both are known.
        if (section.import && section.require) return true
      })

      return section
    })

    if (match('scripts')) structure.scripts = await buildObject()
    if (match('devDependencies')) structure.devDependencies = await buildObject()
    if (match('author')) structure.author = await buildObject(['name', 'email', '#text'])

    // Stop reading as soon as every field has been filled in.
    return structure.name &&
      structure.version &&
      structure.license &&
      structure.exports &&
      structure.scripts &&
      structure.devDependencies &&
      structure.author
  })

  return structure
}
|
||||||
|
|
||||||
|
/**
 * Simple parser that produces the canonical array structure for blocks:
 * every line becomes `[lineText, children]`, nested under a synthetic
 * `['root', [...]]` entry.
 *
 * @param lines Input handed to `createStringReader`.
 * @returns The root entry, `['root', children]`.
 */
export async function toArrays(lines) {
  const { next, level, line } = useDocument(createStringReader(lines))

  // openScopes[n] is the scope that lines at indent level n attach to.
  const openScopes = []
  openScopes.length = 0
  openScopes[0] = ['root', []]

  // next() resolves to true once the document has ended.
  while (!(await next())) {
    const owner = openScopes[level()]

    // A line with no open scope at its level has no parent; ignore it.
    if (!owner) continue

    // Open a scope for this line one level deeper, dropping any scopes that
    // were still open beyond that depth, then attach it to its parent.
    const depth = level() + 1
    const node = [line(), []]
    openScopes.length = depth
    openScopes[depth] = node
    owner[1].push(node)
  }

  return openScopes[0]
}
|
@ -1,6 +1,6 @@
|
|||||||
import { parse } from './core.js'
|
import { parse, toArrays } from './core.js'
|
||||||
|
|
||||||
const linesFull = [
|
const linesArrays = [
|
||||||
`title Example`,
|
`title Example`,
|
||||||
`options`,
|
`options`,
|
||||||
` parameter1 30`,
|
` parameter1 30`,
|
||||||
@ -34,7 +34,7 @@ const linesFull = [
|
|||||||
` 2`
|
` 2`
|
||||||
]
|
]
|
||||||
|
|
||||||
const linesSchema = [
|
const linesParse = [
|
||||||
`name @terrace/core`,
|
`name @terrace/core`,
|
||||||
`version 0.0.1`,
|
`version 0.0.1`,
|
||||||
`randomthing test`,
|
`randomthing test`,
|
||||||
@ -71,6 +71,7 @@ const linesSchema = [
|
|||||||
` email josh@thederf.com`,
|
` email josh@thederf.com`,
|
||||||
` `,
|
` `,
|
||||||
` Further comments below. As I will now demonstrate, there is no simple`,
|
` Further comments below. As I will now demonstrate, there is no simple`,
|
||||||
|
` even if embedded`,
|
||||||
` way of dealing with this problem.`,
|
` way of dealing with this problem.`,
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -98,11 +99,11 @@ const schema = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
const resultFull = await parse(linesFull)
|
const resultArrays = await toArrays(linesArrays)
|
||||||
// console.dir(resultFull, { depth: null })
|
// console.dir(resultArrays, { depth: null })
|
||||||
|
|
||||||
const resultSchema = await parse(linesSchema, schema)
|
const resultParse = await parse(linesParse)
|
||||||
console.dir(resultSchema, { depth: null })
|
console.dir(resultParse, { depth: null })
|
||||||
}
|
}
|
||||||
|
|
||||||
main()
|
main()
|
2
packages/js/core/dist/document.cjs
vendored
2
packages/js/core/dist/document.cjs
vendored
@ -1 +1 @@
|
|||||||
"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const s=require("./parser.cjs");function a(l,u=" "){let r=s.createLineData(null,u);const t={ended:!1,clone(){return a(l.clone(),u)},async next(){if(r.line=await l.next(),r.line===null)return!0;s.parseLine(r)},current(){return t},line(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead)},head(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead,r.offsetTail)},tail(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetTail)},level(){return r.level},async content(e=-1,n=[]){var i;return e===-1&&(e=r.level+1),await t.next()||r.level<e?n:(n.push(((i=r.line)==null?void 0:i.slice(e))||""),t.content(e,n))},async seek(e,n=-1){return n===-1&&(n=r.level),await t.next()?!1:t.head()===e?t:r.level<n?!1:t.seek(e,n)}};return t}exports.useDocument=a;
|
"use strict";Object.defineProperties(exports,{__esModule:{value:!0},[Symbol.toStringTag]:{value:"Module"}});const u=require("./parser.cjs");function s(l,a=" "){let r=u.createLineData(null,a);const t={ended:!1,clone(){return s(l.clone(),a)},async next(){if(r.line=await l.next(),r.line===null)return!0;u.parseLine(r)},current(){return t},line(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead)},head(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetHead,r.offsetTail)},tail(){var e;return(e=r.line)==null?void 0:e.slice(r.offsetTail)},level(){return r.level},async content(e=-1,n=[]){var i;return e===-1&&(e=r.level+1),await t.next()||r.level<e?n:(n.push(((i=r.line)==null?void 0:i.slice(e))||""),t.content(e,n))},match(e){return e===t.head()},async each(e){const n=t.line()!==void 0?t.level():-1;for(;!(await t.next()||t.level()<=n||await e()););},async seek(e,n=-1){return n===-1&&(n=r.level),await t.next()?!1:t.head()===e?t:r.level<n?!1:t.seek(e,n)}};return t}exports.useDocument=s;
|
||||||
|
32
packages/js/core/dist/document.js
vendored
32
packages/js/core/dist/document.js
vendored
@ -1,10 +1,10 @@
|
|||||||
import { parseLine as f, createLineData as s } from "./parser.js";
|
import { parseLine as f, createLineData as s } from "./parser.js";
|
||||||
function d(l, i = " ") {
|
function c(l, a = " ") {
|
||||||
let r = s(null, i);
|
let r = s(null, a);
|
||||||
const t = {
|
const n = {
|
||||||
ended: !1,
|
ended: !1,
|
||||||
clone() {
|
clone() {
|
||||||
return d(l.clone(), i);
|
return c(l.clone(), a);
|
||||||
},
|
},
|
||||||
async next() {
|
async next() {
|
||||||
if (r.line = await l.next(), r.line === null)
|
if (r.line = await l.next(), r.line === null)
|
||||||
@ -12,7 +12,7 @@ function d(l, i = " ") {
|
|||||||
f(r);
|
f(r);
|
||||||
},
|
},
|
||||||
current() {
|
current() {
|
||||||
return t;
|
return n;
|
||||||
},
|
},
|
||||||
line() {
|
line() {
|
||||||
var e;
|
var e;
|
||||||
@ -29,16 +29,24 @@ function d(l, i = " ") {
|
|||||||
level() {
|
level() {
|
||||||
return r.level;
|
return r.level;
|
||||||
},
|
},
|
||||||
async content(e = -1, n = []) {
|
async content(e = -1, t = []) {
|
||||||
var u;
|
var i;
|
||||||
return e === -1 && (e = r.level + 1), await t.next() || r.level < e ? n : (n.push(((u = r.line) == null ? void 0 : u.slice(e)) || ""), t.content(e, n));
|
return e === -1 && (e = r.level + 1), await n.next() || r.level < e ? t : (t.push(((i = r.line) == null ? void 0 : i.slice(e)) || ""), n.content(e, t));
|
||||||
},
|
},
|
||||||
async seek(e, n = -1) {
|
match(e) {
|
||||||
return n === -1 && (n = r.level), await t.next() ? !1 : t.head() === e ? t : r.level < n ? !1 : t.seek(e, n);
|
return e === n.head();
|
||||||
|
},
|
||||||
|
async each(e) {
|
||||||
|
const t = n.line() !== void 0 ? n.level() : -1;
|
||||||
|
for (; !(await n.next() || n.level() <= t || await e()); )
|
||||||
|
;
|
||||||
|
},
|
||||||
|
async seek(e, t = -1) {
|
||||||
|
return t === -1 && (t = r.level), await n.next() ? !1 : n.head() === e ? n : r.level < t ? !1 : n.seek(e, t);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
return t;
|
return n;
|
||||||
}
|
}
|
||||||
export {
|
export {
|
||||||
d as useDocument
|
c as useDocument
|
||||||
};
|
};
|
||||||
|
@ -10,6 +10,9 @@ type Document = {
|
|||||||
head: () => string,
|
head: () => string,
|
||||||
tail: () => string,
|
tail: () => string,
|
||||||
content: (contentLevel: number, lines: string[]) => Promise<string>,
|
content: (contentLevel: number, lines: string[]) => Promise<string>,
|
||||||
|
match: (matchHead: string) => boolean,
|
||||||
|
each: (handler: Function) => void,
|
||||||
|
buildObject: (allowList: Array<string>, handler: Function) => object,
|
||||||
seek: (matchHead: string, contentLevel: number) => Promise<Document|false>
|
seek: (matchHead: string, contentLevel: number) => Promise<Document|false>
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -61,6 +64,42 @@ export function useDocument (reader: Reader, indent: string = ' '): Document {
|
|||||||
return document.content(contentLevel, lines)
|
return document.content(contentLevel, lines)
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
 * Tests the head of the current line against `matchHead`.
 *
 * @param matchHead Head value to compare with (strict equality).
 * @returns `true` when the current line's head is exactly `matchHead`.
 */
match(matchHead: string): boolean {
  const currentHead = document.head()
  return currentHead === matchHead
},
|
||||||
|
|
||||||
|
/**
 * Advances through the document, invoking `handler` for every line nested
 * below the line that was current when `each` was called.
 *
 * Iteration ends when the document ends, when a line at or above the starting
 * level is read, or when `handler` returns (or resolves to) a truthy value.
 *
 * @param handler Callback run once per nested line; its result is awaited.
 */
async each(handler: Function) {
  // Before the first line has been read, document.level() still reports its
  // default of 0. Use -1 as the baseline in that case, otherwise we would
  // stop too early on the very first top-level line.
  const baseLevel = document.line() === undefined ? -1 : document.level()

  for (;;) {
    const ended = await document.next()
    if (ended || document.level() <= baseLevel) return
    if (await handler()) return
  }
},
|
||||||
|
|
||||||
|
/**
 * Builds a plain object from the lines nested below the current line, keyed
 * by each line's head.
 *
 * @param allowList  Heads to accept as keys. An empty (or missing) list
 *                   accepts every head. Including `'#text'` makes lines with
 *                   a non-listed head get captured under the `'#text'` key.
 * @param valHandler Produces the value for an accepted key; defaults to the
 *                   trimmed tail of the line. Its result is awaited.
 * @returns The assembled object.
 */
async buildObject(allowList = [], valHandler?: () => any) {
  // A default parameter only replaces `undefined`, so guard explicitly:
  // a `null` allow-list is treated like an empty one instead of crashing
  // on `.length`.
  const allowed = allowList || []
  const readValue = valHandler || (() => document.tail().trim())
  const captureText = allowed.includes('#text')

  const obj: Record<string, unknown> = {}

  await document.each(async () => {
    // Read the head once, before any handler advances the document.
    const head = document.head()
    if (!head) return

    if (!allowed.length || allowed.includes(head)) {
      obj[head] = await readValue()
      return
    }

    // Parse unspecified text into an array of lines and save to #text key.
    // NOTE(review): document.content() keeps reading until it hits a line
    // below the requested level, so that line appears to be consumed here
    // and then skipped by each() - confirm whether this is intended.
    if (captureText) {
      obj['#text'] = [document.line(), ...await document.content(document.level())]
    }
  })

  return obj
},
|
||||||
|
|
||||||
async seek (matchHead: string, contentLevel = -1): Promise<Document|false> {
|
async seek (matchHead: string, contentLevel = -1): Promise<Document|false> {
|
||||||
if (contentLevel === -1) contentLevel = lineData.level
|
if (contentLevel === -1) contentLevel = lineData.level
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user