Reworked parser to be macro based.

This commit is contained in:
Joshua Bemenderfer
2022-11-15 17:10:17 -05:00
parent 4405f4857d
commit 296056ce16
3 changed files with 188 additions and 266 deletions

View File

@@ -1,166 +1,135 @@
import { createLineData, useDocument } from '@terrace/core' import { createLineData, useDocument } from '@terrace/core'
import { createStringReader } from '@terrace/core/readers/js-string' import { createStringReader } from '@terrace/core/readers/js-string'
/**
 * Sentinel keys used inside scope definitions.
 *
 * - `TAIL`: looked up by the parser when a scope is opened; the macro stored
 *   under it is run against the line that opened the scope.
 * - `UNMATCHED`: looked up by the parser for a line whose head has no handler
 *   of its own in the current scope definition.
 *
 * Symbols are used so these slots can never be confused with a string head.
 * The registry is frozen: replacing a member at runtime would silently break
 * every `definition[SYMBOLS.X]` lookup.
 */
export const SYMBOLS = Object.freeze({
  TAIL: Symbol('TAIL'),
  UNMATCHED: Symbol('UNMATCHED')
})
/**
 * Built-in macros. Every macro receives one `args` object; the fields read
 * here are `macros` (the active macro table), `head`, `tail`, `line` and
 * `addScope`. A macro returns its parsed value, or `undefined` when the
 * input does not match.
 */
export const BASE_MACROS = {
  /**
   * Returns the tail as a string.
   * @param {{ tail: * }} args
   * @returns {string}
   */
  string({ tail }) {
    return tail.toString()
  },

  /**
   * Parses the tail as a number.
   * @param {{ tail: * }} args
   * @returns {number|undefined} `undefined` when the tail is blank or not numeric.
   */
  number({ tail }) {
    // Number('') and Number('   ') are both 0, so blank tails must be
    // rejected explicitly rather than relying on the NaN check.
    if (typeof tail === 'string' && tail.trim() === '') return undefined
    const num = Number(tail)
    return Number.isNaN(num) ? undefined : num
  },

  /**
   * Parses the tail as a number when possible, otherwise as a string.
   * @param {object} args
   * @returns {number|string}
   */
  primitive(args) {
    // The whole args object is forwarded: the delegated macros need `tail`.
    const num = args.macros.number(args)
    return num !== undefined ? num : args.macros.string(args)
  },

  /**
   * Resolves a line without a fixed type: first a macro named after the head,
   * then a numeric tail, and finally the entire line as a string.
   * @param {object} args
   * @returns {*}
   */
  any(args) {
    const { macros, head } = args
    const macro = macros[head]
    const isCallable = typeof macro === 'function'
    // Heads such as "toString" would otherwise resolve to Object.prototype
    // members, and a head of "any" would re-enter this macro forever.
    if (isCallable && macro !== Object.prototype[head] && macro !== macros.any) {
      return macro(args)
    }
    const num = macros.number(args)
    if (num !== undefined) return num
    // Hand the full line to `string` on a copy so the caller's args stay intact.
    return macros.string({ ...args, tail: args.line })
  },

  /**
   * Opens a nested scope by delegating to the supplied `addScope`.
   * @param {{ addScope: Function, head: *, tail: *, line: * }} args
   * @param {object} definition - Scope definition forwarded to `addScope`.
   * @returns {*} Whatever `addScope` returns.
   */
  scope({ addScope, head, tail, line }, definition) {
    return addScope({ definition, head, tail, line })
  }
}
/**
 * Builds a handler that runs `macro` and appends its result to the array
 * stored at `scope[key]`, creating the array on first use.
 *
 * @param {string|symbol} key - Property of `args.scope` that collects results.
 * @param {Function} macro - Called with the handler's `args`; returning
 *   `undefined` means "nothing to collect".
 * @returns {Function} Handler taking `args`; it always returns `undefined`.
 */
export function getTail(key, macro) {
  return (args) => {
    const { scope } = args
    const value = macro(args)
    if (value === undefined) return
    const bucket = scope[key] || (scope[key] = [])
    bucket.push(value)
  }
}
/**
 * Builds a handler that runs `macro` against the whole line and appends the
 * result to the array stored at `scope[key]`, creating it on first use.
 * The macro receives a copy of `args` whose `tail` is replaced by `args.line`.
 *
 * @param {string|symbol} key - Property of `args.scope` that collects results.
 * @param {Function} macro - Called with the adjusted copy of `args`;
 *   returning `undefined` means "nothing to collect".
 * @returns {Function} Handler taking `args`; it always returns `undefined`.
 */
export function getText(key, macro) {
  return (args) => {
    const { scope } = args
    const text = macro({ ...args, tail: args.line })
    if (text === undefined) return
    const bucket = scope[key] || (scope[key] = [])
    bucket.push(text)
  }
}
/**
 * Builds a handler that runs `macro` and appends its result to the array
 * stored on the scope under the line's own head (`args.head`).
 *
 * The bucket name comes from `args.head`, so only own properties of the scope
 * are considered. Otherwise a head such as `toString` or `constructor` would
 * find an inherited function and fail on `.push`, and a head of `__proto__`
 * would replace the scope's prototype.
 *
 * @param {Function} macro - Called with the handler's `args`; returning
 *   `undefined` means "nothing to collect".
 * @returns {Function} Handler taking `args`; it always returns `undefined`.
 */
export function getUnmatched(macro) {
  return (args) => {
    const key = args.head
    const scope = args.scope
    const result = macro(args)
    if (result === undefined) return
    if (!Object.prototype.hasOwnProperty.call(scope, key)) {
      // defineProperty creates a real own data property even for "__proto__",
      // where a plain assignment would go through the prototype setter.
      Object.defineProperty(scope, key, {
        value: [],
        writable: true,
        enumerable: true,
        configurable: true
      })
    }
    scope[key].push(result)
  }
}
/**
 * Builds a handler that delegates to the macro registered under `macro` in
 * `args.macros`. The lookup happens on each call, so the macro table supplied
 * at call time decides which implementation runs.
 *
 * @param {string} macro - Name of the macro to invoke.
 * @returns {Function} Handler taking `args` and returning the macro's result.
 * @throws {TypeError} When `args.macros` has no callable entry named `macro`.
 */
export function isMacro (macro) {
  return (args) => {
    const handler = args.macros[macro]
    if (typeof handler !== 'function') {
      // Name the missing macro instead of the generic "is not a function".
      throw new TypeError(`Unknown macro "${String(macro)}"`)
    }
    // Keep the macro table as `this`, exactly like a direct member call.
    return handler.call(args.macros, args)
  }
}
/**
 * Builds a handler that wraps the result of `macro` in a single-entry object
 * keyed by the line's head: `{ [args.head]: macro(args) }`.
 *
 * @param {Function} macro - Called with the handler's `args`.
 * @returns {Function} Handler taking `args` and returning the wrapper object.
 */
export function isCollection (macro) {
  return (args) => {
    const key = args.head
    const value = macro(args)
    return { [key]: value }
  }
}
/**
 * Builds a handler that opens a nested scope described by `definition` by
 * calling the `scope` macro from `args.macros` with `(args, definition)`.
 *
 * @param {object} definition - Scope definition forwarded to the `scope` macro.
 * @returns {Function} Handler taking `args` and returning the `scope` macro's result.
 */
export function isScope (definition) {
  return (args) => {
    const { macros } = args
    return macros.scope(args, definition)
  }
}
export async function parse(lines, schema) { export async function parse(lines, schema) {
const typeHandlers = {
string (doc) {
return doc.tail().slice(1)
},
number (doc) {
return +doc.tail().slice(1)
},
object (doc, handlers, definition, level) {
level = level != null ? level : doc.level() + 1
definition = lookup(definition)
const object = {}
for (let [key, child] of Object.entries(definition.values)) {
child = lookup(child)
if (key === 'any') key = null
addHandler(handlers, level, key, child, (key, value) => {
if (child.collate === 'array') {
if (!object[key]) object[key] = []
object[key].push(value)
return
} else if (child.collate === 'collection') {
if (!object[key]) object[key] = []
object[key].push({ [key]: value })
return
} else {
object[key] = value
}
})
}
if (definition.tail) {
const tailKey = definition.tail
const tailType = definition.values[tailKey]
object[tailKey] = typeHandlers[tailType](doc, handlers)
}
if (definition.text) {
const textKey = definition.text
const textType = 'string'
object[textKey] = ''
addHandler(handlers, level, '', textType, (key, value) => {
object[textKey] += object[textKey] ? `\n${doc.line()}` : doc.line()
})
}
return object
},
array (doc, handlers, definition, level) {
level = level != null ? level : doc.level() + 1
definition = lookup(definition)
const array = []
for (let [key, child] of Object.entries(definition.values)) {
child = lookup(child)
addHandler(handlers, level, key, child, (key, value) => {
array.push(value)
})
}
// if (definition.tail) {
// const tailKey = definition.tail
// const tailType = definition.values[tailKey]
// object[tailKey] = typeHandlers[tailType](doc, handlers)
// }
// if (definition.text) {
// const textKey = definition.text
// const textType = definition.values[textKey]
// object[textKey] = ''
// addHandler(handlers, level, '', textType, (key, value) => {
// object[textKey] += object[textKey] ? `\n${doc.line()}` : doc.line()
// })
// }
return array
},
collection (doc, handlers, definition, level) {
level = level != null ? level : doc.level() + 1
definition = lookup(definition)
const collection = []
for (let [key, child] of Object.entries(definition.values)) {
child = lookup(child)
addHandler(handlers, level, key, child, (key, value) => {
collection.push({ [key]: value })
})
}
// if (definition.tail) {
// const tailKey = definition.tail
// const tailType = definition.values[tailKey]
// object[tailKey] = typeHandlers[tailType](doc, handlers)
// }
// if (definition.text) {
// const textKey = definition.text
// const textType = definition.values[textKey]
// object[textKey] = ''
// addHandler(handlers, level, '', textType, (key, value) => {
// object[textKey] += object[textKey] ? `\n${doc.line()}` : doc.line()
// })
// }
return collection
},
}
function lookup (definition) {
const type = typeof definition === 'string' ? definition : definition.type
const resolvedDefinition = schema.types[type] || definition
return typeof definition === 'string' ? resolvedDefinition : Object.assign({}, definition, resolvedDefinition)
}
function registerTypes(typeHandlers, types) {
for (const [key, definition] of Object.entries(types)) {
const existingType = typeof definition === 'string' ? definition : definition.type
typeHandlers[key] = typeHandlers[existingType]
}
}
function addHandler(handlers, level, key, definition, resolve) {
const type = typeof definition === 'string' ? definition : definition.type
if (!handlers[level]) handlers[level] = []
handlers[level].push({
key,
resolve,
definition,
handler: typeHandlers[type]
})
}
const doc = useDocument(createStringReader(lines)) const doc = useDocument(createStringReader(lines))
const handlers = []
registerTypes(typeHandlers, schema.types) const macros = schema.macros
const handlers = [schema.root]
const scopes = [{}]
const root = typeHandlers.object(doc, handlers, schema.root, 0) function addScope({ head, tail, line, definition }) {
const scope = {}
if (definition[SYMBOLS.TAIL]) {
const result = definition[SYMBOLS.TAIL]({ macros, scope, head, tail, line, addScope })
Object.assign(scope, result)
}
scopes[scopes.length] = scope
handlers[handlers.length] = definition
return scope
}
let ended = false let ended = false
while (!ended) { while (!ended) {
ended = await doc.next() ended = await doc.next()
if (ended) break; if (ended) break;
if (doc.line() === '') continue;
const level = doc.level() const level = doc.level()
handlers.length = level + 1 handlers.length = level + 1
scopes.length = level + 1
const unmatchedHandler = handlers[level]?.find(h => h.key === '') const line = doc.line()
let matched = false const head = doc.head()
for (const { key, definition, resolve, handler } of handlers[level] || []) { const tail = doc.tail().slice(1)
if (key && doc.head() !== key) continue;
if (!doc.line()) continue; const options = handlers[level]
resolve(doc.head(), handler(doc, handlers, definition)) const scope = scopes[level]
matched = true
break const matches = {
head: options?.[doc.head()],
unmatched: options?.[SYMBOLS.UNMATCHED]
} }
if (!matched && unmatchedHandler) {
const { resolve, definition, handler } = unmatchedHandler if (matches.head) {
resolve(doc.head(), handler(doc, handlers, definition)) const result = matches.head({macros, scope, head, tail, line, addScope })
if (!scope[head]) scope[head] = []
scope[head].push(result)
continue
}
if (matches.unmatched) {
const result = matches.unmatched({ macros, scope, head, tail, line, addScope })
Object.assign(scope, result)
} }
} }
return root return scopes[0]
} }

View File

@@ -1,83 +1,80 @@
import { parse } from './core.js' import { SYMBOLS, parse, isScope, isMacro, getTail, getText, getUnmatched, isCollection } from './core.js'
const schemaTCE = ` const schemaTCE = `
types macros
section object, text as content, tail as pos primitive match number string
content string section
pos number pos number tail
content string unmatched
position number position number
options object options
parameter1 number parameter1 number
parameter2 string parameter2 string
literal unmatched string
unmatched primitive
root object root
title string title string
options options options options collection
options2 options
subsection section subsection section
collate collection list
list array - string array
- string collection
collection collection section collection
section section collection2
unmatched collection
` `
const schema = { const schema = {
types: { macros: {
section: { string({ tail }) {
type: 'object', return tail.toString()
text: 'content',
tail: 'pos',
values: {
content: 'string',
pos: 'number',
position: {
type: 'number'
}
}
}, },
options: { number({ tail }) {
type: 'object', const num = +tail
values: { if (isNaN(num) || tail === '') return
parameter1: 'number', return num
parameter2: 'string' },
} primitive({ macros }) {
const num = macros.number(args)
return num !== undefined ? num : macros.string(args)
},
scope({ addScope, head, tail, line }, definition) {
return addScope({ definition, head, tail, line })
},
section: isScope({
[SYMBOLS.TAIL]: getTail('pos', isMacro('number')),
[SYMBOLS.UNMATCHED]: getText('content', isMacro('string')),
position: isMacro('number')
}),
options: isScope({
parameter1: isMacro('number'),
parameter2: isMacro('string'),
unmatched: isMacro('string'),
[SYMBOLS.UNMATCHED]: getUnmatched(isMacro('any')),
}),
any(args) {
const macro = args.macros[args.head]
if (macro) return macro(args)
const numResult = args.macros.number(args)
if (numResult !== undefined) return numResult
args.tail = args.line
return args.macros.string(args)
}, },
advList: {
type: 'array',
values: {
section: 'section'
}
}
}, },
root: { root: {
type: 'object', title: isMacro('string'),
values: { options: isMacro('options'),
title: 'string', subsection: isMacro('section'),
options: 'options', list: isScope({
options2: 'options', '-': isMacro('string')
subsection: { }),
type: 'section', collection: isScope({
// Allows a particular repeated key to collect itself under a multi-value root as an array. section: isCollection(isMacro('section'))
collate: 'collection' }),
}, collection2: isScope({
list: { [SYMBOLS.UNMATCHED]: getUnmatched(isMacro('any'))
// Defines an array where all entries must conform to specific types. No other keys are permitted. })
// Ex: [ value, value ]
type: 'array',
values: {
'-': 'string'
}
},
collection: {
// Defines an array where all entries must conform to specific types. They will be segregated by key. No other keys are permitted.
// Ex: [ {key: value }, { key: value }]
type: 'collection',
values: {
section: 'section'
}
}
}
} }
} }
@@ -87,7 +84,7 @@ const lines = [
`options`, `options`,
` parameter1 30`, ` parameter1 30`,
` parameter2 Enim eu id anim minim reprehenderit nostrud eu amet deserunt ea ut do cupidatat ea.`, ` parameter2 Enim eu id anim minim reprehenderit nostrud eu amet deserunt ea ut do cupidatat ea.`,
`options2`, `options`,
` parameter1 0`, ` parameter1 0`,
` parameter2 Esse incididunt et est adipisicing eiusmod aliqua enim ea aliqua id enim.`, ` parameter2 Esse incididunt et est adipisicing eiusmod aliqua enim ea aliqua id enim.`,
`subsection`, `subsection`,
@@ -104,11 +101,18 @@ const lines = [
` section`, ` section`,
` lorem ipsum 1`, ` lorem ipsum 1`,
` section`, ` section`,
` lorem ipsum 2` ` lorem ipsum 2`,
`collection2`,
` section`,
` position 3`,
` Laborum aute anim occaecat occaecat pariatur tempor proident magna sit magna non non.`,
` list`,
` 1`,
` 2`
] ]
async function main() { async function main() {
console.log(await parse(lines, schema)) console.dir(await parse(lines, schema), { depth: null })
} }
main() main()

View File

@@ -1,51 +0,0 @@
import { parse } from './core.js'
// Schema description in its textual form. main() splits this string on
// newlines and passes the lines to parse() as the input document, with the
// `schema` object below as the schema.
const schemaTCE = `
types
primitive object, tail as type
any primitive
root object
types object, optional
any primitive
root object
any primitive
`
// Object form of the schema passed to parse() as its second argument.
// `types` holds named definitions that other entries refer to by name;
// `root` describes the top level of the document.
const schema = {
types: {
// 'primitive' is an object type that refers to itself through its `any` value.
primitive: {
type: 'object',
// Name of the key that receives the tail of the line opening the object.
tail: 'type',
values: {
type: 'string',
// NOTE(review): 'any' appears to be the wildcard key (a handler not tied to
// one specific head) - confirm against the parser's object handler.
any: 'primitive'
}
},
},
root: {
type: 'object',
values: {
types: {
type: 'object',
// NOTE(review): no parser code visible alongside this file reads
// `optional` - confirm whether it has any effect.
optional: true,
values: {
any: 'primitive'
}
},
root: {
type: 'object',
values: {
any: 'primitive'
}
}
},
}
}
/**
 * Parses the textual schema with the object schema and prints the resulting
 * structure without truncating nested levels.
 */
async function main() {
  const documentLines = schemaTCE.split('\n')
  const parsed = await parse(documentLines, schema)
  console.dir(parsed, { depth: null })
}
main()