Reworked parser to be macro based.

This commit is contained in:
Joshua Bemenderfer 2022-11-15 17:10:17 -05:00
parent 4405f4857d
commit 296056ce16
3 changed files with 188 additions and 266 deletions

View File

@ -1,166 +1,135 @@
import { createLineData, useDocument } from '@terrace/core'
import { createStringReader } from '@terrace/core/readers/js-string'
/**
 * Symbol keys with special meaning inside a scope definition.
 * Using symbols keeps them from colliding with line heads found in documents.
 */
export const SYMBOLS = {
  // Macro run against the line that opens a scope (looked up when the scope is added).
  TAIL: Symbol('TAIL'),
  // Fallback macro for lines whose head has no entry in the scope definition.
  UNMATCHED: Symbol('UNMATCHED'),
}
/**
 * Built-in macros. Every macro receives a single `args` object; the fields
 * read here are `macros` (the macro table to dispatch through), `head`,
 * `tail`, `line` and `addScope`.
 */
export const BASE_MACROS = {
  /** Returns the tail as a string. */
  string({ tail }) {
    return tail.toString()
  },

  /**
   * Returns the tail coerced to a number, or `undefined` when the tail is
   * the empty string or does not coerce to a number.
   */
  number({ tail }) {
    const num = +tail
    if (Number.isNaN(num) || tail === '') return
    return num
  },

  /**
   * Returns the tail as a number when it parses as one, otherwise as a string.
   * Takes the whole `args` object so it can be forwarded to the other macros
   * (previously this referenced an undeclared `args` and threw).
   */
  primitive(args) {
    const num = args.macros.number(args)
    return num !== undefined ? num : args.macros.string(args)
  },

  /**
   * Dispatches on the line's head: uses the macro named after the head if one
   * exists, else a number if the tail parses as one, else the whole line as a
   * string.
   */
  any(args) {
    const macro = args.macros[args.head]
    if (macro) return macro(args)
    const numResult = args.macros.number(args)
    if (numResult !== undefined) return numResult
    // Pass a copy so the caller's `args.tail` is left untouched.
    return args.macros.string({ ...args, tail: args.line })
  },

  /** Opens a nested scope described by `definition` and returns it. */
  scope({ addScope, head, tail, line }, definition) {
    return addScope({ definition, head, tail, line })
  },
}
/**
 * Wraps `macro` so that each defined result is appended to `scope[key]`.
 *
 * @param {string} key - property of the scope that collects the results
 * @param {Function} macro - called with the unmodified `args`
 * @returns {Function} handler that returns nothing; it writes to `args.scope`
 */
export function getTail(key, macro) {
  return (args) => {
    const { scope } = args
    const value = macro(args)
    // An undefined result means "no value": leave the scope untouched.
    if (value !== undefined) {
      const bucket = scope[key] || (scope[key] = [])
      bucket.push(value)
    }
  }
}
/**
 * Like a tail collector, but feeds the macro the whole line: `macro` is
 * called with a copy of `args` whose `tail` is replaced by `args.line`.
 * Each defined result is appended to `scope[key]`.
 *
 * @param {string} key - property of the scope that collects the results
 * @param {Function} macro - called with the copied arguments
 * @returns {Function} handler that returns nothing; it writes to `args.scope`
 */
export function getText(key, macro) {
  return (args) => {
    const { scope, line } = args
    const lineArgs = { ...args, tail: line }
    const value = macro(lineArgs)
    if (value !== undefined) {
      const bucket = scope[key] || (scope[key] = [])
      bucket.push(value)
    }
  }
}
/**
 * Wraps `macro` so that each defined result is appended to the scope under
 * the line's own head (`args.head`).
 *
 * The head comes from the parsed document, so it may be any string,
 * including names that exist on `Object.prototype` (`constructor`,
 * `toString`, `__proto__`, ...). The bucket is therefore looked up and
 * created as an own property only; otherwise an inherited member would be
 * mistaken for an existing bucket and `.push` would throw.
 *
 * @param {Function} macro - called with the unmodified `args`
 * @returns {Function} handler that returns nothing; it writes to `args.scope`
 */
export function getUnmatched(macro) {
  return (args) => {
    const key = args.head
    const scope = args.scope
    const result = macro(args)
    if (result === undefined) return
    const hasBucket = Object.prototype.hasOwnProperty.call(scope, key) && scope[key]
    if (!hasBucket) {
      // defineProperty (not assignment) so that a `__proto__` head creates a
      // plain data property instead of replacing the scope's prototype.
      Object.defineProperty(scope, key, {
        value: [],
        writable: true,
        enumerable: true,
        configurable: true,
      })
    }
    scope[key].push(result)
  }
}
/**
 * Builds a handler that defers to the macro registered under the name
 * `macro`. The lookup happens on `args.macros` at call time, not when the
 * handler is created.
 *
 * @param {string} macro - name of the macro to run
 * @returns {Function} handler returning whatever the named macro returns
 */
export function isMacro (macro) {
  return (args) => {
    const { macros } = args
    return macros[macro](args)
  }
}
/**
 * Builds a handler that wraps the macro's result in a single-key object
 * keyed by the line's head: `{ [head]: result }`.
 *
 * @param {Function} macro - called with the unmodified `args`
 * @returns {Function} handler returning the wrapped result
 */
export function isCollection (macro) {
  return (args) => {
    const key = args.head
    const value = macro(args)
    return { [key]: value }
  }
}
/**
 * Builds a handler that opens a nested scope: it calls the `scope` macro
 * from `args.macros` with the line's arguments and the given definition.
 *
 * @param {object} definition - scope definition handed to the `scope` macro
 * @returns {Function} handler returning whatever the `scope` macro returns
 */
export function isScope (definition) {
  return (args) => {
    const { macros } = args
    return macros.scope(args, definition)
  }
}
export async function parse(lines, schema) {
// Handlers keyed by schema type name. Each one turns the current document
// line into a value; the container types also register handlers (through
// addHandler) for the lines nested below it, which fill the value in later.
const typeHandlers = {
// Returns the line's tail without its first character.
// NOTE(review): presumably that character is the separator after the head — confirm against doc.tail().
string (doc) {
return doc.tail().slice(1)
},
// Same as `string`, coerced with unary plus (a non-numeric tail yields NaN).
number (doc) {
return +doc.tail().slice(1)
},
// Builds an object with one property per entry of `definition.values`.
// `level` is where child handlers are registered; it defaults to one level
// below the current line.
object (doc, handlers, definition, level) {
level = level != null ? level : doc.level() + 1
definition = lookup(definition)
const object = {}
for (let [key, child] of Object.entries(definition.values)) {
child = lookup(child)
// A null key is registered for the entry named 'any'.
if (key === 'any') key = null
// The callback's `key` parameter shadows the loop variable; it is whatever
// the caller of `resolve` passes in.
addHandler(handlers, level, key, child, (key, value) => {
if (child.collate === 'array') {
// Repeated keys accumulate their values in an array.
if (!object[key]) object[key] = []
object[key].push(value)
return
} else if (child.collate === 'collection') {
// Repeated keys accumulate as single-key objects: [{ key: value }, ...].
if (!object[key]) object[key] = []
object[key].push({ [key]: value })
return
} else {
// No collation: a later value overwrites an earlier one.
object[key] = value
}
})
}
if (definition.tail) {
// The tail of the current line is parsed immediately and stored under the
// property named by `definition.tail`, using that property's declared type.
const tailKey = definition.tail
const tailType = definition.values[tailKey]
object[tailKey] = typeHandlers[tailType](doc, handlers)
}
if (definition.text) {
// Registers a handler under the empty key that appends the full text of
// each line it resolves, newline-separated, to the property named by
// `definition.text`.
const textKey = definition.text
const textType = 'string'
object[textKey] = ''
addHandler(handlers, level, '', textType, (key, value) => {
object[textKey] += object[textKey] ? `\n${doc.line()}` : doc.line()
})
}
return object
},
// Builds an array; every resolved child value is pushed regardless of its key.
array (doc, handlers, definition, level) {
level = level != null ? level : doc.level() + 1
definition = lookup(definition)
const array = []
for (let [key, child] of Object.entries(definition.values)) {
child = lookup(child)
addHandler(handlers, level, key, child, (key, value) => {
array.push(value)
})
}
// if (definition.tail) {
// const tailKey = definition.tail
// const tailType = definition.values[tailKey]
// object[tailKey] = typeHandlers[tailType](doc, handlers)
// }
// if (definition.text) {
// const textKey = definition.text
// const textType = definition.values[textKey]
// object[textKey] = ''
// addHandler(handlers, level, '', textType, (key, value) => {
// object[textKey] += object[textKey] ? `\n${doc.line()}` : doc.line()
// })
// }
return array
},
// Builds an array of single-key objects, one `{ [key]: value }` per resolved child.
collection (doc, handlers, definition, level) {
level = level != null ? level : doc.level() + 1
definition = lookup(definition)
const collection = []
for (let [key, child] of Object.entries(definition.values)) {
child = lookup(child)
addHandler(handlers, level, key, child, (key, value) => {
collection.push({ [key]: value })
})
}
// if (definition.tail) {
// const tailKey = definition.tail
// const tailType = definition.values[tailKey]
// object[tailKey] = typeHandlers[tailType](doc, handlers)
// }
// if (definition.text) {
// const textKey = definition.text
// const textType = definition.values[textKey]
// object[textKey] = ''
// addHandler(handlers, level, '', textType, (key, value) => {
// object[textKey] += object[textKey] ? `\n${doc.line()}` : doc.line()
// })
// }
return collection
},
}
/**
 * Resolves a definition against the named types in `schema.types`.
 *
 * - A type name (string) resolves to the registered type, or to the name
 *   itself when no such type is registered.
 * - A definition object resolves to a fresh object: its own fields
 *   overlaid with the fields of the registered type named by its `type`
 *   (or a plain copy when that type is not registered).
 */
function lookup (definition) {
  const isTypeName = typeof definition === 'string'
  const typeName = isTypeName ? definition : definition.type
  const resolved = schema.types[typeName] || definition
  if (isTypeName) return resolved
  return Object.assign({}, definition, resolved)
}
/**
 * Aliases every named schema type to the handler of the type it is based on:
 * `typeHandlers[name] = typeHandlers[baseType]`. Entries are processed in
 * declaration order, so a type may build on one registered earlier.
 */
function registerTypes(typeHandlers, types) {
  Object.keys(types).forEach((name) => {
    const definition = types[name]
    const baseType = typeof definition === 'string' ? definition : definition.type
    typeHandlers[name] = typeHandlers[baseType]
  })
}
/**
 * Registers a handler entry for lines at `level`.
 *
 * The entry records the key to match, the callback that receives the parsed
 * value (`resolve`), the definition, and the type handler looked up from the
 * definition's type name.
 */
function addHandler(handlers, level, key, definition, resolve) {
  const typeName = typeof definition === 'string' ? definition : definition.type
  const entry = {
    key,
    resolve,
    definition,
    handler: typeHandlers[typeName],
  }
  if (!handlers[level]) handlers[level] = []
  handlers[level].push(entry)
}
const doc = useDocument(createStringReader(lines))
const handlers = []
registerTypes(typeHandlers, schema.types)
const macros = schema.macros
const handlers = [schema.root]
const scopes = [{}]
const root = typeHandlers.object(doc, handlers, schema.root, 0)
/**
 * Opens a new scope for `definition` and returns it.
 *
 * If the definition has a SYMBOLS.TAIL macro, it is run against the opening
 * line first and any object it returns is merged into the new scope. The
 * scope and its definition are then appended to `scopes` and `handlers`.
 */
function addScope({ head, tail, line, definition }) {
  const scope = {}
  const tailMacro = definition[SYMBOLS.TAIL]
  if (tailMacro) {
    const tailArgs = { macros, scope, head, tail, line, addScope }
    // Invoke as a method of the definition, as a direct member call would.
    Object.assign(scope, tailMacro.call(definition, tailArgs))
  }
  scopes.push(scope)
  handlers.push(definition)
  return scope
}
let ended = false
while (!ended) {
ended = await doc.next()
if (ended) break;
if (doc.line() === '') continue;
const level = doc.level()
handlers.length = level + 1
scopes.length = level + 1
const unmatchedHandler = handlers[level]?.find(h => h.key === '')
let matched = false
for (const { key, definition, resolve, handler } of handlers[level] || []) {
if (key && doc.head() !== key) continue;
if (!doc.line()) continue;
resolve(doc.head(), handler(doc, handlers, definition))
matched = true
break
const line = doc.line()
const head = doc.head()
const tail = doc.tail().slice(1)
const options = handlers[level]
const scope = scopes[level]
const matches = {
head: options?.[doc.head()],
unmatched: options?.[SYMBOLS.UNMATCHED]
}
if (!matched && unmatchedHandler) {
const { resolve, definition, handler } = unmatchedHandler
resolve(doc.head(), handler(doc, handlers, definition))
if (matches.head) {
const result = matches.head({macros, scope, head, tail, line, addScope })
if (!scope[head]) scope[head] = []
scope[head].push(result)
continue
}
if (matches.unmatched) {
const result = matches.unmatched({ macros, scope, head, tail, line, addScope })
Object.assign(scope, result)
}
}
return root
return scopes[0]
}

View File

@ -1,83 +1,80 @@
import { parse } from './core.js'
import { SYMBOLS, parse, isScope, isMacro, getTail, getText, getUnmatched, isCollection } from './core.js'
const schemaTCE = `
types
section object, text as content, tail as pos
content string
pos number
macros
primitive match number string
section
pos number tail
content string unmatched
position number
options object
options
parameter1 number
parameter2 string
literal unmatched string
unmatched primitive
root object
root
title string
options options
options2 options
options options collection
subsection section
collate collection
list array
- string
collection collection
section section
list
- string array
collection
section collection
collection2
unmatched collection
`
const schema = {
types: {
section: {
type: 'object',
text: 'content',
tail: 'pos',
values: {
content: 'string',
pos: 'number',
position: {
type: 'number'
}
}
macros: {
string({ tail }) {
return tail.toString()
},
options: {
type: 'object',
values: {
parameter1: 'number',
parameter2: 'string'
}
number({ tail }) {
const num = +tail
if (isNaN(num) || tail === '') return
return num
},
primitive({ macros }) {
const num = macros.number(args)
return num !== undefined ? num : macros.string(args)
},
scope({ addScope, head, tail, line }, definition) {
return addScope({ definition, head, tail, line })
},
section: isScope({
[SYMBOLS.TAIL]: getTail('pos', isMacro('number')),
[SYMBOLS.UNMATCHED]: getText('content', isMacro('string')),
position: isMacro('number')
}),
options: isScope({
parameter1: isMacro('number'),
parameter2: isMacro('string'),
unmatched: isMacro('string'),
[SYMBOLS.UNMATCHED]: getUnmatched(isMacro('any')),
}),
any(args) {
const macro = args.macros[args.head]
if (macro) return macro(args)
const numResult = args.macros.number(args)
if (numResult !== undefined) return numResult
args.tail = args.line
return args.macros.string(args)
},
advList: {
type: 'array',
values: {
section: 'section'
}
}
},
root: {
type: 'object',
values: {
title: 'string',
options: 'options',
options2: 'options',
subsection: {
type: 'section',
// Allows a particular repeated key to collect itself under a multi-value root as an array.
collate: 'collection'
},
list: {
// Defines an array where all entries must conform to specific types. No other keys are permitted.
// Ex: [ value, value ]
type: 'array',
values: {
'-': 'string'
}
},
collection: {
// Defines an array where all entries must conform to specific types. They will be segregated by key. No other keys are permitted.
// Ex: [ {key: value }, { key: value }]
type: 'collection',
values: {
section: 'section'
}
}
}
title: isMacro('string'),
options: isMacro('options'),
subsection: isMacro('section'),
list: isScope({
'-': isMacro('string')
}),
collection: isScope({
section: isCollection(isMacro('section'))
}),
collection2: isScope({
[SYMBOLS.UNMATCHED]: getUnmatched(isMacro('any'))
})
}
}
@ -87,7 +84,7 @@ const lines = [
`options`,
` parameter1 30`,
` parameter2 Enim eu id anim minim reprehenderit nostrud eu amet deserunt ea ut do cupidatat ea.`,
`options2`,
`options`,
` parameter1 0`,
` parameter2 Esse incididunt et est adipisicing eiusmod aliqua enim ea aliqua id enim.`,
`subsection`,
@ -104,11 +101,18 @@ const lines = [
` section`,
` lorem ipsum 1`,
` section`,
` lorem ipsum 2`
` lorem ipsum 2`,
`collection2`,
` section`,
` position 3`,
` Laborum aute anim occaecat occaecat pariatur tempor proident magna sit magna non non.`,
` list`,
` 1`,
` 2`
]
// Parses the sample `lines` against `schema` and prints the result.
// The console.dir call passes `depth: null` so nested values are printed in full.
async function main() {
console.log(await parse(lines, schema))
console.dir(await parse(lines, schema), { depth: null })
}
main()

View File

@ -1,51 +0,0 @@
import { parse } from './core.js'
const schemaTCE = `
types
primitive object, tail as type
any primitive
root object
types object, optional
any primitive
root object
any primitive
`
// Schema object handed to parse() in main() below.
const schema = {
// Named types that other definitions can refer to by name.
types: {
// An object type that names `type` as its tail property and whose `any`
// entry refers back to `primitive` itself (a recursive definition).
primitive: {
type: 'object',
tail: 'type',
values: {
type: 'string',
any: 'primitive'
}
},
},
// Definition of the document's top level: a `types` block and a `root`
// block, each an object whose `any` entry is a `primitive`.
root: {
type: 'object',
values: {
types: {
type: 'object',
// NOTE(review): no reader of `optional` is visible in this view — confirm it has an effect.
optional: true,
values: {
any: 'primitive'
}
},
root: {
type: 'object',
values: {
any: 'primitive'
}
}
},
}
}
// Parses the textual schema line by line against `schema` and prints the
// full result (`depth: null` disables truncation of nested values).
async function main() {
  const schemaLines = schemaTCE.split('\n')
  const parsed = await parse(schemaLines, schema)
  console.dir(parsed, { depth: null })
}
main()