Start on parser v4.

This commit is contained in:
Joshua Bemenderfer
2023-01-29 17:25:43 -05:00
parent ef3c59fb74
commit b87fdfbd83
6 changed files with 145 additions and 97 deletions

View File

@@ -1,77 +0,0 @@
import { useDocument } from '@terrace/core'
import { createStringReader } from '@terrace/core/readers/js-string'
/**
 * Parse a document into a tree of nested `[line, children]` arrays, optionally
 * filtered through a schema.
 *
 * Without a schema every line that has an open parent scope is kept.
 * With a schema, a line is kept only when its head equals a key of the schema
 * in force for its parent (or that schema has a '?' key) and the matched
 * entry's `count` has not reached zero. The matched entry's `children` becomes
 * the schema for the newly opened scope. An entry without a numeric `count`
 * is never exhausted.
 *
 * @param {*} lines - Document source, passed unchanged to `createStringReader`.
 * @param {Object} [rootSchema] - Map of head -> `{ count, children }` applied to top-level lines.
 * @returns {Promise<Array>} The root scope, `['root', children]`.
 */
export async function parse(lines, rootSchema) {
  const doc = useDocument(createStringReader(lines))

  // levelTracker[n] describes the open scope that lines at indent level n are added to.
  const levelTracker = []

  // Open a scope at `level`, discarding whatever was open at that depth or deeper.
  function createScope(level, line, schema = null) {
    levelTracker.length = level

    const entry = levelTracker[level] = {
      scope: [line, []],
      // Default to "matches nothing": children of a schema entry that declares
      // no `children` are then skipped instead of crashing on undefined matchers.
      schemas: [],
      matchers: [],
      counts: []
    }

    if (schema) {
      entry.schemas = Object.values(schema)
      entry.matchers = Object.keys(schema)
      entry.counts = entry.schemas.map(e => e.count)
    }

    return entry.scope
  }

  createScope(0, 'root', rootSchema)

  // Simpler parsing logic, don't need to worry about schemas, matchers, or counts if no schema is specified.
  if (!rootSchema) {
    // doc.next() resolves truthy once the document has ended.
    while (!(await doc.next())) {
      const level = doc.level()

      // A line indented deeper than any open scope has no parent: skip it.
      // The tracker slot itself may be missing, so guard before reading `.scope`.
      const parent = levelTracker[level]?.scope
      if (!parent) continue

      const scope = createScope(level + 1, doc.line())
      parent[1].push(scope)
    }

    return levelTracker[0].scope
  }

  // Full parsing logic
  while (!(await doc.next())) {
    const level = doc.level()

    const entry = levelTracker[level]
    if (!entry) continue

    const { scope: parent, schemas, matchers, counts } = entry

    // Match the head value, or '?' for unspecified lines.
    const head = doc.head()
    const matchIndex = matchers.findIndex(matcher => matcher === head || matcher === '?')

    // Handle trailing blocks of text. TODO: Proper trailing.
    if (matchIndex === -1 && matchers.includes('? literal')) {
      parent[1].push(...(await doc.content(level)).map(e => [e]))
      continue
    }

    // Skip lines that match nothing, or whose match has already been "used up".
    if (matchIndex === -1 || counts[matchIndex] === 0) {
      // Close any deeper scope left open by a previous sibling, otherwise the
      // children of this skipped line would be attached to that sibling.
      levelTracker.length = level + 1
      continue
    }

    // "Use up" one more match.
    counts[matchIndex] -= 1

    const scope = createScope(level + 1, doc.line(), schemas[matchIndex]?.children)
    parent[1].push(scope)
  }

  return levelTracker[0].scope
}

View File

@@ -0,0 +1,77 @@
import { useDocument } from '@terrace/core'
import { createStringReader } from '@terrace/core/readers/js-string'
/**
 * Read a package-manifest style document into a fixed-shape object.
 *
 * Fields that are never matched keep their initial `null` value.
 *
 * NOTE(review): `match`, `tail`, `each` and `buildObject` come from `useDocument`;
 * the comments below describe how they are used here, not their implementation.
 *
 * @param {*} lines - Document source, passed unchanged to `createStringReader`.
 * @returns {Promise<Object>} Object with keys name, version, license, exports,
 *   scripts, devDependencies and author.
 */
export async function parse(lines) {
const { tail, each, match, buildObject } = useDocument(createStringReader(lines))
// Result skeleton; every field starts out as null.
const structure = {
name: null,
version: null,
license: null,
exports: null,
scripts: null,
devDependencies: null,
author: null
}
// The checks below are independent `if`s, not an else-if chain: every one is
// evaluated on each invocation of this callback.
await each(async () => {
// Scalar fields: store the trimmed tail() of the matching line.
if (match('name')) structure.name = tail().trim()
if (match('version')) structure.version = tail().trim()
if (match('license')) structure.license = tail().trim()
// FIXME: Order of operations causes other parts to break if this doesn't run first?!
// NOTE(review): parseObjectKV is neither imported nor destructured from useDocument
// in the visible source — confirm it is defined, otherwise this line throws a ReferenceError.
if (match('exports')) structure.exports = await parseObjectKV(null, async () => {
const section = { import: null, require: null }
await each(() => {
if (match('import')) section.import = tail().trim()
if (match('require')) section.require = tail().trim()
// Returns true once both values are set — presumably tells each() to stop early; confirm.
if (section.import && section.require) return true
})
return section
})
if (match('scripts')) structure.scripts = await buildObject()
if (match('devDependencies')) structure.devDependencies = await buildObject()
// NOTE(review): the array presumably restricts which keys buildObject collects — confirm.
if (match('author')) structure.author = await buildObject(['name', 'email', '#text'])
// Truthy only when every field has a truthy value; otherwise yields the first falsy field.
return structure.name &&
structure.version &&
structure.license &&
structure.exports &&
structure.scripts &&
structure.devDependencies &&
structure.author
})
return structure
}
/**
 * Convert a document into its canonical nested-array form, where every kept
 * line becomes `[line, children]` and the whole document hangs off a
 * synthetic `'root'` scope.
 *
 * @param {*} lines - Document source, passed unchanged to `createStringReader`.
 * @returns {Promise<Array>} The root scope, `['root', children]`.
 */
export async function toArrays(lines) {
  const {
    next: advance,
    level: currentLevel,
    line: currentLine
  } = useDocument(createStringReader(lines))

  const root = ['root', []]

  // openScopes[n] is the scope that lines at indent level n are appended to.
  const openScopes = [root]

  // advance() resolves truthy once the document has ended.
  while (!(await advance())) {
    const depth = currentLevel()
    const parent = openScopes[depth]

    // A line indented deeper than any open scope has no parent; drop it.
    if (!parent) continue

    const node = [currentLine(), []]

    // Close every scope nested below the parent, then open this line's scope there.
    openScopes.splice(depth + 1, Infinity, node)
    parent[1].push(node)
  }

  return root
}

View File

@@ -1,6 +1,6 @@
import { parse } from './core.js'
import { parse, toArrays } from './core.js'
const linesFull = [
const linesArrays = [
`title Example`,
`options`,
` parameter1 30`,
@@ -34,7 +34,7 @@ const linesFull = [
` 2`
]
const linesSchema = [
const linesParse = [
`name @terrace/core`,
`version 0.0.1`,
`randomthing test`,
@@ -71,6 +71,7 @@ const linesSchema = [
` email josh@thederf.com`,
` `,
` Further comments below. As I will now demonstrate, there is no simple`,
` even if embedded`,
` way of dealing with this problem.`,
]
@@ -98,11 +99,11 @@ const schema = {
}
// Demo entry point: runs the parsers over the sample documents and prints a result.
// NOTE(review): this span comes from a rendered diff, so the removed lines
// (resultFull / resultSchema) and the added lines (resultArrays / resultParse)
// appear side by side; only one of each pair exists in any real revision of the file.
async function main() {
const resultFull = await parse(linesFull)
// console.dir(resultFull, { depth: null })
const resultArrays = await toArrays(linesArrays)
// console.dir(resultArrays, { depth: null })
const resultSchema = await parse(linesSchema, schema)
console.dir(resultSchema, { depth: null })
const resultParse = await parse(linesParse)
// depth: null prints the nested result in full instead of collapsing deep levels.
console.dir(resultParse, { depth: null })
}
main()