You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
509 lines
15 KiB
509 lines
15 KiB
1 month ago
|
/*
  Copyright (c) 2007-2010 Alessandro Warth <awarth@cs.ucla.edu>

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation
  files (the "Software"), to deal in the Software without
  restriction, including without limitation the rights to use,
  copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the
  Software is furnished to do so, subject to the following
  conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  OTHER DEALINGS IN THE SOFTWARE.
*/
|
||
|
|
||
|
/*
  new syntax:
    #foo and `foo    matches the string object 'foo' (it's also accepted in my JS)
    'abc'            matches the string object 'abc'
    'c'              matches the string object 'c'
    ``abc''          matches the sequence of string objects 'a', 'b', 'c'
    "abc"            token('abc')
    [1 2 3]          matches the array object [1, 2, 3]
    foo(bar)         applies rule foo with argument bar
    -> ...           semantic actions written in JS (see OMetaParser's atomicHostExpr rule)
*/
|
||
|
|
||
|
/*
  ometa M {
    number = number:n digit:d -> { n * 10 + d.digitValue() }
           | digit:d          -> { d.digitValue() }
  }

  translates to...

  M = objectThatDelegatesTo(OMeta, {
    number: function() {
      return this._or(function() {
                        var n = this._apply("number"),
                            d = this._apply("digit")
                        return n * 10 + d.digitValue()
                      },
                      function() {
                        var d = this._apply("digit")
                        return d.digitValue()
                      }
      )
    }
  })
  M.matchAll("123456789", "number")
*/
|
||
|
|
||
|
// The failure exception: a single shared object that is thrown to signal
// "this did not match".  Code that catches it compares the caught value
// against `fail` to tell a match failure apart from a real error.
fail = {
  toString: function() {
    return "match failed"
  }
}
|
||
|
|
||
|
// streams and memoization
|
||
|
|
||
|
// A stream cell that places one value (hd) in front of an existing stream (tl).
// The backing list and index are copied from the tail; every cell owns a fresh
// memo table.
function OMInputStream(hd, tl) {
  this.memo = { }
  this.lst  = tl.lst
  this.idx  = tl.idx
  this.hd   = hd
  this.tl   = tl
}

// The value stored in this cell.
OMInputStream.prototype.head = function() {
  return this.hd
}

// The stream that follows this cell.
OMInputStream.prototype.tail = function() {
  return this.tl
}

// The constructor of the backing list (String for string input).
OMInputStream.prototype.type = function() {
  return this.lst.constructor
}

// Collects the heads of every cell from this one up to (but excluding) `that`.
// The result is joined into a string when the backing list is a String,
// otherwise it is returned as an array.
OMInputStream.prototype.upTo = function(that) {
  var collected = []
  for (var cell = this; cell != that; cell = cell.tail())
    collected.push(cell.head())
  if (this.type() == String)
    return collected.join('')
  return collected
}
|
||
|
|
||
|
// The cell that marks the end of the input.  It remembers the backing list and
// the index at which the input ran out, and has nothing left to read.
function OMInputStreamEnd(lst, idx) {
  this.memo = { }
  this.lst = lst
  this.idx = idx
}

// Delegate to OMInputStream.prototype (objectThatDelegatesTo is defined
// elsewhere), then override both accessors so that reading past the end fails.
OMInputStreamEnd.prototype = objectThatDelegatesTo(OMInputStream.prototype)

OMInputStreamEnd.prototype.head = function() {
  throw fail
}

OMInputStreamEnd.prototype.tail = function() {
  throw fail
}
|
||
|
|
||
|
// Uniform indexed access for arrays and strings.  This is necessary because in
// IE you can't say "foo"[idx], so the input streams read elements through `at`.
//
// Modern engines already provide standard Array.prototype.at and
// String.prototype.at, which return the same element for every in-range,
// non-negative index (the only kind this file asks for).  The fallbacks below
// are therefore installed only when no native method exists, so the standard
// behaviour (for example negative indices) is not overwritten for other code
// running in the same environment.
if (typeof Array.prototype.at != "function")
  Array.prototype.at = function(idx) { return this[idx] }
if (typeof String.prototype.at != "function")
  String.prototype.at = String.prototype.charAt
|
||
|
|
||
|
// A stream cell backed directly by position `idx` of the list (array or string)
// `lst`.  The head is read once, at construction time, through `at`.
function ListOMInputStream(lst, idx) {
  this.memo = { }
  this.lst = lst
  this.idx = idx
  this.hd = lst.at(idx)
}

ListOMInputStream.prototype = objectThatDelegatesTo(OMInputStream.prototype)

// The element at this position.
ListOMInputStream.prototype.head = function() {
  return this.hd
}

// The cell for the next position.  It is built on first request and cached in
// `tl`, so repeated calls hand back the very same object.
ListOMInputStream.prototype.tail = function() {
  if (!this.tl)
    this.tl = makeListOMInputStream(this.lst, this.idx + 1)
  return this.tl
}
|
||
|
|
||
|
// Builds the stream cell for position `idx` of `lst`: a ListOMInputStream while
// `idx` is still inside the list, an OMInputStreamEnd once it is not.
function makeListOMInputStream(lst, idx) {
  if (idx < lst.length)
    return new ListOMInputStream(lst, idx)
  return new OMInputStreamEnd(lst, idx)
}
|
||
|
|
||
|
// Lets any array or string be turned into an input stream positioned at its
// first element.
Array.prototype.toOMInputStream = function() {
  return makeListOMInputStream(this, 0)
}

String.prototype.toOMInputStream = function() {
  return makeListOMInputStream(this, 0)
}
|
||
|
|
||
|
// Wraps the stream `target` in an object that delegates to it but carries its
// own, initially empty, memo table.  The wrapped stream stays reachable through
// the `target` property, and asking for the tail yields a proxy around the
// target's tail.
function makeOMInputStreamProxy(target) {
  var overrides = {
    memo: { },
    target: target,
    tail: function() {
      return makeOMInputStreamProxy(target.tail())
    }
  }
  return objectThatDelegatesTo(target, overrides)
}
|
||
|
|
||
|
// Failer (i.e., that which makes things fail) is used to detect (direct) left
// recursion and to memoize failures.  `used` starts out false, shared through
// the prototype; it becomes an own property once someone sets it on an instance.
function Failer() {
}

Failer.prototype.used = false
|
||
|
|
||
|
// the OMeta "class" and basic functionality
|
||
|
|
||
|
// OMeta: the base object that every grammar delegates to.
//
// A matcher keeps its current position in `this.input` (an input stream).  A
// rule is a method that either returns a value (usually after advancing
// `this.input`) or throws the shared `fail` object.  The combinators below catch
// `fail` (compared by identity) in order to backtrack; every other thrown value
// is treated as a real error and re-thrown untouched.
OMeta = {
  // Applies the parameterless rule named `rule` at the current position.
  // Results are memoized per input position in `input.memo`.  While a rule is
  // running, its memo slot holds a Failer, so a (direct) left-recursive
  // re-application at the same position fails and marks the Failer as used; the
  // first answer is then "grown" by re-running the rule until it stops consuming
  // more input.  If the rule fails, the Failer stays in the memo table, which is
  // how failures are memoized.
  // Throws a string if the rule does not exist, `fail` if it does not match.
  _apply: function(rule) {
    var memoRec = this.input.memo[rule]
    if (memoRec == undefined) {
      var origInput = this.input,
          failer    = new Failer()
      if (this[rule] === undefined)
        throw 'tried to apply undefined rule "' + rule + '"'
      this.input.memo[rule] = failer
      // The assignment target (`this.input.memo`) is resolved before the rule
      // body runs, so the record lands in the memo table of the start position.
      this.input.memo[rule] = memoRec = {ans: this[rule].call(this), nextInput: this.input}
      if (failer.used) {
        // left recursion was detected: keep re-running the rule from the start
        // position for as long as it yields a result that ends somewhere other
        // than where the first answer ended
        var sentinel = this.input
        while (true) {
          try {
            this.input = origInput
            var ans = this[rule].call(this)
            if (this.input === sentinel)
              throw fail
            memoRec.ans = ans
            memoRec.nextInput = this.input
          }
          catch (f) {
            if (f !== fail)
              throw f
            break
          }
        }
      }
    }
    else if (memoRec instanceof Failer) {
      memoRec.used = true
      throw fail
    }
    this.input = memoRec.nextInput
    return memoRec.ans
  },

  // note: _applyWithArgs and _superApplyWithArgs are not memoized, so they can't be left-recursive

  // Applies `rule` with arguments: the extra arguments are pushed onto the front
  // of the input (last one first, so the rule reads them in order).
  _applyWithArgs: function(rule) {
    for (var idx = arguments.length - 1; idx > 0; idx--)
      this._prependInput(arguments[idx])
    return this[rule].call(this)
  },

  // Like _applyWithArgs, but looks the rule up on `this` and runs it with `recv`
  // as the receiver; the arguments are prepended to recv's input.
  _superApplyWithArgs: function(recv, rule) {
    for (var idx = arguments.length - 1; idx > 1; idx--)
      recv._prependInput(arguments[idx])
    return this[rule].call(recv)
  },

  // Pushes the value `v` onto the front of the current input.
  _prependInput: function(v) {
    this.input = new OMInputStream(v, this.input)
  },

  // if you want your grammar (and its subgrammars) to memoize parameterized rules, invoke this method on it:
  memoizeParameterizedRules: function() {
    // Re-use the stream cell created earlier for the same immutable value at the
    // same position (cached on the input under getTag(v)), so that its memo
    // table is shared between applications.
    this._prependInput = function(v) {
      var newInput
      if (isImmutable(v)) {
        newInput = this.input[getTag(v)]
        if (!newInput) {
          newInput = new OMInputStream(v, this.input)
          this.input[getTag(v)] = newInput
        }
      }
      else
        newInput = new OMInputStream(v, this.input)
      this.input = newInput
    }
    // With cached cells in place the rule can go through the memoizing _apply.
    this._applyWithArgs = function(rule) {
      for (var idx = arguments.length - 1; idx > 0; idx--)
        this._prependInput(arguments[idx])
      return this._apply(rule)
    }
  },

  // Semantic predicate: succeeds (returning true) when `b` is truthy, fails otherwise.
  _pred: function(b) {
    if (b)
      return true
    throw fail
  },

  // Negation: succeeds (returning true, input unchanged) only if `x` fails.
  _not: function(x) {
    var origInput = this.input
    try {
      x.call(this)
    }
    catch (f) {
      if (f !== fail)
        throw f
      this.input = origInput
      return true
    }
    throw fail
  },

  // Runs `x` and returns its result, then rewinds the input to where it was.
  _lookahead: function(x) {
    var origInput = this.input,
        r         = x.call(this)
    this.input = origInput
    return r
  },

  // Ordered choice: tries each argument from the same start position and
  // returns the result of the first one that does not fail.
  _or: function() {
    var origInput = this.input
    for (var idx = 0; idx < arguments.length; idx++) {
      try {
        this.input = origInput
        return arguments[idx].call(this)
      }
      catch (f) {
        if (f !== fail)
          throw f
      }
    }
    throw fail
  },

  // Exclusive choice: every alternative (arguments 1..n) is tried, and exactly
  // one may match.  Throws a string naming `ruleName` if a second one matches.
  _xor: function(ruleName) {
    var origInput = this.input, newInput, ans
    for (var idx = 1; idx < arguments.length; idx++) {
      try {
        this.input = origInput
        ans = arguments[idx].call(this)
        if (newInput)
          throw 'more than one choice matched by "exclusive-OR" in ' + ruleName
        newInput = this.input
      }
      catch (f) {
        if (f !== fail)
          throw f
      }
    }
    if (!newInput)
      throw fail
    this.input = newInput
    return ans
  },

  // Replaces _xor on the receiver with a version that behaves like ordered
  // choice (first matching alternative wins, no exclusivity check).
  disableXORs: function() {
    this._xor = function(ruleName) {
      var origInput = this.input
      for (var idx = 1; idx < arguments.length; idx++) {
        try {
          this.input = origInput
          return arguments[idx].call(this)
        }
        catch (f) {
          if (f !== fail)
            throw f
        }
      }
      throw fail
    }
  },

  // Optional: the result of `x`, or undefined (input unchanged) if `x` fails.
  _opt: function(x) {
    var origInput = this.input, ans
    try {
      ans = x.call(this)
    }
    catch (f) {
      if (f !== fail)
        throw f
      this.input = origInput
    }
    return ans
  },

  // Zero or more: applies `x` until it fails and returns the collected results.
  // An optional second argument is used as the first element of the result (this
  // is how _many1 and firstAndRest pass in a match they already made).  Its
  // presence is decided by the argument count, not its value, so a first match
  // that produced undefined or null is kept like any later one.
  _many: function(x) {
    var ans = arguments.length > 1 ? [arguments[1]] : []
    while (true) {
      var origInput = this.input
      try {
        ans.push(x.call(this))
      }
      catch (f) {
        if (f !== fail)
          throw f
        this.input = origInput
        break
      }
    }
    return ans
  },

  // One or more: `x` must match once, then as many more times as possible.
  _many1: function(x) { return this._many(x, x.call(this)) },

  // Matches a nested sequence: the next input value must be sequenceable; `x`
  // is then run against its contents and must consume all of them.  Returns the
  // sequence itself (the result of `x` is discarded).
  _form: function(x) {
    var v = this._apply("anything")
    if (!isSequenceable(v))
      throw fail
    var origInput = this.input
    this.input = v.toOMInputStream()
    x.call(this)
    this._apply("end")
    this.input = origInput
    return v
  },

  // Runs `x` and returns the input it consumed (see OMInputStream.upTo).
  _consumedBy: function(x) {
    var origInput = this.input
    x.call(this)
    return origInput.upTo(this.input)
  },

  // Runs `x` and returns the stream indices before and after it.
  _idxConsumedBy: function(x) {
    var origInput = this.input
    x.call(this)
    return {fromIdx: origInput.idx, toIdx: this.input.idx}
  },

  // Matches the given parts in any order.  Arguments come in (mode, part) pairs;
  // the mode is "1" (exactly once), "?" (at most once), "*" (any number of
  // times) or "+" (at least once).  Returns an array with one slot per part: an
  // array of results for "*" and "+", a single result (or undefined) otherwise.
  // Progress is tracked by overwriting the mode entries in `arguments`: "+"
  // becomes "*" after its first match, "?" and "1" become "0" (finished).
  _interleave: function(mode1, part1, mode2, part2 /* ..., moden, partn */) {
    var currInput = this.input, ans = []
    for (var idx = 0; idx < arguments.length; idx += 2)
      ans[idx / 2] = (arguments[idx] == "*" || arguments[idx] == "+") ? [] : undefined
    while (true) {
      var idx = 0, allDone = true
      // look for the first unfinished part that matches at the current position
      while (idx < arguments.length) {
        if (arguments[idx] != "0") {
          try {
            this.input = currInput
            switch (arguments[idx]) {
              case "*": ans[idx / 2].push(arguments[idx + 1].call(this));                       break
              case "+": ans[idx / 2].push(arguments[idx + 1].call(this)); arguments[idx] = "*"; break
              case "?": ans[idx / 2] = arguments[idx + 1].call(this);     arguments[idx] = "0"; break
              case "1": ans[idx / 2] = arguments[idx + 1].call(this);     arguments[idx] = "0"; break
              default:  throw "invalid mode '" + arguments[idx] + "' in OMeta._interleave"
            }
            currInput = this.input
            break
          }
          catch (f) {
            if (f !== fail)
              throw f
            // if this (failed) part's mode is "1" or "+", we're not done yet
            allDone = allDone && (arguments[idx] == "*" || arguments[idx] == "?")
          }
        }
        idx += 2
      }
      // the scan ran off the end, i.e. no part matched in this round
      if (idx == arguments.length) {
        if (allDone)
          return ans
        else
          throw fail
      }
    }
  },

  // The index of the current input position.
  _currIdx: function() { return this.input.idx },

  // some basic rules

  // Consumes and returns the next input value.
  anything: function() {
    var r = this.input.head()
    this.input = this.input.tail()
    return r
  },

  // Succeeds only when there is no more input.
  end: function() {
    return this._not(function() { return this._apply("anything") })
  },

  // Returns the index of the current input position without consuming anything.
  pos: function() {
    return this.input.idx
  },

  // Always succeeds without consuming anything.
  empty: function() { return true },

  // Reads a rule name from the input and applies that rule.
  apply: function() {
    var r = this._apply("anything")
    return this._apply(r)
  },

  // Reads a grammar `g` and a rule name `r` from the input and applies g's rule
  // `r` to the rest of the input.  The foreign grammar works on a proxy of the
  // current stream; afterwards matching resumes from the stream underneath the
  // proxy at which the foreign grammar stopped.
  foreign: function() {
    var g   = this._apply("anything"),
        r   = this._apply("anything"),
        gi  = objectThatDelegatesTo(g, {input: makeOMInputStreamProxy(this.input)})
    gi.initialize()
    var ans = gi._apply(r)
    this.input = gi.input.target
    return ans
  },

  // some useful "derived" rules

  // Reads the wanted value (an argument), then matches a next input value that
  // is strictly equal to it.
  exactly: function() {
    var wanted = this._apply("anything")
    if (wanted === this._apply("anything"))
      return wanted
    throw fail
  },

  // Matches the value true.
  "true": function() {
    var r = this._apply("anything")
    this._pred(r === true)
    return r
  },

  // Matches the value false.
  "false": function() {
    var r = this._apply("anything")
    this._pred(r === false)
    return r
  },

  // Matches the value undefined.
  "undefined": function() {
    var r = this._apply("anything")
    this._pred(r === undefined)
    return r
  },

  // Matches any value whose typeof is "number".
  number: function() {
    var r = this._apply("anything")
    this._pred(typeof r === "number")
    return r
  },

  // Matches any value whose typeof is "string".
  string: function() {
    var r = this._apply("anything")
    this._pred(typeof r === "string")
    return r
  },

  // Matches a string of length one.
  "char": function() {
    var r = this._apply("anything")
    this._pred(typeof r === "string" && r.length == 1)
    return r
  },

  // Matches a character whose char code is 32 or lower.
  space: function() {
    var r = this._apply("char")
    this._pred(r.charCodeAt(0) <= 32)
    return r
  },

  // Matches zero or more `space` characters.
  spaces: function() {
    return this._many(function() { return this._apply("space") })
  },

  // Matches a character between "0" and "9".
  digit: function() {
    var r = this._apply("char")
    this._pred(r >= "0" && r <= "9")
    return r
  },

  // Matches a character between "a" and "z".
  lower: function() {
    var r = this._apply("char")
    this._pred(r >= "a" && r <= "z")
    return r
  },

  // Matches a character between "A" and "Z".
  upper: function() {
    var r = this._apply("char")
    this._pred(r >= "A" && r <= "Z")
    return r
  },

  // Matches a `lower` or an `upper` character.
  letter: function() {
    return this._or(function() { return this._apply("lower") },
                    function() { return this._apply("upper") })
  },

  // Matches a `letter` or a `digit`.
  letterOrDigit: function() {
    return this._or(function() { return this._apply("letter") },
                    function() { return this._apply("digit")  })
  },

  // Reads two rule names, `first` and `rest`; matches `first` once followed by
  // any number of `rest`, and returns all the results in one array.
  firstAndRest: function() {
    var first = this._apply("anything"),
        rest  = this._apply("anything")
    return this._many(function() { return this._apply(rest) }, this._apply(first))
  },

  // Reads a sequence `xs` (an argument) and matches each of its elements, in
  // order, with `exactly`.  Returns `xs`.
  seq: function() {
    var xs = this._apply("anything")
    for (var idx = 0; idx < xs.length; idx++)
      this._applyWithArgs("exactly", xs.at(idx))
    return xs
  },

  // Reads a rule name and matches that rule, provided it matches again right
  // after (the second match is only a lookahead).
  notLast: function() {
    var rule = this._apply("anything"),
        r    = this._apply(rule)
    this._lookahead(function() { return this._apply(rule) })
    return r
  },

  // Hook called on every freshly created matcher; does nothing by default.
  initialize: function() { },

  // match and matchAll are a grammar's "public interface"

  // Shared implementation of match and matchAll.  Creates a matcher that
  // delegates to this grammar, initializes it and applies `rule` (with `args`,
  // if there are any) to `input`.
  // If matching fails and `matchFailed` was given, returns
  // matchFailed(matcher, idx) instead of throwing.  `idx` is found by starting
  // at the matcher's current cell and following `tl` links for as long as the
  // next cell has an index; that cell's idx is then decremented in place (a side
  // effect on the stream object) and the decremented value is passed on.
  _genericMatch: function(input, rule, args, matchFailed) {
    if (args == undefined)
      args = []
    var realArgs = [rule]
    for (var idx = 0; idx < args.length; idx++)
      realArgs.push(args[idx])
    var m = objectThatDelegatesTo(this, {input: input})
    m.initialize()
    try {
      return realArgs.length == 1 ? m._apply.call(m, realArgs[0]) : m._applyWithArgs.apply(m, realArgs)
    }
    catch (f) {
      if (f === fail && matchFailed != undefined) {
        var furthest = m.input
        if (furthest.idx != undefined) {
          while (furthest.tl != undefined && furthest.tl.idx != undefined)
            furthest = furthest.tl
          furthest.idx--
        }
        return matchFailed(m, furthest.idx)
      }
      throw f
    }
  },

  // Matches the single object `obj` against `rule`.
  match: function(obj, rule, args, matchFailed) {
    return this._genericMatch([obj].toOMInputStream(), rule, args, matchFailed)
  },

  // Matches the contents of `listyObj` (an array or string) against `rule`.
  matchAll: function(listyObj, rule, args, matchFailed) {
    return this._genericMatch(listyObj.toOMInputStream(), rule, args, matchFailed)
  },

  // Creates one long-lived matcher whose matchAll re-uses that same object
  // (and therefore its state) for every call.
  createInstance: function() {
    var m = objectThatDelegatesTo(this)
    m.initialize()
    m.matchAll = function(listyObj, aRule) {
      m.input = listyObj.toOMInputStream()
      return m._apply(aRule)
    }
    return m
  }
}
|
||
|
|