You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
592 lines
22 KiB
592 lines
22 KiB
"use strict";
// parse a single path portion
Object.defineProperty(exports, "__esModule", { value: true });
exports.AST = void 0;
const brace_expressions_js_1 = require("./brace-expressions.js");
const unescape_js_1 = require("./unescape.js");

// The single characters that introduce an extglob when immediately
// followed by '(' : !(..) ?(..) +(..) *(..) @(..)
const types = new Set(['!', '?', '+', '*', '@']);
/**
 * Report whether `c` is one of the extglob type characters.
 * Only an exact single-character match counts.
 */
const isExtglobType = (c) => types.has(c);

// Patterns that get prepended to bind to the start of either the
// entire string, or just a single path portion, to prevent dots
// and/or traversal patterns, when needed.
// Exts don't need the ^ or / bit, because the root binds that already.
//
// startNoTraversal: negative lookahead rejecting a '.' or '..' portion.
const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
// startNoDot: negative lookahead rejecting a leading '.'.
const startNoDot = '(?!\\.)';

// Characters that indicate a start of pattern needs the "no dots" bit,
// because a dot *might* be matched. '(' is not in the list, because in
// the case of a child extglob, it will handle the prevention itself.
const addPatternStart = new Set(['[', '.']);
// cases where traversal is A-OK, no dot prevention needed
const justDots = new Set(['..', '.']);
// Characters that get a backslash put back in front of them when they
// appear escaped in a glob (one Set entry per character of the string).
const reSpecials = new Set('().*{}+?[]^$\\!');

/**
 * Backslash-escape every character of `s` that the character class below
 * lists (regexp metacharacters, plus '-', ',', '#' and whitespace), so the
 * result can be embedded in regular-expression source as literal text.
 */
const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');

// any single thing other than /
const qmark = '[^/]';
// * => any number of characters
const star = qmark + '*?';
// use + when we need to ensure that *something* matches, because the * is
// the only thing in the path portion.
const starNoEmpty = qmark + '+?';
// remove the \ chars that we added if we end up doing a nonmagic compare
// const deslash = (s: string) => s.replace(/\\(.)/g, '$1')

/**
 * Parsed representation of a single glob path portion.
 *
 * A node with `type === null` is a plain sequence whose parts are strings
 * and/or child extglob nodes. A node with an extglob `type`
 * ('!', '?', '+', '*' or '@') holds only child AST nodes, one per
 * '|'-separated alternative.
 *
 * Build a tree with `AST.fromGlob()`, then call `toMMPattern()` or
 * `toRegExpSource()` on it.
 */
class AST {
    // extglob type character, or null for a plain (non-extglob) sequence
    type;
    // top-most node of the tree (the node itself when it has no parent)
    #root;
    // true/false once known; undefined until the regexp source is generated
    #hasMagic;
    // whether the generated regexp needs the 'u' flag
    #uflag = false;
    // children: strings and/or AST nodes
    #parts = [];
    #parent;
    // number of parts the parent held when this node was constructed
    #parentIndex;
    // list of '!' extglob nodes, shared by every node in the tree
    #negs;
    #filledNegs = false;
    // options object, shared from the root
    #options;
    // cached result of toString()
    #toString;
    // set to true if it's an extglob with no children
    // (which really means one child of '')
    #emptyExt = false;
    /**
     * @param type extglob type character, or null for a plain sequence
     * @param parent enclosing node, or undefined for a root node
     * @param options options object; only used when this node is the root,
     *   every other node shares the root's options
     */
    constructor(type, parent, options = {}) {
        this.type = type;
        // extglobs are inherently magical
        if (type)
            this.#hasMagic = true;
        this.#parent = parent;
        this.#root = this.#parent ? this.#parent.#root : this;
        this.#options = this.#root === this ? options : this.#root.#options;
        this.#negs = this.#root === this ? [] : this.#root.#negs;
        // register negations on the shared list until the root has
        // processed them in #fillNegs()
        if (type === '!' && !this.#root.#filledNegs)
            this.#negs.push(this);
        this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
    }
    /**
     * Whether this node is known to contain magic. Returns the cached
     * value when there is one; otherwise checks the child nodes.
     */
    get hasMagic() {
        /* c8 ignore start */
        if (this.#hasMagic !== undefined)
            return this.#hasMagic;
        /* c8 ignore stop */
        for (const p of this.#parts) {
            if (typeof p === 'string')
                continue;
            if (p.type || p.hasMagic)
                return (this.#hasMagic = true);
        }
        // note: will be undefined until we generate the regexp src and find out
        return this.#hasMagic;
    }
    /**
     * Reconstructs the pattern text for this node. The result is cached,
     * so later changes to the parts are not reflected.
     */
    toString() {
        if (this.#toString !== undefined)
            return this.#toString;
        if (!this.type) {
            return (this.#toString = this.#parts.map(p => String(p)).join(''));
        }
        else {
            return (this.#toString =
                this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
        }
    }
    /**
     * Root-only, run once: for every registered '!' extglob, copy the
     * parts that follow it in each non-extglob ancestor into each of its
     * alternatives. Returns `this`.
     */
    #fillNegs() {
        /* c8 ignore start */
        if (this !== this.#root)
            throw new Error('should only call on root');
        if (this.#filledNegs)
            return this;
        /* c8 ignore stop */
        // call toString() once to fill this out (caches the pattern text
        // before the tree is modified below)
        this.toString();
        this.#filledNegs = true;
        let n;
        while ((n = this.#negs.pop())) {
            // the type may have been reset since the node was registered
            if (n.type !== '!')
                continue;
            // walk up the tree, appending everything that comes AFTER parentIndex
            let p = n;
            let pp = p.#parent;
            while (pp) {
                for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
                    for (const part of n.#parts) {
                        /* c8 ignore start */
                        if (typeof part === 'string') {
                            throw new Error('string part in extglob AST??');
                        }
                        /* c8 ignore stop */
                        part.copyIn(pp.#parts[i]);
                    }
                }
                p = pp;
                pp = p.#parent;
            }
        }
        return this;
    }
    /**
     * Append parts to this node. Empty strings are skipped.
     * @throws Error if a part is neither a string nor an AST whose parent
     *   is this node
     */
    push(...parts) {
        for (const p of parts) {
            if (p === '')
                continue;
            /* c8 ignore start */
            if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
                throw new Error('invalid part: ' + p);
            }
            /* c8 ignore stop */
            this.#parts.push(p);
        }
    }
    /**
     * Array representation of the tree. Extglob nodes are
     * `[type, ...children]`; plain nodes are their parts. A plain node at
     * the start is prefixed with `[]`, and `{}` is appended at the end of
     * the root (or of a negation tail once negations have been filled).
     */
    toJSON() {
        const ret = this.type === null
            ? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
            : [this.type, ...this.#parts.map(p => p.toJSON())];
        if (this.isStart() && !this.type)
            ret.unshift([]);
        if (this.isEnd() &&
            (this === this.#root ||
                (this.#root.#filledNegs && this.#parent?.type === '!'))) {
            ret.push({});
        }
        return ret;
    }
    /**
     * Whether this node sits at the start of the pattern: it is the root,
     * or its parent is at the start and everything before it in the parent
     * is a '!' extglob.
     */
    isStart() {
        if (this.#root === this)
            return true;
        // if (this.type) return !!this.#parent?.isStart()
        if (!this.#parent?.isStart())
            return false;
        if (this.#parentIndex === 0)
            return true;
        // if everything AHEAD of this is a negation, then it's still the "start"
        const p = this.#parent;
        for (let i = 0; i < this.#parentIndex; i++) {
            const pp = p.#parts[i];
            if (!(pp instanceof AST && pp.type === '!')) {
                return false;
            }
        }
        return true;
    }
    /**
     * Whether this node sits at the end of the pattern: it is the root,
     * its parent is a '!' extglob, or its parent is at the end and (for
     * extglob nodes) it is the parent's last part.
     */
    isEnd() {
        if (this.#root === this)
            return true;
        if (this.#parent?.type === '!')
            return true;
        if (!this.#parent?.isEnd())
            return false;
        if (!this.type)
            return this.#parent?.isEnd();
        // if not root, it'll always have a parent
        /* c8 ignore start */
        const pl = this.#parent ? this.#parent.#parts.length : 0;
        /* c8 ignore stop */
        return this.#parentIndex === pl - 1;
    }
    /**
     * Append a copy of `part` to this node: strings are pushed as-is,
     * AST nodes are cloned with this node as their parent.
     */
    copyIn(part) {
        if (typeof part === 'string')
            this.push(part);
        else
            this.push(part.clone(this));
    }
    /**
     * Recursively copy this node (type and parts) under a new parent.
     */
    clone(parent) {
        const c = new AST(this.type, parent);
        for (const p of this.#parts) {
            c.copyIn(p);
        }
        return c;
    }
    /**
     * Parse `str` into `ast`, starting at `pos`.
     *
     * For a plain node (`ast.type === null`) the rest of the string is
     * consumed. For an extglob node, `pos` is the index of its '(' and
     * parsing stops after the matching ')'. An extglob with no closing ')'
     * is turned back into a plain node holding the raw text.
     *
     * @param str the full pattern being parsed
     * @param ast node that receives the parsed parts
     * @param pos index to start from
     * @param opt options; `opt.noext` disables extglob detection
     * @returns the index just past the consumed portion
     */
    static #parseAST(str, ast, pos, opt) {
        let escaping = false;
        // inBrace/braceStart/braceNeg track a [...] bracket expression, in
        // which extglob starts, '|' and ')' are taken literally
        let inBrace = false;
        let braceStart = -1;
        let braceNeg = false;
        if (ast.type === null) {
            // outside of a extglob, append until we find a start
            let i = pos;
            let acc = '';
            while (i < str.length) {
                const c = str.charAt(i++);
                // still accumulate escapes at this point, but we do ignore
                // starts that are escaped
                if (escaping || c === '\\') {
                    escaping = !escaping;
                    acc += c;
                    continue;
                }
                if (inBrace) {
                    if (i === braceStart + 1) {
                        // first character after '[': only a negation marker
                        // is special here, so a leading ']' does not close
                        if (c === '^' || c === '!') {
                            braceNeg = true;
                        }
                    }
                    else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
                        // a ']' directly after the negation marker does not
                        // close either
                        inBrace = false;
                    }
                    acc += c;
                    continue;
                }
                else if (c === '[') {
                    inBrace = true;
                    braceStart = i;
                    braceNeg = false;
                    acc += c;
                    continue;
                }
                if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
                    ast.push(acc);
                    acc = '';
                    const ext = new AST(c, ast);
                    i = AST.#parseAST(str, ext, i, opt);
                    ast.push(ext);
                    continue;
                }
                acc += c;
            }
            ast.push(acc);
            return i;
        }
        // some kind of extglob, pos is at the (
        // find the next | or )
        let i = pos + 1;
        let part = new AST(null, ast);
        const parts = [];
        let acc = '';
        while (i < str.length) {
            const c = str.charAt(i++);
            // still accumulate escapes at this point, but we do ignore
            // starts that are escaped
            if (escaping || c === '\\') {
                escaping = !escaping;
                acc += c;
                continue;
            }
            if (inBrace) {
                if (i === braceStart + 1) {
                    if (c === '^' || c === '!') {
                        braceNeg = true;
                    }
                }
                else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
                    inBrace = false;
                }
                acc += c;
                continue;
            }
            else if (c === '[') {
                inBrace = true;
                braceStart = i;
                braceNeg = false;
                acc += c;
                continue;
            }
            if (isExtglobType(c) && str.charAt(i) === '(') {
                part.push(acc);
                acc = '';
                const ext = new AST(c, part);
                part.push(ext);
                i = AST.#parseAST(str, ext, i, opt);
                continue;
            }
            if (c === '|') {
                // end of one alternative, start the next
                part.push(acc);
                acc = '';
                parts.push(part);
                part = new AST(null, ast);
                continue;
            }
            if (c === ')') {
                if (acc === '' && ast.#parts.length === 0) {
                    ast.#emptyExt = true;
                }
                part.push(acc);
                acc = '';
                ast.push(...parts, part);
                return i;
            }
            acc += c;
        }
        // unfinished extglob
        // if we got here, it was a malformed extglob! not an extglob, but
        // maybe something else in there.
        ast.type = null;
        ast.#hasMagic = undefined;
        // pos - 1 is the extglob type character, so the raw text is kept whole
        ast.#parts = [str.substring(pos - 1)];
        return i;
    }
    /**
     * Parse a glob pattern into a new root AST.
     * @param pattern the pattern text
     * @param options options stored on the root and used while parsing
     */
    static fromGlob(pattern, options = {}) {
        const ast = new AST(null, undefined, options);
        AST.#parseAST(pattern, ast, 0, options);
        return ast;
    }
    /**
     * Returns the regular expression if there's magic, or the unescaped
     * string if not. The returned RegExp is anchored with ^ and $ and
     * carries `_src` (the regexp source) and `_glob` (the pattern text).
     */
    toMMPattern() {
        // should only be called on root
        /* c8 ignore start */
        if (this !== this.#root)
            return this.#root.toMMPattern();
        /* c8 ignore stop */
        // take the pattern text before toRegExpSource(), which can
        // modify the tree
        const glob = this.toString();
        const [re, body, hasMagic, uflag] = this.toRegExpSource();
        // if we're in nocase mode, and not nocaseMagicOnly, then we do
        // still need a regular expression if we have to case-insensitively
        // match capital/lowercase characters.
        const anyMagic = hasMagic ||
            this.#hasMagic ||
            (this.#options.nocase &&
                !this.#options.nocaseMagicOnly &&
                glob.toUpperCase() !== glob.toLowerCase());
        if (!anyMagic) {
            return body;
        }
        const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
        return Object.assign(new RegExp(`^${re}$`, flags), {
            _src: re,
            _glob: glob,
        });
    }
    /** The options object shared by every node of this tree. */
    get options() {
        return this.#options;
    }
    // returns the string match, the regexp source, whether there's magic
    // in the regexp (so a regular expression is required) and whether or
    // not the uflag is needed for the regular expression (for posix classes)
    // TODO: instead of injecting the start/end at this point, just return
    // the BODY of the regexp, along with the start/end portions suitable
    // for binding the start/end in either a joined full-path makeRe context
    // (where we bind to (^|/), or a standalone matchPart context (where
    // we bind to ^, and not /). Otherwise slashes get duped!
    //
    // In part-matching mode, the start is:
    // - if not isStart: nothing
    // - if traversal possible, but not allowed: ^(?!\.\.?$)
    // - if dots allowed or not possible: ^
    // - if dots possible and not allowed: ^(?!\.)
    // end is:
    // - if not isEnd(): nothing
    // - else: $
    //
    // In full-path matching mode, we put the slash at the START of the
    // pattern, so start is:
    // - if first pattern: same as part-matching mode
    // - if not isStart(): nothing
    // - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
    // - if dots allowed or not possible: /
    // - if dots possible and not allowed: /(?!\.)
    // end is:
    // - if last pattern, same as part-matching mode
    // - else nothing
    //
    // Always put the (?:$|/) on negated tails, though, because that has to be
    // there to bind the end of the negated pattern portion, and it's easier to
    // just stick it in now rather than try to inject it later in the middle of
    // the pattern.
    //
    // We can just always return the same end, and leave it up to the caller
    // to know whether it's going to be used joined or in parts.
    // And, if the start is adjusted slightly, can do the same there:
    // - if not isStart: nothing
    // - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
    // - if dots allowed or not possible: (?:/|^)
    // - if dots possible and not allowed: (?:/|^)(?!\.)
    //
    // But it's better to have a simpler binding without a conditional, for
    // performance, so probably better to return both start options.
    //
    // Then the caller just ignores the end if it's not the first pattern,
    // and the start always gets applied.
    //
    // But that's always going to be $ if it's the ending pattern, or nothing,
    // so the caller can just attach $ at the end of the pattern when building.
    //
    // So the todo is:
    // - better detect what kind of start is needed
    // - return both flavors of starting pattern
    // - attach $ at the end of the pattern when creating the actual RegExp
    //
    // Ah, but wait, no, that all only applies to the root when the first pattern
    // is not an extglob. If the first pattern IS an extglob, then we need all
    // that dot prevention biz to live in the extglob portions, because eg
    // +(*|.x*) can match .xy but not .yx.
    //
    // So, return the two flavors if it's #root and the first child is not an
    // AST, otherwise leave it to the child AST to handle it, and there,
    // use the (?:^|/) style of start binding.
    //
    // Even simplified further:
    // - Since the start for a join is eg /(?!\.) and the start for a part
    //   is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
    //   or start or whatever) and prepend ^ or / at the Regexp construction.
    /**
     * Build the regexp source for this node.
     *
     * @param allowDot overrides the `dot` option when not null/undefined
     * @returns a 4-tuple: [regexp source, unescaped body, hasMagic, uflag]
     */
    toRegExpSource(allowDot) {
        const dot = allowDot ?? !!this.#options.dot;
        if (this.#root === this)
            this.#fillNegs();
        if (!this.type) {
            // plain sequence: concatenate the source of every part
            const noEmpty = this.isStart() && this.isEnd();
            const src = this.#parts
                .map(p => {
                const [re, , hasMagic, uflag] = typeof p === 'string'
                    ? AST.#parseGlob(p, this.#hasMagic, noEmpty)
                    : p.toRegExpSource(allowDot);
                this.#hasMagic = this.#hasMagic || hasMagic;
                this.#uflag = this.#uflag || uflag;
                return re;
            })
                .join('');
            let start = '';
            if (this.isStart()) {
                if (typeof this.#parts[0] === 'string') {
                    // this is the string that will match the start of the pattern,
                    // so we need to protect against dots and such.
                    // '.' and '..' cannot match unless the pattern is that exactly,
                    // even if it starts with . or dot:true is set.
                    const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
                    if (!dotTravAllowed) {
                        const aps = addPatternStart;
                        // check if we have a possibility of matching . or ..,
                        // and prevent that.
                        const needNoTrav = 
                        // dots are allowed, and the pattern starts with [ or .
                        (dot && aps.has(src.charAt(0))) ||
                            // the pattern starts with \., and then [ or .
                            (src.startsWith('\\.') && aps.has(src.charAt(2))) ||
                            // the pattern starts with \.\., and then [ or .
                            (src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
                        // no need to prevent dots if it can't match a dot, or if a
                        // sub-pattern will be preventing it anyway.
                        const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
                        start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
                    }
                }
            }
            // append the "end of path portion" pattern to negation tails
            let end = '';
            if (this.isEnd() &&
                this.#root.#filledNegs &&
                this.#parent?.type === '!') {
                end = '(?:$|\\/)';
            }
            const final = start + src + end;
            return [
                final,
                (0, unescape_js_1.unescape)(src),
                (this.#hasMagic = !!this.#hasMagic),
                this.#uflag,
            ];
        }
        // We need to calculate the body *twice* if it's a repeat pattern
        // at the start, once in nodot mode, then again in dot mode, so a
        // pattern like *(?) can match 'x.y'
        const repeated = this.type === '*' || this.type === '+';
        // some kind of extglob
        const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
        let body = this.#partsToRegExp(dot);
        if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
            // invalid extglob, has to at least be *something* present, if it's
            // the entire path portion.
            const s = this.toString();
            this.#parts = [s];
            this.type = null;
            this.#hasMagic = undefined;
            return [s, (0, unescape_js_1.unescape)(this.toString()), false, false];
        }
        // XXX abstract out this map method
        // The second, dots-allowed body is only needed for repeat extglobs
        // when dots are not already allowed.
        let bodyDotAllowed = !repeated || allowDot || dot
            ? ''
            : this.#partsToRegExp(true);
        if (bodyDotAllowed === body) {
            bodyDotAllowed = '';
        }
        if (bodyDotAllowed) {
            body = `(?:${body})(?:${bodyDotAllowed})*?`;
        }
        // an empty !() is exactly equivalent to a starNoEmpty
        let final = '';
        if (this.type === '!' && this.#emptyExt) {
            final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
        }
        else {
            const close = this.type === '!'
                ? // !() must match something,but !(x) can match ''
                    '))' +
                        (this.isStart() && !dot && !allowDot ? startNoDot : '') +
                        star +
                        ')'
                : this.type === '@'
                    ? ')'
                    : this.type === '?'
                        ? ')?'
                        : this.type === '+' && bodyDotAllowed
                            ? ')'
                            : this.type === '*' && bodyDotAllowed
                                ? `)?`
                                : `)${this.type}`;
            final = start + body + close;
        }
        return [
            final,
            (0, unescape_js_1.unescape)(body),
            (this.#hasMagic = !!this.#hasMagic),
            this.#uflag,
        ];
    }
    /**
     * Regexp source for the alternatives of an extglob node, joined with
     * '|'. Empty alternatives are dropped when this node spans the whole
     * path portion.
     * @param dot passed to each child's toRegExpSource() as `allowDot`
     */
    #partsToRegExp(dot) {
        return this.#parts
            .map(p => {
            // extglob ASTs should only contain child ASTs, never bare strings
            /* c8 ignore start */
            if (typeof p === 'string') {
                throw new Error('string type in extglob ast??');
            }
            /* c8 ignore stop */
            // can ignore hasMagic, because extglobs are already always magic
            const [re, , , uflag] = p.toRegExpSource(dot);
            this.#uflag = this.#uflag || uflag;
            return re;
        })
            .filter(p => !(this.isStart() && this.isEnd()) || !!p)
            .join('|');
    }
    /**
     * Translate a glob string with no extglobs into regexp source.
     *
     * @param glob the glob text
     * @param hasMagic magic state known so far; set to true on '*' or '?',
     *   or when a bracket expression reports magic
     * @param noEmpty when true and the glob is exactly '*', require at
     *   least one character
     * @returns a 4-tuple: [regexp source, unescaped glob, hasMagic, uflag]
     */
    static #parseGlob(glob, hasMagic, noEmpty = false) {
        let escaping = false;
        let re = '';
        let uflag = false;
        for (let i = 0; i < glob.length; i++) {
            const c = glob.charAt(i);
            if (escaping) {
                escaping = false;
                // keep the backslash only where the regexp needs it
                re += (reSpecials.has(c) ? '\\' : '') + c;
                continue;
            }
            if (c === '\\') {
                if (i === glob.length - 1) {
                    // trailing backslash: match a literal backslash
                    re += '\\\\';
                }
                else {
                    escaping = true;
                }
                continue;
            }
            if (c === '[') {
                const [src, needUflag, consumed, magic] = (0, brace_expressions_js_1.parseClass)(glob, i);
                // when nothing was consumed, fall through and treat '[' as
                // an ordinary character
                if (consumed) {
                    re += src;
                    uflag = uflag || needUflag;
                    i += consumed - 1;
                    hasMagic = hasMagic || magic;
                    continue;
                }
            }
            if (c === '*') {
                if (noEmpty && glob === '*')
                    re += starNoEmpty;
                else
                    re += star;
                hasMagic = true;
                continue;
            }
            if (c === '?') {
                re += qmark;
                hasMagic = true;
                continue;
            }
            re += regExpEscape(c);
        }
        return [re, (0, unescape_js_1.unescape)(glob), !!hasMagic, uflag];
    }
}

exports.AST = AST;
//# sourceMappingURL=ast.js.map