// Mini Shell
// parse a single path portion
import { parseClass } from './brace-expressions';
// The five characters that introduce an extglob group when followed by "(".
const types = new Set('!?+*@');
const isExtglobType = (c) => {
    return types.has(c);
};
// A pattern whose first character is one of these needs the "no dots" guard.
const addPatternStart = new Set('[.');
// The two directory-traversal path portions.
const justDots = new Set([
    '..',
    '.',
]);
// Characters that keep their backslash when they appear escaped in a glob.
const reSpecials = new Set([...'().*{}+?[]^$\\!']);
// Backslash-escape every character that is special inside a RegExp source.
const regExpEscape = (s) => {
    return s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
};
// Matches exactly one character that is not a path separator.
// (A "/" needs no escaping when the source is handed to new RegExp().)
const qmark = '[^/]';
// "*" matches any run of such characters, as few as possible.
const star = `${qmark}*?`;
/**
 * Node of the tree produced by parsing one path portion of a glob pattern
 * that may contain extglob groups such as `@(a|b)` or `!(x)`.
 *
 * A node whose `type` is `null` holds a sequence of parts: plain strings and
 * nested extglob nodes. A node with an extglob `type` (one of `! ? + * @`)
 * holds its alternatives, each of which is a `null`-typed node.
 */
export class AST {
    /** Extglob type character, or `null` for a plain sequence of parts. */
    type;
    /** Top-most node of the tree this node belongs to (itself for the root). */
    #root;
    /** Child parts: strings and/or AST nodes. */
    #parts = [];
    /** Enclosing node, `undefined` for the root. */
    #parent;
    /** Number of parts the parent held when this node was constructed. */
    #parentIndex;
    /** List (shared through the root) of `!` nodes awaiting fillNegs(). */
    #negs;
    /** Set on the root once fillNegs() has run. */
    #filledNegs = false;
    /** Options object; every node uses the one given to the root. */
    #options;
    /**
     * @param {string|null} type extglob type character, or null
     * @param {AST} [parent] enclosing node; omit to create a root
     * @param {object} [options] only honoured for a root node; child nodes
     *   reuse the root's options
     */
    constructor(type, parent, options = {}) {
        this.type = type;
        this.#parent = parent;
        this.#root = this.#parent ? this.#parent.#root : this;
        this.#options = this.#root === this ? options : this.#root.#options;
        this.#negs = this.#root === this ? [] : this.#root.#negs;
        // Negations created after fillNegs() ran (i.e. clones made by it) are
        // not registered again.
        if (type === '!' && !this.#root.#filledNegs) {
            this.#negs.push(this);
        }
        this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
    }
    /**
     * Copy whatever follows each `!( )` group into every alternative of that
     * group. Always operates on the root and runs at most once per tree.
     *
     * @returns {AST} this node
     */
    fillNegs() {
        if (this !== this.#root) {
            this.#root.fillNegs();
            return this;
        }
        if (this.#filledNegs) {
            return this;
        }
        this.#filledNegs = true;
        let n;
        while ((n = this.#negs.pop())) {
            // A node registered as '!' may have been reset to a plain node
            // because its extglob turned out to be malformed.
            if (n.type !== '!') {
                continue;
            }
            // walk up the tree, appending everything that comes AFTER parentIndex
            let p = n;
            let pp = p.#parent;
            while (pp) {
                // Only plain sequences (falsy type) contribute trailing parts.
                for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
                    for (const part of n.#parts) {
                        /* c8 ignore start */
                        if (typeof part === 'string') {
                            throw new Error('string part in extglob AST??');
                        }
                        /* c8 ignore stop */
                        part.copyIn(pp.#parts[i]);
                    }
                }
                p = pp;
                pp = p.#parent;
            }
        }
        return this;
    }
    /**
     * Append parts to this node. Empty strings are skipped.
     *
     * @param {...(string|AST)} parts strings, or AST nodes whose parent is
     *   this node
     * @throws {Error} if a part is neither a string nor a child of this node
     */
    push(...parts) {
        for (const p of parts) {
            if (p === '') {
                continue;
            }
            /* c8 ignore start */
            if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
                throw new Error('invalid part: ' + p);
            }
            /* c8 ignore stop */
            this.#parts.push(p);
        }
    }
    /**
     * Array representation used by JSON.stringify(): the parts, preceded by
     * the type for extglob nodes. An empty array is prepended when an untyped
     * node is at the start, and an empty object is appended when the node is
     * at the end and is either the root or an alternative of a `!` group
     * whose negations have been filled.
     *
     * @returns {Array}
     */
    toJSON() {
        const ret = this.type === null ? this.#parts.slice() : [this.type, ...this.#parts];
        if (this.isStart() && !this.type) {
            ret.unshift([]);
        }
        if (this.isEnd() &&
            (this === this.#root ||
                (this.#root.#filledNegs && this.#parent?.type === '!'))) {
            ret.push({});
        }
        return ret;
    }
    /**
     * @returns {boolean} true for the root, or for a node recorded at index 0
     *   of a parent that is itself at the start
     */
    isStart() {
        if (this.#root === this) {
            return true;
        }
        // if (this.type) return !!this.#parent?.isStart()
        if (!this.#parent?.isStart()) {
            return false;
        }
        return this.#parentIndex === 0;
    }
    /**
     * @returns {boolean} true for the root, for any direct child of a `!`
     *   group, for an untyped node whose parent is at the end, and for a
     *   typed node recorded as the last part of a parent that is at the end
     */
    isEnd() {
        if (this.#root === this) {
            return true;
        }
        if (this.#parent?.type === '!') {
            return true;
        }
        if (!this.#parent?.isEnd()) {
            return false;
        }
        if (!this.type) {
            return this.#parent?.isEnd();
        }
        return (this.#parentIndex === (this.#parent ? this.#parent.#parts.length : 0) - 1);
    }
    /**
     * Append a copy of `part` to this node: strings as-is, AST nodes as a
     * clone whose parent is this node.
     *
     * @param {string|AST} part
     */
    copyIn(part) {
        if (typeof part === 'string') {
            this.push(part);
        }
        else {
            this.push(part.clone(this));
        }
    }
    /**
     * Recursively copy this node under a new parent.
     *
     * @param {AST} parent node that will own the copy
     * @returns {AST} the copy (not yet pushed onto `parent`)
     */
    clone(parent) {
        const c = new AST(this.type, parent);
        for (const p of this.#parts) {
            c.copyIn(p);
        }
        return c;
    }
    /**
     * Recursive-descent parser that fills `ast` from `str`.
     *
     * For an untyped `ast`, scanning starts at `pos` and runs to the end of
     * the string. For an extglob `ast`, `pos` is the index of its opening
     * `(` and scanning stops after the matching `)`.
     *
     * @param {string} str full pattern
     * @param {AST} ast node to populate
     * @param {number} pos index to start from
     * @param {object} opt options; `noext` disables extglob detection at the
     *   top level
     * @returns {number} index of the first character not consumed
     */
    static #parseAST(str, ast, pos, opt) {
        let escaping = false;
        if (ast.type === null) {
            // outside of a extglob, append until we find a start
            let i = pos;
            let acc = '';
            while (i < str.length) {
                const c = str.charAt(i++);
                // still accumulate escapes at this point, but we do ignore
                // starts that are escaped
                if (escaping || c === '\\') {
                    escaping = !escaping;
                    acc += c;
                    continue;
                }
                if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
                    ast.push(acc);
                    acc = '';
                    const ext = new AST(c, ast);
                    i = AST.#parseAST(str, ext, i, opt);
                    ast.push(ext);
                    continue;
                }
                acc += c;
            }
            ast.push(acc);
            return i;
        }
        // some kind of extglob, pos is at the (
        // find the next | or )
        let i = pos + 1;
        let part = new AST(null, ast);
        const parts = [];
        let acc = '';
        while (i < str.length) {
            const c = str.charAt(i++);
            // still accumulate escapes at this point, but we do ignore
            // starts that are escaped
            if (escaping || c === '\\') {
                escaping = !escaping;
                acc += c;
                continue;
            }
            if (isExtglobType(c) && str.charAt(i) === '(') {
                part.push(acc);
                acc = '';
                const ext = new AST(c, part);
                part.push(ext);
                i = AST.#parseAST(str, ext, i, opt);
                continue;
            }
            if (c === '|') {
                part.push(acc);
                acc = '';
                parts.push(part);
                part = new AST(null, ast);
                continue;
            }
            if (c === ')') {
                part.push(acc);
                acc = '';
                ast.push(...parts, part);
                return i;
            }
            acc += c;
        }
        // if we got here, it was a malformed extglob! not an extglob, but
        // maybe something else in there.
        ast.type = null;
        // pos is the "(", so the type character that introduced this node sits
        // at pos - 1; keep it so the remaining text is preserved verbatim.
        ast.#parts = [str.substring(pos - 1)];
        return i;
    }
    /**
     * Parse a glob path portion into a new root AST.
     *
     * @param {string} pattern glob text
     * @param {object} [options] stored on the root and passed to the parser
     * @returns {AST} the root node
     */
    static fromGlob(pattern, options = {}) {
        const ast = new AST(null, undefined, options);
        AST.#parseAST(pattern, ast, 0, options);
        return ast;
    }
    /**
     * Build the regular expression source for this node and its descendants.
     * When called on the root, negations are filled in first.
     *
     * @returns {string} RegExp source text
     * @throws {Error} if an extglob node directly contains a string part
     */
    toRegExpSource() {
        if (this.#root === this) {
            this.fillNegs();
        }
        if (!this.type) {
            const src = this.#parts
                .map(p => {
                if (typeof p === 'string') {
                    return AST.#parseGlob(p, this.#options);
                }
                else {
                    return p.toRegExpSource();
                }
            })
                .join('');
            let start = '';
            if (this.isStart() && typeof this.#parts[0] === 'string') {
                // '.' and '..' cannot match unless the pattern is that exactly
                const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
                if (dotTravAllowed) {
                    start = '(?:^|\\/)';
                }
                else {
                    const dotsAllowed = this.#options.dot ||
                        // no need to prevent dots if it can't match a dot, or if a sub-pattern
                        // will be preventing it anyway.
                        !addPatternStart.has(src.charAt(0));
                    start = dotsAllowed ? '(?!(?:^|\\/)\\.{1,2}(?:$|\\/))' : '(?!\\.)';
                }
            }
            let end = '';
            if (this.isEnd() &&
                (this === this.#root ||
                    (this.#root.#filledNegs && this.#parent?.type === '!'))) {
                end = '(?:$|\\/)';
            }
            return start + src + end;
        }
        // some kind of extglob
        const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
        const body = this.#parts
            .map(p => {
            /* c8 ignore start */
            if (typeof p === 'string') {
                throw new Error('string type in extglob ast??');
            }
            /* c8 ignore stop */
            return p.toRegExpSource();
        })
            .join('|');
        // '!' closes its lookahead and then consumes non-slash characters,
        // '@' is a bare group, and the remaining types ('?', '+', '*') are
        // appended to the group as-is.
        const close = this.type === '!'
            ? '))[^/]*?)'
            : this.type === '@'
                ? ')'
                : `)${this.type}`;
        return start + body + close;
    }
    /**
     * Translate glob text that contains no extglob groups into RegExp source.
     *
     * @param {string} glob text to translate
     * @param {object} options currently not consulted by this method
     * @returns {string} RegExp source text
     */
    static #parseGlob(glob, options) {
        let escaping = false;
        let re = '';
        for (let i = 0; i < glob.length; i++) {
            const c = glob.charAt(i);
            if (escaping) {
                escaping = false;
                // keep the backslash only where the character needs one
                re += (reSpecials.has(c) ? '\\' : '') + c;
                continue;
            }
            if (c === '\\') {
                if (i === glob.length - 1) {
                    // a trailing backslash is emitted as an escaped backslash
                    re += '\\\\';
                }
                else {
                    escaping = true;
                }
                continue;
            }
            if (c === '[') {
                const [src, , consumed] = parseClass(glob, i);
                if (consumed) {
                    re += src;
                    i += consumed - 1;
                    continue;
                }
                // nothing consumed: fall through and escape the "[" below
            }
            if (c === '*') {
                re += star;
                continue;
            }
            if (c === '?') {
                re += qmark;
                continue;
            }
            re += regExpEscape(c);
        }
        return re;
    }
}
// Ad-hoc smoke run: parse a sample extglob pattern, fill in its negations,
// then print the resulting tree and the regular expression source.
const pattern = 'a@(i|w!(x|y)z+(l|m)|j)';
const ast = AST.fromGlob(pattern);
ast.fillNegs();
console.log('negged', pattern, JSON.stringify(ast));
const reSource = ast.toRegExpSource();
console.log('to re src', pattern, reSource);
// // the type (exttype or null for strings), and array of children tokens
//
// // append everything after a negative extglob to each of the parts
// // of the negative extglob node. So, eg, [a, [!, x, y], z]
//
// //
// //
// //
// //
//
// const globUnescape = (s: string) => s.replace(/\\(.)/g, '$1')
// const regExpEscape = (s: string) =>
// s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&')
//
// // "abc" -> { a:true, b:true, c:true }
// const charSet = (s: string) =>
// s.split('').reduce((set: { [k: string]: boolean }, c) => {
// set[c] = true
// return set
// }, {})
//
// // characters that need to be escaped in RegExp.
// const reSpecials = charSet('().*{}+?[]^$\\!')
//
// // characters that indicate we have to add the pattern start
// const addPatternStartSet = charSet('[.(')
//
// // any single thing other than /
// // don't need to escape / when using new RegExp()
// const qmark = '[^/]'
//
// // * => any number of characters
// const star = qmark + '*?'
//
// // TODO: take an offset and length, so we can sub-parse the extglobs
// const parse = (
// options: MinimatchOptions,
// pattern: string,
// debug: (...a: any[]) => void
// ): false | string => {
// assertValidPattern(pattern)
//
// if (pattern === '') return ''
//
// let re = ''
// let hasMagic = false
// let escaping = false
// // ? => one single character
// let uflag = false
//
// // . and .. never match anything that doesn't start with .,
// // even when options.dot is set. However, if the pattern
// // starts with ., then traversal patterns can match.
// let dotTravAllowed = pattern.charAt(0) === '.'
// let dotFileAllowed = options.dot || dotTravAllowed
// const patternStart = () =>
// dotTravAllowed
// ? ''
// : dotFileAllowed
// ? '(?!(?:^|\\/)\\.{1,2}(?:$|\\/))'
// : '(?!\\.)'
// const subPatternStart = (p: string) =>
// p.charAt(0) === '.'
// ? ''
// : options.dot
// ? '(?!(?:^|\\/)\\.{1,2}(?:$|\\/))'
// : '(?!\\.)'
//
// const clearStateChar = () => {
// if (stateChar) {
// // we had some state-tracking character
// // that wasn't consumed by this pass.
// switch (stateChar) {
// case '*':
// re += star
// hasMagic = true
// break
// case '?':
// re += qmark
// hasMagic = true
// break
// default:
// re += '\\' + stateChar
// break
// }
// debug('clearStateChar %j %j', stateChar, re)
// stateChar = false
// }
// }
//
// for (
// let i = 0, c: string;
// i < pattern.length && (c = pattern.charAt(i));
// i++
// ) {
// debug('%s\t%s %s %j', pattern, i, re, c)
//
// // skip over any that are escaped.
// if (escaping) {
// // completely not allowed, even escaped.
// // should be impossible.
// /* c8 ignore start */
// if (c === '/') {
// return false
// }
// /* c8 ignore stop */
//
// if (reSpecials[c]) {
// re += '\\'
// }
// re += c
// escaping = false
// continue
// }
//
// switch (c) {
// // Should already be path-split by now.
// /* c8 ignore start */
// case '/': {
// return false
// }
// /* c8 ignore stop */
//
// case '\\':
// clearStateChar()
// escaping = true
// continue
//
// // the various stateChar values
// // for the "extglob" stuff.
// case '?':
// case '*':
// case '+':
// case '@':
// case '!':
// debug('%s\t%s %s %j <-- stateChar', pattern, i, re, c)
//
// // if we already have a stateChar, then it means
// // that there was something like ** or +? in there.
// // Handle the stateChar, then proceed with this one.
// debug('call clearStateChar %j', stateChar)
// clearStateChar()
// stateChar = c
// // if extglob is disabled, then +(asdf|foo) isn't a thing.
// // just clear the statechar *now*, rather than even diving into
// // the patternList stuff.
// if (options.noext) clearStateChar()
// continue
//
// case '(': {
// if (!stateChar) {
// re += '\\('
// continue
// }
//
// const plEntry: PatternListEntry = {
// type: stateChar,
// start: i - 1,
// reStart: re.length,
// open: plTypes[stateChar].open,
// close: plTypes[stateChar].close,
// }
// debug(pattern, '\t', plEntry)
// patternListStack.push(plEntry)
// // negation is (?:(?!(?:js)(?:<rest>))[^/]*)
// re += plEntry.open
// // next entry starts with a dot maybe?
// if (plEntry.start === 0 && plEntry.type !== '!') {
// dotTravAllowed = true
// re += subPatternStart(pattern.slice(i + 1))
// }
// debug('plType %j %j', stateChar, re)
// stateChar = false
// continue
// }
//
// case ')': {
// const plEntry = patternListStack[patternListStack.length - 1]
// if (!plEntry) {
// re += '\\)'
// continue
// }
// patternListStack.pop()
//
// // closing an extglob
// clearStateChar()
// hasMagic = true
// pl = plEntry
// // negation is (?:(?!js)[^/]*)
// // The others are (?:<pattern>)<type>
// re += pl.close
// if (pl.type === '!') {
// negativeLists.push(Object.assign(pl, { reEnd: re.length }))
// }
// continue
// }
//
// case '|': {
// const plEntry = patternListStack[patternListStack.length - 1]
// if (!plEntry) {
// re += '\\|'
// continue
// }
//
// clearStateChar()
// re += '|'
// // next subpattern can start with a dot?
// if (plEntry.start === 0 && plEntry.type !== '!') {
// dotTravAllowed = true
// re += subPatternStart(pattern.slice(i + 1))
// }
// continue
// }
//
// // these are mostly the same in regexp and glob
// case '[':
// // swallow any state-tracking char before the [
// clearStateChar()
// const [src, needUflag, consumed, magic] = parseClass(pattern, i)
// if (consumed) {
// re += src
// uflag = uflag || needUflag
// i += consumed - 1
// hasMagic = hasMagic || magic
// } else {
// re += '\\['
// }
// continue
//
// case ']':
// re += '\\' + c
// continue
//
// default:
// // swallow any state char that wasn't consumed
// clearStateChar()
//
// re += regExpEscape(c)
// break
// } // switch
// } // for
//
// // handle the case where we had a +( thing at the *end*
// // of the pattern.
// // each pattern list stack adds 3 chars, and we need to go through
// // and escape any | chars that were passed through as-is for the regexp.
// // Go through and escape them, taking care not to double-escape any
// // | chars that were already escaped.
// for (pl = patternListStack.pop(); pl; pl = patternListStack.pop()) {
// let tail: string
// tail = re.slice(pl.reStart + pl.open.length)
// debug(pattern, 'setting tail', re, pl)
// // maybe some even number of \, then maybe 1 \, followed by a |
// tail = tail.replace(/((?:\\{2}){0,64})(\\?)\|/g, (_, $1, $2) => {
// if (!$2) {
// // the | isn't already escaped, so escape it.
// $2 = '\\'
// // should already be done
// /* c8 ignore start */
// }
// /* c8 ignore stop */
//
// // need to escape all those slashes *again*, without escaping the
// // one that we need for escaping the | character. As it works out,
// // escaping an even number of slashes can be done by simply repeating
// // it exactly after itself. That's why this trick works.
// //
// // I am sorry that you have to see this.
// return $1 + $1 + $2 + '|'
// })
//
// debug('tail=%j\n %s', tail, tail, pl, re)
// const t = pl.type === '*' ? star : pl.type === '?' ? qmark : '\\' + pl.type
//
// hasMagic = true
// re = re.slice(0, pl.reStart) + t + '\\(' + tail
// }
//
// // handle trailing things that only matter at the very end.
// clearStateChar()
// if (escaping) {
// // trailing \\
// re += '\\\\'
// }
//
// // only need to apply the nodot start if the re starts with
// // something that could conceivably capture a dot
// const addPatternStart = addPatternStartSet[re.charAt(0)]
//
// // Hack to work around lack of negative lookbehind in JS
// // A pattern like: *.!(x).!(y|z) needs to ensure that a name
// // like 'a.xyz.yz' doesn't match. So, the first negative
// // lookahead, has to look ALL the way ahead, to the end of
// // the pattern.
// for (let n = negativeLists.length - 1; n > -1; n--) {
// const nl = negativeLists[n]
//
// const nlBefore = re.slice(0, nl.reStart)
// const nlFirst = re.slice(nl.reStart, nl.reEnd - 8)
// let nlAfter = re.slice(nl.reEnd)
// const nlLast = re.slice(nl.reEnd - 8, nl.reEnd) + nlAfter
//
// // Handle nested stuff like *(*.js|!(*.json)), where open parens
// // mean that we should *not* include the ) in the bit that is considered
// // "after" the negated section.
// const closeParensBefore = nlBefore.split(')').length
// const openParensBefore = nlBefore.split('(').length - closeParensBefore
// let cleanAfter = nlAfter
// for (let i = 0; i < openParensBefore; i++) {
// cleanAfter = cleanAfter.replace(/\)[+*?]?/, '')
// }
// nlAfter = cleanAfter
//
// const dollar = nlAfter === '' ? '(?:$|\\/)' : ''
//
// re = nlBefore + nlFirst + nlAfter + dollar + nlLast
// }
//
// // if the re is not "" at this point, then we need to make sure
// // it doesn't match against an empty path part.
// // Otherwise a/* will match a/, which it should not.
// if (re !== '' && hasMagic) {
// re = '(?=.)' + re
// }
//
// if (addPatternStart) {
// re = patternStart() + re
// }
//
// // if it's nocase, and the lcase/uppercase don't match, it's magic
// if (options.nocase && !hasMagic && !options.nocaseMagicOnly) {
// hasMagic = pattern.toUpperCase() !== pattern.toLowerCase()
// }
//
// // skip the regexp for non-magical patterns
// // unescape anything in it, though, so that it'll be
// // an exact match against a file etc.
// if (!hasMagic) {
// return globUnescape(re)
// }
//
// return re
// }
//# sourceMappingURL=parse.js.map
// Zerion Mini Shell 1.0