"""
ldap.schema.tokenizer - Low-level parsing functions for schema element strings
See https://www.python-ldap.org/ for details.
"""
import re
# Tokenizer pattern: findall() yields 5-tuples, one capture group per
# alternative below; exactly one group is non-empty for each match.
TOKENS_FINDALL = re.compile(
    r"(\()" # opening parenthesis
    r"|" # or
    r"(\))" # closing parenthesis
    r"|" # or
    r"([^'$()\s]+)" # string of length >= 1 without '$() or whitespace
    r"|" # or
    r"('(?:[^'\\]|\\\\|\\.)*?'(?!\w))"
    # any string or empty string surrounded by unescaped
    # single quotes except if right quote is succeeded by
    # alphanumeric char
    r"|" # or
    r"([^\s]+?)", # residue, all non-whitespace strings
).findall

# Matches one backslash-escaped character inside a quoted string;
# substituting with group 1 replaces the escape with the character itself.
UNESCAPE_PATTERN = re.compile(r"\\(.)")


def split_tokens(s):
    """
    Split schema element string *s* into a list of syntax tokens.

    Parentheses are kept as their own tokens, surrounding single quotes
    of quoted strings are stripped and backslash escapes inside them are
    resolved, and '$' separators inside parentheses are dropped.

    :param s: schema element description, e.g. "( 1.2.3 NAME 'foo' )"
    :return: list of token strings
    :raises ValueError: for '$' outside parentheses, unbalanced
        parentheses, or any other unparsable residue
    """
    parts = []
    parens = 0
    for opar, cpar, unquoted, quoted, residue in TOKENS_FINDALL(s):
        if unquoted:
            parts.append(unquoted)
        elif quoted:
            # strip surrounding quotes and resolve backslash escapes
            parts.append(UNESCAPE_PATTERN.sub(r'\1', quoted[1:-1]))
        elif opar:
            parens += 1
            parts.append(opar)
        elif cpar:
            parens -= 1
            parts.append(cpar)
        elif residue == '$':
            # '$' is only valid as a value separator inside parentheses;
            # when valid it is silently dropped
            if not parens:
                raise ValueError("'$' outside parenthesis in %r" % (s,))
        else:
            raise ValueError("Unknown token %r in %r" % (residue, s))
    if parens:
        raise ValueError("Unbalanced parenthesis in %r" % (s,))
    return parts
def extract_tokens(l, known_tokens):
    """
    Scan token list *l* (as produced by split_tokens()) and return a
    dict mapping each name in *known_tokens* to its parsed value(s).

    After a known token name, three value forms are recognized:
      * another known token follows -> () (token present, no value)
      * "(" ... ")"                 -> tuple of all values, '$' dropped
      * anything else               -> 1-tuple holding the single value

    :param l: token list, must start with "(" and end with ")"
    :param known_tokens: mapping token name -> default value; defaults
        are kept for tokens not occurring in *l*
    :return: new dict; *known_tokens* itself is not modified
    :raises ValueError: if *l* is empty or not parenthesized as required
    """
    # Explicit check instead of assert so validation survives `python -O`
    # (the original `assert cond, ValueError(l)` only ever raised
    # AssertionError and vanished under optimization).
    if not l or l[0].strip() != "(" or l[-1].strip() != ")":
        raise ValueError(l)
    result = dict(known_tokens)
    i = 0
    l_len = len(l)
    while i < l_len:
        if l[i] in result:
            token = l[i]
            i += 1  # consume the token name
            if i < l_len:
                if l[i] in result:
                    # next item is another known token: non-valued
                    result[token] = ()
                elif l[i] == "(":
                    # multi-valued: collect everything up to matching ")"
                    i += 1  # consume left parenthesis
                    start = i
                    while i < l_len and l[i] != ")":
                        i += 1
                    result[token] = tuple(v for v in l[start:i] if v != '$')
                    i += 1  # consume right parenthesis
                else:
                    # single-valued
                    result[token] = (l[i],)
                    i += 1  # consume single value
        else:
            i += 1  # skip unrecognized item
    return result