| 1 | """ | 
| 2 | tdop_lexer.py | 
| 3 | """ | 
| 4 | from __future__ import print_function | 
| 5 |  | 
| 6 | import re | 
| 7 | from typing import Iterator, Tuple, cast, TYPE_CHECKING | 
| 8 |  | 
| 9 | from asdl.examples.tdop import Token | 
| 10 |  | 
# TupleStr4 is the alias Tokenize() passes to cast() for each result of
# TOKEN_RE.findall(): a tuple of four strings.  Only the type checker needs
# the real alias; at runtime the name is bound to None.
if TYPE_CHECKING:
    TupleStr4 = Tuple[str, str, str, str]
else:
    TupleStr4 = None  # Using runtime stub
| 15 |  | 
#
# Using the pattern here: http://effbot.org/zone/xml-scanner.htm
#

# Each match skips leading whitespace and then captures one of four groups,
# which Tokenize() inspects in this order:
#   1. a run of digits
#   2. a run of word characters
#   3. a run of one or more operator characters
#   4. a single bracket character: ( ) [ or ]
#
# NOTE: () and [] need to be on their own so (-1+2) works.  They are matched
# one character at a time by group 4 rather than being part of the operator
# run in group 3.
TOKEN_RE = re.compile(r"""
\s* (?: (\d+) | (\w+) | ( [\-\+\*/%!~<>=&^|?:,]+ ) | ([\(\)\[\]]) )
""", re.VERBOSE)
| 24 |  | 
def Tokenize(s):
    # type: (str) -> Iterator[Token]
    """Yield a Token for every lexeme that TOKEN_RE finds in s.

    Digit runs are emitted as Token('number', text) and word runs as
    Token('name', text).  Operator runs and single brackets use their own
    text as both the token type and the token value.

    Only the matches reported by TOKEN_RE.findall() are turned into tokens;
    any text the pattern does not match produces no token.
    """
    for groups in TOKEN_RE.findall(s):
        # The type checker can't know the shape of findall() results, so
        # spell it out: one string per capture group, in pattern order.
        number, name, operator, bracket = cast(TupleStr4, groups)

        if number:
            yield Token('number', number)
        elif name:
            yield Token('name', name)
        else:
            # Every alternative in TOKEN_RE consumes at least one character,
            # so when the first two groups are empty exactly one of these
            # two holds the matched text.
            symbol = operator or bracket
            yield Token(symbol, symbol)