| 1 | #!/usr/bin/env python2
|
| 2 | """
|
| 3 | arith_parse.py: Parse shell-like and C-like arithmetic.
|
| 4 | """
|
| 5 | from __future__ import print_function
|
| 6 |
|
| 7 | import sys
|
| 8 |
|
| 9 | import tdop
|
| 10 | from tdop import CompositeNode
|
| 11 |
|
| 12 | import demo_asdl
|
| 13 |
|
# Short module-level aliases for the demo_asdl names that the parse functions
# in this file use to build nodes (arith_expr) and to tag binary operators
# (op_id_e).
arith_expr = demo_asdl.arith_expr
op_id_e = demo_asdl.op_id_e
|
| 16 |
|
| 17 |
|
| 18 | #
|
| 19 | # Null Denotation -- token that takes nothing on the left
|
| 20 | #
|
| 21 |
|
def NullConstant(p, token, bp):
    """Turn a 'number' or 'name' token into a leaf node.

    Args:
      p: the parser (not used here).
      token: token whose .type selects the node kind and whose .val is wrapped.
      bp: binding power (not used here).

    Returns:
      arith_expr.Const(token.val) for a 'number' token, or
      arith_expr.ArithVar(token.val) for a 'name' token.

    Raises:
      AssertionError: carrying token.type, for any other token type.
    """
    kind = token.type
    if kind == 'name':
        # A bare string has to be wrapped in some kind of variant.
        return arith_expr.ArithVar(token.val)
    if kind == 'number':
        return arith_expr.Const(token.val)
    raise AssertionError(kind)
|
| 30 |
|
| 31 |
|
def NullParen(p, token, bp):
    """Arithmetic grouping: parse what follows '(' and consume the ')'.

    Args:
      p: the parser; its ParseUntil() and Eat() methods are used.
      token: the '(' token (not used here).
      bp: binding power handed straight to p.ParseUntil().

    Returns:
      Whatever p.ParseUntil(bp) produced for the parenthesized expression.
    """
    inner = p.ParseUntil(bp)
    # The group must be closed before the inner expression is handed back.
    p.Eat(')')
    return inner
|
| 37 |
|
| 38 |
|
def NullPrefixOp(p, token, bp):
    """Prefix operator: parse one operand and wrap it under the operator.

    How much the operand swallows depends on the binding power `bp`.

    Low precedence: return, raise, etc.
      return x+y is return (x+y), not (return x) + y

    High precedence: logical negation, bitwise complement, etc.
      !x && y is (!x) && y, not !(x && y)

    Args:
      p: the parser; p.ParseUntil(bp) parses the operand.
      token: the prefix operator token, used as the node's token.
      bp: binding power passed to p.ParseUntil().

    Returns:
      CompositeNode(token, [operand]).
    """
    return CompositeNode(token, [p.ParseUntil(bp)])
|
| 50 |
|
| 51 |
|
def NullIncDec(p, token, bp):
    """Prefix increment / decrement: ++x or ++x[1].

    Args:
      p: the parser; p.ParseUntil(bp) parses the operand.
      token: the '++' or '--' token, used as the node's token.
      bp: binding power passed to p.ParseUntil().

    Returns:
      CompositeNode(token, [operand]).

    Raises:
      tdop.ParseError: if the operand's token type is neither 'name' nor
        'get', i.e. it is not something that can be assigned to.
    """
    operand = p.ParseUntil(bp)
    target = operand.token
    if target.type in ('name', 'get'):
        return CompositeNode(token, [operand])
    raise tdop.ParseError("Can't assign to %r (%s)" % (operand, target))
|
| 58 |
|
| 59 |
|
| 60 | #
|
| 61 | # Left Denotation -- token that takes an expression on the left
|
| 62 | #
|
| 63 |
|
def LeftIncDec(p, token, left, rbp):
    """Postfix increment / decrement: i++ and i--.

    Args:
      p: the parser (not used here).
      token: the '++' or '--' token.  Its .type is rewritten in place with a
        'post' prefix so the postfix form has its own token type.
      left: the already-parsed operand on the left of the operator.
      rbp: right binding power (not used here).

    Returns:
      CompositeNode(token, [left]).

    Raises:
      tdop.ParseError: if left's token type is neither 'name' nor 'get'.
    """
    target = left.token
    if target.type in ('name', 'get'):
        token.type = 'post' + token.type
        return CompositeNode(token, [left])
    raise tdop.ParseError("Can't assign to %r (%s)" % (left, target))
|
| 71 |
|
| 72 |
|
def LeftIndex(p, token, left, unused_bp):
    """Parse an index f[x+1] or a two-part slice f[a:b] applied to `left`.

    Args:
      p: the parser; AtToken(), Next(), ParseUntil() and Eat() are used.
      token: the '[' token (not used here).
      left: the already-parsed expression being indexed.
      unused_bp: binding power (ignored; the bracket contents are parsed
        with binding power 0).

    Returns:
      demo_asdl.Slice(left, index, end, None) when a ':' part was parsed,
      otherwise demo_asdl.Index(left, index).

    Raises:
      tdop.ParseError: if `left` is not a demo_asdl.ArithVar.
    """
    # Only a plain variable may be indexed by this check.
    if not isinstance(left, demo_asdl.ArithVar):
        raise tdop.ParseError("%s can't be indexed" % left)

    index = p.ParseUntil(0)
    end = None
    if p.AtToken(':'):
        p.Next()
        end = p.ParseUntil(0)
    p.Eat(']')

    # TODO: If you see ], then
    #   1:4
    #   1:4:2
    # Both end and step are optional

    # Compare against None explicitly: the choice between Slice and Index
    # must depend only on whether a ':' part was parsed, never on the
    # truthiness of the parsed node.
    if end is not None:
        return demo_asdl.Slice(left, index, end, None)
    return demo_asdl.Index(left, index)
|
| 96 |
|
| 97 |
|
def LeftTernary(p, token, left, bp):
    """Ternary conditional, e.g. a > 1 ? x : y.

    Args:
      p: the parser; ParseUntil() and Eat() are used.
      token: the '?' token, used as the node's token.
      left: the already-parsed condition.
      bp: binding power used for both branches.

    Returns:
      CompositeNode(token, [condition, true branch, false branch]).
    """
    if_true = p.ParseUntil(bp)
    p.Eat(':')  # the two branches are separated by ':'
    if_false = p.ParseUntil(bp)
    return CompositeNode(token, [left, if_true, if_false])
|
| 105 |
|
| 106 |
|
def LeftBinaryOp(p, token, left, rbp):
    """Normal binary operator like 1+2 or 2*3.

    Only '+', '-' and '*' are mapped to an op_id_e value here.

    Args:
      p: the parser; p.ParseUntil(rbp) parses the right operand.
      token: the operator token; token.val selects the operator id.
      left: the already-parsed left operand.
      rbp: right binding power for the right operand.

    Returns:
      arith_expr.ArithBinary(op_id, left, right).

    Raises:
      AssertionError: carrying token.val, for any operator other than the
        three above.  Raised before the right operand is parsed.
    """
    symbol = token.val
    op_ids = {
        '+': op_id_e.Plus,
        '-': op_id_e.Minus,
        '*': op_id_e.Star,
    }
    if symbol not in op_ids:
        raise AssertionError(symbol)
    return arith_expr.ArithBinary(op_ids[symbol], left, p.ParseUntil(rbp))
|
| 118 |
|
| 119 |
|
def LeftAssign(p, token, left, rbp):
    """Assignment operator, e.g. x += 1 or a[i] += 1.

    Args:
      p: the parser; p.ParseUntil(rbp) parses the right-hand side.
      token: the assignment operator token, used as the node's token.
      left: the already-parsed assignment target.
      rbp: right binding power for the right-hand side.

    Returns:
      CompositeNode(token, [left, right-hand side]).

    Raises:
      tdop.ParseError: if left's token type is neither 'name' nor 'get'.
        Raised before the right-hand side is parsed.
    """
    target = left.token
    if target.type in ('name', 'get'):
        return CompositeNode(token, [left, p.ParseUntil(rbp)])
    raise tdop.ParseError("Can't assign to %r (%s)" % (left, target))
|
| 126 |
|
| 127 |
|
def LeftComma(p, token, left, rbp):
    """Comma operator: foo, bar, baz.

    Could be a sequencing operator, or a tuple without parens.

    Args:
      p: the parser; p.ParseUntil(rbp) parses the item after the comma.
      token: the ',' token, used for a newly created node.
      left: the already-parsed expression before the comma.
      rbp: right binding power for the item after the comma.

    Returns:
      `left` itself, with the new item appended to left.children, when
      `left` is already a ',' node; otherwise a new
      CompositeNode(token, [left, item]).
    """
    item = p.ParseUntil(rbp)
    if left.token.type != ',':
        # First comma in the chain: start a fresh node.
        return CompositeNode(token, [left, item])
    # Already a comma node: keep adding children to it (mutates `left`).
    left.children.append(item)
    return left
|
| 139 |
|
| 140 |
|
# For overloading of , inside function calls.
# This is the binding power registered for the ',' operator.  LeftFuncCall
# also parses each call argument with it as the right binding power, so that
# a ',' between arguments is not grabbed as the comma operator.
COMMA_PREC = 1
|
| 143 |
|
def LeftFuncCall(p, token, left, unused_bp):
    """Function call f(a, b).

    Args:
      p: the parser; AtToken(), ParseUntil(), Next() and Eat() are used.
      token: the '(' token (not used here).
      left: the already-parsed callee expression.
      unused_bp: binding power (ignored).

    Returns:
      demo_asdl.FuncCall(name, args), where name is left.name and args is the
      list of parsed argument expressions (possibly empty).

    Raises:
      tdop.ParseError: if `left` is not a demo_asdl.ArithVar.
    """
    # Only a plain variable may be called by this check.
    if not isinstance(left, demo_asdl.ArithVar):
        raise tdop.ParseError("%s can't be called" % left)
    callee = left.name  # get a string

    arguments = []
    while True:
        if p.AtToken(')'):
            break
        # We don't want to grab the comma, i.e. it is NOT a sequence operator
        # here.  So parse each argument with COMMA_PREC as the binding power.
        arguments.append(p.ParseUntil(COMMA_PREC))
        # NOTE(review): a ',' is consumed when present, but this loop does
        # not itself require one between arguments.
        if p.AtToken(','):
            p.Next()
    p.Eat(")")
    return demo_asdl.FuncCall(callee, arguments)
|
| 160 |
|
| 161 |
|
def MakeShellParserSpec():
    """Build the tdop.ParserSpec for shell-like / C-like arithmetic.

    Compare the binding powers below with this table of C operator precedence:
    http://en.cppreference.com/w/c/language/operator_precedence

    The registrations are made in a fixed order, from the tightest-binding
    operators down to the loosest.

    Returns:
      The populated tdop.ParserSpec.
    """
    spec = tdop.ParserSpec()

    # 31 -- postfix forms: i++ / i--, function call, indexing
    postfix_bp = 31
    spec.Left(postfix_bp, LeftIncDec, ['++', '--'])
    spec.Left(postfix_bp, LeftFuncCall, ['('])
    spec.Left(postfix_bp, LeftIndex, ['['])

    # 29 -- binds to everything except function call, indexing, postfix ops
    prefix_bp = 29
    spec.Null(prefix_bp, NullIncDec, ['++', '--'])
    spec.Null(prefix_bp, NullPrefixOp, ['+', '!', '~', '-'])

    # Right associative: 2 ** 3 ** 2 == 2 ** (3 ** 2)
    spec.LeftRightAssoc(27, LeftBinaryOp, ['**'])

    # Left-associative binary operators, tightest binding first.
    left_assoc_binary = (
        (25, ['*', '/', '%']),
        (23, ['+', '-']),
        (21, ['<<', '>>']),
        (19, ['<', '>', '<=', '>=']),
        (17, ['!=', '==']),
        (15, ['&']),
        (13, ['^']),
        (11, ['|']),
        (9, ['&&']),
        (7, ['||']),
    )
    for binding_power, operators in left_assoc_binary:
        spec.Left(binding_power, LeftBinaryOp, operators)

    spec.LeftRightAssoc(5, LeftTernary, ['?'])

    # Right associative: a = b = 2 is a = (b = 2)
    assignment_ops = [
        '=',
        '+=', '-=', '*=', '/=', '%=',
        '<<=', '>>=', '&=', '^=', '|=',
    ]
    spec.LeftRightAssoc(3, LeftAssign, assignment_ops)

    spec.Left(COMMA_PREC, LeftComma, [','])

    # 0 precedence -- doesn't bind until )
    spec.Null(0, NullParen, ['('])  # for grouping

    # -1 precedence -- never used
    spec.Null(-1, NullConstant, ['name', 'number'])
    spec.Null(-1, tdop.NullError, [')', ']', ':', 'eof'])

    return spec
|
| 212 |
|
| 213 |
|
def MakeParser(s):
    """Build a tdop.Parser over the tokens of `s`.  Used by tests.

    Args:
      s: the arithmetic expression to tokenize.

    Returns:
      A tdop.Parser built from MakeShellParserSpec() and tdop.Tokenize(s).
    """
    return tdop.Parser(MakeShellParserSpec(), tdop.Tokenize(s))
|
| 220 |
|
| 221 |
|
def ParseShell(s, expected=None):
    """Parse `s` and optionally check the repr of the result.  Used by tests.

    Args:
      s: the arithmetic expression to parse.
      expected: if not None, the string that repr(tree) must equal.

    Returns:
      The tree returned by the parser's Parse() method.

    Raises:
      AssertionError: if `expected` is given and differs from repr(tree).
    """
    tree = MakeParser(s).Parse()
    sexpr = repr(tree)
    if expected is None:
        return tree
    assert sexpr == expected, '%r != %r' % (sexpr, expected)
    return tree
|
| 233 |
|
| 234 |
|
def main(argv):
    """Command-line entry point: parse argv[1] and print the resulting tree.

    Args:
      argv: argument list in the style of sys.argv; argv[1] is the expression.

    Prints a usage line when no expression is given.  A tdop.ParseError is
    reported on stderr; the tree is printed only when parsing succeeded.
    """
    try:
        s = argv[1]
    except IndexError:
        print('Usage: ./arith_parse.py EXPRESSION')
        return

    try:
        tree = ParseShell(s)
    except tdop.ParseError as e:
        print('Error parsing %r: %s' % (s, e), file=sys.stderr)
    else:
        # Only reached when ParseShell returned, so `tree` is always bound.
        # Printing it unconditionally would fail with an unbound-name error
        # right after a parse error had been reported.
        print(tree)
|
| 246 |
|
| 247 |
|
# When executed as a script (not imported), hand the command-line arguments
# to main().
if __name__ == '__main__':
    main(sys.argv)
|