1 | # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
2 | # Licensed to PSF under a Contributor Agreement.
|
3 |
|
4 | # Modifications:
|
5 | # Copyright 2006 Google, Inc. All Rights Reserved.
|
6 | # Licensed to PSF under a Contributor Agreement.
|
7 |
|
8 | """Parser driver.
|
9 |
|
10 | This provides a high-level interface to parse a file into a syntax tree.
|
11 |
|
12 | """
|
13 |
|
14 | __author__ = "Guido van Rossum <guido@python.org>"
|
15 |
|
16 | __all__ = ["Driver", "load_grammar"]
|
17 |
|
18 | # Python imports
|
19 | import codecs
|
20 | import io
|
21 | import os
|
22 | import logging
|
23 | import sys
|
24 |
|
25 | # Pgen imports
|
26 | from . import grammar, parse, token, tokenize
|
27 |
|
28 |
|
class Driver(object):
    """High-level parser driver: turns a token stream into a syntax tree.

    Wraps parse.Parser and handles the bookkeeping of "prefix" text
    (whitespace, comments, NL tokens) that precedes each significant
    token, so the resulting tree can reproduce the source exactly.
    """

    def __init__(self, grammar, convert=None, logger=None):
        """Initialize the driver.

        Args:
            grammar: grammar object handed to parse.Parser.
            convert: optional node-conversion callable forwarded to
                parse.Parser (None means the parser's default).
            logger: optional logging.Logger; defaults to the root logger.
        """
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, start_symbol=None, debug=False):
        """Parse a series of tokens and return the syntax tree.

        Args:
            tokens: iterable of 5-tuples as produced by
                tokenize.generate_tokens.
            start_symbol: optional grammar start symbol passed to
                Parser.setup (None means the grammar's default).
            debug: when true, log each shifted token at DEBUG level.

        Raises:
            parse.ParseError: if the token stream ends before the
                parser accepts a complete input.
        """
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup(start=start_symbol)
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # There is a gap between the previous token's end and this
                # token's start; capture the skipped text as prefix.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines are not parsed; they
                # accumulate into the prefix of the next real token.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                # Map generic OP tokens to their specific grammar token type.
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                # Parser accepted a complete input.
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        # Fix: debug was previously passed positionally, landing in the
        # start_symbol parameter (so debug was dropped and a boolean was
        # used as the start symbol). Pass it by keyword instead.
        return self.parse_tokens(tokens, debug=debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree.

        The file is opened with codecs.open using the given encoding
        (None means the codecs default) and always closed afterwards.
        """
        stream = codecs.open(filename, "r", encoding)
        try:
            return self.parse_stream(stream, debug)
        finally:
            stream.close()

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        # Fix: pass debug by keyword so it does not land in start_symbol.
        return self.parse_tokens(tokens, debug=debug)
|