OILS / pyext / fastlex.c View on Github | oilshell.org

324 lines, 220 significant
1/*
2 * Fast lexer using re2c.
3 */
4
5#include <stdarg.h> // va_list, etc.
6#include <stdio.h> // printf
7
8#include <Python.h>
9
10#include "_gen/frontend/id_kind.asdl_c.h"
11#include "_gen/frontend/types.asdl_c.h" // for lex_mode_e
12#include "_gen/frontend/match.re2c.h"
13
14// TODO: Should this be shared among all extensions?
15// Log messages to stderr.
16#if 0
/*
 * Write a printf-style formatted message to stderr, followed by a newline.
 */
static void debug(const char* fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  // Terminate the message so each call produces its own line.
  fputc('\n', stderr);
}
24#endif
25
26static PyObject *
27fastlex_MatchOshToken(PyObject *self, PyObject *args) {
28 int lex_mode;
29
30 unsigned char* line;
31 int line_len;
32
33 int start_pos;
34 if (!PyArg_ParseTuple(args, "is#i",
35 &lex_mode, &line, &line_len, &start_pos)) {
36 return NULL;
37 }
38
39 // Bounds checking. It's OK to be called with a start_pos looking at \0.
40 // Eol_Tok is inserted everywhere.
41 if (start_pos > line_len) {
42 PyErr_Format(PyExc_ValueError,
43 "Invalid MatchOshToken call (start_pos = %d, line_len = %d)",
44 start_pos, line_len);
45 return NULL;
46 }
47
48 int id;
49 int end_pos;
50 MatchOshToken(lex_mode, line, line_len, start_pos, &id, &end_pos);
51 return Py_BuildValue("(ii)", id, end_pos);
52}
53
54static PyObject *
55fastlex_MatchEchoToken(PyObject *self, PyObject *args) {
56 unsigned char* line;
57 int line_len;
58
59 int start_pos;
60 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
61 return NULL;
62 }
63
64 // Bounds checking.
65 if (start_pos > line_len) {
66 PyErr_Format(PyExc_ValueError,
67 "Invalid MatchEchoToken call (start_pos = %d, line_len = %d)",
68 start_pos, line_len);
69 return NULL;
70 }
71
72 int id;
73 int end_pos;
74 MatchEchoToken(line, line_len, start_pos, &id, &end_pos);
75 return Py_BuildValue("(ii)", id, end_pos);
76}
77
78static PyObject *
79fastlex_MatchGlobToken(PyObject *self, PyObject *args) {
80 unsigned char* line;
81 int line_len;
82
83 int start_pos;
84 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
85 return NULL;
86 }
87
88 // Bounds checking.
89 if (start_pos > line_len) {
90 PyErr_Format(PyExc_ValueError,
91 "Invalid MatchGlobToken call (start_pos = %d, line_len = %d)",
92 start_pos, line_len);
93 return NULL;
94 }
95
96 int id;
97 int end_pos;
98 MatchGlobToken(line, line_len, start_pos, &id, &end_pos);
99 return Py_BuildValue("(ii)", id, end_pos);
100}
101
102static PyObject *
103fastlex_MatchPS1Token(PyObject *self, PyObject *args) {
104 unsigned char* line;
105 int line_len;
106
107 int start_pos;
108 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
109 return NULL;
110 }
111
112 // Bounds checking.
113 if (start_pos > line_len) {
114 PyErr_Format(PyExc_ValueError,
115 "Invalid MatchPS1Token call (start_pos = %d, line_len = %d)",
116 start_pos, line_len);
117 return NULL;
118 }
119
120 int id;
121 int end_pos;
122 MatchPS1Token(line, line_len, start_pos, &id, &end_pos);
123 return Py_BuildValue("(ii)", id, end_pos);
124}
125
126static PyObject *
127fastlex_MatchHistoryToken(PyObject *self, PyObject *args) {
128 unsigned char* line;
129 int line_len;
130
131 int start_pos;
132 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
133 return NULL;
134 }
135
136 // Bounds checking.
137 if (start_pos > line_len) {
138 PyErr_Format(PyExc_ValueError,
139 "Invalid MatchHistoryToken call (start_pos = %d, line_len = %d)",
140 start_pos, line_len);
141 return NULL;
142 }
143
144 int id;
145 int end_pos;
146 MatchHistoryToken(line, line_len, start_pos, &id, &end_pos);
147 return Py_BuildValue("(ii)", id, end_pos);
148}
149
150static PyObject *
151fastlex_MatchBraceRangeToken(PyObject *self, PyObject *args) {
152 unsigned char* line;
153 int line_len;
154
155 int start_pos;
156 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
157 return NULL;
158 }
159
160 // Bounds checking.
161 if (start_pos > line_len) {
162 PyErr_Format(PyExc_ValueError,
163 "Invalid MatchBraceRangeToken call (start_pos = %d, line_len = %d)",
164 start_pos, line_len);
165 return NULL;
166 }
167
168 int id;
169 int end_pos;
170 MatchBraceRangeToken(line, line_len, start_pos, &id, &end_pos);
171 return Py_BuildValue("(ii)", id, end_pos);
172}
173
174static PyObject *
175fastlex_MatchJ8Token(PyObject *self, PyObject *args) {
176 unsigned char* line;
177 int line_len;
178
179 int start_pos;
180 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
181 return NULL;
182 }
183
184 // Bounds checking.
185 if (start_pos > line_len) {
186 PyErr_Format(PyExc_ValueError,
187 "Invalid MatchJ8Token call (start_pos = %d, line_len = %d)",
188 start_pos, line_len);
189 return NULL;
190 }
191
192 int id;
193 int end_pos;
194 MatchJ8Token(line, line_len, start_pos, &id, &end_pos);
195 return Py_BuildValue("(ii)", id, end_pos);
196}
197
198static PyObject *
199fastlex_MatchJ8StrToken(PyObject *self, PyObject *args) {
200 unsigned char* line;
201 int line_len;
202
203 int start_pos;
204 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
205 return NULL;
206 }
207
208 // Bounds checking.
209 if (start_pos > line_len) {
210 PyErr_Format(PyExc_ValueError,
211 "Invalid MatchJ8StrToken call (start_pos = %d, line_len = %d)",
212 start_pos, line_len);
213 return NULL;
214 }
215
216 int id;
217 int end_pos;
218 MatchJ8StrToken(line, line_len, start_pos, &id, &end_pos);
219 return Py_BuildValue("(ii)", id, end_pos);
220}
221
222static PyObject *
223fastlex_MatchJsonStrToken(PyObject *self, PyObject *args) {
224 unsigned char* line;
225 int line_len;
226
227 int start_pos;
228 if (!PyArg_ParseTuple(args, "s#i", &line, &line_len, &start_pos)) {
229 return NULL;
230 }
231
232 // Bounds checking.
233 if (start_pos > line_len) {
234 PyErr_Format(PyExc_ValueError,
235 "Invalid MatchJsonStrToken call (start_pos = %d, line_len = %d)",
236 start_pos, line_len);
237 return NULL;
238 }
239
240 int id;
241 int end_pos;
242 MatchJsonStrToken(line, line_len, start_pos, &id, &end_pos);
243 return Py_BuildValue("(ii)", id, end_pos);
244}
245
246static PyObject *
247fastlex_IsValidVarName(PyObject *self, PyObject *args) {
248 unsigned char *name;
249 int len;
250
251 if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
252 return NULL;
253 }
254 return PyBool_FromLong(IsValidVarName(name, len));
255}
256
257static PyObject *
258fastlex_ShouldHijack(PyObject *self, PyObject *args) {
259 unsigned char *name;
260 int len;
261
262 if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
263 return NULL;
264 }
265 return PyBool_FromLong(ShouldHijack(name, len));
266}
267
268static PyObject *
269fastlex_LooksLikeInteger(PyObject *self, PyObject *args) {
270 unsigned char *name;
271 int len;
272
273 if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
274 return NULL;
275 }
276 return PyBool_FromLong(LooksLikeInteger(name, len));
277}
278
279static PyObject *
280fastlex_LooksLikeFloat(PyObject *self, PyObject *args) {
281 unsigned char *name;
282 int len;
283
284 if (!PyArg_ParseTuple(args, "s#", &name, &len)) {
285 return NULL;
286 }
287 return PyBool_FromLong(LooksLikeFloat(name, len));
288}
289
290#ifdef OVM_MAIN
291#include "pyext/fastlex.c/methods.def"
292#else
293static PyMethodDef methods[] = {
294 {"MatchOshToken", fastlex_MatchOshToken, METH_VARARGS,
295 "(lexer mode, line, start_pos) -> (id, end_pos)."},
296 {"MatchEchoToken", fastlex_MatchEchoToken, METH_VARARGS,
297 "(line, start_pos) -> (id, end_pos)."},
298 {"MatchGlobToken", fastlex_MatchGlobToken, METH_VARARGS,
299 "(line, start_pos) -> (id, end_pos)."},
300 {"MatchPS1Token", fastlex_MatchPS1Token, METH_VARARGS,
301 "(line, start_pos) -> (id, end_pos)."},
302 {"MatchHistoryToken", fastlex_MatchHistoryToken, METH_VARARGS,
303 "(line, start_pos) -> (id, end_pos)."},
304 {"MatchBraceRangeToken", fastlex_MatchBraceRangeToken, METH_VARARGS,
305 "(line, start_pos) -> (id, end_pos)."},
306 {"MatchJ8Token", fastlex_MatchJ8Token, METH_VARARGS,
307 "(line, start_pos) -> (id, end_pos)."},
308 {"MatchJ8StrToken", fastlex_MatchJ8StrToken, METH_VARARGS,
309 "(line, start_pos) -> (id, end_pos)."},
310 {"MatchJsonStrToken", fastlex_MatchJsonStrToken, METH_VARARGS,
311 "(line, start_pos) -> (id, end_pos)."},
312 {"IsValidVarName", fastlex_IsValidVarName, METH_VARARGS,
313 "Is it a valid var name?"},
314 // Should we hijack this shebang line?
315 {"ShouldHijack", fastlex_ShouldHijack, METH_VARARGS, ""},
316 {"LooksLikeInteger", fastlex_LooksLikeInteger, METH_VARARGS, ""},
317 {"LooksLikeFloat", fastlex_LooksLikeFloat, METH_VARARGS, ""},
318 {NULL, NULL},
319};
320#endif
321
322void initfastlex(void) {
323 Py_InitModule("fastlex", methods);
324}