| 1 | /* | 
| 2 | * Python interface to libc functions. | 
| 3 | */ | 
| 4 |  | 
| 5 | // - Enable GNU extensions in fnmatch.h for extended glob. | 
| 6 | // - It's also apparently needed for wchar.h in combination with Python. | 
| 7 | //   https://github.com/python-pillow/Pillow/issues/1850 | 
| 8 | //   - It's currently hard-coded in pyconfig.h. | 
| 9 | #define _GNU_SOURCE 1 | 
| 10 |  | 
#include <stdarg.h>  // va_list, etc.
#include <stdio.h>  // printf
#include <limits.h>
#include <wchar.h>
#include <stdlib.h>
#include <string.h>  // strlen, strerror
#include <sys/ioctl.h>
#include <locale.h>
#include <fnmatch.h>
#include <glob.h>
#include <regex.h>

#include <Python.h>
| 23 |  | 
// Write one printf-style diagnostic line to stderr, followed by a newline.
//
// The body is only compiled in when LIBC_VERBOSE is defined; otherwise the
// call is a no-op.
static void debug(const char* fmt, ...) {
#ifndef LIBC_VERBOSE
  (void)fmt;  // logging compiled out
#else
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  fputs("\n", stderr);
#endif
}
| 34 |  | 
| 35 | static PyObject * | 
| 36 | func_realpath(PyObject *self, PyObject *args) { | 
| 37 | const char *symlink; | 
| 38 |  | 
| 39 | if (!PyArg_ParseTuple(args, "s", &symlink)) { | 
| 40 | return NULL; | 
| 41 | } | 
| 42 | char target[PATH_MAX + 1]; | 
| 43 | char *status = realpath(symlink, target); | 
| 44 |  | 
| 45 | // TODO: Throw exception like IOError here | 
| 46 | if (status == NULL) { | 
| 47 | debug("error from realpath()"); | 
| 48 | Py_RETURN_NONE; | 
| 49 | } | 
| 50 |  | 
| 51 | return PyString_FromString(target); | 
| 52 | } | 
| 53 |  | 
| 54 | static PyObject * | 
| 55 | func_fnmatch(PyObject *self, PyObject *args) { | 
| 56 | const char *pattern; | 
| 57 | const char *str; | 
| 58 | int flags = 0; | 
| 59 |  | 
| 60 | if (!PyArg_ParseTuple(args, "ss|i", &pattern, &str, &flags)) { | 
| 61 | return NULL; | 
| 62 | } | 
| 63 |  | 
| 64 | // NOTE: Testing for __GLIBC__ is the version detection anti-pattern.  We | 
| 65 | // should really use feature detection in our configure script.  But I plan | 
| 66 | // to get rid of the dependency on FNM_EXTMATCH because it doesn't work on | 
| 67 | // musl libc (or OS X).  Instead we should compile extended globs to extended | 
| 68 | // regex syntax. | 
| 69 | #ifdef __GLIBC__ | 
| 70 | flags |= FNM_EXTMATCH; | 
| 71 | #else | 
| 72 | debug("Warning: FNM_EXTMATCH is not defined"); | 
| 73 | #endif | 
| 74 |  | 
| 75 | int ret = fnmatch(pattern, str, flags); | 
| 76 |  | 
| 77 | switch (ret) { | 
| 78 | case 0: | 
| 79 | debug("matched: %s", str); | 
| 80 | return PyLong_FromLong(1); | 
| 81 | break; | 
| 82 | case FNM_NOMATCH: | 
| 83 | debug("no match: %s", str); | 
| 84 | return PyLong_FromLong(0); | 
| 85 | break; | 
| 86 | default: | 
| 87 | debug("other error: %s", str); | 
| 88 | return PyLong_FromLong(-1); | 
| 89 | break; | 
| 90 | } | 
| 91 | } | 
| 92 |  | 
// Error callback for glob(): reports the failing path on stderr.
//
// Currently disabled (func_glob passes NULL instead) because of spurious
// errors.  For example, sed -i s/.*// (without quotes) is OK, but it would be
// treated as a glob, and prints an error if the directory 's' doesn't exist.
//
// Bash does its own globbing -- it doesn't use libc.  Likewise, I think dash
// and mksh do their own globbing.

int globerr(const char *path, int errno_) {
  const char *reason = strerror(errno_);

  fprintf(stderr, "globerr: %s: %s\n", path, reason);

  // Returning zero tells glob() to keep going.
  return 0;
}
| 106 |  | 
| 107 | static PyObject * | 
| 108 | func_glob(PyObject *self, PyObject *args) { | 
| 109 | const char* pattern; | 
| 110 | if (!PyArg_ParseTuple(args, "s", &pattern)) { | 
| 111 | return NULL; | 
| 112 | } | 
| 113 |  | 
| 114 | glob_t results; | 
| 115 | // Hm, it's weird that the first one can't be called with GLOB_APPEND.  You | 
| 116 | // get a segfault. | 
| 117 | int flags = 0; | 
| 118 | // int flags = GLOB_APPEND; | 
| 119 | //flags |= GLOB_NOMAGIC; | 
| 120 | int ret = glob(pattern, flags, NULL, &results); | 
| 121 |  | 
| 122 | const char *err_str = NULL; | 
| 123 | switch (ret) { | 
| 124 | case 0:  // no error | 
| 125 | break; | 
| 126 | case GLOB_ABORTED: | 
| 127 | err_str = "read error"; | 
| 128 | break; | 
| 129 | case GLOB_NOMATCH: | 
| 130 | // No error, because not matching isn't necessarily a problem. | 
| 131 | // NOTE: This can be turned on to log overaggressive calls to glob(). | 
| 132 | //err_str = "nothing matched"; | 
| 133 | break; | 
| 134 | case GLOB_NOSPACE: | 
| 135 | err_str = "no dynamic memory"; | 
| 136 | break; | 
| 137 | default: | 
| 138 | err_str = "unknown problem"; | 
| 139 | break; | 
| 140 | } | 
| 141 | if (err_str) { | 
| 142 | //fprintf(stderr, "func_glob: %s: %s\n", pattern, err_str); | 
| 143 | PyErr_SetString(PyExc_RuntimeError, err_str); | 
| 144 | return NULL; | 
| 145 | } | 
| 146 |  | 
| 147 | // http://stackoverflow.com/questions/3512414/does-this-pylist-appendlist-py-buildvalue-leak | 
| 148 | size_t n = results.gl_pathc; | 
| 149 | PyObject* matches = PyList_New(n); | 
| 150 |  | 
| 151 | // Print array of results | 
| 152 | size_t i; | 
| 153 | for (i = 0; i < n; i++) { | 
| 154 | //printf("%s\n", results.gl_pathv[i]); | 
| 155 | PyObject* m = Py_BuildValue("s", results.gl_pathv[i]); | 
| 156 | PyList_SetItem(matches, i, m); | 
| 157 | } | 
| 158 | globfree(&results); | 
| 159 |  | 
| 160 | return matches; | 
| 161 | } | 
| 162 |  | 
| 163 | static PyObject * | 
| 164 | func_regex_search(PyObject *self, PyObject *args) { | 
| 165 | const char* pattern; | 
| 166 | const char* str; | 
| 167 | int cflags = 0; | 
| 168 | int eflags = 0; | 
| 169 | int pos = 0; | 
| 170 |  | 
| 171 | if (!PyArg_ParseTuple(args, "sisi|i", &pattern, &cflags, &str, &eflags, &pos)) { | 
| 172 | return NULL; | 
| 173 | } | 
| 174 |  | 
| 175 | cflags |= REG_EXTENDED; | 
| 176 | regex_t pat; | 
| 177 | int status = regcomp(&pat, pattern, cflags); | 
| 178 | if (status != 0) { | 
| 179 | char error_desc[50]; | 
| 180 | regerror(status, &pat, error_desc, 50); | 
| 181 |  | 
| 182 | char error_message[80]; | 
| 183 | snprintf(error_message, 80, "Invalid regex %s (%s)", pattern, error_desc); | 
| 184 |  | 
| 185 | PyErr_SetString(PyExc_ValueError, error_message); | 
| 186 | return NULL; | 
| 187 | } | 
| 188 |  | 
| 189 | int num_groups = pat.re_nsub + 1; | 
| 190 | PyObject *ret = PyList_New(num_groups * 2); | 
| 191 |  | 
| 192 | if (ret == NULL) { | 
| 193 | regfree(&pat); | 
| 194 | return NULL; | 
| 195 | } | 
| 196 |  | 
| 197 | regmatch_t *pmatch = (regmatch_t*) malloc(sizeof(regmatch_t) * num_groups); | 
| 198 | int match = regexec(&pat, str + pos, num_groups, pmatch, eflags); | 
| 199 | if (match == 0) { | 
| 200 | int i; | 
| 201 | for (i = 0; i < num_groups; i++) { | 
| 202 | int start = pmatch[i].rm_so; | 
| 203 | if (start != -1) { | 
| 204 | start += pos; | 
| 205 | } | 
| 206 | PyList_SetItem(ret, 2*i, PyInt_FromLong(start)); | 
| 207 |  | 
| 208 | int end = pmatch[i].rm_eo; | 
| 209 | if (end != -1) { | 
| 210 | end += pos; | 
| 211 | } | 
| 212 | PyList_SetItem(ret, 2*i + 1, PyInt_FromLong(end)); | 
| 213 | } | 
| 214 | } | 
| 215 |  | 
| 216 | free(pmatch); | 
| 217 | regfree(&pat); | 
| 218 |  | 
| 219 | if (match != 0) { | 
| 220 | Py_RETURN_NONE; | 
| 221 | } | 
| 222 |  | 
| 223 | return ret; | 
| 224 | } | 
| 225 |  | 
| 226 | // For ${//}, the number of groups is always 1, so we want 2 match position | 
| 227 | // results -- the whole regex (which we ignore), and then first group. | 
| 228 | // | 
| 229 | // For [[ =~ ]], do we need to count how many matches the user gave? | 
| 230 |  | 
| 231 | #define NMATCH 2 | 
| 232 |  | 
| 233 | static PyObject * | 
| 234 | func_regex_first_group_match(PyObject *self, PyObject *args) { | 
| 235 | const char* pattern; | 
| 236 | const char* str; | 
| 237 | int pos; | 
| 238 | if (!PyArg_ParseTuple(args, "ssi", &pattern, &str, &pos)) { | 
| 239 | return NULL; | 
| 240 | } | 
| 241 |  | 
| 242 | regex_t pat; | 
| 243 | regmatch_t m[NMATCH]; | 
| 244 |  | 
| 245 | // Could have been checked by regex_parse for [[ =~ ]], but not for glob | 
| 246 | // patterns like ${foo/x*/y}. | 
| 247 |  | 
| 248 | int status = regcomp(&pat, pattern, REG_EXTENDED); | 
| 249 | if (status != 0) { | 
| 250 | char error_string[80]; | 
| 251 | regerror(status, &pat, error_string, 80); | 
| 252 | PyErr_SetString(PyExc_RuntimeError, error_string); | 
| 253 | return NULL; | 
| 254 | } | 
| 255 |  | 
| 256 | debug("first_group_match pat %s str %s pos %d", pattern, str, pos); | 
| 257 |  | 
| 258 | // Match at offset 'pos' | 
| 259 | int result = regexec(&pat, str + pos, NMATCH, m, 0 /*flags*/); | 
| 260 | regfree(&pat); | 
| 261 |  | 
| 262 | if (result != 0) { | 
| 263 | Py_RETURN_NONE;  // no match | 
| 264 | } | 
| 265 |  | 
| 266 | // Assume there is a match | 
| 267 | regoff_t start = m[1].rm_so; | 
| 268 | regoff_t end = m[1].rm_eo; | 
| 269 | return Py_BuildValue("(i,i)", pos + start, pos + end); | 
| 270 | } | 
| 271 |  | 
| 272 | // We do this in C so we can remove '%f' % 0.1 from the CPython build.  That | 
| 273 | // involves dtoa.c and pystrod.c, which are thousands of lines of code. | 
| 274 | static PyObject * | 
| 275 | func_print_time(PyObject *self, PyObject *args) { | 
| 276 | double real, user, sys; | 
| 277 | if (!PyArg_ParseTuple(args, "ddd", &real, &user, &sys)) { | 
| 278 | return NULL; | 
| 279 | } | 
| 280 | fprintf(stderr, "real\t%.3f\n", real); | 
| 281 | fprintf(stderr, "user\t%.3f\n",  user); | 
| 282 | fprintf(stderr, "sys\t%.3f\n", sys); | 
| 283 | Py_RETURN_NONE; | 
| 284 | } | 
| 285 |  | 
| 286 | // A copy of socket.gethostname() from socketmodule.c.  That module brings in | 
| 287 | // too many dependencies. | 
| 288 |  | 
| 289 | static PyObject *errno_error; | 
| 290 |  | 
| 291 | static PyObject * | 
| 292 | socket_gethostname(PyObject *self, PyObject *unused) | 
| 293 | { | 
| 294 | char buf[1024]; | 
| 295 | int res; | 
| 296 | Py_BEGIN_ALLOW_THREADS | 
| 297 | res = gethostname(buf, (int) sizeof buf - 1); | 
| 298 | //res = gethostname(buf, 0);  // For testing errors | 
| 299 | Py_END_ALLOW_THREADS | 
| 300 | if (res < 0) | 
| 301 | return PyErr_SetFromErrno(errno_error); | 
| 302 | buf[sizeof buf - 1] = '\0'; | 
| 303 | return PyString_FromString(buf); | 
| 304 | } | 
| 305 |  | 
| 306 | static PyObject * | 
| 307 | func_get_terminal_width(PyObject *self, PyObject *unused) { | 
| 308 | struct winsize w; | 
| 309 | int res; | 
| 310 | res = ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); | 
| 311 | if (res < 0) | 
| 312 | return PyErr_SetFromErrno(errno_error); | 
| 313 | return PyLong_FromLong(w.ws_col); | 
| 314 | } | 
| 315 |  | 
| 316 | static PyObject * | 
| 317 | func_wcswidth(PyObject *self, PyObject *args){ | 
| 318 | char *string; | 
| 319 | if (!PyArg_ParseTuple(args, "s", &string)) { | 
| 320 | return NULL; | 
| 321 | } | 
| 322 |  | 
| 323 | int num_wide_chars = mbstowcs(NULL, string, 0); | 
| 324 | if (num_wide_chars == -1) { | 
| 325 | PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 1"); | 
| 326 | return NULL; | 
| 327 | } | 
| 328 | int buf_size = (num_wide_chars + 1) * sizeof(wchar_t); | 
| 329 | wchar_t* wide_chars = (wchar_t*)malloc(buf_size); | 
| 330 | assert(wide_chars != NULL); | 
| 331 |  | 
| 332 | num_wide_chars = mbstowcs(wide_chars, string, num_wide_chars); | 
| 333 | if (num_wide_chars == -1) { | 
| 334 | PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 2"); | 
| 335 | return NULL; | 
| 336 | } | 
| 337 |  | 
| 338 | int width = wcswidth(wide_chars, num_wide_chars); | 
| 339 | if (width == -1) { | 
| 340 | PyErr_SetString(PyExc_UnicodeError, "wcswidth()"); | 
| 341 | return NULL; | 
| 342 | } | 
| 343 |  | 
| 344 | return PyInt_FromLong(width); | 
| 345 | } | 
| 346 |  | 
| 347 | static PyObject * | 
| 348 | func_cpython_reset_locale(PyObject *self, PyObject *unused) | 
| 349 | { | 
| 350 | // From man setlocale: | 
| 351 | //   The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems. | 
| 352 | //   On startup of the main program, the portable "C" locale is selected as default. | 
| 353 |  | 
| 354 | // Python overrides this, so we set it back. | 
| 355 | if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) { | 
| 356 | // Our CI machines don't work with C.UTF-8, even though it's supposed | 
| 357 | // to exist? | 
| 358 | if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) { | 
| 359 | PyErr_SetString(PyExc_SystemError, "Couldn't set locale to C.UTF-8 or en_US.UTF-8"); | 
| 360 | return NULL; | 
| 361 | } | 
| 362 | } | 
| 363 | Py_RETURN_NONE; | 
| 364 | } | 
| 365 |  | 
// Method table for the 'libc' module.  When OVM_MAIN is defined, the table
// comes from the included methods.def file instead of the definition below.
//
// The docstring field of every entry is left empty; each function is
// described by the comment above its entry.
#ifdef OVM_MAIN
#include "pyext/libc.c/methods.def"
#else
static PyMethodDef methods[] = {
  // Return the canonical version of a path with symlinks, or None if there is
  // an error.
  {"realpath", func_realpath, METH_VARARGS, ""},

  // Return whether a string matches a pattern: 1 for a match, 0 for no match,
  // -1 for any other fnmatch() status.
  {"fnmatch", func_fnmatch, METH_VARARGS, ""},

  // Return a list of files that match a pattern.
  // We need this since Python's glob doesn't have char classes.
  {"glob", func_glob, METH_VARARGS, ""},

  // Search a string for regex.  Returns a flat list of start/end positions,
  // one pair per group, or None if there is no match.  Raises ValueError if
  // the regex is invalid.
  {"regex_search", func_regex_search, METH_VARARGS, ""},

  // If the regex matches the string, return the start and end position of the
  // first group.  Returns None if there is no match.  Raises RuntimeError if
  // the regex is invalid.
  {"regex_first_group_match", func_regex_first_group_match, METH_VARARGS, ""},

  // Print three floating point values for the 'time' builtin.
  {"print_time", func_print_time, METH_VARARGS, ""},

  // Return the host name; a copy of socket.gethostname().
  {"gethostname", socket_gethostname, METH_NOARGS, ""},

  // ioctl() to get the terminal width.
  {"get_terminal_width", func_get_terminal_width, METH_NOARGS, ""},

  // Get the display width of a string. Throw an exception if the string is invalid UTF8.
  {"wcswidth", func_wcswidth, METH_VARARGS, ""},

  // Workaround for CPython's calling setlocale() in pythonrun.c.  ONLY used
  // by tests and bin/oil.py.
  {"cpython_reset_locale", func_cpython_reset_locale, METH_NOARGS, ""},
  {NULL, NULL},  // sentinel: end of table
};
#endif
| 407 |  | 
| 408 | void initlibc(void) { | 
| 409 | PyObject *module; | 
| 410 |  | 
| 411 | module = Py_InitModule("libc", methods); | 
| 412 | if (module != NULL) { | 
| 413 | PyModule_AddIntConstant(module, "FNM_CASEFOLD", FNM_CASEFOLD); | 
| 414 | PyModule_AddIntConstant(module, "REG_ICASE", REG_ICASE); | 
| 415 | PyModule_AddIntConstant(module, "REG_NEWLINE", REG_NEWLINE); | 
| 416 | PyModule_AddIntConstant(module, "REG_NOTBOL", REG_NOTBOL); | 
| 417 | } | 
| 418 |  | 
| 419 | errno_error = PyErr_NewException("libc.error", | 
| 420 | PyExc_IOError, NULL); | 
| 421 | } |