/home/uke/oil/cpp/libc.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // libc.cc: Replacement for pyext/libc.c |
2 | | |
3 | | #include "cpp/libc.h" |
4 | | |
5 | | #include <errno.h> |
6 | | #include <fnmatch.h> |
7 | | #include <glob.h> |
8 | | #include <locale.h> |
9 | | #include <regex.h> |
10 | | #include <sys/ioctl.h> |
11 | | #include <unistd.h> // gethostname() |
12 | | #include <wchar.h> |
13 | | |
14 | | namespace libc { |
15 | | |
16 | 2 | BigStr* gethostname() { |
17 | | // Note: Fixed issue #1656 - OS X and FreeBSD don't have HOST_NAME_MAX |
18 | | // https://reviews.freebsd.org/D30062 |
19 | 2 | BigStr* result = OverAllocatedStr(_POSIX_HOST_NAME_MAX); |
20 | 2 | int status = ::gethostname(result->data_, _POSIX_HOST_NAME_MAX); |
21 | 2 | if (status != 0) { |
22 | 0 | throw Alloc<OSError>(errno); |
23 | 0 | } |
24 | | // Important: set the length of the string! |
25 | 2 | result->MaybeShrink(strlen(result->data_)); |
26 | 2 | return result; |
27 | 2 | } |
28 | | |
29 | 2 | BigStr* realpath(BigStr* path) { |
30 | 2 | BigStr* result = OverAllocatedStr(PATH_MAX); |
31 | 2 | char* p = ::realpath(path->data_, result->data_); |
32 | 2 | if (p == nullptr) { |
33 | 1 | throw Alloc<OSError>(errno); |
34 | 1 | } |
35 | 1 | result->MaybeShrink(strlen(result->data_)); |
36 | 1 | return result; |
37 | 2 | } |
38 | | |
39 | 5 | int fnmatch(BigStr* pat, BigStr* str, int flags) { |
40 | 5 | #ifdef FNM_EXTMATCH |
41 | 5 | flags |= FNM_EXTMATCH; |
42 | | #else |
43 | | // TODO: We should detect this at ./configure time, and then maybe flag these |
44 | | // at parse time, not runtime |
45 | | #endif |
46 | | |
47 | 5 | int result = ::fnmatch(pat->data_, str->data_, flags); |
48 | 5 | switch (result) { |
49 | 3 | case 0: |
50 | 3 | return 1; |
51 | 2 | case FNM_NOMATCH: |
52 | 2 | return 0; |
53 | 0 | default: |
54 | | // Other error |
55 | 0 | return -1; |
56 | 5 | } |
57 | 5 | } |
58 | | |
59 | 2 | List<BigStr*>* glob(BigStr* pat) { |
60 | 2 | glob_t results; |
61 | | // Hm, it's weird that the first one can't be called with GLOB_APPEND. You |
62 | | // get a segfault. |
63 | 2 | int flags = 0; |
64 | | // int flags = GLOB_APPEND; |
65 | | // flags |= GLOB_NOMAGIC; |
66 | 2 | int ret = glob(pat->data_, flags, NULL, &results); |
67 | | |
68 | 2 | const char* err_str = NULL; |
69 | 2 | switch (ret) { |
70 | 1 | case 0: // no error |
71 | 1 | break; |
72 | 0 | case GLOB_ABORTED: |
73 | 0 | err_str = "read error"; |
74 | 0 | break; |
75 | 1 | case GLOB_NOMATCH: |
76 | | // No error, because not matching isn't necessarily a problem. |
77 | | // NOTE: This can be turned on to log overaggressive calls to glob(). |
78 | | // err_str = "nothing matched"; |
79 | 1 | break; |
80 | 0 | case GLOB_NOSPACE: |
81 | 0 | err_str = "no dynamic memory"; |
82 | 0 | break; |
83 | 0 | default: |
84 | 0 | err_str = "unknown problem"; |
85 | 0 | break; |
86 | 2 | } |
87 | 2 | if (err_str) { |
88 | 0 | throw Alloc<RuntimeError>(StrFromC(err_str)); |
89 | 0 | } |
90 | | |
91 | | // http://stackoverflow.com/questions/3512414/does-this-pylist-appendlist-py-buildvalue-leak |
92 | 2 | size_t n = results.gl_pathc; |
93 | 2 | auto matches = NewList<BigStr*>(); |
94 | | |
95 | | // Print array of results |
96 | 2 | size_t i; |
97 | 5 | for (i = 0; i < n; i++) { |
98 | 3 | const char* m = results.gl_pathv[i]; |
99 | 3 | matches->append(StrFromC(m)); |
100 | 3 | } |
101 | 2 | globfree(&results); |
102 | | |
103 | 2 | return matches; |
104 | 2 | } |
105 | | |
106 | | // Raises RuntimeError if the pattern is invalid. TODO: Use a different |
107 | | // exception? |
108 | | List<int>* regex_search(BigStr* pattern, int cflags, BigStr* str, int eflags, |
109 | 4 | int pos) { |
110 | 4 | cflags |= REG_EXTENDED; |
111 | 4 | regex_t pat; |
112 | 4 | int status = regcomp(&pat, pattern->data_, cflags); |
113 | 4 | if (status != 0) { |
114 | 0 | char error_desc[50]; |
115 | 0 | regerror(status, &pat, error_desc, 50); |
116 | |
|
117 | 0 | char error_message[80]; |
118 | 0 | snprintf(error_message, 80, "Invalid regex %s (%s)", pattern->data_, |
119 | 0 | error_desc); |
120 | |
|
121 | 0 | throw Alloc<ValueError>(StrFromC(error_message)); |
122 | 0 | } |
123 | | // log("pat = %d, str = %d", len(pattern), len(str)); |
124 | | |
125 | 4 | int num_groups = pat.re_nsub + 1; // number of captures |
126 | | |
127 | 4 | List<int>* indices = NewList<int>(); |
128 | 4 | indices->reserve(num_groups * 2); |
129 | | |
130 | 4 | const char* s = str->data_; |
131 | 4 | regmatch_t* pmatch = |
132 | 4 | static_cast<regmatch_t*>(malloc(sizeof(regmatch_t) * num_groups)); |
133 | 4 | bool match = regexec(&pat, s + pos, num_groups, pmatch, eflags) == 0; |
134 | 4 | if (match) { |
135 | 3 | int i; |
136 | 10 | for (i = 0; i < num_groups; i++) { |
137 | 7 | int start = pmatch[i].rm_so; |
138 | 7 | if (start != -1) { |
139 | 6 | start += pos; |
140 | 6 | } |
141 | 7 | indices->append(start); |
142 | | |
143 | 7 | int end = pmatch[i].rm_eo; |
144 | 7 | if (end != -1) { |
145 | 6 | end += pos; |
146 | 6 | } |
147 | 7 | indices->append(end); |
148 | 7 | } |
149 | 3 | } |
150 | | |
151 | 4 | free(pmatch); |
152 | 4 | regfree(&pat); |
153 | | |
154 | 4 | if (!match) { |
155 | 1 | return nullptr; |
156 | 1 | } |
157 | | |
158 | 3 | return indices; |
159 | 4 | } |
160 | | |
161 | | // For ${//}, the number of groups is always 1, so we want 2 match position |
162 | | // results -- the whole regex (which we ignore), and then first group. |
163 | | // |
164 | | // For [[ =~ ]], do we need to count how many matches the user gave? |
165 | | |
166 | | const int NMATCH = 2; |
167 | | |
168 | | // Odd: This a Tuple2* not Tuple2 because it's Optional[Tuple2]! |
169 | | Tuple2<int, int>* regex_first_group_match(BigStr* pattern, BigStr* str, |
170 | 3 | int pos) { |
171 | 3 | regex_t pat; |
172 | 3 | regmatch_t m[NMATCH]; |
173 | | |
174 | | // Could have been checked by regex_parse for [[ =~ ]], but not for glob |
175 | | // patterns like ${foo/x*/y}. |
176 | | |
177 | 3 | if (regcomp(&pat, pattern->data_, REG_EXTENDED) != 0) { |
178 | 0 | throw Alloc<RuntimeError>( |
179 | 0 | StrFromC("Invalid regex syntax (func_regex_first_group_match)")); |
180 | 0 | } |
181 | | |
182 | | // Match at offset 'pos' |
183 | 3 | int result = regexec(&pat, str->data_ + pos, NMATCH, m, 0 /*flags*/); |
184 | 3 | regfree(&pat); |
185 | | |
186 | 3 | if (result != 0) { |
187 | 0 | return nullptr; |
188 | 0 | } |
189 | | |
190 | | // Assume there is a match |
191 | 3 | regoff_t start = m[0].rm_so; |
192 | 3 | regoff_t end = m[0].rm_eo; |
193 | 3 | Tuple2<int, int>* tup = Alloc<Tuple2<int, int>>(pos + start, pos + end); |
194 | | |
195 | 3 | return tup; |
196 | 3 | } |
197 | | |
198 | 1 | int wcswidth(BigStr* s) { |
199 | | // Behavior of mbstowcs() depends on LC_CTYPE |
200 | | |
201 | | // Calculate length first |
202 | 1 | int num_wide_chars = ::mbstowcs(NULL, s->data_, 0); |
203 | 1 | if (num_wide_chars == -1) { |
204 | 0 | throw Alloc<UnicodeError>(StrFromC("mbstowcs() 1")); |
205 | 0 | } |
206 | | |
207 | | // Allocate buffer |
208 | 1 | int buf_size = (num_wide_chars + 1) * sizeof(wchar_t); |
209 | 1 | wchar_t* wide_chars = static_cast<wchar_t*>(malloc(buf_size)); |
210 | 1 | DCHECK(wide_chars != nullptr); |
211 | | |
212 | | // Convert to wide chars |
213 | 0 | num_wide_chars = ::mbstowcs(wide_chars, s->data_, num_wide_chars); |
214 | 1 | if (num_wide_chars == -1) { |
215 | 0 | free(wide_chars); // cleanup |
216 | |
|
217 | 0 | throw Alloc<UnicodeError>(StrFromC("mbstowcs() 2")); |
218 | 0 | } |
219 | | |
220 | | // Find number of columns |
221 | 1 | int width = ::wcswidth(wide_chars, num_wide_chars); |
222 | 1 | if (width == -1) { |
223 | 0 | free(wide_chars); // cleanup |
224 | | |
225 | | // unprintable chars |
226 | 0 | throw Alloc<UnicodeError>(StrFromC("wcswidth()")); |
227 | 0 | } |
228 | | |
229 | 1 | free(wide_chars); |
230 | 1 | return width; |
231 | 1 | } |
232 | | |
233 | 1 | int get_terminal_width() { |
234 | 1 | struct winsize w; |
235 | 1 | if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == -1) { |
236 | 1 | throw Alloc<IOError>(errno); |
237 | 1 | } |
238 | 0 | return w.ws_col; |
239 | 1 | } |
240 | | |
241 | | } // namespace libc |