OILS / cpp / libc_test.cc View on Github | oilshell.org

276 lines, 168 significant
1#include "cpp/libc.h"
2
3#include <regex.h> // regcomp()
4#include <unistd.h> // gethostname()
5
6#include "mycpp/runtime.h"
7#include "vendor/greatest.h"
8
9TEST hostname_test() {
10 BigStr* s0 = libc::gethostname();
11 ASSERT(s0 != nullptr);
12
13 char buf[1024];
14 ASSERT(gethostname(buf, HOST_NAME_MAX) == 0);
15 ASSERT(str_equals(s0, StrFromC(buf)));
16
17 PASS();
18}
19
20TEST realpath_test() {
21 BigStr* result = libc::realpath(StrFromC("/"));
22 ASSERT(str_equals(StrFromC("/"), result));
23
24 bool caught = false;
25 try {
26 libc::realpath(StrFromC("/nonexistent_ZZZ"));
27 } catch (IOError_OSError* e) {
28 caught = true;
29 }
30 ASSERT(caught);
31
32 PASS();
33}
34
35TEST libc_test() {
36 log("sizeof(wchar_t) = %d", sizeof(wchar_t));
37
38 int width = 0;
39
40 // TODO: enable this test. Is it not picking LC_CTYPE?
41 // Do we have to do some initialization like libc.cpython_reset_locale() ?
42#if 0
43 try {
44 // mu character \u{03bc} in utf-8
45 width = libc::wcswidth(StrFromC("\xce\xbc"));
46 } catch (UnicodeError* e) {
47 log("UnicodeError %s", e->message->data_);
48 }
49 ASSERT_EQ_FMT(2, width, "%d");
50#endif
51
52 BigStr* h = libc::gethostname();
53 log("gethostname() = %s %d", h->data_, len(h));
54
55 width = libc::wcswidth(StrFromC("foo"));
56 ASSERT_EQ(3, width);
57
58 libc::print_time(0.1, 0.2, 0.3);
59
60 BigStr* s1 = (StrFromC("foo.py "))->strip();
61 ASSERT(libc::fnmatch(StrFromC("*.py"), s1));
62 ASSERT(!libc::fnmatch(StrFromC("*.py"), StrFromC("foo.p")));
63
64 // extended glob
65 ASSERT(libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.py")));
66 ASSERT(!libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.p")));
67
68 PASS();
69}
70
71static List<BigStr*>* Groups(BigStr* s, List<int>* indices) {
72 List<BigStr*>* groups = NewList<BigStr*>();
73 int n = len(indices) / 2;
74 for (int i = 0; i < n; ++i) {
75 int start = indices->at(2 * i);
76 int end = indices->at(2 * i + 1);
77 if (start == -1) {
78 groups->append(nullptr);
79 } else {
80 groups->append(s->slice(start, end));
81 }
82 }
83 return groups;
84}
85
86TEST regex_test() {
87 BigStr* s1 = StrFromC("-abaacaaa");
88 List<int>* indices = libc::regex_search(StrFromC("(a+).(a+)"), 0, s1, 0);
89 List<BigStr*>* results = Groups(s1, indices);
90 ASSERT_EQ_FMT(3, len(results), "%d");
91 ASSERT(str_equals(StrFromC("abaa"), results->at(0))); // whole match
92 ASSERT(str_equals(StrFromC("a"), results->at(1)));
93 ASSERT(str_equals(StrFromC("aa"), results->at(2)));
94
95 indices = libc::regex_search(StrFromC("z+"), 0, StrFromC("abaacaaa"), 0);
96 ASSERT_EQ(nullptr, indices);
97
98 // Alternation gives unmatched group
99 BigStr* s2 = StrFromC("b");
100 indices = libc::regex_search(StrFromC("(a)|(b)"), 0, s2, 0);
101 results = Groups(s2, indices);
102 ASSERT_EQ_FMT(3, len(results), "%d");
103 ASSERT(str_equals(StrFromC("b"), results->at(0))); // whole match
104 ASSERT_EQ(nullptr, results->at(1));
105 ASSERT(str_equals(StrFromC("b"), results->at(2)));
106
107 Tuple2<int, int>* result;
108 BigStr* s = StrFromC("oXooXoooXoX");
109 result = libc::regex_first_group_match(StrFromC("(X.)"), s, 0);
110 ASSERT_EQ_FMT(1, result->at0(), "%d");
111 ASSERT_EQ_FMT(3, result->at1(), "%d");
112
113 result = libc::regex_first_group_match(StrFromC("(X.)"), s, 3);
114 ASSERT_EQ_FMT(4, result->at0(), "%d");
115 ASSERT_EQ_FMT(6, result->at1(), "%d");
116
117 result = libc::regex_first_group_match(StrFromC("(X.)"), s, 6);
118 ASSERT_EQ_FMT(8, result->at0(), "%d");
119 ASSERT_EQ_FMT(10, result->at1(), "%d");
120
121 PASS();
122}
123
124TEST libc_glob_test() {
125 // This depends on the file system
126 auto files = libc::glob(StrFromC("*.testdata"));
127 // 3 files are made by the shell wrapper
128 ASSERT_EQ_FMT(3, len(files), "%d");
129
130 print(files->at(0));
131
132 auto files2 = libc::glob(StrFromC("*.pyzzz"));
133 ASSERT_EQ_FMT(0, len(files2), "%d");
134
135 PASS();
136}
137
138TEST for_test_coverage() {
139 // Sometimes we're not connected to a terminal
140 try {
141 libc::get_terminal_width();
142 } catch (IOError_OSError* e) {
143 }
144
145 PASS();
146}
147
148void FindAll(const char* p, const char* s) {
149 regex_t pat;
150
151 int cflags = REG_EXTENDED;
152 if (regcomp(&pat, p, cflags) != 0) {
153 FAIL();
154 }
155 int outlen = pat.re_nsub + 1; // number of captures
156
157 // TODO: Could statically allocate 99, and assert that re_nsub is less than
158 // 99. Would speed up loops.
159 regmatch_t* pmatch =
160 static_cast<regmatch_t*>(malloc(sizeof(regmatch_t) * outlen));
161
162 int cur_pos = 0;
163 // int n = strlen(s);
164 while (true) {
165 // Necessary so ^ doesn't match in the middle!
166 int eflags = cur_pos == 0 ? 0 : REG_NOTBOL;
167 bool match = regexec(&pat, s + cur_pos, outlen, pmatch, eflags) == 0;
168
169 if (!match) {
170 break;
171 }
172 int i;
173 for (i = 0; i < outlen; i++) {
174 int start = pmatch[i].rm_so;
175 int end = pmatch[i].rm_eo;
176 int len = end - start;
177 BigStr* m = StrFromC(s + cur_pos + start, len);
178 log("%d GROUP %d (%d .. %d) = [%s]", cur_pos, i, start, end, m->data_);
179 }
180 log("");
181 int match_len = pmatch[0].rm_eo;
182 if (match_len == 0) {
183 break;
184 }
185 cur_pos += match_len;
186 }
187
188 free(pmatch);
189 regfree(&pat);
190}
191
// Shared input for the FindAll() tests below; it contains adjacent matches.
// Declared const with internal linkage so that no test can repoint it and it
// cannot clash with a symbol named `s` in another translation unit.
static const char* const s = "a345y-axy- there b789y- cy-";
194
195TEST regex_unanchored() {
196 const char* unanchored = "[abc]([0-9]*)(x?)(y)-";
197 FindAll(unanchored, s);
198
199 PASS();
200}
201
202TEST regex_caret() {
203 const char* anchored = "^[abc]([0-9]*)(x?)(y)-";
204 FindAll(anchored, s);
205
206 PASS();
207}
208
209TEST regex_lexer() {
210 // like the Yaks / Make-a-Lisp pattern
211 const char* lexer = "([a-z]+)|([0-9]+)|([ ]+)|([+-])";
212 FindAll(lexer, s);
213
214 PASS();
215}
216
217TEST regex_repeat_with_capture() {
218 const char* lexer = "(([a-z]+)([0-9]+)-)*((A+)|(Z+))*";
219 FindAll(lexer, "a0-b1-c2-AAZZZA");
220 // Groups are weird
221 // whole match 0: a0-b1-c2-
222 // 1: c2- # last repetition
223 // 2: c # last one
224 // 3: 2 # last one
225 //
226 // And then there's an empty match
227 //
228 // Ideas:
229 // - disallow nested groups in Eggex?
230 // - I really care about the inner ones -- groups 2 and 3
231 // - I want flat groups
232
233 PASS();
234}
235
236// Disallow this in eggex, as well as the above
237TEST regex_nested_capture() {
238 const char* lexer = "(([a-z]+)([0-9]+))";
239 FindAll(lexer, "a0");
240 PASS();
241}
242
243// I think we allow this in eggex
244TEST regex_alt_with_capture() {
245 const char* lexer = "([a-z]+)|([0-9]+)(-)";
246 FindAll(lexer, "x-");
247 FindAll(lexer, "7-");
248 PASS();
249}
250
251GREATEST_MAIN_DEFS();
252
253int main(int argc, char** argv) {
254 gHeap.Init();
255
256 GREATEST_MAIN_BEGIN();
257
258 RUN_TEST(hostname_test);
259 RUN_TEST(realpath_test);
260 RUN_TEST(libc_test);
261 RUN_TEST(regex_test);
262 RUN_TEST(libc_glob_test);
263 RUN_TEST(for_test_coverage);
264
265 RUN_TEST(regex_unanchored);
266 RUN_TEST(regex_caret);
267 RUN_TEST(regex_lexer);
268 RUN_TEST(regex_repeat_with_capture);
269 RUN_TEST(regex_alt_with_capture);
270 RUN_TEST(regex_nested_capture);
271
272 gHeap.CleanProcessExit();
273
274 GREATEST_MAIN_END();
275 return 0;
276}