1 | #include "cpp/libc.h"
|
2 |
|
3 | #include <regex.h> // regcomp()
|
4 | #include <unistd.h> // gethostname()
|
5 |
|
6 | #include "mycpp/runtime.h"
|
7 | #include "vendor/greatest.h"
|
8 |
|
9 | TEST hostname_test() {
|
10 | BigStr* s0 = libc::gethostname();
|
11 | ASSERT(s0 != nullptr);
|
12 |
|
13 | char buf[1024];
|
14 | ASSERT(gethostname(buf, HOST_NAME_MAX) == 0);
|
15 | ASSERT(str_equals(s0, StrFromC(buf)));
|
16 |
|
17 | PASS();
|
18 | }
|
19 |
|
20 | TEST realpath_test() {
|
21 | BigStr* result = libc::realpath(StrFromC("/"));
|
22 | ASSERT(str_equals(StrFromC("/"), result));
|
23 |
|
24 | bool caught = false;
|
25 | try {
|
26 | libc::realpath(StrFromC("/nonexistent_ZZZ"));
|
27 | } catch (IOError_OSError* e) {
|
28 | caught = true;
|
29 | }
|
30 | ASSERT(caught);
|
31 |
|
32 | PASS();
|
33 | }
|
34 |
|
35 | TEST libc_test() {
|
36 | log("sizeof(wchar_t) = %d", sizeof(wchar_t));
|
37 |
|
38 | int width = 0;
|
39 |
|
40 | // TODO: enable this test. Is it not picking LC_CTYPE?
|
41 | // Do we have to do some initialization like libc.cpython_reset_locale() ?
|
42 | #if 0
|
43 | try {
|
44 | // mu character \u{03bc} in utf-8
|
45 | width = libc::wcswidth(StrFromC("\xce\xbc"));
|
46 | } catch (UnicodeError* e) {
|
47 | log("UnicodeError %s", e->message->data_);
|
48 | }
|
49 | ASSERT_EQ_FMT(2, width, "%d");
|
50 | #endif
|
51 |
|
52 | BigStr* h = libc::gethostname();
|
53 | log("gethostname() = %s %d", h->data_, len(h));
|
54 |
|
55 | width = libc::wcswidth(StrFromC("foo"));
|
56 | ASSERT_EQ(3, width);
|
57 |
|
58 | libc::print_time(0.1, 0.2, 0.3);
|
59 |
|
60 | BigStr* s1 = (StrFromC("foo.py "))->strip();
|
61 | ASSERT(libc::fnmatch(StrFromC("*.py"), s1));
|
62 | ASSERT(!libc::fnmatch(StrFromC("*.py"), StrFromC("foo.p")));
|
63 |
|
64 | // extended glob
|
65 | ASSERT(libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.py")));
|
66 | ASSERT(!libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.p")));
|
67 |
|
68 | PASS();
|
69 | }
|
70 |
|
71 | static List<BigStr*>* Groups(BigStr* s, List<int>* indices) {
|
72 | List<BigStr*>* groups = NewList<BigStr*>();
|
73 | int n = len(indices) / 2;
|
74 | for (int i = 0; i < n; ++i) {
|
75 | int start = indices->at(2 * i);
|
76 | int end = indices->at(2 * i + 1);
|
77 | if (start == -1) {
|
78 | groups->append(nullptr);
|
79 | } else {
|
80 | groups->append(s->slice(start, end));
|
81 | }
|
82 | }
|
83 | return groups;
|
84 | }
|
85 |
|
86 | TEST regex_test() {
|
87 | BigStr* s1 = StrFromC("-abaacaaa");
|
88 | List<int>* indices = libc::regex_search(StrFromC("(a+).(a+)"), 0, s1, 0);
|
89 | List<BigStr*>* results = Groups(s1, indices);
|
90 | ASSERT_EQ_FMT(3, len(results), "%d");
|
91 | ASSERT(str_equals(StrFromC("abaa"), results->at(0))); // whole match
|
92 | ASSERT(str_equals(StrFromC("a"), results->at(1)));
|
93 | ASSERT(str_equals(StrFromC("aa"), results->at(2)));
|
94 |
|
95 | indices = libc::regex_search(StrFromC("z+"), 0, StrFromC("abaacaaa"), 0);
|
96 | ASSERT_EQ(nullptr, indices);
|
97 |
|
98 | // Alternation gives unmatched group
|
99 | BigStr* s2 = StrFromC("b");
|
100 | indices = libc::regex_search(StrFromC("(a)|(b)"), 0, s2, 0);
|
101 | results = Groups(s2, indices);
|
102 | ASSERT_EQ_FMT(3, len(results), "%d");
|
103 | ASSERT(str_equals(StrFromC("b"), results->at(0))); // whole match
|
104 | ASSERT_EQ(nullptr, results->at(1));
|
105 | ASSERT(str_equals(StrFromC("b"), results->at(2)));
|
106 |
|
107 | Tuple2<int, int>* result;
|
108 | BigStr* s = StrFromC("oXooXoooXoX");
|
109 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 0);
|
110 | ASSERT_EQ_FMT(1, result->at0(), "%d");
|
111 | ASSERT_EQ_FMT(3, result->at1(), "%d");
|
112 |
|
113 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 3);
|
114 | ASSERT_EQ_FMT(4, result->at0(), "%d");
|
115 | ASSERT_EQ_FMT(6, result->at1(), "%d");
|
116 |
|
117 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 6);
|
118 | ASSERT_EQ_FMT(8, result->at0(), "%d");
|
119 | ASSERT_EQ_FMT(10, result->at1(), "%d");
|
120 |
|
121 | PASS();
|
122 | }
|
123 |
|
124 | TEST libc_glob_test() {
|
125 | // This depends on the file system
|
126 | auto files = libc::glob(StrFromC("*.testdata"));
|
127 | // 3 files are made by the shell wrapper
|
128 | ASSERT_EQ_FMT(3, len(files), "%d");
|
129 |
|
130 | print(files->at(0));
|
131 |
|
132 | auto files2 = libc::glob(StrFromC("*.pyzzz"));
|
133 | ASSERT_EQ_FMT(0, len(files2), "%d");
|
134 |
|
135 | PASS();
|
136 | }
|
137 |
|
138 | TEST for_test_coverage() {
|
139 | // Sometimes we're not connected to a terminal
|
140 | try {
|
141 | libc::get_terminal_width();
|
142 | } catch (IOError_OSError* e) {
|
143 | }
|
144 |
|
145 | PASS();
|
146 | }
|
147 |
|
148 | void FindAll(const char* p, const char* s) {
|
149 | regex_t pat;
|
150 |
|
151 | int cflags = REG_EXTENDED;
|
152 | if (regcomp(&pat, p, cflags) != 0) {
|
153 | FAIL();
|
154 | }
|
155 | int outlen = pat.re_nsub + 1; // number of captures
|
156 |
|
157 | // TODO: Could statically allocate 99, and assert that re_nsub is less than
|
158 | // 99. Would speed up loops.
|
159 | regmatch_t* pmatch =
|
160 | static_cast<regmatch_t*>(malloc(sizeof(regmatch_t) * outlen));
|
161 |
|
162 | int cur_pos = 0;
|
163 | // int n = strlen(s);
|
164 | while (true) {
|
165 | // Necessary so ^ doesn't match in the middle!
|
166 | int eflags = cur_pos == 0 ? 0 : REG_NOTBOL;
|
167 | bool match = regexec(&pat, s + cur_pos, outlen, pmatch, eflags) == 0;
|
168 |
|
169 | if (!match) {
|
170 | break;
|
171 | }
|
172 | int i;
|
173 | for (i = 0; i < outlen; i++) {
|
174 | int start = pmatch[i].rm_so;
|
175 | int end = pmatch[i].rm_eo;
|
176 | int len = end - start;
|
177 | BigStr* m = StrFromC(s + cur_pos + start, len);
|
178 | log("%d GROUP %d (%d .. %d) = [%s]", cur_pos, i, start, end, m->data_);
|
179 | }
|
180 | log("");
|
181 | int match_len = pmatch[0].rm_eo;
|
182 | if (match_len == 0) {
|
183 | break;
|
184 | }
|
185 | cur_pos += match_len;
|
186 | }
|
187 |
|
188 | free(pmatch);
|
189 | regfree(&pat);
|
190 | }
|
191 |
|
// Shared input for the regex_unanchored, regex_caret and regex_lexer tests.
// It contains adjacent matches (e.g. "a345y-" immediately followed by "axy-").
const char* s = "a345y-axy- there b789y- cy-";
|
194 |
|
195 | TEST regex_unanchored() {
|
196 | const char* unanchored = "[abc]([0-9]*)(x?)(y)-";
|
197 | FindAll(unanchored, s);
|
198 |
|
199 | PASS();
|
200 | }
|
201 |
|
202 | TEST regex_caret() {
|
203 | const char* anchored = "^[abc]([0-9]*)(x?)(y)-";
|
204 | FindAll(anchored, s);
|
205 |
|
206 | PASS();
|
207 | }
|
208 |
|
209 | TEST regex_lexer() {
|
210 | // like the Yaks / Make-a-Lisp pattern
|
211 | const char* lexer = "([a-z]+)|([0-9]+)|([ ]+)|([+-])";
|
212 | FindAll(lexer, s);
|
213 |
|
214 | PASS();
|
215 | }
|
216 |
|
217 | TEST regex_repeat_with_capture() {
|
218 | const char* lexer = "(([a-z]+)([0-9]+)-)*((A+)|(Z+))*";
|
219 | FindAll(lexer, "a0-b1-c2-AAZZZA");
|
220 | // Groups are weird
|
221 | // whole match 0: a0-b1-c2-
|
222 | // 1: c2- # last repetition
|
223 | // 2: c # last one
|
224 | // 3: 2 # last one
|
225 | //
|
226 | // And then there's an empty match
|
227 | //
|
228 | // Ideas:
|
229 | // - disallow nested groups in Eggex?
|
230 | // - I really care about the inner ones -- groups 2 and 3
|
231 | // - I want flat groups
|
232 |
|
233 | PASS();
|
234 | }
|
235 |
|
236 | // Disallow this in eggex, as well as the above
|
237 | TEST regex_nested_capture() {
|
238 | const char* lexer = "(([a-z]+)([0-9]+))";
|
239 | FindAll(lexer, "a0");
|
240 | PASS();
|
241 | }
|
242 |
|
243 | // I think we allow this in eggex
|
244 | TEST regex_alt_with_capture() {
|
245 | const char* lexer = "([a-z]+)|([0-9]+)(-)";
|
246 | FindAll(lexer, "x-");
|
247 | FindAll(lexer, "7-");
|
248 | PASS();
|
249 | }
|
250 |
|
// Definitions required by the greatest test harness; must appear once in the
// file that defines main().
GREATEST_MAIN_DEFS();

// Test driver: initializes the runtime heap, runs every TEST in this file
// through the greatest harness, then shuts the heap down.
int main(int argc, char** argv) {
  // The tests allocate runtime objects, so set up the heap first
  gHeap.Init();

  GREATEST_MAIN_BEGIN();

  // Tests of the libc:: wrappers
  RUN_TEST(hostname_test);
  RUN_TEST(realpath_test);
  RUN_TEST(libc_test);
  RUN_TEST(regex_test);
  RUN_TEST(libc_glob_test);
  RUN_TEST(for_test_coverage);

  // Exploratory tests that log regexec() results via FindAll()
  RUN_TEST(regex_unanchored);
  RUN_TEST(regex_caret);
  RUN_TEST(regex_lexer);
  RUN_TEST(regex_repeat_with_capture);
  RUN_TEST(regex_alt_with_capture);
  RUN_TEST(regex_nested_capture);

  gHeap.CleanProcessExit();

  // NOTE(review): GREATEST_MAIN_END() presumably reports the results and
  // returns the exit status itself, making the return below a fallback --
  // confirm against vendor/greatest.h
  GREATEST_MAIN_END();
  return 0;
}
|