1 | #include "cpp/libc.h"
2 |
3 | #include <regex.h> // regcomp()
4 | #include <unistd.h> // gethostname()
5 |
6 | #include "mycpp/runtime.h"
7 | #include "vendor/greatest.h"
8 |
9 | TEST hostname_test() {
10 | BigStr* s0 = libc::gethostname();
11 | ASSERT(s0 != nullptr);
12 |
13 | char buf[1024];
14 | ASSERT(gethostname(buf, HOST_NAME_MAX) == 0);
15 | ASSERT(str_equals(s0, StrFromC(buf)));
16 |
17 | PASS();
18 | }
19 |
20 | TEST realpath_test() {
21 | BigStr* result = libc::realpath(StrFromC("/"));
22 | ASSERT(str_equals(StrFromC("/"), result));
23 |
24 | bool caught = false;
25 | try {
26 | libc::realpath(StrFromC("/nonexistent_ZZZ"));
27 | } catch (IOError_OSError* e) {
28 | caught = true;
29 | }
30 | ASSERT(caught);
31 |
32 | PASS();
33 | }
34 |
35 | TEST libc_test() {
36 | log("sizeof(wchar_t) = %d", sizeof(wchar_t));
37 |
38 | int width = 0;
39 |
40 | // TODO: enable this test. Is it not picking LC_CTYPE?
41 | // Do we have to do some initialization like libc.cpython_reset_locale() ?
42 | #if 0
43 | try {
44 | // mu character \u{03bc} in utf-8
45 | width = libc::wcswidth(StrFromC("\xce\xbc"));
46 | } catch (UnicodeError* e) {
47 | log("UnicodeError %s", e->message->data_);
48 | }
49 | ASSERT_EQ_FMT(2, width, "%d");
50 | #endif
51 |
52 | BigStr* h = libc::gethostname();
53 | log("gethostname() = %s %d", h->data_, len(h));
54 |
55 | width = libc::wcswidth(StrFromC("foo"));
56 | ASSERT_EQ(3, width);
57 |
58 | libc::print_time(0.1, 0.2, 0.3);
59 |
60 | BigStr* s1 = (StrFromC("foo.py "))->strip();
61 | ASSERT(libc::fnmatch(StrFromC("*.py"), s1));
62 | ASSERT(!libc::fnmatch(StrFromC("*.py"), StrFromC("foo.p")));
63 |
64 | // extended glob
65 | ASSERT(libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.py")));
66 | ASSERT(!libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.p")));
67 |
68 | PASS();
69 | }
70 |
71 | static List<BigStr*>* Groups(BigStr* s, List<int>* indices) {
72 | List<BigStr*>* groups = NewList<BigStr*>();
73 | int n = len(indices) / 2;
74 | for (int i = 0; i < n; ++i) {
75 | int start = indices->at(2 * i);
76 | int end = indices->at(2 * i + 1);
77 | if (start == -1) {
78 | groups->append(nullptr);
79 | } else {
80 | groups->append(s->slice(start, end));
81 | }
82 | }
83 | return groups;
84 | }
85 |
86 | TEST regex_test() {
87 | BigStr* s1 = StrFromC("-abaacaaa");
88 | List<int>* indices = libc::regex_search(StrFromC("(a+).(a+)"), 0, s1, 0);
89 | List<BigStr*>* results = Groups(s1, indices);
90 | ASSERT_EQ_FMT(3, len(results), "%d");
91 | ASSERT(str_equals(StrFromC("abaa"), results->at(0))); // whole match
92 | ASSERT(str_equals(StrFromC("a"), results->at(1)));
93 | ASSERT(str_equals(StrFromC("aa"), results->at(2)));
94 |
95 | indices = libc::regex_search(StrFromC("z+"), 0, StrFromC("abaacaaa"), 0);
96 | ASSERT_EQ(nullptr, indices);
97 |
98 | // Alternation gives unmatched group
99 | BigStr* s2 = StrFromC("b");
100 | indices = libc::regex_search(StrFromC("(a)|(b)"), 0, s2, 0);
101 | results = Groups(s2, indices);
102 | ASSERT_EQ_FMT(3, len(results), "%d");
103 | ASSERT(str_equals(StrFromC("b"), results->at(0))); // whole match
104 | ASSERT_EQ(nullptr, results->at(1));
105 | ASSERT(str_equals(StrFromC("b"), results->at(2)));
106 |
107 | Tuple2<int, int>* result;
108 | BigStr* s = StrFromC("oXooXoooXoX");
109 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 0);
110 | ASSERT_EQ_FMT(1, result->at0(), "%d");
111 | ASSERT_EQ_FMT(3, result->at1(), "%d");
112 |
113 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 3);
114 | ASSERT_EQ_FMT(4, result->at0(), "%d");
115 | ASSERT_EQ_FMT(6, result->at1(), "%d");
116 |
117 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 6);
118 | ASSERT_EQ_FMT(8, result->at0(), "%d");
119 | ASSERT_EQ_FMT(10, result->at1(), "%d");
120 |
121 | PASS();
122 | }
123 |
124 | TEST libc_glob_test() {
125 | // This depends on the file system
126 | auto files = libc::glob(StrFromC("*.testdata"));
127 | // 3 files are made by the shell wrapper
128 | ASSERT_EQ_FMT(3, len(files), "%d");
129 |
130 | print(files->at(0));
131 |
132 | auto files2 = libc::glob(StrFromC("*.pyzzz"));
133 | ASSERT_EQ_FMT(0, len(files2), "%d");
134 |
135 | PASS();
136 | }
137 |
138 | TEST for_test_coverage() {
139 | // Sometimes we're not connected to a terminal
140 | try {
141 | libc::get_terminal_width();
142 | } catch (IOError_OSError* e) {
143 | }
144 |
145 | PASS();
146 | }
147 |
148 | void FindAll(const char* p, const char* s) {
149 | regex_t pat;
150 |
151 | int cflags = REG_EXTENDED;
152 | if (regcomp(&pat, p, cflags) != 0) {
153 | FAIL();
154 | }
155 | int outlen = pat.re_nsub + 1; // number of captures
156 |
157 | // TODO: Could statically allocate 99, and assert that re_nsub is less than
158 | // 99. Would speed up loops.
159 | regmatch_t* pmatch =
160 | static_cast<regmatch_t*>(malloc(sizeof(regmatch_t) * outlen));
161 |
162 | int cur_pos = 0;
163 | // int n = strlen(s);
164 | while (true) {
165 | // Necessary so ^ doesn't match in the middle!
166 | int eflags = cur_pos == 0 ? 0 : REG_NOTBOL;
167 | bool match = regexec(&pat, s + cur_pos, outlen, pmatch, eflags) == 0;
168 |
169 | if (!match) {
170 | break;
171 | }
172 | int i;
173 | for (i = 0; i < outlen; i++) {
174 | int start = pmatch[i].rm_so;
175 | int end = pmatch[i].rm_eo;
176 | int len = end - start;
177 | BigStr* m = StrFromC(s + cur_pos + start, len);
178 | log("%d GROUP %d (%d .. %d) = [%s]", cur_pos, i, start, end, m->data_);
179 | }
180 | log("");
181 | int match_len = pmatch[0].rm_eo;
182 | if (match_len == 0) {
183 | break;
184 | }
185 | cur_pos += match_len;
186 | }
187 |
188 | free(pmatch);
189 | regfree(&pat);
190 | }
191 |
// Shared input for the FindAll() tests below; it contains adjacent matches.
// Internal linkage and a const pointer keep this one-letter global from
// colliding with other translation units or being reassigned by a test.
static const char* const s = "a345y-axy- there b789y- cy-";
194 |
195 | TEST regex_unanchored() {
196 | const char* unanchored = "[abc]([0-9]*)(x?)(y)-";
197 | FindAll(unanchored, s);
198 |
199 | PASS();
200 | }
201 |
202 | TEST regex_caret() {
203 | const char* anchored = "^[abc]([0-9]*)(x?)(y)-";
204 | FindAll(anchored, s);
205 |
206 | PASS();
207 | }
208 |
209 | TEST regex_lexer() {
210 | // like the Yaks / Make-a-Lisp pattern
211 | const char* lexer = "([a-z]+)|([0-9]+)|([ ]+)|([+-])";
212 | FindAll(lexer, s);
213 |
214 | PASS();
215 | }
216 |
217 | TEST regex_repeat_with_capture() {
218 | const char* lexer = "(([a-z]+)([0-9]+)-)*((A+)|(Z+))*";
219 | FindAll(lexer, "a0-b1-c2-AAZZZA");
220 | // Groups are weird
221 | // whole match 0: a0-b1-c2-
222 | // 1: c2- # last repetition
223 | // 2: c # last one
224 | // 3: 2 # last one
225 | //
226 | // And then there's an empty match
227 | //
228 | // Ideas:
229 | // - disallow nested groups in Eggex?
230 | // - I really care about the inner ones -- groups 2 and 3
231 | // - I want flat groups
232 |
233 | PASS();
234 | }
235 |
236 | // Disallow this in eggex, as well as the above
237 | TEST regex_nested_capture() {
238 | const char* lexer = "(([a-z]+)([0-9]+))";
239 | FindAll(lexer, "a0");
240 | PASS();
241 | }
242 |
243 | // I think we allow this in eggex
244 | TEST regex_alt_with_capture() {
245 | const char* lexer = "([a-z]+)|([0-9]+)(-)";
246 | FindAll(lexer, "x-");
247 | FindAll(lexer, "7-");
248 | PASS();
249 | }
250 |
252 |
253 | int main(int argc, char** argv) {
254 | gHeap.Init();
255 |
257 |
258 | RUN_TEST(hostname_test);
259 | RUN_TEST(realpath_test);
260 | RUN_TEST(libc_test);
261 | RUN_TEST(regex_test);
262 | RUN_TEST(libc_glob_test);
263 | RUN_TEST(for_test_coverage);
264 |
265 | RUN_TEST(regex_unanchored);
266 | RUN_TEST(regex_caret);
267 | RUN_TEST(regex_lexer);
268 | RUN_TEST(regex_repeat_with_capture);
269 | RUN_TEST(regex_alt_with_capture);
270 | RUN_TEST(regex_nested_capture);
271 |
272 | gHeap.CleanProcessExit();
273 |
275 | return 0;
276 | }