| 1 | #include "cpp/libc.h"
|
| 2 |
|
| 3 | #include <regex.h> // regcomp()
|
| 4 | #include <unistd.h> // gethostname()
|
| 5 |
|
| 6 | #include "mycpp/runtime.h"
|
| 7 | #include "vendor/greatest.h"
|
| 8 |
|
| 9 | TEST hostname_test() {
|
| 10 | BigStr* s0 = libc::gethostname();
|
| 11 | ASSERT(s0 != nullptr);
|
| 12 |
|
| 13 | char buf[1024];
|
| 14 | ASSERT(gethostname(buf, HOST_NAME_MAX) == 0);
|
| 15 | ASSERT(str_equals(s0, StrFromC(buf)));
|
| 16 |
|
| 17 | PASS();
|
| 18 | }
|
| 19 |
|
| 20 | TEST realpath_test() {
|
| 21 | BigStr* result = libc::realpath(StrFromC("/"));
|
| 22 | ASSERT(str_equals(StrFromC("/"), result));
|
| 23 |
|
| 24 | bool caught = false;
|
| 25 | try {
|
| 26 | libc::realpath(StrFromC("/nonexistent_ZZZ"));
|
| 27 | } catch (IOError_OSError* e) {
|
| 28 | caught = true;
|
| 29 | }
|
| 30 | ASSERT(caught);
|
| 31 |
|
| 32 | PASS();
|
| 33 | }
|
| 34 |
|
| 35 | TEST libc_test() {
|
| 36 | log("sizeof(wchar_t) = %d", sizeof(wchar_t));
|
| 37 |
|
| 38 | int width = 0;
|
| 39 |
|
| 40 | // TODO: enable this test. Is it not picking LC_CTYPE?
|
| 41 | // Do we have to do some initialization like libc.cpython_reset_locale() ?
|
| 42 | #if 0
|
| 43 | try {
|
| 44 | // mu character \u{03bc} in utf-8
|
| 45 | width = libc::wcswidth(StrFromC("\xce\xbc"));
|
| 46 | } catch (UnicodeError* e) {
|
| 47 | log("UnicodeError %s", e->message->data_);
|
| 48 | }
|
| 49 | ASSERT_EQ_FMT(2, width, "%d");
|
| 50 | #endif
|
| 51 |
|
| 52 | BigStr* h = libc::gethostname();
|
| 53 | log("gethostname() = %s %d", h->data_, len(h));
|
| 54 |
|
| 55 | width = libc::wcswidth(StrFromC("foo"));
|
| 56 | ASSERT_EQ(3, width);
|
| 57 |
|
| 58 | libc::print_time(0.1, 0.2, 0.3);
|
| 59 |
|
| 60 | BigStr* s1 = (StrFromC("foo.py "))->strip();
|
| 61 | ASSERT(libc::fnmatch(StrFromC("*.py"), s1));
|
| 62 | ASSERT(!libc::fnmatch(StrFromC("*.py"), StrFromC("foo.p")));
|
| 63 |
|
| 64 | // extended glob
|
| 65 | ASSERT(libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.py")));
|
| 66 | ASSERT(!libc::fnmatch(StrFromC("*(foo|bar).py"), StrFromC("foo.p")));
|
| 67 |
|
| 68 | PASS();
|
| 69 | }
|
| 70 |
|
| 71 | static List<BigStr*>* Groups(BigStr* s, List<int>* indices) {
|
| 72 | List<BigStr*>* groups = NewList<BigStr*>();
|
| 73 | int n = len(indices) / 2;
|
| 74 | for (int i = 0; i < n; ++i) {
|
| 75 | int start = indices->at(2 * i);
|
| 76 | int end = indices->at(2 * i + 1);
|
| 77 | if (start == -1) {
|
| 78 | groups->append(nullptr);
|
| 79 | } else {
|
| 80 | groups->append(s->slice(start, end));
|
| 81 | }
|
| 82 | }
|
| 83 | return groups;
|
| 84 | }
|
| 85 |
|
| 86 | TEST regex_test() {
|
| 87 | BigStr* s1 = StrFromC("-abaacaaa");
|
| 88 | List<int>* indices = libc::regex_search(StrFromC("(a+).(a+)"), 0, s1, 0);
|
| 89 | List<BigStr*>* results = Groups(s1, indices);
|
| 90 | ASSERT_EQ_FMT(3, len(results), "%d");
|
| 91 | ASSERT(str_equals(StrFromC("abaa"), results->at(0))); // whole match
|
| 92 | ASSERT(str_equals(StrFromC("a"), results->at(1)));
|
| 93 | ASSERT(str_equals(StrFromC("aa"), results->at(2)));
|
| 94 |
|
| 95 | indices = libc::regex_search(StrFromC("z+"), 0, StrFromC("abaacaaa"), 0);
|
| 96 | ASSERT_EQ(nullptr, indices);
|
| 97 |
|
| 98 | // Alternation gives unmatched group
|
| 99 | BigStr* s2 = StrFromC("b");
|
| 100 | indices = libc::regex_search(StrFromC("(a)|(b)"), 0, s2, 0);
|
| 101 | results = Groups(s2, indices);
|
| 102 | ASSERT_EQ_FMT(3, len(results), "%d");
|
| 103 | ASSERT(str_equals(StrFromC("b"), results->at(0))); // whole match
|
| 104 | ASSERT_EQ(nullptr, results->at(1));
|
| 105 | ASSERT(str_equals(StrFromC("b"), results->at(2)));
|
| 106 |
|
| 107 | Tuple2<int, int>* result;
|
| 108 | BigStr* s = StrFromC("oXooXoooXoX");
|
| 109 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 0);
|
| 110 | ASSERT_EQ_FMT(1, result->at0(), "%d");
|
| 111 | ASSERT_EQ_FMT(3, result->at1(), "%d");
|
| 112 |
|
| 113 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 3);
|
| 114 | ASSERT_EQ_FMT(4, result->at0(), "%d");
|
| 115 | ASSERT_EQ_FMT(6, result->at1(), "%d");
|
| 116 |
|
| 117 | result = libc::regex_first_group_match(StrFromC("(X.)"), s, 6);
|
| 118 | ASSERT_EQ_FMT(8, result->at0(), "%d");
|
| 119 | ASSERT_EQ_FMT(10, result->at1(), "%d");
|
| 120 |
|
| 121 | PASS();
|
| 122 | }
|
| 123 |
|
| 124 | TEST libc_glob_test() {
|
| 125 | // This depends on the file system
|
| 126 | auto files = libc::glob(StrFromC("*.testdata"));
|
| 127 | // 3 files are made by the shell wrapper
|
| 128 | ASSERT_EQ_FMT(3, len(files), "%d");
|
| 129 |
|
| 130 | print(files->at(0));
|
| 131 |
|
| 132 | auto files2 = libc::glob(StrFromC("*.pyzzz"));
|
| 133 | ASSERT_EQ_FMT(0, len(files2), "%d");
|
| 134 |
|
| 135 | PASS();
|
| 136 | }
|
| 137 |
|
| 138 | TEST for_test_coverage() {
|
| 139 | // Sometimes we're not connected to a terminal
|
| 140 | try {
|
| 141 | libc::get_terminal_width();
|
| 142 | } catch (IOError_OSError* e) {
|
| 143 | }
|
| 144 |
|
| 145 | PASS();
|
| 146 | }
|
| 147 |
|
| 148 | void FindAll(const char* p, const char* s) {
|
| 149 | regex_t pat;
|
| 150 |
|
| 151 | int cflags = REG_EXTENDED;
|
| 152 | if (regcomp(&pat, p, cflags) != 0) {
|
| 153 | FAIL();
|
| 154 | }
|
| 155 | int outlen = pat.re_nsub + 1; // number of captures
|
| 156 |
|
| 157 | // TODO: Could statically allocate 99, and assert that re_nsub is less than
|
| 158 | // 99. Would speed up loops.
|
| 159 | regmatch_t* pmatch =
|
| 160 | static_cast<regmatch_t*>(malloc(sizeof(regmatch_t) * outlen));
|
| 161 |
|
| 162 | int cur_pos = 0;
|
| 163 | // int n = strlen(s);
|
| 164 | while (true) {
|
| 165 | // Necessary so ^ doesn't match in the middle!
|
| 166 | int eflags = cur_pos == 0 ? 0 : REG_NOTBOL;
|
| 167 | bool match = regexec(&pat, s + cur_pos, outlen, pmatch, eflags) == 0;
|
| 168 |
|
| 169 | if (!match) {
|
| 170 | break;
|
| 171 | }
|
| 172 | int i;
|
| 173 | for (i = 0; i < outlen; i++) {
|
| 174 | int start = pmatch[i].rm_so;
|
| 175 | int end = pmatch[i].rm_eo;
|
| 176 | int len = end - start;
|
| 177 | BigStr* m = StrFromC(s + cur_pos + start, len);
|
| 178 | log("%d GROUP %d (%d .. %d) = [%s]", cur_pos, i, start, end, m->data_);
|
| 179 | }
|
| 180 | log("");
|
| 181 | int match_len = pmatch[0].rm_eo;
|
| 182 | if (match_len == 0) {
|
| 183 | break;
|
| 184 | }
|
| 185 | cur_pos += match_len;
|
| 186 | }
|
| 187 |
|
| 188 | free(pmatch);
|
| 189 | regfree(&pat);
|
| 190 | }
|
| 191 |
|
// Input shared by regex_unanchored, regex_caret and regex_lexer.  It contains
// adjacent matches.
const char* s = "a345y-axy- there b789y- cy-";
|
| 194 |
|
| 195 | TEST regex_unanchored() {
|
| 196 | const char* unanchored = "[abc]([0-9]*)(x?)(y)-";
|
| 197 | FindAll(unanchored, s);
|
| 198 |
|
| 199 | PASS();
|
| 200 | }
|
| 201 |
|
| 202 | TEST regex_caret() {
|
| 203 | const char* anchored = "^[abc]([0-9]*)(x?)(y)-";
|
| 204 | FindAll(anchored, s);
|
| 205 |
|
| 206 | PASS();
|
| 207 | }
|
| 208 |
|
| 209 | TEST regex_lexer() {
|
| 210 | // like the Yaks / Make-a-Lisp pattern
|
| 211 | const char* lexer = "([a-z]+)|([0-9]+)|([ ]+)|([+-])";
|
| 212 | FindAll(lexer, s);
|
| 213 |
|
| 214 | PASS();
|
| 215 | }
|
| 216 |
|
| 217 | TEST regex_repeat_with_capture() {
|
| 218 | const char* lexer = "(([a-z]+)([0-9]+)-)*((A+)|(Z+))*";
|
| 219 | FindAll(lexer, "a0-b1-c2-AAZZZA");
|
| 220 | // Groups are weird
|
| 221 | // whole match 0: a0-b1-c2-
|
| 222 | // 1: c2- # last repetition
|
| 223 | // 2: c # last one
|
| 224 | // 3: 2 # last one
|
| 225 | //
|
| 226 | // And then there's an empty match
|
| 227 | //
|
| 228 | // Ideas:
|
| 229 | // - disallow nested groups in Eggex?
|
| 230 | // - I really care about the inner ones -- groups 2 and 3
|
| 231 | // - I want flat groups
|
| 232 |
|
| 233 | PASS();
|
| 234 | }
|
| 235 |
|
| 236 | // Disallow this in eggex, as well as the above
|
| 237 | TEST regex_nested_capture() {
|
| 238 | const char* lexer = "(([a-z]+)([0-9]+))";
|
| 239 | FindAll(lexer, "a0");
|
| 240 | PASS();
|
| 241 | }
|
| 242 |
|
| 243 | // I think we allow this in eggex
|
| 244 | TEST regex_alt_with_capture() {
|
| 245 | const char* lexer = "([a-z]+)|([0-9]+)(-)";
|
| 246 | FindAll(lexer, "x-");
|
| 247 | FindAll(lexer, "7-");
|
| 248 | PASS();
|
| 249 | }
|
| 250 |
|
| 251 | GREATEST_MAIN_DEFS();
|
| 252 |
|
| 253 | int main(int argc, char** argv) {
|
| 254 | gHeap.Init();
|
| 255 |
|
| 256 | GREATEST_MAIN_BEGIN();
|
| 257 |
|
| 258 | RUN_TEST(hostname_test);
|
| 259 | RUN_TEST(realpath_test);
|
| 260 | RUN_TEST(libc_test);
|
| 261 | RUN_TEST(regex_test);
|
| 262 | RUN_TEST(libc_glob_test);
|
| 263 | RUN_TEST(for_test_coverage);
|
| 264 |
|
| 265 | RUN_TEST(regex_unanchored);
|
| 266 | RUN_TEST(regex_caret);
|
| 267 | RUN_TEST(regex_lexer);
|
| 268 | RUN_TEST(regex_repeat_with_capture);
|
| 269 | RUN_TEST(regex_alt_with_capture);
|
| 270 | RUN_TEST(regex_nested_capture);
|
| 271 |
|
| 272 | gHeap.CleanProcessExit();
|
| 273 |
|
| 274 | GREATEST_MAIN_END();
|
| 275 | return 0;
|
| 276 | }
|