/home/uke/oil/mycpp/my_runtime.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // my_runtime.cc |
2 | | |
3 | | #include "my_runtime.h" |
4 | | |
5 | | #include <ctype.h> // isspace(), isdigit() |
6 | | |
7 | | #include <cstdarg> // va_list, etc. |
8 | | #include <vector> |
9 | | |
10 | | #include "mylib2.h" // BufWriter |
11 | | |
12 | | using gc_heap::kEmptyString; |
13 | | using gc_heap::StackRoots; |
14 | | |
15 | | // Translation of Python's print(). |
16 | 61 | void print(Str* s) { |
17 | 61 | int n = len(s); |
18 | 61 | fwrite(s->data_, sizeof(char), n, stdout); |
19 | 61 | fputs("\n", stdout); |
20 | 61 | } |
21 | | |
22 | | // Like print(..., file=sys.stderr), but Python code explicitly calls it. |
23 | 209 | void println_stderr(Str* s) { |
24 | 209 | int n = len(s); |
25 | 209 | fwrite(s->data_, sizeof(char), n, stderr); |
26 | 209 | fputs("\n", stderr); |
27 | 209 | } |
28 | | |
29 | | // Helper for str_to_int() that doesn't use exceptions. |
30 | | // Like atoi(), but with better error checking. |
31 | 15 | bool _str_to_int(Str* s, int* result, int base) { |
32 | 15 | if (len(s) == 0) { |
33 | 0 | return false; // special case for empty string |
34 | 0 | } |
35 | | |
36 | 15 | char* p; // mutated by strtol |
37 | 15 | long v = strtol(s->data_, &p, base); // base 10 |
38 | | |
39 | 15 | switch (v) { |
40 | 0 | case LONG_MIN: |
41 | | // log("underflow"); |
42 | 0 | return false; |
43 | 0 | case LONG_MAX: |
44 | | // log("overflow"); |
45 | 0 | return false; |
46 | 15 | } |
47 | | |
48 | 15 | *result = v; |
49 | | |
50 | | // Return true if it consumed ALL characters. |
51 | 15 | const char* end = s->data_ + len(s); |
52 | | |
53 | | // log("start %p p %p end %p", s->data_, p, end); |
54 | 15 | if (p == end) { |
55 | 15 | return true; |
56 | 15 | } |
57 | | |
58 | | // Trailing space is OK! |
59 | 0 | while (p < end) { |
60 | 0 | if (!isspace(*p)) { |
61 | 0 | return false; |
62 | 0 | } |
63 | 0 | p++; |
64 | 0 | } |
65 | 0 | return true; |
66 | 0 | } |
67 | | |
68 | | // Python-like wrapper |
69 | 15 | int to_int(Str* s) { |
70 | 15 | int i; |
71 | 15 | if (_str_to_int(s, &i, 10)) { |
72 | 15 | return i; |
73 | 15 | } else { |
74 | 0 | throw new ValueError(); |
75 | 0 | } |
76 | 15 | } |
77 | | |
78 | 0 | int to_int(Str* s, int base) { |
79 | 0 | int i; |
80 | 0 | if (_str_to_int(s, &i, base)) { |
81 | 0 | return i; |
82 | 0 | } else { |
83 | 0 | throw new ValueError(); |
84 | 0 | } |
85 | 0 | } |
86 | | |
87 | 48 | Str* str_concat(Str* a, Str* b) { |
88 | 48 | Str* result = nullptr; |
89 | 48 | StackRoots _roots({&a, &b, &result}); |
90 | 48 | int len_a = len(a); |
91 | 48 | int len_b = len(b); |
92 | 48 | assert(len_a >= 0); |
93 | 0 | assert(len_b >= 0); |
94 | | |
95 | 0 | result = AllocStr(len_a + len_b); |
96 | 48 | char* buf = result->data_; |
97 | 48 | memcpy(buf, a->data_, len_a); |
98 | 48 | memcpy(buf + len_a, b->data_, len_b); |
99 | | |
100 | 48 | assert(buf[len_a + len_b] == '\0'); |
101 | 0 | return result; |
102 | 48 | } |
103 | | |
104 | 39 | Str* str_repeat(Str* s, int times) { |
105 | 39 | StackRoots _roots({&s}); |
106 | | |
107 | | // Python allows -1 too, and Oil used that |
108 | 39 | if (times <= 0) { |
109 | 13 | return kEmptyString; |
110 | 13 | } |
111 | 26 | int part_len = len(s); |
112 | 26 | int result_len = part_len * times; |
113 | 26 | Str* result = AllocStr(result_len); |
114 | | |
115 | 26 | char* p_result = result->data_; |
116 | 111 | for (int i = 0; i < times; i++) { |
117 | 85 | memcpy(p_result, s->data_, part_len); |
118 | 85 | p_result += part_len; |
119 | 85 | } |
120 | 26 | assert(p_result[result_len] == '\0'); |
121 | 0 | return result; |
122 | 39 | } |
123 | | |
124 | | // |
125 | | // Str methods |
126 | | // |
127 | | |
128 | 47 | bool Str::isdigit() { |
129 | 47 | int n = len(this); |
130 | 47 | if (n == 0) { |
131 | 0 | return false; // special case |
132 | 0 | } |
133 | 61 | for (int i = 0; i < n; ++i) { |
134 | 47 | if (!::isdigit(data_[i])) { |
135 | 33 | return false; |
136 | 33 | } |
137 | 47 | } |
138 | 14 | return true; |
139 | 47 | } |
140 | 33 | bool Str::isalpha() { |
141 | 33 | int n = len(this); |
142 | 33 | if (n == 0) { |
143 | 0 | return false; // special case |
144 | 0 | } |
145 | 45 | for (int i = 0; i < n; ++i) { |
146 | 33 | if (!::isalpha(data_[i])) { |
147 | 21 | return false; |
148 | 21 | } |
149 | 33 | } |
150 | 12 | return true; |
151 | 33 | } |
152 | | |
153 | | // e.g. for osh/braces.py |
154 | 0 | bool Str::isupper() { |
155 | 0 | int n = len(this); |
156 | 0 | if (n == 0) { |
157 | 0 | return false; // special case |
158 | 0 | } |
159 | 0 | for (int i = 0; i < n; ++i) { |
160 | 0 | if (!::isupper(data_[i])) { |
161 | 0 | return false; |
162 | 0 | } |
163 | 0 | } |
164 | 0 | return true; |
165 | 0 | } |
166 | | |
167 | | // Helper for lstrip() and strip() |
168 | 1 | int Str::_strip_left_pos() { |
169 | 1 | assert(len(this) > 0); |
170 | | |
171 | 0 | int i = 0; |
172 | 1 | int n = len(this); |
173 | 1 | bool done = false; |
174 | 2 | while (i < n && !done) { |
175 | 1 | switch (data_[i]) { |
176 | 0 | case ' ': |
177 | 0 | case '\t': |
178 | 0 | case '\r': |
179 | 0 | case '\n': |
180 | 0 | i++; |
181 | 1 | default: |
182 | 1 | done = true; |
183 | 1 | break; |
184 | 1 | } |
185 | 1 | } |
186 | 1 | return i; |
187 | 1 | } |
188 | | |
189 | | // Helper for rstrip() and strip() |
190 | 1 | int Str::_strip_right_pos() { |
191 | 1 | assert(len(this) > 0); |
192 | | |
193 | 0 | int last = len(this) - 1; |
194 | 1 | int i = last; |
195 | 1 | bool done = false; |
196 | 2 | while (i > 0 && !done) { |
197 | 1 | switch (data_[i]) { |
198 | 1 | case ' ': |
199 | 1 | case '\t': |
200 | 1 | case '\r': |
201 | 1 | case '\n': |
202 | 1 | i--; |
203 | 1 | default: |
204 | 1 | done = true; |
205 | 1 | break; |
206 | 1 | } |
207 | 1 | } |
208 | 1 | return i; |
209 | 1 | } |
210 | | |
211 | 1 | Str* Str::strip() { |
212 | 1 | auto self = this; |
213 | 1 | Str* result = nullptr; |
214 | 1 | StackRoots _roots({&self, &result}); |
215 | | |
216 | 1 | int n = len(this); |
217 | 1 | if (n == 0) { |
218 | 0 | return this; |
219 | 0 | } |
220 | | |
221 | 1 | int left_pos = _strip_left_pos(); |
222 | 1 | int right_pos = _strip_right_pos(); |
223 | 1 | if (left_pos == 0 && right_pos == n - 1) { |
224 | 0 | return this; |
225 | 0 | } |
226 | | |
227 | 1 | int new_len = right_pos - left_pos + 1; |
228 | 1 | result = AllocStr(new_len); |
229 | 1 | memcpy(result->data_, self->data_ + left_pos, new_len); |
230 | 1 | return result; |
231 | 1 | } |
232 | | |
233 | | // Used for CommandSub in osh/cmd_exec.py |
234 | 0 | Str* Str::rstrip(Str* chars) { |
235 | 0 | NotImplemented(); // Uncalled |
236 | 0 | } |
237 | | |
238 | 0 | Str* Str::rstrip() { |
239 | 0 | auto self = this; |
240 | 0 | Str* result = nullptr; |
241 | 0 | StackRoots _roots({&self, &result}); |
242 | |
|
243 | 0 | int n = len(this); |
244 | 0 | if (n == 0) { |
245 | 0 | return this; |
246 | 0 | } |
247 | 0 | int right_pos = _strip_right_pos(); |
248 | 0 | if (right_pos == n - 1) { // nothing stripped |
249 | 0 | return this; |
250 | 0 | } |
251 | 0 | int new_len = right_pos + 1; |
252 | 0 | result = AllocStr(new_len); |
253 | 0 | memcpy(result->data_, self->data_, new_len); |
254 | 0 | return result; |
255 | 0 | } |
256 | | |
257 | 0 | Str* Str::ljust(int width, Str* fillchar) { |
258 | 0 | auto self = this; |
259 | 0 | StackRoots _roots({&self, &fillchar}); |
260 | |
|
261 | 0 | assert(len(fillchar) == 1); |
262 | | |
263 | 0 | int length = len(this); |
264 | 0 | int num_fill = width - length; |
265 | 0 | if (num_fill < 0) { |
266 | 0 | return this; |
267 | 0 | } else { |
268 | 0 | Str* result = AllocStr(width); |
269 | 0 | char c = fillchar->data_[0]; |
270 | 0 | memcpy(result->data_, self->data_, length); |
271 | 0 | for (int i = length; i < width; ++i) { |
272 | 0 | result->data_[i] = c; |
273 | 0 | } |
274 | 0 | assert(result->data_[width] == '\0'); |
275 | 0 | return result; |
276 | 0 | } |
277 | 0 | } |
278 | | |
279 | 0 | Str* Str::rjust(int width, Str* fillchar) { |
280 | 0 | auto self = this; |
281 | 0 | StackRoots _roots({&self, &fillchar}); |
282 | |
|
283 | 0 | assert(len(fillchar) == 1); |
284 | | |
285 | 0 | int length = len(this); |
286 | 0 | int num_fill = width - length; |
287 | 0 | if (num_fill < 0) { |
288 | 0 | return this; |
289 | 0 | } else { |
290 | 0 | Str* result = AllocStr(width); |
291 | 0 | char c = fillchar->data_[0]; |
292 | 0 | for (int i = 0; i < num_fill; ++i) { |
293 | 0 | result->data_[i] = c; |
294 | 0 | } |
295 | 0 | memcpy(result->data_ + num_fill, self->data_, length); |
296 | 0 | assert(result->data_[width] == '\0'); |
297 | 0 | return result; |
298 | 0 | } |
299 | 0 | } |
300 | | |
301 | 2 | bool Str::startswith(Str* s) { |
302 | 2 | int n = len(s); |
303 | 2 | if (n > len(this)) { |
304 | 0 | return false; |
305 | 0 | } |
306 | 2 | return memcmp(data_, s->data_, n) == 0; |
307 | 2 | } |
308 | | |
309 | 0 | bool Str::endswith(Str* s) { |
310 | 0 | int len_s = len(s); |
311 | 0 | int len_this = len(this); |
312 | 0 | if (len_s > len_this) { |
313 | 0 | return false; |
314 | 0 | } |
315 | 0 | const char* start = data_ + len_this - len_s; |
316 | 0 | return memcmp(start, s->data_, len_s) == 0; |
317 | 0 | } |
318 | | |
319 | | // Get a string with one character |
320 | 50 | Str* Str::index_(int i) { |
321 | 50 | if (i < 0) { |
322 | 0 | i = len(this) + i; |
323 | 0 | } |
324 | 50 | assert(i >= 0); |
325 | 0 | assert(i < len(this)); // had a problem here! |
326 | | |
327 | 0 | Str* result = AllocStr(1); |
328 | 50 | char* buf = result->data_; |
329 | 50 | buf[0] = data_[i]; |
330 | 50 | assert(buf[1] == '\0'); |
331 | 0 | return result; |
332 | 50 | } |
333 | | |
334 | | // s[begin:] |
335 | 2 | Str* Str::slice(int begin) { |
336 | 2 | if (begin == 0) { |
337 | 0 | return this; // s[i:] where i == 0 is common in here docs |
338 | 0 | } |
339 | 2 | int length = len(this); |
340 | 2 | if (begin < 0) { |
341 | 1 | begin = length + begin; |
342 | 1 | } |
343 | 2 | return slice(begin, length); |
344 | 2 | } |
345 | | |
346 | | // s[begin:end] |
347 | 5 | Str* Str::slice(int begin, int end) { |
348 | 5 | if (begin < 0) { |
349 | 0 | begin = len(this) + begin; |
350 | 0 | } |
351 | 5 | if (end < 0) { |
352 | 1 | end = len(this) + end; |
353 | 1 | } |
354 | 5 | int new_len = end - begin; |
355 | 5 | Str* result = AllocStr(new_len); |
356 | 5 | char* buf = result->data_; |
357 | 5 | memcpy(buf, data_ + begin, new_len); |
358 | 5 | assert(buf[new_len] == '\0'); |
359 | 0 | return result; |
360 | 5 | } |
361 | | |
362 | 345 | Str* Str::replace(Str* old, Str* new_str) { |
363 | 345 | assert(len(old) == 1); // Restriction that Oil code is OK with |
364 | | |
365 | 0 | Str* self = this; // must be a root! |
366 | 345 | Str* result = nullptr; // may not need to be a root, but make it robust |
367 | | |
368 | | // log(" self BEFORE %p", self); |
369 | 345 | StackRoots _roots({&self, &old, &new_str, &result}); |
370 | | |
371 | 345 | int len_this = len(self); |
372 | 345 | char old_char = old->data_[0]; |
373 | 345 | const char* p_this = self->data_; // advances through 'this' |
374 | 345 | const char* p_end = p_this + len_this; |
375 | | // printf(" p_this BEFORE %s\n", p_this); |
376 | | |
377 | | // First pass to calculate the new length |
378 | 345 | int replace_count = 0; |
379 | 46.5k | while (p_this < p_end) { |
380 | 46.2k | if (*p_this == old_char) { |
381 | 344 | replace_count++; |
382 | 344 | } |
383 | 46.2k | p_this++; |
384 | 46.2k | } |
385 | | |
386 | 345 | if (replace_count == 0) { |
387 | 2 | return self; // Reuse the string if there were no replacements |
388 | 2 | } |
389 | | |
390 | 343 | int result_len = |
391 | 343 | len_this - (replace_count * len(old)) + (replace_count * len(new_str)); |
392 | | |
393 | | // Second pass to copy into new 'result' |
394 | 343 | result = AllocStr(result_len); |
395 | | // log(" alloc result = %p", result); |
396 | | // log(" result = %p", result); |
397 | | // log(" self AFTER %p", self); |
398 | | |
399 | 343 | const char* new_data = new_str->data_; |
400 | 343 | const size_t new_len = len(new_str); |
401 | | |
402 | 343 | p_this = self->data_; // back to beginning |
403 | | // printf(" p_this AFTER %p\n", p_this); |
404 | 343 | p_end = p_this + len_this; // Must be rebound AFTER Alloc<>! |
405 | | |
406 | 343 | char* p_result = result->data_; // advances through 'result' |
407 | | |
408 | | // log(" p_this = %p, p_end %p", p_this, p_end); |
409 | 46.5k | while (p_this < p_end) { |
410 | | // log(" *p_this [%d]", *p_this); |
411 | 46.2k | if (*p_this == old_char) { |
412 | 344 | memcpy(p_result, new_data, new_len); // Copy from new_str |
413 | 344 | p_this++; |
414 | 344 | p_result += new_len; |
415 | 45.8k | } else { |
416 | 45.8k | *p_result = *p_this; |
417 | 45.8k | p_this++; |
418 | 45.8k | p_result++; |
419 | 45.8k | } |
420 | 46.2k | } |
421 | 343 | assert(result->data_[result_len] == '\0'); // buffer should have been zero'd |
422 | 0 | return result; |
423 | 345 | } |
424 | | |
425 | 0 | List<Str*>* Str::split(Str* sep) { |
426 | 0 | auto self = this; |
427 | 0 | List<Str*>* result = nullptr; |
428 | 0 | char* place = nullptr; |
429 | 0 | StackRoots _roots({&self, &sep, &result, &place}); |
430 | |
|
431 | 0 | assert(len(sep) == 1); // we can only split one char |
432 | 0 | char sep_char = sep->data_[0]; |
433 | |
|
434 | 0 | int length = len(this); |
435 | 0 | if (length == 0) { |
436 | | // weird case consistent with Python: ''.split(':') == [''] |
437 | 0 | return NewList<Str*>(std::initializer_list<Str*>{kEmptyString}); |
438 | 0 | } |
439 | | |
440 | | // Find breaks first so we can allocate the right number of strings ALL AT |
441 | | // ONCE. We want to avoid invalidating self->data_. |
442 | 0 | int num_bytes = 0; |
443 | 0 | int prev_pos = 0; |
444 | 0 | std::vector<int> breaks; |
445 | 0 | breaks.push_back(-1); // beginning of first part |
446 | 0 | for (int i = 0; i < length; ++i) { |
447 | 0 | if (data_[i] == sep_char) { |
448 | 0 | breaks.push_back(i); |
449 | 0 | int part_len = i - prev_pos - 1; |
450 | 0 | if (part_len > 0) { // only non-empty parts |
451 | 0 | num_bytes += aligned(kStrHeaderSize + part_len + 1); |
452 | 0 | } |
453 | 0 | prev_pos = i; |
454 | 0 | } |
455 | 0 | } |
456 | 0 | breaks.push_back(length); // end of last part |
457 | |
|
458 | 0 | result = NewList<Str*>(nullptr, breaks.size() - 1); // reserve enough space |
459 | 0 | place = reinterpret_cast<char*>(gHeap.Allocate(num_bytes)); |
460 | 0 | int n = breaks.size(); |
461 | 0 | for (int i = 1; i < n; ++i) { |
462 | 0 | int prev_pos = breaks[i - 1]; |
463 | 0 | int part_len = breaks[i] - prev_pos - 1; |
464 | 0 | if (part_len > 0) { |
465 | | // like AllocStr(), but IN PLACE |
466 | 0 | int obj_len = kStrHeaderSize + part_len + 1; // NUL terminator |
467 | 0 | Str* part = new (place) Str(); // placement new |
468 | 0 | part->SetObjLen(obj_len); // So the GC can copy it |
469 | 0 | memcpy(part->data_, self->data_ + prev_pos + 1, part_len); |
470 | 0 | result->set(i - 1, part); |
471 | 0 | place += aligned(obj_len); |
472 | 0 | } else { |
473 | 0 | result->set(i - 1, kEmptyString); // save some space |
474 | 0 | } |
475 | 0 | } |
476 | |
|
477 | 0 | return result; |
478 | 0 | } |
479 | | |
480 | 36 | Str* Str::join(List<Str*>* items) { |
481 | 36 | auto self = this; // must be a root! |
482 | 36 | Str* result = nullptr; // may not need to be a root, but make it robust |
483 | | |
484 | 36 | StackRoots _roots({&self, &result, &items}); |
485 | | |
486 | 36 | int result_len = 0; |
487 | 36 | int num_parts = len(items); |
488 | 36 | if (num_parts == 0) { // " ".join([]) == "" |
489 | 1 | return kEmptyString; |
490 | 1 | } |
491 | 156 | for (int i = 0; i < num_parts; ++i) { |
492 | 121 | result_len += len(items->index_(i)); |
493 | 121 | } |
494 | 35 | int sep_len = len(self); |
495 | | // add length of all the separators |
496 | 35 | result_len += sep_len * (num_parts - 1); |
497 | | |
498 | | // log("len: %d", len); |
499 | | // log("v.size(): %d", v.size()); |
500 | | |
501 | 35 | result = AllocStr(result_len); |
502 | 35 | char* p_result = result->data_; // advances through |
503 | | |
504 | 156 | for (int i = 0; i < num_parts; ++i) { |
505 | | // log("i %d", i); |
506 | 121 | if (i != 0 && sep_len) { // optimize common case of ''.join() |
507 | 0 | memcpy(p_result, self->data_, sep_len); // copy the separator |
508 | 0 | p_result += sep_len; |
509 | | // log("len_ %d", len_); |
510 | 0 | } |
511 | | |
512 | 121 | int n = len(items->index_(i)); |
513 | 121 | if (n < 0) { |
514 | 0 | log("n: %d", n); |
515 | 0 | InvalidCodePath(); |
516 | 0 | } |
517 | 0 | memcpy(p_result, items->index_(i)->data_, n); // copy the list item |
518 | 121 | p_result += n; |
519 | 121 | } |
520 | | |
521 | 35 | assert(p_result[result_len] == '\0'); // GC should zero it |
522 | 0 | return result; |
523 | 36 | } |
524 | | |
525 | 0 | Str* repr(Str* s) { |
526 | 0 | mylib::BufWriter f; |
527 | 0 | f.format_r(s); |
528 | 0 | return f.getvalue(); |
529 | 0 | } |