OILS / mycpp / gc_builtins.cc View on Github | oilshell.org

467 lines, 254 significant
1#include <ctype.h> // isspace()
2#include <errno.h> // errno
3#include <float.h> // DBL_MIN, DBL_MAX
4#include <math.h> // INFINITY
5#include <stdio.h> // required for readline/readline.h (man readline)
6
7#include "_build/detected-cpp-config.h"
8#include "mycpp/runtime.h"
9#ifdef HAVE_READLINE
10 #include "cpp/frontend_pyreadline.h"
11#endif
12
13// Translation of Python's print().
14void print(BigStr* s) {
15 fputs(s->data_, stdout); // print until first NUL
16 fputc('\n', stdout);
17}
18
19BigStr* str(int i) {
20 BigStr* s = OverAllocatedStr(kIntBufSize);
21 int length = snprintf(s->data(), kIntBufSize, "%d", i);
22 s->MaybeShrink(length);
23 return s;
24}
25
26// TODO:
27// - This could use a fancy exact algorithm, not libc
28// - Does libc depend on locale?
29BigStr* str(double d) {
30 char buf[64]; // overestimate, but we use snprintf() to be safe
31
32 // Problem:
33 // %f prints 3.0000000 and 3.500000
34 // %g prints 3 and 3.5
35 //
36 // We want literal syntax to indicate float, so add '.'
37
38 int n = sizeof(buf) - 2; // in case we add '.0'
39
40 // %.9g digits for string that can be converted back to the same FLOAT
41 // (not double)
42 // https://stackoverflow.com/a/21162120
43 // https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10
44 int length = snprintf(buf, n, "%.9g", d);
45
46 // %a is a hexfloat form, could use that somewhere
47 // int length = snprintf(buf, n, "%a", d);
48
49 if (strchr(buf, 'i')) { // inf or -inf
50 return StrFromC(buf);
51 }
52
53 if (!strchr(buf, '.')) { // 12345 -> 12345.0
54 buf[length] = '.';
55 buf[length + 1] = '0';
56 buf[length + 2] = '\0';
57 }
58
59 return StrFromC(buf);
60}
61
62// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
63// better for getting values out of Token.line without allocating?
64//
65// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
66//
67// Also for SmallStr, we don't care about interning. Only for HeapStr.
68
69BigStr* intern(BigStr* s) {
70 // TODO: put in table gHeap.interned_
71 return s;
72}
73
74// Print quoted string. TODO: use C-style strings (YSTR)
75BigStr* repr(BigStr* s) {
76 // Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
77 int n = len(s);
78 int upper_bound = n * 4 + 2;
79
80 BigStr* result = OverAllocatedStr(upper_bound);
81
82 // Single quote by default.
83 char quote = '\'';
84 if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
85 quote = '"';
86 }
87 char* p = result->data_;
88
89 // From PyString_Repr()
90 *p++ = quote;
91 for (int i = 0; i < n; ++i) {
92 char c = s->data_[i];
93 if (c == quote || c == '\\') {
94 *p++ = '\\';
95 *p++ = c;
96 } else if (c == '\t') {
97 *p++ = '\\';
98 *p++ = 't';
99 } else if (c == '\n') {
100 *p++ = '\\';
101 *p++ = 'n';
102 } else if (c == '\r') {
103 *p++ = '\\';
104 *p++ = 'r';
105 } else if (isprint(c)) {
106 *p++ = c;
107 } else { // Unprintable is \xff
108 sprintf(p, "\\x%02x", c & 0xff);
109 p += 4;
110 }
111 }
112 *p++ = quote;
113 *p = '\0';
114
115 int length = p - result->data_;
116 result->MaybeShrink(length);
117 return result;
118}
119
120// Helper functions that don't use exceptions.
121
122bool StringToInt(const char* s, int length, int base, int* result) {
123 if (length == 0) {
124 return false; // empty string isn't a valid integer
125 }
126
127 // Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
128 // sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
129 // static_assert(sizeof(long) == 8);
130
131 char* pos; // mutated by strtol
132
133 errno = 0;
134 long v = strtol(s, &pos, base);
135
136 if (errno == ERANGE) {
137 switch (v) {
138 case LONG_MIN:
139 return false; // underflow of long, which may be 64 bits
140 case LONG_MAX:
141 return false; // overflow of long
142 }
143 }
144
145 // It should ALSO fit in an int, not just a long
146 if (v > INT_MAX) {
147 return false;
148 }
149 if (v < INT_MIN) {
150 return false;
151 }
152
153 const char* end = s + length;
154 if (pos == end) {
155 *result = v;
156 return true; // strtol() consumed ALL characters.
157 }
158
159 while (pos < end) {
160 if (!isspace(*pos)) {
161 return false; // Trailing non-space
162 }
163 pos++;
164 }
165
166 *result = v;
167 return true; // Trailing space is OK
168}
169
170bool StringToInt64(const char* s, int length, int base, int64_t* result) {
171 if (length == 0) {
172 return false; // empty string isn't a valid integer
173 }
174
175 // These should be the same type
176 static_assert(sizeof(long long) == sizeof(int64_t));
177
178 char* pos; // mutated by strtol
179
180 errno = 0;
181 long long v = strtoll(s, &pos, base);
182
183 if (errno == ERANGE) {
184 switch (v) {
185 case LLONG_MIN:
186 return false; // underflow
187 case LLONG_MAX:
188 return false; // overflow
189 }
190 }
191
192 const char* end = s + length;
193 if (pos == end) {
194 *result = v;
195 return true; // strtol() consumed ALL characters.
196 }
197
198 while (pos < end) {
199 if (!isspace(*pos)) {
200 return false; // Trailing non-space
201 }
202 pos++;
203 }
204
205 *result = v;
206 return true; // Trailing space is OK
207}
208
209int to_int(BigStr* s, int base) {
210 int i;
211 if (StringToInt(s->data_, len(s), base, &i)) {
212 return i; // truncated to int
213 } else {
214 throw Alloc<ValueError>();
215 }
216}
217
218BigStr* chr(int i) {
219 // NOTE: i should be less than 256, in which we could return an object from
220 // GLOBAL_STR() pool, like StrIter
221 auto result = NewStr(1);
222 result->data_[0] = i;
223 return result;
224}
225
226int ord(BigStr* s) {
227 assert(len(s) == 1);
228 // signed to unsigned conversion, so we don't get values like -127
229 uint8_t c = static_cast<uint8_t>(s->data_[0]);
230 return c;
231}
232
233bool to_bool(BigStr* s) {
234 return len(s) != 0;
235}
236
237double to_float(int i) {
238 return static_cast<double>(i);
239}
240
241double to_float(BigStr* s) {
242 char* begin = s->data_;
243 char* end = begin + len(s);
244
245 errno = 0;
246 double result = strtod(begin, &end);
247
248 if (errno == ERANGE) { // error: overflow or underflow
249 if (result >= HUGE_VAL) {
250 return INFINITY;
251 } else if (result <= -HUGE_VAL) {
252 return -INFINITY;
253 } else if (-DBL_MIN <= result && result <= DBL_MIN) {
254 return 0.0;
255 } else {
256 FAIL("Invalid value after ERANGE");
257 }
258 }
259 if (end == begin) { // error: not a floating point number
260 throw Alloc<ValueError>();
261 }
262
263 return result;
264}
265
266// e.g. ('a' in 'abc')
267bool str_contains(BigStr* haystack, BigStr* needle) {
268 // Common case
269 if (len(needle) == 1) {
270 return memchr(haystack->data_, needle->data_[0], len(haystack));
271 }
272
273 if (len(needle) > len(haystack)) {
274 return false;
275 }
276
277 // General case. TODO: We could use a smarter substring algorithm.
278
279 const char* end = haystack->data_ + len(haystack);
280 const char* last_possible = end - len(needle);
281 const char* p = haystack->data_;
282
283 while (p <= last_possible) {
284 if (memcmp(p, needle->data_, len(needle)) == 0) {
285 return true;
286 }
287 p++;
288 }
289 return false;
290}
291
292BigStr* str_repeat(BigStr* s, int times) {
293 // Python allows -1 too, and Oil used that
294 if (times <= 0) {
295 return kEmptyString;
296 }
297 int len_ = len(s);
298 int new_len = len_ * times;
299 BigStr* result = NewStr(new_len);
300
301 char* dest = result->data_;
302 for (int i = 0; i < times; i++) {
303 memcpy(dest, s->data_, len_);
304 dest += len_;
305 }
306 return result;
307}
308
309// for os_path.join()
310// NOTE(Jesse): Perfect candidate for BoundedBuffer
311BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
312 int a_len = len(a);
313 int b_len = len(b);
314 int c_len = len(c);
315
316 int new_len = a_len + b_len + c_len;
317 BigStr* result = NewStr(new_len);
318 char* pos = result->data_;
319
320 memcpy(pos, a->data_, a_len);
321 pos += a_len;
322
323 memcpy(pos, b->data_, b_len);
324 pos += b_len;
325
326 memcpy(pos, c->data_, c_len);
327
328 assert(pos + c_len == result->data_ + new_len);
329
330 return result;
331}
332
333BigStr* str_concat(BigStr* a, BigStr* b) {
334 int a_len = len(a);
335 int b_len = len(b);
336 int new_len = a_len + b_len;
337 BigStr* result = NewStr(new_len);
338 char* buf = result->data_;
339
340 memcpy(buf, a->data_, a_len);
341 memcpy(buf + a_len, b->data_, b_len);
342
343 return result;
344}
345
346//
347// Comparators
348//
349
350bool str_equals(BigStr* left, BigStr* right) {
351 // Fast path for identical strings. String deduplication during GC could
352 // make this more likely. String interning could guarantee it, allowing us
353 // to remove memcmp().
354 if (left == right) {
355 return true;
356 }
357
358 if (left == nullptr || right == nullptr) {
359 return false;
360 }
361
362 // obj_len equal implies string lengths are equal
363
364 if (left->len_ == right->len_) {
365 // assert(len(left) == len(right));
366 return memcmp(left->data_, right->data_, left->len_) == 0;
367 }
368
369 return false;
370}
371
372bool maybe_str_equals(BigStr* left, BigStr* right) {
373 if (left && right) {
374 return str_equals(left, right);
375 }
376
377 if (!left && !right) {
378 return true; // None == None
379 }
380
381 return false; // one is None and one is a BigStr*
382}
383
384// TODO(Jesse): Make an inline version of this
385bool are_equal(BigStr* left, BigStr* right) {
386 return str_equals(left, right);
387}
388
389// TODO(Jesse): Make an inline version of this
390bool are_equal(int left, int right) {
391 return left == right;
392}
393
394// TODO(Jesse): Make an inline version of this
395bool keys_equal(int left, int right) {
396 return left == right;
397}
398
399// TODO(Jesse): Make an inline version of this
400bool keys_equal(BigStr* left, BigStr* right) {
401 return are_equal(left, right);
402}
403
404bool are_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
405 bool result = are_equal(t1->at0(), t2->at0());
406 result = result && (t1->at1() == t2->at1());
407 return result;
408}
409
410bool are_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
411 return t1->at0() == t2->at0() && t1->at1() == t2->at1();
412}
413
414bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
415 return are_equal(t1, t2);
416}
417
418bool keys_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
419 return are_equal(t1, t2);
420}
421
422bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
423 // Needs SmallStr change
424 if (len(s) == c_len) {
425 return memcmp(s->data_, c_string, c_len) == 0;
426 } else {
427 return false;
428 }
429}
430
431bool str_equals0(const char* c_string, BigStr* s) {
432 int n = strlen(c_string);
433 if (len(s) == n) {
434 return memcmp(s->data_, c_string, n) == 0;
435 } else {
436 return false;
437 }
438}
439
440int hash(BigStr* s) {
441 return s->hash(fnv1);
442}
443
444int max(int a, int b) {
445 return std::max(a, b);
446}
447
448int min(int a, int b) {
449 return std::min(a, b);
450}
451
452int max(List<int>* elems) {
453 int n = len(elems);
454 if (n < 1) {
455 throw Alloc<ValueError>();
456 }
457
458 int ret = elems->at(0);
459 for (int i = 0; i < n; ++i) {
460 int cand = elems->at(i);
461 if (cand > ret) {
462 ret = cand;
463 }
464 }
465
466 return ret;
467}