mycpp/gc_builtins.cc

OILS / mycpp / gc_builtins.cc View on Github | oilshell.org

474 lines, 258 significant

1	#include <ctype.h> // isspace()
2	#include <errno.h> // errno
3	#include <float.h> // DBL_MIN, DBL_MAX
4	#include <math.h> // INFINITY
5	#include <stdio.h> // required for readline/readline.h (man readline)
6
7	#include "_build/detected-cpp-config.h"
8	#include "mycpp/runtime.h"
9	#ifdef HAVE_READLINE
10	#include "cpp/frontend_pyreadline.h"
11	#endif
12
13	// Translation of Python's print().
14	void print(BigStr* s) {
15	fputs(s->data_, stdout); // print until first NUL
16	fputc('\n', stdout);
17	}
18
19	BigStr* str(int i) {
20	BigStr* s = OverAllocatedStr(kIntBufSize);
21	int length = snprintf(s->data(), kIntBufSize, "%d", i);
22	s->MaybeShrink(length);
23	return s;
24	}
25
26	// TODO:
27	// - This could use a fancy exact algorithm, not libc
28	// - Does libc depend on locale?
29	BigStr* str(double d) {
30	char buf[64]; // overestimate, but we use snprintf() to be safe
31
32	// Problem:
33	// %f prints 3.0000000 and 3.500000
34	// %g prints 3 and 3.5
35	//
36	// We want literal syntax to indicate float, so add '.'
37
38	int n = sizeof(buf) - 2; // in case we add '.0'
39
40	// %.9g digits for string that can be converted back to the same FLOAT
41	// (not double)
42	// https://stackoverflow.com/a/21162120
43	// https://en.cppreference.com/w/cpp/types/numeric_limits/max_digits10
44	int length = snprintf(buf, n, "%.9g", d);
45
46	// %a is a hexfloat form, could use that somewhere
47	// int length = snprintf(buf, n, "%a", d);
48
49	if (strchr(buf, 'i')) { // inf or -inf
50	return StrFromC(buf);
51	}
52
53	if (!strchr(buf, '.')) { // 12345 -> 12345.0
54	buf[length] = '.';
55	buf[length + 1] = '0';
56	buf[length + 2] = '\0';
57	}
58
59	return StrFromC(buf);
60	}
61
62	// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
63	// better for getting values out of Token.line without allocating?
64	//
65	// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
66	//
67	// Also for SmallStr, we don't care about interning. Only for HeapStr.
68
69	BigStr* intern(BigStr* s) {
70	// TODO: put in table gHeap.interned_
71	return s;
72	}
73
74	// Print quoted string. TODO: use C-style strings (YSTR)
75	BigStr* repr(BigStr* s) {
76	// Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
77	int n = len(s);
78	int upper_bound = n * 4 + 2;
79
80	BigStr* result = OverAllocatedStr(upper_bound);
81
82	// Single quote by default.
83	char quote = '\'';
84	if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
85	quote = '"';
86	}
87	char* p = result->data_;
88
89	// From PyString_Repr()
90	*p++ = quote;
91	for (int i = 0; i < n; ++i) {
92	char c = s->data_[i];
93	if (c == quote \|\| c == '\\') {
94	*p++ = '\\';
95	*p++ = c;
96	} else if (c == '\t') {
97	*p++ = '\\';
98	*p++ = 't';
99	} else if (c == '\n') {
100	*p++ = '\\';
101	*p++ = 'n';
102	} else if (c == '\r') {
103	*p++ = '\\';
104	*p++ = 'r';
105	} else if (isprint(c)) {
106	*p++ = c;
107	} else { // Unprintable is \xff
108	sprintf(p, "\\x%02x", c & 0xff);
109	p += 4;
110	}
111	}
112	*p++ = quote;
113	*p = '\0';
114
115	int length = p - result->data_;
116	result->MaybeShrink(length);
117	return result;
118	}
119
120	// Helper functions that don't use exceptions.
121
122	bool StringToInt(const char* s, int length, int base, int* result) {
123	if (length == 0) {
124	return false; // empty string isn't a valid integer
125	}
126
127	// Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
128	// sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
129	// static_assert(sizeof(long) == 8);
130
131	char* pos; // mutated by strtol
132
133	errno = 0;
134	long v = strtol(s, &pos, base);
135
136	if (errno == ERANGE) {
137	switch (v) {
138	case LONG_MIN:
139	return false; // underflow of long, which may be 64 bits
140	case LONG_MAX:
141	return false; // overflow of long
142	}
143	}
144
145	// It should ALSO fit in an int, not just a long
146	if (v > INT_MAX) {
147	return false;
148	}
149	if (v < INT_MIN) {
150	return false;
151	}
152
153	const char* end = s + length;
154	if (pos == end) {
155	*result = v;
156	return true; // strtol() consumed ALL characters.
157	}
158
159	while (pos < end) {
160	if (!isspace(*pos)) {
161	return false; // Trailing non-space
162	}
163	pos++;
164	}
165
166	*result = v;
167	return true; // Trailing space is OK
168	}
169
170	bool StringToInt64(const char* s, int length, int base, int64_t* result) {
171	if (length == 0) {
172	return false; // empty string isn't a valid integer
173	}
174
175	// These should be the same type
176	static_assert(sizeof(long long) == sizeof(int64_t));
177
178	char* pos; // mutated by strtol
179
180	errno = 0;
181	long long v = strtoll(s, &pos, base);
182
183	if (errno == ERANGE) {
184	switch (v) {
185	case LLONG_MIN:
186	return false; // underflow
187	case LLONG_MAX:
188	return false; // overflow
189	}
190	}
191
192	const char* end = s + length;
193	if (pos == end) {
194	*result = v;
195	return true; // strtol() consumed ALL characters.
196	}
197
198	while (pos < end) {
199	if (!isspace(*pos)) {
200	return false; // Trailing non-space
201	}
202	pos++;
203	}
204
205	*result = v;
206	return true; // Trailing space is OK
207	}
208
209	int to_int(BigStr* s, int base) {
210	int i;
211	if (StringToInt(s->data_, len(s), base, &i)) {
212	return i; // truncated to int
213	} else {
214	throw Alloc<ValueError>();
215	}
216	}
217
218	BigStr* chr(int i) {
219	// NOTE: i should be less than 256, in which we could return an object from
220	// GLOBAL_STR() pool, like StrIter
221	auto result = NewStr(1);
222	result->data_[0] = i;
223	return result;
224	}
225
226	int ord(BigStr* s) {
227	assert(len(s) == 1);
228	// signed to unsigned conversion, so we don't get values like -127
229	uint8_t c = static_cast<uint8_t>(s->data_[0]);
230	return c;
231	}
232
233	bool to_bool(BigStr* s) {
234	return len(s) != 0;
235	}
236
237	double to_float(int i) {
238	return static_cast<double>(i);
239	}
240
241	double to_float(BigStr* s) {
242	char* begin = s->data_;
243	char* end = begin + len(s);
244
245	errno = 0;
246	double result = strtod(begin, &end);
247
248	if (errno == ERANGE) { // error: overflow or underflow
249	if (result >= HUGE_VAL) {
250	return INFINITY;
251	} else if (result <= -HUGE_VAL) {
252	return -INFINITY;
253	} else if (-DBL_MIN <= result && result <= DBL_MIN) {
254	return 0.0;
255	} else {
256	FAIL("Invalid value after ERANGE");
257	}
258	}
259	if (end == begin) { // error: not a floating point number
260	throw Alloc<ValueError>();
261	}
262
263	return result;
264	}
265
266	// e.g. ('a' in 'abc')
267	bool str_contains(BigStr* haystack, BigStr* needle) {
268	// Common case
269	if (len(needle) == 1) {
270	return memchr(haystack->data_, needle->data_[0], len(haystack));
271	}
272
273	if (len(needle) > len(haystack)) {
274	return false;
275	}
276
277	// General case. TODO: We could use a smarter substring algorithm.
278
279	const char* end = haystack->data_ + len(haystack);
280	const char* last_possible = end - len(needle);
281	const char* p = haystack->data_;
282
283	while (p <= last_possible) {
284	if (memcmp(p, needle->data_, len(needle)) == 0) {
285	return true;
286	}
287	p++;
288	}
289	return false;
290	}
291
292	BigStr* str_repeat(BigStr* s, int times) {
293	// Python allows -1 too, and Oil used that
294	if (times <= 0) {
295	return kEmptyString;
296	}
297	int len_ = len(s);
298	int new_len = len_ * times;
299	BigStr* result = NewStr(new_len);
300
301	char* dest = result->data_;
302	for (int i = 0; i < times; i++) {
303	memcpy(dest, s->data_, len_);
304	dest += len_;
305	}
306	return result;
307	}
308
309	// for os_path.join()
310	// NOTE(Jesse): Perfect candidate for BoundedBuffer
311	BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
312	int a_len = len(a);
313	int b_len = len(b);
314	int c_len = len(c);
315
316	int new_len = a_len + b_len + c_len;
317	BigStr* result = NewStr(new_len);
318	char* pos = result->data_;
319
320	memcpy(pos, a->data_, a_len);
321	pos += a_len;
322
323	memcpy(pos, b->data_, b_len);
324	pos += b_len;
325
326	memcpy(pos, c->data_, c_len);
327
328	assert(pos + c_len == result->data_ + new_len);
329
330	return result;
331	}
332
333	BigStr* str_concat(BigStr* a, BigStr* b) {
334	int a_len = len(a);
335	int b_len = len(b);
336	int new_len = a_len + b_len;
337	BigStr* result = NewStr(new_len);
338	char* buf = result->data_;
339
340	memcpy(buf, a->data_, a_len);
341	memcpy(buf + a_len, b->data_, b_len);
342
343	return result;
344	}
345
346	//
347	// Comparators
348	//
349
350	bool str_equals(BigStr* left, BigStr* right) {
351	// Fast path for identical strings. String deduplication during GC could
352	// make this more likely. String interning could guarantee it, allowing us
353	// to remove memcmp().
354	if (left == right) {
355	return true;
356	}
357
358	if (left == nullptr \|\| right == nullptr) {
359	return false;
360	}
361
362	// obj_len equal implies string lengths are equal
363
364	if (left->len_ == right->len_) {
365	// assert(len(left) == len(right));
366	return memcmp(left->data_, right->data_, left->len_) == 0;
367	}
368
369	return false;
370	}
371
372	bool maybe_str_equals(BigStr* left, BigStr* right) {
373	if (left && right) {
374	return str_equals(left, right);
375	}
376
377	if (!left && !right) {
378	return true; // None == None
379	}
380
381	return false; // one is None and one is a BigStr*
382	}
383
384	// TODO: inline these functions?
385	bool are_equal(int left, int right) {
386	return left == right;
387	}
388
389	bool keys_equal(int left, int right) {
390	return left == right;
391	}
392
393	bool are_equal(BigStr* left, BigStr* right) {
394	return str_equals(left, right);
395	}
396
397	bool keys_equal(BigStr* left, BigStr* right) {
398	return are_equal(left, right);
399	}
400
401	// Shouldn't be used?
402	bool are_equal(void* left, void* right) {
403	assert(0);
404	}
405
406	// e.g. for Dict<Token*, int>, use object IDENTITY, not value
407	bool keys_equal(void* left, void* right) {
408	return left == right;
409	}
410
411	bool are_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
412	bool result = are_equal(t1->at0(), t2->at0());
413	result = result && (t1->at1() == t2->at1());
414	return result;
415	}
416
417	bool are_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
418	return t1->at0() == t2->at0() && t1->at1() == t2->at1();
419	}
420
421	bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
422	return are_equal(t1, t2);
423	}
424
425	bool keys_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
426	return are_equal(t1, t2);
427	}
428
429	bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
430	// Needs SmallStr change
431	if (len(s) == c_len) {
432	return memcmp(s->data_, c_string, c_len) == 0;
433	} else {
434	return false;
435	}
436	}
437
438	bool str_equals0(const char* c_string, BigStr* s) {
439	int n = strlen(c_string);
440	if (len(s) == n) {
441	return memcmp(s->data_, c_string, n) == 0;
442	} else {
443	return false;
444	}
445	}
446
447	int hash(BigStr* s) {
448	return s->hash(fnv1);
449	}
450
451	int max(int a, int b) {
452	return std::max(a, b);
453	}
454
455	int min(int a, int b) {
456	return std::min(a, b);
457	}
458
459	int max(List<int>* elems) {
460	int n = len(elems);
461	if (n < 1) {
462	throw Alloc<ValueError>();
463	}
464
465	int ret = elems->at(0);
466	for (int i = 0; i < n; ++i) {
467	int cand = elems->at(i);
468	if (cand > ret) {
469	ret = cand;
470	}
471	}
472
473	return ret;
474	}