mycpp
Coverage Report
Created: 2024-06-09 06:28

/home/uke/oil/mycpp/gc_mylib.h

 Line|  Count|Source
    1|       |// gc_mylib.h - corresponds to mycpp/mylib.py
    2|       |
    3|       |#ifndef MYCPP_GC_MYLIB_H
    4|       |#define MYCPP_GC_MYLIB_H
    5|       |
    6|       |#include <limits.h>  // CHAR_BIT
    7|       |
    8|       |#include "mycpp/gc_alloc.h"  // gHeap
    9|       |#include "mycpp/gc_dict.h"   // for dict_erase()
   10|       |#include "mycpp/gc_tuple.h"
   11|       |
   12|       |template <class K, class V>
   13|       |class Dict;
   14|       |
   15|       |// https://stackoverflow.com/questions/3919995/determining-sprintf-buffer-size-whats-the-standard/11092994#11092994
   16|       |// Notes:
   17|       |// - Python 2.7's intobject.c has an erroneous +6
   18|       |// - This is 13, but len('-2147483648') is 11, which means we only need 12?
   19|       |// - This formula is valid for octal(), because 2^(3 bits) = 8
   20|       |
   21|       |const int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3;
   22|       |
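The formula above can be sanity-checked in isolation. A minimal standalone
sketch (not part of this header; it redefines kIntBufSize locally):

    #include <climits>  // CHAR_BIT, INT_MIN
    #include <cstdio>   // snprintf, printf

    const int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3;

    int main() {
      // With a 32-bit int: 32 / 3 + 3 == 10 + 3 == 13.
      // Decimal worst case "-2147483648" is 11 chars + NUL == 12, so 13 fits.
      // Octal worst case "37777777777" is 11 chars + NUL == 12, because each
      // octal digit covers exactly 3 bits (2^3 == 8).
      char buf[kIntBufSize];
      int len = snprintf(buf, kIntBufSize, "%d", INT_MIN);
      printf("kIntBufSize = %d, len = %d, s = %s\n", kIntBufSize, len, buf);
      return 0;
    }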
   23|       |namespace mylib {
   24|       |
   25|       |void InitCppOnly();
   26|       |
   27|       |// Wrappers around our C++ APIs
   28|       |
   29|      1|inline void MaybeCollect() {
   30|      1|  gHeap.MaybeCollect();
   31|      1|}
   32|       |
   33|       |void print_stderr(BigStr* s);
   34|       |
   35|      0|inline int ByteAt(BigStr* s, int i) {
   36|      0|  DCHECK(0 <= i);
   37|      0|  DCHECK(i <= len(s));
   38|      0|
   39|      0|  return static_cast<unsigned char>(s->data_[i]);
   40|      0|}
   41|       |
   42|      0|inline int ByteEquals(int byte, BigStr* ch) {
   43|      0|  DCHECK(0 <= byte);
   44|      0|  DCHECK(byte < 256);
   45|      0|
   46|      0|  DCHECK(len(ch) == 1);
   47|      0|
   48|      0|  return byte == static_cast<unsigned char>(ch->data_[0]);
   49|      0|}
   50|       |
   51|      0|inline int ByteInSet(int byte, BigStr* byte_set) {
   52|      0|  DCHECK(0 <= byte);
   53|      0|  DCHECK(byte < 256);
   54|      0|
   55|      0|  int n = len(byte_set);
   56|      0|  for (int i = 0; i < n; ++i) {
   57|      0|    int b = static_cast<unsigned char>(byte_set->data_[i]);
   58|      0|    if (byte == b) {
   59|      0|      return true;
   60|      0|    }
   61|      0|  }
   62|      0|  return false;
   63|      0|}
   64|       |
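The static_cast<unsigned char> in ByteAt(), ByteEquals(), and ByteInSet()
above is essential: where plain char is signed, a byte like 0xCE would
sign-extend to -50 and never compare equal to an int in [128, 255]. A
standalone sketch of the pitfall (a plain const char* stands in for BigStr):

    #include <cstdio>

    // Mirrors the cast in ByteAt(); without it, s[i] would sign-extend.
    int ByteAt(const char* s, int i) {
      return static_cast<unsigned char>(s[i]);
    }

    int main() {
      const char* s = "\xce\xbb";              // UTF-8 for U+03BB (lambda)
      printf("%d\n", static_cast<int>(s[0]));  // -50 where char is signed
      printf("%d\n", ByteAt(s, 0));            // always 206 (0xCE)
      printf("%d\n", ByteAt(s, 0) == 0xce);    // 1
      return 0;
    }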
   65|       |BigStr* JoinBytes(List<int>* byte_list);
   66|       |
   67|       |// const int kStdout = 1;
   68|       |// const int kStderr = 2;
   69|       |
   70|       |// void writeln(BigStr* s, int fd = kStdout);
   71|       |
   72|       |Tuple2<BigStr*, BigStr*> split_once(BigStr* s, BigStr* delim);
   73|       |
   74|       |template <typename K, typename V>
   75|    140|void dict_erase(Dict<K, V>* haystack, K needle) {
   76|    140|  DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
   77|       |
   78|      0|  int pos = haystack->hash_and_probe(needle);
   79|    140|  if (pos == kTooSmall) {
   80|      0|    return;
   81|      0|  }
   82|    140|  DCHECK(pos >= 0);
   83|      0|  int kv_index = haystack->index_->items_[pos];
   84|    140|  if (kv_index < 0) {
   85|      1|    return;
   86|      1|  }
   87|       |
   88|    139|  int last_kv_index = haystack->len_ - 1;
   89|    139|  DCHECK(kv_index <= last_kv_index);
   90|       |
   91|       |  // Swap the target entry with the most recently inserted one before removing
   92|       |  // it. This has two benefits.
   93|       |  //   (1) It keeps the entry arrays compact. All valid entries occupy a
   94|       |  //       contiguous region in memory.
   95|       |  //   (2) It prevents holes in the entry arrays. This makes iterating over
   96|       |  //       entries (e.g. in keys() or DictIter()) trivial and doesn't require
   97|       |  //       any extra validity state (like a bitset of unusable slots). This is
   98|       |  //       important because keys and values won't always be pointers, so we
   99|       |  //       can't rely on NULL checks for validity. We also can't wrap the slab
  100|       |  //       entry types in some other type without modifying the garbage
  101|       |  //       collector to trace through unmanaged types (or paying the extra
  102|       |  //       allocations for the outer type).
  103|    139|  if (kv_index != last_kv_index) {
  104|     71|    K last_key = haystack->keys_->items_[last_kv_index];
  105|     71|    V last_val = haystack->values_->items_[last_kv_index];
  106|     71|    int last_pos = haystack->hash_and_probe(last_key);
  107|     71|    DCHECK(last_pos != kNotFound);
  108|      0|    haystack->keys_->items_[kv_index] = last_key;
  109|     71|    haystack->values_->items_[kv_index] = last_val;
  110|     71|    haystack->index_->items_[last_pos] = kv_index;
  111|     71|  }
  112|       |
  113|       |  // Zero out for GC.  These could be nullptr or 0
  114|      0|  haystack->keys_->items_[last_kv_index] = 0;
  115|    139|  haystack->values_->items_[last_kv_index] = 0;
  116|    139|  haystack->index_->items_[pos] = kDeletedEntry;
  117|    139|  haystack->len_--;
  118|    139|  DCHECK(haystack->len_ < haystack->capacity_);
  119|    139|}
Template instantiations of dict_erase() (the source for lines 75-119 is shared
and shown above; only the per-instantiation execution counts differ):

  A: _ZN5mylib10dict_eraseIP6BigStriEEvP4DictIT_T0_ES4_    = dict_erase<BigStr*, int>
  B: _ZN5mylib10dict_eraseIP6BigStrS2_EEvP4DictIT_T0_ES4_  = dict_erase<BigStr*, BigStr*>
  C: _ZN5mylib10dict_eraseIiiEEvP4DictIT_T0_ES2_           = dict_erase<int, int>
  D: _ZN5mylib10dict_eraseIiP6BigStrEEvP4DictIT_T0_ES4_    = dict_erase<int, BigStr*>

 Line |   A |   B |   C |   D
   75 |   1 |   1 | 137 |   1
   76 |   1 |   1 | 137 |   1
   78 |   0 |   0 |   0 |   0
   79 |   1 |   1 | 137 |   1
   80 |   0 |   0 |   0 |   0
   81 |   0 |   0 |   0 |   0
   82 |   1 |   1 | 137 |   1
   83 |   0 |   0 |   0 |   0
   84 |   1 |   1 | 137 |   1
   85 |   0 |   0 |   0 |   1
   86 |   0 |   0 |   0 |   1
   88 |   1 |   1 | 137 |   0
   89 |   1 |   1 | 137 |   0
  103 |   1 |   1 | 137 |   0
  104 |   0 |   0 |  71 |   0
  105 |   0 |   0 |  71 |   0
  106 |   0 |   0 |  71 |   0
  107 |   0 |   0 |  71 |   0
  108 |   0 |   0 |   0 |   0
  109 |   0 |   0 |  71 |   0
  110 |   0 |   0 |  71 |   0
  111 |   0 |   0 |  71 |   0
  114 |   0 |   0 |   0 |   0
  115 |   1 |   1 | 137 |   0
  116 |   1 |   1 | 137 |   0
  117 |   1 |   1 | 137 |   0
  118 |   1 |   1 | 137 |   0
  119 |   1 |   1 | 137 |   0
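The swap-with-last deletion described in the comment on lines 91-102 is easier
to see on plain arrays. A minimal sketch using std::vector (toy names; the
real Dict also patches its hash index, as line 110 does, and zeroes the
vacated slot for the GC rather than shrinking):

    #include <cassert>
    #include <vector>

    // Parallel "entry arrays", like Dict's keys_/values_ slabs.
    struct ToyEntries {
      std::vector<int> keys;
      std::vector<int> values;

      void SwapRemove(int kv_index) {
        int last = static_cast<int>(keys.size()) - 1;
        if (kv_index != last) {
          // Move the most recently inserted entry into the hole, so valid
          // entries stay contiguous and iteration needs no validity bits.
          keys[kv_index] = keys[last];
          values[kv_index] = values[last];
        }
        keys.pop_back();
        values.pop_back();
      }
    };

    int main() {
      ToyEntries e;
      e.keys = {1, 2, 3};
      e.values = {10, 20, 30};
      e.SwapRemove(0);  // erase entry (1, 10): (3, 30) moves into index 0
      assert(e.keys[0] == 3 && e.values[0] == 30);
      assert(e.keys.size() == 2);
      return 0;
    }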
  120|       |
  121|       |// NOTE: Can use OverAllocatedStr for all of these, rather than copying
  122|       |
  123|      2|inline BigStr* hex_lower(int i) {
  124|      2|  char buf[kIntBufSize];
  125|      2|  int len = snprintf(buf, kIntBufSize, "%x", i);
  126|      2|  return ::StrFromC(buf, len);
  127|      2|}
  128|       |
  129|      2|inline BigStr* hex_upper(int i) {
  130|      2|  char buf[kIntBufSize];
  131|      2|  int len = snprintf(buf, kIntBufSize, "%X", i);
  132|      2|  return ::StrFromC(buf, len);
  133|      2|}
  134|       |
  135|      2|inline BigStr* octal(int i) {
  136|      2|  char buf[kIntBufSize];
  137|      2|  int len = snprintf(buf, kIntBufSize, "%o", i);
  138|      2|  return ::StrFromC(buf, len);
  139|      2|}
  140|       |
  141|       |// Abstract type: Union of LineReader and Writer
  142|       |class File {
  143|       | public:
  144|     14|  File() {
  145|     14|  }
  146|       |  // Writer
  147|       |  virtual void write(BigStr* s) = 0;
  148|       |  virtual void flush() = 0;
  149|       |
  150|       |  // Reader
  151|       |  virtual BigStr* readline() = 0;
  152|       |
  153|       |  // Both
  154|       |  virtual bool isatty() = 0;
  155|       |  virtual void close() = 0;
  156|       |
  157|      0|  static constexpr ObjHeader obj_header() {
  158|      0|    return ObjHeader::ClassFixed(field_mask(), sizeof(File));
  159|      0|  }
  160|       |
  161|      7|  static constexpr uint32_t field_mask() {
  162|      7|    return kZeroMask;
  163|      7|  }
  164|       |};
  165|       |
  166|       |// Wrap a FILE* for read and write
  167|       |class CFile : public File {
  168|       | public:
  169|      7|  explicit CFile(FILE* f) : File(), f_(f) {
  170|      7|  }
  171|       |  // Writer
  172|       |  void write(BigStr* s) override;
  173|       |  void flush() override;
  174|       |
  175|       |  // Reader
  176|       |  BigStr* readline() override;
  177|       |
  178|       |  // Both
  179|       |  bool isatty() override;
  180|       |  void close() override;
  181|       |
  182|      7|  static constexpr ObjHeader obj_header() {
  183|      7|    return ObjHeader::ClassFixed(field_mask(), sizeof(CFile));
  184|      7|  }
  185|       |
  186|      7|  static constexpr uint32_t field_mask() {
  187|       |    // not adding f_ to the field mask because FILE* isn't a GC object
  188|      7|    return File::field_mask();
  189|      7|  }
  190|       |
  191|       | private:
  192|       |  FILE* f_;
  193|       |
  194|       |  DISALLOW_COPY_AND_ASSIGN(CFile)
  195|       |};
  196|       |
  197|       |// Abstract File we can only read from.
  198|       |// TODO: can we get rid of DCHECK() and reinterpret_cast?
  199|       |class LineReader : public File {
  200|       | public:
  201|      3|  LineReader() : File() {
  202|      3|  }
  203|      0|  void write(BigStr* s) override {
  204|      0|    CHECK(false);  // should not happen
  205|      0|  }
  206|      0|  void flush() override {
  207|      0|    CHECK(false);  // should not happen
  208|      0|  }
  209|       |
  210|      0|  static constexpr ObjHeader obj_header() {
  211|      0|    return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
  212|      0|  }
  213|       |
  214|      3|  static constexpr uint32_t field_mask() {
  215|      3|    return kZeroMask;
  216|      3|  }
  217|       |};
  218|       |
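File above is deliberately the union of the reader and writer interfaces, and
one-sided subclasses like LineReader stub out the half they don't support with
CHECK(false); presumably this mirrors Python file objects, which expose both
halves dynamically. A standalone sketch of that shape (toy types, not Oil's):

    #include <cstdio>
    #include <cstdlib>

    // One base interface carries both halves, like File.
    struct ToyFile {
      virtual void write(const char* s) = 0;
      virtual const char* readline() = 0;
      virtual ~ToyFile() {}
    };

    // A read-only type fails loudly on the writer half, like LineReader's
    // CHECK(false) overrides.
    struct ToyLineReader : ToyFile {
      void write(const char*) override {
        fprintf(stderr, "should not happen\n");
        abort();
      }
      const char* readline() override {
        return "one line\n";
      }
    };

    int main() {
      ToyLineReader r;
      ToyFile* f = &r;  // callers can hold either kind as a ToyFile*
      printf("%s", f->readline());
      return 0;
    }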
  219|       |class BufLineReader : public LineReader {
  220|       | public:
  221|      3|  explicit BufLineReader(BigStr* s) : LineReader(), s_(s), pos_(0) {
  222|      3|  }
  223|       |  virtual BigStr* readline();
  224|      1|  virtual bool isatty() {
  225|      1|    return false;
  226|      1|  }
  227|      1|  virtual void close() {
  228|      1|  }
  229|       |
  230|       |  BigStr* s_;
  231|       |  int pos_;
  232|       |
  233|      3|  static constexpr ObjHeader obj_header() {
  234|      3|    return ObjHeader::ClassFixed(field_mask(), sizeof(BufLineReader));
  235|      3|  }
  236|       |
  237|      3|  static constexpr uint32_t field_mask() {
  238|      3|    return LineReader::field_mask() | maskbit(offsetof(BufLineReader, s_));
  239|      3|  }
  240|       |
  241|       |  DISALLOW_COPY_AND_ASSIGN(BufLineReader)
  242|       |};
  243|       |
  244|       |extern LineReader* gStdin;
  245|       |
  246|      1|inline LineReader* Stdin() {
  247|      1|  if (gStdin == nullptr) {
  248|      1|    gStdin = reinterpret_cast<LineReader*>(Alloc<CFile>(stdin));
  249|      1|  }
  250|      1|  return gStdin;
  251|      1|}
  252|       |
  253|       |LineReader* open(BigStr* path);
  254|       |
  255|       |// Abstract File we can only write to.
  256|       |// TODO: can we get rid of DCHECK() and reinterpret_cast?
  257|       |class Writer : public File {
  258|       | public:
  259|      4|  Writer() : File() {
  260|      4|  }
  261|      0|  BigStr* readline() override {
  262|      0|    CHECK(false);  // should not happen
  263|      0|  }
  264|       |
  265|      0|  static constexpr ObjHeader obj_header() {
  266|      0|    return ObjHeader::ClassFixed(field_mask(), sizeof(Writer));
  267|      0|  }
  268|       |
  269|      4|  static constexpr uint32_t field_mask() {
  270|      4|    return kZeroMask;
  271|      4|  }
  272|       |};
  273|       |
  274|       |class MutableStr;
  275|       |
  276|       |class BufWriter : public Writer {
  277|       | public:
  278|      4|  BufWriter() : Writer(), str_(nullptr), len_(0) {
  279|      4|  }
  280|       |  void write(BigStr* s) override;
  281|       |  void write_spaces(int n);
  282|      1|  void clear() {  // Reuse this instance
  283|      1|    str_ = nullptr;
  284|      1|    len_ = 0;
  285|      1|    is_valid_ = true;
  286|      1|  }
  287|      0|  void close() override {
  288|      0|  }
  289|      1|  void flush() override {
  290|      1|  }
  291|      1|  bool isatty() override {
  292|      1|    return false;
  293|      1|  }
  294|       |  BigStr* getvalue();  // part of cStringIO API
  295|       |
  296|       |  //
  297|       |  // Low Level API for C++ usage only
  298|       |  //
  299|       |
  300|       |  // Convenient API that avoids BigStr*
  301|       |  void WriteConst(const char* c_string);
  302|       |
  303|       |  // Potentially resizes the buffer.
  304|       |  void EnsureMoreSpace(int n);
  305|       |  // After EnsureMoreSpace(42), you can write 42 more bytes safely.
  306|       |  //
  307|       |  // Note that if you call EnsureMoreSpace(42), write 5 bytes, and then call
  308|       |  // EnsureMoreSpace(42) again, the total space reserved is 47 (sketch below).
  309|       |
  310|       |  // (Similar to vector::reserve(n), but it takes an integer to ADD to the
  311|       |  // capacity.)
  312|       |
  313|       |  uint8_t* LengthPointer();    // start + length
  314|       |  uint8_t* CapacityPointer();  // start + capacity
  315|       |  void SetLengthFrom(uint8_t* length_ptr);
  316|       |
  317|      0|  int Length() {
  318|      0|    return len_;
  319|      0|  }
  320|       |
  321|       |  // Rewind to earlier position, future writes start there
  322|       |  void Truncate(int length);
  323|       |
  324|      4|  static constexpr ObjHeader obj_header() {
  325|      4|    return ObjHeader::ClassFixed(field_mask(), sizeof(BufWriter));
  326|      4|  }
  327|       |
  328|      4|  static constexpr unsigned field_mask() {
  329|       |    // maskbit_v() because BufWriter has virtual methods
  330|      4|    return Writer::field_mask() | maskbit(offsetof(BufWriter, str_));
  331|      4|  }
  332|       |
  333|       | private:
  334|       |  void WriteRaw(char* s, int n);
  335|       |
  336|       |  MutableStr* str_;  // getvalue() turns this directly into Str*, no copying
  337|       |  int len_;          // how many bytes have been written so far
  338|       |  bool is_valid_ = true;  // It becomes invalid after getvalue() is called
  339|       |};
  340|       |
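The EnsureMoreSpace() comment on lines 305-311 describes a
reserve-relative-to-current-length contract. A toy model of that arithmetic
(ToyBuf is hypothetical, and the real BufWriter may over-allocate more
aggressively; this models only the minimum guarantee):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct ToyBuf {
      std::vector<uint8_t> data;
      int len = 0;  // bytes written so far

      // Grow capacity so that n more bytes fit past the current length.
      void EnsureMoreSpace(int n) {
        if (static_cast<int>(data.size()) < len + n) {
          data.resize(len + n);
        }
      }
    };

    int main() {
      ToyBuf b;
      b.EnsureMoreSpace(42);  // capacity >= 0 + 42
      b.len += 5;             // pretend we wrote 5 bytes
      b.EnsureMoreSpace(42);  // capacity >= 5 + 42 == 47, as the comment says
      assert(b.data.size() >= 47);
      return 0;
    }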
  341|       |extern Writer* gStdout;
  342|       |
  343|      1|inline Writer* Stdout() {
  344|      1|  if (gStdout == nullptr) {
  345|      1|    gStdout = reinterpret_cast<Writer*>(Alloc<CFile>(stdout));
  346|      1|    gHeap.RootGlobalVar(gStdout);
  347|      1|  }
  348|      1|  return gStdout;
  349|      1|}
  350|       |
  351|       |extern Writer* gStderr;
  352|       |
  353|      1|inline Writer* Stderr() {
  354|      1|  if (gStderr == nullptr) {
  355|      1|    gStderr = reinterpret_cast<Writer*>(Alloc<CFile>(stderr));
  356|      1|    gHeap.RootGlobalVar(gStderr);
  357|      1|  }
  358|      1|  return gStderr;
  359|      1|}
  360|       |
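Stdout() and Stderr() above share a pattern: the global handle is allocated on
first use and then registered with gHeap.RootGlobalVar(), so a collection
triggered later still treats it as live even when no stack reference exists.
(Stdin() further above skips the rooting call.) A toy model of why rooting
matters (ToyHeap is hypothetical; only the RootGlobalVar name mirrors the real
API):

    #include <cassert>

    // Toy heap: a collection would free anything not reachable from roots.
    struct ToyHeap {
      void* roots[8] = {};
      int n_roots = 0;

      void RootGlobalVar(void* obj) {
        roots[n_roots++] = obj;  // obj survives every future collection
      }

      bool IsRooted(void* obj) {
        for (int i = 0; i < n_roots; ++i) {
          if (roots[i] == obj) return true;
        }
        return false;
      }
    };

    ToyHeap gToyHeap;
    int* gOut = nullptr;  // stands in for gStdout

    int* Out() {
      if (gOut == nullptr) {           // lazy init on first call
        static int storage = 42;       // stands in for Alloc<CFile>(stdout)
        gOut = &storage;
        gToyHeap.RootGlobalVar(gOut);  // without this, a later collection
                                       // could reclaim the object
      }
      return gOut;
    }

    int main() {
      assert(*Out() == 42);
      assert(gToyHeap.IsRooted(gOut));
      return 0;
    }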
  361|       |class UniqueObjects {
  362|       |  // Can't be expressed in typed Python because we don't have uint64_t for
  363|       |  // addresses
  364|       |
  365|       | public:
  366|      0|  UniqueObjects() {
  367|      0|  }
  368|      0|  void Add(void* obj) {
  369|      0|  }
  370|      0|  int Get(void* obj) {
  371|      0|    return -1;
  372|      0|  }
  373|       |
  374|      0|  static constexpr ObjHeader obj_header() {
  375|      0|    return ObjHeader::ClassFixed(field_mask(), sizeof(UniqueObjects));
  376|      0|  }
  377|       |
  378|       |  // SPECIAL CASE? We should never have a unique reference to an object?  So
  379|       |  // don't bother tracing
  380|      0|  static constexpr uint32_t field_mask() {
  381|      0|    return kZeroMask;
  382|      0|  }
  383|       |
  384|       | private:
  385|       |  // address -> small integer ID
  386|       |  Dict<void*, int> addresses_;
  387|       |};
  388|       |
  389|       |}  // namespace mylib
  390|       |
  391|       |#endif  // MYCPP_GC_MYLIB_H