| 1 | // yaks_runtime_test.cc
|
| 2 | //
|
| 3 | // Related to small_str_test.cc
|
| 4 |
|
| 5 | #include <inttypes.h>
|
| 6 | #include <limits.h> // HOST_NAME_MAX
|
| 7 | #include <unistd.h> // gethostname()
|
| 8 |
|
| 9 | #include <new> // placement new
|
| 10 |
|
| 11 | #include "mycpp/common.h"
|
| 12 | #include "mycpp/gc_obj.h" // ObjHeader
|
| 13 | #include "mycpp/runtime.h"
|
| 14 | #include "vendor/greatest.h"
|
| 15 |
|
| 16 | // namespace yaks {
|
| 17 |
|
| 18 | /*
|
| 19 |
|
| 20 | To Do
|
| 21 | =====
|
| 22 |
|
| 23 | - prototype rooting
|
| 24 | - register YaksValue only, not other primitive types
|
| 25 | - Is there maybe a ImmediateVariant type?
|
| 26 | - it can never be a HeapObj, and thus doesn't need to be rooted
|
| 27 | - how common is this? Probably not very
|
| 28 |
|
| 29 | - What are the ASDL schema language changes you need?
|
| 30 | - defining tags and so forth
|
| 31 | - shared tags ACROSS MODULES -- I ran into this with %LiteralBlock
|
| 32 | - and probably %Name for (Token tok, str name)
|
| 33 |
|
| 34 | - Does this make sense?
|
| 35 |
|
| 36 | alias CompoundWord = List[word_part]
|
| 37 |
|
| 38 | - What about
|
| 39 |
|
| 40 | tagged value =
|
| 41 | Int %Int
|
| 42 | | Frame %Dict[str, str]
|
| 43 |
|
| 44 | - What's the penalty in Python?
|
| 45 | - I think it's mainly mymember() bar() syntax everywhere
|
| 46 | - but then do you need to generate lots of tiny classes
|
| 47 | - or use getattr() on the list of known field names to simulate it
|
| 48 | - but that won't pass MyPy
|
| 49 | - damn you would need a ton of type annotations then ... every wrapper
|
| 50 | would get a type annotation
|
| 51 | - but this is only for public fields? Maybe it's not that bad
|
| 52 | - or do you need public self(), like perhaps
|
| 53 | - cmd_ev.self().word_ev
|
| 54 | - cmd_ev.member().foo
|
| 55 | - hm that's possible
|
| 56 | - self()->length_ = 32; // this can be direct? At least within a class
|
| 57 |
|
| 58 |
|
| 59 | ## Use Cases
|
| 60 |
|
| 61 | - write out value_t class more
|
| 62 | - value.Int is
|
| 63 | - immediate int32_t
|
| 64 | - or what about the 53 bit idea? (matching double precision)
|
| 65 | - 64-3 bits = 61, or 64-11 = 53
|
| 66 | - value.BigStr is BigStr and SmallStr
|
| 67 | - value.BashAssoc is Dict[str, str]
|
| 68 | - value.Dict is Dict[str, value]
|
| 69 | - value.Frame is Dict[str, cell]
|
| 70 |
|
| 71 | - CompoundWord is List[word_part] parts
|
| 72 |
|
| 73 | - a_index = BigStr(str s) | Int(int i)
|
| 74 | - this is not immdiate
|
| 75 |
|
| 76 | - Custom Token VALUE type as 16 bytes?
|
| 77 | - this makes GC scanning harder
|
| 78 | - can't have Token on the stack, but can have it embedded in other objects?
|
| 79 | - but then you can't have any of those on the stack either
|
| 80 | - There could be a restriction, hm
|
| 81 |
|
| 82 | ## Problems
|
| 83 |
|
| 84 | ### Code gen issue: -> vs .
|
| 85 |
|
| 86 | Is it possible for mycpp classes to use
|
| 87 |
|
| 88 | this->token = token;
|
| 89 | this->args = args;
|
| 90 |
|
| 91 | While ASDL uses
|
| 92 |
|
| 93 | w.parts()[0].tok
|
| 94 |
|
| 95 | How would you distinguish the two things?
|
| 96 |
|
| 97 | Plus they will be mixed, like:
|
| 98 |
|
| 99 | this->cmd_ev->w.parts()
|
| 100 |
|
| 101 | Cheap hack is to come up with a naming convention for the types
|
| 102 |
|
| 103 | Actually we already have it - types look like this. See visit_member_expr:
|
| 104 |
|
| 105 | lhs_type core.ui.ErrorFormatter expr NameExpr(errfmt [l]) name
|
| 106 | PrettyPrintError lhs_type core.util.UserExit expr NameExpr(e [l]) name status
|
| 107 | lhs_type osh.cmd_eval.CommandEvaluator expr NameExpr(cmd_ev [l]) name
|
| 108 | MaybeRunExitTrap lhs_type _devbuild.gen.syntax_asdl.IntParamBox expr
|
| 109 | NameExpr(mut_status [l]) name i
|
| 110 |
|
| 111 | So we can look for syntax_asdl / runtime_asdl
|
| 112 |
|
| 113 | ## Design
|
| 114 |
|
| 115 | - YaksValue contains a uint64_t ?
|
| 116 | - this means any BigStr, List, Dict, Tuple, or class
|
| 117 | - can List and Dict have type tag in addition to pointer?
|
| 118 |
|
| 119 | These are less than 8 bytes:
|
| 120 |
|
| 121 | - int32_t
|
| 122 | - float
|
| 123 | - enum class value_e {}
|
| 124 |
|
| 125 | Features that mycpp runtime doesn't have:
|
| 126 |
|
| 127 | - More efficient rooting, either:
|
| 128 | - our StackRoots({&a, &b, ...}) is slow
|
| 129 | - our StackRoot takes up a lot of code space!
|
| 130 | - Either
|
| 131 | - Shadow Stack for Garbage Collection
|
| 132 | - It's better to spill pointers to a separate frame
|
| 133 | - Hybrid rooting
|
| 134 | - ParamRoot, ParamRoot, SortedPointerRoots,
|
| 135 |
|
| 136 | - Not just more EFFICIENT, but LESS ROOTING
|
| 137 | - call graph analysis for stuff above the stack
|
| 138 |
|
| 139 | Tagged Pointers ("Boxless optimization")
|
| 140 |
|
| 141 | - Immediate Values - 8 bytes for variant type tag
|
| 142 | - Zero-arg constructors are integers
|
| 143 | - int32_t
|
| 144 | - float
|
| 145 | - Small BigStr
|
| 146 | - 4 bits len
|
| 147 | - how is this shared with type_tag?
|
| 148 | - are we reserving half of this space for small_str?
|
| 149 | - 6 bytes data
|
| 150 | - 1 byte nullptr
|
| 151 | - Pointer
|
| 152 |
|
| 153 | Later:
|
| 154 |
|
| 155 | - Value types? How does the GC scan them on the stack??? That is hard
|
| 156 |
|
| 157 | */
|
| 158 |
|
| 159 | // A GC heap value or an immediate value
|
| 160 | //
|
| 161 | // Can also be used for say:
|
| 162 | //
|
| 163 | // tagged num = yaks.Int | yaks.Float
|
| 164 | //
|
| 165 | // There is more than enough space for a tag!
|
| 166 | // A double would hae to be heap-allocated.
|
| 167 |
|
| 168 | // Note that Yaks is statically typed, and you can have a simple double, float,
|
| 169 | // or int64_t. (maybe i32 i64 f32 f64 like WASM.)
|
| 170 | //
|
| 171 | // You would only use a YaksValue for an Int if it's part of a variant type.
|
| 172 |
|
| 173 | // Kinds of layouts:
|
| 174 | //
|
| 175 | // - primitive i32 i64 f32 f64
|
| 176 | // - enum class scope_e
|
| 177 | // - YaksValue that MAY have a heap_obj (which is any BigStr value)
|
| 178 | // - then you have only 7 possible tags
|
| 179 | // - YaksValue that does NOT have a heap_obj -- then you have more room for tags
|
| 180 | // - you could have variatn of Bool, Int, and some other enum_class_e
|
| 181 |
|
| 182 | #define ASDL_NAMES struct
|
| 183 |
|
| 184 | ASDL_NAMES ytag_e {
|
| 185 | enum no_name {
|
| 186 | HeapObj = 0,
|
| 187 | HeapStr = 1,
|
| 188 | SmallStr = 2,
|
| 189 |
|
| 190 | // Do these make sense, or would you want to use Bool, Float for something
|
| 191 | // else? Bool is questionable.
|
| 192 | Bool = 3,
|
| 193 | Int = 4,
|
| 194 | Float = 5,
|
| 195 |
|
| 196 | // Note: Is this POSSIBLE? NO
|
| 197 | // EmptyList = 6, // much more common
|
| 198 | // EmptyDict = 7, // less common
|
| 199 |
|
| 200 | // Potential optimization:
|
| 201 | // - the minute they are append() or set(), i.e. non-empty, they become
|
| 202 | // HeapObj
|
| 203 | // - but you can iterate over an EmptyList or Dict
|
| 204 |
|
| 205 | // MUTABILITY breaks it
|
| 206 | //
|
| 207 | // var mylist = []
|
| 208 | // myfunc(mylist) // can't copy by value, must be by reference
|
| 209 | // print(len(mylist)) // must be mutated
|
| 210 | };
|
| 211 | };
|
| 212 |
|
| 213 | // Picture here
|
| 214 | // https://bernsteinbear.com/blog/compiling-a-lisp-2/#pointer-tagging-scheme
|
| 215 |
|
| 216 | union YaksValue {
|
| 217 | int ytag() const {
|
| 218 | // 0 for SmallStr -- any value can be a string
|
| 219 | // 1..7 for immediate values
|
| 220 | // for heap values: look in heap_obj->tag()
|
| 221 | return ytag_e::HeapObj;
|
| 222 | }
|
| 223 |
|
| 224 | void* heap_obj;
|
| 225 | bool b;
|
| 226 | float f;
|
| 227 |
|
| 228 | int32_t i;
|
| 229 |
|
| 230 | // It's 8 bytes on 32 bit systems too -- I guess we need this for small str?
|
| 231 | uint64_t whole;
|
| 232 |
|
| 233 | // TODO: where do we put
|
| 234 | // - 1 byte type_tag? Or do we have fewer than that? Maybe 5 bits, since 3
|
| 235 | // are for small_str len?
|
| 236 | // - that's 32 immediate types, and then you can overflow into heap?
|
| 237 | // - NO WE only have THREE BITS because our allocator is 24 or 48 byte
|
| 238 | // aligned
|
| 239 | // - so we have at most 8 immediate values, and the rest heap values?
|
| 240 | // - but SmallStr always takes up some space
|
| 241 |
|
| 242 | // - NUL terminator for SmallStr
|
| 243 | };
|
| 244 |
|
| 245 | /* Class translation
|
| 246 |
|
| 247 | class Slice:
|
| 248 | def __init__(self, s: str, start: int, length: int):
|
| 249 | self.s = s
|
| 250 | self.start = start
|
| 251 | self.length = length
|
| 252 |
|
| 253 | def get(self) -> str:
|
| 254 | return self.s[self.start : self.start + self.length]
|
| 255 | */
|
| 256 |
|
| 257 | class Slice {
|
| 258 | public:
|
| 259 | Slice(BigStr* s, int start, int length) {
|
| 260 | // TODO: allocate Members
|
| 261 | self_.heap_obj = Alloc<Members>();
|
| 262 |
|
| 263 | self()->s_ = s;
|
| 264 | self()->start_ = start;
|
| 265 | self()->length_ = length;
|
| 266 | }
|
| 267 |
|
| 268 | struct Members {
|
| 269 | BigStr* s_;
|
| 270 | int start_;
|
| 271 | int length_;
|
| 272 |
|
| 273 | static constexpr ObjHeader obj_header() {
|
| 274 | return ObjHeader::ClassFixed(field_mask(), sizeof(Members));
|
| 275 | }
|
| 276 | static constexpr uint32_t field_mask() {
|
| 277 | return maskbit(offsetof(Members, s_));
|
| 278 | }
|
| 279 | };
|
| 280 |
|
| 281 | Members* self() {
|
| 282 | return static_cast<Members*>(self_.heap_obj);
|
| 283 | }
|
| 284 |
|
| 285 | int start() {
|
| 286 | return self()->start_;
|
| 287 | }
|
| 288 |
|
| 289 | // Field Accessors -- this will make the generated code a lot longer?
|
| 290 | // Naming convention is like protobuf
|
| 291 |
|
| 292 | int length() {
|
| 293 | return self()->length_;
|
| 294 | }
|
| 295 |
|
| 296 | // Methods
|
| 297 | BigStr* Get() {
|
| 298 | // return StrFromC("yo");
|
| 299 |
|
| 300 | int n = self()->length_;
|
| 301 |
|
| 302 | log("start = %d", self()->start_);
|
| 303 | log("n = %d", n);
|
| 304 |
|
| 305 | BigStr* result = NewStr(n);
|
| 306 |
|
| 307 | memcpy(result->data_, self()->s_->data_ + self()->start_, n);
|
| 308 |
|
| 309 | result->data_[n] = '\0';
|
| 310 |
|
| 311 | log("result->data %s", result->data_);
|
| 312 |
|
| 313 | return result;
|
| 314 | }
|
| 315 |
|
| 316 | YaksValue self_;
|
| 317 | };
|
| 318 |
|
| 319 | void SliceFunc(Slice myslice) {
|
| 320 | BigStr* s = myslice.Get();
|
| 321 | log("val = %s", s->data_);
|
| 322 | print(s);
|
| 323 |
|
| 324 | log("start %d length %d", myslice.start(), myslice.length());
|
| 325 | }
|
| 326 |
|
| 327 | // Based on _gen/core/runtime.asdl.h
|
| 328 |
|
| 329 | // TODO: This can have a typetag() method
|
| 330 | //
|
| 331 | // - It will look in the immediate YaksValue for the common cases?
|
| 332 | // - and in the case you only have immediates
|
| 333 | // And then look in the GC header of the heap allocated object for the other
|
| 334 | // cases?
|
| 335 |
|
| 336 | class value_t {
|
| 337 | protected:
|
| 338 | value_t() {
|
| 339 | }
|
| 340 |
|
| 341 | public:
|
| 342 | int typetag() const {
|
| 343 | // Look if it's an integer or string
|
| 344 |
|
| 345 | // Look for heap object
|
| 346 | int ytag = self_.ytag();
|
| 347 | if (ytag == ytag_e::HeapObj) {
|
| 348 | return 0;
|
| 349 | }
|
| 350 | return 1;
|
| 351 |
|
| 352 | // return ObjHeader::FromObject(this)->type_tag;
|
| 353 | }
|
| 354 |
|
| 355 | // All variants have this.
|
| 356 | YaksValue self_;
|
| 357 |
|
| 358 | // hnode_t* PrettyTree();
|
| 359 | DISALLOW_COPY_AND_ASSIGN(value_t)
|
| 360 | };
|
| 361 |
|
| 362 | class value__Int : public value_t {
|
| 363 | public:
|
| 364 | int typetag() const {
|
| 365 | // Reuse the primitive tag
|
| 366 | return ytag_e::Int;
|
| 367 | }
|
| 368 | };
|
| 369 |
|
| 370 | class value__Str : public value_t {
|
| 371 | public:
|
| 372 | int typetag() const {
|
| 373 | // int ytag = self_.ytag();
|
| 374 | // CHECK(ytag == ytag_e::SmallStr || ytag == ytag_e::HeapStr) {
|
| 375 |
|
| 376 | // TODO: return value_e::Str
|
| 377 | return 0;
|
| 378 | }
|
| 379 | };
|
| 380 |
|
| 381 | TEST yaks_test() {
|
| 382 | Slice myslice(StrFromC("hello"), 1, 3);
|
| 383 |
|
| 384 | log("myslice %p", &myslice);
|
| 385 |
|
| 386 | SliceFunc(myslice);
|
| 387 |
|
| 388 | // TODO: constructor
|
| 389 | value__Int i;
|
| 390 | log(" i.typetag = %d", i.typetag());
|
| 391 |
|
| 392 | value__Str s;
|
| 393 | log(" s.typetag = %d", s.typetag());
|
| 394 |
|
| 395 | PASS();
|
| 396 | }
|
| 397 |
|
| 398 | //} // namespace small_str_test
|
| 399 |
|
| 400 | GREATEST_MAIN_DEFS();
|
| 401 |
|
| 402 | int main(int argc, char** argv) {
|
| 403 | // gHeap.Init();
|
| 404 |
|
| 405 | GREATEST_MAIN_BEGIN();
|
| 406 |
|
| 407 | // RUN_TEST(yaks::yaks_test);
|
| 408 | RUN_TEST(yaks_test);
|
| 409 |
|
| 410 | // gHeap.CleanProcessExit();
|
| 411 |
|
| 412 | GREATEST_MAIN_END();
|
| 413 | return 0;
|
| 414 | }
|