cpp

Coverage Report

Created: 2024-07-28 06:14

/home/uke/oil/data_lang/j8.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef DATA_LANG_J8_H
2
#define DATA_LANG_J8_H
3
4
#include <stdio.h>   // sprintf
5
#include <string.h>  // memcmp, memcpy, strlen
6
7
#include "data_lang/utf8.h"
8
9
// Write one byte `ch` at the output cursor and advance the cursor by one.
// Relies on a variable named `p_out` (pointer to the cursor) at the use site.
// Expands to two statements with no do/while wrapper, so use it only where a
// statement sequence is valid (not as the unbraced body of an if/else).
#define J8_OUT(ch) \
10
396
  **p_out = (ch);  \
11
396
  (*p_out)++
12
13
// Encode the next character (or single invalid byte) at *p_in into *p_out,
// advancing both cursors past what was consumed and produced.
static inline int J8EncodeOne(unsigned char** p_in, unsigned char** p_out,
14
1.01k
                              int j8_escape) {
15
  // We use a slightly weird double pointer style because
16
  //   *p_in may be advanced by 1 to 4 bytes (depending on whether it's UTF-8)
17
  //   *p_out may be advanced by 1 to 6 bytes (depending on escaping)
18
19
  // IMPORTANT: J8EncodeOne(), BourneShellEncodeOne(), BashDollarEncodeOne() all
20
  // call utf8_decode(), which requires that p_in MUST have a NUL terminator. This
21
  // is so INCOMPLETE UTF-8 sequences are terminated with an INVALID byte, and
22
  // 0x00 can only be ITSELF, never part of a sequence. An alternative would be
23
  // to do more bounds checks in these functions.
24
25
  // CALLER MUST CHECK that we are able to write up to 6 bytes!
26
  //   Because the longest output is \u001f or \u{1f} for control chars, since
27
  //   we don't emit escapes like \u{1f926} right now
28
  //
29
  // j8_escape: Whether to use j8 escapes, i.e. LOSSLESS encoding of data
30
  //   \yff instead of Unicode replacement char
31
  //   \u{1} instead of \u0001 for unprintable low chars
32
33
  // Returns:
34
  //   0   wrote valid UTF-8 (encoded or not)
35
  //   1   input was not valid UTF-8; an escape or U+FFFD was written instead
36
37
1.01k
  unsigned char ch = **p_in;
38
39
  //
40
  // Handle \\ \b \f \n \r \t
41
  //
42
43
  // clang-format off
44
1.01k
  switch (ch) {
45
12
  case '\\': J8_OUT('\\'); J8_OUT('\\'); (*p_in)++; return 0;
46
15
  case '\b': J8_OUT('\\'); J8_OUT('b'); (*p_in)++; return 0;
47
15
  case '\f': J8_OUT('\\'); J8_OUT('f'); (*p_in)++; return 0;
48
15
  case '\n': J8_OUT('\\'); J8_OUT('n'); (*p_in)++; return 0;
49
15
  case '\r': J8_OUT('\\'); J8_OUT('r'); (*p_in)++; return 0;
50
15
  case '\t': J8_OUT('\\'); J8_OUT('t'); (*p_in)++; return 0;
51
1.01k
  }
52
  // clang-format on
53
54
  //
55
  // Conditionally handle \' and \"
56
  //
57
927
  if (ch == '\'' && j8_escape) {  // J8-style strings \'
58
0
    J8_OUT('\\');
59
0
    J8_OUT('\'');
60
0
    (*p_in)++;
61
0
    return 0;
62
0
  }
63
927
  if (ch == '"' && !j8_escape) {  // JSON-style strings \"
64
0
    J8_OUT('\\');
65
0
    J8_OUT('"');
66
0
    (*p_in)++;
67
0
    return 0;
68
0
  }
69
70
  //
71
  // Unprintable ASCII control codes
72
  //
73
927
  if (ch < 0x20) {
74
189
    if (j8_escape) {
75
      // printf("Writing for %04x %p\n", ch, *p_out);
76
75
      int n = sprintf((char*)*p_out, "\\u{%x}", ch);
77
      // printf("! Wrote %d bytes for %04x\n", n, ch);
78
75
      *p_out += n;
79
114
    } else {
80
      // printf("Writing for %04x %p\n", ch, *p_out);
81
114
      int n = sprintf((char*)*p_out, "\\u%04x", ch);
82
114
      *p_out += n;
83
      // printf("Wrote %d bytes for %04x\n", n, ch);
84
114
    }
85
189
    (*p_in)++;
86
189
    return 0;
87
189
  }
88
89
  //
90
  // UTF-8 encoded runes and invalid bytes
91
  //
92
738
  Utf8Result_t result;
93
738
  utf8_decode(*p_in, &result);
94
95
738
  if (result.error == UTF8_OK) {
96
660
    memcpy(*p_out, *p_in, result.bytes_read);
97
660
    *p_in += result.bytes_read;
98
660
    *p_out += result.bytes_read;
99
660
    return 0;
100
660
  }
101
102
  // We have a UTF-8 decoding error. This is handled one of three ways:
103
  //  1. Losslessly encode as J8 byte literals (only applicable in J8)
104
  //  2. Try to encode a lone surrogate
105
  //  3. Insert a Unicode replacement char
106
107
78
  if (j8_escape) {
108
30
    int n = sprintf((char*)*p_out, "\\y%02x", ch);
109
30
    *p_in += 1;
110
30
    *p_out += n;
111
48
  } else if (result.error == UTF8_ERR_SURROGATE) {
112
0
    int n = sprintf((char*)*p_out, "\\u%04x", result.codepoint);
113
0
    *p_in += result.bytes_read;
114
0
    *p_out += n;
115
0
    return 1;
116
48
  } else {
117
    // Unicode replacement char is U+FFFD, so write encoded form
118
    // >>> '\ufffd'.encode('utf-8')
119
    // b'\xef\xbf\xbd'
120
48
    J8_OUT('\xef');
121
48
    J8_OUT('\xbf');
122
48
    J8_OUT('\xbd');
123
48
    *p_in += 1;  // Advance past the invalid input byte we replaced
124
48
  }
125
126
78
  return 1;
127
78
}
data_lang.cc:_ZL11J8EncodeOnePPhS0_i
Line
Count
Source
14
676
                              int j8_escape) {
15
  // We use a slightly weird double pointer style because
16
  //   *p_in may be advanced by 1 to 4 bytes (depending on whether it's UTF-8)
17
  //   *p_out may be advanced by 1 to 6 bytes (depending on escaping)
18
19
  // IMPORTANT: J8EncodeOne(), BourneShellEncodeOne(), BashDollarEncodeOne() all
20
  // call utf8_decode(), which requires that p_in MUST have a NUL terminator. This
21
  // is so INCOMPLETE UTF-8 sequences are terminated with an INVALID byte, and
22
  // 0x00 can only be ITSELF, never part of a sequence. An alternative would be
23
  // to do more bounds checks in these functions.
24
25
  // CALLER MUST CHECK that we are able to write up to 6 bytes!
26
  //   Because the longest output is \u001f or \u{1f} for control chars, since
27
  //   we don't emit escapes like \u{1f926} right now
28
  //
29
  // j8_escape: Whether to use j8 escapes, i.e. LOSSLESS encoding of data
30
  //   \yff instead of Unicode replacement char
31
  //   \u{1} instead of \u0001 for unprintable low chars
32
33
  // Returns:
34
  //   0   wrote valid UTF-8 (encoded or not)
35
  //   1   wrote byte that's invalid UTF-8
36
37
676
  unsigned char ch = **p_in;
38
39
  //
40
  // Handle \\ \b \f \n \r \t
41
  //
42
43
  // clang-format off
44
676
  switch (ch) {
45
8
  case '\\': J8_OUT('\\'); J8_OUT('\\'); (*p_in)++; return 0;
46
10
  case '\b': J8_OUT('\\'); J8_OUT('b'); (*p_in)++; return 0;
47
10
  case '\f': J8_OUT('\\'); J8_OUT('f'); (*p_in)++; return 0;
48
10
  case '\n': J8_OUT('\\'); J8_OUT('n'); (*p_in)++; return 0;
49
10
  case '\r': J8_OUT('\\'); J8_OUT('r'); (*p_in)++; return 0;
50
10
  case '\t': J8_OUT('\\'); J8_OUT('t'); (*p_in)++; return 0;
51
676
  }
52
  // clang-format on
53
54
  //
55
  // Conditionally handle \' and \"
56
  //
57
618
  if (ch == '\'' && j8_escape) {  // J8-style strings \'
58
0
    J8_OUT('\\');
59
0
    J8_OUT('\'');
60
0
    (*p_in)++;
61
0
    return 0;
62
0
  }
63
618
  if (ch == '"' && !j8_escape) {  // JSON-style strings \"
64
0
    J8_OUT('\\');
65
0
    J8_OUT('"');
66
0
    (*p_in)++;
67
0
    return 0;
68
0
  }
69
70
  //
71
  // Unprintable ASCII control codes
72
  //
73
618
  if (ch < 0x20) {
74
126
    if (j8_escape) {
75
      // printf("Writing for %04x %p\n", ch, *p_out);
76
50
      int n = sprintf((char*)*p_out, "\\u{%x}", ch);
77
      // printf("! Wrote %d bytes for %04x\n", n, ch);
78
50
      *p_out += n;
79
76
    } else {
80
      // printf("Writing for %04x %p\n", ch, *p_out);
81
76
      int n = sprintf((char*)*p_out, "\\u%04x", ch);
82
76
      *p_out += n;
83
      // printf("Wrote %d bytes for %04x\n", n, ch);
84
76
    }
85
126
    (*p_in)++;
86
126
    return 0;
87
126
  }
88
89
  //
90
  // UTF-8 encoded runes and invalid bytes
91
  //
92
492
  Utf8Result_t result;
93
492
  utf8_decode(*p_in, &result);
94
95
492
  if (result.error == UTF8_OK) {
96
440
    memcpy(*p_out, *p_in, result.bytes_read);
97
440
    *p_in += result.bytes_read;
98
440
    *p_out += result.bytes_read;
99
440
    return 0;
100
440
  }
101
102
  // We have a UTF-8 decoding error. This is handled one of three ways:
103
  //  1. Losslessly encode as J8 byte literals (only applicable in J8)
104
  //  2. Try to encode a lone surrogate
105
  //  3. Insert a Unicode replacement char
106
107
52
  if (j8_escape) {
108
20
    int n = sprintf((char*)*p_out, "\\y%02x", ch);
109
20
    *p_in += 1;
110
20
    *p_out += n;
111
32
  } else if (result.error == UTF8_ERR_SURROGATE) {
112
0
    int n = sprintf((char*)*p_out, "\\u%04x", result.codepoint);
113
0
    *p_in += result.bytes_read;
114
0
    *p_out += n;
115
0
    return 1;
116
32
  } else {
117
    // Unicode replacement char is U+FFFD, so write encoded form
118
    // >>> '\ufffd'.encode('utf-8')
119
    // b'\xef\xbf\xbd'
120
32
    J8_OUT('\xef');
121
32
    J8_OUT('\xbf');
122
32
    J8_OUT('\xbd');
123
32
    *p_in += 1;  // Advance past the byte we wrote
124
32
  }
125
126
52
  return 1;
127
52
}
j8_libc.c:_ZL11J8EncodeOnePPhS0_i
Line
Count
Source
14
338
                              int j8_escape) {
15
  // We use a slightly weird double pointer style because
16
  //   *p_in may be advanced by 1 to 4 bytes (depending on whether it's UTF-8)
17
  //   *p_out may be advanced by 1 to 6 bytes (depending on escaping)
18
19
  // IMPORTANT: J8EncodeOne(), BourneShellEncodeOne(), BashDollarEncodeOne() all
20
  // call utf8_decode(), which requires that p_in MUST have a NUL terminator. This
21
  // is so INCOMPLETE UTF-8 sequences are terminated with an INVALID byte, and
22
  // 0x00 can only be ITSELF, never part of a sequence. An alternative would be
23
  // to do more bounds checks in these functions.
24
25
  // CALLER MUST CHECK that we are able to write up to 6 bytes!
26
  //   Because the longest output is \u001f or \u{1f} for control chars, since
27
  //   we don't emit escapes like \u{1f926} right now
28
  //
29
  // j8_escape: Whether to use j8 escapes, i.e. LOSSLESS encoding of data
30
  //   \yff instead of Unicode replacement char
31
  //   \u{1} instead of \u0001 for unprintable low chars
32
33
  // Returns:
34
  //   0   wrote valid UTF-8 (encoded or not)
35
  //   1   wrote byte that's invalid UTF-8
36
37
338
  unsigned char ch = **p_in;
38
39
  //
40
  // Handle \\ \b \f \n \r \t
41
  //
42
43
  // clang-format off
44
338
  switch (ch) {
45
4
  case '\\': J8_OUT('\\'); J8_OUT('\\'); (*p_in)++; return 0;
46
5
  case '\b': J8_OUT('\\'); J8_OUT('b'); (*p_in)++; return 0;
47
5
  case '\f': J8_OUT('\\'); J8_OUT('f'); (*p_in)++; return 0;
48
5
  case '\n': J8_OUT('\\'); J8_OUT('n'); (*p_in)++; return 0;
49
5
  case '\r': J8_OUT('\\'); J8_OUT('r'); (*p_in)++; return 0;
50
5
  case '\t': J8_OUT('\\'); J8_OUT('t'); (*p_in)++; return 0;
51
338
  }
52
  // clang-format on
53
54
  //
55
  // Conditionally handle \' and \"
56
  //
57
309
  if (ch == '\'' && j8_escape) {  // J8-style strings \'
58
0
    J8_OUT('\\');
59
0
    J8_OUT('\'');
60
0
    (*p_in)++;
61
0
    return 0;
62
0
  }
63
309
  if (ch == '"' && !j8_escape) {  // JSON-style strings \"
64
0
    J8_OUT('\\');
65
0
    J8_OUT('"');
66
0
    (*p_in)++;
67
0
    return 0;
68
0
  }
69
70
  //
71
  // Unprintable ASCII control codes
72
  //
73
309
  if (ch < 0x20) {
74
63
    if (j8_escape) {
75
      // printf("Writing for %04x %p\n", ch, *p_out);
76
25
      int n = sprintf((char*)*p_out, "\\u{%x}", ch);
77
      // printf("! Wrote %d bytes for %04x\n", n, ch);
78
25
      *p_out += n;
79
38
    } else {
80
      // printf("Writing for %04x %p\n", ch, *p_out);
81
38
      int n = sprintf((char*)*p_out, "\\u%04x", ch);
82
38
      *p_out += n;
83
      // printf("Wrote %d bytes for %04x\n", n, ch);
84
38
    }
85
63
    (*p_in)++;
86
63
    return 0;
87
63
  }
88
89
  //
90
  // UTF-8 encoded runes and invalid bytes
91
  //
92
246
  Utf8Result_t result;
93
246
  utf8_decode(*p_in, &result);
94
95
246
  if (result.error == UTF8_OK) {
96
220
    memcpy(*p_out, *p_in, result.bytes_read);
97
220
    *p_in += result.bytes_read;
98
220
    *p_out += result.bytes_read;
99
220
    return 0;
100
220
  }
101
102
  // We have a UTF-8 decoding error. This is handled one of three ways:
103
  //  1. Losslessly encode as J8 byte literals (only applicable in J8)
104
  //  2. Try to encode a lone surrogate
105
  //  3. Insert a Unicode replacement char
106
107
26
  if (j8_escape) {
108
10
    int n = sprintf((char*)*p_out, "\\y%02x", ch);
109
10
    *p_in += 1;
110
10
    *p_out += n;
111
16
  } else if (result.error == UTF8_ERR_SURROGATE) {
112
0
    int n = sprintf((char*)*p_out, "\\u%04x", result.codepoint);
113
0
    *p_in += result.bytes_read;
114
0
    *p_out += n;
115
0
    return 1;
116
16
  } else {
117
    // Unicode replacement char is U+FFFD, so write encoded form
118
    // >>> '\ufffd'.encode('utf-8')
119
    // b'\xef\xbf\xbd'
120
16
    J8_OUT('\xef');
121
16
    J8_OUT('\xbf');
122
16
    J8_OUT('\xbd');
123
16
    *p_in += 1;  // Advance past the byte we wrote
124
16
  }
125
126
26
  return 1;
127
26
}
128
129
// Like the above, but
130
//
131
//   \xff instead of \yff
132
//   \u001f always, never \u{1f}
133
//   No JSON vs. J8
134
//     No \" escape ever
135
//     No errors -- it can encode everything
136
137
// Encode the next character (or single invalid byte) at *p_in into *p_out and
// advance both cursors. Backslash-escapes the seven chars in the switch below,
// writes other bytes under 0x20 as \u00XX, copies valid UTF-8 unchanged, and
// writes each byte that fails utf8_decode() as \xXX. Never reports an error.
static inline void BashDollarEncodeOne(unsigned char** p_in,
138
0
                                       unsigned char** p_out) {
139
0
  unsigned char ch = **p_in;
140
141
  //
142
  // Handle \\ \b \f \n \r \t \'
143
  //
144
145
  // clang-format off
146
0
  switch (ch) {
147
0
  case '\\': J8_OUT('\\'); J8_OUT('\\'); (*p_in)++; return;
148
0
  case '\b': J8_OUT('\\'); J8_OUT('b'); (*p_in)++; return;
149
0
  case '\f': J8_OUT('\\'); J8_OUT('f'); (*p_in)++; return;
150
0
  case '\n': J8_OUT('\\'); J8_OUT('n'); (*p_in)++; return;
151
0
  case '\r': J8_OUT('\\'); J8_OUT('r'); (*p_in)++; return;
152
0
  case '\t': J8_OUT('\\'); J8_OUT('t'); (*p_in)++; return;
153
0
  case '\'': J8_OUT('\\'); J8_OUT('\''); (*p_in)++; return;
154
0
  }
155
  // clang-format on
156
157
  //
158
  // Unprintable ASCII control codes
159
  //
160
0
  if (ch < 0x20) {
161
    // printf("Writing for %04x %p\n", ch, *p_out);
162
0
    int n = sprintf((char*)*p_out, "\\u%04x", ch);
163
0
    *p_out += n;
164
    // printf("Wrote %d bytes for %04x\n", n, ch);
165
0
    (*p_in)++;
166
0
    return;
167
0
  }
168
169
  //
170
  // UTF-8 encoded runes and invalid bytes
171
  //
172
0
  Utf8Result_t result;
173
0
  utf8_decode(*p_in, &result);
174
0
  if (result.error == UTF8_OK) {
175
0
    memcpy(*p_out, *p_in, result.bytes_read);
176
0
    *p_in += result.bytes_read;
177
0
    *p_out += result.bytes_read;
178
0
  } else {
179
    // If not a valid UTF-8 byte sequence, losslessly encode the bad bytes
180
0
    int n = sprintf((char*)*p_out, "\\x%02x", **p_in);
181
0
    *p_out += n;
182
0
    *p_in += 1;  // Advance past the single input byte we just escaped
183
0
  }
184
0
}
Unexecuted instantiation: data_lang.cc:_ZL19BashDollarEncodeOnePPhS0_
Unexecuted instantiation: j8_libc.c:_ZL19BashDollarEncodeOnePPhS0_
185
186
// BourneShellEncodeOne rules:
187
//
188
//   must be valid UTF-8
189
//   no control chars
190
//   no ' is required
191
//   no \ -- not required, but avoids ambiguous '\n'
192
//
193
// For example we write $'\\' or b'\\' not '\'
194
// The latter should be written r'\', but we're not outputting that form.
195
196
// Copy one valid UTF-8 character from *p_in to *p_out, advancing both cursors.
// Returns 0 on success. Returns 1, leaving both cursors untouched, when the
// next byte is ' or a backslash, is below 0x20, or does not decode as UTF-8.
static inline int BourneShellEncodeOne(unsigned char** p_in,
197
0
                                       unsigned char** p_out) {
198
0
  unsigned char ch = **p_in;
199
200
0
  if (ch == '\'' || ch == '\\') {  // can't encode these in Bourne shell ''
201
0
    return 1;
202
0
  }
203
0
  if (ch < 0x20) {  // Unprintable ASCII control codes
204
0
    return 1;
205
0
  }
206
207
  // UTF-8 encoded runes and invalid bytes
208
0
  Utf8Result_t result;
209
0
  utf8_decode(*p_in, &result);
210
0
  if (result.error == UTF8_OK) {
211
0
    memcpy(*p_out, *p_in, result.bytes_read);
212
0
    *p_in += result.bytes_read;
213
0
    *p_out += result.bytes_read;
214
0
    return 0;
215
0
  } else {
216
0
    return 1;
217
0
  }
218
0
}
Unexecuted instantiation: data_lang.cc:_ZL20BourneShellEncodeOnePPhS0_
Unexecuted instantiation: j8_libc.c:_ZL20BourneShellEncodeOnePPhS0_
219
220
// Right now \u001f and \u{1f} are the longest output sequences for a byte.
221
// Bug fix: we need 6 + 1 for the NUL terminator that sprintf() writes!  (Even
222
// though we don't technically need it)
223
224
// Bug: we may need up to 16 bytes: \yaa\yaa\yaa\yaa
225
// If this is too small, we would enter an infinite loop
226
// +1 for NUL terminator
227
228
1.06k
// Free space, in bytes, that the *EncodeChunk() loops below require before
// out_end on every iteration; they stop encoding once less than this remains.
#define J8_MAX_BYTES_PER_INPUT_BYTE 7
229
230
// The minimum capacity must be more than J8_MAX_BYTES_PER_INPUT_BYTE above.
231
// TODO: Tune this for our allocator?  We call buf->EnsureMoreSpace(capacity);
232
96
#define J8_MIN_CAPACITY 16
233
234
// Repeatedly call J8EncodeOne() while input remains before in_end and at least
// J8_MAX_BYTES_PER_INPUT_BYTE bytes are free before out_end.
//
// Returns the nonzero J8EncodeOne() result as soon as invalid UTF-8 is seen
// with j8_escape == 0; otherwise returns 0. Both cursors are advanced in
// place, so a 0 return with *p_in < in_end means the output ran out of room.
static inline int J8EncodeChunk(unsigned char** p_in, unsigned char* in_end,
235
                                unsigned char** p_out, unsigned char* out_end,
236
152
                                int j8_escape) {
237
1.11k
  while (*p_in < in_end && (*p_out + J8_MAX_BYTES_PER_INPUT_BYTE) <= out_end) {
238
    // printf("iter %d  %p < %p \n", i++, *p_out, out_end);
239
1.01k
    int invalid_utf8 = J8EncodeOne(p_in, p_out, j8_escape);
240
1.01k
    if (invalid_utf8 && !j8_escape) {  // first JSON pass got binary data?
241
48
      return invalid_utf8;             // early return
242
48
    }
243
1.01k
  }
244
104
  return 0;
245
152
}
data_lang.cc:_ZL13J8EncodeChunkPPhS_S0_S_i
Line
Count
Source
236
96
                                int j8_escape) {
237
740
  while (*p_in < in_end && (*p_out + J8_MAX_BYTES_PER_INPUT_BYTE) <= out_end) {
238
    // printf("iter %d  %p < %p \n", i++, *p_out, out_end);
239
676
    int invalid_utf8 = J8EncodeOne(p_in, p_out, j8_escape);
240
676
    if (invalid_utf8 && !j8_escape) {  // first JSON pass got binary data?
241
32
      return invalid_utf8;             // early return
242
32
    }
243
676
  }
244
64
  return 0;
245
96
}
j8_libc.c:_ZL13J8EncodeChunkPPhS_S0_S_i
Line
Count
Source
236
56
                                int j8_escape) {
237
378
  while (*p_in < in_end && (*p_out + J8_MAX_BYTES_PER_INPUT_BYTE) <= out_end) {
238
    // printf("iter %d  %p < %p \n", i++, *p_out, out_end);
239
338
    int invalid_utf8 = J8EncodeOne(p_in, p_out, j8_escape);
240
338
    if (invalid_utf8 && !j8_escape) {  // first JSON pass got binary data?
241
16
      return invalid_utf8;             // early return
242
16
    }
243
338
  }
244
40
  return 0;
245
56
}
246
247
// Repeatedly call BashDollarEncodeOne() while input remains before in_end and
// at least J8_MAX_BYTES_PER_INPUT_BYTE bytes are free before out_end.
// Always returns 0; compare *p_in with in_end to see whether input remains.
static inline int BashDollarEncodeChunk(unsigned char** p_in,
248
                                        unsigned char* in_end,
249
                                        unsigned char** p_out,
250
0
                                        unsigned char* out_end) {
251
0
  while (*p_in < in_end && (*p_out + J8_MAX_BYTES_PER_INPUT_BYTE) <= out_end) {
252
0
    BashDollarEncodeOne(p_in, p_out);
253
0
  }
254
0
  return 0;
255
0
}
Unexecuted instantiation: data_lang.cc:_ZL21BashDollarEncodeChunkPPhS_S0_S_
Unexecuted instantiation: j8_libc.c:_ZL21BashDollarEncodeChunkPPhS_S0_S_
256
257
// Repeatedly call BourneShellEncodeOne() while input remains before in_end and
// at least J8_MAX_BYTES_PER_INPUT_BYTE bytes are free before out_end.
// Returns nonzero at the first character BourneShellEncodeOne() rejects (the
// cursors are left pointing at it); otherwise returns 0.
static inline int BourneShellEncodeChunk(unsigned char** p_in,
258
                                         unsigned char* in_end,
259
                                         unsigned char** p_out,
260
0
                                         unsigned char* out_end) {
261
0
  while (*p_in < in_end && (*p_out + J8_MAX_BYTES_PER_INPUT_BYTE) <= out_end) {
262
0
    int cannot_encode = BourneShellEncodeOne(p_in, p_out);
263
0
    if (cannot_encode) {     // we need escaping, e.g. \u0001 or \'
264
0
      return cannot_encode;  // early return
265
0
    }
266
0
  }
267
0
  return 0;
268
0
}
Unexecuted instantiation: data_lang.cc:_ZL22BourneShellEncodeChunkPPhS_S0_S_
Unexecuted instantiation: j8_libc.c:_ZL22BourneShellEncodeChunkPPhS_S0_S_
269
270
0
// Return 1 if the len-byte string at s may be written without quotes, else 0.
// The string must be non-empty, must not be exactly null, true or false, and
// every byte must match [a-zA-Z0-9./_-].
static inline int CanOmitQuotes(unsigned char* s, int len) {
271
0
  if (len == 0) {  // empty string has to be quoted
272
0
    return 0;
273
0
  }
274
275
  // 3 special case keywords: these exact strings are never left unquoted
276
0
  if (len == 4) {
277
0
    if (memcmp(s, "null", 4) == 0) {
278
0
      return 0;
279
0
    }
280
0
    if (memcmp(s, "true", 4) == 0) {
281
0
      return 0;
282
0
    }
283
0
  }
284
0
  if (len == 5) {
285
0
    if (memcmp(s, "false", 5) == 0) {
286
0
      return 0;
287
0
    }
288
0
  }
289
290
0
  for (int i = 0; i < len; ++i) {
291
0
    unsigned char ch = s[i];
292
293
    // Corresponds to regex [a-zA-Z0-9./_-]
294
0
    if ('a' <= ch && ch <= 'z') {
295
0
      continue;
296
0
    }
297
0
    if ('A' <= ch && ch <= 'Z') {
298
0
      continue;
299
0
    }
300
0
    if ('0' <= ch && ch <= '9') {
301
0
      continue;
302
0
    }
303
0
    if (ch == '.' || ch == '/' || ch == '_' || ch == '-') {
304
0
      continue;
305
0
    }
306
    // some byte requires quotes
307
    // Not including UTF-8 here because it can have chars that look like space
308
    // or quotes
309
0
    return 0;
310
0
  }
311
0
  return 1;  // everything OK
312
0
}
Unexecuted instantiation: data_lang.cc:_ZL13CanOmitQuotesPhi
Unexecuted instantiation: j8_libc.c:_ZL13CanOmitQuotesPhi
313
314
#endif  // DATA_LANG_J8_H