1 | """
|
2 | string_ops.py - String library functions that can be exposed with a saner syntax.
|
3 |
|
4 | OSH:
|
5 |
|
6 | local y=${x//a*/b}
|
7 |
|
8 | YSH:
|
9 |
|
10 | var y = x => sub('a*', 'b', :ALL)
|
11 |
|
12 | Pass x => sub('a*', 'b', :ALL) => var y
|
13 | """
|
14 |
|
15 | from _devbuild.gen.id_kind_asdl import Id
|
16 | from _devbuild.gen.syntax_asdl import loc, Token, suffix_op
|
17 | from core import pyutil
|
18 | from core import ui
|
19 | from core import error
|
20 | from core.error import e_die, e_strict
|
21 | from mycpp.mylib import log
|
22 | from osh import glob_
|
23 |
|
24 | import libc
|
25 |
|
26 | from typing import List, Tuple
|
27 |
|
28 | _ = log
|
29 |
|
# TODO: Add details of the invalid character/byte here?

# Error messages used by the UTF-8 helpers below.  The same text is raised
# either as a strict error (via e_strict) or as error.Expr, depending on the
# helper.
INCOMPLETE_CHAR = 'Incomplete UTF-8 character'
INVALID_CONT = 'Invalid UTF-8 continuation byte'
INVALID_START = 'Invalid start of UTF-8 character'
|
35 |
|
36 |
|
37 | def _CheckContinuationByte(byte):
|
38 | # type: (str) -> None
|
39 | if (ord(byte) >> 6) != 0b10:
|
40 | e_strict(INVALID_CONT, loc.Missing)
|
41 |
|
42 |
|
43 | def _Utf8CharLen(starting_byte):
|
44 | # type: (int) -> int
|
45 | if (starting_byte >> 7) == 0b0:
|
46 | return 1
|
47 | elif (starting_byte >> 5) == 0b110:
|
48 | return 2
|
49 | elif (starting_byte >> 4) == 0b1110:
|
50 | return 3
|
51 | elif (starting_byte >> 3) == 0b11110:
|
52 | return 4
|
53 | else:
|
54 | e_strict(INVALID_START, loc.Missing)
|
55 |
|
56 |
|
57 | def _ReadOneUnit(s, cursor):
|
58 | # type: (str, int) -> Tuple[int, int]
|
59 | """Helper for DecodeUtf8Char"""
|
60 | if cursor >= len(s):
|
61 | raise error.Expr(INCOMPLETE_CHAR, loc.Missing)
|
62 |
|
63 | b = ord(s[cursor])
|
64 | cursor += 1
|
65 |
|
66 | if b & 0b11000000 != 0b10000000:
|
67 | raise error.Expr(INVALID_CONT, loc.Missing)
|
68 |
|
69 | return b & 0b00111111, cursor
|
70 |
|
71 |
|
def DecodeUtf8Char(s, start):
    # type: (str, int) -> int
    """Decode the code point whose encoding begins at byte index 'start' of s.

    'start' is a byte offset and must satisfy 0 <= start < len(s); it should
    be found with a function like NextUtf8Char or PreviousUtf8Char.

    If the encoding is invalid, we raise an `error.Expr`.  (This is different
    from {Next,Previous}Utf8Char, which raise an `error.Strict` on encoding
    errors.)

    Known Issues:
    - Doesn't raise issue on surrogate pairs
    - Doesn't raise issue on non-shortest form encodings
    - Isn't very performant and allocates one-byte-strings for each byte
    """
    # We use table 3.6 (reproduced below) from [0]. Note that table 3.6 is not
    # sufficient for validating UTF-8 as it allows surrogate pairs and
    # non-shortest form encodings. A correct decoder should follow the
    # encodings in table 3.7 from [0].
    #
    # | Scalar Value               | 1st Byte | 2nd Byte | 3rd Byte | 4th Byte |
    # +----------------------------+----------+----------+----------+----------+
    # | 00000000 0xxxxxxx          | 0xxxxxxx |          |          |          |
    # | 00000yyy yyxxxxxx          | 110yyyyy | 10xxxxxx |          |          |
    # | zzzzyyyy yyxxxxxx          | 1110zzzz | 10yyyyyy | 10xxxxxx |          |
    # | 000uuuuu zzzzyyyy yyxxxxxx | 11110uuu | 10uuzzzz | 10yyyyyy | 10xxxxxx |
    #
    # [0] https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf
    assert 0 <= start < len(s)

    lead = ord(s[start])

    # 1-byte form: the byte is the code point.
    if lead & 0b10000000 == 0:
        return lead & 0b01111111

    # Multi-byte forms: the lead byte contributes the high bits and tells us
    # how many continuation bytes follow.
    if lead & 0b11100000 == 0b11000000:
        codepoint = lead & 0b00011111
        num_trailing = 1
    elif lead & 0b11110000 == 0b11100000:
        codepoint = lead & 0b00001111
        num_trailing = 2
    elif lead & 0b11111000 == 0b11110000:
        codepoint = lead & 0b00000111
        num_trailing = 3
    else:
        raise error.Expr(INVALID_START, loc.Missing)

    # Each continuation byte supplies the next six bits, most significant
    # first.
    cursor = start + 1
    while num_trailing > 0:
        unit, cursor = _ReadOneUnit(s, cursor)
        codepoint = (codepoint << 6) | unit
        num_trailing -= 1

    return codepoint
|
142 |
|
143 |
|
def NextUtf8Char(s, i):
    # type: (str, int) -> int
    """Return the byte offset just past the character that starts at offset i.

    Usually that is where the next character begins; for the last character
    of the string it is len(s).

    Validates UTF-8: the start byte is checked by _Utf8CharLen(), every
    following byte by _CheckContinuationByte(), and a character that runs off
    the end of the string is reported as INCOMPLETE_CHAR through e_strict().
    """
    n = len(s)
    assert i < n, i  # should always be in range

    end = i + _Utf8CharLen(ord(s[i]))

    j = i + 1
    while j < end:
        if j >= n:
            e_strict(INCOMPLETE_CHAR, loc.Missing)
        _CheckContinuationByte(s[j])
        j += 1

    return end
|
163 |
|
164 |
|
def PreviousUtf8Char(s, i):
    # type: (str, int) -> int
    """Return the byte offset of the character that ends just before offset i.

    To find the first byte of the last character in s, pass len(s) for i.

    Validates UTF-8; problems are reported as INVALID_START through
    e_strict().
    """
    # Every byte of a valid UTF-8 string has one of these shapes:
    #
    #   0xxxxxxx    (1-byte char)
    #   110xxxxx    (start of 2-byte char)
    #   1110xxxx    (start of 3-byte char)
    #   11110xxx    (start of 4-byte char)
    #   10xxxxxx    (continuation byte)
    #
    # A byte beginning with 10 MUST be a continuation byte; anything else is
    # either the start of a character or invalid data.
    #
    # Walking backward, we stop at the first non-continuation byte, interpret
    # it as a start byte, and check that the length it announces equals the
    # distance we have walked.  Possible problems:
    #   * the byte we stopped on is not a valid start byte (e.g., 11111111)
    #   * the start byte announces more or fewer continuation bytes than seen
    #   * there is no start byte before the beginning of the string
    #
    # Because we are going backward, malformed input won't necessarily be
    # reported at the same place as when scanning the string forward.
    pos = i

    while pos > 0:
        pos -= 1
        b = ord(s[pos])

        if (b >> 6) == 0b10:
            continue  # continuation byte: keep walking

        distance = i - pos
        if distance != _Utf8CharLen(b):
            # Leaving a generic error for now, but if we want to, it's not
            # hard to calculate the position where things go wrong.  Note
            # that distance might be more than 4 for an invalid utf-8 string.
            e_strict(INVALID_START, loc.Missing)
        return pos

    # Reached the beginning of the string without finding a start byte.
    e_strict(INVALID_START, loc.Missing)
|
211 |
|
212 |
|
def CountUtf8Chars(s):
    # type: (str) -> int
    """Return the number of UTF-8 characters in the byte string 's'.

    The string is walked one character at a time with NextUtf8Char(), so any
    encoding error it detects propagates to the caller.

    TODO: Decide how an invalid string should affect the exit code of the
    command, e.g.

        $ echo ${#bad}
        Invalid utf-8 at index 3 of string 'bad': 'ab\\xffd'
        $ echo $?
        1
    """
    end = len(s)
    count = 0
    pos = 0
    while pos < end:
        pos = NextUtf8Char(s, pos)
        count += 1
    return count
|
232 |
|
233 |
|
def AdvanceUtf8Chars(s, num_chars, byte_offset):
    # type: (str, int, int) -> int
    """Advance from byte_offset by up to num_chars UTF-8 characters.

    Returns the resulting byte offset.  Used for shell slicing.

    Neither bash nor zsh checks out of bounds for slicing (begin or length),
    so instead of failing we simply stop once the offset reaches the end of
    the string.
    """
    limit = len(s)
    pos = byte_offset  # current byte position
    remaining = num_chars

    while remaining > 0 and pos < limit:
        pos = NextUtf8Char(s, pos)
        remaining -= 1

    return pos
|
255 |
|
256 |
|
257 | # Limited Unicode codepoints for whitespace characters.
|
258 | # Oils intentionally does not include characters from <USP>, as that set
|
259 | # depends on the version of the Unicode standard used.
|
260 | #
|
261 | # See discussion on the original pull request which added this list here:
|
262 | #
|
263 | # https://github.com/oilshell/oil/pull/1836#issuecomment-1942173520
|
264 | #
|
265 | # See also the Mozilla Javascript documentation, and the note on how
|
266 | # changes to the standard affected Javascript:
|
267 | #
|
268 | # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#white_space
|
269 |
|
# Code points that _IsSpace() accepts as whitespace.
SPACES = [
    0x0009,  # Horizontal tab (\t)
    0x000A,  # Newline (\n)
    0x000B,  # Vertical tab (\v)
    0x000C,  # Form feed (\f)
    0x000D,  # Carriage return (\r)
    0x0020,  # Normal space
    0x00A0,  # No-break space <NBSP>
    0xFEFF,  # Zero-width no-break space <ZWNBSP>
]
|
280 |
|
281 |
|
def _IsSpace(codepoint):
    # type: (int) -> bool
    """Return True if 'codepoint' is one of the code points listed in SPACES."""
    for space in SPACES:
        if codepoint == space:
            return True
    return False
|
285 |
|
286 |
|
def StartsWithWhitespaceByteRange(s):
    # type: (str) -> Tuple[int, int]
    """Return the byte range of the leading whitespace of 's'.

    The result is a half-open range "[start, end)" of byte positions,
    returned as a tuple.  If 's' has no leading whitespace, a valid but empty
    range is returned.

    Used for shell functions like 'trimStart' to match then trim whitespace.
    """
    n = len(s)
    end = 0
    while end < n:
        if not _IsSpace(DecodeUtf8Char(s, end)):
            break

        # The character was just decoded successfully, so stepping over it is
        # not expected to fail.
        try:
            end = NextUtf8Char(s, end)
        except error.Strict:
            assert False, "DecodeUtf8Char should have caught any encoding errors"

    return (0, end)
|
313 |
|
314 |
|
def EndsWithWhitespaceByteRange(s):
    # type: (str) -> Tuple[int, int]
    """Return the byte range of the trailing whitespace of 's'.

    The result is a half-open range "[start, end)" of byte positions,
    returned as a tuple.  If 's' has no trailing whitespace, a valid but
    empty range is returned.

    Used for shell functions like 'trimEnd' to match then trim whitespace.
    """
    n = len(s)
    start = n
    while start > 0:
        # TODO: Gracefully handle surrogate pairs and overlong encodings when
        # finding the start of each character.
        candidate = PreviousUtf8Char(s, start)

        if not _IsSpace(DecodeUtf8Char(s, candidate)):
            break

        start = candidate

    return (start, n)
|
342 |
|
343 |
|
344 | # Implementation without Python regex:
|
345 | #
|
# (1) PatSub: I think we fill in GlobToExtendedRegex, then use regcomp and
#     regexec in a loop.  fnmatch() does NOT give positions of matches.
|
348 | #
|
349 | # (2) Strip -- % %% # ## -
|
350 | #
|
351 | # a. Fast path for constant strings.
|
352 | # b. Convert to POSIX extended regex, to see if it matches at ALL. If it
|
353 | # doesn't match, short circuit out? We can't do this with fnmatch.
|
354 | # c. If it does match, call fnmatch() iteratively over prefixes / suffixes.
|
355 | #
|
356 | # - # shortest prefix - [:1], [:2], [:3] until it matches
|
#      - ## longest prefix - [:-1] [:-2], [:-3].  Works because fnmatch does not
#                            match prefixes, it matches EXACTLY.
|
359 | # - % shortest suffix - [-1:] [-2:] [-3:] ...
|
360 | # - %% longest suffix - [1:] [2:] [3:]
|
361 | #
|
362 | # See remove_pattern() in subst.c for bash, and trimsub() in eval.c for
|
363 | # mksh. Dash doesn't implement it.
|
364 |
|
365 | # TODO:
|
366 | # - Unicode support: Convert both pattern, string, and replacement to unicode,
|
367 | # then the result back at the end.
|
368 | # - Compile time errors for [[:space:]] ?
|
369 |
|
370 |
|
def DoUnarySuffixOp(s, op_tok, arg, is_extglob):
    # type: (str, Token, str, bool) -> str
    """Helper for ${x#prefix} and family.

    s is the value being operated on, op_tok the operator token (its id
    selects the operation), arg the operator's argument, and is_extglob says
    whether arg is an extended glob.

    Constant arguments are handled with plain string operations; glob
    arguments are matched with libc.fnmatch() against successive prefixes or
    suffixes of s, stepping one UTF-8 character at a time.
    """
    id_ = op_tok.id

    # Fast path for constant strings.
    # TODO: Should be LooksLikeExtendedGlob!
    if not is_extglob and not glob_.LooksLikeGlob(arg):
        # It doesn't look like a glob, but we glob-escaped it (e.g. [ -> \[).
        # So reverse it.  NOTE: We also do this check in Globber.Expand().  It
        # would be nice to somehow store the original string rather than
        # escaping/unescaping.
        arg = glob_.GlobUnescape(arg)
        arg_len = len(arg)

        if id_ in (Id.VOp1_Pound, Id.VOp1_DPound):  # const prefix
            # An empty arg never strips anything.
            if arg_len > 0 and s.startswith(arg):
                return s[arg_len:]
            return s

        if id_ in (Id.VOp1_Percent, Id.VOp1_DPercent):  # const suffix
            # An empty arg never strips anything.
            if arg_len > 0 and s.endswith(arg):
                return s[:-arg_len]
            return s

        # What remains are the case operators  , ,,  ^ ^^
        is_case_op = id_ in (Id.VOp1_Comma, Id.VOp1_DComma, Id.VOp1_Caret,
                             Id.VOp1_DCaret)
        if not is_case_op:
            raise AssertionError(id_)

        # These operators take glob arguments; we don't implement that
        # obscure case.
        if arg != '':
            e_die("%s can't have an argument" % ui.PrettyId(id_), op_tok)

        if id_ == Id.VOp1_DComma:
            return s.lower()
        if id_ == Id.VOp1_DCaret:
            return s.upper()

        # , and ^ only change the case of the first letter.
        if len(s) == 0:
            return s
        if id_ == Id.VOp1_Comma:
            return s[0].lower() + s[1:]
        return s[0].upper() + s[1:]

    # For patterns, do fnmatch() in a loop.
    #
    # TODO:
    # - Another potential fast path:
    #   v=aabbccdd
    #   echo ${v#*b}  # strip shortest prefix
    #
    # If the whole thing doesn't match '*b*', then no test can succeed.  So we
    # can fail early.  Conversely echo ${v%%c*} and '*c*'.
    #
    # (Although honestly this whole construct is nuts and should be deprecated.)

    n = len(s)

    if id_ == Id.VOp1_Pound:  # shortest prefix
        # 'abcd': try prefixes '', 'a', 'ab', 'abc', ...
        cut = 0
        while not libc.fnmatch(arg, s[:cut]):
            if cut >= n:
                return s  # no prefix matched
            cut = NextUtf8Char(s, cut)
            assert cut <= n
        return s[cut:]

    if id_ == Id.VOp1_DPound:  # longest prefix
        # 'abcd': try prefixes 'abcd', 'abc', 'ab', 'a', ''
        cut = n
        while not libc.fnmatch(arg, s[:cut]):
            if cut == 0:
                return s  # no prefix matched
            cut = PreviousUtf8Char(s, cut)
            assert cut >= 0
        return s[cut:]

    if id_ == Id.VOp1_Percent:  # shortest suffix
        # 'abcd': try suffixes '', 'd', 'cd', 'bcd', ...
        cut = n
        while not libc.fnmatch(arg, s[cut:]):
            if cut == 0:
                return s  # no suffix matched
            cut = PreviousUtf8Char(s, cut)
            assert cut >= 0
        return s[:cut]

    if id_ == Id.VOp1_DPercent:  # longest suffix
        # 'abcd': try suffixes 'abcd', 'bcd', 'cd', 'd', ''
        cut = 0
        while not libc.fnmatch(arg, s[cut:]):
            if cut >= n:
                return s  # no suffix matched
            cut = NextUtf8Char(s, cut)
            assert cut <= n
        return s[:cut]

    raise NotImplementedError(ui.PrettyId(id_))
|
498 |
|
499 |
|
500 | def _AllMatchPositions(s, regex):
|
501 | # type: (str, str) -> List[Tuple[int, int]]
|
502 | """Returns a list of all (start, end) match positions of the regex against
|
503 | s.
|
504 |
|
505 | (If there are no matches, it returns the empty list.)
|
506 | """
|
507 | matches = [] # type: List[Tuple[int, int]]
|
508 | pos = 0
|
509 | n = len(s)
|
510 | while pos < n: # needed to prevent infinite loop in (.*) case
|
511 | m = libc.regex_first_group_match(regex, s, pos)
|
512 | if m is None:
|
513 | break
|
514 | matches.append(m)
|
515 | start, end = m
|
516 | pos = end # advance position
|
517 | return matches
|
518 |
|
519 |
|
def _PatSubAll(s, regex, replace_str):
    # type: (str, str, str) -> str
    """Return s with every match of regex replaced by replace_str.

    The result is assembled from the unmatched stretches of s, with
    replace_str placed where each match reported by _AllMatchPositions() was.
    """
    pieces = []  # type: List[str]
    cursor = 0  # end of the previous match
    positions = _AllMatchPositions(s, regex)
    for span in positions:
        match_start, match_end = span
        pieces.append(s[cursor:match_start])  # text before this match
        pieces.append(replace_str)
        cursor = match_end
    pieces.append(s[cursor:])  # text after the last match
    return ''.join(pieces)
|
530 |
|
531 |
|
class GlobReplacer(object):
    """Replaces matches of a regex in a string, for pattern substitution.

    Holds the regex (as a string), the replacement string, and the token used
    as the location for error messages.
    """

    def __init__(self, regex, replace_str, slash_tok):
        # type: (str, str, Token) -> None

        # TODO: It would be nice to cache the compilation of the regex here,
        # instead of just the string.  That would require more sophisticated
        # use of the Python/C API in libc.c, which we might want to avoid.
        self.regex = regex
        self.replace_str = replace_str
        self.slash_tok = slash_tok

    def __repr__(self):
        # type: () -> str
        return '<_GlobReplacer regex %r r %r>' % (self.regex, self.replace_str)

    def Replace(self, s, op):
        # type: (str, suffix_op.PatSub) -> str
        """Return s with the replacement applied according to op.replace_mode.

        Id.Lit_Slash replaces every match; Id.Lit_Pound anchors the regex at
        the start of s; Id.Lit_Percent anchors it at the end; any other mode
        replaces the first match.  If nothing matches, s is returned as is.

        A RuntimeError from the regex engine is reported with e_die() at
        slash_tok, in every mode.
        """
        regex = '(%s)' % self.regex  # make it a group

        replace_all = (op.replace_mode == Id.Lit_Slash)

        # Avoid infinite loop when replacing all copies of empty string
        if replace_all and len(self.regex) == 0:
            return s

        if op.replace_mode == Id.Lit_Pound:
            regex = '^' + regex
        elif op.replace_mode == Id.Lit_Percent:
            regex = regex + '$'

        try:
            if replace_all:
                return _PatSubAll(s, regex,
                                  self.replace_str)  # loop over matches
            m = libc.regex_first_group_match(regex, s, 0)
        except RuntimeError as e:
            # Not sure if this is possible since we convert from glob:
            # libc.regex_first_group_match raises RuntimeError on regex syntax
            # error.  Both the replace-all and the single-match path call it,
            # so both are covered here.
            msg = e.message  # type: str
            e_die('Error matching regex %r: %s' % (regex, msg),
                  self.slash_tok)

        #log('regex = %r, s = %r, match = %r', regex, s, m)
        if m is None:
            return s
        start, end = m
        return s[:start] + self.replace_str + s[end:]
|
580 |
|
581 |
|
def ShellQuoteB(s):
    # type: (str) -> str
    """Quote a string by adding backslashes.

    Used for autocompletion, where backslash quoting is friendlier for
    display on the command line.
    """
    # There's no way to escape a newline!  Bash prints ^J for some reason, but
    # we're more explicit.  This will happen if there's a newline on a file
    # system or a completion plugin returns a newline.
    #
    # NOTE: tabs CAN be escaped with \.
    sanitized = s.replace('\r', '<INVALID CR>')
    sanitized = sanitized.replace('\n', '<INVALID NEWLINE>')

    # Characters that get a backslash:
    #   ~ for home dir
    #   ! for history
    #   * [] ? for glob
    #   {} for brace expansion
    #   space because it separates words
    meta_chars = ' `~!$&*()[]{}\\|;\'"<>?'
    return pyutil.BackslashEscape(sanitized, meta_chars)
|