| 1 | #!/usr/bin/env python2
|
| 2 | """Builtin_printf.py."""
|
| 3 | from __future__ import print_function
|
| 4 |
|
| 5 | import time as time_ # avoid name conflict
|
| 6 |
|
| 7 | from _devbuild.gen import arg_types
|
| 8 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_t
|
| 9 | from _devbuild.gen.runtime_asdl import cmd_value
|
| 10 | from _devbuild.gen.syntax_asdl import (
|
| 11 | loc,
|
| 12 | loc_e,
|
| 13 | loc_t,
|
| 14 | source,
|
| 15 | Token,
|
| 16 | CompoundWord,
|
| 17 | printf_part,
|
| 18 | printf_part_e,
|
| 19 | printf_part_t,
|
| 20 | )
|
| 21 | from _devbuild.gen.types_asdl import lex_mode_e, lex_mode_t
|
| 22 | from _devbuild.gen.value_asdl import (value, value_e)
|
| 23 |
|
| 24 | from core import alloc
|
| 25 | from core import error
|
| 26 | from core.error import e_die, p_die
|
| 27 | from core import state
|
| 28 | from core import vm
|
| 29 | from frontend import flag_util
|
| 30 | from frontend import consts
|
| 31 | from frontend import lexer
|
| 32 | from frontend import match
|
| 33 | from frontend import reader
|
| 34 | from mycpp import mops
|
| 35 | from mycpp import mylib
|
| 36 | from mycpp.mylib import log
|
| 37 | from osh import sh_expr_eval
|
| 38 | from osh import string_ops
|
| 39 | from osh import word_compile
|
| 40 | from data_lang import j8_lite
|
| 41 |
|
| 42 | import posix_ as posix
|
| 43 |
|
| 44 | from typing import Dict, List, Optional, TYPE_CHECKING, cast
|
| 45 |
|
| 46 | if TYPE_CHECKING:
|
| 47 | from display import ui
|
| 48 | from frontend import parse_lib
|
| 49 |
|
_ = log  # presumably keeps the imported debug helper `log` referenced while no call site uses it -- TODO confirm
|
| 51 |
|
| 52 |
|
class _FormatStringParser(object):
    """Recursive-descent parser that turns a printf format into parts.

    Grammar:

      width         = Num | Star
      precision     = Dot (Num | Star | Zero)?
      fmt           = Percent (Flag | Zero)* width? precision? (Type | Time)
      part          = Char_* | Format_EscapedPercent | fmt
      printf_format = part* Eof_Real   # we're using the main lexer

    Time is the %(strftime)T conversion that bash also supports.
    """

    def __init__(self, lexer):
        # type: (lexer.Lexer) -> None
        # Lookahead state.  These hold placeholder values until the first
        # call to _Next().
        self.cur_token = None  # type: Token
        self.token_type = Id.Undefined_Tok  # type: Id_t
        self.token_kind = Kind.Undefined  # type: Kind_t

        self.lexer = lexer

    def _Next(self, lex_mode):
        # type: (lex_mode_t) -> None
        """Read one token in the given lexer mode and refresh the lookahead."""
        tok = self.lexer.Read(lex_mode)
        self.cur_token = tok
        self.token_type = tok.id
        self.token_kind = consts.GetKind(tok.id)

    def _ParseFormatStr(self):
        # type: () -> printf_part_t
        """Parse the `fmt` production; the current token is the Percent.

        Dies with a parse error (p_die) on unsupported flags, unsupported
        conversion types, or a missing/invalid conversion character.
        """
        self._Next(lex_mode_e.PrintfPercent)  # step over the %

        pct = printf_part.Percent.CreateNull(alloc_lists=True)

        # (Flag | Zero)*
        while True:
            if self.token_type not in (Id.Format_Flag, Id.Format_Zero):
                break

            # space and + could be implemented
            flag_ch = lexer.TokenVal(
                self.cur_token)  # allocation will be cached
            if flag_ch in '# +':
                p_die("osh printf doesn't support the %r flag" % flag_ch,
                      self.cur_token)

            pct.flags.append(self.cur_token)
            self._Next(lex_mode_e.PrintfPercent)

        # width?
        if self.token_type in (Id.Format_Num, Id.Format_Star):
            pct.width = self.cur_token
            self._Next(lex_mode_e.PrintfPercent)

        # precision?  A bare dot is recorded as the precision token itself.
        if self.token_type == Id.Format_Dot:
            dot_tok = self.cur_token
            self._Next(lex_mode_e.PrintfPercent)  # step over the dot
            if self.token_type in (Id.Format_Num, Id.Format_Star,
                                   Id.Format_Zero):
                pct.precision = self.cur_token
                self._Next(lex_mode_e.PrintfPercent)
            else:
                pct.precision = dot_tok

        # (Type | Time), or a parse error
        if self.token_type == Id.Unknown_Tok:
            p_die('Invalid printf format character', self.cur_token)

        elif self.token_type in (Id.Format_Type, Id.Format_Time):
            pct.type = self.cur_token

            # Extra validation that is not expressed in the grammar.
            conv = lexer.TokenVal(pct.type)  # allocation will be cached
            if conv in 'eEfFgG':
                p_die("osh printf doesn't support floating point", pct.type)
            # These two could be implemented.  %c needs utf-8 decoding.
            if conv == 'c':
                p_die("osh printf doesn't support single characters (bytes)",
                      pct.type)

        else:
            p_die('Expected a printf format character', self.cur_token)

        return pct

    def Parse(self):
        # type: () -> List[printf_part_t]
        """Parse the whole format string and return its parts in order."""
        result = []  # type: List[printf_part_t]

        self._Next(lex_mode_e.PrintfOuter)
        while True:
            tok_id = self.token_type
            is_literal = (self.token_kind in (Kind.Lit, Kind.Char) or
                          tok_id in (Id.Format_EscapedPercent,
                                     Id.Unknown_Backslash))

            if is_literal:
                # Note: like in echo -e, Unknown_Backslash is not an error
                # here when shopt -u parse_backslash, because this happens at
                # runtime rather than parse time.
                # Users should use $'' or the future static printf ${x %.3f}.
                result.append(self.cur_token)

            elif tok_id == Id.Format_Percent:
                result.append(self._ParseFormatStr())

            elif tok_id in (Id.Eof_Real, Id.Eol_Tok):
                # Id.Eol_Tok: special case for format string of '\x00'.
                break

            else:
                raise AssertionError(Id_str(self.token_type))

            self._Next(lex_mode_e.PrintfOuter)

        return result
|
| 158 |
|
| 159 |
|
| 160 | class _PrintfState(object):
|
| 161 |
|
| 162 | def __init__(self):
|
| 163 | # type: () -> None
|
| 164 | self.arg_index = 0
|
| 165 | self.backslash_c = False
|
| 166 | self.status = 0 # set to 1 before returning
|
| 167 |
|
| 168 |
|
class Printf(vm._Builtin):
    """The printf builtin: printf [-v var] format [argument ...]"""

    def __init__(
            self,
            mem,  # type: state.Mem
            parse_ctx,  # type: parse_lib.ParseContext
            unsafe_arith,  # type: sh_expr_eval.UnsafeArith
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.unsafe_arith = unsafe_arith
        self.errfmt = errfmt
        # Parsed format strings, keyed by the raw format text, so a format
        # that is used repeatedly is only parsed once.
        self.parse_cache = {}  # type: Dict[str, List[printf_part_t]]

        # this object initialized in main()
        # Used as the timestamp for the special %(...)T argument -2.
        self.shell_start_time = time_.time()

    def _Percent(self, pr, part, varargs, locs):
        # type: (_PrintfState, printf_part.Percent, List[str], List[CompoundWord]) -> Optional[str]
        """Format a single % conversion and return the resulting text.

        Arguments are taken from varargs starting at pr.arg_index, which is
        advanced for every argument used: one for a '*' width, one for a '*'
        precision, and one for the value.  When the arguments run out, the
        value defaults to the empty string.

        Returns None after printing an error through self.errfmt and setting
        pr.status to 1 (invalid width, invalid precision, or a non-integer
        argument to an integer conversion).  Sets pr.backslash_c when a %b
        argument contains the backslash-c escape.  Formatting a negative
        number with %o %u %x %X is a fatal error (e_die).
        """

        num_args = len(varargs)

        # TODO: Cache this?
        flags = []  # type: List[str]
        if len(part.flags) > 0:
            for flag_token in part.flags:
                flags.append(lexer.TokenVal(flag_token))

        width = -1  # nonexistent
        if part.width:
            if part.width.id in (Id.Format_Num, Id.Format_Zero):
                width_str = lexer.TokenVal(part.width)
                width_loc = part.width  # type: loc_t
            elif part.width.id == Id.Format_Star:  # depends on data
                if pr.arg_index < num_args:
                    width_str = varargs[pr.arg_index]
                    width_loc = locs[pr.arg_index]
                    pr.arg_index += 1
                else:
                    width_str = ''  # invalid
                    width_loc = loc.Missing
            else:
                raise AssertionError()

            try:
                width = int(width_str)
            except ValueError:
                # With no argument to blame, point at the '*' in the format.
                if width_loc.tag() == loc_e.Missing:
                    width_loc = part.width
                self.errfmt.Print_("printf got invalid width %r" % width_str,
                                   blame_loc=width_loc)
                pr.status = 1
                return None

        precision = -1  # nonexistent
        if part.precision:
            if part.precision.id == Id.Format_Dot:
                # A bare '.' with nothing after it means precision 0.
                precision_str = '0'
                precision_loc = part.precision  # type: loc_t
            elif part.precision.id in (Id.Format_Num, Id.Format_Zero):
                precision_str = lexer.TokenVal(part.precision)
                precision_loc = part.precision
            elif part.precision.id == Id.Format_Star:
                if pr.arg_index < num_args:
                    precision_str = varargs[pr.arg_index]
                    precision_loc = locs[pr.arg_index]
                    pr.arg_index += 1
                else:
                    precision_str = ''
                    precision_loc = loc.Missing
            else:
                raise AssertionError()

            try:
                precision = int(precision_str)
            except ValueError:
                # With no argument to blame, point at the format token.
                if precision_loc.tag() == loc_e.Missing:
                    precision_loc = part.precision
                self.errfmt.Print_('printf got invalid precision %r' %
                                   precision_str,
                                   blame_loc=precision_loc)
                pr.status = 1
                return None

        # The value to format.  has_arg distinguishes "no argument" from an
        # explicit empty-string argument.
        if pr.arg_index < num_args:
            s = varargs[pr.arg_index]
            word_loc = locs[pr.arg_index]  # type: loc_t
            pr.arg_index += 1
            has_arg = True
        else:
            s = ''
            word_loc = loc.Missing
            has_arg = False

        # Note: %s could be lexed into Id.Percent_S.  Although small string
        # optimization would remove the allocation as well.
        typ = lexer.TokenVal(part.type)
        if typ == 's':
            if precision >= 0:
                s = s[:precision]  # truncate

        elif typ == 'q':
            # Most shells give \' for single quote, while OSH gives
            # $'\''  this could matter when SSH'ing.
            # Ditto for $'\\' vs. '\'

            s = j8_lite.MaybeShellEncode(s)

        elif typ == 'b':
            # Process just like echo -e, except \c handling is simpler.

            c_parts = []  # type: List[str]
            lex = match.EchoLexer(s)
            while True:
                id_, tok_val = lex.Next()
                if id_ == Id.Eol_Tok:  # Note: This is really a NUL terminator
                    break

                p = word_compile.EvalCStringToken(id_, tok_val)

                # Unusual behavior: '\c' aborts processing!
                if p is None:
                    pr.backslash_c = True
                    break

                c_parts.append(p)
            s = ''.join(c_parts)

        elif part.type.id == Id.Format_Time or typ in 'diouxX':
            # %(...)T and %d share this complex integer conversion logic

            if match.LooksLikeInteger(s):
                # Note: spaces like ' -42 ' accepted and normalized
                d = mops.FromStr(s)

            else:
                # Check for 'a and "a
                # These are interpreted as the numeric ASCII value of 'a'
                num_bytes = len(s)
                if num_bytes > 0 and s[0] in '\'"':
                    if num_bytes == 1:
                        # NUL after quote
                        d = mops.ZERO
                    elif num_bytes == 2:
                        # Allow invalid UTF-8, because all shells do
                        d = mops.IntWiden(ord(s[1]))
                    else:
                        try:
                            small_i = string_ops.DecodeUtf8Char(s, 1)
                        except error.Expr as e:
                            # Take the numeric value of first char, ignoring
                            # the rest of the bytes.
                            # Something like strict_arith or strict_printf
                            # could throw an error in this case.
                            self.errfmt.Print_(
                                'Warning: %s' % e.UserErrorString(), word_loc)
                            small_i = ord(s[1])

                        d = mops.IntWiden(small_i)

                # No argument means -1 for %(...)T as in Bash Reference Manual
                # 4.2 - "If no argument is specified, conversion behaves as if
                # -1 had been given."
                elif not has_arg and part.type.id == Id.Format_Time:
                    d = mops.MINUS_ONE

                else:
                    if has_arg:
                        blame_loc = word_loc  # type: loc_t
                    else:
                        blame_loc = part.type
                    self.errfmt.Print_(
                        'printf expected an integer, got %r' % s, blame_loc)
                    pr.status = 1
                    return None

            if part.type.id == Id.Format_Time:
                # Initialize timezone:
                #   `localtime' uses the current timezone information initialized
                #   by `tzset'.  The function `tzset' refers to the environment
                #   variable `TZ'.  When the exported variable `TZ' is present,
                #   its value should be reflected in the real environment
                #   variable `TZ' before call of `tzset'.
                #
                # Note: unlike LANG, TZ doesn't seem to change behavior if it's
                # not exported.
                #
                # TODO: In YSH, provide an API that doesn't rely on libc's global
                # state.

                tzcell = self.mem.GetCell('TZ')
                if (tzcell and tzcell.exported and
                        tzcell.val.tag() == value_e.Str):
                    tzval = cast(value.Str, tzcell.val)
                    posix.putenv('TZ', tzval.s)

                time_.tzset()

                # Handle special values:
                #   User can specify two special values -1 and -2 as in Bash
                #   Reference Manual 4.2: "Two special argument values may be
                #   used: -1 represents the current time, and -2 represents the
                #   time the shell was invoked." from
                #   https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf
                if mops.Equal(d, mops.MINUS_ONE):  # -1 is current time
                    # TODO: 2038 problem
                    ts = time_.time()
                elif mops.Equal(d, mops.MINUS_TWO):  # -2 is shell start time
                    ts = self.shell_start_time
                else:
                    ts = mops.BigTruncate(d)

                # typ looks like '(FORMAT)T'; the slice keeps only FORMAT.
                s = time_.strftime(typ[1:-2], time_.localtime(ts))
                if precision >= 0:
                    s = s[:precision]  # truncate

            else:  # typ in 'diouxX'
                # Disallowed because it depends on 32- or 64- bit
                if mops.Greater(mops.ZERO, d) and typ in 'ouxX':
                    # TODO: Don't truncate it
                    e_die(
                        "Can't format negative number with %%%s: %d" %
                        (typ, mops.BigTruncate(d)), part.type)

                if typ == 'o':
                    s = mops.ToOctal(d)
                elif typ == 'x':
                    s = mops.ToHexLower(d)
                elif typ == 'X':
                    s = mops.ToHexUpper(d)
                else:  # diu
                    s = mops.ToStr(d)  # without spaces like ' -42 '

                # There are TWO different ways to ZERO PAD, and they differ on
                # the negative sign!  See spec/builtin-printf

                zero_pad = 0  # no zero padding
                if width >= 0 and '0' in flags:
                    zero_pad = 1  # style 1
                elif precision > 0 and len(s) < precision:
                    zero_pad = 2  # style 2

                if zero_pad:
                    negative = (s[0] == '-')
                    if negative:
                        digits = s[1:]
                        sign = '-'
                        if zero_pad == 1:
                            # [%06d] -42 becomes [-00042] (6 TOTAL)
                            n = width - 1
                        else:
                            # [%6.6d] -42 becomes [-000042] (1 for '-' + 6)
                            n = precision
                    else:
                        digits = s
                        sign = ''
                        if zero_pad == 1:
                            n = width
                        else:
                            n = precision
                    s = sign + digits.rjust(n, '0')

        else:
            raise AssertionError()

        # Space padding applies to every conversion: the '-' flag pads on the
        # right, otherwise padding goes on the left.
        if width >= 0:
            if '-' in flags:
                s = s.ljust(width, ' ')
            else:
                s = s.rjust(width, ' ')
        return s

    def _Format(self, parts, varargs, locs, out):
        # type: (List[printf_part_t], List[str], List[CompoundWord], List[str]) -> int
        """Hairy printf formatting logic.

        Appends the formatted pieces to `out`, re-running the whole format
        for as long as unconsumed arguments remain.

        Returns 0 on success, or the non-zero status recorded by _Percent(),
        in which case the piece that failed is not appended.
        """

        pr = _PrintfState()
        num_args = len(varargs)

        while True:  # loop over arguments
            for part in parts:  # loop over parsed format string
                UP_part = part
                if part.tag() == printf_part_e.Literal:
                    part = cast(Token, UP_part)
                    if part.id == Id.Format_EscapedPercent:
                        s = '%'
                    else:
                        s = word_compile.EvalCStringToken(
                            part.id, lexer.LazyStr(part))

                elif part.tag() == printf_part_e.Percent:
                    part = cast(printf_part.Percent, UP_part)

                    s = self._Percent(pr, part, varargs, locs)
                    if pr.status != 0:
                        return pr.status

                else:
                    raise AssertionError()

                out.append(s)

                if pr.backslash_c:  # 'printf %b a\cb xx' - \c terminates processing!
                    break

            if pr.backslash_c:
                # \c ends ALL output, not just the current pass over the
                # format.  Without this check, remaining arguments would be
                # recycled through the format: printf '%b' 'a\cb' xx must
                # print 'a', not 'axx'.
                break

            if pr.arg_index == 0:
                # We went through ALL parts and didn't consume ANY arg.
                # Example: print x y
                break
            if pr.arg_index >= num_args:
                # We printed all args
                break
            # If there are more args, keep going.  This implement 'arg recycling'
            # behavior
            #    printf '%s ' 1 2 3 => 1 2 3

        return 0

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """
        printf: printf [-v var] format [argument ...]

        Returns 0 on success, 2 when the format string fails to parse, or
        the non-zero status from formatting.  Nothing is written or assigned
        when formatting fails.
        """
        attrs, arg_r = flag_util.ParseCmdVal('printf', cmd_val)
        arg = arg_types.printf(attrs.attrs)

        fmt, fmt_loc = arg_r.ReadRequired2('requires a format string')
        varargs, locs = arg_r.Rest2()

        #log('fmt %s', fmt)
        #log('vals %s', vals)

        arena = self.parse_ctx.arena
        if fmt in self.parse_cache:
            parts = self.parse_cache[fmt]
        else:
            line_reader = reader.StringLineReader(fmt, arena)
            # TODO: Make public
            # Named fmt_lexer so the imported `lexer` module isn't shadowed.
            fmt_lexer = self.parse_ctx.MakeLexer(line_reader)
            parser = _FormatStringParser(fmt_lexer)

            with alloc.ctx_SourceCode(arena,
                                      source.ArgvWord('printf', fmt_loc)):
                try:
                    parts = parser.Parse()
                except error.Parse as e:
                    self.errfmt.PrettyPrintError(e)
                    return 2  # parse error

            # Only successfully parsed formats are cached.
            self.parse_cache[fmt] = parts

        if 0:  # debugging aid, disabled
            print()
            for part in parts:
                part.PrettyPrint()
                print()

        out = []  # type: List[str]
        status = self._Format(parts, varargs, locs, out)
        if status != 0:
            return status  # failure

        result = ''.join(out)
        if arg.v is not None:
            # printf -v: assign the result to the named location instead of
            # writing it to stdout.
            # TODO: get the location for arg.v!
            v_loc = loc.Missing
            lval = self.unsafe_arith.ParseLValue(arg.v, v_loc)
            state.BuiltinSetValue(self.mem, lval, value.Str(result))
        else:
            mylib.Stdout().write(result)
        return 0
|