| 1 | #!/usr/bin/env python2
|
| 2 | # Copyright 2019 Wilke Schwiedop. All rights reserved.
|
| 3 | # Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 | # you may not use this file except in compliance with the License.
|
| 5 | # You may obtain a copy of the License at
|
| 6 | #
|
| 7 | # http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
|
| 9 | from __future__ import print_function
|
| 10 |
|
| 11 | import argparse
|
| 12 | import collections
|
| 13 | import itertools
|
| 14 | import os
|
| 15 | # TODO docs.python.org suggests https://pypi.org/project/subprocess32/
|
| 16 | # for POSIX users
|
| 17 | import shlex
|
| 18 | import subprocess
|
| 19 | import sys
|
| 20 |
|
class GNUXargsQuirks(argparse.Action):
    """
    argparse action that stores an option value and resets conflicting options.

    The options sharing this action (replace_str, max_lines, max_args,
    max_chars) influence each other: storing one of the first three clears
    the others on the namespace, so the option given last on the command
    line is the one that stays in effect.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """
        Store `values` under `self.dest` and clear the options it overrides.

        Raises AssertionError if the action was attached to a destination
        this class does not know how to handle.
        """
        setattr(namespace, self.dest, values)
        if self.dest == 'replace_str':
            namespace.max_args = None
            namespace.max_lines = None
        elif self.dest == 'max_lines':
            namespace.max_args = None
            namespace.replace_str = None
        elif self.dest == 'max_args':
            namespace.max_lines = None
            if namespace.max_args == 1 and namespace.replace_str:
                # a max_args of 1 combined with an active replace string is
                # dropped again and the replace string is kept
                namespace.max_args = None
            else:
                namespace.replace_str = None
        elif self.dest == 'max_chars':
            # max_chars does not reset any other option
            pass
        else:
            # raise explicitly: an `assert` statement is removed under -O and
            # would let a mis-wired option pass silently
            raise AssertionError("dest '%s' not handled" % self.dest)
|
| 42 |
|
# Command-line interface of the xargs clone.  The options -I/-i, -L/-l, -n
# and -s use the GNUXargsQuirks action; every other option uses a stock
# argparse action.
xargs = argparse.ArgumentParser(prog='xargs')
xargs.add_argument(
    '-a', '--arg-file',
    metavar='file',
    nargs=1,
    default='-',
    help='read arguments from FILE, not standard input',
)
xargs.add_argument(
    '-E',
    metavar='eof-str',
    dest='eof_str',
    help='set logical EOF string; if END occurs as a line of input, the rest of the input is ignored (ignored if -0 or -d was specified)',
)
xargs.add_argument(
    '-e', '--eof',
    metavar='eof-str',
    nargs='?',
    dest='eof_str',
    help='equivalent to -E END if END is specified; otherwise, there is no end-of-file string',
)
xargs.add_argument(
    '-0', '--null',
    dest='delimiter',
    action='store_const',
    const='\0',
    help='items are separated by a null, not whitespace; disables quote and backslash processing and logical EOF processing',
)
xargs.add_argument(
    '-d', '--delimiter',
    metavar='delimiter',
    dest='delimiter',
    help='items in input stream are separated by CHARACTER, not by whitespace; disables quote and backslash processing and logical EOF processing',
)
xargs.add_argument(
    '-I',
    metavar='replace-str',
    dest='replace_str',
    action=GNUXargsQuirks,
    help='same as --replace=R',
)
xargs.add_argument(
    '-i', '--replace',
    metavar='replace-str',
    nargs='?',
    const='{}',
    dest='replace_str',
    action=GNUXargsQuirks,
    help='replace R in INITIAL-ARGS with names read from standard input; if R is unspecified, assume {}',
)
xargs.add_argument(
    '-L',
    metavar='max-lines',
    dest='max_lines',
    type=int,
    action=GNUXargsQuirks,
    help='use at most MAX-LINES non-blank input lines per command line',
)
xargs.add_argument(
    '-l', '--max-lines',
    metavar='max-lines',
    nargs='?',
    const=1,
    dest='max_lines',
    type=int,
    action=GNUXargsQuirks,
    help='similar to -L but defaults to at most one non-blank input line if MAX-LINES is not specified',
)
xargs.add_argument(
    '-n', '--max-args',
    metavar='max-args',
    dest='max_args',
    type=int,
    action=GNUXargsQuirks,
    help='use at most MAX-ARGS arguments per command line',
)
xargs.add_argument(
    '-s', '--max-chars',
    metavar='max-chars',
    dest='max_chars',
    type=int,
    action=GNUXargsQuirks,
    help='limit length of command line to MAX-CHARS',
)
xargs.add_argument(
    '-P', '--max-procs',
    metavar='max-procs',
    default=1,
    dest='max_procs',
    type=int,
    help='run at most MAX-PROCS processes at a time',
)
xargs.add_argument(
    '--process-slot-var',
    metavar='name',
    help='set environment variable VAR in child processes',
)
xargs.add_argument(
    '-p', '--interactive',
    action='store_true',
    help='prompt before running commands',
)
xargs.add_argument(
    '-t', '--verbose',
    action='store_true',
    help='print commands before executing them',
)
xargs.add_argument(
    '-x', '--exit',
    action='store_true',
    help='exit if the size (see -s) is exceeded',
)
xargs.add_argument(
    '-r', '--no-run-if-empty',
    action='store_true',
    help='if there are no arguments, then do not run COMMAND; if this option is not given, COMMAND will be run at least once',
)
xargs.add_argument(
    '--show-limits',
    action='store_true',
    help='show limits on command-line length',
)
xargs.add_argument(
    '--version',
    action='version',
    version='%(prog)s 0.0.1',
    help='output version information and exit',
)
# positionals: the command to run (defaults to echo) and its fixed arguments
xargs.add_argument('command', nargs='?', default='echo')
xargs.add_argument('initial_arguments', nargs=argparse.REMAINDER)
|
| 65 |
|
class PeekableIterator(object):
    """
    Iterator wrapper that allows looking at the next item without consuming it.

    Works with the iterator protocol of both Python 2 (`next`) and
    Python 3 (`__next__`).
    """

    def __init__(self, iterator):
        # iter() is a no-op for real iterators and lets plain iterables
        # (lists, tuples, ...) be wrapped as well
        self.iterator = iter(iterator)
        # True while self.item holds an item that was peeked but not consumed
        self.peeked = False
        self.item = None

    def peek(self):
        """
        Return the next item but does not advance the iterator further.
        Raise StopIteration if there is no such item.
        """
        if not self.peeked:
            self.item = next(self.iterator)
            self.peeked = True
        return self.item

    def next(self):
        """
        Return the next item and advance the iterator.
        Raise StopIteration if there is no such item.
        """
        if self.peeked:
            self.peeked = False
            # hand out the buffered item and drop our reference to it
            item, self.item = self.item, None
            return item
        return next(self.iterator)

    # Python 3 looks up __next__ for next() and for-loops
    __next__ = next

    def __iter__(self):
        return self
|
| 91 |
|
def read_lines_eof(eof_str, input):
    # type: (str, Iterable[str]) -> Iterator[str]
    """
    Read lines from input until a line equals eof_str or EOF is reached.

    The terminating line itself is not yielded.  A final line that lacks the
    trailing newline is recognised as the EOF marker too.
    """
    terminated = eof_str + '\n'
    for line in input:
        if line == terminated or line == eof_str:
            return
        yield line
|
| 96 |
|
def str_memsize(*strings):
    # type: (*str) -> int
    """
    Return the space the given strings occupy in an argv:
    the length of every string plus one extra unit per string.
    """
    return sum(map(len, strings)) + len(strings)
|
| 101 |
|
def is_complete_line(line):
    # type: (str) -> bool
    """
    Tell whether a line counts as a complete, non-blank input line.

    A line is complete when, ignoring one trailing newline, it is not empty
    and does not end in a space or tab.  The last line of the input may lack
    the trailing newline; it is judged by its actual last character.
    """
    if line.endswith('\n'):
        line = line[:-1]
    return bool(line) and line[-1] not in (' ', '\t')
|
| 105 |
|
def argsplit_ws(lines):
    # type: (Iterable[str]) -> Iterator[str]
    """Yield the arguments of every line, tokenised with shlex.split()."""
    for raw_line in lines:
        # TODO this might require some more testing
        tokens = shlex.split(raw_line)
        for token in tokens:
            yield token
|
| 113 |
|
def argsplit_delim(delim, lines):
    # type: (str, Iterable[str]) -> Iterator[str]
    """
    Yield the arguments found by splitting the concatenated lines at delim.

    Arguments may span several lines.  Empty arguments between two
    delimiters are yielded; an empty remainder after the last delimiter
    is not.
    """
    pending = []
    for line in lines:
        for char in line:
            if char != delim:
                pending.append(char)
                continue
            yield "".join(pending)
            pending = []
    if pending:
        yield "".join(pending)
|
| 126 |
|
def read_n_xargs_lines(linec, line_iter):
    # type: (int, Iterator[str]) -> Iterator[str]
    """
    Yield lines from line_iter until linec complete lines have been seen.

    Lines for which is_complete_line() is false are yielded too, but do not
    count towards linec.  Stops early when line_iter is exhausted.
    """
    while linec > 0:
        try:
            line = next(line_iter)
        except StopIteration:
            # end of input: finish normally; a StopIteration leaking out of
            # a generator body is turned into RuntimeError by PEP 479
            return
        yield line
        if is_complete_line(line):
            linec -= 1
|
| 134 |
|
def take_chars(charc, iterator):
    # type: (int, Iterator[str]) -> Iterator[str]
    """
    Yield items from iterator as long as their combined str_memsize() fits
    into charc.

    iterator must provide peek(); the first item that does not fit is left
    unconsumed.  Stops when the iterator is exhausted.
    """
    while True:
        try:
            upcoming = iterator.peek()
        except StopIteration:
            # input exhausted: finish normally; a StopIteration leaking out
            # of a generator body is turned into RuntimeError by PEP 479
            return
        charc -= str_memsize(upcoming)
        if charc < 0:
            return
        yield next(iterator)
|
| 141 |
|
def take(n, iterator):
    # type: (int, Iterator[Any]) -> Iterator[Any]
    """Yield at most n items from iterator, stopping early if it runs out."""
    for _ in range(n):
        try:
            item = next(iterator)
        except StopIteration:
            # fewer than n items left: finish normally; a StopIteration
            # leaking out of a generator body is turned into RuntimeError
            # by PEP 479
            return
        yield item
|
| 146 |
|
def group_args_lines(max_lines, input):
    # type: (int, Iterator[str]) -> Iterator[List[str]]
    """
    Yield lists of arguments, each built from up to max_lines complete
    lines of input.  Stops once no further argument can be read.
    """
    while True:
        group = list(argsplit_ws(read_n_xargs_lines(max_lines, input)))
        if not group:
            # nothing left to read; returning (instead of letting a
            # StopIteration escape) keeps the generator valid under PEP 479
            return
        yield group
|
| 154 |
|
def group_args(max_chars, max_args, arg_iter):
    # type: (Optional[int], Optional[int], Iterator[str]) -> Iterator[List[str]]
    """
    Yield lists of arguments limited by max_chars and/or max_args.

    A limit that is None (or 0) is not applied; without any limit all
    arguments end up in a single group.  Stops when arg_iter is exhausted.

    Exits the program (SystemExit with a message) when the next argument
    alone does not fit into max_chars: no progress is possible then, and
    empty groups would otherwise be produced forever.
    """
    arg_iter = PeekableIterator(arg_iter)
    while True:
        try:
            arg_iter.peek()
        except StopIteration:
            # input exhausted: finish normally; a StopIteration leaking out
            # of a generator body is turned into RuntimeError by PEP 479
            return
        it = arg_iter
        if max_chars:
            it = take_chars(max_chars, it)
        if max_args:
            it = take(max_args, it)
        group = list(it)
        if not group:
            sys.exit("xargs: argument line too long")
        yield group
|
| 165 |
|
def replace_args(initial_arguments, replace_str, additional_arguments):
    # type: (Sequence[str], str, Iterable[str]) -> Iterator[str]
    """
    Yield initial_arguments with replace_str substituted.

    An argument that equals replace_str is replaced by all of
    additional_arguments, each as a separate argument.  An argument that
    merely contains replace_str (e.g. 'prefix-{}') stays a single argument
    in which every occurrence is replaced by the additional arguments joined
    with a space.  Any other argument is yielded unchanged.
    """
    additional_arguments = list(additional_arguments)
    joined = None  # built lazily, only needed for embedded occurrences
    for arg in initial_arguments:
        if arg == replace_str:
            for x in additional_arguments:
                yield x
        elif replace_str and replace_str in arg:
            if joined is None:
                joined = ' '.join(additional_arguments)
            yield arg.replace(replace_str, joined)
        else:
            yield arg
|
| 175 |
|
def build_cmdlines_replace(command, initial_arguments, replace_str, arggroup_iter):
    # type: (str, Sequence[str], str, Iterator[Iterator[str]]) -> Iterator[List[str]]
    """
    Yield one command-line (a list usable with subprocess.Popen) per
    argument group: the command followed by initial_arguments as rewritten
    by replace_args() for that group.
    """
    for group in arggroup_iter:
        substituted = replace_args(initial_arguments, replace_str, group)
        yield [command] + list(substituted)
|
| 193 |
|
def build_cmdlines(command, initial_arguments, arggroup_iter):
    # type: (str, Sequence[str], Iterator[Iterator[str]]) -> Iterator[List[str]]
    """
    Yield one command-line (a list usable with subprocess.Popen) per
    argument group: command, then initial_arguments, then the group.
    """
    prefix = [command] + list(initial_arguments)
    for group in arggroup_iter:
        yield prefix + list(group)
|
| 203 |
|
def check_items(p, on_false, cmdline_iter):
    """
    Yield the items of cmdline_iter for which predicate p is true.
    For every other item on_false() is called (without arguments) and the
    item is dropped.
    """
    for candidate in cmdline_iter:
        if not p(candidate):
            on_false()
            continue
        yield candidate
|
| 210 |
|
def tee_cmdline(cmdline_iter):
    # type: (Iterator[List[str]]) -> Iterator[List[str]]
    """Echo every command-line to stderr, then pass it on unchanged."""
    for words in cmdline_iter:
        print(*words, file=sys.stderr)
        yield words
|
| 217 |
|
def prompt_user(cmdline_iter):
    # type: (Iterator[List[str]]) -> Iterator[List[str]]
    """
    Prompt the user for each cmdline.

    The command-line is printed to stderr and the answer is read from
    /dev/tty.  Only command-lines whose answer starts with 'y' or 'Y' are
    yielded; an empty answer (end-of-file on the terminal) counts as "no".
    """
    with open("/dev/tty", 'r') as tty:
        for cmdline in cmdline_iter:
            print(*cmdline, end=' ?...', file=sys.stderr)
            # the prompt has no trailing newline, so push it out explicitly
            sys.stderr.flush()
            response = tty.readline()
            # slice instead of indexing: readline() returns '' at EOF
            if response[:1] not in ('y', 'Y'):
                continue
            yield cmdline
|
| 228 |
|
def wait_open_slot(processes):
    # type: (List[Optional[Any]]) -> int
    """
    Block until a slot in processes is free and return its index.

    A slot is free when it holds None or a process whose poll() reports
    that it has finished.  Returns None if processes is empty.
    """
    while processes:
        for i, p in enumerate(processes):
            # process doesn't yet exist or has finished
            if p is None or p.poll() is not None:
                return i
        pid, status = os.wait()
        # os.wait() has consumed the exit status of that child, so a later
        # poll() could no longer obtain it; record it on the matching
        # process object here.
        for p in processes:
            if p is not None and p.pid == pid and p.returncode is None:
                if os.WIFSIGNALED(status):
                    # same convention as subprocess: negative signal number
                    p.returncode = -os.WTERMSIG(status)
                elif os.WIFEXITED(status):
                    p.returncode = os.WEXITSTATUS(status)
                break
|
| 237 |
|
def map_errcode(rc):
    # type: (int) -> int
    """
    Translate the returncode of a child-process into the returncode of the
    main process:

        0          -> 0
        1 .. 125   -> 123
        255        -> 124
        negative   -> 125
        any other  -> 1
    """
    if rc == 0:
        return 0
    if rc < 0:
        return 125
    if rc <= 125:
        return 123
    if rc == 255:
        return 124
    return 1
|
| 250 |
|
def _open_inputs(arg_file):
    """Return (xargs_input, cmd_input, opened_files) for an --arg-file value."""
    # argparse stores the value of an nargs=1 option as a one-element list
    if isinstance(arg_file, (list, tuple)):
        arg_file = arg_file[0]
    if arg_file == '-':
        # arguments come from stdin, so the commands must not read from it
        devnull = open(os.devnull, 'r')
        return sys.stdin, devnull, [devnull]
    # arguments come from a file; the commands keep the original stdin
    arg_handle = open(arg_file, 'r')
    return arg_handle, sys.stdin, [arg_handle]


def _plan_cmdlines(xargs_args, xargs_input):
    """Return an iterator of command-lines, or None if nothing is to be run."""
    if xargs_args.eof_str:
        xargs_input = read_lines_eof(xargs_args.eof_str, xargs_input)

    # phase 2: parse and group args
    if xargs_args.max_lines:
        # invariants established by the option normalisation of the caller
        assert not xargs_args.max_args
        assert not xargs_args.delimiter
        assert xargs_args.exit
        arggroup_iter = group_args_lines(xargs_args.max_lines, xargs_input)
    else:
        if xargs_args.delimiter:
            arg_iter = argsplit_delim(xargs_args.delimiter, xargs_input)
        else:
            arg_iter = argsplit_ws(xargs_input)
        # if exit is True, max_chars is checked later
        arggroup_iter = group_args(
            xargs_args.max_chars if not xargs_args.exit else None,
            xargs_args.max_args,
            arg_iter
        )

    arggroup_iter = PeekableIterator(arggroup_iter)
    try:
        first_group = arggroup_iter.peek()
    except StopIteration:
        if xargs_args.no_run_if_empty:
            return None
        # without -r the command runs once, with no additional arguments
        arggroup_iter = [[]]
    else:
        # TODO not even sure how the interaction with -I is supposed to work
        # echo | xargs -I {} echo {} : dont run
        # echo | xargs -I {} echo {} "x" : dont run
        # echo | xargs -I {} echo "x" : dont run
        # echo x | xargs -I {} echo : run
        # echo xx | xargs -I {} -d 'x' echo {} : run 3 times ('', '', '\n')
        if xargs_args.no_run_if_empty and not first_group:
            return None

    # phase 3: build command-lines
    if xargs_args.replace_str:
        cmdline_iter = build_cmdlines_replace(
            xargs_args.command,
            xargs_args.initial_arguments,
            xargs_args.replace_str,
            arggroup_iter
        )
    else:
        cmdline_iter = build_cmdlines(
            xargs_args.command,
            xargs_args.initial_arguments,
            arggroup_iter
        )

    if xargs_args.max_chars is not None and xargs_args.exit:
        # with -x an oversized command-line terminates the program
        cmdline_iter = check_items(
            lambda c: str_memsize(*c) < xargs_args.max_chars,
            lambda: sys.exit(1),
            cmdline_iter
        )

    if xargs_args.interactive:
        cmdline_iter = prompt_user(cmdline_iter)
    elif xargs_args.verbose:
        cmdline_iter = tee_cmdline(cmdline_iter)
    return cmdline_iter


def _run_cmdlines(xargs_args, cmdline_iter, cmd_input):
    """Execute the command-lines and return the exit code for the program."""
    if xargs_args.max_procs > 1:
        ps = [None] * xargs_args.max_procs
        environ = os.environ.copy()
        for cmdline in cmdline_iter:
            i = wait_open_slot(ps)
            # stop starting new commands once a finished one has failed
            if ps[i] is not None and ps[i].returncode:
                break
            if xargs_args.process_slot_var:
                environ[xargs_args.process_slot_var] = str(i)
            ps[i] = subprocess.Popen(cmdline, stdin=cmd_input, env=environ)
        codes = [map_errcode(p.wait()) for p in ps if p is not None]
        # no command may have been started at all (e.g. every prompt was
        # declined); max() of an empty sequence would raise
        return max(codes) if codes else 0

    for cmdline in cmdline_iter:
        p = subprocess.Popen(cmdline, stdin=cmd_input)
        if p.wait():
            return map_errcode(p.returncode)
    return 0


def main(xargs_args):
    """
    Run xargs with the parsed (and normalised) options in xargs_args.

    Reads the arguments from stdin or from the file given by --arg-file,
    groups them, builds the command-lines and executes them.  Returns the
    exit code of the program.
    """
    # phase 1: read input
    xargs_input, cmd_input, opened = _open_inputs(xargs_args.arg_file)
    try:
        # phases 2 and 3: group the arguments and build command-lines
        cmdline_iter = _plan_cmdlines(xargs_args, xargs_input)
        if cmdline_iter is None:
            return 0
        # phase 4: execute command-lines
        return _run_cmdlines(xargs_args, cmdline_iter, cmd_input)
    finally:
        for handle in opened:
            handle.close()
|
| 348 |
|
if __name__ == "__main__":
    # Script entry point: parse the options, resolve the interactions between
    # them, then hand over to main().
    xargs_args = xargs.parse_args()

    if xargs_args.delimiter:
        # resolve backslash escapes such as \n or \t given on the command line
        try:
            xargs_args.delimiter = xargs_args.delimiter.decode('string_escape')
        except AttributeError:
            # Python 3: str has no decode() and no 'string_escape' codec
            import codecs
            xargs_args.delimiter = codecs.decode(
                xargs_args.delimiter, 'unicode_escape'
            )
        if len(xargs_args.delimiter) > 1:
            sys.exit(
                "xargs: the delimiter must be a single character "
                "or a single escape sequence"
            )
    if xargs_args.max_chars and not xargs_args.replace_str:
        # the command and its initial arguments use up part of the budget
        base = str_memsize(xargs_args.command, *xargs_args.initial_arguments)
        if base > xargs_args.max_chars:
            sys.exit(
                "xargs: command and initial arguments do not fit "
                "into the --max-chars limit"
            )
        xargs_args.max_chars -= base

    # TODO warnings when appropriate
    # -d disables -e
    if xargs_args.delimiter and xargs_args.eof_str:
        xargs_args.eof_str = None
    if xargs_args.replace_str:
        # -I implies -L 1 (and transitively -x)
        xargs_args.max_lines = 1
        # -I implies -d '\n'
        xargs_args.delimiter = '\n'
    # -L implies -x
    if xargs_args.max_lines is not None:
        xargs_args.exit = True
    # -p implies -t
    if xargs_args.interactive:
        xargs_args.verbose = True

    # (undocumented)
    # if -d then -L equals -n
    if xargs_args.delimiter and xargs_args.max_lines:
        xargs_args.max_args = xargs_args.max_lines
        xargs_args.max_lines = None
    # TODO? -I implies -r
    if xargs_args.replace_str:
        xargs_args.no_run_if_empty = True

    sys.exit(main(xargs_args))
|