OILS / benchmarks / time_.py View on Github | oilshell.org

265 lines, 184 significant
1#!/usr/bin/env python2
2"""
3time.py -- Replacement for coreutils 'time'.
4
5TODO:
6(Must be Python 3 because it's used before a Python 2 WEDGE can be installed.)
7
8The interface of this program is modelled after:
9
10/usr/bin/time --append --output foo.txt --format '%x %e'
11
12Why we're not using /usr/bin/time:
13- We need to print extra TSV fields at the end of the row, but it prints a newline
14- It prints extraneous output: 'Command exited with non-zero status 1'
15- Elapsed time only has 2 digits of precision. Apparently it uses times()
16 rather than getrusage()? https://unix.stackexchange.com/questions/70653/increase-e-precision-with-usr-bin-time-shell-command
17
18Problems with resource.getrusage() in Python:
19
20 The memory usage of dash and bash get obscured by Python! Because
21 subprocess.call() does a fork(), which includes Python's address space.
22 # https://stackoverflow.com/questions/13880724/python-getrusage-with-rusage-children-behaves-stangely
23
24Problems with bash time builtin
25- has no way to get the exit code
26- writes to stderr, so you it's annoying to get both process stderr and the
27 timing
28
29This program also writes CSV and TSV directly.
30- CSV values get escaped
31- TSV values can't have tabs
32
33So we use a tiny C program time-helper.c to do it, and not /usr/bin/time.
34"""
35from __future__ import print_function
36
37import csv
38try:
39 import hashlib # PY3
40except ImportError:
41 hashlib = None
42 import md5
43
44import optparse
45import os
46import sys
47import subprocess
48import time
49
50THIS_DIR = os.path.dirname(os.path.abspath(sys.argv[0]))
51
52time1 = os.path.abspath(os.path.join(THIS_DIR, '../_devbuild/bin/time-helper'))
53# Pre-built one
54time2 = '/wedge/oils-for-unix.org/pkg/time-helper/2023-02-28/time-helper'
55
56if os.path.exists(time1):
57 TIME_HELPER = time1
58elif os.path.exists(time2):
59 TIME_HELPER = time2
60else:
61 raise AssertionError('time-helper not found')
62
63
64def log(msg, *args):
65 if args:
66 msg = msg % args
67 print(msg, file=sys.stderr)
68
69
70def Options():
71 """Returns an option parser instance."""
72 p = optparse.OptionParser('time.py [options] ARGV...')
73 p.add_option('--tsv',
74 dest='tsv',
75 default=False,
76 action='store_true',
77 help='Write output in TSV format')
78 p.add_option(
79 '--rusage',
80 dest='rusage',
81 default=False,
82 action='store_true',
83 help='Also show user time, system time, and max resident set size')
84 p.add_option(
85 '--rusage-2',
86 dest='rusage_2',
87 default=False,
88 action='store_true',
89 help='Also show page faults, context switches, etc.')
90 p.add_option('--time-span',
91 dest='time_span',
92 default=False,
93 action='store_true',
94 help='Also show start_time and end_time')
95 p.add_option('-o',
96 '--output',
97 dest='output',
98 default=None,
99 help='Name of output file to write to to')
100 p.add_option('-a',
101 '--append',
102 dest='append',
103 default=False,
104 action='store_true',
105 help='Append to the file instead of overwriting it')
106 p.add_option(
107 '--field',
108 dest='fields',
109 default=[],
110 action='append',
111 help='A string to append to each row, after the exit code and status')
112 p.add_option('--time-fmt',
113 dest='time_fmt',
114 default='%.4f',
115 help='sprintf format for elapsed seconds (float)')
116 p.add_option(
117 '--stdout',
118 dest='stdout',
119 default=None,
120 help='Save stdout to this file, and add a column for its md5 checksum')
121 p.add_option(
122 '--print-header',
123 dest='print_header',
124 default=False,
125 action='store_true',
126 help=
127 'Print an XSV header, respecting --time-span, --rusage, --rusage-2, --stdout, --field, and --tsv'
128 )
129 return p
130
131
132def MakeTableOutput(f, tsv):
133 if tsv: # TSV output.
134 out = csv.writer(f,
135 delimiter='\t',
136 lineterminator='\n',
137 doublequote=False,
138 quoting=csv.QUOTE_NONE)
139 else:
140 out = csv.writer(f)
141 return out
142
143
144def main(argv):
145 (opts, child_argv) = Options().parse_args(argv[1:])
146
147 # Used only for 'time' format string. For --field, we use our own.
148 sep = '\t' if opts.tsv else ','
149
150 if opts.print_header:
151 if child_argv:
152 raise RuntimeError('No arguments allowed with --print-header')
153 names = ['status', 'elapsed_secs']
154 if opts.time_span:
155 names.extend(['start_time', 'end_time'])
156 if opts.rusage:
157 names.extend(['user_secs', 'sys_secs', 'max_rss_KiB'])
158 if opts.rusage_2:
159 names.extend(['minor_faults', 'major_faults', 'swaps', 'in_block', 'out_block', 'signals', 'voluntary_ctx', 'involuntary_ctx'])
160 if opts.stdout:
161 names.append('stdout_md5sum')
162 names.extend(opts.fields)
163
164 if opts.output:
165 f = open(opts.output, 'w')
166 else:
167 f = sys.stdout
168 table_out = MakeTableOutput(f, opts.tsv)
169 table_out.writerow(names)
170 return 0
171
172 if not child_argv:
173 raise RuntimeError('Expected a command')
174
175 # built by build/dev.sh all
176 time_argv = [TIME_HELPER, '-d', sep]
177
178 if opts.append:
179 time_argv.append('-a')
180
181 if opts.output:
182 time_argv.extend(['-o', opts.output])
183 else:
184 pass
185 # Somehow this doesn't work? Getting binary data?
186 # time_argv.extend(['-o', '/dev/stdout'])
187
188 # %x: exit status
189 # %e: elapsed
190 time_argv.extend(['-x', '-e'])
191 if opts.time_span:
192 # begin and end time
193 time_argv.extend(['-y', '-z'])
194 if opts.rusage:
195 # %U: user time
196 # %S: sys time
197 # %M: Max RSS
198 time_argv.extend(['-U', '-S', '-M'])
199 if opts.rusage_2:
200 time_argv.extend(['-m'])
201
202 time_argv.append('--')
203 time_argv.extend(child_argv)
204 #log('time_argv %s', time_argv)
205
206 start_time = time.time()
207 try:
208 if opts.stdout:
209 # We don't want to intermingle 'time' stdout with the program's stdout
210 if not opts.output:
211 raise RuntimeError('Flag -o is required when --stdout')
212 with open(opts.stdout, 'w') as f:
213 exit_code = subprocess.call(time_argv, stdout=f)
214 else:
215 exit_code = subprocess.call(time_argv)
216 except OSError as e:
217 log('Error executing %s: %s', time_argv, e)
218 return 1
219
220 elapsed = time.time() - start_time
221 if opts.stdout:
222 if hashlib:
223 m = hashlib.md5() # PY3
224 else:
225 m = md5.new() # PY2
226 with open(opts.stdout, 'rb') as f:
227 while True:
228 chunk = f.read(4096)
229 if not chunk:
230 break
231 m.update(chunk)
232 maybe_stdout = [m.hexdigest()]
233 else:
234 maybe_stdout = [] # no field
235
236 more_cells = maybe_stdout + opts.fields
237
238 if opts.output:
239 with open(opts.output, 'a') as f: # append
240 if more_cells:
241 f.write(sep) # tab or comma for more cells
242 table_out = MakeTableOutput(f, opts.tsv)
243 table_out.writerow(more_cells)
244 else:
245 f.write('\n') # end the row
246 else:
247 if more_cells:
248 log("More cells that -o would have written: %s", more_cells)
249
250 # Preserve the command's exit code. (This means you can't distinguish
251 # between a failure of time.py and the command, but that's better than
252 # swallowing the error.)
253 return exit_code
254
255
256if __name__ == '__main__':
257 try:
258 status = main(sys.argv)
259 except KeyboardInterrupt as e:
260 print('%s: interrupted with Ctrl-C' % sys.argv[0], file=sys.stderr)
261 sys.exit(1)
262 except RuntimeError as e:
263 log('time_.py: %s', e)
264 status = 2
265 sys.exit(status)