| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Usage:
|
| 4 | # build/cpython-defs.sh <function name>
|
| 5 | #
|
| 6 | # Example:
|
| 7 | #
|
| 8 | # # make clean tree of .c files
|
| 9 | # devtools/release.sh quick-oil-tarball
|
| 10 | # devtools/release.sh test-oil-tar # can Ctrl-C this
|
| 11 | #
|
| 12 | # build/cpython-defs.sh oil-py-names # extract names
|
| 13 | # build/cpython-defs.sh filter-methods
|
| 14 | #
|
| 15 | # NOTE: 'build/ovm-compile.sh make-tar' is complex, so it's easier to just extract
|
| 16 | # the tarball, even though it leads to a weird dependency.
|
| 17 |
|
# Strict mode: fail on unset variables, on a failure in any pipeline stage, and
# on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory that contains this script.
# "$0" is quoted so a checkout path with spaces still resolves, and '&&' keeps
# a failed 'cd' from silently producing the caller's working directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
readonly REPO_ROOT

source build/common.sh  # $PY27
source build/dev-shell.sh  # R_LIBS_USER

# Directory that receives extracted.txt, preprocessed.txt and the edit lists.
readonly BASE_DIR=_tmp/cpython-defs

# Could be published in metrics?
readonly PY_NAMES=_tmp/oil-py-names.txt
|
| 32 |
|
# List the .py files in the tarball at their original locations.  For slimming
# down the build.  Similar to build/metrics.sh linecounts-pydeps.
# Hm that doesn't seem to duplicate posixpath while this does?
oil-py-deps() {
  local manifest=_build/oil/opy-app-deps.txt

  # Keep only the rows whose first column ends in .py, and print that column.
  awk '{ if ($1 ~ /\.py$/) print $1 }' < "$manifest"
}
|
| 39 |
|
# Feed the list from oil-py-deps to 'bin/opyc lex-names', store the sorted,
# de-duplicated output in $PY_NAMES, then print that file's line count.
# The pipeline is timed.
oil-py-names() {
  time oil-py-deps \
    | xargs bin/opyc lex-names \
    | sort \
    | uniq \
    > "$PY_NAMES"

  wc -l "$PY_NAMES"
}
|
| 45 |
|
# NOTE: We can replace os with posix.  Will save 700 lines of code, 25K + 25K.
# os.getenv() is a trivial wrapper around os.environ.get().  It gets
# initialized in posixmodule.c.
#
# Prints the name of every file listed by oil-py-deps that contains 'os.'
# preceded by a word boundary.
os-module-deps() {
  #oil-py-deps | xargs egrep --no-filename -o '\bos\.[a-z]+' */*.py | sort | uniq -c |sort -n

  # 'grep -E' rather than 'egrep': the egrep alias is obsolescent and newer GNU
  # grep prints a warning on stderr every time it is invoked.
  oil-py-deps | xargs grep -E -l '\bos\.'
}
|
| 53 |
|
| 54 | # TODO:
|
| 55 | # Write to a separate file like _build/pydefs/intobject.include
|
| 56 | # #ifdef OVM_MAIN
|
| 57 | # #include "intobject.include"
|
| 58 | # #else
|
| 59 | # ...
|
| 60 | # #end
|
| 61 | #
|
# Should those files be checked in and edited by hand?  Or join them somehow
|
| 63 | # with oil-symbols.txt?
|
| 64 | # I think this is hard because of METHODS.
|
| 65 | # Maybe you should have a config file that controls it. It takes a .include
|
| 66 | # file and then whitelist/blacklist, and then generates a new one.
|
| 67 | # could put it in build/pydefs-config.txt
|
| 68 | #
|
# And then reprint the PyMethodDef without docstrings?  It shouldn't be that
|
| 70 | # hard to parse. You can almost do it with a regex, since commas don't appear
|
| 71 | # in the string.
|
| 72 |
|
# Print every 'static ... PyMethodDef NAME[]' array found in the given files
# and record where each one lives.
#
# Arguments:
#   $1     - path prefix; its length is stripped from each file name in output
#   $2...  - files to scan
# Outputs:
#   stdout - a 'FILE <relative path>' header before the first array of each
#            file, followed by the lines of each array
#   stderr - a 'Filtering' message per file with matches, and a final summary
#   $BASE_DIR/method-edit-list.txt - one record per array:
#            'rel_path def_name first_line last_line'
extract-methods() {
  local path_prefix=$1  # to strip
  shift

  local edit_list=$BASE_DIR/method-edit-list.txt

  # NOTE: PyMemberDef is also interesting, but we don't need it for the build.
  gawk -v path_prefix_length="${#path_prefix}" -v edit_list="$edit_list" '
  /static.*PyMethodDef/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # NOTE: We had to adjust stringobject.c and _weakref.c so that the name is
    # on one line!  Not a big deal.
    if (match($0, /static.*PyMethodDef ([a-zA-Z0-9_]+)\[\]/, m)) {
      def_name = m[1];
    } else {
      printf("%s:%d Could not parse declaration name\n",
             FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    printing = 1;
    line_begin = FNR;

    rel_path = substr(FILENAME, path_prefix_length + 1);
    if (!found[FILENAME]) {
      # This special line seems to survive the preprocessor?
      printf("\n");
      printf("FILE %s\n", rel_path);
      printf("\n");

      printf("Filtering %s\n", FILENAME) > "/dev/stderr";
      found[FILENAME] = 1  # count number of files that have matches
    }
  }

  printing { print }

  # Looking for closing brace (with leading space).
  # The POSIX class must sit inside a bracket expression: a bare [:space:]
  # is just the character set {:,s,p,a,c,e}, so indented braces never matched.

  /^[[:space:]]*\}/ && printing {
    # Print the edit list for #ifdef #endif.
    line_end = FNR;
    printf("%s %s %d %d\n", rel_path, def_name, line_begin, line_end) > edit_list;
    printing = 0;
  }

  END {
    for (name in found) {
      num_found++;
    }
    # ARGC also counts the program name in ARGV[0], so subtract it.
    printf("extract-methods.awk: Found definitions in %d out of %d files\n",
           num_found, ARGC - 1) > "/dev/stderr";
  }
  ' "$@"
}
|
| 130 |
|
# Run 'gcc -E' on stdin with $PY27 on the include path and OVM_MAIN and
# WTERMSIG defined.
preprocess() {
  # TODO: Use PREPROC_FLAGS from build/ovm-compile.sh.
  # - What about stuff in pyconfig.h?
  # - Hack to define WTERMSIG!  We really need to include <sys/wait.h>, but
  #   that causes parse errors in cpython_defs.py.  Really we should get rid of
  #   this whole hack!
  # - WIFSTOPPED is another likely thing...

  # "$PY27" is quoted so a path containing whitespace stays a single -I value
  # instead of being split into extra gcc arguments.
  gcc -I "$PY27" -E -D OVM_MAIN -D WTERMSIG -
}
|
| 140 |
|
# Whatever the glob _tmp/oil-tar-test/oil-* expands to when this file is
# loaded (the pattern text itself when nothing matches).
TARBALL_ROOT=$(echo _tmp/oil-tar-test/oil-*)
readonly TARBALL_ROOT
|
| 142 |
|
# Print an '#include "pyconfig.h"' line, then re-invoke this script's
# extract-methods on every *.c file under $TARBALL_ROOT.
extract-all-methods() {
  echo '#include "pyconfig.h"'
  # 52 different instances.  Sometimes multiple ones per file.
  #
  # File names travel NUL-delimited and "$0" is quoted, so paths containing
  # whitespace or quotes reach extract-methods intact.
  find "$TARBALL_ROOT" -type f -a -name '*.c' -print0 \
    | xargs -0 -- "$0" extract-methods "$TARBALL_ROOT/"
}
|
| 149 |
|
# Run build/cpython_defs.py with '.' and 'vendor' on PYTHONPATH, forwarding
# every argument unchanged.
cpython-defs() {
  # Annoying: this depends on Oils for 'R' and 'C', which then indirectly
  # imports the 'typing' module.
  local py_path='.:vendor'

  PYTHONPATH=$py_path build/cpython_defs.py "$@"
}
|
| 155 |
|
# Extract all method tables, preprocess them, and pass the result to
# 'cpython-defs filter' together with $PY_NAMES and the build/oil-defs
# directory.  Afterwards print line counts for the .def files and for the
# intermediate .txt files.
filter-methods() {
  local work_dir=$BASE_DIR
  local defs_dir=build/oil-defs
  local extracted=$work_dir/extracted.txt
  local preprocessed=$work_dir/preprocessed.txt

  mkdir -p "$work_dir"

  extract-all-methods > "$extracted"
  preprocess < "$extracted" > "$preprocessed"

  mkdir -p "$defs_dir"

  #head -n 30 $tmp
  cpython-defs filter "$PY_NAMES" "$defs_dir" < "$preprocessed"

  echo
  find "$defs_dir" -name '*.def' | xargs wc -l | sort -n

  echo
  wc -l "$work_dir"/*.txt

  # syntax check
  #cc _tmp/filtered.c
}
|
| 178 |
|
# Rewrite a file in place so that lines line_begin..line_end are wrapped in
# '#ifdef OVM_MAIN / #include "<def_path>" / #else ... #endif'.
#
# Arguments:
#   $1 - path of the file to edit (also the directory part of the #include)
#   $2 - definition name; the #include target is "$1/$2.def"
#   $3 - line number of the first line of the definition
#   $4 - line number of the last line of the definition
# Outputs:
#   'Wrote <path>' on stdout.  The original file is moved to _tmp/buf.txt.
edit-file() {
  local rel_path=$1
  local def_name=$2
  local line_begin=$3
  local line_end=$4

  local def_path="${rel_path}/${def_name}.def"

  local tmp=_tmp/buf.txt

  # DESTRUCTIVE
  # Every expansion is quoted so a path containing whitespace is moved, read
  # and rewritten as one word.
  mv -- "$rel_path" "$tmp"

  gawk -v def_path="$def_path" -v line_begin="$line_begin" -v line_end="$line_end" '
  NR == line_begin {
    print("#ifdef OVM_MAIN")
    printf("#include \"%s\"\n", def_path)
    print("#else")
    print  # print the PyMethodDef line {
    next
  }
  NR == line_end {
    print  # print the }
    print("#endif");
    next
  }
  # All other lines just get printed
  {
    print
  }
  ' "$tmp" > "$rel_path"

  echo "Wrote $rel_path"
}
|
| 213 |
|
# Apply edit-file to the method-edit-list records that match 'typeobject.c',
# four fields per invocation, by re-invoking this script.
edit-all() {
  # Reversed so that edits to the same file work!  We are always inserting
  # lines.
  #tac $BASE_DIR/method-edit-list.txt | xargs -n 4 -- $0 edit-file

  # One-off editing
  # "$0" and the list path are quoted so a script or base directory path with
  # whitespace is not split into several words.
  grep typeobject.c "$BASE_DIR/method-edit-list.txt" \
    | tac | xargs -n 4 -- "$0" edit-file
}
|
| 224 |
|
# Print every multi-line 'PyTypeObject ... = {' initializer found in the given
# files, pass through single-line ones, and record where the multi-line ones
# live.
#
# Arguments:
#   $1     - path prefix; its length is stripped from each file name in output
#   $2...  - files to scan
# Outputs:
#   stdout - a 'FILE <relative path>' header before the first multi-line
#            initializer of each file, the lines of each initializer, and any
#            one-line initializers verbatim
#   stderr - a 'Filtering' message per file with matches, and a final summary
#   $BASE_DIR/type-edit-list.txt - one record per multi-line initializer:
#            'rel_path def_name first_line last_line'
extract-types() {
  local path_prefix=$1  # to strip
  shift

  local edit_list=$BASE_DIR/type-edit-list.txt

  # NOTE: PyMemberDef is also interesting, but we don't need it for the build.
  gawk -v path_prefix_length="${#path_prefix}" -v edit_list="$edit_list" '
  function maybe_print_file_header() {
    rel_path = substr(FILENAME, path_prefix_length + 1);
    if (!found[FILENAME]) {
      # This special line seems to survive the preprocessor?
      printf("\n");
      printf("FILE %s\n", rel_path);
      printf("\n");

      printf("Filtering %s\n", FILENAME) > "/dev/stderr";
      found[FILENAME] = 1  # count number of files that have matches
    }
  }

  /PyTypeObject.*=.*\{.*\}/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # Found it all on one line
    # (awk comments start with a hash; a leading double slash is an empty
    # regex followed by variable names, not a comment.)
    print
    num_one_line_types++;
    next
  }

  /PyTypeObject.*=.*\{/ {
    if (printing != 0) {
      printf("%s:%d Expected not to be printing\n", FILENAME, FNR) > "/dev/stderr";
      exit 1;
    }
    # Capture the variable name for the edit list.  It was never assigned
    # before, so records carried an empty name field.  It stays empty when the
    # line does not have the plain "PyTypeObject NAME" shape.
    def_name = "";
    if (match($0, /PyTypeObject[ \t]+([a-zA-Z0-9_]+)/, m)) {
      def_name = m[1];
    }
    printing = 1;
    line_begin = FNR;

    maybe_print_file_header()
    num_types++;
  }

  {
    if (printing) {
      print
    }
  }

  # Closing brace, optionally indented.  The POSIX class must sit inside a
  # bracket expression: a bare [:space:] is just the set {:,s,p,a,c,e}.
  /^[[:space:]]*\}/ {
    if (printing) {
      # Print the edit list for #ifdef #endif.
      line_end = FNR;
      printf("%s %s %d %d\n", rel_path, def_name, line_begin, line_end) > edit_list;
      printing = 0;
    }
  }

  END {
    for (name in found) {
      num_found++;
    }
    # ARGC also counts the program name in ARGV[0], so subtract it.
    printf("extract-types.awk: Found %d definitions in %d files (of %d files)\n",
           num_types, num_found, ARGC - 1) > "/dev/stderr";
    printf("extract-types.awk: Also found %d types on one line\n",
           num_one_line_types) > "/dev/stderr";
  }
  ' "$@"
}
|
| 295 |
|
# Re-invoke this script's extract-types on every *.c file under $TARBALL_ROOT.
extract-all-types() {
  # File names travel NUL-delimited and "$0" is quoted, so paths containing
  # whitespace or quotes reach extract-types intact.
  find "$TARBALL_ROOT" -type f -a -name '*.c' -print0 \
    | xargs -0 -- "$0" extract-types "$TARBALL_ROOT/"
}
|
| 300 |
|
| 301 | #
|
| 302 | # Analysis
|
| 303 | #
|
| 304 |
|
# Directory that receives methods.tsv and overview.txt.
METRICS_DIR=_tmp/metrics/cpython-defs
readonly METRICS_DIR
|
| 306 |
|
# Show current Oil definitions literally: concatenate every *.def file under
# build/oil-defs and page the result.
show-oil() {
  local defs_dir=build/oil-defs

  find "$defs_dir" -name '*.def' \
    | xargs cat \
    | less
}
|
| 311 |
|
# Show in a condensed format: run 'cpython-defs audit' over the preprocessed
# definitions, copy its output to _tmp/methods.txt, and print that file's line
# count.
methods-audit() {
  local audit_out=_tmp/methods.txt

  mkdir -p "$METRICS_DIR"
  cpython-defs audit "$PY_NAMES" < "$BASE_DIR/preprocessed.txt" \
    | tee "$audit_out"

  wc -l "$audit_out"
}
|
| 320 |
|
# Run 'cpython-defs tsv' over the preprocessed definitions, echoing the result
# to stdout and saving a copy as $METRICS_DIR/methods.tsv.
methods-tsv() {
  local tsv_path=$METRICS_DIR/methods.tsv

  mkdir -p "$METRICS_DIR"
  cpython-defs tsv "$PY_NAMES" < "$BASE_DIR/preprocessed.txt" \
    | tee "$tsv_path"
}
|
| 326 |
|
# Invoke metrics/cpython-defs.R, forwarding every argument unchanged.
_report() {
  local r_script=metrics/cpython-defs.R

  "$r_script" "$@"
}
|
| 330 |
|
# Call _report with the 'metrics' action and the metrics directory.
report() {
  local action=metrics

  _report "$action" "$METRICS_DIR"
}
|
| 334 |
|
# Regenerate everything for a release: names, filtered definitions, the TSV,
# and the report, whose output is also saved as $METRICS_DIR/overview.txt.
run-for-release() {
  # Repeats what we did at the beginning of the release process, because _tmp/
  # was deleted
  oil-py-names
  filter-methods

  methods-tsv

  local overview=$METRICS_DIR/overview.txt
  report | tee "$overview"
}
|
| 344 |
|
# Diff the sorted output of 'cpython-defs filtered' (_tmp/left.txt) against the
# sorted, unique basenames taken from the first column of
# $BASE_DIR/edit-list.txt (_tmp/right.txt).  Returns diff's exit status.
#
# NOTE(review): the other functions in this file write method-edit-list.txt
# and type-edit-list.txt; nothing here writes edit-list.txt.  Confirm which
# file is intended.
unfiltered() {
  cpython-defs filtered | sort > _tmp/left.txt

  # 'grep -E' rather than 'egrep': the egrep alias is obsolescent and newer GNU
  # grep prints a warning on stderr every time it is invoked.
  awk '{print $1}' "$BASE_DIR/edit-list.txt" \
    | grep -E -o '[^/]+$' \
    | sort | uniq > _tmp/right.txt

  diff -u _tmp/{left,right}.txt
}
|
| 352 |
|
| 353 |
|
# Entry point: run the command line as-is, so the first argument names the
# function to call and the remaining arguments are passed to it.
"$@"
|