| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Spell checker.
|
| 4 | #
|
| 5 | # Usage:
|
| 6 | # doctools/spelling.sh <function name>
|
| 7 | #
|
| 8 | # Examples:
|
| 9 | # doctools/spelling.sh check-oils-docs
|
| 10 | # doctools/spelling.sh check-blog
|
| 11 |
|
# Strict mode: fail on unset variables, on any failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Make this symlink work:
#   ~/git/oilshell/oilshell.org -> ../oil/doctools/spelling.sh
# readlink -f resolves the symlink, so the root is derived from the real file.

# This file is doctools/spelling.sh, so the repo root is two directories up.
# Every expansion is quoted so that a checkout path containing spaces or glob
# characters is kept as a single word.
OIL_ROOT=$(dirname "$(dirname "$(readlink -f "$0")")")
readonly OIL_ROOT
echo "$OIL_ROOT"

readonly SPELLING_PY="$OIL_ROOT/doctools/spelling.py"
readonly BASE_DIR=_tmp/spelling  # relative path
|
| 26 |
|
# Run the spelling.py tool, forwarding all arguments to it.
#
# Globals:   OIL_ROOT (read), SPELLING_PY (read)
# Arguments: passed through unchanged to $SPELLING_PY
# Returns:   the exit status of $SPELLING_PY
spelling() {
  # PYTHONPATH is set to $OIL_ROOT for this one command only.  The program
  # path is quoted so a path containing spaces stays a single word.
  PYTHONPATH="$OIL_ROOT" "$SPELLING_PY" "$@"
}
|
| 30 |
|
# Print a Ninja build file on stdout for the HTML paths read from stdin.
#
# Globals:   OIL_ROOT, SPELLING_PY, BASE_DIR (all read)
# Stdin:     one HTML file path per line
# Stdout:    the 'text-dump' and 'word-split' rules, followed by two build
#            edges per input path: HTML -> $BASE_DIR/<path>.txt and
#            that .txt -> $BASE_DIR/<path>.words
to-ninja() {
  # $in and $out are Ninja variables, so the text is single-quoted; only the
  # "$OIL_ROOT $SPELLING_PY" splice is expanded by the shell, which yields
  # PYTHONPATH=<root> <path to spelling.py> in the generated command.
  # The variable lines under each 'rule' must be indented, otherwise Ninja
  # does not attach them to the rule.
  echo '
rule text-dump
  command = lynx -dump $in > $out
  description = text-dump $in $out

rule word-split
  command = cat $in | PYTHONPATH='"$OIL_ROOT $SPELLING_PY"' word-split > $out
  description = word-split $in $out

'

  local html stem txt words
  # IFS= and -r keep each path byte-for-byte; the || clause still handles a
  # final line that has no trailing newline.
  while IFS= read -r html || [[ -n "$html" ]]; do
    # Replace only the trailing .html, so the output tree mirrors the input
    # tree even when a directory name happens to contain ".html".
    stem=${html%.html}
    txt=$BASE_DIR/$stem.txt
    words=$BASE_DIR/$stem.words

    printf 'build %s: text-dump %s\n\n' "$txt" "$html"
    printf 'build %s: word-split %s\n\n' "$words" "$txt"
  done
}
|
| 54 |
|
# Print each argument on its own line.
#
# Arguments: any number of strings
# Outputs:   one line per argument on stdout; nothing when called without
#            arguments
lines() {
  # printf instead of echo, so arguments such as -n or -e are printed
  # literally rather than being taken as echo options.  The guard keeps the
  # zero-argument case silent (a bare printf '%s\n' would emit an empty line).
  if (( $# > 0 )); then
    printf '%s\n' "$@"
  fi
}
|
| 60 |
|
# Arguments: HTML file paths
# Outputs:   writes _tmp/doc.ninja (relative to the current directory) and
#            then runs ninja on it
doc-to-text() {
  ### Convert the given HTML files via a generated Ninja build

  # The redirect below fails if _tmp is missing, so create it first.
  mkdir -p _tmp

  # for the blog, omit anything that starts with _
  lines "$@" | to-ninja > _tmp/doc.ninja

  ninja -f _tmp/doc.ninja
}
|
| 69 |
|
# Recursively delete $BASE_DIR, listing each removed path (-v).
#
# Globals: BASE_DIR (read)
clean() {
  # Quoted so the path is one word; :? aborts instead of running rm with an
  # empty path; -- stops option parsing before the path.
  rm -r -f -v -- "${BASE_DIR:?}"
}
|
| 73 |
|
# Convert a set of HTML files, then print per-file word counts and run the
# spell check over the resulting .words files.
#
# Globals:   BASE_DIR (read)
# Arguments: $1    - subdirectory of $BASE_DIR searched for *.words files
#            $2... - HTML files, passed on to doc-to-text
check-tree() {
  local subdir=$1
  shift

  # Depends on build/doc.sh all-markdown
  doc-to-text "$@"

  echo
  echo 'Word Counts'
  echo

  # For curiosity: word count by file.
  # NUL-delimited so file names with spaces reach wc intact.
  find "$BASE_DIR/$subdir" -name '*.words' -print0 | xargs -0 wc -l | sort -n

  # Use alphabetical order.  xargs re-invokes this script ("$0") with the
  # 'spelling' function name, followed by the sorted list of files.
  find "$BASE_DIR/$subdir" -name '*.words' -print0 | sort -z | xargs -0 \
    "$0" spelling check --known-words /usr/share/dict/words
}
|
| 92 |
|
# Spell-check a single .words file.
#
# Arguments: $1 - path to a .words file
#                 (default: _tmp/spelling/_release/VERSION/doc/eggex.words)
check-one() {
  local words=${1:-_tmp/spelling/_release/VERSION/doc/eggex.words}

  # Quoted so a path containing spaces is passed as one argument.
  spelling check --known-words /usr/share/dict/words "$words"
}
|
| 98 |
|
# Spell-check every *.html file directly under _release/VERSION/doc.
check-oils-docs() {
  local doc_dir=_release/VERSION/doc

  # Left unquoted on purpose: the glob has to expand to the individual pages.
  local -a pages=( $doc_dir/*.html )

  check-tree "$doc_dir" "${pages[@]}"
}
|
| 103 |
|
# Spell-check every *.html file directly under _release/VERSION/doc/ref.
check-doc-ref() {
  local ref_dir=_release/VERSION/doc/ref

  # Left unquoted on purpose: the glob has to expand to the individual pages.
  local -a pages=( $ref_dir/*.html )

  check-tree "$ref_dir" "${pages[@]}"
}
|
| 108 |
|
# Spell-check the published blog posts under _site/blog.
check-blog() {
  local blog_dir=_site/blog

  # Omit drafts: [^_] skips any file whose name starts with an underscore.
  # Left unquoted on purpose so the glob expands to the individual posts.
  local -a posts=( $blog_dir/20??/*/[^_]*.html )

  check-tree "$blog_dir" "${posts[@]}"
}
|
| 113 |
|
# Dispatch: treat the first command-line argument as the command to run
# (normally one of the functions defined in this file) and pass the remaining
# arguments to it.
"$@"
|