Documentation Source Text

Check-in [effb349be5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Add a couple of other typo checking heuristics.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: effb349be5e11d3e5dc52d6b74c735e69b8d4077
User & Date: shaneh 2010-09-02 04:20:15.000
Context
2010-09-02
04:20
More typo fixes. (check-in: db66d3a1d4 user: shaneh tags: trunk)
04:20
Add a couple of other typo checking heuristics. (check-in: effb349be5 user: shaneh tags: trunk)
2010-09-01
11:49
New hyperlinks to FTS and RTree. New requirement that automatically generated rowids must be positive. (check-in: ddcf89bab0 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to spell_chk.sh.
1
2
3
4
5
6

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22








23
24
25
26
27
28







29











#!/bin/sh
#
# Usage:
#
#      sh spell_chk.sh doc '*.html' ./custom.txt
#      sh spell_chk.sh pages '*.in'

#
# This script scans every file under directory $1 (e.g. "doc") whose name
# matches the pattern $2 (e.g. "*.html") and reports spelling mistakes.
#
# Custom words (words to ignore) are read from the optional third argument
# (e.g. ./custom.txt) and compiled into the aspell dictionary ./custom.rws.
#
# Rebuild ./custom.rws only when a third argument was given and it names an
# existing regular file; otherwise whatever ./custom.rws is already on disk
# is used as-is by the aspell call below.
if [ $# -ge 3 ] && [ -f $3 ]
then
  echo 'Updating custom dictionary '
  aspell --lang=en create master ./custom.rws < $3
fi
#
echo "Spell checking $1/$2 "
# List the matching files, drop every path that contains "matrix", and
# process the remaining paths one per line.
# NOTE(review): $1 and $file are expanded unquoted throughout, so paths
# containing whitespace are split into several words.
find $1 -name "$2" -print | grep -v matrix | while read file
do
  echo "Checking $file..."








  # aspell's "list" option just lists all the misspelled words w/o any context.
  # we pass this list to grep to get line numbers.
  # Every file is filtered in aspell's html mode, whatever its extension.
  aspell --extra-dicts ./custom.rws --mode=html list < $file | sort | uniq | while read word
  do
    # -H -n -o: print file name, line number and only the matched word;
    # \b...\b restricts the match to whole words.
    grep -H -n -o -E "\b$word\b" $file
  done







done

















>












|


|
>
>
>
>
>
>
>
>


|

|

>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/sh
#
# Usage:
#
#      sh spell_chk.sh doc '*.html' ./custom.txt
#      sh spell_chk.sh pages '*.in'
#      sh spell_chk.sh ../sqlite/src '*.c'
#
# Arguments:
#   $1  directory to search (find descends into subdirectories)
#   $2  file-name pattern handed to "find -name"; quote it so the shell
#       does not expand it before find sees it
#   $3  optional word list (words to ignore); when it names an existing
#       file, the aspell dictionary ./custom.rws is rebuilt from it
#
# This script scans every file under $1 whose name matches $2 (paths that
# contain "matrix" are skipped) and reports, one "file:line:match" per hit:
#   * words aspell considers misspelled,
#   * commonly doubled words ("the the", "is is", ...),
#   * likely "a"/"an" mix-ups, for ordinary words and for acronyms.
#
# Requires aspell and a grep that understands -P (Perl regular expressions).
# Exit status: 2 when fewer than two arguments are given, otherwise 0.
#

# Print a one-line usage summary on stdout (callers redirect as needed).
usage() {
  echo "Usage: sh spell_chk.sh <dir> '<name-pattern>' [custom-words.txt]"
}

# Print the aspell filter mode to use for the file named by $1.
# Only the extension of the last path component is examined. Anything that
# is not explicitly listed falls back to "html" instead of handing the raw
# extension (for example "in" for the '*.in' pages) to aspell as a mode name.
aspell_mode() {
  case "${1##*/}" in
    *.c | *.h) echo "ccpp" ;;
    *.test | *.tcl) echo "comment" ;;
    *.pl) echo "perl" ;;
    *) echo "html" ;;
  esac
}

# Run every check on the single file named by $1 and print the hits.
# Variables are prefixed with chk_ because POSIX sh has no "local".
check_file() {
  chk_file=$1
  chk_mode=$(aspell_mode "$chk_file")

  # Collect aspell's options in this function's own positional parameters.
  # The custom dictionary is only passed when it exists, so a run without a
  # custom word list still spell-checks against the stock dictionary.
  set -- --mode="$chk_mode"
  if [ -f ./custom.rws ]; then
    set -- --extra-dicts ./custom.rws "$@"
  fi

  # aspell's "list" option just lists all the misspelled words w/o any context.
  # we pass this list to grep to get line numbers.
  aspell "$@" list < "$chk_file" | sort -u | while IFS= read -r chk_word; do
    # An empty pattern word would make \b\b match on almost every line.
    [ -n "$chk_word" ] || continue
    grep -H -n -o -P "\b$chk_word\b" "$chk_file"
  done

  # check some commonly "doubled" words
  # http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/Repetitions
  for chk_word in the it that you is a in to had use an or and at; do
    grep -H -n -o -i -P "\b$chk_word\s+$chk_word\b" "$chk_file"
  done

  # "a" or "an" (filter some common exceptions)
  grep -H -n -o -P "\b[Aa]\s+[aeiou]\w+" "$chk_file" | grep -v -i -P "(one|user|uniq|unary|union|hist)"
  grep -H -n -o -P "\b[Aa]n\s+[bcdfghjklmnpqrstvwxyz]\w+" "$chk_file" | grep -v -i -P "(sqlite|honor|honest|x86)"
  # for abbreviations/acronyms (if first two letters caps)
  # vowel-sounding letters (take "an"):  A E F H I L M N O S X
  grep -H -n -o -P "\b[Aa]\s+[AEFHILMNOSX][A-Z]\w*" "$chk_file" | grep -v -P "(FROM|HAVING|HIDDEN|LEFT|LIKE|LIMIT|MATCH|NEAR|NULL|SAVEPOINT|SELECT|SHARED)"
  # consonant-sounding letters (take "a"):  B C D G J K P Q R T U V W Y Z
  grep -H -n -o -P "\b[Aa]n\s+[BCDGJKPQRTUVWYZ][A-Z]\w*" "$chk_file" | grep -v -P "(UPDATE)"
}

# Entry point: validate arguments, optionally rebuild the custom dictionary,
# then check every matching file.
main() {
  if [ $# -lt 2 ]; then
    usage >&2
    return 2
  fi

  if [ $# -ge 3 ] && [ -f "$3" ]; then
    echo 'Updating custom dictionary '
    # Best effort: a failed rebuild is reported but does not stop the run.
    aspell --lang=en create master ./custom.rws < "$3" ||
      echo "spell_chk.sh: warning: could not rebuild ./custom.rws from $3" >&2
  fi

  echo "Spell checking $1/$2..."
  # IFS= and -r keep blanks and backslashes in path names intact.
  find "$1" -name "$2" -print | grep -v matrix | while IFS= read -r file; do
    check_file "$file"
  done
  echo "Spell checking $1/$2... done."
}

main "$@"