-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdo_diff.bash
More file actions
executable file
·376 lines (356 loc) · 13.1 KB
/
do_diff.bash
File metadata and controls
executable file
·376 lines (356 loc) · 13.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
#! /usr/bin/env bash
#
# translation of do_diff.sh into bash via ChatGPT and manual revision
#
# do_diff.sh: compare a subset of the files in two directories
#
# Notes:
# - Also see diff.sh for simpler version.
# - shellcheck filtering:
# SC2016: Expressions don't expand in single quotes, use double quotes for that.
# SC2049: =~ is for regex. Use == for globs.
# SC2086: Double quote to prevent globbing and word splitting.
#
# TODO1:
# - Use --verbose to determine the level of detail for the usage
# (see calc_entropy.perl for an example.)
#
# TODO:
# - Reconcile with do_rcsdiff.sh (at least keep in sync, but perhaps combine).
# - add sample input and output comments
# - Work in subdirectory tree comparison example into usage:
# find -name '*.java' | foreach.perl 'do_diff.sh -b $f /tmp/$F' -
# - Make --ignore-all-space optional as ignores tokenization.
# - * Send all output to stdout (e.g., "No such file or directory" warning).
#
#-------------------------------------------------------------------------------
# via diff info page (GNU diffutils version 3.2):
#
# The `--ignore-space-change' (`-b') option is stronger than `-E' and
# `-Z' combined. It ignores white space at line end, and considers all
# other sequences of one or more white space characters within a line to
# be equivalent. With this option, `diff' considers the following two
# lines to be equivalent, where `$' denotes the line end:
#
#     Here lyeth  muche rychnesse  in lytell space.   -- John Heywood$
#     Here lyeth muche rychnesse in lytell space. -- John Heywood   $
#
# The `--ignore-all-space' (`-w') option is stronger still. It
# ignores differences even if one line has white space where the other
# line has none. "White space" characters include tab, vertical tab,
# form feed, carriage return, and space; some locales may define
# additional characters to be white space. With this option, `diff'
# considers the following two lines to be equivalent, where `$' denotes
# the line end and `^M' denotes a carriage return:
#
#     Here lyeth  muche  rychnesse  in lytell space.--  John Heywood$
#     He relyeth much erychnes  seinly tells pace.  --John Heywood   ^M$
# ...
# The `--ignore-blank-lines' (`-B') option ignores changes that consist
# entirely of blank lines.
#
# Diagnostics, controlled purely through environment variables:
# - DEBUG_LEVEL (default 0): at 4 or higher, echo the invocation
# - TRACE=1: turn on xtrace, which shows commands after argument expansion
#   (often sufficient on its own)
# - VERBOSE=1: turn on verbose, which shows source lines as read
#   (usually superfluous when xtrace is on)
#
debug_level=${DEBUG_LEVEL:-0}
if test "$debug_level" -ge 4; then
    echo "$0 $*"
fi
case "${TRACE:-0}" in
    1) set -x ;;
esac
case "${VERBOSE:-0}" in
    1) set -v ;;
esac
# Default settings (the option parsing further down may override them)
#
# What gets compared
pattern=
master=master
base_dir=.
# How diff gets invoked
diff_cmd=diff
diff_options=
space_options='--ignore-space-change --ignore-blank-lines'
# Switches, each "0" (off) or "1" (on)
brief=0
quiet=0
verbose_mode=1
nopattern=0
no_glob=0
match_dot_files=0
recursive=0
# Show usage statement if insufficient arguments given
# - Triggered when the second positional argument is missing or empty.
# - Everything is written to stdout; the script then exits with the status of
#   the last echo (i.e., bare `exit`).
# - Short option letters quoted in the notes follow GNU diff:
#   -b = --ignore-space-change, -B = --ignore-blank-lines, -w = --ignore-all-space
if [ -z "$2" ]; then
    script=$(basename "$0")
    echo ""
    echo "Usage: $script [option] {--all | pattern} master_dir"
    echo ""
    echo " options: [--check-space-changes | --ignore-spacing] [--brief] [--quiet] [--verbose] [--diff cmd] [--diff-options text] [--match-dot-files]"
    echo " other options: [--side-by-side] [--ignore-all-space] [--no-pattern] [--no-glob] [--kdiff] [--trace] [--dir dir]"
    echo ""
    echo "Examples:"
    echo ""
    echo "$script --ignore-spacing '*.[ch]*' MASTER-DIR"
    echo ""
    echo "$script '.py' .. > _python_diff.list 2>&1"
    echo ""
    ## OLD:
    ## # shellcheck disable=SC2016
    ## echo '(for f in system.py main.py debug.py; do' "$script" '$f ~/mezcla-clone; done) 2>&1 | less'
    echo "find . -type d -exec \"$script\" --dir {} --verbose '*' ~/repo-main/{} \; > _main-diff-all.log 2>&1"
    echo ""
    echo "git ls-tree -r --name-only HEAD | xargs -I '{}' $script --no-pattern '{}' ~/repo-main/'{}' > _main-diff-tracked.log 2>&1"
    echo ""
    ## OLD: echo "$script" '--match-dot-files ".*bash* .*emacs*" .. > _bash-emacs-diff.list 2>&1'
    echo "$script" '--match-dot-files ".*bash*" .. > _bash-diff.list 2>&1'
    echo ""
    echo "$script --ignore-spacing --diff-options '--context=1' '*.rb' vm-torre > vm-torre.diff 2>&1"
    echo ""
    echo "$script --no-glob '*.py *.mako' ~/xfer"
    echo ""
    echo "Notes:"
    echo "- When . occurs in pattern, it is treated as a file extension:"
    echo " 'py' => '*py*' but '.py' => '*.py' (not '*.py*')"
    echo "- Use --match-dot-files, to ensure that . matches Unix dot files (e.g., .bashrc)"
    # TODO: interchange .bash and emacs here and in example above
    ## OLD: echo " '.emacs' => '.emacs*' (not '*.emacs*')"
    echo " '.git' => '.git*' (not '*.git*')"
    echo "- The --no-pattern option treats pattern as a file (and likewise for master_dir)"
    echo "- Changes due to whitespace are ignored by default (i.e., --ignore-space-change, and --ignore-blank-lines [diff -bB])."
    echo "- Specify --ignore-all-space [diff -w] to ignore spacing even within tokens"
    echo "- Use --check-space-changes to check for any changes in whitespace"
    echo "- The --dir option is useful with find to achieve recursive diff (see example above)"
    echo ""
    exit
fi
# Option parsing
# Consumes leading arguments for as long as they start with a dash; whatever
# remains in the positional parameters afterwards is left for the code below.
# Options that take a value (--diff, --diff-options, --dir) shift once extra.
# An unrecognized option prints an error and exits with status 1.
# TODO: allows options after pattern (e.g., `while (("$1" =~ -*) || ("$2" =~ -*))`)
while [[ "$1" == -* ]]; do
    case "$1" in
        --all)
            pattern="*"
            ;;
        --ignore-spacing)
            # Ignore all spacing-related differences (i.e., -wbB)
            space_options="--ignore-space-change --ignore-all-space --ignore-blank-lines"
            ;;
        --check-space-changes)
            space_options=""
            ;;
        --brief)
            brief="1"
            ;;
        --diff)
            shift
            diff_cmd="$1"
            ;;
        --diff-options)
            shift
            diff_options="$diff_options $1"
            ;;
        --dir)
            shift
            recursive="1"
            base_dir="$1"
            quiet="1"
            ;;
        --side-by-side)
            width=$((2 * ${COLUMNS:-132}))
            diff_options="$diff_options --side-by-side --suppress-common-lines --width=$width"
            ;;
        --quiet)
            quiet="1"
            verbose_mode="0"
            ;;
        --no-quiet)
            quiet="0"
            ## TODO3?: verbose_mode="1"
            ;;
        --verbose)
            verbose_mode="1"
            ;;
        --no-verbose)
            verbose_mode="0"
            ;;
        --nopattern | --no-pattern)
            nopattern="1"
            ## TODO3: reduce redundant flags
            no_glob="1"
            ;;
        --no-glob)
            no_glob="1"
            ;;
        --match-dot-files)
            match_dot_files="1"
            ;;
        --ignore-all-space)
            diff_options="$diff_options --ignore-all-space"
            ;;
        --kdiff | --vdiff)
            # HACK: backdoor option for using kdiff
            diff_cmd="kdiff.sh"
            diff_options=""
            space_options=""
            brief="1"
            ;;
        --trace)
            set -x
            ;;
        *)
            echo "ERROR: unknown option: $1"
            exit 1
            ;;
    esac
    shift
done
# Get pattern from first argument
# Skipped when a pattern was already set (e.g., via --all); otherwise the
# first remaining argument is turned into a glob and then shifted away:
# - with no_glob, or when it looks like a list of globs ("*.py *.mako"),
#   or when it names an existing regular file, it is used verbatim;
# - when it contains a dot it is treated as an extension ('.py' => '*.py'),
#   or as a dot-file prefix with --match-dot-files ('.git' => '.git*');
# - anything else is matched as a substring ('py' => '*py*').
# note: the dot test used to be the regex \.* which also matches the empty
# string, so it fired for every argument and the substring case never ran.
# The explicit-extension form (e.g., "*.py") contains a dot as well, so it is
# handled by the same branch ('*.py' => '**.py', which globs the same way).
# shellcheck disable=SC2049
if [ -z "$pattern" ]; then
    if [ "$no_glob" == "1" ]; then
        # Treat first argument as pattern without * added
        pattern="$1"
    elif [[ "$1" =~ \*.*\ \*\. ]]; then
        # ex: "*.py *.mako"
        echo "Warning: Assuming implicit --no-glob, as otherwise space would be in extension"
        pattern="$1"
    elif [ -f "$1" ]; then
        # specific file (e.g., "README.txt")
        pattern="$1"
    elif [[ "$1" == *.* ]]; then
        # note: dot file (e.g., ".emacs") requires use of --match-dot-files
        if [ "$match_dot_files" == "1" ]; then
            # note: special case handling since can't use *.emacs* (i.e., substring case below)
            pattern="$1*"
        else
            # convenience so that '.py' gets treated as '*.py'
            pattern="*$1"
        fi
    else
        # substring of file
        pattern="*$1*"
    fi
    shift
fi
# Get master directory from the next remaining argument
# note: if it is not a directory, the nopattern flag is switched on
master="$1"
if [ ! -d "$master" ]; then
    nopattern="1"
fi
# Note: nopattern flag only used for producing output labels
## TODO3: clarify intention
if [ "$nopattern" == "0" ]; then
    echo "checking files in pattern $pattern"
fi
# Optionally, change the directory
# - An empty base_dir is treated like "." (i.e., stay put).
# - A failed cd is fatal: carrying on would compare files from whatever
#   directory the script happened to be started in.
#
if [ -n "$base_dir" ] && [ "$base_dir" != "." ]; then
    if [ "$verbose_mode" == "1" ]; then
        echo in dir "$base_dir":
    fi
    if ! cd "$base_dir"; then
        echo "ERROR: unable to change to directory '$base_dir'"
        exit 1
    fi
fi
# Do the actual diff
# For each path the pattern expands to: locate the counterpart under $master,
# report whether the two differ, and print the diff output.
# - Reads: pattern, master, base_dir, diff_cmd, space_options, diff_options,
#   brief, quiet, verbose_mode, recursive.
# - Writes: log_file (scratch file reused for every diff run) and count
#   (number of differing files reported so far).
# note: $pattern, $space_options and $diff_options are expanded unquoted on
# purpose so that they glob and/or split into separate words.
log_file="${TMP:-/tmp}/_do_diff.$$.log"
count=0
# shellcheck disable=SC2086
for file in $pattern; do
    ## OLD:
    ## # Add line divider
    ## if [[ ("$verbose_mode" == "1") && ($count -ge 0) ]]; then
    ## echo "------------------------------------------------------------------------"
    ## fi
    ## let count++

    # Ignore unresolved pattern file (e.g., '*')
    if [ ! -e "$file" ]; then
        continue
    fi

    # Skip names containing a dollar sign
    if [[ "$file" =~ \$ ]]; then
        echo "Warning: Ignoring file '$file' with $ in name"
        continue
    fi

    # Derive base name for file, including relative directory (e.g., in case pattern specifies subdirectory)
    # NOTE(review): this keeps the *parent* of the file's directory (so that
    # "bin/tests/x" yields "bin/x"), which does not match the example given
    # before the fallback logic below -- confirm the intention.
    base=$(basename "$file")
    dir=$(dirname "$file")
    if [ "$dir" != "." ]; then
        ## OLD: base="$dir/$base"
        base="$(dirname "$dir")/$base"
    fi
    other_file="$master"
    #
    if [ -d "$file" ]; then
        if [ "$recursive" == "0" ]; then
            echo "Warning: Ignoring subdirectory '$file'"
        fi
        continue
    fi

    # Resolve path for other file
    # Use fallbacks based on whether pattern/file specifies path not resolvable
    # directly via master directory.
    # Note: similar to diff-rev alias
    # ex: "bin/tests/README.ipynb" => "tests/README.ipynb"
    if [ -d "$other_file" ]; then
        if [ -e "$other_file/$file" ]; then
            # Retains directory in "pattern"
            # TODO: assert $nopattern
            other_file="$other_file/$file"
        else
            # Ignores directory in "pattern"
            other_file="$master/$base"
        fi
    fi
    if [ ! -e "$other_file" ]; then
        other_file="$master/$base"
    fi

    # note: recursive omits some verbose output to cut down on clutter
    # example: the file-vs-other line is omitted; --dir sets --quiet
    if [ "$quiet" == "0" ]; then
        echo "$base_dir/$file vs. $other_file"
    fi
    if [ ! -e "$other_file" ]; then
        if [ "$quiet" == "0" ]; then
            echo "Warning: missing other file: '$other_file'"
        fi
        continue
    fi

    # Check whether the files differ, unless in brief mode.
    # Note: Outputs 'Differences: {file1} {file2}' when files differ for convenient
    # grepping (e.g., `do_diff.sh ... | grep '^Differences:'`).
    files_differ=false
    if [ "$brief" == "0" ]; then
        "$diff_cmd" --brief $space_options $diff_options "$file" "$other_file" > "$log_file"
        status=$?
        ## OLD: perl -pe 's/Files (.*) and (.*) differ/Differences: $1 $2/;' < "$log_file"
        # note: the directory is handed to perl through the environment rather
        # than spliced into the program text, so quotes in it cannot break it
        DO_DIFF_BASE_DIR="$base_dir" perl -pe 's@Files (.*) and (.*) differ@Differences: $ENV{DO_DIFF_BASE_DIR}/$1 $2@;' < "$log_file"
        if [ "$status" != "0" ]; then
            files_differ=true
        fi
    fi

    # Show file info with time and size if there are differences, preceded
    # by a line divider when verbose and update the output count.
    # note: the divider only goes between differing files, which avoids
    # extraneous dashes with recursive diff
    # note: the flag is compared as a string; a bare [[ $files_differ ]] is
    # true for "false" as well, since it only checks for non-emptiness
    if [ "$files_differ" == "true" ]; then
        if [[ ("$verbose_mode" == "1") && ($count -gt 0) ]]; then
            echo "------------------------------------------------------------------------"
        fi
        count=$((count + 1))
        ls -l "$file"
        ls -l "$other_file"
    fi

    # Perform the actual diff
    "$diff_cmd" $space_options $diff_options "$file" "$other_file" > "$log_file" 2>&1

    # Show relative difference percent
    # note: lines of diff output relative to the combined line count of both
    # files (-1 when both are empty)
    ## TODO?: if [[ "$brief" == "0") && $files_differ ]]; then
    if [ "$brief" == "0" ] && [ "$files_differ" == "true" ]; then
        ## OLD:
        ## num_lines1=$(wc -l < "$file")
        ## num_lines2=$(wc -l < "$other_file" || echo "0")
        num_lines1=$(cat "$file" | wc -l)
        num_lines2=$(cat "$other_file" | wc -l)
        num_lines=$(( num_lines1 + num_lines2 ))
        num_diffs=$(wc -l < "$log_file")
        relative_diff=-1
        if [ "$num_lines" -gt 0 ]; then
            relative_diff=$(( num_diffs * 100 / num_lines ))
        fi
        ## OLD: echo "${relative_diff}% differences for $base"
        echo "${relative_diff}% differences for $base_dir/$file"
    fi

    # Show the actual file differences
    cat "$log_file"

    # Add space divider
    ## TEMP: for consistency with continue'd cases above omits space if verbose, because
    ## the divider provides separation.
    ## OLD: if [ "$quiet" == "0" ]; then
    if [[ ("$quiet" == "0") && ("$verbose_mode" != "1") ]]; then
        echo ""
    fi
done
# Cleanup
# Remove the scratch log, unless the debug level is 6 or higher (in which
# case it is left in place).
# note: uses -f because the log only exists if at least one file pair was
# actually diffed; a plain rm would otherwise complain and leave the script
# with a failing exit status.
if [[ $debug_level -lt 6 ]]; then
    rm -f -- "$log_file"
fi