1 | #!/usr/bin/perl -w
|
---|
2 | #
|
---|
3 | # `Diff' program in Perl
|
---|
4 | # Copyright 1998 M-J. Dominus. ([email protected])
|
---|
5 | #
|
---|
6 | # This program is free software; you can redistribute it and/or modify it
|
---|
7 | # under the same terms as Perl itself.
|
---|
8 | #
|
---|
9 | # Altered to output in `context diff' format (but without context)
|
---|
10 | # September 1998 Christian Murphy ([email protected])
|
---|
11 | #
|
---|
12 | # Command-line arguments and context lines feature added
|
---|
13 | # September 1998 Amir D. Karger ([email protected])
|
---|
14 | #
|
---|
15 | # In this file, "item" usually means "line of text", and "item number" usually
|
---|
16 | # means "line number". But theoretically the code could be used more generally
|
---|
17 | use strict;
|
---|
18 |
|
---|
19 | use Algorithm::Diff qw(diff);
|
---|
20 | use File::stat;
|
---|
21 | use vars qw ($opt_C $opt_c $opt_u $opt_U);
|
---|
22 | use Getopt::Std;
|
---|
23 |
|
---|
24 | my $usage = << "ENDUSAGE";
|
---|
25 | Usage: $0 [{-c | -u}] [{-C | -U} lines] oldfile newfile
|
---|
26 | -c will do a context diff with 3 lines of context
|
---|
27 | -C will do a context diff with 'lines' lines of context
|
---|
28 | -u will do a unified diff with 3 lines of context
|
---|
29 | -U will do a unified diff with 'lines' lines of context
|
---|
30 | ENDUSAGE
|
---|
31 |
|
---|
32 | getopts('U:C:cu') or bag("$usage");
|
---|
33 | bag("$usage") unless @ARGV == 2;
|
---|
34 | my ($file1, $file2) = @ARGV;
|
---|
35 | if (defined $opt_C || defined $opt_c) {
|
---|
36 | $opt_c = ""; # -c on if -C given on command line
|
---|
37 | $opt_u = undef;
|
---|
38 | } elsif (defined $opt_U || defined $opt_u) {
|
---|
39 | $opt_u = ""; # -u on if -U given on command line
|
---|
40 | $opt_c = undef;
|
---|
41 | } else {
|
---|
42 | $opt_c = ""; # by default, do context diff, not old diff
|
---|
43 | }
|
---|
44 |
|
---|
45 | my ($char1, $char2); # string to print before file names
|
---|
46 | my $Context_Lines; # lines of context to print
|
---|
47 | if (defined $opt_c) {
|
---|
48 | $Context_Lines = defined $opt_C ? $opt_C : 3;
|
---|
49 | $char1 = '*' x 3; $char2 = '-' x 3;
|
---|
50 | } elsif (defined $opt_u) {
|
---|
51 | $Context_Lines = defined $opt_U ? $opt_U : 3;
|
---|
52 | $char1 = '-' x 3; $char2 = '+' x 3;
|
---|
53 | }
|
---|
54 |
|
---|
55 | # After we've read up to a certain point in each file, the number of items
|
---|
56 | # we've read from each file will differ by $FLD (could be 0)
|
---|
57 | my $File_Length_Difference = 0;
|
---|
58 |
|
---|
59 | open (F1, $file1) or bag("Couldn't open $file1: $!");
|
---|
60 | open (F2, $file2) or bag("Couldn't open $file2: $!");
|
---|
61 | my (@f1, @f2);
|
---|
62 | chomp(@f1 = <F1>);
|
---|
63 | close F1;
|
---|
64 | chomp(@f2 = <F2>);
|
---|
65 | close F2;
|
---|
66 |
|
---|
67 | # diff yields lots of pieces, each of which is basically a Block object
|
---|
68 | my $diffs = diff(\@f1, \@f2);
|
---|
69 | exit 0 unless @$diffs;
|
---|
70 |
|
---|
71 | my $st = stat($file1);
|
---|
72 | print "$char1 $file1\t", scalar localtime($st->mtime), "\n";
|
---|
73 | $st = stat($file2);
|
---|
74 | print "$char2 $file2\t", scalar localtime($st->mtime), "\n";
|
---|
75 |
|
---|
76 | my ($hunk,$oldhunk);
|
---|
77 | # Loop over hunks. If a hunk overlaps with the last hunk, join them.
|
---|
78 | # Otherwise, print out the old one.
|
---|
79 | foreach my $piece (@$diffs) {
|
---|
80 | $hunk = new Hunk ($piece, $Context_Lines);
|
---|
81 | next unless $oldhunk;
|
---|
82 |
|
---|
83 | if ($hunk->does_overlap($oldhunk)) {
|
---|
84 | $hunk->prepend_hunk($oldhunk);
|
---|
85 | } else {
|
---|
86 | $oldhunk->output_diff(\@f1, \@f2);
|
---|
87 | }
|
---|
88 |
|
---|
89 | } continue {
|
---|
90 | $oldhunk = $hunk;
|
---|
91 | }
|
---|
92 |
|
---|
93 | # print the last hunk
|
---|
94 | $oldhunk->output_diff(\@f1, \@f2);
|
---|
95 | exit 1;
|
---|
96 | # END MAIN PROGRAM
|
---|
97 |
|
---|
98 | sub bag {
|
---|
99 | my $msg = shift;
|
---|
100 | $msg .= "\n";
|
---|
101 | warn $msg;
|
---|
102 | exit 2;
|
---|
103 | }
|
---|
104 |
|
---|
105 | # Package Hunk. A Hunk is a group of Blocks which overlap because of the
|
---|
106 | # context surrounding each block. (So if we're not using context, every
|
---|
107 | # hunk will contain one block.)
|
---|
108 | {
|
---|
109 | package Hunk;
|
---|
110 |
|
---|
111 | sub new {
|
---|
112 | # Arg1 is output from &LCS::diff (which corresponds to one Block)
|
---|
113 | # Arg2 is the number of items (lines, e.g.,) of context around each block
|
---|
114 | #
|
---|
115 | # This subroutine changes $File_Length_Difference
|
---|
116 | #
|
---|
117 | # Fields in a Hunk:
|
---|
118 | # blocks - a list of Block objects
|
---|
119 | # start - index in file 1 where first block of the hunk starts
|
---|
120 | # end - index in file 1 where last block of the hunk ends
|
---|
121 | #
|
---|
122 | # Variables:
|
---|
123 | # before_diff - how much longer file 2 is than file 1 due to all hunks
|
---|
124 | # until but NOT including this one
|
---|
125 | # after_diff - difference due to all hunks including this one
|
---|
126 | my ($class, $piece, $context_items) = @_;
|
---|
127 |
|
---|
128 | my $block = new Block ($piece); # this modifies $FLD!
|
---|
129 |
|
---|
130 | my $before_diff = $File_Length_Difference; # BEFORE this hunk
|
---|
131 | my $after_diff = $before_diff + $block->{"length_diff"};
|
---|
132 | $File_Length_Difference += $block->{"length_diff"};
|
---|
133 |
|
---|
134 | # @remove_array and @insert_array hold the items to insert and remove
|
---|
135 | # Save the start & beginning of each array. If the array doesn't exist
|
---|
136 | # though (e.g., we're only adding items in this block), then figure
|
---|
137 | # out the line number based on the line number of the other file and
|
---|
138 | # the current difference in file lenghts
|
---|
139 | my @remove_array = $block->remove;
|
---|
140 | my @insert_array = $block->insert;
|
---|
141 | my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
|
---|
142 | $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
|
---|
143 | $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
|
---|
144 | $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
|
---|
145 | $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
|
---|
146 |
|
---|
147 | $start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
|
---|
148 | $end1 = $a2 == -1 ? $b2 - $after_diff : $a2;
|
---|
149 | $start2 = $b1 == -1 ? $a1 + $before_diff : $b1;
|
---|
150 | $end2 = $b2 == -1 ? $a2 + $after_diff : $b2;
|
---|
151 |
|
---|
152 | # At first, a hunk will have just one Block in it
|
---|
153 | my $hunk = {
|
---|
154 | "start1" => $start1,
|
---|
155 | "start2" => $start2,
|
---|
156 | "end1" => $end1,
|
---|
157 | "end2" => $end2,
|
---|
158 | "blocks" => [$block],
|
---|
159 | };
|
---|
160 | bless $hunk, $class;
|
---|
161 |
|
---|
162 | $hunk->flag_context($context_items);
|
---|
163 |
|
---|
164 | return $hunk;
|
---|
165 | }
|
---|
166 |
|
---|
167 | # Change the "start" and "end" fields to note that context should be added
|
---|
168 | # to this hunk
|
---|
169 | sub flag_context {
|
---|
170 | my ($hunk, $context_items) = @_;
|
---|
171 | return unless $context_items; # no context
|
---|
172 |
|
---|
173 | # add context before
|
---|
174 | my $start1 = $hunk->{"start1"};
|
---|
175 | my $num_added = $context_items > $start1 ? $start1 : $context_items;
|
---|
176 | $hunk->{"start1"} -= $num_added;
|
---|
177 | $hunk->{"start2"} -= $num_added;
|
---|
178 |
|
---|
179 | # context after
|
---|
180 | my $end1 = $hunk->{"end1"};
|
---|
181 | $num_added = ($end1+$context_items > $#f1) ?
|
---|
182 | $#f1 - $end1 :
|
---|
183 | $context_items;
|
---|
184 | $hunk->{"end1"} += $num_added;
|
---|
185 | $hunk->{"end2"} += $num_added;
|
---|
186 | }
|
---|
187 |
|
---|
188 | # Is there an overlap between hunk arg0 and old hunk arg1?
|
---|
189 | # Note: if end of old hunk is one less than beginning of second, they overlap
|
---|
190 | sub does_overlap {
|
---|
191 | my ($hunk, $oldhunk) = @_;
|
---|
192 | return "" unless $oldhunk; # first time through, $oldhunk is empty
|
---|
193 |
|
---|
194 | # Do I actually need to test both?
|
---|
195 | return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 ||
|
---|
196 | $hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
|
---|
197 | }
|
---|
198 |
|
---|
199 | # Prepend hunk arg1 to hunk arg0
|
---|
200 | # Note that arg1 isn't updated! Only arg0 is.
|
---|
201 | sub prepend_hunk {
|
---|
202 | my ($hunk, $oldhunk) = @_;
|
---|
203 |
|
---|
204 | $hunk->{"start1"} = $oldhunk->{"start1"};
|
---|
205 | $hunk->{"start2"} = $oldhunk->{"start2"};
|
---|
206 |
|
---|
207 | unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}});
|
---|
208 | }
|
---|
209 |
|
---|
210 |
|
---|
211 | # DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
|
---|
212 | sub output_diff {
|
---|
213 | if (defined $main::opt_u) {&output_unified_diff(@_)}
|
---|
214 | elsif (defined $main::opt_c) {&output_context_diff(@_)}
|
---|
215 | else {die "unknown diff"}
|
---|
216 | }
|
---|
217 |
|
---|
218 | sub output_unified_diff {
|
---|
219 | my ($hunk, $fileref1, $fileref2) = @_;
|
---|
220 | my @blocklist;
|
---|
221 |
|
---|
222 | # Calculate item number range.
|
---|
223 | my $range1 = $hunk->unified_range(1);
|
---|
224 | my $range2 = $hunk->unified_range(2);
|
---|
225 | print "@@ -$range1 +$range2 @@\n";
|
---|
226 |
|
---|
227 | # Outlist starts containing the hunk of file 1.
|
---|
228 | # Removing an item just means putting a '-' in front of it.
|
---|
229 | # Inserting an item requires getting it from file2 and splicing it in.
|
---|
230 | # We splice in $num_added items. Remove blocks use $num_added because
|
---|
231 | # splicing changed the length of outlist.
|
---|
232 | # We remove $num_removed items. Insert blocks use $num_removed because
|
---|
233 | # their item numbers---corresponding to positions in file *2*--- don't take
|
---|
234 | # removed items into account.
|
---|
235 | my $low = $hunk->{"start1"};
|
---|
236 | my $hi = $hunk->{"end1"};
|
---|
237 | my ($num_added, $num_removed) = (0,0);
|
---|
238 | my @outlist = @$fileref1[$low..$hi];
|
---|
239 | map {s/^/ /} @outlist; # assume it's just context
|
---|
240 |
|
---|
241 | foreach my $block (@{$hunk->{"blocks"}}) {
|
---|
242 | foreach my $item ($block->remove) {
|
---|
243 | my $op = $item->{"sign"}; # -
|
---|
244 | my $offset = $item->{"item_no"} - $low + $num_added;
|
---|
245 | $outlist[$offset] =~ s/^ /$op/;
|
---|
246 | $num_removed++;
|
---|
247 | }
|
---|
248 | foreach my $item ($block->insert) {
|
---|
249 | my $op = $item->{"sign"}; # +
|
---|
250 | my $i = $item->{"item_no"};
|
---|
251 | my $offset = $i - $hunk->{"start2"} + $num_removed;
|
---|
252 | splice(@outlist,$offset,0,"$op$$fileref2[$i]");
|
---|
253 | $num_added++;
|
---|
254 | }
|
---|
255 | }
|
---|
256 |
|
---|
257 | map {s/$/\n/} @outlist; # add \n's
|
---|
258 | print @outlist;
|
---|
259 |
|
---|
260 | }
|
---|
261 |
|
---|
262 | sub output_context_diff {
|
---|
263 | my ($hunk, $fileref1, $fileref2) = @_;
|
---|
264 | my @blocklist;
|
---|
265 |
|
---|
266 | print "***************\n";
|
---|
267 | # Calculate item number range.
|
---|
268 | my $range1 = $hunk->context_range(1);
|
---|
269 | my $range2 = $hunk->context_range(2);
|
---|
270 |
|
---|
271 | # Print out file 1 part for each block in context diff format if there are
|
---|
272 | # any blocks that remove items
|
---|
273 | print "*** $range1 ****\n";
|
---|
274 | my $low = $hunk->{"start1"};
|
---|
275 | my $hi = $hunk->{"end1"};
|
---|
276 | if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) {
|
---|
277 | my @outlist = @$fileref1[$low..$hi];
|
---|
278 | map {s/^/ /} @outlist; # assume it's just context
|
---|
279 | foreach my $block (@blocklist) {
|
---|
280 | my $op = $block->op; # - or !
|
---|
281 | foreach my $item ($block->remove) {
|
---|
282 | $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
|
---|
283 | }
|
---|
284 | }
|
---|
285 | map {s/$/\n/} @outlist; # add \n's
|
---|
286 | print @outlist;
|
---|
287 | }
|
---|
288 |
|
---|
289 | print "--- $range2 ----\n";
|
---|
290 | $low = $hunk->{"start2"};
|
---|
291 | $hi = $hunk->{"end2"};
|
---|
292 | if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) {
|
---|
293 | my @outlist = @$fileref2[$low..$hi];
|
---|
294 | map {s/^/ /} @outlist; # assume it's just context
|
---|
295 | foreach my $block (@blocklist) {
|
---|
296 | my $op = $block->op; # + or !
|
---|
297 | foreach my $item ($block->insert) {
|
---|
298 | $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
|
---|
299 | }
|
---|
300 | }
|
---|
301 | map {s/$/\n/} @outlist; # add \n's
|
---|
302 | print @outlist;
|
---|
303 | }
|
---|
304 | }
|
---|
305 |
|
---|
306 | sub context_range {
|
---|
307 | # Generate a range of item numbers to print. Only print 1 number if the range
|
---|
308 | # has only one item in it. Otherwise, it's 'start,end'
|
---|
309 | my ($hunk, $flag) = @_;
|
---|
310 | my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
|
---|
311 | $start++; $end++; # index from 1, not zero
|
---|
312 | my $range = ($start < $end) ? "$start,$end" : $end;
|
---|
313 | return $range;
|
---|
314 | }
|
---|
315 |
|
---|
316 | sub unified_range {
|
---|
317 | # Generate a range of item numbers to print for unified diff
|
---|
318 | # Print number where block starts, followed by number of lines in the block
|
---|
319 | # (don't print number of lines if it's 1)
|
---|
320 | my ($hunk, $flag) = @_;
|
---|
321 | my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
|
---|
322 | $start++; $end++; # index from 1, not zero
|
---|
323 | my $length = $end - $start + 1;
|
---|
324 | my $first = $length < 2 ? $end : $start; # strange, but correct...
|
---|
325 | my $range = $length== 1 ? $first : "$first,$length";
|
---|
326 | return $range;
|
---|
327 | }
|
---|
328 | } # end Package Hunk
|
---|
329 |
|
---|
330 | # Package Block. A block is an operation removing, adding, or changing
|
---|
331 | # a group of items. Basically, this is just a list of changes, where each
|
---|
332 | # change adds or deletes a single item.
|
---|
333 | # (Change could be a separate class, but it didn't seem worth it)
|
---|
334 | {
|
---|
335 | package Block;
|
---|
336 | sub new {
|
---|
337 | # Input is a chunk from &Algorithm::LCS::diff
|
---|
338 | # Fields in a block:
|
---|
339 | # length_diff - how much longer file 2 is than file 1 due to this block
|
---|
340 | # Each change has:
|
---|
341 | # sign - '+' for insert, '-' for remove
|
---|
342 | # item_no - number of the item in the file (e.g., line number)
|
---|
343 | # We don't bother storing the text of the item
|
---|
344 | #
|
---|
345 | my ($class,$chunk) = @_;
|
---|
346 | my @changes = ();
|
---|
347 |
|
---|
348 | # This just turns each change into a hash.
|
---|
349 | foreach my $item (@$chunk) {
|
---|
350 | my ($sign, $item_no, $text) = @$item;
|
---|
351 | my $hashref = {"sign" => $sign, "item_no" => $item_no};
|
---|
352 | push @changes, $hashref;
|
---|
353 | }
|
---|
354 |
|
---|
355 | my $block = { "changes" => \@changes };
|
---|
356 | bless $block, $class;
|
---|
357 |
|
---|
358 | $block->{"length_diff"} = $block->insert - $block->remove;
|
---|
359 | return $block;
|
---|
360 | }
|
---|
361 |
|
---|
362 |
|
---|
363 | # LOW LEVEL FUNCTIONS
|
---|
364 | sub op {
|
---|
365 | # what kind of block is this?
|
---|
366 | my $block = shift;
|
---|
367 | my $insert = $block->insert;
|
---|
368 | my $remove = $block->remove;
|
---|
369 |
|
---|
370 | $remove && $insert and return '!';
|
---|
371 | $remove and return '-';
|
---|
372 | $insert and return '+';
|
---|
373 | warn "unknown block type";
|
---|
374 | return '^'; # context block
|
---|
375 | }
|
---|
376 |
|
---|
377 | # Returns a list of the changes in this block that remove items
|
---|
378 | # (or the number of removals if called in scalar context)
|
---|
379 | sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; }
|
---|
380 |
|
---|
381 | # Returns a list of the changes in this block that insert items
|
---|
382 | sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; }
|
---|
383 |
|
---|
384 | } # end of package Block
|
---|
385 |
|
---|