source: other-projects/diffcol/trunk/diffcol/Algorithm/cdiff.pl@ 21711

Last change on this file since 21711 was 21711, checked in by oranfry, 14 years ago

bringing across the diffcol project

File size: 12.0 KB
Line 
1#!/usr/bin/perl -w
2#
3# `Diff' program in Perl
4# Copyright 1998 M-J. Dominus. ([email protected])
5#
6# This program is free software; you can redistribute it and/or modify it
7# under the same terms as Perl itself.
8#
9# Altered to output in `context diff' format (but without context)
10# September 1998 Christian Murphy ([email protected])
11#
12# Command-line arguments and context lines feature added
13# September 1998 Amir D. Karger ([email protected])
14#
15# In this file, "item" usually means "line of text", and "item number" usually
16# means "line number". But theoretically the code could be used more generally
17use strict;
18
19use Algorithm::Diff qw(diff);
20use File::stat;
21use vars qw ($opt_C $opt_c $opt_u $opt_U);
22use Getopt::Std;
23
24my $usage = << "ENDUSAGE";
25Usage: $0 [{-c | -u}] [{-C | -U} lines] oldfile newfile
26 -c will do a context diff with 3 lines of context
27 -C will do a context diff with 'lines' lines of context
28 -u will do a unified diff with 3 lines of context
29 -U will do a unified diff with 'lines' lines of context
30ENDUSAGE
31
32getopts('U:C:cu') or bag("$usage");
33bag("$usage") unless @ARGV == 2;
34my ($file1, $file2) = @ARGV;
35if (defined $opt_C || defined $opt_c) {
36 $opt_c = ""; # -c on if -C given on command line
37 $opt_u = undef;
38} elsif (defined $opt_U || defined $opt_u) {
39 $opt_u = ""; # -u on if -U given on command line
40 $opt_c = undef;
41} else {
42 $opt_c = ""; # by default, do context diff, not old diff
43}
44
45my ($char1, $char2); # string to print before file names
46my $Context_Lines; # lines of context to print
47if (defined $opt_c) {
48 $Context_Lines = defined $opt_C ? $opt_C : 3;
49 $char1 = '*' x 3; $char2 = '-' x 3;
50} elsif (defined $opt_u) {
51 $Context_Lines = defined $opt_U ? $opt_U : 3;
52 $char1 = '-' x 3; $char2 = '+' x 3;
53}
54
55# After we've read up to a certain point in each file, the number of items
56# we've read from each file will differ by $FLD (could be 0)
57my $File_Length_Difference = 0;
58
59open (F1, $file1) or bag("Couldn't open $file1: $!");
60open (F2, $file2) or bag("Couldn't open $file2: $!");
61my (@f1, @f2);
62chomp(@f1 = <F1>);
63close F1;
64chomp(@f2 = <F2>);
65close F2;
66
67# diff yields lots of pieces, each of which is basically a Block object
68my $diffs = diff(\@f1, \@f2);
69exit 0 unless @$diffs;
70
71my $st = stat($file1);
72print "$char1 $file1\t", scalar localtime($st->mtime), "\n";
73$st = stat($file2);
74print "$char2 $file2\t", scalar localtime($st->mtime), "\n";
75
76my ($hunk,$oldhunk);
77# Loop over hunks. If a hunk overlaps with the last hunk, join them.
78# Otherwise, print out the old one.
79foreach my $piece (@$diffs) {
80 $hunk = new Hunk ($piece, $Context_Lines);
81 next unless $oldhunk;
82
83 if ($hunk->does_overlap($oldhunk)) {
84 $hunk->prepend_hunk($oldhunk);
85 } else {
86 $oldhunk->output_diff(\@f1, \@f2);
87 }
88
89} continue {
90 $oldhunk = $hunk;
91}
92
93# print the last hunk
94$oldhunk->output_diff(\@f1, \@f2);
95exit 1;
96# END MAIN PROGRAM
97
98sub bag {
99 my $msg = shift;
100 $msg .= "\n";
101 warn $msg;
102 exit 2;
103}
104
105# Package Hunk. A Hunk is a group of Blocks which overlap because of the
106# context surrounding each block. (So if we're not using context, every
107# hunk will contain one block.)
108{
109package Hunk;
110
111sub new {
112# Arg1 is output from &LCS::diff (which corresponds to one Block)
113# Arg2 is the number of items (lines, e.g.,) of context around each block
114#
115# This subroutine changes $File_Length_Difference
116#
117# Fields in a Hunk:
118# blocks - a list of Block objects
119# start - index in file 1 where first block of the hunk starts
120# end - index in file 1 where last block of the hunk ends
121#
122# Variables:
123# before_diff - how much longer file 2 is than file 1 due to all hunks
124# until but NOT including this one
125# after_diff - difference due to all hunks including this one
126 my ($class, $piece, $context_items) = @_;
127
128 my $block = new Block ($piece); # this modifies $FLD!
129
130 my $before_diff = $File_Length_Difference; # BEFORE this hunk
131 my $after_diff = $before_diff + $block->{"length_diff"};
132 $File_Length_Difference += $block->{"length_diff"};
133
134 # @remove_array and @insert_array hold the items to insert and remove
135 # Save the start & beginning of each array. If the array doesn't exist
136 # though (e.g., we're only adding items in this block), then figure
137 # out the line number based on the line number of the other file and
138 # the current difference in file lenghts
139 my @remove_array = $block->remove;
140 my @insert_array = $block->insert;
141 my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
142 $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
143 $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
144 $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
145 $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
146
147 $start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
148 $end1 = $a2 == -1 ? $b2 - $after_diff : $a2;
149 $start2 = $b1 == -1 ? $a1 + $before_diff : $b1;
150 $end2 = $b2 == -1 ? $a2 + $after_diff : $b2;
151
152 # At first, a hunk will have just one Block in it
153 my $hunk = {
154 "start1" => $start1,
155 "start2" => $start2,
156 "end1" => $end1,
157 "end2" => $end2,
158 "blocks" => [$block],
159 };
160 bless $hunk, $class;
161
162 $hunk->flag_context($context_items);
163
164 return $hunk;
165}
166
167# Change the "start" and "end" fields to note that context should be added
168# to this hunk
169sub flag_context {
170 my ($hunk, $context_items) = @_;
171 return unless $context_items; # no context
172
173 # add context before
174 my $start1 = $hunk->{"start1"};
175 my $num_added = $context_items > $start1 ? $start1 : $context_items;
176 $hunk->{"start1"} -= $num_added;
177 $hunk->{"start2"} -= $num_added;
178
179 # context after
180 my $end1 = $hunk->{"end1"};
181 $num_added = ($end1+$context_items > $#f1) ?
182 $#f1 - $end1 :
183 $context_items;
184 $hunk->{"end1"} += $num_added;
185 $hunk->{"end2"} += $num_added;
186}
187
188# Is there an overlap between hunk arg0 and old hunk arg1?
189# Note: if end of old hunk is one less than beginning of second, they overlap
190sub does_overlap {
191 my ($hunk, $oldhunk) = @_;
192 return "" unless $oldhunk; # first time through, $oldhunk is empty
193
194 # Do I actually need to test both?
195 return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 ||
196 $hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
197}
198
199# Prepend hunk arg1 to hunk arg0
200# Note that arg1 isn't updated! Only arg0 is.
201sub prepend_hunk {
202 my ($hunk, $oldhunk) = @_;
203
204 $hunk->{"start1"} = $oldhunk->{"start1"};
205 $hunk->{"start2"} = $oldhunk->{"start2"};
206
207 unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}});
208}
209
210
211# DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
212sub output_diff {
213 if (defined $main::opt_u) {&output_unified_diff(@_)}
214 elsif (defined $main::opt_c) {&output_context_diff(@_)}
215 else {die "unknown diff"}
216}
217
218sub output_unified_diff {
219 my ($hunk, $fileref1, $fileref2) = @_;
220 my @blocklist;
221
222 # Calculate item number range.
223 my $range1 = $hunk->unified_range(1);
224 my $range2 = $hunk->unified_range(2);
225 print "@@ -$range1 +$range2 @@\n";
226
227 # Outlist starts containing the hunk of file 1.
228 # Removing an item just means putting a '-' in front of it.
229 # Inserting an item requires getting it from file2 and splicing it in.
230 # We splice in $num_added items. Remove blocks use $num_added because
231 # splicing changed the length of outlist.
232 # We remove $num_removed items. Insert blocks use $num_removed because
233 # their item numbers---corresponding to positions in file *2*--- don't take
234 # removed items into account.
235 my $low = $hunk->{"start1"};
236 my $hi = $hunk->{"end1"};
237 my ($num_added, $num_removed) = (0,0);
238 my @outlist = @$fileref1[$low..$hi];
239 map {s/^/ /} @outlist; # assume it's just context
240
241 foreach my $block (@{$hunk->{"blocks"}}) {
242 foreach my $item ($block->remove) {
243 my $op = $item->{"sign"}; # -
244 my $offset = $item->{"item_no"} - $low + $num_added;
245 $outlist[$offset] =~ s/^ /$op/;
246 $num_removed++;
247 }
248 foreach my $item ($block->insert) {
249 my $op = $item->{"sign"}; # +
250 my $i = $item->{"item_no"};
251 my $offset = $i - $hunk->{"start2"} + $num_removed;
252 splice(@outlist,$offset,0,"$op$$fileref2[$i]");
253 $num_added++;
254 }
255 }
256
257 map {s/$/\n/} @outlist; # add \n's
258 print @outlist;
259
260}
261
262sub output_context_diff {
263 my ($hunk, $fileref1, $fileref2) = @_;
264 my @blocklist;
265
266 print "***************\n";
267 # Calculate item number range.
268 my $range1 = $hunk->context_range(1);
269 my $range2 = $hunk->context_range(2);
270
271 # Print out file 1 part for each block in context diff format if there are
272 # any blocks that remove items
273 print "*** $range1 ****\n";
274 my $low = $hunk->{"start1"};
275 my $hi = $hunk->{"end1"};
276 if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) {
277 my @outlist = @$fileref1[$low..$hi];
278 map {s/^/ /} @outlist; # assume it's just context
279 foreach my $block (@blocklist) {
280 my $op = $block->op; # - or !
281 foreach my $item ($block->remove) {
282 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
283 }
284 }
285 map {s/$/\n/} @outlist; # add \n's
286 print @outlist;
287 }
288
289 print "--- $range2 ----\n";
290 $low = $hunk->{"start2"};
291 $hi = $hunk->{"end2"};
292 if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) {
293 my @outlist = @$fileref2[$low..$hi];
294 map {s/^/ /} @outlist; # assume it's just context
295 foreach my $block (@blocklist) {
296 my $op = $block->op; # + or !
297 foreach my $item ($block->insert) {
298 $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
299 }
300 }
301 map {s/$/\n/} @outlist; # add \n's
302 print @outlist;
303 }
304}
305
306sub context_range {
307# Generate a range of item numbers to print. Only print 1 number if the range
308# has only one item in it. Otherwise, it's 'start,end'
309 my ($hunk, $flag) = @_;
310 my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
311 $start++; $end++; # index from 1, not zero
312 my $range = ($start < $end) ? "$start,$end" : $end;
313 return $range;
314}
315
316sub unified_range {
317# Generate a range of item numbers to print for unified diff
318# Print number where block starts, followed by number of lines in the block
319# (don't print number of lines if it's 1)
320 my ($hunk, $flag) = @_;
321 my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
322 $start++; $end++; # index from 1, not zero
323 my $length = $end - $start + 1;
324 my $first = $length < 2 ? $end : $start; # strange, but correct...
325 my $range = $length== 1 ? $first : "$first,$length";
326 return $range;
327}
328} # end Package Hunk
329
330# Package Block. A block is an operation removing, adding, or changing
331# a group of items. Basically, this is just a list of changes, where each
332# change adds or deletes a single item.
333# (Change could be a separate class, but it didn't seem worth it)
334{
335package Block;
336sub new {
337# Input is a chunk from &Algorithm::LCS::diff
338# Fields in a block:
339# length_diff - how much longer file 2 is than file 1 due to this block
340# Each change has:
341# sign - '+' for insert, '-' for remove
342# item_no - number of the item in the file (e.g., line number)
343# We don't bother storing the text of the item
344#
345 my ($class,$chunk) = @_;
346 my @changes = ();
347
348# This just turns each change into a hash.
349 foreach my $item (@$chunk) {
350 my ($sign, $item_no, $text) = @$item;
351 my $hashref = {"sign" => $sign, "item_no" => $item_no};
352 push @changes, $hashref;
353 }
354
355 my $block = { "changes" => \@changes };
356 bless $block, $class;
357
358 $block->{"length_diff"} = $block->insert - $block->remove;
359 return $block;
360}
361
362
363# LOW LEVEL FUNCTIONS
364sub op {
365# what kind of block is this?
366 my $block = shift;
367 my $insert = $block->insert;
368 my $remove = $block->remove;
369
370 $remove && $insert and return '!';
371 $remove and return '-';
372 $insert and return '+';
373 warn "unknown block type";
374 return '^'; # context block
375}
376
377# Returns a list of the changes in this block that remove items
378# (or the number of removals if called in scalar context)
379sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; }
380
381# Returns a list of the changes in this block that insert items
382sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; }
383
384} # end of package Block
385
Note: See TracBrowser for help on using the repository browser.