Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

diffnew.pl@ 27767

Last change on this file since 27767 was 21711, checked in by oranfry, 14 years ago
bringing across the diffcol project
File size: 16.6 KB

Line
1	#!/usr/bin/perl
2	#
3	# `Diff' program in Perl
4	# Copyright 1998 M-J. Dominus. ([email protected])
5	#
6	# This program is free software; you can redistribute it and/or modify it
7	# under the same terms as Perl itself.
8	#
9	# Altered to output in `context diff' format (but without context)
10	# September 1998 Christian Murphy ([email protected])
11	#
12	# Context lines feature added
13	# Unified, "Old" (Standard UNIX), Ed diff added September 1998
14	# Reverse_Ed (-f option) added March 1999
15	# Amir D. Karger ([email protected])
16	#
17	# Modular functions integrated into program
18	# February 1999 M-J. Dominus ([email protected])
19	#
20	# In this file, "item" usually means "line of text", and "item number" usually
21	# means "line number". But theoretically the code could be used more generally
22	use strict;
23	use Algorithm::Diff qw(diff);
24
25	# GLOBAL VARIABLES ####
26	# After we've read up to a certain point in each file, the number of items
27	# we've read from each file will differ by $FLD (could be 0)
28	my $File_Length_Difference = 0;
29
30	#ed diff outputs hunks backwards, so we need to save hunks when doing ed diff
31	my @Ed_Hunks = ();
32	########################
33
34	my $usage = << "ENDUSAGE";
35	Usage: $0 [{-c \| -C lines -e \| -f \| -u \| -U lines}] oldfile newfile
36	-c do a context diff with 3 lines of context
37	-C do a context diff with 'lines' lines of context (implies -c)
38	-e create a script for the ed editor to change oldfile to newfile
39	-f like -e but in reverse order
40	-u do a unified diff with 3 lines of context
41	-U do a unified diff with 'lines' lines of context (implies -u)
42	-q report only whether or not the files differ
43
44	By default it will do an "old-style" diff, with output like UNIX diff
45	ENDUSAGE
46
47	my $Context_Lines = 0; # lines of context to print. 0 for old-style diff
48	my $Diff_Type = "OLD"; # by default, do standard UNIX diff
49	my ($opt_c, $opt_u, $opt_e, $opt_f, $opt_q);
50	while ($ARGV[0] =~ /^-/) {
51	my $opt = shift;
52	last if $opt eq '--';
53	if ($opt =~ /^-C(.*)/) {
54	$Context_Lines = $1 \|\| shift;
55	$opt_c = 1;
56	$Diff_Type = "CONTEXT";
57	} elsif ($opt =~ /^-c$/) {
58	$Context_Lines = 3;
59	$opt_c = 1;
60	$Diff_Type = "CONTEXT";
61	} elsif ($opt =~ /^-e$/) {
62	$opt_e = 1;
63	$Diff_Type = "ED";
64	} elsif ($opt =~ /^-f$/) {
65	$opt_f = 1;
66	$Diff_Type = "REVERSE_ED";
67	} elsif ($opt =~ /^-U(.*)$/) {
68	$Context_Lines = $1 \|\| shift;
69	$opt_u = 1;
70	$Diff_Type = "UNIFIED";
71	} elsif ($opt =~ /^-u$/) {
72	$Context_Lines = 3;
73	$opt_u = 1;
74	$Diff_Type = "UNIFIED";
75	} elsif ($opt =~ /^-q$/) {
76	$Context_Lines = 0;
77	$opt_q = 1;
78	$opt_e = 1;
79	$Diff_Type = "ED";
80	} else {
81	$opt =~ s/^-//;
82	bag("Illegal option -- $opt");
83	}
84	}
85
86	if ($opt_q and grep($_,($opt_c, $opt_f, $opt_u)) > 1) {
87	bag("Combining -q with other options is nonsensical");
88	}
89
90	if (grep($_,($opt_c, $opt_e, $opt_f, $opt_u)) > 1) {
91	bag("Only one of -c, -u, -f, -e are allowed");
92	}
93
94	bag($usage) unless @ARGV == 2;
95
96	######## DO THE DIFF!
97	my ($file1, $file2) = @ARGV;
98
99	my ($char1, $char2); # string to print before file names
100	if ($Diff_Type eq "CONTEXT") {
101	$char1 = '*' x 3; $char2 = '-' x 3;
102	} elsif ($Diff_Type eq "UNIFIED") {
103	$char1 = '-' x 3; $char2 = '+' x 3;
104	}
105
106	open (F1, $file1) or bag("Couldn't open $file1: $!");
107	open (F2, $file2) or bag("Couldn't open $file2: $!");
108	my (@f1, @f2);
109	chomp(@f1 = <F1>);
110	close F1;
111	chomp(@f2 = <F2>);
112	close F2;
113
114	# diff yields lots of pieces, each of which is basically a Block object
115	my $diffs = diff(\@f1, \@f2);
116	exit 0 unless @$diffs;
117
118	if ($opt_q and @$diffs) {
119	print "Files $file1 and $file2 differ\n";
120	exit 1;
121	}
122
123	if ($Diff_Type =~ /UNIFIED\|CONTEXT/) {
124	my @st = stat($file1);
125	my $MTIME = 9;
126	print "$char1 $file1\t", scalar localtime($st[$MTIME]), "\n";
127	@st = stat($file2);
128	print "$char2 $file2\t", scalar localtime($st[$MTIME]), "\n";
129	}
130
131	my ($hunk,$oldhunk);
132	# Loop over hunks. If a hunk overlaps with the last hunk, join them.
133	# Otherwise, print out the old one.
134	foreach my $piece (@$diffs) {
135	$hunk = new Hunk ($piece, $Context_Lines);
136	next unless $oldhunk; # first time through
137
138	# Don't need to check for overlap if blocks have no context lines
139	if ($Context_Lines && $hunk->does_overlap($oldhunk)) {
140	$hunk->prepend_hunk($oldhunk);
141	} else {
142	$oldhunk->output_diff(\@f1, \@f2, $Diff_Type);
143	}
144
145	} continue {
146	$oldhunk = $hunk;
147	}
148
149	# print the last hunk
150	$oldhunk->output_diff(\@f1, \@f2, $Diff_Type);
151
152	# Print hunks backwards if we're doing an ed diff
153	map {$_->output_ed_diff(\@f1, \@f2, $Diff_Type)} @Ed_Hunks if @Ed_Hunks;
154
155	exit 1;
156	# END MAIN PROGRAM
157
158	sub bag {
159	my $msg = shift;
160	$msg .= "\n";
161	warn $msg;
162	exit 2;
163	}
164
165	########
166	# Package Hunk. A Hunk is a group of Blocks which overlap because of the
167	# context surrounding each block. (So if we're not using context, every
168	# hunk will contain one block.)
169	{
170	package Hunk;
171
172	sub new {
173	# Arg1 is output from &LCS::diff (which corresponds to one Block)
174	# Arg2 is the number of items (lines, e.g.,) of context around each block
175	#
176	# This subroutine changes $File_Length_Difference
177	#
178	# Fields in a Hunk:
179	# blocks - a list of Block objects
180	# start - index in file 1 where first block of the hunk starts
181	# end - index in file 1 where last block of the hunk ends
182	#
183	# Variables:
184	# before_diff - how much longer file 2 is than file 1 due to all hunks
185	# until but NOT including this one
186	# after_diff - difference due to all hunks including this one
187	my ($class, $piece, $context_items) = @_;
188
189	my $block = new Block ($piece); # this modifies $FLD!
190
191	my $before_diff = $File_Length_Difference; # BEFORE this hunk
192	my $after_diff = $before_diff + $block->{"length_diff"};
193	$File_Length_Difference += $block->{"length_diff"};
194
195	# @remove_array and @insert_array hold the items to insert and remove
196	# Save the start & beginning of each array. If the array doesn't exist
197	# though (e.g., we're only adding items in this block), then figure
198	# out the line number based on the line number of the other file and
199	# the current difference in file lenghts
200	my @remove_array = $block->remove;
201	my @insert_array = $block->insert;
202	my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
203	$a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
204	$a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
205	$b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
206	$b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
207
208	$start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
209	$end1 = $a2 == -1 ? $b2 - $after_diff : $a2;
210	$start2 = $b1 == -1 ? $a1 + $before_diff : $b1;
211	$end2 = $b2 == -1 ? $a2 + $after_diff : $b2;
212
213	# At first, a hunk will have just one Block in it
214	my $hunk = {
215	"start1" => $start1,
216	"start2" => $start2,
217	"end1" => $end1,
218	"end2" => $end2,
219	"blocks" => [$block],
220	};
221	bless $hunk, $class;
222
223	$hunk->flag_context($context_items);
224
225	return $hunk;
226	}
227
228	# Change the "start" and "end" fields to note that context should be added
229	# to this hunk
230	sub flag_context {
231	my ($hunk, $context_items) = @_;
232	return unless $context_items; # no context
233
234	# add context before
235	my $start1 = $hunk->{"start1"};
236	my $num_added = $context_items > $start1 ? $start1 : $context_items;
237	$hunk->{"start1"} -= $num_added;
238	$hunk->{"start2"} -= $num_added;
239
240	# context after
241	my $end1 = $hunk->{"end1"};
242	$num_added = ($end1+$context_items > $#f1) ?
243	$#f1 - $end1 :
244	$context_items;
245	$hunk->{"end1"} += $num_added;
246	$hunk->{"end2"} += $num_added;
247	}
248
249	# Is there an overlap between hunk arg0 and old hunk arg1?
250	# Note: if end of old hunk is one less than beginning of second, they overlap
251	sub does_overlap {
252	my ($hunk, $oldhunk) = @_;
253	return "" unless $oldhunk; # first time through, $oldhunk is empty
254
255	# Do I actually need to test both?
256	return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 \|\|
257	$hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
258	}
259
260	# Prepend hunk arg1 to hunk arg0
261	# Note that arg1 isn't updated! Only arg0 is.
262	sub prepend_hunk {
263	my ($hunk, $oldhunk) = @_;
264
265	$hunk->{"start1"} = $oldhunk->{"start1"};
266	$hunk->{"start2"} = $oldhunk->{"start2"};
267
268	unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}});
269	}
270
271
272	# DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
273	sub output_diff {
274	# First arg is the current hunk of course
275	# Next args are refs to the files
276	# last arg is type of diff
277	my $diff_type = $_[-1];
278	my %funchash = ("OLD" => \&output_old_diff,
279	"CONTEXT" => \&output_context_diff,
280	"ED" => \&store_ed_diff,
281	"REVERSE_ED" => \&output_ed_diff,
282	"UNIFIED" => \&output_unified_diff,
283	);
284	if (exists $funchash{$diff_type}) {
285	&{$funchash{$diff_type}}(@_); # pass in all args
286	} else {die "unknown diff type $diff_type"}
287	}
288
289	sub output_old_diff {
290	# Note that an old diff can't have any context. Therefore, we know that
291	# there's only one block in the hunk.
292	my ($hunk, $fileref1, $fileref2) = @_;
293	my %op_hash = ('+' => 'a', '-' => 'd', '!' => 'c');
294
295	my @blocklist = @{$hunk->{"blocks"}};
296	warn ("Expecting one block in an old diff hunk!") if scalar @blocklist != 1;
297	my $block = $blocklist[0];
298	my $op = $block->op; # +, -, or !
299
300	# Calculate item number range.
301	# old diff range is just like a context diff range, except the ranges
302	# are on one line with the action between them.
303	my $range1 = $hunk->context_range(1);
304	my $range2 = $hunk->context_range(2);
305	my $action = $op_hash{$op} \|\| warn "unknown op $op";
306	print "$range1$action$range2\n";
307
308	# If removing anything, just print out all the remove lines in the hunk
309	# which is just all the remove lines in the block
310	if ($block->remove) {
311	my @outlist = @$fileref1[$hunk->{"start1"}..$hunk->{"end1"}];
312	map {$_ = "< $_\n"} @outlist; # all lines will be '< text\n'
313	print @outlist;
314	}
315
316	print "---\n" if $op eq '!'; # only if inserting and removing
317	if ($block->insert) {
318	my @outlist = @$fileref2[$hunk->{"start2"}..$hunk->{"end2"}];
319	map {$_ = "> $_\n"} @outlist; # all lines will be '> text\n'
320	print @outlist;
321	}
322	}
323
324	sub output_unified_diff {
325	my ($hunk, $fileref1, $fileref2) = @_;
326	my @blocklist;
327
328	# Calculate item number range.
329	my $range1 = $hunk->unified_range(1);
330	my $range2 = $hunk->unified_range(2);
331	print "@@ -$range1 +$range2 @@\n";
332
333	# Outlist starts containing the hunk of file 1.
334	# Removing an item just means putting a '-' in front of it.
335	# Inserting an item requires getting it from file2 and splicing it in.
336	# We splice in $num_added items. Remove blocks use $num_added because
337	# splicing changed the length of outlist.
338	# We remove $num_removed items. Insert blocks use $num_removed because
339	# their item numbers---corresponding to positions in file 2--- don't take
340	# removed items into account.
341	my $low = $hunk->{"start1"};
342	my $hi = $hunk->{"end1"};
343	my ($num_added, $num_removed) = (0,0);
344	my @outlist = @$fileref1[$low..$hi];
345	map {s/^/ /} @outlist; # assume it's just context
346
347	foreach my $block (@{$hunk->{"blocks"}}) {
348	foreach my $item ($block->remove) {
349	my $op = $item->{"sign"}; # -
350	my $offset = $item->{"item_no"} - $low + $num_added;
351	$outlist[$offset] =~ s/^ /$op/;
352	$num_removed++;
353	}
354	foreach my $item ($block->insert) {
355	my $op = $item->{"sign"}; # +
356	my $i = $item->{"item_no"};
357	my $offset = $i - $hunk->{"start2"} + $num_removed;
358	splice(@outlist,$offset,0,"$op$$fileref2[$i]");
359	$num_added++;
360	}
361	}
362
363	map {s/$/\n/} @outlist; # add \n's
364	print @outlist;
365
366	}
367
368	sub output_context_diff {
369	my ($hunk, $fileref1, $fileref2) = @_;
370	my @blocklist;
371
372	print "***************\n";
373	# Calculate item number range.
374	my $range1 = $hunk->context_range(1);
375	my $range2 = $hunk->context_range(2);
376
377	# Print out file 1 part for each block in context diff format if there are
378	# any blocks that remove items
379	print "* $range1 **\n";
380	my $low = $hunk->{"start1"};
381	my $hi = $hunk->{"end1"};
382	if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) {
383	my @outlist = @$fileref1[$low..$hi];
384	map {s/^/ /} @outlist; # assume it's just context
385	foreach my $block (@blocklist) {
386	my $op = $block->op; # - or !
387	foreach my $item ($block->remove) {
388	$outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
389	}
390	}
391	map {s/$/\n/} @outlist; # add \n's
392	print @outlist;
393	}
394
395	print "--- $range2 ----\n";
396	$low = $hunk->{"start2"};
397	$hi = $hunk->{"end2"};
398	if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) {
399	my @outlist = @$fileref2[$low..$hi];
400	map {s/^/ /} @outlist; # assume it's just context
401	foreach my $block (@blocklist) {
402	my $op = $block->op; # + or !
403	foreach my $item ($block->insert) {
404	$outlist[$item->{"item_no"} - $low] =~ s/^ /$op/;
405	}
406	}
407	map {s/$/\n/} @outlist; # add \n's
408	print @outlist;
409	}
410	}
411
412	sub store_ed_diff {
413	# ed diff prints out diffs backwards. So save them while we're generating
414	# them, then print them out at the end
415	my $hunk = shift;
416	unshift @Ed_Hunks, $hunk;
417	}
418
419	sub output_ed_diff {
420	# This sub is used for ed ('diff -e') OR reverse_ed ('diff -f').
421	# last arg is type of diff
422	my $diff_type = $_[-1];
423	my ($hunk, $fileref1, $fileref2) = @_;
424	my %op_hash = ('+' => 'a', '-' => 'd', '!' => 'c');
425
426	# Can't be any context for this kind of diff, so each hunk has one block
427	my @blocklist = @{$hunk->{"blocks"}};
428	warn ("Expecting one block in an ed diff hunk!") if scalar @blocklist != 1;
429	my $block = $blocklist[0];
430	my $op = $block->op; # +, -, or !
431
432	# Calculate item number range.
433	# old diff range is just like a context diff range, except the ranges
434	# are on one line with the action between them.
435	my $range1 = $hunk->context_range(1);
436	$range1 =~ s/,/ / if $diff_type eq "REVERSE_ED";
437	my $action = $op_hash{$op} \|\| warn "unknown op $op";
438	print ($diff_type eq "ED" ? "$range1$action\n" : "$action$range1\n");
439
440	if ($block->insert) {
441	my @outlist = @$fileref2[$hunk->{"start2"}..$hunk->{"end2"}];
442	map {s/$/\n/} @outlist; # add \n's
443	print @outlist;
444	print ".\n"; # end of ed 'c' or 'a' command
445	}
446	}
447
448	sub context_range {
449	# Generate a range of item numbers to print. Only print 1 number if the range
450	# has only one item in it. Otherwise, it's 'start,end'
451	# Flag is the number of the file (1 or 2)
452	my ($hunk, $flag) = @_;
453	my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
454	$start++; $end++; # index from 1, not zero
455	my $range = ($start < $end) ? "$start,$end" : $end;
456	return $range;
457	}
458
459	sub unified_range {
460	# Generate a range of item numbers to print for unified diff
461	# Print number where block starts, followed by number of lines in the block
462	# (don't print number of lines if it's 1)
463	my ($hunk, $flag) = @_;
464	my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
465	$start++; $end++; # index from 1, not zero
466	my $length = $end - $start + 1;
467	my $first = $length < 2 ? $end : $start; # strange, but correct...
468	my $range = $length== 1 ? $first : "$first,$length";
469	return $range;
470	}
471	} # end Package Hunk
472
473	########
474	# Package Block. A block is an operation removing, adding, or changing
475	# a group of items. Basically, this is just a list of changes, where each
476	# change adds or deletes a single item.
477	# (Change could be a separate class, but it didn't seem worth it)
478	{
479	package Block;
480	sub new {
481	# Input is a chunk from &Algorithm::LCS::diff
482	# Fields in a block:
483	# length_diff - how much longer file 2 is than file 1 due to this block
484	# Each change has:
485	# sign - '+' for insert, '-' for remove
486	# item_no - number of the item in the file (e.g., line number)
487	# We don't bother storing the text of the item
488	#
489	my ($class,$chunk) = @_;
490	my @changes = ();
491
492	# This just turns each change into a hash.
493	foreach my $item (@$chunk) {
494	my ($sign, $item_no, $text) = @$item;
495	my $hashref = {"sign" => $sign, "item_no" => $item_no};
496	push @changes, $hashref;
497	}
498
499	my $block = { "changes" => \@changes };
500	bless $block, $class;
501
502	$block->{"length_diff"} = $block->insert - $block->remove;
503	return $block;
504	}
505
506
507	# LOW LEVEL FUNCTIONS
508	sub op {
509	# what kind of block is this?
510	my $block = shift;
511	my $insert = $block->insert;
512	my $remove = $block->remove;
513
514	$remove && $insert and return '!';
515	$remove and return '-';
516	$insert and return '+';
517	warn "unknown block type";
518	return '^'; # context block
519	}
520
521	# Returns a list of the changes in this block that remove items
522	# (or the number of removals if called in scalar context)
523	sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; }
524
525	# Returns a list of the changes in this block that insert items
526	sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; }
527
528	} # end of package Block

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: other-projects/nightly-tasks/diffcol/trunk/diffcol/Algorithm/diffnew.pl@ 27767

Download in other formats: