1 | package Text::Diff;
|
---|
2 |
|
---|
3 | $VERSION = 0.35;
|
---|
4 |
|
---|
5 | =head1 NAME
|
---|
6 |
|
---|
7 | Text::Diff - Perform diffs on files and record sets
|
---|
8 |
|
---|
9 | =head1 SYNOPSIS
|
---|
10 |
|
---|
11 | use Text::Diff;
|
---|
12 |
|
---|
13 | ## Mix and match filenames, strings, file handles, producer subs,
|
---|
14 | ## or arrays of records; returns diff in a string.
|
---|
15 | ## WARNING: can return B<large> diffs for large files.
|
---|
16 | my $diff = diff "file1.txt", "file2.txt", { STYLE => "Context" };
|
---|
17 | my $diff = diff \$string1, \$string2, \%options;
|
---|
18 | my $diff = diff \*FH1, \*FH2;
|
---|
19 | my $diff = diff \&reader1, \&reader2;
|
---|
20 | my $diff = diff \@records1, \@records2;
|
---|
21 |
|
---|
22 | ## May also mix input types:
|
---|
23 | my $diff = diff \@records1, "file_B.txt";
|
---|
24 |
|
---|
25 | =head1 DESCRIPTION
|
---|
26 |
|
---|
27 | C<diff()> provides a basic set of services akin to the GNU C<diff> utility. It
|
---|
28 | is not anywhere near as feature complete as GNU C<diff>, but it is better
|
---|
29 | integrated with Perl and available on all platforms. It is often faster than
|
---|
30 | shelling out to a system's C<diff> executable for small files, and generally
|
---|
31 | slower on larger files.
|
---|
32 |
|
---|
33 | Relies on L<Algorithm::Diff> for, well, the algorithm. This may not produce
|
---|
34 | the same exact diff as a system's local C<diff> executable, but it will be a
|
---|
35 | valid diff and comprehensible by C<patch>. We haven't seen any differences
|
---|
36 | between Algorithm::Diff's logic and GNU diff's, but we have not examined them
|
---|
37 | to make sure they are indeed identical.
|
---|
38 |
|
---|
39 | B<Note>: If you don't want to import the C<diff> function, do one of the
|
---|
40 | following:
|
---|
41 |
|
---|
42 | use Text::Diff ();
|
---|
43 |
|
---|
44 | require Text::Diff;
|
---|
45 |
|
---|
46 | That's a pretty rare occurence, so C<diff()> is exported by default.
|
---|
47 |
|
---|
48 | =cut
|
---|
49 |
|
---|
50 | use Exporter;
|
---|
51 | @ISA = qw( Exporter );
|
---|
52 | @EXPORT = qw( diff );
|
---|
53 |
|
---|
54 | use strict;
|
---|
55 | use Carp;
|
---|
56 | use Algorithm::Diff qw( traverse_sequences );
|
---|
57 |
|
---|
58 | ## Hunks are made of ops. An op is the starting index for each
|
---|
59 | ## sequence and the opcode:
|
---|
60 | use constant A => 0; # Array index before match/discard
|
---|
61 | use constant B => 1;
|
---|
62 | use constant OPCODE => 2; # "-", " ", "+"
|
---|
63 | use constant FLAG => 3; # What to display if not OPCODE "!"
|
---|
64 |
|
---|
65 |
|
---|
66 | =head1 OPTIONS
|
---|
67 |
|
---|
68 | diff() takes two parameters from which to draw input and a set of
|
---|
69 | options to control it's output. The options are:
|
---|
70 |
|
---|
71 | =over
|
---|
72 |
|
---|
73 | =item FILENAME_A, MTIME_A, FILENAME_B, MTIME_B
|
---|
74 |
|
---|
75 | The name of the file and the modification time "files"
|
---|
76 |
|
---|
77 | These are filled in automatically for each file when diff() is passed a
|
---|
78 | filename, unless a defined value is passed in.
|
---|
79 |
|
---|
80 | If a filename is not passed in and FILENAME_A and FILENAME_B are not provided
|
---|
81 | or C<undef>, the header will not be printed.
|
---|
82 |
|
---|
83 | Unused on C<OldStyle> diffs.
|
---|
84 |
|
---|
85 | =item OFFSET_A, OFFSET_B
|
---|
86 |
|
---|
87 | The index of the first line / element. These default to 1 for all
|
---|
88 | parameter types except ARRAY references, for which the default is 0. This
|
---|
89 | is because ARRAY references are presumed to be data structures, while the
|
---|
90 | others are line oriented text.
|
---|
91 |
|
---|
92 | =item STYLE
|
---|
93 |
|
---|
94 | "Unified", "Context", "OldStyle", or an object or class reference for a class
|
---|
95 | providing C<file_header()>, C<hunk_header()>, C<hunk()>, C<hunk_footer()> and
|
---|
96 | C<file_footer()> methods. The two footer() methods are provided for
|
---|
97 | overloading only; none of the formats provide them.
|
---|
98 |
|
---|
99 | Defaults to "Unified" (unlike standard C<diff>, but Unified is what's most
|
---|
100 | often used in submitting patches and is the most human readable of the three.
|
---|
101 |
|
---|
102 | If the package indicated by the STYLE has no hunk() method, c<diff()> will
|
---|
103 | load it automatically (lazy loading). Since all such packages should inherit
|
---|
104 | from Text::Diff::Base, this should be marvy.
|
---|
105 |
|
---|
106 | Styles may be specified as class names (C<STYLE => "Foo"), in which case they
|
---|
107 | will be C<new()>ed with no parameters, or as objects (C<STYLE => Foo->new>).
|
---|
108 |
|
---|
109 | =item CONTEXT
|
---|
110 |
|
---|
111 | How many lines before and after each diff to display. Ignored on old-style
|
---|
112 | diffs. Defaults to 3.
|
---|
113 |
|
---|
114 | =item OUTPUT
|
---|
115 |
|
---|
116 | Examples and their equivalent subroutines:
|
---|
117 |
|
---|
118 | OUTPUT => \*FOOHANDLE, # like: sub { print FOOHANDLE shift() }
|
---|
119 | OUTPUT => \$output, # like: sub { $output .= shift }
|
---|
120 | OUTPUT => \@output, # like: sub { push @output, shift }
|
---|
121 | OUTPUT => sub { $output .= shift },
|
---|
122 |
|
---|
123 | If no C<OUTPUT> is supplied, returns the diffs in a string. If
|
---|
124 | C<OUTPUT> is a C<CODE> ref, it will be called once with the (optional)
|
---|
125 | file header, and once for each hunk body with the text to emit. If
|
---|
126 | C<OUTPUT> is an L<IO::Handle>, output will be emitted to that handle.
|
---|
127 |
|
---|
128 | =item FILENAME_PREFIX_A, FILENAME_PREFIX_B
|
---|
129 |
|
---|
130 | The string to print before the filename in the header. Unused on C<OldStyle>
|
---|
131 | diffs. Defaults are C<"---">, C<"+++"> for Unified and C<"***">, C<"+++"> for
|
---|
132 | Context.
|
---|
133 |
|
---|
134 | =item KEYGEN, KEYGEN_ARGS
|
---|
135 |
|
---|
136 | These are passed to L<Algorithm::Diff/traverse_sequences>.
|
---|
137 |
|
---|
138 | =back
|
---|
139 |
|
---|
140 | B<Note>: if neither C<FILENAME_> option is defined, the header will not be
|
---|
141 | printed. If at one is present, the other and both MTIME_ options must be
|
---|
142 | present or "Use of undefined variable" warnings will be generated (except
|
---|
143 | on C<OldStyle> diffs, which ignores these options).
|
---|
144 |
|
---|
145 | =cut
|
---|
146 |
|
---|
147 | my %internal_styles = (
|
---|
148 | Unified => undef,
|
---|
149 | Context => undef,
|
---|
150 | OldStyle => undef,
|
---|
151 | Table => undef, ## "internal", but in another module
|
---|
152 | );
|
---|
153 |
|
---|
154 | sub diff {
|
---|
155 | my @seqs = ( shift, shift );
|
---|
156 | my $options = shift || {};
|
---|
157 |
|
---|
158 | for my $i ( 0..1 ) {
|
---|
159 | my $seq = $seqs[$i];
|
---|
160 | my $type = ref $seq;
|
---|
161 |
|
---|
162 | while ( $type eq "CODE" ) {
|
---|
163 | $seqs[$i] = $seq = $seq->( $options );
|
---|
164 | $type = ref $seq;
|
---|
165 | }
|
---|
166 |
|
---|
167 | my $AorB = !$i ? "A" : "B";
|
---|
168 |
|
---|
169 | if ( $type eq "ARRAY" ) {
|
---|
170 | ## This is most efficient :)
|
---|
171 | $options->{"OFFSET_$AorB"} = 0
|
---|
172 | unless defined $options->{"OFFSET_$AorB"};
|
---|
173 | }
|
---|
174 | elsif ( $type eq "SCALAR" ) {
|
---|
175 | $seqs[$i] = [split( /^/m, $$seq )];
|
---|
176 | $options->{"OFFSET_$AorB"} = 1
|
---|
177 | unless defined $options->{"OFFSET_$AorB"};
|
---|
178 | }
|
---|
179 | elsif ( ! $type ) {
|
---|
180 | $options->{"OFFSET_$AorB"} = 1
|
---|
181 | unless defined $options->{"OFFSET_$AorB"};
|
---|
182 | $options->{"FILENAME_$AorB"} = $seq
|
---|
183 | unless defined $options->{"FILENAME_$AorB"};
|
---|
184 | $options->{"MTIME_$AorB"} = (stat($seq))[9]
|
---|
185 | unless defined $options->{"MTIME_$AorB"};
|
---|
186 |
|
---|
187 | local $/ = "\n";
|
---|
188 | open F, "<$seq" or carp "$!: $seq";
|
---|
189 | $seqs[$i] = [<F>];
|
---|
190 | close F;
|
---|
191 |
|
---|
192 | }
|
---|
193 | elsif ( $type eq "GLOB" || UNIVERSAL::isa( $seq, "IO::Handle" ) ) {
|
---|
194 | $options->{"OFFSET_$AorB"} = 1
|
---|
195 | unless defined $options->{"OFFSET_$AorB"};
|
---|
196 | local $/ = "\n";
|
---|
197 | $seqs[$i] = [<$seq>];
|
---|
198 | }
|
---|
199 | else {
|
---|
200 | confess "Can't handle input of type ", ref;
|
---|
201 | }
|
---|
202 | }
|
---|
203 |
|
---|
204 | ## Config vars
|
---|
205 | my $output;
|
---|
206 | my $output_handler = $options->{OUTPUT};
|
---|
207 | my $type = ref $output_handler ;
|
---|
208 | if ( ! defined $output_handler ) {
|
---|
209 | $output = "";
|
---|
210 | $output_handler = sub { $output .= shift };
|
---|
211 | }
|
---|
212 | elsif ( $type eq "CODE" ) {
|
---|
213 | ## No problems, mate.
|
---|
214 | }
|
---|
215 | elsif ( $type eq "SCALAR" ) {
|
---|
216 | my $out_ref = $output_handler;
|
---|
217 | $output_handler = sub { $$out_ref .= shift };
|
---|
218 | }
|
---|
219 | elsif ( $type eq "ARRAY" ) {
|
---|
220 | my $out_ref = $output_handler;
|
---|
221 | $output_handler = sub { push @$out_ref, shift };
|
---|
222 | }
|
---|
223 | elsif ( $type eq "GLOB" || UNIVERSAL::isa $output_handler, "IO::Handle" ) {
|
---|
224 | my $output_handle = $output_handler;
|
---|
225 | $output_handler = sub { print $output_handle shift };
|
---|
226 | }
|
---|
227 | else {
|
---|
228 | croak "Unrecognized output type: $type";
|
---|
229 | }
|
---|
230 |
|
---|
231 | my $style = $options->{STYLE};
|
---|
232 | $style = "Unified" unless defined $options->{STYLE};
|
---|
233 | $style = "Text::Diff::$style" if exists $internal_styles{$style};
|
---|
234 |
|
---|
235 | if ( ! $style->can( "hunk" ) ) {
|
---|
236 | eval "require $style; 1" or die $@;
|
---|
237 | }
|
---|
238 |
|
---|
239 | $style = $style->new
|
---|
240 | if ! ref $style && $style->can( "new" );
|
---|
241 |
|
---|
242 | my $ctx_lines = $options->{CONTEXT};
|
---|
243 | $ctx_lines = 3 unless defined $ctx_lines;
|
---|
244 | $ctx_lines = 0 if $style->isa( "Text::Diff::OldStyle" );
|
---|
245 |
|
---|
246 | my @keygen_args = $options->{KEYGEN_ARGS}
|
---|
247 | ? @{$options->{KEYGEN_ARGS}}
|
---|
248 | : ();
|
---|
249 |
|
---|
250 | ## State vars
|
---|
251 | my $diffs = 0; ## Number of discards this hunk
|
---|
252 | my $ctx = 0; ## Number of " " (ctx_lines) ops pushed after last diff.
|
---|
253 | my @ops; ## ops (" ", +, -) in this hunk
|
---|
254 | my $hunks = 0; ## Number of hunks
|
---|
255 |
|
---|
256 | my $emit_ops = sub {
|
---|
257 | $output_handler->( $style->file_header( @seqs, $options ) )
|
---|
258 | unless $hunks++;
|
---|
259 | $output_handler->( $style->hunk_header( @seqs, @_, $options ) );
|
---|
260 | $output_handler->( $style->hunk ( @seqs, @_, $options ) );
|
---|
261 | $output_handler->( $style->hunk_footer( @seqs, @_, $options ) );
|
---|
262 | };
|
---|
263 |
|
---|
264 | ## We keep 2*ctx_lines so that if a diff occurs
|
---|
265 | ## at 2*ctx_lines we continue to grow the hunk instead
|
---|
266 | ## of emitting diffs and context as we go. We
|
---|
267 | ## need to know the total length of both of the two
|
---|
268 | ## subsequences so the line count can be printed in the
|
---|
269 | ## header.
|
---|
270 | my $dis_a = sub {push @ops, [@_[0,1],"-"]; ++$diffs ; $ctx = 0 };
|
---|
271 | my $dis_b = sub {push @ops, [@_[0,1],"+"]; ++$diffs ; $ctx = 0 };
|
---|
272 |
|
---|
273 | traverse_sequences(
|
---|
274 | @seqs,
|
---|
275 | {
|
---|
276 | MATCH => sub {
|
---|
277 | push @ops, [@_[0,1]," "];
|
---|
278 |
|
---|
279 | if ( $diffs && ++$ctx > $ctx_lines * 2 ) {
|
---|
280 | $emit_ops->( [ splice @ops, 0, $#ops - $ctx_lines ] );
|
---|
281 | $ctx = $diffs = 0;
|
---|
282 | }
|
---|
283 |
|
---|
284 | ## throw away context lines that aren't needed any more
|
---|
285 | shift @ops if ! $diffs && @ops > $ctx_lines;
|
---|
286 | },
|
---|
287 | DISCARD_A => $dis_a,
|
---|
288 | DISCARD_B => $dis_b,
|
---|
289 | },
|
---|
290 | $options->{KEYGEN}, # pass in user arguments for key gen function
|
---|
291 | @keygen_args,
|
---|
292 | );
|
---|
293 |
|
---|
294 | if ( $diffs ) {
|
---|
295 | $#ops -= $ctx - $ctx_lines if $ctx > $ctx_lines;
|
---|
296 | $emit_ops->( \@ops );
|
---|
297 | }
|
---|
298 |
|
---|
299 | $output_handler->( $style->file_footer( @seqs, $options ) ) if $hunks;
|
---|
300 |
|
---|
301 | return defined $output ? $output : $hunks;
|
---|
302 | }
|
---|
303 |
|
---|
304 |
|
---|
305 | sub _header {
|
---|
306 | my ( $h ) = @_;
|
---|
307 | my ( $p1, $fn1, $t1, $p2, $fn2, $t2 ) = @{$h}{
|
---|
308 | "FILENAME_PREFIX_A",
|
---|
309 | "FILENAME_A",
|
---|
310 | "MTIME_A",
|
---|
311 | "FILENAME_PREFIX_B",
|
---|
312 | "FILENAME_B",
|
---|
313 | "MTIME_B"
|
---|
314 | };
|
---|
315 |
|
---|
316 | ## remember to change Text::Diff::Table if this logic is tweaked.
|
---|
317 | return "" unless defined $fn1 && defined $fn2;
|
---|
318 |
|
---|
319 | return join( "",
|
---|
320 | $p1, " ", $fn1, defined $t1 ? "\t" . localtime $t1 : (), "\n",
|
---|
321 | $p2, " ", $fn2, defined $t2 ? "\t" . localtime $t2 : (), "\n",
|
---|
322 | );
|
---|
323 | }
|
---|
324 |
|
---|
325 | ## _range encapsulates the building of, well, ranges. Turns out there are
|
---|
326 | ## a few nuances.
|
---|
327 | sub _range {
|
---|
328 | my ( $ops, $a_or_b, $format ) = @_;
|
---|
329 |
|
---|
330 | my $start = $ops->[ 0]->[$a_or_b];
|
---|
331 | my $after = $ops->[-1]->[$a_or_b];
|
---|
332 |
|
---|
333 | ## The sequence indexes in the lines are from *before* the OPCODE is
|
---|
334 | ## executed, so we bump the last index up unless the OP indicates
|
---|
335 | ## it didn't change.
|
---|
336 | ++$after
|
---|
337 | unless $ops->[-1]->[OPCODE] eq ( $a_or_b == A ? "+" : "-" );
|
---|
338 |
|
---|
339 | ## convert from 0..n index to 1..(n+1) line number. The unless modifier
|
---|
340 | ## handles diffs with no context, where only one file is affected. In this
|
---|
341 | ## case $start == $after indicates an empty range, and the $start must
|
---|
342 | ## not be incremented.
|
---|
343 | my $empty_range = $start == $after;
|
---|
344 | ++$start unless $empty_range;
|
---|
345 |
|
---|
346 | return
|
---|
347 | $start == $after
|
---|
348 | ? $format eq "unified" && $empty_range
|
---|
349 | ? "$start,0"
|
---|
350 | : $start
|
---|
351 | : $format eq "unified"
|
---|
352 | ? "$start,".($after-$start+1)
|
---|
353 | : "$start,$after";
|
---|
354 | }
|
---|
355 |
|
---|
356 |
|
---|
357 | sub _op_to_line {
|
---|
358 | my ( $seqs, $op, $a_or_b, $op_prefixes ) = @_;
|
---|
359 |
|
---|
360 | my $opcode = $op->[OPCODE];
|
---|
361 | return () unless defined $op_prefixes->{$opcode};
|
---|
362 |
|
---|
363 | my $op_sym = defined $op->[FLAG] ? $op->[FLAG] : $opcode;
|
---|
364 | $op_sym = $op_prefixes->{$op_sym};
|
---|
365 | return () unless defined $op_sym;
|
---|
366 |
|
---|
367 | $a_or_b = $op->[OPCODE] ne "+" ? 0 : 1 unless defined $a_or_b;
|
---|
368 | return ( $op_sym, $seqs->[$a_or_b][$op->[$a_or_b]] );
|
---|
369 | }
|
---|
370 |
|
---|
371 |
|
---|
372 | =head1 Formatting Classes
|
---|
373 |
|
---|
374 | These functions implement the output formats. They are grouped in to classes
|
---|
375 | so diff() can use class names to call the correct set of output routines and so
|
---|
376 | that you may inherit from them easily. There are no constructors or instance
|
---|
377 | methods for these classes, though subclasses may provide them if need be.
|
---|
378 |
|
---|
379 | Each class has file_header(), hunk_header(), hunk(), and footer() methods
|
---|
380 | identical to those documented in the Text::Diff::Unified section. header() is
|
---|
381 | called before the hunk() is first called, footer() afterwards. The default
|
---|
382 | footer function is an empty method provided for overloading:
|
---|
383 |
|
---|
384 | sub footer { return "End of patch\n" }
|
---|
385 |
|
---|
386 | Some output formats are provided by external modules (which are loaded
|
---|
387 | automatically), such as L<Text::Diff::Table>. These are
|
---|
388 | are documented here to keep the documentation simple.
|
---|
389 |
|
---|
390 | =over
|
---|
391 |
|
---|
392 | =head2 Text::Diff::Base
|
---|
393 |
|
---|
394 | Returns "" for all methods (other than C<new()>).
|
---|
395 |
|
---|
396 | =cut
|
---|
397 |
|
---|
398 | {
|
---|
399 | package Text::Diff::Base;
|
---|
400 | sub new {
|
---|
401 | my $proto = shift;
|
---|
402 | return bless { @_ }, ref $proto || $proto;
|
---|
403 | }
|
---|
404 |
|
---|
405 | sub file_header { return "" }
|
---|
406 | sub hunk_header { return "" }
|
---|
407 | sub hunk { return "" }
|
---|
408 | sub hunk_footer { return "" }
|
---|
409 | sub file_footer { return "" }
|
---|
410 | }
|
---|
411 |
|
---|
412 |
|
---|
413 | =head2 Text::Diff::Unified
|
---|
414 |
|
---|
415 | --- A Mon Nov 12 23:49:30 2001
|
---|
416 | +++ B Mon Nov 12 23:49:30 2001
|
---|
417 | @@ -2,13 +2,13 @@
|
---|
418 | 2
|
---|
419 | 3
|
---|
420 | 4
|
---|
421 | -5d
|
---|
422 | +5a
|
---|
423 | 6
|
---|
424 | 7
|
---|
425 | 8
|
---|
426 | 9
|
---|
427 | +9a
|
---|
428 | 10
|
---|
429 | 11
|
---|
430 | -11d
|
---|
431 | 12
|
---|
432 | 13
|
---|
433 |
|
---|
434 | =over
|
---|
435 |
|
---|
436 | =item file_header
|
---|
437 |
|
---|
438 | $s = Text::Diff::Unified->file_header( $options );
|
---|
439 |
|
---|
440 | Returns a string containing a unified header. The sole parameter is the
|
---|
441 | options hash passed in to diff(), containing at least:
|
---|
442 |
|
---|
443 | FILENAME_A => $fn1,
|
---|
444 | MTIME_A => $mtime1,
|
---|
445 | FILENAME_B => $fn2,
|
---|
446 | MTIME_B => $mtime2
|
---|
447 |
|
---|
448 | May also contain
|
---|
449 |
|
---|
450 | FILENAME_PREFIX_A => "---",
|
---|
451 | FILENAME_PREFIX_B => "+++",
|
---|
452 |
|
---|
453 | to override the default prefixes (default values shown).
|
---|
454 |
|
---|
455 | =cut
|
---|
456 |
|
---|
457 | @Text::Diff::Unified::ISA = qw( Text::Diff::Base );
|
---|
458 |
|
---|
459 | sub Text::Diff::Unified::file_header {
|
---|
460 | shift; ## No instance data
|
---|
461 | my $options = pop ;
|
---|
462 |
|
---|
463 | _header(
|
---|
464 | { FILENAME_PREFIX_A => "---", FILENAME_PREFIX_B => "+++", %$options }
|
---|
465 | );
|
---|
466 | }
|
---|
467 |
|
---|
468 | =item hunk_header
|
---|
469 |
|
---|
470 | Text::Diff::Unified->hunk_header( \@ops, $options );
|
---|
471 |
|
---|
472 | Returns a string containing the output of one hunk of unified diff.
|
---|
473 |
|
---|
474 | =cut
|
---|
475 |
|
---|
476 | sub Text::Diff::Unified::hunk_header {
|
---|
477 | shift; ## No instance data
|
---|
478 | pop; ## Ignore options
|
---|
479 | my $ops = pop;
|
---|
480 |
|
---|
481 | return join( "",
|
---|
482 | "@@ -",
|
---|
483 | _range( $ops, A, "unified" ),
|
---|
484 | " +",
|
---|
485 | _range( $ops, B, "unified" ),
|
---|
486 | " @@\n",
|
---|
487 | );
|
---|
488 | }
|
---|
489 |
|
---|
490 |
|
---|
491 | =item Text::Diff::Unified::hunk
|
---|
492 |
|
---|
493 | Text::Diff::Unified->hunk( \@seq_a, \@seq_b, \@ops, $options );
|
---|
494 |
|
---|
495 | Returns a string containing the output of one hunk of unified diff.
|
---|
496 |
|
---|
497 | =cut
|
---|
498 |
|
---|
499 | sub Text::Diff::Unified::hunk {
|
---|
500 | shift; ## No instance data
|
---|
501 | pop; ## Ignore options
|
---|
502 | my $ops = pop;
|
---|
503 |
|
---|
504 | my $prefixes = { "+" => "+", " " => " ", "-" => "-" };
|
---|
505 |
|
---|
506 | return join "", map _op_to_line( \@_, $_, undef, $prefixes ), @$ops
|
---|
507 | }
|
---|
508 |
|
---|
509 |
|
---|
510 | =back
|
---|
511 |
|
---|
512 | =head2 Text::Diff::Table
|
---|
513 |
|
---|
514 | +--+----------------------------------+--+------------------------------+
|
---|
515 | | |../Test-Differences-0.2/MANIFEST | |../Test-Differences/MANIFEST |
|
---|
516 | | |Thu Dec 13 15:38:49 2001 | |Sat Dec 15 02:09:44 2001 |
|
---|
517 | +--+----------------------------------+--+------------------------------+
|
---|
518 | | | * 1|Changes *
|
---|
519 | | 1|Differences.pm | 2|Differences.pm |
|
---|
520 | | 2|MANIFEST | 3|MANIFEST |
|
---|
521 | | | * 4|MANIFEST.SKIP *
|
---|
522 | | 3|Makefile.PL | 5|Makefile.PL |
|
---|
523 | | | * 6|t/00escape.t *
|
---|
524 | | 4|t/00flatten.t | 7|t/00flatten.t |
|
---|
525 | | 5|t/01text_vs_data.t | 8|t/01text_vs_data.t |
|
---|
526 | | 6|t/10test.t | 9|t/10test.t |
|
---|
527 | +--+----------------------------------+--+------------------------------+
|
---|
528 |
|
---|
529 | This format also goes to some pains to highlight "invisible" characters on
|
---|
530 | differing elements by selectively escaping whitespace:
|
---|
531 |
|
---|
532 | +--+--------------------------+--------------------------+
|
---|
533 | | |demo_ws_A.txt |demo_ws_B.txt |
|
---|
534 | | |Fri Dec 21 08:36:32 2001 |Fri Dec 21 08:36:50 2001 |
|
---|
535 | +--+--------------------------+--------------------------+
|
---|
536 | | 1|identical |identical |
|
---|
537 | * 2| spaced in | also spaced in *
|
---|
538 | * 3|embedded space |embedded tab *
|
---|
539 | | 4|identical |identical |
|
---|
540 | * 5| spaced in |\ttabbed in *
|
---|
541 | * 6|trailing spaces\s\s\n |trailing tabs\t\t\n *
|
---|
542 | | 7|identical |identical |
|
---|
543 | * 8|lf line\n |crlf line\r\n *
|
---|
544 | * 9|embedded ws |embedded\tws *
|
---|
545 | +--+--------------------------+--------------------------+
|
---|
546 |
|
---|
547 | See L</Text::Diff::Table> for more details, including how the whitespace
|
---|
548 | escaping works.
|
---|
549 |
|
---|
550 | =head2 Text::Diff::Context
|
---|
551 |
|
---|
552 | *** A Mon Nov 12 23:49:30 2001
|
---|
553 | --- B Mon Nov 12 23:49:30 2001
|
---|
554 | ***************
|
---|
555 | *** 2,14 ****
|
---|
556 | 2
|
---|
557 | 3
|
---|
558 | 4
|
---|
559 | ! 5d
|
---|
560 | 6
|
---|
561 | 7
|
---|
562 | 8
|
---|
563 | 9
|
---|
564 | 10
|
---|
565 | 11
|
---|
566 | - 11d
|
---|
567 | 12
|
---|
568 | 13
|
---|
569 | --- 2,14 ----
|
---|
570 | 2
|
---|
571 | 3
|
---|
572 | 4
|
---|
573 | ! 5a
|
---|
574 | 6
|
---|
575 | 7
|
---|
576 | 8
|
---|
577 | 9
|
---|
578 | + 9a
|
---|
579 | 10
|
---|
580 | 11
|
---|
581 | 12
|
---|
582 | 13
|
---|
583 |
|
---|
584 | Note: hunk_header() returns only "***************\n".
|
---|
585 |
|
---|
586 | =cut
|
---|
587 |
|
---|
588 |
|
---|
589 | @Text::Diff::Context::ISA = qw( Text::Diff::Base );
|
---|
590 |
|
---|
591 | sub Text::Diff::Context::file_header {
|
---|
592 | _header { FILENAME_PREFIX_A=>"***", FILENAME_PREFIX_B=>"---", %{$_[-1]} };
|
---|
593 | }
|
---|
594 |
|
---|
595 |
|
---|
596 | sub Text::Diff::Context::hunk_header {
|
---|
597 | return "***************\n";
|
---|
598 | }
|
---|
599 |
|
---|
600 | sub Text::Diff::Context::hunk {
|
---|
601 | shift; ## No instance data
|
---|
602 | pop; ## Ignore options
|
---|
603 | my $ops = pop;
|
---|
604 | ## Leave the sequences in @_[0,1]
|
---|
605 |
|
---|
606 | my $a_range = _range( $ops, A, "" );
|
---|
607 | my $b_range = _range( $ops, B, "" );
|
---|
608 |
|
---|
609 | ## Sigh. Gotta make sure that differences that aren't adds/deletions
|
---|
610 | ## get prefixed with "!", and that the old opcodes are removed.
|
---|
611 | my $after;
|
---|
612 | for ( my $start = 0; $start <= $#$ops ; $start = $after ) {
|
---|
613 | ## Scan until next difference
|
---|
614 | $after = $start + 1;
|
---|
615 | my $opcode = $ops->[$start]->[OPCODE];
|
---|
616 | next if $opcode eq " ";
|
---|
617 |
|
---|
618 | my $bang_it;
|
---|
619 | while ( $after <= $#$ops && $ops->[$after]->[OPCODE] ne " " ) {
|
---|
620 | $bang_it ||= $ops->[$after]->[OPCODE] ne $opcode;
|
---|
621 | ++$after;
|
---|
622 | }
|
---|
623 |
|
---|
624 | if ( $bang_it ) {
|
---|
625 | for my $i ( $start..($after-1) ) {
|
---|
626 | $ops->[$i]->[FLAG] = "!";
|
---|
627 | }
|
---|
628 | }
|
---|
629 | }
|
---|
630 |
|
---|
631 | my $b_prefixes = { "+" => "+ ", " " => " ", "-" => undef, "!" => "! " };
|
---|
632 | my $a_prefixes = { "+" => undef, " " => " ", "-" => "- ", "!" => "! " };
|
---|
633 |
|
---|
634 | return join( "",
|
---|
635 | "*** ", $a_range, " ****\n",
|
---|
636 | map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ),
|
---|
637 | "--- ", $b_range, " ----\n",
|
---|
638 | map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ),
|
---|
639 | );
|
---|
640 | }
|
---|
641 | =head2 Text::Diff::OldStyle
|
---|
642 |
|
---|
643 | 5c5
|
---|
644 | < 5d
|
---|
645 | ---
|
---|
646 | > 5a
|
---|
647 | 9a10
|
---|
648 | > 9a
|
---|
649 | 12d12
|
---|
650 | < 11d
|
---|
651 |
|
---|
652 | Note: no file_header().
|
---|
653 |
|
---|
654 | =cut
|
---|
655 |
|
---|
656 | @Text::Diff::OldStyle::ISA = qw( Text::Diff::Base );
|
---|
657 |
|
---|
658 | sub _op {
|
---|
659 | my $ops = shift;
|
---|
660 | my $op = $ops->[0]->[OPCODE];
|
---|
661 | $op = "c" if grep $_->[OPCODE] ne $op, @$ops;
|
---|
662 | $op = "a" if $op eq "+";
|
---|
663 | $op = "d" if $op eq "-";
|
---|
664 | return $op;
|
---|
665 | }
|
---|
666 |
|
---|
667 | sub Text::Diff::OldStyle::hunk_header {
|
---|
668 | shift; ## No instance data
|
---|
669 | pop; ## ignore options
|
---|
670 | my $ops = pop;
|
---|
671 |
|
---|
672 | my $op = _op $ops;
|
---|
673 |
|
---|
674 | return join "", _range( $ops, A, "" ), $op, _range( $ops, B, "" ), "\n";
|
---|
675 | }
|
---|
676 |
|
---|
677 | sub Text::Diff::OldStyle::hunk {
|
---|
678 | shift; ## No instance data
|
---|
679 | pop; ## ignore options
|
---|
680 | my $ops = pop;
|
---|
681 | ## Leave the sequences in @_[0,1]
|
---|
682 |
|
---|
683 | my $a_prefixes = { "+" => undef, " " => undef, "-" => "< " };
|
---|
684 | my $b_prefixes = { "+" => "> ", " " => undef, "-" => undef };
|
---|
685 |
|
---|
686 | my $op = _op $ops;
|
---|
687 |
|
---|
688 | return join( "",
|
---|
689 | map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ),
|
---|
690 | $op eq "c" ? "---\n" : (),
|
---|
691 | map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ),
|
---|
692 | );
|
---|
693 | }
|
---|
694 |
|
---|
695 | =head1 LIMITATIONS
|
---|
696 |
|
---|
697 | Must suck both input files entirely in to memory and store them with a normal
|
---|
698 | amount of Perlish overhead (one array location) per record. This is implied by
|
---|
699 | the implementation of Algorithm::Diff, which takes two arrays. If
|
---|
700 | Algorithm::Diff ever offers an incremental mode, this can be changed (contact
|
---|
701 | the maintainers of Algorithm::Diff and Text::Diff if you need this; it
|
---|
702 | shouldn't be too terribly hard to tie arrays in this fashion).
|
---|
703 |
|
---|
704 | Does not provide most of the more refined GNU diff options: recursive directory
|
---|
705 | tree scanning, ignoring blank lines / whitespace, etc., etc. These can all be
|
---|
706 | added as time permits and need arises, many are rather easy; patches quite
|
---|
707 | welcome.
|
---|
708 |
|
---|
709 | Uses closures internally, this may lead to leaks on C<perl> versions 5.6.1 and
|
---|
710 | prior if used many times over a process' life time.
|
---|
711 |
|
---|
712 | =head1 AUTHOR
|
---|
713 |
|
---|
714 | Barrie Slaymaker <[email protected]>.
|
---|
715 |
|
---|
716 | =head1 COPYRIGHT & LICENSE
|
---|
717 |
|
---|
718 | Copyright 2001, Barrie Slaymaker. All Rights Reserved.
|
---|
719 |
|
---|
720 | You may use this under the terms of either the Artistic License or GNU Public
|
---|
721 | License v 2.0 or greater.
|
---|
722 |
|
---|
723 | =cut
|
---|
724 |
|
---|
725 | 1;
|
---|