source: main/trunk/greenstone2/perllib/cpan/Sort/Naturally.pm@ 33236

Last change on this file since 33236 was 33236, checked in by davidb, 5 years ago

CPAN module that provides a generalized form of alpha-numerical sorting. Useful in classifiers

File size: 22.8 KB
Line 
1
2require 5;
3package Sort::Naturally; # Time-stamp: "2004-12-29 18:30:03 AST"
4$VERSION = '1.03';
5@EXPORT = ('nsort', 'ncmp');
6require Exporter;
7@ISA = ('Exporter');
8
9use strict;
10use locale;
11use integer;
12
13#-----------------------------------------------------------------------------
14# constants:
15BEGIN { *DEBUG = sub () {0} unless defined &DEBUG }
16
17use Config ();
18BEGIN {
19 # Make a constant such that if a whole-number string is that long
20 # or shorter, we KNOW it's treatable as an integer
21 no integer;
22 my $x = length(256 ** $Config::Config{'intsize'} / 2) - 1;
23 die "Crazy intsize: <$Config::Config{'intsize'}>" if $x < 4;
24 eval 'sub MAX_INT_SIZE () {' . $x . '}';
25 die $@ if $@;
26 print "intsize $Config::Config{'intsize'} => MAX_INT_SIZE $x\n" if DEBUG;
27}
28
29sub X_FIRST () {-1}
30sub Y_FIRST () { 1}
31
32my @ORD = ('same', 'swap', 'asis');
33
34#-----------------------------------------------------------------------------
35# For lack of a preprocessor:
36
37my($code, $guts);
38$guts = <<'EOGUTS'; # This is the guts of both ncmp and nsort:
39
40 if($x eq $y) {
41 # trap this expensive case first, and then fall thru to tiebreaker
42 $rv = 0;
43
44 # Convoluted hack to get numerics to sort first, at string start:
45 } elsif($x =~ m/^\d/s) {
46 if($y =~ m/^\d/s) {
47 $rv = 0; # fall thru to normal comparison for the two numbers
48 } else {
49 $rv = X_FIRST;
50 DEBUG > 1 and print "Numeric-initial $x trumps letter-initial $y\n";
51 }
52 } elsif($y =~ m/^\d/s) {
53 $rv = Y_FIRST;
54 DEBUG > 1 and print "Numeric-initial $y trumps letter-initial $x\n";
55 } else {
56 $rv = 0;
57 }
58
59 unless($rv) {
60 # Normal case:
61 $rv = 0;
62 DEBUG and print "<$x> and <$y> compared...\n";
63
64 Consideration:
65 while(length $x and length $y) {
66
67 DEBUG > 2 and print " <$x> and <$y>...\n";
68
69 # First, non-numeric comparison:
70 $x2 = ($x =~ m/^(\D+)/s) ? length($1) : 0;
71 $y2 = ($y =~ m/^(\D+)/s) ? length($1) : 0;
72 # Now make x2 the min length of the two:
73 $x2 = $y2 if $x2 > $y2;
74 if($x2) {
75 DEBUG > 1 and printf " <%s> and <%s> lexically for length $x2...\n",
76 substr($x,0,$x2), substr($y,0,$x2);
77 do {
78 my $i = substr($x,0,$x2);
79 my $j = substr($y,0,$x2);
80 my $sv = $i cmp $j;
81 print "SCREAM! on <$i><$j> -- $sv != $rv \n" unless $rv == $sv;
82 last;
83 }
84
85
86 if $rv =
87 # The ''. things here force a copy that seems to work around a
88 # mysterious intermittent bug that 'use locale' provokes in
89 # many versions of Perl.
90 $cmp
91 ? $cmp->(substr($x,0,$x2) . '',
92 substr($y,0,$x2) . '',
93 )
94 :
95 scalar(( substr($x,0,$x2) . '' ) cmp
96 ( substr($y,0,$x2) . '' )
97 )
98 ;
99 # otherwise trim and keep going:
100 substr($x,0,$x2) = '';
101 substr($y,0,$x2) = '';
102 }
103
104 # Now numeric:
105 # (actually just using $x2 and $y2 as scratch)
106
107 if( $x =~ s/^(\d+)//s ) {
108 $x2 = $1;
109 if( $y =~ s/^(\d+)//s ) {
110 # We have two numbers here.
111 DEBUG > 1 and print " <$x2> and <$1> numerically\n";
112 if(length($x2) < MAX_INT_SIZE and length($1) < MAX_INT_SIZE) {
113 # small numbers: we can compare happily
114 last if $rv = $x2 <=> $1;
115 } else {
116 # ARBITRARILY large integers!
117
118 # This saves on loss of precision that could happen
119 # with actual stringification.
120 # Also, I sense that very large numbers aren't too
121 # terribly common in sort data.
122
123 # trim leading 0's:
124 ($y2 = $1) =~ s/^0+//s;
125 $x2 =~ s/^0+//s;
126 print " Treating $x2 and $y2 as bigint\n" if DEBUG;
127
128 no locale; # we want the dumb cmp back.
129 last if $rv = (
130 # works only for non-negative whole numbers:
131 length($x2) <=> length($y2)
132 # the longer the numeral, the larger the value
133 or $x2 cmp $y2
134 # between equals, compare lexically!! amazing but true.
135 );
136 }
137 } else {
138 # X is numeric but Y isn't
139 $rv = Y_FIRST;
140 last;
141 }
142 } elsif( $y =~ s/^\d+//s ) { # we don't need to capture the substring
143 $rv = X_FIRST;
144 last;
145 }
146 # else one of them is 0-length.
147
148 # end-while
149 }
150 }
151EOGUTS
152
153sub maker {
154 my $code = $_[0];
155 $code =~ s/~COMPARATOR~/$guts/g || die "Can't find ~COMPARATOR~";
156 eval $code;
157 die $@ if $@;
158}
159
160##############################################################################
161
162maker(<<'EONSORT');
163sub nsort {
164 # get options:
165 my($cmp, $lc);
166 ($cmp,$lc) = @{shift @_} if @_ and ref($_[0]) eq 'ARRAY';
167
168 return @_ unless @_ > 1 or wantarray; # be clever
169
170 my($x, $x2, $y, $y2, $rv); # scratch vars
171
172 # We use a Schwartzian xform to memoize the lc'ing and \W-removal
173
174 map $_->[0],
175 sort {
176 if($a->[0] eq $b->[0]) { 0 } # trap this expensive case
177 else {
178
179 $x = $a->[1];
180 $y = $b->[1];
181
182~COMPARATOR~
183
184 # Tiebreakers...
185 DEBUG > 1 and print " -<${$a}[0]> cmp <${$b}[0]> is $rv ($ORD[$rv])\n";
186 $rv ||= (length($x) <=> length($y)) # shorter is always first
187 || ($cmp and $cmp->($x,$y) || $cmp->($a->[0], $b->[0]))
188 || ($x cmp $y )
189 || ($a->[0] cmp $b->[0])
190 ;
191
192 DEBUG > 1 and print " <${$a}[0]> cmp <${$b}[0]> is $rv ($ORD[$rv])\n";
193 $rv;
194 }}
195
196 map {;
197 $x = $lc ? $lc->($_) : lc($_); # x as scratch
198 $x =~ s/\W+//s;
199 [$_, $x];
200 }
201 @_
202}
203EONSORT
204
205#-----------------------------------------------------------------------------
206maker(<<'EONCMP');
207sub ncmp {
208 # The guts are basically the same as above...
209
210 # get options:
211 my($cmp, $lc);
212 ($cmp,$lc) = @{shift @_} if @_ and ref($_[0]) eq 'ARRAY';
213
214 if(@_ == 0) {
215 @_ = ($a, $b); # bit of a hack!
216 DEBUG > 1 and print "Hacking in <$a><$b>\n";
217 } elsif(@_ != 2) {
218 require Carp;
219 Carp::croak("Not enough options to ncmp!");
220 }
221 my($a,$b) = @_;
222 my($x, $x2, $y, $y2, $rv); # scratch vars
223
224 DEBUG > 1 and print "ncmp args <$a><$b>\n";
225 if($a eq $b) { # trap this expensive case
226 0;
227 } else {
228 $x = ($lc ? $lc->($a) : lc($a));
229 $x =~ s/\W+//s;
230 $y = ($lc ? $lc->($b) : lc($b));
231 $y =~ s/\W+//s;
232
233~COMPARATOR~
234
235
236 # Tiebreakers...
237 DEBUG > 1 and print " -<$a> cmp <$b> is $rv ($ORD[$rv])\n";
238 $rv ||= (length($x) <=> length($y)) # shorter is always first
239 || ($cmp and $cmp->($x,$y) || $cmp->($a,$b))
240 || ($x cmp $y)
241 || ($a cmp $b)
242 ;
243
244 DEBUG > 1 and print " <$a> cmp <$b> is $rv\n";
245 $rv;
246 }
247}
248EONCMP
249
250# clean up:
251undef $guts;
252undef &maker;
253
254#-----------------------------------------------------------------------------
2551;
256
257############### END OF MAIN SOURCE ###########################################
258__END__
259
260=head1 NAME
261
262Sort::Naturally -- sort lexically, but sort numeral parts numerically
263
264=head1 SYNOPSIS
265
266 @them = nsort(qw(
267 foo12a foo12z foo13a foo 14 9x foo12 fooa foolio Foolio Foo12a
268 ));
269 print join(' ', @them), "\n";
270
271Prints:
272
273 9x 14 foo fooa foolio Foolio foo12 foo12a Foo12a foo12z foo13a
274
275(Or "foo12a" + "Foo12a" and "foolio" + "Foolio" and might be
276switched, depending on your locale.)
277
278=head1 DESCRIPTION
279
280This module exports two functions, C<nsort> and C<ncmp>; they are used
281in implementing my idea of a "natural sorting" algorithm. Under natural
282sorting, numeric substrings are compared numerically, and other
283word-characters are compared lexically.
284
285This is the way I define natural sorting:
286
287=over
288
289=item *
290
291Non-numeric word-character substrings are sorted lexically,
292case-insensitively: "Foo" comes between "fish" and "fowl".
293
294=item *
295
296Numeric substrings are sorted numerically:
297"100" comes after "20", not before.
298
299=item *
300
301\W substrings (neither words-characters nor digits) are I<ignored>.
302
303=item *
304
305Our use of \w, \d, \D, and \W is locale-sensitive: Sort::Naturally
306uses a C<use locale> statement.
307
308=item *
309
310When comparing two strings, where a numeric substring in one
311place is I<not> up against a numeric substring in another,
312the non-numeric always comes first. This is fudged by
313reading pretending that the lack of a number substring has
314the value -1, like so:
315
316 foo => "foo", -1
317 foobar => "foo", -1, "bar"
318 foo13 => "foo", 13,
319 foo13xyz => "foo", 13, "xyz"
320
321That's so that "foo" will come before "foo13", which will come
322before "foobar".
323
324=item *
325
326The start of a string is exceptional: leading non-\W (non-word,
327non-digit)
328components are are ignored, and numbers come I<before> letters.
329
330=item *
331
332I define "numeric substring" just as sequences matching m/\d+/ --
333scientific notation, commas, decimals, etc., are not seen. If
334your data has thousands separators in numbers
335("20,000 Leagues Under The Sea" or "20.000 lieues sous les mers"),
336consider stripping them before feeding them to C<nsort> or
337C<ncmp>.
338
339=back
340
341=head2 The nsort function
342
343This function takes a list of strings, and returns a copy of the list,
344sorted.
345
346This is what most people will want to use:
347
348 @stuff = nsort(...list...);
349
350When nsort needs to compare non-numeric substrings, it
351uses Perl's C<lc> function in scope of a <use locale>.
352And when nsort needs to lowercase things, it uses Perl's
353C<lc> function in scope of a <use locale>. If you want nsort
354to use other functions instead, you can specify them in
355an arrayref as the first argument to nsort:
356
357 @stuff = nsort( [
358 \&string_comparator, # optional
359 \&lowercaser_function # optional
360 ],
361 ...list...
362 );
363
364If you want to specify a string comparator but no lowercaser,
365then the options list is C<[\&comparator, '']> or
366C<[\&comparator]>. If you want to specify no string comparator
367but a lowercaser, then the options list is
368C<['', \&lowercaser]>.
369
370Any comparator you specify is called as
371C<$comparator-E<gt>($left, $right)>,
372and, like a normal Perl C<cmp> replacement, must return
373-1, 0, or 1 depending on whether the left argument is stringwise
374less than, equal to, or greater than the right argument.
375
376Any lowercaser function you specify is called as
377C<$lowercased = $lowercaser-E<gt>($original)>. The routine
378must not modify its C<$_[0]>.
379
380=head2 The ncmp function
381
382Often, when sorting non-string values like this:
383
384 @objects_sorted = sort { $a->tag cmp $b->tag } @objects;
385
386...or even in a Schwartzian transform, like this:
387
388 @strings =
389 map $_->[0]
390 sort { $a->[1] cmp $b->[1] }
391 map { [$_, make_a_sort_key_from($_) ]
392 @_
393 ;
394
395...you wight want something that replaces not C<sort>, but C<cmp>.
396That's what Sort::Naturally's C<ncmp> function is for. Call it with
397the syntax C<ncmp($left,$right)> instead of C<$left cmp $right>,
398but otherwise it's a fine replacement:
399
400 @objects_sorted = sort { ncmp($a->tag,$b->tag) } @objects;
401
402 @strings =
403 map $_->[0]
404 sort { ncmp($a->[1], $b->[1]) }
405 map { [$_, make_a_sort_key_from($_) ]
406 @_
407 ;
408
409Just as with C<nsort> can take different a string-comparator
410and/or lowercaser, you can do the same with C<ncmp>, by passing
411an arrayref as the first argument:
412
413 ncmp( [
414 \&string_comparator, # optional
415 \&lowercaser_function # optional
416 ],
417 $left, $right
418 )
419
420You might get string comparators from L<Sort::ArbBiLex|Sort::ArbBiLex>.
421
422=head1 NOTES
423
424=over
425
426=item *
427
428This module is not a substitute for
429L<Sort::Versions|Sort::Versions>! If
430you just need proper version sorting, use I<that!>
431
432=item *
433
434If you need something that works I<sort of> like this module's
435functions, but not quite the same, consider scouting thru this
436module's source code, and adapting what you see. Besides
437the functions that actually compile in this module, after the POD,
438there's several alternate attempts of mine at natural sorting
439routines, which are not compiled as part of the module, but which you
440might find useful. They should all be I<working> implementations of
441slightly different algorithms
442(all of them based on Martin Pool's C<nsort>) which I eventually
443discarded in favor of my algorithm. If you are having to
444naturally-sort I<very large> data sets, and sorting is getting
445ridiculously slow, you might consider trying one of those
446discarded functions -- I have a feeling they might be faster on
447large data sets. Benchmark them on your data and see. (Unless
448you I<need> the speed, don't bother. Hint: substitute C<sort>
449for C<nsort> in your code, and unless your program speeds up
450drastically, it's not the sorting that's slowing things down.
451But if it I<is> C<nsort> that's slowing things down, consider
452just:
453
454 if(@set >= SOME_VERY_BIG_NUMBER) {
455 no locale; # vroom vroom
456 @sorted = sort(@set); # feh, good enough
457 } elsif(@set >= SOME_BIG_NUMBER) {
458 use locale;
459 @sorted = sort(@set); # feh, good enough
460 } else {
461 # but keep it pretty for normal cases
462 @sorted = nsort(@set);
463 }
464
465=item *
466
467If you do adapt the routines in this module, email me; I'd
468just be interested in hearing about it.
469
470=item *
471
472Thanks to the EFNet #perl people for encouraging this module,
473especially magister and a-mused.
474
475=back
476
477=head1 COPYRIGHT AND DISCLAIMER
478
479Copyright 2001, Sean M. Burke C<[email protected]>, all rights
480reserved. This program is free software; you can redistribute it
481and/or modify it under the same terms as Perl itself.
482
483This program is distributed in the hope that it will be useful, but
484without any warranty; without even the implied warranty of
485merchantability or fitness for a particular purpose.
486
487=head1 AUTHOR
488
489Sean M. Burke C<[email protected]>
490
491=cut
492
493############ END OF DOCS ############
494
495############################################################################
496############################################################################
497
498############ BEGIN OLD STUFF ############
499
500# We can't have "use integer;", or else (5 <=> 5.1) comes out "0" !
501
502#-----------------------------------------------------------------------------
503sub nsort {
504 my($cmp, $lc);
505 return @_ if @_ < 2; # Just to be CLEVER.
506
507 my($x, $i); # scratch vars
508
509 # And now, the GREAT BIG Schwartzian transform:
510
511 map
512 $_->[0],
513
514 sort {
515 # Uses $i as the index variable, $x as the result.
516 $x = 0;
517 $i = 1;
518 DEBUG and print "\nComparing ", map("{$_}", @$a),
519 ' : ', map("{$_}", @$b), , "...\n";
520
521 while($i < @$a and $i < @$b) {
522 DEBUG and print " comparing $i: {$a->[$i]} cmp {$b->[$i]} => ",
523 $a->[$i] cmp $b->[$i], "\n";
524 last if ($x = ($a->[$i] cmp $b->[$i])); # lexicographic
525 ++$i;
526
527 DEBUG and print " comparing $i: {$a->[$i]} <=> {$b->[$i]} => ",
528 $a->[$i] <=> $b->[$i], "\n";
529 last if ($x = ($a->[$i] <=> $b->[$i])); # numeric
530 ++$i;
531 }
532
533 DEBUG and print "{$a->[0]} : {$b->[0]} is ",
534 $x || (@$a <=> @$b) || 0
535 ,"\n"
536 ;
537 $x || (@$a <=> @$b) || ($a->[0] cmp $b->[0]);
538 # unless we found a result for $x in the while loop,
539 # use length as a tiebreaker, otherwise use cmp
540 # on the original string as a fallback tiebreaker.
541 }
542
543 map {
544 my @bit = ($x = defined($_) ? $_ : '');
545
546 if($x =~ m/^[+-]?(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee](?:[+-]?\d+))?\z/s) {
547 # It's entirely purely numeric, so treat it specially:
548 push @bit, '', $x;
549 } else {
550 # Consume the string.
551 while(length $x) {
552 push @bit, ($x =~ s/^(\D+)//s) ? lc($1) : '';
553 push @bit, ($x =~ s/^(\d+)//s) ? $1 : 0;
554 }
555 }
556 DEBUG and print "$bit[0] => ", map("{$_} ", @bit), "\n";
557
558 # End result: [original bit , (text, number), (text, number), ...]
559 # Minimally: [0-length original bit,]
560 # Examples:
561 # ['10' => '' , 10, ]
562 # ['fo900' => 'fo' , 900, ]
563 # ['foo10' => 'foo', 10, ]
564 # ['foo9.pl' => 'foo', 9, , '.pl', 0 ]
565 # ['foo32.pl' => 'foo', 32, , '.pl', 0 ]
566 # ['foo325.pl' => 'foo', 325, , '.pl', 0 ]
567 # Yes, always an ODD number of elements.
568
569 \@bit;
570 }
571 @_;
572}
573
574#-----------------------------------------------------------------------------
575# Same as before, except without the pure-number trap.
576
577sub nsorts {
578 return @_ if @_ < 2; # Just to be CLEVER.
579
580 my($x, $i); # scratch vars
581
582 # And now, the GREAT BIG Schwartzian transform:
583
584 map
585 $_->[0],
586
587 sort {
588 # Uses $i as the index variable, $x as the result.
589 $x = 0;
590 $i = 1;
591 DEBUG and print "\nComparing ", map("{$_}", @$a),
592 ' : ', map("{$_}", @$b), , "...\n";
593
594 while($i < @$a and $i < @$b) {
595 DEBUG and print " comparing $i: {$a->[$i]} cmp {$b->[$i]} => ",
596 $a->[$i] cmp $b->[$i], "\n";
597 last if ($x = ($a->[$i] cmp $b->[$i])); # lexicographic
598 ++$i;
599
600 DEBUG and print " comparing $i: {$a->[$i]} <=> {$b->[$i]} => ",
601 $a->[$i] <=> $b->[$i], "\n";
602 last if ($x = ($a->[$i] <=> $b->[$i])); # numeric
603 ++$i;
604 }
605
606 DEBUG and print "{$a->[0]} : {$b->[0]} is ",
607 $x || (@$a <=> @$b) || 0
608 ,"\n"
609 ;
610 $x || (@$a <=> @$b) || ($a->[0] cmp $b->[0]);
611 # unless we found a result for $x in the while loop,
612 # use length as a tiebreaker, otherwise use cmp
613 # on the original string as a fallback tiebreaker.
614 }
615
616 map {
617 my @bit = ($x = defined($_) ? $_ : '');
618
619 while(length $x) {
620 push @bit, ($x =~ s/^(\D+)//s) ? lc($1) : '';
621 push @bit, ($x =~ s/^(\d+)//s) ? $1 : 0;
622 }
623 DEBUG and print "$bit[0] => ", map("{$_} ", @bit), "\n";
624
625 # End result: [original bit , (text, number), (text, number), ...]
626 # Minimally: [0-length original bit,]
627 # Examples:
628 # ['10' => '' , 10, ]
629 # ['fo900' => 'fo' , 900, ]
630 # ['foo10' => 'foo', 10, ]
631 # ['foo9.pl' => 'foo', 9, , '.pl', 0 ]
632 # ['foo32.pl' => 'foo', 32, , '.pl', 0 ]
633 # ['foo325.pl' => 'foo', 325, , '.pl', 0 ]
634 # Yes, always an ODD number of elements.
635
636 \@bit;
637 }
638 @_;
639}
640
641#-----------------------------------------------------------------------------
642# Same as before, except for the sort-key-making
643
644sub nsort0 {
645 return @_ if @_ < 2; # Just to be CLEVER.
646
647 my($x, $i); # scratch vars
648
649 # And now, the GREAT BIG Schwartzian transform:
650
651 map
652 $_->[0],
653
654 sort {
655 # Uses $i as the index variable, $x as the result.
656 $x = 0;
657 $i = 1;
658 DEBUG and print "\nComparing ", map("{$_}", @$a),
659 ' : ', map("{$_}", @$b), , "...\n";
660
661 while($i < @$a and $i < @$b) {
662 DEBUG and print " comparing $i: {$a->[$i]} cmp {$b->[$i]} => ",
663 $a->[$i] cmp $b->[$i], "\n";
664 last if ($x = ($a->[$i] cmp $b->[$i])); # lexicographic
665 ++$i;
666
667 DEBUG and print " comparing $i: {$a->[$i]} <=> {$b->[$i]} => ",
668 $a->[$i] <=> $b->[$i], "\n";
669 last if ($x = ($a->[$i] <=> $b->[$i])); # numeric
670 ++$i;
671 }
672
673 DEBUG and print "{$a->[0]} : {$b->[0]} is ",
674 $x || (@$a <=> @$b) || 0
675 ,"\n"
676 ;
677 $x || (@$a <=> @$b) || ($a->[0] cmp $b->[0]);
678 # unless we found a result for $x in the while loop,
679 # use length as a tiebreaker, otherwise use cmp
680 # on the original string as a fallback tiebreaker.
681 }
682
683 map {
684 my @bit = ($x = defined($_) ? $_ : '');
685
686 if($x =~ m/^[+-]?(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee](?:[+-]?\d+))?\z/s) {
687 # It's entirely purely numeric, so treat it specially:
688 push @bit, '', $x;
689 } else {
690 # Consume the string.
691 while(length $x) {
692 push @bit, ($x =~ s/^(\D+)//s) ? lc($1) : '';
693 # Secret sauce:
694 if($x =~ s/^(\d+)//s) {
695 if(substr($1,0,1) eq '0' and $1 != 0) {
696 push @bit, $1 / (10 ** length($1));
697 } else {
698 push @bit, $1;
699 }
700 } else {
701 push @bit, 0;
702 }
703 }
704 }
705 DEBUG and print "$bit[0] => ", map("{$_} ", @bit), "\n";
706
707 \@bit;
708 }
709 @_;
710}
711
712#-----------------------------------------------------------------------------
713# Like nsort0, but WITHOUT pure number handling, and WITH special treatment
714# of pulling off extensions and version numbers.
715
716sub nsortf {
717 return @_ if @_ < 2; # Just to be CLEVER.
718
719 my($x, $i); # scratch vars
720
721 # And now, the GREAT BIG Schwartzian transform:
722
723 map
724 $_->[0],
725
726 sort {
727 # Uses $i as the index variable, $x as the result.
728 $x = 0;
729 $i = 3;
730 DEBUG and print "\nComparing ", map("{$_}", @$a),
731 ' : ', map("{$_}", @$b), , "...\n";
732
733 while($i < @$a and $i < @$b) {
734 DEBUG and print " comparing $i: {$a->[$i]} cmp {$b->[$i]} => ",
735 $a->[$i] cmp $b->[$i], "\n";
736 last if ($x = ($a->[$i] cmp $b->[$i])); # lexicographic
737 ++$i;
738
739 DEBUG and print " comparing $i: {$a->[$i]} <=> {$b->[$i]} => ",
740 $a->[$i] <=> $b->[$i], "\n";
741 last if ($x = ($a->[$i] <=> $b->[$i])); # numeric
742 ++$i;
743 }
744
745 DEBUG and print "{$a->[0]} : {$b->[0]} is ",
746 $x || (@$a <=> @$b) || 0
747 ,"\n"
748 ;
749 $x || (@$a <=> @$b ) || ($a->[1] cmp $b->[1])
750 || ($a->[2] <=> $b->[2]) || ($a->[0] cmp $b->[0]);
751 # unless we found a result for $x in the while loop,
752 # use length as a tiebreaker, otherwise use the
753 # lc'd extension, otherwise the verison, otherwise use
754 # the original string as a fallback tiebreaker.
755 }
756
757 map {
758 my @bit = ( ($x = defined($_) ? $_ : ''), '',0 );
759
760 {
761 # Consume the string.
762
763 # First, pull off any VAX-style version
764 $bit[2] = $1 if $x =~ s/;(\d+)$//;
765
766 # Then pull off any apparent extension
767 if( $x !~ m/^\.+$/s and # don't mangle ".", "..", or "..."
768 $x =~ s/(\.[^\.\;]*)$//sg
769 # We could try to avoid catching all-digit extensions,
770 # but I think that's getting /too/ clever.
771 ) {
772 $i = $1;
773 if($x =~ m<[^\\\://]$>s) {
774 # We didn't take the whole basename.
775 $bit[1] = lc $i;
776 DEBUG and print "Consuming extension \"$1\"\n";
777 } else {
778 # We DID take the whole basename. Fix it.
779 $x = $1; # Repair it.
780 }
781 }
782
783 push @bit, '', -1 if $x =~ m/^\./s;
784 # A hack to make .-initial filenames sort first, regardless of locale.
785 # And -1 is always a sort-firster, since in the code below, there's
786 # no allowance for filenames containing negative numbers: -1.dat
787 # will be read as string '-' followed by number 1.
788
789 while(length $x) {
790 push @bit, ($x =~ s/^(\D+)//s) ? lc($1) : '';
791 # Secret sauce:
792 if($x =~ s/^(\d+)//s) {
793 if(substr($1,0,1) eq '0' and $1 != 0) {
794 push @bit, $1 / (10 ** length($1));
795 } else {
796 push @bit, $1;
797 }
798 } else {
799 push @bit, 0;
800 }
801 }
802 }
803
804 DEBUG and print "$bit[0] => ", map("{$_} ", @bit), "\n";
805
806 \@bit;
807 }
808 @_;
809}
810
811# yowza yowza yowza.
812
Note: See TracBrowser for help on using the repository browser.