## PJFileNameList.pm A version of List that correctly sorts Taonga catalogue numbers package PJFileNameList; use BaseClassifier; use strict; sub BEGIN { @PJFileNameList::ISA = ('BaseClassifier'); } my $arguments = []; my $options = { 'name' => "PJfileNameList", 'desc' => "{PJFileNameList.desc}", 'abstract' => "no", 'inherits' => "yes", 'args' => $arguments }; sub new { my ($class) = shift (@_); my ($classifierslist,$inputargs,$hashArgOptLists) = @_; push(@$classifierslist, $class); push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); push(@{$hashArgOptLists->{"OptList"}},$options); my $self = new BaseClassifier($classifierslist, $inputargs, $hashArgOptLists); if ($self->{'info_only'}) { # don't worry about any options etc return bless $self, $class; } # Manually set $self parameters. $self->{'list'} = {}; $self->{'childtype'} = "HList"; return bless $self, $class; } sub init { my $self = shift(@_); } sub classify { my $self = shift (@_); my ($doc_obj) = @_; my $doc_OID = $doc_obj->get_OID(); my $filename = $doc_obj->get_sourcefile(); return unless $filename =~ /\.item$/; # only classify the item files here $filename =~ s/\.item$//; # remove the file extension print STDERR "classifying $filename\n"; if (defined $self->{'list'}->{$doc_OID}) { my $outhandle = $self->{'outhandle'}; print $outhandle "WARNING: PJFileNameList::classify called multiple times for $doc_OID\n"; return; } $self->{'list'}->{$doc_OID} = "$filename"; } sub get_classify_info { my $self = shift (@_); #my @classlist = sort {$self->{'list'}->{$a} alpha_numeric_cmp $self->{'list'}->{$b};} keys %{$self->{'list'}}; my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}}; return $self->splitlist(\@classlist); } sub get_entry { my $self = shift (@_); my ($title, $childtype, $thistype) = @_; # organise into classification structure my %classifyinfo = ('childtype'=>$childtype, 'Title'=>$title, 'contains'=>[], 'mdtype'=>'filename'); $classifyinfo{'thistype'} = $thistype if defined $thistype && $thistype =~ /\w/; return \%classifyinfo; } # splitlist takes an ordered list of classifications (@$classlistref) and # splits it up into sub-sections by date sub splitlist { my $self = shift (@_); my ($classlistref) = @_; my $classhash = {}; # top level #my $childtype = "HList"; # the top level node my $classifyinfo = $self->get_entry ("FileNames", "HList", "Invisible"); # make the classifications foreach my $item (@$classlistref) { my $filename = $self->{'list'}->{$item}; print STDERR "filename=$filename\n"; my ($first, $second) = $filename =~ m@^(3[A-Z])(\d+)-(\d+)(.*)$@; $classhash->{$first} = () unless defined $classhash->{$first}; $classhash->{$first}->{$second} = [] unless defined $classhash->{$first}->{$second}; push (@{$classhash->{$first}->{$second}}, $item); } ##### UP to here!!! my @subclasslist = sort keys %$classhash; foreach my $subclass (@subclasslist) { my $topclassify = $self->get_entry($subclass, "HList"); my @subsubclasslist = sort {$a <=> $b} (keys %{$classhash->{$subclass}}); foreach my $subsubclass (@subsubclasslist) { my $secondclassify = $self->get_entry($subsubclass, "VList"); push (@{$topclassify->{'contains'}}, $secondclassify); foreach my $subsubOID (@{$classhash->{$subclass}->{$subsubclass}}) { push (@{$secondclassify->{'contains'}}, {'OID'=>$subsubOID}); } } push (@{$classifyinfo->{'contains'}}, $topclassify); } return $classifyinfo; } # sort items like 3A01-13_Part_1 properly # 3A08-05 # 3A08-02_Part_1 # 3A05-04_Photo_1 # 3A08-28_Part_A # 3A08-15_No_03 #3A09-10a_Part_1 #3A09-11e sub alpha_numeric_cmp { my ($self,$a,$b) = @_; my ($a1, $a2, $a3, $a4) = $a =~ m@^3([A-Z])(\d+)-(\d+)(.*)$@; my ($b1, $b2, $b3, $b4) = $b =~ m@^3([A-Z])(\d+)-(\d+)(.*)$@; if ($a1 && $b1) { if ($a1 != $b1) { # letter return ($a1 cmp $b1); } if ($a2 && $b2) { if ($a2 != $b2) { # numeric return ($a2 <=> $b2); } if ($a3 && $b3) { if ($a3 != $b3) { # numeric return ($a3 <=> $b3); } else { # both strings start with the same thing eg 3A01-01 return ($a4 cmp $b4); } } } } return $a cmp $b; } 1;