source: main/trunk/model-sites-dev/pei-jones/collect/written-works/perllib/classify/PJFileNameList.pm@ 31923

Last change on this file since 31923 was 31923, checked in by kjdon, 7 years ago

new classifier for catalogue numbers

File size: 4.4 KB
Line 
1## PJFileNameList.pm A version of List that correctly sorts Taonga catalogue numbers
2package PJFileNameList;
3
4use BaseClassifier;
5
6use strict;
7
8sub BEGIN {
9 @PJFileNameList::ISA = ('BaseClassifier');
10}
11
12my $arguments = [];
13
14my $options = { 'name' => "PJfileNameList",
15 'desc' => "{PJFileNameList.desc}",
16 'abstract' => "no",
17 'inherits' => "yes",
18 'args' => $arguments };
19
20
21
22sub new {
23 my ($class) = shift (@_);
24 my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
25 push(@$classifierslist, $class);
26
27 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
28 push(@{$hashArgOptLists->{"OptList"}},$options);
29
30 my $self = new BaseClassifier($classifierslist, $inputargs, $hashArgOptLists);
31
32 if ($self->{'info_only'}) {
33 # don't worry about any options etc
34 return bless $self, $class;
35 }
36
37 # Manually set $self parameters.
38 $self->{'list'} = {};
39
40 $self->{'childtype'} = "HList";
41
42 return bless $self, $class;
43}
44
45sub init
46{
47 my $self = shift(@_);
48
49}
50
51sub classify {
52
53 my $self = shift (@_);
54 my ($doc_obj) = @_;
55
56 my $doc_OID = $doc_obj->get_OID();
57 my $filename = $doc_obj->get_sourcefile();
58 return unless $filename =~ /\.item$/; # only classify the item files here
59 $filename =~ s/\.item$//; # remove the file extension
60 print STDERR "classifying $filename\n";
61 if (defined $self->{'list'}->{$doc_OID}) {
62 my $outhandle = $self->{'outhandle'};
63 print $outhandle "WARNING: PJFileNameList::classify called multiple times for $doc_OID\n";
64 return;
65 }
66
67 $self->{'list'}->{$doc_OID} = "$filename";
68
69
70}
71
72sub get_classify_info {
73 my $self = shift (@_);
74
75 #my @classlist = sort {$self->{'list'}->{$a} alpha_numeric_cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
76 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
77
78 return $self->splitlist(\@classlist);
79}
80
81sub get_entry {
82 my $self = shift (@_);
83 my ($title, $childtype, $thistype) = @_;
84
85 # organise into classification structure
86 my %classifyinfo = ('childtype'=>$childtype,
87 'Title'=>$title,
88 'contains'=>[],
89 'mdtype'=>'filename');
90 $classifyinfo{'thistype'} = $thistype
91 if defined $thistype && $thistype =~ /\w/;
92
93 return \%classifyinfo;
94}
95
96# splitlist takes an ordered list of classifications (@$classlistref) and
97# splits it up into sub-sections by date
98sub splitlist {
99 my $self = shift (@_);
100 my ($classlistref) = @_;
101 my $classhash = {};
102
103 # top level
104 #my $childtype = "HList";
105
106 # the top level node
107 my $classifyinfo = $self->get_entry ("FileNames", "HList", "Invisible");
108
109 # make the classifications
110 foreach my $item (@$classlistref) {
111 my $filename = $self->{'list'}->{$item};
112 print STDERR "filename=$filename\n";
113 my ($first, $second) = $filename =~ m@^(3[A-Z])(\d+)-(\d+)(.*)$@;
114 $classhash->{$first} = () unless defined $classhash->{$first};
115 $classhash->{$first}->{$second} = [] unless defined $classhash->{$first}->{$second};
116 push (@{$classhash->{$first}->{$second}}, $item);
117 }
118
119 ##### UP to here!!!
120 my @subclasslist = sort keys %$classhash;
121 foreach my $subclass (@subclasslist) {
122 my $topclassify = $self->get_entry($subclass, "HList");
123 my @subsubclasslist = sort {$a <=> $b} (keys %{$classhash->{$subclass}});
124 foreach my $subsubclass (@subsubclasslist) {
125 my $secondclassify = $self->get_entry($subsubclass, "VList");
126 push (@{$topclassify->{'contains'}}, $secondclassify);
127 foreach my $subsubOID
128 (@{$classhash->{$subclass}->{$subsubclass}}) {
129 push (@{$secondclassify->{'contains'}},
130 {'OID'=>$subsubOID});
131 }
132
133
134 }
135 push (@{$classifyinfo->{'contains'}}, $topclassify);
136
137 }
138 return $classifyinfo;
139}
140
141# sort items like 3A01-13_Part_1 properly
142# 3A08-05
143# 3A08-02_Part_1
144# 3A05-04_Photo_1
145# 3A08-28_Part_A
146# 3A08-15_No_03
147#3A09-10a_Part_1
148#3A09-11e
149sub alpha_numeric_cmp
150{
151 my ($self,$a,$b) = @_;
152 my ($a1, $a2, $a3, $a4) = $a =~ m@^3([A-Z])(\d+)-(\d+)(.*)$@;
153 my ($b1, $b2, $b3, $b4) = $b =~ m@^3([A-Z])(\d+)-(\d+)(.*)$@;
154 if ($a1 && $b1) {
155 if ($a1 != $b1) { # letter
156 return ($a1 cmp $b1);
157 }
158 if ($a2 && $b2) {
159 if ($a2 != $b2) { # numeric
160 return ($a2 <=> $b2);
161 }
162 if ($a3 && $b3) {
163 if ($a3 != $b3) { # numeric
164 return ($a3 <=> $b3);
165 } else {
166 # both strings start with the same thing eg 3A01-01
167 return ($a4 cmp $b4);
168
169 }
170 }
171 }
172 }
173 return $a cmp $b;
174}
175
176
177
1781;
Note: See TracBrowser for help on using the repository browser.