root/main/trunk/model-sites-dev/pei-jones/collect/written-works/perllib/classify/PJFileNameList.pm @ 31923

Revision 31923, 4.4 KB (checked in by kjdon, 3 years ago)

new classifier for catalogue numbers

Line 
1## PJFileNameList.pm A version of List that correctly sorts Taonga catalogue numbers
2package PJFileNameList;
3
4use BaseClassifier;
5
6use strict;
7
8sub BEGIN {
9    @PJFileNameList::ISA = ('BaseClassifier');
10}
11
12my $arguments = [];
13
14my $options = { 'name'     => "PJfileNameList",
15        'desc'     => "{PJFileNameList.desc}",
16        'abstract' => "no",
17        'inherits' => "yes",
18        'args'     => $arguments };
19
20
21
22sub new {
23    my ($class) = shift (@_);
24    my ($classifierslist,$inputargs,$hashArgOptLists) = @_;
25    push(@$classifierslist, $class);
26
27    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
28    push(@{$hashArgOptLists->{"OptList"}},$options);
29
30    my $self = new BaseClassifier($classifierslist, $inputargs, $hashArgOptLists);
31
32    if ($self->{'info_only'}) {
33    # don't worry about any options etc
34    return bless $self, $class;
35    }
36
37    # Manually set $self parameters.
38    $self->{'list'} = {};
39
40    $self->{'childtype'} = "HList";
41
42    return bless $self, $class;
43}
44
45sub init
46{
47    my $self = shift(@_);
48   
49}
50
51sub classify {
52
53    my $self = shift (@_);
54    my ($doc_obj) = @_;
55
56    my $doc_OID = $doc_obj->get_OID();
57    my $filename = $doc_obj->get_sourcefile();
58    return unless $filename =~ /\.item$/; # only classify the item files here
59    $filename =~ s/\.item$//; # remove the file extension
60    print STDERR "classifying $filename\n";
61    if (defined $self->{'list'}->{$doc_OID}) {
62    my $outhandle = $self->{'outhandle'};
63    print $outhandle "WARNING: PJFileNameList::classify called multiple times for $doc_OID\n";
64    return;
65    }
66   
67    $self->{'list'}->{$doc_OID} = "$filename";
68   
69
70}
71
72sub get_classify_info {
73    my $self = shift (@_);
74
75    #my @classlist = sort {$self->{'list'}->{$a} alpha_numeric_cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
76    my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
77
78    return $self->splitlist(\@classlist);
79}
80
81sub get_entry {
82    my $self = shift (@_);
83    my ($title, $childtype, $thistype) = @_;
84   
85    # organise into classification structure
86    my %classifyinfo = ('childtype'=>$childtype,
87            'Title'=>$title,
88            'contains'=>[],
89            'mdtype'=>'filename');
90    $classifyinfo{'thistype'} = $thistype
91    if defined $thistype && $thistype =~ /\w/;
92
93    return \%classifyinfo;
94}
95
96# splitlist takes an ordered list of classifications (@$classlistref) and
97# splits it up into sub-sections by date
98sub splitlist {
99    my $self = shift (@_);
100    my ($classlistref) = @_;
101    my $classhash = {};
102
103    # top level
104    #my $childtype = "HList";
105
106    # the top level node
107    my $classifyinfo = $self->get_entry ("FileNames", "HList", "Invisible");
108
109    # make the classifications
110    foreach my $item (@$classlistref) {
111    my $filename = $self->{'list'}->{$item};
112    print STDERR "filename=$filename\n";
113    my ($first, $second) = $filename =~ m@^(3[A-Z])(\d+)-(\d+)(.*)$@;
114    $classhash->{$first} = () unless defined $classhash->{$first};
115    $classhash->{$first}->{$second} = [] unless defined $classhash->{$first}->{$second};
116    push (@{$classhash->{$first}->{$second}}, $item);
117    }
118
119    ##### UP to here!!!
120    my @subclasslist = sort keys %$classhash;
121    foreach my $subclass (@subclasslist) {
122    my $topclassify = $self->get_entry($subclass, "HList");
123    my @subsubclasslist = sort {$a <=> $b} (keys %{$classhash->{$subclass}});
124    foreach my $subsubclass (@subsubclasslist) {
125        my $secondclassify = $self->get_entry($subsubclass, "VList");
126        push (@{$topclassify->{'contains'}}, $secondclassify);
127        foreach my $subsubOID
128        (@{$classhash->{$subclass}->{$subsubclass}}) {
129            push (@{$secondclassify->{'contains'}},
130              {'OID'=>$subsubOID});
131        }
132
133
134    }
135    push (@{$classifyinfo->{'contains'}}, $topclassify);
136
137    }
138    return $classifyinfo;
139}
140
141# sort items like 3A01-13_Part_1 properly
142# 3A08-05
143# 3A08-02_Part_1
144# 3A05-04_Photo_1
145# 3A08-28_Part_A
146# 3A08-15_No_03
147#3A09-10a_Part_1
148#3A09-11e
149sub alpha_numeric_cmp
150{
151    my ($self,$a,$b) = @_;
152    my ($a1, $a2, $a3, $a4) = $a =~ m@^3([A-Z])(\d+)-(\d+)(.*)$@;
153    my ($b1, $b2, $b3, $b4) = $b =~ m@^3([A-Z])(\d+)-(\d+)(.*)$@;
154    if ($a1 && $b1) {
155    if ($a1 != $b1) { # letter
156        return ($a1 cmp $b1);
157    }
158    if ($a2 && $b2) {
159        if ($a2 != $b2) { # numeric
160        return ($a2 <=> $b2);
161        }
162        if ($a3 && $b3) {
163        if ($a3 != $b3) { # numeric
164            return ($a3 <=> $b3);
165        } else {
166            # both strings start with the same thing eg 3A01-01
167            return ($a4 cmp $b4);
168           
169        }
170        }
171    }
172    }
173    return $a cmp $b;
174}
175
176
177
1781;
Note: See TracBrowser for help on using the browser.