source: main/trunk/greenstone2/perllib/classify/SectionList.pm@ 32594

Last change on this file since 32594 was 23116, checked in by kjdon, 14 years ago

for incremental build, classifiers are not really done incrementally. Previously, we reconstructed all the docs from the database, and classified them, then processed any new/edited/deleted docs, updating the classifier as necessary. Now, we process all new/updated docs, then reconstruct the docs from the database, but only classify those not changed/deleted. This means that we are only ever adding docs to a classifier, never updating or deleting. I have removed edit_mode and all code handling deleting stuff from the classifier.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.6 KB
Line 
1###########################################################################
2#
3# SectionList.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# Same as SimpleList classifier but includes all sections of document
27# (excluding top level) rather than just top level document
28# itself
29
30
31package SectionList;
32
33use SimpleList;
34use sorttools;
35
36use strict;
37no strict 'refs'; # allow filehandles to be variables and viceversa
38
# Set up inheritance at compile time: SectionList is a SimpleList.
BEGIN {
    @SectionList::ISA = ('SimpleList');
}
42
# SectionList adds no arguments of its own.
my $arguments = [];

# Option block that new() appends to the shared "OptList".
my $options = {
    'name'     => "SectionList",
    'desc'     => "{SectionList.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
48
49
# Constructor.
#
# Appends this class name to $classifierslist, appends this module's argument
# list and option block to the "ArgList" and "OptList" entries of
# $hashArgOptLists, lets SimpleList construct the object, and blesses the
# result into $class.
#
# Parameters:
#   $class           - class name the new object is blessed into
#   $classifierslist - array ref; $class is pushed onto it
#   $inputargs       - handed unchanged to the SimpleList constructor
#   $hashArgOptLists - hash ref whose "ArgList" / "OptList" arrays are extended
#
# Returns the object built by SimpleList, blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($classifierslist, $inputargs, $hashArgOptLists) = @_;
    push(@$classifierslist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call.  The indirect-object form
    # "new SimpleList(...)" is parsed heuristically and can be misread
    # depending on which sub and package names are known at compile time.
    my $self = SimpleList->new($classifierslist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
62
# Adds sections of a document to this classifier.
#
# By default every section after the top section is passed to
# classify_section().  If one of @options has the form "section=N"
# (case-insensitive, N all digits) only that section is classified; when
# several such options are given the last one wins.  @options is used by
# AZCompactList when it uses SectionList internally.
#
# The sort key is computed once per call and shared by all sections: the
# source filename when the 'sort' setting is "filename", otherwise the
# 'sort' metadata element of the top section (formatted for sorting unless
# 'no_metadata_formatting' is set).  It stays "" when 'sort' is unset or is
# "nosort".
#
# NOTE(review): a dotted section identifier such as "section=1.2" does not
# match the pattern below and therefore falls through to classifying every
# section -- confirm that callers only ever pass all-digit section numbers.
sub classify {
    my ($self, $doc_obj, @options) = @_;

    # are we sorting the list??
    my $nosort = (defined $self->{'sort'} && $self->{'sort'} eq "nosort") ? 1 : 0;

    # Section explicitly requested through @options, if any.
    my $only_section = undef;
    for my $opt (@options) {
        next unless $opt =~ m/^section=(\d+)$/i;
        $only_section = $1;
    }

    my $sort_key = "";
    if (defined $self->{'sort'} && !$nosort) {
        if ($self->{'sort'} !~ /^filename$/i) {
            # sort on a metadata element taken from the top section
            $sort_key = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sort'});
            if (defined $sort_key && !$self->{'no_metadata_formatting'}) {
                $sort_key = &sorttools::format_metadata_for_sorting($self->{'sort'}, $sort_key, $doc_obj);
            }
        }
        else {
            $sort_key = $doc_obj->get_source_filename();
        }
        $sort_key = "" unless defined $sort_key;
    }

    if (defined $only_section) {
        # just classify the one section
        $self->classify_section($only_section, $doc_obj, $sort_key, $nosort);
    }
    else {
        # walk every section that follows the top section
        for (my $section = $doc_obj->get_next_section($doc_obj->get_top_section());
             defined $section;
             $section = $doc_obj->get_next_section($section)) {
            $self->classify_section($section, $doc_obj, $sort_key, $nosort);
        }
    }
}
109
# Adds one section of a document to the classifier's list.
#
# Parameters:
#   $section  - identifier of the section to add
#   $doc_obj  - document object; get_OID() and get_metadata_element() are used
#   $sortmeta - sort key to store when a 'sort' setting is defined
#               (defaults to "")
#   $nosort   - true to append to an unsorted list (defaults to 0)
#
# When 'meta_list' is defined, the section is only included if at least one
# of the listed metadata elements is defined for it; the first one found is
# the classification metadata.
#
# Entries are identified as "<document OID>.<section>".  With $nosort the
# identifier is pushed onto the 'list' array.  Otherwise 'list' is used as a
# hash from identifier to sort key, and a warning is written to 'outhandle'
# if the identifier already has a defined entry.
sub classify_section {
    my ($self, $section, $doc_obj, $sortmeta, $nosort) = @_;

    my $doc_OID = $doc_obj->get_OID();
    defined $nosort   or $nosort   = 0;
    defined $sortmeta or $sortmeta = "";

    my ($found_name, $found_value);
    if (defined $self->{'meta_list'}) {
        # find the first available metadata
        for my $candidate (@{ $self->{'meta_list'} }) {
            $found_name  = $candidate;
            $found_value = $doc_obj->get_metadata_element($section, $candidate);
            last if defined $found_value;
        }
        # nothing found: this section does not belong in the list
        return unless defined $found_value;
    }

    my $list_key = "$doc_OID.$section";

    # not sorting: the section is known to be wanted, so just append it
    if ($nosort) {
        push(@{ $self->{'list'} }, $list_key);
        return;
    }

    # warn (but carry on) if this section has been added already
    if (defined $self->{'list'}->{$list_key}) {
        my $warn_fh = $self->{'outhandle'};
        print $warn_fh "WARNING: SectionList::classify called multiple times for $doc_OID.$section\n";
    }

    my $sort_value;
    if (defined $self->{'sort'}) {
        # sorting on alternative metadata
        $sort_value = $sortmeta;
    }
    elsif ($self->{'no_metadata_formatting'}) {
        # sorting on the raw classification metadata
        $sort_value = $found_value;
    }
    else {
        # sorting on the classification metadata, formatted the same way
        # as sort metadata
        $sort_value = &sorttools::format_metadata_for_sorting($found_name, $found_value, $doc_obj);
    }
    $self->{'list'}->{$list_key} = $sort_value;
}
1521;
Note: See TracBrowser for help on using the repository browser.