source: main/trunk/greenstone2/perllib/IncrementalDocument.pm@ 24460

Last change on this file since 24460 was 23485, checked in by davidb, 13 years ago

read_infodb_entry now returns a hashmap directly. Code updated to take advantage of this, and in places where the hashmap is not needed, the alternative read_infodb_rawentry is called.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1###########################################################################
2#
3# IncrementalDocument.pm -- An object to encapsulate the Greenstone
4# document retrieved from the info database.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2006 DL Consulting Ltd and New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package IncrementalDocument;
28
29use strict;
30
31
# /** Construct an empty IncrementalDocument for one document of a collection.
#  *
#  *  Nothing is read from the info database here; the object only records
#  *  which document it stands for and starts with no metadata.
#  *
#  *  @param  $class      The class name the new object is blessed into
#  *  @param  $collection The name of the collection the document belongs to
#  *  @param  $infodbtype The info database type of the collection (stored as given)
#  *  @param  $oid        The unique identifier (OID) of the document
#  *  @return A blessed reference to the new object
#  *
#  *  Dies when $collection or $oid is false (undef, empty string or 0).
#  */
sub new()
{
    my ($class, $collection, $infodbtype, $oid) = @_;

    # Both the owning collection and the OID are mandatory.
    if (!$collection)
    {
        die ("Error! Can't create a document that doesn't belong to a collection!");
    }
    if (!$oid)
    {
        die ("Error! Can't create a document that doesn't have a unique id (OID)!");
    }

    my $self = {
        # The collection this document object has been loaded from.
        'collection' => $collection,
        # The infodbtype for the collection.
        'infodbtype' => $infodbtype,
        # Maps a metadata key to a nested hash whose keys are the values and
        # whose entries flag whether that value is currently set.
        'data'       => {},
        # The unique identifier of the document.
        'oid'        => $oid,
        # Maps a position (0, 1, 2, ...) to the metadata key that was
        # discovered/added at that position.
        'order'      => {},
    };

    return bless($self, $class);
}
# /** new() **/
67
# /** Record a metadata value for a key on this document.
#  *
#  *  The first time a key is seen it is appended to the 'order' table so that
#  *  keys can later be listed in the order they were first added.
#  *
#  *  @param  $self     A reference to this IncrementalDocument object
#  *  @param  $key      The metadata key; must contain at least one word character
#  *  @param  $value    The metadata value; must contain at least one word character
#  *  @param  $internal Accepted but not used by this method
#  *  @return 1 (the flag stored for the value)
#  *
#  *  Dies when $key or $value fails the checks above.
#  */
sub addMetadata()
{
    my ($self, $key, $value, $internal) = @_;

    # Reject keys and values that contain no word character at all.
    if ($key !~ /[\w]+/)
    {
        die ("Error! Can't add a metadata value to a document without a valid key!");
    }
    if ($value !~ /[\w\d]+/)
    {
        die ("Error! Can't add a metadata key to a document without a valid value!");
    }

    # A key we have not stored before gets the next free position in the
    # 'order' table and an empty value table of its own.
    my $is_known_key = defined($self->{'data'}->{$key});
    if (!$is_known_key)
    {
        my $next_position = scalar(keys %{$self->{'order'}});
        $self->{'order'}->{$next_position} = $key;
        $self->{'data'}->{$key} = {};
    }

    # Flag the value as present for this key.
    $self->{'data'}->{$key}->{$value} = 1;
}
# /** addMetadata() **/
94
# /** Retrieve all the metadata of this document as an array of pairs.
#  *
#  *  Keys are visited in the order they were first added; within one key the
#  *  values are visited in sorted order. Values whose flag is false are left
#  *  out. Progress is traced on STDERR.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A reference to an array of [key, value] array references
#  */
sub getAllMetadata()
{
    my ($self) = @_;
    my @pairs;

    print STDERR "IncrementalDocument.getAllMetadata()\n";

    my $key_count = scalar(keys %{$self->{'order'}});
    foreach my $position (0 .. $key_count - 1)
    {
        my $key = $self->{'order'}->{$position};

        # Skip keys that have no value table.
        my $values = $self->{'data'}->{$key};
        next unless $values;

        # Note: there may be zero live values left for this key.
        foreach my $value (sort keys %{$values})
        {
            next unless $values->{$value};
            print STDERR "* Storing $key => $value\n";
            push(@pairs, [$key, $value]);
        }
    }

    print STDERR "Complete!\n";
    return \@pairs;
}
# /** getAllMetadata() **/
127
# /** Return the document number stored under the 'docnum' metadata key.
#  *
#  *  Only values whose flag is still true are considered, so a docnum that was
#  *  taken out with removeMetadata() (which sets the flag to 0 rather than
#  *  deleting the entry) is never reported. This matches how toString() and
#  *  getAllMetadata() treat removed values.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return The docnum value, or -1 when the document has no live docnum.
#  *          Only one docnum is expected; should several be live, the first
#  *          in sorted (string) order is returned so the result is stable.
#  */
sub getDocNum
{
    my ($self) = @_;

    # Check the docnum path exists in the associated data.
    my $docnum_values = $self->{'data'}->{'docnum'};
    return -1 unless defined($docnum_values);

    # Ignore tombstoned entries (flag 0) left behind by removeMetadata().
    my @live_values = sort grep { $docnum_values->{$_} } keys %{$docnum_values};

    return @live_values ? $live_values[0] : -1;
}
# /** getDocNum() **/
145
# /** Load this document's metadata from the collection's info database.
#  *
#  *  The raw entry for this document's OID is read from the info database
#  *  located under $GSDLHOME/collect/<collection>/index/text, and every
#  *  "<key>value" line found in it is passed to addMetadata().
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return The number of key/value lines loaded (0, i.e. false, when the
#  *          entry is missing or contains no such lines)
#  *
#  *  Dies if addMetadata() rejects one of the parsed keys or values.
#  */
sub loadDocument
{
    my ($self) = @_;

    # Load the raw text for the document object from the info database.
    my $collection = $self->{'collection'};
    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text");
    my $infodb_file_path = &dbutil::get_infodb_file_path($self->{'infodbtype'}, $collection, $index_text_directory_path);
    my $text = &dbutil::read_infodb_rawentry($self->{'infodbtype'}, $infodb_file_path, $self->{'oid'});

    my $loaded_count = 0;
    # NOTE(review): presumably a missing entry comes back as undef or an empty
    # string - confirm against dbutil. Either way there is nothing to parse.
    return $loaded_count unless defined($text);

    # For each line in the raw text, extract the key (enclosed in angle
    # brackets) and the value. The text is scanned rather than substituted so
    # it is left intact, and addMetadata() is invoked as a method so that a
    # subclass override is honoured.
    # NOTE(review): because of the /s flag, '.+?' may run across a line end
    # when a key has an empty value, merging it with the following line. The
    # pattern is kept unchanged here - confirm whether that is intended.
    while ($text =~ /<([\w\d\.]+)>(.+?)\r?\n/gs)
    {
        # Copy the captures before any other regex can overwrite them.
        my ($key, $value) = ($1, $2);
        $self->addMetadata($key, $value, 1);
        $loaded_count++;
    }

    return $loaded_count;
}
# /** loadDocument() **/
163
# /** Locates and removes the given key/value mapping from this document
#  *  object.
#  *
#  *  The value is not deleted; its flag is set to 0, which toString() and
#  *  getAllMetadata() treat as "not present". A key or value this document
#  *  never held is left untouched: creating an entry for it here would make
#  *  the key look known to addMetadata() without it ever being recorded in
#  *  the 'order' table, and values added later under that key would then be
#  *  missing from the document's output.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @param  $key   The metadata key as a string
#  *  @param  $value The obsolete metadata value as a string
#  *  @return Always 0 (the value the previous implementation returned)
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeMetadata
{
    my ($self, $key, $value) = @_;

    # Nothing to remove without both a key and a value.
    return 0 unless defined($key) && defined($value);

    # Only tombstone mappings that actually exist; never autovivify.
    my $values = $self->{'data'}->{$key};
    return 0 unless defined($values) && exists($values->{$value});

    # Ensure the value doesn't exist by simply setting its flag to 0.
    $values->{$value} = 0;
    return 0;
}
# /** removeMetadata() **/
181
# /** Write this document back to the collection's info database.
#  *
#  *  The textual form of the object (see toString()) is converted to a hash
#  *  and written under the document's OID. If the document has a docnum, the
#  *  entry stored under that docnum is then read, and when it contains no
#  *  "<section>" line a reverse-lookup entry mapping the docnum to the OID
#  *  is written as well.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  */
sub saveDocument()
{
    my ($self) = @_;

    my $infodbtype = $self->{'infodbtype'};
    my $oid = $self->{'oid'};

    # Get a textual version of this object.
    my $document_text = $self->toString();

    # Work out where the info database of this collection lives.
    my $collection = $self->{'collection'};
    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text");
    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collection, $index_text_directory_path);

    # Now store the object in the info database.
    my $infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $infodb_file_path, "append");
    &dbutil::write_infodb_entry($infodbtype, $infodb_file_handle, $oid, &dbutil::convert_infodb_string_to_hash($document_text));
    &dbutil::close_infodb_write_handle($infodbtype, $infodb_file_handle);

    # There is a little bit of extra complexity when saving an incremental
    # document in that we should ensure that a reverse lookup - from DocNum or
    # nodeID to Greenstone document hash - exists in the database.
    my $doc_num = $self->getDocNum();
    if ($doc_num >= 0)
    {
        my $reverse_entry = &dbutil::read_infodb_rawentry($infodbtype, $infodb_file_path, $doc_num);

        # If there is no reverse lookup, then add one now.
        unless ($reverse_entry =~ /<section>/)
        {
            $infodb_file_handle = &dbutil::open_infodb_write_handle($infodbtype, $infodb_file_path, "append");
            &dbutil::write_infodb_entry($infodbtype, $infodb_file_handle, $doc_num, &dbutil::convert_infodb_string_to_hash("<section>" . $oid));
            &dbutil::close_infodb_write_handle($infodbtype, $infodb_file_handle);
        }
    }
}
# /** saveDocument() **/
219
# /** Produces a textual representation of this object.
#  *
#  *  One "<key>value" line is emitted per live value. Keys appear in the
#  *  order they were first added, the values of one key in sorted order, and
#  *  values whose flag is false are skipped.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A string which describes this incremental document object
#  *          (the empty string when there is nothing to show)
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub toString()
{
    my ($self) = @_;
    my @lines;

    my $key_count = scalar(keys %{$self->{'order'}});
    foreach my $position (0 .. $key_count - 1)
    {
        my $key = $self->{'order'}->{$position};

        # Skip keys that have no value table.
        my $values = $self->{'data'}->{$key};
        next unless $values;

        # Note: there may be zero live values left for this key.
        push(@lines,
             map  { "<$key>$_\n" }
             grep { $values->{$_} }
             sort keys %{$values});
    }

    return join("", @lines);
}
# /** toString() **/
251
2521;
Note: See TracBrowser for help on using the repository browser.