source: main/trunk/greenstone2/perllib/IncrementalDocument.pm@ 31861

Last change on this file since 31861 was 29116, checked in by ak19, 10 years ago

imports of dbutil and util are necessary to get set_metadata.pl (which ends up using IncrementalDocument) to at least run.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1###########################################################################
2#
3# IncrementalDocument.pm -- An object to encapsulate the Greenstone
4# document retrieved from the info database.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2006 DL Consulting Ltd and New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package IncrementalDocument;
28
29
30use strict;
31use util;
32use dbutil;
33
# /** Construct an empty IncrementalDocument for one document of a collection.
#  *
#  *  Nothing is read from the info database here; the object starts with no
#  *  metadata and is filled in later (see loadDocument() and addMetadata()).
#  *
#  *  @param  $class      The package name the new object is blessed into
#  *  @param  $collection The name of the collection the document belongs to
#  *  @param  $infodbtype The info database type of the collection (stored
#  *                      as given, not validated here)
#  *  @param  $oid        The unique identifier (OID) of the document
#  *  @return A blessed hash reference
#  *
#  *  Dies if $collection or $oid is false.
#  */
sub new()
{
    my ($class, $collection, $infodbtype, $oid) = @_;

    # Both the collection and the OID are mandatory
    if (!$collection)
    {
        die ("Error! Can't create a document that doesn't belong to a collection!");
    }
    if (!$oid)
    {
        die ("Error! Can't create a document that doesn't have a unique id (OID)!");
    }

    my $document =
    {
        # The collection this document object has been loaded from
        'collection' => $collection,
        # The infodbtype for the collection
        'infodbtype' => $infodbtype,
        # Maps each metadata key to a nested hash of value => flag, where a
        # true flag means the value is currently set
        'data'       => {},
        # The unique identifier of the document
        'oid'        => $oid,
        # Maps a running index (0, 1, 2, ...) to the metadata key that was
        # discovered/added at that position
        'order'      => {},
    };

    return bless($document, $class);
}
68# /** new() **/
69
# /** Record a key/value metadata pair on this document.
#  *
#  *  The first time a key is seen it is given the next free position in the
#  *  'order' table so that keys can later be listed in the order they were
#  *  added. Adding a pair that already exists simply marks it as set again.
#  *
#  *  @param  $self     A reference to this IncrementalDocument object
#  *  @param  $key      The metadata key; must contain a word character
#  *  @param  $value    The metadata value; must contain a word character
#  *  @param  $internal Accepted for the benefit of existing callers but not
#  *                    used by this method
#  *
#  *  Dies if $key or $value fails the checks above.
#  */
sub addMetadata()
{
    my ($self, $key, $value, $internal) = @_;

    # Validate the arguments
    if ($key !~ /[\w]+/)
    {
        die ("Error! Can't add a metadata value to a document without a valid key!");
    }
    if ($value !~ /[\w\d]+/)
    {
        die ("Error! Can't add a metadata key to a document without a valid value!");
    }

    my $values_by_key = $self->{'data'};

    # A key we have not encountered before gets an (empty) value table and
    # the next free slot in the ordering
    unless (defined($values_by_key->{$key}))
    {
        my $next_slot = scalar(keys %{$self->{'order'}});
        $self->{'order'}->{$next_slot} = $key;
        $values_by_key->{$key} = {};
    }

    # A flag of 1 marks the value as currently set
    $values_by_key->{$key}->{$value} = 1;
}
95# /** addMetadata() **/
96
# /** Retrieve all the metadata of this document as an array of pairs.
#  *
#  *  Keys are visited in the order they were first added; within one key
#  *  the values are visited in sorted (string) order. Values whose flag has
#  *  been cleared are skipped. Progress is traced on STDERR.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A reference to an array of [key, value] array references
#  */
sub getAllMetadata()
{
    my ($self) = @_;
    my @pairs = ();

    print STDERR "IncrementalDocument.getAllMetadata()\n";

    my $last_position = scalar(keys %{$self->{'order'}}) - 1;
    foreach my $position (0 .. $last_position)
    {
        my $key = $self->{'order'}->{$position};

        # Skip keys that have no value table at all
        my $flag_for = $self->{'data'}->{$key};
        next unless $flag_for;

        # Note: there may be zero values left
        foreach my $value (sort keys %{$flag_for})
        {
            next unless $flag_for->{$value};
            print STDERR "* Storing $key => $value\n";
            push(@pairs, [$key, $value]);
        }
    }

    print STDERR "Complete!\n";
    return \@pairs;
}
128# /** getAllMetadata() **/
129
# /** Retrieve the document number stored under the 'docnum' metadata key.
#  *
#  *  Only values that are currently set (flag is true) are considered, so a
#  *  docnum that has been cleared with removeMetadata() is never returned.
#  *  A document is expected to carry a single docnum; should several be set
#  *  at once, the first in sorted (string) order is returned so that the
#  *  result does not depend on hash ordering.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return The docnum value, or -1 if the document has no docnum set
#  */
sub getDocNum
{
    my ($self) = @_;

    # No 'docnum' key at all means no document number
    my $flag_for = $self->{'data'}->{'docnum'};
    return -1 unless defined($flag_for);

    # Ignore values whose flag has been cleared
    my @active = grep { $flag_for->{$_} } sort keys %{$flag_for};

    return scalar(@active) ? $active[0] : -1;
}
146# /** getDocNum() **/
147
# /** Populate this object from its entry in the collection's info database.
#  *
#  *  The database file is located through dbutil::get_infodb_file_path()
#  *  using <GSDLHOME>/collect/<collection>/index/text, the raw entry for
#  *  this document's OID is fetched with dbutil::read_infodb_rawentry(), and
#  *  every "<key>value" line found in it is passed to addMetadata().
#  *
#  *  NOTE(review): the pattern needs at least one character of value and a
#  *  terminating newline, and because of the /s flag that character may
#  *  itself be a newline. An empty value ("<key>" directly followed by a
#  *  newline) therefore runs on into the following line, and a final line
#  *  without a trailing newline is not matched at all.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *
#  *  Dies if addMetadata() rejects one of the extracted pairs.
#  */
sub loadDocument()
{
    my ($self) = @_;

    my $infodbtype      = $self->{'infodbtype'};
    my $collection_name = $self->{'collection'};

    # Work out where the info database of the collection lives
    my $text_index_dir = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection_name, "index", "text");
    my $database_path  = &dbutil::get_infodb_file_path($infodbtype, $collection_name, $text_index_dir);

    # Fetch the raw text of the entry for this document
    my $raw_entry = &dbutil::read_infodb_rawentry($infodbtype, $database_path, $self->{'oid'});

    # The substitution is used purely to visit each "<key>value" line; the
    # key is the text inside the angle brackets, the value what follows it
    $raw_entry =~ s/<([\w\d\.]+)>(.+?)\r?\n/&addMetadata($self, $1, $2, 1)/egs;
}
164# /** loadDocument() **/
165
# /** Locates and removes the given key/value mapping from this document
#  *  object.
#  *
#  *  A value is removed by clearing its flag rather than deleting it. If
#  *  the key, or the value under that key, is not present the document is
#  *  left completely untouched. In particular no empty value table is
#  *  created for an unknown key, because addMetadata() only registers a key
#  *  in the ordering when it has no value table yet; a table created here
#  *  would cause a key added later to be left out of the ordering.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @param  $key   The metadata key as a string
#  *  @param  $value The obsolete metadata value as a string
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeMetadata
{
    my ($self, $key, $value) = @_;

    # Fetch the value table without autovivifying it for an unknown key
    my $flag_for = $self->{'data'}->{$key};
    return unless defined($flag_for);

    # Nothing to clear if this value was never recorded under the key
    return unless exists($flag_for->{$value});

    # Ensure the value doesn't exist by setting its flag to 0
    $flag_for->{$value} = 0;
    return;
}
182# /** removeMetadata() **/
183
# /** Write this document back to the collection's info database.
#  *
#  *  The textual form produced by toString() is converted with
#  *  dbutil::convert_infodb_string_to_hash() and written under the
#  *  document's OID through a handle opened in "append" mode. Afterwards,
#  *  if the document has a docnum, the entry stored under that docnum is
#  *  read back and, unless it already contains "<section>", an entry
#  *  mapping the docnum to the OID is written as well.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  */
sub saveDocument()
{
    my ($self) = @_;

    # Get a textual version of this object
    my $document_text = $self->toString();

    my $infodbtype      = $self->{'infodbtype'};
    my $collection_name = $self->{'collection'};

    # Work out where the info database of the collection lives
    my $text_index_dir = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection_name, "index", "text");
    my $database_path  = &dbutil::get_infodb_file_path($infodbtype, $collection_name, $text_index_dir);

    # Now store the object in the info database
    my $write_handle = &dbutil::open_infodb_write_handle($infodbtype, $database_path, "append");
    &dbutil::write_infodb_entry($infodbtype, $write_handle, $self->{'oid'}, &dbutil::convert_infodb_string_to_hash($document_text));
    &dbutil::close_infodb_write_handle($infodbtype, $write_handle);

    # There is a little bit of extra complexity when saving an incremental
    # document in that we should ensure that a reverse lookup - from DocNum
    # or nodeID to Greenstone document hash - exists in the database.
    my $doc_num = $self->getDocNum();
    if ($doc_num >= 0)
    {
        my $reverse_entry = &dbutil::read_infodb_rawentry($infodbtype, $database_path, $doc_num);

        # If there is no reverse lookup, then add one now
        unless ($reverse_entry =~ /<section>/)
        {
            $write_handle = &dbutil::open_infodb_write_handle($infodbtype, $database_path, "append");
            &dbutil::write_infodb_entry($infodbtype, $write_handle, $doc_num, &dbutil::convert_infodb_string_to_hash("<section>" . $self->{'oid'}));
            &dbutil::close_infodb_write_handle($infodbtype, $write_handle);
        }
    }
}
220# /** saveDocument() **/
221
# /** Produces a textual representation of this object.
#  *
#  *  Each metadata value that is currently set becomes one "<key>value"
#  *  line terminated by a newline. Keys appear in the order they were first
#  *  added and the values of one key appear in sorted (string) order.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A string which describes this incremental document object; the
#  *          empty string if no value is set
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub toString()
{
    my ($self) = @_;
    my @lines = ();

    my $last_slot = scalar(keys %{$self->{'order'}}) - 1;
    foreach my $slot (0 .. $last_slot)
    {
        my $key = $self->{'order'}->{$slot};

        # Skip keys that have no value table at all
        my $is_set = $self->{'data'}->{$key};
        next unless $is_set;

        # Note: there may be zero values left
        foreach my $value (sort keys %{$is_set})
        {
            push(@lines, "<$key>$value\n") if $is_set->{$value};
        }
    }

    return join("", @lines);
}
252# /** toString() **/
253
2541;
Note: See TracBrowser for help on using the repository browser.