source: main/trunk/greenstone2/perllib/IncrementalDocument.pm@ 21564

Last change on this file since 21564 was 21564, checked in by mdewsnip, 14 years ago

Changed lots of occurrences of "GDBM" in comments, variable names and function names, where the code isn't GDBM-specific. Part of making the code less GDBM-specific.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.1 KB
Line 
1###########################################################################
2#
3# IncrementalDocument.pm -- An object to encapsulate the Greenstone
4# document retrieved from the info database.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2006 DL Consulting Ltd and New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package IncrementalDocument;
28
29use strict;
30
31
# /** Construct an empty IncrementalDocument for one document of a collection.
#  *
#  *  Nothing is read from the info database here; the object starts with no
#  *  metadata.
#  *
#  *  @param  $class      The class name to bless the new object into
#  *  @param  $collection The name of the collection the document belongs to
#  *  @param  $oid        The unique identifier (OID) of the document
#  *  @return A blessed reference to the new object
#  *
#  *  Dies if either $collection or $oid is false (missing, empty or "0").
#  */
sub new()
{
    my ($class, $collection, $oid) = @_;

    # Refuse to build an object that cannot be tied to a stored document.
    if (!$collection)
    {
        die ("Error! Can't create a document that doesn't belong to a collection!");
    }
    if (!$oid)
    {
        die ("Error! Can't create a document that doesn't have a unique id (OID)!");
    }

    my $self = bless {
        # The collection this document object has been loaded from.
        'collection' => $collection,
        # Maps each metadata key to a nested hash whose keys are that
        # metadata key's values.
        'data'       => {},
        # The unique identifier of the document.
        'oid'        => $oid,
        # Records the order in which metadata keys were discovered/added,
        # as a hash from position (0, 1, 2, ...) to key name.
        'order'      => {},
    }, $class;

    return $self;
}
63# /** new() **/
64
# /** Record a metadata value against a key of this document.
#  *
#  *  The first time a key is seen it is given the next free position in the
#  *  'order' hash, so keys can later be listed in the order they were added.
#  *  Adding the same key/value pair again simply marks it as present again.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @param  $key   The metadata key; must contain at least one word character
#  *  @param  $value The metadata value; must contain at least one word character
#  *
#  *  A fourth argument may be passed (loadDocument() does so) but it is not
#  *  used by this method.
#  *
#  *  Dies if $key or $value fails validation.
#  */
sub addMetadata()
{
    my ($self, $key, $value) = @_;

    # Validate the arguments
    if ($key !~ /[\w]+/)
    {
        die ("Error! Can't add a metadata value to a document without a valid key!");
    }
    if ($value !~ /[\w\d]+/)
    {
        die ("Error! Can't add a metadata key to a document without a valid value!");
    }

    my $values_of = ($self->{'data'} ||= {});

    # A key with no entry yet is new: give it a value hash and remember its
    # position, which is simply the number of keys registered so far.
    if (!defined($values_of->{$key}))
    {
        my $position_of = ($self->{'order'} ||= {});
        my $next_position = scalar(keys %{$position_of});
        $position_of->{$next_position} = $key;
        $values_of->{$key} = {};
    }

    # A value of 1 marks the key/value pair as present.
    $values_of->{$key}->{$value} = 1;
}
90# /** addMetadata() **/
91
# /** Retrieve all the metadata of this document as an array of pairs.
#  *
#  *  Keys are visited in the order recorded in the 'order' hash; within a
#  *  key, values are returned in sorted (string) order. Values whose flag has
#  *  been cleared to 0 (see removeMetadata()) are left out.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A reference to an array of [key, value] array references
#  */
sub getAllMetadata
{
    my ($self) = @_;
    my @all_metadata;

    my $key_count = scalar(keys %{$self->{'order'}});
    foreach my $index (0 .. $key_count - 1)
    {
        my $key = $self->{'order'}->{$index};

        # Skip keys that have no value hash at all
        my $values = $self->{'data'}->{$key};
        next unless $values;

        # Note: there may be zero live values left for this key
        foreach my $value (sort keys %{$values})
        {
            push(@all_metadata, [$key, $value]) if $values->{$value};
        }
    }

    return \@all_metadata;
}
123# /** getAllMetadata() **/
124
# /** Return the value stored under the 'docnum' metadata key.
#  *
#  *  Only live values are considered: a value whose flag has been cleared to
#  *  0 by removeMetadata() is still a key of the hash, so simply taking the
#  *  first hash key could hand back a removed docnum.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return The docnum value, or -1 if the document has no live docnum.
#  *          Only one value is expected; should several be live, the first
#  *          in string sort order is returned so the result is repeatable.
#  */
sub getDocNum
{
    my ($self) = @_;

    my $docnum_values = $self->{'data'}->{'docnum'};
    return -1 unless defined($docnum_values);

    # Ignore tombstoned (flag 0) values left behind by removeMetadata()
    my @live_values = sort grep { $docnum_values->{$_} } keys %{$docnum_values};

    return scalar(@live_values) ? $live_values[0] : -1;
}
141# /** getDocNum() **/
142
# /** Populate this object from its record in the collection's info database.
#  *
#  *  The record for this document's OID is read as text and every
#  *  "<key>value" line in it is passed to addMetadata().
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *
#  *  Does nothing if no record text is returned. A line with an empty value
#  *  is skipped. Any error raised by addMetadata() (for example a value with
#  *  no word characters) propagates to the caller.
#  *
#  *  NOTE(review): the infodb type is hard-coded to "gdbm", and util/dbutil
#  *  are assumed to have been loaded by the caller - confirm both.
#  */
sub loadDocument
{
    my ($self) = @_;

    # Load the raw text for the document object from the info database
    my $collection = $self->{'collection'};
    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text");
    my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collection, $index_text_directory_path);
    my $text = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $self->{'oid'});

    # Nothing to parse if the lookup produced no text
    return unless defined($text);

    # For each line in the raw text, extract the key (enclosed in angle
    # brackets) and the value. The value pattern cannot cross a newline, so
    # a line with an empty value never absorbs the line that follows it.
    while ($text =~ /<([\w\d\.]+)>([^\n]*?)\r?\n/g)
    {
        # Copy the captures before calling out, as any regex run by the
        # callee would overwrite $1 and $2
        my ($key, $value) = ($1, $2);
        next if $value eq '';
        $self->addMetadata($key, $value, 1);
    }

    return;
}
159# /** loadDocument() **/
160
# /** Locates and removes the given key/value mapping from this document
#  *  object.
#  *
#  *  The value is not deleted; its flag is set to 0 so that toString() and
#  *  getAllMetadata() skip it.
#  *
#  *  A key this document has never held is left untouched. Creating an entry
#  *  for it here would give the key a value hash without a position in the
#  *  'order' hash, and addMetadata() only assigns a position to keys that
#  *  have no value hash yet - so values added to that key afterwards would
#  *  never be output.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @param  $key   The metadata key as a string
#  *  @param  $value The obsolete metadata value as a string
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeMetadata
{
    my ($self, $key, $value) = @_;

    return unless defined($key) && defined($value);

    # Look the key up without autovivifying an entry for it
    my $data = $self->{'data'};
    return unless defined($data) && defined($data->{$key});

    # Ensure the value doesn't exist by simply setting to 0 the correct
    # associative path
    $data->{$key}->{$value} = 0;
    return;
}
177# /** removeMetadata() **/
178
# /** Write this document back to the collection's info database.
#  *
#  *  The object is serialised with toString() and written under its OID.
#  *  If the document has a docnum (getDocNum() returns 0 or more), the entry
#  *  stored under that docnum is then read, and if it contains no
#  *  "<section>" line one is written that points back at this document's OID.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *
#  *  NOTE(review): the infodb type is hard-coded to "gdbm" - confirm this is
#  *  intended for all collections.
#  */
sub saveDocument()
{
    my ($self) = @_;
    my $infodb_type = "gdbm";

    # Serialise first, then work out where the info database lives
    my $serialised = $self->toString();
    my $collection = $self->{'collection'};
    my $text_index_dir = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text");
    my $database_path = &dbutil::get_infodb_file_path($infodb_type, $collection, $text_index_dir);

    # Store the document itself under its OID
    my $write_handle = &dbutil::open_infodb_write_handle($infodb_type, $database_path, "append");
    &dbutil::write_infodb_entry($infodb_type, $write_handle, $self->{'oid'}, &dbutil::convert_infodb_string_to_hash($serialised));
    &dbutil::close_infodb_write_handle($infodb_type, $write_handle);

    # A saved incremental document should also be reachable in reverse, from
    # its DocNum (or nodeID) to its Greenstone document hash.
    my $doc_num = $self->getDocNum();
    if ($doc_num >= 0)
    {
        my $reverse_entry = &dbutil::read_infodb_entry($infodb_type, $database_path, $doc_num);

        # Only add the reverse lookup when one is not already present
        unless ($reverse_entry =~ /<section>/)
        {
            $write_handle = &dbutil::open_infodb_write_handle($infodb_type, $database_path, "append");
            &dbutil::write_infodb_entry($infodb_type, $write_handle, $doc_num, &dbutil::convert_infodb_string_to_hash("<section>" . $self->{'oid'}));
            &dbutil::close_infodb_write_handle($infodb_type, $write_handle);
        }
    }
}
215# /** saveDocument() **/
216
# /** Produces a textual representation of this object.
#  *
#  *  One "<key>value" line, terminated by a newline, is produced for every
#  *  live key/value pair. Keys appear in the order recorded in the 'order'
#  *  hash and the values of each key in sorted (string) order. Values whose
#  *  flag is 0 are left out.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A string which describes this incremental document object
#  *          (the empty string if there is nothing to output)
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub toString()
{
    my ($self) = @_;
    my @lines;

    my $position_of = ($self->{'order'} ||= {});
    my $last_position = scalar(keys %{$position_of}) - 1;

    foreach my $position (0 .. $last_position)
    {
        my $key = $position_of->{$position};

        # Skip keys that have no value hash at all
        my $flag_of = $self->{'data'}->{$key};
        next unless $flag_of;

        # Note: every value of this key may have been removed
        foreach my $value (sort keys %{$flag_of})
        {
            next unless $flag_of->{$value};
            push(@lines, "<$key>$value\n");
        }
    }

    return join("", @lines);
}
247# /** toString() **/
248
2491;
Note: See TracBrowser for help on using the repository browser.