source: main/trunk/greenstone2/perllib/IncrementalDocument.pm@ 21561

Last change on this file since 21561 was 21561, checked in by mdewsnip, 14 years ago

Changed calls to GDBMUtils::gdbmCachedCollectionGet() to dbutil::read_infodb_entry(). Part of removing GDBMUtils.pm and making the code less GDBM-specific.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1###########################################################################
2#
3# IncrementalDocument.pm -- An object to encapsulate the Greenstone
4# document retrieved from the GDBM database.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2006 DL Consulting Ltd and New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package IncrementalDocument;
28
29use GDBMUtils;
30use strict;
31
# /** Creates an empty incremental document for one document of a collection.
#  *
#  *  Nothing is read from the database here; the object starts out with no
#  *  metadata until values are added to it.
#  *
#  *  @param  $class       The package to bless the new object into
#  *  @param  $collection  The name of the collection the document belongs to
#  *  @param  $oid         The unique identifier (OID) of the document
#  *  @return A blessed hash reference with the keys 'collection', 'oid',
#  *          'data' and 'order'
#  *
#  *  Dies if $collection or $oid is missing (any false value).
#  */
sub new
{
    my ($class, $collection, $oid) = @_;

    # Test the parameters
    die ("Error! Can't create a document that doesn't belong to a collection!") unless $collection;
    die ("Error! Can't create a document that doesn't have a unique id (OID)!") unless $oid;

    my $self = {
        # The collection this document object has been loaded from.
        'collection' => $collection,

        # Maps each metadata key to a nested hash whose keys are the values
        # recorded for that metadata key.
        'data'       => {},

        # The unique identifier of the document loaded.
        'oid'        => $oid,

        # Maps a position (0, 1, 2, ...) to the metadata key that was
        # discovered/added at that position.
        'order'      => {},
    };

    return bless $self, $class;
}
63# /** new() **/
64
# /** Records a metadata value against a key of this document.
#  *
#  *  The first time a key is seen its position is stored in 'order', so that
#  *  keys can later be listed in the order they were added.
#  *
#  *  @param  $self      A reference to this IncrementalDocument object
#  *  @param  $key       The metadata key; must contain a word character
#  *  @param  $value     The metadata value; must contain a word character
#  *  @param  $internal  Accepted for compatibility with existing callers;
#  *                     not used by this method
#  *
#  *  Dies if $key or $value is undefined or contains no word character.
#  */
sub addMetadata
{
    my ($self, $key, $value, $internal) = @_;

    # Validate the arguments
    die ("Error! Can't add a metadata value to a document without a valid key!") unless defined($key) && $key =~ /[\w]+/;
    die ("Error! Can't add a metadata key to a document without a valid value!") unless defined($value) && $value =~ /[\w\d]+/;

    # A key counts as known only when it has BOTH a value table and a
    # position in 'order'. Checking the value table alone is not enough: a
    # table can exist without a position (for example when something stored
    # a flag under a key that was never added), and such a key would then
    # never be listed.
    my $known_values = $self->{'data'}->{$key};
    my $is_registered = defined($known_values)
        && scalar(grep { $_ eq $key } values %{$self->{'order'}});

    if (!$is_registered)
    {
        # The next free position is the number of keys already registered.
        my $index = scalar(keys %{$self->{'order'}});
        $self->{'order'}->{$index} = $key;
        # Keep an existing value table so flags already stored survive.
        $self->{'data'}->{$key} = {} unless defined($known_values);
    }

    # Set the value of the associative path to 1.
    $self->{'data'}->{$key}->{$value} = 1;
}
90# /** addMetadata() **/
91
# /** Retrieve all the metadata of this document as an array of pairs.
#  *
#  *  Keys are visited in the order recorded in 'order'; the values of each
#  *  key are visited in sorted order. Only values whose flag is true are
#  *  returned. Progress messages are written to STDERR.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return A reference to an array of [key, value] array references
#  */
sub getAllMetadata()
{
    my ($self) = @_;

    print STDERR "IncrementalDocument.getAllMetadata()\n";

    my $positions = $self->{'order'};
    my $tables    = $self->{'data'};
    my @pairs     = ();

    foreach my $position (0 .. scalar(keys %{$positions}) - 1)
    {
        my $key   = $positions->{$position};
        my $flags = $tables->{$key};

        # Skip keys that have no value table at all
        next unless $flags;

        # Note: every value of a key may have a false flag
        foreach my $value (sort keys %{$flags})
        {
            next unless $flags->{$value};
            print STDERR "* Storing $key => $value\n";
            push(@pairs, [$key, $value]);
        }
    }

    print STDERR "Complete!\n";
    return \@pairs;
}
123# /** getAllMetadata() **/
124
# /** Returns the document number stored under the 'docnum' metadata key.
#  *
#  *  Only values whose flag is true are considered, so a docnum whose flag
#  *  has been set to 0 is never returned. A document is expected to carry a
#  *  single docnum; should several be live, the first in sorted (string)
#  *  order is returned so that the result is deterministic.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return The docnum value, or -1 if no live docnum is recorded
#  */
sub getDocNum
{
    my ($self) = @_;

    # Check the docnum path exists in the associated data
    my $docnum_flags = $self->{'data'}->{'docnum'};
    return -1 unless defined($docnum_flags);

    # The values are the keys of the nested hash; keep only the live ones
    my @live_values = grep { $docnum_flags->{$_} } keys %{$docnum_flags};
    return -1 unless @live_values;

    @live_values = sort @live_values;
    return $live_values[0];
}
141# /** getDocNum() **/
142
# /** Loads this document's metadata from the collection's info database.
#  *
#  *  The entry stored under this document's OID is read as text, and every
#  *  "<key>value" line found in it is added to this object.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return The number of key/value lines added (0 if the entry is missing
#  *          or contains none)
#  */
sub loadDocument
{
    my ($self) = @_;

    # Load the raw text for the document object from the info database.
    # The external helpers are called with a leading & exactly as before.
    # NOTE(review): that call form bypasses any prototype declared on the
    # callee, so do not drop it without checking those modules.
    my $collection = $self->{'collection'};
    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text");
    my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collection, $index_text_directory_path);
    my $text = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $self->{'oid'});

    # Nothing to parse when no text came back for this OID
    return 0 unless defined($text);

    # For each line in the raw text, extract the key (enclosed in angle
    # brackets) and the value. The pattern is deliberately NOT given /s: the
    # value must stay on one line, otherwise a line with an empty value would
    # swallow the following line as its value. A final line without a
    # trailing newline is accepted as well.
    my $loaded = 0;
    while ($text =~ /<([\w\d\.]+)>(.+?)\r?(?:\n|\z)/g)
    {
        # Copy the captures before calling out, as the callee runs its own
        # pattern matches.
        my ($key, $value) = ($1, $2);
        $self->addMetadata($key, $value, 1);
        $loaded++;
    }

    return $loaded;
}
159# /** loadDocument() **/
160
# /** Locates and removes the given key/value mapping from this document
#  *  object.
#  *
#  *  The value is not deleted; its flag is set to 0, which the methods that
#  *  list metadata treat as "not present". Removing a value from a key that
#  *  holds no values at all is a no-op.
#  *
#  *  @param  $self   A reference to this IncrementalDocument object
#  *  @param  $key    The metadata key as a string
#  *  @param  $value  The obsolete metadata value as a string
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeMetadata
{
    my ($self, $key, $value) = @_;

    # Nothing is stored under this key, so there is nothing to remove.
    # Returning here also avoids autovivifying an empty value table for a
    # key that was never added: such a table has no position in 'order',
    # and code that decides "is this key new?" by looking at the table
    # alone could then skip registering the key when it is added later.
    return unless defined($key) && defined($self->{'data'}->{$key});

    # Ensure the value doesn't exist by simply setting to 0 the correct
    # associative path
    $self->{'data'}->{$key}->{$value} = 0;
}
177# /** removeMetadata() **/
178
# /** Writes this document back to the collection's database.
#  *
#  *  The textual form of the object is stored under the document's OID. When
#  *  the document has a docnum, a reverse-lookup entry (docnum to OID) is
#  *  also stored, unless an entry containing "<section>" already exists
#  *  under that docnum.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  */
sub saveDocument
{
    my ($self) = @_;

    my $collection = $self->{'collection'};
    my $oid = $self->{'oid'};

    # Store the textual version of this object using the GDBM utilities.
    # The external helpers are called with a leading & exactly as before.
    # NOTE(review): that call form bypasses any prototype declared on the
    # callee, so do not drop it without checking those modules.
    &GDBMUtils::gdbmCachedCollectionSet($collection, $oid, $self->toString());

    # There is a little bit of extra complexity when saving an incremental
    # document in that we should ensure that a reverse lookup - from DocNum or
    # nodeID to Greenstone document hash - exists in the database.
    my $doc_num = $self->getDocNum();
    return if $doc_num < 0;

    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text");
    my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collection, $index_text_directory_path);

    # Kept in its own variable rather than reusing the name of the document
    # text, so the two cannot be confused.
    my $reverse_lookup = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $doc_num);

    # If there is no reverse lookup (no entry at all, or one without a
    # <section> marker), then add one now
    if (!defined($reverse_lookup) || $reverse_lookup !~ /<section>/)
    {
        &GDBMUtils::gdbmCachedCollectionSet($collection, $doc_num, "<section>" . $oid);
    }

    return;
}
210# /** saveDocument() **/
211
# /** Produces a textual representation of this object.
#  *
#  *  One "<key>value" line is emitted per value whose flag is true. Keys
#  *  appear in the order recorded in 'order'; the values of a key appear in
#  *  sorted order.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return A string which describes this incremental document object
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub toString()
{
    my ($self) = @_;

    my $positions = $self->{'order'};
    my $tables    = $self->{'data'};
    my @lines     = ();

    foreach my $position (0 .. scalar(keys %{$positions}) - 1)
    {
        my $key   = $positions->{$position};
        my $flags = $tables->{$key};

        # Skip keys that have no value table at all
        next unless $flags;

        # Note: every value of a key may have a false flag
        my @live_values = grep { $flags->{$_} } sort keys %{$flags};
        foreach my $value (@live_values)
        {
            push(@lines, "<$key>$value\n");
        }
    }

    return join("", @lines);
}
242# /** toString() **/
243
2441;
Note: See TracBrowser for help on using the repository browser.