source: gsdl/trunk/perllib/IncrementalDocument.pm@ 14926

Last change on this file since 14926 was 12844, checked in by mdewsnip, 18 years ago

Incremental building and dynamic GDBM updating code, many thanks to John Rowe and John Thompson at DL Consulting Ltd.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1###########################################################################
2#
3# IncrementalDocument.pm -- An object to encapsulate the Greenstone
4# document retrieved from the GDBM database.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2006 DL Consulting Ltd and New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package IncrementalDocument;
28
29use GDBMUtils;
30
# /** Construct an empty in-memory document object for one record of a
#  *  collection. Nothing is read from the database here; the object starts
#  *  with no metadata (see loadDocument() to populate it).
#  *
#  *  @param  $class      The package name the new object is blessed into
#  *  @param  $collection The collection this document belongs to
#  *  @param  $oid        The unique identifier (OID) of the document
#  *  @return A blessed hash reference with the keys 'collection', 'data',
#  *          'oid' and 'order'
#  *
#  *  Dies if $collection or $oid is missing (or otherwise false).
#  */
sub new
{
    my ($class, $collection, $oid) = @_;
    #rint STDERR "IncrementalDocument::new($collection, $oid)\n";

    # Test the parameters
    die ("Error! Can't create a document that doesn't belong to a collection!") unless $collection;
    die ("Error! Can't create a document that doesn't have a unique id (OID)!") unless $oid;

    my $self = {
        # The collection this document object has been loaded from.
        'collection' => $collection,
        # Maps each metadata key to a nested hash of value => flag, where a
        # true flag means the value is currently set.
        'data'       => {},
        # The unique identifier of the document loaded.
        'oid'        => $oid,
        # Maps 0, 1, 2, ... to metadata keys, recording the order in which
        # the keys were discovered/added.
        'order'      => {},
    };

    return bless $self, $class;
}
55# /** new() **/
56
# /** Record a key/value metadata pair on this document.
#  *
#  *  The first time a key is seen it is appended to the 'order' table so
#  *  that output later follows discovery order. The value is then flagged
#  *  as set (1) under that key; adding the same pair twice is harmless.
#  *
#  *  @param  $self     A reference to this IncrementalDocument object
#  *  @param  $key      The metadata key; must contain a word character
#  *  @param  $value    The metadata value; must contain a word character
#  *  @param  $internal Accepted for compatibility with existing callers;
#  *                    not used by this method
#  *  @return 1
#  *
#  *  Dies if $key or $value is undefined or contains no word character.
#  */
sub addMetadata
{
    my ($self, $key, $value, $internal) = @_;

    # Validate the arguments. The defined() guards keep an undefined
    # argument from reaching the pattern match; it is rejected either way.
    die ("Error! Can't add a metadata value to a document without a valid key!") unless defined($key) && $key =~ /[\w]+/;
    die ("Error! Can't add a metadata key to a document without a valid value!") unless defined($value) && $value =~ /[\w\d]+/;

    # Is this a new key that we haven't encountered before? If so ensure a
    # value table exists for it, and record the order in which we
    # encountered this key.
    if (!defined($self->{'data'}->{$key}))
    {
        # The number of keys already stored is the next free index.
        my $index = scalar(keys %{$self->{'order'}});
        $self->{'order'}->{$index} = $key;
        $self->{'data'}->{$key} = {};
    }

    # Flag the value as present.
    $self->{'data'}->{$key}->{$value} = 1;
    return 1;
}
79# /** addMetadata() **/
80
# /** Retrieve all the metadata of this document as an array of pairs.
#  *
#  *  Keys are visited in the order they were first added; within a key the
#  *  values are visited in sorted (string) order. Values whose flag is
#  *  false (i.e. removed values) are skipped.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A reference to an array of [key, value] array references
#  *
#  *  NOTE(review): this method writes trace lines to STDERR on every call,
#  *  unlike the other methods where tracing is commented out -- confirm
#  *  whether that output is still wanted.
#  */
sub getAllMetadata
{
    print STDERR "IncrementalDocument.getAllMetadata()\n";
    my ($self) = @_;
    my @all_metadata;

    my $key_count = scalar(keys %{$self->{'order'}});
    foreach my $i (0 .. $key_count - 1)
    {
        my $key = $self->{'order'}->{$i};
        # Skip keys that have no value table.
        my $values = $self->{'data'}->{$key};
        next unless $values;

        # Note: there may be zero values left
        foreach my $value (sort keys %{$values})
        {
            next unless $values->{$value};
            print STDERR "* Storing $key => $value\n";
            push(@all_metadata, [$key, $value]);
        }
    }

    print STDERR "Complete!\n";
    return \@all_metadata;
}
111# /** getAllMetadata() **/
112
# /** Retrieve the document number ('docnum' metadata) of this document.
#  *
#  *  Only values whose flag is still true are considered: removeMetadata()
#  *  leaves a removed value in the table with a flag of 0, so reading the
#  *  raw key list could hand back a docnum that has been removed.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return The docnum value, or -1 if the document has no docnum
#  *          currently set
#  */
sub getDocNum
{
    my ($self) = @_;
    my $docnum = -1;

    # Check the docnum path exists in the associated data
    my $values = $self->{'data'}->{'docnum'};
    if (defined($values))
    {
        # There should only ever be one active docnum; the sort just makes
        # the choice deterministic should more than one be flagged.
        my @active = grep { $values->{$_} } sort keys %{$values};
        $docnum = $active[0] if @active;
    }

    return $docnum;
}
129# /** getDocNum() **/
130
# /** Populate this object from the record stored under its OID.
#  *
#  *  The raw text is fetched with GDBMUtils::gdbmGet() and scanned for
#  *  entries of the form "<key>value" terminated by a newline; each one is
#  *  passed to addMetadata().
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return The number of key/value entries loaded (0 if there were none)
#  *
#  *  Dies if addMetadata() rejects an entry.
#  *
#  *  NOTE(review): because of the /s modifier '.' also matches a newline,
#  *  so an entry with an empty value ("<key>" directly followed by a
#  *  newline) appears to absorb the following line into its value. The
#  *  pattern is left unchanged here -- confirm whether such records occur.
#  */
sub loadDocument
{
    my ($self) = @_;
    #rint STDERR "IncrementalDocument::loadDocument()\n";

    # Load the raw text for the document object from GDBM. The & call form
    # is kept deliberately: it bypasses any prototype the GDBMUtils sub may
    # declare (its definition is not visible from this file).
    my $text = &GDBMUtils::gdbmGet($self->{'collection'}, $self->{'oid'});
    return 0 unless defined($text);

    # For each line in the raw text, extract the key (enclosed in angle
    # brackets) and the value
    my $loaded = 0;
    while ($text =~ /<([\w\d\.]+)>(.+?)\r?\n/gs)
    {
        # Copy the captures before calling out, so nothing the callee does
        # with pattern matches can affect them.
        my ($key, $value) = ($1, $2);
        $self->addMetadata($key, $value, 1);
        $loaded++;
    }

    return $loaded;
}
144# /** loadDocument() **/
145
# /** Locates and removes the given key/value mapping from this document
#  *  object.
#  *
#  *  The value is not deleted; its flag is set to 0 so that output and
#  *  lookups skip it. A key or value that is not present is left alone.
#  *  In particular no entry is created for an unknown key: addMetadata()
#  *  registers a key in the 'order' table only when the key has no entry
#  *  under 'data', so an entry created here would stop the key from ever
#  *  being registered and its later values would never be output.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @param  $key   The metadata key as a string
#  *  @param  $value The obsolete metadata value as a string
#  *  @return 0
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeMetadata
{
    my ($self, $key, $value) = @_;

    # Look the key up on its own first: testing the nested path directly
    # would autovivify the per-key table.
    my $values = $self->{'data'}->{$key};
    return 0 unless defined($values) && exists($values->{$value});

    # Ensure the value doesn't exist by simply setting its flag to 0
    $values->{$value} = 0;
    return 0;
}
162# /** removeMetadata() **/
163
# /** Write this document back to the database.
#  *
#  *  The textual form produced by toString() is stored under the document's
#  *  OID with GDBMUtils::gdbmSet(). If the document has a docnum, the
#  *  record stored under that docnum is also checked, and when it does not
#  *  contain "<section>" a reverse-lookup record of the form
#  *  "<section>OID" is written there.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return Nothing meaningful
#  */
sub saveDocument
{
    my ($self) = @_;

    # Get a textual version of this object
    my $text = $self->toString();

    # Now store the object in the database using the GDBM utilities. The &
    # call form is kept deliberately: it bypasses any prototype the
    # GDBMUtils subs may declare (their definitions are not visible here).
    &GDBMUtils::gdbmSet($self->{'collection'}, $self->{'oid'}, $text);

    # There is a little bit of extra complexity when saving an incremental
    # document in that we should ensure that a reverse lookup -- from DocNum
    # or nodeID to Greenstone document hash -- exists in the database.
    my $doc_num = $self->getDocNum();
    if (defined($doc_num) && $doc_num >= 0)
    {
        my $reverse_lookup = &GDBMUtils::gdbmGet($self->{'collection'}, $doc_num);
        # If there is no reverse lookup, then add one now
        if (!defined($reverse_lookup) || $reverse_lookup !~ /<section>/)
        {
            &GDBMUtils::gdbmSet($self->{'collection'}, $doc_num, "<section>" . $self->{'oid'});
        }
    }

    # Done
    #rint STDERR "Stored document:\n[" . $self->{'oid'} . "]\n$text\n";
    return;
}
189# /** saveDocument() **/
190
# /** Produces a textual representation of this object.
#  *
#  *  One line of the form "<key>value" (newline terminated) is produced per
#  *  active key/value pair. Keys appear in the order they were first added;
#  *  within a key the values appear in sorted (string) order. Values whose
#  *  flag is false (i.e. removed values) are left out.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A string which describes this incremental document object;
#  *          the empty string if there is no active metadata
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub toString
{
    my ($self) = @_;
    my @lines;

    my $key_count = scalar(keys %{$self->{'order'}});
    foreach my $i (0 .. $key_count - 1)
    {
        my $key = $self->{'order'}->{$i};
        # Skip keys that have no value table.
        my $values = $self->{'data'}->{$key};
        next unless $values;

        # Note: there may be zero values left
        foreach my $value (sort keys %{$values})
        {
            next unless $values->{$value};
            push(@lines, "<$key>$value\n");
        }
    }

    return join('', @lines);
}
221# /** toString() **/
222
2231;
Note: See TracBrowser for help on using the repository browser.