###########################################################################
#
# IncrementalDocument.pm -- An object to encapsulate the Greenstone
#                           document retrieved from the GDBM database.
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 2006 DL Consulting Ltd and New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package IncrementalDocument;

use strict;
use warnings;

use GDBMUtils;

# /** Create a new, empty document object for one record of a collection.
#  *
#  *  Nothing is read from the database here; call loadDocument() to
#  *  populate the object.
#  *
#  *  @param  $class       The package to bless the new object into
#  *  @param  $collection  The collection the document belongs to
#  *  @param  $oid         The unique identifier (OID) of the document
#  *  @return A blessed hash reference with the fields 'collection',
#  *          'data', 'oid' and 'order'
#  *
#  *  Dies if $collection or $oid is false.
#  */
sub new {
    my ($class, $collection, $oid) = @_;
    #rint STDERR "IncrementalDocument::new($collection, $oid)\n";

    # Test the parameters
    die ("Error! Can't create a document that doesn't belong to a collection!") unless $collection;
    die ("Error! Can't create a document that doesn't have a unique id (OID)!") unless $oid;

    my $self = {
        # The collection this document object has been loaded from.
        'collection' => $collection,
        # An associative array of information retrieved from the GDBM
        # database which maps a key string to a nested associative array
        # listing values.
        'data'       => {},
        # The unique identifier of the document loaded
        'oid'        => $oid,
        # Stores the order in which metadata keys were discovered/added,
        # as a map from a zero-based index to the key name.
        'order'      => {},
    };

    return bless $self, $class;
}
# /** new() **/

# /** Record a key/value pair on this document.
#  *
#  *  The first time a key is seen it is appended to the 'order' map so
#  *  that output keeps keys in the order they were first added. Adding the
#  *  same value twice for a key is a no-op, because values are stored as
#  *  hash keys.
#  *
#  *  @param  $self      A reference to this IncrementalDocument object
#  *  @param  $key       The metadata key; must contain a word character
#  *  @param  $value     The metadata value; must contain a word character
#  *  @param  $internal  Accepted but not read by this implementation
#  *  @return 1
#  *
#  *  Dies if $key or $value is undefined or fails the check above.
#  */
sub addMetadata {
    my ($self, $key, $value, $internal) = @_;

    # Validate the arguments. The defined() guards keep an undefined
    # argument on the documented error path instead of emitting an
    # "uninitialized value" warning first.
    die ("Error! Can't add a metadata value to a document without a valid key!")
        unless defined($key) && $key =~ /[\w]+/;
    die ("Error! Can't add a metadata key to a document without a valid value!")
        unless defined($value) && $value =~ /[\w\d]+/;

    # Is this a new key that we haven't encountered before? If so ensure a
    # value table exists for it, and record the order in which we
    # encountered this key.
    if (!defined($self->{'data'}->{$key})) {
        # The number of keys already recorded is the next free index.
        my $index = scalar(keys %{$self->{'order'}});
        $self->{'order'}->{$index} = $key;
        $self->{'data'}->{$key} = {};
    }

    # Set the value of the associative path to 1.
    $self->{'data'}->{$key}->{$value} = 1;
    return 1;
}
# /** addMetadata() **/

# /** Private helper: list the live [key, value] pairs of this document.
#  *
#  *  Keys come out in the order recorded in 'order'; the values of each
#  *  key are sorted as strings. Values whose flag is false are skipped.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return A list of two-element array references, [$key, $value]
#  */
sub _orderedPairs {
    my ($self) = @_;

    my @pairs;
    my $key_count = scalar(keys %{$self->{'order'}});
    for my $i (0 .. $key_count - 1) {
        my $key = $self->{'order'}->{$i};
        next unless defined($key);

        # Check if this key has been set
        my $values = $self->{'data'}->{$key};
        next unless $values;

        # Note: there may be zero values left
        for my $value (sort keys %{$values}) {
            push(@pairs, [$key, $value]) if $values->{$value};
        }
    }

    return @pairs;
}
# /** _orderedPairs() **/

# /** Retrieve all the metadata of this document as an array of pairs.
#  *
#  *  Writes a trace of every pair to STDERR (unlike the other methods,
#  *  whose trace lines are commented out).
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return A reference to an array of [$key, $value] array references
#  */
sub getAllMetadata {
    print STDERR "IncrementalDocument.getAllMetadata()\n";
    my ($self) = @_;

    my @all_metadata;
    foreach my $pair ($self->_orderedPairs()) {
        my ($key, $value) = @{$pair};
        print STDERR "* Storing $key => $value\n";
        push(@all_metadata, $pair);
    }
    print STDERR "Complete!\n";

    return \@all_metadata;
}
# /** getAllMetadata() **/

# /** Return the value stored under the 'docnum' key.
#  *
#  *  Only live values are considered, so a docnum that has been removed
#  *  is never reported. Should more than one live value exist, the one
#  *  that sorts first as a string is returned so the result is stable.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return The docnum value, or -1 if the document has none
#  */
sub getDocNum {
    my ($self) = @_;

    # Check the docnum path exists in the associated data
    my $docnum_values = $self->{'data'}->{'docnum'};
    return -1 unless defined($docnum_values);

    my @values = grep { $docnum_values->{$_} } sort keys %{$docnum_values};
    return @values ? $values[0] : -1;
}
# /** getDocNum() **/

# /** Populate this object from its record in the GDBM database.
#  *
#  *  The record is fetched with GDBMUtils::gdbmGet() using this object's
#  *  collection and OID. Every "<key>value" entry terminated by a line
#  *  break is passed to addMetadata(). If no record is returned the
#  *  object is left unchanged.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return The number of entries handed to addMetadata()
#  *
#  *  Dies if addMetadata() rejects an entry.
#  */
sub loadDocument {
    my ($self) = @_;
    #rint STDERR "IncrementalDocument::loadDocument()\n";

    # Load the raw text for the document object from GDBM
    my $text = GDBMUtils::gdbmGet($self->{'collection'}, $self->{'oid'});
    return 0 unless defined($text);

    # For each line in the raw text, extract the key (enclosed in angle
    # brackets) and the value
    my $loaded = 0;
    while ($text =~ /<([\w\d\.]+)>(.+?)\r?\n/gs) {
        # Copy the captures before calling out: addMetadata() runs its own
        # pattern matches.
        my ($key, $value) = ($1, $2);
        $self->addMetadata($key, $value, 1);
        $loaded++;
    }

    return $loaded;
}
# /** loadDocument() **/

# /** Locates and removes the given key/value mappings from this document
#  *  object.
#  *
#  *  Only keys that already exist are touched. Creating an entry for an
#  *  unknown key here would leave it without a position in 'order', and a
#  *  later addMetadata() for that key would then never appear in the
#  *  output.
#  *
#  *  @param  $self   A reference to this IncrementalDocument object
#  *  @param  $key    The metadata key as a string
#  *  @param  $value  The obsolete metadata value as a string
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeMetadata {
    my ($self, $key, $value) = @_;
    return unless defined($key) && defined($value);

    my $values = $self->{'data'}->{$key};
    return unless defined($values);

    # Drop the value outright so that it can no longer be seen by code
    # that enumerates the keys of the value table.
    delete $values->{$value};
    return;
}
# /** removeMetadata() **/

# /** Write this document back to the GDBM database.
#  *
#  *  The textual form produced by toString() is stored under this
#  *  object's OID. If the document has a docnum, a reverse-lookup record
#  *  keyed by that docnum is also written, unless one is already there.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  */
sub saveDocument {
    my ($self) = @_;

    # Get a textual version of this object
    my $text = $self->toString();

    # Now store the object in the database using the GDBM utilities
    GDBMUtils::gdbmSet($self->{'collection'}, $self->{'oid'}, $text);

    # There is a little bit of extra complexity when saving an incremental
    # document in that we should ensure that a reverse lookup -from DocNum
    # or nodeID to Greenstone document hash- exists in the database.
    my $doc_num = $self->getDocNum();
    if ($doc_num >= 0) {
        my $reverse_text = GDBMUtils::gdbmGet($self->{'collection'}, $doc_num);

        # If there is no reverse lookup, then add one now.
        # NOTE(review): in the text under review both the pattern and the
        # stored prefix are a bare line break. That may be the residue of
        # a marker lost when the file was collapsed -- confirm against the
        # upstream repository before relying on it.
        if (!defined($reverse_text) || $reverse_text !~ /\n/) {
            GDBMUtils::gdbmSet($self->{'collection'}, $doc_num, "\n" . $self->{'oid'});
        }
    }

    # Done
    #rint STDERR "Stored document:\n[" . $self->{'oid'} . "]\n$text\n";
    return;
}
# /** saveDocument() **/

# /** Produces a textual representation of this object.
#  *
#  *  Each live key/value pair becomes one "<key>value" line, in the same
#  *  order as getAllMetadata() reports them.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @return A string which describes this incremental document object
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub toString {
    my ($self) = @_;
    return join('', map { "<$_->[0]>$_->[1]\n" } $self->_orderedPairs());
}
# /** toString() **/

1;