###########################################################################
#
# doc.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# class to hold documents

package doc;

use strict;
use warnings;

use basedoc;

# NOTE(review): this file calls unicode::ascii2utf8 but does not load the
# unicode module itself; presumably basedoc (or the calling program) has
# already loaded it -- confirm.

# @ISA is assigned inside BEGIN so that inheritance from basedoc is in
# place at compile time; it is declared with "our" to satisfy strict.
our @ISA;
BEGIN {
    @ISA = ('basedoc');
}

# Constructor.
#
# The document type may be indexed_doc, nonindexed_doc, or
# classification.
#
# Arguments (both optional):
#   $source_filename - stored as the "gsdlsourcefilename" metadata entry
#   $doc_type        - stored as the "gsdldoctype" metadata entry
# Each value is passed through unicode::ascii2utf8 before it is stored,
# and is skipped entirely when undefined.
#
# Returns the new object, blessed into $class.
sub new {
    my $class = shift (@_);
    my ($source_filename, $doc_type) = @_;

    my $self = basedoc->new();

    # $self->set_source_filename ($source_filename) if defined $source_filename;
    push (@{$self->{'metadata'}},
          ["gsdlsourcefilename", unicode::ascii2utf8($source_filename)])
        if defined $source_filename;

    # $self->set_doc_type ($doc_type) if defined $doc_type;
    push (@{$self->{'metadata'}},
          ["gsdldoctype", unicode::ascii2utf8($doc_type)])
        if defined $doc_type;

    # re-bless the basedoc object so that it becomes an instance of $class
    bless ($self, $class);
    return $self;
}


# methods for dealing with metadata

# set_metadata_element and get_metadata_element are for metadata
# which should only have one value. add_metadata and get_metadata
# are for metadata which can have more than one value.

# set_metadata_element assumes the value is in (extended) ascii form.
# For text which has been already converted to the UTF-8 format use
# set_utf8_metadata_element.
#
# Arguments: $section, $field, $value.
# Converts $value with unicode::ascii2utf8 and hands the result to
# set_utf8_metadata_element.
sub set_metadata_element {
    my $self = shift (@_);
    my ($section, $field, $value) = @_;

    $self->set_utf8_metadata_element ($section, $field,
                                      unicode::ascii2utf8($value));
}

# set_utf8_metadata_element assumes the text has already been
# converted to the UTF-8 encoding.
#
# Arguments: $section, $field, $value.
# Calls delete_metadata for ($section, $field) and then adds the new
# value, so that the field ends up holding a single value.
# (delete_metadata is not defined in this file; presumably it is
# inherited from basedoc.)
sub set_utf8_metadata_element {
    my $self = shift (@_);
    my ($section, $field, $value) = @_;

    $self->delete_metadata ($section, $field);
    $self->add_utf8_metadata ($section, $field, $value);
}

# add_metadata assumes the text is in (extended) ascii form. For
# text which has been already converted to the UTF-8 format use
# add_utf8_metadata.
#
# Arguments: $section, $field, $value.
# Converts $value with unicode::ascii2utf8 and hands the result to
# add_utf8_metadata.
sub add_metadata {
    my $self = shift (@_);
    my ($section, $field, $value) = @_;

    $self->add_utf8_metadata ($section, $field,
                              unicode::ascii2utf8($value));
}

# add_utf8_metadata assumes the text has already been converted
# to the UTF-8 encoding.
#
# Arguments: $section, $field, $value.
# Appends a [$field, $value] pair to the 'metadata' list of the section
# returned by _lookup_section. If the lookup returns undef, a message is
# printed to STDERR and nothing is added.
# (_lookup_section is not defined in this file; presumably it is
# inherited from basedoc.)
sub add_utf8_metadata {
    my $self = shift (@_);
    my ($section, $field, $value) = @_;

    my $section_ptr = $self->_lookup_section($section);
    if (!defined $section_ptr) {
        print STDERR "doc::add_utf8_metadata couldn't find section $section\n";
        return;
    }

    push (@{$section_ptr->{'metadata'}}, [$field, $value]);
}


# methods for dealing with text

# add_text assumes the text is in (extended) ascii form. For
# text which has been already converted to the UTF-8 format
# use add_utf8_text.
#
# Arguments: $section, $text.
sub add_text {
    my $self = shift (@_);
    my ($section, $text) = @_;

    # convert the text to UTF-8 encoded unicode characters
    # and add the text
    $self->add_utf8_text ($section, unicode::ascii2utf8($text));
}

# add_utf8_text assumes the text to be added has already
# been converted to the UTF-8 encoding. For ascii text use
# add_text
#
# Arguments: $section, $text.
# Appends $text to the 'text' entry of the section returned by
# _lookup_section. If the lookup returns undef, a message is printed
# to STDERR and nothing is appended.
sub add_utf8_text {
    my $self = shift (@_);
    my ($section, $text) = @_;

    my $section_ptr = $self->_lookup_section($section);
    if (!defined $section_ptr) {
        print STDERR "doc::add_utf8_text couldn't find section $section\n";
        return;
    }

    $section_ptr->{'text'} .= $text;
}

1;