source: gsdl/trunk/perllib/doc.pm@ 15894

Last change on this file since 15894 was 15894, checked in by mdewsnip, 16 years ago

Added "use strict" to the files missing it.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 25.3 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use strict;
37use unicode;
38use util;
39use ghtml;
40use File::stat;
41##use hashdoc;
42use docprint;
43
44# the document type may be indexed_doc, nonindexed_doc, or
45# classification
46
47my $OIDcount = 0;
48
49sub new {
50 my $class = shift (@_);
51 my ($source_filename, $doc_type) = @_;
52
53
54 my $self = bless {'associated_files'=>[],
55 'subsection_order'=>[],
56 'next_subsection'=>1,
57 'subsections'=>{},
58 'metadata'=>[],
59 'text'=>"",
60 'OIDtype'=>"hash"}, $class;
61
62 # used to set lastmodified here, but this can screw up the HASH ids, so
63 # the docsave processor now calls set_lastmodified
64
65 $self->{'source_path'} = $source_filename;
66
67 if (defined $source_filename) {
68 $source_filename = &util::filename_within_collection($source_filename);
69 $self->set_source_filename ($source_filename);
70 }
71
72 $self->set_doc_type ($doc_type) if defined $doc_type;
73
74 return $self;
75}
76# set lastmodified for OAI purposes, added by GRB, moved by kjdon
77sub set_lastmodified {
78 my $self = shift (@_);
79
80 my $source_path = $self->{'source_path'};
81
82 if (defined $source_path && (-e $source_path)) {
83
84 my $file_stat = stat($source_path);
85 my $mtime = $file_stat->mtime;
86 $self->add_utf8_metadata($self->get_top_section(), "lastmodified", $file_stat->mtime);
87 }
88}
89
90# clone the $self object
91sub duplicate {
92 my $self = shift (@_);
93
94 my $newobj = {};
95
96 foreach my $k (keys %$self) {
97 $newobj->{$k} = &clone ($self->{$k});
98 }
99
100 bless $newobj, ref($self);
101 return $newobj;
102}
103
104sub clone {
105 my ($from) = @_;
106 my $type = ref ($from);
107
108 if ($type eq "HASH") {
109 my $to = {};
110 foreach my $key (keys %$from) {
111 $to->{$key} = &clone ($from->{$key});
112 }
113 return $to;
114 } elsif ($type eq "ARRAY") {
115 my $to = [];
116 foreach my $v (@$from) {
117 push (@$to, &clone ($v));
118 }
119 return $to;
120 } else {
121 return $from;
122 }
123}
124
125sub set_OIDtype {
126 my $self = shift (@_);
127 my ($type, $metadata) = @_;
128
129 if (defined $type && $type =~ /^(hash|incremental|dirname|assigned)$/) {
130 $self->{'OIDtype'} = $type;
131 } else {
132 $self->{'OIDtype'} = "hash";
133 }
134 if ($type =~ /^assigned$/) {
135 if (defined $metadata) {
136 $self->{'OIDmetadata'} = $metadata;
137 } else {
138 $self->{'OIDmetadata'} = "dc.Identifier";
139 }
140 }
141}
142
143sub set_source_filename {
144 my $self = shift (@_);
145 my ($source_filename) = @_;
146
147 $self->set_metadata_element ($self->get_top_section(),
148 "gsdlsourcefilename",
149 $source_filename);
150}
151
152sub set_converted_filename {
153 my $self = shift (@_);
154 my ($converted_filename) = @_;
155
156 $self->set_metadata_element ($self->get_top_section(),
157 "gsdlconvertedfilename",
158 $converted_filename);
159}
160
161
162# returns the source_filename as it was provided
163sub get_source_filename {
164 my $self = shift (@_);
165
166 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
167}
168
169# returns converted filename if available else returns source filename
170sub get_filename_for_hashing {
171 my $self = shift (@_);
172
173 my $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlconvertedfilename");
174
175 if (!defined $filename) {
176 my $plugin_name = $self->get_metadata_element ($self->get_top_section(), "Plugin");
177 # if NULPlug processed file, then don't give a filename
178 if (defined $plugin_name && $plugin_name eq "NULPlug") {
179 $filename = undef;
180 } else {
181 $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
182 }
183 }
184 return $filename;
185}
186
187sub set_doc_type {
188 my $self = shift (@_);
189 my ($doc_type) = @_;
190
191 $self->set_metadata_element ($self->get_top_section(),
192 "gsdldoctype",
193 $doc_type);
194}
195
196# returns the gsdldoctype as it was provided
197# the default of "indexed_doc" is used if no document
198# type was provided
199sub get_doc_type {
200 my $self = shift (@_);
201
202 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
203 return $doc_type if (defined $doc_type);
204 return "indexed_doc";
205}
206
207
208# look up the reference to the a particular section
209sub _lookup_section {
210 my $self = shift (@_);
211 my ($section) = @_;
212
213 my ($num);
214 my $sectionref = $self;
215
216 while (defined $section && $section ne "") {
217
218 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
219
220 $num =~ s/^0+(\d)/$1/ if defined $num ; # remove leading 0s
221
222 $section = "" unless defined $section;
223
224
225 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
226 $sectionref = $sectionref->{'subsections'}->{$num};
227 } else {
228 return undef;
229 }
230 }
231
232 return $sectionref;
233}
234
235# calculate OID by hashing the contents of the document
236sub _calc_OID {
237 my $self = shift (@_);
238 my ($filename) = @_;
239
240 my $osexe = &util::get_os_exe();
241
242 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
243 $ENV{'GSDLOS'},"hashfile$osexe");
244
245 my $result = "NULL";
246
247 if (-e "$hashfile_exe") {
248# $result = `\"$hashfile_exe\" \"$filename\"`;
249 $result = `hashfile$osexe \"$filename\"`;
250 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
251
252 } else {
253 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
254 }
255 return "HASH$result";
256}
257
258# methods dealing with OID, not groups of them.
259
260# if $OID is not provided one is calculated
261sub set_OID {
262 my $self = shift (@_);
263 my ($OID) = @_;
264
265 my $use_hash_oid = 0;
266 # if an OID wasn't provided claculate one
267 if (!defined $OID) {
268 $OID = "NULL";
269 if ($self->{'OIDtype'} eq "hash") {
270 $use_hash_oid = 1;
271 } elsif ($self->{'OIDtype'} eq "incremental") {
272 $OID = "D" . $OIDcount;
273 $OIDcount ++;
274
275 } elsif ($self->{'OIDtype'} eq "dirname") {
276 $OID = 'J';
277 my $filename = $self->get_source_filename();
278 if (defined($filename)) { # && -e $filename) {
279 $OID = &File::Basename::dirname($filename);
280 if (defined $OID) {
281 $OID = 'J'.&File::Basename::basename($OID);
282 $OID =~ s/\.//; #remove any periods
283 } else {
284 print STDERR "Failed to find base for filename ($filename)...generating hash id\n";
285 $use_hash_oid = 1;
286 }
287 } else {
288 print STDERR "Failed to find filename, generating hash id\n";
289 $use_hash_oid = 1;
290 }
291
292 } elsif ($self->{'OIDtype'} eq "assigned") {
293 my $identifier = $self->get_metadata_element ($self->get_top_section(), $self->{'OIDmetadata'});
294 if (defined $identifier && $identifier ne "") {
295 $OID = "D" . $identifier;
296 $OID =~ s/\.//; #remove any periods
297 } else {
298 # need a hash id
299 print STDERR "no $self->{'OIDmetadata'} metadata found, generating hash id\n";
300 $use_hash_oid = 1;
301 }
302
303 } else {
304 $use_hash_oid = 1;
305 }
306
307 if ($use_hash_oid) {
308
309 # "hash" OID - feed file to hashfile.exe
310 #my $filename = $self->get_source_filename();
311 # we want to use the converted file for hashing if available
312 # cos its quicker
313 my $filename = $self->get_filename_for_hashing();
314 # -z: don't want to hash on the file if it is zero size
315 if (defined($filename) && -e $filename && !-z $filename) {
316 $OID = $self->_calc_OID ($filename);
317 } else {
318 $filename = &util::get_tmp_filename();
319 if (!open (OUTFILE, ">$filename")) {
320 print STDERR "doc::set_OID could not write to $filename\n";
321 } else {
322 my $doc_text = &docprint::get_section_xml($self, $self->get_top_section());
323 print OUTFILE $doc_text;
324 close (OUTFILE);
325 }
326 $OID = $self->_calc_OID ($filename);
327 &util::rm ($filename);
328 }
329 }
330 }
331 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
332}
333
334# this uses hashdoc (embedded c thingy) which is faster but still
335# needs a little work to be suffiently stable
336sub ___set_OID {
337 my $self = shift (@_);
338 my ($OID) = @_;
339
340 # if an OID wasn't provided then calculate hash value based on document
341 if (!defined $OID)
342 {
343 my $hash_text = &docprint::get_section_xml($self, $self->get_top_section());
344 my $hash_len = length($hash_text);
345
346 $OID = &hashdoc::buffer($hash_text,$hash_len);
347 }
348
349 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
350}
351
352# returns the OID for this document
353sub get_OID {
354 my $self = shift (@_);
355 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
356 return $OID if (defined $OID);
357 return "NULL";
358}
359
360sub delete_OID {
361 my $self = shift (@_);
362
363 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
364}
365
366
367# methods for manipulating section names
368
369# returns the name of the top-most section (the top
370# level of the document
371sub get_top_section {
372 my $self = shift (@_);
373
374 return "";
375}
376
377# returns a section
378sub get_parent_section {
379 my $self = shift (@_);
380 my ($section) = @_;
381
382 $section =~ s/(^|\.)\d+$//;
383
384 return $section;
385}
386
387# returns the first child section (or the end child
388# if there isn't any)
389sub get_begin_child {
390 my $self = shift (@_);
391 my ($section) = @_;
392
393 my $section_ptr = $self->_lookup_section($section);
394 return "" unless defined $section_ptr;
395
396 if (defined $section_ptr->{'subsection_order'}->[0]) {
397 return "$section.$section_ptr->{'subsection_order'}->[0]";
398 }
399
400 return $self->get_end_child ($section);
401}
402
403# returns the next child of a parent section
404sub get_next_child {
405 my $self = shift (@_);
406 my ($section) = @_;
407
408 my $parent_section = $self->get_parent_section($section);
409 my $parent_section_ptr = $self->_lookup_section($parent_section);
410 return undef unless defined $parent_section_ptr;
411
412 my ($section_num) = $section =~ /(\d+)$/;
413 return undef unless defined $section_num;
414
415 my $i = 0;
416 my $section_order = $parent_section_ptr->{'subsection_order'};
417 while ($i < scalar(@$section_order)) {
418 last if $section_order->[$i] eq $section_num;
419 $i++;
420 }
421
422 $i++; # the next child
423 if ($i < scalar(@$section_order)) {
424 return $section_order->[$i] if $parent_section eq "";
425 return "$parent_section.$section_order->[$i]";
426 }
427
428 # no more sections in this level
429 return undef;
430}
431
432# returns a reference to a list of children
433sub get_children {
434 my $self = shift (@_);
435 my ($section) = @_;
436
437 my $section_ptr = $self->_lookup_section($section);
438 return [] unless defined $section_ptr;
439
440 my @children = @{$section_ptr->{'subsection_order'}};
441
442 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
443 return \@children;
444}
445
446# returns the child section one past the last one (which
447# is coded as "0")
448sub get_end_child {
449 my $self = shift (@_);
450 my ($section) = @_;
451
452 return $section . ".0" unless $section eq "";
453 return "0";
454}
455
456# returns the next section in book order
457sub get_next_section {
458 my $self = shift (@_);
459 my ($section) = @_;
460
461 return undef unless defined $section;
462
463 my $section_ptr = $self->_lookup_section($section);
464 return undef unless defined $section_ptr;
465
466 # first try to find first child
467 if (defined $section_ptr->{'subsection_order'}->[0]) {
468 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
469 return "$section.$section_ptr->{'subsection_order'}->[0]";
470 }
471
472 do {
473 # try to find sibling
474 my $next_child = $self->get_next_child ($section);
475 return $next_child if (defined $next_child);
476
477 # move up one level
478 $section = $self->get_parent_section ($section);
479 } while $section =~ /\d/;
480
481 return undef;
482}
483
484sub is_leaf_section {
485 my $self = shift (@_);
486 my ($section) = @_;
487
488 my $section_ptr = $self->_lookup_section($section);
489 return 1 unless defined $section_ptr;
490
491 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
492}
493
494# methods for dealing with sections
495
496# returns the name of the inserted section
497sub insert_section {
498 my $self = shift (@_);
499 my ($before_section) = @_;
500
501 # get the child to insert before and its parent section
502 my $parent_section = "";
503 my $before_child = "0";
504 my @before_section = split (/\./, $before_section);
505 if (scalar(@before_section) > 0) {
506 $before_child = pop (@before_section);
507 $parent_section = join (".", @before_section);
508 }
509
510 my $parent_section_ptr = $self->_lookup_section($parent_section);
511 if (!defined $parent_section_ptr) {
512 print STDERR "doc::insert_section couldn't find parent section " .
513 "$parent_section\n";
514 return;
515 }
516
517 # get the next section number
518 my $section_num = $parent_section_ptr->{'next_subsection'}++;
519
520 my $i = 0;
521 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
522 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
523 $i++;
524 }
525
526 # insert the section number into the order list
527 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
528
529 # add this section to the parent section
530 my $section_ptr = {'subsection_order'=>[],
531 'next_subsection'=>1,
532 'subsections'=>{},
533 'metadata'=>[],
534 'text'=>""};
535 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
536
537 # work out the full section number
538 my $section = $parent_section;
539 $section .= "." unless $section eq "";
540 $section .= $section_num;
541
542 return $section;
543}
544
545# creates a pre-named section
546sub create_named_section {
547 my $self = shift (@_);
548 my ($mastersection) = @_;
549
550 my ($num);
551 my $section = $mastersection;
552 my $sectionref = $self;
553
554 while ($section ne "") {
555 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
556 $num =~ s/^0+(\d)/$1/; # remove leading 0s
557 $section = "" unless defined $section;
558
559 if (defined $num) {
560 if (!defined $sectionref->{'subsections'}->{$num}) {
561 push (@{$sectionref->{'subsection_order'}}, $num);
562 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
563 'next_subsection'=>1,
564 'subsections'=>{},
565 'metadata'=>[],
566 'text'=>""};
567 if ($num >= $sectionref->{'next_subsection'}) {
568 $sectionref->{'next_subsection'} = $num + 1;
569 }
570 }
571 $sectionref = $sectionref->{'subsections'}->{$num};
572
573 } else {
574 print STDERR "doc::create_named_section couldn't create section ";
575 print STDERR "$mastersection\n";
576 last;
577 }
578 }
579}
580
581# returns a reference to a list of subsections
582sub list_subsections {
583 my $self = shift (@_);
584 my ($section) = @_;
585
586 my $section_ptr = $self->_lookup_section ($section);
587 if (!defined $section_ptr) {
588 print STDERR "doc::list_subsections couldn't find section $section\n";
589 return [];
590 }
591
592 return [@{$section_ptr->{'subsection_order'}}];
593}
594
595sub delete_section {
596 my $self = shift (@_);
597 my ($section) = @_;
598
599# my $section_ptr = {'subsection_order'=>[],
600# 'next_subsection'=>1,
601# 'subsections'=>{},
602# 'metadata'=>[],
603# 'text'=>""};
604
605 # if this is the top section reset everything
606 if ($section eq "") {
607 $self->{'subsection_order'} = [];
608 $self->{'subsections'} = {};
609 $self->{'metadata'} = [];
610 $self->{'text'} = "";
611 return;
612 }
613
614 # find the parent of the section to delete
615 my $parent_section = "";
616 my $child = "0";
617 my @section = split (/\./, $section);
618 if (scalar(@section) > 0) {
619 $child = pop (@section);
620 $parent_section = join (".", @section);
621 }
622
623 my $parent_section_ptr = $self->_lookup_section($parent_section);
624 if (!defined $parent_section_ptr) {
625 print STDERR "doc::delete_section couldn't find parent section " .
626 "$parent_section\n";
627 return;
628 }
629
630 # remove this section from the subsection_order list
631 my $i = 0;
632 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
633 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
634 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
635 last;
636 }
637 $i++;
638 }
639
640 # remove this section from the subsection hash
641 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
642 undef $parent_section_ptr->{'subsections'}->{$child};
643 }
644}
645
646#--
647# methods for dealing with metadata
648
649# set_metadata_element and get_metadata_element are for metadata
650# which should only have one value. add_meta_data and get_metadata
651# are for metadata which can have more than one value.
652
653# returns the first metadata value which matches field
654
655# This version of get metadata element works much like the one above,
656# except it allows for the namespace portion of a metadata element to
657# be ignored, thus if you are searching for dc.Title, the first piece
658# of matching metadata ending with the name Title (once any namespace
659# is removed) would be returned.
660# 28-11-2003 John Thompson
661sub get_metadata_element {
662 my $self = shift (@_);
663 my ($section, $field, $ignore_namespace) = @_;
664 my ($data);
665
666 $ignore_namespace = 0 unless defined $ignore_namespace;
667
668 my $section_ptr = $self->_lookup_section($section);
669 if (!defined $section_ptr) {
670 print STDERR "doc::get_metadata_element couldn't find section ", $section, "\n";
671 return;
672 }
673
674 # Remove the any namespace if we are being told to ignore them
675 if($ignore_namespace) {
676 $field =~ s/^\w*\.//;
677 }
678
679 foreach $data (@{$section_ptr->{'metadata'}}) {
680
681 my $data_name = $data->[0];
682
683 # Remove the any namespace if we are being told to ignore them
684 if($ignore_namespace) {
685 $data_name =~ s/^\w*\.//;
686 }
687
688 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
689 }
690
691 return undef; # was not found
692}
693
694# returns a list of the form [value1, value2, ...]
695sub get_metadata {
696 my $self = shift (@_);
697 my ($section, $field, $ignore_namespace) = @_;
698 my ($data);
699
700 $ignore_namespace = 0 unless defined $ignore_namespace;
701
702 my $section_ptr = $self->_lookup_section($section);
703 if (!defined $section_ptr) {
704 print STDERR "doc::get_metadata couldn't find section ",
705 $section, "\n";
706 return;
707 }
708
709 # Remove the any namespace if we are being told to ignore them
710 if($ignore_namespace) {
711 $field =~ s/^\w*\.//;
712 }
713
714 my @metadata = ();
715 foreach $data (@{$section_ptr->{'metadata'}}) {
716
717 my $data_name = $data->[0];
718 # Remove the any namespace if we are being told to ignore them
719 if($ignore_namespace) {
720 $data_name =~ s/^\w*\.//;
721 }
722
723 push (@metadata, $data->[1]) if ($data_name eq $field);
724 }
725
726 return \@metadata;
727}
728
729# returns a list of the form [[field,value],[field,value],...]
730sub get_all_metadata {
731 my $self = shift (@_);
732 my ($section) = @_;
733
734 my $section_ptr = $self->_lookup_section($section);
735 if (!defined $section_ptr) {
736 print STDERR "doc::get_all_metadata couldn't find section ", $section, "\n";
737 return;
738 }
739
740 return $section_ptr->{'metadata'};
741}
742
743# $value is optional
744sub delete_metadata {
745 my $self = shift (@_);
746 my ($section, $field, $value) = @_;
747
748 my $section_ptr = $self->_lookup_section($section);
749 if (!defined $section_ptr) {
750 print STDERR "doc::delete_metadata couldn't find section ", $section, "\n";
751 return;
752 }
753
754 my $i = 0;
755 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
756 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
757 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
758 splice (@{$section_ptr->{'metadata'}}, $i, 1);
759 } else {
760 $i++;
761 }
762 }
763}
764
765sub delete_all_metadata {
766 my $self = shift (@_);
767 my ($section) = @_;
768
769 my $section_ptr = $self->_lookup_section($section);
770 if (!defined $section_ptr) {
771 print STDERR "doc::delete_all_metadata couldn't find section ", $section, "\n";
772 return;
773 }
774
775 $section_ptr->{'metadata'} = [];
776}
777
778sub set_metadata_element {
779 my $self = shift (@_);
780 my ($section, $field, $value) = @_;
781
782 $self->set_utf8_metadata_element ($section, $field,
783 &unicode::ascii2utf8(\$value));
784}
785
786# set_utf8_metadata_element assumes the text has already been
787# converted to the UTF-8 encoding.
788sub set_utf8_metadata_element {
789 my $self = shift (@_);
790 my ($section, $field, $value) = @_;
791
792 $self->delete_metadata ($section, $field);
793 $self->add_utf8_metadata ($section, $field, $value);
794}
795
796
797# add_metadata assumes the text is in (extended) ascii form. For
798# text which has already been converted to the UTF-8 format use
799# add_utf8_metadata.
800sub add_metadata {
801 my $self = shift (@_);
802 my ($section, $field, $value) = @_;
803
804 $self->add_utf8_metadata ($section, $field,
805 &unicode::ascii2utf8(\$value));
806}
807
808sub add_utf8_metadata {
809 my $self = shift (@_);
810 my ($section, $field, $value) = @_;
811
812 my $section_ptr = $self->_lookup_section($section);
813 if (!defined $section_ptr) {
814 print STDERR "doc::add_utf8_metadata couldn't find section ", $section, "\n";
815 return;
816 }
817 if (!defined $value) {
818 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
819 return;
820 }
821 if (!defined $field) {
822 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
823 return;
824 }
825
826 #print STDERR "###$field=$value\n";
827 # double check that the value is utf-8
828 if (unicode::ensure_utf8(\$value)) {
829 print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
830 }
831
832 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
833}
834
835
836# methods for dealing with text
837
838# returns the text for a section
839sub get_text {
840 my $self = shift (@_);
841 my ($section) = @_;
842
843 my $section_ptr = $self->_lookup_section($section);
844 if (!defined $section_ptr) {
845 print STDERR "doc::get_text couldn't find section " .
846 "$section\n";
847 return "";
848 }
849
850 return $section_ptr->{'text'};
851}
852
853# returns the (utf-8 encoded) length of the text for a section
854sub get_text_length {
855 my $self = shift (@_);
856 my ($section) = @_;
857
858 my $section_ptr = $self->_lookup_section($section);
859 if (!defined $section_ptr) {
860 print STDERR "doc::get_text_length couldn't find section " .
861 "$section\n";
862 return 0;
863 }
864
865 return length ($section_ptr->{'text'});
866}
867
868sub delete_text {
869 my $self = shift (@_);
870 my ($section) = @_;
871
872 my $section_ptr = $self->_lookup_section($section);
873 if (!defined $section_ptr) {
874 print STDERR "doc::delete_text couldn't find section " .
875 "$section\n";
876 return;
877 }
878
879 $section_ptr->{'text'} = "";
880}
881
882# add_text assumes the text is in (extended) ascii form. For
883# text which has been already converted to the UTF-8 format
884# use add_utf8_text.
885sub add_text {
886 my $self = shift (@_);
887 my ($section, $text) = @_;
888
889 # convert the text to UTF-8 encoded unicode characters
890 # and add the text
891 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
892}
893
894
895# add_utf8_text assumes the text to be added has already
896# been converted to the UTF-8 encoding. For ascii text use
897# add_text
898sub add_utf8_text {
899 my $self = shift (@_);
900 my ($section, $text) = @_;
901
902 my $section_ptr = $self->_lookup_section($section);
903 if (!defined $section_ptr) {
904 print STDERR "doc::add_utf8_text couldn't find section " .
905 "$section\n";
906 return;
907 }
908
909 $section_ptr->{'text'} .= $text;
910}
911
912
913# methods for dealing with associated files
914
915# a file is associated with a document, NOT a section.
916# if section is defined it is noted in the data structure
917# only so that files associated from a particular section
918# may be removed later (using delete_section_assoc_files)
919sub associate_file {
920 my $self = shift (@_);
921 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
922 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
923
924 # remove all associated files with the same name
925 $self->delete_assoc_file ($assoc_filename);
926
927 push (@{$self->{'associated_files'}},
928 [$real_filename, $assoc_filename, $mime_type, $section]);
929}
930
931# returns a list of associated files in the form
932# [[real_filename, assoc_filename, mimetype], ...]
933sub get_assoc_files {
934 my $self = shift (@_);
935
936 return $self->{'associated_files'};
937}
938
939sub delete_section_assoc_files {
940 my $self = shift (@_);
941 my ($section) = @_;
942
943 my $i=0;
944 while ($i < scalar (@{$self->{'associated_files'}})) {
945 if (defined $self->{'associated_files'}->[$i]->[3] &&
946 $self->{'associated_files'}->[$i]->[3] eq $section) {
947 splice (@{$self->{'associated_files'}}, $i, 1);
948 } else {
949 $i++;
950 }
951 }
952}
953
954sub delete_assoc_file {
955 my $self = shift (@_);
956 my ($assoc_filename) = @_;
957
958 my $i=0;
959 while ($i < scalar (@{$self->{'associated_files'}})) {
960 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
961 splice (@{$self->{'associated_files'}}, $i, 1);
962 } else {
963 $i++;
964 }
965 }
966}
967
968sub reset_nextsection_ptr {
969 my $self = shift (@_);
970 my ($section) = @_;
971
972 my $section_ptr = $self->_lookup_section($section);
973 $section_ptr->{'next_subsection'} = 1;
974}
975
9761;
Note: See TracBrowser for help on using the repository browser.