source: trunk/gsdl/perllib/doc.pm@ 2484

Last change on this file since 2484 was 2484, checked in by say1, 23 years ago

Changed SplitPlug to allow control over the OID. Changed BibTexPlug to be more permissive in the Bibtex format it accepts. Changed BibTexPlug to use the BibTex key as the OID.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.5 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29
30BEGIN {
31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
33}
34
35use unicode;
36use util;
37use ghtml;
38##use hashdoc;
39
40# the document type may be indexed_doc, nonindexed_doc, or
41# classification
42
43my $OIDcount = 0;
44
45sub new {
46 my $class = shift (@_);
47 my ($source_filename, $doc_type) = @_;
48
49 my $self = bless {'associated_files'=>[],
50 'subsection_order'=>[],
51 'next_subsection'=>1,
52 'subsections'=>{},
53 'metadata'=>[],
54 'text'=>"",
55 'OIDtype'=>"hash"}, $class;
56
57 $self->set_source_filename ($source_filename) if defined $source_filename;
58 $self->set_doc_type ($doc_type) if defined $doc_type;
59
60 return $self;
61}
62
63# clone the $self object
64sub duplicate {
65 my $self = shift (@_);
66
67 my $newobj = {};
68
69 foreach $k (keys %$self) {
70 $newobj->{$k} = &clone ($self->{$k});
71 }
72
73 bless $newobj, ref($self);
74 return $newobj;
75}
76
77sub clone {
78 my ($from) = @_;
79 my $type = ref ($from);
80
81 if ($type eq "HASH") {
82 my $to = {};
83 foreach $key (keys %$from) {
84 $to->{$key} = &clone ($from->{$key});
85 }
86 return $to;
87 } elsif ($type eq "ARRAY") {
88 my $to = [];
89 foreach $v (@$from) {
90 push (@$to, &clone ($v));
91 }
92 return $to;
93 } else {
94 return $from;
95 }
96}
97
98sub set_OIDtype {
99 my $self = shift (@_);
100 my ($type) = @_;
101
102 if ($type eq "incremental") {
103 $self->{'OIDtype'} = $type;
104 } else {
105 $self->{'OIDtype'} = "hash";
106 }
107}
108
109sub set_source_filename {
110 my $self = shift (@_);
111 my ($source_filename) = @_;
112
113 $self->set_metadata_element ($self->get_top_section(),
114 "gsdlsourcefilename",
115 $source_filename);
116}
117
118# returns the source_filename as it was provided
119sub get_source_filename {
120 my $self = shift (@_);
121
122 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
123}
124
125sub set_doc_type {
126 my $self = shift (@_);
127 my ($doc_type) = @_;
128
129 $self->set_metadata_element ($self->get_top_section(),
130 "gsdldoctype",
131 $doc_type);
132}
133
134# returns the source_filename as it was provided
135# the default of "indexed_doc" is used if no document
136# type was provided
137sub get_doc_type {
138 my $self = shift (@_);
139
140 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
141 return $doc_type if (defined $doc_type);
142 return "indexed_doc";
143}
144
145sub _escape_text {
146 my ($text) = @_;
147
148 # special characters in the gml encoding
149 $text =~ s/&/&/g; # this has to be first...
150 $text =~ s/</&lt;/g;
151 $text =~ s/>/&gt;/g;
152 $text =~ s/\"/&quot;/g;
153
154 return $text;
155}
156
157
158sub buffer_section_gml {
159 my $self = shift (@_);
160 my ($section, $suppress_subject_info) = @_;
161
162 $suppress_subject_info = 0 unless defined $suppress_subject_info;
163 my ($all_text,$data, $subsection);
164
165 my $section_ptr = $self->_lookup_section ($section);
166 my ($section_num) = $section =~ /(\d+)$/;
167
168 return "" unless defined $section_ptr;
169
170 # output the section header (including the section number
171 # and metadata)
172
173 $all_text = "<gsdlsection";
174 $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
175 foreach $data (@{$section_ptr->{'metadata'}}) {
176 $all_text .= " $data->[0]=\"" . &_escape_text($data->[1]) . "\""
177 unless $suppress_subject_info && $data->[0] eq "Subject";
178 }
179 $all_text .= ">";
180
181 # output the text
182 $all_text .= &_escape_text($section_ptr->{'text'});
183
184 # output all the subsections
185 foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
186 $all_text .= $self->buffer_section_gml("$section.$subsection",
187 $suppress_subject_info);
188 }
189
190 # output the closing tag
191 $all_text .= "</gsdlsection>\n";
192
193 return $all_text;
194}
195
196
197sub buffer_section_xml {
198 my $self = shift (@_);
199 my ($section, $dtd_metadata, $suppress_subject_info) = @_;
200
201 $suppress_subject_info = 0 unless defined $suppress_subject_info;
202 my ($all_text, $data, $subsection);
203
204 my $section_ptr = $self->_lookup_section ($section);
205 my ($section_num) = $section =~ /(\d+)$/;
206
207 return "" unless defined $section_ptr;
208
209 # output the section header (including the section number
210 # and metadata)
211
212 $all_text .= "<gsdlsection";
213 $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
214 $all_text .= ">\n";
215
216 $all_text .= " <metadata>\n";
217
218 # output metadata
219 foreach $data (@{$section_ptr->{'metadata'}}) {
220 my $tag_name = $data->[0];
221
222 # a tagname beginning with '/' (like </srclink>) will cause problems
223 # so we'll escape any leading '/'
224 $tag_name =~ s/^\//&\#47;/;
225
226 my $tag_value = &_escape_text($data->[1]);
227
228 unless ($suppress_subject_info && $tag_name eq "Subject")
229 {
230 if (defined $dtd_metadata)
231 {
232 $dtd_metadata->{$tag_name}++;
233 }
234
235 $all_text .= " <$tag_name>$tag_value</$tag_name>\n";
236 }
237 }
238
239 $all_text .= " </metadata>\n";
240
241 # output the text
242 $all_text .= &_escape_text($section_ptr->{'text'});
243
244 # output all the subsections
245 foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
246 $all_text .= $self->buffer_section_xml("$section.$subsection",
247 $dtd_metadata,
248 $suppress_subject_info);
249 }
250
251 # output the closing tag
252 $all_text .= "</gsdlsection>\n";
253
254 return $all_text;
255}
256
257sub output_section {
258 my $self = shift (@_);
259 my ($handle, $section, $colname, $dtd_metadata,
260 $suppress_subject_info) = @_;
261
262 my $all_text = $self->buffer_section_xml($section, $dtd_metadata,
263 $suppress_subject_info);
264
265 # xml header
266 if (defined $collection)
267 {
268 my $xml_head
269 = '<? xml version="1.0" standalone="no" encoding="UTF-8" ?>';
270 $xml_head .= "\n<!DOCTYPE gsdl:$colname SYSTEM \"$colname.dtd\">\n";
271 $all_text = $xml_head.$all_text;
272 }
273
274 print $handle $all_text;
275}
276
277# look up the reference to the a particular section
278sub _lookup_section {
279 my $self = shift (@_);
280 my ($section) = @_;
281
282 my ($num);
283 my $sectionref = $self;
284
285 while (defined $section && $section ne "") {
286 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
287 $num =~ s/^0+(\d)/$1/; # remove leading 0s
288 $section = "" unless defined $section;
289
290 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
291 $sectionref = $sectionref->{'subsections'}->{$num};
292 } else {
293 return undef;
294 }
295 }
296
297 return $sectionref;
298}
299
300# calculate OID by hashing the contents of the document
301sub _calc_OID {
302 my $self = shift (@_);
303 my ($filename) = @_;
304
305 my $osexe = &util::get_os_exe();
306
307 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
308 $ENV{'GSDLOS'},"hashfile$osexe");
309 my $result = "NULL";
310
311 if (-e "$hashfile_exe") {
312# $result = `\"$hashfile_exe\" \"$filename\"`;
313 $result = `hashfile$osexe \"$filename\"`;
314 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
315
316 } else {
317 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
318 }
319
320 return "HASH$result";
321}
322
323# methods dealing with OID, not groups of them.
324
325# if $OID is not provided one is calculated
326sub set_OID {
327 my $self = shift (@_);
328 my ($OID) = @_;
329
330 # if an OID wasn't provided claculate one
331 if (!defined $OID) {
332 $OID = "NULL";
333
334 if ($self->{'OIDtype'} eq "incremental") {
335 $OID = "D" . $OIDcount;
336 $OIDcount ++;
337
338 } else {
339 # "hash" OID - feed file to hashfile.exe
340 my $filename = $self->get_source_filename();
341 if (defined($filename) && -e $filename) {
342
343 $OID = $self->_calc_OID ($filename);
344
345 } else {
346 $filename = &util::get_tmp_filename();
347 if (!open (OUTFILE, ">$filename")) {
348 print STDERR "doc::set_OID could not write to $filename\n";
349 } else {
350 $self->output_section('OUTFILE', $self->get_top_section(),
351 undef, 1);
352 close (OUTFILE);
353 }
354
355 $OID = $self->_calc_OID ($filename);
356 &util::rm ($filename);
357 }
358 }
359 }
360 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
361}
362
363# this uses hashdoc (embedded c thingy) which is faster but still
364# needs a little work to be suffiently stable
365sub ___set_OID {
366 my $self = shift (@_);
367 my ($OID) = @_;
368
369 # if an OID wasn't provided then calculate hash value based on document
370 if (!defined $OID)
371 {
372 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
373 undef, 1);
374 my $hash_len = length($hash_text);
375
376 $OID = &hashdoc::buffer($hash_text,$hash_len);
377 }
378
379 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
380}
381
382# returns the OID for this document
383sub get_OID {
384 my $self = shift (@_);
385 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
386 return $OID if (defined $OID);
387 return "NULL";
388}
389
390sub delete_OID {
391 my $self = shift (@_);
392
393 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
394}
395
396
397# methods for manipulating section names
398
399# returns the name of the top-most section (the top
400# level of the document
401sub get_top_section {
402 my $self = shift (@_);
403
404 return "";
405}
406
407# returns a section
408sub get_parent_section {
409 my $self = shift (@_);
410 my ($section) = @_;
411
412 $section =~ s/(^|\.)\d+$//;
413
414 return $section;
415}
416
417# returns the first child section (or the end child
418# if there isn't any)
419sub get_begin_child {
420 my $self = shift (@_);
421 my ($section) = @_;
422
423 my $section_ptr = $self->_lookup_section($section);
424 return "" unless defined $section_ptr;
425
426 if (defined $section_ptr->{'subsection_order'}->[0]) {
427 return "$section.$section_ptr->{'subsection_order'}->[0]";
428 }
429
430 return $self->get_end_child ($section);
431}
432
433# returns the next child of a parent section
434sub get_next_child {
435 my $self = shift (@_);
436 my ($section) = @_;
437
438 my $parent_section = $self->get_parent_section($section);
439 my $parent_section_ptr = $self->_lookup_section($parent_section);
440 return undef unless defined $parent_section_ptr;
441
442 my ($section_num) = $section =~ /(\d+)$/;
443 return undef unless defined $section_num;
444
445 my $i = 0;
446 my $section_order = $parent_section_ptr->{'subsection_order'};
447 while ($i < scalar(@$section_order)) {
448 last if $section_order->[$i] eq $section_num;
449 $i++;
450 }
451
452 $i++; # the next child
453 if ($i < scalar(@$section_order)) {
454 return $section_order->[$i] if $parent_section eq "";
455 return "$parent_section.$section_order->[$i]";
456 }
457
458 # no more sections in this level
459 return undef;
460}
461
462# returns a reference to a list of children
463sub get_children {
464 my $self = shift (@_);
465 my ($section) = @_;
466
467 my $section_ptr = $self->_lookup_section($section);
468 return [] unless defined $section_ptr;
469
470 my @children = @{$section_ptr->{'subsection_order'}};
471
472 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
473 return \@children;
474}
475
476# returns the child section one past the last one (which
477# is coded as "0")
478sub get_end_child {
479 my $self = shift (@_);
480 my ($section) = @_;
481
482 return $section . ".0" unless $section eq "";
483 return "0";
484}
485
486# returns the next section in book order
487sub get_next_section {
488 my $self = shift (@_);
489 my ($section) = @_;
490
491 return undef unless defined $section;
492
493 my $section_ptr = $self->_lookup_section($section);
494 return undef unless defined $section_ptr;
495
496 # first try to find first child
497 if (defined $section_ptr->{'subsection_order'}->[0]) {
498 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
499 return "$section.$section_ptr->{'subsection_order'}->[0]";
500 }
501
502 do {
503 # try to find sibling
504 my $next_child = $self->get_next_child ($section);
505 return $next_child if (defined $next_child);
506
507 # move up one level
508 $section = $self->get_parent_section ($section);
509 } while $section =~ /\d/;
510
511 return undef;
512}
513
514sub is_leaf_section {
515 my $self = shift (@_);
516 my ($section) = @_;
517
518 my $section_ptr = $self->_lookup_section($section);
519 return 1 unless defined $section_ptr;
520
521 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
522}
523
524# methods for dealing with sections
525
526# returns the name of the inserted section
527sub insert_section {
528 my $self = shift (@_);
529 my ($before_section) = @_;
530
531 # get the child to insert before and its parent section
532 my $parent_section = "";
533 my $before_child = "0";
534 my @before_section = split (/\./, $before_section);
535 if (scalar(@before_section) > 0) {
536 $before_child = pop (@before_section);
537 $parent_section = join (".", @before_section);
538 }
539
540 my $parent_section_ptr = $self->_lookup_section($parent_section);
541 if (!defined $parent_section_ptr) {
542 print STDERR "doc::insert_section couldn't find parent section " .
543 "$parent_section\n";
544 return;
545 }
546
547 # get the next section number
548 my $section_num = $parent_section_ptr->{'next_subsection'}++;
549
550 my $i = 0;
551 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
552 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
553 $i++;
554 }
555
556 # insert the section number into the order list
557 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
558
559 # add this section to the parent section
560 my $section_ptr = {'subsection_order'=>[],
561 'next_subsection'=>1,
562 'subsections'=>{},
563 'metadata'=>[],
564 'text'=>""};
565 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
566
567 # work out the full section number
568 my $section = $parent_section;
569 $section .= "." unless $section eq "";
570 $section .= $section_num;
571
572 return $section;
573}
574
575# creates a pre-named section
576sub create_named_section {
577 my $self = shift (@_);
578 my ($mastersection) = @_;
579
580 my ($num);
581 my $section = $mastersection;
582 my $sectionref = $self;
583
584 while ($section ne "") {
585 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
586 $num =~ s/^0+(\d)/$1/; # remove leading 0s
587 $section = "" unless defined $section;
588
589 if (defined $num) {
590 if (!defined $sectionref->{'subsections'}->{$num}) {
591 push (@{$sectionref->{'subsection_order'}}, $num);
592 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
593 'next_subsection'=>1,
594 'subsections'=>{},
595 'metadata'=>[],
596 'text'=>""};
597 if ($num >= $sectionref->{'next_subsection'}) {
598 $sectionref->{'next_subsection'} = $num + 1;
599 }
600 }
601 $sectionref = $sectionref->{'subsections'}->{$num};
602
603 } else {
604 print STDERR "doc::create_named_section couldn't create section ";
605 print STDERR "$mastersection\n";
606 last;
607 }
608 }
609}
610
611# returns a reference to a list of subsections
612sub list_subsections {
613 my $self = shift (@_);
614 my ($section) = @_;
615
616 my $section_ptr = $self->_lookup_section ($section);
617 if (!defined $section_ptr) {
618 print STDERR "doc::list_subsections couldn't find section $section\n";
619 return [];
620 }
621
622 return [@{$section_ptr->{'subsection_order'}}];
623}
624
625sub delete_section {
626 my $self = shift (@_);
627 my ($section) = @_;
628
629# my $section_ptr = {'subsection_order'=>[],
630# 'next_subsection'=>1,
631# 'subsections'=>{},
632# 'metadata'=>[],
633# 'text'=>""};
634
635 # if this is the top section reset everything
636 if ($section eq "") {
637 $self->{'subsection_order'} = [];
638 $self->{'subsections'} = {};
639 $self->{'metadata'} = [];
640 $self->{'text'} = "";
641 return;
642 }
643
644 # find the parent of the section to delete
645 my $parent_section = "";
646 my $child = "0";
647 my @section = split (/\./, $section);
648 if (scalar(@section) > 0) {
649 $child = pop (@section);
650 $parent_section = join (".", @section);
651 }
652
653 my $parent_section_ptr = $self->_lookup_section($parent_section);
654 if (!defined $parent_section_ptr) {
655 print STDERR "doc::delete_section couldn't find parent section " .
656 "$parent_section\n";
657 return;
658 }
659
660 # remove this section from the subsection_order list
661 my $i = 0;
662 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
663 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
664 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
665 last;
666 }
667 $i++;
668 }
669
670 # remove this section from the subsection hash
671 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
672 undef $parent_section_ptr->{'subsections'}->{$child};
673 }
674}
675
676#--
677# methods for dealing with metadata
678
679# set_metadata_element and get_metadata_element are for metadata
680# which should only have one value. add_meta_data and get_metadata
681# are for metadata which can have more than one value.
682
683# returns the first metadata value which matches field
684sub get_metadata_element {
685 my $self = shift (@_);
686 my ($section, $field) = @_;
687 my ($data);
688
689 my $section_ptr = $self->_lookup_section($section);
690 if (!defined $section_ptr) {
691 print STDERR "doc::get_metadata_element couldn't find section " .
692 "$section\n";
693 return;
694 }
695
696 foreach $data (@{$section_ptr->{'metadata'}}) {
697 return $data->[1] if (scalar(@$data) >= 2 && $data->[0] eq $field);
698 }
699
700 return undef; # was not found
701}
702
703
704# returns a list of the form [value1, value2, ...]
705sub get_metadata {
706 my $self = shift (@_);
707 my ($section, $field) = @_;
708 my ($data);
709
710 my $section_ptr = $self->_lookup_section($section);
711 if (!defined $section_ptr) {
712 print STDERR "doc::get_metadata couldn't find section " .
713 "$section\n";
714 return;
715 }
716
717 my @metadata = ();
718 foreach $data (@{$section_ptr->{'metadata'}}) {
719 push (@metadata, $data->[1]) if ($data->[0] eq $field);
720 }
721
722 return \@metadata;
723}
724
725# returns a list of the form [[field,value],[field,value],...]
726sub get_all_metadata {
727 my $self = shift (@_);
728 my ($section) = @_;
729
730 my $section_ptr = $self->_lookup_section($section);
731 if (!defined $section_ptr) {
732 print STDERR "doc::get_all_metadata couldn't find section " .
733 "$section\n";
734 return;
735 }
736
737 return $section_ptr->{'metadata'};
738}
739
740# $value is optional
741sub delete_metadata {
742 my $self = shift (@_);
743 my ($section, $field, $value) = @_;
744
745 my $section_ptr = $self->_lookup_section($section);
746 if (!defined $section_ptr) {
747 print STDERR "doc::delete_metadata couldn't find section " .
748 "$section\n";
749 return;
750 }
751
752 my $i = 0;
753 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
754 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
755 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
756 splice (@{$section_ptr->{'metadata'}}, $i, 1);
757 } else {
758 $i++;
759 }
760 }
761}
762
763sub delete_all_metadata {
764 my $self = shift (@_);
765 my ($section) = @_;
766
767 my $section_ptr = $self->_lookup_section($section);
768 if (!defined $section_ptr) {
769 print STDERR "doc::delete_all_metadata couldn't find section " .
770 "$section\n";
771 return;
772 }
773
774 $section_ptr->{'metadata'} = [];
775}
776
777sub set_metadata_element {
778 my $self = shift (@_);
779 my ($section, $field, $value) = @_;
780
781 $self->set_utf8_metadata_element ($section, $field,
782 &unicode::ascii2utf8(\$value));
783}
784
785# set_utf8_metadata_element assumes the text has already been
786# converted to the UTF-8 encoding.
787sub set_utf8_metadata_element {
788 my $self = shift (@_);
789 my ($section, $field, $value) = @_;
790
791 $self->delete_metadata ($section, $field);
792 $self->add_utf8_metadata ($section, $field, $value);
793}
794
795
796# add_metadata assumes the text is in (extended) ascii form. For
797# text which hash been already converted to the UTF-8 format use
798# add_utf8_metadata.
799sub add_metadata {
800 my $self = shift (@_);
801 my ($section, $field, $value) = @_;
802
803 $self->add_utf8_metadata ($section, $field,
804 &unicode::ascii2utf8(\$value));
805}
806
807# add_utf8_metadata assumes the text has already been converted
808# to the UTF-8 encoding.
809sub add_utf8_metadata {
810 my $self = shift (@_);
811 my ($section, $field, $value) = @_;
812
813 my $section_ptr = $self->_lookup_section($section);
814 if (!defined $section_ptr) {
815 print STDERR "doc::add_utf8_metadata couldn't find section " .
816 "$section\n";
817 return;
818 }
819 if (!defined $value) {
820 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
821 return;
822 }
823 if (!defined $field) {
824 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
825 return;
826 }
827
828 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
829}
830
831
832# methods for dealing with text
833
834# returns the text for a section
835sub get_text {
836 my $self = shift (@_);
837 my ($section) = @_;
838
839 my $section_ptr = $self->_lookup_section($section);
840 if (!defined $section_ptr) {
841 print STDERR "doc::get_text couldn't find section " .
842 "$section\n";
843 return "";
844 }
845
846 return $section_ptr->{'text'};
847}
848
849# returns the (utf-8 encoded) length of the text for a section
850sub get_text_length {
851 my $self = shift (@_);
852 my ($section) = @_;
853
854 my $section_ptr = $self->_lookup_section($section);
855 if (!defined $section_ptr) {
856 print STDERR "doc::get_text_length couldn't find section " .
857 "$section\n";
858 return 0;
859 }
860
861 return length ($section_ptr->{'text'});
862}
863
864sub delete_text {
865 my $self = shift (@_);
866 my ($section) = @_;
867
868 my $section_ptr = $self->_lookup_section($section);
869 if (!defined $section_ptr) {
870 print STDERR "doc::delete_text couldn't find section " .
871 "$section\n";
872 return;
873 }
874
875 $section_ptr->{'text'} = "";
876}
877
878# add_text assumes the text is in (extended) ascii form. For
879# text which has been already converted to the UTF-8 format
880# use add_utf8_text.
881sub add_text {
882 my $self = shift (@_);
883 my ($section, $text) = @_;
884
885 # convert the text to UTF-8 encoded unicode characters
886 # and add the text
887 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
888}
889
890
891# add_utf8_text assumes the text to be added has already
892# been converted to the UTF-8 encoding. For ascii text use
893# add_text
894sub add_utf8_text {
895 my $self = shift (@_);
896 my ($section, $text) = @_;
897
898 my $section_ptr = $self->_lookup_section($section);
899 if (!defined $section_ptr) {
900 print STDERR "doc::add_utf8_text couldn't find section " .
901 "$section\n";
902 return;
903 }
904
905 $section_ptr->{'text'} .= $text;
906}
907
908
909# methods for dealing with associated files
910
911# a file is associated with a document, NOT a section.
912# if section is defined it is noted in the data structure
913# only so that files associated from a particular section
914# may be removed later (using delete_section_assoc_files)
915sub associate_file {
916 my $self = shift (@_);
917 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
918 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
919
920 # remove all associated files with the same name
921 $self->delete_assoc_file ($assoc_filename);
922
923 push (@{$self->{'associated_files'}},
924 [$real_filename, $assoc_filename, $mime_type, $section]);
925}
926
927# returns a list of associated files in the form
928# [[real_filename, assoc_filename, mimetype], ...]
929sub get_assoc_files {
930 my $self = shift (@_);
931
932 return $self->{'associated_files'};
933}
934
935sub delete_section_assoc_files {
936 my $self = shift (@_);
937 my ($section) = @_;
938
939 my $i=0;
940 while ($i < scalar (@{$self->{'associated_files'}})) {
941 if (defined $self->{'associated_files'}->[$i]->[3] &&
942 $self->{'associated_files'}->[$i]->[3] eq $section) {
943 splice (@{$self->{'associated_files'}}, $i, 1);
944 } else {
945 $i++;
946 }
947 }
948}
949
950sub delete_assoc_file {
951 my $self = shift (@_);
952 my ($assoc_filename) = @_;
953
954 my $i=0;
955 while ($i < scalar (@{$self->{'associated_files'}})) {
956 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
957 splice (@{$self->{'associated_files'}}, $i, 1);
958 } else {
959 $i++;
960 }
961 }
962}
963
964sub reset_nextsection_ptr {
965 my $self = shift (@_);
966 my ($section) = @_;
967
968 my $section_ptr = $self->_lookup_section($section);
969 $section_ptr->{'next_subsection'} = 1;
970}
971
9721;
Note: See TracBrowser for help on using the repository browser.