source: gsdl/tags/gsdl-2_32-distribution/gsdl/perllib/doc.pm@ 18460

Last change on this file since 18460 was 2267, checked in by davidb, 23 years ago

GML file syntax altered to be XML compliant. This basically meant
turning attribute lists of metadata names (which in Greenstone can
appear multiple times within a tag) into tag names themselves, which
are then explicitly stated in a <metadata>...</metadata> block.

Newly built collection will use the new syntactic form, however the
GMLPlug file is backwards compatible and so will still import in
files in the older GML format.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.2 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29
30BEGIN {
31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
33}
34
35use unicode;
36use util;
37use ghtml;
38##use hashdoc;
39
40# the document type may be indexed_doc, nonindexed_doc, or
41# classification
42
43sub new {
44 my $class = shift (@_);
45 my ($source_filename, $doc_type) = @_;
46
47 my $self = bless {'associated_files'=>[],
48 'subsection_order'=>[],
49 'next_subsection'=>1,
50 'subsections'=>{},
51 'metadata'=>[],
52 'text'=>""}, $class;
53
54 $self->set_source_filename ($source_filename) if defined $source_filename;
55 $self->set_doc_type ($doc_type) if defined $doc_type;
56
57 return $self;
58}
59
60# clone the $self object
61sub duplicate {
62 my $self = shift (@_);
63
64 my $newobj = {};
65
66 foreach $k (keys %$self) {
67 $newobj->{$k} = &clone ($self->{$k});
68 }
69
70 bless $newobj, ref($self);
71 return $newobj;
72}
73
74sub clone {
75 my ($from) = @_;
76 my $type = ref ($from);
77
78 if ($type eq "HASH") {
79 my $to = {};
80 foreach $key (keys %$from) {
81 $to->{$key} = &clone ($from->{$key});
82 }
83 return $to;
84 } elsif ($type eq "ARRAY") {
85 my $to = [];
86 foreach $v (@$from) {
87 push (@$to, &clone ($v));
88 }
89 return $to;
90 } else {
91 return $from;
92 }
93}
94
95
96sub set_source_filename {
97 my $self = shift (@_);
98 my ($source_filename) = @_;
99
100 $self->set_metadata_element ($self->get_top_section(),
101 "gsdlsourcefilename",
102 $source_filename);
103}
104
105# returns the source_filename as it was provided
106sub get_source_filename {
107 my $self = shift (@_);
108
109 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
110}
111
112sub set_doc_type {
113 my $self = shift (@_);
114 my ($doc_type) = @_;
115
116 $self->set_metadata_element ($self->get_top_section(),
117 "gsdldoctype",
118 $doc_type);
119}
120
121# returns the source_filename as it was provided
122# the default of "indexed_doc" is used if no document
123# type was provided
124sub get_doc_type {
125 my $self = shift (@_);
126
127 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
128 return $doc_type if (defined $doc_type);
129 return "indexed_doc";
130}
131
132sub _escape_text {
133 my ($text) = @_;
134
135 # special characters in the gml encoding
136 $text =~ s/&/&amp;/g; # this has to be first...
137 $text =~ s/</&lt;/g;
138 $text =~ s/>/&gt;/g;
139 $text =~ s/\"/&quot;/g;
140
141 return $text;
142}
143
144
145sub buffer_section_gml {
146 my $self = shift (@_);
147 my ($section, $suppress_subject_info) = @_;
148
149 $suppress_subject_info = 0 unless defined $suppress_subject_info;
150 my ($all_text,$data, $subsection);
151
152 my $section_ptr = $self->_lookup_section ($section);
153 my ($section_num) = $section =~ /(\d+)$/;
154
155 return "" unless defined $section_ptr;
156
157 # output the section header (including the section number
158 # and metadata)
159
160 $all_text = "<gsdlsection";
161 $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
162 foreach $data (@{$section_ptr->{'metadata'}}) {
163 $all_text .= " $data->[0]=\"" . &_escape_text($data->[1]) . "\""
164 unless $suppress_subject_info && $data->[0] eq "Subject";
165 }
166 $all_text .= ">";
167
168 # output the text
169 $all_text .= &_escape_text($section_ptr->{'text'});
170
171 # output all the subsections
172 foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
173 $all_text .= $self->buffer_section_gml("$section.$subsection",
174 $suppress_subject_info);
175 }
176
177 # output the closing tag
178 $all_text .= "</gsdlsection>\n";
179
180 return $all_text;
181}
182
183
184sub buffer_section_xml {
185 my $self = shift (@_);
186 my ($section, $dtd_metadata, $suppress_subject_info) = @_;
187
188 $suppress_subject_info = 0 unless defined $suppress_subject_info;
189 my ($all_text, $data, $subsection);
190
191 my $section_ptr = $self->_lookup_section ($section);
192 my ($section_num) = $section =~ /(\d+)$/;
193
194 return "" unless defined $section_ptr;
195
196 # output the section header (including the section number
197 # and metadata)
198
199 $all_text .= "<gsdlsection";
200 $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
201 $all_text .= ">\n";
202
203 $all_text .= " <metadata>\n";
204
205 # output metadata
206 foreach $data (@{$section_ptr->{'metadata'}}) {
207 my $tag_name = $data->[0];
208 my $tag_value = &_escape_text($data->[1]);
209
210 unless ($suppress_subject_info && $tag_name eq "Subject")
211 {
212 if (defined $dtd_metadata)
213 {
214 $dtd_metadata->{$tag_name}++;
215 }
216
217 $all_text .= " <$tag_name>$tag_value</$tag_name>\n";
218 }
219 }
220
221 $all_text .= " </metadata>\n";
222
223 # output the text
224 $all_text .= &_escape_text($section_ptr->{'text'});
225
226 # output all the subsections
227 foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
228 $all_text .= $self->buffer_section_xml("$section.$subsection",
229 $dtd_metadata,
230 $suppress_subject_info);
231 }
232
233 # output the closing tag
234 $all_text .= "</gsdlsection>\n";
235
236 return $all_text;
237}
238
239sub output_section {
240 my $self = shift (@_);
241 my ($handle, $section, $colname, $dtd_metadata,
242 $suppress_subject_info) = @_;
243
244 my $all_text = $self->buffer_section_xml($section, $dtd_metadata,
245 $suppress_subject_info);
246
247 # xml header
248 if (defined $collection)
249 {
250 my $xml_head
251 = '<? xml version="1.0" standalone="no" encoding="UTF-8" ?>';
252 $xml_head .= "\n<!DOCTYPE gsdl:$colname SYSTEM \"$colname.dtd\">\n";
253 $all_text = $xml_head.$all_text;
254 }
255
256 print $handle $all_text;
257}
258
259# look up the reference to the a particular section
260sub _lookup_section {
261 my $self = shift (@_);
262 my ($section) = @_;
263
264 my ($num);
265 my $sectionref = $self;
266
267 while (defined $section && $section ne "") {
268 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
269 $num =~ s/^0+(\d)/$1/; # remove leading 0s
270 $section = "" unless defined $section;
271
272 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
273 $sectionref = $sectionref->{'subsections'}->{$num};
274 } else {
275 return undef;
276 }
277 }
278
279 return $sectionref;
280}
281
282sub _calc_OID {
283 my $self = shift (@_);
284 my ($filename) = @_;
285
286 my $osexe = &util::get_os_exe();
287
288 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
289 $ENV{'GSDLOS'},"hashfile$osexe");
290 my $result = "NULL";
291
292 if (-e "$hashfile_exe") {
293# $result = `\"$hashfile_exe\" \"$filename\"`;
294 $result = `hashfile$osexe \"$filename\"`;
295 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
296
297 } else {
298 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
299 }
300
301 return "HASH$result";
302}
303
304# methods dealing with OID, not groups of them.
305
306# if $OID is not provided one is calculated from hashing the
307# current contents of the document
308# An OID are actually stored as metadata of the document
309sub set_OID {
310 my $self = shift (@_);
311 my ($OID) = @_;
312
313 # if an OID wasn't provided then feed this document to
314 # hashfile.exe
315 if (!defined $OID) {
316 $OID = "NULL";
317
318 my $filename = $self->get_source_filename();
319 if (defined($filename) && -e $filename) {
320
321 $OID = $self->_calc_OID ($filename);
322
323 } else {
324
325 # this warning causes more confusion than it's worth I think
326 # -- sorry Gordon.
327# print STDERR "doc::set_OID source filename undefined/non-existant (continuing)\n";
328
329 $filename = &util::get_tmp_filename();
330 if (!open (OUTFILE, ">$filename")) {
331 print STDERR "doc::set_OID could not write to $filename\n";
332 } else {
333 $self->output_section('OUTFILE', $self->get_top_section(),
334 undef, 1);
335 close (OUTFILE);
336 }
337
338 $OID = $self->_calc_OID ($filename);
339 &util::rm ($filename);
340 }
341 }
342
343 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
344}
345
346# this uses hashdoc (embedded c thingy) which is faster but still
347# needs a little work to be suffiently stable
348sub ___set_OID {
349 my $self = shift (@_);
350 my ($OID) = @_;
351
352 # if an OID wasn't provided then calculate hash value based on document
353 if (!defined $OID)
354 {
355 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
356 undef, 1);
357 my $hash_len = length($hash_text);
358
359 $OID = &hashdoc::buffer($hash_text,$hash_len);
360 }
361
362 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
363}
364
365# returns the OID for this document
366sub get_OID {
367 my $self = shift (@_);
368 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
369 return $OID if (defined $OID);
370 return "NULL";
371}
372
373sub delete_OID {
374 my $self = shift (@_);
375
376 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
377}
378
379
380# methods for manipulating section names
381
382# returns the name of the top-most section (the top
383# level of the document
384sub get_top_section {
385 my $self = shift (@_);
386
387 return "";
388}
389
390# returns a section
391sub get_parent_section {
392 my $self = shift (@_);
393 my ($section) = @_;
394
395 $section =~ s/(^|\.)\d+$//;
396
397 return $section;
398}
399
400# returns the first child section (or the end child
401# if there isn't any)
402sub get_begin_child {
403 my $self = shift (@_);
404 my ($section) = @_;
405
406 my $section_ptr = $self->_lookup_section($section);
407 return "" unless defined $section_ptr;
408
409 if (defined $section_ptr->{'subsection_order'}->[0]) {
410 return "$section.$section_ptr->{'subsection_order'}->[0]";
411 }
412
413 return $self->get_end_child ($section);
414}
415
416# returns the next child of a parent section
417sub get_next_child {
418 my $self = shift (@_);
419 my ($section) = @_;
420
421 my $parent_section = $self->get_parent_section($section);
422 my $parent_section_ptr = $self->_lookup_section($parent_section);
423 return undef unless defined $parent_section_ptr;
424
425 my ($section_num) = $section =~ /(\d+)$/;
426 return undef unless defined $section_num;
427
428 my $i = 0;
429 my $section_order = $parent_section_ptr->{'subsection_order'};
430 while ($i < scalar(@$section_order)) {
431 last if $section_order->[$i] eq $section_num;
432 $i++;
433 }
434
435 $i++; # the next child
436 if ($i < scalar(@$section_order)) {
437 return $section_order->[$i] if $parent_section eq "";
438 return "$parent_section.$section_order->[$i]";
439 }
440
441 # no more sections in this level
442 return undef;
443}
444
445# returns a reference to a list of children
446sub get_children {
447 my $self = shift (@_);
448 my ($section) = @_;
449
450 my $section_ptr = $self->_lookup_section($section);
451 return [] unless defined $section_ptr;
452
453 my @children = @{$section_ptr->{'subsection_order'}};
454
455 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
456 return \@children;
457}
458
459# returns the child section one past the last one (which
460# is coded as "0")
461sub get_end_child {
462 my $self = shift (@_);
463 my ($section) = @_;
464
465 return $section . ".0" unless $section eq "";
466 return "0";
467}
468
469# returns the next section in book order
470sub get_next_section {
471 my $self = shift (@_);
472 my ($section) = @_;
473
474 return undef unless defined $section;
475
476 my $section_ptr = $self->_lookup_section($section);
477 return undef unless defined $section_ptr;
478
479 # first try to find first child
480 if (defined $section_ptr->{'subsection_order'}->[0]) {
481 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
482 return "$section.$section_ptr->{'subsection_order'}->[0]";
483 }
484
485 do {
486 # try to find sibling
487 my $next_child = $self->get_next_child ($section);
488 return $next_child if (defined $next_child);
489
490 # move up one level
491 $section = $self->get_parent_section ($section);
492 } while $section =~ /\d/;
493
494 return undef;
495}
496
497sub is_leaf_section {
498 my $self = shift (@_);
499 my ($section) = @_;
500
501 my $section_ptr = $self->_lookup_section($section);
502 return 1 unless defined $section_ptr;
503
504 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
505}
506
507# methods for dealing with sections
508
509# returns the name of the inserted section
510sub insert_section {
511 my $self = shift (@_);
512 my ($before_section) = @_;
513
514 # get the child to insert before and its parent section
515 my $parent_section = "";
516 my $before_child = "0";
517 my @before_section = split (/\./, $before_section);
518 if (scalar(@before_section) > 0) {
519 $before_child = pop (@before_section);
520 $parent_section = join (".", @before_section);
521 }
522
523 my $parent_section_ptr = $self->_lookup_section($parent_section);
524 if (!defined $parent_section_ptr) {
525 print STDERR "doc::insert_section couldn't find parent section " .
526 "$parent_section\n";
527 return;
528 }
529
530 # get the next section number
531 my $section_num = $parent_section_ptr->{'next_subsection'}++;
532
533 my $i = 0;
534 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
535 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
536 $i++;
537 }
538
539 # insert the section number into the order list
540 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
541
542 # add this section to the parent section
543 my $section_ptr = {'subsection_order'=>[],
544 'next_subsection'=>1,
545 'subsections'=>{},
546 'metadata'=>[],
547 'text'=>""};
548 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
549
550 # work out the full section number
551 my $section = $parent_section;
552 $section .= "." unless $section eq "";
553 $section .= $section_num;
554
555 return $section;
556}
557
558# creates a pre-named section
559sub create_named_section {
560 my $self = shift (@_);
561 my ($mastersection) = @_;
562
563 my ($num);
564 my $section = $mastersection;
565 my $sectionref = $self;
566
567 while ($section ne "") {
568 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
569 $num =~ s/^0+(\d)/$1/; # remove leading 0s
570 $section = "" unless defined $section;
571
572 if (defined $num) {
573 if (!defined $sectionref->{'subsections'}->{$num}) {
574 push (@{$sectionref->{'subsection_order'}}, $num);
575 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
576 'next_subsection'=>1,
577 'subsections'=>{},
578 'metadata'=>[],
579 'text'=>""};
580 if ($num >= $sectionref->{'next_subsection'}) {
581 $sectionref->{'next_subsection'} = $num + 1;
582 }
583 }
584 $sectionref = $sectionref->{'subsections'}->{$num};
585
586 } else {
587 print STDERR "doc::create_named_section couldn't create section ";
588 print STDERR "$mastersection\n";
589 last;
590 }
591 }
592}
593
594# returns a reference to a list of subsections
595sub list_subsections {
596 my $self = shift (@_);
597 my ($section) = @_;
598
599 my $section_ptr = $self->_lookup_section ($section);
600 if (!defined $section_ptr) {
601 print STDERR "doc::list_subsections couldn't find section $section\n";
602 return [];
603 }
604
605 return [@{$section_ptr->{'subsection_order'}}];
606}
607
608sub delete_section {
609 my $self = shift (@_);
610 my ($section) = @_;
611
612# my $section_ptr = {'subsection_order'=>[],
613# 'next_subsection'=>1,
614# 'subsections'=>{},
615# 'metadata'=>[],
616# 'text'=>""};
617
618 # if this is the top section reset everything
619 if ($section eq "") {
620 $self->{'subsection_order'} = [];
621 $self->{'subsections'} = {};
622 $self->{'metadata'} = [];
623 $self->{'text'} = "";
624 return;
625 }
626
627 # find the parent of the section to delete
628 my $parent_section = "";
629 my $child = "0";
630 my @section = split (/\./, $section);
631 if (scalar(@section) > 0) {
632 $child = pop (@section);
633 $parent_section = join (".", @section);
634 }
635
636 my $parent_section_ptr = $self->_lookup_section($parent_section);
637 if (!defined $parent_section_ptr) {
638 print STDERR "doc::delete_section couldn't find parent section " .
639 "$parent_section\n";
640 return;
641 }
642
643 # remove this section from the subsection_order list
644 my $i = 0;
645 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
646 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
647 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
648 last;
649 }
650 $i++;
651 }
652
653 # remove this section from the subsection hash
654 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
655 undef $parent_section_ptr->{'subsections'}->{$child};
656 }
657}
658
659#--
660# methods for dealing with metadata
661
662# set_metadata_element and get_metadata_element are for metadata
663# which should only have one value. add_meta_data and get_metadata
664# are for metadata which can have more than one value.
665
666# returns the first metadata value which matches field
667sub get_metadata_element {
668 my $self = shift (@_);
669 my ($section, $field) = @_;
670 my ($data);
671
672 my $section_ptr = $self->_lookup_section($section);
673 if (!defined $section_ptr) {
674 print STDERR "doc::get_metadata_element couldn't find section " .
675 "$section\n";
676 return;
677 }
678
679 foreach $data (@{$section_ptr->{'metadata'}}) {
680 return $data->[1] if (scalar(@$data) >= 2 && $data->[0] eq $field);
681 }
682
683 return undef; # was not found
684}
685
686
687# returns a list of the form [value1, value2, ...]
688sub get_metadata {
689 my $self = shift (@_);
690 my ($section, $field) = @_;
691 my ($data);
692
693 my $section_ptr = $self->_lookup_section($section);
694 if (!defined $section_ptr) {
695 print STDERR "doc::get_metadata couldn't find section " .
696 "$section\n";
697 return;
698 }
699
700 my @metadata = ();
701 foreach $data (@{$section_ptr->{'metadata'}}) {
702 push (@metadata, $data->[1]) if ($data->[0] eq $field);
703 }
704
705 return \@metadata;
706}
707
708# returns a list of the form [[field,value],[field,value],...]
709sub get_all_metadata {
710 my $self = shift (@_);
711 my ($section) = @_;
712
713 my $section_ptr = $self->_lookup_section($section);
714 if (!defined $section_ptr) {
715 print STDERR "doc::get_all_metadata couldn't find section " .
716 "$section\n";
717 return;
718 }
719
720 return $section_ptr->{'metadata'};
721}
722
723# $value is optional
724sub delete_metadata {
725 my $self = shift (@_);
726 my ($section, $field, $value) = @_;
727
728 my $section_ptr = $self->_lookup_section($section);
729 if (!defined $section_ptr) {
730 print STDERR "doc::delete_metadata couldn't find section " .
731 "$section\n";
732 return;
733 }
734
735 my $i = 0;
736 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
737 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
738 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
739 splice (@{$section_ptr->{'metadata'}}, $i, 1);
740 } else {
741 $i++;
742 }
743 }
744}
745
746sub delete_all_metadata {
747 my $self = shift (@_);
748 my ($section) = @_;
749
750 my $section_ptr = $self->_lookup_section($section);
751 if (!defined $section_ptr) {
752 print STDERR "doc::delete_all_metadata couldn't find section " .
753 "$section\n";
754 return;
755 }
756
757 $section_ptr->{'metadata'} = [];
758}
759
760sub set_metadata_element {
761 my $self = shift (@_);
762 my ($section, $field, $value) = @_;
763
764 $self->set_utf8_metadata_element ($section, $field,
765 &unicode::ascii2utf8(\$value));
766}
767
768# set_utf8_metadata_element assumes the text has already been
769# converted to the UTF-8 encoding.
770sub set_utf8_metadata_element {
771 my $self = shift (@_);
772 my ($section, $field, $value) = @_;
773
774 $self->delete_metadata ($section, $field);
775 $self->add_utf8_metadata ($section, $field, $value);
776}
777
778
779# add_metadata assumes the text is in (extended) ascii form. For
780# text which hash been already converted to the UTF-8 format use
781# add_utf8_metadata.
782sub add_metadata {
783 my $self = shift (@_);
784 my ($section, $field, $value) = @_;
785
786 $self->add_utf8_metadata ($section, $field,
787 &unicode::ascii2utf8(\$value));
788}
789
790# add_utf8_metadata assumes the text has already been converted
791# to the UTF-8 encoding.
792sub add_utf8_metadata {
793 my $self = shift (@_);
794 my ($section, $field, $value) = @_;
795
796 my $section_ptr = $self->_lookup_section($section);
797 if (!defined $section_ptr) {
798 print STDERR "doc::add_utf8_metadata couldn't find section " .
799 "$section\n";
800 return;
801 }
802 if (!defined $value) {
803 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
804 return;
805 }
806 if (!defined $field) {
807 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
808 return;
809 }
810
811 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
812}
813
814
815# methods for dealing with text
816
817# returns the text for a section
818sub get_text {
819 my $self = shift (@_);
820 my ($section) = @_;
821
822 my $section_ptr = $self->_lookup_section($section);
823 if (!defined $section_ptr) {
824 print STDERR "doc::get_text couldn't find section " .
825 "$section\n";
826 return "";
827 }
828
829 return $section_ptr->{'text'};
830}
831
832# returns the (utf-8 encoded) length of the text for a section
833sub get_text_length {
834 my $self = shift (@_);
835 my ($section) = @_;
836
837 my $section_ptr = $self->_lookup_section($section);
838 if (!defined $section_ptr) {
839 print STDERR "doc::get_text_length couldn't find section " .
840 "$section\n";
841 return 0;
842 }
843
844 return length ($section_ptr->{'text'});
845}
846
847sub delete_text {
848 my $self = shift (@_);
849 my ($section) = @_;
850
851 my $section_ptr = $self->_lookup_section($section);
852 if (!defined $section_ptr) {
853 print STDERR "doc::delete_text couldn't find section " .
854 "$section\n";
855 return;
856 }
857
858 $section_ptr->{'text'} = "";
859}
860
861# add_text assumes the text is in (extended) ascii form. For
862# text which has been already converted to the UTF-8 format
863# use add_utf8_text.
864sub add_text {
865 my $self = shift (@_);
866 my ($section, $text) = @_;
867
868 # convert the text to UTF-8 encoded unicode characters
869 # and add the text
870 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
871}
872
873
874# add_utf8_text assumes the text to be added has already
875# been converted to the UTF-8 encoding. For ascii text use
876# add_text
877sub add_utf8_text {
878 my $self = shift (@_);
879 my ($section, $text) = @_;
880
881 my $section_ptr = $self->_lookup_section($section);
882 if (!defined $section_ptr) {
883 print STDERR "doc::add_utf8_text couldn't find section " .
884 "$section\n";
885 return;
886 }
887
888 $section_ptr->{'text'} .= $text;
889}
890
891
892# methods for dealing with associated files
893
894# a file is associated with a document, NOT a section.
895# if section is defined it is noted in the data structure
896# only so that files associated from a particular section
897# may be removed later (using delete_section_assoc_files)
898sub associate_file {
899 my $self = shift (@_);
900 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
901 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
902
903 # remove all associated files with the same name
904 $self->delete_assoc_file ($assoc_filename);
905
906 push (@{$self->{'associated_files'}},
907 [$real_filename, $assoc_filename, $mime_type, $section]);
908}
909
910# returns a list of associated files in the form
911# [[real_filename, assoc_filename, mimetype], ...]
912sub get_assoc_files {
913 my $self = shift (@_);
914
915 return $self->{'associated_files'};
916}
917
918sub delete_section_assoc_files {
919 my $self = shift (@_);
920 my ($section) = @_;
921
922 my $i=0;
923 while ($i < scalar (@{$self->{'associated_files'}})) {
924 if (defined $self->{'associated_files'}->[$i]->[3] &&
925 $self->{'associated_files'}->[$i]->[3] eq $section) {
926 splice (@{$self->{'associated_files'}}, $i, 1);
927 } else {
928 $i++;
929 }
930 }
931}
932
933sub delete_assoc_file {
934 my $self = shift (@_);
935 my ($assoc_filename) = @_;
936
937 my $i=0;
938 while ($i < scalar (@{$self->{'associated_files'}})) {
939 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
940 splice (@{$self->{'associated_files'}}, $i, 1);
941 } else {
942 $i++;
943 }
944 }
945}
946
947sub reset_nextsection_ptr {
948 my $self = shift (@_);
949 my ($section) = @_;
950
951 my $section_ptr = $self->_lookup_section($section);
952 $section_ptr->{'next_subsection'} = 1;
953}
954
9551;
Note: See TracBrowser for help on using the repository browser.