source: main/tags/2.33/gsdl/perllib/doc.pm@ 25201

Last change on this file since 25201 was 2327, checked in by sjboddie, 23 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.5 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29
30BEGIN {
31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
33}
34
35use unicode;
36use util;
37use ghtml;
38##use hashdoc;
39
40# the document type may be indexed_doc, nonindexed_doc, or
41# classification
42
43my $OIDcount = 0;
44
45sub new {
46 my $class = shift (@_);
47 my ($source_filename, $doc_type) = @_;
48
49 my $self = bless {'associated_files'=>[],
50 'subsection_order'=>[],
51 'next_subsection'=>1,
52 'subsections'=>{},
53 'metadata'=>[],
54 'text'=>"",
55 'OIDtype'=>"hash"}, $class;
56
57 $self->set_source_filename ($source_filename) if defined $source_filename;
58 $self->set_doc_type ($doc_type) if defined $doc_type;
59
60 return $self;
61}
62
63# clone the $self object
64sub duplicate {
65 my $self = shift (@_);
66
67 my $newobj = {};
68
69 foreach $k (keys %$self) {
70 $newobj->{$k} = &clone ($self->{$k});
71 }
72
73 bless $newobj, ref($self);
74 return $newobj;
75}
76
77sub clone {
78 my ($from) = @_;
79 my $type = ref ($from);
80
81 if ($type eq "HASH") {
82 my $to = {};
83 foreach $key (keys %$from) {
84 $to->{$key} = &clone ($from->{$key});
85 }
86 return $to;
87 } elsif ($type eq "ARRAY") {
88 my $to = [];
89 foreach $v (@$from) {
90 push (@$to, &clone ($v));
91 }
92 return $to;
93 } else {
94 return $from;
95 }
96}
97
98sub set_OIDtype {
99 my $self = shift (@_);
100 my ($type) = @_;
101
102 if ($type eq "incremental") {
103 $self->{'OIDtype'} = $type;
104 } else {
105 $self->{'OIDtype'} = "hash";
106 }
107}
108
109sub set_source_filename {
110 my $self = shift (@_);
111 my ($source_filename) = @_;
112
113 $self->set_metadata_element ($self->get_top_section(),
114 "gsdlsourcefilename",
115 $source_filename);
116}
117
118# returns the source_filename as it was provided
119sub get_source_filename {
120 my $self = shift (@_);
121
122 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
123}
124
125sub set_doc_type {
126 my $self = shift (@_);
127 my ($doc_type) = @_;
128
129 $self->set_metadata_element ($self->get_top_section(),
130 "gsdldoctype",
131 $doc_type);
132}
133
134# returns the source_filename as it was provided
135# the default of "indexed_doc" is used if no document
136# type was provided
137sub get_doc_type {
138 my $self = shift (@_);
139
140 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
141 return $doc_type if (defined $doc_type);
142 return "indexed_doc";
143}
144
145sub _escape_text {
146 my ($text) = @_;
147
148 # special characters in the gml encoding
149 $text =~ s/&/&/g; # this has to be first...
150 $text =~ s/</&lt;/g;
151 $text =~ s/>/&gt;/g;
152 $text =~ s/\"/&quot;/g;
153
154 return $text;
155}
156
157
158sub buffer_section_gml {
159 my $self = shift (@_);
160 my ($section, $suppress_subject_info) = @_;
161
162 $suppress_subject_info = 0 unless defined $suppress_subject_info;
163 my ($all_text,$data, $subsection);
164
165 my $section_ptr = $self->_lookup_section ($section);
166 my ($section_num) = $section =~ /(\d+)$/;
167
168 return "" unless defined $section_ptr;
169
170 # output the section header (including the section number
171 # and metadata)
172
173 $all_text = "<gsdlsection";
174 $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
175 foreach $data (@{$section_ptr->{'metadata'}}) {
176 $all_text .= " $data->[0]=\"" . &_escape_text($data->[1]) . "\""
177 unless $suppress_subject_info && $data->[0] eq "Subject";
178 }
179 $all_text .= ">";
180
181 # output the text
182 $all_text .= &_escape_text($section_ptr->{'text'});
183
184 # output all the subsections
185 foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
186 $all_text .= $self->buffer_section_gml("$section.$subsection",
187 $suppress_subject_info);
188 }
189
190 # output the closing tag
191 $all_text .= "</gsdlsection>\n";
192
193 return $all_text;
194}
195
196
197sub buffer_section_xml {
198 my $self = shift (@_);
199 my ($section, $dtd_metadata, $suppress_subject_info) = @_;
200
201 $suppress_subject_info = 0 unless defined $suppress_subject_info;
202 my ($all_text, $data, $subsection);
203
204 my $section_ptr = $self->_lookup_section ($section);
205 my ($section_num) = $section =~ /(\d+)$/;
206
207 return "" unless defined $section_ptr;
208
209 # output the section header (including the section number
210 # and metadata)
211
212 $all_text .= "<gsdlsection";
213 $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
214 $all_text .= ">\n";
215
216 $all_text .= " <metadata>\n";
217
218 # output metadata
219 foreach $data (@{$section_ptr->{'metadata'}}) {
220 my $tag_name = $data->[0];
221
222 # a tagname beginning with '/' (like </srclink>) will cause problems
223 # so we'll escape any leading '/'
224 $tag_name =~ s/^\//&\#47;/;
225
226 my $tag_value = &_escape_text($data->[1]);
227
228 unless ($suppress_subject_info && $tag_name eq "Subject")
229 {
230 if (defined $dtd_metadata)
231 {
232 $dtd_metadata->{$tag_name}++;
233 }
234
235 $all_text .= " <$tag_name>$tag_value</$tag_name>\n";
236 }
237 }
238
239 $all_text .= " </metadata>\n";
240
241 # output the text
242 $all_text .= &_escape_text($section_ptr->{'text'});
243
244 # output all the subsections
245 foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
246 $all_text .= $self->buffer_section_xml("$section.$subsection",
247 $dtd_metadata,
248 $suppress_subject_info);
249 }
250
251 # output the closing tag
252 $all_text .= "</gsdlsection>\n";
253
254 return $all_text;
255}
256
257sub output_section {
258 my $self = shift (@_);
259 my ($handle, $section, $colname, $dtd_metadata,
260 $suppress_subject_info) = @_;
261
262 my $all_text = $self->buffer_section_xml($section, $dtd_metadata,
263 $suppress_subject_info);
264
265 # xml header
266 if (defined $collection)
267 {
268 my $xml_head
269 = '<? xml version="1.0" standalone="no" encoding="UTF-8" ?>';
270 $xml_head .= "\n<!DOCTYPE gsdl:$colname SYSTEM \"$colname.dtd\">\n";
271 $all_text = $xml_head.$all_text;
272 }
273
274 print $handle $all_text;
275}
276
277# look up the reference to the a particular section
278sub _lookup_section {
279 my $self = shift (@_);
280 my ($section) = @_;
281
282 my ($num);
283 my $sectionref = $self;
284
285 while (defined $section && $section ne "") {
286 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
287 $num =~ s/^0+(\d)/$1/; # remove leading 0s
288 $section = "" unless defined $section;
289
290 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
291 $sectionref = $sectionref->{'subsections'}->{$num};
292 } else {
293 return undef;
294 }
295 }
296
297 return $sectionref;
298}
299
300# calculate OID by hashing the contents of the document
301sub _calc_OID {
302 my $self = shift (@_);
303 my ($filename) = @_;
304
305 my $osexe = &util::get_os_exe();
306
307 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
308 $ENV{'GSDLOS'},"hashfile$osexe");
309 my $result = "NULL";
310
311 if (-e "$hashfile_exe") {
312# $result = `\"$hashfile_exe\" \"$filename\"`;
313 $result = `hashfile$osexe \"$filename\"`;
314 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
315
316 } else {
317 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
318 }
319
320 return "HASH$result";
321}
322
323# methods dealing with OID, not groups of them.
324
325# if $OID is not provided one is calculated
326sub set_OID {
327 my $self = shift (@_);
328 my ($OID) = @_;
329
330 # if an OID wasn't provided claculate one
331 if (!defined $OID) {
332 $OID = "NULL";
333
334 if ($self->{'OIDtype'} eq "incremental") {
335 $OID = "D" . $OIDcount;
336 $OIDcount ++;
337
338 } else {
339 # "hash" OID - feed file to hashfile.exe
340 my $filename = $self->get_source_filename();
341 if (defined($filename) && -e $filename) {
342
343 $OID = $self->_calc_OID ($filename);
344
345 } else {
346 $filename = &util::get_tmp_filename();
347 if (!open (OUTFILE, ">$filename")) {
348 print STDERR "doc::set_OID could not write to $filename\n";
349 } else {
350 $self->output_section('OUTFILE', $self->get_top_section(),
351 undef, 1);
352 close (OUTFILE);
353 }
354
355 $OID = $self->_calc_OID ($filename);
356 &util::rm ($filename);
357 }
358 }
359 }
360
361 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
362}
363
364# this uses hashdoc (embedded c thingy) which is faster but still
365# needs a little work to be suffiently stable
366sub ___set_OID {
367 my $self = shift (@_);
368 my ($OID) = @_;
369
370 # if an OID wasn't provided then calculate hash value based on document
371 if (!defined $OID)
372 {
373 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
374 undef, 1);
375 my $hash_len = length($hash_text);
376
377 $OID = &hashdoc::buffer($hash_text,$hash_len);
378 }
379
380 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
381}
382
383# returns the OID for this document
384sub get_OID {
385 my $self = shift (@_);
386 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
387 return $OID if (defined $OID);
388 return "NULL";
389}
390
391sub delete_OID {
392 my $self = shift (@_);
393
394 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
395}
396
397
398# methods for manipulating section names
399
400# returns the name of the top-most section (the top
401# level of the document
402sub get_top_section {
403 my $self = shift (@_);
404
405 return "";
406}
407
408# returns a section
409sub get_parent_section {
410 my $self = shift (@_);
411 my ($section) = @_;
412
413 $section =~ s/(^|\.)\d+$//;
414
415 return $section;
416}
417
418# returns the first child section (or the end child
419# if there isn't any)
420sub get_begin_child {
421 my $self = shift (@_);
422 my ($section) = @_;
423
424 my $section_ptr = $self->_lookup_section($section);
425 return "" unless defined $section_ptr;
426
427 if (defined $section_ptr->{'subsection_order'}->[0]) {
428 return "$section.$section_ptr->{'subsection_order'}->[0]";
429 }
430
431 return $self->get_end_child ($section);
432}
433
434# returns the next child of a parent section
435sub get_next_child {
436 my $self = shift (@_);
437 my ($section) = @_;
438
439 my $parent_section = $self->get_parent_section($section);
440 my $parent_section_ptr = $self->_lookup_section($parent_section);
441 return undef unless defined $parent_section_ptr;
442
443 my ($section_num) = $section =~ /(\d+)$/;
444 return undef unless defined $section_num;
445
446 my $i = 0;
447 my $section_order = $parent_section_ptr->{'subsection_order'};
448 while ($i < scalar(@$section_order)) {
449 last if $section_order->[$i] eq $section_num;
450 $i++;
451 }
452
453 $i++; # the next child
454 if ($i < scalar(@$section_order)) {
455 return $section_order->[$i] if $parent_section eq "";
456 return "$parent_section.$section_order->[$i]";
457 }
458
459 # no more sections in this level
460 return undef;
461}
462
463# returns a reference to a list of children
464sub get_children {
465 my $self = shift (@_);
466 my ($section) = @_;
467
468 my $section_ptr = $self->_lookup_section($section);
469 return [] unless defined $section_ptr;
470
471 my @children = @{$section_ptr->{'subsection_order'}};
472
473 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
474 return \@children;
475}
476
477# returns the child section one past the last one (which
478# is coded as "0")
479sub get_end_child {
480 my $self = shift (@_);
481 my ($section) = @_;
482
483 return $section . ".0" unless $section eq "";
484 return "0";
485}
486
487# returns the next section in book order
488sub get_next_section {
489 my $self = shift (@_);
490 my ($section) = @_;
491
492 return undef unless defined $section;
493
494 my $section_ptr = $self->_lookup_section($section);
495 return undef unless defined $section_ptr;
496
497 # first try to find first child
498 if (defined $section_ptr->{'subsection_order'}->[0]) {
499 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
500 return "$section.$section_ptr->{'subsection_order'}->[0]";
501 }
502
503 do {
504 # try to find sibling
505 my $next_child = $self->get_next_child ($section);
506 return $next_child if (defined $next_child);
507
508 # move up one level
509 $section = $self->get_parent_section ($section);
510 } while $section =~ /\d/;
511
512 return undef;
513}
514
515sub is_leaf_section {
516 my $self = shift (@_);
517 my ($section) = @_;
518
519 my $section_ptr = $self->_lookup_section($section);
520 return 1 unless defined $section_ptr;
521
522 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
523}
524
525# methods for dealing with sections
526
527# returns the name of the inserted section
528sub insert_section {
529 my $self = shift (@_);
530 my ($before_section) = @_;
531
532 # get the child to insert before and its parent section
533 my $parent_section = "";
534 my $before_child = "0";
535 my @before_section = split (/\./, $before_section);
536 if (scalar(@before_section) > 0) {
537 $before_child = pop (@before_section);
538 $parent_section = join (".", @before_section);
539 }
540
541 my $parent_section_ptr = $self->_lookup_section($parent_section);
542 if (!defined $parent_section_ptr) {
543 print STDERR "doc::insert_section couldn't find parent section " .
544 "$parent_section\n";
545 return;
546 }
547
548 # get the next section number
549 my $section_num = $parent_section_ptr->{'next_subsection'}++;
550
551 my $i = 0;
552 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
553 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
554 $i++;
555 }
556
557 # insert the section number into the order list
558 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
559
560 # add this section to the parent section
561 my $section_ptr = {'subsection_order'=>[],
562 'next_subsection'=>1,
563 'subsections'=>{},
564 'metadata'=>[],
565 'text'=>""};
566 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
567
568 # work out the full section number
569 my $section = $parent_section;
570 $section .= "." unless $section eq "";
571 $section .= $section_num;
572
573 return $section;
574}
575
576# creates a pre-named section
577sub create_named_section {
578 my $self = shift (@_);
579 my ($mastersection) = @_;
580
581 my ($num);
582 my $section = $mastersection;
583 my $sectionref = $self;
584
585 while ($section ne "") {
586 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
587 $num =~ s/^0+(\d)/$1/; # remove leading 0s
588 $section = "" unless defined $section;
589
590 if (defined $num) {
591 if (!defined $sectionref->{'subsections'}->{$num}) {
592 push (@{$sectionref->{'subsection_order'}}, $num);
593 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
594 'next_subsection'=>1,
595 'subsections'=>{},
596 'metadata'=>[],
597 'text'=>""};
598 if ($num >= $sectionref->{'next_subsection'}) {
599 $sectionref->{'next_subsection'} = $num + 1;
600 }
601 }
602 $sectionref = $sectionref->{'subsections'}->{$num};
603
604 } else {
605 print STDERR "doc::create_named_section couldn't create section ";
606 print STDERR "$mastersection\n";
607 last;
608 }
609 }
610}
611
612# returns a reference to a list of subsections
613sub list_subsections {
614 my $self = shift (@_);
615 my ($section) = @_;
616
617 my $section_ptr = $self->_lookup_section ($section);
618 if (!defined $section_ptr) {
619 print STDERR "doc::list_subsections couldn't find section $section\n";
620 return [];
621 }
622
623 return [@{$section_ptr->{'subsection_order'}}];
624}
625
626sub delete_section {
627 my $self = shift (@_);
628 my ($section) = @_;
629
630# my $section_ptr = {'subsection_order'=>[],
631# 'next_subsection'=>1,
632# 'subsections'=>{},
633# 'metadata'=>[],
634# 'text'=>""};
635
636 # if this is the top section reset everything
637 if ($section eq "") {
638 $self->{'subsection_order'} = [];
639 $self->{'subsections'} = {};
640 $self->{'metadata'} = [];
641 $self->{'text'} = "";
642 return;
643 }
644
645 # find the parent of the section to delete
646 my $parent_section = "";
647 my $child = "0";
648 my @section = split (/\./, $section);
649 if (scalar(@section) > 0) {
650 $child = pop (@section);
651 $parent_section = join (".", @section);
652 }
653
654 my $parent_section_ptr = $self->_lookup_section($parent_section);
655 if (!defined $parent_section_ptr) {
656 print STDERR "doc::delete_section couldn't find parent section " .
657 "$parent_section\n";
658 return;
659 }
660
661 # remove this section from the subsection_order list
662 my $i = 0;
663 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
664 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
665 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
666 last;
667 }
668 $i++;
669 }
670
671 # remove this section from the subsection hash
672 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
673 undef $parent_section_ptr->{'subsections'}->{$child};
674 }
675}
676
677#--
678# methods for dealing with metadata
679
680# set_metadata_element and get_metadata_element are for metadata
681# which should only have one value. add_meta_data and get_metadata
682# are for metadata which can have more than one value.
683
684# returns the first metadata value which matches field
685sub get_metadata_element {
686 my $self = shift (@_);
687 my ($section, $field) = @_;
688 my ($data);
689
690 my $section_ptr = $self->_lookup_section($section);
691 if (!defined $section_ptr) {
692 print STDERR "doc::get_metadata_element couldn't find section " .
693 "$section\n";
694 return;
695 }
696
697 foreach $data (@{$section_ptr->{'metadata'}}) {
698 return $data->[1] if (scalar(@$data) >= 2 && $data->[0] eq $field);
699 }
700
701 return undef; # was not found
702}
703
704
705# returns a list of the form [value1, value2, ...]
706sub get_metadata {
707 my $self = shift (@_);
708 my ($section, $field) = @_;
709 my ($data);
710
711 my $section_ptr = $self->_lookup_section($section);
712 if (!defined $section_ptr) {
713 print STDERR "doc::get_metadata couldn't find section " .
714 "$section\n";
715 return;
716 }
717
718 my @metadata = ();
719 foreach $data (@{$section_ptr->{'metadata'}}) {
720 push (@metadata, $data->[1]) if ($data->[0] eq $field);
721 }
722
723 return \@metadata;
724}
725
726# returns a list of the form [[field,value],[field,value],...]
727sub get_all_metadata {
728 my $self = shift (@_);
729 my ($section) = @_;
730
731 my $section_ptr = $self->_lookup_section($section);
732 if (!defined $section_ptr) {
733 print STDERR "doc::get_all_metadata couldn't find section " .
734 "$section\n";
735 return;
736 }
737
738 return $section_ptr->{'metadata'};
739}
740
741# $value is optional
742sub delete_metadata {
743 my $self = shift (@_);
744 my ($section, $field, $value) = @_;
745
746 my $section_ptr = $self->_lookup_section($section);
747 if (!defined $section_ptr) {
748 print STDERR "doc::delete_metadata couldn't find section " .
749 "$section\n";
750 return;
751 }
752
753 my $i = 0;
754 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
755 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
756 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
757 splice (@{$section_ptr->{'metadata'}}, $i, 1);
758 } else {
759 $i++;
760 }
761 }
762}
763
764sub delete_all_metadata {
765 my $self = shift (@_);
766 my ($section) = @_;
767
768 my $section_ptr = $self->_lookup_section($section);
769 if (!defined $section_ptr) {
770 print STDERR "doc::delete_all_metadata couldn't find section " .
771 "$section\n";
772 return;
773 }
774
775 $section_ptr->{'metadata'} = [];
776}
777
778sub set_metadata_element {
779 my $self = shift (@_);
780 my ($section, $field, $value) = @_;
781
782 $self->set_utf8_metadata_element ($section, $field,
783 &unicode::ascii2utf8(\$value));
784}
785
786# set_utf8_metadata_element assumes the text has already been
787# converted to the UTF-8 encoding.
788sub set_utf8_metadata_element {
789 my $self = shift (@_);
790 my ($section, $field, $value) = @_;
791
792 $self->delete_metadata ($section, $field);
793 $self->add_utf8_metadata ($section, $field, $value);
794}
795
796
797# add_metadata assumes the text is in (extended) ascii form. For
798# text which hash been already converted to the UTF-8 format use
799# add_utf8_metadata.
800sub add_metadata {
801 my $self = shift (@_);
802 my ($section, $field, $value) = @_;
803
804 $self->add_utf8_metadata ($section, $field,
805 &unicode::ascii2utf8(\$value));
806}
807
808# add_utf8_metadata assumes the text has already been converted
809# to the UTF-8 encoding.
810sub add_utf8_metadata {
811 my $self = shift (@_);
812 my ($section, $field, $value) = @_;
813
814 my $section_ptr = $self->_lookup_section($section);
815 if (!defined $section_ptr) {
816 print STDERR "doc::add_utf8_metadata couldn't find section " .
817 "$section\n";
818 return;
819 }
820 if (!defined $value) {
821 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
822 return;
823 }
824 if (!defined $field) {
825 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
826 return;
827 }
828
829 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
830}
831
832
833# methods for dealing with text
834
835# returns the text for a section
836sub get_text {
837 my $self = shift (@_);
838 my ($section) = @_;
839
840 my $section_ptr = $self->_lookup_section($section);
841 if (!defined $section_ptr) {
842 print STDERR "doc::get_text couldn't find section " .
843 "$section\n";
844 return "";
845 }
846
847 return $section_ptr->{'text'};
848}
849
850# returns the (utf-8 encoded) length of the text for a section
851sub get_text_length {
852 my $self = shift (@_);
853 my ($section) = @_;
854
855 my $section_ptr = $self->_lookup_section($section);
856 if (!defined $section_ptr) {
857 print STDERR "doc::get_text_length couldn't find section " .
858 "$section\n";
859 return 0;
860 }
861
862 return length ($section_ptr->{'text'});
863}
864
865sub delete_text {
866 my $self = shift (@_);
867 my ($section) = @_;
868
869 my $section_ptr = $self->_lookup_section($section);
870 if (!defined $section_ptr) {
871 print STDERR "doc::delete_text couldn't find section " .
872 "$section\n";
873 return;
874 }
875
876 $section_ptr->{'text'} = "";
877}
878
879# add_text assumes the text is in (extended) ascii form. For
880# text which has been already converted to the UTF-8 format
881# use add_utf8_text.
882sub add_text {
883 my $self = shift (@_);
884 my ($section, $text) = @_;
885
886 # convert the text to UTF-8 encoded unicode characters
887 # and add the text
888 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
889}
890
891
892# add_utf8_text assumes the text to be added has already
893# been converted to the UTF-8 encoding. For ascii text use
894# add_text
895sub add_utf8_text {
896 my $self = shift (@_);
897 my ($section, $text) = @_;
898
899 my $section_ptr = $self->_lookup_section($section);
900 if (!defined $section_ptr) {
901 print STDERR "doc::add_utf8_text couldn't find section " .
902 "$section\n";
903 return;
904 }
905
906 $section_ptr->{'text'} .= $text;
907}
908
909
910# methods for dealing with associated files
911
912# a file is associated with a document, NOT a section.
913# if section is defined it is noted in the data structure
914# only so that files associated from a particular section
915# may be removed later (using delete_section_assoc_files)
916sub associate_file {
917 my $self = shift (@_);
918 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
919 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
920
921 # remove all associated files with the same name
922 $self->delete_assoc_file ($assoc_filename);
923
924 push (@{$self->{'associated_files'}},
925 [$real_filename, $assoc_filename, $mime_type, $section]);
926}
927
928# returns a list of associated files in the form
929# [[real_filename, assoc_filename, mimetype], ...]
930sub get_assoc_files {
931 my $self = shift (@_);
932
933 return $self->{'associated_files'};
934}
935
936sub delete_section_assoc_files {
937 my $self = shift (@_);
938 my ($section) = @_;
939
940 my $i=0;
941 while ($i < scalar (@{$self->{'associated_files'}})) {
942 if (defined $self->{'associated_files'}->[$i]->[3] &&
943 $self->{'associated_files'}->[$i]->[3] eq $section) {
944 splice (@{$self->{'associated_files'}}, $i, 1);
945 } else {
946 $i++;
947 }
948 }
949}
950
951sub delete_assoc_file {
952 my $self = shift (@_);
953 my ($assoc_filename) = @_;
954
955 my $i=0;
956 while ($i < scalar (@{$self->{'associated_files'}})) {
957 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
958 splice (@{$self->{'associated_files'}}, $i, 1);
959 } else {
960 $i++;
961 }
962 }
963}
964
965sub reset_nextsection_ptr {
966 my $self = shift (@_);
967 my ($section) = @_;
968
969 my $section_ptr = $self->_lookup_section($section);
970 $section_ptr->{'next_subsection'} = 1;
971}
972
9731;
Note: See TracBrowser for help on using the repository browser.