source: tags/gsdl-2_51-distribution/gsdl/perllib/doc.pm@ 7622

Last change on this file since 7622 was 7569, checked in by kjdon, 20 years ago

can now set gsdlconvertedfilename - gsdlsourcefilename is the original file, gsdlconvertedfilename is the converted file. calculating hash ids now uses converted filename is available, otherwise uses the source filename

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.3 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use unicode;
37use util;
38use ghtml;
39##use hashdoc;
40
41# the document type may be indexed_doc, nonindexed_doc, or
42# classification
43
44my $OIDcount = 0;
45
46sub new {
47 my $class = shift (@_);
48 my ($source_filename, $doc_type) = @_;
49
50 my $self = bless {'associated_files'=>[],
51 'subsection_order'=>[],
52 'next_subsection'=>1,
53 'subsections'=>{},
54 'metadata'=>[],
55 'text'=>"",
56 'OIDtype'=>"hash"}, $class;
57
58 $self->set_source_filename ($source_filename) if defined $source_filename;
59 $self->set_doc_type ($doc_type) if defined $doc_type;
60
61 return $self;
62}
63
64# clone the $self object
65sub duplicate {
66 my $self = shift (@_);
67
68 my $newobj = {};
69
70 foreach $k (keys %$self) {
71 $newobj->{$k} = &clone ($self->{$k});
72 }
73
74 bless $newobj, ref($self);
75 return $newobj;
76}
77
78sub clone {
79 my ($from) = @_;
80 my $type = ref ($from);
81
82 if ($type eq "HASH") {
83 my $to = {};
84 foreach $key (keys %$from) {
85 $to->{$key} = &clone ($from->{$key});
86 }
87 return $to;
88 } elsif ($type eq "ARRAY") {
89 my $to = [];
90 foreach $v (@$from) {
91 push (@$to, &clone ($v));
92 }
93 return $to;
94 } else {
95 return $from;
96 }
97}
98
99sub set_OIDtype {
100 my $self = shift (@_);
101 my ($type) = @_;
102
103 if ($type eq "incremental") {
104 $self->{'OIDtype'} = $type;
105 } else {
106 $self->{'OIDtype'} = "hash";
107 }
108}
109
110sub set_source_filename {
111 my $self = shift (@_);
112 my ($source_filename) = @_;
113
114 $self->set_metadata_element ($self->get_top_section(),
115 "gsdlsourcefilename",
116 $source_filename);
117}
118
119sub set_converted_filename {
120 my $self = shift (@_);
121 my ($converted_filename) = @_;
122
123 $self->set_metadata_element ($self->get_top_section(),
124 "gsdlconvertedfilename",
125 $converted_filename);
126}
127
128
129# returns the source_filename as it was provided
130sub get_source_filename {
131 my $self = shift (@_);
132
133 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
134}
135
136# returns converted filename if available else returns source filename
137sub get_filename_for_hashing {
138 my $self = shift (@_);
139
140 my $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlconvertedfilename");
141
142 if (!defined $filename) {
143 $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
144 }
145 return $filename;
146}
147
148sub set_doc_type {
149 my $self = shift (@_);
150 my ($doc_type) = @_;
151
152 $self->set_metadata_element ($self->get_top_section(),
153 "gsdldoctype",
154 $doc_type);
155}
156
157# returns the source_filename as it was provided
158# the default of "indexed_doc" is used if no document
159# type was provided
160sub get_doc_type {
161 my $self = shift (@_);
162
163 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
164 return $doc_type if (defined $doc_type);
165 return "indexed_doc";
166}
167
168sub _escape_text {
169 my ($text) = @_;
170
171 # special characters in the gml encoding
172 $text =~ s/&/&/g; # this has to be first...
173 $text =~ s/</&lt;/g;
174 $text =~ s/>/&gt;/g;
175 $text =~ s/\"/&quot;/g;
176
177 return $text;
178}
179
180sub buffer_section_xml {
181 my $self = shift (@_);
182 my ($section) = @_;
183
184 my $section_ptr = $self->_lookup_section ($section);
185 return "" unless defined $section_ptr;
186
187 my $all_text = "<Section>\n";
188 $all_text .= " <Description>\n";
189
190 # output metadata
191 foreach my $data (@{$section_ptr->{'metadata'}}) {
192 my $escaped_value = &_escape_text($data->[1]);
193 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
194 }
195
196 $all_text .= " </Description>\n";
197
198 # output the text
199 $all_text .= " <Content>";
200 $all_text .= &_escape_text($section_ptr->{'text'});
201 $all_text .= "</Content>\n";
202
203 # output all the subsections
204 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
205 $all_text .= $self->buffer_section_xml("$section.$subsection");
206 }
207
208 $all_text .= "</Section>\n";
209
210 # make sure no nasty control characters have snuck through
211 # (XML::Parser will barf on anything it doesn't consider to be
212 # valid UTF-8 text, including things like \c@, \cC etc.)
213 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
214
215 return $all_text;
216}
217
218sub output_section {
219 my $self = shift (@_);
220 my ($handle, $section) = @_;
221
222 print $handle $self->buffer_section_xml($section);
223}
224
225# look up the reference to the a particular section
226sub _lookup_section {
227 my $self = shift (@_);
228 my ($section) = @_;
229
230 my ($num);
231 my $sectionref = $self;
232
233 while (defined $section && $section ne "") {
234 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
235 $num =~ s/^0+(\d)/$1/; # remove leading 0s
236 $section = "" unless defined $section;
237
238 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
239 $sectionref = $sectionref->{'subsections'}->{$num};
240 } else {
241 return undef;
242 }
243 }
244
245 return $sectionref;
246}
247
248# calculate OID by hashing the contents of the document
249sub _calc_OID {
250 my $self = shift (@_);
251 my ($filename) = @_;
252
253 my $osexe = &util::get_os_exe();
254
255 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
256 $ENV{'GSDLOS'},"hashfile$osexe");
257 my $result = "NULL";
258
259 if (-e "$hashfile_exe") {
260# $result = `\"$hashfile_exe\" \"$filename\"`;
261 $result = `hashfile$osexe \"$filename\"`;
262 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
263
264 } else {
265 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
266 }
267
268 return "HASH$result";
269}
270
271# methods dealing with OID, not groups of them.
272
273# if $OID is not provided one is calculated
274sub set_OID {
275 my $self = shift (@_);
276 my ($OID) = @_;
277
278 # if an OID wasn't provided claculate one
279 if (!defined $OID) {
280 $OID = "NULL";
281
282 if ($self->{'OIDtype'} eq "incremental") {
283 $OID = "D" . $OIDcount;
284 $OIDcount ++;
285
286 } else {
287 # "hash" OID - feed file to hashfile.exe
288 #my $filename = $self->get_source_filename();
289 # we want to use the converted file for hashing if available
290 # cos its quicker
291 my $filename = $self->get_filename_for_hashing();
292 if (defined($filename) && -e $filename) {
293 $OID = $self->_calc_OID ($filename);
294 } else {
295 $filename = &util::get_tmp_filename();
296 if (!open (OUTFILE, ">$filename")) {
297 print STDERR "doc::set_OID could not write to $filename\n";
298 } else {
299 $self->output_section('OUTFILE', $self->get_top_section(),
300 undef, 1);
301 close (OUTFILE);
302 }
303
304 $OID = $self->_calc_OID ($filename);
305 &util::rm ($filename);
306 }
307 }
308 }
309 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
310}
311
312# this uses hashdoc (embedded c thingy) which is faster but still
313# needs a little work to be suffiently stable
314sub ___set_OID {
315 my $self = shift (@_);
316 my ($OID) = @_;
317
318 # if an OID wasn't provided then calculate hash value based on document
319 if (!defined $OID)
320 {
321 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
322 undef, 1);
323 my $hash_len = length($hash_text);
324
325 $OID = &hashdoc::buffer($hash_text,$hash_len);
326 }
327
328 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
329}
330
331# returns the OID for this document
332sub get_OID {
333 my $self = shift (@_);
334 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
335 return $OID if (defined $OID);
336 return "NULL";
337}
338
339sub delete_OID {
340 my $self = shift (@_);
341
342 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
343}
344
345
346# methods for manipulating section names
347
348# returns the name of the top-most section (the top
349# level of the document
350sub get_top_section {
351 my $self = shift (@_);
352
353 return "";
354}
355
356# returns a section
357sub get_parent_section {
358 my $self = shift (@_);
359 my ($section) = @_;
360
361 $section =~ s/(^|\.)\d+$//;
362
363 return $section;
364}
365
366# returns the first child section (or the end child
367# if there isn't any)
368sub get_begin_child {
369 my $self = shift (@_);
370 my ($section) = @_;
371
372 my $section_ptr = $self->_lookup_section($section);
373 return "" unless defined $section_ptr;
374
375 if (defined $section_ptr->{'subsection_order'}->[0]) {
376 return "$section.$section_ptr->{'subsection_order'}->[0]";
377 }
378
379 return $self->get_end_child ($section);
380}
381
382# returns the next child of a parent section
383sub get_next_child {
384 my $self = shift (@_);
385 my ($section) = @_;
386
387 my $parent_section = $self->get_parent_section($section);
388 my $parent_section_ptr = $self->_lookup_section($parent_section);
389 return undef unless defined $parent_section_ptr;
390
391 my ($section_num) = $section =~ /(\d+)$/;
392 return undef unless defined $section_num;
393
394 my $i = 0;
395 my $section_order = $parent_section_ptr->{'subsection_order'};
396 while ($i < scalar(@$section_order)) {
397 last if $section_order->[$i] eq $section_num;
398 $i++;
399 }
400
401 $i++; # the next child
402 if ($i < scalar(@$section_order)) {
403 return $section_order->[$i] if $parent_section eq "";
404 return "$parent_section.$section_order->[$i]";
405 }
406
407 # no more sections in this level
408 return undef;
409}
410
411# returns a reference to a list of children
412sub get_children {
413 my $self = shift (@_);
414 my ($section) = @_;
415
416 my $section_ptr = $self->_lookup_section($section);
417 return [] unless defined $section_ptr;
418
419 my @children = @{$section_ptr->{'subsection_order'}};
420
421 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
422 return \@children;
423}
424
425# returns the child section one past the last one (which
426# is coded as "0")
427sub get_end_child {
428 my $self = shift (@_);
429 my ($section) = @_;
430
431 return $section . ".0" unless $section eq "";
432 return "0";
433}
434
435# returns the next section in book order
436sub get_next_section {
437 my $self = shift (@_);
438 my ($section) = @_;
439
440 return undef unless defined $section;
441
442 my $section_ptr = $self->_lookup_section($section);
443 return undef unless defined $section_ptr;
444
445 # first try to find first child
446 if (defined $section_ptr->{'subsection_order'}->[0]) {
447 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
448 return "$section.$section_ptr->{'subsection_order'}->[0]";
449 }
450
451 do {
452 # try to find sibling
453 my $next_child = $self->get_next_child ($section);
454 return $next_child if (defined $next_child);
455
456 # move up one level
457 $section = $self->get_parent_section ($section);
458 } while $section =~ /\d/;
459
460 return undef;
461}
462
463sub is_leaf_section {
464 my $self = shift (@_);
465 my ($section) = @_;
466
467 my $section_ptr = $self->_lookup_section($section);
468 return 1 unless defined $section_ptr;
469
470 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
471}
472
473# methods for dealing with sections
474
475# returns the name of the inserted section
476sub insert_section {
477 my $self = shift (@_);
478 my ($before_section) = @_;
479
480 # get the child to insert before and its parent section
481 my $parent_section = "";
482 my $before_child = "0";
483 my @before_section = split (/\./, $before_section);
484 if (scalar(@before_section) > 0) {
485 $before_child = pop (@before_section);
486 $parent_section = join (".", @before_section);
487 }
488
489 my $parent_section_ptr = $self->_lookup_section($parent_section);
490 if (!defined $parent_section_ptr) {
491 print STDERR "doc::insert_section couldn't find parent section " .
492 "$parent_section\n";
493 return;
494 }
495
496 # get the next section number
497 my $section_num = $parent_section_ptr->{'next_subsection'}++;
498
499 my $i = 0;
500 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
501 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
502 $i++;
503 }
504
505 # insert the section number into the order list
506 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
507
508 # add this section to the parent section
509 my $section_ptr = {'subsection_order'=>[],
510 'next_subsection'=>1,
511 'subsections'=>{},
512 'metadata'=>[],
513 'text'=>""};
514 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
515
516 # work out the full section number
517 my $section = $parent_section;
518 $section .= "." unless $section eq "";
519 $section .= $section_num;
520
521 return $section;
522}
523
524# creates a pre-named section
525sub create_named_section {
526 my $self = shift (@_);
527 my ($mastersection) = @_;
528
529 my ($num);
530 my $section = $mastersection;
531 my $sectionref = $self;
532
533 while ($section ne "") {
534 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
535 $num =~ s/^0+(\d)/$1/; # remove leading 0s
536 $section = "" unless defined $section;
537
538 if (defined $num) {
539 if (!defined $sectionref->{'subsections'}->{$num}) {
540 push (@{$sectionref->{'subsection_order'}}, $num);
541 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
542 'next_subsection'=>1,
543 'subsections'=>{},
544 'metadata'=>[],
545 'text'=>""};
546 if ($num >= $sectionref->{'next_subsection'}) {
547 $sectionref->{'next_subsection'} = $num + 1;
548 }
549 }
550 $sectionref = $sectionref->{'subsections'}->{$num};
551
552 } else {
553 print STDERR "doc::create_named_section couldn't create section ";
554 print STDERR "$mastersection\n";
555 last;
556 }
557 }
558}
559
560# returns a reference to a list of subsections
561sub list_subsections {
562 my $self = shift (@_);
563 my ($section) = @_;
564
565 my $section_ptr = $self->_lookup_section ($section);
566 if (!defined $section_ptr) {
567 print STDERR "doc::list_subsections couldn't find section $section\n";
568 return [];
569 }
570
571 return [@{$section_ptr->{'subsection_order'}}];
572}
573
574sub delete_section {
575 my $self = shift (@_);
576 my ($section) = @_;
577
578# my $section_ptr = {'subsection_order'=>[],
579# 'next_subsection'=>1,
580# 'subsections'=>{},
581# 'metadata'=>[],
582# 'text'=>""};
583
584 # if this is the top section reset everything
585 if ($section eq "") {
586 $self->{'subsection_order'} = [];
587 $self->{'subsections'} = {};
588 $self->{'metadata'} = [];
589 $self->{'text'} = "";
590 return;
591 }
592
593 # find the parent of the section to delete
594 my $parent_section = "";
595 my $child = "0";
596 my @section = split (/\./, $section);
597 if (scalar(@section) > 0) {
598 $child = pop (@section);
599 $parent_section = join (".", @section);
600 }
601
602 my $parent_section_ptr = $self->_lookup_section($parent_section);
603 if (!defined $parent_section_ptr) {
604 print STDERR "doc::delete_section couldn't find parent section " .
605 "$parent_section\n";
606 return;
607 }
608
609 # remove this section from the subsection_order list
610 my $i = 0;
611 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
612 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
613 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
614 last;
615 }
616 $i++;
617 }
618
619 # remove this section from the subsection hash
620 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
621 undef $parent_section_ptr->{'subsections'}->{$child};
622 }
623}
624
625#--
626# methods for dealing with metadata
627
628# set_metadata_element and get_metadata_element are for metadata
629# which should only have one value. add_meta_data and get_metadata
630# are for metadata which can have more than one value.
631
632# returns the first metadata value which matches field
633
634# This version of get metadata element works much like the one above,
635# except it allows for the namespace portion of a metadata element to
636# be ignored, thus if you are searching for dc.Title, the first piece
637# of matching metadata ending with the name Title (once any namespace
638# is removed) would be returned.
639# 28-11-2003 John Thompson
640sub get_metadata_element {
641 my $self = shift (@_);
642 my ($section, $field, $ignore_namespace) = @_;
643 my ($data);
644
645 $ignore_namespace = 0 unless defined $ignore_namespace;
646
647 my $section_ptr = $self->_lookup_section($section);
648 if (!defined $section_ptr) {
649 print STDERR "doc::get_metadata_element couldn't find section " .
650 "$section\n";
651 return;
652 }
653
654 # Remove the any namespace if we are being told to ignore them
655 if($ignore_namespace) {
656 $field =~ s/^\w*\.//;
657 }
658
659 foreach $data (@{$section_ptr->{'metadata'}}) {
660
661 my $data_name = $data->[0];
662 # Remove the any namespace if we are being told to ignore them
663 if($ignore_namespace) {
664 $data_name =~ s/^\w*\.//;
665 }
666
667 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
668 }
669
670 return undef; # was not found
671}
672
673# returns a list of the form [value1, value2, ...]
674sub get_metadata {
675 my $self = shift (@_);
676 my ($section, $field, $ignore_namespace) = @_;
677 my ($data);
678
679 $ignore_namespace = 0 unless defined $ignore_namespace;
680
681 my $section_ptr = $self->_lookup_section($section);
682 if (!defined $section_ptr) {
683 print STDERR "doc::get_metadata couldn't find section " .
684 "$section\n";
685 return;
686 }
687
688 # Remove the any namespace if we are being told to ignore them
689 if($ignore_namespace) {
690 $field =~ s/^\w*\.//;
691 }
692
693 my @metadata = ();
694 foreach $data (@{$section_ptr->{'metadata'}}) {
695
696 my $data_name = $data->[0];
697 # Remove the any namespace if we are being told to ignore them
698 if($ignore_namespace) {
699 $data_name =~ s/^\w*\.//;
700 }
701
702 push (@metadata, $data->[1]) if ($data_name eq $field);
703 }
704
705 return \@metadata;
706}
707
708# returns a list of the form [[field,value],[field,value],...]
709sub get_all_metadata {
710 my $self = shift (@_);
711 my ($section) = @_;
712
713 my $section_ptr = $self->_lookup_section($section);
714 if (!defined $section_ptr) {
715 print STDERR "doc::get_all_metadata couldn't find section " .
716 "$section\n";
717 return;
718 }
719
720 return $section_ptr->{'metadata'};
721}
722
723# $value is optional
724sub delete_metadata {
725 my $self = shift (@_);
726 my ($section, $field, $value) = @_;
727
728 my $section_ptr = $self->_lookup_section($section);
729 if (!defined $section_ptr) {
730 print STDERR "doc::delete_metadata couldn't find section " .
731 "$section\n";
732 return;
733 }
734
735 my $i = 0;
736 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
737 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
738 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
739 splice (@{$section_ptr->{'metadata'}}, $i, 1);
740 } else {
741 $i++;
742 }
743 }
744}
745
746sub delete_all_metadata {
747 my $self = shift (@_);
748 my ($section) = @_;
749
750 my $section_ptr = $self->_lookup_section($section);
751 if (!defined $section_ptr) {
752 print STDERR "doc::delete_all_metadata couldn't find section " .
753 "$section\n";
754 return;
755 }
756
757 $section_ptr->{'metadata'} = [];
758}
759
760sub set_metadata_element {
761 my $self = shift (@_);
762 my ($section, $field, $value) = @_;
763
764 $self->set_utf8_metadata_element ($section, $field,
765 &unicode::ascii2utf8(\$value));
766}
767
768# set_utf8_metadata_element assumes the text has already been
769# converted to the UTF-8 encoding.
770sub set_utf8_metadata_element {
771 my $self = shift (@_);
772 my ($section, $field, $value) = @_;
773
774 $self->delete_metadata ($section, $field);
775 $self->add_utf8_metadata ($section, $field, $value);
776}
777
778
779# add_metadata assumes the text is in (extended) ascii form. For
780# text which hash been already converted to the UTF-8 format use
781# add_utf8_metadata.
782sub add_metadata {
783 my $self = shift (@_);
784 my ($section, $field, $value) = @_;
785
786 $self->add_utf8_metadata ($section, $field,
787 &unicode::ascii2utf8(\$value));
788}
789
790# add_utf8_metadata assumes the text has already been converted
791# to the UTF-8 encoding.
792sub add_utf8_metadata {
793 my $self = shift (@_);
794 my ($section, $field, $value) = @_;
795
796 my $section_ptr = $self->_lookup_section($section);
797 if (!defined $section_ptr) {
798 print STDERR "doc::add_utf8_metadata couldn't find section " .
799 "$section\n";
800 return;
801 }
802 if (!defined $value) {
803 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
804 return;
805 }
806 if (!defined $field) {
807 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
808 return;
809 }
810
811 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
812}
813
814
815# methods for dealing with text
816
817# returns the text for a section
818sub get_text {
819 my $self = shift (@_);
820 my ($section) = @_;
821
822 my $section_ptr = $self->_lookup_section($section);
823 if (!defined $section_ptr) {
824 print STDERR "doc::get_text couldn't find section " .
825 "$section\n";
826 return "";
827 }
828
829 return $section_ptr->{'text'};
830}
831
832# returns the (utf-8 encoded) length of the text for a section
833sub get_text_length {
834 my $self = shift (@_);
835 my ($section) = @_;
836
837 my $section_ptr = $self->_lookup_section($section);
838 if (!defined $section_ptr) {
839 print STDERR "doc::get_text_length couldn't find section " .
840 "$section\n";
841 return 0;
842 }
843
844 return length ($section_ptr->{'text'});
845}
846
847sub delete_text {
848 my $self = shift (@_);
849 my ($section) = @_;
850
851 my $section_ptr = $self->_lookup_section($section);
852 if (!defined $section_ptr) {
853 print STDERR "doc::delete_text couldn't find section " .
854 "$section\n";
855 return;
856 }
857
858 $section_ptr->{'text'} = "";
859}
860
861# add_text assumes the text is in (extended) ascii form. For
862# text which has been already converted to the UTF-8 format
863# use add_utf8_text.
864sub add_text {
865 my $self = shift (@_);
866 my ($section, $text) = @_;
867
868 # convert the text to UTF-8 encoded unicode characters
869 # and add the text
870 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
871}
872
873
874# add_utf8_text assumes the text to be added has already
875# been converted to the UTF-8 encoding. For ascii text use
876# add_text
877sub add_utf8_text {
878 my $self = shift (@_);
879 my ($section, $text) = @_;
880
881 my $section_ptr = $self->_lookup_section($section);
882 if (!defined $section_ptr) {
883 print STDERR "doc::add_utf8_text couldn't find section " .
884 "$section\n";
885 return;
886 }
887
888 $section_ptr->{'text'} .= $text;
889}
890
891
892# methods for dealing with associated files
893
894# a file is associated with a document, NOT a section.
895# if section is defined it is noted in the data structure
896# only so that files associated from a particular section
897# may be removed later (using delete_section_assoc_files)
898sub associate_file {
899 my $self = shift (@_);
900 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
901 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
902
903 # remove all associated files with the same name
904 $self->delete_assoc_file ($assoc_filename);
905
906 push (@{$self->{'associated_files'}},
907 [$real_filename, $assoc_filename, $mime_type, $section]);
908}
909
910# returns a list of associated files in the form
911# [[real_filename, assoc_filename, mimetype], ...]
912sub get_assoc_files {
913 my $self = shift (@_);
914
915 return $self->{'associated_files'};
916}
917
918sub delete_section_assoc_files {
919 my $self = shift (@_);
920 my ($section) = @_;
921
922 my $i=0;
923 while ($i < scalar (@{$self->{'associated_files'}})) {
924 if (defined $self->{'associated_files'}->[$i]->[3] &&
925 $self->{'associated_files'}->[$i]->[3] eq $section) {
926 splice (@{$self->{'associated_files'}}, $i, 1);
927 } else {
928 $i++;
929 }
930 }
931}
932
933sub delete_assoc_file {
934 my $self = shift (@_);
935 my ($assoc_filename) = @_;
936
937 my $i=0;
938 while ($i < scalar (@{$self->{'associated_files'}})) {
939 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
940 splice (@{$self->{'associated_files'}}, $i, 1);
941 } else {
942 $i++;
943 }
944 }
945}
946
947sub reset_nextsection_ptr {
948 my $self = shift (@_);
949 my ($section) = @_;
950
951 my $section_ptr = $self->_lookup_section($section);
952 $section_ptr->{'next_subsection'} = 1;
953}
954
9551;
Note: See TracBrowser for help on using the repository browser.