source: trunk/gsdl/perllib/doc.pm@ 1844

Last change on this file since 1844 was 1844, checked in by sjboddie, 23 years ago

Added an 'auto' argument to BasPlug's '-input_encoding' option ('auto' is
now the default instead of 'ascii'). Wihen -input_encoding is 'auto' textcat
is used to work out the language and encoding of each document prior to
processing it. This allows for documents within the same collection to be
in different encodings and all be imported correctly (as long as they're
in an encoding that's supported - notable exceptions at the moment are
Big5 Chinese and any kind of Japanese).
Doing things this way means each document is read in twice at import time,
no doubt slowing things down considerably. You can therefore still set
-input_encoding explicitly if you know that all your documents are a
particular encoding.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 22.9 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29
30BEGIN {
31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
33}
34
35use unicode;
36use util;
37use ghtml;
38##use hashdoc;
39
40# the document type may be indexed_doc, nonindexed_doc, or
41# classification
42
43sub new {
44 my $class = shift (@_);
45 my ($source_filename, $doc_type) = @_;
46
47 my $self = bless {'associated_files'=>[],
48 'subsection_order'=>[],
49 'next_subsection'=>1,
50 'subsections'=>{},
51 'metadata'=>[],
52 'text'=>""}, $class;
53
54 $self->set_source_filename ($source_filename) if defined $source_filename;
55 $self->set_doc_type ($doc_type) if defined $doc_type;
56
57 return $self;
58}
59
60# clone the $self object
61sub duplicate {
62 my $self = shift (@_);
63
64 my $newobj = {};
65
66 foreach $k (keys %$self) {
67 $newobj->{$k} = &clone ($self->{$k});
68 }
69
70 bless $newobj, ref($self);
71 return $newobj;
72}
73
74sub clone {
75 my ($from) = @_;
76 my $type = ref ($from);
77
78 if ($type eq "HASH") {
79 my $to = {};
80 foreach $key (keys %$from) {
81 $to->{$key} = &clone ($from->{$key});
82 }
83 return $to;
84 } elsif ($type eq "ARRAY") {
85 my $to = [];
86 foreach $v (@$from) {
87 push (@$to, &clone ($v));
88 }
89 return $to;
90 } else {
91 return $from;
92 }
93}
94
95
96sub set_source_filename {
97 my $self = shift (@_);
98 my ($source_filename) = @_;
99
100 $self->set_metadata_element ($self->get_top_section(),
101 "gsdlsourcefilename",
102 $source_filename);
103}
104
105# returns the source_filename as it was provided
106sub get_source_filename {
107 my $self = shift (@_);
108
109 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
110}
111
112sub set_doc_type {
113 my $self = shift (@_);
114 my ($doc_type) = @_;
115
116 $self->set_metadata_element ($self->get_top_section(),
117 "gsdldoctype",
118 $doc_type);
119}
120
121# returns the source_filename as it was provided
122# the default of "indexed_doc" is used if no document
123# type was provided
124sub get_doc_type {
125 my $self = shift (@_);
126
127 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
128 return $doc_type if (defined $doc_type);
129 return "indexed_doc";
130}
131
132sub set_source_encoding {
133 my $self = shift (@_);
134 my ($source_encoding) = @_;
135
136 $self->set_metadata_element ($self->get_top_section(),
137 "gsdlsourceencoding",
138 $source_encoding);
139}
140
141# returns the source_encoding as it was provided
142sub get_source_encoding {
143 my $self = shift (@_);
144
145 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourceencoding");
146}
147
148sub _escape_text {
149 my ($text) = @_;
150
151 # special characters in the gml encoding
152 $text =~ s/&/&/g; # this has to be first...
153 $text =~ s/</&lt;/g;
154 $text =~ s/>/&gt;/g;
155 $text =~ s/\"/&quot;/g;
156
157 return $text;
158}
159
160
161sub buffer_section {
162 my $self = shift (@_);
163 my ($section, $suppress_subject_info) = @_;
164
165 $suppress_subject_info = 0 unless defined $suppress_subject_info;
166 my ($all_text,$data, $subsection);
167
168 my $section_ptr = $self->_lookup_section ($section);
169 my ($section_num) = $section =~ /(\d+)$/;
170
171 return "" unless defined $section_ptr;
172
173 # output the section header (including the section number
174 # and metadata)
175
176 $all_text = "<gsdlsection";
177 $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
178 foreach $data (@{$section_ptr->{'metadata'}}) {
179 $all_text .= " $data->[0]=\"" . &_escape_text($data->[1]) . "\""
180 unless $suppress_subject_info && $data->[0] eq "Subject";
181 }
182 $all_text .= ">";
183
184 # output the text
185 $all_text .= &_escape_text($section_ptr->{'text'});
186
187 # output all the subsections
188 foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
189 $all_text .= $self->buffer_section("$section.$subsection", $suppress_subject_info);
190 }
191
192 # output the closing tag
193 $all_text .= "</gsdlsection>\n";
194
195 return $all_text;
196}
197
198sub output_section {
199 my $self = shift (@_);
200 my ($handle, $section, $suppress_subject_info) = @_;
201
202 my $all_text = $self->buffer_section($section, $suppress_subject_info);
203 print $handle $all_text;
204}
205
206# look up the reference to the a particular section
207sub _lookup_section {
208 my $self = shift (@_);
209 my ($section) = @_;
210
211 my ($num);
212 my $sectionref = $self;
213
214 while (defined $section && $section ne "") {
215 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
216 $num =~ s/^0+(\d)/$1/; # remove leading 0s
217 $section = "" unless defined $section;
218
219 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
220 $sectionref = $sectionref->{'subsections'}->{$num};
221 } else {
222 return undef;
223 }
224 }
225
226 return $sectionref;
227}
228
229sub _calc_OID {
230 my $self = shift (@_);
231 my ($filename) = @_;
232
233 my $osexe = &util::get_os_exe();
234
235 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
236 $ENV{'GSDLOS'},"hashfile$osexe");
237 my $result = "NULL";
238
239 if (-e "$hashfile_exe") {
240# $result = `\"$hashfile_exe\" \"$filename\"`;
241 $result = `hashfile$osexe \"$filename\"`;
242 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
243
244 } else {
245 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
246 }
247
248 return "HASH$result";
249}
250
251# methods dealing with OID, not groups of them.
252
253# if $OID is not provided one is calculated from hashing the
254# current contents of the document
255# An OID are actually stored as metadata of the document
256sub set_OID {
257 my $self = shift (@_);
258 my ($OID) = @_;
259
260 # if an OID wasn't provided then feed this document to
261 # hashfile.exe
262 if (!defined $OID) {
263 $OID = "NULL";
264
265 my $filename = $self->get_source_filename();
266 if (defined($filename) && -e $filename) {
267
268 $OID = $self->_calc_OID ($filename);
269
270 } else {
271
272 # this warning causes more confusion than it's worth I think
273 # -- sorry Gordon.
274# print STDERR "doc::set_OID source filename undefined/non-existant (continuing)\n";
275
276 $filename = &util::get_tmp_filename();
277 if (!open (OUTFILE, ">$filename")) {
278 print STDERR "doc::set_OID could not write to $filename\n";
279 } else {
280 $self->output_section('OUTFILE', $self->get_top_section(), 1);
281 close (OUTFILE);
282 }
283
284 $OID = $self->_calc_OID ($filename);
285 &util::rm ($filename);
286 }
287 }
288
289 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
290}
291
292# this uses hashdoc (embedded c thingy) which is faster but still
293# needs a little work to be suffiently stable
294sub ___set_OID {
295 my $self = shift (@_);
296 my ($OID) = @_;
297
298 # if an OID wasn't provided then calculate hash value based on document
299 if (!defined $OID)
300 {
301 my $hash_text = $self->buffer_section($self->get_top_section(), 1);
302 my $hash_len = length($hash_text);
303
304 $OID = &hashdoc::buffer($hash_text,$hash_len);
305 }
306
307 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
308}
309
310# returns the OID for this document
311sub get_OID {
312 my $self = shift (@_);
313 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
314 return $OID if (defined $OID);
315 return "NULL";
316}
317
318sub delete_OID {
319 my $self = shift (@_);
320
321 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
322}
323
324
325# methods for manipulating section names
326
327# returns the name of the top-most section (the top
328# level of the document
329sub get_top_section {
330 my $self = shift (@_);
331
332 return "";
333}
334
335# returns a section
336sub get_parent_section {
337 my $self = shift (@_);
338 my ($section) = @_;
339
340 $section =~ s/(^|\.)\d+$//;
341
342 return $section;
343}
344
345# returns the first child section (or the end child
346# if there isn't any)
347sub get_begin_child {
348 my $self = shift (@_);
349 my ($section) = @_;
350
351 my $section_ptr = $self->_lookup_section($section);
352 return "" unless defined $section_ptr;
353
354 if (defined $section_ptr->{'subsection_order'}->[0]) {
355 return "$section.$section_ptr->{'subsection_order'}->[0]";
356 }
357
358 return $self->get_end_child ($section);
359}
360
361# returns the next child of a parent section
362sub get_next_child {
363 my $self = shift (@_);
364 my ($section) = @_;
365
366 my $parent_section = $self->get_parent_section($section);
367 my $parent_section_ptr = $self->_lookup_section($parent_section);
368 return undef unless defined $parent_section_ptr;
369
370 my ($section_num) = $section =~ /(\d+)$/;
371 return undef unless defined $section_num;
372
373 my $i = 0;
374 my $section_order = $parent_section_ptr->{'subsection_order'};
375 while ($i < scalar(@$section_order)) {
376 last if $section_order->[$i] eq $section_num;
377 $i++;
378 }
379
380 $i++; # the next child
381 if ($i < scalar(@$section_order)) {
382 return $section_order->[$i] if $parent_section eq "";
383 return "$parent_section.$section_order->[$i]";
384 }
385
386 # no more sections in this level
387 return undef;
388}
389
390# returns a reference to a list of children
391sub get_children {
392 my $self = shift (@_);
393 my ($section) = @_;
394
395 my $section_ptr = $self->_lookup_section($section);
396 return [] unless defined $section_ptr;
397
398 my @children = @{$section_ptr->{'subsection_order'}};
399
400 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
401 return \@children;
402}
403
404# returns the child section one past the last one (which
405# is coded as "0")
406sub get_end_child {
407 my $self = shift (@_);
408 my ($section) = @_;
409
410 return $section . ".0" unless $section eq "";
411 return "0";
412}
413
414# returns the next section in book order
415sub get_next_section {
416 my $self = shift (@_);
417 my ($section) = @_;
418
419 return undef unless defined $section;
420
421 my $section_ptr = $self->_lookup_section($section);
422 return undef unless defined $section_ptr;
423
424 # first try to find first child
425 if (defined $section_ptr->{'subsection_order'}->[0]) {
426 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
427 return "$section.$section_ptr->{'subsection_order'}->[0]";
428 }
429
430 do {
431 # try to find sibling
432 my $next_child = $self->get_next_child ($section);
433 return $next_child if (defined $next_child);
434
435 # move up one level
436 $section = $self->get_parent_section ($section);
437 } while $section =~ /\d/;
438
439 return undef;
440}
441
442sub is_leaf_section {
443 my $self = shift (@_);
444 my ($section) = @_;
445
446 my $section_ptr = $self->_lookup_section($section);
447 return 1 unless defined $section_ptr;
448
449 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
450}
451
452# methods for dealing with sections
453
454# returns the name of the inserted section
455sub insert_section {
456 my $self = shift (@_);
457 my ($before_section) = @_;
458
459 # get the child to insert before and its parent section
460 my $parent_section = "";
461 my $before_child = "0";
462 my @before_section = split (/\./, $before_section);
463 if (scalar(@before_section) > 0) {
464 $before_child = pop (@before_section);
465 $parent_section = join (".", @before_section);
466 }
467
468 my $parent_section_ptr = $self->_lookup_section($parent_section);
469 if (!defined $parent_section_ptr) {
470 print STDERR "doc::insert_section couldn't find parent section " .
471 "$parent_section\n";
472 return;
473 }
474
475 # get the next section number
476 my $section_num = $parent_section_ptr->{'next_subsection'}++;
477
478 my $i = 0;
479 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
480 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
481 $i++;
482 }
483
484 # insert the section number into the order list
485 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
486
487 # add this section to the parent section
488 my $section_ptr = {'subsection_order'=>[],
489 'next_subsection'=>1,
490 'subsections'=>{},
491 'metadata'=>[],
492 'text'=>""};
493 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
494
495 # work out the full section number
496 my $section = $parent_section;
497 $section .= "." unless $section eq "";
498 $section .= $section_num;
499
500 return $section;
501}
502
503# creates a pre-named section
504sub create_named_section {
505 my $self = shift (@_);
506 my ($mastersection) = @_;
507
508 my ($num);
509 my $section = $mastersection;
510 my $sectionref = $self;
511
512#### print STDERR "*** mastersection = $mastersection\n";
513
514 while ($section ne "") {
515 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
516 $num =~ s/^0+(\d)/$1/; # remove leading 0s
517 $section = "" unless defined $section;
518
519 if (defined $num) {
520 if (!defined $sectionref->{'subsections'}->{$num}) {
521 push (@{$sectionref->{'subsection_order'}}, $num);
522 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
523 'next_subsection'=>1,
524 'subsections'=>{},
525 'metadata'=>[],
526 'text'=>""};
527 if ($num >= $sectionref->{'next_subsection'}) {
528 $sectionref->{'next_subsection'} = $num + 1;
529 }
530 }
531 $sectionref = $sectionref->{'subsections'}->{$num};
532
533 } else {
534 print STDERR "doc::create_named_section couldn't create section ";
535 print STDERR "$mastersection\n";
536 last;
537 }
538 }
539}
540
541# returns a reference to a list of subsections
542sub list_subsections {
543 my $self = shift (@_);
544 my ($section) = @_;
545
546 my $section_ptr = $self->_lookup_section ($section);
547 if (!defined $section_ptr) {
548 print STDERR "doc::list_subsections couldn't find section $section\n";
549 return [];
550 }
551
552 return [@{$section_ptr->{'subsection_order'}}];
553}
554
555sub delete_section {
556 my $self = shift (@_);
557 my ($section) = @_;
558
559# my $section_ptr = {'subsection_order'=>[],
560# 'next_subsection'=>1,
561# 'subsections'=>{},
562# 'metadata'=>[],
563# 'text'=>""};
564
565 # if this is the top section reset everything
566 if ($section eq "") {
567 $self->{'subsection_order'} = [];
568 $self->{'subsections'} = {};
569 $self->{'metadata'} = [];
570 $self->{'text'} = "";
571 return;
572 }
573
574 # find the parent of the section to delete
575 my $parent_section = "";
576 my $child = "0";
577 my @section = split (/\./, $section);
578 if (scalar(@section) > 0) {
579 $child = pop (@section);
580 $parent_section = join (".", @section);
581 }
582
583 my $parent_section_ptr = $self->_lookup_section($parent_section);
584 if (!defined $parent_section_ptr) {
585 print STDERR "doc::delete_section couldn't find parent section " .
586 "$parent_section\n";
587 return;
588 }
589
590 # remove this section from the subsection_order list
591 my $i = 0;
592 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
593 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
594 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
595 last;
596 }
597 $i++;
598 }
599
600 # remove this section from the subsection hash
601 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
602 undef $parent_section_ptr->{'subsections'}->{$child};
603 }
604}
605
606#--
607# methods for dealing with metadata
608
609# set_metadata_element and get_metadata_element are for metadata
610# which should only have one value. add_meta_data and get_metadata
611# are for metadata which can have more than one value.
612
613# returns the first metadata value which matches field
614sub get_metadata_element {
615 my $self = shift (@_);
616 my ($section, $field) = @_;
617 my ($data);
618
619 my $section_ptr = $self->_lookup_section($section);
620 if (!defined $section_ptr) {
621 print STDERR "doc::get_metadata_element couldn't find section " .
622 "$section\n";
623 return;
624 }
625
626 foreach $data (@{$section_ptr->{'metadata'}}) {
627 return $data->[1] if (scalar(@$data) >= 2 && $data->[0] eq $field);
628 }
629
630 return undef; # was not found
631}
632
633
634# returns a list of the form [value1, value2, ...]
635sub get_metadata {
636 my $self = shift (@_);
637 my ($section, $field) = @_;
638 my ($data);
639
640 my $section_ptr = $self->_lookup_section($section);
641 if (!defined $section_ptr) {
642 print STDERR "doc::get_metadata couldn't find section " .
643 "$section\n";
644 return;
645 }
646
647 my @metadata = ();
648 foreach $data (@{$section_ptr->{'metadata'}}) {
649 push (@metadata, $data->[1]) if ($data->[0] eq $field);
650 }
651
652 return \@metadata;
653}
654
655# returns a list of the form [[field,value],[field,value],...]
656sub get_all_metadata {
657 my $self = shift (@_);
658 my ($section) = @_;
659
660 my $section_ptr = $self->_lookup_section($section);
661 if (!defined $section_ptr) {
662 print STDERR "doc::get_all_metadata couldn't find section " .
663 "$section\n";
664 return;
665 }
666
667 return $section_ptr->{'metadata'};
668}
669
670# $value is optional
671sub delete_metadata {
672 my $self = shift (@_);
673 my ($section, $field, $value) = @_;
674
675 my $section_ptr = $self->_lookup_section($section);
676 if (!defined $section_ptr) {
677 print STDERR "doc::delete_metadata couldn't find section " .
678 "$section\n";
679 return;
680 }
681
682 my $i = 0;
683 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
684 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
685 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
686 splice (@{$section_ptr->{'metadata'}}, $i, 1);
687 } else {
688 $i++;
689 }
690 }
691}
692
693sub delete_all_metadata {
694 my $self = shift (@_);
695 my ($section) = @_;
696
697 my $section_ptr = $self->_lookup_section($section);
698 if (!defined $section_ptr) {
699 print STDERR "doc::delete_all_metadata couldn't find section " .
700 "$section\n";
701 return;
702 }
703
704 $section_ptr->{'metadata'} = [];
705}
706
707sub set_metadata_element {
708 my $self = shift (@_);
709 my ($section, $field, $value) = @_;
710
711 $self->set_utf8_metadata_element ($section, $field,
712 &unicode::ascii2utf8($value));
713}
714
715# set_utf8_metadata_element assumes the text has already been
716# converted to the UTF-8 encoding.
717sub set_utf8_metadata_element {
718 my $self = shift (@_);
719 my ($section, $field, $value) = @_;
720
721 $self->delete_metadata ($section, $field);
722 $self->add_utf8_metadata ($section, $field, $value);
723}
724
725
726# add_metadata assumes the text is in (extended) ascii form. For
727# text which hash been already converted to the UTF-8 format use
728# add_utf8_metadata.
729sub add_metadata {
730 my $self = shift (@_);
731 my ($section, $field, $value) = @_;
732
733 $self->add_utf8_metadata ($section, $field,
734 &unicode::ascii2utf8($value));
735}
736
737# add_utf8_metadata assumes the text has already been converted
738# to the UTF-8 encoding.
739sub add_utf8_metadata {
740 my $self = shift (@_);
741 my ($section, $field, $value) = @_;
742
743 my $section_ptr = $self->_lookup_section($section);
744 if (!defined $section_ptr) {
745 print STDERR "doc::add_utf8_metadata couldn't find section " .
746 "$section\n";
747 return;
748 }
749 if (!defined $value) {
750 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
751 return;
752 }
753 if (!defined $field) {
754 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
755 return;
756 }
757
758 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
759}
760
761
762# methods for dealing with text
763
764# returns the text for a section
765sub get_text {
766 my $self = shift (@_);
767 my ($section) = @_;
768
769 my $section_ptr = $self->_lookup_section($section);
770 if (!defined $section_ptr) {
771 print STDERR "doc::get_text couldn't find section " .
772 "$section\n";
773 return "";
774 }
775
776 return $section_ptr->{'text'};
777}
778
779# returns the (utf-8 encoded) length of the text for a section
780sub get_text_length {
781 my $self = shift (@_);
782 my ($section) = @_;
783
784 my $section_ptr = $self->_lookup_section($section);
785 if (!defined $section_ptr) {
786 print STDERR "doc::get_text_length couldn't find section " .
787 "$section\n";
788 return 0;
789 }
790
791 return length ($section_ptr->{'text'});
792}
793
794sub delete_text {
795 my $self = shift (@_);
796 my ($section) = @_;
797
798 my $section_ptr = $self->_lookup_section($section);
799 if (!defined $section_ptr) {
800 print STDERR "doc::delete_text couldn't find section " .
801 "$section\n";
802 return;
803 }
804
805 $section_ptr->{'text'} = "";
806}
807
808# add_text assumes the text is in (extended) ascii form. For
809# text which has been already converted to the UTF-8 format
810# use add_utf8_text.
811sub add_text {
812 my $self = shift (@_);
813 my ($section, $text) = @_;
814
815 # convert the text to UTF-8 encoded unicode characters
816 # and add the text
817 $self->add_utf8_text($section, &unicode::ascii2utf8($text));
818}
819
820
821# add_utf8_text assumes the text to be added has already
822# been converted to the UTF-8 encoding. For ascii text use
823# add_text
824sub add_utf8_text {
825 my $self = shift (@_);
826 my ($section, $text) = @_;
827
828 my $section_ptr = $self->_lookup_section($section);
829 if (!defined $section_ptr) {
830 print STDERR "doc::add_utf8_text couldn't find section " .
831 "$section\n";
832 return;
833 }
834
835 $section_ptr->{'text'} .= $text;
836}
837
838
839# methods for dealing with associated files
840
841# a file is associated with a document, NOT a section.
842# if section is defined it is noted in the data structure
843# only so that files associated from a particular section
844# may be removed later (using delete_section_assoc_files)
845sub associate_file {
846 my $self = shift (@_);
847 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
848 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
849
850 # remove all associated files with the same name
851 $self->delete_assoc_file ($assoc_filename);
852
853 push (@{$self->{'associated_files'}},
854 [$real_filename, $assoc_filename, $mime_type, $section]);
855}
856
857# returns a list of associated files in the form
858# [[real_filename, assoc_filename, mimetype], ...]
859sub get_assoc_files {
860 my $self = shift (@_);
861
862 return $self->{'associated_files'};
863}
864
865sub delete_section_assoc_files {
866 my $self = shift (@_);
867 my ($section) = @_;
868
869 my $i=0;
870 while ($i < scalar (@{$self->{'associated_files'}})) {
871 if (defined $self->{'associated_files'}->[$i]->[3] &&
872 $self->{'associated_files'}->[$i]->[3] eq $section) {
873 splice (@{$self->{'associated_files'}}, $i, 1);
874 } else {
875 $i++;
876 }
877 }
878}
879
880sub delete_assoc_file {
881 my $self = shift (@_);
882 my ($assoc_filename) = @_;
883
884 my $i=0;
885 while ($i < scalar (@{$self->{'associated_files'}})) {
886 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
887 splice (@{$self->{'associated_files'}}, $i, 1);
888 } else {
889 $i++;
890 }
891 }
892}
893
894sub reset_nextsection_ptr {
895 my $self = shift (@_);
896 my ($section) = @_;
897
898 my $section_ptr = $self->_lookup_section($section);
899 $section_ptr->{'next_subsection'} = 1;
900}
901
9021;
Note: See TracBrowser for help on using the repository browser.