source: trunk/gsdl/perllib/doc.pm@ 6111

Last change on this file since 6111 was 6111, checked in by jmt12, 20 years ago

Changed the description for the -metadata flag to foreshadow the coming enhancement. This commit also happens to include the prototype -ignore_arguments flag to AZList, that will never actually be used because of the aforementioned metadata enhancement.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 23.6 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use unicode;
37use util;
38use ghtml;
39##use hashdoc;
40
41# the document type may be indexed_doc, nonindexed_doc, or
42# classification
43
44my $OIDcount = 0;
45
46sub new {
47 my $class = shift (@_);
48 my ($source_filename, $doc_type) = @_;
49
50 my $self = bless {'associated_files'=>[],
51 'subsection_order'=>[],
52 'next_subsection'=>1,
53 'subsections'=>{},
54 'metadata'=>[],
55 'text'=>"",
56 'OIDtype'=>"hash"}, $class;
57
58 $self->set_source_filename ($source_filename) if defined $source_filename;
59 $self->set_doc_type ($doc_type) if defined $doc_type;
60
61 return $self;
62}
63
64# clone the $self object
65sub duplicate {
66 my $self = shift (@_);
67
68 my $newobj = {};
69
70 foreach $k (keys %$self) {
71 $newobj->{$k} = &clone ($self->{$k});
72 }
73
74 bless $newobj, ref($self);
75 return $newobj;
76}
77
78sub clone {
79 my ($from) = @_;
80 my $type = ref ($from);
81
82 if ($type eq "HASH") {
83 my $to = {};
84 foreach $key (keys %$from) {
85 $to->{$key} = &clone ($from->{$key});
86 }
87 return $to;
88 } elsif ($type eq "ARRAY") {
89 my $to = [];
90 foreach $v (@$from) {
91 push (@$to, &clone ($v));
92 }
93 return $to;
94 } else {
95 return $from;
96 }
97}
98
99sub set_OIDtype {
100 my $self = shift (@_);
101 my ($type) = @_;
102
103 if ($type eq "incremental") {
104 $self->{'OIDtype'} = $type;
105 } else {
106 $self->{'OIDtype'} = "hash";
107 }
108}
109
110sub set_source_filename {
111 my $self = shift (@_);
112 my ($source_filename) = @_;
113
114 $self->set_metadata_element ($self->get_top_section(),
115 "gsdlsourcefilename",
116 $source_filename);
117}
118
119# returns the source_filename as it was provided
120sub get_source_filename {
121 my $self = shift (@_);
122
123 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
124}
125
126sub set_doc_type {
127 my $self = shift (@_);
128 my ($doc_type) = @_;
129
130 $self->set_metadata_element ($self->get_top_section(),
131 "gsdldoctype",
132 $doc_type);
133}
134
135# returns the source_filename as it was provided
136# the default of "indexed_doc" is used if no document
137# type was provided
138sub get_doc_type {
139 my $self = shift (@_);
140
141 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
142 return $doc_type if (defined $doc_type);
143 return "indexed_doc";
144}
145
146sub _escape_text {
147 my ($text) = @_;
148
149 # special characters in the gml encoding
150 $text =~ s/&/&/g; # this has to be first...
151 $text =~ s/</&lt;/g;
152 $text =~ s/>/&gt;/g;
153 $text =~ s/\"/&quot;/g;
154
155 return $text;
156}
157
158sub buffer_section_xml {
159 my $self = shift (@_);
160 my ($section) = @_;
161
162 my $section_ptr = $self->_lookup_section ($section);
163 return "" unless defined $section_ptr;
164
165 my $all_text = "<Section>\n";
166 $all_text .= " <Description>\n";
167
168 # output metadata
169 foreach my $data (@{$section_ptr->{'metadata'}}) {
170 my $escaped_value = &_escape_text($data->[1]);
171 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
172 }
173
174 $all_text .= " </Description>\n";
175
176 # output the text
177 $all_text .= " <Content>";
178 $all_text .= &_escape_text($section_ptr->{'text'});
179 $all_text .= "</Content>\n";
180
181 # output all the subsections
182 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
183 $all_text .= $self->buffer_section_xml("$section.$subsection");
184 }
185
186 $all_text .= "</Section>\n";
187
188 # make sure no nasty control characters have snuck through
189 # (XML::Parser will barf on anything it doesn't consider to be
190 # valid UTF-8 text, including things like \c@, \cC etc.)
191 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
192
193 return $all_text;
194}
195
196sub output_section {
197 my $self = shift (@_);
198 my ($handle, $section) = @_;
199
200 print $handle $self->buffer_section_xml($section);
201}
202
203# look up the reference to the a particular section
204sub _lookup_section {
205 my $self = shift (@_);
206 my ($section) = @_;
207
208 my ($num);
209 my $sectionref = $self;
210
211 while (defined $section && $section ne "") {
212 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
213 $num =~ s/^0+(\d)/$1/; # remove leading 0s
214 $section = "" unless defined $section;
215
216 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
217 $sectionref = $sectionref->{'subsections'}->{$num};
218 } else {
219 return undef;
220 }
221 }
222
223 return $sectionref;
224}
225
226# calculate OID by hashing the contents of the document
227sub _calc_OID {
228 my $self = shift (@_);
229 my ($filename) = @_;
230
231 my $osexe = &util::get_os_exe();
232
233 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
234 $ENV{'GSDLOS'},"hashfile$osexe");
235 my $result = "NULL";
236
237 if (-e "$hashfile_exe") {
238# $result = `\"$hashfile_exe\" \"$filename\"`;
239 $result = `hashfile$osexe \"$filename\"`;
240 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
241
242 } else {
243 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
244 }
245
246 return "HASH$result";
247}
248
249# methods dealing with OID, not groups of them.
250
251# if $OID is not provided one is calculated
252sub set_OID {
253 my $self = shift (@_);
254 my ($OID) = @_;
255
256 # if an OID wasn't provided claculate one
257 if (!defined $OID) {
258 $OID = "NULL";
259
260 if ($self->{'OIDtype'} eq "incremental") {
261 $OID = "D" . $OIDcount;
262 $OIDcount ++;
263
264 } else {
265 # "hash" OID - feed file to hashfile.exe
266 my $filename = $self->get_source_filename();
267 if (defined($filename) && -e $filename) {
268
269 $OID = $self->_calc_OID ($filename);
270
271 } else {
272 $filename = &util::get_tmp_filename();
273 if (!open (OUTFILE, ">$filename")) {
274 print STDERR "doc::set_OID could not write to $filename\n";
275 } else {
276 $self->output_section('OUTFILE', $self->get_top_section(),
277 undef, 1);
278 close (OUTFILE);
279 }
280
281 $OID = $self->_calc_OID ($filename);
282 &util::rm ($filename);
283 }
284 }
285 }
286 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
287}
288
289# this uses hashdoc (embedded c thingy) which is faster but still
290# needs a little work to be suffiently stable
291sub ___set_OID {
292 my $self = shift (@_);
293 my ($OID) = @_;
294
295 # if an OID wasn't provided then calculate hash value based on document
296 if (!defined $OID)
297 {
298 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
299 undef, 1);
300 my $hash_len = length($hash_text);
301
302 $OID = &hashdoc::buffer($hash_text,$hash_len);
303 }
304
305 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
306}
307
308# returns the OID for this document
309sub get_OID {
310 my $self = shift (@_);
311 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
312 return $OID if (defined $OID);
313 return "NULL";
314}
315
316sub delete_OID {
317 my $self = shift (@_);
318
319 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
320}
321
322
323# methods for manipulating section names
324
325# returns the name of the top-most section (the top
326# level of the document
327sub get_top_section {
328 my $self = shift (@_);
329
330 return "";
331}
332
333# returns a section
334sub get_parent_section {
335 my $self = shift (@_);
336 my ($section) = @_;
337
338 $section =~ s/(^|\.)\d+$//;
339
340 return $section;
341}
342
343# returns the first child section (or the end child
344# if there isn't any)
345sub get_begin_child {
346 my $self = shift (@_);
347 my ($section) = @_;
348
349 my $section_ptr = $self->_lookup_section($section);
350 return "" unless defined $section_ptr;
351
352 if (defined $section_ptr->{'subsection_order'}->[0]) {
353 return "$section.$section_ptr->{'subsection_order'}->[0]";
354 }
355
356 return $self->get_end_child ($section);
357}
358
359# returns the next child of a parent section
360sub get_next_child {
361 my $self = shift (@_);
362 my ($section) = @_;
363
364 my $parent_section = $self->get_parent_section($section);
365 my $parent_section_ptr = $self->_lookup_section($parent_section);
366 return undef unless defined $parent_section_ptr;
367
368 my ($section_num) = $section =~ /(\d+)$/;
369 return undef unless defined $section_num;
370
371 my $i = 0;
372 my $section_order = $parent_section_ptr->{'subsection_order'};
373 while ($i < scalar(@$section_order)) {
374 last if $section_order->[$i] eq $section_num;
375 $i++;
376 }
377
378 $i++; # the next child
379 if ($i < scalar(@$section_order)) {
380 return $section_order->[$i] if $parent_section eq "";
381 return "$parent_section.$section_order->[$i]";
382 }
383
384 # no more sections in this level
385 return undef;
386}
387
388# returns a reference to a list of children
389sub get_children {
390 my $self = shift (@_);
391 my ($section) = @_;
392
393 my $section_ptr = $self->_lookup_section($section);
394 return [] unless defined $section_ptr;
395
396 my @children = @{$section_ptr->{'subsection_order'}};
397
398 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
399 return \@children;
400}
401
402# returns the child section one past the last one (which
403# is coded as "0")
404sub get_end_child {
405 my $self = shift (@_);
406 my ($section) = @_;
407
408 return $section . ".0" unless $section eq "";
409 return "0";
410}
411
412# returns the next section in book order
413sub get_next_section {
414 my $self = shift (@_);
415 my ($section) = @_;
416
417 return undef unless defined $section;
418
419 my $section_ptr = $self->_lookup_section($section);
420 return undef unless defined $section_ptr;
421
422 # first try to find first child
423 if (defined $section_ptr->{'subsection_order'}->[0]) {
424 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
425 return "$section.$section_ptr->{'subsection_order'}->[0]";
426 }
427
428 do {
429 # try to find sibling
430 my $next_child = $self->get_next_child ($section);
431 return $next_child if (defined $next_child);
432
433 # move up one level
434 $section = $self->get_parent_section ($section);
435 } while $section =~ /\d/;
436
437 return undef;
438}
439
440sub is_leaf_section {
441 my $self = shift (@_);
442 my ($section) = @_;
443
444 my $section_ptr = $self->_lookup_section($section);
445 return 1 unless defined $section_ptr;
446
447 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
448}
449
450# methods for dealing with sections
451
452# returns the name of the inserted section
453sub insert_section {
454 my $self = shift (@_);
455 my ($before_section) = @_;
456
457 # get the child to insert before and its parent section
458 my $parent_section = "";
459 my $before_child = "0";
460 my @before_section = split (/\./, $before_section);
461 if (scalar(@before_section) > 0) {
462 $before_child = pop (@before_section);
463 $parent_section = join (".", @before_section);
464 }
465
466 my $parent_section_ptr = $self->_lookup_section($parent_section);
467 if (!defined $parent_section_ptr) {
468 print STDERR "doc::insert_section couldn't find parent section " .
469 "$parent_section\n";
470 return;
471 }
472
473 # get the next section number
474 my $section_num = $parent_section_ptr->{'next_subsection'}++;
475
476 my $i = 0;
477 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
478 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
479 $i++;
480 }
481
482 # insert the section number into the order list
483 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
484
485 # add this section to the parent section
486 my $section_ptr = {'subsection_order'=>[],
487 'next_subsection'=>1,
488 'subsections'=>{},
489 'metadata'=>[],
490 'text'=>""};
491 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
492
493 # work out the full section number
494 my $section = $parent_section;
495 $section .= "." unless $section eq "";
496 $section .= $section_num;
497
498 return $section;
499}
500
501# creates a pre-named section
502sub create_named_section {
503 my $self = shift (@_);
504 my ($mastersection) = @_;
505
506 my ($num);
507 my $section = $mastersection;
508 my $sectionref = $self;
509
510 while ($section ne "") {
511 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
512 $num =~ s/^0+(\d)/$1/; # remove leading 0s
513 $section = "" unless defined $section;
514
515 if (defined $num) {
516 if (!defined $sectionref->{'subsections'}->{$num}) {
517 push (@{$sectionref->{'subsection_order'}}, $num);
518 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
519 'next_subsection'=>1,
520 'subsections'=>{},
521 'metadata'=>[],
522 'text'=>""};
523 if ($num >= $sectionref->{'next_subsection'}) {
524 $sectionref->{'next_subsection'} = $num + 1;
525 }
526 }
527 $sectionref = $sectionref->{'subsections'}->{$num};
528
529 } else {
530 print STDERR "doc::create_named_section couldn't create section ";
531 print STDERR "$mastersection\n";
532 last;
533 }
534 }
535}
536
537# returns a reference to a list of subsections
538sub list_subsections {
539 my $self = shift (@_);
540 my ($section) = @_;
541
542 my $section_ptr = $self->_lookup_section ($section);
543 if (!defined $section_ptr) {
544 print STDERR "doc::list_subsections couldn't find section $section\n";
545 return [];
546 }
547
548 return [@{$section_ptr->{'subsection_order'}}];
549}
550
551sub delete_section {
552 my $self = shift (@_);
553 my ($section) = @_;
554
555# my $section_ptr = {'subsection_order'=>[],
556# 'next_subsection'=>1,
557# 'subsections'=>{},
558# 'metadata'=>[],
559# 'text'=>""};
560
561 # if this is the top section reset everything
562 if ($section eq "") {
563 $self->{'subsection_order'} = [];
564 $self->{'subsections'} = {};
565 $self->{'metadata'} = [];
566 $self->{'text'} = "";
567 return;
568 }
569
570 # find the parent of the section to delete
571 my $parent_section = "";
572 my $child = "0";
573 my @section = split (/\./, $section);
574 if (scalar(@section) > 0) {
575 $child = pop (@section);
576 $parent_section = join (".", @section);
577 }
578
579 my $parent_section_ptr = $self->_lookup_section($parent_section);
580 if (!defined $parent_section_ptr) {
581 print STDERR "doc::delete_section couldn't find parent section " .
582 "$parent_section\n";
583 return;
584 }
585
586 # remove this section from the subsection_order list
587 my $i = 0;
588 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
589 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
590 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
591 last;
592 }
593 $i++;
594 }
595
596 # remove this section from the subsection hash
597 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
598 undef $parent_section_ptr->{'subsections'}->{$child};
599 }
600}
601
602#--
603# methods for dealing with metadata
604
605# set_metadata_element and get_metadata_element are for metadata
606# which should only have one value. add_meta_data and get_metadata
607# are for metadata which can have more than one value.
608
609# returns the first metadata value which matches field
610
611# This version of get metadata element works much like the one above,
612# except it allows for the namespace portion of a metadata element to
613# be ignored, thus if you are searching for dc.Title, the first piece
614# of matching metadata ending with the name Title (once any namespace
615# is removed) would be returned.
616# 28-11-2003 John Thompson
617sub get_metadata_element {
618 my $self = shift (@_);
619 my ($section, $field, $ignore_namespace) = @_;
620 my ($data);
621
622 $ignore_namespace = 0 unless defined $ignore_namespace;
623
624 my $section_ptr = $self->_lookup_section($section);
625 if (!defined $section_ptr) {
626 print STDERR "doc::get_metadata_element couldn't find section " .
627 "$section\n";
628 return;
629 }
630
631 # Remove the any namespace if we are being told to ignore them
632 if($ignore_namespace) {
633 $field =~ s/^\w*\.//;
634 }
635
636 foreach $data (@{$section_ptr->{'metadata'}}) {
637
638 my $data_name = $data->[0];
639 # Remove the any namespace if we are being told to ignore them
640 if($ignore_namespace) {
641 $data_name =~ s/^\w*\.//;
642 }
643
644 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
645 }
646
647 return undef; # was not found
648}
649
650# returns a list of the form [value1, value2, ...]
651sub get_metadata {
652 my $self = shift (@_);
653 my ($section, $field, $ignore_namespace) = @_;
654 my ($data);
655
656 $ignore_namespace = 0 unless defined $ignore_namespace;
657
658 my $section_ptr = $self->_lookup_section($section);
659 if (!defined $section_ptr) {
660 print STDERR "doc::get_metadata couldn't find section " .
661 "$section\n";
662 return;
663 }
664
665 # Remove the any namespace if we are being told to ignore them
666 if($ignore_namespace) {
667 $field =~ s/^\w*\.//;
668 }
669
670 my @metadata = ();
671 foreach $data (@{$section_ptr->{'metadata'}}) {
672
673 my $data_name = $data->[0];
674 # Remove the any namespace if we are being told to ignore them
675 if($ignore_namespace) {
676 $data_name =~ s/^\w*\.//;
677 }
678
679 push (@metadata, $data->[1]) if ($data_name eq $field);
680 }
681
682 return \@metadata;
683}
684
685# returns a list of the form [[field,value],[field,value],...]
686sub get_all_metadata {
687 my $self = shift (@_);
688 my ($section) = @_;
689
690 my $section_ptr = $self->_lookup_section($section);
691 if (!defined $section_ptr) {
692 print STDERR "doc::get_all_metadata couldn't find section " .
693 "$section\n";
694 return;
695 }
696
697 return $section_ptr->{'metadata'};
698}
699
700# $value is optional
701sub delete_metadata {
702 my $self = shift (@_);
703 my ($section, $field, $value) = @_;
704
705 my $section_ptr = $self->_lookup_section($section);
706 if (!defined $section_ptr) {
707 print STDERR "doc::delete_metadata couldn't find section " .
708 "$section\n";
709 return;
710 }
711
712 my $i = 0;
713 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
714 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
715 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
716 splice (@{$section_ptr->{'metadata'}}, $i, 1);
717 } else {
718 $i++;
719 }
720 }
721}
722
723sub delete_all_metadata {
724 my $self = shift (@_);
725 my ($section) = @_;
726
727 my $section_ptr = $self->_lookup_section($section);
728 if (!defined $section_ptr) {
729 print STDERR "doc::delete_all_metadata couldn't find section " .
730 "$section\n";
731 return;
732 }
733
734 $section_ptr->{'metadata'} = [];
735}
736
737sub set_metadata_element {
738 my $self = shift (@_);
739 my ($section, $field, $value) = @_;
740
741 $self->set_utf8_metadata_element ($section, $field,
742 &unicode::ascii2utf8(\$value));
743}
744
745# set_utf8_metadata_element assumes the text has already been
746# converted to the UTF-8 encoding.
747sub set_utf8_metadata_element {
748 my $self = shift (@_);
749 my ($section, $field, $value) = @_;
750
751 $self->delete_metadata ($section, $field);
752 $self->add_utf8_metadata ($section, $field, $value);
753}
754
755
756# add_metadata assumes the text is in (extended) ascii form. For
757# text which hash been already converted to the UTF-8 format use
758# add_utf8_metadata.
759sub add_metadata {
760 my $self = shift (@_);
761 my ($section, $field, $value) = @_;
762
763 $self->add_utf8_metadata ($section, $field,
764 &unicode::ascii2utf8(\$value));
765}
766
767# add_utf8_metadata assumes the text has already been converted
768# to the UTF-8 encoding.
769sub add_utf8_metadata {
770 my $self = shift (@_);
771 my ($section, $field, $value) = @_;
772
773 my $section_ptr = $self->_lookup_section($section);
774 if (!defined $section_ptr) {
775 print STDERR "doc::add_utf8_metadata couldn't find section " .
776 "$section\n";
777 return;
778 }
779 if (!defined $value) {
780 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
781 return;
782 }
783 if (!defined $field) {
784 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
785 return;
786 }
787
788 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
789}
790
791
792# methods for dealing with text
793
794# returns the text for a section
795sub get_text {
796 my $self = shift (@_);
797 my ($section) = @_;
798
799 my $section_ptr = $self->_lookup_section($section);
800 if (!defined $section_ptr) {
801 print STDERR "doc::get_text couldn't find section " .
802 "$section\n";
803 return "";
804 }
805
806 return $section_ptr->{'text'};
807}
808
809# returns the (utf-8 encoded) length of the text for a section
810sub get_text_length {
811 my $self = shift (@_);
812 my ($section) = @_;
813
814 my $section_ptr = $self->_lookup_section($section);
815 if (!defined $section_ptr) {
816 print STDERR "doc::get_text_length couldn't find section " .
817 "$section\n";
818 return 0;
819 }
820
821 return length ($section_ptr->{'text'});
822}
823
824sub delete_text {
825 my $self = shift (@_);
826 my ($section) = @_;
827
828 my $section_ptr = $self->_lookup_section($section);
829 if (!defined $section_ptr) {
830 print STDERR "doc::delete_text couldn't find section " .
831 "$section\n";
832 return;
833 }
834
835 $section_ptr->{'text'} = "";
836}
837
838# add_text assumes the text is in (extended) ascii form. For
839# text which has been already converted to the UTF-8 format
840# use add_utf8_text.
841sub add_text {
842 my $self = shift (@_);
843 my ($section, $text) = @_;
844
845 # convert the text to UTF-8 encoded unicode characters
846 # and add the text
847 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
848}
849
850
851# add_utf8_text assumes the text to be added has already
852# been converted to the UTF-8 encoding. For ascii text use
853# add_text
854sub add_utf8_text {
855 my $self = shift (@_);
856 my ($section, $text) = @_;
857
858 my $section_ptr = $self->_lookup_section($section);
859 if (!defined $section_ptr) {
860 print STDERR "doc::add_utf8_text couldn't find section " .
861 "$section\n";
862 return;
863 }
864
865 $section_ptr->{'text'} .= $text;
866}
867
868
869# methods for dealing with associated files
870
871# a file is associated with a document, NOT a section.
872# if section is defined it is noted in the data structure
873# only so that files associated from a particular section
874# may be removed later (using delete_section_assoc_files)
875sub associate_file {
876 my $self = shift (@_);
877 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
878 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
879
880 # remove all associated files with the same name
881 $self->delete_assoc_file ($assoc_filename);
882
883 push (@{$self->{'associated_files'}},
884 [$real_filename, $assoc_filename, $mime_type, $section]);
885}
886
887# returns a list of associated files in the form
888# [[real_filename, assoc_filename, mimetype], ...]
889sub get_assoc_files {
890 my $self = shift (@_);
891
892 return $self->{'associated_files'};
893}
894
895sub delete_section_assoc_files {
896 my $self = shift (@_);
897 my ($section) = @_;
898
899 my $i=0;
900 while ($i < scalar (@{$self->{'associated_files'}})) {
901 if (defined $self->{'associated_files'}->[$i]->[3] &&
902 $self->{'associated_files'}->[$i]->[3] eq $section) {
903 splice (@{$self->{'associated_files'}}, $i, 1);
904 } else {
905 $i++;
906 }
907 }
908}
909
910sub delete_assoc_file {
911 my $self = shift (@_);
912 my ($assoc_filename) = @_;
913
914 my $i=0;
915 while ($i < scalar (@{$self->{'associated_files'}})) {
916 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
917 splice (@{$self->{'associated_files'}}, $i, 1);
918 } else {
919 $i++;
920 }
921 }
922}
923
924sub reset_nextsection_ptr {
925 my $self = shift (@_);
926 my ($section) = @_;
927
928 my $section_ptr = $self->_lookup_section($section);
929 $section_ptr->{'next_subsection'} = 1;
930}
931
9321;
Note: See TracBrowser for help on using the repository browser.