source: tags/gsdl-2_70u-distribution/gsdl/perllib/doc.pm@ 11745

Last change on this file since 11745 was 11745, checked in by (none), 18 years ago

This commit was manufactured by cvs2svn to create tag
'gsdl-2_70u-distribution'.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 42.3 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use unicode;
37use util;
38use ghtml;
39use File::stat;
40##use hashdoc;
41
42# the document type may be indexed_doc, nonindexed_doc, or
43# classification
44
45my $OIDcount = 0;
46
47sub new {
48 my $class = shift (@_);
49 my ($source_filename, $doc_type) = @_;
50
51 my $self = bless {'associated_files'=>[],
52 'subsection_order'=>[],
53 'next_subsection'=>1,
54 'subsections'=>{},
55 'metadata'=>[],
56 'text'=>"",
57 'OIDtype'=>"hash"}, $class;
58
59 # used to set lastmodified here, but this can screw up the HASH ids, so
60 # the docsave processor now calls set_lastmodified
61
62 if (defined $source_filename) {
63 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
64
65 if (defined $collect_dir) {
66 my $dirsep = &util::get_dirsep();
67 if ($collect_dir !~ m/$dirsep$/) {
68 $collect_dir .= $dirsep;
69 }
70
71 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
72
73 # if from within GSDLCOLLECTDIR, then remove directory prefix
74 # so source_filename is realative to it. This is done to aid
75 # portability, i.e. the collection can be moved to somewhere
76 # else on the file system and the archives directory will still
77 # work. This is needed, for example in the applet version of
78 # GLI where GSDLHOME/collect on the server will be different to
79 # the collect directory of the remove user. Of course,
80 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
81 # it back into a full pathname.
82
83 if ($source_filename =~ /^$collect_dir(.*)$/) {
84 $source_filename = $1;
85 }
86 }
87
88 $self->set_source_filename ($source_filename);
89 }
90
91 $self->set_doc_type ($doc_type) if defined $doc_type;
92
93 return $self;
94}
95# set lastmodified for OAI purposes, added by GRB, moved by kjdon
96sub set_lastmodified {
97 my $self = shift (@_);
98
99 my $source_filename = $self->get_source_filename();
100 if ((defined $self->get_metadata_element ($self->get_top_section(), "gsdldoctype")) &&
101 (defined $source_filename) && (-e $source_filename)) {
102 my $file_stat = stat($source_filename);
103 my $mtime = $file_stat->mtime;
104 $self->add_utf8_metadata($self->get_top_section(), "lastmodified", $file_stat->mtime);
105 }
106}
107
108# clone the $self object
109sub duplicate {
110 my $self = shift (@_);
111
112 my $newobj = {};
113
114 foreach my $k (keys %$self) {
115 $newobj->{$k} = &clone ($self->{$k});
116 }
117
118 bless $newobj, ref($self);
119 return $newobj;
120}
121
122sub clone {
123 my ($from) = @_;
124 my $type = ref ($from);
125
126 if ($type eq "HASH") {
127 my $to = {};
128 foreach my $key (keys %$from) {
129 $to->{$key} = &clone ($from->{$key});
130 }
131 return $to;
132 } elsif ($type eq "ARRAY") {
133 my $to = [];
134 foreach my $v (@$from) {
135 push (@$to, &clone ($v));
136 }
137 return $to;
138 } else {
139 return $from;
140 }
141}
142
143sub set_OIDtype {
144 my $self = shift (@_);
145 my ($type) = @_;
146
147 if (defined $type && $type =~ /^(hash|incremental|dirname|assigned)$/) {
148 $self->{'OIDtype'} = $type;
149 } else {
150 $self->{'OIDtype'} = "hash";
151 }
152}
153
154sub set_source_filename {
155 my $self = shift (@_);
156 my ($source_filename) = @_;
157
158 $self->set_metadata_element ($self->get_top_section(),
159 "gsdlsourcefilename",
160 $source_filename);
161}
162
163sub set_converted_filename {
164 my $self = shift (@_);
165 my ($converted_filename) = @_;
166
167 $self->set_metadata_element ($self->get_top_section(),
168 "gsdlconvertedfilename",
169 $converted_filename);
170}
171
172
173# returns the source_filename as it was provided
174sub get_source_filename {
175 my $self = shift (@_);
176
177 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
178}
179
180# returns converted filename if available else returns source filename
181sub get_filename_for_hashing {
182 my $self = shift (@_);
183
184 my $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlconvertedfilename");
185
186 if (!defined $filename) {
187 my $plugin_name = $self->get_metadata_element ($self->get_top_section(), "Plugin");
188 # if NULPlug processed file, then don't give a filename
189 if (defined $plugin_name && $plugin_name eq "NULPlug") {
190 $filename = undef;
191 } else {
192 $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
193 }
194 }
195 return $filename;
196}
197
198sub set_doc_type {
199 my $self = shift (@_);
200 my ($doc_type) = @_;
201
202 $self->set_metadata_element ($self->get_top_section(),
203 "gsdldoctype",
204 $doc_type);
205}
206
207# returns the gsdldoctype as it was provided
208# the default of "indexed_doc" is used if no document
209# type was provided
210sub get_doc_type {
211 my $self = shift (@_);
212
213 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
214 return $doc_type if (defined $doc_type);
215 return "indexed_doc";
216}
217
218sub _escape_text {
219 my ($text) = @_;
220
221 # special characters in the gml encoding
222 $text =~ s/&/&/g; # this has to be first...
223 $text =~ s/</&lt;/g;
224 $text =~ s/>/&gt;/g;
225 $text =~ s/\"/&quot;/g;
226
227 return $text;
228}
229
230sub buffer_section_xml {
231 my $self = shift (@_);
232 my ($section) = @_;
233
234 my $section_ptr = $self->_lookup_section ($section);
235 return "" unless defined $section_ptr;
236
237 my $all_text = "<Section>\n";
238 $all_text .= " <Description>\n";
239
240 # output metadata
241 foreach my $data (@{$section_ptr->{'metadata'}}) {
242 my $escaped_value = &_escape_text($data->[1]);
243 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
244 }
245
246 $all_text .= " </Description>\n";
247
248 # output the text
249 $all_text .= " <Content>";
250 $all_text .= &_escape_text($section_ptr->{'text'});
251 $all_text .= "</Content>\n";
252
253 # output all the subsections
254 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
255 $all_text .= $self->buffer_section_xml("$section.$subsection");
256 }
257
258 $all_text .= "</Section>\n";
259
260 # make sure no nasty control characters have snuck through
261 # (XML::Parser will barf on anything it doesn't consider to be
262 # valid UTF-8 text, including things like \c@, \cC etc.)
263 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
264
265 return $all_text;
266}
267
268sub buffer_txt_section_xml {
269 my $self = shift(@_);
270 my ($section) = @_;
271
272 my $section_ptr = $self->_lookup_section ($section);
273
274 return "" unless defined $section_ptr;
275
276 my $all_text = "<Section>\n";
277
278 ##output the text
279 #$all_text .= " <Content>";
280 $all_text .= &_escape_text($section_ptr->{'text'});
281 #$all_text .= " </Content>\n";
282
283
284 #output all the subsections
285 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
286 $all_text .= $self->buffer_txt_section_xml("$section.$subsection");
287 }
288
289 $all_text .= "</Section>\n";
290
291
292 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
293 return $all_text;
294}
295
296sub buffer_mets_fileSection_section_xml() {
297 my $self = shift(@_);
298 my ($section,$version) = @_;
299
300 #$section="" unless defined $section;
301
302
303 my $section_ptr=$self->_lookup_section($section);
304 return "" unless defined $section_ptr;
305
306
307 # output fileSection by sections
308 my $section_num ="1". $section;
309
310 my $filePath = 'doctxt.xml';
311
312 my $opt_owner_id = "";
313 if ($version eq "fedora") {
314 $opt_owner_id = "OWNERID=\"M\"";
315 }
316
317 # output the fileSection details
318 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
319 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
320 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
321
322 my $xpath = "1".$section;
323 $xpath =~ s/\./]\/Section[/g;
324
325 $all_text .= $xpath;
326
327 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
328 $all_text .= " </mets:file>\n";
329 $all_text .= " </mets:fileGrp>\n";
330
331
332 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
333 $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version);
334 }
335
336 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
337
338 return $all_text;
339}
340
341sub buffer_mets_fileWhole_section_xml(){
342 my $self = shift(@_);
343 my ($section,$version,$working_dir) = @_;
344
345 my $section_ptr = $self-> _lookup_section($section);
346 return "" unless defined $section_ptr;
347
348 my $all_text="" unless defined $all_txt;
349
350 my $fileID=0;
351
352 # Output the fileSection for the whole section
353 # => get the sourcefile and associative file
354
355 my $id_root = "";
356 my $opt_owner_id = "";
357
358 if ($version eq "fedora") {
359 $opt_owner_id = "OWNERID=\"M\"";
360 }
361 else {
362 $id_root = "default";
363 }
364
365 if ($version ne "fedora") {
366 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
367 }
368
369 foreach my $data (@{$section_ptr->{'metadata'}}){
370 my $escaped_value = &_escape_text($data->[1]);
371
372 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
373 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
374
375 ++$fileID;
376 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
377
378 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
379
380 $all_text .= " </mets:file>\n";
381 }
382
383 if ($data->[0] eq "gsdlassocfile"){
384
385 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
386 my $assoc_file = $1;
387 my $mime_type = $2;
388 my $assoc_dir = $3;
389
390 if ($version eq "fedora") {
391 $id_root = $assoc_file;
392 $id_root =~ s/\//_/g;
393 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
394 }
395
396 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
397 ++$fileID;
398
399 my $mime_attr = "MIMETYPE=\"$mime_type\"";
400 my $xlink_title = "xlink:title=\"$assoc_file\"";
401
402 my $id_attr;
403 my $xlink_href;
404
405 if ($version eq "fedora") {
406 $id_attr = "ID=\"$id_root.0\"";
407
408 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
409 if (!defined $fedora_prefix) {
410 $xlink_href = "xlink:href=\"$assfilePath\"";
411 }
412 else
413 {
414 my $gsdlhome = $ENV{'GSDLHOME'};
415 my $gsdl_href = "$working_dir/$assfilePath";
416
417 $gsdl_href =~ s/^$gsdlhome(\/)?//;
418 $gsdl_href = "/gsdl/$gsdl_href";
419
420 my $fserver = $ENV{'FEDORA_HOSTNAME'};
421 my $fport = $ENV{'FEDORA_SERVER_PORT'};
422
423 my $fdomain = "http://$fserver:$fport";
424 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
425 }
426
427 my $top_section = $self->get_top_section();
428 my $id = $self->get_metadata_element($top_section,"Identifier");
429 }
430 else {
431 $id_attr = "ID=\"$id_root.$fileID\"";
432 $xlink_href = "xlink:href=\"$assfilePath\"";
433 }
434
435 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
436 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
437
438 $all_text .= " </mets:file>\n";
439
440 if ($version eq "fedora") {
441 $all_text .= " </mets:fileGrp>\n";
442 }
443
444 }
445 }
446
447 if ($version ne "fedora") {
448 $all_text .= " </mets:fileGrp>\n";
449 }
450
451 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
452
453 return $all_text;
454}
455
456sub buffer_mets_StructMapSection_section_xml(){
457 my $self = shift(@_);
458 my ($section, $order_numref) = @_;
459
460 $section="" unless defined $section;
461
462
463 my $section_ptr=$self->_lookup_section($section);
464 return "" unless defined $section_ptr;
465
466
467 # output fileSection by Sections
468 my $section_num ="1". $section;
469 my $dmd_num = $section_num;
470
471 ##**output the dmdSection details
472 #if ($section_num eq "1") {
473 # $dmd_num = "0";
474 #}
475
476 #**output the StructMap details
477
478 my $dmdid_attr = "DM$dmd_num";
479
480 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
481 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
482 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
483
484 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
485
486
487 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
488 $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref);
489 }
490
491 $all_text .= " </mets:div>\n";
492
493 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
494
495 return $all_text;
496}
497
498
499sub buffer_mets_StructMapWhole_section_xml(){
500 my $self = shift(@_);
501 my ($section) = @_;
502
503 my $section_ptr = $self-> _lookup_section($section);
504 return "" unless defined $section_ptr;
505
506 my $all_text="" unless defined $all_txt;
507 my $fileID=0;
508 my $order_num = 0;
509
510 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
511
512 #** output the StructMapSection for the whole section
513 # get the sourcefile and associative file
514
515 foreach my $data (@{$section_ptr->{'metadata'}}){
516 my $escaped_value = &_escape_text($data->[1]);
517
518 if ($data->[0] eq "gsdlsourcefilename") {
519 ++$fileID;
520 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
521 }
522
523 if ($data->[0] eq "gsdlassocfile"){
524 ++$fileID;
525 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
526 }
527 }
528 $all_text .= " </mets:div>\n";
529
530 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
531
532 return $all_text;
533}
534
535
536sub buffer_mets_dmdSection_section_xml(){
537 my $self = shift(@_);
538 my ($section,$version) = @_;
539
540 $section="" unless defined $section;
541
542 my $section_ptr=$self->_lookup_section($section);
543 return "" unless defined $section_ptr;
544
545 # convert section number
546 my $section_num ="1". $section;
547 my $dmd_num = $section_num;
548
549 # #**output the dmdSection details
550 # if ($section_num eq "1") {
551 # $dmd_num = "0";
552 # }
553
554
555 my $all_text = "";
556
557 my $label_attr = "";
558 if ($version eq "fedora") {
559 $all_text .= "<mets:amdSec ID=\"DC\" >\n";
560 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
561
562 $label_attr = "LABEL=\"Dublin Core Metadata\"";
563 }
564 else {
565 # TODO::
566 #print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
567 #print STDERR "***** Check to see if <techMD> required\n";
568 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
569
570 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
571 }
572
573 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
574 $all_text .= " <mets:xmlData>\n";
575
576 if ($version eq "fedora") {
577 my $dc_namespace = "";
578 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
579 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
580
581 $all_text .= " <oai_dc:dc $dc_namespace>\n";
582
583 $all_text .= $self->buffer_dc_section($section,"oai_dc");
584 $all_text .= " </oai_dc:dc>\n";
585 }
586 else {
587 foreach my $data (@{$section_ptr->{'metadata'}}){
588 my $escaped_value = &_escape_text($data->[1]);
589 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
590 if ($data->[0] eq "dc.Title") {
591 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
592 }
593 }
594 }
595
596 $all_text .= " </mets:xmlData>\n";
597 $all_text .= " </mets:mdWrap>\n";
598
599 if ($version eq "fedora") {
600 $all_text .= " </mets:techMD>\n";
601 $all_text .= "</mets:amdSec>\n";
602 }
603 else {
604 $all_text .= "</mets:dmdSec>\n";
605 }
606
607
608 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
609 $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version);
610 }
611
612 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
613
614 return $all_text;
615}
616
617sub output_section {
618 my $self = shift (@_);
619 my ($handle, $section) = @_;
620
621 print $handle $self->buffer_section_xml($section);
622}
623
624# print out DSpace dublin_core metadata section
625sub output_dspace_section {
626 my $self = shift (@_);
627 my ($handle, $section) = @_;
628
629 my $section_ptr = $self->_lookup_section ($section);
630 return "" unless defined $section_ptr;
631
632 my $all_text = "<Section>\n";
633 $all_text .= " <Description>\n";
634
635 # output metadata
636 foreach my $data (@{$section_ptr->{'metadata'}}) {
637 my $escaped_value = &_escape_text($data->[1]);
638 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
639 }
640
641 $all_text .= " </Description>\n";
642 $all_text .= "</Section>\n";
643
644 # make sure no nasty control characters have snuck through
645 # (XML::Parser will barf on anything it doesn't consider to be
646 # valid UTF-8 text, including things like \c@, \cC etc.)
647 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
648
649 return $all_text;
650}
651
652# print out doctxt.xml file
653sub output_txt_section {
654 my $self = shift (@_);
655 my ($handle, $section) = @_;
656
657 print $handle $self->buffer_txt_section_xml($section);
658}
659
660# print out docmets.xml file
661sub output_mets_section {
662 my $self = shift(@_);
663 my ($handle, $section, $version, $working_dir) = @_;
664
665 # print out the dmdSection
666 print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);
667
668 print $handle "<mets:fileSec>\n";
669 if ($version eq "fedora") {
670 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
671 }
672
673 # print out the fileSection by sections
674 print $handle $self->buffer_mets_fileSection_section_xml($section,$version);
675
676 # print out the whole fileSection
677 print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);
678
679 if ($version eq "fedora") {
680 print $handle " </mets:fileGrp>\n";
681 }
682 print $handle "</mets:fileSec>\n";
683
684 # print out the StructMapSection by sections
685
686 my $struct_type;
687 if ($version eq "fedora") {
688 $struct_type = "fedora:dsBindingMap";
689 }
690 else {
691 $struct_type = "Section";
692 }
693
694 if ($version ne "fedora") {
695 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
696 my $order_num=0;
697 print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);
698 print $handle "</mets:structMap>\n";
699
700 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
701 print $handle $self->buffer_mets_StructMapWhole_section_xml($section);
702 print $handle "</mets:structMap>\n";
703 }
704
705}
706
707my $dc_set = { Title => 1,
708 Creator => 1,
709 Subject => 1,
710 Description => 1,
711 Publisher => 1,
712 Contributors => 1,
713 Date => 1,
714 Type => 1,
715 Format => 1,
716 Identifier => 1,
717 Source => 1,
718 Language => 1,
719 Relation => 1,
720 Coverage => 1,
721 Rights => 1};
722
723
724
725
726# Build up dublin_core metadata. Priority given to dc.* over ex.*
727
728sub buffer_dc_section {
729 my $self = shift(@_);
730 my ($section, $version) = @_;
731
732 # build up string of dublin core metadata
733 $section="" unless defined $section;
734
735 my $section_ptr=$self->_lookup_section($section);
736 return "" unless defined $section_ptr;
737
738 my $explicit_dc = {};
739 my $explicit_ex = {};
740
741 my $all_text="";
742 foreach my $data (@{$section_ptr->{'metadata'}}){
743 my $escaped_value = &_escape_text($data->[1]);
744 if ($data->[0]=~ m/^dc\./) {
745 $data->[0] =~ tr/[A-Z]/[a-z]/;
746
747 $data->[0] =~ m/^dc\.(.*)/;
748 my $dc_element = $1;
749
750 if (!defined $explicit_dc->{$dc_element}) {
751 $explicit_dc->{$dc_element} = [];
752 }
753 push(@{$explicit_dc->{$dc_element}},$escaped_value);
754
755 #$all_text .= ' <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
756 if (defined $version && ($version eq "oai_dc")) {
757 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
758 }
759 else {
760 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
761 }
762
763 }
764 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
765 $data->[0] =~ m/^(ex\.)?(.*)/;
766 my $ex_element = $2;
767 my $lc_ex_element = lc($ex_element);
768
769 if (defined $dc_set->{$ex_element}) {
770 if (!defined $explicit_ex->{$lc_ex_element}) {
771 $explicit_ex->{$lc_ex_element} = [];
772 }
773 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
774 }
775 }
776 }
777
778 # go through dc_set and for any element *not* defined in explicit_dc
779 # that do exist in explicit_ex, add it in as metadata
780 foreach my $k ( keys %$dc_set ) {
781 my $lc_k = lc($k);
782
783 if (!defined $explicit_dc->{$lc_k}) {
784 if (defined $explicit_ex->{$lc_k}) {
785
786 foreach my $v (@{$explicit_ex->{$lc_k}}) {
787 my $dc_element = $lc_k;
788 my $escaped_value = $v;
789
790 if (defined $version && ($version eq "oai_dc")) {
791 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
792 }
793 else {
794 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
795 }
796
797 }
798 }
799 }
800 }
801
802 if ($all_text eq "") {
803 $all_text .= " There is no Dublin Core metatdata in this document\n";
804 }
805 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
806
807 return $all_text;
808}
809
810
811# Print out dublin_core metadata
812sub output_dc_section {
813 my $self = shift(@_);
814 my ($handle, $section, $version) = @_;
815
816 my $all_text = $self->buffer_dc_section($section,$version);
817
818 print $handle $all_text;
819}
820
821
822# look up the reference to the a particular section
823sub _lookup_section {
824 my $self = shift (@_);
825 my ($section) = @_;
826
827 my ($num);
828 my $sectionref = $self;
829
830 while (defined $section && $section ne "") {
831 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
832 $num =~ s/^0+(\d)/$1/; # remove leading 0s
833 $section = "" unless defined $section;
834
835 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
836 $sectionref = $sectionref->{'subsections'}->{$num};
837 } else {
838 return undef;
839 }
840 }
841
842 return $sectionref;
843}
844
845# calculate OID by hashing the contents of the document
846sub _calc_OID {
847 my $self = shift (@_);
848 my ($filename) = @_;
849
850 my $osexe = &util::get_os_exe();
851
852 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
853 $ENV{'GSDLOS'},"hashfile$osexe");
854
855 my $result = "NULL";
856
857 if (-e "$hashfile_exe") {
858# $result = `\"$hashfile_exe\" \"$filename\"`;
859 $result = `hashfile$osexe \"$filename\"`;
860 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
861
862 } else {
863 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
864 }
865 return "HASH$result";
866}
867
868# methods dealing with OID, not groups of them.
869
870# if $OID is not provided one is calculated
871sub set_OID {
872 my $self = shift (@_);
873 my ($OID) = @_;
874
875 my $use_hash_oid = 0;
876 # if an OID wasn't provided claculate one
877 if (!defined $OID) {
878 $OID = "NULL";
879 if ($self->{'OIDtype'} eq "hash") {
880 $use_hash_oid = 1;
881 } elsif ($self->{'OIDtype'} eq "incremental") {
882 $OID = "D" . $OIDcount;
883 $OIDcount ++;
884
885 } elsif ($self->{'OIDtype'} eq "dirname") {
886 $OID = 'J';
887 my $filename = $self->get_source_filename();
888 if (defined($filename)) { # && -e $filename) {
889 $OID = &File::Basename::dirname($filename);
890 if (defined $OID) {
891 $OID = 'J'.&File::Basename::basename($OID);
892 $OID =~ s/\.//; #remove any periods
893 } else {
894 print STDERR "Failed to find base for filename ($filename)...generating hash id\n";
895 $use_hash_oid = 1;
896 }
897 } else {
898 print STDERR "Failed to find filename, generating hash id\n";
899 $use_hash_oid = 1;
900 }
901
902 } elsif ($self->{'OIDtype'} eq "assigned") {
903 my $identifier = $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
904 if (defined $identifier && $identifier ne "") {
905 $OID = "D" . $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
906 $OID =~ s/\.//; #remove any periods
907 } else {
908 # need a hash id
909 print STDERR "no dc.Identifier found, generating hash id\n";
910 $use_hash_oid = 1;
911 }
912
913 } else {
914 $use_hash_oid = 1;
915 }
916
917 if ($use_hash_oid) {
918
919 # "hash" OID - feed file to hashfile.exe
920 #my $filename = $self->get_source_filename();
921 # we want to use the converted file for hashing if available
922 # cos its quicker
923 my $filename = $self->get_filename_for_hashing();
924 # -z: don't want to hash on the file if it is zero size
925 if (defined($filename) && -e $filename && !-z $filename) {
926 $OID = $self->_calc_OID ($filename);
927 } else {
928 $filename = &util::get_tmp_filename();
929 if (!open (OUTFILE, ">$filename")) {
930 print STDERR "doc::set_OID could not write to $filename\n";
931 } else {
932 $self->output_section('OUTFILE', $self->get_top_section(),
933 undef, 1);
934 close (OUTFILE);
935 }
936 $OID = $self->_calc_OID ($filename);
937 &util::rm ($filename);
938 }
939 }
940 }
941 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
942}
943
944# this uses hashdoc (embedded c thingy) which is faster but still
945# needs a little work to be suffiently stable
946sub ___set_OID {
947 my $self = shift (@_);
948 my ($OID) = @_;
949
950 # if an OID wasn't provided then calculate hash value based on document
951 if (!defined $OID)
952 {
953 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
954 undef, 1);
955 my $hash_len = length($hash_text);
956
957 $OID = &hashdoc::buffer($hash_text,$hash_len);
958 }
959
960 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
961}
962
963# returns the OID for this document
964sub get_OID {
965 my $self = shift (@_);
966 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
967 return $OID if (defined $OID);
968 return "NULL";
969}
970
971sub delete_OID {
972 my $self = shift (@_);
973
974 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
975}
976
977
978# methods for manipulating section names
979
980# returns the name of the top-most section (the top
981# level of the document
982sub get_top_section {
983 my $self = shift (@_);
984
985 return "";
986}
987
988# returns a section
989sub get_parent_section {
990 my $self = shift (@_);
991 my ($section) = @_;
992
993 $section =~ s/(^|\.)\d+$//;
994
995 return $section;
996}
997
998# returns the first child section (or the end child
999# if there isn't any)
1000sub get_begin_child {
1001 my $self = shift (@_);
1002 my ($section) = @_;
1003
1004 my $section_ptr = $self->_lookup_section($section);
1005 return "" unless defined $section_ptr;
1006
1007 if (defined $section_ptr->{'subsection_order'}->[0]) {
1008 return "$section.$section_ptr->{'subsection_order'}->[0]";
1009 }
1010
1011 return $self->get_end_child ($section);
1012}
1013
1014# returns the next child of a parent section
1015sub get_next_child {
1016 my $self = shift (@_);
1017 my ($section) = @_;
1018
1019 my $parent_section = $self->get_parent_section($section);
1020 my $parent_section_ptr = $self->_lookup_section($parent_section);
1021 return undef unless defined $parent_section_ptr;
1022
1023 my ($section_num) = $section =~ /(\d+)$/;
1024 return undef unless defined $section_num;
1025
1026 my $i = 0;
1027 my $section_order = $parent_section_ptr->{'subsection_order'};
1028 while ($i < scalar(@$section_order)) {
1029 last if $section_order->[$i] eq $section_num;
1030 $i++;
1031 }
1032
1033 $i++; # the next child
1034 if ($i < scalar(@$section_order)) {
1035 return $section_order->[$i] if $parent_section eq "";
1036 return "$parent_section.$section_order->[$i]";
1037 }
1038
1039 # no more sections in this level
1040 return undef;
1041}
1042
1043# returns a reference to a list of children
1044sub get_children {
1045 my $self = shift (@_);
1046 my ($section) = @_;
1047
1048 my $section_ptr = $self->_lookup_section($section);
1049 return [] unless defined $section_ptr;
1050
1051 my @children = @{$section_ptr->{'subsection_order'}};
1052
1053 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
1054 return \@children;
1055}
1056
1057# returns the child section one past the last one (which
1058# is coded as "0")
1059sub get_end_child {
1060 my $self = shift (@_);
1061 my ($section) = @_;
1062
1063 return $section . ".0" unless $section eq "";
1064 return "0";
1065}
1066
1067# returns the next section in book order
1068sub get_next_section {
1069 my $self = shift (@_);
1070 my ($section) = @_;
1071
1072 return undef unless defined $section;
1073
1074 my $section_ptr = $self->_lookup_section($section);
1075 return undef unless defined $section_ptr;
1076
1077 # first try to find first child
1078 if (defined $section_ptr->{'subsection_order'}->[0]) {
1079 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
1080 return "$section.$section_ptr->{'subsection_order'}->[0]";
1081 }
1082
1083 do {
1084 # try to find sibling
1085 my $next_child = $self->get_next_child ($section);
1086 return $next_child if (defined $next_child);
1087
1088 # move up one level
1089 $section = $self->get_parent_section ($section);
1090 } while $section =~ /\d/;
1091
1092 return undef;
1093}
1094
1095sub is_leaf_section {
1096 my $self = shift (@_);
1097 my ($section) = @_;
1098
1099 my $section_ptr = $self->_lookup_section($section);
1100 return 1 unless defined $section_ptr;
1101
1102 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
1103}
1104
1105# methods for dealing with sections
1106
1107# returns the name of the inserted section
1108sub insert_section {
1109 my $self = shift (@_);
1110 my ($before_section) = @_;
1111
1112 # get the child to insert before and its parent section
1113 my $parent_section = "";
1114 my $before_child = "0";
1115 my @before_section = split (/\./, $before_section);
1116 if (scalar(@before_section) > 0) {
1117 $before_child = pop (@before_section);
1118 $parent_section = join (".", @before_section);
1119 }
1120
1121 my $parent_section_ptr = $self->_lookup_section($parent_section);
1122 if (!defined $parent_section_ptr) {
1123 print STDERR "doc::insert_section couldn't find parent section " .
1124 "$parent_section\n";
1125 return;
1126 }
1127
1128 # get the next section number
1129 my $section_num = $parent_section_ptr->{'next_subsection'}++;
1130
1131 my $i = 0;
1132 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
1133 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
1134 $i++;
1135 }
1136
1137 # insert the section number into the order list
1138 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
1139
1140 # add this section to the parent section
1141 my $section_ptr = {'subsection_order'=>[],
1142 'next_subsection'=>1,
1143 'subsections'=>{},
1144 'metadata'=>[],
1145 'text'=>""};
1146 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
1147
1148 # work out the full section number
1149 my $section = $parent_section;
1150 $section .= "." unless $section eq "";
1151 $section .= $section_num;
1152
1153 return $section;
1154}
1155
1156# creates a pre-named section
1157sub create_named_section {
1158 my $self = shift (@_);
1159 my ($mastersection) = @_;
1160
1161 my ($num);
1162 my $section = $mastersection;
1163 my $sectionref = $self;
1164
1165 while ($section ne "") {
1166 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
1167 $num =~ s/^0+(\d)/$1/; # remove leading 0s
1168 $section = "" unless defined $section;
1169
1170 if (defined $num) {
1171 if (!defined $sectionref->{'subsections'}->{$num}) {
1172 push (@{$sectionref->{'subsection_order'}}, $num);
1173 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
1174 'next_subsection'=>1,
1175 'subsections'=>{},
1176 'metadata'=>[],
1177 'text'=>""};
1178 if ($num >= $sectionref->{'next_subsection'}) {
1179 $sectionref->{'next_subsection'} = $num + 1;
1180 }
1181 }
1182 $sectionref = $sectionref->{'subsections'}->{$num};
1183
1184 } else {
1185 print STDERR "doc::create_named_section couldn't create section ";
1186 print STDERR "$mastersection\n";
1187 last;
1188 }
1189 }
1190}
1191
1192# returns a reference to a list of subsections
1193sub list_subsections {
1194 my $self = shift (@_);
1195 my ($section) = @_;
1196
1197 my $section_ptr = $self->_lookup_section ($section);
1198 if (!defined $section_ptr) {
1199 print STDERR "doc::list_subsections couldn't find section $section\n";
1200 return [];
1201 }
1202
1203 return [@{$section_ptr->{'subsection_order'}}];
1204}
1205
1206sub delete_section {
1207 my $self = shift (@_);
1208 my ($section) = @_;
1209
1210# my $section_ptr = {'subsection_order'=>[],
1211# 'next_subsection'=>1,
1212# 'subsections'=>{},
1213# 'metadata'=>[],
1214# 'text'=>""};
1215
1216 # if this is the top section reset everything
1217 if ($section eq "") {
1218 $self->{'subsection_order'} = [];
1219 $self->{'subsections'} = {};
1220 $self->{'metadata'} = [];
1221 $self->{'text'} = "";
1222 return;
1223 }
1224
1225 # find the parent of the section to delete
1226 my $parent_section = "";
1227 my $child = "0";
1228 my @section = split (/\./, $section);
1229 if (scalar(@section) > 0) {
1230 $child = pop (@section);
1231 $parent_section = join (".", @section);
1232 }
1233
1234 my $parent_section_ptr = $self->_lookup_section($parent_section);
1235 if (!defined $parent_section_ptr) {
1236 print STDERR "doc::delete_section couldn't find parent section " .
1237 "$parent_section\n";
1238 return;
1239 }
1240
1241 # remove this section from the subsection_order list
1242 my $i = 0;
1243 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
1244 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
1245 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
1246 last;
1247 }
1248 $i++;
1249 }
1250
1251 # remove this section from the subsection hash
1252 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
1253 undef $parent_section_ptr->{'subsections'}->{$child};
1254 }
1255}
1256
1257#--
1258# methods for dealing with metadata
1259
1260# set_metadata_element and get_metadata_element are for metadata
1261# which should only have one value. add_meta_data and get_metadata
1262# are for metadata which can have more than one value.
1263
1264# returns the first metadata value which matches field
1265
1266# This version of get metadata element works much like the one above,
1267# except it allows for the namespace portion of a metadata element to
1268# be ignored, thus if you are searching for dc.Title, the first piece
1269# of matching metadata ending with the name Title (once any namespace
1270# is removed) would be returned.
1271# 28-11-2003 John Thompson
1272sub get_metadata_element {
1273 my $self = shift (@_);
1274 my ($section, $field, $ignore_namespace) = @_;
1275 my ($data);
1276
1277 $ignore_namespace = 0 unless defined $ignore_namespace;
1278
1279 my $section_ptr = $self->_lookup_section($section);
1280 if (!defined $section_ptr) {
1281 print STDERR "doc::get_metadata_element couldn't find section ", $section, "\n";
1282 return;
1283 }
1284
1285 # Remove the any namespace if we are being told to ignore them
1286 if($ignore_namespace) {
1287 $field =~ s/^\w*\.//;
1288 }
1289
1290 foreach $data (@{$section_ptr->{'metadata'}}) {
1291
1292 my $data_name = $data->[0];
1293 # Remove the any namespace if we are being told to ignore them
1294 if($ignore_namespace) {
1295 $data_name =~ s/^\w*\.//;
1296 }
1297
1298 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
1299 }
1300
1301 return undef; # was not found
1302}
1303
1304# returns a list of the form [value1, value2, ...]
1305sub get_metadata {
1306 my $self = shift (@_);
1307 my ($section, $field, $ignore_namespace) = @_;
1308 my ($data);
1309
1310 $ignore_namespace = 0 unless defined $ignore_namespace;
1311
1312 my $section_ptr = $self->_lookup_section($section);
1313 if (!defined $section_ptr) {
1314 print STDERR "doc::get_metadata couldn't find section ",
1315 $section, "\n";
1316 return;
1317 }
1318
1319 # Remove the any namespace if we are being told to ignore them
1320 if($ignore_namespace) {
1321 $field =~ s/^\w*\.//;
1322 }
1323
1324 my @metadata = ();
1325 foreach $data (@{$section_ptr->{'metadata'}}) {
1326
1327 my $data_name = $data->[0];
1328 # Remove the any namespace if we are being told to ignore them
1329 if($ignore_namespace) {
1330 $data_name =~ s/^\w*\.//;
1331 }
1332
1333 push (@metadata, $data->[1]) if ($data_name eq $field);
1334 }
1335
1336 return \@metadata;
1337}
1338
1339# returns a list of the form [[field,value],[field,value],...]
1340sub get_all_metadata {
1341 my $self = shift (@_);
1342 my ($section) = @_;
1343
1344 my $section_ptr = $self->_lookup_section($section);
1345 if (!defined $section_ptr) {
1346 print STDERR "doc::get_all_metadata couldn't find section ", $section, "\n";
1347 return;
1348 }
1349
1350 return $section_ptr->{'metadata'};
1351}
1352
1353# $value is optional
1354sub delete_metadata {
1355 my $self = shift (@_);
1356 my ($section, $field, $value) = @_;
1357
1358 my $section_ptr = $self->_lookup_section($section);
1359 if (!defined $section_ptr) {
1360 print STDERR "doc::delete_metadata couldn't find section ", $section, "\n";
1361 return;
1362 }
1363
1364 my $i = 0;
1365 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
1366 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
1367 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
1368 splice (@{$section_ptr->{'metadata'}}, $i, 1);
1369 } else {
1370 $i++;
1371 }
1372 }
1373}
1374
1375sub delete_all_metadata {
1376 my $self = shift (@_);
1377 my ($section) = @_;
1378
1379 my $section_ptr = $self->_lookup_section($section);
1380 if (!defined $section_ptr) {
1381 print STDERR "doc::delete_all_metadata couldn't find section ", $section, "\n";
1382 return;
1383 }
1384
1385 $section_ptr->{'metadata'} = [];
1386}
1387
1388sub set_metadata_element {
1389 my $self = shift (@_);
1390 my ($section, $field, $value) = @_;
1391
1392 $self->set_utf8_metadata_element ($section, $field,
1393 &unicode::ascii2utf8(\$value));
1394}
1395
1396# set_utf8_metadata_element assumes the text has already been
1397# converted to the UTF-8 encoding.
1398sub set_utf8_metadata_element {
1399 my $self = shift (@_);
1400 my ($section, $field, $value) = @_;
1401
1402 $self->delete_metadata ($section, $field);
1403 $self->add_utf8_metadata ($section, $field, $value);
1404}
1405
1406
1407# add_metadata assumes the text is in (extended) ascii form. For
1408# text which has already been converted to the UTF-8 format use
1409# add_utf8_metadata.
1410sub add_metadata {
1411 my $self = shift (@_);
1412 my ($section, $field, $value) = @_;
1413
1414 $self->add_utf8_metadata ($section, $field,
1415 &unicode::ascii2utf8(\$value));
1416}
1417
1418sub add_utf8_metadata {
1419 my $self = shift (@_);
1420 my ($section, $field, $value) = @_;
1421
1422 my $section_ptr = $self->_lookup_section($section);
1423 if (!defined $section_ptr) {
1424 print STDERR "doc::add_utf8_metadata couldn't find section ", $section, "\n";
1425 return;
1426 }
1427 if (!defined $value) {
1428 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
1429 return;
1430 }
1431 if (!defined $field) {
1432 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
1433 return;
1434 }
1435
1436 #print STDERR "###$field=$value\n";
1437 # double check that the value is utf-8
1438 if (unicode::ensure_utf8(\$value)) {
1439 print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
1440 }
1441
1442 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
1443}
1444
1445
1446# methods for dealing with text
1447
1448# returns the text for a section
1449sub get_text {
1450 my $self = shift (@_);
1451 my ($section) = @_;
1452
1453 my $section_ptr = $self->_lookup_section($section);
1454 if (!defined $section_ptr) {
1455 print STDERR "doc::get_text couldn't find section " .
1456 "$section\n";
1457 return "";
1458 }
1459
1460 return $section_ptr->{'text'};
1461}
1462
1463# returns the (utf-8 encoded) length of the text for a section
1464sub get_text_length {
1465 my $self = shift (@_);
1466 my ($section) = @_;
1467
1468 my $section_ptr = $self->_lookup_section($section);
1469 if (!defined $section_ptr) {
1470 print STDERR "doc::get_text_length couldn't find section " .
1471 "$section\n";
1472 return 0;
1473 }
1474
1475 return length ($section_ptr->{'text'});
1476}
1477
1478sub delete_text {
1479 my $self = shift (@_);
1480 my ($section) = @_;
1481
1482 my $section_ptr = $self->_lookup_section($section);
1483 if (!defined $section_ptr) {
1484 print STDERR "doc::delete_text couldn't find section " .
1485 "$section\n";
1486 return;
1487 }
1488
1489 $section_ptr->{'text'} = "";
1490}
1491
1492# add_text assumes the text is in (extended) ascii form. For
1493# text which has been already converted to the UTF-8 format
1494# use add_utf8_text.
1495sub add_text {
1496 my $self = shift (@_);
1497 my ($section, $text) = @_;
1498
1499 # convert the text to UTF-8 encoded unicode characters
1500 # and add the text
1501 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
1502}
1503
1504
1505# add_utf8_text assumes the text to be added has already
1506# been converted to the UTF-8 encoding. For ascii text use
1507# add_text
1508sub add_utf8_text {
1509 my $self = shift (@_);
1510 my ($section, $text) = @_;
1511
1512 my $section_ptr = $self->_lookup_section($section);
1513 if (!defined $section_ptr) {
1514 print STDERR "doc::add_utf8_text couldn't find section " .
1515 "$section\n";
1516 return;
1517 }
1518
1519 $section_ptr->{'text'} .= $text;
1520}
1521
1522
1523# methods for dealing with associated files
1524
1525# a file is associated with a document, NOT a section.
1526# if section is defined it is noted in the data structure
1527# only so that files associated from a particular section
1528# may be removed later (using delete_section_assoc_files)
1529sub associate_file {
1530 my $self = shift (@_);
1531 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
1532 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
1533
1534 # remove all associated files with the same name
1535 $self->delete_assoc_file ($assoc_filename);
1536
1537 push (@{$self->{'associated_files'}},
1538 [$real_filename, $assoc_filename, $mime_type, $section]);
1539}
1540
1541# returns a list of associated files in the form
1542# [[real_filename, assoc_filename, mimetype], ...]
1543sub get_assoc_files {
1544 my $self = shift (@_);
1545
1546 return $self->{'associated_files'};
1547}
1548
1549sub delete_section_assoc_files {
1550 my $self = shift (@_);
1551 my ($section) = @_;
1552
1553 my $i=0;
1554 while ($i < scalar (@{$self->{'associated_files'}})) {
1555 if (defined $self->{'associated_files'}->[$i]->[3] &&
1556 $self->{'associated_files'}->[$i]->[3] eq $section) {
1557 splice (@{$self->{'associated_files'}}, $i, 1);
1558 } else {
1559 $i++;
1560 }
1561 }
1562}
1563
1564sub delete_assoc_file {
1565 my $self = shift (@_);
1566 my ($assoc_filename) = @_;
1567
1568 my $i=0;
1569 while ($i < scalar (@{$self->{'associated_files'}})) {
1570 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
1571 splice (@{$self->{'associated_files'}}, $i, 1);
1572 } else {
1573 $i++;
1574 }
1575 }
1576}
1577
1578sub reset_nextsection_ptr {
1579 my $self = shift (@_);
1580 my ($section) = @_;
1581
1582 my $section_ptr = $self->_lookup_section($section);
1583 $section_ptr->{'next_subsection'} = 1;
1584}
1585
15861;
Note: See TracBrowser for help on using the repository browser.