source: main/tags/2.63/gsdl/perllib/doc.pm

Last change on this file was 11097, checked in by kjdon, 18 years ago

check existence of Plugin metadata before using it, line 186

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 42.3 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use unicode;
37use util;
38use ghtml;
39use File::stat;
40##use hashdoc;
41
42# the document type may be indexed_doc, nonindexed_doc, or
43# classification
44
45my $OIDcount = 0;
46
47sub new {
48 my $class = shift (@_);
49 my ($source_filename, $doc_type) = @_;
50
51 my $self = bless {'associated_files'=>[],
52 'subsection_order'=>[],
53 'next_subsection'=>1,
54 'subsections'=>{},
55 'metadata'=>[],
56 'text'=>"",
57 'OIDtype'=>"hash"}, $class;
58
59 # used to set lastmodified here, but this can screw up the HASH ids, so
60 # the docsave processor now calls set_lastmodified
61
62 if (defined $source_filename) {
63 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
64
65 if (defined $collect_dir) {
66 my $dirsep = &util::get_dirsep();
67 if ($collect_dir !~ m/$dirsep$/) {
68 $collect_dir .= $dirsep;
69 }
70
71 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
72
73 # if from within GSDLCOLLECTDIR, then remove directory prefix
74 # so source_filename is realative to it. This is done to aid
75 # portability, i.e. the collection can be moved to somewhere
76 # else on the file system and the archives directory will still
77 # work. This is needed, for example in the applet version of
78 # GLI where GSDLHOME/collect on the server will be different to
79 # the collect directory of the remove user. Of course,
80 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
81 # it back into a full pathname.
82
83 if ($source_filename =~ /^$collect_dir(.*)$/) {
84 $source_filename = $1;
85 }
86 }
87
88 $self->set_source_filename ($source_filename);
89 }
90
91 $self->set_doc_type ($doc_type) if defined $doc_type;
92
93 return $self;
94}
95# set lastmodified for OAI purposes, added by GRB, moved by kjdon
96sub set_lastmodified {
97 my $self = shift (@_);
98
99 my $source_filename = $self->get_source_filename();
100 if ((defined $self->get_metadata_element ($self->get_top_section(), "gsdldoctype")) &&
101 (defined $source_filename) && (-e $source_filename)) {
102 my $file_stat = stat($source_filename);
103 my $mtime = $file_stat->mtime;
104 $self->add_utf8_metadata($self->get_top_section(), "lastmodified", $file_stat->mtime);
105 }
106}
107
108# clone the $self object
109sub duplicate {
110 my $self = shift (@_);
111
112 my $newobj = {};
113
114 foreach my $k (keys %$self) {
115 $newobj->{$k} = &clone ($self->{$k});
116 }
117
118 bless $newobj, ref($self);
119 return $newobj;
120}
121
122sub clone {
123 my ($from) = @_;
124 my $type = ref ($from);
125
126 if ($type eq "HASH") {
127 my $to = {};
128 foreach my $key (keys %$from) {
129 $to->{$key} = &clone ($from->{$key});
130 }
131 return $to;
132 } elsif ($type eq "ARRAY") {
133 my $to = [];
134 foreach my $v (@$from) {
135 push (@$to, &clone ($v));
136 }
137 return $to;
138 } else {
139 return $from;
140 }
141}
142
143sub set_OIDtype {
144 my $self = shift (@_);
145 my ($type) = @_;
146
147 if (defined $type && $type =~ /^(hash|incremental|dirname|assigned)$/) {
148 $self->{'OIDtype'} = $type;
149 } else {
150 $self->{'OIDtype'} = "hash";
151 }
152}
153
154sub set_source_filename {
155 my $self = shift (@_);
156 my ($source_filename) = @_;
157
158 $self->set_metadata_element ($self->get_top_section(),
159 "gsdlsourcefilename",
160 $source_filename);
161}
162
163sub set_converted_filename {
164 my $self = shift (@_);
165 my ($converted_filename) = @_;
166
167 $self->set_metadata_element ($self->get_top_section(),
168 "gsdlconvertedfilename",
169 $converted_filename);
170}
171
172
173# returns the source_filename as it was provided
174sub get_source_filename {
175 my $self = shift (@_);
176
177 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
178}
179
180# returns converted filename if available else returns source filename
181sub get_filename_for_hashing {
182 my $self = shift (@_);
183
184 my $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlconvertedfilename");
185
186 if (!defined $filename) {
187 my $plugin_name = $self->get_metadata_element ($self->get_top_section(), "Plugin");
188 # if NULPlug processed file, then don't give a filename
189 if (defined $plugin_name && $plugin_name eq "NULPlug") {
190 $filename = undef;
191 } else {
192 $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
193 }
194 }
195 return $filename;
196}
197
198sub set_doc_type {
199 my $self = shift (@_);
200 my ($doc_type) = @_;
201
202 $self->set_metadata_element ($self->get_top_section(),
203 "gsdldoctype",
204 $doc_type);
205}
206
207# returns the gsdldoctype as it was provided
208# the default of "indexed_doc" is used if no document
209# type was provided
210sub get_doc_type {
211 my $self = shift (@_);
212
213 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
214 return $doc_type if (defined $doc_type);
215 return "indexed_doc";
216}
217
218sub _escape_text {
219 my ($text) = @_;
220
221 # special characters in the gml encoding
222 $text =~ s/&/&/g; # this has to be first...
223 $text =~ s/</&lt;/g;
224 $text =~ s/>/&gt;/g;
225 $text =~ s/\"/&quot;/g;
226
227 return $text;
228}
229
230sub buffer_section_xml {
231 my $self = shift (@_);
232 my ($section) = @_;
233
234 my $section_ptr = $self->_lookup_section ($section);
235 return "" unless defined $section_ptr;
236
237 my $all_text = "<Section>\n";
238 $all_text .= " <Description>\n";
239
240 # output metadata
241 foreach my $data (@{$section_ptr->{'metadata'}}) {
242 my $escaped_value = &_escape_text($data->[1]);
243 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
244 }
245
246 $all_text .= " </Description>\n";
247
248 # output the text
249 $all_text .= " <Content>";
250 $all_text .= &_escape_text($section_ptr->{'text'});
251 $all_text .= "</Content>\n";
252
253 # output all the subsections
254 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
255 $all_text .= $self->buffer_section_xml("$section.$subsection");
256 }
257
258 $all_text .= "</Section>\n";
259
260 # make sure no nasty control characters have snuck through
261 # (XML::Parser will barf on anything it doesn't consider to be
262 # valid UTF-8 text, including things like \c@, \cC etc.)
263 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
264
265 return $all_text;
266}
267
268sub buffer_txt_section_xml {
269 my $self = shift(@_);
270 my ($section) = @_;
271
272 my $section_ptr = $self->_lookup_section ($section);
273
274 return "" unless defined $section_ptr;
275
276 my $all_text = "<Section>\n";
277
278 ##output the text
279 #$all_text .= " <Content>";
280 $all_text .= &_escape_text($section_ptr->{'text'});
281 #$all_text .= " </Content>\n";
282
283
284 #output all the subsections
285 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
286 $all_text .= $self->buffer_txt_section_xml("$section.$subsection");
287 }
288
289 $all_text .= "</Section>\n";
290
291
292 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
293 return $all_text;
294}
295
296sub buffer_mets_fileSection_section_xml() {
297 my $self = shift(@_);
298 my ($section,$version) = @_;
299
300 #$section="" unless defined $section;
301
302
303 my $section_ptr=$self->_lookup_section($section);
304 return "" unless defined $section_ptr;
305
306
307 # output fileSection by sections
308 my $section_num ="1". $section;
309
310 my $filePath = 'doctxt.xml';
311
312 my $opt_owner_id = "";
313 if ($version eq "fedora") {
314 $opt_owner_id = "OWNERID=\"M\"";
315 }
316
317 # output the fileSection details
318 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
319 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
320 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
321
322 my $xpath = "1".$section;
323 $xpath =~ s/\./]\/Section[/g;
324
325 $all_text .= $xpath;
326
327 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
328 $all_text .= " </mets:file>\n";
329 $all_text .= " </mets:fileGrp>\n";
330
331
332 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
333 $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version);
334 }
335
336 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
337
338 return $all_text;
339}
340
341sub buffer_mets_fileWhole_section_xml(){
342 my $self = shift(@_);
343 my ($section,$version,$working_dir) = @_;
344
345 my $section_ptr = $self-> _lookup_section($section);
346 return "" unless defined $section_ptr;
347
348 my $all_text="" unless defined $all_txt;
349
350 my $fileID=0;
351
352 # Output the fileSection for the whole section
353 # => get the sourcefile and associative file
354
355 my $id_root = "";
356 my $opt_owner_id = "";
357
358 if ($version eq "fedora") {
359 $opt_owner_id = "OWNERID=\"M\"";
360 }
361 else {
362 $id_root = "default";
363 }
364
365 if ($version ne "fedora") {
366 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
367 }
368
369 foreach my $data (@{$section_ptr->{'metadata'}}){
370 my $escaped_value = &_escape_text($data->[1]);
371
372 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
373 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
374
375 ++$fileID;
376 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
377
378 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
379
380 $all_text .= " </mets:file>\n";
381 }
382
383 if ($data->[0] eq "gsdlassocfile"){
384
385 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
386 my $assoc_file = $1;
387 my $mime_type = $2;
388 my $assoc_dir = $3;
389
390 if ($version eq "fedora") {
391 $id_root = $assoc_file;
392 $id_root =~ s/\//_/g;
393 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
394 }
395
396 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
397 ++$fileID;
398
399 my $mime_attr = "MIMETYPE=\"$mime_type\"";
400 my $xlink_title = "xlink:title=\"$assoc_file\"";
401
402 my $id_attr;
403 my $xlink_href;
404
405 if ($version eq "fedora") {
406 $id_attr = "ID=\"$id_root.0\"";
407
408 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
409 if (!defined $fedora_prefix) {
410 $xlink_href = "xlink:href=\"$assfilePath\"";
411 }
412 else
413 {
414 my $gsdlhome = $ENV{'GSDLHOME'};
415 my $gsdl_href = "$working_dir/$assfilePath";
416
417 $gsdl_href =~ s/^$gsdlhome(\/)?//;
418 $gsdl_href = "/gsdl/$gsdl_href";
419
420 my $fserver = $ENV{'FEDORA_HOSTNAME'};
421 my $fport = $ENV{'FEDORA_SERVER_PORT'};
422
423 my $fdomain = "http://$fserver:$fport";
424 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
425 }
426
427 my $top_section = $self->get_top_section();
428 my $id = $self->get_metadata_element($top_section,"Identifier");
429 }
430 else {
431 $id_attr = "ID=\"$id_root.$fileID\"";
432 $xlink_href = "xlink:href=\"$assfilePath\"";
433 }
434
435 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
436 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
437
438 $all_text .= " </mets:file>\n";
439
440 if ($version eq "fedora") {
441 $all_text .= " </mets:fileGrp>\n";
442 }
443
444 }
445 }
446
447 if ($version ne "fedora") {
448 $all_text .= " </mets:fileGrp>\n";
449 }
450
451 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
452
453 return $all_text;
454}
455
456sub buffer_mets_StructMapSection_section_xml(){
457 my $self = shift(@_);
458 my ($section, $order_numref) = @_;
459
460 $section="" unless defined $section;
461
462
463 my $section_ptr=$self->_lookup_section($section);
464 return "" unless defined $section_ptr;
465
466
467 # output fileSection by Sections
468 my $section_num ="1". $section;
469 my $dmd_num = $section_num;
470
471 ##**output the dmdSection details
472 #if ($section_num eq "1") {
473 # $dmd_num = "0";
474 #}
475
476 #**output the StructMap details
477
478 my $dmdid_attr = "DM$dmd_num";
479
480 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
481 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
482 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
483
484 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
485
486
487 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
488 $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref);
489 }
490
491 $all_text .= " </mets:div>\n";
492
493 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
494
495 return $all_text;
496}
497
498
499sub buffer_mets_StructMapWhole_section_xml(){
500 my $self = shift(@_);
501 my ($section) = @_;
502
503 my $section_ptr = $self-> _lookup_section($section);
504 return "" unless defined $section_ptr;
505
506 my $all_text="" unless defined $all_txt;
507 my $fileID=0;
508 my $order_num = 0;
509
510 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
511
512 #** output the StructMapSection for the whole section
513 # get the sourcefile and associative file
514
515 foreach my $data (@{$section_ptr->{'metadata'}}){
516 my $escaped_value = &_escape_text($data->[1]);
517
518 if ($data->[0] eq "gsdlsourcefilename") {
519 ++$fileID;
520 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
521 }
522
523 if ($data->[0] eq "gsdlassocfile"){
524 ++$fileID;
525 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
526 }
527 }
528 $all_text .= " </mets:div>\n";
529
530 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
531
532 return $all_text;
533}
534
535
536sub buffer_mets_dmdSection_section_xml(){
537 my $self = shift(@_);
538 my ($section,$version) = @_;
539
540 $section="" unless defined $section;
541
542 my $section_ptr=$self->_lookup_section($section);
543 return "" unless defined $section_ptr;
544
545 # convert section number
546 my $section_num ="1". $section;
547 my $dmd_num = $section_num;
548
549 # #**output the dmdSection details
550 # if ($section_num eq "1") {
551 # $dmd_num = "0";
552 # }
553
554
555 my $all_text = "";
556
557 my $label_attr = "";
558 if ($version eq "fedora") {
559 $all_text .= "<mets:amdSec ID=\"DC\" >\n";
560 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
561
562 $label_attr = "LABEL=\"Dublin Core Metadata\"";
563 }
564 else {
565 print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
566 print STDERR "***** Check to see if <techMD> required\n";
567 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
568
569 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
570 }
571
572 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
573 $all_text .= " <mets:xmlData>\n";
574
575 if ($version eq "fedora") {
576 my $dc_namespace = "";
577 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
578 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
579
580 $all_text .= " <oai_dc:dc $dc_namespace>\n";
581
582 $all_text .= $self->buffer_dc_section($section,"oai_dc");
583 $all_text .= " </oai_dc:dc>\n";
584 }
585 else {
586 foreach my $data (@{$section_ptr->{'metadata'}}){
587 my $escaped_value = &_escape_text($data->[1]);
588 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
589 if ($data->[0] eq "dc.Title") {
590 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
591 }
592 }
593 }
594
595 $all_text .= " </mets:xmlData>\n";
596 $all_text .= " </mets:mdWrap>\n";
597
598 if ($version eq "fedora") {
599 $all_text .= " </mets:techMD>\n";
600 $all_text .= "</mets:amdSec>\n";
601 }
602 else {
603 $all_text .= "</mets:dmdSec>\n";
604 }
605
606
607 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
608 $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version);
609 }
610
611 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
612
613 return $all_text;
614}
615
616sub output_section {
617 my $self = shift (@_);
618 my ($handle, $section) = @_;
619
620 print $handle $self->buffer_section_xml($section);
621}
622
623# print out DSpace dublin_core metadata section
624sub output_dspace_section {
625 my $self = shift (@_);
626 my ($handle, $section) = @_;
627
628 my $section_ptr = $self->_lookup_section ($section);
629 return "" unless defined $section_ptr;
630
631 my $all_text = "<Section>\n";
632 $all_text .= " <Description>\n";
633
634 # output metadata
635 foreach my $data (@{$section_ptr->{'metadata'}}) {
636 my $escaped_value = &_escape_text($data->[1]);
637 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
638 }
639
640 $all_text .= " </Description>\n";
641 $all_text .= "</Section>\n";
642
643 # make sure no nasty control characters have snuck through
644 # (XML::Parser will barf on anything it doesn't consider to be
645 # valid UTF-8 text, including things like \c@, \cC etc.)
646 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
647
648 return $all_text;
649}
650
651# print out doctxt.xml file
652sub output_txt_section {
653 my $self = shift (@_);
654 my ($handle, $section) = @_;
655
656 print $handle $self->buffer_txt_section_xml($section);
657}
658
659# print out docmets.xml file
660sub output_mets_section {
661 my $self = shift(@_);
662 my ($handle, $section, $version, $working_dir) = @_;
663
664 # print out the dmdSection
665 print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);
666
667 print $handle "<mets:fileSec>\n";
668 if ($version eq "fedora") {
669 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
670 }
671
672 # print out the fileSection by sections
673 print $handle $self->buffer_mets_fileSection_section_xml($section,$version);
674
675 # print out the whole fileSection
676 print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);
677
678 if ($version eq "fedora") {
679 print $handle " </mets:fileGrp>\n";
680 }
681 print $handle "</mets:fileSec>\n";
682
683 # print out the StructMapSection by sections
684
685 my $struct_type;
686 if ($version eq "fedora") {
687 $struct_type = "fedora:dsBindingMap";
688 }
689 else {
690 $struct_type = "Section";
691 }
692
693 if ($version ne "fedora") {
694 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
695 my $order_num=0;
696 print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);
697 print $handle "</mets:structMap>\n";
698
699 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
700 print $handle $self->buffer_mets_StructMapWhole_section_xml($section);
701 print $handle "</mets:structMap>\n";
702 }
703
704}
705
706my $dc_set = { Title => 1,
707 Creator => 1,
708 Subject => 1,
709 Description => 1,
710 Publisher => 1,
711 Contributors => 1,
712 Date => 1,
713 Type => 1,
714 Format => 1,
715 Identifier => 1,
716 Source => 1,
717 Language => 1,
718 Relation => 1,
719 Coverage => 1,
720 Rights => 1};
721
722
723
724
725# Build up dublin_core metadata. Priority given to dc.* over ex.*
726
727sub buffer_dc_section {
728 my $self = shift(@_);
729 my ($section, $version) = @_;
730
731 # build up string of dublin core metadata
732 $section="" unless defined $section;
733
734 my $section_ptr=$self->_lookup_section($section);
735 return "" unless defined $section_ptr;
736
737 my $explicit_dc = {};
738 my $explicit_ex = {};
739
740 my $all_text="";
741 foreach my $data (@{$section_ptr->{'metadata'}}){
742 my $escaped_value = &_escape_text($data->[1]);
743 if ($data->[0]=~ m/^dc\./) {
744 $data->[0] =~ tr/[A-Z]/[a-z]/;
745
746 $data->[0] =~ m/^dc\.(.*)/;
747 my $dc_element = $1;
748
749 if (!defined $explicit_dc->{$dc_element}) {
750 $explicit_dc->{$dc_element} = [];
751 }
752 push(@{$explicit_dc->{$dc_element}},$escaped_value);
753
754 #$all_text .= ' <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
755 if (defined $version && ($version eq "oai_dc")) {
756 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
757 }
758 else {
759 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
760 }
761
762 }
763 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
764 $data->[0] =~ m/^(ex\.)?(.*)/;
765 my $ex_element = $2;
766 my $lc_ex_element = lc($ex_element);
767
768 if (defined $dc_set->{$ex_element}) {
769 if (!defined $explicit_ex->{$lc_ex_element}) {
770 $explicit_ex->{$lc_ex_element} = [];
771 }
772 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
773 }
774 }
775 }
776
777 # go through dc_set and for any element *not* defined in explicit_dc
778 # that do exist in explicit_ex, add it in as metadata
779 foreach my $k ( keys %$dc_set ) {
780 my $lc_k = lc($k);
781
782 if (!defined $explicit_dc->{$lc_k}) {
783 if (defined $explicit_ex->{$lc_k}) {
784
785 foreach my $v (@{$explicit_ex->{$lc_k}}) {
786 my $dc_element = $lc_k;
787 my $escaped_value = $v;
788
789 if (defined $version && ($version eq "oai_dc")) {
790 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
791 }
792 else {
793 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
794 }
795
796 }
797 }
798 }
799 }
800
801 if ($all_text eq "") {
802 $all_text .= " There is no Dublin Core metatdata in this document\n";
803 }
804 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
805
806 return $all_text;
807}
808
809
810# Print out dublin_core metadata
811sub output_dc_section {
812 my $self = shift(@_);
813 my ($handle, $section, $version) = @_;
814
815 my $all_text = $self->buffer_dc_section($section,$version);
816
817 print $handle $all_text;
818}
819
820
821# look up the reference to the a particular section
822sub _lookup_section {
823 my $self = shift (@_);
824 my ($section) = @_;
825
826 my ($num);
827 my $sectionref = $self;
828
829 while (defined $section && $section ne "") {
830 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
831 $num =~ s/^0+(\d)/$1/; # remove leading 0s
832 $section = "" unless defined $section;
833
834 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
835 $sectionref = $sectionref->{'subsections'}->{$num};
836 } else {
837 return undef;
838 }
839 }
840
841 return $sectionref;
842}
843
844# calculate OID by hashing the contents of the document
845sub _calc_OID {
846 my $self = shift (@_);
847 my ($filename) = @_;
848
849 my $osexe = &util::get_os_exe();
850
851 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
852 $ENV{'GSDLOS'},"hashfile$osexe");
853
854 my $result = "NULL";
855
856 if (-e "$hashfile_exe") {
857# $result = `\"$hashfile_exe\" \"$filename\"`;
858 $result = `hashfile$osexe \"$filename\"`;
859 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
860
861 } else {
862 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
863 }
864 return "HASH$result";
865}
866
867# methods dealing with OID, not groups of them.
868
869# if $OID is not provided one is calculated
870sub set_OID {
871 my $self = shift (@_);
872 my ($OID) = @_;
873
874 my $use_hash_oid = 0;
875 # if an OID wasn't provided claculate one
876 if (!defined $OID) {
877 $OID = "NULL";
878 if ($self->{'OIDtype'} eq "hash") {
879 $use_hash_oid = 1;
880 } elsif ($self->{'OIDtype'} eq "incremental") {
881 $OID = "D" . $OIDcount;
882 $OIDcount ++;
883
884 } elsif ($self->{'OIDtype'} eq "dirname") {
885 $OID = 'J';
886 my $filename = $self->get_source_filename();
887 if (defined($filename)) { # && -e $filename) {
888 $OID = &File::Basename::dirname($filename);
889 if (defined $OID) {
890 $OID = 'J'.&File::Basename::basename($OID);
891 $OID =~ s/\.//; #remove any periods
892 } else {
893 print STDERR "Failed to find base for filename ($filename)...generating hash id\n";
894 $use_hash_oid = 1;
895 }
896 } else {
897 print STDERR "Failed to find filename, generating hash id\n";
898 $use_hash_oid = 1;
899 }
900
901 } elsif ($self->{'OIDtype'} eq "assigned") {
902 my $identifier = $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
903 if (defined $identifier && $identifier ne "") {
904 $OID = "D" . $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
905 $OID =~ s/\.//; #remove any periods
906 } else {
907 # need a hash id
908 print STDERR "no dc.Identifier found, generating hash id\n";
909 $use_hash_oid = 1;
910 }
911
912 } else {
913 $use_hash_oid = 1;
914 }
915
916 if ($use_hash_oid) {
917
918 # "hash" OID - feed file to hashfile.exe
919 #my $filename = $self->get_source_filename();
920 # we want to use the converted file for hashing if available
921 # cos its quicker
922 my $filename = $self->get_filename_for_hashing();
923 # -z: don't want to hash on the file if it is zero size
924 if (defined($filename) && -e $filename && !-z $filename) {
925 $OID = $self->_calc_OID ($filename);
926 } else {
927 $filename = &util::get_tmp_filename();
928 if (!open (OUTFILE, ">$filename")) {
929 print STDERR "doc::set_OID could not write to $filename\n";
930 } else {
931 $self->output_section('OUTFILE', $self->get_top_section(),
932 undef, 1);
933 close (OUTFILE);
934 }
935 $OID = $self->_calc_OID ($filename);
936 &util::rm ($filename);
937 }
938 }
939 }
940 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
941}
942
943# this uses hashdoc (embedded c thingy) which is faster but still
944# needs a little work to be suffiently stable
945sub ___set_OID {
946 my $self = shift (@_);
947 my ($OID) = @_;
948
949 # if an OID wasn't provided then calculate hash value based on document
950 if (!defined $OID)
951 {
952 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
953 undef, 1);
954 my $hash_len = length($hash_text);
955
956 $OID = &hashdoc::buffer($hash_text,$hash_len);
957 }
958
959 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
960}
961
962# returns the OID for this document
963sub get_OID {
964 my $self = shift (@_);
965 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
966 return $OID if (defined $OID);
967 return "NULL";
968}
969
970sub delete_OID {
971 my $self = shift (@_);
972
973 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
974}
975
976
977# methods for manipulating section names
978
979# returns the name of the top-most section (the top
980# level of the document
981sub get_top_section {
982 my $self = shift (@_);
983
984 return "";
985}
986
987# returns a section
988sub get_parent_section {
989 my $self = shift (@_);
990 my ($section) = @_;
991
992 $section =~ s/(^|\.)\d+$//;
993
994 return $section;
995}
996
997# returns the first child section (or the end child
998# if there isn't any)
999sub get_begin_child {
1000 my $self = shift (@_);
1001 my ($section) = @_;
1002
1003 my $section_ptr = $self->_lookup_section($section);
1004 return "" unless defined $section_ptr;
1005
1006 if (defined $section_ptr->{'subsection_order'}->[0]) {
1007 return "$section.$section_ptr->{'subsection_order'}->[0]";
1008 }
1009
1010 return $self->get_end_child ($section);
1011}
1012
1013# returns the next child of a parent section
1014sub get_next_child {
1015 my $self = shift (@_);
1016 my ($section) = @_;
1017
1018 my $parent_section = $self->get_parent_section($section);
1019 my $parent_section_ptr = $self->_lookup_section($parent_section);
1020 return undef unless defined $parent_section_ptr;
1021
1022 my ($section_num) = $section =~ /(\d+)$/;
1023 return undef unless defined $section_num;
1024
1025 my $i = 0;
1026 my $section_order = $parent_section_ptr->{'subsection_order'};
1027 while ($i < scalar(@$section_order)) {
1028 last if $section_order->[$i] eq $section_num;
1029 $i++;
1030 }
1031
1032 $i++; # the next child
1033 if ($i < scalar(@$section_order)) {
1034 return $section_order->[$i] if $parent_section eq "";
1035 return "$parent_section.$section_order->[$i]";
1036 }
1037
1038 # no more sections in this level
1039 return undef;
1040}
1041
1042# returns a reference to a list of children
1043sub get_children {
1044 my $self = shift (@_);
1045 my ($section) = @_;
1046
1047 my $section_ptr = $self->_lookup_section($section);
1048 return [] unless defined $section_ptr;
1049
1050 my @children = @{$section_ptr->{'subsection_order'}};
1051
1052 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
1053 return \@children;
1054}
1055
1056# returns the child section one past the last one (which
1057# is coded as "0")
1058sub get_end_child {
1059 my $self = shift (@_);
1060 my ($section) = @_;
1061
1062 return $section . ".0" unless $section eq "";
1063 return "0";
1064}
1065
1066# returns the next section in book order
1067sub get_next_section {
1068 my $self = shift (@_);
1069 my ($section) = @_;
1070
1071 return undef unless defined $section;
1072
1073 my $section_ptr = $self->_lookup_section($section);
1074 return undef unless defined $section_ptr;
1075
1076 # first try to find first child
1077 if (defined $section_ptr->{'subsection_order'}->[0]) {
1078 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
1079 return "$section.$section_ptr->{'subsection_order'}->[0]";
1080 }
1081
1082 do {
1083 # try to find sibling
1084 my $next_child = $self->get_next_child ($section);
1085 return $next_child if (defined $next_child);
1086
1087 # move up one level
1088 $section = $self->get_parent_section ($section);
1089 } while $section =~ /\d/;
1090
1091 return undef;
1092}
1093
1094sub is_leaf_section {
1095 my $self = shift (@_);
1096 my ($section) = @_;
1097
1098 my $section_ptr = $self->_lookup_section($section);
1099 return 1 unless defined $section_ptr;
1100
1101 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
1102}
1103
1104# methods for dealing with sections
1105
1106# returns the name of the inserted section
1107sub insert_section {
1108 my $self = shift (@_);
1109 my ($before_section) = @_;
1110
1111 # get the child to insert before and its parent section
1112 my $parent_section = "";
1113 my $before_child = "0";
1114 my @before_section = split (/\./, $before_section);
1115 if (scalar(@before_section) > 0) {
1116 $before_child = pop (@before_section);
1117 $parent_section = join (".", @before_section);
1118 }
1119
1120 my $parent_section_ptr = $self->_lookup_section($parent_section);
1121 if (!defined $parent_section_ptr) {
1122 print STDERR "doc::insert_section couldn't find parent section " .
1123 "$parent_section\n";
1124 return;
1125 }
1126
1127 # get the next section number
1128 my $section_num = $parent_section_ptr->{'next_subsection'}++;
1129
1130 my $i = 0;
1131 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
1132 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
1133 $i++;
1134 }
1135
1136 # insert the section number into the order list
1137 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
1138
1139 # add this section to the parent section
1140 my $section_ptr = {'subsection_order'=>[],
1141 'next_subsection'=>1,
1142 'subsections'=>{},
1143 'metadata'=>[],
1144 'text'=>""};
1145 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
1146
1147 # work out the full section number
1148 my $section = $parent_section;
1149 $section .= "." unless $section eq "";
1150 $section .= $section_num;
1151
1152 return $section;
1153}
1154
1155# creates a pre-named section
1156sub create_named_section {
1157 my $self = shift (@_);
1158 my ($mastersection) = @_;
1159
1160 my ($num);
1161 my $section = $mastersection;
1162 my $sectionref = $self;
1163
1164 while ($section ne "") {
1165 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
1166 $num =~ s/^0+(\d)/$1/; # remove leading 0s
1167 $section = "" unless defined $section;
1168
1169 if (defined $num) {
1170 if (!defined $sectionref->{'subsections'}->{$num}) {
1171 push (@{$sectionref->{'subsection_order'}}, $num);
1172 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
1173 'next_subsection'=>1,
1174 'subsections'=>{},
1175 'metadata'=>[],
1176 'text'=>""};
1177 if ($num >= $sectionref->{'next_subsection'}) {
1178 $sectionref->{'next_subsection'} = $num + 1;
1179 }
1180 }
1181 $sectionref = $sectionref->{'subsections'}->{$num};
1182
1183 } else {
1184 print STDERR "doc::create_named_section couldn't create section ";
1185 print STDERR "$mastersection\n";
1186 last;
1187 }
1188 }
1189}
1190
1191# returns a reference to a list of subsections
1192sub list_subsections {
1193 my $self = shift (@_);
1194 my ($section) = @_;
1195
1196 my $section_ptr = $self->_lookup_section ($section);
1197 if (!defined $section_ptr) {
1198 print STDERR "doc::list_subsections couldn't find section $section\n";
1199 return [];
1200 }
1201
1202 return [@{$section_ptr->{'subsection_order'}}];
1203}
1204
1205sub delete_section {
1206 my $self = shift (@_);
1207 my ($section) = @_;
1208
1209# my $section_ptr = {'subsection_order'=>[],
1210# 'next_subsection'=>1,
1211# 'subsections'=>{},
1212# 'metadata'=>[],
1213# 'text'=>""};
1214
1215 # if this is the top section reset everything
1216 if ($section eq "") {
1217 $self->{'subsection_order'} = [];
1218 $self->{'subsections'} = {};
1219 $self->{'metadata'} = [];
1220 $self->{'text'} = "";
1221 return;
1222 }
1223
1224 # find the parent of the section to delete
1225 my $parent_section = "";
1226 my $child = "0";
1227 my @section = split (/\./, $section);
1228 if (scalar(@section) > 0) {
1229 $child = pop (@section);
1230 $parent_section = join (".", @section);
1231 }
1232
1233 my $parent_section_ptr = $self->_lookup_section($parent_section);
1234 if (!defined $parent_section_ptr) {
1235 print STDERR "doc::delete_section couldn't find parent section " .
1236 "$parent_section\n";
1237 return;
1238 }
1239
1240 # remove this section from the subsection_order list
1241 my $i = 0;
1242 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
1243 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
1244 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
1245 last;
1246 }
1247 $i++;
1248 }
1249
1250 # remove this section from the subsection hash
1251 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
1252 undef $parent_section_ptr->{'subsections'}->{$child};
1253 }
1254}
1255
1256#--
1257# methods for dealing with metadata
1258
1259# set_metadata_element and get_metadata_element are for metadata
1260# which should only have one value. add_meta_data and get_metadata
1261# are for metadata which can have more than one value.
1262
1263# returns the first metadata value which matches field
1264
1265# This version of get metadata element works much like the one above,
1266# except it allows for the namespace portion of a metadata element to
1267# be ignored, thus if you are searching for dc.Title, the first piece
1268# of matching metadata ending with the name Title (once any namespace
1269# is removed) would be returned.
1270# 28-11-2003 John Thompson
1271sub get_metadata_element {
1272 my $self = shift (@_);
1273 my ($section, $field, $ignore_namespace) = @_;
1274 my ($data);
1275
1276 $ignore_namespace = 0 unless defined $ignore_namespace;
1277
1278 my $section_ptr = $self->_lookup_section($section);
1279 if (!defined $section_ptr) {
1280 print STDERR "doc::get_metadata_element couldn't find section ", $section, "\n";
1281 return;
1282 }
1283
1284 # Remove the any namespace if we are being told to ignore them
1285 if($ignore_namespace) {
1286 $field =~ s/^\w*\.//;
1287 }
1288
1289 foreach $data (@{$section_ptr->{'metadata'}}) {
1290
1291 my $data_name = $data->[0];
1292 # Remove the any namespace if we are being told to ignore them
1293 if($ignore_namespace) {
1294 $data_name =~ s/^\w*\.//;
1295 }
1296
1297 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
1298 }
1299
1300 return undef; # was not found
1301}
1302
1303# returns a list of the form [value1, value2, ...]
1304sub get_metadata {
1305 my $self = shift (@_);
1306 my ($section, $field, $ignore_namespace) = @_;
1307 my ($data);
1308
1309 $ignore_namespace = 0 unless defined $ignore_namespace;
1310
1311 my $section_ptr = $self->_lookup_section($section);
1312 if (!defined $section_ptr) {
1313 print STDERR "doc::get_metadata couldn't find section ",
1314 $section, "\n";
1315 return;
1316 }
1317
1318 # Remove the any namespace if we are being told to ignore them
1319 if($ignore_namespace) {
1320 $field =~ s/^\w*\.//;
1321 }
1322
1323 my @metadata = ();
1324 foreach $data (@{$section_ptr->{'metadata'}}) {
1325
1326 my $data_name = $data->[0];
1327 # Remove the any namespace if we are being told to ignore them
1328 if($ignore_namespace) {
1329 $data_name =~ s/^\w*\.//;
1330 }
1331
1332 push (@metadata, $data->[1]) if ($data_name eq $field);
1333 }
1334
1335 return \@metadata;
1336}
1337
1338# returns a list of the form [[field,value],[field,value],...]
1339sub get_all_metadata {
1340 my $self = shift (@_);
1341 my ($section) = @_;
1342
1343 my $section_ptr = $self->_lookup_section($section);
1344 if (!defined $section_ptr) {
1345 print STDERR "doc::get_all_metadata couldn't find section ", $section, "\n";
1346 return;
1347 }
1348
1349 return $section_ptr->{'metadata'};
1350}
1351
1352# $value is optional
1353sub delete_metadata {
1354 my $self = shift (@_);
1355 my ($section, $field, $value) = @_;
1356
1357 my $section_ptr = $self->_lookup_section($section);
1358 if (!defined $section_ptr) {
1359 print STDERR "doc::delete_metadata couldn't find section ", $section, "\n";
1360 return;
1361 }
1362
1363 my $i = 0;
1364 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
1365 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
1366 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
1367 splice (@{$section_ptr->{'metadata'}}, $i, 1);
1368 } else {
1369 $i++;
1370 }
1371 }
1372}
1373
1374sub delete_all_metadata {
1375 my $self = shift (@_);
1376 my ($section) = @_;
1377
1378 my $section_ptr = $self->_lookup_section($section);
1379 if (!defined $section_ptr) {
1380 print STDERR "doc::delete_all_metadata couldn't find section ", $section, "\n";
1381 return;
1382 }
1383
1384 $section_ptr->{'metadata'} = [];
1385}
1386
1387sub set_metadata_element {
1388 my $self = shift (@_);
1389 my ($section, $field, $value) = @_;
1390
1391 $self->set_utf8_metadata_element ($section, $field,
1392 &unicode::ascii2utf8(\$value));
1393}
1394
1395# set_utf8_metadata_element assumes the text has already been
1396# converted to the UTF-8 encoding.
1397sub set_utf8_metadata_element {
1398 my $self = shift (@_);
1399 my ($section, $field, $value) = @_;
1400
1401 $self->delete_metadata ($section, $field);
1402 $self->add_utf8_metadata ($section, $field, $value);
1403}
1404
1405
1406# add_metadata assumes the text is in (extended) ascii form. For
1407# text which has already been converted to the UTF-8 format use
1408# add_utf8_metadata.
1409sub add_metadata {
1410 my $self = shift (@_);
1411 my ($section, $field, $value) = @_;
1412
1413 $self->add_utf8_metadata ($section, $field,
1414 &unicode::ascii2utf8(\$value));
1415}
1416
1417sub add_utf8_metadata {
1418 my $self = shift (@_);
1419 my ($section, $field, $value) = @_;
1420
1421 my $section_ptr = $self->_lookup_section($section);
1422 if (!defined $section_ptr) {
1423 print STDERR "doc::add_utf8_metadata couldn't find section ", $section, "\n";
1424 return;
1425 }
1426 if (!defined $value) {
1427 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
1428 return;
1429 }
1430 if (!defined $field) {
1431 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
1432 return;
1433 }
1434
1435 #print STDERR "###$field=$value\n";
1436 # double check that the value is utf-8
1437 if (unicode::ensure_utf8(\$value)) {
1438 print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
1439 }
1440
1441 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
1442}
1443
1444
1445# methods for dealing with text
1446
1447# returns the text for a section
1448sub get_text {
1449 my $self = shift (@_);
1450 my ($section) = @_;
1451
1452 my $section_ptr = $self->_lookup_section($section);
1453 if (!defined $section_ptr) {
1454 print STDERR "doc::get_text couldn't find section " .
1455 "$section\n";
1456 return "";
1457 }
1458
1459 return $section_ptr->{'text'};
1460}
1461
1462# returns the (utf-8 encoded) length of the text for a section
1463sub get_text_length {
1464 my $self = shift (@_);
1465 my ($section) = @_;
1466
1467 my $section_ptr = $self->_lookup_section($section);
1468 if (!defined $section_ptr) {
1469 print STDERR "doc::get_text_length couldn't find section " .
1470 "$section\n";
1471 return 0;
1472 }
1473
1474 return length ($section_ptr->{'text'});
1475}
1476
1477sub delete_text {
1478 my $self = shift (@_);
1479 my ($section) = @_;
1480
1481 my $section_ptr = $self->_lookup_section($section);
1482 if (!defined $section_ptr) {
1483 print STDERR "doc::delete_text couldn't find section " .
1484 "$section\n";
1485 return;
1486 }
1487
1488 $section_ptr->{'text'} = "";
1489}
1490
1491# add_text assumes the text is in (extended) ascii form. For
1492# text which has been already converted to the UTF-8 format
1493# use add_utf8_text.
1494sub add_text {
1495 my $self = shift (@_);
1496 my ($section, $text) = @_;
1497
1498 # convert the text to UTF-8 encoded unicode characters
1499 # and add the text
1500 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
1501}
1502
1503
1504# add_utf8_text assumes the text to be added has already
1505# been converted to the UTF-8 encoding. For ascii text use
1506# add_text
1507sub add_utf8_text {
1508 my $self = shift (@_);
1509 my ($section, $text) = @_;
1510
1511 my $section_ptr = $self->_lookup_section($section);
1512 if (!defined $section_ptr) {
1513 print STDERR "doc::add_utf8_text couldn't find section " .
1514 "$section\n";
1515 return;
1516 }
1517
1518 $section_ptr->{'text'} .= $text;
1519}
1520
1521
1522# methods for dealing with associated files
1523
1524# a file is associated with a document, NOT a section.
1525# if section is defined it is noted in the data structure
1526# only so that files associated from a particular section
1527# may be removed later (using delete_section_assoc_files)
1528sub associate_file {
1529 my $self = shift (@_);
1530 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
1531 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
1532
1533 # remove all associated files with the same name
1534 $self->delete_assoc_file ($assoc_filename);
1535
1536 push (@{$self->{'associated_files'}},
1537 [$real_filename, $assoc_filename, $mime_type, $section]);
1538}
1539
1540# returns a list of associated files in the form
1541# [[real_filename, assoc_filename, mimetype], ...]
1542sub get_assoc_files {
1543 my $self = shift (@_);
1544
1545 return $self->{'associated_files'};
1546}
1547
1548sub delete_section_assoc_files {
1549 my $self = shift (@_);
1550 my ($section) = @_;
1551
1552 my $i=0;
1553 while ($i < scalar (@{$self->{'associated_files'}})) {
1554 if (defined $self->{'associated_files'}->[$i]->[3] &&
1555 $self->{'associated_files'}->[$i]->[3] eq $section) {
1556 splice (@{$self->{'associated_files'}}, $i, 1);
1557 } else {
1558 $i++;
1559 }
1560 }
1561}
1562
1563sub delete_assoc_file {
1564 my $self = shift (@_);
1565 my ($assoc_filename) = @_;
1566
1567 my $i=0;
1568 while ($i < scalar (@{$self->{'associated_files'}})) {
1569 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
1570 splice (@{$self->{'associated_files'}}, $i, 1);
1571 } else {
1572 $i++;
1573 }
1574 }
1575}
1576
1577sub reset_nextsection_ptr {
1578 my $self = shift (@_);
1579 my ($section) = @_;
1580
1581 my $section_ptr = $self->_lookup_section($section);
1582 $section_ptr->{'next_subsection'} = 1;
1583}
1584
15851;
Note: See TracBrowser for help on using the repository browser.