source: trunk/gsdl/perllib/doc.pm@ 9953

Last change on this file since 9953 was 9953, checked in by davidb, 19 years ago

Code for saving documents for import.pl and export.pl repeatative in places
and at time inconsistent. These changes bring the code more in to line.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 41.8 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use unicode;
37use util;
38use ghtml;
39use File::stat;
40##use hashdoc;
41
42# the document type may be indexed_doc, nonindexed_doc, or
43# classification
44
45my $OIDcount = 0;
46
47sub new {
48 my $class = shift (@_);
49 my ($source_filename, $doc_type) = @_;
50
51 my $self = bless {'associated_files'=>[],
52 'subsection_order'=>[],
53 'next_subsection'=>1,
54 'subsections'=>{},
55 'metadata'=>[],
56 'text'=>"",
57 'OIDtype'=>"hash"}, $class;
58
59 #GRB: Added code to set lastmodified for OAI purposes
60 if ((defined $doc_type) && (defined $source_filename) && (-e $source_filename)) {
61 my $file_stat = stat($source_filename);
62 my $mtime = $file_stat->mtime;
63 $self->add_utf8_metadata($self->get_top_section(), "lastmodified", $file_stat->mtime);
64 }
65 #GRB: end inserted code
66
67 if (defined $source_filename) {
68 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
69
70 if (defined $collect_dir) {
71 my $dirsep = &util::get_dirsep();
72 if ($collect_dir !~ m/$dirsep$/) {
73 $collect_dir .= $dirsep;
74 }
75
76 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
77
78 # if from within GSDLCOLLECTDIR, then remove directory prefix
79 # so source_filename is realative to it. This is done to aid
80 # portability, i.e. the collection can be moved to somewhere
81 # else on the file system and the archives directory will still
82 # work. This is needed, for example in the applet version of
83 # GLI where GSDLHOME/collect on the server will be different to
84 # the collect directory of the remove user. Of course,
85 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
86 # it back into a full pathname.
87
88 if ($source_filename =~ /^$collect_dir(.*)$/) {
89 $source_filename = $1;
90 }
91 }
92
93 $self->set_source_filename ($source_filename);
94 }
95
96 $self->set_doc_type ($doc_type) if defined $doc_type;
97
98 return $self;
99}
100
101# clone the $self object
102sub duplicate {
103 my $self = shift (@_);
104
105 my $newobj = {};
106
107 foreach my $k (keys %$self) {
108 $newobj->{$k} = &clone ($self->{$k});
109 }
110
111 bless $newobj, ref($self);
112 return $newobj;
113}
114
115sub clone {
116 my ($from) = @_;
117 my $type = ref ($from);
118
119 if ($type eq "HASH") {
120 my $to = {};
121 foreach my $key (keys %$from) {
122 $to->{$key} = &clone ($from->{$key});
123 }
124 return $to;
125 } elsif ($type eq "ARRAY") {
126 my $to = [];
127 foreach my $v (@$from) {
128 push (@$to, &clone ($v));
129 }
130 return $to;
131 } else {
132 return $from;
133 }
134}
135
136sub set_OIDtype {
137 my $self = shift (@_);
138 my ($type) = @_;
139
140 if ($type =~ /^(hash|incremental|dirname|assigned)$/) {
141 $self->{'OIDtype'} = $type;
142 } else {
143 $self->{'OIDtype'} = "hash";
144 }
145}
146
147sub set_source_filename {
148 my $self = shift (@_);
149 my ($source_filename) = @_;
150
151 $self->set_metadata_element ($self->get_top_section(),
152 "gsdlsourcefilename",
153 $source_filename);
154}
155
156sub set_converted_filename {
157 my $self = shift (@_);
158 my ($converted_filename) = @_;
159
160 $self->set_metadata_element ($self->get_top_section(),
161 "gsdlconvertedfilename",
162 $converted_filename);
163}
164
165
166# returns the source_filename as it was provided
167sub get_source_filename {
168 my $self = shift (@_);
169
170 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
171}
172
173# returns converted filename if available else returns source filename
174sub get_filename_for_hashing {
175 my $self = shift (@_);
176
177 my $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlconvertedfilename");
178
179 if (!defined $filename) {
180 $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
181 }
182 return $filename;
183}
184
185sub set_doc_type {
186 my $self = shift (@_);
187 my ($doc_type) = @_;
188
189 $self->set_metadata_element ($self->get_top_section(),
190 "gsdldoctype",
191 $doc_type);
192}
193
194# returns the source_filename as it was provided
195# the default of "indexed_doc" is used if no document
196# type was provided
197sub get_doc_type {
198 my $self = shift (@_);
199
200 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
201 return $doc_type if (defined $doc_type);
202 return "indexed_doc";
203}
204
205sub _escape_text {
206 my ($text) = @_;
207
208 # special characters in the gml encoding
209 $text =~ s/&/&/g; # this has to be first...
210 $text =~ s/</&lt;/g;
211 $text =~ s/>/&gt;/g;
212 $text =~ s/\"/&quot;/g;
213
214 return $text;
215}
216
217sub buffer_section_xml {
218 my $self = shift (@_);
219 my ($section) = @_;
220
221 my $section_ptr = $self->_lookup_section ($section);
222 return "" unless defined $section_ptr;
223
224 my $all_text = "<Section>\n";
225 $all_text .= " <Description>\n";
226
227 # output metadata
228 foreach my $data (@{$section_ptr->{'metadata'}}) {
229 my $escaped_value = &_escape_text($data->[1]);
230 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
231 }
232
233 $all_text .= " </Description>\n";
234
235 # output the text
236 $all_text .= " <Content>";
237 $all_text .= &_escape_text($section_ptr->{'text'});
238 $all_text .= "</Content>\n";
239
240 # output all the subsections
241 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
242 $all_text .= $self->buffer_section_xml("$section.$subsection");
243 }
244
245 $all_text .= "</Section>\n";
246
247 # make sure no nasty control characters have snuck through
248 # (XML::Parser will barf on anything it doesn't consider to be
249 # valid UTF-8 text, including things like \c@, \cC etc.)
250 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
251
252 return $all_text;
253}
254
255sub buffer_txt_section_xml {
256 my $self = shift(@_);
257 my ($section) = @_;
258
259 my $section_ptr = $self->_lookup_section ($section);
260
261 return "" unless defined $section_ptr;
262
263 my $all_text = "<Section>\n";
264
265 ##output the text
266 #$all_text .= " <Content>";
267 $all_text .= &_escape_text($section_ptr->{'text'});
268 #$all_text .= " </Content>\n";
269
270
271 #output all the subsections
272 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
273 $all_text .= $self->buffer_txt_section_xml("$section.$subsection");
274 }
275
276 $all_text .= "</Section>\n";
277
278
279 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
280 return $all_text;
281}
282
283sub buffer_mets_fileSection_section_xml() {
284 my $self = shift(@_);
285 my ($section,$version) = @_;
286
287 #$section="" unless defined $section;
288
289
290 my $section_ptr=$self->_lookup_section($section);
291 return "" unless defined $section_ptr;
292
293
294 # output fileSection by sections
295 my $section_num ="1". $section;
296
297 my $filePath = 'doctxt.xml';
298
299 my $opt_owner_id = "";
300 if ($version eq "fedora") {
301 $opt_owner_id = "OWNERID=\"M\"";
302 }
303
304 # output the fileSection details
305 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
306 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
307 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
308
309 my $xpath = "1".$section;
310 $xpath =~ s/\./]\/Section[/g;
311
312 $all_text .= $xpath;
313
314 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
315 $all_text .= " </mets:file>\n";
316 $all_text .= " </mets:fileGrp>\n";
317
318
319 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
320 $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version);
321 }
322
323 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
324
325 return $all_text;
326}
327
328sub buffer_mets_fileWhole_section_xml(){
329 my $self = shift(@_);
330 my ($section,$version,$working_dir) = @_;
331
332 my $section_ptr = $self-> _lookup_section($section);
333 return "" unless defined $section_ptr;
334
335 my $all_text="" unless defined $all_txt;
336
337 my $fileID=0;
338
339 # Output the fileSection for the whole section
340 # => get the sourcefile and associative file
341
342 my $id_root = "";
343 my $opt_owner_id = "";
344
345 if ($version eq "fedora") {
346 $opt_owner_id = "OWNERID=\"M\"";
347 }
348 else {
349 $id_root = "default";
350 }
351
352 if ($version ne "fedora") {
353 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
354 }
355
356 foreach my $data (@{$section_ptr->{'metadata'}}){
357 my $escaped_value = &_escape_text($data->[1]);
358
359 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
360 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
361
362 ++$fileID;
363 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
364
365 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
366
367 $all_text .= " </mets:file>\n";
368 }
369
370 if ($data->[0] eq "gsdlassocfile"){
371
372 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
373 my $assoc_file = $1;
374 my $mime_type = $2;
375 my $assoc_dir = $3;
376
377 if ($version eq "fedora") {
378 $id_root = $assoc_file;
379 $id_root =~ s/\//_/g;
380 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
381 }
382
383 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
384 ++$fileID;
385
386 my $mime_attr = "MIMETYPE=\"$mime_type\"";
387 my $xlink_title = "xlink:title=\"$assoc_file\"";
388
389 my $id_attr;
390 my $xlink_href;
391
392 if ($version eq "fedora") {
393 $id_attr = "ID=\"$id_root.0\"";
394
395 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
396 if (!defined $fedora_prefix) {
397 $xlink_href = "xlink:href=\"$assfilePath\"";
398 }
399 else
400 {
401 my $gsdlhome = $ENV{'GSDLHOME'};
402 my $gsdl_href = "$working_dir/$assfilePath";
403
404 $gsdl_href =~ s/^$gsdlhome(\/)?//;
405 $gsdl_href = "/gsdl/$gsdl_href";
406
407 my $fserver = $ENV{'FEDORA_HOSTNAME'};
408 my $fport = $ENV{'FEDORA_SERVER_PORT'};
409
410 my $fdomain = "http://$fserver:$fport";
411 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
412 }
413
414 my $top_section = $self->get_top_section();
415 my $id = $self->get_metadata_element($top_section,"Identifier");
416 }
417 else {
418 $id_attr = "ID=\"$id_root.$fileID\"";
419 $xlink_href = "xlink:href=\"$assfilePath\"";
420 }
421
422 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
423 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
424
425 $all_text .= " </mets:file>\n";
426
427 if ($version eq "fedora") {
428 $all_text .= " </mets:fileGrp>\n";
429 }
430
431 }
432 }
433
434 if ($version ne "fedora") {
435 $all_text .= " </mets:fileGrp>\n";
436 }
437
438 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
439
440 return $all_text;
441}
442
443sub buffer_mets_StructMapSection_section_xml(){
444 my $self = shift(@_);
445 my ($section, $order_numref) = @_;
446
447 $section="" unless defined $section;
448
449
450 my $section_ptr=$self->_lookup_section($section);
451 return "" unless defined $section_ptr;
452
453
454 # output fileSection by Sections
455 my $section_num ="1". $section;
456 my $dmd_num = $section_num;
457
458 ##**output the dmdSection details
459 #if ($section_num eq "1") {
460 # $dmd_num = "0";
461 #}
462
463 #**output the StructMap details
464
465 my $dmdid_attr = "DM$dmd_num";
466
467 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
468 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
469 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
470
471 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
472
473
474 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
475 $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref);
476 }
477
478 $all_text .= " </mets:div>\n";
479
480 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
481
482 return $all_text;
483}
484
485
486sub buffer_mets_StructMapWhole_section_xml(){
487 my $self = shift(@_);
488 my ($section) = @_;
489
490 my $section_ptr = $self-> _lookup_section($section);
491 return "" unless defined $section_ptr;
492
493 my $all_text="" unless defined $all_txt;
494 my $fileID=0;
495 my $order_num = 0;
496
497 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
498
499 #** output the StructMapSection for the whole section
500 # get the sourcefile and associative file
501
502 foreach my $data (@{$section_ptr->{'metadata'}}){
503 my $escaped_value = &_escape_text($data->[1]);
504
505 if ($data->[0] eq "gsdlsourcefilename") {
506 ++$fileID;
507 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
508 }
509
510 if ($data->[0] eq "gsdlassocfile"){
511 ++$fileID;
512 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
513 }
514 }
515 $all_text .= " </mets:div>\n";
516
517 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
518
519 return $all_text;
520}
521
522
523sub buffer_mets_dmdSection_section_xml(){
524 my $self = shift(@_);
525 my ($section,$version) = @_;
526
527 $section="" unless defined $section;
528
529 my $section_ptr=$self->_lookup_section($section);
530 return "" unless defined $section_ptr;
531
532 # convert section number
533 my $section_num ="1". $section;
534 my $dmd_num = $section_num;
535
536 # #**output the dmdSection details
537 # if ($section_num eq "1") {
538 # $dmd_num = "0";
539 # }
540
541
542 my $all_text = "";
543
544 my $label_attr = "";
545 if ($version eq "fedora") {
546 $all_text .= "<mets:amdSec ID=\"DC\" >\n";
547 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
548
549 $label_attr = "LABEL=\"Dublin Core Metadata\"";
550 }
551 else {
552 print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
553 print STDERR "***** Check to see if <techMD> required\n";
554 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
555
556 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
557 }
558
559 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
560 $all_text .= " <mets:xmlData>\n";
561
562 if ($version eq "fedora") {
563 my $dc_namespace = "";
564 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
565 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
566
567 $all_text .= " <oai_dc:dc $dc_namespace>\n";
568
569 $all_text .= $self->buffer_dc_section($section,"oai_dc");
570 $all_text .= " </oai_dc:dc>\n";
571 }
572 else {
573 foreach my $data (@{$section_ptr->{'metadata'}}){
574 my $escaped_value = &_escape_text($data->[1]);
575 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
576 if ($data->[0] eq "dc.Title") {
577 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
578 }
579 }
580 }
581
582 $all_text .= " </mets:xmlData>\n";
583 $all_text .= " </mets:mdWrap>\n";
584
585 if ($version eq "fedora") {
586 $all_text .= " </mets:techMD>\n";
587 $all_text .= "</mets:amdSec>\n";
588 }
589 else {
590 $all_text .= "</mets:dmdSec>\n";
591 }
592
593
594 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
595 $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version);
596 }
597
598 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
599
600 return $all_text;
601}
602
603sub output_section {
604 my $self = shift (@_);
605 my ($handle, $section) = @_;
606
607 print $handle $self->buffer_section_xml($section);
608}
609
610# print out DSpace dublin_core metadata section
611sub output_dspace_section {
612 my $self = shift (@_);
613 my ($handle, $section) = @_;
614
615 my $section_ptr = $self->_lookup_section ($section);
616 return "" unless defined $section_ptr;
617
618 my $all_text = "<Section>\n";
619 $all_text .= " <Description>\n";
620
621 # output metadata
622 foreach my $data (@{$section_ptr->{'metadata'}}) {
623 my $escaped_value = &_escape_text($data->[1]);
624 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
625 }
626
627 $all_text .= " </Description>\n";
628 $all_text .= "</Section>\n";
629
630 # make sure no nasty control characters have snuck through
631 # (XML::Parser will barf on anything it doesn't consider to be
632 # valid UTF-8 text, including things like \c@, \cC etc.)
633 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
634
635 return $all_text;
636}
637
638# print out doctxt.xml file
639sub output_txt_section {
640 my $self = shift (@_);
641 my ($handle, $section) = @_;
642
643 print $handle $self->buffer_txt_section_xml($section);
644}
645
646# print out docmets.xml file
647sub output_mets_section {
648 my $self = shift(@_);
649 my ($handle, $section, $version, $working_dir) = @_;
650
651 # print out the dmdSection
652 print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);
653
654 print $handle "<mets:fileSec>\n";
655 if ($version eq "fedora") {
656 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
657 }
658
659 # print out the fileSection by sections
660 print $handle $self->buffer_mets_fileSection_section_xml($section,$version);
661
662 # print out the whole fileSection
663 print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);
664
665 if ($version eq "fedora") {
666 print $handle " </mets:fileGrp>\n";
667 }
668 print $handle "</mets:fileSec>\n";
669
670 # print out the StructMapSection by sections
671
672 my $struct_type;
673 if ($version eq "fedora") {
674 $struct_type = "fedora:dsBindingMap";
675 }
676 else {
677 $struct_type = "Section";
678 }
679
680 if ($version ne "fedora") {
681 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
682 my $order_num=0;
683 print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);
684 print $handle "</mets:structMap>\n";
685
686 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
687 print $handle $self->buffer_mets_StructMapWhole_section_xml($section);
688 print $handle "</mets:structMap>\n";
689 }
690
691}
692
693my $dc_set = { Title => 1,
694 Creator => 1,
695 Subject => 1,
696 Description => 1,
697 Publisher => 1,
698 Contributors => 1,
699 Date => 1,
700 Type => 1,
701 Format => 1,
702 Identifier => 1,
703 Source => 1,
704 Language => 1,
705 Relation => 1,
706 Coverage => 1,
707 Rights => 1};
708
709
710
711
712# Build up dublin_core metadata. Priority given to dc.* over ex.*
713
714sub buffer_dc_section {
715 my $self = shift(@_);
716 my ($section, $version) = @_;
717
718 # build up string of dublin core metadata
719 $section="" unless defined $section;
720
721 my $section_ptr=$self->_lookup_section($section);
722 return "" unless defined $section_ptr;
723
724 my $explicit_dc = {};
725 my $explicit_ex = {};
726
727 my $all_text="";
728 foreach my $data (@{$section_ptr->{'metadata'}}){
729 my $escaped_value = &_escape_text($data->[1]);
730 if ($data->[0]=~ m/^dc\./) {
731 $data->[0] =~ tr/[A-Z]/[a-z]/;
732
733 $data->[0] =~ m/^dc\.(.*)/;
734 my $dc_element = $1;
735
736 if (!defined $explicit_dc->{$dc_element}) {
737 $explicit_dc->{$dc_element} = [];
738 }
739 push(@{$explicit_dc->{$dc_element}},$escaped_value);
740
741 #$all_text .= ' <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
742 if (defined $version && ($version eq "oai_dc")) {
743 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
744 }
745 else {
746 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
747 }
748
749 }
750 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
751 $data->[0] =~ m/^(ex\.)?(.*)/;
752 my $ex_element = $2;
753 my $lc_ex_element = lc($ex_element);
754
755 if (defined $dc_set->{$ex_element}) {
756 if (!defined $explicit_ex->{$lc_ex_element}) {
757 $explicit_ex->{$lc_ex_element} = [];
758 }
759 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
760 }
761 }
762 }
763
764 # go through dc_set and for any element *not* defined in explicit_dc
765 # that do exist in explicit_ex, add it in as metadata
766 foreach my $k ( keys %$dc_set ) {
767 my $lc_k = lc($k);
768
769 if (!defined $explicit_dc->{$lc_k}) {
770 if (defined $explicit_ex->{$lc_k}) {
771
772 foreach my $v (@{$explicit_ex->{$lc_k}}) {
773 my $dc_element = $lc_k;
774 my $escaped_value = $v;
775
776 if (defined $version && ($version eq "oai_dc")) {
777 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
778 }
779 else {
780 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
781 }
782
783 }
784 }
785 }
786 }
787
788 if ($all_text eq "") {
789 $all_text .= " There is no Dublin Core metatdata in this document\n";
790 }
791 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
792
793 return $all_text;
794}
795
796
797# Print out dublin_core metadata
798sub output_dc_section {
799 my $self = shift(@_);
800 my ($handle, $section, $version) = @_;
801
802 my $all_text = $self->buffer_dc_section($section,$version);
803
804 print $handle $all_text;
805}
806
807
808# look up the reference to the a particular section
809sub _lookup_section {
810 my $self = shift (@_);
811 my ($section) = @_;
812
813 my ($num);
814 my $sectionref = $self;
815
816 while (defined $section && $section ne "") {
817 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
818 $num =~ s/^0+(\d)/$1/; # remove leading 0s
819 $section = "" unless defined $section;
820
821 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
822 $sectionref = $sectionref->{'subsections'}->{$num};
823 } else {
824 return undef;
825 }
826 }
827
828 return $sectionref;
829}
830
831# calculate OID by hashing the contents of the document
832sub _calc_OID {
833 my $self = shift (@_);
834 my ($filename) = @_;
835
836 my $osexe = &util::get_os_exe();
837
838 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
839 $ENV{'GSDLOS'},"hashfile$osexe");
840
841 my $result = "NULL";
842
843 if (-e "$hashfile_exe") {
844# $result = `\"$hashfile_exe\" \"$filename\"`;
845 $result = `hashfile$osexe \"$filename\"`;
846 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
847
848 } else {
849 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
850 }
851 return "HASH$result";
852}
853
854# methods dealing with OID, not groups of them.
855
856# if $OID is not provided one is calculated
857sub set_OID {
858 my $self = shift (@_);
859 my ($OID) = @_;
860
861 my $use_hash_oid = 0;
862 # if an OID wasn't provided claculate one
863 if (!defined $OID) {
864 $OID = "NULL";
865 if ($self->{'OIDtype'} eq "hash") {
866 $use_hash_oid = 1;
867 } elsif ($self->{'OIDtype'} eq "incremental") {
868 $OID = "D" . $OIDcount;
869 $OIDcount ++;
870
871 } elsif ($self->{'OIDtype'} eq "dirname") {
872 $OID = 'J';
873 my $filename = $self->get_source_filename();
874 if (defined($filename)) { # && -e $filename) {
875 $OID = &File::Basename::dirname($filename);
876 if (defined $OID) {
877 $OID = 'J'.&File::Basename::basename($OID);
878 $OID =~ s/\.//; #remove any periods
879 } else {
880 print STDERR "Failed to find base for filename ($filename)...generating hash id\n";
881 $use_hash_oid = 1;
882 }
883 } else {
884 print STDERR "Failed to find filename, generating hash id\n";
885 $use_hash_oid = 1;
886 }
887
888 } elsif ($self->{'OIDtype'} eq "assigned") {
889 my $identifier = $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
890 if (defined $identifier && $identifier ne "") {
891 $OID = "D" . $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
892 $OID =~ s/\.//; #remove any periods
893 } else {
894 # need a hash id
895 print STDERR "no dc.Identifier found, generating hash id\n";
896 $use_hash_oid = 1;
897 }
898
899 } else {
900 $use_hash_oid = 1;
901 }
902
903 if ($use_hash_oid) {
904
905 # "hash" OID - feed file to hashfile.exe
906 #my $filename = $self->get_source_filename();
907 # we want to use the converted file for hashing if available
908 # cos its quicker
909 my $filename = $self->get_filename_for_hashing();
910
911 # -z: don't want to hash on the file if it is zero size
912 if (defined($filename) && -e $filename && !-z $filename) {
913 $OID = $self->_calc_OID ($filename);
914 } else {
915 $filename = &util::get_tmp_filename();
916 if (!open (OUTFILE, ">$filename")) {
917 print STDERR "doc::set_OID could not write to $filename\n";
918 } else {
919 $self->output_section('OUTFILE', $self->get_top_section(),
920 undef, 1);
921 close (OUTFILE);
922 }
923 $OID = $self->_calc_OID ($filename);
924 &util::rm ($filename);
925 }
926 }
927 }
928 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
929}
930
931# this uses hashdoc (embedded c thingy) which is faster but still
932# needs a little work to be suffiently stable
933sub ___set_OID {
934 my $self = shift (@_);
935 my ($OID) = @_;
936
937 # if an OID wasn't provided then calculate hash value based on document
938 if (!defined $OID)
939 {
940 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
941 undef, 1);
942 my $hash_len = length($hash_text);
943
944 $OID = &hashdoc::buffer($hash_text,$hash_len);
945 }
946
947 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
948}
949
950# returns the OID for this document
951sub get_OID {
952 my $self = shift (@_);
953 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
954 return $OID if (defined $OID);
955 return "NULL";
956}
957
958sub delete_OID {
959 my $self = shift (@_);
960
961 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
962}
963
964
965# methods for manipulating section names
966
967# returns the name of the top-most section (the top
968# level of the document
969sub get_top_section {
970 my $self = shift (@_);
971
972 return "";
973}
974
975# returns a section
976sub get_parent_section {
977 my $self = shift (@_);
978 my ($section) = @_;
979
980 $section =~ s/(^|\.)\d+$//;
981
982 return $section;
983}
984
985# returns the first child section (or the end child
986# if there isn't any)
987sub get_begin_child {
988 my $self = shift (@_);
989 my ($section) = @_;
990
991 my $section_ptr = $self->_lookup_section($section);
992 return "" unless defined $section_ptr;
993
994 if (defined $section_ptr->{'subsection_order'}->[0]) {
995 return "$section.$section_ptr->{'subsection_order'}->[0]";
996 }
997
998 return $self->get_end_child ($section);
999}
1000
1001# returns the next child of a parent section
1002sub get_next_child {
1003 my $self = shift (@_);
1004 my ($section) = @_;
1005
1006 my $parent_section = $self->get_parent_section($section);
1007 my $parent_section_ptr = $self->_lookup_section($parent_section);
1008 return undef unless defined $parent_section_ptr;
1009
1010 my ($section_num) = $section =~ /(\d+)$/;
1011 return undef unless defined $section_num;
1012
1013 my $i = 0;
1014 my $section_order = $parent_section_ptr->{'subsection_order'};
1015 while ($i < scalar(@$section_order)) {
1016 last if $section_order->[$i] eq $section_num;
1017 $i++;
1018 }
1019
1020 $i++; # the next child
1021 if ($i < scalar(@$section_order)) {
1022 return $section_order->[$i] if $parent_section eq "";
1023 return "$parent_section.$section_order->[$i]";
1024 }
1025
1026 # no more sections in this level
1027 return undef;
1028}
1029
1030# returns a reference to a list of children
1031sub get_children {
1032 my $self = shift (@_);
1033 my ($section) = @_;
1034
1035 my $section_ptr = $self->_lookup_section($section);
1036 return [] unless defined $section_ptr;
1037
1038 my @children = @{$section_ptr->{'subsection_order'}};
1039
1040 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
1041 return \@children;
1042}
1043
1044# returns the child section one past the last one (which
1045# is coded as "0")
1046sub get_end_child {
1047 my $self = shift (@_);
1048 my ($section) = @_;
1049
1050 return $section . ".0" unless $section eq "";
1051 return "0";
1052}
1053
1054# returns the next section in book order
1055sub get_next_section {
1056 my $self = shift (@_);
1057 my ($section) = @_;
1058
1059 return undef unless defined $section;
1060
1061 my $section_ptr = $self->_lookup_section($section);
1062 return undef unless defined $section_ptr;
1063
1064 # first try to find first child
1065 if (defined $section_ptr->{'subsection_order'}->[0]) {
1066 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
1067 return "$section.$section_ptr->{'subsection_order'}->[0]";
1068 }
1069
1070 do {
1071 # try to find sibling
1072 my $next_child = $self->get_next_child ($section);
1073 return $next_child if (defined $next_child);
1074
1075 # move up one level
1076 $section = $self->get_parent_section ($section);
1077 } while $section =~ /\d/;
1078
1079 return undef;
1080}
1081
1082sub is_leaf_section {
1083 my $self = shift (@_);
1084 my ($section) = @_;
1085
1086 my $section_ptr = $self->_lookup_section($section);
1087 return 1 unless defined $section_ptr;
1088
1089 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
1090}
1091
1092# methods for dealing with sections
1093
1094# returns the name of the inserted section
1095sub insert_section {
1096 my $self = shift (@_);
1097 my ($before_section) = @_;
1098
1099 # get the child to insert before and its parent section
1100 my $parent_section = "";
1101 my $before_child = "0";
1102 my @before_section = split (/\./, $before_section);
1103 if (scalar(@before_section) > 0) {
1104 $before_child = pop (@before_section);
1105 $parent_section = join (".", @before_section);
1106 }
1107
1108 my $parent_section_ptr = $self->_lookup_section($parent_section);
1109 if (!defined $parent_section_ptr) {
1110 print STDERR "doc::insert_section couldn't find parent section " .
1111 "$parent_section\n";
1112 return;
1113 }
1114
1115 # get the next section number
1116 my $section_num = $parent_section_ptr->{'next_subsection'}++;
1117
1118 my $i = 0;
1119 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
1120 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
1121 $i++;
1122 }
1123
1124 # insert the section number into the order list
1125 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
1126
1127 # add this section to the parent section
1128 my $section_ptr = {'subsection_order'=>[],
1129 'next_subsection'=>1,
1130 'subsections'=>{},
1131 'metadata'=>[],
1132 'text'=>""};
1133 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
1134
1135 # work out the full section number
1136 my $section = $parent_section;
1137 $section .= "." unless $section eq "";
1138 $section .= $section_num;
1139
1140 return $section;
1141}
1142
1143# creates a pre-named section
1144sub create_named_section {
1145 my $self = shift (@_);
1146 my ($mastersection) = @_;
1147
1148 my ($num);
1149 my $section = $mastersection;
1150 my $sectionref = $self;
1151
1152 while ($section ne "") {
1153 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
1154 $num =~ s/^0+(\d)/$1/; # remove leading 0s
1155 $section = "" unless defined $section;
1156
1157 if (defined $num) {
1158 if (!defined $sectionref->{'subsections'}->{$num}) {
1159 push (@{$sectionref->{'subsection_order'}}, $num);
1160 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
1161 'next_subsection'=>1,
1162 'subsections'=>{},
1163 'metadata'=>[],
1164 'text'=>""};
1165 if ($num >= $sectionref->{'next_subsection'}) {
1166 $sectionref->{'next_subsection'} = $num + 1;
1167 }
1168 }
1169 $sectionref = $sectionref->{'subsections'}->{$num};
1170
1171 } else {
1172 print STDERR "doc::create_named_section couldn't create section ";
1173 print STDERR "$mastersection\n";
1174 last;
1175 }
1176 }
1177}
1178
1179# returns a reference to a list of subsections
1180sub list_subsections {
1181 my $self = shift (@_);
1182 my ($section) = @_;
1183
1184 my $section_ptr = $self->_lookup_section ($section);
1185 if (!defined $section_ptr) {
1186 print STDERR "doc::list_subsections couldn't find section $section\n";
1187 return [];
1188 }
1189
1190 return [@{$section_ptr->{'subsection_order'}}];
1191}
1192
1193sub delete_section {
1194 my $self = shift (@_);
1195 my ($section) = @_;
1196
1197# my $section_ptr = {'subsection_order'=>[],
1198# 'next_subsection'=>1,
1199# 'subsections'=>{},
1200# 'metadata'=>[],
1201# 'text'=>""};
1202
1203 # if this is the top section reset everything
1204 if ($section eq "") {
1205 $self->{'subsection_order'} = [];
1206 $self->{'subsections'} = {};
1207 $self->{'metadata'} = [];
1208 $self->{'text'} = "";
1209 return;
1210 }
1211
1212 # find the parent of the section to delete
1213 my $parent_section = "";
1214 my $child = "0";
1215 my @section = split (/\./, $section);
1216 if (scalar(@section) > 0) {
1217 $child = pop (@section);
1218 $parent_section = join (".", @section);
1219 }
1220
1221 my $parent_section_ptr = $self->_lookup_section($parent_section);
1222 if (!defined $parent_section_ptr) {
1223 print STDERR "doc::delete_section couldn't find parent section " .
1224 "$parent_section\n";
1225 return;
1226 }
1227
1228 # remove this section from the subsection_order list
1229 my $i = 0;
1230 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
1231 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
1232 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
1233 last;
1234 }
1235 $i++;
1236 }
1237
1238 # remove this section from the subsection hash
1239 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
1240 undef $parent_section_ptr->{'subsections'}->{$child};
1241 }
1242}
1243
1244#--
1245# methods for dealing with metadata
1246
1247# set_metadata_element and get_metadata_element are for metadata
1248# which should only have one value. add_meta_data and get_metadata
1249# are for metadata which can have more than one value.
1250
1251# returns the first metadata value which matches field
1252
1253# This version of get metadata element works much like the one above,
1254# except it allows for the namespace portion of a metadata element to
1255# be ignored, thus if you are searching for dc.Title, the first piece
1256# of matching metadata ending with the name Title (once any namespace
1257# is removed) would be returned.
1258# 28-11-2003 John Thompson
1259sub get_metadata_element {
1260 my $self = shift (@_);
1261 my ($section, $field, $ignore_namespace) = @_;
1262 my ($data);
1263
1264 $ignore_namespace = 0 unless defined $ignore_namespace;
1265
1266 my $section_ptr = $self->_lookup_section($section);
1267 if (!defined $section_ptr) {
1268 print STDERR "doc::get_metadata_element couldn't find section ", $section, "\n";
1269 return;
1270 }
1271
1272 # Remove the any namespace if we are being told to ignore them
1273 if($ignore_namespace) {
1274 $field =~ s/^\w*\.//;
1275 }
1276
1277 foreach $data (@{$section_ptr->{'metadata'}}) {
1278
1279 my $data_name = $data->[0];
1280 # Remove the any namespace if we are being told to ignore them
1281 if($ignore_namespace) {
1282 $data_name =~ s/^\w*\.//;
1283 }
1284
1285 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
1286 }
1287
1288 return undef; # was not found
1289}
1290
1291# returns a list of the form [value1, value2, ...]
1292sub get_metadata {
1293 my $self = shift (@_);
1294 my ($section, $field, $ignore_namespace) = @_;
1295 my ($data);
1296
1297 $ignore_namespace = 0 unless defined $ignore_namespace;
1298
1299 my $section_ptr = $self->_lookup_section($section);
1300 if (!defined $section_ptr) {
1301 print STDERR "doc::get_metadata couldn't find section ",
1302 $section, "\n";
1303 return;
1304 }
1305
1306 # Remove the any namespace if we are being told to ignore them
1307 if($ignore_namespace) {
1308 $field =~ s/^\w*\.//;
1309 }
1310
1311 my @metadata = ();
1312 foreach $data (@{$section_ptr->{'metadata'}}) {
1313
1314 my $data_name = $data->[0];
1315 # Remove the any namespace if we are being told to ignore them
1316 if($ignore_namespace) {
1317 $data_name =~ s/^\w*\.//;
1318 }
1319
1320 push (@metadata, $data->[1]) if ($data_name eq $field);
1321 }
1322
1323 return \@metadata;
1324}
1325
1326# returns a list of the form [[field,value],[field,value],...]
1327sub get_all_metadata {
1328 my $self = shift (@_);
1329 my ($section) = @_;
1330
1331 my $section_ptr = $self->_lookup_section($section);
1332 if (!defined $section_ptr) {
1333 print STDERR "doc::get_all_metadata couldn't find section ", $section, "\n";
1334 return;
1335 }
1336
1337 return $section_ptr->{'metadata'};
1338}
1339
1340# $value is optional
1341sub delete_metadata {
1342 my $self = shift (@_);
1343 my ($section, $field, $value) = @_;
1344
1345 my $section_ptr = $self->_lookup_section($section);
1346 if (!defined $section_ptr) {
1347 print STDERR "doc::delete_metadata couldn't find section ", $section, "\n";
1348 return;
1349 }
1350
1351 my $i = 0;
1352 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
1353 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
1354 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
1355 splice (@{$section_ptr->{'metadata'}}, $i, 1);
1356 } else {
1357 $i++;
1358 }
1359 }
1360}
1361
1362sub delete_all_metadata {
1363 my $self = shift (@_);
1364 my ($section) = @_;
1365
1366 my $section_ptr = $self->_lookup_section($section);
1367 if (!defined $section_ptr) {
1368 print STDERR "doc::delete_all_metadata couldn't find section ", $section, "\n";
1369 return;
1370 }
1371
1372 $section_ptr->{'metadata'} = [];
1373}
1374
1375sub set_metadata_element {
1376 my $self = shift (@_);
1377 my ($section, $field, $value) = @_;
1378
1379 $self->set_utf8_metadata_element ($section, $field,
1380 &unicode::ascii2utf8(\$value));
1381}
1382
1383# set_utf8_metadata_element assumes the text has already been
1384# converted to the UTF-8 encoding.
1385sub set_utf8_metadata_element {
1386 my $self = shift (@_);
1387 my ($section, $field, $value) = @_;
1388
1389 $self->delete_metadata ($section, $field);
1390 $self->add_utf8_metadata ($section, $field, $value);
1391}
1392
1393
1394# add_metadata assumes the text is in (extended) ascii form. For
1395# text which has already been converted to the UTF-8 format use
1396# add_utf8_metadata.
1397sub add_metadata {
1398 my $self = shift (@_);
1399 my ($section, $field, $value) = @_;
1400
1401 $self->add_utf8_metadata ($section, $field,
1402 &unicode::ascii2utf8(\$value));
1403}
1404
1405sub add_utf8_metadata {
1406 my $self = shift (@_);
1407 my ($section, $field, $value) = @_;
1408
1409 my $section_ptr = $self->_lookup_section($section);
1410 if (!defined $section_ptr) {
1411 print STDERR "doc::add_utf8_metadata couldn't find section ", $section, "\n";
1412 return;
1413 }
1414 if (!defined $value) {
1415 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
1416 return;
1417 }
1418 if (!defined $field) {
1419 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
1420 return;
1421 }
1422
1423 #print STDERR "###$field=$value\n";
1424 # double check that the value is utf-8
1425 if (unicode::ensure_utf8(\$value)) {
1426 print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
1427 }
1428
1429 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
1430}
1431
1432
1433# methods for dealing with text
1434
1435# returns the text for a section
1436sub get_text {
1437 my $self = shift (@_);
1438 my ($section) = @_;
1439
1440 my $section_ptr = $self->_lookup_section($section);
1441 if (!defined $section_ptr) {
1442 print STDERR "doc::get_text couldn't find section " .
1443 "$section\n";
1444 return "";
1445 }
1446
1447 return $section_ptr->{'text'};
1448}
1449
1450# returns the (utf-8 encoded) length of the text for a section
1451sub get_text_length {
1452 my $self = shift (@_);
1453 my ($section) = @_;
1454
1455 my $section_ptr = $self->_lookup_section($section);
1456 if (!defined $section_ptr) {
1457 print STDERR "doc::get_text_length couldn't find section " .
1458 "$section\n";
1459 return 0;
1460 }
1461
1462 return length ($section_ptr->{'text'});
1463}
1464
1465sub delete_text {
1466 my $self = shift (@_);
1467 my ($section) = @_;
1468
1469 my $section_ptr = $self->_lookup_section($section);
1470 if (!defined $section_ptr) {
1471 print STDERR "doc::delete_text couldn't find section " .
1472 "$section\n";
1473 return;
1474 }
1475
1476 $section_ptr->{'text'} = "";
1477}
1478
1479# add_text assumes the text is in (extended) ascii form. For
1480# text which has been already converted to the UTF-8 format
1481# use add_utf8_text.
1482sub add_text {
1483 my $self = shift (@_);
1484 my ($section, $text) = @_;
1485
1486 # convert the text to UTF-8 encoded unicode characters
1487 # and add the text
1488 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
1489}
1490
1491
1492# add_utf8_text assumes the text to be added has already
1493# been converted to the UTF-8 encoding. For ascii text use
1494# add_text
1495sub add_utf8_text {
1496 my $self = shift (@_);
1497 my ($section, $text) = @_;
1498
1499 my $section_ptr = $self->_lookup_section($section);
1500 if (!defined $section_ptr) {
1501 print STDERR "doc::add_utf8_text couldn't find section " .
1502 "$section\n";
1503 return;
1504 }
1505
1506 $section_ptr->{'text'} .= $text;
1507}
1508
1509
1510# methods for dealing with associated files
1511
1512# a file is associated with a document, NOT a section.
1513# if section is defined it is noted in the data structure
1514# only so that files associated from a particular section
1515# may be removed later (using delete_section_assoc_files)
1516sub associate_file {
1517 my $self = shift (@_);
1518 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
1519 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
1520
1521 # remove all associated files with the same name
1522 $self->delete_assoc_file ($assoc_filename);
1523
1524 push (@{$self->{'associated_files'}},
1525 [$real_filename, $assoc_filename, $mime_type, $section]);
1526}
1527
1528# returns a list of associated files in the form
1529# [[real_filename, assoc_filename, mimetype], ...]
1530sub get_assoc_files {
1531 my $self = shift (@_);
1532
1533 return $self->{'associated_files'};
1534}
1535
1536sub delete_section_assoc_files {
1537 my $self = shift (@_);
1538 my ($section) = @_;
1539
1540 my $i=0;
1541 while ($i < scalar (@{$self->{'associated_files'}})) {
1542 if (defined $self->{'associated_files'}->[$i]->[3] &&
1543 $self->{'associated_files'}->[$i]->[3] eq $section) {
1544 splice (@{$self->{'associated_files'}}, $i, 1);
1545 } else {
1546 $i++;
1547 }
1548 }
1549}
1550
1551sub delete_assoc_file {
1552 my $self = shift (@_);
1553 my ($assoc_filename) = @_;
1554
1555 my $i=0;
1556 while ($i < scalar (@{$self->{'associated_files'}})) {
1557 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
1558 splice (@{$self->{'associated_files'}}, $i, 1);
1559 } else {
1560 $i++;
1561 }
1562 }
1563}
1564
1565sub reset_nextsection_ptr {
1566 my $self = shift (@_);
1567 my ($section) = @_;
1568
1569 my $section_ptr = $self->_lookup_section($section);
1570 $section_ptr->{'next_subsection'} = 1;
1571}
1572
15731;
Note: See TracBrowser for help on using the repository browser.