source: trunk/gsdl/perllib/doc.pm@ 10218

Last change on this file since 10218 was 10217, checked in by kjdon, 19 years ago

moved the addition of lastmodified metadata from the new method, into docsave.process - this way it doesn't affect the HASH id

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 42.1 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use unicode;
37use util;
38use ghtml;
39use File::stat;
40##use hashdoc;
41
42# the document type may be indexed_doc, nonindexed_doc, or
43# classification
44
45my $OIDcount = 0;
46
47sub new {
48 my $class = shift (@_);
49 my ($source_filename, $doc_type) = @_;
50
51 my $self = bless {'associated_files'=>[],
52 'subsection_order'=>[],
53 'next_subsection'=>1,
54 'subsections'=>{},
55 'metadata'=>[],
56 'text'=>"",
57 'OIDtype'=>"hash"}, $class;
58
59 # used to set lastmodified here, but this can screw up the HASH ids, so
60 # the docsave processor now calls set_lastmodified
61
62 if (defined $source_filename) {
63 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
64
65 if (defined $collect_dir) {
66 my $dirsep = &util::get_dirsep();
67 if ($collect_dir !~ m/$dirsep$/) {
68 $collect_dir .= $dirsep;
69 }
70
71 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
72
73 # if from within GSDLCOLLECTDIR, then remove directory prefix
74 # so source_filename is realative to it. This is done to aid
75 # portability, i.e. the collection can be moved to somewhere
76 # else on the file system and the archives directory will still
77 # work. This is needed, for example in the applet version of
78 # GLI where GSDLHOME/collect on the server will be different to
79 # the collect directory of the remove user. Of course,
80 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
81 # it back into a full pathname.
82
83 if ($source_filename =~ /^$collect_dir(.*)$/) {
84 $source_filename = $1;
85 }
86 }
87
88 $self->set_source_filename ($source_filename);
89 }
90
91 $self->set_doc_type ($doc_type) if defined $doc_type;
92
93 return $self;
94}
95# set lastmodified for OAI purposes, added by GRB, moved by kjdon
96sub set_lastmodified {
97 my $self = shift (@_);
98
99 my $source_filename = $self->get_source_filename();
100 if ((defined $self->get_metadata_element ($self->get_top_section(), "gsdldoctype")) &&
101 (defined $source_filename) && (-e $source_filename)) {
102 my $file_stat = stat($source_filename);
103 my $mtime = $file_stat->mtime;
104 $self->add_utf8_metadata($self->get_top_section(), "lastmodified", $file_stat->mtime);
105 }
106}
107
108# clone the $self object
109sub duplicate {
110 my $self = shift (@_);
111
112 my $newobj = {};
113
114 foreach my $k (keys %$self) {
115 $newobj->{$k} = &clone ($self->{$k});
116 }
117
118 bless $newobj, ref($self);
119 return $newobj;
120}
121
122sub clone {
123 my ($from) = @_;
124 my $type = ref ($from);
125
126 if ($type eq "HASH") {
127 my $to = {};
128 foreach my $key (keys %$from) {
129 $to->{$key} = &clone ($from->{$key});
130 }
131 return $to;
132 } elsif ($type eq "ARRAY") {
133 my $to = [];
134 foreach my $v (@$from) {
135 push (@$to, &clone ($v));
136 }
137 return $to;
138 } else {
139 return $from;
140 }
141}
142
143sub set_OIDtype {
144 my $self = shift (@_);
145 my ($type) = @_;
146
147 if ($type =~ /^(hash|incremental|dirname|assigned)$/) {
148 $self->{'OIDtype'} = $type;
149 } else {
150 $self->{'OIDtype'} = "hash";
151 }
152}
153
154sub set_source_filename {
155 my $self = shift (@_);
156 my ($source_filename) = @_;
157
158 $self->set_metadata_element ($self->get_top_section(),
159 "gsdlsourcefilename",
160 $source_filename);
161}
162
163sub set_converted_filename {
164 my $self = shift (@_);
165 my ($converted_filename) = @_;
166
167 $self->set_metadata_element ($self->get_top_section(),
168 "gsdlconvertedfilename",
169 $converted_filename);
170}
171
172
173# returns the source_filename as it was provided
174sub get_source_filename {
175 my $self = shift (@_);
176
177 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
178}
179
180# returns converted filename if available else returns source filename
181sub get_filename_for_hashing {
182 my $self = shift (@_);
183
184 my $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlconvertedfilename");
185
186 if (!defined $filename) {
187 $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
188 }
189 return $filename;
190}
191
192sub set_doc_type {
193 my $self = shift (@_);
194 my ($doc_type) = @_;
195
196 $self->set_metadata_element ($self->get_top_section(),
197 "gsdldoctype",
198 $doc_type);
199}
200
201# returns the gsdldoctype as it was provided
202# the default of "indexed_doc" is used if no document
203# type was provided
204sub get_doc_type {
205 my $self = shift (@_);
206
207 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
208 return $doc_type if (defined $doc_type);
209 return "indexed_doc";
210}
211
212sub _escape_text {
213 my ($text) = @_;
214
215 # special characters in the gml encoding
216 $text =~ s/&/&/g; # this has to be first...
217 $text =~ s/</&lt;/g;
218 $text =~ s/>/&gt;/g;
219 $text =~ s/\"/&quot;/g;
220
221 return $text;
222}
223
224sub buffer_section_xml {
225 my $self = shift (@_);
226 my ($section) = @_;
227
228 my $section_ptr = $self->_lookup_section ($section);
229 return "" unless defined $section_ptr;
230
231 my $all_text = "<Section>\n";
232 $all_text .= " <Description>\n";
233
234 # output metadata
235 foreach my $data (@{$section_ptr->{'metadata'}}) {
236 my $escaped_value = &_escape_text($data->[1]);
237 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
238 }
239
240 $all_text .= " </Description>\n";
241
242 # output the text
243 $all_text .= " <Content>";
244 $all_text .= &_escape_text($section_ptr->{'text'});
245 $all_text .= "</Content>\n";
246
247 # output all the subsections
248 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
249 $all_text .= $self->buffer_section_xml("$section.$subsection");
250 }
251
252 $all_text .= "</Section>\n";
253
254 # make sure no nasty control characters have snuck through
255 # (XML::Parser will barf on anything it doesn't consider to be
256 # valid UTF-8 text, including things like \c@, \cC etc.)
257 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
258
259 return $all_text;
260}
261
262sub buffer_txt_section_xml {
263 my $self = shift(@_);
264 my ($section) = @_;
265
266 my $section_ptr = $self->_lookup_section ($section);
267
268 return "" unless defined $section_ptr;
269
270 my $all_text = "<Section>\n";
271
272 ##output the text
273 #$all_text .= " <Content>";
274 $all_text .= &_escape_text($section_ptr->{'text'});
275 #$all_text .= " </Content>\n";
276
277
278 #output all the subsections
279 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
280 $all_text .= $self->buffer_txt_section_xml("$section.$subsection");
281 }
282
283 $all_text .= "</Section>\n";
284
285
286 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
287 return $all_text;
288}
289
290sub buffer_mets_fileSection_section_xml() {
291 my $self = shift(@_);
292 my ($section,$version) = @_;
293
294 #$section="" unless defined $section;
295
296
297 my $section_ptr=$self->_lookup_section($section);
298 return "" unless defined $section_ptr;
299
300
301 # output fileSection by sections
302 my $section_num ="1". $section;
303
304 my $filePath = 'doctxt.xml';
305
306 my $opt_owner_id = "";
307 if ($version eq "fedora") {
308 $opt_owner_id = "OWNERID=\"M\"";
309 }
310
311 # output the fileSection details
312 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
313 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
314 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
315
316 my $xpath = "1".$section;
317 $xpath =~ s/\./]\/Section[/g;
318
319 $all_text .= $xpath;
320
321 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
322 $all_text .= " </mets:file>\n";
323 $all_text .= " </mets:fileGrp>\n";
324
325
326 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
327 $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version);
328 }
329
330 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
331
332 return $all_text;
333}
334
335sub buffer_mets_fileWhole_section_xml(){
336 my $self = shift(@_);
337 my ($section,$version,$working_dir) = @_;
338
339 my $section_ptr = $self-> _lookup_section($section);
340 return "" unless defined $section_ptr;
341
342 my $all_text="" unless defined $all_txt;
343
344 my $fileID=0;
345
346 # Output the fileSection for the whole section
347 # => get the sourcefile and associative file
348
349 my $id_root = "";
350 my $opt_owner_id = "";
351
352 if ($version eq "fedora") {
353 $opt_owner_id = "OWNERID=\"M\"";
354 }
355 else {
356 $id_root = "default";
357 }
358
359 if ($version ne "fedora") {
360 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
361 }
362
363 foreach my $data (@{$section_ptr->{'metadata'}}){
364 my $escaped_value = &_escape_text($data->[1]);
365
366 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
367 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
368
369 ++$fileID;
370 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
371
372 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
373
374 $all_text .= " </mets:file>\n";
375 }
376
377 if ($data->[0] eq "gsdlassocfile"){
378
379 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
380 my $assoc_file = $1;
381 my $mime_type = $2;
382 my $assoc_dir = $3;
383
384 if ($version eq "fedora") {
385 $id_root = $assoc_file;
386 $id_root =~ s/\//_/g;
387 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
388 }
389
390 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
391 ++$fileID;
392
393 my $mime_attr = "MIMETYPE=\"$mime_type\"";
394 my $xlink_title = "xlink:title=\"$assoc_file\"";
395
396 my $id_attr;
397 my $xlink_href;
398
399 if ($version eq "fedora") {
400 $id_attr = "ID=\"$id_root.0\"";
401
402 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
403 if (!defined $fedora_prefix) {
404 $xlink_href = "xlink:href=\"$assfilePath\"";
405 }
406 else
407 {
408 my $gsdlhome = $ENV{'GSDLHOME'};
409 my $gsdl_href = "$working_dir/$assfilePath";
410
411 $gsdl_href =~ s/^$gsdlhome(\/)?//;
412 $gsdl_href = "/gsdl/$gsdl_href";
413
414 my $fserver = $ENV{'FEDORA_HOSTNAME'};
415 my $fport = $ENV{'FEDORA_SERVER_PORT'};
416
417 my $fdomain = "http://$fserver:$fport";
418 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
419 }
420
421 my $top_section = $self->get_top_section();
422 my $id = $self->get_metadata_element($top_section,"Identifier");
423 }
424 else {
425 $id_attr = "ID=\"$id_root.$fileID\"";
426 $xlink_href = "xlink:href=\"$assfilePath\"";
427 }
428
429 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
430 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
431
432 $all_text .= " </mets:file>\n";
433
434 if ($version eq "fedora") {
435 $all_text .= " </mets:fileGrp>\n";
436 }
437
438 }
439 }
440
441 if ($version ne "fedora") {
442 $all_text .= " </mets:fileGrp>\n";
443 }
444
445 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
446
447 return $all_text;
448}
449
450sub buffer_mets_StructMapSection_section_xml(){
451 my $self = shift(@_);
452 my ($section, $order_numref) = @_;
453
454 $section="" unless defined $section;
455
456
457 my $section_ptr=$self->_lookup_section($section);
458 return "" unless defined $section_ptr;
459
460
461 # output fileSection by Sections
462 my $section_num ="1". $section;
463 my $dmd_num = $section_num;
464
465 ##**output the dmdSection details
466 #if ($section_num eq "1") {
467 # $dmd_num = "0";
468 #}
469
470 #**output the StructMap details
471
472 my $dmdid_attr = "DM$dmd_num";
473
474 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
475 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
476 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
477
478 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
479
480
481 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
482 $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref);
483 }
484
485 $all_text .= " </mets:div>\n";
486
487 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
488
489 return $all_text;
490}
491
492
493sub buffer_mets_StructMapWhole_section_xml(){
494 my $self = shift(@_);
495 my ($section) = @_;
496
497 my $section_ptr = $self-> _lookup_section($section);
498 return "" unless defined $section_ptr;
499
500 my $all_text="" unless defined $all_txt;
501 my $fileID=0;
502 my $order_num = 0;
503
504 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
505
506 #** output the StructMapSection for the whole section
507 # get the sourcefile and associative file
508
509 foreach my $data (@{$section_ptr->{'metadata'}}){
510 my $escaped_value = &_escape_text($data->[1]);
511
512 if ($data->[0] eq "gsdlsourcefilename") {
513 ++$fileID;
514 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
515 }
516
517 if ($data->[0] eq "gsdlassocfile"){
518 ++$fileID;
519 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
520 }
521 }
522 $all_text .= " </mets:div>\n";
523
524 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
525
526 return $all_text;
527}
528
529
530sub buffer_mets_dmdSection_section_xml(){
531 my $self = shift(@_);
532 my ($section,$version) = @_;
533
534 $section="" unless defined $section;
535
536 my $section_ptr=$self->_lookup_section($section);
537 return "" unless defined $section_ptr;
538
539 # convert section number
540 my $section_num ="1". $section;
541 my $dmd_num = $section_num;
542
543 # #**output the dmdSection details
544 # if ($section_num eq "1") {
545 # $dmd_num = "0";
546 # }
547
548
549 my $all_text = "";
550
551 my $label_attr = "";
552 if ($version eq "fedora") {
553 $all_text .= "<mets:amdSec ID=\"DC\" >\n";
554 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
555
556 $label_attr = "LABEL=\"Dublin Core Metadata\"";
557 }
558 else {
559 print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
560 print STDERR "***** Check to see if <techMD> required\n";
561 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
562
563 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
564 }
565
566 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
567 $all_text .= " <mets:xmlData>\n";
568
569 if ($version eq "fedora") {
570 my $dc_namespace = "";
571 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
572 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
573
574 $all_text .= " <oai_dc:dc $dc_namespace>\n";
575
576 $all_text .= $self->buffer_dc_section($section,"oai_dc");
577 $all_text .= " </oai_dc:dc>\n";
578 }
579 else {
580 foreach my $data (@{$section_ptr->{'metadata'}}){
581 my $escaped_value = &_escape_text($data->[1]);
582 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
583 if ($data->[0] eq "dc.Title") {
584 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
585 }
586 }
587 }
588
589 $all_text .= " </mets:xmlData>\n";
590 $all_text .= " </mets:mdWrap>\n";
591
592 if ($version eq "fedora") {
593 $all_text .= " </mets:techMD>\n";
594 $all_text .= "</mets:amdSec>\n";
595 }
596 else {
597 $all_text .= "</mets:dmdSec>\n";
598 }
599
600
601 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
602 $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version);
603 }
604
605 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
606
607 return $all_text;
608}
609
610sub output_section {
611 my $self = shift (@_);
612 my ($handle, $section) = @_;
613
614 print $handle $self->buffer_section_xml($section);
615}
616
617# print out DSpace dublin_core metadata section
618sub output_dspace_section {
619 my $self = shift (@_);
620 my ($handle, $section) = @_;
621
622 my $section_ptr = $self->_lookup_section ($section);
623 return "" unless defined $section_ptr;
624
625 my $all_text = "<Section>\n";
626 $all_text .= " <Description>\n";
627
628 # output metadata
629 foreach my $data (@{$section_ptr->{'metadata'}}) {
630 my $escaped_value = &_escape_text($data->[1]);
631 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
632 }
633
634 $all_text .= " </Description>\n";
635 $all_text .= "</Section>\n";
636
637 # make sure no nasty control characters have snuck through
638 # (XML::Parser will barf on anything it doesn't consider to be
639 # valid UTF-8 text, including things like \c@, \cC etc.)
640 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
641
642 return $all_text;
643}
644
645# print out doctxt.xml file
646sub output_txt_section {
647 my $self = shift (@_);
648 my ($handle, $section) = @_;
649
650 print $handle $self->buffer_txt_section_xml($section);
651}
652
653# print out docmets.xml file
654sub output_mets_section {
655 my $self = shift(@_);
656 my ($handle, $section, $version, $working_dir) = @_;
657
658 # print out the dmdSection
659 print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);
660
661 print $handle "<mets:fileSec>\n";
662 if ($version eq "fedora") {
663 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
664 }
665
666 # print out the fileSection by sections
667 print $handle $self->buffer_mets_fileSection_section_xml($section,$version);
668
669 # print out the whole fileSection
670 print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);
671
672 if ($version eq "fedora") {
673 print $handle " </mets:fileGrp>\n";
674 }
675 print $handle "</mets:fileSec>\n";
676
677 # print out the StructMapSection by sections
678
679 my $struct_type;
680 if ($version eq "fedora") {
681 $struct_type = "fedora:dsBindingMap";
682 }
683 else {
684 $struct_type = "Section";
685 }
686
687 if ($version ne "fedora") {
688 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
689 my $order_num=0;
690 print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);
691 print $handle "</mets:structMap>\n";
692
693 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
694 print $handle $self->buffer_mets_StructMapWhole_section_xml($section);
695 print $handle "</mets:structMap>\n";
696 }
697
698}
699
700my $dc_set = { Title => 1,
701 Creator => 1,
702 Subject => 1,
703 Description => 1,
704 Publisher => 1,
705 Contributors => 1,
706 Date => 1,
707 Type => 1,
708 Format => 1,
709 Identifier => 1,
710 Source => 1,
711 Language => 1,
712 Relation => 1,
713 Coverage => 1,
714 Rights => 1};
715
716
717
718
719# Build up dublin_core metadata. Priority given to dc.* over ex.*
720
721sub buffer_dc_section {
722 my $self = shift(@_);
723 my ($section, $version) = @_;
724
725 # build up string of dublin core metadata
726 $section="" unless defined $section;
727
728 my $section_ptr=$self->_lookup_section($section);
729 return "" unless defined $section_ptr;
730
731 my $explicit_dc = {};
732 my $explicit_ex = {};
733
734 my $all_text="";
735 foreach my $data (@{$section_ptr->{'metadata'}}){
736 my $escaped_value = &_escape_text($data->[1]);
737 if ($data->[0]=~ m/^dc\./) {
738 $data->[0] =~ tr/[A-Z]/[a-z]/;
739
740 $data->[0] =~ m/^dc\.(.*)/;
741 my $dc_element = $1;
742
743 if (!defined $explicit_dc->{$dc_element}) {
744 $explicit_dc->{$dc_element} = [];
745 }
746 push(@{$explicit_dc->{$dc_element}},$escaped_value);
747
748 #$all_text .= ' <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
749 if (defined $version && ($version eq "oai_dc")) {
750 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
751 }
752 else {
753 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
754 }
755
756 }
757 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
758 $data->[0] =~ m/^(ex\.)?(.*)/;
759 my $ex_element = $2;
760 my $lc_ex_element = lc($ex_element);
761
762 if (defined $dc_set->{$ex_element}) {
763 if (!defined $explicit_ex->{$lc_ex_element}) {
764 $explicit_ex->{$lc_ex_element} = [];
765 }
766 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
767 }
768 }
769 }
770
771 # go through dc_set and for any element *not* defined in explicit_dc
772 # that do exist in explicit_ex, add it in as metadata
773 foreach my $k ( keys %$dc_set ) {
774 my $lc_k = lc($k);
775
776 if (!defined $explicit_dc->{$lc_k}) {
777 if (defined $explicit_ex->{$lc_k}) {
778
779 foreach my $v (@{$explicit_ex->{$lc_k}}) {
780 my $dc_element = $lc_k;
781 my $escaped_value = $v;
782
783 if (defined $version && ($version eq "oai_dc")) {
784 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
785 }
786 else {
787 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
788 }
789
790 }
791 }
792 }
793 }
794
795 if ($all_text eq "") {
796 $all_text .= " There is no Dublin Core metatdata in this document\n";
797 }
798 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
799
800 return $all_text;
801}
802
803
804# Print out dublin_core metadata
805sub output_dc_section {
806 my $self = shift(@_);
807 my ($handle, $section, $version) = @_;
808
809 my $all_text = $self->buffer_dc_section($section,$version);
810
811 print $handle $all_text;
812}
813
814
815# look up the reference to the a particular section
816sub _lookup_section {
817 my $self = shift (@_);
818 my ($section) = @_;
819
820 my ($num);
821 my $sectionref = $self;
822
823 while (defined $section && $section ne "") {
824 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
825 $num =~ s/^0+(\d)/$1/; # remove leading 0s
826 $section = "" unless defined $section;
827
828 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
829 $sectionref = $sectionref->{'subsections'}->{$num};
830 } else {
831 return undef;
832 }
833 }
834
835 return $sectionref;
836}
837
838# calculate OID by hashing the contents of the document
839sub _calc_OID {
840 my $self = shift (@_);
841 my ($filename) = @_;
842
843 my $osexe = &util::get_os_exe();
844
845 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
846 $ENV{'GSDLOS'},"hashfile$osexe");
847
848 my $result = "NULL";
849
850 if (-e "$hashfile_exe") {
851# $result = `\"$hashfile_exe\" \"$filename\"`;
852 $result = `hashfile$osexe \"$filename\"`;
853 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
854
855 } else {
856 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
857 }
858 return "HASH$result";
859}
860
861# methods dealing with OID, not groups of them.
862
863# if $OID is not provided one is calculated
864sub set_OID {
865 my $self = shift (@_);
866 my ($OID) = @_;
867
868 my $use_hash_oid = 0;
869 # if an OID wasn't provided claculate one
870 if (!defined $OID) {
871 $OID = "NULL";
872 if ($self->{'OIDtype'} eq "hash") {
873 $use_hash_oid = 1;
874 } elsif ($self->{'OIDtype'} eq "incremental") {
875 $OID = "D" . $OIDcount;
876 $OIDcount ++;
877
878 } elsif ($self->{'OIDtype'} eq "dirname") {
879 $OID = 'J';
880 my $filename = $self->get_source_filename();
881 if (defined($filename)) { # && -e $filename) {
882 $OID = &File::Basename::dirname($filename);
883 if (defined $OID) {
884 $OID = 'J'.&File::Basename::basename($OID);
885 $OID =~ s/\.//; #remove any periods
886 } else {
887 print STDERR "Failed to find base for filename ($filename)...generating hash id\n";
888 $use_hash_oid = 1;
889 }
890 } else {
891 print STDERR "Failed to find filename, generating hash id\n";
892 $use_hash_oid = 1;
893 }
894
895 } elsif ($self->{'OIDtype'} eq "assigned") {
896 my $identifier = $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
897 if (defined $identifier && $identifier ne "") {
898 $OID = "D" . $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
899 $OID =~ s/\.//; #remove any periods
900 } else {
901 # need a hash id
902 print STDERR "no dc.Identifier found, generating hash id\n";
903 $use_hash_oid = 1;
904 }
905
906 } else {
907 $use_hash_oid = 1;
908 }
909
910 if ($use_hash_oid) {
911
912 # "hash" OID - feed file to hashfile.exe
913 #my $filename = $self->get_source_filename();
914 # we want to use the converted file for hashing if available
915 # cos its quicker
916 my $filename = $self->get_filename_for_hashing();
917
918 # -z: don't want to hash on the file if it is zero size
919 if (defined($filename) && -e $filename && !-z $filename) {
920 $OID = $self->_calc_OID ($filename);
921 } else {
922 $filename = &util::get_tmp_filename();
923 if (!open (OUTFILE, ">$filename")) {
924 print STDERR "doc::set_OID could not write to $filename\n";
925 } else {
926 $self->output_section('OUTFILE', $self->get_top_section(),
927 undef, 1);
928 close (OUTFILE);
929 }
930 $OID = $self->_calc_OID ($filename);
931 &util::rm ($filename);
932 }
933 }
934 }
935 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
936}
937
938# this uses hashdoc (embedded c thingy) which is faster but still
939# needs a little work to be suffiently stable
940sub ___set_OID {
941 my $self = shift (@_);
942 my ($OID) = @_;
943
944 # if an OID wasn't provided then calculate hash value based on document
945 if (!defined $OID)
946 {
947 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
948 undef, 1);
949 my $hash_len = length($hash_text);
950
951 $OID = &hashdoc::buffer($hash_text,$hash_len);
952 }
953
954 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
955}
956
957# returns the OID for this document
958sub get_OID {
959 my $self = shift (@_);
960 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
961 return $OID if (defined $OID);
962 return "NULL";
963}
964
965sub delete_OID {
966 my $self = shift (@_);
967
968 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
969}
970
971
972# methods for manipulating section names
973
974# returns the name of the top-most section (the top
975# level of the document
976sub get_top_section {
977 my $self = shift (@_);
978
979 return "";
980}
981
982# returns a section
983sub get_parent_section {
984 my $self = shift (@_);
985 my ($section) = @_;
986
987 $section =~ s/(^|\.)\d+$//;
988
989 return $section;
990}
991
992# returns the first child section (or the end child
993# if there isn't any)
994sub get_begin_child {
995 my $self = shift (@_);
996 my ($section) = @_;
997
998 my $section_ptr = $self->_lookup_section($section);
999 return "" unless defined $section_ptr;
1000
1001 if (defined $section_ptr->{'subsection_order'}->[0]) {
1002 return "$section.$section_ptr->{'subsection_order'}->[0]";
1003 }
1004
1005 return $self->get_end_child ($section);
1006}
1007
1008# returns the next child of a parent section
1009sub get_next_child {
1010 my $self = shift (@_);
1011 my ($section) = @_;
1012
1013 my $parent_section = $self->get_parent_section($section);
1014 my $parent_section_ptr = $self->_lookup_section($parent_section);
1015 return undef unless defined $parent_section_ptr;
1016
1017 my ($section_num) = $section =~ /(\d+)$/;
1018 return undef unless defined $section_num;
1019
1020 my $i = 0;
1021 my $section_order = $parent_section_ptr->{'subsection_order'};
1022 while ($i < scalar(@$section_order)) {
1023 last if $section_order->[$i] eq $section_num;
1024 $i++;
1025 }
1026
1027 $i++; # the next child
1028 if ($i < scalar(@$section_order)) {
1029 return $section_order->[$i] if $parent_section eq "";
1030 return "$parent_section.$section_order->[$i]";
1031 }
1032
1033 # no more sections in this level
1034 return undef;
1035}
1036
1037# returns a reference to a list of children
1038sub get_children {
1039 my $self = shift (@_);
1040 my ($section) = @_;
1041
1042 my $section_ptr = $self->_lookup_section($section);
1043 return [] unless defined $section_ptr;
1044
1045 my @children = @{$section_ptr->{'subsection_order'}};
1046
1047 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
1048 return \@children;
1049}
1050
1051# returns the child section one past the last one (which
1052# is coded as "0")
1053sub get_end_child {
1054 my $self = shift (@_);
1055 my ($section) = @_;
1056
1057 return $section . ".0" unless $section eq "";
1058 return "0";
1059}
1060
1061# returns the next section in book order
1062sub get_next_section {
1063 my $self = shift (@_);
1064 my ($section) = @_;
1065
1066 return undef unless defined $section;
1067
1068 my $section_ptr = $self->_lookup_section($section);
1069 return undef unless defined $section_ptr;
1070
1071 # first try to find first child
1072 if (defined $section_ptr->{'subsection_order'}->[0]) {
1073 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
1074 return "$section.$section_ptr->{'subsection_order'}->[0]";
1075 }
1076
1077 do {
1078 # try to find sibling
1079 my $next_child = $self->get_next_child ($section);
1080 return $next_child if (defined $next_child);
1081
1082 # move up one level
1083 $section = $self->get_parent_section ($section);
1084 } while $section =~ /\d/;
1085
1086 return undef;
1087}
1088
1089sub is_leaf_section {
1090 my $self = shift (@_);
1091 my ($section) = @_;
1092
1093 my $section_ptr = $self->_lookup_section($section);
1094 return 1 unless defined $section_ptr;
1095
1096 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
1097}
1098
1099# methods for dealing with sections
1100
1101# returns the name of the inserted section
1102sub insert_section {
1103 my $self = shift (@_);
1104 my ($before_section) = @_;
1105
1106 # get the child to insert before and its parent section
1107 my $parent_section = "";
1108 my $before_child = "0";
1109 my @before_section = split (/\./, $before_section);
1110 if (scalar(@before_section) > 0) {
1111 $before_child = pop (@before_section);
1112 $parent_section = join (".", @before_section);
1113 }
1114
1115 my $parent_section_ptr = $self->_lookup_section($parent_section);
1116 if (!defined $parent_section_ptr) {
1117 print STDERR "doc::insert_section couldn't find parent section " .
1118 "$parent_section\n";
1119 return;
1120 }
1121
1122 # get the next section number
1123 my $section_num = $parent_section_ptr->{'next_subsection'}++;
1124
1125 my $i = 0;
1126 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
1127 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
1128 $i++;
1129 }
1130
1131 # insert the section number into the order list
1132 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
1133
1134 # add this section to the parent section
1135 my $section_ptr = {'subsection_order'=>[],
1136 'next_subsection'=>1,
1137 'subsections'=>{},
1138 'metadata'=>[],
1139 'text'=>""};
1140 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
1141
1142 # work out the full section number
1143 my $section = $parent_section;
1144 $section .= "." unless $section eq "";
1145 $section .= $section_num;
1146
1147 return $section;
1148}
1149
1150# creates a pre-named section
1151sub create_named_section {
1152 my $self = shift (@_);
1153 my ($mastersection) = @_;
1154
1155 my ($num);
1156 my $section = $mastersection;
1157 my $sectionref = $self;
1158
1159 while ($section ne "") {
1160 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
1161 $num =~ s/^0+(\d)/$1/; # remove leading 0s
1162 $section = "" unless defined $section;
1163
1164 if (defined $num) {
1165 if (!defined $sectionref->{'subsections'}->{$num}) {
1166 push (@{$sectionref->{'subsection_order'}}, $num);
1167 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
1168 'next_subsection'=>1,
1169 'subsections'=>{},
1170 'metadata'=>[],
1171 'text'=>""};
1172 if ($num >= $sectionref->{'next_subsection'}) {
1173 $sectionref->{'next_subsection'} = $num + 1;
1174 }
1175 }
1176 $sectionref = $sectionref->{'subsections'}->{$num};
1177
1178 } else {
1179 print STDERR "doc::create_named_section couldn't create section ";
1180 print STDERR "$mastersection\n";
1181 last;
1182 }
1183 }
1184}
1185
1186# returns a reference to a list of subsections
1187sub list_subsections {
1188 my $self = shift (@_);
1189 my ($section) = @_;
1190
1191 my $section_ptr = $self->_lookup_section ($section);
1192 if (!defined $section_ptr) {
1193 print STDERR "doc::list_subsections couldn't find section $section\n";
1194 return [];
1195 }
1196
1197 return [@{$section_ptr->{'subsection_order'}}];
1198}
1199
1200sub delete_section {
1201 my $self = shift (@_);
1202 my ($section) = @_;
1203
1204# my $section_ptr = {'subsection_order'=>[],
1205# 'next_subsection'=>1,
1206# 'subsections'=>{},
1207# 'metadata'=>[],
1208# 'text'=>""};
1209
1210 # if this is the top section reset everything
1211 if ($section eq "") {
1212 $self->{'subsection_order'} = [];
1213 $self->{'subsections'} = {};
1214 $self->{'metadata'} = [];
1215 $self->{'text'} = "";
1216 return;
1217 }
1218
1219 # find the parent of the section to delete
1220 my $parent_section = "";
1221 my $child = "0";
1222 my @section = split (/\./, $section);
1223 if (scalar(@section) > 0) {
1224 $child = pop (@section);
1225 $parent_section = join (".", @section);
1226 }
1227
1228 my $parent_section_ptr = $self->_lookup_section($parent_section);
1229 if (!defined $parent_section_ptr) {
1230 print STDERR "doc::delete_section couldn't find parent section " .
1231 "$parent_section\n";
1232 return;
1233 }
1234
1235 # remove this section from the subsection_order list
1236 my $i = 0;
1237 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
1238 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
1239 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
1240 last;
1241 }
1242 $i++;
1243 }
1244
1245 # remove this section from the subsection hash
1246 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
1247 undef $parent_section_ptr->{'subsections'}->{$child};
1248 }
1249}
1250
1251#--
1252# methods for dealing with metadata
1253
1254# set_metadata_element and get_metadata_element are for metadata
1255# which should only have one value. add_meta_data and get_metadata
1256# are for metadata which can have more than one value.
1257
1258# returns the first metadata value which matches field
1259
1260# This version of get metadata element works much like the one above,
1261# except it allows for the namespace portion of a metadata element to
1262# be ignored, thus if you are searching for dc.Title, the first piece
1263# of matching metadata ending with the name Title (once any namespace
1264# is removed) would be returned.
1265# 28-11-2003 John Thompson
1266sub get_metadata_element {
1267 my $self = shift (@_);
1268 my ($section, $field, $ignore_namespace) = @_;
1269 my ($data);
1270
1271 $ignore_namespace = 0 unless defined $ignore_namespace;
1272
1273 my $section_ptr = $self->_lookup_section($section);
1274 if (!defined $section_ptr) {
1275 print STDERR "doc::get_metadata_element couldn't find section ", $section, "\n";
1276 return;
1277 }
1278
1279 # Remove the any namespace if we are being told to ignore them
1280 if($ignore_namespace) {
1281 $field =~ s/^\w*\.//;
1282 }
1283
1284 foreach $data (@{$section_ptr->{'metadata'}}) {
1285
1286 my $data_name = $data->[0];
1287 # Remove the any namespace if we are being told to ignore them
1288 if($ignore_namespace) {
1289 $data_name =~ s/^\w*\.//;
1290 }
1291
1292 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
1293 }
1294
1295 return undef; # was not found
1296}
1297
1298# returns a list of the form [value1, value2, ...]
1299sub get_metadata {
1300 my $self = shift (@_);
1301 my ($section, $field, $ignore_namespace) = @_;
1302 my ($data);
1303
1304 $ignore_namespace = 0 unless defined $ignore_namespace;
1305
1306 my $section_ptr = $self->_lookup_section($section);
1307 if (!defined $section_ptr) {
1308 print STDERR "doc::get_metadata couldn't find section ",
1309 $section, "\n";
1310 return;
1311 }
1312
1313 # Remove the any namespace if we are being told to ignore them
1314 if($ignore_namespace) {
1315 $field =~ s/^\w*\.//;
1316 }
1317
1318 my @metadata = ();
1319 foreach $data (@{$section_ptr->{'metadata'}}) {
1320
1321 my $data_name = $data->[0];
1322 # Remove the any namespace if we are being told to ignore them
1323 if($ignore_namespace) {
1324 $data_name =~ s/^\w*\.//;
1325 }
1326
1327 push (@metadata, $data->[1]) if ($data_name eq $field);
1328 }
1329
1330 return \@metadata;
1331}
1332
1333# returns a list of the form [[field,value],[field,value],...]
1334sub get_all_metadata {
1335 my $self = shift (@_);
1336 my ($section) = @_;
1337
1338 my $section_ptr = $self->_lookup_section($section);
1339 if (!defined $section_ptr) {
1340 print STDERR "doc::get_all_metadata couldn't find section ", $section, "\n";
1341 return;
1342 }
1343
1344 return $section_ptr->{'metadata'};
1345}
1346
1347# $value is optional
1348sub delete_metadata {
1349 my $self = shift (@_);
1350 my ($section, $field, $value) = @_;
1351
1352 my $section_ptr = $self->_lookup_section($section);
1353 if (!defined $section_ptr) {
1354 print STDERR "doc::delete_metadata couldn't find section ", $section, "\n";
1355 return;
1356 }
1357
1358 my $i = 0;
1359 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
1360 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
1361 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
1362 splice (@{$section_ptr->{'metadata'}}, $i, 1);
1363 } else {
1364 $i++;
1365 }
1366 }
1367}
1368
1369sub delete_all_metadata {
1370 my $self = shift (@_);
1371 my ($section) = @_;
1372
1373 my $section_ptr = $self->_lookup_section($section);
1374 if (!defined $section_ptr) {
1375 print STDERR "doc::delete_all_metadata couldn't find section ", $section, "\n";
1376 return;
1377 }
1378
1379 $section_ptr->{'metadata'} = [];
1380}
1381
1382sub set_metadata_element {
1383 my $self = shift (@_);
1384 my ($section, $field, $value) = @_;
1385
1386 $self->set_utf8_metadata_element ($section, $field,
1387 &unicode::ascii2utf8(\$value));
1388}
1389
1390# set_utf8_metadata_element assumes the text has already been
1391# converted to the UTF-8 encoding.
1392sub set_utf8_metadata_element {
1393 my $self = shift (@_);
1394 my ($section, $field, $value) = @_;
1395
1396 $self->delete_metadata ($section, $field);
1397 $self->add_utf8_metadata ($section, $field, $value);
1398}
1399
1400
1401# add_metadata assumes the text is in (extended) ascii form. For
1402# text which has already been converted to the UTF-8 format use
1403# add_utf8_metadata.
1404sub add_metadata {
1405 my $self = shift (@_);
1406 my ($section, $field, $value) = @_;
1407
1408 $self->add_utf8_metadata ($section, $field,
1409 &unicode::ascii2utf8(\$value));
1410}
1411
1412sub add_utf8_metadata {
1413 my $self = shift (@_);
1414 my ($section, $field, $value) = @_;
1415
1416 my $section_ptr = $self->_lookup_section($section);
1417 if (!defined $section_ptr) {
1418 print STDERR "doc::add_utf8_metadata couldn't find section ", $section, "\n";
1419 return;
1420 }
1421 if (!defined $value) {
1422 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
1423 return;
1424 }
1425 if (!defined $field) {
1426 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
1427 return;
1428 }
1429
1430 #print STDERR "###$field=$value\n";
1431 # double check that the value is utf-8
1432 if (unicode::ensure_utf8(\$value)) {
1433 print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
1434 }
1435
1436 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
1437}
1438
1439
1440# methods for dealing with text
1441
1442# returns the text for a section
1443sub get_text {
1444 my $self = shift (@_);
1445 my ($section) = @_;
1446
1447 my $section_ptr = $self->_lookup_section($section);
1448 if (!defined $section_ptr) {
1449 print STDERR "doc::get_text couldn't find section " .
1450 "$section\n";
1451 return "";
1452 }
1453
1454 return $section_ptr->{'text'};
1455}
1456
1457# returns the (utf-8 encoded) length of the text for a section
1458sub get_text_length {
1459 my $self = shift (@_);
1460 my ($section) = @_;
1461
1462 my $section_ptr = $self->_lookup_section($section);
1463 if (!defined $section_ptr) {
1464 print STDERR "doc::get_text_length couldn't find section " .
1465 "$section\n";
1466 return 0;
1467 }
1468
1469 return length ($section_ptr->{'text'});
1470}
1471
1472sub delete_text {
1473 my $self = shift (@_);
1474 my ($section) = @_;
1475
1476 my $section_ptr = $self->_lookup_section($section);
1477 if (!defined $section_ptr) {
1478 print STDERR "doc::delete_text couldn't find section " .
1479 "$section\n";
1480 return;
1481 }
1482
1483 $section_ptr->{'text'} = "";
1484}
1485
1486# add_text assumes the text is in (extended) ascii form. For
1487# text which has been already converted to the UTF-8 format
1488# use add_utf8_text.
1489sub add_text {
1490 my $self = shift (@_);
1491 my ($section, $text) = @_;
1492
1493 # convert the text to UTF-8 encoded unicode characters
1494 # and add the text
1495 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
1496}
1497
1498
1499# add_utf8_text assumes the text to be added has already
1500# been converted to the UTF-8 encoding. For ascii text use
1501# add_text
1502sub add_utf8_text {
1503 my $self = shift (@_);
1504 my ($section, $text) = @_;
1505
1506 my $section_ptr = $self->_lookup_section($section);
1507 if (!defined $section_ptr) {
1508 print STDERR "doc::add_utf8_text couldn't find section " .
1509 "$section\n";
1510 return;
1511 }
1512
1513 $section_ptr->{'text'} .= $text;
1514}
1515
1516
1517# methods for dealing with associated files
1518
1519# a file is associated with a document, NOT a section.
1520# if section is defined it is noted in the data structure
1521# only so that files associated from a particular section
1522# may be removed later (using delete_section_assoc_files)
1523sub associate_file {
1524 my $self = shift (@_);
1525 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
1526 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
1527
1528 # remove all associated files with the same name
1529 $self->delete_assoc_file ($assoc_filename);
1530
1531 push (@{$self->{'associated_files'}},
1532 [$real_filename, $assoc_filename, $mime_type, $section]);
1533}
1534
1535# returns a list of associated files in the form
1536# [[real_filename, assoc_filename, mimetype], ...]
1537sub get_assoc_files {
1538 my $self = shift (@_);
1539
1540 return $self->{'associated_files'};
1541}
1542
1543sub delete_section_assoc_files {
1544 my $self = shift (@_);
1545 my ($section) = @_;
1546
1547 my $i=0;
1548 while ($i < scalar (@{$self->{'associated_files'}})) {
1549 if (defined $self->{'associated_files'}->[$i]->[3] &&
1550 $self->{'associated_files'}->[$i]->[3] eq $section) {
1551 splice (@{$self->{'associated_files'}}, $i, 1);
1552 } else {
1553 $i++;
1554 }
1555 }
1556}
1557
1558sub delete_assoc_file {
1559 my $self = shift (@_);
1560 my ($assoc_filename) = @_;
1561
1562 my $i=0;
1563 while ($i < scalar (@{$self->{'associated_files'}})) {
1564 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
1565 splice (@{$self->{'associated_files'}}, $i, 1);
1566 } else {
1567 $i++;
1568 }
1569 }
1570}
1571
1572sub reset_nextsection_ptr {
1573 my $self = shift (@_);
1574 my ($section) = @_;
1575
1576 my $section_ptr = $self->_lookup_section($section);
1577 $section_ptr->{'next_subsection'} = 1;
1578}
1579
15801;
Note: See TracBrowser for help on using the repository browser.