source: trunk/gsdl/perllib/doc.pm@ 11089

Last change on this file since 11089 was 10980, checked in by kjdon, 18 years ago

in get_filename_for_hashing, if its been processed by NULPlug, return undef. nul files should be empty, but sometimes they aren't so don't want to hash on them.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 42.3 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use unicode;
37use util;
38use ghtml;
39use File::stat;
40##use hashdoc;
41
42# the document type may be indexed_doc, nonindexed_doc, or
43# classification
44
45my $OIDcount = 0;
46
47sub new {
48 my $class = shift (@_);
49 my ($source_filename, $doc_type) = @_;
50
51 my $self = bless {'associated_files'=>[],
52 'subsection_order'=>[],
53 'next_subsection'=>1,
54 'subsections'=>{},
55 'metadata'=>[],
56 'text'=>"",
57 'OIDtype'=>"hash"}, $class;
58
59 # used to set lastmodified here, but this can screw up the HASH ids, so
60 # the docsave processor now calls set_lastmodified
61
62 if (defined $source_filename) {
63 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
64
65 if (defined $collect_dir) {
66 my $dirsep = &util::get_dirsep();
67 if ($collect_dir !~ m/$dirsep$/) {
68 $collect_dir .= $dirsep;
69 }
70
71 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
72
73 # if from within GSDLCOLLECTDIR, then remove directory prefix
74 # so source_filename is realative to it. This is done to aid
75 # portability, i.e. the collection can be moved to somewhere
76 # else on the file system and the archives directory will still
77 # work. This is needed, for example in the applet version of
78 # GLI where GSDLHOME/collect on the server will be different to
79 # the collect directory of the remove user. Of course,
80 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
81 # it back into a full pathname.
82
83 if ($source_filename =~ /^$collect_dir(.*)$/) {
84 $source_filename = $1;
85 }
86 }
87
88 $self->set_source_filename ($source_filename);
89 }
90
91 $self->set_doc_type ($doc_type) if defined $doc_type;
92
93 return $self;
94}
95# set lastmodified for OAI purposes, added by GRB, moved by kjdon
96sub set_lastmodified {
97 my $self = shift (@_);
98
99 my $source_filename = $self->get_source_filename();
100 if ((defined $self->get_metadata_element ($self->get_top_section(), "gsdldoctype")) &&
101 (defined $source_filename) && (-e $source_filename)) {
102 my $file_stat = stat($source_filename);
103 my $mtime = $file_stat->mtime;
104 $self->add_utf8_metadata($self->get_top_section(), "lastmodified", $file_stat->mtime);
105 }
106}
107
108# clone the $self object
109sub duplicate {
110 my $self = shift (@_);
111
112 my $newobj = {};
113
114 foreach my $k (keys %$self) {
115 $newobj->{$k} = &clone ($self->{$k});
116 }
117
118 bless $newobj, ref($self);
119 return $newobj;
120}
121
122sub clone {
123 my ($from) = @_;
124 my $type = ref ($from);
125
126 if ($type eq "HASH") {
127 my $to = {};
128 foreach my $key (keys %$from) {
129 $to->{$key} = &clone ($from->{$key});
130 }
131 return $to;
132 } elsif ($type eq "ARRAY") {
133 my $to = [];
134 foreach my $v (@$from) {
135 push (@$to, &clone ($v));
136 }
137 return $to;
138 } else {
139 return $from;
140 }
141}
142
143sub set_OIDtype {
144 my $self = shift (@_);
145 my ($type) = @_;
146
147 if (defined $type && $type =~ /^(hash|incremental|dirname|assigned)$/) {
148 $self->{'OIDtype'} = $type;
149 } else {
150 $self->{'OIDtype'} = "hash";
151 }
152}
153
154sub set_source_filename {
155 my $self = shift (@_);
156 my ($source_filename) = @_;
157
158 $self->set_metadata_element ($self->get_top_section(),
159 "gsdlsourcefilename",
160 $source_filename);
161}
162
163sub set_converted_filename {
164 my $self = shift (@_);
165 my ($converted_filename) = @_;
166
167 $self->set_metadata_element ($self->get_top_section(),
168 "gsdlconvertedfilename",
169 $converted_filename);
170}
171
172
173# returns the source_filename as it was provided
174sub get_source_filename {
175 my $self = shift (@_);
176
177 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
178}
179
180# returns converted filename if available else returns source filename
181sub get_filename_for_hashing {
182 my $self = shift (@_);
183
184 my $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlconvertedfilename");
185
186 if (!defined $filename) {
187 # if NULPlug processed file, then don't give a filename
188 if ($self->get_metadata_element ($self->get_top_section(), "Plugin") eq "NULPlug") {
189 $filename = undef;
190 } else {
191 $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
192 }
193 }
194 return $filename;
195}
196
197sub set_doc_type {
198 my $self = shift (@_);
199 my ($doc_type) = @_;
200
201 $self->set_metadata_element ($self->get_top_section(),
202 "gsdldoctype",
203 $doc_type);
204}
205
206# returns the gsdldoctype as it was provided
207# the default of "indexed_doc" is used if no document
208# type was provided
209sub get_doc_type {
210 my $self = shift (@_);
211
212 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
213 return $doc_type if (defined $doc_type);
214 return "indexed_doc";
215}
216
217sub _escape_text {
218 my ($text) = @_;
219
220 # special characters in the gml encoding
221 $text =~ s/&/&/g; # this has to be first...
222 $text =~ s/</&lt;/g;
223 $text =~ s/>/&gt;/g;
224 $text =~ s/\"/&quot;/g;
225
226 return $text;
227}
228
229sub buffer_section_xml {
230 my $self = shift (@_);
231 my ($section) = @_;
232
233 my $section_ptr = $self->_lookup_section ($section);
234 return "" unless defined $section_ptr;
235
236 my $all_text = "<Section>\n";
237 $all_text .= " <Description>\n";
238
239 # output metadata
240 foreach my $data (@{$section_ptr->{'metadata'}}) {
241 my $escaped_value = &_escape_text($data->[1]);
242 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
243 }
244
245 $all_text .= " </Description>\n";
246
247 # output the text
248 $all_text .= " <Content>";
249 $all_text .= &_escape_text($section_ptr->{'text'});
250 $all_text .= "</Content>\n";
251
252 # output all the subsections
253 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
254 $all_text .= $self->buffer_section_xml("$section.$subsection");
255 }
256
257 $all_text .= "</Section>\n";
258
259 # make sure no nasty control characters have snuck through
260 # (XML::Parser will barf on anything it doesn't consider to be
261 # valid UTF-8 text, including things like \c@, \cC etc.)
262 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
263
264 return $all_text;
265}
266
267sub buffer_txt_section_xml {
268 my $self = shift(@_);
269 my ($section) = @_;
270
271 my $section_ptr = $self->_lookup_section ($section);
272
273 return "" unless defined $section_ptr;
274
275 my $all_text = "<Section>\n";
276
277 ##output the text
278 #$all_text .= " <Content>";
279 $all_text .= &_escape_text($section_ptr->{'text'});
280 #$all_text .= " </Content>\n";
281
282
283 #output all the subsections
284 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
285 $all_text .= $self->buffer_txt_section_xml("$section.$subsection");
286 }
287
288 $all_text .= "</Section>\n";
289
290
291 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
292 return $all_text;
293}
294
295sub buffer_mets_fileSection_section_xml() {
296 my $self = shift(@_);
297 my ($section,$version) = @_;
298
299 #$section="" unless defined $section;
300
301
302 my $section_ptr=$self->_lookup_section($section);
303 return "" unless defined $section_ptr;
304
305
306 # output fileSection by sections
307 my $section_num ="1". $section;
308
309 my $filePath = 'doctxt.xml';
310
311 my $opt_owner_id = "";
312 if ($version eq "fedora") {
313 $opt_owner_id = "OWNERID=\"M\"";
314 }
315
316 # output the fileSection details
317 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
318 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
319 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
320
321 my $xpath = "1".$section;
322 $xpath =~ s/\./]\/Section[/g;
323
324 $all_text .= $xpath;
325
326 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
327 $all_text .= " </mets:file>\n";
328 $all_text .= " </mets:fileGrp>\n";
329
330
331 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
332 $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version);
333 }
334
335 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
336
337 return $all_text;
338}
339
340sub buffer_mets_fileWhole_section_xml(){
341 my $self = shift(@_);
342 my ($section,$version,$working_dir) = @_;
343
344 my $section_ptr = $self-> _lookup_section($section);
345 return "" unless defined $section_ptr;
346
347 my $all_text="" unless defined $all_txt;
348
349 my $fileID=0;
350
351 # Output the fileSection for the whole section
352 # => get the sourcefile and associative file
353
354 my $id_root = "";
355 my $opt_owner_id = "";
356
357 if ($version eq "fedora") {
358 $opt_owner_id = "OWNERID=\"M\"";
359 }
360 else {
361 $id_root = "default";
362 }
363
364 if ($version ne "fedora") {
365 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
366 }
367
368 foreach my $data (@{$section_ptr->{'metadata'}}){
369 my $escaped_value = &_escape_text($data->[1]);
370
371 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
372 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
373
374 ++$fileID;
375 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
376
377 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
378
379 $all_text .= " </mets:file>\n";
380 }
381
382 if ($data->[0] eq "gsdlassocfile"){
383
384 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
385 my $assoc_file = $1;
386 my $mime_type = $2;
387 my $assoc_dir = $3;
388
389 if ($version eq "fedora") {
390 $id_root = $assoc_file;
391 $id_root =~ s/\//_/g;
392 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
393 }
394
395 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
396 ++$fileID;
397
398 my $mime_attr = "MIMETYPE=\"$mime_type\"";
399 my $xlink_title = "xlink:title=\"$assoc_file\"";
400
401 my $id_attr;
402 my $xlink_href;
403
404 if ($version eq "fedora") {
405 $id_attr = "ID=\"$id_root.0\"";
406
407 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
408 if (!defined $fedora_prefix) {
409 $xlink_href = "xlink:href=\"$assfilePath\"";
410 }
411 else
412 {
413 my $gsdlhome = $ENV{'GSDLHOME'};
414 my $gsdl_href = "$working_dir/$assfilePath";
415
416 $gsdl_href =~ s/^$gsdlhome(\/)?//;
417 $gsdl_href = "/gsdl/$gsdl_href";
418
419 my $fserver = $ENV{'FEDORA_HOSTNAME'};
420 my $fport = $ENV{'FEDORA_SERVER_PORT'};
421
422 my $fdomain = "http://$fserver:$fport";
423 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
424 }
425
426 my $top_section = $self->get_top_section();
427 my $id = $self->get_metadata_element($top_section,"Identifier");
428 }
429 else {
430 $id_attr = "ID=\"$id_root.$fileID\"";
431 $xlink_href = "xlink:href=\"$assfilePath\"";
432 }
433
434 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
435 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
436
437 $all_text .= " </mets:file>\n";
438
439 if ($version eq "fedora") {
440 $all_text .= " </mets:fileGrp>\n";
441 }
442
443 }
444 }
445
446 if ($version ne "fedora") {
447 $all_text .= " </mets:fileGrp>\n";
448 }
449
450 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
451
452 return $all_text;
453}
454
455sub buffer_mets_StructMapSection_section_xml(){
456 my $self = shift(@_);
457 my ($section, $order_numref) = @_;
458
459 $section="" unless defined $section;
460
461
462 my $section_ptr=$self->_lookup_section($section);
463 return "" unless defined $section_ptr;
464
465
466 # output fileSection by Sections
467 my $section_num ="1". $section;
468 my $dmd_num = $section_num;
469
470 ##**output the dmdSection details
471 #if ($section_num eq "1") {
472 # $dmd_num = "0";
473 #}
474
475 #**output the StructMap details
476
477 my $dmdid_attr = "DM$dmd_num";
478
479 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
480 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
481 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
482
483 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
484
485
486 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
487 $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref);
488 }
489
490 $all_text .= " </mets:div>\n";
491
492 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
493
494 return $all_text;
495}
496
497
498sub buffer_mets_StructMapWhole_section_xml(){
499 my $self = shift(@_);
500 my ($section) = @_;
501
502 my $section_ptr = $self-> _lookup_section($section);
503 return "" unless defined $section_ptr;
504
505 my $all_text="" unless defined $all_txt;
506 my $fileID=0;
507 my $order_num = 0;
508
509 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
510
511 #** output the StructMapSection for the whole section
512 # get the sourcefile and associative file
513
514 foreach my $data (@{$section_ptr->{'metadata'}}){
515 my $escaped_value = &_escape_text($data->[1]);
516
517 if ($data->[0] eq "gsdlsourcefilename") {
518 ++$fileID;
519 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
520 }
521
522 if ($data->[0] eq "gsdlassocfile"){
523 ++$fileID;
524 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
525 }
526 }
527 $all_text .= " </mets:div>\n";
528
529 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
530
531 return $all_text;
532}
533
534
535sub buffer_mets_dmdSection_section_xml(){
536 my $self = shift(@_);
537 my ($section,$version) = @_;
538
539 $section="" unless defined $section;
540
541 my $section_ptr=$self->_lookup_section($section);
542 return "" unless defined $section_ptr;
543
544 # convert section number
545 my $section_num ="1". $section;
546 my $dmd_num = $section_num;
547
548 # #**output the dmdSection details
549 # if ($section_num eq "1") {
550 # $dmd_num = "0";
551 # }
552
553
554 my $all_text = "";
555
556 my $label_attr = "";
557 if ($version eq "fedora") {
558 $all_text .= "<mets:amdSec ID=\"DC\" >\n";
559 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
560
561 $label_attr = "LABEL=\"Dublin Core Metadata\"";
562 }
563 else {
564 print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
565 print STDERR "***** Check to see if <techMD> required\n";
566 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
567
568 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
569 }
570
571 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
572 $all_text .= " <mets:xmlData>\n";
573
574 if ($version eq "fedora") {
575 my $dc_namespace = "";
576 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
577 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
578
579 $all_text .= " <oai_dc:dc $dc_namespace>\n";
580
581 $all_text .= $self->buffer_dc_section($section,"oai_dc");
582 $all_text .= " </oai_dc:dc>\n";
583 }
584 else {
585 foreach my $data (@{$section_ptr->{'metadata'}}){
586 my $escaped_value = &_escape_text($data->[1]);
587 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
588 if ($data->[0] eq "dc.Title") {
589 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
590 }
591 }
592 }
593
594 $all_text .= " </mets:xmlData>\n";
595 $all_text .= " </mets:mdWrap>\n";
596
597 if ($version eq "fedora") {
598 $all_text .= " </mets:techMD>\n";
599 $all_text .= "</mets:amdSec>\n";
600 }
601 else {
602 $all_text .= "</mets:dmdSec>\n";
603 }
604
605
606 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
607 $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version);
608 }
609
610 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
611
612 return $all_text;
613}
614
615sub output_section {
616 my $self = shift (@_);
617 my ($handle, $section) = @_;
618
619 print $handle $self->buffer_section_xml($section);
620}
621
622# print out DSpace dublin_core metadata section
623sub output_dspace_section {
624 my $self = shift (@_);
625 my ($handle, $section) = @_;
626
627 my $section_ptr = $self->_lookup_section ($section);
628 return "" unless defined $section_ptr;
629
630 my $all_text = "<Section>\n";
631 $all_text .= " <Description>\n";
632
633 # output metadata
634 foreach my $data (@{$section_ptr->{'metadata'}}) {
635 my $escaped_value = &_escape_text($data->[1]);
636 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
637 }
638
639 $all_text .= " </Description>\n";
640 $all_text .= "</Section>\n";
641
642 # make sure no nasty control characters have snuck through
643 # (XML::Parser will barf on anything it doesn't consider to be
644 # valid UTF-8 text, including things like \c@, \cC etc.)
645 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
646
647 return $all_text;
648}
649
650# print out doctxt.xml file
651sub output_txt_section {
652 my $self = shift (@_);
653 my ($handle, $section) = @_;
654
655 print $handle $self->buffer_txt_section_xml($section);
656}
657
658# print out docmets.xml file
659sub output_mets_section {
660 my $self = shift(@_);
661 my ($handle, $section, $version, $working_dir) = @_;
662
663 # print out the dmdSection
664 print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);
665
666 print $handle "<mets:fileSec>\n";
667 if ($version eq "fedora") {
668 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
669 }
670
671 # print out the fileSection by sections
672 print $handle $self->buffer_mets_fileSection_section_xml($section,$version);
673
674 # print out the whole fileSection
675 print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);
676
677 if ($version eq "fedora") {
678 print $handle " </mets:fileGrp>\n";
679 }
680 print $handle "</mets:fileSec>\n";
681
682 # print out the StructMapSection by sections
683
684 my $struct_type;
685 if ($version eq "fedora") {
686 $struct_type = "fedora:dsBindingMap";
687 }
688 else {
689 $struct_type = "Section";
690 }
691
692 if ($version ne "fedora") {
693 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
694 my $order_num=0;
695 print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);
696 print $handle "</mets:structMap>\n";
697
698 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
699 print $handle $self->buffer_mets_StructMapWhole_section_xml($section);
700 print $handle "</mets:structMap>\n";
701 }
702
703}
704
705my $dc_set = { Title => 1,
706 Creator => 1,
707 Subject => 1,
708 Description => 1,
709 Publisher => 1,
710 Contributors => 1,
711 Date => 1,
712 Type => 1,
713 Format => 1,
714 Identifier => 1,
715 Source => 1,
716 Language => 1,
717 Relation => 1,
718 Coverage => 1,
719 Rights => 1};
720
721
722
723
724# Build up dublin_core metadata. Priority given to dc.* over ex.*
725
726sub buffer_dc_section {
727 my $self = shift(@_);
728 my ($section, $version) = @_;
729
730 # build up string of dublin core metadata
731 $section="" unless defined $section;
732
733 my $section_ptr=$self->_lookup_section($section);
734 return "" unless defined $section_ptr;
735
736 my $explicit_dc = {};
737 my $explicit_ex = {};
738
739 my $all_text="";
740 foreach my $data (@{$section_ptr->{'metadata'}}){
741 my $escaped_value = &_escape_text($data->[1]);
742 if ($data->[0]=~ m/^dc\./) {
743 $data->[0] =~ tr/[A-Z]/[a-z]/;
744
745 $data->[0] =~ m/^dc\.(.*)/;
746 my $dc_element = $1;
747
748 if (!defined $explicit_dc->{$dc_element}) {
749 $explicit_dc->{$dc_element} = [];
750 }
751 push(@{$explicit_dc->{$dc_element}},$escaped_value);
752
753 #$all_text .= ' <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
754 if (defined $version && ($version eq "oai_dc")) {
755 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
756 }
757 else {
758 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
759 }
760
761 }
762 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
763 $data->[0] =~ m/^(ex\.)?(.*)/;
764 my $ex_element = $2;
765 my $lc_ex_element = lc($ex_element);
766
767 if (defined $dc_set->{$ex_element}) {
768 if (!defined $explicit_ex->{$lc_ex_element}) {
769 $explicit_ex->{$lc_ex_element} = [];
770 }
771 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
772 }
773 }
774 }
775
776 # go through dc_set and for any element *not* defined in explicit_dc
777 # that do exist in explicit_ex, add it in as metadata
778 foreach my $k ( keys %$dc_set ) {
779 my $lc_k = lc($k);
780
781 if (!defined $explicit_dc->{$lc_k}) {
782 if (defined $explicit_ex->{$lc_k}) {
783
784 foreach my $v (@{$explicit_ex->{$lc_k}}) {
785 my $dc_element = $lc_k;
786 my $escaped_value = $v;
787
788 if (defined $version && ($version eq "oai_dc")) {
789 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
790 }
791 else {
792 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
793 }
794
795 }
796 }
797 }
798 }
799
800 if ($all_text eq "") {
801 $all_text .= " There is no Dublin Core metatdata in this document\n";
802 }
803 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
804
805 return $all_text;
806}
807
808
809# Print out dublin_core metadata
810sub output_dc_section {
811 my $self = shift(@_);
812 my ($handle, $section, $version) = @_;
813
814 my $all_text = $self->buffer_dc_section($section,$version);
815
816 print $handle $all_text;
817}
818
819
820# look up the reference to the a particular section
821sub _lookup_section {
822 my $self = shift (@_);
823 my ($section) = @_;
824
825 my ($num);
826 my $sectionref = $self;
827
828 while (defined $section && $section ne "") {
829 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
830 $num =~ s/^0+(\d)/$1/; # remove leading 0s
831 $section = "" unless defined $section;
832
833 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
834 $sectionref = $sectionref->{'subsections'}->{$num};
835 } else {
836 return undef;
837 }
838 }
839
840 return $sectionref;
841}
842
843# calculate OID by hashing the contents of the document
844sub _calc_OID {
845 my $self = shift (@_);
846 my ($filename) = @_;
847
848 my $osexe = &util::get_os_exe();
849
850 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
851 $ENV{'GSDLOS'},"hashfile$osexe");
852
853 my $result = "NULL";
854
855 if (-e "$hashfile_exe") {
856# $result = `\"$hashfile_exe\" \"$filename\"`;
857 $result = `hashfile$osexe \"$filename\"`;
858 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
859
860 } else {
861 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
862 }
863 return "HASH$result";
864}
865
866# methods dealing with OID, not groups of them.
867
868# if $OID is not provided one is calculated
869sub set_OID {
870 my $self = shift (@_);
871 my ($OID) = @_;
872
873 my $use_hash_oid = 0;
874 # if an OID wasn't provided claculate one
875 if (!defined $OID) {
876 $OID = "NULL";
877 if ($self->{'OIDtype'} eq "hash") {
878 $use_hash_oid = 1;
879 } elsif ($self->{'OIDtype'} eq "incremental") {
880 $OID = "D" . $OIDcount;
881 $OIDcount ++;
882
883 } elsif ($self->{'OIDtype'} eq "dirname") {
884 $OID = 'J';
885 my $filename = $self->get_source_filename();
886 if (defined($filename)) { # && -e $filename) {
887 $OID = &File::Basename::dirname($filename);
888 if (defined $OID) {
889 $OID = 'J'.&File::Basename::basename($OID);
890 $OID =~ s/\.//; #remove any periods
891 } else {
892 print STDERR "Failed to find base for filename ($filename)...generating hash id\n";
893 $use_hash_oid = 1;
894 }
895 } else {
896 print STDERR "Failed to find filename, generating hash id\n";
897 $use_hash_oid = 1;
898 }
899
900 } elsif ($self->{'OIDtype'} eq "assigned") {
901 my $identifier = $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
902 if (defined $identifier && $identifier ne "") {
903 $OID = "D" . $self->get_metadata_element ($self->get_top_section(), "dc.Identifier");
904 $OID =~ s/\.//; #remove any periods
905 } else {
906 # need a hash id
907 print STDERR "no dc.Identifier found, generating hash id\n";
908 $use_hash_oid = 1;
909 }
910
911 } else {
912 $use_hash_oid = 1;
913 }
914
915 if ($use_hash_oid) {
916
917 # "hash" OID - feed file to hashfile.exe
918 #my $filename = $self->get_source_filename();
919 # we want to use the converted file for hashing if available
920 # cos its quicker
921 my $filename = $self->get_filename_for_hashing();
922 # -z: don't want to hash on the file if it is zero size
923 if (defined($filename) && -e $filename && !-z $filename) {
924 $OID = $self->_calc_OID ($filename);
925 } else {
926 $filename = &util::get_tmp_filename();
927 if (!open (OUTFILE, ">$filename")) {
928 print STDERR "doc::set_OID could not write to $filename\n";
929 } else {
930 $self->output_section('OUTFILE', $self->get_top_section(),
931 undef, 1);
932 close (OUTFILE);
933 }
934 $OID = $self->_calc_OID ($filename);
935 &util::rm ($filename);
936 }
937 }
938 }
939 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
940}
941
942# this uses hashdoc (embedded c thingy) which is faster but still
943# needs a little work to be suffiently stable
944sub ___set_OID {
945 my $self = shift (@_);
946 my ($OID) = @_;
947
948 # if an OID wasn't provided then calculate hash value based on document
949 if (!defined $OID)
950 {
951 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
952 undef, 1);
953 my $hash_len = length($hash_text);
954
955 $OID = &hashdoc::buffer($hash_text,$hash_len);
956 }
957
958 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
959}
960
961# returns the OID for this document
962sub get_OID {
963 my $self = shift (@_);
964 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
965 return $OID if (defined $OID);
966 return "NULL";
967}
968
969sub delete_OID {
970 my $self = shift (@_);
971
972 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
973}
974
975
976# methods for manipulating section names
977
978# returns the name of the top-most section (the top
979# level of the document
980sub get_top_section {
981 my $self = shift (@_);
982
983 return "";
984}
985
986# returns a section
987sub get_parent_section {
988 my $self = shift (@_);
989 my ($section) = @_;
990
991 $section =~ s/(^|\.)\d+$//;
992
993 return $section;
994}
995
996# returns the first child section (or the end child
997# if there isn't any)
998sub get_begin_child {
999 my $self = shift (@_);
1000 my ($section) = @_;
1001
1002 my $section_ptr = $self->_lookup_section($section);
1003 return "" unless defined $section_ptr;
1004
1005 if (defined $section_ptr->{'subsection_order'}->[0]) {
1006 return "$section.$section_ptr->{'subsection_order'}->[0]";
1007 }
1008
1009 return $self->get_end_child ($section);
1010}
1011
1012# returns the next child of a parent section
1013sub get_next_child {
1014 my $self = shift (@_);
1015 my ($section) = @_;
1016
1017 my $parent_section = $self->get_parent_section($section);
1018 my $parent_section_ptr = $self->_lookup_section($parent_section);
1019 return undef unless defined $parent_section_ptr;
1020
1021 my ($section_num) = $section =~ /(\d+)$/;
1022 return undef unless defined $section_num;
1023
1024 my $i = 0;
1025 my $section_order = $parent_section_ptr->{'subsection_order'};
1026 while ($i < scalar(@$section_order)) {
1027 last if $section_order->[$i] eq $section_num;
1028 $i++;
1029 }
1030
1031 $i++; # the next child
1032 if ($i < scalar(@$section_order)) {
1033 return $section_order->[$i] if $parent_section eq "";
1034 return "$parent_section.$section_order->[$i]";
1035 }
1036
1037 # no more sections in this level
1038 return undef;
1039}
1040
1041# returns a reference to a list of children
1042sub get_children {
1043 my $self = shift (@_);
1044 my ($section) = @_;
1045
1046 my $section_ptr = $self->_lookup_section($section);
1047 return [] unless defined $section_ptr;
1048
1049 my @children = @{$section_ptr->{'subsection_order'}};
1050
1051 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
1052 return \@children;
1053}
1054
1055# returns the child section one past the last one (which
1056# is coded as "0")
1057sub get_end_child {
1058 my $self = shift (@_);
1059 my ($section) = @_;
1060
1061 return $section . ".0" unless $section eq "";
1062 return "0";
1063}
1064
1065# returns the next section in book order
1066sub get_next_section {
1067 my $self = shift (@_);
1068 my ($section) = @_;
1069
1070 return undef unless defined $section;
1071
1072 my $section_ptr = $self->_lookup_section($section);
1073 return undef unless defined $section_ptr;
1074
1075 # first try to find first child
1076 if (defined $section_ptr->{'subsection_order'}->[0]) {
1077 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
1078 return "$section.$section_ptr->{'subsection_order'}->[0]";
1079 }
1080
1081 do {
1082 # try to find sibling
1083 my $next_child = $self->get_next_child ($section);
1084 return $next_child if (defined $next_child);
1085
1086 # move up one level
1087 $section = $self->get_parent_section ($section);
1088 } while $section =~ /\d/;
1089
1090 return undef;
1091}
1092
1093sub is_leaf_section {
1094 my $self = shift (@_);
1095 my ($section) = @_;
1096
1097 my $section_ptr = $self->_lookup_section($section);
1098 return 1 unless defined $section_ptr;
1099
1100 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
1101}
1102
1103# methods for dealing with sections
1104
1105# returns the name of the inserted section
1106sub insert_section {
1107 my $self = shift (@_);
1108 my ($before_section) = @_;
1109
1110 # get the child to insert before and its parent section
1111 my $parent_section = "";
1112 my $before_child = "0";
1113 my @before_section = split (/\./, $before_section);
1114 if (scalar(@before_section) > 0) {
1115 $before_child = pop (@before_section);
1116 $parent_section = join (".", @before_section);
1117 }
1118
1119 my $parent_section_ptr = $self->_lookup_section($parent_section);
1120 if (!defined $parent_section_ptr) {
1121 print STDERR "doc::insert_section couldn't find parent section " .
1122 "$parent_section\n";
1123 return;
1124 }
1125
1126 # get the next section number
1127 my $section_num = $parent_section_ptr->{'next_subsection'}++;
1128
1129 my $i = 0;
1130 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
1131 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
1132 $i++;
1133 }
1134
1135 # insert the section number into the order list
1136 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
1137
1138 # add this section to the parent section
1139 my $section_ptr = {'subsection_order'=>[],
1140 'next_subsection'=>1,
1141 'subsections'=>{},
1142 'metadata'=>[],
1143 'text'=>""};
1144 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
1145
1146 # work out the full section number
1147 my $section = $parent_section;
1148 $section .= "." unless $section eq "";
1149 $section .= $section_num;
1150
1151 return $section;
1152}
1153
1154# creates a pre-named section
1155sub create_named_section {
1156 my $self = shift (@_);
1157 my ($mastersection) = @_;
1158
1159 my ($num);
1160 my $section = $mastersection;
1161 my $sectionref = $self;
1162
1163 while ($section ne "") {
1164 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
1165 $num =~ s/^0+(\d)/$1/; # remove leading 0s
1166 $section = "" unless defined $section;
1167
1168 if (defined $num) {
1169 if (!defined $sectionref->{'subsections'}->{$num}) {
1170 push (@{$sectionref->{'subsection_order'}}, $num);
1171 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
1172 'next_subsection'=>1,
1173 'subsections'=>{},
1174 'metadata'=>[],
1175 'text'=>""};
1176 if ($num >= $sectionref->{'next_subsection'}) {
1177 $sectionref->{'next_subsection'} = $num + 1;
1178 }
1179 }
1180 $sectionref = $sectionref->{'subsections'}->{$num};
1181
1182 } else {
1183 print STDERR "doc::create_named_section couldn't create section ";
1184 print STDERR "$mastersection\n";
1185 last;
1186 }
1187 }
1188}
1189
1190# returns a reference to a list of subsections
1191sub list_subsections {
1192 my $self = shift (@_);
1193 my ($section) = @_;
1194
1195 my $section_ptr = $self->_lookup_section ($section);
1196 if (!defined $section_ptr) {
1197 print STDERR "doc::list_subsections couldn't find section $section\n";
1198 return [];
1199 }
1200
1201 return [@{$section_ptr->{'subsection_order'}}];
1202}
1203
1204sub delete_section {
1205 my $self = shift (@_);
1206 my ($section) = @_;
1207
1208# my $section_ptr = {'subsection_order'=>[],
1209# 'next_subsection'=>1,
1210# 'subsections'=>{},
1211# 'metadata'=>[],
1212# 'text'=>""};
1213
1214 # if this is the top section reset everything
1215 if ($section eq "") {
1216 $self->{'subsection_order'} = [];
1217 $self->{'subsections'} = {};
1218 $self->{'metadata'} = [];
1219 $self->{'text'} = "";
1220 return;
1221 }
1222
1223 # find the parent of the section to delete
1224 my $parent_section = "";
1225 my $child = "0";
1226 my @section = split (/\./, $section);
1227 if (scalar(@section) > 0) {
1228 $child = pop (@section);
1229 $parent_section = join (".", @section);
1230 }
1231
1232 my $parent_section_ptr = $self->_lookup_section($parent_section);
1233 if (!defined $parent_section_ptr) {
1234 print STDERR "doc::delete_section couldn't find parent section " .
1235 "$parent_section\n";
1236 return;
1237 }
1238
1239 # remove this section from the subsection_order list
1240 my $i = 0;
1241 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
1242 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
1243 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
1244 last;
1245 }
1246 $i++;
1247 }
1248
1249 # remove this section from the subsection hash
1250 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
1251 undef $parent_section_ptr->{'subsections'}->{$child};
1252 }
1253}
1254
1255#--
1256# methods for dealing with metadata
1257
1258# set_metadata_element and get_metadata_element are for metadata
1259# which should only have one value. add_meta_data and get_metadata
1260# are for metadata which can have more than one value.
1261
1262# returns the first metadata value which matches field
1263
1264# This version of get metadata element works much like the one above,
1265# except it allows for the namespace portion of a metadata element to
1266# be ignored, thus if you are searching for dc.Title, the first piece
1267# of matching metadata ending with the name Title (once any namespace
1268# is removed) would be returned.
1269# 28-11-2003 John Thompson
1270sub get_metadata_element {
1271 my $self = shift (@_);
1272 my ($section, $field, $ignore_namespace) = @_;
1273 my ($data);
1274
1275 $ignore_namespace = 0 unless defined $ignore_namespace;
1276
1277 my $section_ptr = $self->_lookup_section($section);
1278 if (!defined $section_ptr) {
1279 print STDERR "doc::get_metadata_element couldn't find section ", $section, "\n";
1280 return;
1281 }
1282
1283 # Remove the any namespace if we are being told to ignore them
1284 if($ignore_namespace) {
1285 $field =~ s/^\w*\.//;
1286 }
1287
1288 foreach $data (@{$section_ptr->{'metadata'}}) {
1289
1290 my $data_name = $data->[0];
1291 # Remove the any namespace if we are being told to ignore them
1292 if($ignore_namespace) {
1293 $data_name =~ s/^\w*\.//;
1294 }
1295
1296 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
1297 }
1298
1299 return undef; # was not found
1300}
1301
1302# returns a list of the form [value1, value2, ...]
1303sub get_metadata {
1304 my $self = shift (@_);
1305 my ($section, $field, $ignore_namespace) = @_;
1306 my ($data);
1307
1308 $ignore_namespace = 0 unless defined $ignore_namespace;
1309
1310 my $section_ptr = $self->_lookup_section($section);
1311 if (!defined $section_ptr) {
1312 print STDERR "doc::get_metadata couldn't find section ",
1313 $section, "\n";
1314 return;
1315 }
1316
1317 # Remove the any namespace if we are being told to ignore them
1318 if($ignore_namespace) {
1319 $field =~ s/^\w*\.//;
1320 }
1321
1322 my @metadata = ();
1323 foreach $data (@{$section_ptr->{'metadata'}}) {
1324
1325 my $data_name = $data->[0];
1326 # Remove the any namespace if we are being told to ignore them
1327 if($ignore_namespace) {
1328 $data_name =~ s/^\w*\.//;
1329 }
1330
1331 push (@metadata, $data->[1]) if ($data_name eq $field);
1332 }
1333
1334 return \@metadata;
1335}
1336
1337# returns a list of the form [[field,value],[field,value],...]
1338sub get_all_metadata {
1339 my $self = shift (@_);
1340 my ($section) = @_;
1341
1342 my $section_ptr = $self->_lookup_section($section);
1343 if (!defined $section_ptr) {
1344 print STDERR "doc::get_all_metadata couldn't find section ", $section, "\n";
1345 return;
1346 }
1347
1348 return $section_ptr->{'metadata'};
1349}
1350
1351# $value is optional
1352sub delete_metadata {
1353 my $self = shift (@_);
1354 my ($section, $field, $value) = @_;
1355
1356 my $section_ptr = $self->_lookup_section($section);
1357 if (!defined $section_ptr) {
1358 print STDERR "doc::delete_metadata couldn't find section ", $section, "\n";
1359 return;
1360 }
1361
1362 my $i = 0;
1363 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
1364 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
1365 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
1366 splice (@{$section_ptr->{'metadata'}}, $i, 1);
1367 } else {
1368 $i++;
1369 }
1370 }
1371}
1372
1373sub delete_all_metadata {
1374 my $self = shift (@_);
1375 my ($section) = @_;
1376
1377 my $section_ptr = $self->_lookup_section($section);
1378 if (!defined $section_ptr) {
1379 print STDERR "doc::delete_all_metadata couldn't find section ", $section, "\n";
1380 return;
1381 }
1382
1383 $section_ptr->{'metadata'} = [];
1384}
1385
1386sub set_metadata_element {
1387 my $self = shift (@_);
1388 my ($section, $field, $value) = @_;
1389
1390 $self->set_utf8_metadata_element ($section, $field,
1391 &unicode::ascii2utf8(\$value));
1392}
1393
1394# set_utf8_metadata_element assumes the text has already been
1395# converted to the UTF-8 encoding.
1396sub set_utf8_metadata_element {
1397 my $self = shift (@_);
1398 my ($section, $field, $value) = @_;
1399
1400 $self->delete_metadata ($section, $field);
1401 $self->add_utf8_metadata ($section, $field, $value);
1402}
1403
1404
1405# add_metadata assumes the text is in (extended) ascii form. For
1406# text which has already been converted to the UTF-8 format use
1407# add_utf8_metadata.
1408sub add_metadata {
1409 my $self = shift (@_);
1410 my ($section, $field, $value) = @_;
1411
1412 $self->add_utf8_metadata ($section, $field,
1413 &unicode::ascii2utf8(\$value));
1414}
1415
1416sub add_utf8_metadata {
1417 my $self = shift (@_);
1418 my ($section, $field, $value) = @_;
1419
1420 my $section_ptr = $self->_lookup_section($section);
1421 if (!defined $section_ptr) {
1422 print STDERR "doc::add_utf8_metadata couldn't find section ", $section, "\n";
1423 return;
1424 }
1425 if (!defined $value) {
1426 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
1427 return;
1428 }
1429 if (!defined $field) {
1430 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
1431 return;
1432 }
1433
1434 #print STDERR "###$field=$value\n";
1435 # double check that the value is utf-8
1436 if (unicode::ensure_utf8(\$value)) {
1437 print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
1438 }
1439
1440 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
1441}
1442
1443
1444# methods for dealing with text
1445
1446# returns the text for a section
1447sub get_text {
1448 my $self = shift (@_);
1449 my ($section) = @_;
1450
1451 my $section_ptr = $self->_lookup_section($section);
1452 if (!defined $section_ptr) {
1453 print STDERR "doc::get_text couldn't find section " .
1454 "$section\n";
1455 return "";
1456 }
1457
1458 return $section_ptr->{'text'};
1459}
1460
1461# returns the (utf-8 encoded) length of the text for a section
1462sub get_text_length {
1463 my $self = shift (@_);
1464 my ($section) = @_;
1465
1466 my $section_ptr = $self->_lookup_section($section);
1467 if (!defined $section_ptr) {
1468 print STDERR "doc::get_text_length couldn't find section " .
1469 "$section\n";
1470 return 0;
1471 }
1472
1473 return length ($section_ptr->{'text'});
1474}
1475
1476sub delete_text {
1477 my $self = shift (@_);
1478 my ($section) = @_;
1479
1480 my $section_ptr = $self->_lookup_section($section);
1481 if (!defined $section_ptr) {
1482 print STDERR "doc::delete_text couldn't find section " .
1483 "$section\n";
1484 return;
1485 }
1486
1487 $section_ptr->{'text'} = "";
1488}
1489
1490# add_text assumes the text is in (extended) ascii form. For
1491# text which has been already converted to the UTF-8 format
1492# use add_utf8_text.
1493sub add_text {
1494 my $self = shift (@_);
1495 my ($section, $text) = @_;
1496
1497 # convert the text to UTF-8 encoded unicode characters
1498 # and add the text
1499 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
1500}
1501
1502
1503# add_utf8_text assumes the text to be added has already
1504# been converted to the UTF-8 encoding. For ascii text use
1505# add_text
1506sub add_utf8_text {
1507 my $self = shift (@_);
1508 my ($section, $text) = @_;
1509
1510 my $section_ptr = $self->_lookup_section($section);
1511 if (!defined $section_ptr) {
1512 print STDERR "doc::add_utf8_text couldn't find section " .
1513 "$section\n";
1514 return;
1515 }
1516
1517 $section_ptr->{'text'} .= $text;
1518}
1519
1520
1521# methods for dealing with associated files
1522
1523# a file is associated with a document, NOT a section.
1524# if section is defined it is noted in the data structure
1525# only so that files associated from a particular section
1526# may be removed later (using delete_section_assoc_files)
1527sub associate_file {
1528 my $self = shift (@_);
1529 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
1530 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
1531
1532 # remove all associated files with the same name
1533 $self->delete_assoc_file ($assoc_filename);
1534
1535 push (@{$self->{'associated_files'}},
1536 [$real_filename, $assoc_filename, $mime_type, $section]);
1537}
1538
1539# returns a list of associated files in the form
1540# [[real_filename, assoc_filename, mimetype], ...]
1541sub get_assoc_files {
1542 my $self = shift (@_);
1543
1544 return $self->{'associated_files'};
1545}
1546
1547sub delete_section_assoc_files {
1548 my $self = shift (@_);
1549 my ($section) = @_;
1550
1551 my $i=0;
1552 while ($i < scalar (@{$self->{'associated_files'}})) {
1553 if (defined $self->{'associated_files'}->[$i]->[3] &&
1554 $self->{'associated_files'}->[$i]->[3] eq $section) {
1555 splice (@{$self->{'associated_files'}}, $i, 1);
1556 } else {
1557 $i++;
1558 }
1559 }
1560}
1561
1562sub delete_assoc_file {
1563 my $self = shift (@_);
1564 my ($assoc_filename) = @_;
1565
1566 my $i=0;
1567 while ($i < scalar (@{$self->{'associated_files'}})) {
1568 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
1569 splice (@{$self->{'associated_files'}}, $i, 1);
1570 } else {
1571 $i++;
1572 }
1573 }
1574}
1575
1576sub reset_nextsection_ptr {
1577 my $self = shift (@_);
1578 my ($section) = @_;
1579
1580 my $section_ptr = $self->_lookup_section($section);
1581 $section_ptr->{'next_subsection'} = 1;
1582}
1583
15841;
Note: See TracBrowser for help on using the repository browser.