source: trunk/gsdl/perllib/doc.pm@ 12270

Last change on this file since 12270 was 12268, checked in by kjdon, 18 years ago

set_OIDtype now takes a second argument which is the metadata element to use for the unique id

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 42.5 KB
Line 
1###########################################################################
2#
3# doc.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# base class to hold documents
27
28package doc;
29eval {require bytes};
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
34}
35
36use unicode;
37use util;
38use ghtml;
39use File::stat;
40##use hashdoc;
41
42# the document type may be indexed_doc, nonindexed_doc, or
43# classification
44
45my $OIDcount = 0;
46
47sub new {
48 my $class = shift (@_);
49 my ($source_filename, $doc_type) = @_;
50
51 my $self = bless {'associated_files'=>[],
52 'subsection_order'=>[],
53 'next_subsection'=>1,
54 'subsections'=>{},
55 'metadata'=>[],
56 'text'=>"",
57 'OIDtype'=>"hash"}, $class;
58
59 # used to set lastmodified here, but this can screw up the HASH ids, so
60 # the docsave processor now calls set_lastmodified
61
62 if (defined $source_filename) {
63 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
64
65 if (defined $collect_dir) {
66 my $dirsep = &util::get_dirsep();
67 if ($collect_dir !~ m/$dirsep$/) {
68 $collect_dir .= $dirsep;
69 }
70
71 $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
72
73 # if from within GSDLCOLLECTDIR, then remove directory prefix
74 # so source_filename is realative to it. This is done to aid
75 # portability, i.e. the collection can be moved to somewhere
76 # else on the file system and the archives directory will still
77 # work. This is needed, for example in the applet version of
78 # GLI where GSDLHOME/collect on the server will be different to
79 # the collect directory of the remove user. Of course,
80 # GSDLCOLLECTDIR subsequently needs to be put back on to turn
81 # it back into a full pathname.
82
83 if ($source_filename =~ /^$collect_dir(.*)$/) {
84 $source_filename = $1;
85 }
86 }
87
88 $self->set_source_filename ($source_filename);
89 }
90
91 $self->set_doc_type ($doc_type) if defined $doc_type;
92
93 return $self;
94}
95# set lastmodified for OAI purposes, added by GRB, moved by kjdon
96sub set_lastmodified {
97 my $self = shift (@_);
98
99 my $source_filename = $self->get_source_filename();
100 if ((defined $self->get_metadata_element ($self->get_top_section(), "gsdldoctype")) &&
101 (defined $source_filename) && (-e $source_filename)) {
102 my $file_stat = stat($source_filename);
103 my $mtime = $file_stat->mtime;
104 $self->add_utf8_metadata($self->get_top_section(), "lastmodified", $file_stat->mtime);
105 }
106}
107
108# clone the $self object
109sub duplicate {
110 my $self = shift (@_);
111
112 my $newobj = {};
113
114 foreach my $k (keys %$self) {
115 $newobj->{$k} = &clone ($self->{$k});
116 }
117
118 bless $newobj, ref($self);
119 return $newobj;
120}
121
122sub clone {
123 my ($from) = @_;
124 my $type = ref ($from);
125
126 if ($type eq "HASH") {
127 my $to = {};
128 foreach my $key (keys %$from) {
129 $to->{$key} = &clone ($from->{$key});
130 }
131 return $to;
132 } elsif ($type eq "ARRAY") {
133 my $to = [];
134 foreach my $v (@$from) {
135 push (@$to, &clone ($v));
136 }
137 return $to;
138 } else {
139 return $from;
140 }
141}
142
143sub set_OIDtype {
144 my $self = shift (@_);
145 my ($type, $metadata) = @_;
146
147 if (defined $type && $type =~ /^(hash|incremental|dirname|assigned)$/) {
148 $self->{'OIDtype'} = $type;
149 } else {
150 $self->{'OIDtype'} = "hash";
151 }
152 if ($type =~ /^assigned$/) {
153 if (defined $metadata) {
154 $self->{'OIDmetadata'} = $metadata;
155 } else {
156 $self->{'OIDmetadata'} = "dc.Identifier";
157 }
158 }
159}
160
161sub set_source_filename {
162 my $self = shift (@_);
163 my ($source_filename) = @_;
164
165 $self->set_metadata_element ($self->get_top_section(),
166 "gsdlsourcefilename",
167 $source_filename);
168}
169
170sub set_converted_filename {
171 my $self = shift (@_);
172 my ($converted_filename) = @_;
173
174 $self->set_metadata_element ($self->get_top_section(),
175 "gsdlconvertedfilename",
176 $converted_filename);
177}
178
179
180# returns the source_filename as it was provided
181sub get_source_filename {
182 my $self = shift (@_);
183
184 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
185}
186
187# returns converted filename if available else returns source filename
188sub get_filename_for_hashing {
189 my $self = shift (@_);
190
191 my $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlconvertedfilename");
192
193 if (!defined $filename) {
194 my $plugin_name = $self->get_metadata_element ($self->get_top_section(), "Plugin");
195 # if NULPlug processed file, then don't give a filename
196 if (defined $plugin_name && $plugin_name eq "NULPlug") {
197 $filename = undef;
198 } else {
199 $filename = $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
200 }
201 }
202 return $filename;
203}
204
205sub set_doc_type {
206 my $self = shift (@_);
207 my ($doc_type) = @_;
208
209 $self->set_metadata_element ($self->get_top_section(),
210 "gsdldoctype",
211 $doc_type);
212}
213
214# returns the gsdldoctype as it was provided
215# the default of "indexed_doc" is used if no document
216# type was provided
217sub get_doc_type {
218 my $self = shift (@_);
219
220 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
221 return $doc_type if (defined $doc_type);
222 return "indexed_doc";
223}
224
225sub _escape_text {
226 my ($text) = @_;
227
228 # special characters in the gml encoding
229 $text =~ s/&/&/g; # this has to be first...
230 $text =~ s/</&lt;/g;
231 $text =~ s/>/&gt;/g;
232 $text =~ s/\"/&quot;/g;
233
234 return $text;
235}
236
237sub buffer_section_xml {
238 my $self = shift (@_);
239 my ($section) = @_;
240
241 my $section_ptr = $self->_lookup_section ($section);
242 return "" unless defined $section_ptr;
243
244 my $all_text = "<Section>\n";
245 $all_text .= " <Description>\n";
246
247 # output metadata
248 foreach my $data (@{$section_ptr->{'metadata'}}) {
249 my $escaped_value = &_escape_text($data->[1]);
250 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
251 }
252
253 $all_text .= " </Description>\n";
254
255 # output the text
256 $all_text .= " <Content>";
257 $all_text .= &_escape_text($section_ptr->{'text'});
258 $all_text .= "</Content>\n";
259
260 # output all the subsections
261 foreach my $subsection (@{$section_ptr->{'subsection_order'}}) {
262 $all_text .= $self->buffer_section_xml("$section.$subsection");
263 }
264
265 $all_text .= "</Section>\n";
266
267 # make sure no nasty control characters have snuck through
268 # (XML::Parser will barf on anything it doesn't consider to be
269 # valid UTF-8 text, including things like \c@, \cC etc.)
270 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
271
272 return $all_text;
273}
274
275sub buffer_txt_section_xml {
276 my $self = shift(@_);
277 my ($section) = @_;
278
279 my $section_ptr = $self->_lookup_section ($section);
280
281 return "" unless defined $section_ptr;
282
283 my $all_text = "<Section>\n";
284
285 ##output the text
286 #$all_text .= " <Content>";
287 $all_text .= &_escape_text($section_ptr->{'text'});
288 #$all_text .= " </Content>\n";
289
290
291 #output all the subsections
292 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
293 $all_text .= $self->buffer_txt_section_xml("$section.$subsection");
294 }
295
296 $all_text .= "</Section>\n";
297
298
299 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
300 return $all_text;
301}
302
303sub buffer_mets_fileSection_section_xml() {
304 my $self = shift(@_);
305 my ($section,$version) = @_;
306
307 #$section="" unless defined $section;
308
309
310 my $section_ptr=$self->_lookup_section($section);
311 return "" unless defined $section_ptr;
312
313
314 # output fileSection by sections
315 my $section_num ="1". $section;
316
317 my $filePath = 'doctxt.xml';
318
319 my $opt_owner_id = "";
320 if ($version eq "fedora") {
321 $opt_owner_id = "OWNERID=\"M\"";
322 }
323
324 # output the fileSection details
325 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n";
326 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"FILE$section_num\" $opt_owner_id >\n";
327 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section[';
328
329 my $xpath = "1".$section;
330 $xpath =~ s/\./]\/Section[/g;
331
332 $all_text .= $xpath;
333
334 $all_text .= ']/text())" xlink:title="Hierarchical Document Structure"/>' . "\n";
335 $all_text .= " </mets:file>\n";
336 $all_text .= " </mets:fileGrp>\n";
337
338
339 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
340 $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$version);
341 }
342
343 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
344
345 return $all_text;
346}
347
348sub buffer_mets_fileWhole_section_xml(){
349 my $self = shift(@_);
350 my ($section,$version,$working_dir) = @_;
351
352 my $section_ptr = $self-> _lookup_section($section);
353 return "" unless defined $section_ptr;
354
355 my $all_text="" unless defined $all_txt;
356
357 my $fileID=0;
358
359 # Output the fileSection for the whole section
360 # => get the sourcefile and associative file
361
362 my $id_root = "";
363 my $opt_owner_id = "";
364
365 if ($version eq "fedora") {
366 $opt_owner_id = "OWNERID=\"M\"";
367 }
368 else {
369 $id_root = "default";
370 }
371
372 if ($version ne "fedora") {
373 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
374 }
375
376 foreach my $data (@{$section_ptr->{'metadata'}}){
377 my $escaped_value = &_escape_text($data->[1]);
378
379 if (($data->[0] eq "gsdlsourcefilename") && ($version ne "fedora")) {
380 my ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/;
381
382 ++$fileID;
383 $all_text .= " <mets:file MIMETYPE=\"text/xml\" ID=\"$id_root.$fileID\" $opt_owner_id >\n";
384
385 $all_text .= ' <mets:FLocat LOCTYPE="URL" xlink:href="file:'.$data->[1].'" />'."\n";
386
387 $all_text .= " </mets:file>\n";
388 }
389
390 if ($data->[0] eq "gsdlassocfile"){
391
392 $escaped_value =~ m/^(.*?):(.*):(.*)$/;
393 my $assoc_file = $1;
394 my $mime_type = $2;
395 my $assoc_dir = $3;
396
397 if ($version eq "fedora") {
398 $id_root = $assoc_file;
399 $id_root =~ s/\//_/g;
400 $all_text .= " <mets:fileGrp ID=\"$id_root\">\n";
401 }
402
403 my $assfilePath = ($assoc_dir eq "") ? $assoc_file : "$assoc_dir/$assoc_file";
404 ++$fileID;
405
406 my $mime_attr = "MIMETYPE=\"$mime_type\"";
407 my $xlink_title = "xlink:title=\"$assoc_file\"";
408
409 my $id_attr;
410 my $xlink_href;
411
412 if ($version eq "fedora") {
413 $id_attr = "ID=\"$id_root.0\"";
414
415 my $fedora_prefix = $ENV{'FEDORA_PREFIX'};
416 if (!defined $fedora_prefix) {
417 $xlink_href = "xlink:href=\"$assfilePath\"";
418 }
419 else
420 {
421 my $gsdlhome = $ENV{'GSDLHOME'};
422 my $gsdl_href = "$working_dir/$assfilePath";
423
424 $gsdl_href =~ s/^$gsdlhome(\/)?//;
425 $gsdl_href = "/gsdl/$gsdl_href";
426
427 my $fserver = $ENV{'FEDORA_HOSTNAME'};
428 my $fport = $ENV{'FEDORA_SERVER_PORT'};
429
430 my $fdomain = "http://$fserver:$fport";
431 $xlink_href = "xlink:href=\"$fdomain$gsdl_href\"";
432 }
433
434 my $top_section = $self->get_top_section();
435 my $id = $self->get_metadata_element($top_section,"Identifier");
436 }
437 else {
438 $id_attr = "ID=\"$id_root.$fileID\"";
439 $xlink_href = "xlink:href=\"$assfilePath\"";
440 }
441
442 $all_text .= " <mets:file $mime_attr $id_attr $opt_owner_id >\n";
443 $all_text .= " <mets:FLocat LOCTYPE=\"URL\" $xlink_href $xlink_title />\n";
444
445 $all_text .= " </mets:file>\n";
446
447 if ($version eq "fedora") {
448 $all_text .= " </mets:fileGrp>\n";
449 }
450
451 }
452 }
453
454 if ($version ne "fedora") {
455 $all_text .= " </mets:fileGrp>\n";
456 }
457
458 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
459
460 return $all_text;
461}
462
463sub buffer_mets_StructMapSection_section_xml(){
464 my $self = shift(@_);
465 my ($section, $order_numref) = @_;
466
467 $section="" unless defined $section;
468
469
470 my $section_ptr=$self->_lookup_section($section);
471 return "" unless defined $section_ptr;
472
473
474 # output fileSection by Sections
475 my $section_num ="1". $section;
476 my $dmd_num = $section_num;
477
478 ##**output the dmdSection details
479 #if ($section_num eq "1") {
480 # $dmd_num = "0";
481 #}
482
483 #**output the StructMap details
484
485 my $dmdid_attr = "DM$dmd_num";
486
487 my $all_text = " <mets:div ID=\"DS$section_num\" TYPE=\"Section\" \n";
488 $all_text .= ' ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" '."\n";
489 $all_text .= " LABEL=\"$section_num\" DMDID=\"$dmdid_attr\">\n";
490
491 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n";
492
493
494 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
495 $all_text .= $self->buffer_mets_StructMapSection_section_xml("$section.$subsection", $order_numref);
496 }
497
498 $all_text .= " </mets:div>\n";
499
500 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
501
502 return $all_text;
503}
504
505
506sub buffer_mets_StructMapWhole_section_xml(){
507 my $self = shift(@_);
508 my ($section) = @_;
509
510 my $section_ptr = $self-> _lookup_section($section);
511 return "" unless defined $section_ptr;
512
513 my $all_text="" unless defined $all_txt;
514 my $fileID=0;
515 my $order_num = 0;
516
517 $all_text .= ' <mets:div ID="DSAll" TYPE="Document" ORDER="'.$order_num.'" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n";
518
519 #** output the StructMapSection for the whole section
520 # get the sourcefile and associative file
521
522 foreach my $data (@{$section_ptr->{'metadata'}}){
523 my $escaped_value = &_escape_text($data->[1]);
524
525 if ($data->[0] eq "gsdlsourcefilename") {
526 ++$fileID;
527 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n";
528 }
529
530 if ($data->[0] eq "gsdlassocfile"){
531 ++$fileID;
532 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n";
533 }
534 }
535 $all_text .= " </mets:div>\n";
536
537 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
538
539 return $all_text;
540}
541
542
543sub buffer_mets_dmdSection_section_xml(){
544 my $self = shift(@_);
545 my ($section,$version) = @_;
546
547 $section="" unless defined $section;
548
549 my $section_ptr=$self->_lookup_section($section);
550 return "" unless defined $section_ptr;
551
552 # convert section number
553 my $section_num ="1". $section;
554 my $dmd_num = $section_num;
555
556 # #**output the dmdSection details
557 # if ($section_num eq "1") {
558 # $dmd_num = "0";
559 # }
560
561
562 my $all_text = "";
563
564 my $label_attr = "";
565 if ($version eq "fedora") {
566 $all_text .= "<mets:amdSec ID=\"DC\" >\n";
567 $all_text .= " <mets:techMD ID=\"DC.0\">\n"; # .0 fedora version number?
568
569 $label_attr = "LABEL=\"Dublin Core Metadata\"";
570 }
571 else {
572 # TODO::
573 #print STDERR "***** Check that GROUPID in dmdSec is valid!!!\n";
574 #print STDERR "***** Check to see if <techMD> required\n";
575 # if it isn't allowed, go back and set $mdTag = dmdSec/amdSec
576
577 $all_text .= "<mets:dmdSec ID=\"DM$dmd_num\" GROUPID=\"$section_num\">\n";
578 }
579
580 $all_text .= " <mets:mdWrap $label_attr MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"gsdl$section_num\">\n";
581 $all_text .= " <mets:xmlData>\n";
582
583 if ($version eq "fedora") {
584 my $dc_namespace = "";
585 $dc_namespace .= "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"";
586 $dc_namespace .= " xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\">\n";
587
588 $all_text .= " <oai_dc:dc $dc_namespace>\n";
589
590 $all_text .= $self->buffer_dc_section($section,"oai_dc");
591 $all_text .= " </oai_dc:dc>\n";
592 }
593 else {
594 foreach my $data (@{$section_ptr->{'metadata'}}){
595 my $escaped_value = &_escape_text($data->[1]);
596 $all_text .= ' <gsdl3:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl3:Metadata>\n";
597 if ($data->[0] eq "dc.Title") {
598 $all_text .= ' <gsdl3:Metadata name="Title">'. $escaped_value."</gsdl3:Metadata>\n";
599 }
600 }
601 }
602
603 $all_text .= " </mets:xmlData>\n";
604 $all_text .= " </mets:mdWrap>\n";
605
606 if ($version eq "fedora") {
607 $all_text .= " </mets:techMD>\n";
608 $all_text .= "</mets:amdSec>\n";
609 }
610 else {
611 $all_text .= "</mets:dmdSec>\n";
612 }
613
614
615 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){
616 $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection",$version);
617 }
618
619 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
620
621 return $all_text;
622}
623
624sub output_section {
625 my $self = shift (@_);
626 my ($handle, $section) = @_;
627
628 print $handle $self->buffer_section_xml($section);
629}
630
631# print out DSpace dublin_core metadata section
632sub output_dspace_section {
633 my $self = shift (@_);
634 my ($handle, $section) = @_;
635
636 my $section_ptr = $self->_lookup_section ($section);
637 return "" unless defined $section_ptr;
638
639 my $all_text = "<Section>\n";
640 $all_text .= " <Description>\n";
641
642 # output metadata
643 foreach my $data (@{$section_ptr->{'metadata'}}) {
644 my $escaped_value = &_escape_text($data->[1]);
645 $all_text .= ' <Metadata name="' . $data->[0] . '">' . $escaped_value . "</Metadata>\n";
646 }
647
648 $all_text .= " </Description>\n";
649 $all_text .= "</Section>\n";
650
651 # make sure no nasty control characters have snuck through
652 # (XML::Parser will barf on anything it doesn't consider to be
653 # valid UTF-8 text, including things like \c@, \cC etc.)
654 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
655
656 return $all_text;
657}
658
659# print out doctxt.xml file
660sub output_txt_section {
661 my $self = shift (@_);
662 my ($handle, $section) = @_;
663
664 print $handle $self->buffer_txt_section_xml($section);
665}
666
667# print out docmets.xml file
668sub output_mets_section {
669 my $self = shift(@_);
670 my ($handle, $section, $version, $working_dir) = @_;
671
672 # print out the dmdSection
673 print $handle $self->buffer_mets_dmdSection_section_xml($section, $version);
674
675 print $handle "<mets:fileSec>\n";
676 if ($version eq "fedora") {
677 print $handle " <mets:fileGrp ID=\"DATASTREAMS\">\n";
678 }
679
680 # print out the fileSection by sections
681 print $handle $self->buffer_mets_fileSection_section_xml($section,$version);
682
683 # print out the whole fileSection
684 print $handle $self->buffer_mets_fileWhole_section_xml($section,$version,$working_dir);
685
686 if ($version eq "fedora") {
687 print $handle " </mets:fileGrp>\n";
688 }
689 print $handle "</mets:fileSec>\n";
690
691 # print out the StructMapSection by sections
692
693 my $struct_type;
694 if ($version eq "fedora") {
695 $struct_type = "fedora:dsBindingMap";
696 }
697 else {
698 $struct_type = "Section";
699 }
700
701 if ($version ne "fedora") {
702 print $handle "<mets:structMap ID=\"Section\" TYPE=\"$struct_type\" LABEL=\"Section\">\n";
703 my $order_num=0;
704 print $handle $self->buffer_mets_StructMapSection_section_xml($section, \$order_num);
705 print $handle "</mets:structMap>\n";
706
707 print $handle '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n";
708 print $handle $self->buffer_mets_StructMapWhole_section_xml($section);
709 print $handle "</mets:structMap>\n";
710 }
711
712}
713
714my $dc_set = { Title => 1,
715 Creator => 1,
716 Subject => 1,
717 Description => 1,
718 Publisher => 1,
719 Contributor => 1,
720 Date => 1,
721 Type => 1,
722 Format => 1,
723 Identifier => 1,
724 Source => 1,
725 Language => 1,
726 Relation => 1,
727 Coverage => 1,
728 Rights => 1};
729
730
731
732
733# Build up dublin_core metadata. Priority given to dc.* over ex.*
734
735sub buffer_dc_section {
736 my $self = shift(@_);
737 my ($section, $version) = @_;
738
739 # build up string of dublin core metadata
740 $section="" unless defined $section;
741
742 my $section_ptr=$self->_lookup_section($section);
743 return "" unless defined $section_ptr;
744
745 my $explicit_dc = {};
746 my $explicit_ex = {};
747
748 my $all_text="";
749 foreach my $data (@{$section_ptr->{'metadata'}}){
750 my $escaped_value = &_escape_text($data->[1]);
751 if ($data->[0]=~ m/^dc\./) {
752 $data->[0] =~ tr/[A-Z]/[a-z]/;
753
754 $data->[0] =~ m/^dc\.(.*)/;
755 my $dc_element = $1;
756
757 if (!defined $explicit_dc->{$dc_element}) {
758 $explicit_dc->{$dc_element} = [];
759 }
760 push(@{$explicit_dc->{$dc_element}},$escaped_value);
761
762 #$all_text .= ' <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
763 if (defined $version && ($version eq "oai_dc")) {
764 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
765 }
766 else {
767 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
768 }
769
770 }
771 elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
772 $data->[0] =~ m/^(ex\.)?(.*)/;
773 my $ex_element = $2;
774 my $lc_ex_element = lc($ex_element);
775
776 if (defined $dc_set->{$ex_element}) {
777 if (!defined $explicit_ex->{$lc_ex_element}) {
778 $explicit_ex->{$lc_ex_element} = [];
779 }
780 push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
781 }
782 }
783 }
784
785 # go through dc_set and for any element *not* defined in explicit_dc
786 # that do exist in explicit_ex, add it in as metadata
787 foreach my $k ( keys %$dc_set ) {
788 my $lc_k = lc($k);
789
790 if (!defined $explicit_dc->{$lc_k}) {
791 if (defined $explicit_ex->{$lc_k}) {
792
793 foreach my $v (@{$explicit_ex->{$lc_k}}) {
794 my $dc_element = $lc_k;
795 my $escaped_value = $v;
796
797 if (defined $version && ($version eq "oai_dc")) {
798 $all_text .= " <dc:$dc_element>$escaped_value</dc:$dc_element>\n";
799 }
800 else {
801 $all_text .= ' <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
802 }
803
804 }
805 }
806 }
807 }
808
809 if ($all_text eq "") {
810 $all_text .= " There is no Dublin Core metatdata in this document\n";
811 }
812 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g;
813
814 return $all_text;
815}
816
817
818# Print out dublin_core metadata
819sub output_dc_section {
820 my $self = shift(@_);
821 my ($handle, $section, $version) = @_;
822
823 my $all_text = $self->buffer_dc_section($section,$version);
824
825 print $handle $all_text;
826}
827
828
829# look up the reference to the a particular section
830sub _lookup_section {
831 my $self = shift (@_);
832 my ($section) = @_;
833
834 my ($num);
835 my $sectionref = $self;
836
837 while (defined $section && $section ne "") {
838 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
839 $num =~ s/^0+(\d)/$1/; # remove leading 0s
840 $section = "" unless defined $section;
841
842 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
843 $sectionref = $sectionref->{'subsections'}->{$num};
844 } else {
845 return undef;
846 }
847 }
848
849 return $sectionref;
850}
851
852# calculate OID by hashing the contents of the document
853sub _calc_OID {
854 my $self = shift (@_);
855 my ($filename) = @_;
856
857 my $osexe = &util::get_os_exe();
858
859 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
860 $ENV{'GSDLOS'},"hashfile$osexe");
861
862 my $result = "NULL";
863
864 if (-e "$hashfile_exe") {
865# $result = `\"$hashfile_exe\" \"$filename\"`;
866 $result = `hashfile$osexe \"$filename\"`;
867 ($result) = $result =~ /:\s*([0-9a-f]+)/i;
868
869 } else {
870 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
871 }
872 return "HASH$result";
873}
874
875# methods dealing with OID, not groups of them.
876
877# if $OID is not provided one is calculated
878sub set_OID {
879 my $self = shift (@_);
880 my ($OID) = @_;
881
882 my $use_hash_oid = 0;
883 # if an OID wasn't provided claculate one
884 if (!defined $OID) {
885 $OID = "NULL";
886 if ($self->{'OIDtype'} eq "hash") {
887 $use_hash_oid = 1;
888 } elsif ($self->{'OIDtype'} eq "incremental") {
889 $OID = "D" . $OIDcount;
890 $OIDcount ++;
891
892 } elsif ($self->{'OIDtype'} eq "dirname") {
893 $OID = 'J';
894 my $filename = $self->get_source_filename();
895 if (defined($filename)) { # && -e $filename) {
896 $OID = &File::Basename::dirname($filename);
897 if (defined $OID) {
898 $OID = 'J'.&File::Basename::basename($OID);
899 $OID =~ s/\.//; #remove any periods
900 } else {
901 print STDERR "Failed to find base for filename ($filename)...generating hash id\n";
902 $use_hash_oid = 1;
903 }
904 } else {
905 print STDERR "Failed to find filename, generating hash id\n";
906 $use_hash_oid = 1;
907 }
908
909 } elsif ($self->{'OIDtype'} eq "assigned") {
910 my $identifier = $self->get_metadata_element ($self->get_top_section(), $self->{'OIDmetadata'});
911 if (defined $identifier && $identifier ne "") {
912 $OID = "D" . $identifier;
913 $OID =~ s/\.//; #remove any periods
914 } else {
915 # need a hash id
916 print STDERR "no $self->{'OIDmetadata'} metadata found, generating hash id\n";
917 $use_hash_oid = 1;
918 }
919
920 } else {
921 $use_hash_oid = 1;
922 }
923
924 if ($use_hash_oid) {
925
926 # "hash" OID - feed file to hashfile.exe
927 #my $filename = $self->get_source_filename();
928 # we want to use the converted file for hashing if available
929 # cos its quicker
930 my $filename = $self->get_filename_for_hashing();
931 # -z: don't want to hash on the file if it is zero size
932 if (defined($filename) && -e $filename && !-z $filename) {
933 $OID = $self->_calc_OID ($filename);
934 } else {
935 $filename = &util::get_tmp_filename();
936 if (!open (OUTFILE, ">$filename")) {
937 print STDERR "doc::set_OID could not write to $filename\n";
938 } else {
939 $self->output_section('OUTFILE', $self->get_top_section(),
940 undef, 1);
941 close (OUTFILE);
942 }
943 $OID = $self->_calc_OID ($filename);
944 &util::rm ($filename);
945 }
946 }
947 }
948 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
949}
950
951# this uses hashdoc (embedded c thingy) which is faster but still
952# needs a little work to be suffiently stable
953sub ___set_OID {
954 my $self = shift (@_);
955 my ($OID) = @_;
956
957 # if an OID wasn't provided then calculate hash value based on document
958 if (!defined $OID)
959 {
960 my $hash_text = $self->buffer_section_gml($self->get_top_section(),
961 undef, 1);
962 my $hash_len = length($hash_text);
963
964 $OID = &hashdoc::buffer($hash_text,$hash_len);
965 }
966
967 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
968}
969
970# returns the OID for this document
971sub get_OID {
972 my $self = shift (@_);
973 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
974 return $OID if (defined $OID);
975 return "NULL";
976}
977
978sub delete_OID {
979 my $self = shift (@_);
980
981 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
982}
983
984
985# methods for manipulating section names
986
987# returns the name of the top-most section (the top
988# level of the document
989sub get_top_section {
990 my $self = shift (@_);
991
992 return "";
993}
994
995# returns a section
996sub get_parent_section {
997 my $self = shift (@_);
998 my ($section) = @_;
999
1000 $section =~ s/(^|\.)\d+$//;
1001
1002 return $section;
1003}
1004
1005# returns the first child section (or the end child
1006# if there isn't any)
1007sub get_begin_child {
1008 my $self = shift (@_);
1009 my ($section) = @_;
1010
1011 my $section_ptr = $self->_lookup_section($section);
1012 return "" unless defined $section_ptr;
1013
1014 if (defined $section_ptr->{'subsection_order'}->[0]) {
1015 return "$section.$section_ptr->{'subsection_order'}->[0]";
1016 }
1017
1018 return $self->get_end_child ($section);
1019}
1020
1021# returns the next child of a parent section
1022sub get_next_child {
1023 my $self = shift (@_);
1024 my ($section) = @_;
1025
1026 my $parent_section = $self->get_parent_section($section);
1027 my $parent_section_ptr = $self->_lookup_section($parent_section);
1028 return undef unless defined $parent_section_ptr;
1029
1030 my ($section_num) = $section =~ /(\d+)$/;
1031 return undef unless defined $section_num;
1032
1033 my $i = 0;
1034 my $section_order = $parent_section_ptr->{'subsection_order'};
1035 while ($i < scalar(@$section_order)) {
1036 last if $section_order->[$i] eq $section_num;
1037 $i++;
1038 }
1039
1040 $i++; # the next child
1041 if ($i < scalar(@$section_order)) {
1042 return $section_order->[$i] if $parent_section eq "";
1043 return "$parent_section.$section_order->[$i]";
1044 }
1045
1046 # no more sections in this level
1047 return undef;
1048}
1049
1050# returns a reference to a list of children
1051sub get_children {
1052 my $self = shift (@_);
1053 my ($section) = @_;
1054
1055 my $section_ptr = $self->_lookup_section($section);
1056 return [] unless defined $section_ptr;
1057
1058 my @children = @{$section_ptr->{'subsection_order'}};
1059
1060 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
1061 return \@children;
1062}
1063
1064# returns the child section one past the last one (which
1065# is coded as "0")
1066sub get_end_child {
1067 my $self = shift (@_);
1068 my ($section) = @_;
1069
1070 return $section . ".0" unless $section eq "";
1071 return "0";
1072}
1073
1074# returns the next section in book order
1075sub get_next_section {
1076 my $self = shift (@_);
1077 my ($section) = @_;
1078
1079 return undef unless defined $section;
1080
1081 my $section_ptr = $self->_lookup_section($section);
1082 return undef unless defined $section_ptr;
1083
1084 # first try to find first child
1085 if (defined $section_ptr->{'subsection_order'}->[0]) {
1086 return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
1087 return "$section.$section_ptr->{'subsection_order'}->[0]";
1088 }
1089
1090 do {
1091 # try to find sibling
1092 my $next_child = $self->get_next_child ($section);
1093 return $next_child if (defined $next_child);
1094
1095 # move up one level
1096 $section = $self->get_parent_section ($section);
1097 } while $section =~ /\d/;
1098
1099 return undef;
1100}
1101
1102sub is_leaf_section {
1103 my $self = shift (@_);
1104 my ($section) = @_;
1105
1106 my $section_ptr = $self->_lookup_section($section);
1107 return 1 unless defined $section_ptr;
1108
1109 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
1110}
1111
1112# methods for dealing with sections
1113
1114# returns the name of the inserted section
1115sub insert_section {
1116 my $self = shift (@_);
1117 my ($before_section) = @_;
1118
1119 # get the child to insert before and its parent section
1120 my $parent_section = "";
1121 my $before_child = "0";
1122 my @before_section = split (/\./, $before_section);
1123 if (scalar(@before_section) > 0) {
1124 $before_child = pop (@before_section);
1125 $parent_section = join (".", @before_section);
1126 }
1127
1128 my $parent_section_ptr = $self->_lookup_section($parent_section);
1129 if (!defined $parent_section_ptr) {
1130 print STDERR "doc::insert_section couldn't find parent section " .
1131 "$parent_section\n";
1132 return;
1133 }
1134
1135 # get the next section number
1136 my $section_num = $parent_section_ptr->{'next_subsection'}++;
1137
1138 my $i = 0;
1139 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
1140 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
1141 $i++;
1142 }
1143
1144 # insert the section number into the order list
1145 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
1146
1147 # add this section to the parent section
1148 my $section_ptr = {'subsection_order'=>[],
1149 'next_subsection'=>1,
1150 'subsections'=>{},
1151 'metadata'=>[],
1152 'text'=>""};
1153 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
1154
1155 # work out the full section number
1156 my $section = $parent_section;
1157 $section .= "." unless $section eq "";
1158 $section .= $section_num;
1159
1160 return $section;
1161}
1162
1163# creates a pre-named section
1164sub create_named_section {
1165 my $self = shift (@_);
1166 my ($mastersection) = @_;
1167
1168 my ($num);
1169 my $section = $mastersection;
1170 my $sectionref = $self;
1171
1172 while ($section ne "") {
1173 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
1174 $num =~ s/^0+(\d)/$1/; # remove leading 0s
1175 $section = "" unless defined $section;
1176
1177 if (defined $num) {
1178 if (!defined $sectionref->{'subsections'}->{$num}) {
1179 push (@{$sectionref->{'subsection_order'}}, $num);
1180 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
1181 'next_subsection'=>1,
1182 'subsections'=>{},
1183 'metadata'=>[],
1184 'text'=>""};
1185 if ($num >= $sectionref->{'next_subsection'}) {
1186 $sectionref->{'next_subsection'} = $num + 1;
1187 }
1188 }
1189 $sectionref = $sectionref->{'subsections'}->{$num};
1190
1191 } else {
1192 print STDERR "doc::create_named_section couldn't create section ";
1193 print STDERR "$mastersection\n";
1194 last;
1195 }
1196 }
1197}
1198
1199# returns a reference to a list of subsections
1200sub list_subsections {
1201 my $self = shift (@_);
1202 my ($section) = @_;
1203
1204 my $section_ptr = $self->_lookup_section ($section);
1205 if (!defined $section_ptr) {
1206 print STDERR "doc::list_subsections couldn't find section $section\n";
1207 return [];
1208 }
1209
1210 return [@{$section_ptr->{'subsection_order'}}];
1211}
1212
1213sub delete_section {
1214 my $self = shift (@_);
1215 my ($section) = @_;
1216
1217# my $section_ptr = {'subsection_order'=>[],
1218# 'next_subsection'=>1,
1219# 'subsections'=>{},
1220# 'metadata'=>[],
1221# 'text'=>""};
1222
1223 # if this is the top section reset everything
1224 if ($section eq "") {
1225 $self->{'subsection_order'} = [];
1226 $self->{'subsections'} = {};
1227 $self->{'metadata'} = [];
1228 $self->{'text'} = "";
1229 return;
1230 }
1231
1232 # find the parent of the section to delete
1233 my $parent_section = "";
1234 my $child = "0";
1235 my @section = split (/\./, $section);
1236 if (scalar(@section) > 0) {
1237 $child = pop (@section);
1238 $parent_section = join (".", @section);
1239 }
1240
1241 my $parent_section_ptr = $self->_lookup_section($parent_section);
1242 if (!defined $parent_section_ptr) {
1243 print STDERR "doc::delete_section couldn't find parent section " .
1244 "$parent_section\n";
1245 return;
1246 }
1247
1248 # remove this section from the subsection_order list
1249 my $i = 0;
1250 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
1251 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
1252 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
1253 last;
1254 }
1255 $i++;
1256 }
1257
1258 # remove this section from the subsection hash
1259 if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
1260 undef $parent_section_ptr->{'subsections'}->{$child};
1261 }
1262}
1263
1264#--
1265# methods for dealing with metadata
1266
1267# set_metadata_element and get_metadata_element are for metadata
1268# which should only have one value. add_meta_data and get_metadata
1269# are for metadata which can have more than one value.
1270
1271# returns the first metadata value which matches field
1272
1273# This version of get metadata element works much like the one above,
1274# except it allows for the namespace portion of a metadata element to
1275# be ignored, thus if you are searching for dc.Title, the first piece
1276# of matching metadata ending with the name Title (once any namespace
1277# is removed) would be returned.
1278# 28-11-2003 John Thompson
1279sub get_metadata_element {
1280 my $self = shift (@_);
1281 my ($section, $field, $ignore_namespace) = @_;
1282 my ($data);
1283
1284 $ignore_namespace = 0 unless defined $ignore_namespace;
1285
1286 my $section_ptr = $self->_lookup_section($section);
1287 if (!defined $section_ptr) {
1288 print STDERR "doc::get_metadata_element couldn't find section ", $section, "\n";
1289 return;
1290 }
1291
1292 # Remove the any namespace if we are being told to ignore them
1293 if($ignore_namespace) {
1294 $field =~ s/^\w*\.//;
1295 }
1296
1297 foreach $data (@{$section_ptr->{'metadata'}}) {
1298
1299 my $data_name = $data->[0];
1300 # Remove the any namespace if we are being told to ignore them
1301 if($ignore_namespace) {
1302 $data_name =~ s/^\w*\.//;
1303 }
1304
1305 return $data->[1] if (scalar(@$data) >= 2 && $data_name eq $field);
1306 }
1307
1308 return undef; # was not found
1309}
1310
1311# returns a list of the form [value1, value2, ...]
1312sub get_metadata {
1313 my $self = shift (@_);
1314 my ($section, $field, $ignore_namespace) = @_;
1315 my ($data);
1316
1317 $ignore_namespace = 0 unless defined $ignore_namespace;
1318
1319 my $section_ptr = $self->_lookup_section($section);
1320 if (!defined $section_ptr) {
1321 print STDERR "doc::get_metadata couldn't find section ",
1322 $section, "\n";
1323 return;
1324 }
1325
1326 # Remove the any namespace if we are being told to ignore them
1327 if($ignore_namespace) {
1328 $field =~ s/^\w*\.//;
1329 }
1330
1331 my @metadata = ();
1332 foreach $data (@{$section_ptr->{'metadata'}}) {
1333
1334 my $data_name = $data->[0];
1335 # Remove the any namespace if we are being told to ignore them
1336 if($ignore_namespace) {
1337 $data_name =~ s/^\w*\.//;
1338 }
1339
1340 push (@metadata, $data->[1]) if ($data_name eq $field);
1341 }
1342
1343 return \@metadata;
1344}
1345
1346# returns a list of the form [[field,value],[field,value],...]
1347sub get_all_metadata {
1348 my $self = shift (@_);
1349 my ($section) = @_;
1350
1351 my $section_ptr = $self->_lookup_section($section);
1352 if (!defined $section_ptr) {
1353 print STDERR "doc::get_all_metadata couldn't find section ", $section, "\n";
1354 return;
1355 }
1356
1357 return $section_ptr->{'metadata'};
1358}
1359
1360# $value is optional
1361sub delete_metadata {
1362 my $self = shift (@_);
1363 my ($section, $field, $value) = @_;
1364
1365 my $section_ptr = $self->_lookup_section($section);
1366 if (!defined $section_ptr) {
1367 print STDERR "doc::delete_metadata couldn't find section ", $section, "\n";
1368 return;
1369 }
1370
1371 my $i = 0;
1372 while ($i < scalar (@{$section_ptr->{'metadata'}})) {
1373 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
1374 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
1375 splice (@{$section_ptr->{'metadata'}}, $i, 1);
1376 } else {
1377 $i++;
1378 }
1379 }
1380}
1381
1382sub delete_all_metadata {
1383 my $self = shift (@_);
1384 my ($section) = @_;
1385
1386 my $section_ptr = $self->_lookup_section($section);
1387 if (!defined $section_ptr) {
1388 print STDERR "doc::delete_all_metadata couldn't find section ", $section, "\n";
1389 return;
1390 }
1391
1392 $section_ptr->{'metadata'} = [];
1393}
1394
1395sub set_metadata_element {
1396 my $self = shift (@_);
1397 my ($section, $field, $value) = @_;
1398
1399 $self->set_utf8_metadata_element ($section, $field,
1400 &unicode::ascii2utf8(\$value));
1401}
1402
1403# set_utf8_metadata_element assumes the text has already been
1404# converted to the UTF-8 encoding.
1405sub set_utf8_metadata_element {
1406 my $self = shift (@_);
1407 my ($section, $field, $value) = @_;
1408
1409 $self->delete_metadata ($section, $field);
1410 $self->add_utf8_metadata ($section, $field, $value);
1411}
1412
1413
1414# add_metadata assumes the text is in (extended) ascii form. For
1415# text which has already been converted to the UTF-8 format use
1416# add_utf8_metadata.
1417sub add_metadata {
1418 my $self = shift (@_);
1419 my ($section, $field, $value) = @_;
1420
1421 $self->add_utf8_metadata ($section, $field,
1422 &unicode::ascii2utf8(\$value));
1423}
1424
1425sub add_utf8_metadata {
1426 my $self = shift (@_);
1427 my ($section, $field, $value) = @_;
1428
1429 my $section_ptr = $self->_lookup_section($section);
1430 if (!defined $section_ptr) {
1431 print STDERR "doc::add_utf8_metadata couldn't find section ", $section, "\n";
1432 return;
1433 }
1434 if (!defined $value) {
1435 print STDERR "doc::add_utf8_metadata undefined value for $field\n";
1436 return;
1437 }
1438 if (!defined $field) {
1439 print STDERR "doc::add_utf8_metadata undefined metadata type \n";
1440 return;
1441 }
1442
1443 #print STDERR "###$field=$value\n";
1444 # double check that the value is utf-8
1445 if (unicode::ensure_utf8(\$value)) {
1446 print STDERR "doc::add_utf8_metadata: warning: '$field' wasn't utf8\n";
1447 }
1448
1449 push (@{$section_ptr->{'metadata'}}, [$field, $value]);
1450}
1451
1452
1453# methods for dealing with text
1454
1455# returns the text for a section
1456sub get_text {
1457 my $self = shift (@_);
1458 my ($section) = @_;
1459
1460 my $section_ptr = $self->_lookup_section($section);
1461 if (!defined $section_ptr) {
1462 print STDERR "doc::get_text couldn't find section " .
1463 "$section\n";
1464 return "";
1465 }
1466
1467 return $section_ptr->{'text'};
1468}
1469
1470# returns the (utf-8 encoded) length of the text for a section
1471sub get_text_length {
1472 my $self = shift (@_);
1473 my ($section) = @_;
1474
1475 my $section_ptr = $self->_lookup_section($section);
1476 if (!defined $section_ptr) {
1477 print STDERR "doc::get_text_length couldn't find section " .
1478 "$section\n";
1479 return 0;
1480 }
1481
1482 return length ($section_ptr->{'text'});
1483}
1484
1485sub delete_text {
1486 my $self = shift (@_);
1487 my ($section) = @_;
1488
1489 my $section_ptr = $self->_lookup_section($section);
1490 if (!defined $section_ptr) {
1491 print STDERR "doc::delete_text couldn't find section " .
1492 "$section\n";
1493 return;
1494 }
1495
1496 $section_ptr->{'text'} = "";
1497}
1498
1499# add_text assumes the text is in (extended) ascii form. For
1500# text which has been already converted to the UTF-8 format
1501# use add_utf8_text.
1502sub add_text {
1503 my $self = shift (@_);
1504 my ($section, $text) = @_;
1505
1506 # convert the text to UTF-8 encoded unicode characters
1507 # and add the text
1508 $self->add_utf8_text($section, &unicode::ascii2utf8(\$text));
1509}
1510
1511
1512# add_utf8_text assumes the text to be added has already
1513# been converted to the UTF-8 encoding. For ascii text use
1514# add_text
1515sub add_utf8_text {
1516 my $self = shift (@_);
1517 my ($section, $text) = @_;
1518
1519 my $section_ptr = $self->_lookup_section($section);
1520 if (!defined $section_ptr) {
1521 print STDERR "doc::add_utf8_text couldn't find section " .
1522 "$section\n";
1523 return;
1524 }
1525
1526 $section_ptr->{'text'} .= $text;
1527}
1528
1529
1530# methods for dealing with associated files
1531
1532# a file is associated with a document, NOT a section.
1533# if section is defined it is noted in the data structure
1534# only so that files associated from a particular section
1535# may be removed later (using delete_section_assoc_files)
1536sub associate_file {
1537 my $self = shift (@_);
1538 my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
1539 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
1540
1541 # remove all associated files with the same name
1542 $self->delete_assoc_file ($assoc_filename);
1543
1544 push (@{$self->{'associated_files'}},
1545 [$real_filename, $assoc_filename, $mime_type, $section]);
1546}
1547
1548# returns a list of associated files in the form
1549# [[real_filename, assoc_filename, mimetype], ...]
1550sub get_assoc_files {
1551 my $self = shift (@_);
1552
1553 return $self->{'associated_files'};
1554}
1555
1556sub delete_section_assoc_files {
1557 my $self = shift (@_);
1558 my ($section) = @_;
1559
1560 my $i=0;
1561 while ($i < scalar (@{$self->{'associated_files'}})) {
1562 if (defined $self->{'associated_files'}->[$i]->[3] &&
1563 $self->{'associated_files'}->[$i]->[3] eq $section) {
1564 splice (@{$self->{'associated_files'}}, $i, 1);
1565 } else {
1566 $i++;
1567 }
1568 }
1569}
1570
1571sub delete_assoc_file {
1572 my $self = shift (@_);
1573 my ($assoc_filename) = @_;
1574
1575 my $i=0;
1576 while ($i < scalar (@{$self->{'associated_files'}})) {
1577 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
1578 splice (@{$self->{'associated_files'}}, $i, 1);
1579 } else {
1580 $i++;
1581 }
1582 }
1583}
1584
1585sub reset_nextsection_ptr {
1586 my $self = shift (@_);
1587 my ($section) = @_;
1588
1589 my $section_ptr = $self->_lookup_section($section);
1590 $section_ptr->{'next_subsection'} = 1;
1591}
1592
15931;
Note: See TracBrowser for help on using the repository browser.