- Timestamp:
- 1999-12-13T16:51:57+13:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/doc.pm
r537 r832 28 28 package doc; 29 29 30 use unicode; 31 use util; 32 use html; 30 use basedoc; 31 32 BEGIN { 33 @ISA = ('basedoc'); 34 } 33 35 34 36 # the document type may be indexed_doc, nonindexed_doc, or … … 39 41 my ($source_filename, $doc_type) = @_; 40 42 41 my $self = bless {'associated_files'=>[], 42 'subsection_order'=>[], 43 'next_subsection'=>1, 44 'subsections'=>{}, 45 'metadata'=>[], 46 'text'=>""}, $class; 47 43 my $self = new basedoc(); 48 44 49 45 # $self->set_source_filename ($source_filename) if defined $source_filename; … … 54 50 if defined $doc_type; 55 51 52 bless($self,$class); 56 53 return $self; 57 54 } 58 55 59 sub set_source_filename {60 my $self = shift (@_);61 my ($source_filename) = @_;62 63 $self->set_metadata_element ($self->get_top_section(),64 "gsdlsourcefilename",65 $source_filename);66 }67 68 # returns the source_filename as it was provided69 sub get_source_filename {70 my $self = shift (@_);71 72 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");73 }74 75 sub set_doc_type {76 my $self = shift (@_);77 my ($doc_type) = @_;78 79 $self->set_metadata_element ($self->get_top_section(),80 "gsdldoctype",81 $doc_type);82 }83 84 # returns the source_filename as it was provided85 # the default of "indexed_doc" is used if no document86 # type was provided87 sub get_doc_type {88 my $self = shift (@_);89 90 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");91 return $doc_type if (defined $doc_type);92 return "indexed_doc";93 }94 95 sub _escape_text {96 my ($text) = @_;97 98 # special characters in the gml encoding99 $text =~ s/&/&/g; # this has to be first...100 $text =~ s/</</g;101 $text =~ s/>/>/g;102 $text =~ s/\"/"/g;103 104 return $text;105 }106 107 sub output_section {108 my $self = shift (@_);109 my ($handle, $section, $suppress_subject_info) = @_;110 $suppress_subject_info = 0 unless defined $suppress_subject_info;111 my ($data, $subsection);112 113 my $section_ptr = $self->_lookup_section ($section);114 my ($section_num) = $section =~ /(\d+)$/;115 116 return unless defined $section_ptr;117 118 # output the section header (including the section number119 # and metadata)120 print $handle "<gsdlsection";121 print $handle " gsdlnum=\"$section_num\"" if defined $section_num;122 foreach $data (@{$section_ptr->{'metadata'}}) {123 print $handle " $data->[0]=\"" . &_escape_text($data->[1]) . "\""124 unless $suppress_subject_info && $data->[0] eq "Subject";125 }126 print $handle ">";127 128 # output the text129 print $handle &_escape_text($section_ptr->{'text'});130 131 # output all the subsections132 foreach $subsection (@{$section_ptr->{'subsection_order'}}) {133 $self->output_section($handle, "$section.$subsection", $suppress_subject_info);134 }135 136 # output the closing tag137 print $handle "</gsdlsection>\n";138 }139 140 # look up the reference to the a particular section141 sub _lookup_section {142 my $self = shift (@_);143 my ($section) = @_;144 145 my ($num);146 my $sectionref = $self;147 148 while (defined $section && $section ne "") {149 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;150 $num =~ s/^0+(\d)/$1/; # remove leading 0s151 $section = "" unless defined $section;152 153 if (defined $num && defined $sectionref->{'subsections'}->{$num}) {154 $sectionref = $sectionref->{'subsections'}->{$num};155 } else {156 return undef;157 }158 }159 160 return $sectionref;161 }162 56 163 57 sub _calc_OID { 164 my $self = shift (@_); 165 my ($filename) = @_; 166 167 my $osexe = &util::get_os_exe(); 168 169 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin", 170 $ENV{'GSDLOS'},"hashfile$osexe"); 171 my $result = "NULL"; 172 173 if (-e "$hashfile_exe") { 174 $result = `$hashfile_exe \"$filename\"`; 175 ($result) = $result =~ /:\s*([0-9a-f]+)/i; 176 177 } else { 178 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n"; 179 } 180 181 return "HASH$result"; 182 } 183 184 # methods dealing with OID, not groups of them. 185 186 # if $OID is not provided one is calculated from hashing the 187 # current contents of the document 188 # An OID are actually stored as metadata of the document 189 sub set_OID { 190 my $self = shift (@_); 191 my ($OID) = @_; 192 193 # if an OID wasn't provided then feed this document to 194 # hashfile.exe 195 if (!defined $OID) { 196 $OID = "NULL"; 197 my $tmp_filename = &util::get_tmp_filename(); 198 if (!open (OUTFILE, ">$tmp_filename")) { 199 print STDERR "doc::set_OID could not write to $tmp_filename\n"; 200 } else { 201 $self->output_section('OUTFILE', $self->get_top_section(), 1); 202 close (OUTFILE); 203 204 $OID = $self->_calc_OID ($tmp_filename); 205 &util::rm ($tmp_filename); 206 } 207 } 208 209 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID); 210 } 211 212 # returns the OID for this document 213 sub get_OID { 214 my $self = shift (@_); 215 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier"); 216 return $OID if (defined $OID); 217 return "NULL"; 218 } 219 220 sub delete_OID { 221 my $self = shift (@_); 222 223 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL"); 224 } 225 226 227 # methods for manipulating section names 228 229 # returns the name of the top-most section (the top 230 # level of the document 231 sub get_top_section { 232 my $self = shift (@_); 233 234 return ""; 235 } 236 237 # returns a section 238 sub get_parent_section { 239 my $self = shift (@_); 240 my ($section) = @_; 241 242 $section =~ s/(^|\.)\d+$//; 243 244 return $section; 245 } 246 247 # returns the first child section (or the end child 248 # if there isn't any) 249 sub get_begin_child { 250 my $self = shift (@_); 251 my ($section) = @_; 252 253 my $section_ptr = $self->_lookup_section($section); 254 return "" unless defined $section_ptr; 255 256 if (defined $section_ptr->{'subsection_order'}->[0]) { 257 return "$section.$section_ptr->{'subsection_order'}->[0]"; 258 } 259 260 return $self->get_end_child ($section); 261 } 262 263 # returns the next child of a parent section 264 sub get_next_child { 265 my $self = shift (@_); 266 my ($section) = @_; 267 268 my $parent_section = $self->get_parent_section($section); 269 my $parent_section_ptr = $self->_lookup_section($parent_section); 270 return undef unless defined $parent_section_ptr; 271 272 my ($section_num) = $section =~ /(\d+)$/; 273 return undef unless defined $section_num; 274 275 my $i = 0; 276 my $section_order = $parent_section_ptr->{'subsection_order'}; 277 while ($i < scalar(@$section_order)) { 278 last if $section_order->[$i] eq $section_num; 279 $i++; 280 } 281 282 $i++; # the next child 283 if ($i < scalar(@$section_order)) { 284 return $section_order->[$i] if $parent_section eq ""; 285 return "$parent_section.$section_order->[$i]"; 286 } 287 288 # no more sections in this level 289 return undef; 290 } 291 292 # returns a reference to a list of children 293 sub get_children { 294 my $self = shift (@_); 295 my ($section) = @_; 296 297 my $section_ptr = $self->_lookup_section($section); 298 return [] unless defined $section_ptr; 299 300 my @children = @{$section_ptr->{'subsection_order'}}; 301 302 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children; 303 return \@children; 304 } 305 306 # returns the child section one past the last one (which 307 # is coded as "0") 308 sub get_end_child { 309 my $self = shift (@_); 310 my ($section) = @_; 311 312 return $section . ".0" unless $section eq ""; 313 return "0"; 314 } 315 316 # returns the next section in book order 317 sub get_next_section { 318 my $self = shift (@_); 319 my ($section) = @_; 320 321 return undef unless defined $section; 322 323 my $section_ptr = $self->_lookup_section($section); 324 return undef unless defined $section_ptr; 325 326 # first try to find first child 327 if (defined $section_ptr->{'subsection_order'}->[0]) { 328 return $section_ptr->{'subsection_order'}->[0] if ($section eq ""); 329 return "$section.$section_ptr->{'subsection_order'}->[0]"; 330 } 331 332 do { 333 # try to find sibling 334 my $next_child = $self->get_next_child ($section); 335 return $next_child if (defined $next_child); 336 337 # move up one level 338 $section = $self->get_parent_section ($section); 339 } while $section =~ /\d/; 340 341 return undef; 342 } 343 344 sub is_leaf_section { 345 my $self = shift (@_); 346 my ($section) = @_; 347 348 my $section_ptr = $self->_lookup_section($section); 349 return 1 unless defined $section_ptr; 350 351 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0); 352 } 353 354 # methods for dealing with sections 355 356 # returns the name of the inserted section 357 sub insert_section { 358 my $self = shift (@_); 359 my ($before_section) = @_; 360 361 # get the child to insert before and its parent section 362 my $parent_section = ""; 363 my $before_child = "0"; 364 my @before_section = split (/\./, $before_section); 365 if (scalar(@before_section) > 0) { 366 $before_child = pop (@before_section); 367 $parent_section = join (".", @before_section); 368 } 369 370 my $parent_section_ptr = $self->_lookup_section($parent_section); 371 if (!defined $parent_section_ptr) { 372 print STDERR "doc::insert_section couldn't find parent section " . 373 "$parent_section\n"; 374 return; 375 } 376 377 # get the next section number 378 my $section_num = $parent_section_ptr->{'next_subsection'}++; 379 380 my $i = 0; 381 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) && 382 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) { 383 $i++; 384 } 385 386 # insert the section number into the order list 387 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num); 388 389 # add this section to the parent section 390 my $section_ptr = {'subsection_order'=>[], 391 'next_subsection'=>1, 392 'subsections'=>{}, 393 'metadata'=>[], 394 'text'=>""}; 395 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr; 396 397 # work out the full section number 398 my $section = $parent_section; 399 $section .= "." unless $section eq ""; 400 $section .= $section_num; 401 402 return $section; 403 } 404 405 # creates a pre-named section 406 sub create_named_section { 407 my $self = shift (@_); 408 my ($mastersection) = @_; 409 410 my ($num); 411 my $section = $mastersection; 412 my $sectionref = $self; 413 414 while ($section ne "") { 415 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/; 416 $num =~ s/^0+(\d)/$1/; # remove leading 0s 417 $section = "" unless defined $section; 418 419 if (defined $num) { 420 if (!defined $sectionref->{'subsections'}->{$num}) { 421 push (@{$sectionref->{'subsection_order'}}, $num); 422 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[], 423 'next_subsection'=>1, 424 'subsections'=>{}, 425 'metadata'=>[], 426 'text'=>""}; 427 if ($num >= $sectionref->{'next_subsection'}) { 428 $sectionref->{'next_subsection'} = $num + 1; 429 } 430 } 431 $sectionref = $sectionref->{'subsections'}->{$num}; 432 433 } else { 434 print STDERR "doc::create_named_section couldn't create section " . 435 "$mastersection\n"; 436 last; 437 } 438 } 439 } 440 441 # returns a reference to a list of subsections 442 sub list_subsections { 443 my $self = shift (@_); 444 my ($section) = @_; 445 446 my $section_ptr = $self->_lookup_section ($section); 447 if (!defined $section_ptr) { 448 print STDERR "doc::list_subsections couldn't find section $section\n"; 449 return []; 450 } 451 452 return [@{$section_ptr->{'subsection_order'}}]; 453 } 454 455 sub delete_section { 456 my $self = shift (@_); 457 my ($section) = @_; 458 459 # my $section_ptr = {'subsection_order'=>[], 460 # 'next_subsection'=>1, 461 # 'subsections'=>{}, 462 # 'metadata'=>[], 463 # 'text'=>""}; 464 465 # if this is the top section reset everything 466 if ($section eq "") { 467 $self->{'subsection_order'} = []; 468 $self->{'subsections'} = {}; 469 $self->{'metadata'} = []; 470 $self->{'text'} = ""; 471 return; 472 } 473 474 # find the parent of the section to delete 475 my $parent_section = ""; 476 my $child = "0"; 477 my @section = split (".", $section); 478 if (scalar(@section) > 0) { 479 $child = pop (@section); 480 $parent_section = join (".", @section); 481 } 482 483 my $parent_section_ptr = $self->_lookup_section($parent_section); 484 if (!defined $parent_section_ptr) { 485 print STDERR "doc::delete_section couldn't find parent section " . 486 "$parent_section\n"; 487 return; 488 } 489 490 # remove this section from the subsection_order list 491 my $i = 0; 492 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) { 493 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) { 494 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1); 495 last; 496 } 497 $i++; 498 } 499 500 # remove this section from the subsection hash 501 if (defined ($parent_section_ptr->{'subsections'}->{$child})) { 502 undef $parent_section_ptr->{'subsections'}->{$child}; 503 } 58 # included for backwards compatibility 59 # (in particular precalculating OID in HTMLPlug) 60 return basedoc::_calc_OID(@_); 504 61 } 505 62 … … 532 89 } 533 90 534 # returns the first metadata value which matches field535 sub get_metadata_element {536 my $self = shift (@_);537 my ($section, $field) = @_;538 my ($data);539 540 my $section_ptr = $self->_lookup_section($section);541 if (!defined $section_ptr) {542 print STDERR "doc::get_metadata_element couldn't find section " .543 "$section\n";544 return;545 }546 547 foreach $data (@{$section_ptr->{'metadata'}}) {548 return $data->[1] if (scalar(@$data) >= 2 && $data->[0] eq $field);549 }550 551 return undef; # was not found552 }553 91 554 92 # add_metadata assumes the text is in (extended) ascii form. For … … 577 115 578 116 push (@{$section_ptr->{'metadata'}}, [$field, $value]); 579 }580 581 # returns a list of the form [value1, value2, ...]582 sub get_metadata {583 my $self = shift (@_);584 my ($section, $field) = @_;585 my ($data);586 587 my $section_ptr = $self->_lookup_section($section);588 if (!defined $section_ptr) {589 print STDERR "doc::get_metadata couldn't find section " .590 "$section\n";591 return;592 }593 594 my @metadata = ();595 foreach $data (@{$section_ptr->{'metadata'}}) {596 push (@metadata, $data->[1]) if ($data->[0] eq $field);597 }598 599 return \@metadata;600 }601 602 # returns a list of the form [[field,value],[field,value],...]603 sub get_all_metadata {604 my $self = shift (@_);605 my ($section) = @_;606 607 my $section_ptr = $self->_lookup_section($section);608 if (!defined $section_ptr) {609 print STDERR "doc::get_all_metadata couldn't find section " .610 "$section\n";611 return;612 }613 614 return $section_ptr->{'metadata'};615 }616 617 # $value is optional618 sub delete_metadata {619 my $self = shift (@_);620 my ($section, $field, $value) = @_;621 622 my $section_ptr = $self->_lookup_section($section);623 if (!defined $section_ptr) {624 print STDERR "doc::delete_metadata couldn't find section " .625 "$section\n";626 return;627 }628 629 my $i = 0;630 while ($i < scalar (@{$section_ptr->{'metadata'}})) {631 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&632 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {633 splice (@{$section_ptr->{'metadata'}}, $i, 1);634 } else {635 $i++;636 }637 }638 }639 640 sub delete_all_metadata {641 my $self = shift (@_);642 my ($section) = @_;643 644 my $section_ptr = $self->_lookup_section($section);645 if (!defined $section_ptr) {646 print STDERR "doc::delete_all_metadata couldn't find section " .647 "$section\n";648 return;649 }650 651 $section_ptr->{'metadata'} = [];652 }653 654 655 # methods for dealing with associated files656 657 sub associate_file {658 my $self = shift (@_);659 my ($real_filename, $assoc_filename, $mime_type) = @_;660 $mime_type = &html::guess_mime_type ($real_filename) unless defined $mime_type;661 662 # remove all associated files with the same name663 $self->delete_assoc_file ($assoc_filename);664 665 push (@{$self->{'associated_files'}},666 [$real_filename, $assoc_filename, $mime_type]);667 }668 669 # returns a list of associated files in the form670 # [[real_filename, assoc_filename, mimetype], ...]671 sub get_assoc_files {672 my $self = shift (@_);673 674 return $self->{'associated_files'};675 }676 677 sub delete_assoc_file {678 my $self = shift (@_);679 my ($assoc_filename) = @_;680 681 my $i=0;682 while ($i < scalar (@{$self->{'associated_files'}})) {683 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {684 splice (@{$self->{'associated_files'}}, $i, 1);685 } else {686 $i++;687 }688 }689 117 } 690 118 … … 722 150 } 723 151 724 # returns the text for a section725 sub get_text {726 my $self = shift (@_);727 my ($section) = @_;728 729 my $section_ptr = $self->_lookup_section($section);730 if (!defined $section_ptr) {731 print STDERR "doc::get_text couldn't find section " .732 "$section\n";733 return "";734 }735 736 return $section_ptr->{'text'};737 }738 739 # returns the (utf-8 encoded) length of the text for a section740 sub get_text_length {741 my $self = shift (@_);742 my ($section) = @_;743 744 my $section_ptr = $self->_lookup_section($section);745 if (!defined $section_ptr) {746 print STDERR "doc::get_text_length couldn't find section " .747 "$section\n";748 return 0;749 }750 751 return length ($section_ptr->{'text'});752 }753 754 sub delete_text {755 my $self = shift (@_);756 my ($section) = @_;757 758 my $section_ptr = $self->_lookup_section($section);759 if (!defined $section_ptr) {760 print STDERR "doc::delete_text couldn't find section " .761 "$section\n";762 return;763 }764 765 $section_ptr->{'text'} = "";766 }767 768 152 769 153 1;
Note:
See TracChangeset
for help on using the changeset viewer.