Changeset 7902
- Timestamp:
- 2004-08-06T16:34:02+12:00 (20 years ago)
- Location:
- trunk/gsdl/perllib
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/doc.pm
r7798 r7902 216 216 } 217 217 218 sub buffer_txt_section_xml { 219 my $self = shift(@_); 220 my ($section) = @_; 221 222 my $section_ptr = $self->_lookup_section ($section); 223 224 return "" unless defined $section_ptr; 225 226 my $all_text = "<Section>\n"; 227 228 ##output the text 229 #$all_text .= " <Content>"; 230 $all_text .= &_escape_text($section_ptr->{'text'}); 231 #$all_text .= " </Content>\n"; 232 233 234 #output all the subsections 235 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 236 $all_text .= $self->buffer_txt_section_xml("$section.$subsection"); 237 } 238 239 $all_text .= "</Section>\n"; 240 241 242 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 243 return $all_text; 244 } 245 246 sub buffer_mets_fileSection_section_xml() { 247 my $self = shift(@_); 248 my ($section, $doc_Dir) = @_; 249 250 $section="" unless defined $section; 251 252 253 my $section_ptr=$self->_lookup_section($section); 254 return "" unless defined $section_ptr; 255 256 257 #**output fileSection by sections 258 my $section_num ="1". $section; 259 260 261 my $filePath = $doc_Dir . '/doctxt.xml'; 262 263 #**output the fileSection details 264 my $all_text = ' <mets:fileGrp ID="FILEGROUP_PRELUDE' . $section_num . '">'. "\n"; 265 $all_text .= ' <mets:file MIMETYPE="text/xml" ID="FILE'.$section_num. '">'. "\n"; 266 $all_text .= ' <mets:FLocate LOCTYPE="URL" xlink:href="file:'.$filePath.'#xpointer(/Section['; 267 268 my $xpath = "1".$section; 269 270 $xpath =~ s/\./]\/Section[/g; 271 272 $all_text .= $xpath; 273 274 $all_text .= ']/text())" ID="FILE'. $section_num. '" />' . "\n"; 275 $all_text .= " </mets:file>\n"; 276 $all_text .= " </mets:fileGrp>\n"; 277 278 279 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 280 $all_text .= $self->buffer_mets_fileSection_section_xml("$section.$subsection",$doc_Dir); 281 } 282 283 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 284 285 return $all_text; 286 } 287 288 sub buffer_mets_fileWhole_section_xml(){ 289 my $self = shift(@_); 290 my ($section) = @_; 291 292 my $section_ptr = $self-> _lookup_section($section); 293 return "" unless defined $section_ptr; 294 295 my $all_text="" unless defined $all_txt; 296 297 my ($dirPath)="" unless defined $dirPath; 298 my $fileID=0; 299 300 #** output the fileSection for the whole section 301 #*** get the sourcefile and associative file 302 303 foreach my $data (@{$section_ptr->{'metadata'}}){ 304 my $escaped_value = &_escape_text($data->[1]); 305 if ($data->[0] eq "gsdlsourcefilename") { 306 ($dirPath) = $escaped_value =~ m/^(.*)[\/\\][^\/\\]*$/; 307 308 $all_text .= ' <mets:fileGrp ID="default">'."\n"; 309 ++$fileID; 310 $all_text .= ' <mets:file MIMETYPE="text/xml" ID="default.'.$fileID.'">'. "\n"; 311 $all_text .= ' <mets:FLocate LOCTYPE="URL" xlink:href="file:'.$data->[1].'" ID="default.'.$fileID.'" />'."\n"; 312 313 $all_text .= " </mets:file>\n"; 314 } 315 316 if ($data->[0] eq "gsdlassocfile"){ 317 $escaped_value =~ m/^(.*?):(.*):$/; 318 319 my $assfilePath = $dirPath . '/'. $1; 320 ++$fileID; 321 $all_text .= ' <mets:file MIMETYPE="'.$2.'" ID="default.'.$fileID. '">'. "\n"; 322 $all_text .= ' <mets:FLocate LOCTYPE="URL" xlink:href="file:'.$assfilePath.'" ID="default.'. $fileID.'" />'."\n"; 323 324 $all_text .= " </mets:file>\n"; 325 } 326 } 327 $all_text .= " </mets:fileGrp>\n"; 328 329 330 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 331 332 return $all_text; 333 } 334 335 sub buffer_mets_StruMapSection_section_xml(){ 336 my $self = shift(@_); 337 my ($section, $order_numref) = @_; 338 339 $section="" unless defined $section; 340 341 342 my $section_ptr=$self->_lookup_section($section); 343 return "" unless defined $section_ptr; 344 345 #**output fileSection by sections 346 my $section_num ="1". $section; 347 348 #**output the StruMap details 349 350 my $all_text = ' <mets:div ID="DS'. $section_num .'" TYPE="Section" ORDER="'.$$order_numref++.'" ORDERLABEL="'. $section_num .'" LABEL="'; 351 $all_text .= $section_num . '" DMDID="DM'.$section_num.'">'. "\n"; 352 353 $all_text .= ' <mets:fptr FILEID="FILEGROUP_PRELUDE'.$section_num.'" />'. "\n"; 354 355 356 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 357 $all_text .= $self->buffer_mets_StruMapSection_section_xml("$section.$subsection", $order_numref); 358 } 359 360 $all_text .= " </mets:div>\n"; 361 362 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 363 364 return $all_text; 365 } 366 367 368 sub buffer_mets_StruMapWhole_section_xml(){ 369 my $self = shift(@_); 370 my ($section) = @_; 371 372 my $section_ptr = $self-> _lookup_section($section); 373 return "" unless defined $section_ptr; 374 375 my $all_text="" unless defined $all_txt; 376 my $fileID=0; 377 378 $all_text .= '<mets:structMap ID="All" TYPE="Whole Document" LABEL="All">'."\n"; 379 $all_text .= ' <mets:div ID="All" TYPE="Document" ORDER="All" ORDERLABEL="All" LABEL="Whole Documemt" DMDID="DM1">' . "\n"; 380 381 382 #** output the StruMapSection for the whole section 383 #*** get the sourcefile and associative file 384 385 foreach my $data (@{$section_ptr->{'metadata'}}){ 386 my $escaped_value = &_escape_text($data->[1]); 387 388 if ($data->[0] eq "gsdlsourcefilename") { 389 ++$fileID; 390 $all_text .= ' <mets:fptr FILEID="default.'.$fileID.'" />'."\n"; 391 } 392 393 if ($data->[0] eq "gsdlassocfile"){ 394 ++$fileID; 395 $all_text .= ' <mets:fptr FILEID="default.'.$fileID. '" />'. "\n"; 396 } 397 } 398 $all_text .= " </mets:div>\n"; 399 400 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 401 402 return $all_text; 403 } 404 405 406 sub buffer_mets_dmdSection_section_xml(){ 407 my $self = shift(@_); 408 my ($section) = @_; 409 410 $section="" unless defined $section; 411 412 my $section_ptr=$self->_lookup_section($section); 413 return "" unless defined $section_ptr; 414 415 #***convert section number 416 my $section_num ="1". $section; 417 418 #**output the dmdSection details 419 my $all_text = '<mets:dmdSec ID="DM'.$section_num.'" GROUPID="'.$section_num.'">'. "\n"; 420 $all_text .= ' <mets:mdWrap MDType="gsdl" ID="'.$section_num.'">'."\n"; 421 $all_text .= " <mets:xmlData>\n"; 422 foreach my $data (@{$section_ptr->{'metadata'}}){ 423 my $escaped_value = &_escape_text($data->[1]); 424 $all_text .= ' <gsdl:Metadata name="'. $data->[0].'">'. $escaped_value. "</gsdl:Metadata>\n"; 425 } 426 $all_text .= " </mets:xmlData>\n"; 427 $all_text .= " </mets:mdWrap>\n"; 428 $all_text .= "</mets:dmdSec>\n"; 429 430 foreach my $subsection (@{$section_ptr->{'subsection_order'}}){ 431 $all_text .= $self->buffer_mets_dmdSection_section_xml("$section.$subsection"); 432 } 433 434 $all_text =~ s/[\x00-\x09\x0B\x0C\x0E-\x1F]//g; 435 436 return $all_text; 437 } 438 218 439 sub output_section { 219 440 my $self = shift (@_); … … 222 443 print $handle $self->buffer_section_xml($section); 223 444 } 445 446 447 #*** print out doctxt.xml file 448 sub output_txt_section { 449 my $self = shift (@_); 450 my ($handle, $section) = @_; 451 452 print $handle $self->buffer_txt_section_xml($section); 453 } 454 455 #*** print out docmets.xml file 456 sub output_mets_section { 457 my $self = shift(@_); 458 my ($handle, $section, $doc_Dir) = @_; 459 460 #***print out the dmdSection 461 print $handle $self->buffer_mets_dmdSection_section_xml($section); 462 463 #***print out the fileSection by sections 464 print $handle "<mets:fileSec>\n"; 465 print $handle $self->buffer_mets_fileSection_section_xml($section,$doc_Dir); 466 #***print out the whole fileSection 467 print $handle $self->buffer_mets_fileWhole_section_xml($section); 468 print $handle "</mets:fileSec>\n"; 469 470 #***print out the StruMapSection by sections 471 print $handle '<mets:structMap ID="Section" TYPE="Section" LABEL="Section">' . "\n"; 472 my $order_num=0; 473 print $handle $self->buffer_mets_StruMapSection_section_xml($section, \$order_num); 474 print $handle "</mets:structMap>\n"; 475 print $handle $self->buffer_mets_StruMapWhole_section_xml($section); 476 print $handle "</mets:structMap>\n"; 477 } 478 224 479 225 480 # look up the reference to the a particular section -
trunk/gsdl/perllib/docproc.pm
r2327 r7902 57 57 } 58 58 59 sub set_saveas { 60 my $self = shift (@_); 61 my ($saveas) = @_; 62 63 $self->{'saveas'} = $saveas; 64 65 } 66 59 67 60 68 1; 61 69 70 71 72 73 74 75 -
trunk/gsdl/perllib/docsave.pm
r3834 r7902 96 96 my $OID = $doc_obj->get_OID(); 97 97 $OID = "NULL" unless defined $OID; 98 98 99 99 100 # get document's directory 100 101 my $doc_dir = $self->get_doc_dir ($OID); 101 102 103 102 104 # copy all the associated files, add this information as metadata 103 105 # to the document … … 106 108 my $doc_file 107 109 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml"); 108 my $short_doc_file = &util::filename_cat ($doc_dir, "doc.xml"); 109 110 111 #***define doctxt.xml file 112 my $doc_txt_file 113 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml"); 114 my $working_dir 115 =&util::filename_cat ($self->{'archive_dir'}, $doc_dir); 116 117 #***define docmets.xmlfile 118 my $doc_mets_file 119 = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml"); 120 121 my $short_doc_file = &util::filename_cat ($doc_dir, "doc.xml"); 122 #my $short_txt_doc_file=&util::filename_cat ($doc_dir, "doctxt.xml"); 123 my $short_mets_doc_file=&util::filename_cat ($doc_dir, "docmets.xml"); 124 125 110 126 if (!open (OUTDOC, ">$doc_file")) { 111 print $outhandle "docsave::process could not write to file $doc_file\n";127 print $outhandle "docsave::process could not write to file $doc_file\n"; 112 128 return; 113 129 } … … 117 133 $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section()); 118 134 $self->output_xml_footer('docsave::OUTDOC'); 119 close OUTDOC; 135 136 #***save the document without metadata:doctxt.xml: Chi-Yu Huang 137 138 if (!open(OUTDOC_TXT, ">$doc_txt_file")){ 139 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 140 return; 141 } 142 143 $self->output_txt_xml_header('docsave::OUTDOC_TXT'); 144 $doc_obj->output_txt_section('docsave::OUTDOC_TXT', $doc_obj->get_top_section()); 145 #$self->output_txt_xml_footer('docsave::OUTDOC_TXT'); 146 147 #***Convert doctxt.xml file to docmets.xml: Chi-Yu Huang 148 if (!open(OUTDOC_METS,">$doc_mets_file")){ 149 print $outhandle "docsave::process could not write to file $doc_mets_file\n"; 150 return; 151 } 152 153 $self->output_mets_xml_header('docsave::OUTDOC_METS', $OID); 154 $doc_obj->output_mets_section('docsave::OUTDOC_METS',$doc_obj->get_top_section(), $working_dir); 155 $self->output_mets_xml_footer('docsave::OUTDOC_METS'); 156 157 close OUTDOC; 158 close OUTDOC_TXT; 159 close OUTDOC_METS; 120 160 121 161 if ($self->{'gzip'}) { … … 131 171 132 172 # do the sortmeta thing 133 my ($metadata); 134 if (defined ($self->{'sortmeta'})) { 173 my ($metadata); if (defined ($self->{'sortmeta'})) { 135 174 $metadata = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'sortmeta'}); 136 175 } 137 176 138 177 # store reference in the archive_info 139 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata); 140 } 141 } 142 178 if ($self->{'saveas'} eq "METS"){ 179 $self->{'archive_info'}->add_info($OID, $short_mets_doc_file, $metadata); 180 } else { 181 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata); 182 } 183 } 184 } 185 186 143 187 sub group_process { 144 188 my $self = shift (@_); … … 283 327 284 328 # store reference in the archive_info 285 $self->{'archive_info'}->add_info($OID, $short_doc_file); 286 329 if ($self->{'saveas'} eq "METS"){ 330 $self->{'archive_info'}->add_info($OID, $short_mets_doc_file); 331 } else { 332 $self->{'archive_info'}->add_info($OID, $short_doc_file); 333 } 334 287 335 return 1; 288 336 } … … 292 340 my ($handle) = @_; 293 341 342 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n"; 343 344 print $handle '<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">' . "\n"; 345 print $handle "<Archive>\n"; 346 } 347 348 sub output_xml_footer { 349 my $self = shift (@_); 350 my ($handle) = @_; 351 352 print $handle "</Archive>\n"; 353 } 354 355 sub output_txt_xml_header{ 356 my $self = shift (@_); 357 my ($handle) = @_; 294 358 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n"; 295 359 print $handle '<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">' . "\n"; 296 print $handle "<Archive>\n"; 297 } 298 299 sub output_xml_footer { 300 my $self = shift (@_); 301 my ($handle) = @_; 302 303 print $handle "</Archive>\n"; 360 } 361 362 sub output_txt_xml_footer{ 363 my $self = shift(@_); 364 my ($handle) = @_; 365 print $handle "<the end of the file>\n"; 366 } 367 368 sub output_mets_xml_header(){ 369 my $self = shift(@_); 370 my ($handle, $OID) = @_; 371 print $handle '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' . "\n"; 372 print $handle '<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">' . "\n"; 373 print $handle '<mets:mets OBJID="'. $OID. ':2">' . "\n"; 374 } 375 376 sub output_mets_xml_footer() { 377 my $self = shift(@_); 378 my ($handle) = @_; 379 print $handle '</mets:mets>' . "\n"; 304 380 } 305 381 306 382 1; 383 384 385
Note:
See TracChangeset
for help on using the changeset viewer.