Changeset 9838


Ignore:
Timestamp:
2005-05-09T11:01:10+12:00 (19 years ago)
Author:
davidb
Message:

General improvements to saving files in DSpace format. Main additino is to
use extracted metadata values for dc.* values that are not present. For example
if there is no dc.Title then ex.Title is used instead.

Location:
trunk/gsdl/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/doc.pm

    r9241 r9838  
    7676        $collect_dir =~ s/\\/\\\\/g; # escape DOS style file separator
    7777
     78        # if from within GSDLCOLLECTDIR, then remove directory prefix
     79        # so source_filename is realative to it.  This is done to aid
     80        # portability, i.e. the collection can be moved to somewhere
     81        # else on the file system and the archives directory will still
     82        # work.  This is needed, for example in the applet version of
     83        # GLI where GSDLHOME/collect on the server will be different to
     84        # the collect directory of the remove user.  Of course,
     85        # GSDLCOLLECTDIR subsequently needs to be put back on to turn
     86        # it back into a full pathname.
     87
    7888        if ($source_filename =~ /^$collect_dir(.*)$/) {
    7989        $source_filename = $1;
     
    557567}
    558568
     569my $dc_set = { Title => 1,       
     570           Creator => 1,
     571           Subject => 1,
     572           Description => 1,
     573           Publisher => 1,
     574           Contributors => 1,
     575           Date => 1,
     576           Type => 1,
     577           Format => 1,
     578           Identifier => 1,
     579           Source => 1,
     580           Language => 1,
     581           Relation => 1,
     582           Coverage => 1,
     583           Rights => 1};
     584
     585
     586
    559587#*** print out dublin_core.xml file
    560588sub output_dc_section {
     
    567595    my $section_ptr=$self->_lookup_section($section);
    568596    return "" unless defined $section_ptr;
     597
     598    my $explicit_dc = {};
     599    my $explicit_ex = {};
     600
    569601    my $all_text="";
    570602    foreach my $data (@{$section_ptr->{'metadata'}}){
    571603    my $escaped_value = &_escape_text($data->[1]);
    572     if ($data->[0]=~ /^dc/) {
     604    if ($data->[0]=~ m/^dc\./) {
    573605        $data->[0] =~ tr/[A-Z]/[a-z]/;
    574         $data->[0] =~ /^dc\.(.*)/;
     606
     607        $data->[0] =~ m/^dc\.(.*)/;
    575608        my $dc_element =  $1;
     609
     610        if (!defined $explicit_dc->{$dc_element}) {
     611        $explicit_dc->{$dc_element} = [];
     612        }
     613        push(@{$explicit_dc->{$dc_element}},$escaped_value);
     614
    576615        #$all_text .= '   <dcvalue element="'. $data->[0].'" qualifier="#####">'. $escaped_value. "</dcvalue>\n";
    577616        $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
    578617    }
    579     }
     618    elsif (($data->[0] =~ m/^ex\./) || ($data->[0] !~ m/\./)) {
     619        $data->[0] =~ m/^(ex\.)?(.*)/;
     620        my $ex_element =  $2;
     621        my $lc_ex_element = lc($ex_element);
     622
     623        if (defined $dc_set->{$ex_element}) {
     624        if (!defined $explicit_ex->{$lc_ex_element}) {
     625            $explicit_ex->{$lc_ex_element} = [];
     626        }
     627        push(@{$explicit_ex->{$lc_ex_element}},$escaped_value);
     628        }
     629    }
     630    }
     631
     632    # go through dc_set and for any element *not* defined in explicit_dc
     633    # that do exist in explicit_ex, add it in as metadata
     634    foreach my $k ( keys %$dc_set ) {
     635    my $lc_k = lc($k);
     636
     637    if (!defined $explicit_dc->{$lc_k}) {
     638        if (defined $explicit_ex->{$lc_k}) {
     639
     640        foreach my $v (@{$explicit_ex->{$lc_k}}) {
     641            my $dc_element    = $lc_k;
     642            my $escaped_value = $v;
     643
     644            $all_text .= '   <dcvalue element="'. $dc_element.'">'. $escaped_value. "</dcvalue>\n";
     645           
     646        }
     647        }
     648    }
     649    }
     650
    580651    if ($all_text eq "") {
    581652    $all_text .= "   There is no Dublin Core metatdata in this document\n";
  • trunk/gsdl/perllib/docsave.pm

    r9231 r9838  
    153153    # to the document
    154154    if ($service eq "export" && $save_as eq "DSpace") {
     155    # create handle file based on doc_dir
     156
     157    my $doc_handle_file
     158        = &util::filename_cat ($self->{'export_dir'},$doc_dir, "handle");
     159   
     160    if (!open(OUTDOC_EXPORT_HANDLE,">$doc_handle_file")){
     161        print $outhandle "docsave::process could not write collection handle to file $doc_handle_file\n";
     162        return;
     163    }
     164
     165    my ($handle) = ($doc_dir =~ m/^(.*)\.dir$/);
     166    print OUTDOC_EXPORT_HANDLE "123456789/$handle\n";
     167
     168    close(OUTDOC_EXPORT_HANDLE);
     169
    155170    # open contents file
    156171    my $doc_contents_file
     
    171186    my $short_doc_file;
    172187
    173     #Import collection to GS2 in GS Archive format and METs format
     188    # Save collection as either Greenstone Archive or  METS format
    174189    if ($service eq "import") {
    175190    my $doc_file
    176191        = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.xml");
    177192
    178     #***define doctxt.xml file
     193    # define doctxt.xml file
    179194    my $doc_txt_file
    180195        = &util::filename_cat ($self->{'archive_dir'}, $doc_dir,"doctxt.xml");
     
    183198        =&util::filename_cat ($self->{'archive_dir'}, $doc_dir);
    184199   
    185     #***define docmets.xml file
     200    # define docmets.xml file
    186201    my $doc_mets_file
    187202        = &util::filename_cat ($self->{'archive_dir'},$doc_dir, "docmets.xml");
     
    420435    # same one.
    421436    $doc_dir = $doc_info->[0];
    422     $doc_dir =~ s/\/?doc(mets)?\.xml(\.gz)?$//;
     437    $doc_dir =~ s/\/?((doc(mets)?)|(dublin_core))\.xml(\.gz)?$//;
    423438    } elsif ($self->{'keepimportstructure'}) {
    424439    $source_filename = &File::Basename::dirname($source_filename);
     
    433448    if ($doc_dir eq "") {
    434449    # have to get a new document directory
    435     my $doc_dir_rest = $OID;
    436     my $doc_dir_num = 0;
    437     do {
    438         $doc_dir .= "/" if $doc_dir_num > 0;
    439         if ($doc_dir_rest =~ s/^(.{1,8})//) {
    440         $doc_dir .= $1;
    441         $doc_dir_num++;
    442         }
    443     } while ($doc_dir_rest ne "" &&
    444          ((-d &util::filename_cat ($working_dir, "$doc_dir.dir")) ||
    445           ($working_info->size() >= 1024 && $doc_dir_num < 2)));
     450
     451    if ($service eq "import") {
     452        my $doc_dir_rest = $OID;
     453        my $doc_dir_num = 0;
     454
     455        do {
     456        $doc_dir .= "/" if $doc_dir_num > 0;
     457        if ($doc_dir_rest =~ s/^(.{1,8})//) {
     458            $doc_dir .= $1;
     459            $doc_dir_num++;
     460        }
     461        } while ($doc_dir_rest ne "" &&
     462             ((-d &util::filename_cat ($working_dir, "$doc_dir.dir")) ||
     463              ($working_info->size() >= 1024 && $doc_dir_num < 2)));
     464    }
     465    else {
     466        # Export formats such as DSpace need the directory structure to
     467        # be flat.  This is simple to arrange (set 'doc_dir' to bit the
     468        # documents OID) but breaks Windows 3.1 file system compliance.
     469        # Such a loss is not a bit thing in this situation as such
     470        # systems don't run on Windows 3.1 anyway.
     471
     472        $doc_dir = $OID;
     473    }
     474
    446475
    447476    $doc_dir .= ".dir";
     
    471500    }
    472501
     502    my $source_filename = $doc_obj->get_source_filename();
     503
     504    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
     505
     506    if (defined $collect_dir) {
     507    my $dirsep = &util::get_dirsep();
     508
     509    if ($collect_dir !~ m/$dirsep$/) {
     510        $collect_dir .= $dirsep; # ensure there is a slash at the end
     511    }
     512   
     513    if ($source_filename !~ /^$dirsep/) {
     514        $source_filename
     515        = &util::filename_cat($collect_dir,$source_filename);
     516    }
     517    }
     518
     519
    473520    if ($save_as eq "DSpace") {
    474 
    475     my ($tail_filename) = ($doc_obj->get_source_filename() =~ m/\/([^\/\\]*)$/);
     521    my ($tail_filename) = ($source_filename =~ m/\/([^\/\\]*)$/);
    476522
    477523    print $handle "$tail_filename\n";
    478524
    479525    $filename = &util::filename_cat($working_dir, $doc_dir, $tail_filename);
    480     &util::hard_link ($doc_obj->get_source_filename(), $filename);
     526    &util::hard_link ($source_filename, $filename);
    481527    }
    482528           
    483     foreach my $assoc_file (@{$doc_obj->get_assoc_files()}) {
    484     my ($dir, $afile) = $assoc_file->[1] =~ /^(.*?)([^\/\\]+)$/;
     529    foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
     530    my ($dir, $afile) = $assoc_file_rec->[1] =~ /^(.*?)([^\/\\]+)$/;
    485531    $dir = "" unless defined $dir;
    486532
    487     # Store the associated file to the "contents" file
    488     if ($save_as eq "DSpace") {
    489         print $handle "$assoc_file->[1]\n";
    490     }
    491 
    492     if (-e $assoc_file->[0]) {
     533
     534    my $real_filename = $assoc_file_rec->[0];
     535    if (-e $real_filename) {
     536
     537
     538        if ($save_as eq "DSpace") {
     539        if ($real_filename =~ m/$source_filename$/) {
     540            next;
     541        }
     542        else {
     543            my $bundle = "bundle:ORIGINAL";
     544
     545            if ($afile =~ m/^thumbnail\./) {
     546            $bundle = "bundle:THUMBNAIL";
     547            }
     548
     549            # Store the associated file to the "contents" file
     550            print $handle "$assoc_file_rec->[1]\t$bundle\n";
     551        }
     552        }
     553
    493554        $filename = &util::filename_cat($working_dir, $doc_dir, $afile);
    494555
    495         &util::hard_link ($assoc_file->[0], $filename);
     556
     557        &util::hard_link ($real_filename, $filename);
    496558       
    497559        $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(),
    498560                     "gsdlassocfile",
    499                      "$afile:$assoc_file->[2]:$dir");
     561                     "$afile:$assoc_file_rec->[2]:$dir");
    500562        $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(),
    501563                         "assocfilepath",
     
    503565    } elsif ($self->{'verbosity'} > 2) {
    504566        print $outhandle "docsave::process couldn't copy the associated file " .
    505         "$assoc_file->[0] to $afile\n";
     567        "$real_filename to $afile\n";
    506568    }
    507569    }
Note: See TracChangeset for help on using the changeset viewer.