Changeset 16102


Ignore:
Timestamp:
2008-06-23T10:54:24+12:00 (16 years ago)
Author:
davidb
Message:

Some minor adjustments to ingesting documents into a Fedora repository: it now descends into all the folders generated in archives/export, and it tests for the ImagePlugin name (rather than the older ImagePlug).

Location:
gsdl/trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/bin/script/g2f-buildcol.pl

    r14959 r16102  
    205205
    206206    if (opendir(DIR, $export_dir)) {
    207     my @hash_dirs = grep { /\.dir$/ } readdir(DIR);
    208207    closedir DIR;
     208    ## my @hash_dirs = grep { /\.dir$/ } readdir(DIR);
     209    my @hash_dirs = &g2futil::get_all_hash_dirs($export_dir);
    209210
    210211
     
    213214
    214215        my $docmets_filename
    215         = &util::filename_cat($export_dir,$hd,"docmets.xml");
     216        = &util::filename_cat($hd,"docmets.xml");
    216217
    217218        print STDERR "<Build>\n" if $gli;
  • gsdl/trunk/bin/script/g2f-import.pl

    r15657 r16102  
    204204    if ( -e $export_dir ) {
    205205        print "***\n";
    206     print "* Removing existing Greenstone $gs_col objects from Fedora $pid_namespace\n";
     206    print "* Updating existing Greenstone $gs_col objects from Fedora $pid_namespace\n";
    207207        print "***\n";
    208208
    209209    # readdir
    210210    if (opendir(DIR, $export_dir)) {
    211         my @hash_dirs = grep { /\.dir$/ } readdir(DIR);
     211
    212212        closedir DIR;
    213 
     213        my @hash_dirs = &g2futil::get_all_hash_dirs($export_dir,$maxdocs);
    214214
    215215        # for each hash dir, purge its respective PID
    216         foreach my $hd (@hash_dirs) {
    217 
    218         my $full_hd = &util::filename_cat($export_dir,$hd);
     216        foreach my $full_hd (@hash_dirs) {
     217
    219218        my $hash_id = &g2futil::get_hash_id($full_hd);
    220219
     
    226225
    227226        if ($dsinfo_status == 0) {
    228             print "  $pid being removed.\n";       
     227            print "  $pid being updated.\n";       
    229228            &g2futil::run_purge($pid,$options);
    230229        }
  • gsdl/trunk/perllib/g2futil.pm

    r15979 r16102  
    204204
    205205
     206sub rec_get_all_hash_dirs
     207{
     208    my ($full_dir,$all_dirs) = @_;
     209
     210    if (opendir(DIR, $full_dir)) {
     211    my @sub_dirs = grep { ($_ !~ /^\./) && (-d &util::filename_cat($full_dir,$_)) } readdir(DIR);
     212    closedir DIR;
     213
     214    my @hash_dirs = grep { $_ =~ m/\.dir$/ } @sub_dirs;
     215    my @rec_dirs = grep { $_ !~ m/\.dir$/ } @sub_dirs;
     216   
     217    foreach my $hd (@hash_dirs) {
     218        my $full_hash_dir = &util::filename_cat($full_dir,$hd);
     219        push(@$all_dirs,$full_hash_dir);
     220    }
     221
     222    foreach my $rd (@rec_dirs) {
     223        my $full_rec_dir = &util::filename_cat($full_dir,$rd);
     224        rec_get_all_hash_dirs($full_rec_dir,$all_dirs);
     225    }       
     226    }
     227}
     228
     229sub get_all_hash_dirs
     230{
     231    my ($start_dir,$maxdocs) = @_;
     232   
     233    my @all_dirs = ();
     234    rec_get_all_hash_dirs($start_dir,\@all_dirs);
     235
     236    if ((defined $maxdocs) && ($maxdocs ne "")) {
     237    my @maxdoc_dirs = ();
     238    for (my $i=0; $i<$maxdocs; $i++) {
     239        push(@maxdoc_dirs,shift(@all_dirs));
     240    }
     241    @all_dirs = @maxdoc_dirs;
     242    }
     243
     244    return @all_dirs;
     245}
    206246
    207247sub get_hash_id
     
    296336    close(FIN); # close the file
    297337    if($xml_contents eq $gsdlXMLcontents) {
    298         print STDERR "The old gsdl.xml file already contains the same.\n";
     338        print STDERR "Fedora links to FLI import folder through gsdl.xml.\n";
    299339        # it already contains what we want, we're done
    300340        return "gsdl.xml";
  • gsdl/trunk/perllib/plugouts/FedoraMETSPlugout.pm

    r15604 r16102  
    201201
    202202    my $section = $doc_obj->get_top_section();
    203 
    204     my $doc_txt_file = &util::filename_cat ($working_dir,"doctoc.xml");
    205    
    206     $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
    207 
    208     my $outhandler;
    209 
    210     if (defined $self->{'xslt_writer'}){
    211     $outhandler = $self->{'xslt_writer'};
    212     }
    213     else{
    214     $outhandler = $self->get_output_handler($doc_txt_file);
    215     }
    216 
    217     print $outhandler $self->buffer_toc($doc_obj, $working_dir, $section, 0);
    218 
    219     if (defined $self->{'xslt_writer'}){     
    220     $self->close_xslt_pipe();
    221     }
    222     else{
    223     close($outhandler);
    224    }
     203    my $section_ptr=$doc_obj->_lookup_section($section);
     204    my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
     205
     206    # If num_subsections is 0, then there is no nested TOC
     207
     208    if ($num_subsections>0) {
     209
     210    my $doc_txt_file = &util::filename_cat ($working_dir,"doctoc.xml");
     211   
     212    $self->open_xslt_pipe($doc_txt_file,$self->{'xslt_txt'});
     213   
     214    my $outhandler;
     215   
     216    if (defined $self->{'xslt_writer'}){
     217        $outhandler = $self->{'xslt_writer'};
     218    }
     219    else{
     220        $outhandler = $self->get_output_handler($doc_txt_file);
     221    }
     222    print $outhandler $self->buffer_toc($doc_obj, $working_dir, $section, 0);
     223   
     224    if (defined $self->{'xslt_writer'}){     
     225        $self->close_xslt_pipe();
     226    }
     227    else{
     228        close($outhandler);
     229    }
     230    }
    225231
    226232}
     
    245251    my $plugin_type = $doc_obj->get_metadata_element($top_section,"Plugin");
    246252
    247     if ((defined $plugin_type) && ($plugin_type eq "ImagePlug"))
     253    if ((defined $plugin_type) && ($plugin_type eq "ImagePlugin"))
    248254    {
    249255
     
    284290
    285291    # Generate Filestream for Table of Contents (TOC)
    286     print $handle $self->buffer_mets_fileSection_toc($doc_obj,$section,$working_dir);
     292    my $section_ptr=$doc_obj->_lookup_section($section);
     293    my $num_subsections = scalar(@{$section_ptr->{'subsection_order'}});
     294
     295    # If num_subsections is 0, then there is no nested TOC
     296
     297    if ($num_subsections>0) {
     298    print $handle $self->buffer_mets_fileSection_toc($doc_obj,$section,$working_dir);
     299    }
    287300
    288301    # print out the fileSection by sections
Note: See TracChangeset for help on using the changeset viewer.