Changeset 23742

Show
Ignore:
Timestamp:
01.03.2011 17:25:59 (9 years ago)
Author:
davidb
Message:

Added the "removing" part of the explode algorithm

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cgiactions/explodeaction.pm

    r23480 r23742  
    3333use ghtml; 
    3434 
     35use File::Basename; 
    3536 
    3637BEGIN { 
     
    8889 
    8990 
    90 sub oid_to_import_filenames 
     91sub docid_to_import_filenames 
    9192{ 
    9293    my $self = shift @_; 
     
    103104                    $archive_dir); 
    104105 
    105     print STDERR "**** arcinfo doc filename = $arcinfo_doc_filename\n"; 
    106  
    107     my @import_files = (); 
     106    my %all_import_file_keys = (); 
    108107     
    109108    foreach my $docid (@docids) { 
    110109    # Obtain the src and associated files for the specified docID 
    111110     
    112     print STDERR "*** looking up key \"$docid\"\n"; 
    113  
    114111    my $doc_rec 
    115112        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,  
    116113                     $docid); 
    117  
    118     print STDERR "*** doc_rec = $doc_rec\n"; 
    119114     
    120115    my $src_files = $doc_rec->{'src-file'}; 
    121116    my $assoc_files = $doc_rec->{'assoc-file'}; 
    122117     
    123     push(@import_files,@$src_files) if (defined $src_files); 
    124     push(@import_files,@$assoc_files) if (defined $assoc_files); 
    125     } 
    126  
    127     return \@import_files; 
    128 } 
    129  
    130  
    131 sub import_filenames_to_oids 
     118    if (defined $src_files) { 
     119        foreach my $if (@$src_files) { 
     120        $all_import_file_keys{$if} = 1; 
     121        } 
     122    } 
     123 
     124    if (defined $assoc_files) { 
     125        foreach my $if (@$assoc_files) { 
     126        $all_import_file_keys{$if} = 1; 
     127        } 
     128    } 
     129    } 
     130 
     131    my @all_import_files = keys %all_import_file_keys; 
     132 
     133    return \@all_import_files; 
     134} 
     135 
     136 
     137sub import_filenames_to_docids 
    132138{ 
    133139    my $self = shift @_; 
     
    140146    my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives"); 
    141147 
     148    # Obtain the oids for the specified import filenames 
     149    my $arcinfo_src_filename  
     150    = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",  
     151                    $archive_dir); 
     152 
     153    my %all_oid_keys = (); 
     154 
     155    foreach my $if (@$import_filenames) { 
     156 
     157    my $src_rec 
     158        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,  
     159                     $if); 
     160    my $oids = $src_rec->{'oid'}; 
     161 
     162    foreach my $o (@$oids) { 
     163        $all_oid_keys{$o} = 1; 
     164    } 
     165    } 
     166 
     167    my @all_oids = keys %all_oid_keys; 
     168 
     169    return \@all_oids; 
     170} 
     171 
     172 
     173sub remove_import_filenames 
     174{ 
     175    my $self = shift @_; 
     176    my ($expanded_import_filenames) = @_; 
     177 
     178    foreach my $f (@$expanded_import_filenames) { 
     179    # If this document has been exploded before then 
     180    # its original source files will have already been removed   
     181    if (-e $f) { 
     182        &util::rm($f); 
     183    } 
     184    } 
     185} 
     186 
     187sub move_docoids_to_import 
     188{ 
     189    my $self = shift @_; 
     190    my ($docids) = @_; 
     191 
     192    my $infodb_type = $self->{'infodbtype'}; 
     193 
     194    # Derive the archives and import directories 
     195    my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives"); 
     196    my $import_dir  = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"import"); 
     197 
    142198    # Obtain the doc.xml path for the specified docID 
    143199    my $arcinfo_doc_filename  
    144     = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",  
     200    = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",  
    145201                    $archive_dir); 
    146202 
    147     my %all_oid_keys = (); 
    148  
    149     foreach my $if (@$import_filenames) { 
    150  
    151     print STDERR "*** looking if key \"$if\"\n"; 
    152  
    153     my $src_rec 
     203    foreach my $docid (@$docids) { 
     204 
     205    my $doc_rec 
    154206        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,  
    155                      $if); 
    156     my $oids = $src_rec->{'oid'}; 
    157  
    158     foreach my $o (@$oids) { 
    159         $all_oid_keys{$o} = 1; 
    160     } 
    161     } 
    162  
    163     my @all_oids = keys %all_oid_keys; 
    164  
    165     return \@all_oids; 
    166 } 
    167  
     207                     $docid); 
     208 
     209    my $doc_xml_file = $doc_rec->{'doc-file'}->[0]; 
     210 
     211    # The $doc_xml_file is relative to the archives, so need to do 
     212    # a bit more work to make sure the right folder containing this 
     213    # is moved to the right place in the import folder 
     214 
     215    my $assoc_path = dirname($doc_xml_file); 
     216    my $import_assoc_dir = &util::filename_cat($import_dir,$assoc_path); 
     217    my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path); 
     218 
     219    # If assoc_path involves more than one sub directory, then need to make 
     220    # sure the necessary directories exist in the import area also. 
     221    # For example, if assoc_path is "a/b/c.dir" then need "import/a/b" to 
     222    # exist before moving "archives/a/b/c.dir" -> "import/a/b" 
     223    my $import_target_parent_dir = dirname($import_assoc_dir); 
     224 
     225    if (-d $import_assoc_dir) { 
     226        # detected version from previous explode => remove it 
     227        &util::rm_r($import_assoc_dir); 
     228    } 
     229    else { 
     230        # First time => make sure parent directory exists to move  
     231        # "c.dir" (see above) into 
     232         
     233        &util::mk_all_dir($import_target_parent_dir); 
     234    } 
     235 
     236    &util::cp_r($archive_assoc_dir,$import_target_parent_dir) 
     237    } 
     238} 
    168239 
    169240 
     
    196267    } 
    197268 
    198     my $orig_import_filenames = $self->oid_to_import_filenames($docid); 
    199     my $oid_keys = $self->import_filenames_to_oids($orig_import_filenames); 
    200     my $expanded_import_filenames = $self->oid_to_import_filenames(@$oid_keys); 
    201  
     269    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/); 
     270 
     271    my $orig_import_filenames = $self->docid_to_import_filenames($docid_root); 
     272    my $docid_keys = $self->import_filenames_to_docids($orig_import_filenames); 
     273    my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys); 
     274 
     275    $self->remove_import_filenames($expanded_import_filenames); 
     276    $self->move_docoids_to_import($docid_keys); 
    202277 
    203278    # Release the lock once it is done 
    204279    $self->unlock_collection($username, $collect); 
    205280 
    206     my $mess = "Base OID: $docid\n-----\n"; 
     281    my $mess = "Base Doc ID: $docid_root\n-----\n"; 
    207282    $mess .= join("\n",@$expanded_import_filenames); 
    208283