Changeset 23742


Ignore:
Timestamp:
2011-03-01T17:25:59+13:00 (13 years ago)
Author:
davidb
Message:

Removing part of the explode algorithm added

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cgiactions/explodeaction.pm

    r23480 r23742  
    3333use ghtml;
    3434
     35use File::Basename;
    3536
    3637BEGIN {
     
    8889
    8990
    90 sub oid_to_import_filenames
     91sub docid_to_import_filenames
    9192{
    9293    my $self = shift @_;
     
    103104                    $archive_dir);
    104105
    105     print STDERR "**** arcinfo doc filename = $arcinfo_doc_filename\n";
    106 
    107     my @import_files = ();
     106    my %all_import_file_keys = ();
    108107   
    109108    foreach my $docid (@docids) {
    110109    # Obtain the src and associated files for the specified docID
    111110   
    112     print STDERR "*** looking up key \"$docid\"\n";
    113 
    114111    my $doc_rec
    115112        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
    116113                     $docid);
    117 
    118     print STDERR "*** doc_rec = $doc_rec\n";
    119114   
    120115    my $src_files = $doc_rec->{'src-file'};
    121116    my $assoc_files = $doc_rec->{'assoc-file'};
    122117   
    123     push(@import_files,@$src_files) if (defined $src_files);
    124     push(@import_files,@$assoc_files) if (defined $assoc_files);
    125     }
    126 
    127     return \@import_files;
    128 }
    129 
    130 
    131 sub import_filenames_to_oids
     118    if (defined $src_files) {
     119        foreach my $if (@$src_files) {
     120        $all_import_file_keys{$if} = 1;
     121        }
     122    }
     123
     124    if (defined $assoc_files) {
     125        foreach my $if (@$assoc_files) {
     126        $all_import_file_keys{$if} = 1;
     127        }
     128    }
     129    }
     130
     131    my @all_import_files = keys %all_import_file_keys;
     132
     133    return \@all_import_files;
     134}
     135
     136
     137sub import_filenames_to_docids
    132138{
    133139    my $self = shift @_;
     
    140146    my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
    141147
     148    # Obtain the oids for the specified import filenames
     149    my $arcinfo_src_filename
     150    = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
     151                    $archive_dir);
     152
     153    my %all_oid_keys = ();
     154
     155    foreach my $if (@$import_filenames) {
     156
     157    my $src_rec
     158        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,
     159                     $if);
     160    my $oids = $src_rec->{'oid'};
     161
     162    foreach my $o (@$oids) {
     163        $all_oid_keys{$o} = 1;
     164    }
     165    }
     166
     167    my @all_oids = keys %all_oid_keys;
     168
     169    return \@all_oids;
     170}
     171
     172
     173sub remove_import_filenames
     174{
     175    my $self = shift @_;
     176    my ($expanded_import_filenames) = @_;
     177
     178    foreach my $f (@$expanded_import_filenames) {
     179    # If this document has been exploded before then
     180    # its original source files will have already been removed 
     181    if (-e $f) {
     182        &util::rm($f);
     183    }
     184    }
     185}
     186
     187sub move_docoids_to_import
     188{
     189    my $self = shift @_;
     190    my ($docids) = @_;
     191
     192    my $infodb_type = $self->{'infodbtype'};
     193
     194    # Derive the archives and import directories
     195    my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
     196    my $import_dir  = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"import");
     197
    142198    # Obtain the doc.xml path for the specified docID
    143199    my $arcinfo_doc_filename
    144     = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
     200    = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
    145201                    $archive_dir);
    146202
    147     my %all_oid_keys = ();
    148 
    149     foreach my $if (@$import_filenames) {
    150 
    151     print STDERR "*** looking if key \"$if\"\n";
    152 
    153     my $src_rec
     203    foreach my $docid (@$docids) {
     204
     205    my $doc_rec
    154206        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
    155                      $if);
    156     my $oids = $src_rec->{'oid'};
    157 
    158     foreach my $o (@$oids) {
    159         $all_oid_keys{$o} = 1;
    160     }
    161     }
    162 
    163     my @all_oids = keys %all_oid_keys;
    164 
    165     return \@all_oids;
    166 }
    167 
     207                     $docid);
     208
     209    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
     210
     211    # The $doc_xml_file is relative to the archives, so need to do
     212    # a bit more work to make sure the right folder containing this
     213    # is moved to the right place in the import folder
     214
     215    my $assoc_path = dirname($doc_xml_file);
     216    my $import_assoc_dir = &util::filename_cat($import_dir,$assoc_path);
     217    my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
     218
     219    # If assoc_path involves more than one sub directory, then need to make
     220    # sure the necessary directories exist in the import area also.
     221    # For example, if assoc_path is "a/b/c.dir" then need "import/a/b" to
     222    # exist before moving "archives/a/b/c.dir" -> "import/a/b"
     223    my $import_target_parent_dir = dirname($import_assoc_dir);
     224
     225    if (-d $import_assoc_dir) {
     226        # detected version from previous explode => remove it
     227        &util::rm_r($import_assoc_dir);
     228    }
     229    else {
     230        # First time => make sure parent directory exists to move
     231        # "c.dir" (see above) into
     232       
     233        &util::mk_all_dir($import_target_parent_dir);
     234    }
     235
     236    &util::cp_r($archive_assoc_dir,$import_target_parent_dir)
     237    }
     238}
    168239
    169240
     
    196267    }
    197268
    198     my $orig_import_filenames = $self->oid_to_import_filenames($docid);
    199     my $oid_keys = $self->import_filenames_to_oids($orig_import_filenames);
    200     my $expanded_import_filenames = $self->oid_to_import_filenames(@$oid_keys);
    201 
     269    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
     270
     271    my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
     272    my $docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
     273    my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
     274
     275    $self->remove_import_filenames($expanded_import_filenames);
     276    $self->move_docoids_to_import($docid_keys);
    202277
    203278    # Release the lock once it is done
    204279    $self->unlock_collection($username, $collect);
    205280
    206     my $mess = "Base OID: $docid\n-----\n";
     281    my $mess = "Base Doc ID: $docid_root\n-----\n";
    207282    $mess .= join("\n",@$expanded_import_filenames);
    208283
Note: See TracChangeset for help on using the changeset viewer.