Changeset 30354
- Timestamp:
- 2015-12-16T16:15:39+13:00 (8 years ago)
- Location:
- gs2-extensions/parallel-building/trunk/src
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl
r29663 r30354 4 4 use strict; 5 5 use warnings; 6 7 BEGIN 8 { 9 if ( !defined $ENV{'GEXTPARALLELBUILDING_INSTALLED'}) { 10 die "GEXTPARALLELBUILDING_INSTALLED not set\n"; 11 } 12 # Installed CPAN packages for GEXT*INSTALL 13 my $perl_version = `perl-version.pl`; 14 my $perl_path = sprintf("%s/lib/perl/%s", $ENV{'GEXTPARALLELBUILDING_INSTALLED'}, $perl_version); 15 unshift (@INC, $perl_path); 16 } 6 17 7 18 # Modules -
gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl
r28015 r30354 33 33 my $hdfs_fs_prefix = 'HDThriftFS://'; 34 34 my $refresh_import = 0; 35 my $remove_old = 0; 35 36 my $username = `whoami`; 36 37 chomp($username); … … 49 50 else 50 51 { 51 print STDERR "usage: hadoop_import.pl <collection> [-debug] [- disable_thrift] [-dry_run] [-start_thrift] [-refresh_import] [-flush_diskcache] [-use_nfs] [-stagger]\n";52 print STDERR "usage: hadoop_import.pl <collection> [-debug] [-enable_thrift] [-dry_run] [-start_thrift] [-refresh_import] [-flush_diskcache] [-use_nfs] [-stagger] [-removeold]\n"; 52 53 print STDERR "where: [debug] print more debug messages to STDERR\n"; 53 54 print STDERR " [dry_run] don't actually perform an file actions\n"; … … 61 62 $debug = 1; 62 63 } 63 if ($ARGV[$offset] eq '- disable_thrift')64 { 65 $use_thrift = 0;64 if ($ARGV[$offset] eq '-enable_thrift') 65 { 66 $use_thrift = 1; 66 67 } 67 68 if ($ARGV[$offset] eq '-dry_run') … … 88 89 { 89 90 $use_nfs = 1; 91 } 92 if ($ARGV[$offset] eq '-removeold') 93 { 94 $remove_old = 1; 90 95 } 91 96 if ($ARGV[$offset] eq '-logdir') -
gs2-extensions/parallel-building/trunk/src/perllib/FileUtils/HDFSShell.pm
r27525 r30354 249 249 } 250 250 ## isHDFS() 251 252 253 ## @function isSpecialDirectory 254 # 255 sub isSpecialDirectory 256 { 257 my ($path) = @_; 258 return ($path =~ /^HDFSShell:\/\/[a-zA-Z]+:\d+$/); 259 } 260 ## isSpecialDirectory() 251 261 252 262 … … 318 328 my @files; 319 329 my $result = &_executeHDFSCommand(1, 'ls', $path); 330 if ($result =~ /No such file or directory/) 331 { 332 print STDERR "BOOM! BOOM! BOOM!\n"; 333 return undef; 334 } 320 335 my @lines = split(/\r?\n/, $result); 321 336 foreach my $line (@lines) -
gs2-extensions/parallel-building/trunk/src/perllib/inexport.pm
r30292 r30354 190 190 my $self = { 'xml' => 0, 'mode' => $mode }; 191 191 192 print "INFO: This inexport.pm supports version 2 manifest files\n";193 194 192 # general options available to all plugins 195 193 my $arguments = $options->{'args'}; … … 201 199 print STDERR "Something went wrong during parsing the arguments. Scroll up for details.\n"; 202 200 die "\n"; 201 } 202 203 if ($self->{'verbosity'} > 2) { 204 print "[INFO] This inexport.pm supports version 2 manifest files\n"; 205 } 206 if ($self->{'verbosity'} > 3) { 207 print '[DEBUG] Perl @INC: ' . join(", ", @INC) . "\n"; 203 208 } 204 209 … … 735 740 else 736 741 { 737 print STDERR "Skipping global file scan due to manifest and complexmeta configuration\n";742 print STDERR "Skipping import directory-level global file scan due to manifest and complexmeta configuration\n"; 738 743 739 744 … … 757 762 : &FileUtils::filenameConcatenate($importdir,$df); 758 763 759 if (-d $full_df ) {764 if (-d $full_df && $self->{'manifest_version'} != 2) { 760 765 &add_dir_contents_to_list($full_df, \@full_deleted_files); 761 766 } else { … … 783 788 : &FileUtils::filenameConcatenate($importdir,$rf); 784 789 785 if (-d $full_rf ) {790 if (-d $full_rf && $self->{'manifest_version'} != 2) { 786 791 &add_dir_contents_to_list($full_rf, \@full_reindex_files); 787 792 } else { … … 814 819 : &FileUtils::filenameConcatenate($importdir,$nf); 815 820 816 if (-d $full_nf ) {821 if (-d $full_nf && $self->{'manifest_version'} != 2) { 817 822 &add_dir_contents_to_list($full_nf, \@full_new_files); 818 823 } else { … … 885 890 foreach my $file_to_import (keys %{$block_hash->{'reindex_files'}}, keys %{$block_hash->{'new_files'}}) 886 891 { 887 $self->{'directoryplugin'}->read_for_manifest_v2($pluginfo, $file_to_import, $block_hash, $processor, $gli); 892 if (&FileUtils::directoryExists($file_to_import)) { 893 # print "DEBUG: Directory to import: \"" . $file_to_import . "\"\n"; 894 &plugin::file_block_read($pluginfo, '', $file_to_import, $block_hash, $metadata, $gli); 895 # print "\n===== BLOCK HASH =====\n"; 896 # Dump($block_hash); 897 # print "\n===== =====\n\n"; 898 $self->perform_process_files($manifest, $pluginfo, $importdir, $file_to_import, $block_hash, $metadata, $processor, $maxdocs); 899 } 900 else 901 { 902 # print "DEBUG: File to import: \"" . $file_to_import . "\"\n"; 903 $self->{'directoryplugin'}->read_for_manifest_v2($pluginfo, $file_to_import, $block_hash, $processor, $gli); 904 } 888 905 } 889 906 } -
gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java
r27654 r30354 262 262 263 263 // - call Greenstone passing in the path to the manifest 264 ProcessBuilder import_process_builder = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-archivedir", conf.get("archivesdir"), collection); 264 //ProcessBuilder import_process_builder = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-archivedir", conf.get("archivesdir"), collection); 265 String environment_script_filename = "setup.bash"; 266 StringBuffer cmd_buffer = new StringBuffer(); 267 cmd_buffer.append("source ./"); 268 cmd_buffer.append(environment_script_filename); 269 cmd_buffer.append(" && time -p import.pl -keepold -manifest \""); 270 cmd_buffer.append(manifest_path.toString()); 271 cmd_buffer.append("\" -archivedir \""); 272 cmd_buffer.append(conf.get("archivesdir")); 273 cmd_buffer.append("\" "); 274 cmd_buffer.append(collection); 275 ProcessBuilder import_process_builder = new ProcessBuilder("bash", "-c", cmd_buffer.toString()); 265 276 fw1.write("[Command:" + import_process_builder.command() + "]\n"); 277 /* 266 278 // - alter environment 267 279 Map<String, String> import_process_env = import_process_builder.environment(); 268 // -path280 // - build up the path 269 281 String path = import_process_env.get("PATH"); 270 282 path = gsdlhome + "/ext/parallel-building/bin/script:" + path; … … 294 306 import_process_env.put("HADOOP_PREFIX", hadoop_home); 295 307 fw1.write("[HADOOP_PREFIX: " + hadoop_home + "]\n"); 308 */ 296 309 297 310 // - change working directory -
gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest2.java
r28312 r30354 324 324 manifest_writer.close(); 325 325 326 /* Original process calling - sets up environment in Java 326 327 // - call Greenstone passing in the path to the manifest 327 328 ProcessBuilder import_process_builder = new ProcessBuilder("time", "-p", "import.pl", "-manifest", manifest_path.toString(), "-keepold", "-archivedir", conf.get("archivesdir"), collection); … … 357 358 import_process_env.put("HADOOP_PREFIX", hadoop_home); 358 359 fw1.write("[HADOOP_PREFIX: " + hadoop_home + "]\n"); 360 */ 361 362 /* New process call - adds call to setup.bash first to prepare 363 * environment... hopefully */ 364 // - call Greenstone passing in the path to the manifest 365 String environment_script_filename = "setup.bash"; 366 StringBuffer cmd_buffer = new StringBuffer(); 367 cmd_buffer.append("source ./"); 368 cmd_buffer.append(environment_script_filename); 369 cmd_buffer.append(" && time -p import.pl -keepold -manifest \""); 370 cmd_buffer.append(manifest_path.toString()); 371 cmd_buffer.append("\" -archivedir \""); 372 cmd_buffer.append(conf.get("archivesdir")); 373 cmd_buffer.append("\" "); 374 cmd_buffer.append(collection); 375 ProcessBuilder import_process_builder = new ProcessBuilder("bash", "-c", cmd_buffer.toString()); 376 fw1.write("[Command:" + import_process_builder.command() + "]\n"); 359 377 360 378 // - change working directory … … 738 756 job.setReducerClass(GSReducer.class); 739 757 740 // Sets the input and output handlers - may need to adjust input to provide me741 // a series of filenames (TextInputFormat will instead read in a text file and742 // return each line...)758 // Sets the input and output handlers - may need to adjust input to provide 759 // a series of filenames (TextInputFormat will instead read in a text file 760 // and return each line...) 743 761 job.setInputFormatClass(GSFileInputFormat.class); 744 762 //job.setOutputFormatClass(NullOutputFormat.class);
Note:
See TracChangeset
for help on using the changeset viewer.