# # ChangeLog for gs2-extensions/parallel-building/trunk/src/bin/script # # Generated by Trac 1.4.2 # 2024-05-23T19:15:43+12:00 Sun, 21 Jul 2013 22:38:06 GMT jmt12 [27914] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Trying to get around a couple of divide-by-zero issues when ... Sun, 21 Jul 2013 22:37:02 GMT jmt12 [27913] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Made the ingester to be used (version 1 without reduce phase, or ... Thu, 04 Jul 2013 01:45:08 GMT jmt12 [27753] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Adding Handbrake's percentage complete to report - although this is ... Thu, 04 Jul 2013 01:44:22 GMT jmt12 [27752] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Data locality file not being found is no longer fatal (HDFS-NFS-Proxy ... Tue, 02 Jul 2013 02:35:42 GMT jmt12 [27732] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Nice the copy itself too Fri, 21 Jun 2013 00:25:32 GMT jmt12 [27686] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) A little more progress comments Fri, 21 Jun 2013 00:24:54 GMT jmt12 [27685] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) in the case of multiple attempts you need to retain the information ... Fri, 21 Jun 2013 00:22:25 GMT jmt12 [27684] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Adding natural sorting into report generation - so also needed to add ... 
Fri, 21 Jun 2013 00:20:27 GMT jmt12 [27683] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) moving a few more headings around to help with information block layout Wed, 19 Jun 2013 21:26:05 GMT jmt12 [27669] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Sort compute nodes naturally before labelling them with incremental ... Mon, 17 Jun 2013 22:59:52 GMT jmt12 [27654] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) * gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java (modified) Add the ability to stagger the starting of Mappers by placing a ... Mon, 17 Jun 2013 22:31:34 GMT jmt12 [27644] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Extended to support HDFS-access via NFS. This applies to both the ... Mon, 17 Jun 2013 22:30:13 GMT jmt12 [27643] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Changed the script generator so it can recurse through directories ... Mon, 17 Jun 2013 22:28:53 GMT jmt12 [27642] * gs2-extensions/parallel-building/trunk/src/bin/script/ffsplit.sh (added) A script I downloaded that successfully splits video files - ... Mon, 10 Jun 2013 05:09:36 GMT jmt12 [27594] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Extend hadoop_import.pl to be able to start and stop the Thrift server(s) Mon, 10 Jun 2013 04:27:49 GMT jmt12 [27590] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Adding statistics about data locality, and highlighting tasks where ... 
Mon, 10 Jun 2013 02:19:21 GMT jmt12 [27589] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Fixing up some minor bugs in regexes Mon, 10 Jun 2013 02:12:28 GMT jmt12 [27588] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Extend parser to support jobs that are split over several logs. Also ... Sun, 09 Jun 2013 23:29:03 GMT jmt12 [27587] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Allow debug mode to be enabled from the command line Sun, 09 Jun 2013 23:15:36 GMT jmt12 [27586] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Updating script to take date of hadoop job into account when ... Sun, 09 Jun 2013 22:25:10 GMT jmt12 [27585] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) The perl on Medusa won't let you immediately treat a returned array ... Sun, 09 Jun 2013 22:23:46 GMT jmt12 [27584] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) I wasn't doing -r when attempting to clear directories left in /tmp ... Sun, 09 Jun 2013 22:22:19 GMT jmt12 [27583] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Adding code to differentiate between workers in a cluster - all of ... Wed, 05 Jun 2013 23:16:31 GMT jmt12 [27560] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Fixing typo in regexp that meant filenames sometimes ignored Wed, 05 Jun 2013 23:15:28 GMT jmt12 [27559] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Changed mime-type away from binary - I hope. Meanwhile, generate ... Wed, 05 Jun 2013 01:07:43 GMT jmt12 [27551] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Altered so that it expects to be given a CSV containing parallel ... 
Wed, 05 Jun 2013 01:06:32 GMT jmt12 [27550] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Ensure the hostname is added to the Hadoop logs so we can identify ... Wed, 05 Jun 2013 01:04:58 GMT jmt12 [27549] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (added) Extract information from the logs generated by parallel Greenstone ... Wed, 05 Jun 2013 01:04:30 GMT jmt12 [27548] * gs2-extensions/parallel-building/trunk/src/bin/script/openmpi_report.pl (added) Extract information from the logs generated by parallel Greenstone ... Tue, 04 Jun 2013 23:53:16 GMT jmt12 [27543] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (added) Adding generate_gantt.pl script in its original form - i.e. directly ... Mon, 03 Jun 2013 23:08:37 GMT jmt12 [27530] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Clear out old logs, and adding more comments about what the script is ... Thu, 30 May 2013 00:15:06 GMT jmt12 [27515] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) Making the file used during buffer tests be configurable Wed, 29 May 2013 22:16:22 GMT jmt12 [27512] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) Adding in a special test for measuring the effect of altering ... Mon, 27 May 2013 23:38:08 GMT jmt12 [27495] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) removing doubled up debug comments and putting some paths in ... 
Mon, 27 May 2013 00:27:31 GMT jmt12 [27481] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) * gs2-extensions/parallel-building/trunk/src/perllib/FileUtils.pm (modified) * gs2-extensions/parallel-building/trunk/src/perllib/FileUtils/HDThriftFS.pm (modified) * gs2-extensions/parallel-building/trunk/src/perllib/FileUtils/LocalFS.pm (modified) Adding makeAllDirectories() (which I'd only implemented in LocalFS) ... Mon, 27 May 2013 00:22:03 GMT jmt12 [27480] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) Removing DateTime dependency (so HDFSShell will always fail ... Thu, 23 May 2013 23:13:28 GMT jmt12 [27436] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (added) Adding the actual script - rather than a symlink to my dropbox. doh Thu, 23 May 2013 23:12:52 GMT jmt12 [27435] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (deleted) Gah - only a symbolic link Thu, 23 May 2013 21:24:16 GMT jmt12 [27414] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Allowing more processing arguments to be configured at the call, and ... Thu, 23 May 2013 21:22:13 GMT jmt12 [27412] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) I obviously hadn't run this script on Karearea before - assumed all ... Thu, 23 May 2013 01:54:45 GMT jmt12 [27409] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (added) Unit test like testing for the FileUtils class and LocalFS, ... Thu, 23 May 2013 01:54:09 GMT jmt12 [27408] * gs2-extensions/parallel-building/trunk/src/bin/script/start_thrift_server.sh (added) A symbolic link to the actual script in the packages directory Tue, 21 May 2013 02:48:55 GMT jmt12 [27378] * gs2-extensions/parallel-building/trunk/src/bin/script/buildcol.pl (deleted) Parallel processing support now added (via buildcolutil subclass) to ... 
Mon, 25 Mar 2013 20:14:31 GMT jmt12 [27126] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Extra clean up commands (like removing cached versions of video ... Mon, 25 Mar 2013 20:10:44 GMT jmt12 [27125] * gs2-extensions/parallel-building/trunk/src/bin/script/flush_caches.pl (added) A script to try and flush all caches - I'm certain it's flushing disk ... Mon, 25 Mar 2013 20:05:34 GMT jmt12 [27124] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Use the new perl version script to extract the version number - so as ... Sun, 24 Mar 2013 22:47:06 GMT jmt12 [27119] * gs2-extensions/parallel-building/trunk/src/bin/script/perl-version.pl (modified) Merging version finder from Medusa with the one lurking on Karearea Tue, 12 Mar 2013 00:37:21 GMT jmt12 [27058] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Adding data locality report generation to Hadoop greenstone imports Mon, 11 Mar 2013 23:18:56 GMT jmt12 [27052] * gs2-extensions/parallel-building/trunk/src/bin/script/perl-version.pl (modified) Turns out the Perl on Medusa doesn't support $^V, so I've had to ... Mon, 11 Mar 2013 22:24:11 GMT jmt12 [27041] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) INC path now includes the installed extensions perl path (including ... Mon, 11 Mar 2013 22:05:41 GMT jmt12 [27040] * gs2-extensions/parallel-building/trunk/src/bin/script/perl-version.pl (added) A simple script that returns just the version number of Perl Mon, 11 Mar 2013 20:24:15 GMT jmt12 [27036] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (added) A script to extract data locality and other task information from the ... 
Mon, 04 Mar 2013 22:55:31 GMT jmt12 [27006] * gs2-extensions/parallel-building/trunk/src/bin/script/report-hadoop-processes.pl (added) A companion script to stop-hadoop-processes that just reports running ... Mon, 04 Mar 2013 22:53:56 GMT jmt12 [27005] * gs2-extensions/parallel-building/trunk/src/bin/script/stop-runaway-hadoop.pl (added) Similar to stop-impt.pl, this script uses kill to stop runaway Hadoop ... Mon, 04 Mar 2013 22:51:49 GMT jmt12 [27004] * gs2-extensions/parallel-building/trunk/src/bin/script/stop-impt.pl (added) A script to stop (using kill) a runaway import process and any ... Mon, 04 Mar 2013 22:47:57 GMT jmt12 [27001] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Passing more environment variables (HADOOPPREFIX, HDFSHOST, HDFSPORT) ... Mon, 04 Mar 2013 22:08:22 GMT jmt12 [26999] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_dspace_filtermedia.pl (modified) Ensuring MPI binds to correct interface, and passing through ... Mon, 04 Mar 2013 22:07:26 GMT jmt12 [26998] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_terrier_fileindex.pl (modified) Adding maxdocs variable, lots of debug comments, added some tests for ... Mon, 25 Feb 2013 21:54:56 GMT jmt12 [26953] * gs2-extensions/parallel-building/trunk/src/bin/script/rm_archives.pl (added) Checking in the script rather than a symbolic link to the script :P Mon, 25 Feb 2013 21:54:41 GMT jmt12 [26952] * gs2-extensions/parallel-building/trunk/src/bin/script/rm_archives.pl (deleted) Accidentally checked in symbolic link rather than script Mon, 25 Feb 2013 21:43:17 GMT jmt12 [26949] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (added) Parallel import using Hadoop Mon, 25 Feb 2013 20:49:36 GMT jmt12 [26930] * gs2-extensions/parallel-building/trunk/src/bin/script/manifestinator.pl (modified) Randomized order of files, and added the ability to specify a maximum ... 
Mon, 25 Feb 2013 20:48:49 GMT jmt12 [26929] * gs2-extensions/parallel-building/trunk/src/bin/script/rm_archives.pl (added) A script to comprehensively clean up a collection between imports... ... Mon, 25 Feb 2013 20:43:49 GMT jmt12 [26923] * gs2-extensions/parallel-building/trunk/src/bin/script/importsubsetinator.pl (added) Generates a specified-size subset of a larger import directory Tue, 25 Sep 2012 00:41:51 GMT jmt12 [26242] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_terrier_fileindex.pl (modified) Modifications to progress messages to improve extracting information ... Wed, 12 Sep 2012 23:10:27 GMT jmt12 [26187] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_terrier_fileindex.pl (added) * gs2-extensions/parallel-building/trunk/src/opt (added) * gs2-extensions/parallel-building/trunk/src/opt/Terrier (added) * gs2-extensions/parallel-building/trunk/src/opt/Terrier/FileIndexer.java (added) * gs2-extensions/parallel-building/trunk/src/opt/Terrier/README.txt (added) Adding the rest of parallel processing support for Terrier into SVN. ... Thu, 12 Jul 2012 22:35:32 GMT jmt12 [25943] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_dspace_filtermedia.pl (modified) Updated script to support cluster processing Mon, 25 Jun 2012 22:38:51 GMT jmt12 [25809] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_dspace_filtermedia.pl (added) Script to parallel media filter DSpace Tue, 21 Feb 2012 00:35:11 GMT jmt12 [25118] * gs2-extensions/parallel-building/trunk/src/bin/script/batch-testing.pl (modified) Significant changes to support the latest round of batch tests (for ... 
Tue, 21 Feb 2012 00:34:12 GMT jmt12 [25117] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_import.pl (modified) Added some debug comments Wed, 30 Nov 2011 23:34:28 GMT jmt12 [24848] * gs2-extensions/parallel-building/trunk/src/bin/script/manifestinator.pl (modified) Extended the manifestinator to correctly recurse into directories Wed, 30 Nov 2011 23:33:53 GMT jmt12 [24847] * gs2-extensions/parallel-building/trunk/src/bin/script/GDBMServer.pl (modified) Added the ability for the server to detect if its calling process ... Wed, 30 Nov 2011 23:32:40 GMT jmt12 [24846] * gs2-extensions/parallel-building/trunk/src/bin/script/test-server.pl (added) A small SocketSwimmingThreadPoolServer to allow testing of the client ... Wed, 30 Nov 2011 23:31:46 GMT jmt12 [24845] * gs2-extensions/parallel-building/trunk/src/bin/script/poll-processor.pl (added) Similar to poll-gsdl.pl except that this uses mpstat to gather ... Wed, 30 Nov 2011 23:30:47 GMT jmt12 [24844] * gs2-extensions/parallel-building/trunk/src/bin/script/poll-report.pl (added) Script to take the raw output from poll-gsdl.pl and collate the ... Wed, 30 Nov 2011 23:30:13 GMT jmt12 [24843] * gs2-extensions/parallel-building/trunk/src/bin/script/poll-gsdl.pl (added) A script that launches an (affinity assigned) Greenstone import while ... Wed, 30 Nov 2011 23:28:39 GMT jmt12 [24842] * gs2-extensions/parallel-building/trunk/src/bin/script/poll-processor-report.pl (added) Script to take the raw output from poll-processor.pl and collate the ... 
Wed, 30 Nov 2011 23:26:53 GMT jmt12 [24841] * gs2-extensions/parallel-building/trunk/src/bin/script/spawn_collections.pl (modified) Thu, 29 Sep 2011 21:51:51 GMT jmt12 [24695] * gs2-extensions/parallel-building/trunk/src/bin/script/batch-testing.pl (added) Script to run a battery of tests and record the results in a database Thu, 29 Sep 2011 21:51:29 GMT jmt12 [24694] * gs2-extensions/parallel-building/trunk/src/bin/script/spawn_collections.pl (added) Setup script to generate a number of collections of varying sizes ... Thu, 29 Sep 2011 21:51:02 GMT jmt12 [24693] * gs2-extensions/parallel-building/trunk/src/bin/script/reset_memcache_editor.sh (added) A hack to allow sudoedit to modify the drop_caches value non- ... Wed, 28 Sep 2011 00:16:19 GMT jmt12 [24680] * gs2-extensions/parallel-building/trunk/src/bin/script/buildcol.pl (modified) Removed sanity check for GDBM (where was my sanity - GDBM works, it's ... Wed, 28 Sep 2011 00:15:12 GMT jmt12 [24679] * gs2-extensions/parallel-building/trunk/src/bin/script/GDBMServer.pl (modified) Added code to determine the open file handles and persist them ... Wed, 28 Sep 2011 00:14:03 GMT jmt12 [24678] * gs2-extensions/parallel-building/trunk/src/bin/script/gdbm-diff.pl (added) A tool for comparing the contents of two GDBM databases Wed, 28 Sep 2011 00:13:45 GMT jmt12 [24677] * gs2-extensions/parallel-building/trunk/src/bin/script/manifestinator.pl (added) A tool for generating manifest files from import directories Mon, 26 Sep 2011 23:43:12 GMT jmt12 [24667] * gs2-extensions/parallel-building/trunk/src/bin/script/buildcol.pl (modified) Adding another sanity test to prevent parallel building when infodb ... Mon, 26 Sep 2011 23:41:04 GMT jmt12 [24666] * gs2-extensions/parallel-building/trunk/src/bin/script/GDBMServer.pl (added) A daemonizable server that modifies a GDBM database (via the GDBMCLI ... 
Mon, 26 Sep 2011 23:40:11 GMT jmt12 [24665] * gs2-extensions/parallel-building/trunk/src/bin/script/client.pl (added) A simple client for testing GDBMServer and for properly shutting it ... Wed, 21 Sep 2011 03:56:28 GMT jmt12 [24622] * gs2-extensions/parallel-building/trunk/src/bin (added) * gs2-extensions/parallel-building/trunk/src/bin/script (added) * gs2-extensions/parallel-building/trunk/src/bin/script/buildcol.pl (added) * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_import.pl (added) Parallel enabled version of buildcol.pl and the parallel_import.pl ...