# # ChangeLog for gs2-extensions/parallel-building/trunk/src/bin # # Generated by Trac 1.4.2 # 2024-05-14T17:08:02+12:00 Wed, 16 Dec 2015 03:15:39 GMT jmt12 [30354] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) * gs2-extensions/parallel-building/trunk/src/perllib/FileUtils/HDFSShell.pm (modified) * gs2-extensions/parallel-building/trunk/src/perllib/inexport.pm (modified) * gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java (modified) * gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest2.java (modified) Extending manifest v2 support to allow for directories to be listed ... Sun, 11 Oct 2015 22:24:21 GMT jmt12 [30306] * gs2-extensions/parallel-building/trunk/src/bin/script/GDBMServer.pl (modified) Making the setup of CPAN path more robust based on the better control ... Thu, 18 Dec 2014 23:30:03 GMT jmt12 [29663] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Supporting grayscale printing, fixing mismatched tags and ... Thu, 18 Dec 2014 23:28:36 GMT jmt12 [29662] * gs2-extensions/parallel-building/trunk/src/bin/script/rm_archives.pl (modified) Now removes building and index directories if found Thu, 18 Dec 2014 23:26:36 GMT jmt12 [29661] * gs2-extensions/parallel-building/trunk/src/bin/script/deletinator.pl (added) A helper script to clean-up the bogus directories sometimes created ... Mon, 21 Jul 2014 22:46:42 GMT jmt12 [29158] * gs2-extensions/parallel-building/trunk/src/bin/script/logreportinator.pl (added) Initial checkin of script to convert a number of Greenstone|| logs ... Thu, 19 Jun 2014 05:28:20 GMT jmt12 [29106] * gs2-extensions/parallel-building/trunk/src/bin/script/linkinator.pl (added) Check-in of script to symlink lorem files to matching files in ... 
Wed, 18 Jun 2014 23:26:28 GMT jmt12 [29104] * gs2-extensions/parallel-building/trunk/src/bin/script/text_metricinator.pl (added) A script for extracting textual metrics from a collection of text ... Wed, 18 Jun 2014 23:26:01 GMT jmt12 [29103] * gs2-extensions/parallel-building/trunk/src/bin/script/importsubsetinator.pl (modified) updated - not any more efficient (Schlemiel the painter performance) ... Tue, 17 Dec 2013 21:53:57 GMT jmt12 [28769] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_import.pl (deleted) No longer used. import.pl now smart enough to dynamically load ... Tue, 17 Dec 2013 21:53:15 GMT jmt12 [28768] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_import.pl (modified) Initially added microtime to this script, but then remembered it ... Tue, 17 Dec 2013 21:21:53 GMT jmt12 [28767] * gs2-extensions/parallel-building/trunk/src/bin/script/import_with_io_metric.pl (modified) Drastically increased the script to allow 1) battery of imports ... Tue, 17 Dec 2013 21:20:09 GMT jmt12 [28766] * gs2-extensions/parallel-building/trunk/src/bin/script/strace_to_tsv.pl (modified) Removing an occasional few characters of garbage that turn up in the ... Mon, 16 Dec 2013 23:08:10 GMT jmt12 [28764] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_dspace_filtermedia.pl (modified) Adding microsecond timing messages Thu, 21 Nov 2013 00:36:40 GMT jmt12 [28666] * gs2-extensions/parallel-building/trunk/src/bin/script/strace_to_tsv.pl (added) A script to transform a strace.out into a Tab separated file worthy ... Thu, 21 Nov 2013 00:35:52 GMT jmt12 [28665] * gs2-extensions/parallel-building/trunk/src/bin/script/import_with_io_metric.pl (modified) Latest changes to workaround resumed syscalls massive duration problem Tue, 19 Nov 2013 23:57:27 GMT jmt12 [28652] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Changes to support running the reports over logs produced from ... 
Tue, 19 Nov 2013 23:51:45 GMT jmt12 [28648] * gs2-extensions/parallel-building/trunk/src/bin/script/flush_caches.pl (modified) Adding a short delay after writing to the flush_cache file just to ... Tue, 19 Nov 2013 23:49:26 GMT jmt12 [28647] * gs2-extensions/parallel-building/trunk/src/bin/script/update_data_locality.pl (modified) Adding progress messages and making a debug message optional Tue, 19 Nov 2013 22:31:31 GMT jmt12 [28646] * gs2-extensions/parallel-building/trunk/src/bin/script/import_with_io_metric.pl (added) A script that uses strace to produce IO metrics of a Greenstone import Tue, 19 Nov 2013 22:31:07 GMT jmt12 [28645] * gs2-extensions/parallel-building/trunk/src/bin/script/dlreport.pl (added) Script to generate a report on data locality from GreenstoneHadoop logs Sun, 06 Oct 2013 21:04:32 GMT jmt12 [28358] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Replacing my earlier decision to only have data locality information ... Sun, 06 Oct 2013 21:02:54 GMT jmt12 [28357] * gs2-extensions/parallel-building/trunk/src/bin/script/update_data_locality.pl (added) used to update the data_locality.csv file in the case where other ... Sun, 06 Oct 2013 21:01:39 GMT jmt12 [28356] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Support the legacy version of taskno in the data_locality.csv file ... Thu, 29 Aug 2013 21:18:21 GMT jmt12 [28191] * gs2-extensions/parallel-building/trunk/src/bin/script/replication_tests.pl (modified) Removing redundant error stream redirect - this wasn't causing the ... Thu, 29 Aug 2013 21:08:04 GMT jmt12 [28190] * gs2-extensions/parallel-building/trunk/src/bin/script/replication_tests.pl (modified) Had accidentally hardcoded the max replication number - allow it to be ... 
Thu, 29 Aug 2013 21:06:56 GMT jmt12 [28189] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Replace the newer (and faster) while(@file) loop with the older (and ... Thu, 29 Aug 2013 20:58:33 GMT jmt12 [28188] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Minor fix to allow for tasks that start in the same second (now each ... Thu, 29 Aug 2013 20:55:57 GMT jmt12 [28186] * gs2-extensions/parallel-building/trunk/src/bin/script/iotop_report.pl (added) A (failed) attempt to use the unix iotop tool to determine IO percentage Fri, 09 Aug 2013 01:30:35 GMT jmt12 [28018] * gs2-extensions/parallel-building/trunk/src/bin/script/replication_tests.pl (modified) Try really hard to capture the output from 'time' function as Medusa ... Fri, 09 Aug 2013 01:26:02 GMT jmt12 [28017] * gs2-extensions/parallel-building/trunk/src/bin/script/replication_tests.pl (modified) Forgot to add processing comment before call to hadoop_import.pl Fri, 09 Aug 2013 01:16:44 GMT jmt12 [28016] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Allow the hadoop report generator to parse start and end times ... Fri, 09 Aug 2013 01:16:06 GMT jmt12 [28015] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Add an extra option that allows me to pass in the directory to write ... Fri, 09 Aug 2013 01:15:02 GMT jmt12 [28014] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Remove tasks that have had data locality established from the array ... Fri, 09 Aug 2013 01:14:22 GMT jmt12 [28013] * gs2-extensions/parallel-building/trunk/src/bin/script/replication_tests.pl (added) A new script to run a battery of Hadoop ingests at varying ... 
Sun, 21 Jul 2013 22:38:06 GMT jmt12 [27914] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Trying to get around a couple of divide-by-zero issues when ... Sun, 21 Jul 2013 22:37:02 GMT jmt12 [27913] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Made the ingester to be used (version 1 without reduce phase, or ... Thu, 04 Jul 2013 01:45:08 GMT jmt12 [27753] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Adding Handbrake's percentage complete to report - although this is ... Thu, 04 Jul 2013 01:44:22 GMT jmt12 [27752] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Data locality file not being found is no longer fatal (HDFS-NFS-Proxy ... Tue, 02 Jul 2013 02:35:42 GMT jmt12 [27732] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Nice the copy itself too Fri, 21 Jun 2013 00:25:32 GMT jmt12 [27686] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) A little more progress comments Fri, 21 Jun 2013 00:24:54 GMT jmt12 [27685] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) in the case of multiple attempts you need to retain the information ... Fri, 21 Jun 2013 00:22:25 GMT jmt12 [27684] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Adding natural sorting into report generation - so also needed to add ... Fri, 21 Jun 2013 00:20:27 GMT jmt12 [27683] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) moving a few more headings around to help with information block layout Wed, 19 Jun 2013 21:26:05 GMT jmt12 [27669] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Sort compute nodes naturally before labelling them with incremental ... 
Mon, 17 Jun 2013 22:59:52 GMT jmt12 [27654] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) * gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest.java (modified) Add the ability to stagger the starting of Mappers by placing a ... Mon, 17 Jun 2013 22:31:34 GMT jmt12 [27644] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Extended to support HDFS-access via NFS. This applies to both the ... Mon, 17 Jun 2013 22:30:13 GMT jmt12 [27643] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Changed the script generator so it can recurse through directories ... Mon, 17 Jun 2013 22:28:53 GMT jmt12 [27642] * gs2-extensions/parallel-building/trunk/src/bin/script/ffsplit.sh (added) A script I downloaded that successfully splits video files - ... Mon, 10 Jun 2013 05:09:36 GMT jmt12 [27594] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Extend hadoop_import.pl to be able to start and stop the Thrift server(s) Mon, 10 Jun 2013 04:27:49 GMT jmt12 [27590] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Adding statistics about data locality, and highlighting tasks where ... Mon, 10 Jun 2013 02:19:21 GMT jmt12 [27589] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Fixing up some minor bugs in regex's Mon, 10 Jun 2013 02:12:28 GMT jmt12 [27588] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Extend parser to support jobs that are split over several logs. Also ... 
Sun, 09 Jun 2013 23:29:03 GMT jmt12 [27587] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Allow debug mode to be enabled from the command line Sun, 09 Jun 2013 23:15:36 GMT jmt12 [27586] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Updating script to take date of hadoop job into account when ... Sun, 09 Jun 2013 22:25:10 GMT jmt12 [27585] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) The perl on Medusa won't let you immediately treat a returned array ... Sun, 09 Jun 2013 22:23:46 GMT jmt12 [27584] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) I wasn't doing -r when attempting to clear directories left in /tmp ... Sun, 09 Jun 2013 22:22:19 GMT jmt12 [27583] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Adding code to differentiate between workers in a cluster - all of ... Wed, 05 Jun 2013 23:16:31 GMT jmt12 [27560] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (modified) Fixing typo in regexp that meant filenames sometimes ignored Wed, 05 Jun 2013 23:15:28 GMT jmt12 [27559] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Changed mime-type away from binary - I hope. Meanwhile, generate ... Wed, 05 Jun 2013 01:07:43 GMT jmt12 [27551] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (modified) Altered so that it expects to be given a CSV containing parallel ... Wed, 05 Jun 2013 01:06:32 GMT jmt12 [27550] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Ensure the hostname is added to the Hadoop logs so we can identify ... Wed, 05 Jun 2013 01:04:58 GMT jmt12 [27549] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_report.pl (added) Extract information from the logs generated by parallel Greenstone ... 
Wed, 05 Jun 2013 01:04:30 GMT jmt12 [27548] * gs2-extensions/parallel-building/trunk/src/bin/script/openmpi_report.pl (added) Extract information from the logs generated by parallel Greenstone ... Tue, 04 Jun 2013 23:53:16 GMT jmt12 [27543] * gs2-extensions/parallel-building/trunk/src/bin/script/generate_gantt.pl (added) Adding generate_gantt.pl script in its original form - i.e. directly ... Mon, 03 Jun 2013 23:08:37 GMT jmt12 [27530] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Clear out old logs, and adding more comments about what the script is ... Thu, 30 May 2013 00:15:06 GMT jmt12 [27515] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) Making the file used during buffer tests be configurable Wed, 29 May 2013 22:16:22 GMT jmt12 [27512] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) Adding in a special test for measuring the effect of altering ... Mon, 27 May 2013 23:38:08 GMT jmt12 [27495] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) removing doubled up debug comments and putting some paths in ... Mon, 27 May 2013 00:27:31 GMT jmt12 [27481] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) * gs2-extensions/parallel-building/trunk/src/perllib/FileUtils.pm (modified) * gs2-extensions/parallel-building/trunk/src/perllib/FileUtils/HDThriftFS.pm (modified) * gs2-extensions/parallel-building/trunk/src/perllib/FileUtils/LocalFS.pm (modified) Adding makeAllDirectories() (which I'd only implemented in LocalFS) ... Mon, 27 May 2013 00:22:03 GMT jmt12 [27480] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (modified) Removing DateTime dependency (so HDFSShell will always fail ... 
Thu, 23 May 2013 23:13:28 GMT jmt12 [27436] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (added) Adding the actual script - rather than a symlink to my dropbox. doh Thu, 23 May 2013 23:12:52 GMT jmt12 [27435] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (deleted) Gah - only a symbolic link Thu, 23 May 2013 21:24:16 GMT jmt12 [27414] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Allowing more processing arguments to be configured at the call, and ... Thu, 23 May 2013 21:22:13 GMT jmt12 [27412] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) I obviously hadn't run this script on Karearea before - assumed all ... Thu, 23 May 2013 01:54:45 GMT jmt12 [27409] * gs2-extensions/parallel-building/trunk/src/bin/script/test_fileutils.pl (added) Unit test like testing for the FileUtils class and LocalFS, ... Thu, 23 May 2013 01:54:09 GMT jmt12 [27408] * gs2-extensions/parallel-building/trunk/src/bin/script/start_thrift_server.sh (added) A symbolic link to the actual script in the packages directory Tue, 21 May 2013 02:48:55 GMT jmt12 [27378] * gs2-extensions/parallel-building/trunk/src/bin/script/buildcol.pl (deleted) Parallel processing support now added (via buildcolutil subclass) to ... Mon, 25 Mar 2013 20:14:31 GMT jmt12 [27126] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Extra clean up commands (like removing cached versions of video ... Mon, 25 Mar 2013 20:10:44 GMT jmt12 [27125] * gs2-extensions/parallel-building/trunk/src/bin/script/flush_caches.pl (added) A script to try and flush all caches - I'm certain it's flushing disk ... Mon, 25 Mar 2013 20:05:34 GMT jmt12 [27124] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) Use the new perl version script to extract the version number - so as ... 
Sun, 24 Mar 2013 22:47:06 GMT jmt12 [27119] * gs2-extensions/parallel-building/trunk/src/bin/script/perl-version.pl (modified) Merging version finder from Medusa with the one lurking on Karearea Tue, 12 Mar 2013 00:37:21 GMT jmt12 [27058] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Adding data locality report generation to Hadoop greenstone imports Mon, 11 Mar 2013 23:18:56 GMT jmt12 [27052] * gs2-extensions/parallel-building/trunk/src/bin/script/perl-version.pl (modified) Turns out the Perl on Medusa doesn't support $^V, so I've had to ... Mon, 11 Mar 2013 22:24:11 GMT jmt12 [27041] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (modified) INC path now includes the installed extensions perl path (including ... Mon, 11 Mar 2013 22:05:41 GMT jmt12 [27040] * gs2-extensions/parallel-building/trunk/src/bin/script/perl-version.pl (added) A simple script that returns just the version number of Perl Mon, 11 Mar 2013 20:24:15 GMT jmt12 [27036] * gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl (added) A script to extract data locality and other task information from the ... Mon, 04 Mar 2013 22:55:31 GMT jmt12 [27006] * gs2-extensions/parallel-building/trunk/src/bin/script/report-hadoop-processes.pl (added) A companion script to stop-hadoop-processes that just reports running ... Mon, 04 Mar 2013 22:53:56 GMT jmt12 [27005] * gs2-extensions/parallel-building/trunk/src/bin/script/stop-runaway-hadoop.pl (added) Similar to stop-impt.pl, this script uses kill to stop runaway Hadoop ... Mon, 04 Mar 2013 22:51:49 GMT jmt12 [27004] * gs2-extensions/parallel-building/trunk/src/bin/script/stop-impt.pl (added) A script to stop (using kill) a runaway import process and any ... 
Mon, 04 Mar 2013 22:47:57 GMT jmt12 [27001] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (modified) Passing more environment variables (HADOOPPREFIX, HDFSHOST, HDFSPORT) ... Mon, 04 Mar 2013 22:08:22 GMT jmt12 [26999] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_dspace_filtermedia.pl (modified) Ensuring MPI binds to correct interface, and passing through ... Mon, 04 Mar 2013 22:07:26 GMT jmt12 [26998] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_terrier_fileindex.pl (modified) Adding maxdocs variable, lots of debug comments, added some tests for ... Mon, 25 Feb 2013 21:54:56 GMT jmt12 [26953] * gs2-extensions/parallel-building/trunk/src/bin/script/rm_archives.pl (added) Checking in the script rather than a symbolic link to the script :P Mon, 25 Feb 2013 21:54:41 GMT jmt12 [26952] * gs2-extensions/parallel-building/trunk/src/bin/script/rm_archives.pl (deleted) Accidentally checked in symbolic link rather than script Mon, 25 Feb 2013 21:43:17 GMT jmt12 [26949] * gs2-extensions/parallel-building/trunk/src/bin/script/hadoop_import.pl (added) Parallel import using Hadoop Mon, 25 Feb 2013 20:49:36 GMT jmt12 [26930] * gs2-extensions/parallel-building/trunk/src/bin/script/manifestinator.pl (modified) Randomized order of files, and added the ability to specify a maximum ... Mon, 25 Feb 2013 20:48:49 GMT jmt12 [26929] * gs2-extensions/parallel-building/trunk/src/bin/script/rm_archives.pl (added) A script to comprehensively clean up a collection between imports... ... Mon, 25 Feb 2013 20:43:49 GMT jmt12 [26923] * gs2-extensions/parallel-building/trunk/src/bin/script/importsubsetinator.pl (added) Generates a specified-size subset of a larger import directory Tue, 25 Sep 2012 00:41:51 GMT jmt12 [26242] * gs2-extensions/parallel-building/trunk/src/bin/script/parallel_terrier_fileindex.pl (modified) Modifications to progress messages to improve extracting information ...