source: gs2-extensions/parallel-building/trunk/src/bin/script/parse_task_info_from_hadoop_log.pl@ 27589

Last change on this file since 27589 was 27589, checked in by jmt12, 11 years ago

Fixing up some minor bugs in regex's

  • Property svn:executable set to *
File size: 9.1 KB
#!/usr/bin/perl

# Pragma
use strict;
use warnings;

# Configuration
my $debug = 0;

# Requires setup.bash to have been sourced
BEGIN
{
  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
  die "HADOOP_PREFIX not set (set in <gsdl>/ext/parallel_processing/setup.bash)\n" unless defined $ENV{'HADOOP_PREFIX'};
  die "HDFS HOST not set (set in <gsdl>/ext/parallel_processing/setup.bash)\n" unless defined $ENV{'HDFSHOST'};
  die "HDFS PORT not set (set in <gsdl>/ext/parallel_processing/setup.bash)\n" unless defined $ENV{'HDFSPORT'};
  # Ensure Greenstone Perl locations are in INC
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib');
  unshift (@INC, $ENV{'GSDLHOME'} . '/perllib/cpan');
  # we'll need the perl version number
  my ($version_number) = `perl-version.pl`;
  if (defined $ENV{'GSDLEXTS'})
  {
    my @extensions = split(/:/, $ENV{'GSDLEXTS'});
    foreach my $e (@extensions)
    {
      my $ext_prefix = $ENV{'GSDLHOME'} . '/ext/' . $e;
      unshift (@INC, $ext_prefix . '/perllib');
      unshift (@INC, $ext_prefix . '/perllib/cpan');
      unshift (@INC, $ext_prefix . '/' . $ENV{'GSDLOS'} . '/lib/perl/' . $version_number);
    }
  }
}

# Libraries (depends on the unshift calls above)
use Sort::Key::Natural qw(natsort);

# Begin
print "===== Parse Hadoop Log =====\n";

# 0. Init
if (!defined $ARGV[0])
{
  die("usage: parse_task_info_from_hadoop_log.pl <results dir> [-debug]\n");
}
my $results_dir = $ARGV[0];
if (!-d $results_dir)
{
  die("Error! Can't find results directory: " . $results_dir . "\n");
}
print " Results directory: " . $results_dir . "\n";

if (defined $ARGV[1] && $ARGV[1] eq '-debug')
{
  $debug = 1;
}

# 1. Determine job ID
my $hadoop_log_path = &fileCat($results_dir, 'hadoop.log');
if (!-e $hadoop_log_path)
{
  die("Error! Hadoop log file cannot be found: " . $hadoop_log_path . "\n");
}
print " Hadoop log path: " . $hadoop_log_path . "\n";
print " * Determine JobID: ";
my $job_id;
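# - hadoop.log is expected to contain a client-side progress line roughly of
#   the form matched below (the job ID shown here is illustrative only):
#     ... Running job: job_201305011234_0001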
my $result = `grep "Running job:" "$hadoop_log_path"`;
if ($result =~ /Running job: job_(\d+_\d+)/)
{
  $job_id = $1;
}
else
{
  die("Error! Failed to locate JobID\n");
}
print $job_id . "\n";
# - we'll need the date to locate the appropriate log file
my $log_date_year = 0;
my $log_date_month = 0;
my $log_date_day = 0;
if ($job_id =~ /^(\d\d\d\d)(\d\d)(\d\d)/)
{
  $log_date_year = $1;
  $log_date_month = $2;
  $log_date_day = $3;
}
else
{
  die('Error! Failed to parse date from Job ID: ' . $job_id . "\n");
}

# 2. Determine user and system details
my $username = `whoami`;
chomp($username);
print " Username: " . $username . "\n";
my $hostname = `hostname`;
chomp($hostname);
print " Hostname: " . $hostname . "\n";
my $data_locality_report_path = &fileCat($results_dir, 'data_locality.csv');
print " Report path: " . $data_locality_report_path . "\n";

# 3. Parse log
print " * Parse JobTracker Log(s)...\n";
my $tid_2_splits = {};
my $tid_2_node = {};
my $aid_2_node = {};
my $job_complete = 0;
my $parsed_latest_log = 0;
while (!$job_complete && !$parsed_latest_log)
{
  # - determine appropriate job tracker log
  my $jobtracker_log_path_prefix = &fileCat($ENV{'HADOOP_PREFIX'}, 'logs', 'hadoop-' . $username . '-jobtracker-' . $hostname . '.log');
  my $jobtracker_log_path = sprintf('%s.%04d-%02d-%02d', $jobtracker_log_path_prefix, $log_date_year, $log_date_month, $log_date_day);
  # - maybe the log hasn't been rolled yet
  if (!-e $jobtracker_log_path)
  {
    $jobtracker_log_path = $jobtracker_log_path_prefix;
    # - nope, no applicable log found
    if (!-e $jobtracker_log_path)
    {
      die('Error! Hadoop JobTracker log file cannot be found: ' . $jobtracker_log_path . "\n");
    }
    else
    {
      $parsed_latest_log = 1;
    }
  }
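  # - in other words, the code above looks first for the rolled daily log
  #   $HADOOP_PREFIX/logs/hadoop-<user>-jobtracker-<host>.log.YYYY-MM-DD
  #   and falls back to the current (un-rolled) .log file if that is absent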
  print " - parsing JobTracker log: " . $jobtracker_log_path . "\n";

  if (open(JTLIN, '<', $jobtracker_log_path))
  {
    my $line = '';
    while ($line = <JTLIN>)
    {
      # Tips provide a match between task and file splits
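      # - such a line looks roughly like the following (the task number and
      #   node name are illustrative):
      #     ... tip:task_201305011234_0001_m_000003 has split on node:/default-rack/compute-node-1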
      if ($line =~ /tip:task_${job_id}(_m_\d+) has split on node:\/default-rack\/([^\.\r\n]+)/)
      {
        my $task_id = $job_id . $1;
        my $compute_node = $2;
        &debugPrint('found tip: ' . $task_id . ' => ' . $compute_node);
        if (!defined $tid_2_splits->{$task_id})
        {
          $tid_2_splits->{$task_id} = [$compute_node];
        }
        else
        {
          push(@{$tid_2_splits->{$task_id}}, $compute_node);
        }
      }
      # JobTracker (MAP) entries give us a mapping between task, attempt, and
      # compute node
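      # - again, roughly of the form (attempt ID and tracker name illustrative):
      #     ... Adding task (MAP) 'attempt_201305011234_0001_m_000003_0' ... tracker_compute-node-1.local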
      elsif ($line =~ /Adding task \(MAP\) 'attempt_${job_id}(_m_\d+)(_\d+)'.*tracker_([^\.]+).local/)
      {
        my $task_id = $job_id . $1;
        my $attempt_id = $job_id . $1 . $2;
        my $compute_node = $3;
        &debugPrint('found MAP: ' . $attempt_id . ' => ' . $compute_node);
        $aid_2_node->{$attempt_id} = {'compute_node' => $compute_node,
                                      'succeeded' => 0
                                     };
      }
      # Watch for attempt successes (so we can weed out failures)
      elsif ($line =~ /Task 'attempt_${job_id}(_m_\d+_\d+)' has completed .* successfully/)
      {
        my $attempt_id = $job_id . $1;
        &debugPrint('successful attempt: ' . $attempt_id);
        if (defined $aid_2_node->{$attempt_id})
        {
          $aid_2_node->{$attempt_id}->{'succeeded'} = 1;
        }
      }
      # And job completion... so we can keep parsing other log files as
      # necessary
      elsif ($line =~ /Job job_${job_id} has completed successfully\./)
      {
        $job_complete = 1;
      }
    }
    close(JTLIN);
  }
  else
  {
    die('Error! Failed to open JobTracker log for reading: ' . $jobtracker_log_path . "\n");
  }
  # Increment the day by one - unfortunately this leads to unpleasant date
  # maths to ensure month and year roll over appropriately too
  $log_date_day++;
  # On to a new month?
  # Leap Year Feb > 29 otherwise Feb > 28
  # Apr, Jun, Sep, Nov > 30
  # Rest > 31
  if ($log_date_month == 2 && ((&isLeapYear($log_date_year) && $log_date_day > 29) || (!&isLeapYear($log_date_year) && $log_date_day > 28)))
  {
    $log_date_day = 1;
    $log_date_month++;
  }
  elsif (($log_date_month == 4 || $log_date_month == 6 || $log_date_month == 9 || $log_date_month == 11) && $log_date_day > 30)
  {
    $log_date_day = 1;
    $log_date_month++;
  }
  elsif ($log_date_day > 31)
  {
    $log_date_day = 1;
    $log_date_month++;
  }
  # On to a new year?
  if ($log_date_month > 12)
  {
    $log_date_month = 1;
    $log_date_year++;
  }
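  # (Equivalent, and less fiddly, would be to let the core Time::Local module
  #  do the day increment; a rough sketch, not used here:
  #    use Time::Local;
  #    my $noon = timelocal(0, 0, 12, $log_date_day, $log_date_month - 1, $log_date_year);
  #    my @next = localtime($noon + 24 * 60 * 60);
  #    ($log_date_day, $log_date_month, $log_date_year) = ($next[3], $next[4] + 1, $next[5] + 1900);
  #  Noon is used so daylight-saving transitions cannot shift the resulting date.)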
}
print " Done\n";

if (!$job_complete)
{
[27588]219 print "Warning! Failed to parse in information for a complete job.\n";
}

# 4. Write CSV of information
print " * Writing Job Information... ";
&debugPrint("AttemptID \tComputeNode\tSucceeded", 1);
foreach my $attempt_id (keys %{$aid_2_node})
{
  &debugPrint($attempt_id . "\t" . $aid_2_node->{$attempt_id}->{'compute_node'} . "\t" . $aid_2_node->{$attempt_id}->{'succeeded'});
}
&debugPrint("TaskID \tComputeNodeSplits");
my $split_counter = 0;
foreach my $task_id (keys %{$tid_2_splits})
{
  &debugPrint($task_id . "\t" . join(',', natsort(@{$tid_2_splits->{$task_id}})));
  $split_counter++;
}
&debugPrint(' * Number of split records: ' . $split_counter);
&debugPrint('');

# - open the CSV file and write out the combined information from above
if (open(CSVOUT, '>:utf8', $data_locality_report_path))
{
  print CSVOUT "TaskNo,AttemptNo,Data Local,Compute Node,Splits\n";
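  # - an illustrative data row (all values made up) would then look like:
  #     000003,0,1,"compute-node-1","compute-node-1,compute-node-2"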
  foreach my $attempt_id (natsort(keys %{$aid_2_node}))
  {
    my ($job_id, $task_number, $attempt_number) = $attempt_id =~ /^(\d+_\d+)_m_(\d+)_(\d+)/;
    my $task_id = $job_id . '_m_' . $task_number;
    my $compute_node = $aid_2_node->{$attempt_id}->{'compute_node'};
    my @splits = @{$tid_2_splits->{$task_id}};
    my $data_local = 0;
    if (grep($_ eq $compute_node, @splits))
    {
      $data_local = 1;
    }
    print CSVOUT $task_number . "," . $attempt_number . "," . $data_local . ",\"" . $compute_node . "\",\"" . join(',', natsort(@splits)) . "\"\n";
    $tid_2_splits->{$task_id} = undef;
  }

  # Report on any other splits that were recorded in the log, but for unknown
  # reasons aren't matched with a 'successful' task
  foreach my $task_id (keys %{$tid_2_splits})
  {
    if (defined $tid_2_splits->{$task_id})
    {
      my ($job_id, $task_number) = $task_id =~ /^(\d+_\d+)_m_(\d+)/;
      my @splits = @{$tid_2_splits->{$task_id}};
      print CSVOUT $task_number . ",-1,-1,\"\",\"" . join(',', natsort(@splits)) . "\"\n";
    }
  }

  close(CSVOUT);
}
else
{
  die("Error! Failed to open file for writing: " . $data_locality_report_path);
}
print "Done\n";

# 5. Done
print "===== Complete! =====\n\n";
exit;

# Subs

sub debugPrint
{
  my ($msg, $force_newline) = @_;
  if ($debug)
  {
    if (defined $force_newline && $force_newline)
    {
      print "\n[debug] " . $msg . "\n";
    }
    else
    {
      print '[debug] ' . $msg . "\n";
    }
  }
}
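# e.g. &debugPrint('message')    prints "[debug] message" when $debug is set;
#      &debugPrint('message', 1) additionally forces a leading blank line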

sub fileCat
{
  my $path = join('/', @_);
  $path =~ s/\/\/+/\//g;
  return $path;
}
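# e.g. &fileCat('/foo/', '/bar', 'baz.log') returns '/foo/bar/baz.log'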

## @function isLeapYear()
#
sub isLeapYear
{
  my ($year) = @_;
  if ($year % 400 == 0)
  {
    return 1;
  }
  elsif ($year % 100 == 0)
  {
    return 0;
  }
  elsif ($year % 4 == 0)
  {
    return 1;
  }
  return 0;
}
## isLeapYear() ##
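# e.g. isLeapYear(2000) == 1, isLeapYear(1900) == 0, isLeapYear(2012) == 1, isLeapYear(2013) == 0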

1;