source: gs2-extensions/parallel-building/trunk/src/perllib/FileUtils/HDFSShell.pm@ 27386

Last change on this file since 27386 was 27386, checked in by jmt12, 11 years ago

Forgot these were just symbolic links to my Dropbox folder - adding in the actual files

File size: 11.7 KB
Line 
1###############################################################################
2#
3# HDFSShell.pm -- file functions acting upon a HDFS via the CLI hadoop
4# application
5#
6# A component of the Greenstone digital library software from the New Zealand
7# Digital Library Project at the University of Waikato, New Zealand.
8#
9# Copyright (C) 2013 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software
13# Foundation; either version 2 of the License, or (at your option) any later
14# version.
15#
16# This program is distributed in the hope that it will be useful, but WITHOUT
17# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19# details.
20#
21# You should have received a copy of the GNU General Public License along with
22# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
23# Ave, Cambridge, MA 02139, USA.
24#
25###############################################################################
26
27package FileUtils::HDFSShell;
28
29# Pragma
30use strict;
31
32# Modules
33use DateTime;
34
35# Configuration
36my $debug = 0;
37
38################################################################################
39######################### Private Functions & Variables ########################
40################################################################################
41
## @function _executeHDFSCommand()
#
# Runs a "hadoop fs" command through the shell, capturing STDOUT and STDERR
# together, and hands back either the child status or the captured output.
#
# @param $return_result 0 to get the child status ($?) back, 1 to get the
#        captured output instead (for instance when parsing ls)
# @param ... the remaining arguments (the action followed by any paths) are
#        passed unchanged to _generateHDFSCommand() to build the command
# @return the value of $? after the command ran, or the captured output
#         (never undef) when $return_result is true
#
sub _executeHDFSCommand
{
  my $return_result = shift(@_);
  if ($return_result != 0 && $return_result != 1)
  {
    &FileUtils::printError('Unexpected value for return_result argument - should be 0 or 1: ' . $return_result, 1);
  }
  my $command = &_generateHDFSCommand(@_);
  my $result = `$command 2>&1`;
  # capture the status straight away, before anything else can overwrite $?
  my $return_value = $?;
  # backticks give undef in scalar context when the command could not be
  # started; normalise so callers can always pattern match or split the output
  if (!defined $result)
  {
    $result = '';
  }
  &_printDebug(' -> _executeHDFSCommand() => |' . $result . '| [' . $return_value . ']');
  # sometimes we may want the actual resulting output returned, for
  # instance when parsing ls
  if ($return_result)
  {
    $return_value = $result;
  }
  return $return_value;
}
66## _executeHDFSCommand()
67
68
## @function _generateHDFSCommand()
#
# Builds the shell command line for a "hadoop fs" action.
#
# @param $action the action name without its leading dash; callers in this
#        file also pass an action plus option, such as 'test -e'
# @param ... zero or more path arguments. The first 'HDFSShell:' in each
#        path is rewritten to 'hdfs:'. A lone '-' (standard stream) is
#        passed through unquoted; every other argument is double quoted.
# @return the command string; every argument is followed by a single space,
#         so the command ends in a space
#
sub _generateHDFSCommand
{
  my ($action, @args) = @_;
  my $arguments = '';
  foreach my $path (@args)
  {
    # Replace the prefix with one HDFS Shell understands
    $path =~ s/HDFSShell:/hdfs:/;
    # special case for standard streams
    if ($path eq '-')
    {
      $arguments .= '- ';
    }
    else
    {
      # Escape the characters that stay special inside double quotes in a
      # POSIX shell (backslash, double quote, dollar and backtick) so that a
      # path can neither terminate the quoting nor trigger an expansion
      (my $escaped_path = $path) =~ s/([\\"\$\`])/\\$1/g;
      $arguments .= '"' . $escaped_path . '" ';
    }
  }
  my $command = 'hadoop fs -' . $action . ' ' . $arguments;
  &_printDebug(' -> _generateHDFSCommand("' . $action . '", ...) => |' . $command . '|');
  return $command;
}
94## _generateHDFSCommand()
95
96
## @function _printDebug()
#
# Writes a diagnostic line to STDERR, prefixed with '[DEBUG] ', but only
# when the file level $debug flag is true.
#
# @param $message the text to report
#
sub _printDebug
{
  my $text = $_[0];
  print STDERR '[DEBUG] ' . $text . "\n" if ($debug);
}
107## _printDebug()
108
109
110################################################################################
111############################### Public Functions ###############################
112################################################################################
113
114
## @function closeFileHandle()
#
# Closes a file handle previously opened with openFileHandle().
#
# The handles opened by this driver are pipes to or from a hadoop process,
# and close() on a pipe waits for that process and returns false when it
# exited with a non-zero status. The result of close() is therefore the
# only place a failed write (put) can be noticed, so it is reported to the
# caller rather than discarded.
#
# @param $fh_ref a reference to the scalar holding the file handle
# @return 1 if the handle was closed cleanly, 0 otherwise (including when
#         no handle was supplied)
#
sub closeFileHandle
{
  my $fh_ref = shift(@_);
  # nothing to close - and dereferencing undef would die under strict refs
  if (!defined $fh_ref || !defined $$fh_ref)
  {
    return 0;
  }
  return (close($$fh_ref) ? 1 : 0);
}
123## closeFileHandle()
124
125
## @function fileSize()
#
# Looks up the size of a path as reported by fileStats().
#
# @param $path the path to query
# @return the 'filesize' entry of the statistics returned by fileStats()
#
sub fileSize
{
  my $path = $_[0];
  my %stats = %{&fileStats($path)};
  return $stats{'filesize'};
}
134## fileSize()
135
136
## @function fileStats()
#
# Runs "hadoop fs -ls" on a path and parses the final entry of the listing
# into a hash of statistics.
#
# The pattern is only anchored at the end of the output, so when the
# listing has several entries it is the last one that is parsed.
#
# @param $path the path to query
# @return a reference to a hash with the keys 'filename', 'replicas',
#         'filesize', 'modification_date' (YYYY-MM-DD), 'modification_time'
#         (HH:MM), 'permissions', 'userid' and 'groupid'. The hash is empty
#         if the output could not be parsed and printError() returned.
#
sub fileStats
{
  my ($path) = @_;
  my $stats = {};
  my $result = &_executeHDFSCommand(1, 'ls', $path);
  if (!defined $result)
  {
    $result = '';
  }
  # - parse the results. Compared with a plain "digits and rwx" pattern this
  #   also accepts: a '-' in the replica column (shown for directories),
  #   sticky bit (t/T) and ACL marker (+) in the permissions, names that
  #   contain spaces, and trailing whitespace after the name
  my @fields = ($result =~ /([ds\-][rwxtT\-]+\+?)\s+(\d+|-)\s+([^\s]+)\s+([^\s]+)\s+(\d+)\s+(\d\d\d\d-\d\d-\d\d)\s+(\d\d:\d\d)\s+([^\s].*?)\s*$/);
  if (@fields)
  {
    # copy the captures out at once so no later match can clobber them
    my ($permissions, $replicas, $userid, $groupid, $filesize, $mod_date, $mod_time, $filename) = @fields;
    $stats->{'filename'} = $filename;
    $stats->{'replicas'} = $replicas;
    $stats->{'filesize'} = $filesize;
    $stats->{'modification_date'} = $mod_date;
    $stats->{'modification_time'} = $mod_time;
    $stats->{'permissions'} = $permissions;
    $stats->{'userid'} = $userid;
    $stats->{'groupid'} = $groupid;
  }
  else
  {
    &FileUtils::printError('Failed to parse -ls result: ' . $result, 1);
  }
  return $stats;
}
162## fileStats()
163
164
## @function fileTest()
#
# Emulates a subset of Perl's file test operators using "hadoop fs -test".
#
# @param $filename_full_path the path to test
# @param $test_op one of '-d', '-e', '-f', '-z' or '-l'. An undefined
#        operator and '-l' are both treated as '-e' (HDFS doesn't support
#        symlinking). Anything else is reported as unsupported.
# @return 1 if the test succeeded, 0 otherwise
#
sub fileTest
{
  my ($filename_full_path, $test_op) = @_;
  # Special case: no operator, or a symlink test, becomes an existence test
  $test_op = '-e' if (!defined $test_op || $test_op eq '-l');
  # the operators that can be handed straight to the hadoop test action
  my %direct_op = ('-d' => 1, '-e' => 1, '-z' => 1);
  # command statuses above zero are errors, so start out as "not passed"
  my $status = -1;
  if ($test_op eq '-f')
  {
    # Special case: a file is something that exists (-e) but for which the
    # directory test (-d) fails; the second command only runs if the first
    # one succeeded
    if (&_executeHDFSCommand(0, 'test -e', $filename_full_path) == 0
        && &_executeHDFSCommand(0, 'test -d', $filename_full_path) > 0)
    {
      $status = 0;
    }
  }
  elsif ($direct_op{$test_op})
  {
    $status = &_executeHDFSCommand(0, 'test ' . $test_op, $filename_full_path);
  }
  else
  {
    # very limited test op support for HDFS
    &FileUtils::printError('Unknown or unsupported test mode: ' . $test_op);
  }
  return 1 if ($status == 0);
  return 0;
}
202## fileTest()
203
204
## @function filenameConcatenate()
#
# Joins a protocol and any number of path parts into a single path.
#
# Runs of slashes within the joined parts are collapsed to one, but the
# slash added after the protocol is not, so parts that begin with a slash
# give a double slash after the protocol. One trailing slash or backslash
# is removed from the final string.
#
# @param $protocol the protocol prefix, for example 'HDFSShell:'
# @param ... the path parts to join with '/'
# @return the concatenated path
#
sub filenameConcatenate
{
  my ($protocol, @parts) = @_;
  # join the parts and squeeze any repeated slashes
  (my $joined_parts = join('/', @parts)) =~ s/[\/]+/\//g;
  # prefix the protocol (which may cause multiple slashes), then drop a
  # single trailing separator
  (my $full_name = $protocol . '/' . $joined_parts) =~ s/[\\\/]$//;
  return $full_name;
}
219## filenameConcatenate()
220
221
## @function makeDirectory()
#
# Creates a directory using "hadoop fs -mkdir".
#
# @param $dir the directory to create
# @return 1 if the command reported a status of 0, 0 otherwise
#
sub makeDirectory
{
  my $dir = $_[0];
  my $status = &_executeHDFSCommand(0, 'mkdir', $dir);
  # a zero status from the command means success
  return 1 if ($status == 0);
  return 0;
}
231## makeDirectory()
232
233
## @function modificationTime()
#
# Converts the modification date and time reported by fileStats() into
# seconds since the epoch. The listing only carries hours and minutes, so
# the result has minute resolution. The values are interpreted in the
# 'local' time zone.
#
# @param $path the path to query
# @return the modification time as an epoch value, or nothing (undef in
#         scalar context) if the date or time could not be determined and
#         printError() returned
#
sub modificationTime
{
  my ($path) = @_;
  my $file_stats = &fileStats($path);
  my $mod_date = $file_stats->{'modification_date'};
  my $mod_time = $file_stats->{'modification_time'};
  # Capture through list assignment so a failed match leaves the variables
  # undefined instead of silently reusing $1.. from an earlier match
  my ($mod_year, $mod_month, $mod_day);
  if (defined $mod_date)
  {
    ($mod_year, $mod_month, $mod_day) = $mod_date =~ /(\d\d\d\d)-(\d\d)-(\d\d)/;
  }
  my ($mod_hour, $mod_minute);
  if (defined $mod_time)
  {
    ($mod_hour, $mod_minute) = $mod_time =~ /(\d\d):(\d\d)/;
  }
  if (!defined $mod_day || !defined $mod_minute)
  {
    &FileUtils::printError('Failed to determine modification time of: ' . $path, 1);
    return;
  }
  my $mod_datetime = DateTime->new(year => $mod_year,
                                   month => $mod_month,
                                   day => $mod_day,
                                   hour => $mod_hour,
                                   minute => $mod_minute,
                                   time_zone => 'local');
  return $mod_datetime->epoch();
}
257## modificationTime()
258
259
## @function openFileHandle()
#
# Opens a pipe to a hadoop process so that a HDFS file can be written to
# (via "put" reading from standard input) or read from (via "cat").
#
# @param $path the HDFS path to open
# @param $mode '>' or 'w' to write, '>>' or 'a' to append (not supported);
#        any other value, including undef, opens the path for reading
# @param $fh_ref a reference to the scalar that receives the file handle
# @return 1 if a pipe was opened, 0 if not (only reachable when
#         printError() returns)
#
sub openFileHandle
{
  my ($path, $mode, $fh_ref) = @_;
  # a missing mode means read, as does any unrecognised mode
  if (!defined $mode)
  {
    $mode = '<';
  }
  if ($mode eq '>>' || $mode eq 'a')
  {
    &FileUtils::printError('Append (>>) mode not supported', 1);
    return 0;
  }
  if ($mode eq '>' || $mode eq 'w')
  {
    # the put command fails if the file already exists
    if (&fileTest($path, '-e'))
    {
      &removeFiles($path);
    }
    # The three argument form keeps the open mode separate from the command
    # text. NOTE: a successful pipe open only means the process was started;
    # a failing put is reported when the handle is closed.
    my $command = &_generateHDFSCommand('put', '-', $path);
    if (!open($$fh_ref, '|-', $command))
    {
      &FileUtils::printError('Failed to open pipe to HDFS (put) for writing: ' . $path, 1);
      return 0;
    }
  }
  else
  {
    my $command = &_generateHDFSCommand('cat', $path);
    if (!open($$fh_ref, '-|', $command))
    {
      &FileUtils::printError('Failed to open pipe to HDFS (cat) for reading: ' . $path, 1);
      return 0;
    }
  }
  return 1;
}
284## openFileHandle()
285
286
## @function readDirectory()
#
# Lists the names of the entries found by "hadoop fs -ls" on a path.
#
# @param $path the directory to list
# @return a reference to an array holding the final path component of each
#         listed entry (empty if nothing could be listed)
#
sub readDirectory
{
  my ($path) = @_;
  my @files;
  my $result = &_executeHDFSCommand(1, 'ls', $path);
  if (!defined $result)
  {
    return \@files;
  }
  foreach my $line (split(/\r?\n/, $result))
  {
    # The captured output also holds the "Found N items" header and anything
    # written to STDERR. Only lines that begin with a permission string are
    # listing entries - without this check an error message that mentions a
    # path would be mistaken for a file name.
    if ($line !~ /^[ds\-][rwxtT\-]{9}/)
    {
      next;
    }
    # the entry name is whatever follows the last slash on the line
    if ($line =~ /\/([^\/]+)$/)
    {
      push(@files, $1);
    }
  }
  return \@files;
}
305## readDirectory()
306
307
## @function removeFiles()
#
# Removes a path from HDFS using the "rm" action, or the recursive "rmr"
# action when asked to include directories.
#
# @param $path the path to remove
# @param $including_dir optional; when defined and true "rmr" is used
#        instead of "rm"
# @return 1 if the command reported a status of 0, 0 otherwise
#
sub removeFiles
{
  my ($path, $including_dir) = @_;
  my $action = (defined $including_dir && $including_dir) ? 'rmr' : 'rm';
  my $status = &_executeHDFSCommand(0, $action, $path);
  # a zero status from the command means success
  return 1 if ($status == 0);
  return 0;
}
325## removeFiles()
326
327
## @function removeFilesFiltered()
#
# Depth first, recursive removal of the files and directories under the
# given path(s) that satisfy the accept and reject patterns.
#
# Files are skipped when they match $reject_re, and otherwise removed when
# $accept_re is undefined or matches. Directories are always descended
# into, but are themselves only removed when neither pattern is defined.
#
# @param $paths a single path, or a reference to an array of paths
# @param $accept_re optional pattern a file path must match to be removed
# @param $reject_re optional pattern that protects matching file paths
# @return the number of files and directories successfully removed
#
sub removeFilesFiltered
{
  my ($paths, $accept_re, $reject_re) = @_;
  my @targets = (ref $paths eq "ARRAY") ? @$paths : ($paths);
  # directories may only be deleted when no filtering is in effect
  my $unfiltered = (!defined $accept_re && !defined $reject_re);
  my $removed_count = 0;
  TARGET: foreach my $target (@targets)
  {
    # remove trailing slashes
    $target =~ s/[\/\\]+$//;
    if (!&fileTest($target, '-e'))
    {
      &FileUtils::printError('path does not exist: ' . $target);
      next TARGET;
    }
    if (&fileTest($target, '-d'))
    {
      # deal with the children before considering the directory itself
      foreach my $entry (@{&readDirectory($target)})
      {
        $removed_count += &removeFilesFiltered($target . '/' . $entry, $accept_re, $reject_re);
      }
      next TARGET unless ($unfiltered);
      # remove this directory
      if (&removeFiles($target, 1) != 1)
      {
        &FileUtils::printError('could not remove directory: ' . $target);
      }
      else
      {
        $removed_count++;
      }
      next TARGET;
    }
    # a plain file: rejection is checked before acceptance
    next TARGET if (defined $reject_re && ($target =~ m/$reject_re/));
    next TARGET unless ((!defined $accept_re) || ($target =~ m/$accept_re/));
    # remove this file
    if (&removeFiles($target) != 1)
    {
      &FileUtils::printError('could not remove file: ' . $target);
    }
    else
    {
      $removed_count++;
    }
  }
  return $removed_count;
}
390## removeFilesFiltered()
391
392
## @function removeFilesRecursive()
#
# Removes a path and everything beneath it by delegating to
# removeFilesFiltered() with neither an accept nor a reject pattern.
#
# @param $path the path (or array reference of paths) to remove
# @return whatever removeFilesFiltered() returns
#
sub removeFilesRecursive
{
  my $path = $_[0];
  # both patterns deliberately left undefined: nothing is filtered out
  my ($no_accept_re, $no_reject_re);
  return &removeFilesFiltered($path, $no_accept_re, $no_reject_re);
}
402## removeFilesRecursive()
403
404
## @function supportsSymbolicLink()
#
# Reports whether this driver can create symbolic links.
#
# @return always 0
#
sub supportsSymbolicLink
{
  my $symlinks_supported = 0;
  return $symlinks_supported;
}
411## supportsSymbolicLink()
412
413
## @function transferFile()
#
# Copies or moves a path within HDFS.
#
# @param $mode 'COPY' to use the "cp" action; any other value uses "mv"
# @param $src the source path
# @param $dst the destination path
# @return 1 if the command reported a status of 0, 0 otherwise
#
sub transferFile
{
  my ($mode, $src, $dst) = @_;
  my $action = ($mode eq 'COPY') ? 'cp' : 'mv';
  my $status = &_executeHDFSCommand(0, $action, $src, $dst);
  # a zero status from the command means success
  return 1 if ($status == 0);
  return 0;
}
431## transferFile()
432
433
## @function transferFileFromLocal()
#
# Uploads a local file into HDFS using the "put" action, optionally
# deleting the local file afterwards.
#
# The local source is only ever deleted after the upload has succeeded:
# a failed or refused transfer leaves it untouched.
#
# @param $mode 'MOVE' to delete the local source once uploaded; any other
#        value leaves the source in place
# @param $src the local file to upload
# @param $dst the HDFS destination; if it is a directory the source's file
#        name is appended to it
# @return 1 if the file was uploaded (and, for 'MOVE', the source was
#         removed), 0 otherwise
#
sub transferFileFromLocal
{
  my ($mode, $src, $dst) = @_;
  if (!-f $src)
  {
    &FileUtils::printError('Source file (during ' . $mode . ') doesn\'t exists: ' . $src);
    return 0;
  }
  if (&fileTest($dst, '-d'))
  {
    my ($filename) = $src =~ /([^\\\/]+)$/;
    $dst .= '/' . $filename;
  }
  if (&fileTest($dst, '-f'))
  {
    # stop here: carrying on would let a 'MOVE' delete the source even
    # though nothing was transferred
    &FileUtils::printError('Destination file (during ' . $mode . ') already exists: ' . $dst);
    return 0;
  }
  my $result = &_executeHDFSCommand(0, 'put', $src, $dst);
  if ($result != 0)
  {
    # the upload failed, so the local copy must be kept
    return 0;
  }
  if (defined $mode && $mode eq 'MOVE')
  {
    unlink($src);
    # failed to delete somehow
    if (-f $src)
    {
      return 0;
    }
  }
  return 1;
}
465## transferFileFromLocal()
466
467
## @function transferFileToLocal()
#
# Downloads a file from HDFS to the local file system using the "get"
# action, optionally removing the HDFS copy afterwards.
#
# The HDFS source is only ever removed after the download has succeeded:
# a failed or refused transfer leaves it untouched.
#
# @param $mode 'MOVE' to remove the HDFS source once downloaded; any other
#        value leaves the source in place
# @param $src the HDFS file to download
# @param $dst the local destination; if it is a directory the source's file
#        name is appended to it
# @return 1 if the file was downloaded (and, for 'MOVE', the source was
#         removed), 0 otherwise
#
sub transferFileToLocal
{
  my ($mode, $src, $dst) = @_;
  if (!&fileTest($src, '-f'))
  {
    &FileUtils::printError('Source file (during ' . $mode . ') does not exist: ' . $src);
    return 0;
  }
  if (-d $dst)
  {
    my ($filename) = $src =~ /([^\\\/]+)$/;
    $dst .= '/' . $filename;
  }
  if (-e $dst)
  {
    # stop here: carrying on would let a 'MOVE' remove the source even
    # though nothing was transferred
    &FileUtils::printError('Destination file (during ' . $mode . ') already exists: ' . $dst);
    return 0;
  }
  my $result = &_executeHDFSCommand(0, 'get', $src, $dst);
  if ($result != 0)
  {
    # the download failed, so the HDFS copy must be kept
    return 0;
  }
  if (defined $mode && $mode eq 'MOVE')
  {
    return (&removeFiles($src) ? 1 : 0);
  }
  return 1;
}
494## transferFileToLocal()
495
496
4971;
Note: See TracBrowser for help on using the repository browser.