source: gs2-extensions/parallel-building/trunk/src/perllib/FileUtils/HDFSShell.pm@ 27525

Last change on this file since 27525 was 27525, checked in by jmt12, 11 years ago

Adding in a 'isHDFS()' function so that some plugins (SimpleVideoPlug) can know to move the files where other executables (HandbrakeCLI etc) can see them

File size: 12.0 KB
RevLine 
[27386]1###############################################################################
2#
3# HDFSShell.pm -- file functions acting upon a HDFS via the CLI hadoop
4# application
5#
6# A component of the Greenstone digital library software from the New Zealand
7# Digital Library Project at the University of Waikato, New Zealand.
8#
9# Copyright (C) 2013 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software
13# Foundation; either version 2 of the License, or (at your option) any later
14# version.
15#
16# This program is distributed in the hope that it will be useful, but WITHOUT
17# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19# details.
20#
21# You should have received a copy of the GNU General Public License along with
22# this program; if not, write to the Free Software Foundation, Inc., 675 Mass
23# Ave, Cambridge, MA 02139, USA.
24#
25###############################################################################
26
package FileUtils::HDFSShell;

# Pragma
use strict;

# Configuration
# When set to a true value, _printDebug() writes its trace messages to STDERR;
# when false (the default) those messages are suppressed.
my $debug = 0;
34
35################################################################################
36######################### Private Functions & Variables ########################
37################################################################################
38
## @function _executeHDFSCommand()
#
# Builds a 'hadoop fs' command line via _generateHDFSCommand(), runs it through
# the shell with STDERR folded into STDOUT, and hands back either the wait
# status or the captured output.
#
# @param  return_result  0 to get the raw value of $? back, 1 to get the text
#                        the command printed (useful when parsing 'ls')
# @param  ...            the action followed by its path arguments, passed
#                        straight on to _generateHDFSCommand()
# @return the value of $? after the command ran, or the captured output
#
sub _executeHDFSCommand
{
  my ($return_result, @command_parts) = @_;
  # Anything other than 0 or 1 is reported as an error (the trailing 1 is
  # presumably the 'fatal' flag of FileUtils::printError - confirm there)
  unless ($return_result == 0 || $return_result == 1)
  {
    &FileUtils::printError('Unexpected value for return_result argument - should be 0 or 1: ' . $return_result, 1);
  }
  my $command = &_generateHDFSCommand(@command_parts);
  my $output = `$command 2>&1`;
  # grab the status immediately, before anything else can overwrite $?
  my $status = $?;
  &_printDebug(' -> util::executeHDFSCommand() => |' . $output . '| [' . $status . ']');
  return ($return_result ? $output : $status);
}
## _executeHDFSCommand()
64
65
## @function _generateHDFSCommand()
#
# Assembles the shell command line for a 'hadoop fs' action.
#
# Each argument has its 'HDFSShell:' prefix rewritten to 'hdfs:' and is then
# wrapped in double quotes. Characters that remain special to a POSIX shell
# inside double quotes (backslash, double quote, dollar and backtick) are
# backslash-escaped so that a path containing them can neither break the
# command line nor be expanded by the shell. A lone '-' (standard stream) is
# passed through unquoted.
#
# @param  action  the hadoop fs action without its leading dash (e.g. 'ls',
#                 'put', 'test -e')
# @param  ...     zero or more path arguments
# @return the command string; every argument is followed by a single space
#
sub _generateHDFSCommand
{
  my ($action, @args) = @_;
  my $arguments = '';
  foreach my $path (@args)
  {
    # Replace the prefix with one HDFS Shell understands
    $path =~ s/HDFSShell:/hdfs:/;
    # special case for standard streams
    if ($path eq '-')
    {
      $arguments .= '- ';
    }
    else
    {
      # escape on a copy so the debug output and callers see what was built
      my $escaped_path = $path;
      $escaped_path =~ s/([\\"\$`])/\\$1/g;
      $arguments .= '"' . $escaped_path . '" ';
    }
  }
  my $command = 'hadoop fs -' . $action . ' ' . $arguments;
  &_printDebug(' -> _generateHDFSCommand("' . $action . '", ...) => |' . $command . '|');
  return $command;
}
## _generateHDFSCommand()
92
93
## @function _printDebug()
#
# Writes a '[DEBUG] '-prefixed line to STDERR, but only while the file-level
# $debug switch is true.
#
# @param  message  the text to report
#
sub _printDebug
{
  my ($message) = @_;
  print STDERR "[DEBUG] $message\n" if ($debug);
}
## _printDebug()
105
106
107################################################################################
108############################### Public Functions ###############################
109################################################################################
110
111
## @function canRead()
#
# Reports whether the given path can be read.
#
# No real permission check is performed: working that out would mean parsing
# the permissions, owner and group from a listing and comparing them with the
# current user. Instead the answer is simply whether fileTest() considers the
# path a regular file.
#
# @param  path  the path to check
# @return whatever fileTest($path, '-f') returns (1 or 0)
#
sub canRead
{
  my ($path) = @_;
  return &fileTest($path, '-f');
}
## canRead()
125
126
## @function closeFileHandle()
#
# Closes the file handle that the given reference points at.
#
# NOTE(review): the outcome of close() is not inspected, so a failure while
# closing (for example of a pipe opened by openFileHandle()) is not reported
# to the caller.
#
# @param  fh_ref  reference to the scalar holding the file handle
# @return always 1
#
sub closeFileHandle
{
  my ($fh_ref) = @_;
  close(${$fh_ref});
  return 1;
}
## closeFileHandle()
136
137
## @function fileSize()
#
# Looks up the size of a path using fileStats().
#
# @param  path  the path to examine
# @return the 'filesize' entry of the hash returned by fileStats()
#
sub fileSize
{
  my ($path) = @_;
  return fileStats($path)->{'filesize'};
}
## fileSize()
147
148
## @function fileStats()
#
# Runs 'hadoop fs -ls' on a path and parses the final row of the listing into
# a hash.
#
# The expected columns are: permissions, replica count, user, group, size,
# modification date (YYYY-MM-DD), modification time (HH:MM) and name. The
# parser accepts:
#  - '-' in the replica column, which is what directories report
#  - sticky-bit (t/T) permission characters and a trailing ACL marker (+)
#  - names containing spaces (everything up to the end of the row is taken)
#
# @param  path  the path to list
# @return hash reference with the keys filename, replicas, filesize,
#         modification_date, modification_time, permissions, userid and
#         groupid; if the listing cannot be parsed an error is reported via
#         FileUtils::printError() (second argument 1 - presumably fatal,
#         confirm in FileUtils) and the hash is left empty
#
sub fileStats
{
  my ($path) = @_;
  my $stats = {};
  my $result = &_executeHDFSCommand(1, 'ls', $path);
  # - parse the results; the pattern is anchored to the end of the output so
  #   it is always the last row that is picked up
  if ($result =~ /([ds\-][rwxtT\-]+\+?)\s+(\d+|-)\s+([^\s]+)\s+([^\s]+)\s+(\d+)\s+(\d\d\d\d-\d\d-\d\d)\s+(\d\d:\d\d)\s+([^\r\n]+?)\s*$/)
  {
    $stats->{'filename'} = $8;
    $stats->{'replicas'} = $2;
    $stats->{'filesize'} = $5;
    $stats->{'modification_date'} = $6;
    $stats->{'modification_time'} = $7;
    $stats->{'permissions'} = $1;
    $stats->{'userid'} = $3;
    $stats->{'groupid'} = $4;
  }
  else
  {
    &FileUtils::printError('Failed to parse -ls result: ' . $result, 1);
  }
  return $stats;
}
## fileStats()
175
176
## @function fileTest()
#
# Emulates Perl's file test operators against HDFS using 'hadoop fs -test'.
#
# Only -d, -e, -f and -z are handled:
#  - a missing operator, or -l (HDFS has no symlinks), is treated as -e
#  - -f is emulated as "exists (-e) and is not a directory (-d fails)"
#  - any other operator is reported through FileUtils::printError() and the
#    test counts as failed
#
# @param  filename_full_path  the path to test
# @param  test_op             the operator, e.g. '-e' (optional)
# @return 1 if the test passed, 0 otherwise
#
sub fileTest
{
  my ($filename_full_path, $test_op) = @_;
  # HDFS doesn't support symlinking, so both "no operator" and -l fall back
  # to a plain existence check
  if (!defined $test_op || $test_op eq '-l')
  {
    $test_op = '-e';
  }

  if ($test_op eq '-f')
  {
    # must exist...
    if (&_executeHDFSCommand(0, 'test -e', $filename_full_path) != 0)
    {
      return 0;
    }
    # ...and the directory test has to fail (status above zero)
    return (&_executeHDFSCommand(0, 'test -d', $filename_full_path) > 0 ? 1 : 0);
  }

  if ($test_op eq '-d' || $test_op eq '-e' || $test_op eq '-z')
  {
    return (&_executeHDFSCommand(0, 'test ' . $test_op, $filename_full_path) == 0 ? 1 : 0);
  }

  # very limited test op support for HDFS
  &FileUtils::printError('Unknown or unsupported test mode: ' . $test_op);
  return 0;
}
## fileTest()
215
216
## @function filenameConcatenate()
#
# Joins path components with '/' and prefixes the result with a protocol.
#
# Runs of slashes inside the joined components are collapsed to one. The
# protocol is then prepended followed by a '/', which may reintroduce
# repeated slashes at the join. Finally a single trailing slash or backslash
# is dropped.
#
# @param  protocol  the prefix placed in front of the path
# @param  ...       the path components to join
# @return the assembled path
#
sub filenameConcatenate
{
  my ($protocol, @components) = @_;
  my $joined = join('/', @components);
  # collapse repeated slashes within the components
  $joined =~ s/[\/]+/\//g;
  # the protocol goes on afterwards so that its own slashes are left alone
  my $full_path = $protocol . '/' . $joined;
  # no trailing separator
  $full_path =~ s/[\\\/]$//;
  return $full_path;
}
## filenameConcatenate()
232
233
## @function isFilenameAbsolute()
#
# Reports whether a path is absolute. Paths used against HDFS must always be
# absolute, so no argument is examined.
#
# @return always 1
#
sub isFilenameAbsolute
{
  return 1;
}
## isFilenameAbsolute()
242
243
## @function isHDFS()
#
# Identifies this driver as operating on HDFS.
#
# @return always 1
#
sub isHDFS
{
  return 1;
}
## isHDFS()
251
252
## @function makeDirectory()
#
# Creates a directory with 'hadoop fs -mkdir'.
#
# @param  dir  the directory to create
# @return 1 if the command finished with status 0, otherwise 0
#
sub makeDirectory
{
  my ($dir) = @_;
  my $status = &_executeHDFSCommand(0, 'mkdir', $dir);
  # only a zero status counts as success
  if ($status == 0)
  {
    return 1;
  }
  return 0;
}
## makeDirectory()
263
264
## @function modificationTime()
#
# Placeholder: modification times are not supported by this driver. A warning
# is issued through FileUtils::printWarning() and 0 is returned for every
# path.
#
# @param  path  the path whose modification time was requested
# @return always 0
#
sub modificationTime
{
  my ($path) = @_;
  &FileUtils::printWarning("modificationTime() not supported");
  # NOTE(review): the listing is still requested, although its contents are
  # not used, so that any error fileStats() raises for an unparsable listing
  # continues to surface here. Drop this call if that is not relied upon - it
  # costs a full 'hadoop fs -ls' invocation.
  &fileStats($path);
  return 0;
}
## modificationTime()
284
285
## @function openFileHandle()
#
# Opens a pipe to a 'hadoop fs' process so a HDFS file can be read from or
# written to like an ordinary file handle.
#
#  - '>>' / 'a' : append is not supported; an error is reported
#  - '>'  / 'w' : any existing file is removed first (put refuses to
#                 overwrite), then a pipe into 'hadoop fs -put - <path>' is
#                 opened
#  - otherwise  : a pipe from 'hadoop fs -cat <path>' is opened for reading
#
# The pipes are opened with the three-argument form of open() so the
# direction is given explicitly rather than parsed out of the command string.
#
# @param  path    the HDFS path
# @param  mode    one of the modes listed above
# @param  fh_ref  reference to the scalar that receives the file handle
# @return always 1; problems are reported through FileUtils::printError()
#         (second argument 1 - presumably fatal, confirm in FileUtils)
#
sub openFileHandle
{
  my ($path, $mode, $fh_ref) = @_;
  if ($mode eq '>>' || $mode eq 'a')
  {
    &FileUtils::printError('Append (>>) mode not supported', 1);
  }
  elsif ($mode eq '>' || $mode eq 'w')
  {
    # the put command fails if the file already exists
    if (&fileTest($path, '-e'))
    {
      &removeFiles($path);
    }
    my $put_command = &_generateHDFSCommand('put', '-', $path);
    open($$fh_ref, '|-', $put_command) or &FileUtils::printError('Failed to open pipe to HDFS (put) for writing: ' . $path, 1);
  }
  else
  {
    my $cat_command = &_generateHDFSCommand('cat', $path);
    open($$fh_ref, '-|', $cat_command) or &FileUtils::printError('Failed to open pipe to HDFS (cat) for reading: ' . $path, 1);
  }
  return 1;
}
## openFileHandle()
311
312
## @function readDirectory()
#
# Lists the entries of a HDFS path using 'hadoop fs -ls'.
#
# Only rows that start with a ten character permission field (such as
# '-rw-r--r--' or 'drwxr-xr-x') are treated as entries. The command's STDERR
# is merged into its output, so without this check an error line that happens
# to contain a '/' would be returned as if it were a file name.
#
# @param  path  the path to list
# @return reference to an array holding the last path component of every
#         entry found (empty when nothing was listed)
#
sub readDirectory
{
  my ($path) = @_;
  my @files;
  my $result = &_executeHDFSCommand(1, 'ls', $path);
  foreach my $line (split(/\r?\n/, $result))
  {
    # skip summary, warning and error lines
    next unless ($line =~ /^[ds\-][rwxtT\-]{9}/);
    # the entry name is whatever follows the final slash
    if ($line =~ /\/([^\/]+)$/)
    {
      push(@files, $1);
    }
  }
  return \@files;
}
## readDirectory()
332
333
## @function removeFiles()
#
# Deletes a path from HDFS.
#
# @param  path           the path to delete
# @param  including_dir  when defined and true 'hadoop fs -rmr' is used,
#                        otherwise 'hadoop fs -rm'
# @return 1 if the command finished with status 0, otherwise 0
#
sub removeFiles
{
  my ($path, $including_dir) = @_;
  my $action = (defined $including_dir && $including_dir) ? 'rmr' : 'rm';
  my $status = &_executeHDFSCommand(0, $action, $path);
  # only a zero status counts as success
  return ($status == 0 ? 1 : 0);
}
## removeFiles()
352
353
## @function removeFilesFiltered()
#
# Depth first, recursive removal of files and directories, optionally
# restricted by accept and reject patterns.
#
# For every path given:
#  - a path that does not exist is reported and skipped
#  - a directory has each of its entries processed recursively; the directory
#    itself is only removed when neither pattern is defined
#  - a file is skipped when it matches the reject pattern, and otherwise
#    removed when no accept pattern is defined or it matches that pattern
#
# @param  paths      a single path or a reference to an array of paths
# @param  accept_re  pattern a file path must match to be removed (optional)
# @param  reject_re  pattern that protects matching file paths (optional)
# @return the number of files and directories successfully removed
#
sub removeFilesFiltered
{
  my ($paths, $accept_re, $reject_re) = @_;
  my @targets = (ref $paths eq "ARRAY") ? @$paths : ($paths);
  my $removed_count = 0;
  foreach my $target (@targets)
  {
    # remove trailing slashes
    $target =~ s/[\/\\]+$//;

    if (!&fileTest($target, '-e'))
    {
      &FileUtils::printError('path does not exist: ' . $target);
      next;
    }

    if (&fileTest($target, '-d'))
    {
      # children first...
      foreach my $entry (@{&readDirectory($target)})
      {
        $removed_count += &removeFilesFiltered($target . '/' . $entry, $accept_re, $reject_re);
      }
      # ...then the directory itself, but only for an unfiltered removal
      next if (defined $accept_re || defined $reject_re);
      if (&removeFiles($target, 1) != 1)
      {
        &FileUtils::printError('could not remove directory: ' . $target);
      }
      else
      {
        $removed_count++;
      }
      next;
    }

    # a plain file: rejected paths are left alone
    next if (defined $reject_re && ($target =~ m/$reject_re/));
    # as are paths that fail a supplied accept pattern
    next unless ((!defined $accept_re) || ($target =~ m/$accept_re/));
    if (&removeFiles($target) != 1)
    {
      &FileUtils::printError('could not remove file: ' . $target);
    }
    else
    {
      $removed_count++;
    }
  }
  return $removed_count;
}
## removeFilesFiltered()
417
418
## @function removeFilesRecursive()
#
# Removes a path and everything beneath it by delegating to
# removeFilesFiltered() with neither an accept nor a reject pattern.
#
# @param  path  the path (or array reference of paths) to remove
# @return the count returned by removeFilesFiltered()
#
sub removeFilesRecursive
{
  my $path = shift(@_);
  my ($no_accept_re, $no_reject_re) = (undef, undef);
  return &removeFilesFiltered($path, $no_accept_re, $no_reject_re);
}
## removeFilesRecursive()
429
430
## @function supportsSymbolicLink()
#
# Reports whether this driver can create symbolic links; it cannot.
#
# @return always 0
#
sub supportsSymbolicLink
{
  return 0;
}
## supportsSymbolicLink()
438
439
## @function transferFile()
#
# Copies or moves a path within HDFS.
#
# @param  mode  'COPY' runs 'hadoop fs -cp'; any other value runs
#               'hadoop fs -mv'
# @param  src   the source path
# @param  dst   the destination path
# @return 1 if the command finished with status 0, otherwise 0
#
sub transferFile
{
  my ($mode, $src, $dst) = @_;
  my $action = ($mode eq 'COPY') ? 'cp' : 'mv';
  my $status = &_executeHDFSCommand(0, $action, $src, $dst);
  # only a zero status counts as success
  return ($status == 0 ? 1 : 0);
}
## transferFile()
458
459
## @function transferFileFromLocal()
#
# Uploads a local file into HDFS with 'hadoop fs -put', optionally deleting
# the local copy afterwards.
#
# If the destination is an existing HDFS directory the source's file name is
# appended to it. A missing source or an already existing destination file is
# reported through FileUtils::printError(); the upload is still attempted.
#
# In MOVE mode the local source is deleted only after the upload succeeded,
# so a failed put can never cost the only copy of the file.
#
# @param  mode  'MOVE' to delete the local source after a successful upload;
#               any other value leaves it in place
# @param  src   path of the local source file
# @param  dst   HDFS destination file or directory
# @return 1 if the upload succeeded (and, for MOVE, the source was removed),
#         otherwise 0
#
sub transferFileFromLocal
{
  my ($mode, $src, $dst) = @_;
  if (!-f $src)
  {
    &FileUtils::printError('Source file (during ' . $mode . ') doesn\'t exists: ' . $src);
  }
  if (&fileTest($dst, '-d'))
  {
    my ($filename) = $src =~ /([^\\\/]+)$/;
    $dst .= '/' . $filename;
  }
  if (&fileTest($dst, '-f'))
  {
    &FileUtils::printError('Destination file (during ' . $mode . ') already exists: ' . $dst);
  }
  my $result = &_executeHDFSCommand(0, 'put', $src, $dst);
  my $remove_result = 1;
  # never discard the source unless the copy is known to be in HDFS
  if ($mode eq 'MOVE' && $result == 0)
  {
    unlink($src);
    # failed to delete somehow
    if (-f $src)
    {
      $remove_result = 0;
    }
  }
  return ($result == 0 && $remove_result ? 1 : 0);
}
## transferFileFromLocal()
492
493
## @function transferFileToLocal()
#
# Downloads a HDFS file to the local file system with 'hadoop fs -get',
# optionally removing the HDFS copy afterwards.
#
# If the destination is an existing local directory the source's file name is
# appended to it. A missing source or an already existing destination is
# reported through FileUtils::printError(); the download is still attempted.
#
# In MOVE mode the HDFS source is removed only after the download succeeded,
# so a failed get can never cost the only copy of the file.
#
# @param  mode  'MOVE' to remove the HDFS source after a successful download;
#               any other value leaves it in place
# @param  src   HDFS path of the source file
# @param  dst   local destination file or directory
# @return 1 if the download succeeded (and, for MOVE, the source was
#         removed), otherwise 0
#
sub transferFileToLocal
{
  my ($mode, $src, $dst) = @_;
  if (!&fileTest($src, '-f'))
  {
    &FileUtils::printError('Source file (during ' . $mode . ') does not exist: ' . $src);
  }
  if (-d $dst)
  {
    my ($filename) = $src =~ /([^\\\/]+)$/;
    $dst .= '/' . $filename;
  }
  if (-e $dst)
  {
    &FileUtils::printError('Destination file (during ' . $mode . ') already exists: ' . $dst);
  }
  my $result = &_executeHDFSCommand(0, 'get', $src, $dst);
  my $remove_result = 1;
  # never discard the source unless the copy is known to be on local disk
  if ($mode eq 'MOVE' && $result == 0)
  {
    $remove_result = &removeFiles($src);
  }
  return ($result == 0 && $remove_result ? 1 : 0);
}
## transferFileToLocal()
521
522
# A module has to finish with a true value for require/use to succeed
1;
Note: See TracBrowser for help on using the repository browser.