source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/gslis-cluster/SCRIPTS/FILE-SIZE-CHECK-SUBFOLDERS.pl@ 31171

Last change on this file since 31171 was 31171, checked in by davidb, 7 years ago

Util to help find where missing files are

  • Property svn:executable set to *
File size: 1.8 KB
Line 
#!/usr/bin/perl

# Sums file sizes per sub-directory key, to help find where files are missing.
#
# Usage: FILE-SIZE-CHECK-SUBFOLDERS.pl filename-file.txt filesize-file.txt
#
# The two input files are read in lock-step: line N of the file-name list is
# paired with line N of the file-size list.  The sub-directory key of a
# file-name is the two characters that follow its first '.'.
#
# STDOUT: a "Processing <key>" line each time the key changes from the
#         previous pair, followed by one "<total> <key>" line per key,
#         sorted by key.
# STDERR: diagnostics for unpaired or malformed lines.
#
# Exit status: 0 on success; 1 on a usage error or when any line could not be
# processed (the totals printed are then incomplete).

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

if (scalar(@ARGV) != 2) {
    print STDERR "Usage: $0 filename-file.txt filesize-file.txt\n";
    exit 1;
}

my ($filename_filename, $filesize_filename) = @ARGV;

# Three-argument open with an explicit read mode, so that a command-line
# argument beginning with '>' or '|' (or ending in '|') can never be
# interpreted as an open mode or a pipe.
open(my $fnin, '<', $filename_filename)
    or die "Could not open $filename_filename: $!";
open(my $fsin, '<', $filesize_filename)
    or die "Could not open $filesize_filename: $!";

my %subdir_total;

my $line_counter = 0;    # 1-based number of the pair currently being handled
my $flag_error   = 0;    # set when any pair could not be added to the totals
my $prev_subdir  = "";

LINE:
while (1) {
    my $fn_line = <$fnin>;
    my $fs_line = <$fsin>;

    # Both files exhausted together => correct criterion for stopping
    last LINE if !defined $fn_line && !defined $fs_line;

    $line_counter++;

    if (!defined $fs_line) {
        # The file-size list ran out first
        chomp $fn_line;
        print STDERR "Error on line $line_counter: there was no matching file-size for file-name $fn_line\n";
        $flag_error = 1;
        last LINE;
    }

    if (!defined $fn_line) {
        # The file-name list ran out first
        chomp $fs_line;
        print STDERR "Error on line $line_counter: there was no matching file-name for file-size $fs_line\n";
        print STDERR "Sub-directory level being processed: $prev_subdir\n";
        $flag_error = 1;
        last LINE;
    }

    chomp $fn_line;
    $fs_line =~ s/\s+\z//;    # drops the newline and any stray CR/blanks

    # Sub-directory key: the two characters after the first '.'
    my ($subdir) = ($fn_line =~ m/\.(..)/);

    if (!defined $subdir) {
        # Without this check the size would be silently filed under an
        # empty key (with "uninitialized value" warnings).
        print STDERR "Error on line $line_counter: could not determine sub-directory for file-name $fn_line\n";
        $flag_error = 1;
        next LINE;
    }

    if (!looks_like_number($fs_line)) {
        # A non-numeric size would otherwise be added as 0 (or as its
        # leading numeric prefix), quietly corrupting the total.
        print STDERR "Error on line $line_counter: file-size '$fs_line' for file-name $fn_line is not a number\n";
        $flag_error = 1;
        next LINE;
    }

    print "Processing $subdir\n" if $subdir ne $prev_subdir;

    $subdir_total{$subdir} += $fs_line;
    $prev_subdir = $subdir;
}

close $fnin;
close $fsin;

for my $key (sort keys %subdir_total) {
    print "$subdir_total{$key} $key\n";
}

if ($flag_error) {
    print STDERR "***** Warning: due to error in processing, subdirectory filesize calculation incomplete\n";
    # Signal the incomplete result to calling scripts as well as to humans.
    exit 1;
}
Note: See TracBrowser for help on using the repository browser.