root/other-projects/hathitrust/wcsa/extracted-features-solr/trunk/gslis-cluster/SCRIPTS/FILE-SIZE-CHECK-SUBFOLDERS.pl @ 31307

Revision 31171, 1.8 KB (checked in by davidb, 3 years ago)

Util to help find where missing files are

  • Property svn:executable set to *
Line 
#!/usr/bin/perl

# Reads two parallel text files -- one file name per line, and one file size
# per line -- and accumulates the sizes per "sub-directory" key, where the key
# is the two characters that follow the first '.' in the file name.
#
# Usage: FILE-SIZE-CHECK-SUBFOLDERS.pl filename-file.txt filesize-file.txt
#
# Output (STDOUT): a "Processing <key>" line each time the key changes from
# the previous line, followed at the end by one "<total> <key>" line per key,
# sorted by key.
#
# Diagnostics (STDERR): if one file runs out of lines before the other the
# mismatch is reported and processing stops; a line whose file name has no
# extractable key, or whose size is not numeric, is reported and skipped.
# In either case a final warning notes that the totals are incomplete.

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

my $ARGC = scalar(@ARGV);

if ($ARGC != 2) {
    print STDERR "Usage: $0 filename-file.txt filesize-file.txt\n";
    exit 1;
}

my ($filename_filename, $filesize_filename) = @ARGV;

# Three-argument open: the file names come straight from the command line, so
# they must never be interpreted as an open mode or a pipe.
open my $fnin, '<', $filename_filename
    or die "Could not open $filename_filename: $!";
open my $fsin, '<', $filesize_filename
    or die "Could not open $filesize_filename: $!";

my $subdir_total = {};    # sub-directory key => running total of sizes
my $line_counter = 0;     # 1-based number of the line pair being processed
my $flag_error   = 0;     # set when any problem makes the totals incomplete
my $prev_subdir  = "";    # key of the previous successfully processed line

LINE:
while (1) {
    # Read both files in lock step: line N of one pairs with line N of the other.
    my $fn_line = <$fnin>;
    my $fs_line = <$fsin>;
    $line_counter++;

    # Both files exhausted at the same time => correct criterion for stopping.
    last LINE if !defined $fn_line && !defined $fs_line;

    if (!defined $fs_line) {
        chomp $fn_line;
        print STDERR "Error on line $line_counter: there was no matching file-size for file-name $fn_line\n";
        print STDERR "Sub-directory level being processed: $prev_subdir\n";
        $flag_error = 1;
        last LINE;
    }

    if (!defined $fn_line) {
        chomp $fs_line;
        print STDERR "Error on line $line_counter: there was no matching file-name for file-size $fs_line\n";
        print STDERR "Sub-directory level being processed: $prev_subdir\n";
        $flag_error = 1;
        last LINE;
    }

    chomp $fn_line;
    chomp $fs_line;

    # The key is the two characters immediately after the first '.' that has
    # two characters following it on the line.
    my ($subdir) = ($fn_line =~ m/[.](..)/);

    if (!defined $subdir) {
        # Without this check an unparsable name would be summed under an
        # undefined key and trigger "uninitialized value" warnings.
        print STDERR "Error on line $line_counter: could not determine sub-directory for file-name '$fn_line'\n";
        $flag_error = 1;
        next LINE;
    }

    if (!looks_like_number($fs_line)) {
        print STDERR "Error on line $line_counter: file-size '$fs_line' for file-name $fn_line is not numeric\n";
        $flag_error = 1;
        next LINE;
    }

    if ($subdir ne $prev_subdir) {
        print "Processing $subdir\n";
    }

    $subdir_total->{$subdir} += $fs_line;
    $prev_subdir = $subdir;
}

close $fnin;
close $fsin;

for my $key (sort keys %$subdir_total) {
    my $val = $subdir_total->{$key};
    print "$val $key\n";
}

if ($flag_error) {
    print STDERR "***** Warning: due to error in processing, subdirectory filesize calculation incomplete\n";
}
Note: See TracBrowser for help on using the browser.