#!/usr/bin/perl

# manifestinator.pl - write an XML manifest listing the files found beneath
# an import directory.
#
# Usage: manifestinator.pl <import directory> [<max number of documents>]
#
# The manifest is written to 'manifest.xml' (relative to the current working
# directory), or to 'manifest-<N>.xml' when a positive document limit N is
# given on the command line.

use strict;
use warnings;

use List::Util 'shuffle';

# The import directory argument is mandatory and must name an existing
# directory; otherwise print the usage text and stop.
if (!defined $ARGV[0] || !-d $ARGV[0])
{
    print "usage: manifestinator.pl <import directory> [<max number of documents>]\n";
    exit(0);
}

# Optional cap on the number of files listed. 0 means "no limit"; a second
# argument that is not made up purely of digits is ignored.
my $max_docs = 0;
if (defined $ARGV[1] && $ARGV[1] =~ /^\d+$/)
{
    $max_docs = $ARGV[1];
}

# Encode the limit in the output name so differently sized manifests can
# coexist in the same directory.
my $manifest_filename = 'manifest';
if ($max_docs > 0)
{
    $manifest_filename .= '-' . $max_docs;
}
$manifest_filename .= '.xml';

# XMLOUT is intentionally a package-level (bareword) handle: manifestify()
# prints its <Filename> elements to it.
open(XMLOUT, '>:utf8', $manifest_filename)
    or die("Failed to open '$manifest_filename' for writing: $!\n");
print XMLOUT "<Manifest>\n";
print XMLOUT " <Index>\n";

manifestify($ARGV[0], '', $max_docs, 0);

print XMLOUT " </Index>\n";
print XMLOUT "</Manifest>\n";

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close() must be checked on a write handle.
close(XMLOUT)
    or die("Failed to finish writing '$manifest_filename': $!\n");
exit;

# manifestify($dir, $prefix, $max_docs, $current_count)
#
# Recursively walks $dir and prints one <Filename> element per file to the
# package-level XMLOUT filehandle, which the caller must already have opened.
# Directory entries are visited in random order, and any entry whose name
# starts with '.' (including '.' and '..') is skipped.
#
#   $dir           - directory to read
#   $prefix        - path of $dir relative to the directory the walk started
#                    from ('' at the top level, otherwise ending in '/');
#                    it is prepended to every file name written
#   $max_docs      - stop once this many files have been listed; 0 = no limit
#   $current_count - number of files already listed before this call
#
# Returns the number of files listed so far (including those counted in
# $current_count). Dies if $dir cannot be opened for reading.
sub manifestify
{
    my ($dir, $prefix, $max_docs, $current_count) = @_;

    # Nothing to do when the limit was already reached by an earlier call.
    if ($max_docs > 0 && $current_count >= $max_docs)
    {
        return $current_count;
    }

    # A lexical directory handle keeps every recursion level independent
    # instead of sharing one global bareword handle.
    opendir(my $dh, $dir)
        or die("Failed to open import directory '$dir' for reading: $!\n");
    my @files = readdir($dh);
    closedir($dh);

    foreach my $file (shuffle @files)
    {
        # Skip '.', '..' and hidden entries.
        next if ($file =~ /^\./);

        my $path = $dir . '/' . $file;
        if (-d $path)
        {
            # Descend, extending the relative prefix with this directory.
            my $new_prefix = $prefix . $file . '/';
            $current_count = manifestify($path, $new_prefix, $max_docs, $current_count);
        }
        else
        {
            my $entry = $prefix . $file;

            # readdir() returns raw bytes. The main script opens XMLOUT with
            # a ':utf8' layer, which would encode UTF-8 file names a second
            # time. Decoding first avoids that; if the bytes are not valid
            # UTF-8, utf8::decode leaves the string unchanged.
            utf8::decode($entry);

            # Escape XML metacharacters so that unusual file names cannot
            # produce a malformed manifest. '&' must be handled first.
            $entry =~ s/&/&amp;/g;
            $entry =~ s/</&lt;/g;
            $entry =~ s/>/&gt;/g;

            print XMLOUT " <Filename>" . $entry . "</Filename>\n";
            $current_count++;
        }

        # Stop as soon as the limit is hit, even in the middle of a directory.
        if ($max_docs > 0 && $current_count >= $max_docs)
        {
            return $current_count;
        }
    }
    return $current_count;
}