1 | #!/usr/bin/perl
|
---|
2 |
|
---|
3 | print "\n";
|
---|
4 | print "==================== GDBM Diff ====================\n";
|
---|
5 | print "Diff two GDBM files to determine if their key value\n";
|
---|
6 | print "pairs differ in any way. Ignores ordering of pairs.\n";
|
---|
7 | print "---------------------------------------------------\n";
|
---|
8 | print "\n";
|
---|
9 |
|
---|
10 | # 0. Initialize and check arguments
|
---|
11 | # - we'll store all the key value pairs here
|
---|
12 | my $data = {};
|
---|
13 | my $differences = {};
|
---|
14 | # - read in GDBM file paths from the arguments
|
---|
15 | if (!defined $ARGV[0] || !-f $ARGV[0])
|
---|
16 | {
|
---|
17 | &printUsage('First GDBM file not specified or isn\'t a file');
|
---|
18 | }
|
---|
19 | my $gdbm_one_path = $ARGV[0];
|
---|
20 | if (!defined $ARGV[1] || !-f $ARGV[1])
|
---|
21 | {
|
---|
22 | &printUsage('Second GDBM file not specified or isn\'t a file');
|
---|
23 | }
|
---|
24 | my $gdbm_two_path = $ARGV[1];
|
---|
25 | print "GDBM 1: " . $gdbm_one_path . "\n";
|
---|
26 | print "GDBM 2: " . $gdbm_two_path . "\n";
|
---|
27 | # - also check that GSDLHOME is set and that db2txt is available
|
---|
28 | if (!defined $ENV{GSDLHOME})
|
---|
29 | {
|
---|
30 | &printUsage('GSDLHOME not set. Please source Greenstone\'s setup.bash first.');
|
---|
31 | }
|
---|
32 | print "Found Greenstone enviroment\n";
|
---|
33 | my $test_result = `db2txt 2>&1`;
|
---|
34 | if ($test_result !~ /usage\:\s+db2txt\s+database\-name/i)
|
---|
35 | {
|
---|
36 | &printUsage('The program db2txt could not be found. Ensure Greenstone environment is set up properly and that bin/<os>/db2txt exists and it executable.');
|
---|
37 | }
|
---|
38 | print "Found application db2txt\n";
|
---|
39 | print "\n";
|
---|
40 |
|
---|
41 | # 1. Transform the first GDBM file into TXT and then parse in key-value pairs
|
---|
42 | # into a hashmap
|
---|
43 | print " * Read in first GDBM file: " . $gdbm_one_path . "\n";
|
---|
44 | my $cmd = 'db2txt ' . $gdbm_one_path . ' 2>&1';
|
---|
45 | my $txt = `$cmd`;
|
---|
46 | # - parse out each key-value pair and store
|
---|
47 | while ($txt =~ s/\[([^\]]+)\]\n(.*?)\n\-+\n//s)
|
---|
48 | {
|
---|
49 | my $a_key = $1;
|
---|
50 | my $a_value = $2;
|
---|
51 | print " - storing key '" . $a_key . "'\n";
|
---|
52 | print " - value '" . $a_value . "'\n";
|
---|
53 | $data->{$a_key} = $a_value;
|
---|
54 | }
|
---|
55 | print " - read " . scalar(keys(%{$data})) . " pairs\n";
|
---|
56 | if ($txt =~ /\w/)
|
---|
57 | {
|
---|
58 | print " - left over txt: |" . $txt . "|\n";
|
---|
59 | }
|
---|
60 | print "\n";
|
---|
61 |
|
---|
62 | # 2. Now we parse the second GDBM file in a similar fashion, except now we
|
---|
63 | # compare any keys found to ones in the existing data structure. In the
|
---|
64 | # case that the key doesn't exist, then we've found a record in B that
|
---|
65 | # is not in A. In the case it does exist we compare the files to see
|
---|
66 | # if they are the same.
|
---|
67 | print " * Read in second GDBM file: " . $gdbm_two_path . "\n";
|
---|
68 | my $difference_count = 0;
|
---|
69 | $cmd = 'db2txt ' . $gdbm_two_path . ' 2>&1';
|
---|
70 | $txt = `$cmd`;
|
---|
71 | my $b_pair_count = 0;
|
---|
72 | while ($txt =~ s/\[([^\]]+)\]\n(.*?)\n\-+\n//s)
|
---|
73 | {
|
---|
74 | my $b_key = $1;
|
---|
75 | my $b_value = $2;
|
---|
76 | print STDERR " - testing key: '" . $b_key . "'\n";
|
---|
77 | print STDERR " - value '" . b_value . "'\n";
|
---|
78 | if (!defined $data->{$b_key})
|
---|
79 | {
|
---|
80 | print STDERR " - couldn't find in A\n";
|
---|
81 | $differences->{$b_key} = 2;
|
---|
82 | }
|
---|
83 | elsif ($data->{$b_key} ne $b_value)
|
---|
84 | {
|
---|
85 | print STDERR " - different value for A\n";
|
---|
86 | $differences->{$b_key} = 3;
|
---|
87 | }
|
---|
88 | # - no difference. Remove from data structure as we've dealt with this entry
|
---|
89 | else
|
---|
90 | {
|
---|
91 | print STDERR " - the same!\n";
|
---|
92 | delete($data->{$b_key});
|
---|
93 | }
|
---|
94 | $b_pair_count++;
|
---|
95 | }
|
---|
96 | print " - read " . $b_pair_count . " pairs\n";
|
---|
97 | if ($txt =~ /\w/)
|
---|
98 | {
|
---|
99 | print " - left over txt: |" . $txt . "|\n";
|
---|
100 | }
|
---|
101 | # - now we tackle the final case, that of records left over (hence were found
|
---|
102 | # in A but not in B).
|
---|
103 | foreach my $a_key (keys %{$data})
|
---|
104 | {
|
---|
105 | $differences->{$a_key} = 1;
|
---|
106 | }
|
---|
107 | print "\n";
|
---|
108 |
|
---|
109 | print "Result: ";
|
---|
110 | my $difference_count = scalar keys %{$differences};
|
---|
111 | if ($difference_count)
|
---|
112 | {
|
---|
113 | print "Found " . $difference_count . " differences.\n\n";
|
---|
114 | foreach my $d_key (sort keys %{$differences})
|
---|
115 | {
|
---|
116 | my $d_value = $differences->{$d_key};
|
---|
117 | if ($d_value == 1)
|
---|
118 | {
|
---|
119 | print " ! only in gdbm one: " . $d_key . "\n";
|
---|
120 | }
|
---|
121 | elsif ($d_value == 2)
|
---|
122 | {
|
---|
123 | print " ! only in gdbm two: " . $d_key . "\n";
|
---|
124 | }
|
---|
125 | else
|
---|
126 | {
|
---|
127 | print " ! values for key differ: " . $d_key . "\n";
|
---|
128 | }
|
---|
129 | }
|
---|
130 | }
|
---|
131 | else
|
---|
132 | {
|
---|
133 | print "Files match!\n";
|
---|
134 | }
|
---|
135 | print "\n";
|
---|
136 |
|
---|
137 | print "==================== Complete! ====================\n\n";
|
---|
138 |
|
---|
139 | exit;
|
---|
140 |
|
---|
141 | sub printUsage
|
---|
142 | {
|
---|
143 | my ($msg) = @_;
|
---|
144 | if (defined $msg)
|
---|
145 | {
|
---|
146 | print "Fatal Error! " . $msg . "\n";
|
---|
147 | }
|
---|
148 | print "Usage: gdbm-diff.pl <gdbm db path> <gdbm db path>\n\n";
|
---|
149 | exit;
|
---|
150 | }
|
---|
151 | # /** printUsage() **/
|
---|