source: gs2-extensions/parallel-building/trunk/src/bin/script/gdbm-diff.pl@ 30354

Last change on this file since 30354 was 24678, checked in by jmt12, 13 years ago

A tool for comparing the contents of two GDBM databases

  • Property svn:executable set to *
File size: 4.1 KB
Line 
1#!/usr/bin/perl
2
3print "\n";
4print "==================== GDBM Diff ====================\n";
5print "Diff two GDBM files to determine if their key value\n";
6print "pairs differ in any way. Ignores ordering of pairs.\n";
7print "---------------------------------------------------\n";
8print "\n";
9
10# 0. Initialize and check arguments
11# - we'll store all the key value pairs here
12my $data = {};
13my $differences = {};
14# - read in GDBM file paths from the arguments
15if (!defined $ARGV[0] || !-f $ARGV[0])
16{
17 &printUsage('First GDBM file not specified or isn\'t a file');
18}
19my $gdbm_one_path = $ARGV[0];
20if (!defined $ARGV[1] || !-f $ARGV[1])
21{
22 &printUsage('Second GDBM file not specified or isn\'t a file');
23}
24my $gdbm_two_path = $ARGV[1];
25print "GDBM 1: " . $gdbm_one_path . "\n";
26print "GDBM 2: " . $gdbm_two_path . "\n";
27# - also check that GSDLHOME is set and that db2txt is available
28if (!defined $ENV{GSDLHOME})
29{
30 &printUsage('GSDLHOME not set. Please source Greenstone\'s setup.bash first.');
31}
32print "Found Greenstone enviroment\n";
33my $test_result = `db2txt 2>&1`;
34if ($test_result !~ /usage\:\s+db2txt\s+database\-name/i)
35{
36 &printUsage('The program db2txt could not be found. Ensure Greenstone environment is set up properly and that bin/<os>/db2txt exists and it executable.');
37}
38print "Found application db2txt\n";
39print "\n";
40
41# 1. Transform the first GDBM file into TXT and then parse in key-value pairs
42# into a hashmap
43print " * Read in first GDBM file: " . $gdbm_one_path . "\n";
44my $cmd = 'db2txt ' . $gdbm_one_path . ' 2>&1';
45my $txt = `$cmd`;
46# - parse out each key-value pair and store
47while ($txt =~ s/\[([^\]]+)\]\n(.*?)\n\-+\n//s)
48{
49 my $a_key = $1;
50 my $a_value = $2;
51 print " - storing key '" . $a_key . "'\n";
52 print " - value '" . $a_value . "'\n";
53 $data->{$a_key} = $a_value;
54}
55print " - read " . scalar(keys(%{$data})) . " pairs\n";
56if ($txt =~ /\w/)
57{
58 print " - left over txt: |" . $txt . "|\n";
59}
60print "\n";
61
62# 2. Now we parse the second GDBM file in a similar fashion, except now we
63# compare any keys found to ones in the existing data structure. In the
64# case that the key doesn't exist, then we've found a record in B that
65# is not in A. In the case it does exist we compare the files to see
66# if they are the same.
67print " * Read in second GDBM file: " . $gdbm_two_path . "\n";
68my $difference_count = 0;
69$cmd = 'db2txt ' . $gdbm_two_path . ' 2>&1';
70$txt = `$cmd`;
71my $b_pair_count = 0;
72while ($txt =~ s/\[([^\]]+)\]\n(.*?)\n\-+\n//s)
73{
74 my $b_key = $1;
75 my $b_value = $2;
76 print STDERR " - testing key: '" . $b_key . "'\n";
77 print STDERR " - value '" . b_value . "'\n";
78 if (!defined $data->{$b_key})
79 {
80 print STDERR " - couldn't find in A\n";
81 $differences->{$b_key} = 2;
82 }
83 elsif ($data->{$b_key} ne $b_value)
84 {
85 print STDERR " - different value for A\n";
86 $differences->{$b_key} = 3;
87 }
88 # - no difference. Remove from data structure as we've dealt with this entry
89 else
90 {
91 print STDERR " - the same!\n";
92 delete($data->{$b_key});
93 }
94 $b_pair_count++;
95}
96print " - read " . $b_pair_count . " pairs\n";
97if ($txt =~ /\w/)
98{
99 print " - left over txt: |" . $txt . "|\n";
100}
101# - now we tackle the final case, that of records left over (hence were found
102# in A but not in B).
103foreach my $a_key (keys %{$data})
104{
105 $differences->{$a_key} = 1;
106}
107print "\n";
108
109print "Result: ";
110my $difference_count = scalar keys %{$differences};
111if ($difference_count)
112{
113 print "Found " . $difference_count . " differences.\n\n";
114 foreach my $d_key (sort keys %{$differences})
115 {
116 my $d_value = $differences->{$d_key};
117 if ($d_value == 1)
118 {
119 print " ! only in gdbm one: " . $d_key . "\n";
120 }
121 elsif ($d_value == 2)
122 {
123 print " ! only in gdbm two: " . $d_key . "\n";
124 }
125 else
126 {
127 print " ! values for key differ: " . $d_key . "\n";
128 }
129 }
130}
131else
132{
133 print "Files match!\n";
134}
135print "\n";
136
137print "==================== Complete! ====================\n\n";
138
139exit;
140
141sub printUsage
142{
143 my ($msg) = @_;
144 if (defined $msg)
145 {
146 print "Fatal Error! " . $msg . "\n";
147 }
148 print "Usage: gdbm-diff.pl <gdbm db path> <gdbm db path>\n\n";
149 exit;
150}
151# /** printUsage() **/
Note: See TracBrowser for help on using the repository browser.