package gdbdiff;

BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
}

use util;
use diffutil;
use Text::Diff;

use Win32; # for working out Windows Long Filenames from Win 8.3 short filenames

sub readin_gdb
{
    my ($cmd) = @_;

    open(PIN,"$cmd|") 
	|| die "Unable to open pipe to $cmd: $!\n";

    my $text_content = "";

    while (defined (my $line = <PIN>)) {
	$text_content .= $line;
    }

    close(PIN);
    return $text_content;
}

# for debugging. Prints txt contents of db to file
sub print_string_to_file 
{
	my ($text, $outfile) = @_;
	
	open(FOUT, ">$outfile") or die "ERROR failed to write to $outfile: $!\n";
    print FOUT $text;
    close(FOUT);
}

sub test_gdb
{
    my ($full_modeldb, $full_testdb,$strColName) = @_;

   # print "Now is testing database\n";
	my ($dbname, $dirname, $suffix)= &File::Basename::fileparse($full_testdb, "\\.[^\\.]+\$");
   
    # need to sort text output of both test and model col database files, to normalise them for the comparison
    # the -sort option to db2txt was added specifically to support diffcol
    my $model_cmd = "db2txt -sort $full_modeldb 2>&1";
    my $test_cmd  = "db2txt -sort $full_testdb 2>&1";

    my $model_text = readin_gdb($model_cmd);
    my $test_text = readin_gdb($test_cmd);

#	my $savepath = "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\"
#	print_string_to_file($test_text, $savepath.$dbname."_test.out");	
#	print_string_to_file($model_text, $savepath.$dbname."_model.out");

    # filter out the fields that can be ignored in the two database files
    my $ignore_line_re = "\n<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>([^\n])*";
    $model_text =~ s/$ignore_line_re//g;
    $test_text =~ s/$ignore_line_re//g;
  

	# if the OS doesn't match and one of them is windows, extra work needs to be done to bring the db files
	# in test and model collection to an even base for comparison
	
	my $testIsWin = &isDBWindowsSensitive($dbname, $test_text);
	my $modelIsWin = &isDBWindowsSensitive($dbname, $model_text);
	
	if($testIsWin == $modelIsWin) { # both linux or both windows, do the basic test we did on linux machines:
		    # ignore absolute path prefixes in modelcol and testcol (necessary for archiveinf-doc and -src.gdb files)

			# Remember the original model col on SVN could have been built anywhere, 
			# and in the gdb files, absolute paths are stored to the collection location. 
			# Crop these paths to the collect/<colname> point.
			
			# Entries are of the form [Entry] or <Entry>. In order to do a sensible diff,
			# need to remove the prefix to the collect/colname folder in any (absolute) path that occurs in Entry
			# E.g. [/full/path/collect/colname/import/file.ext] should become [collect/colname/import/file.ext]
			# Better regex is of the form /BEGIN((?:(?!BEGIN).)*)END/, see http://docstore.mik.ua/orelly/perl/cookbook/ch06_16.htm

			$model_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;			
			$test_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;						
	}
	
	else { # one of the collections was built on windows
		# handling slashes and other differences between a model coll built on one OS (e.g. linux) 
		# and a test collection built and diffed on another OS (windows)
	
		my ($win_text, $lin_text); # references
		if($testIsWin) {
			$win_text = \$test_text;
			$lin_text = \$model_text;
		} else {
			$win_text = \$model_text;
			$lin_text = \$test_text;
		}
		
		if($dbname =~ m/archiveinf-doc/) {
			my $tmp = ""; # rebuild windows file's set of lines after processing them one by one
	
			# convert short filenames to long perl: 
			# http://www.mombu.com/programming/perl/t-convert-dos-83-filenames-to-win32-long-filenames-using-perl-525448.html
			for my $line (split /^/, $$win_text) { # split the string into newlines
				
				if($line =~ m@^<assoc-file>(.*)(\s+)@s) {
					$line = $1; # may be a short file name
					# perhaps test here if it is a shortfilename? should match /CAPS....~number(.ext)/
				
					$line = "<assoc-file>".&Win32::GetLongPathName($line)."$2"; # make it a long file name and prefix assoc-file to it again					
				}
				$tmp .= $line;
			}
			$$win_text = $tmp;			
		}
		
		# slashes in windows text need to be turned into linux style slashes
		$$win_text =~ s@\\@/@g;
		
		# cut down absolute paths to files to just collect/colname/.../file, same as before
		$$lin_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
		$$win_text =~ s@^([^\\//]*).*(\\|/)(collect(\\|/)$strColName)(.*)$@$1$3$5@mg;
		
		# for the windows text, need to further get rid of the driveletter after [ or <meta>
		$$win_text =~ s@^(\[|<[^>]*>)[a-zA-Z]:collect@$1collect@mg;	
		# now can go back to using $model_text and $test_text
#	print_string_to_file($$win_text, $savepath.$dbname."_test.out");	
#	print_string_to_file($$lin_text, $$savepath.$dbname."_model.out");
		
	} # end of equalising differences between a windows collection's db file and linux coll's db file

	
	# now can go back to using $model_text and $test_text
	#print_string_to_file($test_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_test.out");	
	#print_string_to_file($model_text, "C:\\Research\\Nightly\\tools\\envi\\etc\\tasks\\diffcol\\".$dbname."_model.out");
		
	
    my $report_type = "OldStyle"; # Can not change this type.
    my $diff_gdb = diff \$model_text, \$test_text, { STYLE => $report_type };
    
    # leaving the ignore regex as it used to be in the following, in case it helps with single line comparisons
    $diff_gdb = &diffutil::GenerateOutput($diff_gdb,"^<(lastmodified|lastmodifieddate|oailastmodified|oailastmodifieddate)>.*");

    if($diff_gdb eq "")
    {
	return "";
    }
    else
    {
	return "Difference Report: Differences found in the Database file: \n$diff_gdb";
    }
    # Call diff?
}

# returns true if the contents are windows AND it matters for the diffing on the db that it's windows
# For col.gdb it does not seem to matter so far, if it is generated on a windows machine and to be compared to a linux-generated col.gdb
sub isDBWindowsSensitive 
{
	my ($dbtailname, $db_contents) = @_; # db filename without suffix
	
	if($dbtailname !~ m/archiveinf/) { # only archiveinf-doc and archive-inf source need special Windows processing, not col.gdb
		return 0;
	}
	return ($db_contents =~ m/\\/) ? 1 : 0; # windows slashes detected. Better test would be: [Something\something] OR <tag>something\something
	# for doc.xml:
	#     <Metadata name="gsdlsourcefilename">import/html_files/cleves.html</Metadata>
}

1;