Changeset 21411


Ignore:
Timestamp:
2010-01-04T23:10:50+13:00 (14 years ago)
Author:
davidb
Message:

Separation of different database back-ends into individual files

Location:
main/trunk/greenstone2/perllib
Files:
6 added
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/dbutil.pm

    r19773 r21411  
    11###########################################################################
    22#
    3 # dbutil.pm -- utility functions for writing to different databases
     3# dbutil.pm -- gateway to utilities for reading/writing to different databases
    44# Copyright (C) 2008  DL Consulting Ltd
    55#
     
    2626package dbutil;
    2727
    28 use Encode;
    2928use strict;
    3029
     30
     31use dbutil::gdbmtxtgz;
     32use dbutil::gdbm;
     33use dbutil::jdbm;
     34use dbutil::sqlite;
     35use dbutil::mssql;
    3136
    3237sub open_infodb_write_handle
     
    3742  if ($infodb_type eq "sqlite")
    3843  {
    39     return &open_infodb_write_handle_sqlite($infodb_file_path);
     44    return &dbutil::sqlite::open_infodb_write_handle($infodb_file_path);
    4045  }
    4146  elsif ($infodb_type eq "gdbm-txtgz")
    4247  {
    43     return &open_infodb_write_handle_gdbm_txtgz($infodb_file_path);
     48    return &dbutil::gdbmtxtgz::open_infodb_write_handle($infodb_file_path);
     49  }
     50  elsif ($infodb_type eq "jdbm")
     51  {
     52    return &dbutil::jdbm::open_infodb_write_handle($infodb_file_path);
    4453  }
    4554  elsif ($infodb_type eq "mssql")
    4655  {
    47     return &open_infodb_write_handle_mssql($infodb_file_path);
    48   }
    49 
    50   # Use GDBM if the infodb type is empty or not one of the values above
    51   return &open_infodb_write_handle_gdbm($infodb_file_path);
     56    return &dbutil::mssql::open_infodb_write_handle($infodb_file_path);
     57  }
     58
     59  # Use GDBM if the infodb type is empty or not one of the values above
     60  return &dbutil::gdbm::open_infodb_write_handle($infodb_file_path);
    5261}
    5362
     
    6069  if ($infodb_type eq "sqlite")
    6170  {
    62     return &close_infodb_write_handle_sqlite($infodb_handle);
    63   }
    64   elsif ($infodb_type eq "gdbm-txtgz")
    65   {
    66     return &close_infodb_write_handle_gdbm_txtgz($infodb_handle);
    67   }
    68   elsif ($infodb_type eq "mssql")
    69   {
    70     return &close_infodb_write_handle_mssql($infodb_handle);
     71    return &dbutil::sqlite::close_infodb_write_handle($infodb_handle);
     72  }
     73  elsif ($infodb_type eq "gdbm-txtgz")
     74  {
     75    return &dbutil::gdbmtxtgz::close_infodb_write_handle($infodb_handle);
     76  }
     77  elsif ($infodb_type eq "jdbm")
     78  {
     79    return &dbutil::jdbm::close_infodb_write_handle($infodb_handle);
     80  }
     81  elsif ($infodb_type eq "mssql")
     82  {
     83    return &dbutil::mssql::close_infodb_write_handle($infodb_handle);
    7184  }
    7285   
    7386  # Use GDBM if the infodb type is empty or not one of the values above
    74   return &close_infodb_write_handle_gdbm($infodb_handle);
     87  return &dbutil::gdbm::close_infodb_write_handle($infodb_handle);
    7588}
    7689
     
    92105  if ($infodb_type eq "sqlite")
    93106  {
    94     return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path);
    95   }
    96   elsif ($infodb_type eq "gdbm-txtgz")
    97   {
    98     return &get_infodb_file_path_gdbm_txtgz($collection_name, $infodb_directory_path);
     107    return &dbutil::sqlite::get_infodb_file_path($collection_name, $infodb_directory_path);
     108  }
     109  elsif ($infodb_type eq "gdbm-txtgz")
     110  {
     111    return &dbutil::gdbmtxtgz::get_infodb_file_path($collection_name, $infodb_directory_path);
     112  }
     113  elsif ($infodb_type eq "jdbm")
     114  {
     115    return &dbutil::jdbm::get_infodb_file_path($collection_name, $infodb_directory_path);
    99116  }
    100117  elsif ($infodb_type eq "mssql")
     
    108125    # Added by Jeffrey (2008/08/15 Friday)
    109126    # No file path required for MS SQL, it is a server-client connection.
    110     # At the moment the information is hard coded in open_infodb_write_handle_mssql
     127    # At the moment the information is hard coded in dbutil::mssql::open_infodb_write_handle
    111128    # the this might need some tidy up sometime.
    112129    #==================================================================================================#
    113     return &get_infodb_file_path_mssql($collection_name, $infodb_directory_path);
     130    return &dbutil::mssql::get_infodb_file_path($collection_name, $infodb_directory_path);
    114131  }
    115132 
    116133  # Use GDBM if the infodb type is empty or not one of the values above
    117   return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
     134  return &dbutil::gdbm::get_infodb_file_path($collection_name, $infodb_directory_path);
    118135}
    119136
     
    127144  if ($infodb_type eq "sqlite")
    128145  {
    129     return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
    130   }
    131   elsif ($infodb_type eq "gdbm-txtgz")
    132   {
    133     return &read_infodb_file_gdbm_txtgz($infodb_file_path, $infodb_map);
    134   }
    135   elsif ($infodb_type eq "mssql")
    136   {
    137     return &read_infodb_file_mssql($infodb_file_path, $infodb_map);
     146    return &dbutil::sqlite::read_infodb_file($infodb_file_path, $infodb_map);
     147  }
     148  elsif ($infodb_type eq "gdbm-txtgz")
     149  {
     150    return &dbutil::gdbmtxtgz::read_infodb_file($infodb_file_path, $infodb_map);
     151  }
     152  elsif ($infodb_type eq "jdbm")
     153  {
     154    return &dbutil::jdbm::read_infodb_file($infodb_file_path, $infodb_map);
     155  }
     156  elsif ($infodb_type eq "mssql")
     157  {
     158    return &dbutil::mssql::read_infodb_file($infodb_file_path, $infodb_map);
    138159  }
    139160 
    140161  # Use GDBM if the infodb type is empty or not one of the values above
    141   return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
     162  return &dbutil::gdbm::read_infodb_file($infodb_file_path, $infodb_map);
    142163}
    143164
     
    152173  if ($infodb_type eq "sqlite")
    153174  {
    154     return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map);
    155   }
    156   elsif ($infodb_type eq "gdbm-txtgz")
    157   {
    158     return &write_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key, $infodb_map);
    159   }
    160   elsif ($infodb_type eq "mssql")
    161   {
    162     return &write_infodb_entry_mssql($infodb_handle, $infodb_key, $infodb_map);
    163   }
    164 
    165   # Use GDBM if the infodb type is empty or not one of the values above
    166   return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
     175    return &dbutil::sqlite::write_infodb_entry($infodb_handle, $infodb_key, $infodb_map);
     176  }
     177  elsif ($infodb_type eq "gdbm-txtgz")
     178  {
     179    return &dbutil::gdbmtxtgz::write_infodb_entry($infodb_handle, $infodb_key, $infodb_map);
     180  }
     181  elsif ($infodb_type eq "jdbm")
     182  {
     183    return &dbutil::jdbm::write_infodb_entry($infodb_handle, $infodb_key, $infodb_map);
     184  }
     185  elsif ($infodb_type eq "mssql")
     186  {
     187    return &dbutil::mssql::write_infodb_entry($infodb_handle, $infodb_key, $infodb_map);
     188  }
     189
     190  # Use GDBM if the infodb type is empty or not one of the values above
     191  return &dbutil::gdbm::write_infodb_entry($infodb_handle, $infodb_key, $infodb_map);
    167192}
    168193
     
    176201  if ($infodb_type eq "sqlite")
    177202  {
    178     return &delete_infodb_entry_sqlite($infodb_handle, $infodb_key);
    179   }
    180   elsif ($infodb_type eq "gdbm-txtgz")
    181   {
    182     return &delete_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key);
    183   }
    184   elsif ($infodb_type eq "mssql")
    185   {
    186     return &delete_infodb_entry_mssql($infodb_handle, $infodb_key);
    187   }
    188 
    189   # Use GDBM if the infodb type is empty or not one of the values above
    190   return &delete_infodb_entry_gdbm($infodb_handle, $infodb_key);
    191 }
    192 
    193 
    194 
    195 # -----------------------------------------------------------------------------
    196 #   GDBM TXT-GZ IMPLEMENTATION
    197 # -----------------------------------------------------------------------------
    198 
    199 sub open_infodb_write_handle_gdbm_txtgz
    200 {
    201   # Keep infodb in GDBM neutral form => save data as compressed text file,
    202   # read for txt2db to be run on it later (i.e. by the runtime system,
    203   # first time the collection is ever accessed).  This makes it easier
    204   # distribute pre-built collections to various architectures.
    205   #
    206   # NB: even if two architectures are little endian (e.g. Intel and
    207   # ARM procesors) GDBM does *not* guarantee that the database generated on
    208   # one will work on the other
    209 
    210   my $infodb_file_path = shift(@_);
    211 
    212   # Greenstone ships with gzip for windows, on $PATH
    213 
    214   my $infodb_file_handle = undef;
    215   if (!open($infodb_file_handle, "| gzip - > \"$infodb_file_path\""))
    216   {
    217     return undef;
    218   }
    219 
    220   return $infodb_file_handle;
    221 }
    222 
    223 
    224 sub close_infodb_write_handle_gdbm_txtgz
    225 {
    226   my $infodb_handle = shift(@_);
    227 
    228   close($infodb_handle);
    229 }
    230 
    231 
    232 sub get_infodb_file_path_gdbm_txtgz
    233 {
    234   my $collection_name = shift(@_);
    235   my $infodb_directory_path = shift(@_);
    236 
    237   my $infodb_file_name = &util::get_dirsep_tail($collection_name).".txt.gz";
    238   return &util::filename_cat($infodb_directory_path, $infodb_file_name);
    239 }
    240 
    241 
    242 sub read_infodb_file_gdbm_txtgz
    243 {
    244   my $infodb_file_path = shift(@_);
    245   my $infodb_map = shift(@_);
    246 
    247   my $cmd = "gzip --decompress \"$infodb_file_path\"";
    248 
    249   open (PIPEIN, "$cmd |")
    250   || die "Error: Couldn't open pipe from gzip: $!\n  $cmd\n";
    251 
    252   my $infodb_line = "";
    253   my $infodb_key = "";
    254   my $infodb_value = "";
    255   while (defined ($infodb_line = <PIPEIN>))
    256   {
    257     if ($infodb_line =~ /^\[([^\]]+)\]$/)
    258     {
    259       $infodb_key = $1;
    260     }
    261     elsif ($infodb_line =~ /^-{70}$/)
    262     {
    263       $infodb_map->{$infodb_key} = $infodb_value;
    264       $infodb_key = "";
    265       $infodb_value = "";
    266     }
    267     else
    268     {
    269       $infodb_value .= $infodb_line;
    270     }
    271   }
    272 
    273   close (PIPEIN);
    274 }
    275 
    276    
    277 sub write_infodb_entry_gdbm_txtgz
    278 {
    279 
    280   my $infodb_handle = shift(@_);
    281   my $infodb_key = shift(@_);
    282   my $infodb_map = shift(@_);
    283  
    284   print $infodb_handle "[$infodb_key]\n";
    285   foreach my $infodb_value_key (keys(%$infodb_map))
    286   {
    287     foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
    288     {
    289       if ($infodb_value =~ /-{70,}/)
    290       {
    291         # if value contains 70 or more hyphens in a row we need to escape them
    292         # to prevent txt2db from treating them as a separator
    293         $infodb_value =~ s/-/&\#045;/gi;
    294       }
    295       print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
    296     }
    297   }
    298   print $infodb_handle '-' x 70, "\n";
    299 }
    300 
    301 
    302 sub delete_infodb_entry_gdbm_txtgz
    303 {
    304 
    305   my $infodb_handle = shift(@_);
    306   my $infodb_key = shift(@_);
    307  
    308 
    309   # A minus at the end of a key (after the ]) signifies 'delete'
    310   print $infodb_handle "[$infodb_key]-\n";
    311 
    312   # The 70 minus signs are also needed, to help make the parsing by db2txt simple
    313   print $infodb_handle '-' x 70, "\n";
    314 }
    315 
    316 
    317 
    318 # -----------------------------------------------------------------------------
    319 #   GDBM IMPLEMENTATION
    320 # -----------------------------------------------------------------------------
    321 
    322 sub open_infodb_write_handle_gdbm
    323 {
    324   my $infodb_file_path = shift(@_);
    325   my $opt_append = shift(@_);
    326 
    327   my $txt2db_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "txt2db" . &util::get_os_exe());
    328   my $infodb_file_handle = undef;
    329   my $cmd = "\"$txt2db_exe\"";
    330   if ((defined $opt_append) && ($opt_append eq "append")) {
    331       $cmd .= " -append";
    332   }
    333   $cmd .= " \"$infodb_file_path\"";
    334 
    335   if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| $cmd"))
    336   {
    337     return undef;
    338   }
    339 
    340   return $infodb_file_handle;
    341 }
    342 
    343 
    344 
    345 sub close_infodb_write_handle_gdbm
    346 {
    347   my $infodb_handle = shift(@_);
    348 
    349   close($infodb_handle);
    350 }
    351 
    352 
    353 sub get_infodb_file_path_gdbm
    354 {
    355   my $collection_name = shift(@_);
    356   my $infodb_directory_path = shift(@_);
    357 
    358   my $infodb_file_extension = ".gdb";
    359   my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
    360   return &util::filename_cat($infodb_directory_path, $infodb_file_name);
    361 }
    362 
    363 
    364 sub read_infodb_file_gdbm
    365 {
    366   my $infodb_file_path = shift(@_);
    367   my $infodb_map = shift(@_);
    368 
    369   open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt \$infodb_file_path\"\n";
    370   my $infodb_line = "";
    371   my $infodb_key = "";
    372   my $infodb_value = "";
    373   while (defined ($infodb_line = <PIPEIN>))
    374   {
    375     if ($infodb_line =~ /^\[([^\]]+)\]$/)
    376     {
    377       $infodb_key = $1;
    378     }
    379     elsif ($infodb_line =~ /^-{70}$/)
    380     {
    381       $infodb_map->{$infodb_key} = $infodb_value;
    382       $infodb_key = "";
    383       $infodb_value = "";
    384     }
    385     else
    386     {
    387       $infodb_value .= $infodb_line;
    388     }
    389   }
    390 
    391   close (PIPEIN);
    392 }
    393 
    394 sub read_infodb_keys_gdbm
    395 {
    396   my $infodb_file_path = shift(@_);
    397   my $infodb_map = shift(@_);
    398 
    399   open (PIPEIN, "gdbmkeys \"$infodb_file_path\" |") || die "couldn't open pipe from gdbmkeys \$infodb_file_path\"\n";
    400   my $infodb_line = "";
    401   my $infodb_key = "";
    402   my $infodb_value = "";
    403   while (defined ($infodb_line = <PIPEIN>))
    404   {
    405       chomp $infodb_line; # remove end of line
    406 
    407       $infodb_map->{$infodb_line} = 1;
    408   }
    409 
    410   close (PIPEIN);
    411 }
    412 
    413    
    414 sub write_infodb_entry_gdbm
    415 {
    416   # With infodb_handle already set up, works the same as _gdbm_txtgz version
    417   write_infodb_entry_gdbm_txtgz(@_);
    418 }
    419 
    420 sub delete_infodb_entry_gdbm
    421 {
    422   # With infodb_handle already set up, works the same as _gdbm_txtgz version
    423   delete_infodb_entry_gdbm_txtgz(@_);
    424 }
    425 
    426 
    427 
    428 # -----------------------------------------------------------------------------
    429 #   SQLITE IMPLEMENTATION
    430 # -----------------------------------------------------------------------------
    431 
    432 sub open_infodb_write_handle_sqlite
    433 {
    434   my $infodb_file_path = shift(@_);
    435 
    436   my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
    437   my $infodb_handle = undef;
    438   if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
    439   {
    440     return undef;
    441   }
    442 
    443   print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
    444   print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
    445 
    446   # This is crucial for efficiency when importing large amounts of data
    447   print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
    448 
    449   # This is very important for efficiency, otherwise each command will be actioned one at a time
    450   print $infodb_handle "BEGIN TRANSACTION;\n";
    451 
    452   return $infodb_handle;
    453 }
    454 
    455 
    456 sub close_infodb_write_handle_sqlite
    457 {
    458   my $infodb_handle = shift(@_);
    459 
    460   # Close the transaction we began after opening the file
    461   print $infodb_handle "END TRANSACTION;\n";
    462 
    463   # This is crucial for efficient queries on the database!
    464   print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
    465 
    466   close($infodb_handle);
    467 }
    468 
    469 
    470 sub get_infodb_file_path_sqlite
    471 {
    472   my $collection_name = shift(@_);
    473   my $infodb_directory_path = shift(@_);
    474 
    475   my $infodb_file_extension = ".db";
    476   my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
    477   return &util::filename_cat($infodb_directory_path, $infodb_file_name);
    478 }
    479 
    480 
    481 sub read_infodb_file_sqlite
    482 {
    483   my $infodb_file_path = shift(@_);
    484   my $infodb_map = shift(@_);
    485 
    486   # !! TO IMPLEMENT
    487 }
    488 
    489 
    490 sub write_infodb_entry_sqlite
    491 {
    492   my $infodb_handle = shift(@_);
    493   my $infodb_key = shift(@_);
    494   my $infodb_map = shift(@_);
    495 
    496   # Add the key -> value mapping into the "data" table
    497   my $infodb_entry_value = "";
    498   foreach my $infodb_value_key (keys(%$infodb_map))
    499   {
    500     foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
    501     {
    502       $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
    503     }
    504   }
    505 
    506   my $safe_infodb_key = &sqlite_safe($infodb_key);
    507   print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
    508 
    509   # If this infodb entry is for a document, add all the interesting document metadata to the
    510   #   "document_metadata" table (for use by the dynamic classifiers)
    511   if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
    512   {
    513     print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
    514 
    515     foreach my $infodb_value_key (keys(%$infodb_map))
    516     {
    517       # We're not interested in most of the automatically added document metadata
    518       next if ($infodb_value_key eq "archivedir" ||
    519                $infodb_value_key eq "assocfilepath" ||
    520                $infodb_value_key eq "childtype" ||
    521                $infodb_value_key eq "contains" ||
    522                $infodb_value_key eq "docnum" ||
    523                $infodb_value_key eq "doctype" ||
    524                $infodb_value_key eq "Encoding" ||
    525                $infodb_value_key eq "FileSize" ||
    526                $infodb_value_key eq "hascover" ||
    527                $infodb_value_key eq "hastxt" ||
    528                $infodb_value_key eq "lastmodified" ||
    529                $infodb_value_key eq "metadataset" ||
    530                $infodb_value_key eq "thistype" ||
    531                $infodb_value_key =~ /^metadatafreq\-/ ||
    532                $infodb_value_key =~ /^metadatalist\-/);
    533 
    534       foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
    535       {
    536         print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
    537       }
    538     }
    539   }
    540 }
    541 
    542 
    543 
    544 sub delete_infodb_entry_sqlite
    545 {
    546   my $infodb_handle = shift(@_);
    547   my $infodb_key = shift(@_);
    548 
    549   # Delete the key from the "data" table
    550 
    551   my $safe_infodb_key = &sqlite_safe($infodb_key);
    552 
    553   print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
    554 
    555   # If this infodb entry is for a document, delete the
    556   #   "document_metadata" table entry also (for use by the dynamic classifiers)
    557   if ($infodb_key !~ /\./)
    558   {
    559       # Possible for there not to be a docOID matching this infodb_key
    560       # (entries are only made when <doctype> == doc
    561       # Attempt to delete it, and don't complain if one isn't found
    562 
    563       print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
    564 
    565   }
    566 }
    567 
    568 
    569 
    570 
    571 
    572 
    573 sub sqlite_safe
    574 {
    575   my $value = shift(@_);
    576 
    577   # Escape any single quotes in the value
    578   $value =~ s/\'/\'\'/g;
    579 
    580   return $value;
    581 }
    582 
    583 
    584 
    585 # ----------------------------------------------------------------------------------------
    586 #   MSSQL IMPLEMENTATION
    587 # ----------------------------------------------------------------------------------------
    588 
    589 my $mssql_collection_name = "";
    590 my $mssql_data_table_name = "";
    591 my $mssql_document_metadata_table_name = "";
    592 
    593 
    594 sub open_infodb_write_handle_mssql
    595 {
    596   my $infodb_file_path = shift(@_);
    597 
    598   # You might have to install the DBD::ADO module from CPAN
    599   #================================================================#
    600   # Uncomment this if you want to use MSSQL!!!
    601   # By the way, MSSQL only works on a Windows machine...
    602   #================================================================#
    603   #use DBI;
    604   #use DBD::ADO;
    605   #Win32::OLE->Option(CP => Win32::OLE::CP_UTF8);
    606 
    607   # The hard coded server connection thingy which should be placed
    608   # in some configuration file.
    609   # If you have problem connecting to your MS SQL server:
    610   # 1. Check if your MSSQL server has been started.
    611   # 2. Check if the TCP/IP connection has been enabled.
    612   # 3. Use telnet to the server
    613   # (don't forget to specify the port, which can be found in the configuration manager)
    614   # If none of the above helped, the you need to start googling then.
    615   my $host = "localhost,1660"; # Need to look up your SQL server and see what port is it using.
    616   my $user = "sa";
    617   my $pwd = "[When installing the MSSQL, you will be asked to input a password for the sa user, use that password]";
    618   my $database = "[Create a database in MSSQL and use it here]";
    619  
    620   # Create the unique name for the table
    621   # We do not want to change the database for the current running index
    622   # Therefore we use timestamp and collection short name to create an unqiue name
    623   my $cur_time = time();
    624   my $unique_key = $mssql_collection_name . "_" . $cur_time;
    625   $mssql_data_table_name = "data_" . $unique_key;
    626   $mssql_document_metadata_table_name = "document_metadata_" . $unique_key;
    627   print STDERR "MSSQL: Creating unique table name. Unique ID:[" . $unique_key . "]\n";
    628      
    629   # Store these information into the infodbfile 
    630   open(FH, ">" . $infodb_file_path);
    631   print FH "mss-host\t" . $host . "\n";
    632   print FH "username\t" . $user . "\n";
    633   print FH "password\t" . $pwd . "\n";
    634   print FH "database\t" . $database . "\n";
    635   print FH "tableid\t" . $unique_key . "\n";
    636   close(FH);
    637   print STDERR "MSSQL: Saving db info into :[" . $infodb_file_path . "]\n";
    638 
    639   # Make the connection
    640   my $dsn = "Provider=SQLNCLI;Server=$host;Database=$database";
    641   my $infodb_handle = DBI->connect("dbi:ADO:$dsn", $user, $pwd, { RaiseError => 1, AutoCommit => 1}) || return undef;
    642   print STDERR "MSSQL: Connect to MS SQL database. DSN:[" . $dsn . "]\n";
    643 
    644   # Make sure the data table has been created.
    645   my $data_table_checker_array = dbgetarray($infodb_handle, "SELECT name FROM sysobjects WHERE name = '" . $mssql_data_table_name . "' AND OBJECTPROPERTY(id,'IsUserTable') = 1");
    646   if (scalar(@{$data_table_checker_array}) == 0)
    647   {
    648     dbquery($infodb_handle, "CREATE TABLE " . $mssql_data_table_name . " (one_key NVARCHAR(50) UNIQUE, one_value NVARCHAR(MAX))");
    649   }
    650   print STDERR "MSSQL: Making sure the data table(" . $mssql_data_table_name . ") exists\n";
    651    
    652   # Make sure the document_metadata table has been created.
    653   my $document_metadata_table_checker_array = dbgetarray($infodb_handle, "SELECT name FROM sysobjects WHERE name = '" . $mssql_document_metadata_table_name . "' AND OBJECTPROPERTY(id,'IsUserTable') = 1");
    654   if (scalar(@{$document_metadata_table_checker_array}) == 0)
    655   {
    656     dbquery($infodb_handle, "CREATE TABLE " . $mssql_document_metadata_table_name . " (id INTEGER IDENTITY(1,1) PRIMARY KEY, docOID NVARCHAR(50), element NVARCHAR(MAX), value NVARCHAR(MAX))");
    657     dbquery($infodb_handle, "CREATE INDEX dmd ON " . $mssql_document_metadata_table_name . "(docOID)");
    658   }
    659   print STDERR "MSSQL: Making sure the document_metadata table(" . $mssql_data_table_name . ") exists.\n";
    660  
    661   return $infodb_handle;
    662 }
    663 
    664 
    665 sub close_infodb_write_handle_mssql
    666 {
    667   my $infodb_handle = shift(@_);   
    668  
    669   $infodb_handle->disconnect();
    670 }
    671 
    672 
    673 sub get_infodb_file_path_mssql
    674 {
    675   my $collection_name = shift(@_);
    676   my $infodb_directory_path = shift(@_);
    677 
    678   my $infodb_file_extension = ".mssqldbinfo";
    679   my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
    680  
    681   # This will be used in the open_infodb_write_handle_mssql function
    682   $mssql_collection_name =  $collection_name;
    683 
    684   return &util::filename_cat($infodb_directory_path, $infodb_file_name);
    685 }
    686 
    687 
    688 sub read_infodb_file_mssql
    689 {
    690   my $infodb_file_path = shift(@_);
    691   my $infodb_map = shift(@_);
    692 
    693   # !! TO IMPLEMENT
    694 }
    695 
    696 
    697 sub write_infodb_entry_mssql
    698 {
    699   my $infodb_handle = shift(@_);
    700   my $infodb_key = shift(@_);
    701   my $infodb_map = shift(@_);
    702  
    703   # Add the key -> value mapping into the "data" table
    704   my $infodb_entry_value = "";
    705   foreach my $infodb_value_key (keys(%$infodb_map))
    706   {
    707     foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
    708     {
    709       $infodb_entry_value .= "<$infodb_value_key>" . &Encode::decode_utf8($infodb_value) . "\n";
    710     }
    711   }
    712  
    713   # Prepare the query
    714   my $safe_infodb_key = &mssql_safe($infodb_key);
    715   my $query = "INSERT INTO " . $mssql_data_table_name . " (one_key, one_value) VALUES (N'" . $safe_infodb_key . "', N'" . &mssql_safe($infodb_entry_value) . "')";
    716   dbquery($infodb_handle, $query);
    717  
    718   # If this infodb entry is for a document, add all the interesting document metadata to the
    719   # "document_metadata" table (for use by the dynamic classifiers)
    720   if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
    721   {
    722     dbquery($infodb_handle, "DELETE FROM " . $mssql_document_metadata_table_name . " WHERE docOID=N'" . $safe_infodb_key . "'");
    723    
    724     foreach my $infodb_value_key (keys(%$infodb_map))
    725     {           
    726       # We're not interested in most of the automatically added document metadata
    727       next if ($infodb_value_key eq "archivedir" ||
    728                $infodb_value_key eq "assocfilepath" ||
    729                $infodb_value_key eq "childtype" ||
    730                $infodb_value_key eq "contains" ||
    731                $infodb_value_key eq "docnum" ||
    732                $infodb_value_key eq "doctype" ||
    733                $infodb_value_key eq "Encoding" ||
    734                $infodb_value_key eq "FileSize" ||
    735                $infodb_value_key eq "hascover" ||
    736                $infodb_value_key eq "hastxt" ||
    737                $infodb_value_key eq "lastmodified" ||
    738                $infodb_value_key eq "metadataset" ||
    739                $infodb_value_key eq "thistype" ||
    740                $infodb_value_key =~ /^metadatafreq\-/ ||
    741                $infodb_value_key =~ /^metadatalist\-/);
    742       foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
    743       {
    744         $infodb_handle->{LongReadLen} = 65535; # Added for the encoding issue
    745         my $query = "INSERT INTO " . $mssql_document_metadata_table_name . " (docOID, element, value) VALUES (N'" . $safe_infodb_key . "', N'" . &mssql_safe(&Encode::decode_utf8($infodb_value_key)) . "', N'" . &mssql_safe(&Encode::decode_utf8($infodb_value)) . "')";
    746         dbquery($infodb_handle, $query);
    747       }
    748     }
    749   }
    750 }
    751 
    752 sub delete_infodb_entry_mssql
    753 {
    754   my $infodb_handle = shift(@_);
    755   my $infodb_key = shift(@_);
    756  
    757   # Delete the key from the "data" table
    758 
    759  
    760   # Prepare the query
    761   my $safe_infodb_key = &mssql_safe($infodb_key);
    762   my $query = "DELETE FROM " . $mssql_data_table_name . " WHERE one_key=N'" . $safe_infodb_key . "'";
    763   dbquery($infodb_handle, $query);
    764  
    765   # If this infodb entry is for a document, add all the interesting document metadata to the
    766   # "document_metadata" table (for use by the dynamic classifiers)
    767   if ($infodb_key !~ /\./)
    768   {
    769       # Possible for there not to be a docOID matching this infodb_key
    770       # (entries are only made when <doctype> == doc
    771       # Attempt to delete it, and don't complain if one isn't found
    772 
    773       dbquery($infodb_handle, "DELETE FROM " . $mssql_document_metadata_table_name . " WHERE docOID=N'" . $safe_infodb_key . "'");
    774      
    775   }
    776 }
    777 
    778 
    779 
    780 sub mssql_safe
    781 {
    782   my $value = shift(@_);
    783  
    784   # Escape any single quotes in the value
    785   $value =~ s/\'/\'\'/g;
    786  
    787   return $value;
    788 }
    789 
    790 
    791 sub dbquery
    792 {
    793   my $infodb_handle = shift(@_);
    794   my $sql_query = shift(@_);
    795 
    796   # Execute the SQL statement
    797   my $statement_handle = $infodb_handle->prepare($sql_query);
    798   $statement_handle->execute();
    799   if ($statement_handle->err)
    800   {
    801     print STDERR "Error:" . $statement_handle->errstr . "\n";
    802     return undef;
    803   }
    804  
    805   return $statement_handle;
    806 }
    807 
    808 
    809 sub dbgetarray
    810 {
    811   my $infodb_handle = shift(@_);
    812   my $sql_query = shift(@_);
    813  
    814   my $statement_handle = dbquery($infodb_handle, $sql_query);
    815   my $return_array = [];
    816  
    817   # Iterate through the results and push them into an array
    818   if (!defined($statement_handle))
    819   {
    820     return [];
    821   }
    822  
    823   while ((my $temp_hash = $statement_handle->fetchrow_hashref()))
    824   {
    825     push(@$return_array, $temp_hash);
    826   }
    827 
    828   return $return_array;
     203    return &dbutil::sqlite::delete_infodb_entry($infodb_handle, $infodb_key);
     204  }
     205  elsif ($infodb_type eq "gdbm-txtgz")
     206  {
     207    return &dbutil::gdbmtxtgz::delete_infodb_entry($infodb_handle, $infodb_key);
     208  }
     209  elsif ($infodb_type eq "jdbm")
     210  {
     211    return &dbutil::jdbm::delete_infodb_entry($infodb_handle, $infodb_key);
     212  }
     213  elsif ($infodb_type eq "mssql")
     214  {
     215    return &dbutil::mssql::delete_infodb_entry($infodb_handle, $infodb_key);
     216  }
     217
     218  # Use GDBM if the infodb type is empty or not one of the values above
     219  return &dbutil::gdbm::delete_infodb_entry($infodb_handle, $infodb_key);
    829220}
    830221
Note: See TracChangeset for help on using the changeset viewer.