Changeset 17476 for gsdl/trunk/perllib/dbutil.pm
- Timestamp:
- 2008-10-03T16:00:50+13:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/dbutil.pm
r17105 r17476 26 26 package dbutil; 27 27 28 use Encode; 28 29 use strict; 29 30 … … 31 32 sub open_infodb_write_handle 32 33 { 33 my $infodb_type = shift(@_); 34 my $infodb_file_path = shift(@_); 35 36 if ($infodb_type eq "sqlite") 37 { 38 return &open_infodb_write_handle_sqlite($infodb_file_path); 39 } 40 elsif ($infodb_type eq "gdbm-txtgz") 41 { 42 return &open_infodb_write_handle_gdbm_txtgz($infodb_file_path); 43 } 44 45 # Use GDBM if the infodb type is empty or not one of the values above 46 return &open_infodb_write_handle_gdbm($infodb_file_path); 34 my $infodb_type = shift(@_); 35 my $infodb_file_path = shift(@_); 36 37 if ($infodb_type eq "sqlite") 38 { 39 return &open_infodb_write_handle_sqlite($infodb_file_path); 40 } 41 elsif ($infodb_type eq "gdbm-txtgz") 42 { 43 return &open_infodb_write_handle_gdbm_txtgz($infodb_file_path); 44 } 45 elsif ($infodb_type eq "mssql") 46 { 47 return &open_infodb_write_handle_mssql($infodb_file_path); 48 } 49 50 # Use GDBM if the infodb type is empty or not one of the values above 51 return &open_infodb_write_handle_gdbm($infodb_file_path); 47 52 } 48 53 … … 50 55 sub close_infodb_write_handle 51 56 { 52 my $infodb_type = shift(@_); 53 my $infodb_handle = shift(@_); 54 55 if ($infodb_type eq "sqlite") 56 { 57 return &close_infodb_write_handle_sqlite($infodb_handle); 58 } 59 elsif ($infodb_type eq "gdbm-txtgz") 60 { 61 return &close_infodb_write_handle_gdbm_txtgz($infodb_handle); 62 } 63 64 # Use GDBM if the infodb type is empty or not one of the values above 65 &close_infodb_write_handle_gdbm($infodb_handle); 57 my $infodb_type = shift(@_); 58 my $infodb_handle = shift(@_); 59 60 if ($infodb_type eq "sqlite") 61 { 62 return &close_infodb_write_handle_sqlite($infodb_handle); 63 } 64 elsif ($infodb_type eq "gdbm-txtgz") 65 { 66 return &close_infodb_write_handle_gdbm_txtgz($infodb_handle); 67 } 68 elsif ($infodb_type eq "mssql") 69 { 70 return &close_infodb_write_handle_mssql($infodb_handle); 71 } 72 73 # Use GDBM if the infodb type is empty or not one of the values above 74 return &close_infodb_write_handle_gdbm($infodb_handle); 66 75 } 67 76 … … 69 78 sub get_default_infodb_type 70 79 { 71 72 73 80 # The default is GDBM so everything works the same for existing collections 81 # To use something else, specify the "infodbtype" in the collection's collect.cfg file 82 return "gdbm"; 74 83 } 75 84 … … 77 86 sub get_infodb_file_path 78 87 { 79 my $infodb_type = shift(@_); 80 my $collection_name = shift(@_); 81 my $infodb_directory_path = shift(@_); 82 83 if ($infodb_type eq "sqlite") 84 { 85 return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path); 86 } 87 elsif ($infodb_type eq "gdbm-txtgz") 88 { 89 return &get_infodb_file_path_gdbm_txtgz($collection_name, $infodb_directory_path); 90 } 91 92 # Use GDBM if the infodb type is empty or not one of the values above 93 return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path); 88 my $infodb_type = shift(@_); 89 my $collection_name = shift(@_); 90 my $infodb_directory_path = shift(@_); 91 92 if ($infodb_type eq "sqlite") 93 { 94 return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path); 95 } 96 elsif ($infodb_type eq "gdbm-txtgz") 97 { 98 return &get_infodb_file_path_gdbm_txtgz($collection_name, $infodb_directory_path); 99 } 100 elsif ($infodb_type eq "mssql") 101 { 102 #==================================================================================================# 103 # Updated by Jeffrey (2008/08/25 Monday) 104 # After look into the run-time code, it seems we should still create a database file. 105 # Since the run-time code is always try to read a database file, the easiest way here is not 106 # to change the whole structure, but to give whatever the system is looking for. 107 #==================================================================================================# 108 # Added by Jeffrey (2008/08/15 Friday) 109 # No file path required for MS SQL, it is a server-client connection. 110 # At the moment the information is hard coded in open_infodb_write_handle_mssql 111 # the this might need some tidy up sometime. 112 #==================================================================================================# 113 return &get_infodb_file_path_mssql($collection_name, $infodb_directory_path); 114 } 115 116 # Use GDBM if the infodb type is empty or not one of the values above 117 return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path); 94 118 } 95 119 … … 97 121 sub read_infodb_file 98 122 { 99 my $infodb_type = shift(@_); 100 my $infodb_file_path = shift(@_); 101 my $infodb_map = shift(@_); 102 103 if ($infodb_type eq "sqlite") 104 { 105 return &read_infodb_file_sqlite($infodb_file_path, $infodb_map); 106 } 107 elsif ($infodb_type eq "gdbm-txtgz") 108 { 109 return &read_infodb_file_gdbm_txtgz($infodb_file_path, $infodb_map); 110 } 111 112 # Use GDBM if the infodb type is empty or not one of the values above 113 return &read_infodb_file_gdbm($infodb_file_path, $infodb_map); 123 my $infodb_type = shift(@_); 124 my $infodb_file_path = shift(@_); 125 my $infodb_map = shift(@_); 126 127 if ($infodb_type eq "sqlite") 128 { 129 return &read_infodb_file_sqlite($infodb_file_path, $infodb_map); 130 } 131 elsif ($infodb_type eq "gdbm-txtgz") 132 { 133 return &read_infodb_file_gdbm_txtgz($infodb_file_path, $infodb_map); 134 } 135 elsif ($infodb_type eq "mssql") 136 { 137 return &read_infodb_file_mssql($infodb_file_path, $infodb_map); 138 } 139 140 # Use GDBM if the infodb type is empty or not one of the values above 141 return &read_infodb_file_gdbm($infodb_file_path, $infodb_map); 114 142 } 115 143 … … 117 145 sub write_infodb_entry 118 146 { 119 my $infodb_type = shift(@_); 120 my $infodb_handle = shift(@_); 121 my $infodb_key = shift(@_); 122 my $infodb_map = shift(@_); 123 124 if ($infodb_type eq "sqlite") 125 { 126 return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map); 127 } 128 elsif ($infodb_type eq "gdbm-txtgz") 129 { 130 return &write_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key, $infodb_map); 131 } 132 133 # Use GDBM if the infodb type is empty or not one of the values above 134 return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map); 147 my $infodb_type = shift(@_); 148 my $infodb_handle = shift(@_); 149 my $infodb_key = shift(@_); 150 my $infodb_map = shift(@_); 151 152 if ($infodb_type eq "sqlite") 153 { 154 return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map); 155 } 156 elsif ($infodb_type eq "gdbm-txtgz") 157 { 158 return &write_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key, $infodb_map); 159 } 160 elsif ($infodb_type eq "mssql") 161 { 162 return &write_infodb_entry_mssql($infodb_handle, $infodb_key, $infodb_map); 163 } 164 165 # Use GDBM if the infodb type is empty or not one of the values above 166 return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map); 135 167 } 136 168 … … 143 175 sub open_infodb_write_handle_gdbm_txtgz 144 176 { 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 177 # Keep infodb in GDBM neutral form => save data as compressed text file, 178 # read for txt2db to be run on it later (i.e. by the runtime system, 179 # first time the collection is ever accessed). This makes it easier 180 # distribute pre-built collections to various architectures. 181 # 182 # NB: even if two architectures are little endian (e.g. Intel and 183 # ARM procesors) GDBM does *not* guarantee that the database generated on 184 # one will work on the other 185 186 my $infodb_file_path = shift(@_); 187 188 # Greenstone ships with gzip for windows, on $PATH 189 190 my $infodb_file_handle = undef; 191 if (!open($infodb_file_handle, "| gzip - > \"$infodb_file_path\"")) 192 { 193 return undef; 194 } 195 196 return $infodb_file_handle; 165 197 } 166 198 … … 168 200 sub close_infodb_write_handle_gdbm_txtgz 169 201 { 170 171 172 202 my $infodb_handle = shift(@_); 203 204 close($infodb_handle); 173 205 } 174 206 … … 176 208 sub get_infodb_file_path_gdbm_txtgz 177 209 { 178 179 180 181 182 210 my $collection_name = shift(@_); 211 my $infodb_directory_path = shift(@_); 212 213 my $infodb_file_name = &util::get_dirsep_tail($collection_name).".txt.gz"; 214 return &util::filename_cat($infodb_directory_path, $infodb_file_name); 183 215 } 184 216 … … 186 218 sub read_infodb_file_gdbm_txtgz 187 219 { 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 220 my $infodb_file_path = shift(@_); 221 my $infodb_map = shift(@_); 222 223 my $cmd = "gzip --decompress \"$infodb_file_path\""; 224 225 open (PIPEIN, "$cmd |") 226 || die "Error: Couldn't open pipe from gzip: $!\n $cmd\n"; 227 228 my $infodb_line = ""; 229 my $infodb_key = ""; 230 my $infodb_value = ""; 231 while (defined ($infodb_line = <PIPEIN>)) 232 { 233 if ($infodb_line =~ /^\[([^\]]+)\]$/) 234 { 235 $infodb_key = $1; 236 } 237 elsif ($infodb_line =~ /^-{70}$/) 238 { 239 $infodb_map->{$infodb_key} = $infodb_value; 240 $infodb_key = ""; 241 $infodb_value = ""; 242 } 243 else 244 { 245 $infodb_value .= $infodb_line; 246 } 247 } 248 249 close (PIPEIN); 218 250 } 219 251 … … 222 254 { 223 255 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 256 my $infodb_handle = shift(@_); 257 my $infodb_key = shift(@_); 258 my $infodb_map = shift(@_); 259 260 print $infodb_handle "[$infodb_key]\n"; 261 foreach my $infodb_value_key (keys(%$infodb_map)) 262 { 263 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) 264 { 265 if ($infodb_value =~ /-{70,}/) 266 { 267 # if value contains 70 or more hyphens in a row we need to escape them 268 # to prevent txt2db from treating them as a separator 269 $infodb_value =~ s/-/&\#045;/gi; 270 } 271 print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n"; 272 } 273 } 274 print $infodb_handle '-' x 70, "\n"; 243 275 } 244 276 … … 251 283 sub open_infodb_write_handle_gdbm 252 284 { 253 254 255 256 257 258 259 260 261 262 285 my $infodb_file_path = shift(@_); 286 287 my $txt2db_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "txt2db" . &util::get_os_exe()); 288 my $infodb_file_handle = undef; 289 if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| \"$txt2db_exe\" \"$infodb_file_path\"")) 290 { 291 return undef; 292 } 293 294 return $infodb_file_handle; 263 295 } 264 296 … … 266 298 sub close_infodb_write_handle_gdbm 267 299 { 268 269 270 300 my $infodb_handle = shift(@_); 301 302 close($infodb_handle); 271 303 } 272 304 … … 274 306 sub get_infodb_file_path_gdbm 275 307 { 276 277 278 279 280 281 308 my $collection_name = shift(@_); 309 my $infodb_directory_path = shift(@_); 310 311 my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb"); 312 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension; 313 return &util::filename_cat($infodb_directory_path, $infodb_file_name); 282 314 } 283 315 … … 285 317 sub read_infodb_file_gdbm 286 318 { 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 319 my $infodb_file_path = shift(@_); 320 my $infodb_map = shift(@_); 321 322 open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n"; 323 my $infodb_line = ""; 324 my $infodb_key = ""; 325 my $infodb_value = ""; 326 while (defined ($infodb_line = <PIPEIN>)) 327 { 328 if ($infodb_line =~ /^\[([^\]]+)\]$/) 329 { 330 $infodb_key = $1; 331 } 332 elsif ($infodb_line =~ /^-{70}$/) 333 { 334 $infodb_map->{$infodb_key} = $infodb_value; 335 $infodb_key = ""; 336 $infodb_value = ""; 337 } 338 else 339 { 340 $infodb_value .= $infodb_line; 341 } 342 } 343 344 close (PIPEIN); 313 345 } 314 346 … … 316 348 sub write_infodb_entry_gdbm 317 349 { 318 319 350 # With infodb_handle already set up, works the same as _gdbm_txtgz version 351 write_infodb_entry_gdbm_txtgz(@_); 320 352 } 321 353 … … 328 360 sub open_infodb_write_handle_sqlite 329 361 { 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 362 my $infodb_file_path = shift(@_); 363 364 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe()); 365 my $infodb_handle = undef; 366 if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"")) 367 { 368 return undef; 369 } 370 371 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n"; 372 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n"; 373 374 # This is crucial for efficiency when importing large amounts of data 375 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n"; 376 377 # This is very important for efficiency, otherwise each command will be actioned one at a time 378 print $infodb_handle "BEGIN TRANSACTION;\n"; 379 380 return $infodb_handle; 349 381 } 350 382 … … 352 384 sub close_infodb_write_handle_sqlite 353 385 { 354 355 356 357 358 359 360 361 362 386 my $infodb_handle = shift(@_); 387 388 # Close the transaction we began after opening the file 389 print $infodb_handle "END TRANSACTION;\n"; 390 391 # This is crucial for efficient queries on the database! 392 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n"; 393 394 close($infodb_handle); 363 395 } 364 396 … … 366 398 sub get_infodb_file_path_sqlite 367 399 { 368 369 370 371 372 373 400 my $collection_name = shift(@_); 401 my $infodb_directory_path = shift(@_); 402 403 my $infodb_file_extension = ".db"; 404 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension; 405 return &util::filename_cat($infodb_directory_path, $infodb_file_name); 374 406 } 375 407 … … 377 409 sub read_infodb_file_sqlite 378 410 { 379 380 381 382 411 my $infodb_file_path = shift(@_); 412 my $infodb_map = shift(@_); 413 414 # !! TO IMPLEMENT 383 415 } 384 416 … … 386 418 sub write_infodb_entry_sqlite 387 419 { 388 my $infodb_handle = shift(@_); 389 my $infodb_key = shift(@_); 390 my $infodb_map = shift(@_); 391 392 # Add the key -> value mapping into the "data" table 393 my $infodb_entry_value = ""; 420 my $infodb_handle = shift(@_); 421 my $infodb_key = shift(@_); 422 my $infodb_map = shift(@_); 423 424 # Add the key -> value mapping into the "data" table 425 my $infodb_entry_value = ""; 426 foreach my $infodb_value_key (keys(%$infodb_map)) 427 { 428 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) 429 { 430 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n"; 431 } 432 } 433 434 my $safe_infodb_key = &sqlite_safe($infodb_key); 435 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n"; 436 437 # If this infodb entry is for a document, add all the interesting document metadata to the 438 # "document_metadata" table (for use by the dynamic classifiers) 439 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/) 440 { 441 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n"; 442 394 443 foreach my $infodb_value_key (keys(%$infodb_map)) 395 444 { 396 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) 397 { 398 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n"; 399 } 400 } 401 402 my $safe_infodb_key = &sqlite_safe($infodb_key); 403 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n"; 404 405 # If this infodb entry is for a document, add all the interesting document metadata to the 406 # "document_metadata" table (for use by the dynamic classifiers) 407 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/) 408 { 409 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n"; 410 411 foreach my $infodb_value_key (keys(%$infodb_map)) 412 { 413 # We're not interested in most of the automatically added document metadata 414 next if ($infodb_value_key eq "archivedir" || 415 $infodb_value_key eq "assocfilepath" || 416 $infodb_value_key eq "childtype" || 417 $infodb_value_key eq "contains" || 418 $infodb_value_key eq "docnum" || 419 $infodb_value_key eq "doctype" || 420 $infodb_value_key eq "Encoding" || 421 $infodb_value_key eq "FileSize" || 422 $infodb_value_key eq "hascover" || 423 $infodb_value_key eq "hastxt" || 424 $infodb_value_key eq "lastmodified" || 425 $infodb_value_key eq "metadataset" || 426 $infodb_value_key eq "thistype" || 427 $infodb_value_key =~ /^metadatafreq\-/ || 428 $infodb_value_key =~ /^metadatalist\-/); 429 430 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) 431 { 432 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n"; 433 } 434 } 435 } 445 # We're not interested in most of the automatically added document metadata 446 next if ($infodb_value_key eq "archivedir" || 447 $infodb_value_key eq "assocfilepath" || 448 $infodb_value_key eq "childtype" || 449 $infodb_value_key eq "contains" || 450 $infodb_value_key eq "docnum" || 451 $infodb_value_key eq "doctype" || 452 $infodb_value_key eq "Encoding" || 453 $infodb_value_key eq "FileSize" || 454 $infodb_value_key eq "hascover" || 455 $infodb_value_key eq "hastxt" || 456 $infodb_value_key eq "lastmodified" || 457 $infodb_value_key eq "metadataset" || 458 $infodb_value_key eq "thistype" || 459 $infodb_value_key =~ /^metadatafreq\-/ || 460 $infodb_value_key =~ /^metadatalist\-/); 461 462 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) 463 { 464 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n"; 465 } 466 } 467 } 436 468 } 437 469 … … 439 471 sub sqlite_safe 440 472 { 441 my $value = shift(@_); 442 443 # Escape any single quotes in the value 444 $value =~ s/\'/\'\'/g; 445 446 return $value; 473 my $value = shift(@_); 474 475 # Escape any single quotes in the value 476 $value =~ s/\'/\'\'/g; 477 478 return $value; 479 } 480 481 482 483 # ---------------------------------------------------------------------------------------- 484 # MSSQL IMPLEMENTATION 485 # ---------------------------------------------------------------------------------------- 486 487 my $mssql_collection_name = ""; 488 my $mssql_data_table_name = ""; 489 my $mssql_document_metadata_table_name = ""; 490 491 492 sub open_infodb_write_handle_mssql 493 { 494 my $infodb_file_path = shift(@_); 495 496 # You might have to install the DBD::ADO module from CPAN 497 #================================================================# 498 # Uncomment this if you want to use MSSQL!!! 499 # By the way, MSSQL only works on a Windows machine... 500 #================================================================# 501 #use DBI; 502 #use DBD::ADO; 503 #Win32::OLE->Option(CP => Win32::OLE::CP_UTF8); 504 505 # The hard coded server connection thingy which should be placed 506 # in some configuration file. 507 # If you have problem connecting to your MS SQL server: 508 # 1. Check if your MSSQL server has been started. 509 # 2. Check if the TCP/IP connection has been enabled. 510 # 3. Use telnet to the server 511 # (don't forget to specify the port, which can be found in the configuration manager) 512 # If none of the above helped, the you need to start googling then. 513 my $host = "localhost,1660"; # Need to look up your SQL server and see what port is it using. 514 my $user = "sa"; 515 my $pwd = "[When installing the MSSQL, you will be asked to input a password for the sa user, use that password]"; 516 my $database = "[Create a database in MSSQL and use it here]"; 517 518 # Create the unique name for the table 519 # We do not want to change the database for the current running index 520 # Therefore we use timestamp and collection short name to create an unqiue name 521 my $cur_time = time(); 522 my $unique_key = $mssql_collection_name . "_" . $cur_time; 523 $mssql_data_table_name = "data_" . $unique_key; 524 $mssql_document_metadata_table_name = "document_metadata_" . $unique_key; 525 print STDERR "MSSQL: Creating unique table name. Unique ID:[" . $unique_key . "]\n"; 526 527 # Store these information into the infodbfile 528 open(FH, ">" . $infodb_file_path); 529 print FH "mss-host\t" . $host . "\n"; 530 print FH "username\t" . $user . "\n"; 531 print FH "password\t" . $pwd . "\n"; 532 print FH "database\t" . $database . "\n"; 533 print FH "tableid\t" . $unique_key . "\n"; 534 close(FH); 535 print STDERR "MSSQL: Saving db info into :[" . $infodb_file_path . "]\n"; 536 537 # Make the connection 538 my $dsn = "Provider=SQLNCLI;Server=$host;Database=$database"; 539 my $infodb_handle = DBI->connect("dbi:ADO:$dsn", $user, $pwd, { RaiseError => 1, AutoCommit => 1}) || return undef; 540 print STDERR "MSSQL: Connect to MS SQL database. DSN:[" . $dsn . "]\n"; 541 542 # Make sure the data table has been created. 543 my $data_table_checker_array = dbgetarray($infodb_handle, "SELECT name FROM sysobjects WHERE name = '" . $mssql_data_table_name . "' AND OBJECTPROPERTY(id,'IsUserTable') = 1"); 544 if (scalar(@{$data_table_checker_array}) == 0) 545 { 546 dbquery($infodb_handle, "CREATE TABLE " . $mssql_data_table_name . " (one_key NVARCHAR(50) UNIQUE, one_value NVARCHAR(MAX))"); 547 } 548 print STDERR "MSSQL: Making sure the data table(" . $mssql_data_table_name . ") exists\n"; 549 550 # Make sure the document_metadata table has been created. 551 my $document_metadata_table_checker_array = dbgetarray($infodb_handle, "SELECT name FROM sysobjects WHERE name = '" . $mssql_document_metadata_table_name . "' AND OBJECTPROPERTY(id,'IsUserTable') = 1"); 552 if (scalar(@{$document_metadata_table_checker_array}) == 0) 553 { 554 dbquery($infodb_handle, "CREATE TABLE " . $mssql_document_metadata_table_name . " (id INTEGER IDENTITY(1,1) PRIMARY KEY, docOID NVARCHAR(50), element NVARCHAR(MAX), value NVARCHAR(MAX))"); 555 dbquery($infodb_handle, "CREATE INDEX dmd ON " . $mssql_document_metadata_table_name . "(docOID)"); 556 } 557 print STDERR "MSSQL: Making sure the document_metadata table(" . $mssql_data_table_name . ") exists.\n"; 558 559 return $infodb_handle; 560 } 561 562 563 sub close_infodb_write_handle_mssql 564 { 565 my $infodb_handle = shift(@_); 566 567 $infodb_handle->disconnect(); 568 } 569 570 571 sub get_infodb_file_path_mssql 572 { 573 my $collection_name = shift(@_); 574 my $infodb_directory_path = shift(@_); 575 576 my $infodb_file_extension = ".mssqldbinfo"; 577 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension; 578 579 # This will be used in the open_infodb_write_handle_mssql function 580 $mssql_collection_name = $collection_name; 581 582 return &util::filename_cat($infodb_directory_path, $infodb_file_name); 583 } 584 585 586 sub read_infodb_file_mssql 587 { 588 my $infodb_file_path = shift(@_); 589 my $infodb_map = shift(@_); 590 591 # !! TO IMPLEMENT 592 } 593 594 595 sub write_infodb_entry_mssql 596 { 597 my $infodb_handle = shift(@_); 598 my $infodb_key = shift(@_); 599 my $infodb_map = shift(@_); 600 601 # Add the key -> value mapping into the "data" table 602 my $infodb_entry_value = ""; 603 foreach my $infodb_value_key (keys(%$infodb_map)) 604 { 605 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) 606 { 607 $infodb_entry_value .= "<$infodb_value_key>" . &Encode::decode_utf8($infodb_value) . "\n"; 608 } 609 } 610 611 # Prepare the query 612 my $safe_infodb_key = &mssql_safe($infodb_key); 613 my $query = "INSERT INTO " . $mssql_data_table_name . " (one_key, one_value) VALUES (N'" . $safe_infodb_key . "', N'" . &mssql_safe($infodb_entry_value) . "')"; 614 dbquery($infodb_handle, $query); 615 616 # If this infodb entry is for a document, add all the interesting document metadata to the 617 # "document_metadata" table (for use by the dynamic classifiers) 618 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/) 619 { 620 dbquery($infodb_handle, "DELETE FROM " . $mssql_document_metadata_table_name . " WHERE docOID=N'" . $safe_infodb_key . "'"); 621 622 foreach my $infodb_value_key (keys(%$infodb_map)) 623 { 624 # We're not interested in most of the automatically added document metadata 625 next if ($infodb_value_key eq "archivedir" || 626 $infodb_value_key eq "assocfilepath" || 627 $infodb_value_key eq "childtype" || 628 $infodb_value_key eq "contains" || 629 $infodb_value_key eq "docnum" || 630 $infodb_value_key eq "doctype" || 631 $infodb_value_key eq "Encoding" || 632 $infodb_value_key eq "FileSize" || 633 $infodb_value_key eq "hascover" || 634 $infodb_value_key eq "hastxt" || 635 $infodb_value_key eq "lastmodified" || 636 $infodb_value_key eq "metadataset" || 637 $infodb_value_key eq "thistype" || 638 $infodb_value_key =~ /^metadatafreq\-/ || 639 $infodb_value_key =~ /^metadatalist\-/); 640 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}}) 641 { 642 $infodb_handle->{LongReadLen} = 65535; # Added for the encoding issue 643 my $query = "INSERT INTO " . $mssql_document_metadata_table_name . " (docOID, element, value) VALUES (N'" . $safe_infodb_key . "', N'" . &mssql_safe(&Encode::decode_utf8($infodb_value_key)) . "', N'" . &mssql_safe(&Encode::decode_utf8($infodb_value)) . "')"; 644 dbquery($infodb_handle, $query); 645 } 646 } 647 } 648 } 649 650 651 sub mssql_safe 652 { 653 my $value = shift(@_); 654 655 # Escape any single quotes in the value 656 $value =~ s/\'/\'\'/g; 657 658 return $value; 659 } 660 661 662 sub dbquery 663 { 664 my $infodb_handle = shift(@_); 665 my $sql_query = shift(@_); 666 667 # Execute the SQL statement 668 my $statement_handle = $infodb_handle->prepare($sql_query); 669 $statement_handle->execute(); 670 if ($statement_handle->err) 671 { 672 print STDERR "Error:" . $statement_handle->errstr . "\n"; 673 return undef; 674 } 675 676 return $statement_handle; 677 } 678 679 680 sub dbgetarray 681 { 682 my $infodb_handle = shift(@_); 683 my $sql_query = shift(@_); 684 685 my $statement_handle = dbquery($infodb_handle, $sql_query); 686 my $return_array = []; 687 688 # Iterate through the results and push them into an array 689 if (!defined($statement_handle)) 690 { 691 return []; 692 } 693 694 while ((my $temp_hash = $statement_handle->fetchrow_hashref())) 695 { 696 push(@$return_array, $temp_hash); 697 } 698 699 return $return_array; 447 700 } 448 701
Note:
See TracChangeset
for help on using the changeset viewer.