source: gsdl/trunk/perllib/dbutil.pm@ 18467

Last change on this file since 18467 was 18467, checked in by davidb, 15 years ago

Additions made to dbutil so various DB backends can support deletion from database

File size: 24.7 KB
Line 
1###########################################################################
2#
3# dbutil.pm -- utility functions for writing to different databases
4# Copyright (C) 2008 DL Consulting Ltd
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package dbutil;
27
28use Encode;
29use strict;
30
31
32sub open_infodb_write_handle
33{
34 my $infodb_type = shift(@_);
35 my $infodb_file_path = shift(@_);
36
37 if ($infodb_type eq "sqlite")
38 {
39 return &open_infodb_write_handle_sqlite($infodb_file_path);
40 }
41 elsif ($infodb_type eq "gdbm-txtgz")
42 {
43 return &open_infodb_write_handle_gdbm_txtgz($infodb_file_path);
44 }
45 elsif ($infodb_type eq "mssql")
46 {
47 return &open_infodb_write_handle_mssql($infodb_file_path);
48 }
49
50 # Use GDBM if the infodb type is empty or not one of the values above
51 return &open_infodb_write_handle_gdbm($infodb_file_path);
52}
53
54
55sub close_infodb_write_handle
56{
57 my $infodb_type = shift(@_);
58 my $infodb_handle = shift(@_);
59
60 if ($infodb_type eq "sqlite")
61 {
62 return &close_infodb_write_handle_sqlite($infodb_handle);
63 }
64 elsif ($infodb_type eq "gdbm-txtgz")
65 {
66 return &close_infodb_write_handle_gdbm_txtgz($infodb_handle);
67 }
68 elsif ($infodb_type eq "mssql")
69 {
70 return &close_infodb_write_handle_mssql($infodb_handle);
71 }
72
73 # Use GDBM if the infodb type is empty or not one of the values above
74 return &close_infodb_write_handle_gdbm($infodb_handle);
75}
76
77
78sub get_default_infodb_type
79{
80 # The default is GDBM so everything works the same for existing collections
81 # To use something else, specify the "infodbtype" in the collection's collect.cfg file
82 return "gdbm";
83}
84
85
86sub get_infodb_file_path
87{
88 my $infodb_type = shift(@_);
89 my $collection_name = shift(@_);
90 my $infodb_directory_path = shift(@_);
91
92 if ($infodb_type eq "sqlite")
93 {
94 return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path);
95 }
96 elsif ($infodb_type eq "gdbm-txtgz")
97 {
98 return &get_infodb_file_path_gdbm_txtgz($collection_name, $infodb_directory_path);
99 }
100 elsif ($infodb_type eq "mssql")
101 {
102 #==================================================================================================#
103 # Updated by Jeffrey (2008/08/25 Monday)
104 # After look into the run-time code, it seems we should still create a database file.
105 # Since the run-time code is always try to read a database file, the easiest way here is not
106 # to change the whole structure, but to give whatever the system is looking for.
107 #==================================================================================================#
108 # Added by Jeffrey (2008/08/15 Friday)
109 # No file path required for MS SQL, it is a server-client connection.
110 # At the moment the information is hard coded in open_infodb_write_handle_mssql
111 # the this might need some tidy up sometime.
112 #==================================================================================================#
113 return &get_infodb_file_path_mssql($collection_name, $infodb_directory_path);
114 }
115
116 # Use GDBM if the infodb type is empty or not one of the values above
117 return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
118}
119
120
121sub read_infodb_file
122{
123 my $infodb_type = shift(@_);
124 my $infodb_file_path = shift(@_);
125 my $infodb_map = shift(@_);
126
127 if ($infodb_type eq "sqlite")
128 {
129 return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
130 }
131 elsif ($infodb_type eq "gdbm-txtgz")
132 {
133 return &read_infodb_file_gdbm_txtgz($infodb_file_path, $infodb_map);
134 }
135 elsif ($infodb_type eq "mssql")
136 {
137 return &read_infodb_file_mssql($infodb_file_path, $infodb_map);
138 }
139
140 # Use GDBM if the infodb type is empty or not one of the values above
141 return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
142}
143
144
145sub write_infodb_entry
146{
147 my $infodb_type = shift(@_);
148 my $infodb_handle = shift(@_);
149 my $infodb_key = shift(@_);
150 my $infodb_map = shift(@_);
151
152 if ($infodb_type eq "sqlite")
153 {
154 return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map);
155 }
156 elsif ($infodb_type eq "gdbm-txtgz")
157 {
158 return &write_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key, $infodb_map);
159 }
160 elsif ($infodb_type eq "mssql")
161 {
162 return &write_infodb_entry_mssql($infodb_handle, $infodb_key, $infodb_map);
163 }
164
165 # Use GDBM if the infodb type is empty or not one of the values above
166 return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
167}
168
169
170sub delete_infodb_entry
171{
172 my $infodb_type = shift(@_);
173 my $infodb_handle = shift(@_);
174 my $infodb_key = shift(@_);
175
176 if ($infodb_type eq "sqlite")
177 {
178 return &delete_infodb_entry_sqlite($infodb_handle, $infodb_key);
179 }
180 elsif ($infodb_type eq "gdbm-txtgz")
181 {
182 return &delete_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key);
183 }
184 elsif ($infodb_type eq "mssql")
185 {
186 return &delete_infodb_entry_mssql($infodb_handle, $infodb_key);
187 }
188
189 # Use GDBM if the infodb type is empty or not one of the values above
190 return &delete_infodb_entry_gdbm($infodb_handle, $infodb_key);
191}
192
193
194
195# -----------------------------------------------------------------------------
196# GDBM TXT-GZ IMPLEMENTATION
197# -----------------------------------------------------------------------------
198
199sub open_infodb_write_handle_gdbm_txtgz
200{
201 # Keep infodb in GDBM neutral form => save data as compressed text file,
202 # read for txt2db to be run on it later (i.e. by the runtime system,
203 # first time the collection is ever accessed). This makes it easier
204 # distribute pre-built collections to various architectures.
205 #
206 # NB: even if two architectures are little endian (e.g. Intel and
207 # ARM procesors) GDBM does *not* guarantee that the database generated on
208 # one will work on the other
209
210 my $infodb_file_path = shift(@_);
211
212 # Greenstone ships with gzip for windows, on $PATH
213
214 my $infodb_file_handle = undef;
215 if (!open($infodb_file_handle, "| gzip - > \"$infodb_file_path\""))
216 {
217 return undef;
218 }
219
220 return $infodb_file_handle;
221}
222
223
224sub close_infodb_write_handle_gdbm_txtgz
225{
226 my $infodb_handle = shift(@_);
227
228 close($infodb_handle);
229}
230
231
232sub get_infodb_file_path_gdbm_txtgz
233{
234 my $collection_name = shift(@_);
235 my $infodb_directory_path = shift(@_);
236
237 my $infodb_file_name = &util::get_dirsep_tail($collection_name).".txt.gz";
238 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
239}
240
241
242sub read_infodb_file_gdbm_txtgz
243{
244 my $infodb_file_path = shift(@_);
245 my $infodb_map = shift(@_);
246
247 my $cmd = "gzip --decompress \"$infodb_file_path\"";
248
249 open (PIPEIN, "$cmd |")
250 || die "Error: Couldn't open pipe from gzip: $!\n $cmd\n";
251
252 my $infodb_line = "";
253 my $infodb_key = "";
254 my $infodb_value = "";
255 while (defined ($infodb_line = <PIPEIN>))
256 {
257 if ($infodb_line =~ /^\[([^\]]+)\]$/)
258 {
259 $infodb_key = $1;
260 }
261 elsif ($infodb_line =~ /^-{70}$/)
262 {
263 $infodb_map->{$infodb_key} = $infodb_value;
264 $infodb_key = "";
265 $infodb_value = "";
266 }
267 else
268 {
269 $infodb_value .= $infodb_line;
270 }
271 }
272
273 close (PIPEIN);
274}
275
276
277sub write_infodb_entry_gdbm_txtgz
278{
279
280 my $infodb_handle = shift(@_);
281 my $infodb_key = shift(@_);
282 my $infodb_map = shift(@_);
283
284 print $infodb_handle "[$infodb_key]\n";
285 foreach my $infodb_value_key (keys(%$infodb_map))
286 {
287 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
288 {
289 if ($infodb_value =~ /-{70,}/)
290 {
291 # if value contains 70 or more hyphens in a row we need to escape them
292 # to prevent txt2db from treating them as a separator
293 $infodb_value =~ s/-/&\#045;/gi;
294 }
295 print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
296 }
297 }
298 print $infodb_handle '-' x 70, "\n";
299}
300
301
302sub delete_infodb_entry_gdbm_txtgz
303{
304
305 my $infodb_handle = shift(@_);
306 my $infodb_key = shift(@_);
307
308
309 # A minus at the end of a key (after the ]) signifies 'delete'
310 print $infodb_handle "[$infodb_key]-\n";
311
312 # The 70 minus signs are also needed, to help make the parsing by db2txt simple
313 print $infodb_handle '-' x 70, "\n";
314}
315
316
317
318# -----------------------------------------------------------------------------
319# GDBM IMPLEMENTATION
320# -----------------------------------------------------------------------------
321
322sub open_infodb_write_handle_gdbm
323{
324 my $infodb_file_path = shift(@_);
325
326 my $txt2db_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "txt2db" . &util::get_os_exe());
327 my $infodb_file_handle = undef;
328 if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| \"$txt2db_exe\" \"$infodb_file_path\""))
329 {
330 return undef;
331 }
332
333 return $infodb_file_handle;
334}
335
336
337sub close_infodb_write_handle_gdbm
338{
339 my $infodb_handle = shift(@_);
340
341 close($infodb_handle);
342}
343
344
345sub get_infodb_file_path_gdbm
346{
347 my $collection_name = shift(@_);
348 my $infodb_directory_path = shift(@_);
349
350 my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb");
351 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
352 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
353}
354
355
356sub read_infodb_file_gdbm
357{
358 my $infodb_file_path = shift(@_);
359 my $infodb_map = shift(@_);
360
361 open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt \$infodb_file_path\"\n";
362 my $infodb_line = "";
363 my $infodb_key = "";
364 my $infodb_value = "";
365 while (defined ($infodb_line = <PIPEIN>))
366 {
367 if ($infodb_line =~ /^\[([^\]]+)\]$/)
368 {
369 $infodb_key = $1;
370 }
371 elsif ($infodb_line =~ /^-{70}$/)
372 {
373 $infodb_map->{$infodb_key} = $infodb_value;
374 $infodb_key = "";
375 $infodb_value = "";
376 }
377 else
378 {
379 $infodb_value .= $infodb_line;
380 }
381 }
382
383 close (PIPEIN);
384}
385
386sub read_infodb_keys_gdbm
387{
388 my $infodb_file_path = shift(@_);
389 my $infodb_map = shift(@_);
390
391 open (PIPEIN, "gdbmkeys \"$infodb_file_path\" |") || die "couldn't open pipe from gdbmkeys \$infodb_file_path\"\n";
392 my $infodb_line = "";
393 my $infodb_key = "";
394 my $infodb_value = "";
395 while (defined ($infodb_line = <PIPEIN>))
396 {
397 chomp $infodb_line; # remove end of line
398
399 $infodb_map->{$infodb_line} = 1;
400 }
401
402 close (PIPEIN);
403}
404
405
406sub write_infodb_entry_gdbm
407{
408 # With infodb_handle already set up, works the same as _gdbm_txtgz version
409 write_infodb_entry_gdbm_txtgz(@_);
410}
411
412sub delete_infodb_entry_gdbm
413{
414 # With infodb_handle already set up, works the same as _gdbm_txtgz version
415 delete_infodb_entry_gdbm_txtgz(@_);
416}
417
418
419
420# -----------------------------------------------------------------------------
421# SQLITE IMPLEMENTATION
422# -----------------------------------------------------------------------------
423
424sub open_infodb_write_handle_sqlite
425{
426 my $infodb_file_path = shift(@_);
427
428 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
429 my $infodb_handle = undef;
430 if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
431 {
432 return undef;
433 }
434
435 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
436 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
437
438 # This is crucial for efficiency when importing large amounts of data
439 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
440
441 # This is very important for efficiency, otherwise each command will be actioned one at a time
442 print $infodb_handle "BEGIN TRANSACTION;\n";
443
444 return $infodb_handle;
445}
446
447
448sub close_infodb_write_handle_sqlite
449{
450 my $infodb_handle = shift(@_);
451
452 # Close the transaction we began after opening the file
453 print $infodb_handle "END TRANSACTION;\n";
454
455 # This is crucial for efficient queries on the database!
456 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
457
458 close($infodb_handle);
459}
460
461
462sub get_infodb_file_path_sqlite
463{
464 my $collection_name = shift(@_);
465 my $infodb_directory_path = shift(@_);
466
467 my $infodb_file_extension = ".db";
468 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
469 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
470}
471
472
473sub read_infodb_file_sqlite
474{
475 my $infodb_file_path = shift(@_);
476 my $infodb_map = shift(@_);
477
478 # !! TO IMPLEMENT
479}
480
481
482sub write_infodb_entry_sqlite
483{
484 my $infodb_handle = shift(@_);
485 my $infodb_key = shift(@_);
486 my $infodb_map = shift(@_);
487
488 # Add the key -> value mapping into the "data" table
489 my $infodb_entry_value = "";
490 foreach my $infodb_value_key (keys(%$infodb_map))
491 {
492 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
493 {
494 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
495 }
496 }
497
498 my $safe_infodb_key = &sqlite_safe($infodb_key);
499 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
500
501 # If this infodb entry is for a document, add all the interesting document metadata to the
502 # "document_metadata" table (for use by the dynamic classifiers)
503 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
504 {
505 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
506
507 foreach my $infodb_value_key (keys(%$infodb_map))
508 {
509 # We're not interested in most of the automatically added document metadata
510 next if ($infodb_value_key eq "archivedir" ||
511 $infodb_value_key eq "assocfilepath" ||
512 $infodb_value_key eq "childtype" ||
513 $infodb_value_key eq "contains" ||
514 $infodb_value_key eq "docnum" ||
515 $infodb_value_key eq "doctype" ||
516 $infodb_value_key eq "Encoding" ||
517 $infodb_value_key eq "FileSize" ||
518 $infodb_value_key eq "hascover" ||
519 $infodb_value_key eq "hastxt" ||
520 $infodb_value_key eq "lastmodified" ||
521 $infodb_value_key eq "metadataset" ||
522 $infodb_value_key eq "thistype" ||
523 $infodb_value_key =~ /^metadatafreq\-/ ||
524 $infodb_value_key =~ /^metadatalist\-/);
525
526 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
527 {
528 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
529 }
530 }
531 }
532}
533
534
535
536sub delete_infodb_entry_sqlite
537{
538 my $infodb_handle = shift(@_);
539 my $infodb_key = shift(@_);
540
541 # Delete the key from the "data" table
542
543 my $safe_infodb_key = &sqlite_safe($infodb_key);
544
545 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
546
547 # If this infodb entry is for a document, delete the
548 # "document_metadata" table entry also (for use by the dynamic classifiers)
549 if ($infodb_key !~ /\./)
550 {
551 # Possible for there not to be a docOID matching this infodb_key
552 # (entries are only made when <doctype> == doc
553 # Attempt to delete it, and don't complain if one isn't found
554
555 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
556
557 }
558}
559
560
561
562
563
564
565sub sqlite_safe
566{
567 my $value = shift(@_);
568
569 # Escape any single quotes in the value
570 $value =~ s/\'/\'\'/g;
571
572 return $value;
573}
574
575
576
577# ----------------------------------------------------------------------------------------
578# MSSQL IMPLEMENTATION
579# ----------------------------------------------------------------------------------------
580
581my $mssql_collection_name = "";
582my $mssql_data_table_name = "";
583my $mssql_document_metadata_table_name = "";
584
585
586sub open_infodb_write_handle_mssql
587{
588 my $infodb_file_path = shift(@_);
589
590 # You might have to install the DBD::ADO module from CPAN
591 #================================================================#
592 # Uncomment this if you want to use MSSQL!!!
593 # By the way, MSSQL only works on a Windows machine...
594 #================================================================#
595 #use DBI;
596 #use DBD::ADO;
597 #Win32::OLE->Option(CP => Win32::OLE::CP_UTF8);
598
599 # The hard coded server connection thingy which should be placed
600 # in some configuration file.
601 # If you have problem connecting to your MS SQL server:
602 # 1. Check if your MSSQL server has been started.
603 # 2. Check if the TCP/IP connection has been enabled.
604 # 3. Use telnet to the server
605 # (don't forget to specify the port, which can be found in the configuration manager)
606 # If none of the above helped, the you need to start googling then.
607 my $host = "localhost,1660"; # Need to look up your SQL server and see what port is it using.
608 my $user = "sa";
609 my $pwd = "[When installing the MSSQL, you will be asked to input a password for the sa user, use that password]";
610 my $database = "[Create a database in MSSQL and use it here]";
611
612 # Create the unique name for the table
613 # We do not want to change the database for the current running index
614 # Therefore we use timestamp and collection short name to create an unqiue name
615 my $cur_time = time();
616 my $unique_key = $mssql_collection_name . "_" . $cur_time;
617 $mssql_data_table_name = "data_" . $unique_key;
618 $mssql_document_metadata_table_name = "document_metadata_" . $unique_key;
619 print STDERR "MSSQL: Creating unique table name. Unique ID:[" . $unique_key . "]\n";
620
621 # Store these information into the infodbfile
622 open(FH, ">" . $infodb_file_path);
623 print FH "mss-host\t" . $host . "\n";
624 print FH "username\t" . $user . "\n";
625 print FH "password\t" . $pwd . "\n";
626 print FH "database\t" . $database . "\n";
627 print FH "tableid\t" . $unique_key . "\n";
628 close(FH);
629 print STDERR "MSSQL: Saving db info into :[" . $infodb_file_path . "]\n";
630
631 # Make the connection
632 my $dsn = "Provider=SQLNCLI;Server=$host;Database=$database";
633 my $infodb_handle = DBI->connect("dbi:ADO:$dsn", $user, $pwd, { RaiseError => 1, AutoCommit => 1}) || return undef;
634 print STDERR "MSSQL: Connect to MS SQL database. DSN:[" . $dsn . "]\n";
635
636 # Make sure the data table has been created.
637 my $data_table_checker_array = dbgetarray($infodb_handle, "SELECT name FROM sysobjects WHERE name = '" . $mssql_data_table_name . "' AND OBJECTPROPERTY(id,'IsUserTable') = 1");
638 if (scalar(@{$data_table_checker_array}) == 0)
639 {
640 dbquery($infodb_handle, "CREATE TABLE " . $mssql_data_table_name . " (one_key NVARCHAR(50) UNIQUE, one_value NVARCHAR(MAX))");
641 }
642 print STDERR "MSSQL: Making sure the data table(" . $mssql_data_table_name . ") exists\n";
643
644 # Make sure the document_metadata table has been created.
645 my $document_metadata_table_checker_array = dbgetarray($infodb_handle, "SELECT name FROM sysobjects WHERE name = '" . $mssql_document_metadata_table_name . "' AND OBJECTPROPERTY(id,'IsUserTable') = 1");
646 if (scalar(@{$document_metadata_table_checker_array}) == 0)
647 {
648 dbquery($infodb_handle, "CREATE TABLE " . $mssql_document_metadata_table_name . " (id INTEGER IDENTITY(1,1) PRIMARY KEY, docOID NVARCHAR(50), element NVARCHAR(MAX), value NVARCHAR(MAX))");
649 dbquery($infodb_handle, "CREATE INDEX dmd ON " . $mssql_document_metadata_table_name . "(docOID)");
650 }
651 print STDERR "MSSQL: Making sure the document_metadata table(" . $mssql_data_table_name . ") exists.\n";
652
653 return $infodb_handle;
654}
655
656
657sub close_infodb_write_handle_mssql
658{
659 my $infodb_handle = shift(@_);
660
661 $infodb_handle->disconnect();
662}
663
664
665sub get_infodb_file_path_mssql
666{
667 my $collection_name = shift(@_);
668 my $infodb_directory_path = shift(@_);
669
670 my $infodb_file_extension = ".mssqldbinfo";
671 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
672
673 # This will be used in the open_infodb_write_handle_mssql function
674 $mssql_collection_name = $collection_name;
675
676 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
677}
678
679
680sub read_infodb_file_mssql
681{
682 my $infodb_file_path = shift(@_);
683 my $infodb_map = shift(@_);
684
685 # !! TO IMPLEMENT
686}
687
688
689sub write_infodb_entry_mssql
690{
691 my $infodb_handle = shift(@_);
692 my $infodb_key = shift(@_);
693 my $infodb_map = shift(@_);
694
695 # Add the key -> value mapping into the "data" table
696 my $infodb_entry_value = "";
697 foreach my $infodb_value_key (keys(%$infodb_map))
698 {
699 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
700 {
701 $infodb_entry_value .= "<$infodb_value_key>" . &Encode::decode_utf8($infodb_value) . "\n";
702 }
703 }
704
705 # Prepare the query
706 my $safe_infodb_key = &mssql_safe($infodb_key);
707 my $query = "INSERT INTO " . $mssql_data_table_name . " (one_key, one_value) VALUES (N'" . $safe_infodb_key . "', N'" . &mssql_safe($infodb_entry_value) . "')";
708 dbquery($infodb_handle, $query);
709
710 # If this infodb entry is for a document, add all the interesting document metadata to the
711 # "document_metadata" table (for use by the dynamic classifiers)
712 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
713 {
714 dbquery($infodb_handle, "DELETE FROM " . $mssql_document_metadata_table_name . " WHERE docOID=N'" . $safe_infodb_key . "'");
715
716 foreach my $infodb_value_key (keys(%$infodb_map))
717 {
718 # We're not interested in most of the automatically added document metadata
719 next if ($infodb_value_key eq "archivedir" ||
720 $infodb_value_key eq "assocfilepath" ||
721 $infodb_value_key eq "childtype" ||
722 $infodb_value_key eq "contains" ||
723 $infodb_value_key eq "docnum" ||
724 $infodb_value_key eq "doctype" ||
725 $infodb_value_key eq "Encoding" ||
726 $infodb_value_key eq "FileSize" ||
727 $infodb_value_key eq "hascover" ||
728 $infodb_value_key eq "hastxt" ||
729 $infodb_value_key eq "lastmodified" ||
730 $infodb_value_key eq "metadataset" ||
731 $infodb_value_key eq "thistype" ||
732 $infodb_value_key =~ /^metadatafreq\-/ ||
733 $infodb_value_key =~ /^metadatalist\-/);
734 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
735 {
736 $infodb_handle->{LongReadLen} = 65535; # Added for the encoding issue
737 my $query = "INSERT INTO " . $mssql_document_metadata_table_name . " (docOID, element, value) VALUES (N'" . $safe_infodb_key . "', N'" . &mssql_safe(&Encode::decode_utf8($infodb_value_key)) . "', N'" . &mssql_safe(&Encode::decode_utf8($infodb_value)) . "')";
738 dbquery($infodb_handle, $query);
739 }
740 }
741 }
742}
743
744sub delete_infodb_entry_mssql
745{
746 my $infodb_handle = shift(@_);
747 my $infodb_key = shift(@_);
748
749 # Delete the key from the "data" table
750
751
752 # Prepare the query
753 my $safe_infodb_key = &mssql_safe($infodb_key);
754 my $query = "DELETE FROM " . $mssql_data_table_name . " WHERE one_key=N'" . $safe_infodb_key . "'";
755 dbquery($infodb_handle, $query);
756
757 # If this infodb entry is for a document, add all the interesting document metadata to the
758 # "document_metadata" table (for use by the dynamic classifiers)
759 if ($infodb_key !~ /\./)
760 {
761 # Possible for there not to be a docOID matching this infodb_key
762 # (entries are only made when <doctype> == doc
763 # Attempt to delete it, and don't complain if one isn't found
764
765 dbquery($infodb_handle, "DELETE FROM " . $mssql_document_metadata_table_name . " WHERE docOID=N'" . $safe_infodb_key . "'");
766
767 }
768}
769
770
771
772sub mssql_safe
773{
774 my $value = shift(@_);
775
776 # Escape any single quotes in the value
777 $value =~ s/\'/\'\'/g;
778
779 return $value;
780}
781
782
783sub dbquery
784{
785 my $infodb_handle = shift(@_);
786 my $sql_query = shift(@_);
787
788 # Execute the SQL statement
789 my $statement_handle = $infodb_handle->prepare($sql_query);
790 $statement_handle->execute();
791 if ($statement_handle->err)
792 {
793 print STDERR "Error:" . $statement_handle->errstr . "\n";
794 return undef;
795 }
796
797 return $statement_handle;
798}
799
800
801sub dbgetarray
802{
803 my $infodb_handle = shift(@_);
804 my $sql_query = shift(@_);
805
806 my $statement_handle = dbquery($infodb_handle, $sql_query);
807 my $return_array = [];
808
809 # Iterate through the results and push them into an array
810 if (!defined($statement_handle))
811 {
812 return [];
813 }
814
815 while ((my $temp_hash = $statement_handle->fetchrow_hashref()))
816 {
817 push(@$return_array, $temp_hash);
818 }
819
820 return $return_array;
821}
822
823
8241;
Note: See TracBrowser for help on using the repository browser.