source: gsdl/trunk/perllib/dbutil.pm@ 19773

Last change on this file since 19773 was 19773, checked in by davidb, 15 years ago

optional parameter to open GDBM database that allows for records to be appended

File size: 24.8 KB
Line 
1###########################################################################
2#
3# dbutil.pm -- utility functions for writing to different databases
4# Copyright (C) 2008 DL Consulting Ltd
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package dbutil;
27
28use Encode;
29use strict;
30
31
32sub open_infodb_write_handle
33{
34 my $infodb_type = shift(@_);
35 my $infodb_file_path = shift(@_);
36
37 if ($infodb_type eq "sqlite")
38 {
39 return &open_infodb_write_handle_sqlite($infodb_file_path);
40 }
41 elsif ($infodb_type eq "gdbm-txtgz")
42 {
43 return &open_infodb_write_handle_gdbm_txtgz($infodb_file_path);
44 }
45 elsif ($infodb_type eq "mssql")
46 {
47 return &open_infodb_write_handle_mssql($infodb_file_path);
48 }
49
50 # Use GDBM if the infodb type is empty or not one of the values above
51 return &open_infodb_write_handle_gdbm($infodb_file_path);
52}
53
54
55sub close_infodb_write_handle
56{
57 my $infodb_type = shift(@_);
58 my $infodb_handle = shift(@_);
59
60 if ($infodb_type eq "sqlite")
61 {
62 return &close_infodb_write_handle_sqlite($infodb_handle);
63 }
64 elsif ($infodb_type eq "gdbm-txtgz")
65 {
66 return &close_infodb_write_handle_gdbm_txtgz($infodb_handle);
67 }
68 elsif ($infodb_type eq "mssql")
69 {
70 return &close_infodb_write_handle_mssql($infodb_handle);
71 }
72
73 # Use GDBM if the infodb type is empty or not one of the values above
74 return &close_infodb_write_handle_gdbm($infodb_handle);
75}
76
77
78sub get_default_infodb_type
79{
80 # The default is GDBM so everything works the same for existing collections
81 # To use something else, specify the "infodbtype" in the collection's collect.cfg file
82 return "gdbm";
83}
84
85
86sub get_infodb_file_path
87{
88 my $infodb_type = shift(@_);
89 my $collection_name = shift(@_);
90 my $infodb_directory_path = shift(@_);
91
92 if ($infodb_type eq "sqlite")
93 {
94 return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path);
95 }
96 elsif ($infodb_type eq "gdbm-txtgz")
97 {
98 return &get_infodb_file_path_gdbm_txtgz($collection_name, $infodb_directory_path);
99 }
100 elsif ($infodb_type eq "mssql")
101 {
102 #==================================================================================================#
103 # Updated by Jeffrey (2008/08/25 Monday)
104 # After look into the run-time code, it seems we should still create a database file.
105 # Since the run-time code is always try to read a database file, the easiest way here is not
106 # to change the whole structure, but to give whatever the system is looking for.
107 #==================================================================================================#
108 # Added by Jeffrey (2008/08/15 Friday)
109 # No file path required for MS SQL, it is a server-client connection.
110 # At the moment the information is hard coded in open_infodb_write_handle_mssql
111 # the this might need some tidy up sometime.
112 #==================================================================================================#
113 return &get_infodb_file_path_mssql($collection_name, $infodb_directory_path);
114 }
115
116 # Use GDBM if the infodb type is empty or not one of the values above
117 return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
118}
119
120
121sub read_infodb_file
122{
123 my $infodb_type = shift(@_);
124 my $infodb_file_path = shift(@_);
125 my $infodb_map = shift(@_);
126
127 if ($infodb_type eq "sqlite")
128 {
129 return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
130 }
131 elsif ($infodb_type eq "gdbm-txtgz")
132 {
133 return &read_infodb_file_gdbm_txtgz($infodb_file_path, $infodb_map);
134 }
135 elsif ($infodb_type eq "mssql")
136 {
137 return &read_infodb_file_mssql($infodb_file_path, $infodb_map);
138 }
139
140 # Use GDBM if the infodb type is empty or not one of the values above
141 return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
142}
143
144
145sub write_infodb_entry
146{
147 my $infodb_type = shift(@_);
148 my $infodb_handle = shift(@_);
149 my $infodb_key = shift(@_);
150 my $infodb_map = shift(@_);
151
152 if ($infodb_type eq "sqlite")
153 {
154 return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map);
155 }
156 elsif ($infodb_type eq "gdbm-txtgz")
157 {
158 return &write_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key, $infodb_map);
159 }
160 elsif ($infodb_type eq "mssql")
161 {
162 return &write_infodb_entry_mssql($infodb_handle, $infodb_key, $infodb_map);
163 }
164
165 # Use GDBM if the infodb type is empty or not one of the values above
166 return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
167}
168
169
170sub delete_infodb_entry
171{
172 my $infodb_type = shift(@_);
173 my $infodb_handle = shift(@_);
174 my $infodb_key = shift(@_);
175
176 if ($infodb_type eq "sqlite")
177 {
178 return &delete_infodb_entry_sqlite($infodb_handle, $infodb_key);
179 }
180 elsif ($infodb_type eq "gdbm-txtgz")
181 {
182 return &delete_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key);
183 }
184 elsif ($infodb_type eq "mssql")
185 {
186 return &delete_infodb_entry_mssql($infodb_handle, $infodb_key);
187 }
188
189 # Use GDBM if the infodb type is empty or not one of the values above
190 return &delete_infodb_entry_gdbm($infodb_handle, $infodb_key);
191}
192
193
194
195# -----------------------------------------------------------------------------
196# GDBM TXT-GZ IMPLEMENTATION
197# -----------------------------------------------------------------------------
198
199sub open_infodb_write_handle_gdbm_txtgz
200{
201 # Keep infodb in GDBM neutral form => save data as compressed text file,
202 # read for txt2db to be run on it later (i.e. by the runtime system,
203 # first time the collection is ever accessed). This makes it easier
204 # distribute pre-built collections to various architectures.
205 #
206 # NB: even if two architectures are little endian (e.g. Intel and
207 # ARM procesors) GDBM does *not* guarantee that the database generated on
208 # one will work on the other
209
210 my $infodb_file_path = shift(@_);
211
212 # Greenstone ships with gzip for windows, on $PATH
213
214 my $infodb_file_handle = undef;
215 if (!open($infodb_file_handle, "| gzip - > \"$infodb_file_path\""))
216 {
217 return undef;
218 }
219
220 return $infodb_file_handle;
221}
222
223
224sub close_infodb_write_handle_gdbm_txtgz
225{
226 my $infodb_handle = shift(@_);
227
228 close($infodb_handle);
229}
230
231
232sub get_infodb_file_path_gdbm_txtgz
233{
234 my $collection_name = shift(@_);
235 my $infodb_directory_path = shift(@_);
236
237 my $infodb_file_name = &util::get_dirsep_tail($collection_name).".txt.gz";
238 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
239}
240
241
242sub read_infodb_file_gdbm_txtgz
243{
244 my $infodb_file_path = shift(@_);
245 my $infodb_map = shift(@_);
246
247 my $cmd = "gzip --decompress \"$infodb_file_path\"";
248
249 open (PIPEIN, "$cmd |")
250 || die "Error: Couldn't open pipe from gzip: $!\n $cmd\n";
251
252 my $infodb_line = "";
253 my $infodb_key = "";
254 my $infodb_value = "";
255 while (defined ($infodb_line = <PIPEIN>))
256 {
257 if ($infodb_line =~ /^\[([^\]]+)\]$/)
258 {
259 $infodb_key = $1;
260 }
261 elsif ($infodb_line =~ /^-{70}$/)
262 {
263 $infodb_map->{$infodb_key} = $infodb_value;
264 $infodb_key = "";
265 $infodb_value = "";
266 }
267 else
268 {
269 $infodb_value .= $infodb_line;
270 }
271 }
272
273 close (PIPEIN);
274}
275
276
277sub write_infodb_entry_gdbm_txtgz
278{
279
280 my $infodb_handle = shift(@_);
281 my $infodb_key = shift(@_);
282 my $infodb_map = shift(@_);
283
284 print $infodb_handle "[$infodb_key]\n";
285 foreach my $infodb_value_key (keys(%$infodb_map))
286 {
287 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
288 {
289 if ($infodb_value =~ /-{70,}/)
290 {
291 # if value contains 70 or more hyphens in a row we need to escape them
292 # to prevent txt2db from treating them as a separator
293 $infodb_value =~ s/-/&\#045;/gi;
294 }
295 print $infodb_handle "<$infodb_value_key>" . $infodb_value . "\n";
296 }
297 }
298 print $infodb_handle '-' x 70, "\n";
299}
300
301
302sub delete_infodb_entry_gdbm_txtgz
303{
304
305 my $infodb_handle = shift(@_);
306 my $infodb_key = shift(@_);
307
308
309 # A minus at the end of a key (after the ]) signifies 'delete'
310 print $infodb_handle "[$infodb_key]-\n";
311
312 # The 70 minus signs are also needed, to help make the parsing by db2txt simple
313 print $infodb_handle '-' x 70, "\n";
314}
315
316
317
318# -----------------------------------------------------------------------------
319# GDBM IMPLEMENTATION
320# -----------------------------------------------------------------------------
321
322sub open_infodb_write_handle_gdbm
323{
324 my $infodb_file_path = shift(@_);
325 my $opt_append = shift(@_);
326
327 my $txt2db_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "txt2db" . &util::get_os_exe());
328 my $infodb_file_handle = undef;
329 my $cmd = "\"$txt2db_exe\"";
330 if ((defined $opt_append) && ($opt_append eq "append")) {
331 $cmd .= " -append";
332 }
333 $cmd .= " \"$infodb_file_path\"";
334
335 if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| $cmd"))
336 {
337 return undef;
338 }
339
340 return $infodb_file_handle;
341}
342
343
344
345sub close_infodb_write_handle_gdbm
346{
347 my $infodb_handle = shift(@_);
348
349 close($infodb_handle);
350}
351
352
353sub get_infodb_file_path_gdbm
354{
355 my $collection_name = shift(@_);
356 my $infodb_directory_path = shift(@_);
357
358 my $infodb_file_extension = ".gdb";
359 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
360 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
361}
362
363
364sub read_infodb_file_gdbm
365{
366 my $infodb_file_path = shift(@_);
367 my $infodb_map = shift(@_);
368
369 open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt \$infodb_file_path\"\n";
370 my $infodb_line = "";
371 my $infodb_key = "";
372 my $infodb_value = "";
373 while (defined ($infodb_line = <PIPEIN>))
374 {
375 if ($infodb_line =~ /^\[([^\]]+)\]$/)
376 {
377 $infodb_key = $1;
378 }
379 elsif ($infodb_line =~ /^-{70}$/)
380 {
381 $infodb_map->{$infodb_key} = $infodb_value;
382 $infodb_key = "";
383 $infodb_value = "";
384 }
385 else
386 {
387 $infodb_value .= $infodb_line;
388 }
389 }
390
391 close (PIPEIN);
392}
393
394sub read_infodb_keys_gdbm
395{
396 my $infodb_file_path = shift(@_);
397 my $infodb_map = shift(@_);
398
399 open (PIPEIN, "gdbmkeys \"$infodb_file_path\" |") || die "couldn't open pipe from gdbmkeys \$infodb_file_path\"\n";
400 my $infodb_line = "";
401 my $infodb_key = "";
402 my $infodb_value = "";
403 while (defined ($infodb_line = <PIPEIN>))
404 {
405 chomp $infodb_line; # remove end of line
406
407 $infodb_map->{$infodb_line} = 1;
408 }
409
410 close (PIPEIN);
411}
412
413
414sub write_infodb_entry_gdbm
415{
416 # With infodb_handle already set up, works the same as _gdbm_txtgz version
417 write_infodb_entry_gdbm_txtgz(@_);
418}
419
420sub delete_infodb_entry_gdbm
421{
422 # With infodb_handle already set up, works the same as _gdbm_txtgz version
423 delete_infodb_entry_gdbm_txtgz(@_);
424}
425
426
427
428# -----------------------------------------------------------------------------
429# SQLITE IMPLEMENTATION
430# -----------------------------------------------------------------------------
431
432sub open_infodb_write_handle_sqlite
433{
434 my $infodb_file_path = shift(@_);
435
436 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
437 my $infodb_handle = undef;
438 if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
439 {
440 return undef;
441 }
442
443 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
444 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
445
446 # This is crucial for efficiency when importing large amounts of data
447 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
448
449 # This is very important for efficiency, otherwise each command will be actioned one at a time
450 print $infodb_handle "BEGIN TRANSACTION;\n";
451
452 return $infodb_handle;
453}
454
455
456sub close_infodb_write_handle_sqlite
457{
458 my $infodb_handle = shift(@_);
459
460 # Close the transaction we began after opening the file
461 print $infodb_handle "END TRANSACTION;\n";
462
463 # This is crucial for efficient queries on the database!
464 print $infodb_handle "CREATE INDEX IF NOT EXISTS dme ON document_metadata(element);\n";
465
466 close($infodb_handle);
467}
468
469
470sub get_infodb_file_path_sqlite
471{
472 my $collection_name = shift(@_);
473 my $infodb_directory_path = shift(@_);
474
475 my $infodb_file_extension = ".db";
476 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
477 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
478}
479
480
481sub read_infodb_file_sqlite
482{
483 my $infodb_file_path = shift(@_);
484 my $infodb_map = shift(@_);
485
486 # !! TO IMPLEMENT
487}
488
489
490sub write_infodb_entry_sqlite
491{
492 my $infodb_handle = shift(@_);
493 my $infodb_key = shift(@_);
494 my $infodb_map = shift(@_);
495
496 # Add the key -> value mapping into the "data" table
497 my $infodb_entry_value = "";
498 foreach my $infodb_value_key (keys(%$infodb_map))
499 {
500 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
501 {
502 $infodb_entry_value .= "<$infodb_value_key>" . $infodb_value . "\n";
503 }
504 }
505
506 my $safe_infodb_key = &sqlite_safe($infodb_key);
507 print $infodb_handle "INSERT OR REPLACE INTO data (key, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_entry_value) . "');\n";
508
509 # If this infodb entry is for a document, add all the interesting document metadata to the
510 # "document_metadata" table (for use by the dynamic classifiers)
511 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
512 {
513 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
514
515 foreach my $infodb_value_key (keys(%$infodb_map))
516 {
517 # We're not interested in most of the automatically added document metadata
518 next if ($infodb_value_key eq "archivedir" ||
519 $infodb_value_key eq "assocfilepath" ||
520 $infodb_value_key eq "childtype" ||
521 $infodb_value_key eq "contains" ||
522 $infodb_value_key eq "docnum" ||
523 $infodb_value_key eq "doctype" ||
524 $infodb_value_key eq "Encoding" ||
525 $infodb_value_key eq "FileSize" ||
526 $infodb_value_key eq "hascover" ||
527 $infodb_value_key eq "hastxt" ||
528 $infodb_value_key eq "lastmodified" ||
529 $infodb_value_key eq "metadataset" ||
530 $infodb_value_key eq "thistype" ||
531 $infodb_value_key =~ /^metadatafreq\-/ ||
532 $infodb_value_key =~ /^metadatalist\-/);
533
534 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
535 {
536 print $infodb_handle "INSERT INTO document_metadata (docOID, element, value) VALUES ('" . $safe_infodb_key . "', '" . &sqlite_safe($infodb_value_key) . "', '" . &sqlite_safe($infodb_value) . "');\n";
537 }
538 }
539 }
540}
541
542
543
544sub delete_infodb_entry_sqlite
545{
546 my $infodb_handle = shift(@_);
547 my $infodb_key = shift(@_);
548
549 # Delete the key from the "data" table
550
551 my $safe_infodb_key = &sqlite_safe($infodb_key);
552
553 print $infodb_handle "DELETE FROM data WHERE key='" . $safe_infodb_key . "';\n";
554
555 # If this infodb entry is for a document, delete the
556 # "document_metadata" table entry also (for use by the dynamic classifiers)
557 if ($infodb_key !~ /\./)
558 {
559 # Possible for there not to be a docOID matching this infodb_key
560 # (entries are only made when <doctype> == doc
561 # Attempt to delete it, and don't complain if one isn't found
562
563 print $infodb_handle "DELETE FROM document_metadata WHERE docOID='" . $safe_infodb_key . "';\n";
564
565 }
566}
567
568
569
570
571
572
573sub sqlite_safe
574{
575 my $value = shift(@_);
576
577 # Escape any single quotes in the value
578 $value =~ s/\'/\'\'/g;
579
580 return $value;
581}
582
583
584
585# ----------------------------------------------------------------------------------------
586# MSSQL IMPLEMENTATION
587# ----------------------------------------------------------------------------------------
588
589my $mssql_collection_name = "";
590my $mssql_data_table_name = "";
591my $mssql_document_metadata_table_name = "";
592
593
594sub open_infodb_write_handle_mssql
595{
596 my $infodb_file_path = shift(@_);
597
598 # You might have to install the DBD::ADO module from CPAN
599 #================================================================#
600 # Uncomment this if you want to use MSSQL!!!
601 # By the way, MSSQL only works on a Windows machine...
602 #================================================================#
603 #use DBI;
604 #use DBD::ADO;
605 #Win32::OLE->Option(CP => Win32::OLE::CP_UTF8);
606
607 # The hard coded server connection thingy which should be placed
608 # in some configuration file.
609 # If you have problem connecting to your MS SQL server:
610 # 1. Check if your MSSQL server has been started.
611 # 2. Check if the TCP/IP connection has been enabled.
612 # 3. Use telnet to the server
613 # (don't forget to specify the port, which can be found in the configuration manager)
614 # If none of the above helped, the you need to start googling then.
615 my $host = "localhost,1660"; # Need to look up your SQL server and see what port is it using.
616 my $user = "sa";
617 my $pwd = "[When installing the MSSQL, you will be asked to input a password for the sa user, use that password]";
618 my $database = "[Create a database in MSSQL and use it here]";
619
620 # Create the unique name for the table
621 # We do not want to change the database for the current running index
622 # Therefore we use timestamp and collection short name to create an unqiue name
623 my $cur_time = time();
624 my $unique_key = $mssql_collection_name . "_" . $cur_time;
625 $mssql_data_table_name = "data_" . $unique_key;
626 $mssql_document_metadata_table_name = "document_metadata_" . $unique_key;
627 print STDERR "MSSQL: Creating unique table name. Unique ID:[" . $unique_key . "]\n";
628
629 # Store these information into the infodbfile
630 open(FH, ">" . $infodb_file_path);
631 print FH "mss-host\t" . $host . "\n";
632 print FH "username\t" . $user . "\n";
633 print FH "password\t" . $pwd . "\n";
634 print FH "database\t" . $database . "\n";
635 print FH "tableid\t" . $unique_key . "\n";
636 close(FH);
637 print STDERR "MSSQL: Saving db info into :[" . $infodb_file_path . "]\n";
638
639 # Make the connection
640 my $dsn = "Provider=SQLNCLI;Server=$host;Database=$database";
641 my $infodb_handle = DBI->connect("dbi:ADO:$dsn", $user, $pwd, { RaiseError => 1, AutoCommit => 1}) || return undef;
642 print STDERR "MSSQL: Connect to MS SQL database. DSN:[" . $dsn . "]\n";
643
644 # Make sure the data table has been created.
645 my $data_table_checker_array = dbgetarray($infodb_handle, "SELECT name FROM sysobjects WHERE name = '" . $mssql_data_table_name . "' AND OBJECTPROPERTY(id,'IsUserTable') = 1");
646 if (scalar(@{$data_table_checker_array}) == 0)
647 {
648 dbquery($infodb_handle, "CREATE TABLE " . $mssql_data_table_name . " (one_key NVARCHAR(50) UNIQUE, one_value NVARCHAR(MAX))");
649 }
650 print STDERR "MSSQL: Making sure the data table(" . $mssql_data_table_name . ") exists\n";
651
652 # Make sure the document_metadata table has been created.
653 my $document_metadata_table_checker_array = dbgetarray($infodb_handle, "SELECT name FROM sysobjects WHERE name = '" . $mssql_document_metadata_table_name . "' AND OBJECTPROPERTY(id,'IsUserTable') = 1");
654 if (scalar(@{$document_metadata_table_checker_array}) == 0)
655 {
656 dbquery($infodb_handle, "CREATE TABLE " . $mssql_document_metadata_table_name . " (id INTEGER IDENTITY(1,1) PRIMARY KEY, docOID NVARCHAR(50), element NVARCHAR(MAX), value NVARCHAR(MAX))");
657 dbquery($infodb_handle, "CREATE INDEX dmd ON " . $mssql_document_metadata_table_name . "(docOID)");
658 }
659 print STDERR "MSSQL: Making sure the document_metadata table(" . $mssql_data_table_name . ") exists.\n";
660
661 return $infodb_handle;
662}
663
664
665sub close_infodb_write_handle_mssql
666{
667 my $infodb_handle = shift(@_);
668
669 $infodb_handle->disconnect();
670}
671
672
673sub get_infodb_file_path_mssql
674{
675 my $collection_name = shift(@_);
676 my $infodb_directory_path = shift(@_);
677
678 my $infodb_file_extension = ".mssqldbinfo";
679 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
680
681 # This will be used in the open_infodb_write_handle_mssql function
682 $mssql_collection_name = $collection_name;
683
684 return &util::filename_cat($infodb_directory_path, $infodb_file_name);
685}
686
687
688sub read_infodb_file_mssql
689{
690 my $infodb_file_path = shift(@_);
691 my $infodb_map = shift(@_);
692
693 # !! TO IMPLEMENT
694}
695
696
697sub write_infodb_entry_mssql
698{
699 my $infodb_handle = shift(@_);
700 my $infodb_key = shift(@_);
701 my $infodb_map = shift(@_);
702
703 # Add the key -> value mapping into the "data" table
704 my $infodb_entry_value = "";
705 foreach my $infodb_value_key (keys(%$infodb_map))
706 {
707 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
708 {
709 $infodb_entry_value .= "<$infodb_value_key>" . &Encode::decode_utf8($infodb_value) . "\n";
710 }
711 }
712
713 # Prepare the query
714 my $safe_infodb_key = &mssql_safe($infodb_key);
715 my $query = "INSERT INTO " . $mssql_data_table_name . " (one_key, one_value) VALUES (N'" . $safe_infodb_key . "', N'" . &mssql_safe($infodb_entry_value) . "')";
716 dbquery($infodb_handle, $query);
717
718 # If this infodb entry is for a document, add all the interesting document metadata to the
719 # "document_metadata" table (for use by the dynamic classifiers)
720 if ($infodb_key !~ /\./ && $infodb_entry_value =~ /\<doctype\>doc\n/)
721 {
722 dbquery($infodb_handle, "DELETE FROM " . $mssql_document_metadata_table_name . " WHERE docOID=N'" . $safe_infodb_key . "'");
723
724 foreach my $infodb_value_key (keys(%$infodb_map))
725 {
726 # We're not interested in most of the automatically added document metadata
727 next if ($infodb_value_key eq "archivedir" ||
728 $infodb_value_key eq "assocfilepath" ||
729 $infodb_value_key eq "childtype" ||
730 $infodb_value_key eq "contains" ||
731 $infodb_value_key eq "docnum" ||
732 $infodb_value_key eq "doctype" ||
733 $infodb_value_key eq "Encoding" ||
734 $infodb_value_key eq "FileSize" ||
735 $infodb_value_key eq "hascover" ||
736 $infodb_value_key eq "hastxt" ||
737 $infodb_value_key eq "lastmodified" ||
738 $infodb_value_key eq "metadataset" ||
739 $infodb_value_key eq "thistype" ||
740 $infodb_value_key =~ /^metadatafreq\-/ ||
741 $infodb_value_key =~ /^metadatalist\-/);
742 foreach my $infodb_value (@{$infodb_map->{$infodb_value_key}})
743 {
744 $infodb_handle->{LongReadLen} = 65535; # Added for the encoding issue
745 my $query = "INSERT INTO " . $mssql_document_metadata_table_name . " (docOID, element, value) VALUES (N'" . $safe_infodb_key . "', N'" . &mssql_safe(&Encode::decode_utf8($infodb_value_key)) . "', N'" . &mssql_safe(&Encode::decode_utf8($infodb_value)) . "')";
746 dbquery($infodb_handle, $query);
747 }
748 }
749 }
750}
751
752sub delete_infodb_entry_mssql
753{
754 my $infodb_handle = shift(@_);
755 my $infodb_key = shift(@_);
756
757 # Delete the key from the "data" table
758
759
760 # Prepare the query
761 my $safe_infodb_key = &mssql_safe($infodb_key);
762 my $query = "DELETE FROM " . $mssql_data_table_name . " WHERE one_key=N'" . $safe_infodb_key . "'";
763 dbquery($infodb_handle, $query);
764
765 # If this infodb entry is for a document, add all the interesting document metadata to the
766 # "document_metadata" table (for use by the dynamic classifiers)
767 if ($infodb_key !~ /\./)
768 {
769 # Possible for there not to be a docOID matching this infodb_key
770 # (entries are only made when <doctype> == doc
771 # Attempt to delete it, and don't complain if one isn't found
772
773 dbquery($infodb_handle, "DELETE FROM " . $mssql_document_metadata_table_name . " WHERE docOID=N'" . $safe_infodb_key . "'");
774
775 }
776}
777
778
779
780sub mssql_safe
781{
782 my $value = shift(@_);
783
784 # Escape any single quotes in the value
785 $value =~ s/\'/\'\'/g;
786
787 return $value;
788}
789
790
791sub dbquery
792{
793 my $infodb_handle = shift(@_);
794 my $sql_query = shift(@_);
795
796 # Execute the SQL statement
797 my $statement_handle = $infodb_handle->prepare($sql_query);
798 $statement_handle->execute();
799 if ($statement_handle->err)
800 {
801 print STDERR "Error:" . $statement_handle->errstr . "\n";
802 return undef;
803 }
804
805 return $statement_handle;
806}
807
808
809sub dbgetarray
810{
811 my $infodb_handle = shift(@_);
812 my $sql_query = shift(@_);
813
814 my $statement_handle = dbquery($infodb_handle, $sql_query);
815 my $return_array = [];
816
817 # Iterate through the results and push them into an array
818 if (!defined($statement_handle))
819 {
820 return [];
821 }
822
823 while ((my $temp_hash = $statement_handle->fetchrow_hashref()))
824 {
825 push(@$return_array, $temp_hash);
826 }
827
828 return $return_array;
829}
830
831
8321;
Note: See TracBrowser for help on using the repository browser.