source: main/trunk/greenstone2/perllib/gssql.pm@ 32531

Last change on this file since 32531 was 32531, checked in by ak19, 5 years ago
  1. bugfix to GS SQLPlugout: recursive call didn't go through self variable, noticed only when processing a doc with structure (subsections). 2. Being more explicit about the params passed to gssql.pm constructor. 3. MySQL didn't let me create a table with hyphens in the tablename. So collection names that contain hyphens need to first be adjusted for use in table names.
File size: 16.2 KB
Line 
1###########################################################################
2#
3# gssql.pm -- DBI for SQL related utility functions used by
4# GreenstoneSQLPlugout and hereafter by GreenstoneSQLPlugin too.
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package gssql;
28
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33use GreenstoneXMLPlugout;
34use docprint;
35
36use DBI; # the central package for this plugout
37
38# Need params_map keys:
39# - collection_name
40# - db_encoding (db content encoding) - MySQL can set this at server, db, table levels. For MySQL
41# we set the enc during connect at server level. Not sure whether other DB's support it at the
42# same levels.
43
44# For connection to MySQL, need:
45# - db_driver, db_client_user, db_client_pwd, db_host, (db_port not used at present)
46# So these will be parameterised, but in a hashmap, for just the connect method.
47
48# Parameterise (one or more methods may use them):
49# - build_mode (like removeold)
50# - db_name (which is the GS3 sitename)
51
52# TODO: add infrastructure for db_port, AutoCommit etc
53# For port, see https://stackoverflow.com/questions/2248665/perl-script-to-connect-to-mysql-server-port-3307
54
55sub new
56{
57
58 my $class = shift(@_);
59
60 my ($params_map) = @_;
61
62 # library_url: to be specified on the cmdline if not using a GS-included web server
63 # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
64
65 # https://stackoverflow.com/questions/7083453/copying-a-hashref-in-perl
66 # Making a shallow copy works, and can handle unknown params:
67 #my $self = $params_map;
68
69 # but being explicit for class params needed for MySQL:
70 my $self = {
71 'collection_name' => $params_map->{'collection_name'},
72 'db_encoding' => $params_map->{'db_encoding'}
73 };
74
75 # (My)SQL doesn't like tables with - (hyphens) in their names
76 my $coll_name = $params_map->{'collection_name'};
77 $coll_name =~ s/-/_/g;
78 $self->{'tablename_prefix'} = $coll_name;
79
80 return bless($self, $class);
81}
82
83
84#################################
85
86# Database access related functions
87# http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm
88# https://www.guru99.com/insert-into.html
89
90# TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?
91# Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?
92# https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data
93# BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?
94# The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.
95# Unless they do a full rebuild, which will recreate the table from scratch?
96# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/
97# But then should set AutoCommit to off on connection, and remember to commit every time
98
99#################
100# Database functions that use the perl DBI module (with the DBD driver module for mysql)
101#################
102
103# THE NEW DB FUNCTIONS
104# NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".
105# https://dev.mysql.com/doc/refman/5.5/en/keywords.html
106
107# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later
108
109sub connect_to_db {
110 my $self= shift (@_);
111 my ($params_map) = @_;
112 my $db_enc = $self->{'db_encoding'} || "utf8";
113
114 # these are the params for connecting to MySQL
115 my $db_driver = $params_map->{'db_driver'} || "mysql";
116 my $db_user = $params_map->{'db_client_user'} || "root";
117 my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef, we'll see a sensible error message
118 # when connect fails
119 my $db_host = $params_map->{'db_host'} || "127.0.0.1";
120 # localhost doesn't work for us, but 127.0.0.1 works
121 # https://metacpan.org/pod/DBD::mysql
122 # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server
123 # running on the local machine using the default for the UNIX socket. To connect to a MySQL server
124 # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."
125 #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";
126 my $connect_str = "dbi:$db_driver:host=$db_host"; # don't provide db - allows checking the db exists
127 my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,
128 {
129 ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message
130 PrintError => 1, # on by default, but being explicit
131 RaiseError => 0, # off by default, but being explicit
132 AutoCommit => 1 # on by default, but being explicit
133 });
134
135 if(!$dbh) {
136 # NOTE, despite handle dbh being undefined, error code will be in DBI->err
137 return 0;
138 }
139
140 # set encoding https://metacpan.org/pod/DBD::mysql
141 # https://dev.mysql.com/doc/refman/5.7/en/charset.html
142 # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html
143 # Setting the encoding at db server level.
144 # Not sure if this command is mysql specific:
145 my $stmt = "set NAMES '" . $db_enc . "'";
146 $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n");
147
148 # if we're here, then connection succeeded, store handle
149 $self->{'db_handle'} = $dbh;
150 return 1;
151}
152
153sub load_db_and_tables {
154 my $self= shift (@_);
155 my ($db_name, $build_mode) = @_;
156 my $dbh = $self->{'db_handle'};
157
158 # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
159 # do() returns undef on error.
160 # connection succeeded, try to load our database. If that didn't work, attempt to create db
161 my $success = $dbh->do("use $db_name");
162
163 if(!$success && $dbh->err == 1049) { # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet
164 # attempt to create the db and its tables
165 $self->create_db($db_name) || return 0;
166
167 print STDERR "@@@ CREATED DATABASE $db_name\n";
168
169 # once more attempt to use db, now that it exists
170 $dbh->do("use $db_name") || return 0;
171 #$dbh->do("use localsite") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n";
172
173 # attempt to create tables in current db
174 $self->create_metadata_table() || return 0;
175 $self->create_fulltext_table() || return 0;
176
177 $success = 1;
178 }
179 elsif($success) { # database existed and loaded successfully, but
180 # before proceeding check that the current collection's tables exist
181
182 print STDERR "@@@ DATABASE $db_name EXISTED\n";
183
184
185 if($build_mode eq "removeold") {
186 $self->delete_collection_tables();
187 }
188
189 # use existing tables if any
190 # attempt to create tables in current db
191 if($build_mode eq "removeold" || !$self->table_exists($self->get_metadata_table_name())) {
192 $self->create_metadata_table() || return 0;
193 } else {
194 print STDERR "@@@ Meta table exists\n";
195 }
196 if($build_mode eq "removeold" || !$self->table_exists($self->get_fulltext_table_name())) {
197 $self->create_fulltext_table() || return 0;
198 } else {
199 print STDERR "@@@ Fulltxt table exists\n";
200 }
201
202 }
203
204 return $success; # could still return 0, if database failed to load with an error code != 1049
205}
206
207# this will terminate if the db does not exist
208# it will not attempt to create the requested db (nor its tables)
209# The upcoming GreenstoneSQLPlugin can use this.
210sub use_db {
211 my $self= shift (@_);
212 my ($db_name) = @_;
213 my $dbh = $self->{'db_handle'};
214
215 # perl DBI switch database: https://www.perlmonks.org/?node_id=995434
216 # do() returns undef on error.
217 # connection succeeded, try to load our database. If that didn't work, attempt to create db
218 return $dbh->do("use $db_name") || warn();
219}
220
221# disconnect from db - https://metacpan.org/pod/DBI#disconnect
222# TODO: make sure to have committed or rolled back before disconnect
223# and that you've call finish() on statement handles if any fetch remnants remain
224sub disconnect_from_db {
225 my $self= shift (@_);
226 my $dbh = $self->{'db_handle'};
227
228 # make sure any active stmt handles are finished
229 # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."
230
231 #$meta_sth = $self->{'metadata_prepared_insert_statement_handle'};
232 #$txt_sth = $self->{'fulltxt_prepared_insert_statement_handle'};
233 #$meta_sth->finish() if($meta_sth);
234 #$txt_sth->finish() if($txt_sth);
235
236 my $rc = $dbh->disconnect or warn $dbh->errstr; # The handle is of little use after disconnecting. Possibly PrintError already prints a warning and this duplicates it?
237 return $rc;
238}
239
240sub create_db {
241 my $self= shift (@_);
242 my $db_name = $self->{'db_name'};
243 my $dbh = $self->{'db_handle'};
244
245 # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script
246 return $dbh->do("create database $db_name"); # do() will return undef on fail, https://metacpan.org/pod/DBI#do
247}
248
249
250sub create_metadata_table {
251 my $self= shift (@_);
252 my $dbh = $self->{'db_handle'};
253
254 my $table_name = $self->get_metadata_table_name();
255
256 # If using an auto incremented primary key:
257 my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";
258 return $dbh->do($stmt);
259}
260
261# TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT
262# 12.9.1 Natural Language Full-Text Searches
263# to see whether we have to index the 'fulltxt' column of the 'fulltext' tables
264# or let user edit this file, or add it as another option
265sub create_fulltext_table {
266 my $self= shift (@_);
267 my $dbh = $self->{'db_handle'};
268
269 my $table_name = $self->get_fulltext_table_name();
270
271 # If using an auto incremented primary key:
272 my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";
273 return $dbh->do($stmt);
274
275}
276
277
278# USEFUL: https://metacpan.org/pod/DBI
279# "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."
280
281
282# https://www.guru99.com/insert-into.html
283# and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html
284# for inserting multiple rows at once
285# https://www.perlmonks.org/bare/?node_id=316183
286# https://metacpan.org/pod/DBI#do
287# https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query
288# https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm
289
290# https://metacpan.org/pod/DBI#performance
291# 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'
292sub prepare_insert_metadata_row_stmthandle {
293 my $self = shift (@_);
294 #my ($did, $sid, $metaname, $metavalue) = @_;
295 my $dbh = $self->{'db_handle'};
296
297 my $tablename = $self->get_metadata_table_name();
298
299 #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ?
300
301 # using qq{} since we want $tablename placeholder to be filled in
302 # returns Statement Handle object!
303 my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n");
304
305 print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n";
306
307 return $sth;
308}
309
310sub prepare_insert_fulltxt_row_stmthandle {
311 my $self = shift (@_);
312 #my ($did, $sid, $fulltext) = @_;
313 my $dbh = $self->{'db_handle'};
314
315 my $tablename = $self->get_fulltext_table_name();
316
317 #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ?
318
319 # using qq{} since we want $tablename placeholder to be filled in
320 # returns Statement Handle object!
321 my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n");
322
323 print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n";
324
325 return $sth;
326}
327
328# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"
329#MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax
330# https://dev.mysql.com/doc/en/drop-database.html
331sub delete_collection_tables {
332 my $self= shift (@_);
333 my $dbh = $self->{'db_handle'};
334
335 print STDERR "### Build mode is removeold, so deleting tables for current collection\n";
336
337 # drop table <tablename>
338 my $table = $self->get_metadata_table_name();
339 $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
340 $table = $self->get_fulltext_table_name();
341 $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");
342}
343
344# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!
345# this is just for debugging
346sub _delete_database {
347 my $self= shift (@_);
348 my ($db_name) = @_;
349 my $dbh = $self->{'db_handle'};
350
351 # "drop database dbname"
352 $dbh->do("drop database $db_name") || return 0;
353
354 return 1;
355}
356
357# More basic helper methods
358sub get_metadata_table_name {
359 my $self= shift (@_);
360 my $table_name = $self->{'tablename_prefix'} . "_metadata";
361 return $table_name;
362}
363
364# FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html
365# So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.
366sub get_fulltext_table_name {
367 my $self= shift (@_);
368 my $table_name = $self->{'tablename_prefix'} . "_fulltxt";
369 return $table_name;
370}
371
372
373# I can get my version of table_exists to work, but it's not so ideal
374# Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,
375# see https://www.perlmonks.org/bare/?node=DBI%20Recipes
376# The page further has a table_exists function that could work with proper comparison
377# Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though
378sub table_exists {
379 my $self = shift (@_);
380 my $dbh = $self->{'db_handle'};
381 my ($table_name) = @_;
382
383 my @table_list = $dbh->tables;
384 #my $tables_str = @table_list[0];
385 foreach my $table (@table_list) {
386 return 1 if ($table =~ m/$table_name/);
387 }
388 return 0;
389}
390
3911;
Note: See TracBrowser for help on using the repository browser.