- Timestamp:
- 2018-10-19T20:42:08+13:00 (5 years ago)
- Location:
- main/trunk/greenstone2/perllib
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm
r32527 r32529 32 32 no strict 'subs'; 33 33 34 use util;35 use FileUtils;36 34 use GreenstoneXMLPlugout; 37 35 use docprint; 36 use gssql; 38 37 39 38 use DBI; # the central package for this plugout 40 39 41 use IPC::Open2;42 use POSIX ":sys_wait_h"; # for waitpid, http://perldoc.perl.org/functions/waitpid.html43 44 40 45 41 # TODO: SIGTERM rollback and disconnect? 46 47 # TODO: rename class to GreenstoneSQLPlugout48 42 49 43 … … 53 47 } 54 48 49 # NOTTODO: die() statements need to be replaced with premature_termination 50 # which should ensure the GreenstoneXMLPlugin (group)'s stuff is closed and cleaned up SOMEHOW 51 # It's fine: the die() stmts all take place before setting up the super class' begin 55 52 56 53 # TODO: deal with -removeold and everything? Or type out instructions for user … … 118 115 ############ LOAD NECESSARY OPTIONS ########### 119 116 print STDERR "########## COLLECTION: ". $ENV{'GSDLCOLLECTION'}."\n"; 120 $self->{'collection_name'} = $ENV{'GSDLCOLLECTION'};117 #$self->{'collection_name'} = $ENV{'GSDLCOLLECTION'}; 121 118 print STDERR "***** GreenstoneSQLPlugout process mode = \"", $self->{'process_mode'}, "\"\n"; 122 123 if(!$self->connect_to_db()) { 119 120 my $db_params = { 121 'collection_name' => $ENV{'GSDLCOLLECTION'}, 122 'db_driver' => $self->{'db_driver'}, 123 'db_client_user' => $self->{'db_client_user'}, 124 'db_client_pwd' => $self->{'db_client_pwd'}, 125 'db_host' => $self->{'db_host'}, 126 'db_encoding' => $self->{'db_encoding'} 127 #'db_name' => $self->{'site_name'}, 128 #'build_mode' => $self->{'build_mode'}, 129 }; 130 131 my $gs_sql = new gssql($db_params); 132 133 if(!$gs_sql->connect_to_db()) { 124 134 # This is fatal for the plugout, let's terminate here 125 135 # PrintError would already have displayed the warning message on connection fail … … 129 139 my $db_name = $self->{'site_name'} || "localsite"; # one database per GS3 site 130 140 my $build_mode = $self->{'build_mode'} || "removeold"; 131 if(!$ self->load_db_and_tables($db_name, $build_mode)) {141 if(!$gs_sql->load_db_and_tables($db_name, $build_mode)) { 132 142 133 143 # This is fatal for the plugout, let's terminate here … … 140 150 my $proc_mode = $self->{'process_mode'}; 141 151 if($proc_mode eq "all" || $proc_mode eq "meta_only" ) { 142 $self->{'metadata_prepared_insert_statement_handle'} = $ self->prepare_insert_metadata_row_stmthandle();152 $self->{'metadata_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_metadata_row_stmthandle(); 143 153 } 144 154 if($proc_mode eq "all" || $proc_mode eq "text_only" ) { 145 $self->{'fulltxt_prepared_insert_statement_handle'} = $self->prepare_insert_fulltxt_row_stmthandle(); 146 } 147 155 $self->{'fulltxt_prepared_insert_statement_handle'} = $gs_sql->prepare_insert_fulltxt_row_stmthandle(); 156 } 157 158 # store the DBI wrapper instance 159 $self->{'gs_sql'} = $gs_sql; 160 148 161 print STDERR "#### Meta stmt: " . $self->{'metadata_prepared_insert_statement_handle'}->{'Statement'} . "\n"; 149 162 print STDERR "#### Full stmt: " . $self->{'fulltxt_prepared_insert_statement_handle'}->{'Statement'} . "\n"; … … 164 177 $self->SUPER::end(@_); 165 178 166 $self-> disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer179 $self->{'gs_sql'}->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer 167 180 } 168 181 … … 304 317 } 305 318 306 #################################307 308 # Database access related functions309 # http://g2pc1.bu.edu/~qzpeng/manual/MySQL%20Commands.htm310 # https://www.guru99.com/insert-into.html311 312 # TODO Q: What on cancelling a build: delete table? But what if it was a rebuild and the rebuild is cancelled (not the original build)?313 # Do we create a copy of the orig database as backup, then start populating current db, and if cancelled, delete current db and RENAME backup table to current?314 # https://stackoverflow.com/questions/3280006/duplicating-a-mysql-table-indexes-and-data315 # BUT what if the table is HUGE? (Think of a collection with millions of docs.) Huge overhead in copying?316 # The alternative is we just quit on cancel, but then: cancel could leave the table in a partial committed state, with no way of rolling back.317 # Unless they do a full rebuild, which will recreate the table from scratch?318 # SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/319 # But then should set AutoCommit to off on connection, and remember to commit every time320 321 #################322 # Database functions that use the perl DBI module (with the DBD driver module for mysql)323 #################324 325 # THE NEW DB FUNCTIONS326 # NOTE: FULLTEXT is a reserved keyword in (My)SQL. So we can't name a table or any of its columns "fulltext".327 # https://dev.mysql.com/doc/refman/5.5/en/keywords.html328 329 # TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later330 331 sub connect_to_db {332 my $self= shift (@_);333 334 my $db_driver = $self->{'db_driver'};335 my $db_user = $self->{'db_client_user'} || "root";336 my $db_pwd = $self->{'db_client_pwd'};337 my $db_host = $self->{'db_host'} || "127.0.0.1";338 my $db_enc = $self->{'db_encoding'} || "utf8";339 340 #my $db_name = $self->{'site_name'};341 342 # try connecting to the mysql db, if that fails it will die343 # so don't bother preparing GreenstoneXMLPlugout by calling superclass' begin()344 345 # localhost doesn't work for us, but 127.0.0.1 works346 # https://metacpan.org/pod/DBD::mysql347 # "The hostname, if not specified or specified as '' or 'localhost', will default to a MySQL server348 # running on the local machine using the default for the UNIX socket. To connect to a MySQL server349 # on the local machine via TCP, you must specify the loopback IP address (127.0.0.1) as the host."350 #my $connect_str = "dbi:$db_driver:database=$db_name;host=$db_host";351 my $connect_str = "dbi:$db_driver:host=$db_host"; # don't provide db, so we can check the db is there352 my $dbh = DBI->connect("$connect_str", $db_user, $db_pwd,353 {354 ShowErrorStatement => 1, # more informative as DBI will append failed SQL stmt to error message355 PrintError => 1, # on by default, but being explicit356 RaiseError => 0, # off by default, but being explicit357 AutoCommit => 1 # on by default, but being explicit358 });359 360 if(!$dbh) {361 # NOTE, despite handle dbh being undefined, error code will be in DBI->err362 return 0;363 }364 365 # set encoding https://metacpan.org/pod/DBD::mysql366 # https://dev.mysql.com/doc/refman/5.7/en/charset.html367 # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html368 # Setting the encoding at db server level.369 # Not sure if this command is mysql specific:370 my $stmt = "set NAMES '" . $db_enc . "'";371 $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n");372 373 # if we're here, then connection succeeded, store handle374 $self->{'db_handle'} = $dbh;375 return 1;376 }377 378 sub load_db_and_tables {379 my $self= shift (@_);380 my ($db_name, $build_mode) = @_;381 my $dbh = $self->{'db_handle'};382 383 # perl DBI switch database: https://www.perlmonks.org/?node_id=995434384 # do() returns undef on error.385 # connection succeeded, try to load our database. If that didn't work, attempt to create db386 my $success = $dbh->do("use $db_name");387 388 if(!$success && $dbh->err == 1049) { # "Unknown database" error has code 1049 (mysql only?) meaning db doesn't exist yet389 # attempt to create the db and its tables390 $self->create_db($db_name) || return 0;391 392 print STDERR "@@@ CREATED DATABASE $db_name\n";393 394 # once more attempt to use db, now that it exists395 $dbh->do("use $db_name") || return 0;396 #$dbh->do("use localsite") or die "Error (code" . $dbh->err ."): " . $dbh->errstr . "\n";397 398 # attempt to create tables in current db399 $self->create_metadata_table() || return 0;400 $self->create_fulltext_table() || return 0;401 402 $success = 1;403 }404 elsif($success) { # database existed and loaded successfully, but405 # before proceeding check that the current collection's tables exist406 407 print STDERR "@@@ DATABASE $db_name EXISTED\n";408 409 410 if($build_mode eq "removeold") {411 $self->delete_collection_tables();412 }413 414 # use existing tables if any415 # attempt to create tables in current db416 if($build_mode eq "removeold" || !$self->table_exists($self->get_metadata_table_name())) {417 $self->create_metadata_table() || return 0;418 } else {419 print STDERR "@@@ Meta table exists\n";420 }421 if($build_mode eq "removeold" || !$self->table_exists($self->get_fulltext_table_name())) {422 $self->create_fulltext_table() || return 0;423 } else {424 print STDERR "@@@ Fulltxt table exists\n";425 }426 427 }428 429 return $success; # could still return 0, if database failed to load with an error code != 1049430 }431 432 # disconnect from db - https://metacpan.org/pod/DBI#disconnect433 # TODO: make sure to have committed or rolled back before disconnect434 # and that you've call finish() on statement handles if any fetch remnants remain435 sub disconnect_from_db {436 my $self= shift (@_);437 my $dbh = $self->{'db_handle'};438 439 # make sure any active stmt handles are finished440 # NO: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon."441 442 #$meta_sth = $self->{'metadata_prepared_insert_statement_handle'};443 #$txt_sth = $self->{'fulltxt_prepared_insert_statement_handle'};444 #$meta_sth->finish() if($meta_sth);445 #$txt_sth->finish() if($txt_sth);446 447 my $rc = $dbh->disconnect or warn $dbh->errstr; # The handle is of little use after disconnecting. Possibly PrintError already prints a warning and this duplicates it?448 return $rc;449 }450 451 sub create_db {452 my $self= shift (@_);453 my $db_name = $self->{'site_name'};454 my $dbh = $self->{'db_handle'};455 456 # https://stackoverflow.com/questions/5025768/how-can-i-create-a-mysql-database-from-a-perl-script457 return $dbh->do("create database $db_name"); # do() will return undef on fail, https://metacpan.org/pod/DBI#do458 }459 460 461 sub create_metadata_table {462 my $self= shift (@_);463 my $dbh = $self->{'db_handle'};464 465 my $table_name = $self->get_metadata_table_name();466 467 # If using an auto incremented primary key:468 my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));";469 return $dbh->do($stmt);470 }471 472 # TODO: Investigate: https://dev.mysql.com/doc/search/?d=10&p=1&q=FULLTEXT473 # 12.9.1 Natural Language Full-Text Searches474 # to see whether we have to index the 'fulltxt' column of the 'fulltext' tables475 # or let user edit this file, or add it as another option476 sub create_fulltext_table {477 my $self= shift (@_);478 my $dbh = $self->{'db_handle'};479 480 my $table_name = $self->get_fulltext_table_name();481 482 # If using an auto incremented primary key:483 my $stmt = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));";484 return $dbh->do($stmt);485 486 }487 488 489 # USEFUL: https://metacpan.org/pod/DBI490 # "Many methods have an optional \%attr parameter which can be used to pass information to the driver implementing the method. Except where specifically documented, the \%attr parameter can only be used to pass driver specific hints. In general, you can ignore \%attr parameters or pass it as undef."491 492 493 # https://www.guru99.com/insert-into.html494 # and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html495 # for inserting multiple rows at once496 # https://www.perlmonks.org/bare/?node_id=316183497 # https://metacpan.org/pod/DBI#do498 # https://www.quora.com/What-is-the-difference-between-prepare-and-do-statements-in-Perl-while-we-make-a-connection-to-the-database-for-executing-the-query499 # https://docstore.mik.ua/orelly/linux/dbi/ch05_05.htm500 501 # https://metacpan.org/pod/DBI#performance502 # 'The q{...} style quoting used in this example avoids clashing with quotes that may be used in the SQL statement. Use the double-quote like qq{...} operator if you want to interpolate variables into the string. See "Quote and Quote-like Operators" in perlop for more details.'503 sub prepare_insert_metadata_row_stmthandle {504 my $self = shift (@_);505 #my ($did, $sid, $metaname, $metavalue) = @_;506 my $dbh = $self->{'db_handle'};507 508 my $tablename = $self->get_metadata_table_name();509 510 #my $stmt = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES ('$did', '$sid', '$metaname', '$metavalue');"; # ?, ?, ?, ?511 512 # using qq{} since we want $tablename placeholder to be filled in513 # returns Statement Handle object!514 my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES (?, ?, ?, ?)}) || warn("Could not prepare insert statement for metadata table\n");515 516 print STDERR "@@@@ Prepared meta insert statement: ".$sth->{'Statement'}."\n";517 518 return $sth;519 }520 521 sub prepare_insert_fulltxt_row_stmthandle {522 my $self = shift (@_);523 #my ($did, $sid, $fulltext) = @_;524 my $dbh = $self->{'db_handle'};525 526 my $tablename = $self->get_fulltext_table_name();527 528 #my $stmt = "INSERT INTO $tablename (did, sid, fulltxt) VALUES ('$did', '$sid', '$fulltext');"; ?, ?, ?529 530 # using qq{} since we want $tablename placeholder to be filled in531 # returns Statement Handle object!532 my $sth = $dbh->prepare(qq{INSERT INTO $tablename (did, sid, fulltxt) VALUES (?, ?, ?)}) || warn("Could not prepare insert statement for fulltxt table\n");533 534 print STDERR "@@@@ Prepared fulltext insert statement: ".$sth->{'Statement'}."\n";535 536 return $sth;537 }538 539 # "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation.Jun 20, 2012"540 #MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax541 # https://dev.mysql.com/doc/en/drop-database.html542 sub delete_collection_tables {543 my $self= shift (@_);544 my $dbh = $self->{'db_handle'};545 546 print STDERR "### Build mode is removeold, so deleting tables for current collection\n";547 548 # drop table <tablename>549 my $table = $self->get_metadata_table_name();550 $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");551 $table = $self->get_fulltext_table_name();552 $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table");553 }554 555 # Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)!556 # this is just for debugging557 sub _delete_database {558 my $self= shift (@_);559 my ($db_name) = @_;560 my $dbh = $self->{'db_handle'};561 562 # "drop database dbname"563 $dbh->do("drop database $db_name") || return 0;564 565 return 1;566 }567 568 # More basic helper methods569 sub get_metadata_table_name {570 my $self= shift (@_);571 my $table_name = $self->{'collection_name'} . "_metadata";572 return $table_name;573 }574 575 # FULLTEXT is a reserved keyword in (My)SQL. https://dev.mysql.com/doc/refman/5.5/en/keywords.html576 # So we can't name a table or any of its columns "fulltext". We use "fulltxt" instead.577 sub get_fulltext_table_name {578 my $self= shift (@_);579 my $table_name = $self->{'collection_name'} . "_fulltxt";580 return $table_name;581 }582 583 # I can get my version of table_exists to work, but it's not so ideal584 # Interesting that MySQL has non-standard command to CREATE TABLE IF NOT EXISTS and DROP TABLE IF EXISTS,585 # see https://www.perlmonks.org/bare/?node=DBI%20Recipes586 # The page further has a table_exists function that could work with proper comparison587 # Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though588 sub table_exists {589 my $self = shift (@_);590 my $dbh = $self->{'db_handle'};591 my ($table_name) = @_;592 593 my @table_list = $dbh->tables;594 #my $tables_str = @table_list[0];595 foreach my $table (@table_list) {596 return 1 if ($table =~ m/$table_name/);597 }598 return 0;599 }600 319 601 320 1;
Note:
See TracChangeset
for help on using the changeset viewer.