Changeset 32583
- Timestamp:
- 2018-11-08T17:22:04+13:00 (5 years ago)
- Location:
- main/trunk/greenstone2/perllib
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/gssql.pm
r32582 r32583 2 2 # 3 3 # gssql.pm -- DBI for SQL related utility functions used by 4 # GreenstoneSQLPlugout and hereafter byGreenstoneSQLPlugin too.4 # GreenstoneSQLPlugout and GreenstoneSQLPlugin too. 5 5 # A component of the Greenstone digital library software 6 6 # from the New Zealand Digital Library Project at the … … 61 61 62 62 # Parameterise (one or more methods may use them): 63 # - db_name (which is the GS3 sitename )63 # - db_name (which is the GS3 sitename, or "greenstone2" for GS2) 64 64 65 65 … … 83 83 } 84 84 } 85 86 85 87 86 die "Caught a $sig signal $!"; # die() will always call destructor (sub DESTROY) … … 125 124 # "Itâs common to call die when handling SIGINT and SIGTERM. die is useful because it will ensure that Perl stops correctly: for example Perl will execute a destructor method if present when die is called, but the destructor method will not be called if a SIGINT or SIGTERM is received and no signal handler calls die." 126 125 # 127 # https://perldoc.perl.org/perlobj.html#Destructors126 # Useful: https://perldoc.perl.org/perlobj.html#Destructors 128 127 # 129 128 # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect … … 145 144 if ($_dbh_instance) { # database handle still active. Use singleton handle! 146 145 147 # THIS CODE HAS MOVED TO finish_signal_handler() WHERE IT BELONGS 148 # If autocommit wasn't set, then this is a cancel operation. 149 # If we've not disconnected from the sql db yet and if we've not committed 150 # transactions yet, then cancel means we do a rollback here 151 152 # if($_dbh_instance->{AutoCommit} == 0) { 153 154 # $_dbh_instance->rollback(); # will warn on failure, nothing more we can/want to do, 155 # # don't do a die() here: possibility of infinite loop and we still want to disconnect 156 # } 157 158 # Either way, we're now finally ready to disconnect as is required for premature 159 # termination too 146 # rollback code has moved to finish_signal_handler() where it belongs? 147 148 # NOTE: if RaiseError is set on dbi connection, then on any error, perl process will die() 149 # which will end up calling this DESTROY. If it was a die() that called DESTROY 150 # then need to rollback the db here. However, if it was not a die() but natural termination 151 # of the perl process, destroy() will also get called. In that case we don't want to rollback 152 # but do a commit() to the DB instead. 153 # Perhaps detecting the difference may be accomplished by checking ref_count: 154 # - If ref_count not 0 it may require a rollback? 155 # - If ref_count 0 it may be a natural termination and require a commit? Except that ref_count 156 # is set back to 0 in finished(), which will do the commit when ref_count becomes 0. So shouldn't 157 # (have to) do that here. 158 159 # We're now finally ready to disconnect, as is required for both natural and premature termination 160 160 print STDERR "XXXXXXXX Global Destruct: Disconnecting from database\n"; 161 161 $_dbh_instance->disconnect or warn $_dbh_instance->errstr; … … 165 165 return; 166 166 } 167 167 168 } 168 169 … … 229 230 } 230 231 if($params_map->{'autocommit'}) { 231 print STDERR " SQL DB UNDOSUPPORT OFF.\n";232 print STDERR " SQL DB CANCEL SUPPORT OFF.\n"; 232 233 } else { 233 print STDERR " SQL DB UNDOSUPPORT ON.\n";234 print STDERR " SQL DB CANCEL SUPPORT ON.\n"; 234 235 } 235 236 } … … 326 327 327 328 # Will disconnect if this instance of gssql holds the last reference to the db connection 329 # If disconnecting and autocommit is off, then this will commit before disconnecting 328 330 sub finished { 329 my $self= shift (@_);330 331 # TODO: if AutoCommit was off, meaning transactions were on/enabled,332 # then here is where we commit our one long transaction.333 # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit334 my $rc = 1;335 336 $ref_count--;337 if($ref_count == 0) {338 # Only commit transaction when we're about to disconnect, not before339 # If autocommit was on, then we'd have committed after every db operation, so nothing to do340 $rc = $self->do_commit_if_on();341 342 $self->force_disconnect_from_db();343 }344 345 return $rc;346 }347 348 sub do_commit_if_on {349 331 my $self= shift (@_); 350 332 my $dbh = $self->{'db_handle'}; … … 352 334 my $rc = 1; # return code: everything went fine, regardless of whether we needed to commit 353 335 # (AutoCommit on or off) 354 355 # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit 356 if($dbh->{AutoCommit} == 0) { 357 print STDERR " Committing transaction to SQL database now.\n" if $self->{'verbosity'}; 358 $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem 359 # worth embellishing error message 360 } 361 # If autocommit was on, then we'd have committed after every db operation, so nothing to do 336 337 $ref_count--; 338 if($ref_count == 0) { # Only commit transaction when we're about to actually disconnect, not before 339 340 # TODO: If AutoCommit was off, meaning transactions were on/enabled, 341 # then here is where we commit our one long transaction. 342 # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit 343 if($dbh->{AutoCommit} == 0) { 344 print STDERR " Committing transaction to SQL database now.\n" if $self->{'verbosity'}; 345 $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem 346 # worth embellishing error message 347 } 348 # else if autocommit was on, then we'd have committed after every db operation, so nothing to do 349 350 $self->_force_disconnect_from_db(); 351 } 362 352 363 353 return $rc; 364 354 } 355 365 356 366 357 # Call this method on die(), so that you're sure the perl process has disconnected from SQL db … … 368 359 # TODO: make sure to have committed or rolled back before disconnect 369 360 # and that you've call finish() on statement handles if any fetch remnants remain 370 sub force_disconnect_from_db {361 sub _force_disconnect_from_db { 371 362 my $self= shift (@_); 372 363 -
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32582 r32583 50 50 51 51 # DONE: 52 # + TODO: For on cancel, add a SIGTERM handler or so to call end() 53 # or to explicitly call gs_sql->close_connection if $gs_sql def 54 # 52 55 # + TODO: Incremental delete can't work until GSSQLPlugout has implemented build_mode = incremental 53 56 # (instead of tossing away db on every build) … … 126 129 127 130 return q^(?i)docsql(-\d+)?\.xml$^; # regex based on this method in GreenstoneXMLPlugin 128 #return q^(?i)docsql(-.+)?\.xml$^; # no longer storing the OID embedded in docsql .xml filename129 131 } 130 132 … … 191 193 192 194 193 # TODO: For on cancel, add a SIGTERM handler or so to call end() 194 # or to explicitly call gs_sql->close_connection if $gs_sql def 195 ###### Methods called during buildcol and import ####### 195 196 196 197 sub new { … … 217 218 } 218 219 219 ###### Called during import.pl 220 # GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl 221 # This means it connects and deconnects during import.pl as well. This is okay 222 # as removeold, which should drop the collection tables, happens during the import phase, 223 # calling GreenstoneSQLPlugin::and therefore also requires a db connection. 224 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 225 # and Plugout would be using one connection during import.pl phase when both plugs exist. 226 227 # Call init() not begin() because there can be multiple plugin passes and begin() called for 228 # each pass (one for doc level and another for section level indexing), whereas init() should 229 # be called before any and all passes. 230 # This way, we can connect to the SQL database once per buildcol run. 231 sub init { 232 my ($self) = shift (@_); 233 ##print STDERR "@@@@@@@@@@ INIT CALLED\n"; 234 235 $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 236 237 #################### 238 # print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 239 # print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n"; 240 241 # print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n"; 242 # print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 243 # print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 244 # print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 245 #################### 246 247 # create gssql object. 248 # collection name will be used for naming tables (site name will be used for naming database) 249 my $gs_sql = new gssql({ 250 'collection_name' => $ENV{'GSDLCOLLECTION'}, 251 'verbosity' => $self->{'verbosity'} || 0 252 }); 253 254 # if autocommit is set, there's no rollback support 255 my $autocommit = ($self->{'rollback_on_cancel'} eq "false") ? 1 : 0; 256 257 # try connecting to the mysql db, die if that fails 258 if(!$gs_sql->connect_to_db({ 259 'db_driver' => $self->{'db_driver'}, 260 'db_client_user' => $self->{'db_client_user'}, 261 'db_client_pwd' => $self->{'db_client_pwd'}, 262 'db_host' => $self->{'db_host'}, 263 'autocommit' => $autocommit 264 }) 265 ) 266 { 267 # This is fatal for the plugout, let's terminate here 268 # PrintError would already have displayed the warning message on connection fail 269 die("Could not connect to db. Can't proceed.\n"); 270 } 271 272 my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 273 274 # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet) 275 # Bail if we can't use the database 276 if(!$gs_sql->use_db($db_name)) { 277 278 # This is fatal for the plugout, let's terminate here after disconnecting again 279 # PrintError would already have displayed the warning message on load fail 280 # And on die() perl will call gssql destroy which will ensure a disconnect() from db 281 #$gs_sql->force_disconnect_from_db(); 282 die("Could not use db $db_name. Can't proceed.\n"); 283 } 284 285 286 # store db handle now that we're connected 287 $self->{'gs_sql'} = $gs_sql; 288 } 289 290 291 # This method also runs on import.pl if gs_sql has a value. But we just want to run it on buildcol 292 # Call deinit() not end() because there can be multiple plugin passes: 293 # one for doc level and another for section level indexing 294 # and deinit() should be called before all passes 295 # This way, we can close the SQL database once per buildcol run. 296 sub deinit { 297 my ($self) = shift (@_); 298 299 ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n"; 300 301 if($self->{'gs_sql'}) { 302 303 # Important to call finished(): 304 # it will disconnect from db if this is the last gssql instance, 305 # and it will commit to db before disconnecting if rollbback_on_cancel turned on 306 $self->{'gs_sql'}->finished(); 307 308 # Clear gs_sql (setting key to undef has a different meaning from deleting: 309 # undef makes key still exist but its value is unded whereas delete deletes the key) 310 # So all future use has to make the connection again 311 delete $self->{'gs_sql'}; 312 } 313 314 $self->SUPER::deinit(@_); 315 } 316 317 318 319 ###### Methods only called during import.pl ##### 220 320 221 321 # This is called once if removeold is set with import.pl. Most plugins will do … … 246 346 } 247 347 248 # UNNECESSARY249 # The removeold related DB transaction (deleting collection tables) is complete250 # Don't let GS SQL PlugIN interfere with GS SQL PlugOUT's database transactions251 # during import.pl hereafter. Finish up.252 #$gs_sql->do_commit_if_on();253 348 } 254 349 … … 294 389 } 295 390 296 #### Called during buildcol391 ##### Methods called only during buildcol ##### 297 392 298 393 sub xml_start_tag { … … 417 512 } 418 513 419 #### Called during buildcol and import 420 421 # GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl 422 # This means it connects and deconnects during import.pl as well. This is okay 423 # as removeold, which should drop the collection tables, happens during the import phase, 424 # calling GreenstoneSQLPlugin::and therefore also requires a db connection. 425 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 426 # and Plugout would be using one connection during import.pl phase when both plugs exist. 427 428 # Call init() not begin() because there can be multiple plugin passes and begin() called for 429 # each pass (one for doc level and another for section level indexing), whereas init() should 430 # be called before any and all passes. 431 # This way, we can connect to the SQL database once per buildcol run. 432 sub init { 433 my ($self) = shift (@_); 434 ##print STDERR "@@@@@@@@@@ INIT CALLED\n"; 435 436 $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 437 438 #################### 439 # print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 440 # print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n"; 441 442 # print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n"; 443 # print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 444 # print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 445 # print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 446 #################### 447 448 # create gssql object. 449 # collection name will be used for naming tables (site name will be used for naming database) 450 my $gs_sql = new gssql({ 451 'collection_name' => $ENV{'GSDLCOLLECTION'}, 452 'verbosity' => $self->{'verbosity'} || 0 453 }); 454 455 # if autocommit is set, there's no rollback support 456 my $autocommit = ($self->{'rollback_on_cancel'} eq "false") ? 1 : 0; 457 458 # try connecting to the mysql db, die if that fails 459 if(!$gs_sql->connect_to_db({ 460 'db_driver' => $self->{'db_driver'}, 461 'db_client_user' => $self->{'db_client_user'}, 462 'db_client_pwd' => $self->{'db_client_pwd'}, 463 'db_host' => $self->{'db_host'}, 464 'autocommit' => $autocommit 465 }) 466 ) 467 { 468 # This is fatal for the plugout, let's terminate here 469 # PrintError would already have displayed the warning message on connection fail 470 die("Could not connect to db. Can't proceed.\n"); 471 } 472 473 my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 474 475 # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet) 476 # Bail if we can't use the database 477 if(!$gs_sql->use_db($db_name)) { 478 479 # This is fatal for the plugout, let's terminate here after disconnecting again 480 # PrintError would already have displayed the warning message on load fail 481 $gs_sql->force_disconnect_from_db(); 482 die("Could not use db $db_name. Can't proceed.\n"); 483 } 484 485 486 # store db handle now that we're connected 487 $self->{'gs_sql'} = $gs_sql; 488 } 489 490 491 # This method also runs on import.pl if gs_sql has a value. But we just want to run it on buildcol 492 # Call deinit() not end() because there can be multiple plugin passes: 493 # one for doc level and another for section level indexing 494 # and deinit() should be called before all passes 495 # This way, we can close the SQL database once per buildcol run. 496 sub deinit { 497 my ($self) = shift (@_); 498 499 ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n"; 500 501 if($self->{'gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have 502 # a value except during buildcol, so when processor =~ m/buildproc$/. 503 $self->{'gs_sql'}->finished(); 504 505 # Clear gs_sql (setting key to undef has a different meaning from deleting: 506 # undef makes key still exist but its value is unded whereas delete deletes the key) 507 # So all future use has to make the connection again 508 delete $self->{'gs_sql'}; 509 } 510 511 $self->SUPER::deinit(@_); 512 } 513 514 515 516 514 515 1; -
main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm
r32582 r32583 39 39 40 40 41 # This entire class is called only during import.pl 42 43 # TODO: SIGTERM rollback and disconnect? 44 # TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db 41 # + TODO: SIGTERM rollback and disconnect? 42 # + TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db 45 43 # TODO Q: introduced site_name param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes" 46 44 # Did I do the pass by ref in docprint's escape and unescape textref functions correctly, and how they're called here? … … 121 119 'args' => $arguments }; 122 120 121 ##### This entire class is called only during import.pl ##### 122 123 ##### Overridden methods ##### 124 123 125 sub new { 124 126 my ($class) = shift (@_); … … 205 207 # This is fatal for the plugout, let's terminate here after disconnecting again 206 208 # PrintError would already have displayed the warning message on load fail 207 $gs_sql->force_disconnect_from_db(); # disconnect_from_db() will issue a warning on error 209 # And on die() perl will call gssql destroy which will ensure a disconnect() from db 210 #$gs_sql->force_disconnect_from_db(); # disconnect_from_db() will issue a warning on error 208 211 die("Could not use db $db_name and/or prepare its tables. Can't proceed.\n"); 209 212 } … … 227 230 # do the superclass stuff first, as any sql db failures should not prevent superclass cleanup 228 231 $self->SUPER::end(@_); 229 230 $self->{'gs_sql'}->finished(); # will disconnect from db if last instance 232 233 # Important to call finished(): 234 # it will disconnect from db if this is the last gssql instance, 235 # and it will commit to db before disconnecting if rollbback_on_cancel turned on 236 $self->{'gs_sql'}->finished(); 231 237 delete $self->{'gs_sql'}; # key gs_sql no longer exists, not just the value being undef 232 238 } … … 295 301 296 302 297 # 3. post save out 298 #$self->SUPER::post_saveas(@_); 303 # 3. post save out 299 304 $self->SUPER::post_saveas($doc_obj, $doc_dir, $docxml_outhandler, $output_file); 300 305 … … 304 309 } 305 310 311 ##### New methods, not inherited ##### 306 312 307 313 # write meta and/or text PER DOC out to DB
Note:
See TracChangeset
for help on using the changeset viewer.