- Timestamp: 2018-11-08T17:22:04+13:00 (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32582 r32583 50 50 51 51 # DONE: 52 # + TODO: For on cancel, add a SIGTERM handler or so to call end() 53 # or to explicitly call gs_sql->close_connection if $gs_sql def 54 # 52 55 # + TODO: Incremental delete can't work until GSSQLPlugout has implemented build_mode = incremental 53 56 # (instead of tossing away db on every build) … … 126 129 127 130 return q^(?i)docsql(-\d+)?\.xml$^; # regex based on this method in GreenstoneXMLPlugin 128 #return q^(?i)docsql(-.+)?\.xml$^; # no longer storing the OID embedded in docsql .xml filename129 131 } 130 132 … … 191 193 192 194 193 # TODO: For on cancel, add a SIGTERM handler or so to call end() 194 # or to explicitly call gs_sql->close_connection if $gs_sql def 195 ###### Methods called during buildcol and import ####### 195 196 196 197 sub new { … … 217 218 } 218 219 219 ###### Called during import.pl 220 # GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl 221 # This means it connects and deconnects during import.pl as well. This is okay 222 # as removeold, which should drop the collection tables, happens during the import phase, 223 # calling GreenstoneSQLPlugin::and therefore also requires a db connection. 224 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 225 # and Plugout would be using one connection during import.pl phase when both plugs exist. 226 227 # Call init() not begin() because there can be multiple plugin passes and begin() called for 228 # each pass (one for doc level and another for section level indexing), whereas init() should 229 # be called before any and all passes. 230 # This way, we can connect to the SQL database once per buildcol run. 231 sub init { 232 my ($self) = shift (@_); 233 ##print STDERR "@@@@@@@@@@ INIT CALLED\n"; 234 235 $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 
236 237 #################### 238 # print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 239 # print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n"; 240 241 # print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n"; 242 # print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 243 # print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 244 # print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 245 #################### 246 247 # create gssql object. 248 # collection name will be used for naming tables (site name will be used for naming database) 249 my $gs_sql = new gssql({ 250 'collection_name' => $ENV{'GSDLCOLLECTION'}, 251 'verbosity' => $self->{'verbosity'} || 0 252 }); 253 254 # if autocommit is set, there's no rollback support 255 my $autocommit = ($self->{'rollback_on_cancel'} eq "false") ? 1 : 0; 256 257 # try connecting to the mysql db, die if that fails 258 if(!$gs_sql->connect_to_db({ 259 'db_driver' => $self->{'db_driver'}, 260 'db_client_user' => $self->{'db_client_user'}, 261 'db_client_pwd' => $self->{'db_client_pwd'}, 262 'db_host' => $self->{'db_host'}, 263 'autocommit' => $autocommit 264 }) 265 ) 266 { 267 # This is fatal for the plugout, let's terminate here 268 # PrintError would already have displayed the warning message on connection fail 269 die("Could not connect to db. 
Can't proceed.\n"); 270 } 271 272 my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 273 274 # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet) 275 # Bail if we can't use the database 276 if(!$gs_sql->use_db($db_name)) { 277 278 # This is fatal for the plugout, let's terminate here after disconnecting again 279 # PrintError would already have displayed the warning message on load fail 280 # And on die() perl will call gssql destroy which will ensure a disconnect() from db 281 #$gs_sql->force_disconnect_from_db(); 282 die("Could not use db $db_name. Can't proceed.\n"); 283 } 284 285 286 # store db handle now that we're connected 287 $self->{'gs_sql'} = $gs_sql; 288 } 289 290 291 # This method also runs on import.pl if gs_sql has a value. But we just want to run it on buildcol 292 # Call deinit() not end() because there can be multiple plugin passes: 293 # one for doc level and another for section level indexing 294 # and deinit() should be called before all passes 295 # This way, we can close the SQL database once per buildcol run. 296 sub deinit { 297 my ($self) = shift (@_); 298 299 ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n"; 300 301 if($self->{'gs_sql'}) { 302 303 # Important to call finished(): 304 # it will disconnect from db if this is the last gssql instance, 305 # and it will commit to db before disconnecting if rollbback_on_cancel turned on 306 $self->{'gs_sql'}->finished(); 307 308 # Clear gs_sql (setting key to undef has a different meaning from deleting: 309 # undef makes key still exist but its value is unded whereas delete deletes the key) 310 # So all future use has to make the connection again 311 delete $self->{'gs_sql'}; 312 } 313 314 $self->SUPER::deinit(@_); 315 } 316 317 318 319 ###### Methods only called during import.pl ##### 220 320 221 321 # This is called once if removeold is set with import.pl. 
Most plugins will do … … 246 346 } 247 347 248 # UNNECESSARY249 # The removeold related DB transaction (deleting collection tables) is complete250 # Don't let GS SQL PlugIN interfere with GS SQL PlugOUT's database transactions251 # during import.pl hereafter. Finish up.252 #$gs_sql->do_commit_if_on();253 348 } 254 349 … … 294 389 } 295 390 296 #### Called during buildcol391 ##### Methods called only during buildcol ##### 297 392 298 393 sub xml_start_tag { … … 417 512 } 418 513 419 #### Called during buildcol and import 420 421 # GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl 422 # This means it connects and deconnects during import.pl as well. This is okay 423 # as removeold, which should drop the collection tables, happens during the import phase, 424 # calling GreenstoneSQLPlugin::and therefore also requires a db connection. 425 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 426 # and Plugout would be using one connection during import.pl phase when both plugs exist. 427 428 # Call init() not begin() because there can be multiple plugin passes and begin() called for 429 # each pass (one for doc level and another for section level indexing), whereas init() should 430 # be called before any and all passes. 431 # This way, we can connect to the SQL database once per buildcol run. 432 sub init { 433 my ($self) = shift (@_); 434 ##print STDERR "@@@@@@@@@@ INIT CALLED\n"; 435 436 $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 437 438 #################### 439 # print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 440 # print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n"; 441 442 # print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n"; 443 # print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 444 # print STDERR "@@@@ db_host: " . $self->{'db_host'} . 
"\n"; 445 # print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 446 #################### 447 448 # create gssql object. 449 # collection name will be used for naming tables (site name will be used for naming database) 450 my $gs_sql = new gssql({ 451 'collection_name' => $ENV{'GSDLCOLLECTION'}, 452 'verbosity' => $self->{'verbosity'} || 0 453 }); 454 455 # if autocommit is set, there's no rollback support 456 my $autocommit = ($self->{'rollback_on_cancel'} eq "false") ? 1 : 0; 457 458 # try connecting to the mysql db, die if that fails 459 if(!$gs_sql->connect_to_db({ 460 'db_driver' => $self->{'db_driver'}, 461 'db_client_user' => $self->{'db_client_user'}, 462 'db_client_pwd' => $self->{'db_client_pwd'}, 463 'db_host' => $self->{'db_host'}, 464 'autocommit' => $autocommit 465 }) 466 ) 467 { 468 # This is fatal for the plugout, let's terminate here 469 # PrintError would already have displayed the warning message on connection fail 470 die("Could not connect to db. Can't proceed.\n"); 471 } 472 473 my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 474 475 # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet) 476 # Bail if we can't use the database 477 if(!$gs_sql->use_db($db_name)) { 478 479 # This is fatal for the plugout, let's terminate here after disconnecting again 480 # PrintError would already have displayed the warning message on load fail 481 $gs_sql->force_disconnect_from_db(); 482 die("Could not use db $db_name. Can't proceed.\n"); 483 } 484 485 486 # store db handle now that we're connected 487 $self->{'gs_sql'} = $gs_sql; 488 } 489 490 491 # This method also runs on import.pl if gs_sql has a value. 
But we just want to run it on buildcol 492 # Call deinit() not end() because there can be multiple plugin passes: 493 # one for doc level and another for section level indexing 494 # and deinit() should be called before all passes 495 # This way, we can close the SQL database once per buildcol run. 496 sub deinit { 497 my ($self) = shift (@_); 498 499 ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n"; 500 501 if($self->{'gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have 502 # a value except during buildcol, so when processor =~ m/buildproc$/. 503 $self->{'gs_sql'}->finished(); 504 505 # Clear gs_sql (setting key to undef has a different meaning from deleting: 506 # undef makes key still exist but its value is unded whereas delete deletes the key) 507 # So all future use has to make the connection again 508 delete $self->{'gs_sql'}; 509 } 510 511 $self->SUPER::deinit(@_); 512 } 513 514 515 516 514 515 1;
Note: See TracChangeset for help on using the changeset viewer.