Changeset 32583

Show
Ignore:
Timestamp:
08.11.2018 17:22:04 (12 days ago)
Author:
ak19
Message:

1. Some tidying up of the code. 2. Removing unnecessary calls to force_disconnect_from_db (now made 'private') after die() since on die() perl calls object destructor which will ensure disconnection anyway.

Location:
main/trunk/greenstone2/perllib
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gssql.pm

    r32582 r32583  
    22# 
    33# gssql.pm -- DBI for SQL related utility functions used by 
    4 # GreenstoneSQLPlugout and hereafter by GreenstoneSQLPlugin too. 
     4# GreenstoneSQLPlugout and GreenstoneSQLPlugin too. 
    55# A component of the Greenstone digital library software 
    66# from the New Zealand Digital Library Project at the  
     
    6161 
    6262# Parameterise (one or more methods may use them): 
    63 # - db_name (which is the GS3 sitename) 
     63# - db_name (which is the GS3 sitename, or "greenstone2" for GS2) 
    6464 
    6565 
     
    8383    } 
    8484    } 
    85  
    8685     
    8786    die "Caught a $sig signal $!"; # die() will always call destructor (sub DESTROY) 
     
    125124# "It’s common to call die when handling SIGINT and SIGTERM. die is useful because it will ensure that Perl stops correctly: for example Perl will execute a destructor method if present when die is called, but the destructor method will not be called if a SIGINT or SIGTERM is received and no signal handler calls die." 
    126125# 
    127 # https://perldoc.perl.org/perlobj.html#Destructors 
     126# Useful: https://perldoc.perl.org/perlobj.html#Destructors 
    128127# 
    129128# https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect 
     
    145144    if ($_dbh_instance) { # database handle still active. Use singleton handle! 
    146145 
    147         # THIS CODE HAS MOVED TO finish_signal_handler() WHERE IT BELONGS 
    148         # If autocommit wasn't set, then this is a cancel operation. 
    149         # If we've not disconnected from the sql db yet and if we've not committed 
    150         # transactions yet, then cancel means we do a rollback here 
    151  
    152         # if($_dbh_instance->{AutoCommit} == 0) { 
    153          
    154         #   $_dbh_instance->rollback(); # will warn on failure, nothing more we can/want to do, 
    155             #     # don't do a die() here: possibility of infinite loop and we still want to disconnect 
    156         # } 
    157  
    158         # Either way, we're now finally ready to disconnect as is required for premature 
    159         # termination too 
     146        # rollback code has moved to finish_signal_handler() where it belongs? 
     147         
     148        # NOTE: if RaiseError is set on dbi connection, then on any error, perl process will die() 
     149        # which will end up calling this DESTROY. If it was a die() that called DESTROY 
     150        # then need to rollback the db here. However, if it was not a die() but natural termination 
     151        # of the perl process, destroy() will also get called. In that case we don't want to rollback 
     152        # but do a commit() to the DB instead. 
     153        # Perhaps detecting the difference may be accomplished by checking ref_count: 
     154        # - If ref_count not 0 it may require a rollback? 
     155        # - If ref_count 0 it may be a natural termination and require a commit? Except that ref_count 
     156        # is set back to 0 in finished(), which will do the commit when ref_count becomes 0. So shouldn't 
     157        # (have to) do that here. 
     158         
     159        # We're now finally ready to disconnect, as is required for both natural and premature termination 
    160160        print STDERR "XXXXXXXX Global Destruct: Disconnecting from database\n"; 
    161161        $_dbh_instance->disconnect or warn $_dbh_instance->errstr; 
     
    165165    return; 
    166166    } 
     167 
    167168} 
    168169 
     
    229230    } 
    230231    if($params_map->{'autocommit'}) { 
    231         print STDERR "   SQL DB UNDO SUPPORT OFF.\n"; 
     232        print STDERR "   SQL DB CANCEL SUPPORT OFF.\n"; 
    232233    } else { 
    233         print STDERR "   SQL DB UNDO SUPPORT ON.\n"; 
     234        print STDERR "   SQL DB CANCEL SUPPORT ON.\n"; 
    234235    } 
    235236    } 
     
    326327 
    327328# Will disconnect if this instance of gssql holds the last reference to the db connection 
     329# If disconnecting and autocommit is off, then this will commit before disconnecting 
    328330sub finished { 
    329     my $self= shift (@_); 
    330  
    331     # TODO: if AutoCommit was off, meaning transactions were on/enabled, 
    332     # then here is where we commit our one long transaction. 
    333     # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit 
    334     my $rc = 1;     
    335      
    336     $ref_count--; 
    337     if($ref_count == 0) { 
    338     # Only commit transaction when we're about to disconnect, not before 
    339     # If autocommit was on, then we'd have committed after every db operation, so nothing to do 
    340     $rc = $self->do_commit_if_on(); 
    341      
    342     $self->force_disconnect_from_db(); 
    343     } 
    344  
    345     return $rc; 
    346 } 
    347  
    348 sub do_commit_if_on { 
    349331    my $self= shift (@_); 
    350332    my $dbh = $self->{'db_handle'}; 
     
    352334    my $rc = 1; # return code: everything went fine, regardless of whether we needed to commit 
    353335                # (AutoCommit on or off) 
    354  
    355     # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit 
    356     if($dbh->{AutoCommit} == 0) { 
    357     print STDERR "   Committing transaction to SQL database now.\n" if $self->{'verbosity'}; 
    358     $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem 
    359                                                             # worth embellishing error message 
    360     } 
    361     # If autocommit was on, then we'd have committed after every db operation, so nothing to do 
     336     
     337    $ref_count--; 
     338    if($ref_count == 0) { # Only commit transaction when we're about to actually disconnect, not before 
     339     
     340    # TODO: If AutoCommit was off, meaning transactions were on/enabled, 
     341    # then here is where we commit our one long transaction. 
     342    # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit 
     343    if($dbh->{AutoCommit} == 0) { 
     344        print STDERR "   Committing transaction to SQL database now.\n" if $self->{'verbosity'}; 
     345        $rc = $dbh->commit() or warn("SQL DB COMMIT FAILED: " . $dbh->errstr); # important problem 
     346        # worth embellishing error message 
     347    } 
     348    # else if autocommit was on, then we'd have committed after every db operation, so nothing to do 
     349     
     350    $self->_force_disconnect_from_db(); 
     351    } 
    362352 
    363353    return $rc; 
    364354} 
     355 
    365356 
    366357# Call this method on die(), so that you're sure the perl process has disconnected from SQL db 
     
    368359# TODO: make sure to have committed or rolled back before disconnect 
    369360# and that you've call finish() on statement handles if any fetch remnants remain 
    370 sub force_disconnect_from_db { 
     361sub _force_disconnect_from_db { 
    371362    my $self= shift (@_); 
    372363 
  • main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm

    r32582 r32583  
    5050 
    5151# DONE: 
     52# + TODO: For on cancel, add a SIGTERM handler or so to call end() 
     53# or to explicitly call gs_sql->close_connection if $gs_sql def 
     54# 
    5255# + TODO: Incremental delete can't work until GSSQLPlugout has implemented build_mode = incremental 
    5356# (instead of tossing away db on every build) 
     
    126129 
    127130    return q^(?i)docsql(-\d+)?\.xml$^; # regex based on this method in GreenstoneXMLPlugin 
    128     #return q^(?i)docsql(-.+)?\.xml$^; # no longer storing the OID embedded in docsql .xml filename 
    129131} 
    130132 
     
    191193 
    192194 
    193 # TODO: For on cancel, add a SIGTERM handler or so to call end() 
    194 # or to explicitly call gs_sql->close_connection if $gs_sql def 
     195###### Methods called during buildcol and import ####### 
    195196 
    196197sub new { 
     
    217218} 
    218219 
    219 ###### Called during import.pl 
     220# GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl 
     221# This means it connects and deconnects during import.pl as well. This is okay 
     222# as removeold, which should drop the collection tables, happens during the import phase, 
     223# calling GreenstoneSQLPlugin::and therefore also requires a db connection. 
     224# TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 
     225# and Plugout would be using one connection during import.pl phase when both plugs exist. 
     226 
     227# Call init() not begin() because there can be multiple plugin passes and begin() called for 
     228# each pass (one for doc level and another for section level indexing), whereas init() should 
     229# be called before any and all passes. 
     230# This way, we can connect to the SQL database once per buildcol run. 
     231sub init { 
     232    my ($self) = shift (@_); 
     233    ##print STDERR "@@@@@@@@@@ INIT CALLED\n"; 
     234     
     235    $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 
     236 
     237    #################### 
     238#    print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 
     239#    print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n"; 
     240 
     241#    print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n"; 
     242#    print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 
     243#    print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 
     244#    print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 
     245    #################### 
     246 
     247    # create gssql object. 
     248    # collection name will be used for naming tables (site name will be used for naming database) 
     249    my $gs_sql = new gssql({ 
     250    'collection_name' => $ENV{'GSDLCOLLECTION'}, 
     251    'verbosity' => $self->{'verbosity'} || 0 
     252               }); 
     253     
     254    # if autocommit is set, there's no rollback support 
     255    my $autocommit = ($self->{'rollback_on_cancel'} eq "false") ? 1 : 0; 
     256 
     257    # try connecting to the mysql db, die if that fails 
     258    if(!$gs_sql->connect_to_db({ 
     259    'db_driver' => $self->{'db_driver'}, 
     260    'db_client_user' => $self->{'db_client_user'}, 
     261    'db_client_pwd' => $self->{'db_client_pwd'}, 
     262    'db_host' => $self->{'db_host'}, 
     263    'autocommit' => $autocommit 
     264                   }) 
     265    ) 
     266    { 
     267    # This is fatal for the plugout, let's terminate here 
     268    # PrintError would already have displayed the warning message on connection fail     
     269    die("Could not connect to db. Can't proceed.\n"); 
     270    } 
     271     
     272    my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 
     273 
     274    # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet) 
     275    # Bail if we can't use the database 
     276    if(!$gs_sql->use_db($db_name)) { 
     277     
     278    # This is fatal for the plugout, let's terminate here after disconnecting again 
     279    # PrintError would already have displayed the warning message on load fail 
     280    # And on die() perl will call gssql destroy which will ensure a disconnect() from db 
     281    #$gs_sql->force_disconnect_from_db(); 
     282    die("Could not use db $db_name. Can't proceed.\n"); 
     283    } 
     284     
     285     
     286    # store db handle now that we're connected 
     287    $self->{'gs_sql'} = $gs_sql;     
     288} 
     289 
     290 
     291# This method also runs on import.pl if gs_sql has a value. But we just want to run it on buildcol 
     292# Call deinit() not end() because there can be multiple plugin passes: 
     293# one for doc level and another for section level indexing 
     294# and deinit() should be called before all passes 
     295# This way, we can close the SQL database once per buildcol run. 
     296sub deinit { 
     297    my ($self) = shift (@_); 
     298     
     299    ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n"; 
     300     
     301    if($self->{'gs_sql'}) { 
     302 
     303    # Important to call finished(): 
     304    # it will disconnect from db if this is the last gssql instance, 
     305    # and it will commit to db before disconnecting if rollbback_on_cancel turned on 
     306    $self->{'gs_sql'}->finished(); 
     307 
     308    # Clear gs_sql (setting key to undef has a different meaning from deleting: 
     309    # undef makes key still exist but its value is unded whereas delete deletes the key) 
     310    # So all future use has to make the connection again 
     311    delete $self->{'gs_sql'}; 
     312    } 
     313 
     314    $self->SUPER::deinit(@_); 
     315} 
     316 
     317 
     318 
     319###### Methods only called during import.pl ##### 
    220320 
    221321# This is called once if removeold is set with import.pl. Most plugins will do 
     
    246346    } 
    247347 
    248     # UNNECESSARY 
    249     # The removeold related DB transaction (deleting collection tables) is complete 
    250     # Don't let GS SQL PlugIN interfere with GS SQL PlugOUT's database transactions 
    251     # during import.pl hereafter. Finish up. 
    252     #$gs_sql->do_commit_if_on(); 
    253348} 
    254349 
     
    294389} 
    295390 
    296 #### Called during buildcol 
     391##### Methods called only during buildcol ##### 
    297392 
    298393sub xml_start_tag { 
     
    417512} 
    418513 
    419 #### Called during buildcol and import 
    420  
    421 # GS SQL Plugin::init() (and deinit()) is called by import.pl and also by buildcol.pl 
    422 # This means it connects and deconnects during import.pl as well. This is okay 
    423 # as removeold, which should drop the collection tables, happens during the import phase, 
    424 # calling GreenstoneSQLPlugin::and therefore also requires a db connection. 
    425 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 
    426 # and Plugout would be using one connection during import.pl phase when both plugs exist. 
    427  
    428 # Call init() not begin() because there can be multiple plugin passes and begin() called for 
    429 # each pass (one for doc level and another for section level indexing), whereas init() should 
    430 # be called before any and all passes. 
    431 # This way, we can connect to the SQL database once per buildcol run. 
    432 sub init { 
    433     my ($self) = shift (@_); 
    434     ##print STDERR "@@@@@@@@@@ INIT CALLED\n"; 
    435      
    436     $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 
    437  
    438     #################### 
    439 #    print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 
    440 #    print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n"; 
    441  
    442 #    print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n"; 
    443 #    print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 
    444 #    print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 
    445 #    print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 
    446     #################### 
    447  
    448     # create gssql object. 
    449     # collection name will be used for naming tables (site name will be used for naming database) 
    450     my $gs_sql = new gssql({ 
    451     'collection_name' => $ENV{'GSDLCOLLECTION'}, 
    452     'verbosity' => $self->{'verbosity'} || 0 
    453                }); 
    454      
    455     # if autocommit is set, there's no rollback support 
    456     my $autocommit = ($self->{'rollback_on_cancel'} eq "false") ? 1 : 0; 
    457  
    458     # try connecting to the mysql db, die if that fails 
    459     if(!$gs_sql->connect_to_db({ 
    460     'db_driver' => $self->{'db_driver'}, 
    461     'db_client_user' => $self->{'db_client_user'}, 
    462     'db_client_pwd' => $self->{'db_client_pwd'}, 
    463     'db_host' => $self->{'db_host'}, 
    464     'autocommit' => $autocommit 
    465                    }) 
    466     ) 
    467     { 
    468     # This is fatal for the plugout, let's terminate here 
    469     # PrintError would already have displayed the warning message on connection fail     
    470     die("Could not connect to db. Can't proceed.\n"); 
    471     } 
    472      
    473     my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 
    474  
    475     # Attempt to use the db, create it if it doesn't exist (but don't create the tables yet) 
    476     # Bail if we can't use the database 
    477     if(!$gs_sql->use_db($db_name)) { 
    478      
    479     # This is fatal for the plugout, let's terminate here after disconnecting again 
    480     # PrintError would already have displayed the warning message on load fail 
    481     $gs_sql->force_disconnect_from_db(); 
    482     die("Could not use db $db_name. Can't proceed.\n"); 
    483     } 
    484      
    485      
    486     # store db handle now that we're connected 
    487     $self->{'gs_sql'} = $gs_sql;     
    488 } 
    489  
    490  
    491 # This method also runs on import.pl if gs_sql has a value. But we just want to run it on buildcol 
    492 # Call deinit() not end() because there can be multiple plugin passes: 
    493 # one for doc level and another for section level indexing 
    494 # and deinit() should be called before all passes 
    495 # This way, we can close the SQL database once per buildcol run. 
    496 sub deinit { 
    497     my ($self) = shift (@_); 
    498      
    499     ##print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n"; 
    500      
    501     if($self->{'gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have 
    502     # a value except during buildcol, so when processor =~ m/buildproc$/. 
    503     $self->{'gs_sql'}->finished(); 
    504  
    505     # Clear gs_sql (setting key to undef has a different meaning from deleting: 
    506     # undef makes key still exist but its value is unded whereas delete deletes the key) 
    507     # So all future use has to make the connection again 
    508     delete $self->{'gs_sql'}; 
    509     } 
    510  
    511     $self->SUPER::deinit(@_); 
    512 } 
    513  
    514  
    515  
    516  
     514 
     5151; 
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32582 r32583  
    3939 
    4040 
    41 # This entire class is called only during import.pl 
    42  
    43 # TODO: SIGTERM rollback and disconnect? 
    44 # TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db 
     41# + TODO: SIGTERM rollback and disconnect? 
     42# + TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db 
    4543# TODO Q: introduced site_name param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes" 
    4644# Did I do the pass by ref in docprint's escape and unescape textref functions correctly, and how they're called here? 
     
    121119        'args'     => $arguments }; 
    122120 
     121##### This entire class is called only during import.pl ##### 
     122 
     123##### Overridden methods ##### 
     124 
    123125sub new { 
    124126    my ($class) = shift (@_); 
     
    205207    # This is fatal for the plugout, let's terminate here after disconnecting again 
    206208    # PrintError would already have displayed the warning message on load fail 
    207     $gs_sql->force_disconnect_from_db(); # disconnect_from_db() will issue a warning on error 
     209    # And on die() perl will call gssql destroy which will ensure a disconnect() from db 
     210    #$gs_sql->force_disconnect_from_db(); # disconnect_from_db() will issue a warning on error 
    208211    die("Could not use db $db_name and/or prepare its tables. Can't proceed.\n"); 
    209212    } 
     
    227230    # do the superclass stuff first, as any sql db failures should not prevent superclass cleanup 
    228231    $self->SUPER::end(@_);     
    229      
    230     $self->{'gs_sql'}->finished(); # will disconnect from db if last instance 
     232 
     233    # Important to call finished(): 
     234    # it will disconnect from db if this is the last gssql instance, 
     235    # and it will commit to db before disconnecting if rollbback_on_cancel turned on 
     236    $self->{'gs_sql'}->finished(); 
    231237    delete $self->{'gs_sql'}; # key gs_sql no longer exists, not just the value being undef 
    232238} 
     
    295301 
    296302     
    297     # 3. post save out 
    298     #$self->SUPER::post_saveas(@_); 
     303    # 3. post save out  
    299304    $self->SUPER::post_saveas($doc_obj, $doc_dir, $docxml_outhandler, $output_file); 
    300305     
     
    304309} 
    305310 
     311##### New methods, not inherited  ##### 
    306312 
    307313# write meta and/or text PER DOC out to DB