Changeset 32521

Show
Ignore:
Timestamp:
18.10.2018 20:25:28 (5 weeks ago)
Author:
ak19
Message:

1. Calling superclass GreenstoneXMLPlugout's begin(), end(), close_group_output(), pre_saveas() and post_saveas() only if MySQLPlugout's process mode is not set to output ALL (meta and text) to sql db. 2. Finally removed the open2 versions of the database functions.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugouts/MySQLPlugout.pm

    r32520 r32521  
    3232use util; 
    3333use FileUtils; 
    34 #use BasePlugout; 
    3534use GreenstoneXMLPlugout; 
    3635use docprint; 
     
    3837use IPC::Open2; 
    3938use POSIX ":sys_wait_h"; # for waitpid, http://perldoc.perl.org/functions/waitpid.html 
     39 
     40# TODO: SIGTERM rollback and disconnect? 
     41 
    4042 
    4143# this plugout does not output xml to a file, but outputs rows into a mysql table 
     
    100102    $self->{'db_driver'} = "mysql"; 
    101103    $self->{'site_name'} = "localsite";     
    102     $self->{'client_user'} = "root"; 
    103     $self->{'client_pwd'} = "6reenstone3"; 
     104    $self->{'db_client_user'} = "root"; 
     105    $self->{'db_client_pwd'} = "6reenstone3"; 
    104106    #$self->{'db_host'} = "127.0.0.1"; 
    105107    #$self->{'db_encoding'} = "utf8"; 
     108    #TODO: proc_mode is also a saveas option 
    106109     
    107110    ############ LOAD NECESSARY OPTIONS ########### 
     
    121124 
    122125    # prepare the shared/common HANDLES to SQL insert statements that contain placeholders 
    123     # and which we will reuse repeatedly when executing the actual insert statements 
     126    # and which we will reuse repeatedly when actually executing the insert statements 
    124127    my $proc_mode = $self->{'process_mode'}; 
    125128    if($proc_mode eq "all" || $proc_mode eq "meta_only" ) { 
     
    129132    $self->{'fulltxt_prepared_insert_statement_handle'} = $self->prepare_insert_fulltxt_row_stmthandle(); 
    130133    } 
    131      
    132     # finally, call begin on super 
    133     $self->GreenstoneXMLPlugout::begin(@_);     
     134 
     135    # If setting up to work with the sql db had failed, we'd have terminated and never reached this point, 
     136    # so in that case we don't bother preparing GreenstoneXMLPlugout by calling superclass' begin(). 
     137    # Finally, can call begin on super - important as doc.xml is opened as a group etc 
     138 
     139    if($proc_mode ne "all") { # TODO Q 
     140    $self->GreenstoneXMLPlugout::begin(@_); 
     141    } 
    134142} 
    135143 
     
    139147    my $self = shift(@_); 
    140148 
    141     # do the superclass stuff 
    142     $self->GreenstoneXMLPlugout::end(@_); 
    143     $self->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); 
    144     #my $success = $self->GreenstoneXMLPlugout::end(@_); 
    145     #$success = $self->disconnect_from_db() && $success;     
    146     #return $success; 
    147 } 
    148      
     149    # do the superclass stuff first, as any sql db failures should not prevent superclass cleanup 
     150     
     151    if($self->{'process_mode'} ne "all") {# TODO Q 
     152    $self->GreenstoneXMLPlugout::end(@_); 
     153    } 
     154     
     155    $self->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer 
     156} 
     157  
     158sub close_group_output { 
     159    my $self = shift (@_); 
     160     
     161    if($proc_mode ne "all") { # TODO Q 
     162    $self->GreenstoneXMLPlugout::close_group_output(); 
     163    } 
     164} 
     165    
    149166# TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed 
    150167sub saveas { 
     
    152169    my ($doc_obj, $doc_dir) = @_; 
    153170 
    154     # pre save out 
    155     my ($docxml_outhandler, $output_file) = $self->GreenstoneXMLPlugout::pre_saveas(@_); 
    156      
    157  
    158     # saving customisation 
    159     $self->{'debug_outhandle'} = $docxml_outhandler if ($self->{'debug'}); # STDOUT if debug 
    160  
    161     # write the INVERSE into doc.xml as to what is written to the db 
    162     my $proc_mode = $self->{'process_mode'}; 
    163     my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE }; 
    164     if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml 
    165     $docxml_output_options->{'output'} = docprint::OUTPUT_TEXT_ONLY; 
    166     } elsif($proc_mode eq "text_only" ) { # since only full text to go into MySQL db, meta will go into docxml 
    167     $docxml_output_options->{'output'} = docprint::OUTPUT_META_ONLY; 
    168     } 
    169  
    170     # now we've prepared to write out whatever is meant to go into docxml 
    171     # and can do actual the steps superclass GreenstoneXMLPlugout carries out to write out docxml 
    172     # So: write out the doc xml file for the current document 
    173     my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options); 
    174     print $docxml_outhandler $section_text;     
    175  
     171    # 1. pre save out and saving debug handle 
     172    if($proc_mode ne "all") { # TODO Q 
     173    my ($docxml_outhandler, $output_file) = $self->GreenstoneXMLPlugout::pre_saveas(@_); 
     174 
     175    $self->{'debug_outhandle'} = $docxml_outhandler if ($self->{'debug'}); # STDOUT if debug 
     176    } 
     177    elsif ($self->{'debug'}) { # if we didn't call superclass::pre_saveas, then set debug handle 
     178    $self->{'debug_outhandle'} = STDOUT; # superclass also debugs to STDOUT 
     179    # TODO: also set debugging in begin()? Then stmts creating db and tables also sent to debug out and not executed 
     180    } 
     181     
     182     
     183    # 2. overriding saving behaviour to do what the superclass does PLUS saving to sql db     
     184 
     185    if($proc_mode ne "all" ) { # if all, then nothing to go into doc.xml 
     186    # write the INVERSE into doc.xml as to what is written to the db 
     187    my $proc_mode = $self->{'process_mode'}; 
     188     
     189    my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE }; # TODO Q 
     190     
     191    if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml 
     192        $docxml_output_options->{'output'} = docprint::OUTPUT_TEXT_ONLY; 
     193    } elsif($proc_mode eq "text_only" ) { # since only full text to go into MySQL db, meta will go into docxml 
     194        $docxml_output_options->{'output'} = docprint::OUTPUT_META_ONLY; 
     195    } 
     196 
     197    # now we've prepared to write out whatever is meant to go into docxml 
     198    # and can do the actual steps superclass GreenstoneXMLPlugout carries out to write out docxml 
     199    # So: write out the doc xml file for the current document 
     200    my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options); 
     201    print $docxml_outhandler $section_text;     
     202    } 
     203     
    176204    # We also write out whatever needs to go into the MySQL database 
    177205    $self->write_meta_and_text($doc_obj); 
    178206 
    179207     
    180     # post save out 
    181     $self->GreenstoneXMLPlugout::post_saveas(@_); 
    182  
    183     # TODO: close database connection here? Or do something like with groups 
    184     # so we don't open and close over and over during a single build? 
     208    # 3. post save out 
     209    if($proc_mode ne "all" ) { # TODO Q 
     210    #$self->GreenstoneXMLPlugout::post_saveas(@_); 
     211    $self->GreenstoneXMLPlugout::post_saveas($doc_obj, $doc_dir, $docxml_outhandler, $output_file); 
     212    } 
     213     
     214    # database connection is closed in end() method 
     215    # so we don't open and close over and over for each doc during a single build 
    185216} 
    186217 
     
    193224    my $doc_oid = $doc_obj->get_OID(); # we're processing a single doc at a time, so single OID 
    194225 
    195     ##binmode($db_handle,":utf8"); ## WRONG FOR DB, NEED TO CREATE IN UTF8 MODE 
    196      
    197     # TODO if $self->debug is on 
    198      
    199     # Do we want to open and close a connection per doc? 
    200     # Would we not rather want to open and close per collection rebuild? 
    201      
    202     #$self->create_db_connection(); 
    203  
    204226    # load the prepared INSERT statement handles for both tables (can be undef for any table depending on whether meta_only or txt_only are set) 
    205227    my $metadata_table_sth = $self->{'metadata_prepared_insert_statement_handle'}; 
     
    207229     
    208230    $self->recursive_write_meta_and_text($doc_obj, $root_section, $metadata_table_sth, $fulltxt_table_sth); 
    209  
    210     #$self->close_db_connection(); 
    211 } 
     231} 
     232 
    212233# Perl: Reading or Writing to Another Program 
    213234# https://nnc3.com/mags/Perl3/cookbook/ch16_05.htm 
     
    290311# Unless they do a full rebuild, which will recreate the table from scratch? 
    291312# SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/ 
    292  
    293 # I'm using perl's open2 like in Z3950Download, as opposed to open3 like in WgetDownload 
    294 # since I'm assuming each insert statement is atomic: it either does the insertion or fails 
    295 # and we (may) get some message back. That means we should hopefully be able to terminate 
    296 # as well if we get SIGTERM/SIGKILL: we're not blocking, but are responsive after every INSERT stmt. 
    297  
    298 # Just runs the command without displaying it 
    299 sub run_command_basic 
    300 { 
    301     my ($self,$strCMD) = @_;  
    302      
    303      
    304     my $process_instr = $self->{'MYSQL_IN'}; 
    305  
    306     print $process_instr "$strCMD\n";   
    307 } 
    308  
    309 sub run_command 
    310 { 
    311     my ($self,$strCMD) = @_;     
    312  
    313     print STDERR "Running mysql command: $strCMD\n"; 
    314     $self->run_command_basic($strCMD); 
    315      
    316 } 
    317  
    318 sub response_line_contains { 
    319     my ($self,$expected) = @_; 
    320  
    321     my $out = $self->{'MYSQL_OUT'}; 
    322     my $opening_line = <$out>; 
    323     if ($opening_line =~ m/$expected/i) { 
    324     return 1; 
    325     } 
    326     return 0; 
    327 } 
    328  
    329 sub response_lines_contain { 
    330     my ($self,$expected) = @_; 
    331  
    332     my $found_expected = 0; 
    333     my $out = $self->{'MYSQL_OUT'}; 
    334     while (my $line = <$out>) { 
    335     print STDERR "$line\n"; 
    336     if($line =~ m/$expected/){ 
    337         #return 1; 
    338         $found_expected = 1; #won't break out of loop: loop will consume all on child out at present 
    339     }    
    340     } 
    341     return $found_expected; 
    342 } 
    343  
    344 # based on Z3950Download.pm::start_yaz() 
    345 # ./PATH/TO/mysql -u root -p 
    346 # Returns: 
    347 #   - 1 if client already quit after pwd fail (so can't send quit message to stopped client) 
    348 #   - 0 if load_db failed (needs to be quit), 
    349 #   - 1 if load_db succeeded (mysql client still running) 
    350 sub create_db_connection { 
    351     my $self = shift (@_); 
    352     print STDERR "Opening connection to MySQL db\n"; 
    353  
    354     my $mysql_client = $self->{'client_path'}; 
    355     my $client_user = $self->{'client_user'} || "root"; 
    356      
    357      
    358     my $launch_cmd = "\"./$mysql_client\" -u $client_user -p"; 
    359     my $childpid = open2(*MYSQL_OUT, *MYSQL_IN, $launch_cmd) 
    360     or (print STDERR "Done\n" and die "can't open2 pipe to mysql client: $!"); 
    361      
    362     $self->{'pid'} = $childpid; 
    363     $self->{'MYSQL_OUT'} = *MYSQL_OUT; 
    364     $self->{'MYSQL_IN'} = *MYSQL_IN; 
    365      
    366     # connect with pwd and load the database for this site 
    367     #my $conn_open = $self->open_connection();   
    368     # 
    369     #if (!$conn_open) { 
    370     #    print STDERR "Cannot connect to mysql db with $launch_cmd\n";  
    371     #    print STDERR "Done\n";   
    372     #    return 0; 
    373     #} 
    374      
    375     # connect with pwd 
    376     my $conn_success = $self->send_pwd();     
    377     if(!$conn_success) { 
    378     return -1; # if pwd failed, then the program already exited by itself 
    379                # (so don't send quit command after process terminated) 
    380     } 
    381     else { 
    382     return $self->load_db(); 
    383     }     
    384  
    385     # return $conn_open; 
    386     return $conn_success; # 1 if client already quit after pwd fail, 0 if load_db failed (needs to be quit), 1 if load_db succeeded (mysql client still running) 
    387 } 
    388  
    389  
    390 # Copied from Z3950Download.pm::quit_yaz() 
    391 sub close_db_connection { 
    392     my $self = shift (@_); 
    393      
    394     $self->run_command("quit"); 
    395     close($self->{'MYSQL_IN'}); # close the input to yaz. It also flushes quit command to mysql client 
    396  
    397     # make sure nothing is being output by mysql client 
    398     # flush the mysql-client process' outputstream, else we'll be stuck in an infinite 
    399     # loop waiting for the process to quit. 
    400     my $output = $self->{'MYSQL_OUT'}; 
    401     my $line; 
    402     while (defined ($line = <$output>)) {  
    403     if($line !~ m/\w/s) { # print anything other than plain whitespace in case it is important 
    404         print STDERR "***### $line"; 
    405     } 
    406     } 
    407  
    408     close($self->{'MYSQL_OUT'}); 
    409  
    410     # Is the following necessary? The PerlDoc on open2 (http://perldoc.perl.org/IPC/Open2.html) 
    411     # says that waitpid must be called to "reap the child process", or otherwise it will hang 
    412     # around like a zombie process in the background. Adding it here makes the code work as  
    413     # before, but it is certainly necessary to call waitpid on wget (see WgetDownload.pm). 
    414     # http://perldoc.perl.org/functions/waitpid.html 
    415     my $kidpid; 
    416     do { 
    417     $kidpid = waitpid($self->{'pid'}, WNOHANG); 
    418     } while $kidpid > 0; # waiting for pid to become -1 
    419 } 
    420  
    421 # should be called only once per site 
    422 sub create_database { 
    423     my $self = shift (@_); 
    424     #my $sitename = shift(@_); 
    425     my $sitename = $self->{'site_name'}; 
    426     my $cmd = "CREATE DATABASE $sitename;"; 
    427     $self->run_command($cmd); 
    428 } 
    429  
    430  
    431 sub send_pwd { 
    432     my $self = shift (@_); 
    433  
    434     my $client_pwd = $self->{'client_pwd'}; 
    435     my $out = $self->{'MYSQL_OUT'}; 
    436      
    437     # if connected, it's prompting for pwd. Write the pwd to the mysql client process: 
    438     $self->run_command_basic($client_pwd); 
    439      
    440     #my $opening_line = <$out>; 
    441     #if ($opening_line =~ m/Access denied/i) { 
    442     #print STDERR "Password not recognised. Got: $opening_line\n"; 
    443     #return 0; 
    444     #} 
    445      
    446     if($self->response_line_contains("Access denied")) { 
    447     print STDERR "Password not recognised. Got: Access denied.\n"; 
    448     return 0; 
    449     } 
    450  
    451     return 1; 
    452 } 
    453  
    454  
    455 sub load_db { 
    456     my $self = shift (@_); 
    457  
    458     # attempt to load the db 
    459     # use the database 
    460     my $db_name = $self->{'site_name'}; # TODO Q: site_name only exists for GS3. What about GS2? 
    461     $self->run_command("use " . $db_name . ";"); 
    462      
    463     my $db_found = 0; 
    464     my $out = $self->{'MYSQL_OUT'}; 
    465     while (my $line = <$out>) { 
    466     print STDERR "$line\n"; 
    467     if($line =~ m/Database changed/){ 
    468         # return 1; # TODO Q: consume all output of running command 
    469         $db_found = 1; 
    470     } 
    471     elsif($line =~ m/Unknown database/){ 
    472         $db_found = -1; 
    473     } 
    474     } 
    475  
    476     if($db_found == 1) {  
    477     return $db_found;  
    478     } 
    479     elsif ($db_found == -1) { # a db for the current sitename didn't exist, create it 
    480     $self->create_database(); 
    481  
    482     # attempt to load the newly created db 
    483     if($self->_load_db()) { # recursive call! 
    484         return 1; 
    485     } 
    486      
    487      
    488     #my $opening_line = <$out>; 
    489     #if ($opening_line !~ m/Query OK/) { 
    490     if(!$self->response_line_contains("Query OK")) { 
    491         print STDERR "Could not create db\n"; 
    492         return 0; # couldn't even create the db 
    493     } else { # success creating db 
    494         # so let's create the metadata and fulltxt tables for the current coll while we're at it 
    495         if($self->create_meta_table()) {         
    496         return $self->create_fulltxt_table(); 
    497         } 
    498     } 
    499     } 
    500     else { # unknown error trying to load db, bail 
    501     return 0; 
    502     } 
    503 } 
    504  
    505  
    506 sub create_meta_table { 
    507     my $self = shift (@_); 
    508     my $table_name = $self->{'collection_name'} . "metadata"; 
    509     #my $cmd = "CREATE TABLE $table_name (id VARCHAR(255) NOT NULL UNIQUE, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));"; 
    510  
    511     # If using an auto incremented primary key: 
    512     my $cmd = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));"; 
    513  
    514     $self->run_command($cmd); 
    515      
    516     if(!$self->response_lines_contain("Query OK")) { 
    517     print STDERR "Could not create metadata table\n"; 
    518     return 0; 
    519     } else { 
    520     return 1; 
    521     } 
    522 } 
    523  
    524 sub create_fulltxt_table { 
    525     my $self = shift (@_); 
    526     my $table_name = $self->{'collection_name'} . "fulltxt"; 
    527     #my $cmd = "CREATE TABLE $table_name (id VARCHAR(255) NOT NULL UNIQUE, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));"; 
    528  
    529     # If using an auto incremented primary key: 
    530     my $cmd = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));"; 
    531  
    532     $self->run_command($cmd); 
    533      
    534     if(!$self->response_lines_contain("Query OK")) { 
    535     print STDERR "Could not create table\n"; 
    536     return 0; 
    537     } else { 
    538     return 1; 
    539     } 
    540 } 
    541  
    542 # https://www.guru99.com/insert-into.html 
    543 # and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html 
    544 #     for inserting multiple rows at once 
    545 sub get_cmd_insert_new_row_into_meta_table { 
    546     my $self = shift (@_); 
    547     my ($did, $sid, $metaname, $metavalue) = @_; 
    548     my $tablename = $self->{'colname'}."_metadata"; 
    549  
    550     my $cmd = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES\n"; 
    551     $cmd .= "('$did', '$sid', '$metaname', '$metavalue');\n"; 
    552     return $cmd; 
    553 } 
    554  
    555 sub get_cmd_insert_new_row_into_txt_table { 
    556     my $self = shift (@_); 
    557     my ($did, $sid, $fulltext) = @_; 
    558     my $tablename = $self->{'colname'}."_fulltxt"; 
    559  
    560     my $cmd = "INSERT INTO $tablename (did, sid, fulltxt) VALUES\n"; 
    561     $cmd .= "('$did', '$sid', '$fulltext');\n"; 
    562     return $cmd; 
    563 } 
    564  
    565 # TODO: later add edit and delete (and nothing for "been indexed" status?) 
    566  
    567 ##################  
    568  
    569 ## UNUSED 
    570 sub X_open_connection() 
    571 { 
    572     my $self = shift (@_); 
    573     # connect with pwd 
    574     my $conn_success = $self->send_pwd(); 
    575     if(!$conn_success) { 
    576     return -1; # if pwd failed, then the program already exists by itself (so don't send quit command after process terminated) 
    577     } 
    578      
    579     if($conn_success) { 
    580     return $self->load_db(); 
    581     } else { 
    582     return $conn_success; 
    583     } 
    584 } 
     313# But then should set AutoCommit to off on connection, and remember to commit every time 
    585314 
    586315################# 
     
    598327     
    599328    my $db_driver = $self->{'db_driver'}; 
    600     my $db_user = $self->{'client_user'} || "root"; 
    601     my $db_pwd = $self->{'client_pwd'}; 
     329    my $db_user = $self->{'db_client_user'} || "root"; 
     330    my $db_pwd = $self->{'db_client_pwd'}; 
    602331    my $db_host = $self->{'db_host'} || "127.0.0.1"; 
    603332    my $db_enc = $self->{'db_encoding'} || "utf8";