Changeset 32521
- Timestamp:
- 2018-10-18T20:25:28+13:00 (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugouts/MySQLPlugout.pm
r32520 r32521 32 32 use util; 33 33 use FileUtils; 34 #use BasePlugout;35 34 use GreenstoneXMLPlugout; 36 35 use docprint; … … 38 37 use IPC::Open2; 39 38 use POSIX ":sys_wait_h"; # for waitpid, http://perldoc.perl.org/functions/waitpid.html 39 40 # TODO: SIGTERM rollback and disconnect? 41 40 42 41 43 # this plugout does not output xml to a file, but outputs rows into a mysql table … … 100 102 $self->{'db_driver'} = "mysql"; 101 103 $self->{'site_name'} = "localsite"; 102 $self->{' client_user'} = "root";103 $self->{' client_pwd'} = "6reenstone3";104 $self->{'db_client_user'} = "root"; 105 $self->{'db_client_pwd'} = "6reenstone3"; 104 106 #$self->{'db_host'} = "127.0.0.1"; 105 107 #$self->{'db_encoding'} = "utf8"; 108 #TODO: proc_mode is also a saveas option 106 109 107 110 ############ LOAD NECESSARY OPTIONS ########### … … 121 124 122 125 # prepare the shared/common HANDLES to SQL insert statements that contain placeholders 123 # and which we will reuse repeatedly when executing the actualinsert statements126 # and which we will reuse repeatedly when actually executing the insert statements 124 127 my $proc_mode = $self->{'process_mode'}; 125 128 if($proc_mode eq "all" || $proc_mode eq "meta_only" ) { … … 129 132 $self->{'fulltxt_prepared_insert_statement_handle'} = $self->prepare_insert_fulltxt_row_stmthandle(); 130 133 } 131 132 # finally, call begin on super 133 $self->GreenstoneXMLPlugout::begin(@_); 134 135 # if setting up to work with sql db failed, we'd have terminated and wouldn't come up to here: 136 # won't bother preparing GreenstoneXMLPlugout by calling superclass' begin() 137 # finally, can call begin on super - important as doc.xml is opened as a group etc 138 139 if($proc_mode ne "all") { # TODO Q 140 $self->GreenstoneXMLPlugout::begin(@_); 141 } 134 142 } 135 143 … … 139 147 my $self = shift(@_); 140 148 141 # do the superclass stuff 142 $self->GreenstoneXMLPlugout::end(@_); 143 $self->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); 144 #my $success = $self->GreenstoneXMLPlugout::end(@_); 145 #$success = $self->disconnect_from_db() && $success; 146 #return $success; 147 } 148 149 # do the superclass stuff first, as any sql db failures should not prevent superclass cleanup 150 151 if($self->{'process_mode'} ne "all") {# TODO Q 152 $self->GreenstoneXMLPlugout::end(@_); 153 } 154 155 $self->disconnect_from_db() || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); # disconnect_from_db() will also issue a warning, but this may be clearer 156 } 157 158 sub close_group_output { 159 my $self = shift (@_); 160 161 if($proc_mode ne "all") { # TODO Q 162 $self->GreenstoneXMLPlugout::close_group_output(); 163 } 164 } 165 149 166 # TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed 150 167 sub saveas { … … 152 169 my ($doc_obj, $doc_dir) = @_; 153 170 154 # pre save out 155 my ($docxml_outhandler, $output_file) = $self->GreenstoneXMLPlugout::pre_saveas(@_); 156 157 158 # saving customisation 159 $self->{'debug_outhandle'} = $docxml_outhandler if ($self->{'debug'}); # STDOUT if debug 160 161 # write the INVERSE into doc.xml as to what is written to the db 162 my $proc_mode = $self->{'process_mode'}; 163 my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE }; 164 if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml 165 $docxml_output_options->{'output'} = docprint::OUTPUT_TEXT_ONLY; 166 } elsif($proc_mode eq "text_only" ) { # since only full text to go into MySQL db, meta will go into docxml 167 $docxml_output_options->{'output'} = docprint::OUTPUT_META_ONLY; 168 } 169 170 # now we've prepared to write out whatever is meant to go into docxml 171 # and can do actual the steps superclass GreenstoneXMLPlugout carries out to write out docxml 172 # So: write out the doc xml file for the current document 173 my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options); 174 print $docxml_outhandler $section_text; 175 171 # 1. pre save out and saving debug handle 172 if($proc_mode ne "all") { # TODO Q 173 my ($docxml_outhandler, $output_file) = $self->GreenstoneXMLPlugout::pre_saveas(@_); 174 175 $self->{'debug_outhandle'} = $docxml_outhandler if ($self->{'debug'}); # STDOUT if debug 176 } 177 elsif ($self->{'debug'}) { # if we didn't call superclass::pre_saveas, then set debug handle 178 $self->{'debug_outhandle'} = STDOUT; # superclass also debugs to STDOUT 179 # TODO: also set debugging in begin()? Then stmts creating db and tables also sent to debug out and not executed 180 } 181 182 183 # 2. overriding saving behaviour to do what the superclass does PLUS saving to sql db 184 185 if($proc_mode ne "all" ) { # if all, then nothing to go into doc.xml 186 # write the INVERSE into doc.xml as to what is written to the db 187 my $proc_mode = $self->{'process_mode'}; 188 189 my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE }; # TODO Q 190 191 if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml 192 $docxml_output_options->{'output'} = docprint::OUTPUT_TEXT_ONLY; 193 } elsif($proc_mode eq "text_only" ) { # since only full text to go into MySQL db, meta will go into docxml 194 $docxml_output_options->{'output'} = docprint::OUTPUT_META_ONLY; 195 } 196 197 # now we've prepared to write out whatever is meant to go into docxml 198 # and can do actual the steps superclass GreenstoneXMLPlugout carries out to write out docxml 199 # So: write out the doc xml file for the current document 200 my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options); 201 print $docxml_outhandler $section_text; 202 } 203 176 204 # We also write out whatever needs to go into the MySQL database 177 205 $self->write_meta_and_text($doc_obj); 178 206 179 207 180 # post save out 181 $self->GreenstoneXMLPlugout::post_saveas(@_); 182 183 # TODO: close database connection here? Or do something like with groups 184 # so we don't open and close over and over during a single build? 208 # 3. post save out 209 if($proc_mode ne "all" ) { # TODO Q 210 #$self->GreenstoneXMLPlugout::post_saveas(@_); 211 $self->GreenstoneXMLPlugout::post_saveas($doc_obj, $doc_dir, $docxml_outhandler, $output_file); 212 } 213 214 # database connection is closed in end() method 215 # so we don't open and close over and over for each doc during a single build 185 216 } 186 217 … … 193 224 my $doc_oid = $doc_obj->get_OID(); # we're processing a single doc at a time, so single OID 194 225 195 ##binmode($db_handle,":utf8"); ## WRONG FOR DB, NEED TO CREATE IN UTF8 MODE196 197 # TODO if $self->debug is on198 199 # Do we want to open and close a connection per doc?200 # Would we not rather want to open and close per collection rebuild?201 202 #$self->create_db_connection();203 204 226 # load the prepared INSERT statement handles for both tables (can be undef for any table depending on whether meta_only or txt_only are set) 205 227 my $metadata_table_sth = $self->{'metadata_prepared_insert_statement_handle'}; … … 207 229 208 230 $self->recursive_write_meta_and_text($doc_obj, $root_section, $metadata_table_sth, $fulltxt_table_sth); 209 210 #$self->close_db_connection(); 211 } 231 } 232 212 233 # Perl: Reading or Writing to Another Program 213 234 # https://nnc3.com/mags/Perl3/cookbook/ch16_05.htm … … 290 311 # Unless they do a full rebuild, which will recreate the table from scratch? 291 312 # SOLUTION-> rollback transaction on error, see https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/ 292 293 # I'm using perl's open2 like in Z3950Download, as opposed to open3 like in WgetDownload 294 # since I'm assuming each insert statement is atomic: it either does the insertion or fails 295 # and we (may) get some message back. That means we should hopefully be able to terminate 296 # as well if we get SIGTERM/SIGKILL: we're not blocking, but are responsive after every INSERT stmt. 297 298 # Just runs the command without displaying it 299 sub run_command_basic 300 { 301 my ($self,$strCMD) = @_; 302 303 304 my $process_instr = $self->{'MYSQL_IN'}; 305 306 print $process_instr "$strCMD\n"; 307 } 308 309 sub run_command 310 { 311 my ($self,$strCMD) = @_; 312 313 print STDERR "Running mysql command: $strCMD\n"; 314 $self->run_command_basic($strCMD); 315 316 } 317 318 sub response_line_contains { 319 my ($self,$expected) = @_; 320 321 my $out = $self->{'MYSQL_OUT'}; 322 my $opening_line = <$out>; 323 if ($opening_line =~ m/$expected/i) { 324 return 1; 325 } 326 return 0; 327 } 328 329 sub response_lines_contain { 330 my ($self,$expected) = @_; 331 332 my $found_expected = 0; 333 my $out = $self->{'MYSQL_OUT'}; 334 while (my $line = <$out>) { 335 print STDERR "$line\n"; 336 if($line =~ m/$expected/){ 337 #return 1; 338 $found_expected = 1; #won't break out of loop: loop will consume all on child out at present 339 } 340 } 341 return $found_expected; 342 } 343 344 # based on Z3950Download.pm::start_yaz() 345 # ./PATH/TO/mysql -u root -p 346 # Returns: 347 # - 1 if client already quit after pwd fail (so can't send quit message to stopped client) 348 # - 0 if load_db failed (needs to be quit), 349 # - 1 if load_db succeeded (mysql client still running) 350 sub create_db_connection { 351 my $self = shift (@_); 352 print STDERR "Opening connection to MySQL db\n"; 353 354 my $mysql_client = $self->{'client_path'}; 355 my $client_user = $self->{'client_user'} || "root"; 356 357 358 my $launch_cmd = "\"./$mysql_client\" -u $client_user -p"; 359 my $childpid = open2(*MYSQL_OUT, *MYSQL_IN, $launch_cmd) 360 or (print STDERR "Done\n" and die "can't open2 pipe to mysql client: $!"); 361 362 $self->{'pid'} = $childpid; 363 $self->{'MYSQL_OUT'} = *MYSQL_OUT; 364 $self->{'MYSQL_IN'} = *MYSQL_IN; 365 366 # connect with pwd and load the database for this site 367 #my $conn_open = $self->open_connection(); 368 # 369 #if (!$conn_open) { 370 # print STDERR "Cannot connect to mysql db with $launch_cmd\n"; 371 # print STDERR "Done\n"; 372 # return 0; 373 #} 374 375 # connect with pwd 376 my $conn_success = $self->send_pwd(); 377 if(!$conn_success) { 378 return -1; # if pwd failed, then the program already exited by itself 379 # (so don't send quit command after process terminated) 380 } 381 else { 382 return $self->load_db(); 383 } 384 385 # return $conn_open; 386 return $conn_success; # 1 if client already quit after pwd fail, 0 if load_db failed (needs to be quit), 1 if load_db succeeded (mysql client still running) 387 } 388 389 390 # Copied from Z3950Download.pm::quit_yaz() 391 sub close_db_connection { 392 my $self = shift (@_); 393 394 $self->run_command("quit"); 395 close($self->{'MYSQL_IN'}); # close the input to yaz. It also flushes quit command to mysql client 396 397 # make sure nothing is being output by mysql client 398 # flush the mysql-client process' outputstream, else we'll be stuck in an infinite 399 # loop waiting for the process to quit. 400 my $output = $self->{'MYSQL_OUT'}; 401 my $line; 402 while (defined ($line = <$output>)) { 403 if($line !~ m/\w/s) { # print anything other than plain whitespace in case it is important 404 print STDERR "***### $line"; 405 } 406 } 407 408 close($self->{'MYSQL_OUT'}); 409 410 # Is the following necessary? The PerlDoc on open2 (http://perldoc.perl.org/IPC/Open2.html) 411 # says that waitpid must be called to "reap the child process", or otherwise it will hang 412 # around like a zombie process in the background. Adding it here makes the code work as 413 # before, but it is certainly necessary to call waitpid on wget (see WgetDownload.pm). 414 # http://perldoc.perl.org/functions/waitpid.html 415 my $kidpid; 416 do { 417 $kidpid = waitpid($self->{'pid'}, WNOHANG); 418 } while $kidpid > 0; # waiting for pid to become -1 419 } 420 421 # should be called only once per site 422 sub create_database { 423 my $self = shift (@_); 424 #my $sitename = shift(@_); 425 my $sitename = $self->{'site_name'}; 426 my $cmd = "CREATE DATABASE $sitename;"; 427 $self->run_command($cmd); 428 } 429 430 431 sub send_pwd { 432 my $self = shift (@_); 433 434 my $client_pwd = $self->{'client_pwd'}; 435 my $out = $self->{'MYSQL_OUT'}; 436 437 # if connected, it's prompting for pwd. Write the pwd to the mysql client process: 438 $self->run_command_basic($client_pwd); 439 440 #my $opening_line = <$out>; 441 #if ($opening_line =~ m/Access denied/i) { 442 #print STDERR "Password not recognised. Got: $opening_line\n"; 443 #return 0; 444 #} 445 446 if($self->response_line_contains("Access denied")) { 447 print STDERR "Password not recognised. Got: Access denied.\n"; 448 return 0; 449 } 450 451 return 1; 452 } 453 454 455 sub load_db { 456 my $self = shift (@_); 457 458 # attempt to load the db 459 # use the database 460 my $db_name = $self->{'site_name'}; # TODO Q: site_name only exists for GS3. What about GS2? 461 $self->run_command("use " . $db_name . ";"); 462 463 my $db_found = 0; 464 my $out = $self->{'MYSQL_OUT'}; 465 while (my $line = <$out>) { 466 print STDERR "$line\n"; 467 if($line =~ m/Database changed/){ 468 # return 1; # TODO Q: consume all output of running command 469 $db_found = 1; 470 } 471 elsif($line =~ m/Unknown database/){ 472 $db_found = -1; 473 } 474 } 475 476 if($db_found == 1) { 477 return $db_found; 478 } 479 elsif ($db_found == -1) { # a db for the current sitename didn't exist, create it 480 $self->create_database(); 481 482 # attempt to load the newly created db 483 if($self->_load_db()) { # recursive call! 484 return 1; 485 } 486 487 488 #my $opening_line = <$out>; 489 #if ($opening_line !~ m/Query OK/) { 490 if(!$self->response_line_contains("Query OK")) { 491 print STDERR "Could not create db\n"; 492 return 0; # couldn't even create the db 493 } else { # success creating db 494 # so let's create the metadata and fulltxt tables for the current coll while we're at it 495 if($self->create_meta_table()) { 496 return $self->create_fulltxt_table(); 497 } 498 } 499 } 500 else { # unknown error trying to load db, bail 501 return 0; 502 } 503 } 504 505 506 sub create_meta_table { 507 my $self = shift (@_); 508 my $table_name = $self->{'collection_name'} . "metadata"; 509 #my $cmd = "CREATE TABLE $table_name (id VARCHAR(255) NOT NULL UNIQUE, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));"; 510 511 # If using an auto incremented primary key: 512 my $cmd = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, metaname VARCHAR(127) NOT NULL, metavalue VARCHAR(1023) NOT NULL, PRIMARY KEY(id));"; 513 514 $self->run_command($cmd); 515 516 if(!$self->response_lines_contain("Query OK")) { 517 print STDERR "Could not create metadata table\n"; 518 return 0; 519 } else { 520 return 1; 521 } 522 } 523 524 sub create_fulltxt_table { 525 my $self = shift (@_); 526 my $table_name = $self->{'collection_name'} . "fulltxt"; 527 #my $cmd = "CREATE TABLE $table_name (id VARCHAR(255) NOT NULL UNIQUE, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));"; 528 529 # If using an auto incremented primary key: 530 my $cmd = "CREATE TABLE $table_name (id INT NOT NULL AUTO_INCREMENT, did VARCHAR(63) NOT NULL, sid VARCHAR(63) NOT NULL, fulltxt LONGTEXT, PRIMARY KEY(id));"; 531 532 $self->run_command($cmd); 533 534 if(!$self->response_lines_contain("Query OK")) { 535 print STDERR "Could not create table\n"; 536 return 0; 537 } else { 538 return 1; 539 } 540 } 541 542 # https://www.guru99.com/insert-into.html 543 # and https://dev.mysql.com/doc/refman/8.0/en/example-auto-increment.html 544 # for inserting multiple rows at once 545 sub get_cmd_insert_new_row_into_meta_table { 546 my $self = shift (@_); 547 my ($did, $sid, $metaname, $metavalue) = @_; 548 my $tablename = $self->{'colname'}."_metadata"; 549 550 my $cmd = "INSERT INTO $tablename (did, sid, metaname, metavalue) VALUES\n"; 551 $cmd .= "('$did', '$sid', '$metaname', '$metavalue');\n"; 552 return $cmd; 553 } 554 555 sub get_cmd_insert_new_row_into_txt_table { 556 my $self = shift (@_); 557 my ($did, $sid, $fulltext) = @_; 558 my $tablename = $self->{'colname'}."_fulltxt"; 559 560 my $cmd = "INSERT INTO $tablename (did, sid, fulltxt) VALUES\n"; 561 $cmd .= "('$did', '$sid', '$fulltext');\n"; 562 return $cmd; 563 } 564 565 # TODO: later add edit and delete (and nothing for "been indexed" status?) 566 567 ################## 568 569 ## UNUSED 570 sub X_open_connection() 571 { 572 my $self = shift (@_); 573 # connect with pwd 574 my $conn_success = $self->send_pwd(); 575 if(!$conn_success) { 576 return -1; # if pwd failed, then the program already exists by itself (so don't send quit command after process terminated) 577 } 578 579 if($conn_success) { 580 return $self->load_db(); 581 } else { 582 return $conn_success; 583 } 584 } 313 # But then should set AutoCommit to off on connection, and remember to commit every time 585 314 586 315 ################# … … 598 327 599 328 my $db_driver = $self->{'db_driver'}; 600 my $db_user = $self->{' client_user'} || "root";601 my $db_pwd = $self->{' client_pwd'};329 my $db_user = $self->{'db_client_user'} || "root"; 330 my $db_pwd = $self->{'db_client_pwd'}; 602 331 my $db_host = $self->{'db_host'} || "127.0.0.1"; 603 332 my $db_enc = $self->{'db_encoding'} || "utf8";
Note:
See TracChangeset
for help on using the changeset viewer.