- Timestamp:
- 2018-11-09T19:01:04+13:00 (5 years ago)
- Location:
- main/trunk/greenstone2/perllib
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/gssql.pm
r32588 r32591 52 52 # + TODO: remove unnecessary warn() since PrintError is active 53 53 54 # TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds54 # + TODO: drop table if exists and create table if exists are available in MySQL. Use those cmds 55 55 # instead of always first checking for existence ourselves? Only when subclassing to specific 56 56 # mysql class? … … 157 157 # 158 158 # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#disconnect 159 # "Disconnects the database from the database handle. disconnect is typically only used before exitin #g the program. The handle is of little use after disconnecting.159 # "Disconnects the database from the database handle. disconnect is typically only used before exiting the program. The handle is of little use after disconnecting. 160 160 # 161 161 # The transaction behaviour of the disconnect method is, sadly, undefined. Some database systems (such as Oracle and Ingres) will automatically commit any outstanding changes, but others (such as Informix) will rollback any outstanding changes. Applications not using AutoCommit should explicitly call commit or rollback before calling disconnect. … … 166 166 # 167 167 # If you disconnect from a database while you still have active statement handles (e.g., SELECT statement handles that may have more data to fetch), you will get a warning. The warning may indicate that a fetch loop terminated early, perhaps due to an uncaught error. To avoid the warning call the finish method on the active handles." 168 # 168 169 # 169 170 sub DESTROY { … … 171 172 172 173 if (${^GLOBAL_PHASE} eq 'DESTRUCT') { 173 174 174 175 if ($_dbh_instance) { # database handle still active. Use singleton handle! 175 176 # rollback code has moved to finish_signal_handler() where it belongs? 177 178 # NOTE: if RaiseError is set on dbi connection, then on any error, perl process will die() 179 # which will end up calling this DESTROY. If it was a die() that called DESTROY 180 # then need to rollback the db here. However, if it was not a die() but natural termination 181 # of the perl process, destroy() will also get called. In that case we don't want to rollback 182 # but do a commit() to the DB instead. 183 # Perhaps detecting the difference may be accomplished by checking ref_count: 184 # - If ref_count not 0 it may require a rollback? 185 # - If ref_count 0 it may be a natural termination and require a commit? Except that ref_count 186 # is set back to 0 in finished(), which will do the commit when ref_count becomes 0. So shouldn't 187 # (have to) do that here. 176 # dbh instance being active implies build was cancelled 177 178 # rollback code has moved to finish_signal_handler() where it belongs 179 # as rollback() should only happen on cancel/unnatural termination 180 # vs commit() happening in finished() before disconnect, which is natural termination. 181 188 182 189 183 # We're now finally ready to disconnect, as is required for both natural and premature termination 190 print STDERR "XXXXXXXX Global Destruct: Disconnecting from database\n"; 191 $_dbh_instance->disconnect or warn $_dbh_instance->errstr; 192 $_dbh_instance = undef; 193 $ref_count = 0; 184 # (Though natural termination would have disconnected already) 185 # We now leave DBI's own destructor to do the disconnection when perl calls its DESTROY() 186 # We'll just print a message to stop anyone from worrying whether cancelling build 187 # will ensure disconnection still happens. It happens, but silently. 188 print STDERR " Global Destruct Phase: DBI's own destructor will disconnect database\n"; 189 #$_dbh_instance->disconnect or warn $_dbh_instance->errstr; 190 #$_dbh_instance = undef; 191 #$ref_count = 0; 194 192 } 195 193 return; … … 246 244 } 247 245 if($params_map->{'autocommit'}) { 248 print STDERR " SQL DB CANCEL SUPPORT OFF.\n" ;246 print STDERR " SQL DB CANCEL SUPPORT OFF.\n" if($params_map->{'verbosity'} > 2); 249 247 } else { 250 248 print STDERR " SQL DB CANCEL SUPPORT ON.\n"; … … 259 257 my $db_enc = "utf8mb4" if $params_map->{'db_encoding'} eq "utf8"; 260 258 261 # these are the params for connecting to MySQL 262 my $db_driver = $params_map->{'db_driver'} || "mysql"; 263 my $db_user = $params_map->{'db_client_user'} || "root"; 259 # Params for connecting to MySQL 260 # These params are ensured default/fallback values by the GS SQL Plugs 261 # so no need to set it here 262 my $db_driver = $params_map->{'db_driver'}; 263 my $db_host = $params_map->{'db_host'}; 264 my $db_user = $params_map->{'db_client_user'}; 265 266 # params that can be undef are db_client_pwd and db_port 264 267 my $db_pwd = $params_map->{'db_client_pwd'}; # even if undef and password was necessary, 265 268 # we'll see a sensible error message when connect fails 266 my $db_host = $params_map->{'db_host'} || "127.0.0.1";267 269 # localhost doesn't work for us, but 127.0.0.1 works 268 270 # https://metacpan.org/pod/DBD::mysql … … 355 357 if($ref_count == 0) { # Only commit transaction when we're about to actually disconnect, not before 356 358 357 # TODO: If AutoCommit was off, meaning transactions were on/enabled,359 # + TODO: If AutoCommit was off, meaning transactions were on/enabled, 358 360 # then here is where we commit our one long transaction. 359 361 # https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#commit … … 521 523 } 522 524 523 # TODO Q: commit here, so that future select statements work?525 # + TODO Q: commit here, so that future select statements work? 524 526 # See https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm#Transactions 525 527 } -
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32589 r32591 43 43 # back in from the sql db while the remainder is to be read back in from the docsql .xml files. 44 44 45 # TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking: basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work.45 # + TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking: basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work. 46 46 # Discuss the plugin/plugout parameters. 47 47 … … 145 145 'desc' => "{GreenstoneSQLPlug.rollbacl_on_cancel}" } ]; 146 146 147 # TODO: If subclassing gssql for other supporting databases and if they have different required 148 # connection parameters, we can check how WordPlugin, upon detecting Word is installed, 149 # dynamically loads Word specific configuration options. 147 150 my $arguments = 148 151 [ { 'name' => "process_exp", … … 161 164 'type' => "enum", 162 165 'list' => $rollback_on_cancel_list, 163 'deft' => "false", # TODO Q: what's the better default? If "true", any memory concerns?166 'deft' => "false", # better default than true 164 167 'reqd' => "no", 165 168 'hiddengli' => "no"}, … … 178 181 'type' => "string", 179 182 'deft' => "", 180 'reqd' => "no"}, # pwd required? NO.183 'reqd' => "no"}, # pwd not required: can create mysql accounts without pwd 181 184 { 'name' => "db_host", 182 185 'desc' => "{GreenstoneSQLPlug.db_host}", 183 186 'type' => "string", 184 'deft' => "127.0.0.1", 187 'deft' => "127.0.0.1", # NOTE: make this int? No default for port, since it's not a required connection param 185 188 'reqd' => "yes"}, 186 189 { 'name' => "db_port", … … 226 229 # as removeold, which should drop the collection tables, happens during the import phase, 227 230 # calling GreenstoneSQLPlugin::and therefore also requires a db connection. 228 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin231 # + TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 229 232 # and Plugout would be using one connection during import.pl phase when both plugs exist. 230 233 … … 421 424 } 422 425 423 # TODO Q: Why are there 4 passes when we're only indexing at doc and section level (2 passes)? What's the dummy pass, why is there a pass for infodb? 426 # There are multiple passes processing the document (see buildcol's mode parameter description): 427 # - compressing the text which may be a dummy pass for lucene/solr, wherein they still want the 428 # docobj for different purposes, 429 # - the pass(es) for indexing, e.g. doc/didx and section/sidx level passes 430 # - and an infodb pass for processing the classifiers. This pass too needs the docobj 431 # Since all passes need the doc_obj, all are read in from docsql + SQL db into the docobj in memory 424 432 425 433 # We should only ever get here during the buildcol.pl phase … … 472 480 if $self->{'verbosity'} > 2; 473 481 474 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_meta directly: 475 $doc_obj->add_utf8_metadata($sid, $metaname, &docprint::unescape_text($metaval)); 482 # + TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_meta directly: 483 #$doc_obj->add_utf8_metadata($sid, $metaname, &docprint::unescape_text($metaval)); 484 485 # data stored unescaped in db: escaping only for html/xml files, not for txt files or db 486 $doc_obj->add_utf8_metadata($sid, $metaname, $metaval); 476 487 } 477 488 print $outhandle "----------FIN READING DOC's META FROM SQL DB------------\n" … … 501 512 502 513 # TODO - pass by ref? 503 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_text directly: 504 my $textref = &docprint::unescape_textref(\$text); 505 $doc_obj->add_utf8_text($sid, $$textref); 514 # + TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_text directly: 515 # data stored unescaped in db: escaping is only for html/xml files, not for txt files or db 516 #my $textref = &docprint::unescape_textref(\$text); 517 $doc_obj->add_utf8_textref($sid, \$text); 506 518 } 507 519 print $outhandle "----------FIN READING DOC's TXT FROM SQL DB------------\n" -
main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm
r32589 r32591 41 41 # + TODO: SIGTERM rollback and disconnect? 42 42 # + TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db 43 # XTODO Q: introduced site param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes". No longer a param44 # Did I do the pass by ref in docprint's escape and unescape textref functions correctly, and how they're called here?43 #+ TODO Q: introduced site param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes". No longer a param 44 # !!!! Did I do the pass by ref in docprint's escape and unescape textref functions correctly, and how they're called here? 45 45 # Any more optimisation I can do around this? 46 46 … … 88 88 'type' => "enum", 89 89 'list' => $rollback_on_cancel_list, 90 'deft' => "false", # TODO Q: what's the better default? If "true", any memory concerns?90 'deft' => "false", # better default than true 91 91 'reqd' => "no", 92 92 'hiddengli' => "no"}, … … 105 105 'type' => "string", 106 106 'deft' => "", 107 'reqd' => "no"}, # pwd required? NO.107 'reqd' => "no"}, # pwd not required: can create mysql accounts without pwd 108 108 { 'name' => "db_host", 109 109 'desc' => "{GreenstoneSQLPlug.db_host}", 110 110 'type' => "string", 111 'deft' => "127.0.0.1", 111 'deft' => "127.0.0.1", # localhost doesn't work for us, but 127.0.0.1 works. See gsmysql.pm 112 112 'reqd' => "yes"}, 113 113 { 'name' => "db_port", … … 347 347 # TODO: does it need to be stored escaped, as it requires unescaping when read back in 348 348 # from db (unlike for reading back in from doc.xml) 349 my $escaped_meta_value = &docprint::escape_text($data->[1]); 349 350 # Treat db like a text file instead of an html/xml file: don't need to escape text 351 # going into it 352 #my $escaped_meta_value = &docprint::escape_text($data->[1]); 353 my $meta_value = $data->[1]; 350 354 351 355 # Write out the current section's meta to collection db's METADATA table … … 356 360 # OR if debugging, then it will print the SQL insert statement but not execute it 357 361 358 $gs_sql->insert_row_into_metadata_table($doc_oid, $section_name, $meta_name, $ escaped_meta_value, $self->{'debug'});362 $gs_sql->insert_row_into_metadata_table($doc_oid, $section_name, $meta_name, $meta_value, $self->{'debug'}); 359 363 } 360 364 } … … 362 366 363 367 if($proc_mode eq "all" || $proc_mode eq "text_only" ) { 364 365 my $section_textref = &docprint::escape_textref(\$section_ptr->{'text'}); 368 369 # See above, no need to html-escape for db 370 my $section_text = $section_ptr->{'text'}; #&docprint::escape_textref(\$section_ptr->{'text'}); 366 371 367 372 # fulltxt column can be SQL NULL. undef value gets written out as NULL: … … 369 374 # The following will do the SQL insertion 370 375 # or if debug, the following will print the SQL insert stmt without executing it 371 $gs_sql->insert_row_into_fulltxt_table($doc_oid, $section_name, $section_textref, $self->{'debug'});376 $gs_sql->insert_row_into_fulltxt_table($doc_oid, $section_name, \$section_text, $self->{'debug'}); 372 377 373 378 }
Note:
See TracChangeset
for help on using the changeset viewer.