Changeset 32544 for main/trunk/greenstone2
- Timestamp:
- 2018-10-26T20:12:14+13:00 (5 years ago)
- Location:
- main/trunk/greenstone2/perllib
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/gssql.pm
r32543 r32544 401 401 } 402 402 403 # delete all records in metatable with specified docid 404 # https://www.tutorialspoint.com/mysql/mysql-delete-query.htm 405 # DELETE FROM table_name [WHERE Clause] 406 # see example under 'do' at https://metacpan.org/pod/release/TIMB/DBI-1.634_50/DBI.pm 407 sub delete_recs_from_metatable_with_docid { 408 my $self= shift (@_); 409 my ($oid) = @_; 410 411 my $dbh = $self->{'db_handle'}; 412 my $meta_table = $self->get_metadata_table_name(); 413 414 #my $rows_deleted = 415 $dbh->do(qq{DELETE FROM $meta_table WHERE did = ?}, undef, $oid) 416 or warn $dbh->errstr; 417 } 418 419 # delete all records in fulltext table with specified docid 420 sub delete_recs_from_texttable_with_docid { 421 my $self= shift (@_); 422 my ($oid) = @_; 423 424 my $dbh = $self->{'db_handle'}; 425 my $fulltxt_table = $self->get_fulltext_table_name(); 426 427 $dbh->do(qq{DELETE FROM $fulltxt_table WHERE did = ?}, undef, $oid) 428 or warn $dbh->errstr; 429 } 403 430 404 431 # Can call this after connection succeeded to get the database handle, dbh, -
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32543 r32544 58 58 # Discuss the plugin/plugout parameters. 59 59 60 # TODO: when db is not running GLI is paralyzed -> can we set timeout on DBI connection attempt? 60 61 61 62 # GreenstoneSQLPlugin inherits from GreenstoneXMLPlugin so that it if meta or fulltext … … 158 159 159 160 # do anything else that needs to be done here when not pluginfo 161 #$self->{'delete_docids'} = (); # list of doc oids to delete during deinit() 160 162 161 163 return $self; … … 179 181 $self->{'doc_oid'} = $attr_hash{'docoid'}; 180 182 print $outhandle "Extracted OID from docsql.xml: ".$self->{'doc_oid'}."\n" 181 if $self->{'verbosity'} > 1;183 if $self->{'verbosity'} > 2; 182 184 183 185 } … … 196 198 197 199 my $outhandle = $self->{'outhandle'}; 198 my $doc_obj = $self->{'doc_obj'}; 199 my $gs_sql = $self->{'gs_sql'}; 200 my $doc_obj = $self->{'doc_obj'}; 201 # sub read() will make the db connection setting $self->{'gs_sql'} once: the first time read() 202 # is called on the GS SQLPlugin instance. 203 my $gs_sql = $self->{'gs_sql'} || return; # $self->lazy_get_gssql(); # won't want to call lazy_get_gssql() if close_doc called during (incr-)import.pl, only during buildcol.pl 204 205 # TODO: return statement skips "dummy" pass. Should we skip it or not? 206 # If we don't return, gs_sql is not set for dummy pass... 200 207 201 208 my $oid = $self->{'doc_oid'}; # we stored current doc's OID during sub xml_start_tag() 202 209 print $outhandle "++++ OID of document (meta|text) to be read in from DB: $oid\n" 203 if $self->{'verbosity'} > 1;210 if $self->{'verbosity'} > 2; 204 211 205 212 # For now, we have access to doc_obj (until just before super::close_document() terminates) … … 214 221 # read in meta for the collection (i.e. 
select * from <col>_metadata table 215 222 216 my $sth = $gs_sql->select_from_metatable_matching_docid($oid); 223 my $sth = $gs_sql->select_from_metatable_matching_docid($oid); 217 224 print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n" 218 if $self->{'verbosity'} > 1;219 220 print $outhandle "----------SQL DB contains meta-----------\n" if $self->{'verbosity'} > 1;225 if $self->{'verbosity'} > 2; 226 227 print $outhandle "----------SQL DB contains meta-----------\n" if $self->{'verbosity'} > 2; 221 228 # https://www.effectiveperlprogramming.com/2010/07/set-custom-dbi-error-handlers/ 222 229 while( my @row = $sth->fetchrow_array() ) { … … 228 235 $sid = $doc_obj->get_top_section() unless $sid; 229 236 print $outhandle "### did: $did, sid: |$sid|, meta: $metaname, val: $metaval\n" 230 if $self->{'verbosity'} > 1;237 if $self->{'verbosity'} > 2; 231 238 232 239 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_meta directly: … … 234 241 } 235 242 print $outhandle "----------FIN READING DOC's META FROM SQL DB------------\n" 236 if $self->{'verbosity'} > 1;243 if $self->{'verbosity'} > 2; 237 244 } 238 245 … … 244 251 245 252 my $sth = $gs_sql->select_from_texttable_matching_docid($oid); 246 print $outhandle "### stmt: ".$sth->{'Statement'}."\n" if $self->{'verbosity'} > 1;253 print $outhandle "### stmt: ".$sth->{'Statement'}."\n" if $self->{'verbosity'} > 2; 247 254 248 255 print $outhandle "----------\nSQL DB contains txt entries for-----------\n" 249 if $self->{'verbosity'} > 1;256 if $self->{'verbosity'} > 2; 250 257 while( my ($primary_key, $did, $sid, $text) = $sth->fetchrow_array() ) { 251 258 … … 254 261 $sid = $doc_obj->get_top_section() if ($sid eq "root"); 255 262 print $outhandle "### did: $did, sid: |$sid|, fulltext: <TXT>\n" 256 if $self->{'verbosity'} > 1;263 if $self->{'verbosity'} > 2; 257 264 258 265 # TODO - pass by ref? 
… … 261 268 } 262 269 print $outhandle "----------FIN READING DOC's TXT FROM SQL DB------------\n" 263 if $self->{'verbosity'} > 1;270 if $self->{'verbosity'} > 2; 264 271 } 265 272 … … 274 281 # During import, the GS SQL Plugin is called before the GS SQL Plugout with undesirable side 275 282 # effect that if the db doesn't exist, gssql::use_db() fails, as it won't create db. 283 # Lazy connection. 276 284 277 285 # Call init() not begin() because there can be multiple plugin passes … … 279 287 # one for doc level and another for section level indexing 280 288 # This way, we can connect to the SQL database once per buildcol run. 281 sub init {282 my ($self) = shift (@_);289 #sub init { 290 # my ($self) = shift (@_); 283 291 # print STDERR "@@@@@@@@@@ INIT CALLED\n"; 284 292 285 $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 286 293 # $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 294 295 296 sub lazy_get_gssql { 297 my $self = shift(@_); 298 299 # if we failed to successfully connect once before, don't bother attempting to connect again 300 #return undef if(defined $self->{'failed'}); # plugin/process would have terminated with die() 301 # if we couldn't succeed connecting on any connection attempt 302 303 return $self->{'gs_sql'} if($self->{'gs_sql'}); 304 305 # assume we'll fail to connect 306 $self->{'failed'} = 1; 307 308 print STDERR "@@@@@@@@@@ LAZY CONNECT CALLED\n"; 309 287 310 #################### 288 311 # print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; … … 328 351 die("Could not use db $db_name. 
Can't proceed.\n"); 329 352 } 353 354 #undef $self->{'failed'}; 330 355 331 356 # store db handle now that we're connected 332 357 $self->{'gs_sql'} = $gs_sql; 358 return $gs_sql; 333 359 334 360 } … … 341 367 sub deinit { 342 368 my ($self) = shift (@_); 343 if($self->{'gs_sql'}) { # can cover TODO: only want to work with sql db if buildcol.pl 369 370 print STDERR "@@@@@@@@@@ GreenstoneSQLPlugin::DEINIT CALLED\n"; 371 372 if($self->{'gs_sql'}) { # only want to work with sql db if buildcol.pl, gs_sql won't have 373 # a value except during buildcol, so when processor =~ m/buildproc$/. 344 374 $self->{'gs_sql'}->disconnect_from_db() 345 375 || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); 346 } 347 # print STDERR "@@@@@@@@@@ DEINIT CALLED\n"; 376 377 # explicitly set to undef so all future use has to make the connection again 378 undef $self->{'gs_sql'}; 379 } 380 348 381 $self->SUPER::deinit(@_); 349 382 } 350 383 384 # TODO: This can't work until GSSQLPlugout has implemented build_mode = incremental 385 # (instead of tossing away db on every build) 386 # then this method needs to undef $self->docid after deleting, and close_doc() has to 387 # just return if $self->docid undefined 388 389 sub read { 390 my $self = shift (@_); 391 392 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 393 394 my $rv = $self->SUPER::read(@_); # defined in ReadXMLFile inherited by superclass GS XML PLugin 395 396 if(defined $rv) { # undef if !can_proc_this_file, but -1 if failed to parse docsql.xml 397 398 # don't want to do any GreenstoneSQLPlugin DB stuff during import.pl 399 # only during in buildcol.pl 400 return if (ref($processor) !~ m/buildproc$/i); 401 402 # we know we're buildcol, let's proceed: 403 404 # make the connection once for the life of the plugin, not once for every doc 405 # so that we can disconnect at the very end of the plugin's life: on deinit() 406 # If we hadn't connected 
before, connect now 407 my $gs_sql = $self->{'gs_sql'} || $self->lazy_get_gssql(); # TODO which syntax best? 408 409 my $build_proc_mode = $processor->get_mode(); # can be "text" as per basebuildproc or 410 # "textdelete" or "textreindex" as per ArchivesInfPlugin 411 if($build_proc_mode =~ m/\.delete/) { 412 413 # NOTTODO: add current doc OID stored in $self->{'doc_oid'} to list of oids get rid 414 # of from table(s) entries. We'll do the actual deletion in deinit?? Since that's 415 # when ArchivesInfPlugin deletes the docsql.xml files 416 417 my $doc_oid = $self->{'doc_oid'}; 418 #my @delete_docids = $self->{'delete_docids'}; 419 #push (@delete_docids, $doc_oid); 420 421 my $proc_mode = $self->{'process_mode'}; 422 if($proc_mode eq "all" || $proc_mode eq "meta_only") { 423 print STDERR "@@@@@@@@ Deleting $doc_oid from meta table\n"; 424 $gs_sql->delete_recs_from_metatable_with_docid($doc_oid); 425 } 426 if($proc_mode eq "all" || $proc_mode eq "text_only") { 427 print STDERR "@@@@@@@@ Deleting $doc_oid from fulltxt table\n"; 428 $gs_sql->delete_recs_from_texttable_with_docid($doc_oid); 429 } 430 } 431 } 432 433 return $rv; 434 435 } 436
Note:
See TracChangeset
for help on using the changeset viewer.