- Timestamp: 2018-11-09T19:01:04+13:00 (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32589 r32591 43 43 # back in from the sql db while the remainder is to be read back in from the docsql .xml files. 44 44 45 # TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking: basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work.45 # + TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking: basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work. 46 46 # Discuss the plugin/plugout parameters. 47 47 … … 145 145 'desc' => "{GreenstoneSQLPlug.rollbacl_on_cancel}" } ]; 146 146 147 # TODO: If subclassing gssql for other supporting databases and if they have different required 148 # connection parameters, we can check how WordPlugin, upon detecting Word is installed, 149 # dynamically loads Word specific configuration options. 147 150 my $arguments = 148 151 [ { 'name' => "process_exp", … … 161 164 'type' => "enum", 162 165 'list' => $rollback_on_cancel_list, 163 'deft' => "false", # TODO Q: what's the better default? If "true", any memory concerns?166 'deft' => "false", # better default than true 164 167 'reqd' => "no", 165 168 'hiddengli' => "no"}, … … 178 181 'type' => "string", 179 182 'deft' => "", 180 'reqd' => "no"}, # pwd required? NO.183 'reqd' => "no"}, # pwd not required: can create mysql accounts without pwd 181 184 { 'name' => "db_host", 182 185 'desc' => "{GreenstoneSQLPlug.db_host}", 183 186 'type' => "string", 184 'deft' => "127.0.0.1", 187 'deft' => "127.0.0.1", # NOTE: make this int? 
No default for port, since it's not a required connection param 185 188 'reqd' => "yes"}, 186 189 { 'name' => "db_port", … … 226 229 # as removeold, which should drop the collection tables, happens during the import phase, 227 230 # calling GreenstoneSQLPlugin::and therefore also requires a db connection. 228 # TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin231 # + TODO: Eventually can try moving get_gssql_instance into gssql.pm? That way both GS SQL Plugin 229 232 # and Plugout would be using one connection during import.pl phase when both plugs exist. 230 233 … … 421 424 } 422 425 423 # TODO Q: Why are there 4 passes when we're only indexing at doc and section level (2 passes)? What's the dummy pass, why is there a pass for infodb? 426 # There are multiple passes processing the document (see buildcol's mode parameter description): 427 # - compressing the text which may be a dummy pass for lucene/solr, wherein they still want the 428 # docobj for different purposes, 429 # - the pass(es) for indexing, e.g. doc/didx and section/sidx level passes 430 # - and an infodb pass for processing the classifiers. 
This pass too needs the docobj 431 # Since all passes need the doc_obj, all are read in from docsql + SQL db into the docobj in memory 424 432 425 433 # We should only ever get here during the buildcol.pl phase … … 472 480 if $self->{'verbosity'} > 2; 473 481 474 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_meta directly: 475 $doc_obj->add_utf8_metadata($sid, $metaname, &docprint::unescape_text($metaval)); 482 # + TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_meta directly: 483 #$doc_obj->add_utf8_metadata($sid, $metaname, &docprint::unescape_text($metaval)); 484 485 # data stored unescaped in db: escaping only for html/xml files, not for txt files or db 486 $doc_obj->add_utf8_metadata($sid, $metaname, $metaval); 476 487 } 477 488 print $outhandle "----------FIN READING DOC's META FROM SQL DB------------\n" … … 501 512 502 513 # TODO - pass by ref? 503 # TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_text directly: 504 my $textref = &docprint::unescape_textref(\$text); 505 $doc_obj->add_utf8_text($sid, $$textref); 514 # + TODO: we accessed the db in utf8 mode, so, we can call doc_obj->add_utf8_text directly: 515 # data stored unescaped in db: escaping is only for html/xml files, not for txt files or db 516 #my $textref = &docprint::unescape_textref(\$text); 517 $doc_obj->add_utf8_textref($sid, \$text); 506 518 } 507 519 print $outhandle "----------FIN READING DOC's TXT FROM SQL DB------------\n"
Note: See TracChangeset for help on using the changeset viewer.