Changeset 32543

Show
Ignore:
Timestamp:
26.10.2018 15:10:47 (4 weeks ago)
Author:
ak19
Message:

Tidying up and adjusting TODO statements

Location:
main/trunk/greenstone2/perllib
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gssql.pm

    r32541 r32543  
    212212# GreenstoneSQLPlugout, this method will not attempt to create the requested db (nor its tables) 
    213213# TODO: GS SQLPlugin is called before GS SQLPlugout and attempts to use_db() - called in plugin's 
    214 # init() method. This will fail if the db does not exist. Ideally want our plugin only called 
     214# init() method. This will fail if the db does not exist. Ideally want the gssqlplugin only called 
    215215# during buildcol.pl 
    216216sub use_db { 
     
    282282 
    283283# "IF EXISTS is used to prevent an error from occurring if the database does not exist. ... DROP DATABASE returns the number of tables that were removed. The DROP DATABASE statement removes from the given database directory those files and directories that MySQL itself may create during normal operation." (Jun 20, 2012) 
    284 #MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax 
     284# MySQL 8.0 Reference Manual :: 13.1.22 DROP DATABASE Syntax 
    285285# https://dev.mysql.com/doc/en/drop-database.html 
    286286sub delete_collection_tables { 
     
    432432# see https://www.perlmonks.org/bare/?node=DBI%20Recipes 
    433433#    The page further has a table_exists function that could work with proper comparison 
    434 # TODO: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though 
     434# TODO Q: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though 
    435435sub table_exists { 
    436436    my $self = shift (@_); 
  • main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm

    r32542 r32543  
    4040# TODO: 
    4141# - Run TODOs here, in Plugout and in gssql.pm by Dr Bainbridge. 
    42 # Ask about docsql naming convention adopted to identify OID. Better way? 
     42# + Ask about docsql naming convention adopted to identify OID. Better way? 
    4343# collection names -> table names: it seems hyphens not allowed. Changed to underscores. 
    4444# + Startup parameters (except removeold/build_mode) 
     45# - how do we detect we're to do removeold during plugout in import.pl phase 
    4546# - incremental building: where do we need to add code to delete rows from our sql table after 
    4647# incrementally importing a coll with fewer docs (for instance)? What about deleted/modified meta? 
    47 # - Courier documents in lucene-sql collection: character (degree symbol) not preserved. Is this because we encode in utf8 when putting into db and reading back in? 
     48# - "Courier" demo documents in lucene-sql collection: character (degree symbol) not preserved in title. Is this because we encode in utf8 when putting into db and reading back in? 
    4849# - Have not yet tested writing out just meta or just fulltxt to sql db and reading just that  
    4950# back in from the sql db while the remainder is to be read back in from the docsql .xml files. 
     
    5152# (sections) which in this case had made it easy to reconstruct the doc_obj in memory in the correct order 
    5253 
     54# TODO: deal with incremental vs removeold. If docs removed from import folder, then import step 
     55# won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is 
     56 
     57# TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking, basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work. 
     58# Discuss the plugin/plugout parameters. 
     59 
     60 
    5361# GreenstoneSQLPlugin inherits from GreenstoneXMLPlugin so that if meta or fulltext 
    5462# is still written out to doc.xml (docsql .xml), that will be processed as usual, 
     
    5664# is written out by GreenstoneSQLPlugout into the SQL db). 
    5765 
    58 # TODO: 
    59 # no more docoid in docsql .xml filename, set OID as attribute of root element inside docsql.xml file instead 
    60 # and parse it out 
    61  
    62 # TODO: deal with incremental vs removeold. If docs removed from import folder, then import step 
    63 # won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is 
    64  
    65 # TODO: Add public instructions on using this plugin and its plugout: start with installing mysql binary, changing pwd, running the server (and the client against it for checking, basic cmds like create and drop). Then discuss db name, table names (per coll), db cols and col types, and how the plugout and plugin work. 
    66 # Discuss the plugin/plugout parameters. 
    6766 
    6867sub BEGIN { 
     
    201200 
    202201    my $oid = $self->{'doc_oid'}; # we stored current doc's OID during sub xml_start_tag() 
    203     print $outhandle "==== OID of document (meta|text) to be read in from DB: $oid\n" 
     202    print $outhandle "++++ OID of document (meta|text) to be read in from DB: $oid\n" 
    204203    if $self->{'verbosity'} > 1; 
    205  
    206204     
    207205    # For now, we have access to doc_obj (until just before super::close_document() terminates) 
    208      
    209     $self->{'doc_obj'}->set_OID($oid); # complex method. Is this necessary, since we just want to write meta and txt for the docobj to index? 
    210      
    211     # checking that complicated looking method set_OID() hasn't modified oid 
    212     if($oid ne $self->{'doc_obj'}->get_OID()) { 
    213     print STDERR "@@@@ WARNING: OID after setting on doc_obj = " . $self->{'doc_obj'}->get_OID() . " and is not the same as original OID $oid from docsqloid.xml filename\n"; 
    214     } 
    215  
    216      
    217     # TODO: This function is called on a per doc.xml file basis 
    218     # but we can process all docs of a collection in one go when dealing with the SQL tables for 
    219     # the collection. How and where should we read in the collection tables then? 
    220     # TODO: Perhaps MySQLPlugout could write out a token file (.gssql) into archives during import.pl 
    221     # and if that file is detected, then MySQLPlugin::read() is passed in that file during 
    222     # buildcol.pl. And that file will trigger reading the 2 tables for the collection??? 
     206 
     207    # no need to call $self->{'doc_obj'}->set_OID($oid); 
     208    # because either the OID is stored in the SQL db as meta 'Identifier' alongside other metadata 
     209    # or it's stored in the doc.xml as metadata 'Identifier' alongside other metadata 
     210    # Either way, Identifier meta will be read into the docobj automatically with other meta. 
     211 
    223212    my $proc_mode = $self->{'process_mode'}; 
    224213    if($proc_mode eq "all" || $proc_mode eq "meta_only") { 
     
    226215 
    227216    my $sth = $gs_sql->select_from_metatable_matching_docid($oid);   
    228     print $outhandle "### stmt: ".$sth->{'Statement'}."\n" if $self->{'verbosity'} > 1; 
     217    print $outhandle "### SQL select stmt: ".$sth->{'Statement'}."\n" 
     218        if $self->{'verbosity'} > 1; 
    229219 
    230220    print $outhandle "----------SQL DB contains meta-----------\n" if $self->{'verbosity'} > 1; 
     
    281271 
    282272 
    283 # TODO: only want to work with sql db if buildcol.pl. Unfortunately, also runs on import.pl 
    284 # call init() not begin() because there can be multiple plugin passes 
     273# TODO: only want to work with sql db if buildcol.pl. Unfortunately, also runs on import.pl. 
     274# During import, the GS SQL Plugin is called before the GS SQL Plugout with undesirable side 
     275# effect that if the db doesn't exist, gssql::use_db() fails, as it won't create db. 
     276 
     277# Call init() not begin() because there can be multiple plugin passes 
    285278# and init() should be called before all passes: 
    286279# one for doc level and another for section level indexing 
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32542 r32543  
    4040 
    4141# TODO: SIGTERM rollback and disconnect? 
    42 # TODO Q: what about verbosity for debugging 
     42# TODO Q: what about verbosity for debugging, instead of current situation of printing out upon debug set at the expense of writing to db 
    4343# TODO Q: introduced site_name param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes" 
    4444 
    45 # this plugout does not output xml to a file, but outputs rows into a mysql table 
     45# this plugout does not output the metadata and/or fulltxt xml to a file, 
     46# but outputs rows into a mysql table for metadata and/or a table for fulltxt 
    4647sub BEGIN { 
    4748    @GreenstoneSQLPlugout::ISA = ('GreenstoneXMLPlugout'); 
     
    5657 
    5758# TODO Q: what is "group" in GreenstoneXMLPlugout? 
    58 # TODO Q: site_name only exists for GS3. What about GS2? 
    59  
    6059 
    6160my $process_mode_list = 
     
    141140    #print STDERR "@@@@ proc_mode: " . $self->{'process_mode'} . "\n"; 
    142141     
    143     ########### TODO: these should be set from cmdline/GLI options to plugout ######### 
     142    ########### TODO: deal with build mode ######### 
    144143 
    145144    $self->{'build_mode'} = "removeold"; 
     
    236235} 
    237236  
    238 # TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed 
     237# TODO: check arc-inf.db for whether each entry is to be deleted/indexed/reindexed/been indexed? 
     238# That's only for indexing, not for this step which only generates the content in archives dir 
    239239sub saveas { 
    240240    my $self = shift (@_); 
     
    253253 
    254254    # TODO: also set debugging in begin()? Then stmts creating db and tables also sent to debug out and not executed 
    255  
    256     # TODO: remove unused old_unused_saveas from GreenstoneXMLPlugout 
    257      
    258      
    259     # 2. overriding saving behaviour to do what the superclass does PLUS saving to sql db     
    260  
    261     #NOTE: if proc_mode == all, then "breadcrumbs" go into both meta and txt elements of doc.xml: 
    262     # statements pointing viewer to the sql db for contents 
     255    
     256     
     257    # 2. overriding saving behaviour to do what the superclass does (writing out doc.xml files, 
     258    # under new name of docsql.xml, with breadcrumbs pointing to sql db) PLUS saving to sql db     
     259 
     260    # NOTE: if proc_mode == all, then "breadcrumbs" (statements pointing viewer to the sql db 
     261    # for contents) go into both meta and txt elements of doc.xml (docsql.xml specifically):  
    263262     
    264     # write the INVERSE into doc.xml as to what is written to the db     
     263    # write the INVERSE into doc.xml as to what is written to the SQL db     
    265264    my $docxml_output_options = { 'output' => docprint::OUTPUT_NONE }; 
    266265    if($proc_mode eq "meta_only" ) { # since only meta to go into MySQL db, text will go into docxml 
     
    272271    # now we've prepared to write out whatever is meant to go into docxml 
    273272    # and can do the actual steps the superclass GreenstoneXMLPlugout carries out to write out docxml 
    274     # So: write out the doc xml file for the current document 
     273    # So: write out the doc xml file, "docsql.xml", for the current document 
    275274    my $section_text = &docprint::get_section_xml($doc_obj, $docxml_output_options); 
    276275    print $docxml_outhandler $section_text;     
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneXMLPlugout.pm

    r32542 r32543  
    7676} 
    7777 
    78 sub old_unused_saveas { 
    79     my $self = shift (@_); 
    80     my ($doc_obj, $doc_dir) = @_; 
    81     my $outhandler; 
    82     my $output_file; 
    83     if ($self->{'debug'}) { 
    84     $outhandler = STDOUT; 
    85     } 
    86     else { 
    87         
    88     $self->process_assoc_files($doc_obj, $doc_dir, ''); 
    89     $self->process_metafiles_metadata ($doc_obj); 
    90      
    91     # open up the outhandler     
    92     if ($self->is_group() && !$self->{'new_doc_dir'}) {  
    93         # we already have a handle open ?? 
    94         $outhandler = $self->{'group_outhandler'}; 
    95     } else { 
    96         $output_file = &FileUtils::filenameConcatenate( 
    97         $self->{'output_dir'}, $doc_dir, $self->get_doc_xml_filename($doc_obj)); 
    98         # open the new handle 
    99         $self->open_xslt_pipe($output_file, $self->{'xslt_file'}); 
    100  
    101         if (defined $self->{'xslt_writer'}){ 
    102         $outhandler = $self->{'xslt_writer'}; 
    103         } 
    104         else{ 
    105         $outhandler = $self->get_output_handler($output_file); 
    106         } 
    107          
    108         if ($self->is_group()) { 
    109         $self->{'group_outhandler'} = $outhandler; 
    110         } 
    111     } 
    112     } # else not debug 
    113     binmode($outhandler,":utf8"); 
    114  
    115     # only output the header if we have started a new doc 
    116     if (!$self->is_group() || $self->{'new_doc_dir'}) { 
    117     $self->output_xml_header($outhandler); 
    118     } 
    119  
    120     my $section_text = &docprint::get_section_xml($doc_obj); 
    121     print $outhandler $section_text; 
    122   
    123     # only output the footer if we are not doing group stuff. The group file will be finished in close_group_output 
    124     if (!$self->is_group()) { 
    125     $self->output_xml_footer($outhandler); 
    126     } 
    127  
    128     # close off the output - in a group process situation, this will be done by close_group_output 
    129     if (!$self->is_group() && !$self->{'debug'}) { 
    130     if (defined $self->{'xslt_writer'}){      
    131         $self->close_xslt_pipe();  
    132     } 
    133     else { 
    134         &FileUtils::closeFileHandle($output_file, \$outhandler) if defined $output_file; 
    135     } 
    136     } 
    137     $self->{'short_doc_file'} = &FileUtils::filenameConcatenate( 
    138     $doc_dir, $self->get_doc_xml_filename($doc_obj));   
    139      
    140     $self->store_output_info_reference($doc_obj); 
    141      
    142 } 
    143  
    14478# can be overridden in subclasses, for instance by GreenstoneSQLPlugout, to produce a different filename 
    14579# like docsql.xml