Changeset 32541

Show
Ignore:
Timestamp:
25.10.2018 20:12:42 (4 weeks ago)
Author:
ak19
Message:

Using proper parameters to GreenstoneSQLPlugin/Plugout instead of hardcoded values for params.

Location:
main/trunk/greenstone2
Files:
5 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/import.pl

    r32528 r32541  
    8989        'desc' => "{export.saveas.GreenstoneMETS}"}, 
    9090      { 'name' => "GreenstoneSQL", 
    91         'desc' => "{export.saveas.GreenstoneSQL}"}, 
    92       { 'name' => "GreenstoneSQL_metadata_only", 
    93         'desc' => "{export.saveas.GreenstoneSQL.meta_only}"}, 
    94       { 'name' => "GreenstoneSQL_fulltext_only", 
    95         'desc' => "{export.saveas.GreenstoneSQL.text_only}"}, 
     91        'desc' => "{export.saveas.GreenstoneSQL}"} 
    9692      ]; 
    9793 
  • main/trunk/greenstone2/perllib/gssql.pm

    r32538 r32541  
    106106# TODO: Consider AutoCommit status (and Autocommit off allowing commit or rollback for GS coll build cancel) later 
    107107 
     108# TODO: where should the defaults for these params be, here or in GS-SQLPlugin/Plugout? 
    108109sub connect_to_db { 
    109110    my $self= shift (@_); 
     
    210211# This will terminate if the db does not exist. Unlike load_db_and_tables() above, used by 
    211212# GreenstoneSQLPlugout, this method will not attempt to create the requested db (nor its tables) 
     213# TODO: GS SQLPlugin is called before GS SQLPlugout and attempts to use_db() - called in plugin's 
     214# init() method. This will fail if the db does not exist. Ideally want our plugin only called 
     215# during buildcol.pl 
    212216sub use_db { 
    213217    my $self= shift (@_); 
     
    294298 
    295299# Don't call this: it will delete the meta and full text tables for ALL collections in $db_name (localsite by default)! 
    296 # this is just for debugging 
     300# This method is just here for debugging (for testing creating a database when there is none) 
    297301sub _delete_database { 
    298302    my $self= shift (@_); 
     
    428432# see https://www.perlmonks.org/bare/?node=DBI%20Recipes 
    429433#    The page further has a table_exists function that could work with proper comparison 
    430 # Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though 
     434# TODO: Couldn't get the first solution at https://www.perlmonks.org/bare/?node_id=500050 to work though 
    431435sub table_exists { 
    432436    my $self = shift (@_); 
  • main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm

    r32538 r32541  
    3939 
    4040# TODO: 
    41 # - Run TODOs here and in plugout by Dr Bainbridge. Ask about docsql naming convention adopted 
    42 # to identify OID. Better way? 
    43 # collection names -> table names: hyphens not allowed? Changed to underscores. 
     41# - Run TODOs here, in Plugout and in gssql.pm by Dr Bainbridge. 
     42# Ask about docsql naming convention adopted to identify OID. Better way? 
     43# collection names -> table names: it seems hyphens are not allowed. Changed to underscores. 
    4444# - Startup parameters 
    4545# - incremental building: where do we need to add code to delete rows from our sql table after 
     
    4848# - Have not yet tested writing out just meta or just fulltxt to sql db and reading just that  
    4949# back in from the sql db while the remainder is to be read back in from the docsql .xml files. 
    50  
     50# - Ask if I can assume that all SQL dbs (not just MySQL) will preserve the order of inserted nodes 
     51# (sections), which in this case made it easy to reconstruct the doc_obj in memory in the correct order 
    5152 
    5253# GreenstoneSQLPlugin inherits from GreenstoneXMLPlugin so that if meta or fulltext 
     
    5556# is written out by GreenstoneSQLPlugout into the SQL db). 
    5657 
     58# TODO: 
     59# no more docoid in docsql .xml filename, set OID as attribute of root element inside docsql.xml file instead 
     60# and parse it out 
     61 
     62# TODO: deal with incremental vs removeold. If docs removed from import folder, then import step 
     63# won't delete it from archives but buildcol step will. Need to implement this with this database plugin or wherever the actual flow is 
    5764 
    5865sub BEGIN { 
     
    8390    'deft' => &get_default_process_exp(), 
    8491    'reqd' => "no" }, 
    85        { 'name' => "process_mode",  
    86      'desc' => "{GreenstoneSQLPlug.process_mode}", 
    87      'type' => "enum", 
    88      'list' => $process_mode_list, 
    89      'deft' => "all", 
    90      'reqd' => "no"} 
     92      { 'name' => "process_mode",  
     93    'desc' => "{GreenstoneSQLPlug.process_mode}", 
     94    'type' => "enum", 
     95    'list' => $process_mode_list, 
     96    'deft' => "all", 
     97    'reqd' => "no"}, 
     98      { 'name' => "db_driver",  
     99    'desc' => "{GreenstoneSQLPlug.db_driver}", 
     100    'type' => "string",  
     101    'deft' => "mysql", 
     102    'reqd' => "yes"}, 
     103      { 'name' => "db_client_user",  
     104    'desc' => "{GreenstoneSQLPlug.db_client_user}", 
     105    'type' => "string",  
     106    'deft' => "root", 
     107    'reqd' => "yes"}, 
     108      { 'name' => "db_client_pwd",  
     109    'desc' => "{GreenstoneSQLPlug.db_client_pwd}", 
     110    'type' => "string", 
     111    'deft' => "", 
     112    'reqd' => "yes"}, # pwd required? 
     113      { 'name' => "db_host",  
     114    'desc' => "{GreenstoneSQLPlug.db_host}", 
     115    'type' => "string", 
     116    'deft' => "127.0.0.1", 
     117    'reqd' => "yes"}, 
     118      { 'name' => "db_encoding",  
     119    'desc' => "{GreenstoneSQLPlug.db_encoding}", 
     120    'type' => "string", 
     121    'deft' => "utf8", 
     122    'reqd' => "yes"} 
    91123    ]; 
    92124 
     
    200232        #$sid =~ s@^root@@; 
    201233        $sid = $doc_obj->get_top_section() if ($sid eq "root"); 
    202         print $outhandle "### did: $did, sid: |$sid|, fulltext: <TXT NOT PRINTED>\n" 
     234        print $outhandle "### did: $did, sid: |$sid|, fulltext: <TXT>\n" 
    203235        if $self->{'verbosity'} > 1; 
    204236 
     
    229261    $self->SUPER::init(@_); # super (GreenstoneXMLPlugin) will not yet be trying to read from doc.xml (docsql .xml) files in init(). 
    230262 
    231         # TODO: how do we know what site we're dealing with unless this is passed in, by buildcol? 
    232     ########### 
     263    #################### 
    233264#    print "@@@ SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 
    234265#    print "@@@ COLL NAME: ". $ENV{'GSDLCOLLECTION'} . "\n"; 
    235      
    236     $self->{'db_driver'} = "mysql"; 
    237     $self->{'site_name'} = "localsite";     
    238     $self->{'db_client_user'} = "root"; 
    239     $self->{'db_client_pwd'} = "6reenstone3"; 
    240     $self->{'build_mode'} = "removeold"; 
    241     $self->{'db_host'} = "127.0.0.1"; 
    242     $self->{'db_encoding'} = "utf8"; 
    243     ########### 
     266 
     267#    print STDERR "@@@@ db_pwd: " . $self->{'db_client_pwd'} . "\n"; 
     268#    print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 
     269#    print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 
     270#    print STDERR "@@@@ db_enc: " . $self->{'db_encoding'} . "\n"; 
     271#    print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 
     272    #################### 
    244273     
    245274    my $gs_sql = new gssql({ 
    246275    'collection_name' => $ENV{'GSDLCOLLECTION'},     
    247276    'db_encoding' => $self->{'db_encoding'} 
    248     #'db_name' => $self->{'site_name'}, 
    249     #'build_mode' => $self->{'build_mode'}, 
    250277               } 
    251278    ); 
     
    265292    } 
    266293     
    267     my $db_name = $self->{'site_name'} || "localsite"; # one database per GS3 site 
     294    my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 
    268295    #my $build_mode = $self->{'build_mode'} || "removeold"; 
    269296 
     
    274301    # PrintError would already have displayed the warning message on load fail 
    275302    $gs_sql->disconnect_from_db() 
    276         || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); 
     303        || warn("Unable to disconnect from database.\n"); 
    277304    die("Could not use db $db_name. Can't proceed.\n"); 
    278305    } 
  • main/trunk/greenstone2/perllib/plugouts/GreenstoneSQLPlugout.pm

    r32537 r32541  
    4141# TODO: SIGTERM rollback and disconnect? 
    4242# TODO Q: what about verbosity for debugging 
     43# TODO Q: introduced site_name param to plugins and plugouts. Did I do it right? And should they have hiddengli = "yes"? 
    4344 
    4445# this plugout does not output xml to a file, but outputs rows into a mysql table 
     
    6465        'desc' => "{GreenstoneSQLPlug.process_mode.all}" } ]; 
    6566 
     67# The following are the saveas.options: 
    6668my $arguments = [  
    67        { 'name' => "process_mode",  
    68      'desc' => "{GreenstoneSQLPlug.process_mode}", 
    69      'type' => "enum", 
    70      'list' => $process_mode_list, 
    71      'deft' => "all", 
    72      'reqd' => "no", 
    73      'hiddengli' => "no"} ]; 
     69    { 'name' => "process_mode",  
     70      'desc' => "{GreenstoneSQLPlug.process_mode}", 
     71      'type' => "enum", 
     72      'list' => $process_mode_list, 
     73      'deft' => "all", 
     74      'reqd' => "no", 
     75      'hiddengli' => "no"}, 
     76    { 'name' => "db_driver",  
     77      'desc' => "{GreenstoneSQLPlug.db_driver}", 
     78      'type' => "string",    
     79      'deft' => "mysql", 
     80      'reqd' => "yes"}, 
     81    { 'name' => "db_client_user",  
     82      'desc' => "{GreenstoneSQLPlug.db_client_user}", 
     83      'type' => "string",    
     84      'deft' => "root", 
     85      'reqd' => "yes"}, 
     86    { 'name' => "db_client_pwd",  
     87      'desc' => "{GreenstoneSQLPlug.db_client_pwd}", 
     88      'type' => "string", 
     89      'deft' => "", 
     90      'reqd' => "yes"}, # pwd required? 
     91    { 'name' => "db_host",  
     92      'desc' => "{GreenstoneSQLPlug.db_host}", 
     93      'type' => "string", 
     94      'deft' => "127.0.0.1", 
     95      'reqd' => "yes"}, 
     96    { 'name' => "db_encoding",  
     97      'desc' => "{GreenstoneSQLPlug.db_encoding}", 
     98    'type' => "string", 
     99      'deft' => "utf8", 
     100      'reqd' => "yes"} 
     101    ]; 
    74102 
    75103my $options = { 'name'     => "GreenstoneSQLPlugout", 
     
    103131    my $self= shift (@_); 
    104132 
     133    # The saveas.options 
     134    #print STDERR "@@@@ PLUGOUT db_pwd: " . $self->{'db_client_pwd'} . "\n"; 
     135    #print STDERR "@@@@ user: " . $self->{'db_client_user'} . "\n"; 
     136    #print STDERR "@@@@ db_host: " . $self->{'db_host'} . "\n"; 
     137    #print STDERR "@@@@ db_enc: " . $self->{'db_encoding'} . "\n"; 
     138    #print STDERR "@@@@ db_driver: " . $self->{'db_driver'} . "\n"; 
     139    #print STDERR "@@@@ proc_mode: " . $self->{'process_mode'} . "\n"; 
     140     
    105141    ########### TODO: these should be set from cmdline/GLI options to plugout ######### 
    106     $self->{'db_driver'} = "mysql"; 
    107     $self->{'site_name'} = "localsite";     
    108     $self->{'db_client_user'} = "root"; 
    109     $self->{'db_client_pwd'} = "6reenstone3"; 
     142 
    110143    $self->{'build_mode'} = "removeold"; 
    111     #$self->{'db_host'} = "127.0.0.1"; 
    112     #$self->{'db_encoding'} = "utf8"; 
    113     #TODO: proc_mode is also a saveas option     
    114      
     144 
    115145    ############ LOAD NECESSARY OPTIONS ########### 
    116     print STDERR "########## COLLECTION: ". $ENV{'GSDLCOLLECTION'}."\n"; 
    117     #$self->{'collection_name'} = $ENV{'GSDLCOLLECTION'}; 
     146    #print "@@@ plugout SITE NAME: ". $self->{'site_name'} . "\n" if defined $self->{'site_name'}; 
     147    #print STDERR "########## COLLECTION: ". $ENV{'GSDLCOLLECTION'}."\n"; 
     148 
    118149    print STDERR "***** GreenstoneSQLPlugout process mode = \"", $self->{'process_mode'}, "\"\n"; 
    119150 
     
    121152    'collection_name' => $ENV{'GSDLCOLLECTION'},     
    122153    'db_encoding' => $self->{'db_encoding'} 
    123     #'db_name' => $self->{'site_name'}, 
    124     #'build_mode' => $self->{'build_mode'}, 
    125154    }; 
    126155 
     
    142171    } 
    143172     
    144     my $db_name = $self->{'site_name'} || "localsite"; # one database per GS3 site 
     173    my $db_name = $self->{'site_name'} || "greenstone2"; # one database per GS3 site, for GS2 the db is called greenstone2 
    145174    my $build_mode = $self->{'build_mode'} || "removeold"; 
    146175    if(!$gs_sql->load_db_and_tables($db_name, $build_mode)) { 
     
    149178    # PrintError would already have displayed the warning message on load fail 
    150179    $gs_sql->disconnect_from_db() 
    151         || warn("Unable to disconnect from database " . $self->{'site_name'} . "\n"); 
     180        || warn("Unable to disconnect from database.\n"); 
    152181    die("Could not use db $db_name and/or prepare its tables. Can't proceed.\n"); 
    153182    } 
  • main/trunk/greenstone2/perllib/strings.properties

    r32540 r32541  
    417417 
    418418export.saveas.GreenstoneXML:Greenstone XML Archive format 
    419 export.saveas.GreenstoneSQL:Save metadata and text in MySQL database 
    420 export.saveas.GreenstoneSQL.meta_only:Store metadata in a MySQL database (and full text in Greenstone XML Archive format) 
    421 export.saveas.GreenstoneSQL.text_only:Store full text in a MySQL database (and metadata in Greenstone XML Archive format) 
     419export.saveas.GreenstoneSQL:Store the metadata and/or full text in a MySQL database, and the converse in the Greenstone XML Archive format. 
    422420 
    423421export.saveas.MARCXML:MARC XML format (an XML version of MARC 21) 
     
    14481446METSPlugout.xslt_mets:Transform a mets's docmets.xml with the XSLT in the named file.   
    14491447 
    1450 GreenstoneSQLPlugout.desc:Output metadata and/or full text to a MySQL database (named after GS3 site name for GS3 or named greenstone2 for GS2) instead of doc.xml. For Greenstone 3, the database name is the GS3 site name. For Greenstone 2, the database name is greenstone2. 
     1448GreenstoneSQLPlugout.desc:Output metadata and/or full text to a MySQL database (named after GS3 site name for GS3 or named greenstone2 for GS2) instead of doc.xml. For Greenstone 3, the database name is the GS3 site name. For Greenstone 2, the database name is greenstone2. The basic saveas.options for this Plugout are the same as the basic options for the matching GreenstoneSQLPlugin. 
    14511449 
    14521450#  
    14531451# GreenstoneSQLPlug strings are shared by both GreenstoneSQLPlugout and GreenstoneSQLPlugin 
    14541452# 
    1455 GreenstoneSQLPlug.process_mode:Setting determines whether full text and/or metadata will be output to a MySQL database instead of to doc.xml during import. 
     1453GreenstoneSQLPlug.process_mode:Setting determines whether full text and/or metadata will be output to a MySQL database instead of to doc.xml during import. Choose one of meta_only, text_only, or all (default). 
    14561454GreenstoneSQLPlug.process_mode.all:Import stage outputs the full text and metadata to a MySQL database instead of to doc.xml. 
    14571455GreenstoneSQLPlug.process_mode.meta_only:Import stage outputs the metadata to a MySQL database and any text to doc.xml.