Changeset 32557

Show
Ignore:
Timestamp:
31.10.2018 18:25:03 (3 weeks ago)
Author:
ak19
Message:

1. Bugfix to create_database method: db_name is now a parameter to the method and no longer a member variable of the gssql object. 2. UTF8 encodings finally made to work with GreenstoneSQL connections. Doing 'set NAMES utf8' was insufficient and further caused an error when building a collection with macrons, as the utf8 encoding thus specified only supports utf8 chars that take up to 3 bytes to encode. For utf8 chars that can take up to 4 bytes, we need to do two things. First, run the mysqld server with minus-minus character_set_server=utf8mb4. Second, EITHER set the mysql_enable_utf8mb4 => 1 option when using perl DBI to connect to the db (which in one step tells MySQL to use UTF-8 for communication AND tells DBD::mysql to decode the data), OR, after connecting, do BOTH set NAMES 'utf8mb4' (instead of utf8) to tell MySQL to use UTF-8 for communication AND ->{mysql_enable_utf8mb4} = 1 to tell DBD::mysql to decode the data. See https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc and https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/gssql.pm

    r32555 r32557  
    110110    my $self= shift (@_); 
    111111    my ($params_map) = @_; 
    112     my $db_enc = $self->{'db_encoding'} || "utf8"; 
     112    #my $db_enc = $self->{'db_encoding'} || "utf8"; 
     113    my $db_enc = "utf8mb4"; 
    113114 
    114115    # these are the params for connecting to MySQL 
     
    130131                   PrintError => 1, # on by default, but being explicit 
    131132                   RaiseError => 0, # off by default, but being explicit 
    132                    AutoCommit => 1 # on by default, but being explicit 
     133                   AutoCommit => 1, # on by default, but being explicit 
     134                   mysql_enable_utf8mb4 => 1 # tells MySQL to use UTF-8 for communication and tells DBD::mysql to decode the data, see https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug  
    133135               }); 
    134136 
    135137    if(!$dbh) { 
    136     # NOTE, despite handle dbh being undefined, error code will be in DBI->err 
     138    # NOTE, despite handle dbh being undefined, error code will be in DBI->err (note caps) 
    137139    return 0;    
    138140    } 
     
    141143    # https://dev.mysql.com/doc/refman/5.7/en/charset.html 
    142144    # https://dev.mysql.com/doc/refman/5.7/en/charset-conversion.html 
    143     # Setting the encoding at db server level. 
    144     # Not sure if this command is mysql specific: 
    145     my $stmt = "set NAMES '" . $db_enc . "'"; 
    146     $dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); 
     145    # Setting the encoding at db server level: $dbh->do("set NAMES '" . $db_enc . "'"); 
     146    # HOWEVER: 
     147    # It turned out insufficient setting the encoding to utf8, as that only supports utf8 chars that 
     148    # need up to 3 bytes. We may need up to 4 bytes per utf8 character, e.g. chars with macron, 
     149    # and for that, we need the encoding to be set to utf8mb4. 
     150    # To set up a MySQL db to use utf8mb4 requires configuration on the server side too. 
     151    # https://stackoverflow.com/questions/10957238/incorrect-string-value-when-trying-to-insert-utf-8-into-mysql-via-jdbc 
     152    # https://stackoverflow.com/questions/46727362/perl-mysql-utf8mb4-issue-possible-bug 
     153    # To set up the db for utf8mb4, therefore,  
     154    # the MySQL server needs to be configured for that char encoding by running the server as: 
     155    # mysql-5.7.23-linux-glibc2.12-x86_64/bin>./mysqld_safe --datadir=/Scratch/ak19/mysql/data --character_set_server=utf8mb4 
     156    # AND when connecting to the server, we can either set mysql_enable_utf8mb4 => 1 
     157    # as a connection option 
     158    # OR we need to do both "set NAMES utf8mb4" AND "$dbh->{mysql_enable_utf8mb4} = 1;" after connecting 
     159    # 
     160    # Search results for DBI Set Names imply the "SET NAMES '<enc>'" command is mysql specific too, 
     161    # so setting the mysql specific option during connection above as "mysql_enable_utf8mb4 => 1" 
     162    # is no more objectionable. It has the advantage of cutting out the 2 extra lines of doing 
     163    # set NAMES '<enc>' and $dbh->{mysql_enable_utf8mb4} = 1 here. 
     164    # These lines may be preferred if more db_driver options are to be supported in future: 
     165    # then a separate method called set_db_encoding($enc) can work out what db_driver we're using 
     166    # and if mysql and enc=utf8mb4, then it can do the following whereas it will issue other do stmts 
     167    # for other db_drivers, see https://www.perlmonks.org/?node_id=259456: 
     168     
     169    #my $stmt = "set NAMES '" . $db_enc . "'"; 
     170    #$dbh->do($stmt) || warn("Unable to set charset encoding at db server level to: " . $db_enc . "\n"); # tells MySQL to use UTF-8 for communication 
     171    #$dbh->{mysql_enable_utf8mb4} = 1; # tells DBD::mysql to decode the data 
    147172     
    148173    # if we're here, then connection succeeded, store handle 
     
    248273sub create_db { 
    249274    my $self= shift (@_); 
    250     my $db_name = $self->{'db_name'}; 
     275    my ($db_name) = @_; 
    251276    my $dbh = $self->{'db_handle'}; 
    252277     
     
    294319    # drop table <tablename> 
    295320    my $table = $self->get_metadata_table_name(); 
    296     $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table"); 
     321    if($self->table_exists($table)) { 
     322    $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table"); 
     323    } 
    297324    $table = $self->get_fulltext_table_name(); 
    298     $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table"); 
     325    if($self->table_exists($table)) { 
     326    $dbh->do("drop table $table") || warn("@@@ Couldn't delete $table"); 
     327    } 
    299328} 
    300329