Ignore:
Timestamp:
2008-08-29T13:10:39+12:00 (16 years ago)
Author:
davidb
Message:

Introduction of a new GDBM alternative for archives.inf as a step towards full incremental building. The information traditionally stored in archives.inf, PLUS additional information that will help with working out which files have changed since the last build and which doc-id each file hashed to, is stored in two GDBM databases. For now these databases aren't read, but in the future ArchivesInfPlugin will be upgraded to use them to support incremental building.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/dbutil.pm

    r16726 r17087  
    2929
    3030
     31sub open_infodb_write_handle
     32{
     33    my $infodb_type = shift(@_);
     34    my $infodb_file_path = shift(@_);
     35
     36    if ($infodb_type eq "sqlite") {
     37    return &open_infodb_write_handle_sqlite($infodb_file_path);
     38    }
     39    elsif ($infodb_type eq "gdbm") {
     40    return &open_infodb_write_handle_gdbm($infodb_file_path);
     41    }
     42
     43    # Use text (gzipped) version ready for convertion to GDBM
     44    # if the infodb type is empty or not one of the values above
     45    return &open_infodb_write_handle_gdbm_txtgz($infodb_file_path);
     46}
     47
     48
    3149sub close_infodb_write_handle
    3250{
     
    3452    my $infodb_handle = shift(@_);
    3553
    36     if ($infodb_type eq "sqlite")
    37     {
     54    if ($infodb_type eq "sqlite") {
    3855    return &close_infodb_write_handle_sqlite($infodb_handle);
    3956    }
    40 
    41     # Use GDBM if the infodb type is empty or not one of the values above
    42     return &close_infodb_write_handle_gdbm($infodb_handle);
    43 }
     57    elsif ($infodb_type eq "gdbm") {
     58    return &close_infodb_write_handle_gdbm($infodb_handle);
     59    }
     60
     61    # Use text (gzipped) version ready for convertion to GDBM
     62    # if the infodb type is empty or not one of the values above return
     63
     64    &close_infodb_write_handle_gdbm_txtgz($infodb_handle); }
    4465
    4566
    4667sub get_default_infodb_type
    4768{
    48     return "gdbm";
     69    return "gdbm-txtgz";
    4970}
    5071
     
    6081    return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path);
    6182    }
    62 
    63     # Use GDBM if the infodb type is empty or not one of the values above
    64     return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
    65 }
    66 
    67 
    68 sub open_infodb_write_handle
     83    elsif ($infodb_type eq "gdbm") {
     84    return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path);
     85    }
     86
     87    # Use text (gzipped) version ready for convertion to GDBM
     88    # if the infodb type is empty or not one of the values above return
     89
     90    return &get_infodb_file_path_gdbm_txtgz($collection_name, $infodb_directory_path);
     91}
     92
     93
     94
     95
     96sub read_infodb_file
    6997{
    7098    my $infodb_type = shift(@_);
    7199    my $infodb_file_path = shift(@_);
     100    my $infodb_map = shift(@_);
    72101
    73102    if ($infodb_type eq "sqlite")
    74103    {
    75     return &open_infodb_write_handle_sqlite($infodb_file_path);
    76     }
    77 
    78     # Use GDBM if the infodb type is empty or not one of the values above
    79     return &open_infodb_write_handle_gdbm($infodb_file_path);
    80 }
    81 
    82 
    83 sub read_infodb_file
     104    return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
     105    }
     106    elsif ($infodb_type eq "gdbm") {
     107    return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
     108    }
     109
     110    # Use text (gzipped) version ready for convertion to GDBM
     111    # if the infodb type is empty or not one of the values above return
     112
     113    return &read_infodb_file_gdbm_txtgz($infodb_file_path, $infodb_map);
     114}
     115
     116
     117sub write_infodb_entry
    84118{
    85119    my $infodb_type = shift(@_);
    86     my $infodb_file_path = shift(@_);
     120    my $infodb_handle = shift(@_);
     121    my $infodb_key = shift(@_);
    87122    my $infodb_map = shift(@_);
    88123
    89124    if ($infodb_type eq "sqlite")
    90125    {
    91     return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);
    92     }
    93 
    94     # Use GDBM if the infodb type is empty or not one of the values above
    95     return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);
    96 }
    97 
    98 
    99 sub write_infodb_entry
    100 {
    101     my $infodb_type = shift(@_);
    102     my $infodb_handle = shift(@_);
    103     my $infodb_key = shift(@_);
    104     my $infodb_map = shift(@_);
    105 
    106     if ($infodb_type eq "sqlite")
    107     {
    108126    return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map);
    109127    }
    110 
    111     # Use GDBM if the infodb type is empty or not one of the values above
    112     return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
    113 }
    114 
    115 
    116 
    117 # ----------------------------------------------------------------------------------------
    118 #   GDBM IMPLEMENTATION
    119 # ----------------------------------------------------------------------------------------
    120 
    121 sub close_infodb_write_handle_gdbm
     128    elsif ($infodb_type eq "gdbm") {
     129    return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map);
     130    }
     131
     132    # Use text (gzipped) version ready for convertion to GDBM
     133    # if the infodb type is empty or not one of the values above return
     134    return &write_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key, $infodb_map);
     135}
     136
     137
     138
     139# -----------------------------------------------------------------------------
     140#   GDBM TXT-GZ IMPLEMENTATION
     141# -----------------------------------------------------------------------------
     142
     143sub open_infodb_write_handle_gdbm_txtgz
     144{
     145    # Keep infodb in GDBM neutral form => save data as compressed text file,
     146    # read for txt2db to be run on it later (i.e. by the runtime system,
     147    # first time the collection is ever accessed).  This makes it easier
     148    # distribute pre-built collections to various architectures.
     149    #
     150    # NB: even if two architectures are little endian (e.g. Intel and
     151    # ARM procesors) GDBM does *not* guarantee that the database generated on
     152    # one will work on the other
     153
     154    my $infodb_file_path = shift(@_);
     155
     156    # Greenstone ships with gzip for windows, on $PATH
     157
     158    my $infodb_file_handle = undef;
     159    if (!open($infodb_file_handle, "| gzip - > \"$infodb_file_path\""))
     160    {
     161    return undef;
     162    }
     163
     164    return $infodb_file_handle;
     165}
     166
     167sub close_infodb_write_handle_gdbm_txtgz
    122168{
    123169    my $infodb_handle = shift(@_);
     
    127173
    128174
    129 sub get_infodb_file_path_gdbm
     175sub get_infodb_file_path_gdbm_txtgz
    130176{
    131177    my $collection_name = shift(@_);
    132178    my $infodb_directory_path = shift(@_);
    133179
    134     my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb");
    135     my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
     180    my $infodb_file_name = &util::get_dirsep_tail($collection_name).".txt.gz";
    136181    return &util::filename_cat($infodb_directory_path, $infodb_file_name);
    137182}
    138183
    139184
    140 sub open_infodb_write_handle_gdbm
    141 {
    142     my $infodb_file_path = shift(@_);
    143 
    144     my $txt2db_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "txt2db" . &util::get_os_exe());
    145     my $infodb_file_handle = undef;
    146     if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| \"$txt2db_exe\" \"$infodb_file_path\""))
    147     {
    148     return undef;
    149     }
    150 
    151     return $infodb_file_handle;
    152 }
    153 
    154 
    155 sub read_infodb_file_gdbm
    156 {
    157     my $infodb_file_path = shift(@_);
    158     my $infodb_map = shift(@_);
    159 
    160     open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n";
     185sub read_infodb_file_gdbm_txtgz
     186{
     187    my $infodb_file_path = shift(@_);
     188    my $infodb_map = shift(@_);
     189
     190    my $cmd = "gzip --decompress \"$infodb_file_path\"";
     191
     192    open (PIPEIN, "$cmd |")
     193    || die "Error: Couldn't open pipe from gzip: $!\n  $cmd\n";
     194
    161195    my $infodb_line = "";
    162196    my $infodb_key = "";
     
    184218
    185219   
    186 sub write_infodb_entry_gdbm
    187 {
     220sub write_infodb_entry_gdbm_txtgz
     221{
     222
    188223    my $infodb_handle = shift(@_);
    189224    my $infodb_key = shift(@_);
     
    209244
    210245
    211 # ----------------------------------------------------------------------------------------
     246# -----------------------------------------------------------------------------
     247#   GDBM IMPLEMENTATION
     248# -----------------------------------------------------------------------------
     249
     250sub open_infodb_write_handle_gdbm
     251{
     252    my $infodb_file_path = shift(@_);
     253
     254    my $txt2db_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "txt2db" . &util::get_os_exe());
     255    my $infodb_file_handle = undef;
     256    if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| \"$txt2db_exe\" \"$infodb_file_path\""))
     257    {
     258    return undef;
     259    }
     260
     261    return $infodb_file_handle;
     262}
     263
     264sub close_infodb_write_handle_gdbm
     265{
     266    my $infodb_handle = shift(@_);
     267
     268    close($infodb_handle);
     269}
     270
     271
     272sub get_infodb_file_path_gdbm
     273{
     274    my $collection_name = shift(@_);
     275    my $infodb_directory_path = shift(@_);
     276
     277    my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb");
     278    my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension;
     279    return &util::filename_cat($infodb_directory_path, $infodb_file_name);
     280}
     281
     282
     283
     284
     285sub read_infodb_file_gdbm
     286{
     287    my $infodb_file_path = shift(@_);
     288    my $infodb_map = shift(@_);
     289
     290    open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n";
     291    my $infodb_line = "";
     292    my $infodb_key = "";
     293    my $infodb_value = "";
     294    while (defined ($infodb_line = <PIPEIN>))
     295    {
     296    if ($infodb_line =~ /^\[([^\]]+)\]$/)
     297    {
     298        $infodb_key = $1;
     299    }
     300    elsif ($infodb_line =~ /^-{70}$/)
     301    {
     302        $infodb_map->{$infodb_key} = $infodb_value;
     303        $infodb_key = "";
     304        $infodb_value = "";
     305    }
     306    else
     307    {
     308        $infodb_value .= $infodb_line;
     309    }
     310    }
     311
     312    close (PIPEIN);
     313}
     314
     315   
     316sub write_infodb_entry_gdbm
     317{
     318    # With infodb_handle already set up, works the same as _gdbm_txtgz version
     319    write_infodb_entry_gdbm_txtgz(@_);
     320}
     321
     322
     323
     324# -----------------------------------------------------------------------------
    212325#   SQLITE IMPLEMENTATION
    213 # ----------------------------------------------------------------------------------------
     326# -----------------------------------------------------------------------------
     327
     328sub open_infodb_write_handle_sqlite
     329{
     330    my $infodb_file_path = shift(@_);
     331
     332    my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe());
     333    my $infodb_handle = undef;
     334    if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
     335    {
     336    return undef;
     337    }
     338
     339    print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
     340    print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
     341
     342    # This is crucial for efficiency when importing large amounts of data
     343    print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
     344
     345    # This is very important for efficiency, otherwise each command will be actioned one at a time
     346    print $infodb_handle "BEGIN TRANSACTION;\n";
     347
     348    return $infodb_handle;
     349}
     350
     351
    214352
    215353sub close_infodb_write_handle_sqlite
     
    237375}
    238376
    239 
    240 sub open_infodb_write_handle_sqlite
    241 {
    242     my $infodb_file_path = shift(@_);
    243 
    244     my $sqlite3_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "sqlite3" . &util::get_os_exe());
    245     my $infodb_handle = undef;
    246     if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))
    247     {
    248     return undef;
    249     }
    250 
    251     print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";
    252     print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";
    253 
    254     # This is crucial for efficiency when importing large amounts of data
    255     print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";
    256 
    257     # This is very important for efficiency, otherwise each command will be actioned one at a time
    258     print $infodb_handle "BEGIN TRANSACTION;\n";
    259 
    260     return $infodb_handle;
    261 }
    262377
    263378
Note: See TracChangeset for help on using the changeset viewer.