Changeset 17087
- Timestamp:
- 2008-08-29T13:10:39+12:00 (16 years ago)
- Location:
- gsdl/trunk/perllib
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/ClassifyTreeModel.pm
r15894 r17087 120 120 # Unfortunately I have to check that there is text to retrieve before I 121 121 # create a new node. 122 if(&GDBMUtils::gdbm Get($self->getCollection(), $clid) =~ /\w+/)122 if(&GDBMUtils::gdbmCachedCollectionGet($self->getCollection(), $clid) =~ /\w+/) 123 123 { 124 124 # Since the CLID can directly reference the correct entry in the GDBM we -
gsdl/trunk/perllib/ClassifyTreeNode.pm
r15890 r17087 26 26 $self->{'model'} = $model; 27 27 $self->{'clid'} = $clid; 28 29 my $collection = $model->getCollection(); 30 28 31 # Check if this node already exists in the database, and if not insert it 29 32 # now 30 my $text = &GDBMUtils::gdbm Get($model->getCollection(), $clid);33 my $text = &GDBMUtils::gdbmCachedCollectionGet($collection, $clid); 31 34 if($text !~ /\w+/ && $force_new) 32 35 { 33 &GDBMUtils::gdbm Set($model->getCollection(), $clid,36 &GDBMUtils::gdbmCachedCollectionSet($collection, $clid, 34 37 "<doctype>classify\n<hastxt>0\n<childtype>VList\n<Title>\n<numleafdocs>0\n<contains>\n"); 35 38 } … … 108 111 # CLID 109 112 my @child_nodes = $self->getChildren(); 113 110 114 # Retrieve the current document 111 115 my $text = $self->toString(); 116 117 my $collection = $self->{'model'}->getCollection(); 118 112 119 # Create a new document with the correct CLID 113 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $new_clid, $text); 120 &GDBMUtils::gdbmCachedCollectionSet($collection, $new_clid, $text); 121 114 122 # Remove the old document 115 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $self->{'clid'}); 123 &GDBMUtils::gdbmCachedCollectionSet($collection, $self->{'clid'}); 124 116 125 # Finally, change the clid stored in this document 117 126 $self->{'clid'} = $new_clid; 127 118 128 # Now go through this nodes children, and shift them too 119 129 foreach my $child_node (@child_nodes) … … 221 231 push(@clid_parts, $suffix); 222 232 my $next_clid = join(".", @clid_parts); 233 234 my $collection = $self->{'model'}->getCollection(); 235 223 236 # Now determine if this node exists. 224 if(&GDBMUtils::gdbm Get($self->{'model'}->getCollection(), $next_clid) =~ /\w+/)237 if(&GDBMUtils::gdbmCachedCollectionGet($collection, $next_clid) =~ /\w+/) 225 238 { 226 239 # And if so, create it. … … 474 487 # Now remove the node from the database. We do this calling set gdbm with 475 488 # no value argument. 476 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $self->{'clid'}); 489 my $collection = $self->{'model'}->getCollection(); 490 &GDBMUtils::gdbmCachedCollectionSet($collection, $self->{'clid'}); 491 477 492 # Return the leaf count (so we can adjust the numleafdocs at the root node 478 493 # of this deletion. … … 605 620 # Replace any occurance of this nodes CLID with " 606 621 $contains =~ s/$self->{'clid'}/\"/g; 622 623 my $collection = $self->{'model'}->getCollection(); 624 my $clid = $self->{'clid'}; 625 607 626 # Load the text of this node 608 my $text = &GDBMUtils::gdbmGet($self->{'model'}->getCollection(), $self->{'clid'}); 627 my $text = &GDBMUtils::gdbmCachedCollectionGet($collection, $clid); 628 609 629 # Replace the contains 610 630 #rint STDERR "Before: $text\n"; … … 612 632 #rint STDERR "After: $text\n"; 613 633 # Store the changed text 614 &GDBMUtils::gdbm Set($self->{'model'}->getCollection(), $self->{'clid'}, $text);634 &GDBMUtils::gdbmCachedCollectionSet($collection, $clid, $text); 615 635 } 616 636 # /** setContains() **/ … … 626 646 my ($self, $numleafdocs) = @_; 627 647 print STDERR "ClassifyTreeNode::setNumLeafDocs(numleafdocs)\n" unless !$self->{'debug'}; 648 649 my $collection = $self->{'model'}->getCollection(); 650 my $clid = $self->{'clid'}; 651 628 652 # Load the text of this node 629 my $text = &GDBMUtils::gdbm Get($self->{'model'}->getCollection(), $self->{'clid'});653 my $text = &GDBMUtils::gdbmCachedCollectionGet($collection, $clid); 630 654 # Replace the numleafdocs 631 655 $text =~ s/<numleafdocs>\d*?\n+/<numleafdocs>$numleafdocs\n/; 632 656 # Store the changed text 633 &GDBMUtils::gdbm Set($self->{'model'}->getCollection(), $self->{'clid'}, $text);657 &GDBMUtils::gdbmCachedCollectionSet($collection, $clid, $text); 634 658 } 635 659 # /** setNumLeafDocs() **/ … … 647 671 my ($self, $title) = @_; 648 672 print STDERR "ClassifyTreeNode::setTitle(\"$title\")\n" unless !$self->{'debug'}; 673 674 my $collection = $self->{'model'}->getCollection(); 675 my $clid = $self->{'clid'}; 676 649 677 # Load the text of this node 650 my $text = &GDBMUtils::gdbm Get($self->{'model'}->getCollection(), $self->{'clid'});678 my $text = &GDBMUtils::gdbmCachedCollectionGet($collection, $clid); 651 679 # Replace the title 652 680 $text =~ s/<Title>.*?\n+/<Title>$title\n/; 653 681 # Store the changed text 654 &GDBMUtils::gdbm Set($self->{'model'}->getCollection(), $self->{'clid'}, $text);682 &GDBMUtils::gdbmCachedCollectionSet($collection, $clid, $text); 655 683 } 656 684 # /** setValue() **/ … … 666 694 my ($self) = @_; 667 695 print STDERR "ClassifyTreeNode::toString()\n" unless !$self->{'debug'}; 668 my $text = &GDBMUtils::gdbmGet($self->{'model'}->getCollection(), $self->{'clid'}); 696 my $collection = $self->{'model'}->getCollection(); 697 my $clid = $self->{'clid'}; 698 699 my $text = &GDBMUtils::gdbmCachedCollectionGet($collection, $clid); 669 700 return $text; 670 701 } -
gsdl/trunk/perllib/GDBMUtils.pm
r15890 r17087 6 6 my $debug = 0; 7 7 8 # /** Global variable to hold a string containing the last collection a gdbmGet 9 # * was performed on. 8 # /** Global variables to hold a strings containing: 9 # * the last collection, oid and value 10 # * a gdbmCachedCollectionGet() was performed on. 10 11 # */ 11 12 my $gdbmget_previous_collection = ""; 12 # /** Global variable to hold a string containing the last oid a gdbmGet was13 # * performed on.14 # */15 13 my $gdbmget_previous_oid = ""; 16 # /** Global variable to hold a string containing the resulting value of the17 # * last gdbmGet request.18 # */19 14 my $gdbmget_previous_value = ""; 15 16 17 18 sub gdbmDatabaseGet 19 { 20 my ($database, $oid) = @_; 21 22 # Are we in windows? Do we need .exe? 23 my $exe = &util::get_os_exe(); 24 25 # Retrieve the raw document content 26 print STDERR "#Get document\ncmd: gdbmget$exe \"$database\" \"$oid\"\n" if $debug; 27 my $value = `gdbmget$exe "$database" "$oid"`; 28 29 # Done 30 return $value; 31 } 32 33 sub gdbmDatabaseAppend 34 { 35 my ($database, $oid, $value) = @_; 36 37 # Are we in windows? Do we need .exe? 38 my $exe = &util::get_os_exe(); 39 40 # Escape any speech marks in the value 41 $value =~ s/\"/\\\"/g; 42 # Set the document content 43 print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\" \"$value\" append\n" if $debug; 44 `gdbmset$exe "$database" "$oid" "$value" append`; 45 } 46 47 48 sub gdbmDatabaseSet 49 { 50 my ($database, $oid, $value) = @_; 51 52 # Are we in windows? Do we need .exe? 53 my $exe = &util::get_os_exe(); 54 55 # Escape any speech marks in the value 56 $value =~ s/\"/\\\"/g; 57 # Set the document content 58 print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\" \"$value\"\n" if $debug; 59 `gdbmset$exe "$database" "$oid" "$value"`; 60 } 61 62 63 sub gdbmDatabaseRemove 64 { 65 my ($database, $oid) = @_; 66 67 # Are we in windows? Do we need .exe? 68 my $exe = &util::get_os_exe(); 69 70 # Remove the document from the database 71 print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\"\n" if $debug; 72 73 # Think it would be clearer if this funcctionality was done 74 # by a separate executable, e.g. gdbmremove 75 `gdbmset$exe "$database" "$oid"`; 76 } 77 78 20 79 21 80 # /** This wraps John T's gdbmget executable to get the gdbm database entry for … … 29 88 # * @author John Thompson, DL Consulting Ltd. 30 89 # */ 31 sub gdbm Get()90 sub gdbmCachedCollectionGet 32 91 { 33 92 my ($collection, $oid) = @_; … … 42 101 return $gdbmget_previous_value; 43 102 } 103 44 104 # Where's the database? 45 my $database = &getDatabasePath($collection); 46 # Are we in windows? Do we need .exe? 47 my $exe = ""; 48 $exe = ".exe" if $ENV{'GSDLOS'} =~ /^windows$/i; 49 # Retrieve the raw document content 50 print STDERR "#Get document\ncmd: gdbmget$exe \"$database\" \"$oid\"\n" if $debug; 51 my $value = `gdbmget$exe "$database" "$oid"`; 105 my $database = _getDatabasePath($collection); 106 107 my $value = gdbmDatbaseGet($database,$oid); 108 52 109 # Tidy up the ever growing number of newlines at the end of the value 53 110 $value =~ s/(\r?\n)+/$1/g; 111 # Why do we need the above line? At the very least it would seem 112 # better that the data going in to the database through 'set' is 113 # monitored for superfluous \r\n which are then removed before being 114 # saved in GDBM 115 54 116 # Cache this result 55 117 $gdbmget_previous_collection = $collection; 56 118 $gdbmget_previous_oid = $oid; 57 119 $gdbmget_previous_value = $value; 120 58 121 # Done 59 122 return $value; 60 123 } 61 # /** gdbm Get()**/124 # /** gdbmCachedCollectionGet **/ 62 125 63 126 # /** This wraps John T's gdbmset executable to set the gdbm database entry for … … 70 133 # * @author John Rowe, DL Consulting Ltd. 71 134 # */ 72 sub gdbm Set()135 sub gdbmCachedCollectionSet 73 136 { 74 137 my ($collection, $oid, $value) = @_; 138 75 139 # Where's the database? 76 my $database = &getDatabasePath($collection);77 # Are we in windows? Do we need .exe? 78 my $exe = &util::get_os_exe(); 140 my $database = _getDatabasePath($collection); 141 142 79 143 # Check whether value is set 80 144 if (defined($value)) 81 145 { 82 # Escape any speech marks in the value 83 $value =~ s/\"/\\\"/g; 84 # Set the document content 85 print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\" \"$value\"\n" if $debug; 86 `gdbmset$exe "$database" "$oid" "$value"`; 146 gdbmDatabaseSet($database,$oid,$value); 87 147 } 88 148 else 89 149 { 90 # Remove the document from the database 91 print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\"\n" if $debug; 92 `gdbmset$exe "$database" "$oid"`; 150 gdbmDtabaseRemove($database,$oid); 93 151 } 152 94 153 # Empty any cached values, as they may now be invalid 154 95 155 # Cache this result 96 156 $gdbmget_previous_collection = ""; … … 98 158 $gdbmget_previous_value = 0; 99 159 } 100 # /** gdbm Set()**/160 # /** gdbmCollectionSet **/ 101 161 102 162 # /** This works out the database path and returns it to the calling … … 107 167 # * @author John Rowe, DL Consulting Ltd. 108 168 # */ 109 sub getDatabasePath() 169 170 sub _getDatabasePath 110 171 { 111 172 my $collection = shift(@_); 173 112 174 # Find out the database extension 113 my $ext = ".bdb";114 $ext = ".ldb" if &util::is_little_endian(); 175 my $ext = &util::is_little_endian() ? ".ldb" : ".bdb"; 176 115 177 # Now return the full filename of the database 178 116 179 return &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text", $collection.$ext); 117 180 } 118 # /** getDatabasePath ()**/181 # /** getDatabasePath **/ 119 182 120 183 1; -
gsdl/trunk/perllib/IncrementalBuildUtils.pm
r15890 r17087 390 390 # Now use the GDBM utils to write a blank string to this oid in the 391 391 # database 392 &GDBMUtils::gdbm Set($collection, $oid, "");392 &GDBMUtils::gdbmCachedCollectionSet($collection, $oid, ""); 393 393 # Remove reverse lookup 394 &GDBMUtils::gdbm Set($collection, $doc_num, "");394 &GDBMUtils::gdbmCachedCollectionSet($collection, $doc_num, ""); 395 395 # And remove from the database 396 396 &callGS2LuceneDelete($collection, $doc_num); -
gsdl/trunk/perllib/IncrementalDocument.pm
r15894 r17087 35 35 { 36 36 my ($class, $collection, $oid) = @_; 37 37 38 #rint STDERR "IncrementalDocument::new($collection, $oid)\n"; 39 38 40 # Test the parameters 39 41 die ("Error! Can't create a document that doesn't belong to a collection!") unless $collection; 40 42 die ("Error! Can't create a document that doesn't have a unique id (OID)!") unless $oid; 43 41 44 # Store the variables 42 45 my $self = {}; 46 43 47 # The collection this document object has been loaded from. 44 48 $self->{'collection'} = $collection; 49 45 50 # An associative array of information retrieved from the GDBM database 46 51 # which maps a key string to a nested associative array listing values. 47 52 $self->{'data'} = {}; 53 48 54 # The unique identifier of the document loaded 49 55 $self->{'oid'} = $oid; 56 50 57 # Stores the order in which metadata keys where discovered/added. 51 58 $self->{'order'} = {}; 52 # Bless me father for I have sinned 59 53 60 bless $self, $class; 54 61 return $self; … … 61 68 { 62 69 my ($self, $key, $value, $internal) = @_; 70 63 71 # Validate the arguments 64 72 die ("Error! Can't add a metadata value to a document without a valid key!") unless $key =~ /[\w]+/; 65 73 die ("Error! Can't add a metadata key to a document without a valid value!") unless $value =~ /[\w\d]+/; 74 66 75 # Is this a new key that we haven't encountered before? If so ensure an 67 76 # array exists for its values, and record the order in which we encountered … … 75 84 $self->{'data'}->{$key} = {}; 76 85 } 86 77 87 # Set the value of the associative path to 1. 78 88 $self->{'data'}->{$key}->{$value} = 1; … … 84 94 # */ 85 95 sub getAllMetadata() 86 { 96 { 97 my ($self) = @_; 98 my @all_metadata; 99 87 100 print STDERR "IncrementalDocument.getAllMetadata()\n"; 88 my ($self) = @_;89 my @all_metadata;90 101 91 102 my $key_count = scalar(keys %{$self->{'order'}}); … … 137 148 #rint STDERR "IncrementalDocument::loadDocument()\n"; 138 149 # Load the raw text for the document object from GDBM 139 my $text = &GDBMUtils::gdbmGet($self->{'collection'}, $self->{'oid'}); 150 my $collection = $self->{'collection'}; 151 my $text = &GDBMUtils::gdbmCachedCollectionGet($collection, $self->{'oid'}); 140 152 # For each line in the raw text, extract the key (enclosed in angle 141 153 # brackets) and the value … … 170 182 # Get a textual version of this object 171 183 my $text = $self->toString(); 184 172 185 # Now store the object in the database using the GDBM utilities 173 &GDBMUtils::gdbmSet($self->{'collection'}, $self->{'oid'}, $text); 186 my $collection = $self->{'collection'}; 187 &GDBMUtils::gdbmCachedCollectionSet($collection, $self->{'oid'}, $text); 188 174 189 # There is a little bit of extra complexity when saving an incremental 175 190 # document in that we should ensure that a reverse lookup-from DocNum or … … 178 193 if($doc_num >= 0) 179 194 { 180 my $text = &GDBMUtils::gdbm Get($self->{'collection'}, $doc_num);195 my $text = &GDBMUtils::gdbmCachedCollectionGet($collection, $doc_num); 181 196 # If there is no reverse lookup, then add one now 182 197 if($text !~ /<section>/) 183 198 { 184 &GDBMUtils::gdbm Set($self->{'collection'}, $doc_num, "<section>" . $self->{'oid'});199 &GDBMUtils::gdbmCachedCollectionSet($collection, $doc_num, "<section>" . $self->{'oid'}); 185 200 } 186 201 } -
gsdl/trunk/perllib/dbutil.pm
r16726 r17087 29 29 30 30 31 sub open_infodb_write_handle 32 { 33 my $infodb_type = shift(@_); 34 my $infodb_file_path = shift(@_); 35 36 if ($infodb_type eq "sqlite") { 37 return &open_infodb_write_handle_sqlite($infodb_file_path); 38 } 39 elsif ($infodb_type eq "gdbm") { 40 return &open_infodb_write_handle_gdbm($infodb_file_path); 41 } 42 43 # Use text (gzipped) version ready for convertion to GDBM 44 # if the infodb type is empty or not one of the values above 45 return &open_infodb_write_handle_gdbm_txtgz($infodb_file_path); 46 } 47 48 31 49 sub close_infodb_write_handle 32 50 { … … 34 52 my $infodb_handle = shift(@_); 35 53 36 if ($infodb_type eq "sqlite") 37 { 54 if ($infodb_type eq "sqlite") { 38 55 return &close_infodb_write_handle_sqlite($infodb_handle); 39 56 } 40 41 # Use GDBM if the infodb type is empty or not one of the values above 42 return &close_infodb_write_handle_gdbm($infodb_handle); 43 } 57 elsif ($infodb_type eq "gdbm") { 58 return &close_infodb_write_handle_gdbm($infodb_handle); 59 } 60 61 # Use text (gzipped) version ready for convertion to GDBM 62 # if the infodb type is empty or not one of the values above return 63 64 &close_infodb_write_handle_gdbm_txtgz($infodb_handle); } 44 65 45 66 46 67 sub get_default_infodb_type 47 68 { 48 return "gdbm ";69 return "gdbm-txtgz"; 49 70 } 50 71 … … 60 81 return &get_infodb_file_path_sqlite($collection_name, $infodb_directory_path); 61 82 } 62 63 # Use GDBM if the infodb type is empty or not one of the values above 64 return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path); 65 } 66 67 68 sub open_infodb_write_handle 83 elsif ($infodb_type eq "gdbm") { 84 return &get_infodb_file_path_gdbm($collection_name, $infodb_directory_path); 85 } 86 87 # Use text (gzipped) version ready for convertion to GDBM 88 # if the infodb type is empty or not one of the values above return 89 90 return &get_infodb_file_path_gdbm_txtgz($collection_name, $infodb_directory_path); 91 } 92 93 94 95 96 sub read_infodb_file 69 97 { 70 98 my $infodb_type = shift(@_); 71 99 my $infodb_file_path = shift(@_); 100 my $infodb_map = shift(@_); 72 101 73 102 if ($infodb_type eq "sqlite") 74 103 { 75 return &open_infodb_write_handle_sqlite($infodb_file_path); 76 } 77 78 # Use GDBM if the infodb type is empty or not one of the values above 79 return &open_infodb_write_handle_gdbm($infodb_file_path); 80 } 81 82 83 sub read_infodb_file 104 return &read_infodb_file_sqlite($infodb_file_path, $infodb_map); 105 } 106 elsif ($infodb_type eq "gdbm") { 107 return &read_infodb_file_gdbm($infodb_file_path, $infodb_map); 108 } 109 110 # Use text (gzipped) version ready for convertion to GDBM 111 # if the infodb type is empty or not one of the values above return 112 113 return &read_infodb_file_gdbm_txtgz($infodb_file_path, $infodb_map); 114 } 115 116 117 sub write_infodb_entry 84 118 { 85 119 my $infodb_type = shift(@_); 86 my $infodb_file_path = shift(@_); 120 my $infodb_handle = shift(@_); 121 my $infodb_key = shift(@_); 87 122 my $infodb_map = shift(@_); 88 123 89 124 if ($infodb_type eq "sqlite") 90 125 { 91 return &read_infodb_file_sqlite($infodb_file_path, $infodb_map);92 }93 94 # Use GDBM if the infodb type is empty or not one of the values above95 return &read_infodb_file_gdbm($infodb_file_path, $infodb_map);96 }97 98 99 sub write_infodb_entry100 {101 my $infodb_type = shift(@_);102 my $infodb_handle = shift(@_);103 my $infodb_key = shift(@_);104 my $infodb_map = shift(@_);105 106 if ($infodb_type eq "sqlite")107 {108 126 return &write_infodb_entry_sqlite($infodb_handle, $infodb_key, $infodb_map); 109 127 } 110 111 # Use GDBM if the infodb type is empty or not one of the values above 112 return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map); 113 } 114 115 116 117 # ---------------------------------------------------------------------------------------- 118 # GDBM IMPLEMENTATION 119 # ---------------------------------------------------------------------------------------- 120 121 sub close_infodb_write_handle_gdbm 128 elsif ($infodb_type eq "gdbm") { 129 return &write_infodb_entry_gdbm($infodb_handle, $infodb_key, $infodb_map); 130 } 131 132 # Use text (gzipped) version ready for convertion to GDBM 133 # if the infodb type is empty or not one of the values above return 134 return &write_infodb_entry_gdbm_txtgz($infodb_handle, $infodb_key, $infodb_map); 135 } 136 137 138 139 # ----------------------------------------------------------------------------- 140 # GDBM TXT-GZ IMPLEMENTATION 141 # ----------------------------------------------------------------------------- 142 143 sub open_infodb_write_handle_gdbm_txtgz 144 { 145 # Keep infodb in GDBM neutral form => save data as compressed text file, 146 # read for txt2db to be run on it later (i.e. by the runtime system, 147 # first time the collection is ever accessed). This makes it easier 148 # distribute pre-built collections to various architectures. 149 # 150 # NB: even if two architectures are little endian (e.g. Intel and 151 # ARM procesors) GDBM does *not* guarantee that the database generated on 152 # one will work on the other 153 154 my $infodb_file_path = shift(@_); 155 156 # Greenstone ships with gzip for windows, on $PATH 157 158 my $infodb_file_handle = undef; 159 if (!open($infodb_file_handle, "| gzip - > \"$infodb_file_path\"")) 160 { 161 return undef; 162 } 163 164 return $infodb_file_handle; 165 } 166 167 sub close_infodb_write_handle_gdbm_txtgz 122 168 { 123 169 my $infodb_handle = shift(@_); … … 127 173 128 174 129 sub get_infodb_file_path_gdbm 175 sub get_infodb_file_path_gdbm_txtgz 130 176 { 131 177 my $collection_name = shift(@_); 132 178 my $infodb_directory_path = shift(@_); 133 179 134 my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb"); 135 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension; 180 my $infodb_file_name = &util::get_dirsep_tail($collection_name).".txt.gz"; 136 181 return &util::filename_cat($infodb_directory_path, $infodb_file_name); 137 182 } 138 183 139 184 140 sub open_infodb_write_handle_gdbm 141 { 142 my $infodb_file_path = shift(@_); 143 144 my $txt2db_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "txt2db" . &util::get_os_exe()); 145 my $infodb_file_handle = undef; 146 if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| \"$txt2db_exe\" \"$infodb_file_path\"")) 147 { 148 return undef; 149 } 150 151 return $infodb_file_handle; 152 } 153 154 155 sub read_infodb_file_gdbm 156 { 157 my $infodb_file_path = shift(@_); 158 my $infodb_map = shift(@_); 159 160 open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n"; 185 sub read_infodb_file_gdbm_txtgz 186 { 187 my $infodb_file_path = shift(@_); 188 my $infodb_map = shift(@_); 189 190 my $cmd = "gzip --decompress \"$infodb_file_path\""; 191 192 open (PIPEIN, "$cmd |") 193 || die "Error: Couldn't open pipe from gzip: $!\n $cmd\n"; 194 161 195 my $infodb_line = ""; 162 196 my $infodb_key = ""; … … 184 218 185 219 186 sub write_infodb_entry_gdbm 187 { 220 sub write_infodb_entry_gdbm_txtgz 221 { 222 188 223 my $infodb_handle = shift(@_); 189 224 my $infodb_key = shift(@_); … … 209 244 210 245 211 # ---------------------------------------------------------------------------------------- 246 # ----------------------------------------------------------------------------- 247 # GDBM IMPLEMENTATION 248 # ----------------------------------------------------------------------------- 249 250 sub open_infodb_write_handle_gdbm 251 { 252 my $infodb_file_path = shift(@_); 253 254 my $txt2db_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "txt2db" . &util::get_os_exe()); 255 my $infodb_file_handle = undef; 256 if (!-e "$txt2db_exe" || !open($infodb_file_handle, "| \"$txt2db_exe\" \"$infodb_file_path\"")) 257 { 258 return undef; 259 } 260 261 return $infodb_file_handle; 262 } 263 264 sub close_infodb_write_handle_gdbm 265 { 266 my $infodb_handle = shift(@_); 267 268 close($infodb_handle); 269 } 270 271 272 sub get_infodb_file_path_gdbm 273 { 274 my $collection_name = shift(@_); 275 my $infodb_directory_path = shift(@_); 276 277 my $infodb_file_extension = (&util::is_little_endian() ? ".ldb" : ".bdb"); 278 my $infodb_file_name = &util::get_dirsep_tail($collection_name) . $infodb_file_extension; 279 return &util::filename_cat($infodb_directory_path, $infodb_file_name); 280 } 281 282 283 284 285 sub read_infodb_file_gdbm 286 { 287 my $infodb_file_path = shift(@_); 288 my $infodb_map = shift(@_); 289 290 open (PIPEIN, "db2txt \"$infodb_file_path\" |") || die "couldn't open pipe from db2txt\n"; 291 my $infodb_line = ""; 292 my $infodb_key = ""; 293 my $infodb_value = ""; 294 while (defined ($infodb_line = <PIPEIN>)) 295 { 296 if ($infodb_line =~ /^\[([^\]]+)\]$/) 297 { 298 $infodb_key = $1; 299 } 300 elsif ($infodb_line =~ /^-{70}$/) 301 { 302 $infodb_map->{$infodb_key} = $infodb_value; 303 $infodb_key = ""; 304 $infodb_value = ""; 305 } 306 else 307 { 308 $infodb_value .= $infodb_line; 309 } 310 } 311 312 close (PIPEIN); 313 } 314 315 316 sub write_infodb_entry_gdbm 317 { 318 # With infodb_handle already set up, works the same as _gdbm_txtgz version 319 write_infodb_entry_gdbm_txtgz(@_); 320 } 321 322 323 324 # ----------------------------------------------------------------------------- 212 325 # SQLITE IMPLEMENTATION 213 # ---------------------------------------------------------------------------------------- 326 # ----------------------------------------------------------------------------- 327 328 sub open_infodb_write_handle_sqlite 329 { 330 my $infodb_file_path = shift(@_); 331 332 my $sqlite3_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",$ENV{'GSDLOS'}, "sqlite3" . &util::get_os_exe()); 333 my $infodb_handle = undef; 334 if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\"")) 335 { 336 return undef; 337 } 338 339 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n"; 340 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n"; 341 342 # This is crucial for efficiency when importing large amounts of data 343 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n"; 344 345 # This is very important for efficiency, otherwise each command will be actioned one at a time 346 print $infodb_handle "BEGIN TRANSACTION;\n"; 347 348 return $infodb_handle; 349 } 350 351 214 352 215 353 sub close_infodb_write_handle_sqlite … … 237 375 } 238 376 239 240 sub open_infodb_write_handle_sqlite241 {242 my $infodb_file_path = shift(@_);243 244 my $sqlite3_exe = &util::filename_cat("$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}", "sqlite3" . &util::get_os_exe());245 my $infodb_handle = undef;246 if (!-e "$sqlite3_exe" || !open($infodb_handle, "| \"$sqlite3_exe\" \"$infodb_file_path\""))247 {248 return undef;249 }250 251 print $infodb_handle "CREATE TABLE IF NOT EXISTS data (key TEXT PRIMARY KEY, value TEXT);\n";252 print $infodb_handle "CREATE TABLE IF NOT EXISTS document_metadata (id INTEGER PRIMARY KEY, docOID TEXT, element TEXT, value TEXT);\n";253 254 # This is crucial for efficiency when importing large amounts of data255 print $infodb_handle "CREATE INDEX IF NOT EXISTS dmd ON document_metadata(docOID);\n";256 257 # This is very important for efficiency, otherwise each command will be actioned one at a time258 print $infodb_handle "BEGIN TRANSACTION;\n";259 260 return $infodb_handle;261 }262 377 263 378 -
gsdl/trunk/perllib/plugouts/BasPlugout.pm
r16252 r17087 35 35 use printusage; 36 36 use parse2; 37 use GDBMUtils; 38 37 39 38 40 # suppress the annoying "subroutine redefined" warning that various … … 402 404 ############################## 403 405 $self->saveas($doc_obj,$doc_dir); 404 406 $self->archiveinf_gdbm($doc_obj,$doc_dir); 407 405 408 } 406 409 … … 630 633 } 631 634 635 636 sub archiveinf_gdbm 637 { 638 my $self = shift (@_); 639 my ($doc_obj) = @_; 640 641 my $verbosity = $self->{'verbosity'}; 642 643 my $collect_dir = $ENV{'GSDLCOLLECTDIR'}; 644 if (defined $collect_dir) { 645 my $dirsep_regexp = &util::get_os_dirsep(); 646 647 if ($collect_dir !~ /$dirsep_regexp$/) { 648 # ensure there is a slash at the end 649 $collect_dir .= &util::get_dirsep(); 650 } 651 } 652 653 my $oid = $doc_obj->get_OID(); 654 my $source_filename = $doc_obj->get_source_filename(); 655 656 my $oid_files = { 'src-file' => $source_filename, 657 'assoc-files' => [] }; 658 659 my @reverse_lookups = ($source_filename); 660 661 foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) { 662 my $real_filename = $assoc_file_rec->[0]; 663 my $full_afile = $assoc_file_rec->[1]; 664 665 # for some reasons the image associate file has / before the full path 666 $real_filename =~ s/^\\(.*)/$1/i; 667 if (-e $real_filename) { 668 669 if (defined $collect_dir) { 670 $real_filename =~ s/^$collect_dir//; 671 } 672 673 push(@reverse_lookups,$real_filename); 674 675 push(@{$oid_files->{'assoc-files'}},$full_afile); 676 677 } 678 else { 679 print STDERR "Warning: archiveinf_gdbm()\n $real_filename does not appear to be on the file system\n"; 680 } 681 } 682 683 # better not to commit to a particular db implementation, but 684 # for simplicity, will use GDBM for now. 685 686 my $output_dir = $self->{'output_dir'}; 687 my $db_ext = &util::is_little_endian() ? ".ldb" : ".bdb"; 688 689 my $doc_db = &util::filename_cat($output_dir,"archiveinf-doc$db_ext"); 690 my $src_db = &util::filename_cat($output_dir,"archiveinf-src$db_ext"); 691 692 my $doc_db_text = ""; 693 $doc_db_text .= "<src-file>$oid_files->{'src-file'}\n"; 694 foreach my $af (@{$oid_files->{'assoc-files'}}) { 695 $doc_db_text .= "<assoc-file>$af\n"; 696 } 697 chomp($doc_db_text); # remove trailing \n 698 699 &GDBMUtils::gdbmDatabaseSet($doc_db,$oid,$doc_db_text); 700 701 foreach my $rl (@reverse_lookups) { 702 &GDBMUtils::gdbmDatabaseAppend($src_db,$rl,"<oid>$oid\n"); 703 } 704 705 } 706 707 632 708 sub set_sortmeta { 633 709 my $self = shift (@_);
Note:
See TracChangeset
for help on using the changeset viewer.