Changeset 1287
- Timestamp:
- 2000-07-14T12:24:20+12:00 (24 years ago)
- Location:
- trunk/gsdl
- Files:
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/import.pl
r1269 r1287 60 60 print STDERR " -maxdocs number Maximum number of documents to import\n"; 61 61 print STDERR " -groupsize number Number of GML documents to group into one file\n"; 62 print STDERR " -sortmeta metadata Sort documents alphabetically by metadata for\n"; 63 print STDERR " building. This will be disabled if groupsize > 1\n"; 62 64 print STDERR " -debug Print imported text to STDOUT\n\n"; 63 65 } … … 69 71 my ($verbosity, $importdir, $archivedir, $keepold, 70 72 $removeold, $gzip, $groupsize, $debug, $maxdocs, $collection, 71 $configfilename, $collectcfg, $pluginfo, 73 $configfilename, $collectcfg, $pluginfo, $sortmeta, 72 74 $archive_info_filename, $archive_info, $processor); 73 75 if (!parsargv::parse(\@ARGV, … … 79 81 'gzip', \$gzip, 80 82 'groupsize/\d+/1', \$groupsize, 83 'sortmeta/.*/', \$sortmeta, 81 84 'debug', \$debug, 82 85 'maxdocs/^\-?\d+/-1', \$maxdocs)) { … … 92 95 &print_usage(); 93 96 die "\n"; 97 } 98 99 # check sortmeta 100 $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/; 101 if (defined $sortmeta && $groupsize > 1) { 102 print STDERR "WARNING: import.pl cannot sort documents when groupsize > 1\n"; 103 print STDERR " sortmeta option will be ignored\n\n"; 104 $sortmeta = undef; 94 105 } 95 106 … … 160 171 $processor = new docsave ($collection, $archive_info, $verbosity, $gzip, $groupsize); 161 172 $processor->setarchivedir ($archivedir); 173 $processor->set_sortmeta ($sortmeta) if defined $sortmeta; 162 174 } else { 163 175 $processor = new docprint (); … … 173 185 # write out the archive information file 174 186 if (!$debug) { 175 $processor->close_file_output() ;187 $processor->close_file_output() if $groupsize > 1; 176 188 $archive_info->save_info($archive_info_filename); 177 189 } -
trunk/gsdl/perllib/arcinfo.pm
r537 r1287 93 93 my $i = 0; 94 94 while ($i < scalar (@{$self->{'order'}})) { 95 if ($self->{'order'}->[$i] eq $OID) {95 if ($self->{'order'}->[$i]->[0] eq $OID) { 96 96 splice (@{$self->{'order'}}, $i, 1); 97 97 last; … … 105 105 sub add_info { 106 106 my $self = shift (@_); 107 my ($OID, $doc_file) = @_; 107 my ($OID, $doc_file, $sortmeta) = @_; 108 $sortmeta = "" unless defined $sortmeta; 108 109 109 110 $self->delete_info ($OID); 110 111 $self->{'info'}->{$OID} = [$doc_file]; 111 push (@{$self->{'order'}}, $OID);112 push (@{$self->{'order'}}, [$OID, $sortmeta]); 112 113 } 113 114 … … 119 120 my @list = (); 120 121 121 foreach $OID ( @{$self->{'order'}}) {122 push (@list, [$OID , $self->{'info'}->{$OID}->[0]]);122 foreach $OID (sort {$a->[1] cmp $b->[1]} @{$self->{'order'}}) { 123 push (@list, [$OID->[0], $self->{'info'}->{$OID->[0]}->[0]]); 123 124 } 124 125 … … 133 134 my @list = (); 134 135 135 foreach $OID ( @{$self->{'order'}}) {136 push (@list, [$self->{'info'}->{$OID }->[0], $OID]);136 foreach $OID (sort {$a->[1] cmp $b->[1]} @{$self->{'order'}}) { 137 push (@list, [$self->{'info'}->{$OID->[0]}->[0], $OID->[0]]); 137 138 } 138 139 -
trunk/gsdl/perllib/docsave.pm
r898 r1287 56 56 $self->{'archive_dir'} = "$ENV{'GSDLHOME'}/collect/$self->{'collection'}/archives"; 57 57 58 $self->{'sortmeta'} = undef; 59 58 60 return bless $self, $class; 59 61 } … … 66 68 } 67 69 70 sub set_sortmeta { 71 my $self = shift (@_); 72 my ($sortmeta) = @_; 73 74 $self->{'sortmeta'} = $sortmeta; 75 } 76 68 77 sub process { 69 78 my $self = shift (@_); 70 79 my ($doc_obj) = @_; 71 80 72 my $archive_dir = $self->{'archive_dir'}; 81 if ($self->{'groupsize'} > 1) { 82 $self->group_process ($doc_obj); 83 84 } else { 85 # groupsize is 1 (i.e. one document per GML file) so sortmeta 86 # may be used 87 88 my $OID = $doc_obj->get_OID(); 89 $OID = "NULL" unless defined $OID; 90 91 # get document's directory 92 my $doc_dir = $self->get_doc_dir ($OID); 93 94 # copy all the associated files, add this information as metadata 95 # to the document 96 $self->process_assoc_files ($doc_obj, $doc_dir); 97 98 my $doc_file 99 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.gml"); 100 my $short_doc_file = &util::filename_cat ($doc_dir, "doc.gml"); 101 102 if (!open (OUTDOC, ">$doc_file")) { 103 print STDERR "docsave::process could not write to file $doc_file\n"; 104 return; 105 } 106 107 # save this document 108 $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section()); 109 close OUTDOC; 110 111 if ($self->{'gzip'}) { 112 my $doc_file = $self->{'gs_filename'}; 113 `gzip $doc_file`; 114 $doc_file .= ".gz"; 115 $short_doc_file .= ".gz"; 116 if (!-e $doc_file) { 117 print STDERR "error while gzipping: $doc_file doesn't exist\n"; 118 return 0; 119 } 120 } 121 122 # do the sortmeta thing 123 my ($metadata); 124 if (defined ($self->{'sortmeta'})) { 125 $metadata = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'sortmeta'}); 126 } 127 128 # store reference in the archive_info 129 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata); 130 } 131 } 132 133 sub group_process { 134 my $self = shift (@_); 135 my 
($doc_obj) = @_; 136 73 137 my $OID = $doc_obj->get_OID(); 74 138 $OID = "NULL" unless defined $OID; … … 79 143 80 144 # opening a new file, or document has assoicated files => directory needed 81 if (($open_new_file) || (scalar(@{$doc_obj->get_assoc_files()})>0)) 82 { 83 # get the document's directory. 84 my $doc_info = $self->{'archive_info'}->get_info($OID); 85 my $doc_dir = ""; 86 if (defined $doc_info && scalar(@$doc_info) >= 1) { 87 # this OID already has an assigned directory, use the 88 # same one. 89 $doc_dir = $doc_info->[0]; 90 $doc_dir =~ s/\/?doc\.gml(\.gz)?$//; 91 } else { 92 # have to get a new document directory 93 my $doc_dir_rest = $OID; 94 my $doc_dir_num = 0; 95 do { 96 $doc_dir .= "/" if $doc_dir_num > 0; 97 if ($doc_dir_rest =~ s/^(.{1,8})//) { 98 $doc_dir .= $1; 99 $doc_dir_num++; 100 } 101 } while ($doc_dir_rest ne "" && 102 ((-d &util::filename_cat ($archive_dir, "$doc_dir.dir")) || 103 ($self->{'archive_info'}->size() >= 1024 && $doc_dir_num < 2))); 104 $doc_dir .= ".dir"; 105 106 } 107 108 &util::mk_all_dir ("$archive_dir/$doc_dir"); 109 145 if (($open_new_file) || (scalar(@{$doc_obj->get_assoc_files()})>0)) { 146 147 # get document's directory 148 my $doc_dir = $self->get_doc_dir ($OID); 149 110 150 # copy all the associated files, add this information as metadata 111 151 # to the document 112 my @assoc_files = (); 113 foreach $assoc_file (@{$doc_obj->get_assoc_files()}) { 114 my ($dir, $afile) = $assoc_file->[1] =~ /^(.*?)([^\/\\]+)$/; 115 $dir = "" unless defined $dir; 116 if (-e $assoc_file->[0]) { 117 my $filepath = &util::filename_cat($archive_dir, $doc_dir, $afile); 118 &util::hard_link ($assoc_file->[0], $filepath); 119 $doc_obj->add_metadata ($doc_obj->get_top_section(), 120 "gsdlassocfile", 121 "$afile:$assoc_file->[2]:$dir"); 122 } else { 123 print STDERR "docsave::process couldn't copy the associated file " . 
124 "$assoc_file->[0] to $afile\n"; 125 } 126 } 127 128 if ($open_new_file) 129 { 152 $self->process_assoc_files ($doc_obj, $doc_dir); 153 154 155 if ($open_new_file) { 130 156 # only if opening new file 131 157 my $doc_file 132 = &util::filename_cat ($ archive_dir, $doc_dir, "doc.gml");158 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.gml"); 133 159 my $short_doc_file = &util::filename_cat ($doc_dir, "doc.gml"); 134 160 … … 139 165 140 166 if (!open (OUTDOC, ">$doc_file")) { 141 print STDERR "docsave:: process could not write to file $doc_file\n";167 print STDERR "docsave::group_process could not write to file $doc_file\n"; 142 168 return; 143 169 } … … 153 179 $self->{'gs_count'}++; 154 180 } 181 182 183 sub get_doc_dir { 184 my $self = shift (@_); 185 my ($OID) = @_; 186 187 my $doc_info = $self->{'archive_info'}->get_info($OID); 188 my $doc_dir = ""; 189 if (defined $doc_info && scalar(@$doc_info) >= 1) { 190 # this OID already has an assigned directory, use the 191 # same one. 
192 $doc_dir = $doc_info->[0]; 193 $doc_dir =~ s/\/?doc\.gml(\.gz)?$//; 194 } else { 195 # have to get a new document directory 196 my $doc_dir_rest = $OID; 197 my $doc_dir_num = 0; 198 do { 199 $doc_dir .= "/" if $doc_dir_num > 0; 200 if ($doc_dir_rest =~ s/^(.{1,8})//) { 201 $doc_dir .= $1; 202 $doc_dir_num++; 203 } 204 } while ($doc_dir_rest ne "" && 205 ((-d &util::filename_cat ($self->{'archive_dir'}, "$doc_dir.dir")) || 206 ($self->{'archive_info'}->size() >= 1024 && $doc_dir_num < 2))); 207 $doc_dir .= ".dir"; 208 209 } 210 211 &util::mk_all_dir (&util::filename_cat ($self->{'archive_dir'}, $doc_dir)); 212 213 return $doc_dir; 214 } 215 216 217 sub process_assoc_files { 218 my $self = shift (@_); 219 my ($doc_obj, $doc_dir) = @_; 220 221 my @assoc_files = (); 222 foreach $assoc_file (@{$doc_obj->get_assoc_files()}) { 223 my ($dir, $afile) = $assoc_file->[1] =~ /^(.*?)([^\/\\]+)$/; 224 $dir = "" unless defined $dir; 225 if (-e $assoc_file->[0]) { 226 my $filepath = &util::filename_cat($self->{'archive_dir'}, $doc_dir, $afile); 227 &util::hard_link ($assoc_file->[0], $filepath); 228 $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(), 229 "gsdlassocfile", 230 "$afile:$assoc_file->[2]:$dir"); 231 } else { 232 print STDERR "docsave::process couldn't copy the associated file " . 233 "$assoc_file->[0] to $afile\n"; 234 } 235 } 236 } 237 155 238 156 239 sub close_file_output
Note:
See TracChangeset
for help on using the changeset viewer.