Changeset 139
- Timestamp:
- 1999-02-03T14:43:31+13:00 (25 years ago)
- Location:
- trunk/gsdl/perllib
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/colcfg.pm
r133 r139 13 13 # {'beta'}->string 14 14 # {'key'}->string 15 # {'languages'}->array of strings 15 16 # {'subcollection'}->hash of key-value pairs 16 17 # {'indexsubcollections'}->array of strings … … 25 26 '^(creator|public|beta|key|defaultindex|importdir|' . 26 27 'archivedir|cachedir|builddir|textindex|removeold)$', 27 '^(maintainer| indexsubcollections|indexes|plugins)$',28 '^(maintainer|languages|indexsubcollections|indexes|plugins)$', 28 29 '^(subcollection)$'); 29 30 } … … 35 36 '^(creator|public|beta|key|defaultindex|importdir|' . 36 37 'archivedir|cachedir|builddir|textindex|removeold)$', 37 '^(maintainer| subcollection|indexsubcollections|indexes|plugins)$',38 '^(maintainer|languages|indexsubcollections|indexes|plugins)$', 38 39 '^(subcollection)$'); 39 40 } -
trunk/gsdl/perllib/mgbuilder.pm
r134 r139 52 52 } 53 53 54 # sort out language subindexes 55 if (defined $self->{'collect_cfg'}->{'languages'}) { 56 my $indexes = $self->{'collect_cfg'}->{'indexes'}; 57 $self->{'collect_cfg'}->{'indexes'} = []; 58 foreach $language (@{$self->{'collect_cfg'}->{'languages'}}) { 59 foreach $index (@$indexes) { 60 push (@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$language"); 61 } 62 } 63 } 64 54 65 # get the list of plugins for this collection 55 66 my @plugins = (); # some good choice of plugins .... ???? … … 187 198 my %dirnames = ('text'=>'text', 188 199 'extra'=>'extra'); 189 200 my %pnames = ('index' => '', 'subcollection' => '', 'languages' => ''); 201 190 202 foreach $index (@$indexes) { 191 my ($level, $fields, $subcollection) = split (":", $index); 192 my @fields = split (/,/, $fields); 193 splice (@fields, 2); # just want first two fields 194 195 # the directory names starts with the first character of the index level 196 my ($dirname) = $level =~ /^(.)/; 197 198 # next comes a processed version of the first two fields in the index 199 # the processed version contains the first character and the next 200 # consonant 201 map {s/^(.).*?([bcdfghjklmnpqrstvwxyz]).*$/$1$2/i;} @fields; 202 $dirname .= join("", @fields); 203 my ($level, $gran, $subcollection, $languages) = split (":", $index); 204 205 # the directory name starts with the first character of the index level 206 my ($pindex) = $level =~ /^(.)/; 207 208 # next comes a processed version of the index 209 $pindex .= $self->process_field ($gran); 210 $pindex = lc ($pindex); 203 211 204 212 # next comes a processed version of the subcollection if there is one. 205 # the processed version contains the first character and the next 206 # consonant if there's only one field, otherwise the first character 207 # of the first two fields 208 if (defined ($subcollection) && $subcollection =~ /\w/) { 209 @fields = split /,/, $subcollection; 210 if (scalar @fields >= 2) { 211 splice (@fields, 2); 212 map {s/^(.).*$/$1/i;} @fields; 213 $dirname .= join("", @fields); 214 } else { 215 $subcollection =~ s/^(.).*?([bcdfghjklmnpqrstvwxyz]?).*$/$1$2/i; 216 $dirname .= $subcollection; 217 } 213 my $psub = $self->process_field ($subcollection); 214 $psub = lc ($psub); 215 216 # next comes a processed version of the language if there is one. 217 my $plang = $self->process_field ($languages); 218 $plang = lc ($plang); 219 220 my $dirname = $pindex . $psub . $plang; 221 222 # check to be sure all index names are unique 223 while (defined ($dirnames{$dirname})) { 224 $dirname = $self->make_unique (\%pnames, $index, \$pindex, \$psub, \$plang); 218 225 } 219 226 220 # convert the directory name to lowercase 221 $dirname = lc ($dirname); 222 223 # add a number to make this directory name unique 224 if (defined $dirnames{$dirname}) { 225 my $num = 1; 226 while (defined $dirnames{"$dirname$num"}) { 227 $num++; 228 } 229 $dirname .= $num; 230 } 231 227 $mapping{'indexmap'}{"$level:$gran"} = $pindex; 228 $mapping{'subcollectionmap'}{$subcollection} = $psub if $psub =~ /\w/; 229 $mapping{'languagemap'}{$languages} = $plang if $plang =~ /\w/; 232 230 $mapping{$index} = $dirname; 233 231 $dirnames{$dirname} = $index; 232 $pnames{'index'}{$pindex} = "$level:$gran"; 233 $pnames{'subcollection'}{$psub} = $subcollection; 234 $pnames{'languages'}{$plang} = $languages; 234 235 } 235 236 … … 237 238 } 238 239 240 # returns a processed version of a field. 241 # if the field has only one component the processed 242 # version will contain the first character and next consonant 243 # of that componant - otherwise it will contain the first 244 # character of the first two components 245 sub process_field { 246 my $self = shift (@_); 247 my ($field) = @_; 248 249 return "" unless (defined ($field) && $field =~ /\w/); 250 251 my @components = split /,/, $field; 252 if (scalar @components >= 2) { 253 splice (@components, 2); 254 map {s/^(.).*$/$1/;} @components; 255 return join("", @components); 256 } else { 257 my ($a, $b) = $field =~ /^(.).*?([bcdfghjklmnpqrstvwxyz])/i; 258 ($a, $b) = $field =~ /^(.)(.)/ unless defined $a && defined $b; 259 return "$a$b"; 260 } 261 } 262 263 sub make_unique { 264 my $self = shift (@_); 265 my ($namehash, $index, $indexref, $subref, $langref) = @_; 266 my ($level, $gran, $subcollection, $languages) = split (":", $index); 267 268 if ($namehash->{'index'}->{$$indexref} ne "$level:$gran") { 269 $self->get_next_version ($indexref); 270 } elsif ($namehash->{'subcollection'}->{$$subref} ne $subcollection) { 271 $self->get_next_version ($subref); 272 } elsif ($namehash->{'languages'}->{$$langref} ne $languages) { 273 $self->get_next_version ($langref); 274 } 275 return "$$indexref$$subref$$langref"; 276 } 277 278 sub get_next_version { 279 my $self = shift (@_); 280 my ($nameref) = @_; 281 282 if ($$nameref =~ /(\d\d)$/) { 283 my $num = $1; $num ++; 284 $$nameref =~ s/\d\d$/$num/; 285 } elsif ($$nameref =~ /(\d)$/) { 286 my $num = $1; 287 if ($num == 9) {$$nameref =~ s/\d\d$/10/;} 288 else {$num ++; $$nameref =~ s/\d$/$num/;} 289 } else { 290 $$nameref =~ s/.$/0/; 291 } 292 } 239 293 240 294 sub build_index { … … 277 331 push (@$indexexparr, $self->{'collect_cfg'}->{'subcollection'}->{$subcollection}); 278 332 } 333 } 334 335 # add expressions for languages if this index belongs to 336 # a language subcollection 337 foreach $language (@{$self->{'collect_cfg'}->{'languages'}}) { 338 if ($language =~ s/^\!//) { 339 push (@$indexexparr, "!Language/$language/"); 340 } else { 341 push (@$indexexparr, "Language/$language/"); 342 } 279 343 } 280 344 … … 407 471 # store the mapping between the index names and the directory names 408 472 my @indexmap = (); 409 foreach $index ( @{$self->{'collect_cfg'}->{'indexes'}}) {410 push (@indexmap, "$index\-\>$self->{'index_mapping'}->{ $index}");473 foreach $index (keys (%{$self->{'index_mapping'}->{'indexmap'}})) { 474 push (@indexmap, "$index\-\>$self->{'index_mapping'}->{'indexmap'}->{$index}"); 411 475 } 412 476 $build_cfg->{'indexmap'} = \@indexmap; 477 478 my @subcollectionmap = (); 479 foreach $subcollection (keys (%{$self->{'index_mapping'}->{'subcollectionmap'}})) { 480 push (@subcollectionmap, "$subcollection\-\>$self->{'index_mapping'}->{'subcollectionmap'}->{$subcollection}"); 481 } 482 $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap); 483 484 my @languagemap = (); 485 foreach $language (keys (%{$self->{'index_mapping'}->{'languagemap'}})) { 486 push (@languagemap, "$language\-\>$self->{'index_mapping'}->{'languagemap'}->{$language}"); 487 } 488 $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap); 413 489 414 490 # write out the build information 415 491 &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg, 416 '^(builddate|numdocs|numbytes)$', '^(indexmap )$');492 '^(builddate|numdocs|numbytes)$', '^(indexmap|subcollectionmap|languagemap)$'); 417 493 418 494 } -
trunk/gsdl/perllib/mgbuildproc.pm
r82 r139 83 83 sub process { 84 84 my $self = shift (@_); 85 my ($doc_obj) = @_;86 85 my $method = $self->{'mode'}; 87 86 88 $self->$method( $doc_obj);87 $self->$method(@_); 89 88 } 90 89 91 90 sub infodb { 92 91 my $self = shift (@_); 93 my ($doc_obj ) = @_;92 my ($doc_obj, $filename) = @_; 94 93 my $handle = $self->{'output_handle'}; 95 94 … … 142 141 $mapped_section =~ s/\.+$//; 143 142 144 my ($parent, $classification, $creator , $OID);143 my ($parent, $classification, $creator); 145 144 146 145 $classification = $self->get_classifications($doc_obj, $section, $mapped_section) … … 157 156 } else { 158 157 $creator = $doc_obj->get_metadata_element($section, "Creator"); 159 $OID = $doc_obj->get_OID(); 160 # this is a hack at getting OID to look like the directory path - I'm sure there's 161 # a better way to do it but it's late... Stefan 162 my @OIDchars = split //, $OID; 163 $OID = ""; 164 my $count = 0; 165 foreach $i (@OIDchars) { 166 if ($count == 7) { 167 $OID .= "$i/"; 168 $count = 0; 169 } else { 170 $OID .= $i; 171 $count ++; 172 } 173 } 174 $OID =~ s/\/$//; 175 $OID .= ".dir"; 158 159 # need filename so we know what directory to look in for associated files 160 $filename =~ s/^\/?(.*?\.dir).*$/$1/ if (defined $filename); 176 161 } 177 162 … … 187 172 } else { 188 173 $self->write_to_gdbm($handle, $mapped_section, $title, $creator, $source, $date, $jobnumber, undef, 189 $self->{'num_sections'}, $parent, $classification, $ OID);174 $self->{'num_sections'}, $parent, $classification, $filename); 190 175 } 191 176 } else { … … 215 200 $self->write_to_gdbm ($handle, $mapped_section, $title, $creator, $source, 216 201 $date, $jobnumber, $contains, $self->{'num_sections'}, 217 $parent, $classification, $ OID);202 $parent, $classification, $filename); 218 203 219 204 if ($doc_obj->get_text_length($section) > 0) { … … 222 207 $self->write_to_gdbm ($handle, $intromapsection, "<i>(introductory text)</i>", $creator, 223 208 $source, $date, $jobnumber, undef, $self->{'num_sections'}, 224 $mapped_section, $classification, $ OID);209 $mapped_section, $classification, $filename); 225 210 } 226 211 } -
trunk/gsdl/perllib/plugins/GMLPlug.pm
r88 r139 124 124 125 125 # process the document 126 $processor->process($doc_obj );126 $processor->process($doc_obj, $file); 127 127 128 128 return 1; # processed the file
Note:
See TracChangeset
for help on using the changeset viewer.