Changeset 69
- Timestamp:
- 1998-12-11T14:45:40+13:00 (25 years ago)
- Location:
- trunk/gsdl/perllib
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/cfgread.pm
r4 r69 28 28 # '^(this|that)$' 29 29 sub read_cfg_file { 30 my ($filename, $stringexp, $arrayexp ) = @_;30 my ($filename, $stringexp, $arrayexp, $hashexp) = @_; 31 31 my ($line); 32 32 my $data = {}; … … 41 41 } elsif ($key =~ /$arrayexp/) { 42 42 push (@{$data->{$key}}, @$line); 43 44 } elsif ($key =~ /$hashexp/) { 45 my $k = shift @$line; 46 my $v = shift @$line; 47 $data->{$key}->{$k} = $v; 43 48 } 44 49 } … … 56 61 # '^(this|that)$' 57 62 sub write_cfg_file { 58 my ($filename, $data, $stringexp, $arrayexp ) = @_;63 my ($filename, $data, $stringexp, $arrayexp, $hashexp) = @_; 59 64 60 65 if (open (COLCFG, ">$filename")) { … … 64 69 } elsif ($key =~ /$arrayexp/) { 65 70 &write_cfg_line ('COLCFG', [$key, @{$data->{$key}}]); 71 } elsif ($key =~ /$hashexp/) { 72 foreach $k (keys (%{$data->{$key}})) { 73 &write_cfg_line ('COLCFG', [$key, $k, $data->{$key}->{$k}]); 74 } 66 75 } 67 76 } -
trunk/gsdl/perllib/colcfg.pm
r4 r69 13 13 # {'beta'}->string 14 14 # {'key'}->string 15 # {'subcollection'}->hash of key-value pairs 16 # {'indexsubcollections'}->array of strings 15 17 # {'indexes'}->array of strings 16 18 # {'defaultindex'}->string … … 22 24 return &cfgread::read_cfg_file ($filename, 23 25 '^(creator|public|beta|key|defaultindex)$', 24 '^(maintainer|indexes|plugins)$'); 26 '^(maintainer|indexsubcollections|indexes|plugins)$', 27 '^(subcollection)$'); 25 28 } 26 29 … … 30 33 &cfgread::write_cfg_file($filename, $data, 31 34 '^(creator|public|beta|key|defaultindex)$', 32 '^(maintainer|indexes|plugins)$'); 35 '^(maintainer|subcollection|indexsubcollections|indexes|plugins)$', 36 '^(subcollection)$'); 33 37 } 34 38 -
trunk/gsdl/perllib/mgbuilder.pm
r16 r69 41 41 42 42 43 # sort out subcollection indexes 44 if (defined $self->{'collect_cfg'}->{'indexsubcollections'}) { 45 my $indexes = $self->{'collect_cfg'}->{'indexes'}; 46 $self->{'collect_cfg'}->{'indexes'} = []; 47 foreach $subcollection (@{$self->{'collect_cfg'}->{'indexsubcollections'}}) { 48 foreach $index (@$indexes) { 49 push (@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$subcollection"); 50 } 51 } 52 } 53 43 54 # get the list of plugins for this collection 44 55 my @plugins = (); # some good choice of plugins .... ???? … … 177 188 178 189 foreach $index (@$indexes) { 179 my ($level, $fields ) = split (":", $index);180 my @fields = split ( ",", $fields);190 my ($level, $fields, $subcollection) = split (":", $index); 191 my @fields = split (/,/, $fields); 181 192 splice (@fields, 2); # just want first two fields 182 193 … … 189 200 map {s/^(.).*?([bcdfghjklmnpqrstvwxyz]).*$/$1$2/i;} @fields; 190 201 $dirname .= join("", @fields); 202 203 # next comes a processed version of the subcollection if there is one. 204 # the processed version contains the first character and the next 205 # consonant if there's only one field, other wise the first character 206 # of the first two fields 207 if (defined ($subcollection) && $subcollection =~ /\w/) { 208 @fields = split /,/, $subcollection; 209 if (scalar @fields >= 2) { 210 splice (@fields, 2); 211 map {s/^(.).*$/$1/i;} @fields; 212 $dirname .= join("", @fields); 213 } else { 214 $subcollection =~ s/^(.).*?([bcdfghjklmnpqrstvwxyz]).*$/$1$2/i; 215 $dirname .= $subcollection; 216 } 217 } 191 218 192 219 # convert the directory name to lowercase … … 238 265 $index_level = 3 if $index =~ /^paragraph/i; 239 266 267 # get the index expression if this index belongs 268 # to a subcollection 269 my $indexexparr = []; 270 my ($level, $fields, $subcollection) = split (":", $index); 271 my (@subcollections) = split /,/, $subcollection; 272 273 foreach $subcollection (@subcollections) { 274 if (defined ($self->{'collect_cfg'}->{'subcollection'}->{$subcollection})) { 275 push (@$indexexparr, $self->{'collect_cfg'}->{'subcollection'}->{$subcollection}); 276 } 277 } 278 240 279 # set up the document processor 241 280 $self->{'buildproc'}->set_output_handle ('mgbuilder::PIPEOUT'); 242 281 $self->{'buildproc'}->set_mode ('text'); 243 $self->{'buildproc'}->set_index ($index );282 $self->{'buildproc'}->set_index ($index, $indexexparr); 244 283 245 284 -
trunk/gsdl/perllib/mgbuildproc.pm
r59 r69 24 24 $self->{'mode'} = "text"; 25 25 $self->{'index'} = "section:text"; 26 $self->{'indexexparr'} = []; 26 27 $self->{'output_handle'} = "STDOUT"; 27 28 $self->{'num_docs'} = 0; … … 74 75 sub set_index { 75 76 my $self = shift (@_); 76 my ($index ) = @_;77 my ($index, $indexexparr) = @_; 77 78 78 79 $self->{'index'} = $index; 80 $self->{'indexexparr'} = $indexexparr if defined $indexexparr; 79 81 } 80 82 … … 391 393 my ($doc_obj) = @_; 392 394 my $handle = $self->{'output_handle'}; 395 my $indexed_doc = 1; 393 396 394 397 # only output this document if it is one to be indexed 395 398 return if ($doc_obj->get_doc_type() ne "indexed_doc"); 399 400 # see if this document belongs to this subcollection 401 foreach $indexexp (@{$self->{'indexexparr'}}) { 402 $indexed_doc = 0; 403 my ($field, $exp, $options) = split /\//, $indexexp; 404 if (defined ($field) && defined ($exp)) { 405 my ($bool) = $field =~ /^(.)/; 406 $field =~ s/^.// if $bool eq '!'; 407 if ($field eq "filename") { 408 $field = $doc_obj->get_source_filename(); 409 } else { 410 $field = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $field); 411 } 412 next unless defined $field; 413 if ($bool eq '!') { 414 if ($options =~ /^i$/i) { 415 if ($field !~ /$exp/i) {$indexed_doc = 1; last;} 416 } else { 417 if ($field !~ /$exp/) {$indexed_doc = 1; last;} 418 } 419 } else { 420 if ($options =~ /^i$/i) { 421 if ($field =~ /$exp/i) {$indexed_doc = 1; last;} 422 } else { 423 if ($field =~ /$exp/) {$indexed_doc = 1; last;} 424 } 425 } 426 } 427 } 396 428 397 429 # this is another document … … 412 444 # update a few statistics 413 445 $doc_section++; 414 $self->{'num_bytes'} += $doc_obj->get_text_length ($section);415 446 $self->{'num_sections'} += 1; 416 417 foreach $field (split (/,/, $fields)) { 418 # only deal with this field if it doesn't start with top or 419 # this is the first section 420 my $real_field = $field; 421 if (!($real_field =~ s/^top//) || ($doc_section == 1)) { 422 my $new_text = ""; 423 if ($real_field eq "text") { 424 $new_text = $doc_obj->get_text ($section); 425 $new_text =~ s/[\cB\cC]//g; 426 $new_text =~ s/(<p\b)/\cC$1/gi; 447 448 if ($indexed_doc) { 449 $self->{'num_bytes'} += $doc_obj->get_text_length ($section); 450 foreach $field (split (/,/, $fields)) { 451 # only deal with this field if it doesn't start with top or 452 # this is the first section 453 my $real_field = $field; 454 if (!($real_field =~ s/^top//) || ($doc_section == 1)) { 455 my $new_text = ""; 456 if ($real_field eq "text") { 457 $new_text = $doc_obj->get_text ($section); 458 $new_text =~ s/[\cB\cC]//g; 459 $new_text =~ s/(<p\b)/\cC$1/gi; 460 461 } else { 462 $new_text = join ("\cC", @{$doc_obj->get_metadata ($section, $real_field)}); 463 } 427 464 428 } else { 429 $new_text = join ("\cC", @{$doc_obj->get_metadata ($section, $real_field)}); 430 } 431 432 $text .= "$new_text\cC"; 465 $text .= "$new_text\cC"; 466 } 433 467 } 434 468 }
Note:
See TracChangeset
for help on using the changeset viewer.