Changeset 139


Ignore:
Timestamp:
1999-02-03T14:43:31+13:00 (25 years ago)
Author:
sjboddie
Message:

Got building stuff to handle subcollections and language subcollections

Location:
trunk/gsdl/perllib
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/colcfg.pm

    r133 r139  
    1313# {'beta'}->string
    1414# {'key'}->string
     15# {'languages'}->array of strings
    1516# {'subcollection'}->hash of key-value pairs
    1617# {'indexsubcollections'}->array of strings
     
    2526            '^(creator|public|beta|key|defaultindex|importdir|' .
    2627                    'archivedir|cachedir|builddir|textindex|removeold)$',
    27             '^(maintainer|indexsubcollections|indexes|plugins)$',
     28            '^(maintainer|languages|indexsubcollections|indexes|plugins)$',
    2829            '^(subcollection)$');
    2930}
     
    3536            '^(creator|public|beta|key|defaultindex|importdir|' .
    3637                    'archivedir|cachedir|builddir|textindex|removeold)$',
    37             '^(maintainer|subcollection|indexsubcollections|indexes|plugins)$',
     38            '^(maintainer|languages|indexsubcollections|indexes|plugins)$',
    3839            '^(subcollection)$');
    3940}
  • trunk/gsdl/perllib/mgbuilder.pm

    r134 r139  
    5252    }
    5353
     54    # sort out language subindexes
     55    if (defined $self->{'collect_cfg'}->{'languages'}) {
     56    my $indexes = $self->{'collect_cfg'}->{'indexes'};
     57    $self->{'collect_cfg'}->{'indexes'} = [];
     58    foreach $language (@{$self->{'collect_cfg'}->{'languages'}}) {
     59        foreach $index (@$indexes) {
     60        push (@{$self->{'collect_cfg'}->{'indexes'}}, "$index:$language");
     61        }
     62    }
     63    }
     64
    5465    # get the list of plugins for this collection
    5566    my @plugins = (); # some good choice of plugins .... ????
     
    187198    my %dirnames = ('text'=>'text',
    188199            'extra'=>'extra');
    189    
     200    my %pnames = ('index' => '', 'subcollection' => '', 'languages' => '');
     201
    190202    foreach $index (@$indexes) {
    191     my ($level, $fields, $subcollection) = split (":", $index);
    192     my @fields = split (/,/, $fields);
    193     splice (@fields, 2); # just want first two fields
    194    
    195     # the directory names starts with the first character of the index level
    196     my ($dirname) = $level =~ /^(.)/;
    197 
    198     # next comes a processed version of the first two fields in the index
    199     # the processed version contains the first character and the next
    200     # consonant
    201     map {s/^(.).*?([bcdfghjklmnpqrstvwxyz]).*$/$1$2/i;} @fields;
    202     $dirname .= join("", @fields);
     203    my ($level, $gran, $subcollection, $languages) = split (":", $index);
     204
     205    # the directory name starts with the first character of the index level
     206    my ($pindex) = $level =~ /^(.)/;
     207
     208    # next comes a processed version of the index
     209    $pindex .= $self->process_field ($gran);
     210    $pindex = lc ($pindex);
    203211
    204212    # next comes a processed version of the subcollection if there is one.
    205     # the processed version contains the first character and the next
    206     # consonant if there's only one field, otherwise the first character
    207     # of the first two fields
    208     if (defined ($subcollection) && $subcollection =~ /\w/) {
    209         @fields = split /,/, $subcollection;
    210         if (scalar @fields >= 2) {
    211         splice (@fields, 2);
    212         map {s/^(.).*$/$1/i;} @fields;
    213         $dirname .= join("", @fields);
    214         } else {
    215         $subcollection =~ s/^(.).*?([bcdfghjklmnpqrstvwxyz]?).*$/$1$2/i;
    216         $dirname .= $subcollection;
    217         }
     213    my $psub = $self->process_field ($subcollection);
     214    $psub = lc ($psub);
     215
     216    # next comes a processed version of the language if there is one.
     217    my $plang = $self->process_field ($languages);
     218    $plang = lc ($plang);
     219
     220    my $dirname = $pindex . $psub . $plang;
     221
     222    # check to be sure all index names are unique
     223    while (defined ($dirnames{$dirname})) {
     224        $dirname = $self->make_unique (\%pnames, $index, \$pindex, \$psub, \$plang);
    218225    }
    219226
    220     # convert the directory name to lowercase
    221     $dirname = lc ($dirname);
    222 
    223     # add a number to make this directory name unique
    224     if (defined $dirnames{$dirname}) {
    225         my $num = 1;
    226         while (defined $dirnames{"$dirname$num"}) {
    227         $num++;
    228         }
    229         $dirname .= $num;
    230     }
    231    
     227    $mapping{'indexmap'}{"$level:$gran"} = $pindex;
     228    $mapping{'subcollectionmap'}{$subcollection} = $psub if $psub =~ /\w/;
     229    $mapping{'languagemap'}{$languages} = $plang if $plang =~ /\w/;
    232230    $mapping{$index} = $dirname;
    233231    $dirnames{$dirname} = $index;
     232    $pnames{'index'}{$pindex} = "$level:$gran";
     233    $pnames{'subcollection'}{$psub} = $subcollection;
     234    $pnames{'languages'}{$plang} = $languages;
    234235    }
    235236
     
    237238}
    238239
     240# returns a processed version of a field.
     241# if the field has only one component the processed
     242# version will contain the first character and next consonant
     243# of that componant - otherwise it will contain the first
     244# character of the first two components
     245sub process_field {
     246    my $self = shift (@_);
     247    my ($field) = @_;
     248 
     249    return "" unless (defined ($field) && $field =~ /\w/);
     250
     251    my @components = split /,/, $field;
     252    if (scalar @components >= 2) {
     253    splice (@components, 2);
     254    map {s/^(.).*$/$1/;} @components;
     255    return join("", @components);
     256    } else {
     257    my ($a, $b) = $field =~ /^(.).*?([bcdfghjklmnpqrstvwxyz])/i;
     258    ($a, $b) = $field =~ /^(.)(.)/ unless defined $a && defined $b;
     259    return "$a$b";
     260    }
     261}
     262
     263sub make_unique {
     264    my $self = shift (@_);
     265    my ($namehash, $index, $indexref, $subref, $langref) = @_;
     266    my ($level, $gran, $subcollection, $languages) = split (":", $index);
     267
     268    if ($namehash->{'index'}->{$$indexref} ne "$level:$gran") {
     269    $self->get_next_version ($indexref);
     270    } elsif ($namehash->{'subcollection'}->{$$subref} ne $subcollection) {
     271    $self->get_next_version ($subref);
     272    } elsif ($namehash->{'languages'}->{$$langref} ne $languages) {
     273    $self->get_next_version ($langref);
     274    }
     275    return "$$indexref$$subref$$langref";
     276}   
     277
     278sub get_next_version {
     279    my $self = shift (@_);
     280    my ($nameref) = @_;
     281
     282    if ($$nameref =~ /(\d\d)$/) {
     283    my $num = $1; $num ++;
     284    $$nameref =~ s/\d\d$/$num/;
     285    } elsif ($$nameref =~ /(\d)$/) {
     286    my $num = $1;
     287    if ($num == 9) {$$nameref =~ s/\d\d$/10/;}
     288    else {$num ++; $$nameref =~ s/\d$/$num/;}
     289    } else {
     290    $$nameref =~ s/.$/0/;
     291    }
     292}
    239293
    240294sub build_index {
     
    277331        push (@$indexexparr, $self->{'collect_cfg'}->{'subcollection'}->{$subcollection});
    278332    }
     333    }
     334   
     335    # add expressions for languages if this index belongs to
     336    # a language subcollection
     337    foreach $language (@{$self->{'collect_cfg'}->{'languages'}}) {
     338    if ($language =~ s/^\!//) {
     339        push (@$indexexparr, "!Language/$language/");
     340    } else {
     341        push (@$indexexparr, "Language/$language/");
     342    }
    279343    }
    280344   
     
    407471    # store the mapping between the index names and the directory names
    408472    my @indexmap = ();
    409     foreach $index (@{$self->{'collect_cfg'}->{'indexes'}}) {
    410     push (@indexmap, "$index\-\>$self->{'index_mapping'}->{$index}");
     473    foreach $index (keys (%{$self->{'index_mapping'}->{'indexmap'}})) {
     474    push (@indexmap, "$index\-\>$self->{'index_mapping'}->{'indexmap'}->{$index}");
    411475    }
    412476    $build_cfg->{'indexmap'} = \@indexmap;
     477
     478    my @subcollectionmap = ();
     479    foreach $subcollection (keys (%{$self->{'index_mapping'}->{'subcollectionmap'}})) {
     480    push (@subcollectionmap, "$subcollection\-\>$self->{'index_mapping'}->{'subcollectionmap'}->{$subcollection}");
     481    }
     482    $build_cfg->{'subcollectionmap'} = \@subcollectionmap if scalar (@subcollectionmap);
     483
     484    my @languagemap = ();
     485    foreach $language (keys (%{$self->{'index_mapping'}->{'languagemap'}})) {
     486    push (@languagemap, "$language\-\>$self->{'index_mapping'}->{'languagemap'}->{$language}");
     487    }
     488    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);
    413489
    414490    # write out the build information
    415491    &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg,
    416                  '^(builddate|numdocs|numbytes)$', '^(indexmap)$');
     492                 '^(builddate|numdocs|numbytes)$', '^(indexmap|subcollectionmap|languagemap)$');
    417493
    418494}
  • trunk/gsdl/perllib/mgbuildproc.pm

    r82 r139  
    8383sub process {
    8484    my $self = shift (@_);
    85     my ($doc_obj) = @_;
    8685    my $method = $self->{'mode'};
    8786
    88     $self->$method($doc_obj);
     87    $self->$method(@_);
    8988}
    9089
    9190sub infodb {
    9291    my $self = shift (@_);
    93     my ($doc_obj) = @_;
     92    my ($doc_obj, $filename) = @_;
    9493    my $handle = $self->{'output_handle'};
    9594
     
    142141    $mapped_section =~ s/\.+$//;
    143142
    144     my ($parent, $classification, $creator, $OID);
     143    my ($parent, $classification, $creator);
    145144   
    146145    $classification = $self->get_classifications($doc_obj, $section, $mapped_section)
     
    157156    } else {
    158157        $creator = $doc_obj->get_metadata_element($section, "Creator");
    159         $OID = $doc_obj->get_OID();
    160         # this is a hack at getting OID to look like the directory path - I'm sure there's
    161         # a better way to do it but it's late... Stefan
    162         my @OIDchars = split //, $OID;
    163         $OID = "";
    164         my $count = 0;
    165         foreach $i (@OIDchars) {
    166         if ($count == 7) {
    167             $OID .= "$i/";
    168             $count = 0;
    169         } else {
    170             $OID .= $i;
    171             $count ++;
    172         }
    173         }
    174         $OID =~ s/\/$//;
    175         $OID .= ".dir";
     158
     159        # need filename so we know what directory to look in for associated files
     160        $filename =~ s/^\/?(.*?\.dir).*$/$1/ if (defined $filename);
    176161    }
    177162
     
    187172        } else {
    188173        $self->write_to_gdbm($handle, $mapped_section, $title, $creator, $source, $date, $jobnumber, undef,
    189                      $self->{'num_sections'}, $parent, $classification, $OID);
     174                     $self->{'num_sections'}, $parent, $classification, $filename);
    190175        }
    191176    } else {
     
    215200        $self->write_to_gdbm ($handle, $mapped_section, $title, $creator, $source,
    216201                      $date, $jobnumber, $contains, $self->{'num_sections'},
    217                       $parent, $classification, $OID);
     202                      $parent, $classification, $filename);
    218203
    219204        if ($doc_obj->get_text_length($section) > 0) {
     
    222207            $self->write_to_gdbm ($handle, $intromapsection, "<i>(introductory text)</i>", $creator,
    223208                      $source, $date, $jobnumber, undef, $self->{'num_sections'},
    224                       $mapped_section, $classification, $OID);
     209                      $mapped_section, $classification, $filename);
    225210        }
    226211        }
  • trunk/gsdl/perllib/plugins/GMLPlug.pm

    r88 r139  
    124124
    125125    # process the document
    126     $processor->process($doc_obj);
     126    $processor->process($doc_obj, $file);
    127127       
    128128    return 1; # processed the file
Note: See TracChangeset for help on using the changeset viewer.