Changeset 3540 for trunk/gsdl/perllib


Ignore:
Timestamp:
2002-11-18T17:43:56+13:00 (22 years ago)
Author:
kjdon
Message:

Added John T's changes into CVS - added the information needed to enable retrieval of usage info in XML.

Location:
trunk/gsdl/perllib
Files:
35 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/classify/AZCompactList.pm

    r3529 r3540  
    5050}
    5151
     52my $doclevel_list =
     53[ { 'name' => "top",
     54    'desc' => "Whole document." } ,
     55{   'name' => "section",
     56    'desc' => "By sections." }
     57];
     58
     59my $arguments =
     60[ {     'name' => "metadata",
     61    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     62    'type' => "metadata",
     63    'reqd' => "yes" } ,
     64{   'name' => "buttonname",
     65    'desc' => "Button name for this classification. Defaults to metadata name.",
     66    'type' => "string",
     67    'reqd' => "no" } ,
     68{   'name' => "mingroup",
     69    'desc' => "The smallest value that will cause a group in the hierarchy to form.",
     70    'type' => "int",
     71    'reqd' => "no" } ,
     72{   'name' => "minnesting",
     73    'desc' => "The smallest value that will cause a list to be converted into a nested list.",
     74    'type' => "int",
     75    'reqd' => "no" } ,
     76{   'name' => "mincompact",
     77    'desc' => "Used in compact list.",
     78    'type' => "int",
     79    'reqd' => "no" } ,
     80{   'name' => "maxcompact",
     81    'desc' => "Used in compact list.",
     82    'type' => "int",
     83    'reqd' => "no" } ,
     84{   'name' => "doclevel",
     85    'desc' => "Level to process document at.",
     86    'type' => "enum",
     87    'list' => $doclevel_list,
     88    'reqd' => "no" } ,
     89{   'name' => "onlyfirst",
     90    'desc' => "Control whether all or only first metadata value used from array of metadata.",
     91    'type' => "flag",
     92    'reqd' => "no" }
     93];
     94
     95my $options =
     96{   'name'     => "AZCompactList",
     97    'desc'     => "Classifier plugin for sorting alphabetically",
     98    'inherits' => "Yes",
     99    'args'     => $arguments };
     100
    52101sub print_usage {
    53102    print STDERR "
     
    72121    my $self = new BasClas($class, @_);
    73122
     123    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     124    my $option_list = $self->{'option_list'};
     125    push( @{$option_list}, $options );
     126
     127
    74128    my ($metaname, $title, $removeprefix);
    75129    my $mingroup = 2;
  • trunk/gsdl/perllib/classify/AZList.pm

    r3510 r3540  
    3535    @ISA = ('BasClas');
    3636}
     37
     38my $arguments = [ { 'name' => "metadata",
     39            'desc' => "Metadata field used for classification. List will be sorted by this element.",
     40            'type' => "metadata",
     41            'reqd' => "yes" } ,
     42          { 'name' => "buttonname",
     43            'desc' => "Button name for this classification. Defaults to metadata name.",
     44            'type' => "string",
     45            'reqd' => "no" } ,
     46          { 'name' => "removeprefix",
     47            'desc' => "A prefix to ignore in the Metadata values for the field when sorting.",
     48            'type' => "string",
     49            'reqd' => "no" } ];
     50
     51my $options = { 'name'     => "AZList",
     52                 'desc'     => "Classifier plugin for sorting alphabetically",
     53                     'inherits' => "Yes",
     54                     'args'     => $arguments };
    3755
    3856sub print_usage {
     
    5573    my $class = shift (@_);
    5674    my $self = new BasClas($class, @_);
     75   
     76    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     77    my $option_list = $self->{'option_list'};
     78    push( @{$option_list}, $options );
    5779   
    5880    my ($metaname, $title, $removeprefix);
  • trunk/gsdl/perllib/classify/AZSectionList.pm

    r2954 r3540  
    3434# to the classification
    3535
     36# 12/05/02 Added usage datastructure - John Thompson
     37
    3638package AZSectionList;
    3739
     
    4345}
    4446
     47my $arguments =
     48[ {     'name' => "metadata",
     49    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     50    'type' => "metadata",
     51    'reqd' => "yes" } ,
     52{   'name' => "buttonname",
     53    'desc' => "Button name for this classification. Defaults to metadata name.",
     54    'type' => "string",
     55    'reqd' => "no" }
     56];
     57
     58my $options =
     59{   'name'     => "AZSectionList",
     60    'desc'     => "Classifier plugin for sorting alphabetically. This is very similar to AZList except it sorts by section level metadata (excluding the top level) instead of just top level metadata. The only change is to the classify() subroutine which must now iterate through each section, adding each to the classification.",
     61    'inherits' => "Yes",
     62    'args'     => $arguments };
     63
    4564sub print_usage {
    4665    print STDERR "
    47   usage: classify AZSectionList -metadata X [options]
     66  usage: classify AZSectionList [options]
    4867  options:
    4968
    50   -metadata X       (required) Metadata field used for classification.
    51             List will be sorted by this element.
     69  -metadata X    (required) Metadata field used for classification,
     70                  list will be sorted by this element.
    5271
    53   -buttonname X     Button name for this classification.
    54             defaults to metadata name.
     72  -buttonname X  (OPTIONAL) Title field for this classification.
     73                  if not included title field will be Metaname.
    5574
    56   -removeprefix regex   A prefix to ignore in the Metadata values
    57             for the field when sorting.
    58 
     75  -removeprefix regex A prefix to ignore in the Metadata values
     76                      for the field when sorting.
    5977This is very similar to AZList except it sorts by section level metadata
    6078(excluding the top level) instead of just top level metadata.
     
    6583    my $class = shift (@_);
    6684    my $self = new AZList($class, @_);
     85
     86     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     87     my $option_list = $self->{'option_list'};
     88     push( @{$option_list}, $options );
    6789
    6890    return bless $self, $class;
     
    91113    # if this section doesn't contain the metadata element we're
    92114    # sorting by we won't include it in this classification
     115
    93116    if (defined $metavalue && $metavalue ne "") {
    94117    if ($self->{'removeprefix'}) {
    95118        $metavalue =~ s/^$self->{'removeprefix'}//;
    96119    }
    97 
    98120    if ($self->{'metaname'} eq 'Creator') {
    99121        &sorttools::format_string_name_english (\$metavalue);
  • trunk/gsdl/perllib/classify/BasClas.pm

    r1885 r3540  
    5050#    display it.
    5151
     52# 09/05/02 Added usage datastructure - John Thompson
     53
    5254use parsargv;
     55
     56my $verbosity_list =
     57[ {     'name' => "0",
     58    'desc' => "" } ,
     59{   'name' => "1",
     60    'desc' => "" } ,
     61{   'name' => "2",
     62    'desc' => "" } ,
     63{   'name' => "3",
     64    'desc' => "" }
     65];
     66
     67my $arguments =
     68[ {     'name' => "verbosity",
     69    'desc' => "",
     70    'type' => "enum",
     71    'list' => $verbosity_list,
     72    'deft' => "2",
     73    'reqd' => "no" } ];
     74
     75my $options =
     76{   'name'     => "BasClas",
     77        'desc'     => "Base class for all the classifiers.",
     78    'inherits' => "No",
     79    'args'     => $arguments };
     80
     81sub print_xml_usage {
     82     my $self = shift (@_);
     83     print STDERR "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n";
     84     $self->print_xml();
     85}
     86
     87sub print_xml {
     88    my $self = shift (@_);
     89     my $option_list = $self->{'option_list'};
     90     my $option = pop( @{$option_list} );
     91     if(defined $option)
     92     {
     93          print STDERR "<ClassInfo>\n";
     94          print STDERR "  <Name>$option->{'name'}</Name>\n";
     95          print STDERR "  <Desc>$option->{'desc'}</Desc>\n";
     96          print STDERR "  <Inherits>$option->{'inherits'}</Inherits>\n";
     97          print STDERR "  <Arguments>\n";
     98          if(defined $option->{'args'})
     99          {
     100                my $args = $option->{'args'};
     101                my $x;
     102                foreach $x ( @{$args} )
     103                {
     104                     print STDERR "    <Option>\n";
     105                     print STDERR "      <Name>$x->{'name'}</Name>\n";
     106                     print STDERR "      <Desc>$x->{'desc'}</Desc>\n";
     107                     print STDERR "      <Type>$x->{'type'}</Type>\n";
     108                     print STDERR "      <Required>$x->{'reqd'}</Required>\n";
     109                     if(defined $x->{'list'})
     110                     {
     111                          print STDERR "      <List>\n";
     112                          my $list = $x->{'list'};
     113                          my $y;
     114                          foreach $y ( @{$list} )
     115                          {
     116                                print STDERR "        <Value>\n";
     117                                print STDERR "          <Name>$y->{'name'}</Name>\n";
     118                                print STDERR "          <Desc>$y->{'desc'}</Desc>\n";
     119                                print STDERR "        </Value>\n";
     120                          }
     121                          # Special case of 'input_encoding'
     122                          if( $x->{'name'} =~ m/^input_encoding$/i ) {
     123                                my $e = $encodings::encodings;
     124                                foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {
     125                                     print STDERR "        <Value>\n";
     126                                     print STDERR "          <Name>$enc</Name>\n";
     127                                     print STDERR "          <Desc>$e->{$enc}->{'name'}</Desc>\n";
     128                                     print STDERR "        </Value>\n";
     129                                }
     130                          }
     131                          print STDERR "      </List>\n";
     132                     }
     133                     if(defined $x->{'deft'})
     134                     {
     135                          print STDERR "      <Default>$x->{'deft'}</Default>\n";
     136                     }
     137                     print STDERR "    </Option>\n";
     138                }
     139          }
     140          if(defined $option_list) {
     141                $self->print_xml();
     142          }
     143         
     144          print STDERR "  </Arguments>\n";
     145          print STDERR "</ClassInfo>\n";
     146     }
     147}
    53148
    54149sub print_general_usage {
     
    79174    $self->{'outhandle'} = STDERR;
    80175   
     176     $self->{'option_list'} = [ $options ];
     177
    81178    # general options available to all classifiers
    82179    if (!parsargv::parse(\@_,
  • trunk/gsdl/perllib/classify/Browse.pm

    r2489 r3540  
    2424###########################################################################
    2525
     26# 12/05/02 Added usage datastructure - John Thompson
     27
    2628use BasClas;
    2729package Browse;
     
    3234    @ISA = ('BasClas');
    3335}
     36
     37my $options =
     38{   'name'     => "Browse",
     39    'desc'     => "",
     40    'inherits' => "Yes" };
    3441
    3542sub print_usage {
     
    4350    my $class = shift (@_);
    4451    my $self = new BasClas($class, @_);
    45    
     52 
     53     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     54     my $option_list = $self->{'option_list'};
     55     push( @{$option_list}, $options ); 
    4656   
    4757    # classifier information
  • trunk/gsdl/perllib/classify/DateList.pm

    r2916 r3540  
    3333# jrm21 - added option "bymonth", which splits by year and month.
    3434
     35# 12/05/02 Added usage datastructure - John Thompson
     36
    3537package DateList;
    3638
     
    4143    @ISA = ('BasClas');
    4244}
     45
     46my $arguments =
     47[ {     'name' => "bymonth",
     48    'desc' => "Classify by year and month.",
     49    'type' => "flag",
     50    'reqd' => "no" }
     51];
     52
     53my $options =
     54{   'name'     => "DateList",
     55    'desc'     => "Classifier plugin for sorting by date. Always sorts by 'Date' metadata. Date is assumed to be in the form yyyymmdd.",
     56    'inherits' => "Yes",
     57    'args'     => $arguments };
    4358
    4459sub print_usage {
     
    4863    -bymonth  [or bymonth=1]    Classify by year and month
    4964
    50   Classifier plugin for sorting by date, and assumes that 'Date' metadata
    51   exists. Date is assumed to be in the form yyyymmdd (all digits).
    52   By default dates are classified by year.
    53 
     65    Classifier plugin for sorting by date.
     66    Always sorts by 'Date' metadata.
     67    Date is assumed to be in the form yyyymmdd (all digits).
     68    By default dates are split by year - this should change.
     69
     70    Any errors are Dana's problem.
    5471";
    5572}
     
    5875    my $class = shift (@_);
    5976    my $self = new BasClas($class, @_);
     77
     78     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     79     my $option_list = $self->{'option_list'};
     80     push( @{$option_list}, $options );
    6081
    6182    $self->{'list'} = {};
  • trunk/gsdl/perllib/classify/HTML.pm

    r2022 r3540  
    3131# url=url           -- the url of the web page to link to
    3232
     33# 12/05/02 Added usage datastructure - John Thompson
     34
    3335package HTML;
    3436
     
    3840    @ISA = ('BasClas');
    3941}
     42
     43my $arguments =
     44[ {     'name' => "url",
     45    'desc' => "The url of the web page to link to.",
     46    'type' => "string",
     47    'reqd' => "yes" } ,
     48{   'name' => "buttonname",
     49    'desc' => "The title field for this classification. If not included, the title field will be 'Browse'.",
     50    'type' => "string",
     51    'reqd' => "no" }
     52];
     53
     54my $options =
     55{   'name'     => "HTML",
     56    'desc'     => "Creates an empty classification that's simply a link to a web page.",
     57    'inherits' => "Yes",
     58    'args'     => $arguments };
    4059
    4160sub print_usage {
     
    5574    my $class = shift (@_);
    5675    my $self = new BasClas($class, @_);
     76
     77     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     78     my $option_list = $self->{'option_list'};
     79     push( @{$option_list}, $options );
    5780   
    5881    my ($title, $url);
  • trunk/gsdl/perllib/classify/Hierarchy.pm

    r2973 r3540  
    4141#                     like an AZList classification)
    4242
     43# 12/05/02 Added usage datastructure - John Thompson
     44# 12/05/02 Modified new() so as not to die on error, only on init() - John Thompson
     45
    4346package Hierarchy;
    4447
     
    5255}
    5356
     57my $arguments =
     58[ {     'name' => "metadata",
     59    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     60    'type' => "metadata",
     61    'reqd' => "yes" } ,
     62{   'name' => "buttonname",
     63    'desc' => "Button name for this classification. Defaults to metadata name.",
     64    'type' => "string",
     65    'reqd' => "no" } ,
     66{   'name' => "hfile",
     67    'desc' => "The classification structure file.",
     68    'type' => "string",
     69    'reqd' => "yes" } ,
     70{   'name' => "sort",
     71    'desc' => "Metadata field to sort by (defaults to none).",
     72    'type' => "string",
     73    'reqd' => "no" } ,
     74{   'name' => "hlist_at_top",
     75    'desc' => "Display the first level of the classification horizontally.",
     76    'type' => "flag",
     77    'reqd' => "no" }
     78];
     79
     80my $options =
     81{   'name'     => "Hierarchy",
     82    'desc'     => "Classifier plugin for generating hierarchical classifications",
     83    'inherits' => "Yes" ,
     84    'args'     => $arguments };
     85
    5486sub print_usage {
    5587    print STDERR "
     
    76108    my $class = shift (@_);
    77109    my $self = new BasClas($class, @_);
    78    
     110 
     111     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     112     my $option_list = $self->{'option_list'};
     113     push( @{$option_list}, $options );
     114   
    79115    my $sortname = "Title";
    80116    my ($hfile, $metadata, $title, $hlist_at_top);
     
    88124             "allow_extra_options")) {
    89125   
    90     print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
    91     &print_usage();
    92     die "\n";
     126          $self->{'construction_error'} = "Incorrect options passed to $class, check your collect.cfg file.";
    93127    }
    94128
    95129    if (!$metadata) {
    96     &print_usage;
    97     print STDERR "\nHierarchy error: no metadata supplied\n";
    98     die "\n";
     130          $self->{'construction_error'} = "Hierarchy error: no metadata supplied.";
    99131    }
    100132
     
    103135    $sortname = undef if $sortname =~ /^nosort$/;
    104136
     137     my $subjectfile;
     138
    105139    if (!$hfile) {
    106     &print_usage;
    107     print STDERR "\nHierarchy error: No -hfile supplied\n";
    108     die "\n";
    109     }
    110    
    111     my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
    112     if (!-e $subjectfile) {
    113     my $collfile = $subjectfile;
    114     $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
    115     if (!-e $subjectfile) {
    116         my $outhandle = $self->{'outhandle'};
    117         &print_usage;
    118         print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
    119         print STDERR "This file should be in $collfile or $subjectfile\n";
    120         die "\n";
    121     }
    122     }
     140          $self->{'construction_error'} = "Hierarchy error: No -hfile supplied.";
     141    }
     142    else
     143     {
     144          $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
     145          if (!-e $subjectfile) {
     146                my $collfile = $subjectfile;
     147                $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
     148                if (!-e $subjectfile) {
     149                     my $outhandle = $self->{'outhandle'};
     150                     &print_usage;
     151                     print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
     152                     print STDERR "This file should be in $collfile or $subjectfile\n";
     153                     die "\n";
     154                }
     155          }
     156    }
    123157
    124158    $self->{'descriptorlist'} = {}; # first field in subject file
     
    135169sub init {
    136170    my $self = shift (@_);
     171
     172     if(defined $self->{'construction_error'} || !defined $self->{'metaname'} || !defined $self->{'subjectfile'}) {
     173          print STDERR "Error: " , $self->{'construction_error'} , "\n";
     174          &print_usage;
     175          die "\n";
     176     }
    137177
    138178    # read in the subject file
     
    230270    $classifyinfo->{'Title'} = $title;
    231271    $classifyinfo->{'classifytype'} = $classifytype;
    232 
    233272    return $classifyinfo;
    234273    }
    235274
    236275    $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
    237 
    238276    my $offset = 0;
    239277    foreach $thing (@{$classifyinfo->{'contains'}}) {
    240278    $offset ++ if defined $thing->{'OID'};
    241279    }
    242    
    243     while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
     280
     281    while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) { 
    244282    push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
    245283    }
  • trunk/gsdl/perllib/classify/List.pm

    r2022 r3540  
    3838#                      if metadata is also not included title will be 'List'
    3939
     40# 12/05/02 Added usage datastructure - John Thompson
     41
    4042use BasClas;
    4143package List;
     
    4648    @ISA = ('BasClas');
    4749}
     50
     51my $arguments =
     52[ {     'name' => "metadata",
     53    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     54    'type' => "metadata",
     55    'reqd' => "yes" } ,
     56{   'name' => "buttonname",
     57    'desc' => "Button name for this classification. Defaults to metadata name.",
     58    'type' => "string",
     59    'reqd' => "no" } ,
     60{   'name' => "sort",
     61    'desc' => "Sort documents in list by this metadata field. By default it will sort by Metaname, or (if this is not set) in build (random) order.",
     62    'type' => "string",
     63    'reqd' => "no" }
     64];
     65
     66my $options =
     67{   'name'     => "List",
     68    'desc'     => "Simple list classifier plugin.",
     69    'inherits' => "Yes",
     70    'args'     => $arguments };
     71
    4872
    4973sub print_usage {
     
    6791    my $class = shift (@_);
    6892    my $self = new BasClas($class, @_);
     93
     94     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     95     my $option_list = $self->{'option_list'};
     96     push( @{$option_list}, $options );
    6997   
    7098    my ($metaname, $title, $sortname, $list);
  • trunk/gsdl/perllib/classify/Phind.pm

    r3536 r3540  
    3131# Type "classinfo.pl Phind" at the command line for a summary.
    3232
     33# 12/05/02 Added usage datastructure - John Thompson
     34
    3335package Phind;
    3436
     
    8284}
    8385
     86my $arguments =
     87[ {     'name' => "text",
     88    'desc' => "The text used to build the phrase hierarchy (default: 'section:Title,section:text').",
     89    'type' => "string",
     90    'reqd' => "no" } ,
     91{   'name' => "title",
     92    'desc' => "The metadata field used to describe each document (default: 'Title').",
     93    'type' => "metadata",
     94    'reqd' => "no" } ,
     95{   'name' => "button",
     96    'desc' => "The label for the classifier screen and button in navigation bar (default: 'Phrase').",
     97    'type' => "string",
     98    'reqd' => "no" } ,
     99{   'name' => "language",
     100    'desc' => "Language or languages to use building hierarchy. Languages are identified by two-letter country codes like en (English), es (Spanish), and fr (French). Language is a regular expression, so 'en|fr' (English or French) and '..' (match any language) are valid (default: 'en').",
     101    'type' => "language",
     102    'reqd' => "no" } ,
     103{   'name' => "savephrases",
     104    'desc' => "If set, the phrase information will be stored in the given file as text. It is probably a good idea to use an absolute path (default: not set).",
     105    'type' => "string",
     106    'reqd' => "no" } ,
     107{   'name' => "suffixmode",
     108    'desc' => "The smode parameter to the phrase extraction program. A value of 0 means that stopwords are ignored, and of 1 means that stopwords are used (default: 1).",
     109    'type' => "int",
     110    'reqd' => "no" } ,
     111{   'name' => "thesaurus",
     112    'desc' => "Name of a thesaurus stored in Phind format in the collection's etc directory (default: not set).",
     113    'type' => "string",
     114    'reqd' => "no" } ,
     115{   'name' => "untidy",
     116    'desc' => "Don't remove working files.",
     117    'type' => "flag",
     118    'reqd' => "no" }
     119];
     120
     121my $options =
     122{   'name'     => "Phind",
     123    'desc'     => "The Phind classifier plugin.",
     124    'inherits' => "Yes",
     125    'args'     => $arguments };
     126
    84127sub print_usage {
    85128    print STDERR "
     
    114157                   (default: 1)
    115158
    116    -thesaurus Name Name of a thesaurus stored in phind format in the
     159   -thesaurus Name Name of a thesaurus stored in Phind format in the
    117160                   collection's etc directory.
    118161                   (default: not set)
     
    136179    my $class = shift (@_);
    137180    my $self = new BasClas($class, @_);
     181
     182     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     183     my $option_list = $self->{'option_list'};
     184     push( @{$option_list}, $options );
    138185
    139186    my $out = $self->{'outhandle'};
     
    283330   
    284331    # Extract the text from every section
    285     # (In Phind, document:text and section:text are equivalent)
     332    # (In phind, document:text and section:text are equivalent)
    286333    if ($field eq "text") {
    287334        $data = "";
     
    331378#
    332379# When get_classify_info is called, the clauses and docs.txt files have
    333 # already been constructed in the phind directory.  This function will
     380# already been constructed in the Phind directory.  This function will
    334381# translate them into compressed, indexed MGPP files that can be read by
    335382# the phindcgi script.  It will also register our classifier so that it
     
    355402    }
    356403
    357     # Construct Phind indexes
     404    # Construct phind indexes
    358405    my $suffixmode = $self->{'suffixmode'};
    359406    my ($command, $status);
     
    363410    print $out "\nExtracting vocabulary and statistics\n" if $verbosity;
    364411    &extract_vocabulary($self);
    365 
     412 
    366413    # Use the suffix program to generate the phind/phrases file
    367414    print $out "\nExtracting phrases from processed text (with suffix)\n" if $verbosity;
    368415    &execute("suffix \"$phinddir\" $suffixmode $verbosity", $verbosity, $out);
    369 
    370416
    371417    # check that we generated some files. It's not necessarily an error if
     
    375421    print $out "\nNo phrases found for Phind classifier!\n";
    376422    return;
    377     }
     423    }   
    378424
    379425    # Create the phrase file and put phrase numbers in phind/phrases
     
    445491    return &convert_gml_to_tokens_EN($text);
    446492    }
     493
    447494    if ($language_exp =~ /zh/) {
    448495    return &convert_gml_to_tokens_ZH($text);
    449     }
     496    } 
    450497   
    451498    $_ = $text;
     
    477524    # 2. Split the remaining text into space-delimited tokens
    478525
    479     # Convert entities to their UTF8 equivalents
    480     s/&([^;]+);/&ghtml::getcharequiv($1,1)/gse;
     526    # Convert any HTML special characters (like &quot;) to their UTF8 equivalent
     527    s/&([^;]+);/&unicode::ascii2utf8(\&ghtml::getcharequiv($1,1))/gse;
    481528
    482529    # Split text at word boundaries
     
    541588    return $_;
    542589}
     590
    543591# A version of convert_gml_to_tokens that is fine-tuned to the English language.
    544592
     
    641689    if ($status != 0) {
    642690    print STDERR "Phind - Error executing '$command': $!\n";
    643     exit($status); # this causes the build to fail...
     691    exit($status);  # this causes the build to fail...
    644692    }
    645693}
  • trunk/gsdl/perllib/classify/SectionList.pm

    r2022 r3540  
    2828# itself
    2929
     30# 12/05/02 Added usage datastructure - John Thompson
     31
    3032package SectionList;
    3133
     
    3638    @ISA = ('List');
    3739}
     40
     41my $arguments =
     42[ {     'name' => "metadata",
     43    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     44    'type' => "metadata",
     45    'reqd' => "yes" } ,
     46{   'name' => "buttonname",
     47    'desc' => "Button name for this classification. Defaults to metadata name.",
     48    'type' => "string",
     49    'reqd' => "no" } ,
     50{   'name' => "sort",
     51    'desc' => "Sort documents in list by this metadata field. By default it will sort by Metaname, or (if this is not set) in build (random) order.",
     52    'type' => "string",
     53    'reqd' => "no" }
     54];
     55
     56my $options =
     57{   'name'     => "SectionList",
     58    'desc'     => "Same as List classifier but includes all sections of document (excluding top level) rather than just top level document itself.",
     59    'inherits' => "Yes",
     60    'args'     => $arguments };
    3861
    3962sub print_usage {
     
    6083    my $class = shift (@_);
    6184    my $self = new List($class, @_);
     85
     86     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     87     my $option_list = $self->{'option_list'};
     88     push( @{$option_list}, $options );
    6289 
    6390    return bless $self, $class;
  • trunk/gsdl/perllib/plugins/ArcPlug.pm

    r1424 r3540  
    2828# when an import is done), processing each file it finds
    2929
     30# 12-05-02 Added usage datastructure - John Thompson
     31
    3032package ArcPlug;
    3133
     
    3941}
    4042
     43my $options =
     44{   'name'     => "ArcPlug",
     45    'desc'     => "Plugin which recurses through an archives.inf file (i.e. the file generated in the archives directory when an import is done), processing each file it finds.",
     46    'inherits' => "Yes" };
     47
    4148sub new {
    4249    my ($class) = @_;
    4350    my $self = new BasPlug ("ArcPlug", @_);
     51     
     52    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     53     my $option_list = $self->{'option_list'};
     54     push( @{$option_list}, $options );
    4455
    4556    return bless $self, $class;
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r3515 r3540  
    4242use ghtml;
    4343
     44my $unicode_list =
     45[ { 'name' => "auto",
     46    'desc' => "Use text categorization algorithm to automatically identify the encoding of each source document. This will be slower than explicitly setting the encoding but will work where more than one encoding is used within the same collection." } ,
     47  { 'name' => "ascii",
     48    'desc' => "Plain 7 bit ascii. This may be a bit faster than using iso_8859_1. Beware of using this on a collection of documents that may contain characters outside the plain 7 bit ascii set though (e.g. German or French documents containing accents), use iso_8859_1 instead." },
     49  { 'name' => "utf8",
     50    'desc' => "either utf8 or unicode -- automatically detected." },
     51  { 'name' => "unicode",
     52    'desc' => "just unicode" } ];
     53
     54my $arguments =
     55    [ { 'name' => "process_exp",
     56    'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     57    'type' => "string",
     58    'deft' => "",
     59    'reqd' => "no" },
     60      { 'name' => "block_exp",
     61    'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
     62    'type' => 'string',
     63    'deft' => "",
     64    'reqd' => "no" },
     65      { 'name' => "input_encoding",
     66    'desc' => "The encoding of the source documents. Documents will be converted from these encodings and stored internally as utf8. The default input_encoding is 'auto'.",
     67    'type' => "enum",
     68    'list' => $unicode_list,
     69    'reqd' => "no" ,
     70    'deft' => "auto" } ,
     71      { 'name' => "default_encoding",
     72    'desc' => "Use this encoding if -input_encoding is set to 'auto' and the text categorization algorithm fails to extract the encoding or extracts an encoding unsupported by Greenstone.  The default is iso_8859_1.",
     73    'type' => "flag",
     74    'reqd' => "no" },
     75      { 'name' => "extract_language",
     76    'desc' => "Identify the language of each document and set 'Language' metadata. Note that this will be done automatically if -input_encoding is 'auto'.",
     77    'type' => "flag",
     78    'reqd' => "no" },
     79      { 'name' => "default_language",
     80    'desc' => "If Greenstone fails to work out what language a document is the 'Language' metadata element will be set to this value. The default is 'en' (ISO 639 language symbols are used: en = English). Note that if -input_encoding is not set to 'auto' and -extract_language is not set, all documents will have their 'Language' metadata set to this value.",
     81    'type' => "language",
     82    'deft' => "en",
     83    'reqd' => "no" },
     84      { 'name' => "extract_acronyms",
     85    'desc' => "Extract acronyms from within text and set as metadata.",
     86    'type' => "flag",
     87    'reqd' => "no" },
     88      { 'name' => "markup_acronyms",
     89    'desc' => "Add acronym metadata into document text.",
     90    'type' => "flag",
     91    'reqd' => "no" },
     92      { 'name' => "first",
     93    'desc' => "Comma separated list of first sizes to extract from the text into a metadata field. The field is called 'FirstNNN'.",
     94    'type' => "string",
     95    'reqd' => "no" },
     96      { 'name' => "extract_email",
     97    'desc' => "Extract email addresses as metadata.",
     98    'type' => "flag",
     99    'reqd' => "no" },
     100      { 'name' => "extract_historical_years",
     101    'desc' => "Extract time-period information from historical documents.  This is stored as metadata with the document. There is a search interface for this metadata, which you can include in your collection by adding the statement, \"format QueryInterface DateSearch\" to your collection configuration file.",
     102    'type' => "flag",
     103    'reqd' => "no" },
     104      { 'name' => "maximum_year",
     105    'desc' => "The maximum historical date to be used as metadata (in a Common Era date, such as 1950).",
     106    'type' => "int",
     107    'reqd' => "no"},
     108      { 'name' => "maximum_century",
     109    'desc' => "The maximum named century to be extracted as historical metadata (e.g. 14 will extract all references up to the 14th century).",
     110    'type' => "int",
     111    'reqd' => "no" },
     112      { 'name' => "no_bibliography",
     113    'desc' => "Do not try and block bibliographic dates when extracting historical dates.",
     114    'type' => "flag",
     115    'reqd' => "no"},
     116      { 'name' => "cover_image",
     117    'desc' => "Will look for a prefix.jpg file (where prefix is the same prefix as the file being processed) and associate it as a cover image.",
     118    'type' => "flag",
     119    'reqd' => "no" } ];
     120
     121my $options = { 'name'     => "BasPlug",
     122        'desc'     => "Base class for all the import plugins.",
     123        'inherits' => "No",
     124        'args'     => $arguments,
     125        'process_exp' => "",
     126        'block_exp' => "" };
     127
     128sub print_xml_usage {
     129    my $self = shift (@_);
     130    print STDERR "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n";
     131    $self->print_xml();
     132}
     133
     134sub print_xml {
     135    my $self = shift (@_);
     136    my $option_list = $self->{'option_list'};
     137    my $option = pop( @{$option_list} );
     138    if(defined $option)
     139    {
     140    print STDERR "<PlugInfo>\n";
     141    print STDERR "  <Name>$option->{'name'}</Name>\n";
     142    print STDERR "  <Desc>$option->{'desc'}</Desc>\n";
     143    print STDERR "  <Inherits>$option->{'inherits'}</Inherits>\n";
     144    print STDERR "  <Arguments>\n";
     145    if(defined $option->{'args'})
     146    {
     147        my $args = $option->{'args'};
     148        my $x;
     149        foreach $x ( @{$args} )
     150        {
     151        print STDERR "    <Option>\n";
     152        print STDERR "      <Name>$x->{'name'}</Name>\n";
     153        print STDERR "      <Desc>$x->{'desc'}</Desc>\n";
     154        print STDERR "      <Type>$x->{'type'}</Type>\n";
     155        print STDERR "      <Required>$x->{'reqd'}</Required>\n";
     156        if(defined $x->{'list'})
     157        {
     158            print STDERR "      <List>\n";
     159            my $list = $x->{'list'};
     160            my $y;
     161            foreach $y ( @{$list} )
     162            {
     163            print STDERR "        <Value>\n";
     164            print STDERR "          <Name>$y->{'name'}</Name>\n";
     165            print STDERR "          <Desc>$y->{'desc'}</Desc>\n";
     166            print STDERR "        </Value>\n";
     167            }
     168            # Special case of 'input_encoding'
     169            if( $x->{'name'} =~ m/^input_encoding$/i ) {
     170            my $e = $encodings::encodings;
     171            foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {
     172                print STDERR "        <Value>\n";
     173                print STDERR "          <Name>$enc</Name>\n";
     174                print STDERR "          <Desc>$e->{$enc}->{'name'}</Desc>\n";
     175                print STDERR "        </Value>\n";
     176            }
     177            }
     178            print STDERR "      </List>\n";
     179        }
     180        if(defined $x->{'deft'})
     181        {
     182            print STDERR "      <Default>$x->{'deft'}</Default>\n";
     183        }
     184        print STDERR "    </Option>\n";
     185        }
     186    }
     187    if(defined $option_list) {
     188        $self->print_xml();
     189    }
     190   
     191    print STDERR "  </Arguments>\n";
     192    print STDERR "</PlugInfo>\n";
     193    }
     194}
     195
    44196sub print_general_usage {
    45197    my ($plugin_name) = @_;
     
    154306    $self->{'num_blocked'} = 0;
    155307    $self->{'num_archives'} = 0;
     308   
     309    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     310    $self->{'option_list'} = [ $options ];
    156311   
    157312    # general options available to all plugins
  • trunk/gsdl/perllib/plugins/BibTexPlug.pm

    r3426 r3540  
    5151}
    5252
     53my $arguments =
     54[ {     'name' => "process_exp",
     55    'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     56    'type' => "string",
     57    'reqd' => "no" ,
     58    'deft' => q^(?i)\.bib$^ }
     59];
     60
     61my $options =
     62{   'name'     => "BibTexPlug",
     63    'desc'     => "BibTexPlug reads bibliography files in BibTex format. BibTexPlug creates a document object for every reference a the file. It is a subclass of SplitPlug, so if there are multiple records, all are read.",
     64    'inherits' => "Yes",
     65    'args'     => $arguments };
     66
    5367# This plugin processes files with the suffix ".bib"
    5468sub get_default_process_exp {
     
    5973sub get_default_split_exp {
    6074    return q^\n+(?=@)^;
     75}
     76sub new {
     77    my $class = shift (@_);
     78    my $self = new SplitPlug ($class, @_);
     79    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     80    my $option_list = $self->{'option_list'};
     81    push( @{$option_list}, $options );
     82    return bless $self, $class;
    6183}
    6284
     
    368390 
    369391   $text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
    370    $text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;
     392   $text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1\">$1<\/a>/g;
    371393
    372394   return $text;
  • trunk/gsdl/perllib/plugins/BookPlug.pm

    r2356 r3540  
    4949# use this plugin instead of HBPlug.
    5050
     51# 12/05/02 Added usage datastructure - John Thompson
     52
    5153package BookPlug;
    5254
     
    5759    @ISA = ('BasPlug');
    5860}
     61
     62my $arguments =
     63[ {     'name' => "process_exp",
     64    'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     65    'type' => "string",
     66    'reqd' => "no",
     67    'deft' => q^(?i)\.hb$^} ,
     68{   'name' => "block_exp",
     69    'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
     70    'type' => "string",
     71    'reqd' => "no",
     72    'deft' => q^\.jpg$^}
     73];
     74
     75my $options =
     76{   'name'     => "BookPlug",
     77    'desc'     => "Creates multi-level document from document containing &lt;&lt;TOC&gt;&gt; level tags. Metadata for each section is taken from any other tags on the same line as the &lt;&lt;TOC&gt;&gt;. e.g. &lt;&lt;Title&gt;&gt;xxxx&lt;&lt;/Title&gt;&gt; sets Title metadata. Everything else between TOC tags is treated as simple html (i.e. no processing of html links or any other HTMLPlug type stuff is done). Expects input files to have a .hb file extension by default (this can be changed by adding a -process_exp option a file with the same name as the hb file but a .jpg extension is taken as the cover image (jpg files are blocked by this plugin). BookPlug is a simplification (and extension) of the HBPlug used by the Humanity Library collections. BookPlug is faster as it expects the input files to be cleaner (The input to the HDL collections contains lots of excess html tags around &lt;&lt;TOC&gt;&gt; tags, uses &lt;&lt;I&gt;&gt; tags to specify images, and simply takes all text between &lt;&lt;TOC&gt;&gt; tags and start of text to be Title metadata). If you're marking up documents to be displayed in the same way as the HDL collections, use this plugin instead of HBPlug.",
     78    'inherits' => "Yes",
     79    'args'     => $arguments };
    5980
    # Constructor: builds the underlying BasPlug (the full argument list,
    # class name included, is passed through unchanged), registers
    # BookPlug's usage information and reblesses into the requested class.
    sub new {
        my $class = $_[0];

        my $self = BasPlug->new ("BookPlug", @_);

        # 14-05-02 To allow for proper inheritance of arguments - John Thompson
        my $inherited_options = $self->{'option_list'};
        push (@{$inherited_options}, $options);

        bless ($self, $class);
        return $self;
    }
  • trunk/gsdl/perllib/plugins/ConvertToPlug.pm

    r3350 r3540  
    4848#    @ISA = ('BasPlug'); #, 'HTMLPlug', 'TEXTPlug');
    4949}
     50
     51my $convert_to_list =
     52[ { 'name' => "html",
     53    'desc' => "" },
     54{   'name' => "text",
     55    'desc' => "" }
     56];
     57
     58my $arguments =
     59[ {     'name' => "convert_to",
     60    'desc' => "Plugin converts to TEXT or HTML (default html).",
     61    'type' => "enum",
     62    'reqd' => "no",
     63    'list' => $convert_to_list,
     64    'deft' => "html"}
     65];
     66
     67my $options =
     68{  'name'     => "ConvertToPlug",
     69   'desc'     => "The plugin is inherited by such plugins as WordPlug and PDFPlug. It facilitates the conversion of these document types to either HTML or TEXT by setting up variable that instruct ConvertToBasPlug how to work. It works by dynamically inheriting HTMLPlug or TEXTPlug based on the plugin argument 'convert_to'.  If the argument is not present, the default is to inherit HTMLPlug.",
     70   'inherits' => "Yes",
     71   'args'     => $arguments };
     72
    5073
    5174sub print_usage {
     
    122145    $self->{'metadata_fields'} .= ",GENERATOR";
    123146    }
     147
     148    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     149    my $option_list = $self->{'option_list'};
     150    push( @{$option_list}, $options );
    124151
    125152    foreach my $key (keys %$args) {
  • trunk/gsdl/perllib/plugins/EMAILPlug.pm

    r3524 r3540  
    6161#       * RFC 2047 - MIME (part 3)  Message Header Extensions
    6262#       * RFC 1806 - Content Dispositions (ie inline/attachment)
     63
     64# 12/05/02 Added usage datastructure - John Thompson
     65
    6366package EMAILPlug;
    6467
     
    8588}
    8689
     # Usage information for the arguments EMAILPlug accepts.
     # NOTE(review): both defaults below are identical to BookPlug's
     # ('.hb' files processed, '.jpg' files blocked), while this plugin's own
     # description says it reads files named with a simple number or ending
     # in .mbx. They look copied over - confirm against
     # get_default_process_exp / get_default_block_exp before relying on them.
     90my $arguments =
     91[ {     'name' => "process_exp",
     92    'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     93    'type' => "string",
     94    'reqd' => "no",
     95    'deft' => q^(?i)\.hb$^} ,
     96{   'name' => "block_exp",
     97    'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
     98    'type' => "string",
     99    'reqd' => "no",
     100    'deft' => q^\.jpg$^}
     101];
     102
     103my $options =
     104{   'name'     => "EMAILPlug",
     105    'desc'     => "Email plug reads email files.  These are named with a simple number (i.e. as they appear in maildir folders) or with the extension .mbx (for mbox mail file format).\nDocument text: The document text consists of all the text after the first blank line in the document.\nMetadata (not Dublin Core!):\n\t\$Headers      All the header content\n\t\$Subject      Subject: header\n\t\$To           To: header\n\t\$From         From: header\n\t\$FromName     Name of sender (where available)\n\t\$FromAddr     E-mail address of sender\n\t\$DateText     Date: header\n\t\$Date         Date: header in GSDL format (eg: 19990924)",
     106    'inherits' => "Yes",
     107    'args'     => $arguments };
     108
    87109# Create a new EMAILPlug object with which to parse a file.
    88110# Accomplished by creating a new BasPlug and using bless to
     
    92114    my ($class) = @_;
    93115    my $self = new BasPlug ($class, @_);
     116
     117    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     118    my $option_list = $self->{'option_list'};
     119    push( @{$option_list}, $options );
    94120
    95121    if (!parsargv::parse(\@_,
  • trunk/gsdl/perllib/plugins/FOXPlug.pm

    r2327 r3540  
    2828# This general plugin should be overridden for a particular database to process
    2929# the appropriate fields in the file.
     30
     31# 12/05/02 Added usage datastructure - John Thompson
    3032
    3133package FOXPlug;
     
    4345}
    4446
     47my $options = { 'name'     => "FOXPlug",
     48        'desc'     => "Plugin to process a Foxbase dbt file. This plugin provides the basic functionality to read in the dbt and dbf files and process each record. This general plugin should be overridden for a particular database to process the appropriate fields in the file.",
     49        'inherits' => "yes" };
     50
    4551sub new {
    4652    my ($class) = @_;
    4753    $self = new BasPlug ();
     54   
     55    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     56    my $option_list = $self->{'option_list'};
     57    push( @{$option_list}, $options );
    4858
    4959    return bless $self, $class;
  • trunk/gsdl/perllib/plugins/GAPlug.pm

    r2925 r3540  
    2424###########################################################################
    2525
    26 # Processes Greenstone Archive XML documents. Note that this plugin does no
    27 # syntax checking (though the XML::Parser module tests for well-formedness).
    28 # It's assumed that the Archive files conform to their DTD.
     26# Processes GreenstoneArchive XML documents. Note that this plugin does no
     27# syntax checking (though the XML::Parser module tests for
     28# well-formedness). It's assumed that the GreenstoneArchive files conform
     29# to their DTD.
     30
     31# 12/05/02 Added usage datastructure - John Thompson
    2932
    3033package GAPlug;
     
    3639}
    3740
     41my $options = { 'name'     => "GAPlug",
     42        'desc'     => "Processes GreenstoneArchive XML documents. Note that this plugin does no syntax checking (though the XML::Parser module tests for well-formedness). It's assumed that the GreenstoneArchive files conform to their DTD.",
     43        'inherits' => "yes" };
     44
    3845sub new {
    3946    my $class = shift (@_);
    4047    my $self = new XMLPlug ($class, @_);
     48
     49    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     50    my $option_list = $self->{'option_list'};
     51    push( @{$option_list}, $options );
    4152
    4253    $self->{'section'} = "";
     
    156167
    1571681;
     169
     170
  • trunk/gsdl/perllib/plugins/GMLPlug.pm

    r2795 r3540  
    2727# assumes that gml tags are all in lower-case.
    2828
     29# 12/05/02 Added usage datastructure - John Thompson
     30
    2931package GMLPlug;
    3032
     
    3739}
    3840
     41my $options = { 'name'     => "GMLPlug",
     42        'desc'     => "Plugin which processes a GML format document assumes that gml tags are all in lower-case.",
     43        'inherits' => "yes" };
     44
    # Constructor: builds the underlying BasPlug (the full argument list,
    # class name included, is passed through unchanged), registers GMLPlug's
    # usage information and reblesses into the requested class.
    sub new {
        my $class = $_[0];

        my $self = BasPlug->new ("GMLPlug", @_);

        # 14-05-02 To allow for proper inheritance of arguments - John Thompson
        my $inherited_options = $self->{'option_list'};
        push (@{$inherited_options}, $options);

        bless ($self, $class);
        return $self;
    }
  • trunk/gsdl/perllib/plugins/HBPlug.pm

    r2327 r3540  
    3838# Humanity Library collections
    3939
     40# 12/05/02 Added usage datastructure - John Thompson
     41
    4042package HBPlug;
    4143
     
    5052}
    5153
     54my $options = { 'name'     => "HBPlug",
     55        'desc'     => "Plugin which processes an HTML book directory. This plugin is used by the Humanity Library collections and does not handle input encodings other than ascii or extended ascii. This code is kind of ugly and could no doubt be made to run faster, by leaving it in this state I hope to encourage people to make their collections use HBSPlug instead ;-)\n\nUse HBSPlug if creating a new collection and marking up files like the Humanity Library collections. HBSPlug accepts all input encodings but expects the marked up files to be cleaner than those used by the Humanity Library collections",
     56        'inherits' => "yes" };
     57
    5258sub new {
    5359    my ($class) = @_;
    5460    my $self = new BasPlug ("HBPlug", @_);
     61   
     62    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     63    my $option_list = $self->{'option_list'};
     64    push( @{$option_list}, $options );
    5565
    5666    return bless $self, $class;
     
    6777    die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" .
    6878        $self->{'input_encoding'} . " is not an acceptable input_encoding value\n";
    69     }
     79    } 
    7080}
    7181
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r3539 r3540  
    4747    @ISA = ('BasPlug');
    4848}
     49
     50my $arguments = [ { 'name' => "process_exp",
     51            'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     52            'type' => "string",
     53            'deft' =>  q^(?i)(\.html?|\.shtml|\.shm|\.asp|\.php|\.cgi|.+\?.+=.*)$^ },
     54          { 'name' => "block_exp",
     55            'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
     56            'type' => 'string',
     57            'deft' =>  q^(?i)\.(gif|jpe?g|png|css)$^ },
     58          { 'name' => "nolinks",
     59            'desc' =>  "Don't make any attempt to trap links (setting this flag may improve speed of building/importing but any relative links within documents will be broken).",
     60            'type' => "flag" },
     61          { 'name' => "keep_head",
     62            'desc' => "Don't remove headers from html files.",
     63            'type' => "flag" },
     64          { 'name' => "no_metadata",
     65            'desc' => "Don't attempt to extract any metadata from files.",
     66            'type' => "flag" },
     67          { 'name' => "metadata_fields",
     68            'desc' => "Comma separated list of metadata fields to attempt to extract. Defaults to 'Title'. Use 'tag&lt;tagname&gt;' to have the contents of the first &lt;tagname &gt; pair put in a metadata element called 'tagname'. Capitalise this as you want the metadata capitalised in Greenstone, since the tag extraction is case insensitive.",
     69            'type' => "metadatum",
     70            'deft' => "" },
     71          { 'name' => "hunt_creator_metadata",
     72            'desc' => "Find as much metadata as possible on authorship and place it in the 'Creator' field. Requires the -metadata_fields flag.",
     73            'type' => "flag" },
     74          { 'name' => "file_is_url",
     75            'desc' => "Set if input filenames make up url of original source documents e.g. if a web mirroring tool was used to create the import directory structure.",
     76            'type' => "flag" },
     77          { 'name' => "assoc_files",
     78            'desc' => "Perl regular expression of file extensions to associate with html documents. Defaults to '(?i)\.(jpe?g|gif|png|css)\$'",
     79            'type' => "string",
     80            'deft' => q^(?i)\.(jpe?g|gif|png|css)\$^ },
     81          { 'name' => "rename_assoc_files",
     82            'desc' => "Renames files associated with documents (e.g. images). Also creates much shallower directory structure (useful when creating collections to go on cd-rom).",
     83            'type' => "flag" } ,
     84          { 'name' => "title_sub",
     85            'desc' => "Substitution expression to modify string stored as Title. Used by, for example, PDFPlug to remove \"Page 1\", etc from text used as the title.",
     86            'type' => "string" } ,
     87          { 'name' => "description_tags",
     88            'desc' => "Split document into sub-sections where &lt;Section&gt; tags occur. Note that by setting this option you implicitly set -no_metadata, as all metadata should be included within the &lt;Section&gt; tags. Also, '-keep_head' will have no effect when this option is set.",
     89            'type' => "flag" } ];
     90
     91my $options = { 'name'     => "HTMLPlug",
     92        'desc'     => "This plugin processes HTML files",
     93        'inherits' => "yes",
     94        'args'     => $arguments };
    4995
    5096sub print_usage {
     
    92138    my $self = new BasPlug ($class, @_);
    93139
     140    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     141    my $option_list = $self->{'option_list'};
     142    push( @{$option_list}, $options );
     143   
    94144    if (!parsargv::parse(\@_,
    95145             q^nolinks^, \$self->{'nolinks'},
  • trunk/gsdl/perllib/plugins/ImagePlug.pm

    r3517 r3540  
    3333
    3434
     35my $arguments = [ { 'name' => "process_exp",
     36            'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     37            'type' => "string",
     38            'deft' => q^(?i)(\.jpe?g|\.gif|\.png|\.bmp|\.xbm|\.tif?f)$^,
     39            'reqd' => "no" },
     40          { 'name' => "noscaleup",
     41            'desc' => "Don't scale up small images when making thumbnails.",
     42            'type' => "flag",
     43            'reqd' => "no" },
     44          { 'name' => "thumbnailsize",
     45            'desc' => "Make thumbnails of size nxn.",
     46            'type' => "int",
     47            'reqd' => "no" },
     48          { 'name' => "thumbnailtype",
     49            'desc' => "Make thumbnails in format 's'.",
     50            'type' => "string",
     51            'reqd' => "no" },
     52          { 'name' => "screenviewsize",
     53            'desc' => "If set, makes an image of size n for screen display and sets Screen, ScreenSize, ScreenWidth and ScreenHeight metadata.  By default it is not set.",
     54            'type' => "int",
     55            'reqd' => "no" },
     56          { 'name' => "screenviewtype",
     57            'desc' => "If -screenviewsize is set, this sets the screen display image type.  Defaults to jpg.",
     58            'type' => "string",
     59            'deft' => "jpg",
     60            'reqd' => "no" },
     61          { 'name' => "convertto",
     62            'desc' => "Convert main image to.",
     63            'type' => "string",
     64            'reqd' => "no" },
     65          { 'name' => "minimumsize",
     66            'desc' => "Ignore images smaller than n bytes.",
     67            'type' => "int",
     68            'reqd' => "no" } ];
     69
     70my $options = { 'name'     => "ImagePlug",
     71        'desc'     => "",
     72        'inherits' => "yes",
     73        'args'     => $arguments };
     74
     75
    3576sub print_usage {
    3677    my ($plugin_name) = @_;
     
    65106    my $self = new BasPlug ("ImagePlug", @_);
    66107
     108    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     109    my $option_list = $self->{'option_list'};
     110    push( @{$option_list}, $options );
     111   
    67112    if (!parsargv::parse(\@_,
    68113             q^noscaleup^, \$self->{'noscaleup'},
  • trunk/gsdl/perllib/plugins/IndexPlug.pm

    r1482 r3540  
    5050# named 'Subject'.
    5151
     52# 12/05/02 Added usage datastructure - John Thompson
     53
    5254package IndexPlug;
    5355
     
    6264}
    6365
     66my $options = { 'name'     => "IndexPlug",
     67        'desc'     => "This recursive plugin processes an index.txt file. The index.txt file should contain the list of files to be included in the collection followed by any extra metadata to be associated with each file.\n\nThe index.txt file should be formatted as follows: The first line may be a key (beginning with key:) to name the metadata fields (e.g. key: Subject Organization Date). The following lines will contain a filename followed by the value that metadata entry is to be set to. (e.g. 'irma/iw097e 3.2 unesco 1993' will associate the metadata Subject=3.2, Organization=unesco, and Date=1993 with the file irma/iw097e if the above key line was used)\n\nNote that if any of the metadata fields use the Hierarchy classifier plugin then the value they're set to should correspond to the first field (the descriptor) in the appropriate classification file.\n\nMetadata values may be named separately using a tag (e.g. &gt;Subject&lt;3.2) and this will override any name given to them by the key line. If there's no key line any unnamed metadata value will be named 'Subject'..",
     68        'inherits' => "yes" };
     69
    6470sub new {
    6571    my ($class) = @_;
    6672    my $self = new BasPlug ("IndexPlug", @_);
     73   
     74    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     75    my $option_list = $self->{'option_list'};
     76    push( @{$option_list}, $options );
    6777
    6878    return bless $self, $class;
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r3411 r3540  
    3232}
    3333
     34my $arguments = [ { 'name' => "process_exp",
     35            'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     36            'type' => "string",
     37            'deft' => q^(?i)\.pdf$^,
     38            'reqd' => "no" },
     39          { 'name' => "block_exp",
     40            'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
     41            'type' => 'string',
     42            'deft' =>  q^^ }
     43          ];
     44
     45my $options = { 'name'     => "PDFPlug",
     46        'desc'     => "Reasonably with-it pdf plugin.",
     47        'inherits' => "yes",
     48        'args'     => $arguments };
     49
    3450sub new {
    3551    my $class = shift (@_);
     
    6278    $self->{'use_sections'}=1;
    6379    }
     80    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     81    my $option_list = $self->{'option_list'};
     82    push( @{$option_list}, $options );
    6483   
    6584    return bless $self, $class;
  • trunk/gsdl/perllib/plugins/PSPlug.pm

    r2979 r3540  
    2424###########################################################################
    2525
     26# 12/05/02 Added usage datastructure - John Thompson
     27
    2628package PSPlug;
    2729
     
    3335}
    3436
     37my $arguments = [ { 'name' => "process_exp",
     38            'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     39            'type' => "string",
     40            'deft' => q^(?i)\.ps$^,
     41            'reqd' => "no" },
     42          { 'name' => "block_exp",
     43            'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
     44            'type' => 'string',
     45            'deft' =>  q^(?i)\.(eps)$^ }
     46          ];
     47
     48my $options = { 'name'     => "PSPlug",
     49        'desc'     => "This might look VERY similar to the PDF plugin.",
     50        'inherits' => "yes",
     51        'args'     => $arguments };
     52
    3553sub new {
    3654    my $class = shift (@_);
     
    3957
    4058    my $self = new ConvertToPlug ($class, "-convert_to", "text", @_ , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     59
     60    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     61    my $option_list = $self->{'option_list'};
     62    push( @{$option_list}, $options );
    4163   
    4264    if (!parsargv::parse(\@_,
  • trunk/gsdl/perllib/plugins/RTFPlug.pm

    r2979 r3540  
    2525###########################################################################
    2626
     27# 12/05/02 Added usage datastructure - John Thompson
     28
    2729package RTFPlug;
    2830
     
    3133sub BEGIN {
    3234    @ISA = ('ConvertToPlug');
     35}
     36
     37my $arguments = [ { 'name' => "process_exp",
     38            'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     39            'type' => "string",
     40            'deft' => q^(?i)\.rtf$^,
     41            'reqd' => "no" }
     42          ];
     43
     44my $options = { 'name'     => "RTFPlug",
     45        'desc'     => "Plugin for importing Rich Text Format files.",
     46        'inherits' => "yes",
     47        'args'     => $arguments };
     48
     49sub new {
     50    my $class = shift (@_);
     51    my $self = new ConvertToPlug ($class, @_);
     52   
     53    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     54    my $option_list = $self->{'option_list'};
     55    push( @{$option_list}, $options );
     56
     57    return bless $self, $class;
    3358}
    3459
     
    4267
    4368    my $outhandle = $self->{'outhandle'};
    44     print $outhandle "RTFPlug: passing $_[3] on to $self->{'converted_to'}Plug\n"
     69    print $outhandle "RTFPlug: passing $_[3] on to $self->{'converted_to'}Plug\n" 
    4570    if $self->{'verbosity'} > 1;
    4671
  • trunk/gsdl/perllib/plugins/RecPlug.pm

    r3116 r3540  
    106106use XML::Parser;
    107107
     108my $arguments = [ { 'name' => "block_exp",
     109            'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
     110            'type' => "string",
     111            'deft' => "CVS",
     112            'reqd' => "no" },
     113          { 'name' => "use_metadata_files",
     114            'desc' => "Read metadata from metadata XML files.",
     115            'type' => "flag",
     116            'reqd' => "no" } ];
     117
     118my $options = { 'name'     => "RecPlug",
     119        'desc'     => "RecPlug is a plugin which recurses through directories processing
     120each file it finds. For detailed comments edit &lt;GSDLHOME&gt;/perllib/plugins/RecPlug.pm .",
     121            'inherits' => "yes",
     122            'args'     => $arguments };
     123
    108124sub print_usage {
    109125    my ($plugin_name) = @_;
     
    124140    $self = new BasPlug ($class, @_);
    125141   
     142    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     143    my $option_list = $self->{'option_list'};
     144    push( @{$option_list}, $options );
     145
    126146    if (!parsargv::parse(\@_,
    127147             q^use_metadata_files^, \$self->{'use_metadata_files'},
  • trunk/gsdl/perllib/plugins/ReferPlug.pm

    r1676 r3540  
    2525#
    2626###########################################################################
    27 
    2827
    2928# ReferPlug reads bibliography files in Refer format.
     
    6261#
    6362
     63# 12/05/02 Added usage datastructure - John Thompson
    6464
    6565package ReferPlug;
    6666
    6767use SplitPlug;
    68 
    6968
    7069# ReferPlug is a sub-class of SplitPlug.
     
    7372}
    7473
     74my $arguments = [ { 'name' => "process_exp",
     75            'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     76            'type' => "string",
     77            'deft' => q^(?i)\.bib$^,
     78            'reqd' => "no" } ];
     79
     80my $options = { 'name'     => "ReferPlug",
     81        'desc'     => "ReferPlug reads bibliography files in Refer format.\nBy Gordon W. Paynter (gwp\@cs.waikato.ac.nz), November 2000\n\nLoosely based on hcibib2Plug by Steve Jones (stevej\@cs.waikato.ac.nz). Which was based on EMAILPlug by Gordon Paynter (gwp\@cs.waikato.ac.nz). Which was based on old versions of HTMLplug and HCIBIBPlug by Stefan Boddie and others -- it's hard to tell what came from where, now.\n\nReferPlug creates a document object for every reference in the file. It is a subclass of SplitPlug, so if there are multiple records, all are read.\n\nDocument text:\n\tThe document text consists of the reference in Refer format.\nMetadata:\n\t\$Creator    \%A Author name\n\t\$Title      \%T Title of article or book\n\t\$Journal   \%J Title of Journal\n\t\$Booktitle \%B Title of book containing the publication\n\t\$Report        \%R Type of Report, paper or thesis\n\t\$Volume     \%V Volume Number of Journal\n\t\$Number        \%N Number of Journal within Volume\n\t\$Editor     \%E Editor name\n\t\$Pages      \%P Page Number of article\n\t\$Publisher   \%I Name of Publisher\n\t\$Publisheraddr    \%C Publisher's address\n\t\$Date       \%D Date of publication\n\t\$Keywords   \%K Keywords associated with publication\n\t\$Abstract  \%X Abstract of publication\n\t\$Copyright\t\%* Copyright information for the article",
     82        'inherits' => "yes",
     83        'args'     => $arguments };
     84
    7585# This plugin processes files with the suffix ".bib"
    7686sub get_default_process_exp {
     
    8393}
    8494
     95sub new {
     96    my $class = shift (@_);
     97    my $self = new SplitPlug ($class, @_);
     98
     99    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     100    my $option_list = $self->{'option_list'};
     101    push( @{$option_list}, $options );
     102
     103    return bless $self, $class;
     104}
    85105
    86106# The process function reads a single bibliographic record and stores
  • trunk/gsdl/perllib/plugins/SRCPlug.pm

    r2657 r3540  
    3535#   Shell   (currently only done as text)
    3636
     37# 12/05/02 Added usage datastructure - John Thompson
    3738
    3839package SRCPlug;
     
    4546}
    4647
     48my $arguments = [ { 'name' => "process_exp",
     49            'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     50            'type' => "string",
     51            'deft' => q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^,
     52            'reqd' => "no" } ,
     53          { 'name' => "block_exp",
     54            'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
     55            'type' => 'string',
     56            'deft' => q^(?i)\.(o|obj|a|so|dll)$^,
     57            'reqd' => "no" } ,
     58          { 'name' => "remove_prefix",
     59            'desc' => "Remove this leading pattern from the filename (eg -remove_prefix /tmp/XX/src/). The default is to remove the whole path from the filename.",
     60            'type' => 'string',
     61            'reqd' => "no" } ];
     62
     63my $options = { 'name'     => "SRCPlug",
     64        'desc'     => "Filename is currently used for Title ( optionally minus some prefix ). Current languages:\ntext: READMEs/Makefiles\nC/C++   (currently extracts #include statements and C++ class decls)\nPerl    (currently only done as text)\nShell   (currently only done as text)",
     65            'inherits' => "yes",
     66            'args'     => $arguments };
    4767
    4868sub print_usage {
     
    6080    my ($class) = @_;
    6181    my $self = new BasPlug ($class, @_);
     82   
     83    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     84    my $option_list = $self->{'option_list'};
     85    push( @{$option_list}, $options );
    6286
    6387    if (!parsargv::parse(\@_,
  • trunk/gsdl/perllib/plugins/SplitPlug.pm

    r3537 r3540  
    4949}
    5050
     51my $options = { 'name'     => "SplitPlug",
     52                 'desc'     => "SplitPlug is a plugin for splitting input files into segments that will then be individually processed. This plugin should not be called directly.  Instead, if you need to process input files that contain several documents, you should write a plugin with a process function that will handle one of those documents and have it inherit from SplitPlug.  See ReferPlug for an example.",
     53                     'inherits' => "yes" };
     54
     55
    5156sub new {
    5257    my ($class) = @_;
    5358    $self = new BasPlug($class, @_);
    5459
     60
     61    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     62    my $option_list = $self->{'option_list'};
     63    push( @{$option_list}, $options );
     64   
    5565    if (!parsargv::parse(\@_,
    5666             q^split_exp/.*/^, \$self->{'split_exp'},
  • trunk/gsdl/perllib/plugins/TEXTPlug.pm

    r3037 r3540  
    2727# of first line of text (up to 100 characters long).
    2828
     29# 12/05/02 Added usage datastructure - John Thompson
     30
    2931package TEXTPlug;
    3032
     
    3638    @ISA = ('BasPlug');
    3739}
     40
     41my $arguments = [ { 'name' => "process_exp",
     42                          'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     43                          'type' => "string",
     44                          'deft' => q^(?i)\.te?xt$^,
     45                          'reqd' => "no" } ,
     46                        { 'name' => "title_sub",
     47                          'desc' => "Substitution expression to modify string stored as Title. Used by, for example, PSPlug to remove \"Page 1\" etc from text used as the title.",
     48                          'type' => "string",
     49                          'reqd' => "no" }];
     50
     51my $options = { 'name'     => "TEXTPlug",
     52                 'desc'     => "Creates simple single-level document. Adds Title metadata of first line of text (up to 100 characters long).",
     53                     'inherits' => "yes",
     54                     'args'     => $arguments };
    3855
    3956sub print_usage {
     
    5067    my ($class) = @_;
    5168    my $self = new BasPlug ($class, @_);
     69
     70     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     71     my $option_list = $self->{'option_list'};
     72     push( @{$option_list}, $options );
    5273
    5374    if (!parsargv::parse(\@_,
     
    87108    $title =~ /^\s+/s;
    88109    if (defined $self->{'title_sub'} &&
    89         $self->{'title_sub'}) {$title =~ s/$self->{'title_sub'}//;}
     110           $self->{'title_sub'}) {$title =~ s/$self->{'title_sub'}//;}
    90111    $title =~ /^\s*([^\n]*)/s; $title=$1;
    91112    if (length($title) > 100) {
  • trunk/gsdl/perllib/plugins/WordPlug.pm

    r3400 r3540  
    2424###########################################################################
    2525
     26# 12/05/02 Added usage datastructure - John Thompson
     27
    2628package WordPlug;
    2729
     
    3234}
    3335
     36my $arguments = [ { 'name' => "process_exp",
     37                          'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     38                          'type' => "string",
     39                          'deft' => q^(?i)\.doc$^,
     40                          'reqd' => "no" } ];
     41
     42my $options = { 'name'     => "WordPlug",
     43                 'desc'     => "",
     44                     'inherits' => "yes",
     45                     'args'     => $arguments };
     46
    3447sub new {
    3548    my $class = shift (@_);
    3649
    3750    my $self = new ConvertToPlug ($class, @_);
     51
     52     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     53     my $option_list = $self->{'option_list'};
     54     push( @{$option_list}, $options );
    3855
    3956    # wvWare will always produce html files encoded as utf-8
     
    5774
    5875    my $outhandle = $self->{'outhandle'};   
    59     print $outhandle "WordPlug: passing $_[3] on to $self->{'converted_to'}Plug\n"
     76    print $outhandle "WordPlug: passing $_[3] on to $self->{'converted_to'}Plug\n" 
    6077    if $self->{'verbosity'} > 1;
    6178   
  • trunk/gsdl/perllib/plugins/XMLPlug.pm

    r3107 r3540  
    3535
    3636use XML::Parser;
     37my $arguments = [ { 'name' => "process_exp",
     38                          'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     39                          'type' => "string",
     40                          'deft' => q^(?i)\.xml$^,
     41                          'reqd' => "no" } ];
     42
     43my $options = { 'name'     => "XMLPlug",
     44                 'desc'     => "",
     45                     'inherits' => "yes",
     46                     'args'     => $arguments };
     47
    3748
    3849my ($self);
     
    4354    $self = new BasPlug ($class, @_);
    4455
     56
     57    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     58    my $option_list = $self->{'option_list'};
     59    push( @{$option_list}, $options );
     60   
    4561    my $parser = new XML::Parser('Style' => 'Stream',
    4662                 'Handlers' => {'Char' => \&Char,
  • trunk/gsdl/perllib/plugins/ZIPPlug.pm

    r2795 r3540  
    4343# tar (for tar)
    4444
     45# 12/05/02 Added usage datastructure - John Thompson
     46
    4547package ZIPPlug;
    4648
     
    5557}
    5658
     59my $options = { 'name'     => "ZIPPlug",
     60                 'desc'     => "Plugin which handles compressed and/or archived input formats currently handled formats and file extensions are:\ngzip (.gz, .z, .tgz, .taz)\nbzip (.bz)\nbzip2 (.bz2)\nzip (.zip .jar)\ntar (.tar)\n\nThis plugin relies on the following utilities being present (if trying to process the corresponding formats):\ngunzip (for gzip)\nbunzip (for bzip)\nbunzip2 \nunzip (for zip)\ntar (for tar)",
     61                     'inherits' => "yes" };
     62
    5763sub new {
    5864    my ($class) = @_;
    5965    my $self = new BasPlug ("ZIPPlug", @_);
     66
     67     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     68     my $option_list = $self->{'option_list'};
     69     push( @{$option_list}, $options );
    6070
    6171    return bless $self, $class;
Note: See TracChangeset for help on using the changeset viewer.