Ignore:
Timestamp:
2002-11-18T17:43:56+13:00 (22 years ago)
Author:
kjdon
Message:

added John T's changes into CVS - added info to enable retrieval of usage info in xml

Location:
trunk/gsdl/perllib/classify
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/classify/AZCompactList.pm

    r3529 r3540  
    5050}
    5151
     52my $doclevel_list =
     53[ { 'name' => "top",
     54    'desc' => "Whole document." } ,
     55{   'name' => "section",
     56    'desc' => "By sections." }
     57];
     58
     59my $arguments =
     60[ {     'name' => "metadata",
     61    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     62    'type' => "metadata",
     63    'reqd' => "yes" } ,
     64{   'name' => "buttonname",
     65    'desc' => "Button name for this classification. Defaults to metadata name.",
     66    'type' => "string",
     67    'reqd' => "no" } ,
     68{   'name' => "mingroup",
     69    'desc' => "The smallest value that will cause a group in the hierarchy to form.",
     70    'type' => "int",
     71    'reqd' => "no" } ,
     72{   'name' => "minnesting",
     73    'desc' => "The smallest value that will cause a list to be converted into a nested list.",
     74    'type' => "int",
     75    'reqd' => "no" } ,
     76{   'name' => "mincompact",
     77    'desc' => "Used in compact list.",
     78    'type' => "int",
     79    'reqd' => "no" } ,
     80{   'name' => "maxcompact",
     81    'desc' => "Used in compact list.",
     82    'type' => "int",
     83    'reqd' => "no" } ,
     84{   'name' => "doclevel",
     85    'desc' => "Level to process document at.",
     86    'type' => "enum",
     87    'list' => $doclevel_list,
     88    'reqd' => "no" } ,
     89{   'name' => "onlyfirst",
     90    'desc' => "Control whether all or only first metadata value used from array of metadata.",
     91    'type' => "flag",
     92    'reqd' => "no" }
     93];
     94
     95my $options =
     96{   'name'     => "AZCompactList",
     97    'desc'     => "Classifier plugin for sorting alphabetically",
     98    'inherits' => "Yes",
     99    'args'     => $arguments };
     100
    52101sub print_usage {
    53102    print STDERR "
     
    72121    my $self = new BasClas($class, @_);
    73122
     123    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     124    my $option_list = $self->{'option_list'};
     125    push( @{$option_list}, $options );
     126
     127
    74128    my ($metaname, $title, $removeprefix);
    75129    my $mingroup = 2;
  • trunk/gsdl/perllib/classify/AZList.pm

    r3510 r3540  
    3535    @ISA = ('BasClas');
    3636}
     37
     38my $arguments = [ { 'name' => "metadata",
     39            'desc' => "Metadata field used for classification. List will be sorted by this element.",
     40            'type' => "metadata",
     41            'reqd' => "yes" } ,
     42          { 'name' => "buttonname",
     43            'desc' => "Button name for this classification. Defaults to metadata name.",
     44            'type' => "string",
     45            'reqd' => "no" } ,
     46          { 'name' => "removeprefix",
     47            'desc' => "A prefix to ignore in the Metadata values for the field when sorting.",
     48            'type' => "string",
     49            'reqd' => "no" } ];
     50
     51my $options = { 'name'     => "AZList",
     52                 'desc'     => "Classifier plugin for sorting alphabetically",
     53                     'inherits' => "Yes",
     54                     'args'     => $arguments };
    3755
    3856sub print_usage {
     
    5573    my $class = shift (@_);
    5674    my $self = new BasClas($class, @_);
     75   
     76    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     77    my $option_list = $self->{'option_list'};
     78    push( @{$option_list}, $options );
    5779   
    5880    my ($metaname, $title, $removeprefix);
  • trunk/gsdl/perllib/classify/AZSectionList.pm

    r2954 r3540  
    3434# to the classification
    3535
     36# 12/05/02 Added usage datastructure - John Thompson
     37
    3638package AZSectionList;
    3739
     
    4345}
    4446
     47my $arguments =
     48[ {     'name' => "metadata",
     49    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     50    'type' => "metadata",
     51    'reqd' => "yes" } ,
     52{   'name' => "buttonname",
     53    'desc' => "Button name for this classification. Defaults to metadata name.",
     54    'type' => "string",
     55    'reqd' => "no" }
     56];
     57
     58my $options =
     59{   'name'     => "AZSectionList",
     60    'desc'     => "Classifier plugin for sorting alphabetically. This is very similar to AZList except it sorts by section level metadata (excluding the top level) instead of just top level metadata. The only change is to the classify() subroutine which must now iterate through each section, adding each to the classification.",
     61    'inherits' => "Yes",
     62    'args'     => $arguments };
     63
    4564sub print_usage {
    4665    print STDERR "
    47   usage: classify AZSectionList -metadata X [options]
     66  usage: classify AZSectionList [options]
    4867  options:
    4968
    50   -metadata X       (required) Metadata field used for classification.
    51             List will be sorted by this element.
     69  -metadata X    (required) Metadata field used for classification,
     70                  list will be sorted by this element.
    5271
    53   -buttonname X     Button name for this classification.
    54             defaults to metadata name.
     72  -buttonname X  (OPTIONAL) Title field for this classification.
     73                  if not included title field will be Metaname.
    5574
    56   -removeprefix regex   A prefix to ignore in the Metadata values
    57             for the field when sorting.
    58 
     75  -removeprefix regex A prefix to ignore in the Metadata values
     76                      for the field when sorting.
    5977This is very similar to AZList except it sorts by section level metadata
    6078(excluding the top level) instead of just top level metadata.
     
    6583    my $class = shift (@_);
    6684    my $self = new AZList($class, @_);
     85
     86     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     87     my $option_list = $self->{'option_list'};
     88     push( @{$option_list}, $options );
    6789
    6890    return bless $self, $class;
     
    91113    # if this section doesn't contain the metadata element we're
    92114    # sorting by we won't include it in this classification
     115
    93116    if (defined $metavalue && $metavalue ne "") {
    94117    if ($self->{'removeprefix'}) {
    95118        $metavalue =~ s/^$self->{'removeprefix'}//;
    96119    }
    97 
    98120    if ($self->{'metaname'} eq 'Creator') {
    99121        &sorttools::format_string_name_english (\$metavalue);
  • trunk/gsdl/perllib/classify/BasClas.pm

    r1885 r3540  
    5050#    display it.
    5151
     52# 09/05/02 Added usage datastructure - John Thompson
     53
    5254use parsargv;
     55
     56my $verbosity_list =
     57[ {     'name' => "0",
     58    'desc' => "" } ,
     59{   'name' => "1",
     60    'desc' => "" } ,
     61{   'name' => "2",
     62    'desc' => "" } ,
     63{   'name' => "3",
     64    'desc' => "" }
     65];
     66
     67my $arguments =
     68[ {     'name' => "verbosity",
     69    'desc' => "",
     70    'type' => "enum",
     71    'list' => $verbosity_list,
     72    'deft' => "2",
     73    'reqd' => "no" } ];
     74
     75my $options =
     76{   'name'     => "BasClas",
     77        'desc'     => "Base class for all the classifiers.",
     78    'inherits' => "No",
     79    'args'     => $arguments };
     80
     81sub print_xml_usage {
     82     my $self = shift (@_);
     83     print STDERR "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n";
     84     $self->print_xml();
     85}
     86
     87sub print_xml {
     88    my $self = shift (@_);
     89     my $option_list = $self->{'option_list'};
     90     my $option = pop( @{$option_list} );
     91     if(defined $option)
     92     {
     93          print STDERR "<ClassInfo>\n";
     94          print STDERR "  <Name>$option->{'name'}</Name>\n";
     95          print STDERR "  <Desc>$option->{'desc'}</Desc>\n";
     96          print STDERR "  <Inherits>$option->{'inherits'}</Inherits>\n";
     97          print STDERR "  <Arguments>\n";
     98          if(defined $option->{'args'})
     99          {
     100                my $args = $option->{'args'};
     101                my $x;
     102                foreach $x ( @{$args} )
     103                {
     104                     print STDERR "    <Option>\n";
     105                     print STDERR "      <Name>$x->{'name'}</Name>\n";
     106                     print STDERR "      <Desc>$x->{'desc'}</Desc>\n";
     107                     print STDERR "      <Type>$x->{'type'}</Type>\n";
     108                     print STDERR "      <Required>$x->{'reqd'}</Required>\n";
     109                     if(defined $x->{'list'})
     110                     {
     111                          print STDERR "      <List>\n";
     112                          my $list = $x->{'list'};
     113                          my $y;
     114                          foreach $y ( @{$list} )
     115                          {
     116                                print STDERR "        <Value>\n";
     117                                print STDERR "          <Name>$y->{'name'}</Name>\n";
     118                                print STDERR "          <Desc>$y->{'desc'}</Desc>\n";
     119                                print STDERR "        </Value>\n";
     120                          }
     121                          # Special case of 'input_encoding'
     122                          if( $x->{'name'} =~ m/^input_encoding$/i ) {
     123                                my $e = $encodings::encodings;
     124                                foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {
     125                                     print STDERR "        <Value>\n";
     126                                     print STDERR "          <Name>$enc</Name>\n";
     127                                     print STDERR "          <Desc>$e->{$enc}->{'name'}</Desc>\n";
     128                                     print STDERR "        </Value>\n";
     129                                }
     130                          }
     131                          print STDERR "      </List>\n";
     132                     }
     133                     if(defined $x->{'deft'})
     134                     {
     135                          print STDERR "      <Default>$x->{'deft'}</Default>\n";
     136                     }
     137                     print STDERR "    </Option>\n";
     138                }
     139          }
     140          if(defined $option_list) {
     141                $self->print_xml();
     142          }
     143         
     144          print STDERR "  </Arguments>\n";
     145          print STDERR "</ClassInfo>\n";
     146     }
     147}
    53148
    54149sub print_general_usage {
     
    79174    $self->{'outhandle'} = STDERR;
    80175   
     176     $self->{'option_list'} = [ $options ];
     177
    81178    # general options available to all classifiers
    82179    if (!parsargv::parse(\@_,
  • trunk/gsdl/perllib/classify/Browse.pm

    r2489 r3540  
    2424###########################################################################
    2525
     26# 12/05/02 Added usage datastructure - John Thompson
     27
    2628use BasClas;
    2729package Browse;
     
    3234    @ISA = ('BasClas');
    3335}
     36
     37my $options =
     38{   'name'     => "Browse",
     39    'desc'     => "",
     40    'inherits' => "Yes" };
    3441
    3542sub print_usage {
     
    4350    my $class = shift (@_);
    4451    my $self = new BasClas($class, @_);
    45    
     52 
     53     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     54     my $option_list = $self->{'option_list'};
     55     push( @{$option_list}, $options ); 
    4656   
    4757    # classifier information
  • trunk/gsdl/perllib/classify/DateList.pm

    r2916 r3540  
    3333# jrm21 - added option "bymonth", which splits by year and month.
    3434
     35# 12/05/02 Added usage datastructure - John Thompson
     36
    3537package DateList;
    3638
     
    4143    @ISA = ('BasClas');
    4244}
     45
     46my $arguments =
     47[ {     'name' => "bymonth",
     48    'desc' => "Classify by year and month.",
     49    'type' => "flag",
     50    'reqd' => "no" }
     51];
     52
     53my $options =
     54{   'name'     => "DateList",
     55    'desc'     => "Classifier plugin for sorting by date. Always sorts by 'Date' metadata. Date is assumed to be in the form yyyymmdd.",
     56    'inherits' => "Yes",
     57    'args'     => $arguments };
    4358
    4459sub print_usage {
     
    4863    -bymonth  [or bymonth=1]    Classify by year and month
    4964
    50   Classifier plugin for sorting by date, and assumes that 'Date' metadata
    51   exists. Date is assumed to be in the form yyyymmdd (all digits).
    52   By default dates are classified by year.
    53 
     65    Classifier plugin for sorting by date.
     66    Always sorts by 'Date' metadata.
     67    Date is assumed to be in the form yyyymmdd (all digits).
     68    By default dates are split by year - this should change.
     69
     70    Any errors are Dana's problem.
    5471";
    5572}
     
    5875    my $class = shift (@_);
    5976    my $self = new BasClas($class, @_);
     77
     78     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     79     my $option_list = $self->{'option_list'};
     80     push( @{$option_list}, $options );
    6081
    6182    $self->{'list'} = {};
  • trunk/gsdl/perllib/classify/HTML.pm

    r2022 r3540  
    3131# url=url           -- the url of the web page to link to
    3232
     33# 12/05/02 Added usage datastructure - John Thompson
     34
    3335package HTML;
    3436
     
    3840    @ISA = ('BasClas');
    3941}
     42
     43my $arguments =
     44[ {     'name' => "url",
     45    'desc' => "The url of the web page to link to.",
     46    'type' => "string",
     47    'reqd' => "yes" } ,
     48{   'name' => "buttonname",
     49    'desc' => "The title field for this classification. If not included, the title field will be 'Browse'.",
     50    'type' => "string",
     51    'reqd' => "no" }
     52];
     53
     54my $options =
     55{   'name'     => "HTML",
     56    'desc'     => "Creates an empty classification that's simply a link to a web page.",
     57    'inherits' => "Yes",
     58    'args'     => $arguments };
    4059
    4160sub print_usage {
     
    5574    my $class = shift (@_);
    5675    my $self = new BasClas($class, @_);
     76
     77     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     78     my $option_list = $self->{'option_list'};
     79     push( @{$option_list}, $options );
    5780   
    5881    my ($title, $url);
  • trunk/gsdl/perllib/classify/Hierarchy.pm

    r2973 r3540  
    4141#                     like an AZList classification)
    4242
     43# 12/05/02 Added usage datastructure - John Thompson
     44# 12/05/02 Modified new() so as not to die on error, only on init() - John Thompson
     45
    4346package Hierarchy;
    4447
     
    5255}
    5356
     57my $arguments =
     58[ {     'name' => "metadata",
     59    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     60    'type' => "metadata",
     61    'reqd' => "yes" } ,
     62{   'name' => "buttonname",
     63    'desc' => "Button name for this classification. Defaults to metadata name.",
     64    'type' => "string",
     65    'reqd' => "no" } ,
     66{   'name' => "hfile",
     67    'desc' => "The classification structure file.",
     68    'type' => "string",
     69    'reqd' => "yes" } ,
     70{   'name' => "sort",
     71    'desc' => "Metadata field to sort by (defaults to none).",
     72    'type' => "string",
     73    'reqd' => "no" } ,
     74{   'name' => "hlist_at_top",
     75    'desc' => "Display the first level of the classification horizontally.",
     76    'type' => "flag",
     77    'reqd' => "no" }
     78];
     79
     80my $options =
     81{   'name'     => "Hierarchy",
     82    'desc'     => "Classifier plugin for generating hierarchical classifications",
     83    'inherits' => "Yes" ,
     84    'args'     => $arguments };
     85
    5486sub print_usage {
    5587    print STDERR "
     
    76108    my $class = shift (@_);
    77109    my $self = new BasClas($class, @_);
    78    
     110 
     111     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     112     my $option_list = $self->{'option_list'};
     113     push( @{$option_list}, $options );
     114   
    79115    my $sortname = "Title";
    80116    my ($hfile, $metadata, $title, $hlist_at_top);
     
    88124             "allow_extra_options")) {
    89125   
    90     print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
    91     &print_usage();
    92     die "\n";
     126          $self->{'construction_error'} = "Incorrect options passed to $class, check your collect.cfg file.";
    93127    }
    94128
    95129    if (!$metadata) {
    96     &print_usage;
    97     print STDERR "\nHierarchy error: no metadata supplied\n";
    98     die "\n";
     130          $self->{'construction_error'} = "Hierarchy error: no metadata supplied.";
    99131    }
    100132
     
    103135    $sortname = undef if $sortname =~ /^nosort$/;
    104136
     137     my $subjectfile;
     138
    105139    if (!$hfile) {
    106     &print_usage;
    107     print STDERR "\nHierarchy error: No -hfile supplied\n";
    108     die "\n";
    109     }
    110    
    111     my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
    112     if (!-e $subjectfile) {
    113     my $collfile = $subjectfile;
    114     $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
    115     if (!-e $subjectfile) {
    116         my $outhandle = $self->{'outhandle'};
    117         &print_usage;
    118         print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
    119         print STDERR "This file should be in $collfile or $subjectfile\n";
    120         die "\n";
    121     }
    122     }
     140          $self->{'construction_error'} = "Hierarchy error: No -hfile supplied.";
     141    }
     142    else
     143     {
     144          $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
     145          if (!-e $subjectfile) {
     146                my $collfile = $subjectfile;
     147                $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
     148                if (!-e $subjectfile) {
     149                     my $outhandle = $self->{'outhandle'};
     150                     &print_usage;
     151                     print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
     152                     print STDERR "This file should be in $collfile or $subjectfile\n";
     153                     die "\n";
     154                }
     155          }
     156    }
    123157
    124158    $self->{'descriptorlist'} = {}; # first field in subject file
     
    135169sub init {
    136170    my $self = shift (@_);
     171
     172     if(defined $self->{'construction_error'} || !defined $self->{'metaname'} || !defined $self->{'subjectfile'}) {
     173          print STDERR "Error: " , $self->{'construction_error'} , "\n";
     174          &print_usage;
     175          die "\n";
     176     }
    137177
    138178    # read in the subject file
     
    230270    $classifyinfo->{'Title'} = $title;
    231271    $classifyinfo->{'classifytype'} = $classifytype;
    232 
    233272    return $classifyinfo;
    234273    }
    235274
    236275    $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
    237 
    238276    my $offset = 0;
    239277    foreach $thing (@{$classifyinfo->{'contains'}}) {
    240278    $offset ++ if defined $thing->{'OID'};
    241279    }
    242    
    243     while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) {
     280
     281    while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) { 
    244282    push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
    245283    }
  • trunk/gsdl/perllib/classify/List.pm

    r2022 r3540  
    3838#                      if metadata is also not included title will be 'List'
    3939
     40# 12/05/02 Added usage datastructure - John Thompson
     41
    4042use BasClas;
    4143package List;
     
    4648    @ISA = ('BasClas');
    4749}
     50
     51my $arguments =
     52[ {     'name' => "metadata",
     53    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     54    'type' => "metadata",
     55    'reqd' => "yes" } ,
     56{   'name' => "buttonname",
     57    'desc' => "Button name for this classification. Defaults to metadata name.",
     58    'type' => "string",
     59    'reqd' => "no" } ,
     60{   'name' => "sort",
     61    'desc' => "Sort documents in list by this metadata field. By default it will sort by Metaname, or (if this is not set) in build (random) order.",
     62    'type' => "string",
     63    'reqd' => "no" }
     64];
     65
     66my $options =
     67{   'name'     => "List",
     68    'desc'     => "Simple list classifier plugin.",
     69    'inherits' => "Yes",
     70    'args'     => $arguments };
     71
    4872
    4973sub print_usage {
     
    6791    my $class = shift (@_);
    6892    my $self = new BasClas($class, @_);
     93
     94     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     95     my $option_list = $self->{'option_list'};
     96     push( @{$option_list}, $options );
    6997   
    7098    my ($metaname, $title, $sortname, $list);
  • trunk/gsdl/perllib/classify/Phind.pm

    r3536 r3540  
    3131# Type "classinfo.pl Phind" at the command line for a summary.
    3232
     33# 12/05/02 Added usage datastructure - John Thompson
     34
    3335package Phind;
    3436
     
    8284}
    8385
     86my $arguments =
     87[ {     'name' => "text",
     88    'desc' => "The text used to build the phrase hierarchy (default: 'section:Title,section:text').",
     89    'type' => "string",
     90    'reqd' => "no" } ,
     91{   'name' => "title",
     92    'desc' => "The metadata field used to describe each document (default: 'Title').",
     93    'type' => "metadata",
     94    'reqd' => "no" } ,
     95{   'name' => "button",
     96    'desc' => "The label for the classifier screen and button in navigation bar (default: 'Phrase').",
     97    'type' => "string",
     98    'reqd' => "no" } ,
     99{   'name' => "language",
     100    'desc' => "Language or languages to use building hierarchy. Languages are identified by two-letter country codes like en (English), es (Spanish), and fr (French). Language is a regular expression, so 'en|fr' (English or French) and '..' (match any language) are valid (default: 'en').",
     101    'type' => "language",
     102    'reqd' => "no" } ,
     103{   'name' => "savephrases",
     104    'desc' => "If set, the phrase information will be stored in the given file as text. It is probably a good idea to use an absolute path (default: not set).",
     105    'type' => "string",
     106    'reqd' => "no" } ,
     107{   'name' => "suffixmode",
     108    'desc' => "The smode parameter to the phrase extraction program. A value of 0 means that stopwords are ignored, and of 1 means that stopwords are used (default: 1).",
     109    'type' => "int",
     110    'reqd' => "no" } ,
     111{   'name' => "thesaurus",
     112    'desc' => "Name of a thesaurus stored in Phind format in the collection's etc directory (default: not set).",
     113    'type' => "string",
     114    'reqd' => "no" } ,
     115{   'name' => "untidy",
     116    'desc' => "Don't remove working files.",
     117    'type' => "flag",
     118    'reqd' => "no" }
     119];
     120
     121my $options =
     122{   'name'     => "Phind",
     123    'desc'     => "The Phind classifier plugin.",
     124    'inherits' => "Yes",
     125    'args'     => $arguments };
     126
    84127sub print_usage {
    85128    print STDERR "
     
    114157                   (default: 1)
    115158
    116    -thesaurus Name Name of a thesaurus stored in phind format in the
     159   -thesaurus Name Name of a thesaurus stored in Phind format in the
    117160                   collection's etc directory.
    118161                   (default: not set)
     
    136179    my $class = shift (@_);
    137180    my $self = new BasClas($class, @_);
     181
     182     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     183     my $option_list = $self->{'option_list'};
     184     push( @{$option_list}, $options );
    138185
    139186    my $out = $self->{'outhandle'};
     
    283330   
    284331    # Extract the text from every section
    285     # (In Phind, document:text and section:text are equivalent)
     332    # (In phind, document:text and section:text are equivalent)
    286333    if ($field eq "text") {
    287334        $data = "";
     
    331378#
    332379# When get_classify_info is called, the clauses and docs.txt files have
    333 # already been constructed in the phind directory.  This function will
     380# already been constructed in the Phind directory.  This function will
    334381# translate them into compressed, indexed MGPP files that can be read by
    335382# the phindcgi script.  It will also register our classifier so that it
     
    355402    }
    356403
    357     # Construct Phind indexes
     404    # Construct phind indexes
    358405    my $suffixmode = $self->{'suffixmode'};
    359406    my ($command, $status);
     
    363410    print $out "\nExtracting vocabulary and statistics\n" if $verbosity;
    364411    &extract_vocabulary($self);
    365 
     412 
    366413    # Use the suffix program to generate the phind/phrases file
    367414    print $out "\nExtracting phrases from processed text (with suffix)\n" if $verbosity;
    368415    &execute("suffix \"$phinddir\" $suffixmode $verbosity", $verbosity, $out);
    369 
    370416
    371417    # check that we generated some files. It's not necessarily an error if
     
    375421    print $out "\nNo phrases found for Phind classifier!\n";
    376422    return;
    377     }
     423    }   
    378424
    379425    # Create the phrase file and put phrase numbers in phind/phrases
     
    445491    return &convert_gml_to_tokens_EN($text);
    446492    }
     493
    447494    if ($language_exp =~ /zh/) {
    448495    return &convert_gml_to_tokens_ZH($text);
    449     }
     496    } 
    450497   
    451498    $_ = $text;
     
    477524    # 2. Split the remaining text into space-delimited tokens
    478525
    479     # Convert entities to their UTF8 equivalents
    480     s/&([^;]+);/&ghtml::getcharequiv($1,1)/gse;
     526    # Convert any HTML special characters (like &quot;) to their UTF8 equivalent
     527    s/&([^;]+);/&unicode::ascii2utf8(\&ghtml::getcharequiv($1,1))/gse;
    481528
    482529    # Split text at word boundaries
     
    541588    return $_;
    542589}
     590
    543591# A version of convert_gml_to_tokens that is fine-tuned to the English language.
    544592
     
    641689    if ($status != 0) {
    642690    print STDERR "Phind - Error executing '$command': $!\n";
    643     exit($status); # this causes the build to fail...
     691    exit($status);  # this causes the build to fail...
    644692    }
    645693}
  • trunk/gsdl/perllib/classify/SectionList.pm

    r2022 r3540  
    2828# itself
    2929
     30# 12/05/02 Added usage datastructure - John Thompson
     31
    3032package SectionList;
    3133
     
    3638    @ISA = ('List');
    3739}
     40
     41my $arguments =
     42[ {     'name' => "metadata",
     43    'desc' => "Metadata field used for classification. List will be sorted by this element.",
     44    'type' => "metadata",
     45    'reqd' => "yes" } ,
     46{   'name' => "buttonname",
     47    'desc' => "Button name for this classification. Defaults to metadata name.",
     48    'type' => "string",
     49    'reqd' => "no" } ,
     50{   'name' => "sort",
     51    'desc' => "Sort documents in list by this metadata field. By default it will sort by Metaname, or (if this is not set) in build (random) order.",
     52    'type' => "string",
     53    'reqd' => "no" }
     54];
     55
     56my $options =
     57{   'name'     => "SectionList",
     58    'desc'     => "Same as List classifier but includes all sections of document (excluding top level) rather than just top level document itself.",
     59    'inherits' => "Yes",
     60    'args'     => $arguments };
    3861
    3962sub print_usage {
     
    6083    my $class = shift (@_);
    6184    my $self = new List($class, @_);
     85
     86     # 14-05-02 To allow for proper inheritance of arguments - John Thompson
     87     my $option_list = $self->{'option_list'};
     88     push( @{$option_list}, $options );
    6289 
    6390    return bless $self, $class;
Note: See TracChangeset for help on using the changeset viewer.