Changeset 1279 for branches


Ignore:
Timestamp:
2000-07-13T10:21:53+12:00 (24 years ago)
Author:
sjboddie
Message:

merged changes to trunk into New_Config_Format branch

Location:
branches/New_Config_Format-branch/gsdl
Files:
13 deleted
147 edited

Legend:

Unmodified
Added
Removed
  • branches/New_Config_Format-branch/gsdl/bin/script/buildcol.pl

    r1031 r1279  
    5454    print STDERR "   -keepold              will not destroy the current contents of the\n";
    5555    print STDERR "                         building directory\n";
    56     print STDERR "   -allclassifications   Don't remove empty classifications\n\n";
     56    print STDERR "   -allclassifications   Don't remove empty classifications\n";
     57    print STDERR "   -create_images        Attempt to create default images for new\n";
     58    print STDERR "                         collection. This relies on the Gimp being\n";
     59    print STDERR "                         installed along with relevant perl modules\n";
     60    print STDERR "                         to allow scripting from perl\n\n";
    5761}
    5862
     
    6165{
    6266    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
    63     $debug, $mode, $indexname, $keepold, $allclassifications);
     67    $debug, $mode, $indexname, $keepold, $allclassifications,
     68    $create_images);
    6469    if (!parsargv::parse(\@ARGV,
    6570             'verbosity/\d+/2', \$verbosity,
     
    7277             'index/.*/', \$indexname,
    7378             'keepold', \$keepold,
    74              'allclassifications', \$allclassifications)) {
     79             'allclassifications', \$allclassifications,
     80             'create_images', \$create_images)) {
    7581    &print_usage();
    7682    die "\n";
     
    8591    # read the configuration file
    8692    $textindex = "section:text";
    87     $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc/collect.cfg");
     93    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    8894    if (-e $configfilename) {
    8995    $collectcfg = &colcfg::read_collect_cfg ($configfilename);
     
    101107    }
    102108   
     109    # create default images if required
     110    if ($create_images) {
     111    my $collection_name = $collection;
     112    $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}
     113    if defined $collectcfg->{'collectionmeta'}->{'collectionname'};
     114   
     115    &create_images ($collection_name);
     116    }
     117
    103118    # fill in the default archives and building directories if none
    104119    # were supplied, turn all \ into / and remove trailing /
     
    177192}
    178193
    179 
     194sub create_images {
     195    my ($collection_name) = @_;
     196
     197    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl");
     198    if (!-e $image_script) {
     199    print STDERR "WARNING: Image making script ($image_script) could not be found\n";
     200    print STDERR "         Default images will not be generated\n\n";
     201    return;
     202    }
     203
     204    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");
     205
     206    &util::mk_all_dir ($imagedir);
     207
     208    # create the images
     209    system ("$image_script -size 1.5 -image_dir \"$imagedir\" -filename $collection.gif -text \"$collection_name\"");
     210    system ("$image_script -image_dir \"$imagedir\" -filename ${collection}sm.gif -text \"$collection_name\"");
     211
     212    # update the collect.cfg configuration file (this will need
     213    # to be changed when the config file format changes)
     214    if (!open (CFGFILE, $configfilename)) {
     215    print STDERR "WARNING: Couldn't open config file ($configfilename)\n";
     216    print STDERR "         for updating so collection images may not be linked correctly\n";
     217    return;
     218    }
     219
     220    my $line = ""; my $file = "";
     221    my $found = 0; my $foundsm = 0;
     222    while (defined ($line = <CFGFILE>)) {
     223    if ($line =~ /collectionmeta\s+iconcollection\s+/) {
     224        $line = "collectionmeta iconcollection _httprefix_/collect/$collection/images/$collection.gif\n";
     225        $found = 1;
     226    } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
     227        $line = "collectionmeta iconcollectionsmall _httprefix_/collect/$collection/images/${collection}sm.gif\n";
     228        $foundsm = 1;
     229    }
     230    $file .= $line;
     231    }
     232    close CFGFILE;
     233
     234    $file .= "collectionmeta iconcollection _httprefix_/collect/$collection/images/$collection.gif\n" if !$found;
     235    $file .= "collectionmeta iconcollectionsmall _httprefix_/collect/$collection/images/${collection}sm.gif\n" if !$foundsm;
     236
     237    if (!open (CFGFILE, ">$configfilename")) {
     238    print STDERR "WARNING: Couldn't open config file ($configfilename)\n";
     239    print STDERR "         for updating so collection images may not be linked correctly\n";
     240    return;
     241    }
     242    print CFGFILE $file;
     243    close CFGFILE;
     244}
  • branches/New_Config_Format-branch/gsdl/bin/script/gimp/flash_button.pl

    r1037 r1279  
    5555
    5656sub print_usage {
    57     print STDERR "\n  usage: $0 [options] macrofile\n\n";
     57    print STDERR "\n  usage: $0 [options]\n\n";
    5858    print STDERR "  options:\n";
    5959    print STDERR "   -cfg_file file        configuration file containing one or more\n";
     
    136136    chomp $image_dir;
    137137    }
     138
     139    # replace any '\n' occurring in text with carriage return
     140    $text =~ s/\\n/\n/gi;
    138141   
    139142    if ($cfg_file =~ /\w/) {
  • branches/New_Config_Format-branch/gsdl/bin/script/gimp/title_icon.pl

    r1037 r1279  
    4646
    4747
    48 my ($cfg_file, $imagefile, $width, $height, $imageheight, $stripecolor, $stripewidth,
    49     $stripe_alignment, $i_transparency, $text, $text_alignment, $filename, $textspace_x,
    50     $textspace_y, $bgcolor, $fontcolor, $fontsize, $minfontsize, $fontname,
    51     $fontweight, $fontslant, $fontwidth, $fontspacing, $image_dir);
     48local ($cfg_file, $size, $imagefile, $width, $height, $imageheight, $stripecolor, $stripewidth,
     49       $stripe_alignment, $i_transparency, $text, $text_alignment, $filename, $textspace_x,
     50       $textspace_y, $bgcolor, $fontcolor, $fontsize, $minfontsize, $fontname,
     51       $fontweight, $fontslant, $fontwidth, $fontspacing, $image_dir, $dont_wrap);
    5252
    5353sub print_usage {
     
    5757    print STDERR "                          sets of the following options - use to create\n";
    5858    print STDERR "                          batches of images\n";
     59    print STDERR "   -size number           the overall size ratio of the image (i.e. a size\n";
     60    print STDERR "                          of 2 will create an image twice the default size)\n";
    5961    print STDERR "   -image_dir directory   directory to create images in [`pwd`]\n";
    6062    print STDERR "                          this should be full path to existing directory\n";
     
    8385    print STDERR "   -fontslant             [r]\n";
    8486    print STDERR "   -fontwidth             [*]\n";
    85     print STDERR "   -fontspacing           [*]\n\n";
     87    print STDERR "   -fontspacing           [*]\n";
     88    print STDERR "   -dont_wrap             don't attempt to wrap text\n\n";
    8689}
    8790
     
    8992    $image_dir = "./";
    9093    $imagefile = "";
    91     $width = 150;
    92     $height = 44;
    93     $imageheight = 110;
     94    $width = int (150 * $size);
     95    $height = int (44 * $size);
     96    $imageheight = int (110 * $size);
    9497    $stripecolor = $gsdl_green;
    95     $stripewidth = 40;
     98    $stripewidth = int (40 * $size);
    9699    $stripe_alignment = "left";
    97100    $i_transparency = 60;
     
    99102    $text_alignment = "left";
    100103    $filename = "";
    101     $textspace_x = 3;
    102     $textspace_y = 3;
     104    $textspace_x = int (3 * $size);
     105    $textspace_y = int (3 * $size);
    103106    $bgcolor = $gsdl_green;
    104107    $fontcolor = $black;
    105     $fontsize = 17;
    106     $minfontsize = 10;
     108    $fontsize = int (17 * $size);
     109    $minfontsize = int (10 * $size);
    107110    $fontname = "lucida";
    108111    $fontweight = "medium";
     
    116119    if (!parsargv::parse(\@ARGV,
    117120             'cfg_file/.*/', \$cfg_file,
     121             'size/\d+/1', \$size,
    118122             'image_dir/.*/./', \$image_dir,
    119123             'imagefile/.*/', \$imagefile,
     
    138142             'fontslant/.*/r', \$fontslant,
    139143             'fontwidth/.*/*', \$fontwidth,
    140              'fontspacing/.*/*', \$fontspacing)) {
     144             'fontspacing/.*/*', \$fontspacing,
     145             'dont_wrap', \$dont_wrap)) {
    141146    &print_usage();
    142147    die "title_icon.pl: incorrect options\n";
     
    148153    chomp $image_dir;
    149154    }
    150    
     155
    151156    if ($cfg_file =~ /\w/) {
    152157   
     
    177182
    178183sub produce_image {
     184
     185    &adjust_args ();
     186    &wrap_text () unless $dont_wrap;
    179187
    180188    my $use_image = 0;
     
    338346    return 1;
    339347}
     348
     349# adjust arguments that are affected by the size argument
     350sub adjust_args {   
     351
     352    if ($size != 1) {
     353    my @size_args = ('width', 'height', 'imageheight', 'stripewidth',
     354             'textspace_x', 'textspace_y', 'fontsize', 'minfontsize');
     355    foreach $arg (@size_args) {
     356        $$arg = int ($$arg * $size);
     357    }
     358    }
     359}
     360
     361sub wrap_text {
     362
     363    # don't wrap text if it already contains carriage returns
     364    return if $text =~ /\n/;
     365
     366    # the following assumes that all words are less than $wrap_length long
     367    my $wrap_length = 14;
     368
     369    my $new_text = "";
     370    while (length ($text) >= $wrap_length) {
     371    my $line = substr ($text, 0, $wrap_length);
     372    $text =~ s/^$line//;
     373    $line =~ s/\s([^\s]*)$/\n/;
     374    $text = $1 . $text;
     375    $new_text .= $line;
     376    }
     377    $new_text .= $text;
     378    $text = $new_text;
     379}
    340380   
    341381sub query {
  • branches/New_Config_Format-branch/gsdl/bin/script/gsw3mir.pl

    r845 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!/usr/bin/perl -w
    22
    33###########################################################################
  • branches/New_Config_Format-branch/gsdl/bin/script/import.pl

    r1031 r1279  
    5656    print STDERR "                          directory -- use with care\n";
    5757    print STDERR "   -gzip                  Use gzip to compress resulting gml documents\n";
     58    print STDERR "                          (don't forget to include ZIPPlug in your plugin\n";
     59    print STDERR "                          list when building from compressed documents)\n";
    5860    print STDERR "   -maxdocs number        Maximum number of documents to import\n";
    5961    print STDERR "   -groupsize number      Number of GML documents to group into one file\n";
     
    135137
    136138    # load all the plugins
    137     $pluginfo = &plugin::load_plugins ($plugins);
     139    $pluginfo = &plugin::load_plugins ($plugins, $verbosity);
    138140    if (scalar(@$pluginfo) == 0) {
    139141    print STDERR "No plugins were loaded.\n";
  • branches/New_Config_Format-branch/gsdl/bin/script/mkcol.pl

    r1031 r1279  
    5151    print STDERR "   -title text         The title for the collection\n";
    5252    print STDERR "   -about text         The about text for the collection\n";
    53     print STDERR "   -plugins list       Space separated list of perl plugin modules to use\n";
     53    print STDERR "   -plugin text        perl plugin module to use (there may be multiple\n";
     54    print STDERR "                       plugin entries\n";
    5455    print STDERR "   -refine   list      Space separated list of perl plugin modules to use\n";
    5556
     
    106107        $line =~ s/\*\*title\*\*/$title/g;
    107108        $line =~ s/\*\*about\*\*/$about/g;
    108         $line =~ s/\*\*plugins\*\*/$plugins/g;
     109        $line =~ s/\*\*plugins\*\*/$pluginstring/g;
    109110        $line =~ s/\*\*refine\*\*/$refine/g;
    110111
     
    119120
    120121
    121 my (@indexes, @indexestext);
     122my (@indexes, @indexestext, @plugin);
    122123
    123124# get and check options
     
    132133               'title/.+/', \$title,
    133134               'about/.+/', \$about,
    134                'plugins/.+/GMLPlug TEXTPlug ArcPlug RecPlug/', \$plugins,
     135               'plugin/.+', \@plugin,
    135136               'refine/.+/', \$refine
    136137               )) {
     
    138139    die "\n";
    139140}
    140    
     141
     142# load default plugins if none were on command line   
     143if (!scalar(@plugin)) {
     144    @plugin = (GMLPlug,TEXTPlug,ArcPlug,RecPlug);
     145}
     146
    141147# get and check the collection name
    142148($collection) = @ARGV;
     
    195201}
    196202
     203$pluginstring = "";
     204foreach $plugin (@plugin) {
     205    $pluginstring .= "plugin         $plugin\n";
     206}
    197207
    198208# make sure the model collection exists
  • branches/New_Config_Format-branch/gsdl/bin/script/newsrc.pl

    r546 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!/usr/bin/perl -w
    22
    33###########################################################################
  • branches/New_Config_Format-branch/gsdl/bin/script/togb.pl

    r630 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!/usr/bin/perl -w
    22
    33###########################################################################
     
    3636if (!parsargv::parse(\@ARGV,
    3737             'unicode', \$unicode,
    38              'extended', \$extended,
     38             'iso_8859_1', \$iso_8859_1,
    3939             'gb', \$gb)) {
    4040    print STDERR "\n  usage: $0 [options]\n\n";
    4141    print STDERR "  options:\n";
    42     print STDERR "   -utf8     input is in utf-8 or unicode (default)\n";
    43     print STDERR "   -extended input is in extended ascii\n";
    44     print STDERR "   -gb       input is in GB or GBK\n\n";
     42    print STDERR "   -unicode    input is in utf-8 or unicode (default)\n";
     43    print STDERR "   -iso_8859_1 input is in extended ascii (ISO-8859-1 Latin 1)\n";
     44    print STDERR "   -gb         input is in GB or GBK (simplified Chinese)\n\n";
    4545    die "\n";
    4646}
    4747
    4848$encoding = "utf8" if $unicode;
    49 $encoding = "extended" if $extended;
     49$encoding = "iso_8859_1" if $iso_8859_1;
    5050$encoding = "gb" if $gb;
    5151
  • branches/New_Config_Format-branch/gsdl/bin/script/touc.pl

    r630 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!/usr/bin/perl -w
    22
    33###########################################################################
     
    3636if (!parsargv::parse(\@ARGV,
    3737             'unicode', \$unicode,
    38              'extended', \$extended,
     38             'iso_8859_1', \$iso_8859_1,
     39             'iso_8859_6', \$iso_8859_6,
     40             'windows_1256', \$windows_1256,
    3941             'gb', \$gb)) {
    4042    print STDERR "\n  usage: $0 [options]\n\n";
    4143    print STDERR "  options:\n";
    42     print STDERR "   -utf8     input is in utf-8 or unicode (default)\n";
    43     print STDERR "   -extended input is in extended ascii\n";
    44     print STDERR "   -gb       input is in GB or GBK\n\n";
     44    print STDERR "   -unicode      input is in utf-8 or unicode (default)\n";
     45    print STDERR "   -iso_8859_1   input is in extended ascii (ISO-8859-1 Latin 1)\n";
     46    print STDERR "   -iso_8859_6   input is in 8 bit Arabic (ISO-8859-6)\n";
     47    print STDERR "   -windows_1256 input is in Windows 1256 (Arabic)\n";
     48    print STDERR "   -gb           input is in GB or GBK (simplified Chinese)\n\n";
    4549    die "\n";
    4650}
    4751
    4852$encoding = "utf8" if $unicode;
    49 $encoding = "extended" if $extended;
     53$encoding = "iso_8859_1" if $iso_8859_1;
     54$encoding = "iso_8859_6" if $iso_8859_6;
     55$encoding = "windows_1256" if $windows_1256;
    5056$encoding = "gb" if $gb;
    5157
  • branches/New_Config_Format-branch/gsdl/bin/script/toutf8.pl

    r630 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!/usr/bin/perl -w
    22
    33###########################################################################
     
    3636if (!parsargv::parse(\@ARGV,
    3737             'unicode', \$unicode,
    38              'extended', \$extended,
     38             'iso_8859_1', \$iso_8859_1,
     39             'iso_8859_6', \$iso_8859_6,
     40             'windows_1256', \$windows_1256,
    3941             'gb', \$gb)) {
    4042    print STDERR "\n  usage: $0 [options]\n\n";
    4143    print STDERR "  options:\n";
    42     print STDERR "   -utf8     input is in utf-8 or unicode (default)\n";
    43     print STDERR "   -extended input is in extended ascii\n";
    44     print STDERR "   -gb       input is in GB or GBK\n\n";
     44    print STDERR "   -unicode      input is in utf-8 or unicode (default)\n";
     45    print STDERR "   -iso_8859_1   input is in extended ascii (ISO-8859-1 Latin 1)\n";
     46    print STDERR "   -iso_8859_6   input is in 8 bit Arabic (ISO-8859-6)\n";
     47    print STDERR "   -windows_1256 input is in Windows 1256 (Arabic)\n";
     48    print STDERR "   -gb           input is in GB or GBK (simplified Chinese)\n\n";
    4549    die "\n";
    4650}
    4751
    4852$encoding = "utf8" if $unicode;
    49 $encoding = "extended" if $extended;
     53$encoding = "iso_8859_1" if $iso_8859_1;
     54$encoding = "iso_8859_6" if $iso_8859_6;
     55$encoding = "windows_1256" if $windows_1256;
    5056$encoding = "gb" if $gb;
    5157
  • branches/New_Config_Format-branch/gsdl/bin/script/translate.pl

    r1062 r1279  
    5454        'Ouml' => chr (214),
    5555        'Uuml' => chr (220),
    56         'szlig' => chr (223));
     56        'szlig' => chr (223),
     57        'aacute' => chr (225),
     58        'eacute' => chr (233),
     59        'iacute' => chr (237),
     60        'oacute' => chr (243),
     61        'uacute' => chr (250),
     62        'Aacute' => chr (193),
     63        'Eacute' => chr (201),
     64        'Iacute' => chr (205),
     65        'Oacute' => chr (211),
     66        'Uacute' => chr (218),
     67        'ntilde' => chr (241),
     68        'Ntilde' => chr (209));
    5769
    5870my $hand_made = 0;
     
    137149    # process all the images
    138150
    139     $dmfile =~ s/\n\#\#\s*\"([^\"]*)\"\s*\#\#\s*([^\s\#]*)\s*\#\#\s*([^\s\#]*)\s*\#\#(.*?)(?=(\n\#|\s*\Z))/
    140     &process_image ($1, $2, $3, $4)/esg;
     151    $dmfile =~ s/\n\#\#\s*\"([^\"]*)\"\s*\#\#\s*([^\s\#]*)\s*\#\#\s*([^\s\#]*)\s*\#\#(.*?)(?=(\n\#|\s*\Z))/&process_image ($1, $2, $3, $4)/esg;
    141152
    142153    # add language parameter to each macro
     
    150161
    151162    my $origtext = $text;
     163    $text =~ s/&(\d{3,4});/chr($1)/ge;
    152164    $text =~ s/&([^;]*);/$rmap{$1}/g;
    153165
     
    204216    } elsif ($image_type eq "green_title") {
    205217
    206     # generate green title image
     218    # read the width if it is specified in $image_macros
     219        my ($width) = $image_macros =~ /_width${image_name}x?_\s*[^\{]*\{(\d+)\}/;
     220        $width = 200 unless ($width);
     221
     222        # generate green title image
    207223    my $options = "-text \"$text\" -filename ${image_name}.gif -image_dir $image_dir";
    208     $options .= " -width 200 -height 57 -stripe_alignment right -text_alignment right";
     224    $options .= " -width $width -height 57 -stripe_alignment right -text_alignment right";
    209225    $options .= " -fontsize 26 -fontweight bold";
    210226    `$ENV{'GSDLHOME'}/bin/script/gimp/title_icon.pl $options`;
    211227
    212     # get width of new images and edit width macro
    213     # we'll do this even though title_icon.pl will always create images of the
    214     # width specified (200)
    215     my $fullfilename = &util::filename_cat ($image_dir, "${image_name}.gif");
    216     &process_width_macro ($fullfilename, $image_name, \$image_macros);
     228    # get width of resulting image and edit _width..._ macro in $image_macros
     229        # (no longer needed since we always resize to the width read from $image_macros.)
     230        # my $fullfilename = &util::filename_cat ($image_dir, "${image_name}.gif");
     231    # &process_width_macro ($fullfilename, $image_name, \$image_macros);
    217232
    218233    } elsif ($image_type eq "hand_made") {
     
    226241    }
    227242
    228     return "\n\#\# \"$text\" \#\# $image_type \#\# $image_name \#\#$image_macros";
     243    return "\n\#\# \"$origtext\" \#\# $image_type \#\# $image_name \#\#$image_macros";
    229244}
    230245
     
    233248
    234249    my $img_info = &get_img_info ($filename);
    235     $$image_macros =~ s/(_width${image_name}x_\s*(?:\[[^\]]*\])?\s*\{)(\d+)(\})/$1$img_info->{'width'}$3/s;
     250    $$image_macros =~ s/(_width${image_name}x?_\s*(?:\[[^\]]*\])?\s*\{)(\d+)(\})/$1$img_info->{'width'}$3/s;
    236251}
    237252
  • branches/New_Config_Format-branch/gsdl/cgi-bin/gsdlsite.cfg

    r1038 r1279  
    33
    44# points to the GSDLHOME directory
    5 gsdlhome    /home/gsdl
     5gsdlhome    **GSDLHOME**
    66
    77# this is the http address of GSDLHOME
    8 httpprefix  /gsdl
     8# if your webserver's DocumentRoot is set to $GSDLHOME
     9# then httpprefix can remain commented out
     10#httpprefix  /gsdl
    911
    1012# this is the http address of the directory which
    1113# contains the images for the interface.
    12 httpimg     /gsdl/images
     14# if your webserver's DocumentRoot is set to $GSDLHOME
     15# then httpimg will be /images
     16httpimg     /images
    1317
    1418# should contain the http address of this cgi script. This
  • branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_buildcol.pl

    r841 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!perl -w
    22
    33###########################################################################
     
    3232#   an already running cgi program.
    3333
    34 use Fcntl ':flock';
     34package webpage_buildcol;
     35
    3536use File::Basename;
     37use GSDLHOME;
     38use gflock;
    3639
    3740my $args;
     
    4649        my ($variable,$assignment) = ($1,$3);
    4750        $args->{$variable} = $assignment;
    48 
    49         $ENV{'GSDLHOME'} = $assignment if ($variable eq "gsdlhome");
    50         $ENV{'GSDLOS'} = $assignment if ($variable eq "gsdlos");
    51     }
    52     }
    53 
    54     if (defined($ENV{'GSDLHOME'}))
    55     {
    56     if (!defined($ENV{'GSDLOS'}))
    57     {
    58         $ENV{'GSDLOS'} = $^O; # special perl variable set to OS
    59         ##### Need to check to see what this is set to
    60         ##### under Windows
    61     }
    62 
    63     $ENV{'PATH'} .= ":$ENV{'GSDLHOME'}/bin/script";
    64     $ENV{'PATH'} .= ":$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
    65    
    66     unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    67     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    68     }
    69     else
    70     {
    71     print STDERR "Environment variable GSDLHOME not set\n";
    72     exit 1;
     51    }
    7352    }
    7453}
     
    8463    if (open(TMPOUT,">$full_tmpname"))
    8564    {
    86     if (flock(TMPOUT,LOCK_EX))
    87     {
     65    if (&gflock::lock (webpage_buildcol::TMPOUT)) {
    8866        print TMPOUT $text;
    8967        close(TMPOUT);
    90         flock(TMPOUT,LOCK_UN);
    91     }
    92     else
    93     {
     68        &gflock::unlock (webpage_buildcol::TMPOUT);
     69
     70    } else {
    9471        # Problem locking file
    9572        my $mess = "Unable to lock temporary communication file:";
     
    11895    my $full_importname = &util::filename_cat($full_dirname,"import");
    11996
    120     my $log_filename = &util::filename_cat($ENV{'GSDLHOME'},"log","$dirname.bld");
     97    my $log_filename = &util::filename_cat($ENV{'GSDLHOME'},"etc","$dirname.bld");
    12198    if (!open (LOGOUT, ">$log_filename"))
    12299    {
     
    140117    if ($copy_dir =~ m/^yes$/i)
    141118    {
    142         my $download_cmd = "";
     119        my $download_cmd = "perl " . &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script");
    143120        my $file_or_url  = $building_cfg_text->{'building'}->{'fileorurl'};
    144121       
     
    146123        {
    147124        # run urlcopy.pl to download files
    148         $download_cmd = "urlcopy.pl ";
     125        $download_cmd = " urlcopy.pl ";
    149126        my @urls = split("\n",$input_dir);
    150127        my $u;
     
    161138        $input_dir =~ s/^\s+//;
    162139        $input_dir =~ s/\s+$//;
    163         $download_cmd = "filecopy.pl $input_dir";
     140        $download_cmd = " filecopy.pl $input_dir";
    164141        }
    165142       
     
    194171    }
    195172
    196     if ($copy_dir =~ /^no$/i)
     173    if ((defined $copy_dir) && ($copy_dir =~ /^no$/i))
    197174    {
    198175    # link it
     
    221198    {
    222199    # Import operation
    223     my $import_cmd = "import.pl -removeold $dirname";
     200    my $import_cmd = "perl ";
     201    $import_cmd .= &util::filename_cat($ENV{'GSDLHOME'}, "bin", "script", "import.pl");
     202    $import_cmd .= " -removeold $dirname";
    224203   
    225204    if(!open(IMPORTOUT,"$import_cmd 2>&1 |"))
     
    254233
    255234    # Build operation
    256     my $build_cmd = "";
     235    my $build_cmd = "perl " .
     236        &util::filename_cat($ENV{'GSDLHOME'}, "bin", "script", "buildcol.pl");
    257237    if (($do_import eq "true")
    258238        || (($do_import eq "false") && (-e $full_archivename)))
    259239    {
    260         $build_cmd = "buildcol.pl $dirname";
     240        $build_cmd .= " $dirname";
    261241    }
    262242    else
    263243    {
    264 
    265         $build_cmd = "buildcol.pl";
    266244        $build_cmd .= " -archivedir $full_importname";
    267245        $build_cmd .= " -cachedir $full_archivename";
     
    354332    = &util::filename_cat($full_dirname,"etc","collect.cfg");
    355333
    356     if (open(CFGIN,"<$cfg_filename"))
    357     {
    358     if (flock(CFGIN,LOCK_EX))
    359     {
    360         # do requested stages for building
    361         my $result = do_build($full_dirname,$dirname,$args);
    362         flock(CFGIN,LOCK_UN);
    363         close(CFGIN);
    364         return if ($result ne "success");
    365     }
    366     else
    367     {
    368         # Problem locking file
    369         my $mess = "Unable to lock configuration file: $cfg_filename";
    370         print STDERR "$mess\n";
    371         return;
    372     }
    373     }
    374 
    375 #    my $mess_url = "$args->{'httpbuild'}&bca=mess&bc1dirname=$dirname";
    376 #    print "Location: $mess_url&head=_headdone_&mess=_messdonebuildcol_\n\n";
    377 #    print "done\n"; # in tmp file
     334    # do requested stages for building
     335    my $result = do_build($full_dirname,$dirname,$args);
     336    return if ($result ne "success");
     337
     338    my $mess_url = "$args->{'httpbuild'}&bca=mess&bc1dirname=$dirname";
     339    print "Location: $mess_url&head=_headdone_&mess=_messdonebuildcol_\n\n";
     340    print "done\n"; # in tmp file
    378341}
    379342
    380343&main();
    381 
    382 
    383 
    384 
    385 
  • branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_buildstatus.pl

    r724 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!perl -w
    22
    33###########################################################################
     
    3131use CGI;
    3232use GSDLHOME;
    33 use Fcntl ':flock';
    3433
    3534require util;
     
    6665    }
    6766
    68     my $full_tmpname
    69     = &util::filename_cat($ENV{'GSDLHOME'},"tmp",$tmpname);
     67    my $full_tmpname = &util::filename_cat($ENV{'GSDLHOME'},"tmp");
     68
     69    if (!-d $full_tmpname) {
     70    mkdir ($full_tmpname, 0777);
     71    }
     72
     73    $full_tmpname = &util::filename_cat($full_tmpname, $tmpname);
    7074
    7175    if (open(TMPIN,"<$full_tmpname"))
     
    8185    $bc1finished = 1 if ($mess eq "Done");
    8286    $bc1finished = -1 if ($mess =~ m/^Error:/);
     87    # escape backslashes so they don't vanish from printed filenames
     88    $mess =~ s/\\/\\\\/g;
    8389    &webpageutil::status_location($args,$mess,$tmpname,$bc1finished);
    8490
     
    96102
    97103&main();
    98 
    99 
    100 
    101 
    102 
  • branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_delcol.pl

    r841 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!perl -w
    22
    33###########################################################################
     
    3030use CGI;
    3131use GSDLHOME;
    32 use Fcntl ':flock';
    3332
    3433require util;
     
    7372    &util::rm_r($col_dir);
    7473
    75     # delete from collections.txt
    76     my $collist_filename
    77         = &util::filename_cat($ENV{'GSDLHOME'},"etc","collections.txt");
    78     if (open(CLIN,"<$collist_filename"))
    79     {
    80         if (flock(CLIN,LOCK_EX))
    81         {
    82         my @keep_dirnames = ();
    83         while (defined($line=<CLIN>))
    84         {
    85             chop $line;
    86             push(@keep_dirnames,$line) if ($line ne $dirname);
    87         }
    88         close(CLIN);
    89 
    90         if (open(CLIN,">$collist_filename"))
    91         {
    92             print CLIN join("\n",@keep_dirnames), "\n";
    93         }
    94 
    95         flock(CLIN,LOCK_UN);
    96         close(CLIN);
    97         }
    98         else
    99         {
    100         # problem locking file
    101         my $mess = "Unable to lock collection list configuration";
    102         $mess .= " file: $collist_filename";
    103         &webpageutil::error_location($args,$mess);
    104         return;
    105         }
    106     }
    107     else
    108     {
    109         # problem opening file for reading in
    110 
    111         my $mess = "Unable to open for input the collection list";
    112         $mess .= " configuration file: $collist_filename";
    113         &webpageutil::error_location($args,$mess);
    114         return;
    115     }
    11674    }
    11775    elsif ($delete_area eq "import")
     
    14098    my $mess_url = "$args->{'httpbuild'}&bca=mess";
    14199    $mess_url .= "&bc1dirname=$dirname" if ($delete_area ne "all");
    142     print "Content: text/html\n\n $mess_url&head=_headdone_&mess=_messdonedelcol_\n\n";
     100    print "Location: $mess_url&head=_headdone_&mess=_messdonedelcol_\n\n";
    143101    return;
    144102
  • branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_editcol.pl

    r724 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!perl -w
    22
    33###########################################################################
     
    2828# This program is a webpage wrapper for saving an edited config file
    2929
     30package webpage_editcol;
     31
    3032use CGI;
    3133use GSDLHOME;
    32 use Fcntl ':flock';
     34use gflock;
    3335
    3436require util;
     
    7072    if (open(CFGOUT,">$cfg_filename"))
    7173    {
    72     if (flock(CFGOUT,LOCK_EX))
    73     {
     74    if (&gflock::lock (webpage_editcol::CFGOUT)) {
    7475        my $cfg_text = $args->{'bc1cfgfile'};
    7576        print CFGOUT "$cfg_text";
    76         flock(CFGOUT,LOCK_UN);
     77        &gflock::unlock (webpage_editcol::CFGOUT);
    7778        close(CFGOUT);
    7879    }
  • branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_mkcol.pl

    r724 r1279  
    1 #!/usr/local/bin/perl5 -w
     1#!perl -w
    22
    33###########################################################################
     
    2828# This program is a webpage wrapper to the mkcol.pl process
    2929
     30package webpage_mkcol;
     31
    3032use CGI;
    3133use GSDLHOME;
    32 use Fcntl ':flock';
     34use gflock;
    3335
    3436require util;
     
    4143    my %args = ();
    4244   
     45    open (FILE, '>d:\gsdl\logout.txt') || die;
     46
    4347    foreach $p ($cgi->param())
    4448    {
    4549    $args{$p} = $cgi->param($p);
    46     }
    47 
     50
     51    print FILE "webpage_mkcol.pl - $p -> $args{$p}\n";
     52    }
     53
     54    close FILE;
    4855    return \%args;
    4956}
     
    8592    else
    8693    {
    87     # clean up input for heuristic that derives directory name for a new collection
     94    # clean up input for heuristic that derives directory name for a
     95    # new collection
    8896    $fullname =~ s/\s+/ /g;
    8997    $fullname =~ tr/[A-Z]/[a-z]/;
     
    149157    my $acronyms      = $args->{'bc1acronyms'};
    150158
    151     my $cmd = "mkcol.pl";
     159    my $cmd = "perl ";
     160    $cmd .= &util::filename_cat($ENV{'GSDLHOME'}, "bin", "script", "mkcol.pl");
    152161    $cmd .= " -title   \"$fullname\"";
    153162    $cmd .= " -creator $contact_email";
    154163    $cmd .= " -about   \"$about_desc\"";
    155     $cmd .= " -plugins \"GMLPlug ${src_format}Plug ArcPlug RecPlug\"";
     164    $cmd .= " -plugin  \"GMLPlug\"";
     165    $cmd .= " -plugin  \"${src_format}Plug\"";
     166    $cmd .= " -plugin  \"ArcPlug\"";
     167    $cmd .= " -plugin  \"RecPlug\"";
    156168### $cmd .= " -refine  \"$refine_plugs\"";
    157169    $cmd .= " $unique_dirname";
     170
    158171    my $status = system($cmd);
    159172    $status /= 256;
     
    162175    {
    163176        # append copydir, file_or_url and input_dir to end of collect.cfg
     177        # we'll also append DocumentUseHTML if processing HTML docs
    164178        my $cfg_filename
    165179        = &util::filename_cat($ENV{'GSDLHOME'},"collect",$unique_dirname,
     
    167181        if (open(CFGAPP,">>$cfg_filename"))
    168182        {
    169         if (flock(CFGAPP,LOCK_EX))
    170         {
     183        if (&gflock::lock (webpage_mkcol::CFGAPP)) {
    171184            print CFGAPP "\n";
     185
     186            if ($src_format eq "HTML") {
     187            print CFGAPP "format\tDocumentUseHTML\ttrue\n\n";
     188            }
     189
    172190            print CFGAPP "building\tfileorurl\t$file_or_url\n";
    173191            print CFGAPP "building\tinputdir\t$input_dir\n";
    174192            print CFGAPP "building\tcopydir\t\t$copy_dir\n";
    175193            print CFGAPP "building\tingsdlarea\t$in_gsdl_area\n";
    176             flock(CFGAPP,LOCK_UN);
     194            &gflock::unlock (webpage_mkcol::CFGAPP);
    177195            close(CFGAPP);
    178196        }
     
    195213        return;
    196214        }
    197 
    198 
    199         # append dirname to end of collection config file
    200         my $collist_filename
    201         = &util::filename_cat($ENV{'GSDLHOME'},"etc","collections.txt");
    202         if (open(CLAPP,">>$collist_filename"))
    203         {
    204         if (flock(CLAPP,LOCK_EX))
    205         {
    206             print CLAPP "$unique_dirname\n";
    207             flock(CLAPP,LOCK_UN);
    208             close(CLAPP);
    209         }
    210         else
    211         {
    212             # problem locking file
    213             my $mess = "Unable to lock collection list";
    214             $mess .= " configuration file: $collist_filename";
    215             &webpageutil::error_location($args,$mess);
    216             close(CLAPP);
    217             return;
    218         }
    219         }
    220         else
    221         {
    222         # problem
    223         my $mess = "Unable to append to collection list";
    224         $mess .= " configuration file: $collist_filename";
    225         &webpageutil::error_location($args,$mess);
    226         return;
    227         }
    228        
    229215    }
    230216    else
     
    248234
    249235&main();
    250 
    251 
    252 
    253 
    254 
  • branches/New_Config_Format-branch/gsdl/collect/modelcol/etc/collect.cfg

    r815 r1279  
    66indexes        document:text
    77defaultindex   document:text
    8 plugin         GMLPlug
    9 plugin         TEXTPlug
    10 plugin         ArcPlug
    11 plugin         RecPlug
     8
     9**plugins**
    1210
    1311classify       AZList metadata=Title
    1412
    15 collectionmeta collectionname    "**collection**"
     13collectionmeta collectionname    "**title**"
     14collectionmeta iconcollection    ""
     15collectionmeta collectionextra   "**about**"
    1616collectionmeta .document:text    "documents"
  • branches/New_Config_Format-branch/gsdl/docs/TODO

    r1036 r1279  
    11
    22configuration/installation:
    3 
    4     - iconcollection fields in collect.cfg are dependant on httpprefix
    5 
    6     - set up setup.bash/setup.bat etc during configuration (so GSDLHOME
    7       is full path and script can be run from anywhere)
    83   
    9     - autoconfigure/InstallShield to ask where cgi-bin directory is?
    10 
    114unix:
    125
    13     - builddemo.sh and eveything in bin/script must be executable
    14 
    15     - maybe look at editing all perl scripts during configuration to
    16       get #!/usr/bin/... line pointing to correct place. definitely
    17       wants to default to /usr/bin/perl instead of /usr/local/bin/perl5
     6    - look into getting perl scripts to run correctly on any operating
     7          system (i.e. the #! stuff) -- those in cgi-bin particularly need
     8         to work on windows too (#!perl on windows, #!/usr/bin/perl on
     9          linux) -- perlrun manpage
    1810
    1911    - add fastcgi to distribution
     
    2113windows:
    2214
    23     - use Installshield to create packages (maybe use 'package for the
    24       web') of:
    25       - full source code with building capability
    26       - server and cd-rom executables with building capability
    27       - server and cd-rom executables only
     15    - use Installshield to create windows packages - wait for George to
     16          decide what he's going to do about InstallShield
    2817
    29     - use same executable for both cd-rom executables
     18    - call server version of cd-rom executable 'standard' and other
     19          version 'backup' (or maybe 'gold' and 'silver'?)
    3020
    31     - version string in fnord.cpp should be changed when a new version
    32           is released - maybe the support.htm file too
     21    - fix up Local Library - somehow need to have option of old server
     22          and single-user (or Gold and Silver?) within same executable.
    3323
    34 write perl script for updating distribution
    3524
    3625tidy up text versions of macro files
    3726
    38 Write a perl script to go through a translated macro file and generate
    39 appropriate icons
    40 
    41 add german interface
     27look further into creating images containing non-latin1 characters
    4228
    4329create default collection images at build time with gimp script
    44 
    45 convert crappy old scheme gimp scripts to perl
    4630
    4731fix query caching - do caching in receptionist, finish off cross collection
     
    5337add to
    5438
    55 get remaining plugins tidied up and using passed in options - file
    56 extension option for all plugins
    57 
    58 get all plugins to use multiread object and throw away the specialized GB
    59 plugins
    60 
    6139write a gzip/bzip/zip/tar plugin and take any remaining gzip stuff out of
    6240rest of plugins
    6341
    64 tidy up classifiers, make collection-specific ones more object oriented,
    65 add section and compact options instead of using separate classifiers - get
    66 classifiers using same option passing code as plugins now use
     42look at speeding up acronym extraction code, add more options
    6743
    6844create a general classifier for doing stuff like NPepaList.pm does. should
     
    7046etc.
    7147
    72 sort out arrows/paging of browserclasses (for queryaction too!)
    73 -DocumentBottomArrows?? - want arrows at bottom as well as top of pages for
    74 gberg type collections
     48for gsdl-3.0
    7549
    76 tidy up automatic help text - need to work out type of collection at build
    77 time - get rid of the hacky HelpNoDocs, HelpBibDocs and HelpBookDocs config
    78 file entries
     50  - extend configuration file syntax - tidy up all the format stuff -
     51    replace DocumentImages, DocumentTitles and DocumentHeading with
     52    DocumentColumn stuff (develop New_Config_Format-branch CVS branch)
    7953
    80 fix up collection specific metadata - content negotiation??
     54  - tidy up classifiers, make collection-specific ones more object oriented
     55    (probably create classifier base class), add section and compact
     56    options instead of using separate classifiers - get classifiers using
     57    same option passing code as plugins now use
    8158
    82 append to init and error logs instead of overwriting
     59  - sort out arrows/paging of browserclasses (for queryaction too!)
     60    DocumentBottomArrows?? - want arrows at bottom as well as top of pages
     61    for gberg type collections
    8362
    84 implement a more robust way of dealing with the way browsers encode form
    85 arguments when you don't want them to - check that
    86 _decodedcompressedoptions_ has been set up correctly everywhere
     63  - tidy up automatic help text - need to work out type of collection at
     64    build time - get rid of the hacky HelpNoDocs, HelpBibDocs and
     65    HelpBookDocs config file entries
    8766
    88 extend configuration file syntax - tidy up all the format stuff
     67  - fix up collection specific metadata - content negotiation??
     68    "chapters" = "Kapiteln"
     69    "section titles" = "Sektionstiteln"
     70    "entire books" = "ganzen B&uuml;chern"
    8971
    90 replace DocumentImages, DocumentTitles and DocumentHeading with
    91 DocumentIcon (values of true, false and formatstring - true may be part of
    92 formatstring)
     72  - add ability to use a separate formatstring for each index when
     73    displaying query results
    9374
    94 fix up formattools - bug preventing nested If/Or - check that no 'here'
    95 pointers can run past 'end'
     75  - make interface languages and encodings easier to add. defining
     76    languages and encodings should be done in main.cfg (or collect.cfg for
     77    collection specific control). should also define:
     78      - which encodings a given language may use -- i.e. the preferences page
     79      shouldn't let you select an encoding that doesn't work with the
     80      currently selected language
     81      - which encoding is the default for a given language
     82      - which language is the default.
    9683
    97 get numwords (and maybe other stats?) working in collection info - maybe
    98 set as macros
    9984
    10085tidy things up so that get_collectinfo is only called once per collection
    101 (at init time for fastcgi) - it's currently being called all over the place
    102 
    103 get BrowseFilter EndResults option to accept -1
     86(at init time for fastcgi) - it's currently being called all over the
     87place. maybe load collection info into recpt.collectinfo structure. load in
     88as required for each collection so structure would slowly fill up with all
     89collections when using fastcgi
    10490
    10591add Language metadata to all documents by default??
    10692
    107 fix the way the interface language preference is implemented - should check
    108 somewhere that current language matches one of those specified in config
    109 file
     93usage page - Matthias Dalmeier
    11094
    111 bug when classification is empty. CL number still gets incremented so
    112 formatstrings don't match as expected
     95create a usage page from ftp logs (build on ftp_stats.pl script)
    11396
    114 usage page
    115 
    116 if document has only one level - e.g. TEXTPlug or HTMLPlug; top level type
    117 should be Invisible so that tables of contents aren't displayed
    118 
    119 add browserclasses to statusaction
    120 
    121 add ability to use a separate formatstring for each index when displaying
    122 query results
    123 
    124 rebuild fao1 and fao2 with fixed up version of HTMLPlug (no multiple copies
    125 of images
    126 
    127 rebuild Davids collections:
    128     - whist
    129     - musvid
    130     - ohist
     97tidy up ohist to remove need for public_html/ohist stuff
    13198
    13299WebPlug - optimize to run reasonably quickly? - Currently works (I think)
     
    135102build a small collection
    136103
    137 look into possibility of clearing highlighting whenever you go to browse
    138 etc.  i.e. highlighting only works when coming directly from a search
    139 
    140 replace mk_all_dir function with perl module - see if speed improvements
    141 are worthwhile
    142 
    143104update collections - set up auto-updating where needed
    144105        - csbib
     
    146107    - tcc - mail updating
    147108    - niupepa
    148 
    149 add option to buildcol.pl to force documents to be paged/hierarchical. need
    150 to use it for niupepa collection
     109    - gberg
    151110
    152111sort documents between importing and building
    153112
    154 fix plugins option to mkcol.pl
     113get polling for new collections to work when using fastcgi
    155114
    156 get polling for new collections to work when using fastcgi - should also
    157 produce error message when unable to read collect dir (rather than just
    158 exiting)
     115ipc and unesco collections have no 'about' text (maybe others too).
    159116
    160 what to do with Makefiles for collection specific receptionists (like
    161 cstr), use configure?
     117compiler warnings
    162118
    163 check out mgs declaration of error()
     119windows cd-rom version
    164120
    165 call server version of cd-rom executable 'standard' and other version
    166 'backup'
     121    - message in browser selection box to say that IE isn't compatible
     122      with 'silver' version (instead of simply greying out selection of
     123      IE as currently happens).
    167124
    168 put download stuff onto ftp
     125    - look into Belgium bug ("Netscape was unable to create a network
     126          socket connection... " -- NT 4.0 -- no internet connection -- run
     127          programs/accessories/communication/internet connection wizard,
     128          answer LAN to all questions -- bug appears to be present if you
     129          get above error message when attempting to access 127.0.0.1 --
     130          look into testing for this problem from InstallShield, maybe try
     131          reading from 127.0.0.1 and see if it times out?
    169132
    170 fix up publications page - maybe just use an index.html type thing on rose
     133look into need for _LITTLE_ENDIAN flag in mg
    171134
    172 look into server security
     135make all external nzdl.org pages plain html rather than macro pages. remove
     136all related images from distribution
    173137
    174 fix up compiler warnings (on titoki and rose)
     138ohists RealAudio server appears to be broken
    175139
    176 ipc collection has no 'about' text
     140capability to view actual BibTeX entries of csbib (and maybe other?)
     141collections
    177142
    178 sort out bbc collections - find real import stuff - delete multiple copies
    179 that are laying around
     143scripts for installing w3mir - a .bat version of davids install.sh files
    180144
    181 tidy up cstr - image on left above all 5 buttons - title on right above
    182 goto line
     145implement proper unicode sorting within classifiers
    183146
    184 play with csbib - appear to only get 5 matches for 'references' query on
    185 'the' - some indexes also appear to only work some of the time
     147get file locking to work on non GSDL_USE_IOS_H compilers (VC++ 6.0)
    186148
     149produce an error message when no "built" collections are available
     150(currently only produce an error when no collections at all are available)
     151
     152have another go at getting gsdl_system() function to work properly on
     153windows 95 (for end-user collection building)
     154
  • branches/New_Config_Format-branch/gsdl/etc/VERSION

    r1040 r1279  
    1 gsdl version: 2.13
    2 cvs tag: gsdl-213
    3 build version: 2.0
     1gsdl version: x.xx
     2cvs tag: gsdl-x_xx-distribution
     3build version: 2.1
  • branches/New_Config_Format-branch/gsdl/etc/main.cfg

    r1035 r1279  
    11maintainer      [email protected]
     2
    23macrofiles      style.dm base.dm query.dm help.dm pref.dm \
    3         about.dm home.dm document.dm status.dm \
    4         authen.dm users.dm html.dm english.dm \
    5         english2.dm chinese.dm maori.dm people.dm \
    6         tech.dm prescrpt.dm mg.dm rw.dm build.dm \
    7         extlink.dm gsdlsoft.dm delhistory.dm german.dm
     4        about.dm document.dm status.dm \
     5        home.dm \
     6        authen.dm users.dm html.dm build.dm extlink.dm delhistory.dm \
     7        gsdl.dm gsdlsoft.dm prescrpt.dm mg.dm \
     8        english.dm chinese.dm french.dm german.dm maori.dm \
     9        english2.dm french2.dm arabic.dm spanish.dm
     10
    811status          enabled
    912usecookies      true
     
    1215macroprecedence c,v,l
    1316cgiarg          shortname=v longname=version multiplechar=false argdefault=0 \
    14                 defaultstatus=weak savedarginfo=can
     17                defaultstatus=weak savedarginfo=must
    1518
    1619cgiarg          shortname=a argdefault=p
  • branches/New_Config_Format-branch/gsdl/lib/cfgread.cpp

    r1076 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.8.2.1  2000/07/12 22:20:52  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.9  2000/07/05 21:49:22  sjboddie
     34   Receptionist now caches collection information to avoid making multiple
     35   get_collectinfo calls to collection server
     36
    3037   Revision 1.8  2000/04/06 19:57:57  cs025
    3138   Correcting a correction - reinstated all lib files due to silly
     
    5663
    5764#include "cfgread.h"
    58 
    59 void collectioninfo_t::clear () {
    60   gsdl_gsdlhome.clear();
    61   gsdl_gdbmhome.clear();
    62 }
    6365
    6466int write_ini_line (ofstream &fileout, const text_t &key, const text_t value) {
  • branches/New_Config_Format-branch/gsdl/lib/cfgread.h

    r1076 r1279  
    4141#endif
    4242
    43 struct collectioninfo_t {
    44   void clear ();
    45   collectioninfo_t () {clear();}
    46 
    47   text_t gsdl_gsdlhome;
    48   text_t gsdl_gdbmhome;
    49 };
    50 
    51 typedef map<text_t, collectioninfo_t, lttext_t> colinfo_tmap;
    52 
    5343// return 0 on success, -1 on failure
    5444int write_ini_line (ofstream &fileout, const text_t &key, const text_t value);
  • branches/New_Config_Format-branch/gsdl/lib/display.cpp

    r1076 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.19.2.1  2000/07/12 22:20:53  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.21  2000/06/18 22:56:55  sjboddie
     34   fixed a bug that I'd introduced earlier when attempting to get things
     35   compiling on VC++ 6.0 - the gsdl-2.22 release (and possibly 2.21) was
     36   affected and wouldn't have displayed chinese and Arabic characters
     37   correctly
     38
     39   Revision 1.20  2000/05/12 03:09:23  sjboddie
     40   minor modifications to get web library compiling under VC++ 6.0
     41
    3042   Revision 1.19  2000/04/06 19:57:58  cs025
    3143   Correcting a correction - reinstated all lib files due to silly
     
    684696// bigendian should be set to 1
    685697// 0 will be returned when the end of the file has been found
    686 unsigned short my_uni_get (istream &fin, int &line,
     698unsigned short my_uni_get (unistream &fin, int &line,
    687699               int &isunicode, int &bigendian) {
    688700  unsigned short c = 0;
     
    692704    // get the next two characters
    693705    unsigned char c1 = 0, c2 = 0;
     706   
    694707    if (!fin.eof()) fin.get(c1);
    695708    if (!fin.eof()) fin.get(c2);
     
    766779 
    767780  // open the file
    768   ifstream fin(filenamestr);
     781  unistream fin (filenamestr);
     782
    769783  if (fin.fail()) return -1; // read failed
    770784
  • branches/New_Config_Format-branch/gsdl/lib/display.h

    r1076 r1279  
    6464#  include <iostream.h>
    6565#  include <fstream.h>
     66
     67#define unistream ifstream
     68
    6669#else
    6770#  include <iostream>
    6871#  include <fstream>
     72
     73typedef std::basic_ifstream<unsigned char> unistream;
     74
    6975#endif
    7076
     
    8288// cyclic macros (a includes b and b includes a)
    8389#define MAXRECURSIONDEPTH 30
    84 
    8590
    8691// class prototypes
  • branches/New_Config_Format-branch/gsdl/lib/fileutil.cpp

    r1076 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.14.2.1  2000/07/12 22:20:54  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.16  2000/05/12 03:09:22  sjboddie
     34   minor modifications to get web library compiling under VC++ 6.0
     35
     36   Revision 1.15  2000/05/04 08:27:28  sjboddie
     37   modifications for windows ports of GCC
     38
    3039   Revision 1.14  2000/04/06 19:57:59  cs025
    3140   Correcting a correction - reinstated all lib files due to silly
     
    118127  path2.erase (path2.begin(), here);
    119128 
    120   // return the concatenation of the two strings
    121   return path1 + path2;
     129  text_t fullpath = path1 + path2;
     130
     131  // make sure all the right slashes are used
     132  here = fullpath.begin();
     133  end = fullpath.end();
     134  while (here != end) {
     135#ifdef __WIN32__
     136    if (*here == '/') *here = '\\';
     137#else
     138    if (*here == '\\') *here = '/';
     139#endif
     140    here ++;
     141  }
     142  return fullpath;
    122143}
    123144
     
    139160bool file_exists (const text_t &filename) {
    140161  char *cstr = filename.getcstr();
     162#ifdef GSDL_USE_IOS_H
    141163  ifstream filestream (cstr, ios::in | ios::nocreate);
     164#else
     165  ifstream filestream (cstr, ios::in);
     166#endif
    142167  delete cstr;
    143168
     
    155180bool file_writable (const text_t &filename) {
    156181  char *cstr = filename.getcstr();
     182#ifdef GSDL_USE_IOS_H
    157183  ifstream filestream (cstr, ios::out | ios::nocreate);
     184#else
     185  ifstream filestream (cstr, ios::out);
     186#endif
    158187  delete cstr;
    159188
     
    168197}
    169198
    170 #if defined __WIN32__
     199#if defined(__WIN32__) && !defined(__GNUC__)
    171200
    172201#include <windows.h>
    173202
    174 BOOL read_dir (const text_t &dirname, text_tarray &filelist) {
     203bool read_dir (const text_t &dirname, text_tarray &filelist) {
    175204 
    176205  WIN32_FIND_DATA FileData; 
    177206  HANDLE hSearch;
    178   DWORD dwAttrs;
    179207  char *dirpath = dirname.getcstr();
    180208  strcat (dirpath, "\\*");
    181 
    182   BOOL finished = false;
    183209 
    184210  hSearch = FindFirstFile(dirpath, &FileData);
  • branches/New_Config_Format-branch/gsdl/lib/gsdlconf.h

    r1076 r1279  
    3030#define GSDLCONF_H
    3131
    32 #ifndef __WIN32__
     32#if defined(__WIN32__)
     33#include "../win32cfg.h"
     34#else
    3335#include "../config.h"
    3436#endif
     
    7880#endif
    7981
     82// mktemp
     83
     84#if !defined(__WIN32__) || defined(__GNUC__)
     85#include <unistd.h>
     86#define GSDL_MKTEMP(str) mktemp(str)
     87#else
     88#include <io.h>
     89#define GSDL_MKTEMP(str) _mktemp(str)
     90#endif
     91
     92
    8093
    8194// file locking
    8295
    8396#if defined(__WIN32__)
     97
     98#if !defined(LK_UNLOCK) && defined(LK_UNLCK)
     99#define LK_UNLOCK LK_UNLCK
     100#endif
     101
     102#if defined (GSDL_USE_IOS_H)
    84103#include <io.h>
    85104#include <sys/locking.h>
     105
     106#if defined(__GNUC__)
     107#define GSDL_GET_FILEDESC(str) str.filedesc()
     108#else
    86109#define GSDL_GET_FILEDESC(str) str.fd()
     110#endif
     111
    87112#define GSDL_UNLOCK_FILE(fd) _locking(fd, LK_UNLCK, 200)
    88113#define GSDL_LOCK_FILE(fd) lock_val = _locking(fd, LK_NBLCK, 200)
     114
     115#else
     116
     117// when using <fstream> (i.e. VC++ 6.0) I can't work out how
     118// to return a filedesc. File locking won't currently work
     119// for those windows compilers requiring GSDL_USE_IOS_H to
     120// not be set -- Stefan.
     121#define GSDL_GET_FILEDESC(str) 1
     122#define GSDL_LOCK_FILE(fd) lock_val = 0
     123#define GSDL_UNLOCK_FILE(fd) 0
     124#endif
    89125
    90126#else
  • branches/New_Config_Format-branch/gsdl/lib/gsdltools.cpp

    r1076 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.3.2.1  2000/07/12 22:20:55  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.5  2000/05/19 04:56:01  sjboddie
     34   added gsdl_system function for spawning off new processes under windows
     35
     36   Revision 1.4  2000/05/04 05:16:23  sjboddie
     37   Moved dm_safe from htmlutils to gsdltools. Also made it escape '\'
     38   characters to prevent their mysterious disappearance from things like
     39   windows filenames when they get passed through the macro expander.
     40
    3041   Revision 1.3  2000/04/06 19:58:01  cs025
    3142   Correcting a correction - reinstated all lib files due to silly
     
    4960}
    5061
     62text_t dm_safe (const text_t &instring) {
    5163
     64  text_t outstring;
     65  text_t::const_iterator here = instring.begin();
     66  text_t::const_iterator end = instring.end();
     67  while (here != end) {
     68    if (*here == '_' || *here == '\\') outstring.push_back('\\');
     69    outstring.push_back(*here);
     70    here ++;
     71  }
     72  return outstring;
     73}
     74
     75// gsdl_system spawns a completely separate program (i.e. the calling
     76// program continues and terminates normally). Arguments containing special
     77// characters (e.g. '&') should be quoted with ""
     78
     79// on unix systems you can get the same effect as this function by doing a
     80// system call and putting the spawned process in the background
     81// (e.g. system (funcname options &);
     82
     83#if defined (__WIN32__)
     84#include <windows.h>
     85void gsdl_system (char *cmd, ostream &logout) {
     86
     87  STARTUPINFO ps = {sizeof(STARTUPINFO), NULL, NULL, NULL,
     88                    0, 0, 0, 0, 0, 0,
     89                    0, 0,
     90                    0, 0, NULL,
     91                    NULL, NULL, NULL};
     92  PROCESS_INFORMATION pi;
     93  BOOL res = CreateProcess(NULL,
     94                           cmd,
     95                           NULL,
     96                           NULL,
     97                           FALSE,
     98                           DETACHED_PROCESS,
     99                           NULL,
     100                           NULL,
     101                           &ps,
     102                           &pi);
     103  if (!res) {
     104    logout << "Failed to start " << cmd << " process, error code " << GetLastError();
     105  }
     106
     107  CloseHandle(pi.hProcess);
     108  CloseHandle(pi.hThread);
     109}
     110
     111#endif
  • branches/New_Config_Format-branch/gsdl/lib/gsdltools.h

    r1076 r1279  
    3838bool littleEndian();
    3939
     40// escapes '\' and '_' characters with '\'
     41// note that single '\' characters occurring
     42// naturally within text (or filenames!!) will
     43// be removed by the dm macro language
     44text_t dm_safe (const text_t &instring);
     45
     46
     47// gsdl_system spawns a completely separate program (i.e. the calling
     48// program continues and terminates normally). Arguments containing special
     49// characters (e.g. '&') should be quoted with ""
     50
     51// on unix systems you can get the same effect as this function by doing a
     52// system call and putting the spawned process in the background
     53// (e.g. system (funcname options &);
     54#if defined (__WIN32__)
     55void gsdl_system (char *cmd, ostream &logout);
    4056#endif
     57
     58#endif
  • branches/New_Config_Format-branch/gsdl/lib/gsdlunicode.cpp

    r1076 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.12.2.1  2000/07/12 22:20:55  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.14  2000/06/23 05:03:29  nzdl
     34   fixed a couple of compiler warnings created by the new encoding stuff
     35
     36   Revision 1.13  2000/06/23 03:21:38  sjboddie
     37   Created converter classes for simple 8 bit encodings that use a
     38   simple textual map file. Instances of these classes are used to handle
     39   the Windows 1256 (Arabic) encoding.
     40
    3041   Revision 1.12  2000/04/06 19:58:02  cs025
    3142   Correcting a correction - reinstated all lib files due to silly
     
    7081#include <stdio.h>
    7182
     83#if defined(GSDL_USE_OBJECTSPACE)
     84#  include <ospace\std\iostream>
     85#  include <ospace\std\fstream>
     86#elif defined(GSDL_USE_IOS_H)
     87#  include <iostream.h>
     88#  include <fstream.h>
     89#else
     90#  include <iostream>
     91#  include <fstream>
     92#endif
    7293
    7394
     
    551572  else status = unfinished;
    552573}
     574
     575
     576bool simplemapconvert::loadmapfile (bool in) {
     577  if (loaded) return true;
     578  if (mapfile.empty()) return false;
     579
     580  char *cfilename = mapfile.getcstr();
     581#ifdef GSDL_USE_IOS_H
     582  ifstream mapfilein (cfilename, ios::in | ios::nocreate);
     583#else
     584  ifstream mapfilein (cfilename, ios::in);
     585#endif
     586  delete cfilename;
     587  if (!mapfilein) return false;
     588
     589  char cline[2048];
     590  text_t line;
     591
     592  while (!mapfilein.eof()) {
     593    mapfilein.getline (cline, 2048);
     594    line.clear();
     595    line.appendcstr (cline);
     596    if (line.empty()) continue;
     597    // remove comments
     598    text_t::iterator end = line.end();
     599    text_t::iterator here = findchar (line.begin(), end, '#');
     600    if (here != end) {
     601      line.erase (here, end);
     602      if (line.empty()) continue;
     603    }
     604   
     605    text_tarray parts;
     606    splitchar (line.begin(), line.end(), '\t', parts);
     607   
     608    // do some simple sanity checks
     609    if (parts.size() < 2) continue;
     610    text_t::iterator begin1 = parts[0].begin();
     611    text_t::iterator begin2 = parts[1].begin();
     612    if (*begin1 != '0' || *(begin1+1) != 'x') continue;
     613    if (*begin2 != '0' || *(begin2+1) != 'x') continue;
     614    char *from = parts[0].getcstr();
     615    char *to = parts[1].getcstr();
     616    unsigned int f = 0, t = 0;
     617    sscanf (from, "%i", &f);
     618    sscanf (to, "%i", &t);
     619    delete from;
     620    delete to;
     621   
     622    if (in) mapping[(unsigned short)f] = (unsigned short)t;
     623    else mapping[(unsigned short)t] = (unsigned short)f;
     624  }
     625
     626  loaded = true;
     627  return true;
     628}
     629
     630unsigned short simplemapconvert::convert (unsigned short c, bool in) {
     631
     632  if (!loaded)
     633    if (!loadmapfile(in)) return absentc;
     634 
     635  return mapping[c];
     636}
     637
     638
     639void simplemapinconvertclass::convert (text_t &output, status_t &status) {
     640  output.clear();
     641 
     642  if (start == NULL || len == 0) {
     643    status = finished;
     644    return;
     645  }
     646
     647  // don't want any funny sign conversions happening
     648  unsigned char *here = (unsigned char *)start;
     649  while (len > 0) {
     650
     651    if (*here < 0x80)
     652      output.push_back (*here); // append this character
     653    else
     654      output.push_back (converter.convert(*here, true));
     655
     656    ++here;
     657    --len;
     658  }
     659
     660  start = (char *)here; // save current position
     661  status = finished;
     662}
     663
     664
     665void simplemapoutconvertclass::convert (char *output, size_t maxlen,
     666                    size_t &len, status_t &status) {
     667
     668  if (input == NULL || output == NULL) {
     669    status = finished;
     670    return;
     671  }
     672
     673  // don't want any funny sign conversions happening
     674  unsigned char *uoutput = (unsigned char *)output;
     675  text_t::iterator textend = input->end();
     676  len = 0;
     677  while ((len < maxlen) && (texthere != textend)) {
     678
     679    if (*texthere < 0x80) *uoutput = (unsigned char)(*texthere);
     680    else *uoutput = converter.convert (*texthere, false);
     681
     682    ++uoutput;
     683    ++len;
     684    ++texthere;
     685  }
     686 
     687  if (texthere == textend) status = finished;
     688  else status = unfinished;
     689}
  • branches/New_Config_Format-branch/gsdl/lib/gsdlunicode.h

    r1076 r1279  
    215215};
    216216
     217
     218// Simple input and output converter classes for use with 8 bit encodings
     219// using simple textual map files. Map files should contain (at least) two
     220// tab-separated fields. The first field is the mapped value and the second
     221// field is the unicode value.
     222
     223struct ltus_t
     224{
     225  bool operator()(const unsigned short &t1, const unsigned short &t2) const
     226  { return t1 < t2; }
     227};
     228
     229
     230class simplemapconvert {
     231public:
     232  simplemapconvert () {absentc=0; loaded=false;}
     233  unsigned short convert (unsigned short c, bool in);
     234  void setmapfile (const text_t &themapfile) {mapfile = themapfile;}
     235
     236protected:
     237  bool loadmapfile (bool in);
     238
     239  map <unsigned short, unsigned short, ltus_t> mapping;
     240  bool loaded;
     241  text_t mapfile;
     242  unsigned short absentc;
     243};
     244
     245
     246class simplemapinconvertclass : public inconvertclass {
     247public:
     248  virtual ~simplemapinconvertclass () {}
     249
     250  void convert (text_t &output, status_t &status);
     251
     252  void setmapfile (const text_t &themapfile) {converter.setmapfile(themapfile);}
     253 
     254protected:
     255  simplemapconvert converter;
     256};
     257
     258class simplemapoutconvertclass : public rzwsoutconvertclass {
     259public:
     260  virtual ~simplemapoutconvertclass () {}
     261
     262  void convert (char *output, size_t maxlen,
     263        size_t &len, status_t &status);
     264
     265  void setmapfile (const text_t &themapfile) {converter.setmapfile(themapfile);}
     266 
     267protected:
     268  simplemapconvert converter;
     269};
     270
     271
     272
     273
    217274#endif
  • branches/New_Config_Format-branch/gsdl/lib/text_t.cpp

    r1076 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.17.2.1  2000/07/12 22:20:56  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.18  2000/04/14 02:50:12  sjboddie
     34   added text_t versions of joinchar to work with sets and lists
     35
    3036   Revision 1.17  2000/04/06 19:58:03  cs025
    3137   Correcting a correction - reinstated all lib files due to silly
     
    434440    {
    435441      if (!first) outtext.push_back (c);
     442      first = false;
     443      outtext += *here;
     444      here++;
     445    }
     446}
     447
     448void joinchar (const text_tlist &inlist, text_t c, text_t &outtext)
     449{
     450  outtext.clear ();
     451
     452  text_tlist::const_iterator here = inlist.begin ();
     453  text_tlist::const_iterator end = inlist.end ();
     454  bool first = true;
     455  while (here != end)
     456    {
     457      if (!first) outtext += c;
     458      first = false;
     459      outtext += *here;
     460      here++;
     461    }
     462}
     463
     464void joinchar (const text_tset &inlist, text_t c, text_t &outtext)
     465{
     466  outtext.clear ();
     467
     468  text_tset::const_iterator here = inlist.begin ();
     469  text_tset::const_iterator end = inlist.end ();
     470  bool first = true;
     471  while (here != end)
     472    {
     473      if (!first) outtext += c;
    436474      first = false;
    437475      outtext += *here;
  • branches/New_Config_Format-branch/gsdl/lib/text_t.h

    r1076 r1279  
    238238void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext);
    239239void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext);
     240void joinchar (const text_tset &inlist, text_t c, text_t &outtext);
     241void joinchar (const text_tlist &inlist, text_t c, text_t &outtext);
    240242void joinchar (const text_tarray &inlist, text_t c, text_t &outtext);
    241243
  • branches/New_Config_Format-branch/gsdl/lib/win32.mak

    r1076 r1279  
    2525###########################################################################
    2626
    27 GSDLHOME = d:\home\dl\gsdl
    28 STLPATH = d:\home\dl\stl\stlport
     27GSDLHOME = c:\gsdl
     28STLPATH = c:\stlport
    2929
    3030AR = lib
  • branches/New_Config_Format-branch/gsdl/macros/about.dm

    r876 r1279  
    99# don't want link to 'about' page
    1010_imagecollection_ {_If_("_iconcollection_" ne "",
    11     <img src="_iconcollection_" border=0>,  <br><br><h2>_collectionname_</h2>}
     11    <img src="_iconcollection_" border=0>,<br><br><h2>_collectionname_</h2>)}
     12_imagecollection_ [v=1] {<br><br><h2>_collectionname_</h2>}
    1213
    1314#######################################################################
  • branches/New_Config_Format-branch/gsdl/macros/authen.dm

    r876 r1279  
    1111_content_ {
    1212<br><br><br><br>
    13 _messagestatus_
    14 
    1513<form name="login" method="get" action="_gwcgi_">
    1614<input type=hidden name="e" value="_If_(_cgiarger_,_cgiarger_,_decodedcompressedoptions_)">
    1715_hiddenargs_
     16<center><table width=_pagewidth_>
     17<tr><td>
     18_messagestatus_
     19</td></tr>
    1820
    19 <table border=0>
    20 <tr><td>_textusername_</td><td><input type="text" name="un" value="_cgiargun_" size=10></td></tr>
    21 <tr><td>_textpassword_</td><td><input type="password" name="pw" size=10></td></tr>
    22 <tr><td></td><td><input type="submit" value="sign in"></td></tr>
     21<tr><td>
     22<table><td>_textusername_</td>
     23<td><input type="text" name="un" value="_cgiargun_" size=10></td>
     24<td></td></tr>
     25<tr><td>_textpassword_</td>
     26<td><input type="password" name="pw" size=10></td>
     27<td><input type="submit" value="sign in"></td>
     28</tr>
     29</table>
     30<td></tr>
    2331</table>
    2432</form>
  • branches/New_Config_Format-branch/gsdl/macros/base.dm

    r1019 r1279  
    66
    77##########
    8 
    9 # won't need this once old versions of library are no longer used
    10 _decodedcompressedoptions_ {_compressedoptions_}
    11 
    128
    139_htmlextra_ {}
     
    2723_imagethispage_ {}
    2824_iconcollection_ {}
    29 _imagecollection_ {_If_("_iconcollection_" ne "",<a href="_httppagex_(about)"><img src="_iconcollection_" border=0></a>)}
     25_collectionname_ {}
     26
     27_imagecollection_ {_If_("_iconcollection_" ne "",
     28<a href="_httppageabout_"><img src="_iconcollection_" border=0></a>,
     29_imagecollectionv_}
     30_imagecollection_ [v=1] {_imagecollectionv_}
     31_imagecollectionv_ {_If_(_collectionname_,<br><br><h2><a href="_httppageabout_">_collectionname_</a></h2>)}
    3032
    3133#######################################################################
     
    5052_Datewidth_ {_widthtdatex_}
    5153_Subjectwidth_ {_widthtsubjx_}
     54_Towidth_ {_widthttox_}
     55_Fromwidth_ {_widthtfromx_}
    5256_Organizationwidth_ {_widthtorgx_}
    5357_Howtowidth_ {_widththowx_}
     
    7579_jselection_ {}
    7680_nselection_ {}
    77 
     81#granularity selection for mgpp
     82_gselection_ {}
    7883#######################################################################
    7984# navigation bar images
     
    8893
    8994# image macros for all the classifications currently supported by
    90 # this receptionist. i.e. title, author, subject, series, date,
    91 # howto, and organization (and search)
     95# this receptionist.
     96#
     97# image macros for to and from added by gwp on 2000 june 13
    9298
    9399_imagesearch_ {<a href="_httpquery_"
     
    137143        border="0" alt="_textimageSubject_"></a>}
    138144_imageSubject_[v=1] {<a href="_httpbrowseSubject_">_textimageSubject_</a><br>
     145}
     146
     147_imageTo_ {<a href="_httpbrowseTo_"
     148    onMouseover = "img\_on('to')"
     149    onMouseout = "img\_off('to')"><img name="to"
     150        src="_httpiconttoof_" width="_widthttox_" height="_heightttox_"
     151        border="0" alt="_textimageTo_"></a>}
     152_imageTo_[v=1] {<a href="_httpbrowseTo_">_textimageTo_</a><br>
     153}
     154
     155_imageFrom_ {<a href="_httpbrowseFrom_"
     156    onMouseover = "img\_on('from')"
     157    onMouseout = "img\_off('from')"><img name="from"
     158        src="_httpicontfromof_" width="_widthtfromx_" height="_heighttfromx_"
     159        border="0" alt="_textimageFrom_"></a>}
     160_imageFrom_[v=1] {<a href="_httpbrowseFrom_">_textimageFrom_</a><br>
    139161}
    140162
     
    283305}
    284306
     307_javaTo_ {
     308    to\_on = new Image(_widthttox_, _heightttox_);         
     309    to\_on.src = "_httpiconttoon_";
     310    to\_off = new Image(_widthttox_, _heightttox_);         
     311    to\_off.src = "_httpiconttoof_";           
     312}
     313
     314_javaFrom_ {
     315    from\_on = new Image(_widthtfromx_, _heighttfromx_);         
     316    from\_on.src = "_httpicontfromon_";
     317    from\_off = new Image(_widthtfromx_, _heighttfromx_);         
     318    from\_off.src = "_httpicontfromof_";           
     319}
     320
    285321_javaHowto_ {
    286322    how\_on = new Image(_widththowx_, _heightthowx_);         
     
    323359#######################################################################
    324360
    325 _mailaddr_ {[email protected]}
     361_mailaddr_ {[email protected]}
    326362
    327363_gsdltop_ {_top}
     
    336372_httpdocimg_ {_httpcollimg_/_thisOID_}
    337373
    338 _httpcollection_{_httpprefix_/collect/_cgiargc_}
     374_httpcollection_ {_httpprefix_/collect/_cgiargc_}
    339375
    340376_httppagex_ {_gwcgi_?e=_compressedoptions_&a=p&p=_1_}
     
    348384
    349385_httpgreenstone_ {_httppagex_(gsdl)}
     386_httpdownload_ {http://www.nzdl.org/download}
     387_httppublications_ {_httpdownload_/greenstone/publications}
    350388
    351389_httpcurrentdocument_ {_gwcgi_?e=_compressedoptions_&cl=_cgiargcl_&d=_cgiargd_}
     
    512550
    513551_icontabSubjectgreen_ {<img
    514 src="_httpicontsubjgr_" height=_heighttsubjx_ width=widthtsubjx_ border=0>}
     552src="_httpicontsubjgr_" height=_heighttsubjx_ width=_widthtsubjx_ border=0>}
    515553_icontabSubjectgreen_[v=1] {_texticontabsubjectgreen_}
     554
     555_icontabTogreen_ {<img
     556src="_httpiconttogr_" height=_heightttox_ width=_widthttox_ border=0>}
     557_icontabTogreen_[v=1] {_texticontabtogreen_}
     558
     559_icontabFromgreen_ {<img
     560src="_httpicontfromgr_" height=_heighttfromx_ width=_widthtfromx_ border=0>}
     561_icontabFromgreen_[v=1] {_texticontabfromgreen_}
    516562
    517563_icontabHowtogreen_ {<img
     
    528574
    529575_icontabOrganizationgreen_ {<img
    530 src="_httpicontorggr_" height=_heighttorggr_ width=_widthtorggr_ border=0>}
     576src="_httpicontorggr_" height=_heighttorgx_ width=_widthtorgx_ border=0>}
    531577_icontabOrganizationgreen_[v=1] {_texticontaborggreen_}
    532578
  • branches/New_Config_Format-branch/gsdl/macros/build.dm

    r725 r1279  
    1010<table width=100%>
    1111  <tr>
    12     <td><img src="/gsdl/images/gsdl_height.gif" width=2 height=77></td>
     12    <td><img src="_httpimg_/spacer.gif" width=2 height=77></td>
    1313    <td>_iconblankbar_
    1414        <center><h2>_1_</h2></center>
     
    4444}
    4545
     46_httpiconstop_ {_httpimg_/stop.gif}
    4647
    4748_iconwizardnext_{<img src="_httpiconmore_" align=absbottom
     
    7374_bcargfileorurl_    {file}
    7475_bcarginputdir_     {}
    75 _bcargcopydir_      {}
     76_bcargcopydir_      {yes}
    7677_bcargingsdlarea_   {no}
    7778_bcargacronyms_     {off}
     
    287288       This process is structured as a series of Web pages, overseen by the
    288289    new-collection &quot;wizard.&quot  The wizard bar at the bottom of the page
    289     shows you the sequences of pages to be completed.  You can return to
    290     a previously page by clicking on the corresponding item in the wizard bar.
     290    shows you the sequence of pages to be completed.  You can return to
     291    a previous page by clicking on the corresponding item in the wizard bar.
    291292  </td>
    292293</tr>
     
    454455          \}
    455456      \}
    456 
    457457        simulate_get_action("_httpbuild_&bca=newcol");
    458458   \}
     
    654654    Note, once a collection has been built it
    655655    is not necessary for the source data to remain in the Greenstone
    656     collection area -- the collection can be safely access, browsed
     656    collection area -- the collection can be safely accessed, browsed
    657657    and searched without this.  If you wish, however, to make changes
    658     to the collection and then rebuilt it then
    659     the source data must still be avialable.
     658    to the collection and then rebuild, the source data must still be available.
    660659     </font>
    661660  </td>
     
    697696
    698697    var cgi_prefix = "webpage_mkcol.pl";
    699     cgi_prefix += "?gsdlhome=" + escape("_gsdlhome_");
    700     cgi_prefix += "&httpbuild=" + escape("_httpbuild_");
     698    cgi_prefix += "?httpbuild=" + escape("_httpbuild_");
    701699    cgi_prefix += "&bc1refine=" + escape(bc1refine);
    702700
     
    861859    \{
    862860        var cgi_prefix = "webpage_editcol.pl";
    863         cgi_prefix += "?gsdlhome=" + escape("_gsdlhome_");
    864         cgi_prefix += "&httpbuild=" + escape("_httpbuild_");
     861        cgi_prefix += "?httpbuild=" + escape("_httpbuild_");
    865862        cgi_prefix += "&bc1cfgfile=" + escape(form.bc1cfgfile.value);
    866863
     
    906903
    907904<tr>
    908   <td>Colletion to edit:</td>
     905  <td>Collection to edit:</td>
    909906  <td colspan=3>
    910907     _fullnamemenu_
     
    10271024
    10281025    var cgi_prefix = "_httpbuild_&bca=buildcol";
    1029     cgi_prefix += "&gsdlhome=" + escape("_gsdlhome_");
    10301026    cgi_prefix += "&httpbuild=" + escape("_httpbuild_");
    10311027    cgi_prefix += "&wizard=buildframe";
     
    10431039
    10441040    var cgi_prefix = "_httpbuild_&bca=buildcol";
    1045     cgi_prefix += "&gsdlhome=" + escape("_gsdlhome_");
    10461041    cgi_prefix += "&httpbuild=" + escape("_httpbuild_");
    10471042    cgi_prefix += "&wizard=buildcol";
     
    11641159
    11651160
     1161# not sure why we need 3 frames here when one appears to always be blank
     1162# - Stefan
     1163#_contentbuildframe_ {
     1164
     1165#<frameset rows="*,150,*" border=0>
     1166#  <noframes><body bgcolor="#ffffff">
     1167#  <p>You must have a frame enabled browser to view this.</p>
     1168#  </body>
     1169#  </noframes>
     1170#  <frame src="_httpbuild_&bca=buildcol&wizard=buildexec&bc1dirname=_bcargdirname_&bc1tmpname=_bcargtmpname_" name=infoframe>
     1171#  <frame src="_httpbuild_&bca=buildstatus&bc1tmpname=_bcargtmpname_" name=execframe>
     1172#  <frame src="_httpbuild_&bca=blankpage" name=blankframe>
     1173#</frameset>
     1174#}
     1175
    11661176_contentbuildframe_ {
    11671177
    1168 <frameset rows="*,150,*" border=0>
     1178<frameset rows="200,150" border=0>
    11691179  <noframes><body bgcolor="#ffffff">
    11701180  <p>You must have a frame enabled browser to view this.</p>
     
    11731183  <frame src="_httpbuild_&bca=buildcol&wizard=buildexec&bc1dirname=_bcargdirname_&bc1tmpname=_bcargtmpname_" name=infoframe>
    11741184  <frame src="_httpbuild_&bca=buildstatus&bc1tmpname=_bcargtmpname_" name=execframe>
    1175   <frame src="_httpbuild_&bca=blankpage" name=blankframe>
    11761185</frameset>
    11771186}
     
    11891198
    11901199   <!-- Hide code from non-js browsers
    1191    var timer = 5;
     1200   var timer = 10;
    11921201   function restart_count_down() \{
    1193     timer=5;
     1202    timer=10;
    11941203    setTimeout("count_down()",1000);
    11951204   \}
     
    12101219   function update_status_page() \{
    12111220    var cgi_cmd = "webpage_buildstatus.pl";
    1212     cgi_cmd += "?gsdlhome=" + escape("_gsdlhome_");
    1213     cgi_cmd += "&httpbuild=" + escape("_httpbuild_");
     1221    cgi_cmd += "?httpbuild=" + escape("_httpbuild_");
    12141222    cgi_cmd += "&bc1dirname=" + escape("_bcargdirname_");
    12151223    cgi_cmd += "&bc1tmpname=" + escape("_bcargtmpname_");
     
    12351243           \}
    12361244
    1237          cgi_cmd += "&gsdlhome=" + escape("_gsdlhome_");
    12381245         cgi_cmd += "&httpbuild=" + escape("_httpbuild_");
    12391246         cgi_cmd += "&bc1dirname=" + escape("_bcargdirname_");
     
    13581365      \{
    13591366        var cgi_prefix = "webpage_delcol.pl";
    1360         cgi_prefix += "?gsdlhome=" + escape("_gsdlhome_");
    1361         cgi_prefix += "&httpbuild=" + escape("_httpbuild_");
     1367        cgi_prefix += "?httpbuild=" + escape("_httpbuild_");
    13621368
    13631369            simulate_get_action(cgi_prefix);
     
    13941400
    13951401<tr>
    1396   <td>Colletion to delete:</td>
     1402  <td>Collection to delete:</td>
    13971403  <td colspan=3>
    13981404     _fullnamemenu_
  • branches/New_Config_Format-branch/gsdl/macros/document.dm

    r944 r1279  
    155155_iconSubjectpage_ [v=1] {<h2>_texticonhsubj_</h2>}
    156156
     157_iconTopage_ {<img src="_httpiconhto_" width=_widthhto_
     158height=_heighthto_}
     159_iconTopage_ [v=1] {<h2>_texticonhto_</h2>}
     160
     161_iconFrompage_ {<img src="_httpiconhfrom_" width=_widthhfrom_
     162height=_heighthfrom_}
     163_iconFrompage_ [v=1] {<h2>_texticonhfrom_</h2>}
     164
    157165_iconSeriespage_ {<img src="_httpiconhser_" width=_widthhser_
    158166height=_heighthser_}
     
    229237_tab_ {<td>_icontab_</td>}
    230238
    231 _iconcontracttoc_ {<img name="concon" src="_httpiconeconcof_" width=_widtheconcx_ height=_heighteconcof_ alt="_texticoncontracttoc_" border=0>}
     239_iconcontracttoc_ {<img name="concon" src="_httpiconeconcof_" width=_widtheconcx_ height=_heighteconcx_ alt="_texticoncontracttoc_" border=0>}
    232240_iconcontracttoc_[v=1] {_texticoncontracttoc_}
    233241
    234 _iconexpandtoc_ {<img name="expcon" src="_httpiconeexpcof_" width=_widtheexpx_ height=_heighteexpcx_ alt="_texticonexpandtoc_" border=0>}
     242_iconexpandtoc_ {<img name="expcon" src="_httpiconeexpcof_" width=_widtheexpcx_ height=_heighteexpcx_ alt="_texticonexpandtoc_" border=0>}
    235243_iconexpandtoc_[v=1] {_texticonexpandtoc_}
    236244
  • branches/New_Config_Format-branch/gsdl/macros/english.dm

    r1032 r1279  
    7272_textimageDate_ {Browse by date}
    7373_textimageSubject_ {Browse by subject category}
     74_textimageTo_ {Browse by To field}
     75_textimageFrom_ {Browse by From field}
    7476_textimageOrganization_ {Browse by organization}
    7577_textimageHowto_ {Browse how to categories}
     
    8587_texticontablistgreen_ {Listing}
    8688_texticontabsubjectgreen_{Subjects}
     89_texticontabtogreen_{To}
     90_texticontabfromgreen_{From}
    8791_texticontaborggreen_{Organization}
    8892_texticontabhowgreen_{How to}
     
    116120_textmonth12_ {December}
    117121
     122_Document_ {Document}
     123_Section_ {Section}
     124_Paragraph_ {Paragraph}
     125
    118126_magazines_ {Magazines}
    119127
     128_nzdlpagefooter_ {<p>_iconblankbar_
     129<p><a href="http://www.nzdl.org">New Zealand Digital Library Project</a>
     130<br><a href="http://www.cs.waikato.ac.nz/cs">Department of Computer Science</a>,
     131<a href="http://www.waikato.ac.nz">University of Waikato</a>,
     132New Zealand}
    120133
    121134#------------------------------------------------------------
     
    174187_heighttsubjx_ {17}
    175188
     189## "to" ## nav_bar_button ## tto ##
     190_httpiconttogr_ {_httpimg_/ttogr.gif}
     191_httpiconttoon_ {_httpimg_/ttoon.gif}
     192_httpiconttoof_ {_httpimg_/ttoof.gif}
     193_widthttox_ {87}
     194_heightttox_ {17}
     195
     196## "from" ## nav_bar_button ## tfrom ##
     197_httpicontfromgr_ {_httpimg_/tfromgr.gif}
     198_httpicontfromon_ {_httpimg_/tfromon.gif}
     199_httpicontfromof_ {_httpimg_/tfromof.gif}
     200_widthtfromx_ {87}
     201_heighttfromx_ {17}
     202
    176203## "organization" ## nav_bar_button ## torg ##
    177204_httpicontorggr_{_httpimg_/torggr.gif}
     
    269296_textCreatorpage_ {_texticonhauth_}
    270297_textSubjectpage_ {_texticonhsubj_}
     298_textTopage_ {_texticonhto_}
     299_textFrompage_ {_texticonhfrom_}
    271300_textSeriespage_ {_texticonhser_}
    272301_textDatepage_ {_texticonhdate_}
     
    279308_texticonhauth_ {Authors A-Z}
    280309_texticonhsubj_ {Subjects}
     310_texticonhto_ {To}
     311_texticonhfrom_ {From}
    281312_texticonhser_ {Series}
    282313_texticonhdate_ {Dates}
     
    330361_httpiconhauth_ {_httpimg_/h\_auth.gif}
    331362_widthhauth_ {200}
     363_heighthauth_ {57}
    332364
    333365## "subjects" ## green_title ## h_subj ##
     
    335367_widthhsubj_ {200}
    336368_heighthsubj_ {57}
     369
     370## "to" ## green_title ## h_to ##
     371_httpiconhto_ {_httpimg_/h\_to.gif}
     372_widthhto_ {200}
     373_heighthto_ {57}
     374
     375## "from" ## green_title ## h_from ##
     376_httpiconhfrom_ {_httpimg_/h\_from.gif}
     377_widthhfrom_ {200}
     378_heighthfrom_ {57}
    337379
    338380## "series" ## green_title ## h_ser ##
     
    458500_textselect_ {_If_(_cgiargb_,_textadvancedsearch_,_textsimplesearch_)}
    459501
    460 _textsimplesearch_ {Search for _If_(_hselection_, _hselection_)_If_(_jselection_,_textjselect_)_If_(_nselection_, in _nselection_ language)
     502_textsimplesearch_ {Search for _If_(_hselection_, _hselection_)_If_(_jselection_,_textjselect_)_If_(_gselection_, at _gselection_ level)_If_(_nselection_, in _nselection_ language)
    461503which contain _querytypeselection_ of the words}
    462504
    463 _textadvancedsearch_ {Search_If_(_hselection_, _hselection_, _defaultindextext_)_If_(_jselection_,_textjselect_)_If_(_nselection_, in _nselection_ language)
     505_textadvancedsearch_ {Search_If_(_hselection_, _hselection_, _defaultindextext_)_If_(_jselection_,_textjselect_)_If_(_gselection_, at _gselection_ level)_If_(_nselection_, in _nselection_ language)
    464506using _querytypeselection_ query}
    465507
     
    500542## "display" ## hand_made ##
    501543_httpicondisplay_ {_httpimg_/display.gif}
    502 _widthdisplay {60}
     544_widthdisplay_ {60}
    503545_heightdisplay_ {20}
    504546
     
    538580_textlangeng_ {English}
    539581_textlanggerman_ {German}
     582_textlangfrench_ {French}
     583_textlangspanish_ {Spanish}
    540584_textlangmaori_ {M_amn_ori}
    541585_textlangchinese_ {Chinese}
     586_textlangarabic_ {Arabic}
    542587_textgraphical_ {Graphical}
    543588_texttextual_ {Textual}
     
    605650_textDateshort_ {access publications by date}
    606651_textSubjectshort_ {access publications by subject}
     652_textToshort_ {access publications by To field}
     653_textFromshort_ {access publications by From field}
    607654_textTitleshort_ {access publications by title}
    608655_textBrowseshort_ {browse publications}
     
    642689pressing the <i>subjects</i> button.  This brings up a list of subjects,
    643690represented by bookshelves.  }
     691
     692_textTolong_ { <p>You can <i>access publications by To field</i> by
     693pressing the <i>to</i> button.  This brings up a list of addressees.  }
     694
     695_textFromlong_ { <p>You can <i>access publications by From field</i> by
     696pressing the <i>from</i> button.  This brings up a list of senders.  }
    644697
    645698_textSerieslong_ { <p>You can <i>access publications by series</i> by
  • branches/New_Config_Format-branch/gsdl/macros/english2.dm

    r1034 r1279  
    2323_colnotbuilt_ {Collection not built.}
    2424
    25 _textpeople_ {People}
     25_textpagetitle_ {Greenstone Digital Library}
    2626_textfb_ {Feedback}
    27 _textpub_ {Publications}
    28 _texttec_ {Technology}
    29 _textrw_ {Related Work}
    3027_textinfosheet_ {Info Sheet}
    31 _textscreenshots_ {Screen Shots}
    32 _textnpepainfosheet_ {Niupepa Info Sheet}
    33 _textpagetitle_ {Greenstone Digital Library}
    34 _textprojhead_ {The New Zealand Digital Library Project}
    3528
     29_textprojhead_ {The Greenstone software and <br>The New Zealand Digital Library Project}
    3630_textprojinfo_ {
    37 <h4>The NZDL system</h4>
    38 
    39 <p> The New Zealand Digital Library system comprises several demonstration
    40 collections -- computer science technical reports and bibliographies,
    41 literary works, humanitarian and development information, magazines -- and
    42 makes them available over the Web through full-text interfaces.  Behind the
    43 query interface lies a huge collection providing gigabytes of information.
    44 We hope you find what you want, or at least something intriguing!
    45 
    46 <h4>The Greenstone software</h4>
    47 
    48 <p> The Greenstone Digital Library software provides a new way of
    49 organizing information and making it available over the Internet.  A
    50 <i>collection</i> of information comprises several (typically several
    51 thousand, or even several million) <i>documents</i>, which share a uniform
    52 searching and browsing interface.  The collections in a library are
    53 organized in a different way--though they share a strong family
    54 resemblance.  Although primarily designed for access over the Web,
    55 Greenstone collections can be made available, in precisely the same form,
    56 on CD-ROM for standalone PCs.  Greenstone is open-source software,
    57 available under the terms of the Gnu public license. 
    58 <p>The following websites are among those currently using Greenstone. <i>Note that these
    59 sites are under development.</i>
    60 <ul>
    61 <li><a href="http://moby.cisti.nrc.ca/~nzdl/cgi-bin/library">CISTI</a>
    62 <li><a href="http://gene.rutgers.edu/cgi-bin/library">Rutgers University</a>
    63 <li><a href="http://csdl1.mdx.ac.uk/">Middlesex University</a>
    64 <li><a href="http://laraine.unidata.ucar.edu/projects/coohl/htdig/cgi-bin/library">Unidata</a>
    65 </ul>
    66 <h4>The research</h4>
    67 
    68 <p> The goal of our research program is to explore the potential of
    69 internet-based digital libraries.  Our vision is to develop systems that
    70 automatically impose structure on anarchic, uncatalogued, distributed
    71 repositories of information, thereby providing information consumers with
    72 effective tools to locate what they need and to peruse it conveniently and
    73 comfortably.  Our research objectives are to
    74 
    75 <ul>
    76   <li> develop technology for creating and automatically
    77       maintaining collections;
    78   <li> monitor usage to study library users' needs;
    79   <li> look at novel interfaces that cater to a wide spectrum of users;
    80   <li> find ways to abstract layout and bibliographic information
    81       from document files;
    82   <li> use this information to enhance presentation and for
    83       bibliometric research;
    84   <li> assess potential subject areas for public-domain collections;
    85   <li> survey and critique other digital library projects.
    86 </ul>
     31<p>
     32The Greenstone Digital Library software provides a new way of
     33organizing information and making it available over the Internet or on
     34CD-ROM. It is open-source software, available under the terms of the
     35GNU General Public License.
     36<p>
     37A digital library is made up of a set of collections. Each collection of
     38information comprises several (typically several thousand, or even
     39several million) documents, which share a uniform searching and
     40browsing interface. Collections can be organized in many different
     41ways while retaining a strong family resemblance.
     42<p>
     43The
     44<a href="http://www.nzdl.org">New Zealand Digital Library Project</a>
     45is a research programme at The University of Waikato whose aim is to
     46develop the underlying technology for digital libraries and make it
     47available publicly so that others can use it to create their own
     48collections.
     49Greenstone was created to further this objective.
     50Further details are available from 
     51<a href="http://www.nzdl.org">http://www.nzdl.org</a>
    8752}
    8853
    89 _textotherinfo_ {
    90 <table border=0 cellpadding=5><tr valign=top>
    91 <td width=50%>
    92 <h4>Global Help Projects vzw</h4>
     54_textpoem_ {
     55<br><h2>Kia papapounamu te moana</h2>
    9356
    94 <a href="http://www.globalprojects.org">Global Help Projects</a> is a
    95 registered charity responsible for the Humanity Libraries Project that
    96 provides universal low-cost information access through co-operation between
    97 UN Agencies, universities and NGOs.  Global Help Projects collaborate
    98 extensively with the NZDL project, and use the Greenstone software.
    99 </td><td width=50%>
     57<p>kia hora te marino,
     58<br>kia tere te karohirohi,
     59<br>kia papapounamu te moana
    10060
    101 <h4>DigiLib Systems Limited</h4>
    102 
    103 <p><a href="http://www.digilibs.com/">DigiLib Systems Limited</a> is an
    104 innovative software company that creates international digital libraries.
    105 As a major contributor to the Greenstone Digital Library Software they are
    106 able to build, customize, and extend digital libraries to meet exacting
    107 needs.  Please <a href="mailto:[email protected]">contact</a> them for
    108 an obligation free quote.  </td></tr></table>}
    109 
    110 _textpoem_ {
    111 <br><h2 align=left>Kia papapounamu te moana</h2>
    112 
    113         <p>kia hora te marino,
    114         <br>kia tere te karohirohi,
    115         <br>kia papapounamu te moana
    116 
    117         <p>may peace and calmness surround you,
    118         <br>may you reside in the warmth of a summer's haze,
    119         <br>may the ocean of your travels be as smooth as the polished greenstone.
     61<p>may peace and calmness surround you,
     62<br>may you reside in the warmth of a summer's haze,
     63<br>may the ocean of your travels be as smooth as the polished greenstone.
    12064}
    12165
    122 _textgreenstone_ { <p><br> Greenstone is a semi-precious stone that (like
    123 this software) is sourced in New Zealand.  In traditional Maori society it
    124 was the most highly prized and sought after of all substances.  It can
    125 absorb and hold <i>wairua</i>, which is a spirit or life force, and is
    126 endowed with traditional virtues that make it an appropriate emblem for a
     66_textgreenstone_ {
     67<p>Greenstone is a semi-precious stone that (like this software) is sourced in New Zealand.  In traditional Maori society it was the most highly prized and sought after of all substances.  It can absorb and hold <i>wairua</i>, which is a spirit or life force, and is endowed with traditional virtues that make it an appropriate emblem for a
    12768public-domain digital library project.  Its lustre shows charity; its
    12869translucence, honesty; its toughness, courage; and the sharp edge it can
     
    13475symbolizing the leading edge of technology.
    13576
    136 <p><a href="mailto:[email protected]">Greenstone Digital Library Software</a>
    137 <br><a href="http://www.cs.waikato.ac.nz/cs">Computer Science Department</a>,
    138 <a href="http://www.waikato.ac.nz">University of Waikato</a>, New Zealand
    139 
    140 <br>October 1999
    14177}
    14278
     
    15591_widthselcolgr_ {537}
    15692_heightselcolgr_ {17}
    157 
    158 
    159 
    160 ######################################################################
    161 # 'people' page
    162 package people
    163 ######################################################################
    164 
    165 
    166 #------------------------------------------------------------
    167 # text macros
    168 #------------------------------------------------------------
    169 
    170 _textpagetitle_ {NZDL: People}
    171 
    172 _textsmallrs_ {Related Staff}
    173 
    174 _textsawnzdl_ {Staff associated with the New Zealand Digital Library
    175     project are:}
    176 _texttrsaawp_ {These research students and software support people are associated with the project:}
    177 
    178 _textstafftable_ {
    179 <table>
    180 <tr>
    181   <td align=right valign=top width=100><a href="http://www.cs.waikato.ac.nz/~ihw">Ian Witten</a></td>
    182   <td _1_ align=left valign=top>Project leader; co-author of <a href="http://www.cs.mu.oz.au/mg/"><i>Managing Gigabytes</i></a></td>
    183 </tr>
    184 <tr>
    185   <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/cs/Staff/mark-d.-apperley-.html">Mark Apperley</a></td>
    186   <td _1_ align=left valign=top>User interfaces for readers</td>
    187 </tr>
    188 <tr>
    189   <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/cs/Staff/david-bainbridge.html">David Bainbridge</a></td>
    190   <td _1_ align=left valign=top>Musical and Web-based collections; optical music recognition</td>
    191 </tr>
    192 <tr>
    193   <td align=right valign=top><a
    194 href="http://www.cs.waikato.ac.nz/cs/Staff/sally-jo-cunningham.html">Sally Jo Cunningham</a></td>
    195   <td _1_ align=left valign=top>Collections and usage studies</td>
    196 </tr>
    197 <tr>
    198   <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/cs/Staff/steve-jones.html">Steve Jones</a></td>
    199   <td _1_ align=left valign=top>Phrase-based interfaces, collaborative browsing, usage analysis</td>
    200 </tr>
    201 <tr>
    202   <td align=right valign=top><a
    203   href="http://www.cs.waikato.ac.nz/cs/Staff/te-taka-keegan.html">Te Taka Keegan
    204   </a></td>
    205   <td _1_ align=left valign=top>Maori language systems</td>
    206 </tr>
    207   <td align=right valign=top><a
    208   href="http://www.cs.waikato.ac.nz/Staff/malika-mahoui.html">Malika
    209   Mahoui</a></td>
    210   <td _1_ align=left valign=top>Text mining, Arabic interfaces</td>
    211 </tr>
    212 </table>
    213 }
    214 
    215 _textgstable_ {
    216 <table>
    217 <tr>
    218   <td align=right valign=top>George Buchanan</a></td>
    219   <td _1_ align=left valign=top>Systems support</td>
    220 </tr>
    221 <tr>
    222   <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/~sjboddie">Stefan Boddie</a></td>
    223   <td _1_ align=left valign=top>Systems support</td>
    224 </tr>
    225 <tr>
    226   <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/~rjmcnab">Rodger McNab</a></td>
    227   <td _1_ align=left valign=top>Systems support</td>
    228 </tr>
    229 <tr>
    230   <td align=right valign=top>YingYing Wen</a></td>
    231   <td _1_ align=left valign=top>Text mining, Chinese libraries</td>
    232 </tr>
    233 <tr>
    234   <td align=right valign=top>Stuart Yeates</a></td>
    235   <td _1_ align=left valign=top>Text mining, acronym extraction</td>
    236 </tr>
    237 </table>
    238 }
    239 
    240 _textsmallcont_ {Other Contributors}
    241 _texttpcsp_ {These people have contributed strongly to the project:}
    242 
    243 _textconttable_ {
    244 <table>
    245 <tr>
    246   <td align=right valign=top width=100>Mark Abrahams</td>
    247   <td _1_ align=left valign=top>Client-side browsing interfaces using Java</td>
    248 </tr>
    249 <tr>
    250   <td align=right valign=top width=100><a href="http://www.cosc.canterbury.ac.nz/~tim">Tim Bell</a></td>
    251   <td _1_ align=left valign=top>Co-author of <a href="http://www.cs.mu.oz.au/mg/"><i>Managing Gigabytes</i></a></td>
    252 </tr>
    253 <tr>
    254   <td align=right valign=top>Matt Humphrey</td>
    255   <td _1_ align=left valign=top>Information visualization in the digital library</td>
    256 </tr>
    257 <tr>
    258   <td align=right valign=top><a
    259 href="http://www.cs.waikato.ac.nz/~singlis">Stuart Inglis</a></td>
    260   <td _1_ align=left valign=top>Document image analysis and optical character recognition</td>
    261 </tr>
    262 <tr>
    263   <td align=right valign=top>Trent Mankelow</a></td>
    264   <td _1_ align=left valign=top>School Journal prototype</td>
    265 </tr>
    266 <tr>
    267   <td align=right valign=top><a href="http://www.cosc.canterbury.ac.nz/~bruce">Bruce McKenzie</a></td>
    268   <td _1_ align=left valign=top>Original interface to MG</td>
    269 </tr>
    270 <tr>
    271   <td align=right valign=top><a href="http://www.cs.mu.oz.au/~alistair">Alistair Moffat</a></td>
    272   <td _1_ align=left valign=top>Co-author of <a href="http://www.cs.mu.oz.au/mg/"><i>Managing Gigabytes</i></a>,
    273 created the MG software</td>
    274 </tr>
    275 <tr>
    276   <td align=right valign=top>Todd Reed</td>
    277   <td _1_ align=left valign=top>PostScript to text conversion, user interface, WWW server, index building, FTP</td>
    278 </tr>
    279 <tr>
    280   <td align=right valign=top><a
    281 href="http://www.cs.waikato.ac.nz/cs/Staff/don-a.-smith.html">Don Smith</a></td>
    282   <td _1_ align=left valign=top>Special needs of libraries for mathematical and theoretical materials</td>
    283 </tr>
    284 <tr>
    285   <td align=right valign=top>Che Tamahori</td>
    286   <td _1_ align=left valign=top>Designer of New Zealand Digital Library Web
    287   pages</td>
    288 </tr>
    289 <tr>
    290   <td align=right valign=top><a
    291 href="http://www.cs.waikato.ac.nz/~wjt">Bill Teahan</a></td>
    292   <td _1_ align=left valign=top>Language modeling</td>
    293 </tr>
    294 <tr>
    295   <td align=right valign=top>Mahendra Vallabh</td>
    296   <td _1_ align=left valign=top>Original FTP script</td>
    297 </tr>
    298 <tr>
    299   <td align=right valign=top><a
    300 href="http://www.cs.waikato.ac.nz/cs/Staff/lloyd-a.-smith.html">Lloyd Smith</a></td>
    301   <td _1_ align=left valign=top>Music collections and music retrieval</td>
    302 </tr>
    303 <tr>
    304   <td align=right valign=top>John Venable</td>
    305   <td _1_ align=left valign=top>Requirements for digital libraries, and collections for information systems</td>
    306 </tr>
    307 </table>
    308 }
    309 
    310 _textaffiliates_ {There are several affiliates at other universities:}
    311 
    312 _textaffiliatetable_ {
    313 <table>
    314 <tr>
    315   <td align=right valign=top>Elke Duenker</td>
    316   <td _1_ align=left valign=top>Cross-cultural issues</td>
    317 </tr>
    318 <tr>
    319   <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/~cgn/
    320 ">Craig Nevill-Manning</a></td>
    321   <td _1_ align=left valign=top>PostScript to text conversion, user
    322 interface, WWW server, index
    323 building, FTP</td>
    324 </tr>
    325 <tr>
    326   <td align=right valign=top>Nina Reeves</td>
    327   <td _1_ align=left valign=top>Librarians and library users</td>
    328 </tr>
    329 <tr>
    330   <td align=right valign=top>Yin Leng Theng</td>
    331   <td _1_ align=left valign=top>Digital libraries for schools</td>
    332 </tr>
    333 <tr>
    334   <td align=right valign=top>Harold Thimbleby</td>
    335   <td _1_ align=left valign=top>User interfaces for digital
    336 libraries</td>
    337 </tr>
    338 </table>
    339 }
    340 
    341 
    342 #------------------------------------------------------------
    343 # icons
    344 #------------------------------------------------------------
    345 
    346 ## "people" ## green_title ## h_people ##
    347 _httpiconhpeople_ {_httpimg_/h\_people.gif}
    348 _widthhpeople_ {200}
    349 _heighthpeople_ {57}
    350 
    351 
    352 
    353 ######################################################################
    354 # 'technology' page
    355 package technology
    356 ######################################################################
    357 
    358 
    359 #------------------------------------------------------------
    360 # text macros
    361 #------------------------------------------------------------
    362 
    363 _textpagetitle_ {NZDL: Technology}
    364 
    365 _content_ {
    366 _iconblankbar_
    367 <p>There are several freely available technologies underlying the New Zealand
    368 Digital Library:
    369 <ul>
    370 <li><a href="_httppagex_(gsdlsoft)"><i>Greenstone</i></a>, the digital
    371 library system that generates each and every page of this website.<p>
    372 
    373 <li><a href="_httppagex_(prescript)"><i>PreScript</i></a>, a system
    374 that converts PostScript to plain ASCII or HTML, detects paragraph boundaries,
    375 removes hyphenation, and interprets many ligatures.<p>
    376 
    377 <li><a href="_httppagex_(mg)"><i>MG</i></a>, an enhancement of the <a
    378 href="http://www.cs.mu.oz.au/mg"><i>Managing Gigabytes</i></a> full-text
    379 retrieval system, that provides flexible stemming methods, weighting terms,
    380 term frequencies, merged indexes, machine independent indexes, and a port to
    381 MSDOS.<p>
    382 
    383 <li><a href="http://www.cs.waikato.ac.nz/sequitur"><i>Sequitur</i></a>, a
    384 method for inferring compositional hierarchies from strings by detecting
    385 repetition and factoring it out of the string by  forming rules in a
    386 grammar. The rules can be composed of non-terminals, giving rise to a
    387 hierarchy. Sequitur is useful for  recognizing lexical structure in strings,
    388 and excels at very long sequences.<p>
    389 
    390 <li><a href="http://www.nzdl.org/Kea"><i>Kea</i></a>, a program for
    391 automatically extracting keyphrases from the full text of documents. Candidate
    392 keyphrases are identified using rudimentary lexical processing, features are
    393 computed for each candidate, and machine learning is used to generate a
    394 classifier that determines which candidates should be assigned as
    395 keyphrases. <p>
    396 
    397 <li><a href="http://www.cs.waikato.ac.nz/~stevej/Research/Phrasier/"><i>Phrasier</i></a>, a
    398 tool to support information seeking activities in a digital library.  Its novel design
    399 reflects the fact that reading, writing, browsing and searching activities are rarely
    400 carried out independently of each other.  They overlap and interleave in ways which have
    401 not been effectively supported by conventional information retrieval interfaces.  Consequently
    402 Phrasier blurs the distinction between writing a document and finding material related to it;
    403 between reading a document and finding others on the same or similar topics; between keyword
    404 searching and subject browsing. <p>
    405 
    406 </ul>
    407 
    408 <br>
    409 }
    410 
    411 
    412 #------------------------------------------------------------
    413 # icons
    414 #------------------------------------------------------------
    415 
    416 ## "technology" ## green_title ## h_tech ##
    417 _httpiconhtech_ {_httpimg_/h\_tech.gif}
    418 _widthhtech_ {200}
    419 _heighthtech_ {57}
    420 
    421 
    422 
    423 ######################################################################
    424 # 'status' pages
    425 package status
    426 ######################################################################
    427 
    428 
    429 #------------------------------------------------------------
    430 # text macros
    431 #------------------------------------------------------------
    432 
    433 _textframebrowser_ {You must have a frame enabled browser to view this.}
    434 _textusermanage_ {User management}
    435 _textlistusers_ {list users}
    436 _textaddusers_ {add a new user}
    437 
    438 _textinfo_ {Information}
    439 _textgeneral_ {general}
    440 _textarguments_ {arguments}
    441 _textactions_ {actions}
    442 _textprotocols_ {protocols}
    443 
    444 _textcollections_ {Collections}
    445 _textnewcoll_ {new collection}
    446 _texteditcoll_ {edit collection}
    447 _textbuildcoll_ {build collection}
    448 _textdeletecoll_ {delete collection}
    449 
    450 _textlogs_ {Logs}
    451 _textinitlog_ {init log}
    452 _texterrorlog_ {error log}
    453 
    454 _textreturnhome_ {Return to home page}
    455 
    456 _titlewelcome_ { Maintenance and Administration }
    457 
    458 _welcome_ {
    459 
    460 <p> Maintenance and administration services available include:
    461     view on-line logs;
    462     create, maintain and update collections;
    463     and access technical information such as CGI arguments. 
    464     These services are accessed using the
    465     side navigation bar on the lefthand side of the page.
    466 }
    467 
    468 
    469 #------------------------------------------------------------
    470 # icons
    471 #------------------------------------------------------------
    472 
    473 
    474 
    475 ######################################################################
    476 # html package
    477 package html
    478 ######################################################################
    479 
    480 
    481 #------------------------------------------------------------
    482 # text macros
    483 #------------------------------------------------------------
    484 
    485 _textframebrowser_ {You must have a frame enabled browser to view this.}
    486 
    487 
    488 #------------------------------------------------------------
    489 # icons
    490 #------------------------------------------------------------
     93_altselcolgr_ {"Select a collection"}
    49194
    49295
  • branches/New_Config_Format-branch/gsdl/macros/german.dm

    r1071 r1279  
    380380
    381381## "EXPANDIEREN
    382   DES TEXTS" ## document_button ## eallt ##
     382#  DES TEXTS" ## document_button ## eallt ##
    383383_httpiconealltof_ [l=de] {_httpimg_/de/ealltof.gif}
    384384_httpiconeallton_ [l=de] {_httpimg_/de/eallton.gif}
     
    387387
    388388## "KONTRAKTIONDES
    389         INHALTS" ## document_button ## econc ##
     389#        INHALTS" ## document_button ## econc ##
    390390_httpiconeconcof_ [l=de] {_httpimg_/de/econcof.gif}
    391391_httpiconeconcon_ [l=de] {_httpimg_/de/econcon.gif}
     
    394394
    395395## "SEPARATES
    396   FENSTER" ## document_button ## edtch ##
     396#  FENSTER" ## document_button ## edtch ##
    397397_httpiconedtchof_ [l=de] {_httpimg_/de/edtchof.gif}
    398398_httpiconedtchon_ [l=de] {_httpimg_/de/edtchon.gif}
     
    401401
    402402## "EXPANDIEREN
    403 DES INHALTS" ## document_button ## eexpc ##
     403#DES INHALTS" ## document_button ## eexpc ##
    404404_httpiconeexpcof_ [l=de] {_httpimg_/de/eexpcof.gif}
    405405_httpiconeexpcon_ [l=de] {_httpimg_/de/eexpcon.gif}
     
    408408
    409409## "KONTRAKTION
    410    DES TEXTS" ## document_button ## etsec ##
     410#   DES TEXTS" ## document_button ## etsec ##
    411411_httpiconetsecof_ [l=de] {_httpimg_/de/etsecof.gif}
    412412_httpiconetsecon_ [l=de] {_httpimg_/de/etsecon.gif}
     
    421421
    422422## "         KEINE
    423 HERVORHEBUNG" ## document_button ## enhl ##
     423#HERVORHEBUNG" ## document_button ## enhl ##
    424424_httpiconenhlof_ [l=de] {_httpimg_/de/enhlof.gif}
    425425_httpiconenhlon_ [l=de] {_httpimg_/de/enhlon.gif}
     
    508508## "sichtung" ## hand_made ##
    509509_httpicondisplay_ [l=de] {_httpimg_/display.gif}
    510 _widthdisplay {60}
     510_widthdisplay_ [l=de] {60}
    511511_heightdisplay_ [l=de] {20}
    512512
  • branches/New_Config_Format-branch/gsdl/macros/gsdlsoft.dm

    r1049 r1279  
    2525_iconblankbar_
    2626
    27 <h4>The Greenstone software</h4>
     27<h2>The Greenstone software</h2>
    2828
    29 <p> The Greenstone Digital Library software provides a new way of
    30 organizing information and making it available over the Internet.  A
    31 <i>collection</i> of information comprises several (typically several
    32 thousand, or even several million) <i>documents</i>, which share a uniform
    33 searching and browsing interface.  The collections in a library are
    34 organized in a different way--though they share a strong family
    35 resemblance.  Although primarily designed for access over the Web,
    36 Greenstone collections can be made available, in precisely the same form,
    37 on CD-ROM for standalone PCs.  Greenstone is open-source software,
    38 available under the terms of the Gnu public license.  Documentation is
    39 available in the form of <i><a
    40 href="http://www.nzdl.org/download/greenstone/publications/gsdl_manual.pdf">The Greenstone
    41 Digital Library Software</a></i> manual.
     29<p>
     30This page explains how to download and install the
     31<a href="_httppagex_(gsdl)">Greenstone Digital Library software</a>.
     32<p>
     33Greenstone is open-source software.
     34It is distributed under the terms of the
     35<a href="http://www.gnu.org/copyleft/gpl.html">GNU General Public License</a>.
     36Documentation is available in the form of
     37<i><a href="_gsdl:httpgsdlmanual_">The Greenstone Digital Library Software manual</a></i>.
     38
     39<h4>Downloading Greenstone</h4>
     40<p>
     41<a
     42href="http://www.nzdl.org/download/greenstone/gsdl-2.13.tar.gz"><i>gsdl-2.13.tar.gz</i></a>
     43<i>(4.6 Mb)</i> contains the latest distribution of Greenstone.
     44<p>
     45<a href="http://www.nzdl.org/download/greenstone/publications/gsdl_manual.pdf">gsdl_manual.pdf</a> (570 Kb) contains the manual in PDF format.
    4246
    4347<h4>Installing Greenstone</h4>
     
    4549<p> To install on unix systems:
    4650
    47 <ul> <li>Download the <a
    48 href="http://www.nzdl.org/download/greenstone/gsdl-2.13.tar.gz"><i>gsdl-2.13</i></a>
    49 distribution <i>(4.6 Mb)</i>.  <li>Extract the gzipped tar archive <i>(tar
    50 xvzf gsdl-2.13.tar.gz)</i>.  <li>In the resulting gsdl directory type
    51 <i>./configure</i>. When the configure script has finished running type
    52 <i>make</i>, then <i>make install</i>.  <li><i>make install</i> copies the
     51<ul>
     52<li>Download the
     53<a href="http://www.nzdl.org/download/greenstone/gsdl-2.13.tar.gz"><i>gsdl-2.13</i></a>
     54distribution <i>(4.6 Mb)</i>. 
     55<li>Extract the gzipped tar archive <i>(tar xvzf gsdl-2.13.tar.gz)</i>. 
     56<li>In the resulting gsdl directory type <i>./configure</i>.
     57When the configure script has finished running type <i>make</i>,
     58then <i>make install</i>.
     59<li><i>make install</i> copies the
    5360compiled executable file into the gsdl/cgi-bin directory by default. To run
    5461the library as a cgi script move the executable and the gsdlsite.cfg
    55 configuration file to your systems cgi-bin directory.  <li>The gsdlsite.cfg
     62configuration file to your system's cgi-bin directory.
     63<li>The gsdlsite.cfg
    5664configuration file must be edited to suit your site. The gsdlhome entry
    5765will need to be set to point to your gsdl directory. Other fields that may
    5866need changing are httpprefix (the web path to the gsdl directory) and
    59 httpimg (the path to gsdl/images).  <li>To build the demonstration
     67httpimg (the path to gsdl/images).
     68<li>To build the demonstration
    6069collection that comes with the distribution run the builddemo.sh script
    61 from within the gsdl directory.  <li>For more information on using the
     70from within the gsdl directory.
     71<li>For more information on using the
    6272Greenstone software, download <i><a
    6373href="http://www.nzdl.org/download/greenstone/publications/gsdl_manual.pdf">The Greenstone
     
    105115Please report bugs or installation problems to <a href="mailto:[email protected]">[email protected]</a>
    106116
     117_nzdlpagefooter_
     118<br>April 2000
    107119}
  • branches/New_Config_Format-branch/gsdl/macros/home.dm

    r1049 r1279  
    11package home
    2 
    3 _httpmusiclibrary_ {http://nzdl2.cs.waikato.ac.nz/cgi-bin/gwmm?c=meldex&a=page&p=coltitle}
    42
    53#######################################################################
     
    1412_javalinks_ [v=1] {}
    1513
    16 
    17 
    1814#######################################################################
    1915# icons
    2016#######################################################################
    2117
    22 _iconnzdl_ {
    23 <img src="_httpiconnzdl_" width=_widthnzdl_ height=_heightnzdl_>}
     18_iconnzdl_ {<img src="_httpiconnzdl_" width=_widthnzdl_ height=_heightnzdl_ alt="The New Zealand Digital Library">}
    2419_icongbull_ {<img src="_httpicongbull_">}
    2520_iconpdf_ {<img src="_httpiconpdf_">}
    26 _iconselectcollection_ {<img src="_httpiconselcolgr_" width=_widthselcolgr_ height=_heightselcolgr_>}
     21_iconselectcollection_ {<img src="_httpiconselcolgr_" width=_widthselcolgr_ height=_heightselcolgr_ alt=_altselcolgr_>}
    2722_iconmusiclibrary_ {<img src="_httpicontmusic_" border=1 alt="meldex music library">}
    2823
     
    4540_pagetitle_ {_textpagetitle_}
    4641_imagethispage_ {}
    47 _imagecollection_ {<center>_iconnzdl_</center>}
     42_imagecollection_ { }
    4843
    4944_content_ {
    50 <center>_iconselectcollection_</center><br>
    51 _homeextra_
    52 <center>_iconblankbar_</center>
     45<center>
     46<p>_iconselectcollection_
     47</center>
     48<p>_homeextra_
     49
     50<center>
     51<p>_iconblankbar_
     52</center>
     53<p><center><h2>_textprojhead_</h2></center>
     54
     55<table border=0 cellpadding=5>
     56
     57<tr valign=top>
     58<td>
     59<p>_icongbull_ <a href="mailto:_mailaddr_">_textfb_</a>
     60<p>_icongbull_ <a href="http://www.nzdl.org">NZDL</a>
     61<p>_icongbull_ <a href="_httppagex_(gsdl)">Greenstone</a>
     62<p>_iconpdf_   <a href="_gsdl:httpgsdlmanual_">Manual</a>
     63<p>_iconpdf_   <a href="_httppublications_/NZDLtext.pdf">_textinfosheet_</a>
     64<p><a href="_httppagestatus_"><img src="_httpimg_/tabspace.gif" width=60 height=20 border=0></a>
     65</td>
     66<td>_textprojinfo_</td>
     67</tr></table>
     68
     69<p>_iconblankbar_
     70<table>
     71<tr valign=top>
     72<td>_textpoem_</td>
     73<td>_imagegreenstone_</td>
     74</tr></table>
     75<p>_textgreenstone_
    5376
    5477<p>
    55 <table>
    56 <tr><td colspan=2><center><h2>_textprojhead_</h2></center></td></tr>
    57 <tr valign=top><td>
    58 
    59 <table border=0 cellpadding=5>
    60 <tr valign=top><td rowspan=10 width=32> </td>
    61 <tr valign=top><td>_icongbull_</td><td><a href="mailto:_mailaddr_">_textfb_</a></td></tr>
    62 <tr valign=top><td>_icongbull_</td><td><a href="_httppagex_(people)">_textpeople_</a><br></td></tr>
    63 <tr valign=top><td>_icongbull_</td><td><a href="http://www.cs.waikato.ac.nz/~nzdl/publications/">_textpub_</a><br></td></tr>
    64 <tr valign=top><td>_icongbull_</td><td><a href="_httppagex_(rw)">_textrw_</a><br></td></tr>
    65 <tr valign=top><td>_icongbull_</td><td><a href="_httppagex_(technology)">_texttec_</a><br></td></tr>
    66 <tr valign=top><td>_iconpdf_</td><td><a
    67 href="http://www.nzdl.org/download/greenstone/publications/NZDLtext.pdf">_textinfosheet_</a></td></tr>
    68 <tr valign=top><td>_iconpdf_</td><td><a
    69 href="http://www.nzdl.org/download/greenstone/publications/NZDLpictures.pdf">_textscreenshots_</a></td></tr>
    70 <tr valign=top><td>_iconpdf_</td><td><a
    71 href="http://www.nzdl.org/download/greenstone/publications/Niupepa.pdf">_textnpepainfosheet_</a></td></tr>
    72 <tr valign=top><td colspan=2><a href="_httppagestatus_"><img src="_httpimg_/tabspace.gif" width=60 height=20 border=0></a></td></tr>
    73 
    74 </table>
    75 
    76 </td>
    77 <td>
    78 
    79 <table border=0 cellpadding=5>
    80 <tr><td>
    81 _textprojinfo_
    82 </td></tr></table>
    83 </td></tr></table>
    84 _textotherinfo_
    85 _iconblankbar_
    86 <table><tr valign=top>
    87 <td>_textpoem_</td>
    88 <td>_imagegreenstone_</td>
    89 </tr>
    90 <tr><td colspan=2>_textgreenstone_</td>
    91 </tr>
    92 </table>
     78<p>_iconblankbar_
     79<p>Greenstone software by <a href="http://www.nzdl.org">The New Zealand Digital Library Project</a>,
     80<br><a href="http://www.cs.waikato.ac.nz/cs">Department of Computer Science</a>,
     81<a href="http://www.waikato.ac.nz">University of Waikato</a>,
     82New Zealand
    9383}
    9484
    95 
    96 #######################################################################
    97 # English language text macros
    98 #######################################################################
    99 
    100 # moved to english.dm
    101 
    102 
    103 
    104 
    105 
    106 
    107 
  • branches/New_Config_Format-branch/gsdl/macros/maori.dm

    r1057 r1279  
    374374
    375375######################################################################
    376 # 'prefereces' page
     376# 'preferences' page
    377377package preferences
    378378######################################################################
     
    406406
    407407
    408 ######################################################################
    409 # 'people' page
    410 package people
    411 ######################################################################
    412 
    413 #------------------------------------------------------------
    414 # text macros
    415 #------------------------------------------------------------
    416 
    417 
    418 #------------------------------------------------------------
    419 # icons
    420 #------------------------------------------------------------
    421 
    422 
    423 ######################################################################
    424 # 'technology' page
    425 package technology
    426 ######################################################################
    427 
    428 #------------------------------------------------------------
    429 # text macros
    430 #------------------------------------------------------------
    431 
    432 _textpagetitle_ {NZDL: Technology}
    433 
    434 
    435 #------------------------------------------------------------
    436 # icons
    437 #------------------------------------------------------------
    438 
    439 
    440 ######################################################################
    441 # 'status' pages
    442 package status
    443 ######################################################################
    444 
    445 #------------------------------------------------------------
    446 # text macros
    447 #------------------------------------------------------------
    448 
    449 
    450 #------------------------------------------------------------
    451 # icons
    452 #------------------------------------------------------------
    453 
    454 
    455 ######################################################################
    456 # html package
    457 package html
    458 ######################################################################
    459 
    460 #------------------------------------------------------------
    461 # text macros
    462 #------------------------------------------------------------
    463 
    464 
    465 #------------------------------------------------------------
    466 # icons
    467 #------------------------------------------------------------
    468 
    469 
    470 ######################################################################
    471 # external link package
    472 package extlink
    473 ######################################################################
    474 
    475 #------------------------------------------------------------
    476 # text macros
    477 #------------------------------------------------------------
    478 
    479 
    480 #------------------------------------------------------------
    481 # icons
    482 #------------------------------------------------------------
    483 
    484 
    485 ######################################################################
    486 # authentication page
    487 package authen
    488 ######################################################################
    489 
    490 #------------------------------------------------------------
    491 # text macros
    492 #------------------------------------------------------------
    493 
    494 
    495 #------------------------------------------------------------
    496 # icons
    497 #------------------------------------------------------------
    498 
    499 
    500 
     408
     409
  • branches/New_Config_Format-branch/gsdl/macros/pref.dm

    r1032 r1279  
    253253}
    254254
     255_caseoption_ [l=ar] {}
     256
    255257_fcoption_ {
    256258<input type=checkbox name=fc onClick="updatefc();"_If_("_cgiargfc_" eq "1", checked)>
     
    327329_enlanguageoption_ {<option value="en"_If_("_cgiargl_",, selected)_If_("_cgiargl_" eq "en", selected)>_textlangeng_}
    328330_delanguageoption_ {<option value="de"_If_("_cgiargl_" eq "de", selected)>_textlanggerman_}
     331_frlanguageoption_ {<option value="fr"_If_("_cgiargl_" eq "fr", selected)>_textlangfrench_}
     332_eslanguageoption_ {<option value="es"_If_("_cgiargl_" eq "es", selected)>_textlangspanish_}
    329333_milanguageoption_ {<option value="mi"_If_("_cgiargl_" eq "mi", selected)>_textlangmaori_}
    330334_zhlanguageoption_ {<option value="zh"_If_("_cgiargl_" eq "zh", selected)>_textlangchinese_}
     335_arlanguageoption_ {<option value="ar"_If_("_cgiargl_" eq "ar", selected)>_textlangarabic_}
    331336
    332337_encodingoption_ {
     
    336341  <option value="u" _If_("_cgiargw_" eq "u",selected)>UTF-8
    337342  <option value="g" _If_("_cgiargw_" eq "g",selected)>GBK
     343  <option value="a" _If_("_cgiargw_" eq "a",selected)>Arabic (windows 1256)
    338344</select>
    339345}
  • branches/New_Config_Format-branch/gsdl/macros/prescrpt.dm

    r1049 r1279  
    4242_content_ {
    4343_iconblankbar_
    44 <p><i>PreScript</i> offers:
     44<p>
     45<i>PreScript</i> is a utility for extracting text from PostScript files.
     46PreScript offers:
    4547
    4648<dl>
  • branches/New_Config_Format-branch/gsdl/macros/query.dm

    r964 r1279  
    3131# don't want alt text here
    3232_iconnext_{<img src="_httpiconmore_" width=_widthmore_ height=_heightmore_ border=0 align=top>}
     33_iconnext_ [v=1] {}
    3334_iconprev_{<img src="_httpiconless_" width=_widthless_ height=_heightless_ border=0 align=top>}
     35_iconprev_ [v=1] {}
    3436
    3537#######################################################################
  • branches/New_Config_Format-branch/gsdl/macros/status.dm

    r931 r1279  
    2222<table width=100%>
    2323  <tr>
    24     <td><img src="/gsdl/images/spacer.gif" width=2 height=77></td>
     24    <td><img src="_httpimg_/spacer.gif" width=2 height=77></td>
    2525    <td>_iconblankbar_
    2626        <center><h2>_1_</h2></center>
     
    5757_select_ {
    5858<a href="_httppagehome_" target=_top border=0><img src="_httpimg_/gsdl.gif"></a>
     59<p>
     60<a href="_gwcgi_?e=_compressedoptions_&a=status&sp=welcome" target=infoframe>_textadminhome_</a>
     61<a href="_httppagehome_" target=\_top>_textreturnhome_</a>
     62
    5963
    6064_If_("_cgiarguma_" ne "\_cgiarguma\_",
     
    6872<a href="_gwcgi_?e=_compressedoptions_&a=status&sp=argumentinfo" target=infoframe>_textarguments_</a><br>
    6973<a href="_gwcgi_?e=_compressedoptions_&a=status&sp=actioninfo" target=infoframe>_textactions_</a><br>
     74<a href="_gwcgi_?e=_compressedoptions_&a=status&sp=browserinfo" target=infoframe>_textbrowsers_</a><br>
    7075<a href="_gwcgi_?e=_compressedoptions_&a=status&sp=protocolinfo" target=infoframe>_textprotocols_</a>
    7176
     
    8085<a href="_gwcgi_?e=_compressedoptions_&a=status&sp=initlog" target=infoframe>_textinitlog_</a><br>
    8186<a href="_gwcgi_?e=_compressedoptions_&a=status&sp=errorlog" target=infoframe>_texterrorlog_</a>
    82 
    83 <p>
    84 <a href="_httppagehome_" target=_top>_textreturnhome_</a>
    8587}
    8688
    8789
     90_textframebrowser_ {You must have a frame enabled browser to view this.}
     91_textusermanage_ {User management}
     92_textlistusers_ {list users}
     93_textaddusers_ {add a new user}
     94
     95_textinfo_ {Information}
     96_textgeneral_ {general}
     97_textarguments_ {arguments}
     98_textactions_ {actions}
     99_textbrowsers_ {browsers}
     100_textprotocols_ {protocols}
     101
     102_textcollections_ {Collection management}
     103_textnewcoll_ {create new collection}
     104_texteditcoll_ {edit collection}
     105_textbuildcoll_ {build collection}
     106_textdeletecoll_ {delete collection}
     107
     108_textlogs_ {Logs}
     109_textinitlog_ {init log}
     110_texterrorlog_ {error log}
     111
     112_textadminhome_ {admin home}
     113_textreturnhome_ {Greenstone home}
     114
     115_titlewelcome_ { Maintenance and Administration }
     116
     117_welcome_ {
     118
     119<center>
     120<table width=_pagewidth_><tr><td>
     121Maintenance and administration services available include:
     122<ul>
     123<li>view on-line logs
     124<li>create, maintain and update collections
     125<li>access technical information such as CGI arguments
     126</ul>
     127These services are accessed using the side navigation bar on the
     128lefthand side of the page.
     129<p>
     130_iconblankbar_
     131</td></tr>
     132<tr><th align=left><br>Collection Status</th></tr>
     133<tr><td>
     134<font color=gray>
     135Collections will only appear as &quot;running&quot; if their build.cfg
     136files exist, are readable, contain a valid builddate field (i.e. > 0),
     137and are in the collection's index directory (i.e. NOT the building
     138directory).
     139<p>
     140click <i>abbrev.</i> for information on a collection
     141<br>
     142click <i>collection</i> to view a collection
     143</font>
     144</td></tr>
     145</table>
     146</center>
     147}
  • branches/New_Config_Format-branch/gsdl/macros/users.dm

    r876 r1279  
    8787</font></td></tr>
    8888<tr><td>comment</td><td colspan=2><input type="text" name="umc" value="_users:usersargc_" size=50></td></tr>
    89 <tr></td><td><td><input type="submit" name=beu value="submit"></td></tr>
     89<tr><td></td><td colspan=2><input type="submit" name=beu value="submit">
     90<input type="submit" name=uma value="cancel"></td></tr>
    9091</table>
    9192</form>
  • branches/New_Config_Format-branch/gsdl/packages/mg/lib/WIN32.MAK

    r1000 r1279  
    2525###########################################################################
    2626 
    27 GSDLHOME = d:\home\dl\gsdl
     27GSDLHOME = c:\gsdl
    2828
    2929AR = lib
     
    3232
    3333DEFS = -DQUIET -DHAVE_CONFIG_H -D__WIN32__ -D_LITTLE_ENDIAN
    34 INCLUDES = -I$(GSDLHOME)\packages\mg\lib -I$(GSDLHOME)\packages\mg
     34INCLUDES = -I$(GSDLHOME)\packages\mg\lib -I$(GSDLHOME)\packages\mg \
     35           -I$(GSDLHOME)
    3536
    3637COMPILE = $(CC) -c $(DEFS) $(INCLUDES)
  • branches/New_Config_Format-branch/gsdl/packages/mg/src/text/WIN32.MAK

    r1000 r1279  
    2525###########################################################################
    2626
    27 GSDLHOME = d:\home\dl\gsdl
     27GSDLHOME = c:\gsdl
    2828
    2929CC = cl
     
    3232       -D__WIN32__ -D_LITTLE_ENDIAN
    3333INCLUDES = -I$(GSDLHOME)\packages\mg\src\text -I$(GSDLHOME)\packages\mg \
    34        -I$(GSDLHOME)\packages\mg\lib
     34       -I$(GSDLHOME)\packages\mg\lib -I$(GSDLHOME)
    3535LDFLAGS =
    3636
  • branches/New_Config_Format-branch/gsdl/packages/mg/src/text/bool_parser.c

    r531 r1279  
    1 #ifndef lint
    2 static char yysccsid[] = "@(#)yaccpar   1.9 (Berkeley) 02/21/93";
    3 #endif
    4 #define YYBYACC 1
    5 #define YYMAJOR 1
    6 #define YYMINOR 9
    7 #define yyclearin (yychar=(-1))
    8 #define yyerrok (yyerrflag=0)
    9 #define YYRECOVERING (yyerrflag!=0)
    10 #define YYPREFIX "yy"
    11 #line 25 "bool_parser.y"
     1
     2/*  A Bison parser, made from bool_parser.y
     3 by  GNU Bison version 1.27
     4  */
     5
     6#define YYBISON 1  /* Identify Bison output.  */
     7
     8#define TERM    257
     9
     10#line 24 "bool_parser.y"
     11
    1212 
    1313#include "sysfuncs.h"
     
    4848static u_long invf_ptr;
    4949static u_long invf_len;
    50 #line 65 "bool_parser.y"
     50
     51#line 66 "bool_parser.y"
    5152typedef union {
    5253  char *text;
    5354  bool_tree_node *node;
    5455} YYSTYPE;
    55 #line 55 "y.tab.c"
    56 #define TERM 257
    57 #define YYERRCODE 256
    58 short yylhs[] = {                                        -1,
    59     0,    1,    1,    1,    1,    2,    2,    3,    3,    3,
    60     4,    4,
    61 };
    62 short yylen[] = {                                         2,
    63     1,    1,    3,    1,    1,    1,    2,    3,    2,    1,
    64     3,    1,
    65 };
    66 short yydefred[] = {                                      0,
    67     2,    0,    4,    5,    0,    0,    6,   10,    0,    0,
    68     0,    7,    0,    9,    0,    3,    8,    0,
    69 };
    70 short yydgoto[] = {                                       6,
    71     7,    8,    9,   10,
    72 };
    73 short yysindex[] = {                                    -32,
    74     0,  -32,    0,    0,  -32,    0,    0,    0,  -33, -118,
    75   -37,    0,  -32,    0,  -32,    0,    0,  -33,
    76 };
    77 short yyrindex[] = {                                      0,
    78     0,    0,    0,    0,    0,    0,    0,    0,    2,   12,
    79     0,    0,    0,    0,    0,    0,    0,    3,
    80 };
    81 short yygindex[] = {                                      0,
    82     0,    6,   -2,   14,
    83 };
    84 #define YYTABLESIZE 225
    85 short yytable[] = {                                       5,
    86     5,   12,   11,   16,   13,   15,    2,    2,    3,    3,
    87    12,    1,   18,    0,   14,   11,    0,    0,   17,    0,
    88     0,    0,    0,   14,    0,    0,    0,    0,    0,    0,
    89     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    90     0,    0,   12,   11,    0,    0,    0,    0,    0,    0,
    91     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    92     0,    4,    4,    0,    0,    0,    0,    0,    0,    0,
    93     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    94     0,    0,    0,    0,    0,    0,   15,    0,    0,    0,
    95     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    96     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    97     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    98     0,    0,    0,    0,    0,   12,   11,    0,    0,    0,
    99     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    100     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    101     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    102     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    103     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    104     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    105     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    106     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    107     0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    108     0,    0,    0,    1,    1,
    109 };
    110 short yycheck[] = {                                      33,
    111    33,    0,    0,   41,   38,  124,   40,   40,   42,   42,
    112     5,    0,   15,   -1,    9,    2,   -1,   -1,   13,   -1,
    113    -1,   -1,   -1,   18,   -1,   -1,   -1,   -1,   -1,   -1,
    114    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    115    -1,   -1,   41,   41,   -1,   -1,   -1,   -1,   -1,   -1,
    116    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    117    -1,   95,   95,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    118    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    119    -1,   -1,   -1,   -1,   -1,   -1,  124,   -1,   -1,   -1,
    120    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    121    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    122    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    123    -1,   -1,   -1,   -1,   -1,  124,  124,   -1,   -1,   -1,
    124    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    125    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    126    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    127    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    128    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    129    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    130    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    131    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    132    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
    133    -1,   -1,   -1,  257,  257,
    134 };
    135 #define YYFINAL 6
    136 #ifndef YYDEBUG
    137 #define YYDEBUG 0
    138 #endif
    139 #define YYMAXTOKEN 257
    140 #if YYDEBUG
    141 char *yyname[] = {
    142 "end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    143 "'!'",0,0,0,0,"'&'",0,"'('","')'","'*'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    144 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'_'",0,0,0,0,0,
    145 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    146 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    147 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    148 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    149 "TERM",
    150 };
    151 char *yyrule[] = {
    152 "$accept : query",
    153 "query : or",
    154 "term : TERM",
    155 "term : '(' or ')'",
    156 "term : '*'",
    157 "term : '_'",
    158 "not : term",
    159 "not : '!' not",
    160 "and : and '&' not",
    161 "and : and not",
    162 "and : not",
    163 "or : or '|' and",
    164 "or : and",
    165 };
    166 #endif
    167 #ifdef YYSTACKSIZE
     56#include <stdio.h>
     57
     58#ifndef __cplusplus
     59#ifndef __STDC__
     60#define const
     61#endif
     62#endif
     63
     64
     65
     66#define YYFINAL     20
     67#define YYFLAG      -32768
     68#define YYNTBASE    11
     69
     70#define YYTRANSLATE(x) ((unsigned)(x) <= 257 ? yytranslate[x] : 16)
     71
     72static const char yytranslate[] = {     0,
     73     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     74     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     75     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     76     2,     2,     8,     2,     2,     2,     2,     9,     2,     4,
     77     5,     6,     2,     2,     2,     2,     2,     2,     2,     2,
     78     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     79     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     80     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     81     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     82     2,     2,     2,     2,     7,     2,     2,     2,     2,     2,
     83     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     84     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     85     2,     2,     2,    10,     2,     2,     2,     2,     2,     2,
     86     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     87     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     88     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     89     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     90     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     91     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     92     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     93     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     94     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     95     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     96     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     97     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
     98     2,     2,     2,     2,     2,     1,     3
     99};
     100
     101#if YYDEBUG != 0
     102static const short yyprhs[] = {     0,
     103     0,     2,     4,     8,    10,    12,    14,    17,    21,    24,
     104    26,    30
     105};
     106
     107static const short yyrhs[] = {    15,
     108     0,     3,     0,     4,    15,     5,     0,     6,     0,     7,
     109     0,    12,     0,     8,    13,     0,    14,     9,    13,     0,
     110    14,    13,     0,    13,     0,    15,    10,    14,     0,    14,
     111     0
     112};
     113
     114#endif
     115
     116#if YYDEBUG != 0
     117static const short yyrline[] = { 0,
     118    76,    80,    81,    82,    83,    86,    87,    90,    91,    92,
     119    95,    96
     120};
     121#endif
     122
     123
     124#if YYDEBUG != 0 || defined (YYERROR_VERBOSE)
     125
     126static const char * const yytname[] = {   "$","error","$undefined.","TERM","'('",
     127"')'","'*'","'_'","'!'","'&'","'|'","query","term","not","and","or", NULL
     128};
     129#endif
     130
     131static const short yyr1[] = {     0,
     132    11,    12,    12,    12,    12,    13,    13,    14,    14,    14,
     133    15,    15
     134};
     135
     136static const short yyr2[] = {     0,
     137     1,     1,     3,     1,     1,     1,     2,     3,     2,     1,
     138     3,     1
     139};
     140
     141static const short yydefact[] = {     0,
     142     2,     0,     4,     5,     0,     6,    10,    12,     1,     0,
     143     7,     0,     9,     0,     3,     8,    11,     0,     0,     0
     144};
     145
     146static const short yydefgoto[] = {    18,
     147     6,     7,     8,     9
     148};
     149
     150static const short yypact[] = {    10,
     151-32768,    10,-32768,-32768,    10,-32768,-32768,     2,    -9,    14,
     152-32768,    10,-32768,    10,-32768,-32768,     2,     4,    15,-32768
     153};
     154
     155static const short yypgoto[] = {-32768,
     156-32768,    -5,   -12,    18
     157};
     158
     159
     160#define YYLAST      24
     161
     162
     163static const short yytable[] = {    11,
     164    14,    17,    13,    19,     1,     2,    16,     3,     4,     5,
     165    12,    13,     1,     2,    20,     3,     4,     5,    15,    10,
     166     0,     0,     0,    14
     167};
     168
     169static const short yycheck[] = {     5,
     170    10,    14,     8,     0,     3,     4,    12,     6,     7,     8,
     171     9,    17,     3,     4,     0,     6,     7,     8,     5,     2,
     172    -1,    -1,    -1,    10
     173};
     174/* -*-C-*-  Note some compilers choke on comments on `#line' lines.  */
     175#line 3 "/usr/share/bison.simple"
     176/* This file comes from bison-1.27.  */
     177
     178/* Skeleton output parser for bison,
     179   Copyright (C) 1984, 1989, 1990 Free Software Foundation, Inc.
     180
     181   This program is free software; you can redistribute it and/or modify
     182   it under the terms of the GNU General Public License as published by
     183   the Free Software Foundation; either version 2, or (at your option)
     184   any later version.
     185
     186   This program is distributed in the hope that it will be useful,
     187   but WITHOUT ANY WARRANTY; without even the implied warranty of
     188   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     189   GNU General Public License for more details.
     190
     191   You should have received a copy of the GNU General Public License
     192   along with this program; if not, write to the Free Software
     193   Foundation, Inc., 59 Temple Place - Suite 330,
     194   Boston, MA 02111-1307, USA.  */
     195
     196/* As a special exception, when this file is copied by Bison into a
     197   Bison output file, you may use that output file without restriction.
     198   This special exception was added by the Free Software Foundation
     199   in version 1.24 of Bison.  */
     200
     201/* This is the parser code that is written into each bison parser
     202  when the %semantic_parser declaration is not specified in the grammar.
     203  It was written by Richard Stallman by simplifying the hairy parser
     204  used when %semantic_parser is specified.  */
     205
     206#ifndef YYSTACK_USE_ALLOCA
     207#ifdef alloca
     208#define YYSTACK_USE_ALLOCA
     209#else /* alloca not defined */
     210#ifdef __GNUC__
     211#define YYSTACK_USE_ALLOCA
     212#define alloca __builtin_alloca
     213#else /* not GNU C.  */
     214#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi) || (defined (__sun) && defined (__i386))
     215#define YYSTACK_USE_ALLOCA
     216#include <alloca.h>
     217#else /* not sparc */
     218/* We think this test detects Watcom and Microsoft C.  */
     219/* This used to test MSDOS, but that is a bad idea
     220   since that symbol is in the user namespace.  */
     221#if (defined (_MSDOS) || defined (_MSDOS_)) && !defined (__TURBOC__)
     222#if 0 /* No need for malloc.h, which pollutes the namespace;
     223     instead, just don't use alloca.  */
     224#include <malloc.h>
     225#endif
     226#else /* not MSDOS, or __TURBOC__ */
     227#if defined(_AIX)
     228/* I don't know what this was needed for, but it pollutes the namespace.
     229   So I turned it off.   rms, 2 May 1997.  */
     230/* #include <malloc.h>  */
     231 #pragma alloca
     232#define YYSTACK_USE_ALLOCA
     233#else /* not MSDOS, or __TURBOC__, or _AIX */
     234#if 0
     235#ifdef __hpux /* [email protected] says this works for HPUX 9.05 and up,
     236         and on HPUX 10.  Eventually we can turn this on.  */
     237#define YYSTACK_USE_ALLOCA
     238#define alloca __builtin_alloca
     239#endif /* __hpux */
     240#endif
     241#endif /* not _AIX */
     242#endif /* not MSDOS, or __TURBOC__ */
     243#endif /* not sparc */
     244#endif /* not GNU C */
     245#endif /* alloca not defined */
     246#endif /* YYSTACK_USE_ALLOCA not defined */
     247
     248#ifdef YYSTACK_USE_ALLOCA
     249#define YYSTACK_ALLOC alloca
     250#else
     251#define YYSTACK_ALLOC malloc
     252#endif
     253
     254/* Note: there must be only one dollar sign in this file.
     255   It is replaced by the list of actions, each action
     256   as one case of the switch.  */
     257
     258#define yyerrok     (yyerrstatus = 0)
     259#define yyclearin   (yychar = YYEMPTY)
     260#define YYEMPTY     -2
     261#define YYEOF       0
     262#define YYACCEPT    goto yyacceptlab
     263#define YYABORT     goto yyabortlab
     264#define YYERROR     goto yyerrlab1
     265/* Like YYERROR except do call yyerror.
     266   This remains here temporarily to ease the
     267   transition to the new meaning of YYERROR, for GCC.
     268   Once GCC version 2 has supplanted version 1, this can go.  */
     269#define YYFAIL      goto yyerrlab
     270#define YYRECOVERING()  (!!yyerrstatus)
     271#define YYBACKUP(token, value) \
     272do                              \
     273  if (yychar == YYEMPTY && yylen == 1)              \
     274    { yychar = (token), yylval = (value);           \
     275      yychar1 = YYTRANSLATE (yychar);               \
     276      YYPOPSTACK;                       \
     277      goto yybackup;                        \
     278    }                               \
     279  else                              \
     280    { yyerror ("syntax error: cannot back up"); YYERROR; }  \
     281while (0)
     282
     283#define YYTERROR    1
     284#define YYERRCODE   256
     285
     286#ifndef YYPURE
     287#define YYLEX       yylex()
     288#endif
     289
     290#ifdef YYPURE
     291#ifdef YYLSP_NEEDED
     292#ifdef YYLEX_PARAM
     293#define YYLEX       yylex(&yylval, &yylloc, YYLEX_PARAM)
     294#else
     295#define YYLEX       yylex(&yylval, &yylloc)
     296#endif
     297#else /* not YYLSP_NEEDED */
     298#ifdef YYLEX_PARAM
     299#define YYLEX       yylex(&yylval, YYLEX_PARAM)
     300#else
     301#define YYLEX       yylex(&yylval)
     302#endif
     303#endif /* not YYLSP_NEEDED */
     304#endif
     305
     306/* If nonreentrant, generate the variables here */
     307
     308#ifndef YYPURE
     309
     310int yychar;         /*  the lookahead symbol        */
     311YYSTYPE yylval;         /*  the semantic value of the       */
     312                /*  lookahead symbol            */
     313
     314#ifdef YYLSP_NEEDED
     315YYLTYPE yylloc;         /*  location data for the lookahead */
     316                /*  symbol              */
     317#endif
     318
     319int yynerrs;            /*  number of parse errors so far       */
     320#endif  /* not YYPURE */
     321
     322#if YYDEBUG != 0
     323int yydebug;            /*  nonzero means print parse trace */
     324/* Since this is uninitialized, it does not stop multiple parsers
     325   from coexisting.  */
     326#endif
     327
     328/*  YYINITDEPTH indicates the initial size of the parser's stacks   */
     329
     330#ifndef YYINITDEPTH
     331#define YYINITDEPTH 200
     332#endif
     333
     334/*  YYMAXDEPTH is the maximum size the stacks can grow to
     335    (effective only if the built-in stack extension method is used).  */
     336
     337#if YYMAXDEPTH == 0
    168338#undef YYMAXDEPTH
    169 #define YYMAXDEPTH YYSTACKSIZE
     339#endif
     340
     341#ifndef YYMAXDEPTH
     342#define YYMAXDEPTH 10000
     343#endif
     344
     345
     346/* Define __yy_memcpy.  Note that the size argument
     347   should be passed with type unsigned int, because that is what the non-GCC
     348   definitions require.  With GCC, __builtin_memcpy takes an arg
     349   of type size_t, but it can handle unsigned int.  */
     350
     351#if __GNUC__ > 1        /* GNU C and GNU C++ define this.  */
     352#define __yy_memcpy(TO,FROM,COUNT)  __builtin_memcpy(TO,FROM,COUNT)
     353#else               /* not GNU C or C++ */
     354#ifndef __cplusplus
     355
     356/* This is the most reliable way to avoid incompatibilities
     357   in available built-in functions on various systems.  */
     358static void
     359__yy_memcpy (to, from, count)
     360     char *to;
     361     char *from;
     362     unsigned int count;
     363{
     364  register char *f = from;
     365  register char *t = to;
     366  register int i = count;
     367
     368  while (i-- > 0)
     369    *t++ = *f++;
     370}
     371
     372#else /* __cplusplus */
     373
     374/* This is the most reliable way to avoid incompatibilities
     375   in available built-in functions on various systems.  */
     376static void
     377__yy_memcpy (char *to, char *from, unsigned int count)
     378{
     379  register char *t = to;
     380  register char *f = from;
     381  register int i = count;
     382
     383  while (i-- > 0)
     384    *t++ = *f++;
     385}
     386
     387#endif
     388#endif
     389
     390
     391#line 216 "/usr/share/bison.simple"
     392
     393/* The user can define YYPARSE_PARAM as the name of an argument to be passed
     394   into yyparse.  The argument should have type void *.
     395   It should actually point to an object.
     396   Grammar actions can access the variable by casting it
     397   to the proper pointer type.  */
     398
     399#ifdef YYPARSE_PARAM
     400#ifdef __cplusplus
     401#define YYPARSE_PARAM_ARG void *YYPARSE_PARAM
     402#define YYPARSE_PARAM_DECL
     403#else /* not __cplusplus */
     404#define YYPARSE_PARAM_ARG YYPARSE_PARAM
     405#define YYPARSE_PARAM_DECL void *YYPARSE_PARAM;
     406#endif /* not __cplusplus */
     407#else /* not YYPARSE_PARAM */
     408#define YYPARSE_PARAM_ARG
     409#define YYPARSE_PARAM_DECL
     410#endif /* not YYPARSE_PARAM */
     411
     412/* Prevent warning if -Wstrict-prototypes.  */
     413#ifdef __GNUC__
     414#ifdef YYPARSE_PARAM
     415int yyparse (void *);
    170416#else
    171 #ifdef YYMAXDEPTH
    172 #define YYSTACKSIZE YYMAXDEPTH
     417int yyparse (void);
     418#endif
     419#endif
     420
     421int
     422yyparse(YYPARSE_PARAM_ARG)
     423     YYPARSE_PARAM_DECL
     424{
     425  register int yystate;
     426  register int yyn;
     427  register short *yyssp;
     428  register YYSTYPE *yyvsp;
     429  int yyerrstatus;  /*  number of tokens to shift before error messages enabled */
     430  int yychar1 = 0;      /*  lookahead token as an internal (translated) token number */
     431
     432  short yyssa[YYINITDEPTH]; /*  the state stack         */
     433  YYSTYPE yyvsa[YYINITDEPTH];   /*  the semantic value stack        */
     434
     435  short *yyss = yyssa;      /*  refer to the stacks thru separate pointers */
     436  YYSTYPE *yyvs = yyvsa;    /*  to allow yyoverflow to reallocate them elsewhere */
     437
     438#ifdef YYLSP_NEEDED
     439  YYLTYPE yylsa[YYINITDEPTH];   /*  the location stack          */
     440  YYLTYPE *yyls = yylsa;
     441  YYLTYPE *yylsp;
     442
     443#define YYPOPSTACK   (yyvsp--, yyssp--, yylsp--)
    173444#else
    174 #define YYSTACKSIZE 500
    175 #define YYMAXDEPTH 500
    176 #endif
    177 #endif
    178 int yydebug;
    179 int yynerrs;
    180 int yyerrflag;
    181 int yychar;
    182 short *yyssp;
    183 YYSTYPE *yyvsp;
    184 YYSTYPE yyval;
    185 YYSTYPE yylval;
    186 short yyss[YYSTACKSIZE];
    187 YYSTYPE yyvs[YYSTACKSIZE];
    188 #define yystacksize YYSTACKSIZE
     445#define YYPOPSTACK   (yyvsp--, yyssp--)
     446#endif
     447
     448  int yystacksize = YYINITDEPTH;
     449  int yyfree_stacks = 0;
     450
     451#ifdef YYPURE
     452  int yychar;
     453  YYSTYPE yylval;
     454  int yynerrs;
     455#ifdef YYLSP_NEEDED
     456  YYLTYPE yylloc;
     457#endif
     458#endif
     459
     460  YYSTYPE yyval;        /*  the variable used to return     */
     461                /*  semantic values from the action */
     462                /*  routines                */
     463
     464  int yylen;
     465
     466#if YYDEBUG != 0
     467  if (yydebug)
     468    fprintf(stderr, "Starting parse\n");
     469#endif
     470
     471  yystate = 0;
     472  yyerrstatus = 0;
     473  yynerrs = 0;
     474  yychar = YYEMPTY;     /* Cause a token to be read.  */
     475
     476  /* Initialize stack pointers.
     477     Waste one element of value and location stack
     478     so that they stay on the same level as the state stack.
     479     The wasted elements are never initialized.  */
     480
     481  yyssp = yyss - 1;
     482  yyvsp = yyvs;
     483#ifdef YYLSP_NEEDED
     484  yylsp = yyls;
     485#endif
     486
     487/* Push a new state, which is found in  yystate  .  */
     488/* In all cases, when you get here, the value and location stacks
     489   have just been pushed. so pushing a state here evens the stacks.  */
     490yynewstate:
     491
     492  *++yyssp = yystate;
     493
     494  if (yyssp >= yyss + yystacksize - 1)
     495    {
     496      /* Give user a chance to reallocate the stack */
     497      /* Use copies of these so that the &'s don't force the real ones into memory. */
     498      YYSTYPE *yyvs1 = yyvs;
     499      short *yyss1 = yyss;
     500#ifdef YYLSP_NEEDED
     501      YYLTYPE *yyls1 = yyls;
     502#endif
     503
     504      /* Get the current used size of the three stacks, in elements.  */
     505      int size = yyssp - yyss + 1;
     506
     507#ifdef yyoverflow
     508      /* Each stack pointer address is followed by the size of
     509     the data in use in that stack, in bytes.  */
     510#ifdef YYLSP_NEEDED
     511      /* This used to be a conditional around just the two extra args,
     512     but that might be undefined if yyoverflow is a macro.  */
     513      yyoverflow("parser stack overflow",
     514         &yyss1, size * sizeof (*yyssp),
     515         &yyvs1, size * sizeof (*yyvsp),
     516         &yyls1, size * sizeof (*yylsp),
     517         &yystacksize);
     518#else
     519      yyoverflow("parser stack overflow",
     520         &yyss1, size * sizeof (*yyssp),
     521         &yyvs1, size * sizeof (*yyvsp),
     522         &yystacksize);
     523#endif
     524
     525      yyss = yyss1; yyvs = yyvs1;
     526#ifdef YYLSP_NEEDED
     527      yyls = yyls1;
     528#endif
     529#else /* no yyoverflow */
     530      /* Extend the stack our own way.  */
     531      if (yystacksize >= YYMAXDEPTH)
     532    {
     533      yyerror("parser stack overflow");
     534      if (yyfree_stacks)
     535        {
     536          free (yyss);
     537          free (yyvs);
     538#ifdef YYLSP_NEEDED
     539          free (yyls);
     540#endif
     541        }
     542      return 2;
     543    }
     544      yystacksize *= 2;
     545      if (yystacksize > YYMAXDEPTH)
     546    yystacksize = YYMAXDEPTH;
     547#ifndef YYSTACK_USE_ALLOCA
     548      yyfree_stacks = 1;
     549#endif
     550      yyss = (short *) YYSTACK_ALLOC (yystacksize * sizeof (*yyssp));
     551      __yy_memcpy ((char *)yyss, (char *)yyss1,
     552           size * (unsigned int) sizeof (*yyssp));
     553      yyvs = (YYSTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yyvsp));
     554      __yy_memcpy ((char *)yyvs, (char *)yyvs1,
     555           size * (unsigned int) sizeof (*yyvsp));
     556#ifdef YYLSP_NEEDED
     557      yyls = (YYLTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yylsp));
     558      __yy_memcpy ((char *)yyls, (char *)yyls1,
     559           size * (unsigned int) sizeof (*yylsp));
     560#endif
     561#endif /* no yyoverflow */
     562
     563      yyssp = yyss + size - 1;
     564      yyvsp = yyvs + size - 1;
     565#ifdef YYLSP_NEEDED
     566      yylsp = yyls + size - 1;
     567#endif
     568
     569#if YYDEBUG != 0
     570      if (yydebug)
     571    fprintf(stderr, "Stack size increased to %d\n", yystacksize);
     572#endif
     573
     574      if (yyssp >= yyss + yystacksize - 1)
     575    YYABORT;
     576    }
     577
     578#if YYDEBUG != 0
     579  if (yydebug)
     580    fprintf(stderr, "Entering state %d\n", yystate);
     581#endif
     582
     583  goto yybackup;
     584 yybackup:
     585
     586/* Do appropriate processing given the current state.  */
     587/* Read a lookahead token if we need one and don't already have one.  */
     588/* yyresume: */
     589
     590  /* First try to decide what to do without reference to lookahead token.  */
     591
     592  yyn = yypact[yystate];
     593  if (yyn == YYFLAG)
     594    goto yydefault;
     595
     596  /* Not known => get a lookahead token if don't already have one.  */
     597
     598  /* yychar is either YYEMPTY or YYEOF
     599     or a valid token in external form.  */
     600
     601  if (yychar == YYEMPTY)
     602    {
     603#if YYDEBUG != 0
     604      if (yydebug)
     605    fprintf(stderr, "Reading a token: ");
     606#endif
     607      yychar = YYLEX;
     608    }
     609
     610  /* Convert token to internal form (in yychar1) for indexing tables with */
     611
     612  if (yychar <= 0)      /* This means end of input. */
     613    {
     614      yychar1 = 0;
     615      yychar = YYEOF;       /* Don't call YYLEX any more */
     616
     617#if YYDEBUG != 0
     618      if (yydebug)
     619    fprintf(stderr, "Now at end of input.\n");
     620#endif
     621    }
     622  else
     623    {
     624      yychar1 = YYTRANSLATE(yychar);
     625
     626#if YYDEBUG != 0
     627      if (yydebug)
     628    {
     629      fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]);
     630      /* Give the individual parser a way to print the precise meaning
     631         of a token, for further debugging info.  */
     632#ifdef YYPRINT
     633      YYPRINT (stderr, yychar, yylval);
     634#endif
     635      fprintf (stderr, ")\n");
     636    }
     637#endif
     638    }
     639
     640  yyn += yychar1;
     641  if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1)
     642    goto yydefault;
     643
     644  yyn = yytable[yyn];
     645
     646  /* yyn is what to do for this token type in this state.
     647     Negative => reduce, -yyn is rule number.
     648     Positive => shift, yyn is new state.
     649       New state is final state => don't bother to shift,
     650       just return success.
     651     0, or most negative number => error.  */
     652
     653  if (yyn < 0)
     654    {
     655      if (yyn == YYFLAG)
     656    goto yyerrlab;
     657      yyn = -yyn;
     658      goto yyreduce;
     659    }
     660  else if (yyn == 0)
     661    goto yyerrlab;
     662
     663  if (yyn == YYFINAL)
     664    YYACCEPT;
     665
     666  /* Shift the lookahead token.  */
     667
     668#if YYDEBUG != 0
     669  if (yydebug)
     670    fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]);
     671#endif
     672
     673  /* Discard the token being shifted unless it is eof.  */
     674  if (yychar != YYEOF)
     675    yychar = YYEMPTY;
     676
     677  *++yyvsp = yylval;
     678#ifdef YYLSP_NEEDED
     679  *++yylsp = yylloc;
     680#endif
     681
     682  /* count tokens shifted since error; after three, turn off error status.  */
     683  if (yyerrstatus) yyerrstatus--;
     684
     685  yystate = yyn;
     686  goto yynewstate;
     687
     688/* Do the default action for the current state.  */
     689yydefault:
     690
     691  yyn = yydefact[yystate];
     692  if (yyn == 0)
     693    goto yyerrlab;
     694
     695/* Do a reduction.  yyn is the number of a rule to reduce with.  */
     696yyreduce:
     697  yylen = yyr2[yyn];
     698  if (yylen > 0)
     699    yyval = yyvsp[1-yylen]; /* implement default value of the action */
     700
     701#if YYDEBUG != 0
     702  if (yydebug)
     703    {
     704      int i;
     705
     706      fprintf (stderr, "Reducing via rule %d (line %d), ",
     707           yyn, yyrline[yyn]);
     708
     709      /* Print the symbols being reduced, and their result.  */
     710      for (i = yyprhs[yyn]; yyrhs[i] > 0; i++)
     711    fprintf (stderr, "%s ", yytname[yyrhs[i]]);
     712      fprintf (stderr, " -> %s\n", yytname[yyr1[yyn]]);
     713    }
     714#endif
     715
     716
     717  switch (yyn) {
     718
     719case 1:
     720#line 76 "bool_parser.y"
     721{ tree_base = yyvsp[0].node;;
     722    break;}
     723case 2:
     724#line 80 "bool_parser.y"
     725{ yyval.node = CreateBoolTermNode(term_list, yyvsp[0].text, 1, word_num, count, doc_count, invf_ptr, invf_len, stemmer_num); ;
     726    break;}
     727case 3:
     728#line 81 "bool_parser.y"
     729{ yyval.node = yyvsp[-1].node; ;
     730    break;}
     731case 4:
     732#line 82 "bool_parser.y"
     733{ yyval.node = CreateBoolTreeNode(N_all, NULL, NULL); ;
     734    break;}
     735case 5:
     736#line 83 "bool_parser.y"
     737{ yyval.node = CreateBoolTreeNode(N_none, NULL, NULL); ;
     738    break;}
     739case 7:
     740#line 87 "bool_parser.y"
     741{ yyval.node = CreateBoolTreeNode(N_not, yyvsp[0].node, NULL); ;
     742    break;}
     743case 8:
     744#line 90 "bool_parser.y"
     745{ yyval.node = CreateBoolTreeNode(N_and, yyvsp[-2].node, yyvsp[0].node); ;
     746    break;}
     747case 9:
     748#line 91 "bool_parser.y"
     749{ yyval.node = CreateBoolTreeNode(N_and, yyvsp[-1].node, yyvsp[0].node); ;
     750    break;}
     751case 11:
     752#line 95 "bool_parser.y"
     753{ yyval.node = CreateBoolTreeNode(N_or, yyvsp[-2].node, yyvsp[0].node); ;
     754    break;}
     755}
     756   /* the action file gets copied in in place of this dollarsign */
     757#line 542 "/usr/share/bison.simple"
     758
     759
     760  yyvsp -= yylen;
     761  yyssp -= yylen;
     762#ifdef YYLSP_NEEDED
     763  yylsp -= yylen;
     764#endif
     765
     766#if YYDEBUG != 0
     767  if (yydebug)
     768    {
     769      short *ssp1 = yyss - 1;
     770      fprintf (stderr, "state stack now");
     771      while (ssp1 != yyssp)
     772    fprintf (stderr, " %d", *++ssp1);
     773      fprintf (stderr, "\n");
     774    }
     775#endif
     776
     777  *++yyvsp = yyval;
     778
     779#ifdef YYLSP_NEEDED
     780  yylsp++;
     781  if (yylen == 0)
     782    {
     783      yylsp->first_line = yylloc.first_line;
     784      yylsp->first_column = yylloc.first_column;
     785      yylsp->last_line = (yylsp-1)->last_line;
     786      yylsp->last_column = (yylsp-1)->last_column;
     787      yylsp->text = 0;
     788    }
     789  else
     790    {
     791      yylsp->last_line = (yylsp+yylen-1)->last_line;
     792      yylsp->last_column = (yylsp+yylen-1)->last_column;
     793    }
     794#endif
     795
     796  /* Now "shift" the result of the reduction.
     797     Determine what state that goes to,
     798     based on the state we popped back to
     799     and the rule number reduced by.  */
     800
     801  yyn = yyr1[yyn];
     802
     803  yystate = yypgoto[yyn - YYNTBASE] + *yyssp;
     804  if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp)
     805    yystate = yytable[yystate];
     806  else
     807    yystate = yydefgoto[yyn - YYNTBASE];
     808
     809  goto yynewstate;
     810
     811yyerrlab:   /* here on detecting error */
     812
     813  if (! yyerrstatus)
     814    /* If not already recovering from an error, report this error.  */
     815    {
     816      ++yynerrs;
     817
     818#ifdef YYERROR_VERBOSE
     819      yyn = yypact[yystate];
     820
     821      if (yyn > YYFLAG && yyn < YYLAST)
     822    {
     823      int size = 0;
     824      char *msg;
     825      int x, count;
     826
     827      count = 0;
     828      /* Start X at -yyn if nec to avoid negative indexes in yycheck.  */
     829      for (x = (yyn < 0 ? -yyn : 0);
     830           x < (sizeof(yytname) / sizeof(char *)); x++)
     831        if (yycheck[x + yyn] == x)
     832          size += strlen(yytname[x]) + 15, count++;
     833      msg = (char *) malloc(size + 15);
     834      if (msg != 0)
     835        {
     836          strcpy(msg, "parse error");
     837
     838          if (count < 5)
     839        {
     840          count = 0;
     841          for (x = (yyn < 0 ? -yyn : 0);
     842               x < (sizeof(yytname) / sizeof(char *)); x++)
     843            if (yycheck[x + yyn] == x)
     844              {
     845            strcat(msg, count == 0 ? ", expecting `" : " or `");
     846            strcat(msg, yytname[x]);
     847            strcat(msg, "'");
     848            count++;
     849              }
     850        }
     851          yyerror(msg);
     852          free(msg);
     853        }
     854      else
     855        yyerror ("parse error; also virtual memory exceeded");
     856    }
     857      else
     858#endif /* YYERROR_VERBOSE */
     859    yyerror("parse error");
     860    }
     861
     862  goto yyerrlab1;
     863yyerrlab1:   /* here on error raised explicitly by an action */
     864
     865  if (yyerrstatus == 3)
     866    {
     867      /* if just tried and failed to reuse lookahead token after an error, discard it.  */
     868
     869      /* return failure if at end of input */
     870      if (yychar == YYEOF)
     871    YYABORT;
     872
     873#if YYDEBUG != 0
     874      if (yydebug)
     875    fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]);
     876#endif
     877
     878      yychar = YYEMPTY;
     879    }
     880
     881  /* Else will try to reuse lookahead token
     882     after shifting the error token.  */
     883
     884  yyerrstatus = 3;      /* Each real token shifted decrements this */
     885
     886  goto yyerrhandle;
     887
     888yyerrdefault:  /* current state does not do anything special for the error token. */
     889
     890#if 0
     891  /* This is wrong; only states that explicitly want error tokens
     892     should shift them.  */
     893  yyn = yydefact[yystate];  /* If its default is to accept any token, ok.  Otherwise pop it.*/
     894  if (yyn) goto yydefault;
     895#endif
     896
     897yyerrpop:   /* pop the current state because it cannot handle the error token */
     898
     899  if (yyssp == yyss) YYABORT;
     900  yyvsp--;
     901  yystate = *--yyssp;
     902#ifdef YYLSP_NEEDED
     903  yylsp--;
     904#endif
     905
     906#if YYDEBUG != 0
     907  if (yydebug)
     908    {
     909      short *ssp1 = yyss - 1;
     910      fprintf (stderr, "Error: state stack now");
     911      while (ssp1 != yyssp)
     912    fprintf (stderr, " %d", *++ssp1);
     913      fprintf (stderr, "\n");
     914    }
     915#endif
     916
     917yyerrhandle:
     918
     919  yyn = yypact[yystate];
     920  if (yyn == YYFLAG)
     921    goto yyerrdefault;
     922
     923  yyn += YYTERROR;
     924  if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR)
     925    goto yyerrdefault;
     926
     927  yyn = yytable[yyn];
     928  if (yyn < 0)
     929    {
     930      if (yyn == YYFLAG)
     931    goto yyerrpop;
     932      yyn = -yyn;
     933      goto yyreduce;
     934    }
     935  else if (yyn == 0)
     936    goto yyerrpop;
     937
     938  if (yyn == YYFINAL)
     939    YYACCEPT;
     940
     941#if YYDEBUG != 0
     942  if (yydebug)
     943    fprintf(stderr, "Shifting error token, ");
     944#endif
     945
     946  *++yyvsp = yylval;
     947#ifdef YYLSP_NEEDED
     948  *++yylsp = yylloc;
     949#endif
     950
     951  yystate = yyn;
     952  goto yynewstate;
     953
     954 yyacceptlab:
     955  /* YYACCEPT comes here.  */
     956  if (yyfree_stacks)
     957    {
     958      free (yyss);
     959      free (yyvs);
     960#ifdef YYLSP_NEEDED
     961      free (yyls);
     962#endif
     963    }
     964  return 0;
     965
     966 yyabortlab:
     967  /* YYABORT comes here.  */
     968  if (yyfree_stacks)
     969    {
     970      free (yyss);
     971      free (yyvs);
     972#ifdef YYLSP_NEEDED
     973      free (yyls);
     974#endif
     975    }
     976  return 1;
     977}
    189978#line 99 "bool_parser.y"
     979
    190980 
    191981/* Bison on one mips machine defined "const" to be nothing but
     
    3811171bool_tree_node *
    3821172ParseBool(char *query_line, int query_len,
    383           TermList **the_term_list, int the_stemmer_num, int the_stem_method,
    384       int *res,
     1173          TermList **the_term_list, int the_stemmer_num, int the_stem_method, int *res,
    3851174      stemmed_dict * the_sd, int is_indexed,   /* [RPAP - Jan 97: Stem Index Change] */
    3861175      QueryTermList **the_query_term_list)  /* [RPAP - Feb 97: Term Frequency] */
     
    4071196 
    4081197
    409 #line 407 "y.tab.c"
    410 #define YYABORT goto yyabort
    411 #define YYREJECT goto yyabort
    412 #define YYACCEPT goto yyaccept
    413 #define YYERROR goto yyerrlab
    /*
     * yyparse -- table-driven parser loop (the variant removed by this change).
     *
     * Runs the parse using the yydefred/yysindex/yyrindex/yygindex/yytable/
     * yycheck tables, pulling tokens from yylex() and running the grammar
     * actions in the switch under yyreduce.
     *
     * Returns 0 via yyaccept, and 1 via yyabort (which is also reached by
     * falling through from yyoverflow after reporting "yacc stack overflow").
     */
    414 int
    415 yyparse()
    416 {
    417     register int yym, yyn, yystate;
    418 #if YYDEBUG
    419     register char *yys;
    420     extern char *getenv();
    421 
    /* If the YYDEBUG environment variable starts with a digit, use that
       digit as the value of yydebug. */
    422     if (yys = getenv("YYDEBUG"))
    423     {
    424         yyn = *yys;
    425         if (yyn >= '0' && yyn <= '9')
    426             yydebug = yyn - '0';
    427     }
    428 #endif
    429 
    430     yynerrs = 0;
    431     yyerrflag = 0;
    432     yychar = (-1);
    433 
    434     yyssp = yyss;
    435     yyvsp = yyvs;
    436     *yyssp = yystate = 0;
    437 
    /* Main loop: a non-zero yydefred[] entry means reduce by that rule
       without looking at a token; otherwise fetch a token if none is held
       (yychar < 0).  A negative yylex() result is stored as 0. */
    438 yyloop:
    439     if (yyn = yydefred[yystate]) goto yyreduce;
    440     if (yychar < 0)
    441     {
    442         if ((yychar = yylex()) < 0) yychar = 0;
    443 #if YYDEBUG
    444         if (yydebug)
    445         {
    446             yys = 0;
    447             if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
    448             if (!yys) yys = "illegal-symbol";
    449             printf("%sdebug: state %d, reading %d (%s)\n",
    450                     YYPREFIX, yystate, yychar, yys);
    451         }
    452 #endif
    453     }
    /* Shift: push the new state and yylval, mark the token as consumed
       (yychar = -1) and count down yyerrflag while it is positive. */
    454     if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&
    455             yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
    456     {
    457 #if YYDEBUG
    458         if (yydebug)
    459             printf("%sdebug: state %d, shifting to state %d\n",
    460                     YYPREFIX, yystate, yytable[yyn]);
    461 #endif
    462         if (yyssp >= yyss + yystacksize - 1)
    463         {
    464             goto yyoverflow;
    465         }
    466         *++yyssp = yystate = yytable[yyn];
    467         *++yyvsp = yylval;
    468         yychar = (-1);
    469         if (yyerrflag > 0)  --yyerrflag;
    470         goto yyloop;
    471     }
    /* Reduce selected by the current token: yytable[] gives the rule. */
    472     if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&
    473             yyn <= YYTABLESIZE && yycheck[yyn] == yychar)
    474     {
    475         yyn = yytable[yyn];
    476         goto yyreduce;
    477     }
    /* Neither shift nor reduce applies.  When yyerrflag is already
       non-zero the error is not reported again. */
    478     if (yyerrflag) goto yyinrecovery;
    479 #ifdef lint
    480     goto yynewerror;
    481 #endif
    482 yynewerror:
    483     yyerror("syntax error");
    484 #ifdef lint
    485     goto yyerrlab;
    486 #endif
    487 yyerrlab:
    488     ++yynerrs;
    /* Recovery.  With yyerrflag < 3: set it to 3 and pop states until one
       can shift YYERRCODE, aborting when the stack bottom is reached.
       Otherwise: drop the current token (abort if it is 0) and retry. */
    489 yyinrecovery:
    490     if (yyerrflag < 3)
    491     {
    492         yyerrflag = 3;
    493         for (;;)
    494         {
    495             if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&
    496                     yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)
    497             {
    498 #if YYDEBUG
    499                 if (yydebug)
    500                     printf("%sdebug: state %d, error recovery shifting\
    501  to state %d\n", YYPREFIX, *yyssp, yytable[yyn]);
    502 #endif
    503                 if (yyssp >= yyss + yystacksize - 1)
    504                 {
    505                     goto yyoverflow;
    506                 }
    507                 *++yyssp = yystate = yytable[yyn];
    508                 *++yyvsp = yylval;
    509                 goto yyloop;
    510             }
    511             else
    512             {
    513 #if YYDEBUG
    514                 if (yydebug)
    515                     printf("%sdebug: error recovery discarding state %d\n",
    516                             YYPREFIX, *yyssp);
    517 #endif
    518                 if (yyssp <= yyss) goto yyabort;
    519                 --yyssp;
    520                 --yyvsp;
    521             }
    522         }
    523     }
    524     else
    525     {
    526         if (yychar == 0) goto yyabort;
    527 #if YYDEBUG
    528         if (yydebug)
    529         {
    530             yys = 0;
    531             if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
    532             if (!yys) yys = "illegal-symbol";
    533             printf("%sdebug: state %d, error recovery discards token %d (%s)\n",
    534                     YYPREFIX, yystate, yychar, yys);
    535         }
    536 #endif
    537         yychar = (-1);
    538         goto yyloop;
    539     }
    /* Reduce by rule yyn.  yym is the rule's length from yylen[]; yyval is
       preset to yyvsp[1-yym] before the rule's action (if any) runs. */
    540 yyreduce:
    541 #if YYDEBUG
    542     if (yydebug)
    543         printf("%sdebug: state %d, reducing by rule %d (%s)\n",
    544                 YYPREFIX, yystate, yyn, yyrule[yyn]);
    545 #endif
    546     yym = yylen[yyn];
    547     yyval = yyvsp[1-yym];
    548     switch (yyn)
    549     {
    550 case 1:
    551 #line 75 "bool_parser.y"
    552 { tree_base = yyvsp[0].node;}
    553 break;
    554 case 2:
    555 #line 79 "bool_parser.y"
    556 { yyval.node = CreateBoolTermNode(term_list, yyvsp[0].text, 1, word_num, count, doc_count, invf_ptr, invf_len, stemmer_num); }
    557 break;
    558 case 3:
    559 #line 80 "bool_parser.y"
    560 { yyval.node = yyvsp[-1].node; }
    561 break;
    562 case 4:
    563 #line 81 "bool_parser.y"
    564 { yyval.node = CreateBoolTreeNode(N_all, NULL, NULL); }
    565 break;
    566 case 5:
    567 #line 82 "bool_parser.y"
    568 { yyval.node = CreateBoolTreeNode(N_none, NULL, NULL); }
    569 break;
    570 case 7:
    571 #line 86 "bool_parser.y"
    572 { yyval.node = CreateBoolTreeNode(N_not, yyvsp[0].node, NULL); }
    573 break;
    574 case 8:
    575 #line 89 "bool_parser.y"
    576 { yyval.node = CreateBoolTreeNode(N_and, yyvsp[-2].node, yyvsp[0].node); }
    577 break;
    578 case 9:
    579 #line 90 "bool_parser.y"
    580 { yyval.node = CreateBoolTreeNode(N_and, yyvsp[-1].node, yyvsp[0].node); }
    581 break;
    582 case 11:
    583 #line 94 "bool_parser.y"
    584 { yyval.node = CreateBoolTreeNode(N_or, yyvsp[-2].node, yyvsp[0].node); }
    585 break;
    586 #line 584 "y.tab.c"
    587     }
    /* Pop yym entries from both stacks; yym is then reused to hold the
       rule's yylhs[] value, which indexes the goto tables below. */
    588     yyssp -= yym;
    589     yystate = *yyssp;
    590     yyvsp -= yym;
    591     yym = yylhs[yyn];
    /* Special case: back in state 0 with yylhs value 0 -- move straight to
       YYFINAL, read a token if none is held, and accept when it is 0. */
    592     if (yystate == 0 && yym == 0)
    593     {
    594 #if YYDEBUG
    595         if (yydebug)
    596             printf("%sdebug: after reduction, shifting from state 0 to\
    597  state %d\n", YYPREFIX, YYFINAL);
    598 #endif
    599         yystate = YYFINAL;
    600         *++yyssp = YYFINAL;
    601         *++yyvsp = yyval;
    602         if (yychar < 0)
    603         {
    604             if ((yychar = yylex()) < 0) yychar = 0;
    605 #if YYDEBUG
    606             if (yydebug)
    607             {
    608                 yys = 0;
    609                 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];
    610                 if (!yys) yys = "illegal-symbol";
    611                 printf("%sdebug: state %d, reading %d (%s)\n",
    612                         YYPREFIX, YYFINAL, yychar, yys);
    613             }
    614 #endif
    615         }
    616         if (yychar == 0) goto yyaccept;
    617         goto yyloop;
    618     }
    /* Ordinary goto: use the yygindex[]/yytable[] entry when yycheck[]
       confirms it, else the default in yydgoto[]; then push the new state
       together with yyval. */
    619     if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&
    620             yyn <= YYTABLESIZE && yycheck[yyn] == yystate)
    621         yystate = yytable[yyn];
    622     else
    623         yystate = yydgoto[yym];
    624 #if YYDEBUG
    625     if (yydebug)
    626         printf("%sdebug: after reduction, shifting from state %d \
    627 to state %d\n", YYPREFIX, *yyssp, yystate);
    628 #endif
    629     if (yyssp >= yyss + yystacksize - 1)
    630     {
    631         goto yyoverflow;
    632     }
    633     *++yyssp = yystate;
    634     *++yyvsp = yyval;
    635     goto yyloop;
    /* Exit paths: overflow reports an error and falls into yyabort. */
    636 yyoverflow:
    637     yyerror("yacc stack overflow");
    638 yyabort:
    639     return (1);
    640 yyaccept:
    641     return (0);
    642 }
    643 
  • branches/New_Config_Format-branch/gsdl/packages/mg/sysfuncs.h

    r821 r1279  
    194194
    195195/* On MSDOS, there are missing things from <sys/stat.h>.  */
    196 #ifdef __MSDOS__
     196#if defined(__MSDOS__)
    197197#define S_ISUID 0
    198198#define S_ISGID 0
    199199#define S_ISVTX 0
     200#endif
     201
     202#if defined(__GNUC__) && defined(__WIN32__)
     203#include <limits.h>
    200204#endif
    201205
  • branches/New_Config_Format-branch/gsdl/perllib/classify/AZSectionList.pm

    r741 r1279  
    3030# instead of just top level metadata
    3131
    32 # options are:
    33 # metadata=Metadata
    34 
    3532# the only change is to the classify() subroutine which
    3633# must now iterate through each section, adding each
     
    3936package AZSectionList;
    4037
     38use AZList;
    4139use sorttools;
    4240
    # Constructor.  Looks through @options for a "metadata=NAME" entry
    # (matched case-insensitively; the last one given wins) and dies when
    # none is present.  Returns a hash blessed into $class holding an empty
    # 'list' hash and the chosen 'metaname'.
    sub new {
        my $class = shift (@_);
        my @options = @_;

        my $metaname;
        foreach my $opt (@options) {
            $metaname = $1 if $opt =~ /^metadata=(.*)$/i;
        }

        die "AZSectionList used with no metadata name to classify by\n"
            unless defined $metaname;

        my $self = {
            'list' => {},
            'metaname' => $metaname
        };
        return bless $self, $class;
    }
    62 
    63 sub init {
    64     my $self = shift (@_);
    65 
    66     $self->{'list'} = {};
     41sub BEGIN {
     42    @ISA = ('AZList');
    6743}
    6844
     
    10379}
    10480
    # Orders the keys of $self->{'list'} by string comparison of their
    # stored values and returns whatever splitlist builds from that
    # ordered list.
    sub get_classify_info {
        my ($self) = @_;

        my @oids = keys %{$self->{'list'}};
        my $sortkey_of = $self->{'list'};
        my @ordered = sort { $sortkey_of->{$a} cmp $sortkey_of->{$b} } @oids;

        return $self->splitlist (\@ordered);
    }
    113 
    # Builds one classification entry: a hash ref with 'Title',
    # 'childtype' and an empty 'contains' array.  'thistype' is added only
    # when $thistype is defined and contains a word character.
    sub get_entry {
        my ($self, $title, $childtype, $thistype) = @_;

        my $entry = {
            'childtype' => $childtype,
            'Title' => $title,
            'contains' => []
        };

        if (defined $thistype && $thistype =~ /\w/) {
            $entry->{'thistype'} = $thistype;
        }

        return $entry;
    }
    127 
    # splitlist receives a reference to an already ordered list of
    # classifications and returns the top-level entry for them.  Lists of
    # up to 39 items (max + min - 1) are placed directly under a VList top
    # level; longer lists go under an HList top level, grouped into
    # alphabetical sub-sections built by compactlist.
    sub splitlist {
        my ($self, $classlistref) = @_;

        my $total = scalar (@$classlistref);

        # top level
        my $toptype = ($total <= 39) ? "VList" : "HList";
        my $classifyinfo = $self->get_entry ($self->{'metaname'}, $toptype, "Invisible");

        # short lists need no splitting at all
        if ($total <= 39) {
            push (@{$classifyinfo->{'contains'}},
                  map { +{'OID'=>$_} } @$classlistref);
            return $classifyinfo;
        }

        # bucket every classification under the upper-cased first character
        # of its title; all digits share the single '0-9' bucket
        my $classhash = {};
        foreach my $classification (@$classlistref) {
            my $title = $self->{'list'}->{$classification};
            $title =~ s/^(.).*$/$1/;
            $title =~ tr/[a-z]/[A-Z]/;
            if ($title =~ /^[0-9]$/) {
                $title = '0-9';
            } elsif ($title !~ /^[A-Z]$/) {
                print STDERR "AZSectionList: WARNING $classification has badly " .
                    "formatted title ($title)\n";
            }
            push (@{$classhash->{$title}}, $classification);
        }
        $classhash = $self->compactlist ($classhash);

        # '0-9' sorts to the front but belongs at the end
        my @sections = sort keys (%$classhash);
        push (@sections, shift @sections) if $sections[0] eq '0-9';

        # one VList entry per sub-section
        foreach my $subclass (@sections) {
            my $subentry = $self->get_entry ($subclass, "VList");
            push (@{$subentry->{'contains'}},
                  map { +{'OID'=>$_} } @{$classhash->{$subclass}});
            push (@{$classifyinfo->{'contains'}}, $subentry);
        }

        return $classifyinfo;
    }
    185 
    # compactlist takes a hash ref mapping sub-section names (single
    # characters, plus optionally '0-9') to array refs of OIDs and returns a
    # new hash ref in which adjacent sub-sections have been merged into
    # ranges such as "A-C".
    #
    # The '0-9' sub-section is always copied through unchanged.  The other
    # sub-sections are visited in sorted order and accumulated into the
    # current group while the group holds fewer than $min entries or adding
    # the sub-section keeps it at or under $max.  A sub-section holding
    # $max or more entries that cannot join the current group is emitted
    # on its own.  A final group with fewer than $min entries is folded into
    # the previously emitted group when there is one.
    #
    # Fixes relative to the earlier version:
    #   - a final empty group is no longer merged into the previous key
    #     (this used to relabel e.g. "C" as "C-B" using a stale last letter,
    #     or create a bogus "-" entry when there were no letter sections);
    #   - a short final group with no previously emitted group is stored
    #     under its own name instead of under "-X".
    # The returned arrays are always copies, never the caller's own arrays.
    sub compactlist {
        my $self = shift (@_);
        my ($classhashref) = @_;
        my $compactedhash = {};
        my @currentOIDs = ();
        my $currentfirstletter = "";
        my $currentlastletter = "";
        my $lastkey = "";

        # minimum and maximum documents to be displayed per page.
        # the actual maximum will be max + (min-1).
        # the smallest sub-section is a single letter at present
        # so in this case there may be many times max documents
        # displayed on a page.
        my $min = 10;
        my $max = 30;

        # name of a group: "X" for a single letter, "X-Y" for a run
        my $rangename = sub {
            my ($first, $last) = @_;
            return ($first eq $last) ? $first : "$first-$last";
        };

        foreach my $subsection (sort keys %$classhashref) {
            my @sectionOIDs = @{$classhashref->{$subsection}};

            if ($subsection eq '0-9') {
                $compactedhash->{$subsection} = [@sectionOIDs];
                next;
            }

            $currentfirstletter = $subsection if $currentfirstletter eq "";
            if ((scalar (@currentOIDs) < $min) ||
                ((scalar (@currentOIDs) + scalar (@sectionOIDs)) <= $max)) {
                push (@currentOIDs, @sectionOIDs);
                $currentlastletter = $subsection;
                next;
            }

            # the current group is full: emit it
            $lastkey = $rangename->($currentfirstletter, $currentlastletter);
            $compactedhash->{$lastkey} = [@currentOIDs];

            if (scalar (@sectionOIDs) >= $max) {
                # big enough to stand alone; start afresh afterwards
                $compactedhash->{$subsection} = [@sectionOIDs];
                @currentOIDs = ();
                $currentfirstletter = "";
                $currentlastletter = "";
                $lastkey = $subsection;
            } else {
                @currentOIDs = @sectionOIDs;
                $currentfirstletter = $subsection;
                $currentlastletter = $subsection;
            }
        }

        # add final OIDs to last sub-classification if there aren't many
        # otherwise add final sub-classification.  Nothing is done when no
        # OIDs are pending.
        if (scalar (@currentOIDs) > 0) {
            if (scalar (@currentOIDs) < $min && $lastkey ne "") {
                my ($newkey) = $lastkey =~ /^(.)/;
                my @merged = (@{$compactedhash->{$lastkey}}, @currentOIDs);
                delete $compactedhash->{$lastkey};
                $compactedhash->{"$newkey-$currentlastletter"} = \@merged;
            } else {
                my $finalkey = $rangename->($currentfirstletter, $currentlastletter);
                $compactedhash->{$finalkey} = [@currentOIDs];
            }
        }

        return $compactedhash;
    }
    25281
    253821;
  • branches/New_Config_Format-branch/gsdl/perllib/classify/List.pm

    r677 r1279  
    144144sub get_classify_info {
    145145    my $self = shift (@_);
     146    my ($no_thistype) = @_;
     147    $no_thistype = 0 unless defined $no_thistype;
    146148
    147149    my @list = ();
     
    156158
    157159    # organise into classification structure
    158     my %classifyinfo = ('thistype'=>'Invisible',
    159             'childtype'=>'VList',
     160    my %classifyinfo = ('childtype'=>'VList',
    160161            'Title'=>$self->{'title'},
    161162            'contains'=>[]);
     163    $classifyinfo{'thistype'} = 'Invisible' unless $no_thistype;
     164
    162165    foreach $OID (@list) {
    163166    push (@{$classifyinfo{'contains'}}, {'OID'=>$OID});
  • branches/New_Config_Format-branch/gsdl/perllib/classify/SectionList.pm

    r838 r1279  
    2727# (excluding top level) rather than just top level document
    2828# itself
    29 # options are:
    30 # metadata=Metaname -- (optional) all documents with Metaname metadata
    31 #                      will be included in list. if not included all documents
    32 #                      will be included in list.
    33 # sort=Meta         -- (optional) sort documents in list alphabetically by
    34 #                      Meta. by default it will sort by Metaname, if neither
    35 #                      are set documents will be in build (random) order.
    36 #                      Meta may be Filename to sort by original filename or
    37 #                      nosort to force not to sort
    38 # title=Title       -- (optional) the title field for this classification.
    39 #                      if not included title field will be Metaname.
    40 #                      if metadata is also not included title will be 'List'
    4129
    4230package SectionList;
    4331
     32use List;
    4433use sorttools;
    4534
    46 sub new {
    47     my ($class, @options) = @_;
    48 
    49     my $list = [];
    50     my ($metaname, $title, $sortname);
    51 
    52     foreach $option (@options) {
    53     if ($option =~ /^metadata=(.*)$/i) {
    54         $metaname = $1;
    55         $list = {};
    56     } elsif ($option =~ /^title=(.*)$/i) {
    57         $title = $1;
    58     } elsif ($option =~ /^sort=(.*)$/i) {
    59         $sortname = $1;
    60     }
    61     }
    62 
    63     if (!defined $title) {
    64     if (defined $metaname) {
    65         $title = $metaname;
    66     } else {
    67         $title = 'List';
    68     }
    69     }
    70 
    71     if (defined $sortname && $sortname =~ /^nosort$/i) {
    72     $sortname = undef;
    73     } elsif (!defined $sortname && defined $metaname) {
    74     $sortname = $metaname;
    75     }
    76 
    77     return bless {
    78     'list'=>$list,
    79     'metaname' => $metaname,
    80     'title' => $title,
    81     'sortname' => $sortname
    82     }, $class;
    83 }
    84 
    85 sub init {
    86     my $self = shift (@_);
    87 
    88     if (defined $self->{'sortname'}) {
    89     $self->{'list'} = {};
    90     } else {
    91     $self->{'list'} = [];
    92     }
     35sub BEGIN {
     36    @ISA = ('List');
    9337}
    9438
    9539sub classify {
    9640    my $self = shift (@_);
    97     my ($doc_obj,@options) = @_;
     41    my ($doc_obj, @options) = @_;
    9842   
    9943    my $thissection = undef;
     
    175119}
    176120
    177 sub get_classify_info {
    178     my $self = shift (@_);
    179 
    180     my @list = ();
    181     if (defined $self->{'sortname'}) {
    182     if (keys %{$self->{'list'}}) {
    183         @list = sort {$self->{'list'}->{$a}
    184               cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};
    185     }
    186     } else {
    187     @list = @{$self->{'list'}};
    188     }
    189 
    190     # organise into classification structure
    191     my %classifyinfo = ('thistype'=>'Invisible',
    192             'childtype'=>'VList',
    193             'Title'=>$self->{'title'},
    194             'contains'=>[]);
    195     foreach $OID (@list) {
    196     push (@{$classifyinfo{'contains'}}, {'OID'=>$OID});
    197     }
    198 
    199     return \%classifyinfo;
    200 }
    201 
    202 
    2031211;
  • branches/New_Config_Format-branch/gsdl/perllib/doc.pm

    r846 r1279  
    2424###########################################################################
    2525
    26 # class to hold documents
     26# base class to hold documents
    2727
    2828package doc;
    2929
    30 use basedoc;
    31 
    3230BEGIN {
    33     @ISA = ('basedoc');
    34 }
     31    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
     32    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux");
     33}
     34
     35use unicode;
     36use util;
     37use ghtml;
     38##use hashdoc;
    3539
    3640# the document type may be indexed_doc, nonindexed_doc, or
     
    3943sub new {
    4044    my $class = shift (@_);
    41     my ($source_filename, $doc_type) = @_;
    42    
    43     my $self = new basedoc();
     45   
     46    my $self = bless {'associated_files'=>[],
     47              'subsection_order'=>[],
     48              'next_subsection'=>1,
     49              'subsections'=>{},
     50              'metadata'=>[],
     51              'text'=>""}, $class;
    4452
    4553#    $self->set_source_filename ($source_filename) if defined $source_filename;
    46     push (@{$self->{'metadata'}}, ["gsdlsourcefilename", &unicode::ascii2utf8($source_filename)])
    47     if defined $source_filename;
     54    push (@{$self->{'metadata'}}, ["gsdlsourcefilename", $source_filename]) if defined $source_filename;
    4855#    $self->set_doc_type ($doc_type) if defined $doc_type;
    49     push (@{$self->{'metadata'}}, ["gsdldoctype", &unicode::ascii2utf8($doc_type)])
    50     if defined $doc_type;
    51 
    52     bless($self,$class);
     56    push (@{$self->{'metadata'}}, ["gsdldoctype", $doc_type]) if defined $doc_type;
     57
    5358    return $self;
    5459}
    5560
     61# clone the $self object
     62sub duplicate {
     63    my $self = shift (@_);
     64
     65    my $newobj = {};
     66   
     67    foreach $k (keys %$self) {
     68    $newobj->{$k} = &clone ($self->{$k});
     69    }
     70
     71    bless $newobj, ref($self);
     72    return $newobj;
     73}
     74
     75sub clone {
     76    my ($from) = @_;
     77    my $type = ref ($from);
     78
     79    if ($type eq "HASH") {
     80    my $to = {};
     81    foreach $key (keys %$from) {
     82        $to->{$key} = &clone ($from->{$key});
     83    }
     84    return $to;
     85    } elsif ($type eq "ARRAY") {
     86    my $to = [];
     87    foreach $v (@$from) {
     88        push (@$to, &clone ($v));
     89    }
     90    return $to;
     91    } else {
     92    return $from;
     93    }
     94}
     95
     96
     97sub set_source_filename {
     98    my $self = shift (@_);
     99    my ($source_filename) = @_;
     100
     101    $self->set_metadata_element ($self->get_top_section(),
     102                 "gsdlsourcefilename",
     103                 $source_filename);
     104}
     105
     106# returns the source_filename as it was provided
     107sub get_source_filename {
     108    my $self = shift (@_);
     109
     110    return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
     111}
     112
     113sub set_doc_type {
     114    my $self = shift (@_);
     115    my ($doc_type) = @_;
     116
     117    $self->set_metadata_element ($self->get_top_section(),
     118                 "gsdldoctype",
     119                 $doc_type);
     120}
     121
     122# returns the document type as it was provided
     123# the default of "indexed_doc" is used if no document
     124# type was provided
     125sub get_doc_type {
     126    my $self = shift (@_);
     127
     128    my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
     129    return $doc_type if (defined $doc_type);
     130    return "indexed_doc";
     131}
     132
     133sub _escape_text {
     134    my ($text) = @_;
     135
     136    # special characters in the gml encoding
     137    $text =~ s/&/&amp;/g; # this has to be first...
     138    $text =~ s/</&lt;/g;
     139    $text =~ s/>/&gt;/g;
     140    $text =~ s/\"/&quot;/g;
     141
     142    return $text;
     143}
     144
     145
     146sub buffer_section {
     147    my $self = shift (@_);
     148    my ($section, $suppress_subject_info) = @_;
     149
     150    $suppress_subject_info = 0 unless defined $suppress_subject_info;
     151    my ($all_text,$data, $subsection);
     152   
     153    my $section_ptr = $self->_lookup_section ($section);
     154    my ($section_num) = $section =~ /(\d+)$/;
     155   
     156    return "" unless defined $section_ptr;
     157
     158    # output the section header (including the section number
     159    # and metadata)
     160
     161    $all_text = "<gsdlsection";
     162    $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num;
     163    foreach $data (@{$section_ptr->{'metadata'}}) {
     164    $all_text .= " $data->[0]=\"" . &_escape_text($data->[1]) . "\""
     165        unless $suppress_subject_info && $data->[0] eq "Subject";
     166    }
     167    $all_text .= ">";
     168
     169    # output the text
     170    $all_text .= &_escape_text($section_ptr->{'text'});
     171
     172    # output all the subsections
     173    foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
     174    $all_text .= $self->buffer_section("$section.$subsection", $suppress_subject_info);
     175    }
     176   
     177    # output the closing tag
     178    $all_text .=  "</gsdlsection>\n";
     179
     180    return $all_text;
     181}
     182
     183sub output_section {
     184    my $self = shift (@_);
     185    my ($handle, $section, $suppress_subject_info) = @_;
     186
     187    my $all_text = $self->buffer_section($section, $suppress_subject_info);
     188    print $handle $all_text;
     189}
     190
     191# look up the reference to a particular section
     192sub _lookup_section {
     193    my $self = shift (@_);
     194    my ($section) = @_;
     195
     196    my ($num);
     197    my $sectionref = $self;
     198
     199    while (defined $section && $section ne "") {
     200    ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
     201    $num =~ s/^0+(\d)/$1/; # remove leading 0s
     202    $section = "" unless defined $section;
     203   
     204    if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
     205        $sectionref = $sectionref->{'subsections'}->{$num};
     206    } else {
     207        return undef;
     208    }
     209    }
     210   
     211    return $sectionref;
     212}
     213
     214sub _calc_OID {
     215    my $self = shift (@_);
     216    my ($filename) = @_;
     217
     218    my $osexe = &util::get_os_exe();
     219
     220    my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
     221                       $ENV{'GSDLOS'},"hashfile$osexe");
     222    my $result = "NULL";
     223
     224    if (-e "$hashfile_exe") {
     225    $result = `$hashfile_exe \"$filename\"`;
     226    ($result) = $result =~ /:\s*([0-9a-f]+)/i;
     227
     228    } else {
     229    print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
     230    }
     231
     232    return "HASH$result";
     233}
     234
     235# methods dealing with OID, not groups of them.
     236
     237# if $OID is not provided one is calculated from hashing the
     238# current contents of the document
     239# An OID is actually stored as metadata of the document
     240sub set_OID {
     241    my $self = shift (@_);
     242    my ($OID) = @_;
     243
     244    # if an OID wasn't provided then feed this document to
     245    # hashfile.exe
     246    if (!defined $OID) {
     247    $OID = "NULL";
     248    my $tmp_filename = &util::get_tmp_filename();
     249    if (!open (OUTFILE, ">$tmp_filename")) {
     250        print STDERR "doc::set_OID could not write to $tmp_filename\n";
     251    } else {
     252        $self->output_section('OUTFILE', $self->get_top_section(), 1);
     253        close (OUTFILE);
     254
     255        $OID = $self->_calc_OID ($tmp_filename);
     256        &util::rm ($tmp_filename);
     257    }
     258    }
     259
     260    $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
     261}
     262
     263# this uses hashdoc (embedded c thingy) which is faster but still
     264# needs a little work to be sufficiently stable
     265sub ___set_OID {
     266    my $self = shift (@_);
     267    my ($OID) = @_;
     268
     269    # if an OID wasn't provided then calculate hash value based on document
     270    if (!defined $OID)
     271    {
     272    my $hash_text = $self->buffer_section($self->get_top_section(), 1);
     273    my $hash_len = length($hash_text);
     274
     275        $OID = &hashdoc::buffer($hash_text,$hash_len);
     276    }
     277
     278    $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
     279}
     280
     281# returns the OID for this document
     282sub get_OID {
     283    my $self = shift (@_);
     284    my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
     285    return $OID if (defined $OID);
     286    return "NULL";
     287}
     288
     289sub delete_OID {
     290    my $self = shift (@_);
     291   
     292    $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
     293}
     294
     295
     296# methods for manipulating section names
     297
     298# returns the name of the top-most section (the top
     299# level of the document)
     300sub get_top_section {
     301    my $self = shift (@_);
     302   
     303    return "";
     304}
     305
     306# returns the parent of a section (the section name with its last number removed)
     307sub get_parent_section {
     308    my $self = shift (@_);
     309    my ($section) = @_;
     310
     311    $section =~ s/(^|\.)\d+$//;
     312
     313    return $section;
     314}
     315
     316# returns the first child section (or the end child
     317# if there isn't any)
     318sub get_begin_child {
     319    my $self = shift (@_);
     320    my ($section) = @_;
     321
     322    my $section_ptr = $self->_lookup_section($section);
     323    return "" unless defined $section_ptr;
     324
     325    if (defined $section_ptr->{'subsection_order'}->[0]) {
     326    return "$section.$section_ptr->{'subsection_order'}->[0]";
     327    }
     328
     329    return $self->get_end_child ($section);
     330}
     331
     332# returns the next child of a parent section
     333sub get_next_child {
     334    my $self = shift (@_);
     335    my ($section) = @_;
     336   
     337    my $parent_section = $self->get_parent_section($section);
     338    my $parent_section_ptr = $self->_lookup_section($parent_section);
     339    return undef unless defined $parent_section_ptr;
     340
     341    my ($section_num) = $section =~ /(\d+)$/;
     342    return undef unless defined $section_num;
     343
     344    my $i = 0;
     345    my $section_order = $parent_section_ptr->{'subsection_order'};
     346    while ($i < scalar(@$section_order)) {
     347    last if $section_order->[$i] eq $section_num;
     348    $i++;
     349    }
     350
     351    $i++; # the next child
     352    if ($i < scalar(@$section_order)) {
     353    return $section_order->[$i] if $parent_section eq "";
     354    return "$parent_section.$section_order->[$i]";
     355    }
     356
     357    # no more sections in this level
     358    return undef;
     359}
     360
     361# returns a reference to a list of children
     362sub get_children {
     363    my $self = shift (@_);
     364    my ($section) = @_;
     365
     366    my $section_ptr = $self->_lookup_section($section);
     367    return [] unless defined $section_ptr;
     368
     369    my @children = @{$section_ptr->{'subsection_order'}};
     370
     371    map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
     372    return \@children;
     373}
     374
     375# returns the child section one past the last one (which
     376# is coded as "0")
     377sub get_end_child {
     378    my $self = shift (@_);
     379    my ($section) = @_;
     380
     381    return $section . ".0" unless $section eq "";
     382    return "0";
     383}
     384
     385# returns the next section in book order
     386sub get_next_section {
     387    my $self = shift (@_);
     388    my ($section) = @_;
     389
     390    return undef unless defined $section;
     391
     392    my $section_ptr = $self->_lookup_section($section);
     393    return undef unless defined $section_ptr;
     394
     395    # first try to find first child
     396    if (defined $section_ptr->{'subsection_order'}->[0]) {
     397    return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
     398    return "$section.$section_ptr->{'subsection_order'}->[0]";
     399    }
     400
     401    do {
     402    # try to find sibling
     403    my $next_child = $self->get_next_child ($section);
     404    return $next_child if (defined $next_child);
     405
     406    # move up one level
     407    $section = $self->get_parent_section ($section);
     408    } while $section =~ /\d/;
     409
     410    return undef;
     411}
     412
     413sub is_leaf_section {
     414    my $self = shift (@_);
     415    my ($section) = @_;
     416
     417    my $section_ptr = $self->_lookup_section($section);
     418    return 1 unless defined $section_ptr;
     419
     420    return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
     421}
     422
     423# methods for dealing with sections
     424
     425# returns the name of the inserted section
     426sub insert_section {
     427    my $self = shift (@_);
     428    my ($before_section) = @_;
     429
     430    # get the child to insert before and its parent section
     431    my $parent_section = "";
     432    my $before_child = "0";
     433    my @before_section = split (/\./, $before_section);
     434    if (scalar(@before_section) > 0) {
     435    $before_child = pop (@before_section);
     436    $parent_section = join (".", @before_section);
     437    }
     438
     439    my $parent_section_ptr = $self->_lookup_section($parent_section);
     440    if (!defined $parent_section_ptr) {
     441    print STDERR "doc::insert_section couldn't find parent section " .
     442        "$parent_section\n";
     443    return;
     444    }
     445
     446    # get the next section number
     447    my $section_num = $parent_section_ptr->{'next_subsection'}++;
     448
     449    my $i = 0;
     450    while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
     451       $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
     452    $i++;
     453    }
     454   
     455    # insert the section number into the order list
     456    splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
     457
     458    # add this section to the parent section
     459    my $section_ptr = {'subsection_order'=>[],
     460               'next_subsection'=>1,
     461               'subsections'=>{},
     462               'metadata'=>[],
     463               'text'=>""};
     464    $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
     465
     466    # work out the full section number
     467    my $section = $parent_section;
     468    $section .= "." unless $section eq "";
     469    $section .= $section_num;
     470   
     471    return $section;
     472}
     473
     474# creates a pre-named section
     475sub create_named_section {
     476    my $self = shift (@_);
     477    my ($mastersection) = @_;
     478
     479    my ($num);
     480    my $section = $mastersection;
     481    my $sectionref = $self;
     482
     483####    print STDERR "*** mastersection = $mastersection\n";
     484
     485    while ($section ne "") {
     486    ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
     487    $num =~ s/^0+(\d)/$1/; # remove leading 0s
     488    $section = "" unless defined $section;
     489   
     490    if (defined $num) {
     491        if (!defined $sectionref->{'subsections'}->{$num}) {
     492        push (@{$sectionref->{'subsection_order'}}, $num);
     493        $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
     494                            'next_subsection'=>1,
     495                            'subsections'=>{},
     496                            'metadata'=>[],
     497                            'text'=>""};
     498        if ($num >= $sectionref->{'next_subsection'}) {
     499            $sectionref->{'next_subsection'} = $num + 1;
     500        }
     501        }
     502        $sectionref = $sectionref->{'subsections'}->{$num};
     503
     504    } else {
     505        print STDERR "doc::create_named_section couldn't create section ";
     506        print STDERR "$mastersection\n";
     507        last;
     508    }
     509    }
     510}
     511
     512# returns a reference to a list of subsections
     513sub list_subsections {
     514    my $self = shift (@_);
     515    my ($section) = @_;
     516
     517    my $section_ptr = $self->_lookup_section ($section);
     518    if (!defined $section_ptr) {
     519    print STDERR "doc::list_subsections couldn't find section $section\n";
     520    return [];
     521    }
     522
     523    return [@{$section_ptr->{'subsection_order'}}];
     524}
     525
     526sub delete_section {
     527    my $self = shift (@_);
     528    my ($section) = @_;
     529
     530#    my $section_ptr = {'subsection_order'=>[],
     531#              'next_subsection'=>1,
     532#              'subsections'=>{},
     533#              'metadata'=>[],
     534#              'text'=>""};
     535
     536    # if this is the top section reset everything
     537    if ($section eq "") {
     538    $self->{'subsection_order'} = [];
     539    $self->{'subsections'} = {};
     540    $self->{'metadata'} = [];
     541    $self->{'text'} = "";
     542    return;
     543    }
     544
     545    # find the parent of the section to delete
     546    my $parent_section = "";
     547    my $child = "0";
     548    my @section = split (/\./, $section);
     549    if (scalar(@section) > 0) {
     550    $child = pop (@section);
     551    $parent_section = join (".", @section);
     552    }
     553
     554    my $parent_section_ptr = $self->_lookup_section($parent_section);
     555    if (!defined $parent_section_ptr) {
     556    print STDERR "doc::delete_section couldn't find parent section " .
     557        "$parent_section\n";
     558    return;
     559    }
     560
     561    # remove this section from the subsection_order list
     562    my $i = 0;
     563    while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
     564    if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
     565        splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
     566        last;
     567    }
     568    $i++;
     569    }
     570
     571    # remove this section from the subsection hash
     572    if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
     573    undef $parent_section_ptr->{'subsections'}->{$child};
     574    }
     575}
     576
     577#--
    56578# methods for dealing with metadata
    57579
     
    60582# are for metadata which can have more than one value.
    61583
    62 # set_metadata_element assumes the value is in (extended) ascii form.
    63 # For text which hash been already converted to the UTF-8 format use
    64 # set_utf8_metadata_element.
     584# returns the first metadata value which matches field
     585sub get_metadata_element {
     586    my $self = shift (@_);
     587    my ($section, $field) = @_;
     588    my ($data);
     589
     590    my $section_ptr = $self->_lookup_section($section);
     591    if (!defined $section_ptr) {
     592    print STDERR "doc::get_metadata_element couldn't find section " .
     593        "$section\n";
     594    return;
     595    }
     596
     597    foreach $data (@{$section_ptr->{'metadata'}}) {
     598    return $data->[1] if (scalar(@$data) >= 2 && $data->[0] eq $field);
     599    }
     600   
     601    return undef; # was not found
     602}
     603
     604
     605# returns a list of the form [value1, value2, ...]
     606sub get_metadata {
     607    my $self = shift (@_);
     608    my ($section, $field) = @_;
     609    my ($data);
     610
     611    my $section_ptr = $self->_lookup_section($section);
     612    if (!defined $section_ptr) {
     613        print STDERR "doc::get_metadata couldn't find section " .
     614            "$section\n";
     615        return;
     616    }
     617
     618    my @metadata = ();
     619    foreach $data (@{$section_ptr->{'metadata'}}) {
     620        push (@metadata, $data->[1]) if ($data->[0] eq $field);
     621    }
     622       
     623    return \@metadata;
     624}
     625
     626# returns a list of the form [[field,value],[field,value],...]
     627sub get_all_metadata {
     628    my $self = shift (@_);
     629    my ($section) = @_;
     630
     631    my $section_ptr = $self->_lookup_section($section);
     632    if (!defined $section_ptr) {
     633    print STDERR "doc::get_all_metadata couldn't find section " .
     634        "$section\n";
     635    return;
     636    }
     637   
     638    return $section_ptr->{'metadata'};
     639}
     640
     641# $value is optional
     642sub delete_metadata {
     643    my $self = shift (@_);
     644    my ($section, $field, $value) = @_;
     645
     646    my $section_ptr = $self->_lookup_section($section);
     647    if (!defined $section_ptr) {
     648    print STDERR "doc::delete_metadata couldn't find section " .
     649        "$section\n";
     650    return;
     651    }
     652
     653    my $i = 0;
     654    while ($i < scalar (@{$section_ptr->{'metadata'}})) {
     655    if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
     656        (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
     657        splice (@{$section_ptr->{'metadata'}}, $i, 1);
     658    } else {
     659        $i++;
     660    }
     661    }
     662}
     663
     664sub delete_all_metadata {
     665    my $self = shift (@_);
     666    my ($section) = @_;
     667
     668    my $section_ptr = $self->_lookup_section($section);
     669    if (!defined $section_ptr) {
     670    print STDERR "doc::delete_all_metadata couldn't find section " .
     671        "$section\n";
     672    return;
     673    }
     674   
     675    $section_ptr->{'metadata'} = [];
     676}
     677
    65678sub set_metadata_element {
    66679    my $self = shift (@_);
     
    112725# methods for dealing with text
    113726
     727# returns the text for a section
     728sub get_text {
     729    my $self = shift (@_);
     730    my ($section) = @_;
     731
     732    my $section_ptr = $self->_lookup_section($section);
     733    if (!defined $section_ptr) {
     734    print STDERR "doc::get_text couldn't find section " .
     735        "$section\n";
     736    return "";
     737    }
     738
     739    return $section_ptr->{'text'};
     740}
     741
     742# returns the (utf-8 encoded) length of the text for a section
     743sub get_text_length {
     744    my $self = shift (@_);
     745    my ($section) = @_;
     746
     747    my $section_ptr = $self->_lookup_section($section);
     748    if (!defined $section_ptr) {
     749    print STDERR "doc::get_text_length couldn't find section " .
     750        "$section\n";
     751    return 0;
     752    }
     753
     754    return length ($section_ptr->{'text'});
     755}
     756
     757sub delete_text {
     758    my $self = shift (@_);
     759    my ($section) = @_;
     760
     761    my $section_ptr = $self->_lookup_section($section);
     762    if (!defined $section_ptr) {
     763    print STDERR "doc::delete_text couldn't find section " .
     764        "$section\n";
     765    return;
     766    }
     767
     768    $section_ptr->{'text'} = "";
     769}
     770
    114771# add_text assumes the text is in (extended) ascii form. For
    115772# text which has been already converted to the UTF-8 format
     
    143800
    144801
     802# methods for dealing with associated files
     803
     804# a file is associated with a document, NOT a section.
     805# if section is defined it is noted in the data structure
     806# only so that files associated from a particular section
     807# may be removed later (using delete_section_assoc_files)
     808sub associate_file {
     809    my $self = shift (@_);
     810    my ($real_filename, $assoc_filename, $mime_type, $section) = @_;
     811    $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type;
     812
     813    # remove all associated files with the same name
     814    $self->delete_assoc_file ($assoc_filename);
     815
     816    push (@{$self->{'associated_files'}},
     817      [$real_filename, $assoc_filename, $mime_type, $section]);
     818}
     819
     820# returns a list of associated files in the form
     821#   [[real_filename, assoc_filename, mimetype, section], ...]
     822sub get_assoc_files {
     823    my $self = shift (@_);
     824
     825    return $self->{'associated_files'};
     826}
     827
     828sub delete_section_assoc_files {
     829    my $self = shift (@_);
     830    my ($section) = @_;
     831
     832    my $i=0;
     833    while ($i < scalar (@{$self->{'associated_files'}})) {
     834    if (defined $self->{'associated_files'}->[$i]->[3] &&
     835        $self->{'associated_files'}->[$i]->[3] eq $section) {
     836        splice (@{$self->{'associated_files'}}, $i, 1);
     837    } else {
     838        $i++;
     839    }
     840    }
     841}
     842
     843sub delete_assoc_file {
     844    my $self = shift (@_);
     845    my ($assoc_filename) = @_;
     846
     847    my $i=0;
     848    while ($i < scalar (@{$self->{'associated_files'}})) {
     849    if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
     850        splice (@{$self->{'associated_files'}}, $i, 1);
     851    } else {
     852        $i++;
     853    }
     854    }
     855}
     856
     857sub reset_nextsection_ptr {
     858    my $self = shift (@_);
     859    my ($section) = @_;
     860   
     861    my $section_ptr = $self->_lookup_section($section);
     862    $section_ptr->{'next_subsection'} = 1;
     863}
     864
    1458651;
  • branches/New_Config_Format-branch/gsdl/perllib/gb.pm

    r537 r1279  
    163163
    164164    return 0 unless open (MAPFILE, "$ENV{'GSDLHOME'}/unicode/$filename");
     165    binmode (MAPFILE); # f$#@!!! windows
    165166
    166167    $translations{$encoding} = [@array256];
  • branches/New_Config_Format-branch/gsdl/perllib/ghtml.pm

    r1010 r1279  
    3434sub htmlsafe
    3535{
    36     $_[0] =~ s/&/&amp;/og;
    37     $_[0] =~ s/</&lt;/og;
    38     $_[0] =~ s/>/&gt;/og;
     36    $_[0] =~ s/&/&amp;/osg;
     37    $_[0] =~ s/</&lt;/osg;
     38    $_[0] =~ s/>/&gt;/osg;
     39    $_[0] =~ s/\"/&quot;/osg;
    3940}
    4041
     
    4546sub urlsafe
    4647{
    47     $_[0] =~ s/[\x09\x20\x22\x3c\x3e\x5b\x5c\x5d\x5e\x60\x7b\x7c\x7d\x7e\?\=\&\+_\/]/sprintf("%%%2x", ord($&))/ge;
     48    $_[0] =~ s/[\x09\x20\x22\x3c\x3e\x5b\x5c\x5d\x5e\x60\x7b\x7c\x7d\x7e\?\=\&\+_\/]/sprintf("%%%2x", ord($&))/gse;
    4849}
    4950
     
    5657sub dmsafe {
    5758    my ($s) = $_[0];
    58     $s =~ s/&/&amp;/og; # for html
    59     $s =~ s/</&lt;/og;  # for html
    60     $s =~ s/>/&gt;/og;  # for html
    61     $s =~ s/\_/&#095;/og; # for dm (we have a convention of starting macros with _
    62     $s =~ s/\"/&quot;/og; # for html (don't want to be interpreted as a quote)
    63     $s =~ s/\{/&#123;/og; # for dm blocks
    64     $s =~ s/\}/&#125;/og; # for dm blocks
     59    $s =~ s/&/&amp;/osg; # for html
     60    $s =~ s/</&lt;/osg;  # for html
     61    $s =~ s/>/&gt;/osg;  # for html
     62    $s =~ s/\_/&#095;/osg; # for dm (we have a convention of starting macros with _
     63    $s =~ s/\"/&quot;/osg; # for html (don't want to be interpreted as a quote)
     64    $s =~ s/\{/&#123;/osg; # for dm blocks
     65    $s =~ s/\}/&#125;/osg; # for dm blocks
     66    $s =~ s/\\/&#092;/osg; # for dm (dm removes naturally occurring backquotes)
    6567    return $s;
    6668}
     
    170172    # args: the text that you want to convert
    171173
    172     $_[0] =~ s/&([^;]+);/&getcharequiv($1,0)/ge;
     174    $_[0] =~ s/&([^;]+);/&getcharequiv($1,0)/gse;
    173175}
    174176
     
    177179    # args: the text that you want to convert
    178180
    179     $_[0] =~ s/&([^;]+);/&getcharequiv($1,1)/ge;
     181    $_[0] =~ s/&([^;]+);/&getcharequiv($1,1)/gse;
    180182}
    181183
  • branches/New_Config_Format-branch/gsdl/perllib/mgbuilder.pm

    r1072 r1279  
    9999   
    100100    # load all the plugins
    101     $self->{'pluginfo'} = &plugin::load_plugins ($plugins);
     101    $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity);
    102102    if (scalar(@{$self->{'pluginfo'}}) == 0) {
    103103    print STDERR "No plugins were loaded.\n";
     
    207207    close ($handle) unless $self->{'debug'};
    208208
     209    $self->print_stats();
     210
    209211    # create the compression dictionary
    210212    # the compression dictionary is built by assuming the stats are from a seed
     
    232234           "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
    233235    close ($handle) unless $self->{'debug'};
     236
     237    $self->print_stats();
    234238}
    235239
     
    481485    close ($handle) unless $self->{'debug'};
    482486
     487    $self->print_stats();
     488
    483489    if (!$self->{'debug'}) {
    484490    # create the perfect hash function
     
    502508           "", {}, $self->{'buildproc'}, $self->{'maxdocs'});
    503509   
     510    $self->print_stats ();
     511
    504512    if (!$self->{'debug'}) {
    505513
     
    646654    $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes();
    647655
     656    # get additional stats from mg
     657    my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}";
     658    my $exe = &util::get_os_exe ();
     659    my $mgstat_exe = &util::filename_cat($exedir, "mgstat$exe");
     660    my $input_file = &util::filename_cat ("text", $self->{'collection'});
     661    if (!-e "$mgstat_exe" || !open (PIPEIN, "$mgstat_exe -d $self->{'build_dir'} -f $input_file |")) {
     662    print STDERR "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n";
     663    } else {
     664    my $line = "";
     665    while (defined ($line = <PIPEIN>)) {
     666        if ($line =~ /^Words in collection \[dict\]\s+:\s+(\d+)/) {
     667        ($build_cfg->{'numwords'}) = $1;
     668        } elsif ($line =~ /^Documents\s+:\s+(\d+)/) {
     669        ($build_cfg->{'numsections'}) = $1;
     670        }
     671    }
     672    close PIPEIN;
     673    }
     674
    648675    # store the mapping between the index names and the directory names
    649676    my @indexmap = ();
     
    667694    $build_cfg->{'languagemap'} = \@languagemap if scalar (@languagemap);
    668695
    669     $build_cfg->{'notbuilt'} = $self->{'notbuilt'};
     696    $build_cfg->{'notbuilt'} = $self->{'notbuilt'} if scalar @{$self->{'notbuilt'}};
    670697
    671698    # write out the build information
    672699    &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg,
    673                  '^(builddate|numdocs|numbytes)$',
     700                 '^(builddate|numdocs|numbytes|numwords|numsections)$',
    674701                             '^(indexmap|subcollectionmap|languagemap|notbuilt)$');
    675702
     
    680707}
    681708
     709sub print_stats {
     710    my $self = shift (@_);
     711
     712    my $indexing_text = $self->{'buildproc'}->get_indexing_text();
     713    my $index = $self->{'buildproc'}->get_index();
     714    my $num_bytes = $self->{'buildproc'}->get_num_bytes();
     715    my $num_processed_bytes = $self->{'buildproc'}->get_num_processed_bytes();
     716
     717    if ($indexing_text) {
     718    print STDERR "Stats (Creating index $index)\n";
     719    } else {
     720    print STDERR "Stats (Compressing text from $index)\n";
     721    }
     722    print STDERR "Total bytes in collection: $num_bytes\n";
     723    print STDERR "Total bytes in $index: $num_processed_bytes\n";
     724
     725    if ($num_processed_bytes < 50) {
     726    print STDERR "***************\n";
     727    print STDERR "WARNING: There is very little or no text to process for $index\n";
     728    if ($indexing_text) {
     729        print STDERR "This may cause an error while attempting to build the index\n";
     730    } else {
     731        print STDERR "This may cause an error while attempting to compress the text\n";
     732    }
     733    print STDERR "***************\n";
     734    }
     735}
    682736
    6837371;
  • branches/New_Config_Format-branch/gsdl/perllib/mgbuildproc.pm

    r1072 r1279  
    5858    $self->{'num_sections'} = 0;
    5959    $self->{'num_bytes'} = 0;
     60    $self->{'num_processed_bytes'} = 0;
    6061
    6162    $self->{'indexing_text'} = 0;
     
    6970    $self->{'num_docs'} = 0;
    7071    $self->{'num_sections'} = 0;
     72    $self->{'num_processed_bytes'} = 0;
    7173    $self->{'num_bytes'} = 0;
    7274}
     
    8486}
    8587
     88# num_bytes is the actual number of bytes in the collection
     89# this is normally the same as what's processed during text compression
    8690sub get_num_bytes {
    8791    my $self = shift (@_);
    8892
    8993    return $self->{'num_bytes'};
     94}
     95
     96# num_processed_bytes is the number of bytes actually passed
     97# to mg for the current index
     98sub get_num_processed_bytes {
     99    my $self = shift (@_);
     100
     101    return $self->{'num_processed_bytes'};
    90102}
    91103
     
    126138}
    127139
     140sub get_index {
     141    my $self = shift (@_);
     142
     143    return $self->{'index'};
     144}
     145
    128146sub set_classifiers {
    129147    my $self = shift (@_);
     
    138156
    139157    $self->{'indexing_text'} = $indexing_text;
     158}
     159
     160sub get_indexing_text {
     161    my $self = shift (@_);
     162
     163    return $self->{'indexing_text'};
    140164}
    141165
     
    416440            if ($real_field eq "text") {
    417441            $new_text = $doc_obj->get_text ($section);
     442            $self->{'num_processed_bytes'} += length ($new_text);
    418443            $new_text =~ s/[\cB\cC]//g;
    419444            $self->find_paragraphs($new_text);
     
    423448            foreach $meta (@{$doc_obj->get_metadata ($section, $real_field)}) {
    424449                $meta =~ s/[\cB\cC]//g;
     450                $self->{'num_processed_bytes'} += length ($meta);
    425451                $new_text .= "\cC" unless $first;
    426452                $new_text .= $meta;
  • branches/New_Config_Format-branch/gsdl/perllib/multiread.pm

    r627 r1279  
    2626# encodings currently supported are
    2727#
    28 # utf8     - either utf8 or unicode (automatically detected)
    29 # unicode  - just unicode (doesn't currently do endian detection)
    30 # gb       - GB
    31 # extended - extended ascii
    32 
     28# utf8         - either utf8 or unicode (automatically detected)
     29# unicode      - just unicode (doesn't currently do endian detection)
     30# gb           - GB
     31# iso_8859_1   - extended ascii (iso-8859-1)
     32# iso_8859_6   - 8 bit arabic (iso-8859-6)
     33# windows_1256 - Windows codepage 1256 (Arabic)
    3334
    3435package multiread;
     
    169170    }
    170171
    171     if ($self->{'encoding'} eq "extended") {
    172     # extended ascii
     172    if ($self->{'encoding'} eq "iso_8859_1") {
     173    # Latin 1 extended ascii (ISO-8859-1)
    173174    return undef if (eof ($handle));
    174175    return &unicode::ascii2utf8 (getc ($handle));
     176    }
     177
     178    if ($self->{'encoding'} eq "iso_8859_6") {
     179    # 8 bit Arabic (ISO-8859-6)
     180    return undef if (eof ($handle));
     181    return &unicode::unicode2utf8(&unicode::arabic2unicode (getc ($handle)));
     182    }
     183
     184    if ($self->{'encoding'} eq "windows_1256") {
     185    # Windows 1256 (Arabic)
     186    return undef if (eof ($handle));
     187    return &unicode::unicode2utf8(&unicode::windows2unicode ("1256", getc ($handle)));
    175188    }
    176189
     
    236249    }
    237250   
    238     if ($self->{'encoding'} eq "extended") {
    239     # extended ascii
     251    if ($self->{'encoding'} eq "iso_8859_1") {
     252    # extended ascii (ISO-8859-1)
    240253    my $line = "";
    241254    if (defined ($line = <$handle>)) {
     
    244257    return undef;
    245258    }
     259   
     260    if ($self->{'encoding'} eq "iso_8859_6") {
     261    # 8 bit arabic (ISO-8859-6)
     262    my $line = "";
     263    if (defined ($line = <$handle>)) {
     264        return &unicode::unicode2utf8(&unicode::arabic2unicode ($line));
     265    }
     266    return undef;
     267    }
     268   
     269    if ($self->{'encoding'} eq "windows_1256") {
     270    # Windows 1256 (Arabic)
     271    my $line = "";
     272    if (defined ($line = <$handle>)) {
     273        return &unicode::unicode2utf8(&unicode::windows2unicode ("1256", $line));
     274    }
     275    return undef;
     276    }
    246277
    247278    # unknown encoding
     
    250281
    251282
     283# will convert entire contents of file to utf8 and append result to $outputref
     284# this may be a slightly faster way to get the contents of a file than by
     285# recursively calling read_line()
     286sub read_file {
     287    my $self = shift (@_);
     288    my ($outputref) = @_;
     289
     290    # make sure we have a file handle
     291    return if ($self->{'handle'} eq "");
     292
     293    my $handle = $self->{'handle'};
     294
     295    if ($self->{'first'} && $self->{'encoding'} eq "utf8") {
     296    # special case for the first line of utf8 text to detect whether
     297    # the file is in utf8 or unicode
     298    $$text .= $self->read_line ();
     299    }
     300
     301    if ($self->{'encoding'} eq "utf8") {
     302    undef $/;
     303    $$outputref .=  <$handle>;
     304    $/ = "\n";
     305    return;
     306    }
     307
     308    if ($self->{'encoding'} eq "unicode") {
     309    my $line = "";
     310    while (defined ($line = $self->read_line())) {
     311        $$outputref .= $line;
     312    }
     313    return;
     314    }
     315
     316    if ($self->{'encoding'} eq "gb") {
     317    undef $/;
     318    my $text = <$handle>;
     319    $/ = "\n";
     320    $$outputref .= &unicode::unicode2utf8 (&gb::gb2unicode ($text));
     321    return;
     322    }
     323   
     324    if ($self->{'encoding'} eq "iso_8859_1") {
     325    undef $/;
     326    my $text = <$handle>;
     327    $/ = "\n";
     328    $$outputref .= &unicode::ascii2utf8 ($text);
     329    return;
     330    }
     331   
     332    if ($self->{'encoding'} eq "iso_8859_6") {
     333    my $text = <$handle>;
     334    undef $/;
     335    $/ = "\n";
     336    $$outputref .= &unicode::unicode2utf8(&unicode::arabic2unicode ($text));
     337    return;
     338    }
     339
     340    if ($self->{'encoding'} eq "windows_1256") {
     341    undef $/;
     342    my $text = <$handle>;
     343    $/ = "\n";
     344    $$outputref .= &unicode::unicode2utf8(&unicode::windows2unicode ("1256", $text));
     345    return;
     346    }
     347}
     348
     349
    2523501;
  • branches/New_Config_Format-branch/gsdl/perllib/parsargv.pm

    r537 r1279  
    6464# Returns 0 if there was an error, nonzero otherwise.
    6565#
    66 sub parse
     66
     67 
     68 sub parse
    6769{
    6870    my $arglist = shift;
     
    7072    my %option;
    7173
    72     while (($spec, $var) = splice(@_, 0, 2))
    73     {
     74    my @rest = @_;
     75
     76    # if the last argument is the string "allow_extra_options" then options
     77    # in \@rest without a corresponding SPEC will be ignored (i.e. the "$arg is
     78    # not a valid option" error won't occur)
     79    my $allow_extra_options = pop @rest;
     80    if (defined ($allow_extra_options)) {
     81    if ($allow_extra_options eq "allow_extra_options") {
     82        $allow_extra_options = 1;
     83    } else {
     84        # put it back where we got it
     85        push (@rest, $allow_extra_options);
     86        $allow_extra_options = 0;
     87    }
     88    } else {
     89    $allow_extra_options = 0;
     90    }
     91
     92    while (($spec, $var) = splice(@rest, 0, 2))
     93        {
    7494    die "Variable for $spec is not a valid type."
    7595        unless ref($var) eq 'SCALAR' || ref($var) eq 'ARRAY';
     
    126146        &process_arg($option{$arg}, $arglist, \$errors);
    127147        }
    128         else
     148        elsif (!$allow_extra_options)
    129149        {
    130150        print STDERR "$arg is not a valid option.\n";
  • branches/New_Config_Format-branch/gsdl/perllib/plugin.pm

    r835 r1279  
    2929
    3030sub load_plugins {
    31     my ($plugin_list) = @_;
     31    my ($plugin_list, $verbosity) = @_;
    3232    my @plugin_objects = ();
     33
     34    $verbosity = 2 unless defined $verbosity;
    3335
    3436    foreach $pluginoptions (@$plugin_list) {
     
    4951    map { $_ = "\"$_\""; } @$pluginoptions;
    5052    my $options = join (",", @$pluginoptions);
     53    $options =~ s/\$/\\\$/g;
    5154    eval ("\$plugobj = new \$pluginname($options)");
    5255    die "$@" if $@;
    5356   
     57    # initialize plugin
     58    $plugobj->init($verbosity);
     59
    5460    # add this object to the list
    5561    push (@plugin_objects, $plugobj);
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/ArcPlug.pm

    r809 r1279  
    3939}
    4040
     41use strict;
     42
    4143sub new {
    4244    my ($class) = @_;
    43     my $self = new BasPlug ();
     45    my $self = new BasPlug ("ArcPlug", @_);
    4446
    4547    return bless $self, $class;
     
    5860sub read {
    5961    my $self = shift (@_);
    60     ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
     62    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    6163
    6264    my $count = 0;
    6365
    6466    # see if this has a archives information file within it
    65     $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");
     67    my $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");
    6668
    6769    if (-e $archive_info_filename) {
     
    7779
    7880    # process each file
    79     foreach $subfile (@$file_list) {
     81    foreach my $subfile (@$file_list) {
    8082        last if ($maxdocs != -1 && $count >= $maxdocs);
    8183
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/BasPlug.pm

    r839 r1279  
    2626package BasPlug;
    2727
     28use parsargv;
     29use multiread;
     30use cnseg;
     31use acronym;
     32use strict;
     33use doc;
     34
     35sub print_general_usage {
     36    my ($plugin_name) = @_;
     37
     38    print STDERR "\n  usage: plugin $plugin_name [options]\n\n";
     39    print STDERR "   -input_encoding   The encoding of the source documents. Documents will be\n";
     40    print STDERR "                     converted from these encodings and stored internally as\n";
     41    print STDERR "                     utf8. The default input_encoding is Latin1. Accepted values\n";
     42    print STDERR "                     are:\n";
     43    print STDERR "                        iso_8859_1 (extended ascii)\n";
     44    print STDERR "                        Latin1 (the same as iso-8859-1)\n";
     45    print STDERR "                        ascii (7 bit ascii -- may be faster than Latin1 as no\n";
     46    print STDERR "                               conversion is neccessary)\n";
     47    print STDERR "                        gb (GB or GBK simplified Chinese)\n";
     48    print STDERR "                        iso_8859_6 (8 bit Arabic)\n";
     49    print STDERR "                        windows_1256 (Windows codepage 1256 (Arabic))\n";
     50    print STDERR "                        Arabic (the same as windows_1256)\n";
     51    print STDERR "                        utf8 (either utf8 or unicode -- automatically detected)\n";
     52    print STDERR "                        unicode (just unicode -- doesn't currently do endian\n";
     53    print STDERR "                                 detection)\n";
     54    print STDERR "   -process_exp      A perl regular expression to match against filenames.\n";
     55    print STDERR "                     Matching filenames will be processed by this plugin.\n";
     56    print STDERR "                     Each plugin has its own default process_exp. e.g HTMLPlug\n";
     57    print STDERR "                     defaults to '(?i)\.html?\$' i.e. all documents ending in\n";
     58    print STDERR "                     .htm or .html (case-insensitive).\n";
     59    print STDERR "   -block_exp        Files matching this regular expression will be blocked from\n";
     60    print STDERR "                     being passed to any further plugins in the list. This has no\n";
     61    print STDERR "                     real effect other than to prevent lots of warning messages\n";
     62    print STDERR "                     about input files you don't care about. Each plugin may or may\n";
     63    print STDERR "                     not have a default block_exp. e.g. by default HTMLPlug blocks\n";
     64    print STDERR "                     any files with .gif, .jpg, .jpeg, .png, .pdf, .rtf or .css\n";
     65    print STDERR "                     file extensions.\n";
     66    print STDERR "   -extract_acronyms Extract acronyms from within text and set as metadata\n\n";
     67}
     68
     69# print_usage should be overridden for any sub-classes having
     70# their own plugin specific options
     71sub print_usage {
     72    print STDERR "\nThis plugin has no plugin specific options\n\n";
     73
     74}
    2875
    2976sub new {
    30     my ($class) = @_;
    31 
    32     return bless {}, $class;
     77    my $class = shift (@_);
     78    my $plugin_name = shift (@_);
     79
     80    my $self = {};
     81    my $encodings = "^(iso_8859_1|Latin1|ascii|gb|iso_8859_6|windows_1256|Arabic|utf8|unicode)\$";
     82
     83    # general options available to all plugins
     84    if (!parsargv::parse(\@_,
     85             qq^input_encoding/$encodings/Latin1^, \$self->{'input_encoding'},
     86             q^process_exp/.*/^, \$self->{'process_exp'},
     87             q^block_exp/.*/^, \$self->{'block_exp'},
     88             q^extract_acronyms^, \$self->{'extract_acronyms'},
     89             "allow_extra_options")) {
     90
     91    print STDERR "\nThe $plugin_name plugin uses an incorrect general option (general options are those\n";
     92    print STDERR "available to all plugins). Check your collect.cfg configuration file.\n";
     93        &print_general_usage($plugin_name);
     94    die "\n";
     95    }
     96
     97    return bless $self, $class;
     98}
     99
     100# initialize BasPlug options
     101# if init() is overridden in a sub-class, remember to call BasPlug::init()
     102sub init {
     103    my $self = shift (@_);
     104    my ($verbosity) = @_;
     105
     106    # verbosity is passed through from the processor
     107    $self->{'verbosity'} = $verbosity;
     108
     109    # set process_exp and block_exp to defaults unless they were
     110    # explicitly set
     111
     112    if ((!$self->is_recursive()) and
     113    (!defined $self->{'process_exp'}) || ($self->{'process_exp'} eq "")) {
     114
     115    $self->{'process_exp'} = $self->get_default_process_exp ();
     116    if ($self->{'process_exp'} eq "") {
     117        warn ref($self) . " Warning: Non-recursive plugin has no process_exp\n";
     118    }
     119    }
     120
     121    if ((!defined $self->{'block_exp'}) || ($self->{'block_exp'} eq "")) {
     122    $self->{'block_exp'} = $self->get_default_block_exp ();
     123    }
     124   
     125    # handle input_encoding aliases
     126    $self->{'input_encoding'} = "iso_8859_1" if $self->{'input_encoding'} eq "Latin1";
     127    $self->{'input_encoding'} = "windows_1256" if $self->{'input_encoding'} eq "Arabic";
    33128}
    34129
     
    42137}
    43138
    44 # return 1 if this class might recurse using $pluginfo
     139# this function should be overridden to return 1
     140# in recursive plugins
    45141sub is_recursive {
    46142    my $self = shift (@_);
    47143
    48     die "BasPlug::is_recursive function must be implemented in sub classes\n";
    49 }
    50 
    51 # return number of files processed, undef if can't process
     144    return 0;
     145}
     146
     147sub get_default_block_exp {
     148    my $self = shift (@_);
     149
     150    return "";
     151}
     152
     153sub get_default_process_exp {
     154    my $self = shift (@_);
     155
     156    return "";
     157}
     158
     159# The BasPlug read() function. This function does all the right things
     160# to make general options work for a given plugin. It calls the process()
     161# function which does all the work specific to a plugin (like the old
     162# read functions used to do). Most plugins should define their own
     163# process() function and let this read() function keep control.
     164#
     165# recursive plugins (e.g. RecPlug) and specialized plugins like those
     166# capable of processing many documents within a single file (e.g.
     167# GMLPlug) should normally implement their own version of read()
     168#
     169# Return number of files processed, undef if can't process
    52170# Note that $base_dir might be "" and that $file might
    53171# include directories
     172
    54173sub read {
    55174    my $self = shift (@_);
    56175    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    57176
    58     die "BasPlug::read function must be implemented in sub classes\n";
    59 
    60     return undef; # will never get here
    61 }
    62 
    63 sub extra_metadata
    64 {
    65     my ($self,$doc_obj,$cursection, $metadata) = @_;
    66 
    67     foreach $field (keys(%$metadata)) {
     177    if ($self->is_recursive()) {
     178    die "BasPlug::read function must be implemented in sub-class for recursive plugins\n";
     179    }
     180
     181    my $filename = &util::filename_cat($base_dir, $file);
     182    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
     183    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
     184    return undef;
     185    }
     186    my $plugin_name = ref ($self);
     187    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     188   
     189    # create a new document
     190    my $doc_obj = new doc ($file, "indexed_doc");
     191   
     192    # read in file ($text will be in utf8)
     193    my $text = "";
     194    $self->read_file ($filename, \$text);
     195
     196    if ($text !~ /\w/) {
     197    print STDERR "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
     198    return 0;
     199    }
     200
     201    # include any metadata passed in from previous plugins
     202    # note that this metadata is associated with the top level section
     203    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
     204
     205    # do plugin specific processing of doc_obj
     206    return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
     207
     208    # do any automatic metadata extraction
     209    $self->auto_extract_metadata ($doc_obj);
     210
     211    # add an OID
     212    $doc_obj->set_OID();
     213
     214    # process the document
     215    $processor->process($doc_obj);
     216
     217    return 1; # processed the file
     218}
     219
     220# returns undef if file is rejected by the plugin
     221sub process {
     222    my $self = shift (@_);
     223    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     224
     225    die "Basplug::process function must be implemented in sub-class\n";
     226
     227    return undef; # never gets here
     228}
     229
     230# uses the multiread package to read in the entire file pointed to
     231# by filename and loads the resulting text into $$textref. Input text
     232# may be in any of the encodings handled by multiread, output text
     233# will be in utf8
     234sub read_file {
     235    my $self = shift (@_);
     236    my ($filename, $textref) = @_;
     237
     238    $$textref = "";
     239
     240    open (FILE, $filename) || die "BasPlug::read_file could not open $filename for reading ($!)\n";
     241
     242    if ($self->{'input_encoding'} eq "ascii") {
     243    undef $/;
     244    $$textref = <FILE>;
     245    $/ = "\n";
     246    } else {
     247    my $reader = new multiread();
     248    $reader->set_handle ('BasPlug::FILE');
     249    $reader->set_encoding ($self->{'input_encoding'});
     250    $reader->read_file ($textref);
     251
     252    if ($self->{'input_encoding'} eq "gb") {
     253        # segment the Chinese words
     254        $$textref = &cnseg::segment($$textref);
     255    }
     256    }
     257
     258    close FILE;
     259}
     260
     261# add any extra metadata that's been passed around from one
     262# plugin to another.
     263# extra_metadata uses add_utf8_metadata so it expects metadata values
     264# to already be in utf8
     265sub extra_metadata {
     266    my $self = shift (@_);
     267    my ($doc_obj, $cursection, $metadata) = @_;
     268
     269    foreach my $field (keys(%$metadata)) {
    68270    # $metadata->{$field} may be an array reference
    69271    if (ref ($metadata->{$field}) eq "ARRAY") {
    70272        map {
    71         $doc_obj->add_metadata ($cursection, $field, $_);
     273        $doc_obj->add_utf8_metadata ($cursection, $field, $_);
    72274        } @{$metadata->{$field}};
    73275    } else {
    74         $doc_obj->add_metadata ($cursection, $field, $metadata->{$field});
     276        $doc_obj->add_utf8_metadata ($cursection, $field, $metadata->{$field});
     277    }
     278    }
     279}
     280
     281# extract acronyms (and hopefully other stuff soon too).
     282sub auto_extract_metadata {
     283    my $self = shift (@_);
     284    my ($doc_obj) = @_;
     285
     286    if ($self->{'extract_acronyms'}) {
     287    my $thissection = $doc_obj->get_top_section();
     288    while (defined $thissection) {
     289        my $text = $doc_obj->get_text($thissection);
     290        $self->extract_acronyms (\$text, $doc_obj, $thissection) if $text =~ /./;
     291        $thissection = $doc_obj->get_next_section ($thissection);
     292    }
     293    }
     294}
     295
     296sub extract_acronyms {
     297    my $self = shift (@_);
     298    my ($textref, $doc_obj, $thissection) = @_;
     299
     300    my $acro_array =  &acronym::acronyms($textref);
     301
     302    foreach my $acro (@$acro_array) {
     303
     304    #do the normal acronym
     305    $doc_obj->add_utf8_metadata($thissection, "Acronym",  $acro->to_string());
     306    print "found " . $acro->to_string() . "\n";
     307       
     308    # do the KWIC (Key Word In Context) acronym
     309    my @kwic = $acro->to_string_kwic();
     310    foreach my $kwic (@kwic) {
     311        $doc_obj->add_utf8_metadata($thissection, "AcronymKWIC",  $kwic);
     312        print "found (KWIC)" . $kwic . "\n";
    75313    }
    76314    }
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/EMAILPlug.pm

    r638 r1279  
    2626
    2727
    28 #
    29 # EMAILPlug reads an email file (*.email)
    30 #
    31 # Version 1.1   1999 Sep 20  by Gordon Paynter ([email protected])
    32 #                            loosely based on the original HTMLPlug code
     28
     29# EMAILPlug
     30#
     31# by Gordon Paynter ([email protected])
     32#
     33# Email plug reads email files.  These are named with a simple
     34# number (i.e. as they appear in mh_mail folders) or with the
     35# extension .email
    3336#
    3437# Document text:
    35 #   The document text consists of all the text occuring after the first
    36 #   blank line in this document.
     38#   The document text consists of all the text
     39#   after the first blank line in the document.
    3740#
    3841# Metadata:
     42#   $Headers      All the header content
    3943#   $Subject      Subject: header
    4044#   $To           To: header
     
    4246#   $DateText     Date: header
    4347#   $Date         Date: header in GSDL format (eg: 19990924)
    44 #   $OtherHeaders All the other headers
    45 #   $NewText      The unquoted text in this message
     48#
     49# Version history
     50#
     51# 1.2   (2000 Jun 12) Major rewrite.
     52#       (The new version of Greenstone breaks some of the metadata.)
     53# 1.1.1 Compensated for two-digit years like "95"
     54# 1.1   (1999 Sep 20) Introduced the various metadata fields
     55# 1.0   Based on the original HTMLPlug code
    4656#
    4757
     
    5666# EMAILPlug is a sub-class of BasPlug.
    5767
    58 sub BEGIN {
     68sub BEGIN { 
    5969    @ISA = ('BasPlug');
    6070}
    6171
     72use strict;
    6273
    6374# Create a new EMAILPlug object with which to parse a file.
    64 # This is done by creating a new BasPlug and usig bless to
     75# Accomplished by creating a new BasPlug and using bless to
    6576# turn it into an EMAILPlug.
    6677
    6778sub new {
    6879    my ($class) = @_;
    69     $self = new BasPlug ();
     80    my $self = new BasPlug ("EMAILPlug", @_);
    7081
    7182    return bless $self, $class;
    7283}
    7384
    74 
    75 # Is the EMAILPlug recursive?  No.
    76 
    77 sub is_recursive {
     85sub get_default_process_exp {
    7886    my $self = shift (@_);
    7987
    80     return 0; # this is not a recursive plugin
    81 }
    82 
    83 
    84 #
    85 # read
    86 #
    87 # read attempts to read a file and store its contents in a
    88 # new document object.
    89 #
    90 # Returns: number of files processed or undef if can't process
    91 # This plugin only processes one file at a time.
    92 #
    93 # Note: $base_dir might be "" and $file might include directories,
    94 # but that doesn't affect EMAILPlug
    95 #
    96 
    97 sub read {
     88    return q^\d+(\.email)?$^;
     89}
     90
     91# do plugin specific processing of doc_obj
     92sub process {
    9893    my $self = shift (@_);
    99     my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
    100 
    101     # Make sure file exists and is an email file
    102     my $filename = &util::filename_cat($base_dir, $file);
    103     return undef unless ($filename =~ /\.email$/i && (-e $filename));
    104 
    105     print STDERR "EMAILPlug: processing $filename\n" if $processor->{'verbosity'};
    106 
    107     # create a new document object
    108     my $doc_obj = new doc ($file, "indexed_doc");
    109     open (FILE, $filename) || die "EMAILPlug::read - can't open $filename\n";
     94    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     95   
     96    # Check that we're dealing with a valid mail file
     97    return undef unless (($$textref =~ /From:/) || ($$textref =~ /To:/));
     98
     99    print STDERR "EMAILPlug: processing $file\n"
     100    if $self->{'verbosity'} > 1;
     101
    110102    my $cursection = $doc_obj->get_top_section();
    111103
    112     # Metadata fields
    113     my $Subject = "";
    114     my $To = "";
    115     my $From = "";
    116     my $DateText = "";
    117     my $Date = "";
    118     my $OtherHeaders = "";
    119     my $NewText = "";
    120     my $text = "";
    121     my $line = "";
    122 
    123     my $headers_read = 0;
    124    
    125     # Read and process each line in te email file.
    126     # Each file consists of a set of header lines, then a blank line,
    127     # then the body of the email.
    128     while (<FILE>) {
     104    #
     105    # Parse the document's text and extract metadata
     106    #
     107
     108    # Separate header from body of message
     109    my $Headers = $$textref;
     110    $Headers =~ s/\n\n.*//s;
     111    $$textref = substr $$textref, (length $Headers);
     112
     113    # Extract basic metadata from header
     114    my @headers = ("From", "To", "Subject", "Date");
     115    my $value = "";
     116    my %raw;
     117
     118    foreach my $name (@headers) {
     119    $value = $Headers;
     120    $value =~ s/.*$name://s;
     121    $value =~ s/\S*:.*//s;
     122    $value =~ s/\s*$//;
     123    $value =~ s/\s+/ /g;
     124    $raw{$name} = $value;
     125    }
     126
     127    # Process Date information
     128    if ($raw{"Date"}) {
     129    $raw{"DateText"} = $raw{"Date"};
    129130   
    130     $line = $_;
     131    # Convert the date text to internal date format
     132    $value = $raw{"Date"};
     133    my ($day, $month, $year) = $value =~ /(\d?\d)\s([A-Z][a-z][a-z])\s(\d\d\d?\d?)/;
     134    if ($year < 100) { $year += 1900; }
     135    $raw{"Date"} = &sorttools::format_date($day, $month, $year);
    131136   
    132     # Remove carriage returns from the line.
    133     # We will later replace single cariage returns with <BR> tags
    134     # and double carriage returns with <P> tags.
    135     $line =~ s/\n/ /g;
    136 
    137         if ($headers_read) {
    138         # The headers have been read, so add this line to the body text
    139         $text .= "$line\n";
    140             # If the line isn't quoted, add it to the NewText metadata
    141         if ($line =~ /^[^>|]/) {
    142         $NewText .= "$line\n";
    143         }
    144        
    145     } elsif ($line =~ /^\s*$/) {
    146         # An empty line signals the end of the headers.
    147         $headers_read = 1;
    148 
     137    } else {
     138    # We have not extracted a date
     139    $raw{"DateText"} = "Unknown.";
     140    $raw{"Date"} = "19000000";
     141    }
     142
     143
     144    # Add extracted metadata to document object
     145    foreach my $name (keys %raw) {
     146    $value = $raw{$name};
     147    if ($value) {
     148        $value = &text_into_html($value);
    149149    } else {
    150         # Read a line of header information and add it to the metadata
    151         $line .= "\n";
    152         if ($line =~ /^From:/) {
    153         $line =~ s/^From:\s*//;
    154         $From .= $line;
    155         } elsif ($line =~ /^To:/) {
    156         $line =~ s/^To:\s*//;
    157         $To .= $line;
    158         } elsif ($line =~ /^Date:/) {
    159         $line =~ s/^Date:\s*//;
    160         $DateText .= $line;
    161         if ($Date !~ /\d+/) {
    162             # Convert the date text to internal date format
    163             my ($day, $month, $year) = $line =~ /(\d?\d)\s([A-Z][a-z][a-z])\s(\d\d\d\d)/;
    164             $Date = &sorttools::format_date($day, $month, $year);
    165         }
    166         } elsif ($line =~ /^Subject:/) {
    167         $line =~ s/^Subject:\s*//;
    168         $Subject .= $line;
    169         } else {
    170         $OtherHeaders .= $line;
    171         }
    172     }   
     150        $value = "No $name field";
     151    }
     152    $doc_obj->add_utf8_metadata ($cursection, $name, $value);
    173153    }
    174154
    175     # Add Subject metadata
    176     $Subject = &text_into_html($Subject);
    177     $Subject = "No Subject" unless ($Subject =~ /\w/);
    178     $doc_obj->add_metadata ($cursection, "Subject", $Subject);
    179    
    180     # Add Sender
    181     $From = &text_into_html($From);
    182     $From = "No Sender" unless ($From =~ /\w/);
    183     $doc_obj->add_metadata ($cursection, "Creator", $From);
    184 
    185     # Add Recipient
    186     $To = &text_into_html($To);
    187     $To = "No Recipient" unless ($To =~ /\w/);
    188     $doc_obj->add_metadata ($cursection, "To", $To);
    189 
    190     # Add Date Text
    191     $DateText =~ &text_into_html($Date);
    192     $doc_obj->add_metadata ($cursection, "DateText", $DateText) if ($DateText =~ /\w/);
    193 
    194     # Add Date
    195     $Date =~ &text_into_html($Date);
    196     $doc_obj->add_metadata ($cursection, "Date", $Date) if ($Date =~ /\w/);
    197 
    198     # Add Other Headers
    199     $OtherHeaders = &text_into_html($OtherHeaders);
    200     $doc_obj->add_metadata ($cursection, "OtherHeaders", $OtherHeaders) if ($OtherHeaders =~ /\w/);
    201    
    202     # Add New Text
    203     $NewText = &text_into_html($NewText);
    204     $doc_obj->add_metadata ($cursection, "NewText", $NewText) if ($NewText =~ /\w/);
    205 
    206     # Add text
    207     $text =~ s/<BR>\s*<BR>/<P>/g;
    208     $text = &text_into_html($text);
    209     $doc_obj->add_text ($cursection, $text) if ($text =~ /\w/);
    210    
    211     # Add the OID - that is, the big HASH value used as a unique ID
    212     $doc_obj->set_OID ();
    213 
    214     # Process the document
    215     $processor->process($doc_obj);
    216 
    217     return 1; # processed the file
    218 }
    219 
    220 
    221 1;
    222 
    223 
    224 
    225 #
     155    # Add "All headers" metadata
     156    $Headers = &text_into_html($Headers);
     157    $Headers = "No headers" unless ($Headers =~ /\w/);
     158    $doc_obj->add_utf8_metadata ($cursection, "Headers", $Headers);
     159
     160    # Add text to document object
     161    $$textref = &text_into_html($$textref);
     162    $$textref = "No message" unless ($$textref =~ /\w/);
     163    $doc_obj->add_utf8_text($cursection, $$textref);
     164
     165    return 1;
     166}
     167
     168
    226169# Convert a text string into HTML.
    227170#
     
    234177# and replaces carriage returns with <BR> tags (and multiple carriage
    235178# returns with <P> tags).
    236 #
     179
    237180
    238181sub text_into_html {
    239182    my ($text) = @_;
    240183
    241 
    242     # Convert problem charaters into HTML symbols
    243     $text =~ s/&/&amp;/g;
    244     $text =~ s/</&lt;/g;
    245     $text =~ s/>/&gt;/g;
    246     $text =~ s/\"/&quot;/g;
     184    # Convert problem characters into HTML symbols
     185    $text =~ s/&/&amp;/go;
     186    $text =~ s/</&lt;/go;
     187    $text =~ s/>/&gt;/go;
     188    $text =~ s/\"/&quot;/go;
    247189
    248190    # convert email addresses and URLs into links
    249191    $text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g;
    250     $text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-]*)/<a href=\"$1">$1<\/a>/g;
     192    $text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-~]*)/<a href=\"$1\">$1<\/a>/g;
    251193
    252194    # Clean up whitespace and convert \n characters to <BR> or <P>
    253     $text =~ s/ +/ /g;
    254     $text =~ s/\s*$//;
    255     $text =~ s/^\s*//;
    256     $text =~ s/\n/\n<BR>/g;
    257     $text =~ s/<BR>\s*<BR>/<P>/g;
     195    $text =~ s/ +/ /go;
     196    $text =~ s/\s*$//o;
     197    $text =~ s/^\s*//o;
     198    $text =~ s/\n/\n<BR>/go;
     199    $text =~ s/<BR>\s*<BR>/<P>/go;
    258200
    259201    return $text;
    260202}
    261203
    262    
    263 
    264 
    265 
    266 
    267 
    268 
     204
     205# Perl packages have to return true if they are run.
     2061;
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/GMLPlug.pm

    r1010 r1279  
    3737}
    3838
     39use strict;
     40
    3941sub new {
    4042    my ($class) = @_;
    41     $self = new BasPlug ();
     43    my $self = new BasPlug ("GMLPlug", @_);
    4244
    4345    return bless $self, $class;
    4446}
    4547
    46 
    47 sub is_recursive {
     48sub get_default_process_exp {
    4849    my $self = shift (@_);
    4950
    50     return 0; # this is not a recursive plugin
    51 }
    52 
    53 sub _unescape_text {
    54     my ($text) = @_;
    55 
    56     # special characters in the gml encoding
    57     $text =~ s/&lt;/</g;
    58     $text =~ s/&gt;/>/g;
    59     $text =~ s/&quot;/\"/g;
    60     $text =~ s/&amp;/&/g; # this has to be last...
    61 
    62     return $text;
     51    return q^(?i)\.gml?$^;
    6352}
    6453
     
    6958    my $self = shift (@_);
    7059    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    71     my $fullname = &util::filename_cat ($base_dir, $file);
    7260
    73     # see if this is a gml book
    74     return undef unless (-f $fullname && $fullname =~ /\.gml(\.gz)?$/io);
    75 
    76     my ($parent_dir, $gz) = $fullname =~ /^(.*?)[\/\\][^\/\\]+.gml(\.gz)?$/io;
    77 
    78     if (defined $gz && $gz =~ /\.gz/io) {
    79     $gz = 1;
    80     } else {
    81     $gz = 0;
     61    my $filename = &util::filename_cat($base_dir, $file);
     62    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
     63    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
     64    return undef;
    8265    }
     66    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    8367
    8468    print STDERR "GMLPlug: processing $file\n";
    8569
    86     # read in the document
    87     if ($gz) {
    88     if (!open (INFILE, "zcat $fullname |")) {
    89         print STDERR "GMLPlug::read - zcat couldn't read $fullname\n";
    90         return undef;
    91     }
    92     } else {
    93     if (!open (INFILE, $fullname)) {
    94         print STDERR "GMLPlug::read - couldn't read $fullname\n";
    95         return undef;
    96     }
     70    my $parent_dir = $file;
     71    $parent_dir =~ s/[^\\\/]*$//;
     72    $parent_dir = &util::filename_cat ($base_dir, $parent_dir);
     73
     74    if (!open (INFILE, $filename)) {
     75    print STDERR "GMLPlug::read - couldn't read $filename\n";
     76    return 0;
    9777    }
    9878
     
    10686
    10787    my $no_docs = 0;
    108 #    my $src_filename = ""; #### don't appear to use this anymore - not sure if that's right
    10988
    11089    while (1) {
     
    128107
    129108        } else {
    130             print STDERR "GMLPlug::read - error in file $fullname\n";
     109            print STDERR "GMLPlug::read - error in file $filename\n";
    131110            print STDERR "text: \"$gml\"\n";
    132111            last;
     
    166145        last if $section eq ""; # back to top level again (more than one document in gml file)
    167146        $section = $doc_obj->get_parent_section ($section);
    168     } #while (1) section level
     147    } # while (1) section level
    169148
    170149    # add the associated files
    171     $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");
     150    my $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");
    172151    my ($assoc_file_info, $afile);
    173152    foreach $assoc_file_info (@$assoc_files) {
     
    186165    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
    187166   
    188     # assume the document has an OID
     167    # do any automatic metadata extraction
     168    $self->auto_extract_metadata ($doc_obj);
     169
     170    # assume the document has an OID already
    189171   
    190172    # process the document
     
    194176    last if ($maxdocs > -1 && $no_docs >= $maxdocs);
    195177    last unless defined $gml && $gml =~ /\w/;
    196     } #while(1) document level
     178    } # while(1) document level
    197179
    198180    return $no_docs; # no of docs processed
    199181}
    200182
     183sub _unescape_text {
     184    my ($text) = @_;
     185
     186    # special characters in the gml encoding
     187    $text =~ s/&lt;/</g;
     188    $text =~ s/&gt;/>/g;
     189    $text =~ s/&quot;/\"/g;
     190    $text =~ s/&amp;/&/g; # this has to be last...
     191
     192    return $text;
     193}
    201194
    2021951;
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/HBPlug.pm

    r1020 r1279  
    2424###########################################################################
    2525
    26 # plugin which process an HTML book directory
     26# plugin which processes an HTML book directory
     27
     28# This plugin is used by the Humanity Library collections and does not handle
     29# input encodings other than ascii or extended ascii
     30
     31# this code is kind of ugly and could no doubt be made to run faster; by leaving
     32# it in this state I hope to encourage people to make their collections use
     33# HBSPlug instead ;-)
     34
     35# Use HBSPlug if creating a new collection and marking up files like the
     36# Humanity Library collections. HBSPlug accepts all input encodings but
     37# expects the marked up files to be cleaner than those used by the
     38# Humanity Library collections
    2739
    2840package HBPlug;
    2941
    30 use plugin;
    3142use ghtml;
    3243use BasPlug;
    3344use util;
    34 use lang;
    3545use doc;
    36 use cfgread;
    3746
    3847
     
    4352sub new {
    4453    my ($class) = @_;
    45     $self = new BasPlug ();
     54    my $self = new BasPlug ("HBPlug", @_);
    4655
    4756    return bless $self, $class;
    4857}
    4958
    50 sub is_recursive {
    51     my $self = shift (@_);
    52 
    53     return 0; # this is not a recursive plugin
    54 }
     59sub init {
     60    my $self = shift (@_);
     61    my ($verbosity) = @_;
     62
     63    $self->BasPlug::init();
     64
     65    # this plugin only handles ascii encodings
     66    if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) {
     67    die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" .
     68        $self->{'input_encoding'} . " is not an acceptable input_encoding value\n";
     69    }
     70}
     71
     72# this is included only to prevent warnings being printed out
     73# from BasPlug::init. The process_exp is not used by this plugin
     74sub get_default_process_exp {
     75    my $self = shift (@_);
     76
     77    return "This plugin does not use a process_exp\n";
     78}
     79
    5580
    5681sub HB_read_html_file {
     
    6590
    6691    my $foundbody = 0;
    67     $self->HB_gettext (\$foundbody, $text, FILE);
     92    $self->HB_gettext (\$foundbody, $text, "FILE");
    6893    close FILE;
    6994
     
    7297    $foundbody = 1;
    7398    open (FILE, $htmlfile) || return;
    74     $self->HB_gettext (\$foundbody, $text, FILE);   
     99    $self->HB_gettext (\$foundbody, $text, "FILE");
    75100    close FILE;
    76101    }
     
    159184}
    160185
     186# if input_encoding is ascii we can call add_utf8_metadata
     187# directly but if it's iso_8859_1 (the default) we need to call
     188# add_metadata so that the ascii2utf8 conversion is done first
     189# this should speed things up a little if processing an ascii only
     190# document with input_encoding set to ascii
     191sub HB_add_metadata {
     192    my $self = shift (@_);
     193    my ($doc_obj, $cursection, $field, $value) = @_;
     194
     195    if ($self->{'input_encoding'} eq "ascii") {
     196    $doc_obj->add_utf8_metadata ($cursection, $field, $value);
     197    } else {
     198    $doc_obj->add_metadata ($cursection, $field, $value);
     199    }
     200}
    161201
    162202# return number of files processed, undef if can't process
     
    192232
    193233    # add metadata for top level of document
    194     foreach $field (keys(%$metadata)) {
     234    foreach my $field (keys(%$metadata)) {
    195235    # $metadata->{$field} may be an array reference
    196236    if (ref ($metadata->{$field}) eq "ARRAY") {
    197237        map {
    198         $doc_obj->add_metadata ($cursection, $field, $_);
     238        $self->HB_add_metadata ($doc_obj, $cursection, $field, $_);
    199239        } @{$metadata->{$field}};
    200240    } else {
    201         $doc_obj->add_metadata ($cursection, $field, $metadata->{$field});
     241        $self->HB_add_metadata ($doc_obj, $cursection, $field, $metadata->{$field});
    202242    }
    203243    }
     
    240280
    241281        # add the metadata to this section
    242         $doc_obj->add_metadata ($cursection, "Title", $title);
     282        $self->HB_add_metadata ($doc_obj, $cursection, "Title", $title);
    243283
    244284        # clean up the section html
     
    251291
    252292        # add the text for this section
    253         $doc_obj->add_text ($cursection, $sectiontext);
    254        
     293        if ($self->{'input_encoding'} eq "ascii") {
     294        $doc_obj->add_utf8_text ($cursection, $sectiontext);
     295        } else {
     296        $doc_obj->add_text ($cursection, $sectiontext);
     297        }
    255298    } else {
    256299        print STDERR "WARNING - leftover text\n" , $self->shorten($html),
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/HTMLPlug.pm

    r1020 r1279  
    5050
    5151sub print_usage {
    52     print STDERR "\nIncorrect options passed to HTMLPlug, check your collect.cfg configuration file\n";
    53 
    5452    print STDERR "\n  usage: plugin HTMLPlug [options]\n\n";
    5553    print STDERR "  options:\n";
    56     print STDERR "   -process_exp           A perl regular expression to match against filenames.\n";
    57     print STDERR "                          Matching filenames will be processed by this plugin.\n";
    58     print STDERR "                          Defaults to '(?i)\.html?\$' i.e. all documents ending in\n";
    59     print STDERR "                          .htm or .html (case-insensitive).\n";
    6054    print STDERR "   -nolinks               Don't make any attempt to trap links (setting this flag may\n";
    6155    print STDERR "                          improve speed of building/importing but any relative links within\n";
    6256    print STDERR "                          documents will be broken).\n";
    63     print STDERR "   -block_exp             Files matching this regular expression will be blocked from\n";
    64     print STDERR "                          being passed to any further plugins in the list. By default\n";
    65     print STDERR "                          HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png, .pdf,\n";
    66     print STDERR "                          .rtf or .css file extensions.\n";
    6757    print STDERR "   -keep_head             Don't remove headers from html files.\n";
    6858    print STDERR "   -no_metadata           Don't attempt to extract any metadata from files.\n";
    6959    print STDERR "   -metadata_fields       Comma separated list of metadata fields to attempt to extract.\n";
    70     print STDERR "                          Defaults to 'Title'\n";
     60    print STDERR "                          Defaults to 'Title'.\n";
     61    print STDERR "                          Use `first200` to get the first 200 characters of the body.\n";
     62    print STDERR "                          Use `H1` to get the text inside the first <H1> and </H1> tags in the text.\n";
    7163    print STDERR "   -w3mir                 Set if w3mir was used to generate input file structure.\n";
    72     print STDERR "                          w3mir \n";
    7364    print STDERR "   -assoc_files           Perl regular expression of file extensions to associate with\n";
    74     print STDERR "                          html documents. Defaults to '(?i)\.(jpe?g|gif|png|css|pdf)$'\n";
     65    print STDERR "                          html documents. Defaults to '(?i)\.(jpe?g|gif|png|css|pdf)\$'\n";
    7566    print STDERR "   -rename_assoc_files    Renames files associated with documents (e.g. images). Also\n";
    7667    print STDERR "                          creates much shallower directory structure (useful when creating\n";
     
    8071sub new {
    8172    my $class = shift (@_);
    82     my $self = new BasPlug ();
     73    my $self = new BasPlug ("HTMLPlug", @_);
    8374
    8475    if (!parsargv::parse(\@_,
    85              q^process_exp/.*/(?i)\.html?$^, \$self->{'process_exp'},
    8676             q^nolinks^, \$self->{'nolinks'},
    87              q^block_exp/.*/(?i)\.(gif|jpe?g|png|pdf|rtf|css)$^, \$self->{'block_exp'},
    8877             q^keep_head^, \$self->{'keep_head'},
    8978             q^no_metadata^, \$self->{'no_metadata'},
     
    9180             q^w3mir^, \$self->{'w3mir'},
    9281             q^assoc_files/.*/(?i)\.(jpe?g|gif|png|css|pdf)$^, \$self->{'assoc_files'},
    93              q^rename_assoc_files^, \$self->{'rename_assoc_files'})) {
     82             q^rename_assoc_files^, \$self->{'rename_assoc_files'},
     83             "allow_extra_options")) {
     84
     85    print STDERR "\nIncorrect options passed to HTMLPlug, check your collect.cfg configuration file\n";
    9486    &print_usage();
    9587    die "\n";
    9688    }
    97 
     89   
    9890    $self->{'aux_files'} = {};
    9991    $self->{'dir_num'} = 0;
    10092    $self->{'file_num'} = 0;
    101 
     93   
    10294    return bless $self, $class;
    10395}
    10496
    105 sub is_recursive {
    106     my $self = shift (@_);
    107 
    108     return 0; # this is not a recursive plugin
    109 }
    110 
    111 # return number of files processed, undef if can't process
    112 # Note that $base_dir might be "" and that $file might
    113 # include directories
    114 sub read {
    115     my $self = shift (@_);
    116     my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
    117 
    118     my $filename = &util::filename_cat($base_dir, $file);
    119     return 0 if $filename =~ /$self->{'block_exp'}/;
    120     if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
    121     return undef;
    122     }
    123     $file =~ s/^[\/\\]+//;
    124 
    125     $self->{'verbosity'} = $processor->{'verbosity'};
     97
     98sub get_default_block_exp {
     99    my $self = shift (@_);
     100
     101    return q^(?i)\.(gif|jpe?g|png|pdf|rtf|css)$^;
     102}
     103
     104sub get_default_process_exp {
     105    my $self = shift (@_);
     106
     107    return q^(?i)\.html?$^;
     108}
     109
     110
     111# do plugin specific processing of doc_obj
     112sub process {
     113    my $self = shift (@_);
     114    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     115
    126116    print STDERR "HTMLPlug: processing $file\n"
    127117    if $self->{'verbosity'} > 1;
    128118
    129     # create a new document
    130     my $doc_obj = new doc ($file, "indexed_doc");
    131119    my $cursection = $doc_obj->get_top_section();
    132    
    133     # read in HTML file
    134     open (FILE, $filename) || die "HTMLPlug::read - can't open $filename\n";
    135     undef $/;
    136     my $text = <FILE>;
    137     $/ = "\n";
    138     close FILE;
    139     if (!defined $text || $text !~ /\w/) {
    140     print STDERR "HTMLPlug: ERROR: $file contains no text\n" if $self->{'verbosity'};
    141     return 0;
    142     }
    143 
    144     $self->extra_metadata ($doc_obj, $cursection, $metadata);
    145     $self->extract_metadata (\$text, $metadata, $doc_obj, $cursection)
     120
     121    $self->extract_metadata ($textref, $metadata, $doc_obj, $cursection)
    146122    unless $self->{'no_metadata'};
    147123
     
    152128    my $web_url = "http://$file";
    153129    $web_url =~ s/\\/\//g; # for windows
    154     $doc_obj->add_metadata($cursection, "URL", $web_url);
     130    $doc_obj->add_utf8_metadata($cursection, "URL", $web_url);
    155131
    156132    # remove header and footer
    157133    if (!$self->{'keep_head'}) {
    158     $text =~ s/^.*?<body[^>]*>//is;
    159     $text =~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg;
     134    $$textref =~ s/^.*?<body[^>]*>//is;
     135    $$textref =~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg;
    160136    }
    161137
     
    164140
    165141    # usemap="./#index" not handled correctly => change to "#index"
    166     $text =~ s/(<img[^>]*?usemap\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
     142    $$textref =~ s/(<img[^>]*?usemap\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
    167143        $self->replace_usemap_links($1, $2, $3)/isge;
    168144
    169     $text =~ s/(<(?:a|area|frame|link)\s+[^>]*?(?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
     145    $$textref =~ s/(<(?:a|area|frame|link)\s+[^>]*? (?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
    170146        $self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
    171147    }
    172148
    173149    # trap images
    174     $text =~ s/(<img[^>]*?src\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
     150    $$textref =~ s/(<img[^>]*? src\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/
    175151    $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
    176152
    177     $doc_obj->add_text ($cursection, $text);
    178 
    179     # add an OID
    180     $doc_obj->set_OID();
    181 
    182     # process the document
    183     $processor->process($doc_obj);
    184 
    185     return 1; # processed the file
     153    # add text to document object
     154    $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
     155
     156    return 1;
    186157}
    187158
     
    349320
    350321    foreach my $field (split /,/, $self->{'metadata_fields'}) {
    351    
     322
    352323    # don't need to extract field if it was passed in from a previous
    353324    # (recursive) plugin
     
    361332            my $value = $1;
    362333            $value =~ s/\s+/ /gs;
    363             $doc_obj->add_metadata($section, $field, $value);
     334            $doc_obj->add_utf8_metadata($section, $field, $value);
    364335            next;
    365336        }
     
    367338    }
    368339   
    369     # special case for Title metadata - try <title> tags
    370     # then first 100 characters of text
     340    # TITLE: extract the document title
    371341   
    372342    if ($field =~ /^title$/i) {
     
    378348            if ($title =~ /\w/) {
    379349            $title =~ s/\s+/ /gs;
    380             $doc_obj->add_metadata ($section, $field, $title);
     350            $title =~ s/^\s+//;
     351            $title =~ s/\s+$//;
     352            $doc_obj->add_utf8_metadata ($section, $field, $title);
    381353            next;
    382354            }
     
    386358        # if no title use first 100 characters
    387359        my $tmptext = $$textref;
     360        $tmptext =~ s/\s+/ /gs;
    388361        $tmptext =~ s/<[^>]*>//g;
    389         my $title = substr ($tmptext, 0, 100);
    390         $title =~ s/\s+/ /gs;
    391         $doc_obj->add_metadata ($section, $field, $title);
    392     }
    393     }
    394 }
     362        $tmptext = substr ($tmptext, 0, 100);
     363        $tmptext =~ s/^\s+//;
     364        $tmptext =~ s/\s+$//;
     365        $tmptext =~ s/\s\S*$/.../;
     366        $doc_obj->add_utf8_metadata ($section, $field, $tmptext);
     367        next;
     368    }
     369
     370    # FIRST200: extract the first 200 characters as metadata
     371
     372    if ($field =~ /^first200$/i) {
     373        my $tmptext = $$textref;
     374        $tmptext =~ s/\s+/ /gs;
     375        $tmptext =~ s/.*<body[^>]*>//i;
     376        $tmptext =~ s/<[^>]*>//g;
     377        $tmptext = substr ($tmptext, 0, 200);
     378        $tmptext =~ s/^\s+//;
     379        $tmptext =~ s/\s+$//;
     380        $tmptext =~ s/\s\S*$/.../;
     381        $doc_obj->add_utf8_metadata ($section, $field, $tmptext);
     382        next;
     383    }
     384
     385    # H1: extract the text between the first <H1> and </H1> tags
     386    if ($field =~ /^H1$/i) {
     387        my $tmptext = $$textref;
     388        $tmptext =~ s/\s+/ /gs;
     389        if ($tmptext =~ /<H1[^>]*>/i) {
     390        $tmptext =~ s/.*<H1[^>]*>//i;
     391        $tmptext =~ s/<\/H1[^>]*>.*//i;
     392        $tmptext =~ s/^\s+//;
     393        $tmptext =~ s/\s+$//;
     394        $doc_obj->add_utf8_metadata ($section, $field, $tmptext);
     395        }
     396        next;
     397    }
     398    }
     399}
     400
    395401
    396402# evaluate any "../" to next directory up
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/IndexPlug.pm

    r809 r1279  
    5454use plugin;
    5555use BasPlug;
    56 use lang;
    5756use doc;
    5857use util;
     
    6362}
    6463
     64use strict;
     65
    6566sub new {
    6667    my ($class) = @_;
    67     $self = new BasPlug ();
     68    my $self = new BasPlug ("IndexPlug", @_);
    6869
    6970    return bless $self, $class;
     
    7677    return 1;
    7778}
    78 
    7979
    8080# return number of files processed, undef if can't process
     
    104104    # process each document
    105105    my $count = 0;
    106     foreach $docfile (keys (%$list)) {
     106    foreach my $docfile (keys (%$list)) {
    107107    last if ($maxdocs != -1 && $count >= $maxdocs);
    108108    $metadata = {}; # at present we can do this as metadata
     
    113113    # note that $list->{$docfile} is an array reference
    114114    if ($docfile !~ /key:/i) {
     115        my $i = 0;
    115116        for ($i = 0; $i < scalar (@{$list->{$docfile}}); $i ++) {
    116117        if ($list->{$docfile}->[$i] =~ /^<([^>]+)>(.+)$/) {
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/RecPlug.pm

    r809 r1279  
    3838}
    3939
     40use strict;
     41
    4042sub new {
    4143    my ($class) = @_;
    42     my $self = new BasPlug ();
     44    my $self = new BasPlug ("RecPlug", @_);
    4345
    4446    $self->{'exclude_tail_dirs'} = []; # empty by default
     
    6264    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    6365
    64     foreach $etd ( @{$self->{'exclude_tail_dirs'}} )
     66    foreach my $etd ( @{$self->{'exclude_tail_dirs'}} )
    6567    {
    6668    return 0 if ($file =~ m/$etd/);
     
    7274
    7375    # see if this is a directory
    74     $dirname = &util::filename_cat ($base_dir, $file);
     76    my $dirname = &util::filename_cat ($base_dir, $file);
    7577    if (-d $dirname) {
    7678
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/TEXTPlug.pm

    r732 r1279  
    2424###########################################################################
    2525
    26 # creates simple single-level document from .txt or .text files
    27 # (case-insensitive match on filenames). Adds Title metadata
    28 # of first 100 characters found.
     26# creates simple single-level document. Adds Title metadata
     27# of first line of text (up to 100 characters long).
    2928
    3029package TEXTPlug;
    3130
    3231use BasPlug;
    33 use sorttools;
    3432
    3533sub BEGIN {
     
    3735}
    3836
     37use strict;
     38
    3939sub new {
    4040    my ($class) = @_;
    41     $self = new BasPlug ();
     41    my $self = new BasPlug ("TEXTPlug", @_);
    4242
    4343    return bless $self, $class;
    4444}
    4545
    46 sub is_recursive {
     46sub get_default_process_exp {
    4747    my $self = shift (@_);
    4848
    49     return 0; # this is not a recursive plugin
     49    return q^(?i)\.te?xt$^;
    5050}
    5151
    52 
    53 # return number of files processed, undef if can't process
    54 # Note that $base_dir might be "" and that $file might
    55 # include directories
    56 sub read {
     52# do plugin specific processing of doc_obj
     53sub process {
    5754    my $self = shift (@_);
    58     my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
    59 
    60     my $filename = &util::filename_cat($base_dir, $file);
    61 
    62     return undef unless ($filename =~ /\.(te?xt(\.gz)?)$/i && (-e $filename));
    63 
    64     my $gz = 0;
    65     if (defined $2) {
    66     $gz = $2;
    67     $gz = 1 if ($gz =~ /\.gz/i);
    68     }
    69 
    70     print STDERR "TEXTPlug: processing $filename\n" if $processor->{'verbosity'};
    71 
    72     # create a new document
    73     my $doc_obj = new doc ($file, "indexed_doc");
    74 
    75     if ($gz) {
    76     open (FILE, "zcat $filename |") || die "TEXTPlug::read - zcat can't open $filename\n";
    77     } else {
    78     open (FILE, $filename) || die "TEXTPlug::read - can't open $filename\n";
    79     }
     55    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     56   
     57    print STDERR "TEXTPlug: processing $file\n"
     58    if $self->{'verbosity'} > 1;
     59   
    8060    my $cursection = $doc_obj->get_top_section();
    81 
    82     my $text = "";
    83     my $line = "";
    84     my $foundtitle = 0;
    85     # don't need to get title if it has been passed
    86     # in from another plugin
    87     if (defined $metadata->{'Title'}) {
    88     $foundtitle = 1;
    89     }
    90     while (defined ($line = <FILE>)) {
    91     # use first line as title (or first 100 characters if it's long)
    92     if (!$foundtitle && length($line) > 5) {
    93         my $title = "";
    94         if (length($line) > 100) {
    95         $title = substr ($line, 0, 100);
    96         } else {
    97         $title = $line;
    98         }
    99         $doc_obj->add_metadata ($cursection, "Title", $title);
    100         $foundtitle = 1;
     61   
     62    # get title metadata
     63    # (don't need to get title if it has been passed
     64    # in from another plugin)
     65    if (!defined $metadata->{'Title'}) {
     66    my ($title) = $$textref =~ /^([^\n]*)/;
     67    if (length($title) > 100) {
     68        $title = substr ($title, 0, 100);
    10169    }
    102     $text .= $line;
     70    $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
    10371    }
    10472   
    105     $doc_obj->add_text ($cursection, "<pre>\n$text\n</pre>");
     73    # insert preformat tags and add text to document object
     74    $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
    10675
    107 
    108     foreach $field (keys(%$metadata)) {
    109     # $metadata->{$field} may be an array reference
    110     if (ref ($metadata->{$field}) eq "ARRAY") {
    111         map {
    112         $doc_obj->add_metadata ($cursection, $field, $_);
    113         } @{$metadata->{$field}};
    114     } else {
    115         $doc_obj->add_metadata ($cursection, $field, $metadata->{$field});
    116     }
    117     }
    118 
    119     # add OID
    120     $doc_obj->set_OID ();
    121 
    122     # process the document
    123     $processor->process($doc_obj);
    124 
    125     return 1; # processed the file
     76    return 1;
    12677}
    12778
  • branches/New_Config_Format-branch/gsdl/perllib/unicode.pm

    r537 r1279  
    3131package unicode;
    3232
    33 
    34 
    35 # ascii2unicode takes a (extended) ascii string and
    36 # returns a unicode array.
     33%translations = ();
     34
     35# ascii2unicode takes an (extended) ascii string (ISO-8859-1)
     36# and returns a unicode array.
    3737sub ascii2unicode {
    3838    my ($in) = @_;
     
    4343    while ($i < $len) {
    4444    push (@$out, ord(substr ($in, $i, 1)));
     45    $i++;
     46    }
     47
     48    return $out;
     49}
     50
     51# arabic2unicode takes an 8 bit Arabic string (ISO-8859-6)
     52# and returns a unicode array
     53sub arabic2unicode {
     54    my ($in) = @_;
     55    my $out = [];
     56
     57    my $i = 0;
     58    my $len = length($in);
     59    while ($i < $len) {
     60    my $c = ord(substr ($in, $i, 1));
     61    $c += (1567-191) if ($c >= 0x80);
     62    push (@$out, $c);
     63    $i++;
     64    }
     65
     66    return $out;
     67}
     68
     69# windows2unicode takes a windows encoding (e.g. Windows 1256 (Arabic))
     70# and returns a unicode array. These encodings are similar to but not
     71# identical to the corresponding ISO-8859 encodings.
     72#
     73# The map files for these encodings should be in unicode/MAPPINGS/WINDOWS
     74sub windows2unicode {
     75    my ($encoding, $in) = @_;
     76    my $out = [];
     77
     78    my $mapfile = &util::filename_cat($ENV{'GSDLHOME'}, "unicode", "MAPPINGS",
     79                      "WINDOWS", "$encoding.TXT");
     80    return $out unless &loadmapping ($encoding, $mapfile);
     81
     82    my $i = 0;
     83    my $len = length($in);
     84    while ($i < $len) {
     85    my $c = ord(substr ($in, $i, 1));
     86    $c = $translations{"$encoding-unicode"}->{$c} if ($c >= 0x80);
     87    push (@$out, $c);
    4588    $i++;
    4689    }
     
    193236}
    194237
     238# loadmapping expects the mapfile to contain (at least) two
     239# tab-separated fields. The first field is the mapped value
     240# and the second field is the unicode value.
     241#
     242# It returns 1 if successful, 0 if unsuccessful
     243sub loadmapping {
     244    my ($encoding, $mapfile) = @_;
     245   
     246    my $to = "$encoding-unicode";
     247    my $from = "unicode-$encoding";
     248
     249    # check to see if the encoding has already been loaded
     250    if (defined $translations{$to} && defined $translations{$from}) {
     251    return 1;
     252    }
     253
     254    if (!open (MAPFILE, $mapfile)) {
     255    print STDERR "ERROR: unable to load mapfile $mapfile\n";
     256    return 0;
     257    }
     258
     259    my ($line, @line);
     260    $translations{$to} = {};
     261    $translations{$from} = {};
     262    while (defined ($line = <MAPFILE>)) {
     263    # remove comments
     264    $line =~ s/\#.*$//;
     265    next unless $line =~ /\S/;
     266
     267    # split the line into fields and do a few
     268    # simple sanity checks
     269    @line = split (/\t/, $line);
     270    next unless (scalar(@line) >= 2 &&
     271             $line[0] =~ /^0x/ &&
     272             $line[1] =~ /^0x/);
     273
     274    my $a = hex($line[0]);
     275    my $b = hex($line[1]);
     276
     277    $translations{$to}->{$a} = $b;
     278    $translations{$from}->{$b} = $a;
     279    }
     280
     281    close (MAPFILE);
     282
     283    return 1;
     284}
     285
    195286
    1962871;
    197 
  • branches/New_Config_Format-branch/gsdl/setup.bash

    r10 r1279  
    22export GSDLOS=`uname -s | tr A-Z a-z`
    33export PATH=$PATH:$GSDLHOME/bin/script:$GSDLHOME/bin/$GSDLOS
     4export MANPATH=$MANPATH:$GSDLHOME/packages/mg/man
  • branches/New_Config_Format-branch/gsdl/setup.csh

    r682 r1279  
    22setenv GSDLOS `uname -s | tr A-Z a-z`
    33setenv PATH $PATH\:$GSDLHOME/bin/script\:$GSDLHOME/bin/$GSDLOS
     4setenv MANPATH $MANPATH\:$GSDLHOME/packages/mg/man
  • branches/New_Config_Format-branch/gsdl/src/colservr/browsefilter.cpp

    r990 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.10.4.1  2000/07/12 22:21:26  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.11  2000/06/29 22:02:23  sjboddie
     34   Made BrowseFilters EndResults option take -1 (i.e. "all"). This is mostly
     35   to be consistent with the QueryFilter
     36
    3037   Revision 1.10  2000/02/29 01:35:56  sjboddie
    3138   tidied up endianness and fastcgi
     
    8592  filtopt.type = FilterOption_t::integert;
    8693  filtopt.repeatable = FilterOption_t::onePerQuery;
    87   filtopt.defaultValue = "10000";
    88   filtopt.validValues.push_back("1");
     94  filtopt.defaultValue = "-1";
     95  filtopt.validValues.push_back("-1");
    8996  filtopt.validValues.push_back("10000");
    9097  filterOptions["EndResults"] = filtopt;
     
    298305
    299306      while (result_here != result_end) {
    300     if (resultnum > endresults) break;
     307    // if endresults is -1 get all results
     308    if ((endresults != -1) && (resultnum > endresults)) break;
    301309    if (resultnum >= startresults) {
    302310      resultdoc.OID = (*result_here);
  • branches/New_Config_Format-branch/gsdl/src/colservr/collectserver.cpp

    r830 r1279  
    2929/*
    3030   $Log$
     31   Revision 1.20.4.1  2000/07/12 22:21:26  sjboddie
     32   merged changes to trunk into New_Config_Format branch
     33
     34   Revision 1.21  2000/06/29 00:22:58  sjboddie
     35   added new numsections field to collection info and made the statusaction
     36   recognize it
     37
    3138   Revision 1.20  1999/12/13 02:56:22  davidb
    3239   Support for cross-collection searching (CCS)
     
    136143    else if (key == "languages") collectinfo.languages = cfgline;
    137144    else if (key == "numdocs") collectinfo.numDocs = value.getint();
     145    else if (key == "numsections") collectinfo.numSections = value.getint();
    138146    else if (key == "numwords") collectinfo.numWords = value.getint();
    139147    else if (key == "numbytes") collectinfo.numBytes = value.getint();
  • branches/New_Config_Format-branch/gsdl/src/colservr/mgsearch.cpp

    r633 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.22.4.1  2000/07/12 22:21:27  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.23  2000/07/03 21:58:41  nzdl
     34   removed mg directive that was causing meaningless warning messages
     35   in errout.txt
     36
    3037   Revision 1.22  1999/09/24 02:41:21  rjmcnab
    3138   change to use has_unicode_letdig in text_t
     
    424431  mgq_ask(".set maxparas 500000");
    425432  mgq_ask(".set verbatim true");
    426   mgq_ask(".unset skip_dump");
     433  //  mgq_ask(".unset skip_dump");
    427434  mgq_ask(".set mode docnums");
    428435
  • branches/New_Config_Format-branch/gsdl/src/colservr/win32.mak

    r1000 r1279  
    2525###########################################################################
    2626
    27 GSDLHOME = d:\home\dl\gsdl
    28 STLPATH = d:\home\dl\stl\stlport
     27GSDLHOME = c:\gsdl
     28STLPATH = c:\stlport
    2929
    3030AR = lib
  • branches/New_Config_Format-branch/gsdl/src/hashfile/hashfile.cpp

    r915 r1279  
    143143
    144144
     145
     146// create a hash string from the contents of a file.
     147//
     148// The file is treated as a large base 256 number (each char is a digit),
     149// and the hash value is the remainder when this number is divided by a
     150// very large prime.
     151//
     152// PROBLEM: This is a flawed hash function because the rightmost (highest)
     153// value in remainder is more likely to be a "1" than it is to be any other
     154// number. 
     155//
     156// EVIDENCE: About 50% of the files in any GSDL directory have a hash code
     157// that starts with 01* which implies (based on the my_convert_num function)
     158// that the rightmost "digit" of the remainder = z such that (z % 16 = 0)
     159// and (z / 16 = 1) which means (z = 1).
     160//
     161// MAJOR REASON: suppose our prime number was 19.  Then, if we have a
     162// reasonably random distribution of numbers for which we are going to
     163// calculate (N % 19), we expect to get a roughly uniform distribution
     164// of remainders where the possible values are 0, 1, 2, 3... 18.  The
     165// problem is that 9 out of the 19 possible values (10,11... 18) start
     166// with the digit 1.  Thus our hash code will start with "01".
     167//
     168// ANOTHER PROBLEM: Characters in the file are read one at a time; after each
     169// one is read it is prepended to the remainder, then the remainder is
     170// recalculated on the string thus far seen.  I am sure the math here
     171// is wrong - if I try calculating (111 mod 7) by the same algorithm, it
     172// simply does not work.
     173//
     174// ANOTHER POSSIBLE PROBLEM: Each character from the file is read into
     175// remainder at the most significant end, and when that character
     176// is a zero you get a number like "01" which would be considered
     177// larger than a number like "8" because it is longer (two  digits
     178// instead of 1). 
     179//
     180// These comments added by Gordon Paynter ([email protected]) in
     181// June 2000.  I didn't write any code, however.
     182
    145183char *hashfile (char *filename) {
    146184  FILE *infile = (FILE *)NULL;
    147185  int i;
    148186
    149   // calculate the 8 multiples of the prime number to use
    150   // in the long division
    151187  number primepow[8];
    152188  number pow;
     
    163199  pow.len = 12;
    164200
     201  // calculate 8 multiples of the prime number.
     202  // These are used to find the remainder using only subtraction operations
    165203  for (i=0; i<8; i++) {
    166204    primepow[i] = pow;
     
    168206  }
    169207
     208  // The "remainder" after division by the prime.  Our result.
     209  remainder.len = 0;
     210
     211  // open the file
    170212  infile = fopen (filename, "rb");
    171213  if (infile == NULL) {
    172214    return (char *)NULL;
    173215  }
     216  c = (unsigned char)fgetc(infile);
    174217 
    175   remainder.len = 0;
    176   c = (unsigned char)fgetc(infile);
    177218  while (!feof (infile)) {
    178     // remainder = remainder * 256 + c
     219
     220    // make sure the remainder has not grown too large
    179221    if (remainder.len == MAXNUMLEN-1) {
    180222      fprintf (stderr, "ERROR - number overflow\n");
    181223      return (char *)NULL;
    182224    }
     225
     226    // remainder = remainder * 256 + c
    183227    for (i=remainder.len; i>0; i--) {
    184228      remainder.num[i] = remainder.num[i-1];
    185229    }
    186230    remainder.num[0] = c;
    187     if (remainder.len > 0 || c != 0) remainder.len = remainder.len+1;
     231    if (remainder.len > 0 || c != 0) remainder.len = remainder.len+1;
    188232   
     233    // remainder = (remainder % large-prime-number)
    189234    for (i=7; i>=0; i--) {
    190235      my_ifpos_dec (remainder, primepow[i]);
    191236    }
    192237
     238    // read a new character from the file
    193239    c = (unsigned char)fgetc(infile);
    194240  }
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/Makefile.in

    r861 r1279  
    125125  invf.cpp                 mg_invf_dict_dump.cpp    Weights.cpp            \
    126126  MGQuery.cpp              Terms.cpp                QueryTester.cpp        \
    127   QueryLex.cpp             QueryParser.cpp          Queryer.cpp
     127  QueryLex.cpp             QueryParser.cpp         \
     128  GSDLQueryLex.cpp     GSDLQueryParser.cpp      Queryer.cpp
    128129
    129130
     
    135136  IndexData.h         build.h             mg_errors.h                    \
    136137  TagInfo.h           comp_dict.h         mg_files.h          Weights.h  \
    137   MGQuery.h           Terms.h             QueryLex.h          QueryParser.h
     138  MGQuery.h           Terms.h             QueryLex.h          QueryParser.h \
     139  GSDLQueryLex.h    GSDLQueryParser.h
    138140
    139141
     
    180182               FIvfLevelInfo$o FragLevelConvert$o Terms$o MGQuery$o \
    181183               IndexData$o stemmer$o Weights$o TextGet$o text$o FText$o \
    182                QueryParser$o QueryLex$o words$o
     184               GSDLQueryParser$o GSDLQueryLex$o words$o
    183185
    184186Queryer: $(QUERYER_OBJS)
     
    272274    done
    273275
    274 
    275 LIB_OBJS = mg_files$o mg_errors$o locallib$o invf$o UCArray$o \
     276#mg_errors$o removed from LIB_OBJS to avoid conflict with mg's libtextin.a
     277LIB_OBJS = mg_files$o  mg_errors$o locallib$o invf$o UCArray$o \
    276278           FIvfLevelInfo$o FragLevelConvert$o Terms$o MGQuery$o \
    277279           IndexData$o stemmer$o Weights$o TextGet$o text$o FText$o \
    278            QueryParser$o QueryLex$o words$o
     280           GSDLQueryParser$o GSDLQueryLex$o words$o
    279281
    280282libtextin.a: $(LIB_OBJS)
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/Queryer.cpp

    r926 r1279  
    2828#include "mg_files.h"
    2929
    30 #include "QueryParser.h"
     30#include "GSDLQueryParser.h"
    3131
    3232
     
    6666  // init the text system
    6767  TextData textData;
    68   if (!textData.LoadData (textfilename)) {
     68  if (!textData.LoadData (basePath, textfilename)) {
    6969    FatalError (1, "Couldn't load text information for \"%s\"", textfilename);
    7070  }
     
    9191
    9292  UCArray level;
    93   level.clear();
     93  UCArrayClear(level);
    9494  //SetCStr(level, "");
    9595     
     
    109109    cout << "current index="<< queryInfo.docLevel << "\nchange to index:";
    110110    cin >> query;
    111     queryInfo.docLevel.clear();
     111    UCArrayClear(queryInfo.docLevel);
    112112    SetCStr(queryInfo.docLevel, query);
    113113    cout << "index set to " << queryInfo.docLevel <<"\n";
     
    117117    cout << "current level="<< level << "\nchange to level:";
    118118    cin >> query;
    119     level.clear();
     119    UCArrayClear(level);
    120120    SetCStr(level, query);
    121121    cout << "level set to " << level <<"\n";
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/Terms.cpp

    r927 r1279  
    4444  stemMethod = 0;
    4545  matchDocs = 0;
     46  termFreq = 0;
    4647}
    4748
    4849ostream &operator<< (ostream &s, const TermFreqData &t) {
    4950  s << "<" << t.tag << ">\"" << t.term << "\"stem("
    50     << t.stemMethod << ")docs(" << t.matchDocs << ")";
     51    << t.stemMethod << ")docs(" << t.matchDocs << ")"
     52    << "count("<<t.termFreq<<")";
    5153  return s;
    5254}
     
    5658      (t1.term == t2.term) &&
    5759      (t1.stemMethod == t2.stemMethod) &&
    58       (t1.matchDocs == t2.matchDocs));
     60      (t1.matchDocs == t2.matchDocs) &&
     61      (t1.termFreq == t2.termFreq));
    5962}
    6063
     
    442445  unsigned long termDocFreq = 0;
    443446  unsigned long lastLevelDocNum = 0;
    444 
     447  unsigned long overallwordfreq = 0;
    445448 
    446449  while (termDataI < termDataSize) {
     
    463466      if (needRanks)
    464467    termDocFreq += termData.fragFreqs[termDataI];
     468      overallwordfreq += termData.fragFreqs[termDataI];
    465469    }
    466470   
     
    484488    termFreqData.stemMethod = stemMethod;
    485489    termFreqData.matchDocs = termData.matchDocs;
     490    termFreqData.termFreq = overallwordfreq;
    486491    result.termFreqs.push_back (termFreqData);
    487492  }
     
    517522  unsigned long termDocFreq = 0;
    518523  unsigned long lastLevelDocNum = 0;
    519 
     524  unsigned long overallwordfreq = 0;
    520525  unsigned long resultI = 0;
    521526  unsigned long resultSize = result.docs.size();
     
    552557      if (needRanks)
    553558    termDocFreq += termData.fragFreqs[termDataI];
     559     overallwordfreq += termData.fragFreqs[termDataI];
    554560    }
    555561   
    556562    termDataI++;
    557   }
     563  } // while
    558564
    559565  if (lastLevelDocNum > 0) {
     
    590596    termFreqData.stemMethod = stemMethod;
    591597    termFreqData.matchDocs = termData.matchDocs;
     598    termFreqData.termFreq = overallwordfreq;
    592599    result.termFreqs.push_back (termFreqData);
    593600  }
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/Terms.h

    r927 r1279  
    5858  UCArray term; // unstemmed term
    5959  int stemMethod;
    60   unsigned long matchDocs; // tf for level
    61 
     60  unsigned long matchDocs; // tf for level - num levels
     61               // containing this term
     62  unsigned long termFreq;  // overall term freq - num words that
     63                // are this term
    6264  void Clear ();
    6365  TermFreqData () { Clear (); }
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/TextGet.cpp

    r855 r1279  
    478478
    479479void TextData::Clear () {
    480   cd.Clear(); // not implemented
     480  cd.Clear();
    481481  textFile = NULL;
    482482  textIdxFile = NULL;
     
    485485}
    486486
    487 bool TextData::LoadData (char *textname) {
     487bool TextData::LoadData (char *basepath, char *textname) {
     488 
     489  if (textname[0] == '\0') return false;
     490 
     491  // set the basepath
     492  set_basepath(basepath);
     493 
    488494  // load the compression dictionary
    489495  if (!OpenLoadCompDict (textname, cd)) return false;
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/TextGet.h

    r855 r1279  
    4242  // loads compression dictionary, the compressed text header,
    4343  // and all level information
    44   bool LoadData (char *textname);
     44  bool LoadData (char *basepath, char *textname);
    4545  bool UnloadData ();
    4646};
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/UCArray.cpp

    r855 r1279  
    3535}
    3636
     37char * GetCStr(UCArray text) {
     38
     39  char *cstr = new char[text.size()+1];
     40  UCArray::const_iterator here = text.begin();
     41  UCArray::const_iterator end = text.end();
     42
     43  int i = 0;
     44  while (here != end) {
     45    cstr[i] = (char)*here;
     46    here++; i++;
     47  }
     48  cstr[i]='\0';
     49  return cstr;
     50}
    3751
    3852ostream &operator<<(ostream &s, const UCArray &a) {
     
    214228}
    215229
     230/* comparison for browse index - items match if the smaller word
     231   is a prefix of the larger word, case independent
     232*/
     233int BrowseCompare (const UCArray &a1, const UCArray &a2) {
     234  unsigned int l1 = a1.size();
     235  unsigned int l2 = a2.size();
     236  unsigned int l = (l1 < l2) ? l1 : l2; // l is the shorter of the two
     237  int diff = 0;
     238
     239  UCArray::const_iterator a1Here = a1.begin();
     240  UCArray::const_iterator a2Here = a2.begin();
     241
     242  while(l--) {
     243    if ((diff = casecharmap[*a1Here] - casecharmap[*a2Here]) !=0)
     244      return diff;
     245    a1Here++;
     246    a2Here++;
     247  }
     248  return 0;
     249
     250}
    216251
    217252unsigned long PrefixLen (const UCArray &a1, const UCArray &a2) {
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/UCArray.h

    r855 r1279  
    5151// functions to manipulate UCArrays
    5252void SetCStr (UCArray &text, const char *cStr);
     53char * GetCStr(UCArray text);
    5354inline void UCArrayClear (UCArray &a) {
    5455  a.erase (a.begin(), a.end());
     
    8990// compares the two strings in dictionary order
    9091int DictCompare (const UCArray &a1, const UCArray &a2);
    91 
     92// compares the two strings, case independent, a match (ie 0) is
     93// if one string is a prefix of the other
     94int BrowseCompare (const UCArray &a1, const UCArray &a2);
    9295
    9396struct LTUCArray {
     
    113116
    114117#endif
     118
     119
     120
     121
     122
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/invf.h

    r925 r1279  
    3131
    3232// NOTE: This does not include the magic number
     33// header info for .invf.dict file
    3334struct invf_dict_header {
    3435  unsigned long lookback;
     
    9192// this version of the blocked dictionary uses a fixed number
    9293// of entries per block, not a fixed block size
     94// info for .invf.dict.blocked file
     95// blocked dict has a heap of blocks, some for words, some for tags
     96// and an index into each set of blocks. The index has pointers to
     97// the first entry in each block. Can do a binary search on the index
     98// to find out which block an element is in
    9399struct block_dict_header : public invf_dict_header {
    94100  // note: word_dict_start and tag_dict_start are undefined
    95101  // for blocked dictionaries
    96102
    97   unsigned long entries_per_wblk;
     103  unsigned long entries_per_wblk; // word blocks
    98104  unsigned long num_wblks;
    99105  unsigned long max_wblk_size;
     
    101107  unsigned long wblk_idx_start;
    102108 
    103   unsigned long entries_per_tblk;
     109  unsigned long entries_per_tblk; // tag blocks
    104110  unsigned long num_tblks;
    105111  unsigned long max_tblk_size;
     
    117123struct block_dict_el {
    118124  UCArray el; // word or tag
    119   unsigned long frag_occur; // # entries in invf file
    120   unsigned long freq;
    121   unsigned long invf_ptr;
     125  unsigned long frag_occur; // # entries in invf file - if have a
     126  // word level index, this is the same as freq, otherwise, it's the number
     127  // of fragments containing this word
     128  unsigned long freq; // # of times this word occurs
     129  unsigned long invf_ptr; // pointer into inverted file
    122130
    123131  virtual void Clear ();
     
    133141
    134142struct word_block_dict_el : public block_dict_el {
    135   unsigned long *levelFreqs;
     143  unsigned long *levelFreqs; // freq of the word at each level
    136144
    137145  void Clear ();
     
    209217#define SKIP_MODE_NO_SKIPS 0
    210218
     219// invf file - has a list of frags for each word, but the word is not
     220//  stored in the invf file - the dictionaries store the words, along
     221// with num entries, and a pointer into invf file
    211222struct invf_file_header {
    212223  unsigned long no_of_words;
  • branches/New_Config_Format-branch/gsdl/src/mgpp/text/mg_decompress_text.cpp

    r856 r1279  
    3434  int ch;
    3535  char *filename = "";
     36  char *basePath = "";
    3637  UCArray level;
    3738  SetCStr (level, "Document");
     
    4748      break;
    4849    case 'd':
     50      basePath = optarg;
    4951      set_basepath (optarg);
    5052      break;
     
    6264  // load up the text information
    6365  TextData td;
    64   if (!td.LoadData (filename)) {
     66  if (!td.LoadData (basePath, filename)) {
    6567    FatalError (1, "Couldn't load text information for \"%s\"", filename);
    6668  }
     
    9092  return 0;
    9193}
     94
     95
  • branches/New_Config_Format-branch/gsdl/src/recpt/authenaction.cpp

    r755 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.9.4.1  2000/07/12 22:21:34  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.10  2000/04/19 22:30:23  sjboddie
     34   tidied up status pages and end-user collection building
     35
    3036   Revision 1.9  1999/11/01 21:11:35  sjboddie
    3137   changed arguments passed to many functions
     
    300306                         + "_"));
    301307  // change style of header and footer if page is a frame
    302   if (args["sp"] != "frameset") {
     308  if ((args["sp"].empty()) || (args["sp"] == "frameset")) {
    303309    disp.setmacro ("header", "authen", "_status:infoheader_(Log in)");
    304310    disp.setmacro ("header", "authenok", "_status:infoheader_(Log in)");
     
    306312    disp.setmacro ("footer", "authenok", "_status:infofooter_(Log in)");
    307313  }
    308   else {
    309     //    disp.setmacro ("header", "authen", "_Global:header_"); //****
    310     //    disp.setmacro ("header", "authenok", "_Global:header_");
    311     //    disp.setmacro ("footer", "authen", "_Global:footer_");
    312     //    disp.setmacro ("footer", "authenok", "_Global:footer_");
    313     disp.setmacro ("header", "authen", "_:header_");
    314     disp.setmacro ("header", "authenok", "_:header_");
    315     disp.setmacro ("footer", "authen", "_:footer_");
    316     disp.setmacro ("footer", "authenok", "_:footer_");
    317   }
    318    
    319314
    320315  // get a list of saved configuration arguments (if possible)
  • branches/New_Config_Format-branch/gsdl/src/recpt/browserclass.cpp

    r765 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.5.4.1  2000/07/12 22:21:35  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.6  2000/06/29 02:47:19  sjboddie
     34   added browser info (i.e VList, HList etc.) to status pages
     35
    3036   Revision 1.5  1999/11/01 22:04:11  sjboddie
    3137   just a few small changes (that means I can't remember ;)
     
    8187
    8288text_t browserclass::get_default_formatstring () {
    83   return "<td>[link][icon][/link]</td><td>[highlight]{Or}{[Title],Untitled}[/highlight]</td>";
     89  return "";
    8490}
    8591
  • branches/New_Config_Format-branch/gsdl/src/recpt/buildaction.cpp

    r1000 r1279  
    2424 *********************************************************************/
    2525
    26 #ifndef __WIN32__
    27 #include <unistd.h>
    28 #endif
    29 
    3026#include "OIDtools.h"
    3127#include "fileutil.h"
    3228#include "htmlutils.h"
     29#include "gsdltools.h"
    3330#include "buildaction.h"
    3431
     
    8077bool buildaction::check_cgiargs (cgiargsinfoclass &/*argsinfo*/, cgiargsclass &args,
    8178                 ostream &/*logout*/) {
    82   if ((args["bca"] != "buildstatus") && (args["bca"] != "collog"))
     79
     80  text_t &arg_bca = args["bca"];
     81  if (!((arg_bca == "buildstatus") || (arg_bca == "collog") || (arg_bca == "blankpage")
     82    || ((arg_bca == "buildcol") && (args["wizard"] == "buildexec"))))
    8383    {
    8484      // authenticate the user if authentication is available
     
    102102  // make sure we know about a receptionist
    103103  if (recpt == NULL) {
    104     logout << "The page action does not contain information\n"
     104    logout << "The build action does not contain information\n"
    105105           << "about any receptionists. The method set_receptionist\n"
    106106           << "was probably not called from the module which instantiated\n"
    107            << "this page action.\n";
     107           << "this build action.\n";
    108108    return;
    109109  }
     
    116116  text_t fullnamelist = "var fullnamelist = new Array(";
    117117  text_t dirnamelist  = "var dirnamelist = new Array(";
    118   text_t fullnamemenu = (text_t)"<input type=hidden name=\"bc1fullnameindex\""
    119                         +" value=\"_bcargfullnameindex_\">\n"
    120                         +"<select name=\"bc1fullnamemenu\" onChange=fullnameindex_changed()>\n";
     118  text_t fullnamemenu = "<input type=hidden name=\"bc1fullnameindex\"";
     119  fullnamemenu += " value=\"_bcargfullnameindex_\">\n";
     120  fullnamemenu += "<select name=\"bc1fullnamemenu\" onChange=fullnameindex_changed()>\n";
    121121  int rcount = 1;
    122122  int fcount = 1;
     
    146146    while (collist_here != collist_end) {
    147147
    148       ColInfoResponse_t cinfo;
    149       (*rprotolist_here).p->get_collectinfo (*collist_here, cinfo, err, logout);
    150       if (err == noError) {
     148      ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout);
     149      if (cinfo != NULL) {
    151150        text_t collectionname = *collist_here;
    152         if (!cinfo.collectionmeta["collectionname"].empty())
     151        if (!cinfo->collectionmeta["collectionname"].empty())
    153152          {
    154153        // get collection name from the collection cfg file
    155         collectionname = cinfo.collectionmeta["collectionname"];
     154        collectionname = cinfo->collectionmeta["collectionname"];
    156155          }
    157156        else
     
    169168        dirnamelist += (text_t)"\"" + *collist_here + "\"";
    170169
    171         fullnamemenu += (text_t)"<option value=\"" + collectionname + "\"";
    172         if (args["bc1dirname"] == "")
     170        fullnamemenu += "<option value=\"" + collectionname;
     171        fullnamemenu.push_back ('"');
     172        if (args["bc1dirname"].empty())
    173173          {
    174174        if ((rcount==1) && (ccount==1))
     
    202202        if (args["bca"]=="buildcol")
    203203          {
    204         if (((args["bc1dirname"]=="") && (rcount==1) && (ccount==1))
     204        if (((args["bc1dirname"].empty()) && (rcount==1) && (ccount==1))
    205205            || (args["bc1dirname"]==*collist_here))
    206206          {
    207             disp.setmacro("bcargingsdlarea","build",cinfo.building["ingsdlarea"]);
    208             disp.setmacro("bcargcopydir","build",cinfo.building["copydir"]);
     207            disp.setmacro("bcargingsdlarea","build",cinfo->building["ingsdlarea"]);
     208            disp.setmacro("bcargcopydir","build",cinfo->building["copydir"]);
    209209           
    210             if (cinfo.buildDate==0) // not built
     210            if (cinfo->buildDate==0) // not built
    211211              {
    212212            // assume that's because this is the first build ever for the collection
     
    240240    }
    241241
    242   if ((args["bca"]=="delcol") || (args["bca"]=="editcol") || (args["bca"]=="buildcol") || (args["bca"]=="collog"))
     242  if ((args["bca"]=="delcol") || (args["bca"]=="editcol") ||
     243      (args["bca"]=="buildcol") || (args["bca"]=="collog"))
    243244    {
    244245      disp.setmacro ("dirnamelist", "build", dirnamelist);
     
    268269    text_t bc1name = substr(args_entry.begin()+3,args_entry.end());
    269270    text_t cached_cgiarg = "bcarg" + bc1name;
    270     disp.setmacro(cached_cgiarg,"build",args[args_entry]);
     271    disp.setmacro(cached_cgiarg, "build", dm_safe(args[args_entry]));
    271272      }
    272273    args_here++;
     
    288289      text_t cfg_fname
    289290    = filename_cat(gsdlhome, "collect", dirname, "etc", "collect.cfg");
     291
     292#ifdef GSDL_USE_IOS_H
    290293      ifstream cfg_ifs (cfg_fname.getcstr(), ios::in | ios::nocreate);
     294#else
     295      ifstream cfg_ifs (cfg_fname.getcstr(), ios::in);
     296#endif
     297
    291298      if (cfg_ifs)
    292299    { 
     
    323330      // read in collect.bld
    324331      text_t bld_fname = filename_cat(gsdlhome, "log", dirname+ ".bld");
     332
     333#ifdef GSDL_USE_IOS_H
    325334      ifstream bld_ifs (bld_fname.getcstr(), ios::in | ios::nocreate);
     335#else
     336      ifstream bld_ifs (bld_fname.getcstr(), ios::in);
     337#endif
     338
    326339      if (bld_ifs)
    327340    {
     
    427440                   ostream &logout) {
    428441
    429 
    430 
    431 
    432442  text_tarray wizard_split;
    433   const text_t& wizard = (args["wizard"]=="") ? (text_t)"buildcol" : args["wizard"];
     443  const text_t wizard = (args["wizard"].empty()) ? "buildcol" : args["wizard"];
    434444  splitchar(wizard.begin(),wizard.end(),':',wizard_split);
    435   const text_t& wizard_last = wizard_split.back();
     445  const text_t wizard_last = wizard_split.back();
    436446 
    437447  if (wizard_last != "buildframe")
     
    444454  else
    445455    {
    446       text_t dirname = (args["bc1dirname"] == "") ? (text_t)"unknown" : args["bc1dirname"];
     456      text_t dirname = (args["bc1dirname"] == "") ?
     457    "unknown" : args["bc1dirname"];
     458
    447459      text_t tmpname = dirname+"_XXXXXX";
    448       if (mktemp(tmpname.getcstr())==NULL)
    449     {
    450       logout << "Failed to create temporary filename" << endl;
    451     }
     460      char *cstr_tmpname = tmpname.getcstr();
     461
     462      if (GSDL_MKTEMP (cstr_tmpname)==NULL)
     463    logout << "Failed to create temporary filename" << endl;
     464
     465      delete cstr_tmpname;
     466
    452467      disp.setmacro("bcargtmpname","build",tmpname);
    453468
     
    457472 
    458473      textout << outconvert << disp << headmess << content << "_build:framefooter_\n";
    459       text_t cmd = filename_cat(gsdlhome, "cgi-bin", "webpage_buildcol.pl");
    460       cmd += " gsdlhome=\""+args["gsdlhome"]+"\" httpbuild=\""+args["httpbuild"]+"\"";
    461       cmd += " bc1copydata=\""+args["bc1copydata"]+"\"";
    462       cmd += " bc1doimport=\""+args["bc1doimport"]+"\"";
    463       cmd += " bc1dobuild=\""+args["bc1dobuild"]+"\"";
    464       cmd += " bc1dirname=\""+args["bc1dirname"]+"\" bc1tmpname=\""+tmpname+"\" &";
    465      
    466       system(cmd.getcstr());
     474      text_t cmd = "perl \"" +
     475    filename_cat(gsdlhome, "cgi-bin", "webpage_buildcol.pl") + "\"";
     476      cmd += " \"httpbuild="+args["httpbuild"]+"\"";
     477      cmd += " \"bc1copydata="+args["bc1copydata"]+"\"";
     478      cmd += " \"bc1doimport="+args["bc1doimport"]+"\"";
     479      cmd += " \"bc1dobuild="+args["bc1dobuild"]+"\"";
     480      cmd += " \"bc1dirname="+args["bc1dirname"]+"\" \"bc1tmpname="+tmpname+"\"";
     481      // run webpage_buildcol.pl in background on unix systems
     482#if !defined (__WIN32__)
     483      cmd += " &";
     484#endif
     485
     486      char *cstr_cmd = cmd.getcstr();
     487
     488#if defined (__WIN32__)
     489      gsdl_system (cstr_cmd, logout);
     490#else
     491      system (cstr_cmd);
     492#endif
     493
     494      delete cstr_cmd;
    467495    }
    468496
  • branches/New_Config_Format-branch/gsdl/src/recpt/cgiwrapper.cpp

    r963 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.22.4.1  2000/07/12 22:21:35  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.31  2000/07/12 04:51:05  nzdl
     34   added an error message when no "valid" collections are available
     35
     36   Revision 1.30  2000/07/05 21:49:31  sjboddie
     37   Receptionist now caches collection information to avoid making multiple
     38   get_collectinfo calls to collection server
     39
     40   Revision 1.29  2000/06/29 03:57:14  sjboddie
     41   Now append to error log (errout.txt) instead of overwriting it each time
     42
     43   Revision 1.28  2000/06/28 01:30:23  nzdl
     44   *** empty log message ***
     45
     46   Revision 1.27  2000/06/28 01:24:59  sjboddie
     47   got "POST" cgi data to work when using fastcgi
     48
     49   Revision 1.26  2000/05/12 03:09:26  sjboddie
     50   minor modifications to get web library compiling under VC++ 6.0
     51
     52   Revision 1.25  2000/04/14 04:45:19  sjboddie
     53   Modified the English of the debug output slightly
     54
     55   Revision 1.24  2000/04/14 03:10:35  sjboddie
     56   tidied up a few issues concerning the new debug info which showed
     57   up on windows
     58
     59   Revision 1.23  2000/04/14 02:52:05  sjboddie
     60   tidied up error messaging and set up some debugging info to be output
     61   when running library from command line
     62
    3063   Revision 1.22  2000/02/21 21:56:46  sjboddie
    3164   gsdlhome now comes from gsdlsite.cfg
     
    191224#endif
    192225
    193 
    194 static void page_errorsitecfg (text_t &errorpage, int mode) {
    195   errorpage += "Content-type: text/html\n\n";
    196 
    197   errorpage += "<html>\n";
    198   errorpage += "<head>\n";
    199   errorpage += "<title>Error</title>\n";
    200   errorpage += "</head>\n";
    201   errorpage += "<body>\n";
    202   errorpage += "<h2>Oops!</h2>\n";
     226static void format_error_string (text_t &errorpage, const text_t &errortext, bool debug) {
     227
     228  errorpage.clear();
     229
     230  if (debug) {
     231    errorpage += "\n";
     232    errorpage += "ERROR: " + errortext;
     233    errorpage += "\n";
     234   
     235  } else {
     236
     237    errorpage += "Content-type: text/html\n\n";
     238   
     239    errorpage += "<html>\n";
     240    errorpage += "<head>\n";
     241    errorpage += "<title>Error</title>\n";
     242    errorpage += "</head>\n";
     243    errorpage += "<body>\n";
     244    errorpage += "<h2>Oops!</h2>\n";
     245    errorpage += errortext;
     246    errorpage += "</body>\n";
     247    errorpage += "</html>\n";
     248  }
     249}
     250
     251static void page_errorcollect (const text_t &gsdlhome, text_t &errorpage, bool debug) {
     252
     253  text_t collectdir = filename_cat (gsdlhome, "collect");
     254
     255  text_t errortext = "No valid collections were found: Check that your collect directory\n";
     256  errortext += "(" + collectdir + ") is readable and contains at least one valid collection.\n";
     257  errortext += "Note that modelcol is NOT a valid collection.\n";
     258  errortext += "If the path to your collect directory is wrong edit the 'gsdlhome' field\n";
     259  errortext += "in your gsdlsite.cfg configuration file.\n";
     260
     261  format_error_string (errorpage, errortext, debug);
     262}
     263
     264static void page_errorsitecfg (text_t &errorpage, bool debug, int mode) {
     265
     266  text_t errortext;
     267
    203268  if (mode == 0) {
    204     errorpage += "The gsdlsite.cfg configuration file could not be found. This file\n";
    205     errorpage += "should contain configuration information relating to this\n";
    206     errorpage += "site's setup.\n";
    207     errorpage += "gsdlsite.cfg should reside in the same directory as this executable\n";
     269    errortext += "The gsdlsite.cfg configuration file could not be found. This\n";
     270    errortext += "file should contain configuration information relating to this\n";
     271    errortext += "site's setup.\n";
     272
    208273  } else if (mode == 1) {
    209     errorpage += "The gsdlsite.cfg configuration file does not contain a valid gsdlhome\n";
    210     errorpage += "entry.\n";
    211     errorpage += "gsdlsite.cfg resides in the same directory as this executable\n";
    212   }
    213   errorpage += "</body>\n";
    214   errorpage += "</html>\n";
     274    errortext += "The gsdlsite.cfg configuration file does not contain a valid\n";
     275    errortext += "gsdlhome entry.\n";
     276  }
     277
     278  if (debug) {
     279    errortext += "gsdlsite.cfg should reside in the directory from which the\n";
     280    errortext += "library executable was run.\n";
     281  } else {
     282    errortext += "gsdlsite.cfg should reside in the same directory as the library\n";
     283    errortext += "executable file.\n";
     284  }
     285
     286  format_error_string (errorpage, errortext, debug);
    215287}
    216288
    217289
    218290static void page_errormaincfg (const text_t &gsdlhome, const text_t &collection,
    219                    text_t &errorpage) {
    220   errorpage += "Content-type: text/html\n\n";
    221 
    222   errorpage += "<html>\n";
    223   errorpage += "<head>\n";
    224   errorpage += "<title>Error</title>\n";
    225   errorpage += "</head>\n";
    226   errorpage += "<body>\n";
    227   errorpage += "<h2>Oops!</h2>\n";
     291                   bool debug, text_t &errorpage) {
     292
     293  text_t errortext;
     294
    228295  if (collection.empty()) {
    229296    text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg");
    230     errorpage += "The main.cfg configuration file could not be found. This file\n";
    231     errorpage += "should contain configuration information relating to the\n";
    232     errorpage += "setup of the interface. As this cgi script is not being run\n";
    233     errorpage += "in collection specific mode the file should reside at\n";
    234     errorpage += main_cfg_file + ".\n";
     297    errortext += "The main.cfg configuration file could not be found. This file\n";
     298    errortext += "should contain configuration information relating to the\n";
     299    errortext += "setup of the interface. As this receptionist is not being run\n";
     300    errortext += "in collection specific mode the file should reside at\n";
     301    errortext += main_cfg_file + ".\n";
    235302  } else {
    236303    text_t collect_cfg_file = filename_cat (gsdlhome, "collect", collection, "etc", "collect.cfg");
    237304    text_t main_collect_cfg_file = filename_cat (gsdlhome, "etc", "collect.cfg");
    238305    text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg");
    239     errorpage += "Either the collect.cfg or main.cfg configuration file could\n";
    240     errorpage += "not be found. This file should contain configuration information\n";
    241     errorpage += "relating to the setup of the interface. As this cgi script is\n";
    242     errorpage += "being run in collection specific mode the file should reside\n";
    243     errorpage += "at either ";
    244     errorpage += collect_cfg_file + ",\n";
    245     errorpage += main_collect_cfg_file + " or " + main_cfg_file + ".\n";
    246   }
    247   errorpage += "</body>\n";
    248   errorpage += "</html>\n";
    249 }
    250 
    251 
    252 static void page_errorinit (const text_t &gsdlhome, text_t &errorpage) {
    253   errorpage += "Content-type: text/html\n\n";
    254 
    255   errorpage += "<html>\n";
    256   errorpage += "<head>\n";
    257   errorpage += "<title>Error</title>\n";
    258   errorpage += "</head>\n";
    259   errorpage += "<body>\n";
    260   errorpage += "<h2>Oops!</h2>\n";
    261   errorpage += "An error occurred during the initialisation of the Greenstone Digital\n";
    262   errorpage += "Library software. It is likely that the software has not been setup\n";
    263   errorpage += "correctly.\n";
     306    errortext += "Either the collect.cfg or main.cfg configuration file could\n";
     307    errortext += "not be found. This file should contain configuration information\n";
     308    errortext += "relating to the setup of the interface. As this receptionist is\n";
     309    errortext += "being run in collection specific mode the file should reside\n";
     310    errortext += "at either " + collect_cfg_file + ",\n";
     311    errortext += main_collect_cfg_file + " or " + main_cfg_file + ".\n";
     312  }
     313
     314  format_error_string (errorpage, errortext, debug);
     315}
     316
     317
     318static void page_errorinit (const text_t &gsdlhome, bool debug, text_t &errorpage) {
     319
     320  text_t errortext = "An error occurred during the initialisation of the Greenstone Digital\n";
     321  errortext += "Library software. It is likely that the software has not been setup\n";
     322  errortext += "correctly.\n";
    264323
    265324  text_t init_file = filename_cat (gsdlhome, "etc", "initout.txt");
     
    268327  delete ifile;
    269328  if (initin) {
    270     errorpage += "The initialisation error log, " + init_file + ", contains the\n";
    271     errorpage += "following information:\n\n";
    272     errorpage += "<pre>\n";
     329    errortext += "The initialisation error log, " + init_file + ", contains the\n";
     330    errortext += "following information:\n\n";
     331    if (!debug) errortext += "<pre>\n";
    273332
    274333    char c;
    275334    initin.get(c);
    276335    while (!initin.eof ()) {
    277       errorpage.push_back(c);
     336      errortext.push_back(c);
    278337      initin.get(c);
    279338    }
    280339   
    281     errorpage += "</pre>\n";
     340    if (!debug) errortext += "</pre>\n";
    282341
    283342    initin.close();
    284343
    285344  } else {
    286     errorpage += "Please consult " + init_file + " for more information.\n";
    287   }
    288 
    289   errorpage += "</body>\n";
    290   errorpage += "</html>\n";
    291 }
    292 
    293 static void page_errorparseargs (const text_t &gsdlhome, text_t &errorpage) {
    294   errorpage += "Content-type: text/html\n\n";
    295 
    296   errorpage += "<html>\n";
    297   errorpage += "<head>\n";
    298   errorpage += "<title>Error</title>\n";
    299   errorpage += "</head>\n";
    300   errorpage += "<body>\n";
    301   errorpage += "<h2>Oops!</h2>\n";
    302   errorpage += "An error occurred during the parsing of the cgi arguments.\n";
     345    errortext += "Please consult " + init_file + " for more information.\n";
     346  }
     347
     348  format_error_string (errorpage, errortext, debug);
     349}
     350
     351static void page_errorparseargs (const text_t &gsdlhome, bool debug, text_t &errorpage) {
     352
     353  text_t errortext = "An error occurred during the parsing of the cgi arguments.\n";
    303354
    304355  text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt");
     
    307358  delete efile;
    308359  if (errin) {
    309     errorpage += "The error log, " + error_file + ", contains the\n";
    310     errorpage += "following information:\n\n";
    311     errorpage += "<pre>\n";
     360    errortext += "The error log, " + error_file + ", contains the\n";
     361    errortext += "following information:\n\n";
     362    if (!debug) errortext += "<pre>\n";
    312363
    313364    char c;
    314365    errin.get(c);
    315366    while (!errin.eof ()) {
    316       errorpage.push_back(c);
     367      errortext.push_back(c);
    317368      errin.get(c);
    318369    }
    319     errorpage += "</pre>\n";
     370    if (!debug) errortext += "</pre>\n";
    320371    errin.close();
    321372
    322373  } else {
    323     errorpage += "Please consult " + error_file + " for more information.\n";
    324   }
    325 
    326   errorpage += "</body>\n";
    327   errorpage += "</html>\n";
    328 }
    329 
    330 static void page_errorcgipage (const text_t &gsdlhome, text_t &errorpage) {
    331   errorpage += "Content-type: text/html\n\n";
    332 
    333   errorpage += "<html>\n";
    334   errorpage += "<head>\n";
    335   errorpage += "<title>Error</title>\n";
    336   errorpage += "</head>\n";
    337   errorpage += "<body>\n";
    338   errorpage += "<h2>Oops!</h2>\n";
    339   errorpage += "An error occurred during the construction of the cgi page.\n";
    340 
     374    errortext += "Please consult " + error_file + " for more information.\n";
     375  }
     376
     377  format_error_string (errorpage, errortext, debug);
     378}
     379
     380static void page_errorcgipage (const text_t &gsdlhome, bool debug, text_t &errorpage) {
     381
     382  text_t errortext = "An error occurred during the construction of the cgi page.\n";
    341383
    342384  text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt");
     
    345387  delete efile;
    346388  if (errin) {
    347     errorpage += "The error log, " + error_file + ", contains the\n";
    348     errorpage += "following information:\n\n";
    349     errorpage += "<pre>\n";
     389    errortext += "The error log, " + error_file + ", contains the\n";
     390    errortext += "following information:\n\n";
     391    if (!debug) errortext += "<pre>\n";
    350392
    351393    char c;
    352394    errin.get(c);
    353395    while (!errin.eof ()) {
    354       errorpage.push_back(c);
     396      errortext.push_back(c);
    355397      errin.get(c);
    356398    }
    357     errorpage += "</pre>\n";
     399    if (!debug) errortext += "</pre>\n";
    358400    errin.close();
    359401
    360402  } else {
    361     errorpage += "Please consult " + error_file + " for more information.\n";
    362   }
    363 
    364   errorpage += "</body>\n";
    365   errorpage += "</html>\n";
    366 }
    367 
     403    errortext += "Please consult " + error_file + " for more information.\n";
     404  }
     405
     406  format_error_string (errorpage, errortext, debug);
     407}
     408
     409static void print_debug_info (receptionist &recpt) {
     410
     411  outconvertclass text_t2ascii;
     412  recptconf configinfo = recpt.get_configinfo ();
     413  text_t etc_dir = filename_cat (configinfo.gsdlhome, "etc");
     414
     415  cout << "\n";
     416  cout << text_t2ascii
     417       << "------------------------------------------------------------\n"
     418       << "Configuration and initialization completed successfully.\n"
     419       << "  Note that more debug information may be available in the\n"
     420       << "  initialization and error logs initout.txt and errout.txt\n"
     421       << "  in " << etc_dir << ".\n"
     422       << "------------------------------------------------------------\n\n";
     423
     424  bool colspec = false;
     425  if (configinfo.collection.empty()) {
     426    cout << "Receptionist is running in \"general\" (i.e. not \"collection\n"
     427     << "specific\") mode.\n";
     428  } else {
     429    cout << text_t2ascii
     430     << "Receptionist is running in \"collection specific\" mode.\n"
     431     << "  collection=" << configinfo.collection << "\n"
     432     << "  collection directory=" << configinfo.collectdir << "\n";
     433    colspec = true;
     434  }
     435 
     436  cout << text_t2ascii << "gsdlhome=" << configinfo.gsdlhome << "\n";
     437  if (!configinfo.gdbmhome.empty())
     438    cout << text_t2ascii << "gdbmhome=" << configinfo.gdbmhome << "\n";
     439  cout << text_t2ascii << "httpprefix=" << configinfo.httpprefix << "\n";
     440  cout << text_t2ascii << "httpimg=" << configinfo.httpimg << "\n";
     441  cout << text_t2ascii << "gwcgi=" << configinfo.gwcgi << "\n"
     442       << "  Note that unless gwcgi has been set from a configuration\n"
     443       << "  file it is dependent on environment variables set by your\n"
     444       << "  webserver. Therefore it may not have the same value when run\n"
     445       << "  from the command line as it would be when run from your\n"
     446       << "  web server.\n";
     447  if (configinfo.usecookies)
     448    cout << "cookies are enabled\n";
     449  else
     450    cout << "cookies are disabled\n";
     451  if (configinfo.logcgiargs)
     452    cout << "logging is enabled\n";
     453  else
     454    cout << "logging is disabled\n";
     455  cout << "------------------------------------------------------------\n\n";
     456
     457  text_tset::const_iterator this_mfile = configinfo.macrofiles.begin();
     458  text_tset::const_iterator end_mfile = configinfo.macrofiles.end();
     459  cout << "Macro Files:\n"
     460       << "------------\n";
     461  text_t mfile;
     462  bool found;
     463  while (this_mfile != end_mfile) {
     464    cout << text_t2ascii << *this_mfile;
     465    int spaces = (22 - (*this_mfile).size());
     466    if (spaces < 2) spaces = 2;
     467    text_t outspaces;
     468    for (int i = 0; i < spaces; i++) outspaces.push_back (' ');
     469    cout << text_t2ascii << outspaces;
     470
     471    found = false;
     472    if (colspec) {
     473      // collection specific - try collectdir/macros first
     474      mfile = filename_cat (configinfo.collectdir, "macros", *this_mfile);
     475      if (file_exists (mfile)) {
     476    cout << text_t2ascii << "found (" << mfile << ")\n";
     477    found = true;
     478      }
     479    }
     480 
     481    if (!found) {
     482      // try main macro directory
     483      mfile = filename_cat (configinfo.gsdlhome, "macros", *this_mfile);
     484      if (file_exists (mfile)) {
     485    cout << text_t2ascii << "found (" << mfile << ")\n";
     486    found = true;
     487      }
     488    }
     489
     490    if (!found)
     491      cout << text_t2ascii << "NOT FOUND\n";
     492
     493    this_mfile ++;
     494  }
     495
     496  cout << "------------------------------------------------------------\n\n"
     497       << "Collections:\n"
     498       << "------------\n"
     499       << "  Note that collections will only appear as \"running\" if\n"
     500       << "  their build.cfg files exist, are readable, contain a valid\n"
     501       << "  builddate field (i.e. > 0), and are in the collection's\n"
     502       << "  index directory (i.e. NOT the building directory)\n\n";
     503
     504  recptprotolistclass *protos = recpt.get_recptprotolist_ptr();
     505  recptprotolistclass::iterator rprotolist_here = protos->begin();
     506  recptprotolistclass::iterator rprotolist_end = protos->end();
     507  bool found_valid_col = false;
     508  while (rprotolist_here != rprotolist_end) {
     509    if ((*rprotolist_here).p != NULL) {
     510
     511      text_tarray collist;
     512      comerror_t err;
     513      (*rprotolist_here).p->get_collection_list (collist, err, cerr);
     514      if (err == noError) {
     515    text_tarray::iterator collist_here = collist.begin();
     516    text_tarray::iterator collist_end = collist.end();
     517
     518    while (collist_here != collist_end) {
     519     
     520      cout << text_t2ascii << *collist_here;
     521
     522      int spaces = (22 - (*collist_here).size());
     523      if (spaces < 2) spaces = 2;
     524      text_t outspaces;
     525      for (int i = 0; i < spaces; i++) outspaces.push_back (' ');
     526      cout << text_t2ascii << outspaces;
     527
     528      ColInfoResponse_t *cinfo = recpt.get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, cerr);
     529      if (cinfo != NULL) {
     530        if (cinfo->isPublic) cout << "public ";
     531        else cout << "private";
     532
     533        if (cinfo->buildDate > 0) {
     534          cout << "   running    ";
     535          found_valid_col = true;
     536        } else {
     537          cout << "   not running";
     538        }
     539      }
     540
     541      cout << "\n";
     542
     543      collist_here ++;
     544    }
     545      }
     546    }
     547    rprotolist_here ++;
     548  }
     549 
     550  if (!found_valid_col) {
     551    cout << "WARNING: No \"running\" collections were found. You need to\n";
     552    cout << "         build one of the above collections\n";
     553  }
     554
     555  cout << "\n------------------------------------------------------------\n";
     556  cout << "------------------------------------------------------------\n\n";
     557  cout << "receptionist running in command line debug mode\n";
     558  cout << "enter cgi arguments as name=value pairs (e.g. 'a=p&p=home'):\n";
     559
     560}
    368561
    369562// cgiwrapper does everything necessary to output a page
     
    372565// should equal "".
    373566void cgiwrapper (receptionist &recpt, text_t collection) {
     567 
     568  int numrequests = 0;
     569  bool debug = false;
     570  recptconf configinfo = recpt.get_configinfo ();
     571
     572  // find out whether this is being run as a cgi-script
     573  // or a fastcgi script
    374574#ifdef USE_FASTCGI
    375575  fcgistreambuf outbuf;
    376 #endif
    377 
    378   // init stuff - we can't output error pages directly with
    379   // fastcgi so the pages are stored until we can output them
    380   text_t errorpage;
    381   outconvertclass text_t2ascii;
    382 
    383   // set defaults
    384   int maxrequests = 10000;
    385   recpt.configure ("collection", collection);
    386   recpt.configure ("httpimg", "/gsdl/images");
    387   char *script_name = getenv("SCRIPT_NAME");
    388   if (script_name != NULL) recpt.configure("gwcgi", script_name);
    389   else recpt.configure("gwcgi", "/cgi-bin/gw");
    390 
    391   // read in the configuration files.
    392   text_t gsdlhome;
    393   if (!site_cfg_read (recpt, gsdlhome, maxrequests)) {
    394     // couldn't find the site configuration file
    395     page_errorsitecfg (errorpage, 0);
    396   } else if (gsdlhome.empty()) {
    397     // no gsdlhome in gsdlsite.cfg
    398     page_errorsitecfg (errorpage, 1);
    399   } else if (!main_cfg_read (recpt, gsdlhome, collection)) {
    400     // couldn't find the main configuration file
    401     page_errormaincfg (gsdlhome, collection, errorpage);
    402   }
    403 
    404   // initialise the library software
    405   if (errorpage.empty()) {
    406     text_t init_file = filename_cat (gsdlhome, "etc", "initout.txt");
    407     char *iout = init_file.getcstr();
    408     ofstream initout (iout);
    409     delete iout;
    410     if (!recpt.init(initout)) {
    411       // an error occurred during the initialisation
    412       initout.close();
    413       page_errorinit(gsdlhome, errorpage);
    414     }
    415     initout.close();
    416   }
    417  
    418   // find out whether this is being run as a cgi-script
    419   // or a fastcgi script
    420   int numrequests = 0;
    421 #ifdef USE_FASTCGI
    422576  int isfastcgi = !FCGX_IsCGI();
    423577  FCGX_Stream *fcgiin, *fcgiout, *fcgierr;
     
    457611      } else {
    458612    // debugging from command line
    459     char cinURIStr[1024];
    460     cin.get(cinURIStr, 1024);
    461     argstr = cinURIStr;
     613    debug = true;
    462614      }
    463615    }
    464 
    465     // cgi scripts only deal with one request
    466     maxrequests = 1;
    467   }
     616  }
     617
     618  if (debug) {
     619    cout << "Configuring Greenstone...\n";
     620    cout << flush;
     621  }
     622
     623  // init stuff - we can't output error pages directly with
     624  // fastcgi so the pages are stored until we can output them
     625  text_t errorpage;
     626  outconvertclass text_t2ascii;
     627
     628  // set defaults
     629  int maxrequests = 10000;
     630  recpt.configure ("collection", collection);
     631  recpt.configure ("httpimg", "/gsdl/images");
     632  char *script_name = getenv("SCRIPT_NAME");
     633  if (script_name != NULL) recpt.configure("gwcgi", script_name);
     634  else recpt.configure("gwcgi", "/cgi-bin/gw");
     635
     636  // read in the configuration files.
     637  text_t gsdlhome;
     638  if (!site_cfg_read (recpt, gsdlhome, maxrequests)) {
     639    // couldn't find the site configuration file
     640    page_errorsitecfg (errorpage, debug, 0);
     641  } else if (gsdlhome.empty()) {
     642    // no gsdlhome in gsdlsite.cfg
     643    page_errorsitecfg (errorpage, debug, 1);
     644  } else if (!main_cfg_read (recpt, gsdlhome, collection)) {
     645    // couldn't find the main configuration file
     646    page_errormaincfg (gsdlhome, collection, debug, errorpage);
     647  } else  if (configinfo.collectinfo.empty()) {
     648    // don't have any collections
     649    page_errorcollect (gsdlhome, errorpage, debug);
     650  }
     651
     652  if (errorpage.empty()) {
     653
     654    // initialise the library software
     655    if (debug) {
     656      cout << "Initializing...\n";
     657      cout << flush;
     658    }
     659
     660    text_t init_file = filename_cat (gsdlhome, "etc", "initout.txt");
     661    char *iout = init_file.getcstr();
     662    ofstream initout (iout);
     663    delete iout;
     664    if (!recpt.init(initout)) {
     665      // an error occurred during the initialisation
     666      initout.close();
     667      page_errorinit(gsdlhome, debug, errorpage);
     668    }
     669    initout.close();
     670  }
     671
     672  if (debug && errorpage.empty()) {
     673    // get query string from command line
     674    print_debug_info (recpt);
     675    char cinURIStr[1024];
     676    cin.get(cinURIStr, 1024);
     677    argstr = cinURIStr;
     678  }
     679
     680  // cgi scripts only deal with one request
     681  if (!isfastcgi) maxrequests = 1;
    468682
    469683  // Page-request loop. If this is not being run as a fastcgi
     
    474688    if (isfastcgi) {
    475689      if (FCGX_Accept(&fcgiin, &fcgiout, &fcgierr, &fcgienvp) < 0) break;
    476       aURIStr = FCGX_GetParam("QUERY_STRING", fcgienvp);
    477       if (aURIStr != NULL) argstr = aURIStr;
    478       else argstr = "";
     690
     691      char *request_method_str = FCGX_GetParam ("REQUEST_METHOD", fcgienvp);
     692      char *content_length_str = FCGX_GetParam ("CONTENT_LENGTH", fcgienvp);
     693
     694      if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 &&
     695      content_length_str != NULL)  {
     696    // POST form data
     697    int content_length = text_t(content_length_str).getint();
     698    if (content_length > 0) {
     699      argstr.clear();
     700      int c;
     701      do {
     702        c = FCGX_GetChar (fcgiin);
     703        if (c < 0) break;
     704        argstr.push_back (c);
     705        content_length--;
     706      } while (content_length > 0);
     707    }
     708
     709      } else {
     710    // GET form data
     711    aURIStr = FCGX_GetParam("QUERY_STRING", fcgienvp);
     712    if (aURIStr != NULL) argstr = aURIStr;
     713    else argstr = "";
     714      }
    479715    }
    480716#endif
     
    528764      text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt");
    529765      char *eout = error_file.getcstr();
    530       ofstream errout (eout);
     766      ofstream errout (eout, ios::app);
    531767      delete eout;
    532768      cerr = errout;
     
    536772      if (!recpt.parse_cgi_args (argstr, args, errout, fastcgienv)) {
    537773    errout.close ();
    538     page_errorparseargs(gsdlhome, errorpage);
     774    page_errorparseargs(gsdlhome, debug, errorpage);
    539775      } else {
    540776    if (!recpt.produce_cgi_page (args, pageout, errout, fastcgienv)) {
    541777      errout.close ();
    542       page_errorcgipage(gsdlhome, errorpage);
    543     } else {
    544       errout.close ();
     778      page_errorcgipage(gsdlhome, debug, errorpage);
    545779    }
    546780    recpt.log_cgi_args (args, errout, fastcgienv);
     781    errout.close ();
    547782      }
    548783    }
  • branches/New_Config_Format-branch/gsdl/src/recpt/comtypes.cpp

    r823 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.22.4.1  2000/07/12 22:21:36  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.23  2000/06/29 00:22:58  sjboddie
     34   added new numsections field to collection info and made the statusaction
     35   recognize it
     36
    3037   Revision 1.22  1999/12/13 02:24:33  davidb
    3138   Data fields for cross collection searching (CCS)
     
    134141  buildDate=0;
    135142  numDocs=0;
     143  numSections=0;
    136144  numWords=0;
    137145  numBytes=0;
  • branches/New_Config_Format-branch/gsdl/src/recpt/comtypes.h

    r871 r1279  
    7575//   languages      [6]  IMPLICIT StringSet,  -- languages in the collection
    7676//   numDocs        [7]  IMPLICIT INTEGER,
    77 //   numWords       [8]  IMPLICIT INTEGER OPTIONAL,
    78 //   numBytes       [9]  IMPLICIT INTEGER OPTIONAL
    79 //   collectionmeta [10]  IMPLICIT StringSet
    80 //   format         [11] IMPLICIT StringSet
    81 //   building       [12] IMPLICIT StringSet
    82 //   receptionist   [13] IMPLICIT GeneralString
     77//   numSections    [8]  IMPLICIT INTEGER OPTIONAL,
     78//   numWords       [9]  IMPLICIT INTEGER OPTIONAL,
     79//   numBytes       [10] IMPLICIT INTEGER OPTIONAL
     80//   collectionmeta [11] IMPLICIT StringSet
     81//   format         [12] IMPLICIT StringSet
     82//   building       [13] IMPLICIT StringSet
     83//   receptionist   [14] IMPLICIT GeneralString
    8384// }
    8485struct ColInfoResponse_t {
     
    9293  text_tarray ccsCols;    // empty if collection does not use cross-collection searching
    9394  text_tarray languages;
    94   unsigned long numDocs;  // 0 if not known
    95   unsigned long numWords; // 0 if not known
    96   unsigned long numBytes; // 0 if not known
     95  unsigned long numDocs;     // 0 if not known
     96  unsigned long numSections; // 0 if not known
     97  unsigned long numWords;    // 0 if not known
     98  unsigned long numBytes;    // 0 if not known
    9799  text_tmap collectionmeta;
    98100  text_tmap format;
  • branches/New_Config_Format-branch/gsdl/src/recpt/documentaction.cpp

    r1258 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.37.2.3  2000/07/12 22:21:37  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33
     34   Revision 1.39  2000/07/05 21:49:31  sjboddie
     35   Receptionist now caches collection information to avoid making multiple
     36   get_collectinfo calls to collection server
     37
    3038   Revision 1.37.2.2  2000/06/30 00:46:16  nzdl
    3139   caught New_Config_Format-branch up with changes to trunk
    3240
     41   Revision 1.38  2000/05/04 05:18:46  sjboddie
     42   attempting to get end-user collection building to work under windows
     43
    3344   Revision 1.37.2.1  2000/04/09 23:16:46  sjboddie
    3445   Added DocumentColumns stuff to New_Config_Format-branch branch
     46
     47   Revision 1.37  2000/04/07 04:40:44  sjboddie
     48   Reverted back to old DocumentHeader, DocumentTitles, DocumentImages etc.
     49   from DocumentColumns stuff. I'll move the DocumentColumns stuff to a
     50   separate development branch (New_Config_Format-branch) for now. The plan
     51   is to redesign the configuration file format a bit and limit the number of
     52   distributions floating around that take different configuration formats.
    3553
    3654   Revision 1.36  2000/04/03 07:26:28  sjboddie
     
    187205
    188206documentaction::documentaction () {
     207  recpt = NULL;
     208
    189209
    190210  // this action uses cgi variables "a", "d", "cl",
     
    632652  if (collectproto == NULL) return;
    633653 
     654  if (recpt == NULL) {
     655    logout << "ERROR (documentaction::define_external_macros): This action does not contain\n"
     656       << "      information about any receptionists. The method set_receptionist was\n"
     657       << "      probably not called from the module which instantiated this action.\n";
     658    return;
     659  }
     660
    634661  outconvertclass text_t2ascii;
    635662  comerror_t err;
     
    638665  text_tset metadata;
    639666
    640 
    641   // get info on current collection and load up formatinfo
    642   // I'd prefer not to do this here as we're getting
    643   // collection info every time (and probably also getting
    644   // it in other places some of the time) - One day I'll
    645   // fix it ... maybe - Stefan.
    646   ColInfoResponse_t cinfo;
    647   collectproto->get_collectinfo (collection, cinfo, err, logout);
    648   load_formatinfo (cinfo.format, args.getintarg("gt"));
     667  ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, collection, logout);
     668  if (cinfo == NULL) {
     669    logout << "ERROR (documentaction::define_external_macros): get_collectinfo_ptr returned NULL\n";
     670    return;
     671  }
     672  load_formatinfo (cinfo->format, args.getintarg("gt"));
    649673
    650674  if (formatinfo.DocumentUseHTML) {
     
    658682      disp.setmacro ("gsdltop", "Global", "documenttop");
    659683    }
    660     text_tmap::iterator it = cinfo.format.find ("homepage");
    661     if (it != cinfo.format.end()) {
     684    text_tmap::iterator it = cinfo->format.find ("homepage");
     685    if (it != cinfo->format.end()) {
    662686      text_t httppagehome;
    663687      if (get_link (args, protos, (*it).second, httppagehome, logout))
  • branches/New_Config_Format-branch/gsdl/src/recpt/documentaction.h

    r1084 r1279  
    9494  virtual ~documentaction ();
    9595
     96  void set_receptionist (receptionist *therecpt) {recpt=therecpt;}
     97
    9698  text_t get_action_name () {return "d";}
    9799
  • branches/New_Config_Format-branch/gsdl/src/recpt/formattools.cpp

    r1258 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.20.2.3  2000/07/12 22:21:39  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33
    3034   Revision 1.20.2.2  2000/06/30 00:46:17  nzdl
    3135   caught New_Config_Format-branch up with changes to trunk
    32 
    3336
    3437   Revision 1.21  2000/06/30 00:40:39  sjboddie
  • branches/New_Config_Format-branch/gsdl/src/recpt/htmlutils.cpp

    r919 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.5.4.1  2000/07/12 22:21:39  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.6  2000/05/04 05:17:46  sjboddie
     34   moved dm_safe from htmlutils to gsdltools
     35
    3036   Revision 1.5  2000/02/13 20:38:59  sjboddie
    3137   added dm_safe function
     
    6975  return outstring;
    7076}
    71 
    72 text_t dm_safe (const text_t &instring) {
    73 
    74   text_t outstring;
    75   text_t::const_iterator here = instring.begin();
    76   text_t::const_iterator end = instring.end();
    77   while (here != end) {
    78     if (*here == '_') outstring += "\\_";
    79     else outstring.push_back(*here);
    80     here ++;
    81   }
    82   return outstring;
    83 }
  • branches/New_Config_Format-branch/gsdl/src/recpt/htmlutils.h

    r919 r1279  
    3535text_t html_safe (const text_t &instring);
    3636
    37 text_t dm_safe (const text_t &instring);
    38 
    3937#endif
  • branches/New_Config_Format-branch/gsdl/src/recpt/infodbclass.cpp

    r928 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.6.4.1  2000/07/12 22:21:40  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.7  2000/05/04 08:21:30  sjboddie
     34   modifications for windows port of GCC
     35
    3036   Revision 1.6  2000/02/15 22:53:50  kjm18
    3137   search history stuff added.
     
    159165
    160166  openfile = filename;
    161 
     167 
    162168  char *namebuffer = filename.getcstr();
    163169  do {
    164170#ifdef __WIN32__
    165       gdbmfile = gdbm_open (namebuffer, block_size, mode, 00664, NULL, (need_filelock) ? 1 : 0);
     171    gdbmfile = gdbm_open (namebuffer, block_size, mode, 00664, NULL, (need_filelock) ? 1 : 0);
    166172#else
    167173    gdbmfile = gdbm_open (namebuffer, block_size, mode, 00664, NULL);
     
    171177       (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER));
    172178  delete namebuffer;
    173 
     179 
    174180  if (gdbmfile == NULL && logout != NULL) {
    175181    outconvertclass text_t2ascii;
  • branches/New_Config_Format-branch/gsdl/src/recpt/infodbclass.h

    r928 r1279  
    3333#include "gsdlconf.h"
    3434#include "text_t.h"
     35
     36#if defined(GSDL_USE_OBJECTSPACE)
     37#  include <ospace\std\iostream>
     38#  include <ospace\std\fstream>
     39#elif defined(GSDL_USE_IOS_H)
     40#  include <iostream.h>
     41#  include <fstream.h>
     42#else
     43#  include <iostream>
     44#  include <fstream>
     45#endif
    3546
    3647#ifdef __WIN32__
  • branches/New_Config_Format-branch/gsdl/src/recpt/librarymain.cpp

    r994 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.24.4.1  2000/07/12 22:21:41  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.28  2000/07/05 21:49:32  sjboddie
     34   Receptionist now caches collection information to avoid making multiple
     35   get_collectinfo calls to collection server
     36
     37   Revision 1.27  2000/06/23 03:21:39  sjboddie
     38   Created converter classes for simple 8 bit encodings that use a
     39   simple textual map file. Instances of these classes are used to handle
     40   the Windows 1256 (Arabic) encoding.
     41
     42   Revision 1.26  2000/05/29 03:30:03  sjboddie
     43   fixed a bug preventing GB encoded text from being displayed correctly
     44   (bug showed up in Chinese collection)
     45
     46   Revision 1.25  2000/04/14 02:52:05  sjboddie
     47   tidied up error messaging and set up some debugging info to be output
     48   when running library from command line
     49
    3050   Revision 1.24  2000/02/29 20:59:02  sjboddie
    3151   added error message when unable to read from collect directory - should
     
    132152#include "authenaction.h"
    133153#include "usersaction.h"
    134 #include "authenaction.h"
    135154#include "extlinkaction.h"
    136155#include "buildaction.h"
     
    152171  text_tarray collections;
    153172
    154   // get gsdlhome (if we fail the error will be picked up later -- in
    155   // cgiwrapper)
     173  // get gsdlhome
    156174  text_t gsdlhome;
    157   if (site_cfg_read (gsdlhome)) {
    158     text_t collectdir = filename_cat (gsdlhome, "collect");
    159     if (!read_dir (collectdir, collections)) {
    160       cerr << "couldn't read collect directory - make sure gsdlhome field is correct in gsdlsite.cfg\n";
    161       exit (1);
    162     }
    163   }
     175  site_cfg_read (gsdlhome);
     176  text_t collectdir = filename_cat (gsdlhome, "collect");
     177  read_dir (collectdir, collections);
    164178
    165179  text_tarray::const_iterator thiscol = collections.begin();
     
    202216    // collection name
    203217    cserver->configure ("collection", *thiscol);
    204    
     218 
     219    // configure receptionist's collectinfo structure
     220    text_tarray colinfo;
     221    colinfo.push_back (*thiscol);
     222    colinfo.push_back (gsdlhome);
     223    colinfo.push_back (gsdlhome);
     224    recpt.configure ("collectinfo", colinfo);
     225 
    205226    nproto.add_collectserver (cserver);
    206227    thiscol ++;
     
    215236  recpt.add_converter ("u", &utf8inconvert, &utf8outconvert);
    216237
    217   if (!gsdlhome.empty()) {
    218     mapinconvertclass gbinconvert;
    219     gbinconvert.setmapfile (gsdlhome, "gbku", 0x25a1);
    220     mapoutconvertclass gboutconvert;
    221     gboutconvert.setmapfile (gsdlhome, "ugbk", 0xa1f5);
    222     recpt.add_converter ("g", &gbinconvert, &gboutconvert);
    223   }
     238  mapinconvertclass gbinconvert;
     239  gbinconvert.setmapfile (gsdlhome, "gbku", 0x25a1);
     240  mapoutconvertclass gboutconvert;
     241  gboutconvert.setmapfile (gsdlhome, "ugbk", 0xa1f5);
     242  recpt.add_converter ("g", &gbinconvert, &gboutconvert);
     243
     244  text_t armapfile = filename_cat (gsdlhome, "unicode", "MAPPINGS");
     245  armapfile = filename_cat (armapfile, "WINDOWS", "1256.TXT");
     246  simplemapinconvertclass arinconvert;
     247  arinconvert.setmapfile (armapfile);
     248  simplemapoutconvertclass aroutconvert;
     249  aroutconvert.setmapfile (armapfile);
     250  recpt.add_converter ("a", &arinconvert, &aroutconvert); 
     251
    224252
    225253  // the list of actions. Note: these actions will become invalid
     
    230258
    231259  pageaction apageaction;
     260  apageaction.set_receptionist (&recpt);
    232261  recpt.add_action (&apageaction);
    233262
     
    236265
    237266  queryaction aqueryaction;
     267  aqueryaction.set_receptionist (&recpt);
    238268  recpt.add_action (&aqueryaction);
    239269
    240270  documentaction adocumentaction;
     271  adocumentaction.set_receptionist (&recpt);
    241272  recpt.add_action (&adocumentaction);
    242273
  • branches/New_Config_Format-branch/gsdl/src/recpt/pageaction.cpp

    r1033 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.29.4.1  2000/07/12 22:21:41  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.34  2000/07/12 04:51:05  nzdl
     34   added an error message when no "valid" collections are available
     35
     36   Revision 1.33  2000/07/05 21:49:33  sjboddie
     37   Receptionist now caches collection information to avoid making multiple
     38   get_collectinfo calls to collection server
     39
     40   Revision 1.32  2000/06/27 23:02:40  sjboddie
     41   Tidied up the way collections are displayed on 'standard' homepage.
     42   Removed all the nzdl.org specific stuff.
     43
     44   Revision 1.31  2000/06/23 03:48:08  sjboddie
     45   Added Arabic language and encoding options to the preferences page. This
     46   is a much more complex task than it should be (you even have to recompile!)
     47   but it's not really worth fixing until the new config file format is
     48   finalised.
     49
     50   Revision 1.30  2000/06/14 22:33:42  sjboddie
     51   Added French and Spanish language selection options to the preferences
     52   page. I really don't like how this is currently implemented (i.e. you
     53   have to recompile the library to do something so simple), it might have
     54   to wait until the new configuration file stuff is done before it's fixed
     55   though.
     56
    3057   Revision 1.29  2000/03/19 21:16:46  nzdl
    3158   added german language interface
     
    186213
    187214  text_t homeextra = "<center><table width=_pagewidth_><tr valign=top>\n";
     215  bool found_valid_col = false;
    188216 
    189217  recptprotolistclass::iterator rprotolist_here = protos->begin();
     
    198226    text_tarray::iterator collist_here = collist.begin();
    199227    text_tarray::iterator collist_end = collist.end();
    200    
    201     int row1 = 9;
    202     int row2 = 8;
    203     int count = 1;
     228
     229    int count = 0;
     230    bool first = true;
    204231    while (collist_here != collist_end) {
    205232     
    206       ColInfoResponse_t cinfo;
    207       (*rprotolist_here).p->get_collectinfo (*collist_here, cinfo, err, logout);
     233      ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout);
    208234     
    209       if (err == noError) {
    210         if (cinfo.isPublic && (cinfo.buildDate > 0)) {
     235      if (cinfo != NULL) {
     236        if (cinfo->isPublic && (cinfo->buildDate > 0)) {
    211237         
     238          found_valid_col = true;
    212239          FilterResponse_t response;
    213240          text_tset metadata;
     
    233260          }
    234261       
    235           if ((count == 1) || (count == (row1+1)) || (count == ((row1+row2)+1)))
    236         homeextra += "<td align=center>";
    237           else homeextra += "<p>";
    238 
    239           if (*collist_here == "niupepa") {
    240         homeextra += "<a href=\"_httpmusiclibrary_\">_iconmusiclibrary_</a>\n";
    241         if ((count == row1) || (count == (row1+row2))) homeextra += "</td>";
    242         count ++;
    243         if ((count == 1) || (count == (row1+1)) || (count == ((row1+row2)+1)))
    244           homeextra += "<td align=center>";
    245         else homeextra += "<p>";
    246           }
     262          if ((count%3 == 0) && (!first))
     263        homeextra += "</tr><tr valign=top>\n";
    247264         
    248265          text_t link = "<a href=\"_gwcgi_?a=p&p=about&c=" + *collist_here + "\">";
     
    250267        link = "<a href=\"_gwcgi_?a=p&p=about&l=zh&nw=u&c=" + *collist_here + "\">";
    251268          if (*collist_here == "arabic")
    252         link = "<a href=\"_gwcgi_?a=p&p=about&w=a&c=" + *collist_here + "\">";
    253 
    254           if (!cinfo.receptionist.empty())
    255         link = "<a href=\"" + cinfo.receptionist + "\">";
     269        link = "<a href=\"_gwcgi_?a=p&p=about&l=ar&nw=u&c=" + *collist_here + "\">";
     270
     271          if (!cinfo->receptionist.empty())
     272        link = "<a href=\"" + cinfo->receptionist + "\">";
    256273         
    257           homeextra += link + collectionname + "</a>\n";
    258          
    259           if ((count == row1) || (count == (row1+row2)))
    260         homeextra += "</td>";
     274          homeextra += "<td>" + link + collectionname + "</a></td>\n";
    261275         
    262276          count ++;
     277          first = false;
    263278        }
    264279      }
     
    266281      collist_here ++;
    267282    }
    268     homeextra += "</tr></table></center>\n";
    269     disp.setmacro ("homeextra", "home", homeextra);
     283    for (; count%3 != 0; count ++) homeextra += "<td></td>\n";
    270284      }
    271285    }
    272286    rprotolist_here ++;
    273287  }
     288
     289  if (!found_valid_col) {
     290    homeextra += "<td>No valid (i.e. built and public) collections are available</td>\n";
     291  }
     292  homeextra += "</tr></table></center>\n";
     293  disp.setmacro ("homeextra", "home", homeextra);
     294
    274295}
    275296
     
    319340
    320341 
     342  if (recpt == NULL) {
     343    logout << "ERROR (pageaction::define_internal_macros): This action does not contain\n"
     344       << "      information about any receptionists. The method set_receptionist was\n"
     345       << "      probably not called from the module which instantiated this action.\n";
     346    return;
     347  }
     348
    321349  text_t &arg_p = args["p"];
    322350  text_t &arg_c = args["c"];
    323   ColInfoResponse_t cinfo;
    324   comerror_t err;
     351  ColInfoResponse_t *cinfo = NULL;
    325352
    326353  recptproto* collectproto = protos->getrecptproto (arg_c, logout);
    327354  if (collectproto != NULL) {
    328     collectproto->get_collectinfo (arg_c, cinfo, err, logout);
    329 
    330     disp.setmacro ("numdocs", "Global", cinfo.numDocs);
     355    cinfo = recpt->get_collectinfo_ptr (collectproto, arg_c, logout);
     356
     357    disp.setmacro ("numdocs", "Global", cinfo->numDocs);
    331358    unsigned long current_time = time(NULL);
    332     unsigned long builddate = (current_time - cinfo.buildDate) / 86400;
     359    unsigned long builddate = (current_time - cinfo->buildDate) / 86400;
    333360    disp.setmacro ("builddate", "Global", builddate);
    334361  }
     
    340367    // _collectionoption_
    341368
    342     if (args["ccs"] == "1" && collectproto != NULL && (cinfo.ccsCols.size() > 1)) {
     369    if (args["ccs"] == "1" && collectproto != NULL && (cinfo->ccsCols.size() > 1)) {
    343370      text_t collectionoption = "_textcollectionoption_";
    344       text_tarray::const_iterator col_here = cinfo.ccsCols.begin();
    345       text_tarray::const_iterator col_end = cinfo.ccsCols.end();
     371      text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
     372      text_tarray::const_iterator col_end = cinfo->ccsCols.end();
    346373      int count = 0;
    347374      while (col_here != col_end) {
    348375    text_t colname;
    349376    if (*col_here == arg_c) {
    350         colname = cinfo.collectionmeta["collectionname"];
     377        colname = cinfo->collectionmeta["collectionname"];
    351378    } else {
    352       ColInfoResponse_t this_cinfo;
    353       collectproto->get_collectinfo (*col_here, this_cinfo, err, logout);
    354       if (err != noError) {col_here ++; continue;}
    355       colname = this_cinfo.collectionmeta["collectionname"];
     379      ColInfoResponse_t *this_cinfo = recpt->get_collectinfo_ptr (collectproto, *col_here, logout);
     380      if (this_cinfo == NULL) {col_here ++; continue;}
     381      colname = this_cinfo->collectionmeta["collectionname"];
    356382    }
    357383
     
    369395    // _htmloptions_
    370396   
    371     text_tmap::const_iterator it = cinfo.format.find ("DocumentUseHTML");
    372     if ((it != cinfo.format.end()) && ((*it).second == "true")) {
     397    text_tmap::const_iterator it = cinfo->format.find ("DocumentUseHTML");
     398    if ((it != cinfo->format.end()) && ((*it).second == "true")) {
    373399      disp.setmacro ("htmloptions", "preferences", "_htmloptionson_");
    374400
     
    376402      // _PreferenceDocsFromWeb_
    377403
    378       it = cinfo.format.find ("PreferenceDocsFromWeb");
    379       if ((it == cinfo.format.end()) || ((*it).second == "true"))
     404      it = cinfo->format.find ("PreferenceDocsFromWeb");
     405      if ((it == cinfo->format.end()) || ((*it).second == "true"))
    380406    disp.setmacro ("PreferenceDocsFromWeb", "preferences", "1");
    381407    }
     
    392418    text_tarray languages;
    393419    languages.push_back ("en");
     420    languages.push_back ("fr");
    394421    languages.push_back ("de");
     422    languages.push_back ("es");
    395423    languages.push_back ("mi");
    396424    languages.push_back ("zh");
     425    languages.push_back ("ar");
    397426    text_tarray::const_iterator this_lang = languages.begin();
    398427    text_tarray::const_iterator end_lang = languages.end();
    399428
    400429    text_t languageoption = "_textlanguage_\n<select name=\"l\" onChange=\"updatel();\">\n";
    401     it = cinfo.format.find ("PreferenceLanguages");
    402     if ((it != cinfo.format.end()) && (!(*it).second.empty())) {
     430    it = cinfo->format.find ("PreferenceLanguages");
     431    if ((it != cinfo->format.end()) && (!(*it).second.empty())) {
    403432      text_tset pref_langs;
    404433      splitchar ((*it).second.begin(), (*it).second.end(), '|', pref_langs);
     
    471500         
    472501      // if HTML collection there's no how to read document text
    473       text_tmap::const_iterator it = cinfo.format.find ("HelpNoDocs");
    474       if ((it != cinfo.format.end()) && ((*it).second == "true")) {
     502      text_tmap::const_iterator it = cinfo->format.find ("HelpNoDocs");
     503      if ((it != cinfo->format.end()) && ((*it).second == "true")) {
    475504    disp.setmacro ("topicreadingdocs", "help", "");
    476505    disp.setmacro ("texthelpreadingdocs", "help", "");
    477506      }
    478       it = cinfo.format.find ("HelpBibDocs");
    479       if ((it != cinfo.format.end()) && ((*it).second == "true")) {
     507      it = cinfo->format.find ("HelpBibDocs");
     508      if ((it != cinfo->format.end()) && ((*it).second == "true")) {
    480509    disp.setmacro ("texthelpreadingdocs", "help", "_bibtexthelpreadingdocs_");
    481510    disp.setmacro ("textreadingdocs", "help", "_bibtextreadingdocs_");
    482511      }
    483       it = cinfo.format.find ("HelpBookDocs");
    484       if ((it != cinfo.format.end()) && ((*it).second == "true")) {
     512      it = cinfo->format.find ("HelpBookDocs");
     513      if ((it != cinfo->format.end()) && ((*it).second == "true")) {
    485514    disp.setmacro ("texthelpreadingdocs", "help", "_booktexthelpreadingdocs_");
    486515    disp.setmacro ("textreadingdocs", "help", "_booktextreadingdocs_");
     
    491520
    492521      // _textsubcollections_
    493       if (args["ccs"] == "1" && (cinfo.ccsCols.size() > 1)) {
    494     text_t textsubcollections = "_textsubcols1_(" + text_t(cinfo.ccsCols.size()) + ")";
    495     text_tarray::const_iterator here = cinfo.ccsCols.begin();
    496     text_tarray::const_iterator end = cinfo.ccsCols.end();
     522      if (args["ccs"] == "1" && (cinfo->ccsCols.size() > 1)) {
     523    text_t textsubcollections = "_textsubcols1_(" + text_t(cinfo->ccsCols.size()) + ")";
     524    text_tarray::const_iterator here = cinfo->ccsCols.begin();
     525    text_tarray::const_iterator end = cinfo->ccsCols.end();
    497526    bool first = true;
    498527    int count = 0;
     
    500529      if (*here == arg_c) {
    501530        if (!first) textsubcollections += "<br>";
    502         textsubcollections += "\n" + cinfo.collectionmeta["collectionname"] + "\n";
     531        textsubcollections += "\n" + cinfo->collectionmeta["collectionname"] + "\n";
    503532      } else {
    504         ColInfoResponse_t this_cinfo;
    505         collectproto->get_collectinfo (*here, this_cinfo, err, logout);
    506         if (err != noError) {here ++; continue;}
     533        ColInfoResponse_t *this_cinfo = recpt->get_collectinfo_ptr (collectproto, *here, logout);
     534        if (this_cinfo == NULL) {here ++; continue;}
    507535        if (!first) textsubcollections += "<br>";
    508         textsubcollections += "\n" + this_cinfo.collectionmeta["collectionname"] + "\n";
     536        textsubcollections += "\n" + this_cinfo->collectionmeta["collectionname"] + "\n";
    509537      }
    510538      count ++;
  • branches/New_Config_Format-branch/gsdl/src/recpt/pageaction.h

    r761 r1279  
    4747  virtual ~pageaction ();
    4848
     49  void set_receptionist (receptionist *therecpt) {recpt=therecpt;}
     50
    4951  text_t get_action_name () {return "p";}
    5052
  • branches/New_Config_Format-branch/gsdl/src/recpt/pagedbrowserclass.cpp

    r1048 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.9.4.1  2000/07/12 22:21:42  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.10  2000/06/29 02:47:20  sjboddie
     34   added browser info (i.e. VList, HList etc.) to status pages
     35
    3036   Revision 1.9  2000/03/31 03:04:32  nzdl
    3137   tidied up some of the browsing code - replaced DocumentImages,
     
    7783void pagedbrowserclass::load_metadata_defaults (text_tset &metadata) {
    7884  metadata.insert ("Title");
    79 }
    80 
    81 text_t pagedbrowserclass::get_default_formatstring () {
    82   return "";
    8385}
    8486
  • branches/New_Config_Format-branch/gsdl/src/recpt/pagedbrowserclass.h

    r928 r1279  
    4545  void load_metadata_defaults (text_tset &metadata);
    4646
    47   text_t get_default_formatstring ();
    48 
    4947  virtual void processOID (cgiargsclass &args, recptproto *collectproto,
    5048               ostream &logout);
  • branches/New_Config_Format-branch/gsdl/src/recpt/queryaction.cpp

    r962 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.36.4.1  2000/07/12 22:21:43  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.37  2000/07/05 21:49:34  sjboddie
     34   Receptionist now caches collection information to avoid making multiple
     35   get_collectinfo calls to collection server
     36
    3037   Revision 1.36  2000/02/21 21:57:48  sjboddie
    3138   actions are now configured with gsdlhome
     
    177184queryaction::queryaction () {
    178185
     186  recpt = NULL;
    179187  num_phrases = 0;
    180188
     
    695703                  ostream &textout, ostream &logout) {
    696704
    697   ColInfoResponse_t cinfo;
     705  ColInfoResponse_t *cinfo = NULL;
    698706  comerror_t err;
    699707  InfoFilterOptionsResponse_t fresponse;
     
    737745    while (collist_here != collist_end) {
    738746     
    739       (*rprotolist_here).p->get_collectinfo (*collist_here, cinfo, err, logout);
     747      cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout);
    740748      //      if (err == noError && cinfo.isPublic && (cinfo.buildDate > 0)) {
    741       if (err == noError && (cinfo.buildDate > 0)) {
     749      if (cinfo != NULL && (cinfo->buildDate > 0)) {
    742750       
    743751        (*rprotolist_here).p->get_filteroptions (*collist_here, frequest, fresponse, err, logout);
     
    789797              << " name=cc value=\"" << *collist_here << "\">";
    790798         
    791           if (!cinfo.collectionmeta["collectionname"].empty())
    792         textout << outconvert << disp << cinfo.collectionmeta["collectionname"];
     799          if (!cinfo->collectionmeta["collectionname"].empty())
     800        textout << outconvert << disp << cinfo->collectionmeta["collectionname"];
    793801          else
    794802        textout << outconvert << *collist_here;
     
    816824                 ostream &logout) {
    817825 
     826  if (recpt == NULL) {
     827    logout << "ERROR (queryaction::do_action): This action does not contain information\n"
     828       << "      about any receptionists. The method set_receptionist was probably\n"
     829       << "      not called from the module which instantiated this action.\n";
     830    return true;
     831  }
     832
     833
    818834  if (args["ccs"] == "1") {
    819835    if (!args["cc"].empty()) {
     
    866882  map<text_t, colinfo_t, lttext_t> colinfomap;
    867883
    868   ColInfoResponse_t cinfo;
     884  ColInfoResponse_t *cinfo = NULL;
    869885  comerror_t err;
    870886  FilterRequest_t request;
     
    906922      continue;
    907923    }
    908     collectproto->get_collectinfo (*col_here, cinfo, err, logout);
     924    cinfo = recpt->get_collectinfo_ptr (collectproto, *col_here, logout);
     925    if (cinfo == NULL) {
     926      logout << "ERROR (query_action::search_multiple_collections): get_collectinfo_ptr returned NULL\n";
     927      col_here ++;
     928      continue;
     929    }
    909930   
    910931    browserclass *bptr = browsers->getbrowser (browsertype);
     
    913934    text_t formatstring;
    914935    if (!get_formatstring (classification, browsertype,
    915                cinfo.format, formatstring))
     936               cinfo->format, formatstring))
    916937      formatstring = bptr->get_default_formatstring();
    917938
     
    10691090  text_t classification = "Search";
    10701091
    1071   ColInfoResponse_t cinfo;
    10721092  comerror_t err;
    1073   collectproto->get_collectinfo (collection, cinfo, err, logout);
     1093  ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, collection, logout);
     1094
     1095  if (cinfo == NULL) {
     1096    logout << "ERROR (query_action::search_single_collection): get_collectinfo_ptr returned NULL\n";
     1097    return false;
     1098  }
    10741099   
    10751100  browserclass *bptr = browsers->getbrowser (browsertype);
     
    10781103  text_t formatstring;
    10791104  if (!get_formatstring (classification, browsertype,
    1080              cinfo.format, formatstring))
     1105             cinfo->format, formatstring))
    10811106    formatstring = bptr->get_default_formatstring();
    10821107
  • branches/New_Config_Format-branch/gsdl/src/recpt/queryaction.h

    r928 r1279  
    3232#include "gsdlconf.h"
    3333#include "action.h"
     34#include "receptionist.h"
    3435
    3536struct colinfo_t {
     
    6162
    6263protected:
     64
     65  receptionist *recpt;
     66
    6367  text_t formatstring;
    6468  int num_phrases;
     
    97101  queryaction ();
    98102  virtual ~queryaction () {}
     103
     104  void set_receptionist (receptionist *therecpt) {recpt=therecpt;}
    99105 
    100106  void configure (const text_t &key, const text_tarray &cfgline);
  • branches/New_Config_Format-branch/gsdl/src/recpt/receptionist.cpp

    r950 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.47.4.1  2000/07/12 22:21:44  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.53  2000/07/05 21:49:34  sjboddie
     34   Receptionist now caches collection information to avoid making multiple
     35   get_collectinfo calls to collection server
     36
     37   Revision 1.52  2000/07/04 02:15:22  sjboddie
     38   fixed bug causing segmentation fault when an invalid collection
     39   was supplied as the "c" cgi argument
     40
     41   Revision 1.51  2000/05/28 09:15:34  sjboddie
     42   a few small changes to get an initial release of the local library
     43
     44   Revision 1.50  2000/05/12 03:09:25  sjboddie
     45   minor modifications to get web library compiling under VC++ 6.0
     46
     47   Revision 1.49  2000/05/04 05:18:46  sjboddie
     48   attempting to get end-user collection building to work under windows
     49
     50   Revision 1.48  2000/04/14 02:52:06  sjboddie
     51   tidied up error messaging and set up some debugging info to be output
     52   when running library from command line
     53
    3054   Revision 1.47  2000/02/17 22:26:17  sjboddie
    3155   set macros for displaying macrons in utf8
     
    227251#include "cgiutils.h"
    228252#include "htmlutils.h"
     253#include "gsdltools.h"
    229254#include "OIDtools.h"
    230255#include <assert.h>
    231256#include <time.h>
    232257#include <stdio.h>
     258#if defined (GSDL_USE_IOS_H)
    233259#include <fstream.h>
     260#else
     261#include <fstream>
     262#endif
    234263
    235264#if defined (__WIN32_)
     
    244273  collectdir.clear();
    245274  httpprefix.clear();
    246   httpimg.clear();
     275  httpimg = "/images";
    247276  gwcgi.clear();
    248277  macrofiles.erase(macrofiles.begin(), macrofiles.end());
     
    264293}
    265294
    266 
     295void collectioninfo_t::clear () {
     296  gsdl_gsdlhome.clear();
     297  gsdl_gdbmhome.clear();
     298
     299  info_loaded = false;
     300  info.clear();
     301}
    267302
    268303receptionist::receptionist () {
     
    781816  utf8outconvertclass text_t2utf8;
    782817  char *lfile = filename.getcstr();
     818
    783819  ofstream log (lfile, ios::app);
    784820 
     
    904940bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
    905941                    ostream &logout) {
    906  
     942
    907943  // decide on the output conversion class
    908944  text_t &arg_w = args["w"];
     
    10051041  text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
    10061042
    1007   text_tarray maindirs;
     1043  text_tset maindirs;
    10081044  text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
    1009   maindirs.push_back (gsdlmacrodir);
     1045  maindirs.insert (gsdlmacrodir);
    10101046  colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
    10111047  colinfo_tmap::iterator colend = configinfo.collectinfo.end();
    10121048  while (colhere != colend) {
    1013     gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
    1014     maindirs.push_back (gsdlmacrodir);
     1049    if (!((*colhere).second.gsdl_gsdlhome).empty()) {
     1050      gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
     1051      maindirs.insert (gsdlmacrodir);
     1052    }
    10151053    colhere ++;
    10161054  }
     
    10371075    // we'll load all copies
    10381076    if (!foundfile) {
    1039       text_tarray::const_iterator dirhere = maindirs.begin();
    1040       text_tarray::const_iterator dirend = maindirs.end();
     1077      text_tset::const_iterator dirhere = maindirs.begin();
     1078      text_tset::const_iterator dirend = maindirs.end();
    10411079      while (dirhere != dirend) {
    10421080    filename = filename_cat (*dirhere, *arrhere);
     
    10951133
    10961134    text_t &arg_c = args["c"];
    1097     ColInfoResponse_t cinfo;
    1098     comerror_t err;
    10991135    recptproto *collectproto = protocols.getrecptproto (arg_c, logout);
    1100     collectproto->get_collectinfo (arg_c, cinfo, err, logout);
    1101 
    1102     if (!cinfo.ccsCols.empty()) {
    1103       args["ccs"] = 1;
    1104       if (args["cc"].empty()) {
    1105     text_tarray::const_iterator col_here = cinfo.ccsCols.begin();
    1106     text_tarray::const_iterator col_end = cinfo.ccsCols.end();
    1107     bool first = true;
    1108     while (col_here != col_end) {
    1109       // make sure it's a valid collection
    1110       if (protocols.getrecptproto (*col_here, logout) != NULL) {
    1111         if (!first) args["cc"].push_back (',');
    1112         args["cc"] += *col_here;
    1113         first = false;
     1136    if (collectproto == NULL) {
     1137      // oops, this collection isn't valid
     1138      outconvertclass text_t2ascii;
     1139      logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n";
     1140      args["c"].clear();
     1141
     1142    } else {
     1143
     1144      ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout);
     1145
     1146      if (cinfo != NULL) {
     1147    if (!cinfo->ccsCols.empty()) {
     1148      args["ccs"] = 1;
     1149      if (args["cc"].empty()) {
     1150        text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
     1151        text_tarray::const_iterator col_end = cinfo->ccsCols.end();
     1152        bool first = true;
     1153        while (col_here != col_end) {
     1154          // make sure it's a valid collection
     1155          if (protocols.getrecptproto (*col_here, logout) != NULL) {
     1156        if (!first) args["cc"].push_back (',');
     1157        args["cc"] += *col_here;
     1158        first = false;
     1159          }
     1160          col_here ++;
     1161        }
    11141162      }
    1115       col_here ++;
    11161163    }
     1164      } else {
     1165    logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n";
    11171166      }
    11181167    }
     
    12261275  text_t &collection = args["c"];
    12271276
    1228   disp.setmacro ("gsdlhome", "Global", configinfo.gsdlhome);
     1277  disp.setmacro ("gsdlhome", "Global", dm_safe(configinfo.gsdlhome));
    12291278  disp.setmacro ("gwcgi", "Global", configinfo.gwcgi);
    12301279  disp.setmacro ("httpimg", "Global", configinfo.httpimg);
     
    12991348  }
    13001349}
     1350
     1351// gets collection info from cache if found or
     1352// calls collection server (and updates cache)
     1353// returns NULL if there's an error
     1354ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto,
     1355                              const text_t &collection,
     1356                              ostream &logout) {
     1357 
     1358  // check the cache
     1359  colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
     1360  if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
     1361    // found it
     1362    return &((*it).second.info);
     1363  }
     1364
     1365  // not cached, get info from collection server
     1366  if (collectproto == NULL) {
     1367    logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n";
     1368    return NULL;
     1369  }
     1370   
     1371  comerror_t err;
     1372  if (it == configinfo.collectinfo.end()) {
     1373    collectioninfo_t cinfo;
     1374    collectproto->get_collectinfo (collection, cinfo.info, err, logout);
     1375    if (err != noError) {
     1376      outconvertclass text_t2ascii;
     1377      logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
     1378         << get_comerror_string (err) << "\"while getting collectinfo\n";
     1379      return NULL;
     1380    }
     1381    cinfo.info_loaded = true;
     1382    configinfo.collectinfo[collection] = cinfo;
     1383    return &(configinfo.collectinfo[collection].info);
     1384  } else {
     1385    collectproto->get_collectinfo (collection, (*it).second.info, err, logout);
     1386    if (err != noError) {
     1387      outconvertclass text_t2ascii;
     1388      logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
     1389         << get_comerror_string (err) << "\"while getting collectinfo\n";
     1390      return NULL;
     1391    }
     1392    (*it).second.info_loaded = true;
     1393    return &((*it).second.info);
     1394  }
     1395}
  • branches/New_Config_Format-branch/gsdl/src/recpt/receptionist.h

    r864 r1279  
    3434#include "cgiargs.h"
    3535#include "display.h"
    36 #include "action.h"
    3736#include "browserclass.h"
    3837#include "recptproto.h"
    3938#include "converter.h"
    4039#include "cfgread.h"
     40#include "action.h"
    4141
    4242// the MACROPRECEDENCE macro is used as a default. override
     
    5151
    5252
     53struct collectioninfo_t {
     54  void clear ();
     55  collectioninfo_t () {clear();}
     56
     57  text_t gsdl_gsdlhome;
     58  text_t gsdl_gdbmhome;
     59
     60  bool info_loaded;
     61  ColInfoResponse_t info;
     62};
     63
     64typedef map<text_t, collectioninfo_t, lttext_t> colinfo_tmap;
     65
     66
    5367struct recptconf {
    5468  text_t gsdlhome;
     
    5872  colinfo_tmap collectinfo;
    5973  text_t httpprefix;
    60   text_t httpimg;
     74  text_t httpimg;    // will equal /images if not set
    6175  text_t gwcgi;
    6276  text_tset macrofiles;
     
    7185  recptconf () {clear();}
    7286};
    73 
    7487
    7588class receptionist {
     
    196209  // list. This can be used to save preferences between sessions.
    197210  text_t get_compressed_arg (cgiargsclass &args, ostream &logout);
     211
     212  // gets collection info from cache if found or
     213  // calls collection server (and updates cache)
     214  // returns NULL if there's an error
     215  ColInfoResponse_t *get_collectinfo_ptr (recptproto *collectproto,
     216                      const text_t &collection,
     217                      ostream &logout);
     218
    198219 
    199220protected:
  • branches/New_Config_Format-branch/gsdl/src/recpt/recptconfig.cpp

    r963 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.6.4.1  2000/07/12 22:21:45  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.9  2000/05/12 03:09:24  sjboddie
     34   minor modifications to get web library compiling under VC++ 6.0
     35
     36   Revision 1.8  2000/04/14 03:10:35  sjboddie
     37   tidied up a few issues concerning the new debug info which showed
     38   up on windows
     39
     40   Revision 1.7  2000/04/14 02:52:06  sjboddie
     41   tidied up error messaging and set up some debugging info to be output
     42   when running library from command line
     43
    3044   Revision 1.6  2000/02/21 21:55:33  sjboddie
    3145   gsdlhome now comes from gsdlsite.cfg
     
    7993  text_tarray cfgline;
    8094  text_t key;
    81   ifstream confin ("gsdlsite.cfg");
     95
     96#ifdef GSDL_USE_IOS_H
     97  ifstream confin ("gsdlsite.cfg", ios::in | ios::nocreate);
     98#else
     99  ifstream confin ("gsdlsite.cfg", ios::in);
     100#endif
    82101
    83102  if (confin) {
     
    105124}
    106125
    107 // this version just grabs gsdlhome, returns false if it can't find it
     126// this version just grabs gsdlhome, returning true
     127// unless unable to read gsdlsite.cfg
    108128bool site_cfg_read (text_t &gsdlhome) {
    109129
     
    113133  text_tarray cfgline;
    114134  text_t key;
    115   ifstream confin ("gsdlsite.cfg");
     135
     136#ifdef GSDL_USE_IOS_H
     137  ifstream confin ("gsdlsite.cfg", ios::in | ios::nocreate);
     138#else
     139  ifstream confin ("gsdlsite.cfg", ios::in);
     140#endif
    116141
    117142  if (confin) {
     
    120145    if (cfgline[0] == "gsdlhome") {
    121146      gsdlhome = cfgline[1];
    122       return true;
    123     }
    124       }
    125     }
     147      break;
     148    }
     149      }
     150    }
     151    return true;
    126152    confin.close ();
    127153  }
     
    143169  if (file_exists (filename)) {
    144170    char *cstr = filename.getcstr();
    145     ifstream confin (cstr);
     171   
     172#ifdef GSDL_USE_IOS_H
     173    ifstream confin (cstr, ios::in | ios::nocreate);
     174#else
     175    ifstream confin (cstr, ios::in);
     176#endif
     177
    146178    delete cstr;
    147179 
     
    176208    if (!filename.empty()) {
    177209      char *cstr = filename.getcstr();
    178       ifstream confin (cstr);
     210
     211#ifdef GSDL_USE_IOS_H
     212      ifstream confin (cstr, ios::in | ios::nocreate);
     213#else
     214      ifstream confin (cstr, ios::in);
     215#endif
     216     
    179217      delete cstr;
    180218     
  • branches/New_Config_Format-branch/gsdl/src/recpt/statusaction.cpp

    r995 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.25.4.1  2000/07/12 22:21:46  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.31  2000/07/05 21:49:36  sjboddie
     34   Receptionist now caches collection information to avoid making multiple
     35   get_collectinfo calls to collection server
     36
     37   Revision 1.30  2000/07/03 22:26:27  nzdl
     38   fixed a few errors in the macro files (and one in some text printed out
     39   by the statusaction)
     40
     41   Revision 1.29  2000/06/29 02:47:21  sjboddie
     42   added browser info (i.e VList, HList etc.) to status pages
     43
     44   Revision 1.28  2000/06/29 00:22:59  sjboddie
     45   added new numsections field to collection info and made the statusaction
     46   recognize it
     47
     48   Revision 1.27  2000/05/12 03:09:24  sjboddie
     49   minor modifications to get web library compiling under VC++ 6.0
     50
     51   Revision 1.26  2000/04/19 22:30:23  sjboddie
     52   tidied up status pages and end-user collection building
     53
    3054   Revision 1.25  2000/02/29 21:00:31  sjboddie
    3155   fixed some compiler warnings
     
    116140
    117141 */
    118 
    119142
    120143#include "statusaction.h"
     
    138161}
    139162
    140 void statusaction::output_welcome (cgiargsclass &/*args*/, displayclass &disp,
    141                   outconvertclass &outconvert,
    142                   ostream &textout, ostream &/*logout*/) {
    143   textout << outconvert << disp << "_status:infoheader_(_titlewelcome_)\n"
    144     "_status:welcome_\n"
    145     "_status:infofooter_\n";
     163void statusaction::output_welcome (cgiargsclass &/*args*/, recptprotolistclass *protos,
     164                   displayclass &disp, outconvertclass &outconvert,
     165                   ostream &textout, ostream &logout) {
     166
     167  if (recpt == NULL) return;
     168
     169  textout << outconvert << disp
     170      << "_status:infoheader_(_titlewelcome_)\n"
     171      << "_status:welcome_"
     172      << "<center><table width=_pagewidth_>\n"
     173      << "<th align=left>abbrev.</th><th align=left>collection</th>"
     174      << "<th align=left>public?</th><th align=left>running?</th></tr>\n";
     175
     176  recptprotolistclass::iterator rprotolist_here = protos->begin();
     177  recptprotolistclass::iterator rprotolist_end = protos->end();
     178  while (rprotolist_here != rprotolist_end) {
     179    if ((*rprotolist_here).p != NULL) {
     180      text_t protoname = (*rprotolist_here).p->get_protocol_name();
     181      text_tarray collist;
     182      comerror_t err;
     183      (*rprotolist_here).p->get_collection_list (collist, err, logout);
     184      if (err == noError) {
     185    text_tarray::iterator collist_here = collist.begin();
     186    text_tarray::iterator collist_end = collist.end();
     187
     188    while (collist_here != collist_end) {
     189
     190        textout << outconvert << disp
     191            << "<tr><td><a href=\"_gwcgi_?e=_compressedoptions_&a=status&sp=collectioninfo&pr="
     192            << protoname
     193            << "&c="
     194            << *collist_here
     195            << "\">"
     196            << *collist_here
     197            << "</a></td>";
     198
     199      ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout);
     200      if (cinfo != NULL) {
     201        text_t collname = *collist_here;
     202        text_tmap::iterator it = cinfo->collectionmeta.find("collectionname");
     203        if (it != cinfo->collectionmeta.end()) collname = (*it).second;
     204
     205        textout << "<td>";
     206        if (cinfo->buildDate > 0)
     207          textout << outconvert << disp
     208              << "<a href=\"_httppagex_(about)&c=" << *collist_here
     209              << "\" target=\\_top>";
     210
     211        textout << outconvert << disp << collname;
     212
     213        if (cinfo->buildDate > 0) textout << "</a>";
     214
     215        textout << "</td>";
     216
     217        if (cinfo->isPublic) textout << "<td>yes</td>";
     218        else textout << "<td>no</td>";
     219       
     220        if (cinfo->buildDate > 0)
     221          textout << outconvert << "<td>yes</td>";
     222        else
     223          textout << "<td>no</td>";
     224       
     225      } else {
     226        textout << "<td></td><td></td><td></td>";
     227      }
     228
     229      textout << "</tr>\n";
     230      collist_here ++;
     231    }
     232      }
     233    }
     234    rprotolist_here ++;
     235  }
     236
     237  textout << "</table></center>\n";
     238  textout << outconvert << disp << "_status:infofooter_\n";
    146239}
    147240
     
    256349  }
    257350 
     351  // browsers
     352  browsermapclass *browsers = recpt->get_browsermap_ptr();
     353  if (browsers != NULL) {
     354    textout << outconvert << "<tr valign=top><th>browsers</th><td>";
     355   
     356    browserptrmap::iterator browsershere = browsers->begin ();
     357    browserptrmap::iterator browsersend = browsers->end ();
     358    bool browsersfirst = true;
     359    while (browsershere != browsersend) {
     360      if (!browsersfirst) textout << outconvert << ", ";
     361      browsersfirst = false;
     362      assert ((*browsershere).second.b != NULL);
     363      if ((*browsershere).second.b != NULL) {
     364    textout << outconvert << "\"" << (*browsershere).second.b->get_browser_name() << "\"";
     365      }
     366      browsershere++;
     367    }
     368   
     369    textout << outconvert << "</td></tr>\n";
     370  }
     371
    258372  // protocols
    259373  recptprotolistclass *protocols = recpt->get_recptprotolist_ptr ();
     
    399513}
    400514
     515void statusaction::output_browserinfo (cgiargsclass &/*args*/, displayclass &disp,
     516                       outconvertclass &outconvert,
     517                       ostream &textout, ostream &/*logout*/) {
     518  if (recpt == NULL) return;
     519  browsermapclass *browsers = recpt->get_browsermap_ptr();
     520
     521  textout << outconvert << disp << "_status:infoheader_(Browser Information)\n";
     522  textout << outconvert
     523      << "<h2>Browser information</h2>\n"
     524      << "<table>";
     525
     526  // browser information
     527  if (browsers != NULL) {
     528    textout << outconvert
     529        << "<tr><th>browser name</th><th>default formatstring</th></tr>\n";
     530   
     531    browserptrmap::iterator browsershere = browsers->begin ();
     532    browserptrmap::iterator browsersend = browsers->end ();
     533    while (browsershere != browsersend) {
     534      assert ((*browsershere).second.b != NULL);
     535      if ((*browsershere).second.b != NULL) {
     536    textout << outconvert
     537        << "<tr><td>" << (*browsershere).second.b->get_browser_name()
     538        << "</td><td>" << html_safe ((*browsershere).second.b->get_default_formatstring())
     539        << "</td></tr>\n";
     540      }
     541      browsershere++;
     542    }
     543  }
     544 
     545  textout << outconvert << disp << "</table>\n_status:infofooter_\n";
     546}
     547
    401548void statusaction::output_protocolinfo (cgiargsclass &/*args*/, displayclass &disp,
    402549                    outconvertclass &outconvert,
     
    512659  } else {
    513660    // rproto can't be NULL to get here
    514     ColInfoResponse_t collectinfo;
    515     comerror_t err;
    516 
    517     rproto->get_collectinfo (arg_c, collectinfo, err, logout);
    518     if (err == noError) {
     661    ColInfoResponse_t *collectinfo = recpt->get_collectinfo_ptr (rproto, arg_c, logout);
     662    if (collectinfo != NULL) {
    519663      textout << outconvert << "<table>\n"
    520664          << "<tr><th>collection name</th><td>\""
    521           << collectinfo.shortInfo.name
     665          << collectinfo->shortInfo.name
    522666          << "\"</td></tr>\n"
    523667
    524668          << "<tr><th>host</th><td>\""
    525           << collectinfo.shortInfo.host
     669          << collectinfo->shortInfo.host
    526670          << "\"</td></tr>\n"
    527671
    528672          << "<tr><th>port</th><td>\""
    529           << collectinfo.shortInfo.port
     673          << collectinfo->shortInfo.port
    530674          << "\"</td></tr>\n"
    531675
    532676          << "<tr><th>is public?</th><td>";
    533       if (collectinfo.isPublic) textout << outconvert << "true";
     677      if (collectinfo->isPublic) textout << outconvert << "true";
    534678      else textout << outconvert << "false";
    535679      textout << outconvert
     
    537681
    538682          << "<tr><th>is beta?</th><td>";
    539       if (collectinfo.isBeta) textout << outconvert << "true";
     683      if (collectinfo->isBeta) textout << outconvert << "true";
    540684      else textout << outconvert << "false";
    541685      textout << outconvert
     
    543687
    544688          << "<tr><th>build date</th><td>\""
    545           << collectinfo.buildDate
     689          << collectinfo->buildDate
    546690          << "\"</td></tr>\n"
    547691
    548692          << "<tr><th>interface languages</th><td>";
    549       text_tarray::iterator languages_here = collectinfo.languages.begin();
    550       text_tarray::iterator languages_end = collectinfo.languages.end();
     693      text_tarray::iterator languages_here = collectinfo->languages.begin();
     694      text_tarray::iterator languages_end = collectinfo->languages.end();
    551695      bool languages_first = true;
    552696      while (languages_here != languages_end) {
     
    558702
    559703      textout << "<tr><th valign=top>collection metadata</th><td><table>\n";
    560       text_tmap::iterator meta_here = collectinfo.collectionmeta.begin();
    561       text_tmap::iterator meta_end = collectinfo.collectionmeta.end();
     704      text_tmap::iterator meta_here = collectinfo->collectionmeta.begin();
     705      text_tmap::iterator meta_end = collectinfo->collectionmeta.end();
    562706      while (meta_here != meta_end) {
    563707    textout << outconvert << "<tr><td>" << (*meta_here).first
     
    568712
    569713      textout << "<tr><th valign=top>format info</th><td><table>\n";
    570       text_tmap::iterator format_here = collectinfo.format.begin();
    571       text_tmap::iterator format_end = collectinfo.format.end();
     714      text_tmap::iterator format_here = collectinfo->format.begin();
     715      text_tmap::iterator format_end = collectinfo->format.end();
    572716      while (format_here != format_end) {
    573717    textout << outconvert << "<tr><td>" << (*format_here).first
     
    578722
    579723      textout << "<tr><th valign=top>building info</th><td><table>\n";
    580       text_tmap::iterator building_here = collectinfo.building.begin();
    581       text_tmap::iterator building_end = collectinfo.building.end();
     724      text_tmap::iterator building_here = collectinfo->building.begin();
     725      text_tmap::iterator building_end = collectinfo->building.end();
    582726      while (building_here != building_end) {
    583727    textout << outconvert << "<tr><td>" << (*building_here).first
     
    591735
    592736          << "<tr><th>number of documents</th><td>\""
    593           << collectinfo.numDocs
     737          << collectinfo->numDocs
    594738          << "\"</td></tr>\n"
    595739
     740          << "<tr><th>number of sections</th><td>\""
     741          << collectinfo->numSections
     742          << "\"</td></tr>\n"
     743
    596744          << "<tr><th>number of words</th><td>\""
    597           << collectinfo.numWords
     745          << collectinfo->numWords
    598746          << "\"</td></tr>\n"
    599747
    600748          << "<tr><th>number of bytes</th><td>\""
    601           << collectinfo.numBytes
     749          << collectinfo->numBytes
    602750          << "\"</td></tr>\n"
    603751
    604752          << "<tr><th>preferred receptionist</th><td>\""
    605           << collectinfo.receptionist
     753          << collectinfo->receptionist
    606754          << "\"</td></tr>\n"
    607755
     
    609757     
    610758    } else {
    611       textout << outconvert << "Error (" << get_comerror_string (err)
    612           << ") while getting collect information\n";
     759      textout << "ERROR (statusaction::output_collectioninfo): while getting collect information\n";
    613760    }
    614761
     
    617764    InfoFilterOptionsRequest_t filteroptions_request;
    618765    InfoFilterOptionsResponse_t filteroptions;
     766    comerror_t err;
    619767    rproto->get_filterinfo (arg_c, filterinfo, err, logout);
    620768    if (err == noError) {
     
    722870  textout << outconvert << "<h2>Init log</h2>\n";
    723871
     872#ifdef GSDL_USE_IOS_H
    724873  ifstream initin (cinitfilename, ios::in | ios::nocreate);
     874#else
     875  ifstream initin (cinitfilename, ios::in);
     876#endif
     877
    725878  delete cinitfilename;
    726879  if (initin) {
     
    762915  logout << flush;
    763916
     917#ifdef GSDL_USE_IOS_H
    764918  ifstream errin (cerrfilename, ios::in | ios::nocreate);
     919#else
     920  ifstream errin (cerrfilename, ios::in);
     921#endif
     922
    765923  delete cerrfilename;
    766924  if (errin) {
     
    8561014}
    8571015
    858 bool statusaction::do_action (cgiargsclass &args, recptprotolistclass * /*protos*/,
     1016bool statusaction::do_action (cgiargsclass &args, recptprotolistclass *protos,
    8591017                  browsermapclass * /*browsers*/, displayclass &disp,
    8601018                  outconvertclass &outconvert, ostream &textout,
     
    8981056  if (arg_sp == "frameset") output_frameset (args, disp, outconvert, textout, logout);
    8991057  else if (arg_sp == "select") output_select (args, disp, outconvert, textout, logout);
    900   else if (arg_sp == "welcome") output_welcome (args, disp, outconvert, textout, logout);
     1058  else if (arg_sp == "welcome") output_welcome (args, protos, disp, outconvert, textout, logout);
    9011059  else if (arg_sp == "generalinfo") output_generalinfo (args, disp, outconvert, textout, logout);
    9021060  else if (arg_sp == "argumentinfo") output_argumentinfo (args, disp, outconvert, textout, logout);
    9031061  else if (arg_sp == "actioninfo") output_actioninfo (args, disp, outconvert, textout, logout);
     1062  else if (arg_sp == "browserinfo") output_browserinfo (args, disp, outconvert, textout, logout);
    9041063  else if (arg_sp == "protocolinfo") output_protocolinfo (args, disp, outconvert, textout, logout);
    9051064  else if (arg_sp == "collectioninfo") output_collectioninfo (args, disp, outconvert, textout, logout);
  • branches/New_Config_Format-branch/gsdl/src/recpt/statusaction.h

    r760 r1279  
    4444            outconvertclass &outconvert,
    4545            ostream &textout, ostream &logout);
     46
    4647  void output_select (cgiargsclass &args, displayclass &disp,
    4748              outconvertclass &outconvert,
    4849              ostream &textout, ostream &logout);
    4950
    50   void output_welcome (cgiargsclass &args, displayclass &disp,
    51                outconvertclass &outconvert,
    52                ostream &textout, ostream &logout);
     51  void output_welcome (cgiargsclass &args, recptprotolistclass *protos,
     52               displayclass &disp, outconvertclass &outconvert,
     53               ostream &textout, ostream &logout);
     54
    5355  void output_generalinfo (cgiargsclass &args, displayclass &disp,
    5456               outconvertclass &outconvert,
    5557               ostream &textout, ostream &logout);
     58
    5659  void output_argumentinfo (cgiargsclass &args, displayclass &disp,
    5760                outconvertclass &outconvert,
    5861                ostream &textout, ostream &logout);
     62
    5963  void output_actioninfo (cgiargsclass &args, displayclass &disp,
    6064              outconvertclass &outconvert,
    6165              ostream &textout, ostream &logout);
     66
     67  void output_browserinfo (cgiargsclass &args, displayclass &disp,
     68               outconvertclass &outconvert,
     69               ostream &textout, ostream &logout);
     70
    6271  void output_protocolinfo (cgiargsclass &args, displayclass &disp,
    6372                outconvertclass &outconvert,
    6473                ostream &textout, ostream &logout);
     74
    6575  void output_collectioninfo (cgiargsclass &args, displayclass &disp,
    6676                  outconvertclass &outconvert,
    6777                  ostream &textout, ostream &logout);
     78
    6879  void output_initlog (cgiargsclass &args, displayclass &disp,
    6980               outconvertclass &outconvert,
    7081               ostream &textout, ostream &logout);
     82
    7183  void output_errorlog (cgiargsclass &args, displayclass &disp,
    7284            outconvertclass &outconvert,
  • branches/New_Config_Format-branch/gsdl/src/recpt/userdb.cpp

    r1000 r1279  
    2828/*
    2929   $Log$
     30   Revision 1.6.4.1  2000/07/12 22:21:47  sjboddie
     31   merged changes to trunk into New_Config_Format branch
     32
     33   Revision 1.7  2000/05/22 12:30:36  sjboddie
     34   the initial admin user now belongs to the colbuilder group by default
     35   (as well as the administrator group)
     36
    3037   Revision 1.6  2000/03/01 22:23:09  sjboddie
    3138   tidied up windows installation
     
    187194      userinfo.password = crypt_text("admin");
    188195      userinfo.enabled = true;
    189       userinfo.groups = "administrator";
     196      userinfo.groups = "administrator,colbuilder";
    190197      userinfo.comment = "change the password for this account as soon as possible";
    191198      return set_user_info (userdbfile, username, userinfo);
  • branches/New_Config_Format-branch/gsdl/src/recpt/win32.mak

    r1000 r1279  
    2525###########################################################################
    2626
    27 GSDLHOME = d:\home\dl\gsdl
    28 STLPATH = d:\home\dl\stl\stlport
     27GSDLHOME = c:\gsdl
     28STLPATH = c:\stlport
    2929
    3030AR = lib
  • branches/New_Config_Format-branch/gsdl/src/w32server/cgiwrapper.cpp

    r1011 r1279  
     1#include "text_t.h"
     2
    13#include <windows.h>
    24#include <string.h>
     
    3234
    3335// actions
    34 #include "action.h"
    3536#include "statusaction.h"
    3637#include "pageaction.h"
     
    3839#include "queryaction.h"
    3940#include "documentaction.h"
     41#include "tipaction.h"
    4042#include "authenaction.h"
    4143#include "usersaction.h"
    4244#include "extlinkaction.h"
    43 //#include "buildaction.h"
     45#include "buildaction.h"
    4446#include "delhistoryaction.h"
    4547
    4648// browsers
    47 #include "browserclass.h"
    4849#include "vlistbrowserclass.h"
    4950#include "hlistbrowserclass.h"
     
    7879  RequestInfoT *RInfo;
    7980  ostream *casostr;
     81#if !defined (GSDL_USE_IOS_H)
     82  char buffer[256];
     83#endif
    8084};
    8185
    8286textstreambuf::textstreambuf() {
    8387  tsbreset();
     88#if !defined (GSDL_USE_IOS_H)
     89  setp (&buffer[0], &buffer[255]);
     90#else
    8491  if (base() == ebuf()) allocate();
    8592  setp (base(), ebuf());
     93#endif
    8694};
    8795
    8896int textstreambuf::sync () {
    8997  if ((RInfo != NULL) &&
    90       (Send_String_N(pbase(), out_waiting(), RInfo) < 0)) {
     98      (Send_String_N(pbase(), pptr()-pbase(), RInfo) < 0)) {
    9199    RInfo = NULL;
    92100  }
     
    94102  if (casostr != NULL) {
    95103    char *thepbase=pbase();
    96         for (int i=0;i<out_waiting();i++) (*casostr).put(thepbase[i]);
     104    for (int i=0;i<(pptr()-pbase());i++) (*casostr).put(thepbase[i]);
    97105  }
    98106 
     
    118126  int overflow (int ch);
    119127  int underflow () {return EOF;}
     128
     129#if !defined (GSDL_USE_IOS_H)
     130private:
     131  char buffer[256];
     132#endif
    120133};
    121134
    122135logstreambuf::logstreambuf () {
     136#if !defined (GSDL_USE_IOS_H)
     137  setp (&buffer[0], &buffer[255]);
     138#else
    123139  if (base() == ebuf()) allocate();
    124140  setp (base(), ebuf());
     141#endif
    125142}
    126143
     
    128145  if (gsdl_keep_log || gsdl_show_console) {
    129146    log_message ("LOCAL LIB MESSAGE: ");
    130         log_message_N (pbase(), out_waiting());
     147        log_message_N (pbase(), pptr()-pbase());
    131148  }
    132149
     
    154171DWORD lastlibaccesstime;
    155172DWORD baseavailvirtual;
    156 
    157 static void page_errorsitecfg (const text_t &gsdlhome, const text_t &collection) {
    158    
    159   text_t message = "Error\n\n"
    160     "The site.cfg configuration file could not be found. This file\n"
    161     "should contain configuration information relating to this sites\n"
    162     "setup.\n";
    163  
    164   if (collection.empty()) {
    165     message += "As this program is not being run in collection specific mode,\n"
    166       "the file should reside at " + gsdlhome + "\\etc\\site.cfg.\n";
    167   } else {
    168     message += "As this program is being run in collection specific mode,\n"
    169       "the file can reside at " + gsdlhome + "\\collect\\" + collection +
    170       "\\etc\\site.cfg or " + gsdlhome + "\\etc\\site.cfg.\n";
    171   }
    172 
    173   MessageBox(NULL, message.getcstr(),
    174          "Greenstone Digital Library Software"
    175          ,MB_OK|MB_SYSTEMMODAL);
    176 }
    177173
    178174static void page_errormaincfg (const text_t &gsdlhome, const text_t &collection) {
     
    326322// returns 1 if successful, 0 if unsuccessful
    327323int gsdl_init () {
     324#if defined (GSDL_USE_IOS_H)
    328325  cerr = &logstream;
    329326  cout = &textstream;
     327#else
     328  cerr.rdbuf(&logstream);
     329  cout.rdbuf(&textstream);
     330#endif
    330331
    331332  // collection should be set to "" unless in
     
    425426 
    426427  pageaction *apageaction = new pageaction();
     428  apageaction->set_receptionist (&recpt);
    427429  recpt.add_action (apageaction);
    428430 
     
    430432  recpt.add_action (apingaction);
    431433 
     434  tipaction *atipaction = new tipaction();
     435  recpt.add_action (atipaction);
     436 
    432437  queryaction *aqueryaction = new queryaction();
     438  aqueryaction->set_receptionist (&recpt);
    433439  recpt.add_action (aqueryaction);
    434440 
    435441  documentaction *adocumentaction = new documentaction();
     442  adocumentaction->set_receptionist (&recpt);
    436443  recpt.add_action (adocumentaction);
    437444 
     
    442449  recpt.add_action (anextlinkaction);
    443450
    444   //  buildaction *abuildaction = new buildaction();
    445   //  abuildaction->set_receptionist (&recpt);
    446   //  recpt.add_action (abuildaction);
     451  buildaction *abuildaction = new buildaction();
     452  abuildaction->set_receptionist (&recpt);
     453  recpt.add_action (abuildaction);
    447454 
    448455  authenaction *aauthenaction = new authenaction();
     
    450457  recpt.add_action (aauthenaction);
    451458
    452   delhistoryaction adelhistoryaction;
    453   recpt.add_action(&adelhistoryaction);
     459  delhistoryaction *adelhistoryaction = new delhistoryaction();
     460  recpt.add_action (adelhistoryaction);
    454461
    455462
     
    503510  // those read in last will override those read earlier
    504511  // collections being used together in this way should be
    505   // careful not to have site.cfg or main.cfg files that might
     512  // careful not to have main.cfg files that might
    506513  // screw with each other.
    507514  text_tset::const_iterator thome = gsdlhomes.begin();
    508515  text_tset::const_iterator ehome = gsdlhomes.end();
    509516  while (thome != ehome) {
    510     // TODO: should only need to do this once now I think
    511     // gsdlsite.cfg will need to be installed along with executable
    512     //    if (!site_cfg_read (recpt, *thome, maxrequests)) {
    513       // couldn't find the site configuration file
    514     //      page_errorsitecfg (*thome, collection);
    515     //      return 0;
    516     //    } else
    517517    if (!main_cfg_read (recpt, *thome, collection)) {
    518518      // couldn't find the main configuration file
  • branches/New_Config_Format-branch/gsdl/src/w32server/fnord.cpp

    r1040 r1279  
    1818The author can be contacted via Email at [email protected]
    1919*/
     20#include "text_t.h"
    2021#include <windows.h>
    2122#include <stdlib.h>
     
    7677#define ENTERBUTTONY ((MAINWINDOWHEIGHT-RESTHEIGHT)+5)
    7778
    78 #define VERSIONSTRING "version 2.13"
     79#define VERSIONSTRING "version x.xx"
    7980const char versionstring[] = VERSIONSTRING;
    8081
     
    165166  if (coltitledc == NULL) {
    166167    coltitledc = CreateCompatibleDC(pdc);
    167     defcoltitlebitmap = SelectObject (coltitledc, coltitlebitmap);
     168    defcoltitlebitmap = (HBITMAP)SelectObject (coltitledc, coltitlebitmap);
    168169  }
    169170 
    170171  if (logodc == NULL) {
    171172    logodc = CreateCompatibleDC(pdc);
    172     deflogobitmap = SelectObject (logodc, logobitmap);
     173    deflogobitmap = (HBITMAP)SelectObject (logodc, logobitmap);
    173174  }
    174175 
     
    219220      statusRect.right = STATUSX+STATUSWIDTH;
    220221      statusRect.bottom = STATUSY+STATUSHEIGHT;
    221       FillRect(pdc, &statusRect, GetStockObject(WHITE_BRUSH));
     222      FillRect(pdc, &statusRect, (HBRUSH)GetStockObject(WHITE_BRUSH));
    222223     
    223224      int cury = STATUSY;
     
    259260    infoRect.right = INFOX+INFOWIDTH;
    260261    infoRect.bottom = INFOY+INFOHEIGHT;
    261     FillRect(pdc, &infoRect, GetStockObject(WHITE_BRUSH));
     262    FillRect(pdc, &infoRect, (HBRUSH)GetStockObject(WHITE_BRUSH));
    262263    DrawText(pdc, infostring, -1, &infoRect, DT_CENTER);
    263264  }
     
    558559    // finally, get the host name (no error value
    559560    // is returned from this function)
    560     GetLocalName(Instance);
     561    GetLocalName((HINSTANCE)Instance);
    561562   
    562563      } else {
     
    596597  startbrowserdir[0] = '\0';
    597598  netscapeneeded = 0;
    598   int err = tryinitnetwork (Instance, MsgWindow, NULL);
    599   //int err = 1;
     599  //  int err = tryinitnetwork (Instance, MsgWindow, NULL);
     600  int err = 1;
    600601 
    601602  // if an error occurred, try again with billsock
     
    802803
    803804
    804 int __stdcall WinMain(HANDLE Instance, HANDLE /*PrevInstance*/, LPSTR CmdLineStr, int /*CmdShow*/) {
     805int __stdcall WinMain(HINSTANCE Instance, HINSTANCE /*PrevInstance*/, LPSTR CmdLineStr, int /*CmdShow*/) {
    805806  HWND MainWindow;  MSG Message;  WNDCLASS MainClass; 
    806807 
     
    813814  MainClass.hIcon     = LoadIcon(Instance, MAKEINTRESOURCE(TRAY_ICON));
    814815  MainClass.hCursor   = LoadCursor(NULL, IDC_ARROW);
    815   MainClass.hbrBackground = GetStockObject(WHITE_BRUSH);
     816  MainClass.hbrBackground = (HBRUSH)GetStockObject(WHITE_BRUSH);
    816817  MainClass.lpszMenuName = MAKEINTRESOURCE(Main_Menu);
    817818  MainClass.lpszClassName = "Greenstone Digital Library Software";
  • branches/New_Config_Format-branch/gsdl/src/w32server/httpreq.cpp

    r611 r1279  
    1818The author can be contacted via Email at [email protected]
    1919*/
     20#include "text_t.h"
    2021#include <windows.h>
    2122#include <stdlib.h>
  • branches/New_Config_Format-branch/gsdl/src/w32server/httpsrv.cpp

    r902 r1279  
    1818The author can be contacted via Email at [email protected]
    1919*/
     20#include "text_t.h"
    2021#include <windows.h>
    2122#include <stdlib.h>
     
    2930#include "httpsrv.h"
    3031#include "locate.h"
    31 #include "settings.h"
    3232
    3333//Private Functions
  • branches/New_Config_Format-branch/gsdl/src/w32server/locate.cpp

    r1011 r1279  
     1#include "text_t.h"
    12#include <windows.h>
    23#include <stdio.h>
     
    115116  scroll.bottom = text_rect.bottom;
    116117  ScrollDC(dc,0,-nbits,&scroll,&clip,NULL,&update);
    117   FillRect(dc,&update, GetStockObject(WHITE_BRUSH));
     118  FillRect(dc,&update, (HBRUSH)GetStockObject(WHITE_BRUSH));
    118119}
    119120
  • branches/New_Config_Format-branch/gsdl/src/w32server/netio.cpp

    r611 r1279  
    103103}
    104104
    105 char *GetLocalName(HANDLE hInstance) {
     105char *GetLocalName(HINSTANCE hInstance) {
    106106    // static in case it is written to after the function has finished
    107107    // (I did not error checking on WSACancelAsyncRequest)
  • branches/New_Config_Format-branch/gsdl/src/w32server/netio.h

    r611 r1279  
    8484Returns: A string containing the local address
    8585*/
    86 char *GetLocalName(HANDLE hInstance);
     86char *GetLocalName(HINSTANCE hInstance);
    8787
    8888// returns 0 on success, and a WSA error message on failure.
  • branches/New_Config_Format-branch/gsdl/src/w32server/settings.cpp

    r902 r1279  
     1#include "text_t.h"
     2
    13#if defined(GSDL_USE_OBJECTSPACE)
    24#  include <ospace\std\fstream>
     
    367369  text_t key, value, section;
    368370  char *cstr_value;
     371#if defined (GSDL_USE_IOS_H)
    369372  ifstream conf (conffile, ios::nocreate);
     373#else
     374  ifstream conf (conffile);
     375#endif
    370376  if (conf) {
    371377    while (read_ini_line(conf, key, value) >= 0) {
  • branches/New_Config_Format-branch/gsdl/src/w32server/settings.h

    r902 r1279  
    1212#include "text_t.h"
    1313#include "cfgread.h"
     14#include "receptionist.h"
    1415
    1516// library settings
Note: See TracChangeset for help on using the changeset viewer.