Changeset 1279
- Timestamp:
- 2000-07-13T10:21:53+12:00 (24 years ago)
- Location:
- branches/New_Config_Format-branch/gsdl
- Files:
-
- 13 deleted
- 147 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/New_Config_Format-branch/gsdl/bin/script/buildcol.pl
r1031 r1279 54 54 print STDERR " -keepold will not destroy the current contents of the\n"; 55 55 print STDERR " building directory\n"; 56 print STDERR " -allclassifications Don't remove empty classifications\n\n"; 56 print STDERR " -allclassifications Don't remove empty classifications\n"; 57 print STDERR " -create_images Attempt to create default images for new\n"; 58 print STDERR " collection. This relies on the Gimp being\n"; 59 print STDERR " installed along with relevant perl modules\n"; 60 print STDERR " to allow scripting from perl\n\n"; 57 61 } 58 62 … … 61 65 { 62 66 my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs, 63 $debug, $mode, $indexname, $keepold, $allclassifications); 67 $debug, $mode, $indexname, $keepold, $allclassifications, 68 $create_images); 64 69 if (!parsargv::parse(\@ARGV, 65 70 'verbosity/\d+/2', \$verbosity, … … 72 77 'index/.*/', \$indexname, 73 78 'keepold', \$keepold, 74 'allclassifications', \$allclassifications)) { 79 'allclassifications', \$allclassifications, 80 'create_images', \$create_images)) { 75 81 &print_usage(); 76 82 die "\n"; … … 85 91 # read the configuration file 86 92 $textindex = "section:text"; 87 $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc /collect.cfg");93 $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg"); 88 94 if (-e $configfilename) { 89 95 $collectcfg = &colcfg::read_collect_cfg ($configfilename); … … 101 107 } 102 108 109 # create default images if required 110 if ($create_images) { 111 my $collection_name = $collection; 112 $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'} 113 if defined $collectcfg->{'collectionmeta'}->{'collectionname'}; 114 115 &create_images ($collection_name); 116 } 117 103 118 # fill in the default archives and building directories if none 104 119 # were supplied, turn all \ into / and remove trailing / … … 177 192 } 178 193 179 194 sub create_images { 195 my ($collection_name) = @_; 196 197 my 
$image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl"); 198 if (!-e $image_script) { 199 print STDERR "WARNING: Image making script ($image_script) could not be found\n"; 200 print STDERR " Default images will not be generated\n\n"; 201 return; 202 } 203 204 my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images"); 205 206 &util::mk_all_dir ($imagedir); 207 208 # create the images 209 system ("$image_script -size 1.5 -image_dir \"$imagedir\" -filename $collection.gif -text \"$collection_name\""); 210 system ("$image_script -image_dir \"$imagedir\" -filename ${collection}sm.gif -text \"$collection_name\""); 211 212 # update the collect.cfg configuration file (this will need 213 # to be changed when the config file format changes) 214 if (!open (CFGFILE, $configfilename)) { 215 print STDERR "WARNING: Couldn't open config file ($configfilename)\n"; 216 print STDERR " for updating so collection images may not be linked correctly\n"; 217 return; 218 } 219 220 my $line = ""; my $file = ""; 221 my $found = 0; my $foundsm = 0; 222 while (defined ($line = <CFGFILE>)) { 223 if ($line =~ /collectionmeta\s+iconcollection\s+/) { 224 $line = "collectionmeta iconcollection _httprefix_/collect/$collection/images/$collection.gif\n"; 225 $found = 1; 226 } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) { 227 $line = "collectionmeta iconcollectionsmall _httprefix_/collect/$collection/images/${collection}sm.gif\n"; 228 $foundsm = 1; 229 } 230 $file .= $line; 231 } 232 close CFGFILE; 233 234 $file .= "collectionmeta iconcollection _httprefix_/collect/$collection/images/$collection.gif\n" if !$found; 235 $file .= "collectionmeta iconcollectionsmall _httprefix_/collect/$collection/images/${collection}sm.gif\n" if !$foundsm; 236 237 if (!open (CFGFILE, ">$configfilename")) { 238 print STDERR "WARNING: Couldn't open config file ($configfilename)\n"; 239 print STDERR " for updating so collection images may not be linked 
correctly\n"; 240 return; 241 } 242 print CFGFILE $file; 243 close CFGFILE; 244 } -
branches/New_Config_Format-branch/gsdl/bin/script/gimp/flash_button.pl
r1037 r1279 55 55 56 56 sub print_usage { 57 print STDERR "\n usage: $0 [options] macrofile\n\n";57 print STDERR "\n usage: $0 [options]\n\n"; 58 58 print STDERR " options:\n"; 59 59 print STDERR " -cfg_file file configuration file containing one or more\n"; … … 136 136 chomp $image_dir; 137 137 } 138 139 # replace any '\n' occurring in text with carriage return 140 $text =~ s/\\n/\n/gi; 138 141 139 142 if ($cfg_file =~ /\w/) { -
branches/New_Config_Format-branch/gsdl/bin/script/gimp/title_icon.pl
r1037 r1279 46 46 47 47 48 my ($cfg_file, $imagefile, $width, $height, $imageheight, $stripecolor, $stripewidth,49 $stripe_alignment, $i_transparency, $text, $text_alignment, $filename, $textspace_x,50 $textspace_y, $bgcolor, $fontcolor, $fontsize, $minfontsize, $fontname,51 $fontweight, $fontslant, $fontwidth, $fontspacing, $image_dir);48 local ($cfg_file, $size, $imagefile, $width, $height, $imageheight, $stripecolor, $stripewidth, 49 $stripe_alignment, $i_transparency, $text, $text_alignment, $filename, $textspace_x, 50 $textspace_y, $bgcolor, $fontcolor, $fontsize, $minfontsize, $fontname, 51 $fontweight, $fontslant, $fontwidth, $fontspacing, $image_dir, $dont_wrap); 52 52 53 53 sub print_usage { … … 57 57 print STDERR " sets of the following options - use to create\n"; 58 58 print STDERR " batches of images\n"; 59 print STDERR " -size number the overall size ratio of the image (i.e. a size\n"; 60 print STDERR " of 2 will create an image twice the default size)\n"; 59 61 print STDERR " -image_dir directory directory to create images in [`pwd`]\n"; 60 62 print STDERR " this should be full path to existing directory\n"; … … 83 85 print STDERR " -fontslant [r]\n"; 84 86 print STDERR " -fontwidth [*]\n"; 85 print STDERR " -fontspacing [*]\n\n"; 87 print STDERR " -fontspacing [*]\n"; 88 print STDERR " -dont_wrap don't attempt to wrap text\n\n"; 86 89 } 87 90 … … 89 92 $image_dir = "./"; 90 93 $imagefile = ""; 91 $width = 150;92 $height = 44;93 $imageheight = 110;94 $width = int (150 * $size); 95 $height = int (44 * $size); 96 $imageheight = int (110 * $size); 94 97 $stripecolor = $gsdl_green; 95 $stripewidth = 40;98 $stripewidth = int (40 * $size); 96 99 $stripe_alignment = "left"; 97 100 $i_transparency = 60; … … 99 102 $text_alignment = "left"; 100 103 $filename = ""; 101 $textspace_x = 3;102 $textspace_y = 3;104 $textspace_x = int (3 * $size); 105 $textspace_y = int (3 * $size); 103 106 $bgcolor = $gsdl_green; 104 107 $fontcolor = $black; 105 $fontsize = 17;106 
$minfontsize = 10;108 $fontsize = int (17 * $size); 109 $minfontsize = int (10 * $size); 107 110 $fontname = "lucida"; 108 111 $fontweight = "medium"; … … 116 119 if (!parsargv::parse(\@ARGV, 117 120 'cfg_file/.*/', \$cfg_file, 121 'size/\d+/1', \$size, 118 122 'image_dir/.*/./', \$image_dir, 119 123 'imagefile/.*/', \$imagefile, … … 138 142 'fontslant/.*/r', \$fontslant, 139 143 'fontwidth/.*/*', \$fontwidth, 140 'fontspacing/.*/*', \$fontspacing)) { 144 'fontspacing/.*/*', \$fontspacing, 145 'dont_wrap', \$dont_wrap)) { 141 146 &print_usage(); 142 147 die "title_icon.pl: incorrect options\n"; … … 148 153 chomp $image_dir; 149 154 } 150 155 151 156 if ($cfg_file =~ /\w/) { 152 157 … … 177 182 178 183 sub produce_image { 184 185 &adjust_args (); 186 &wrap_text () unless $dont_wrap; 179 187 180 188 my $use_image = 0; … … 338 346 return 1; 339 347 } 348 349 # adjust arguments that are effected by the size argument 350 sub adjust_args { 351 352 if ($size != 1) { 353 my @size_args = ('width', 'height', 'imageheight', 'stripewidth', 354 'textspace_x', 'textspace_y', 'fontsize', 'minfontsize'); 355 foreach $arg (@size_args) { 356 $$arg = int ($$arg * $size); 357 } 358 } 359 } 360 361 sub wrap_text { 362 363 # don't wrap text if it already contains carriage returns 364 return if $text =~ /\n/; 365 366 # the following assumes that all words are less than $wrap_length long 367 my $wrap_length = 14; 368 369 my $new_text = ""; 370 while (length ($text) >= $wrap_length) { 371 my $line = substr ($text, 0, $wrap_length); 372 $text =~ s/^$line//; 373 $line =~ s/\s([^\s]*)$/\n/; 374 $text = $1 . $text; 375 $new_text .= $line; 376 } 377 $new_text .= $text; 378 $text = $new_text; 379 } 340 380 341 381 sub query { -
branches/New_Config_Format-branch/gsdl/bin/script/gsw3mir.pl
r845 r1279 1 #!/usr/ local/bin/perl5-w1 #!/usr/bin/perl -w 2 2 3 3 ########################################################################### -
branches/New_Config_Format-branch/gsdl/bin/script/import.pl
r1031 r1279 56 56 print STDERR " directory -- use with care\n"; 57 57 print STDERR " -gzip Use gzip to compress resulting gml documents\n"; 58 print STDERR " (don't forget to include ZIPPlug in your plugin\n"; 59 print STDERR " list when building from compressed documents)\n"; 58 60 print STDERR " -maxdocs number Maximum number of documents to import\n"; 59 61 print STDERR " -groupsize number Number of GML documents to group into one file\n"; … … 135 137 136 138 # load all the plugins 137 $pluginfo = &plugin::load_plugins ($plugins );139 $pluginfo = &plugin::load_plugins ($plugins, $verbosity); 138 140 if (scalar(@$pluginfo) == 0) { 139 141 print STDERR "No plugins were loaded.\n"; -
branches/New_Config_Format-branch/gsdl/bin/script/mkcol.pl
r1031 r1279 51 51 print STDERR " -title text The title for the collection\n"; 52 52 print STDERR " -about text The about text for the collection\n"; 53 print STDERR " -plugins list Space separated list of perl plugin modules to use\n"; 53 print STDERR " -plugin text perl plugin module to use (there may be multiple\n"; 54 print STDERR " plugin entries\n"; 54 55 print STDERR " -refine list Space separated list of perl plugin modules to use\n"; 55 56 … … 106 107 $line =~ s/\*\*title\*\*/$title/g; 107 108 $line =~ s/\*\*about\*\*/$about/g; 108 $line =~ s/\*\*plugins\*\*/$plugins /g;109 $line =~ s/\*\*plugins\*\*/$pluginstring/g; 109 110 $line =~ s/\*\*refine\*\*/$refine/g; 110 111 … … 119 120 120 121 121 my (@indexes, @indexestext );122 my (@indexes, @indexestext, @plugin); 122 123 123 124 # get and check options … … 132 133 'title/.+/', \$title, 133 134 'about/.+/', \$about, 134 'plugin s/.+/GMLPlug TEXTPlug ArcPlug RecPlug/', \$plugins,135 'plugin/.+', \@plugin, 135 136 'refine/.+/', \$refine 136 137 )) { … … 138 139 die "\n"; 139 140 } 140 141 142 # load default plugins if none were on command line 143 if (!scalar(@plugin)) { 144 @plugin = (GMLPlug,TEXTPlug,ArcPlug,RecPlug); 145 } 146 141 147 # get and check the collection name 142 148 ($collection) = @ARGV; … … 195 201 } 196 202 203 $pluginstring = ""; 204 foreach $plugin (@plugin) { 205 $pluginstring .= "plugin $plugin\n"; 206 } 197 207 198 208 # make sure the model collection exists -
branches/New_Config_Format-branch/gsdl/bin/script/newsrc.pl
r546 r1279 1 #!/usr/ local/bin/perl5-w1 #!/usr/bin/perl -w 2 2 3 3 ########################################################################### -
branches/New_Config_Format-branch/gsdl/bin/script/togb.pl
r630 r1279 1 #!/usr/ local/bin/perl5-w1 #!/usr/bin/perl -w 2 2 3 3 ########################################################################### … … 36 36 if (!parsargv::parse(\@ARGV, 37 37 'unicode', \$unicode, 38 ' extended', \$extended,38 'iso_8859_1', \$iso_8859_1, 39 39 'gb', \$gb)) { 40 40 print STDERR "\n usage: $0 [options]\n\n"; 41 41 print STDERR " options:\n"; 42 print STDERR " -u tf8input is in utf-8 or unicode (default)\n";43 print STDERR " - extended input is in extended ascii\n";44 print STDERR " -gb input is in GB or GBK\n\n";42 print STDERR " -unicode input is in utf-8 or unicode (default)\n"; 43 print STDERR " -iso_8859_1 input is in extended ascii (ISO-8859-1 Latin 1)\n"; 44 print STDERR " -gb input is in GB or GBK (simplified Chinese)\n\n"; 45 45 die "\n"; 46 46 } 47 47 48 48 $encoding = "utf8" if $unicode; 49 $encoding = " extended" if $extended;49 $encoding = "iso_8859_1" if $iso_8859_1; 50 50 $encoding = "gb" if $gb; 51 51 -
branches/New_Config_Format-branch/gsdl/bin/script/touc.pl
r630 r1279 1 #!/usr/ local/bin/perl5-w1 #!/usr/bin/perl -w 2 2 3 3 ########################################################################### … … 36 36 if (!parsargv::parse(\@ARGV, 37 37 'unicode', \$unicode, 38 'extended', \$extended, 38 'iso_8859_1', \$iso_8859_1, 39 'iso_8859_6', \$iso_8859_6, 40 'windows_1256', \$windows_1256, 39 41 'gb', \$gb)) { 40 42 print STDERR "\n usage: $0 [options]\n\n"; 41 43 print STDERR " options:\n"; 42 print STDERR " -utf8 input is in utf-8 or unicode (default)\n"; 43 print STDERR " -extended input is in extended ascii\n"; 44 print STDERR " -gb input is in GB or GBK\n\n"; 44 print STDERR " -unicode input is in utf-8 or unicode (default)\n"; 45 print STDERR " -iso_8859_1 input is in extended ascii (ISO-8859-1 Latin 1)\n"; 46 print STDERR " -iso_8859_6 input is in 8 bit Arabic (ISO-8859-6)\n"; 47 print STDERR " -windows_1256 input is in Windows 1256 (Arabic)\n"; 48 print STDERR " -gb input is in GB or GBK (simplified Chinese)\n\n"; 45 49 die "\n"; 46 50 } 47 51 48 52 $encoding = "utf8" if $unicode; 49 $encoding = "extended" if $extended; 53 $encoding = "iso_8859_1" if $iso_8859_1; 54 $encoding = "iso_8859_6" if $iso_8859_6; 55 $encoding = "windows_1256" if $windows_1256; 50 56 $encoding = "gb" if $gb; 51 57 -
branches/New_Config_Format-branch/gsdl/bin/script/toutf8.pl
r630 r1279 1 #!/usr/ local/bin/perl5-w1 #!/usr/bin/perl -w 2 2 3 3 ########################################################################### … … 36 36 if (!parsargv::parse(\@ARGV, 37 37 'unicode', \$unicode, 38 'extended', \$extended, 38 'iso_8859_1', \$iso_8859_1, 39 'iso_8859_6', \$iso_8859_6, 40 'windows_1256', \$windows_1256, 39 41 'gb', \$gb)) { 40 42 print STDERR "\n usage: $0 [options]\n\n"; 41 43 print STDERR " options:\n"; 42 print STDERR " -utf8 input is in utf-8 or unicode (default)\n"; 43 print STDERR " -extended input is in extended ascii\n"; 44 print STDERR " -gb input is in GB or GBK\n\n"; 44 print STDERR " -unicode input is in utf-8 or unicode (default)\n"; 45 print STDERR " -iso_8859_1 input is in extended ascii (ISO-8859-1 Latin 1)\n"; 46 print STDERR " -iso_8859_6 input is in 8 bit Arabic (ISO-8859-6)\n"; 47 print STDERR " -windows_1256 input is in Windows 1256 (Arabic)\n"; 48 print STDERR " -gb input is in GB or GBK (simplified Chinese)\n\n"; 45 49 die "\n"; 46 50 } 47 51 48 52 $encoding = "utf8" if $unicode; 49 $encoding = "extended" if $extended; 53 $encoding = "iso_8859_1" if $iso_8859_1; 54 $encoding = "iso_8859_6" if $iso_8859_6; 55 $encoding = "windows_1256" if $windows_1256; 50 56 $encoding = "gb" if $gb; 51 57 -
branches/New_Config_Format-branch/gsdl/bin/script/translate.pl
r1062 r1279 54 54 'Ouml' => chr (214), 55 55 'Uuml' => chr (220), 56 'szlig' => chr (223)); 56 'szlig' => chr (223), 57 'aacute' => chr (225), 58 'eacute' => chr (233), 59 'iacute' => chr (237), 60 'oacute' => chr (243), 61 'uacute' => chr (250), 62 'Aacute' => chr (193), 63 'Eacute' => chr (201), 64 'Iacute' => chr (205), 65 'Oacute' => chr (211), 66 'Uacute' => chr (218), 67 'ntilde' => chr (241), 68 'Ntilde' => chr (209)); 57 69 58 70 my $hand_made = 0; … … 137 149 # process all the images 138 150 139 $dmfile =~ s/\n\#\#\s*\"([^\"]*)\"\s*\#\#\s*([^\s\#]*)\s*\#\#\s*([^\s\#]*)\s*\#\#(.*?)(?=(\n\#|\s*\Z))/ 140 &process_image ($1, $2, $3, $4)/esg; 151 $dmfile =~ s/\n\#\#\s*\"([^\"]*)\"\s*\#\#\s*([^\s\#]*)\s*\#\#\s*([^\s\#]*)\s*\#\#(.*?)(?=(\n\#|\s*\Z))/&process_image ($1, $2, $3, $4)/esg; 141 152 142 153 # add language parameter to each macro … … 150 161 151 162 my $origtext = $text; 163 $text =~ s/&(\d{3,4});/chr($1)/ge; 152 164 $text =~ s/&([^;]*);/$rmap{$1}/g; 153 165 … … 204 216 } elsif ($image_type eq "green_title") { 205 217 206 # generate green title image 218 # read the width if it is specified in $image_macros 219 my ($width) = $image_macros =~ /_width${image_name}x?_\s*[^\{]*\{(\d+)\}/; 220 $width = 200 unless ($width); 221 222 # generate green title image 207 223 my $options = "-text \"$text\" -filename ${image_name}.gif -image_dir $image_dir"; 208 $options .= " -width 200-height 57 -stripe_alignment right -text_alignment right";224 $options .= " -width $width -height 57 -stripe_alignment right -text_alignment right"; 209 225 $options .= " -fontsize 26 -fontweight bold"; 210 226 `$ENV{'GSDLHOME'}/bin/script/gimp/title_icon.pl $options`; 211 227 212 # get width of new images and edit width macro 213 # we'll do this even though title_icon.pl will always create images of the 214 # width specified (200) 215 my $fullfilename = &util::filename_cat ($image_dir, "${image_name}.gif"); 216 &process_width_macro ($fullfilename, $image_name, \$image_macros); 228 # get 
width of resulting image and edit _width..._ macro in $image_macros 229 # (no longer needed since we always resize to the width read from $image_macros.) 230 # my $fullfilename = &util::filename_cat ($image_dir, "${image_name}.gif"); 231 # &process_width_macro ($fullfilename, $image_name, \$image_macros); 217 232 218 233 } elsif ($image_type eq "hand_made") { … … 226 241 } 227 242 228 return "\n\#\# \"$ text\" \#\# $image_type \#\# $image_name \#\#$image_macros";243 return "\n\#\# \"$origtext\" \#\# $image_type \#\# $image_name \#\#$image_macros"; 229 244 } 230 245 … … 233 248 234 249 my $img_info = &get_img_info ($filename); 235 $$image_macros =~ s/(_width${image_name}x _\s*(?:\[[^\]]*\])?\s*\{)(\d+)(\})/$1$img_info->{'width'}$3/s;250 $$image_macros =~ s/(_width${image_name}x?_\s*(?:\[[^\]]*\])?\s*\{)(\d+)(\})/$1$img_info->{'width'}$3/s; 236 251 } 237 252 -
branches/New_Config_Format-branch/gsdl/cgi-bin/gsdlsite.cfg
r1038 r1279 3 3 4 4 # points to the GSDLHOME directory 5 gsdlhome /home/gsdl5 gsdlhome **GSDLHOME** 6 6 7 7 # this is the http address of GSDLHOME 8 httpprefix /gsdl 8 # if your webservers DocumentRoot is set to $GSDLHOME 9 # then httpprefix can remain commented out 10 #httpprefix /gsdl 9 11 10 12 # this is the http address of the directory which 11 13 # contains the images for the interface. 12 httpimg /gsdl/images 14 # if your webservers DocumentRoot is set to $GSDLHOME 15 # then httpimg will be /images 16 httpimg /images 13 17 14 18 # should contain the http address of this cgi script. This -
branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_buildcol.pl
r841 r1279 1 #! /usr/local/bin/perl5-w1 #!perl -w 2 2 3 3 ########################################################################### … … 32 32 # an already running cgi program. 33 33 34 use Fcntl ':flock'; 34 package webpage_buildcol; 35 35 36 use File::Basename; 37 use GSDLHOME; 38 use gflock; 36 39 37 40 my $args; … … 46 49 my ($variable,$assignment) = ($1,$3); 47 50 $args->{$variable} = $assignment; 48 49 $ENV{'GSDLHOME'} = $assignment if ($variable eq "gsdlhome"); 50 $ENV{'GSDLOS'} = $assignment if ($variable eq "gsdlos"); 51 } 52 } 53 54 if (defined($ENV{'GSDLHOME'})) 55 { 56 if (!defined($ENV{'GSDLOS'})) 57 { 58 $ENV{'GSDLOS'} = $^O; # special perl variable set to OS 59 ##### Need to check to see what this is set to 60 ##### under Windows 61 } 62 63 $ENV{'PATH'} .= ":$ENV{'GSDLHOME'}/bin/script"; 64 $ENV{'PATH'} .= ":$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}"; 65 66 unshift (@INC, "$ENV{'GSDLHOME'}/perllib"); 67 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 68 } 69 else 70 { 71 print STDERR "Environment variable GSDLHOME not set\n"; 72 exit 1; 51 } 73 52 } 74 53 } … … 84 63 if (open(TMPOUT,">$full_tmpname")) 85 64 { 86 if (flock(TMPOUT,LOCK_EX)) 87 { 65 if (&gflock::lock (webpage_buildcol::TMPOUT)) { 88 66 print TMPOUT $text; 89 67 close(TMPOUT); 90 flock(TMPOUT,LOCK_UN); 91 } 92 else 93 { 68 &gflock::unlock (webpage_buildcol::TMPOUT); 69 70 } else { 94 71 # Problem locking file 95 72 my $mess = "Unable to lock temporary communication file:"; … … 118 95 my $full_importname = &util::filename_cat($full_dirname,"import"); 119 96 120 my $log_filename = &util::filename_cat($ENV{'GSDLHOME'}," log","$dirname.bld");97 my $log_filename = &util::filename_cat($ENV{'GSDLHOME'},"etc","$dirname.bld"); 121 98 if (!open (LOGOUT, ">$log_filename")) 122 99 { … … 140 117 if ($copy_dir =~ m/^yes$/i) 141 118 { 142 my $download_cmd = " ";119 my $download_cmd = "perl " . 
&util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script"); 143 120 my $file_or_url = $building_cfg_text->{'building'}->{'fileorurl'}; 144 121 … … 146 123 { 147 124 # run urlcopy.pl to download files 148 $download_cmd = " urlcopy.pl ";125 $download_cmd = " urlcopy.pl "; 149 126 my @urls = split("\n",$input_dir); 150 127 my $u; … … 161 138 $input_dir =~ s/^\s+//; 162 139 $input_dir =~ s/\s+$//; 163 $download_cmd = " filecopy.pl $input_dir";140 $download_cmd = " filecopy.pl $input_dir"; 164 141 } 165 142 … … 194 171 } 195 172 196 if ( $copy_dir =~ /^no$/i)173 if ((defined $copy_dir) && ($copy_dir =~ /^no$/i)) 197 174 { 198 175 # link it … … 221 198 { 222 199 # Import operation 223 my $import_cmd = "import.pl -removeold $dirname"; 200 my $import_cmd = "perl "; 201 $import_cmd .= &util::filename_cat($ENV{'GSDLHOME'}, "bin", "script", "import.pl"); 202 $import_cmd .= " -removeold $dirname"; 224 203 225 204 if(!open(IMPORTOUT,"$import_cmd 2>&1 |")) … … 254 233 255 234 # Build operation 256 my $build_cmd = ""; 235 my $build_cmd = "perl " . 
236 &util::filename_cat($ENV{'GSDLHOME'}, "bin", "script", "buildcol.pl"); 257 237 if (($do_import eq "true") 258 238 || (($do_import eq "false") && (-e $full_archivename))) 259 239 { 260 $build_cmd = "buildcol.pl$dirname";240 $build_cmd .= " $dirname"; 261 241 } 262 242 else 263 243 { 264 265 $build_cmd = "buildcol.pl";266 244 $build_cmd .= " -archivedir $full_importname"; 267 245 $build_cmd .= " -cachedir $full_archivename"; … … 354 332 = &util::filename_cat($full_dirname,"etc","collect.cfg"); 355 333 356 if (open(CFGIN,"<$cfg_filename")) 357 { 358 if (flock(CFGIN,LOCK_EX)) 359 { 360 # do requested stages for building 361 my $result = do_build($full_dirname,$dirname,$args); 362 flock(CFGIN,LOCK_UN); 363 close(CFGIN); 364 return if ($result ne "success"); 365 } 366 else 367 { 368 # Problem locking file 369 my $mess = "Unable to lock configuration file: $cfg_filename"; 370 print STDERR "$mess\n"; 371 return; 372 } 373 } 374 375 # my $mess_url = "$args->{'httpbuild'}&bca=mess&bc1dirname=$dirname"; 376 # print "Location: $mess_url&head=_headdone_&mess=_messdonebuildcol_\n\n"; 377 # print "done\n"; # in tmp file 334 # do requested stages for building 335 my $result = do_build($full_dirname,$dirname,$args); 336 return if ($result ne "success"); 337 338 my $mess_url = "$args->{'httpbuild'}&bca=mess&bc1dirname=$dirname"; 339 print "Location: $mess_url&head=_headdone_&mess=_messdonebuildcol_\n\n"; 340 print "done\n"; # in tmp file 378 341 } 379 342 380 343 &main(); 381 382 383 384 385 -
branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_buildstatus.pl
r724 r1279 1 #! /usr/local/bin/perl5-w1 #!perl -w 2 2 3 3 ########################################################################### … … 31 31 use CGI; 32 32 use GSDLHOME; 33 use Fcntl ':flock';34 33 35 34 require util; … … 66 65 } 67 66 68 my $full_tmpname 69 = &util::filename_cat($ENV{'GSDLHOME'},"tmp",$tmpname); 67 my $full_tmpname = &util::filename_cat($ENV{'GSDLHOME'},"tmp"); 68 69 if (!-d $full_tmpname) { 70 mkdir ($full_tmpname, 0777); 71 } 72 73 $full_tmpname = &util::filename_cat($full_tmpname, $tmpname); 70 74 71 75 if (open(TMPIN,"<$full_tmpname")) … … 81 85 $bc1finished = 1 if ($mess eq "Done"); 82 86 $bc1finished = -1 if ($mess =~ m/^Error:/); 87 # escape backslashes so they don't vanish from printed filenames 88 $mess =~ s/\\/\\\\/g; 83 89 &webpageutil::status_location($args,$mess,$tmpname,$bc1finished); 84 90 … … 96 102 97 103 &main(); 98 99 100 101 102 -
branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_delcol.pl
r841 r1279 1 #! /usr/local/bin/perl5-w1 #!perl -w 2 2 3 3 ########################################################################### … … 30 30 use CGI; 31 31 use GSDLHOME; 32 use Fcntl ':flock';33 32 34 33 require util; … … 73 72 &util::rm_r($col_dir); 74 73 75 # delete from collections.txt76 my $collist_filename77 = &util::filename_cat($ENV{'GSDLHOME'},"etc","collections.txt");78 if (open(CLIN,"<$collist_filename"))79 {80 if (flock(CLIN,LOCK_EX))81 {82 my @keep_dirnames = ();83 while (defined($line=<CLIN>))84 {85 chop $line;86 push(@keep_dirnames,$line) if ($line ne $dirname);87 }88 close(CLIN);89 90 if (open(CLIN,">$collist_filename"))91 {92 print CLIN join("\n",@keep_dirnames), "\n";93 }94 95 flock(CLIN,LOCK_UN);96 close(CLIN);97 }98 else99 {100 # problem locking file101 my $mess = "Unable to lock collection list configuration";102 $mess .= " file: $collist_filename";103 &webpageutil::error_location($args,$mess);104 return;105 }106 }107 else108 {109 # problem opening file for reading in110 111 my $mess = "Unable to open for input the collection list";112 $mess .= " configuration file: $collist_filename";113 &webpageutil::error_location($args,$mess);114 return;115 }116 74 } 117 75 elsif ($delete_area eq "import") … … 140 98 my $mess_url = "$args->{'httpbuild'}&bca=mess"; 141 99 $mess_url .= "&bc1dirname=$dirname" if ($delete_area ne "all"); 142 print " Content: text/html\n\n$mess_url&head=_headdone_&mess=_messdonedelcol_\n\n";100 print "Location: $mess_url&head=_headdone_&mess=_messdonedelcol_\n\n"; 143 101 return; 144 102 -
branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_editcol.pl
r724 r1279 1 #! /usr/local/bin/perl5-w1 #!perl -w 2 2 3 3 ########################################################################### … … 28 28 # This program is a webpage wrapper for saving an edited config file 29 29 30 package webpage_editcol; 31 30 32 use CGI; 31 33 use GSDLHOME; 32 use Fcntl ':flock';34 use gflock; 33 35 34 36 require util; … … 70 72 if (open(CFGOUT,">$cfg_filename")) 71 73 { 72 if (flock(CFGOUT,LOCK_EX)) 73 { 74 if (&gflock::lock (webpage_editcol::CFGOUT)) { 74 75 my $cfg_text = $args->{'bc1cfgfile'}; 75 76 print CFGOUT "$cfg_text"; 76 flock(CFGOUT,LOCK_UN);77 &gflock::unlock (webpage_editcol::CFGOUT); 77 78 close(CFGOUT); 78 79 } -
branches/New_Config_Format-branch/gsdl/cgi-bin/webpage_mkcol.pl
r724 r1279 1 #! /usr/local/bin/perl5-w1 #!perl -w 2 2 3 3 ########################################################################### … … 28 28 # This program is a webpage wrapper to the mkcol.pl process 29 29 30 package webpage_mkcol; 31 30 32 use CGI; 31 33 use GSDLHOME; 32 use Fcntl ':flock';34 use gflock; 33 35 34 36 require util; … … 41 43 my %args = (); 42 44 45 open (FILE, '>d:\gsdl\logout.txt') || die; 46 43 47 foreach $p ($cgi->param()) 44 48 { 45 49 $args{$p} = $cgi->param($p); 46 } 47 50 51 print FILE "webpage_mkcol.pl - $p -> $args{$p}\n"; 52 } 53 54 close FILE; 48 55 return \%args; 49 56 } … … 85 92 else 86 93 { 87 # clean up input for heuristic that derives directory name for a new collection 94 # clean up input for heuristic that derives directory name for a 95 # new collection 88 96 $fullname =~ s/\s+/ /g; 89 97 $fullname =~ tr/[A-Z]/[a-z]/; … … 149 157 my $acronyms = $args->{'bc1acronyms'}; 150 158 151 my $cmd = "mkcol.pl"; 159 my $cmd = "perl "; 160 $cmd .= &util::filename_cat($ENV{'GSDLHOME'}, "bin", "script", "mkcol.pl"); 152 161 $cmd .= " -title \"$fullname\""; 153 162 $cmd .= " -creator $contact_email"; 154 163 $cmd .= " -about \"$about_desc\""; 155 $cmd .= " -plugins \"GMLPlug ${src_format}Plug ArcPlug RecPlug\""; 164 $cmd .= " -plugin \"GMLPlug\""; 165 $cmd .= " -plugin \"${src_format}Plug\""; 166 $cmd .= " -plugin \"ArcPlug\""; 167 $cmd .= " -plugin \"RecPlug\""; 156 168 ### $cmd .= " -refine \"$refine_plugs\""; 157 169 $cmd .= " $unique_dirname"; 170 158 171 my $status = system($cmd); 159 172 $status /= 256; … … 162 175 { 163 176 # append copydir, file_or_url and input_dir to end of collect.cfg 177 # we'll also append DocumentUseHTML if processing HTML docs 164 178 my $cfg_filename 165 179 = &util::filename_cat($ENV{'GSDLHOME'},"collect",$unique_dirname, … … 167 181 if (open(CFGAPP,">>$cfg_filename")) 168 182 { 169 if (flock(CFGAPP,LOCK_EX)) 170 { 183 if (&gflock::lock (webpage_mkcol::CFGAPP)) { 171 184 print CFGAPP "\n"; 185 186 if 
($src_format eq "HTML") { 187 print CFGAPP "format\tDocumentUseHTML\ttrue\n\n"; 188 } 189 172 190 print CFGAPP "building\tfileorurl\t$file_or_url\n"; 173 191 print CFGAPP "building\tinputdir\t$input_dir\n"; 174 192 print CFGAPP "building\tcopydir\t\t$copy_dir\n"; 175 193 print CFGAPP "building\tingsdlarea\t$in_gsdl_area\n"; 176 flock(CFGAPP,LOCK_UN);194 &gflock::unlock (webpage_mkcol::CFGAPP); 177 195 close(CFGAPP); 178 196 } … … 195 213 return; 196 214 } 197 198 199 # append dirname to end of collection config file200 my $collist_filename201 = &util::filename_cat($ENV{'GSDLHOME'},"etc","collections.txt");202 if (open(CLAPP,">>$collist_filename"))203 {204 if (flock(CLAPP,LOCK_EX))205 {206 print CLAPP "$unique_dirname\n";207 flock(CLAPP,LOCK_UN);208 close(CLAPP);209 }210 else211 {212 # problem locking file213 my $mess = "Unable to lock collection list";214 $mess .= " configuration file: $collist_filename";215 &webpageutil::error_location($args,$mess);216 close(CLAPP);217 return;218 }219 }220 else221 {222 # problem223 my $mess = "Unable to append to collection list";224 $mess .= " configuration file: $collist_filename";225 &webpageutil::error_location($args,$mess);226 return;227 }228 229 215 } 230 216 else … … 248 234 249 235 &main(); 250 251 252 253 254 -
branches/New_Config_Format-branch/gsdl/collect/modelcol/etc/collect.cfg
r815 r1279 6 6 indexes document:text 7 7 defaultindex document:text 8 plugin GMLPlug 9 plugin TEXTPlug 10 plugin ArcPlug 11 plugin RecPlug 8 9 **plugins** 12 10 13 11 classify AZList metadata=Title 14 12 15 collectionmeta collectionname "**collection**" 13 collectionmeta collectionname "**title**" 14 collectionmeta iconcollection "" 15 collectionmeta collectionextra "**about**" 16 16 collectionmeta .document:text "documents" -
branches/New_Config_Format-branch/gsdl/docs/TODO
r1036 r1279 1 1 2 2 configuration/installation: 3 4 - iconcollection fields in collect.cfg are dependant on httpprefix5 6 - set up setup.bash/setup.bat etc during configuration (so GSDLHOME7 is full path and script can be run from anywhere)8 3 9 - autoconfigure/InstallShield to ask where cgi-bin directory is?10 11 4 unix: 12 5 13 - builddemo.sh and everything in bin/script must be executable 14 15 - maybe look at editing all perl scripts during configuration to 16 get #!/usr/bin/... line pointing to correct place. definitely 17 wants to default to /usr/bin/perl instead of /usr/local/bin/perl5 6 - look into getting perl scripts to run correctly on any operating 7 system (i.e. the #! stuff) -- those in cgi-bin particularly need 8 to work on windows too (#!perl on windows, #!/usr/bin/perl on 9 linux) -- perlrun manpage 18 10 19 11 - add fastcgi to distribution … … 21 13 windows: 22 14 23 - use Installshield to create packages (maybe use 'package for the 24 web') of: 25 - full source code with building capability 26 - server and cd-rom executables with building capability 27 - server and cd-rom executables only 15 - use Installshield to create windows packages - wait for George to 16 decide what he's going to do about InstallShield 28 17 29 - use same executable for both cd-rom executables 18 - call server version of cd-rom executable 'standard' and other 19 version 'backup' (or maybe 'gold' and 'silver'?) 30 20 31 - version string in fnord.cpp should be changed when a new version32 is released - maybe the support.htm file too21 - fix up Local Library - somehow need to have option of old server 22 and single-user (or Gold and Silver?) within same executable. 
33 23 34 write perl script for updating distribution35 24 36 25 tidy up text versions of macro files 37 26 38 Write a perl script to go through a translated macro file and generate 39 appropriate icons 40 41 add german interface 27 look further into creating images containing non-latin1 characters 42 28 43 29 create default collection images at build time with gimp script 44 45 convert crappy old scheme gimp scripts to perl46 30 47 31 fix query caching - do caching in receptionist, finish off cross collection … … 53 37 add to 54 38 55 get remaining plugins tidied up and using passed in options - file56 extension option for all plugins57 58 get all plugins to use multiread object and throw away the specialized GB59 plugins60 61 39 write a gzip/bzip/zip/tar plugin and take any remaining gzip stuff out of 62 40 rest of plugins 63 41 64 tidy up classifiers, make collection-specific ones more object oriented, 65 add section and compact options instead of using separate classifiers - get 66 classifiers using same option passing code as plugins now use 42 look at speeding up acronym extraction code, add more options 67 43 68 44 create a general classifier for doing stuff like NPepaList.pm does. should … … 70 46 etc. 71 47 72 sort out arrows/paging of browserclasses (for queryaction too!) 73 -DocumentBottomArrows?? - want arrows at bottom as well as top of pages for 74 gberg type collections 48 for gsdl-3.0 75 49 76 tidy up automatic help text - need to work out type of collection at build 77 time - get rid of the hacky HelpNoDocs, HelpBibDocs and HelpBookDocs config 78 file entries 50 - extend configuration file syntax - tidy up all the format stuff - 51 replace DocumentImages, DocumentTitles and DocumentHeading with 52 DocumentColumn stuff (develop New_Config_Format-branch CVS branch) 79 53 80 fix up collection specific metadata - content negotiation?? 
54 - tidy up classifiers, make collection-specific ones more object oriented 55 (probably create classifier base class), add section and compact 56 options instead of using separate classifiers - get classifiers using 57 same option passing code as plugins now use 81 58 82 append to init and error logs instead of overwriting 59 - sort out arrows/paging of browserclasses (for queryaction too!) 60 DocumentBottomArrows?? - want arrows at bottom as well as top of pages 61 for gberg type collections 83 62 84 implement a more robust way of dealing with the way browsers encode form 85 arguments when you don't want them to - check that 86 _decodedcompressedoptions_ has been set up correctly everywhere 63 - tidy up automatic help text - need to work out type of collection at 64 build time - get rid of the hacky HelpNoDocs, HelpBibDocs and 65 HelpBookDocs config file entries 87 66 88 extend configuration file syntax - tidy up all the format stuff 67 - fix up collection specific metadata - content negotiation?? 68 "chapters" = "Kapiteln" 69 "section titles" = "Sektionstiteln" 70 "entire books" = "ganzen Büchern" 89 71 90 replace DocumentImages, DocumentTitles and DocumentHeading with 91 DocumentIcon (values of true, false and formatstring - true may be part of 92 formatstring) 72 - add ability to use a separate formatstring for each index when 73 displaying query results 93 74 94 fix up formattools - bug preventing nested If/Or - check that no 'here' 95 pointers can run past 'end' 75 - make interface languages and encodings easier to add. defining 76 languages and encodings should be done in main.cfg (or collect.cfg for 77 collection specific control). should also define: 78 - which encodings a given language may use -- i.e. the preferences page 79 shouldn't let you select an encoding that doesn't work with the 80 currently selected language 81 - which encoding is the default for a given language 82 - which language is the default. 
96 83 97 get numwords (and maybe other stats?) working in collection info - maybe98 set as macros99 84 100 85 tidy things up so that get_collectinfo is only called once per collection 101 (at init time for fastcgi) - it's currently being called all over the place 102 103 get BrowseFilter EndResults option to accept -1 86 (at init time for fastcgi) - it's currently being called all over the 87 place. maybe load collection info into recpt.collectinfo structure. load in 88 as required for each collection so structure would slowly fill up with all 89 collections when using fastcgi 104 90 105 91 add Language metadata to all documents by default?? 106 92 107 fix the way the interface language preference is implemented - should check 108 somewhere that current language matches one of those specified in config 109 file 93 usage page - Matthias Dalmeier 110 94 111 bug when classification is empty. CL number still gets incremented so 112 formatstrings don't match as expected 95 create a usage page from ftp logs (build on ftp_stats.pl script) 113 96 114 usage page 115 116 if document has only one level - e.g. TEXTPlug or HTMLPlug; top level type 117 should be Invisible so that tables of contents aren't displayed 118 119 add browserclasses to statusaction 120 121 add ability to use a separate formatstring for each index when displaying 122 query results 123 124 rebuild fao1 and fao2 with fixed up version of HTMLPlug (no multiple copies 125 of images 126 127 rebuild Davids collections: 128 - whist 129 - musvid 130 - ohist 97 tidy up ohist to remove need for public_html/ohist stuff 131 98 132 99 WebPlug - optimize to run reasonably quickly? - Currently works (I think) … … 135 102 build a small collection 136 103 137 look into possibility of clearing highlighting whenever you go to browse138 etc. i.e. 
highlighting only works when coming directly from a search139 140 replace mk_all_dir function with perl module - see if speed improvements141 are worthwhile142 143 104 update collections - set up auto-updating where needed 144 105 - csbib … … 146 107 - tcc - mail updating 147 108 - niupepa 148 149 add option to buildcol.pl to force documents to be paged/hierarchical. need 150 to use it for niupepa collection 109 - gberg 151 110 152 111 sort documents between importing and building 153 112 154 fix plugins option to mkcol.pl 113 get polling for new collections to work when using fastcgi 155 114 156 get polling for new collections to work when using fastcgi - should also 157 produce error message when unable to read collect dir (rather than just 158 exiting) 115 ipc and unesco collections have no 'about' text (maybe others too). 159 116 160 what to do with Makefiles for collection specific receptionists (like 161 cstr), use configure? 117 compiler warnings 162 118 163 check out mgs declaration of error() 119 windows cd-rom version 164 120 165 call server version of cd-rom executable 'standard' and other version 166 'backup' 121 - message in browser selection box to say that IE isn't compatible 122 with 'silver' version (instead of simply greying out selection of 123 IE as currently happens). 167 124 168 put download stuff onto ftp 125 - look into Belgium bug ("Netscape was unable to create a network 126 socket connection... " -- NT 4.0 -- no internet connection -- run 127 programs/accessories/communication/internet connection wizard, 128 answer LAN to all questions -- bug appears to be present if you 129 get above error message when attempting to access 127.0.0.1 -- 130 look into testing for this problem from InstallShield, maybe try 131 reading from 127.0.0.1 and see if it times out? 
169 132 170 fix up publications page - maybe just use an index.html type thing on rose 133 look into need for _LITTLE_ENDIAN flag in mg 171 134 172 look into server security 135 make all external nzdl.org pages plain html rather than macro pages. remove 136 all related images from distribution 173 137 174 fix up compiler warnings (on titoki and rose) 138 ohists RealAudio server appears to be broken 175 139 176 ipc collection has no 'about' text 140 capability to view actual BibTeX entries of csbib (and maybe other?) 141 collections 177 142 178 sort out bbc collections - find real import stuff - delete multiple copies 179 that are laying around 143 scripts for installing w3mir - a .bat version of davids install.sh files 180 144 181 tidy up cstr - image on left above all 5 buttons - title on right above 182 goto line 145 implement proper unicode sorting within classifiers 183 146 184 play with csbib - appear to only get 5 matches for 'references' query on 185 'the' - some indexes also appear to only work some of the time 147 get file locking to work on non GSDL_USE_IOS_H compilers (VC++ 6.0) 186 148 149 produce an error message when no "built" collections are available 150 (currently only produce an error when no collections at all are available) 151 152 have another go at getting gsdl_system() function to work properly on 153 windows 95 (for end-user collection building) 154 -
branches/New_Config_Format-branch/gsdl/etc/VERSION
r1040 r1279 1 gsdl version: 2.132 cvs tag: gsdl- 2133 build version: 2. 01 gsdl version: x.xx 2 cvs tag: gsdl-x_xx-distribution 3 build version: 2.1 -
branches/New_Config_Format-branch/gsdl/etc/main.cfg
r1035 r1279 1 1 maintainer [email protected] 2 2 3 macrofiles style.dm base.dm query.dm help.dm pref.dm \ 3 about.dm home.dm document.dm status.dm \ 4 authen.dm users.dm html.dm english.dm \ 5 english2.dm chinese.dm maori.dm people.dm \ 6 tech.dm prescrpt.dm mg.dm rw.dm build.dm \ 7 extlink.dm gsdlsoft.dm delhistory.dm german.dm 4 about.dm document.dm status.dm \ 5 home.dm \ 6 authen.dm users.dm html.dm build.dm extlink.dm delhistory.dm \ 7 gsdl.dm gsdlsoft.dm prescrpt.dm mg.dm \ 8 english.dm chinese.dm french.dm german.dm maori.dm \ 9 english2.dm french2.dm arabic.dm spanish.dm 10 8 11 status enabled 9 12 usecookies true … … 12 15 macroprecedence c,v,l 13 16 cgiarg shortname=v longname=version multiplechar=false argdefault=0 \ 14 defaultstatus=weak savedarginfo= can17 defaultstatus=weak savedarginfo=must 15 18 16 19 cgiarg shortname=a argdefault=p -
branches/New_Config_Format-branch/gsdl/lib/cfgread.cpp
r1076 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.8.2.1 2000/07/12 22:20:52 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.9 2000/07/05 21:49:22 sjboddie 34 Receptionist now caches collection information to avoid making multiple 35 get_collectinfo calls to collection server 36 30 37 Revision 1.8 2000/04/06 19:57:57 cs025 31 38 Correcting a correction - reinstated all lib files due to silly … … 56 63 57 64 #include "cfgread.h" 58 59 void collectioninfo_t::clear () {60 gsdl_gsdlhome.clear();61 gsdl_gdbmhome.clear();62 }63 65 64 66 int write_ini_line (ofstream &fileout, const text_t &key, const text_t value) { -
branches/New_Config_Format-branch/gsdl/lib/cfgread.h
r1076 r1279 41 41 #endif 42 42 43 struct collectioninfo_t {44 void clear ();45 collectioninfo_t () {clear();}46 47 text_t gsdl_gsdlhome;48 text_t gsdl_gdbmhome;49 };50 51 typedef map<text_t, collectioninfo_t, lttext_t> colinfo_tmap;52 53 43 // return 0 on success, -1 on failure 54 44 int write_ini_line (ofstream &fileout, const text_t &key, const text_t value); -
branches/New_Config_Format-branch/gsdl/lib/display.cpp
r1076 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.19.2.1 2000/07/12 22:20:53 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.21 2000/06/18 22:56:55 sjboddie 34 fixed a bug that I'd introduced earlier when attempting to get things 35 compiling on VC++ 6.0 - the gsdl-2.22 release (and possibly 2.21) was 36 affected and wouldn't have displayed chinese and Arabic characters 37 correctly 38 39 Revision 1.20 2000/05/12 03:09:23 sjboddie 40 minor modifications to get web library compiling under VC++ 6.0 41 30 42 Revision 1.19 2000/04/06 19:57:58 cs025 31 43 Correcting a correction - reinstated all lib files due to silly … … 684 696 // bigendian should be set to 1 685 697 // 0 will be returned when the end of the file has been found 686 unsigned short my_uni_get ( istream &fin, int &line,698 unsigned short my_uni_get (unistream &fin, int &line, 687 699 int &isunicode, int &bigendian) { 688 700 unsigned short c = 0; … … 692 704 // get the next two characters 693 705 unsigned char c1 = 0, c2 = 0; 706 694 707 if (!fin.eof()) fin.get(c1); 695 708 if (!fin.eof()) fin.get(c2); … … 766 779 767 780 // open the file 768 ifstream fin(filenamestr); 781 unistream fin (filenamestr); 782 769 783 if (fin.fail()) return -1; // read failed 770 784 -
branches/New_Config_Format-branch/gsdl/lib/display.h
r1076 r1279 64 64 # include <iostream.h> 65 65 # include <fstream.h> 66 67 #define unistream ifstream 68 66 69 #else 67 70 # include <iostream> 68 71 # include <fstream> 72 73 typedef std::basic_ifstream<unsigned char> unistream; 74 69 75 #endif 70 76 … … 82 88 // cyclic macros (a includes b and b includes a) 83 89 #define MAXRECURSIONDEPTH 30 84 85 90 86 91 // class prototypes -
branches/New_Config_Format-branch/gsdl/lib/fileutil.cpp
r1076 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.14.2.1 2000/07/12 22:20:54 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.16 2000/05/12 03:09:22 sjboddie 34 minor modifications to get web library compiling under VC++ 6.0 35 36 Revision 1.15 2000/05/04 08:27:28 sjboddie 37 modifications for windows ports of GCC 38 30 39 Revision 1.14 2000/04/06 19:57:59 cs025 31 40 Correcting a correction - reinstated all lib files due to silly … … 118 127 path2.erase (path2.begin(), here); 119 128 120 // return the concatenation of the two strings 121 return path1 + path2; 129 text_t fullpath = path1 + path2; 130 131 // make sure all the right slashes are used 132 here = fullpath.begin(); 133 end = fullpath.end(); 134 while (here != end) { 135 #ifdef __WIN32__ 136 if (*here == '/') *here = '\\'; 137 #else 138 if (*here == '\\') *here = '/'; 139 #endif 140 here ++; 141 } 142 return fullpath; 122 143 } 123 144 … … 139 160 bool file_exists (const text_t &filename) { 140 161 char *cstr = filename.getcstr(); 162 #ifdef GSDL_USE_IOS_H 141 163 ifstream filestream (cstr, ios::in | ios::nocreate); 164 #else 165 ifstream filestream (cstr, ios::in); 166 #endif 142 167 delete cstr; 143 168 … … 155 180 bool file_writable (const text_t &filename) { 156 181 char *cstr = filename.getcstr(); 182 #ifdef GSDL_USE_IOS_H 157 183 ifstream filestream (cstr, ios::out | ios::nocreate); 184 #else 185 ifstream filestream (cstr, ios::out); 186 #endif 158 187 delete cstr; 159 188 … … 168 197 } 169 198 170 #if defined __WIN32__199 #if defined(__WIN32__) && !defined(__GNUC__) 171 200 172 201 #include <windows.h> 173 202 174 BOOLread_dir (const text_t &dirname, text_tarray &filelist) {203 bool read_dir (const text_t &dirname, text_tarray &filelist) { 175 204 176 205 WIN32_FIND_DATA FileData; 177 206 HANDLE hSearch; 178 DWORD dwAttrs;179 207 char *dirpath = dirname.getcstr(); 180 208 strcat (dirpath, "\\*"); 181 182 BOOL finished = false;183 209 184 210 hSearch = 
FindFirstFile(dirpath, &FileData); -
branches/New_Config_Format-branch/gsdl/lib/gsdlconf.h
r1076 r1279 30 30 #define GSDLCONF_H 31 31 32 #ifndef __WIN32__ 32 #if defined(__WIN32__) 33 #include "../win32cfg.h" 34 #else 33 35 #include "../config.h" 34 36 #endif … … 78 80 #endif 79 81 82 // mktemp 83 84 #if !defined(__WIN32__) || defined(__GNUC__) 85 #include <unistd.h> 86 #define GSDL_MKTEMP(str) mktemp(str) 87 #else 88 #include <io.h> 89 #define GSDL_MKTEMP(str) _mktemp(str) 90 #endif 91 92 80 93 81 94 // file locking 82 95 83 96 #if defined(__WIN32__) 97 98 #if !defined(LK_UNLOCK) && defined(LK_UNLCK) 99 #define LK_UNLOCK LK_UNLCK 100 #endif 101 102 #if defined (GSDL_USE_IOS_H) 84 103 #include <io.h> 85 104 #include <sys/locking.h> 105 106 #if defined(__GNUC__) 107 #define GSDL_GET_FILEDESC(str) str.filedesc() 108 #else 86 109 #define GSDL_GET_FILEDESC(str) str.fd() 110 #endif 111 87 112 #define GSDL_UNLOCK_FILE(fd) _locking(fd, LK_UNLCK, 200) 88 113 #define GSDL_LOCK_FILE(fd) lock_val = _locking(fd, LK_NBLCK, 200) 114 115 #else 116 117 // when using <fstream> (i.e. VC++ 6.0) I can't work out how 118 // to return a filedesc. File locking won't currently work 119 // for those windows compilers requiring GSDL_USE_IOS_H to 120 // not be set -- Stefan. 121 #define GSDL_GET_FILEDESC(str) 1 122 #define GSDL_LOCK_FILE(fd) lock_val = 0 123 #define GSDL_UNLOCK_FILE(fd) 0 124 #endif 89 125 90 126 #else -
branches/New_Config_Format-branch/gsdl/lib/gsdltools.cpp
r1076 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.3.2.1 2000/07/12 22:20:55 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.5 2000/05/19 04:56:01 sjboddie 34 added gsdl_system function for spawning off new processes under windows 35 36 Revision 1.4 2000/05/04 05:16:23 sjboddie 37 Moved dm_safe from htmlutils to gsdltools. Also made it escape '\' 38 characters to prevent their mysterious disapearance from things like 39 windows filenames when they get passed through the macro expander. 40 30 41 Revision 1.3 2000/04/06 19:58:01 cs025 31 42 Correcting a correction - reinstated all lib files due to silly … … 49 60 } 50 61 62 text_t dm_safe (const text_t &instring) { 51 63 64 text_t outstring; 65 text_t::const_iterator here = instring.begin(); 66 text_t::const_iterator end = instring.end(); 67 while (here != end) { 68 if (*here == '_' || *here == '\\') outstring.push_back('\\'); 69 outstring.push_back(*here); 70 here ++; 71 } 72 return outstring; 73 } 74 75 // gsdl_system spawns a completely separate program (i.e. the calling 76 // program continues and terminates normally). Arguments containing special 77 // characters (e.g. '&') should be quoted with "" 78 79 // on unix systems youcan get the same effext as this function by doing a 80 // system call and putting the spawned process in the background 81 // (e.g. system (funcname options &); 82 83 #if defined (__WIN32__) 84 #include <windows.h> 85 void gsdl_system (char *cmd, ostream &logout) { 86 87 STARTUPINFO ps = {sizeof(STARTUPINFO), NULL, NULL, NULL, 88 0, 0, 0, 0, 0, 0, 89 0, 0, 90 0, 0, NULL, 91 NULL, NULL, NULL}; 92 PROCESS_INFORMATION pi; 93 BOOL res = CreateProcess(NULL, 94 cmd, 95 NULL, 96 NULL, 97 FALSE, 98 DETACHED_PROCESS, 99 NULL, 100 NULL, 101 &ps, 102 &pi); 103 if (!res) { 104 logout << "Failed to start " << cmd << " process, error code " << GetLastError(); 105 } 106 107 CloseHandle(pi.hProcess); 108 CloseHandle(pi.hThread); 109 } 110 111 #endif -
branches/New_Config_Format-branch/gsdl/lib/gsdltools.h
r1076 r1279 38 38 bool littleEndian(); 39 39 40 // escapes '\' and '_' characters with '\' 41 // note that single '\' characters occurring 42 // naturally within text (or filenames!!) will 43 // be removed by the dm macro language 44 text_t dm_safe (const text_t &instring); 45 46 47 // gsdl_system spawns a completely separate program (i.e. the calling 48 // program continues and terminates normally). Arguments containing special 49 // characters (e.g. '&') should be quoted with "" 50 51 // on unix systems you can get the same effect as this function by doing a 52 // system call and putting the spawned process in the background 53 // (e.g. system (funcname options &); 54 #if defined (__WIN32__) 55 void gsdl_system (char *cmd, ostream &logout); 40 56 #endif 57 58 #endif -
branches/New_Config_Format-branch/gsdl/lib/gsdlunicode.cpp
r1076 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.12.2.1 2000/07/12 22:20:55 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.14 2000/06/23 05:03:29 nzdl 34 fixed a couple of compiler warnings created by the new encoding stuff 35 36 Revision 1.13 2000/06/23 03:21:38 sjboddie 37 Created converter classes for simple 8 bit encodings that use a 38 simple textual map file. Instances of these classes are used to handle 39 the Windows 1256 (Arabic) encoding. 40 30 41 Revision 1.12 2000/04/06 19:58:02 cs025 31 42 Correcting a correction - reinstated all lib files due to silly … … 70 81 #include <stdio.h> 71 82 83 #if defined(GSDL_USE_OBJECTSPACE) 84 # include <ospace\std\iostream> 85 # include <ospace\std\fstream> 86 #elif defined(GSDL_USE_IOS_H) 87 # include <iostream.h> 88 # include <fstream.h> 89 #else 90 # include <iostream> 91 # include <fstream> 92 #endif 72 93 73 94 … … 551 572 else status = unfinished; 552 573 } 574 575 576 bool simplemapconvert::loadmapfile (bool in) { 577 if (loaded) return true; 578 if (mapfile.empty()) return false; 579 580 char *cfilename = mapfile.getcstr(); 581 #ifdef GSDL_USE_IOS_H 582 ifstream mapfilein (cfilename, ios::in | ios::nocreate); 583 #else 584 ifstream mapfilein (cfilename, ios::in); 585 #endif 586 delete cfilename; 587 if (!mapfilein) return false; 588 589 char cline[2048]; 590 text_t line; 591 592 while (!mapfilein.eof()) { 593 mapfilein.getline (cline, 2048); 594 line.clear(); 595 line.appendcstr (cline); 596 if (line.empty()) continue; 597 // remove comments 598 text_t::iterator end = line.end(); 599 text_t::iterator here = findchar (line.begin(), end, '#'); 600 if (here != end) { 601 line.erase (here, end); 602 if (line.empty()) continue; 603 } 604 605 text_tarray parts; 606 splitchar (line.begin(), line.end(), '\t', parts); 607 608 // do some simple sanity checks 609 if (parts.size() < 2) continue; 610 text_t::iterator begin1 = parts[0].begin(); 611 text_t::iterator begin2 = parts[1].begin(); 
612 if (*begin1 != '0' || *(begin1+1) != 'x') continue; 613 if (*begin2 != '0' || *(begin2+1) != 'x') continue; 614 char *from = parts[0].getcstr(); 615 char *to = parts[1].getcstr(); 616 unsigned int f = 0, t = 0; 617 sscanf (from, "%i", &f); 618 sscanf (to, "%i", &t); 619 delete from; 620 delete to; 621 622 if (in) mapping[(unsigned short)f] = (unsigned short)t; 623 else mapping[(unsigned short)t] = (unsigned short)f; 624 } 625 626 loaded = true; 627 return true; 628 } 629 630 unsigned short simplemapconvert::convert (unsigned short c, bool in) { 631 632 if (!loaded) 633 if (!loadmapfile(in)) return absentc; 634 635 return mapping[c]; 636 } 637 638 639 void simplemapinconvertclass::convert (text_t &output, status_t &status) { 640 output.clear(); 641 642 if (start == NULL || len == 0) { 643 status = finished; 644 return; 645 } 646 647 // don't want any funny sign conversions happening 648 unsigned char *here = (unsigned char *)start; 649 while (len > 0) { 650 651 if (*here < 0x80) 652 output.push_back (*here); // append this character 653 else 654 output.push_back (converter.convert(*here, true)); 655 656 ++here; 657 --len; 658 } 659 660 start = (char *)here; // save current position 661 status = finished; 662 } 663 664 665 void simplemapoutconvertclass::convert (char *output, size_t maxlen, 666 size_t &len, status_t &status) { 667 668 if (input == NULL || output == NULL) { 669 status = finished; 670 return; 671 } 672 673 // don't want any funny sign conversions happening 674 unsigned char *uoutput = (unsigned char *)output; 675 text_t::iterator textend = input->end(); 676 len = 0; 677 while ((len < maxlen) && (texthere != textend)) { 678 679 if (*texthere < 0x80) *uoutput = (unsigned char)(*texthere); 680 else *uoutput = converter.convert (*texthere, false); 681 682 ++uoutput; 683 ++len; 684 ++texthere; 685 } 686 687 if (texthere == textend) status = finished; 688 else status = unfinished; 689 } -
branches/New_Config_Format-branch/gsdl/lib/gsdlunicode.h
r1076 r1279 215 215 }; 216 216 217 218 // Simple input and output converter classes for use with 8 bit encodings 219 // using simple textual map files. Map files should contain (at least) two 220 // tab-separated fields. The first field is the mapped value and the second 221 // field is the unicode value. 222 223 struct ltus_t 224 { 225 bool operator()(const unsigned short &t1, const unsigned short &t2) const 226 { return t1 < t2; } 227 }; 228 229 230 class simplemapconvert { 231 public: 232 simplemapconvert () {absentc=0; loaded=false;} 233 unsigned short convert (unsigned short c, bool in); 234 void setmapfile (const text_t &themapfile) {mapfile = themapfile;} 235 236 protected: 237 bool loadmapfile (bool in); 238 239 map <unsigned short, unsigned short, ltus_t> mapping; 240 bool loaded; 241 text_t mapfile; 242 unsigned short absentc; 243 }; 244 245 246 class simplemapinconvertclass : public inconvertclass { 247 public: 248 virtual ~simplemapinconvertclass () {} 249 250 void convert (text_t &output, status_t &status); 251 252 void setmapfile (const text_t &themapfile) {converter.setmapfile(themapfile);} 253 254 protected: 255 simplemapconvert converter; 256 }; 257 258 class simplemapoutconvertclass : public rzwsoutconvertclass { 259 public: 260 virtual ~simplemapoutconvertclass () {} 261 262 void convert (char *output, size_t maxlen, 263 size_t &len, status_t &status); 264 265 void setmapfile (const text_t &themapfile) {converter.setmapfile(themapfile);} 266 267 protected: 268 simplemapconvert converter; 269 }; 270 271 272 273 217 274 #endif -
branches/New_Config_Format-branch/gsdl/lib/text_t.cpp
r1076 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.17.2.1 2000/07/12 22:20:56 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.18 2000/04/14 02:50:12 sjboddie 34 added text_t versions of joinchar to work with sets and lists 35 30 36 Revision 1.17 2000/04/06 19:58:03 cs025 31 37 Correcting a correction - reinstated all lib files due to silly … … 434 440 { 435 441 if (!first) outtext.push_back (c); 442 first = false; 443 outtext += *here; 444 here++; 445 } 446 } 447 448 void joinchar (const text_tlist &inlist, text_t c, text_t &outtext) 449 { 450 outtext.clear (); 451 452 text_tlist::const_iterator here = inlist.begin (); 453 text_tlist::const_iterator end = inlist.end (); 454 bool first = true; 455 while (here != end) 456 { 457 if (!first) outtext += c; 458 first = false; 459 outtext += *here; 460 here++; 461 } 462 } 463 464 void joinchar (const text_tset &inlist, text_t c, text_t &outtext) 465 { 466 outtext.clear (); 467 468 text_tset::const_iterator here = inlist.begin (); 469 text_tset::const_iterator end = inlist.end (); 470 bool first = true; 471 while (here != end) 472 { 473 if (!first) outtext += c; 436 474 first = false; 437 475 outtext += *here; -
branches/New_Config_Format-branch/gsdl/lib/text_t.h
r1076 r1279 238 238 void joinchar (const text_tlist &inlist, unsigned short c, text_t &outtext); 239 239 void joinchar (const text_tarray &inlist, unsigned short c, text_t &outtext); 240 void joinchar (const text_tset &inlist, text_t c, text_t &outtext); 241 void joinchar (const text_tlist &inlist, text_t c, text_t &outtext); 240 242 void joinchar (const text_tarray &inlist, text_t c, text_t &outtext); 241 243 -
branches/New_Config_Format-branch/gsdl/lib/win32.mak
r1076 r1279 25 25 ########################################################################### 26 26 27 GSDLHOME = d:\home\dl\gsdl28 STLPATH = d:\home\dl\stl\stlport27 GSDLHOME = c:\gsdl 28 STLPATH = c:\stlport 29 29 30 30 AR = lib -
branches/New_Config_Format-branch/gsdl/macros/about.dm
r876 r1279 9 9 # don't want link to 'about' page 10 10 _imagecollection_ {_If_("_iconcollection_" ne "", 11 <img src="_iconcollection_" border=0>, <br><br><h2>_collectionname_</h2>} 11 <img src="_iconcollection_" border=0>,<br><br><h2>_collectionname_</h2>)} 12 _imagecollection_ [v=1] {<br><br><h2>_collectionname_</h2>} 12 13 13 14 ####################################################################### -
branches/New_Config_Format-branch/gsdl/macros/authen.dm
r876 r1279 11 11 _content_ { 12 12 <br><br><br><br> 13 _messagestatus_14 15 13 <form name="login" method="get" action="_gwcgi_"> 16 14 <input type=hidden name="e" value="_If_(_cgiarger_,_cgiarger_,_decodedcompressedoptions_)"> 17 15 _hiddenargs_ 16 <center><table width=_pagewidth_> 17 <tr><td> 18 _messagestatus_ 19 </td></tr> 18 20 19 <table border=0> 20 <tr><td>_textusername_</td><td><input type="text" name="un" value="_cgiargun_" size=10></td></tr> 21 <tr><td>_textpassword_</td><td><input type="password" name="pw" size=10></td></tr> 22 <tr><td></td><td><input type="submit" value="sign in"></td></tr> 21 <tr><td> 22 <table><td>_textusername_</td> 23 <td><input type="text" name="un" value="_cgiargun_" size=10></td> 24 <td></td></tr> 25 <tr><td>_textpassword_</td> 26 <td><input type="password" name="pw" size=10></td> 27 <td><input type="submit" value="sign in"></td> 28 </tr> 29 </table> 30 <td></tr> 23 31 </table> 24 32 </form> -
branches/New_Config_Format-branch/gsdl/macros/base.dm
r1019 r1279 6 6 7 7 ########## 8 9 # won't need this once old versions of library are no longer used10 _decodedcompressedoptions_ {_compressedoptions_}11 12 8 13 9 _htmlextra_ {} … … 27 23 _imagethispage_ {} 28 24 _iconcollection_ {} 29 _imagecollection_ {_If_("_iconcollection_" ne "",<a href="_httppagex_(about)"><img src="_iconcollection_" border=0></a>)} 25 _collectionname_ {} 26 27 _imagecollection_ {_If_("_iconcollection_" ne "", 28 <a href="_httppageabout_"><img src="_iconcollection_" border=0></a>, 29 _imagecollectionv_} 30 _imagecollection_ [v=1] {_imagecollectionv_} 31 _imagecollectionv_ {_If_(_collectionname_,<br><br><h2><a href="_httppageabout_">_collectionname_</a></h2>)} 30 32 31 33 ####################################################################### … … 50 52 _Datewidth_ {_widthtdatex_} 51 53 _Subjectwidth_ {_widthtsubjx_} 54 _Towidth_ {_widthttox_} 55 _Fromwidth_ {_widthtfromx_} 52 56 _Organizationwidth_ {_widthtorgx_} 53 57 _Howtowidth_ {_widththowx_} … … 75 79 _jselection_ {} 76 80 _nselection_ {} 77 81 #granularity selection for mgpp 82 _gselection_ {} 78 83 ####################################################################### 79 84 # navigation bar images … … 88 93 89 94 # image macros for all the classifications currently supported by 90 # this receptionist. i.e. title, author, subject, series, date, 91 # howto, and organization (and search) 95 # this receptionist. 
96 # 97 # image macros for to and from added by gwp on 2000 june 13 92 98 93 99 _imagesearch_ {<a href="_httpquery_" … … 137 143 border="0" alt="_textimageSubject_"></a>} 138 144 _imageSubject_[v=1] {<a href="_httpbrowseSubject_">_textimageSubject_</a><br> 145 } 146 147 _imageTo_ {<a href="_httpbrowseTo_" 148 onMouseover = "img\_on('to')" 149 onMouseout = "img\_off('to')"><img name="to" 150 src="_httpiconttoof_" width="_widthttox_" height="_heightttox_" 151 border="0" alt="_textimageTo_"></a>} 152 _imageTo_[v=1] {<a href="_httpbrowseTo_">_textimageTo_</a><br> 153 } 154 155 _imageFrom_ {<a href="_httpbrowseFrom_" 156 onMouseover = "img\_on('from')" 157 onMouseout = "img\_off('from')"><img name="from" 158 src="_httpicontfromof_" width="_widthtfromx_" height="_heighttfromx_" 159 border="0" alt="_textimageFrom_"></a>} 160 _imageFrom_[v=1] {<a href="_httpbrowseFrom_">_textimageFrom_</a><br> 139 161 } 140 162 … … 283 305 } 284 306 307 _javaTo_ { 308 to\_on = new Image(_widthttox_, _heightttox_); 309 to\_on.src = "_httpiconttoon_"; 310 to\_off = new Image(_widthttox_, _heightttox_); 311 to\_off.src = "_httpiconttoof_"; 312 } 313 314 _javaFrom_ { 315 from\_on = new Image(_widthtfromx_, _heighttfromx_); 316 from\_on.src = "_httpicontfromon_"; 317 from\_off = new Image(_widthtfromx_, _heighttfromx_); 318 from\_off.src = "_httpicontfromof_"; 319 } 320 285 321 _javaHowto_ { 286 322 how\_on = new Image(_widththowx_, _heightthowx_); … … 323 359 ####################################################################### 324 360 325 _mailaddr_ { [email protected]}361 _mailaddr_ {[email protected]} 326 362 327 363 _gsdltop_ {_top} … … 336 372 _httpdocimg_ {_httpcollimg_/_thisOID_} 337 373 338 _httpcollection_ {_httpprefix_/collect/_cgiargc_}374 _httpcollection_ {_httpprefix_/collect/_cgiargc_} 339 375 340 376 _httppagex_ {_gwcgi_?e=_compressedoptions_&a=p&p=_1_} … … 348 384 349 385 _httpgreenstone_ {_httppagex_(gsdl)} 386 _httpdownload_ {http://www.nzdl.org/download} 387 
_httppublications_ {_httpdownload_/greenstone/publications} 350 388 351 389 _httpcurrentdocument_ {_gwcgi_?e=_compressedoptions_&cl=_cgiargcl_&d=_cgiargd_} … … 512 550 513 551 _icontabSubjectgreen_ {<img 514 src="_httpicontsubjgr_" height=_heighttsubjx_ width= widthtsubjx_ border=0>}552 src="_httpicontsubjgr_" height=_heighttsubjx_ width=_widthtsubjx_ border=0>} 515 553 _icontabSubjectgreen_[v=1] {_texticontabsubjectgreen_} 554 555 _icontabTogreen_ {<img 556 src="_httpiconttogr_" height=_heightttox_ width=_widthttox_ border=0>} 557 _icontabTogreen_[v=1] {_texticontabtogreen_} 558 559 _icontabFromgreen_ {<img 560 src="_httpicontfromgr_" height=_heighttfromx_ width=_widthtfromx_ border=0>} 561 _icontabFromgreen_[v=1] {_texticontabfromgreen_} 516 562 517 563 _icontabHowtogreen_ {<img … … 528 574 529 575 _icontabOrganizationgreen_ {<img 530 src="_httpicontorggr_" height=_heighttorg gr_ width=_widthtorggr_ border=0>}576 src="_httpicontorggr_" height=_heighttorgx_ width=_widthtorgx_ border=0>} 531 577 _icontabOrganizationgreen_[v=1] {_texticontaborggreen_} 532 578 -
branches/New_Config_Format-branch/gsdl/macros/build.dm
r725 r1279 10 10 <table width=100%> 11 11 <tr> 12 <td><img src=" /gsdl/images/gsdl_height.gif" width=2 height=77></td>12 <td><img src="_httpimg_/spacer.gif" width=2 height=77></td> 13 13 <td>_iconblankbar_ 14 14 <center><h2>_1_</h2></center> … … 44 44 } 45 45 46 _httpiconstop_ {_httpimg_/stop.gif} 46 47 47 48 _iconwizardnext_{<img src="_httpiconmore_" align=absbottom … … 73 74 _bcargfileorurl_ {file} 74 75 _bcarginputdir_ {} 75 _bcargcopydir_ { }76 _bcargcopydir_ {yes} 76 77 _bcargingsdlarea_ {no} 77 78 _bcargacronyms_ {off} … … 287 288 This process is structured as a series of Web pages, overseen by the 288 289 new-collection "wizard." The wizard bar at the bottom of the page 289 shows you the sequence sof pages to be completed. You can return to290 a previous lypage by clicking on the corresponding item in the wizard bar.290 shows you the sequence of pages to be completed. You can return to 291 a previous page by clicking on the corresponding item in the wizard bar. 291 292 </td> 292 293 </tr> … … 454 455 \} 455 456 \} 456 457 457 simulate_get_action("_httpbuild_&bca=newcol"); 458 458 \} … … 654 654 Note, once a collection has been built it 655 655 is not necessary for the source data to remain in the Greenstone 656 collection area -- the collection can be safely access , browsed656 collection area -- the collection can be safely accessed, browsed 657 657 and searched without this. If you wish, however, to make changes 658 to the collection and then rebuilt it then 659 the source data must still be avialable. 658 to the collection and then rebuild, the source data must still be available. 
660 659 </font> 661 660 </td> … … 697 696 698 697 var cgi_prefix = "webpage_mkcol.pl"; 699 cgi_prefix += "?gsdlhome=" + escape("_gsdlhome_"); 700 cgi_prefix += "&httpbuild=" + escape("_httpbuild_"); 698 cgi_prefix += "?httpbuild=" + escape("_httpbuild_"); 701 699 cgi_prefix += "&bc1refine=" + escape(bc1refine); 702 700 … … 861 859 \{ 862 860 var cgi_prefix = "webpage_editcol.pl"; 863 cgi_prefix += "?gsdlhome=" + escape("_gsdlhome_"); 864 cgi_prefix += "&httpbuild=" + escape("_httpbuild_"); 861 cgi_prefix += "?httpbuild=" + escape("_httpbuild_"); 865 862 cgi_prefix += "&bc1cfgfile=" + escape(form.bc1cfgfile.value); 866 863 … … 906 903 907 904 <tr> 908 <td>Colle tion to edit:</td>905 <td>Collection to edit:</td> 909 906 <td colspan=3> 910 907 _fullnamemenu_ … … 1027 1024 1028 1025 var cgi_prefix = "_httpbuild_&bca=buildcol"; 1029 cgi_prefix += "&gsdlhome=" + escape("_gsdlhome_");1030 1026 cgi_prefix += "&httpbuild=" + escape("_httpbuild_"); 1031 1027 cgi_prefix += "&wizard=buildframe"; … … 1043 1039 1044 1040 var cgi_prefix = "_httpbuild_&bca=buildcol"; 1045 cgi_prefix += "&gsdlhome=" + escape("_gsdlhome_");1046 1041 cgi_prefix += "&httpbuild=" + escape("_httpbuild_"); 1047 1042 cgi_prefix += "&wizard=buildcol"; … … 1164 1159 1165 1160 1161 # not sure why we need 3 frames here when one appears to always be blank 1162 # - Stefan 1163 #_contentbuildframe_ { 1164 1165 #<frameset rows="*,150,*" border=0> 1166 # <noframes><body bgcolor="#ffffff"> 1167 # <p>You must have a frame enabled browser to view this.</p> 1168 # </body> 1169 # </noframes> 1170 # <frame src="_httpbuild_&bca=buildcol&wizard=buildexec&bc1dirname=_bcargdirname_&bc1tmpname=_bcargtmpname_" name=infoframe> 1171 # <frame src="_httpbuild_&bca=buildstatus&bc1tmpname=_bcargtmpname_" name=execframe> 1172 # <frame src="_httpbuild_&bca=blankpage" name=blankframe> 1173 #</frameset> 1174 #} 1175 1166 1176 _contentbuildframe_ { 1167 1177 1168 <frameset rows=" *,150,*" border=0>1178 <frameset rows="200,150" border=0> 
1169 1179 <noframes><body bgcolor="#ffffff"> 1170 1180 <p>You must have a frame enabled browser to view this.</p> … … 1173 1183 <frame src="_httpbuild_&bca=buildcol&wizard=buildexec&bc1dirname=_bcargdirname_&bc1tmpname=_bcargtmpname_" name=infoframe> 1174 1184 <frame src="_httpbuild_&bca=buildstatus&bc1tmpname=_bcargtmpname_" name=execframe> 1175 <frame src="_httpbuild_&bca=blankpage" name=blankframe>1176 1185 </frameset> 1177 1186 } … … 1189 1198 1190 1199 <!-- Hide code from non-js browsers 1191 var timer = 5;1200 var timer = 10; 1192 1201 function restart_count_down() \{ 1193 timer= 5;1202 timer=10; 1194 1203 setTimeout("count_down()",1000); 1195 1204 \} … … 1210 1219 function update_status_page() \{ 1211 1220 var cgi_cmd = "webpage_buildstatus.pl"; 1212 cgi_cmd += "?gsdlhome=" + escape("_gsdlhome_"); 1213 cgi_cmd += "&httpbuild=" + escape("_httpbuild_"); 1221 cgi_cmd += "?httpbuild=" + escape("_httpbuild_"); 1214 1222 cgi_cmd += "&bc1dirname=" + escape("_bcargdirname_"); 1215 1223 cgi_cmd += "&bc1tmpname=" + escape("_bcargtmpname_"); … … 1235 1243 \} 1236 1244 1237 cgi_cmd += "&gsdlhome=" + escape("_gsdlhome_");1238 1245 cgi_cmd += "&httpbuild=" + escape("_httpbuild_"); 1239 1246 cgi_cmd += "&bc1dirname=" + escape("_bcargdirname_"); … … 1358 1365 \{ 1359 1366 var cgi_prefix = "webpage_delcol.pl"; 1360 cgi_prefix += "?gsdlhome=" + escape("_gsdlhome_"); 1361 cgi_prefix += "&httpbuild=" + escape("_httpbuild_"); 1367 cgi_prefix += "?httpbuild=" + escape("_httpbuild_"); 1362 1368 1363 1369 simulate_get_action(cgi_prefix); … … 1394 1400 1395 1401 <tr> 1396 <td>Colle tion to delete:</td>1402 <td>Collection to delete:</td> 1397 1403 <td colspan=3> 1398 1404 _fullnamemenu_ -
branches/New_Config_Format-branch/gsdl/macros/document.dm
r944 r1279 155 155 _iconSubjectpage_ [v=1] {<h2>_texticonhsubj_</h2>} 156 156 157 _iconTopage_ {<img src="_httpiconhto_" width=_widthhto_ 158 height=_heighthto_} 159 _iconTopage_ [v=1] {<h2>_texticonhto_</h2>} 160 161 _iconFrompage_ {<img src="_httpiconhfrom_" width=_widthhfrom_ 162 height=_heighthfrom_} 163 _iconFrompage_ [v=1] {<h2>_texticonhfrom_</h2>} 164 157 165 _iconSeriespage_ {<img src="_httpiconhser_" width=_widthhser_ 158 166 height=_heighthser_} … … 229 237 _tab_ {<td>_icontab_</td>} 230 238 231 _iconcontracttoc_ {<img name="concon" src="_httpiconeconcof_" width=_widtheconcx_ height=_heighteconc of_ alt="_texticoncontracttoc_" border=0>}239 _iconcontracttoc_ {<img name="concon" src="_httpiconeconcof_" width=_widtheconcx_ height=_heighteconcx_ alt="_texticoncontracttoc_" border=0>} 232 240 _iconcontracttoc_[v=1] {_texticoncontracttoc_} 233 241 234 _iconexpandtoc_ {<img name="expcon" src="_httpiconeexpcof_" width=_widtheexp x_ height=_heighteexpcx_ alt="_texticonexpandtoc_" border=0>}242 _iconexpandtoc_ {<img name="expcon" src="_httpiconeexpcof_" width=_widtheexpcx_ height=_heighteexpcx_ alt="_texticonexpandtoc_" border=0>} 235 243 _iconexpandtoc_[v=1] {_texticonexpandtoc_} 236 244 -
branches/New_Config_Format-branch/gsdl/macros/english.dm
r1032 r1279 72 72 _textimageDate_ {Browse by date} 73 73 _textimageSubject_ {Browse by subject category} 74 _textimageTo_ {Browse by To field} 75 _textimageFrom_ {Browse by From field} 74 76 _textimageOrganization_ {Browse by organization} 75 77 _textimageHowto_ {Browse how to categories} … … 85 87 _texticontablistgreen_ {Listing} 86 88 _texticontabsubjectgreen_{Subjects} 89 _texticontabtogreen_{To} 90 _texticontabfromgreen_{From} 87 91 _texticontaborggreen_{Organization} 88 92 _texticontabhowgreen_{How to} … … 116 120 _textmonth12_ {December} 117 121 122 _Document_ {Document} 123 _Section_ {Section} 124 _Paragraph_ {Paragraph} 125 118 126 _magazines_ {Magazines} 119 127 128 _nzdlpagefooter_ {<p>_iconblankbar_ 129 <p><a href="http://www.nzdl.org">New Zealand Digital Library Project</a> 130 <br><a href="http://www.cs.waikato.ac.nz/cs">Department of Computer Science</a>, 131 <a href="http://www.waikato.ac.nz">University of Waikato</a>, 132 New Zealand} 120 133 121 134 #------------------------------------------------------------ … … 174 187 _heighttsubjx_ {17} 175 188 189 ## "to" ## nav_bar_button ## tto ## 190 _httpiconttogr_ {_httpimg_/ttogr.gif} 191 _httpiconttoon_ {_httpimg_/ttoon.gif} 192 _httpiconttoof_ {_httpimg_/ttoof.gif} 193 _widthttox_ {87} 194 _heightttox_ {17} 195 196 ## "from" ## nav_bar_button ## tfrom ## 197 _httpicontfromgr_ {_httpimg_/tfromgr.gif} 198 _httpicontfromon_ {_httpimg_/tfromon.gif} 199 _httpicontfromof_ {_httpimg_/tfromof.gif} 200 _widthtfromx_ {87} 201 _heighttfromx_ {17} 202 176 203 ## "organization" ## nav_bar_button ## torg ## 177 204 _httpicontorggr_{_httpimg_/torggr.gif} … … 269 296 _textCreatorpage_ {_texticonhauth_} 270 297 _textSubjectpage_ {_texticonhsubj_} 298 _textTopage_ {_texticonhto_} 299 _textFrompage_ {_texticonhfrom_} 271 300 _textSeriespage_ {_texticonhser_} 272 301 _textDatepage_ {_texticonhdate_} … … 279 308 _texticonhauth_ {Authors A-Z} 280 309 _texticonhsubj_ {Subjects} 310 _texticonhto_ {To} 311 _texticonhfrom_ 
{From} 281 312 _texticonhser_ {Series} 282 313 _texticonhdate_ {Dates} … … 330 361 _httpiconhauth_ {_httpimg_/h\_auth.gif} 331 362 _widthhauth_ {200} 363 _heighthauth_ {57} 332 364 333 365 ## "subjects" ## green_title ## h_subj ## … … 335 367 _widthhsubj_ {200} 336 368 _heighthsubj_ {57} 369 370 ## "to" ## green_title ## h_to ## 371 _httpiconhto_ {_httpimg_/h\_to.gif} 372 _widthhto_ {200} 373 _heighthto_ {57} 374 375 ## "from" ## green_title ## h_from ## 376 _httpiconhfrom_ {_httpimg_/h\_from.gif} 377 _widthhfrom_ {200} 378 _heighthfrom_ {57} 337 379 338 380 ## "series" ## green_title ## h_ser ## … … 458 500 _textselect_ {_If_(_cgiargb_,_textadvancedsearch_,_textsimplesearch_)} 459 501 460 _textsimplesearch_ {Search for _If_(_hselection_, _hselection_)_If_(_jselection_,_textjselect_)_If_(_ nselection_, in _nselection_ language)502 _textsimplesearch_ {Search for _If_(_hselection_, _hselection_)_If_(_jselection_,_textjselect_)_If_(_gselection_, at _gselection_ level)_If_(_nselection_, in _nselection_ language) 461 503 which contain _querytypeselection_ of the words} 462 504 463 _textadvancedsearch_ {Search_If_(_hselection_, _hselection_, _defaultindextext_)_If_(_jselection_,_textjselect_)_If_(_ nselection_, in _nselection_ language)505 _textadvancedsearch_ {Search_If_(_hselection_, _hselection_, _defaultindextext_)_If_(_jselection_,_textjselect_)_If_(_gselection_, at _gselection_ level)_If_(_nselection_, in _nselection_ language) 464 506 using _querytypeselection_ query} 465 507 … … 500 542 ## "display" ## hand_made ## 501 543 _httpicondisplay_ {_httpimg_/display.gif} 502 _widthdisplay {60}544 _widthdisplay_ {60} 503 545 _heightdisplay_ {20} 504 546 … … 538 580 _textlangeng_ {English} 539 581 _textlanggerman_ {German} 582 _textlangfrench_ {French} 583 _textlangspanish_ {Spanish} 540 584 _textlangmaori_ {M_amn_ori} 541 585 _textlangchinese_ {Chinese} 586 _textlangarabic_ {Arabic} 542 587 _textgraphical_ {Graphical} 543 588 _texttextual_ {Textual} … … 605 650 
_textDateshort_ {access publications by date} 606 651 _textSubjectshort_ {access publications by subject} 652 _textToshort_ {access publications by To field} 653 _textFromshort_ {access publications by From field} 607 654 _textTitleshort_ {access publications by title} 608 655 _textBrowseshort_ {browse publications} … … 642 689 pressing the <i>subjects</i> button. This brings up a list of subjects, 643 690 represented by bookshelves. } 691 692 _textTolong_ { <p>You can <i>access publications by To field</i> by 693 pressing the <i>to</i> button. This brings up a list of addressees. } 694 695 _textFromlong_ { <p>You can <i>access publications by From field</i> by 696 pressing the <i>from</i> button. This brings up a list of senders. } 644 697 645 698 _textSerieslong_ { <p>You can <i>access publications by series</i> by -
branches/New_Config_Format-branch/gsdl/macros/english2.dm
r1034 r1279 23 23 _colnotbuilt_ {Collection not built.} 24 24 25 _textp eople_ {People}25 _textpagetitle_ {Greenstone Digital Library} 26 26 _textfb_ {Feedback} 27 _textpub_ {Publications}28 _texttec_ {Technology}29 _textrw_ {Related Work}30 27 _textinfosheet_ {Info Sheet} 31 _textscreenshots_ {Screen Shots}32 _textnpepainfosheet_ {Niupepa Info Sheet}33 _textpagetitle_ {Greenstone Digital Library}34 _textprojhead_ {The New Zealand Digital Library Project}35 28 29 _textprojhead_ {The Greenstone software and <br>The New Zealand Digital Library Project} 36 30 _textprojinfo_ { 37 <h4>The NZDL system</h4> 38 39 <p> The New Zealand Digital Library system comprises several demonstration 40 collections -- computer science technical reports and bibliographies, 41 literary works, humanitarian and development information, magazines -- and 42 makes them available over the Web through full-text interfaces. Behind the 43 query interface lies a huge collection providing gigabytes of information. 44 We hope you find what you want, or at least something intriguing! 45 46 <h4>The Greenstone software</h4> 47 48 <p> The Greenstone Digital Library software provides a new way of 49 organizing information and making it available over the Internet. A 50 <i>collection</i> of information comprises several (typically several 51 thousand, or even several million) <i>documents</i>, which share a uniform 52 searching and browsing interface. The collections in a library are 53 organized in a different way--though they share a strong family 54 resemblance. Although primarily designed for access over the Web, 55 Greenstone collections can be made available, in precisely the same form, 56 on CD-ROM for standalone PCs. Greenstone is open-source software, 57 available under the terms of the Gnu public license. 58 <p>The following websites are among those currently using Greenstone. 
<i>Note that these 59 sites are under development.</i> 60 <ul> 61 <li><a href="http://moby.cisti.nrc.ca/~nzdl/cgi-bin/library">CISTI</a> 62 <li><a href="http://gene.rutgers.edu/cgi-bin/library">Rutgers University</a> 63 <li><a href="http://csdl1.mdx.ac.uk/">Middlesex University</a> 64 <li><a href="http://laraine.unidata.ucar.edu/projects/coohl/htdig/cgi-bin/library">Unidata</a> 65 </ul> 66 <h4>The research</h4> 67 68 <p> The goal of our research program is to explore the potential of 69 internet-based digital libraries. Our vision is to develop systems that 70 automatically impose structure on anarchic, uncatalogued, distributed 71 repositories of information, thereby providing information consumers with 72 effective tools to locate what they need and to peruse it conveniently and 73 comfortably. Our research objectives are to 74 75 <ul> 76 <li> develop technology for creating and automatically 77 maintaining collections; 78 <li> monitor usage to study library users' needs; 79 <li> look at novel interfaces that cater to a wide spectrum of users; 80 <li> find ways to abstract layout and bibliographic information 81 from document files; 82 <li> use this information to enhance presentation and for 83 bibliometric research; 84 <li> assess potential subject areas for public-domain collections; 85 <li> survey and critique other digital library projects. 86 </ul> 31 <p> 32 The Greenstone Digital Library software provides a new way of 33 organizing information and making it available over the Internet or on 34 CD-ROM. It is open-source software, available under the terms of the 35 GNU General Public License. 36 <p> 37 A digital library is made up of a set of collections. Each collection of 38 information comprises several (typically several thousand, or even 39 several million) documents, which share a uniform searching and 40 browsing interface. Collections can be organized in many different 41 ways while retaining a strong family resemblance. 
42 <p> 43 The 44 <a href="http://www.nzdl.org">New Zealand Digital Library Project</a> 45 is a research programme at The University of Waikato whose aim is to 46 develop the underlying technology for digital libraries and make it 47 available publicly so that others can use it to create their own 48 collections. 49 Greenstone was created to further this objective. 50 Further details are available from 51 <a href="http://www.nzdl.org">http://www.nzdl.org</a> 87 52 } 88 53 89 _textotherinfo_ { 90 <table border=0 cellpadding=5><tr valign=top> 91 <td width=50%> 92 <h4>Global Help Projects vzw</h4> 54 _textpoem_ { 55 <br><h2>Kia papapounamu te moana</h2> 93 56 94 <a href="http://www.globalprojects.org">Global Help Projects</a> is a 95 registered charity responsible for the Humanity Libraries Project that 96 provides universal low-cost information access through co-operation between 97 UN Agencies, universities and NGOs. Global Help Projects collaborate 98 extensively with the NZDL project, and use the Greenstone software. 99 </td><td width=50%> 57 <p>kia hora te marino, 58 <br>kia tere te karohirohi, 59 <br>kia papapounamu te moana 100 60 101 <h4>DigiLib Systems Limited</h4> 102 103 <p><a href="http://www.digilibs.com/">DigiLib Systems Limited</a> is an 104 innovative software company that creates international digital libraries. 105 As a major contributor to the Greenstone Digital Library Software they are 106 able to build, customize, and extend digital libraries to meet exacting 107 needs. Please <a href="mailto:[email protected]">contact</a> them for 108 an obligation free quote. </td></tr></table>} 109 110 _textpoem_ { 111 <br><h2 align=left>Kia papapounamu te moana</h2> 112 113 <p>kia hora te marino, 114 <br>kia tere te karohirohi, 115 <br>kia papapounamu te moana 116 117 <p>may peace and calmness surround you, 118 <br>may you reside in the warmth of a summer's haze, 119 <br>may the ocean of your travels be as smooth as the polished greenstone. 
61 <p>may peace and calmness surround you, 62 <br>may you reside in the warmth of a summer's haze, 63 <br>may the ocean of your travels be as smooth as the polished greenstone. 120 64 } 121 65 122 _textgreenstone_ { <p><br> Greenstone is a semi-precious stone that (like 123 this software) is sourced in New Zealand. In traditional Maori society it 124 was the most highly prized and sought after of all substances. It can 125 absorb and hold <i>wairua</i>, which is a spirit or life force, and is 126 endowed with traditional virtues that make it an appropriate emblem for a 66 _textgreenstone_ { 67 <p>Greenstone is a semi-precious stone that (like this software) is sourced in New Zealand. In traditional Maori society it was the most highly prized and sought after of all substances. It can absorb and hold <i>wairua</i>, which is a spirit or life force, and is endowed with traditional virtues that make it an appropriate emblem for a 127 68 public-domain digital library project. Its lustre shows charity; its 128 69 translucence, honesty; its toughness, courage; and the sharp edge it can … … 134 75 symbolizing the leading edge of technology. 
135 76 136 <p><a href="mailto:[email protected]">Greenstone Digital Library Software</a>137 <br><a href="http://www.cs.waikato.ac.nz/cs">Computer Science Department</a>,138 <a href="http://www.waikato.ac.nz">University of Waikato</a>, New Zealand139 140 <br>October 1999141 77 } 142 78 … … 155 91 _widthselcolgr_ {537} 156 92 _heightselcolgr_ {17} 157 158 159 160 ###################################################################### 161 # 'people' page 162 package people 163 ###################################################################### 164 165 166 #------------------------------------------------------------ 167 # text macros 168 #------------------------------------------------------------ 169 170 _textpagetitle_ {NZDL: People} 171 172 _textsmallrs_ {Related Staff} 173 174 _textsawnzdl_ {Staff associated with the New Zealand Digital Library 175 project are:} 176 _texttrsaawp_ {These research students and software support people are associated with the project:} 177 178 _textstafftable_ { 179 <table> 180 <tr> 181 <td align=right valign=top width=100><a href="http://www.cs.waikato.ac.nz/~ihw">Ian Witten</a></td> 182 <td _1_ align=left valign=top>Project leader; co-author of <a href="http://www.cs.mu.oz.au/mg/"><i>Managing Gigabytes</i></a></td> 183 </tr> 184 <tr> 185 <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/cs/Staff/mark-d.-apperley-.html">Mark Apperley</a></td> 186 <td _1_ align=left valign=top>User interfaces for readers</td> 187 </tr> 188 <tr> 189 <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/cs/Staff/david-bainbridge.html">David Bainbridge</a></td> 190 <td _1_ align=left valign=top>Musical and Web-based collections; optical music recognition</td> 191 </tr> 192 <tr> 193 <td align=right valign=top><a 194 href="http://www.cs.waikato.ac.nz/cs/Staff/sally-jo-cunningham.html">Sally Jo Cunningham</a></td> 195 <td _1_ align=left valign=top>Collections and usage studies</td> 196 </tr> 197 <tr> 198 <td align=right 
valign=top><a href="http://www.cs.waikato.ac.nz/cs/Staff/steve-jones.html">Steve Jones</a></td> 199 <td _1_ align=left valign=top>Phrase-based interfaces, collaborative browsing, usage analysis</td> 200 </tr> 201 <tr> 202 <td align=right valign=top><a 203 href="http://www.cs.waikato.ac.nz/cs/Staff/te-taka-keegan.html">Te Taka Keegan 204 </a></td> 205 <td _1_ align=left valign=top>Maori language systems</td> 206 </tr> 207 <td align=right valign=top><a 208 href="http://www.cs.waikato.ac.nz/Staff/malika-mahoui.html">Malika 209 Mahoui</a></td> 210 <td _1_ align=left valign=top>Text mining, Arabic interfaces</td> 211 </tr> 212 </table> 213 } 214 215 _textgstable_ { 216 <table> 217 <tr> 218 <td align=right valign=top>George Buchanan</a></td> 219 <td _1_ align=left valign=top>Systems support</td> 220 </tr> 221 <tr> 222 <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/~sjboddie">Stefan Boddie</a></td> 223 <td _1_ align=left valign=top>Systems support</td> 224 </tr> 225 <tr> 226 <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/~rjmcnab">Rodger McNab</a></td> 227 <td _1_ align=left valign=top>Systems support</td> 228 </tr> 229 <tr> 230 <td align=right valign=top>YingYing Wen</a></td> 231 <td _1_ align=left valign=top>Text mining, Chinese libraries</td> 232 </tr> 233 <tr> 234 <td align=right valign=top>Stuart Yeates</a></td> 235 <td _1_ align=left valign=top>Text mining, acronym extraction</td> 236 </tr> 237 </table> 238 } 239 240 _textsmallcont_ {Other Contributors} 241 _texttpcsp_ {These people have contributed strongly to the project:} 242 243 _textconttable_ { 244 <table> 245 <tr> 246 <td align=right valign=top width=100>Mark Abrahams</td> 247 <td _1_ align=left valign=top>Client-side browsing interfaces using Java</td> 248 </tr> 249 <tr> 250 <td align=right valign=top width=100><a href="http://www.cosc.canterbury.ac.nz/~tim">Tim Bell</a></td> 251 <td _1_ align=left valign=top>Co-author of <a href="http://www.cs.mu.oz.au/mg/"><i>Managing 
Gigabytes</i></a></td> 252 </tr> 253 <tr> 254 <td align=right valign=top>Matt Humphrey</td> 255 <td _1_ align=left valign=top>Information visualization in the digital library</td> 256 </tr> 257 <tr> 258 <td align=right valign=top><a 259 href="http://www.cs.waikato.ac.nz/~singlis">Stuart Inglis</a></td> 260 <td _1_ align=left valign=top>Document image analysis and optical character recognition</td> 261 </tr> 262 <tr> 263 <td align=right valign=top>Trent Mankelow</a></td> 264 <td _1_ align=left valign=top>School Journal prototype</td> 265 </tr> 266 <tr> 267 <td align=right valign=top><a href="http://www.cosc.canterbury.ac.nz/~bruce">Bruce McKenzie</a></td> 268 <td _1_ align=left valign=top>Original interface to MG</td> 269 </tr> 270 <tr> 271 <td align=right valign=top><a href="http://www.cs.mu.oz.au/~alistair">Alistair Moffat</a></td> 272 <td _1_ align=left valign=top>Co-author of <a href="http://www.cs.mu.oz.au/mg/"><i>Managing Gigabytes</i></a>, 273 created the MG software</td> 274 </tr> 275 <tr> 276 <td align=right valign=top>Todd Reed</td> 277 <td _1_ align=left valign=top>PostScript to text conversion, user interface, WWW server, index building, FTP</td> 278 </tr> 279 <tr> 280 <td align=right valign=top><a 281 href="http://www.cs.waikato.ac.nz/cs/Staff/don-a.-smith.html">Don Smith</a></td> 282 <td _1_ align=left valign=top>Special needs of libraries for mathematical and theoretical materials</td> 283 </tr> 284 <tr> 285 <td align=right valign=top>Che Tamahori</td> 286 <td _1_ align=left valign=top>Designer of New Zealand Digital Library Web 287 pages</td> 288 </tr> 289 <tr> 290 <td align=right valign=top><a 291 href="http://www.cs.waikato.ac.nz/~wjt">Bill Teahan</a></td> 292 <td _1_ align=left valign=top>Language modeling</td> 293 </tr> 294 <tr> 295 <td align=right valign=top>Mahendra Vallabh</td> 296 <td _1_ align=left valign=top>Original FTP script</td> 297 </tr> 298 <tr> 299 <td align=right valign=top><a 300 
href="http://www.cs.waikato.ac.nz/cs/Staff/lloyd-a.-smith.html">Lloyd Smith</a></td> 301 <td _1_ align=left valign=top>Music collections and music retrieval</td> 302 </tr> 303 <tr> 304 <td align=right valign=top>John Venable</td> 305 <td _1_ align=left valign=top>Requirements for digital libraries, and collections for information systems</td> 306 </tr> 307 </table> 308 } 309 310 _textaffiliates_ {There are several affiliates at other universities:} 311 312 _textaffiliatetable_ { 313 <table> 314 <tr> 315 <td align=right valign=top>Elke Duenker</td> 316 <td _1_ align=left valign=top>Cross-cultural issues</td> 317 </tr> 318 <tr> 319 <td align=right valign=top><a href="http://www.cs.waikato.ac.nz/~cgn/ 320 ">Craig Nevill-Manning</a></td> 321 <td _1_ align=left valign=top>PostScript to text conversion, user 322 interface, WWW server, index 323 building, FTP</td> 324 </tr> 325 <tr> 326 <td align=right valign=top>Nina Reeves</td> 327 <td _1_ align=left valign=top>Librarians and library users</td> 328 </tr> 329 <tr> 330 <td align=right valign=top>Yin Leng Theng</td> 331 <td _1_ align=left valign=top>Digital libraries for schools</td> 332 </tr> 333 <tr> 334 <td align=right valign=top>Harold Thimbleby</td> 335 <td _1_ align=left valign=top>User interfaces for digital 336 libraries</td> 337 </tr> 338 </table> 339 } 340 341 342 #------------------------------------------------------------ 343 # icons 344 #------------------------------------------------------------ 345 346 ## "people" ## green_title ## h_people ## 347 _httpiconhpeople_ {_httpimg_/h\_people.gif} 348 _widthhpeople_ {200} 349 _heighthpeople_ {57} 350 351 352 353 ###################################################################### 354 # 'technology' page 355 package technology 356 ###################################################################### 357 358 359 #------------------------------------------------------------ 360 # text macros 361 #------------------------------------------------------------ 362 
363 _textpagetitle_ {NZDL: Technology} 364 365 _content_ { 366 _iconblankbar_ 367 <p>There are several freely available technologies underlying the New Zealand 368 Digital Library: 369 <ul> 370 <li><a href="_httppagex_(gsdlsoft)"><i>Greenstone</i></a>, the digital 371 library system that generates each and every page of this website.<p> 372 373 <li><a href="_httppagex_(prescript)"><i>PreScript</i></a>, a system 374 that converts PostScript to plain ASCII or HTML, detects paragraph boundaries, 375 removes hyphenation, and interprets many ligatures.<p> 376 377 <li><a href="_httppagex_(mg)"><i>MG</i></a>, an enhancement of the <a 378 href="http://www.cs.mu.oz.au/mg"><i>Managing Gigabytes</i></a> full-text 379 retrieval system, that provides flexible stemming methods, weighting terms, 380 term frequencies, merged indexes, machine independent indexes, and a port to 381 MSDOS.<p> 382 383 <li><a href="http://www.cs.waikato.ac.nz/sequitur"><i>Sequitur</i></a>, a 384 method for inferring compositional hierarchies from strings by detecting 385 repetition and factoring it out of the string by forming rules in a 386 grammar. The rules can be composed of non-terminals, giving rise to a 387 hierarchy. Sequitur is useful for recognizing lexical structure in strings, 388 and excels at very long sequences.<p> 389 390 <li><a href="http://www.nzdl.org/Kea"><i>Kea</i></a>, a program for 391 automatically extracting keyphrases from the full text of documents. Candidate 392 keyphrases are identified using rudimentary lexical processing, features are 393 computed for each candidate, and machine learning is used to generate a 394 classifier that determines which candidates should be assigned as 395 keyphrases. <p> 396 397 <li><a href="http://www.cs.waikato.ac.nz/~stevej/Research/Phrasier/"><i>Phrasier</i></a>, a 398 tool to support information seeking activities in a digital library. 
Its novel design 399 reflects the fact that reading, writing, browsing and searching activities are rarely 400 carried out independently of each other. They overlap and interleave in ways which have 401 not been effectively supported by conventional information retrieval interfaces. Consequenly 402 Phrasier blurs the distinction between writing a document and finding material related to it; 403 between reading a document and finding others on the same or similar topics; between keyword 404 searching and subject browsing. <p> 405 406 </ul> 407 408 <br> 409 } 410 411 412 #------------------------------------------------------------ 413 # icons 414 #------------------------------------------------------------ 415 416 ## "technology" ## green_title ## h_tech ## 417 _httpiconhtech_ {_httpimg_/h\_tech.gif} 418 _widthhtech_ {200} 419 _heighthtech_ {57} 420 421 422 423 ###################################################################### 424 # 'status' pages 425 package status 426 ###################################################################### 427 428 429 #------------------------------------------------------------ 430 # text macros 431 #------------------------------------------------------------ 432 433 _textframebrowser_ {You must have a frame enabled browser to view this.} 434 _textusermanage_ {User management} 435 _textlistusers_ {list users} 436 _textaddusers_ {add a new user} 437 438 _textinfo_ {Information} 439 _textgeneral_ {general} 440 _textarguments_ {arguments} 441 _textactions_ {actions} 442 _textprotocols_ {protocols} 443 444 _textcollections_ {Collections} 445 _textnewcoll_ {new collection} 446 _texteditcoll_ {edit collection} 447 _textbuildcoll_ {build collection} 448 _textdeletecoll_ {delete collection} 449 450 _textlogs_ {Logs} 451 _textinitlog_ {init log} 452 _texterrorlog_ {error log} 453 454 _textreturnhome_ {Return to home page} 455 456 _titlewelcome_ { Maintenance and Administration } 457 458 _welcome_ { 459 460 <p> Maintenance and 
administration services available include: 461 view on-line logs; 462 create, maintain and update collections; 463 and access technical information such as CGI arguments. 464 These services are accessed using the 465 side navigation bar on the lefthand side of the page. 466 } 467 468 469 #------------------------------------------------------------ 470 # icons 471 #------------------------------------------------------------ 472 473 474 475 ###################################################################### 476 # html package 477 package html 478 ###################################################################### 479 480 481 #------------------------------------------------------------ 482 # text macros 483 #------------------------------------------------------------ 484 485 _textframebrowser_ {You must have a frame enabled browser to view this.} 486 487 488 #------------------------------------------------------------ 489 # icons 490 #------------------------------------------------------------ 93 _altselcolgr_ {"Select a collection"} 491 94 492 95 -
branches/New_Config_Format-branch/gsdl/macros/german.dm
r1071 r1279 380 380 381 381 ## "EXPANDIEREN 382 DES TEXTS" ## document_button ## eallt ##382 # DES TEXTS" ## document_button ## eallt ## 383 383 _httpiconealltof_ [l=de] {_httpimg_/de/ealltof.gif} 384 384 _httpiconeallton_ [l=de] {_httpimg_/de/eallton.gif} … … 387 387 388 388 ## "KONTRAKTIONDES 389 INHALTS" ## document_button ## econc ##389 # INHALTS" ## document_button ## econc ## 390 390 _httpiconeconcof_ [l=de] {_httpimg_/de/econcof.gif} 391 391 _httpiconeconcon_ [l=de] {_httpimg_/de/econcon.gif} … … 394 394 395 395 ## "SEPARATES 396 FENSTER" ## document_button ## edtch ##396 # FENSTER" ## document_button ## edtch ## 397 397 _httpiconedtchof_ [l=de] {_httpimg_/de/edtchof.gif} 398 398 _httpiconedtchon_ [l=de] {_httpimg_/de/edtchon.gif} … … 401 401 402 402 ## "EXPANDIEREN 403 DES INHALTS" ## document_button ## eexpc ##403 #DES INHALTS" ## document_button ## eexpc ## 404 404 _httpiconeexpcof_ [l=de] {_httpimg_/de/eexpcof.gif} 405 405 _httpiconeexpcon_ [l=de] {_httpimg_/de/eexpcon.gif} … … 408 408 409 409 ## "KONTRAKTION 410 DES TEXTS" ## document_button ## etsec ##410 # DES TEXTS" ## document_button ## etsec ## 411 411 _httpiconetsecof_ [l=de] {_httpimg_/de/etsecof.gif} 412 412 _httpiconetsecon_ [l=de] {_httpimg_/de/etsecon.gif} … … 421 421 422 422 ## " KEINE 423 HERVORHEBUNG" ## document_button ## enhl ##423 #HERVORHEBUNG" ## document_button ## enhl ## 424 424 _httpiconenhlof_ [l=de] {_httpimg_/de/enhlof.gif} 425 425 _httpiconenhlon_ [l=de] {_httpimg_/de/enhlon.gif} … … 508 508 ## "sichtung" ## hand_made ## 509 509 _httpicondisplay_ [l=de] {_httpimg_/display.gif} 510 _widthdisplay {60}510 _widthdisplay_ [l=de] {60} 511 511 _heightdisplay_ [l=de] {20} 512 512 -
branches/New_Config_Format-branch/gsdl/macros/gsdlsoft.dm
r1049 r1279 25 25 _iconblankbar_ 26 26 27 <h 4>The Greenstone software</h4>27 <h2>The Greenstone software</h2> 28 28 29 <p> The Greenstone Digital Library software provides a new way of 30 organizing information and making it available over the Internet. A 31 <i>collection</i> of information comprises several (typically several 32 thousand, or even several million) <i>documents</i>, which share a uniform 33 searching and browsing interface. The collections in a library are 34 organized in a different way--though they share a strong family 35 resemblance. Although primarily designed for access over the Web, 36 Greenstone collections can be made available, in precisely the same form, 37 on CD-ROM for standalone PCs. Greenstone is open-source software, 38 available under the terms of the Gnu public license. Documentation is 39 available in the form of <i><a 40 href="http://www.nzdl.org/download/greenstone/publications/gsdl_manual.pdf">The Greenstone 41 Digital Library Software</a></i> manual. 29 <p> 30 This page explains how to download and install the 31 <a href="_httppagex_(gsdl)">Greenstone Digital Library software</a>. 32 <p> 33 Greenstone is open-source software. 34 It is distributed under the terms of the 35 <a href="http://www.gnu.org/copyleft/gpl.html">GNU General Public License</a>. 36 Documentation is available in the form of 37 <i><a href="_gsdl:httpgsdlmanual_">The Greenstone Digital Library Software manual</a></i>. 38 39 <h4>Downloading Greenstone</h4> 40 <p> 41 <a 42 href="http://www.nzdl.org/download/greenstone/gsdl-2.13.tar.gz"><i>gsdl-2.13.tar.gz</i></a> 43 <i>(4.6 Mb)</i> contains the latest distribution of Greenstone. 44 <p> 45 <a href="http://www.nzdl.org/download/greenstone/publications/gsdl_manual.pdf">gsdl_manual.pdf</a></i> (570 Kb) contains the manual in PDF format. 
42 46 43 47 <h4>Installing Greenstone</h4> … … 45 49 <p> To install on unix systems: 46 50 47 <ul> <li>Download the <a 48 href="http://www.nzdl.org/download/greenstone/gsdl-2.13.tar.gz"><i>gsdl-2.13</i></a> 49 distribution <i>(4.6 Mb)</i>. <li>Extract the gzipped tar archive <i>(tar 50 xvzf gsdl-2.13.tar.gz)</i>. <li>In the resulting gsdl directory type 51 <i>./configure</i>. When the configure script has finished running type 52 <i>make</i>, then <i>make install</i>. <li><i>make install</i> copies the 51 <ul> 52 <li>Download the 53 <a href="http://www.nzdl.org/download/greenstone/gsdl-2.13.tar.gz"><i>gsdl-2.13</i></a> 54 distribution <i>(4.6 Mb)</i>. 55 <li>Extract the gzipped tar archive <i>(tar xvzf gsdl-2.13.tar.gz)</i>. 56 <li>In the resulting gsdl directory type <i>./configure</i>. 57 When the configure script has finished running type <i>make</i>, 58 then <i>make install</i>. 59 <li><i>make install</i> copies the 53 60 compiled executable file into the gsdl/cgi-bin directory by default. To run 54 61 the library as a cgi script move the executable and the gsdlsite.cfg 55 configuration file to your systems cgi-bin directory. <li>The gsdlsite.cfg 62 configuration file to your systems cgi-bin directory. 63 <li>The gsdlsite.cfg 56 64 configuration file must be edited to suit your site. The gsdlhome entry 57 65 will need to be set to point to your gsdl directory. Other fields that may 58 66 need changing are httpprefix (the web path to the gsdl directory) and 59 httpimg (the path to gsdl/images). <li>To build the demonstration 67 httpimg (the path to gsdl/images). 68 <li>To build the demonstration 60 69 collection that comes with the distribution run the builddemo.sh script 61 from within the gsdl directory. <li>For more information on using the 70 from within the gsdl directory. 
71 <li>For more information on using the 62 72 Greenstone software, download <i><a 63 73 href="http://www.nzdl.org/download/greenstone/publications/gsdl_manual.pdf">The Greenstone … … 105 115 Please report bugs or installation problems to <a href="mailto:[email protected]">[email protected]</a> 106 116 117 _nzdlpagefooter_ 118 <br>April 2000 107 119 } -
branches/New_Config_Format-branch/gsdl/macros/home.dm
r1049 r1279 1 1 package home 2 3 _httpmusiclibrary_ {http://nzdl2.cs.waikato.ac.nz/cgi-bin/gwmm?c=meldex&a=page&p=coltitle}4 2 5 3 ####################################################################### … … 14 12 _javalinks_ [v=1] {} 15 13 16 17 18 14 ####################################################################### 19 15 # icons 20 16 ####################################################################### 21 17 22 _iconnzdl_ { 23 <img src="_httpiconnzdl_" width=_widthnzdl_ height=_heightnzdl_>} 18 _iconnzdl_ {<img src="_httpiconnzdl_" width=_widthnzdl_ height=_heightnzdl_ alt="The New Zealand Digital Library">} 24 19 _icongbull_ {<img src="_httpicongbull_">} 25 20 _iconpdf_ {<img src="_httpiconpdf_">} 26 _iconselectcollection_ {<img src="_httpiconselcolgr_" width=_widthselcolgr_ height=_heightselcolgr_ >}21 _iconselectcollection_ {<img src="_httpiconselcolgr_" width=_widthselcolgr_ height=_heightselcolgr_ alt=_altselcolgr_>} 27 22 _iconmusiclibrary_ {<img src="_httpicontmusic_" border=1 alt="meldex music library">} 28 23 … … 45 40 _pagetitle_ {_textpagetitle_} 46 41 _imagethispage_ {} 47 _imagecollection_ { <center>_iconnzdl_</center>}42 _imagecollection_ { } 48 43 49 44 _content_ { 50 <center>_iconselectcollection_</center><br> 51 _homeextra_ 52 <center>_iconblankbar_</center> 45 <center> 46 <p>_iconselectcollection_ 47 </center> 48 <p>_homeextra_ 49 50 <center> 51 <p>_iconblankbar_ 52 </center> 53 <p><center><h2>_textprojhead_</h2></center> 54 55 <table border=0 cellpadding=5> 56 57 <tr valign=top> 58 <td> 59 <p>_icongbull_ <a href="mailto:_mailaddr_">_textfb_</a> 60 <p>_icongbull_ <a href="http://www.nzdl.org">NZDL</a> 61 <p>_icongbull_ <a href="_httppagex_(gsdl)">Greenstone</a> 62 <p>_iconpdf_ <a href="_gsdl:httpgsdlmanual_">Manual</a></i> 63 <p>_iconpdf_ <a href="_httppublications_/NZDLtext.pdf">_textinfosheet_</a> 64 <p><a href="_httppagestatus_"><img src="_httpimg_/tabspace.gif" width=60 height=20 border=0></a> 65 </td> 66 <td>_textprojinfo_</td> 67 
</tr></table> 68 69 <p>_iconblankbar_ 70 <table> 71 <tr valign=top> 72 <td>_textpoem_</td> 73 <td>_imagegreenstone_</td> 74 </tr></table> 75 <p>_textgreenstone_ 53 76 54 77 <p> 55 <table> 56 <tr><td colspan=2><center><h2>_textprojhead_</h2></center></td></tr> 57 <tr valign=top><td> 58 59 <table border=0 cellpadding=5> 60 <tr valign=top><td rowspan=10 width=32> </td> 61 <tr valign=top><td>_icongbull_</td><td><a href="mailto:_mailaddr_">_textfb_</a></td></tr> 62 <tr valign=top><td>_icongbull_</td><td><a href="_httppagex_(people)">_textpeople_</a><br></td></tr> 63 <tr valign=top><td>_icongbull_</td><td><a href="http://www.cs.waikato.ac.nz/~nzdl/publications/">_textpub_</a><br></td></tr> 64 <tr valign=top><td>_icongbull_</td><td><a href="_httppagex_(rw)">_textrw_</a><br></td></tr> 65 <tr valign=top><td>_icongbull_</td><td><a href="_httppagex_(technology)">_texttec_</a><br></td></tr> 66 <tr valign=top><td>_iconpdf_</td><td><a 67 href="http://www.nzdl.org/download/greenstone/publications/NZDLtext.pdf">_textinfosheet_</a></td></tr> 68 <tr valign=top><td>_iconpdf_</td><td><a 69 href="http://www.nzdl.org/download/greenstone/publications/NZDLpictures.pdf">_textscreenshots_</a></td></tr> 70 <tr valign=top><td>_iconpdf_</td><td><a 71 href="http://www.nzdl.org/download/greenstone/publications/Niupepa.pdf">_textnpepainfosheet_</a></td></tr> 72 <tr valign=top><td colspan=2><a href="_httppagestatus_"><img src="_httpimg_/tabspace.gif" width=60 height=20 border=0></a></td></tr> 73 74 </table> 75 76 </td> 77 <td> 78 79 <table border=0 cellpadding=5> 80 <tr><td> 81 _textprojinfo_ 82 </td></tr></table> 83 </td></tr></table> 84 _textotherinfo_ 85 _iconblankbar_ 86 <table><tr valign=top> 87 <td>_textpoem_</td> 88 <td>_imagegreenstone_</td> 89 </tr> 90 <tr><td colspan=2>_textgreenstone_</td> 91 </tr> 92 </table> 78 <p>_iconblankbar_ 79 <p>Greenstone software by <a href="http://www.nzdl.org">The New Zealand Digital Library Project</a>, 80 <br><a 
href="http://www.cs.waikato.ac.nz/cs">Department of Computer Science</a>, 81 <a href="http://www.waikato.ac.nz">University of Waikato</a>, 82 New Zealand 93 83 } 94 84 95 96 #######################################################################97 # English language text macros98 #######################################################################99 100 # moved to english.dm101 102 103 104 105 106 107 -
branches/New_Config_Format-branch/gsdl/macros/maori.dm
r1057 r1279 374 374 375 375 ###################################################################### 376 # 'prefere ces' page376 # 'preferences' page 377 377 package preferences 378 378 ###################################################################### … … 406 406 407 407 408 ###################################################################### 409 # 'people' page 410 package people 411 ###################################################################### 412 413 #------------------------------------------------------------ 414 # text macros 415 #------------------------------------------------------------ 416 417 418 #------------------------------------------------------------ 419 # icons 420 #------------------------------------------------------------ 421 422 423 ###################################################################### 424 # 'technology' page 425 package technology 426 ###################################################################### 427 428 #------------------------------------------------------------ 429 # text macros 430 #------------------------------------------------------------ 431 432 _textpagetitle_ {NZDL: Technology} 433 434 435 #------------------------------------------------------------ 436 # icons 437 #------------------------------------------------------------ 438 439 440 ###################################################################### 441 # 'status' pages 442 package status 443 ###################################################################### 444 445 #------------------------------------------------------------ 446 # text macros 447 #------------------------------------------------------------ 448 449 450 #------------------------------------------------------------ 451 # icons 452 #------------------------------------------------------------ 453 454 455 ###################################################################### 456 # html package 457 package html 458 
###################################################################### 459 460 #------------------------------------------------------------ 461 # text macros 462 #------------------------------------------------------------ 463 464 465 #------------------------------------------------------------ 466 # icons 467 #------------------------------------------------------------ 468 469 470 ###################################################################### 471 # external link package 472 package extlink 473 ###################################################################### 474 475 #------------------------------------------------------------ 476 # text macros 477 #------------------------------------------------------------ 478 479 480 #------------------------------------------------------------ 481 # icons 482 #------------------------------------------------------------ 483 484 485 ###################################################################### 486 # authentication page 487 package authen 488 ###################################################################### 489 490 #------------------------------------------------------------ 491 # text macros 492 #------------------------------------------------------------ 493 494 495 #------------------------------------------------------------ 496 # icons 497 #------------------------------------------------------------ 498 499 500 408 409 -
branches/New_Config_Format-branch/gsdl/macros/pref.dm
r1032 r1279 253 253 } 254 254 255 _caseoption_ [l=ar] {} 256 255 257 _fcoption_ { 256 258 <input type=checkbox name=fc onClick="updatefc();"_If_("_cgiargfc_" eq "1", checked)> … … 327 329 _enlanguageoption_ {<option value="en"_If_("_cgiargl_",, selected)_If_("_cgiargl_" eq "en", selected)>_textlangeng_} 328 330 _delanguageoption_ {<option value="de"_If_("_cgiargl_" eq "de", selected)>_textlanggerman_} 331 _frlanguageoption_ {<option value="fr"_If_("_cgiargl_" eq "fr", selected)>_textlangfrench_} 332 _eslanguageoption_ {<option value="es"_If_("_cgiargl_" eq "es", selected)>_textlangspanish_} 329 333 _milanguageoption_ {<option value="mi"_If_("_cgiargl_" eq "mi", selected)>_textlangmaori_} 330 334 _zhlanguageoption_ {<option value="zh"_If_("_cgiargl_" eq "zh", selected)>_textlangchinese_} 335 _arlanguageoption_ {<option value="ar"_If_("_cgiargl_" eq "ar", selected)>_textlangarabic_} 331 336 332 337 _encodingoption_ { … … 336 341 <option value="u" _If_("_cgiargw_" eq "u",selected)>UTF-8 337 342 <option value="g" _If_("_cgiargw_" eq "g",selected)>GBK 343 <option value="a" _If_("_cgiargw_" eq "a",selected)>Arabic (windows 1256) 338 344 </select> 339 345 } -
branches/New_Config_Format-branch/gsdl/macros/prescrpt.dm
r1049 r1279 42 42 _content_ { 43 43 _iconblankbar_ 44 <p><i>PreScript</i> offers: 44 <p> 45 <i>PreScript</i> is a utility for extracting text from PostScript files. 46 PreScript offers: 45 47 46 48 <dl> -
branches/New_Config_Format-branch/gsdl/macros/query.dm
r964 r1279 31 31 # don't want alt text here 32 32 _iconnext_{<img src="_httpiconmore_" width=_widthmore_ height=_heightmore_ border=0 align=top>} 33 _iconnext_ [v=1] {} 33 34 _iconprev_{<img src="_httpiconless_" width=_widthless_ height=_heightless_ border=0 align=top>} 35 _iconprev_ [v=1] {} 34 36 35 37 ####################################################################### -
branches/New_Config_Format-branch/gsdl/macros/status.dm
r931 r1279 22 22 <table width=100%> 23 23 <tr> 24 <td><img src=" /gsdl/images/spacer.gif" width=2 height=77></td>24 <td><img src="_httpimg_/spacer.gif" width=2 height=77></td> 25 25 <td>_iconblankbar_ 26 26 <center><h2>_1_</h2></center> … … 57 57 _select_ { 58 58 <a href="_httppagehome_" target=_top border=0><img src="_httpimg_/gsdl.gif"></a> 59 <p> 60 <a href="_gwcgi_?e=_compressedoptions_&a=status&sp=welcome" target=infoframe>_textadminhome_</a> 61 <a href="_httppagehome_" target=\_top>_textreturnhome_</a> 62 59 63 60 64 _If_("_cgiarguma_" ne "\_cgiarguma\_", … … 68 72 <a href="_gwcgi_?e=_compressedoptions_&a=status&sp=argumentinfo" target=infoframe>_textarguments_</a><br> 69 73 <a href="_gwcgi_?e=_compressedoptions_&a=status&sp=actioninfo" target=infoframe>_textactions_</a><br> 74 <a href="_gwcgi_?e=_compressedoptions_&a=status&sp=browserinfo" target=infoframe>_textbrowsers_</a><br> 70 75 <a href="_gwcgi_?e=_compressedoptions_&a=status&sp=protocolinfo" target=infoframe>_textprotocols_</a> 71 76 … … 80 85 <a href="_gwcgi_?e=_compressedoptions_&a=status&sp=initlog" target=infoframe>_textinitlog_</a><br> 81 86 <a href="_gwcgi_?e=_compressedoptions_&a=status&sp=errorlog" target=infoframe>_texterrorlog_</a> 82 83 <p>84 <a href="_httppagehome_" target=_top>_textreturnhome_</a>85 87 } 86 88 87 89 90 _textframebrowser_ {You must have a frame enabled browser to view this.} 91 _textusermanage_ {User management} 92 _textlistusers_ {list users} 93 _textaddusers_ {add a new user} 94 95 _textinfo_ {Information} 96 _textgeneral_ {general} 97 _textarguments_ {arguments} 98 _textactions_ {actions} 99 _textbrowsers_ {browsers} 100 _textprotocols_ {protocols} 101 102 _textcollections_ {Collection management} 103 _textnewcoll_ {create new collection} 104 _texteditcoll_ {edit collection} 105 _textbuildcoll_ {build collection} 106 _textdeletecoll_ {delete collection} 107 108 _textlogs_ {Logs} 109 _textinitlog_ {init log} 110 _texterrorlog_ {error log} 111 112 _textadminhome_ {admin 
home} 113 _textreturnhome_ {Greenstone home} 114 115 _titlewelcome_ { Maintenance and Administration } 116 117 _welcome_ { 118 119 <center> 120 <table width=_pagewidth_><tr><td> 121 Maintenance and administration services available include: 122 <ul> 123 <li>view on-line logs 124 <li>create, maintain and update collections 125 <li>access technical information such as CGI arguments 126 </ul> 127 These services are accessed using the side navigation bar on the 128 lefthand side of the page. 129 <p> 130 _iconblankbar_ 131 </td></tr> 132 <tr><th align=left><br>Collection Status</th></tr> 133 <tr><td> 134 <font color=gray> 135 Collections will only appear as "running" if their build.cfg 136 files exist, are readable, contain a valid builddate field (i.e. > 0), 137 and are in the collection's index directory (i.e. NOT the building 138 directory). 139 <p> 140 click <i>abbrev.</i> for information on a collection 141 <br> 142 click <i>collection</i> to view a collection 143 </font> 144 </td></tr> 145 </table> 146 </center> 147 } -
branches/New_Config_Format-branch/gsdl/macros/users.dm
r876 r1279 87 87 </font></td></tr> 88 88 <tr><td>comment</td><td colspan=2><input type="text" name="umc" value="_users:usersargc_" size=50></td></tr> 89 <tr></td><td><td><input type="submit" name=beu value="submit"></td></tr> 89 <tr><td></td><td colspan=2><input type="submit" name=beu value="submit"> 90 <input type="submit" name=uma value="cancel"></td></tr> 90 91 </table> 91 92 </form> -
branches/New_Config_Format-branch/gsdl/packages/mg/lib/WIN32.MAK
r1000 r1279 25 25 ########################################################################### 26 26 27 GSDLHOME = d:\home\dl\gsdl27 GSDLHOME = c:\gsdl 28 28 29 29 AR = lib … … 32 32 33 33 DEFS = -DQUIET -DHAVE_CONFIG_H -D__WIN32__ -D_LITTLE_ENDIAN 34 INCLUDES = -I$(GSDLHOME)\packages\mg\lib -I$(GSDLHOME)\packages\mg 34 INCLUDES = -I$(GSDLHOME)\packages\mg\lib -I$(GSDLHOME)\packages\mg \ 35 -I$(GSDLHOME) 35 36 36 37 COMPILE = $(CC) -c $(DEFS) $(INCLUDES) -
branches/New_Config_Format-branch/gsdl/packages/mg/src/text/WIN32.MAK
r1000 r1279 25 25 ########################################################################### 26 26 27 GSDLHOME = d:\home\dl\gsdl27 GSDLHOME = c:\gsdl 28 28 29 29 CC = cl … … 32 32 -D__WIN32__ -D_LITTLE_ENDIAN 33 33 INCLUDES = -I$(GSDLHOME)\packages\mg\src\text -I$(GSDLHOME)\packages\mg \ 34 -I$(GSDLHOME)\packages\mg\lib 34 -I$(GSDLHOME)\packages\mg\lib -I$(GSDLHOME) 35 35 LDFLAGS = 36 36 -
branches/New_Config_Format-branch/gsdl/packages/mg/src/text/bool_parser.c
r531 r1279 1 #ifndef lint 2 static char yysccsid[] = "@(#)yaccpar 1.9 (Berkeley) 02/21/93"; 3 #endif 4 #define YYBYACC 1 5 #define YYMAJOR 1 6 #define YY MINOR 97 #define yyclearin (yychar=(-1)) 8 #define yyerrok (yyerrflag=0)9 #define YYRECOVERING (yyerrflag!=0) 10 # define YYPREFIX "yy"11 #line 25 "bool_parser.y" 1 2 /* A Bison parser, made from bool_parser.y 3 by GNU Bison version 1.27 4 */ 5 6 #define YYBISON 1 /* Identify Bison output. */ 7 8 #define TERM 257 9 10 #line 24 "bool_parser.y" 11 12 12 13 13 #include "sysfuncs.h" … … 48 48 static u_long invf_ptr; 49 49 static u_long invf_len; 50 #line 65 "bool_parser.y" 50 51 #line 66 "bool_parser.y" 51 52 typedef union { 52 53 char *text; 53 54 bool_tree_node *node; 54 55 } YYSTYPE; 55 #line 55 "y.tab.c" 56 #define TERM 257 57 #define YYERRCODE 256 58 short yylhs[] = { -1, 59 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 60 4, 4, 61 }; 62 short yylen[] = { 2, 63 1, 1, 3, 1, 1, 1, 2, 3, 2, 1, 64 3, 1, 65 }; 66 short yydefred[] = { 0, 67 2, 0, 4, 5, 0, 0, 6, 10, 0, 0, 68 0, 7, 0, 9, 0, 3, 8, 0, 69 }; 70 short yydgoto[] = { 6, 71 7, 8, 9, 10, 72 }; 73 short yysindex[] = { -32, 74 0, -32, 0, 0, -32, 0, 0, 0, -33, -118, 75 -37, 0, -32, 0, -32, 0, 0, -33, 76 }; 77 short yyrindex[] = { 0, 78 0, 0, 0, 0, 0, 0, 0, 0, 2, 12, 79 0, 0, 0, 0, 0, 0, 0, 3, 80 }; 81 short yygindex[] = { 0, 82 0, 6, -2, 14, 83 }; 84 #define YYTABLESIZE 225 85 short yytable[] = { 5, 86 5, 12, 11, 16, 13, 15, 2, 2, 3, 3, 87 12, 1, 18, 0, 14, 11, 0, 0, 17, 0, 88 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 89 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90 0, 0, 12, 11, 0, 0, 0, 0, 0, 0, 91 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 93 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 95 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98 0, 0, 0, 0, 0, 12, 11, 0, 0, 0, 99 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 104 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 0, 0, 1, 1, 109 }; 110 short yycheck[] = { 33, 111 33, 0, 0, 41, 38, 124, 40, 40, 42, 42, 112 5, 0, 15, -1, 9, 2, -1, -1, 13, -1, 113 -1, -1, -1, 18, -1, -1, -1, -1, -1, -1, 114 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 115 -1, -1, 41, 41, -1, -1, -1, -1, -1, -1, 116 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 117 -1, 95, 95, -1, -1, -1, -1, -1, -1, -1, 118 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 119 -1, -1, -1, -1, -1, -1, 124, -1, -1, -1, 120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 123 -1, -1, -1, -1, -1, 124, 124, -1, -1, -1, 124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 125 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 126 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 127 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 128 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 129 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 130 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 131 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 132 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 133 -1, -1, -1, 257, 257, 134 }; 135 #define YYFINAL 6 136 #ifndef YYDEBUG 137 #define YYDEBUG 0 138 #endif 139 #define YYMAXTOKEN 257 140 #if YYDEBUG 141 char *yyname[] = { 142 "end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 143 "'!'",0,0,0,0,"'&'",0,"'('","')'","'*'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 144 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'_'",0,0,0,0,0, 145 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0,0,0,0,0,0,0,0,0,0,0,0,0,0, 146 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 147 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 148 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 149 "TERM", 150 }; 151 char *yyrule[] = { 152 "$accept : 
query", 153 "query : or", 154 "term : TERM", 155 "term : '(' or ')'", 156 "term : '*'", 157 "term : '_'", 158 "not : term", 159 "not : '!' not", 160 "and : and '&' not", 161 "and : and not", 162 "and : not", 163 "or : or '|' and", 164 "or : and", 165 }; 166 #endif 167 #ifdef YYSTACKSIZE 56 #include <stdio.h> 57 58 #ifndef __cplusplus 59 #ifndef __STDC__ 60 #define const 61 #endif 62 #endif 63 64 65 66 #define YYFINAL 20 67 #define YYFLAG -32768 68 #define YYNTBASE 11 69 70 #define YYTRANSLATE(x) ((unsigned)(x) <= 257 ? yytranslate[x] : 16) 71 72 static const char yytranslate[] = { 0, 73 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 74 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 75 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 76 2, 2, 8, 2, 2, 2, 2, 9, 2, 4, 77 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 78 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 80 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 81 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 82 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 83 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 84 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 85 2, 2, 2, 10, 2, 2, 2, 2, 2, 2, 86 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 87 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 98 2, 2, 2, 2, 2, 1, 3 99 }; 100 101 #if YYDEBUG != 0 102 static const short yyprhs[] = { 0, 103 0, 2, 4, 8, 10, 12, 14, 17, 21, 24, 104 26, 30 105 }; 106 107 static const short yyrhs[] = { 15, 108 0, 3, 0, 4, 15, 5, 0, 6, 0, 7, 109 0, 12, 0, 8, 13, 0, 14, 9, 13, 0, 110 14, 13, 0, 13, 0, 15, 10, 14, 0, 14, 111 0 112 }; 113 114 #endif 115 116 #if YYDEBUG != 0 117 static const short yyrline[] = { 0, 118 76, 80, 81, 82, 83, 86, 87, 90, 91, 92, 119 95, 96 120 }; 121 #endif 122 123 124 #if YYDEBUG != 0 || defined (YYERROR_VERBOSE) 125 126 static const char * const yytname[] = { 
"$","error","$undefined.","TERM","'('", 127 "')'","'*'","'_'","'!'","'&'","'|'","query","term","not","and","or", NULL 128 }; 129 #endif 130 131 static const short yyr1[] = { 0, 132 11, 12, 12, 12, 12, 13, 13, 14, 14, 14, 133 15, 15 134 }; 135 136 static const short yyr2[] = { 0, 137 1, 1, 3, 1, 1, 1, 2, 3, 2, 1, 138 3, 1 139 }; 140 141 static const short yydefact[] = { 0, 142 2, 0, 4, 5, 0, 6, 10, 12, 1, 0, 143 7, 0, 9, 0, 3, 8, 11, 0, 0, 0 144 }; 145 146 static const short yydefgoto[] = { 18, 147 6, 7, 8, 9 148 }; 149 150 static const short yypact[] = { 10, 151 -32768, 10,-32768,-32768, 10,-32768,-32768, 2, -9, 14, 152 -32768, 10,-32768, 10,-32768,-32768, 2, 4, 15,-32768 153 }; 154 155 static const short yypgoto[] = {-32768, 156 -32768, -5, -12, 18 157 }; 158 159 160 #define YYLAST 24 161 162 163 static const short yytable[] = { 11, 164 14, 17, 13, 19, 1, 2, 16, 3, 4, 5, 165 12, 13, 1, 2, 20, 3, 4, 5, 15, 10, 166 0, 0, 0, 14 167 }; 168 169 static const short yycheck[] = { 5, 170 10, 14, 8, 0, 3, 4, 12, 6, 7, 8, 171 9, 17, 3, 4, 0, 6, 7, 8, 5, 2, 172 -1, -1, -1, 10 173 }; 174 /* -*-C-*- Note some compilers choke on comments on `#line' lines. */ 175 #line 3 "/usr/share/bison.simple" 176 /* This file comes from bison-1.27. */ 177 178 /* Skeleton output parser for bison, 179 Copyright (C) 1984, 1989, 1990 Free Software Foundation, Inc. 180 181 This program is free software; you can redistribute it and/or modify 182 it under the terms of the GNU General Public License as published by 183 the Free Software Foundation; either version 2, or (at your option) 184 any later version. 185 186 This program is distributed in the hope that it will be useful, 187 but WITHOUT ANY WARRANTY; without even the implied warranty of 188 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 189 GNU General Public License for more details. 
190 191 You should have received a copy of the GNU General Public License 192 along with this program; if not, write to the Free Software 193 Foundation, Inc., 59 Temple Place - Suite 330, 194 Boston, MA 02111-1307, USA. */ 195 196 /* As a special exception, when this file is copied by Bison into a 197 Bison output file, you may use that output file without restriction. 198 This special exception was added by the Free Software Foundation 199 in version 1.24 of Bison. */ 200 201 /* This is the parser code that is written into each bison parser 202 when the %semantic_parser declaration is not specified in the grammar. 203 It was written by Richard Stallman by simplifying the hairy parser 204 used when %semantic_parser is specified. */ 205 206 #ifndef YYSTACK_USE_ALLOCA 207 #ifdef alloca 208 #define YYSTACK_USE_ALLOCA 209 #else /* alloca not defined */ 210 #ifdef __GNUC__ 211 #define YYSTACK_USE_ALLOCA 212 #define alloca __builtin_alloca 213 #else /* not GNU C. */ 214 #if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi) || (defined (__sun) && defined (__i386)) 215 #define YYSTACK_USE_ALLOCA 216 #include <alloca.h> 217 #else /* not sparc */ 218 /* We think this test detects Watcom and Microsoft C. */ 219 /* This used to test MSDOS, but that is a bad idea 220 since that symbol is in the user namespace. */ 221 #if (defined (_MSDOS) || defined (_MSDOS_)) && !defined (__TURBOC__) 222 #if 0 /* No need for malloc.h, which pollutes the namespace; 223 instead, just don't use alloca. */ 224 #include <malloc.h> 225 #endif 226 #else /* not MSDOS, or __TURBOC__ */ 227 #if defined(_AIX) 228 /* I don't know what this was needed for, but it pollutes the namespace. 229 So I turned it off. rms, 2 May 1997. 
*/ 230 /* #include <malloc.h> */ 231 #pragma alloca 232 #define YYSTACK_USE_ALLOCA 233 #else /* not MSDOS, or __TURBOC__, or _AIX */ 234 #if 0 235 #ifdef __hpux /* [email protected] says this works for HPUX 9.05 and up, 236 and on HPUX 10. Eventually we can turn this on. */ 237 #define YYSTACK_USE_ALLOCA 238 #define alloca __builtin_alloca 239 #endif /* __hpux */ 240 #endif 241 #endif /* not _AIX */ 242 #endif /* not MSDOS, or __TURBOC__ */ 243 #endif /* not sparc */ 244 #endif /* not GNU C */ 245 #endif /* alloca not defined */ 246 #endif /* YYSTACK_USE_ALLOCA not defined */ 247 248 #ifdef YYSTACK_USE_ALLOCA 249 #define YYSTACK_ALLOC alloca 250 #else 251 #define YYSTACK_ALLOC malloc 252 #endif 253 254 /* Note: there must be only one dollar sign in this file. 255 It is replaced by the list of actions, each action 256 as one case of the switch. */ 257 258 #define yyerrok (yyerrstatus = 0) 259 #define yyclearin (yychar = YYEMPTY) 260 #define YYEMPTY -2 261 #define YYEOF 0 262 #define YYACCEPT goto yyacceptlab 263 #define YYABORT goto yyabortlab 264 #define YYERROR goto yyerrlab1 265 /* Like YYERROR except do call yyerror. 266 This remains here temporarily to ease the 267 transition to the new meaning of YYERROR, for GCC. 268 Once GCC version 2 has supplanted version 1, this can go. 
*/ 269 #define YYFAIL goto yyerrlab 270 #define YYRECOVERING() (!!yyerrstatus) 271 #define YYBACKUP(token, value) \ 272 do \ 273 if (yychar == YYEMPTY && yylen == 1) \ 274 { yychar = (token), yylval = (value); \ 275 yychar1 = YYTRANSLATE (yychar); \ 276 YYPOPSTACK; \ 277 goto yybackup; \ 278 } \ 279 else \ 280 { yyerror ("syntax error: cannot back up"); YYERROR; } \ 281 while (0) 282 283 #define YYTERROR 1 284 #define YYERRCODE 256 285 286 #ifndef YYPURE 287 #define YYLEX yylex() 288 #endif 289 290 #ifdef YYPURE 291 #ifdef YYLSP_NEEDED 292 #ifdef YYLEX_PARAM 293 #define YYLEX yylex(&yylval, &yylloc, YYLEX_PARAM) 294 #else 295 #define YYLEX yylex(&yylval, &yylloc) 296 #endif 297 #else /* not YYLSP_NEEDED */ 298 #ifdef YYLEX_PARAM 299 #define YYLEX yylex(&yylval, YYLEX_PARAM) 300 #else 301 #define YYLEX yylex(&yylval) 302 #endif 303 #endif /* not YYLSP_NEEDED */ 304 #endif 305 306 /* If nonreentrant, generate the variables here */ 307 308 #ifndef YYPURE 309 310 int yychar; /* the lookahead symbol */ 311 YYSTYPE yylval; /* the semantic value of the */ 312 /* lookahead symbol */ 313 314 #ifdef YYLSP_NEEDED 315 YYLTYPE yylloc; /* location data for the lookahead */ 316 /* symbol */ 317 #endif 318 319 int yynerrs; /* number of parse errors so far */ 320 #endif /* not YYPURE */ 321 322 #if YYDEBUG != 0 323 int yydebug; /* nonzero means print parse trace */ 324 /* Since this is uninitialized, it does not stop multiple parsers 325 from coexisting. */ 326 #endif 327 328 /* YYINITDEPTH indicates the initial size of the parser's stacks */ 329 330 #ifndef YYINITDEPTH 331 #define YYINITDEPTH 200 332 #endif 333 334 /* YYMAXDEPTH is the maximum size the stacks can grow to 335 (effective only if the built-in stack extension method is used). */ 336 337 #if YYMAXDEPTH == 0 168 338 #undef YYMAXDEPTH 169 #define YYMAXDEPTH YYSTACKSIZE 339 #endif 340 341 #ifndef YYMAXDEPTH 342 #define YYMAXDEPTH 10000 343 #endif 344 345 346 /* Define __yy_memcpy. 
Note that the size argument 347 should be passed with type unsigned int, because that is what the non-GCC 348 definitions require. With GCC, __builtin_memcpy takes an arg 349 of type size_t, but it can handle unsigned int. */ 350 351 #if __GNUC__ > 1 /* GNU C and GNU C++ define this. */ 352 #define __yy_memcpy(TO,FROM,COUNT) __builtin_memcpy(TO,FROM,COUNT) 353 #else /* not GNU C or C++ */ 354 #ifndef __cplusplus 355 356 /* This is the most reliable way to avoid incompatibilities 357 in available built-in functions on various systems. */ 358 static void 359 __yy_memcpy (to, from, count) 360 char *to; 361 char *from; 362 unsigned int count; 363 { 364 register char *f = from; 365 register char *t = to; 366 register int i = count; 367 368 while (i-- > 0) 369 *t++ = *f++; 370 } 371 372 #else /* __cplusplus */ 373 374 /* This is the most reliable way to avoid incompatibilities 375 in available built-in functions on various systems. */ 376 static void 377 __yy_memcpy (char *to, char *from, unsigned int count) 378 { 379 register char *t = to; 380 register char *f = from; 381 register int i = count; 382 383 while (i-- > 0) 384 *t++ = *f++; 385 } 386 387 #endif 388 #endif 389 390 391 #line 216 "/usr/share/bison.simple" 392 393 /* The user can define YYPARSE_PARAM as the name of an argument to be passed 394 into yyparse. The argument should have type void *. 395 It should actually point to an object. 396 Grammar actions can access the variable by casting it 397 to the proper pointer type. */ 398 399 #ifdef YYPARSE_PARAM 400 #ifdef __cplusplus 401 #define YYPARSE_PARAM_ARG void *YYPARSE_PARAM 402 #define YYPARSE_PARAM_DECL 403 #else /* not __cplusplus */ 404 #define YYPARSE_PARAM_ARG YYPARSE_PARAM 405 #define YYPARSE_PARAM_DECL void *YYPARSE_PARAM; 406 #endif /* not __cplusplus */ 407 #else /* not YYPARSE_PARAM */ 408 #define YYPARSE_PARAM_ARG 409 #define YYPARSE_PARAM_DECL 410 #endif /* not YYPARSE_PARAM */ 411 412 /* Prevent warning if -Wstrict-prototypes. 
*/ 413 #ifdef __GNUC__ 414 #ifdef YYPARSE_PARAM 415 int yyparse (void *); 170 416 #else 171 #ifdef YYMAXDEPTH 172 #define YYSTACKSIZE YYMAXDEPTH 417 int yyparse (void); 418 #endif 419 #endif 420 421 int 422 yyparse(YYPARSE_PARAM_ARG) 423 YYPARSE_PARAM_DECL 424 { 425 register int yystate; 426 register int yyn; 427 register short *yyssp; 428 register YYSTYPE *yyvsp; 429 int yyerrstatus; /* number of tokens to shift before error messages enabled */ 430 int yychar1 = 0; /* lookahead token as an internal (translated) token number */ 431 432 short yyssa[YYINITDEPTH]; /* the state stack */ 433 YYSTYPE yyvsa[YYINITDEPTH]; /* the semantic value stack */ 434 435 short *yyss = yyssa; /* refer to the stacks thru separate pointers */ 436 YYSTYPE *yyvs = yyvsa; /* to allow yyoverflow to reallocate them elsewhere */ 437 438 #ifdef YYLSP_NEEDED 439 YYLTYPE yylsa[YYINITDEPTH]; /* the location stack */ 440 YYLTYPE *yyls = yylsa; 441 YYLTYPE *yylsp; 442 443 #define YYPOPSTACK (yyvsp--, yyssp--, yylsp--) 173 444 #else 174 #define YYSTACKSIZE 500 175 #define YYMAXDEPTH 500 176 #endif 177 #endif 178 int yydebug; 179 int yynerrs; 180 int yyerrflag; 181 int yychar; 182 short *yyssp; 183 YYSTYPE *yyvsp; 184 YYSTYPE yyval; 185 YYSTYPE yylval; 186 short yyss[YYSTACKSIZE]; 187 YYSTYPE yyvs[YYSTACKSIZE]; 188 #define yystacksize YYSTACKSIZE 445 #define YYPOPSTACK (yyvsp--, yyssp--) 446 #endif 447 448 int yystacksize = YYINITDEPTH; 449 int yyfree_stacks = 0; 450 451 #ifdef YYPURE 452 int yychar; 453 YYSTYPE yylval; 454 int yynerrs; 455 #ifdef YYLSP_NEEDED 456 YYLTYPE yylloc; 457 #endif 458 #endif 459 460 YYSTYPE yyval; /* the variable used to return */ 461 /* semantic values from the action */ 462 /* routines */ 463 464 int yylen; 465 466 #if YYDEBUG != 0 467 if (yydebug) 468 fprintf(stderr, "Starting parse\n"); 469 #endif 470 471 yystate = 0; 472 yyerrstatus = 0; 473 yynerrs = 0; 474 yychar = YYEMPTY; /* Cause a token to be read. */ 475 476 /* Initialize stack pointers. 
477 Waste one element of value and location stack 478 so that they stay on the same level as the state stack. 479 The wasted elements are never initialized. */ 480 481 yyssp = yyss - 1; 482 yyvsp = yyvs; 483 #ifdef YYLSP_NEEDED 484 yylsp = yyls; 485 #endif 486 487 /* Push a new state, which is found in yystate . */ 488 /* In all cases, when you get here, the value and location stacks 489 have just been pushed. so pushing a state here evens the stacks. */ 490 yynewstate: 491 492 *++yyssp = yystate; 493 494 if (yyssp >= yyss + yystacksize - 1) 495 { 496 /* Give user a chance to reallocate the stack */ 497 /* Use copies of these so that the &'s don't force the real ones into memory. */ 498 YYSTYPE *yyvs1 = yyvs; 499 short *yyss1 = yyss; 500 #ifdef YYLSP_NEEDED 501 YYLTYPE *yyls1 = yyls; 502 #endif 503 504 /* Get the current used size of the three stacks, in elements. */ 505 int size = yyssp - yyss + 1; 506 507 #ifdef yyoverflow 508 /* Each stack pointer address is followed by the size of 509 the data in use in that stack, in bytes. */ 510 #ifdef YYLSP_NEEDED 511 /* This used to be a conditional around just the two extra args, 512 but that might be undefined if yyoverflow is a macro. */ 513 yyoverflow("parser stack overflow", 514 &yyss1, size * sizeof (*yyssp), 515 &yyvs1, size * sizeof (*yyvsp), 516 &yyls1, size * sizeof (*yylsp), 517 &yystacksize); 518 #else 519 yyoverflow("parser stack overflow", 520 &yyss1, size * sizeof (*yyssp), 521 &yyvs1, size * sizeof (*yyvsp), 522 &yystacksize); 523 #endif 524 525 yyss = yyss1; yyvs = yyvs1; 526 #ifdef YYLSP_NEEDED 527 yyls = yyls1; 528 #endif 529 #else /* no yyoverflow */ 530 /* Extend the stack our own way. 
*/ 531 if (yystacksize >= YYMAXDEPTH) 532 { 533 yyerror("parser stack overflow"); 534 if (yyfree_stacks) 535 { 536 free (yyss); 537 free (yyvs); 538 #ifdef YYLSP_NEEDED 539 free (yyls); 540 #endif 541 } 542 return 2; 543 } 544 yystacksize *= 2; 545 if (yystacksize > YYMAXDEPTH) 546 yystacksize = YYMAXDEPTH; 547 #ifndef YYSTACK_USE_ALLOCA 548 yyfree_stacks = 1; 549 #endif 550 yyss = (short *) YYSTACK_ALLOC (yystacksize * sizeof (*yyssp)); 551 __yy_memcpy ((char *)yyss, (char *)yyss1, 552 size * (unsigned int) sizeof (*yyssp)); 553 yyvs = (YYSTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yyvsp)); 554 __yy_memcpy ((char *)yyvs, (char *)yyvs1, 555 size * (unsigned int) sizeof (*yyvsp)); 556 #ifdef YYLSP_NEEDED 557 yyls = (YYLTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yylsp)); 558 __yy_memcpy ((char *)yyls, (char *)yyls1, 559 size * (unsigned int) sizeof (*yylsp)); 560 #endif 561 #endif /* no yyoverflow */ 562 563 yyssp = yyss + size - 1; 564 yyvsp = yyvs + size - 1; 565 #ifdef YYLSP_NEEDED 566 yylsp = yyls + size - 1; 567 #endif 568 569 #if YYDEBUG != 0 570 if (yydebug) 571 fprintf(stderr, "Stack size increased to %d\n", yystacksize); 572 #endif 573 574 if (yyssp >= yyss + yystacksize - 1) 575 YYABORT; 576 } 577 578 #if YYDEBUG != 0 579 if (yydebug) 580 fprintf(stderr, "Entering state %d\n", yystate); 581 #endif 582 583 goto yybackup; 584 yybackup: 585 586 /* Do appropriate processing given the current state. */ 587 /* Read a lookahead token if we need one and don't already have one. */ 588 /* yyresume: */ 589 590 /* First try to decide what to do without reference to lookahead token. */ 591 592 yyn = yypact[yystate]; 593 if (yyn == YYFLAG) 594 goto yydefault; 595 596 /* Not known => get a lookahead token if don't already have one. */ 597 598 /* yychar is either YYEMPTY or YYEOF 599 or a valid token in external form. 
*/ 600 601 if (yychar == YYEMPTY) 602 { 603 #if YYDEBUG != 0 604 if (yydebug) 605 fprintf(stderr, "Reading a token: "); 606 #endif 607 yychar = YYLEX; 608 } 609 610 /* Convert token to internal form (in yychar1) for indexing tables with */ 611 612 if (yychar <= 0) /* This means end of input. */ 613 { 614 yychar1 = 0; 615 yychar = YYEOF; /* Don't call YYLEX any more */ 616 617 #if YYDEBUG != 0 618 if (yydebug) 619 fprintf(stderr, "Now at end of input.\n"); 620 #endif 621 } 622 else 623 { 624 yychar1 = YYTRANSLATE(yychar); 625 626 #if YYDEBUG != 0 627 if (yydebug) 628 { 629 fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]); 630 /* Give the individual parser a way to print the precise meaning 631 of a token, for further debugging info. */ 632 #ifdef YYPRINT 633 YYPRINT (stderr, yychar, yylval); 634 #endif 635 fprintf (stderr, ")\n"); 636 } 637 #endif 638 } 639 640 yyn += yychar1; 641 if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) 642 goto yydefault; 643 644 yyn = yytable[yyn]; 645 646 /* yyn is what to do for this token type in this state. 647 Negative => reduce, -yyn is rule number. 648 Positive => shift, yyn is new state. 649 New state is final state => don't bother to shift, 650 just return success. 651 0, or most negative number => error. */ 652 653 if (yyn < 0) 654 { 655 if (yyn == YYFLAG) 656 goto yyerrlab; 657 yyn = -yyn; 658 goto yyreduce; 659 } 660 else if (yyn == 0) 661 goto yyerrlab; 662 663 if (yyn == YYFINAL) 664 YYACCEPT; 665 666 /* Shift the lookahead token. */ 667 668 #if YYDEBUG != 0 669 if (yydebug) 670 fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]); 671 #endif 672 673 /* Discard the token being shifted unless it is eof. */ 674 if (yychar != YYEOF) 675 yychar = YYEMPTY; 676 677 *++yyvsp = yylval; 678 #ifdef YYLSP_NEEDED 679 *++yylsp = yylloc; 680 #endif 681 682 /* count tokens shifted since error; after three, turn off error status. 
*/ 683 if (yyerrstatus) yyerrstatus--; 684 685 yystate = yyn; 686 goto yynewstate; 687 688 /* Do the default action for the current state. */ 689 yydefault: 690 691 yyn = yydefact[yystate]; 692 if (yyn == 0) 693 goto yyerrlab; 694 695 /* Do a reduction. yyn is the number of a rule to reduce with. */ 696 yyreduce: 697 yylen = yyr2[yyn]; 698 if (yylen > 0) 699 yyval = yyvsp[1-yylen]; /* implement default value of the action */ 700 701 #if YYDEBUG != 0 702 if (yydebug) 703 { 704 int i; 705 706 fprintf (stderr, "Reducing via rule %d (line %d), ", 707 yyn, yyrline[yyn]); 708 709 /* Print the symbols being reduced, and their result. */ 710 for (i = yyprhs[yyn]; yyrhs[i] > 0; i++) 711 fprintf (stderr, "%s ", yytname[yyrhs[i]]); 712 fprintf (stderr, " -> %s\n", yytname[yyr1[yyn]]); 713 } 714 #endif 715 716 717 switch (yyn) { 718 719 case 1: 720 #line 76 "bool_parser.y" 721 { tree_base = yyvsp[0].node;; 722 break;} 723 case 2: 724 #line 80 "bool_parser.y" 725 { yyval.node = CreateBoolTermNode(term_list, yyvsp[0].text, 1, word_num, count, doc_count, invf_ptr, invf_len, stemmer_num); ; 726 break;} 727 case 3: 728 #line 81 "bool_parser.y" 729 { yyval.node = yyvsp[-1].node; ; 730 break;} 731 case 4: 732 #line 82 "bool_parser.y" 733 { yyval.node = CreateBoolTreeNode(N_all, NULL, NULL); ; 734 break;} 735 case 5: 736 #line 83 "bool_parser.y" 737 { yyval.node = CreateBoolTreeNode(N_none, NULL, NULL); ; 738 break;} 739 case 7: 740 #line 87 "bool_parser.y" 741 { yyval.node = CreateBoolTreeNode(N_not, yyvsp[0].node, NULL); ; 742 break;} 743 case 8: 744 #line 90 "bool_parser.y" 745 { yyval.node = CreateBoolTreeNode(N_and, yyvsp[-2].node, yyvsp[0].node); ; 746 break;} 747 case 9: 748 #line 91 "bool_parser.y" 749 { yyval.node = CreateBoolTreeNode(N_and, yyvsp[-1].node, yyvsp[0].node); ; 750 break;} 751 case 11: 752 #line 95 "bool_parser.y" 753 { yyval.node = CreateBoolTreeNode(N_or, yyvsp[-2].node, yyvsp[0].node); ; 754 break;} 755 } 756 /* the action file gets copied in in place of this 
dollarsign */ 757 #line 542 "/usr/share/bison.simple" 758 759 760 yyvsp -= yylen; 761 yyssp -= yylen; 762 #ifdef YYLSP_NEEDED 763 yylsp -= yylen; 764 #endif 765 766 #if YYDEBUG != 0 767 if (yydebug) 768 { 769 short *ssp1 = yyss - 1; 770 fprintf (stderr, "state stack now"); 771 while (ssp1 != yyssp) 772 fprintf (stderr, " %d", *++ssp1); 773 fprintf (stderr, "\n"); 774 } 775 #endif 776 777 *++yyvsp = yyval; 778 779 #ifdef YYLSP_NEEDED 780 yylsp++; 781 if (yylen == 0) 782 { 783 yylsp->first_line = yylloc.first_line; 784 yylsp->first_column = yylloc.first_column; 785 yylsp->last_line = (yylsp-1)->last_line; 786 yylsp->last_column = (yylsp-1)->last_column; 787 yylsp->text = 0; 788 } 789 else 790 { 791 yylsp->last_line = (yylsp+yylen-1)->last_line; 792 yylsp->last_column = (yylsp+yylen-1)->last_column; 793 } 794 #endif 795 796 /* Now "shift" the result of the reduction. 797 Determine what state that goes to, 798 based on the state we popped back to 799 and the rule number reduced by. */ 800 801 yyn = yyr1[yyn]; 802 803 yystate = yypgoto[yyn - YYNTBASE] + *yyssp; 804 if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) 805 yystate = yytable[yystate]; 806 else 807 yystate = yydefgoto[yyn - YYNTBASE]; 808 809 goto yynewstate; 810 811 yyerrlab: /* here on detecting error */ 812 813 if (! yyerrstatus) 814 /* If not already recovering from an error, report this error. */ 815 { 816 ++yynerrs; 817 818 #ifdef YYERROR_VERBOSE 819 yyn = yypact[yystate]; 820 821 if (yyn > YYFLAG && yyn < YYLAST) 822 { 823 int size = 0; 824 char *msg; 825 int x, count; 826 827 count = 0; 828 /* Start X at -yyn if nec to avoid negative indexes in yycheck. */ 829 for (x = (yyn < 0 ? -yyn : 0); 830 x < (sizeof(yytname) / sizeof(char *)); x++) 831 if (yycheck[x + yyn] == x) 832 size += strlen(yytname[x]) + 15, count++; 833 msg = (char *) malloc(size + 15); 834 if (msg != 0) 835 { 836 strcpy(msg, "parse error"); 837 838 if (count < 5) 839 { 840 count = 0; 841 for (x = (yyn < 0 ? 
-yyn : 0); 842 x < (sizeof(yytname) / sizeof(char *)); x++) 843 if (yycheck[x + yyn] == x) 844 { 845 strcat(msg, count == 0 ? ", expecting `" : " or `"); 846 strcat(msg, yytname[x]); 847 strcat(msg, "'"); 848 count++; 849 } 850 } 851 yyerror(msg); 852 free(msg); 853 } 854 else 855 yyerror ("parse error; also virtual memory exceeded"); 856 } 857 else 858 #endif /* YYERROR_VERBOSE */ 859 yyerror("parse error"); 860 } 861 862 goto yyerrlab1; 863 yyerrlab1: /* here on error raised explicitly by an action */ 864 865 if (yyerrstatus == 3) 866 { 867 /* if just tried and failed to reuse lookahead token after an error, discard it. */ 868 869 /* return failure if at end of input */ 870 if (yychar == YYEOF) 871 YYABORT; 872 873 #if YYDEBUG != 0 874 if (yydebug) 875 fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]); 876 #endif 877 878 yychar = YYEMPTY; 879 } 880 881 /* Else will try to reuse lookahead token 882 after shifting the error token. */ 883 884 yyerrstatus = 3; /* Each real token shifted decrements this */ 885 886 goto yyerrhandle; 887 888 yyerrdefault: /* current state does not do anything special for the error token. */ 889 890 #if 0 891 /* This is wrong; only states that explicitly want error tokens 892 should shift them. */ 893 yyn = yydefact[yystate]; /* If its default is to accept any token, ok. 
Otherwise pop it.*/ 894 if (yyn) goto yydefault; 895 #endif 896 897 yyerrpop: /* pop the current state because it cannot handle the error token */ 898 899 if (yyssp == yyss) YYABORT; 900 yyvsp--; 901 yystate = *--yyssp; 902 #ifdef YYLSP_NEEDED 903 yylsp--; 904 #endif 905 906 #if YYDEBUG != 0 907 if (yydebug) 908 { 909 short *ssp1 = yyss - 1; 910 fprintf (stderr, "Error: state stack now"); 911 while (ssp1 != yyssp) 912 fprintf (stderr, " %d", *++ssp1); 913 fprintf (stderr, "\n"); 914 } 915 #endif 916 917 yyerrhandle: 918 919 yyn = yypact[yystate]; 920 if (yyn == YYFLAG) 921 goto yyerrdefault; 922 923 yyn += YYTERROR; 924 if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) 925 goto yyerrdefault; 926 927 yyn = yytable[yyn]; 928 if (yyn < 0) 929 { 930 if (yyn == YYFLAG) 931 goto yyerrpop; 932 yyn = -yyn; 933 goto yyreduce; 934 } 935 else if (yyn == 0) 936 goto yyerrpop; 937 938 if (yyn == YYFINAL) 939 YYACCEPT; 940 941 #if YYDEBUG != 0 942 if (yydebug) 943 fprintf(stderr, "Shifting error token, "); 944 #endif 945 946 *++yyvsp = yylval; 947 #ifdef YYLSP_NEEDED 948 *++yylsp = yylloc; 949 #endif 950 951 yystate = yyn; 952 goto yynewstate; 953 954 yyacceptlab: 955 /* YYACCEPT comes here. */ 956 if (yyfree_stacks) 957 { 958 free (yyss); 959 free (yyvs); 960 #ifdef YYLSP_NEEDED 961 free (yyls); 962 #endif 963 } 964 return 0; 965 966 yyabortlab: 967 /* YYABORT comes here. 
*/ 968 if (yyfree_stacks) 969 { 970 free (yyss); 971 free (yyvs); 972 #ifdef YYLSP_NEEDED 973 free (yyls); 974 #endif 975 } 976 return 1; 977 } 189 978 #line 99 "bool_parser.y" 979 190 980 191 981 /* Bison on one mips machine defined "const" to be nothing but … … 381 1171 bool_tree_node * 382 1172 ParseBool(char *query_line, int query_len, 383 TermList **the_term_list, int the_stemmer_num, int the_stem_method, 384 int *res, 1173 TermList **the_term_list, int the_stemmer_num, int the_stem_method, int *res, 385 1174 stemmed_dict * the_sd, int is_indexed, /* [RPAP - Jan 97: Stem Index Change] */ 386 1175 QueryTermList **the_query_term_list) /* [RPAP - Feb 97: Term Frequency] */ … … 407 1196 408 1197 409 #line 407 "y.tab.c"410 #define YYABORT goto yyabort411 #define YYREJECT goto yyabort412 #define YYACCEPT goto yyaccept413 #define YYERROR goto yyerrlab414 int415 yyparse()416 {417 register int yym, yyn, yystate;418 #if YYDEBUG419 register char *yys;420 extern char *getenv();421 422 if (yys = getenv("YYDEBUG"))423 {424 yyn = *yys;425 if (yyn >= '0' && yyn <= '9')426 yydebug = yyn - '0';427 }428 #endif429 430 yynerrs = 0;431 yyerrflag = 0;432 yychar = (-1);433 434 yyssp = yyss;435 yyvsp = yyvs;436 *yyssp = yystate = 0;437 438 yyloop:439 if (yyn = yydefred[yystate]) goto yyreduce;440 if (yychar < 0)441 {442 if ((yychar = yylex()) < 0) yychar = 0;443 #if YYDEBUG444 if (yydebug)445 {446 yys = 0;447 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];448 if (!yys) yys = "illegal-symbol";449 printf("%sdebug: state %d, reading %d (%s)\n",450 YYPREFIX, yystate, yychar, yys);451 }452 #endif453 }454 if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 &&455 yyn <= YYTABLESIZE && yycheck[yyn] == yychar)456 {457 #if YYDEBUG458 if (yydebug)459 printf("%sdebug: state %d, shifting to state %d\n",460 YYPREFIX, yystate, yytable[yyn]);461 #endif462 if (yyssp >= yyss + yystacksize - 1)463 {464 goto yyoverflow;465 }466 *++yyssp = yystate = yytable[yyn];467 *++yyvsp = yylval;468 yychar = 
(-1);469 if (yyerrflag > 0) --yyerrflag;470 goto yyloop;471 }472 if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 &&473 yyn <= YYTABLESIZE && yycheck[yyn] == yychar)474 {475 yyn = yytable[yyn];476 goto yyreduce;477 }478 if (yyerrflag) goto yyinrecovery;479 #ifdef lint480 goto yynewerror;481 #endif482 yynewerror:483 yyerror("syntax error");484 #ifdef lint485 goto yyerrlab;486 #endif487 yyerrlab:488 ++yynerrs;489 yyinrecovery:490 if (yyerrflag < 3)491 {492 yyerrflag = 3;493 for (;;)494 {495 if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 &&496 yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE)497 {498 #if YYDEBUG499 if (yydebug)500 printf("%sdebug: state %d, error recovery shifting\501 to state %d\n", YYPREFIX, *yyssp, yytable[yyn]);502 #endif503 if (yyssp >= yyss + yystacksize - 1)504 {505 goto yyoverflow;506 }507 *++yyssp = yystate = yytable[yyn];508 *++yyvsp = yylval;509 goto yyloop;510 }511 else512 {513 #if YYDEBUG514 if (yydebug)515 printf("%sdebug: error recovery discarding state %d\n",516 YYPREFIX, *yyssp);517 #endif518 if (yyssp <= yyss) goto yyabort;519 --yyssp;520 --yyvsp;521 }522 }523 }524 else525 {526 if (yychar == 0) goto yyabort;527 #if YYDEBUG528 if (yydebug)529 {530 yys = 0;531 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];532 if (!yys) yys = "illegal-symbol";533 printf("%sdebug: state %d, error recovery discards token %d (%s)\n",534 YYPREFIX, yystate, yychar, yys);535 }536 #endif537 yychar = (-1);538 goto yyloop;539 }540 yyreduce:541 #if YYDEBUG542 if (yydebug)543 printf("%sdebug: state %d, reducing by rule %d (%s)\n",544 YYPREFIX, yystate, yyn, yyrule[yyn]);545 #endif546 yym = yylen[yyn];547 yyval = yyvsp[1-yym];548 switch (yyn)549 {550 case 1:551 #line 75 "bool_parser.y"552 { tree_base = yyvsp[0].node;}553 break;554 case 2:555 #line 79 "bool_parser.y"556 { yyval.node = CreateBoolTermNode(term_list, yyvsp[0].text, 1, word_num, count, doc_count, invf_ptr, invf_len, stemmer_num); }557 break;558 case 3:559 #line 80 "bool_parser.y"560 { 
yyval.node = yyvsp[-1].node; }561 break;562 case 4:563 #line 81 "bool_parser.y"564 { yyval.node = CreateBoolTreeNode(N_all, NULL, NULL); }565 break;566 case 5:567 #line 82 "bool_parser.y"568 { yyval.node = CreateBoolTreeNode(N_none, NULL, NULL); }569 break;570 case 7:571 #line 86 "bool_parser.y"572 { yyval.node = CreateBoolTreeNode(N_not, yyvsp[0].node, NULL); }573 break;574 case 8:575 #line 89 "bool_parser.y"576 { yyval.node = CreateBoolTreeNode(N_and, yyvsp[-2].node, yyvsp[0].node); }577 break;578 case 9:579 #line 90 "bool_parser.y"580 { yyval.node = CreateBoolTreeNode(N_and, yyvsp[-1].node, yyvsp[0].node); }581 break;582 case 11:583 #line 94 "bool_parser.y"584 { yyval.node = CreateBoolTreeNode(N_or, yyvsp[-2].node, yyvsp[0].node); }585 break;586 #line 584 "y.tab.c"587 }588 yyssp -= yym;589 yystate = *yyssp;590 yyvsp -= yym;591 yym = yylhs[yyn];592 if (yystate == 0 && yym == 0)593 {594 #if YYDEBUG595 if (yydebug)596 printf("%sdebug: after reduction, shifting from state 0 to\597 state %d\n", YYPREFIX, YYFINAL);598 #endif599 yystate = YYFINAL;600 *++yyssp = YYFINAL;601 *++yyvsp = yyval;602 if (yychar < 0)603 {604 if ((yychar = yylex()) < 0) yychar = 0;605 #if YYDEBUG606 if (yydebug)607 {608 yys = 0;609 if (yychar <= YYMAXTOKEN) yys = yyname[yychar];610 if (!yys) yys = "illegal-symbol";611 printf("%sdebug: state %d, reading %d (%s)\n",612 YYPREFIX, YYFINAL, yychar, yys);613 }614 #endif615 }616 if (yychar == 0) goto yyaccept;617 goto yyloop;618 }619 if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 &&620 yyn <= YYTABLESIZE && yycheck[yyn] == yystate)621 yystate = yytable[yyn];622 else623 yystate = yydgoto[yym];624 #if YYDEBUG625 if (yydebug)626 printf("%sdebug: after reduction, shifting from state %d \627 to state %d\n", YYPREFIX, *yyssp, yystate);628 #endif629 if (yyssp >= yyss + yystacksize - 1)630 {631 goto yyoverflow;632 }633 *++yyssp = yystate;634 *++yyvsp = yyval;635 goto yyloop;636 yyoverflow:637 yyerror("yacc stack overflow");638 yyabort:639 return (1);640 
yyaccept:641 return (0);642 }643 -
branches/New_Config_Format-branch/gsdl/packages/mg/sysfuncs.h
r821 r1279 194 194 195 195 /* On MSDOS, there are missing things from <sys/stat.h>. */ 196 #if def __MSDOS__196 #if defined(__MSDOS__) 197 197 #define S_ISUID 0 198 198 #define S_ISGID 0 199 199 #define S_ISVTX 0 200 #endif 201 202 #if defined(__GNUC__) && defined(__WIN32__) 203 #include <limits.h> 200 204 #endif 201 205 -
branches/New_Config_Format-branch/gsdl/perllib/classify/AZSectionList.pm
r741 r1279 30 30 # instead of just top level metadata 31 31 32 # options are:33 # metadata=Metadata34 35 32 # the only change is to the classify() subroutine which 36 33 # must now iterate through each section, adding each … … 39 36 package AZSectionList; 40 37 38 use AZList; 41 39 use sorttools; 42 40 43 sub new { 44 my ($class, @options) = @_; 45 46 my ($metaname); 47 foreach $option (@options) { 48 if ($option =~ /^metadata=(.*)$/i) { 49 $metaname = $1; 50 } 51 } 52 53 if (!defined $metaname) { 54 die "AZSectionList used with no metadata name to classify by\n"; 55 } 56 57 return bless { 58 'list'=>{}, 59 'metaname' => $metaname 60 }, $class; 61 } 62 63 sub init { 64 my $self = shift (@_); 65 66 $self->{'list'} = {}; 41 sub BEGIN { 42 @ISA = ('AZList'); 67 43 } 68 44 … … 103 79 } 104 80 105 sub get_classify_info {106 my $self = shift (@_);107 108 my @classlist = sort {$self->{'list'}->{$a} cmp $self->{'list'}->{$b};}109 keys %{$self->{'list'}};110 111 return $self->splitlist (\@classlist);112 }113 114 sub get_entry {115 my $self = shift (@_);116 my ($title, $childtype, $thistype) = @_;117 118 # organise into classification structure119 my %classifyinfo = ('childtype'=>$childtype,120 'Title'=>$title,121 'contains'=>[]);122 $classifyinfo{'thistype'} = $thistype123 if defined $thistype && $thistype =~ /\w/;124 125 return \%classifyinfo;126 }127 128 # splitlist takes an ordered list of classifications (@$classlistref) and splits it129 # up into alphabetical sub-sections.130 sub splitlist {131 my $self = shift (@_);132 my ($classlistref) = @_;133 my $classhash = {};134 135 # top level136 my $childtype = "HList";137 if (scalar (@$classlistref) <= 39) {$childtype = "VList";}138 my $classifyinfo = $self->get_entry ($self->{'metaname'}, $childtype, "Invisible");139 140 # don't need to do any splitting if there are less than 39 (max + min -1) classifications141 if ((scalar @$classlistref) <= 39) {142 foreach $subOID (@$classlistref) {143 push 
(@{$classifyinfo->{'contains'}}, {'OID'=>$subOID});144 }145 return $classifyinfo;146 }147 148 # first split up the list into separate A-Z and 0-9 classifications149 foreach $classification (@$classlistref) {150 my $title = $self->{'list'}->{$classification};151 $title =~ s/^(.).*$/$1/;152 $title =~ tr/[a-z]/[A-Z]/;153 if ($title =~ /^[0-9]$/) {$title = '0-9';}154 elsif ($title !~ /^[A-Z]$/) {155 print STDERR "AZSectionList: WARNING $classification has badly " .156 "formatted title ($title)\n";157 }158 $classhash->{$title} = [] unless defined $classhash->{$title};159 push (@{$classhash->{$title}}, $classification);160 }161 $classhash = $self->compactlist ($classhash);162 163 my @tmparr = ();164 foreach $subsection (sort keys (%$classhash)) {165 push (@tmparr, $subsection);166 }167 168 # if there's a 0-9 section it will have been sorted to the beginning169 # but we want it at the end170 if ($tmparr[0] eq '0-9') {171 shift @tmparr;172 push (@tmparr, '0-9');173 }174 175 foreach $subclass (@tmparr) {176 my $tempclassify = $self->get_entry($subclass, "VList");177 foreach $subsubOID (@{$classhash->{$subclass}}) {178 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID});179 }180 push (@{$classifyinfo->{'contains'}}, $tempclassify);181 }182 183 return $classifyinfo;184 }185 186 sub compactlist {187 my $self = shift (@_);188 my ($classhashref) = @_;189 my $compactedhash = {};190 my @currentOIDs = ();191 my $currentfirstletter = "";192 my $currentlastletter = "";193 my $lastkey = "";194 195 # minimum and maximum documents to be displayed per page.196 # the actual maximum will be max + (min-1).197 # the smallest sub-section is a single letter at present198 # so in this case there may be many times max documents199 # displayed on a page.200 my $min = 10;201 my $max = 30;202 203 foreach $subsection (sort keys %$classhashref) {204 if ($subsection eq '0-9') {205 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}};206 next;207 }208 $currentfirstletter = 
$subsection if $currentfirstletter eq "";209 if ((scalar (@currentOIDs) < $min) ||210 ((scalar (@currentOIDs) + scalar (@{$classhashref->{$subsection}})) <= $max)) {211 push (@currentOIDs, @{$classhashref->{$subsection}});212 $currentlastletter = $subsection;213 } else {214 215 if ($currentfirstletter eq $currentlastletter) {216 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;217 $lastkey = $currentfirstletter;218 } else {219 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;220 $lastkey = "$currentfirstletter-$currentlastletter";221 }222 if (scalar (@{$classhashref->{$subsection}}) >= $max) {223 $compactedhash->{$subsection} = $classhashref->{$subsection};224 @currentOIDs = ();225 $currentfirstletter = "";226 $lastkey = $subsection;227 } else {228 @currentOIDs = @{$classhashref->{$subsection}};229 $currentfirstletter = $subsection;230 $currentlastletter = $subsection;231 }232 }233 }234 235 # add final OIDs to last sub-classification if there aren't many otherwise236 # add final sub-classification237 if (scalar (@currentOIDs) < $min) {238 my ($newkey) = $lastkey =~ /^(.)/;239 @currentOIDs = (@{$compactedhash->{$lastkey}}, @currentOIDs);240 delete $compactedhash->{$lastkey};241 @{$compactedhash->{"$newkey-$currentlastletter"}} = @currentOIDs;242 } else {243 if ($currentfirstletter eq $currentlastletter) {244 @{$compactedhash->{$currentfirstletter}} = @currentOIDs;245 } else {246 @{$compactedhash->{"$currentfirstletter-$currentlastletter"}} = @currentOIDs;247 }248 }249 250 return $compactedhash;251 }252 81 253 82 1; -
branches/New_Config_Format-branch/gsdl/perllib/classify/List.pm
r677 r1279 144 144 sub get_classify_info { 145 145 my $self = shift (@_); 146 my ($no_thistype) = @_; 147 $no_thistype = 0 unless defined $no_thistype; 146 148 147 149 my @list = (); … … 156 158 157 159 # organise into classification structure 158 my %classifyinfo = ('thistype'=>'Invisible', 159 'childtype'=>'VList', 160 my %classifyinfo = ('childtype'=>'VList', 160 161 'Title'=>$self->{'title'}, 161 162 'contains'=>[]); 163 $classifyinfo{'thistype'} = 'Invisible' unless $no_thistype; 164 162 165 foreach $OID (@list) { 163 166 push (@{$classifyinfo{'contains'}}, {'OID'=>$OID}); -
branches/New_Config_Format-branch/gsdl/perllib/classify/SectionList.pm
r838 r1279 27 27 # (excluding top level) rather than just top level document 28 28 # itself 29 # options are:30 # metadata=Metaname -- (optional) all documents with Metaname metadata31 # will be included in list. if not included all documents32 # will be included in list.33 # sort=Meta -- (optional) sort documents in list alphabetically by34 # Meta. by default it will sort by Metaname, if neither35 # are set documents will be in build (random) order.36 # Meta may be Filename to sort by original filename or37 # nosort to force not to sort38 # title=Title -- (optional) the title field for this classification.39 # if not included title field will be Metaname.40 # if metadata is also not included title will be 'List'41 29 42 30 package SectionList; 43 31 32 use List; 44 33 use sorttools; 45 34 46 sub new { 47 my ($class, @options) = @_; 48 49 my $list = []; 50 my ($metaname, $title, $sortname); 51 52 foreach $option (@options) { 53 if ($option =~ /^metadata=(.*)$/i) { 54 $metaname = $1; 55 $list = {}; 56 } elsif ($option =~ /^title=(.*)$/i) { 57 $title = $1; 58 } elsif ($option =~ /^sort=(.*)$/i) { 59 $sortname = $1; 60 } 61 } 62 63 if (!defined $title) { 64 if (defined $metaname) { 65 $title = $metaname; 66 } else { 67 $title = 'List'; 68 } 69 } 70 71 if (defined $sortname && $sortname =~ /^nosort$/i) { 72 $sortname = undef; 73 } elsif (!defined $sortname && defined $metaname) { 74 $sortname = $metaname; 75 } 76 77 return bless { 78 'list'=>$list, 79 'metaname' => $metaname, 80 'title' => $title, 81 'sortname' => $sortname 82 }, $class; 83 } 84 85 sub init { 86 my $self = shift (@_); 87 88 if (defined $self->{'sortname'}) { 89 $self->{'list'} = {}; 90 } else { 91 $self->{'list'} = []; 92 } 35 sub BEGIN { 36 @ISA = ('List'); 93 37 } 94 38 95 39 sub classify { 96 40 my $self = shift (@_); 97 my ($doc_obj, @options) = @_;41 my ($doc_obj, @options) = @_; 98 42 99 43 my $thissection = undef; … … 175 119 } 176 120 177 sub get_classify_info {178 my $self = shift (@_);179 180 
my @list = ();181 if (defined $self->{'sortname'}) {182 if (keys %{$self->{'list'}}) {183 @list = sort {$self->{'list'}->{$a}184 cmp $self->{'list'}->{$b};} keys %{$self->{'list'}};185 }186 } else {187 @list = @{$self->{'list'}};188 }189 190 # organise into classification structure191 my %classifyinfo = ('thistype'=>'Invisible',192 'childtype'=>'VList',193 'Title'=>$self->{'title'},194 'contains'=>[]);195 foreach $OID (@list) {196 push (@{$classifyinfo{'contains'}}, {'OID'=>$OID});197 }198 199 return \%classifyinfo;200 }201 202 203 121 1; -
branches/New_Config_Format-branch/gsdl/perllib/doc.pm
r846 r1279 24 24 ########################################################################### 25 25 26 # class to hold documents26 # base class to hold documents 27 27 28 28 package doc; 29 29 30 use basedoc;31 32 30 BEGIN { 33 @ISA = ('basedoc'); 34 } 31 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; 32 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/dynamic/lib/site_perl/5.005/i686-linux"); 33 } 34 35 use unicode; 36 use util; 37 use ghtml; 38 ##use hashdoc; 35 39 36 40 # the document type may be indexed_doc, nonindexed_doc, or … … 39 43 sub new { 40 44 my $class = shift (@_); 41 my ($source_filename, $doc_type) = @_; 42 43 my $self = new basedoc(); 45 46 my $self = bless {'associated_files'=>[], 47 'subsection_order'=>[], 48 'next_subsection'=>1, 49 'subsections'=>{}, 50 'metadata'=>[], 51 'text'=>""}, $class; 44 52 45 53 # $self->set_source_filename ($source_filename) if defined $source_filename; 46 push (@{$self->{'metadata'}}, ["gsdlsourcefilename", &unicode::ascii2utf8($source_filename)]) 47 if defined $source_filename; 54 push (@{$self->{'metadata'}}, ["gsdlsourcefilename", $source_filename]) if defined $source_filename; 48 55 # $self->set_doc_type ($doc_type) if defined $doc_type; 49 push (@{$self->{'metadata'}}, ["gsdldoctype", &unicode::ascii2utf8($doc_type)]) 50 if defined $doc_type; 51 52 bless($self,$class); 56 push (@{$self->{'metadata'}}, ["gsdldoctype", $doc_type]) if defined $doc_type; 57 53 58 return $self; 54 59 } 55 60 61 # clone the $self object 62 sub duplicate { 63 my $self = shift (@_); 64 65 my $newobj = {}; 66 67 foreach $k (keys %$self) { 68 $newobj->{$k} = &clone ($self->{$k}); 69 } 70 71 bless $newobj, ref($self); 72 return $newobj; 73 } 74 75 sub clone { 76 my ($from) = @_; 77 my $type = ref ($from); 78 79 if ($type eq "HASH") { 80 my $to = {}; 81 foreach $key (keys %$from) { 82 $to->{$key} = &clone ($from->{$key}); 83 } 84 return $to; 85 } elsif ($type eq "ARRAY") { 86 my $to = []; 87 foreach $v (@$from) { 88 push (@$to, &clone 
($v)); 89 } 90 return $to; 91 } else { 92 return $from; 93 } 94 } 95 96 97 sub set_source_filename { 98 my $self = shift (@_); 99 my ($source_filename) = @_; 100 101 $self->set_metadata_element ($self->get_top_section(), 102 "gsdlsourcefilename", 103 $source_filename); 104 } 105 106 # returns the source_filename as it was provided 107 sub get_source_filename { 108 my $self = shift (@_); 109 110 return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename"); 111 } 112 113 sub set_doc_type { 114 my $self = shift (@_); 115 my ($doc_type) = @_; 116 117 $self->set_metadata_element ($self->get_top_section(), 118 "gsdldoctype", 119 $doc_type); 120 } 121 122 # returns the document type as it was provided 123 # the default of "indexed_doc" is used if no document 124 # type was provided 125 sub get_doc_type { 126 my $self = shift (@_); 127 128 my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype"); 129 return $doc_type if (defined $doc_type); 130 return "indexed_doc"; 131 } 132 133 sub _escape_text { 134 my ($text) = @_; 135 136 # special characters in the gml encoding 137 $text =~ s/&/&amp;/g; # this has to be first... 138 $text =~ s/</&lt;/g; 139 $text =~ s/>/&gt;/g; 140 $text =~ s/\"/&quot;/g; 141 142 return $text; 143 } 144 145 146 sub buffer_section { 147 my $self = shift (@_); 148 my ($section, $suppress_subject_info) = @_; 149 150 $suppress_subject_info = 0 unless defined $suppress_subject_info; 151 my ($all_text,$data, $subsection); 152 153 my $section_ptr = $self->_lookup_section ($section); 154 my ($section_num) = $section =~ /(\d+)$/; 155 156 return "" unless defined $section_ptr; 157 158 # output the section header (including the section number 159 # and metadata) 160 161 $all_text = "<gsdlsection"; 162 $all_text .= " gsdlnum=\"$section_num\"" if defined $section_num; 163 foreach $data (@{$section_ptr->{'metadata'}}) { 164 $all_text .= " $data->[0]=\"" . &_escape_text($data->[1]) .
"\"" 165 unless $suppress_subject_info && $data->[0] eq "Subject"; 166 } 167 $all_text .= ">"; 168 169 # output the text 170 $all_text .= &_escape_text($section_ptr->{'text'}); 171 172 # output all the subsections 173 foreach $subsection (@{$section_ptr->{'subsection_order'}}) { 174 $all_text .= $self->buffer_section("$section.$subsection", $suppress_subject_info); 175 } 176 177 # output the closing tag 178 $all_text .= "</gsdlsection>\n"; 179 180 return $all_text; 181 } 182 183 sub output_section { 184 my $self = shift (@_); 185 my ($handle, $section, $suppress_subject_info) = @_; 186 187 my $all_text = $self->buffer_section($section, $suppress_subject_info); 188 print $handle $all_text; 189 } 190 191 # look up the reference to the a particular section 192 sub _lookup_section { 193 my $self = shift (@_); 194 my ($section) = @_; 195 196 my ($num); 197 my $sectionref = $self; 198 199 while (defined $section && $section ne "") { 200 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/; 201 $num =~ s/^0+(\d)/$1/; # remove leading 0s 202 $section = "" unless defined $section; 203 204 if (defined $num && defined $sectionref->{'subsections'}->{$num}) { 205 $sectionref = $sectionref->{'subsections'}->{$num}; 206 } else { 207 return undef; 208 } 209 } 210 211 return $sectionref; 212 } 213 214 sub _calc_OID { 215 my $self = shift (@_); 216 my ($filename) = @_; 217 218 my $osexe = &util::get_os_exe(); 219 220 my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin", 221 $ENV{'GSDLOS'},"hashfile$osexe"); 222 my $result = "NULL"; 223 224 if (-e "$hashfile_exe") { 225 $result = `$hashfile_exe \"$filename\"`; 226 ($result) = $result =~ /:\s*([0-9a-f]+)/i; 227 228 } else { 229 print STDERR "doc::_calc_OID $hashfile_exe could not be found\n"; 230 } 231 232 return "HASH$result"; 233 } 234 235 # methods dealing with OID, not groups of them. 
236 237 # if $OID is not provided one is calculated from hashing the 238 # current contents of the document 239 # An OID is actually stored as metadata of the document 240 sub set_OID { 241 my $self = shift (@_); 242 my ($OID) = @_; 243 244 # if an OID wasn't provided then feed this document to 245 # hashfile.exe 246 if (!defined $OID) { 247 $OID = "NULL"; 248 my $tmp_filename = &util::get_tmp_filename(); 249 if (!open (OUTFILE, ">$tmp_filename")) { 250 print STDERR "doc::set_OID could not write to $tmp_filename\n"; 251 } else { 252 $self->output_section('OUTFILE', $self->get_top_section(), 1); 253 close (OUTFILE); 254 255 $OID = $self->_calc_OID ($tmp_filename); 256 &util::rm ($tmp_filename); 257 } 258 } 259 260 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID); 261 } 262 263 # this uses hashdoc (embedded c thingy) which is faster but still 264 # needs a little work to be sufficiently stable 265 sub ___set_OID { 266 my $self = shift (@_); 267 my ($OID) = @_; 268 269 # if an OID wasn't provided then calculate hash value based on document 270 if (!defined $OID) 271 { 272 my $hash_text = $self->buffer_section($self->get_top_section(), 1); 273 my $hash_len = length($hash_text); 274 275 $OID = &hashdoc::buffer($hash_text,$hash_len); 276 } 277 278 $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID); 279 } 280 281 # returns the OID for this document 282 sub get_OID { 283 my $self = shift (@_); 284 my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier"); 285 return $OID if (defined $OID); 286 return "NULL"; 287 } 288 289 sub delete_OID { 290 my $self = shift (@_); 291 292 $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL"); 293 } 294 295 296 # methods for manipulating section names 297 298 # returns the name of the top-most section (the top 299 # level of the document) 300 sub get_top_section { 301 my $self = shift (@_); 302 303 return ""; 304 } 305 306 # returns the parent of a section 307 
sub get_parent_section { 308 my $self = shift (@_); 309 my ($section) = @_; 310 311 $section =~ s/(^|\.)\d+$//; 312 313 return $section; 314 } 315 316 # returns the first child section (or the end child 317 # if there isn't any) 318 sub get_begin_child { 319 my $self = shift (@_); 320 my ($section) = @_; 321 322 my $section_ptr = $self->_lookup_section($section); 323 return "" unless defined $section_ptr; 324 325 if (defined $section_ptr->{'subsection_order'}->[0]) { 326 return "$section.$section_ptr->{'subsection_order'}->[0]"; 327 } 328 329 return $self->get_end_child ($section); 330 } 331 332 # returns the next child of a parent section 333 sub get_next_child { 334 my $self = shift (@_); 335 my ($section) = @_; 336 337 my $parent_section = $self->get_parent_section($section); 338 my $parent_section_ptr = $self->_lookup_section($parent_section); 339 return undef unless defined $parent_section_ptr; 340 341 my ($section_num) = $section =~ /(\d+)$/; 342 return undef unless defined $section_num; 343 344 my $i = 0; 345 my $section_order = $parent_section_ptr->{'subsection_order'}; 346 while ($i < scalar(@$section_order)) { 347 last if $section_order->[$i] eq $section_num; 348 $i++; 349 } 350 351 $i++; # the next child 352 if ($i < scalar(@$section_order)) { 353 return $section_order->[$i] if $parent_section eq ""; 354 return "$parent_section.$section_order->[$i]"; 355 } 356 357 # no more sections in this level 358 return undef; 359 } 360 361 # returns a reference to a list of children 362 sub get_children { 363 my $self = shift (@_); 364 my ($section) = @_; 365 366 my $section_ptr = $self->_lookup_section($section); 367 return [] unless defined $section_ptr; 368 369 my @children = @{$section_ptr->{'subsection_order'}}; 370 371 map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children; 372 return \@children; 373 } 374 375 # returns the child section one past the last one (which 376 # is coded as "0") 377 sub get_end_child { 378 my $self = shift (@_); 379 my ($section) = @_; 
380 381 return $section . ".0" unless $section eq ""; 382 return "0"; 383 } 384 385 # returns the next section in book order 386 sub get_next_section { 387 my $self = shift (@_); 388 my ($section) = @_; 389 390 return undef unless defined $section; 391 392 my $section_ptr = $self->_lookup_section($section); 393 return undef unless defined $section_ptr; 394 395 # first try to find first child 396 if (defined $section_ptr->{'subsection_order'}->[0]) { 397 return $section_ptr->{'subsection_order'}->[0] if ($section eq ""); 398 return "$section.$section_ptr->{'subsection_order'}->[0]"; 399 } 400 401 do { 402 # try to find sibling 403 my $next_child = $self->get_next_child ($section); 404 return $next_child if (defined $next_child); 405 406 # move up one level 407 $section = $self->get_parent_section ($section); 408 } while $section =~ /\d/; 409 410 return undef; 411 } 412 413 sub is_leaf_section { 414 my $self = shift (@_); 415 my ($section) = @_; 416 417 my $section_ptr = $self->_lookup_section($section); 418 return 1 unless defined $section_ptr; 419 420 return (scalar (@{$section_ptr->{'subsection_order'}}) == 0); 421 } 422 423 # methods for dealing with sections 424 425 # returns the name of the inserted section 426 sub insert_section { 427 my $self = shift (@_); 428 my ($before_section) = @_; 429 430 # get the child to insert before and its parent section 431 my $parent_section = ""; 432 my $before_child = "0"; 433 my @before_section = split (/\./, $before_section); 434 if (scalar(@before_section) > 0) { 435 $before_child = pop (@before_section); 436 $parent_section = join (".", @before_section); 437 } 438 439 my $parent_section_ptr = $self->_lookup_section($parent_section); 440 if (!defined $parent_section_ptr) { 441 print STDERR "doc::insert_section couldn't find parent section " . 
442 "$parent_section\n"; 443 return; 444 } 445 446 # get the next section number 447 my $section_num = $parent_section_ptr->{'next_subsection'}++; 448 449 my $i = 0; 450 while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) && 451 $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) { 452 $i++; 453 } 454 455 # insert the section number into the order list 456 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num); 457 458 # add this section to the parent section 459 my $section_ptr = {'subsection_order'=>[], 460 'next_subsection'=>1, 461 'subsections'=>{}, 462 'metadata'=>[], 463 'text'=>""}; 464 $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr; 465 466 # work out the full section number 467 my $section = $parent_section; 468 $section .= "." unless $section eq ""; 469 $section .= $section_num; 470 471 return $section; 472 } 473 474 # creates a pre-named section 475 sub create_named_section { 476 my $self = shift (@_); 477 my ($mastersection) = @_; 478 479 my ($num); 480 my $section = $mastersection; 481 my $sectionref = $self; 482 483 #### print STDERR "*** mastersection = $mastersection\n"; 484 485 while ($section ne "") { 486 ($num, $section) = $section =~ /^\.?(\d+)(.*)$/; 487 $num =~ s/^0+(\d)/$1/; # remove leading 0s 488 $section = "" unless defined $section; 489 490 if (defined $num) { 491 if (!defined $sectionref->{'subsections'}->{$num}) { 492 push (@{$sectionref->{'subsection_order'}}, $num); 493 $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[], 494 'next_subsection'=>1, 495 'subsections'=>{}, 496 'metadata'=>[], 497 'text'=>""}; 498 if ($num >= $sectionref->{'next_subsection'}) { 499 $sectionref->{'next_subsection'} = $num + 1; 500 } 501 } 502 $sectionref = $sectionref->{'subsections'}->{$num}; 503 504 } else { 505 print STDERR "doc::create_named_section couldn't create section "; 506 print STDERR "$mastersection\n"; 507 last; 508 } 509 } 510 } 511 512 # returns a reference to a 
list of subsections 513 sub list_subsections { 514 my $self = shift (@_); 515 my ($section) = @_; 516 517 my $section_ptr = $self->_lookup_section ($section); 518 if (!defined $section_ptr) { 519 print STDERR "doc::list_subsections couldn't find section $section\n"; 520 return []; 521 } 522 523 return [@{$section_ptr->{'subsection_order'}}]; 524 } 525 526 sub delete_section { 527 my $self = shift (@_); 528 my ($section) = @_; 529 530 # my $section_ptr = {'subsection_order'=>[], 531 # 'next_subsection'=>1, 532 # 'subsections'=>{}, 533 # 'metadata'=>[], 534 # 'text'=>""}; 535 536 # if this is the top section reset everything 537 if ($section eq "") { 538 $self->{'subsection_order'} = []; 539 $self->{'subsections'} = {}; 540 $self->{'metadata'} = []; 541 $self->{'text'} = ""; 542 return; 543 } 544 545 # find the parent of the section to delete 546 my $parent_section = ""; 547 my $child = "0"; 548 my @section = split (/\./, $section); 549 if (scalar(@section) > 0) { 550 $child = pop (@section); 551 $parent_section = join (".", @section); 552 } 553 554 my $parent_section_ptr = $self->_lookup_section($parent_section); 555 if (!defined $parent_section_ptr) { 556 print STDERR "doc::delete_section couldn't find parent section " . 557 "$parent_section\n"; 558 return; 559 } 560 561 # remove this section from the subsection_order list 562 my $i = 0; 563 while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) { 564 if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) { 565 splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1); 566 last; 567 } 568 $i++; 569 } 570 571 # remove this section from the subsection hash 572 if (defined ($parent_section_ptr->{'subsections'}->{$child})) { 573 undef $parent_section_ptr->{'subsections'}->{$child}; 574 } 575 } 576 577 #-- 56 578 # methods for dealing with metadata 57 579 … … 60 582 # are for metadata which can have more than one value. 
61 583 62 # set_metadata_element assumes the value is in (extended) ascii form. 63 # For text which hash been already converted to the UTF-8 format use 64 # set_utf8_metadata_element. 584 # returns the first metadata value which matches field 585 sub get_metadata_element { 586 my $self = shift (@_); 587 my ($section, $field) = @_; 588 my ($data); 589 590 my $section_ptr = $self->_lookup_section($section); 591 if (!defined $section_ptr) { 592 print STDERR "doc::get_metadata_element couldn't find section " . 593 "$section\n"; 594 return; 595 } 596 597 foreach $data (@{$section_ptr->{'metadata'}}) { 598 return $data->[1] if (scalar(@$data) >= 2 && $data->[0] eq $field); 599 } 600 601 return undef; # was not found 602 } 603 604 605 # returns a list of the form [value1, value2, ...] 606 sub get_metadata { 607 my $self = shift (@_); 608 my ($section, $field) = @_; 609 my ($data); 610 611 my $section_ptr = $self->_lookup_section($section); 612 if (!defined $section_ptr) { 613 print STDERR "doc::get_metadata couldn't find section " . 614 "$section\n"; 615 return; 616 } 617 618 my @metadata = (); 619 foreach $data (@{$section_ptr->{'metadata'}}) { 620 push (@metadata, $data->[1]) if ($data->[0] eq $field); 621 } 622 623 return \@metadata; 624 } 625 626 # returns a list of the form [[field,value],[field,value],...] 627 sub get_all_metadata { 628 my $self = shift (@_); 629 my ($section) = @_; 630 631 my $section_ptr = $self->_lookup_section($section); 632 if (!defined $section_ptr) { 633 print STDERR "doc::get_all_metadata couldn't find section " . 634 "$section\n"; 635 return; 636 } 637 638 return $section_ptr->{'metadata'}; 639 } 640 641 # $value is optional 642 sub delete_metadata { 643 my $self = shift (@_); 644 my ($section, $field, $value) = @_; 645 646 my $section_ptr = $self->_lookup_section($section); 647 if (!defined $section_ptr) { 648 print STDERR "doc::delete_metadata couldn't find section " . 
649 "$section\n"; 650 return; 651 } 652 653 my $i = 0; 654 while ($i < scalar (@{$section_ptr->{'metadata'}})) { 655 if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) && 656 (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) { 657 splice (@{$section_ptr->{'metadata'}}, $i, 1); 658 } else { 659 $i++; 660 } 661 } 662 } 663 664 sub delete_all_metadata { 665 my $self = shift (@_); 666 my ($section) = @_; 667 668 my $section_ptr = $self->_lookup_section($section); 669 if (!defined $section_ptr) { 670 print STDERR "doc::delete_all_metadata couldn't find section " . 671 "$section\n"; 672 return; 673 } 674 675 $section_ptr->{'metadata'} = []; 676 } 677 65 678 sub set_metadata_element { 66 679 my $self = shift (@_); … … 112 725 # methods for dealing with text 113 726 727 # returns the text for a section 728 sub get_text { 729 my $self = shift (@_); 730 my ($section) = @_; 731 732 my $section_ptr = $self->_lookup_section($section); 733 if (!defined $section_ptr) { 734 print STDERR "doc::get_text couldn't find section " . 735 "$section\n"; 736 return ""; 737 } 738 739 return $section_ptr->{'text'}; 740 } 741 742 # returns the (utf-8 encoded) length of the text for a section 743 sub get_text_length { 744 my $self = shift (@_); 745 my ($section) = @_; 746 747 my $section_ptr = $self->_lookup_section($section); 748 if (!defined $section_ptr) { 749 print STDERR "doc::get_text_length couldn't find section " . 750 "$section\n"; 751 return 0; 752 } 753 754 return length ($section_ptr->{'text'}); 755 } 756 757 sub delete_text { 758 my $self = shift (@_); 759 my ($section) = @_; 760 761 my $section_ptr = $self->_lookup_section($section); 762 if (!defined $section_ptr) { 763 print STDERR "doc::delete_text couldn't find section " . 764 "$section\n"; 765 return; 766 } 767 768 $section_ptr->{'text'} = ""; 769 } 770 114 771 # add_text assumes the text is in (extended) ascii form. 
For 115 772 # text which has been already converted to the UTF-8 format … … 143 800 144 801 802 # methods for dealing with associated files 803 804 # a file is associated with a document, NOT a section. 805 # if section is defined it is noted in the data structure 806 # only so that files associated from a particular section 807 # may be removed later (using delete_section_assoc_files) 808 sub associate_file { 809 my $self = shift (@_); 810 my ($real_filename, $assoc_filename, $mime_type, $section) = @_; 811 $mime_type = &ghtml::guess_mime_type ($real_filename) unless defined $mime_type; 812 813 # remove all associated files with the same name 814 $self->delete_assoc_file ($assoc_filename); 815 816 push (@{$self->{'associated_files'}}, 817 [$real_filename, $assoc_filename, $mime_type, $section]); 818 } 819 820 # returns a list of associated files in the form 821 # [[real_filename, assoc_filename, mimetype], ...] 822 sub get_assoc_files { 823 my $self = shift (@_); 824 825 return $self->{'associated_files'}; 826 } 827 828 sub delete_section_assoc_files { 829 my $self = shift (@_); 830 my ($section) = @_; 831 832 my $i=0; 833 while ($i < scalar (@{$self->{'associated_files'}})) { 834 if (defined $self->{'associated_files'}->[$i]->[3] && 835 $self->{'associated_files'}->[$i]->[3] eq $section) { 836 splice (@{$self->{'associated_files'}}, $i, 1); 837 } else { 838 $i++; 839 } 840 } 841 } 842 843 sub delete_assoc_file { 844 my $self = shift (@_); 845 my ($assoc_filename) = @_; 846 847 my $i=0; 848 while ($i < scalar (@{$self->{'associated_files'}})) { 849 if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) { 850 splice (@{$self->{'associated_files'}}, $i, 1); 851 } else { 852 $i++; 853 } 854 } 855 } 856 857 sub reset_nextsection_ptr { 858 my $self = shift (@_); 859 my ($section) = @_; 860 861 my $section_ptr = $self->_lookup_section($section); 862 $section_ptr->{'next_subsection'} = 1; 863 } 864 145 865 1; -
branches/New_Config_Format-branch/gsdl/perllib/gb.pm
r537 r1279 163 163 164 164 return 0 unless open (MAPFILE, "$ENV{'GSDLHOME'}/unicode/$filename"); 165 binmode (MAPFILE); # f$#@!!! windows 165 166 166 167 $translations{$encoding} = [@array256]; -
branches/New_Config_Format-branch/gsdl/perllib/ghtml.pm
r1010 r1279 34 34 sub htmlsafe 35 35 { 36 $_[0] =~ s/&/&amp;/og; 37 $_[0] =~ s/</&lt;/og; 38 $_[0] =~ s/>/&gt;/og; 36 $_[0] =~ s/&/&amp;/osg; 37 $_[0] =~ s/</&lt;/osg; 38 $_[0] =~ s/>/&gt;/osg; 39 $_[0] =~ s/\"/&quot;/osg; 39 40 } 40 41 … … 45 46 sub urlsafe 46 47 { 47 $_[0] =~ s/[\x09\x20\x22\x3c\x3e\x5b\x5c\x5d\x5e\x60\x7b\x7c\x7d\x7e\?\=\&\+_\/]/sprintf("%%%2x", ord($&))/ge; 48 $_[0] =~ s/[\x09\x20\x22\x3c\x3e\x5b\x5c\x5d\x5e\x60\x7b\x7c\x7d\x7e\?\=\&\+_\/]/sprintf("%%%2x", ord($&))/gse; 48 49 } 49 50 … … 56 57 sub dmsafe { 57 58 my ($s) = $_[0]; 58 $s =~ s/&/&amp;/og; # for html 59 $s =~ s/</&lt;/og; # for html 60 $s =~ s/>/&gt;/og; # for html 61 $s =~ s/\_/&#95;/og; # for dm (we have a convention of starting macros with _) 62 $s =~ s/\"/&quot;/og; # for html (don't want to be interpreted as a quote) 63 $s =~ s/\{/&#123;/og; # for dm blocks 64 $s =~ s/\}/&#125;/og; # for dm blocks 59 $s =~ s/&/&amp;/osg; # for html 60 $s =~ s/</&lt;/osg; # for html 61 $s =~ s/>/&gt;/osg; # for html 62 $s =~ s/\_/&#95;/osg; # for dm (we have a convention of starting macros with _) 63 $s =~ s/\"/&quot;/osg; # for html (don't want to be interpreted as a quote) 64 $s =~ s/\{/&#123;/osg; # for dm blocks 65 $s =~ s/\}/&#125;/osg; # for dm blocks 66 $s =~ s/\\/&#92;/osg; # for dm (dm removes naturally occurring backquotes) 65 67 return $s; 66 68 } … … 170 172 # args: the text that you want to convert 171 173 172 $_[0] =~ s/&([^;]+);/&getcharequiv($1,0)/ge; 174 $_[0] =~ s/&([^;]+);/&getcharequiv($1,0)/gse; 173 175 } 174 176 … … 177 179 # args: the text that you want to convert 178 180 179 $_[0] =~ s/&([^;]+);/&getcharequiv($1,1)/ge; 181 $_[0] =~ s/&([^;]+);/&getcharequiv($1,1)/gse; 180 182 } 181 183 -
branches/New_Config_Format-branch/gsdl/perllib/mgbuilder.pm
r1072 r1279 99 99 100 100 # load all the plugins 101 $self->{'pluginfo'} = &plugin::load_plugins ($plugins );101 $self->{'pluginfo'} = &plugin::load_plugins ($plugins, $verbosity); 102 102 if (scalar(@{$self->{'pluginfo'}}) == 0) { 103 103 print STDERR "No plugins were loaded.\n"; … … 207 207 close ($handle) unless $self->{'debug'}; 208 208 209 $self->print_stats(); 210 209 211 # create the compression dictionary 210 212 # the compression dictionary is built by assuming the stats are from a seed … … 232 234 "", {}, $self->{'buildproc'}, $self->{'maxdocs'}); 233 235 close ($handle) unless $self->{'debug'}; 236 237 $self->print_stats(); 234 238 } 235 239 … … 481 485 close ($handle) unless $self->{'debug'}; 482 486 487 $self->print_stats(); 488 483 489 if (!$self->{'debug'}) { 484 490 # create the perfect hash function … … 502 508 "", {}, $self->{'buildproc'}, $self->{'maxdocs'}); 503 509 510 $self->print_stats (); 511 504 512 if (!$self->{'debug'}) { 505 513 … … 646 654 $build_cfg->{'numbytes'} = $self->{'buildproc'}->get_num_bytes(); 647 655 656 # get additional stats from mg 657 my $exedir = "$ENV{'GSDLHOME'}/bin/$ENV{'GSDLOS'}"; 658 my $exe = &util::get_os_exe (); 659 my $mgstat_exe = &util::filename_cat($exedir, "mgstat$exe"); 660 my $input_file = &util::filename_cat ("text", $self->{'collection'}); 661 if (!-e "$mgstat_exe" || !open (PIPEIN, "$mgstat_exe -d $self->{'build_dir'} -f $input_file |")) { 662 print STDERR "Warning: Couldn't open pipe to $mgstat_exe to get additional stats\n"; 663 } else { 664 my $line = ""; 665 while (defined ($line = <PIPEIN>)) { 666 if ($line =~ /^Words in collection \[dict\]\s+:\s+(\d+)/) { 667 ($build_cfg->{'numwords'}) = $1; 668 } elsif ($line =~ /^Documents\s+:\s+(\d+)/) { 669 ($build_cfg->{'numsections'}) = $1; 670 } 671 } 672 close PIPEIN; 673 } 674 648 675 # store the mapping between the index names and the directory names 649 676 my @indexmap = (); … … 667 694 $build_cfg->{'languagemap'} = \@languagemap if scalar 
(@languagemap); 668 695 669 $build_cfg->{'notbuilt'} = $self->{'notbuilt'} ;696 $build_cfg->{'notbuilt'} = $self->{'notbuilt'} if scalar @{$self->{'notbuilt'}}; 670 697 671 698 # write out the build information 672 699 &cfgread::write_cfg_file("$self->{'build_dir'}/build.cfg", $build_cfg, 673 '^(builddate|numdocs|numbytes )$',700 '^(builddate|numdocs|numbytes|numwords|numsections)$', 674 701 '^(indexmap|subcollectionmap|languagemap|notbuilt)$'); 675 702 … … 680 707 } 681 708 709 sub print_stats { 710 my $self = shift (@_); 711 712 my $indexing_text = $self->{'buildproc'}->get_indexing_text(); 713 my $index = $self->{'buildproc'}->get_index(); 714 my $num_bytes = $self->{'buildproc'}->get_num_bytes(); 715 my $num_processed_bytes = $self->{'buildproc'}->get_num_processed_bytes(); 716 717 if ($indexing_text) { 718 print STDERR "Stats (Creating index $index)\n"; 719 } else { 720 print STDERR "Stats (Compressing text from $index)\n"; 721 } 722 print STDERR "Total bytes in collection: $num_bytes\n"; 723 print STDERR "Total bytes in $index: $num_processed_bytes\n"; 724 725 if ($num_processed_bytes < 50) { 726 print STDERR "***************\n"; 727 print STDERR "WARNING: There is very little or no text to process for $index\n"; 728 if ($indexing_text) { 729 print STDERR "This may cause an error while attempting to build the index\n"; 730 } else { 731 print STDERR "This may cause an error while attempting to compress the text\n"; 732 } 733 print STDERR "***************\n"; 734 } 735 } 682 736 683 737 1; -
branches/New_Config_Format-branch/gsdl/perllib/mgbuildproc.pm
r1072 r1279 58 58 $self->{'num_sections'} = 0; 59 59 $self->{'num_bytes'} = 0; 60 $self->{'num_processed_bytes'} = 0; 60 61 61 62 $self->{'indexing_text'} = 0; … … 69 70 $self->{'num_docs'} = 0; 70 71 $self->{'num_sections'} = 0; 72 $self->{'num_processed_bytes'} = 0; 71 73 $self->{'num_bytes'} = 0; 72 74 } … … 84 86 } 85 87 88 # num_bytes is the actual number of bytes in the collection 89 # this is normally the same as what's processed during text compression 86 90 sub get_num_bytes { 87 91 my $self = shift (@_); 88 92 89 93 return $self->{'num_bytes'}; 94 } 95 96 # num_processed_bytes is the number of bytes actually passed 97 # to mg for the current index 98 sub get_num_processed_bytes { 99 my $self = shift (@_); 100 101 return $self->{'num_processed_bytes'}; 90 102 } 91 103 … … 126 138 } 127 139 140 sub get_index { 141 my $self = shift (@_); 142 143 return $self->{'index'}; 144 } 145 128 146 sub set_classifiers { 129 147 my $self = shift (@_); … … 138 156 139 157 $self->{'indexing_text'} = $indexing_text; 158 } 159 160 sub get_indexing_text { 161 my $self = shift (@_); 162 163 return $self->{'indexing_text'}; 140 164 } 141 165 … … 416 440 if ($real_field eq "text") { 417 441 $new_text = $doc_obj->get_text ($section); 442 $self->{'num_processed_bytes'} += length ($new_text); 418 443 $new_text =~ s/[\cB\cC]//g; 419 444 $self->find_paragraphs($new_text); … … 423 448 foreach $meta (@{$doc_obj->get_metadata ($section, $real_field)}) { 424 449 $meta =~ s/[\cB\cC]//g; 450 $self->{'num_processed_bytes'} += length ($meta); 425 451 $new_text .= "\cC" unless $first; 426 452 $new_text .= $meta; -
branches/New_Config_Format-branch/gsdl/perllib/multiread.pm
r627 r1279 26 26 # encodings currently supported are 27 27 # 28 # utf8 - either utf8 or unicode (automatically detected) 29 # unicode - just unicode (doesn't currently do endian detection) 30 # gb - GB 31 # extended - extended ascii 32 28 # utf8 - either utf8 or unicode (automatically detected) 29 # unicode - just unicode (doesn't currently do endian detection) 30 # gb - GB 31 # iso_8859_1 - extended ascii (iso-8859-1) 32 # iso_8859_6 - 8 bit arabic (iso-8859-6) 33 # windows_1256 - Windows codepage 1256 (Arabic) 33 34 34 35 package multiread; … … 169 170 } 170 171 171 if ($self->{'encoding'} eq " extended") {172 # extended ascii172 if ($self->{'encoding'} eq "iso_8859_1") { 173 # Latin 1 extended ascii (ISO-8859-1) 173 174 return undef if (eof ($handle)); 174 175 return &unicode::ascii2utf8 (getc ($handle)); 176 } 177 178 if ($self->{'encoding'} eq "iso_8859_6") { 179 # 8 bit Arabic (IOS-8859-6) 180 return undef if (eof ($handle)); 181 return &unicode::unicode2utf8(&unicode::arabic2unicode (getc ($handle))); 182 } 183 184 if ($self->{'encoding'} eq "windows_1256") { 185 # Windows 1256 (Arabic) 186 return undef if (eof ($handle)); 187 return &unicode::unicode2utf8(&unicode::windows2unicode ("1256", getc ($handle))); 175 188 } 176 189 … … 236 249 } 237 250 238 if ($self->{'encoding'} eq " extended") {239 # extended ascii 251 if ($self->{'encoding'} eq "iso_8859_1") { 252 # extended ascii (ISO-8859-1) 240 253 my $line = ""; 241 254 if (defined ($line = <$handle>)) { … … 244 257 return undef; 245 258 } 259 260 if ($self->{'encoding'} eq "iso_8859_6") { 261 # 8 bit arabic (ISO-8859-6) 262 my $line = ""; 263 if (defined ($line = <$handle>)) { 264 return &unicode::unicode2utf8(&unicode::arabic2unicode ($line)); 265 } 266 return undef; 267 } 268 269 if ($self->{'encoding'} eq "windows_1256") { 270 # Windows 1256 (Arabic) 271 my $line = ""; 272 if (defined ($line = <$handle>)) { 273 return &unicode::unicode2utf8(&unicode::windows2unicode ("1256", $line)); 274 } 275 return 
undef; 276 } 246 277 247 278 # unknown encoding … … 250 281 251 282 283 # will convert entire contents of file to utf8 and append result to $outputref 284 # this may be a slightly faster way to get the contents of a file than by 285 # recursively calling read_line() 286 sub read_file { 287 my $self = shift (@_); 288 my ($outputref) = @_; 289 290 # make sure we have a file handle 291 return if ($self->{'handle'} eq ""); 292 293 my $handle = $self->{'handle'}; 294 295 if ($self->{'first'} && $self->{'encoding'} eq "utf8") { 296 # special case for the first line of utf8 text to detect whether 297 # the file is in utf8 or unicode 298 $$text .= $self->read_line (); 299 } 300 301 if ($self->{'encoding'} eq "utf8") { 302 undef $/; 303 $$outputref .= <$handle>; 304 $/ = "\n"; 305 return; 306 } 307 308 if ($self->{'encoding'} eq "unicode") { 309 my $line = ""; 310 while (defined ($line = $self->read_line())) { 311 $$outputref .= $line; 312 } 313 return; 314 } 315 316 if ($self->{'encoding'} eq "gb") { 317 undef $/; 318 my $text = <$handle>; 319 $/ = "\n"; 320 $$outputref .= &unicode::unicode2utf8 (&gb::gb2unicode ($text)); 321 return; 322 } 323 324 if ($self->{'encoding'} eq "iso_8859_1") { 325 undef $/; 326 my $text = <$handle>; 327 $/ = "\n"; 328 $$outputref .= &unicode::ascii2utf8 ($text); 329 return; 330 } 331 332 if ($self->{'encoding'} eq "iso_8859_6") { 333 my $text = <$handle>; 334 undef $/; 335 $/ = "\n"; 336 $$outputref .= &unicode::unicode2utf8(&unicode::arabic2unicode ($text)); 337 return; 338 } 339 340 if ($self->{'encoding'} eq "windows_1256") { 341 undef $/; 342 my $text = <$handle>; 343 $/ = "\n"; 344 $$outputref .= &unicode::unicode2utf8(&unicode::windows2unicode ("1256", $text)); 345 return; 346 } 347 } 348 349 252 350 1; -
branches/New_Config_Format-branch/gsdl/perllib/parsargv.pm
r537 r1279 64 64 # Returns 0 if there was an error, nonzero otherwise. 65 65 # 66 sub parse 66 67 68 sub parse 67 69 { 68 70 my $arglist = shift; … … 70 72 my %option; 71 73 72 while (($spec, $var) = splice(@_, 0, 2)) 73 { 74 my @rest = @_; 75 76 # if the last argument is the string "allow_extra_options" then options 77 # in \@rest without a corresponding SPEC will be ignored (i.e. the "$arg is 78 # not a valid option" error won't occur)\n"; 79 my $allow_extra_options = pop @rest; 80 if (defined ($allow_extra_options)) { 81 if ($allow_extra_options eq "allow_extra_options") { 82 $allow_extra_options = 1; 83 } else { 84 # put it back where we got it 85 push (@rest, $allow_extra_options); 86 $allow_extra_options = 0; 87 } 88 } else { 89 $allow_extra_options = 0; 90 } 91 92 while (($spec, $var) = splice(@rest, 0, 2)) 93 { 74 94 die "Variable for $spec is not a valid type." 75 95 unless ref($var) eq 'SCALAR' || ref($var) eq 'ARRAY'; … … 126 146 &process_arg($option{$arg}, $arglist, \$errors); 127 147 } 128 els e148 elsif (!$allow_extra_options) 129 149 { 130 150 print STDERR "$arg is not a valid option.\n"; -
branches/New_Config_Format-branch/gsdl/perllib/plugin.pm
r835 r1279 29 29 30 30 sub load_plugins { 31 my ($plugin_list ) = @_;31 my ($plugin_list, $verbosity) = @_; 32 32 my @plugin_objects = (); 33 34 $verbosity = 2 unless defined $verbosity; 33 35 34 36 foreach $pluginoptions (@$plugin_list) { … … 49 51 map { $_ = "\"$_\""; } @$pluginoptions; 50 52 my $options = join (",", @$pluginoptions); 53 $options =~ s/\$/\\\$/g; 51 54 eval ("\$plugobj = new \$pluginname($options)"); 52 55 die "$@" if $@; 53 56 57 # initialize plugin 58 $plugobj->init($verbosity); 59 54 60 # add this object to the list 55 61 push (@plugin_objects, $plugobj); -
branches/New_Config_Format-branch/gsdl/perllib/plugins/ArcPlug.pm
r809 r1279 39 39 } 40 40 41 use strict; 42 41 43 sub new { 42 44 my ($class) = @_; 43 my $self = new BasPlug ( );45 my $self = new BasPlug ("ArcPlug", @_); 44 46 45 47 return bless $self, $class; … … 58 60 sub read { 59 61 my $self = shift (@_); 60 ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;62 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 61 63 62 64 my $count = 0; 63 65 64 66 # see if this has a archives information file within it 65 $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf");67 my $archive_info_filename = &util::filename_cat($base_dir,$file,"archives.inf"); 66 68 67 69 if (-e $archive_info_filename) { … … 77 79 78 80 # process each file 79 foreach $subfile (@$file_list) {81 foreach my $subfile (@$file_list) { 80 82 last if ($maxdocs != -1 && $count >= $maxdocs); 81 83 -
branches/New_Config_Format-branch/gsdl/perllib/plugins/BasPlug.pm
r839 r1279 26 26 package BasPlug; 27 27 28 use parsargv; 29 use multiread; 30 use cnseg; 31 use acronym; 32 use strict; 33 use doc; 34 35 sub print_general_usage { 36 my ($plugin_name) = @_; 37 38 print STDERR "\n usage: plugin $plugin_name [options]\n\n"; 39 print STDERR " -input_encoding The encoding of the source documents. Documents will be\n"; 40 print STDERR " converted from these encodings and stored internally as\n"; 41 print STDERR " utf8. The default input_encoding is Latin1. Accepted values\n"; 42 print STDERR " are:\n"; 43 print STDERR " iso_8859_1 (extended ascii)\n"; 44 print STDERR " Latin1 (the same as iso-8859-1)\n"; 45 print STDERR " ascii (7 bit ascii -- may be faster than Latin1 as no\n"; 46 print STDERR " conversion is neccessary)\n"; 47 print STDERR " gb (GB or GBK simplified Chinese)\n"; 48 print STDERR " iso_8859_6 (8 bit Arabic)\n"; 49 print STDERR " windows_1256 (Windows codepage 1256 (Arabic))\n"; 50 print STDERR " Arabic (the same as windows_1256)\n"; 51 print STDERR " utf8 (either utf8 or unicode -- automatically detected)\n"; 52 print STDERR " unicode (just unicode -- doesn't currently do endian\n"; 53 print STDERR " detection)\n"; 54 print STDERR " -process_exp A perl regular expression to match against filenames.\n"; 55 print STDERR " Matching filenames will be processed by this plugin.\n"; 56 print STDERR " Each plugin has its own default process_exp. e.g HTMLPlug\n"; 57 print STDERR " defaults to '(?i)\.html?\$' i.e. all documents ending in\n"; 58 print STDERR " .htm or .html (case-insensitive).\n"; 59 print STDERR " -block_exp Files matching this regular expression will be blocked from\n"; 60 print STDERR " being passed to any further plugins in the list. This has no\n"; 61 print STDERR " real effect other than to prevent lots of warning messages\n"; 62 print STDERR " about input files you don't care about. Each plugin may or may\n"; 63 print STDERR " not have a default block_exp. e.g. 
by default HTMLPlug blocks\n"; 64 print STDERR " any files with .gif, .jpg, .jpeg, .png, .pdf, .rtf or .css\n"; 65 print STDERR " file extensions.\n"; 66 print STDERR " -extract_acronyms Extract acronyms from within text and set as metadata\n\n"; 67 } 68 69 # print_usage should be overridden for any sub-classes having 70 # their own plugin specific options 71 sub print_usage { 72 print STDERR "\nThis plugin has no plugin specific options\n\n"; 73 74 } 28 75 29 76 sub new { 30 my ($class) = @_; 31 32 return bless {}, $class; 77 my $class = shift (@_); 78 my $plugin_name = shift (@_); 79 80 my $self = {}; 81 my $encodings = "^(iso_8859_1|Latin1|ascii|gb|iso_8859_6|windows_1256|Arabic|utf8|unicode)\$"; 82 83 # general options available to all plugins 84 if (!parsargv::parse(\@_, 85 qq^input_encoding/$encodings/Latin1^, \$self->{'input_encoding'}, 86 q^process_exp/.*/^, \$self->{'process_exp'}, 87 q^block_exp/.*/^, \$self->{'block_exp'}, 88 q^extract_acronyms^, \$self->{'extract_acronyms'}, 89 "allow_extra_options")) { 90 91 print STDERR "\nThe $plugin_name plugin uses an incorrect general option (general options are those\n"; 92 print STDERR "available to all plugins). Check your collect.cfg configuration file.\n"; 93 &print_general_usage($plugin_name); 94 die "\n"; 95 } 96 97 return bless $self, $class; 98 } 99 100 # initialize BasPlug options 101 # if init() is overridden in a sub-class, remember to call BasPlug::init() 102 sub init { 103 my $self = shift (@_); 104 my ($verbosity) = @_; 105 106 # verbosity is passed through from the processor 107 $self->{'verbosity'} = $verbosity; 108 109 # set process_exp and block_exp to defaults unless they were 110 # explicitly set 111 112 if ((!$self->is_recursive()) and 113 (!defined $self->{'process_exp'}) || ($self->{'process_exp'} eq "")) { 114 115 $self->{'process_exp'} = $self->get_default_process_exp (); 116 if ($self->{'process_exp'} eq "") { 117 warn ref($self) . 
" Warning: Non-recursive plugin has no process_exp\n"; 118 } 119 } 120 121 if ((!defined $self->{'block_exp'}) || ($self->{'block_exp'} eq "")) { 122 $self->{'block_exp'} = $self->get_default_block_exp (); 123 } 124 125 # handle input_encoding aliases 126 $self->{'input_encoding'} = "iso_8859_1" if $self->{'input_encoding'} eq "Latin1"; 127 $self->{'input_encoding'} = "windows_1256" if $self->{'input_encoding'} eq "Arabic"; 33 128 } 34 129 … … 42 137 } 43 138 44 # return 1 if this class might recurse using $pluginfo 139 # this function should be overridden to return 1 140 # in recursive plugins 45 141 sub is_recursive { 46 142 my $self = shift (@_); 47 143 48 die "BasPlug::is_recursive function must be implemented in sub classes\n"; 49 } 50 51 # return number of files processed, undef if can't process 144 return 0; 145 } 146 147 sub get_default_block_exp { 148 my $self = shift (@_); 149 150 return ""; 151 } 152 153 sub get_default_process_exp { 154 my $self = shift (@_); 155 156 return ""; 157 } 158 159 # The BasPlug read() function. This function does all the right things 160 # to make general options work for a given plugin. It calls the process() 161 # function which does all the work specific to a plugin (like the old 162 # read functions used to do). Most plugins should define their own 163 # process() function and let this read() function keep control. 164 # 165 # recursive plugins (e.g. RecPlug) and specialized plugins like those 166 # capable of processing many documents within a single file (e.g. 
167 # GMLPlug) should normally implement their own version of read() 168 # 169 # Return number of files processed, undef if can't process 52 170 # Note that $base_dir might be "" and that $file might 53 171 # include directories 172 54 173 sub read { 55 174 my $self = shift (@_); 56 175 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 57 176 58 die "BasPlug::read function must be implemented in sub classes\n"; 59 60 return undef; # will never get here 61 } 62 63 sub extra_metadata 64 { 65 my ($self,$doc_obj,$cursection, $metadata) = @_; 66 67 foreach $field (keys(%$metadata)) { 177 if ($self->is_recursive()) { 178 die "BasPlug::read function must be implemented in sub-class for recursive plugins\n"; 179 } 180 181 my $filename = &util::filename_cat($base_dir, $file); 182 return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 183 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 184 return undef; 185 } 186 my $plugin_name = ref ($self); 187 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 188 189 # create a new document 190 my $doc_obj = new doc ($file, "indexed_doc"); 191 192 # read in file ($text will be in utf8) 193 my $text = ""; 194 $self->read_file ($filename, \$text); 195 196 if ($text !~ /\w/) { 197 print STDERR "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'}; 198 return 0; 199 } 200 201 # include any metadata passed in from previous plugins 202 # note that this metadata is associated with the top level section 203 $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata); 204 205 # do plugin specific processing of doc_obj 206 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj)); 207 208 # do any automatic metadata extraction 209 $self->auto_extract_metadata ($doc_obj); 210 211 # add an OID 212 $doc_obj->set_OID(); 213 214 # process the document 215 $processor->process($doc_obj); 216 217 
return 1; # processed the file 218 } 219 220 # returns undef if file is rejected by the plugin 221 sub process { 222 my $self = shift (@_); 223 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 224 225 die "Basplug::process function must be implemented in sub-class\n"; 226 227 return undef; # never gets here 228 } 229 230 # uses the multiread package to read in the entire file pointed to 231 # by filename and loads the resulting text into $$textref. Input text 232 # may be in any of the encodings handled by multiread, output text 233 # will be in utf8 234 sub read_file { 235 my $self = shift (@_); 236 my ($filename, $textref) = @_; 237 238 $$textref = ""; 239 240 open (FILE, $filename) || die "BasPlug::read_file could not open $filename for reading ($!)\n"; 241 242 if ($self->{'input_encoding'} eq "ascii") { 243 undef $/; 244 $$textref = <FILE>; 245 $/ = "\n"; 246 } else { 247 my $reader = new multiread(); 248 $reader->set_handle ('BasPlug::FILE'); 249 $reader->set_encoding ($self->{'input_encoding'}); 250 $reader->read_file ($textref); 251 252 if ($self->{'input_encoding'} eq "gb") { 253 # segment the Chinese words 254 $$textref = &cnseg::segment($$textref); 255 } 256 } 257 258 close FILE; 259 } 260 261 # add any extra metadata that's been passed around from one 262 # plugin to another. 
263 # extra_metadata uses add_utf8_metadata so it expects metadata values 264 # to already be in utf8 265 sub extra_metadata { 266 my $self = shift (@_); 267 my ($doc_obj, $cursection, $metadata) = @_; 268 269 foreach my $field (keys(%$metadata)) { 68 270 # $metadata->{$field} may be an array reference 69 271 if (ref ($metadata->{$field}) eq "ARRAY") { 70 272 map { 71 $doc_obj->add_ metadata ($cursection, $field, $_);273 $doc_obj->add_utf8_metadata ($cursection, $field, $_); 72 274 } @{$metadata->{$field}}; 73 275 } else { 74 $doc_obj->add_metadata ($cursection, $field, $metadata->{$field}); 276 $doc_obj->add_utf8_metadata ($cursection, $field, $metadata->{$field}); 277 } 278 } 279 } 280 281 # extract acronyms (and hopefully other stuff soon too). 282 sub auto_extract_metadata { 283 my $self = shift (@_); 284 my ($doc_obj) = @_; 285 286 if ($self->{'extract_acronyms'}) { 287 my $thissection = $doc_obj->get_top_section(); 288 while (defined $thissection) { 289 my $text = $doc_obj->get_text($thissection); 290 $self->extract_acronyms (\$text, $doc_obj, $thissection) if $text =~ /./; 291 $thissection = $doc_obj->get_next_section ($thissection); 292 } 293 } 294 } 295 296 sub extract_acronyms { 297 my $self = shift (@_); 298 my ($textref, $doc_obj, $thissection) = @_; 299 300 my $acro_array = &acronym::acronyms($textref); 301 302 foreach my $acro (@$acro_array) { 303 304 #do the normal acronym 305 $doc_obj->add_utf8_metadata($thissection, "Acronym", $acro->to_string()); 306 print "found " . $acro->to_string() . "\n"; 307 308 # do the KWIC (Key Word In Context) acronym 309 my @kwic = $acro->to_string_kwic(); 310 foreach my $kwic (@kwic) { 311 $doc_obj->add_utf8_metadata($thissection, "AcronymKWIC", $kwic); 312 print "found (KWIC)" . $kwic . "\n"; 75 313 } 76 314 } -
branches/New_Config_Format-branch/gsdl/perllib/plugins/EMAILPlug.pm
r638 r1279 26 26 27 27 28 # 29 # EMAILPlug reads an email file (*.email) 30 # 31 # Version 1.1 1999 Sep 20 by Gordon Paynter ([email protected]) 32 # loosely based on the original HTMLPlug code 28 29 # EMAILPlug 30 # 31 # by Gordon Paynter ([email protected]) 32 # 33 # Email plug reads email files. These are named with a simple 34 # number (i.e. as they appear in mh_mail folders) or with the 35 # extension .email 33 36 # 34 37 # Document text: 35 # The document text consists of all the text occuring after the first36 # blank line in thisdocument.38 # The document text consists of all the text 39 # after the first blank line in the document. 37 40 # 38 41 # Metadata: 42 # $Headers All the header content 39 43 # $Subject Subject: header 40 44 # $To To: header … … 42 46 # $DateText Date: header 43 47 # $Date Date: header in GSDL format (eg: 19990924) 44 # $OtherHeaders All the other headers 45 # $NewText The unquoted text in this message 48 # 49 # Version history 50 # 51 # 1.2 (2000 Jun 12) Major rewrite. 52 # (The new version of Greenstone breaks some of the metadata.) 53 # 1.1.1 Compensated for two-digit years like "95" 54 # 1.1 (1999 Sep 20) Introduced the various metadata fileds 55 # 1.0 Based on the original HTMLPlug code 46 56 # 47 57 … … 56 66 # EMAILPlug is a sub-class of BasPlug. 57 67 58 sub BEGIN { 68 sub BEGIN { 59 69 @ISA = ('BasPlug'); 60 70 } 61 71 72 use strict; 62 73 63 74 # Create a new EMAILPlug object with which to parse a file. 64 # This is done by creating a new BasPlug and usig bless to75 # Accomplished by creating a new BasPlug and using bless to 65 76 # turn it into an EMAILPlug. 66 77 67 78 sub new { 68 79 my ($class) = @_; 69 $self = new BasPlug ();80 my $self = new BasPlug ("EMAILPlug", @_); 70 81 71 82 return bless $self, $class; 72 83 } 73 84 74 75 # Is the EMAILPlug recursive? No. 
76 77 sub is_recursive { 85 sub get_default_process_exp { 78 86 my $self = shift (@_); 79 87 80 return 0; # this is not a recursive plugin 81 } 82 83 84 # 85 # read 86 # 87 # read attempts to read a file and store its contents in a 88 # new document object. 89 # 90 # Returns: number of files processed or undef if can't process 91 # This plugin only processes one file at a time. 92 # 93 # Note: $base_dir might be "" and $file might include directories, 94 # but that doesn't affect EMAILPlug 95 # 96 97 sub read { 88 return q^\d+(\.email)?$^; 89 } 90 91 # do plugin specific processing of doc_obj 92 sub process { 98 93 my $self = shift (@_); 99 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_; 100 101 # Make sure file exists and is an email file 102 my $filename = &util::filename_cat($base_dir, $file); 103 return undef unless ($filename =~ /\.email$/i && (-e $filename)); 104 105 print STDERR "EMAILPlug: processing $filename\n" if $processor->{'verbosity'}; 106 107 # create a new document object 108 my $doc_obj = new doc ($file, "indexed_doc"); 109 open (FILE, $filename) || die "EMAILPlug::read - can't open $filename\n"; 94 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 95 96 # Check that we're dealing with a valid mail file 97 return undef unless (($$textref =~ /From:/) || ($$textref =~ /To:/)); 98 99 print STDERR "EMAILPlug: processing $file\n" 100 if $self->{'verbosity'} > 1; 101 110 102 my $cursection = $doc_obj->get_top_section(); 111 103 112 # Metadata fields 113 my $Subject = ""; 114 my $To = ""; 115 my $From = ""; 116 my $DateText = ""; 117 my $Date = ""; 118 my $OtherHeaders = ""; 119 my $NewText = ""; 120 my $text = ""; 121 my $line = ""; 122 123 my $headers_read = 0; 124 125 # Read and process each line in te email file. 126 # Each file consists of a set of header lines, then a blank line, 127 # then the body of the email. 
128 while (<FILE>) { 104 # 105 # Parse the document's text and extract metadata 106 # 107 108 # Separate header from body of message 109 my $Headers = $$textref; 110 $Headers =~ s/\n\n.*//s; 111 $$textref = substr $$textref, (length $Headers); 112 113 # Extract basic metadata from header 114 my @headers = ("From", "To", "Subject", "Date"); 115 my $value = ""; 116 my %raw; 117 118 foreach my $name (@headers) { 119 $value = $Headers; 120 $value =~ s/.*$name://s; 121 $value =~ s/\S*:.*//s; 122 $value =~ s/\s*$//; 123 $value =~ s/\s+/ /g; 124 $raw{$name} = $value; 125 } 126 127 # Process Date information 128 if ($raw{"Date"}) { 129 $raw{"DateText"} = $raw{"Date"}; 129 130 130 $line = $_; 131 # Convert the date text to internal date format 132 $value = $raw{"Date"}; 133 my ($day, $month, $year) = $value =~ /(\d?\d)\s([A-Z][a-z][a-z])\s(\d\d\d?\d?)/; 134 if ($year < 100) { $year += 1900; } 135 $raw{"Date"} = &sorttools::format_date($day, $month, $year); 131 136 132 # Remove carriage returns from the line. 133 # We will later replace single cariage returns with <BR> tags 134 # and double carriage returns with <P> tags. 135 $line =~ s/\n/ /g; 136 137 if ($headers_read) { 138 # The headers have been read, so add this line to the body text 139 $text .= "$line\n"; 140 # If the line isn't quoted, add it to the NewText metadata 141 if ($line =~ /^[^>|]/) { 142 $NewText .= "$line\n"; 143 } 144 145 } elsif ($line =~ /^\s*$/) { 146 # An empty line signals the end of the headers. 
147 $headers_read = 1; 148 137 } else { 138 # We have not extracted a date 139 $raw{"DateText"} = "Unknown."; 140 $raw{"Date"} = "19000000"; 141 } 142 143 144 # Add extracted metadata to document object 145 foreach my $name (keys %raw) { 146 $value = $raw{$name}; 147 if ($value) { 148 $value = &text_into_html($value); 149 149 } else { 150 # Read a line of header information and add it to the metadata 151 $line .= "\n"; 152 if ($line =~ /^From:/) { 153 $line =~ s/^From:\s*//; 154 $From .= $line; 155 } elsif ($line =~ /^To:/) { 156 $line =~ s/^To:\s*//; 157 $To .= $line; 158 } elsif ($line =~ /^Date:/) { 159 $line =~ s/^Date:\s*//; 160 $DateText .= $line; 161 if ($Date !~ /\d+/) { 162 # Convert the date text to internal date format 163 my ($day, $month, $year) = $line =~ /(\d?\d)\s([A-Z][a-z][a-z])\s(\d\d\d\d)/; 164 $Date = &sorttools::format_date($day, $month, $year); 165 } 166 } elsif ($line =~ /^Subject:/) { 167 $line =~ s/^Subject:\s*//; 168 $Subject .= $line; 169 } else { 170 $OtherHeaders .= $line; 171 } 172 } 150 $value = "No $name field"; 151 } 152 $doc_obj->add_utf8_metadata ($cursection, $name, $value); 173 153 } 174 154 175 # Add Subject metadata 176 $Subject = &text_into_html($Subject); 177 $Subject = "No Subject" unless ($Subject =~ /\w/); 178 $doc_obj->add_metadata ($cursection, "Subject", $Subject); 179 180 # Add Sender 181 $From = &text_into_html($From); 182 $From = "No Sender" unless ($From =~ /\w/); 183 $doc_obj->add_metadata ($cursection, "Creator", $From); 184 185 # Add Recipient 186 $To = &text_into_html($To); 187 $To = "No Recipient" unless ($To =~ /\w/); 188 $doc_obj->add_metadata ($cursection, "To", $To); 189 190 # Add Date Text 191 $DateText =~ &text_into_html($Date); 192 $doc_obj->add_metadata ($cursection, "DateText", $DateText) if ($DateText =~ /\w/); 193 194 # Add Date 195 $Date =~ &text_into_html($Date); 196 $doc_obj->add_metadata ($cursection, "Date", $Date) if ($Date =~ /\w/); 197 198 # Add Other Headers 199 $OtherHeaders = 
&text_into_html($OtherHeaders); 200 $doc_obj->add_metadata ($cursection, "OtherHeaders", $OtherHeaders) if ($OtherHeaders =~ /\w/); 201 202 # Add New Text 203 $NewText = &text_into_html($NewText); 204 $doc_obj->add_metadata ($cursection, "NewText", $NewText) if ($NewText =~ /\w/); 205 206 # Add text 207 $text =~ s/<BR>\s*<BR>/<P>/g; 208 $text = &text_into_html($text); 209 $doc_obj->add_text ($cursection, $text) if ($text =~ /\w/); 210 211 # Add the OID - that is, the big HASH value used as a unique ID 212 $doc_obj->set_OID (); 213 214 # Process the document 215 $processor->process($doc_obj); 216 217 return 1; # processed the file 218 } 219 220 221 1; 222 223 224 225 # 155 # Add "All headers" metadata 156 $Headers = &text_into_html($Headers); 157 $Headers = "No headers" unless ($Headers =~ /\w/); 158 $doc_obj->add_utf8_metadata ($cursection, "Headers", $Headers); 159 160 # Add text to document object 161 $$textref = &text_into_html($$textref); 162 $$textref = "No message" unless ($$textref =~ /\w/); 163 $doc_obj->add_utf8_text($cursection, $$textref); 164 165 return 1; 166 } 167 168 226 169 # Convert a text string into HTML. 227 170 # … … 234 177 # and replaces carriage returns with <BR> tags (and multiple carriage 235 178 # returns with <P> tags). 
236 # 179 237 180 238 181 sub text_into_html { 239 182 my ($text) = @_; 240 183 241 242 # Convert problem charaters into HTML symbols 243 $text =~ s/&/&amp;/g; 244 $text =~ s/</&lt;/g; 245 $text =~ s/>/&gt;/g; 246 $text =~ s/\"/&quot;/g; 184 # Convert problem characters into HTML symbols 185 $text =~ s/&/&amp;/go; 186 $text =~ s/</&lt;/go; 187 $text =~ s/>/&gt;/go; 188 $text =~ s/\"/&quot;/go; 247 189 248 190 # convert email addresses and URLs into links 249 191 $text =~ s/([\w\d\.\-]+@[\w\d\.\-]+)/<a href=\"mailto:$1\">$1<\/a>/g; 250 $text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\- ]*)/<a href=\"$1">$1<\/a>/g;192 $text =~ s/(http:\/\/[\w\d\.\-]+[\/\w\d\.\-~]*)/<a href=\"$1\">$1<\/a>/g; 251 193 252 194 # Clean up whitespace and convert \n charaters to <BR> or <P> 253 $text =~ s/ +/ /g ;254 $text =~ s/\s*$// ;255 $text =~ s/^\s*// ;256 $text =~ s/\n/\n<BR>/g ;257 $text =~ s/<BR>\s*<BR>/<P>/g ;195 $text =~ s/ +/ /go; 196 $text =~ s/\s*$//o; 197 $text =~ s/^\s*//o; 198 $text =~ s/\n/\n<BR>/go; 199 $text =~ s/<BR>\s*<BR>/<P>/go; 258 200 259 201 return $text; 260 202 } 261 203 262 263 264 265 266 267 268 204 205 # Perl packages have to return true if they are run. 206 1; -
branches/New_Config_Format-branch/gsdl/perllib/plugins/GMLPlug.pm
r1010 r1279 37 37 } 38 38 39 use strict; 40 39 41 sub new { 40 42 my ($class) = @_; 41 $self = new BasPlug ();43 my $self = new BasPlug ("GMLPlug", @_); 42 44 43 45 return bless $self, $class; 44 46 } 45 47 46 47 sub is_recursive { 48 sub get_default_process_exp { 48 49 my $self = shift (@_); 49 50 50 return 0; # this is not a recursive plugin 51 } 52 53 sub _unescape_text { 54 my ($text) = @_; 55 56 # special characters in the gml encoding 57 $text =~ s/&lt;/</g; 58 $text =~ s/&gt;/>/g; 59 $text =~ s/&quot;/\"/g; 60 $text =~ s/&amp;/&/g; # this has to be last... 61 62 return $text; 51 return q^(?i)\.gml?$^; 63 52 } 64 53 … … 69 58 my $self = shift (@_); 70 59 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 71 my $fullname = &util::filename_cat ($base_dir, $file);72 60 73 # see if this is a gml book 74 return undef unless (-f $fullname && $fullname =~ /\.gml(\.gz)?$/io); 75 76 my ($parent_dir, $gz) = $fullname =~ /^(.*?)[\/\\][^\/\\]+.gml(\.gz)?$/io; 77 78 if (defined $gz && $gz =~ /\.gz/io) { 79 $gz = 1; 80 } else { 81 $gz = 0; 61 my $filename = &util::filename_cat($base_dir, $file); 62 return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; 63 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 64 return undef; 82 65 } 66 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 83 67 84 68 print STDERR "GMLPlug: processing $file\n"; 85 69 86 # read in the document 87 if ($gz) { 88 if (!open (INFILE, "zcat $fullname |")) { 89 print STDERR "GMLPlug::read - zcat couldn't read $fullname\n"; 90 return undef; 91 } 92 } else { 93 if (!open (INFILE, $fullname)) { 94 print STDERR "GMLPlug::read - couldn't read $fullname\n"; 95 return undef; 96 } 70 my $parent_dir = $file; 71 $parent_dir =~ s/[^\\\/]*$//; 72 $parent_dir = &util::filename_cat ($base_dir, $parent_dir); 73 74 if (!open (INFILE, $filename)) { 75 print STDERR "GMLPlug::read - couldn't read $filename\n"; 76 return 0; 97 77 } 98 78 … … 106 86 107 87 my 
$no_docs = 0; 108 # my $src_filename = ""; #### don't appear to use this anymore - not sure if that's right109 88 110 89 while (1) { … … 128 107 129 108 } else { 130 print STDERR "GMLPlug::read - error in file $f ullname\n";109 print STDERR "GMLPlug::read - error in file $filename\n"; 131 110 print STDERR "text: \"$gml\"\n"; 132 111 last; … … 166 145 last if $section eq ""; # back to top level again (more than one document in gml file) 167 146 $section = $doc_obj->get_parent_section ($section); 168 } # while (1) section level147 } # while (1) section level 169 148 170 149 # add the associated files 171 $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");150 my $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile"); 172 151 my ($assoc_file_info, $afile); 173 152 foreach $assoc_file_info (@$assoc_files) { … … 186 165 $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata); 187 166 188 # assume the document has an OID 167 # do any automatic metadata extraction 168 $self->auto_extract_metadata ($doc_obj); 169 170 # assume the document has an OID already 189 171 190 172 # process the document … … 194 176 last if ($maxdocs > -1 && $no_docs >= $maxdocs); 195 177 last unless defined $gml && $gml =~ /\w/; 196 } # while(1) document level178 } # while(1) document level 197 179 198 180 return $no_docs; # no of docs processed 199 181 } 200 182 183 sub _unescape_text { 184 my ($text) = @_; 185 186 # special characters in the gml encoding 187 $text =~ s/&lt;/</g; 188 $text =~ s/&gt;/>/g; 189 $text =~ s/&quot;/\"/g; 190 $text =~ s/&amp;/&/g; # this has to be last... 191 192 return $text; 193 } 201 194 202 195 1; -
branches/New_Config_Format-branch/gsdl/perllib/plugins/HBPlug.pm
r1020 r1279 24 24 ########################################################################### 25 25 26 # plugin which process an HTML book directory 26 # plugin which processes an HTML book directory 27 28 # This plugin is used by the Humanity Library collections and does not handle 29 # input encodings other than ascii or extended ascii 30 31 # this code is kind of ugly and could no doubt be made to run faster, by leaving 32 # it in this state I hope to encourage people to make their collections use 33 # HBSPlug instead ;-) 34 35 # Use HBSPlug if creating a new collection and marking up files like the 36 # Humanity Library collections. HBSPlug accepts all input encodings but 37 # expects the marked up files to be cleaner than those used by the 38 # Humanity Library collections 27 39 28 40 package HBPlug; 29 41 30 use plugin;31 42 use ghtml; 32 43 use BasPlug; 33 44 use util; 34 use lang;35 45 use doc; 36 use cfgread;37 46 38 47 … … 43 52 sub new { 44 53 my ($class) = @_; 45 $self = new BasPlug ();54 my $self = new BasPlug ("HBPlug", @_); 46 55 47 56 return bless $self, $class; 48 57 } 49 58 50 sub is_recursive { 51 my $self = shift (@_); 52 53 return 0; # this is not a recursive plugin 54 } 59 sub init { 60 my $self = shift (@_); 61 my ($verbosity) = @_; 62 63 $self->BasPlug::init(); 64 65 # this plugin only handles ascii encodings 66 if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) { 67 die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" . 68 $self->{'input_encoding'} . " is not an acceptable input_encoding value\n"; 69 } 70 } 71 72 # this is included only to prevent warnings being printed out 73 # from BasPlug::init. 
The process_exp is not used by this plugin 74 sub get_default_process_exp { 75 my $self = shift (@_); 76 77 return "This plugin does not use a process_exp\n"; 78 } 79 55 80 56 81 sub HB_read_html_file { … … 65 90 66 91 my $foundbody = 0; 67 $self->HB_gettext (\$foundbody, $text, FILE);92 $self->HB_gettext (\$foundbody, $text, "FILE"); 68 93 close FILE; 69 94 … … 72 97 $foundbody = 1; 73 98 open (FILE, $htmlfile) || return; 74 $self->HB_gettext (\$foundbody, $text, FILE);99 $self->HB_gettext (\$foundbody, $text, "FILE"); 75 100 close FILE; 76 101 } … … 159 184 } 160 185 186 # if input_encoding is ascii we can call add_utf8_metadata 187 # directly but if it's iso_8859_1 (the default) we need to call 188 # add_metadata so that the ascii2utf8 conversion is done first 189 # this should speed things up a little if processing an ascii only 190 # document with input_encoding set to ascii 191 sub HB_add_metadata { 192 my $self = shift (@_); 193 my ($doc_obj, $cursection, $field, $value) = @_; 194 195 if ($self->{'input_encoding'} eq "ascii") { 196 $doc_obj->add_utf8_metadata ($cursection, $field, $value); 197 } else { 198 $doc_obj->add_metadata ($cursection, $field, $value); 199 } 200 } 161 201 162 202 # return number of files processed, undef if can't process … … 192 232 193 233 # add metadata for top level of document 194 foreach $field (keys(%$metadata)) {234 foreach my $field (keys(%$metadata)) { 195 235 # $metadata->{$field} may be an array reference 196 236 if (ref ($metadata->{$field}) eq "ARRAY") { 197 237 map { 198 $ doc_obj->add_metadata ($cursection, $field, $_);238 $self->HB_add_metadata ($doc_obj, $cursection, $field, $_); 199 239 } @{$metadata->{$field}}; 200 240 } else { 201 $ doc_obj->add_metadata ($cursection, $field, $metadata->{$field});241 $self->HB_add_metadata ($doc_obj, $cursection, $field, $metadata->{$field}); 202 242 } 203 243 } … … 240 280 241 281 # add the metadata to this section 242 $ doc_obj->add_metadata ($cursection, "Title", $title);282 
$self->HB_add_metadata ($doc_obj, $cursection, "Title", $title); 243 283 244 284 # clean up the section html … … 251 291 252 292 # add the text for this section 253 $doc_obj->add_text ($cursection, $sectiontext); 254 293 if ($self->{'input_encoding'} eq "ascii") { 294 $doc_obj->add_utf8_text ($cursection, $sectiontext); 295 } else { 296 $doc_obj->add_text ($cursection, $sectiontext); 297 } 255 298 } else { 256 299 print STDERR "WARNING - leftover text\n" , $self->shorten($html), -
branches/New_Config_Format-branch/gsdl/perllib/plugins/HTMLPlug.pm
r1020 r1279 50 50 51 51 sub print_usage { 52 print STDERR "\nIncorrect options passed to HTMLPlug, check your collect.cfg configuration file\n";53 54 52 print STDERR "\n usage: plugin HTMLPlug [options]\n\n"; 55 53 print STDERR " options:\n"; 56 print STDERR " -process_exp A perl regular expression to match against filenames.\n";57 print STDERR " Matching filenames will be processed by this plugin.\n";58 print STDERR " Defaults to '(?i)\.html?\$' i.e. all documents ending in\n";59 print STDERR " .htm or .html (case-insensitive).\n";60 54 print STDERR " -nolinks Don't make any attempt to trap links (setting this flag may\n"; 61 55 print STDERR " improve speed of building/importing but any relative links within\n"; 62 56 print STDERR " documents will be broken).\n"; 63 print STDERR " -block_exp Files matching this regular expression will be blocked from\n";64 print STDERR " being passed to any further plugins in the list. By default\n";65 print STDERR " HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png, .pdf,\n";66 print STDERR " .rtf or .css file extensions.\n";67 57 print STDERR " -keep_head Don't remove headers from html files.\n"; 68 58 print STDERR " -no_metadata Don't attempt to extract any metadata from files.\n"; 69 59 print STDERR " -metadata_fields Comma separated list of metadata fields to attempt to extract.\n"; 70 print STDERR " Defaults to 'Title'\n"; 60 print STDERR " Defaults to 'Title'.\n"; 61 print STDERR " Use `first200` to get the first 200 characters of the body.\n"; 62 print STDERR " Use `H1` to get the text inside the first <H1> and </H1> tags in the text.\n"; 71 63 print STDERR " -w3mir Set if w3mir was used to generate input file structure.\n"; 72 print STDERR " w3mir \n";73 64 print STDERR " -assoc_files Perl regular expression of file extensions to associate with\n"; 74 print STDERR " html documents. Defaults to '(?i)\.(jpe?g|gif|png|css|pdf) $'\n";65 print STDERR " html documents. 
Defaults to '(?i)\.(jpe?g|gif|png|css|pdf)\$'\n"; 75 66 print STDERR " -rename_assoc_files Renames files associated with documents (e.g. images). Also\n"; 76 67 print STDERR " creates much shallower directory structure (useful when creating\n"; … … 80 71 sub new { 81 72 my $class = shift (@_); 82 my $self = new BasPlug ( );73 my $self = new BasPlug ("HTMLPlug", @_); 83 74 84 75 if (!parsargv::parse(\@_, 85 q^process_exp/.*/(?i)\.html?$^, \$self->{'process_exp'},86 76 q^nolinks^, \$self->{'nolinks'}, 87 q^block_exp/.*/(?i)\.(gif|jpe?g|png|pdf|rtf|css)$^, \$self->{'block_exp'},88 77 q^keep_head^, \$self->{'keep_head'}, 89 78 q^no_metadata^, \$self->{'no_metadata'}, … … 91 80 q^w3mir^, \$self->{'w3mir'}, 92 81 q^assoc_files/.*/(?i)\.(jpe?g|gif|png|css|pdf)$^, \$self->{'assoc_files'}, 93 q^rename_assoc_files^, \$self->{'rename_assoc_files'})) { 82 q^rename_assoc_files^, \$self->{'rename_assoc_files'}, 83 "allow_extra_options")) { 84 85 print STDERR "\nIncorrect options passed to HTMLPlug, check your collect.cfg configuration file\n"; 94 86 &print_usage(); 95 87 die "\n"; 96 88 } 97 89 98 90 $self->{'aux_files'} = {}; 99 91 $self->{'dir_num'} = 0; 100 92 $self->{'file_num'} = 0; 101 93 102 94 return bless $self, $class; 103 95 } 104 96 105 sub is_recursive { 106 my $self = shift (@_); 107 108 return 0; # this is not a recursive plugin 109 } 110 111 # return number of files processed, undef if can't process 112 # Note that $base_dir might be "" and that $file might 113 # include directories 114 sub read { 115 my $self = shift (@_); 116 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_; 117 118 my $filename = &util::filename_cat($base_dir, $file); 119 return 0 if $filename =~ /$self->{'block_exp'}/; 120 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { 121 return undef; 122 } 123 $file =~ s/^[\/\\]+//; 124 125 $self->{'verbosity'} = $processor->{'verbosity'}; 97 98 sub get_default_block_exp { 99 my $self = shift (@_); 100 101 return 
q^(?i)\.(gif|jpe?g|png|pdf|rtf|css)$^; 102 } 103 104 sub get_default_process_exp { 105 my $self = shift (@_); 106 107 return q^(?i)\.html?$^; 108 } 109 110 111 # do plugin specific processing of doc_obj 112 sub process { 113 my $self = shift (@_); 114 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 115 126 116 print STDERR "HTMLPlug: processing $file\n" 127 117 if $self->{'verbosity'} > 1; 128 118 129 # create a new document130 my $doc_obj = new doc ($file, "indexed_doc");131 119 my $cursection = $doc_obj->get_top_section(); 132 133 # read in HTML file 134 open (FILE, $filename) || die "HTMLPlug::read - can't open $filename\n"; 135 undef $/; 136 my $text = <FILE>; 137 $/ = "\n"; 138 close FILE; 139 if (!defined $text || $text !~ /\w/) { 140 print STDERR "HTMLPlug: ERROR: $file contains no text\n" if $self->{'verbosity'}; 141 return 0; 142 } 143 144 $self->extra_metadata ($doc_obj, $cursection, $metadata); 145 $self->extract_metadata (\$text, $metadata, $doc_obj, $cursection) 120 121 $self->extract_metadata ($textref, $metadata, $doc_obj, $cursection) 146 122 unless $self->{'no_metadata'}; 147 123 … … 152 128 my $web_url = "http://$file"; 153 129 $web_url =~ s/\\/\//g; # for windows 154 $doc_obj->add_ metadata($cursection, "URL", $web_url);130 $doc_obj->add_utf8_metadata($cursection, "URL", $web_url); 155 131 156 132 # remove header and footer 157 133 if (!$self->{'keep_head'}) { 158 $ text=~ s/^.*?<body[^>]*>//is;159 $ text=~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg;134 $$textref =~ s/^.*?<body[^>]*>//is; 135 $$textref =~ s/(<\/body[^>]*>|<\/html[^>]*>)//isg; 160 136 } 161 137 … … 164 140 165 141 # usemap="./#index" not handled correctly => change to "#index" 166 $ text=~ s/(<img[^>]*?usemap\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/142 $$textref =~ s/(<img[^>]*?usemap\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/ 167 143 $self->replace_usemap_links($1, $2, $3)/isge; 168 144 169 $ text =~ 
s/(<(?:a|area|frame|link)\s+[^>]*?(?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/145 $$textref =~ s/(<(?:a|area|frame|link)\s+[^>]*? (?:href|src)\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/ 170 146 $self->replace_href_links ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 171 147 } 172 148 173 149 # trap images 174 $ text =~ s/(<img[^>]*?src\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/150 $$textref =~ s/(<img[^>]*? src\s*=\s*\"?)([^\">\s]+)(\"?[^>]*>)/ 175 151 $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 176 152 177 $doc_obj->add_text ($cursection, $text); 178 179 # add an OID 180 $doc_obj->set_OID(); 181 182 # process the document 183 $processor->process($doc_obj); 184 185 return 1; # processed the file 153 # add text to document object 154 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>"); 155 156 return 1; 186 157 } 187 158 … … 349 320 350 321 foreach my $field (split /,/, $self->{'metadata_fields'}) { 351 322 352 323 # don't need to extract field if it was passed in from a previous 353 324 # (recursive) plugin … … 361 332 my $value = $1; 362 333 $value =~ s/\s+/ /gs; 363 $doc_obj->add_ metadata($section, $field, $value);334 $doc_obj->add_utf8_metadata($section, $field, $value); 364 335 next; 365 336 } … … 367 338 } 368 339 369 # special case for Title metadata - try <title> tags 370 # then first 100 characters of text 340 # TITLE: extract the document title 371 341 372 342 if ($field =~ /^title$/i) { … … 378 348 if ($title =~ /\w/) { 379 349 $title =~ s/\s+/ /gs; 380 $doc_obj->add_metadata ($section, $field, $title); 350 $title =~ s/^\s+//; 351 $title =~ s/\s+$//; 352 $doc_obj->add_utf8_metadata ($section, $field, $title); 381 353 next; 382 354 } … … 386 358 # if no title use first 100 characters 387 359 my $tmptext = $$textref; 360 $tmptext =~ s/\s+/ /gs; 388 361 $tmptext =~ s/<[^>]*>//g; 389 my $title = substr ($tmptext, 0, 100); 390 $title =~ s/\s+/ /gs; 391 $doc_obj->add_metadata ($section, $field, $title); 392 } 393 
} 394 } 362 $tmptext = substr ($tmptext, 0, 100); 363 $tmptext =~ s/^\s+//; 364 $tmptext =~ s/\s+$//; 365 $tmptext =~ s/\s\S*$/.../; 366 $doc_obj->add_utf8_metadata ($section, $field, $tmptext); 367 next; 368 } 369 370 # FIRST200: extract the first 200 characters as metadata 371 372 if ($field =~ /^first200$/i) { 373 my $tmptext = $$textref; 374 $tmptext =~ s/\s+/ /gs; 375 $tmptext =~ s/.*<body[^>]*>//i; 376 $tmptext =~ s/<[^>]*>//g; 377 $tmptext = substr ($tmptext, 0, 200); 378 $tmptext =~ s/^\s+//; 379 $tmptext =~ s/\s+$//; 380 $tmptext =~ s/\s\S*$/.../; 381 $doc_obj->add_utf8_metadata ($section, $field, $tmptext); 382 next; 383 } 384 385 # H1: extract the text between the first <H1> and </H1> tags 386 if ($field =~ /^H1$/i) { 387 my $tmptext = $$textref; 388 $tmptext =~ s/\s+/ /gs; 389 if ($tmptext =~ /<H1[^>]*>/i) { 390 $tmptext =~ s/.*<H1[^>]*>//i; 391 $tmptext =~ s/<\/H1[^>]*>.*//i; 392 $tmptext =~ s/^\s+//; 393 $tmptext =~ s/\s+$//; 394 $doc_obj->add_utf8_metadata ($section, $field, $tmptext); 395 } 396 next; 397 } 398 } 399 } 400 395 401 396 402 # evaluate any "../" to next directory up -
branches/New_Config_Format-branch/gsdl/perllib/plugins/IndexPlug.pm
r809 r1279 54 54 use plugin; 55 55 use BasPlug; 56 use lang;57 56 use doc; 58 57 use util; … … 63 62 } 64 63 64 use strict; 65 65 66 sub new { 66 67 my ($class) = @_; 67 $self = new BasPlug ();68 my $self = new BasPlug ("IndexPlug", @_); 68 69 69 70 return bless $self, $class; … … 76 77 return 1; 77 78 } 78 79 79 80 80 # return number of files processed, undef if can't process … … 104 104 # process each document 105 105 my $count = 0; 106 foreach $docfile (keys (%$list)) {106 foreach my $docfile (keys (%$list)) { 107 107 last if ($maxdocs != -1 && $count >= $maxdocs); 108 108 $metadata = {}; # at present we can do this as metadata … … 113 113 # note that $list->{$docfile} is an array reference 114 114 if ($docfile !~ /key:/i) { 115 my $i = 0; 115 116 for ($i = 0; $i < scalar (@{$list->{$docfile}}); $i ++) { 116 117 if ($list->{$docfile}->[$i] =~ /^<([^>]+)>(.+)$/) { -
branches/New_Config_Format-branch/gsdl/perllib/plugins/RecPlug.pm
r809 r1279 38 38 } 39 39 40 use strict; 41 40 42 sub new { 41 43 my ($class) = @_; 42 my $self = new BasPlug ( );44 my $self = new BasPlug ("RecPlug", @_); 43 45 44 46 $self->{'exclude_tail_dirs'} = []; # empty by default … … 62 64 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 63 65 64 foreach $etd ( @{$self->{'exclude_tail_dirs'}} )66 foreach my $etd ( @{$self->{'exclude_tail_dirs'}} ) 65 67 { 66 68 return 0 if ($file =~ m/$etd/); … … 72 74 73 75 # see if this is a directory 74 $dirname = &util::filename_cat ($base_dir, $file);76 my $dirname = &util::filename_cat ($base_dir, $file); 75 77 if (-d $dirname) { 76 78 -
branches/New_Config_Format-branch/gsdl/perllib/plugins/TEXTPlug.pm
r732 r1279 24 24 ########################################################################### 25 25 26 # creates simple single-level document from .txt or .text files 27 # (case-insensitive match on filenames). Adds Title metadata 28 # of first 100 characters found. 26 # creates simple single-level document. Adds Title metadata 27 # of first line of text (up to 100 characters long). 29 28 30 29 package TEXTPlug; 31 30 32 31 use BasPlug; 33 use sorttools;34 32 35 33 sub BEGIN { … … 37 35 } 38 36 37 use strict; 38 39 39 sub new { 40 40 my ($class) = @_; 41 $self = new BasPlug ();41 my $self = new BasPlug ("TEXTPlug", @_); 42 42 43 43 return bless $self, $class; 44 44 } 45 45 46 sub is_recursive{46 sub get_default_process_exp { 47 47 my $self = shift (@_); 48 48 49 return 0; # this is not a recursive plugin49 return q^(?i)\.te?xt$^; 50 50 } 51 51 52 53 # return number of files processed, undef if can't process 54 # Note that $base_dir might be "" and that $file might 55 # include directories 56 sub read { 52 # do plugin specific processing of doc_obj 53 sub process { 57 54 my $self = shift (@_); 58 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_; 59 60 my $filename = &util::filename_cat($base_dir, $file); 61 62 return undef unless ($filename =~ /\.(te?xt(\.gz)?)$/i && (-e $filename)); 63 64 my $gz = 0; 65 if (defined $2) { 66 $gz = $2; 67 $gz = 1 if ($gz =~ /\.gz/i); 68 } 69 70 print STDERR "TEXTPlug: processing $filename\n" if $processor->{'verbosity'}; 71 72 # create a new document 73 my $doc_obj = new doc ($file, "indexed_doc"); 74 75 if ($gz) { 76 open (FILE, "zcat $filename |") || die "TEXTPlug::read - zcat can't open $filename\n"; 77 } else { 78 open (FILE, $filename) || die "TEXTPlug::read - can't open $filename\n"; 79 } 55 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 56 57 print STDERR "TEXTPlug: processing $file\n" 58 if $self->{'verbosity'} > 1; 59 80 60 my $cursection = $doc_obj->get_top_section(); 81 82 my $text = ""; 
83 my $line = ""; 84 my $foundtitle = 0; 85 # don't need to get title if it has been passed 86 # in from another plugin 87 if (defined $metadata->{'Title'}) { 88 $foundtitle = 1; 89 } 90 while (defined ($line = <FILE>)) { 91 # use first line as title (or first 100 characters if it's long) 92 if (!$foundtitle && length($line) > 5) { 93 my $title = ""; 94 if (length($line) > 100) { 95 $title = substr ($line, 0, 100); 96 } else { 97 $title = $line; 98 } 99 $doc_obj->add_metadata ($cursection, "Title", $title); 100 $foundtitle = 1; 61 62 # get title metadata 63 # (don't need to get title if it has been passed 64 # in from another plugin) 65 if (!defined $metadata->{'Title'}) { 66 my ($title) = $$textref =~ /^([^\n]*)/; 67 if (length($title) > 100) { 68 $title = substr ($title, 0, 100); 101 69 } 102 $ text .= $line;70 $doc_obj->add_utf8_metadata ($cursection, "Title", $title); 103 71 } 104 72 105 $doc_obj->add_text ($cursection, "<pre>\n$text\n</pre>"); 73 # insert preformat tags and add text to document object 74 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>"); 106 75 107 108 foreach $field (keys(%$metadata)) { 109 # $metadata->{$field} may be an array reference 110 if (ref ($metadata->{$field}) eq "ARRAY") { 111 map { 112 $doc_obj->add_metadata ($cursection, $field, $_); 113 } @{$metadata->{$field}}; 114 } else { 115 $doc_obj->add_metadata ($cursection, $field, $metadata->{$field}); 116 } 117 } 118 119 # add OID 120 $doc_obj->set_OID (); 121 122 # process the document 123 $processor->process($doc_obj); 124 125 return 1; # processed the file 76 return 1; 126 77 } 127 78 -
branches/New_Config_Format-branch/gsdl/perllib/unicode.pm
r537 r1279 31 31 package unicode; 32 32 33 34 35 # ascii2unicode takes a (extended) ascii string and36 # returns a unicode array.33 %translations = (); 34 35 # ascii2unicode takes an (extended) ascii string (ISO-8859-1) 36 # and returns a unicode array. 37 37 sub ascii2unicode { 38 38 my ($in) = @_; … … 43 43 while ($i < $len) { 44 44 push (@$out, ord(substr ($in, $i, 1))); 45 $i++; 46 } 47 48 return $out; 49 } 50 51 # arabic2unicode takes an 8 bit Arabic string (ISO-8859-6) 52 # and returns a unicode array 53 sub arabic2unicode { 54 my ($in) = @_; 55 my $out = []; 56 57 my $i = 0; 58 my $len = length($in); 59 while ($i < $len) { 60 my $c = ord(substr ($in, $i, 1)); 61 $c += (1567-191) if ($c >= 0x80); 62 push (@$out, $c); 63 $i++; 64 } 65 66 return $out; 67 } 68 69 # windows2unicode takes a windows encoding (e.g. Windows 1256 (Arabic)) 70 # and returns a unicode array. These encodings are similar to but not 71 # identical to the corresponding ISO-8859 encodings. 72 # 73 # The map files for these encodings should be in unicode/MAPPINGS/WINDOWS 74 sub windows2unicode { 75 my ($encoding, $in) = @_; 76 my $out = []; 77 78 my $mapfile = &util::filename_cat($ENV{'GSDLHOME'}, "unicode", "MAPPINGS", 79 "WINDOWS", "$encoding.TXT"); 80 return $out unless &loadmapping ($encoding, $mapfile); 81 82 my $i = 0; 83 my $len = length($in); 84 while ($i < $len) { 85 my $c = ord(substr ($in, $i, 1)); 86 $c = $translations{"$encoding-unicode"}->{$c} if ($c >= 0x80); 87 push (@$out, $c); 45 88 $i++; 46 89 } … … 193 236 } 194 237 238 # loadmapping expects the mapfile to contain (at least) two 239 # tab-separated fields. The first field is the mapped value 240 # and the second field is the unicode value. 
241 # 242 # It returns 1 if successful, 0 if unsuccessful 243 sub loadmapping { 244 my ($encoding, $mapfile) = @_; 245 246 my $to = "$encoding-unicode"; 247 my $from = "unicode-$encoding"; 248 249 # check to see if the encoding has already been loaded 250 if (defined $translations{$to} && defined $translations{$from}) { 251 return 1; 252 } 253 254 if (!open (MAPFILE, $mapfile)) { 255 print STDERR "ERROR: unable to load mapfile $mapfile\n"; 256 return 0; 257 } 258 259 my ($line, @line); 260 $translations{$to} = {}; 261 $translations{$from} = {}; 262 while (defined ($line = <MAPFILE>)) { 263 # remove comments 264 $line =~ s/\#.*$//; 265 next unless $line =~ /\S/; 266 267 # split the line into fields and do a few 268 # simple sanity checks 269 @line = split (/\t/, $line); 270 next unless (scalar(@line) >= 2 && 271 $line[0] =~ /^0x/ && 272 $line[1] =~ /^0x/); 273 274 my $a = hex($line[0]); 275 my $b = hex($line[1]); 276 277 $translations{$to}->{$a} = $b; 278 $translations{$from}->{$b} = $a; 279 } 280 281 close (MAPFILE); 282 283 return 1; 284 } 285 195 286 196 287 1; 197 -
branches/New_Config_Format-branch/gsdl/setup.bash
r10 r1279 2 2 export GSDLOS=`uname -s | tr A-Z a-z` 3 3 export PATH=$PATH:$GSDLHOME/bin/script:$GSDLHOME/bin/$GSDLOS 4 export MANPATH=$MANPATH:$GSDLHOME/packages/mg/man -
branches/New_Config_Format-branch/gsdl/setup.csh
r682 r1279 2 2 setenv GSDLOS `uname -s | tr A-Z a-z` 3 3 setenv PATH $PATH\:$GSDLHOME/bin/script\:$GSDLHOME/bin/$GSDLOS 4 setenv MANPATH $MANPATH\:$GSDLHOME/packages/mg/man -
branches/New_Config_Format-branch/gsdl/src/colservr/browsefilter.cpp
r990 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.10.4.1 2000/07/12 22:21:26 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.11 2000/06/29 22:02:23 sjboddie 34 Made BrowseFilters EndResults option take -1 (i.e. "all"). This is mostly 35 to be consistent with the QueryFilter 36 30 37 Revision 1.10 2000/02/29 01:35:56 sjboddie 31 38 tidied up endianness and fastcgi … … 85 92 filtopt.type = FilterOption_t::integert; 86 93 filtopt.repeatable = FilterOption_t::onePerQuery; 87 filtopt.defaultValue = " 10000";88 filtopt.validValues.push_back(" 1");94 filtopt.defaultValue = "-1"; 95 filtopt.validValues.push_back("-1"); 89 96 filtopt.validValues.push_back("10000"); 90 97 filterOptions["EndResults"] = filtopt; … … 298 305 299 306 while (result_here != result_end) { 300 if (resultnum > endresults) break; 307 // if endresults is -1 get all results 308 if ((endresults != -1) && (resultnum > endresults)) break; 301 309 if (resultnum >= startresults) { 302 310 resultdoc.OID = (*result_here); -
branches/New_Config_Format-branch/gsdl/src/colservr/collectserver.cpp
r830 r1279 29 29 /* 30 30 $Log$ 31 Revision 1.20.4.1 2000/07/12 22:21:26 sjboddie 32 merged changes to trunk into New_Config_Format branch 33 34 Revision 1.21 2000/06/29 00:22:58 sjboddie 35 added new numsections field to collection info and made the statusaction 36 recognize it 37 31 38 Revision 1.20 1999/12/13 02:56:22 davidb 32 39 Support for cross-collection searching (CCS) … … 136 143 else if (key == "languages") collectinfo.languages = cfgline; 137 144 else if (key == "numdocs") collectinfo.numDocs = value.getint(); 145 else if (key == "numsections") collectinfo.numSections = value.getint(); 138 146 else if (key == "numwords") collectinfo.numWords = value.getint(); 139 147 else if (key == "numbytes") collectinfo.numBytes = value.getint(); -
branches/New_Config_Format-branch/gsdl/src/colservr/mgsearch.cpp
r633 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.22.4.1 2000/07/12 22:21:27 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.23 2000/07/03 21:58:41 nzdl 34 removed mg directive that was causing meaningless warning messages 35 in errout.txt 36 30 37 Revision 1.22 1999/09/24 02:41:21 rjmcnab 31 38 change to use has_unicode_letdig in text_t … … 424 431 mgq_ask(".set maxparas 500000"); 425 432 mgq_ask(".set verbatim true"); 426 mgq_ask(".unset skip_dump");433 // mgq_ask(".unset skip_dump"); 427 434 mgq_ask(".set mode docnums"); 428 435 -
branches/New_Config_Format-branch/gsdl/src/colservr/win32.mak
r1000 r1279 25 25 ########################################################################### 26 26 27 GSDLHOME = d:\home\dl\gsdl28 STLPATH = d:\home\dl\stl\stlport27 GSDLHOME = c:\gsdl 28 STLPATH = c:\stlport 29 29 30 30 AR = lib -
branches/New_Config_Format-branch/gsdl/src/hashfile/hashfile.cpp
r915 r1279 143 143 144 144 145 146 // create a hash string from the contents of a file. 147 // 148 // The file is treated as a large base 256 number (each char is a digit), 149 // and the hash value is the remainder when this number is divided by a 150 // very large prime. 151 // 152 // PROBLEM: This is a flawed hash function because the rightmost (highest) 153 // value in remainder is more likely to be a "1" than it is to be any other 154 // number. 155 // 156 // EVIDENCE: About 50% of the files in any GSDL directory have a hash code 157 // that starts with 01* which implies (based on the my_convert_num function) 158 // that the rightmost "digit" of the remainder = z such that (z % 16 = 0) 159 // and (z / 16 = 1) which means (z = 1). 160 // 161 // MAJOR REASON: suppose our prime number was 19. Then, if we have a 162 // reasonably random distribution of numbers for which we are going to 163 // calculate (N % 19), we expect to get a roughly uniform distribution 164 // of remainders where the possible values are 0, 1, 2, 3... 18. The 165 // problem is that 9 out of the 19 possible values (10,11... 18) start 166 // with the digit 1. Thus our hascode will start with "01". 167 // 168 // ANOTHER PROBLEM: Characters in the file are read one at a time; after each 169 // one is read it is prepended to the remainder, then the remainder is 170 // recalculated on the string thus far seen. I am sure the math here 171 // is wrong - if I try calculating (111 mod 7) by the same algorithm, it 172 // simply does nor work. 173 // 174 // ANOTHER POSSIBLE PROLEM: Each character from the file is read into 175 // remainder at the most significant end, and when that character 176 // is a zero you get a number like "01" which would be considered 177 // larger than a number like "8" because it is longer (two digits 178 // instead of 1). 179 // 180 // These comments added by Gordon Paynter ([email protected]) in 181 // June 2000. I didn't write any code, however. 
182 145 183 char *hashfile (char *filename) { 146 184 FILE *infile = (FILE *)NULL; 147 185 int i; 148 186 149 // calculate the 8 multiples of the prime number to use150 // in the long division151 187 number primepow[8]; 152 188 number pow; … … 163 199 pow.len = 12; 164 200 201 // calculate 8 multiples of the prime number. 202 // These are used to find the remainder using only subtraction operations 165 203 for (i=0; i<8; i++) { 166 204 primepow[i] = pow; … … 168 206 } 169 207 208 // The "remainder" after division by the prime. Our result. 209 remainder.len = 0; 210 211 // open the file 170 212 infile = fopen (filename, "rb"); 171 213 if (infile == NULL) { 172 214 return (char *)NULL; 173 215 } 216 c = (unsigned char)fgetc(infile); 174 217 175 remainder.len = 0;176 c = (unsigned char)fgetc(infile);177 218 while (!feof (infile)) { 178 // remainder = remainder * 256 + c 219 220 // make sure the remainder has not grown too large 179 221 if (remainder.len == MAXNUMLEN-1) { 180 222 fprintf (stderr, "ERROR - number overflow\n"); 181 223 return (char *)NULL; 182 224 } 225 226 // remainder = remainder * 256 + c 183 227 for (i=remainder.len; i>0; i--) { 184 228 remainder.num[i] = remainder.num[i-1]; 185 229 } 186 230 remainder.num[0] = c; 187 231 if (remainder.len > 0 || c != 0) remainder.len = remainder.len+1; 188 232 233 // remainder = (remainder % large-prime-number) 189 234 for (i=7; i>=0; i--) { 190 235 my_ifpos_dec (remainder, primepow[i]); 191 236 } 192 237 238 // read a new character from the file 193 239 c = (unsigned char)fgetc(infile); 194 240 } -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/Makefile.in
r861 r1279 125 125 invf.cpp mg_invf_dict_dump.cpp Weights.cpp \ 126 126 MGQuery.cpp Terms.cpp QueryTester.cpp \ 127 QueryLex.cpp QueryParser.cpp Queryer.cpp 127 QueryLex.cpp QueryParser.cpp \ 128 GSDLQueryLex.cpp GSDLQueryParser.cpp Queryer.cpp 128 129 129 130 … … 135 136 IndexData.h build.h mg_errors.h \ 136 137 TagInfo.h comp_dict.h mg_files.h Weights.h \ 137 MGQuery.h Terms.h QueryLex.h QueryParser.h 138 MGQuery.h Terms.h QueryLex.h QueryParser.h \ 139 GSDLQueryLex.h GSDLQueryParser.h 138 140 139 141 … … 180 182 FIvfLevelInfo$o FragLevelConvert$o Terms$o MGQuery$o \ 181 183 IndexData$o stemmer$o Weights$o TextGet$o text$o FText$o \ 182 QueryParser$oQueryLex$o words$o184 GSDLQueryParser$o GSDLQueryLex$o words$o 183 185 184 186 Queryer: $(QUERYER_OBJS) … … 272 274 done 273 275 274 275 LIB_OBJS = mg_files$o mg_errors$o locallib$o invf$o UCArray$o \276 #mg_errors$o removed from LIB_OBJS to avoid conflict with mg's libtextin.a 277 LIB_OBJS = mg_files$o mg_errors$o locallib$o invf$o UCArray$o \ 276 278 FIvfLevelInfo$o FragLevelConvert$o Terms$o MGQuery$o \ 277 279 IndexData$o stemmer$o Weights$o TextGet$o text$o FText$o \ 278 QueryParser$oQueryLex$o words$o280 GSDLQueryParser$o GSDLQueryLex$o words$o 279 281 280 282 libtextin.a: $(LIB_OBJS) -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/Queryer.cpp
r926 r1279 28 28 #include "mg_files.h" 29 29 30 #include " QueryParser.h"30 #include "GSDLQueryParser.h" 31 31 32 32 … … 66 66 // init the text system 67 67 TextData textData; 68 if (!textData.LoadData ( textfilename)) {68 if (!textData.LoadData (basePath, textfilename)) { 69 69 FatalError (1, "Couldn't load text information for \"%s\"", textfilename); 70 70 } … … 91 91 92 92 UCArray level; 93 level.clear();93 UCArrayClear(level); 94 94 //SetCStr(level, ""); 95 95 … … 109 109 cout << "current index="<< queryInfo.docLevel << "\nchange to index:"; 110 110 cin >> query; 111 queryInfo.docLevel.clear();111 UCArrayClear(queryInfo.docLevel); 112 112 SetCStr(queryInfo.docLevel, query); 113 113 cout << "index set to " << queryInfo.docLevel <<"\n"; … … 117 117 cout << "current level="<< level << "\nchange to level:"; 118 118 cin >> query; 119 level.clear();119 UCArrayClear(level); 120 120 SetCStr(level, query); 121 121 cout << "level set to " << level <<"\n"; -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/Terms.cpp
r927 r1279 44 44 stemMethod = 0; 45 45 matchDocs = 0; 46 termFreq = 0; 46 47 } 47 48 48 49 ostream &operator<< (ostream &s, const TermFreqData &t) { 49 50 s << "<" << t.tag << ">\"" << t.term << "\"stem(" 50 << t.stemMethod << ")docs(" << t.matchDocs << ")"; 51 << t.stemMethod << ")docs(" << t.matchDocs << ")" 52 << "count("<<t.termFreq<<")"; 51 53 return s; 52 54 } … … 56 58 (t1.term == t2.term) && 57 59 (t1.stemMethod == t2.stemMethod) && 58 (t1.matchDocs == t2.matchDocs)); 60 (t1.matchDocs == t2.matchDocs) && 61 (t1.termFreq == t2.termFreq)); 59 62 } 60 63 … … 442 445 unsigned long termDocFreq = 0; 443 446 unsigned long lastLevelDocNum = 0; 444 447 unsigned long overallwordfreq = 0; 445 448 446 449 while (termDataI < termDataSize) { … … 463 466 if (needRanks) 464 467 termDocFreq += termData.fragFreqs[termDataI]; 468 overallwordfreq += termData.fragFreqs[termDataI]; 465 469 } 466 470 … … 484 488 termFreqData.stemMethod = stemMethod; 485 489 termFreqData.matchDocs = termData.matchDocs; 490 termFreqData.termFreq = overallwordfreq; 486 491 result.termFreqs.push_back (termFreqData); 487 492 } … … 517 522 unsigned long termDocFreq = 0; 518 523 unsigned long lastLevelDocNum = 0; 519 524 unsigned long overallwordfreq = 0; 520 525 unsigned long resultI = 0; 521 526 unsigned long resultSize = result.docs.size(); … … 552 557 if (needRanks) 553 558 termDocFreq += termData.fragFreqs[termDataI]; 559 overallwordfreq += termData.fragFreqs[termDataI]; 554 560 } 555 561 556 562 termDataI++; 557 } 563 } // while 558 564 559 565 if (lastLevelDocNum > 0) { … … 590 596 termFreqData.stemMethod = stemMethod; 591 597 termFreqData.matchDocs = termData.matchDocs; 598 termFreqData.termFreq = overallwordfreq; 592 599 result.termFreqs.push_back (termFreqData); 593 600 } -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/Terms.h
r927 r1279 58 58 UCArray term; // unstemmed term 59 59 int stemMethod; 60 unsigned long matchDocs; // tf for level 61 60 unsigned long matchDocs; // tf for level - num levels 61 // containing this term 62 unsigned long termFreq; // overall term freq - num words that 63 // are this term 62 64 void Clear (); 63 65 TermFreqData () { Clear (); } -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/TextGet.cpp
r855 r1279 478 478 479 479 void TextData::Clear () { 480 cd.Clear(); // not implemented480 cd.Clear(); 481 481 textFile = NULL; 482 482 textIdxFile = NULL; … … 485 485 } 486 486 487 bool TextData::LoadData (char *textname) { 487 bool TextData::LoadData (char *basepath, char *textname) { 488 489 if (textname[0] == '\0') return false; 490 491 // set the basepath 492 set_basepath(basepath); 493 488 494 // load the compression dictionary 489 495 if (!OpenLoadCompDict (textname, cd)) return false; -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/TextGet.h
r855 r1279 42 42 // loads compression dictionary, the compressed text header, 43 43 // and all level informaiton 44 bool LoadData (char * textname);44 bool LoadData (char *basepath, char *textname); 45 45 bool UnloadData (); 46 46 }; -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/UCArray.cpp
r855 r1279 35 35 } 36 36 37 char * GetCStr(UCArray text) { 38 39 char *cstr = new char[text.size()+1]; 40 UCArray::const_iterator here = text.begin(); 41 UCArray::const_iterator end = text.end(); 42 43 int i = 0; 44 while (here != end) { 45 cstr[i] = (char)*here; 46 here++; i++; 47 } 48 cstr[i]='\0'; 49 return cstr; 50 } 37 51 38 52 ostream &operator<<(ostream &s, const UCArray &a) { … … 214 228 } 215 229 230 /* comparison for browse index - items match if the smaller word 231 is a prefix of the larger word, case independent 232 */ 233 int BrowseCompare (const UCArray &a1, const UCArray &a2) { 234 unsigned int l1 = a1.size(); 235 unsigned int l2 = a2.size(); 236 unsigned int l = (l1 < l2) ? l1 : l2; // l is the shorter of the two 237 int diff = 0; 238 239 UCArray::const_iterator a1Here = a1.begin(); 240 UCArray::const_iterator a2Here = a2.begin(); 241 242 while(l--) { 243 if ((diff = casecharmap[*a1Here] - casecharmap[*a2Here]) !=0) 244 return diff; 245 a1Here++; 246 a2Here++; 247 } 248 return 0; 249 250 } 216 251 217 252 unsigned long PrefixLen (const UCArray &a1, const UCArray &a2) { -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/UCArray.h
r855 r1279 51 51 // functions to manipulate UCArrays 52 52 void SetCStr (UCArray &text, const char *cStr); 53 char * GetCStr(UCArray text); 53 54 inline void UCArrayClear (UCArray &a) { 54 55 a.erase (a.begin(), a.end()); … … 89 90 // compares the two strings in dictionary order 90 91 int DictCompare (const UCArray &a1, const UCArray &a2); 91 92 // compares the two strings, case independent, a match (ie 0) is 93 // if one string is a prefix of the other 94 int BrowseCompare (const UCArray &a1, const UCArray &a2); 92 95 93 96 struct LTUCArray { … … 113 116 114 117 #endif 118 119 120 121 122 -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/invf.h
r925 r1279 31 31 32 32 // NOTE: This does not include the magic number 33 // header info for .invf.dict file 33 34 struct invf_dict_header { 34 35 unsigned long lookback; … … 91 92 // this version of the blocked dictionary uses a fixed number 92 93 // of entries per block, not a fixed block size 94 // info for .invf.dict.blocked file 95 // blocked dict has a heap of blocks, some for words, some for tags 96 // and an index into each set of blocks. The index has pointers to 97 // the first entry in each block. Can do a binary search on the index 98 // to find out which block an elemnet is in 93 99 struct block_dict_header : public invf_dict_header { 94 100 // note: word_dict_start and tag_dict_start are undefined 95 101 // for blocked dictionaries 96 102 97 unsigned long entries_per_wblk; 103 unsigned long entries_per_wblk; // word blocks 98 104 unsigned long num_wblks; 99 105 unsigned long max_wblk_size; … … 101 107 unsigned long wblk_idx_start; 102 108 103 unsigned long entries_per_tblk; 109 unsigned long entries_per_tblk; // tag blocks 104 110 unsigned long num_tblks; 105 111 unsigned long max_tblk_size; … … 117 123 struct block_dict_el { 118 124 UCArray el; // word or tag 119 unsigned long frag_occur; // # entries in invf file 120 unsigned long freq; 121 unsigned long invf_ptr; 125 unsigned long frag_occur; // # entries in invf file - if have a 126 // word level index, this is the same as freq, otherwise, its the number 127 // of fragments containing this word 128 unsigned long freq; // # of times this word occurs 129 unsigned long invf_ptr; // pointer into inverted file 122 130 123 131 virtual void Clear (); … … 133 141 134 142 struct word_block_dict_el : public block_dict_el { 135 unsigned long *levelFreqs; 143 unsigned long *levelFreqs; // freq of the word at each level 136 144 137 145 void Clear (); … … 209 217 #define SKIP_MODE_NO_SKIPS 0 210 218 219 // invf file - has a list of frags for each word, but the word is not 220 // stored in the invf file - the 
dictionaries store the words, along 221 // with num entries, and a pointer into invf file 211 222 struct invf_file_header { 212 223 unsigned long no_of_words; -
branches/New_Config_Format-branch/gsdl/src/mgpp/text/mg_decompress_text.cpp
r856 r1279 34 34 int ch; 35 35 char *filename = ""; 36 char *basePath = ""; 36 37 UCArray level; 37 38 SetCStr (level, "Document"); … … 47 48 break; 48 49 case 'd': 50 basePath = optarg; 49 51 set_basepath (optarg); 50 52 break; … … 62 64 // load up the text information 63 65 TextData td; 64 if (!td.LoadData ( filename)) {66 if (!td.LoadData (basePath, filename)) { 65 67 FatalError (1, "Couldn't load text information for \"%s\"", filename); 66 68 } … … 90 92 return 0; 91 93 } 94 95 -
branches/New_Config_Format-branch/gsdl/src/recpt/authenaction.cpp
r755 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.9.4.1 2000/07/12 22:21:34 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.10 2000/04/19 22:30:23 sjboddie 34 tidied up status pages and end-user collection building 35 30 36 Revision 1.9 1999/11/01 21:11:35 sjboddie 31 37 changed arguments passed to many functions … … 300 306 + "_")); 301 307 // change style of header and footer if page is a frame 302 if ( args["sp"] != "frameset") {308 if ((args["sp"].empty()) || (args["sp"] == "frameset")) { 303 309 disp.setmacro ("header", "authen", "_status:infoheader_(Log in)"); 304 310 disp.setmacro ("header", "authenok", "_status:infoheader_(Log in)"); … … 306 312 disp.setmacro ("footer", "authenok", "_status:infofooter_(Log in)"); 307 313 } 308 else {309 // disp.setmacro ("header", "authen", "_Global:header_"); //****310 // disp.setmacro ("header", "authenok", "_Global:header_");311 // disp.setmacro ("footer", "authen", "_Global:footer_");312 // disp.setmacro ("footer", "authenok", "_Global:footer_");313 disp.setmacro ("header", "authen", "_:header_");314 disp.setmacro ("header", "authenok", "_:header_");315 disp.setmacro ("footer", "authen", "_:footer_");316 disp.setmacro ("footer", "authenok", "_:footer_");317 }318 319 314 320 315 // get a list of saved configuration arguments (if possible) -
branches/New_Config_Format-branch/gsdl/src/recpt/browserclass.cpp
r765 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.5.4.1 2000/07/12 22:21:35 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.6 2000/06/29 02:47:19 sjboddie 34 added browser info (i.e VList, HList etc.) to status pages 35 30 36 Revision 1.5 1999/11/01 22:04:11 sjboddie 31 37 just a few small changes (that means I can't remember ;) … … 81 87 82 88 text_t browserclass::get_default_formatstring () { 83 return " <td>[link][icon][/link]</td><td>[highlight]{Or}{[Title],Untitled}[/highlight]</td>";89 return ""; 84 90 } 85 91 -
branches/New_Config_Format-branch/gsdl/src/recpt/buildaction.cpp
r1000 r1279 24 24 *********************************************************************/ 25 25 26 #ifndef __WIN32__27 #include <unistd.h>28 #endif29 30 26 #include "OIDtools.h" 31 27 #include "fileutil.h" 32 28 #include "htmlutils.h" 29 #include "gsdltools.h" 33 30 #include "buildaction.h" 34 31 … … 80 77 bool buildaction::check_cgiargs (cgiargsinfoclass &/*argsinfo*/, cgiargsclass &args, 81 78 ostream &/*logout*/) { 82 if ((args["bca"] != "buildstatus") && (args["bca"] != "collog")) 79 80 text_t &arg_bca = args["bca"]; 81 if (!((arg_bca == "buildstatus") || (arg_bca == "collog") || (arg_bca == "blankpage") 82 || ((arg_bca == "buildcol") && (args["wizard"] == "buildexec")))) 83 83 { 84 84 // authenticate the user if authentication is avaiable … … 102 102 // make sure we know about a receptionist 103 103 if (recpt == NULL) { 104 logout << "The pageaction does not contain information\n"104 logout << "The build action does not contain information\n" 105 105 << "about any receptionists. The method set_receptionist\n" 106 106 << "was probably not called from the module which instantiated\n" 107 << "this pageaction.\n";107 << "this build action.\n"; 108 108 return; 109 109 } … … 116 116 text_t fullnamelist = "var fullnamelist = new Array("; 117 117 text_t dirnamelist = "var dirnamelist = new Array("; 118 text_t fullnamemenu = (text_t)"<input type=hidden name=\"bc1fullnameindex\""119 +" value=\"_bcargfullnameindex_\">\n"120 +"<select name=\"bc1fullnamemenu\" onChange=fullnameindex_changed()>\n";118 text_t fullnamemenu = "<input type=hidden name=\"bc1fullnameindex\""; 119 fullnamemenu += " value=\"_bcargfullnameindex_\">\n"; 120 fullnamemenu += "<select name=\"bc1fullnamemenu\" onChange=fullnameindex_changed()>\n"; 121 121 int rcount = 1; 122 122 int fcount = 1; … … 146 146 while (collist_here != collist_end) { 147 147 148 ColInfoResponse_t cinfo; 149 (*rprotolist_here).p->get_collectinfo (*collist_here, cinfo, err, logout); 150 if (err == noError) { 148 ColInfoResponse_t 
*cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout); 149 if (cinfo != NULL) { 151 150 text_t collectionname = *collist_here; 152 if (!cinfo .collectionmeta["collectionname"].empty())151 if (!cinfo->collectionmeta["collectionname"].empty()) 153 152 { 154 153 // get collection name from the collection cfg file 155 collectionname = cinfo .collectionmeta["collectionname"];154 collectionname = cinfo->collectionmeta["collectionname"]; 156 155 } 157 156 else … … 169 168 dirnamelist += (text_t)"\"" + *collist_here + "\""; 170 169 171 fullnamemenu += (text_t)"<option value=\"" + collectionname + "\""; 172 if (args["bc1dirname"] == "") 170 fullnamemenu += "<option value=\"" + collectionname; 171 fullnamemenu.push_back ('"'); 172 if (args["bc1dirname"].empty()) 173 173 { 174 174 if ((rcount==1) && (ccount==1)) … … 202 202 if (args["bca"]=="buildcol") 203 203 { 204 if (((args["bc1dirname"] =="") && (rcount==1) && (ccount==1))204 if (((args["bc1dirname"].empty()) && (rcount==1) && (ccount==1)) 205 205 || (args["bc1dirname"]==*collist_here)) 206 206 { 207 disp.setmacro("bcargingsdlarea","build",cinfo .building["ingsdlarea"]);208 disp.setmacro("bcargcopydir","build",cinfo .building["copydir"]);207 disp.setmacro("bcargingsdlarea","build",cinfo->building["ingsdlarea"]); 208 disp.setmacro("bcargcopydir","build",cinfo->building["copydir"]); 209 209 210 if (cinfo .buildDate==0) // not built210 if (cinfo->buildDate==0) // not built 211 211 { 212 212 // assume that's because this is the first build ever for the collection … … 240 240 } 241 241 242 if ((args["bca"]=="delcol") || (args["bca"]=="editcol") || (args["bca"]=="buildcol") || (args["bca"]=="collog")) 242 if ((args["bca"]=="delcol") || (args["bca"]=="editcol") || 243 (args["bca"]=="buildcol") || (args["bca"]=="collog")) 243 244 { 244 245 disp.setmacro ("dirnamelist", "build", dirnamelist); … … 268 269 text_t bc1name = substr(args_entry.begin()+3,args_entry.end()); 269 270 text_t cached_cgiarg = 
"bcarg" + bc1name; 270 disp.setmacro(cached_cgiarg, "build",args[args_entry]);271 disp.setmacro(cached_cgiarg, "build", dm_safe(args[args_entry])); 271 272 } 272 273 args_here++; … … 288 289 text_t cfg_fname 289 290 = filename_cat(gsdlhome, "collect", dirname, "etc", "collect.cfg"); 291 292 #ifdef GSDL_USE_IOS_H 290 293 ifstream cfg_ifs (cfg_fname.getcstr(), ios::in | ios::nocreate); 294 #else 295 ifstream cfg_ifs (cfg_fname.getcstr(), ios::in); 296 #endif 297 291 298 if (cfg_ifs) 292 299 { … … 323 330 // read in collect.bld 324 331 text_t bld_fname = filename_cat(gsdlhome, "log", dirname+ ".bld"); 332 333 #ifdef GSDL_USE_IOS_H 325 334 ifstream bld_ifs (bld_fname.getcstr(), ios::in | ios::nocreate); 335 #else 336 ifstream bld_ifs (bld_fname.getcstr(), ios::in); 337 #endif 338 326 339 if (bld_ifs) 327 340 { … … 427 440 ostream &logout) { 428 441 429 430 431 432 442 text_tarray wizard_split; 433 const text_t & wizard = (args["wizard"]=="") ? (text_t)"buildcol" : args["wizard"];443 const text_t wizard = (args["wizard"].empty()) ? "buildcol" : args["wizard"]; 434 444 splitchar(wizard.begin(),wizard.end(),':',wizard_split); 435 const text_t &wizard_last = wizard_split.back();445 const text_t wizard_last = wizard_split.back(); 436 446 437 447 if (wizard_last != "buildframe") … … 444 454 else 445 455 { 446 text_t dirname = (args["bc1dirname"] == "") ? (text_t)"unknown" : args["bc1dirname"]; 456 text_t dirname = (args["bc1dirname"] == "") ? 
457 "unknown" : args["bc1dirname"]; 458 447 459 text_t tmpname = dirname+"_XXXXXX"; 448 if (mktemp(tmpname.getcstr())==NULL) 449 { 450 logout << "Failed to create temporary filename" << endl; 451 } 460 char *cstr_tmpname = tmpname.getcstr(); 461 462 if (GSDL_MKTEMP (cstr_tmpname)==NULL) 463 logout << "Failed to create temporary filename" << endl; 464 465 delete cstr_tmpname; 466 452 467 disp.setmacro("bcargtmpname","build",tmpname); 453 468 … … 457 472 458 473 textout << outconvert << disp << headmess << content << "_build:framefooter_\n"; 459 text_t cmd = filename_cat(gsdlhome, "cgi-bin", "webpage_buildcol.pl"); 460 cmd += " gsdlhome=\""+args["gsdlhome"]+"\" httpbuild=\""+args["httpbuild"]+"\""; 461 cmd += " bc1copydata=\""+args["bc1copydata"]+"\""; 462 cmd += " bc1doimport=\""+args["bc1doimport"]+"\""; 463 cmd += " bc1dobuild=\""+args["bc1dobuild"]+"\""; 464 cmd += " bc1dirname=\""+args["bc1dirname"]+"\" bc1tmpname=\""+tmpname+"\" &"; 465 466 system(cmd.getcstr()); 474 text_t cmd = "perl \"" + 475 filename_cat(gsdlhome, "cgi-bin", "webpage_buildcol.pl") + "\""; 476 cmd += " \"httpbuild="+args["httpbuild"]+"\""; 477 cmd += " \"bc1copydata="+args["bc1copydata"]+"\""; 478 cmd += " \"bc1doimport="+args["bc1doimport"]+"\""; 479 cmd += " \"bc1dobuild="+args["bc1dobuild"]+"\""; 480 cmd += " \"bc1dirname="+args["bc1dirname"]+"\" \"bc1tmpname="+tmpname+"\""; 481 // run webpage_buildcol.pl in background on unix systems 482 #if !defined (__WIN32__) 483 cmd += " &"; 484 #endif 485 486 char *cstr_cmd = cmd.getcstr(); 487 488 #if defined (__WIN32__) 489 gsdl_system (cstr_cmd, logout); 490 #else 491 system (cstr_cmd); 492 #endif 493 494 delete cstr_cmd; 467 495 } 468 496 -
branches/New_Config_Format-branch/gsdl/src/recpt/cgiwrapper.cpp
r963 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.22.4.1 2000/07/12 22:21:35 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.31 2000/07/12 04:51:05 nzdl 34 added an error message when no "valid" collections are available 35 36 Revision 1.30 2000/07/05 21:49:31 sjboddie 37 Receptionist now caches collection information to avoid making multiple 38 get_collectinfo calls to collection server 39 40 Revision 1.29 2000/06/29 03:57:14 sjboddie 41 Now append to error log (errout.txt) instead of overwriting it each time 42 43 Revision 1.28 2000/06/28 01:30:23 nzdl 44 *** empty log message *** 45 46 Revision 1.27 2000/06/28 01:24:59 sjboddie 47 got "POST" cgi data to work when using fastcgi 48 49 Revision 1.26 2000/05/12 03:09:26 sjboddie 50 minor modifications to get web library compiling under VC++ 6.0 51 52 Revision 1.25 2000/04/14 04:45:19 sjboddie 53 Modified the English of the debug output slightly 54 55 Revision 1.24 2000/04/14 03:10:35 sjboddie 56 tidied up a few issues concerning the new debug info which showed 57 up on windows 58 59 Revision 1.23 2000/04/14 02:52:05 sjboddie 60 tidied up error messaging and set up some debugging info to be output 61 when running library from command line 62 30 63 Revision 1.22 2000/02/21 21:56:46 sjboddie 31 64 gsdlhome now comes from gsdlsite.cfg … … 191 224 #endif 192 225 193 194 static void page_errorsitecfg (text_t &errorpage, int mode) { 195 errorpage += "Content-type: text/html\n\n"; 196 197 errorpage += "<html>\n"; 198 errorpage += "<head>\n"; 199 errorpage += "<title>Error</title>\n"; 200 errorpage += "</head>\n"; 201 errorpage += "<body>\n"; 202 errorpage += "<h2>Oops!</h2>\n"; 226 static void format_error_string (text_t &errorpage, const text_t &errortext, bool debug) { 227 228 errorpage.clear(); 229 230 if (debug) { 231 errorpage += "\n"; 232 errorpage += "ERROR: " + errortext; 233 errorpage += "\n"; 234 235 } else { 236 237 errorpage += "Content-type: text/html\n\n"; 238 239 errorpage += 
"<html>\n"; 240 errorpage += "<head>\n"; 241 errorpage += "<title>Error</title>\n"; 242 errorpage += "</head>\n"; 243 errorpage += "<body>\n"; 244 errorpage += "<h2>Oops!</h2>\n"; 245 errorpage += errortext; 246 errorpage += "</body>\n"; 247 errorpage += "</html>\n"; 248 } 249 } 250 251 static void page_errorcollect (const text_t &gsdlhome, text_t &errorpage, bool debug) { 252 253 text_t collectdir = filename_cat (gsdlhome, "collect"); 254 255 text_t errortext = "No valid collections were found: Check that your collect directory\n"; 256 errortext += "(" + collectdir + ") is readable and contains at least one valid collection.\n"; 257 errortext += "Note that modelcol is NOT a valid collection.\n"; 258 errortext += "If the path to your collect directory is wrong edit the 'gsdlhome' field\n"; 259 errortext += "in your gsdlsite.cfg configuration file.\n"; 260 261 format_error_string (errorpage, errortext, debug); 262 } 263 264 static void page_errorsitecfg (text_t &errorpage, bool debug, int mode) { 265 266 text_t errortext; 267 203 268 if (mode == 0) { 204 error page += "The gsdlsite.cfg configuration file could not be found. This file\n";205 error page += "should contain configuration information relating to this\n";206 error page+= "site's setup.\n";207 errorpage += "gsdlsite.cfg should reside in the same directory as this executable\n"; 269 errortext += "The gsdlsite.cfg configuration file could not be found. 
This\n"; 270 errortext += "file should contain configuration information relating to this\n"; 271 errortext += "site's setup.\n"; 272 208 273 } else if (mode == 1) { 209 errorpage += "The gsdlsite.cfg configuration file does not contain a valid gsdlhome\n"; 210 errorpage += "entry.\n"; 211 errorpage += "gsdlsite.cfg resides in the same directory as this executable\n"; 212 } 213 errorpage += "</body>\n"; 214 errorpage += "</html>\n"; 274 errortext += "The gsdlsite.cfg configuration file does not contain a valid\n"; 275 errortext += "gsdlhome entry.\n"; 276 } 277 278 if (debug) { 279 errortext += "gsdlsite.cfg should reside in the directory from which the\n"; 280 errortext += "library executable was run.\n"; 281 } else { 282 errortext += "gsdlsite.cfg should reside in the same directory as the library\n"; 283 errortext += "executable file.\n"; 284 } 285 286 format_error_string (errorpage, errortext, debug); 215 287 } 216 288 217 289 218 290 static void page_errormaincfg (const text_t &gsdlhome, const text_t &collection, 219 text_t &errorpage) { 220 errorpage += "Content-type: text/html\n\n"; 221 222 errorpage += "<html>\n"; 223 errorpage += "<head>\n"; 224 errorpage += "<title>Error</title>\n"; 225 errorpage += "</head>\n"; 226 errorpage += "<body>\n"; 227 errorpage += "<h2>Oops!</h2>\n"; 291 bool debug, text_t &errorpage) { 292 293 text_t errortext; 294 228 295 if (collection.empty()) { 229 296 text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg"); 230 error page+= "The main.cfg configuration file could not be found. This file\n";231 error page+= "should contain configuration information relating to the\n";232 error page += "setup of the interface. As this cgi script is not being run\n";233 error page+= "in collection specific mode the file should reside at\n";234 error page+= main_cfg_file + ".\n";297 errortext += "The main.cfg configuration file could not be found. 
This file\n"; 298 errortext += "should contain configuration information relating to the\n"; 299 errortext += "setup of the interface. As this receptionist is not being run\n"; 300 errortext += "in collection specific mode the file should reside at\n"; 301 errortext += main_cfg_file + ".\n"; 235 302 } else { 236 303 text_t collect_cfg_file = filename_cat (gsdlhome, "collect", collection, "etc", "collect.cfg"); 237 304 text_t main_collect_cfg_file = filename_cat (gsdlhome, "etc", "collect.cfg"); 238 305 text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg"); 239 errorpage += "Either the collect.cfg or main.cfg configuration file could\n"; 240 errorpage += "not be found. This file should contain configuration information\n"; 241 errorpage += "relating to the setup of the interface. As this cgi script is\n"; 242 errorpage += "being run in collection specific mode the file should reside\n"; 243 errorpage += "at either "; 244 errorpage += collect_cfg_file + ",\n"; 245 errorpage += main_collect_cfg_file + " or " + main_cfg_file + ".\n"; 246 } 247 errorpage += "</body>\n"; 248 errorpage += "</html>\n"; 249 } 250 251 252 static void page_errorinit (const text_t &gsdlhome, text_t &errorpage) { 253 errorpage += "Content-type: text/html\n\n"; 254 255 errorpage += "<html>\n"; 256 errorpage += "<head>\n"; 257 errorpage += "<title>Error</title>\n"; 258 errorpage += "</head>\n"; 259 errorpage += "<body>\n"; 260 errorpage += "<h2>Oops!</h2>\n"; 261 errorpage += "An error occurred during the initialisation of the Greenstone Digital\n"; 262 errorpage += "Library software. It is likely that the software has not been setup\n"; 263 errorpage += "correctly.\n"; 306 errortext += "Either the collect.cfg or main.cfg configuration file could\n"; 307 errortext += "not be found. This file should contain configuration information\n"; 308 errortext += "relating to the setup of the interface. 
As this receptionist is\n"; 309 errortext += "being run in collection specific mode the file should reside\n"; 310 errortext += "at either " + collect_cfg_file + ",\n"; 311 errortext += main_collect_cfg_file + " or " + main_cfg_file + ".\n"; 312 } 313 314 format_error_string (errorpage, errortext, debug); 315 } 316 317 318 static void page_errorinit (const text_t &gsdlhome, bool debug, text_t &errorpage) { 319 320 text_t errortext = "An error occurred during the initialisation of the Greenstone Digital\n"; 321 errortext += "Library software. It is likely that the software has not been setup\n"; 322 errortext += "correctly.\n"; 264 323 265 324 text_t init_file = filename_cat (gsdlhome, "etc", "initout.txt"); … … 268 327 delete ifile; 269 328 if (initin) { 270 error page+= "The initialisation error log, " + init_file + ", contains the\n";271 error page+= "following information:\n\n";272 errorpage+= "<pre>\n";329 errortext += "The initialisation error log, " + init_file + ", contains the\n"; 330 errortext += "following information:\n\n"; 331 if (!debug) errortext += "<pre>\n"; 273 332 274 333 char c; 275 334 initin.get(c); 276 335 while (!initin.eof ()) { 277 error page.push_back(c);336 errortext.push_back(c); 278 337 initin.get(c); 279 338 } 280 339 281 errorpage+= "</pre>\n";340 if (!debug) errortext += "</pre>\n"; 282 341 283 342 initin.close(); 284 343 285 344 } else { 286 errorpage += "Please consult " + init_file + " for more information.\n"; 287 } 288 289 errorpage += "</body>\n"; 290 errorpage += "</html>\n"; 291 } 292 293 static void page_errorparseargs (const text_t &gsdlhome, text_t &errorpage) { 294 errorpage += "Content-type: text/html\n\n"; 295 296 errorpage += "<html>\n"; 297 errorpage += "<head>\n"; 298 errorpage += "<title>Error</title>\n"; 299 errorpage += "</head>\n"; 300 errorpage += "<body>\n"; 301 errorpage += "<h2>Oops!</h2>\n"; 302 errorpage += "An error occurred during the parsing of the cgi arguments.\n"; 345 errortext += "Please consult " + 
init_file + " for more information.\n"; 346 } 347 348 format_error_string (errorpage, errortext, debug); 349 } 350 351 static void page_errorparseargs (const text_t &gsdlhome, bool debug, text_t &errorpage) { 352 353 text_t errortext = "An error occurred during the parsing of the cgi arguments.\n"; 303 354 304 355 text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt"); … … 307 358 delete efile; 308 359 if (errin) { 309 error page+= "The error log, " + error_file + ", contains the\n";310 error page+= "following information:\n\n";311 errorpage+= "<pre>\n";360 errortext += "The error log, " + error_file + ", contains the\n"; 361 errortext += "following information:\n\n"; 362 if (!debug) errortext += "<pre>\n"; 312 363 313 364 char c; 314 365 errin.get(c); 315 366 while (!errin.eof ()) { 316 error page.push_back(c);367 errortext.push_back(c); 317 368 errin.get(c); 318 369 } 319 errorpage+= "</pre>\n";370 if (!debug) errortext += "</pre>\n"; 320 371 errin.close(); 321 372 322 373 } else { 323 errorpage += "Please consult " + error_file + " for more information.\n"; 324 } 325 326 errorpage += "</body>\n"; 327 errorpage += "</html>\n"; 328 } 329 330 static void page_errorcgipage (const text_t &gsdlhome, text_t &errorpage) { 331 errorpage += "Content-type: text/html\n\n"; 332 333 errorpage += "<html>\n"; 334 errorpage += "<head>\n"; 335 errorpage += "<title>Error</title>\n"; 336 errorpage += "</head>\n"; 337 errorpage += "<body>\n"; 338 errorpage += "<h2>Oops!</h2>\n"; 339 errorpage += "An error occurred during the construction of the cgi page.\n"; 340 374 errortext += "Please consult " + error_file + " for more information.\n"; 375 } 376 377 format_error_string (errorpage, errortext, debug); 378 } 379 380 static void page_errorcgipage (const text_t &gsdlhome, bool debug, text_t &errorpage) { 381 382 text_t errortext = "An error occurred during the construction of the cgi page.\n"; 341 383 342 384 text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt"); 
… … 345 387 delete efile; 346 388 if (errin) { 347 error page+= "The error log, " + error_file + ", contains the\n";348 error page+= "following information:\n\n";349 errorpage+= "<pre>\n";389 errortext += "The error log, " + error_file + ", contains the\n"; 390 errortext += "following information:\n\n"; 391 if (!debug) errortext += "<pre>\n"; 350 392 351 393 char c; 352 394 errin.get(c); 353 395 while (!errin.eof ()) { 354 error page.push_back(c);396 errortext.push_back(c); 355 397 errin.get(c); 356 398 } 357 errorpage+= "</pre>\n";399 if (!debug) errortext += "</pre>\n"; 358 400 errin.close(); 359 401 360 402 } else { 361 errorpage += "Please consult " + error_file + " for more information.\n"; 362 } 363 364 errorpage += "</body>\n"; 365 errorpage += "</html>\n"; 366 } 367 403 errortext += "Please consult " + error_file + " for more information.\n"; 404 } 405 406 format_error_string (errorpage, errortext, debug); 407 } 408 409 static void print_debug_info (receptionist &recpt) { 410 411 outconvertclass text_t2ascii; 412 recptconf configinfo = recpt.get_configinfo (); 413 text_t etc_dir = filename_cat (configinfo.gsdlhome, "etc"); 414 415 cout << "\n"; 416 cout << text_t2ascii 417 << "------------------------------------------------------------\n" 418 << "Configuration and initialization completed successfully.\n" 419 << " Note that more debug information may be available in the\n" 420 << " initialization and error logs initout.txt and errout.txt\n" 421 << " in " << etc_dir << ".\n" 422 << "------------------------------------------------------------\n\n"; 423 424 bool colspec = false; 425 if (configinfo.collection.empty()) { 426 cout << "Receptionist is running in \"general\" (i.e. 
not \"collection\n" 427 << "specific\") mode.\n"; 428 } else { 429 cout << text_t2ascii 430 << "Receptionist is running in \"collection specific\" mode.\n" 431 << " collection=" << configinfo.collection << "\n" 432 << " collection directory=" << configinfo.collectdir << "\n"; 433 colspec = true; 434 } 435 436 cout << text_t2ascii << "gsdlhome=" << configinfo.gsdlhome << "\n"; 437 if (!configinfo.gdbmhome.empty()) 438 cout << text_t2ascii << "gdbmhome=" << configinfo.gdbmhome << "\n"; 439 cout << text_t2ascii << "httpprefix=" << configinfo.httpprefix << "\n"; 440 cout << text_t2ascii << "httpimg=" << configinfo.httpimg << "\n"; 441 cout << text_t2ascii << "gwcgi=" << configinfo.gwcgi << "\n" 442 << " Note that unless gwcgi has been set from a configuration\n" 443 << " file it is dependent on environment variables set by your\n" 444 << " webserver. Therefore it may not have the same value when run\n" 445 << " from the command line as it would be when run from your\n" 446 << " web server.\n"; 447 if (configinfo.usecookies) 448 cout << "cookies are enabled\n"; 449 else 450 cout << "cookies are disabled\n"; 451 if (configinfo.logcgiargs) 452 cout << "logging is enabled\n"; 453 else 454 cout << "logging is disabled\n"; 455 cout << "------------------------------------------------------------\n\n"; 456 457 text_tset::const_iterator this_mfile = configinfo.macrofiles.begin(); 458 text_tset::const_iterator end_mfile = configinfo.macrofiles.end(); 459 cout << "Macro Files:\n" 460 << "------------\n"; 461 text_t mfile; 462 bool found; 463 while (this_mfile != end_mfile) { 464 cout << text_t2ascii << *this_mfile; 465 int spaces = (22 - (*this_mfile).size()); 466 if (spaces < 2) spaces = 2; 467 text_t outspaces; 468 for (int i = 0; i < spaces; i++) outspaces.push_back (' '); 469 cout << text_t2ascii << outspaces; 470 471 found = false; 472 if (colspec) { 473 // collection specific - try collectdir/macros first 474 mfile = filename_cat (configinfo.collectdir, "macros", 
*this_mfile); 475 if (file_exists (mfile)) { 476 cout << text_t2ascii << "found (" << mfile << ")\n"; 477 found = true; 478 } 479 } 480 481 if (!found) { 482 // try main macro directory 483 mfile = filename_cat (configinfo.gsdlhome, "macros", *this_mfile); 484 if (file_exists (mfile)) { 485 cout << text_t2ascii << "found (" << mfile << ")\n"; 486 found = true; 487 } 488 } 489 490 if (!found) 491 cout << text_t2ascii << "NOT FOUND\n"; 492 493 this_mfile ++; 494 } 495 496 cout << "------------------------------------------------------------\n\n" 497 << "Collections:\n" 498 << "------------\n" 499 << " Note that collections will only appear as \"running\" if\n" 500 << " their build.cfg files exist, are readable, contain a valid\n" 501 << " builddate field (i.e. > 0), and are in the collection's\n" 502 << " index directory (i.e. NOT the building directory)\n\n"; 503 504 recptprotolistclass *protos = recpt.get_recptprotolist_ptr(); 505 recptprotolistclass::iterator rprotolist_here = protos->begin(); 506 recptprotolistclass::iterator rprotolist_end = protos->end(); 507 bool found_valid_col = false; 508 while (rprotolist_here != rprotolist_end) { 509 if ((*rprotolist_here).p != NULL) { 510 511 text_tarray collist; 512 comerror_t err; 513 (*rprotolist_here).p->get_collection_list (collist, err, cerr); 514 if (err == noError) { 515 text_tarray::iterator collist_here = collist.begin(); 516 text_tarray::iterator collist_end = collist.end(); 517 518 while (collist_here != collist_end) { 519 520 cout << text_t2ascii << *collist_here; 521 522 int spaces = (22 - (*collist_here).size()); 523 if (spaces < 2) spaces = 2; 524 text_t outspaces; 525 for (int i = 0; i < spaces; i++) outspaces.push_back (' '); 526 cout << text_t2ascii << outspaces; 527 528 ColInfoResponse_t *cinfo = recpt.get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, cerr); 529 if (cinfo != NULL) { 530 if (cinfo->isPublic) cout << "public "; 531 else cout << "private"; 532 533 if (cinfo->buildDate > 0) { 534 
cout << " running "; 535 found_valid_col = true; 536 } else { 537 cout << " not running"; 538 } 539 } 540 541 cout << "\n"; 542 543 collist_here ++; 544 } 545 } 546 } 547 rprotolist_here ++; 548 } 549 550 if (!found_valid_col) { 551 cout << "WARNING: No \"running\" collections were found. You need to\n"; 552 cout << " build one of the above collections\n"; 553 } 554 555 cout << "\n------------------------------------------------------------\n"; 556 cout << "------------------------------------------------------------\n\n"; 557 cout << "receptionist running in command line debug mode\n"; 558 cout << "enter cgi arguments as name=value pairs (e.g. 'a=p&p=home'):\n"; 559 560 } 368 561 369 562 // cgiwrapper does everything necessary to output a page … … 372 565 // should equal "". 373 566 void cgiwrapper (receptionist &recpt, text_t collection) { 567 568 int numrequests = 0; 569 bool debug = false; 570 recptconf configinfo = recpt.get_configinfo (); 571 572 // find out whether this is being run as a cgi-script 573 // or a fastcgi script 374 574 #ifdef USE_FASTCGI 375 575 fcgistreambuf outbuf; 376 #endif377 378 // init stuff - we can't output error pages directly with379 // fastcgi so the pages are stored until we can output them380 text_t errorpage;381 outconvertclass text_t2ascii;382 383 // set defaults384 int maxrequests = 10000;385 recpt.configure ("collection", collection);386 recpt.configure ("httpimg", "/gsdl/images");387 char *script_name = getenv("SCRIPT_NAME");388 if (script_name != NULL) recpt.configure("gwcgi", script_name);389 else recpt.configure("gwcgi", "/cgi-bin/gw");390 391 // read in the configuration files.392 text_t gsdlhome;393 if (!site_cfg_read (recpt, gsdlhome, maxrequests)) {394 // couldn't find the site configuration file395 page_errorsitecfg (errorpage, 0);396 } else if (gsdlhome.empty()) {397 // no gsdlhome in gsdlsite.cfg398 page_errorsitecfg (errorpage, 1);399 } else if (!main_cfg_read (recpt, gsdlhome, collection)) {400 // couldn't find 
the main configuration file401 page_errormaincfg (gsdlhome, collection, errorpage);402 }403 404 // initialise the library software405 if (errorpage.empty()) {406 text_t init_file = filename_cat (gsdlhome, "etc", "initout.txt");407 char *iout = init_file.getcstr();408 ofstream initout (iout);409 delete iout;410 if (!recpt.init(initout)) {411 // an error occurred during the initialisation412 initout.close();413 page_errorinit(gsdlhome, errorpage);414 }415 initout.close();416 }417 418 // find out whether this is being run as a cgi-script419 // or a fastcgi script420 int numrequests = 0;421 #ifdef USE_FASTCGI422 576 int isfastcgi = !FCGX_IsCGI(); 423 577 FCGX_Stream *fcgiin, *fcgiout, *fcgierr; … … 457 611 } else { 458 612 // debugging from command line 459 char cinURIStr[1024]; 460 cin.get(cinURIStr, 1024); 461 argstr = cinURIStr; 613 debug = true; 462 614 } 463 615 } 464 465 // cgi scripts only deal with one request 466 maxrequests = 1; 467 } 616 } 617 618 if (debug) { 619 cout << "Configuring Greenstone...\n"; 620 cout << flush; 621 } 622 623 // init stuff - we can't output error pages directly with 624 // fastcgi so the pages are stored until we can output them 625 text_t errorpage; 626 outconvertclass text_t2ascii; 627 628 // set defaults 629 int maxrequests = 10000; 630 recpt.configure ("collection", collection); 631 recpt.configure ("httpimg", "/gsdl/images"); 632 char *script_name = getenv("SCRIPT_NAME"); 633 if (script_name != NULL) recpt.configure("gwcgi", script_name); 634 else recpt.configure("gwcgi", "/cgi-bin/gw"); 635 636 // read in the configuration files. 
637 text_t gsdlhome; 638 if (!site_cfg_read (recpt, gsdlhome, maxrequests)) { 639 // couldn't find the site configuration file 640 page_errorsitecfg (errorpage, debug, 0); 641 } else if (gsdlhome.empty()) { 642 // no gsdlhome in gsdlsite.cfg 643 page_errorsitecfg (errorpage, debug, 1); 644 } else if (!main_cfg_read (recpt, gsdlhome, collection)) { 645 // couldn't find the main configuration file 646 page_errormaincfg (gsdlhome, collection, debug, errorpage); 647 } else if (configinfo.collectinfo.empty()) { 648 // don't have any collections 649 page_errorcollect (gsdlhome, errorpage, debug); 650 } 651 652 if (errorpage.empty()) { 653 654 // initialise the library software 655 if (debug) { 656 cout << "Initializing...\n"; 657 cout << flush; 658 } 659 660 text_t init_file = filename_cat (gsdlhome, "etc", "initout.txt"); 661 char *iout = init_file.getcstr(); 662 ofstream initout (iout); 663 delete iout; 664 if (!recpt.init(initout)) { 665 // an error occurred during the initialisation 666 initout.close(); 667 page_errorinit(gsdlhome, debug, errorpage); 668 } 669 initout.close(); 670 } 671 672 if (debug && errorpage.empty()) { 673 // get query string from command line 674 print_debug_info (recpt); 675 char cinURIStr[1024]; 676 cin.get(cinURIStr, 1024); 677 argstr = cinURIStr; 678 } 679 680 // cgi scripts only deal with one request 681 if (!isfastcgi) maxrequests = 1; 468 682 469 683 // Page-request loop. 
If this is not being run as a fastcgi … … 474 688 if (isfastcgi) { 475 689 if (FCGX_Accept(&fcgiin, &fcgiout, &fcgierr, &fcgienvp) < 0) break; 476 aURIStr = FCGX_GetParam("QUERY_STRING", fcgienvp); 477 if (aURIStr != NULL) argstr = aURIStr; 478 else argstr = ""; 690 691 char *request_method_str = FCGX_GetParam ("REQUEST_METHOD", fcgienvp); 692 char *content_length_str = FCGX_GetParam ("CONTENT_LENGTH", fcgienvp); 693 694 if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 && 695 content_length_str != NULL) { 696 // POST form data 697 int content_length = text_t(content_length_str).getint(); 698 if (content_length > 0) { 699 argstr.clear(); 700 int c; 701 do { 702 c = FCGX_GetChar (fcgiin); 703 if (c < 0) break; 704 argstr.push_back (c); 705 content_length--; 706 } while (content_length > 0); 707 } 708 709 } else { 710 // GET form data 711 aURIStr = FCGX_GetParam("QUERY_STRING", fcgienvp); 712 if (aURIStr != NULL) argstr = aURIStr; 713 else argstr = ""; 714 } 479 715 } 480 716 #endif … … 528 764 text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt"); 529 765 char *eout = error_file.getcstr(); 530 ofstream errout (eout );766 ofstream errout (eout, ios::app); 531 767 delete eout; 532 768 cerr = errout; … … 536 772 if (!recpt.parse_cgi_args (argstr, args, errout, fastcgienv)) { 537 773 errout.close (); 538 page_errorparseargs(gsdlhome, errorpage);774 page_errorparseargs(gsdlhome, debug, errorpage); 539 775 } else { 540 776 if (!recpt.produce_cgi_page (args, pageout, errout, fastcgienv)) { 541 777 errout.close (); 542 page_errorcgipage(gsdlhome, errorpage); 543 } else { 544 errout.close (); 778 page_errorcgipage(gsdlhome, debug, errorpage); 545 779 } 546 780 recpt.log_cgi_args (args, errout, fastcgienv); 781 errout.close (); 547 782 } 548 783 } -
branches/New_Config_Format-branch/gsdl/src/recpt/comtypes.cpp
r823 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.22.4.1 2000/07/12 22:21:36 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.23 2000/06/29 00:22:58 sjboddie 34 added new numsections field to collection info and made the statusaction 35 recognize it 36 30 37 Revision 1.22 1999/12/13 02:24:33 davidb 31 38 Data fields for cross collection searching (CCS) … … 134 141 buildDate=0; 135 142 numDocs=0; 143 numSections=0; 136 144 numWords=0; 137 145 numBytes=0; -
branches/New_Config_Format-branch/gsdl/src/recpt/comtypes.h
r871 r1279 75 75 // languages [6] IMPLICIT StringSet, -- languages in the collection 76 76 // numDocs [7] IMPLICIT INTEGER, 77 // numWords [8] IMPLICIT INTEGER OPTIONAL, 78 // numBytes [9] IMPLICIT INTEGER OPTIONAL 79 // collectionmeta [10] IMPLICIT StringSet 80 // format [11] IMPLICIT StringSet 81 // building [12] IMPLICIT StringSet 82 // receptionist [13] IMPLICIT GeneralString 77 // numSections [8] IMPLICIT INTEGER OPTIONAL, 78 // numWords [9] IMPLICIT INTEGER OPTIONAL, 79 // numBytes [10] IMPLICIT INTEGER OPTIONAL 80 // collectionmeta [11] IMPLICIT StringSet 81 // format [12] IMPLICIT StringSet 82 // building [13] IMPLICIT StringSet 83 // receptionist [14] IMPLICIT GeneralString 83 84 // } 84 85 struct ColInfoResponse_t { … … 92 93 text_tarray ccsCols; // empty if collection does not use cross-collection searching 93 94 text_tarray languages; 94 unsigned long numDocs; // 0 if not known 95 unsigned long numWords; // 0 if not known 96 unsigned long numBytes; // 0 if not known 95 unsigned long numDocs; // 0 if not known 96 unsigned long numSections; // 0 if not known 97 unsigned long numWords; // 0 if not known 98 unsigned long numBytes; // 0 if not known 97 99 text_tmap collectionmeta; 98 100 text_tmap format; -
branches/New_Config_Format-branch/gsdl/src/recpt/documentaction.cpp
r1258 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.37.2.3 2000/07/12 22:21:37 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 34 Revision 1.39 2000/07/05 21:49:31 sjboddie 35 Receptionist now caches collection information to avoid making multiple 36 get_collectinfo calls to collection server 37 30 38 Revision 1.37.2.2 2000/06/30 00:46:16 nzdl 31 39 caught New_Config_Format-branch up with changes to trunk 32 40 41 Revision 1.38 2000/05/04 05:18:46 sjboddie 42 attempting to get end-user collection building to work under windows 43 33 44 Revision 1.37.2.1 2000/04/09 23:16:46 sjboddie 34 45 Added DocumentColumns stuff to New_Config_Format-branch branch 46 47 Revision 1.37 2000/04/07 04:40:44 sjboddie 48 Reverted back to old DocumentHeader, DocumentTitles, DocumentImages etc. 49 from DocumentColumns stuff. I'll move the DocumentColumns stuff to a 50 separate development branch (New_Config_Format-branch) for now. The plan 51 is to redesign the configuration file format a bit and limit the number of 52 distributions floating around that take different configuration formats). 35 53 36 54 Revision 1.36 2000/04/03 07:26:28 sjboddie … … 187 205 188 206 documentaction::documentaction () { 207 recpt = NULL; 208 189 209 190 210 // this action uses cgi variables "a", "d", "cl", … … 632 652 if (collectproto == NULL) return; 633 653 654 if (recpt == NULL) { 655 logout << "ERROR (documentaction::define_external_macros): This action does not contain\n" 656 << " information about any receptionists. 
The method set_receptionist was\n" 657 << " probably not called from the module which instantiated this action.\n"; 658 return; 659 } 660 634 661 outconvertclass text_t2ascii; 635 662 comerror_t err; … … 638 665 text_tset metadata; 639 666 640 641 // get info on current collection and load up formatinfo 642 // I'd prefer not to do this here as we're getting 643 // collection info every time (and probably also getting 644 // it in other places some of the time) - One day I'll 645 // fix it ... maybe - Stefan. 646 ColInfoResponse_t cinfo; 647 collectproto->get_collectinfo (collection, cinfo, err, logout); 648 load_formatinfo (cinfo.format, args.getintarg("gt")); 667 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, collection, logout); 668 if (cinfo == NULL) { 669 logout << "ERROR (documentaction::define_external_macros): get_collectinfo_ptr returned NULL\n"; 670 return; 671 } 672 load_formatinfo (cinfo->format, args.getintarg("gt")); 649 673 650 674 if (formatinfo.DocumentUseHTML) { … … 658 682 disp.setmacro ("gsdltop", "Global", "documenttop"); 659 683 } 660 text_tmap::iterator it = cinfo .format.find ("homepage");661 if (it != cinfo .format.end()) {684 text_tmap::iterator it = cinfo->format.find ("homepage"); 685 if (it != cinfo->format.end()) { 662 686 text_t httppagehome; 663 687 if (get_link (args, protos, (*it).second, httppagehome, logout)) -
branches/New_Config_Format-branch/gsdl/src/recpt/documentaction.h
r1084 r1279 94 94 virtual ~documentaction (); 95 95 96 void set_receptionist (receptionist *therecpt) {recpt=therecpt;} 97 96 98 text_t get_action_name () {return "d";} 97 99 -
branches/New_Config_Format-branch/gsdl/src/recpt/formattools.cpp
r1258 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.20.2.3 2000/07/12 22:21:39 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 30 34 Revision 1.20.2.2 2000/06/30 00:46:17 nzdl 31 35 caught New_Config_Format-branch up with changes to trunk 32 33 36 34 37 Revision 1.21 2000/06/30 00:40:39 sjboddie -
branches/New_Config_Format-branch/gsdl/src/recpt/htmlutils.cpp
r919 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.5.4.1 2000/07/12 22:21:39 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.6 2000/05/04 05:17:46 sjboddie 34 moved dm_safe from htmlutils to gsdltools 35 30 36 Revision 1.5 2000/02/13 20:38:59 sjboddie 31 37 added dm_safe function … … 69 75 return outstring; 70 76 } 71 72 text_t dm_safe (const text_t &instring) {73 74 text_t outstring;75 text_t::const_iterator here = instring.begin();76 text_t::const_iterator end = instring.end();77 while (here != end) {78 if (*here == '_') outstring += "\\_";79 else outstring.push_back(*here);80 here ++;81 }82 return outstring;83 } -
branches/New_Config_Format-branch/gsdl/src/recpt/htmlutils.h
r919 r1279 35 35 text_t html_safe (const text_t &instring); 36 36 37 text_t dm_safe (const text_t &instring);38 39 37 #endif -
branches/New_Config_Format-branch/gsdl/src/recpt/infodbclass.cpp
r928 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.6.4.1 2000/07/12 22:21:40 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.7 2000/05/04 08:21:30 sjboddie 34 modifications for windows port of GCC 35 30 36 Revision 1.6 2000/02/15 22:53:50 kjm18 31 37 search history stuff added. … … 159 165 160 166 openfile = filename; 161 167 162 168 char *namebuffer = filename.getcstr(); 163 169 do { 164 170 #ifdef __WIN32__ 165 171 gdbmfile = gdbm_open (namebuffer, block_size, mode, 00664, NULL, (need_filelock) ? 1 : 0); 166 172 #else 167 173 gdbmfile = gdbm_open (namebuffer, block_size, mode, 00664, NULL); … … 171 177 (gdbm_errno==GDBM_CANT_BE_READER || gdbm_errno==GDBM_CANT_BE_WRITER)); 172 178 delete namebuffer; 173 179 174 180 if (gdbmfile == NULL && logout != NULL) { 175 181 outconvertclass text_t2ascii; -
branches/New_Config_Format-branch/gsdl/src/recpt/infodbclass.h
r928 r1279 33 33 #include "gsdlconf.h" 34 34 #include "text_t.h" 35 36 #if defined(GSDL_USE_OBJECTSPACE) 37 # include <ospace\std\iostream> 38 # include <ospace\std\fstream> 39 #elif defined(GSDL_USE_IOS_H) 40 # include <iostream.h> 41 # include <fstream.h> 42 #else 43 # include <iostream> 44 # include <fstream> 45 #endif 35 46 36 47 #ifdef __WIN32__ -
branches/New_Config_Format-branch/gsdl/src/recpt/librarymain.cpp
r994 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.24.4.1 2000/07/12 22:21:41 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.28 2000/07/05 21:49:32 sjboddie 34 Receptionist now caches collection information to avoid making multiple 35 get_collectinfo calls to collection server 36 37 Revision 1.27 2000/06/23 03:21:39 sjboddie 38 Created converter classes for simple 8 bit encodings that use a 39 simple textual map file. Instances of these classes are used to handle 40 the Windows 1256 (Arabic) encoding. 41 42 Revision 1.26 2000/05/29 03:30:03 sjboddie 43 fixed a bug preventing GB encoded text from being displayed correctly 44 (bug showed up in Chinese collection) 45 46 Revision 1.25 2000/04/14 02:52:05 sjboddie 47 tidied up error messaging and set up some debugging info to be output 48 when running library from command line 49 30 50 Revision 1.24 2000/02/29 20:59:02 sjboddie 31 51 added error message when unable to read from collect directory - should … … 132 152 #include "authenaction.h" 133 153 #include "usersaction.h" 134 #include "authenaction.h"135 154 #include "extlinkaction.h" 136 155 #include "buildaction.h" … … 152 171 text_tarray collections; 153 172 154 // get gsdlhome (if we fail the error will be picked up later -- in 155 // cgiwrapper) 173 // get gsdlhome 156 174 text_t gsdlhome; 157 if (site_cfg_read (gsdlhome)) { 158 text_t collectdir = filename_cat (gsdlhome, "collect"); 159 if (!read_dir (collectdir, collections)) { 160 cerr << "couldn't read collect directory - make sure gsdlhome field is correct in gsdlsite.cfg\n"; 161 exit (1); 162 } 163 } 175 site_cfg_read (gsdlhome); 176 text_t collectdir = filename_cat (gsdlhome, "collect"); 177 read_dir (collectdir, collections); 164 178 165 179 text_tarray::const_iterator thiscol = collections.begin(); … … 202 216 // collection name 203 217 cserver->configure ("collection", *thiscol); 204 218 219 // configure receptionist's collectinfo structure 220 text_tarray colinfo; 221 
colinfo.push_back (*thiscol); 222 colinfo.push_back (gsdlhome); 223 colinfo.push_back (gsdlhome); 224 recpt.configure ("collectinfo", colinfo); 225 205 226 nproto.add_collectserver (cserver); 206 227 thiscol ++; … … 215 236 recpt.add_converter ("u", &utf8inconvert, &utf8outconvert); 216 237 217 if (!gsdlhome.empty()) { 218 mapinconvertclass gbinconvert; 219 gbinconvert.setmapfile (gsdlhome, "gbku", 0x25a1); 220 mapoutconvertclass gboutconvert; 221 gboutconvert.setmapfile (gsdlhome, "ugbk", 0xa1f5); 222 recpt.add_converter ("g", &gbinconvert, &gboutconvert); 223 } 238 mapinconvertclass gbinconvert; 239 gbinconvert.setmapfile (gsdlhome, "gbku", 0x25a1); 240 mapoutconvertclass gboutconvert; 241 gboutconvert.setmapfile (gsdlhome, "ugbk", 0xa1f5); 242 recpt.add_converter ("g", &gbinconvert, &gboutconvert); 243 244 text_t armapfile = filename_cat (gsdlhome, "unicode", "MAPPINGS"); 245 armapfile = filename_cat (armapfile, "WINDOWS", "1256.TXT"); 246 simplemapinconvertclass arinconvert; 247 arinconvert.setmapfile (armapfile); 248 simplemapoutconvertclass aroutconvert; 249 aroutconvert.setmapfile (armapfile); 250 recpt.add_converter ("a", &arinconvert, &aroutconvert); 251 224 252 225 253 // the list of actions. Note: these actions will become invalid … … 230 258 231 259 pageaction apageaction; 260 apageaction.set_receptionist (&recpt); 232 261 recpt.add_action (&apageaction); 233 262 … … 236 265 237 266 queryaction aqueryaction; 267 aqueryaction.set_receptionist (&recpt); 238 268 recpt.add_action (&aqueryaction); 239 269 240 270 documentaction adocumentaction; 271 adocumentaction.set_receptionist (&recpt); 241 272 recpt.add_action (&adocumentaction); 242 273 -
branches/New_Config_Format-branch/gsdl/src/recpt/pageaction.cpp
r1033 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.29.4.1 2000/07/12 22:21:41 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.34 2000/07/12 04:51:05 nzdl 34 added an error message when no "valid" collections are available 35 36 Revision 1.33 2000/07/05 21:49:33 sjboddie 37 Receptionist now caches collection information to avoid making multiple 38 get_collectinfo calls to collection server 39 40 Revision 1.32 2000/06/27 23:02:40 sjboddie 41 Tidied up the way collections are displayed on 'standard' homepage. 42 Removed all the nzdl.org specific stuff. 43 44 Revision 1.31 2000/06/23 03:48:08 sjboddie 45 Added Arabic language and encoding options to the preferences page. This 46 is a much more complex task than it should be (you even have to recompile!) 47 but it's not really worth fixing until the new config file format is 48 finalised. 49 50 Revision 1.30 2000/06/14 22:33:42 sjboddie 51 Added French and Spanish language selection options to the preferences 52 page. I really don't like how this is currently implemented (i.e. you 53 have to recompile the library to do something so simple), it might have 54 to wait until the new configuration file stuff is done before it's fixed 55 though. 
56 30 57 Revision 1.29 2000/03/19 21:16:46 nzdl 31 58 added german language interface … … 186 213 187 214 text_t homeextra = "<center><table width=_pagewidth_><tr valign=top>\n"; 215 bool found_valid_col = false; 188 216 189 217 recptprotolistclass::iterator rprotolist_here = protos->begin(); … … 198 226 text_tarray::iterator collist_here = collist.begin(); 199 227 text_tarray::iterator collist_end = collist.end(); 200 201 int row1 = 9; 202 int row2 = 8; 203 int count = 1; 228 229 int count = 0; 230 bool first = true; 204 231 while (collist_here != collist_end) { 205 232 206 ColInfoResponse_t cinfo; 207 (*rprotolist_here).p->get_collectinfo (*collist_here, cinfo, err, logout); 233 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout); 208 234 209 if ( err == noError) {210 if (cinfo .isPublic && (cinfo.buildDate > 0)) {235 if (cinfo != NULL) { 236 if (cinfo->isPublic && (cinfo->buildDate > 0)) { 211 237 238 found_valid_col = true; 212 239 FilterResponse_t response; 213 240 text_tset metadata; … … 233 260 } 234 261 235 if ((count == 1) || (count == (row1+1)) || (count == ((row1+row2)+1))) 236 homeextra += "<td align=center>"; 237 else homeextra += "<p>"; 238 239 if (*collist_here == "niupepa") { 240 homeextra += "<a href=\"_httpmusiclibrary_\">_iconmusiclibrary_</a>\n"; 241 if ((count == row1) || (count == (row1+row2))) homeextra += "</td>"; 242 count ++; 243 if ((count == 1) || (count == (row1+1)) || (count == ((row1+row2)+1))) 244 homeextra += "<td align=center>"; 245 else homeextra += "<p>"; 246 } 262 if ((count%3 == 0) && (!first)) 263 homeextra += "</tr><tr valign=top>\n"; 247 264 248 265 text_t link = "<a href=\"_gwcgi_?a=p&p=about&c=" + *collist_here + "\">"; … … 250 267 link = "<a href=\"_gwcgi_?a=p&p=about&l=zh&nw=u&c=" + *collist_here + "\">"; 251 268 if (*collist_here == "arabic") 252 link = "<a href=\"_gwcgi_?a=p&p=about& w=a&c=" + *collist_here + "\">";253 254 if (!cinfo .receptionist.empty())255 link = "<a 
href=\"" + cinfo .receptionist + "\">";269 link = "<a href=\"_gwcgi_?a=p&p=about&l=ar&nw=u&c=" + *collist_here + "\">"; 270 271 if (!cinfo->receptionist.empty()) 272 link = "<a href=\"" + cinfo->receptionist + "\">"; 256 273 257 homeextra += link + collectionname + "</a>\n"; 258 259 if ((count == row1) || (count == (row1+row2))) 260 homeextra += "</td>"; 274 homeextra += "<td>" + link + collectionname + "</a></td>\n"; 261 275 262 276 count ++; 277 first = false; 263 278 } 264 279 } … … 266 281 collist_here ++; 267 282 } 268 homeextra += "</tr></table></center>\n"; 269 disp.setmacro ("homeextra", "home", homeextra); 283 for (; count%3 != 0; count ++) homeextra += "<td></td>\n"; 270 284 } 271 285 } 272 286 rprotolist_here ++; 273 287 } 288 289 if (!found_valid_col) { 290 homeextra += "<td>No valid (i.e. built and public) collections are available</td>\n"; 291 } 292 homeextra += "</tr></table></center>\n"; 293 disp.setmacro ("homeextra", "home", homeextra); 294 274 295 } 275 296 … … 319 340 320 341 342 if (recpt == NULL) { 343 logout << "ERROR (pageaction::define_internal_macros): This action does not contain\n" 344 << " information about any receptionists. 
The method set_receptionist was\n" 345 << " probably not called from the module which instantiated this action.\n"; 346 return; 347 } 348 321 349 text_t &arg_p = args["p"]; 322 350 text_t &arg_c = args["c"]; 323 ColInfoResponse_t cinfo; 324 comerror_t err; 351 ColInfoResponse_t *cinfo = NULL; 325 352 326 353 recptproto* collectproto = protos->getrecptproto (arg_c, logout); 327 354 if (collectproto != NULL) { 328 c ollectproto->get_collectinfo (arg_c, cinfo, err, logout);329 330 disp.setmacro ("numdocs", "Global", cinfo .numDocs);355 cinfo = recpt->get_collectinfo_ptr (collectproto, arg_c, logout); 356 357 disp.setmacro ("numdocs", "Global", cinfo->numDocs); 331 358 unsigned long current_time = time(NULL); 332 unsigned long builddate = (current_time - cinfo .buildDate) / 86400;359 unsigned long builddate = (current_time - cinfo->buildDate) / 86400; 333 360 disp.setmacro ("builddate", "Global", builddate); 334 361 } … … 340 367 // _collectionoption_ 341 368 342 if (args["ccs"] == "1" && collectproto != NULL && (cinfo .ccsCols.size() > 1)) {369 if (args["ccs"] == "1" && collectproto != NULL && (cinfo->ccsCols.size() > 1)) { 343 370 text_t collectionoption = "_textcollectionoption_"; 344 text_tarray::const_iterator col_here = cinfo .ccsCols.begin();345 text_tarray::const_iterator col_end = cinfo .ccsCols.end();371 text_tarray::const_iterator col_here = cinfo->ccsCols.begin(); 372 text_tarray::const_iterator col_end = cinfo->ccsCols.end(); 346 373 int count = 0; 347 374 while (col_here != col_end) { 348 375 text_t colname; 349 376 if (*col_here == arg_c) { 350 colname = cinfo .collectionmeta["collectionname"];377 colname = cinfo->collectionmeta["collectionname"]; 351 378 } else { 352 ColInfoResponse_t this_cinfo; 353 collectproto->get_collectinfo (*col_here, this_cinfo, err, logout); 354 if (err != noError) {col_here ++; continue;} 355 colname = this_cinfo.collectionmeta["collectionname"]; 379 ColInfoResponse_t *this_cinfo = recpt->get_collectinfo_ptr (collectproto, 
*col_here, logout); 380 if (this_cinfo == NULL) {col_here ++; continue;} 381 colname = this_cinfo->collectionmeta["collectionname"]; 356 382 } 357 383 … … 369 395 // _htmloptions_ 370 396 371 text_tmap::const_iterator it = cinfo .format.find ("DocumentUseHTML");372 if ((it != cinfo .format.end()) && ((*it).second == "true")) {397 text_tmap::const_iterator it = cinfo->format.find ("DocumentUseHTML"); 398 if ((it != cinfo->format.end()) && ((*it).second == "true")) { 373 399 disp.setmacro ("htmloptions", "preferences", "_htmloptionson_"); 374 400 … … 376 402 // _PreferenceDocsFromWeb_ 377 403 378 it = cinfo .format.find ("PreferenceDocsFromWeb");379 if ((it == cinfo .format.end()) || ((*it).second == "true"))404 it = cinfo->format.find ("PreferenceDocsFromWeb"); 405 if ((it == cinfo->format.end()) || ((*it).second == "true")) 380 406 disp.setmacro ("PreferenceDocsFromWeb", "preferences", "1"); 381 407 } … … 392 418 text_tarray languages; 393 419 languages.push_back ("en"); 420 languages.push_back ("fr"); 394 421 languages.push_back ("de"); 422 languages.push_back ("es"); 395 423 languages.push_back ("mi"); 396 424 languages.push_back ("zh"); 425 languages.push_back ("ar"); 397 426 text_tarray::const_iterator this_lang = languages.begin(); 398 427 text_tarray::const_iterator end_lang = languages.end(); 399 428 400 429 text_t languageoption = "_textlanguage_\n<select name=\"l\" onChange=\"updatel();\">\n"; 401 it = cinfo .format.find ("PreferenceLanguages");402 if ((it != cinfo .format.end()) && (!(*it).second.empty())) {430 it = cinfo->format.find ("PreferenceLanguages"); 431 if ((it != cinfo->format.end()) && (!(*it).second.empty())) { 403 432 text_tset pref_langs; 404 433 splitchar ((*it).second.begin(), (*it).second.end(), '|', pref_langs); … … 471 500 472 501 // if HTML collection there's no how to read document text 473 text_tmap::const_iterator it = cinfo .format.find ("HelpNoDocs");474 if ((it != cinfo .format.end()) && ((*it).second == "true")) {502 
text_tmap::const_iterator it = cinfo->format.find ("HelpNoDocs"); 503 if ((it != cinfo->format.end()) && ((*it).second == "true")) { 475 504 disp.setmacro ("topicreadingdocs", "help", ""); 476 505 disp.setmacro ("texthelpreadingdocs", "help", ""); 477 506 } 478 it = cinfo .format.find ("HelpBibDocs");479 if ((it != cinfo .format.end()) && ((*it).second == "true")) {507 it = cinfo->format.find ("HelpBibDocs"); 508 if ((it != cinfo->format.end()) && ((*it).second == "true")) { 480 509 disp.setmacro ("texthelpreadingdocs", "help", "_bibtexthelpreadingdocs_"); 481 510 disp.setmacro ("textreadingdocs", "help", "_bibtextreadingdocs_"); 482 511 } 483 it = cinfo .format.find ("HelpBookDocs");484 if ((it != cinfo .format.end()) && ((*it).second == "true")) {512 it = cinfo->format.find ("HelpBookDocs"); 513 if ((it != cinfo->format.end()) && ((*it).second == "true")) { 485 514 disp.setmacro ("texthelpreadingdocs", "help", "_booktexthelpreadingdocs_"); 486 515 disp.setmacro ("textreadingdocs", "help", "_booktextreadingdocs_"); … … 491 520 492 521 // _textsubcollections_ 493 if (args["ccs"] == "1" && (cinfo .ccsCols.size() > 1)) {494 text_t textsubcollections = "_textsubcols1_(" + text_t(cinfo .ccsCols.size()) + ")";495 text_tarray::const_iterator here = cinfo .ccsCols.begin();496 text_tarray::const_iterator end = cinfo .ccsCols.end();522 if (args["ccs"] == "1" && (cinfo->ccsCols.size() > 1)) { 523 text_t textsubcollections = "_textsubcols1_(" + text_t(cinfo->ccsCols.size()) + ")"; 524 text_tarray::const_iterator here = cinfo->ccsCols.begin(); 525 text_tarray::const_iterator end = cinfo->ccsCols.end(); 497 526 bool first = true; 498 527 int count = 0; … … 500 529 if (*here == arg_c) { 501 530 if (!first) textsubcollections += "<br>"; 502 textsubcollections += "\n" + cinfo .collectionmeta["collectionname"] + "\n";531 textsubcollections += "\n" + cinfo->collectionmeta["collectionname"] + "\n"; 503 532 } else { 504 ColInfoResponse_t this_cinfo; 505 collectproto->get_collectinfo 
(*here, this_cinfo, err, logout); 506 if (err != noError) {here ++; continue;} 533 ColInfoResponse_t *this_cinfo = recpt->get_collectinfo_ptr (collectproto, *here, logout); 534 if (this_cinfo == NULL) {here ++; continue;} 507 535 if (!first) textsubcollections += "<br>"; 508 textsubcollections += "\n" + this_cinfo .collectionmeta["collectionname"] + "\n";536 textsubcollections += "\n" + this_cinfo->collectionmeta["collectionname"] + "\n"; 509 537 } 510 538 count ++; -
branches/New_Config_Format-branch/gsdl/src/recpt/pageaction.h
r761 r1279 47 47 virtual ~pageaction (); 48 48 49 void set_receptionist (receptionist *therecpt) {recpt=therecpt;} 50 49 51 text_t get_action_name () {return "p";} 50 52 -
branches/New_Config_Format-branch/gsdl/src/recpt/pagedbrowserclass.cpp
r1048 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.9.4.1 2000/07/12 22:21:42 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.10 2000/06/29 02:47:20 sjboddie 34 added browser info (i.e VList, HList etc.) to status pages 35 30 36 Revision 1.9 2000/03/31 03:04:32 nzdl 31 37 tidied up some of the browsing code - replaced DocumentImages, … … 77 83 void pagedbrowserclass::load_metadata_defaults (text_tset &metadata) { 78 84 metadata.insert ("Title"); 79 }80 81 text_t pagedbrowserclass::get_default_formatstring () {82 return "";83 85 } 84 86 -
branches/New_Config_Format-branch/gsdl/src/recpt/pagedbrowserclass.h
r928 r1279 45 45 void load_metadata_defaults (text_tset &metadata); 46 46 47 text_t get_default_formatstring ();48 49 47 virtual void processOID (cgiargsclass &args, recptproto *collectproto, 50 48 ostream &logout); -
branches/New_Config_Format-branch/gsdl/src/recpt/queryaction.cpp
r962 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.36.4.1 2000/07/12 22:21:43 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.37 2000/07/05 21:49:34 sjboddie 34 Receptionist now caches collection information to avoid making multiple 35 get_collectinfo calls to collection server 36 30 37 Revision 1.36 2000/02/21 21:57:48 sjboddie 31 38 actions are now configured with gsdlhome … … 177 184 queryaction::queryaction () { 178 185 186 recpt = NULL; 179 187 num_phrases = 0; 180 188 … … 695 703 ostream &textout, ostream &logout) { 696 704 697 ColInfoResponse_t cinfo;705 ColInfoResponse_t *cinfo = NULL; 698 706 comerror_t err; 699 707 InfoFilterOptionsResponse_t fresponse; … … 737 745 while (collist_here != collist_end) { 738 746 739 (*rprotolist_here).p->get_collectinfo (*collist_here, cinfo, err, logout);747 cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout); 740 748 // if (err == noError && cinfo.isPublic && (cinfo.buildDate > 0)) { 741 if ( err == noError && (cinfo.buildDate > 0)) {749 if (cinfo != NULL && (cinfo->buildDate > 0)) { 742 750 743 751 (*rprotolist_here).p->get_filteroptions (*collist_here, frequest, fresponse, err, logout); … … 789 797 << " name=cc value=\"" << *collist_here << "\">"; 790 798 791 if (!cinfo .collectionmeta["collectionname"].empty())792 textout << outconvert << disp << cinfo .collectionmeta["collectionname"];799 if (!cinfo->collectionmeta["collectionname"].empty()) 800 textout << outconvert << disp << cinfo->collectionmeta["collectionname"]; 793 801 else 794 802 textout << outconvert << *collist_here; … … 816 824 ostream &logout) { 817 825 826 if (recpt == NULL) { 827 logout << "ERROR (queryaction::do_action): This action does not contain information\n" 828 << " about any receptionists. 
The method set_receptionist was probably\n" 829 << " not called from the module which instantiated this action.\n"; 830 return true; 831 } 832 833 818 834 if (args["ccs"] == "1") { 819 835 if (!args["cc"].empty()) { … … 866 882 map<text_t, colinfo_t, lttext_t> colinfomap; 867 883 868 ColInfoResponse_t cinfo;884 ColInfoResponse_t *cinfo = NULL; 869 885 comerror_t err; 870 886 FilterRequest_t request; … … 906 922 continue; 907 923 } 908 collectproto->get_collectinfo (*col_here, cinfo, err, logout); 924 cinfo = recpt->get_collectinfo_ptr (collectproto, *col_here, logout); 925 if (cinfo == NULL) { 926 logout << "ERROR (query_action::search_multiple_collections): get_collectinfo_ptr returned NULL\n"; 927 col_here ++; 928 continue; 929 } 909 930 910 931 browserclass *bptr = browsers->getbrowser (browsertype); … … 913 934 text_t formatstring; 914 935 if (!get_formatstring (classification, browsertype, 915 cinfo .format, formatstring))936 cinfo->format, formatstring)) 916 937 formatstring = bptr->get_default_formatstring(); 917 938 … … 1069 1090 text_t classification = "Search"; 1070 1091 1071 ColInfoResponse_t cinfo;1072 1092 comerror_t err; 1073 collectproto->get_collectinfo (collection, cinfo, err, logout); 1093 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, collection, logout); 1094 1095 if (cinfo == NULL) { 1096 logout << "ERROR (query_action::search_single_collection): get_collectinfo_ptr returned NULL\n"; 1097 return false; 1098 } 1074 1099 1075 1100 browserclass *bptr = browsers->getbrowser (browsertype); … … 1078 1103 text_t formatstring; 1079 1104 if (!get_formatstring (classification, browsertype, 1080 cinfo .format, formatstring))1105 cinfo->format, formatstring)) 1081 1106 formatstring = bptr->get_default_formatstring(); 1082 1107 -
branches/New_Config_Format-branch/gsdl/src/recpt/queryaction.h
r928 r1279 32 32 #include "gsdlconf.h" 33 33 #include "action.h" 34 #include "receptionist.h" 34 35 35 36 struct colinfo_t { … … 61 62 62 63 protected: 64 65 receptionist *recpt; 66 63 67 text_t formatstring; 64 68 int num_phrases; … … 97 101 queryaction (); 98 102 virtual ~queryaction () {} 103 104 void set_receptionist (receptionist *therecpt) {recpt=therecpt;} 99 105 100 106 void configure (const text_t &key, const text_tarray &cfgline); -
branches/New_Config_Format-branch/gsdl/src/recpt/receptionist.cpp
r950 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.47.4.1 2000/07/12 22:21:44 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.53 2000/07/05 21:49:34 sjboddie 34 Receptionist now caches collection information to avoid making multiple 35 get_collectinfo calls to collection server 36 37 Revision 1.52 2000/07/04 02:15:22 sjboddie 38 fixed bug causing segmentation fault when an invalid collection 39 was supplied as the "c" cgi argument 40 41 Revision 1.51 2000/05/28 09:15:34 sjboddie 42 a few small changes to get an initial release of the local library 43 44 Revision 1.50 2000/05/12 03:09:25 sjboddie 45 minor modifications to get web library compiling under VC++ 6.0 46 47 Revision 1.49 2000/05/04 05:18:46 sjboddie 48 attempting to get end-user collection building to work under windows 49 50 Revision 1.48 2000/04/14 02:52:06 sjboddie 51 tidied up error messaging and set up some debugging info to be output 52 when running library from command line 53 30 54 Revision 1.47 2000/02/17 22:26:17 sjboddie 31 55 set macros for displaying macrons in utf8 … … 227 251 #include "cgiutils.h" 228 252 #include "htmlutils.h" 253 #include "gsdltools.h" 229 254 #include "OIDtools.h" 230 255 #include <assert.h> 231 256 #include <time.h> 232 257 #include <stdio.h> 258 #if defined (GSDL_USE_IOS_H) 233 259 #include <fstream.h> 260 #else 261 #include <fstream> 262 #endif 234 263 235 264 #if defined (__WIN32_) … … 244 273 collectdir.clear(); 245 274 httpprefix.clear(); 246 httpimg .clear();275 httpimg = "/images"; 247 276 gwcgi.clear(); 248 277 macrofiles.erase(macrofiles.begin(), macrofiles.end()); … … 264 293 } 265 294 266 295 void collectioninfo_t::clear () { 296 gsdl_gsdlhome.clear(); 297 gsdl_gdbmhome.clear(); 298 299 info_loaded = false; 300 info.clear(); 301 } 267 302 268 303 receptionist::receptionist () { … … 781 816 utf8outconvertclass text_t2utf8; 782 817 char *lfile = filename.getcstr(); 818 783 819 ofstream log (lfile, ios::app); 784 820 … … 904 940 
bool receptionist::produce_content (cgiargsclass &args, ostream &contentout, 905 941 ostream &logout) { 906 942 907 943 // decide on the output conversion class 908 944 text_t &arg_w = args["w"]; … … 1005 1041 text_t colmacrodir = filename_cat (configinfo.collectdir, "macros"); 1006 1042 1007 text_t arraymaindirs;1043 text_tset maindirs; 1008 1044 text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros"); 1009 maindirs. push_back(gsdlmacrodir);1045 maindirs.insert (gsdlmacrodir); 1010 1046 colinfo_tmap::iterator colhere = configinfo.collectinfo.begin(); 1011 1047 colinfo_tmap::iterator colend = configinfo.collectinfo.end(); 1012 1048 while (colhere != colend) { 1013 gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros"); 1014 maindirs.push_back (gsdlmacrodir); 1049 if (!((*colhere).second.gsdl_gsdlhome).empty()) { 1050 gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros"); 1051 maindirs.insert (gsdlmacrodir); 1052 } 1015 1053 colhere ++; 1016 1054 } … … 1037 1075 // we'll load all copies 1038 1076 if (!foundfile) { 1039 text_t array::const_iterator dirhere = maindirs.begin();1040 text_t array::const_iterator dirend = maindirs.end();1077 text_tset::const_iterator dirhere = maindirs.begin(); 1078 text_tset::const_iterator dirend = maindirs.end(); 1041 1079 while (dirhere != dirend) { 1042 1080 filename = filename_cat (*dirhere, *arrhere); … … 1095 1133 1096 1134 text_t &arg_c = args["c"]; 1097 ColInfoResponse_t cinfo;1098 comerror_t err;1099 1135 recptproto *collectproto = protocols.getrecptproto (arg_c, logout); 1100 collectproto->get_collectinfo (arg_c, cinfo, err, logout); 1101 1102 if (!cinfo.ccsCols.empty()) { 1103 args["ccs"] = 1; 1104 if (args["cc"].empty()) { 1105 text_tarray::const_iterator col_here = cinfo.ccsCols.begin(); 1106 text_tarray::const_iterator col_end = cinfo.ccsCols.end(); 1107 bool first = true; 1108 while (col_here != col_end) { 1109 // make sure it's a valid collection 1110 if 
(protocols.getrecptproto (*col_here, logout) != NULL) { 1111 if (!first) args["cc"].push_back (','); 1112 args["cc"] += *col_here; 1113 first = false; 1136 if (collectproto == NULL) { 1137 // oops, this collection isn't valid 1138 outconvertclass text_t2ascii; 1139 logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n"; 1140 args["c"].clear(); 1141 1142 } else { 1143 1144 ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout); 1145 1146 if (cinfo != NULL) { 1147 if (!cinfo->ccsCols.empty()) { 1148 args["ccs"] = 1; 1149 if (args["cc"].empty()) { 1150 text_tarray::const_iterator col_here = cinfo->ccsCols.begin(); 1151 text_tarray::const_iterator col_end = cinfo->ccsCols.end(); 1152 bool first = true; 1153 while (col_here != col_end) { 1154 // make sure it's a valid collection 1155 if (protocols.getrecptproto (*col_here, logout) != NULL) { 1156 if (!first) args["cc"].push_back (','); 1157 args["cc"] += *col_here; 1158 first = false; 1159 } 1160 col_here ++; 1161 } 1114 1162 } 1115 col_here ++;1116 1163 } 1164 } else { 1165 logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n"; 1117 1166 } 1118 1167 } … … 1226 1275 text_t &collection = args["c"]; 1227 1276 1228 disp.setmacro ("gsdlhome", "Global", configinfo.gsdlhome);1277 disp.setmacro ("gsdlhome", "Global", dm_safe(configinfo.gsdlhome)); 1229 1278 disp.setmacro ("gwcgi", "Global", configinfo.gwcgi); 1230 1279 disp.setmacro ("httpimg", "Global", configinfo.httpimg); … … 1299 1348 } 1300 1349 } 1350 1351 // gets collection info from cache if found or 1352 // calls collection server (and updates cache) 1353 // returns NULL if there's an error 1354 ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto, 1355 const text_t &collection, 1356 ostream &logout) { 1357 1358 // check the cache 1359 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection); 1360 if ((it != configinfo.collectinfo.end()) && 
((*it).second.info_loaded)) { 1361 // found it 1362 return &((*it).second.info); 1363 } 1364 1365 // not cached, get info from collection server 1366 if (collectproto == NULL) { 1367 logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n"; 1368 return NULL; 1369 } 1370 1371 comerror_t err; 1372 if (it == configinfo.collectinfo.end()) { 1373 collectioninfo_t cinfo; 1374 collectproto->get_collectinfo (collection, cinfo.info, err, logout); 1375 if (err != noError) { 1376 outconvertclass text_t2ascii; 1377 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \"" 1378 << get_comerror_string (err) << "\"while getting collectinfo\n"; 1379 return NULL; 1380 } 1381 cinfo.info_loaded = true; 1382 configinfo.collectinfo[collection] = cinfo; 1383 return &(configinfo.collectinfo[collection].info); 1384 } else { 1385 collectproto->get_collectinfo (collection, (*it).second.info, err, logout); 1386 if (err != noError) { 1387 outconvertclass text_t2ascii; 1388 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \"" 1389 << get_comerror_string (err) << "\"while getting collectinfo\n"; 1390 return NULL; 1391 } 1392 (*it).second.info_loaded = true; 1393 return &((*it).second.info); 1394 } 1395 } -
branches/New_Config_Format-branch/gsdl/src/recpt/receptionist.h
r864 r1279 34 34 #include "cgiargs.h" 35 35 #include "display.h" 36 #include "action.h"37 36 #include "browserclass.h" 38 37 #include "recptproto.h" 39 38 #include "converter.h" 40 39 #include "cfgread.h" 40 #include "action.h" 41 41 42 42 // the MACROPRECEDENCE macro is used as a default. override … … 51 51 52 52 53 struct collectioninfo_t { 54 void clear (); 55 collectioninfo_t () {clear();} 56 57 text_t gsdl_gsdlhome; 58 text_t gsdl_gdbmhome; 59 60 bool info_loaded; 61 ColInfoResponse_t info; 62 }; 63 64 typedef map<text_t, collectioninfo_t, lttext_t> colinfo_tmap; 65 66 53 67 struct recptconf { 54 68 text_t gsdlhome; … … 58 72 colinfo_tmap collectinfo; 59 73 text_t httpprefix; 60 text_t httpimg; 74 text_t httpimg; // will equal /images if not set 61 75 text_t gwcgi; 62 76 text_tset macrofiles; … … 71 85 recptconf () {clear();} 72 86 }; 73 74 87 75 88 class receptionist { … … 196 209 // list. This can be used to save preferences between sessions. 197 210 text_t get_compressed_arg (cgiargsclass &args, ostream &logout); 211 212 // gets collection info from cache if found or 213 // calls collection server (and updates cache) 214 // returns NULL if there's an error 215 ColInfoResponse_t *get_collectinfo_ptr (recptproto *collectproto, 216 const text_t &collection, 217 ostream &logout); 218 198 219 199 220 protected: -
branches/New_Config_Format-branch/gsdl/src/recpt/recptconfig.cpp
r963 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.6.4.1 2000/07/12 22:21:45 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.9 2000/05/12 03:09:24 sjboddie 34 minor modifications to get web library compiling under VC++ 6.0 35 36 Revision 1.8 2000/04/14 03:10:35 sjboddie 37 tidied up a few issues concerning the new debug info which showed 38 up on windows 39 40 Revision 1.7 2000/04/14 02:52:06 sjboddie 41 tidied up error messaging and set up some debugging info to be output 42 when running library from command line 43 30 44 Revision 1.6 2000/02/21 21:55:33 sjboddie 31 45 gsdlhome now comes from gsdlsite.cfg … … 79 93 text_tarray cfgline; 80 94 text_t key; 81 ifstream confin ("gsdlsite.cfg"); 95 96 #ifdef GSDL_USE_IOS_H 97 ifstream confin ("gsdlsite.cfg", ios::in | ios::nocreate); 98 #else 99 ifstream confin ("gsdlsite.cfg", ios::in); 100 #endif 82 101 83 102 if (confin) { … … 105 124 } 106 125 107 // this version just grabs gsdlhome, returns false if it can't find it 126 // this version just grabs gsdlhome, returning true 127 // unless unable to read gsdlsite.cfg 108 128 bool site_cfg_read (text_t &gsdlhome) { 109 129 … … 113 133 text_tarray cfgline; 114 134 text_t key; 115 ifstream confin ("gsdlsite.cfg"); 135 136 #ifdef GSDL_USE_IOS_H 137 ifstream confin ("gsdlsite.cfg", ios::in | ios::nocreate); 138 #else 139 ifstream confin ("gsdlsite.cfg", ios::in); 140 #endif 116 141 117 142 if (confin) { … … 120 145 if (cfgline[0] == "gsdlhome") { 121 146 gsdlhome = cfgline[1]; 122 return true; 123 } 124 } 125 } 147 break; 148 } 149 } 150 } 151 return true; 126 152 confin.close (); 127 153 } … … 143 169 if (file_exists (filename)) { 144 170 char *cstr = filename.getcstr(); 145 ifstream confin (cstr); 171 172 #ifdef GSDL_USE_IOS_H 173 ifstream confin (cstr, ios::in | ios::nocreate); 174 #else 175 ifstream confin (cstr, ios::in); 176 #endif 177 146 178 delete cstr; 147 179 … … 176 208 if (!filename.empty()) { 177 209 char *cstr = 
filename.getcstr(); 178 ifstream confin (cstr); 210 211 #ifdef GSDL_USE_IOS_H 212 ifstream confin (cstr, ios::in | ios::nocreate); 213 #else 214 ifstream confin (cstr, ios::in); 215 #endif 216 179 217 delete cstr; 180 218 -
branches/New_Config_Format-branch/gsdl/src/recpt/statusaction.cpp
r995 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.25.4.1 2000/07/12 22:21:46 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.31 2000/07/05 21:49:36 sjboddie 34 Receptionist now caches collection information to avoid making multiple 35 get_collectinfo calls to collection server 36 37 Revision 1.30 2000/07/03 22:26:27 nzdl 38 fixed a few errors in the macro files (and one in some text printed out 39 by the statusaction) 40 41 Revision 1.29 2000/06/29 02:47:21 sjboddie 42 added browser info (i.e VList, HList etc.) to status pages 43 44 Revision 1.28 2000/06/29 00:22:59 sjboddie 45 added new numsections field to collection info and made the statusaction 46 recognize it 47 48 Revision 1.27 2000/05/12 03:09:24 sjboddie 49 minor modifications to get web library compiling under VC++ 6.0 50 51 Revision 1.26 2000/04/19 22:30:23 sjboddie 52 tidied up status pages and end-user collection building 53 30 54 Revision 1.25 2000/02/29 21:00:31 sjboddie 31 55 fixed some compiler warnings … … 116 140 117 141 */ 118 119 142 120 143 #include "statusaction.h" … … 138 161 } 139 162 140 void statusaction::output_welcome (cgiargsclass &/*args*/, displayclass &disp, 141 outconvertclass &outconvert, 142 ostream &textout, ostream &/*logout*/) { 143 textout << outconvert << disp << "_status:infoheader_(_titlewelcome_)\n" 144 "_status:welcome_\n" 145 "_status:infofooter_\n"; 163 void statusaction::output_welcome (cgiargsclass &/*args*/, recptprotolistclass *protos, 164 displayclass &disp, outconvertclass &outconvert, 165 ostream &textout, ostream &logout) { 166 167 if (recpt == NULL) return; 168 169 textout << outconvert << disp 170 << "_status:infoheader_(_titlewelcome_)\n" 171 << "_status:welcome_" 172 << "<center><table width=_pagewidth_>\n" 173 << "<th align=left>abbrev.</th><th align=left>collection</th>" 174 << "<th align=left>public?</th><th align=left>running?</th></tr>\n"; 175 176 recptprotolistclass::iterator rprotolist_here = protos->begin(); 177 
recptprotolistclass::iterator rprotolist_end = protos->end(); 178 while (rprotolist_here != rprotolist_end) { 179 if ((*rprotolist_here).p != NULL) { 180 text_t protoname = (*rprotolist_here).p->get_protocol_name(); 181 text_tarray collist; 182 comerror_t err; 183 (*rprotolist_here).p->get_collection_list (collist, err, logout); 184 if (err == noError) { 185 text_tarray::iterator collist_here = collist.begin(); 186 text_tarray::iterator collist_end = collist.end(); 187 188 while (collist_here != collist_end) { 189 190 textout << outconvert << disp 191 << "<tr><td><a href=\"_gwcgi_?e=_compressedoptions_&a=status&sp=collectioninfo&pr=" 192 << protoname 193 << "&c=" 194 << *collist_here 195 << "\">" 196 << *collist_here 197 << "</a></td>"; 198 199 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout); 200 if (cinfo != NULL) { 201 text_t collname = *collist_here; 202 text_tmap::iterator it = cinfo->collectionmeta.find("collectionname"); 203 if (it != cinfo->collectionmeta.end()) collname = (*it).second; 204 205 textout << "<td>"; 206 if (cinfo->buildDate > 0) 207 textout << outconvert << disp 208 << "<a href=\"_httppagex_(about)&c=" << *collist_here 209 << "\" target=\\_top>"; 210 211 textout << outconvert << disp << collname; 212 213 if (cinfo->buildDate > 0) textout << "</a>"; 214 215 textout << "</td>"; 216 217 if (cinfo->isPublic) textout << "<td>yes</td>"; 218 else textout << "<td>no</td>"; 219 220 if (cinfo->buildDate > 0) 221 textout << outconvert << "<td>yes</td>"; 222 else 223 textout << "<td>no</td>"; 224 225 } else { 226 textout << "<td></td><td></td><td></td>"; 227 } 228 229 textout << "</tr>\n"; 230 collist_here ++; 231 } 232 } 233 } 234 rprotolist_here ++; 235 } 236 237 textout << "</table></center>\n"; 238 textout << outconvert << disp << "_status:infofooter_\n"; 146 239 } 147 240 … … 256 349 } 257 350 351 // browsers 352 browsermapclass *browsers = recpt->get_browsermap_ptr(); 353 if (browsers != NULL) { 354 
textout << outconvert << "<tr valign=top><th>browsers</th><td>"; 355 356 browserptrmap::iterator browsershere = browsers->begin (); 357 browserptrmap::iterator browsersend = browsers->end (); 358 bool browsersfirst = true; 359 while (browsershere != browsersend) { 360 if (!browsersfirst) textout << outconvert << ", "; 361 browsersfirst = false; 362 assert ((*browsershere).second.b != NULL); 363 if ((*browsershere).second.b != NULL) { 364 textout << outconvert << "\"" << (*browsershere).second.b->get_browser_name() << "\""; 365 } 366 browsershere++; 367 } 368 369 textout << outconvert << "</td></tr>\n"; 370 } 371 258 372 // protocols 259 373 recptprotolistclass *protocols = recpt->get_recptprotolist_ptr (); … … 399 513 } 400 514 515 void statusaction::output_browserinfo (cgiargsclass &/*args*/, displayclass &disp, 516 outconvertclass &outconvert, 517 ostream &textout, ostream &/*logout*/) { 518 if (recpt == NULL) return; 519 browsermapclass *browsers = recpt->get_browsermap_ptr(); 520 521 textout << outconvert << disp << "_status:infoheader_(Browser Information)\n"; 522 textout << outconvert 523 << "<h2>Browser information</h2>\n" 524 << "<table>"; 525 526 // browser information 527 if (browsers != NULL) { 528 textout << outconvert 529 << "<tr><th>browser name</th><th>default formatstring</th></tr>\n"; 530 531 browserptrmap::iterator browsershere = browsers->begin (); 532 browserptrmap::iterator browsersend = browsers->end (); 533 while (browsershere != browsersend) { 534 assert ((*browsershere).second.b != NULL); 535 if ((*browsershere).second.b != NULL) { 536 textout << outconvert 537 << "<tr><td>" << (*browsershere).second.b->get_browser_name() 538 << "</td><td>" << html_safe ((*browsershere).second.b->get_default_formatstring()) 539 << "</td></tr>\n"; 540 } 541 browsershere++; 542 } 543 } 544 545 textout << outconvert << disp << "</table>\n_status:infofooter_\n"; 546 } 547 401 548 void statusaction::output_protocolinfo (cgiargsclass &/*args*/, displayclass 
&disp, 402 549 outconvertclass &outconvert, … … 512 659 } else { 513 660 // rproto can't be NULL to get here 514 ColInfoResponse_t collectinfo; 515 comerror_t err; 516 517 rproto->get_collectinfo (arg_c, collectinfo, err, logout); 518 if (err == noError) { 661 ColInfoResponse_t *collectinfo = recpt->get_collectinfo_ptr (rproto, arg_c, logout); 662 if (collectinfo != NULL) { 519 663 textout << outconvert << "<table>\n" 520 664 << "<tr><th>collection name</th><td>\"" 521 << collectinfo .shortInfo.name665 << collectinfo->shortInfo.name 522 666 << "\"</td></tr>\n" 523 667 524 668 << "<tr><th>host</th><td>\"" 525 << collectinfo .shortInfo.host669 << collectinfo->shortInfo.host 526 670 << "\"</td></tr>\n" 527 671 528 672 << "<tr><th>port</th><td>\"" 529 << collectinfo .shortInfo.port673 << collectinfo->shortInfo.port 530 674 << "\"</td></tr>\n" 531 675 532 676 << "<tr><th>is public?</th><td>"; 533 if (collectinfo .isPublic) textout << outconvert << "true";677 if (collectinfo->isPublic) textout << outconvert << "true"; 534 678 else textout << outconvert << "false"; 535 679 textout << outconvert … … 537 681 538 682 << "<tr><th>is beta?</th><td>"; 539 if (collectinfo .isBeta) textout << outconvert << "true";683 if (collectinfo->isBeta) textout << outconvert << "true"; 540 684 else textout << outconvert << "false"; 541 685 textout << outconvert … … 543 687 544 688 << "<tr><th>build date</th><td>\"" 545 << collectinfo .buildDate689 << collectinfo->buildDate 546 690 << "\"</td></tr>\n" 547 691 548 692 << "<tr><th>interface languages</th><td>"; 549 text_tarray::iterator languages_here = collectinfo .languages.begin();550 text_tarray::iterator languages_end = collectinfo .languages.end();693 text_tarray::iterator languages_here = collectinfo->languages.begin(); 694 text_tarray::iterator languages_end = collectinfo->languages.end(); 551 695 bool languages_first = true; 552 696 while (languages_here != languages_end) { … … 558 702 559 703 textout << "<tr><th valign=top>collection 
metadata</th><td><table>\n"; 560 text_tmap::iterator meta_here = collectinfo .collectionmeta.begin();561 text_tmap::iterator meta_end = collectinfo .collectionmeta.end();704 text_tmap::iterator meta_here = collectinfo->collectionmeta.begin(); 705 text_tmap::iterator meta_end = collectinfo->collectionmeta.end(); 562 706 while (meta_here != meta_end) { 563 707 textout << outconvert << "<tr><td>" << (*meta_here).first … … 568 712 569 713 textout << "<tr><th valign=top>format info</th><td><table>\n"; 570 text_tmap::iterator format_here = collectinfo .format.begin();571 text_tmap::iterator format_end = collectinfo .format.end();714 text_tmap::iterator format_here = collectinfo->format.begin(); 715 text_tmap::iterator format_end = collectinfo->format.end(); 572 716 while (format_here != format_end) { 573 717 textout << outconvert << "<tr><td>" << (*format_here).first … … 578 722 579 723 textout << "<tr><th valign=top>building info</th><td><table>\n"; 580 text_tmap::iterator building_here = collectinfo .building.begin();581 text_tmap::iterator building_end = collectinfo .building.end();724 text_tmap::iterator building_here = collectinfo->building.begin(); 725 text_tmap::iterator building_end = collectinfo->building.end(); 582 726 while (building_here != building_end) { 583 727 textout << outconvert << "<tr><td>" << (*building_here).first … … 591 735 592 736 << "<tr><th>number of documents</th><td>\"" 593 << collectinfo .numDocs737 << collectinfo->numDocs 594 738 << "\"</td></tr>\n" 595 739 740 << "<tr><th>number of sections</th><td>\"" 741 << collectinfo->numSections 742 << "\"</td></tr>\n" 743 596 744 << "<tr><th>number of words</th><td>\"" 597 << collectinfo .numWords745 << collectinfo->numWords 598 746 << "\"</td></tr>\n" 599 747 600 748 << "<tr><th>number of bytes</th><td>\"" 601 << collectinfo .numBytes749 << collectinfo->numBytes 602 750 << "\"</td></tr>\n" 603 751 604 752 << "<tr><th>preferred receptionist</th><td>\"" 605 << collectinfo .receptionist753 << 
collectinfo->receptionist 606 754 << "\"</td></tr>\n" 607 755 … … 609 757 610 758 } else { 611 textout << outconvert << "Error (" << get_comerror_string (err) 612 << ") while getting collect information\n"; 759 textout << "ERROR (statusaction::output_collectioninfo): while getting collect information\n"; 613 760 } 614 761 … … 617 764 InfoFilterOptionsRequest_t filteroptions_request; 618 765 InfoFilterOptionsResponse_t filteroptions; 766 comerror_t err; 619 767 rproto->get_filterinfo (arg_c, filterinfo, err, logout); 620 768 if (err == noError) { … … 722 870 textout << outconvert << "<h2>Init log</h2>\n"; 723 871 872 #ifdef GSDL_USE_IOS_H 724 873 ifstream initin (cinitfilename, ios::in | ios::nocreate); 874 #else 875 ifstream initin (cinitfilename, ios::in); 876 #endif 877 725 878 delete cinitfilename; 726 879 if (initin) { … … 762 915 logout << flush; 763 916 917 #ifdef GSDL_USE_IOS_H 764 918 ifstream errin (cerrfilename, ios::in | ios::nocreate); 919 #else 920 ifstream errin (cerrfilename, ios::in); 921 #endif 922 765 923 delete cerrfilename; 766 924 if (errin) { … … 856 1014 } 857 1015 858 bool statusaction::do_action (cgiargsclass &args, recptprotolistclass * /*protos*/,1016 bool statusaction::do_action (cgiargsclass &args, recptprotolistclass *protos, 859 1017 browsermapclass * /*browsers*/, displayclass &disp, 860 1018 outconvertclass &outconvert, ostream &textout, … … 898 1056 if (arg_sp == "frameset") output_frameset (args, disp, outconvert, textout, logout); 899 1057 else if (arg_sp == "select") output_select (args, disp, outconvert, textout, logout); 900 else if (arg_sp == "welcome") output_welcome (args, disp, outconvert, textout, logout);1058 else if (arg_sp == "welcome") output_welcome (args, protos, disp, outconvert, textout, logout); 901 1059 else if (arg_sp == "generalinfo") output_generalinfo (args, disp, outconvert, textout, logout); 902 1060 else if (arg_sp == "argumentinfo") output_argumentinfo (args, disp, outconvert, textout, logout); 903 1061 
else if (arg_sp == "actioninfo") output_actioninfo (args, disp, outconvert, textout, logout); 1062 else if (arg_sp == "browserinfo") output_browserinfo (args, disp, outconvert, textout, logout); 904 1063 else if (arg_sp == "protocolinfo") output_protocolinfo (args, disp, outconvert, textout, logout); 905 1064 else if (arg_sp == "collectioninfo") output_collectioninfo (args, disp, outconvert, textout, logout); -
branches/New_Config_Format-branch/gsdl/src/recpt/statusaction.h
r760 r1279 44 44 outconvertclass &outconvert, 45 45 ostream &textout, ostream &logout); 46 46 47 void output_select (cgiargsclass &args, displayclass &disp, 47 48 outconvertclass &outconvert, 48 49 ostream &textout, ostream &logout); 49 50 50 void output_welcome (cgiargsclass &args, displayclass &disp, 51 outconvertclass &outconvert, 52 ostream &textout, ostream &logout); 51 void output_welcome (cgiargsclass &args, recptprotolistclass *protos, 52 displayclass &disp, outconvertclass &outconvert, 53 ostream &textout, ostream &logout); 54 53 55 void output_generalinfo (cgiargsclass &args, displayclass &disp, 54 56 outconvertclass &outconvert, 55 57 ostream &textout, ostream &logout); 58 56 59 void output_argumentinfo (cgiargsclass &args, displayclass &disp, 57 60 outconvertclass &outconvert, 58 61 ostream &textout, ostream &logout); 62 59 63 void output_actioninfo (cgiargsclass &args, displayclass &disp, 60 64 outconvertclass &outconvert, 61 65 ostream &textout, ostream &logout); 66 67 void output_browserinfo (cgiargsclass &args, displayclass &disp, 68 outconvertclass &outconvert, 69 ostream &textout, ostream &logout); 70 62 71 void output_protocolinfo (cgiargsclass &args, displayclass &disp, 63 72 outconvertclass &outconvert, 64 73 ostream &textout, ostream &logout); 74 65 75 void output_collectioninfo (cgiargsclass &args, displayclass &disp, 66 76 outconvertclass &outconvert, 67 77 ostream &textout, ostream &logout); 78 68 79 void output_initlog (cgiargsclass &args, displayclass &disp, 69 80 outconvertclass &outconvert, 70 81 ostream &textout, ostream &logout); 82 71 83 void output_errorlog (cgiargsclass &args, displayclass &disp, 72 84 outconvertclass &outconvert, -
branches/New_Config_Format-branch/gsdl/src/recpt/userdb.cpp
r1000 r1279 28 28 /* 29 29 $Log$ 30 Revision 1.6.4.1 2000/07/12 22:21:47 sjboddie 31 merged changes to trunk into New_Config_Format branch 32 33 Revision 1.7 2000/05/22 12:30:36 sjboddie 34 the initial admin user now belongs to the colbuilder group by default 35 (as well as the administrator group) 36 30 37 Revision 1.6 2000/03/01 22:23:09 sjboddie 31 38 tidied up windows installation … … 187 194 userinfo.password = crypt_text("admin"); 188 195 userinfo.enabled = true; 189 userinfo.groups = "administrator ";196 userinfo.groups = "administrator,colbuilder"; 190 197 userinfo.comment = "change the password for this account as soon as possible"; 191 198 return set_user_info (userdbfile, username, userinfo); -
branches/New_Config_Format-branch/gsdl/src/recpt/win32.mak
r1000 r1279 25 25 ########################################################################### 26 26 27 GSDLHOME = d:\home\dl\gsdl28 STLPATH = d:\home\dl\stl\stlport27 GSDLHOME = c:\gsdl 28 STLPATH = c:\stlport 29 29 30 30 AR = lib -
branches/New_Config_Format-branch/gsdl/src/w32server/cgiwrapper.cpp
r1011 r1279 1 #include "text_t.h" 2 1 3 #include <windows.h> 2 4 #include <string.h> … … 32 34 33 35 // actions 34 #include "action.h"35 36 #include "statusaction.h" 36 37 #include "pageaction.h" … … 38 39 #include "queryaction.h" 39 40 #include "documentaction.h" 41 #include "tipaction.h" 40 42 #include "authenaction.h" 41 43 #include "usersaction.h" 42 44 #include "extlinkaction.h" 43 //#include "buildaction.h"45 #include "buildaction.h" 44 46 #include "delhistoryaction.h" 45 47 46 48 // browsers 47 #include "browserclass.h"48 49 #include "vlistbrowserclass.h" 49 50 #include "hlistbrowserclass.h" … … 78 79 RequestInfoT *RInfo; 79 80 ostream *casostr; 81 #if !defined (GSDL_USE_IOS_H) 82 char buffer[256]; 83 #endif 80 84 }; 81 85 82 86 textstreambuf::textstreambuf() { 83 87 tsbreset(); 88 #if !defined (GSDL_USE_IOS_H) 89 setp (&buffer[0], &buffer[255]); 90 #else 84 91 if (base() == ebuf()) allocate(); 85 92 setp (base(), ebuf()); 93 #endif 86 94 }; 87 95 88 96 int textstreambuf::sync () { 89 97 if ((RInfo != NULL) && 90 (Send_String_N(pbase(), out_waiting(), RInfo) < 0)) {98 (Send_String_N(pbase(), pptr()-pbase(), RInfo) < 0)) { 91 99 RInfo = NULL; 92 100 } … … 94 102 if (casostr != NULL) { 95 103 char *thepbase=pbase(); 96 for (int i=0;i<out_waiting();i++) (*casostr).put(thepbase[i]);104 for (int i=0;i<(pptr()-pbase());i++) (*casostr).put(thepbase[i]); 97 105 } 98 106 … … 118 126 int overflow (int ch); 119 127 int underflow () {return EOF;} 128 129 #if !defined (GSDL_USE_IOS_H) 130 private: 131 char buffer[256]; 132 #endif 120 133 }; 121 134 122 135 logstreambuf::logstreambuf () { 136 #if !defined (GSDL_USE_IOS_H) 137 setp (&buffer[0], &buffer[255]); 138 #else 123 139 if (base() == ebuf()) allocate(); 124 140 setp (base(), ebuf()); 141 #endif 125 142 } 126 143 … … 128 145 if (gsdl_keep_log || gsdl_show_console) { 129 146 log_message ("LOCAL LIB MESSAGE: "); 130 log_message_N (pbase(), out_waiting());147 log_message_N (pbase(), pptr()-pbase()); 131 148 } 132 149 … 
… 154 171 DWORD lastlibaccesstime; 155 172 DWORD baseavailvirtual; 156 157 static void page_errorsitecfg (const text_t &gsdlhome, const text_t &collection) {158 159 text_t message = "Error\n\n"160 "The site.cfg configuration file could not be found. This file\n"161 "should contain configuration information relating to this sites\n"162 "setup.\n";163 164 if (collection.empty()) {165 message += "As this program is not being run in collection specific mode,\n"166 "the file should reside at " + gsdlhome + "\\etc\\site.cfg.\n";167 } else {168 message += "As this program is being run in collection specific mode,\n"169 "the file can reside at " + gsdlhome + "\\collect\\" + collection +170 "\\etc\\site.cfg or " + gsdlhome + "\\etc\\site.cfg.\n";171 }172 173 MessageBox(NULL, message.getcstr(),174 "Greenstone Digital Library Software"175 ,MB_OK|MB_SYSTEMMODAL);176 }177 173 178 174 static void page_errormaincfg (const text_t &gsdlhome, const text_t &collection) { … … 326 322 // returns 1 if successful, 0 if unsuccessful 327 323 int gsdl_init () { 324 #if defined (GSDL_USE_IOS_H) 328 325 cerr = &logstream; 329 326 cout = &textstream; 327 #else 328 cerr.rdbuf(&logstream); 329 cout.rdbuf(&textstream); 330 #endif 330 331 331 332 // collection should be set to "" unless in … … 425 426 426 427 pageaction *apageaction = new pageaction(); 428 apageaction->set_receptionist (&recpt); 427 429 recpt.add_action (apageaction); 428 430 … … 430 432 recpt.add_action (apingaction); 431 433 434 tipaction *atipaction = new tipaction(); 435 recpt.add_action (atipaction); 436 432 437 queryaction *aqueryaction = new queryaction(); 438 aqueryaction->set_receptionist (&recpt); 433 439 recpt.add_action (aqueryaction); 434 440 435 441 documentaction *adocumentaction = new documentaction(); 442 adocumentaction->set_receptionist (&recpt); 436 443 recpt.add_action (adocumentaction); 437 444 … … 442 449 recpt.add_action (anextlinkaction); 443 450 444 //buildaction *abuildaction = new buildaction();445 
//abuildaction->set_receptionist (&recpt);446 //recpt.add_action (abuildaction);451 buildaction *abuildaction = new buildaction(); 452 abuildaction->set_receptionist (&recpt); 453 recpt.add_action (abuildaction); 447 454 448 455 authenaction *aauthenaction = new authenaction(); … … 450 457 recpt.add_action (aauthenaction); 451 458 452 delhistoryaction adelhistoryaction;453 recpt.add_action (&adelhistoryaction);459 delhistoryaction *adelhistoryaction = new delhistoryaction(); 460 recpt.add_action (adelhistoryaction); 454 461 455 462 … … 503 510 // those read in last will override those read earlier 504 511 // collections being used together in this way should be 505 // careful not to have site.cfg ormain.cfg files that might512 // careful not to have main.cfg files that might 506 513 // screw with each other. 507 514 text_tset::const_iterator thome = gsdlhomes.begin(); 508 515 text_tset::const_iterator ehome = gsdlhomes.end(); 509 516 while (thome != ehome) { 510 // TODO: should only need to do this once now I think511 // gsdlsite.cfg will need to be installed along with executable512 // if (!site_cfg_read (recpt, *thome, maxrequests)) {513 // couldn't find the site configuration file514 // page_errorsitecfg (*thome, collection);515 // return 0;516 // } else517 517 if (!main_cfg_read (recpt, *thome, collection)) { 518 518 // couldn't find the main configuration file -
branches/New_Config_Format-branch/gsdl/src/w32server/fnord.cpp
r1040 r1279 18 18 The author can be contacted via Email at [email protected] 19 19 */ 20 #include "text_t.h" 20 21 #include <windows.h> 21 22 #include <stdlib.h> … … 76 77 #define ENTERBUTTONY ((MAINWINDOWHEIGHT-RESTHEIGHT)+5) 77 78 78 #define VERSIONSTRING "version 2.13"79 #define VERSIONSTRING "version x.xx" 79 80 const char versionstring[] = VERSIONSTRING; 80 81 … … 165 166 if (coltitledc == NULL) { 166 167 coltitledc = CreateCompatibleDC(pdc); 167 defcoltitlebitmap = SelectObject (coltitledc, coltitlebitmap);168 defcoltitlebitmap = (HBITMAP)SelectObject (coltitledc, coltitlebitmap); 168 169 } 169 170 170 171 if (logodc == NULL) { 171 172 logodc = CreateCompatibleDC(pdc); 172 deflogobitmap = SelectObject (logodc, logobitmap);173 deflogobitmap = (HBITMAP)SelectObject (logodc, logobitmap); 173 174 } 174 175 … … 219 220 statusRect.right = STATUSX+STATUSWIDTH; 220 221 statusRect.bottom = STATUSY+STATUSHEIGHT; 221 FillRect(pdc, &statusRect, GetStockObject(WHITE_BRUSH));222 FillRect(pdc, &statusRect, (HBRUSH)GetStockObject(WHITE_BRUSH)); 222 223 223 224 int cury = STATUSY; … … 259 260 infoRect.right = INFOX+INFOWIDTH; 260 261 infoRect.bottom = INFOY+INFOHEIGHT; 261 FillRect(pdc, &infoRect, GetStockObject(WHITE_BRUSH));262 FillRect(pdc, &infoRect, (HBRUSH)GetStockObject(WHITE_BRUSH)); 262 263 DrawText(pdc, infostring, -1, &infoRect, DT_CENTER); 263 264 } … … 558 559 // finally, get the host name (no error value 559 560 // is returned from this function) 560 GetLocalName( Instance);561 GetLocalName((HINSTANCE)Instance); 561 562 562 563 } else { … … 596 597 startbrowserdir[0] = '\0'; 597 598 netscapeneeded = 0; 598 int err = tryinitnetwork (Instance, MsgWindow, NULL);599 //int err = 1;599 // int err = tryinitnetwork (Instance, MsgWindow, NULL); 600 int err = 1; 600 601 601 602 // if an error occurred, try again with billsock … … 802 803 803 804 804 int __stdcall WinMain(H ANDLE Instance, HANDLE /*PrevInstance*/, LPSTR CmdLineStr, int /*CmdShow*/) {805 int __stdcall 
WinMain(HINSTANCE Instance, HINSTANCE /*PrevInstance*/, LPSTR CmdLineStr, int /*CmdShow*/) { 805 806 HWND MainWindow; MSG Message; WNDCLASS MainClass; 806 807 … … 813 814 MainClass.hIcon = LoadIcon(Instance, MAKEINTRESOURCE(TRAY_ICON)); 814 815 MainClass.hCursor = LoadCursor(NULL, IDC_ARROW); 815 MainClass.hbrBackground = GetStockObject(WHITE_BRUSH);816 MainClass.hbrBackground = (HBRUSH)GetStockObject(WHITE_BRUSH); 816 817 MainClass.lpszMenuName = MAKEINTRESOURCE(Main_Menu); 817 818 MainClass.lpszClassName = "Greenstone Digital Library Software"; -
branches/New_Config_Format-branch/gsdl/src/w32server/httpreq.cpp
r611 r1279 18 18 The author can be contacted via Email at [email protected] 19 19 */ 20 #include "text_t.h" 20 21 #include <windows.h> 21 22 #include <stdlib.h> -
branches/New_Config_Format-branch/gsdl/src/w32server/httpsrv.cpp
r902 r1279 18 18 The author can be contacted via Email at [email protected] 19 19 */ 20 #include "text_t.h" 20 21 #include <windows.h> 21 22 #include <stdlib.h> … … 29 30 #include "httpsrv.h" 30 31 #include "locate.h" 31 #include "settings.h"32 32 33 33 //Private Functions -
branches/New_Config_Format-branch/gsdl/src/w32server/locate.cpp
r1011 r1279 1 #include "text_t.h" 1 2 #include <windows.h> 2 3 #include <stdio.h> … … 115 116 scroll.bottom = text_rect.bottom; 116 117 ScrollDC(dc,0,-nbits,&scroll,&clip,NULL,&update); 117 FillRect(dc,&update, GetStockObject(WHITE_BRUSH));118 FillRect(dc,&update, (HBRUSH)GetStockObject(WHITE_BRUSH)); 118 119 } 119 120 -
branches/New_Config_Format-branch/gsdl/src/w32server/netio.cpp
r611 r1279 103 103 } 104 104 105 char *GetLocalName(H ANDLE hInstance) {105 char *GetLocalName(HINSTANCE hInstance) { 106 106 // static in case it is written to after the function has finished 107 107 // (I did not error checking on WSACancelAsyncRequest) -
branches/New_Config_Format-branch/gsdl/src/w32server/netio.h
r611 r1279 84 84 Returns: A string containing the local address 85 85 */ 86 char *GetLocalName(H ANDLE hInstance);86 char *GetLocalName(HINSTANCE hInstance); 87 87 88 88 // returns 0 on success, and a WSA error message on failure. -
branches/New_Config_Format-branch/gsdl/src/w32server/settings.cpp
r902 r1279 1 #include "text_t.h" 2 1 3 #if defined(GSDL_USE_OBJECTSPACE) 2 4 # include <ospace\std\fstream> … … 367 369 text_t key, value, section; 368 370 char *cstr_value; 371 #if defined (GSDL_USE_IOS_H) 369 372 ifstream conf (conffile, ios::nocreate); 373 #else 374 ifstream conf (conffile); 375 #endif 370 376 if (conf) { 371 377 while (read_ini_line(conf, key, value) >= 0) { -
branches/New_Config_Format-branch/gsdl/src/w32server/settings.h
r902 r1279 12 12 #include "text_t.h" 13 13 #include "cfgread.h" 14 #include "receptionist.h" 14 15 15 16 // library settings
Note:
See TracChangeset
for help on using the changeset viewer.