Changeset 1565


Ignore:
Timestamp:
2000-09-26T14:21:26+12:00 (24 years ago)
Author:
paynter
Message:

Many changes to make the script suitable for uzse as a CGI script.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/phind/host/phind-host.pl

    r1564 r1565  
    44# Copyright 2000 Gordon Paynter ([email protected])
    55
    6 BEGIN {
    7     die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    8     unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    9 }
    10 
    11 require("open2.pl");
    12 
     6use IPC::Open2;
     7
     8# Set autofulsh (output on newline).
     9# If you don't then the content-type line gets stuck in the STDOUT
     10# buffer and written a second time when you call open2 (it uses fork).
     11$|=1;
    1312
    1413# Output header
     
    1716# Decode arguments
    1817
    19 my $options;
    20 my $action;
    21 my $collection;
    22 my $phrase;
     18my $options = "";
     19my $action = "";
     20my $collection = "";
     21my $phrase = "";
     22my $word = "";
     23
     24my $documentlimit = 10;
     25my $expansionlimit = 10;
    2326
    2427if ($ARGV[0]) { $options = $ARGV[0]; }
     
    3235        elsif ($name =~ /^c/i) { $collection = $value; }
    3336        elsif ($name =~ /^p/i) { $phrase = $value; }
    34     }
    35 }
    36 
    37 
    38 `killall host`;
     37        elsif ($name =~ /^w/i) { $word = $value; }
     38        elsif ($name =~ /^d/i) { $documentlimit = $value; }
     39        elsif ($name =~ /^e/i) { $expansionlimit = $value; }
     40    }
     41}
     42
     43
    3944
    4045&help if ($action =~ /help/i);
    41 &help unless ($phrase && $collection);
     46&help unless ($collection);
     47&help unless ($word || $phrase);
    4248
    4349my $gsdlhome = "/research/gsdl";
     
    4753
    4854
    49 
     55# Suggest a useful phrase in the search form
     56my $suggestion = "$collection";
     57$suggestion =~ s/\-.*//;
     58
     59# Print lists in alternaing colours
     60my $colour = "#EEFFFF";
     61my %other;
     62$other{"#EEFFFF"} = "#FFFFDD";
     63$other{"#FFFFDD"} = "#EEFFFF";
     64
     65my $ecolor = "#FFEEEE";
     66
     67$phrase = &getphrasenumber($word) if ($word);
    5068&query($phrase);
     69
     70
    5171
    5272
     
    6080    # Perform the query
    6181    print W "$phrase\n";
    62 
    6382    my $result = <R>;
    6483   
    65     print "<head><title>phind $phrase</title></head>
    66 <body><pre>$result</pre></body></html>";
     84    my @result = split(/:/, $result);
     85    shift @result;
     86
     87    my $base = shift @result;
     88    my $tf = shift @result;
     89    my $ef  = shift @result;
     90    my $exps = shift @result;
     91    my $df = shift @result;
     92    my $docs = shift @result;
     93
     94    $suggestion = $base;
     95    $suggestion =~ s/ .*//;
     96   
     97    print "<html><head><title>$base</title></head>\n<body bgcolor='#FFFFFF'>\n";
     98
     99    # Start printing the outer table
     100    print "<center><h1>$base</h1></center>\n<p><table align=center border = 0>
     101<tr align=center><td>$base occurs $tf times in $df documents.</td></tr>
     102<tr align=center><td>\n";
     103    &printform;
     104
     105    # Print the shorter terms, if avaibale
     106    if ($base =~ / /) {
     107    my @words = split(/ +/, $base);
     108    print "<p>Find: ";
     109
     110    my $n = 0;
     111    foreach my $w (@words) {
     112        if ($n++) { print " or "; }
     113        print " <a href='phind?c=$collection&w=$w'>$w</a>";
     114    }
     115    }
     116    print "</td></tr>\n";
     117
     118    # Print the expansions
     119    if ($ef) {
     120
     121    my @exps = split(/,/, $exps);
     122    if ($expansionlimit eq "all") {
     123        $expansionlimit = $ef;
     124    }
     125
     126    print "<tr><td>
     127<table align=center border=0>
     128<tr align=center><th colspan = 3>";
     129    if ($expansionlimit >= $ef) {
     130        print "Expansions ($ef)";
     131    } else {
     132        print "Expansions ($expansionlimit of $ef)";
     133        splice(@exps, $expansionlimit);
     134    }
     135    print "</th><th>freq</th><th>docs</th></tr>\n";
     136
     137    foreach my $e (@exps) {
     138        print &phraserow($e,$base) . "\n";
     139    }
     140
     141    if ($expansionlimit < $ef) {
     142        print "<tr align=center bgcolor=$ecolor><td colspan = 5>
     143<table><tr><td align=center>
     144<a href='phind?c=$collection&p=$phrase&d=$documentlimit&e=", $expansionlimit + 10, "'>Get more phrases</a></td><td align=center>
     145<a href='phind?c=$collection&p=$phrase&d=$documentlimit&e=$ef'>Get every phrase</a></td></tr></table>
     146</td></tr>\n";
     147    }
     148
     149    print "</table></tr></td>\n";
     150
     151    } else {
     152    print "<tr align=center><th>There are no expansions of this phrase.</th></tr>\n";
     153    }
     154
     155    # Print the documents
     156    my @docs = split(/[;\n]/, $docs);
     157    if ($documentlimit eq "all") {
     158    $documentlimit = $df;
     159    }
     160   
     161    print "<tr><td>
     162<table align=center border=0>
     163<tr align=center><th>";
     164    if ($documentlimit >= $df) {
     165    print "Documents ($df)";
     166    } else {
     167    print "Documents ($documentlimit of $df)";
     168    splice(@docs, $documentlimit);
     169    }
     170    print "</th><th>freq</th></tr>\n";
     171   
     172    open(DOCS, "<$phindexdir/mg-d.txt");
     173    my ($d, $f, $num, $line, $h, $t);
     174    $num = 0;
     175    foreach $d (@docs) {
     176
     177    # get document number and frequency
     178    $f = 1;
     179    if ($d =~ /,/) {
     180        ($d, $f) = $d =~ /(\d+),(\d+)/
     181    }
     182
     183    # read the document file up to this document
     184    while ($num < $d) {
     185        $line = <DOCS>;
     186#       print "# $line\n";
     187        $num++;
     188    }
     189
     190    # get document hash and title
     191    ($h, $t) = $line =~ /^(.*)\t(.*)$/;
     192
     193    # print the information
     194    $colour = $other{$colour};
     195    print "<tr align=center bgcolor='$colour'><td align=left><a href='library?a=d&c=$collection&d=HASH$h'>$t</a></td><td>$f</td></tr>\n";
     196    }
     197
     198    if ($documentlimit < $df) {
     199    print "<tr align=center bgcolor=$ecolor><td colspan = 2>
     200<table><tr><td align=center>
     201<a href='phind?c=$collection&p=$phrase&d=", $documentlimit + 10, "&e=$expansionlimit'>Get more documents</a>
     202</td><td align=center>
     203<a href='phind?c=$collection&p=$phrase&d=$df&e=$expansionlimit'>Get every document</a>
     204</td></tr></table>";
     205    }
     206 
     207    print "</table></td></tr>
     208</table></body></html>
     209";
     210
     211    # close the host process
     212    print W "0\n";
     213
     214}
     215
     216
     217sub printform {
     218
     219    my @collects = ("aircraft", "folktale", "forestry", "forestry-sw", "rutgers", "rutgers-sw", "acrodemo", "acrostop");
     220
     221    print"<form method='GET' action='phind'>
     222Find <input name='w' size='20' maxlength='200' value='$suggestion'>
     223in <select name=c>
     224";
     225    foreach my $c (@collects) {
     226    print "<option value='$c'";
     227    print " selected" if ($c eq $collection);
     228    print ">$c</option>\n";
     229    }
     230    print "</select><input type='submit' value='Submit'></form>";
     231}
     232
     233sub getphrasenumber {
     234    my ($word) = @_;
     235
     236    $phrase = `grep -in ":$word:" $phindexdir/mg-p.txt`;
     237    $phrase =~ s/:.*//;
     238   
     239    &help_word_not_found if ($phrase !~ /\d+/);
     240    return $phrase;
     241}
     242
     243
     244sub phraserow {
     245    my ($number, $centre) = @_;
     246
     247    print W "$number\n";
     248    my $result = <R>;
     249
     250    my ($text, $tf, $df) = $result =~ /^<Document>\d+:(.*):(\d+):.*:.*:(\d+):.*$/;
     251    my ($l, $r) = $text =~ /(.*)$centre(.*)/;
     252   
     253    $colour = $other{$colour};
     254    my $row = "<tr align=center valign=top bgcolor='$colour'>";
     255    if ($l =~ /./) {
     256    $l =~ s/ *$//;
     257    $row .= "<td align=right><a href='phind?c=$collection&p=$number'>$l</a></td>";
     258    } else {
     259    $row .= "<td>&nbsp;</td>";
     260    }
     261    $row .= "<td><a href='phind?c=$collection&p=$number'>$centre</a></td>";
     262    if ($r =~ /./) {
     263    $r =~ s/^ *//;
     264    $row .= "<td align=left><a href='phind?c=$collection&p=$number'>$r</a></td>";
     265    } else {
     266    $row .= "<td>&nbsp;</td>";
     267    }
     268    $row .= "<td>$tf</td><td>$df</td></tr>";
     269
     270    return $row;
    67271}
    68272
    69273sub help {
    70     print "<head><title>phind help</title></head><body>
    71 <h1>phind help</h1>
    72 <p>help!
    73 </body></html>";
     274    print "<html><head><title>phind</title></head>
     275<body bgcolor='#FFFFFF'><center><h1>phind</h1><p>
     276";
     277    &printform;
     278    print"</center></body></html>";
    74279    exit(0);
    75280}
    76281
    77 
    78 
     282sub help_word_not_found {
     283    print "<html><head><title>phind - word not found</title></head>
     284<body bgcolor='#FFFFFF'><center><h1>phind - word not found</h1>
     285<p> The word &quot;$word&quot; was not found in collection &quot;$collection&quot;
     286<p>";
     287    &printform;
     288    print"</center></body></html>";
     289    exit(0);
     290}
     291
Note: See TracChangeset for help on using the changeset viewer.