Changeset 11070 for trunk/gsdl/perllib/Kea.pm
- Timestamp:
- 2006-01-19T16:48:58+13:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/Kea.pm
r11069 r11070 1 1 package Kea; 2 3 use strict; 2 4 3 5 # This function is called by BasPlug.pm when a flag in a collection … … 21 23 22 24 # returns a string containing comma-separated keyphrases 23 sub extract_KeyPhrases { 25 sub extract_KeyPhrases 26 { 27 my $kea_version = shift(@_); 28 my $doc = shift(@_); # Document's text 29 my $args = shift(@_); # Options 24 30 25 # Parsing arguments of the function 26 my $kea_version = shift(@_); 27 my $doc = shift(@_); # documents text 28 my $args = shift(@_); # any options 29 my @optionlist = split(/\s+/, $args) if (defined($args)); #list of options 30 31 # Specifying directory names 32 my $keahome = &get_Kea_directory($kea_version); 33 my $defaultmodel = &util::filename_cat($keahome, "CSTR-20"); 31 # Set default models 32 my $kea_home = &get_Kea_directory($kea_version); 33 my $default_model_path = &util::filename_cat($kea_home, "CSTR-20"); 34 34 if ($kea_version eq "4.0") { 35 # Use a different model for Kea 4.036 $default model = &util::filename_cat($keahome, "FAO-20docs");35 # Use a different default model for Kea 4.0 36 $default_model_path = &util::filename_cat($kea_home, "FAO-20docs"); 37 37 } 38 38 39 # Initializing variables: 40 my $command = ""; 41 my @keylist; 42 my @options = (); 43 $modelspec = 0; 44 45 # Settings for the java executable: 46 47 # CLASSPATH: 48 $java_classpath = ".:$keahome"; 49 50 # See if java executable is on path 51 my $java_exec=""; 52 if (system("which java >/dev/null 2>/dev/null")==0) { 53 $java_exec=`which java`; 54 chomp $java_exec; 55 } else { 56 $java_exec="$java_home/bin/java"; 57 } 58 59 # The actual java command is based on these other variables: 60 $java_command = "$java_exec -classpath \"$java_classpath\""; 61 62 # end of java settings 63 64 # Parsing options for keyphrase extraction: 65 if (@optionlist) { 66 foreach $element (@optionlist){ #for each option 67 if (length($element) == 1) { 68 push(@options, "-$element"); 69 } else { 70 $option = substr($element, 0, 1); 71 $value = substr($element,1); 72 if (($option eq "m") && (-e "$keahome/$value")) { 73 $modelspec = 1; 74 push(@options, "-$option $keahome/$value"); 75 } elsif ($option eq "m") { 76 $modelspec = 1; 77 print STDERR "Couldn't find model $value. Using the default model instead\n"; 78 push(@options, "-$option $defaultmodel"); 79 } else { 80 push(@options, "-$option $value"); 39 # Parse the Kea options 40 my $options_string; 41 my @args_list = split(/\s+/, $args) if (defined($args)); 42 if (@args_list) { 43 my $model_specified = 0; 44 foreach my $arg (@args_list) { 45 if (length($arg) == 1) { 46 $options_string .= " -$arg"; 47 } 48 else { 49 my $option = substr($arg, 0, 1); 50 my $value = substr($arg, 1); 51 if ($option eq "m") { 52 my $model_path = &util::filename_cat($kea_home, $value); 53 if (-e $model_path) { 54 $options_string .= " -m $model_path"; 55 } 56 else { 57 print STDERR "Warning: Couldn't find model $model_path; using the default model instead.\n"; 58 $options_string .= " -m $default_model_path"; 59 } 60 $model_specified = 1; 81 61 } 82 62 else { 63 $options_string .= " -$option $value"; 64 } 83 65 } 84 66 } 85 # if none of the option specifies the model, set the default one: 86 if ($modelspec != 1) { 87 push(@options, "-m $defaultmodel"); 67 68 # If none of the option specifies the model, use the default one 69 if ($model_specified != 1) { 70 $options_string .= " -m $default_model_path"; 88 71 } 89 $options = join(" ",@options); 90 # print STDERR "OPTIONS: $options\n"; 91 } else { 92 # If no options were specified: Set default value for the model 93 $options = "-m $defaultmodel"; 72 } 73 else { 74 # If no options were specified, use the default model 75 $options_string = "-m $default_model_path"; 94 76 } 95 77 … … 100 82 $doc =~ tr/\n/\n/s; 101 83 102 # Write text to a temporary file doc.txt 103 my $gsdlhome = $ENV{'GSDLHOME'}; 104 open(OUT, ">$gsdlhome/tmp/doc.txt") or die "In Kea.pm doc.txt could not be created\n"; 105 print OUT $doc; 106 close(OUT); 84 # Write text to a temporary file doc.txt 85 my $tmp_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "tmp"); 86 my $doc_txt_file_path = &util::filename_cat($tmp_directory_path, "doc.txt"); 87 open(DOC_TXT, ">$doc_txt_file_path") or die "Error: Could not write $doc_txt_file_path in Kea.pm.\n"; 88 print DOC_TXT $doc; 89 close(DOC_TXT); 107 90 91 # Run Kea with the specified options 92 system("java -classpath \"$kea_home\" KEAKeyphraseExtractor -l $tmp_directory_path $options_string"); 108 93 109 # EXECUTE KEA with specific options: 110 $command = "$java_command KEAKeyphraseExtractor -l $gsdlhome/tmp $options"; 111 system ("$command"); 94 # Read the resulting doc.key file which contains the keyphrases 95 my $doc_key_file_path = &util::filename_cat($tmp_directory_path, "doc.key"); 96 if (!open(IN, "<$doc_key_file_path")) { 97 # The doc.key file does not exist (either an option was wrongly specified, or no keyphrases were found) 98 return ""; 99 } 112 100 113 # Read the resulting doc.key, which contains keyphrases: 114 115 open(IN, "<$gsdlhome/tmp/doc.key") or return ""; 116 #this means doc.key does not exist 117 #either because an option was wrongly specified 118 #or no keyphrases were found 119 while(<IN>){ 101 my @keyphrase_list = (); 102 while (<IN>) { 120 103 chomp; 121 push(@key list,$_);104 push(@keyphrase_list, $_); 122 105 } 123 106 close(IN); 124 107 125 $keylist = join(", ", @keylist); 108 # Delete doc.key so that in future it will not be opened and read (otherwise KEA sees it as more keyphrases!) 109 unlink($doc_key_file_path); 126 110 127 # Delete doc.key so that in future it will not be opened and read. 128 # Otherwise KEA sees it as more keyphrases! 129 130 unlink("$gsdlhome/tmp/doc.key"); 131 132 return $keylist; 111 my $keyphrases = join(", ", @keyphrase_list); 112 return $keyphrases; 133 113 } 134 114
Note:
See TracChangeset
for help on using the changeset viewer.