Changeset 15895
- Timestamp:
- 2008-06-06T11:19:09+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/DateExtract.pm
r15894 r15895 3 3 ##use BasPlug; ## no, DON'T use BasPlug, BasPlug uses us.... 4 4 use sorttools; 5 use strict; 5 6 use util; 6 7 … … 71 72 #get the text of the document, the "document object" concerned, 72 73 #and the current section within the document 73 local($text, $doc, $cursection, $keep_bib, $max_year, $max_century) = @_;74 my ($text, $doc, $cursection, $keep_bib, $max_year, $max_century) = @_; 74 75 75 76 #format a prechristian maximum century value to be negative so that it can … … 83 84 } 84 85 85 local$extr = &remove_excess($text);86 my $extr = &remove_excess($text); 86 87 #print "EXTRACTION TEXT:\n $extr"; 87 88 $extr = &remove_tags($extr); … … 91 92 92 93 93 local@datelist = ();94 my @datelist = (); 94 95 while($extr =~ m!($range)|($millenium)|($qualified)|($centurydate)|($tri_digit)!i) 95 96 { … … 101 102 { 102 103 103 local$date = $fulldate; if($date =~ /\d+/) {$date = $&;}104 my $date = $fulldate; if($date =~ /\d+/) {$date = $&;} 104 105 else 105 106 { … … 112 113 #if it BC, make it negative 113 114 $date = &convert_bc($fulldate,$date); 114 $end = $date + 99;115 @century = ($date..$end);115 my $end = $date + 99; 116 my @century = ($date..$end); 116 117 @datelist = (@datelist,@century); 117 118 } … … 124 125 my @addlist = (); 125 126 #print "Range: $fulldate\n"; 126 $fullfirst = $`; 127 $fullsecond = $'; 128 $fullfirst =~ /\d+/; $first = $&; 129 $fullsecond =~ /\d+/; $second = $&; 130 $len1 = length($first); 131 $len2 = length($second); 127 my $fullfirst = $`; 128 my $fullsecond = $'; 129 $fullfirst =~ /\d+/; 130 my $first = $&; 131 $fullsecond =~ /\d+/; 132 my $second = $&; 133 my $len1 = length($first); 134 my $len2 = length($second); 132 135 $second = (substr($first,0,($len1-$len2))).$second; 133 136 $first = &convert_bc($fullfirst,$first); … … 151 154 @datelist = sort { $a <=> $b } @datelist; 152 155 @datelist = &post_process($max_year, @datelist); 153 foreach $date (@datelist)156 foreach my $date (@datelist) 154 157 { 155 158 if($date>0){ … … 162 165 } 163 166 sub convert_bc { 164 local($full,$num) = @_;167 my ($full,$num) = @_; 165 168 if ($full =~ /B/) { $num *= -1; } 166 169 $num; … … 168 171 169 172 sub post_process { 170 local($max_year, @list) = @_;171 @cleanlist = ();172 $prev = 0;173 foreach $e (@list) {173 my ($max_year, @list) = @_; 174 my @cleanlist = (); 175 my $prev = 0; 176 foreach my $e (@list) { 174 177 if ($e!=$prev && $e <= $max_year) { 175 178 push(@cleanlist, $e); … … 185 188 #contain date lookalikes 186 189 sub remove_tags { 187 local($tmp) = @_;190 my ($tmp) = @_; 188 191 189 local$parsed = "";192 my $parsed = ""; 190 193 #while there is still text to be parsed and tags are still found 191 194 while($tmp=~ m!<([^>])*(>|$)! && $tmp ne "") … … 200 203 201 204 sub remove_excess { 202 local($tmp) = @_;203 local$parsed = "";205 my ($tmp) = @_; 206 my $parsed = ""; 204 207 205 208 … … 213 216 { 214 217 $parsed .= $`; 215 $storage = $&;218 my $storage = $&; 216 219 $tmp = $'; 217 220 #match the pattern which indicates most recent alteration … … 235 238 236 239 sub remove_biblio{ 237 local($tmp) = @_;238 local$parsed = "";240 my ($tmp) = @_; 241 my $parsed = ""; 239 242 240 243 if($tmp =~ m!$bibheader!i) … … 259 262 { 260 263 261 local$date = $&;264 my $date = $&; 262 265 if($parsed =~ m!((\d)($Ord)$)|(($shortmth)$)|(($longmth)$)!i) 263 266 {
Note:
See TracChangeset
for help on using the changeset viewer.