Changeset 1954 for trunk/gsdl/perllib/DateExtract.pm
- Timestamp:
- 2001-02-13T10:58:26+13:00 (23 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/DateExtract.pm
r1467 r1954 7 7 #75% of the instances of the word century use full name ordinals 8 8 my %ordinals = ("first" => 1, "second" => 2, "third" => 3, "fourth" => 4, 9 10 11 12 13 14 15 16 9 "fifth" => 5, "sixth" => 6, "seventh" => 7, "eighth" => 8, 10 "ninth" => 9, "tenth" => 10, "eleventh" => 11, "twelfth" => 12, 11 "thirteenth" => 13, "fourteenth" => 14, "fifteenth" => 15, 12 "sixteenth" => 16, "seventeenth" => 17, "eighteenth" => 18, 13 "nineteenth" => 19, "twentieth" => 20); 14 15 16 17 17 18 18 #definitions for a date grammar. … … 20 20 21 21 my @months = ("january","february","march","april","may","june","july", 22 22 "august","september","october","november","december"); 23 23 24 24 my $shortmth = ""; … … 77 77 if($max_century =~ /B/) 78 78 { 79 80 81 82 79 $max_century = $`; 80 $max_century =~ /\d+/; 81 $max_century = $&; 82 $max_century *=-1 83 83 } 84 84 … … 87 87 $extr = &remove_tags($extr); 88 88 if(!$keep_bib){ 89 89 $extr = &remove_biblio($extr); 90 90 } 91 91 … … 94 94 while($extr =~ m!($range)|($millenium)|($qualified)|($centurydate)|($tri_digit)!i) 95 95 { 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 96 $extr = $'; 97 my $fulldate = $&; 98 if ($fulldate =~ /$centurydate/i) 99 { 100 if($max_century!=-1) 101 { 102 103 local $date = $fulldate; if($date =~ /\d+/) {$date = $&;} 104 else 105 { 106 $date=$fulldate; $date =~ m! ($Century)!i; $date = $`; 107 $date =~ tr/A-Z/a-z/; 108 $date = $ordinals{$date}; 109 } 110 if($max_century >= $date){ 111 $date = ($date-1)*100 +1; 112 #if it BC, make it negative 113 $date = &convert_bc($fulldate,$date); 114 $end = $date + 99; 115 @century = ($date..$end); 116 @datelist = (@datelist,@century); 117 } 118 } 119 } 120 121 elsif($fulldate =~ /$range/) 122 { 123 $fulldate =~ /$sep/; 124 my @addlist = (); 125 #print "Range: $fulldate\n"; 126 $fullfirst = $`; 127 $fullsecond = $'; 128 $fullfirst =~ /\d+/; $first = $&; 129 $fullsecond =~ /\d+/; $second = $&; 130 $len1 = length($first); 131 $len2 = length($second); 132 $second = (substr($first,0,($len1-$len2))).$second; 133 $first = &convert_bc($fullfirst,$first); 134 $second = &convert_bc($fullsecond,$second); 135 @addlist = ($first..$second); 136 @datelist = (@datelist,@addlist); 137 138 } 139 else { 140 141 my $date = $fulldate; $date =~ /\d+/; $date = $&; 142 $date = &convert_bc($fulldate,$date); 143 #add the date metadata 144 push(@datelist,$date); 145 #print "datelist @datelist\n" 146 } 147 148 148 } 149 149 150 150 if(@datelist){ 151 152 153 154 155 156 157 158 159 160 151 @datelist = sort { $a <=> $b } @datelist; 152 @datelist = &post_process($max_year, @datelist); 153 foreach $date (@datelist) 154 { 155 if($date>0){ 156 $doc->add_metadata($cursection,"Coverage",$date);} 157 else{ 158 $doc->add_metadata($cursection,"Coverage","bc".(-1*$date));} 159 160 } 161 161 } 162 162 } … … 172 172 $prev = 0; 173 173 foreach $e (@list) { 174 175 176 177 174 if ($e!=$prev && $e <= $max_year) { 175 push(@cleanlist, $e); 176 } 177 $prev = $e; 178 178 } 179 179 @cleanlist; … … 191 191 while($tmp=~ m!<([^>])*(>|$)! && $tmp ne "") 192 192 { 193 194 193 $parsed .= $`;#keep all that is not in a tag 194 $tmp = $'; #restart the search after then end of the tag 195 195 } 196 196 $parsed .= $tmp; #add anything after the last match … … 206 206 if(($tmp =~ m!($spurious)|($lookalikes)!i) == 0 ) 207 207 { 208 208 $parsed = $tmp; 209 209 } 210 210 else { 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 211 while ($tmp =~ m!($spurious)|($lookalikes)!i 212 && $tmp ne "") 213 { 214 $parsed .= $`; 215 $storage = $&; 216 $tmp = $'; 217 #match the pattern which indicates most recent alteration 218 if ($storage =~ m!$lastaltered!i) 219 { 220 #match a four digit year or up until the first / 221 #(as in last edited 3/97). 222 $tmp =~ m!($millenium)|(\/)!; 223 $tmp = $'; 224 } 225 226 } 227 228 $parsed .= $tmp; 229 230 230 } 231 231 #print "Parsed:\n $parsed\n\n"; … … 240 240 if($tmp =~ m!$bibheader!i) 241 241 { 242 242 $tmp=$`; 243 243 } 244 244 … … 246 246 if(($tmp =~ m!($ref_end)|($bracket)|($colonsp)|($reprint)|($comma)|($fullstop)|($semi)|($seasonref) ($millenium)!i) == 0) 247 247 { 248 248 $parsed = $tmp; 249 249 } 250 250 else{ 251 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 252 #print "removing bib\n"; 253 while ($tmp =~ m!($ref_end)|($bracket)|($colonsp)|($reprint)|($comma)|($fullstop)|($semi)|(($seasonref) ($millenium))|($bibheader)!i && $tmp ne "") 254 { 255 256 $parsed .= $`; 257 $tmp = $'; 258 if($&=~m!($comma)|($fullstop)!) 259 { 260 261 local $date = $&; 262 if($parsed =~ m!((\d)($Ord)$)|(($shortmth)$)|(($longmth)$)!i) 263 { 264 $parsed .= $date; 265 } 266 } 267 268 } 269 $parsed .= $tmp; 270 270 } 271 271 $parsed; … … 292 292 293 293 294 295 296 297 298 299 300 301
Note:
See TracChangeset
for help on using the changeset viewer.