source: gs3-extensions/html-to-expeditee/trunk/src/perllib/CssStyleToExpAttr.pm@ 26728

Last change on this file since 26728 was 26728, checked in by davidb, 11 years ago

Can now successfully obtain font size, font weight, font colour and font family information about each piece of text on a web page and convert to a corresponding text item on an Expeditee frame. Still need to account for text nodes with parents such as bold elements or heading elements.

File size: 11.6 KB
RevLine 
[24941]1###########################################################################
2#
3# CssStyleToExpAttr.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package CssStyleToExpAttr;
27
# Maps each CSS generic font family keyword to a one-letter family code.
# Only serif ("t") and monospace ("m") have codes of their own; cursive and
# fantasy share the sans-serif code ("s").
my $font_generic_family_lookup = {
    serif        => 't',    # (e.g., Times)
    'sans-serif' => 's',    # (e.g., Helvetica)
    cursive      => 's',    # (e.g., Zapf-Chancery)
    fantasy      => 's',    # (e.g., Western)
    monospace    => 'm',    # (e.g., Courier)
};
35
36
# Maps a lower-cased specific font name to the CSS generic family it belongs
# to.  Keys must be lower case: callers look names up with lc().
#
# Each font is listed exactly once.  An earlier version had a missing closing
# quote that fused '"sans serif" => "sans-serif", "geneva"' into a single
# bogus multi-line key, so "sans serif" was never actually defined.
my $font_specific_family_lookup = {
    "arial"               => "sans-serif",
    "arial black"         => "sans-serif",
    "gadget"              => "sans-serif",
    "comic sans ms"       => "cursive",
    "comic sans"          => "cursive",
    "courier"             => "monospace",
    "courier new"         => "monospace",
    "georgia"             => "serif",
    "impact"              => "sans-serif",
    "charcoal"            => "sans-serif",
    "lucida console"      => "monospace",
    "monaco"              => "monospace",
    "lucida sans unicode" => "sans-serif",
    "lucida grande"       => "sans-serif",
    "palatino linotype"   => "serif",
    "book antiqua"        => "serif",
    "palatino"            => "serif",
    "tahoma"              => "sans-serif",
    "geneva"              => "sans-serif",
    "times"               => "serif",
    "times new roman"     => "serif",
    "trebuchet ms"        => "sans-serif",
    "trebuchet"           => "sans-serif",
    "verdana"             => "sans-serif",
    # Symbol fonts have no generic family to map to yet:
    #symbol        => "symbol",
    #webdings      => "symbol",
    #wingdings     => "symbol",
    #zapf dingbats => "symbol",
    "ms sans serif"       => "sans-serif",
    "sans serif"          => "sans-serif",
    "ms serif"            => "serif",
    "new york"            => "serif"
};
74
# Reverse direction of the specific-font table: for each CSS generic family,
# a list of example specific fonts belonging to it.  As the variable name
# says, nothing in this file reads it; it is kept for reference.
my $UNUSED_font_specific_family_mapping = {

    'serif' => [
        'Times New Roman',
        'Bodoni',
        'Garamond',
        'Minion Web',
        'ITC Stone Serif',
        'MS Georgia',
        'Bitstream Cyberbit',
        'Adobe Minion Cyrillic',
        'Excelsior Cyrillic Upright',
        'Monotype Albion 70',
        'ER Bukinist',
        'New Peninim',
        'Raanana',
        'Ryumin Light-KL',
        'Kyokasho ICA',
        'Futo Min A101',
        'Lo Cicero Cherokee',
    ],

    'sans-serif' => [
        'MS Trebuchet',
        'ITC Avant Garde Gothic',
        'MS Arial',
        'MS Verdana',
        'Univers',
        'Futura',
        'ITC Stone Sans',
        'Gill Sans',
        'Akzidenz Grotesk',
        'Helvetica',
        'Attika',
        'Typiko New Era',
        'MS Tahoma',
        'Monotype Gill Sans 571',
        'Helvetica Greek',
        'Helvetica Cyrillic',
        'ER Univers',
        'Lucida Sans Unicode',
        'Bastion',
        'Arial Hebrew',
        'Shin Go',
        'Heisei Kaku Gothic W5',
    ],

    'cursive' => [
        'Caflisch Script',
        'Adobe Poetica',
        'Sanvito',
        'Ex Ponto',
        'Snell Roundhand',
        'Zapf-Chancery',
        'ER Architekt',
        'Corsiva',
        'DecoType Naskh',
        'Monotype Urdu 507',
    ],

    'fantasy' => [
        'Alpha Geometrique',
        'Critter',
        'Cottonwood',
        'FB Reactor',
        'Studz',
    ],

    'monospace' => [
        'Courier',
        'MS Courier New',
        'Prestige',
        'Everson Mono',
        'ER Kurier',
        'Osaka Monospaced',
    ],
};
150
151
152
# Maps the CSS font-style keywords to one-letter style codes.  Oblique is
# given the same code as italic.
my $font_style_lookup = {
    normal  => 'n',
    italic  => 'i',
    oblique => 'i',
};
158
# CSS font-variant keywords.  Both map to the empty string because nothing
# can currently be done for small-caps.
my $font_varient_lookup = {
    normal       => '',
    'small-caps' => '',
};
164
165
# Maps a CSS font-weight value (keyword or numeric) to a one-letter weight
# code: "n" for normal weight, "b" for bold.
#
# In CSS, 'normal' is the same weight as 400 and 'bold' the same as 700, so
# the keyword and numeric rows must agree.  Weights up to 500 (medium) are
# treated as normal and 600 upwards as bold.
my $font_weight_lookup = {
    'normal'  => "n",
    'bold'    => "b",
    'bolder'  => "b", # filler for now: relative to the parent's weight
    'lighter' => "n", # filler for now: relative to the parent's weight
    '100'     => "n",
    '200'     => "n",
    '300'     => "n",
    '400'     => "n", # same as 'normal'
    '500'     => "n",
    '600'     => "b",
    '700'     => "b", # same as 'bold'
    '800'     => "b",
    '900'     => "b"
};
181
182
# Maps the CSS absolute-size keywords to numeric font sizes, stepping by 2
# from 8 (xx-small) to 20 (xx-large).
# NOTE(review): the unit of these numbers is not stated here -- confirm.
my $font_size_absolute = {
    'xx-small' => 8,
    'x-small'  => 10,
    small      => 12,
    medium     => 14,
    large      => 16,
    'x-large'  => 18,
    'xx-large' => 20,
};
192
193# px, em, percentage, and relative, such as larger
194
195## color, background-color
196
# Maps a lower-case CSS colour name to its [red, green, blue] components,
# each in the range 0..255.  Both the "gray" and "grey" spellings are listed
# where the colour has them.
my $color_name_lookup = {
    aliceblue            => [ 240, 248, 255 ],
    antiquewhite         => [ 250, 235, 215 ],
    aqua                 => [   0, 255, 255 ],
    aquamarine           => [ 127, 255, 212 ],
    azure                => [ 240, 255, 255 ],
    beige                => [ 245, 245, 220 ],
    bisque               => [ 255, 228, 196 ],
    black                => [   0,   0,   0 ],
    blanchedalmond       => [ 255, 235, 205 ],
    blue                 => [   0,   0, 255 ],
    blueviolet           => [ 138,  43, 226 ],
    brown                => [ 165,  42,  42 ],
    burlywood            => [ 222, 184, 135 ],
    cadetblue            => [  95, 158, 160 ],
    chartreuse           => [ 127, 255,   0 ],
    chocolate            => [ 210, 105,  30 ],
    coral                => [ 255, 127,  80 ],
    cornflowerblue       => [ 100, 149, 237 ],
    cornsilk             => [ 255, 248, 220 ],
    crimson              => [ 220,  20,  60 ],
    cyan                 => [   0, 255, 255 ],
    darkblue             => [   0,   0, 139 ],
    darkcyan             => [   0, 139, 139 ],
    darkgoldenrod        => [ 184, 134,  11 ],
    darkgray             => [ 169, 169, 169 ],
    darkgreen            => [   0, 100,   0 ],
    darkgrey             => [ 169, 169, 169 ],
    darkkhaki            => [ 189, 183, 107 ],
    darkmagenta          => [ 139,   0, 139 ],
    darkolivegreen       => [  85, 107,  47 ],
    darkorange           => [ 255, 140,   0 ],
    darkorchid           => [ 153,  50, 204 ],
    darkred              => [ 139,   0,   0 ],
    darksalmon           => [ 233, 150, 122 ],
    darkseagreen         => [ 143, 188, 143 ],
    darkslateblue        => [  72,  61, 139 ],
    darkslategray        => [  47,  79,  79 ],
    darkslategrey        => [  47,  79,  79 ],
    darkturquoise        => [   0, 206, 209 ],
    darkviolet           => [ 148,   0, 211 ],
    deeppink             => [ 255,  20, 147 ],
    deepskyblue          => [   0, 191, 255 ],
    dimgray              => [ 105, 105, 105 ],
    dimgrey              => [ 105, 105, 105 ],
    dodgerblue           => [  30, 144, 255 ],
    firebrick            => [ 178,  34,  34 ],
    floralwhite          => [ 255, 250, 240 ],
    forestgreen          => [  34, 139,  34 ],
    fuchsia              => [ 255,   0, 255 ],
    gainsboro            => [ 220, 220, 220 ],
    ghostwhite           => [ 248, 248, 255 ],
    gold                 => [ 255, 215,   0 ],
    goldenrod            => [ 218, 165,  32 ],
    gray                 => [ 128, 128, 128 ],
    green                => [   0, 128,   0 ],
    greenyellow          => [ 173, 255,  47 ],
    grey                 => [ 128, 128, 128 ],
    honeydew             => [ 240, 255, 240 ],
    hotpink              => [ 255, 105, 180 ],
    indianred            => [ 205,  92,  92 ],
    indigo               => [  75,   0, 130 ],
    ivory                => [ 255, 255, 240 ],
    khaki                => [ 240, 230, 140 ],
    lavender             => [ 230, 230, 250 ],
    lavenderblush        => [ 255, 240, 245 ],
    lawngreen            => [ 124, 252,   0 ],
    lemonchiffon         => [ 255, 250, 205 ],
    lightblue            => [ 173, 216, 230 ],
    lightcoral           => [ 240, 128, 128 ],
    lightcyan            => [ 224, 255, 255 ],
    lightgoldenrodyellow => [ 250, 250, 210 ],
    lightgray            => [ 211, 211, 211 ],
    lightgreen           => [ 144, 238, 144 ],
    lightgrey            => [ 211, 211, 211 ],
    lightpink            => [ 255, 182, 193 ],
    lightsalmon          => [ 255, 160, 122 ],
    lightseagreen        => [  32, 178, 170 ],
    lightskyblue         => [ 135, 206, 250 ],
    lightslategray       => [ 119, 136, 153 ],
    lightslategrey       => [ 119, 136, 153 ],
    lightsteelblue       => [ 176, 196, 222 ],
    lightyellow          => [ 255, 255, 224 ],
    lime                 => [   0, 255,   0 ],
    limegreen            => [  50, 205,  50 ],
    linen                => [ 250, 240, 230 ],
    magenta              => [ 255,   0, 255 ],
    maroon               => [ 128,   0,   0 ],
    mediumaquamarine     => [ 102, 205, 170 ],
    mediumblue           => [   0,   0, 205 ],
    mediumorchid         => [ 186,  85, 211 ],
    mediumpurple         => [ 147, 112, 219 ],
    mediumseagreen       => [  60, 179, 113 ],
    mediumslateblue      => [ 123, 104, 238 ],
    mediumspringgreen    => [   0, 250, 154 ],
    mediumturquoise      => [  72, 209, 204 ],
    mediumvioletred      => [ 199,  21, 133 ],
    midnightblue         => [  25,  25, 112 ],
    mintcream            => [ 245, 255, 250 ],
    mistyrose            => [ 255, 228, 225 ],
    moccasin             => [ 255, 228, 181 ],
    navajowhite          => [ 255, 222, 173 ],
    navy                 => [   0,   0, 128 ],
    oldlace              => [ 253, 245, 230 ],
    olive                => [ 128, 128,   0 ],
    olivedrab            => [ 107, 142,  35 ],
    orange               => [ 255, 165,   0 ],
    orangered            => [ 255,  69,   0 ],
    orchid               => [ 218, 112, 214 ],
    palegoldenrod        => [ 238, 232, 170 ],
    palegreen            => [ 152, 251, 152 ],
    paleturquoise        => [ 175, 238, 238 ],
    palevioletred        => [ 219, 112, 147 ],
    papayawhip           => [ 255, 239, 213 ],
    peachpuff            => [ 255, 218, 185 ],
    peru                 => [ 205, 133,  63 ],
    pink                 => [ 255, 192, 203 ],
    plum                 => [ 221, 160, 221 ],
    powderblue           => [ 176, 224, 230 ],
    purple               => [ 128,   0, 128 ],
    red                  => [ 255,   0,   0 ],
    rosybrown            => [ 188, 143, 143 ],
    royalblue            => [  65, 105, 225 ],
    saddlebrown          => [ 139,  69,  19 ],
    salmon               => [ 250, 128, 114 ],
    sandybrown           => [ 244, 164,  96 ],
    seagreen             => [  46, 139,  87 ],
    seashell             => [ 255, 245, 238 ],
    sienna               => [ 160,  82,  45 ],
    silver               => [ 192, 192, 192 ],
    skyblue              => [ 135, 206, 235 ],
    slateblue            => [ 106,  90, 205 ],
    slategray            => [ 112, 128, 144 ],
    slategrey            => [ 112, 128, 144 ],
    snow                 => [ 255, 250, 250 ],
    springgreen          => [   0, 255, 127 ],
    steelblue            => [  70, 130, 180 ],
    tan                  => [ 210, 180, 140 ],
    teal                 => [   0, 128, 128 ],
    thistle              => [ 216, 191, 216 ],
    tomato               => [ 255,  99,  71 ],
    turquoise            => [  64, 224, 208 ],
    violet               => [ 238, 130, 238 ],
    wheat                => [ 245, 222, 179 ],
    white                => [ 255, 255, 255 ],
    whitesmoke           => [ 245, 245, 245 ],
    yellow               => [ 255, 255,   0 ],
    yellowgreen          => [ 154, 205,  50 ],
};
346
347
# Convert a CSS font-size value into an Expeditee font size.
#
# Parameters:
#   $css_font_size - a CSS pixel length such as "12px" or "13.33px".
#
# Returns:
#   The whole number of pixels (any fraction is truncated), or undef when the
#   value is undefined or is not a pixel length.  Other units (em, %) and the
#   absolute/relative size keywords are not handled yet.
sub convert_font_size
{
    my ($css_font_size) = @_;

    return undef unless defined $css_font_size;

    # The fractional part is optional, so that both "12px" and "12.5px" are
    # accepted.  (Previously the decimal point was mandatory, which rejected
    # every whole-number size.)
    if ($css_font_size =~ m/^(\d+(?:\.\d*)?)px$/) {
        return int($1);
    }

    return undef;
}
361
# Convert a CSS font weight keyword into an Expeditee font face code.
#
# Parameters:
#   $css_font_face - the CSS value; compared case-insensitively with "bold".
#
# Returns:
#   "b" when the value is "bold", otherwise "r".
sub convert_font_face
{
    my ($css_font_face) = @_;

    # For now only bold is recognised; italic and italic bold still need to
    # be taken into account.
    return (lc($css_font_face) eq "bold") ? "b" : "r";
}
[24944]375
376
# Convert a CSS colour value into an Expeditee colour string.
#
# Parameters:
#   $css_color - one of:
#                  rgb(r, g, b)      components 0..255
#                  rgba(r, g, b, a)  as above; alpha 0 means fully transparent
#                  #rrggbb or #rgb   hexadecimal notation
#                  transparent
#                The function name and whitespace around the components are
#                matched leniently (any case, optional padding).
#
# Returns:
#   A string "R G B" in which each component is a whole-number percentage
#   (0..100) of full intensity, or undef when the colour is transparent,
#   undefined, or written in a notation that is not recognised (for example
#   a colour name).
sub convert_color
{
    my ($css_color) = @_;

    return undef unless defined $css_color;

    my ($red, $green, $blue);

    if ($css_color =~ m/^\s*rgba?\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*(?:,\s*(\d*\.?\d+)\s*)?\)\s*$/i) {
        ($red, $green, $blue) = ($1, $2, $3);
        my $alpha = $4;

        # A zero alpha channel is how computed styles report "transparent".
        return undef if (defined $alpha && $alpha == 0);
    }
    elsif ($css_color =~ m/^\s*#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})\s*$/i) {
        ($red, $green, $blue) = (hex($1), hex($2), hex($3));
    }
    elsif ($css_color =~ m/^\s*#([0-9a-f])([0-9a-f])([0-9a-f])\s*$/i) {
        # Short form: each digit is doubled, so #fa0 means #ffaa00.
        ($red, $green, $blue) = (hex($1 x 2), hex($2 x 2), hex($3 x 2));
    }
    else {
        # "transparent" and anything unrecognised have no Expeditee colour.
        return undef;
    }

    # CSS clamps out-of-range components, so never report more than 100%.
    my $r = int(100 * ($red   > 255 ? 255 : $red)   / 255);
    my $g = int(100 * ($green > 255 ? 255 : $green) / 255);
    my $b = int(100 * ($blue  > 255 ? 255 : $blue)  / 255);

    print STDERR "*** r = $r, g = $g, b = $b\n";

    my $exp_color = "$r $g $b";

    print STDERR "*** exp color = $exp_color\n";

    return $exp_color;
}
[26727]402
# Convert a CSS font-family value into an Expeditee font family tag.
#
# Parameters:
#   $css_font_family - either a single font name or a comma separated list of
#                      fallbacks, e.g. '"Times New Roman", Times, serif'.
#
# Returns:
#   The first non-empty font name in the list, with surrounding whitespace and
#   CSS quotes removed and "_" appended (e.g. "Times New Roman_"), or undef
#   when the value is undefined or contains no font name.
#
# A known font name and an unknown one produce the same tag, so no lookup
# table is consulted here; the value is simply treated as a list whose first
# usable entry wins.
sub convert_font_family
{
    my ($css_font_family) = @_;

    return undef unless defined $css_font_family;

    print STDERR "*** PASSING PARAMETER css font family: ".$css_font_family."\n";

    foreach my $font_name (split(',', $css_font_family)) {

        # CSS allows whitespace around the commas and quotes around names
        # that contain spaces; neither is part of the font name itself.
        $font_name =~ s/^\s+//;
        $font_name =~ s/\s+$//;
        $font_name =~ s/^(["'])(.*)\1$/$2/;
        $font_name =~ s/^\s+//;
        $font_name =~ s/\s+$//;

        next if ($font_name eq "");

        return $font_name."_";
    }

    return undef;
}
429
# Convert a CSS font-style value into an Expeditee font style.
#
# Not implemented yet: the argument is accepted but ignored, and the result
# is always undef.
#
# Parameters:
#   $css_font_style - the CSS font-style value (currently unused).
#
# Returns:
#   undef.
sub convert_font_style
{
    my ($css_font_style) = @_;

    # Placeholder until a style conversion is written.
    return undef;
}
[24941]438
[26727]4391;
Note: See TracBrowser for help on using the repository browser.