root/gs3-extensions/html-to-expeditee/trunk/src/perllib/CssStyleToExpAttr.pm @ 26728

Revision 26728, 11.6 KB (checked in by davidb, 7 years ago)

Can now successfully obtain font size, font weight, font colour and font family information about each piece of text on a web page and convert to a corresponding text item on an Expeditee frame. Still need to account for text nodes with parents such as bold elements or heading elements.

Line 
1###########################################################################
2#
3# CssStyleToExpAttr.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package CssStyleToExpAttr;
27
# Maps each of the five CSS generic font families onto a one-letter code:
# "t" for serif, "m" for monospace and "s" for everything else.
# NOTE(review): the letters are presumably Expeditee font-family codes --
# confirm against the code that writes the frame.
my $font_generic_family_lookup = {
    # sans-serif (e.g., Helvetica), cursive (e.g., Zapf-Chancery) and
    # fantasy (e.g., Western) all share the same code.
    (map { $_ => "s" } qw(sans-serif cursive fantasy)),
    'serif'     => "t",    # (e.g., Times)
    'monospace' => "m",    # (e.g., Courier)
};
35
36
# Maps a lower-cased, specific font family name onto the CSS generic
# family it belongs to.  Keys must be looked up with lc().
#
# Each family is listed exactly once.  The "sans serif" entry used to be
# swallowed into a single multi-line key by mis-placed quotes, so it could
# never match; it is now a proper key of its own.
my $font_specific_family_lookup = {
    "arial"               => "sans-serif",
    "arial black"         => "sans-serif",
    "gadget"              => "sans-serif",
    "comic sans ms"       => "cursive",
    "comic sans"          => "cursive",
    "courier"             => "monospace",
    "courier new"         => "monospace",
    "georgia"             => "serif",
    "impact"              => "sans-serif",
    "charcoal"            => "sans-serif",
    "lucida console"      => "monospace",
    "monaco"              => "monospace",
    "lucida sans unicode" => "sans-serif",
    "lucida grande"       => "sans-serif",
    "palatino linotype"   => "serif",
    "book antiqua"        => "serif",
    "palatino"            => "serif",
    "tahoma"              => "sans-serif",
    "geneva"              => "sans-serif",
    "times"               => "serif",
    "times new roman"     => "serif",
    "trebuchet ms"        => "sans-serif",
    "trebuchet"           => "sans-serif",
    "verdana"             => "sans-serif",
    # Symbol fonts have no generic family to map to, so they stay disabled:
    #"symbol"             => "symbol",
    #"webdings"           => "symbol",
    #"wingdings"          => "symbol",
    #"zapf dingbats"      => "symbol",
    "ms sans serif"       => "sans-serif",
    "sans serif"          => "sans-serif",
    "ms serif"            => "serif",
    "new york"            => "serif"
};
74
# Example font names for each CSS generic family, keyed by generic family
# name.  As the variable name says, nothing in this file reads it; it is
# kept for reference only.
my $UNUSED_font_specific_family_mapping = do {

    my @serif_fonts = (
        "Times New Roman",
        "Bodoni",
        "Garamond",
        "Minion Web",
        "ITC Stone Serif",
        "MS Georgia",
        "Bitstream Cyberbit",
        "Adobe Minion Cyrillic",
        "Excelsior Cyrillic Upright",
        "Monotype Albion 70",
        "ER Bukinist",
        "New Peninim",
        "Raanana",
        "Ryumin Light-KL",
        "Kyokasho ICA",
        "Futo Min A101",
        "Lo Cicero Cherokee",
    );

    my @sans_serif_fonts = (
        "MS Trebuchet",
        "ITC Avant Garde Gothic",
        "MS Arial",
        "MS Verdana",
        "Univers",
        "Futura",
        "ITC Stone Sans",
        "Gill Sans",
        "Akzidenz Grotesk",
        "Helvetica",
        "Attika",
        "Typiko New Era",
        "MS Tahoma",
        "Monotype Gill Sans 571",
        "Helvetica Greek",
        "Helvetica Cyrillic",
        "ER Univers",
        "Lucida Sans Unicode",
        "Bastion",
        "Arial Hebrew",
        "Shin Go",
        "Heisei Kaku Gothic W5",
    );

    my @cursive_fonts = (
        "Caflisch Script",
        "Adobe Poetica",
        "Sanvito",
        "Ex Ponto",
        "Snell Roundhand",
        "Zapf-Chancery",
        "ER Architekt",
        "Corsiva",
        "DecoType Naskh",
        "Monotype Urdu 507",
    );

    my @fantasy_fonts = (
        "Alpha Geometrique",
        "Critter",
        "Cottonwood",
        "FB Reactor",
        "Studz",
    );

    my @monospace_fonts = (
        "Courier",
        "MS Courier New",
        "Prestige",
        "Everson Mono",
        "ER Kurier",
        "Osaka Monospaced",
    );

    # The leading "+" makes Perl read the braces as a hash constructor
    # rather than as a nested block.
    +{
        'serif'      => \@serif_fonts,
        'sans-serif' => \@sans_serif_fonts,
        'cursive'    => \@cursive_fonts,
        'fantasy'    => \@fantasy_fonts,
        'monospace'  => \@monospace_fonts,
    };
};
150
151
152                 
# Maps a CSS font-style keyword onto a one-letter code.  Oblique is
# treated the same as italic.
my $font_style_lookup = {
    (map { $_ => "i" } qw(italic oblique)),
    'normal' => "n",
};
158
159# Nothing that can currently be done for small-caps
# Both CSS font-variant keywords map onto the empty string.
my $font_varient_lookup = { map { $_ => "" } qw(normal small-caps) };
164
165
# Maps a CSS font-weight value onto a one-letter code: "n" for normal
# weight and "b" for bold.
#
# CSS defines 400 as equivalent to 'normal' and 700 as equivalent to
# 'bold'.  Only two weights are available here, so 100-500 collapse to
# "n" and 600-900 collapse to "b".
my $font_weight_lookup = {
    'normal'  => "n",
    'bold'    => "b",
    # 'bolder' and 'lighter' are relative to the inherited weight, which
    # is not known here, so these two are only approximations.
    'bolder'  => "b", # filler for now
    'lighter' => "n", # filler for now
    '100'     => "n",
    '200'     => "n",
    '300'     => "n",
    '400'     => "n",
    '500'     => "n",
    '600'     => "b",
    '700'     => "b",
    '800'     => "b",
    '900'     => "b"
};
181
182
# Maps each CSS absolute-size keyword onto a numeric font size.  The
# sizes climb in steps of two, from 8 for xx-small up to 20 for xx-large.
my $font_size_absolute = {
    'xx-small' => 8,
    'x-small'  => 10,
    'small'    => 12,
    'medium'   => 14,
    'large'    => 16,
    'x-large'  => 18,
    'xx-large' => 20,
};
192
193# px, em, percentage, and relative, such as larger
194
195## color, background-color
196
# Maps a lower-case CSS colour keyword onto its [red, green, blue]
# components, each in the range 0-255.  Both the "gray" and the "grey"
# spelling are present wherever the keyword has one.
my $color_name_lookup = {
    'aliceblue'            => [ 240, 248, 255 ],
    'antiquewhite'         => [ 250, 235, 215 ],
    'aqua'                 => [ 0, 255, 255 ],
    'aquamarine'           => [ 127, 255, 212 ],
    'azure'                => [ 240, 255, 255 ],
    'beige'                => [ 245, 245, 220 ],
    'bisque'               => [ 255, 228, 196 ],
    'black'                => [ 0, 0, 0 ],
    'blanchedalmond'       => [ 255, 235, 205 ],
    'blue'                 => [ 0, 0, 255 ],
    'blueviolet'           => [ 138, 43, 226 ],
    'brown'                => [ 165, 42, 42 ],
    'burlywood'            => [ 222, 184, 135 ],
    'cadetblue'            => [ 95, 158, 160 ],
    'chartreuse'           => [ 127, 255, 0 ],
    'chocolate'            => [ 210, 105, 30 ],
    'coral'                => [ 255, 127, 80 ],
    'cornflowerblue'       => [ 100, 149, 237 ],
    'cornsilk'             => [ 255, 248, 220 ],
    'crimson'              => [ 220, 20, 60 ],
    'cyan'                 => [ 0, 255, 255 ],
    'darkblue'             => [ 0, 0, 139 ],
    'darkcyan'             => [ 0, 139, 139 ],
    'darkgoldenrod'        => [ 184, 134, 11 ],
    'darkgray'             => [ 169, 169, 169 ],
    'darkgreen'            => [ 0, 100, 0 ],
    'darkgrey'             => [ 169, 169, 169 ],
    'darkkhaki'            => [ 189, 183, 107 ],
    'darkmagenta'          => [ 139, 0, 139 ],
    'darkolivegreen'       => [ 85, 107, 47 ],
    'darkorange'           => [ 255, 140, 0 ],
    'darkorchid'           => [ 153, 50, 204 ],
    'darkred'              => [ 139, 0, 0 ],
    'darksalmon'           => [ 233, 150, 122 ],
    'darkseagreen'         => [ 143, 188, 143 ],
    'darkslateblue'        => [ 72, 61, 139 ],
    'darkslategray'        => [ 47, 79, 79 ],
    'darkslategrey'        => [ 47, 79, 79 ],
    'darkturquoise'        => [ 0, 206, 209 ],
    'darkviolet'           => [ 148, 0, 211 ],
    'deeppink'             => [ 255, 20, 147 ],
    'deepskyblue'          => [ 0, 191, 255 ],
    'dimgray'              => [ 105, 105, 105 ],
    'dimgrey'              => [ 105, 105, 105 ],
    'dodgerblue'           => [ 30, 144, 255 ],
    'firebrick'            => [ 178, 34, 34 ],
    'floralwhite'          => [ 255, 250, 240 ],
    'forestgreen'          => [ 34, 139, 34 ],
    'fuchsia'              => [ 255, 0, 255 ],
    'gainsboro'            => [ 220, 220, 220 ],
    'ghostwhite'           => [ 248, 248, 255 ],
    'gold'                 => [ 255, 215, 0 ],
    'goldenrod'            => [ 218, 165, 32 ],
    'gray'                 => [ 128, 128, 128 ],
    'green'                => [ 0, 128, 0 ],
    'greenyellow'          => [ 173, 255, 47 ],
    'grey'                 => [ 128, 128, 128 ],
    'honeydew'             => [ 240, 255, 240 ],
    'hotpink'              => [ 255, 105, 180 ],
    'indianred'            => [ 205, 92, 92 ],
    'indigo'               => [ 75, 0, 130 ],
    'ivory'                => [ 255, 255, 240 ],
    'khaki'                => [ 240, 230, 140 ],
    'lavender'             => [ 230, 230, 250 ],
    'lavenderblush'        => [ 255, 240, 245 ],
    'lawngreen'            => [ 124, 252, 0 ],
    'lemonchiffon'         => [ 255, 250, 205 ],
    'lightblue'            => [ 173, 216, 230 ],
    'lightcoral'           => [ 240, 128, 128 ],
    'lightcyan'            => [ 224, 255, 255 ],
    'lightgoldenrodyellow' => [ 250, 250, 210 ],
    'lightgray'            => [ 211, 211, 211 ],
    'lightgreen'           => [ 144, 238, 144 ],
    'lightgrey'            => [ 211, 211, 211 ],
    'lightpink'            => [ 255, 182, 193 ],
    'lightsalmon'          => [ 255, 160, 122 ],
    'lightseagreen'        => [ 32, 178, 170 ],
    'lightskyblue'         => [ 135, 206, 250 ],
    'lightslategray'       => [ 119, 136, 153 ],
    'lightslategrey'       => [ 119, 136, 153 ],
    'lightsteelblue'       => [ 176, 196, 222 ],
    'lightyellow'          => [ 255, 255, 224 ],
    'lime'                 => [ 0, 255, 0 ],
    'limegreen'            => [ 50, 205, 50 ],
    'linen'                => [ 250, 240, 230 ],
    'magenta'              => [ 255, 0, 255 ],
    'maroon'               => [ 128, 0, 0 ],
    'mediumaquamarine'     => [ 102, 205, 170 ],
    'mediumblue'           => [ 0, 0, 205 ],
    'mediumorchid'         => [ 186, 85, 211 ],
    'mediumpurple'         => [ 147, 112, 219 ],
    'mediumseagreen'       => [ 60, 179, 113 ],
    'mediumslateblue'      => [ 123, 104, 238 ],
    'mediumspringgreen'    => [ 0, 250, 154 ],
    'mediumturquoise'      => [ 72, 209, 204 ],
    'mediumvioletred'      => [ 199, 21, 133 ],
    'midnightblue'         => [ 25, 25, 112 ],
    'mintcream'            => [ 245, 255, 250 ],
    'mistyrose'            => [ 255, 228, 225 ],
    'moccasin'             => [ 255, 228, 181 ],
    'navajowhite'          => [ 255, 222, 173 ],
    'navy'                 => [ 0, 0, 128 ],
    'oldlace'              => [ 253, 245, 230 ],
    'olive'                => [ 128, 128, 0 ],
    'olivedrab'            => [ 107, 142, 35 ],
    'orange'               => [ 255, 165, 0 ],
    'orangered'            => [ 255, 69, 0 ],
    'orchid'               => [ 218, 112, 214 ],
    'palegoldenrod'        => [ 238, 232, 170 ],
    'palegreen'            => [ 152, 251, 152 ],
    'paleturquoise'        => [ 175, 238, 238 ],
    'palevioletred'        => [ 219, 112, 147 ],
    'papayawhip'           => [ 255, 239, 213 ],
    'peachpuff'            => [ 255, 218, 185 ],
    'peru'                 => [ 205, 133, 63 ],
    'pink'                 => [ 255, 192, 203 ],
    'plum'                 => [ 221, 160, 221 ],
    'powderblue'           => [ 176, 224, 230 ],
    'purple'               => [ 128, 0, 128 ],
    'red'                  => [ 255, 0, 0 ],
    'rosybrown'            => [ 188, 143, 143 ],
    'royalblue'            => [ 65, 105, 225 ],
    'saddlebrown'          => [ 139, 69, 19 ],
    'salmon'               => [ 250, 128, 114 ],
    'sandybrown'           => [ 244, 164, 96 ],
    'seagreen'             => [ 46, 139, 87 ],
    'seashell'             => [ 255, 245, 238 ],
    'sienna'               => [ 160, 82, 45 ],
    'silver'               => [ 192, 192, 192 ],
    'skyblue'              => [ 135, 206, 235 ],
    'slateblue'            => [ 106, 90, 205 ],
    'slategray'            => [ 112, 128, 144 ],
    'slategrey'            => [ 112, 128, 144 ],
    'snow'                 => [ 255, 250, 250 ],
    'springgreen'          => [ 0, 255, 127 ],
    'steelblue'            => [ 70, 130, 180 ],
    'tan'                  => [ 210, 180, 140 ],
    'teal'                 => [ 0, 128, 128 ],
    'thistle'              => [ 216, 191, 216 ],
    'tomato'               => [ 255, 99, 71 ],
    'turquoise'            => [ 64, 224, 208 ],
    'violet'               => [ 238, 130, 238 ],
    'wheat'                => [ 245, 222, 179 ],
    'white'                => [ 255, 255, 255 ],
    'whitesmoke'           => [ 245, 245, 245 ],
    'yellow'               => [ 255, 255, 0 ],
    'yellowgreen'          => [ 154, 205, 50 ],
};
346
347
# Convert a CSS font size into a whole-number font size.
#
# Parameters:
#   $css_font_size - a CSS pixel length such as "12px" or "13.333px".
#
# Returns:
#   The pixel value with any fraction truncated, or undef when the value
#   is undefined or is not a pixel length (em, %, keywords, ...).
sub convert_font_size
{
    my ($css_font_size) = @_;

    my $exp_font_size = undef;

    # The fractional part must be optional.  Requiring it, as this
    # pattern once did, rejects every plain integer size such as "12px".
    if (defined $css_font_size
        && $css_font_size =~ m/^\s*(\d+(?:\.\d*)?)px\s*$/i) {
        $exp_font_size = int($1);
    }

    return $exp_font_size;
}
361
# Convert a CSS font-weight value into a one-letter font face code.
#
# Parameters:
#   $css_font_face - a CSS font-weight value: the keyword "bold" (any
#                    case) or a numeric weight such as "700".  Browsers
#                    commonly report the computed weight numerically.
#
# Returns:
#   "b" when the weight is bold (the keyword, or a number of 600 or
#   more), otherwise "r".
#
# TODO: also need to take into account italic and italic bold.
sub convert_font_face
{
    my ($css_font_face) = @_;

    my $weight = defined $css_font_face ? lc($css_font_face) : "";
    $weight =~ s/^\s+|\s+$//g;

    # CSS defines 700 as 'bold'; 600 and above is treated as bold here
    # because only a regular and a bold face are available.
    my $is_bold = ($weight eq "bold")
        || ($weight =~ m/^\d+$/ && $weight >= 600);

    my $exp_font_face = $is_bold ? "b" : "r";

    return $exp_font_face;
}
375
376
# Convert a CSS colour into a "R G B" string whose three components are
# whole-number percentages (0-100) of full intensity.
#
# Parameters:
#   $css_color - one of:
#                  rgb(r, g, b)      components 0-255
#                  rgba(r, g, b, a)  as above, plus an alpha value
#                  #rrggbb or #rgb   hexadecimal notation
#
# Returns:
#   The colour string, e.g. "100 0 0" for rgb(255, 0, 0).  Returns undef
#   for "transparent", for a fully transparent rgba() value (alpha 0),
#   and for anything that is not recognised.
sub convert_color
{
    my ($css_color) = @_;

    my $exp_color;
    return $exp_color unless defined $css_color;

    my @components;

    if ($css_color =~ m/^\s*rgba?\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*(?:,\s*(\d*\.?\d+)\s*)?\)\s*$/i) {
        my ($red, $green, $blue, $alpha) = ($1, $2, $3, $4);

        # An alpha of 0 means fully transparent, so there is no colour.
        unless (defined $alpha && $alpha == 0) {
            @components = ($red, $green, $blue);
        }
    }
    elsif ($css_color =~ m/^\s*#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})\s*$/i) {
        @components = map { hex($_) } ($1, $2, $3);
    }
    elsif ($css_color =~ m/^\s*#([0-9a-f])([0-9a-f])([0-9a-f])\s*$/i) {
        # Short hex notation: each digit is doubled, so #f80 is #ff8800.
        @components = map { hex($_ x 2) } ($1, $2, $3);
    }

    if (@components) {
        # Clamp out-of-range components to 255, then scale to 0-100.
        $exp_color = join(" ",
            map { int(100 * ($_ > 255 ? 255 : $_) / 255) } @components);
    }

    return $exp_color;
}
402
# Convert a CSS font-family value into a font family tag.
#
# Parameters:
#   $css_font_family - a single font name or a comma-separated fallback
#                      list, e.g. "'Times New Roman', Times, serif".
#
# Returns:
#   The first non-empty font name in the list, with surrounding
#   whitespace and quotes removed, followed by "_".  Returns undef when
#   the value is undefined or names no font.
#   NOTE(review): the trailing "_" is presumably what the frame format
#   expects after a family name -- confirm against the frame writer.
sub convert_font_family
{
    my ($css_font_family) = @_;

    my $exp_font_family;
    return $exp_font_family unless defined $css_font_family;

    # A single known family and the first entry of a list give the same
    # result, so every value is handled as a list.
    foreach my $candidate (split(/,/, $css_font_family)) {
        my $font_name = $candidate;

        $font_name =~ s/^\s+|\s+$//g;           # surrounding whitespace
        $font_name =~ s/^(["'])(.*)\1$/$2/;     # matching outer quotes
        $font_name =~ s/^\s+|\s+$//g;           # whitespace inside quotes

        next if $font_name eq "";

        $exp_font_family = $font_name."_";
        last;
    }

    return $exp_font_family;
}
429
# Placeholder for converting a CSS font-style value.
#
# Parameters:
#   $css_font_style - the CSS font-style value; accepted but not yet used.
#
# Returns:
#   Always undef: no conversion has been implemented.
sub convert_font_style
{
    my $css_font_style = shift;

    return undef;
}
438   
4391;
Note: See TracBrowser for help on using the browser.