root/gs3-extensions/html-to-expeditee/trunk/src/perllib/ExpediteeFrameIO.pm @ 26728

Revision 26728, 13.8 KB (checked in by davidb, 7 years ago)

Can now successfully obtain font size, font weight, font colour and font family information about each piece of text on a web page and convert to a corresponding text item on an Expeditee frame. Still need to account for text nodes with parents such as bold elements or heading elements.

Line 
1###########################################################################
2#
3# ExpediteeFrameIO.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ExpediteeFrameIO;
27
28use strict;
29
30use CssStyleToExpAttr;
31
32sub new
33{
34    my $class = shift(@_);
35    my $output_dir = shift(@_);
36    my $username   = shift(@_) || "greenstone";
37
38    my $self = { 'items' => [], 'lines' => [], 'constraints' => [] };
39
40    $self->{'output_dir'} = $output_dir;
41    $self->{'username'} = $username;
42
43    return bless $self, $class;
44}
45
46sub getFormattedDate
47{
48    my ($opt_mode) = @_;
49
50    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
51
52    my @mabbr = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
53
54    $year += 1900;
55
56    my $fdate;
57
58    if ((defined $opt_mode) && ($opt_mode eq "highPrecision")) {
59    $fdate = sprintf("%02d%s%04d[%02d:%0d2.%02d]",
60             $mday, $mabbr[$mon],$year,$hour,$min,$sec);
61    }
62    else {
63    $fdate = sprintf("%02d%s%04d[%02d:%0d2]",
64             $mday, $mabbr[$mon],$year,$hour,$min);
65    }
66
67    return $fdate;
68}
69
70sub convertStyleToAttr
71{
72    my ($css_attr) = @_;
73
74    my $exp_attr = {};
75
76    # load up some defaults for font information
77    my $exp_font_family = "s"; # t
78    my $exp_font_face = "r";
79    my $exp_font_size = "18";
80
81#    if (defined $css_attr->{'font-family'}) {
82#   $font_family = $font_family_lookup->[$css_attr->{'font-family'}];
83#    }
84
85    if (defined $css_attr->{'font-size'}) {
86       
87        my $css_font_size = $css_attr->{'font-size'};
88        $exp_font_size = CssStyleToExpAttr::convert_font_size($css_font_size);
89
90    }
91   
92    if(defined $css_attr->{'font-family'}){
93        my $obtain_font_family = $css_attr->{'font-family'};
94        my $new_exp_font_family = CssStyleToExpAttr::convert_font_family($obtain_font_family); 
95
96        if(defined $new_exp_font_family){
97            $exp_font_family = $new_exp_font_family;
98        }
99    }
100   
101    if(defined $css_attr->{'font-style'}){
102        print STDERR "Font style attribute found: ".$css_attr->{'font-style'}."\n";
103    }
104   
105    if(defined $css_attr->{'font-weight'}){
106        print STDERR "Font weight attribute found: ".$css_attr->{'font-weight'}."\n";
107       
108        my $css_font_face = $css_attr->{'font-weight'};
109        $exp_font_face = CssStyleToExpAttr::convert_font_face($css_font_face);
110    }
111   
112     $exp_attr->{'f'} = $exp_font_family.$exp_font_face.$exp_font_size;
113     print STDERR "exp attribute obtained: ".$exp_attr->{'f'}." ****\n";
114     
115    # background color
116
117
118    if (defined $css_attr->{'background-color'}) {
119        my $css_color = $css_attr->{'background-color'};
120
121        my $exp_color = CssStyleToExpAttr::convert_color($css_color);
122
123        $exp_attr->{'e'} = $exp_color;
124    }
125
126    return $exp_attr;
127}
128
129
130sub _nextFreeId
131{
132    my $self = shift @_;
133
134    my $items = $self->{'items'};
135    my $lines = $self->{'lines'};
136    my $constraints = $self->{'constraints'};
137   
138    # Ids start at base of 1
139    return 1+(scalar(@$items) + scalar(@$lines) + scalar(@$constraints));
140}
141
142
143sub _addItem
144{
145    my $self = shift @_;
146    my ($type,$attr) = @_;
147
148    # By this point 'attr' is synonymous with being an item
149
150    my $items = $self->{'items'};
151
152    my $next_free_id = $self->_nextFreeId();
153
154    $attr->{'_type'} = $type;
155    $attr->{'_id'} = $next_free_id;
156
157    push(@$items,$attr);
158
159    return ($attr,$next_free_id);
160}
161
162
163
164sub _setBaseDefaultAttributes
165{
166    my $self = shift @_;
167    my ($attr) = @_;
168
169    $attr->{'o'} = $self->{'username'};
170    $attr->{'s'} = getFormattedDate("highPrecision");
171    $attr->{'Q'} = "0";     # gradient
172    $attr->{'v'} = "S";     # dot type
173}
174
175
176sub setPointDefaultAttributes
177{
178    my $self = shift @_;
179    my ($attr) = @_;
180
181    $self->_setBaseDefaultAttributes($attr);
182}
183
184sub setTextDefaultAttributes
185{
186    my $self = shift @_;
187    my ($attr) = @_;
188
189    $self->_setBaseDefaultAttributes($attr);
190
191    if(defined $attr->{'d'}){
192
193    }
194    else {
195    $attr->{'d'} = "0 0 0"; # black color
196    }
197
198}
199
200
201sub setRectPointDefaultAttributes
202{
203    my $self = shift @_;
204    my ($attr) = @_;
205
206
207    $self->setPointDefaultAttributes($attr);
208
209    if((defined $attr->{'d'}) && (defined $attr->{'h'})){
210
211    }
212    else {
213        $attr->{'d'} = "80 80 80"; # grey color for rect lines
214        $attr->{'h'} = "1.0";     # line thickness
215    }
216}
217
218
219sub addRectPoint
220{
221    my $self = shift @_;
222    my ($x, $y, $attr) = @_;
223
224    my %attr_copy = %$attr; # make a private copy of 'attr'
225
226    $self->setRectPointDefaultAttributes(\%attr_copy);
227
228    my $items = $self->{'items'};
229
230    $attr_copy{'P'} = "$x $y";
231
232    return $self->_addItem("P",\%attr_copy);
233}
234
235sub addText
236{
237    my $self = shift @_;
238    my ($x,$y,$text,$w,$attr) = @_;
239   
240    my %attr_copy = %$attr; #make a private copy of 'attr'
241   
242    $self->setTextDefaultAttributes(\%attr_copy);
243    my $items = $self->{'items'};
244   
245    $attr_copy{'P'} = "$x $y";
246    $attr_copy{'T'} = $text;
247    $attr_copy{'w'} = "-$w" if (defined $w);
248   
249    return $self->_addItem("T",\%attr_copy);
250}
251
252sub addLine
253{
254    my $self = shift @_;
255
256    my ($item_id1,$item_id2) = @_;
257
258    my $lines = $self->{'lines'};
259    my $line_type = 1;
260
261    my $next_free_id = $self->_nextFreeId();
262
263    my $attr = { 'L' => "$next_free_id $line_type" };
264
265    $attr->{'s'} = "$item_id1 $item_id2";
266
267    push(@$lines,$attr);
268
269    return ($attr,$next_free_id);
270}
271
272
273sub addConstraint
274{
275    my $self = shift @_;
276
277    my ($orientation,$item_id1,$item_id2) = @_;
278
279    my $constraints = $self->{'constraints'};
280
281    my $orientation_type = undef;
282    if ($orientation eq "vertical") {
283    $orientation_type = 2;
284    }
285    else {
286    # assume horizontal for now
287    $orientation_type = 3;
288    }
289
290    my $next_free_id = $self->_nextFreeId();
291
292    my $attr = { 'C' => "$next_free_id $orientation_type" };
293
294    $attr->{'s'} = "$item_id1 $item_id2";
295
296    push(@$constraints,$attr);
297
298    return ($attr,$next_free_id);
299}
300
301
302sub addRect
303{
304    my $self = shift @_;
305
306    my ($xl, $yt, $xr, $yb, $attr) = @_;
307   
308    # do point in same order Expeditee puts them in
309    my ($p_tr,$p_tr_id) = $self->addRectPoint($xr,$yt,$attr);
310    my ($p_tl,$p_tl_id) = $self->addRectPoint($xl,$yt,$attr);
311    my ($p_bl,$p_bl_id) = $self->addRectPoint($xl,$yb,$attr);
312    my ($p_br,$p_br_id) = $self->addRectPoint($xr,$yb,$attr);
313
314    my ($l_t,$l_t_id) = $self->addLine($p_tr_id,$p_tl_id);
315    my ($l_l,$l_l_id) = $self->addLine($p_tl_id,$p_bl_id);
316    my ($l_b,$l_b_id) = $self->addLine($p_bl_id,$p_br_id);
317    my ($l_r,$l_r_id) = $self->addLine($p_br_id,$p_tr_id);
318
319    my ($c_t,$c_t_id) = $self->addConstraint("horizontal",$p_tr_id,$p_tl_id);
320    my ($c_l,$c_l_id) = $self->addConstraint("vertical"  ,$p_tl_id,$p_bl_id);
321    my ($c_b,$c_b_id) = $self->addConstraint("horizontal",$p_bl_id,$p_br_id);
322    my ($c_r,$c_r_id) = $self->addConstraint("vertical"  ,$p_br_id,$p_tr_id);
323
324    $p_tr->{'l'} = "$l_t_id $l_r_id";
325    $p_tl->{'l'} = "$l_t_id $l_l_id";
326    $p_bl->{'l'} = "$l_l_id $l_b_id";
327    $p_br->{'l'} = "$l_b_id $l_r_id";
328
329    $p_tr->{'c'} = "$c_t_id $c_r_id";
330    $p_tl->{'c'} = "$c_t_id $c_l_id";
331    $p_bl->{'c'} = "$c_l_id $c_b_id";
332    $p_br->{'c'} = "$c_b_id $c_r_id";
333
334}
335
336sub writeHeaderSection
337{
338    my $self = shift @_;
339
340    # Example header:
341    #   V 1
342    #   p 4
343    #   U davidb
344    #   D 09Jan2012[13:33]
345    #   M davidb
346    #   d 09Jan2012[13:33]
347    #   Z
348    #   
349
350    # Legend:
351    #   V = version
352    #   p = permision level
353    #   U = username (owner)
354    #   M = last modified by
355    #   D, d = date information
356    #   Z => end of section
357
358
359    my $username = $self->{'username'};
360
361    my $fdate = getFormattedDate();
362
363    print FOUT "V 1\n";
364    print FOUT "p 4\n";
365    print FOUT "U $username\n";
366    print FOUT "D $fdate\n";
367    print FOUT "M $username\n";
368    print FOUT "d $fdate\n";
369    print FOUT "Z\n\n";
370
371}
372
373
374sub writeItemsSection
375{
376    my $self = shift @_;
377
378    my $items = $self->{'items'};
379
380    foreach my $item (@$items) {
381
382    my $type = delete $item->{'_type'};
383    my $id = delete $item->{'_id'};
384
385    if(defined($type) && defined($id)) {
386
387         print FOUT "S $type $id\n";
388
389         foreach my $a (keys %$item) {
390             print FOUT "$a ", $item->{$a}, "\n";
391         }
392   
393             print FOUT "\n";
394
395      }
396
397     }
398
399    print FOUT "Z\n\n";
400}
401
402sub writeLinesSection
403{
404    my $self = shift @_;
405
406    my $lines = $self->{'lines'};
407
408    foreach my $line (@$lines) {
409
410    print FOUT "L ", $line->{'L'}, "\n";
411    print FOUT "s ", $line->{'s'}, "\n";
412   
413    print FOUT "\n";
414    }
415
416    print FOUT "Z\n\n";
417
418}
419
420sub writeConstraintsSection
421{
422    my $self = shift @_;
423
424    my $constraints = $self->{'constraints'};
425
426    foreach my $constraint (@$constraints) {
427    print FOUT "C ", $constraint->{'C'}, "\n";
428    print FOUT "s ", $constraint->{'s'}, "\n";
429   
430    print FOUT "\n";
431    }
432
433    print FOUT "Z\n\n";
434}
435
436sub writeStatisticsSection
437{
438    my $self = shift @_;
439
440    # Currently do nothing
441}
442
443sub saveZeroFrame
444{
445    my $self = shift @_;
446    my $file = "0.exp";
447
448    my $filename = &util::filename_cat($self->{'output_dir'},$file);
449
450    my $status = undef;
451
452    my $username = $self->{'username'};
453    my $fdate = getFormattedDate();
454
455    if (open(FOUT,">$filename")) {
456    binmode(FOUT,":utf8");
457   
458    print FOUT <<EOT;
459   
460V 1
461p 4
462U $username
463D $fdate
464M $username
465d $fdate
466Z
467
468Z
469
470Z
471
472Z
473   
474EOT
475
476    close(FOUT);
477    $status = 1;
478    }
479    else {
480    print STDERR "ExpediteeFrameIO::saveZeroFrame() Failed to open $filename for output\n";
481    $status = 0;
482    }
483
484    return $status;
485}
486
487sub writeAssocFilePath
488{
489    my $self = shift @_;
490    my ($assoc) = @_;
491   
492    my $x = 318;
493    my $y = 123;
494    my $text = "\@assocfilepath: $assoc";
495   
496    my $attr = {};
497   
498    #add data: gsdl.Metadata: assocfilepath to this piece of text.
499    $attr->{'D'} = "gsdl.Metadata: assocfilepath";
500   
501    $self->addText($x,$y,$text,undef,$attr);
502}
503
504sub saveFrame
505{
506    my $self = shift @_;
507    my ($file,$assoc) = @_;
508
509    if ($file eq "1.exp") {
510        $self->saveZeroFrame();
511    }
512
513    my $filename = &util::filename_cat($self->{'output_dir'},$file);
514
515    my $status = undef;
516
517    if (open(FOUT,">$filename")) {
518    binmode(FOUT,":utf8");
519   
520    if(defined $assoc){
521        $self->writeAssocFilePath($assoc);      #write assocfilepath out to frame.
522    }
523   
524    $self->writeHeaderSection();
525    $self->writeItemsSection();
526    $self->writeLinesSection();
527    $self->writeConstraintsSection();
528    $self->writeStatisticsSection();
529   
530   
531   
532    close(FOUT);
533    $status = 1;
534    }
535    else {
536    print STDERR "ExpediteeFrameIO::saveFrame() Failed to open $filename for output\n";
537    $status = 0;
538    }
539
540    return $status;
541}
542
543sub buildFrame
544{
545    my $self = shift @_;
546    my ($html_node) = @_;
547 
548    my $type = $html_node->{'type'};
549
550    if ($type eq "rect") {
551
552    my $rect = $html_node->{'rect'};
553    my $xl = $rect->{'xl'};
554    my $xr = $rect->{'xr'};
555    my $yt = $rect->{'yt'};
556    my $yb = $rect->{'yb'};
557
558    my $attr = convertStyleToAttr($html_node->{'style'});
559
560    if (defined $html_node->{'attr'}) {
561        # values provided in 'attr' explicitly overwrite any values
562        # derived from CSS style
563
564        my $direct_attr_str = $html_node->{'attr'};
565        my @direct_attr_array = split(/\s*;\s*/,$direct_attr_str);
566        foreach my $da (@direct_attr_array) {
567        my ($key,$val) = ($da =~ m/^(.)\s*(.*)$/);
568        $attr->{$key} = $val;
569        }
570    }
571   
572    #don't want to add font information to non-text items!
573    my $deleted = delete $attr->{'f'};
574
575    $self->addRect($xl,$yt,$xr,$yb,$attr);
576
577    if (defined $html_node->{'img'}) {
578
579        my $img_url = $html_node->{'img'};
580        $img_url =~ s/^http:\/\/(.*?)\/greenstone3(.*?)\///;
581        if ($img_url =~ m/^interfaces\//) {
582        $img_url = "greenstone3-svn/web/$img_url";
583        }
584        elsif ($img_url =~ m/^sites\//) {
585#       if ($img_url =~ m/^sites\//) {
586#       $img_url =~ s/^sites\/(.*?)\//images\//;
587            $img_url = "greenstone3-svn/web/$img_url";
588        }
589
590        my $x = $xl;
591        my $y = $yt;
592
593        my $attr = {};
594
595        my $img_text = "\@i: $img_url";
596
597        $self->addText($x,$y,$img_text,undef,$attr);
598    }
599
600    }
601    elsif ($type eq "text") {
602   
603    my $text = $html_node->{'text'};
604
605    my $x = $html_node->{'xl'};
606    my $y = $html_node->{'yt'};
607    my $w = $html_node->{'xr'} - $x +1;
608   
609    my $attr = convertStyleToAttr($html_node->{'style'});
610   
611    # fudge factor for now (based on default font size used)
612    $y += 16; # y-value of text item in Expeditee is it's base line
613    $x += 4;
614
615    my $data = $html_node->{'data'};
616    $attr->{'D'} = $data if defined $data;
617 
618    $self->addText($x,$y,$text,$w,$attr);
619    }
620    else {
621    print STDERR "ExpediteeFrameIO::buildFrame(): Warning, unrecognized type '$type'\n";
622    }
623
624    my $childNodes = $html_node->{'childNodes'};
625    foreach my $child_node (@$childNodes) {
626    $self->buildFrame($child_node);
627    }
628}
629
630
631sub saveLastFrameNumber
632{
633    my $self = shift @_;
634    my ($last_frame_number,$collect) = @_;
635
636    my $filename = &util::filename_cat($self->{'output_dir'},"frame.inf");
637
638    my $status = undef;
639
640    if (open(FNOUT,">$filename")) {
641    binmode(FNOUT,":utf8");
642
643    #writes frameset name concatenated with last frame number in the set to the frame.inf file.
644     #   my $getFramesetName = $self->{'output_dir'};
645   
646    #use collection name rather than the directory name where the frameset is stored, when saving the last frame name/number to the frame.inf file.
647    print FNOUT "$collect"."$last_frame_number";
648   
649    close(FNOUT);
650    $status = 1;
651    }
652    else {
653    print STDERR "ExpediteeFrameIO::saveLastFrameNumber() Failed to open $filename for output\n";
654   
655    $status = 0;
656    }
657
658    return $status;
659
660}
661
6621;
Note: See TracBrowser for help on using the browser.