root/gs3-extensions/html-to-expeditee/trunk/src/perllib/ExpediteeFrameIO.pm @ 26727

Revision 26727, 14.3 KB (checked in by davidb, 7 years ago)

Working on obtaining the correct font information for text elements on an html page.

Line 
1###########################################################################
2#
3# ExpediteeFrameIO.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ExpediteeFrameIO;
27
28use strict;
29
30use CssStyleToExpAttr;
31
# Constructor.
#   $output_dir : stored under 'output_dir'
#   $username   : stored under 'username'; any false value (undef, "",
#                 "0") is replaced by the default "greenstone"
# Returns the new object: a blessed hash that also carries empty
# 'items', 'lines' and 'constraints' lists.
sub new
{
    my ($class, $output_dir, $username) = @_;

    $username ||= "greenstone";

    my $self = {
        'items'       => [],
        'lines'       => [],
        'constraints' => [],
        'output_dir'  => $output_dir,
        'username'    => $username,
    };

    return bless($self, $class);
}
45
# Format the current local time as an Expeditee-style date stamp.
#   $opt_mode : optional; when it is the string "highPrecision" the
#               seconds are appended to the time part
# Returns e.g. "09Jan2012[13:33]" or, in high precision mode,
# "09Jan2012[13:33.07]".
sub getFormattedDate
{
    my ($opt_mode) = @_;

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);

    my @mabbr = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );

    $year += 1900;   # localtime() counts years from 1900

    # Minutes must be zero padded to two digits ("%02d").  The previous
    # format "%0d2" printed the unpadded minute followed by a literal "2".
    my $fdate = sprintf("%02d%s%04d[%02d:%02d",
                        $mday, $mabbr[$mon], $year, $hour, $min);

    if ((defined $opt_mode) && ($opt_mode eq "highPrecision")) {
        $fdate .= sprintf(".%02d", $sec);
    }

    $fdate .= "]";

    return $fdate;
}
69
# Translate a hash of CSS properties into a hash of Expeditee attributes.
#   $css_attr : hash ref keyed by CSS property name ('font-size',
#               'font-family', 'font-face', 'font-weight',
#               'background-color')
# Returns a hash ref that always contains 'f' (font family code, face
# code and size concatenated, "sr18" when nothing overrides the defaults)
# and contains 'e' only when a background colour was supplied.
# Progress is traced on STDERR.
sub convertStyleToAttr
{
    my ($css_attr) = @_;

    my $exp_attr = {};

    # Defaults for the three components of the Expeditee font string
    my $exp_font_family = "s";
    my $exp_font_face   = "r";
    my $exp_font_size   = "18";

    if (defined $css_attr->{'font-size'}) {
        my $css_font_size = $css_attr->{'font-size'};
        $exp_font_size = CssStyleToExpAttr::convert_font_size($css_font_size);

        print STDERR "CSS Font Size is defined: ".$css_font_size."\n";
        print STDERR "Equivalent expeditee font size: ".$exp_font_size."\n";
    }

    if (defined $css_attr->{'font-family'}) {
        my $obtain_font_family = $css_attr->{'font-family'};
        print STDERR "Font family attribute found: ".$obtain_font_family."\n";

        # Keep the default when the converter has no mapping (returns undef)
        my $new_exp_font_family = CssStyleToExpAttr::convert_font_family($obtain_font_family);
        if (defined $new_exp_font_family) {
            $exp_font_family = $new_exp_font_family;
        }
    }

    if (defined $css_attr->{'font-face'}) {
        # TODO: only reported for now; no conversion to an Expeditee
        # face symbol is performed yet.
        print STDERR "Font face attribute found: ".$css_attr->{'font-face'}."\n";
    }

    if (defined $css_attr->{'font-weight'}) {
        # TODO: only reported for now; the value is not used.
        print STDERR "Font weight attribute found: ".$css_attr->{'font-weight'}."\n";
    }

    $exp_attr->{'f'} = $exp_font_family.$exp_font_face.$exp_font_size;

    # Terminate the trace line so later diagnostics start on a fresh line
    print STDERR "**** final converted Expeditee font: ".$exp_attr->{'f'}."\n";

    # background color
    if (defined $css_attr->{'background-color'}) {
        my $css_color = $css_attr->{'background-color'};
        $exp_attr->{'e'} = CssStyleToExpAttr::convert_color($css_color);
    }

    return $exp_attr;
}
137
138
# Return the next unused id for this frame.  Ids start at 1 and are
# shared by items, lines and constraints, so the next id is one more
# than the total number of entries held in those three lists.
sub _nextFreeId
{
    my ($self) = @_;

    my $used = 0;
    foreach my $section ('items', 'lines', 'constraints') {
        my $entries = $self->{$section};
        $used += scalar(@$entries);
    }

    return $used + 1;
}
150
151
# Register an attribute hash as an item of the frame.
#   $type : stored in the item under '_type'
#   $attr : hash ref of attributes; from here on it *is* the item and
#           is modified in place (it gains '_type' and '_id')
# Returns the list (item hash ref, id assigned to it).
sub _addItem
{
    my ($self, $type, $attr) = @_;

    my $item_list = $self->{'items'};

    # The id has to be taken before the item joins the list
    my $id = $self->_nextFreeId();

    $attr->{'_id'}   = $id;
    $attr->{'_type'} = $type;

    push(@$item_list, $attr);

    return ($attr, $id);
}
170
171
172
# Fill in the attributes that are set on every item, unconditionally
# overwriting any value already present in $attr:
#   'o' = this frame's username
#   's' = high precision date stamp
#   'Q' = gradient
#   'v' = dot type
sub _setBaseDefaultAttributes
{
    my ($self, $attr) = @_;

    my $stamp = getFormattedDate("highPrecision");

    $attr->{'o'} = $self->{'username'};
    $attr->{'s'} = $stamp;
    $attr->{'Q'} = "0";
    $attr->{'v'} = "S";
}
183
184
# Apply the default attributes for a point item to $attr (in place).
# Points currently need nothing beyond the base defaults.
sub setPointDefaultAttributes
{
    my ($self, $attr) = @_;

    return $self->_setBaseDefaultAttributes($attr);
}
192
# Apply the default attributes for a text item to $attr (in place):
# the base defaults, plus black ('d' = "0 0 0") as the colour when the
# caller has not supplied a 'd' value.
sub setTextDefaultAttributes
{
    my ($self, $attr) = @_;

    $self->_setBaseDefaultAttributes($attr);

    # black color unless one was given
    $attr->{'d'} = "0 0 0" unless (defined $attr->{'d'});
}
208
209
# Apply the default attributes for a rectangle corner point to $attr
# (in place): the point defaults, plus a grey line colour ('d') and a
# line thickness ('h') of "1.0".
#
# Each of the two is defaulted on its own.  Previously both were
# overwritten unless *both* had been supplied, so a caller that gave
# only a colour (or only a thickness) silently lost it.
sub setRectPointDefaultAttributes
{
    my ($self, $attr) = @_;

    $self->setPointDefaultAttributes($attr);

    # grey color for rect lines
    $attr->{'d'} = "80 80 80" unless (defined $attr->{'d'});

    # line thickness
    $attr->{'h'} = "1.0" unless (defined $attr->{'h'});
}
226
227
# Add one corner point of a rectangle to the frame.
#   $x, $y : position, stored in the item as 'P' = "$x $y"
#   $attr  : optional hash ref of attributes; it is copied, never
#            modified, and may be omitted or undef
# Returns the list (item hash ref, item id) produced by _addItem().
sub addRectPoint
{
    my ($self, $x, $y, $attr) = @_;

    # Work on a private copy so the caller can reuse its hash for the
    # other corners; tolerate a missing attribute hash.
    my %attr_copy = (defined $attr) ? %$attr : ();

    $self->setRectPointDefaultAttributes(\%attr_copy);

    $attr_copy{'P'} = "$x $y";

    return $self->_addItem("P", \%attr_copy);
}
243
# Add a text item to the frame.
#   $x, $y : position, stored in the item as 'P' = "$x $y"
#   $text  : stored as 'T'
#   $w     : optional; when defined it is stored, prefixed with "-",
#            as 'w'
#   $attr  : optional hash ref of attributes; it is copied, never
#            modified, and may be omitted or undef
# Returns the list (item hash ref, item id) produced by _addItem().
sub addText
{
    my ($self, $x, $y, $text, $w, $attr) = @_;

    # Private copy of the caller's attributes; tolerate a missing hash
    my %attr_copy = (defined $attr) ? %$attr : ();

    $self->setTextDefaultAttributes(\%attr_copy);

    $attr_copy{'P'} = "$x $y";
    $attr_copy{'T'} = $text;
    $attr_copy{'w'} = "-$w" if (defined $w);

    return $self->_addItem("T", \%attr_copy);
}
260
# Record a line joining two existing items.
#   $item_id1, $item_id2 : ids of the two items, stored as 's'
# The new entry gets the next free id, stored together with the line
# type (always 1) under 'L'.
# Returns the list (line hash ref, line id).
sub addLine
{
    my ($self, $item_id1, $item_id2) = @_;

    my $line_list = $self->{'lines'};
    my $line_type = 1;

    my $id = $self->_nextFreeId();

    my $line = {
        'L' => "$id $line_type",
        's' => "$item_id1 $item_id2",
    };

    push(@$line_list, $line);

    return ($line, $id);
}
280
281
# Record a constraint between two existing items.
#   $orientation         : "vertical" gives constraint type 2; anything
#                          else is assumed to be horizontal (type 3)
#   $item_id1, $item_id2 : ids of the two items, stored as 's'
# The new entry gets the next free id, stored together with the
# constraint type under 'C'.
# Returns the list (constraint hash ref, constraint id).
sub addConstraint
{
    my ($self, $orientation, $item_id1, $item_id2) = @_;

    my $constraint_list = $self->{'constraints'};

    # assume horizontal for now when it is not vertical
    my $orientation_type = ($orientation eq "vertical") ? 2 : 3;

    my $id = $self->_nextFreeId();

    my $constraint = {
        'C' => "$id $orientation_type",
        's' => "$item_id1 $item_id2",
    };

    push(@$constraint_list, $constraint);

    return ($constraint, $id);
}
309
310
# Add a rectangle to the frame: four corner points, the four lines
# joining them and four constraints (horizontal for the top and bottom
# edges, vertical for the left and right ones).
#   $xl, $yt, $xr, $yb : left, top, right and bottom coordinates
#   $attr              : attributes handed to every corner point
# Every corner point finally gets 'l' (ids of the two lines meeting at
# it) and 'c' (ids of its two constraints).
sub addRect
{
    my ($self, $xl, $yt, $xr, $yb, $attr) = @_;

    # Create the points in the same order Expeditee puts them in:
    # top-right, top-left, bottom-left, bottom-right.
    my (@pt, @pt_id);
    foreach my $xy ([$xr, $yt], [$xl, $yt], [$xl, $yb], [$xr, $yb]) {
        my ($point, $id) = $self->addRectPoint($xy->[0], $xy->[1], $attr);
        push(@pt, $point);
        push(@pt_id, $id);
    }

    # Edge k joins corner k to the next corner round the rectangle,
    # which yields the top, left, bottom and right edge in that order.
    my @edge_ends = map { [ $pt_id[$_], $pt_id[($_ + 1) % 4] ] } (0 .. 3);

    # All four lines are created before any constraint, because ids
    # are handed out in creation order.
    my @line_id;
    foreach my $ends (@edge_ends) {
        my (undef, $id) = $self->addLine($ends->[0], $ends->[1]);
        push(@line_id, $id);
    }

    my @orientation = ("horizontal", "vertical", "horizontal", "vertical");
    my @con_id;
    foreach my $k (0 .. 3) {
        my (undef, $id) = $self->addConstraint($orientation[$k],
                                               $edge_ends[$k]->[0],
                                               $edge_ends[$k]->[1]);
        push(@con_id, $id);
    }

    my ($top, $left, $bottom, $right) = (0 .. 3);

    $pt[0]->{'l'} = "$line_id[$top] $line_id[$right]";
    $pt[1]->{'l'} = "$line_id[$top] $line_id[$left]";
    $pt[2]->{'l'} = "$line_id[$left] $line_id[$bottom]";
    $pt[3]->{'l'} = "$line_id[$bottom] $line_id[$right]";

    $pt[0]->{'c'} = "$con_id[$top] $con_id[$right]";
    $pt[1]->{'c'} = "$con_id[$top] $con_id[$left]";
    $pt[2]->{'c'} = "$con_id[$left] $con_id[$bottom]";
    $pt[3]->{'c'} = "$con_id[$bottom] $con_id[$right]";
}
344
# Write the header section of a frame to the FOUT filehandle, which
# must already be open for output.
#
# Example header:
#   V 1
#   p 4
#   U davidb
#   D 09Jan2012[13:33]
#   M davidb
#   d 09Jan2012[13:33]
#   Z
#
# Legend:
#   V    = version
#   p    = permission level
#   U    = username (owner)
#   M    = last modified by
#   D, d = date information
#   Z    = end of section
sub writeHeaderSection
{
    my ($self) = @_;

    my $user  = $self->{'username'};
    my $stamp = getFormattedDate();

    my @header = ("V 1", "p 4", "U $user", "D $stamp", "M $user", "d $stamp");

    foreach my $entry (@header) {
        print FOUT "$entry\n";
    }

    print FOUT "Z\n\n";
}
381
382
# Write the items section of a frame to the FOUT filehandle, which
# must already be open for output.
#
# Every item that has both '_type' and '_id' is written as an
# "S <type> <id>" line followed by one "<key> <value>" line per
# remaining attribute and a blank line.  Items lacking either of the
# two are skipped.  The section ends with "Z" and a blank line.
#
# The stored items are left untouched, so the same frame can be written
# more than once, and the attributes are written in sorted key order so
# that the output is reproducible between runs.
sub writeItemsSection
{
    my ($self) = @_;

    my $items = $self->{'items'};

    foreach my $item (@$items) {

        my $type = $item->{'_type'};
        my $id   = $item->{'_id'};

        next unless (defined($type) && defined($id));

        print FOUT "S $type $id\n";

        foreach my $key (sort keys %$item) {
            # bookkeeping entries already went into the "S" line
            next if ($key eq '_type' || $key eq '_id');

            print FOUT "$key ", $item->{$key}, "\n";
        }

        print FOUT "\n";
    }

    print FOUT "Z\n\n";
}
410
# Write the lines section of a frame to the FOUT filehandle, which
# must already be open for output: an "L" line, an "s" line and a blank
# line per recorded line, then "Z" and a blank line to end the section.
sub writeLinesSection
{
    my ($self) = @_;

    my $line_list = $self->{'lines'};

    foreach my $entry (@$line_list) {
        foreach my $tag ('L', 's') {
            print FOUT "$tag ", $entry->{$tag}, "\n";
        }
        print FOUT "\n";
    }

    print FOUT "Z\n\n";
}
428
# Write the constraints section of a frame to the FOUT filehandle,
# which must already be open for output: a "C" line, an "s" line and a
# blank line per recorded constraint, then "Z" and a blank line to end
# the section.
sub writeConstraintsSection
{
    my ($self) = @_;

    my $constraint_list = $self->{'constraints'};

    foreach my $entry (@$constraint_list) {
        foreach my $tag ('C', 's') {
            print FOUT "$tag ", $entry->{$tag}, "\n";
        }
        print FOUT "\n";
    }

    print FOUT "Z\n\n";
}
444
# Placeholder for the statistics section of a frame.
# Currently writes nothing.
sub writeStatisticsSection
{
    my $self = $_[0];
}
451
# Write "0.exp" into the output directory: a frame consisting of a
# header followed by three empty sections.
#
# Returns 1 on success.  Returns 0, after reporting on STDERR, when the
# file cannot be opened or cannot be closed cleanly.
sub saveZeroFrame
{
    my $self = shift @_;
    my $file = "0.exp";

    my $filename = &util::filename_cat($self->{'output_dir'},$file);

    my $username = $self->{'username'};
    my $fdate = getFormattedDate();

    # Three-argument open on a lexical handle: the file name is never
    # interpreted as part of the open mode, and no global handle is
    # touched.
    my $fout;
    unless (open($fout, '>', $filename)) {
        print STDERR "ExpediteeFrameIO::saveZeroFrame() Failed to open $filename for output\n";
        return 0;
    }

    binmode($fout, ":utf8");

    print $fout <<EOT;
   
V 1
p 4
U $username
D $fdate
M $username
d $fdate
Z

Z

Z

Z
   
EOT

    # Buffered write errors only surface when the handle is closed
    unless (close($fout)) {
        print STDERR "ExpediteeFrameIO::saveZeroFrame() Failed to close $filename: $!\n";
        return 0;
    }

    return 1;
}
495
# Add a text item reading "@assocfilepath: <assoc>" to the frame at the
# fixed position (318,123).  The item carries the data attribute
# 'D' = "gsdl.Metadata: assocfilepath".
#   $assoc : the value to record
# Returns whatever addText() returns.
sub writeAssocFilePath
{
    my ($self, $assoc) = @_;

    my ($x, $y) = (318, 123);

    # add data: gsdl.Metadata: assocfilepath to this piece of text.
    my $attr = { 'D' => "gsdl.Metadata: assocfilepath" };

    return $self->addText($x, $y, "\@assocfilepath: $assoc", undef, $attr);
}
512
# Write the frame built so far to a file in the output directory.
#   $file  : name of the frame file; when it is "1.exp" the zero frame
#            ("0.exp") is written first
#   $assoc : optional; when defined, an "@assocfilepath" text item is
#            added to the frame before it is written
#
# Returns 1 on success.  Returns 0, after reporting on STDERR, when the
# file cannot be opened or cannot be closed cleanly.
sub saveFrame
{
    my $self = shift @_;
    my ($file,$assoc) = @_;

    if ($file eq "1.exp") {
        $self->saveZeroFrame();
    }

    my $filename = &util::filename_cat($self->{'output_dir'},$file);

    # The FOUT handle is kept because the write*Section() methods print
    # to it.  The three-argument form stops the file name from being
    # interpreted as part of the open mode.
    unless (open(FOUT, '>', $filename)) {
        print STDERR "ExpediteeFrameIO::saveFrame() Failed to open $filename for output\n";
        return 0;
    }

    binmode(FOUT,":utf8");

    if (defined $assoc) {
        # must happen before the items section is written
        $self->writeAssocFilePath($assoc);
    }

    $self->writeHeaderSection();
    $self->writeItemsSection();
    $self->writeLinesSection();
    $self->writeConstraintsSection();
    $self->writeStatisticsSection();

    # Buffered write errors only surface when the handle is closed
    unless (close(FOUT)) {
        print STDERR "ExpediteeFrameIO::saveFrame() Failed to close $filename: $!\n";
        return 0;
    }

    return 1;
}
551
# Recursively turn a tree of HTML layout nodes into frame items.
#   $html_node : hash ref with a 'type' of "rect" or "text" and an
#                optional 'childNodes' array ref
#
# A "rect" node reads its 'rect' hash (xl, xr, yt, yb), its 'style' and
# an optional 'attr' string of ";" separated "<key> <value>" entries
# (the key is a single character); an 'img' URL additionally produces
# an "@i: <url>" text item at the rectangle's top-left corner.
# A "text" node reads 'text', 'xl', 'yt', 'xr', 'style' and an optional
# 'data' value.
# Any other type is reported on STDERR; its children are still visited.
sub buildFrame
{
    my $self = shift @_;
    my ($html_node) = @_;

    my $type = $html_node->{'type'};

    if ($type eq "rect") {

        my $rect = $html_node->{'rect'};
        my $xl = $rect->{'xl'};
        my $xr = $rect->{'xr'};
        my $yt = $rect->{'yt'};
        my $yb = $rect->{'yb'};

        my $attr = convertStyleToAttr($html_node->{'style'});

        if (defined $html_node->{'attr'}) {
            # values provided in 'attr' explicitly overwrite any values
            # derived from CSS style

            my $direct_attr_str = $html_node->{'attr'};
            my @direct_attr_array = split(/\s*;\s*/,$direct_attr_str);
            foreach my $da (@direct_attr_array) {
                my ($key,$val) = ($da =~ m/^(.)\s*(.*)$/);

                # An entry that does not parse (e.g. the empty string
                # left by a leading ';') must not end up in the
                # attributes under an undefined key.
                next unless (defined $key);

                $attr->{$key} = $val;
            }
        }

        #don't want to add font information to non-text items!
        my $deleted = delete $attr->{'f'};

        print STDERR "**** DELETED: $deleted \n";

        $self->addRect($xl,$yt,$xr,$yb,$attr);

        if (defined $html_node->{'img'}) {

            # Strip the server part, then point paths below
            # "interfaces/" or "sites/" at the greenstone3-svn web area.
            my $img_url = $html_node->{'img'};
            $img_url =~ s/^http:\/\/(.*?)\/greenstone3(.*?)\///;
            if (($img_url =~ m/^interfaces\//) || ($img_url =~ m/^sites\//)) {
                $img_url = "greenstone3-svn/web/$img_url";
            }

            my $img_attr = {};
            my $img_text = "\@i: $img_url";

            $self->addText($xl,$yt,$img_text,undef,$img_attr);
        }

    }
    elsif ($type eq "text") {

        my $text = $html_node->{'text'};

        my $x = $html_node->{'xl'};
        my $y = $html_node->{'yt'};
        my $w = $html_node->{'xr'} - $x +1;

        print STDERR "**** CHECKING STYLE NODE: ".$html_node->{'style'}."\n";
        my $attr = convertStyleToAttr($html_node->{'style'});

        #DEBUGGING
        if (defined $attr->{'f'}) {
            print STDERR "***** Checking text attributes:".$attr->{'f'}."\n";
            print STDERR "****************************************\n";
        }

        # fudge factor for now (based on default font size used)
        $y += 16; # y-value of text item in Expeditee is its base line
        $x += 4;

        my $data = $html_node->{'data'};
        $attr->{'D'} = $data if defined $data;

        $self->addText($x,$y,$text,$w,$attr);
    }
    else {
        print STDERR "ExpediteeFrameIO::buildFrame(): Warning, unrecognized type '$type'\n";
    }

    my $childNodes = $html_node->{'childNodes'};
    foreach my $child_node (@$childNodes) {
        $self->buildFrame($child_node);
    }
}
647
648
# Write "frame.inf" into the output directory.  Its whole content is
# the collection name immediately followed by the last frame number,
# with no trailing newline.
#   $last_frame_number : number of the last frame in the frameset
#   $collect           : collection name (used rather than the name of
#                        the directory the frameset is stored in)
#
# Returns 1 on success.  Returns 0, after reporting on STDERR, when the
# file cannot be opened or cannot be closed cleanly.
sub saveLastFrameNumber
{
    my $self = shift @_;
    my ($last_frame_number,$collect) = @_;

    my $filename = &util::filename_cat($self->{'output_dir'},"frame.inf");

    # Three-argument open on a lexical handle: the file name is never
    # interpreted as part of the open mode.
    my $fnout;
    unless (open($fnout, '>', $filename)) {
        print STDERR "ExpediteeFrameIO::saveLastFrameNumber() Failed to open $filename for output\n";
        return 0;
    }

    binmode($fnout, ":utf8");

    print $fnout "$collect"."$last_frame_number";

    # Buffered write errors only surface when the handle is closed
    unless (close($fnout)) {
        print STDERR "ExpediteeFrameIO::saveLastFrameNumber() Failed to close $filename: $!\n";
        return 0;
    }

    return 1;
}
679
6801;
Note: See TracBrowser for help on using the browser.