source: gs3-extensions/html-to-expeditee/trunk/src/perllib/ExpediteeFrameIO.pm@ 26728

Last change on this file since 26728 was 26728, checked in by davidb, 11 years ago

Can now successfully obtain font size, font weight, font colour and font family information about each piece of text on a web page and convert to a corresponding text item on an Expeditee frame. Still need to account for text nodes with parents such as bold elements or heading elements.

File size: 13.8 KB
Line 
1###########################################################################
2#
3# ExpediteeFrameIO.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ExpediteeFrameIO;
27
28use strict;
29
30use CssStyleToExpAttr;
31
32sub new
33{
34 my $class = shift(@_);
35 my $output_dir = shift(@_);
36 my $username = shift(@_) || "greenstone";
37
38 my $self = { 'items' => [], 'lines' => [], 'constraints' => [] };
39
40 $self->{'output_dir'} = $output_dir;
41 $self->{'username'} = $username;
42
43 return bless $self, $class;
44}
45
46sub getFormattedDate
47{
48 my ($opt_mode) = @_;
49
50 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
51
52 my @mabbr = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
53
54 $year += 1900;
55
56 my $fdate;
57
58 if ((defined $opt_mode) && ($opt_mode eq "highPrecision")) {
59 $fdate = sprintf("%02d%s%04d[%02d:%0d2.%02d]",
60 $mday, $mabbr[$mon],$year,$hour,$min,$sec);
61 }
62 else {
63 $fdate = sprintf("%02d%s%04d[%02d:%0d2]",
64 $mday, $mabbr[$mon],$year,$hour,$min);
65 }
66
67 return $fdate;
68}
69
70sub convertStyleToAttr
71{
72 my ($css_attr) = @_;
73
74 my $exp_attr = {};
75
76 # load up some defaults for font information
77 my $exp_font_family = "s"; # t
78 my $exp_font_face = "r";
79 my $exp_font_size = "18";
80
81# if (defined $css_attr->{'font-family'}) {
82# $font_family = $font_family_lookup->[$css_attr->{'font-family'}];
83# }
84
85 if (defined $css_attr->{'font-size'}) {
86
87 my $css_font_size = $css_attr->{'font-size'};
88 $exp_font_size = CssStyleToExpAttr::convert_font_size($css_font_size);
89
90 }
91
92 if(defined $css_attr->{'font-family'}){
93 my $obtain_font_family = $css_attr->{'font-family'};
94 my $new_exp_font_family = CssStyleToExpAttr::convert_font_family($obtain_font_family);
95
96 if(defined $new_exp_font_family){
97 $exp_font_family = $new_exp_font_family;
98 }
99 }
100
101 if(defined $css_attr->{'font-style'}){
102 print STDERR "Font style attribute found: ".$css_attr->{'font-style'}."\n";
103 }
104
105 if(defined $css_attr->{'font-weight'}){
106 print STDERR "Font weight attribute found: ".$css_attr->{'font-weight'}."\n";
107
108 my $css_font_face = $css_attr->{'font-weight'};
109 $exp_font_face = CssStyleToExpAttr::convert_font_face($css_font_face);
110 }
111
112 $exp_attr->{'f'} = $exp_font_family.$exp_font_face.$exp_font_size;
113 print STDERR "exp attribute obtained: ".$exp_attr->{'f'}." ****\n";
114
115 # background color
116
117
118 if (defined $css_attr->{'background-color'}) {
119 my $css_color = $css_attr->{'background-color'};
120
121 my $exp_color = CssStyleToExpAttr::convert_color($css_color);
122
123 $exp_attr->{'e'} = $exp_color;
124 }
125
126 return $exp_attr;
127}
128
129
130sub _nextFreeId
131{
132 my $self = shift @_;
133
134 my $items = $self->{'items'};
135 my $lines = $self->{'lines'};
136 my $constraints = $self->{'constraints'};
137
138 # Ids start at base of 1
139 return 1+(scalar(@$items) + scalar(@$lines) + scalar(@$constraints));
140}
141
142
143sub _addItem
144{
145 my $self = shift @_;
146 my ($type,$attr) = @_;
147
148 # By this point 'attr' is synonymous with being an item
149
150 my $items = $self->{'items'};
151
152 my $next_free_id = $self->_nextFreeId();
153
154 $attr->{'_type'} = $type;
155 $attr->{'_id'} = $next_free_id;
156
157 push(@$items,$attr);
158
159 return ($attr,$next_free_id);
160}
161
162
163
164sub _setBaseDefaultAttributes
165{
166 my $self = shift @_;
167 my ($attr) = @_;
168
169 $attr->{'o'} = $self->{'username'};
170 $attr->{'s'} = getFormattedDate("highPrecision");
171 $attr->{'Q'} = "0"; # gradient
172 $attr->{'v'} = "S"; # dot type
173}
174
175
176sub setPointDefaultAttributes
177{
178 my $self = shift @_;
179 my ($attr) = @_;
180
181 $self->_setBaseDefaultAttributes($attr);
182}
183
184sub setTextDefaultAttributes
185{
186 my $self = shift @_;
187 my ($attr) = @_;
188
189 $self->_setBaseDefaultAttributes($attr);
190
191 if(defined $attr->{'d'}){
192
193 }
194 else {
195 $attr->{'d'} = "0 0 0"; # black color
196 }
197
198}
199
200
201sub setRectPointDefaultAttributes
202{
203 my $self = shift @_;
204 my ($attr) = @_;
205
206
207 $self->setPointDefaultAttributes($attr);
208
209 if((defined $attr->{'d'}) && (defined $attr->{'h'})){
210
211 }
212 else {
213 $attr->{'d'} = "80 80 80"; # grey color for rect lines
214 $attr->{'h'} = "1.0"; # line thickness
215 }
216}
217
218
219sub addRectPoint
220{
221 my $self = shift @_;
222 my ($x, $y, $attr) = @_;
223
224 my %attr_copy = %$attr; # make a private copy of 'attr'
225
226 $self->setRectPointDefaultAttributes(\%attr_copy);
227
228 my $items = $self->{'items'};
229
230 $attr_copy{'P'} = "$x $y";
231
232 return $self->_addItem("P",\%attr_copy);
233}
234
235sub addText
236{
237 my $self = shift @_;
238 my ($x,$y,$text,$w,$attr) = @_;
239
240 my %attr_copy = %$attr; #make a private copy of 'attr'
241
242 $self->setTextDefaultAttributes(\%attr_copy);
243 my $items = $self->{'items'};
244
245 $attr_copy{'P'} = "$x $y";
246 $attr_copy{'T'} = $text;
247 $attr_copy{'w'} = "-$w" if (defined $w);
248
249 return $self->_addItem("T",\%attr_copy);
250}
251
252sub addLine
253{
254 my $self = shift @_;
255
256 my ($item_id1,$item_id2) = @_;
257
258 my $lines = $self->{'lines'};
259 my $line_type = 1;
260
261 my $next_free_id = $self->_nextFreeId();
262
263 my $attr = { 'L' => "$next_free_id $line_type" };
264
265 $attr->{'s'} = "$item_id1 $item_id2";
266
267 push(@$lines,$attr);
268
269 return ($attr,$next_free_id);
270}
271
272
273sub addConstraint
274{
275 my $self = shift @_;
276
277 my ($orientation,$item_id1,$item_id2) = @_;
278
279 my $constraints = $self->{'constraints'};
280
281 my $orientation_type = undef;
282 if ($orientation eq "vertical") {
283 $orientation_type = 2;
284 }
285 else {
286 # assume horizontal for now
287 $orientation_type = 3;
288 }
289
290 my $next_free_id = $self->_nextFreeId();
291
292 my $attr = { 'C' => "$next_free_id $orientation_type" };
293
294 $attr->{'s'} = "$item_id1 $item_id2";
295
296 push(@$constraints,$attr);
297
298 return ($attr,$next_free_id);
299}
300
301
302sub addRect
303{
304 my $self = shift @_;
305
306 my ($xl, $yt, $xr, $yb, $attr) = @_;
307
308 # do point in same order Expeditee puts them in
309 my ($p_tr,$p_tr_id) = $self->addRectPoint($xr,$yt,$attr);
310 my ($p_tl,$p_tl_id) = $self->addRectPoint($xl,$yt,$attr);
311 my ($p_bl,$p_bl_id) = $self->addRectPoint($xl,$yb,$attr);
312 my ($p_br,$p_br_id) = $self->addRectPoint($xr,$yb,$attr);
313
314 my ($l_t,$l_t_id) = $self->addLine($p_tr_id,$p_tl_id);
315 my ($l_l,$l_l_id) = $self->addLine($p_tl_id,$p_bl_id);
316 my ($l_b,$l_b_id) = $self->addLine($p_bl_id,$p_br_id);
317 my ($l_r,$l_r_id) = $self->addLine($p_br_id,$p_tr_id);
318
319 my ($c_t,$c_t_id) = $self->addConstraint("horizontal",$p_tr_id,$p_tl_id);
320 my ($c_l,$c_l_id) = $self->addConstraint("vertical" ,$p_tl_id,$p_bl_id);
321 my ($c_b,$c_b_id) = $self->addConstraint("horizontal",$p_bl_id,$p_br_id);
322 my ($c_r,$c_r_id) = $self->addConstraint("vertical" ,$p_br_id,$p_tr_id);
323
324 $p_tr->{'l'} = "$l_t_id $l_r_id";
325 $p_tl->{'l'} = "$l_t_id $l_l_id";
326 $p_bl->{'l'} = "$l_l_id $l_b_id";
327 $p_br->{'l'} = "$l_b_id $l_r_id";
328
329 $p_tr->{'c'} = "$c_t_id $c_r_id";
330 $p_tl->{'c'} = "$c_t_id $c_l_id";
331 $p_bl->{'c'} = "$c_l_id $c_b_id";
332 $p_br->{'c'} = "$c_b_id $c_r_id";
333
334}
335
336sub writeHeaderSection
337{
338 my $self = shift @_;
339
340 # Example header:
341 # V 1
342 # p 4
343 # U davidb
344 # D 09Jan2012[13:33]
345 # M davidb
346 # d 09Jan2012[13:33]
347 # Z
348 #
349
350 # Legend:
351 # V = version
352 # p = permision level
353 # U = username (owner)
354 # M = last modified by
355 # D, d = date information
356 # Z => end of section
357
358
359 my $username = $self->{'username'};
360
361 my $fdate = getFormattedDate();
362
363 print FOUT "V 1\n";
364 print FOUT "p 4\n";
365 print FOUT "U $username\n";
366 print FOUT "D $fdate\n";
367 print FOUT "M $username\n";
368 print FOUT "d $fdate\n";
369 print FOUT "Z\n\n";
370
371}
372
373
374sub writeItemsSection
375{
376 my $self = shift @_;
377
378 my $items = $self->{'items'};
379
380 foreach my $item (@$items) {
381
382 my $type = delete $item->{'_type'};
383 my $id = delete $item->{'_id'};
384
385 if(defined($type) && defined($id)) {
386
387 print FOUT "S $type $id\n";
388
389 foreach my $a (keys %$item) {
390 print FOUT "$a ", $item->{$a}, "\n";
391 }
392
393 print FOUT "\n";
394
395 }
396
397 }
398
399 print FOUT "Z\n\n";
400}
401
402sub writeLinesSection
403{
404 my $self = shift @_;
405
406 my $lines = $self->{'lines'};
407
408 foreach my $line (@$lines) {
409
410 print FOUT "L ", $line->{'L'}, "\n";
411 print FOUT "s ", $line->{'s'}, "\n";
412
413 print FOUT "\n";
414 }
415
416 print FOUT "Z\n\n";
417
418}
419
420sub writeConstraintsSection
421{
422 my $self = shift @_;
423
424 my $constraints = $self->{'constraints'};
425
426 foreach my $constraint (@$constraints) {
427 print FOUT "C ", $constraint->{'C'}, "\n";
428 print FOUT "s ", $constraint->{'s'}, "\n";
429
430 print FOUT "\n";
431 }
432
433 print FOUT "Z\n\n";
434}
435
436sub writeStatisticsSection
437{
438 my $self = shift @_;
439
440 # Currently do nothing
441}
442
443sub saveZeroFrame
444{
445 my $self = shift @_;
446 my $file = "0.exp";
447
448 my $filename = &util::filename_cat($self->{'output_dir'},$file);
449
450 my $status = undef;
451
452 my $username = $self->{'username'};
453 my $fdate = getFormattedDate();
454
455 if (open(FOUT,">$filename")) {
456 binmode(FOUT,":utf8");
457
458 print FOUT <<EOT;
459
460V 1
461p 4
462U $username
463D $fdate
464M $username
465d $fdate
466Z
467
468Z
469
470Z
471
472Z
473
474EOT
475
476 close(FOUT);
477 $status = 1;
478 }
479 else {
480 print STDERR "ExpediteeFrameIO::saveZeroFrame() Failed to open $filename for output\n";
481 $status = 0;
482 }
483
484 return $status;
485}
486
487sub writeAssocFilePath
488{
489 my $self = shift @_;
490 my ($assoc) = @_;
491
492 my $x = 318;
493 my $y = 123;
494 my $text = "\@assocfilepath: $assoc";
495
496 my $attr = {};
497
498 #add data: gsdl.Metadata: assocfilepath to this piece of text.
499 $attr->{'D'} = "gsdl.Metadata: assocfilepath";
500
501 $self->addText($x,$y,$text,undef,$attr);
502}
503
504sub saveFrame
505{
506 my $self = shift @_;
507 my ($file,$assoc) = @_;
508
509 if ($file eq "1.exp") {
510 $self->saveZeroFrame();
511 }
512
513 my $filename = &util::filename_cat($self->{'output_dir'},$file);
514
515 my $status = undef;
516
517 if (open(FOUT,">$filename")) {
518 binmode(FOUT,":utf8");
519
520 if(defined $assoc){
521 $self->writeAssocFilePath($assoc); #write assocfilepath out to frame.
522 }
523
524 $self->writeHeaderSection();
525 $self->writeItemsSection();
526 $self->writeLinesSection();
527 $self->writeConstraintsSection();
528 $self->writeStatisticsSection();
529
530
531
532 close(FOUT);
533 $status = 1;
534 }
535 else {
536 print STDERR "ExpediteeFrameIO::saveFrame() Failed to open $filename for output\n";
537 $status = 0;
538 }
539
540 return $status;
541}
542
543sub buildFrame
544{
545 my $self = shift @_;
546 my ($html_node) = @_;
547
548 my $type = $html_node->{'type'};
549
550 if ($type eq "rect") {
551
552 my $rect = $html_node->{'rect'};
553 my $xl = $rect->{'xl'};
554 my $xr = $rect->{'xr'};
555 my $yt = $rect->{'yt'};
556 my $yb = $rect->{'yb'};
557
558 my $attr = convertStyleToAttr($html_node->{'style'});
559
560 if (defined $html_node->{'attr'}) {
561 # values provided in 'attr' explicitly overwrite any values
562 # derived from CSS style
563
564 my $direct_attr_str = $html_node->{'attr'};
565 my @direct_attr_array = split(/\s*;\s*/,$direct_attr_str);
566 foreach my $da (@direct_attr_array) {
567 my ($key,$val) = ($da =~ m/^(.)\s*(.*)$/);
568 $attr->{$key} = $val;
569 }
570 }
571
572 #don't want to add font information to non-text items!
573 my $deleted = delete $attr->{'f'};
574
575 $self->addRect($xl,$yt,$xr,$yb,$attr);
576
577 if (defined $html_node->{'img'}) {
578
579 my $img_url = $html_node->{'img'};
580 $img_url =~ s/^http:\/\/(.*?)\/greenstone3(.*?)\///;
581 if ($img_url =~ m/^interfaces\//) {
582 $img_url = "greenstone3-svn/web/$img_url";
583 }
584 elsif ($img_url =~ m/^sites\//) {
585# if ($img_url =~ m/^sites\//) {
586# $img_url =~ s/^sites\/(.*?)\//images\//;
587 $img_url = "greenstone3-svn/web/$img_url";
588 }
589
590 my $x = $xl;
591 my $y = $yt;
592
593 my $attr = {};
594
595 my $img_text = "\@i: $img_url";
596
597 $self->addText($x,$y,$img_text,undef,$attr);
598 }
599
600 }
601 elsif ($type eq "text") {
602
603 my $text = $html_node->{'text'};
604
605 my $x = $html_node->{'xl'};
606 my $y = $html_node->{'yt'};
607 my $w = $html_node->{'xr'} - $x +1;
608
609 my $attr = convertStyleToAttr($html_node->{'style'});
610
611 # fudge factor for now (based on default font size used)
612 $y += 16; # y-value of text item in Expeditee is it's base line
613 $x += 4;
614
615 my $data = $html_node->{'data'};
616 $attr->{'D'} = $data if defined $data;
617
618 $self->addText($x,$y,$text,$w,$attr);
619 }
620 else {
621 print STDERR "ExpediteeFrameIO::buildFrame(): Warning, unrecognized type '$type'\n";
622 }
623
624 my $childNodes = $html_node->{'childNodes'};
625 foreach my $child_node (@$childNodes) {
626 $self->buildFrame($child_node);
627 }
628}
629
630
631sub saveLastFrameNumber
632{
633 my $self = shift @_;
634 my ($last_frame_number,$collect) = @_;
635
636 my $filename = &util::filename_cat($self->{'output_dir'},"frame.inf");
637
638 my $status = undef;
639
640 if (open(FNOUT,">$filename")) {
641 binmode(FNOUT,":utf8");
642
643 #writes frameset name concatenated with last frame number in the set to the frame.inf file.
644 # my $getFramesetName = $self->{'output_dir'};
645
646 #use collection name rather than the directory name where the frameset is stored, when saving the last frame name/number to the frame.inf file.
647 print FNOUT "$collect"."$last_frame_number";
648
649 close(FNOUT);
650 $status = 1;
651 }
652 else {
653 print STDERR "ExpediteeFrameIO::saveLastFrameNumber() Failed to open $filename for output\n";
654
655 $status = 0;
656 }
657
658 return $status;
659
660}
661
6621;
Note: See TracBrowser for help on using the repository browser.