source: gs3-extensions/html-to-expeditee/trunk/src/perllib/ExpediteeFrameIO.pm@ 26727

Last change on this file since 26727 was 26727, checked in by davidb, 11 years ago

Working on obtaining the correct font information for text elements on an html page.

File size: 14.3 KB
RevLine 
[24934]1###########################################################################
2#
3# ExpediteeFrameIO.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package ExpediteeFrameIO;
27
28use strict;
29
[24941]30use CssStyleToExpAttr;
31
# Constructor.
#
#   $output_dir - stored as 'output_dir'; the save* methods join it with the
#                 frame file name when opening files for output
#   $username   - stored as 'username'; falls back to "greenstone" when it
#                 is omitted or otherwise false
#
# The new object starts with empty 'items', 'lines' and 'constraints' lists.
sub new
{
    my ($class, $output_dir, $username) = @_;

    $username = "greenstone" unless $username;

    my $self = {
        'items'       => [],
        'lines'       => [],
        'constraints' => [],
        'output_dir'  => $output_dir,
        'username'    => $username,
    };

    return bless($self, $class);
}
45
# Format the current local time the way the frame header example shows it,
# e.g. "09Jan2012[13:33]".
#
#   $opt_mode - optional; when it is the string "highPrecision" the seconds
#               are appended as well, giving e.g. "09Jan2012[13:33.07]"
#
# Called as a plain function (not as a method).  Returns the formatted string.
sub getFormattedDate
{
    my ($opt_mode) = @_;

    my @mabbr = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );

    my ($sec,$min,$hour,$mday,$mon,$year) = localtime(time);

    $year += 1900; # localtime() reports years since 1900

    my $fdate;

    # Note: minutes must be formatted with "%02d".  The previous "%0d2"
    # printed the minutes unpadded followed by a literal "2".
    if ((defined $opt_mode) && ($opt_mode eq "highPrecision")) {
        $fdate = sprintf("%02d%s%04d[%02d:%02d.%02d]",
                         $mday, $mabbr[$mon], $year, $hour, $min, $sec);
    }
    else {
        $fdate = sprintf("%02d%s%04d[%02d:%02d]",
                         $mday, $mabbr[$mon], $year, $hour, $min);
    }

    return $fdate;
}
69
# Translate a hash of CSS properties into a hash of Expeditee attributes.
#
#   $css_attr - hash ref keyed by CSS property name; the properties consulted
#               are 'font-size', 'font-family', 'font-face', 'font-weight'
#               and 'background-color'
#
# Returns a hash ref that always contains:
#   'f' - font code: family letter . face letter . size (default "sr18")
# and, when 'background-color' is present:
#   'e' - the value produced by CssStyleToExpAttr::convert_color()
#
# Called as a plain function.  Emits debugging output on STDERR.
sub convertStyleToAttr
{
    my ($css_attr) = @_;

    my $exp_attr = {};

    # load up some defaults for font information
    my $exp_font_family = "s"; # t
    my $exp_font_face = "r";
    my $exp_font_size = "18";

    if (defined $css_attr->{'font-size'}) {

        my $css_font_size = $css_attr->{'font-size'};
        my $new_exp_font_size = CssStyleToExpAttr::convert_font_size($css_font_size);

        # Keep the default when the converter cannot map the CSS value,
        # mirroring how the font family is handled below; otherwise the
        # font code would be built with no size at all.
        if (defined $new_exp_font_size) {
            $exp_font_size = $new_exp_font_size;
        }

        print STDERR "CSS Font Size is defined: ".$css_font_size."\n";
        print STDERR "Equivalent expeditee font size: ".$exp_font_size."\n";
    }

    if (defined $css_attr->{'font-family'}) {
        my $obtain_font_family = $css_attr->{'font-family'};
        print STDERR "Font family attribute found: ".$obtain_font_family."\n";
        my $new_exp_font_family = CssStyleToExpAttr::convert_font_family($obtain_font_family);

        if (defined $new_exp_font_family) {
            $exp_font_family = $new_exp_font_family;
        }
    }

    if (defined $css_attr->{'font-face'}) {
        print STDERR "Font face attribute found: ".$css_attr->{'font-face'}."\n";

        # TODO: call a method from CssStyleToExpAttr.pm to extract the
        # matching font face symbol; for now the default face is kept.
    }

    if (defined $css_attr->{'font-weight'}) {
        # Only reported for now; the weight does not influence the result.
        print STDERR "Font weight attribute found: ".$css_attr->{'font-weight'}."\n";
    }

    $exp_attr->{'f'} = $exp_font_family.$exp_font_face.$exp_font_size;

    # Terminated with a newline so the next STDERR message starts on its
    # own line.
    print STDERR "**** final converted Expeditee font: ".$exp_attr->{'f'}."\n";

    # background color
    if (defined $css_attr->{'background-color'}) {
        my $css_color = $css_attr->{'background-color'};

        my $exp_color = CssStyleToExpAttr::convert_color($css_color);

        $exp_attr->{'e'} = $exp_color;
    }

    return $exp_attr;
}
137
138
# Return the next unused id for this frame.
#
# Items, lines and constraints share one id space, so the next id is one
# more than the total number of entries already stored in the three lists
# (ids start at a base of 1).
sub _nextFreeId
{
    my ($self) = @_;

    my $ids_in_use = 0;
    foreach my $section ('items', 'lines', 'constraints') {
        $ids_in_use += scalar(@{$self->{$section}});
    }

    return $ids_in_use + 1;
}
150
151
# Register an attribute hash as a new item on the frame.
#
#   $type - item type code (the callers in this file pass "P" or "T")
#   $attr - hash ref of attributes; it is stored as-is (not copied) and is
#           tagged with the bookkeeping keys '_type' and '_id'
#
# Returns the list ($attr, $id) where $id is the id assigned to the item.
sub _addItem
{
    my ($self, $type, $attr) = @_;

    my $item_id = $self->_nextFreeId();

    # From here on the attribute hash *is* the item.
    $attr->{'_type'} = $type;
    $attr->{'_id'} = $item_id;

    push(@{$self->{'items'}}, $attr);

    return ($attr, $item_id);
}
170
171
172
# Fill in the attributes every item gets, overwriting any existing values:
#   'o' - the frame's username
#   's' - the current date/time in high-precision form
#   'Q' - gradient
#   'v' - dot type
sub _setBaseDefaultAttributes
{
    my ($self, $attr) = @_;

    my $timestamp = getFormattedDate("highPrecision");

    $attr->{'o'} = $self->{'username'};
    $attr->{'s'} = $timestamp;
    $attr->{'Q'} = "0"; # gradient
    $attr->{'v'} = "S"; # dot type
}
183
184
# Apply the default attributes for a point item.
#
# Points currently need nothing beyond the shared base defaults, so this
# simply delegates to _setBaseDefaultAttributes().
sub setPointDefaultAttributes
{
    my ($self, $attr) = @_;

    $self->_setBaseDefaultAttributes($attr);
}
192
# Apply the default attributes for a text item: the shared base defaults,
# plus a black colour ('d') when the caller has not supplied one.
sub setTextDefaultAttributes
{
    my ($self, $attr) = @_;

    $self->_setBaseDefaultAttributes($attr);

    # An explicitly provided colour is left untouched.
    unless (defined $attr->{'d'}) {
        $attr->{'d'} = "0 0 0"; # black color
    }
}
208
209
# Apply the default attributes for a corner point of a rectangle: the point
# defaults, plus a grey line colour ('d') and a line thickness ('h').
#
# Each of 'd' and 'h' is defaulted independently, so a caller that supplies
# only one of them keeps that value.  (Previously both were overwritten
# unless *both* had been supplied.)
sub setRectPointDefaultAttributes
{
    my ($self, $attr) = @_;

    $self->setPointDefaultAttributes($attr);

    unless (defined $attr->{'d'}) {
        $attr->{'d'} = "80 80 80"; # grey color for rect lines
    }

    unless (defined $attr->{'h'}) {
        $attr->{'h'} = "1.0"; # line thickness
    }
}
226
227
# Add one corner point of a rectangle to the frame.
#
#   $x, $y - position, stored in the 'P' attribute as "$x $y"
#   $attr  - optional hash ref of attributes; it is copied, so the caller's
#            hash is never modified
#
# Returns whatever _addItem() returns: the list (item hash ref, item id).
sub addRectPoint
{
    my $self = shift @_;
    my ($x, $y, $attr) = @_;

    # make a private copy of 'attr' (tolerating a missing hash)
    my %attr_copy = %{ $attr || {} };

    $self->setRectPointDefaultAttributes(\%attr_copy);

    $attr_copy{'P'} = "$x $y";

    return $self->_addItem("P",\%attr_copy);
}
243
# Add a text item to the frame.
#
#   $x, $y - position, stored in the 'P' attribute as "$x $y"
#   $text  - stored in the 'T' attribute
#   $w     - optional; when defined it is stored negated ("-$w") in the 'w'
#            attribute (presumably the item width -- confirm against the
#            Expeditee file format)
#   $attr  - optional hash ref of attributes; it is copied, so the caller's
#            hash is never modified
#
# Returns whatever _addItem() returns: the list (item hash ref, item id).
sub addText
{
    my $self = shift @_;
    my ($x,$y,$text,$w,$attr) = @_;

    # make a private copy of 'attr' (tolerating a missing hash)
    my %attr_copy = %{ $attr || {} };

    $self->setTextDefaultAttributes(\%attr_copy);

    $attr_copy{'P'} = "$x $y";
    $attr_copy{'T'} = $text;
    $attr_copy{'w'} = "-$w" if (defined $w);

    return $self->_addItem("T",\%attr_copy);
}
260
# Record a line joining two existing items.
#
#   $item_id1, $item_id2 - ids of the two items the line connects
#
# The entry stores "<line id> <line type>" under 'L' (the line type is
# always 1 here) and "<id1> <id2>" under 's'.
#
# Returns the list (line hash ref, line id).
sub addLine
{
    my ($self, $item_id1, $item_id2) = @_;

    my $line_type = 1;
    my $line_id = $self->_nextFreeId();

    my $line = {
        'L' => "$line_id $line_type",
        's' => "$item_id1 $item_id2",
    };

    push(@{$self->{'lines'}}, $line);

    return ($line, $line_id);
}
280
281
# Record a constraint between two existing items.
#
#   $orientation         - "vertical" selects constraint type 2; anything
#                          else is assumed to be horizontal (type 3)
#   $item_id1, $item_id2 - ids of the two constrained items
#
# The entry stores "<constraint id> <type>" under 'C' and "<id1> <id2>"
# under 's'.
#
# Returns the list (constraint hash ref, constraint id).
sub addConstraint
{
    my ($self, $orientation, $item_id1, $item_id2) = @_;

    # assume horizontal for now when the orientation is not "vertical"
    my $orientation_type = ($orientation eq "vertical") ? 2 : 3;

    my $constraint_id = $self->_nextFreeId();

    my $constraint = {
        'C' => "$constraint_id $orientation_type",
        's' => "$item_id1 $item_id2",
    };

    push(@{$self->{'constraints'}}, $constraint);

    return ($constraint, $constraint_id);
}
309
310
# Add a rectangle to the frame as four corner points, four lines joining
# them and four constraints between neighbouring corners.
#
#   $xl, $yt - left x and top y
#   $xr, $yb - right x and bottom y
#   $attr    - attribute hash ref handed to addRectPoint() for every corner
#
# Ids are allocated in creation order, so the order of the calls below
# must not change.
sub addRect
{
    my ($self, $xl, $yt, $xr, $yb, $attr) = @_;

    # Corner points, in the same order Expeditee puts them in.
    my ($top_right,    $tr_id) = $self->addRectPoint($xr, $yt, $attr);
    my ($top_left,     $tl_id) = $self->addRectPoint($xl, $yt, $attr);
    my ($bottom_left,  $bl_id) = $self->addRectPoint($xl, $yb, $attr);
    my ($bottom_right, $br_id) = $self->addRectPoint($xr, $yb, $attr);

    # Edges: top, left, bottom, right (only their ids are needed here).
    my (undef, $top_line_id)    = $self->addLine($tr_id, $tl_id);
    my (undef, $left_line_id)   = $self->addLine($tl_id, $bl_id);
    my (undef, $bottom_line_id) = $self->addLine($bl_id, $br_id);
    my (undef, $right_line_id)  = $self->addLine($br_id, $tr_id);

    # Constraints: one per edge, between the edge's two end points.
    my (undef, $top_con_id)    = $self->addConstraint("horizontal", $tr_id, $tl_id);
    my (undef, $left_con_id)   = $self->addConstraint("vertical",   $tl_id, $bl_id);
    my (undef, $bottom_con_id) = $self->addConstraint("horizontal", $bl_id, $br_id);
    my (undef, $right_con_id)  = $self->addConstraint("vertical",   $br_id, $tr_id);

    # Each corner lists the two lines ('l') and the two constraints ('c')
    # that meet at it.
    $top_right->{'l'} = "$top_line_id $right_line_id";
    $top_right->{'c'} = "$top_con_id $right_con_id";

    $top_left->{'l'} = "$top_line_id $left_line_id";
    $top_left->{'c'} = "$top_con_id $left_con_id";

    $bottom_left->{'l'} = "$left_line_id $bottom_line_id";
    $bottom_left->{'c'} = "$left_con_id $bottom_con_id";

    $bottom_right->{'l'} = "$bottom_line_id $right_line_id";
    $bottom_right->{'c'} = "$bottom_con_id $right_con_id";
}
344
# Write the frame header section to the (already open) FOUT filehandle.
#
# Example header:
#   V 1
#   p 4
#   U davidb
#   D 09Jan2012[13:33]
#   M davidb
#   d 09Jan2012[13:33]
#   Z
#
# Legend:
#   V    = version
#   p    = permission level
#   U    = username (owner)
#   M    = last modified by
#   D, d = date information
#   Z    = end of section
sub writeHeaderSection
{
    my ($self) = @_;

    my $username = $self->{'username'};
    my $fdate = getFormattedDate();

    my @header_lines = (
        "V 1",
        "p 4",
        "U $username",
        "D $fdate",
        "M $username",
        "d $fdate",
        "Z",
    );

    foreach my $header_line (@header_lines) {
        print FOUT "$header_line\n";
    }

    # blank line separating this section from the next one
    print FOUT "\n";
}
381
382
# Write the items section to the (already open) FOUT filehandle.
#
# Every item that has both a '_type' and an '_id' is written as an
# "S <type> <id>" line followed by one "<attribute> <value>" line per
# attribute and a blank line.  The section is terminated by "Z" and a
# blank line.
#
# The bookkeeping keys '_type' and '_id' are skipped rather than deleted,
# so the stored items stay intact and the frame can be written more than
# once.  Attributes are written in sorted key order so that the output is
# reproducible from run to run.
sub writeItemsSection
{
    my $self = shift @_;

    my $items = $self->{'items'};

    foreach my $item (@$items) {

        my $type = $item->{'_type'};
        my $id = $item->{'_id'};

        # items lacking either bookkeeping value are not written out
        next unless (defined($type) && defined($id));

        print FOUT "S $type $id\n";

        foreach my $key (sort keys %$item) {
            next if (($key eq '_type') || ($key eq '_id'));
            print FOUT "$key ", $item->{$key}, "\n";
        }

        print FOUT "\n";
    }

    print FOUT "Z\n\n";
}
410
# Write the lines section to the (already open) FOUT filehandle: for each
# stored line its 'L' and 's' values followed by a blank line, then the
# "Z" end-of-section marker and a blank line.
sub writeLinesSection
{
    my ($self) = @_;

    foreach my $line (@{$self->{'lines'}}) {
        print FOUT "L $line->{'L'}\n";
        print FOUT "s $line->{'s'}\n";
        print FOUT "\n";
    }

    print FOUT "Z\n\n";
}
428
# Write the constraints section to the (already open) FOUT filehandle: for
# each stored constraint its 'C' and 's' values followed by a blank line,
# then the "Z" end-of-section marker and a blank line.
sub writeConstraintsSection
{
    my ($self) = @_;

    foreach my $constraint (@{$self->{'constraints'}}) {
        print FOUT "C $constraint->{'C'}\n";
        print FOUT "s $constraint->{'s'}\n";
        print FOUT "\n";
    }

    print FOUT "Z\n\n";
}
444
# Placeholder for the statistics section of a frame file.
#
# Currently does nothing: no output is written and the object is left
# unchanged.
sub writeStatisticsSection
{
    my $self = $_[0];
}
451
# Write the frame file "0.exp" into the output directory.  It contains a
# header section (version, permission level, owner, dates) followed by
# three further sections that are empty apart from their "Z" terminators.
#
# Uses a lexical filehandle and three-argument open() so that unusual
# characters in the path cannot be misread as an open mode.
#
# Returns 1 on success, 0 when the file could not be opened or written
# (a message is printed on STDERR in that case).
sub saveZeroFrame
{
    my $self = shift @_;
    my $file = "0.exp";

    my $filename = &util::filename_cat($self->{'output_dir'},$file);

    my $username = $self->{'username'};
    my $fdate = getFormattedDate();

    my $fout;
    unless (open($fout, '>', $filename)) {
        print STDERR "ExpediteeFrameIO::saveZeroFrame() Failed to open $filename for output: $!\n";
        return 0;
    }
    binmode($fout,":utf8");

    # The here-document deliberately starts with an empty line, exactly as
    # the file has always been written.
    print $fout <<EOT;

V 1
p 4
U $username
D $fdate
M $username
d $fdate
Z

Z

Z

Z

EOT

    # Buffered write errors only surface when the handle is closed.
    unless (close($fout)) {
        print STDERR "ExpediteeFrameIO::saveZeroFrame() Failed to write $filename: $!\n";
        return 0;
    }

    return 1;
}
495
# Add a text item of the form "@assocfilepath: <assoc>" to the frame.
#
#   $assoc - the value placed after "@assocfilepath: "
#
# The item is placed at the fixed position (318, 123), has no width, and
# carries the data attribute 'D' = "gsdl.Metadata: assocfilepath".
sub writeAssocFilePath
{
    my ($self, $assoc) = @_;

    my ($x, $y) = (318, 123);

    # add data: gsdl.Metadata: assocfilepath to this piece of text.
    my $attr = { 'D' => "gsdl.Metadata: assocfilepath" };

    $self->addText($x, $y, "\@assocfilepath: $assoc", undef, $attr);
}
512
# Write the frame to a file in the output directory.
#
#   $file  - file name of the frame; when it is "1.exp" the zero frame
#            ("0.exp") is written first via saveZeroFrame()
#   $assoc - optional; when defined, an "@assocfilepath" text item is added
#            to the frame before it is written
#
# The sections are written in the order header, items, lines, constraints,
# statistics.  The package-level FOUT handle is used because the write*
# methods print to it.
#
# Returns 1 on success, 0 when the file could not be opened or written
# (a message is printed on STDERR in that case).
sub saveFrame
{
    my $self = shift @_;
    my ($file,$assoc) = @_;

    if ($file eq "1.exp") {
        $self->saveZeroFrame();
    }

    my $filename = &util::filename_cat($self->{'output_dir'},$file);

    my $status = undef;

    # Three-argument open(): characters in the path can never be taken as
    # part of the open mode.
    if (open(FOUT, '>', $filename)) {
        binmode(FOUT,":utf8");

        if (defined $assoc) {
            $self->writeAssocFilePath($assoc); #write assocfilepath out to frame.
        }

        $self->writeHeaderSection();
        $self->writeItemsSection();
        $self->writeLinesSection();
        $self->writeConstraintsSection();
        $self->writeStatisticsSection();

        # Buffered write errors only surface when the handle is closed.
        if (close(FOUT)) {
            $status = 1;
        }
        else {
            print STDERR "ExpediteeFrameIO::saveFrame() Failed to write $filename: $!\n";
            $status = 0;
        }
    }
    else {
        print STDERR "ExpediteeFrameIO::saveFrame() Failed to open $filename for output: $!\n";
        $status = 0;
    }

    return $status;
}
551
# Recursively convert a parsed HTML layout node, and then its children,
# into Expeditee items on this frame.
#
#   $html_node - hash ref; its 'type' selects the handling:
#     "rect" - reads 'rect' (with 'xl', 'xr', 'yt', 'yb'), 'style', and the
#              optional 'attr' and 'img'.  A rectangle is added; when 'img'
#              is present an "@i: <url>" text item is added at the
#              rectangle's top-left corner as well.
#     "text" - reads 'text', 'xl', 'yt', 'xr', 'style' and the optional
#              'data'.  A text item is added.
#     Any other type only produces a warning on STDERR.
#   After the node itself, every entry of 'childNodes' is processed the
#   same way.
#
# Emits debugging output on STDERR.
sub buildFrame
{
    my $self = shift @_;
    my ($html_node) = @_;

    my $type = $html_node->{'type'};

    if ($type eq "rect") {

        my $rect = $html_node->{'rect'};
        my $xl = $rect->{'xl'};
        my $xr = $rect->{'xr'};
        my $yt = $rect->{'yt'};
        my $yb = $rect->{'yb'};

        my $attr = convertStyleToAttr($html_node->{'style'});

        if (defined $html_node->{'attr'}) {
            # values provided in 'attr' explicitly overwrite any values
            # derived from CSS style

            my $direct_attr_str = $html_node->{'attr'};
            my @direct_attr_array = split(/\s*;\s*/,$direct_attr_str);
            foreach my $da (@direct_attr_array) {
                # first character is the attribute key, the rest its value
                my ($key,$val) = ($da =~ m/^(.)\s*(.*)$/);

                # An empty fragment (e.g. from a leading ';' or from ';;')
                # does not match; skip it instead of storing an attribute
                # under an empty key.
                next unless defined $key;

                $attr->{$key} = $val;
            }
        }

        #don't want to add font information to non-text items!
        my $deleted = delete $attr->{'f'};

        print STDERR "**** DELETED: $deleted \n";

        $self->addRect($xl,$yt,$xr,$yb,$attr);

        if (defined $html_node->{'img'}) {

            my $img_url = $html_node->{'img'};

            # strip everything up to and including the first path component
            # that starts with "greenstone3"
            $img_url =~ s/^http:\/\/(.*?)\/greenstone3(.*?)\///;
            if ($img_url =~ m/^interfaces\//) {
                $img_url = "greenstone3-svn/web/$img_url";
            }
            elsif ($img_url =~ m/^sites\//) {
                $img_url = "greenstone3-svn/web/$img_url";
            }

            my $x = $xl;
            my $y = $yt;

            # the image reference carries none of the rectangle's attributes
            my $img_attr = {};

            my $img_text = "\@i: $img_url";

            $self->addText($x,$y,$img_text,undef,$img_attr);
        }

    }
    elsif ($type eq "text") {

        my $text = $html_node->{'text'};

        my $x = $html_node->{'xl'};
        my $y = $html_node->{'yt'};
        my $w = $html_node->{'xr'} - $x +1;

        print STDERR "**** CHECKING STYLE NODE: ".$html_node->{'style'}."\n";
        my $attr = convertStyleToAttr($html_node->{'style'});

        #DEBUGGING
        if (defined $attr->{'f'}) {
            print STDERR "***** Checking text attributes:".$attr->{'f'}."\n";
            print STDERR "****************************************\n";
        }

        # fudge factor for now (based on default font size used)
        $y += 16; # y-value of text item in Expeditee is its baseline
        $x += 4;

        my $data = $html_node->{'data'};
        $attr->{'D'} = $data if defined $data;

        $self->addText($x,$y,$text,$w,$attr);
    }
    else {
        print STDERR "ExpediteeFrameIO::buildFrame(): Warning, unrecognized type '$type'\n";
    }

    my $childNodes = $html_node->{'childNodes'};
    foreach my $child_node (@$childNodes) {
        $self->buildFrame($child_node);
    }
}
647
648
# Write the file "frame.inf" into the output directory.  Its content is the
# collection name immediately followed by the last frame number (no
# separator and no trailing newline).
#
#   $last_frame_number - number of the last frame in the frameset
#   $collect           - collection name; used rather than the name of the
#                        directory in which the frameset is stored
#
# Uses a lexical filehandle and three-argument open() so that unusual
# characters in the path cannot be misread as an open mode.
#
# Returns 1 on success, 0 when the file could not be opened or written
# (a message is printed on STDERR in that case).
sub saveLastFrameNumber
{
    my $self = shift @_;
    my ($last_frame_number,$collect) = @_;

    my $filename = &util::filename_cat($self->{'output_dir'},"frame.inf");

    my $fnout;
    unless (open($fnout, '>', $filename)) {
        print STDERR "ExpediteeFrameIO::saveLastFrameNumber() Failed to open $filename for output: $!\n";
        return 0;
    }
    binmode($fnout,":utf8");

    # frameset (collection) name concatenated with the last frame number
    print $fnout "$collect"."$last_frame_number";

    # Buffered write errors only surface when the handle is closed.
    unless (close($fnout)) {
        print STDERR "ExpediteeFrameIO::saveLastFrameNumber() Failed to write $filename: $!\n";
        return 0;
    }

    return 1;
}
679
6801;
Note: See TracBrowser for help on using the repository browser.