source: main/trunk/model-sites-dev/pei-jones/collect/written-works/perllib/plugins/PJPlugin.pm@ 31940

Last change on this file since 31940 was 31940, checked in by kjdon, 3 years ago

we are actually processing the tifs now

File size: 5.2 KB
Line 
1package PJPlugin;
2
3use PagedImagePlugin;
4
5use strict;
6no strict 'refs'; # allow filehandles to be variables and viceversa
7no strict 'subs';
8
# Make PJPlugin a subclass of PagedImagePlugin; done in a BEGIN block so the
# inheritance is in place at compile time.
BEGIN {
    @PJPlugin::ISA = qw(PagedImagePlugin);
}
12
# PJPlugin declares no arguments of its own (the list is empty).
my $arguments = [];

# Description record for this plugin; new() pushes it onto the "OptList"
# it is given, and pushes the (empty) argument list onto "ArgList".
my $options = {
    'name'     => "PJPlugin",
    'desc'     => "PagedImagePlugin variant for pei-jones written-works collection",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
20
# Constructor.
#
# Appends this class name to $pluginlist, this plugin's argument list to
# $hashArgOptLists->{"ArgList"} and its option record to
# $hashArgOptLists->{"OptList"}, then lets PagedImagePlugin build the object
# and reblesses the result into $class.
#
# Parameters:
#   $class           - class name the object is blessed into
#   $pluginlist      - array ref; $class is pushed onto it
#   $inputargs       - passed through untouched to PagedImagePlugin->new
#   $hashArgOptLists - hash ref holding the "ArgList" and "OptList" array refs
#
# Returns the new object blessed into $class.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@{$pluginlist}, $class);
    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Explicit arrow call: the indirect-object form "new PagedImagePlugin(...)"
    # is parse-ambiguous inside a package that defines its own new().
    my $self = PagedImagePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless($self, $class);
}
33
34
# Build a document object from a PJ item file.
#
# The item file is read as UTF-8, one entry per line:
#   - lines containing no word character, and lines starting with '#',
#     are skipped;
#   - "<name> value" lines become top-section metadata called "pj.<name>",
#     except when the value is exactly "unknown";
#   - any other line describes one page, in the form
#       page:imagefilename:textfilename:r
#     where the trailing "r" is optional and is handed to process_image
#     as the rotate argument (rotate the image 180 degrees).
#
# Each page line creates a new section whose Title is the page number.
#
# Parameters:
#   $filename_full_path - path of the item file to read
#   $dir                - prefix joined to image/text names by plain string
#                         concatenation (presumably ends with a directory
#                         separator - TODO confirm against the caller)
#   $filename_no_path   - not used in this method
#   $processor          - passed on to set_initial_doc_fields
#   $metadata           - passed on to set_initial_doc_fields
#
# Returns the populated doc object.
# Dies if the item file cannot be opened.
sub process_item {
    my $self = shift(@_);
    my ($filename_full_path, $dir, $filename_no_path, $processor, $metadata) = @_;

    # Arrow call rather than "new doc (...)": this package defines its own
    # new(), which makes the indirect-object form parse-ambiguous.
    my $doc_obj = doc->new($filename_full_path, "indexed_doc", $self->{'file_rename_method'});
    $self->set_initial_doc_fields($doc_obj, $filename_full_path, $processor, $metadata);
    my $topsection = $doc_obj->get_top_section();

    # simple item files are always paged unless the user specified a type
    my $doc_type = ($self->{'documenttype'} eq "auto") ? "paged" : $self->{'documenttype'};
    $doc_obj->set_utf8_metadata_element($topsection, "gsdlthistype", $doc_type);

    # Lexical handle: it stays open while process_image/process_text run, so
    # it must not be a package-global bareword that other code could reuse.
    open(my $item_fh, "<:encoding(UTF-8)", $filename_full_path)
        or die "couldn't open $filename_full_path: $!\n";

    my $num = 0;    # number of page lines seen
    while (defined(my $line = <$item_fh>)) {
        next unless $line =~ /\w/;
        chomp $line;
        next if $line =~ /^#/;    # ignore comment lines

        if ($line =~ /^<([^>]*)>\s*(.*?)\s*$/) {
            my ($meta_name, $meta_value) = ($1, $2);

            # PJ mod: every metadata element goes into the pj. namespace,
            # and "unknown" values are not added at all.
            if ($meta_value !~ /^unknown$/) {
                $doc_obj->set_utf8_metadata_element($topsection, "pj." . $meta_name, $meta_value);
            }
            next;
        }

        # Anything else is a page line: page:imagefilename:textfilename:r
        $num++;
        $line =~ s/^\s+//;    # remove space at the front
        $line =~ s/\s+$//;    # remove space at the end
        my ($pagenum, $imgname, $txtname, $rotate) = split /:/, $line;

        # PJ: image names are used as given (the tif -> jpg substitution
        # that used to happen here is disabled).

        # create a new section for each page; the page number becomes the Title
        my $cursection = $doc_obj->insert_section($doc_obj->get_end_child($topsection));
        $doc_obj->set_utf8_metadata_element($cursection, 'Title', $pagenum);

        # process the image for this page if there is one
        if (defined $imgname && $imgname ne "") {
            my $image_result = $self->process_image($dir . $imgname, $imgname, $doc_obj, $cursection, $rotate);
            if (!defined $image_result) {
                print "PagedImagePlugin: couldn't process image \"$dir$imgname\" for item \"$filename_full_path\"\n";
            }
        }

        # process the text file if one is there, otherwise add dummy text
        if (defined $txtname && $txtname ne "") {
            my $text_result = $self->process_text($dir . $txtname, $txtname, $doc_obj, $cursection);
            if (!defined $text_result) {
                # Report the same path that was handed to process_text
                # (no stray "." between directory and file name).
                print "PagedImagePlugin: couldn't process text file \"$dir$txtname\" for item \"$filename_full_path\"\n";
                $self->add_dummy_text($doc_obj, $cursection);
            }
        }
        else {
            $self->add_dummy_text($doc_obj, $cursection);
        }
    }

    close($item_fh);

    # add numpages metadata
    $doc_obj->set_utf8_metadata_element($topsection, 'NumPages', "$num");

    # Copy the per-document image size maxima held on $self to the document,
    # then clear them on $self (presumably they are accumulated by
    # process_image - TODO confirm in PagedImagePlugin).
    $doc_obj->set_utf8_metadata_element($topsection, "MaxImageWidth",  $self->{'MaxImageWidth'});
    $doc_obj->set_utf8_metadata_element($topsection, "MaxImageHeight", $self->{'MaxImageHeight'});
    $self->{'MaxImageWidth'}  = undef;
    $self->{'MaxImageHeight'} = undef;

    return $doc_obj;
}
120
# Read an item file and block every image and text file it refers to.
#
# Lines containing no word character, comment lines (starting with '#') and
# metadata lines ("<name> value") are ignored. Every remaining line is
# treated as page:imagefilename:textfilename:r; each non-empty image or text
# name is joined to $dir with FileUtils::filenameConcatenate and passed to
# block_raw_filename together with $block_hash.
#
# Parameters:
#   $filename_full_path - path of the item file to read
#   $dir                - directory the listed files are relative to
#   $block_hash         - passed through to block_raw_filename
#
# Dies if the item file cannot be opened.
sub scan_item_for_files_to_block
{
    my $self = shift(@_);
    my ($filename_full_path, $dir, $block_hash) = @_;

    # Three-argument open: the two-argument form strips leading/trailing
    # whitespace from the path and interprets characters such as '<', '>'
    # and '|' in it as an open mode.
    open(my $item_fh, '<', $filename_full_path)
        or die "couldn't open $filename_full_path to work out which files to block: $!\n";

    while (defined(my $line = <$item_fh>)) {
        next unless $line =~ /\w/;
        chomp $line;
        next if $line =~ /^#/;                          # ignore comment lines
        next if ($line =~ /^<([^>]*)>\s*(.*?)\s*$/);    # ignore metadata lines

        # line should be like page:imagefilename:textfilename:r
        $line =~ s/^\s+//;    # remove space at the front
        $line =~ s/\s+$//;    # remove space at the end
        my ($pagenum, $imgname, $txtname, $rotate) = split /:/, $line;

        # PJ: image names are used as given (the tif -> jpg substitution
        # that used to happen here is disabled).

        # block the image file if there is one
        if (defined $imgname && $imgname ne "") {
            $self->block_raw_filename($block_hash, &FileUtils::filenameConcatenate($dir, $imgname));
        }
        # block the text file if there is one
        if (defined $txtname && $txtname ne "") {
            $self->block_raw_filename($block_hash, &FileUtils::filenameConcatenate($dir, $txtname));
        }
    }
    close($item_fh);
}
154
1551;
Note: See TracBrowser for help on using the repository browser.