Changeset 1700 for trunk/gsdl/perllib
- Timestamp:
- 2000-11-27T08:39:17+13:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/PSPlug.pm
# trunk/gsdl/perllib/plugins/PSPlug.pm -- the excerpts shown by the
# r1685 -> r1700 changeset view.  Regions the view left out are marked
# with "# [...]" and are not reproduced here.

use ConvertToPlug;
use sorttools;

sub BEGIN {
# [...] (unchanged lines not shown in the changeset view)

    my $self = new ConvertToPlug ($class, ("-convert_to","text",@_), "--", "-title_sub", 'Page\s+\d+');

    # Hand parsargv::parse a reference to the matching $self slot for each
    # metadata-extraction switch; a false result is treated as a
    # configuration error and aborts after printing the usage text.
    if (!parsargv::parse(\@_,
                         q^extract_date^, \$self->{'extract_date'},
                         q^extract_pages^, \$self->{'extract_pages'},
                         q^extract_title^, \$self->{'extract_title'},
                         "allow_extra_options")) {
        # The message used to name HTMLPlug, which sent users to the wrong
        # plugin line in collect.cfg.
        print STDERR "\nIncorrect options passed to PSPlug, check your collect.cfg configuration file\n";
        &print_usage();
        die "\n";
    }

    return bless $self, $class;
}

# Print the PSPlug-specific plugin options to STDERR.
sub print_usage {
    print STDERR "\n usage: plugin PSPlug [options]\n\n";
    print STDERR " options:\n";
    print STDERR " -extract_date Extract date from PS header\n";
    print STDERR " -extract_pages Extract pages from PS header\n";
    print STDERR " -extract_title Extract title from PS header\n";
}

# [...] (unchanged lines not shown in the changeset view)
    return q^(?i)\.ps$^;
}

# Scan a PostScript file line by line for "Title:", "Creation..." and
# "Pages:" text and record what is found as metadata on the top section
# of the document object.
#
#   $self     - plugin object; its 'extract_title', 'extract_date' and
#               'extract_pages' flags select what is looked for, and
#               'verbosity' > 1 enables the progress message on STDERR
#   $filename - path of the PostScript file to read
#   $doc      - document object; must provide get_top_section() and
#               add_utf8_metadata()
#
# Each kind of metadata is recorded at most once.  A file that cannot be
# opened is reported on STDERR and skipped; this sub never dies on its
# own, so the caller can carry on with the conversion.  Returns nothing.
sub extract_metadata_from_postscript {
    my $self = shift (@_);
    my $filename = shift (@_);
    my $doc = shift (@_);
    my $section = $doc->get_top_section();

    my $title_found = 0;
    my $pages_found = 0;
    my $date_found = 0;

    print STDERR "PSPlug: extracting PostScript metadata from \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    # Three-argument open on a lexical handle: the mode can no longer be
    # altered by odd characters in the file name, and failure is reported
    # instead of silently reading nothing.
    my $input;
    if (!open($input, '<', $filename)) {
        print STDERR "PSPlug: could not open \"$filename\" for metadata extraction: $!\n";
        return;
    }

    while (my $line = <$input>) {
        if ($self->{'extract_title'} && !$title_found) {
            foreach my $word ($line =~ m|Title: ([-A-Za-z0-9@/\/\(\):,. ]*)|g) {
                next if $word eq "";
                $doc->add_utf8_metadata($section, "Title", $word);
                print "\"$word\" ===>>> \"Title\" = \"$word\"\n";
                $title_found = 1;
                last;   # first non-empty title wins
            }
        }

        if ($self->{'extract_date'} && !$date_found) {
            # The character class ends with a line terminator (it appears
            # as a raw line break in the changeset view), so the match may
            # run up to and including the end of the line.
            foreach my $word ($line =~ m/(Creation[-A-Za-z0-9@\/\(\):,. \n]*)/g) {
                my $date;

                # format_date is always called as (day, month, year).
                if ($word =~ m/ ([A-Za-z][A-Za-z][A-Za-z]) ([0-9 ][0-9]) ?[0-9: ]+ ([0-9]{4})/) {
                    # e.g. "CreationDate: Mon Nov 27 08:39:17 2000"
                    $date = &sorttools::format_date($2,$1,$3);
                }
                elsif ($word =~ m/D:([0-9]{4})([0-9]{2})([0-9]{2})[0-9]{6}\)/) {
                    # e.g. "CreationDate: (D:20001127083917)"
                    $date = &sorttools::format_date($3,$2,$1);
                }
                elsif ($word =~ m/CreationDate: ([0-9]{4}) ([A-Za-z][A-Za-z][A-Za-z]) ([0-9 ][0-9]) [0-9:]*/) {
                    # e.g. "CreationDate: 2000 Nov 27 08:39"
                    $date = &sorttools::format_date($3,$2,$1);
                }
                else {
                    # Not a layout we can parse.  Leave $date_found unset
                    # so a later, parsable creation date is still used.
                    next;
                }

                $doc->add_utf8_metadata($section, "Date", $date);
                print "\"$word\" ===>>> \"Date\" = \"$date\"\n";
                $date_found = 1;
                last;
            }
        }

        if ($self->{'extract_pages'} && !$pages_found) {
            foreach my $word ($line =~ m/(Pages: [0-9]*)/g) {
                my $digits = $word;
                $digits =~ s/[^0-9]//g;
                # An empty or zero page count carries no information.
                next if $digits eq "" || $digits eq "0";
                $doc->add_utf8_metadata($section, "Pages", $digits);
                print "\"$word\" ===>>> \"Pages\" = $digits\n";
                $pages_found = 1;
                last;
            }
        }

        # Stop reading once everything that was asked for has been found.
        last if (!$self->{'extract_title'} || $title_found)
             && (!$self->{'extract_date'}  || $date_found)
             && (!$self->{'extract_pages'} || $pages_found);
    }

    close($input);
    return;
}


# do plugin specific processing of doc_obj for PostScript type
#
# Extracts header metadata from the source file into $doc, then hands the
# unchanged argument list to ConvertToPlug::process_type with type "ps"
# and returns its result.
# NOTE(review): the argument positions used below are presumably those of
# the standard plugin process() call -- confirm against ConvertToPlug.
sub process {
    my $self = shift (@_);
    # Only the path, file name and document object are needed here; @_ is
    # left intact because it is passed on below.
    my (undef, undef, $path, $file, undef, $doc) = @_;

    print STDERR "PSPlug: passing $file onto $self->{'convert_to'} Plug\n"
        if $self->{'verbosity'} > 1;

    extract_metadata_from_postscript($self, "$path/$file", $doc);

    return ConvertToPlug::process_type($self,"ps",@_);
}
Note:
See TracChangeset
for help on using the changeset viewer.