Changeset 317
- Timestamp:
- 1999-06-30T15:36:35+12:00 (25 years ago)
- Location:
- trunk/gsdl/perllib/plugins
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/ArcPlug.pm
r285 r317 1 # plugin which recurses through directories processing 2 # each file it finds 1 # plugin which recurses through an archives.inf file 2 # (i.e. the file generated in the archives directory 3 # when an import is done), processing each file it finds 3 4 4 5 package ArcPlug; … … 27 28 } 28 29 29 # return 1 if processed, 0 if not processed30 # return number of files processed, undef if can't process 30 31 # Note that $base_dir might be "" and that $file might 31 32 # include directories 32 33 sub read { 33 34 my $self = shift (@_); 34 ($pluginfo, $base_dir, $file, $metadata, $processor) = @_; 35 # my $count = 0; 35 ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 36 37 my $count = 0; 36 38 37 39 # see if this has a archives information file within it … … 39 41 40 42 if (-e $archive_info_filename) { 43 44 # found an archives.inf file 45 print STDERR "ArcPlug: processing $archive_info_filename\n"; 41 46 42 47 # read in the archives information file … … 48 53 # process each file 49 54 foreach $subfile (@$file_list) { 50 #### # note: metadata is not carried on to the next level - why ??? - I changed this 51 # so I could pass the classifytype from mgbuilder - Stefan. 
55 last if (defined $maxdocs && $maxdocs =~ /\d/ && $count >= $maxdocs); 56 52 57 my $tmp = &util::filename_cat ($file, $subfile->[0]); 53 58 next if $tmp eq $file; 54 &plugin::read ($pluginfo, $base_dir, $tmp, $metadata, $processor); 55 # $count ++; 56 # last if $count > 200; 57 } 58 59 # all books have been processed so need to output classifications 60 # to infodb - note that at present you have to import before building 61 if (defined $processor->{'mode'} && $processor->{'mode'} eq 'infodb') { 62 print STDERR "ArcPlug: Adding classifications to infodb\n"; 63 $processor->process('classifications'); 59 # note: metadata is not carried on to the next level 60 $count += &plugin::read ($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs); 64 61 } 65 62 66 return 1;63 return $count; 67 64 } 68 65 69 66 # wasn't an archives directory, someone else will have to process it 70 return 0;67 return undef; 71 68 } 72 69 -
trunk/gsdl/perllib/plugins/BasPlug.pm
r4 r317
17 17   }
18 18
19    - # return 1 if processed, 0 if not processed
   19 + # return number of files processed, undef if can't process
20 20   # Note that $base_dir might be "" and that $file might
21 21   # include directories
22 22   sub read {
23 23   my $self = shift (@_);
24    - my ($pluginfo, $base_dir, $file, $metadata, $processor ) = @_;
   24 + my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
25 25
26 26   die "BasPlug::read function must be implemented in sub classes\n";
27 27
28    - return 0; # will never get here
   28 + return undef; # will never get here
29 29   }
30 30
-
trunk/gsdl/perllib/plugins/FOXPlug.pm
r168 r317
32 32
33 33
34    - # return 1 if processed, 0 if not processed
   34 + # return number of files processed, undef if can't process
35 35   # Note that $base_dir might be "" and that $file might
36 36   # include directories
… …
41 41
42 42   # dbt files are processed at the same time as dbf files
43    - return 1 if ($fullname =~ /\.dbt$/i);
   43 + return 0 if ($fullname =~ /\.dbt$/i);
44 44
45 45   # see if this is a foxbase database
46    - return 0 unless (-f $fullname && $fullname =~ /\.dbf$/i);
   46 + return undef unless (-f $fullname && $fullname =~ /\.dbf$/i);
47 47
48 48   my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
… …
51 51   if (!open (FOXBASEIN, $fullname)) {
52 52   print STDERR "FOXPlug::read - couldn't read $fullname\n";
53    - return 0;
   53 + return undef;
54 54   }
55 55
… …
63 63   print STDERR "FOXPlug::read - eof while reading database header";
64 64   close (FOXBASEIN);
65    - return 0;
   65 + return undef;
66 66   }
67 67
… …
79 79   } else {
80 80   print STDERR "FOXPlug:read $fullname doesn't seem to be a Foxbase file\n";
81    - return 0;
   81 + return undef;
82 82   }
83 83
… …
109 109   print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
110 110   close (FOXBASEIN);
111     - return 0;
    111 + return undef;
112 112   }
113 113
-
trunk/gsdl/perllib/plugins/GMLPlug.pm
r245 r317
38 38   }
39 39
40    -
41    - # return 1 if processed, 0 if not processed
   40 + # return number of files processed, undef if can't process
42 41   # Note that $base_dir might be "" and that $file might
43 42   # include directories
… …
48 47
49 48   # see if this is a gml book
50    - return 0 unless (-f $fullname && $fullname =~ /\.gml$/i);
   49 + return undef unless (-f $fullname && $fullname =~ /\.gml$/i);
51 50
52 51   my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+.gml$/;
… …
63 62   if (!open (INFILE, $fullname)) {
64 63   print STDERR "GMLPlug::read - couldn't read $fullname\n";
65    - return 0;
   64 + return undef;
66 65   }
67 66   while (defined ($line = <INFILE>)) {
-
trunk/gsdl/perllib/plugins/HBPlug.pm
r288 r317 104 104 $section =~ s/(.{1,80})\s/$1\n/g; 105 105 106 # fix up the image links -- not sure what Rodger intended here - Stefan. 107 # $section =~ s/<img[^>]*?src=\"?([^\">]+)\"?[^>]*>/<img src=\"_linkOID_(_thisOID_\/$1)\"><br>/ig; 108 # $section =~ s/<<I>>\s*([^\.]+\.(png|jpg|gif))/<img src=\"_linkOID_(_thisOID_\/$1)\"><br>/ig; 109 110 $section =~ s/<img[^>]*?src=\"?([^\">]+)\"?[^>]*>/<center><img src=\"_httpcollection_\/archives\/_thisOID_\/$1\"><\/center><br>/ig; 111 $section =~ s/<<I>>\s*([^\.]+\.(png|jpg|gif))/<center><img src=\"_httpcollection_\/archives\/_thisOID_\/$1\"><\/center><br>/ig; 106 # fix up the image links 107 $section =~ s/<img[^>]*?src=\"?([^\">]+)\"?[^>]*>/ 108 <center><img src=\"_httpcollection_\/archives\/_thisOID_\/$1\"><\/center><br>/ig; 109 $section =~ s/<<I>>\s*([^\.]+\.(png|jpg|gif))/ 110 <center><img src=\"_httpcollection_\/archives\/_thisOID_\/$1\"><\/center><br>/ig; 112 111 113 112 return $section; … … 126 125 127 126 128 # return 1 if processed, 0 if not processed127 # return number of files processed, undef if can't process 129 128 # Note that $base_dir might be "" and that $file might 130 129 # include directories … … 138 137 ($jobnumber) = $file =~ /[\\\/]([^\\\/]+)$/; 139 138 } 140 return 0unless defined $jobnumber;139 return undef unless defined $jobnumber; 141 140 my $htmlfile = &util::filename_cat($base_dir, $file, "$jobnumber.htm"); 142 return 0unless -e $htmlfile;141 return undef unless -e $htmlfile; 143 142 144 143 print STDERR "HBPlug: processing $file\n"; … … 155 154 if -e $bookcover; 156 155 156 my $cursection = $doc_obj->get_top_section(); 157 158 # add metadata for top level of document 159 foreach $field (keys(%$metadata)) { 160 # $metadata->{$field} may be an array reference 161 if (ref ($metadata->{$field}) eq "ARRAY") { 162 map { 163 $doc_obj->add_metadata ($cursection, $field, $_); 164 } @{$metadata->{$field}}; 165 } else { 166 $doc_obj->add_metadata ($cursection, $field, $metadata->{$field}); 167 } 168 } 
169 # need to add <classifytype> to each book as we'll be using 'Hierarchy' 170 # for HB collections rather than the default ('Book') 171 $doc_obj->add_metadata ($cursection, 'classifytype', 'Hierarchy'); 172 173 157 174 # process the file one section at a time 158 175 my $curtoclevel = 1; 159 my $cursection = $doc_obj->get_top_section();160 176 my $firstsection = 1; 161 177 while (length ($html) > 0) { … … 195 211 $doc_obj->add_metadata ($cursection, "Title", $title); 196 212 197 if ($firstsection) {198 foreach $field (keys(%$metadata)) {199 # $metadata->{$field} may be an array reference200 if (ref ($metadata->{$field}) eq "ARRAY") {201 map {202 $doc_obj->add_metadata ($cursection, $field, $_);203 } @{$metadata->{$field}};204 } else {205 $doc_obj->add_metadata ($cursection, $field, $metadata->{$field});206 }207 }208 $firstsection = 0;209 }210 211 213 # clean up the section html 212 214 $sectiontext = $self->HB_clean_section($sectiontext); 213 215 214 216 # associate any files 215 # map { $doc_obj->associate_file("$base_dir$file/$1", $1)216 # if /_linkOID_\(_thisOID_\/([^\)]+)\)/; 0; }217 # split (/(_linkOID_\(_thisOID_\/[^\)]+\))/, $sectiontext);218 219 217 map { $doc_obj->associate_file(&util::filename_cat ($base_dir, $file, $1), $1) 220 218 if /_httpcollection_\/archives\/_thisOID_\/([^\"]+)\"/; 0; } … … 229 227 last; 230 228 } 229 $firstsection = 0; 231 230 } 232 231 -
trunk/gsdl/perllib/plugins/IndexPlug.pm
r286 r317 52 52 } 53 53 54 # return 1 if processed, 0 if not processed 54 55 # return number of files processed, undef if can't process 55 56 # Note that $base_dir might be "" and that $file might 56 57 # include directories 57 58 sub read { 58 59 my $self = shift (@_); 59 my ($pluginfo, $base_dir, $file, $metadata, $processor ) = @_;60 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 60 61 61 62 my $indexfile = &util::filename_cat($base_dir, $file, "index.txt"); 62 63 if (!-f $indexfile) { 63 64 # not a directory containing an index file 64 return 0;65 return undef; 65 66 } 66 67 … … 72 73 my @fields = (); 73 74 # see if there's a 'key:' line 74 foreach $line (keys %$list) { 75 if ($line =~ /key:/i) { 76 @fields = @{$list->{$line}}; 77 last; 78 } 75 if (defined $list->{'key:'}) { 76 @fields = @{$list->{'key:'}}; 79 77 } 80 78 81 79 # process each document 82 83 80 my $count = 0; 84 81 foreach $docfile (keys (%$list)) { 85 last if $count > 10; 86 $count ++; 82 last if (defined $maxdocs && $maxdocs =~ /\d/ && $count >= $maxdocs); 87 83 $metadata = {}; # at present we can do this as metadata 88 84 # will always be empty when it arrives 89 # at anyplugin - this might cause85 # at this plugin - this might cause 90 86 # problems if things change though 91 87 … … 108 104 } 109 105 } 110 &plugin::read ($pluginfo, $base_dir, $docfile, $metadata, $processor);106 $count += &plugin::read ($pluginfo, $base_dir, $docfile, $metadata, $processor, $maxdocs); 111 107 } 112 108 113 return 1; # was processed109 return $count; # was processed 114 110 } 115 111 -
trunk/gsdl/perllib/plugins/RecPlug.pm
r136 r317 27 27 } 28 28 29 # return 1 if processed, 0 if not processed 29 30 # return number of files processed, undef if can't process 30 31 # Note that $base_dir might be "" and that $file might 31 32 # include directories 32 33 sub read { 33 34 my $self = shift (@_); 34 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_; 35 35 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; 36 36 37 37 my (@dir, $subfile); 38 39 my $count = 0; 38 40 39 41 # see if this is a directory … … 47 49 closedir (DIR); 48 50 51 print STDERR "RecPlug: getting directory $dirname\n"; 52 49 53 # process each file 50 54 foreach $subfile (@dir) { 55 last if (defined $maxdocs && $maxdocs =~ /\d/ && $count >= $maxdocs); 56 51 57 if ($subfile !~ /^\.\.?$/) { 52 58 # note: metadata is not carried on to the next level 53 &plugin::read ($pluginfo, $base_dir, &util::filename_cat($file, $subfile),54 {}, $processor);59 $count += &plugin::read ($pluginfo, $base_dir, &util::filename_cat($file, $subfile), 60 {}, $processor, $maxdocs); 55 61 } 56 62 } 57 58 return 1; 63 return $count; 59 64 } 60 65 61 66 # wasn't a directory, someone else will have to process it 62 return 0;67 return undef; 63 68 } 64 69
Note:
See TracChangeset
for help on using the changeset viewer.