source: trunk/BBC_2005/collect/bbc/perllib/plugins/BRSPlug.pm@ 10021

Last change on this file since 10021 was 10021, checked in by chi, 19 years ago

Initial revision

  • Property svn:keywords set to Author Date Id Revision
File size: 28.8 KB
Line 
1
2# plugin which processes an the BBC Sound archive exported in an ASCII format
3
4package BRSPlug;
5
6use BasPlug;
7use ghtml;
8
9sub BEGIN
10{
11 @ISA = ('BasPlug');
12 unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
13}
14
15my $bbc_collections_list =
16 [{'name' => "all", # all collections
17 'desc' => "{BRSPlug.bbc_collections.all}"},
18 {'name' => "nsa", # National Sound Archive
19 'desc' => "{BRSPlug.bbc_collections.nsa"},
20 {'name' => "bfi", # Brisith File Institute
21 'desc' => "{BRSPlug.bbc_collections.bfi"},
22 {'name' => "bbcother", # BBC Other
23 'desc' => "{BRSPlug.bbc_collections.bbcother"}];
24
25
26my $arguments =
27 [{ 'name' => "bbc_collections",
28 'desc' => "{BRSPlug.bbc_collections}",
29 #'type' => $bbc_collections_list,
30 'type' => "string",
31 'reqd' => "no",
32 'deft' => "all"}];
33
34%Exclude = ();
35
36my $options = { 'name' => "BRSPlug",
37 'desc' => "{BRSPlug.desc}",
38 'abstract' => "yes",
39 'inherits' => "no",
40 'args' => $arguments };
41
42# Note: sext is short for scrollable text
43
44#field_id label disp_type crit_type width height brs_para
45#----------------------------------------------------------------
46#10 sport prdcr text text 30 1 T006
47#1 text sext text 40 16 TEXT
48#16 scol ref text text text 30 1 SCOL
49#19 date text date 10 1 D001
50#25 Item title text text 20 1 T001
51#26 B/W Seqs text text 30 1 S001
52#27 Medium text text 30 1 S002
53#28 shot type text text 30 1 S003
54#29 B/W Prog/Item text text 30 1 S004
55#30 Seqs text text 10 1 S005
56#31 Format text text 30 1 S006
57#32 prog/item text text 30 1 S007
58#35 reg libno text text 30 1 S008
59#36 reg tape no text text 30 1 S009
60#37 C Indicator text text 30 1 S010
61#38 C Holder text text 30 1 T002
62#39 cprd ind text text 30 1 S011
63#40 cprd name text text 30 1 T003
64#44 reference sext text 30 5 R001
65#49 prog num text text 10 1 PRNO
66#59 news title text text 30 1 T005
67#65 cat num text text 10 1 CATN
68#72 Record number text text 30 1 T007
69#73 rx_date text date 10 1 S013
70#74 Music performer text text 30 2 T008
71#75 Composer text text 30 2 T009
72#76 mono/stereo text text 30 1 S014
73#77 music medium text text 30 1 S015
74#82 Sound Tech text text 30 1 S016
75#83 Music publisher text text 30 1 S017
76#84 music tape text text 30 1 T012
77#89 catalogue text text 3 1 CODE
78#104 subcat text text 10 1 SUBC
79#106 prog title text text 40 1 TTLE
80#109 screen type text text 10 1 DOCT
81#110 stock title text text 30 1 T013
82#113 stock ln 4 text text 30 1 T015
83#114 stock lib 1 text text 30 1 S020
84#115 stock lib 2 text text 30 1 S021
85#----------------------------------------------------------------
86
87
88my @brs_field_table
89 = (
90 { field_name => "TTLE", field_id => 106, label => "Prog title",
91 disp_type => "text", crit_type => "text",
92 width => 40, height => 1 },
93 { field_name => "T001", field_id => 25, label => "Item title",
94 disp_type => "text", crit_type => "text",
95 width => 20, height => 1 },
96 { field_name => "T005", field_id => 59, label => "News title",
97 disp_type => "text", crit_type => "text",
98 width => 30, height => 1 },
99 { field_name => "T013", field_id => 110, label => "Stock title",
100 disp_type => "text", crit_type => "text",
101 width => 30, height => 1 },
102 { field_name => "T007", field_id => 72, label => "Record number",
103 disp_type => "text", crit_type => "text",
104 width => 30, height => 1 },
105 { field_name => "RFTI", field_id => 116, label => "RFTI",
106 disp_type => "text", crit_type => "text",
107 width => 30, height => 1 },
108 { field_name => "TEXT", field_id => 1, label => "Text",
109 disp_type => "scrl", crit_type => "text",
110 width => 40, height => 16 },
111 { field_name => "T006", field_id => 10, label => "Sport prdcr",
112 disp_type => "text", crit_type => "text",
113 width => 30, height => 1 },
114 { field_name => "SCOL", field_id => 16, label => "Scol ref text",
115 disp_type => "text", crit_type => "text",
116 width => 30, height => 1 },
117 { field_name => "D001", field_id => 19, label => "Date",
118 disp_type => "text", crit_type => "date",
119 width => 10, height => 1 },
120 { field_name => "S001", field_id => 26, label => "B/W Seqs",
121 disp_type => "text", crit_type => "text",
122 width => 30, height => 1 },
123 { field_name => "S002", field_id => 27, label => "Medium",
124 disp_type => "text", crit_type => "text",
125 width => 30, height => 1 },
126 { field_name => "S003", field_id => 28, label => "Shot type",
127 disp_type => "text", crit_type => "text",
128 width => 30, height => 1 },
129 { field_name => "S004", field_id => 29, label => "B/W Prog/Item",
130 disp_type => "text", crit_type => "text",
131 width => 30, height => 1 },
132 { field_name => "S005", field_id => 30, label => "Seqs",
133 disp_type => "text", crit_type => "text",
134 width => 10, height => 1 },
135 { field_name => "S006", field_id => 31, label => "Format",
136 disp_type => "text", crit_type => "text",
137 width => 30, height => 1 },
138 { field_name => "S007", field_id => 32, label => "Prog/item",
139 disp_type => "text", crit_type => "text",
140 width => 30, height => 1 },
141 { field_name => "S008", field_id => 35, label => "Reg libno",
142 disp_type => "text", crit_type => "text",
143 width => 30, height => 1 },
144 { field_name => "S009", field_id => 36, label => "Reg tape no",
145 disp_type => "text", crit_type => "text",
146 width => 30, height => 1 },
147 { field_name => "S010", field_id => 37, label => "C Indicator",
148 disp_type => "text", crit_type => "text",
149 width => 30, height => 1 },
150 { field_name => "T002", field_id => 38, label => "C Holder",
151 disp_type => "text", crit_type => "text",
152 width => 30, height => 1 },
153 { field_name => "S011", field_id => 39, label => "Cprd ind",
154 disp_type => "text", crit_type => "text",
155 width => 30, height => 1 },
156 { field_name => "T003", field_id => 40, label => "Cprd name",
157 disp_type => "text", crit_type => "text",
158 width => 30, height => 1 },
159 { field_name => "R001", field_id => 44, label => "Reference",
160 disp_type => "scrl", crit_type => "text",
161 width => 30, height => 5 },
162 { field_name => "PRNO", field_id => 49, label => "Prog num",
163 disp_type => "text", crit_type => "text",
164 width => 10, height => 1 },
165 { field_name => "CATN", field_id => 65, label => "Cat num",
166 disp_type => "text", crit_type => "text",
167 width => 10, height => 1 },
168 { field_name => "S013", field_id => 73, label => "Rx_date",
169 disp_type => "text", crit_type => "date",
170 width => 10, height => 1 },
171 { field_name => "T008", field_id => 74, label => "Music performer",
172 disp_type => "text", crit_type => "text",
173 width => 30, height => 2 },
174 { field_name => "T009", field_id => 75, label => "Composer",
175 disp_type => "text", crit_type => "text",
176 width => 30, height => 2 },
177 { field_name => "S014", field_id => 76, label => "Mono/stereo",
178 disp_type => "text", crit_type => "text",
179 width => 30, height => 1 },
180 { field_name => "S015", field_id => 77, label => "Music medium",
181 disp_type => "text", crit_type => "text",
182 width => 30, height => 1 },
183 { field_name => "S016", field_id => 82, label => "Sound Tech",
184 disp_type => "text", crit_type => "text",
185 width => 30, height => 1 },
186 { field_name => "S017", field_id => 83, label => "Music publisher",
187 disp_type => "text", crit_type => "text",
188 width => 30, height => 1 },
189 { field_name => "T012", field_id => 84, label => "Music tape",
190 disp_type => "text", crit_type => "text",
191 width => 30, height => 1 },
192 { field_name => "CODE", field_id => 89, label => "Catalogue",
193 disp_type => "text", crit_type => "text",
194 width => 3, height => 1 },
195 { field_name => "SUBC", field_id => 104, label => "Subcat",
196 disp_type => "text", crit_type => "text",
197 width => 10, height => 1 },
198 { field_name => "DOCT", field_id => 109, label => "Screen type",
199 disp_type => "text", crit_type => "text",
200 width => 10, height => 1 },
201 { field_name => "T015", field_id => 113, label => "Stock ln 4",
202 disp_type => "text", crit_type => "text",
203 width => 30, height => 1 },
204 { field_name => "S020", field_id => 114, label => "Stock lib 1",
205 disp_type => "text", crit_type => "text",
206 width => 30, height => 1 },
207 { field_name => "S021", field_id => 115, label => "Stock lib 2",
208 disp_type => "text", crit_type => "text",
209 width => 30, height => 1 }
210 );
211
212
213my @brs_title_list = ("TTLE", "T001", "T005", "T013", "T007", "RFTI");
214my %cat_num_list = ();
215
216sub new {
217 my $class = shift (@_);
218 my $self = new BasPlug ($class, @_);
219 $self->{'plugin_type'} = "BRSPlug";
220
221 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
222 my $option_list = $self->{'option_list'};
223 push( @{$option_list}, $options );
224
225 if (!parsargv::parse(\@_,
226 q^bbc_collections/(all|nsa|bfi|bbcother)/^, \$self->{'bbc_collections'},
227 "allow_extra_options")) {
228
229 print STDERR "\nIncorrect options passed to BRSPlug, check your collect.cfg configuration file\n";
230 $self->print_txt_usage(""); # Use default resource bundle
231 die "\n";
232 }
233
234 #my ($class) = @_;
235 #my $self = new BasPlug ($class, @_);
236
237 return bless $self, $class;
238}
239
240sub get_default_process_exp {
241 my $self = shift (@_);
242
243# return q^(?i)\.brs$^;
244 return q^TVRD.+^;
245}
246
247my $brs_doc_count = 0;
248my $brs_dot_count = 0;
249my $brs_line_no = 0;
250
251sub brs_error
252{
253 my ($file, $mess, $outhandle) = @_;
254
255 print $outhandle "Malformed BRS recorded at line $brs_line_no in $file:\n";
256 print $outhandle " $mess\n";
257}
258
259sub read_brs_record
260{
261 my ($recordref, $file, $outhandle) = @_;
262
263 my $extra_trigger = '\s*>';
264 my $text_divider = "-" x 78;
265 #my $text_end = '^\.\.[^:]+:$';
266 my $meta_key = '^\.\.(\w+):$'; # modified:2005
267
268 # Print "." to signify processing if enough records have been read in
269 $brs_doc_count++;
270 if (($brs_doc_count % 10) == 0)
271 {
272 print $outhandle ".";
273 $brs_dot_count++;
274 print $outhandle "\n" if (($brs_dot_count % 80) == 0);
275 }
276
277 my $brs_rec = undef;
278 my $line = undef;
279
280 # read in the record
281 my @brs_lines = split /\n/, $$recordref;
282
283 while (scalar(@brs_lines)>0)
284 {
285 $line = shift(@brs_lines);
286
287 $line =~ s/\cM//g;
288
289 $brs_line_no++;
290
291# if ($line =~ m/^\.\.([^:]+):$/)
292 if ($line =~ m/$meta_key/)
293 {
294 my $field_name = $1;
295
296 if ($field_name eq "TEXT")
297 {
298 my $text_above = "";
299 my $text_extra = "";
300 my $text_below = "";
301
302 my $add_mode = "above";
303
304 while (scalar(@brs_lines)>0)
305 {
306
307 last if ($brs_lines[0] =~ m/$meta_key/);
308
309
310 $line = shift(@brs_lines);
311
312
313 $brs_line_no++;
314
315 if ($line =~ m/^$text_divider/)
316 {
317 $add_mode = "below";
318 next;
319 }
320
321 if ($line =~ m/^$extra_trigger/)
322 {
323 $text_extra .= "$line\n";
324 $add_mode = "below";
325 next;
326 }
327
328 if ($line =~ m/$meta_key/)
329 #if ($line =~ m/^\.\.[^:]+:/)
330 {
331 if ($line =~ m/^\.\.TEXT/)
332 {
333 $add_mode = "above";
334 next;
335 }
336 else
337 {
338 brs_error($file, "Unexpected field $line", $outhandle);
339 # put field name back
340 unshift(@brs_lines, $line);
341 $brs_line_no--;
342 last;
343 }
344 }
345
346 if ($line !~ m/^\s+(-){10,}$/)
347 {
348 if ($add_mode eq "above")
349 {
350 $text_above .= "$line ";
351 }
352 else
353 {
354 $text_below .= "$line ";
355 }
356 }
357 }
358
359 $brs_rec->{TEXTA} .= $text_above if ($text_above ne "");
360 $brs_rec->{TEXTB} .= $text_below if ($text_below ne "");
361 $brs_rec->{TEXTE} .= $text_extra if ($text_extra ne "");
362 }
363 elsif ($field_name =~ /^RF/)
364 {
365 # deal with the field name start with RF, eg. RFTI,RFAN, RFAB...
366 # read zz fields until end of record
367
368 while (scalar(@brs_lines)>0)
369 {
370 my $line_ahead = $brs_lines[0];
371 #last if ($line_ahead =~ m/^\.\.[^:]+:$/);
372 last if ($line_ahead =~ m/$meta_key/);
373 $line = shift(@brs_lines);
374 $brs_line_no++;
375 #if (!defined($brs_rec->{$field_name}))
376 #{
377 #$brs_rec->{$field_name} = [$line];
378 #}
379 #else
380 #{
381 #push(@{$brs_rec->{$field_name}},$line);
382 #}
383
384 if (!defined($brs_rec->{$field_name}))
385 {
386 $brs_rec->{$field_name} = (); #[$line];
387 }
388 push(@{$brs_rec->{$field_name}},$line);
389 }
390 }
391 else
392 {
393 my $field_entry = "";
394 #while ($brs_lines[0] !~ m/^\.\.[^:]+:$/)
395 while ($brs_lines[0] !~ m/$meta_key/)
396 {
397 $field_entry .= shift(@brs_lines);
398 $brs_line_no++;
399 last if (scalar(@brs_lines)==0);
400 }
401
402 if (!defined($brs_rec->{$field_name}))
403 {
404 $brs_rec->{$field_name} = $field_entry;
405 }
406 else
407 {
408 brs_error($file, "$field_name already defined.", $outhandle);
409 }
410 }
411 }
412 else
413 {
414 brs_error($file, "Malformed field: $line.", $outhandle);
415 }
416 }
417
418 # format the programme number if required
419 &format_prognum($brs_rec);
420
421 return ($brs_rec);
422}
423
424my $brs_processed_count = 0;
425
426sub brs_full_record_in_html
427{
428 my ($doc_obj,$cursection,$brs_rec) = @_;
429
430 my $html_table = "";
431 my $table_line = "";
432 my $cell_count = 0;
433 my $long_lines = "";
434
435 my $i;
436 for ($i=0; $i<=$#brs_field_table; $i++)
437 {
438 my $field_name = $brs_field_table[$i]->{'field_name'};
439
440 if (defined($brs_rec->{$field_name}))
441 {
442 my $field_label = $brs_field_table[$i]->{'label'};
443 my $field_value = $brs_rec->{$field_name};
444
445 # these fields get their own line
446 if ($field_label =~ /^(Prog|Item|News|Stock) title$/) {
447 my $safe_value = $field_value;
448 &ghtml::urlsafe ($safe_value);
449 $long_lines .= "<tr valign=top><td><b>$field_label:</b></td><td colspan=5>";
450 $long_lines .= "<a href=\"_httpquerytitle_&q=$safe_value\">$field_value</a></td></tr>\n";
451 $long_lines .= "$field_value </td></tr>\n";
452 } else {
453
454 if (($cell_count>0) && ($cell_count%3 == 0))
455 {
456 $html_table .= "\n<tr valign=top>\n$table_line\n</tr>\n";
457 $table_line = "";
458 }
459 $cell_count++;
460 if ($field_name eq "D001") # a Dublin core style date
461 {
462 my @mon_convert
463 = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
464 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec");
465
466 my ($year,$monnum,$day)
467 = ($field_value =~ m/(\d{4})(\d{2})(\d{2})/);
468 my $moneng = $mon_convert[$monnum-1];
469
470 $field_value = "$day $moneng $year";
471
472 } elsif ($field_name eq "PRNO") {
473 my $safe_value = $field_value;
474 &ghtml::urlsafe ($safe_value);
475 $field_value = "<a href=\"_httpqueryprogname_&q=$safe_value\">$field_value</a>";
476
477 } elsif ($field_name eq "CATN") {
478 my $safe_value = $field_value;
479 &ghtml::urlsafe ($safe_value);
480 $field_value = "<a href=\"_httpquerycatnum_&q=$safe_value\">$field_value</a>";
481
482 } elsif ($field_name eq "T003") {
483 my $safe_value = $field_value;
484 &ghtml::urlsafe ($safe_value);
485 $field_value = "<a href=\"_httpqueryzzabn_&q=$safe_value\">$field_value</a>";
486 }
487 ## Modified 2005
488 if ($field_label =~ /^(RFTI)/){
489 $field_value = join(" ",@{$brs_rec->{'RFTI'}});
490 #print STDERR "**** $field_label, $field_value\n";
491 }
492 $table_line .= "<td><b>$field_label:</b></td><td>$field_value</td>\n";
493 }
494 }
495 }
496
497 if ($cell_count%3 != 0) {
498 $table_line .= "<td colspan=" . (3-($cell_count%3))*2 . "></td>";
499 }
500
501 $html_table .= "\n<tr>\n$table_line\n</tr>\n";
502 $html_table = "<table>\n$long_lines$html_table\n</table>\n";
503
504 $doc_obj->add_utf8_text($cursection, $html_table);
505}
506
507sub process_brs_record
508{
509 my $self = shift (@_);
510
511 my ($processor, $brs_rec, $file, $outhandle) = @_;
512
513 if (defined($brs_rec))
514 {
515 # only include those records in the chosen subcats - these lines
516 # should be commented out to build the entire collection
517
518 # National Sound Archive collection-Stephan
519 #if (!defined ($brs_rec->{'SUBC'}) ||
520 # $brs_rec->{'SUBC'} !~ /^(RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) {
521 # return;
522 #}
523
524 # British Film Institute collection
525 #if (!defined ($brs_rec->{'SUBC'}) ||
526 # $brs_rec->{'SUBC'} !~ /^(LONPROG|NEWS)$/) {
527 # return;
528 #}
529
530 # "the rest" collection
531 #if (!defined ($brs_rec->{'SUBC'}) ||
532 # $brs_rec->{'SUBC'} =~ /^(LONPROG|NEWS|RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) {
533 # return;
534 #}
535 #}
536
537 # modified Chi-Yu Huang:2005
538 if (defined $self->{'bbc_collections'}){
539 # National Sound Archive collection
540 if ($self->{'bbc_collections'} eq "nsa"){
541 #print STDERR "***This is NSA collections\n";
542 if (!defined ($brs_rec->{'SUBC'}) ||
543 $brs_rec->{'SUBC'} !~ /^(RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) {
544 return;
545 }
546 }
547 elsif ($self->{'bbc_collections'} eq "bfi"){
548 print STDERR "***This is BFI collections\n";
549 # British Film Institute collection
550 if (!defined ($brs_rec->{'SUBC'}) ||
551 $brs_rec->{'SUBC'} !~ /^(LONPROG|NEWS)$/) {
552 return;
553 }
554 }
555 elsif ($self->{'bbc_collections'} eq "bbcother"){
556 #print STDERR "*** This is bbc Other collections\n";
557 # "the rest" collection
558 if (!defined ($brs_rec->{'SUBC'}) ||
559 $brs_rec->{'SUBC'} =~ /^(LONPROG|NEWS|RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) {
560 return;
561 }
562 } else {
563 # build the whole collections
564 #print STDERR "*** This is to build the whole collection\n";
565 }
566 } else {
567 #build the whole collections
568 }
569
570 # don't include the secret stuff ;-)
571 if (defined ($brs_rec->{'SUBC'}) && $brs_rec->{'SUBC'} eq "HISTORY") {
572 print STDERR "\nexcluding secret stuff - line $brs_line_no in $file\n";
573 return;
574 }
575
576
577 my $doc_obj = new doc ($file, "indexed_doc");
578 $brs_processed_count++;
579
580 my $cursection = $doc_obj->get_top_section();
581
582 my $found_match = "no";
583
584 my $pot_title;
585 foreach $pot_title ( @brs_title_list )
586 {
587 if (defined ($brs_rec->{$pot_title}))
588 {
589 my $title = "";
590
591 if ($pot_title =~ m/^zz/)
592 {
593 my $zz_join = join(" ", @{$brs_rec->{$pot_title}});
594 my @zz_split = split(" ", $zz_join);
595 map { $title .= "$_ " if ($_ !~ m/^xx/); } @zz_split;
596 }
597 else
598 {
599 if ($pot_title =~ /^RF/){
600 $title = join (" ", @{$brs_rec->{$pot_title}});
601 } else{
602 $title = $brs_rec->{$pot_title};
603 }
604 #$title = $brs_rec->{$pot_title};
605 }
606
607 #print STDERR "*** What is the title=$title\n";
608
609 my $tl_ref = $doc_obj->get_metadata ($cursection, "Title");
610
611 if (scalar(@$tl_ref==0))
612 {
613 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
614 }
615 else
616 {
617 my $exists = "no";
618
619 map { $exists = "yes" if ($title eq $_); } @$tl_ref;
620 if ($exists eq "no")
621 {
622 $doc_obj->add_utf8_metadata($cursection, "Title", $title);
623 }
624 }
625 $found_match = "yes";
626 }
627 }
628 if ($found_match eq "no")
629 {
630 if (defined($brs_rec->{TEXTA}))
631 {
632 my $sub_title = substr($brs_rec->{TEXTA},0,60) . " ... ";
633 $doc_obj->add_utf8_metadata ($cursection, "Title", $sub_title);
634 }
635 elsif (defined($brs_rec->{TEXTB}))
636 {
637 my $sub_title = substr($brs_rec->{TEXTB},0,60) . " ... ";
638 $doc_obj->add_utf8_metadata ($cursection, "Title", $sub_title);
639 }
640 else
641 {
642 print $outhandle "\nNo title or text for record ending at line ";
643 print $outhandle $brs_line_no-1, " in $file\n";
644 }
645 }
646
647 if (defined($brs_rec->{'T001'})) # item title
648 {
649 $doc_obj->add_utf8_metadata ($cursection, "ItemTitle", $brs_rec->{'T001'});
650 }
651
652 if (defined($brs_rec->{'D001'})) # date
653 {
654 $doc_obj->add_utf8_metadata ($cursection, "Date", $brs_rec->{'D001'});
655 }
656 else
657 {
658 print $outhandle "\nNo date for record ending at line ";
659 print $outhandle $brs_line_no-1, " in $file\n";
660 }
661
662 if (defined($brs_rec->{'PRNO'})) # programme number
663 {
664 $doc_obj->add_utf8_metadata ($cursection, "ProgNumber", $brs_rec->{'PRNO'});
665 }
666
667 if (defined($brs_rec->{'CATN'})) # catalogue number
668 {
669 $doc_obj->add_utf8_metadata ($cursection, "CatNum", $brs_rec->{'CATN'});
670 }
671
672 # RFAN, RFAB and T003 fields
673 if (defined($brs_rec->{'RFAB'}) || defined($brs_rec->{'RFAN'}) || defined($brs_rec->{'T003'}))
674 {
675 my $zzabn = "";
676 if (defined($brs_rec->{'RFAB'})) {
677 foreach my $a (@{$brs_rec->{'RFAB'}}) {
678 $a =~ s/[^a-zA-Z0-9]//g;
679 $zzabn .= " " . $a;
680 }
681 }
682 if (defined($brs_rec->{'RFAN'})) {
683 foreach my $a (@{$brs_rec->{'RFAN'}}) {
684 $a =~ s/[^a-zA-Z0-9]//g;
685 $zzabn .= " " . $a;
686 }
687 }
688 if (defined($brs_rec->{'T003'})) {
689 my $value = $brs_rec->{'T003'};
690 $value =~ s/[^a-zA-Z0-9]//g;
691 $zzabn .= " " . $value;
692 }
693 $doc_obj->add_utf8_metadata ($cursection, "zzabn", $zzabn);
694 }
695
696 if (defined($brs_rec->{'RFN'}))
697 {
698 my $name;
699 foreach $name (@{$brs_rec->{'RFN'}})
700 {
701 $doc_obj->add_utf8_metadata ($cursection, "People", $name);
702 }
703 }
704
705 brs_full_record_in_html($doc_obj,$cursection,$brs_rec);
706
707 if (defined($brs_rec->{TEXTA}))
708 {
709
710 my $desc_texta = "<h3>Description</h3>$brs_rec->{TEXTA}";
711 $doc_obj->add_utf8_text ($cursection, $desc_texta);
712
713 if (defined($brs_rec->{TEXTE}))
714 {
715 my $raw_texte = $brs_rec->{TEXTE};
716 $raw_texte =~ s/^\s*>\s*/<li>/mg;
717
718 my $test_empty = $raw_texte;
719 $test_empty =~ s/(<li>)+//g;
720
721 if ($test_empty ne "")
722 {
723 my $desc_texte = $raw_texte;
724 $desc_texte =~ s/^<li>(.*)$/<li><i>$1<\/i>/mg;
725 $desc_texte = "<p>Additional:<ul>$desc_texte</ul>";
726 $doc_obj->add_utf8_text ($cursection, $desc_texte);
727 }
728 }
729 }
730
731 if (defined($brs_rec->{TEXTB}))
732 {
733 my $desc_textb = "<h3>Comments</h3>$brs_rec->{TEXTB}";
734 $doc_obj->add_utf8_text($cursection, $desc_textb);
735 }
736
737 my $full_text = "";
738 $full_text .= "$brs_rec->{TEXTA}\n" if (defined($brs_rec->{TEXTA}));
739 $full_text .= "$brs_rec->{TEXTE}\n" if (defined($brs_rec->{TEXTE}));
740 $full_text .= "$brs_rec->{TEXTB}\n" if (defined($brs_rec->{TEXTB}));
741
742 my @zz_list = ( "RFPG", "RFLO", "RFN" );
743 my $zz;
744 foreach $zz ( @zz_list)
745 {
746 if (defined($brs_rec->{$zz}))
747 {
748 my $filtered_text = "";
749
750 my $name;
751 foreach $name (@{$brs_rec->{$zz}})
752 {
753 my @split_zz = split(' ',$name);
754 map { $filtered_text .= "$_ " if ($_ =~ m/^xx/) } @split_zz
755 }
756
757 $full_text .= "$filtered_text\n";
758 }
759 }
760
761 # format zz fields (tail end of record)
762 my $zz_html = "<table>\n";
763
764 my $pot_zz;
765 foreach $pot_zz (keys %{$brs_rec})
766 {
767 if ($pot_zz =~ m/^RF/)
768 {
769 my $zz = $pot_zz;
770 my $name;
771 foreach $name (@{$brs_rec->{$zz}})
772 {
773 if ($zz eq "RFN") {
774 my $safe_name = $name;
775 &ghtml::urlsafe ($safe_name);
776 $zz_html .= "<tr><td><b>$zz:</b></td><td><a href=\"_httpquerypeople_&q=$safe_name\">$name</a></td></tr>\n";
777 } elsif ($zz =~ /^RFA[BN]$/) {
778 my $safe_name = $name;
779 &ghtml::urlsafe ($safe_name);
780 $zz_html .= "<tr><td><b>$zz:</b></td><td><a href=\"_httpqueryzzabn_&q=$safe_name\">$name</a></td></tr>\n";
781
782 } else {
783 $zz_html .= "<tr><td><b>$zz:</b></td><td>$name</td></tr>\n";
784 }
785 }
786 }
787 }
788 $zz_html .= "</table>\n";
789 $doc_obj->add_utf8_text($cursection, $zz_html);
790
791 # add OID - we'll use the catalog number (hoping it's unique)
792 my $cat_num = $brs_rec->{'CATN'};
793 if (!defined $cat_num) {
794 print $outhandle "\n***** No catalogue number for record ending at line ";
795 print $outhandle $brs_line_no-1, " in $file - THIS RECORD WILL BE IGNORED\n";
796 foreach my $v (keys(%$brs_rec)) {
797 if (ref($brs_rec->{$v}) eq "ARRAY") {
798 foreach my $i (@{$brs_rec->{$v}}) {
799 print STDERR "$v -> $i\n";
800 }
801 } else {
802 print STDERR $v . " -> " . $brs_rec->{$v} . "\n";
803 }
804 }
805 return;
806 }
807 if (defined ($cat_num_list{$cat_num})) {
808 print $outhandle "WARNING: catalog number $cat_num used more than once\n";
809 }
810 $doc_obj->set_OID ("bbc" . $cat_num);
811 $cat_num_list{$cat_num} = 1;
812
813 # process the document
814 $processor->process($doc_obj);
815 }
816 else
817 {
818 print $outhandle "BRS record empty\n";
819 }
820
821}
822
823sub read {
824 my $self = shift (@_);
825 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
826 my $outhandle = $self->{'outhandle'};
827
828 my $filename = &util::filename_cat($base_dir, $file);
829# return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
830 if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
831 return undef;
832 }
833# my $plugin_name = ref ($self);
834 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
835
836 # create a new document
837# my $doc_obj = new doc ($filename, "indexed_doc");
838
839 # read in file ($text will be in utf8)
840# my $text = "";
841# $self->read_file ($filename, \$text);
842
843# if ($text !~ /\w/) {
844# print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
845# return 0;
846# }
847
848 # text is always plain ascii
849 undef $/;
850 open (FILE, $filename) || die;
851 my $text = <FILE>;
852 close FILE;
853 $/ = "\n";
854
855
856 # include any metadata passed in from previous plugins
857 # note that this metadata is associated with the top level section
858# $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
859
860 print $outhandle "BBC Sound Archive Plug for BRS format: processing $file\n"
861 if $self->{'verbosity'} > 1;
862
863 # reset line number count
864 $brs_line_no = 0;
865
866 my @records = split /\*\*\* BRS DOCUMENT BOUNDARY \*\*\*\s+/, $text;
867 $text = "";
868
869 foreach $record (@records) {
870 next if $record !~ /\w/; # first record will be empty
871 my $brs_rec = &read_brs_record(\$record, $file, $self->{'outhandle'});
872 &process_brs_record ($self, $processor, $brs_rec, $file, $self->{'outhandle'});
873 }
874
875 $self->{'num_processed'} += $brs_processed_count;
876
877 print $outhandle "\nNumber of BRS records = $brs_doc_count\n";
878 print $outhandle "Number of BRS records processed = $brs_processed_count\n";
879
880 return 1; # processed the file
881}
882
883sub end {
884 my $self = shift (@_);
885 my ($processor) = @_;
886
887 $processor->close_file_output() if defined $processor;
888}
889
890sub format_prognum {
891 my ($brs_rec) = @_;
892
893 return unless defined $brs_rec->{'PRNO'};
894
895 my $old_num = $brs_rec->{'PRNO'};
896
897 if (defined $brs_rec->{'magazine'}) {
898 if ($brs_rec->{'PRNO'} =~ /^\S\S /) { # space at position 3
899 if ($brs_rec->{'magazine'} =~ /(\d+)/) {
900 # algorithm A
901 my $magnumber = $1;
902 $brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/;
903 $brs_rec->{'PRNO'} = $2 . $1 . $3 . $magnumber;
904 } else {
905 # common algorithm
906 $brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/;
907 $brs_rec->{'PRNO'} = $2 . $1 . $3;
908 }
909 }
910 } else {
911 if ($brs_rec->{'PRNO'} =~ /^\S\S / && $brs_rec->{'PRNO'} !~ /:/) { # space at position 3 and no colon
912 # common algorithm
913 $brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/;
914 $brs_rec->{'PRNO'} = $2 . $1 . $3;
915 } elsif ($brs_rec->{'PRNO'} =~ /^(\S):(\S\S) (\S\S)(\S+)/) { # colon at position 2 and space at position 5
916 # algorithm B
917 $brs_rec->{'PRNO'} = $3 . $2 . $4 . $1;
918 }
919 }
920
921# if ($brs_rec->{'PRNO'} ne $old_num) {
922# print STDERR "\n$old_num ... " . $brs_rec->{'PRNO'} . "\n";
923# }
924}
925
9261;
Note: See TracBrowser for help on using the repository browser.