# plugin which processes an the BBC Sound archive exported in an ASCII format package BRSPlug; use BasPlug; use ghtml; sub BEGIN { @ISA = ('BasPlug'); unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); } my $bbc_collections_list = [{'name' => "all", # all collections 'desc' => "{BRSPlug.bbc_collections.all}"}, {'name' => "nsa", # National Sound Archive 'desc' => "{BRSPlug.bbc_collections.nsa"}, {'name' => "bfi", # Brisith File Institute 'desc' => "{BRSPlug.bbc_collections.bfi"}, {'name' => "bbcother", # BBC Other 'desc' => "{BRSPlug.bbc_collections.bbcother"}]; my $arguments = [{ 'name' => "bbc_collections", 'desc' => "{BRSPlug.bbc_collections}", #'type' => $bbc_collections_list, 'type' => "string", 'reqd' => "no", 'deft' => "all"}]; %Exclude = (); my $options = { 'name' => "BRSPlug", 'desc' => "BRSPlug deals with the BBC collections. There are three BBC collections we deliver, which are National Sound Archive (nsa), British Film Institute (bfi), and BBC Other (bbcother) (everything does not fit into one of the first two). Four options are provided which are all-build all collections, nsa-build the NSA collections, bfi-build the BFI collections, bbcother-build the OTHER collections.", 'abstract' => "yes", 'inherits' => "no", 'args' => $arguments }; # Note: sext is short for scrollable text #field_id label disp_type crit_type width height brs_para #---------------------------------------------------------------- #10 sport prdcr text text 30 1 T006 #1 text sext text 40 16 TEXT #16 scol ref text text text 30 1 SCOL #19 date text date 10 1 D001 #25 Item title text text 20 1 T001 #26 B/W Seqs text text 30 1 S001 #27 Medium text text 30 1 S002 #28 shot type text text 30 1 S003 #29 B/W Prog/Item text text 30 1 S004 #30 Seqs text text 10 1 S005 #31 Format text text 30 1 S006 #32 prog/item text text 30 1 S007 #35 reg libno text text 30 1 S008 #36 reg tape no text text 30 1 S009 #37 C Indicator text text 30 1 S010 #38 C Holder text text 30 1 T002 #39 cprd ind text text 30 1 S011 #40 cprd name text text 30 1 T003 #44 reference sext text 30 5 R001 #49 prog num text text 10 1 PRNO #59 news title text text 30 1 T005 #65 cat num text text 10 1 CATN #72 Record number text text 30 1 T007 #73 rx_date text date 10 1 S013 #74 Music performer text text 30 2 T008 #75 Composer text text 30 2 T009 #76 mono/stereo text text 30 1 S014 #77 music medium text text 30 1 S015 #82 Sound Tech text text 30 1 S016 #83 Music publisher text text 30 1 S017 #84 music tape text text 30 1 T012 #89 catalogue text text 3 1 CODE #104 subcat text text 10 1 SUBC #106 prog title text text 40 1 TTLE #109 screen type text text 10 1 DOCT #110 stock title text text 30 1 T013 #113 stock ln 4 text text 30 1 T015 #114 stock lib 1 text text 30 1 S020 #115 stock lib 2 text text 30 1 S021 #---------------------------------------------------------------- my @brs_field_table = ( { field_name => "TTLE", field_id => 106, label => "Prog title", disp_type => "text", crit_type => "text", width => 40, height => 1 }, { field_name => "T001", field_id => 25, label => "Item title", disp_type => "text", crit_type => "text", width => 20, height => 1 }, { field_name => "T005", field_id => 59, label => "News title", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "T013", field_id => 110, label => "Stock title", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "T007", field_id => 72, label => "Record number", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "RFTI", field_id => 116, label => "RFTI", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "TEXT", field_id => 1, label => "Text", disp_type => "scrl", crit_type => "text", width => 40, height => 16 }, { field_name => "T006", field_id => 10, label => "Sport prdcr", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "SCOL", field_id => 16, label => "Scol ref text", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "D001", field_id => 19, label => "Date", disp_type => "text", crit_type => "date", width => 10, height => 1 }, { field_name => "S001", field_id => 26, label => "B/W Seqs", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S002", field_id => 27, label => "Medium", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S003", field_id => 28, label => "Shot type", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S004", field_id => 29, label => "B/W Prog/Item", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S005", field_id => 30, label => "Seqs", disp_type => "text", crit_type => "text", width => 10, height => 1 }, { field_name => "S006", field_id => 31, label => "Format", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S007", field_id => 32, label => "Prog/item", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S008", field_id => 35, label => "Reg libno", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S009", field_id => 36, label => "Reg tape no", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S010", field_id => 37, label => "C Indicator", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "T002", field_id => 38, label => "C Holder", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S011", field_id => 39, label => "Cprd ind", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "T003", field_id => 40, label => "Cprd name", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "R001", field_id => 44, label => "Reference", disp_type => "scrl", crit_type => "text", width => 30, height => 5 }, { field_name => "PRNO", field_id => 49, label => "Prog num", disp_type => "text", crit_type => "text", width => 10, height => 1 }, { field_name => "CATN", field_id => 65, label => "Cat num", disp_type => "text", crit_type => "text", width => 10, height => 1 }, { field_name => "S013", field_id => 73, label => "Rx_date", disp_type => "text", crit_type => "date", width => 10, height => 1 }, { field_name => "T008", field_id => 74, label => "Music performer", disp_type => "text", crit_type => "text", width => 30, height => 2 }, { field_name => "T009", field_id => 75, label => "Composer", disp_type => "text", crit_type => "text", width => 30, height => 2 }, { field_name => "S014", field_id => 76, label => "Mono/stereo", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S015", field_id => 77, label => "Music medium", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S016", field_id => 82, label => "Sound Tech", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S017", field_id => 83, label => "Music publisher", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "T012", field_id => 84, label => "Music tape", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "CODE", field_id => 89, label => "Catalogue", disp_type => "text", crit_type => "text", width => 3, height => 1 }, { field_name => "SUBC", field_id => 104, label => "Subcat", disp_type => "text", crit_type => "text", width => 10, height => 1 }, { field_name => "DOCT", field_id => 109, label => "Screen type", disp_type => "text", crit_type => "text", width => 10, height => 1 }, { field_name => "T015", field_id => 113, label => "Stock ln 4", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S020", field_id => 114, label => "Stock lib 1", disp_type => "text", crit_type => "text", width => 30, height => 1 }, { field_name => "S021", field_id => 115, label => "Stock lib 2", disp_type => "text", crit_type => "text", width => 30, height => 1 } ); my @brs_title_list = ("TTLE", "T001", "T005", "T013", "T007", "RFTI"); my %cat_num_list = (); sub new { my $class = shift (@_); my $self = new BasPlug ($class, @_); $self->{'plugin_type'} = "BRSPlug"; # 14-05-02 To allow for proper inheritance of arguments - John Thompson my $option_list = $self->{'option_list'}; push( @{$option_list}, $options ); if (!parsargv::parse(\@_, q^bbc_collections/(all|nsa|bfi|bbcother)/^, \$self->{'bbc_collections'}, "allow_extra_options")) { print STDERR "\nIncorrect options passed to BRSPlug, check your collect.cfg configuration file\n"; $self->print_txt_usage(""); # Use default resource bundle die "\n"; } #my ($class) = @_; #my $self = new BasPlug ($class, @_); return bless $self, $class; } sub get_default_process_exp { my $self = shift (@_); # return q^(?i)\.brs$^; return q^TVRD.+^; } my $brs_doc_count = 0; my $brs_dot_count = 0; my $brs_line_no = 0; sub brs_error { my ($file, $mess, $outhandle) = @_; print $outhandle "Malformed BRS recorded at line $brs_line_no in $file:\n"; print $outhandle " $mess\n"; } sub read_brs_record { my ($recordref, $file, $outhandle) = @_; my $extra_trigger = '\s*>'; my $text_divider = "-" x 78; #my $text_end = '^\.\.[^:]+:$'; my $meta_key = '^\.\.(\w+):$'; # Print "." to signify processing if enough records have been read in $brs_doc_count++; if (($brs_doc_count % 10) == 0) { print $outhandle "."; $brs_dot_count++; print $outhandle "\n" if (($brs_dot_count % 80) == 0); } my $brs_rec = undef; my $line = undef; # read in the record my @brs_lines = split /\n/, $$recordref; while (scalar(@brs_lines)>0) { $line = shift(@brs_lines); $line =~ s/\cM//g; $brs_line_no++; # if ($line =~ m/^\.\.([^:]+):$/) if ($line =~ m/$meta_key/) { my $field_name = $1; if ($field_name eq "TEXT") { my $text_above = ""; my $text_extra = ""; my $text_below = ""; my $add_mode = "above"; while (scalar(@brs_lines)>0) { last if ($brs_lines[0] =~ m/$meta_key/); $line = shift(@brs_lines); $brs_line_no++; if ($line =~ m/^$text_divider/) { $add_mode = "below"; next; } if ($line =~ m/^$extra_trigger/) { $text_extra .= "$line\n"; $add_mode = "below"; next; } if ($line =~ m/$meta_key/) #if ($line =~ m/^\.\.[^:]+:/) { if ($line =~ m/^\.\.TEXT/) { $add_mode = "above"; next; } else { brs_error($file, "Unexpected field $line", $outhandle); # put field name back unshift(@brs_lines, $line); $brs_line_no--; last; } } if ($line !~ m/^\s+(-){10,}$/) { if ($add_mode eq "above") { $text_above .= "$line "; } else { $text_below .= "$line "; } } } $brs_rec->{TEXTA} .= $text_above if ($text_above ne ""); $brs_rec->{TEXTB} .= $text_below if ($text_below ne ""); $brs_rec->{TEXTE} .= $text_extra if ($text_extra ne ""); } elsif ($field_name =~ /^RF/) { # deal with the field name start with RF, eg. RFTI,RFAN, RFAB... # read zz fields until end of record while (scalar(@brs_lines)>0) { my $line_ahead = $brs_lines[0]; #last if ($line_ahead =~ m/^\.\.[^:]+:$/); last if ($line_ahead =~ m/$meta_key/); $line = shift(@brs_lines); $brs_line_no++; #if (!defined($brs_rec->{$field_name})) #{ #$brs_rec->{$field_name} = [$line]; #} #else #{ #push(@{$brs_rec->{$field_name}},$line); #} if (!defined($brs_rec->{$field_name})) { $brs_rec->{$field_name} = (); #[$line]; } push(@{$brs_rec->{$field_name}},$line); } } else { my $field_entry = ""; #while ($brs_lines[0] !~ m/^\.\.[^:]+:$/) while ($brs_lines[0] !~ m/$meta_key/) { $field_entry .= shift(@brs_lines); $brs_line_no++; last if (scalar(@brs_lines)==0); } if (!defined($brs_rec->{$field_name})) { $brs_rec->{$field_name} = $field_entry; } else { brs_error($file, "$field_name already defined.", $outhandle); } } } else { brs_error($file, "Malformed field: $line.", $outhandle); } } # format the programme number if required &format_prognum($brs_rec); return ($brs_rec); } my $brs_processed_count = 0; sub brs_full_record_in_html { my ($doc_obj,$cursection,$brs_rec) = @_; my $html_table = ""; my $table_line = ""; my $cell_count = 0; my $long_lines = ""; my $i; for ($i=0; $i<=$#brs_field_table; $i++) { my $field_name = $brs_field_table[$i]->{'field_name'}; if (defined($brs_rec->{$field_name})) { my $field_label = $brs_field_table[$i]->{'label'}; my $field_value = $brs_rec->{$field_name}; # these fields get their own line if ($field_label =~ /^(Prog|Item|News|Stock) title$/) { my $safe_value = $field_value; &ghtml::urlsafe ($safe_value); $long_lines .= "$field_label:"; $long_lines .= "$field_value\n"; $long_lines .= "$field_value \n"; } else { if (($cell_count>0) && ($cell_count%3 == 0)) { $html_table .= "\n\n$table_line\n\n"; $table_line = ""; } $cell_count++; if ($field_name eq "D001") # a Dublin core style date { my @mon_convert = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"); my ($year,$monnum,$day) = ($field_value =~ m/(\d{4})(\d{2})(\d{2})/); my $moneng = $mon_convert[$monnum-1]; $field_value = "$day $moneng $year"; } elsif ($field_name eq "PRNO") { my $safe_value = $field_value; &ghtml::urlsafe ($safe_value); $field_value = "$field_value"; } elsif ($field_name eq "CATN") { my $safe_value = $field_value; &ghtml::urlsafe ($safe_value); $field_value = "$field_value"; } elsif ($field_name eq "T003") { my $safe_value = $field_value; &ghtml::urlsafe ($safe_value); $field_value = "$field_value"; } if ($field_label =~ /^(RFTI)/){ $field_value = join(" ",@{$brs_rec->{'RFTI'}}); #print STDERR "**** $field_label, $field_value\n"; } $table_line .= "$field_label:$field_value\n"; } } } if ($cell_count%3 != 0) { $table_line .= ""; } $html_table .= "\n\n$table_line\n\n"; $html_table = "\n$long_lines$html_table\n
\n"; $doc_obj->add_utf8_text($cursection, $html_table); } sub process_brs_record { my $self = shift (@_); my ($processor, $brs_rec, $file, $outhandle) = @_; if (defined($brs_rec)) { # only include those records in the chosen subcats - these lines # should be commented out to build the entire collection # National Sound Archive collection-Stephan #if (!defined ($brs_rec->{'SUBC'}) || # $brs_rec->{'SUBC'} !~ /^(RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) { # return; #} # British Film Institute collection #if (!defined ($brs_rec->{'SUBC'}) || # $brs_rec->{'SUBC'} !~ /^(LONPROG|NEWS)$/) { # return; #} # "the rest" collection #if (!defined ($brs_rec->{'SUBC'}) || # $brs_rec->{'SUBC'} =~ /^(LONPROG|NEWS|RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) { # return; #} #} # modified Chi-Yu Huang if (defined $self->{'bbc_collections'}){ # National Sound Archive collection if ($self->{'bbc_collections'} eq "nsa"){ #print STDERR "***This is NSA collections\n"; if (!defined ($brs_rec->{'SUBC'}) || $brs_rec->{'SUBC'} !~ /^(RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) { return; } } elsif ($self->{'bbc_collections'} eq "bfi"){ print STDERR "***This is BFI collections\n"; # British Film Institute collection if (!defined ($brs_rec->{'SUBC'}) || $brs_rec->{'SUBC'} !~ /^(LONPROG|NEWS)$/) { return; } } elsif ($self->{'bbc_collections'} eq "bbcother"){ #print STDERR "*** This is bbc Other collections\n"; # "the rest" collection if (!defined ($brs_rec->{'SUBC'}) || $brs_rec->{'SUBC'} =~ /^(LONPROG|NEWS|RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) { return; } } else { # build the whole collections #print STDERR "*** This is to build the whole collection\n"; } } else { #build the whole collections } # don't include the secret stuff ;-) if (defined ($brs_rec->{'SUBC'}) && $brs_rec->{'SUBC'} eq "HISTORY") { print STDERR "\nexcluding secret stuff - line $brs_line_no in $file\n"; return; } my $doc_obj = new doc ($file, "indexed_doc"); $brs_processed_count++; my $cursection = $doc_obj->get_top_section(); my $found_match = "no"; my $pot_title; foreach $pot_title ( @brs_title_list ) { if (defined ($brs_rec->{$pot_title})) { my $title = ""; if ($pot_title =~ m/^zz/) { my $zz_join = join(" ", @{$brs_rec->{$pot_title}}); my @zz_split = split(" ", $zz_join); map { $title .= "$_ " if ($_ !~ m/^xx/); } @zz_split; } else { if ($pot_title =~ /^RF/){ $title = join (" ", @{$brs_rec->{$pot_title}}); } else{ $title = $brs_rec->{$pot_title}; } #$title = $brs_rec->{$pot_title}; } #print STDERR "*** What is the title=$title\n"; my $tl_ref = $doc_obj->get_metadata ($cursection, "Title"); if (scalar(@$tl_ref==0)) { $doc_obj->add_utf8_metadata ($cursection, "Title", $title); } else { my $exists = "no"; map { $exists = "yes" if ($title eq $_); } @$tl_ref; if ($exists eq "no") { $doc_obj->add_utf8_metadata($cursection, "Title", $title); } } $found_match = "yes"; } } if ($found_match eq "no") { if (defined($brs_rec->{TEXTA})) { my $sub_title = substr($brs_rec->{TEXTA},0,60) . " ... "; $doc_obj->add_utf8_metadata ($cursection, "Title", $sub_title); } elsif (defined($brs_rec->{TEXTB})) { my $sub_title = substr($brs_rec->{TEXTB},0,60) . " ... "; $doc_obj->add_utf8_metadata ($cursection, "Title", $sub_title); } else { print $outhandle "\nNo title or text for record ending at line "; print $outhandle $brs_line_no-1, " in $file\n"; } } if (defined($brs_rec->{'T001'})) # item title { $doc_obj->add_utf8_metadata ($cursection, "ItemTitle", $brs_rec->{'T001'}); } if (defined($brs_rec->{'D001'})) # date { $doc_obj->add_utf8_metadata ($cursection, "Date", $brs_rec->{'D001'}); } else { print $outhandle "\nNo date for record ending at line "; print $outhandle $brs_line_no-1, " in $file\n"; } if (defined($brs_rec->{'PRNO'})) # programme number { $doc_obj->add_utf8_metadata ($cursection, "ProgNumber", $brs_rec->{'PRNO'}); } if (defined($brs_rec->{'CATN'})) # catalogue number { $doc_obj->add_utf8_metadata ($cursection, "CatNum", $brs_rec->{'CATN'}); } # RFAN, RFAB and T003 fields if (defined($brs_rec->{'RFAB'}) || defined($brs_rec->{'RFAN'}) || defined($brs_rec->{'T003'})) { my $zzabn = ""; if (defined($brs_rec->{'RFAB'})) { foreach my $a (@{$brs_rec->{'RFAB'}}) { $a =~ s/[^a-zA-Z0-9]//g; $zzabn .= " " . $a; } } if (defined($brs_rec->{'RFAN'})) { foreach my $a (@{$brs_rec->{'RFAN'}}) { $a =~ s/[^a-zA-Z0-9]//g; $zzabn .= " " . $a; } } if (defined($brs_rec->{'T003'})) { my $value = $brs_rec->{'T003'}; $value =~ s/[^a-zA-Z0-9]//g; $zzabn .= " " . $value; } $doc_obj->add_utf8_metadata ($cursection, "zzabn", $zzabn); } if (defined($brs_rec->{'RFN'})) { my $name; foreach $name (@{$brs_rec->{'RFN'}}) { $doc_obj->add_utf8_metadata ($cursection, "People", $name); } } brs_full_record_in_html($doc_obj,$cursection,$brs_rec); if (defined($brs_rec->{TEXTA})) { my $desc_texta = "

Description

$brs_rec->{TEXTA}"; $doc_obj->add_utf8_text ($cursection, $desc_texta); if (defined($brs_rec->{TEXTE})) { my $raw_texte = $brs_rec->{TEXTE}; $raw_texte =~ s/^\s*>\s*/
  • /mg; my $test_empty = $raw_texte; $test_empty =~ s/(
  • )+//g; if ($test_empty ne "") { my $desc_texte = $raw_texte; $desc_texte =~ s/^
  • (.*)$/
  • $1<\/i>/mg; $desc_texte = "

    Additional:

    "; $doc_obj->add_utf8_text ($cursection, $desc_texte); } } } if (defined($brs_rec->{TEXTB})) { my $desc_textb = "

    Comments

    $brs_rec->{TEXTB}"; $doc_obj->add_utf8_text($cursection, $desc_textb); } my $full_text = ""; $full_text .= "$brs_rec->{TEXTA}\n" if (defined($brs_rec->{TEXTA})); $full_text .= "$brs_rec->{TEXTE}\n" if (defined($brs_rec->{TEXTE})); $full_text .= "$brs_rec->{TEXTB}\n" if (defined($brs_rec->{TEXTB})); my @zz_list = ( "RFPG", "RFLO", "RFN" ); my $zz; foreach $zz ( @zz_list) { if (defined($brs_rec->{$zz})) { my $filtered_text = ""; my $name; foreach $name (@{$brs_rec->{$zz}}) { my @split_zz = split(' ',$name); map { $filtered_text .= "$_ " if ($_ =~ m/^xx/) } @split_zz } $full_text .= "$filtered_text\n"; } } # format zz fields (tail end of record) my $zz_html = "\n"; my $pot_zz; foreach $pot_zz (keys %{$brs_rec}) { if ($pot_zz =~ m/^RF/) { my $zz = $pot_zz; my $name; foreach $name (@{$brs_rec->{$zz}}) { if ($zz eq "RFN") { my $safe_name = $name; &ghtml::urlsafe ($safe_name); $zz_html .= "\n"; } elsif ($zz =~ /^RFA[BN]$/) { my $safe_name = $name; &ghtml::urlsafe ($safe_name); $zz_html .= "\n"; } else { $zz_html .= "\n"; } } } } $zz_html .= "
    $zz:$name
    $zz:$name
    $zz:$name
    \n"; $doc_obj->add_utf8_text($cursection, $zz_html); # add OID - we'll use the catalog number (hoping it's unique) my $cat_num = $brs_rec->{'CATN'}; if (!defined $cat_num) { print $outhandle "\n***** No catalogue number for record ending at line "; print $outhandle $brs_line_no-1, " in $file - THIS RECORD WILL BE IGNORED\n"; foreach my $v (keys(%$brs_rec)) { if (ref($brs_rec->{$v}) eq "ARRAY") { foreach my $i (@{$brs_rec->{$v}}) { print STDERR "$v -> $i\n"; } } else { print STDERR $v . " -> " . $brs_rec->{$v} . "\n"; } } return; } if (defined ($cat_num_list{$cat_num})) { print $outhandle "WARNING: catalog number $cat_num used more than once\n"; } $doc_obj->set_OID ("bbc" . $cat_num); $cat_num_list{$cat_num} = 1; # process the document $processor->process($doc_obj); } else { print $outhandle "BRS record empty\n"; } } sub read { my $self = shift (@_); my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; my $outhandle = $self->{'outhandle'}; my $filename = &util::filename_cat($base_dir, $file); # return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/; if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) { return undef; } # my $plugin_name = ref ($self); $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up # create a new document # my $doc_obj = new doc ($filename, "indexed_doc"); # read in file ($text will be in utf8) # my $text = ""; # $self->read_file ($filename, \$text); # if ($text !~ /\w/) { # print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'}; # return 0; # } # text is always plain ascii undef $/; open (FILE, $filename) || die; my $text = ; close FILE; $/ = "\n"; # include any metadata passed in from previous plugins # note that this metadata is associated with the top level section # $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata); print $outhandle "BBC Sound Archive Plug for BRS format: processing $file\n" if $self->{'verbosity'} > 1; # reset line number count $brs_line_no = 0; my @records = split /\*\*\* BRS DOCUMENT BOUNDARY \*\*\*\s+/, $text; $text = ""; foreach $record (@records) { next if $record !~ /\w/; # first record will be empty my $brs_rec = &read_brs_record(\$record, $file, $self->{'outhandle'}); &process_brs_record ($self, $processor, $brs_rec, $file, $self->{'outhandle'}); } $self->{'num_processed'} += $brs_processed_count; print $outhandle "\nNumber of BRS records = $brs_doc_count\n"; print $outhandle "Number of BRS records processed = $brs_processed_count\n"; return 1; # processed the file } sub end { my $self = shift (@_); my ($processor) = @_; $processor->close_file_output() if defined $processor; } sub format_prognum { my ($brs_rec) = @_; return unless defined $brs_rec->{'PRNO'}; my $old_num = $brs_rec->{'PRNO'}; if (defined $brs_rec->{'magazine'}) { if ($brs_rec->{'PRNO'} =~ /^\S\S /) { # space at position 3 if ($brs_rec->{'magazine'} =~ /(\d+)/) { # algorithm A my $magnumber = $1; $brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/; $brs_rec->{'PRNO'} = $2 . $1 . $3 . $magnumber; } else { # common algorithm $brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/; $brs_rec->{'PRNO'} = $2 . $1 . $3; } } } else { if ($brs_rec->{'PRNO'} =~ /^\S\S / && $brs_rec->{'PRNO'} !~ /:/) { # space at position 3 and no colon # common algorithm $brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/; $brs_rec->{'PRNO'} = $2 . $1 . $3; } elsif ($brs_rec->{'PRNO'} =~ /^(\S):(\S\S) (\S\S)(\S+)/) { # colon at position 2 and space at position 5 # algorithm B $brs_rec->{'PRNO'} = $3 . $2 . $4 . $1; } } # if ($brs_rec->{'PRNO'} ne $old_num) { # print STDERR "\n$old_num ... " . $brs_rec->{'PRNO'} . "\n"; # } } 1;