# plugin which processes an the BBC Sound archive exported in an ASCII format

package BRSPlug;

use BasPlug;
use ghtml;

sub BEGIN 
{
    @ISA = ('BasPlug');
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
}

%Exclude = ();


# Note: sext is short for scrollable text

#field_id  label     disp_type crit_type width  height  brs_para
#----------------------------------------------------------------
#10      sport prdcr     text    text    30      1       T006
#1       text            sext    text    40      16      TEXT
#16      scol ref text   text    text    30      1       SCOL
#19      date            text    date    10      1       D001
#25      Item title      text    text    20      1       T001
#26      B/W Seqs        text    text    30      1       S001
#27      Medium          text    text    30      1       S002
#28      shot type       text    text    30      1       S003
#29      B/W Prog/Item   text    text    30      1       S004
#30      Seqs            text    text    10      1       S005
#31      Format          text    text    30      1       S006
#32      prog/item       text    text    30      1       S007
#35      reg libno       text    text    30      1       S008
#36      reg tape no     text    text    30      1       S009
#37      C Indicator     text    text    30      1       S010
#38      C Holder        text    text    30      1       T002
#39      cprd ind        text    text    30      1       S011
#40      cprd name       text    text    30      1       T003
#44      reference       sext    text    30      5       R001
#49      prog num        text    text    10      1       PRNO
#59      news title      text    text    30      1       T005
#65      cat num         text    text    10      1       CATN
#72      Record number   text    text    30      1       T007
#73      rx_date         text    date    10      1       S013
#74      Music performer text    text    30      2       T008
#75      Composer        text    text    30      2       T009
#76      mono/stereo     text    text    30      1       S014
#77      music medium    text    text    30      1       S015
#82      Sound Tech      text    text    30      1       S016
#83      Music publisher text    text    30      1       S017
#84      music tape      text    text    30      1       T012
#89      catalogue       text    text    3       1       CODE
#104     subcat          text    text    10      1       SUBC
#106     prog title      text    text    40      1       TTLE
#109     screen type     text    text    10      1       DOCT
#110     stock title     text    text    30      1       T013
#113     stock ln 4      text    text    30      1       T015
#114     stock lib 1     text    text    30      1       S020
#115     stock lib 2     text    text    30      1       S021
#----------------------------------------------------------------


my @brs_field_table 
  = (
      { field_name => "TTLE",  field_id   => 106, label => "Prog title",
        disp_type  => "text",  crit_type  => "text",
        width      => 40,      height     => 1       },
      { field_name => "T001",  field_id   => 25,  label => "Item title",
        disp_type  => "text",  crit_type  => "text",
        width      => 20,      height     => 1       },
      { field_name => "T005",  field_id   => 59,  label => "News title",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "T013",  field_id   => 110, label => "Stock title",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "T007",  field_id   => 72,  label => "Record number",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "TEXT",  field_id   => 1,   label => "Text",
        disp_type  => "scrl",  crit_type  => "text",
	width      => 40,      height     => 16      },

      { field_name => "T006",  field_id   => 10,  label => "Sport prdcr",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "SCOL",  field_id   => 16,  label => "Scol ref text",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "D001",  field_id   => 19,  label => "Date",
        disp_type  => "text",  crit_type  => "date",
        width      => 10,      height     => 1       },
      { field_name => "S001",  field_id   => 26,  label => "B/W Seqs",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S002",  field_id   => 27,  label => "Medium",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S003",  field_id   => 28,  label => "Shot type",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S004",  field_id   => 29,  label => "B/W Prog/Item",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S005",  field_id   => 30,  label => "Seqs",
        disp_type  => "text",  crit_type  => "text",
        width      => 10,      height     => 1       },
      { field_name => "S006",  field_id   => 31,  label => "Format",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S007",  field_id   => 32,  label => "Prog/item",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S008",  field_id   => 35,  label => "Reg libno",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S009",  field_id   => 36,  label => "Reg tape no",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S010",  field_id   => 37,  label => "C Indicator",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "T002",  field_id   => 38,  label => "C Holder",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S011",  field_id   => 39,  label => "Cprd ind",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "T003",  field_id   => 40,  label => "Cprd name",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "R001",  field_id   => 44,  label => "Reference",
        disp_type  => "scrl",  crit_type  => "text",
        width      => 30,      height     => 5       },
      { field_name => "PRNO",  field_id   => 49,  label => "Prog num",
        disp_type  => "text",  crit_type  => "text",
        width      => 10,      height     => 1       },
      { field_name => "CATN",  field_id   => 65,  label => "Cat num",
        disp_type  => "text",  crit_type  => "text",
        width      => 10,      height     => 1       },
      { field_name => "S013",  field_id   => 73,  label => "Rx_date",
        disp_type  => "text",  crit_type  => "date",
        width      => 10,      height     => 1       },
      { field_name => "T008",  field_id   => 74,  label => "Music performer",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 2       },
      { field_name => "T009",  field_id   => 75,  label => "Composer",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 2       },
      { field_name => "S014",  field_id   => 76,  label => "Mono/stereo",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S015",  field_id   => 77,  label => "Music medium",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S016",  field_id   => 82,  label => "Sound Tech",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S017",  field_id   => 83,  label => "Music publisher",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "T012",  field_id   => 84,  label => "Music tape",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "CODE",  field_id   => 89,  label => "Catalogue",
        disp_type  => "text",  crit_type  => "text",
        width      => 3,      height     => 1       },
      { field_name => "SUBC",  field_id   => 104, label => "Subcat",
        disp_type  => "text",  crit_type  => "text",
        width      => 10,      height     => 1       },
      { field_name => "DOCT",  field_id   => 109, label => "Screen type",
        disp_type  => "text",  crit_type  => "text",
        width      => 10,      height     => 1       },
      { field_name => "T015",  field_id   => 113, label => "Stock ln 4",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S020",  field_id   => 114, label => "Stock lib 1",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       },
      { field_name => "S021",  field_id   => 115, label => "Stock lib 2",
        disp_type  => "text",  crit_type  => "text",
        width      => 30,      height     => 1       }
     );


my @brs_title_list = ("TTLE", "T001", "T005", "T013", "T007", "zzTI");
my %cat_num_list = ();

sub new {
    my ($class) = @_;
    my $self = new BasPlug ($class, @_);

    return bless $self, $class;
}

sub get_default_process_exp {
    my $self = shift (@_);

    return q^(?i)\.brs$^;
}

my $brs_doc_count = 0;
my $brs_dot_count = 0;
my $brs_line_no = 0;

sub brs_error
{
    my ($file, $mess, $outhandle) = @_;

    print $outhandle "Malformed BRS recorded at line $brs_line_no in $file:\n";
    print $outhandle "  $mess\n";
}

sub read_brs_record
{
    my ($recordref, $file, $outhandle) = @_;

    my $extra_trigger = '\s*>';
    my $text_divider = "-" x 78;
    my $text_end = "..R001";

    # Print "." to signify processing if enough records have been read in
    $brs_doc_count++;
    if (($brs_doc_count % 10) == 0)
    {
	print $outhandle ".";
	$brs_dot_count++;
	print $outhandle "\n" if (($brs_dot_count % 80) == 0);
    }

    my $brs_rec = undef;
    my $line = undef;

    # read in the record
    my @brs_lines = split /\n/, $$recordref;

    while (scalar(@brs_lines)>0)
    {
	$line = shift(@brs_lines);
	$line =~ s/\cM//g;

	$brs_line_no++;

	if  ($line =~ m/^\.\.([A-Z]|[0-9]){4}:$/) 
	{
	    my $field_name = substr($line,2,4);

	    if ($field_name eq "TEXT")
	    {
		my $text_above = "";
		my $text_extra = "";
		my $text_below = "";
		
		my $add_mode = "above";
		
		while (scalar(@brs_lines)>0)
		{
		    last if ($brs_lines[0] =~ m/^$text_end/);

		    $line = shift(@brs_lines);
		    $brs_line_no++;
		    
		    if ($line =~ m/^$text_divider/)
		    {
			$add_mode = "below";
			next;
		    }
		    
		    if ($line =~ m/^$extra_trigger/)
		    {
			$text_extra .= "$line\n";
			$add_mode = "below";
			next;
		    }
			
		    if ($line =~ m/^\.\.([A-Z]|[0-9]){4}:/)
		    {
			if ($line =~ m/^\.\.TEXT/)
			{
			    $add_mode = "above";
			    next;
			}
			else
			{
			    brs_error($file, "Unexpected field $line", $outhandle);
			    # put field name back
			    unshift(@brs_lines, $line); 
			    $brs_line_no--;
			    last;
			}
		    }
		    
		    if ($line !~ m/^\s+(-){10,}$/)
		    {
			if ($add_mode eq "above")
			{
			    $text_above .= "$line ";
			}
			else
			{
			    $text_below .= "$line ";
			}
		    }
		}

		$brs_rec->{TEXTA} .= $text_above if ($text_above ne "");
		$brs_rec->{TEXTB} .= $text_below if ($text_below ne "");
		$brs_rec->{TEXTE} .= $text_extra if ($text_extra ne "");
	    }
	    elsif ($field_name eq "R001")
	    {
		# read zz fields until end of record
		    
		while (scalar(@brs_lines)>0)
		{
		    $line = shift(@brs_lines);
		    $brs_line_no++;

		    if ($line =~ m/^(zz[A-Z]+)( +)(.*)/)
		    {
			my $field_name = $1;
			my $field_text = $3;

			while (scalar(@brs_lines)>0)
			{
			    my $line_ahead = $brs_lines[0];

			    last if ($line_ahead =~ m/^zz[A-Z]+/);
					
			    $line = shift(@brs_lines);
			    $brs_line_no++;
			    
			    $field_text .= $line;
			}
			
			if (!defined($brs_rec->{$field_name}))
			{
			    $brs_rec->{$field_name} = [$field_text];
			}
			else
			{
			    push(@{$brs_rec->{$field_name}},$field_text);
			}
		    }
		    else
		    {
			brs_error($file, "Unexpected field $line", $outhandle);
		    }
		}
	    }
	    else
	    {
		my $field_entry = "";
		while ($brs_lines[0] !~ m/^..([A-Z]|[0-9]){4}:$/)
		{
		    $field_entry .= shift(@brs_lines);
		    $brs_line_no++;
		    last if (scalar(@brs_lines)==0);
		}		    

		if (!defined($brs_rec->{$field_name}))
		{
		    $brs_rec->{$field_name} = $field_entry;
		}
		else
		{
		    brs_error($file, "$field_name already defined.", $outhandle);
		}
	    }		
	}
	else
	{
	    brs_error($file, "Malformed field: $line.", $outhandle);
	}
    }

    # format the programme number if required
    &format_prognum($brs_rec);

    return ($brs_rec);
}

my $brs_processed_count = 0;

sub brs_full_record_in_html
{
    my ($doc_obj,$cursection,$brs_rec) = @_;

    my $html_table = "";
    my $table_line = "";
    my $cell_count = 0;
    my $long_lines = "";

    my $i;
    for ($i=0; $i<=$#brs_field_table; $i++)
    {
	my $field_name = $brs_field_table[$i]->{'field_name'};
	if (defined($brs_rec->{$field_name}))
	{
	    my $field_label = $brs_field_table[$i]->{'label'};
	    my $field_value = $brs_rec->{$field_name};

	    # these fields get their own line
	    if ($field_label =~ /^(Prog|Item|News|Stock) title$/) {
		my $safe_value = $field_value;
		&ghtml::urlsafe ($safe_value);
		$long_lines .= "<tr valign=top><td><b>$field_label:</b></td><td colspan=5>";
		$long_lines .= "<a href=\"_httpquerytitle_&q=$safe_value\">$field_value</a></td></tr>\n";
		$long_lines .= "$field_value</td></tr>\n";
		
	    } else {
		
		if (($cell_count>0) && ($cell_count%3 == 0))
		{
		    $html_table .= "\n<tr valign=top>\n$table_line\n</tr>\n";
		    $table_line = "";
		}
		$cell_count++;
		if ($field_name eq "D001") # a Dublin core style date
		{
		    my @mon_convert 
			= ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
			   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec");
		    
		    my ($year,$monnum,$day) 
			= ($field_value =~ m/(\d{4})(\d{2})(\d{2})/);
		    my $moneng = $mon_convert[$monnum-1];
		    
		    $field_value = "$day $moneng $year";

		} elsif ($field_name eq "PRNO") {
		    my $safe_value = $field_value;
		    &ghtml::urlsafe ($safe_value);
		    $field_value = "<a href=\"_httpqueryprogname_&q=$safe_value\">$field_value</a>";

		} elsif ($field_name eq "CATN") {
		    my $safe_value = $field_value;
		    &ghtml::urlsafe ($safe_value);
		    $field_value = "<a href=\"_httpquerycatnum_&q=$safe_value\">$field_value</a>";

		} elsif ($field_name eq "T003") {
		    my $safe_value = $field_value;
		    &ghtml::urlsafe ($safe_value);
		    $field_value = "<a href=\"_httpqueryzzabn_&q=$safe_value\">$field_value</a>";
		}
		
		$table_line .= "<td><b>$field_label:</b></td><td>$field_value</td>\n";
	    }
	}
    }

    if ($cell_count%3 != 0) {
	$table_line .= "<td colspan=" . (3-($cell_count%3))*2 . "></td>";
    }

    $html_table .= "\n<tr>\n$table_line\n</tr>\n";    
    $html_table = "<table>\n$long_lines$html_table\n</table>\n";

    $doc_obj->add_utf8_text($cursection, $html_table);
}

sub process_brs_record
{
    my ($processor, $brs_rec, $file, $outhandle) = @_;

    if (defined($brs_rec))
    {

	# only include those records in the chosen subcats - these lines
	# should be commented out to build the entire collection

	# National Sound Archive collection
	if (!defined ($brs_rec->{'SUBC'}) || 
	    $brs_rec->{'SUBC'} !~ /^(RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF|WILDSOUND)$/) {
	    return;
	}

	# British Film Institute collection
#	if (!defined ($brs_rec->{'SUBC'}) || 
#	    $brs_rec->{'SUBC'} !~ /^(LONPROG|NEWS)$/) {
#	    return;
#	}

	# "the rest" collection
#	if (!defined ($brs_rec->{'SUBC'}) || 
#	    $brs_rec->{'SUBC'} =~ /^(LONPROG|NEWS|RADCOMP|RADIOX|RADPROG|RADNEWS|BDSPROG|UNCATRAD|RADIOREF)$/) {
#	    return;
#	}

	# don't include the secret stuff ;-)
	if (defined ($brs_rec->{'SUBC'}) && $brs_rec->{'SUBC'} eq "HISTORY") {
	    print STDERR "excluding secret stuff - line $brs_line_no\n";
	    return;
	}


	my $doc_obj = new doc ($file, "indexed_doc");
	$brs_processed_count++;

	my $cursection = $doc_obj->get_top_section();

	my $found_match = "no";

	my $pot_title;	
	foreach $pot_title ( @brs_title_list )
	{
	    if (defined ($brs_rec->{$pot_title}))
	    {
		my $title = "";

		if ($pot_title =~ m/^zz/)
		{
		    my $zz_join = join(" ", @{$brs_rec->{$pot_title}});
		    my @zz_split = split(" ", $zz_join);
		    map { $title .= "$_ " if ($_ !~ m/^xx/); } @zz_split;
		}
		else
		{
		    $title =  $brs_rec->{$pot_title};
		}

		my $tl_ref = $doc_obj->get_metadata ($cursection, "Title");

		if (scalar(@$tl_ref==0))
		{
		    $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
		}
		else
		{
		    my $exists = "no";
		    
		    map { $exists = "yes" if ($title eq $_); } @$tl_ref;
		    if ($exists eq "no")
		    {
			$doc_obj->add_utf8_metadata($cursection, "Title", $title);
		    }
		}
		$found_match = "yes";
	    }
	}
	if ($found_match eq "no")
	{
	    if (defined($brs_rec->{TEXTA}))
	    {
		my $sub_title = substr($brs_rec->{TEXTA},0,60) . " ... ";
		$doc_obj->add_utf8_metadata ($cursection, "Title", $sub_title);
	    }
	    elsif (defined($brs_rec->{TEXTB}))
	    {
		my $sub_title = substr($brs_rec->{TEXTB},0,60) . " ... ";
		$doc_obj->add_utf8_metadata ($cursection, "Title", $sub_title);
	    }
	    else
	    {
		print $outhandle " No title or text for record ending at line ";
		print $outhandle $brs_line_no-1, " ";
	    }
	}

	if (defined($brs_rec->{'T001'})) # item title
	{
	    $doc_obj->add_utf8_metadata ($cursection, "ItemTitle", $brs_rec->{'T001'});
	}
	
	if (defined($brs_rec->{'D001'})) # date
	{
	    $doc_obj->add_utf8_metadata ($cursection, "Date", $brs_rec->{'D001'});
	}
	else
	{
	    print $outhandle " No date for record ending at line ";
	    print $outhandle $brs_line_no-1, " ";
	}

	if (defined($brs_rec->{'PRNO'})) # programme number
	{
	    $doc_obj->add_utf8_metadata ($cursection, "ProgNumber", $brs_rec->{'PRNO'});
	}

	if (defined($brs_rec->{'CATN'})) # catalogue number
	{
	    $doc_obj->add_utf8_metadata ($cursection, "CatNum", $brs_rec->{'CATN'});
	}

	# zzAN, zzAB and T003 fields
	if (defined($brs_rec->{'zzAB'}) || defined($brs_rec->{'zzAN'}) || defined($brs_rec->{'T003'}))
	{
	    my $zzabn = "";
	    if (defined($brs_rec->{'zzAB'})) {
		foreach my $a (@{$brs_rec->{'zzAB'}}) {
		    $a =~ s/[^a-zA-Z0-9]//g;
		    $zzabn .= " " . $a;
		}
	    } 
	    if (defined($brs_rec->{'zzAN'})) {
		foreach my $a (@{$brs_rec->{'zzAN'}}) {
		    $a =~ s/[^a-zA-Z0-9]//g;
		    $zzabn .= " " . $a;
		}
	    } 
	    if (defined($brs_rec->{'T003'})) {
		my $value = $brs_rec->{'T003'};
		$value =~ s/[^a-zA-Z0-9]//g;
		$zzabn .= " " . $value;
	    } 
	    print STDERR "zzabn: '$zzabn'\n";
	    $doc_obj->add_utf8_metadata ($cursection, "zzabn", $zzabn);
	}

	if (defined($brs_rec->{zzN}))
	{
	    my $name;
	    foreach $name (@{$brs_rec->{zzN}})
	    {
		$doc_obj->add_utf8_metadata ($cursection, "People", $name);
	    }
	}

	brs_full_record_in_html($doc_obj,$cursection,$brs_rec);

	if (defined($brs_rec->{TEXTA}))
	{

	    my $desc_texta = "<h3>Description</h3>$brs_rec->{TEXTA}";
	    $doc_obj->add_utf8_text ($cursection, $desc_texta);

	    if (defined($brs_rec->{TEXTE}))
	    {
		my $raw_texte = $brs_rec->{TEXTE};
		$raw_texte =~ s/^\s*>\s*/<li>/mg;
		
		my $test_empty = $raw_texte;
		$test_empty =~ s/(<li>)+//g;
		
		if ($test_empty ne "")
		{
		    my $desc_texte = $raw_texte;
		    $desc_texte =~ s/^<li>(.*)$/<li><i>$1<\/i>/mg;
		    $desc_texte = "<p>Additional:<ul>$desc_texte</ul>";
		    $doc_obj->add_utf8_text ($cursection, $desc_texte);
		}
	    }
	}

	if (defined($brs_rec->{TEXTB}))
	{
	    my $desc_textb = "<h3>Comments</h3>$brs_rec->{TEXTB}";
	    $doc_obj->add_utf8_text($cursection, $desc_textb);
	}

	my $full_text = "";
	$full_text .= "$brs_rec->{TEXTA}\n" if (defined($brs_rec->{TEXTA}));
	$full_text .= "$brs_rec->{TEXTE}\n" if (defined($brs_rec->{TEXTE}));
	$full_text .= "$brs_rec->{TEXTB}\n" if (defined($brs_rec->{TEXTB}));

	my @zz_list = ( "zzPG", "zzLO", "zzN" );
	my $zz;
	foreach $zz ( @zz_list)
	{
	    if (defined($brs_rec->{$zz}))
	    {
		my $filtered_text = "";

		my $name;
		foreach $name (@{$brs_rec->{$zz}})
		{
		    my @split_zz = split(' ',$name);
		    map { $filtered_text .= "$_ " if ($_ =~ m/^xx/) } @split_zz
		}
	
		$full_text .= "$filtered_text\n";
	    }
	}

	# format zz fields (tail end of record)
	my $zz_html = "<table>\n";

	my $pot_zz;
	foreach $pot_zz (keys %{$brs_rec})
	{
	    if ($pot_zz =~ m/^zz/)
	    {
		my $zz = $pot_zz;
		my $name;
		foreach $name (@{$brs_rec->{$zz}})
		{
		    if ($zz eq "zzN") {
			my $safe_name = $name;
			&ghtml::urlsafe ($safe_name);
			$zz_html .= "<tr><td><b>$zz:</b></td><td><a href=\"_httpquerypeople_&q=$safe_name\">$name</a></td></tr>\n";
		    } elsif ($zz =~ /^zzA[BN]$/) {
			my $safe_name = $name;
			&ghtml::urlsafe ($safe_name);
			$zz_html .= "<tr><td><b>$zz:</b></td><td><a href=\"_httpqueryzzabn_&q=$safe_name\">$name</a></td></tr>\n";

		    } else {
			$zz_html .= "<tr><td><b>$zz:</b></td><td>$name</td></tr>\n";
		    }
		}
	    }
	}
	$zz_html .= "</table>\n";
	$doc_obj->add_utf8_text($cursection, $zz_html);

	# add OID - we'll use the catalog number (hoping it's unique)
	my $cat_num = $brs_rec->{'CATN'};
	if (!defined $cat_num) {
	    die "bugger!!\n";
	}
	if (defined ($cat_num_list{$cat_num})) {
	    print $outhandle "WARNING: catalog number $cat_num used more than once\n";
	}
	$doc_obj->set_OID ("bbc" . $cat_num);
	$cat_num_list{$cat_num} = 1;
    
	# process the document
	$processor->process($doc_obj);
    }
    else
    {
	print $outhandle "BRS record empty\n";
    }
    
}

sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    my $outhandle = $self->{'outhandle'};

    my $filename = &util::filename_cat($base_dir, $file);
#    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
	return undef;
    }
#    my $plugin_name = ref ($self);
    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    
    # create a new document
#    my $doc_obj = new doc ($filename, "indexed_doc");
    
    # read in file ($text will be in utf8)
#    my $text = "";
#    $self->read_file ($filename, \$text);

#    if ($text !~ /\w/) {
#	print $outhandle "$plugin_name: ERROR: $file contains no text\n" if $self->{'verbosity'};
#	return 0;
#    }

    # text is always plain ascii
    undef $/;
    open (FILE, $filename) || die;
    my $text = <FILE>;
    close FILE;
    $/ = "\n";


    # include any metadata passed in from previous plugins 
    # note that this metadata is associated with the top level section
#    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);

    print $outhandle "BBC Sound Archive Plug for BRS format: processing $file\n"
	if $self->{'verbosity'} > 1;

    my @records = split /\*\*\* BRS DOCUMENT BOUNDARY \*\*\*\s+/, $text;
    $text = "";

    foreach $record (@records) {
	next if $record !~ /\w/; # first record will be empty
	my $brs_rec = &read_brs_record(\$record, $file, $self->{'outhandle'});
	&process_brs_record ($processor, $brs_rec, $file, $self->{'outhandle'});
    }

    print $outhandle "\nNumber of BRS records = $brs_doc_count\n";
    print $outhandle "Number of BRS records processed = $brs_processed_count\n";

    return 1; # processed the file
}

sub end {
    my $self = shift (@_);
    my ($processor) = @_;

    $processor->close_file_output() if defined $processor;
}

sub format_prognum {
    my ($brs_rec) = @_;

    return unless defined $brs_rec->{'PRNO'};

    my $old_num = $brs_rec->{'PRNO'};

    if (defined $brs_rec->{'magazine'}) {
	if ($brs_rec->{'PRNO'} =~ /^\S\S /) { # space at position 3
	    if ($brs_rec->{'magazine'} =~ /(\d+)/) {
		# algorithm A
		my $magnumber = $1;
		$brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/;
		$brs_rec->{'PRNO'} = $2 . $1 . $3 . $magnumber;
	    } else {
		# common algorithm
		$brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/;
		$brs_rec->{'PRNO'} = $2 . $1 . $3;
	    }
	}
    } else {
	if ($brs_rec->{'PRNO'} =~ /^\S\S / && $brs_rec->{'PRNO'} !~ /:/) { # space at position 3 and no colon
	    # common algorithm
	    $brs_rec->{'PRNO'} =~ /(\S\S) (\S\S)(\S+)/;
	    $brs_rec->{'PRNO'} = $2 . $1 . $3;
	} elsif ($brs_rec->{'PRNO'} =~ /^(\S):(\S\S) (\S\S)(\S+)/) { # colon at position 2 and space at position 5
	    # algorithm B
	    $brs_rec->{'PRNO'} = $3 . $2 . $4 . $1;
	}
    }

#    if ($brs_rec->{'PRNO'} ne $old_num) {
#	print STDERR "\n$old_num ... " . $brs_rec->{'PRNO'} . "\n";
#    }
}

1;