Changeset 23761 for main


Ignore:
Timestamp:
2011-03-02T16:45:11+13:00 (13 years ago)
Author:
davidb
Message:

General upgrading of the set metadata action to cover more cases (such as setting metadata values at the sub-section level). To ensure the output file correctly maintains it's 'UTF-8'-ness, I have had to change the code that explicity prints out the DOCTYPE tag -- the comment for this itself says this is a hack. Without the 'binmode(...)' then accented characters etc. will be incorrectly coded and the whole deck of cards comes crashing down. I noticed there is a new version of XML::Rule out, and so with luck this version has a better way to handle setting UTF-8 within its API, rather than resorting the the external 'binmode' now used. If so, then this would let us go back to printing out the DOCTYPE tag ... it might even be that this element can be more gracefully handled within the updated XML::Rule implementation.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm

    r23478 r23761  
    8585    my ($gsdl_cgi,$iis6_mode) = @_;
    8686
     87    # Treat metavalue specially.  To transmit this through a GET request
     88    # the Javascript side has url-encoded it, so here we need to decode
     89    # it before proceeding
     90
     91    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
     92    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
     93
     94    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
     95
     96    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
     97
     98    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
     99
    87100    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
    88101
     
    102115   
    103116    # live metadata gets/saves value scoped (prefixed) by the current usename
    104     # so (for now) let's not bother to enforce authentication
     117    # so (for now) let's not bother to enforce authentication
    105118
    106119    # Obtain the collect dir
     
    137150    }
    138151    else {
     152    binmode(GIN,":utf8");
    139153        # Read everything in and concatenate them into $metavalue
    140154    my $metavalue = "";
     
    220234    my $infodbtype = $self->{'infodbtype'};
    221235 
    222     # don't user authenticate for now
    223236    if ($baseaction::authentication_enabled) {
    224237    # Ensure the user is allowed to edit this collection
     
    283296    my $gsdlhome  = $self->{'gsdlhome'};
    284297
    285    
    286    
    287     # don't user authenticate for now
    288298    if ($baseaction::authentication_enabled) {
    289299    # Ensure the user is allowed to edit this collection
     
    302312    my $metapos   = $self->{'metapos'};
    303313    my $metavalue = $self->{'metavalue'};
    304     my $infodbtype = $self->{'infodbtype'};
     314    my $infodbtype = $self->{'infodbtype'};
    305315   
    306316    # To people who know $collect_tail please add some comments
     
    316326    # Set the metadata value
    317327    if (defined $metapos) {
    318         $doc_rec->{$metaname}->[$metapos] = $metavalue;
     328    $doc_rec->{$metaname}->[$metapos] = $metavalue;
    319329    }
    320330    else {
    321         $doc_rec->{$metaname} = [ $metavalue ];
     331    $doc_rec->{$metaname} = [ $metavalue ];
    322332    }
    323333 
    324     my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
     334    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
    325335    if ($status != 0) {
    326         # Catch error if gdbmget failed
    327         my $mess = "Failed to set metadata key: $docid\n";
    328    
    329         $mess .= "PATH: $ENV{'PATH'}\n";
    330         $mess .= "Exit status: $status\n";
    331         $mess .= "System Error Message: $!\n";
    332 
    333         $gsdl_cgi->generate_error($mess);
     336        # Catch error if set infodb entry failed
     337    my $mess = "Failed to set metadata key: $docid\n";
     338   
     339    $mess .= "PATH: $ENV{'PATH'}\n";
     340    $mess .= "Exit status: $status\n";
     341    $mess .= "System Error Message: $!\n";
     342   
     343    $gsdl_cgi->generate_error($mess);
    334344    }
    335345    else {
    336         my $mess = "set-document-metadata successful: Key[$docid]\n";
    337         $mess .= "  $metaname";
    338         $mess .= "->[$metapos]" if (defined $metapos);
    339         $mess .= " = $metavalue";
    340 
    341         $gsdl_cgi->generate_ok_message($mess);
     346    my $mess = "set-document-metadata successful: Key[$docid]\n";
     347    $mess .= "  $metaname";
     348    $mess .= "->[$metapos]" if (defined $metapos);
     349    $mess .= " = $metavalue";
     350   
     351    $gsdl_cgi->generate_ok_message($mess);
    342352    }
    343353   
     
    352362    my $metaname = $parser->{'parameters'}->{'metaname'};
    353363    my $metamode = $parser->{'parameters'}->{'metamode'};
    354 
    355     # Find the right metadata tag and checks if we are going to override it
    356     # Note: This over writes the first metadata block it encountered. If there are multiple Sections in the doc.xml, it might not behave as you would expect
    357     my $name_attr = $attrHash->{'name'};
    358     if (($name_attr eq $metaname) && ($metamode eq "override")) {
    359         # Get the value and override the current value
    360     my $metavalue = $parser->{'parameters'}->{'metavalue'};
    361     $attrHash->{'_content'} = $metavalue;
    362 
    363     # Don't want it to wipe out any other pieces of metadata
    364     $parser->{'parameters'}->{'metamode'} = "done";
     364   
     365    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
     366   
     367    # Find the right metadata tag and checks if we are going to
     368    # override it
     369    #
     370    # Note: This over writes the first metadata block it
     371    # encountered. If there are multiple Sections in the doc.xml, it
     372    # might not behave as you would expect
     373
     374    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
     375##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
     376##    print STDERR "**** metamode = $metamode\n";
     377   
     378    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
     379    my $name_attr = $attrHash->{'name'};
     380    if (($name_attr eq $metaname) && ($metamode eq "override")) {
     381##      print STDERR "**** got match!!\n";
     382        # Get the value and override the current value
     383        my $metavalue = $parser->{'parameters'}->{'metavalue'};
     384        $attrHash->{'_content'} = $metavalue;
     385       
     386        # Don't want it to wipe out any other pieces of metadata
     387        $parser->{'parameters'}->{'metamode'} = "done";
     388    }
    365389    }
    366390
     
    378402    # NOTE: This appends new metadata element to all description fields.
    379403    # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
    380     if ($metamode eq "accumulate") {
    381     # tack a new metadata tag on to the end of the <Metadata>+ block
     404    if (($metamode eq "accumulate") || ($metamode eq "override")) {
     405    # If get to here and metamode is override, the this means there
     406    # was no existing value to overide => treat as an append operation
     407
     408    # Tack a new metadata tag on to the end of the <Metadata>+ block
    382409    my $metaname = $parser->{'parameters'}->{'metaname'};
    383410    my $metavalue = $parser->{'parameters'}->{'metavalue'};
     
    390417    my $description_content = $attrHash->{'_content'};
    391418
     419##  print STDERR "**** appending to doc.xml\n";
     420
    392421    push(@$description_content,"    ", $append_metadata ,"\n        ");
     422    $parser->{'parameters'}->{'metamode'} = "done";
    393423    }
    394424
     
    399429
    400430
     431
     432sub dxml_start_section
     433{
     434    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
     435
     436    my $new_depth = scalar(@$contextArray);
     437
     438    if ($new_depth == 1) {
     439    $parser->{'parameters'}->{'curr_section_depth'} = 1;
     440    $parser->{'parameters'}->{'curr_section_num'}   = "";
     441    }
     442
     443    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
     444    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
     445
     446    my $new_secnum;
     447
     448    if ($new_depth > $old_depth) {
     449    # child subsection
     450    $new_secnum = "$old_secnum.1";
     451    }
     452    elsif ($new_depth == $old_depth) {
     453    # sibling section => increase it's value by 1
     454    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
     455    $tail_num++;
     456    $new_secnum = $old_secnum;
     457    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
     458    }
     459    else {
     460    # back up to parent section => lopp off tail
     461    $new_secnum = $old_secnum;
     462    $new_secnum =~ s/\.\d+$//;
     463    }
     464
     465    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
     466    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
     467
     468    print STDERR "*** In Section: $new_secnum\n";
     469}
     470
    401471sub edit_xml_file
    402472{
    403473    my $self = shift @_;
    404     my ($gsdl_cgi, $filename, $rules, $options) = @_;
     474    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
    405475
    406476    # use XML::Rules to add it in (read in and out again)
    407     my $parser = XML::Rules->new(rules => $rules,
    408                  style => 'filter' );
     477    my $parser = XML::Rules->new(start_rules     => $start_rules,
     478                 rules           => $rules,
     479                 style           => 'filter',
     480                                 output_encoding => 'utf8' );
    409481
    410482    my $xml_in = "";
     
    420492    close(MIN);
    421493   
    422     # Matched lines will get handled by the call backs
    423     my $xml_out = "";
    424     $parser->filter($xml_in,\$xml_out, $options);
    425    
    426     if (!open(MOUT,">$filename")) {
     494    my $MOUT;   
     495    if (!open($MOUT,">$filename")) {
    427496        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
    428497    }
    429498    else {
    430         print MOUT $xml_out;
    431         close(MOUT);       
     499        # Matched lines will get handled by the call backs
     500##      my $xml_out = "";
     501
     502        binmode($MOUT,":utf8");
     503        $parser->filter($xml_in,$MOUT, $options);
     504
     505#       binmode(MOUT,":utf8");
     506#       print MOUT $xml_out;
     507        close($MOUT);       
    432508    }
    433509    }
     
    438514{
    439515    my $self = shift @_;
    440     my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode) = @_;
     516    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
     517
     518    # To monitor which section/subsection number we are in
     519    my @start_rules =
     520    ( 'Section'    => \&dxml_start_section );
    441521
    442522    # use XML::Rules to add it in (read in and out again)
     
    444524    my @rules =
    445525    ( _default => 'raw',
    446       'Metadata' => \&dxml_metadata,
    447       'Description' => \&dxml_description );
     526      'Metadata'    => \&dxml_metadata,
     527      'Description' => \&dxml_description);
    448528     
    449529    # Sets the parameters
     
    451531            'metapos'   => $metapos,
    452532            'metavalue' => $metavalue,
    453             'metamode'  => $metamode };
     533            'metamode'  => $metamode };
    454534           
    455     $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@rules,$options);
     535    if (defined $opt_secnum) {
     536    $options->{'secnum'} = $opt_secnum;
     537    }
     538
     539    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
    456540}
    457541
     
    467551    my $infodbtype = $self->{'infodbtype'};
    468552   
    469     # don't user authenticate for now
    470553    if ($baseaction::authentication_enabled) {
    471554    # Ensure the user is allowed to edit this collection
     
    488571    $metapos = 0 if (!defined $metapos);
    489572
    490     my $metamode   = $self->{'metamode'};
     573    my $metamode   = $self->{'metamode'};
    491574    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
    492575    # make "accumulate" the default (less destructive, as won't actually
     
    496579   
    497580    # Obtain the doc.xml path for the specified docID
     581    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
     582
    498583    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
    499     my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
     584    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
    500585    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
    501586   
     
    509594    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
    510595    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
    511             $metaname,$metavalue,$metapos,$metamode);
    512 
    513     my $mess = "set-archives-metadata successful: Key[$docid]\n";
    514     $mess .= "  $metaname";
    515     $mess .= "->[$metapos]" if (defined $metapos);
    516     $mess .= " = $metavalue";
    517     $mess .= " ($metamode)\n";
    518    
    519     $gsdl_cgi->generate_ok_message($mess);
    520        
     596            $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
     597   
    521598    # Release the lock once it is done
    522599    $self->unlock_collection($username, $collect);
    523    
     600
     601    my $mess = "set-archives-metadata successful: Key[$docid]\n";
     602    $mess .= "  $metaname";
     603    $mess .= "->[$metapos]" if (defined $metapos);
     604    $mess .= " = $metavalue";
     605    $mess .= " ($metamode)\n";
     606   
     607    $gsdl_cgi->generate_ok_message($mess); 
    524608}
    525609
     
    536620    # Don't do anything if we are not in the right FileSet
    537621    my $file_regexp = $parser->{'parameters'}->{'current_file'};
    538     return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
    539    
     622    if ($file_regexp =~ /\.\*/) {
     623    # Only interested in a file_regexp if it specifies precisely one
     624    # file. 
     625    # So, skip anything with a .* in it as it is too general
     626    return [$tagname => $attrHash];
     627    }
     628    my $src_file = $parser->{'parameters'}->{'src_file'};
     629    if (!($src_file =~ /$file_regexp/)) {
     630    return [$tagname => $attrHash];
     631    }
     632##    print STDERR "*** mxl metamode = $metamode\n";
     633
    540634    # Find the right metadata tag and checks if we are going to override it
    541     # Note: This over writes the first metadata block it encountered even if it doesn't belong to the source file we specified
    542635    my $name_attr = $attrHash->{'name'};
    543636    if (($name_attr eq $metaname) && ($metamode eq "override")) {
     
    546639    $attrHash->{'_content'} = $metavalue;
    547640
     641##  print STDERR "**** overrideing metadata.xml\n";
     642
    548643    # Don't want it to wipe out any other pieces of metadata
    549644    $parser->{'parameters'}->{'metamode'} = "done";
     
    565660    # Don't do anything if we are not in the right FileSet
    566661    my $file_regexp = $parser->{'parameters'}->{'current_file'};
    567     return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
     662    if ($file_regexp =~ /\.\*/) {
     663    # Only interested in a file_regexp if it specifies precisely one
     664    # file. 
     665    # So, skip anything with a .* in it as it is too general
     666    return [$tagname => $attrHash];
     667    }
     668    my $src_file = $parser->{'parameters'}->{'src_file'};
     669    if (!($src_file =~ /$file_regexp/)) {
     670    return [$tagname => $attrHash];
     671    }
    568672
    569673    # Accumulate the metadata block to the end of the description block
    570674    # Note: This adds metadata block to all description blocks, so if there are
    571675    # multiple FileSets, it will add to all of them
    572     if ($metamode eq "accumulate") {
     676    if (($metamode eq "accumulate") || ($metamode eq "override")) {
     677    # if metamode was "override" but get to here then it failed to
     678    # find an item to override, in which case it should append its
     679    # value to the end, just like the "accumulate" mode
     680
    573681    # tack a new metadata tag on to the end of the <Metadata>+ block
    574682    my $metaname = $parser->{'parameters'}->{'metaname'};
     
    582690    my $description_content = $attrHash->{'_content'};
    583691
     692##  print STDERR "*** appending to metadata.xml\n";
     693
     694    # append the new metadata element to the end of the current
     695    # content contained inside this tag
    584696    push(@$description_content,"    ", $append_metadata ,"\n        ");
     697
     698    $parser->{'parameters'}->{'metamode'} = "done";
    585699    }
    586700
     
    647761    }
    648762    close(MIN);
    649    
     763
    650764        # Filter with the call-back functions
    651765    my $xml_out = "";
    652     $parser->filter($xml_in,\$xml_out, { metaname => $metaname,
    653                          metavalue => $metavalue,
    654                                              metamode => $metamode,
    655                          src_file => $src_file,
    656                          current_file => undef} );
    657 
    658     if (!open(MOUT,">$metadata_xml_filename")) {
     766
     767    my $MOUT;
     768    if (!open($MOUT,">$metadata_xml_filename")) {
    659769        $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
    660770    }
    661771    else {
     772        binmode($MOUT,":utf8");
     773
    662774            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
    663775            # At the moment, I will just hack it!
    664             my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
    665             $xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
    666         print MOUT $xml_out;
    667         close(MOUT);       
     776            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
     777        #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
     778            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
     779        #print MOUT $xml_out;
     780
     781        $parser->filter($xml_in, $MOUT, { metaname => $metaname,
     782                          metavalue => $metavalue,
     783                          metamode => $metamode,
     784                          src_file => $src_file,
     785                          current_file => undef} );
     786        close($MOUT);       
    668787    }
    669788    }
     
    681800    my $infodbtype = $self->{'infodbtype'};
    682801   
    683     # don't user authenticate for now
    684802    if ($baseaction::authentication_enabled) {
    685803    # Ensure the user is allowed to edit this collection
    686804    $self->authenticate_user($username, $collect);
    687805    }
     806
    688807
    689808    # Obtain the collect and archive dir   
     
    705824    my $metaname   = $self->{'metaname'};
    706825    my $metavalue  = $self->{'metavalue'};
     826##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
     827    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
     828    print STDERR "*** set import meta: val = $metavalue\n";
     829   
    707830    my $metamode   = $self->{'metamode'};
    708831    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
     
    721844
    722845    # This now stores the full pathname
    723     $import_filename = $doc_rec->{'src-file'}->[0];
     846    $import_filename = $doc_rec->{'src-file'}->[0]; 
    724847    }
    725848    else {
    726849        $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
    727850    }
    728        
     851
    729852    # figure out correct metadata.xml file [?]
    730853    # Assuming the metadata.xml file is next to the source file
     
    745868    # Release the lock once it is done
    746869    $self->unlock_collection($username, $collect);
     870
     871    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
     872    $mess .= "  $metaname";
     873    $mess .= " = $metavalue";
     874    $mess .= " ($metamode)\n";
     875   
     876    $gsdl_cgi->generate_ok_message($mess);
     877   
    747878}
    748879
     
    817948    my $infodbtype = $self->{'infodbtype'};
    818949   
    819     # don't user authenticate for now
    820950    if ($baseaction::authentication_enabled) {
    821951    # Ensure the user is allowed to edit this collection
     
    9121042
    9131043
    914 # Was trying to reused the codes, but the functions need to be broken down more before they can be reused, otherwise there will be too much overhead and duplicate process...
     1044# Was trying to reused the codes, but the functions need to be broken
     1045# down more before they can be reused, otherwise there will be too
     1046# much overhead and duplicate process...
    9151047sub insert_metadata
    9161048{
     
    9231055    my $infodbtype = $self->{'infodbtype'};
    9241056   
    925     # If the import metadata and gdbm database have been updated, we need to insert some notification to warn user that the the text they see at the moment is not indexed and require a rebuild.
     1057    # If the import metadata and gdbm database have been updated, we
     1058    # need to insert some notification to warn user that the the text
     1059    # they see at the moment is not indexed and require a rebuild.
    9261060    my $rebuild_pending_macro = "_rebuildpendingmessage_";
    9271061
    928     # don't user authenticate for now
    9291062    if ($baseaction::authentication_enabled) {
    9301063    # Ensure the user is allowed to edit this collection
Note: See TracChangeset for help on using the changeset viewer.