Changeset 23761

Show
Ignore:
Timestamp:
02.03.2011 16:45:11 (9 years ago)
Author:
davidb
Message:

General upgrading of the set metadata action to cover more cases (such as setting metadata values at the sub-section level). To ensure the output file correctly maintains it's 'UTF-8'-ness, I have had to change the code that explicity prints out the DOCTYPE tag -- the comment for this itself says this is a hack. Without the 'binmode(...)' then accented characters etc. will be incorrectly coded and the whole deck of cards comes crashing down. I noticed there is a new version of XML::Rule out, and so with luck this version has a better way to handle setting UTF-8 within its API, rather than resorting the the external 'binmode' now used. If so, then this would let us go back to printing out the DOCTYPE tag ... it might even be that this element can be more gracefully handled within the updated XML::Rule implementation.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm

    r23478 r23761  
    8585    my ($gsdl_cgi,$iis6_mode) = @_; 
    8686 
     87    # Treat metavalue specially.  To transmit this through a GET request 
     88    # the Javascript side has url-encoded it, so here we need to decode 
     89    # it before proceeding 
     90 
     91    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue"); 
     92    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1); 
     93 
     94    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue); 
     95 
     96    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array)); 
     97 
     98    $gsdl_cgi->param("metavalue",$url_decoded_metavalue); 
     99 
    87100    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode); 
    88101 
     
    102115     
    103116    # live metadata gets/saves value scoped (prefixed) by the current usename  
    104     # so (for now) let's not bother to enforce authentication 
     117    # so (for now) let's not bother to enforce authentication 
    105118 
    106119    # Obtain the collect dir 
     
    137150    } 
    138151    else { 
     152    binmode(GIN,":utf8"); 
    139153        # Read everything in and concatenate them into $metavalue 
    140154    my $metavalue = ""; 
     
    220234    my $infodbtype = $self->{'infodbtype'}; 
    221235   
    222     # don't user authenticate for now 
    223236    if ($baseaction::authentication_enabled) { 
    224237    # Ensure the user is allowed to edit this collection 
     
    283296    my $gsdlhome  = $self->{'gsdlhome'}; 
    284297 
    285      
    286      
    287     # don't user authenticate for now 
    288298    if ($baseaction::authentication_enabled) { 
    289299    # Ensure the user is allowed to edit this collection 
     
    302312    my $metapos   = $self->{'metapos'}; 
    303313    my $metavalue = $self->{'metavalue'}; 
    304     my $infodbtype = $self->{'infodbtype'}; 
     314    my $infodbtype = $self->{'infodbtype'}; 
    305315     
    306316    # To people who know $collect_tail please add some comments 
     
    316326    # Set the metadata value 
    317327    if (defined $metapos) { 
    318         $doc_rec->{$metaname}->[$metapos] = $metavalue; 
     328    $doc_rec->{$metaname}->[$metapos] = $metavalue; 
    319329    } 
    320330    else { 
    321         $doc_rec->{$metaname} = [ $metavalue ]; 
     331    $doc_rec->{$metaname} = [ $metavalue ]; 
    322332    } 
    323333  
    324     my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec); 
     334    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec); 
    325335    if ($status != 0) { 
    326         # Catch error if gdbmget failed 
    327         my $mess = "Failed to set metadata key: $docid\n"; 
    328      
    329         $mess .= "PATH: $ENV{'PATH'}\n"; 
    330         $mess .= "Exit status: $status\n"; 
    331         $mess .= "System Error Message: $!\n"; 
    332  
    333         $gsdl_cgi->generate_error($mess); 
     336        # Catch error if set infodb entry failed 
     337    my $mess = "Failed to set metadata key: $docid\n"; 
     338     
     339    $mess .= "PATH: $ENV{'PATH'}\n"; 
     340    $mess .= "Exit status: $status\n"; 
     341    $mess .= "System Error Message: $!\n"; 
     342     
     343    $gsdl_cgi->generate_error($mess); 
    334344    } 
    335345    else { 
    336         my $mess = "set-document-metadata successful: Key[$docid]\n"; 
    337         $mess .= "  $metaname"; 
    338         $mess .= "->[$metapos]" if (defined $metapos); 
    339         $mess .= " = $metavalue"; 
    340  
    341         $gsdl_cgi->generate_ok_message($mess); 
     346    my $mess = "set-document-metadata successful: Key[$docid]\n"; 
     347    $mess .= "  $metaname"; 
     348    $mess .= "->[$metapos]" if (defined $metapos); 
     349    $mess .= " = $metavalue"; 
     350     
     351    $gsdl_cgi->generate_ok_message($mess); 
    342352    } 
    343353     
     
    352362    my $metaname = $parser->{'parameters'}->{'metaname'}; 
    353363    my $metamode = $parser->{'parameters'}->{'metamode'}; 
    354  
    355     # Find the right metadata tag and checks if we are going to override it 
    356     # Note: This over writes the first metadata block it encountered. If there are multiple Sections in the doc.xml, it might not behave as you would expect 
    357     my $name_attr = $attrHash->{'name'}; 
    358     if (($name_attr eq $metaname) && ($metamode eq "override")) { 
    359         # Get the value and override the current value 
    360     my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
    361     $attrHash->{'_content'} = $metavalue; 
    362  
    363     # Don't want it to wipe out any other pieces of metadata 
    364     $parser->{'parameters'}->{'metamode'} = "done"; 
     364     
     365    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'}; 
     366     
     367    # Find the right metadata tag and checks if we are going to 
     368    # override it 
     369    # 
     370    # Note: This over writes the first metadata block it 
     371    # encountered. If there are multiple Sections in the doc.xml, it 
     372    # might not behave as you would expect 
     373 
     374    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'}; 
     375##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n"; 
     376##    print STDERR "**** metamode = $metamode\n"; 
     377     
     378    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) { 
     379    my $name_attr = $attrHash->{'name'}; 
     380    if (($name_attr eq $metaname) && ($metamode eq "override")) { 
     381##      print STDERR "**** got match!!\n"; 
     382        # Get the value and override the current value 
     383        my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
     384        $attrHash->{'_content'} = $metavalue; 
     385         
     386        # Don't want it to wipe out any other pieces of metadata 
     387        $parser->{'parameters'}->{'metamode'} = "done"; 
     388    } 
    365389    } 
    366390 
     
    378402    # NOTE: This appends new metadata element to all description fields. 
    379403    # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them 
    380     if ($metamode eq "accumulate") { 
    381     # tack a new metadata tag on to the end of the <Metadata>+ block 
     404    if (($metamode eq "accumulate") || ($metamode eq "override")) { 
     405    # If get to here and metamode is override, the this means there  
     406    # was no existing value to overide => treat as an append operation 
     407 
     408    # Tack a new metadata tag on to the end of the <Metadata>+ block 
    382409    my $metaname = $parser->{'parameters'}->{'metaname'}; 
    383410    my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
     
    390417    my $description_content = $attrHash->{'_content'}; 
    391418 
     419##  print STDERR "**** appending to doc.xml\n"; 
     420 
    392421    push(@$description_content,"    ", $append_metadata ,"\n        "); 
     422    $parser->{'parameters'}->{'metamode'} = "done"; 
    393423    } 
    394424 
     
    399429 
    400430 
     431 
     432sub dxml_start_section 
     433{ 
     434    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
     435 
     436    my $new_depth = scalar(@$contextArray); 
     437 
     438    if ($new_depth == 1) { 
     439    $parser->{'parameters'}->{'curr_section_depth'} = 1; 
     440    $parser->{'parameters'}->{'curr_section_num'}   = ""; 
     441    } 
     442 
     443    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'}; 
     444    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'}; 
     445 
     446    my $new_secnum; 
     447 
     448    if ($new_depth > $old_depth) { 
     449    # child subsection 
     450    $new_secnum = "$old_secnum.1"; 
     451    } 
     452    elsif ($new_depth == $old_depth) { 
     453    # sibling section => increase it's value by 1 
     454    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/); 
     455    $tail_num++; 
     456    $new_secnum = $old_secnum; 
     457    $new_secnum =~ s/\.(\d+)$/\.$tail_num/; 
     458    } 
     459    else { 
     460    # back up to parent section => lopp off tail 
     461    $new_secnum = $old_secnum; 
     462    $new_secnum =~ s/\.\d+$//; 
     463    } 
     464 
     465    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth; 
     466    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum; 
     467 
     468    print STDERR "*** In Section: $new_secnum\n"; 
     469} 
     470 
    401471sub edit_xml_file 
    402472{ 
    403473    my $self = shift @_; 
    404     my ($gsdl_cgi, $filename, $rules, $options) = @_; 
     474    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_; 
    405475 
    406476    # use XML::Rules to add it in (read in and out again) 
    407     my $parser = XML::Rules->new(rules => $rules,  
    408                  style => 'filter' ); 
     477    my $parser = XML::Rules->new(start_rules     => $start_rules, 
     478                 rules           => $rules,  
     479                 style           => 'filter', 
     480                                 output_encoding => 'utf8' ); 
    409481 
    410482    my $xml_in = ""; 
     
    420492    close(MIN); 
    421493     
    422     # Matched lines will get handled by the call backs 
    423     my $xml_out = ""; 
    424     $parser->filter($xml_in,\$xml_out, $options); 
    425      
    426     if (!open(MOUT,">$filename")) { 
     494    my $MOUT;     
     495    if (!open($MOUT,">$filename")) { 
    427496        $gsdl_cgi->generate_error("Unable to write out to $filename: $!"); 
    428497    } 
    429498    else { 
    430         print MOUT $xml_out; 
    431         close(MOUT);         
     499        # Matched lines will get handled by the call backs 
     500##      my $xml_out = ""; 
     501 
     502        binmode($MOUT,":utf8"); 
     503        $parser->filter($xml_in,$MOUT, $options); 
     504 
     505#       binmode(MOUT,":utf8"); 
     506#       print MOUT $xml_out; 
     507        close($MOUT);        
    432508    } 
    433509    } 
     
    438514{ 
    439515    my $self = shift @_; 
    440     my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode) = @_; 
     516    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_; 
     517 
     518    # To monitor which section/subsection number we are in 
     519    my @start_rules =  
     520    ( 'Section'    => \&dxml_start_section ); 
    441521 
    442522    # use XML::Rules to add it in (read in and out again) 
     
    444524    my @rules =  
    445525    ( _default => 'raw', 
    446       'Metadata' => \&dxml_metadata, 
    447       'Description' => \&dxml_description ); 
     526      'Metadata'    => \&dxml_metadata, 
     527      'Description' => \&dxml_description); 
    448528       
    449529    # Sets the parameters 
     
    451531            'metapos'   => $metapos, 
    452532            'metavalue' => $metavalue, 
    453             'metamode'  => $metamode }; 
     533            'metamode'  => $metamode }; 
    454534             
    455     $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@rules,$options); 
     535    if (defined $opt_secnum) { 
     536    $options->{'secnum'} = $opt_secnum; 
     537    } 
     538 
     539    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options); 
    456540} 
    457541 
     
    467551    my $infodbtype = $self->{'infodbtype'}; 
    468552     
    469     # don't user authenticate for now 
    470553    if ($baseaction::authentication_enabled) { 
    471554    # Ensure the user is allowed to edit this collection 
     
    488571    $metapos = 0 if (!defined $metapos); 
    489572 
    490     my $metamode   = $self->{'metamode'}; 
     573    my $metamode   = $self->{'metamode'}; 
    491574    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) { 
    492575    # make "accumulate" the default (less destructive, as won't actually  
     
    496579     
    497580    # Obtain the doc.xml path for the specified docID 
     581    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/); 
     582 
    498583    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir); 
    499     my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid); 
     584    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root); 
    500585    my $doc_xml_file = $doc_rec->{'doc-file'}->[0]; 
    501586     
     
    509594    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml 
    510595    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename, 
    511             $metaname,$metavalue,$metapos,$metamode); 
    512  
    513     my $mess = "set-archives-metadata successful: Key[$docid]\n"; 
    514     $mess .= "  $metaname"; 
    515     $mess .= "->[$metapos]" if (defined $metapos); 
    516     $mess .= " = $metavalue"; 
    517     $mess .= " ($metamode)\n"; 
    518      
    519     $gsdl_cgi->generate_ok_message($mess); 
    520          
     596            $metaname,$metavalue,$metapos,$metamode,$docid_secnum); 
     597     
    521598    # Release the lock once it is done 
    522599    $self->unlock_collection($username, $collect); 
    523      
     600 
     601    my $mess = "set-archives-metadata successful: Key[$docid]\n"; 
     602    $mess .= "  $metaname"; 
     603    $mess .= "->[$metapos]" if (defined $metapos); 
     604    $mess .= " = $metavalue"; 
     605    $mess .= " ($metamode)\n"; 
     606     
     607    $gsdl_cgi->generate_ok_message($mess);   
    524608} 
    525609 
     
    536620    # Don't do anything if we are not in the right FileSet 
    537621    my $file_regexp = $parser->{'parameters'}->{'current_file'}; 
    538     return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/)); 
    539      
     622    if ($file_regexp =~ /\.\*/) { 
     623    # Only interested in a file_regexp if it specifies precisely one 
     624    # file.   
     625    # So, skip anything with a .* in it as it is too general 
     626    return [$tagname => $attrHash]; 
     627    } 
     628    my $src_file = $parser->{'parameters'}->{'src_file'}; 
     629    if (!($src_file =~ /$file_regexp/)) { 
     630    return [$tagname => $attrHash]; 
     631    } 
     632##    print STDERR "*** mxl metamode = $metamode\n"; 
     633 
    540634    # Find the right metadata tag and checks if we are going to override it 
    541     # Note: This over writes the first metadata block it encountered even if it doesn't belong to the source file we specified 
    542635    my $name_attr = $attrHash->{'name'}; 
    543636    if (($name_attr eq $metaname) && ($metamode eq "override")) { 
     
    546639    $attrHash->{'_content'} = $metavalue; 
    547640 
     641##  print STDERR "**** overrideing metadata.xml\n"; 
     642 
    548643    # Don't want it to wipe out any other pieces of metadata 
    549644    $parser->{'parameters'}->{'metamode'} = "done"; 
     
    565660    # Don't do anything if we are not in the right FileSet 
    566661    my $file_regexp = $parser->{'parameters'}->{'current_file'}; 
    567     return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/)); 
     662    if ($file_regexp =~ /\.\*/) { 
     663    # Only interested in a file_regexp if it specifies precisely one 
     664    # file.   
     665    # So, skip anything with a .* in it as it is too general 
     666    return [$tagname => $attrHash]; 
     667    } 
     668    my $src_file = $parser->{'parameters'}->{'src_file'}; 
     669    if (!($src_file =~ /$file_regexp/)) { 
     670    return [$tagname => $attrHash]; 
     671    } 
    568672 
    569673    # Accumulate the metadata block to the end of the description block 
    570674    # Note: This adds metadata block to all description blocks, so if there are  
    571675    # multiple FileSets, it will add to all of them 
    572     if ($metamode eq "accumulate") { 
     676    if (($metamode eq "accumulate") || ($metamode eq "override")) { 
     677    # if metamode was "override" but get to here then it failed to 
     678    # find an item to override, in which case it should append its  
     679    # value to the end, just like the "accumulate" mode 
     680 
    573681    # tack a new metadata tag on to the end of the <Metadata>+ block 
    574682    my $metaname = $parser->{'parameters'}->{'metaname'}; 
     
    582690    my $description_content = $attrHash->{'_content'}; 
    583691 
     692##  print STDERR "*** appending to metadata.xml\n"; 
     693 
     694    # append the new metadata element to the end of the current 
     695    # content contained inside this tag 
    584696    push(@$description_content,"    ", $append_metadata ,"\n        "); 
     697 
     698    $parser->{'parameters'}->{'metamode'} = "done"; 
    585699    } 
    586700 
     
    647761    } 
    648762    close(MIN);  
    649      
     763 
    650764        # Filter with the call-back functions 
    651765    my $xml_out = ""; 
    652     $parser->filter($xml_in,\$xml_out, { metaname => $metaname, 
    653                          metavalue => $metavalue, 
    654                                              metamode => $metamode, 
    655                          src_file => $src_file, 
    656                          current_file => undef} ); 
    657  
    658     if (!open(MOUT,">$metadata_xml_filename")) { 
     766 
     767    my $MOUT; 
     768    if (!open($MOUT,">$metadata_xml_filename")) { 
    659769        $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!"); 
    660770    } 
    661771    else { 
     772        binmode($MOUT,":utf8"); 
     773 
    662774            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules 
    663775            # At the moment, I will just hack it! 
    664             my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">"; 
    665             $xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/; 
    666         print MOUT $xml_out; 
    667         close(MOUT);         
     776            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 
     777        #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">"; 
     778            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/; 
     779        #print MOUT $xml_out; 
     780 
     781        $parser->filter($xml_in, $MOUT, { metaname => $metaname, 
     782                          metavalue => $metavalue, 
     783                          metamode => $metamode, 
     784                          src_file => $src_file, 
     785                          current_file => undef} ); 
     786        close($MOUT);        
    668787    } 
    669788    } 
     
    681800    my $infodbtype = $self->{'infodbtype'}; 
    682801     
    683     # don't user authenticate for now 
    684802    if ($baseaction::authentication_enabled) { 
    685803    # Ensure the user is allowed to edit this collection 
    686804    $self->authenticate_user($username, $collect); 
    687805    } 
     806 
    688807 
    689808    # Obtain the collect and archive dir     
     
    705824    my $metaname   = $self->{'metaname'}; 
    706825    my $metavalue  = $self->{'metavalue'}; 
     826##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g; 
     827    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g; 
     828    print STDERR "*** set import meta: val = $metavalue\n"; 
     829     
    707830    my $metamode   = $self->{'metamode'}; 
    708831    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) { 
     
    721844 
    722845    # This now stores the full pathname 
    723     $import_filename = $doc_rec->{'src-file'}->[0]; 
     846    $import_filename = $doc_rec->{'src-file'}->[0];  
    724847    } 
    725848    else { 
    726849        $import_filename = &util::filename_cat($collect_dir,$collect,$import_file); 
    727850    } 
    728          
     851 
    729852    # figure out correct metadata.xml file [?] 
    730853    # Assuming the metadata.xml file is next to the source file 
     
    745868    # Release the lock once it is done 
    746869    $self->unlock_collection($username, $collect); 
     870 
     871    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n"; 
     872    $mess .= "  $metaname"; 
     873    $mess .= " = $metavalue"; 
     874    $mess .= " ($metamode)\n"; 
     875     
     876    $gsdl_cgi->generate_ok_message($mess); 
     877     
    747878} 
    748879 
     
    817948    my $infodbtype = $self->{'infodbtype'}; 
    818949     
    819     # don't user authenticate for now 
    820950    if ($baseaction::authentication_enabled) { 
    821951    # Ensure the user is allowed to edit this collection 
     
    9121042 
    9131043 
    914 # Was trying to reused the codes, but the functions need to be broken down more before they can be reused, otherwise there will be too much overhead and duplicate process... 
     1044# Was trying to reused the codes, but the functions need to be broken 
     1045# down more before they can be reused, otherwise there will be too 
     1046# much overhead and duplicate process... 
    9151047sub insert_metadata 
    9161048{ 
     
    9231055    my $infodbtype = $self->{'infodbtype'}; 
    9241056     
    925     # If the import metadata and gdbm database have been updated, we need to insert some notification to warn user that the the text they see at the moment is not indexed and require a rebuild. 
     1057    # If the import metadata and gdbm database have been updated, we 
     1058    # need to insert some notification to warn user that the the text 
     1059    # they see at the moment is not indexed and require a rebuild. 
    9261060    my $rebuild_pending_macro = "_rebuildpendingmessage_"; 
    9271061 
    928     # don't user authenticate for now 
    9291062    if ($baseaction::authentication_enabled) { 
    9301063    # Ensure the user is allowed to edit this collection