Changeset 24943


Ignore:
Timestamp:
2012-01-12T15:10:25+13:00 (12 years ago)
Author:
sjm84
Message:

Added remove_import_metadata and remove_archive_metadata, more functionality should be added to both at some stage, also some other fixes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm

    r24071 r24943  
    7272               
    7373    "set-import-metadata"   => { 'compulsory-args' => [ "metaname", "metavalue" ],
    74                      'optional-args'   => [ "d", "f", "metamode" ]
     74                     'optional-args'   => [ "d", "f", "metamode" ] # Need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
    7575                    # metamode can be "accumulate", "override", or "unique-id"
    7676                 },
    7777
     78    "remove-import-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
     79                     'optional-args'   => [ ] },
     80                     
     81    "remove-archives-metadata" => { 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
     82                     'optional-args'   => [ "metapos" ] },
    7883
    7984    "remove-live-metadata"  => { 'compulsory-args' => [ "d", "metaname" ],
     
    693698    my $infodbtype = $self->{'infodbtype'};
    694699   
    695 
    696    
    697700    if ($baseaction::authentication_enabled) {
    698701        # Ensure the user is allowed to edit this collection
     
    836839}
    837840
     841sub remove_archives_metadata
     842{
     843    my $self = shift @_;
     844
     845    my $username  = $self->{'username'};
     846    my $collect   = $self->{'collect'};
     847    my $gsdl_cgi  = $self->{'gsdl_cgi'};
     848    my $gsdlhome  = $self->{'gsdlhome'};
     849    my $infodbtype = $self->{'infodbtype'};
     850   
     851    if ($baseaction::authentication_enabled)
     852    {
     853        # Ensure the user is allowed to edit this collection
     854        &authenticate_user($gsdl_cgi, $username, $collect);
     855    }
     856   
     857    my $site = $self->{'site'};
     858       
     859    # Obtain the collect and archive dir   
     860    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
     861   
     862    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
     863
     864    # Make sure the collection isn't locked by someone else
     865    $self->lock_collection($username, $collect);
     866   
     867    # look up additional args
     868    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
     869   
     870    my $metaname = $self->{'metaname'};
     871    my $metapos = $self->{'metapos'};
     872    $metapos = 0 if (!defined $metapos);
     873   
     874    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
     875    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
     876
     877    # This now stores the full pathname
     878    my $import_filename = $doc_rec->{'doc-file'}->[0]; 
     879
     880    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $import_filename), $metaname, $metapos, $docid_secnum);
     881   
     882    # Release the lock once it is done
     883    $self->unlock_collection($username, $collect);
     884
     885    if ($status == 0)
     886    {
     887        my $mess = "remove-archives-metadata successful: Key[$docid]\n";
     888        $mess .= "  $metaname";
     889        $mess .= "->[$metapos]" if (defined $metapos);
     890
     891        $gsdl_cgi->generate_ok_message($mess); 
     892    }
     893    else
     894    {
     895        my $mess .= "Failed to remove archives metadata key: $docid\n";
     896        $mess .= "Exit status: $status\n";
     897        $mess .= "System Error Message: $!\n";
     898        $mess .= "-" x 20 . "\n";
     899       
     900        $gsdl_cgi->generate_error($mess);
     901    }
     902}
     903
     904sub remove_from_doc_xml
     905{
     906    my $self = shift @_;
     907    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
     908   
     909    my @start_rules = ('Section' => \&dxml_start_section);
     910   
     911    # Set the call-back functions for the metadata tags
     912    my @rules =
     913    (
     914        _default => 'raw',
     915        'Metadata' => \&rfdxml_metadata
     916    );
     917       
     918    my $parser = XML::Rules->new
     919    (
     920        start_rules => \@start_rules,
     921        rules => \@rules,
     922        style => 'filter',
     923        output_encoding => 'utf8'
     924    );
     925   
     926    my $status = 0;
     927    my $xml_in = "";
     928    if (!open(MIN,"<$doc_xml_filename"))
     929    {
     930        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
     931        $status = 1;
     932    }
     933    else
     934    {
     935        # Read them in
     936        my $line;
     937        while (defined ($line=<MIN>)) {
     938            $xml_in .= $line;
     939        }
     940        close(MIN);
     941
     942        # Filter with the call-back functions
     943        my $xml_out = "";
     944
     945        my $MOUT;
     946        if (!open($MOUT,">$doc_xml_filename")) {
     947            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
     948            $status = 1;
     949        }
     950        else {
     951            binmode($MOUT,":utf8");
     952            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, secid => $secid});
     953            close($MOUT);       
     954        }
     955    }
     956    return $status;
     957}
     958
     959sub rfdxml_metadata
     960{
     961    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
     962
     963    if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
     964    {
     965        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
     966        return [$tagname => $attrHash];
     967    }
     968
     969    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
     970    {
     971        if (!defined $parser->{'parameters'}->{'poscount'})
     972        {
     973            $parser->{'parameters'}->{'poscount'} = 0;
     974        }
     975        else
     976        {
     977            $parser->{'parameters'}->{'poscount'}++;
     978        }
     979    }
     980   
     981    if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
     982    {   
     983        return [];
     984    }
     985   
     986    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
     987    return [$tagname => $attrHash];
     988}
    838989
    839990sub mxml_metadata
     
    8881039    # Don't do anything if we are not in the right FileSet
    8891040    my $file_regexp = $parser->{'parameters'}->{'current_file'};
    890     if ($file_regexp =~ /\.\*/) {
     1041    if ($file_regexp =~ m/\.\*/) {
    8911042    # Only interested in a file_regexp if it specifies precisely one
    8921043    # file. 
     
    8951046    }
    8961047    my $src_file = $parser->{'parameters'}->{'src_file'};
    897     if (!($src_file =~ /$file_regexp/)) {
     1048   
     1049    if (!($src_file =~ m/$file_regexp/)) {
    8981050    return [$tagname => $attrHash];
    8991051    }
     
    9171069    my $append_metadata = [ "Metadata" => $metadata_attr ];
    9181070    my $description_content = $attrHash->{'_content'};
    919 
     1071   
    9201072##  print STDERR "*** appending to metadata.xml\n";
    9211073
    9221074    # append the new metadata element to the end of the current
    9231075    # content contained inside this tag
     1076    if (ref($description_content) eq "") {
     1077        # => string or numeric literal
     1078        # this is caused by a <Description> block has no <Metadata> child elements
     1079        # => set up an empty array in '_content'
     1080        $attrHash->{'_content'} = [ "\n" ];
     1081        $description_content = $attrHash->{'_content'};
     1082    }
     1083
    9241084    push(@$description_content,"    ", $append_metadata ,"\n        ");
    925 
    9261085    $parser->{'parameters'}->{'metamode'} = "done";
    9271086    }
     
    9781137                                 output_encoding => 'utf8');
    9791138
    980     my $xml_in = "";
    981     if (!open(MIN,"<$metadata_xml_filename")) {
    982     $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
    983     }
    984     else {
    985         # Read them in
    986     my $line;
    987     while (defined ($line=<MIN>)) {
    988         $xml_in .= $line;
    989     }
    990     close(MIN);
    991 
    992         # Filter with the call-back functions
    993     my $xml_out = "";
    994 
    995     my $MOUT;
    996     if (!open($MOUT,">$metadata_xml_filename")) {
    997         $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
     1139    if (!-e $metadata_xml_filename) {
     1140   
     1141        if (open(MOUT,">$metadata_xml_filename")) {
     1142           
     1143            my $src_file_re = &util::filename_to_regex($src_file);
     1144            # shouldn't the following also be in the above utility routine??
     1145            # $src_file_re =~ s/\./\\./g;
     1146       
     1147            print MOUT "<?xml version=\"1.0\"?>\n";
     1148            print MOUT "<DirectoryMetadata>\n";
     1149            print MOUT " <FileSet>\n";
     1150            print MOUT "  <FileName>$src_file_re</FileName>\n";
     1151            print MOUT "  <Description>\n";
     1152            print MOUT "  </Description>\n";
     1153            print MOUT " </FileSet>\n";
     1154            print MOUT "</DirectoryMetadata>\n";
     1155
     1156            close(MOUT);
     1157        }
     1158        else {
     1159            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
     1160        }
     1161    }
     1162   
     1163   
     1164    my $xml_in = "";
     1165    if (!open(MIN,"<$metadata_xml_filename")) {
     1166        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
    9981167    }
    9991168    else {
    1000         binmode($MOUT,":utf8");
    1001 
    1002             # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
    1003             # At the moment, I will just hack it!
    1004             #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
    1005         #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
    1006             #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
    1007         #print MOUT $xml_out;
    1008 
    1009         $parser->filter($xml_in, $MOUT, { metaname => $metaname,
    1010                           metavalue => $metavalue,
    1011                           metamode => $metamode,
    1012                           src_file => $src_file,
    1013                           current_file => undef} );
    1014         close($MOUT);       
    1015     }
    1016     }
     1169        # Read them in
     1170        my $line;
     1171        while (defined ($line=<MIN>)) {
     1172            $xml_in .= $line;
     1173        }
     1174        close(MIN);
     1175
     1176        # Filter with the call-back functions
     1177        my $xml_out = "";
     1178
     1179        my $MOUT;
     1180        if (!open($MOUT,">$metadata_xml_filename")) {
     1181            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
     1182        }
     1183        else {
     1184            binmode($MOUT,":utf8");
     1185
     1186            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
     1187            # At the moment, I will just hack it!
     1188            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
     1189            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
     1190            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
     1191            #print MOUT $xml_out;
     1192
     1193            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
     1194                      metavalue => $metavalue,
     1195                      metamode => $metamode,
     1196                      src_file => $src_file,
     1197                      current_file => undef} );
     1198            close($MOUT);       
     1199        }
     1200   }
    10171201}
    10181202
     
    10841268    # Assuming the metadata.xml file is next to the source file
    10851269    # Note: This will not work if it is using the inherited metadata from the parent folder
    1086     my ($import_tailname, $import_dirname)
    1087     = File::Basename::fileparse($import_filename);
     1270    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
    10881271    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
    10891272
     
    11091292}
    11101293
     1294sub remove_import_metadata
     1295{
     1296    my $self = shift @_;
     1297   
     1298    my $username = $self->{'username'};
     1299    my $collect   = $self->{'collect'};
     1300    my $gsdl_cgi  = $self->{'gsdl_cgi'};
     1301   
     1302    if ($baseaction::authentication_enabled) {
     1303        # Ensure the user is allowed to edit this collection
     1304        &authenticate_user($gsdl_cgi, $username, $collect);
     1305    }
     1306
     1307    my $gsdlhome  = $self->{'gsdlhome'};
     1308    my $infodbtype = $self->{'infodbtype'};
     1309   
     1310    # Obtain the collect dir
     1311    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
     1312    my $site = $self->{'site'};
     1313    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
     1314   
     1315    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
     1316    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
     1317   
     1318    # Make sure the collection isn't locked by someone else
     1319    $self->lock_collection($username, $collect);
     1320   
     1321    # look up additional args
     1322    my $docid = $self->{'d'};
     1323    if ((!defined $docid) || ($docid =~ m/^\s*$/))
     1324    {
     1325        die "No docid (d=...) specified.\n";
     1326    }
     1327   
     1328    my $metaname = $self->{'metaname'};
     1329    my $metavalue = $self->{'metavalue'};
     1330    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
     1331   
     1332    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
     1333    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
     1334    my $metadata_xml_file;
     1335    my $import_filename = undef;
     1336    if (defined $docid)
     1337    {
     1338        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
     1339        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
     1340
     1341        # This now stores the full pathname
     1342        $import_filename = $doc_rec->{'src-file'}->[0];
     1343    }
     1344
     1345    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
     1346    {
     1347        die "There is no metadata\n";
     1348    }
     1349   
     1350    # figure out correct metadata.xml file [?]
     1351    # Assuming the metadata.xml file is next to the source file
     1352    # Note: This will not work if it is using the inherited metadata from the parent folder
     1353    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
     1354    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
     1355   
     1356    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
     1357   
     1358    # Release the lock once it is done
     1359    $self->unlock_collection($username, $collect);
     1360
     1361    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
     1362    $mess .= "  $metaname";
     1363    $mess .= " = $metavalue\n";
     1364   
     1365    $gsdl_cgi->generate_ok_message($mess);
     1366}
     1367
     1368sub remove_from_metadata_xml
     1369{
     1370    my $self = shift @_;
     1371    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
     1372   
     1373    # Set the call-back functions for the metadata tags
     1374    my @rules =
     1375    (
     1376        _default => 'raw',
     1377        'Metadata' => \&rfmxml_metadata,
     1378        'FileName' => \&mxml_filename
     1379    );
     1380       
     1381    my $parser = XML::Rules->new
     1382    (
     1383        rules => \@rules,
     1384        style => 'filter',
     1385        output_encoding => 'utf8'
     1386    );
     1387   
     1388    my $xml_in = "";
     1389    if (!open(MIN,"<$metadata_xml_filename"))
     1390    {
     1391        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
     1392    }
     1393    else
     1394    {
     1395        # Read them in
     1396        my $line;
     1397        while (defined ($line=<MIN>)) {
     1398            $xml_in .= $line;
     1399        }
     1400        close(MIN);
     1401
     1402        # Filter with the call-back functions
     1403        my $xml_out = "";
     1404
     1405        my $MOUT;
     1406        if (!open($MOUT,">$metadata_xml_filename")) {
     1407            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
     1408        }
     1409        else {
     1410            binmode($MOUT,":utf8");
     1411            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
     1412            close($MOUT);       
     1413        }
     1414    }
     1415}
     1416
     1417sub rfmxml_metadata
     1418{
     1419    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
     1420
     1421    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
     1422    {
     1423        return [];
     1424    }
     1425
     1426    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
     1427    return [$tagname => $attrHash];
     1428}
    11111429
    11121430sub remove_live_metadata
Note: See TracChangeset for help on using the changeset viewer.