Changeset 31230

Show
Ignore:
Timestamp:
13.12.2016 20:36:01 (3 years ago)
Author:
ak19
Message:

Commit for the GS3 server side part of the OAI deletion policy implementation. Still to implement the GS2 server side part. The earlier commits implemented the Perl side, the oai-inf db implementation. I think I've now got the GS3 server side working, but have yet to try validating against the OAI validator. (I need to test that on a machine that is publicly accessible).

Location:
main/trunk/greenstone3/src/java/org/greenstone/gsdl3
Files:
7 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/OAIPMH.java

    r30556 r31230  
    5858   
    5959  protected SimpleCollectionDatabase coll_db = null; 
     60  protected SimpleCollectionDatabase oaiinf_db = null; 
    6061   
    6162  protected String site_name = ""; 
     
    7677    super.cleanUp();//?? 
    7778    this.coll_db.closeDatabase(); 
     79    this.oaiinf_db.closeDatabase(); 
    7880  } 
    7981  /** configure this service  
     
    114116 
    115117    if (index_stem == null || index_stem.equals("")) { 
    116       index_stem = this.cluster_name; 
     118    index_stem = this.cluster_name; // index_stem is the name of the db in indext/text, it is <colname>.<db> 
    117119    } 
    118120    if (infodb_type == null || infodb_type.equals("")) { 
     
    125127      return false; 
    126128    } 
    127      
    128     // Open database for querying 
     129 
     130    oaiinf_db = new SimpleCollectionDatabase(infodb_type); 
     131    if (!oaiinf_db.databaseOK()) { 
     132      logger.error("Couldn't create the oai-inf database of type "+infodb_type); 
     133      oaiinf_db = null; 
     134      return false; 
     135    } 
     136 
     137     
     138    // Open databases for querying 
    129139    String coll_db_file = GSFile.collectionDatabaseFile(this.site_home, this.cluster_name, index_stem, infodb_type); 
    130140    if (!this.coll_db.openDatabase(coll_db_file, SimpleCollectionDatabase.READ)) { 
    131141      logger.error("Could not open collection database!"); 
    132142      return false; 
     143    } 
     144    // the oaiinf_db is called oai-inf.<infodb_type_extension> 
     145    String oaiinf_db_file = GSFile.OAIInfoDatabaseFile(this.site_home, this.cluster_name, "oai-inf", infodb_type); 
     146    if (oaiinf_db != null && !this.oaiinf_db.openDatabase(oaiinf_db_file, SimpleCollectionDatabase.READ)) { 
     147      logger.warn("Could not open oai-inf database for collection + " + this.cluster_name + "!"); 
    133148    } 
    134149     
     
    332347      return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""); 
    333348    } 
     349 
     350    Document doc = XMLConverter.newDOM(); 
    334351     
    335352    String oid = param_map.get(OAIXML.OID); // TODO should this be identifier??? 
     353    boolean OID_is_deleted = false; 
     354    long millis = -1; 
     355 
     356    DBInfo oai_info = null; 
     357    if(oaiinf_db != null) { 
     358    oai_info = this.oaiinf_db.getInfo(oid); 
     359    if (oai_info == null) { 
     360        logger.warn("OID: " + oid + " is not present in the collection's oai-inf database."); 
     361    } else  { 
     362        String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS); 
     363        if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) { 
     364        OID_is_deleted = true; 
     365 
     366        // get the right timestamp for deletion: from oaiinf db 
     367        String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // in seconds presumably, like oailastmodified in the collection index db         
     368         
     369        millis = Long.parseLong(timestamp)*1000; // in milliseconds 
     370        } 
     371    } 
     372    } 
    336373 
    337374    //get a DBInfo object of the identifier; if this identifier is not present in the database, 
     
    339376    DBInfo info = this.coll_db.getInfo(oid); 
    340377    if (info == null) { 
    341       logger.error("OID: " + oid + " is not present in the database."); 
    342       return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, ""); 
    343     } 
    344  
    345     Document doc = XMLConverter.newDOM(); 
    346     ArrayList<String> keys = new ArrayList<String>(info.getKeys()); 
    347     long millis = getDateStampMillis(info); 
    348     String oailastmodified = "";  
    349     if (millis != -1) { 
    350       oailastmodified = OAIXML.getTime(millis); 
    351     } 
     378      logger.error("OID: " + oid + " is not present in the collection database."); 
     379      //return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, ""); // may exist as deleted in oai-inf db 
     380    } 
     381    else if (millis == -1) { // so !OID_is_deleted, get oailastmodified from collection's index db 
     382    ArrayList<String> keys = new ArrayList<String>(info.getKeys());  
     383    millis = getDateStampMillis(info);   
     384    } 
     385    String oailastmodified = (millis == -1) ? "" : OAIXML.getTime(millis); 
     386     
    352387 
    353388    Element get_record_response = doc.createElement(GSXML.RESPONSE_ELEM); 
     
    356391    Element record = doc.createElement(OAIXML.RECORD); 
    357392    //compose the header element 
    358     record.appendChild(createHeaderElement(doc, oid, oailastmodified));       
    359     //compose the metadata element 
    360     record.appendChild(createMetadataElement(doc, prefix, info)); 
     393    record.appendChild(createHeaderElement(doc, oid, oailastmodified, OID_is_deleted));       
     394    if(!OID_is_deleted) { 
     395    //compose the metadata element 
     396    record.appendChild(createMetadataElement(doc, prefix, info)); 
     397    } 
    361398    get_record.appendChild(record); 
    362399    return get_record_response; 
     
    421458      return OAIXML.createErrorResponse(OAIXML.CANNOT_DISSEMINATE_FORMAT, ""); 
    422459    } 
    423     ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST); 
     460 
     461    // get list of oids 
     462    ArrayList<String> oid_list = null; 
     463    if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db 
     464    oid_list = new ArrayList<String>(oaiinf_db.getAllKeys()); 
     465     
     466    if(oid_list == null) { // try getting the OIDs from the oai entries in the index db 
     467        logger.warn("@@@@@@@@@@@@@ NO OIDs in oai-inf db for " + this.cluster_name); 
     468        oid_list = getChildrenIds(OAIXML.BROWSELIST); 
     469    } 
     470    } 
     471 
    424472    if (oid_list == null) { 
    425       logger.error("No matched records found in collection: browselist is empty"); 
     473      logger.error("No matched records found in collection: oai-inf and index db's browselist are empty"); 
    426474      return OAIXML.createErrorResponse(OAIXML.NO_RECORDS_MATCH, ""); 
    427475    } 
     
    438486    for(int i=0; i<oid_list.size(); i++) { 
    439487      String oid = oid_list.get(i); 
     488      boolean OID_is_deleted = false; 
     489      long millis = -1; 
     490 
     491      DBInfo oai_info = null; 
     492      if(oaiinf_db != null) { 
     493      oai_info = this.oaiinf_db.getInfo(oid); 
     494      if (oai_info == null) { 
     495          logger.warn("OID: " + oid + " is not present in the collection's oai-inf database."); 
     496      } else  { 
     497          String oaiinf_status = oai_info.getInfo(OAIXML.OAI_INF_STATUS); 
     498          if(oaiinf_status != null && oaiinf_status.equals(OAIXML.OAI_INF_DELETED)) { 
     499          OID_is_deleted = true; 
     500           
     501          // get the right timestamp for deletion: from oaiinf db 
     502          String timestamp = oai_info.getInfo(OAIXML.OAI_INF_TIMESTAMP); // in seconds presumably, like oailastmodified in the collection index db       
     503           
     504          millis = Long.parseLong(timestamp)*1000; // in milliseconds 
     505          } 
     506      } 
     507      } 
    440508      DBInfo info = this.coll_db.getInfo(oid); 
    441       if (info == null) { 
    442         logger.error("Database does not contains information about oid: " +oid); 
    443         continue; 
    444       } 
    445        
    446       long millis = getDateStampMillis(info); 
     509      if (info == null) { // can happen if oid was deleted, in which case only oai_info keeps a record of it 
     510        logger.error("Collection database does not contain information about oid: " +oid); 
     511      } 
     512      else if (millis == -1) { // so !OID_is_deleted, get oailastmodified from collection's index db 
     513       
     514      millis = getDateStampMillis(info); 
     515      } 
     516 
    447517      Date this_date = null; 
    448518      if (millis == -1) { 
    449     if (from_date != null || until_date !=null) { 
    450       continue; // if this doc doesn't have a date for some reason, and 
    451       // we are doing a date range, then don't include it. 
    452     } 
     519      if (from_date != null || until_date !=null) { 
     520          continue; // if this doc doesn't have a date for some reason, and 
     521          // we are doing a date range, then don't include it. 
     522      } 
    453523      } else { 
    454     this_date = new Date(millis); 
    455     if (from_date != null) { 
    456       if(this_date.before(from_date)) { 
    457         continue; 
     524      this_date = new Date(millis); 
     525      if (from_date != null) { 
     526          if(this_date.before(from_date)) { 
     527          continue; 
     528          } 
    458529      } 
    459     } 
    460     if (until_date != null) { 
    461       if (this_date.after(until_date)) { 
    462         continue; 
    463       } 
    464     }     
    465       }   
    466   
     530      if (until_date != null) { 
     531          if (this_date.after(until_date)) { 
     532          continue; 
     533          } 
     534      }     
     535      } 
     536       
     537       
     538      // compose a record for adding header and metadata 
     539      Element record = doc.createElement(OAIXML.RECORD); 
     540      list_items.appendChild(record); 
     541      //compose the header element 
     542      record.appendChild(createHeaderElement(doc, oid, OAIXML.getTime(millis), OID_is_deleted)); 
     543 
     544 
    467545      //Now check that this id has metadata for the required prefix. 
    468       if (documentContainsMetadata(info, set_of_elems)) { 
    469     // YES, it does have some metadata for this prefix 
    470     if (include_metadata) { 
    471       // compose a record and add header and metadata 
    472       Element record = doc.createElement(OAIXML.RECORD); 
    473       list_items.appendChild(record); 
    474       //compose the header element 
    475       record.appendChild(createHeaderElement(doc, oid, OAIXML.getTime(millis)));       
    476       //compose the metadata element 
    477       record.appendChild(createMetadataElement(doc, prefix, info)); 
    478     } else { 
    479       //compose the header element and append it 
    480       list_items.appendChild(createHeaderElement(doc, oid, OAIXML.getTime(millis)));       
    481     } 
    482       } // otherwise we won't include this oid. 
     546      if (info != null && documentContainsMetadata(info, set_of_elems)) { 
     547      // YES, it does have some metadata for this prefix         
     548       
     549        if (include_metadata) {      
     550        //compose the metadata element 
     551        record.appendChild(createMetadataElement(doc, prefix, info)); 
     552        } /*else { 
     553          //compose the header element and append it 
     554          list_items.appendChild(createHeaderElement(doc, oid, OAIXML.getTime(millis)));       
     555          }*/ 
     556      } // otherwise we won't include this oid. with meta 
     557       
     558       
     559       
    483560    }//end of for(int i=0; i<oid_list.size(); i++) of doing thru each record 
    484561     
     
    620697 
    621698  /** create a header element used when processing requests like ListRecords/GetRecord/ListIdentifiers 
    622    */ 
    623   protected Element createHeaderElement(Document doc, String oid, String oailastmodified) {     
     699   */   
     700  protected Element createHeaderElement(Document doc, String oid, String oailastmodified, boolean deleted) { 
     701 
    624702        Element header = doc.createElement(OAIXML.HEADER); 
     703     
     704    // if deleted, get the date and change oailastmodified to timestamp in oaiinfo 
     705    if(deleted) { 
     706        header.setAttribute(OAIXML.OAI_INF_STATUS, OAIXML.HEADER_STATUS_ATTR_DELETED); // set the header status to deleted 
     707        // then the timestamp for deletion will be from oai-inf database  
     708    } 
     709     
    625710        Element identifier = doc.createElement(OAIXML.IDENTIFIER); 
    626711    GSXML.setNodeText(identifier, coll_name + ":" + oid); 
     
    648733      return OAIXML.createErrorResponse(OAIXML.ID_DOES_NOT_EXIST, ""); 
    649734    } 
     735 
     736    /* 
     737    ArrayList<String> oid_list = null; 
     738    if(oaiinf_db != null) { // try getting the OIDs from the oaiinf_db 
     739    oid_list = new ArrayList<String>(oaiinf_db.getAllKeys()); 
     740     
     741    if(oid_list == null) { // try getting the OIDs from the oai entries in the index db 
     742        oid_list = getChildrenIds(OAIXML.BROWSELIST); 
     743    } 
     744    } 
     745    */ 
     746    // assume meta formats are only for OIDs that have not been deleted 
     747    // so don't need to check oai-inf db, and can just check collection's index db for list of OIDs 
    650748    ArrayList<String> oid_list = getChildrenIds(OAIXML.BROWSELIST); 
    651749    if (oid_list == null || oid_list.contains(oid) == false) { 
     
    724822   * The name may be in the form: <name>,<mapped name>, in which the mapped name is 
    725823   * optional. The mapped name is looked up in the DBInfo; if not present, use the first 
    726    * name which is mendatory. 
     824   * name which is mandatory. 
    727825   */ 
    728826  protected boolean containsMetadata(DBInfo info, String[] metadata_names) { 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/FlatDatabaseWrapper.java

    r24393 r31230  
    1818 */ 
    1919package org.greenstone.gsdl3.util; 
     20 
     21import java.util.ArrayList; 
    2022 
    2123public interface FlatDatabaseWrapper { 
     
    4446  public boolean deleteKey(String key); 
    4547 
     48  /** Returns all the keys of the database as String */ 
     49  public ArrayList<String> getAllEntryKeys(); 
     50 
    4651  /** returns a string of key-value entries that can be  
    4752   *    printed for debugging purposes*/ 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/GDBMWrapper.java

    r30264 r31230  
    2727import java.io.UnsupportedEncodingException; 
    2828import java.io.File; 
     29import java.util.ArrayList; 
    2930 
    3031/** 
     
    262263    } 
    263264 
     265    /** Returns all the keys of the database as Strings */ 
     266    public ArrayList<String> getAllEntryKeys() { 
     267 
     268    ArrayList<String> keys = new ArrayList<String>(); 
     269     
     270    try { 
     271        java.util.Enumeration e = db_.keys(); 
     272        while (e.hasMoreElements()) { 
     273         
     274        Object key = e.nextElement(); 
     275        keys.add((String)key); 
     276         
     277        }     
     278    } catch (Exception e) { 
     279        logger.error("Exception encountered when trying to GDBMWrapper.getAllEntries():" + e); 
     280    } 
     281     
     282    return keys; 
     283    } 
     284     
     285 
    264286    /** 
    265287     * returns a string of key-value entries that can be printed for debugging 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/GSFile.java

    r30833 r31230  
    372372    return site_home + File.separatorChar + "collect" + File.separatorChar + collection_name + File.separatorChar + "index" + File.separatorChar + "text" + File.separatorChar + index_stem + db_ext; 
    373373    } 
     374     
     375    /** the oai-inf database file in the collection's etc folder */ 
     376    static public String OAIInfoDatabaseFile(String site_home, String collection_name, String db_tailname, String database_type) 
     377    { 
     378    String db_ext = DBHelper.getDBExtFromDBType(database_type); 
     379    if (null == db_ext || db_ext.equals("")) { 
     380        logger.warn("Could not recognise database type \"" + database_type + "\", defaulting to GDBM and extension \".gdb\""); 
     381        // assume gdbm 
     382        db_ext = ".gdb"; 
     383    } 
     384    return site_home + File.separatorChar + "collect" + File.separatorChar + collection_name + File.separatorChar + "etc" + File.separatorChar + db_tailname + db_ext; // db tailname should be oai-inf 
     385    } 
     386 
    374387 
    375388    /** the archives database file - */ 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/JDBMWrapper.java

    r30516 r31230  
    217217    } 
    218218 
     219    /** Returns all the keys of the database as Strings */ 
     220    public ArrayList<String> getAllEntryKeys() { 
     221     
     222    ArrayList<String> keys = new ArrayList<String>(); 
     223     
     224    try { 
     225        FastIterator iter = hashtable_.keys(); 
     226         
     227        String key = (String) iter.next(); 
     228         
     229        while (key != null) { 
     230        keys.add(key); 
     231        key = (String) iter.next(); 
     232        } 
     233         
     234        recman_.commit(); 
     235    } catch (IOException e) { 
     236        logger.error("Failed to get all keys from JDBM database"); 
     237        return null; 
     238    } 
     239     
     240    return keys; 
     241    } 
     242     
     243 
    219244    /** 
    220245     * returns a string of key-value entries that can be printed for debugging 
     
    247272        catch (IOException e) 
    248273        { 
    249             logger.error("Failed get all keys and values from JDBM database"); 
     274            logger.error("Failed to get all keys and values from JDBM database"); 
    250275            return null; 
    251276        } 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/OAIXML.java

    r29266 r31230  
    147147   public static final String OAI_SERVICE_UNAVAILABLE = "OAI service unavailable"; 
    148148   public static final String OID = "OID"; 
    149      
     149 
     150    // The node id in the oai-inf database of the collection, which should contain all the OIDs in the db 
     151    public static final String OAI_INF_STATUS = "status"; // D = Deleted; E = Existing (PD = Provisionally Deleted but PD should not be present at this stage in the oai-inf database) 
     152    public static final String OAI_INF_TIMESTAMP = "timestamp"; // the time of deletion/last (re)indexing of doc 
     153    public static final String OAI_INF_DATESTAMP = "datestamp"; // date version of timestamp     
     154    // oai-inf db values for OAI_INF_STATUS 
     155    public static final String OAI_INF_DELETED = "D"; 
     156    public static final String OAI_INF_EXISTS = "E"; 
     157    public static final String OAI_INF_PROVISIONALLY_DELETED = "PD"; 
     158    // header values for ListRecords/GetRecord and listIdentifiers 
     159    public static final String HEADER_STATUS_ATTR_DELETED = "deleted"; 
     160 
    150161  public static final String OAI_SERVICE_RACK = "OAIPMH"; 
    151162  //system-dependent file separator, maybe '/' or '\' 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/SimpleCollectionDatabase.java

    r30263 r31230  
    1919package org.greenstone.gsdl3.util; 
    2020 
     21import java.util.ArrayList; 
    2122import java.util.Iterator; 
    2223import java.util.Set; 
     
    216217    } 
    217218 
     219    public ArrayList<String> getAllKeys() { 
     220    return this.coll_db.getAllEntryKeys(); 
     221    } 
     222 
    218223    /** 
    219224     * converts a greenstone OID to an internal docnum, returning a Long -