Show
Ignore:
Timestamp:
13.12.2013 13:22:44 (6 years ago)
Author:
ak19
Message:

Adding in the first working version of the formatconverter program which uses formattools to convert GS2 statements to GS3. Not all the GS2 terms have GS3 equivalents yet and the current ones still need to be run by Dr Bainbridge, but nested IFs and ORs seem to work alright in general. Kathy made the important changes to Makefile.in to get the new formatconverter.cpp to compile. formatconverter.cpp uses the new GS2-to-GS3 specific functions added to formattools.cpp

Location:
main/trunk/greenstone2/runtime-src/src/recpt
Files:
1 added
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/runtime-src/src/recpt/Makefile.in

    r25560 r28760  
    206206    extlinkaction.cpp \ 
    207207    formattools.cpp \ 
     208    formatconverter.cpp \ 
    208209    gtiaction.cpp \ 
    209210    highlighttext.cpp \ 
     
    284285    wizardaction.o  
    285286 
     287CONVERTER_OBJECTS = \ 
     288    cgiargs.o \ 
     289    cgiutils.o \ 
     290    formattools.o \ 
     291    formatconverter.o \ 
     292    summarise.o 
     293 
    286294LIBRARY = gsdlrecpt.a 
    287295 
     
    301309 
    302310# Default target: make both gsdlrecpt.a and the library executable 
    303 all: $(LIBRARY) $(EXECUTABLE) $(APACHE_MODULE) 
     311all: $(LIBRARY) $(EXECUTABLE) $(APACHE_MODULE) formatconverter 
     312 
     313formatconverter: $(CONVERTER_OBJECTS) 
     314    $(CXXLINK) $(CONVERTER_OBJECTS) $(COMMON_DIR)/src/lib/gsdllib.a $(PROTOCOL_DIR)/gsdlprotocol.a $(COLSERVR_DIR)/gsdlcolservr.a 
    304315 
    305316gsdlrecpt.a: $(OBJECTS) 
  • main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp

    r24306 r28760  
    4949                  ResultDocInfo_t &docinfo, displayclass &disp,  
    5050                  text_tmap &options, ostream& logout); 
     51 
    5152static text_t format_text (const text_t& collection, recptproto* collectproto, 
    5253                  ResultDocInfo_t &docinfo, displayclass &disp,  
     
    5859                  ostream &logout); 
    5960 
     61static text_t transform_to_GS3_format (format_t *formatlistptr); 
    6062 
    6163void metadata_t::clear() { 
     
    127129        (*(here+2) == 'd' || *(here+2) == 'D') && 
    128130        (*(here+3) == '>' || *(here+3) == ' ')) 
     131      //|| *(here+3) == '\t' || *(here+3) == '\n')) 
    129132      return true; 
    130133      } else return false; 
     
    15521555 
    15531556 
    1554  
    1555  
    15561557static bool uses_expression(const text_t& collection, recptproto* collectproto, 
    15571558                ResultDocInfo_t &docinfo, 
     
    17401741    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is 
    17411742    // a cgi argument macro that has not been set, it evaluates to itself. 
    1742     // Therefore, were have to say that a piece of text evalautes true if 
     1743    // Therefore, we have to say that a piece of text evaluates true if 
    17431744    // it is non-empty and if it is a cgi argument evaluating to itself. 
    17441745 
     
    20572058  return summary; 
    20582059} 
     2060 
     2061//-------------- GS3 related functions -------------- 
     2062// copy of the other uses_expression function, but without using the extra GS2-runtime-specific parameters 
     2063static bool uses_expression(const text_t& outstring, text_t& lhs_expr, 
     2064                text_t& op_expr, text_t& rhs_expr) 
     2065{ 
     2066  // Note: the string may not be of the form: str1 op str2, however 
     2067  // to deterine this we have to process it on the assumption it is, 
     2068  // and if at any point an 'erroneous' value is encountered, return 
     2069  // false and let something else have a go at evaluating it 
     2070 
     2071  // Starting at the end of the string and working backwards .. 
     2072 
     2073  const int outstring_len = outstring.size(); 
     2074 
     2075  // skip over white space 
     2076  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1); 
     2077 
     2078  if (rhs_end<=0) {  
     2079    // no meaningful text or (rhs_end==0) no room for operator 
     2080    return false; 
     2081  } 
     2082 
     2083  // check for ' or " and then scan over token 
     2084  const char potential_quote = outstring[rhs_end]; 
     2085  int rhs_start=rhs_end; 
     2086  bool quoted = false; 
     2087 
     2088  if ((potential_quote == '\'') || (potential_quote == '\"')) { 
     2089    --rhs_end; 
     2090    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1; 
     2091    quoted = true; 
     2092  } 
     2093  else { 
     2094    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1; 
     2095  } 
     2096 
     2097  if ((rhs_end-rhs_start)<0) { 
     2098    // no meaningful rhs expression 
     2099    return false; 
     2100  } 
     2101 
     2102  // form rhs_expr 
     2103  rhs_expr = extract_substr(outstring,rhs_start,rhs_end); 
     2104 
     2105  // skip over white space 
     2106  const int to_whitespace = (quoted) ? 2 : 1; 
     2107 
     2108  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace); 
     2109  int op_start = rscan_for_whitespace(outstring,op_end-1)+1; 
     2110 
     2111  if ((op_end<0) && (op_start<0)) { 
     2112    // no meaningful expression operator 
     2113    return false; 
     2114  } 
     2115 
     2116  if (op_end-op_start<0) { 
     2117    // no meaningful expression operator 
     2118    return false; 
     2119  } 
     2120 
     2121  op_expr = extract_substr(outstring,op_start,op_end); 
     2122 
     2123 
     2124  // check for operator 
     2125  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") && 
     2126     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) { 
     2127 
     2128    // not a valid operator 
     2129    return false; 
     2130  } 
     2131 
     2132  int lhs_end = rscan_over_whitespace(outstring,op_start-1); 
     2133  if (lhs_end<0) { 
     2134    // no meaningful lhs expression 
     2135    return false; 
     2136  } 
     2137 
     2138  int lhs_start = scan_over_whitespace(outstring,0); 
     2139 
     2140  // form lhs_expr from remainder of string 
     2141  lhs_expr = extract_substr(outstring,lhs_start,lhs_end); 
     2142 
     2143  return true; 
     2144} 
     2145 
     2146// [ex.Title] -> ex.Title 
     2147static text_t remove_bracket_bookends(const text_t &str) { 
     2148   
     2149  if(str[0] == '[' && str[str.size()-1] == ']') { 
     2150    return substr (str.begin()+1, str.end()-1); 
     2151  } else { 
     2152    return str; 
     2153  } 
     2154} 
     2155 
     2156static text_t get_gs3_if (const decision_t &decision, format_t *ifptr, format_t *elseptr) 
     2157{ 
     2158  text_t ifstmt ="<gsf:switch>"; 
     2159 
     2160 
     2161  if (decision.command == dMeta) { 
     2162    ifstmt += "<gsf:metadata name=\""; 
     2163    ifstmt += remove_bracket_bookends(decision.meta.metaname); 
     2164    ifstmt += "\"/>";  
     2165  } 
     2166 
     2167  else { //if(decision.command == dText)  
     2168 
     2169    text_t outstring = decision.text; 
     2170 
     2171    // Check for if expression in form: str1 op str2  
     2172    // (such as [x] eq "y") 
     2173    text_t lhs_expr, op_expr, rhs_expr; 
     2174    if (uses_expression(outstring,lhs_expr,op_expr,rhs_expr)) { 
     2175 
     2176      text_t if_operator = op_expr; 
     2177      if (op_expr == "eq" || op_expr == "==") { 
     2178    if_operator = "equals"; 
     2179      } else if (op_expr == "ne" || op_expr == "!=") { 
     2180    if_operator = "notEquals"; 
     2181      } else if (op_expr == "gt" || op_expr == ">") { 
     2182    if_operator = "greaterThan"; 
     2183      } else if (op_expr == "lt" || op_expr == "<") { 
     2184    if_operator = "lessThan"; 
     2185      } else if (op_expr == "ge" || op_expr == ">=") { 
     2186    if_operator = "greaterThanOrEquals"; 
     2187      } else if (op_expr == "le" || op_expr == "<=") { 
     2188    if_operator = "lessThanOrEquals"; 
     2189      } else if (op_expr == "sw") { 
     2190    if_operator = "startsWith"; 
     2191      } else if (op_expr == "ew") { 
     2192    if_operator = "endsWith"; 
     2193      } 
     2194 
     2195      ifstmt += "<gsf:metadata name=\""; 
     2196      ifstmt += remove_bracket_bookends(lhs_expr); 
     2197      ifstmt += "\"/>";  
     2198 
     2199      ifstmt += "<gsf:when test=\""; 
     2200      ifstmt += if_operator; // the test operator 
     2201      ifstmt += "\" test-value=\""; 
     2202      ifstmt += remove_bracket_bookends(rhs_expr); // the test-value 
     2203      ifstmt += "\">"; 
     2204    }  
     2205    else { 
     2206      ifstmt += "<gsf:metadata name=\""; 
     2207      ifstmt += remove_bracket_bookends(decision.text); 
     2208      ifstmt += "\"/>"; 
     2209      ifstmt += "<gsf:when test=\"exists\">"; 
     2210    } 
     2211  } 
     2212    
     2213  // if portion 
     2214  text_t if_body = ""; 
     2215  while(ifptr != NULL) { // body of if can contain a list of items to be transformed into GS3 format stmts 
     2216    if_body += transform_to_GS3_format (ifptr); 
     2217    ifptr = ifptr->nextptr; 
     2218  } 
     2219  ifstmt += if_body; 
     2220  ifstmt += "</gsf:when>"; 
     2221 
     2222  // else portion 
     2223  if(elseptr != NULL) { 
     2224 
     2225    ifstmt += "<gsf:otherwise>"; 
     2226    text_t else_body = ""; // body of else can contain a list of items to be transformed into GS3 format stmts 
     2227    while(elseptr != NULL) { 
     2228      else_body += transform_to_GS3_format (elseptr); 
     2229      elseptr = elseptr->nextptr; 
     2230    } 
     2231    ifstmt += else_body; 
     2232    ifstmt += "</gsf:otherwise>"; 
     2233  }   
     2234 
     2235  ifstmt += "</gsf:switch>"; 
     2236  return ifstmt; 
     2237} 
     2238 
     2239 
     2240static text_t get_gs3_or (format_t *orptr) { 
     2241  text_t result = "<gsf:choose-metadata>"; 
     2242 
     2243  while(orptr != NULL) { 
     2244    text_t or_body = transform_to_GS3_format (orptr); 
     2245    if (!or_body.empty()) { 
     2246      result += or_body; 
     2247    } 
     2248 
     2249    orptr = orptr->nextptr;     
     2250  } 
     2251  result += "</gsf:choose-metadata>"; 
     2252  return result; 
     2253} 
     2254 
     2255// what about all the <td>? Does that get stored in formatlistptr, such as under the ->text field? 
     2256text_t get_GS3_formatstring (format_t *formatlistptr) { 
     2257  text_t result; 
     2258 
     2259  while (formatlistptr != NULL) { 
     2260    result += transform_to_GS3_format(formatlistptr);     
     2261    formatlistptr = formatlistptr->nextptr; 
     2262  } 
     2263   
     2264  return result; 
     2265} 
     2266 
     2267text_t transform_to_GS3_format (format_t *formatlistptr) { 
     2268 
     2269  if (formatlistptr == NULL) return ""; 
     2270   
     2271  switch (formatlistptr->command) { 
     2272  case comOID: 
     2273    return "<gsf:OID/>"; 
     2274  case comTopOID: 
     2275    return "<gsf:metadata name='OID' select='root' />"; // for now try this 
     2276  case comRank: 
     2277    return "<gsf:rank/>"; 
     2278  case comText: 
     2279    return formatlistptr->text; // [text]? or any string that is not a command or reserved 
     2280  case comLink: 
     2281    return "<gsf:link type='document'>"; // type? 
     2282  case comEndLink: 
     2283    return "</gsf:link>"; 
     2284  case comHref: 
     2285    return "<gsf:lib name=\"href\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to 
     2286  case comIcon: 
     2287    return "<gsf:icon type='document'/>"; 
     2288  case comNum: 
     2289    return "<gsf:lib name=\"num\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to 
     2290  case comRel: //if [RelatedDocuments] appears in format string, collect relation data 
     2291    return "<gsf:lib name=\"RelatedDocuments\"/>"; // output comment marking this as deprecated or to be implemented for GS3 in gslib xslt 
     2292  case comSummary: 
     2293    return "<gsf:lib name=\"Summary\"/>"; // in gslib xslt output comment marking this as to be implemented for GS3  
     2294    // need to invent this for GS3 based on what GS2 does 
     2295  case comAssocLink: 
     2296    return "<gsf:link type='source'>"; 
     2297  case comEndAssocLink: 
     2298    return "</gsf:link>"; 
     2299  case comMeta: 
     2300    return "<gsf:metadata name=\"" + formatlistptr->meta.metaname + "\" />";//? 
     2301  case comDoc: 
     2302    return "<gsf:text/>"; 
     2303  case comImage: // the cover img seems to be handled by some magic code in GS3 
     2304    return "<gsf:lib name=\"image\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable 
     2305  case comTOC: 
     2306    return "<gsf:lib name=\"TOC\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable 
     2307    // need to think about whether an equivalent actually exists 
     2308    // return "<gsf:option name=\"TOC\" value=\"true\"/>"; // this is wrong 
     2309  case comDocumentButtonDetach: 
     2310    return "<gsf:lib name=\"DocumentButtonDetach\"/>"; // output comment marking this as deprecated in gslib xslt 
     2311  case comDocumentButtonHighlight: 
     2312    return "<gsf:lib name=\"DocumentButtonHighlight\"/>"; // output comment marking this as deprecated in gslib xslt 
     2313  case comDocumentButtonExpandContents: 
     2314    return "<gsf:lib name=\"DocumentButtonExpandContents\"/>"; // output comment marking this as deprecated in gslib xslt 
     2315  case comDocumentButtonExpandText: 
     2316    return "<gsf:lib name=\"DocumentButtonExpandText\"/>"; // output comment marking this as deprecated in gslib xslt 
     2317  case comHighlight: 
     2318    return "<span class=\"highlight\">"; 
     2319    break; 
     2320  case comEndHighlight: 
     2321    return "</span>"; 
     2322    break; 
     2323  case comMetadataSpanWrap: 
     2324    metadata_wrap=true;  metadata_wrap_type="span"; return ""; 
     2325    break; 
     2326  case comEndMetadataSpanWrap: 
     2327    metadata_wrap=false; metadata_wrap_type="";     return ""; 
     2328    break; 
     2329  case comMetadataDivWrap: 
     2330    metadata_wrap=true;  metadata_wrap_type="div";  return ""; 
     2331    break; 
     2332  case comEndMetadataDivWrap: 
     2333    metadata_wrap=false; metadata_wrap_type="";     return ""; 
     2334    break; 
     2335  case comIf: 
     2336    return get_gs3_if (formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr); 
     2337  case comOr: 
     2338    return get_gs3_or (formatlistptr->orptr); 
     2339    //return "<gsf:choose-metadata>"+get_gs3_or (formatlistptr->orptr)+"</gsf:choose-metadata>"; 
     2340  case comDocTermsFreqTotal: 
     2341    return "<gsf:lib name=\"DocTermsFreqTotal\"/>"; 
     2342  case comCollection: // trying to get all the metadata for a collection. How is this done in GS3??? 
     2343    return "<gsf:lib name=\"collection\"/>"; 
     2344  } 
     2345  return ""; 
     2346} 
  • main/trunk/greenstone2/runtime-src/src/recpt/formattools.h

    r23515 r28760  
    148148                 ostream& logout); 
    149149 
     150text_t get_GS3_formatstring (format_t *formatlistptr); 
     151 
    150152#endif 
    151153