Ignore:
Timestamp:
2013-12-13T13:22:44+13:00 (10 years ago)
Author:
ak19
Message:

Adding in the first working version of the formatconverter program which uses formattools to convert GS2 statements to GS3. Not all the GS2 terms have GS3 equivalents yet and the current ones still need to be run by Dr Bainbridge, but nested IFs and ORs seem to work alright in general. Kathy made the important changes to Makefile.in to get the new formatconverter.cpp to compile. formatconverter.cpp uses the new GS2-to-GS3 specific functions added to formattools.cpp

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/runtime-src/src/recpt/formattools.cpp

    r24306 r28760  
    4949                  ResultDocInfo_t &docinfo, displayclass &disp,
    5050                  text_tmap &options, ostream& logout);
     51
    5152static text_t format_text (const text_t& collection, recptproto* collectproto,
    5253                  ResultDocInfo_t &docinfo, displayclass &disp,
     
    5859                  ostream &logout);
    5960
     61static text_t transform_to_GS3_format (format_t *formatlistptr);
    6062
    6163void metadata_t::clear() {
     
    127129        (*(here+2) == 'd' || *(here+2) == 'D') &&
    128130        (*(here+3) == '>' || *(here+3) == ' '))
     131      //|| *(here+3) == '\t' || *(here+3) == '\n'))
    129132      return true;
    130133      } else return false;
     
    15521555
    15531556
    1554 
    1555 
    15561557static bool uses_expression(const text_t& collection, recptproto* collectproto,
    15571558                ResultDocInfo_t &docinfo,
     
    17401741    // This is a tad tricky.  When we expand a string like _cgiargmode_, that is
    17411742    // a cgi argument macro that has not been set, it evaluates to itself.
    1742     // Therefore, were have to say that a piece of text evalautes true if
     1743    // Therefore, we have to say that a piece of text evaluates true if
    17431744    // it is non-empty and if it is a cgi argument evaluating to itself.
    17441745
     
    20572058  return summary;
    20582059}
     2060
     2061//-------------- GS3 related functions --------------
     2062// copy of the other uses_expression function, but without using the extra GS2-runtime-specific parameters
     2063static bool uses_expression(const text_t& outstring, text_t& lhs_expr,
     2064                text_t& op_expr, text_t& rhs_expr)
     2065{
     2066  // Note: the string may not be of the form: str1 op str2, however
     2067  // to deterine this we have to process it on the assumption it is,
     2068  // and if at any point an 'erroneous' value is encountered, return
     2069  // false and let something else have a go at evaluating it
     2070
     2071  // Starting at the end of the string and working backwards ..
     2072
     2073  const int outstring_len = outstring.size();
     2074
     2075  // skip over white space
     2076  int rhs_end = rscan_over_whitespace(outstring,outstring_len-1);
     2077
     2078  if (rhs_end<=0) {
     2079    // no meaningful text or (rhs_end==0) no room for operator
     2080    return false;
     2081  }
     2082
     2083  // check for ' or " and then scan over token
     2084  const char potential_quote = outstring[rhs_end];
     2085  int rhs_start=rhs_end;
     2086  bool quoted = false;
     2087
     2088  if ((potential_quote == '\'') || (potential_quote == '\"')) {
     2089    --rhs_end;
     2090    rhs_start = rscan_for(outstring,rhs_end-1,potential_quote) +1;
     2091    quoted = true;
     2092  }
     2093  else {
     2094    rhs_start = rscan_for_whitespace(outstring,rhs_end-1) +1;
     2095  }
     2096
     2097  if ((rhs_end-rhs_start)<0) {
     2098    // no meaningful rhs expression
     2099    return false;
     2100  }
     2101
     2102  // form rhs_expr
     2103  rhs_expr = extract_substr(outstring,rhs_start,rhs_end);
     2104
     2105  // skip over white space
     2106  const int to_whitespace = (quoted) ? 2 : 1;
     2107
     2108  int op_end = rscan_over_whitespace(outstring,rhs_start-to_whitespace);
     2109  int op_start = rscan_for_whitespace(outstring,op_end-1)+1;
     2110
     2111  if ((op_end<0) && (op_start<0)) {
     2112    // no meaningful expression operator
     2113    return false;
     2114  }
     2115
     2116  if (op_end-op_start<0) {
     2117    // no meaningful expression operator
     2118    return false;
     2119  }
     2120
     2121  op_expr = extract_substr(outstring,op_start,op_end);
     2122
     2123
     2124  // check for operator
     2125  if ((op_expr != "eq") && (op_expr != "ne") && (op_expr != "gt") &&
     2126     (op_expr != "ge") && (op_expr != "lt") && (op_expr != "le") && (op_expr != "==") && (op_expr != "!=") &&  (op_expr != ">") && (op_expr != ">=") && (op_expr != "<") && (op_expr != "<=") && (op_expr != "sw") && (op_expr != "ew")) {
     2127
     2128    // not a valid operator
     2129    return false;
     2130  }
     2131
     2132  int lhs_end = rscan_over_whitespace(outstring,op_start-1);
     2133  if (lhs_end<0) {
     2134    // no meaningful lhs expression
     2135    return false;
     2136  }
     2137
     2138  int lhs_start = scan_over_whitespace(outstring,0);
     2139
     2140  // form lhs_expr from remainder of string
     2141  lhs_expr = extract_substr(outstring,lhs_start,lhs_end);
     2142
     2143  return true;
     2144}
     2145
     2146// [ex.Title] -> ex.Title
     2147static text_t remove_bracket_bookends(const text_t &str) {
     2148 
     2149  if(str[0] == '[' && str[str.size()-1] == ']') {
     2150    return substr (str.begin()+1, str.end()-1);
     2151  } else {
     2152    return str;
     2153  }
     2154}
     2155
     2156static text_t get_gs3_if (const decision_t &decision, format_t *ifptr, format_t *elseptr)
     2157{
     2158  text_t ifstmt ="<gsf:switch>";
     2159
     2160
     2161  if (decision.command == dMeta) {
     2162    ifstmt += "<gsf:metadata name=\"";
     2163    ifstmt += remove_bracket_bookends(decision.meta.metaname);
     2164    ifstmt += "\"/>";
     2165  }
     2166
     2167  else { //if(decision.command == dText)
     2168
     2169    text_t outstring = decision.text;
     2170
     2171    // Check for if expression in form: str1 op str2
     2172    // (such as [x] eq "y")
     2173    text_t lhs_expr, op_expr, rhs_expr;
     2174    if (uses_expression(outstring,lhs_expr,op_expr,rhs_expr)) {
     2175
     2176      text_t if_operator = op_expr;
     2177      if (op_expr == "eq" || op_expr == "==") {
     2178    if_operator = "equals";
     2179      } else if (op_expr == "ne" || op_expr == "!=") {
     2180    if_operator = "notEquals";
     2181      } else if (op_expr == "gt" || op_expr == ">") {
     2182    if_operator = "greaterThan";
     2183      } else if (op_expr == "lt" || op_expr == "<") {
     2184    if_operator = "lessThan";
     2185      } else if (op_expr == "ge" || op_expr == ">=") {
     2186    if_operator = "greaterThanOrEquals";
     2187      } else if (op_expr == "le" || op_expr == "<=") {
     2188    if_operator = "lessThanOrEquals";
     2189      } else if (op_expr == "sw") {
     2190    if_operator = "startsWith";
     2191      } else if (op_expr == "ew") {
     2192    if_operator = "endsWith";
     2193      }
     2194
     2195      ifstmt += "<gsf:metadata name=\"";
     2196      ifstmt += remove_bracket_bookends(lhs_expr);
     2197      ifstmt += "\"/>";
     2198
     2199      ifstmt += "<gsf:when test=\"";
     2200      ifstmt += if_operator; // the test operator
     2201      ifstmt += "\" test-value=\"";
     2202      ifstmt += remove_bracket_bookends(rhs_expr); // the test-value
     2203      ifstmt += "\">";
     2204    }
     2205    else {
     2206      ifstmt += "<gsf:metadata name=\"";
     2207      ifstmt += remove_bracket_bookends(decision.text);
     2208      ifstmt += "\"/>";
     2209      ifstmt += "<gsf:when test=\"exists\">";
     2210    }
     2211  }
     2212   
     2213  // if portion
     2214  text_t if_body = "";
     2215  while(ifptr != NULL) { // body of if can contain a list of items to be transformed into GS3 format stmts
     2216    if_body += transform_to_GS3_format (ifptr);
     2217    ifptr = ifptr->nextptr;
     2218  }
     2219  ifstmt += if_body;
     2220  ifstmt += "</gsf:when>";
     2221
     2222  // else portion
     2223  if(elseptr != NULL) {
     2224
     2225    ifstmt += "<gsf:otherwise>";
     2226    text_t else_body = ""; // body of else can contain a list of items to be transformed into GS3 format stmts
     2227    while(elseptr != NULL) {
     2228      else_body += transform_to_GS3_format (elseptr);
     2229      elseptr = elseptr->nextptr;
     2230    }
     2231    ifstmt += else_body;
     2232    ifstmt += "</gsf:otherwise>";
     2233  } 
     2234
     2235  ifstmt += "</gsf:switch>";
     2236  return ifstmt;
     2237}
     2238
     2239
     2240static text_t get_gs3_or (format_t *orptr) {
     2241  text_t result = "<gsf:choose-metadata>";
     2242
     2243  while(orptr != NULL) {
     2244    text_t or_body = transform_to_GS3_format (orptr);
     2245    if (!or_body.empty()) {
     2246      result += or_body;
     2247    }
     2248
     2249    orptr = orptr->nextptr;   
     2250  }
     2251  result += "</gsf:choose-metadata>";
     2252  return result;
     2253}
     2254
     2255// what about all the <td>? Does that get stored in formatlistptr, such as under the ->text field?
     2256text_t get_GS3_formatstring (format_t *formatlistptr) {
     2257  text_t result;
     2258
     2259  while (formatlistptr != NULL) {
     2260    result += transform_to_GS3_format(formatlistptr);   
     2261    formatlistptr = formatlistptr->nextptr;
     2262  }
     2263 
     2264  return result;
     2265}
     2266
     2267text_t transform_to_GS3_format (format_t *formatlistptr) {
     2268
     2269  if (formatlistptr == NULL) return "";
     2270 
     2271  switch (formatlistptr->command) {
     2272  case comOID:
     2273    return "<gsf:OID/>";
     2274  case comTopOID:
     2275    return "<gsf:metadata name='OID' select='root' />"; // for now try this
     2276  case comRank:
     2277    return "<gsf:rank/>";
     2278  case comText:
     2279    return formatlistptr->text; // [text]? or any string that is not a command or reserved
     2280  case comLink:
     2281    return "<gsf:link type='document'>"; // type?
     2282  case comEndLink:
     2283    return "</gsf:link>";
     2284  case comHref:
     2285    return "<gsf:lib name=\"href\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to
     2286  case comIcon:
     2287    return "<gsf:icon type='document'/>";
     2288  case comNum:
     2289    return "<gsf:lib name=\"num\"/>"; // ??? in gslib xsl, output comment marking not sure what this maps to
     2290  case comRel: //if [RelatedDocuments] appears in format string, collect relation data
     2291    return "<gsf:lib name=\"RelatedDocuments\"/>"; // output comment marking this as deprecated or to be implemented for GS3 in gslib xslt
     2292  case comSummary:
     2293    return "<gsf:lib name=\"Summary\"/>"; // in gslib xslt output comment marking this as to be implemented for GS3
     2294    // need to invent this for GS3 based on what GS2 does
     2295  case comAssocLink:
     2296    return "<gsf:link type='source'>";
     2297  case comEndAssocLink:
     2298    return "</gsf:link>";
     2299  case comMeta:
     2300    return "<gsf:metadata name=\"" + formatlistptr->meta.metaname + "\" />";//?
     2301  case comDoc:
     2302    return "<gsf:text/>";
     2303  case comImage: // the cover img seems to be handled by some magic code in GS3
     2304    return "<gsf:lib name=\"image\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable
     2305  case comTOC:
     2306    return "<gsf:lib name=\"TOC\"/>"; // in gslib xslt output a comment marking this as not working in the same way in GS3 as in GS2. TOC currently not relocatable
     2307    // need to think about whether an equivalent actually exists
     2308    // return "<gsf:option name=\"TOC\" value=\"true\"/>"; // this is wrong
     2309  case comDocumentButtonDetach:
     2310    return "<gsf:lib name=\"DocumentButtonDetach\"/>"; // output comment marking this as deprecated in gslib xslt
     2311  case comDocumentButtonHighlight:
     2312    return "<gsf:lib name=\"DocumentButtonHighlight\"/>"; // output comment marking this as deprecated in gslib xslt
     2313  case comDocumentButtonExpandContents:
     2314    return "<gsf:lib name=\"DocumentButtonExpandContents\"/>"; // output comment marking this as deprecated in gslib xslt
     2315  case comDocumentButtonExpandText:
     2316    return "<gsf:lib name=\"DocumentButtonExpandText\"/>"; // output comment marking this as deprecated in gslib xslt
     2317  case comHighlight:
     2318    return "<span class=\"highlight\">";
     2319    break;
     2320  case comEndHighlight:
     2321    return "</span>";
     2322    break;
     2323  case comMetadataSpanWrap:
     2324    metadata_wrap=true;  metadata_wrap_type="span"; return "";
     2325    break;
     2326  case comEndMetadataSpanWrap:
     2327    metadata_wrap=false; metadata_wrap_type="";     return "";
     2328    break;
     2329  case comMetadataDivWrap:
     2330    metadata_wrap=true;  metadata_wrap_type="div";  return "";
     2331    break;
     2332  case comEndMetadataDivWrap:
     2333    metadata_wrap=false; metadata_wrap_type="";     return "";
     2334    break;
     2335  case comIf:
     2336    return get_gs3_if (formatlistptr->decision, formatlistptr->ifptr, formatlistptr->elseptr);
     2337  case comOr:
     2338    return get_gs3_or (formatlistptr->orptr);
     2339    //return "<gsf:choose-metadata>"+get_gs3_or (formatlistptr->orptr)+"</gsf:choose-metadata>";
     2340  case comDocTermsFreqTotal:
     2341    return "<gsf:lib name=\"DocTermsFreqTotal\"/>";
     2342  case comCollection: // trying to get all the metadata for a collection. How is this done in GS3???
     2343    return "<gsf:lib name=\"collection\"/>";
     2344  }
     2345  return "";
     2346}
Note: See TracChangeset for help on using the changeset viewer.