Ignore:
Timestamp:
2012-01-12T15:52:20+13:00 (12 years ago)
Author:
davidb
Message:

More careful recursive traversal to classifier nodes; further development of CSS to Expeditee attributes (background colour); and a slightly less kludgy way to deal with full img URLs

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in

    r24937 r24944  
    108108          docOIDs = [];
    109109          var outstandingURLs = [];
     110          var visitedURLs = {};
     111
    110112          outstandingURLs.push(url);
     113          visitedURLs[url] = 1;
    111114
    112115          while (outstandingURLs.length>0) {
     
    121124         
    122125         
    123           /* any links with a=d ... cl=??? => outstandingURLS */
     126          /* any links with a=(b|d) ... cl=??? => outstandingURLS */
    124127          /* any links with a=d ... d=???  => docOIDS */
    125128         
    126129          var actionRE = new RegExp("(\\\\?|&)a=(?:d|b)(&|\$)");
    127           var clRE     = new RegExp("(\\\\?|&)cl=" + cl + "\\\\.");
     130          var clRE     = new RegExp("(\\\\?|&)cl=" + cl + "(\\\\.\\\\d+)+(&|\$)");
    128131          var docRE    = new RegExp("(?:\\\\?|&)d=(.*?)(?:&|\$)");
    129132
     
    133136              if (href && href.match(actionRE)) {
    134137              if (href.match(clRE)) {
    135                   outstandingURLs.push(href);
     138                  if (!visitedURLs[href]) {     
     139                      // console.log("found a new CL line: " + href);
     140                      outstandingURLs.push(href);
     141                  visitedURLs[href] = 1;
     142                  }
    136143              }
    137144              else if (href.match(docRE)) {
     
    139146                  var docMatch = docRE.exec(href);
    140147                  var docOID = docMatch[1];
     148
     149                  // console.log("found a new doc line: " + docOID);
    141150
    142151                  docOIDs.push(docOID);
     
    151160
    152161          var docOID = docOIDs.shift();
    153           console.log("doc oid = " + docOID);
     162          //console.log("doc oid = " + docOID);
    154163         
    155164          var url;
Note: See TracChangeset for help on using the changeset viewer.