root/gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in @ 24916

Revision 24916, 7.5 KB (checked in by davidb, 8 years ago)

Initial cut at code to support converting Greenstone HTML document pages to the file format used by Expeditee through a CGI script

Line 
#!@FULL_PERL_PATH@ -w

# Need to specify the full path of Perl above, e.g. for Windows something like
#!C:\\Perl32\\bin\\perl -w


use strict;

# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone
#   directory rather than the cgi-bin directory, causing lots of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory.  A failure is only logged: the module
    # load below will then fail and report the problem to the client.
    chdir("cgi-bin")
        or print STDERR "html-to-expeditee: unable to chdir to cgi-bin: $!\n";
}


# We use require inside an eval here (instead of "use") to catch any errors
# loading the module (for IIS).  A block eval traps the failure just as well
# as a string eval, without compiling code from a string at runtime.
eval { require "gsdlCGI.pm"; };
if ($@)
{
    # Report the load failure as plain text so it is visible in the browser.
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $@\n";
    exit 0;
}
30
31
# Entry point for the CGI request.
#
# Two modes, selected by the presence of the "fn" (frame number) CGI parameter:
#
#   * "fn" given:   the value of the "json" parameter is written to the file
#                   <output_dir>/<fn>.exp and a confirmation message is
#                   generated through the gsdlCGI object.
#   * "fn" absent:  an HTML page is printed.  Its JavaScript walks a
#                   classifier of a collection, loads each document into an
#                   iframe, converts it with htmlToExpeditee() and posts the
#                   result back to this script (with "fn" set) one frame at
#                   a time.
#
# "fn" ends up in a filename, so it must consist of digits only; anything
# else is rejected before the filesystem is touched.
#
# NOTE(review): the output directory is hard-coded to C:\cygwin\tmp; the
# commented-out alternative below suggests a per-collection export directory
# was intended -- confirm before deploying on another machine.
#
# In the embedded JavaScript the document-OID pattern (docRE) requires "d="
# to follow "?" or "&", in line with actionRE and clRE, so that another
# argument whose name merely ends in "d" is not mistaken for the document id.
sub main
{

#    $ENV{'QUERY_STRING'} = "a=set-import-metadata&c=espresso-music&d=HASH012d6f72cde5dc48162f4a1d.1&metaname=annotation&metapos=0&metavalue=adfadfad";
#    $ENV{'QUERYSTRING'} = "a=set-import-metadata&c=espresso-music&d=HASH012d6f72cde5dc48162f4a1d.1&metaname=annotation&metapos=0&metavalue=adfadfad";
#    $ENV{'REQUEST_METHOD'} = "GET";


    my $gsdl_cgi = gsdlCGI->new();

    # Load the Greenstone modules that we need to use
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

##    require cgiactions::metadataaction;

    # Useful debug statement for seeing what packages have been included
####    printf("%-45s%-s\n",$_,$INC{$_}) foreach (sort keys %INC);

    $gsdl_cgi->parse_cgi_args();

    # We don't want the gsdlCGI module to return errors and warnings in XML
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn");     # frame number

    if (defined $fn) {
        if ($fn !~ m/\A\d+\z/) {
            # The frame number becomes part of a path: refuse anything that
            # could escape the output directory or alter the open mode.
            $gsdl_cgi->generate_error("Invalid frame number");
            return;
        }

        # A missing "json" parameter yields an empty frame file rather than
        # an "uninitialized value" warning under -w.
        my $json_str = $gsdl_cgi->param("json");
        $json_str = "" unless defined $json_str;

        my $output_dir = util::filename_cat("C:","cygwin","tmp");

        # my $output_dir = &filename_cat($gsdlhome,"collect",$collect,"export");

        my $frame_filename = util::filename_cat($output_dir,"$fn.exp");

        if (open(my $fout, '>', $frame_filename)) {
            # Buffered write errors may only surface at close, so check both.
            my $write_ok = print {$fout} $json_str;
            my $close_ok = close($fout);

            # write out next free frame num

            if ($write_ok && $close_ok) {
                $gsdl_cgi->generate_message("html-to-expeditee saved frame $fn");
            }
            else {
                $gsdl_cgi->generate_error("Failed to write $frame_filename");
            }
        }
        else {
            $gsdl_cgi->generate_error("Failed to open $frame_filename for output");
        }

    }
    else {

        # Interpolating heredoc: "\$" yields a literal "$" and "\\" a literal
        # backslash in the page that is sent to the browser.
        my $html_form = <<EOT;
<html>
  <head>
    <title>HTML to Expeditee Frames</title>
    <link type="text/css" href="/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="/gsajax-min.js"></script>
    <script type="text/javascript" src="/html-to-expeditee.js"></script>

  </head>
  <body>
    <form>
      Convert the collection <input type="text" name="collect" id="collect" />
      to Expeditee frames by traversing the classifier <input type="text" name="cl" id="cl" />

      <input value="Go" id="go" type="submit">

    </form>
    <script type="text/javascript">
      var docOIDs = [];
      var numDocOIDs;

      \$(function() {
      \$('#progress').progressbar();
     
      \$('#go').button().click(function() {
          var collect = document.getElementById("collect").value;
          if (collect.match(/^\\s*\$/)) {
          alert("No collection specified");
          return false;
          }

          var cl = document.getElementById("cl").value;
          if (cl.match(/^\\s*\$/)) {
          alert("No classifier specified");
          return false;
          }
          if (cl.match(/^\\d+\$/)) {
          cl = "CL" + cl;
          }
         
          var url = "/greenstone/cgi-bin/library.cgi";
          url += "?c="+collect +"&a=d&cl=" + cl;
         
          /* processing animation */
         
          docOIDs = [];
          var outstandingURLs = [];
          outstandingURLs.push(url);
         
          while (outstandingURLs.length>0) {
          url = outstandingURLs.shift();
         
          var clHtml = urlGetSync(url);
         
          var gs2=1;
         
          var aElems;
          if (gs2) {
              var workingTrav = document.getElementById("workingTraverse");
              workingTrav.innerHTML = clHtml;
              aElems = workingTrav.getElementsByTagName("a");
          }
          else {
              /* gs3 */
              var parser = new DOMParser();
              cl_doc = parser.parseFromString(clHtml,"text/html");
              var aElems = cl_doc.getElementsByTagName("a");
          }
         
         
          /* any links with a=d ... cl=??? => outstandingURLS */
          /* any links with a=d ... d=???  => docOIDS */
         
          var actionRE = new RegExp("(\\\\?|&)a=d(&|\$)");
          var clRE     = new RegExp("(\\\\?|&)cl=" + cl + "\\\\.");
          var docRE    = new RegExp("(?:\\\\?|&)d=(.*?)(?:&|\$)");

          for (var i=0; i<aElems.length; i++) {
              var aElem = aElems[i];
              var href=aElem.href;
              if (href && href.match(actionRE)) {
              if (href.match(clRE)) {
                  outstandingURLs.push(href);
              }
              else if (href.match(docRE)) {
                  var docMatch = docRE.exec(href);
                  var docOID = docMatch[1];
                  docOIDs.push(docOID);
              }
              }
          }
          }

          numDocOIDs = docOIDs.length;

          var iframe = document.getElementById("iframe");

          var docOID = docOIDs.shift();
          console.log("doc oid = " + docOID);
         
          var url = "/greenstone/cgi-bin/library.cgi";
          url += "?c="+collect +"&a=d&d=" + docOID;
         
          iframe.src = url;
         
          var progressbar = document.getElementById("progressbar");
          progressbar.style.display = "block";


          return false; });
     });
     
      function pageLoaded()
      {
      var iframe = document.getElementById("iframe");     
     
      if (iframe.src) {
          if (iframe.style.display != "block") {
          iframe.height = "90%";
          iframe.style.display = "block";
          }

          var iframeDoc;
          if ( iframe.contentDocument )
          { /* FF */
            iframeDoc = iframe.contentDocument;
          }
          else if ( iframe.contentWindow )
          { /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          var frameID = (numDocOIDs - docOIDs.length);
          var progressPercent = frameID/numDocOIDs * 100;

          \$(function() {
          \$('#progressbar').progressbar({ value: progressPercent })
         });

          expFrame = htmlToExpeditee(iframeDoc.body);
          //alert(expFrame);

          var collect = document.getElementById("collect").value;

          var url = "/greenstone/cgi-bin/html-to-expeditee.pl";
          var params = "c=" + collect + "&fn=" + frameID;
          params += "&json=" + escape(expFrame);
          //params += "&json=" + "testing123";

          var clHtml = urlPostSync(url,params);

          if (!clHtml.match(/html-to-expeditee saved frame/)) {
          alert("Error processing url: " + url);
          }

          if (docOIDs.length>0) {
          var docOID = docOIDs.shift();
         
          // console.log("doc oid = " + docOID);

          var cl = document.getElementById("cl").value;
         
          var url = "/greenstone/cgi-bin/library.cgi";
          url += "?c="+collect +"&a=d&d=" + docOID;
         
          iframe.src = url;
          }
          else {
          var progressbar = document.getElementById("progressbar");
          progressbar.style.display = "none";

          iframe.style.display = "none";
          delete iframe.src;
          }
      }
     
      }

    </script>

    <div id="progressbar" width="100%"
          style="display: none; margin: 10px; height: 10px;"></div>
    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display: none;"
            onload="pageLoaded()"></iframe>

  </body>
</html>
EOT


        print "Content-type:text/html\n\n";
        print $html_form;

    }
}
285
286
287
# Run the request handler; called with an explicit (empty) argument list.
main();
Note: See TracBrowser for help on using the browser.