source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 24916

Last change on this file since 24916 was 24916, checked in by davidb, 12 years ago

Initial cut at code to support converting Greenstone HTML document pages to the file format used by Expeditee through a CGI script

File size: 7.5 KB
Line 
1#!@FULL_PERL_PATH@ -w
2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6
use strict;
use warnings;

# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set
# to the Greenstone directory rather than the cgi-bin directory, causing lots
# of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory.  A failure is reported on STDERR instead
    # of being silently ignored; the script still carries on so that the
    # module load below can report its own error back to the client.
    chdir("cgi-bin")
        or warn "Unable to change into the cgi-bin directory: $!\n";
}


# We use require and an eval here (instead of "use") to catch any errors
# loading the module (for IIS).  A block eval is enough for that: it traps the
# failure of the require without building code from a string at run time.
my $gsdlcgi_loaded = eval { require "gsdlCGI.pm"; 1 };
if (!$gsdlcgi_loaded)
{
    # $@ can be empty even though the eval failed, so fall back to a fixed text
    my $load_error = $@ || "unknown error while loading gsdlCGI.pm";

    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $load_error\n";
    exit 0;
}
30
31
# Entry point for the CGI request.
#
# When the request carries an "fn" (frame number) argument, the value of the
# "json" argument is written to <output dir>/<fn>.exp and a confirmation
# message is generated.  Without "fn", the HTML page that drives the
# conversion from the browser is printed instead.
#
# Takes no arguments; the return value is not meaningful.
sub main
{
    my $gsdl_cgi = gsdlCGI->new();

    # Load the Greenstone modules that we need to use
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    # Useful debug statement for seeing what packages have been included
    #### printf("%-45s%-s\n",$_,$INC{$_}) foreach (sort keys %INC);

    $gsdl_cgi->parse_cgi_args();

    # We don't want the gsdlCGI module to return errors and warnings in XML
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn"); # frame number

    if (defined $fn) {
        _save_frame($gsdl_cgi, $fn);
    }
    else {
        print "Content-type:text/html\n\n";
        print _conversion_page_html();
    }

    return;
}


# Write the "json" CGI argument to the frame file for frame number $fn.
#
#   $gsdl_cgi - the gsdlCGI object holding the parsed CGI arguments
#   $fn       - the frame number taken from the request
#
# Reports the outcome through $gsdl_cgi->generate_message() on success and
# $gsdl_cgi->generate_error() on failure.
sub _save_frame
{
    my ($gsdl_cgi, $fn) = @_;

    # The frame number becomes part of a filename, so accept digits only.
    # This keeps path separators and ".." out of the path that gets opened.
    if ($fn !~ m/\A\d+\z/) {
        $gsdl_cgi->generate_error("Invalid frame number (digits expected)");
        # NOTE(review): generate_error() may not return; the explicit return
        # keeps this correct either way.
        return;
    }

    # A request without "json" still produces an (empty) frame file
    my $json_str = $gsdl_cgi->param("json");
    $json_str = "" unless defined $json_str;

    # NOTE(review): the output location is hard-wired to C:/cygwin/tmp; the
    # commented-out line below looks like the intended per-collection location.
    my $output_dir = util::filename_cat("C:","cygwin","tmp");

    # my $output_dir = &filename_cat($gsdlhome,"collect",$collect,"export");

    my $frame_filename = util::filename_cat($output_dir,"$fn.exp");

    # Three-argument open with a lexical handle: the filename can never be
    # interpreted as an open mode, and the handle is scoped to this sub.
    my $frame_fh;
    if (!open($frame_fh, ">", $frame_filename)) {
        $gsdl_cgi->generate_error("Failed to open $frame_filename for output");
        return;
    }

    print {$frame_fh} $json_str;

    # Buffered write errors only surface when the handle is closed
    if (!close($frame_fh)) {
        $gsdl_cgi->generate_error("Failed to write $frame_filename: $!");
        return;
    }

    # write out next free frame num

    # The page's JavaScript looks for this exact text to detect success
    $gsdl_cgi->generate_message("html-to-expeditee saved frame $fn");

    return;
}


# Return the HTML page (markup plus inline JavaScript) that asks for a
# collection and a classifier, walks the classifier pages to collect document
# ids, loads each document into an iframe and posts the converted frame back
# to this script.
#
# The heredoc is deliberately non-interpolating: the page contains no Perl
# variables, so the JavaScript can be written without escaping "$" and "\".
sub _conversion_page_html
{
    my $html_form = <<'EOT';
<html>
  <head>
    <title>HTML to Expeditee Frames</title>
    <link type="text/css" href="/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="/gsajax-min.js"></script>
    <script type="text/javascript" src="/html-to-expeditee.js"></script>

  </head>
  <body>
    <form>
      Convert the collection <input type="text" name="collect" id="collect" />
      to Expeditee frames by traversing the classifier <input type="text" name="cl" id="cl" />

      <input value="Go" id="go" type="submit">

    </form>
    <script type="text/javascript">
      var docOIDs = [];
      var numDocOIDs;

      $(function() {
        $('#progressbar').progressbar();

        $('#go').button().click(function() {
          var collect = document.getElementById("collect").value;
          if (collect.match(/^\s*$/)) {
            alert("No collection specified");
            return false;
          }

          var cl = document.getElementById("cl").value;
          if (cl.match(/^\s*$/)) {
            alert("No classifier specified");
            return false;
          }
          if (cl.match(/^\d+$/)) {
            cl = "CL" + cl;
          }

          var url = "/greenstone/cgi-bin/library.cgi";
          url += "?c="+collect +"&a=d&cl=" + cl;

          /* processing animation */

          docOIDs = [];
          var outstandingURLs = [];
          outstandingURLs.push(url);

          /* URLs already queued, so a page linked more than once is only fetched once */
          var seenURLs = {};
          seenURLs[url] = true;

          /* any links with a=d ... cl=??? => outstandingURLS */
          /* any links with a=d ... d=???  => docOIDS */

          var actionRE = new RegExp("(\\?|&)a=d(&|$)");
          var clRE     = new RegExp("(\\?|&)cl=" + cl + "\\.");
          var docRE    = new RegExp("(?:\\?|&)d=(.*?)(?:&|$)");

          while (outstandingURLs.length>0) {
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var gs2=1;

            var aElems;
            if (gs2) {
              var workingTrav = document.getElementById("workingTraverse");
              workingTrav.innerHTML = clHtml;
              aElems = workingTrav.getElementsByTagName("a");
            }
            else {
              /* gs3 */
              var parser = new DOMParser();
              var cl_doc = parser.parseFromString(clHtml,"text/html");
              aElems = cl_doc.getElementsByTagName("a");
            }

            for (var i=0; i<aElems.length; i++) {
              var aElem = aElems[i];
              var href=aElem.href;
              if (href && href.match(actionRE)) {
                if (href.match(clRE)) {
                  if (!seenURLs[href]) {
                    seenURLs[href] = true;
                    outstandingURLs.push(href);
                  }
                }
                else {
                  var docMatch = docRE.exec(href);
                  if (docMatch) {
                    docOIDs.push(docMatch[1]);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;

          if (numDocOIDs == 0) {
            /* nothing to convert: do not load a document with an undefined id */
            alert("No documents found for classifier " + cl);
            return false;
          }

          var iframe = document.getElementById("iframe");

          var docOID = docOIDs.shift();
          console.log("doc oid = " + docOID);

          url = "/greenstone/cgi-bin/library.cgi";
          url += "?c="+collect +"&a=d&d=" + docOID;

          iframe.src = url;

          var progressbar = document.getElementById("progressbar");
          progressbar.style.display = "block";

          return false;
        });
      });

      function pageLoaded()
      {
        var iframe = document.getElementById("iframe");

        if (iframe.src) {
          if (iframe.style.display != "block") {
            iframe.height = "90%";
            iframe.style.display = "block";
          }

          var iframeDoc;
          if ( iframe.contentDocument )
          { /* FF */
            iframeDoc = iframe.contentDocument;
          }
          else if ( iframe.contentWindow )
          { /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          var frameID = (numDocOIDs - docOIDs.length);
          var progressPercent = frameID/numDocOIDs * 100;

          $(function() {
            $('#progressbar').progressbar({ value: progressPercent })
          });

          var expFrame = htmlToExpeditee(iframeDoc.body);
          //alert(expFrame);

          var collect = document.getElementById("collect").value;

          var url = "/greenstone/cgi-bin/html-to-expeditee.pl";
          var params = "c=" + collect + "&fn=" + frameID;
          /* encodeURIComponent (unlike escape) also encodes "+", which would
             otherwise be decoded as a space on the server side */
          params += "&json=" + encodeURIComponent(expFrame);
          //params += "&json=" + "testing123";

          var clHtml = urlPostSync(url,params);

          if (!clHtml.match(/html-to-expeditee saved frame/)) {
            alert("Error processing url: " + url);
          }

          if (docOIDs.length>0) {
            var docOID = docOIDs.shift();

            // console.log("doc oid = " + docOID);

            url = "/greenstone/cgi-bin/library.cgi";
            url += "?c="+collect +"&a=d&d=" + docOID;

            iframe.src = url;
          }
          else {
            var progressbar = document.getElementById("progressbar");
            progressbar.style.display = "none";

            iframe.style.display = "none";
            delete iframe.src;
          }
        }

      }

    </script>

    <div id="progressbar" width="100%"
         style="display: none; margin: 10px; height: 10px;"></div>
    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display: none;"
            onload="pageLoaded()"></iframe>

  </body>
</html>
EOT

    return $html_form;
}
285
286
287
# Handle the request: every subroutine this script needs is defined above.
main();
Note: See TracBrowser for help on using the repository browser.