source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26743

Last change on this file since 26743 was 26743, checked in by davidb, 11 years ago

Changing "Generate Browsing" text to "Generate Collection Space"

File size: 12.4 KB
Line 
1 #!/cygdrive/c/strawberry/perl/bin/perl -w
2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6
7use strict;
8
# Set this to 1 to work around IIS 6 craziness (see the chdir below).
# The same flag is later handed to HtmlToExpediteeAction by main().
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone
# directory rather than the cgi-bin directory, causing lots of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory
    # NOTE(review): the result of chdir is not checked; if it fails, the
    # require below is the first place a problem would surface.
    chdir("cgi-bin");
}


# We use require and an eval here (instead of "use") to catch any errors loading the module (for IIS).
# A block eval is used rather than a string eval: it traps the same load
# failure without compiling code from a string at run time, and the message
# in $@ then refers to this file rather than to "(eval N)".
eval { require "gsdlCGI.pm"; };
if ($@)
{
    # Report the load failure to the client as plain text and stop.
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $@\n";
    exit 0;
}
30
31
# Escape the characters that are significant inside HTML text and quoted
# attribute values.  An undefined value becomes the empty string.
sub _escape_html
{
    my ($text) = @_;

    return '' unless defined $text;

    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
    );
    $text =~ s/([&<>"'])/$entity_for{$1}/g;

    return $text;
}

# Print (to STDOUT) the "HTML to Expeditee Frames" page: a CGI header followed
# by a form pre-filled with the given values and the JavaScript that drives
# the conversion in the browser.
#
# Parameters:
#   $isGSDL2 - true when running against Greenstone 2; selects which URL
#              style the generated JavaScript uses (library.cgi vs library)
#   $site    - value for the hidden "site" field (may be undef)
#   $collect - initial value of the "collect" text field (may be undef)
#   $cl      - initial value of the "cl" text field (may be undef)
#
# The three string values are HTML-escaped before being placed in attribute
# values, and undefined ones are rendered as empty strings, so request data
# cannot break out of the attribute and no "uninitialized" warnings are raised.
sub generate_html_form
{
    my ($isGSDL2,$site,$collect,$cl) = @_;

    my $collect_attr = _escape_html($collect);
    my $cl_attr      = _escape_html($cl);
    my $site_attr    = _escape_html($site);

    # Always emit a valid JavaScript literal, even if the flag is undefined.
    my $gs2_flag = $isGSDL2 ? 1 : 0;

    my $html_form = <<EOT;
<html>
  <head>
    <title>HTML to Expeditee Frames</title>

    <base href=".." />

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>

  </head>
  <body>
    <form class="ui-widget">
      Convert the collection
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="collect" value="$collect_attr" id="collect" />
      to Expeditee frames by traversing the classifier
      <input type="text"
             class="ui-corner-all" style="padding: 4px;"
             name="cl" value="$cl_attr" id="cl" />

      <input value="$site_attr" name="site" id="site" type="hidden">

      <p>
        <input type="checkbox" id="checkBoxBrowsing" name="generate_browsing" value="generate_browsing">Generate Collection Space<br/>
      </p>

      <p style="font-weight: bold;">Extra Expeditee Frame Output Options:</p>
      <input type="checkbox" id="checkBoxFont" name="compute_font" value="compute_font">Compute Font<br/>
      <input type="checkbox" id="checkBoxWidth" name="compute_width" value="compute_width">Compute Width<br/>

      <p><input value="Go" id="go" class="ui-button ui-widget ui-state-default ui-corner-all" type="submit"></p>

    </form>
    <script type="text/javascript">
      var docOIDs = [];
      var numDocOIDs;
      var compute_font = false;
      var compute_width = false;
      var generate_browsing = false; //whether to generate a matching browsing frameset

      \$(function() {
        \$('#progress').progressbar();

        \$('#go').button().click(function() {
          var collect = document.getElementById("collect").value;
          if (collect.match(/^\\s*\$/)) {
            alert("No collection specified");
            return false;
          }

          var cl = document.getElementById("cl").value;
          if (cl.match(/^\\s*\$/)) {
            alert("No classifier specified");
            return false;
          }
          if (cl.match(/^\\d+\$/)) {
            cl = "CL" + cl;
          }

          compute_font = document.getElementById("checkBoxFont").checked;
          compute_width = document.getElementById("checkBoxWidth").checked;

          generate_browsing = document.getElementById("checkBoxBrowsing").checked;

          var gs2=$gs2_flag;

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          console.log("*** CHECKING URL: " + url);

          /* processing animation */

          docOIDs = [];
          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          while (outstandingURLs.length>0) {
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var aElems;
            var workingTrav = document.getElementById("workingTraverse");
            workingTrav.innerHTML = clHtml;
            aElems = workingTrav.getElementsByTagName("a");


            /* any links with a=(b|d) ... cl=??? => outstandingURLS */
            /* any links with a=d ... d=??? => docOIDS */

/*
            var actionRE = new RegExp("(\\\\?|&)a=(?:d|b)(&|\$)");
            var clRE = new RegExp("(\\\\?|&)cl=" + cl + "(\\\\.\\\\d+)+(&|\$)");
            var docRE = new RegExp("(?:\\\\?|&)d=(.*?)(?:&|\$)");
*/
            var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
            var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

            for (var i=0; i<aElems.length; i++) {
              var aElem = aElems[i];
              var href=aElem.href;
              if (href && href.match(actionRE)) {
                if (href.match(clRE)) {
                  if (!visitedURLs[href]) {
                    console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                  }
                }
                else if (href.match(docRE)) {
                  if(!visitedURLs[href]){
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;

          /* nothing to convert: do not load a document URL with d=undefined */
          if (numDocOIDs === 0) {
            alert("No documents found for classifier: " + cl);
            return false;
          }

          var iframe = document.getElementById("iframe");

          var docOID = docOIDs.shift();
          //console.log("doc oid = " + docOID);

          var url;
          if (gs2) {
            url = "library.cgi";
            url += "?c="+collect +"&a=d&d=" + docOID;
          }
          else {
            url = "library";
            url += "?c="+collect +"&a=d&d=" + docOID;

            /** The line below has been commented out. This is because the incorrect page is being loaded up in the iframe (i.e. just the document image and heading is being displayed but not any html customizations made in the collectionConfig.xml file**/
            //url += "&ed=1&dt=hierarchy";
            // url += "&excerptid=gs_content";
          }

          //url += "&p.showExpediteeAttributes=1";
          url += "&p.showAssocFilePath=1";


          /* load iframe with document drawn from Greenstone collection */
          iframe.src = url;

          var progressbar = document.getElementById("progressbar");
          progressbar.style.display = "block";


          return false; });
      });

      function pageLoaded()
      {
        var iframe = document.getElementById("iframe");

        if (iframe.src) {
          if (iframe.style.display != "block") {
            iframe.height = "90%";
            iframe.style.display = "block";
          }

          var iframeDoc;
          if ( iframe.contentDocument )
          { /* FF */
            iframeDoc = iframe.contentDocument;
          }
          else if ( iframe.contentWindow )
          { /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          var xmlUrl = iframe.src + "&o=xml";

          var iter = (numDocOIDs - docOIDs.length);
          var progressPercent = iter/numDocOIDs * 100;

          /*Check if a frame-id metadata element already exists,
            otherwise generate a new frame number based on the
            number of documents to process.*/
          var frameNum = getMetadata(xmlUrl,'frameID');

          if(frameNum === null){
            frameNum = iter;
          }

          \$(function() {
            \$('#progressbar').progressbar({ value: progressPercent })
          });

          var gsContent = iframeDoc.getElementById("gs_content");

          /*
          var gsContentPos = getElementPosition(gsContent);
          var pxl = gsContentPos.xl;
          var pxr = gsContentPos.xr;
          */

          //Check if an assocfilepath annotation already exists on the page, otherwise obtain it from the xml.
          var assocElem = iframeDoc.getElementById('assocfilepath');
          var assoc = null;

          if(assocElem === undefined || assocElem === null){
            assoc = getMetadata(xmlUrl,'assocfilepath');
          }

          var expFrameTree = htmlToExpeditee(gsContent,compute_font,compute_width);

          var expFrame = JSON.stringify(expFrameTree);
          //console.log(expFrame);
          //alert(expFrame);

          var collect = document.getElementById("collect").value;
          var site = document.getElementById("site").value;

          var url = "cgi-bin/html-to-expeditee.pl";
          var params = "c=" + collect;
          if (site.match(/\\w/)) {
            params += "&site=" + site;
          }

          //params += "&a=generate-frame&fn=" + iter;
          params += "&a=generate-frame&fn=" + frameNum;
          params += "&json=" + escape(expFrame);

          //add an assocfilepath parameter but only if it is defined.
          if(assoc !== null){
            params += "&assoc=" + assoc;
          }

          params += "&compute-font=" + compute_font;

          var clHtml = urlPostSync(url,params);

          if (!clHtml.match(/html-to-expeditee saved frame/)) {
            alert("Error processing url: " + url);
          }

          if (docOIDs.length>0) {
            var docOID = docOIDs.shift();

            // console.log("doc oid = " + docOID);

            // is the following line used ????
            var cl = document.getElementById("cl").value;

            var gs2 = $gs2_flag;
            var url;

            if (gs2) {
              url = "library.cgi";
              url += "?c="+collect +"&a=d&d=" + docOID;
            }
            else {
              url = "library";
              url += "?c="+collect +"&a=d&d=" + docOID;

              /** Refer to comment earlier in this code in regards to the commented out line below.**/
              //url += "&ed=1&dt=hierarchy";
              // url += "&excerptid=gs_content";
            }

            //url += "&p.showExpediteeAttributes=1";
            url += "&p.showAssocFilePath=1";

            iframe.src = url;
          }
          else {
            var progressbar = document.getElementById("progressbar");
            progressbar.style.display = "none";

            iframe.style.display = "none";
            delete iframe.src;
          }
        }

      }

      /**
       * This method is used at this stage to retrieve assocfilepath
       * and frame number metadata values from the document's xml.
       **/
      function getMetadata(xmlUrl,nameValue){

        var metadata = null;

        \$.ajax({
          type: "GET",
          async: false,
          url: xmlUrl,
          dataType: "xml",
          success: function(xml){


            \$(xml).find('metadata').each(function(){

              var name = \$(this).attr('name');

              if(name === nameValue){
                if(metadata === null){
                  metadata = \$(this).text();
                  //console.log(metadata);
                }

              }
            });

          }

        });

        return metadata;
      }

    </script>

    <div id="progressbar" width="100%"
         style="display: none; margin: 10px; height: 10px;"></div>
    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display: none;"
            onload="pageLoaded()"></iframe>

  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
392
# Script entry point.  Sets up the Greenstone CGI environment, then either
# runs HtmlToExpediteeAction (when the request carries an "fn" frame-number
# parameter) or prints the conversion form pre-filled from the request.
sub main
{

#   $ENV{'QUERY_STRING'} = "a=...";set-import-metadata&c=espresso-music&d=HASH012d6f72cde5dc48162f4a1d.1&metaname=annotation&metapos=0&metavalue=adfadfad";
#   $ENV{'REQUEST_METHOD'} = "GET";

    # Direct method-call syntax; indirect object syntax ("new gsdlCGI()")
    # is ambiguous to the parser and discouraged.
    my $gsdl_cgi = gsdlCGI->new();

    # Load the Greenstone modules that we need to use
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    require cgiactions::HtmlToExpediteeAction;

    # Useful debug statement for seeing what packages have been included
####    printf("%-45s%-s\n",$_,$INC{$_}) foreach (sort keys %INC);

    $gsdl_cgi->parse_cgi_args();

    # We don't want the gsdlCGI module to return errors and warnings in XML
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn"); # frame number

    if (defined $fn) {

        my $action = HtmlToExpediteeAction->new($gsdl_cgi,$iis6_mode);

        $action->do_action();
    }
    else {
        # generate form, pre-filled out with any useful values such
        # as the collection and classifier value

        my $collect = $gsdl_cgi->clean_param("collect");
        my $cl = $gsdl_cgi->clean_param("cl");

        # Establish collect_dir using defining 'site' along the way if GS3

        my $site = undef;
        my $isGSDL2 = undef;

        if ($gsdl_cgi->greenstone_version() == 2) {
            $isGSDL2 = 1;
        }
        else {
            $isGSDL2 = 0;

            # GS3 (and possible future versions) make use of 'site'
            $site = $gsdl_cgi->clean_param("site");
            if (!defined $site) {
                $gsdl_cgi->generate_error("No site specified.");

                # NOTE(review): generate_error presumably terminates the
                # request; if it ever returns, stop here so the form is not
                # printed after the error response.
                return;
            }
        }

        generate_html_form($isGSDL2,$site,$collect,$cl);
    }
}
453
454
455
# Run the script.  Called without the "&" sigil, which is the legacy call form.
main();
Note: See TracBrowser for help on using the repository browser.