source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26749

Last change on this file since 26749 was 26749, checked in by davidb, 11 years ago

Generation of collection space now done during running of html-to-expeditee script and only if the appropriate checkbox is selected by the user.

File size: 15.0 KB
RevLine 
[26749]1#!/cygdrive/c/strawberry/perl/bin/perl -w
[24916]2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6use strict;
7
# Set this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS runs this script with the working directory set to the Greenstone
# directory rather than the cgi-bin directory, causing lots of stuff to fail
if ($iis6_mode)
{
    # Change into cgi-bin directory
    chdir("cgi-bin");
}


# We use require inside an eval here (instead of "use") to catch any errors loading
# the module (for IIS).  A block eval is used rather than a string eval: the file name
# is a constant, so nothing needs to be compiled at run time.  The trailing "1" gives
# the eval a reliable true value on success, so failure is detected from the eval's
# result rather than from $@ alone.
my $gsdlcgi_loaded = eval { require "gsdlCGI.pm"; 1; };
if (!$gsdlcgi_loaded)
{
    my $load_error = $@;

    # Report the failure as a plain-text CGI response and stop.
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $load_error\n";
    exit 0;
}
29
# Escape a value so it can be placed safely inside a double- or single-quoted
# HTML attribute.  An undefined value is rendered as the empty string.
sub _html_escape
{
    my ($text) = @_;

    return '' unless defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;

    return $text;
}

# Print (to STDOUT, preceded by a text/html Content-type header) the HTML page
# containing the conversion form and the JavaScript that drives the conversion.
#
# Parameters:
#   $isGSDL2 - true when running against Greenstone 2; emitted into the page as
#              the JavaScript value 1 or 0 (variable "gs2")
#   $site    - value for the hidden "site" form field (may be undef)
#   $collect - initial value of the "collect" text field (may be undef)
#   $cl      - initial value of the "cl" (classifier) text field (may be undef)
#
# The three string values are HTML-escaped before being interpolated into
# attribute values, since they come straight from CGI parameters.
#
# Notes on the embedded JavaScript (the heredoc interpolates, so "$" and "\"
# that belong to JavaScript are backslash-escaped):
#   * The page first collects document OIDs and classifier page URLs from the
#     browse pages, then loads each one into the iframe; the iframe's onload
#     handler (pageLoaded) dispatches to writeDocument or writeClPage.
#   * currDocOID records which document is currently loaded in the iframe so
#     that writeDocument can key hashMapDocFrames by the right OID.
#   * NOTE(review): getDocumentUrl appends "&p.showAssocFilePath = 1" with
#     embedded spaces.  This is left untouched on purpose: writeDocument only
#     looks up the assocfilepath metadata when the "assocfilepath" element is
#     absent and never reads the element's content, so changing the URL alone
#     could stop the "assoc" parameter being sent.  Confirm intended behaviour.
sub generate_html_form
{
    my ($isGSDL2,$site,$collect,$cl) = @_;

    # Values destined for HTML attributes / inline script
    my $collect_attr = _html_escape($collect);
    my $cl_attr      = _html_escape($cl);
    my $site_attr    = _html_escape($site);
    my $gs2_flag     = $isGSDL2 ? 1 : 0;

    # first generate the document frames
    # then generate the classifier browsing frames.

    my $html_form = <<EOT;
<html>
  <head>
    <title>HTML To Expeditee Frames</title>
    <base href=".."/>

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>
  </head>
  <body>
    <form class="ui-widget">
      Convert the collection <input type="text" class="ui-corner-all" style="padding: 4px;" name="collect" value="$collect_attr" id="collect" /> to Expeditee frames by traversing the classifier <input type="text" class="ui-corner-all" style="padding: 4px;" name="cl" value="$cl_attr" id="cl" />

      <input value="$site_attr" name="site" id="site" type="hidden">

      <p>
        <input type="checkbox" id="checkBoxBrowsing" name="generate_browsing" value="generate_browsing">Generate Collection Space<br/>
      </p>

      <p style="font-weight: bold;">Extra Expeditee Frame Output Options:</p>
      <input type="checkbox" id="checkBoxFont" name="compute_font" value="compute_font">Compute Font<br/>
      <input type="checkbox" id="checkBoxWidth" name="compute_width" value="compute_width">Compute Width<br/>

      <p><input value="Go" id="go" class="ui-button ui-widget ui-state-default ui-corner-all" type="submit"></p>
    </form>

    <script type="text/javascript">
      var collect;
      var site;
      var cl;
      var gs2;

      var docOIDs = [];
      var clPages = [];

      var hashMapDocFrames = new Array();

      var numDocOIDs = 0;
      var numClPages = 0;

      var currDocFrameNum;
      var currClFrameNum;

      var currDocOID; //OID of the document currently loaded in the iframe

      var compute_font = false;
      var compute_width = false;
      var generate_browsing = false; //generate a matching collection space frameset

      \$(function(){

        \$('#progressbar').progressbar();

        \$('#go').button().click(function(){

          site = document.getElementById("site").value;

          collect = document.getElementById("collect").value;

          if(collect.match(/^\\s*\$/)){
            alert("No collection specified");
            return false;
          }

          cl = document.getElementById("cl").value;
          if(cl.match(/^\\s*\$/)){
            alert("No classifier specified");
            return false;
          }

          compute_font = document.getElementById("checkBoxFont").checked;
          compute_width = document.getElementById("checkBoxWidth").checked;

          generate_browsing = document.getElementById("checkBoxBrowsing").checked;

          gs2=$gs2_flag;

          var url;

          /*obtain url for classifier/browse page and grab all links (doc and CL links) from this page.*/
          if(gs2){
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }else{
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          docOIDs = [];
          clPages = [];

          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          while(outstandingURLs.length > 0){
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var workingTrav = document.getElementById("workingTraverse");
            workingTrav.innerHTML = clHtml;

            var aElems = workingTrav.getElementsByTagName("a");

            /* any links with (document|browse)=> outstandingURLS */
            /* any links with (document|browse)/CL[0-9]/[0-9] => clPages */
            /* any links with (document) => docOIDs */

            var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
            var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

            for(var i = 0; i < aElems.length; i++){
              var aElem = aElems[i];
              var href = aElem.href;

              if(href && href.match(actionRE)){
                if(href.match(clRE)){
                  if(!visitedURLs[href]){
                    console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                    clPages.push(href);
                  }
                }else if(href.match(docRE)){
                  if(!visitedURLs[href]){
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;
          numClPages = clPages.length;
          var iframe = document.getElementById('iframe');

          startProcessing(iframe); //Process documents

          return false;
        });

      });

      function startProcessing(iframe){

        currDocOID = docOIDs.shift();

        iframe.src = getDocumentUrl(currDocOID);

        var progressbar = document.getElementById('progressbar');
        progressbar.style.display = 'block';
      }


      function pageLoaded(){
        var iframe = document.getElementById("iframe");

        if(iframe.src){

          if(iframe.style.display != 'block'){
            iframe.height = '90%';
            iframe.style.display = 'block';
          }

          var iframeDoc;

          if(iframe.contentDocument){ /* FF and Chrome */
            iframeDoc = iframe.contentDocument;
          }else if(iframe.contentWindow){ /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          /*Check url - if it's a doc Url, call "writeDocument", otherwise call "writeClPage"*/
          var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");


          if(iframe.src.match(clRE)){
            writeClPage(iframe,iframeDoc);
          }else{
            writeDocument(iframe,iframeDoc);
          }
        }
      }

      function writeDocument(iframe,iframeDoc){

        var xmlUrl = iframe.src + "&o=xml";

        var iter = (numDocOIDs - docOIDs.length);
        var progressPercent = iter/numDocOIDs * 100;

        var frameNum = getMetadata(xmlUrl,'frameID');

        if(frameNum == null){
          frameNum = iter;
        }

        //Add docOID and matching frame number to an associative array for later use.
        hashMapDocFrames[currDocOID] = frameNum;

        \$(function(){
          \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");

        var assocElem = iframeDoc.getElementById('assocfilepath');
        var assoc = null;

        if(assocElem === undefined || assocElem === null){
          assoc = getMetadata(xmlUrl,'assocfilepath');
        }

        var expFrameTree = htmlToExpeditee(gsContent,compute_font,compute_width);

        var expFrame = JSON.stringify(expFrameTree);
        //console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
          params += "&site=" + site;
        }

        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);

        //Add an assocfilepath but only if it is defined
        if(assoc !== null){
          params += "&assoc=" + assoc;
        }

        params += "&compute-font=" + compute_font;

        params += "&page-type=" + "document";


        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
          alert("ERROR PROCESSING URL: " + url);
        }

        if(docOIDs.length > 0){
          currDocOID = docOIDs.shift();
          iframe.src = getDocumentUrl(currDocOID);

        }else{
          //start writing CL pages.
          if(generate_browsing && clPages.length > 0){
            numClPages = clPages.length;

            //The iframe's onload handler (pageLoaded) calls writeClPage
            //once the classifier page has actually been loaded.
            iframe.src = clPages.shift();
          }else{
            //We are finished
            finish(iframe);
          }
        }
      }

      function getDocumentUrl(docOID){
        var url;

        if(gs2){
          url = "library.cgi";
        }else{
          url = "library";
        }

        url += "?c=" + collect + "&a=d&d=" + docOID;

        url += "&p.showAssocFilePath = 1";

        return url;
      }

      function writeClPage(iframe,iframeDoc){

        var frameNum = numClPages - clPages.length;
        var progressPercent = frameNum / numClPages * 100;

        \$(function(){
          \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");
        var gsContentChildren = gsContent.getElementsByTagName('*');

        var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

        //get rid of rectangles around nodes.
        for(var i = 0; i < gsContentChildren.length; i++){
          var child = gsContentChildren[i];

          if(child.tagName !== "IMG"){
            child.setAttribute('rect','norect');
          }

          if(child.tagName === "A"){
            var aElem = child;
            var aElemSrc = aElem.href;

            //if aElemSrc is a document url, then extract docOID then access hash map and get matching frame number.
            if(aElemSrc.match(docRE)){
              var docMatch = docRE.exec(aElemSrc);
              var docOID = docMatch[1];
              console.log("*** " + docOID);
              //Kept separate from frameNum, which is this page's own frame number.
              var docFrameNum = hashMapDocFrames[docOID];
              var aElemChildren = aElem.getElementsByTagName('*');

              for(var j = 0; j < aElemChildren.length; j++){
                var aElemChild = aElemChildren[j];
                var frameName = collect + docFrameNum;
                //console.log("*** " + frameName);
                //aElemChild.setAttribute("link",frameName);
                //console.log(aElemChild.link);
              }

            }else{
              //TODO: Check if the link is a CL link.
              //TODO: Make a hashmap for storing CL page frame numbers?
            }
          }
        }

        //add link attribute to children nodes of <a> elements.


        //TODO: Need to change htmlToExpeditee code to account for links


        var expFrameTree = htmlToExpeditee(gsContent);
        var expFrame = JSON.stringify(expFrameTree);
        //console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
          params += "&site=" + site;
        }

        params += "&cl=" + cl;
        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);
        params += "&page-type=" + "clPage";

        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
          alert("Error processing url: " + url);
        }

        if(clPages.length > 0){
          var clPage = clPages.shift();
          iframe.src = clPage;
        }else{
          finish(iframe);
        }
      }

      function finish(iframe){
        var progressbar = document.getElementById("progressbar");
        progressbar.style.display = "none";

        iframe.style.display = "none";
        delete iframe.src;
      }

      /**
       * This method is used at this stage to retrieve assocfilepath and
       * frame number metadata values from the document's xml.
       **/
      function getMetadata(xmlUrl,nameValue){
        var metadata = null;

        \$.ajax({
          type: "GET",
          async: false,
          url: xmlUrl,
          dataType: "xml",
          success: function(xml){
            \$(xml).find('metadata').each(function(){
              var name = \$(this).attr('name');

              if(name === nameValue){
                if(metadata === null){
                  metadata = \$(this).text();
                }
              }
            });
          }
        });

        return metadata;
      }

    </script>

    <div id="progressbar" width="100%" style="display: none; margin: 10px; height: 10px;"></div>

    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display:none;" onload="pageLoaded()"></iframe>
  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
[24916]466
# Script entry point.
#
# Sets up the Greenstone CGI environment, then does one of two things
# depending on the CGI parameters:
#   * If "fn" is given, "page-type" selects the action object to run:
#     "document" -> HtmlToExpediteeAction, "clPage" -> CollectionSpaceAction.
#     A missing or unrecognised page type is reported via generate_error.
#   * Otherwise the HTML conversion form is printed, pre-filled from the
#     "collect", "cl" and (for versions other than Greenstone 2) "site" parameters.
#
# NOTE(review): whether generate_error() returns to its caller is not visible
# from this file, so every error path below returns explicitly instead of
# falling through to code that needs the missing value.
sub main
{
    my $gsdl_cgi = gsdlCGI->new();

    # Load GS modules
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    # TODO: Refactor so we only need to use HtmlToExpediteeAction
    require cgiactions::HtmlToExpediteeAction;
    require cgiactions::CollectionSpaceAction;

    $gsdl_cgi->parse_cgi_args();
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn");

    if (defined $fn) {
        # page_type can have two values: "document" or "clPage"
        my $page_type = $gsdl_cgi->clean_param("page-type");

        if (!defined $page_type) {
            $gsdl_cgi->generate_error("No page type specified. Must be 'document' or 'clPage'");
            return;
        }

        my $action;

        if ($page_type eq "document") {
            $action = HtmlToExpediteeAction->new($gsdl_cgi,$iis6_mode);
        }
        elsif ($page_type eq "clPage") {
            $action = CollectionSpaceAction->new($gsdl_cgi,$iis6_mode);
        }
        else {
            $gsdl_cgi->generate_error("Invalid page type specified. Must be 'document' or 'clPage'");
            # No action object exists for an unknown page type; do not call do_action on undef.
            return;
        }

        $action->do_action();
        return;
    }

    # No frame number supplied: generate html form
    my $collect = $gsdl_cgi->clean_param("collect");
    my $cl = $gsdl_cgi->clean_param("cl");

    # Establish collect_dir using defining 'site' along the way if GS3
    my $site = undef;
    my $isGSDL2 = undef;

    if ($gsdl_cgi->greenstone_version() == 2) {
        $isGSDL2 = 1;
    }
    else {
        $isGSDL2 = 0;

        # GS3 (and possible future versions) make use of 'site'
        $site = $gsdl_cgi->clean_param("site");

        if (!defined $site) {
            $gsdl_cgi->generate_error("No site specified.");
            # Do not go on to emit the form after reporting the error.
            return;
        }
    }

    generate_html_form($isGSDL2,$site,$collect,$cl);
    return;
}

main();
Note: See TracBrowser for help on using the repository browser.