source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26753

Last change on this file since 26753 was 26753, checked in by davidb, 11 years ago

Fixed bug with assocfilepath annotation not being written to the frames.

File size: 15.3 KB
Line 
1#!/cygdrive/c/strawberry/perl/bin/perl -w
2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6use strict;
7
# Flip this to 1 to work around IIS 6 craziness
my $iis6_mode = 0;


# IIS 6: for some reason, IIS launches this script with the working directory
# set to the Greenstone directory rather than the cgi-bin directory, which
# makes lots of stuff fail. In that mode, change into cgi-bin first.
chdir("cgi-bin") if $iis6_mode;


# gsdlCGI is loaded at run time inside an eval (rather than with "use") so
# that a failure to load the module can be caught and reported to the
# client as a plain-text response (needed for IIS).
eval { require "gsdlCGI.pm" };
if (my $load_error = $@) {
    print STDOUT "Content-type:text/plain\n\n";
    print STDOUT "ERROR: $load_error\n";
    exit 0;
}
29
# Print the "HTML To Expeditee Frames" conversion page as a complete CGI
# response (Content-type header followed by the HTML document) on STDOUT.
#
# Parameters:
#   $isGSDL2 - true when running under Greenstone 2; emitted to the page's
#              JavaScript as the flag "gs2" (1 or 0), which selects between
#              the "library.cgi" and "library" URL forms.
#   $site    - site name; placed in the hidden "site" form field. May be
#              undefined, in which case the field is rendered empty.
#   $collect - collection name used to pre-fill the "collect" text field.
#   $cl      - classifier name used to pre-fill the "cl" text field.
#
# The three string parameters are HTML-escaped before being interpolated
# into attribute values, and undefined values are rendered as empty strings.
#
# Inside the heredoc, "\$" produces a literal "$" for the JavaScript and
# "\\" produces a single literal backslash.
sub generate_html_form
{
    my ($isGSDL2, $site, $collect, $cl) = @_;

    # The loop variable aliases each lexical, so the copies used in the
    # heredoc below are defaulted and escaped in place.
    for my $field ($site, $collect, $cl) {
        $field = '' unless defined $field;
        $field =~ s/&/&amp;/g;
        $field =~ s/</&lt;/g;
        $field =~ s/>/&gt;/g;
        $field =~ s/"/&quot;/g;
    }

    # Always emit a valid JavaScript literal, even if $isGSDL2 is undefined.
    my $gs2_flag = $isGSDL2 ? 1 : 0;

    # first generate the document frames
    # then generate the classifier browsing frames.

    my $html_form = <<EOT;
<!DOCTYPE html>
<html>
  <head>
    <meta content="text/html;charset=utf-8" http-equiv="Content-Type">
    <meta content="utf-8" http-equiv="encoding">
    <title>HTML To Expeditee Frames</title>
    <base href=".."/>

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>
  </head>
  <body>
    <form class="ui-widget">
      Convert the collection <input type="text" class="ui-corner-all" style="padding: 4px;" name="collect" value="$collect" id="collect" /> to Expeditee frames by traversing the classifier <input type="text" class="ui-corner-all" style="padding: 4px;" name="cl" value="$cl" id="cl" />

      <input value="$site" name="site" id="site" type="hidden">

      <p>
        <input type="checkbox" id="checkBoxBrowsing" name="generate_browsing" value="generate_browsing">Generate Collection Space<br/>
      </p>

      <p style="font-weight: bold;">Extra Expeditee Frame Output Options:</p>
      <input type="checkbox" id="checkBoxFont" name="compute_font" value="compute_font">Compute Font<br/>
      <input type="checkbox" id="checkBoxWidth" name="compute_width" value="compute_width">Compute Width<br/>

      <p><input value="Go" id="go" class="ui-button ui-widget ui-state-default ui-corner-all" type="submit"></p>
    </form>

    <script type="text/javascript">
      var collect;
      var site;
      var cl;
      var gs2;

      var docOIDs = [];
      var clPages = [];

      var hashMapDocFrames = new Array();

      var numDocOIDs = 0;
      var numClPages = 0;

      var currDocFrameNum;
      var currClFrameNum;

      //OID of the document currently loaded in the iframe (null when none)
      var currDocOID = null;

      var compute_font = false;
      var compute_width = false;
      var generate_browsing = false; //generate a matching collection space frameset

      \$(function(){

        \$('#progressbar').progressbar();

        \$('#go').button().click(function(){

          site = document.getElementById("site").value;

          collect = document.getElementById("collect").value;

          if(collect.match(/^\\s*\$/)){
            alert("No collection specified");
            return false;
          }

          cl = document.getElementById("cl").value;
          if(cl.match(/^\\s*\$/)){
            alert("No classifier specified");
            return false;
          }

          compute_font = document.getElementById("checkBoxFont").checked;
          compute_width = document.getElementById("checkBoxWidth").checked;

          generate_browsing = document.getElementById("checkBoxBrowsing").checked;

          gs2 = $gs2_flag;

          var url;

          /*obtain url for classifier/browse page and grab all links (doc and CL links) from this page.*/
          if(gs2){
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }else{
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          docOIDs = [];
          clPages = [];

          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          while(outstandingURLs.length > 0){
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var workingTrav = document.getElementById("workingTraverse");
            workingTrav.innerHTML = clHtml;

            var aElems = workingTrav.getElementsByTagName("a");

            /* any links with (document|browse)=> outstandingURLS */
            /* any links with (document|browse)/CL[0-9]/[0-9] => clPages */
            /* any links with (document) => docOIDs */

            var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
            var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

            for(var i = 0; i < aElems.length; i++){
              var aElem = aElems[i];
              var href = aElem.href;

              if(href && href.match(actionRE)){
                if(href.match(clRE)){
                  if(!visitedURLs[href]){
                    //console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                    clPages.push(href);
                  }
                }else if(href.match(docRE)){
                  if(!visitedURLs[href]){
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    //console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;
          numClPages = clPages.length;
          var iframe = document.getElementById('iframe');

          startProcessing(iframe); //Process documents

          return false;
        });

      });

      function startProcessing(iframe){

        var progressbar = document.getElementById('progressbar');
        progressbar.style.display = 'block';

        if(docOIDs.length > 0){
          //Remember which document is being loaded so that writeDocument
          //can record its frame number against the right OID.
          currDocOID = docOIDs.shift();
          iframe.src = getDocumentUrl(currDocOID);
        }else if(generate_browsing && clPages.length > 0){
          //No documents were found: go straight to the CL pages.
          currDocOID = null;
          iframe.src = clPages.shift();
        }else{
          //Nothing to process at all.
          currDocOID = null;
          finish(iframe);
        }
      }




      function pageLoaded(){
        var iframe = document.getElementById("iframe");

        if(iframe.src){

          if(iframe.style.display != 'block'){
            iframe.height = '90%';
            iframe.style.display = 'block';
          }

          var iframeDoc = getIframeDoc(iframe);


          /*Check url - if it's a doc Url, call "writeDocument", otherwise call "writeClPage"*/
          var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");


          if(iframe.src.match(clRE)){
            writeClPage(iframe,iframeDoc);
          }else{
            writeDocument(iframe,iframeDoc);
          }
        }
      }

      function writeDocument(iframe){

        var xmlUrl = iframe.src + "&o=xml";
        console.log("xml url: " + xmlUrl);
        var iter = (numDocOIDs - docOIDs.length);
        var progressPercent = iter/numDocOIDs * 100;

        var frameNum = getMetadata(xmlUrl,'frameID');

        if(frameNum === null){
          frameNum = iter;
        }

        //Add docOID and matching frame number to an associative array for later use.
        //This must be the OID of the document currently in the iframe, not
        //the next one taken from the queue below.
        if(currDocOID !== null){
          hashMapDocFrames[currDocOID] = frameNum;
        }

        var iframeDoc = getIframeDoc(iframe);

        \$(function(){
          \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");

        var assocElem = iframeDoc.getElementById("assocfilepath");

        var assoc = null;

        if(assocElem === undefined || assocElem === null){
          assoc = getMetadata(xmlUrl,'assocfilepath');

        }

        var expFrameTree = htmlToExpeditee(gsContent,compute_font,compute_width);

        var expFrame = JSON.stringify(expFrameTree);
        //console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
          params += "&site=" + site;
        }

        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);

        //Add an assocfilepath but only if it is defined
        if(assoc !== null){
          params += "&assoc=" + assoc;
        }

        params += "&compute-font=" + compute_font;

        params += "&page-type=" + "document";


        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
          alert("ERROR PROCESSING URL: " + url);
        }

        if(docOIDs.length > 0){
          currDocOID = docOIDs.shift();

          iframe.src = getDocumentUrl(currDocOID);

        }else{
          currDocOID = null;

          //start writing CL pages (if requested and there are any).
          if(generate_browsing && clPages.length > 0){
            iframe.src = clPages.shift();
          }else{
            //We are finished
            finish(iframe);
          }
        }
      }

      function getIframeDoc(iframe){

        var iframeDoc = null;

        if(iframe.contentDocument){ /* FF and Chrome */
          iframeDoc = iframe.contentDocument;
        }else if(iframe.contentWindow){ /* IE */
          iframeDoc = iframe.contentWindow.document;
        }


        return iframeDoc;
      }

      function getDocumentUrl(docOID){
        var url;

        if(gs2){
          url = "library.cgi";
        }else{
          url = "library";
        }

        url += "?c=" + collect + "&a=d&d=" + docOID;

        url += "&p.showAssocFilePath=1";

        return url;
      }

      function writeClPage(iframe){

        var frameNum = numClPages - clPages.length;
        var progressPercent = frameNum / numClPages * 100;
        var iframeDoc = getIframeDoc(iframe);
        //console.log("Processing cl page: " + iframe.src + " ****");

        \$(function(){
          \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");
        var gsContentChildren = gsContent.getElementsByTagName('*');

        var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");


        for(var i = 0; i < gsContentChildren.length; i++){
          var child = gsContentChildren[i];

          //get rid of rectangles around nodes.
          if(child.tagName !== "IMG"){
            child.setAttribute('rect','norect');
          }

          if(child.tagName === "A"){
            var aElem = child;
            var aElemSrc = aElem.href;

            //if aElemSrc is a document url, then extract docOID then access hash map and get matching frame number.
            if(aElemSrc.match(docRE)){
              var docMatch = docRE.exec(aElemSrc);
              var docOID = docMatch[1];

              var fn = hashMapDocFrames[docOID];

              var aElemChildren = aElem.getElementsByTagName('*');

              for(var j = 0; j < aElemChildren.length; j++){
                var aElemChild = aElemChildren[j];

                if(fn !== null && fn!== undefined){
                  var frameName = collect + fn;

                  //aElemChild.setAttribute("link",frameName);
                  //console.log(aElemChild.getAttribute('link'));
                  //console.log(aElemChild.link);
                }
              }

            }else{
              //TODO: Check if the link is a CL link.
              //TODO: Make a hashmap for storing CL page frame numbers?
            }
          }
        }


        //TODO: Need to change htmlToExpeditee code to account for links


        var expFrameTree = htmlToExpeditee(gsContent);
        var expFrame = JSON.stringify(expFrameTree);
        console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
          params += "&site=" + site;
        }

        params += "&cl=" + cl;
        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);
        params += "&page-type=" + "clPage";

        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
          alert("Error processing url: " + url);
        }

        if(clPages.length > 0){
          iframe.src = clPages.shift();
        }else{
          finish(iframe);
        }
      }

      function finish(iframe){
        var progressbar = document.getElementById("progressbar");
        progressbar.style.display = "none";

        iframe.style.display = "none";
        //"delete iframe.src" has no effect on a DOM element; clear the attribute instead.
        iframe.removeAttribute("src");
      }

      /**
       * This method is used at this stage to retrieve assocfilepath and
       * frame number metadata values from the document's xml.
       **/
      function getMetadata(xmlUrl,nameValue){
        var metadata = null;

        \$.ajax({
          type: "GET",
          async: false,
          url: xmlUrl,
          dataType: "xml",
          success: function(xml){
            \$(xml).find('metadata').each(function(){
              var name = \$(this).attr('name');

              if(name === nameValue){
                if(metadata === null){
                  metadata = \$(this).text();
                }
              }
            });
          }
        });

        return metadata;
      }

    </script>

    <div id="progressbar" width="100%" style="display: none; margin: 10px; height: 10px;"></div>

    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display:none;" onload="pageLoaded()"></iframe>
  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
484
# Entry point of the CGI script.
#
# Sets up the Greenstone CGI environment, changes into $ENV{'GSDLHOME'},
# loads the two action modules and parses the CGI arguments. Then:
#   * if the "fn" parameter is present, the request is a frame-generation
#     call: "page-type" selects the action object ("document" selects
#     HtmlToExpediteeAction, "clPage" selects CollectionSpaceAction) and
#     its do_action() method is invoked;
#   * otherwise the HTML conversion form is printed via generate_html_form().
#
# NOTE(review): generate_error() is defined in gsdlCGI, which is not visible
# here, so it is unclear whether it returns to the caller. Each error path
# therefore returns explicitly, so that no undefined action object is
# invoked and no second response is printed after an error.
sub main
{
    my $gsdl_cgi = gsdlCGI->new();

    # Load GS modules
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    # TODO: Refactor so we only need to use HtmlToExpediteeAction
    require cgiactions::HtmlToExpediteeAction;
    require cgiactions::CollectionSpaceAction;

    $gsdl_cgi->parse_cgi_args();
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn");

    if (defined $fn) {
        # page_type can have two values: "document" or "clPage"
        my $page_type = $gsdl_cgi->clean_param("page-type");

        if (!defined $page_type) {
            $gsdl_cgi->generate_error("No page type specified. Must be 'document' or 'clPage'");
            return;
        }

        my $action;

        if ($page_type eq "document") {
            $action = HtmlToExpediteeAction->new($gsdl_cgi, $iis6_mode);
        }
        elsif ($page_type eq "clPage") {
            $action = CollectionSpaceAction->new($gsdl_cgi, $iis6_mode);
        }
        else {
            $gsdl_cgi->generate_error("Invalid page type specified. Must be 'document' or 'clPage'");
            return;
        }

        $action->do_action();
        return;
    }

    # No frame number given: generate html form
    my $collect = $gsdl_cgi->clean_param("collect");
    my $cl = $gsdl_cgi->clean_param("cl");

    # Establish collect_dir using defining 'site' along the way if GS3
    my $site = undef;
    my $isGSDL2 = undef;

    if ($gsdl_cgi->greenstone_version() == 2) {
        $isGSDL2 = 1;
    }
    else {
        $isGSDL2 = 0;

        # GS3 (and possible future versions) make use of 'site'
        $site = $gsdl_cgi->clean_param("site");

        if (!defined $site) {
            $gsdl_cgi->generate_error("No site specified.");
            return;
        }
    }

    generate_html_form($isGSDL2, $site, $collect, $cl);
    return;
}

main();
Note: See TracBrowser for help on using the repository browser.