source: gs3-extensions/html-to-expeditee/trunk/src/src/cgi-bin/html-to-expeditee.pl.in@ 26750

Last change on this file since 26750 was 26750, checked in by davidb, 11 years ago

Fixed up conflicting frame number variable names.

File size: 15.0 KB
Line 
1#!/cygdrive/c/strawberry/perl/bin/perl -w
2
3# Need to specify the full path of Perl above, e.g. for Windows something like
4#!C:\\Perl32\\bin\\perl -w
5
6use strict;
7
# IIS 6 workaround switch: set this to 1 to enable the chdir below.
my $iis6_mode = 0;

# For some reason IIS 6 runs this script with the working directory set to
# the Greenstone directory rather than the cgi-bin directory, which makes
# lots of stuff fail.  When the switch is on, change into cgi-bin first.
chdir("cgi-bin") if $iis6_mode;

# gsdlCGI is loaded with a run-time require inside an eval (rather than a
# compile-time "use") so that a load failure can be caught and reported to
# the client as a plain-text page (needed for IIS).
eval { require "gsdlCGI.pm"; };

if ($@)
{
    print STDOUT "Content-type:text/plain\n\n", "ERROR: $@\n";
    exit 0;
}
29
# Escape a value for safe interpolation into a double-quoted HTML attribute.
# An undefined value is treated as the empty string so that interpolation
# never triggers an "uninitialized value" warning.
sub _escape_html_attr
{
    my ($text) = @_;

    return '' unless defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;

    return $text;
}

# Print (to STDOUT) the HTML page that drives the conversion from the
# browser: a CGI header followed by a form plus the JavaScript that walks
# the classifier, loads each page into an iframe and posts the generated
# frame JSON back to cgi-bin/html-to-expeditee.pl.
#
# Parameters:
#   $isGSDL2 - true when running under Greenstone 2; emitted into the page
#              as the JavaScript flag gs2 (normalised to 1 or 0)
#   $site    - site name (may be undef, e.g. for Greenstone 2)
#   $collect - initial value of the collection text field (may be undef)
#   $cl      - initial value of the classifier text field (may be undef)
#
# The three string values come from request parameters, so they are
# HTML-escaped before being placed into attribute values.  The browser
# decodes them again when the script reads the fields' .value.
sub generate_html_form
{
    my ($isGSDL2, $site, $collect, $cl) = @_;

    my $site_attr    = _escape_html_attr($site);
    my $collect_attr = _escape_html_attr($collect);
    my $cl_attr      = _escape_html_attr($cl);

    # Always a valid JavaScript literal, even if the caller passed undef.
    my $gs2_flag = $isGSDL2 ? 1 : 0;

    # first generate the document frames
    # then generate the classifier browsing frames.

    # NOTE: this is an interpolating heredoc.  Every literal dollar sign in
    # the JavaScript is backslash-escaped, and backslashes are doubled.
    my $html_form = <<EOT;
<!DOCTYPE html>
<html>
  <head>
    <meta content="text/html;charset=utf-8" http-equiv="Content-Type">
    <meta content="utf-8" http-equiv="encoding">
    <title>HTML To Expeditee Frames</title>
    <base href=".."/>

    <link type="text/css" href="ext/html-to-expeditee/jquery/css/le-frog/jquery-ui-1.8.16.custom.css" rel="stylesheet" />
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-1.6.2.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/jquery/js/jquery-ui-1.8.16.custom.min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/gsajax-min.js"></script>
    <script type="text/javascript" src="ext/html-to-expeditee/js/html-to-expeditee.js"></script>
  </head>
  <body>
    <form class="ui-widget">
      Convert the collection <input type="text" class="ui-corner-all" style="padding: 4px;" name="collect" value="$collect_attr" id="collect" /> to Expeditee frames by traversing the classifier <input type="text" class="ui-corner-all" style="padding: 4px;" name="cl" value="$cl_attr" id="cl" />

      <input value="$site_attr" name="site" id="site" type="hidden">

      <p>
        <input type="checkbox" id="checkBoxBrowsing" name="generate_browsing" value="generate_browsing">Generate Collection Space<br/>
      </p>

      <p style="font-weight: bold;">Extra Expeditee Frame Output Options:</p>
      <input type="checkbox" id="checkBoxFont" name="compute_font" value="compute_font">Compute Font<br/>
      <input type="checkbox" id="checkBoxWidth" name="compute_width" value="compute_width">Compute Width<br/>

      <p><input value="Go" id="go" class="ui-button ui-widget ui-state-default ui-corner-all" type="submit"></p>
    </form>

    <script type="text/javascript">
      var collect;
      var site;
      var cl;
      var gs2;

      var docOIDs = [];
      var clPages = [];

      var hashMapDocFrames = new Array();

      var numDocOIDs = 0;
      var numClPages = 0;

      var currDocFrameNum;
      var currClFrameNum;

      //OID of the document currently loaded (or loading) in the iframe
      var currDocOID = null;

      var compute_font = false;
      var compute_width = false;
      var generate_browsing = false; //generate a matching collection space frameset

      \$(function(){

        \$('#progressbar').progressbar();

        \$('#go').button().click(function(){

          site = document.getElementById("site").value;

          collect = document.getElementById("collect").value;

          if(collect.match(/^\\s*\$/)){
            alert("No collection specified");
            return false;
          }

          cl = document.getElementById("cl").value;
          if(cl.match(/^\\s*\$/)){
            alert("No classifier specified");
            return false;
          }

          compute_font = document.getElementById("checkBoxFont").checked;
          compute_width = document.getElementById("checkBoxWidth").checked;

          generate_browsing = document.getElementById("checkBoxBrowsing").checked;

          gs2=$gs2_flag;

          var url;

          /*obtain url for classifier/browse page and grab all links (doc and CL links) from this page.*/
          if(gs2){
            url = "library.cgi";
            url += "?c="+collect +"&a=d&cl=" + cl;
          }else{
            url = "library";
            url += "?c="+collect +"&a=b&rt=s&s=ClassifierBrowse&cl=" + cl;
            url += "&excerptid=gs_content";
          }

          docOIDs = [];
          clPages = [];

          var outstandingURLs = [];
          var visitedURLs = {};

          outstandingURLs.push(url);
          visitedURLs[url] = 1;

          while(outstandingURLs.length > 0){
            url = outstandingURLs.shift();

            var clHtml = urlGetSync(url);

            var workingTrav = document.getElementById("workingTraverse");
            workingTrav.innerHTML = clHtml;

            var aElems = workingTrav.getElementsByTagName("a");

            /* any links with (document|browse)=> outstandingURLS */
            /* any links with (document|browse)/CL[0-9]/[0-9] => clPages */
            /* any links with (document) => docOIDs */

            var actionRE = new RegExp("(\\\\/)(?:document|browse)(\\\\/)");
            var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");
            var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

            for(var i = 0; i < aElems.length; i++){
              var aElem = aElems[i];
              var href = aElem.href;

              if(href && href.match(actionRE)){
                if(href.match(clRE)){
                  if(!visitedURLs[href]){
                    console.log("found a new CL line: " + href);
                    outstandingURLs.push(href);
                    visitedURLs[href] = 1;
                    clPages.push(href);
                  }
                }else if(href.match(docRE)){
                  if(!visitedURLs[href]){
                    var docMatch = docRE.exec(href);
                    var docOID = docMatch[1];

                    console.log("found a new doc line: " + docOID);
                    visitedURLs[href] = 1;
                    docOIDs.push(docOID);
                  }
                }
              }
            }
          }

          numDocOIDs = docOIDs.length;
          numClPages = clPages.length;
          var iframe = document.getElementById('iframe');

          startProcessing(iframe); //Process documents

          return false;
        });

      });

      function startProcessing(iframe){

        //Remember which document is being loaded so writeDocument can record it.
        currDocOID = docOIDs.shift();

        iframe.src = getDocumentUrl(currDocOID);

        var progressbar = document.getElementById('progressbar');
        progressbar.style.display = 'block';
      }


      function pageLoaded(){
        var iframe = document.getElementById("iframe");

        if(iframe.src){

          if(iframe.style.display != 'block'){
            iframe.height = '90%';
            iframe.style.display = 'block';
          }

          var iframeDoc;

          if(iframe.contentDocument){ /* FF and Chrome */
            iframeDoc = iframe.contentDocument;
          }else if(iframe.contentWindow){ /* IE */
            iframeDoc = iframe.contentWindow.document;
          }

          /*Check url - if it's a doc Url, call "writeDocument", otherwise call "writeClPage"*/
          var clRE = new RegExp("(\\\\/)browse\\\\/" + cl + "(\\\\/\\\\d+)+(\$)");


          if(iframe.src.match(clRE)){
            writeClPage(iframe,iframeDoc);
          }else{
            writeDocument(iframe,iframeDoc);
          }
        }
      }

      function writeDocument(iframe,iframeDoc){

        var xmlUrl = iframe.src + "&o=xml";

        var iter = (numDocOIDs - docOIDs.length);
        var progressPercent = iter/numDocOIDs * 100;

        var frameNum = getMetadata(xmlUrl,'frameID');

        if(frameNum == null){
          frameNum = iter;
        }

        //Add docOID and matching frame number to an associative array for later use.
        //The key is the document now in the iframe, tracked in currDocOID.
        hashMapDocFrames[currDocOID] = frameNum;

        \$(function(){
          \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");

        var assocElem = iframeDoc.getElementById('assocfilepath');
        var assoc = null;

        if(assocElem === undefined || assocElem === null){
          assoc = getMetadata(xmlUrl,'assocfilepath');
        }

        var expFrameTree = htmlToExpeditee(gsContent,compute_font,compute_width);

        var expFrame = JSON.stringify(expFrameTree);
        //console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
          params += "&site=" + site;
        }

        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);

        //Add an assocfilepath but only if it is defined
        if(assoc !== null){
          params += "&assoc=" + assoc;
        }

        params += "&compute-font=" + compute_font;

        params += "&page-type=" + "document";


        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
          alert("ERROR PROCESSING URL: " + url);
        }

        if(docOIDs.length > 0){
          currDocOID = docOIDs.shift();
          iframe.src = getDocumentUrl(currDocOID);

        }else{
          currDocOID = null;

          //start writing CL pages.
          if(generate_browsing && clPages.length > 0){
            numClPages = clPages.length;

            //Only load the page here. The iframe onload handler (pageLoaded)
            //calls writeClPage once the classifier page has actually loaded.
            iframe.src = clPages.shift();
          }else{
            //We are finished
            finish(iframe);
          }
        }
      }

      function getDocumentUrl(docOID){
        var url;

        if(gs2){
          url = "library.cgi";
        }else{
          url = "library";
        }

        url += "?c=" + collect + "&a=d&d=" + docOID;

        url += "&p.showAssocFilePath = 1";

        return url;
      }

      function writeClPage(iframe,iframeDoc){

        var frameNum = numClPages - clPages.length;
        var progressPercent = frameNum / numClPages * 100;

        \$(function(){
          \$('#progressbar').progressbar({ value: progressPercent });
        });

        var gsContent = iframeDoc.getElementById("gs_content");
        var gsContentChildren = gsContent.getElementsByTagName('*');

        var docRE = new RegExp("(?:\\\\/)document\\\\/(.*?)(?:\$)");

        //get rid of rectangles around nodes.
        for(var i = 0; i < gsContentChildren.length; i++){
          var child = gsContentChildren[i];

          if(child.tagName !== "IMG"){
            child.setAttribute('rect','norect');
          }

          if(child.tagName === "A"){
            var aElem = child;
            var aElemSrc = aElem.href;

            //if aElemSrc is a document url, then extract docOID then access hash map and get matching frame number.
            if(aElemSrc.match(docRE)){
              var docMatch = docRE.exec(aElemSrc);
              var docOID = docMatch[1];

              var fn = hashMapDocFrames[docOID];
              var aElemChildren = aElem.getElementsByTagName('*');

              for(var j = 0; j < aElemChildren.length; j++){
                var aElemChild = aElemChildren[j];
                var frameName = collect + fn;

                //aElemChild.setAttribute("link",frameName);
              }

            }else{
              //TODO: Check if the link is a CL link.
              //TODO: Make a hashmap for storing CL page frame numbers?
            }
          }
        }

        //add link attribute to children nodes of <a> elements.


        //TODO: Need to change htmlToExpeditee code to account for links


        var expFrameTree = htmlToExpeditee(gsContent);
        var expFrame = JSON.stringify(expFrameTree);
        //console.log(expFrame);

        var url = "cgi-bin/html-to-expeditee.pl";
        var params = "c=" + collect;

        if(site.match(/\\w/)){
          params += "&site=" + site;
        }

        params += "&cl=" + cl;
        params += "&a=generate-frame&fn=" + frameNum;
        params += "&json=" + escape(expFrame);
        params += "&page-type=" + "clPage";

        var clHtml = urlPostSync(url,params);

        if(!clHtml.match(/html-to-expeditee saved frame/)){
          alert("Error processing url: " + url);
        }

        if(clPages.length > 0){
          var clPage = clPages.shift();
          iframe.src = clPage;
        }else{
          finish(iframe);
        }
      }

      function finish(iframe){
        var progressbar = document.getElementById("progressbar");
        progressbar.style.display = "none";

        iframe.style.display = "none";
        delete iframe.src;
      }

      /**
       * This method is used at this stage to retrieve assocfilepath and
       * frame number metadata values from the document's xml.
       **/
      function getMetadata(xmlUrl,nameValue){
        var metadata = null;

        \$.ajax({
          type: "GET",
          async: false,
          url: xmlUrl,
          dataType: "xml",
          success: function(xml){
            \$(xml).find('metadata').each(function(){
              var name = \$(this).attr('name');

              if(name === nameValue){
                if(metadata === null){
                  metadata = \$(this).text();
                }
              }
            });
          }
        });

        return metadata;
      }

    </script>

    <div id="progressbar" width="100%" style="display: none; margin: 10px; height: 10px;"></div>

    <div id="workingTraverse" style="display: none"></div>

    <hr style="margin: 10px;">

    <iframe width="100%" id="iframe" style="display:none;" onload="pageLoaded()"></iframe>
  </body>
</html>
EOT

    print "Content-type:text/html\n\n";
    print $html_form;
}
467
# Entry point for a CGI request.
#
# Sets up the Greenstone CGI environment, then dispatches on the request
# parameters:
#   - when "fn" (frame number) is present, "page-type" selects the action
#     object ("document" -> HtmlToExpediteeAction, "clPage" ->
#     CollectionSpaceAction) whose do_action() handles the request;
#   - otherwise the HTML form is generated via generate_html_form().
#
# After each generate_error() call the sub returns explicitly, so that no
# further work is attempted (no method call on an undefined $action, no
# second response body) should generate_error() return to its caller
# rather than terminate the script.
sub main
{
    my $gsdl_cgi = gsdlCGI->new();

    # Load GS modules
    $gsdl_cgi->setup_gsdl();

    my $gsdlhome = $ENV{'GSDLHOME'};
    $gsdl_cgi->checked_chdir($gsdlhome);

    # TODO: Refactor so we only need to use HtmlToExpediteeAction
    require cgiactions::HtmlToExpediteeAction;
    require cgiactions::CollectionSpaceAction;

    $gsdl_cgi->parse_cgi_args();
    $gsdl_cgi->{'xml'} = 0;

    my $fn = $gsdl_cgi->clean_param("fn");

    if (defined $fn) {
        # page_type can have two values: "document" or "clPage"
        my $page_type = $gsdl_cgi->clean_param("page-type");

        if (!defined $page_type) {
            $gsdl_cgi->generate_error("No page type specified. Must be 'document' or 'clPage'");
            return;
        }

        my $action;

        if ($page_type eq "document") {
            $action = HtmlToExpediteeAction->new($gsdl_cgi, $iis6_mode);
        }
        elsif ($page_type eq "clPage") {
            $action = CollectionSpaceAction->new($gsdl_cgi, $iis6_mode);
        }
        else {
            $gsdl_cgi->generate_error("Invalid page type specified. Must be 'document' or 'clPage'");
            return;
        }

        $action->do_action();
        return;
    }

    # generate html form
    my $collect = $gsdl_cgi->clean_param("collect");
    my $cl      = $gsdl_cgi->clean_param("cl");

    # Establish collect_dir using defining 'site' along the way if GS3
    my $site    = undef;
    my $isGSDL2 = undef;

    if ($gsdl_cgi->greenstone_version() == 2) {
        $isGSDL2 = 1;
    }
    else {
        $isGSDL2 = 0;

        # GS3 (and possible future versions) make use of 'site'
        $site = $gsdl_cgi->clean_param("site");

        if (!defined $site) {
            $gsdl_cgi->generate_error("No site specified.");
            return;
        }
    }

    generate_html_form($isGSDL2, $site, $collect, $cl);
    return;
}
532
# Run the CGI request handler.
main();
Note: See TracBrowser for help on using the repository browser.