source: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/web-portal/index.js@ 31645

Last change on this file since 31645 was 31645, checked in by davidb, 7 years ago

Some initial work on drawing in workset info from the SPARQL endpoint. The SPARQL server is only visible on the Illinois network.

File size: 19.5 KB
Line 
1
// Languages whose tokens are indexed per part-of-speech tag, i.e. in fields
// named <lang>_<POS>_htrctoken (see expand_field_lang_pos).
var langs_with_pos = [ "en", "de", "pt", "da", "nl", "sv" ];

// Languages indexed without part-of-speech information, i.e. in a single
// field per language named <lang>_htrctoken.
// "sv" is intentionally not repeated here: it is already in langs_with_pos,
// and listing it in both arrays makes generate_other_langs() emit a second
// checkbox with the same id ("sv-enabled") as the one generate_pos_langs()
// creates.
var langs_without_pos = [
    "af","ar","bg","bn","cs","el","es","et","fa","fi","fr","he","hi","hr","hu",
    "id","it","ja","kn","ko","lt","lv","mk","ml","mr","ne","no","pa","pl",
    "ro","ru","sk","sl","so","sq","sw","ta","te","th","tl","tr",
    "uk","ur","vi","zh-cn","zh-tw" ];

// Step used by the Previous/Next links when paging through search results
var num_rows = 20;
11
// Click handler for a "<lang>-enabled" checkbox (bound in generate_pos_langs).
// Enables every element inside the matching "<lang>-pos-choice" container
// when the checkbox is ticked, and disables them when it is cleared.
// `this` is the checkbox that was clicked; `event` is unused.
function lang_pos_toggle(event) {
    var $checkbox = $(this);
    var is_checked = $checkbox.prop("checked");

    // The language code is everything before the first "-" of the id
    var lang = $checkbox.attr("id").split("-")[0];

    $('#' + lang + "-pos-choice" + " *").prop('disabled', !is_checked);
}
23
// Shared `error` callback for the $.ajax() search requests.
// Alerts the user and writes the three arguments jQuery supplies to the
// console.
//   jqXHR       - the jqXHR object of the failed request
//   textStatus  - status string supplied by jQuery
//   errorThrown - error detail supplied by jQuery
function ajax_error(jqXHR, textStatus, errorThrown) {
    alert('An error occurred... Look at the console (F12 or Ctrl+Shift+I, Console tab) for more information!');

    // Pass the values as separate arguments: concatenating jqXHR onto a
    // string only ever prints "[object Object]", which hides the details the
    // alert has just told the user to go and look at.
    console.log('jqXHR:', jqXHR);
    console.log('textStatus:', textStatus);
    console.log('errorThrown:', errorThrown);
}
31
32
// JSONP callback for the catalog lookup issued by show_results().
//
// json_data is keyed by "htid:<id>"; each value is read for
//   .records - map of record id => metadata (metadata.titles[0] is displayed)
//   .items   - list of items, each with .htid and .itemURL
//
// For every id, the placeholder elements generate_item() created
// (name="<id>") have their "Loading ..." content replaced by the title, and a
// hidden "[Workset: ...]" span is appended for add_worksets() to fill in.
// Finally the collected item URLs are handed to workset_enrich_results().
function add_titles(json_data) {
    var itemURLs = [];

    $.each( json_data, function( htid_with_prefix, htid_val ) {
        var htid = htid_with_prefix.replace(/^htid:/,"");
        var $placeholders = $("[name='"+htid+"']");

        $.each(htid_val.records, function( internalid, metadata ) {
            var titles = metadata.titles;
            if (!titles || titles.length == 0) {
                // No title in this record: leave the placeholder untouched
                return;
            }

            var title = titles[0];
            // .text() rather than .html(): a title is plain text, so "&" or
            // "<" in it must be shown literally, not interpreted as markup
            $placeholders.text(title);
            console.log(htid + ", title = " + title);
        });

        $.each(htid_val.items, function( item_index, item_val ) {
            if (item_val.htid == htid) {
                var itemURL = item_val.itemURL;
                itemURL = itemURL.replace(/^https:/,"http:");

                // Set the URL through .attr() so that no character in it can
                // break out of the attribute value
                var $ws_names = $('<span></span>').attr("name", itemURL);
                var $ws_span = $('<span style="display: none;"></span>')
                    .append("<br>[Workset: ", $ws_names, "]");
                $placeholders.append($ws_span);

                console.log("itemURL = " + itemURL);
                itemURLs.push(itemURL);
            }
        });
    });

    workset_enrich_results(itemURLs);
}
61
// JSONP callback for the SPARQL request issued by workset_enrich_results().
//
// json_data["@graph"] is iterated as a list of worksets. For each one, the
// first dcterms:title value is appended to every element whose name attribute
// equals the "@id" of an item the workset gathers, and that element's parent
// is revealed. Several titles on the same element are separated by "; ".
function add_worksets(json_data) {

    $.each( json_data["@graph"], function( ws_index, ws_val ) {
        var titles = ws_val["http://purl.org/dc/terms/title"];
        if (!titles || titles.length == 0) {
            // A workset without a title gives us nothing to display
            return;
        }
        var workset_title = titles[0]["@value"];

        var gathers = ws_val["http://www.europeana.eu/schemas/edm/gathers"];

        $.each(gathers, function(gather_index,gather_val) {
            var item_url = gather_val["@id"];

            $("[name='"+item_url+"']").each(function() {
                var $ws_names = $(this);
                $ws_names.parent().show();

                // .length, not .size(): .size() no longer exists in jQuery 3
                if ($ws_names.children().length>=1) {
                    $ws_names.append("; ");
                }

                // Insert the title as text so markup characters in it are
                // displayed rather than interpreted
                $ws_names.append($("<span></span>").text(workset_title));
            });
        });
    });

}
86
87
// Moves the current result window by `delta` rows and re-runs the stored
// search (store_search_action / store_search_args, set by submit_action).
//   delta - number of rows to move; negative values page backwards
// The new start offset never goes below 0.
function show_new_results(delta)
{
    $('.search-in-progress').css("cursor","wait");

    // start may still be the string read from the search form; an unusable
    // value is treated as the first page
    var start = parseInt(store_search_args.start, 10) || 0;
    var step = parseInt(delta, 10) || 0;

    store_search_args.start = Math.max(0, start + step);

    $.ajax({
        type: 'GET',
        url: store_search_action,
        data: store_search_args,
        dataType: 'json',
        success: show_results,
        error: ajax_error
    });
}
105
// Builds the HTML paragraph for one volume in the result list.
//   line     - running index of the item; even/odd picks the CSS class
//   id       - volume id, used in the page-turner links and as the name of
//              the title placeholder span
//   id_pages - matched page sequence numbers for the volume; 0 is the 'phony'
//              page standing for the whole volume
// Returns the markup as a string ("" when id_pages is empty). A volume with a
// single entry is rendered as one link; a volume with several entries is
// rendered as the id followed by a comma-separated list of "seq N" links.
function generate_item(line,id,id_pages)
{
    var row_class = (line%2 == 0) ? "evenline" : "oddline";
    var paragraph_open = '<p class="' + row_class + '">';
    var title_placeholder = '<span style="font-style: italic;" name="' + id
        + '"><span style="cursor: progress;">Loading ...</span></span><br>';

    // Page 0 has no sequence number of its own, so it links to seq 1
    var seq_of = function(page) {
        return (page==0) ? 1 : page;
    };
    var link_open = function(page) {
        var babel_url = "https://babel.hathitrust.org/cgi/pt?id=" + id + ";view=1up;seq=" + seq_of(page);
        return '<a target="_blank" href="' + babel_url + '">';
    };

    var page_count = id_pages.length;

    if (page_count == 0) {
        return "";
    }

    if (page_count == 1) {
        var only_page = id_pages[0];
        var link_text = (only_page > 0) ? (id + ', seq ' + seq_of(only_page)) : (id + ', all pages');

        return paragraph_open + ' ' + title_placeholder + link_open(only_page) + link_text + '</a>' + '</p>';
    }

    var parts = [ paragraph_open, title_placeholder ];

    for (var k=0; k<page_count; k++) {
        var page = id_pages[k];
        var seq_link = link_open(page) + 'seq ' + seq_of(page) + '</a> ';

        if (k > 0) {
            parts.push(', ' + seq_link);
        }
        else if (page > 0) {
            parts.push(id + ': ' + seq_link);
        }
        else {
            // skip linking to the 'phony' page 0
            parts.push(id);
        }
    }

    parts.push("</p>");

    return parts.join("");
}
161
162
// Asks the SPARQL endpoint which worksets gather any of the given items; the
// JSON-LD answer is delivered to add_worksets() through JSONP.
//   itemURLs - array of item URLs (handle.net URLs in practice)
//
// The query that is sent has this shape:
//
//   prefix dcterms: <http://purl.org/dc/terms/>
//   prefix edm:     <http://www.europeana.eu/schemas/edm/>
//   prefix htrc:    <http://wcsa.htrc.illinois.edu/>
//   prefix rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
//   prefix xsd:     <http://www.w3.org/2001/XMLSchema#>
//
//   CONSTRUCT { ?wsid rdf:type htrc:Workset ; dcterms:title ?title ;
//               dcterms:creator ?cre ; dcterms:created ?dat ; edm:gathers ?gar . }
//   WHERE     { ...same pattern...
//               FILTER ( ?gar = <url-1> || ?gar = <url-2> ) }
//
// No request is made when there is no usable URL, because "FILTER ( )" is not
// valid SPARQL.
function workset_enrich_results(itemURLs)
{
    var filter_array = [];
    var seen = {};
    var item_urls_len = itemURLs.length;
    for (var hi=0; hi<item_urls_len; hi++) {
        var item_url = itemURLs[hi];

        // An IRI written as <...> may not contain these characters; letting
        // one through would corrupt (or rewrite) the query
        if (typeof item_url !== "string" || item_url === "" || /[<>"{}|^`\\\s]/.test(item_url)) {
            continue;
        }
        // The same volume can be listed more than once; ask about it once
        if (Object.prototype.hasOwnProperty.call(seen, item_url)) {
            continue;
        }
        seen[item_url] = true;

        filter_array.push("?gar = " + "<"+item_url+">");
    }

    if (filter_array.length == 0) {
        return;
    }

    var prefixes = "";
    prefixes += "prefix dcterms: <http://purl.org/dc/terms/>\n";
    prefixes += "prefix edm: <http://www.europeana.eu/schemas/edm/>\n";
    prefixes += "prefix htrc: <http://wcsa.htrc.illinois.edu/>\n";
    prefixes += "prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n";
    prefixes += "prefix xsd: <http://www.w3.org/2001/XMLSchema#>\n";

    var graph_body = "";
    graph_body += " ?wsid\n";
    graph_body += " rdf:type htrc:Workset ;\n";
    graph_body += " dcterms:title ?title ;\n";
    graph_body += " dcterms:creator ?cre ;\n";
    graph_body += " dcterms:created ?dat ;\n";
    graph_body += " edm:gathers ?gar .\n";

    var filter = " FILTER ( " + filter_array.join(" || ") + " ) ";

    var construct = "CONSTRUCT {\n" + graph_body + "}\n";
    var where = "WHERE {\n" + graph_body + filter + "}\n";

    var sparql_query = prefixes + construct + where;
    //console.log("*** sparql query = " + sparql_query);

    // Equivalent URL form:
    //   http://acbres224.ischool.illinois.edu:8890/sparql?default-graph-uri=&query=...
    //     &format=application/x-json+ld&timeout=0&debug=on

    var sparql_url = "http://acbres224.ischool.illinois.edu:8890/sparql";
    var sparql_data = { "default-graph-uri": "",
                        "format": "application/x-json+ld",
                        "timeout": 0,
                        "debug": "on" };
    sparql_data.query = sparql_query;

    // NOTE: jQuery converts cross-domain JSONP requests to GET, whatever
    // `type` says. No error callback is set, so an unreachable endpoint
    // simply leaves the workset labels hidden.
    $.ajax({
        type: "POST",
        url: sparql_url,
        data: sparql_data,
        dataType: 'jsonp',
        jsonpCallback: "add_worksets"
    });
}
240
// `success` callback of the search request: renders one page of results into
// #search-results.
//   jsonData - search response; .response.numFound and .response.docs are read
//
// Each doc id has the form "<volume id>.page-<n>" (or just "<volume id>").
// Consecutive docs belonging to the same volume are merged into one item
// (see generate_item), Previous/Next links are added, and the catalog is then
// asked (JSONP, callback add_titles) for the title of every volume shown.
function show_results(jsonData) {
    var response = jsonData.response;
    var num_found = response.numFound;
    var docs = response.docs;
    var num_docs = docs.length;

    $('.search-in-progress').css("cursor","auto");

    var $search_results = $('#search-results');

    // Offset of the first doc on this page (0-based)
    var from = parseInt(store_search_args.start, 10) || 0;

    if (num_docs>0) {
        $search_results.html("<p>Results: " + num_found + " pages matched</p>");
        // Report the range actually displayed, counting from 1
        $search_results.append("<p>Showing matches: " + (from+1) + "-" + (from+num_docs) + "</p>");
    }
    else {
        $search_results.html("<p>No pages matched your query</p>");
    }

    // Example form of URL
    //  https://babel.hathitrust.org/cgi/pt?id=hvd.hnnssu;view=1up;seq=11

    var htids = [];
    var seen_ids = {};

    var prev_id = null;
    var prev_pages = [];
    var line_num = 0;

    for (var i=0; i<num_docs; i++) {
        var id_and_page = docs[i].id.split(".page-");
        var id = id_and_page[0];
        var page = 0;
        if (id_and_page.length>1) {
            page = parseInt(id_and_page[1], 10) + 1; // fix up ingest error
        }

        if ((prev_id !== null) && (id !== prev_id)) {
            // time to output previous item
            $search_results.append(generate_item(line_num,prev_id,prev_pages));
            line_num++;
            prev_pages = [];
        }
        prev_pages.push(page);

        // One catalog lookup per volume, however many of its pages matched
        if (!Object.prototype.hasOwnProperty.call(seen_ids, id)) {
            seen_ids[id] = true;
            htids.push("htid:"+id);
        }

        prev_id = id;
    }

    if (prev_id !== null) {
        $search_results.append(generate_item(line_num,prev_id,prev_pages));
    }

    document.location.href="#search-results-anchor";

    var next_prev = '<p style="width:100%;"><div id="search-prev" style="float: left;"><a>&lt; Previous</a></div><div id="search-next" style="float: right;"><a>Next &gt;</a></div></p>';

    $search_results.append(next_prev);
    $('#search-prev').click(function(event) { show_new_results(-1*num_rows); });
    $('#search-next').click(function(event) { show_new_results(num_rows); });

    if (from == 0) {
        $('#search-prev').hide();
    }
    if (from + num_docs >= num_found) {
        // Nothing beyond this page
        $('#search-next').hide();
    }

    // Example URL for catalog metadata (multiple items)
    //  http://catalog.hathitrust.org/api/volumes/brief/json/id:552;lccn:70628581|isbn:0030110408

    if (htids.length > 0) {
        var cat_url = "http://catalog.hathitrust.org/api/volumes/brief/json/" + htids.join("|");
        $.ajax({
            url: cat_url,
            dataType: 'jsonp',
            jsonpCallback: "add_titles"
        });
    }
}
332
// State of the most recent search, shared between submit_action (which sets
// it), show_new_results (which re-issues the request) and show_results.
var store_search_args = null,   // query parameters sent with the request
    store_search_action = null; // URL the request is sent to

// Truthy when results are to be sorted by id; set in submit_action
var group_by_vol_checked = 0;
337
// Turns one volume-metadata query term into a fielded query clause.
//   q_term      - a single term, optionally already of the form "field:value"
//   all_vfields - truthy to search every metadata field
// Returns
//   - all_vfields set:  "<field>:<q_term>" for every metadata field, joined
//                       with " OR "
//   - otherwise:        q_term unchanged when it contains a ":", else
//                       "title_t:<q_term>" (title is the default field)
function expand_vfield(q_term,all_vfields)
{
    var metadata_fields = [ "accessProfile_t", "genre_t", "imprint_t", "isbn_t", "issn_t",
                            "issuance_t", "language_t", "lccn_t", "names_t", "oclc_t",
                            "pubPlace_t", "pubDate_t", "rightsAttributes_t", "title_t", "typeOfResource_t" ];

    if (!all_vfields) {
        var has_explicit_field = (q_term.indexOf(":") !== -1);
        return has_explicit_field ? q_term : "title_t:" + q_term;
    }

    var clauses = metadata_fields.map(function(field_name) {
        return field_name + ":" + q_term;
    });

    return clauses.join(" OR ");
}
366
// Expands a free-text volume-metadata query into a boolean fielded query.
//   query       - whitespace-separated terms; the words "and"/"or" (any case)
//                 between terms are taken as boolean operators
//   all_vfields - passed through to expand_vfield() for every term
// Every term becomes "(<expand_vfield(term)>)"; neighbouring terms are joined
// by the operator written between them, or by AND when none was written.
// Returns "" when the query contains no terms.
function expand_vquery_field_and_boolean(query,all_vfields)
{
    // Drop the empty strings that leading/trailing whitespace produces;
    // they would otherwise be expanded into clauses with nothing to search for
    var query_terms = String(query == null ? "" : query).split(/\s+/).filter(function(term) {
        return term !== "";
    });

    var bool_query_term = [];
    var prev_bool = "";

    for (var i=0; i<query_terms.length; i++) {
        var term = query_terms[i];

        if (/^(and|or)$/i.test(term)) {
            // When several operators are written in a row the last one wins
            prev_bool = term.toUpperCase();
            continue;
        }

        // An operator is only meaningful between two clauses, so one written
        // before the first term is ignored
        if (bool_query_term.length > 0) {
            bool_query_term.push((prev_bool != "") ? prev_bool : "AND");
        }
        prev_bool = "";

        bool_query_term.push("(" + expand_vfield(term,all_vfields) + ")");
    }

    return bool_query_term.join(" ");
}
409
410
// Expands one query term into "<field>:<term>" clauses joined with " OR ",
// using the state of the language / part-of-speech checkboxes on the page.
//   q_text             - the term to search for
//   langs_with_pos     - language codes that have per-POS fields
//   langs_without_pos  - language codes that have a single field
//   search_all_checked - truthy to ignore the individual checkboxes (see below)
//
// POS languages contribute only when their "<lang>-enabled" box is ticked
// (even when search_all_checked is set); each ticked POS box - or every POS
// tag when search_all_checked is set - adds "<lang>_<TAG>_htrctoken".
// Non-POS languages add "<lang>_htrctoken" when their box is ticked or
// search_all_checked is set.
// Returns "" when no field is selected.
function expand_field_lang_pos(q_text,langs_with_pos,langs_without_pos,search_all_checked)
{
    var universal_pos_tags = [ "VERB", "NOUN", "ADJ", "ADV", "ADP", "CONJ", "DET", "NUM", "PRT", "X" ];
    var clauses = [];

    var is_ticked = function(element_id) {
        return $('#'+element_id).is(':checked');
    };

    langs_with_pos.forEach(function(lang) {
        if (!is_ticked(lang + "-enabled")) {
            return;
        }
        console.log("Extracting POS tags for: " + lang);

        universal_pos_tags.forEach(function(tag) {
            if (search_all_checked || is_ticked(lang+"-"+tag+"-htrctoken-cb")) {
                clauses.push(lang+"_"+tag+"_htrctoken" + ":" + q_text);
            }
        });
    });

    langs_without_pos.forEach(function(lang) {
        if (search_all_checked || is_ticked(lang + "-enabled")) {
            console.log("Adding in non-POS field for: " + lang);
            clauses.push(lang+"_htrctoken" + ":" + q_text);
        }
    });

    return clauses.join(" OR ");
}
451
// Expands a free-text page query into a boolean query over the language /
// part-of-speech fields.
//   query              - whitespace-separated terms; the words "and"/"or"
//                        (any case) between terms are boolean operators
//   langs_with_pos, langs_without_pos, search_all_checked
//                      - passed through to expand_field_lang_pos() per term
// Every term becomes "(<expansion>)"; neighbouring terms are joined by the
// operator written between them, or by AND when none was written.
// Returns "" when the query has no terms or when no term expands to any
// field (i.e. no language is selected), so callers can detect "nothing to
// search" by comparing against "".
function expand_query_field_and_boolean(query,langs_with_pos,langs_without_pos,search_all_checked)
{
    // Drop the empty strings that leading/trailing whitespace produces
    var query_terms = String(query == null ? "" : query).split(/\s+/).filter(function(term) {
        return term !== "";
    });

    var bool_query_term = [];
    var prev_bool = "";

    for (var i=0; i<query_terms.length; i++) {
        var term = query_terms[i];

        if (/^(and|or)$/i.test(term)) {
            // When several operators are written in a row the last one wins
            prev_bool = term.toUpperCase();
            continue;
        }

        var expanded_term = expand_field_lang_pos(term,langs_with_pos,langs_without_pos,search_all_checked);
        var joiner = (prev_bool != "") ? prev_bool : "AND";
        prev_bool = "";

        if (expanded_term === "") {
            // No field selected for this term: emitting "()" would produce an
            // invalid query and hide the fact that nothing can be searched
            continue;
        }

        // An operator is only meaningful between two clauses
        if (bool_query_term.length > 0) {
            bool_query_term.push(joiner);
        }
        bool_query_term.push("(" + expanded_term + ")");
    }

    return bool_query_term.join(" ");
}
494
495
// Click handler for the search button: builds the query from the form and
// sends it to the URL in the form's `action` attribute.
//
// Reads  #q (page text query), #vq (volume metadata query), the hidden
//        #indent/#wt/#start/#rows values and the option checkboxes.
// Writes store_search_action, store_search_args and group_by_vol_checked so
//        that show_new_results()/show_results() can page through the results.
// Alerts and sends nothing when there is no query text or nothing to search.
function submit_action(event)
{
    event.preventDefault();

    $('.search-in-progress').css("cursor","wait");

    store_search_action = $('#search-form').attr("action");

    var arg_indent = $('#indent').attr('value');
    var arg_wt     = $('#wt').attr('value');
    var arg_start  = $('#start').attr('value');
    var arg_rows   = $('#rows').attr('value');

    // Whitespace-only input counts as no input
    var q_text = ($('#q').val() || "").trim();
    var vq_text = ($('#vq').val() || "").trim();

    group_by_vol_checked = $('#group-results-by-id:checked').length;

    var search_all_langs_checked = $('#search-all-langs:checked').length;
    var search_all_vfields_checked = $('#search-all-vfields:checked').length;

    if ((q_text === "") && (vq_text === "")) {
        $('.search-in-progress').css("cursor","auto");
        alert("No query term(s) entered");
        return;
    }

    // Declared locally: these used to be assigned without `var`, which made
    // them implicit globals
    var arg_vq = expand_vquery_field_and_boolean(vq_text,search_all_vfields_checked);

    var arg_q = expand_query_field_and_boolean(q_text,langs_with_pos,langs_without_pos,search_all_langs_checked);

    //console.log("*** arg_vq = " + arg_vq);
    //console.log("*** arg_q  = " + arg_q);

    if (arg_q == "") {
        if (arg_vq == "") {
            // arg_vq was empty to start with, and the attempt to expand the
            // non-empty page query led to an empty arg_q being returned
            $('.search-in-progress').css("cursor","auto");
            alert("No languages selected");
            return;
        }
        else {
            arg_q = arg_vq;
        }
    }
    else {
        if (arg_vq != "") {
            // combine the two queries with OR
            // NOTE(review): the previous comment here said "AND" while the
            // code has always used OR -- confirm which one is intended
            arg_q = "(" + arg_vq + ")" + " OR " + "(" + arg_q + ")";

            // also implies
            group_by_vol_checked = true;
        }
    }

    //console.log("*** NOW arg_q = " + arg_q);

    // Example search on one of the htrc-full-ef fields is:
    //  q=en_NOUN_htrctoken:farming

    store_search_args = { q: arg_q, indent: arg_indent, wt: arg_wt, start: arg_start, rows: arg_rows };

    if (group_by_vol_checked) {
        store_search_args.sort="id asc";
    }

    $.ajax({
        type: 'GET',
        url: store_search_action,
        data: store_search_args,
        dataType: 'json',
        success: show_results,
        error: ajax_error
    });

}
574
// Builds one <fieldset> per language in langs_with_pos inside #pos-fieldsets:
// a legend holding the "<lang>-enabled" checkbox plus the language name, and
// a "<lang>-pos-choice" block with one checkbox per part-of-speech tag.
// English starts ticked with its POS boxes usable; every other language
// starts with its POS boxes disabled. All fieldsets but the first carry the
// class "show-hide-lang", and the "show-hide-lang" button is emitted just
// before the second one. Finishes by wiring that button via show_hide_lang().
function generate_pos_langs()
{
    var pos_checkbox = [
        { pos:"VERB", label:"Verbs",       tooltip:"Verbs (all tenses and modes)" },
        { pos:"NOUN", label:"Nouns",       tooltip:"Nouns (common and proper)" },
        { pos:"ADJ",  label:"Adjectives",  tooltip: null },
        { pos:"ADV",  label:"Adverbs",     tooltip: null },
        { pos:"ADP",  label:"Adpositions", tooltip:"Adpositions (prepositions and postpositions)" },
        { pos:"CONJ", label:"Conjunctions",tooltip: null },
        { pos:"DET",  label:"Determiners", tooltip: null },
        { pos:"NUM",  label:"Numbers",     tooltip:"Cardinal numbers" },
        { pos:"PRT",  label:"Particles",   tooltip:"Particles or other function words" },
        { pos:"X",    label:"Other",       tooltip:"Other words, such as foreign words, typos, abbreviations"}
    ];

    // Legend: enable-checkbox followed by the language name; the native name
    // is offered as a tooltip when it differs from the English one
    var legend_html = function(lang) {
        var lang_full = isoLangs[lang].name;
        var lang_native_full = isoLangs[lang].nativeName;
        var native_attr = (lang_full !== lang_native_full) ? 'title="'+lang_native_full+'"' : "";
        var checked_attr = (lang == "en") ? 'checked="checked"' : "";

        return [
            ' <legend style="margin-bottom: 5px; padding-top: 15px;">\n',
            ' <input type="checkbox" name="'+lang+'-enabled" id="'+lang+'-enabled" ' + checked_attr +'/>\n',
            ' <span ' + native_attr + '>'+lang_full+':</span>\n',
            ' </legend>\n'
        ].join("");
    };

    // One checkbox + label pair for a part-of-speech tag of `lang`
    var pos_checkbox_html = function(lang, pos_info) {
        var cb_id = lang+'-'+pos_info.pos+'-htrctoken-cb';
        var tooltip_attr = (pos_info.tooltip != null) ? 'title="'+pos_info.tooltip+'"' : "";

        return ' <input type="checkbox" name="'+cb_id+'" id="'+cb_id+'" checked="checked" />\n'
            + ' <label for="'+cb_id+'" '+tooltip_attr+'>'+pos_info.label+'</label>\n';
    };

    var $pos_fieldsets = $('#pos-fieldsets');

    langs_with_pos.forEach(function(lang, position) {
        var legend = legend_html(lang);
        var check_boxes = pos_checkbox.map(function(pos_info) {
            return pos_checkbox_html(lang, pos_info);
        });

        var pieces = [];
        if (position==1) {
            pieces.push('<button id="show-hide-lang">Show other languages ...</button>');
        }
        pieces.push('<fieldset ' + ((position>0) ? 'class="show-hide-lang"' : "") + '>\n');
        pieces.push(legend);
        pieces.push(' <div id="'+lang+'-pos-choice">\n');
        pieces.push(check_boxes.join('&nbsp;'));
        pieces.push(' </div>\n');
        pieces.push('</fieldset>\n');

        $pos_fieldsets.append(pieces.join(""));
        $('#'+lang+'-enabled').click(lang_pos_toggle);

        // Only English starts out with its POS choices usable
        $('#'+lang+'-pos-choice *').prop('disabled', lang != "en");
    });

    show_hide_lang();
}
654
// Wires the "show-hide-lang" button: each click slides the fieldsets carrying
// the class "show-hide-lang" out of view when any of them is visible, or into
// view otherwise, and updates the button caption to match.
function show_hide_lang()
{
    var toggle_other_langs = function(event) {
        event.preventDefault();

        var some_visible = $('.show-hide-lang:visible').length;
        var $fieldsets = $('.show-hide-lang');
        var $button = $('#show-hide-lang');

        if (some_visible) {
            $fieldsets.hide("slide", {direction: "up" } , 1000);
            $button.html("Show other languages ...");
            return;
        }

        $fieldsets.show("slide", {direction: "up" } , 1000);
        $button.html("Hide other languages ...");
    };

    $( "#show-hide-lang" ).click(toggle_other_langs);
}
669
// Adds one labelled checkbox per language in langs_without_pos to
// #other-langs, each of the form
//   <nobr><input type="checkbox" name="fr-enabled" id="fr-enabled" /><label ...>French</label></nobr>
// The label text is isoLangs[<code>].name; the native name is offered as a
// tooltip when it differs. Codes are looked up in isoLangs exactly as listed
// (so "zh-cn" and "zh-tw" are not mapped to "zh").
function generate_other_langs()
{
    var $container = $('#other-langs');

    langs_without_pos.forEach(function(lang) {
        var checkbox_id = lang + '-enabled';

        var lang_full = isoLangs[lang].name;
        var lang_native_full = isoLangs[lang].nativeName;
        var native_attr = (lang_full === lang_native_full) ? "" : 'title="'+lang_native_full+'"';

        var pieces = [
            '<nobr>',
            '<input type="checkbox" name="'+checkbox_id+'" id="'+checkbox_id+'" />',
            '<label for="'+checkbox_id+'" style="padding-left: 5px; padding-right: 10px;" ' + native_attr + '>'+lang_full+'</label>',
            '</nobr> '
        ];

        $container.append(pieces.join(""));
    });
}
704
// Page start-up: build the language selection controls, then hook the search
// button up to submit_action (when the page has one).
$(function init_search_page() {
    generate_pos_langs();
    generate_other_langs();

    var submit_selector = '#search-submit';
    var has_submit_button = ($(submit_selector).length>0);

    if (has_submit_button) {
        $(submit_selector).click(submit_action);
    }
});
Note: See TracBrowser for help on using the repository browser.