source: other-projects/hathitrust/wcsa/vol-checker/src/org/hathitrust/extractedfeatures/VolumeCheck.java@ 31279

Last change on this file since 31279 was 31279, checked in by davidb, 7 years ago

First cut at servlet

  • Property svn:executable set to *
File size: 4.0 KB
Line 
1package org.hathitrust.extractedfeatures;
2
3import java.io.BufferedReader;
4import java.io.FileReader;
5import java.io.IOException;
6import java.io.PrintWriter;
7import java.util.ArrayList;
8import java.util.HashMap;
9
10import javax.servlet.ServletConfig;
11import javax.servlet.ServletException;
12import javax.servlet.annotation.WebServlet;
13import javax.servlet.http.HttpServlet;
14import javax.servlet.http.HttpServletRequest;
15import javax.servlet.http.HttpServletResponse;
16
17/**
18 * Servlet implementation class VolumeCheck
19 */
20@WebServlet("/VolumeCheck")
21public class VolumeCheck extends HttpServlet {
22 private static final long serialVersionUID = 1L;
23
24 protected static int HASHMAP_INIT_SIZE = 13800000;
25 protected static String DATA_DIR="D:/cygwin64/home/davidb/research/code-managed/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest";
26 protected static HashMap<String,Boolean> id_check_ = null;
27
28
29 public VolumeCheck() {
30 if (id_check_ == null) {
31 id_check_ = new HashMap<String,Boolean>(HASHMAP_INIT_SIZE);
32 storeIDs(DATA_DIR + "/full-listing.txt");
33 }
34 }
35
36 protected void storeIDs(String filename)
37 {
38 try {
39 FileReader fr = new FileReader(filename);
40 BufferedReader br = new BufferedReader(fr);
41
42 long line_num = 1;
43 String line;
44 System.err.println("Loading in volume IDS: " + filename);
45
46 String opt_file_ext = ".json.bz2";
47
48 while ((line = br.readLine()) != null) {
49
50 String file_id = line.substring(line.lastIndexOf("/")+1);
51 String id = null;
52 if (file_id.endsWith(opt_file_ext)) {
53 id = file_id.substring(0,file_id.lastIndexOf(opt_file_ext));
54 }
55 else {
56 id = file_id;
57 }
58
59 id = id.replaceAll("\\+", ":").replaceAll("=", "/");
60
61 id_check_.put(id, true);
62
63 if ((line_num % 100000) == 0) {
64 System.err.println("sample id = " + id);
65 System.err.println("Passed line: " + line_num);
66 }
67 line_num++;
68
69 }
70
71 br.close();
72 }
73 catch (Exception e) {
74 e.printStackTrace();
75 }
76
77 }
78 /**
79 * @see Servlet#init(ServletConfig)
80 */
81 public void init(ServletConfig config) throws ServletException {
82 // currently everything we need to happen at start up can be done in the constructor
83 }
84
85 /**
86 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
87 */
88 protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
89 PrintWriter pw = response.getWriter();
90
91 String cgi_ids = request.getParameter("ids");
92 String cgi_id = request.getParameter("id");
93
94 if (cgi_ids != null) {
95 response.setContentType("application/json");
96
97 String[] ids = cgi_ids.split(",");
98 int ids_len = ids.length;
99
100 pw.append("{");
101
102 for (int i=0; i<ids_len; i++) {
103 String id = ids[i];
104
105 Boolean exists = id_check_.get(id);
106 boolean status = (exists != null) ? exists : false;
107
108 if (i>0) {
109 pw.append(",");
110 }
111 pw.append("\"" + id + "\":" + status );
112 }
113 pw.append("}");
114
115 }
116 else if (cgi_id != null) {
117 response.setContentType("application/json");
118
119 String id = cgi_id;
120 Boolean exists = id_check_.get(id);
121 boolean status = (exists != null) ? exists : false;
122 pw.append("{'" + id + "':" + status + "}");
123 }
124 else {
125
126 pw.append("General Info: Number of HTRC Volumes in check-list = " + id_check_.size());
127
128 }
129 //pw.close();
130
131 }
132
133 /**
134 * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
135 */
136 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
137 doGet(request, response);
138 }
139
140}
Note: See TracBrowser for help on using the repository browser.