Changeset 31347

Show
Ignore:
Timestamp:
24.01.2017 13:40:12 (3 years ago)
Author:
davidb
Message:

First stage of developing the HT-collection-to-HTRC-workset conversion. Adds code that allows this server to proxy the download function of the HT server.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/vol-checker/src/org/hathitrust/extractedfeatures/VolumeCheck.java

    r31341 r31347  
    44import java.io.BufferedOutputStream; 
    55import java.io.BufferedReader; 
     6import java.io.DataOutputStream; 
    67import java.io.FileInputStream; 
    78import java.io.FileReader; 
     
    1213import java.io.PrintWriter; 
    1314import java.io.UnsupportedEncodingException; 
     15import java.net.HttpURLConnection; 
     16import java.net.URL; 
     17import java.nio.charset.StandardCharsets; 
    1418import java.nio.file.Files; 
    1519import java.nio.file.Path; 
     
    2529import javax.servlet.http.HttpServletResponse; 
    2630 
     31 
    2732/** 
    2833 * Servlet implementation class VolumeCheck 
     
    3641 
    3742    protected static final String file_ext = ".json.bz2"; 
    38  
     43    protected static final String ht_col_url = "https://babel.hathitrust.org/cgi/mb"; 
     44         
    3945    public VolumeCheck()  
    4046    {} 
     
    178184         
    179185    } 
     186     
     187    protected void doCollectionToWorkset(HttpServletResponse response, String c, String a, String format) throws IOException 
     188    { 
     189        String post_url_params  = "c="+c+"&a="+a+"&format="+format; 
     190         
     191        byte[] post_data       = post_url_params.getBytes(StandardCharsets.UTF_8); 
     192        int    post_data_len = post_data.length; 
     193         
     194        try { 
     195             
     196            URL post_url = new URL(ht_col_url); 
     197            HttpURLConnection conn = (HttpURLConnection) post_url.openConnection();            
     198            conn.setDoOutput(true); 
     199            conn.setInstanceFollowRedirects(false); 
     200            conn.setRequestMethod("POST"); 
     201            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");  
     202            conn.setRequestProperty("charset", "utf-8"); 
     203            conn.setRequestProperty("Content-Length", Integer.toString(post_data_len)); 
     204            conn.setUseCaches(false); 
     205             
     206            try(DataOutputStream dos = new DataOutputStream(conn.getOutputStream())) { 
     207               dos.write(post_data); 
     208            } 
     209            // try-resource auto-closes stream 
     210             
     211            InputStream is = conn.getInputStream(); 
     212            InputStreamReader isr = new InputStreamReader(is); 
     213            BufferedReader reader = new BufferedReader(isr); 
     214             
     215            StringBuilder sb = new StringBuilder(); 
     216            String line = null; 
     217            while ((line = reader.readLine()) != null) 
     218            { 
     219                sb.append(line + "\n"); 
     220            } 
     221               
     222            response.setContentType("text/plain"); 
     223            PrintWriter pw = response.getWriter(); 
     224            pw.append(sb.toString()); 
     225        } 
     226        catch (Exception e) { 
     227            e.printStackTrace(); 
     228            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Failed to convert HT collection to HTRC workset"); 
     229        } 
     230         
     231    } 
    180232    /** 
    181233     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response) 
    182234     */ 
    183     protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { 
     235    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException  
     236    { 
    184237         
    185238         
     
    187240        String cgi_id = request.getParameter("id"); 
    188241        String cgi_download_id = request.getParameter("download-id"); 
     242        String cgi_convert_col = request.getParameter("convert-col"); 
    189243         
    190244        if (cgi_ids != null) { 
     
    259313            } 
    260314        } 
     315        else if (cgi_convert_col != null) { 
     316            // c=464226859&a=download&format=text 
     317            String cgi_c = request.getParameter("c"); 
     318            String cgi_a = request.getParameter("a"); 
     319            String cgi_format = request.getParameter("format"); 
     320             
     321            if ((cgi_c == null) || (cgi_a == null) || (cgi_format == null)) { 
     322                response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Malformed arguments.  Need 'c', 'a', and 'format'"); 
     323            } 
     324            else { 
     325                doCollectionToWorkset(response,cgi_c,cgi_a,cgi_format); 
     326            } 
     327             
     328        } 
    261329        else { 
    262330            PrintWriter pw = response.getWriter();