Changeset 31347 for other-projects


Ignore:
Timestamp:
2017-01-24T13:40:12+13:00 (7 years ago)
Author:
davidb
Message:

First stage of developing HT collection to HTRC workset. Code to allow this server to proxy the download function of the HT server

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/vol-checker/src/org/hathitrust/extractedfeatures/VolumeCheck.java

    r31341 r31347  
    44import java.io.BufferedOutputStream;
    55import java.io.BufferedReader;
     6import java.io.DataOutputStream;
    67import java.io.FileInputStream;
    78import java.io.FileReader;
     
    1213import java.io.PrintWriter;
    1314import java.io.UnsupportedEncodingException;
     15import java.net.HttpURLConnection;
     16import java.net.URL;
     17import java.nio.charset.StandardCharsets;
    1418import java.nio.file.Files;
    1519import java.nio.file.Path;
     
    2529import javax.servlet.http.HttpServletResponse;
    2630
     31
    2732/**
    2833 * Servlet implementation class VolumeCheck
     
    3641
    3742    protected static final String file_ext = ".json.bz2";
    38 
     43    protected static final String ht_col_url = "https://babel.hathitrust.org/cgi/mb";
     44       
    3945    public VolumeCheck()
    4046    {}
     
    178184       
    179185    }
     186   
     187    protected void doCollectionToWorkset(HttpServletResponse response, String c, String a, String format) throws IOException
     188    {
     189        String post_url_params  = "c="+c+"&a="+a+"&format="+format;
     190       
     191        byte[] post_data       = post_url_params.getBytes(StandardCharsets.UTF_8);
     192        int    post_data_len = post_data.length;
     193       
     194        try {
     195           
     196            URL post_url = new URL(ht_col_url);
     197            HttpURLConnection conn = (HttpURLConnection) post_url.openConnection();           
     198            conn.setDoOutput(true);
     199            conn.setInstanceFollowRedirects(false);
     200            conn.setRequestMethod("POST");
     201            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
     202            conn.setRequestProperty("charset", "utf-8");
     203            conn.setRequestProperty("Content-Length", Integer.toString(post_data_len));
     204            conn.setUseCaches(false);
     205           
     206            try(DataOutputStream dos = new DataOutputStream(conn.getOutputStream())) {
     207               dos.write(post_data);
     208            }
     209            // try-resource auto-closes stream
     210           
     211            InputStream is = conn.getInputStream();
     212            InputStreamReader isr = new InputStreamReader(is);
     213            BufferedReader reader = new BufferedReader(isr);
     214           
     215            StringBuilder sb = new StringBuilder();
     216            String line = null;
     217            while ((line = reader.readLine()) != null)
     218            {
     219                sb.append(line + "\n");
     220            }
     221             
     222            response.setContentType("text/plain");
     223            PrintWriter pw = response.getWriter();
     224            pw.append(sb.toString());
     225        }
     226        catch (Exception e) {
     227            e.printStackTrace();
     228            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Failed to convert HT collection to HTRC workset");
     229        }
     230       
     231    }
    180232    /**
    181233     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
    182234     */
    183     protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
     235    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException
     236    {
    184237       
    185238       
     
    187240        String cgi_id = request.getParameter("id");
    188241        String cgi_download_id = request.getParameter("download-id");
     242        String cgi_convert_col = request.getParameter("convert-col");
    189243       
    190244        if (cgi_ids != null) {
     
    259313            }
    260314        }
     315        else if (cgi_convert_col != null) {
     316            // c=464226859&a=download&format=text
     317            String cgi_c = request.getParameter("c");
     318            String cgi_a = request.getParameter("a");
     319            String cgi_format = request.getParameter("format");
     320           
     321            if ((cgi_c == null) || (cgi_a == null) || (cgi_format == null)) {
     322                response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Malformed arguments.  Need 'c', 'a', and 'format'");
     323            }
     324            else {
     325                doCollectionToWorkset(response,cgi_c,cgi_a,cgi_format);
     326            }
     327           
     328        }
    261329        else {
    262330            PrintWriter pw = response.getWriter();
Note: See TracChangeset for help on using the changeset viewer.