source: trunk/gsdl/src/w32server/httpreq.cpp@ 3810

Last change on this file since 3810 was 3810, checked in by sjboddie, 21 years ago

Removed some hard string length limits in local library server code

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.2 KB
Line 
1/**********************************************************************
2 *
3 * httpreq.cpp
4 * Copyright (C) 1996
5 *
6 * A component of the fnord webserver written by [email protected].
7 *
8 * Altered for use with the Greenstone digital library software by the
9 * New Zealand Digital Library Project at the University of Waikato,
10 * New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28#include <windows.h>
29#include <stdlib.h>
30#include <stdio.h>
31#include <string.h>
32#include <memory.h>
33#include "httpreq.h"
34#include "parse.h"
35#include "netio.h"
36#include "settings.h"
37#include "httpsrv.h"
38#include "httpsend.h"
39#include "cgiwrapper.h"
40#include "d_winsock.h"
41
42/*
43Implementation Notes:
44
45HTTP field names, method and version strings are converted to upper case
46right after being read from the client in order to allow case insensitive
47string comparisons to be done on them. Since these fields are worked with a
48lot, this should help performance.
49*/
50
51//Private Data and declarations
52#define IO_BUFFER_SIZE 16384 //16K IO Buffer
53#define MAX_HTTP_LINE_LEN 1024 //Max length of line in a header of 1024
54
55//Private Function Declarations with Return Constants
56
57/*
58Function Name: DispatchRequest
59Purpose: Manages having the request parsed, then sent to the right function
60 to send a response or handle an error.
61Parameters:
62 ClientSocket - Socket the client is on
63 ClientSockAddr - Address of client
64 AddrLen - Length of address of client
65 IOBuffer - Pointer to buffer allocated for IO operations
66 ThreadNum - Number of thread that called this function for debugging purposes
67Notes: I'm still playing with the keep alive support. I commented out
68 the stuff for giving a client a timeout because I was unable to detect
69 disconnects.
70More Notes: Not sure if this organization will allow me to easily add support
71 for ISAPI filter DLLs.
72*/
73void DispatchRequest(SOCKET ClientSocket, SOCKADDR_IN ClientSockAddr, int AddrLen, BYTE *IOBuffer);
74
75/*
76Function Name: Get HTTP Headers
77Purpose: Reads the request line and header fields (plus any attached content)
78 from the client and stores them in the RequestFields structure.
79Parameters:
80 RequestInfo - Request information structure (see httpreq.h)
81 RequestFields - HTTP request fields structure (see httpreq.h)
82Returns: GH_ERROR on error (disconnect, bad data, Windows in a bad mood, etc.)
83 GH_UNKNOWN_VERSION if the version number is not HTTP/0.9 or HTTP/1.x
84 GH_SIMPLE_REQUEST on a properly formatted HTTP/0.9 request
85 GH_10_REQUEST on a properly formatted HTTP/1.x request
86*/
87int GetHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);
88#define GH_BAD_METHOD -2
89#define GH_ERROR -1
90#define GH_UNKNOWN_VERSION 0
91#define GH_SIMPLE_REQUEST 1
92#define GH_10_REQUEST 2
93
94/*
95Function Name: Clean Up HTTP Headers
96Purpose: Cleans up memory dynamically allocated for headers
97Parameters:
98 RequestInfo - Request information structure (see httpreq.h)
99 RequestFields - HTTP request fields structure (see httpreq.h)
100Returns: Nothing
101*/
102void CleanUpHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);
103
104/*
105Function Name: Split Query
106Purpose: Splits the file and query part of a URI. In other words, it
107 puts the parts before and after the "?" in different strings.
108Parameters:
109 URIStr - The requested URI
110 FileStr - String to contain the name of the path + file part of the URI
111 QueryStr - String to contain the query part of the URI
112Returns: TRUE if there is a query, else FALSE
113*/
114BOOL SplitQuery(char *URIStr, char *FileStr, char *QueryStr, int ThreadNum);
115
116/*
117Function Name: Get File
118Purpose: Attempts to find a given file, including looking for index.html.
119 Updates the given URI string so it points to the true document location
120Parameters:
121 FilePath - Path of file, may be modified to best reflect the retrieved file
122 or directory
123 URIStr - URI string, minus the query
124Returns: GF_ERROR on error
125 GF_FILE_FOUND on success
126 GF_INDEX_FOUND if file is a directory with an index.html file in it
127 GF_DIRECTORY if file is a directory
128 GF_FILE_NOT_FOUND if file was not found
129*/
130
131/*
132Function Name: Process Simple Request
133Purpose: Sends a reply to a HTTP 0.9 "simple" request
134Parameters:
135 ClientSocket - Socket the client is on
136 RequestInfo - Structure storing the parsed headers
137 IOBuffer - Pointer to buffer allocated for IO operations
138 ThreadNum - Number of calling thread for debugging
139Notes: I should really test this and see if it works...
140*/
141void ProcessSimpleRequest(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);
142
143//Public Functions
144/******************************************************************************/
145void RequestThread(RequestThreadMessageT *Parameters) {
146 SOCKADDR_IN ClientSockAddr;
147 SOCKET ClientSocket;
148 int AddrLen;
149 //Allocate an IO buffer for this thread
150 BYTE *IOBuffer = new BYTE[IO_BUFFER_SIZE];
151
152 //Get the parameters for the request
153 ClientSocket = Parameters->ClientSocket;
154 ClientSockAddr = Parameters->ClientSockAddr;
155 AddrLen = Parameters->AddrLen;
156 DispatchRequest(ClientSocket, ClientSockAddr, AddrLen, IOBuffer);
157}
158/******************************************************************************/
159
160//Private Functions
161
162/******************************************************************************/
163void DispatchRequest(SOCKET ClientSocket, SOCKADDR_IN ClientSockAddr, int AddrLen, BYTE *IOBuffer) {
164 RequestInfoT RequestInfo;
165 RequestFieldsT RequestFields;
166
167 // TrayAddConnection();
168
169 //Setup the RequestInfo structure
170 memset(&RequestInfo, 0, sizeof(RequestInfoT));
171 RequestInfo.ThreadNum = 0;
172 RequestInfo.IOBuffer = IOBuffer;
173 RequestInfo.IOBufferSize = IO_BUFFER_SIZE;
174 RequestInfo.ClientSocket = ClientSocket;
175 RequestInfo.ClientSockAddr = ClientSockAddr;
176 RequestInfo.AddrLen = AddrLen;
177 RequestInfo.KeepAlive = FALSE;
178
179 int GetHeadersResult;
180 do {
181 //Get Headers
182 GetHeadersResult = GetHTTPHeaders(RequestInfo, RequestFields);
183
184 //Figure out what version we're dealing with and deal with it
185 switch (GetHeadersResult) {
186 case GH_SIMPLE_REQUEST :
187 SendHTTPError(400, "HTTP Request not supported", "Only 1.x requests supported", RequestInfo, RequestFields);
188 // TrayIncNumServed();
189 break;
190 case GH_10_REQUEST :
191 ExamineURIStr(RequestFields.URIStr,&RequestInfo,&RequestFields);
192 // TrayIncNumServed();
193 break;
194 case GH_UNKNOWN_VERSION :
195 SendHTTPError(400, "HTTP Version not supported", "Only 1.x requests supported", RequestInfo, RequestFields);
196 // TrayIncNumServed();
197 break;
198/* added Feb 2002 to handle stupid MS behaviour */
199 case GH_BAD_METHOD :
200 SendHTTPError(501, "Not implemented", "Only GET and POST currently implemented", RequestInfo, RequestFields);
201 break;
202 case GH_ERROR:
203 //Disconnect
204 RequestInfo.KeepAlive = FALSE;
205 break;
206 }
207 CleanUpHTTPHeaders(RequestInfo, RequestFields);
208 } while (0/*RequestInfo.KeepAlive == TRUE*/);
209 //Close connection
210 CloseSocket(RequestInfo.ClientSocket);
211 // TrayRemoveConnection();
212}
213
214/******************************************************************************/
215int GetHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields) {
216 //Parsing and IO buffers
217 text_t CurLine;
218 text_t NextLine;
219 text_t FieldNameStr;
220 text_t FieldValStr;
221
222 //Parsing and IO working vars
223 int ReadBufferIndex;
224 int DataInBuffer;
225 text_t::const_iterator next;
226 text_t::const_iterator end;
227
228 //Clear all the fields
229 memset(&RequestFields, 0, sizeof(RequestFieldsT));
230
231 ReadBufferIndex = 0;
232 DataInBuffer = 0;
233
234 //Get First Line
235 if (GetLine(CurLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
236 RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
237 RequestInfo.ThreadNum) != 0) return GH_ERROR;
238 do {//Get Next Line, append it if the first charactor is space
239 if(GetLine(NextLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
240 RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
241 RequestInfo.ThreadNum) != 0) return GH_ERROR;
242 if ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t')) {
243 CurLine += NextLine;
244 }
245 } while ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t'));
246 //Method String (first word)
247 GetWord(RequestFields.MethodStr, CurLine.begin(), CurLine.end(), next);
248 uc(RequestFields.MethodStr);
249
250 /* Added Feb 2002 - IE since about version 5 send stupid frontpage requests
251 for MS Document formats eg "GET /_vti_inf.html" */
252 if (RequestFields.MethodStr == "OPTIONS") {
253 return GH_BAD_METHOD;
254 }
255 //Version String (last word)
256 GetLastWord(RequestFields.VersionStr, CurLine.begin(), CurLine.end(), end);
257 uc(RequestFields.VersionStr);
258 text_t::const_iterator versionbegin = RequestFields.VersionStr.begin();
259
260 if ((RequestFields.VersionStr.size() > 5) && (substr(versionbegin, versionbegin+5) != "HTTP/")) {
261 //No version, assume simple request
262 //part after method is URI
263 RequestFields.URIStr = CurLine;
264 return GH_SIMPLE_REQUEST;
265 }
266
267 //URI String (in between End of first and Start of last)
268 //<Method> <WhiteSpace> <URI> <WhiteSpace> <Version> <CRLF>
269 // next^ end^
270 text_t spacebuffer;
271 text_t::const_iterator here = next;
272 while (here != end) {
273 // do this to remove trailing space
274 if (*here == ' ' || *here == '\t') {
275 spacebuffer.push_back(*here);
276 } else {
277 if (!spacebuffer.empty()) {
278 RequestFields.URIStr += spacebuffer;
279 spacebuffer.clear();
280 }
281 RequestFields.URIStr.push_back(*here);
282 }
283 here++;
284 }
285
286 //Only accept requests from HTTP/0.9 or HTTP/1.X clients, we'll
287 //assume that anything else will require an upgrade or patch
288 if ((RequestFields.VersionStr.size() > 7) && (substr(versionbegin, versionbegin+7) != "HTTP/1.")) {
289 return GH_UNKNOWN_VERSION;
290 }
291
292 //Get the rest of the lines
293 CurLine = NextLine;
294
295 while (!CurLine.empty()) {//Blank Line, we're done
296 do {//Get Next Line, append it if the first charactor is space
297 if (GetLine(NextLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
298 RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
299 RequestInfo.ThreadNum) != 0)
300 return GH_ERROR;
301 if ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t')) {
302 CurLine += NextLine;
303 }
304 } while ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t'));
305
306 GetWord(FieldNameStr, CurLine.begin(), CurLine.end(), next);
307 uc(FieldNameStr);
308
309 FieldValStr = substr(next, CurLine.end());
310
311 //Process it
312 //In order of expected commonality
313 //All constants are in canonized, thus in upper case and case sensitive
314 //comparisons are used
315
316 //--Just About Always--
317 if (FieldNameStr == "ACCEPT:") {
318 if (!RequestFields.AcceptStr.empty()) {
319 RequestFields.AcceptStr += ", ";
320 }
321 RequestFields.AcceptStr += FieldValStr;
322 }
323 else if (FieldNameStr == "DATE:") {
324 RequestFields.DateStr = FieldValStr;
325 }
326 else if (FieldNameStr == "USER-AGENT:") {
327 RequestFields.UserAgentStr = FieldValStr;
328 }
329 else if (FieldNameStr == "CONNECTION:") {
330 RequestFields.ConnectionStr = FieldValStr;
331 }
332 //--Sometimes--
333 else if (FieldNameStr == "ACCEPT-LANGUAGE:") {
334 RequestFields.AcceptLangStr = FieldValStr;
335 }
336 else if (FieldNameStr == "REFERER:") {
337 RequestFields.RefererStr = FieldValStr;
338 }
339 else if (FieldNameStr == "IF-MODIFIED-SINCE:") {
340 RequestFields.IfModSinceStr = FieldValStr;
341 }
342 //--Uncommon--
343 else if (FieldNameStr == "FROM:") {
344 RequestFields.FromStr = FieldValStr;
345 }
346 else if (FieldNameStr == "MIME-VERSION:") {
347 RequestFields.MIMEVerStr = FieldValStr;
348 }
349 else if (FieldNameStr == "PRAGMA:") {
350 RequestFields.PragmaStr = FieldValStr;
351 }
352 //--Special case--
353 else if (FieldNameStr == "AUTHORIZATION:") {
354 RequestFields.AuthorizationStr = FieldValStr;
355 }
356 else if (FieldNameStr == "CONTENT-LENGTH:") {
357 RequestFields.ContentLengthStr = FieldValStr;
358 }
359 else if (FieldNameStr == "CONTENT-TYPE:") {
360 RequestFields.ContentTypeStr = FieldValStr;
361 }
362 else if (FieldNameStr == "CONTENT-ENCODING:") {
363 RequestFields.ContentEncodingStr = FieldValStr;
364 }
365 else if (!FieldNameStr.empty()) {
366 //Add it to the other headers
367
368 //Remove the colon
369 if (*(FieldNameStr.end()-1) == ':') {
370 FieldNameStr.pop_back();
371 }
372 RequestFields.OtherHeaders[RequestFields.NumOtherHeaders].Var = FieldNameStr;
373 RequestFields.OtherHeaders[RequestFields.NumOtherHeaders].Val = FieldValStr;
374 RequestFields.NumOtherHeaders++;
375 }
376 CurLine = NextLine;
377 }
378
379 if (!RequestFields.ContentLengthStr.empty()) { //Do we have attached data?
380 unsigned int NumRecv;
381
382 RequestFields.ContentLength = RequestFields.ContentLengthStr.getint();
383 if (RequestFields.ContentLength > 0) {
384
385 //Allocate memory
386 RequestFields.Content = new BYTE[RequestFields.ContentLength];
387
388 //Get rest of data from get lines
389 NumRecv = DataInBuffer - ReadBufferIndex;
390
391 if (NumRecv >RequestFields.ContentLength) {
392 //Overflow, only read what they said they'd send
393 NumRecv = RequestFields.ContentLength;
394 }
395 memcpy(RequestFields.Content, RequestInfo.IOBuffer + ReadBufferIndex,
396 NumRecv);
397
398 while (NumRecv < RequestFields.ContentLength) {
399 NumRecv += GetData(RequestInfo.ClientSocket,
400 RequestFields.Content + NumRecv,
401 RequestFields.ContentLength - NumRecv,
402 RequestInfo.ThreadNum);
403 if (NumRecv < 0) return GH_ERROR;
404 }
405
406 // It seems to be important on NT that all available data was read
407 // from the socket before the socket is closed (otherwise netscape
408 // throws a "connection reset by peer" error). Since netscape seems
409 // to send a few extra bytes in certain situations we'll make sure we
410 // slurp it all up here.
411 char *tmpbuffer = new char(100);
412 // unsigned long int nonblockmode=1;
413 // ioctlsocket(RequestInfo.ClientSocket, FIONBIO, &nonblockmode);
414 d_recv(RequestInfo.ClientSocket, tmpbuffer, 100, 0);
415 delete tmpbuffer;
416
417 }
418 else {
419 RequestFields.Content = NULL;
420 RequestFields.ContentLength = 0;
421 }
422 }
423 else {
424 RequestFields.Content = NULL;
425 RequestFields.ContentLength = 0;
426 }
427
428 return GH_10_REQUEST;
429}
430
431/******************************************************************************/
432void CleanUpHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields) {
433 //Clean up memory allocated for the Content
434 if (RequestFields.Content != NULL) {
435 delete[] RequestFields.Content;
436 }
437
438 // clean up memory allocated for the IOBuffer
439 if (RequestInfo.IOBuffer != NULL) {
440 delete[] RequestInfo.IOBuffer;
441 RequestInfo.IOBuffer = NULL;
442 }
443}
Note: See TracBrowser for help on using the repository browser.