source: main/trunk/greenstone2/runtime-src/src/w32server/httpreq.cpp@ 30565

Last change on this file since 30565 was 18313, checked in by davidb, 15 years ago

Fixed bug to do with initialization of RequestFields variable. Had been using a memset to make it all zero, but this is not a safe thing to do when there are text_t fields in the class. Code upgraded to use a constructor and 'reset' function that explicitly initializes and clears the text_t fields

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 16.6 KB
Line 
1/**********************************************************************
2 *
3 * httpreq.cpp
4 * Copyright (C) 1996
5 *
6 * A component of the fnord webserver written by [email protected].
7 *
8 * Altered for use with the Greenstone digital library software by the
9 * New Zealand Digital Library Project at the University of Waikato,
10 * New Zealand.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 *********************************************************************/
27
28#include <windows.h>
29#include <stdlib.h>
30#include <stdio.h>
31#include <string.h>
32#include <memory.h>
33#include "httpreq.h"
34#include "parse.h"
35#include "netio.h"
36#include "settings.h"
37#include "httpsrv.h"
38#include "httpsend.h"
39#include "cgiwrapper.h"
40#include "d_winsock.h"
41
42/*
43Implementation Notes:
44
45HTTP field names, method and version strings are converted to upper case
46right after being read from the client in order to allow case insensitive
47string comparisons to be done on them. Since these fields are worked with a
48lot, this should help performance.
49*/
50
51//Private Data and declarations
52#define IO_BUFFER_SIZE 16384 //16K IO Buffer
53#define MAX_HTTP_LINE_LEN 1024 //Max length of line in a header of 1024
54
55//Private Function Declarations with Return Constants
56
57/*
58Function Name: DispatchRequest
59Purpose: Manages having the request parsed, then sent to the right function
60 to send a response or handle an error.
61Parameters:
62 ClientSocket - Socket the client is on
63 ClientSockAddr - Address of client
64 AddrLen - Length of address of client
65 IOBuffer - Pointer to buffer allocated for IO operations
66 ThreadNum - Number of thread that called this function for debugging purposes
67Notes: I'm still playing with the keep alive support. I commented out
68 the stuff for giving a client a timeout because I was unable to detect
69 disconnects.
70More Notes: Not sure if this organization will allow me to easily add support
71 for ISAPI filter DLLs.
72*/
73void DispatchRequest(SOCKET ClientSocket, SOCKADDR_IN ClientSockAddr, int AddrLen, BYTE *IOBuffer);
74
75/*
76Function Name: Get HTTP Headers
77Purpose: Reads the request line and the header lines from the client and
78 stores the parsed values in the RequestFields structure.
79Parameters:
80 RequestInfo - Request information structure (see httpreq.h)
81 RequestFields - HTTP request fields structure (see httpreq.h)
82Returns: GH_ERROR on error (disconnect, bad data, Windows in a bad mood, etc.)
83 GH_UNKNOWN_VERSION if the version number is not HTTP/0.9 or HTTP/1.x
84 GH_SIMPLE_REQUEST on a properly formatted HTTP/0.9 request
85 GH_10_REQUEST on a properly formatted HTTP/1.x request
86*/
87int GetHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);
88#define GH_BAD_METHOD -2
89#define GH_ERROR -1
90#define GH_UNKNOWN_VERSION 0
91#define GH_SIMPLE_REQUEST 1
92#define GH_10_REQUEST 2
93
94/*
95Function Name: Clean Up HTTP Headers
96Purpose: Cleans up memory dynamically allocated for headers
97Parameters:
98 RequestInfo - Request information structure (see httpreq.h)
99 RequestFields - HTTP request fields structure (see httpreq.h)
100Returns: Nothing
101*/
102void CleanUpHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);
103
104/*
105Function Name: Split Query
106Purpose: Splits the file and query part of a URI. In other words, it
107 puts the parts before and after the "?" in different strings.
108Parameters:
109 URIStr - The requested URI
110 FileStr - String to contain the name of the path + file part of the URI
111 QueryStr - String to contain the query part of the URI
112Returns: TRUE if there is a query, else FALSE
113*/
114BOOL SplitQuery(char *URIStr, char *FileStr, char *QueryStr, int ThreadNum);
115
116/*
117Function Name: Get File
118Purpose: Attempts to find a given file, including looking for index.html.
119 Updates the given URI string so it points to the true document location
120Parameters:
121 FilePath - Path of file, may be modified to best reflect the retrieved file
122 or directory
123 URIStr - URI string, minus the query
124Returns: GF_ERROR on error
125 GF_FILE_FOUND on success
126 GF_INDEX_FOUND if file is a directory with an index.html file in it
127 GF_DIRECTORY if file is a directory
128 GF_FILE_NOT_FOUND if file was not found
129*/
130
131/*
132Function Name: Process Simple Request
133Purpose: Sends a reply to a HTTP 0.9 "simple" request
134Parameters:
135 ClientSocket - Socket the client is on
136 RequestInfo - Structure storing the parsed headers
137 IOBuffer - Pointer to buffer allocated for IO operations
138 ThreadNum - Number of calling thread for debugging
139Notes: I should really test this and see if it works...
140*/
141void ProcessSimpleRequest(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);
142
143//Public Functions
144/******************************************************************************/
145void RequestThread(RequestThreadMessageT *Parameters) {
146 SOCKADDR_IN ClientSockAddr;
147 SOCKET ClientSocket;
148 int AddrLen;
149 //Allocate an IO buffer for this thread
150 BYTE *IOBuffer = new BYTE[IO_BUFFER_SIZE];
151
152 //Get the parameters for the request
153 ClientSocket = Parameters->ClientSocket;
154 ClientSockAddr = Parameters->ClientSockAddr;
155 AddrLen = Parameters->AddrLen;
156 DispatchRequest(ClientSocket, ClientSockAddr, AddrLen, IOBuffer);
157}
158/******************************************************************************/
159
160//Private Functions
161
162/******************************************************************************/
163void DispatchRequest(SOCKET ClientSocket, SOCKADDR_IN ClientSockAddr, int AddrLen, BYTE *IOBuffer) {
164 RequestInfoT RequestInfo;
165 RequestFieldsT RequestFields;
166
167 // TrayAddConnection();
168
169 //Setup the RequestInfo structure
170 memset(&RequestInfo, 0, sizeof(RequestInfoT));
171 RequestInfo.ThreadNum = 0;
172 RequestInfo.IOBuffer = IOBuffer;
173 RequestInfo.IOBufferSize = IO_BUFFER_SIZE;
174 RequestInfo.ClientSocket = ClientSocket;
175 RequestInfo.ClientSockAddr = ClientSockAddr;
176 RequestInfo.AddrLen = AddrLen;
177 RequestInfo.KeepAlive = FALSE;
178
179 int GetHeadersResult;
180 do {
181 //Get Headers
182 GetHeadersResult = GetHTTPHeaders(RequestInfo, RequestFields);
183
184 //Figure out what version we're dealing with and deal with it
185 switch (GetHeadersResult) {
186 case GH_SIMPLE_REQUEST :
187 SendHTTPError(400, "HTTP Request not supported", "Only 1.x requests supported", RequestInfo, RequestFields);
188 // TrayIncNumServed();
189 break;
190 case GH_10_REQUEST :
191 ExamineURIStr(RequestFields.URIStr,&RequestInfo,&RequestFields);
192 // TrayIncNumServed();
193 break;
194 case GH_UNKNOWN_VERSION :
195 SendHTTPError(400, "HTTP Version not supported", "Only 1.x requests supported", RequestInfo, RequestFields);
196 // TrayIncNumServed();
197 break;
198/* added Feb 2002 to handle stupid MS behaviour */
199 case GH_BAD_METHOD :
200 SendHTTPError(501, "Not implemented", "Only GET and POST currently implemented", RequestInfo, RequestFields);
201 break;
202 case GH_ERROR:
203 //Disconnect
204 RequestInfo.KeepAlive = FALSE;
205 break;
206 }
207 CleanUpHTTPHeaders(RequestInfo, RequestFields);
208 } while (0/*RequestInfo.KeepAlive == TRUE*/);
209 //Close connection
210 CloseSocket(RequestInfo.ClientSocket);
211 // TrayRemoveConnection();
212}
213
214void resetRequestFieldsT(RequestFieldsT& RequestFields)
215{
216 // Can't just 'memset' the text_t fields (as used to be done),
217 // as this could lead to inconsistent state in text_t fields
218 // ultimately resulting in a segmentation fault
219 // => need to go through an explicitly reset them
220
221 //Simple request line info v0.9
222 RequestFields.MethodStr.clear();
223 RequestFields.URIStr.clear();
224 //added v1.0
225 RequestFields.VersionStr.clear();
226 //General Header
227 RequestFields.DateStr.clear();
228 RequestFields.MIMEVerStr.clear();
229 RequestFields.PragmaStr.clear();
230 //Request Header
231 RequestFields.AuthorizationStr.clear();
232 RequestFields.FromStr.clear();
233 RequestFields.IfModSinceStr.clear();
234 RequestFields.RefererStr.clear();
235 RequestFields.UserAgentStr.clear();
236 //Entity Header (Only CGI stuff)
237 RequestFields.ContentEncodingStr.clear();
238 RequestFields.ContentTypeStr.clear();
239 RequestFields.ContentLengthStr.clear();
240 //v1.0 Optional (the more common ones)
241 RequestFields.AcceptStr.clear();
242 RequestFields.AcceptLangStr.clear();
243 //v1.1 Exentions
244 RequestFields.ConnectionStr.clear();
245
246 //Reset all the non text_t fields
247 RequestFields.ContentLength = 0;
248 RequestFields.Content = NULL;
249 RequestFields.NumOtherHeaders = 0;
250
251 for (int i=0 ; i<MAX_OTHER_HEADERS; i++) {
252 RequestFields.OtherHeaders[i].Var.clear();
253 RequestFields.OtherHeaders[i].Val.clear();
254 }
255}
256
257/******************************************************************************/
258int GetHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields) {
259 //Parsing and IO buffers
260 text_t CurLine;
261 text_t NextLine;
262 text_t FieldNameStr;
263 text_t FieldValStr;
264
265 //Parsing and IO working vars
266 int ReadBufferIndex;
267 int DataInBuffer;
268 text_t::const_iterator next;
269 text_t::const_iterator end;
270
271 resetRequestFieldsT(RequestFields);
272
273 ReadBufferIndex = 0;
274 DataInBuffer = 0;
275
276 //Get First Line
277 if (GetLine(CurLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
278 RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
279 RequestInfo.ThreadNum) != 0) return GH_ERROR;
280 do {//Get Next Line, append it if the first charactor is space
281 if(GetLine(NextLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
282 RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
283 RequestInfo.ThreadNum) != 0) return GH_ERROR;
284 if ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t')) {
285 CurLine += NextLine;
286 }
287 } while ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t'));
288 //Method String (first word)
289 GetWord(RequestFields.MethodStr, CurLine.begin(), CurLine.end(), next);
290 uc(RequestFields.MethodStr);
291
292 /* Added Feb 2002 - IE since about version 5 send stupid frontpage requests
293 for MS Document formats eg "GET /_vti_inf.html" */
294 if (RequestFields.MethodStr == "OPTIONS") {
295 return GH_BAD_METHOD;
296 }
297 //Version String (last word)
298 GetLastWord(RequestFields.VersionStr, CurLine.begin(), CurLine.end(), end);
299 uc(RequestFields.VersionStr);
300 text_t::const_iterator versionbegin = RequestFields.VersionStr.begin();
301
302 if ((RequestFields.VersionStr.size() > 5) && (substr(versionbegin, versionbegin+5) != "HTTP/")) {
303 //No version, assume simple request
304 //part after method is URI
305 RequestFields.URIStr = CurLine;
306 return GH_SIMPLE_REQUEST;
307 }
308
309 //URI String (in between End of first and Start of last)
310 //<Method> <WhiteSpace> <URI> <WhiteSpace> <Version> <CRLF>
311 // next^ end^
312 text_t spacebuffer;
313 text_t::const_iterator here = next;
314 while (here != end) {
315 // do this to remove trailing space
316 if (*here == ' ' || *here == '\t') {
317 spacebuffer.push_back(*here);
318 } else {
319 if (!spacebuffer.empty()) {
320 RequestFields.URIStr += spacebuffer;
321 spacebuffer.clear();
322 }
323 RequestFields.URIStr.push_back(*here);
324 }
325 ++here;
326 }
327
328 //Only accept requests from HTTP/0.9 or HTTP/1.X clients, we'll
329 //assume that anything else will require an upgrade or patch
330 if ((RequestFields.VersionStr.size() > 7) && (substr(versionbegin, versionbegin+7) != "HTTP/1.")) {
331 return GH_UNKNOWN_VERSION;
332 }
333
334 //Get the rest of the lines
335 CurLine = NextLine;
336
337 while (!CurLine.empty()) {//Blank Line, we're done
338 do {//Get Next Line, append it if the first charactor is space
339 if (GetLine(NextLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer,
340 RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer,
341 RequestInfo.ThreadNum) != 0)
342 return GH_ERROR;
343 if (NextLine.empty())
344 break;
345 if ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t')) {
346 CurLine += NextLine;
347 }
348 } while ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t'));
349
350 GetWord(FieldNameStr, CurLine.begin(), CurLine.end(), next);
351 uc(FieldNameStr);
352
353 FieldValStr = substr(next, CurLine.end());
354
355 //Process it
356 //In order of expected commonality
357 //All constants are in canonized, thus in upper case and case sensitive
358 //comparisons are used
359
360 //--Just About Always--
361 if (FieldNameStr == "ACCEPT:") {
362 if (!RequestFields.AcceptStr.empty()) {
363 RequestFields.AcceptStr += ", ";
364 }
365 RequestFields.AcceptStr += FieldValStr;
366 }
367 else if (FieldNameStr == "DATE:") {
368 RequestFields.DateStr = FieldValStr;
369 }
370 else if (FieldNameStr == "USER-AGENT:") {
371 RequestFields.UserAgentStr = FieldValStr;
372 }
373 else if (FieldNameStr == "CONNECTION:") {
374 RequestFields.ConnectionStr = FieldValStr;
375 }
376 //--Sometimes--
377 else if (FieldNameStr == "ACCEPT-LANGUAGE:") {
378 RequestFields.AcceptLangStr = FieldValStr;
379 }
380 else if (FieldNameStr == "REFERER:") {
381 RequestFields.RefererStr = FieldValStr;
382 }
383 else if (FieldNameStr == "IF-MODIFIED-SINCE:") {
384 RequestFields.IfModSinceStr = FieldValStr;
385 }
386 //--Uncommon--
387 else if (FieldNameStr == "FROM:") {
388 RequestFields.FromStr = FieldValStr;
389 }
390 else if (FieldNameStr == "MIME-VERSION:") {
391 RequestFields.MIMEVerStr = FieldValStr;
392 }
393 else if (FieldNameStr == "PRAGMA:") {
394 RequestFields.PragmaStr = FieldValStr;
395 }
396 //--Special case--
397 else if (FieldNameStr == "AUTHORIZATION:") {
398 RequestFields.AuthorizationStr = FieldValStr;
399 }
400 else if (FieldNameStr == "CONTENT-LENGTH:") {
401 RequestFields.ContentLengthStr = FieldValStr;
402 }
403 else if (FieldNameStr == "CONTENT-TYPE:") {
404 RequestFields.ContentTypeStr = FieldValStr;
405 }
406 else if (FieldNameStr == "CONTENT-ENCODING:") {
407 RequestFields.ContentEncodingStr = FieldValStr;
408 }
409 else if (!FieldNameStr.empty()) {
410 //Add it to the other headers
411
412 //Remove the colon
413 if (*(FieldNameStr.end()-1) == ':') {
414 FieldNameStr.pop_back();
415 }
416 RequestFields.OtherHeaders[RequestFields.NumOtherHeaders].Var = FieldNameStr;
417 RequestFields.OtherHeaders[RequestFields.NumOtherHeaders].Val = FieldValStr;
418 ++RequestFields.NumOtherHeaders;
419 }
420 CurLine = NextLine;
421 }
422
423 if (!RequestFields.ContentLengthStr.empty()) { //Do we have attached data?
424 unsigned int NumRecv;
425
426 RequestFields.ContentLength = RequestFields.ContentLengthStr.getint();
427 if (RequestFields.ContentLength > 0) {
428
429 //Allocate memory
430 RequestFields.Content = new BYTE[RequestFields.ContentLength];
431
432 //Get rest of data from get lines
433 NumRecv = DataInBuffer - ReadBufferIndex;
434
435 if (NumRecv >RequestFields.ContentLength) {
436 //Overflow, only read what they said they'd send
437 NumRecv = RequestFields.ContentLength;
438 }
439 memcpy(RequestFields.Content, RequestInfo.IOBuffer + ReadBufferIndex,
440 NumRecv);
441
442 while (NumRecv < RequestFields.ContentLength) {
443 NumRecv += GetData(RequestInfo.ClientSocket,
444 RequestFields.Content + NumRecv,
445 RequestFields.ContentLength - NumRecv,
446 RequestInfo.ThreadNum);
447 if (NumRecv < 0) return GH_ERROR;
448 }
449
450 // It seems to be important on NT that all available data was read
451 // from the socket before the socket is closed (otherwise netscape
452 // throws a "connection reset by peer" error). Since netscape seems
453 // to send a few extra bytes in certain situations we'll make sure we
454 // slurp it all up here.
455 char *tmpbuffer = new char[100]; // this had new char(100)????
456 // unsigned long int nonblockmode=1;
457 // ioctlsocket(RequestInfo.ClientSocket, FIONBIO, &nonblockmode);
458 d_recv(RequestInfo.ClientSocket, tmpbuffer, 100, 0);
459 delete []tmpbuffer;
460
461 }
462 else {
463 RequestFields.Content = NULL;
464 RequestFields.ContentLength = 0;
465 }
466 }
467 else {
468 RequestFields.Content = NULL;
469 RequestFields.ContentLength = 0;
470 }
471
472 return GH_10_REQUEST;
473}
474
475/******************************************************************************/
476void CleanUpHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields) {
477 //Clean up memory allocated for the Content
478 if (RequestFields.Content != NULL) {
479 delete[] RequestFields.Content;
480 }
481
482 // clean up memory allocated for the IOBuffer
483 if (RequestInfo.IOBuffer != NULL) {
484 delete[] RequestInfo.IOBuffer;
485 RequestInfo.IOBuffer = NULL;
486 }
487}
Note: See TracBrowser for help on using the repository browser.