/********************************************************************** * * httpreq.cpp * Copyright (C) 1996 * * A component of the fnord webserver written by bmorin@wpi.edu. * * Altered for use with the Greenstone digital library software by the * New Zealand Digital Library Project at the University of Waikato, * New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #include #include #include #include #include #include "httpreq.h" #include "parse.h" #include "netio.h" #include "settings.h" #include "httpsrv.h" #include "httpsend.h" #include "cgiwrapper.h" #include "d_winsock.h" /* Implementation Notes: HTTP field names, method and version strings are converted to upper case right after being read from the client in order to allow case insensitive string comparisons to be done on them. Since these fields are worked with a lot, this should help performance. */ //Private Data and declarations #define IO_BUFFER_SIZE 16384 //16K IO Buffer #define MAX_HTTP_LINE_LEN 1024 //Max length of line in a header of 1024 //Private Function Declarations with Return Contstants /* Function Name: DispatchRequest Purpose: Manages having the request parsed, then sent to the right function to send a response or handle an error. 
Parameters:
  ClientSocket - Socket the client is on
  ClientSockAddr - Address of client
  AddrLen - Length of address of client
  IOBuffer - Pointer to buffer allocated for IO operations
Notes: I'm still playing with the keep alive support. I commented out the
  stuff for giving a client a timeout because I was unable to detect
  disconnects.
More Notes: Not sure if this organization will allow me to easily add support
  for ISAPI filter DLLs.
*/
void DispatchRequest(SOCKET ClientSocket, SOCKADDR_IN ClientSockAddr, int AddrLen, BYTE *IOBuffer);

/*
Function Name: Get HTTP Headers
Purpose: Reads the request line and the header lines from the client and
  stores them in RequestFields. If a Content-Length header is present, the
  attached data is read as well.
Parameters:
  RequestInfo - Request information structure (see httpreq.h)
  RequestFields - HTTP request fields structure (see httpreq.h)
Returns:
  GH_BAD_METHOD if the request method is OPTIONS
  GH_ERROR on error (disconnect, bad data, Windows in a bad mood, etc.)
  GH_UNKNOWN_VERSION if the version number is not HTTP/0.9 or HTTP/1.x
  GH_SIMPLE_REQUEST on a properly formatted HTTP/0.9 request
  GH_10_REQUEST on a properly formatted HTTP/1.x request
*/
int GetHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);
#define GH_BAD_METHOD -2
#define GH_ERROR -1
#define GH_UNKNOWN_VERSION 0
#define GH_SIMPLE_REQUEST 1
#define GH_10_REQUEST 2

/*
Function Name: Clean Up HTTP Headers
Purpose: Cleans up memory dynamically allocated for headers
Parameters:
  RequestInfo - Request information structure (see httpreq.h)
  RequestFields - HTTP request fields structure (see httpreq.h)
Returns: Nothing
*/
void CleanUpHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields);

/*
Function Name: Split Query
Purpose: Splits the file and query part of a URI. In other words, it puts
  the parts before and after the "?" in different strings.
Parameters:
  URIStr - The requested URI
  FileStr - String to contain the name of the path + file part of the URI
  QueryStr - String to contain the query part of the URI
  ThreadNum - presumably the number of the calling thread, for debugging
              (not described in the original notes - TODO confirm)
Returns: TRUE if there is a query, else FALSE
*/
BOOL SplitQuery(char *URIStr, char *FileStr, char *QueryStr, int ThreadNum);

/*
Function Name: Get File
Purpose: Attempts to find a given file, including looking for index.html.
  Updates the given URI string so it points to the true document location
Parameters:
  FilePath - Path of file, may be modified to best reflect the retrieved
             file or directory
  URIStr - URI string, minus the query
Returns:
  GF_ERROR on error
  GF_FILE_FOUND on success
  GF_INDEX_FOUND if file is a directory with an index.html file in it
  GF_DIRECTORY if file is a directory
  GF_FILE_NOT_FOUND if file was not found
NOTE(review): no declaration follows this comment in this part of the file.
*/

/*
Function Name: Process Simple Request
Purpose: Sends a reply to a HTTP 0.9 "simple" request
Parameters:
  RequestInfo - Request information structure (see httpreq.h)
  RequestFields - HTTP request fields structure (see httpreq.h)
Notes: I should really test this and see if it works...
*/ void ProcessSimpleRequest(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields); //Public Functions /******************************************************************************/ void RequestThread(RequestThreadMessageT *Parameters) { SOCKADDR_IN ClientSockAddr; SOCKET ClientSocket; int AddrLen; //Allocate an IO buffer for this thread BYTE *IOBuffer = new BYTE[IO_BUFFER_SIZE]; //Get the parameters for the request ClientSocket = Parameters->ClientSocket; ClientSockAddr = Parameters->ClientSockAddr; AddrLen = Parameters->AddrLen; DispatchRequest(ClientSocket, ClientSockAddr, AddrLen, IOBuffer); } /******************************************************************************/ //Private Functions /******************************************************************************/ void DispatchRequest(SOCKET ClientSocket, SOCKADDR_IN ClientSockAddr, int AddrLen, BYTE *IOBuffer) { RequestInfoT RequestInfo; RequestFieldsT RequestFields; // TrayAddConnection(); //Setup the RequestInfo structure memset(&RequestInfo, 0, sizeof(RequestInfoT)); RequestInfo.ThreadNum = 0; RequestInfo.IOBuffer = IOBuffer; RequestInfo.IOBufferSize = IO_BUFFER_SIZE; RequestInfo.ClientSocket = ClientSocket; RequestInfo.ClientSockAddr = ClientSockAddr; RequestInfo.AddrLen = AddrLen; RequestInfo.KeepAlive = FALSE; int GetHeadersResult; do { //Get Headers GetHeadersResult = GetHTTPHeaders(RequestInfo, RequestFields); //Figure out what version we're dealing with and deal with it switch (GetHeadersResult) { case GH_SIMPLE_REQUEST : SendHTTPError(400, "HTTP Request not supported", "Only 1.x requests supported", RequestInfo, RequestFields); // TrayIncNumServed(); break; case GH_10_REQUEST : ExamineURIStr(RequestFields.URIStr,&RequestInfo,&RequestFields); // TrayIncNumServed(); break; case GH_UNKNOWN_VERSION : SendHTTPError(400, "HTTP Version not supported", "Only 1.x requests supported", RequestInfo, RequestFields); // TrayIncNumServed(); break; /* added Feb 2002 to handle stupid MS behaviour */ 
case GH_BAD_METHOD : SendHTTPError(501, "Not implemented", "Only GET and POST currently implemented", RequestInfo, RequestFields); break; case GH_ERROR: //Disconnect RequestInfo.KeepAlive = FALSE; break; } CleanUpHTTPHeaders(RequestInfo, RequestFields); } while (0/*RequestInfo.KeepAlive == TRUE*/); //Close connection CloseSocket(RequestInfo.ClientSocket); // TrayRemoveConnection(); } /******************************************************************************/ int GetHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields) { //Parsing and IO buffers text_t CurLine; text_t NextLine; text_t FieldNameStr; text_t FieldValStr; //Parsing and IO working vars int ReadBufferIndex; int DataInBuffer; text_t::const_iterator next; text_t::const_iterator end; //Clear all the fields memset(&RequestFields, 0, sizeof(RequestFieldsT)); ReadBufferIndex = 0; DataInBuffer = 0; //Get First Line if (GetLine(CurLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer, RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer, RequestInfo.ThreadNum) != 0) return GH_ERROR; do {//Get Next Line, append it if the first charactor is space if(GetLine(NextLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer, RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer, RequestInfo.ThreadNum) != 0) return GH_ERROR; if ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t')) { CurLine += NextLine; } } while ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t')); //Method String (first word) GetWord(RequestFields.MethodStr, CurLine.begin(), CurLine.end(), next); uc(RequestFields.MethodStr); /* Added Feb 2002 - IE since about version 5 send stupid frontpage requests for MS Document formats eg "GET /_vti_inf.html" */ if (RequestFields.MethodStr == "OPTIONS") { return GH_BAD_METHOD; } //Version String (last word) GetLastWord(RequestFields.VersionStr, CurLine.begin(), CurLine.end(), end); uc(RequestFields.VersionStr); text_t::const_iterator versionbegin = 
RequestFields.VersionStr.begin(); if ((RequestFields.VersionStr.size() > 5) && (substr(versionbegin, versionbegin+5) != "HTTP/")) { //No version, assume simple request //part after method is URI RequestFields.URIStr = CurLine; return GH_SIMPLE_REQUEST; } //URI String (in between End of first and Start of last) // // next^ end^ text_t spacebuffer; text_t::const_iterator here = next; while (here != end) { // do this to remove trailing space if (*here == ' ' || *here == '\t') { spacebuffer.push_back(*here); } else { if (!spacebuffer.empty()) { RequestFields.URIStr += spacebuffer; spacebuffer.clear(); } RequestFields.URIStr.push_back(*here); } ++here; } //Only accept requests from HTTP/0.9 or HTTP/1.X clients, we'll //assume that anything else will require an upgrade or patch if ((RequestFields.VersionStr.size() > 7) && (substr(versionbegin, versionbegin+7) != "HTTP/1.")) { return GH_UNKNOWN_VERSION; } //Get the rest of the lines CurLine = NextLine; while (!CurLine.empty()) {//Blank Line, we're done do {//Get Next Line, append it if the first charactor is space if (GetLine(NextLine, RequestInfo.ClientSocket, RequestInfo.IOBuffer, RequestInfo.IOBufferSize, ReadBufferIndex, DataInBuffer, RequestInfo.ThreadNum) != 0) return GH_ERROR; if (NextLine.empty()) break; if ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t')) { CurLine += NextLine; } } while ((*(NextLine.begin()) == ' ') || (*(NextLine.begin()) == '\t')); GetWord(FieldNameStr, CurLine.begin(), CurLine.end(), next); uc(FieldNameStr); FieldValStr = substr(next, CurLine.end()); //Process it //In order of expected commonality //All constants are in canonized, thus in upper case and case sensitive //comparisons are used //--Just About Always-- if (FieldNameStr == "ACCEPT:") { if (!RequestFields.AcceptStr.empty()) { RequestFields.AcceptStr += ", "; } RequestFields.AcceptStr += FieldValStr; } else if (FieldNameStr == "DATE:") { RequestFields.DateStr = FieldValStr; } else if (FieldNameStr == "USER-AGENT:") { 
RequestFields.UserAgentStr = FieldValStr; } else if (FieldNameStr == "CONNECTION:") { RequestFields.ConnectionStr = FieldValStr; } //--Sometimes-- else if (FieldNameStr == "ACCEPT-LANGUAGE:") { RequestFields.AcceptLangStr = FieldValStr; } else if (FieldNameStr == "REFERER:") { RequestFields.RefererStr = FieldValStr; } else if (FieldNameStr == "IF-MODIFIED-SINCE:") { RequestFields.IfModSinceStr = FieldValStr; } //--Uncommon-- else if (FieldNameStr == "FROM:") { RequestFields.FromStr = FieldValStr; } else if (FieldNameStr == "MIME-VERSION:") { RequestFields.MIMEVerStr = FieldValStr; } else if (FieldNameStr == "PRAGMA:") { RequestFields.PragmaStr = FieldValStr; } //--Special case-- else if (FieldNameStr == "AUTHORIZATION:") { RequestFields.AuthorizationStr = FieldValStr; } else if (FieldNameStr == "CONTENT-LENGTH:") { RequestFields.ContentLengthStr = FieldValStr; } else if (FieldNameStr == "CONTENT-TYPE:") { RequestFields.ContentTypeStr = FieldValStr; } else if (FieldNameStr == "CONTENT-ENCODING:") { RequestFields.ContentEncodingStr = FieldValStr; } else if (!FieldNameStr.empty()) { //Add it to the other headers //Remove the colon if (*(FieldNameStr.end()-1) == ':') { FieldNameStr.pop_back(); } RequestFields.OtherHeaders[RequestFields.NumOtherHeaders].Var = FieldNameStr; RequestFields.OtherHeaders[RequestFields.NumOtherHeaders].Val = FieldValStr; ++RequestFields.NumOtherHeaders; } CurLine = NextLine; } if (!RequestFields.ContentLengthStr.empty()) { //Do we have attached data? 
unsigned int NumRecv; RequestFields.ContentLength = RequestFields.ContentLengthStr.getint(); if (RequestFields.ContentLength > 0) { //Allocate memory RequestFields.Content = new BYTE[RequestFields.ContentLength]; //Get rest of data from get lines NumRecv = DataInBuffer - ReadBufferIndex; if (NumRecv >RequestFields.ContentLength) { //Overflow, only read what they said they'd send NumRecv = RequestFields.ContentLength; } memcpy(RequestFields.Content, RequestInfo.IOBuffer + ReadBufferIndex, NumRecv); while (NumRecv < RequestFields.ContentLength) { NumRecv += GetData(RequestInfo.ClientSocket, RequestFields.Content + NumRecv, RequestFields.ContentLength - NumRecv, RequestInfo.ThreadNum); if (NumRecv < 0) return GH_ERROR; } // It seems to be important on NT that all available data was read // from the socket before the socket is closed (otherwise netscape // throws a "connection reset by peer" error). Since netscape seems // to send a few extra bytes in certain situations we'll make sure we // slurp it all up here. char *tmpbuffer = new char[100]; // this had new char(100)???? // unsigned long int nonblockmode=1; // ioctlsocket(RequestInfo.ClientSocket, FIONBIO, &nonblockmode); d_recv(RequestInfo.ClientSocket, tmpbuffer, 100, 0); delete []tmpbuffer; } else { RequestFields.Content = NULL; RequestFields.ContentLength = 0; } } else { RequestFields.Content = NULL; RequestFields.ContentLength = 0; } return GH_10_REQUEST; } /******************************************************************************/ void CleanUpHTTPHeaders(RequestInfoT &RequestInfo, RequestFieldsT &RequestFields) { //Clean up memory allocated for the Content if (RequestFields.Content != NULL) { delete[] RequestFields.Content; } // clean up memory allocated for the IOBuffer if (RequestInfo.IOBuffer != NULL) { delete[] RequestInfo.IOBuffer; RequestInfo.IOBuffer = NULL; } }