From fe6c8ff411ecd65a6d263fb6a8ee2b2bb5398e7f Mon Sep 17 00:00:00 2001 From: Albert Date: Tue, 21 Oct 2014 14:43:05 +0200 Subject: [PATCH 1/2] Fix issues with encoding/default values of parameters Thanks @smmiller for suggesting this and providing an initial fix. My modifications make `httpretrieve_open` work as its docstring says, i.e. the *caller* must take care of urlencoding the `querydata` and/or `postdata` arguments. The `_httpretrieve_build_request` function that is called internally only adds the `Content-Type` HTTP header automatically, but does not attempt to convert/modify the actual data. This behavior essentially mimics what Python's `urllib` does. --- httpretrieve.r2py | 53 +++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/httpretrieve.r2py b/httpretrieve.r2py index 5938970..5e31795 100644 --- a/httpretrieve.r2py +++ b/httpretrieve.r2py @@ -504,21 +504,25 @@ def _httpretrieve_build_request(host, port, path, querydata, postdata, \ # Sanity checks: if path == "": raise ValueError("Invalid path -- empty string.") - if postdata is not None and type(postdata) not in (str, dict): - raise TypeError("Postdata should be a dict of form-data or a string") - if querydata is not None and type(querydata) not in (str, dict): - raise TypeError("Querydata should be a dict of form-data or a string") - if httpheaders is not None and type(httpheaders) is not dict: - raise TypeError("Expected HTTP headers as a dictionary.") - - # Type-conversions: - if type(querydata) is dict: - querydata = urllib.urllib_quote_parameters(querydata) - elif querydata is None: - querydata = "" - - if type(postdata) is dict: - postdata = urllib.urllib_quote_parameters(postdata) + if postdata is not None and type(postdata) is not str: + raise TypeError("postdata should be a urlencoded string") + + # Initialize querydata if not given, ... + querydata = querydata or "" + + # ... 
and ensure it is a str + if type(querydata) is not str: + raise TypeError("querydata should be a urlencoded string") + + # Initialize the HTTP headers dict if not given, ... + httpheaders = httpheaders or {} + + # ... and ensure it is a dict + if type(httpheaders) is not dict: + raise TypeError("httpheaders must be a dict") + + # If we have query or post data, announce its encoding + httpheaders['Content-Type'] = 'application/x-www-form-urlencoded' # Default to GET, unless the caller specifies a message body to send. methodstr = "GET" @@ -538,24 +542,23 @@ def _httpretrieve_build_request(host, port, path, querydata, postdata, \ # there is no proxy; send normal http request requeststr = methodstr + ' ' + path + resourcestr + ' HTTP/1.0\r\n' - if httpheaders is not None: - # Most servers require a 'Host' header for normal functionality - # (especially in the case of multiple domains being hosted on a - # single server). - if "Host" not in httpheaders: - requeststr += "Host: " + host + ':' + str(port) + "\r\n" + # Most servers require a 'Host' header for normal functionality + # (especially in the case of multiple domains being hosted on a + # single server). + if "Host" not in httpheaders: + requeststr += "Host: " + host + ':' + str(port) + "\r\n" - for key, val in httpheaders.items(): - requeststr += key + ": " + val + '\r\n' + for key, val in httpheaders.items(): + requeststr += key + ": " + val + '\r\n' - # Affix post-data related headers and content: + # Add post-data related headers and content: if methodstr == "POST": requeststr += 'Content-Length: ' + str(len(postdata)) + '\r\n' # The empty line terminates HTTP headers. requeststr += '\r\n' - # If we're a POST request, affix any requested data to the message body. + # If we're a POST request, add any requested data to the message body. 
if methodstr == "POST": requeststr += postdata From 946a9ecf7ca2ba828911034615aae1c4f1c150d4 Mon Sep 17 00:00:00 2001 From: aaaaalbert Date: Thu, 23 Oct 2014 16:07:20 +0200 Subject: [PATCH 2/2] Make setting default Content-Type conditional If there is no data to be POSTed or no query, then there need not be an encoding. If there is data/a query, its default encoding is set to `application/x-www-form-urlencoded` unless an encoding was specified by the caller already. --- httpretrieve.r2py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/httpretrieve.r2py b/httpretrieve.r2py index 5e31795..d13cc2f 100644 --- a/httpretrieve.r2py +++ b/httpretrieve.r2py @@ -62,9 +62,13 @@ def httpretrieve_open(url, querydata=None, postdata=None,\ both postdata and querydata are omitted, there is no query string sent in the request. - For both querydata and postdata, strings are sent *unmodified*. - This means you probably should encode them first, with - urllib_quote(). + By default, we use + `Content-Type: application/x-www-form-urlencoded` + for querydata and postdata. However, we do not MIME-encode the + data; the caller must take care of that (using `urllib_quote`). + If you encode querydata/postdata differently, also supply an + `httpheaders` dict, with the key `Content-Type` mapping to + the MIME type you use. httpheaders (optional): A dictionary of supplemental HTTP request headers to add to the request. @@ -505,14 +509,14 @@ def _httpretrieve_build_request(host, port, path, querydata, postdata, \ if path == "": raise ValueError("Invalid path -- empty string.") if postdata is not None and type(postdata) is not str: - raise TypeError("postdata should be a urlencoded string") + raise TypeError("postdata should be a MIME-encoded string") # Initialize querydata if not given, ... querydata = querydata or "" # ... 
and ensure it is a str if type(querydata) is not str: - raise TypeError("querydata should be a urlencoded string") + raise TypeError("querydata should be a MIME-encoded string") # Initialize the HTTP headers dict if not given, ... httpheaders = httpheaders or {} @@ -521,8 +525,10 @@ def _httpretrieve_build_request(host, port, path, querydata, postdata, \ if type(httpheaders) is not dict: raise TypeError("httpheaders must be a dict") - # If we have query or post data, announce its encoding - httpheaders['Content-Type'] = 'application/x-www-form-urlencoded' + # If we have query or post data, set a default encoding if not given + if querydata or postdata and "Content-Type" not in httpheaders: + httpheaders['Content-Type'] = 'application/x-www-form-urlencoded' + # Default to GET, unless the caller specifies a message body to send. methodstr = "GET"