From 1d703e8299f586d2a9c4d0efe91a3e8ec0726868 Mon Sep 17 00:00:00 2001 From: Stepan Shirokikh Date: Tue, 19 Mar 2013 17:24:08 +0400 Subject: [PATCH 1/8] fixed cleanup_pool --- human_curl/async.py | 1 + 1 file changed, 1 insertion(+) diff --git a/human_curl/async.py b/human_curl/async.py index d840b33..0469a36 100644 --- a/human_curl/async.py +++ b/human_curl/async.py @@ -297,6 +297,7 @@ def cleanup_pool(self): opener.close() self._openers_pool.close() + self._openers_pool = None def method(self, method, **kwargs): """Added request params to data_queue From c07e8ca2dbe23f3c6c00454b92c941be30c40046 Mon Sep 17 00:00:00 2001 From: Stepan Shirokikh Date: Wed, 20 Mar 2013 03:32:23 +0400 Subject: [PATCH 2/8] Fixed process_func setting in AsyncClient.start --- human_curl/async.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/human_curl/async.py b/human_curl/async.py index 0469a36..ae5c6e5 100644 --- a/human_curl/async.py +++ b/human_curl/async.py @@ -144,9 +144,10 @@ def start(self, process_func=None): :param process_func: function to call in process """ - if process_func and not isinstance(process_func, FunctionType): + if process_func: + if not isinstance(process_func, FunctionType): + raise InterfaceError("process_func must be function") self._process_func = process_func - raise InterfaceError("process_func must be function") if not self._openers_pool: self._openers_pool = self.build_pool() From 0bb00b19c46da2a882f1815c2085ef6c651cb27d Mon Sep 17 00:00:00 2001 From: Stepan Shirokikh Date: Wed, 20 Mar 2013 17:26:19 +0400 Subject: [PATCH 3/8] Fixed exception, when misconfigured server returns more than one status header --- human_curl/core.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/human_curl/core.py b/human_curl/core.py index 3fb4979..76e46a8 100644 --- a/human_curl/core.py +++ b/human_curl/core.py @@ -763,7 +763,11 @@ def parse_header_block(raw_block): logger.warn(e) continue else: - block_headers.append((version, 
code, message)) + if len(block_headers) > 0: + logger.warn("Status HTTP header already exitst %s, but found one more %s for url %s" % + (block_headers[0], (version, code, message), self.url)) + else: + block_headers.append((version, code, message)) else: # raise ValueError("Wrong header field") pass From 5a4407406a3bf841ddc58e116ff690a0aff93ea9 Mon Sep 17 00:00:00 2001 From: Stepan Shirokikh Date: Wed, 20 Mar 2013 19:05:25 +0400 Subject: [PATCH 4/8] Revert "Fixed process_func setting in AsyncClient.start" This reverts commit c07e8ca2dbe23f3c6c00454b92c941be30c40046. --- human_curl/async.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/human_curl/async.py b/human_curl/async.py index ae5c6e5..0469a36 100644 --- a/human_curl/async.py +++ b/human_curl/async.py @@ -144,10 +144,9 @@ def start(self, process_func=None): :param process_func: function to call in process """ - if process_func: - if not isinstance(process_func, FunctionType): - raise InterfaceError("process_func must be function") + if process_func and not isinstance(process_func, FunctionType): self._process_func = process_func + raise InterfaceError("process_func must be function") if not self._openers_pool: self._openers_pool = self.build_pool() From 0025f5d98a1975feeb078be1b647f558e4b2f39c Mon Sep 17 00:00:00 2001 From: Stepan Shirokikh Date: Wed, 20 Mar 2013 19:30:07 +0400 Subject: [PATCH 5/8] Fixed no exception when no newline at end of headers when allow_redirects=True --- human_curl/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/human_curl/core.py b/human_curl/core.py index 76e46a8..2e6299b 100644 --- a/human_curl/core.py +++ b/human_curl/core.py @@ -727,6 +727,12 @@ def json(self): except ValueError: return None + def _clean_raw_headers(self, raw_headers): + ret = raw_headers.strip() + ret = ret.replace("\r\nHTTP", "\r\n\r\nHTTP") + ret = ret.replace("\r\n\r\n\r\n", "\r\n\r\n") + return ret + def _parse_headers_raw(self): """Parse response 
headers and save as instance vars """ @@ -775,7 +781,7 @@ def parse_header_block(raw_block): raw_headers = self._headers_output.getvalue() - headers_blocks = raw_headers.strip().split("\r\n\r\n") + headers_blocks = self._clean_raw_headers(raw_headers).split("\r\n\r\n") for raw_block in headers_blocks: block = parse_header_block(raw_block) From d1d4ae1d45296336424096268400141d6a8822f9 Mon Sep 17 00:00:00 2001 From: Stepan Shirokikh Date: Wed, 20 Mar 2013 19:42:14 +0400 Subject: [PATCH 6/8] added try catch while parsing headers, to prevent exception on bogus responses. e.g. see "Apache" header $ curl -q -v http://khabarfarsi.com > /dev/null * About to connect() to khabarfarsi.com port 80 (#0) * Trying 176.9.154.107... % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0* connected * Connected to khabarfarsi.com (176.9.154.107) port 80 (#0) > GET / HTTP/1.1 > User-Agent: curl/7.28.1 > Host: khabarfarsi.com > Accept: */* > < HTTP/1.1 200 OK < Apache < Content-Type: text/html; charset=utf-8 < Vary: Accept-Encoding, Cookie < Etag: "1363793834-0" --- human_curl/core.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/human_curl/core.py b/human_curl/core.py index 76e46a8..37c33cc 100644 --- a/human_curl/core.py +++ b/human_curl/core.py @@ -746,7 +746,11 @@ def parse_header_block(raw_block): if not header: continue elif not header.startswith("HTTP"): - field, value = map(lambda u: u.strip(), header.split(":", 1)) + try: + field, value = map(lambda u: u.strip(), header.split(":", 1)) + except Exception, e: + logger.warn("Unable to parse header %s for url %s , %s", header, self.url, e) + continue if field.startswith("Location"): # maybe not good if not value.startswith("http"): From bfcc9bd69c7ca2b01906ea52729b50f4a0634a6a Mon Sep 17 00:00:00 2001 From: Stepan Shirokikh Date: Wed, 20 Mar 2013 19:45:49 +0400 Subject: [PATCH 7/8] fixed indent --- human_curl/core.py 
| 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/human_curl/core.py b/human_curl/core.py index 37c33cc..46198e9 100644 --- a/human_curl/core.py +++ b/human_curl/core.py @@ -748,9 +748,10 @@ def parse_header_block(raw_block): elif not header.startswith("HTTP"): try: field, value = map(lambda u: u.strip(), header.split(":", 1)) - except Exception, e: + except Exception, e: logger.warn("Unable to parse header %s for url %s , %s", header, self.url, e) continue + if field.startswith("Location"): # maybe not good if not value.startswith("http"): From fac4879f5b18fecd258363be110c1ee904872855 Mon Sep 17 00:00:00 2001 From: Stepan Shirokikh Date: Fri, 5 Apr 2013 16:47:29 +0400 Subject: [PATCH 8/8] Created workaround for reset curl handler --- human_curl/async.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/human_curl/async.py b/human_curl/async.py index b2a30f2..ac2a425 100644 --- a/human_curl/async.py +++ b/human_curl/async.py @@ -199,14 +199,20 @@ def reset_opener(self, opener): :param opener: :class:`pycurl.Curl` object """ - opener.success_callback = None - opener.fail_callback = None - opener.request = None - if getattr(opener, "dirty", False) is True: # After appling this method curl raise error # Unable to fetch curl handle from curl object - opener.reset() + # This code is a workaround in case the reset() method is not working, + # so we create a new opener instance instead + self._openers_pool.handles.remove(opener) + del opener + + opener = self.get_opener() + self._openers_pool.handles.append(opener) + + opener.success_callback = None + opener.fail_callback = None + opener.request = None # Maybe need delete cookies? 
return opener @@ -229,7 +235,7 @@ def process_raw_data(self): opener = self._free_openers.pop() # Create request object - self.configure_opener(opener, request_data) + opener = self.configure_opener(opener, request_data) # Add configured opener to handles pool self._openers_pool.add_handle(opener) @@ -253,8 +259,7 @@ def process_pending_requests(self): response = self.make_response(opener) opener.success_callback(response=response, async_client=self, opener=opener) - ## FIXME: after pycurl.MultiCurl reset error - ## opener.dirty = True + opener.dirty = True self._free_openers.append(opener) for opener, errno, errmsg in error_list: @@ -264,8 +269,7 @@ def process_pending_requests(self): opener.fail_callback(errno=errno, errmsg=errmsg, async_client=self, opener=opener, request=opener.request) - ## FIXME: after pycurl.MultiCurl reset error - ## opener.dirty = True + opener.dirty = True self._free_openers.append(opener)