From d6c4b3d47bb82c5867c9b6e16545541b8ae832ad Mon Sep 17 00:00:00 2001 From: Diego Zanella Date: Wed, 17 Sep 2014 15:05:13 +0100 Subject: [PATCH 1/5] Added possibility to limit the amount of data retrieved * Ref https://github.com/nategood/httpful/issues/137. --- README.md | 4 ++- src/Httpful/Request.php | 77 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0525bba..0e18e28 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,9 @@ Httpful highly encourages sending in pull requests. When submitting a pull requ # Changelog +## 0.2.18 +- FEATURE [Allow to limit the amount of data retrieved](https://github.com/nategood/httpful/issues/137) + ## 0.2.17 - FEATURE [PR #144](https://github.com/nategood/httpful/pull/144) Adds additional parameter to the Response class to specify additional meta data about the request/response (e.g. number of redirect). @@ -201,4 +204,3 @@ Httpful highly encourages sending in pull requests. When submitting a pull requ - Created AbstractMimeHandler type that all Mime Handlers must extend - Pulled out the parsing/serializing logic from the Request/Response classes into their own MimeHandler classes - Added ability to register new mime handlers for mime types - diff --git a/src/Httpful/Request.php b/src/Httpful/Request.php index 65ba63d..b16210a 100755 --- a/src/Httpful/Request.php +++ b/src/Httpful/Request.php @@ -61,6 +61,11 @@ class Request // Template Request object private static $_template; + // @var int The maximum amount of data to retrieve. + protected $download_limit; + // @var string The data retrieved by the CURL request. Used only a download limit is set. + protected $retrieved_data; + /** * We made the constructor private to force the factory style. 
This was * done to keep the syntax cleaner and better the support the idea of @@ -201,14 +206,28 @@ public function send() $result = curl_exec($this->_ch); if ($result === false) { - if ($curlErrorNumber = curl_errno($this->_ch)) { - $curlErrorString = curl_error($this->_ch); - $this->_error($curlErrorString); - throw new ConnectionErrorException('Unable to connect: ' . $curlErrorNumber . ' ' . $curlErrorString); - } - - $this->_error('Unable to connect.'); - throw new ConnectionErrorException('Unable to connect.'); + $curlErrorNumber = curl_errno($this->_ch); + + // If error number is CURLE_WRITE_ERROR, it may just be that we hit + // the download limit. In such case, we can get the data retrieved so + // far and carry on. No need to throw an exception, as we are not + // dealing with an actual error + if(($curlErrorNumber == CURLE_WRITE_ERROR) && + ($this->download_limit > 0)) { + // + $result = $this->retrieved_data; + } + else { + // Any other error number represents an actual error + if ($curlErrorNumber) { + $curlErrorString = curl_error($this->_ch); + $this->_error($curlErrorString); + throw new ConnectionErrorException('Unable to connect: ' . $curlErrorNumber . ' ' . $curlErrorString); + } + + $this->_error('Unable to connect.'); + throw new ConnectionErrorException('Unable to connect.'); + } } $info = curl_getinfo($this->_ch); @@ -1135,4 +1154,46 @@ public static function options($uri) { return self::init(Http::OPTIONS)->uri($uri); } + + /** + * Allows to limit the size of retrieved data. Useful when you only need to + * get the headers of a page, as remote servers usually don't honour the + * "range" header in HTTP requests. + * + * @param int size The amount of data to retrieve, in bytes. 
+ * @return Httpful\Request + */ + public function limit($size) { + if((int)$size > 0) { + $this->download_limit = $size; + $this->retrieved_data = ''; + $this->addOnCurlOption(CURLOPT_BINARYTRANSFER, 1); + $this->addOnCurlOption(CURLOPT_WRITEFUNCTION, array($this, 'downloadLimiter')); + } + + return $this; + } + + /** + * Callback for the Request::limit() method. This method keeps track of the + * data retrieved, and interrupts the transfer once the limit has been + * reached. + * + * @param object ch The CURL handle. + * @param string chunk A chunk of the data retrieved by the CURL request. + * @return int The length of the retrieved chunk, or -1 if the limit has + * been reached. + */ + public function downloadLimiter($ch, $chunk) { + $len = strlen($this->retrieved_data) + strlen($chunk); + if($len >= $this->download_limit) { + $this->retrieved_data .= substr($chunk, 0, $this->download_limit - strlen($this->retrieved_data)); + //echo strlen($this->retrieved_data) , ' ', $this->retrieved_data; + return -1; + } + + $this->retrieved_data .= $chunk; + + return strlen($chunk); + } } From 98ceb726ed5ff7b82613073888a1cb8116eabfd3 Mon Sep 17 00:00:00 2001 From: Diego Zanella Date: Wed, 17 Sep 2014 15:18:01 +0100 Subject: [PATCH 2/5] Improved code documentation --- src/Httpful/Request.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Httpful/Request.php b/src/Httpful/Request.php index b16210a..1027a1e 100755 --- a/src/Httpful/Request.php +++ b/src/Httpful/Request.php @@ -1160,6 +1160,11 @@ public static function options($uri) * get the headers of a page, as remote servers usually don't honour the * "range" header in HTTP requests. * + * IMPORTANT: setting the limit too low will cause the request to fail, because + * the response will not contain the headers and body expected by the parser. + * The minimum value should be at least 1000, to ensure that all headers are + * retrieved and the parsing can succeed. 
+ * * @param int size The amount of data to retrieve, in bytes. * @return Httpful\Request */ From f23fa009dcdf065a5182fcc8ce625ea1fb0877d7 Mon Sep 17 00:00:00 2001 From: Diego Zanella Date: Wed, 17 Sep 2014 17:35:58 +0100 Subject: [PATCH 3/5] Updated composer.json --- composer.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 52329c4..48e87ec 100644 --- a/composer.json +++ b/composer.json @@ -4,12 +4,17 @@ "homepage": "http://github.com/nategood/httpful", "license": "MIT", "keywords": ["http", "curl", "rest", "restful", "api", "requests"], - "version": "0.2.17", + "version": "0.2.18", "authors": [ { "name": "Nate Good", "email": "me@nategood.com", "homepage": "http://nategood.com" + }, + { + "name": "Diego Zanella", + "email": "admin@aelia.co", + "homepage": "http://aelia.co" } ], "require": { From f4121596346f32211473a96199eebb0e19127fdf Mon Sep 17 00:00:00 2001 From: Diego Zanella Date: Fri, 19 Sep 2014 23:51:12 +0100 Subject: [PATCH 4/5] Fixed bug in logic used to limit the data retrieved * Previous logic was not taking into account the case in which the limit was high enough for the Request to retrieve the whole page. When this is the case, the $result variable contains "true" (boolean), and the actual content must be retrieved from the Request::retrieved_data property. --- src/Httpful/Request.php | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/Httpful/Request.php b/src/Httpful/Request.php index 1027a1e..dfd9e03 100755 --- a/src/Httpful/Request.php +++ b/src/Httpful/Request.php @@ -211,11 +211,10 @@ public function send() // If error number is CURLE_WRITE_ERROR, it may just be that we hit // the download limit. In such case, we can get the data retrieved so // far and carry on. No need to throw an exception, as we are not - // dealing with an actual error + // dealing with an actual error. 
if(($curlErrorNumber == CURLE_WRITE_ERROR) && ($this->download_limit > 0)) { - // - $result = $this->retrieved_data; + $result = true; } else { // Any other error number represents an actual error @@ -230,6 +229,22 @@ public function send() } } + /* Result can be "true" in two cases: + * - When download limit is greater than zero, and the limit set was + * larger than the page size (i.e. the whole page was fetched, despite + * the limit). + * - When download limit is greater than zero and error CURLE_WRITE_ERROR + * was raised (i.e. the transfer was interrupted because the limit was + * reached). + * + * In both cases, the data is actually stored in $this->retrieved_data, + * therefore it must be put back inside $result, where the library + * expects to find it. + */ + if($result === true) { + $result = $this->retrieved_data; + } + $info = curl_getinfo($this->_ch); // Remove the "HTTP/1.x 200 Connection established" string and any other headers added by proxy From 5fd561b448e32ab4be807cb2a936f695839cc5be Mon Sep 17 00:00:00 2001 From: Diego Zanella Date: Tue, 21 Oct 2014 20:56:45 +0100 Subject: [PATCH 5/5] Replaced indentation tabs with spaces --- src/Httpful/Request.php | 172 ++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/src/Httpful/Request.php b/src/Httpful/Request.php index dfd9e03..c582fc4 100755 --- a/src/Httpful/Request.php +++ b/src/Httpful/Request.php @@ -61,10 +61,10 @@ class Request // Template Request object private static $_template; - // @var int The maximum amount of data to retrieve. - protected $download_limit; - // @var string The data retrieved by the CURL request. Used only a download limit is set. - protected $retrieved_data; + // @var int The maximum amount of data to retrieve. + protected $download_limit; + // @var string The data retrieved by the CURL request. Used only a download limit is set. 
+ protected $retrieved_data; /** * We made the constructor private to force the factory style. This was @@ -206,44 +206,44 @@ public function send() $result = curl_exec($this->_ch); if ($result === false) { - $curlErrorNumber = curl_errno($this->_ch); - - // If error number is CURLE_WRITE_ERROR, it may just be that we hit - // the download limit. In such case, we can get the data retrieved so - // far and carry on. No need to throw an exception, as we are not - // dealing with an actual error. - if(($curlErrorNumber == CURLE_WRITE_ERROR) && - ($this->download_limit > 0)) { - $result = true; - } - else { - // Any other error number represents an actual error - if ($curlErrorNumber) { - $curlErrorString = curl_error($this->_ch); - $this->_error($curlErrorString); - throw new ConnectionErrorException('Unable to connect: ' . $curlErrorNumber . ' ' . $curlErrorString); - } - - $this->_error('Unable to connect.'); - throw new ConnectionErrorException('Unable to connect.'); - } + $curlErrorNumber = curl_errno($this->_ch); + + // If error number is CURLE_WRITE_ERROR, it may just be that we hit + // the download limit. In such case, we can get the data retrieved so + // far and carry on. No need to throw an exception, as we are not + // dealing with an actual error. + if(($curlErrorNumber == CURLE_WRITE_ERROR) && + ($this->download_limit > 0)) { + $result = true; + } + else { + // Any other error number represents an actual error + if ($curlErrorNumber) { + $curlErrorString = curl_error($this->_ch); + $this->_error($curlErrorString); + throw new ConnectionErrorException('Unable to connect: ' . $curlErrorNumber . ' ' . $curlErrorString); + } + + $this->_error('Unable to connect.'); + throw new ConnectionErrorException('Unable to connect.'); + } } - /* Result can be "true" in two cases: - * - When download limit is greater than zero, and the limit set was - * larger than the page size (i.e. the whole page was fetched, despite - * the limit). 
- * - When download limit is greater than zero and error CURLE_WRITE_ERROR - * was raised (i.e. the transfer was interrupted because the limit was - * reached). - * - * In both cases, the data is actually stored in $this->retrieved_data, - * therefore it must be put back inside $result, where the library - * expects to find it. - */ - if($result === true) { - $result = $this->retrieved_data; - } + /* Result can be "true" in two cases: + * - When download limit is greater than zero, and the limit set was + * larger than the page size (i.e. the whole page was fetched, despite + * the limit). + * - When download limit is greater than zero and error CURLE_WRITE_ERROR + * was raised (i.e. the transfer was interrupted because the limit was + * reached). + * + * In both cases, the data is actually stored in $this->retrieved_data, + * therefore it must be put back inside $result, where the library + * expects to find it. + */ + if($result === true) { + $result = $this->retrieved_data; + } $info = curl_getinfo($this->_ch); @@ -1170,50 +1170,50 @@ public static function options($uri) return self::init(Http::OPTIONS)->uri($uri); } - /** - * Allows to limit the size of retrieved data. Useful when you only need to - * get the headers of a page, as remote servers usually don't honour the - * "range" header in HTTP requests. - * - * IMPORTANT: setting the limit too low will cause the request to fail, because - * the response will not contain the headers and body expected by the parser. - * The minimum value should be at least 1000, to ensure that all headers are - * retrieved and the parsing can succeed. - * - * @param int size The amount of data to retrieve, in bytes. 
- * @return Httpful\Request - */ - public function limit($size) { - if((int)$size > 0) { - $this->download_limit = $size; - $this->retrieved_data = ''; - $this->addOnCurlOption(CURLOPT_BINARYTRANSFER, 1); - $this->addOnCurlOption(CURLOPT_WRITEFUNCTION, array($this, 'downloadLimiter')); - } - - return $this; - } - - /** - * Callback for the Request::limit() method. This method keeps track of the - * data retrieved, and interrupts the transfer once the limit has been - * reached. - * - * @param object ch The CURL handle. - * @param string chunk A chunk of the data retrieved by the CURL request. - * @return int The length of the retrieved chunk, or -1 if the limit has - * been reached. - */ - public function downloadLimiter($ch, $chunk) { - $len = strlen($this->retrieved_data) + strlen($chunk); - if($len >= $this->download_limit) { - $this->retrieved_data .= substr($chunk, 0, $this->download_limit - strlen($this->retrieved_data)); - //echo strlen($this->retrieved_data) , ' ', $this->retrieved_data; - return -1; - } - - $this->retrieved_data .= $chunk; - - return strlen($chunk); - } + /** + * Allows to limit the size of retrieved data. Useful when you only need to + * get the headers of a page, as remote servers usually don't honour the + * "range" header in HTTP requests. + * + * IMPORTANT: setting the limit too low will cause the request to fail, because + * the response will not contain the headers and body expected by the parser. + * The minimum value should be at least 1000, to ensure that all headers are + * retrieved and the parsing can succeed. + * + * @param int size The amount of data to retrieve, in bytes. 
+ * @return Httpful\Request + */ + public function limit($size) { + if((int)$size > 0) { + $this->download_limit = $size; + $this->retrieved_data = ''; + $this->addOnCurlOption(CURLOPT_BINARYTRANSFER, 1); + $this->addOnCurlOption(CURLOPT_WRITEFUNCTION, array($this, 'downloadLimiter')); + } + + return $this; + } + + /** + * Callback for the Request::limit() method. This method keeps track of the + * data retrieved, and interrupts the transfer once the limit has been + * reached. + * + * @param object ch The CURL handle. + * @param string chunk A chunk of the data retrieved by the CURL request. + * @return int The length of the retrieved chunk, or -1 if the limit has + * been reached. + */ + public function downloadLimiter($ch, $chunk) { + $len = strlen($this->retrieved_data) + strlen($chunk); + if($len >= $this->download_limit) { + $this->retrieved_data .= substr($chunk, 0, $this->download_limit - strlen($this->retrieved_data)); + //echo strlen($this->retrieved_data) , ' ', $this->retrieved_data; + return -1; + } + + $this->retrieved_data .= $chunk; + + return strlen($chunk); + } }