Skip to content

Commit

Permalink
Merge pull request #7 from wdwf/enhancement/cache-and-retryRequest-fu…
Browse files Browse the repository at this point in the history
…nction

Enhancement/cache and retry request function
  • Loading branch information
vcampitelli authored Sep 25, 2024
2 parents 161099e + ed943aa commit 87c78c7
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 20 deletions.
1 change: 1 addition & 0 deletions .env.dist
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
GITHUB_TOKEN=
CACHE_PATH=/tmp/cache
7 changes: 7 additions & 0 deletions build.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,31 @@
->addOrganization('laminas', 'laminas')
);

// Single HTTP client shared by every crawler below. CACHE_PATH is read from the
// environment; when it is unset or empty the `?: null` fallback passes null,
// which CurlExec treats as "no cache directory".
$curlExec = new \App\CurlExec(
cachePath: \getenv('CACHE_PATH') ?: null,
);

// NOTE(review): $run is defined outside this hunk — it appears to take a label and
// an invokable crawler and return the crawler's result; confirm against its definition.

// Latest PHP version and its release date, destructured from the crawler's result.
[$latestPhpVersion, $latestPhpVersionReleaseDate] = $run(
'Última versão do PHP',
new App\Crawler\LatestPhpVersion(
'https://www.php.net/releases/active.php',
$curlExec,
)
);

// PHP usage percentage, taken from the w3techs programming-language overview page.
$phpUsagePercentage = $run(
'Uso do PHP no w3techs.com',
new App\Crawler\PhpUsagePercentage(
'https://w3techs.com/technologies/history_overview/programming_language',
$curlExec,
)
);

// Two WordPress usage figures, destructured from the crawler's result for the
// w3techs WordPress details page.
[$wordpressCmsUsagePercentage, $wordpressTotalUsagePercentage] = $run(
'Uso do WordPress no w3techs.com',
new App\Crawler\WordpressUsagePercentage(
'https://w3techs.com/technologies/details/cm-wordpress',
$curlExec,
)
);

Expand Down
5 changes: 4 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
],
"minimum-stability": "dev",
"require": {
"php": "^8.3"
"php": "^8.3",
"ext-curl": "*",
"ext-dom": "*",
"ext-libxml": "*"
},
"require-dev": {
"squizlabs/php_codesniffer": "4.0.x-dev",
Expand Down
10 changes: 6 additions & 4 deletions src/Crawler/LatestPhpVersion.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
use App\CurlExec;
use DateTimeImmutable;

class LatestPhpVersion
readonly class LatestPhpVersion
{
public function __construct(private readonly string $url)
{
public function __construct(
private string $url,
private CurlExec $curlExec,
) {
}

public function __invoke(): ?array
{
$json = (new CurlExec())->fetchAsString($this->url);
$json = $this->curlExec->fetchAsString($this->url);
$json = \json_decode($json, true);
$latestMajor = end($json);
$latestMinor = end($latestMajor);
Expand Down
10 changes: 6 additions & 4 deletions src/Crawler/PhpUsagePercentage.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,17 @@
use DOMDocument;
use DOMXPath;

class PhpUsagePercentage
readonly class PhpUsagePercentage
{
public function __construct(private readonly string $url)
{
public function __construct(
private string $url,
private CurlExec $curlExec
) {
}

public function __invoke(): ?float
{
$xpath = (new CurlExec())->fetchAsXpath($this->url);
$xpath = $this->curlExec->fetchAsXpath($this->url);

$languages = $xpath->query('//table[@class="hist"]/tr');
foreach ($languages as $language) {
Expand Down
10 changes: 6 additions & 4 deletions src/Crawler/WordpressUsagePercentage.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,17 @@
use DOMDocument;
use DOMXPath;

class WordpressUsagePercentage
readonly class WordpressUsagePercentage
{
public function __construct(private readonly string $url)
{
public function __construct(
private string $url,
private CurlExec $curlExec
) {
}

public function __invoke(): ?array
{
$xpath = (new CurlExec())->fetchAsXpath($this->url);
$xpath = $this->curlExec->fetchAsXpath($this->url);

$languages = $xpath->query('//p[@class="surv"]');
foreach ($languages as $language) {
Expand Down
65 changes: 58 additions & 7 deletions src/CurlExec.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,58 @@
use DOMXPath;
use RuntimeException;

class CurlExec
readonly class CurlExec
{
/**
 * Configures the HTTP client and makes sure the cache directory exists.
 *
 * @param string|null $cachePath  Directory used to cache response bodies; caching is
 *                                disabled when null or empty.
 * @param int         $maxRetries Upper bound on request attempts made by fetchAsString().
 * @param int         $retryDelay Seconds to sleep between attempts after a timeout.
 *
 * @throws RuntimeException when the cache directory is missing and cannot be created.
 */
public function __construct(
    private ?string $cachePath = null,
    private int $maxRetries = 3,
    private int $retryDelay = 5,
) {
    // Nothing to prepare when caching is disabled or the directory is already there.
    if (!$cachePath || \is_dir($cachePath)) {
        return;
    }

    // Fail fast instead of ignoring mkdir()'s result. The trailing is_dir() check
    // tolerates another process creating the directory between our check and mkdir().
    if (!\mkdir($cachePath, 0755, true) && !\is_dir($cachePath)) {
        throw new RuntimeException("Não foi possível criar o diretório de cache: {$cachePath}");
    }
}

/**
 * Returns the response body for $url, served from the on-disk cache when possible.
 *
 * On a cache miss the request is attempted up to $maxRetries times (at least once).
 * makeRequest() signals a timeout by returning false; only timeouts are retried,
 * with $retryDelay seconds between attempts. A successful response is written to
 * the cache (when enabled) before being returned.
 *
 * @param string $url Address to fetch.
 *
 * @return string The response body.
 *
 * @throws RuntimeException when every attempt timed out. Non-timeout cURL errors
 *                          are thrown directly by makeRequest().
 */
public function fetchAsString(string $url): string
{
    $cacheFile = $this->cachePath ? $this->getCacheFilePath($url) : null;

    if ($cacheFile !== null && \is_file($cacheFile)) {
        $cached = \file_get_contents($cacheFile);
        // An unreadable cache entry must not leak `false` through the string return
        // type; fall through to a live request instead.
        if ($cached !== false) {
            return $cached;
        }
    }

    // A non-positive retry budget would otherwise mean "never even try".
    $maxAttempts = \max(1, $this->maxRetries);

    for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
        $response = $this->makeRequest($url);

        if ($response !== false) {
            // Cache writes are best-effort: a failed write does not fail the fetch.
            if ($cacheFile !== null) {
                \file_put_contents($cacheFile, $response);
            }
            return $response;
        }

        // Timed out: wait before the next attempt, but do not delay the final failure.
        if ($attempt < $maxAttempts) {
            \sleep($this->retryDelay);
        }
    }

    throw new RuntimeException("Falha ao realizar a requisição para URL: {$url}");
}

/**
 * Fetches $url and returns a DOMXPath over the parsed HTML document.
 *
 * libxml parse errors are buffered rather than emitted as PHP warnings while the
 * document loads. The buffer is cleared and the caller's previous libxml
 * error-handling mode is restored afterwards, so this method does not leak
 * process-global state or accumulate errors across calls.
 *
 * @param string $url Address of the HTML page to fetch.
 *
 * @return DOMXPath XPath evaluator bound to the loaded document.
 *
 * @throws RuntimeException propagated from fetchAsString() when the request fails.
 */
public function fetchAsXpath(string $url): DOMXPath
{
    $htmlString = $this->fetchAsString($url);

    $previousMode = \libxml_use_internal_errors(true);
    try {
        $doc = new DOMDocument();
        $doc->loadHTML($htmlString);
    } finally {
        // Drop whatever the parser buffered and hand libxml back as we found it.
        \libxml_clear_errors();
        \libxml_use_internal_errors($previousMode);
    }

    return new DOMXPath($doc);
}

protected function makeRequest(string $url): string|false
{
$curlHandle = \curl_init();
\curl_setopt_array($curlHandle, [
Expand All @@ -20,19 +69,21 @@ public function fetchAsString(string $url): string
]);

$response = \curl_exec($curlHandle);

if ($response === false) {
$errorCode = \curl_errno($curlHandle);
if ($errorCode === CURLE_OPERATION_TIMEDOUT) {
return false;
}
throw new RuntimeException(\curl_error($curlHandle));
}

return $response;
}

public function fetchAsXpath(string $url): DOMXPath
private function getCacheFilePath(string $url): string
{
$htmlString = $this->fetchAsString($url);
\libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML($htmlString);
return new DOMXPath($doc);
$fileName = \sha1($url) . '.html';
return $this->cachePath . DIRECTORY_SEPARATOR . $fileName;
}
}

0 comments on commit 87c78c7

Please sign in to comment.