Skip to content

Commit

Permalink
feat: Update DataGovUkExport.php to upload to S3 bucket. (#357)
Browse files Browse the repository at this point in the history
  • Loading branch information
fibble authored Oct 3, 2024
1 parent 8918d18 commit 3414573
Show file tree
Hide file tree
Showing 9 changed files with 231 additions and 157 deletions.
8 changes: 6 additions & 2 deletions app/api/config/autoload/config.global.php
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,13 @@
'export_path' => '/tmp/ViExtract'
],

// Path to export CSV data for data.gov.uk
// S3 bucket URI that CSV data for data.gov.uk is exported to
'data-gov-uk-export' => [
'path' => '/tmp/dataGovUk',
's3_uri' => '%data-gov-uk-export-s3uri%'
],

'data-dva-ni-export' => [
's3_uri' => '%data-dva-ni-export-s3uri%'
],

// Path to export CSV data for Companies House differences
Expand Down
7 changes: 7 additions & 0 deletions app/api/config/autoload/local.php.dist
Original file line number Diff line number Diff line change
Expand Up @@ -373,4 +373,11 @@ return [
'proxy' => new \Laminas\Stdlib\ArrayUtils\MergeRemoveKey(),
],
],
'data-gov-uk-export' => [
's3_uri' => 's3://devapp-vol-content/olcs.local.nonprod.dvsa.aws/data-gov-uk-export/'
],

// S3 URI for the DVA NI export; the command handler factory splits it into bucket (host) and key prefix (path).
'data-dva-ni-export' => [
    's3_uri' => 's3://devapp-olcs-pri-integration-dva-s3/',
],
];
2 changes: 2 additions & 0 deletions app/api/module/Api/config/module.config.php
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,8 @@
ApiSrv\GovUkAccount\GovUkAccountService::class => ApiSrv\GovUkAccount\GovUkAccountServiceFactory::class,

ApiSrv\AddressHelper\AddressHelperService::class => ApiSrv\AddressHelper\AddressHelperServiceFactory::class,

Aws\S3\S3Client::class => Dvsa\Olcs\Api\Service\S3\S3ClientFactory::class,
],
],
'view_manager' => [
Expand Down
23 changes: 23 additions & 0 deletions app/api/module/Api/src/Service/S3/S3ClientFactory.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

namespace Dvsa\Olcs\Api\Service\S3;

use Aws\S3\S3Client;
use Psr\Container\ContainerInterface;

/**
 * Container factory that builds an AWS S3 client from the 'awsOptions'
 * section of the application config.
 */
class S3ClientFactory
{
    /**
     * Create the S3 client.
     *
     * @param ContainerInterface $container Container that exposes the application config under 'config'
     *
     * @return S3Client
     */
    public function __invoke(ContainerInterface $container): S3Client
    {
        $config = $container->get('config');
        $awsOptions = $config['awsOptions'];

        return new S3Client([
            'version' => $awsOptions['version'],
            'region' => $awsOptions['region'],
            // Environments that do not configure the optional 's3' section fall back
            // to virtual-hosted style addressing instead of raising an undefined-key warning.
            'use_path_style_endpoint' => $awsOptions['s3']['use_path_style_endpoint'] ?? false,
        ]);
    }
}
121 changes: 92 additions & 29 deletions app/api/module/Cli/src/Domain/CommandHandler/AbstractDataExport.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

namespace Dvsa\Olcs\Cli\Domain\CommandHandler;

use Aws\S3\S3Client;
use Doctrine\DBAL\Result;
use Dvsa\Olcs\Api\Domain\Util\DateTime\DateTime;
use Dvsa\Olcs\Api\Entity\TrafficArea\TrafficArea as TrafficAreaEntity;
use Dvsa\Olcs\Api\Domain\CommandHandler\AbstractCommandHandler;
use Dvsa\Olcs\Cli\Service\Utils\ExportToCsv;
use Dvsa\Olcs\Api\Domain\QueueAwareTrait;
use Dvsa\Olcs\Api\Domain\Repository;
use Dvsa\Olcs\Api\Service\Exception;
Expand Down Expand Up @@ -36,15 +36,13 @@ abstract class AbstractDataExport extends AbstractCommandHandler
'Licence'
];

/**
* @var string
*/
protected $path;
protected string $path;

/**
* @var array
*/
private $csvPool = [];
private array $csvPool = [];

protected S3Client $s3Client;

protected string $s3Bucket;

/**
* Fill a CSV with the result of a doctrine statement
Expand All @@ -58,18 +56,16 @@ abstract class AbstractDataExport extends AbstractCommandHandler
protected function singleCsvFromDbalResult(Result $dbalResult, $fileName, $fileNameSeparator = '_')
{
$date = new DateTime('now');
$fileBaseName = $fileName . $fileNameSeparator . $date->format(static::FILE_DATETIME_FORMAT) . '.csv';

$filePath = $this->path . '/' . $fileName . $fileNameSeparator . $date->format(static::FILE_DATETIME_FORMAT) . '.csv';
$tempCsvPath = sys_get_temp_dir() . '/' . $fileBaseName;
$this->result->addMessage('Creating CSV file: ' . $tempCsvPath);
$fh = fopen($tempCsvPath, 'w');

// create csv file
$this->result->addMessage('create csv file: ' . $filePath);
$fh = ExportToCsv::createFile($filePath);
$firstRow = false;

// add rows
while (($row = $dbalResult->fetchAssociative()) !== false) {
if (!$firstRow) {
//add title
fputcsv($fh, array_keys($row));
$firstRow = true;
}
Expand All @@ -79,7 +75,7 @@ protected function singleCsvFromDbalResult(Result $dbalResult, $fileName, $fileN

fclose($fh);

return file_get_contents($filePath);
return $tempCsvPath;
}

/**
Expand All @@ -93,35 +89,38 @@ protected function singleCsvFromDbalResult(Result $dbalResult, $fileName, $fileN
*/
protected function makeCsvsFromDbalResult(Result $dbalResult, $keyFld, $fileName)
{
    // Splits the result set into one temporary CSV per distinct $keyFld value
    // ("<fileName>_<key>.csv"), uploads each file to S3 and removes the local copies.
    $filePaths = [];
    $fileHandles = [];

    try {
        while (($row = $dbalResult->fetchAssociative()) !== false) {
            $key = $row[$keyFld];

            if (!isset($fileHandles[$key])) {
                $filePath = sys_get_temp_dir() . '/' . $fileName . '_' . $key . '.csv';

                $this->result->addMessage('Creating CSV file: ' . $filePath);
                $fh = fopen($filePath, 'w');
                if ($fh === false) {
                    throw new \RuntimeException('Unable to create CSV file: ' . $filePath);
                }

                // The first row seen for a key supplies the column titles.
                fputcsv($fh, array_keys($row));

                $fileHandles[$key] = $fh;
                $filePaths[$key] = $filePath;
            }

            fputcsv($fileHandles[$key], $row);
        }

        // Close each file before uploading so all buffered rows are on disk.
        foreach ($fileHandles as $key => $fh) {
            fclose($fh);
            unset($fileHandles[$key]);
            $this->uploadToS3($filePaths[$key]);
        }
    } finally {
        // Runs on success and on failure: release any handle still open and
        // remove every temporary file so a failed upload leaves nothing behind.
        foreach ($fileHandles as $fh) {
            fclose($fh);
        }

        foreach ($filePaths as $filePath) {
            if (is_file($filePath)) {
                unlink($filePath);
            }
        }
    }
}

Expand Down Expand Up @@ -179,4 +178,68 @@ protected function getTrafficAreas()

return $items;
}

/**
 * Write a checksum manifest for the given files into the system temp directory.
 *
 * Each manifest line holds the SHA-256 hex digest of a file followed by its base name.
 *
 * @param string[] $filePaths Paths of the files to list in the manifest
 *
 * @return string Path of the manifest file that was written
 *
 * @throws \RuntimeException When a file cannot be hashed or the manifest cannot be written
 */
protected function createManifest(array $filePaths)
{
    $manifestLines = [];

    foreach ($filePaths as $filePath) {
        $hash = hash_file('sha256', $filePath);

        // hash_file() returns false for an unreadable file; fail loudly rather
        // than publish a manifest line with an empty checksum.
        if ($hash === false) {
            throw new \RuntimeException('Unable to hash file for manifest: ' . $filePath);
        }

        $manifestLines[] = $hash . ' ' . basename($filePath);
    }

    $manifestPath = sys_get_temp_dir() . '/dvaoplic-manifest.txt';

    if (file_put_contents($manifestPath, implode("\n", $manifestLines)) === false) {
        throw new \RuntimeException('Unable to write manifest file: ' . $manifestPath);
    }

    return $manifestPath;
}

/**
 * Bundle the given files and the manifest into a gzip-compressed tar archive
 * in the system temp directory.
 *
 * The archive is named "dvaoplic-<timestamp>.tar.gz", where the timestamp is the
 * current time formatted with static::FILE_DATETIME_FORMAT. Entries are stored
 * under their base names.
 *
 * @param string[] $filePaths    Paths of the files to add to the archive
 * @param string   $manifestPath Path of the manifest file to add alongside them
 *
 * @return string Path at which the compressed archive is expected
 */
protected function createTarGzArchive(array $filePaths, $manifestPath)
{
    $date = new DateTime('now');
    $archiveBaseName = 'dvaoplic-' . $date->format(static::FILE_DATETIME_FORMAT) . '.tar';
    $archivePath = sys_get_temp_dir() . '/' . $archiveBaseName;
    $archiveGzPath = $archivePath . '.gz';

    $tar = new \PharData($archivePath);

    try {
        foreach ($filePaths as $filePath) {
            $tar->addFile($filePath, basename($filePath));
        }

        $tar->addFile($manifestPath, basename($manifestPath));
        $tar->compress(\Phar::GZ);
    } finally {
        // Drop the archive object before deleting its file, and always remove the
        // uncompressed tar so a failure part-way through leaves nothing behind.
        unset($tar);

        if (is_file($archivePath)) {
            unlink($archivePath);
        }
    }

    return $archiveGzPath;
}

/**
 * Upload a local file to the configured S3 bucket.
 *
 * The object key is the configured path prefix followed by the file's base name;
 * when no prefix is configured the base name alone is used.
 *
 * @param string $filePath Path of the local file to upload
 *
 * @return void
 *
 * @throws \RuntimeException When the local file cannot be opened for reading
 */
protected function uploadToS3($filePath)
{
    $fileName = basename($filePath);

    // A bucket-only S3 URI yields an empty prefix; joining it with '/' would
    // create a key with a leading slash.
    $prefix = $this->path ?? '';
    $key = $prefix === '' ? $fileName : $prefix . '/' . $fileName;

    $fileResource = fopen($filePath, 'r');
    if ($fileResource === false) {
        throw new \RuntimeException('Unable to open file for upload: ' . $filePath);
    }

    try {
        $this->s3Client->putObject([
            'Bucket' => $this->s3Bucket,
            'Key' => $key,
            'Body' => $fileResource,
        ]);
    } finally {
        // Close the handle on failure too. NOTE(review): the SDK may already have
        // closed the stream it was given, hence the is_resource() guard.
        if (is_resource($fileResource)) {
            fclose($fileResource);
        }
    }

    $this->result->addMessage('Uploaded file to S3: ' . $fileName);
}

/**
 * Delete each of the given files that still exists on disk.
 *
 * @param string[] $filePaths Paths of the files to remove
 *
 * @return void
 */
protected function cleanUpFiles(array $filePaths)
{
    foreach ($filePaths as $candidate) {
        // Nothing to do for a path that is already gone.
        if (!file_exists($candidate)) {
            continue;
        }

        unlink($candidate);
    }
}
}
30 changes: 19 additions & 11 deletions app/api/module/Cli/src/Domain/CommandHandler/DataDvaNiExport.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace Dvsa\Olcs\Cli\Domain\CommandHandler;

use Aws\S3\S3Client;
use Dvsa\Olcs\Transfer\Command\CommandInterface;
use Psr\Container\ContainerInterface;
use Dvsa\Olcs\Api\Domain\QueueAwareTrait;
Expand Down Expand Up @@ -29,11 +30,6 @@ final class DataDvaNiExport extends AbstractDataExport
*/
private $reportName;

/**
* @var string
*/
protected $path;

/**
* @var Repository\DataDvaNi
*/
Expand All @@ -50,7 +46,6 @@ final class DataDvaNiExport extends AbstractDataExport
*/
public function handleCommand(CommandInterface $command)
{
$this->path = (trim($command->getPath()) ?: $this->path);
$this->reportName = $command->getReportName();

$this->dataDvaNiRepo = $this->getRepo();
Expand All @@ -71,20 +66,33 @@ public function handleCommand(CommandInterface $command)
*/
private function processNiOperatorLicences()
{
    // Exports NI operator licences to a CSV, wraps it with a checksum manifest in a
    // .tar.gz archive, uploads the archive to S3 and removes the temporary files.
    $this->result->addMessage('Fetching data from DB for NI Operator Licences');
    $dbalResult = $this->dataDvaNiRepo->fetchNiOperatorLicences();

    // Track every temporary file as soon as it exists so the finally block can
    // remove it even when a later step throws.
    $tempFiles = [];

    try {
        $csvFilePath = $this->singleCsvFromDbalResult($dbalResult, 'NiGvLicences', '-');
        $tempFiles[] = $csvFilePath;

        $manifestPath = $this->createManifest([$csvFilePath]);
        $tempFiles[] = $manifestPath;

        $archivePath = $this->createTarGzArchive([$csvFilePath], $manifestPath);
        $tempFiles[] = $archivePath;

        $this->uploadToS3($archivePath);
    } finally {
        $this->cleanUpFiles($tempFiles);
    }
}

/**
 * Factory entry point: reads the 'data-dva-ni-export' config, derives the S3
 * bucket and key prefix from its 's3_uri', fetches the S3 client from the
 * container and then defers to the parent factory.
 *
 * @param ContainerInterface $container     Service container
 * @param string             $requestedName Name the service was requested under
 * @param array|null         $options       Optional creation options
 *
 * @return mixed Whatever the parent factory returns
 *
 * @throws \RuntimeException When 's3_uri' is configured but contains no bucket name
 */
public function __invoke(ContainerInterface $container, $requestedName, array $options = null)
{
    $config = $container->get('config');
    $exportCfg = $config['data-dva-ni-export'] ?? [];

    if (isset($exportCfg['s3_uri'])) {
        $parsedUrl = parse_url(rtrim($exportCfg['s3_uri'], '/'));

        // parse_url() returns false for malformed input and omits 'host' for a
        // value without a scheme, such as an unreplaced config placeholder.
        if (!is_array($parsedUrl) || empty($parsedUrl['host'])) {
            throw new \RuntimeException(
                'Invalid data-dva-ni-export s3_uri: ' . $exportCfg['s3_uri']
            );
        }

        $this->s3Bucket = $parsedUrl['host'];
        // A bucket-only URI has no 'path' component; treat that as an empty prefix.
        $this->path = ltrim($parsedUrl['path'] ?? '', '/');
    }

    $this->s3Client = $container->get(S3Client::class);

    return parent::__invoke($container, $requestedName, $options);
}
}
Loading

0 comments on commit 3414573

Please sign in to comment.