From 4733b49fa5458ae1e65232397f3ee2c997164048 Mon Sep 17 00:00:00 2001 From: micbar Date: Thu, 25 Jul 2019 11:31:38 +0200 Subject: [PATCH 1/3] Enable drone on feature branches --- .drone.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index fb07145..fe42622 100644 --- a/.drone.yml +++ b/.drone.yml @@ -2,7 +2,7 @@ workspace: base: /var/www/owncloud path: apps/data_exporter -branches: [ master, release* ] +branches: [ master, release*, feature* ] clone: git: From 17b96eeb18fc24c27ae4e111ee6adef088d3c55b Mon Sep 17 00:00:00 2001 From: micbar Date: Thu, 25 Jul 2019 12:19:53 +0200 Subject: [PATCH 2/3] Remove wildcard on feature branch CI --- .drone.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index fe42622..7a2e2e4 100644 --- a/.drone.yml +++ b/.drone.yml @@ -2,7 +2,7 @@ workspace: base: /var/www/owncloud path: apps/data_exporter -branches: [ master, release*, feature* ] +branches: [ master, release*, feature/next ] clone: git: From ad51bb2408fc5951bb9ae46633710801185ba6c6 Mon Sep 17 00:00:00 2001 From: Ilja Neumann Date: Mon, 29 Jul 2019 10:23:16 +0200 Subject: [PATCH 3/3] Serializer with streaming api [WIP] --- lib/Exporter.php | 8 +-- lib/Importer.php | 7 ++- lib/InstanceExporter.php | 7 +-- lib/InstanceImporter.php | 10 ++-- lib/Io/JsonLinesEncoder.php | 71 ++++++++++++++++++++++++++ lib/{ => Io}/Serializer.php | 64 +++++++++++++++++------ tests/integration/SerializerTest.php | 76 ++++++++++++++++++++++++++++ 7 files changed, 215 insertions(+), 28 deletions(-) create mode 100644 lib/Io/JsonLinesEncoder.php rename lib/{ => Io}/Serializer.php (52%) create mode 100644 tests/integration/SerializerTest.php diff --git a/lib/Exporter.php b/lib/Exporter.php index c13da77..1b00d62 100644 --- a/lib/Exporter.php +++ b/lib/Exporter.php @@ -24,6 +24,7 @@ use OCA\DataExporter\Extractor\FilesExtractor; use OCA\DataExporter\Extractor\MetadataExtractor; +use OCA\DataExporter\Io\Serializer; use Symfony\Component\Filesystem\Filesystem; class Exporter { @@ -47,10 +48,9 @@ public function __construct(Serializer $serializer, MetadataExtractor $metadataE public function export($uid, $exportDirectoryPath) { $exportPath = "$exportDirectoryPath/$uid"; $metaData = $this->metadataExtractor->extract($uid); - $this->filesystem->dumpFile( - "$exportPath/metadata.json", - $this->serializer->serialize($metaData) - ); + $stream =\fopen("$exportPath/metadata.json", 'wb'); + $this->serializer->serializeToStream($metaData, $stream); + \fclose($stream); $filesPath = \ltrim("$exportPath/files"); $this->filesExtractor->export($uid, $filesPath); diff --git a/lib/Importer.php b/lib/Importer.php index dd107e7..b969de6 100644 --- a/lib/Importer.php +++ b/lib/Importer.php @@ -25,6 +25,7 @@ use OCA\DataExporter\Importer\ImportException; use OCA\DataExporter\Importer\MetadataImporter; +use OCA\DataExporter\Io\Serializer; use OCA\DataExporter\Model\Metadata; use Symfony\Component\Filesystem\Filesystem; use OCA\DataExporter\Importer\FilesImporter; @@ -73,9 +74,11 @@ public function import($pathToExportDir, $alias = null) { throw new ImportException("metadata.json not found in '$metaDataPath'"); } + $stream = \fopen($metaDataPath, 'rb'); + /** @var Metadata $metadata */ - $metadata = $this->serializer->deserialize( - \file_get_contents($metaDataPath), + $metadata = $this->serializer->deserializeStream( + $stream, Metadata::class ); diff --git a/lib/InstanceExporter.php b/lib/InstanceExporter.php index 379f018..e04f040 100644 --- a/lib/InstanceExporter.php +++ b/lib/InstanceExporter.php @@ -25,6 +25,7 @@ use OCA\DataExporter\Exporter\InstanceExtractor; use Symfony\Component\Filesystem\Filesystem; +use OCA\DataExporter\Io\Serializer; /** * Class InstanceExporter @@ -69,9 +70,9 @@ public function __construct(Serializer $serializer, InstanceExtractor $instanceE */ public function export($exportDirectoryPath) { $instanceData = $this->instanceExtractor->extract(); - $this->filesystem->dumpFile( - "$exportDirectoryPath/instancedata.json", - $this->serializer->serialize($instanceData) + $this->serializer->serializeToStream( + $instanceData, + \fopen("$exportDirectoryPath/instancedata.json", 'wb') ); } } diff --git a/lib/InstanceImporter.php b/lib/InstanceImporter.php index 78799b5..9b67058 100644 --- a/lib/InstanceImporter.php +++ b/lib/InstanceImporter.php @@ -27,6 +27,7 @@ use OCA\DataExporter\Importer\InstanceDataImporter; use OCA\DataExporter\Model\Instance; use Symfony\Component\Filesystem\Filesystem; +use OCA\DataExporter\Io\Serializer; class InstanceImporter { /** @@ -74,13 +75,16 @@ public function import($pathToExportDir) { } /** - * @var Instance $instanceData + * @var Instance|\Traversable $instanceData */ - $instanceData = $this->serializer->deserialize( - \file_get_contents($instanceDataPath), + $instanceData = $this->serializer->deserializeStream( + \fopen($instanceDataPath, 'wb'), Instance::class ); + // Workaround, generator should be passed to importer for lazy io + \iterator_to_array($instanceData)[0]; + $this->instanceDataImporter->import($instanceData); } } diff --git a/lib/Io/JsonLinesEncoder.php b/lib/Io/JsonLinesEncoder.php new file mode 100644 index 0000000..f844dc9 --- /dev/null +++ b/lib/Io/JsonLinesEncoder.php @@ -0,0 +1,71 @@ +jsonEncoder = new JsonEncode(); + } + + /** + * Encodes data into the given format. + * + * @param mixed $data Data to encode + * @param string $format Format name + * @param array $context Options that normalizers/encoders have access to + * + * @return string|int|float|bool + * + * @throws UnexpectedValueException + */ + public function encode($data, $format, array $context = []) { + $typeHint = $this->getEncodingTypeHint($context); + + if (!\in_array($typeHint, ['object', 'array'])) { + throw new \InvalidArgumentException('Only objects and arrays supported for jsonl encoding'); + } + + if ($typeHint === 'object') { + return $this->jsonEncoder->encode($data, 'json') . PHP_EOL; + } + + $jsonLines = ''; + + if ($typeHint === 'array' && \count($data) > 0) { + foreach ($data as $line) { + $jsonLines .= $this->jsonEncoder->encode($line, 'json') . PHP_EOL; + } + } + + return $jsonLines; + } + + private function getEncodingTypeHint($context) { + if (!isset($context[self::class]['type_hint'])) { + throw new \InvalidArgumentException('Missing typehint for jsonl encoder'); + } + + return $context[self::class]['type_hint']; + } + + /** + * Checks whether the serializer can encode to given format. + * + * @param string $format Format name + * + * @return bool + */ + public function supportsEncoding($format) { + return $format === self::FORMAT; + } +} diff --git a/lib/Serializer.php b/lib/Io/Serializer.php similarity index 52% rename from lib/Serializer.php rename to lib/Io/Serializer.php index 8c5ad2a..50c559b 100644 --- a/lib/Serializer.php +++ b/lib/Io/Serializer.php @@ -20,7 +20,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * */ -namespace OCA\DataExporter; +namespace OCA\DataExporter\Io; use Symfony\Component\PropertyInfo\Extractor\PhpDocExtractor; use Symfony\Component\Serializer\Encoder\JsonEncoder; @@ -28,13 +28,16 @@ use Symfony\Component\Serializer\Normalizer\DateTimeNormalizer; use Symfony\Component\Serializer\Normalizer\ObjectNormalizer; +/** + * Lazy jsonl (de)serialization from streams. + */ class Serializer { /** @var \Symfony\Component\Serializer\Serializer */ private $serializer; public function __construct() { - $encoders = [new JsonEncoder()]; + $encoders = [new JsonEncoder(), new JsonLinesEncoder()]; $normalizers = [ new DateTimeNormalizer(), new ArrayDenormalizer(), @@ -45,25 +48,54 @@ public function __construct() { } /** - * Serializes data in the appropriate format. - * - * @param mixed $data Any data - * - * @return string + * @param $jsonlStream + * @param $type + * @return \Generator */ - public function serialize($data) { - return $this->serializer->serialize($data, 'json', []); + public function deserializeStream($jsonlStream, $type) { + foreach ($this->readLines($jsonlStream) as $jsonLine) { + $jsonLine = $this->serializer->decode($jsonLine, 'json'); + yield $this->serializer->denormalize($jsonLine, $type); + } } /** - * Deserializes data into the given type. - * - * @param mixed $data - * @param string $type + * @param $data + * @param $toStream + */ + public function serializeToStream($data, $toStream) { + $ctx = [JsonLinesEncoder::class => ['type_hint' => \gettype($data)]]; + $norm = $this->serializer->normalize($data, 'jsonl'); + $jsonLine = $this->serializer->encode($norm, 'jsonl', $ctx); + + \fwrite($toStream, $jsonLine); + } + + /** + * Lazily-reads a stream of lines in to a buffer, then blocks until + * the buffer is yielded completely. * - * @return object + * @param resource $stream + * @param int $lineBufSize Number of lines to buffer + * @return \Generator */ - public function deserialize($data, $type) { - return $this->serializer->deserialize($data, $type, 'json', []); + private function readLines($stream, $lineBufSize = 256) { + $buf = []; + while (($line = \fgets($stream)) !== false) { + $buf[] = $line; + //Buffer n lines then decode batch + if (\sizeof($buf) >= $lineBufSize) { + foreach ($buf as $k => $l) { + yield $l; + unset($buf[$k]); + } + } + } + + // Empty the remaining buffer + foreach ($buf as $k => $l) { + yield $l; + unset($buf[$k]); + } } } diff --git a/tests/integration/SerializerTest.php b/tests/integration/SerializerTest.php new file mode 100644 index 0000000..603e4a7 --- /dev/null +++ b/tests/integration/SerializerTest.php @@ -0,0 +1,76 @@ +testId = \bin2hex(\random_bytes(4)); + $this->ser = new Serializer(); + + $this->testObjects = [ + (new File()) + ->setPermissions(19) + ->setETag('12413rr') + ->setType(File::TYPE_FILE) + ->setPath('/foo/bar.txt'), + (new File()) + ->setPermissions(20) + ->setETag('43t3t3g3g') + ->setType(File::TYPE_FOLDER) + ->setPath('/pics'), + ]; + } + + public function testSerialize() { + $stream = \fopen('php://memory', 'rb+'); + + // Serialize single objects + foreach ($this->testObjects as $f) { + $this->ser->serializeToStream($f, $stream); + } + + \rewind($stream); + + $actual = \stream_get_contents($stream); + $this->assertEquals(self::TEST_JSONL, $actual); + + \fclose($stream); + } + + public function testDeserialization() { + $stream = \fopen('php://memory', 'rb+'); + \fwrite($stream, self::TEST_JSONL); + \rewind($stream); + + /** @var File[] $expected */ + $expected = $this->testObjects; + /** @var File[] $actual */ + $actual = $this->ser->deserializeStream($stream, File::class); + + foreach ($actual as $key => $obj) { + $this->assertEquals($expected[$key]->getETag(), $obj->getETag()); + $this->assertEquals($expected[$key]->getPath(), $obj->getPath()); + $this->assertEquals($expected[$key]->getType(), $obj->getType()); + $this->assertEquals($expected[$key]->getPermissions(), $obj->getPermissions()); + } + + \fclose($stream); + } +}