From c0598aa763bfc5cbb3cd02911245000d270904f9 Mon Sep 17 00:00:00 2001 From: Ori Hoch Date: Mon, 17 Apr 2017 23:39:39 +0300 Subject: [PATCH] added basic Datapackage, Resource and DataStream objects (+travis, coveralls, phpunit etc..) (#3) --- .coveralls.yml | 2 + .gitignore | 4 + .travis.yml | 15 ++ CONTRIBUTING.md | 23 ++ README.md | 36 ++- composer.json | 20 ++ src/DataStream.php | 53 ++++ src/Datapackage.php | 89 +++++++ src/Resource.php | 54 +++++ src/Utils.php | 26 ++ tests/DatapackageTest.php | 229 ++++++++++++++++++ tests/ResourceTest.php | 55 +++++ tests/fixtures/bar.txt | 3 + tests/fixtures/baz.txt | 2 + tests/fixtures/foo.txt | 1 + tests/fixtures/multi_data_datapackage.json | 8 + tests/fixtures/simple_valid_datapackage.json | 6 + .../simple_valid_datapackage_no_data.json | 6 + 18 files changed, 630 insertions(+), 2 deletions(-) create mode 100644 .coveralls.yml create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 CONTRIBUTING.md create mode 100644 composer.json create mode 100644 src/DataStream.php create mode 100644 src/Datapackage.php create mode 100644 src/Resource.php create mode 100644 src/Utils.php create mode 100644 tests/DatapackageTest.php create mode 100644 tests/ResourceTest.php create mode 100644 tests/fixtures/bar.txt create mode 100644 tests/fixtures/baz.txt create mode 100644 tests/fixtures/foo.txt create mode 100644 tests/fixtures/multi_data_datapackage.json create mode 100644 tests/fixtures/simple_valid_datapackage.json create mode 100644 tests/fixtures/simple_valid_datapackage_no_data.json diff --git a/.coveralls.yml b/.coveralls.yml new file mode 100644 index 0000000..e1371a2 --- /dev/null +++ b/.coveralls.yml @@ -0,0 +1,2 @@ +coverage_clover: coverage-clover.xml +json_path: coveralls.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1dd8ac7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/vendor/ +/.idea/ +/coverage-clover.xml +/composer.lock diff --git a/.travis.yml b/.travis.yml 
new file mode 100644 index 0000000..57e3f14 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +language: php +php: + - '5.4' + - '5.5' + - '5.6' + - '7.0' + - '7.1' + - nightly + - hhvm +before_script: + - composer install --prefer-dist +script: + - composer test +after_success: + - vendor/bin/coveralls diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..3da114e --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# Contributing + +The project follows the [Open Knowledge International coding standards](https://github.com/okfn/coding-standards). + + +## Getting Started + +1. Clone the repo +2. Run the tests +``` +$ composer install +$ composer test +``` + +## Phpunit - for unit tests + +Phpunit is used for unit tests, you can find the tests under the tests directory + +Running Phpunit directly: `vendor/bin/phpunit` + +## Coveralls - for coverage + +When running `composer test`, phpunit generates a coverage report in coverage-clover.xml - this is then sent to Coveralls via Travis. 
diff --git a/README.md b/README.md index 74010ce..cb077d1 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,40 @@ [![Travis](https://travis-ci.org/frictionlessdata/datapackage-php.svg?branch=master)](https://travis-ci.org/frictionlessdata/datapackage-php) [![Coveralls](http://img.shields.io/coveralls/frictionlessdata/datapackage-php.svg?branch=master)](https://coveralls.io/r/frictionlessdata/datapackage-php?branch=master) -[![Packagist](https://img.shields.io/packagist/dm/oki/datapackage.svg)](https://packagist.org/packages/oki/datapackage) +[![Packagist](https://img.shields.io/packagist/dm/frictionlessdata/datapackage.svg)](https://packagist.org/packages/frictionlessdata/datapackage) [![SemVer](https://img.shields.io/badge/versions-SemVer-brightgreen.svg)](http://semver.org/) [![Gitter](https://img.shields.io/gitter/room/frictionlessdata/chat.svg)](https://gitter.im/frictionlessdata/chat) -A utility library for working with [Data Package](https://specs.frictionlessdata.io/data-package/) in php. +A utility library for working with [Data Package](https://specs.frictionlessdata.io/data-package/) in PHP. + + +## Getting Started + +### Installation + +```bash +$ composer require frictionlessdata/datapackage +``` + +### Usage + +```php +use frictionlessdata\datapackage\Datapackage; + +$datapackage = new Datapackage("tests/fixtures/multi_data_datapackage.json"); +foreach ($datapackage as $resource) { + print("-- ".$resource->name()." 
--"); + $i = 0; + foreach ($resource as $dataStream) { + print("-dataStream ".++$i); + foreach ($dataStream as $line) { + print($line); + } + } +} +``` + + +## Contributing + +Please read the contribution guidelines: [How to Contribute](CONTRIBUTING.md) diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..9deae4b --- /dev/null +++ b/composer.json @@ -0,0 +1,20 @@ +{ + "name": "frictionlessdata/datapackage", + "description": "A utility library for working with Data Packages", + "license": "MIT", + "require": { + "php": ">=5.4" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35", + "satooshi/php-coveralls": "^1.0" + }, + "autoload": { + "psr-4": { + "frictionlessdata\\datapackage\\": "src/" + } + }, + "scripts": { + "test": "phpunit --debug tests/ --coverage-clover coverage-clover.xml" + } +} diff --git a/src/DataStream.php b/src/DataStream.php new file mode 100644 index 0000000..0e59ac0 --- /dev/null +++ b/src/DataStream.php @@ -0,0 +1,53 @@ +_fopenResource = fopen($dataSource, "r"); + } catch (\Exception $e) { + throw new DataStreamOpenException("Failed to open source ".json_encode($dataSource)); + } + } + + public function __destruct() + { + fclose($this->_fopenResource); + } + + public function rewind() { + if ($this->_currentLineNumber != 0) { + throw new \Exception("DataStream does not support rewind, sorry"); + } + } + + public function current() { + $line = fgets($this->_fopenResource); + if ($line === false) { + return ""; + } else { + return $line; + } + } + + public function key() { + return $this->_currentLineNumber; + } + + public function next() { + $this->_currentLineNumber++; + } + + public function valid() { + return (!feof($this->_fopenResource)); + } +} + + +class DataStreamOpenException extends \Exception {}; diff --git a/src/Datapackage.php b/src/Datapackage.php new file mode 100644 index 0000000..d8b714a --- /dev/null +++ b/src/Datapackage.php @@ -0,0 +1,89 @@ +_descriptor = $source; + $this->_basePath = $basePath; + 
} elseif (is_string($source)) { + if (Utils::is_json_string($source)) { + try { + $this->_descriptor = json_decode($source); + } catch (\Exception $e) { + throw new DatapackageInvalidSourceException("Failed to load source: ".json_encode($source).": ".$e->getMessage()); + } + $this->_basePath = $basePath; + } elseif ($this->_isHttpSource($source)) { + try { + $this->_descriptor = json_decode(file_get_contents($this->_normalizeHttpSource($source))); + } catch (\Exception $e) { + throw new DatapackageInvalidSourceException("Failed to load source: ".json_encode($source).": ".$e->getMessage()); + } + // http sources don't allow relative paths, hence basePath should remain null + $this->_basePath = null; + } else { + if (empty($basePath)) { + $this->_basePath = dirname($source); + } else { + $this->_basePath = $basePath; + $absPath = $this->_basePath.DIRECTORY_SEPARATOR.$source; + if (file_exists($absPath)) { + $source = $absPath; + } + } + try { + $this->_descriptor = json_decode(file_get_contents($source)); + } catch (\Exception $e) { + throw new DatapackageInvalidSourceException("Failed to load source: ".json_encode($source).": ".$e->getMessage()); + } + + } + } else { + throw new DatapackageInvalidSourceException("Invalid source: ".json_encode($source)); + } + } + + protected function _normalizeHttpSource($source) + { + return $source; + } + + protected function _isHttpSource($source) + { + return Utils::is_http_source($source); + } + + protected function _initResource($resourceDescriptor) + { + return new Resource($resourceDescriptor, $this->_basePath); + } + + public function descriptor() + { + return $this->_descriptor; + } + + // standard iterator functions - to iterate over the resources + public function rewind() { $this->_currentResourcePosition = 0; } + public function current() { return $this->_initResource($this->descriptor()->resources[$this->_currentResourcePosition]); } + public function key() { return $this->_currentResourcePosition; } + public function 
next() { $this->_currentResourcePosition++; } + public function valid() { return isset($this->descriptor()->resources[$this->_currentResourcePosition]); } +} + + +class DatapackageInvalidSourceException extends \Exception {}; diff --git a/src/Resource.php b/src/Resource.php new file mode 100644 index 0000000..f43d455 --- /dev/null +++ b/src/Resource.php @@ -0,0 +1,54 @@ +_basePath = $basePath; + $this->_descriptor = $descriptor; + } + + protected function _isHttpSource($dataSource) + { + return Utils::is_http_source($dataSource); + } + + protected function _normalizeDataSource($dataSource) + { + if (!empty($this->_basePath) && !Utils::is_http_source($dataSource)) { + // TODO: support JSON pointers + $absPath = $this->_basePath.DIRECTORY_SEPARATOR.$dataSource; + if (file_exists($absPath)) { + $dataSource = $absPath; + } + } + return $dataSource; + } + + protected function _getDataStream($dataSource) + { + return new DataStream($this->_normalizeDataSource($dataSource)); + } + + public function descriptor() + { + return $this->_descriptor; + } + + public function name() + { + return $this->descriptor()->name; + } + + // standard iterator functions - to iterate over the data sources + public function rewind() { $this->_currentDataPosition = 0; } + public function current() { return $this->_getDataStream($this->descriptor()->data[$this->_currentDataPosition]); } + public function key() { return $this->_currentDataPosition; } + public function next() { $this->_currentDataPosition++; } + public function valid() { return isset($this->descriptor()->data[$this->_currentDataPosition]); } +} diff --git a/src/Utils.php b/src/Utils.php new file mode 100644 index 0000000..bc65bf2 --- /dev/null +++ b/src/Utils.php @@ -0,0 +1,26 @@ +simpleDescriptorArray = [ + "name" => "datapackage-name", + "resources" => [ + ["name" => "resource-name", "data" => ["foo.txt"] ] + ] + ]; + $this->simpleDescriptor = (object)[ + "name" => "datapackage-name", + "resources" => [ + (object)["name" => 
"resource-name", "data" => ["foo.txt"] ] + ] + ]; + $this->simpleDescriptorExpectedData = ["resource-name" => [["foo"]]]; + $this->fixturesPath = dirname(__FILE__)."/fixtures"; + } + + /** + * @param object $expectedDescriptor + * @param Datapackage $datapackage + */ + public function assertDatapackageDescriptor($expectedDescriptor, $datapackage) + { + $this->assertEquals($expectedDescriptor, $datapackage->descriptor()); + } + + /** + * @param array $expectedData + * @param Datapackage $datapackage + */ + public function assertDatapackageData($expectedData, $datapackage) + { + $allResourcesData = []; + foreach ($datapackage as $resource) { + $resourceData = []; + foreach ($resource as $dataStream) { + $data = []; + foreach ($dataStream as $line) { + $data[] = $line; + } + $resourceData[] = $data; + } + $allResourcesData[$resource->name()] = $resourceData; + } + $this->assertEquals($expectedData, $allResourcesData); + } + + /** + * @param string $source + * @param object $expectedDescriptor + * @param array $expectedData + */ + public function assertDatapackage($expectedDescriptor, $expectedData, $datapackage) + { + $this->assertDatapackageDescriptor($expectedDescriptor, $datapackage); + $this->assertDatapackageData($expectedData, $datapackage); + } + + public function assertDatapackageException($expectedExceptionClass, $datapackageCallback) + { + try { + $datapackageCallback(); + } catch (\Exception $e) { + $this->assertEquals($expectedExceptionClass, get_class($e), $e->getMessage()); + } + } + + public function testNativePHPArrayShouldFail() + { + $descriptorArray = $this->simpleDescriptorArray; + $this->assertDatapackageException( + "frictionlessdata\\datapackage\\DatapackageInvalidSourceException", + function() use ($descriptorArray) { new Datapackage($descriptorArray); } + ); + } + + public function testNativePHPObjectWithoutBasePathShouldFail() + { + $descriptor = $this->simpleDescriptor; + $this->assertDatapackageException( + 
"frictionlessdata\\datapackage\\DataStreamOpenException", + function() use ($descriptor) { new Datapackage($descriptor); } + ); + } + + public function testNativePHPObjectWithBasePath() + { + $this->assertDatapackage( + $this->simpleDescriptor, $this->simpleDescriptorExpectedData, + new Datapackage($this->simpleDescriptor, $this->fixturesPath) + ); + } + + public function testJsonStringWithoutBasePathShouldFail() + { + $source = json_encode($this->simpleDescriptor); + $this->assertDatapackageException( + "frictionlessdata\\datapackage\\DataStreamOpenException", + function() use ($source) { new Datapackage($source); } + ); + } + + public function testJsonStringWithBasePath() + { + $source = json_encode($this->simpleDescriptor); + $this->assertDatapackage( + $this->simpleDescriptor, $this->simpleDescriptorExpectedData, + new Datapackage($source, $this->fixturesPath) + ); + } + + public function testNonExistantFileShouldFail() + { + $this->assertDatapackageException( + "frictionlessdata\\datapackage\\DatapackageInvalidSourceException", + function() { new Datapackage("-invalid-"); } + ); + } + + public function testJsonFileRelativeToBasePath() + { + $this->assertDatapackage( + $this->simpleDescriptor, $this->simpleDescriptorExpectedData, + new Datapackage("simple_valid_datapackage.json", $this->fixturesPath) + ); + } + + public function testJsonFileRelativeToCurrentDirectory() + { + $this->assertDatapackage( + $this->simpleDescriptor, $this->simpleDescriptorExpectedData, + new Datapackage("tests/fixtures/simple_valid_datapackage.json") + ); + } + + public function testHttpSource() + { + $this->assertDatapackage( + (object)[ + "name" => "datapackage-name", + "resources" => [ + (object)["name" => "resource-name", "data" => [] ] + ] + ], ["resource-name" => []], + new MockDatapackage("mock-http://simple_valid_datapackage_no_data.json") + ); + } + + public function testMultiDataDatapackage() + { + $out = []; + $datapackage = new 
Datapackage("tests/fixtures/multi_data_datapackage.json"); + foreach ($datapackage as $resource) { + $out[] = "-- ".$resource->name()." --"; + $i = 0; + foreach ($resource as $dataStream) { + $out[] = "-dataStream ".++$i; + foreach ($dataStream as $line) { + $out[] = $line; + } + } + } + $this->assertEquals([ + "-- first-resource --", + "-dataStream 1", + "foo", + "-dataStream 2", + "BAR!\n", + "bar\n", + "בר\n", + "", + "-dataStream 3", + "בזבזבז\n", + "זבזבזב", + "-- second-resource --", + "-dataStream 1", + "BAR!\n", + "bar\n", + "בר\n", + "", + "-dataStream 2", + "בזבזבז\n", + "זבזבזב", + "-- third-resource --", + "-dataStream 1", + "בזבזבז\n", + "זבזבזב", + ], $out); + } + +} + + +class MockDatapackage extends Datapackage { + + protected function _isHttpSource($dataSource) + { + return ( + strpos($dataSource, "mock-http://") === 0 + || parent::_isHttpSource($dataSource) + ); + } + + protected function _normalizeHttpSource($dataSource) + { + $dataSource = parent::_normalizeHttpSource($dataSource); + if (strpos($dataSource, "mock-http://") === 0) { + $dataSource = str_replace("mock-http://", "", $dataSource); + $dataSource = dirname(__FILE__).DIRECTORY_SEPARATOR."fixtures".DIRECTORY_SEPARATOR.$dataSource; + } + return $dataSource; + } + +} \ No newline at end of file diff --git a/tests/ResourceTest.php b/tests/ResourceTest.php new file mode 100644 index 0000000..7ae7333 --- /dev/null +++ b/tests/ResourceTest.php @@ -0,0 +1,55 @@ +assertEquals($expectedData, $actualData); + } + + public function testHttpDataSourceShouldNotGetBasePath() + { + $this->assertResourceData([["foo"],["foo"]], new MockResource((object)[ + "name" => "resource-name", + "data" => [ + "mock-http://foo.txt", // basePath will not be added to http source + "foo.txt" // basePath will be added here + ] + ], dirname(__FILE__).DIRECTORY_SEPARATOR."fixtures")); + } +} + + +class MockResource extends Resource +{ + protected function _isHttpSource($dataSource) + { + return ( + strpos($dataSource, 
"mock-http://") === 0 + || parent::_isHttpSource($dataSource) + ); + } + + protected function _normalizeDataSource($dataSource) + { + $dataSource = parent::_normalizeDataSource($dataSource); + if (strpos($dataSource, "mock-http://") === 0) { + $dataSource = str_replace("mock-http://", "", $dataSource); + $dataSource = dirname(__FILE__).DIRECTORY_SEPARATOR."fixtures".DIRECTORY_SEPARATOR.$dataSource; + } + return $dataSource; + } +} \ No newline at end of file diff --git a/tests/fixtures/bar.txt b/tests/fixtures/bar.txt new file mode 100644 index 0000000..e2c85f4 --- /dev/null +++ b/tests/fixtures/bar.txt @@ -0,0 +1,3 @@ +BAR! +bar +בר diff --git a/tests/fixtures/baz.txt b/tests/fixtures/baz.txt new file mode 100644 index 0000000..de35e21 --- /dev/null +++ b/tests/fixtures/baz.txt @@ -0,0 +1,2 @@ +בזבזבז +זבזבזב \ No newline at end of file diff --git a/tests/fixtures/foo.txt b/tests/fixtures/foo.txt new file mode 100644 index 0000000..1910281 --- /dev/null +++ b/tests/fixtures/foo.txt @@ -0,0 +1 @@ +foo \ No newline at end of file diff --git a/tests/fixtures/multi_data_datapackage.json b/tests/fixtures/multi_data_datapackage.json new file mode 100644 index 0000000..300b9bc --- /dev/null +++ b/tests/fixtures/multi_data_datapackage.json @@ -0,0 +1,8 @@ +{ + "name": "multi-data", + "resources": [ + {"name": "first-resource", "data": ["foo.txt", "bar.txt", "baz.txt"]}, + {"name": "second-resource", "data": ["bar.txt", "baz.txt"]}, + {"name": "third-resource", "data": ["baz.txt"]} + ] +} \ No newline at end of file diff --git a/tests/fixtures/simple_valid_datapackage.json b/tests/fixtures/simple_valid_datapackage.json new file mode 100644 index 0000000..1f17976 --- /dev/null +++ b/tests/fixtures/simple_valid_datapackage.json @@ -0,0 +1,6 @@ +{ + "name": "datapackage-name", + "resources": [ + { "name": "resource-name", "data": ["foo.txt"] } + ] +} \ No newline at end of file diff --git a/tests/fixtures/simple_valid_datapackage_no_data.json 
b/tests/fixtures/simple_valid_datapackage_no_data.json new file mode 100644 index 0000000..2c71d24 --- /dev/null +++ b/tests/fixtures/simple_valid_datapackage_no_data.json @@ -0,0 +1,6 @@ +{ + "name": "datapackage-name", + "resources": [ + { "name": "resource-name", "data": [] } + ] +} \ No newline at end of file