Skip to content
This repository has been archived by the owner on Jun 21, 2024. It is now read-only.

Commit

Permalink
Flatten structure of file-layout inside the export dir.
Browse files Browse the repository at this point in the history
Before this PR, all data types (files, versions, trashbin...)
were stored inside a single files directory which closely followed ownCloud's
home-folder layout. This allowed for fast iteration but also coupled
the format to ownCloud, which in turn introduced some design quirks
(/files/files/) in files.jsonl (#111).

All data-type-specific folders are now stored in the root directory of
the export, which allows simpler mapping from metadata (#118).
This is also reflected in the architecture: the exporter traverses down from
the specific directories instead of from the home folder.

A special "root folder" was introduced in files.jsonl to further decouple things
from ownCloud and to be able to carry the ETag for the whole tree. Instead of
"/files", "/" is now the root in the export.

A Path class has been added to reduce path-merging boilerplate.
  • Loading branch information
IljaN committed Nov 9, 2019
1 parent 491f94d commit e0fa537
Show file tree
Hide file tree
Showing 17 changed files with 282 additions and 197 deletions.
7 changes: 4 additions & 3 deletions lib/Exporter.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

use OCA\DataExporter\Extractor\FilesExtractor;
use OCA\DataExporter\Extractor\MetadataExtractor;
use OCA\DataExporter\Utilities\Path;
use Symfony\Component\Filesystem\Filesystem;

class Exporter {
Expand Down Expand Up @@ -56,15 +57,15 @@ public function __construct(Serializer $serializer, MetadataExtractor $metadataE
* @return void
*/
public function export($uid, $exportDirectoryPath, $exportFiles = true) {
$exportPath = "$exportDirectoryPath/$uid";
$exportPath = Path::join($exportDirectoryPath, $uid);
$metaData = $this->metadataExtractor->extract($uid, $exportPath);
$this->filesystem->dumpFile(
"$exportPath/user.json",
Path::join($exportPath, '/user.json'),
$this->serializer->serialize($metaData)
);

if ($exportFiles) {
$filesPath = \ltrim("$exportPath/files");
$filesPath = Path::join($exportPath, 'files');
$this->filesExtractor->export($uid, $filesPath);
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Extractor/FilesExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public function __construct(RecursiveNodeIteratorFactory $iteratorFactory, Files
* @throws \OCP\Files\NotPermittedException
*/
public function export($userId, $exportPath) {
list($iterator, $baseFolder) = $this->iteratorFactory->getUserFolderParentRecursiveIterator($userId);
list($iterator, $baseFolder) = $this->iteratorFactory->getUserFolderRecursiveIterator($userId);
/** @var \OCP\Files\Node $node */
foreach ($iterator as $node) {
$nodePath = $node->getPath();
Expand Down
25 changes: 22 additions & 3 deletions lib/Extractor/MetadataExtractor/FilesMetadataExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@
namespace OCA\DataExporter\Extractor\MetadataExtractor;

use OC\User\NoUserException;
use OCA\DataExporter\Utilities\Iterators\Nodes\RecursiveNodeIteratorFactory;
use OCA\DataExporter\Model\File;
use OCA\DataExporter\Utilities\Iterators\Nodes\RecursiveNodeIteratorFactory;
use OCA\DataExporter\Utilities\Path;
use OCA\DataExporter\Utilities\StreamHelper;
use OCP\Files\Node;

Expand Down Expand Up @@ -56,17 +57,35 @@ public function __construct(RecursiveNodeIteratorFactory $iteratorFactory, Strea
* @throws NoUserException
*/
public function extract($userId, $exportPath) {
list($iterator, $baseFolder) = $this->iteratorFactory->getUserFolderParentRecursiveIterator($userId);
list($iterator, $baseFolder) = $this->iteratorFactory->getUserFolderRecursiveIterator($userId);

$filename = $exportPath . '/' . $this::FILE_NAME;
$filename = Path::join($exportPath, $this::FILE_NAME);
$this->streamFile = $this->streamHelper->initStream($filename, 'ab', true);

// Write root folder entry first to preserve its metadata
$rootFolder = (new File())
->setType(File::TYPE_FOLDER)
->setPath('/')
->setETag($baseFolder->getEtag())
->setMtime($baseFolder->getMTime())
->setPermissions($baseFolder->getPermissions());

$this->streamHelper->writelnToStream($this->streamFile, $rootFolder);

foreach ($iterator as $node) {
$nodePath = $node->getPath();
$relativePath = $baseFolder->getRelativePath($nodePath);

$file = new File();

if ("$relativePath/" === File::ROOT_FOLDER_PATH) {
$relativePath = '/';
}

if (\substr($relativePath, 0, \strlen(File::ROOT_FOLDER_PATH)) == File::ROOT_FOLDER_PATH) {
$relativePath = '/' . \substr($relativePath, \strlen(File::ROOT_FOLDER_PATH));
}

$file->setPath($relativePath);
$file->setETag($node->getEtag());
$file->setMtime($node->getMTime());
Expand Down
6 changes: 4 additions & 2 deletions lib/Importer.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
use OCA\DataExporter\Importer\ImportException;
use OCA\DataExporter\Importer\MetadataImporter;
use OCA\DataExporter\Model\Metadata;
use OCA\DataExporter\Utilities\Path;
use Symfony\Component\Filesystem\Filesystem;
use OCA\DataExporter\Importer\FilesImporter;
use OCA\DataExporter\Importer\MetadataImporter\ShareImporter;
Expand Down Expand Up @@ -67,10 +68,11 @@ public function __construct(
* @throws \OCP\PreConditionNotMetException
*/
public function import($pathToExportDir, $alias = null) {
$metaDataPath = "$pathToExportDir/user.json";
$pathToExportDir =\rtrim($pathToExportDir, '\/');
$metaDataPath = Path::join($pathToExportDir, 'user.json');

if (!$this->filesystem->exists($metaDataPath)) {
throw new ImportException("user.json not found in '$metaDataPath'");
throw new ImportException("user.json not found in \'$metaDataPath\'");
}

/** @var Metadata $metadata */
Expand Down
17 changes: 10 additions & 7 deletions lib/Importer/FilesImporter.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
use OCA\DataExporter\Utilities\StreamHelper;
use OCP\Files\IRootFolder;
use Symfony\Component\Filesystem\Filesystem;
use OCA\DataExporter\Utilities\Path;

class FilesImporter {
const FILE_NAME = 'files.jsonl';
Expand Down Expand Up @@ -69,10 +70,10 @@ public function import($userId, $exportPath) {
/**
* @var \OCP\Files\Folder $userFolder
*/
$filename = $exportPath . '/' . $this::FILE_NAME;
$exportRootFilesPath = $exportPath . '/files';
$filename = Path::join($exportPath, $this::FILE_NAME);
$exportRootFilesPath = Path::join($exportPath, '/files');

$userFolder = $this->rootFolder->getUserFolder($userId)->getParent();
$userFolder = $this->rootFolder->getUserFolder($userId);
$this->streamFile = $this
->streamHelper
->initStream($filename, 'rb');
Expand All @@ -90,15 +91,14 @@ public function import($userId, $exportPath) {
!== false
) {
$fileCachePath = $fileMetadata->getPath();
$pathToFileInExport = "$exportRootFilesPath/$fileCachePath";
$pathToFileInExport = Path::join($exportRootFilesPath, $fileCachePath);

if (!$this->filesystem->exists($pathToFileInExport)) {
throw new ImportException("File '$pathToFileInExport' not found in export but exists in metadata.json");
}

if ($fileMetadata->getType() === File::TYPE_FILE) {
$file = $userFolder->newFile($fileCachePath);

$src = \fopen($pathToFileInExport, "rb+");
if (!\is_resource($src)) {
throw new \RuntimeException("Couldn't read file in export $pathToFileInExport");
Expand All @@ -114,7 +114,6 @@ public function import($userId, $exportPath) {
\fclose($src);
\fclose($dst);

$file->putContent(\file_get_contents($pathToFileInExport));
$file->getStorage()->getCache()->update($file->getId(), [
'etag' => $fileMetadata->getETag(),
'permissions' => $fileMetadata->getPermissions()
Expand All @@ -124,7 +123,11 @@ public function import($userId, $exportPath) {
}

if ($fileMetadata->getType() === File::TYPE_FOLDER) {
$folder = $userFolder->newFolder($fileCachePath);
if ($fileMetadata->getPath() == '/') {
$folder = $userFolder;
} else {
$folder = $userFolder->newFolder($fileCachePath);
}
$folder->getStorage()->getCache()->update($folder->getId(), [
'etag' => $fileMetadata->getETag(),
'permissions' => $fileMetadata->getPermissions()
Expand Down
1 change: 1 addition & 0 deletions lib/Model/File.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
class File {
const TYPE_FOLDER = 'folder';
const TYPE_FILE = 'file';
const ROOT_FOLDER_PATH = '/files/';

private $type;
/** @var string */
Expand Down
41 changes: 0 additions & 41 deletions lib/Utilities/Iterators/Nodes/RecursiveNodeIteratorFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,45 +64,4 @@ public function getUserFolderRecursiveIterator($userId, $mode = \RecursiveIterat
$nodeIterator->addSkipCondition($conditionDifferentStorage);
return [new \RecursiveIteratorIterator($nodeIterator, $mode), $userFolder];
}

/**
 * Builds a recursive iterator rooted at the PARENT of the user's folder and
 * returns it together with that parent folder node:
 * [RecursiveIteratorIterator, Folder]
 *
 * The parent is obtained via getUserFolder($userId)->getParent(), so the
 * traversal starts one level above the folder returned by getUserFolder().
 * NOTE(review): per the original documentation this is /<user>/ (giving
 * access to trashbin, versions and other user-related directories) — confirm
 * against the storage layout.
 *
 * Two skip conditions are registered on the underlying RecursiveNodeIterator,
 * in this order:
 *  - SkipNodeConditionDifferentStorage, built from the storage id of the
 *    parent folder
 *  - SkipNodeConditionIgnorePath, built from the parent folder and the paths
 *    '/cache', '/thumbnails' and '/uploads'
 *
 * Typical usage:
 * ```
 * list($iterator, $baseFolder) = $factory->getUserFolderParentRecursiveIterator($userId);
 * foreach ($iterator as $key => $node) { .... }
 * ```
 *
 * @param string $userId the id of the user
 * @param int $mode one of the \RecursiveIteratorIterator constants
 * @return array a RecursiveIteratorIterator wrapping a RecursiveNodeIterator and the base Folder node
 * @throws \OC\User\NoUserException (unhandled exception)
 */
public function getUserFolderParentRecursiveIterator($userId, $mode = \RecursiveIteratorIterator::SELF_FIRST) {
	$homeFolder = $this->rootFolder->getUserFolder($userId)->getParent();
	$ignoredPaths = ['/cache', '/thumbnails', '/uploads'];

	$nodes = new RecursiveNodeIterator($homeFolder);

	// Conditions are registered in the same order as they are listed here.
	$skipConditions = [
		new SkipNodeConditionDifferentStorage($homeFolder->getStorage()->getId()),
		new SkipNodeConditionIgnorePath($homeFolder, $ignoredPaths),
	];
	foreach ($skipConditions as $skipCondition) {
		$nodes->addSkipCondition($skipCondition);
	}

	return [new \RecursiveIteratorIterator($nodes, $mode), $homeFolder];
}
}
60 changes: 60 additions & 0 deletions lib/Utilities/Path.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php
/**
* @author Ilja Neumann <[email protected]>
*
* @copyright Copyright (c) 2019, ownCloud GmbH
* @license GPL-2.0
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General,
* Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
namespace OCA\DataExporter\Utilities;

class Path {
	/** Matches any run of one or more forward slashes. */
	const REGEX = '#/+#';

	/**
	 * Joins path segments with "/", collapsing duplicate slashes and adding
	 * missing ones. Empty-string segments are ignored. The double slash of a
	 * leading scheme (e.g. vfs://foo/bar) is preserved.
	 *
	 * Examples:
	 *   Path::join('/tmp//export/', '/user', 'files') === '/tmp/export/user/files'
	 *   Path::join('vfs://root', '/a/', 'b.txt')      === 'vfs://root/a/b.txt'
	 *   Path::join('vfs://', 'foo')                   === 'vfs://foo'
	 *
	 * @param string ...$parts path segments to join, in order
	 * @return string the joined path, or '' when no non-empty segment is given
	 */
	public static function join(...$parts) {
		$paths = [];

		foreach ($parts as $part) {
			if ($part !== '') {
				$paths[] = $part;
			}
		}

		if (\count($paths) === 0) {
			return '';
		}

		// Collapsing every slash run also turns "scheme://" into "scheme:/";
		// that is undone below.
		$path = \preg_replace(self::REGEX, '/', \implode('/', $paths));

		// Detect the scheme textually rather than with parse_url(): parse_url()
		// is documented to return false for URLs it considers malformed
		// (NOTE(review): this appears to include an empty host, e.g. a bare
		// "vfs://"), in which case the double slash was silently not restored.
		// The scheme group may be empty so that a first segment starting with
		// "://" keeps behaving as it did before.
		if (\preg_match('#^([a-z0-9+.-]*)://#i', (string) $paths[0], $match) === 1) {
			$collapsedPrefix = $match[1] . ':/';

			if (\substr($path, 0, \strlen($collapsedPrefix)) === $collapsedPrefix) {
				$path = $match[0] . \substr($path, \strlen($collapsedPrefix));
			}
		}

		return $path;
	}
}
8 changes: 4 additions & 4 deletions tests/acceptance/data/simpleExport/userfoo/files.jsonl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"type":"folder","path":"\/files","eTag":"5bc8867cc2375","permissions":31,"mtime":1565124588}
{"type":"folder","path":"\/files\/AFolder","eTag":"5bc8867cc2375","permissions":31,"mtime":1565124588}
{"type":"file","path":"\/files\/AFolder\/afile.txt","eTag":"533c8d4b4c45b62e68cc09e810db7a23","permissions":27,"mtime":1565124588}
{"type":"file","path":"\/files\/welcome.txt","eTag":"84131779d95429f06405840e136babc2","permissions":27,"mtime":1565124588}
{"type":"folder","path":"\/","eTag":"5bc8867cc2375","permissions":31,"mtime":1565124588}
{"type":"folder","path":"\/AFolder","eTag":"5bc8867cc2375","permissions":31,"mtime":1565124588}
{"type":"file","path":"\/AFolder\/afile.txt","eTag":"533c8d4b4c45b62e68cc09e810db7a23","permissions":27,"mtime":1565124588}
{"type":"file","path":"\/welcome.txt","eTag":"84131779d95429f06405840e136babc2","permissions":27,"mtime":1565124588}
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ Feature: An administrator wants to export the files of his user using
Scenario: An uploaded file should be contained in an export.
Given user "user0" uploads file with content "hello" to "testfile.txt" using the WebDAV API
When user "user0" is exported to path "/tmp/fooSomething" using the occ command
Then the last export should contain file "files/testfile.txt" with content "hello"
Then the last export should contain file "testfile.txt" with content "hello"

6 changes: 3 additions & 3 deletions tests/unit/Extractor/FilesExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public function testExportFile() {
}));

// iterator can return an array because will just need to traverse it
$this->iteratorFactory->method('getUserFolderParentRecursiveIterator')->willReturn([[$mockFile], $userFolderParent]);
$this->iteratorFactory->method('getUserFolderRecursiveIterator')->willReturn([[$mockFile], $userFolderParent]);

$this->filesExporter->export('usertest', '/tmp/randomF');
$content = \file_get_contents('/tmp/randomF/files/foo/bar.txt');
Expand All @@ -89,7 +89,7 @@ public function testExportFolder() {
}));

// iterator can return an array because will just need to traverse it
$this->iteratorFactory->method('getUserFolderParentRecursiveIterator')->willReturn([[$mockFolder], $userFolderParent]);
$this->iteratorFactory->method('getUserFolderRecursiveIterator')->willReturn([[$mockFolder], $userFolderParent]);

$this->filesystem->expects($this->once())
->method('mkdir')
Expand Down Expand Up @@ -134,7 +134,7 @@ public function testExportFileAndFolder() {
}));

// iterator can return an array because will just need to traverse it
$this->iteratorFactory->method('getUserFolderParentRecursiveIterator')
$this->iteratorFactory->method('getUserFolderRecursiveIterator')
->willReturn([[$mockFolder1, $mockFolder2, $mockFile1, $mockFile2], $userFolderParent]);

$this->filesystem->expects($this->exactly(2))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ public function testExtract() {
$mockFile2->method('getType')->willReturn(Node::TYPE_FILE);

$userFolderParent = $this->createMock(Folder::class);
$userFolderParent->method('getEtag')->willReturn('123qweasdzxc');
$userFolderParent->method('getMTime')->willReturn(1565074220);
$userFolderParent->method('getPermissions')->willReturn(31);
$userFolderParent->method('getType')->willReturn(Node::TYPE_FOLDER);
$userFolderParent->method('getRelativePath')
->will($this->returnCallback(function ($path) {
if (\strpos($path, '/usertest/') === 0) {
Expand All @@ -101,41 +105,49 @@ public function testExtract() {
}));

// iterator can return an array because will just need to traverse it
$this->iteratorFactory->method('getUserFolderParentRecursiveIterator')
$this->iteratorFactory->method('getUserFolderRecursiveIterator')
->willReturn([[$mockFolder1, $mockFolder2, $mockFile1, $mockFile2], $userFolderParent]);

$expectedFolder0 = new File();
$expectedFolder0->setPath('/')
->setEtag('123qweasdzxc')
->setMtime(1565074220)
->setPermissions(31)
->setType(File::TYPE_FOLDER);

$expectedFolder1 = new File();
$expectedFolder1->setPath('/files/foo')
$expectedFolder1->setPath('/foo')
->setEtag('123qweasdzxc')
->setMtime(1565074220)
->setPermissions(31)
->setType(File::TYPE_FOLDER);

$expectedFolder2 = new File();
$expectedFolder2->setPath('/files/foo/courses')
$expectedFolder2->setPath('/foo/courses')
->setEtag('zaqxswcde')
->setMtime(1565074223)
->setPermissions(31)
->setType(File::TYPE_FOLDER);

$expectedFile1 = new File();
$expectedFile1->setPath('/files/foo/courses/awesome qwerty')
$expectedFile1->setPath('/foo/courses/awesome qwerty')
->setEtag('poiulkjhmnbv')
->setMtime(1565074221)
->setPermissions(1)
->setType(File::TYPE_FILE);

$expectedFile2 = new File();
$expectedFile2->setPath('/files/foo/bar.txt')
$expectedFile2->setPath('/foo/bar.txt')
->setEtag('123456789')
->setMtime(1565074120)
->setPermissions(9)
->setType(File::TYPE_FILE);

$this->streamHelper
->expects($this->exactly(4))
->expects($this->exactly(5))
->method('writelnToStream')
->withConsecutive(
[$resource, $expectedFolder0],
[$resource, $expectedFolder1],
[$resource, $expectedFolder2],
[$resource, $expectedFile1],
Expand Down
Loading

0 comments on commit e0fa537

Please sign in to comment.