Skip to content

Commit

Permalink
Merge pull request #94 from internetarchive/adding_video_formats
Browse files Browse the repository at this point in the history
Adding video formats
  • Loading branch information
cdrini authored Dec 13, 2024
2 parents e206c9a + 3c311da commit 83bfc16
Show file tree
Hide file tree
Showing 13 changed files with 63 additions and 55 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.9' ]
python-version: [ '3.12' ]
name: Python ${{ matrix.python-version }} sample
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
architecture: x64

- uses: actions/cache@v2
- uses: actions/cache@v4
with:
path: ${{ env.pythonLocation }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
Expand Down
78 changes: 40 additions & 38 deletions iiify/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@
bookdata = 'https://%s/BookReader/BookReaderJSON.php'
bookreader = "https://%s/BookReader/BookReaderImages.php"
URI_PRIFIX = "https://iiif.archive.org/iiif"

MAX_SCRAPE_LIMIT = 10_000
MAX_API_LIMIT = 1_000

# Exception subclass that adds no behavior of its own.
# NOTE(review): presumably raised when a requested limit exceeds
# MAX_SCRAPE_LIMIT / MAX_API_LIMIT; the raise sites are not visible here, confirm.
class MaxLimitException(Exception):
    pass

valid_filetypes = ['jpg', 'jpeg', 'png', 'gif', 'tif', 'jp2', 'pdf', 'tiff']
AUDIO_FORMATS = ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']
VIDEO_FORMATS = ['MPEG4', 'h.264 HD', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack','QuickTime']

class IsCollection(Exception):
# Used for when we need to raise to the route handler from inside the manifest function
Expand Down Expand Up @@ -506,6 +507,35 @@ def addThumbnails(manifest, identifier, files):
if thumbnails:
manifest.thumbnail = thumbnails

def sortDerivatives(metadata, includeVtt=False):
    """
    Sort an item's files into originals and derivatives.

    Derivatives are bucketed by the name of the original they were derived
    from and, inside each bucket, keyed by format:
    ``derivatives[original_name][format] = file``. If two derivatives of the
    same original share a format, the later one in ``metadata['files']`` wins.

    :param metadata: item metadata dict with a ``files`` list. Every entry
        must carry ``source`` and ``format``; derivatives also need
        ``original`` and VTT files need ``name``.
    :param includeVtt: when true, additionally collect files whose format is
        'Web Video Text Tracks', grouped by the file name with its
        ``.<tag>.vtt`` suffix removed.
    :return: ``(originals, derivatives)``, or
        ``(originals, derivatives, vttfiles)`` when ``includeVtt`` is true.
    """
    originals = []
    derivatives = {}
    vttfiles = {}
    for f in metadata['files']:
        if f['source'] == 'derivative':
            original = f['original']
            # `original` may be a list; a list is unhashable and cannot be
            # used as a bucket key, so such derivatives are skipped. The type
            # check must come before any dict lookup to avoid a TypeError.
            if not isinstance(original, list):
                derivatives.setdefault(original, {})[f['format']] = f
        elif f['source'] == 'original':
            originals.append(f)

        # Checked for every file regardless of source: a VTT track may be
        # either an original or a derivative.
        if includeVtt and f['format'] == 'Web Video Text Tracks':
            # Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
            # The tag class covers the full A-Z range so tags such as
            # "en-US" are stripped as well.
            sourceFilename = re.sub(r'\.[a-zA-Z-]*\.vtt', '', f['name'])
            vttfiles.setdefault(sourceFilename, []).append(f)

    if includeVtt:
        return (originals, derivatives, vttfiles)
    return (originals, derivatives)

def create_manifest3(identifier, domain=None, page=None):
# Get item metadata
metadata = requests.get('%s/metadata/%s' % (ARCHIVE, identifier)).json()
Expand Down Expand Up @@ -611,7 +641,6 @@ def create_manifest3(identifier, domain=None, page=None):
count += 1
elif mediatype == 'image':
(multiFile, format) = checkMultiItem(metadata)
print (f"Checking multiFile {multiFile} {format}")
if multiFile:
# Create multi file manifest
pageCount = 0
Expand All @@ -637,19 +666,11 @@ def create_manifest3(identifier, domain=None, page=None):
singleImage(metadata, identifier, manifest, uri)
elif mediatype == 'audio' or mediatype == 'etree':
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
for f in metadata['files']:
if f['source'] == 'derivative' and not isinstance(f['original'], list):
if f['original'] in derivatives:
derivatives[f['original']][f['format']] = f
else:
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

(originals, derivatives) = sortDerivatives(metadata)

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']]:

for file in [f for f in originals if f['format'] in AUDIO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand All @@ -663,7 +684,7 @@ def create_manifest3(identifier, domain=None, page=None):
if file['name'] in derivatives:
body = Choice(items=[])
# add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734
for format in ['VBR MP3', '32Kbps MP3', '56Kbps MP3', '64Kbps MP3', '96Kbps MP3', '128Kbps MP3', 'MPEG-4 Audio', 'Flac', 'AIFF', 'Apple Lossless Audio', 'Ogg Vorbis', 'WAVE', '24bit Flac', 'Shorten']:
for format in AUDIO_FORMATS:
if format in derivatives[file['name']]:
r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}",
type='Sound',
Expand All @@ -689,26 +710,7 @@ def create_manifest3(identifier, domain=None, page=None):
manifest.add_item(c)

elif mediatype == "movies":
# sort the files into originals and derivatives, splitting the derivatives into buckets based on the original
originals = []
derivatives = {}
vttfiles = {}
for f in metadata['files']:
if f['source'] == 'derivative':
if f['original'] in derivatives:
derivatives[f['original']][f['format']] = f
else:
derivatives[f['original']] = {f['format']: f}
elif f['source'] == 'original':
originals.append(f)

if f['format'] == 'Web Video Text Tracks':
# Example: cruz-test.en.vtt and 34C3_-_International_Image_Interoperability_Framework_IIIF_Kulturinstitutionen_schaffen_interop-SvH4fbjOT0A.autogenerated.vtt
sourceFilename = re.sub(r'\.[a-zA-H-]*\.vtt', '', f['name'])
if sourceFilename not in vttfiles:
vttfiles[sourceFilename] = []

vttfiles[sourceFilename].append(f)
(originals, derivatives, vttfiles) = sortDerivatives(metadata, includeVtt=True)

if 'access-restricted-item' in metadata['metadata'] and metadata['metadata']['access-restricted-item']:
# this is a news item so has to be treated differently
Expand All @@ -723,7 +725,7 @@ def create_manifest3(identifier, domain=None, page=None):
filedata = file

# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 HD', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack','QuickTime']]:
for file in [f for f in originals if f['format'] in VIDEO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand Down Expand Up @@ -768,7 +770,7 @@ def create_manifest3(identifier, domain=None, page=None):
manifest.add_item(c)
else:
# create the canvases for each original
for file in [f for f in originals if f['format'] in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']]:
for file in [f for f in originals if f['format'] in VIDEO_FORMATS]:
normalised_id = file['name'].rsplit(".", 1)[0]
slugged_id = normalised_id.replace(" ", "-")
c_id = f"{URI_PRIFIX}/{identifier}/{slugged_id}/canvas"
Expand Down Expand Up @@ -808,7 +810,7 @@ def create_manifest3(identifier, domain=None, page=None):
if file['name'] in derivatives:
body = Choice(items=[])
# add the choices in order per https://github.com/ArchiveLabs/iiif.archivelab.org/issues/77#issuecomment-1499672734
for format in ['MPEG4', 'h.264 MPEG4', '512Kb MPEG4', 'HiRes MPEG4', 'MPEG2', 'h.264', 'Matroska', 'Ogg Video', 'Ogg Theora', 'WebM', 'Windows Media', 'Cinepack']:
for format in VIDEO_FORMATS:
if format in derivatives[file['name']]:
r = ResourceItem(id=f"https://archive.org/download/{identifier}/{derivatives[file['name']][format]['name'].replace(' ', '%20')}",
type='Video',
Expand Down
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""
setup.py
~~~~~~~~
iiify IIIF2 web server
iiify IIIF web server
:copyright: (c) 2015 by mek.
:license: see LICENSE for more details.
Expand Down Expand Up @@ -37,16 +37,15 @@ def find_version(*file_paths):
setup(
name='iiify',
version=find_version("iiify", "__init__.py"),
description='An implementation of the IIIF Image API 2.0 Specification ',
description='An implementation of the IIIF Image API 2 and 3 Specification ',
long_description=read('README.md'),
classifiers=[
"Development Status :: 3 - Alpha",
"Environment :: Web Environment",
"Intended Audience :: Developers",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.12",
"Topic :: Internet :: WWW/HTTP",
],
author='mek',
Expand Down
3 changes: 3 additions & 0 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os

import unittest
from flask.testing import FlaskClient
from iiify.app import app

class TestAnnotations(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def test_v3_manifest_has_annotations(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -9,6 +8,7 @@
class TestBasic(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)


Expand Down
4 changes: 3 additions & 1 deletion tests/test_cantaloupe_resolver.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from iiify.resolver import cantaloupe_resolver

class TestCantaloupeResolver(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"

def test_single_image(self):
cid = cantaloupe_resolver("img-8664_202009")
self.assertEqual(cid, "img-8664_202009%2fIMG_8664.jpg")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os
os.environ["FLASK_ENV"] = "testing"

import unittest
from flask.testing import FlaskClient
from iiify.app import app
class TestCollections(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_ENV"] = "testing"
self.test_app = FlaskClient(app)

def test_v3_collection(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_images.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -8,6 +7,7 @@
class TestImages(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_ENV"] = "testing"
self.test_app = FlaskClient(app)

def test_v3_resolving(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_linking.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -8,6 +7,7 @@
class TestLinking(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def convertListToHash(self, items):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_manifests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -8,6 +7,7 @@
class TestManifests(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def test_no_version(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_manifests_v2.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from flask.testing import FlaskClient
Expand All @@ -8,6 +7,7 @@
class TestManifests(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def test_v2_image_manifest(self):
Expand Down
4 changes: 3 additions & 1 deletion tests/test_resolver.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
from iiify.resolver import purify_domain, collection, manifest_page

class TestResolver(unittest.TestCase):
def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"

def test_purify(self):
domain = purify_domain("https://example.org/iiif/")
self.assertEqual(domain, "https://example.org/iiif/")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_video.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
os.environ["FLASK_CACHE_DISABLE"] = "true"

import unittest
import math
Expand All @@ -9,6 +8,7 @@
class TestVideo(unittest.TestCase):

def setUp(self) -> None:
os.environ["FLASK_CACHE_DISABLE"] = "true"
self.test_app = FlaskClient(app)

def test_v3_single_video_manifest(self):
Expand Down

0 comments on commit 83bfc16

Please sign in to comment.