From e6219dd547ee747ac51ab1f46e295bdb537d2ca1 Mon Sep 17 00:00:00 2001 From: Eric Z Date: Tue, 15 Jan 2019 17:34:51 -0600 Subject: [PATCH] update to use gzip-compressed precompute features --- Makefile | 8 ++++---- audfprint_analyze.py | 16 ++++++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 06d3c06..83d6745 100644 --- a/Makefile +++ b/Makefile @@ -33,14 +33,14 @@ test_onecore_precomp: precompdir ${AUDFPRINT} new --dbase fpdbase0.pklz precompdir/Nine_Lives/0* ${AUDFPRINT} new --dbase fpdbase1.pklz precompdir/Nine_Lives/1* ${AUDFPRINT} merge --dbase fpdbase1.pklz fpdbase0.pklz - ${AUDFPRINT} match --dbase fpdbase1.pklz precompdir/query.afpt + ${AUDFPRINT} match --dbase fpdbase1.pklz precompdir/query.afptz test_onecore_newmerge: precompdir ${AUDFPRINT} new --dbase fpdbase0.pklz precompdir/Nine_Lives/0* ${AUDFPRINT} new --dbase fpdbase1.pklz precompdir/Nine_Lives/1* rm -f fpdbase2.pklz ${AUDFPRINT} newmerge --dbase fpdbase2.pklz fpdbase0.pklz fpdbase1.pklz - ${AUDFPRINT} match --dbase fpdbase2.pklz precompdir/query.afpt + ${AUDFPRINT} match --dbase fpdbase2.pklz precompdir/query.afptz precompdir: audfprint.py audfprint_analyze.py audfprint_match.py hash_table.py rm -rf precompdir @@ -52,7 +52,7 @@ test_onecore_precomppk: precomppkdir ${AUDFPRINT} new --dbase fpdbase0.pklz precomppkdir/Nine_Lives/0* ${AUDFPRINT} new --dbase fpdbase1.pklz precomppkdir/Nine_Lives/1* ${AUDFPRINT} merge --dbase fpdbase1.pklz fpdbase0.pklz - ${AUDFPRINT} match --dbase fpdbase1.pklz precomppkdir/query.afpk + ${AUDFPRINT} match --dbase fpdbase1.pklz precomppkdir/query.afpkz rm -rf precomppkdir precomppkdir: audfprint.py audfprint_analyze.py audfprint_match.py hash_table.py @@ -72,7 +72,7 @@ test_mucore_precomp: precompdir_mu ${AUDFPRINT} new --dbase fpdbase_mu0.pklz --ncores 4 precompdir_mu/Nine_Lives/0* ${AUDFPRINT} new --dbase fpdbase_mu.pklz --ncores 4 precompdir_mu/Nine_Lives/1* ${AUDFPRINT} merge --dbase fpdbase_mu.pklz 
fpdbase_mu0.pklz - ${AUDFPRINT} match --dbase fpdbase_mu.pklz --ncores 4 precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt + ${AUDFPRINT} match --dbase fpdbase_mu.pklz --ncores 4 precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu: audfprint.py audfprint_analyze.py audfprint_match.py hash_table.py rm -rf precompdir_mu diff --git a/audfprint_analyze.py b/audfprint_analyze.py index 78189c0..6e71e18 100644 --- a/audfprint_analyze.py +++ b/audfprint_analyze.py @@ -13,6 +13,7 @@ import os import struct # For reading/writing hashes to file import time # For glob2hashtable, localtester +import gzip # for compressed file writing import numpy as np import scipy.signal @@ -22,11 +23,12 @@ import stft + # ############### Globals ############### # # Special extension indicating precomputed fingerprint -PRECOMPEXT = '.afpt' +PRECOMPEXT = '.afptz' # A different precomputed fingerprint is just the peaks -PRECOMPPKEXT = '.afpk' +PRECOMPPKEXT = '.afpkz' def locmax(vec, indices=False): @@ -458,10 +460,12 @@ def ingest(self, hashtable, filename): PEAK_FMT = '<2i' PEAK_MAGIC = b'audfprintpeakV00' # 16 chars, FWIW +def is_compressed_ext(filename): + return filename[-1]=='z' def hashes_save(hashfilename, hashes): """ Write out a list of (time, hash) pairs as 32 bit ints """ - with open(hashfilename, 'wb') as f: + with gzip.open(hashfilename, 'wb') if is_compressed_ext(hashfilename) else open(hashfilename, 'wb') as f: f.write(HASH_MAGIC) for time_, hash_ in hashes: f.write(struct.pack(HASH_FMT, time_, hash_)) @@ -471,7 +475,7 @@ def hashes_load(hashfilename): """ Read back a set of hashes written by hashes_save """ hashes = [] fmtsize = struct.calcsize(HASH_FMT) - with open(hashfilename, 'rb') as f: + with 
gzip.open(hashfilename, 'rb') if is_compressed_ext(hashfilename) else open(hashfilename, 'rb') as f: magic = f.read(len(HASH_MAGIC)) if magic != HASH_MAGIC: raise IOError('%s is not a hash file (magic %s)' @@ -485,7 +489,7 @@ def hashes_load(hashfilename): def peaks_save(peakfilename, peaks): """ Write out a list of (time, bin) pairs as 32 bit ints """ - with open(peakfilename, 'wb') as f: + with gzip.open(peakfilename, 'wb') if is_compressed_ext(peakfilename) else open(peakfilename, 'wb') as f: f.write(PEAK_MAGIC) for time_, bin_ in peaks: f.write(struct.pack(PEAK_FMT, time_, bin_)) @@ -495,7 +499,7 @@ def peaks_load(peakfilename): """ Read back a set of (time, bin) pairs written by peaks_save """ peaks = [] fmtsize = struct.calcsize(PEAK_FMT) - with open(peakfilename, 'rb') as f: + with gzip.open(peakfilename, 'rb') if is_compressed_ext(peakfilename) else open(peakfilename, 'rb') as f: magic = f.read(len(PEAK_MAGIC)) if magic != PEAK_MAGIC: raise IOError('%s is not a peak file (magic %s)'