From e6219dd547ee747ac51ab1f46e295bdb537d2ca1 Mon Sep 17 00:00:00 2001 From: Eric Z Date: Tue, 15 Jan 2019 17:34:51 -0600 Subject: [PATCH] update to use gzip-compressed precompute features --- Makefile | 8 ++++---- audfprint_analyze.py | 16 ++++++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 06d3c06..83d6745 100644 --- a/Makefile +++ b/Makefile @@ -33,14 +33,14 @@ test_onecore_precomp: precompdir ${AUDFPRINT} new --dbase fpdbase0.pklz precompdir/Nine_Lives/0* ${AUDFPRINT} new --dbase fpdbase1.pklz precompdir/Nine_Lives/1* ${AUDFPRINT} merge --dbase fpdbase1.pklz fpdbase0.pklz - ${AUDFPRINT} match --dbase fpdbase1.pklz precompdir/query.afpt + ${AUDFPRINT} match --dbase fpdbase1.pklz precompdir/query.afptz test_onecore_newmerge: precompdir ${AUDFPRINT} new --dbase fpdbase0.pklz precompdir/Nine_Lives/0* ${AUDFPRINT} new --dbase fpdbase1.pklz precompdir/Nine_Lives/1* rm -f fpdbase2.pklz ${AUDFPRINT} newmerge --dbase fpdbase2.pklz fpdbase0.pklz fpdbase1.pklz - ${AUDFPRINT} match --dbase fpdbase2.pklz precompdir/query.afpt + ${AUDFPRINT} match --dbase fpdbase2.pklz precompdir/query.afptz precompdir: audfprint.py audfprint_analyze.py audfprint_match.py hash_table.py rm -rf precompdir @@ -52,7 +52,7 @@ test_onecore_precomppk: precomppkdir ${AUDFPRINT} new --dbase fpdbase0.pklz precomppkdir/Nine_Lives/0* ${AUDFPRINT} new --dbase fpdbase1.pklz precomppkdir/Nine_Lives/1* ${AUDFPRINT} merge --dbase fpdbase1.pklz fpdbase0.pklz - ${AUDFPRINT} match --dbase fpdbase1.pklz precomppkdir/query.afpk + ${AUDFPRINT} match --dbase fpdbase1.pklz precomppkdir/query.afpkz rm -rf precomppkdir precomppkdir: audfprint.py audfprint_analyze.py audfprint_match.py hash_table.py @@ -72,7 +72,7 @@ test_mucore_precomp: precompdir_mu ${AUDFPRINT} new --dbase fpdbase_mu0.pklz --ncores 4 precompdir_mu/Nine_Lives/0* ${AUDFPRINT} new --dbase fpdbase_mu.pklz --ncores 4 precompdir_mu/Nine_Lives/1* ${AUDFPRINT} merge --dbase fpdbase_mu.pklz 
fpdbase_mu0.pklz - ${AUDFPRINT} match --dbase fpdbase_mu.pklz --ncores 4 precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt precompdir_mu/query.afpt + ${AUDFPRINT} match --dbase fpdbase_mu.pklz --ncores 4 precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu/query.afptz precompdir_mu: audfprint.py audfprint_analyze.py audfprint_match.py hash_table.py rm -rf precompdir_mu diff --git a/audfprint_analyze.py b/audfprint_analyze.py index 78189c0..6e71e18 100644 --- a/audfprint_analyze.py +++ b/audfprint_analyze.py @@ -13,6 +13,7 @@ import os import struct # For reading/writing hashes to file import time # For glob2hashtable, localtester +import gzip # for compressed file writing import numpy as np import scipy.signal @@ -22,11 +23,12 @@ import stft + # ############### Globals ############### # # Special extension indicating precomputed fingerprint -PRECOMPEXT = '.afpt' +PRECOMPEXT = '.afptz' # A different precomputed fingerprint is just the peaks -PRECOMPPKEXT = '.afpk' +PRECOMPPKEXT = '.afpkz' def locmax(vec, indices=False): @@ -458,10 +460,12 @@ def ingest(self, hashtable, filename): PEAK_FMT = '<2i' PEAK_MAGIC = b'audfprintpeakV00' # 16 chars, FWIW +def is_compressed_ext(filename): + return filename[-1]=='z' def hashes_save(hashfilename, hashes): """ Write out a list of (time, hash) pairs as 32 bit ints """ - with open(hashfilename, 'wb') as f: + with gzip.open(hashfilename, 'wb') if is_compressed_ext(hashfilename) else open(hashfilename, 'wb') as f: f.write(HASH_MAGIC) for time_, hash_ in hashes: f.write(struct.pack(HASH_FMT, time_, hash_)) @@ -471,7 +475,7 @@ def hashes_load(hashfilename): """ Read back a set of hashes written by hashes_save """ hashes = [] fmtsize = struct.calcsize(HASH_FMT) - with open(hashfilename, 'rb') as f: + with 
gzip.open(hashfilename, 'rb') if is_compressed_ext(hashfilename) else open(hashfilename, 'rb') as f: magic = f.read(len(HASH_MAGIC)) if magic != HASH_MAGIC: raise IOError('%s is not a hash file (magic %s)' @@ -485,7 +489,7 @@ def hashes_load(hashfilename): def peaks_save(peakfilename, peaks): """ Write out a list of (time, bin) pairs as 32 bit ints """ - with open(peakfilename, 'wb') as f: + with gzip.open(peakfilename, 'wb') if is_compressed_ext(peakfilename) else open(peakfilename, 'wb') as f: f.write(PEAK_MAGIC) for time_, bin_ in peaks: f.write(struct.pack(PEAK_FMT, time_, bin_)) @@ -495,7 +499,7 @@ def peaks_load(peakfilename): """ Read back a set of (time, bin) pairs written by peaks_save """ peaks = [] fmtsize = struct.calcsize(PEAK_FMT) - with open(peakfilename, 'rb') as f: + with gzip.open(peakfilename, 'rb') if is_compressed_ext(peakfilename) else open(peakfilename, 'rb') as f: magic = f.read(len(PEAK_MAGIC)) if magic != PEAK_MAGIC: raise IOError('%s is not a peak file (magic %s)'