Maintenance.

data61 · Jul 9, 2024 · 78fe3d8 · 78fe3d8
1 parent b0dc2b3
commit 78fe3d8
Show file tree

Hide file tree

Showing 234 changed files with 4,265 additions and 1,359 deletions.
diff --git a/BMR/AndJob.cpp b/BMR/AndJob.cpp
@@ -15,7 +15,7 @@ int AndJob::run()
 #endif
 	__m128i* prf_output = new __m128i[PAD_TO_8(ProgramParty::s().get_n_parties())];
 	auto gate = gates.begin();
-	vector< GC::Secret<EvalRegister> >& S = *this->S;
+	auto& S = *this->S;
 	const vector<int>& args = *this->args;
 	int i_gate = 0;
 	for (size_t i = start; i < end; i += 4)

diff --git a/BMR/AndJob.h b/BMR/AndJob.h
@@ -15,7 +15,7 @@ using namespace std;
 
 class AndJob
 {
-	vector< GC::Secret<EvalRegister> >* S;
+	StackedVector< GC::Secret<EvalRegister> >* S;
 	const vector<int>* args;
 
 public:
@@ -25,7 +25,7 @@ class AndJob
 
 	AndJob() : S(0), args(0), start(0), end(0), gate_id(0) {}
 
-	void reset(vector<GC::Secret<EvalRegister> >& S, const vector<int>& args,
+	void reset(StackedVector<GC::Secret<EvalRegister> >& S, const vector<int>& args,
 			size_t start, gate_id_t gate_id, size_t n_gates, int n_parties)
 	{
 		this->S = &S;

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,22 @@
 The changelog explains changes pulled through from the private development repository. Bug fixes and small enhancements are committed between releases and not documented here.
 
+## 0.3.9 (July 9, 2024)
+
+- Inference with non-sequential PyTorch networks
+- SHA-3 for any input length (@hiddely)
+- Improved client facilities
+- Shuffling with malicious security for SPDZ-wise protocols by [Asharov et al.](https://ia.cr/2022/1595)
+- More reusable bytecode via in-thread calling facility
+- Recursive functions without return values
+- Fewer rounds for parallel matrix multiplications (@vincent-ehrmanntraut)
+- Optimized usage of SoftSpokenOT in semi-honest protocols
+- More integrity checks on storage in MAC-based protocols
+- Use C++17
+- Use glibc 2.18 for the binaries
+- Fixed security bugs: remotely caused buffer overflows (#1382)
+- Fixed security bug: Missing randomization before revealing to client
+- Fixed security bug: Bias in Rep3 secure shuffling
+
 ## 0.3.8 (December 14, 2023)
 
 - Functionality for multiple nodes per party

diff --git a/CONFIG b/CONFIG
@@ -106,7 +106,7 @@ else
 BOOST = -lboost_thread $(MY_BOOST)
 endif
 
-CFLAGS += $(ARCH) $(MY_CFLAGS) $(GDEBUG) -Wextra -Wall $(OPTIM) -I$(ROOT) -I$(ROOT)/deps -pthread $(PROF) $(DEBUG) $(MOD) $(GF2N_LONG) $(PREP_DIR) $(SSL_DIR) $(SECURE) -std=c++11 -Werror
+CFLAGS += $(ARCH) $(MY_CFLAGS) $(GDEBUG) -Wextra -Wall $(OPTIM) -I$(ROOT) -I$(ROOT)/deps -pthread $(PROF) $(DEBUG) $(MOD) $(GF2N_LONG) $(PREP_DIR) $(SSL_DIR) $(SECURE) -std=c++17 -Werror
 CFLAGS += $(BREW_CFLAGS)
 CPPFLAGS = $(CFLAGS)
 LD = $(CXX)

diff --git a/Compiler/GC/instructions.py b/Compiler/GC/instructions.py
@@ -203,8 +203,10 @@ def dynamic_arg_format(cls, args):
     def add_usage(self, req_node):
         for i, n in self.bases(iter(self.args)):
             size = self.args[i + 1]
-            req_node.increment(('bit', 'triple'), size * (n - 3) // 2)
-            req_node.increment(('bit', 'mixed'), size)
+            n = (n - 3) // 2
+            req_node.increment(('bit', 'triple'), size * n)
+            if n > 1:
+                req_node.increment(('bit', 'mixed'), size * ((n + 63) // 64))
 
     def copy(self, size, subs):
         return type(self)(*self.get_new_args(size, subs))

diff --git a/Compiler/GC/types.py b/Compiler/GC/types.py
@@ -13,14 +13,19 @@
 from Compiler.types import vectorized_classmethod
 from Compiler.program import Tape, Program
 from Compiler.exceptions import *
-from Compiler import util, oram, floatingpoint, library
+from Compiler import util, oram, floatingpoint, library, comparison
 from Compiler import instructions_base
 import Compiler.GC.instructions as inst
 import operator
 import math
 import itertools
 from functools import reduce
 
+class _binary:  # mixin for binary types, which cannot be revealed to a single player
+    def reveal_to(self, *args, **kwargs):
+        raise CompilerError(
+            '%s does not support revealing to individual players' % type(self))
+
 class bits(Tape.Register, _structure, _bit):
     n = 40
     unit = 64
@@ -149,6 +154,12 @@ def set_length(self, n):
         self.n = n
     def set_size(self, size):
         pass
+    def load_int(self, value):
+        n_limbs = math.ceil(self.n / self.unit)
+        for i in range(n_limbs):
+            self.conv_regint(min(self.unit, self.n - i * self.unit),
+                             self[i], regint(value % 2 ** self.unit))
+            value >>= self.unit
     def load_other(self, other):
         if isinstance(other, cint):
             assert(self.n == other.size)
@@ -236,12 +247,14 @@ def _new_by_number(self, i, size=1):
         return res
     def if_else(self, x, y):
         """
-        Vectorized oblivious selection::
+        Bit-wise oblivious selection::
 
             sb32 = sbits.get_type(32)
             print_ln('%s', sb32(3).if_else(sb32(5), sb32(2)).reveal())
 
-        This will output 1.
+        This will output 1 because it selects the two least
+        significant bits from 5 and the rest of the bits from 2.
+
         """
         return result_conv(x, y)(self & (x ^ y) ^ y)
     def zero_if_not(self, condition):
@@ -268,6 +281,9 @@ def copy_from_part(self, source, base, size):
                  self.bit_compose(source.bit_decompose()[base:base + size]))
     def vector_size(self):
         return self.n
+    @staticmethod
+    def size_for_mem():
+        return 1
 
 class cbits(bits):
     """ Clear bits register. Helper type with limited functionality. """
@@ -302,13 +318,6 @@ def conv(cls, other):
         else:
             return super(cbits, cls).conv(other)
     types = {}
-    def load_int(self, value):
-        n_limbs = math.ceil(self.n / self.unit)
-        tmp = regint(size=n_limbs)
-        for i in range(n_limbs):
-            tmp[i].load_int(value % 2 ** self.unit)
-            value >>= self.unit
-        self.load_other(tmp)
     def store_in_dynamic_mem(self, address):
         inst.stmsdci(self, cbits.conv(address))
     def clear_op(self, other, c_inst, ci_inst, op):
@@ -502,11 +511,7 @@ def load_int(self, value):
         if self.n <= 32:
             inst.ldbits(self, self.n, value)
         else:
-            size = math.ceil(self.n / self.unit)
-            tmp = regint(size=size)
-            for i in range(size):
-                tmp[i].load_int((value >> (i * 64)) % 2**64)
-            self.load_other(tmp)
+            bits.load_int(self, value)
     def load_other(self, other):
         if isinstance(other, cbits) and self.n == other.n:
             inst.convcbit2s(self.n, self, other)
@@ -675,7 +680,7 @@ def bit_adder(*args, **kwargs):
     def ripple_carry_adder(*args, **kwargs):
         return sbitint.ripple_carry_adder(*args, **kwargs)
 
-class sbitvec(_vec, _bit):
+class sbitvec(_vec, _bit, _binary):
     """ Vector of registers of secret bits, effectively a matrix of secret bits.
     This facilitates parallel arithmetic operations in binary circuits.
     Container types are not supported, use :py:obj:`sbitvec.get_type` for that.
@@ -732,15 +737,16 @@ def get_type(cls, n):
         :py:obj:`v` and the columns by calling :py:obj:`elements`.
         """
         class sbitvecn(cls, _structure):
-            @staticmethod
-            def malloc(size, creator_tape=None):
-                return sbit.malloc(size * n, creator_tape=creator_tape)
+            @classmethod
+            def malloc(cls, size, creator_tape=None):
+                return sbit.malloc(
+                    size * cls.mem_size(), creator_tape=creator_tape)
             @staticmethod
             def n_elements():
                 return 1
             @staticmethod
             def mem_size():
-                return n
+                return sbits.get_type(n).mem_size()
             @classmethod
             def get_input_from(cls, player, size=1, f=0):
                 """ Secret input from :py:obj:`player`. The input is decomposed
@@ -780,38 +786,28 @@ def __init__(self, other=None, size=None):
                         self.v = sbits.get_type(n)(other).bit_decompose()
                     assert len(self.v) == n
                     assert size is None or size == self.v[0].n
-            @vectorized_classmethod
-            def load_mem(cls, address):
-                size = instructions_base.get_global_vector_size()
-                if size not in (None, 1):
-                    assert isinstance(address, int) or len(address) == 1
-                    sb = sbits.get_type(size)
-                    return cls.from_vec(sb.bit_compose(
-                        sbit.load_mem(address + i + j * n) for j in range(size))
-                                        for i in range(n))
-                if not isinstance(address, int):
-                    v = [sbit.load_mem(x, size=n).v[0] for x in address]
-                    return cls(v)
+            @classmethod
+            def load_mem(cls, address, size=None):
+                if isinstance(address, int) or len(address) == 1:
+                    address = [address + i for i in range(size or 1)]
                 else:
-                    return cls.from_vec(sbit.load_mem(address + i)
-                                        for i in range(n))
+                    assert size == None
+                return cls(
+                    [sbits.get_type(n).load_mem(x) for x in address])
             def store_in_mem(self, address):
                 size = 1
                 for x in self.v:
                     if not util.is_constant(x):
                         size = max(size, x.n)
-                v = [sbits.get_type(size).conv(x) for x in self.v]
-                if not isinstance(address, int) and len(address) != 1:
-                    v = self.elements()
-                    assert len(v) == len(address)
-                    for x, y in zip(v, address):
-                        for i, xx in enumerate(x.bit_decompose(n)):
-                            xx.store_in_mem(y + i)
+                if isinstance(address, int):
+                    address = range(address, address + size)
+                elif len(address) == 1:
+                    address = [address + i * self.mem_size()
+                               for i in range(size)]
                 else:
-                    assert isinstance(address, int) or len(address) == 1
-                    for i in range(n):
-                        for j, x in enumerate(v[i].bit_decompose()):
-                            x.store_in_mem(address + i + j * n)
+                    assert size == len(address)
+                for x, dest in zip(self.elements(), address):
+                    x.store_in_mem(dest)
             @classmethod
             def two_power(cls, nn, size=1):
                 return cls.from_vec(
@@ -864,7 +860,7 @@ def __init__(self, elements=None, length=None, input_length=None):
             assert isinstance(elements, sint)
             if Program.prog.use_split():
                 x = elements.split_to_two_summands(length)
-                v = sbitint.carry_lookahead_adder(x[0], x[1], fewer_inv=True)
+                v = sbitint.bit_adder(x[0], x[1])
             else:
                 prog = Program.prog
                 if not prog.options.ring:
@@ -877,6 +873,7 @@ def __init__(self, elements=None, length=None, input_length=None):
                         length, prog.security)
                     prog.use_edabit(backup)
                     return
+                comparison.require_ring_size(length, 'A2B conversion')
                 l = int(Program.prog.options.ring)
                 r, r_bits = sint.get_edabit(length, size=elements.size)
                 c = ((elements - r) << (l - length)).reveal()
@@ -885,6 +882,8 @@ def __init__(self, elements=None, length=None, input_length=None):
                 x = sbitintvec.from_vec(r_bits) + sbitintvec.from_vec(cb)
                 v = x.v
             self.v = v[:length]
+        elif isinstance(elements, sbitvec):
+            self.v = elements.v
         elif elements is not None and not (util.is_constant(elements) and \
              elements == 0):
             self.v = sbits.trans(elements)
@@ -1347,13 +1346,19 @@ def elements(self):
     def __add__(self, other):
         if util.is_zero(other):
             return self
-        a, b = self.expand(other)
+        try:
+            a, b = self.expand(other)
+        except Exception:  # unsupported operand: hand over to Python's operator dispatch
+            return NotImplemented
         v = sbitint.bit_adder(a, b)
         return self.get_type(len(v)).from_vec(v)
     __radd__ = __add__
     __sub__ = _bitint.__sub__
     def __rsub__(self, other):
-        a, b = self.expand(other)
+        try:
+            a, b = self.expand(other)
+        except Exception:  # unsupported operand: hand over to Python's operator dispatch
+            return NotImplemented
         return self.from_vec(b) - self.from_vec(a)
     def __mul__(self, other):
         if isinstance(other, sbits):
@@ -1447,7 +1452,7 @@ def output(self):
         inst.print_float_plainb(v, cbits.get_type(32)(-self.f), cbits(0),
                                 cbits(0), cbits(0))
 
-class sbitfix(_fix):
+class sbitfix(_fix, _binary):
     """ Secret signed fixed-point number in one binary register.
     Use :py:obj:`set_precision()` to change the precision.
 
@@ -1515,7 +1520,7 @@ class cls(_fix):
         cls.set_precision(f, k)
         return cls._new(cls.int_type(other), k, f)
 
-class sbitfixvec(_fix, _vec):
+class sbitfixvec(_fix, _vec, _binary):
     """ Vector of fixed-point numbers for parallel binary computation.
 
     Use :py:obj:`set_precision()` to change the precision.

diff --git a/Compiler/allocator.py b/Compiler/allocator.py
@@ -76,7 +76,7 @@ def alloc(self, size):
             self.top += size
             self.limit = max(self.limit, self.top)
             if res >= REG_MAX:
-                raise RegisterOverflowError()
+                raise RegisterOverflowError(size)
             return res
 
     def free(self, base, size):
@@ -209,7 +209,8 @@ def dealloc_reg(self, reg, inst, free):
                 for x in itertools.chain(dup.duplicates, base.duplicates):
                     to_check.add(x)
 
-        if reg not in self.program.base_addresses:
+        if reg not in self.program.base_addresses \
+           and not isinstance(inst, call_arg):
             free.free(base)
         if inst.is_vec() and base.vector:
             self.defined[base] = inst
@@ -608,7 +609,8 @@ def keep_text_order(inst, n):
                             # so this threshold should lead to acceptable compile times even on slower processors.
                             first_factor_total_number_of_values = instr.args[12 * matmul_idx + 3] * instr.args[12 * matmul_idx + 4]
                             second_factor_total_number_of_values = instr.args[12 * matmul_idx + 4] * instr.args[12 * matmul_idx + 5]
-                            max_dependencies_per_matrix = 1500**2
+                            max_dependencies_per_matrix = \
+                                self.block.parent.program.budget
                             if first_factor_total_number_of_values > max_dependencies_per_matrix or second_factor_total_number_of_values > max_dependencies_per_matrix:
                                 if block.warn_about_mem and not block.parent.warned_about_mem:
                                     print('WARNING: Order of memory instructions not preserved due to long vector, errors possible')

diff --git a/Compiler/circuit.py b/Compiler/circuit.py
@@ -5,7 +5,7 @@
 
     make Programs/Circuits
 
-.. _`Bristol Fashion`: https://homes.esat.kuleuven.be/~nsmart/MPC
+.. _`Bristol Fashion`: https://nigelsmart.github.io/MPC-Circuits
 
 """
 import math
@@ -15,6 +15,7 @@
 from Compiler import util
 import itertools
 import struct
+import os
 
 class Circuit:
     """
@@ -47,7 +48,12 @@ class Circuit:
     """
 
     def __init__(self, name):
+        self.name = name
         self.filename = 'Programs/Circuits/%s.txt' % name
+        if not os.path.exists(self.filename):
+            if os.system('make Programs/Circuits'):
+                raise CompilerError('Cannot download circuit descriptions. '
+                                    'Make sure make and git are installed.')
         f = open(self.filename)
         self.functions = {}
 
@@ -57,8 +63,9 @@ def __call__(self, *inputs):
     def run(self, *inputs):
         n = inputs[0][0].n, get_tape()
         if n not in self.functions:
-            self.functions[n] = function_block(lambda *args:
-                                               self.compile(*args))
+            self.functions[n] = function_block(
+                lambda *args: self.compile(*args))
+            self.functions[n].name = '%s(%d)' % (self.name, inputs[0][0].n)
         flat_res = self.functions[n](*itertools.chain(*inputs))
         res = []
         i = 0
@@ -124,7 +131,7 @@ def compile(self, *all_inputs):
 
 def sha3_256(x):
     """
-    This function implements SHA3-256 for inputs of up to 1080 bits::
+    This function implements SHA3-256 for inputs of any length::
 
         from circuit import sha3_256
         a = sbitvec.from_vec([])
@@ -138,7 +145,8 @@ def sha3_256(x):
         for x in a, b, c, d, e, f, g, h:
             sha3_256(x).reveal_print_hex()
 
-    This should output the `test vectors
+    This should output the hashes of the above inputs, beginning with
+    the `test vectors
     <https://github.com/XKCP/XKCP/blob/master/tests/TestVectors/ShortMsgKAT_SHA3-256.txt>`_
     of SHA3-256 for 0, 8, 16, and 24 bits as well as the hash of the
     0 byte::