diff --git a/pytorch/SetFunction.py b/pytorch/SetFunction.py new file mode 100644 index 0000000..24c139e --- /dev/null +++ b/pytorch/SetFunction.py @@ -0,0 +1,66 @@
+from typing import Set, List, Tuple
+import numpy as np
+import torch
+import torch.nn as nn
+import random
+from pytorch.optimizer.LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer
+from pytorch.optimizer.LazyGreedyOptimizer import LazyGreedyOptimizer
+from pytorch.optimizer.NaiveGreedyOptimizer import NaiveGreedyOptimizer
+from pytorch.optimizer.StochasticGreedyOptimizer import StochasticGreedyOptimizer
+
+
+class SetFunction(nn.Module):
+    # Abstract base class for all submodular set functions. Subclasses override
+    # these methods; the base versions simply raise NotImplementedError.
+    def __init__(self):
+        super(SetFunction, self).__init__()
+
+    def evaluate(self, X: Set[int]) -> float:
+        raise NotImplementedError
+
+    def evaluate_with_memoization(self, X: Set[int]) -> float:
+        raise NotImplementedError
+
+    def marginal_gain(self, X: Set[int], item: int) -> float:
+        raise NotImplementedError
+
+    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+        raise NotImplementedError
+
+    def update_memoization(self, X: Set[int], item: int) -> None:
+        raise NotImplementedError
+
+    def get_effective_ground_set(self) -> Set[int]:
+        raise NotImplementedError
+
+    def maximize(self, optimizer: str, budget: float, stopIfZeroGain: bool, stopIfNegativeGain: bool, verbose: bool,
+                 costs: List[float] = None, cost_sensitive_greedy: bool = False, show_progress: bool = False, epsilon: float = 0.0) -> List[Tuple[int, float]]:
+        optimizer = self._get_optimizer(optimizer)
+        if optimizer:
+            return optimizer.maximize(self, budget, stopIfZeroGain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy)
+        else:
+            print("Invalid Optimizer")
+            return []
+
+    def _get_optimizer(self, optimizer_name: str):
+        # each optimizer class is defined in its own file under pytorch/optimizer
+        if optimizer_name == "NaiveGreedy":
+            return NaiveGreedyOptimizer()
+        elif optimizer_name == "LazyGreedy":
+            return LazyGreedyOptimizer()
+        elif optimizer_name == "StochasticGreedy":
+            return StochasticGreedyOptimizer()
+        elif optimizer_name == "LazierThanLazyGreedy":
+            return LazierThanLazyGreedyOptimizer()
+        else:
+            return None
+
+    def cluster_init(self, n: int, k_dense: List[List[float]], ground: Set[int],
+                     partial: bool, lambda_: float) -> None:
+        raise NotImplementedError
+
+    def set_memoization(self, X: Set[int]) -> None:
+        raise NotImplementedError
+
+    def clear_memoization(self) -> None:
+        raise NotImplementedError
diff --git a/pytorch/__init__.py b/pytorch/__init__.py new file mode 100644 index 0000000..b8f5042 --- /dev/null +++ b/pytorch/__init__.py @@ -0,0 +1,5 @@
+# /pytorch/__init__.py
+from .SetFunction import SetFunction
+
+from .optimizer import *
+from .submod import *
\ No newline at end of file
diff --git a/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py b/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py new file mode 100644 index 0000000..86e355c --- /dev/null +++ b/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py @@ -0,0 +1,120 @@
+import random
+import math
+
+class LazierThanLazyGreedyOptimizer:
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1, val2, eps):
+        return abs(val1 - val2) < eps
+
+    @staticmethod
+    def print_sorted_set(sorted_set):
+        print("[", end="")
+        for val, elem in sorted_set:
+            print(f"({val}, {elem}), ", end="")
+        print("]")
+
+    def maximize(self, f_obj, budget, stop_if_zero_gain=False, stop_if_negative_gain=False,
epsilon=0.1, verbose=False, show_progress=False, costs=None, cost_sensitive_greedy=False): + greedy_vector = [] + greedy_set = set() + + if costs is None: + greedy_vector.reserve(budget) + greedy_set.reserve(budget) + + rem_budget = budget + remaining_set = set(f_obj.get_effective_ground_set()) + n = len(remaining_set) + epsilon = 0.05 + random_set_size = int((n / budget) * math.log(1 / epsilon)) + + if verbose: + print(f"Epsilon = {epsilon}") + print(f"Random set size = {random_set_size}") + print("Ground set:") + print(remaining_set) + print(f"Num elements in ground set = {len(remaining_set)}") + print("Starting the LazierThanLazy greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + best_id = None + best_val = None + + i = 0 + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter_count = 0 + + while rem_budget > 0: + random_set = set() + while len(random_set) < random_set_size: + elem = random.randint(0, n - 1) + if elem in remaining_set and elem not in random_set: + random_set.add(elem) + + if verbose: + print(f"Iteration {i}") + print(f"Random set = {random_set}") + print("Now running lazy greedy on the random set") + + candidate_id = None + candidate_val = None + new_candidate_bound = None + + # Compute gains only for the elements in the remaining set + gains = [(f_obj.marginal_gain_with_memoization(greedy_set, elem, False), elem) + for elem in remaining_set] + + for j, (val, elem) in enumerate(sorted(gains, key=lambda x: (-x[0], x[1]))): + if elem in random_set and elem not in greedy_set: # Check if the element is not already selected + if verbose: + print(f"Checking {elem}...") + candidate_id = elem + candidate_val = val + new_candidate_bound = f_obj.marginal_gain_with_memoization(greedy_set, candidate_id, False) + if verbose: + print(f"Updated gain as per updated greedy set = {new_candidate_bound}") + next_elem = gains[j + 1] if j + 1 < len(gains) else None + if new_candidate_bound >= next_elem[0] if next_elem else float('-inf'): + if verbose: + print("..better than next best upper bound, " + "selecting...") + best_id = candidate_id + best_val = new_candidate_bound + break + + if verbose: + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + remaining_set.remove(best_id) + + if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print("Updated greedy set:", greedy_set) + + if show_progress: + percent = int(((iter_count + 1.0) / N) * 100) + if percent >= display_next: + print("\r", "[" + "|" * (percent // 5) + " " * (100 // 5 - percent // 5) + "]", end="") + print(f" {percent}% [Iteration {iter_count + 1} of {N}]", end="") + display_next += step + iter_count += 1 + + i += 1 + + return greedy_vector diff --git a/pytorch/optimizer/LazyGreedyOptimizer.py b/pytorch/optimizer/LazyGreedyOptimizer.py new file mode 100644 index 0000000..45d7590 --- /dev/null +++ b/pytorch/optimizer/LazyGreedyOptimizer.py @@ -0,0 +1,97 @@ +import torch +import heapq + +class LazyGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + return abs(val1 - val2) < eps + + def maximize(self, f_obj, budget, stop_if_zero_gain, stop_if_negative_gain, + verbose, show_progress, costs, 
cost_sensitive_greedy): + greedy_vector = [] + greedy_set = set() + + # if not costs: + # greedy_vector.reserve(budget) + # greedy_set.reserve(budget) + + rem_budget = budget + ground_set = f_obj.get_effective_ground_set() + + if verbose: + print("Ground set:") + print(ground_set) + print(f"Num elements in groundset = {len(ground_set)}") + print("Costs:") + print(costs) + print(f"Cost sensitive greedy: {cost_sensitive_greedy}") + print("Starting the lazy greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + + container = [] + heapq.heapify(container) + max_heap = container + + if cost_sensitive_greedy: + for elem in ground_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) / costs[elem] + heapq.heappush(max_heap, (-gain, elem)) + else: + for elem in ground_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) + heapq.heappush(max_heap, (-gain, elem)) + + if verbose: + print("Max heap constructed") + + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter = 0 + + while rem_budget > 0 and max_heap: + current_max = heapq.heappop(max_heap) + current_max_gain, current_max_elem = -current_max[0], current_max[1] + + if verbose: + print(f"currentMax element: {current_max_elem} and its upper bound: {current_max_gain}") + + new_max_bound = f_obj.marginal_gain_with_memoization(greedy_set, current_max_elem, False) + + if verbose: + print(f"newMaxBound: {new_max_bound}") + + if new_max_bound >= -max_heap[0][0]: + if (new_max_bound < 0 and stop_if_negative_gain) or \ + (self.equals(new_max_bound, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, current_max_elem) + greedy_set.add(current_max_elem) + greedy_vector.append((current_max_elem, new_max_bound)) + rem_budget -= 1 + + if verbose: + print(f"Added element {current_max_elem} and the gain is {new_max_bound}") + print("Updated greedySet:", greedy_set) + + if show_progress: + percent = int(((iter + 1.0) / N) * 100) + + if percent >= display_next: + print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", + end=f" {percent}% [Iteration {iter + 1} of {N}]") + display_next += step + + iter += 1 + else: + heapq.heappush(max_heap, (-new_max_bound, current_max_elem)) + + return greedy_vector diff --git a/pytorch/optimizer/NaiveGreedyOptimizer.py b/pytorch/optimizer/NaiveGreedyOptimizer.py new file mode 100644 index 0000000..728f16a --- /dev/null +++ b/pytorch/optimizer/NaiveGreedyOptimizer.py @@ -0,0 +1,90 @@ +import torch +import random +from typing import List, Tuple, Set + +class NaiveGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + return abs(val1 - val2) < eps + + def maximize( + self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy + ): + greedy_vector = [] + greedy_set = set() + if not costs: + # greedy_vector = [None] * budget + greedy_set = set() + rem_budget = budget + ground_set = f_obj.get_effective_ground_set() + #print(ground_set) + if verbose: + print("Ground set:") + print(ground_set) + print(f"Num elements in groundset = {len(ground_set)}") + print("Costs:") + print(costs) + print(f"Cost sensitive greedy: {cost_sensitive_greedy}") + print("Starting the naive greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + best_id = None + best_val = None + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter_count = 0 + + 
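+        # Sketch of the loop that follows: each iteration scans every ground-set
+        # element not yet in greedy_set, queries its marginal gain through
+        # marginal_gain_with_memoization, and greedily adds the best element,
+        # stopping when the budget is spent or when the gain is negative/zero
+        # and the corresponding stop flag is set.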
while rem_budget > 0: + best_id = None + best_val = float("-inf") + + for i in ground_set: + if i in greedy_set: + continue + gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False) + # print(gain) + if verbose: + print(f"Gain of {i} is {gain}") + + if gain > best_val: + best_id = i + best_val = gain + + if verbose: + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + if (best_val < 0 and stopIfNegativeGain) or ( + self.equals(best_val, 0, 1e-5) and stop_if_zero_gain + ): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print(f"Updated greedy set: {greedy_set}") + + if show_progress: + percent = int((iter_count + 1.0) / N * 100) + + if percent >= display_next: + print( + f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", + end="", + ) + print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="") + display_next += step + + iter_count += 1 + + return greedy_vector diff --git a/pytorch/optimizer/StochasticGreedyOptimizer.py b/pytorch/optimizer/StochasticGreedyOptimizer.py new file mode 100644 index 0000000..0f70547 --- /dev/null +++ b/pytorch/optimizer/StochasticGreedyOptimizer.py @@ -0,0 +1,104 @@ +import random +from typing import List, Tuple, Set +import math +import sys + +class StochasticGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1: float, val2: float, eps: float) -> bool: + return abs(val1 - val2) < eps + + def maximize(self, f_obj, budget: float, stop_if_zero_gain: bool, + stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True, + show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]: + # TODO: Implement handling of equal guys and different sizes of each item later + # TODO: Implement cost-sensitive selection + + greedy_vector = [] + greedy_set = set() + + # if not costs: + # # Every element is of the same size, budget corresponds to cardinality + # greedy_vector.reserve(budget) + # greedy_set.reserve(budget) + + rem_budget = budget + remaining_set = set(f_obj.get_effective_ground_set()) + n = len(remaining_set) + epsilon = 0.05 + random_set_size = int((n / budget) * math.log(1 / epsilon)) + if verbose: + print(f"Epsilon = {epsilon}") + print(f"Random set size = {random_set_size}") + print("Ground set:") + print(" ".join(map(str, remaining_set))) + print(f"Num elements in groundset = {len(remaining_set)}") + print("Starting the stochastic greedy algorithm") + print("Initial greedy set:") + print(" ".join(map(str, greedy_set))) + + f_obj.clear_memoization() + random.seed(1) + best_id = -1 + best_val = -1 * float('inf') + i = 0 + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter = 0 + + while rem_budget > 0: + random_set = set() + while len(random_set) < random_set_size: + elem = random.randint(0, n - 1) + if elem in remaining_set and elem not in random_set: + random_set.add(elem) + + if verbose: + print(f"Iteration {i}") + print(f"Random set = {list(random_set)}") + print("Now running naive greedy on the random set") + + best_id = -1 + best_val = -1 * float('inf') + + for elem in random_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) + if gain > best_val: + best_id = elem + best_val = gain + + if verbose: + if best_id == -1: + raise ValueError("Nobody had greater gain than minus 
infinity!!") + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + remaining_set.remove(best_id) + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print("Updated greedy set:", " ".join(map(str, greedy_set))) + + if show_progress: + percent = int(((iter + 1.0) / N) * 100) + if percent >= display_next: + print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="") + print(f"{percent}% [Iteration {iter + 1} of {N}]", end="") + sys.stdout.flush() + display_next += step + iter += 1 + + i += 1 + + return greedy_vector diff --git a/pytorch/optimizer/__init__.py b/pytorch/optimizer/__init__.py new file mode 100644 index 0000000..4a1cbb3 --- /dev/null +++ b/pytorch/optimizer/__init__.py @@ -0,0 +1,5 @@ +# /pytorch/optimizer/__init__.py +from .LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer +from .LazyGreedyOptimizer import LazyGreedyOptimizer +from .NaiveGreedyOptimizer import NaiveGreedyOptimizer +from .StochasticGreedyOptimizer import StochasticGreedyOptimizer diff --git a/pytorch/submod/DisparityMin.py b/pytorch/submod/DisparityMin.py new file mode 100644 index 0000000..27a4d2b --- /dev/null +++ b/pytorch/submod/DisparityMin.py @@ -0,0 +1,192 @@ +import numpy as np +import scipy +from helper import * +from ..SetFunction import SetFunction + +class DisparityMinFunction(SetFunction): + + def __init__(self, n, mode, sijs=None, data=None, metric="cosine", num_neighbors=None): + super(DisparityMinFunction, self).__init__() + self.n = n + self.mode = mode + self.metric = metric + self.sijs = sijs + self.data = data + self.num_neighbors = num_neighbors + self.cpp_obj = None + self.cpp_sijs = None + self.cpp_content = None + self.effective_ground_set = None + + if self.n <= 0: + raise Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse']: + raise Exception("ERROR: Incorrect mode. Must be one of 'dense' or 'sparse'") + + if type(self.sijs) != type(None): # User has provided similarity kernel + if type(self.sijs) == scipy.sparse.csr.csr_matrix: + if num_neighbors is None or num_neighbors <= 0: + raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel") + if mode != "sparse": + raise Exception("ERROR: Sparse kernel provided, but mode is not sparse") + elif type(self.sijs) == np.ndarray: + if mode != "dense": + raise Exception("ERROR: Dense kernel provided, but mode is not dense") + else: + raise Exception("Invalid kernel provided") + #TODO: is the below dimensionality check valid for both dense and sparse kernels? + if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel") + if type(self.data) != type(None): + print("WARNING: similarity kernel found. 
Provided data matrix will be ignored.") + + else: #similarity kernel has not been provided + if type(self.data) != type(None): + if np.shape(self.data)[0]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix") + if self.mode == "dense": + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.cpp_content = np.array(create_kernel(X = torch.tensor(self.data.tolist()), metric = self.metric, num_neigh = self.num_neighbors, mode = self.mode).to_dense()) + val = self.cpp_content[0] + row = list(self.cpp_content[1].astype(int)) + col = list(self.cpp_content[2].astype(int)) + if self.mode=="dense": + self.sijs = np.zeros((n,n)) + self.sijs[row,col] = val + if self.mode=="sparse": + self.sijs = scipy.sparse.csr_matrix((val, (row, col)), [n,n]) + else: + raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided") + + cpp_ground_sub = {-1} #Provide a dummy set for pybind11 binding to be successful + + #Breaking similarity matrix to simpler native data structures for implicit pybind11 binding + if self.mode=="dense": + self.cpp_sijs = self.sijs.tolist() #break numpy ndarray to native list of list datastructure + if type(self.cpp_sijs[0])==int or type(self.cpp_sijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.cpp_sijs) + self.cpp_sijs=l + self.effective_ground_set = set(range(n)) + self.numeffectivegroundset = len(self.effective_ground_set) + self.currentMin = 0 + + if self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation) + self.cpp_sijs = {} + self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal) + self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row + self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val n, arr_val arr_count arr_col + if(len(self.cpp_sijs['arr_val']) ==0 or len(self.cpp_sijs['arr_count']) ==0 or len(self.cpp_sijs['arr_col']) ==0): + raise Exception("Error: Empty/Corrupt sparse similarity kernel") + self.sparse_kernel = subcp.SparseSim(self.cpp_sijs['arr_val'],self.cpp_sijs['arr_count'],self.cpp_sijs['arr_col']) + self.effective_ground_set = set(range(n)) + self.numeffectivegroundset = len(self.effective_ground_set) + self.currentMin = 0 + + + + def evaluate(self, X: Set[int]) -> float: + effective_X = X + if len(effective_X) == 0 or len(effective_X) == 1: + return 0.0 + if self.mode == 'dense': + return get_min_dense(effective_X, self) + elif self.mode == 'sparse': + return get_min_sparse(effective_X, self) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + def evaluate_with_memoization(self, X: Set[int]) -> float: + return self.currentMin + + def get_effective_ground_set(self) -> Set[int]: + + return self.effective_ground_set + + def marginal_gain(self, X: Set[int], item: int) -> float: + effective_X = X + + if item in effective_X: + return 0.0 + + if item not in self.effective_ground_set: + return 0.0 + + min_val = 1.0 if len(effective_X) == 1 else self.currentMin + + if self.mode == 'dense': + for elem in effective_X: + if 1 - self.cpp_sijs[elem][item] < min_val 
and elem != item: + min_val = 1 - self.cpp_sijs[elem][item] + elif self.mode == 'sparse': + for elem in effective_X: + if 1 - self.sparse_kernel.get_val(elem, item) < min_val and elem != item: + min_val = 1 - self.sparse_kernel.get_val(elem, item) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + return min_val - self.currentMin + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + effective_X = X + + if enable_checks and item in effective_X: + return 0.0 + + if False and item not in self.effective_ground_set: + return 0.0 + + min_val = 1.0 if len(effective_X) == 1 else self.currentMin + + if self.mode == 'dense': + for elem in effective_X: + if 1 - self.cpp_sijs[elem][item] < min_val and elem != item: + min_val = 1 - self.cpp_sijs[elem][item] + elif self.mode == 'sparse': + for elem in effective_X: + if 1-self.sparse_kernel.get_val(item, elem) and elem!=item: + min = 1-self.sparse_kernel.get_val(item,elem) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + return min_val - self.currentMin + + def update_memoization(self, X: Set[int], item: int) -> None: + effective_X = X + + if item in effective_X: + return + + if item not in self.effective_ground_set: + return + + if len(effective_X) == 1: + if self.mode == 'dense': + for elem in effective_X: + self.currentMin = 1 - self.cpp_sijs[elem][item] + elif self.mode == 'sparse': + for elem in effective_X: + self.currentMin = 1 - self.sparse_kernel.get_val(elem, item) + else: + raise ValueError("Error: Only dense and sparse mode supported") + else: + if self.mode == 'dense': + for elem in effective_X: + if 1 - self.cpp_sijs[elem][item] < self.currentMin and elem != item: + self.currentMin = 1 - self.cpp_sijs[elem][item] + elif self.mode == 'sparse': + for elem in effective_X: + if 1 - self.sparse_kernel.get_val(elem, item) < self.currentMin and elem != item: + self.currentMin = 1 - self.sparse_kernel.get_val(elem, item) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + def clear_memoization(self) -> None: + self.currentMin = 0.0 + + def set_memoization(self, X: Set[int]) -> None: + self.currentMin = self.evaluate(X) diff --git a/pytorch/submod/DisparitySum.py b/pytorch/submod/DisparitySum.py new file mode 100644 index 0000000..efb9f1d --- /dev/null +++ b/pytorch/submod/DisparitySum.py @@ -0,0 +1,175 @@ +from helper import * +import numpy as np +import scipy +from ..SetFunction import SetFunction + +class DisparitySumFunction(SetFunction): + + def __init__(self, n, mode, sijs=None, data=None, metric="cosine", num_neighbors=None): + super(DisparitySumFunction, self).__init__() + + self.n = n + self.mode = mode + self.metric = metric + self.sijs = sijs + self.data = data + self.num_neighbors = num_neighbors + self.cpp_obj = None + self.cpp_sijs = None + self.cpp_content = None + self.effective_ground_set = None + + + + if self.n <= 0: + raise Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse']: + raise Exception("ERROR: Incorrect mode. 
Must be one of 'dense' or 'sparse'") + + + if type(self.sijs) != type(None): # User has provided similarity kernel + if type(self.sijs) == scipy.sparse.csr.csr_matrix: + if num_neighbors is None or num_neighbors <= 0: + raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel") + if mode != "sparse": + raise Exception("ERROR: Sparse kernel provided, but mode is not sparse") + elif type(self.sijs) == np.ndarray: + if mode != "dense": + raise Exception("ERROR: Dense kernel provided, but mode is not dense") + else: + raise Exception("Invalid kernel provided") + #TODO: is the below dimensionality check valid for both dense and sparse kernels? + if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel") + if type(self.data) != type(None): + print("WARNING: similarity kernel found. Provided data matrix will be ignored.") + + else: #similarity kernel has not been provided + if type(self.data) != type(None): + if np.shape(self.data)[0]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix") + + if self.mode == "dense": + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.cpp_content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors, mode = self.mode).to_dense()) + val = self.cpp_content[0] + row = list(self.cpp_content[1].astype(int)) + col = list(self.cpp_content[2].astype(int)) + if self.mode=="dense": + self.sijs = np.zeros((n,n)) + self.sijs[row,col] = val + if self.mode=="sparse": + self.num_neighbors = 0 + self.sijs = scipy.sparse.csr_matrix((val, (row, col)), [n,n]) + else: + raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided") + + cpp_ground_sub = {-1} #Provide a dummy set for pybind11 binding to be successful + + #Breaking similarity matrix to simpler native data structures for implicit pybind11 binding + if self.mode=="dense": + + self.cpp_sijs = self.sijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.cpp_sijs[0])==int or type(self.cpp_sijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.cpp_sijs) + self.cpp_sijs=l + + + self.effective_ground_set = set(range(n)) + self.numeffectivegroundset = len(self.effective_ground_set) + self.currentSum = 0 + + + + if self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation) + self.cpp_sijs = {} + self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal) + self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row + self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val + if(len(self.cpp_sijs['arr_val']) ==0 or len(self.cpp_sijs['arr_count']) ==0 or len(self.cpp_sijs['arr_col']) ==0): + raise Exception("Error: Empty/Corrupt sparse similarity kernel") + self.sparse_kernel = subcp.SparseSim(self.cpp_sijs['arr_val'],self.cpp_sijs['arr_count'],self.cpp_sijs['arr_col']) + self.effective_ground_set = set(range(n)) + self.numeffectivegroundset 
= len(self.effective_ground_set) + self.currentSum = 0 + + + def evaluate(self, X: Set[int]) -> float: + effective_X = X + if len(effective_X) == 0 : + return 0.0 + if self.mode == 'dense': + return get_sum_dense(effective_X, self) + elif self.mode == 'sparse': + return get_sum_sparse(effective_X, self) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + def evaluate_with_memoization(self, X: Set[int]) -> float: + return self.currentSum + + def get_effective_ground_set(self) -> Set[int]: + return self.effective_ground_set + + def marginal_gain(self, X: Set[int], item: int) -> float: + effective_X = X + gain = 0.0 + + if item in effective_X: + return 0.0 + + if item not in self.effective_ground_set: + return 0.0 + + + if self.mode == 'dense': + for elem in effective_X: + gain += (1 - self.cpp_sijs[elem][item]) + elif self.mode == 'sparse': + for elem in effective_X: + gain += (1 - self.sparse_kernel.get_val(item, elem)) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + return gain + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + effective_X = X + gain =0.0 + + if enable_checks and item in effective_X: + return 0.0 + + if False and item not in self.effective_ground_set: + return 0.0 + + + + if self.mode == 'dense': + for elem in effective_X: + gain += (1 - self.cpp_sijs[elem][item]) + elif self.mode == 'sparse': + for elem in effective_X: + gain += (1 - self.sparse_kernel.get_val(item, elem)) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + return gain + + def update_memoization(self, X: Set[int], item: int) -> None: + + + self.currentSum += self.marginal_gain(X, item) + + + def clear_memoization(self) -> None: + self.currentSum = 0.0 + + def set_memoization(self, X: Set[int]) -> None: + self.currentSum = self.evaluate(X) diff --git a/pytorch/submod/FacilityLocation.py b/pytorch/submod/FacilityLocation.py new file mode 100644 index 0000000..1a5a155 --- /dev/null +++ b/pytorch/submod/FacilityLocation.py @@ -0,0 +1,431 @@ +import numpy as np +import scipy +from scipy import sparse +from helper import * +from ..SetFunction import SetFunction + +class FacilityLocationFunction(SetFunction): + def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, data_rep=None, num_clusters=None, cluster_labels=None, metric="cosine", num_neighbors=None, + dense_kernel = None, data_master = None, create_dense_cpp_kernel_in_python = True, partial = False, seperate_master = False): + self.n = n + self.n_rep = n_rep + self.mode = mode + self.metric = metric + self.sijs = sijs + self.data = data + self.partial = partial + self.data_rep = data_rep + self.num_neighbors = num_neighbors + self.separate_rep = separate_rep + self.clusters = None + self.cluster_sijs = None + self.cluster_map = None + self.cluster_labels = cluster_labels + self.num_clusters = num_clusters + self.cpp_obj = None + self.cpp_sijs = None + self.cpp_ground_sub = None + self.cpp_content = None + self.effective_ground = None + self.seperate_master = seperate_master + self.dense_kernel = dense_kernel + self.data_master = data_master + + if self.n <= 0: + raise Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse', 'clustered']: + raise Exception("ERROR: Incorrect mode. 
Must be one of 'dense', 'sparse' or 'clustered'") + + if self.separate_rep == True: + if self.n_rep is None or self.n_rep <= 0: + raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive") + if self.mode != "dense": + raise Exception("Only dense mode supported if separate_rep = True") + + if self.mode == "clustered": + if type(self.cluster_labels) != type(None) and (self.num_clusters is None or self.num_clusters <= 0): + raise Exception("ERROR: Positive number of clusters must be provided in clustered mode when cluster_labels is provided") + if type(self.cluster_labels) == type(None) and self.num_clusters is not None and self.num_clusters <= 0: + raise Exception("Invalid number of clusters provided") + if type(self.cluster_labels) != type(None) and len(self.cluster_labels) != self.n: + raise Exception("ERROR: cluster_labels's size is NOT same as ground set size") + if type(self.cluster_labels) != type(None) and not all(ele >= 0 and ele <= self.num_clusters-1 for ele in self.cluster_labels): + raise Exception("Cluster IDs/labels contain invalid values") + + if type(self.sijs) != type(None): + if create_dense_cpp_kernel_in_python == False: + raise Exception("ERROR: create_dense_cpp_kernel_in_python is to be set to False ONLY when a similarity kernel is not provided and a CPP kernel is desired to be created in CPP") + if type(self.sijs) == scipy.sparse.csr.csr_matrix: + if num_neighbors is None or num_neighbors <= 0: + raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel") + if mode != "sparse": + raise Exception("ERROR: Sparse kernel provided, but mode is not sparse") + elif type(self.sijs) == np.ndarray: + if self.separate_rep is None: + raise Exception("ERROR: separate_rep bool must be specified with custom dense kernel") + if mode != "dense": + raise Exception("ERROR: Dense kernel provided, but mode is not dense") + else: + raise Exception("Invalid kernel provided") + + if self.separate_rep == True: + if np.shape(self.sijs)[1] != self.n or np.shape(self.sijs)[0] != self.n_rep: + raise Exception("ERROR: Inconsistency between n_rep, n and no of rows, columns of given kernel") + else: + if np.shape(self.sijs)[0] != self.n or np.shape(self.sijs)[1] != self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel") + + if type(self.data) != type(None) or type(self.data_rep) != type(None): + print("WARNING: similarity kernel found. 
Provided data matrix will be ignored.") + else: + if type(self.data) != type(None): + if self.separate_rep == True: + if type(self.data_rep) == type(None): + raise Exception("Represented data matrix not given") + if np.shape(self.data)[0] != self.n or np.shape(self.data_rep)[0] != self.n_rep: + raise Exception("ERROR: Inconsistentcy between n, n_rep and no of examples in the given ground data matrix and represented data matrix") + else: + if type(self.data_rep) != type(None): + print("WARNING: Represented data matrix not required but given, will be ignored.") + if np.shape(self.data)[0] != self.n: + raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix") + + if self.mode == "clustered": + self.clusters, self.cluster_sijs, self.cluster_map = create_cluster_kernels(self.data.tolist(), self.metric, self.cluster_labels, self.num_clusters) + else: + if self.separate_rep == True: + if create_dense_cpp_kernel_in_python == True: + self.sijs = np.array(create_kernel_NS(self.data.tolist(), self.data_rep.tolist(), self.metric)) + else: + if self.mode == "dense": + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + if create_dense_cpp_kernel_in_python == True: + pass + # self.sijs = np.array(create_square_kernel_dense(self.data.tolist(), self.metric)) + else: + self.cpp_content = np.array(create_kernel(self.data.tolist(), self.metric, self.num_neighbors)) + val = self.cpp_content[0] + row = list(self.cpp_content[1].astype(int)) + col = list(self.cpp_content[2].astype(int)) + self.sijs = sparse.csr_matrix((val, (row, col)), [n,n]) + else: + raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided") + + # self.cpp_ground_sub = {-1} + + if separate_rep == None: + self.separate_rep = False + + elif self.mode == "sparse": + self.cpp_sijs = {} + self.cpp_sijs["arr_val"] = self.sijs.data.tolist() + self.cpp_sijs["arr_count"] = self.sijs.indptr.tolist() + self.cpp_sijs["arr_col"] = self.sijs.indices.tolist() + # self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs["arr_val"], self.cpp_sijs["arr_count"], self.cpp_sijs["arr_col"]) + elif self.mode == "clustered": + l_temp = [] + for el in self.cluster_sijs: + temp = el.tolist() + if isinstance(temp[0], int) or isinstance(temp[0], float): + l = [] + l.append(temp) + temp = l + l_temp.append(temp) + self.cluster_sijs = l_temp + + + if self.mode == 'dense': + if self.dense_kernel == None: + self.dense_constructor_no_kernel(n = self.n, data = self.data, data_master = self.data_master) ## dense mode with no dense_kernel + elif self.dense_kernel != None: + self.dense_constructor(n = self.n, dense_kernel = self.dense_kernel, ground = self.data, partial = self.partial, separate_master = self.separate_master) ## dense mode with dense_kernel + ### other modes are remaining + elif self.mode == 'sparse': + pass + elif self.mode == 'clustered': + pass + + self.effective_ground = self.get_effective_ground_set() + + + def dense_constructor(self, n, dense_kernel, partial = False, ground = None, separate_master = False): + self.n = n + self.mode = 'dense' + self.dense_kernel = dense_kernel + self.partial = partial + self.separate_master = separate_master + + if partial: + self.effective_ground_set = ground + else: + self.effective_ground_set = set(range(n)) + + self.num_effective_groundset = len(self.effective_ground_set) + + if separate_master: + self.n_master = len(dense_kernel) + self.master_set = set(range(self.n_master)) + else: + self.n_master = 
self.num_effective_groundset + self.master_set = self.effective_ground_set + + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + + if partial: + self.original_to_partial_index_map = {val: i for i, val in enumerate(self.effective_ground_set)} + + # Constructor for dense mode (kernel not supplied) + def dense_constructor_no_kernel(self, n, data, data_master, separate_master = False, metric = 'cosine'): + if separate_master: + self.dense_kernel = create_kernel_NS(data, data_master, metric) + else: + self.dense_kernel = create_square_kernel_dense(data, metric) + + self.mode = 'dense' + self.partial = False + + self.n = n + self.separate_master = separate_master + + self.effective_ground_set = set(range(n)) + self.num_effective_groundset = n + + if separate_master: + self.n_master = len(self.dense_kernel) + self.master_set = set(range(self.n_master)) + else: + self.n_master = n + self.master_set = self.effective_ground_set + + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + + # Constructor for sparse mode + def sparse_constructor(self, n, arr_val, arr_count, arr_col): + self.n = n + self.mode = 'sparse' + self.partial = False + self.separate_master = False + + self.sparse_kernel = self.SparseSim(arr_val, arr_count, arr_col) + + self.effective_ground_set = set(range(n)) + self.num_effective_groundset = n + + self.n_master = self.num_effective_groundset + self.master_set = self.effective_ground_set + + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + + # Constructor for cluster mode + def cluster_constructor(self, n, clusters, cluster_kernels, cluster_index_map): + self.n = n + self.mode = 'clustered' + self.num_clusters = len(clusters) + self.clusters = clusters + self.cluster_kernels = cluster_kernels + self.cluster_index_map = cluster_index_map + self.partial = False + self.separate_master = False + + self.effective_ground_set = set(range(n)) + self.num_effective_groundset = n + + self.n_master = self.num_effective_groundset + self.master_set = self.effective_ground_set + + self.cluster_ids = [0] * n + for i, ci in enumerate(clusters): + for ind in ci: + self.cluster_ids[ind] = i + + self.relevant_x = [[] for _ in range(self.num_clusters)] + self.clustered_similarity_with_nearest_in_relevant_x = np.zeros(n) + + # def clone(self): + # return FacilityLocation(**self.__dict__) + + def evaluate(self, X): + effective_X = X.intersection(self.effective_ground_set) if self.partial else X + result = 0 + + if effective_X: + if self.mode == 'dense': + for ind in self.master_set: + result += self.get_max_sim_dense(ind, effective_X) + elif self.mode == 'sparse': + for ind in self.master_set: + result += self.get_max_sim_sparse(ind, effective_X) + else: # clustered + for i in range(self.num_clusters): + relevant_subset = X.intersection(self.clusters[i]) + if relevant_subset: + for ind in self.clusters[i]: + result += self.get_max_sim_cluster(ind, relevant_subset, i) + + return result + + def evaluate_with_memoization(self, X): + effective_X = X.intersection(self.effective_ground_set) if self.partial else X + result = 0 + + if effective_X: + if self.mode == 'dense' or self.mode == 'sparse': + for ind in self.master_set: + result += self.similarity_with_nearest_in_effective_x[ind] + else: # clustered + for i in range(self.num_clusters): + if self.relevant_x[i]: + for ind in self.clusters[i]: + result += self.clustered_similarity_with_nearest_in_relevant_x[ind] + + return result + + def marginal_gain(self, X, item): + effective_X = 
X.intersection(self.effective_ground_set) if self.partial else X + gain = 0 + + if item not in effective_X: + if self.mode == 'dense': + print(self.master_set) + for ind in self.master_set: + m = self.get_max_sim_dense(ind, effective_X) + if self.dense_kernel[item][ind] > m: + m = self.dense_kernel[item][ind] + gain += m - self.similarity_with_nearest_in_effective_x[ind] + elif self.mode == 'sparse': + for ind in self.master_set: + m = self.get_max_sim_sparse(ind, effective_X) + if self.sparse_kernel[item, ind] > m: + m = self.sparse_kernel[item, ind] + gain += m - self.similarity_with_nearest_in_effective_x[ind] + else: # clustered + cluster_id = self.cluster_ids[item] + relevant_subset = effective_X.intersection(self.clusters[cluster_id]) + for ind in self.clusters[cluster_id]: + m = self.get_max_sim_cluster(ind, relevant_subset, cluster_id) + if self.cluster_kernels[cluster_id][item][ind] > m: + m = self.cluster_kernels[cluster_id][item][ind] + gain += m - self.clustered_similarity_with_nearest_in_relevant_x[ind] + + return gain + def marginal_gain_with_memoization(self, X, item, enable_checks): + effective_X = set() + gain = 0 + + if self.partial: + effective_X = X.intersection(self.effective_ground_set) + else: + effective_X = X + + if enable_checks and item in effective_X: + return 0 + + if self.partial and item not in self.effective_ground_set: + return 0 + + if self.mode == 'dense': + for ind in self.master_set: + if self.partial: + if self.dense_kernel[ind][item] > self.similarity_with_nearest_in_effective_x[self.original_to_partial_index_map[ind]]: + gain += self.dense_kernel[ind][item] - self.similarity_with_nearest_in_effective_x[self.original_to_partial_index_map[ind]] + else: + if self.dense_kernel[ind][item] > self.similarity_with_nearest_in_effective_x[ind]: + gain += self.dense_kernel[ind][item] - self.similarity_with_nearest_in_effective_x[ind] + elif self.mode == 'sparse': + for ind in self.master_set: + temp = self.sparse_kernel[ind, item] + if temp > self.similarity_with_nearest_in_effective_x[ind]: + gain += temp - self.similarity_with_nearest_in_effective_x[ind] + else: # clustered + i = self.cluster_ids[item] + item_ = self.cluster_index_map[item] + relevant_subset = self.relevant_x[i] + ci = self.clusters[i] + + if len(relevant_subset) == 0: + for ind in ci: + ind_ = self.cluster_index_map[ind] + gain += self.cluster_kernels[i][ind_][item_] + else: + for ind in ci: + ind_ = self.cluster_index_map[ind] + if self.cluster_kernels[i][ind_][item_] > self.clustered_similarity_with_nearest_in_relevant_x[ind]: + gain += self.cluster_kernels[i][ind_][item_] - self.clustered_similarity_with_nearest_in_relevant_x[ind] + + return gain + + + def update_memoization(self, X, item): + effective_X = set() + + if self.partial: + effective_X = X.intersection(self.effective_ground_set) + else: + effective_X = X + + if item in effective_X: + return + + if self.partial and item not in self.effective_ground_set: + return + + if self.mode == 'dense': + for ind in self.master_set: + if self.partial: + if self.dense_kernel[ind][item] > self.similarity_with_nearest_in_effective_x[self.original_to_partial_index_map[ind]]: + self.similarity_with_nearest_in_effective_x[self.original_to_partial_index_map[ind]] = self.dense_kernel[ind][item] + else: + if self.dense_kernel[ind][item] > self.similarity_with_nearest_in_effective_x[ind]: + self.similarity_with_nearest_in_effective_x[ind] = self.dense_kernel[ind][item] + elif self.mode == 'sparse': + for ind in self.master_set: + temp_val = 
self.sparse_kernel[ind, item] + if temp_val > self.similarity_with_nearest_in_effective_x[ind]: + self.similarity_with_nearest_in_effective_x[ind] = temp_val + else: # clustered + i = self.cluster_ids[item] + item_ = self.cluster_index_map[item] + ci = self.clusters[i] + + for ind in ci: + ind_ = self.cluster_index_map[ind] + if self.cluster_kernels[i][ind_][item_] > self.clustered_similarity_with_nearest_in_relevant_x[ind]: + self.clustered_similarity_with_nearest_in_relevant_x[ind] = self.cluster_kernels[i][ind_][item_] + + self.relevant_x[i].add(item) + + + def get_effective_ground_set(self): + return set(range(self.n)) + + + def cluster_init(self, n_, dense_kernel_, ground_, partial, lambda_): + self.n = n_ + self.partial = partial + self.effective_ground_set = ground_ + self.n_master = len(dense_kernel_) + self.master_set = set(range(self.n_master)) + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + self.mode = 'dense' + self.dense_kernel = dense_kernel_ + self.original_to_partial_index_map = {val: i for i, val in enumerate(self.effective_ground_set)} + self.clustered_similarity_with_nearest_in_relevant_x = np.zeros(n_) + self.relevant_x = [set() for _ in range(n_)] + + + def clear_memoization(self): + if self.mode == 'dense' or self.mode == 'sparse': + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + else: + self.relevant_x = [set() for _ in range(self.num_clusters)] + self.clustered_similarity_with_nearest_in_relevant_x = np.zeros(self.n) + + + def set_memoization(self, X): + self.clear_memoization() + temp = set() + for elem in X: + self.update_memoization(temp, elem) + temp.add(elem) diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py new file mode 100644 index 0000000..5cfb774 --- /dev/null +++ b/pytorch/submod/GraphCut.py @@ -0,0 +1,378 @@ +from typing import List, Set +import random +from helper import * +from ..SetFunction import SetFunction + +class GraphCutFunction(SetFunction): + def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, + master_ground_kernel: List[List[float]] = None, + ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None, + arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False, + ground: Set[int] = None): + super(SetFunction, self).__init__() + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.n = n + self.mode = mode + self.lambda_ = lambdaVal + self.separate_rep=separate_rep + self.n_rep = n_rep + self.partial = partial + self.original_to_partial_index_map = {} + self.mgsijs = mgsijs + self.ggsijs = ggsijs + self.data = data + self.data_rep=data_rep + self.metric = metric + self.num_neighbors = num_neighbors + self.effective_ground_set = set(range(n)) + self.clusters=None + self.cluster_sijs=None + self.cluster_map=None + self.ggsijs = None + self.mgsijs = None + self.content = None + self.effective_ground = None + + if self.n <= 0: + raise Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse']: + raise Exception("ERROR: Incorrect mode. 
Must be one of 'dense' or 'sparse'") + if self.separate_rep == True: + if self.n_rep is None or self.n_rep <=0: + raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive") + if self.mode != "dense": + raise Exception("Only dense mode supported if separate_rep = True") + if (type(self.mgsijs) != type(None)) and (type(self.mgsijs) != np.ndarray): + raise Exception("mgsijs provided, but is not dense") + if (type(self.ggsijs) != type(None)) and (type(self.ggsijs) != np.ndarray): + raise Exception("ggsijs provided, but is not dense") + + if mode == "dense": + self.master_ground_kernel = master_ground_kernel + self.ground_ground_kernel = ground_ground_kernel + + if ground_ground_kernel is not None: + self.separate_master = True + + if partial: + self.effective_ground_set = ground + else: + self.effective_ground_set = set(range(n)) + + self.num_effective_ground_set = len(self.effective_ground_set) + + self.n_master = self.num_effective_ground_set + self.master_set = self.effective_ground_set + + if partial: + self.original_to_partial_index_map = {elem: ind for ind, elem in enumerate(self.effective_ground_set)} + + self.total_similarity_with_subset = [random.random() for _ in range(self.num_effective_ground_set)] + self.total_similarity_with_master = [random.random() for _ in range(self.num_effective_ground_set)] + self.master_ground_kernel = [[random.random() for _ in range(self.num_effective_ground_set)] for _ in range(self.num_effective_ground_set)] + self.ground_ground_kernel = [[random.random() for _ in range(self.num_effective_ground_set)] for _ in range(self.num_effective_ground_set)] + for elem in self.effective_ground_set: + index = self.original_to_partial_index_map[elem] if partial else elem + self.total_similarity_with_subset[index] = 1 + self.total_similarity_with_master[index] = 1 + for j in self.master_set: + self.total_similarity_with_master[index] += self.master_ground_kernel[j][elem] + + if self.separate_rep == True: + if type(self.mgsijs) == type(None): + #not provided mgsij - make it + if (type(data) == type(None)) or (type(data_rep) == type(None)): + raise Exception("Data missing to compute mgsijs") + if np.shape(self.data)[0]!=self.n or np.shape(self.data_rep)[0]!=self.n_rep: + raise Exception("ERROR: Inconsistentcy between n, n_rep and no of examples in the given ground data matrix and represented data matrix") + + #create_kernel_NS is there .................... 
find it and define it not found in helper.py but used as here + # self.mgsijs = np.array(subcp.create_kernel_NS(self.data.tolist(),self.data_rep.tolist(), self.metric)) + else: + #provided mgsijs - verify it's dimensionality + if np.shape(self.mgsijs)[1]!=self.n or np.shape(self.mgsijs)[0]!=self.n_rep: + raise Exception("ERROR: Inconsistency between n_rep, n and no of rows, columns of given mg kernel") + if type(self.ggsijs) == type(None): + #not provided ggsijs - make it + if type(data) == type(None): + raise Exception("Data missing to compute ggsijs") + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense()) + val = self.cpp_content[0] + row = list(self.cpp_content[1].astype(int)) + col = list(self.cpp_content[2].astype(int)) + self.ggsijs = np.zeros((n,n)) + self.ggsijs[row,col] = val + else: + #provided ggsijs - verify it's dimensionality + if np.shape(self.ggsijs)[0]!=self.n or np.shape(self.ggsijs)[1]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity gg kernel") + + else: + if (type(self.ggsijs) == type(None)) and (type(self.mgsijs) == type(None)): + #no kernel is provided make ggsij kernel + if type(data) == type(None): + raise Exception("Data missing to compute ggsijs") + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense()) + val = self.content[0] + row = list(self.content[1].astype(int)) + col = list(self.content[2].astype(int)) + self.ggsijs = np.zeros((n,n)) + self.ggsijs[row,col] = val + elif (type(self.ggsijs) == type(None)) and (type(self.mgsijs) != type(None)): + #gg is not available, mg is - good + #verify that it is dense and of correct dimension + if (type(self.mgsijs) != np.ndarray) or np.shape(self.mgsijs)[1]!=self.n or np.shape(self.mgsijs)[0]!=self.n: + raise Exception("ERROR: Inconsistency between n and no of rows, columns of given kernel") + self.ggsijs = self.mgsijs + elif (type(self.ggsijs) != type(None)) and (type(self.mgsijs) == type(None)): + #gg is available, mg is not - good + #verify that it is dense and of correct dimension + if (type(self.ggsijs) != np.ndarray) or np.shape(self.ggsijs)[1]!=self.n or np.shape(self.ggsijs)[0]!=self.n: + raise Exception("ERROR: Inconsistency between n and no of rows, columns of given kernel") + else: + #both are available - something is wrong + raise Exception("Two kernels have been wrongly provided when separate_rep=False") + elif mode == "sparse": + if self.separate_rep == True: + raise Exception("Separate represented is supported only in dense mode") + if self.num_neighbors is None or self.num_neighbors <=0: + raise Exception("Valid num_neighbors is needed for sparse mode") + if (type(self.ggsijs) == type(None)) and (type(self.mgsijs) == type(None)): + #no kernel is provided make ggsij sparse kernel + if type(data) == type(None): + raise Exception("Data missing to compute ggsijs") + self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense()) + val 
= self.content[0] + row = list(self.content[1].astype(int)) + col = list(self.content[2].astype(int)) + self.ggsijs = sparse.csr_matrix((val, (row, col)), [n,n]) + elif (type(self.ggsijs) == type(None)) and (type(self.mgsijs) != type(None)): + #gg is not available, mg is - good + #verify that it is sparse + if type(self.mgsijs) != scipy.sparse.csr.csr_matrix: + raise Exception("Provided kernel is not sparse") + self.ggsijs = self.mgsijs + elif (type(self.ggsijs) != type(None)) and (type(self.mgsijs) == type(None)): + #gg is available, mg is not - good + #verify that it is dense and of correct dimension + if type(self.ggsijs) != scipy.sparse.csr.csr_matrix: + raise Exception("Provided kernel is not sparse") + else: + #both are available - something is wrong + raise Exception("Two kernels have been wrongly provided when separate_rep=False") + + if self.separate_rep==None: + self.separate_rep = False + + if self.mode=="dense" and self.separate_rep == False : + self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.ggsijs) + self.ggsijs=l + + elif self.mode=="dense" and self.separate_rep == True : + self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.ggsijs) + self.ggsijs=l + + self.mgsijs = self.mgsijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.mgsijs[0])==int or type(self.mgsijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.mgsijs) + self.mgsijs=l + + # self.cpp_obj = GraphCutpy(self.n, self.cpp_mgsijs, self.cpp_ggsijs, self.lambdaVal) + + elif self.mode == "sparse": + self.ggsijs = {} + # self.ggsijs['arr_val'] = self.ggsijs.data.tolist() #contains non-zero values in matrix (row major traversal) + # self.ggsijs['arr_count'] = self.ggsijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row + # self.ggsijs['arr_col'] = self.ggsijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val + # # self.cpp_obj = GraphCutpy(self.n, self.cpp_ggsijs['arr_val'], self.cpp_ggsijs['arr_count'], self.cpp_ggsijs['arr_col'], lambdaVal) + else: + raise Exception("Invalid") + + self.effective_ground = self.get_effective_ground_set() + + # if mode == "dense": + + # elif mode == "sparse": + # if not arr_val or not arr_count or not arr_col: + # raise ValueError("Error: Empty/Corrupt sparse similarity kernel") + + # self.sparse_kernel = SparseSim(arr_val, arr_count, arr_col) + + # self.effective_ground_set = set(range(n)) + # self.num_effective_ground_set = len(self.effective_ground_set) + + # self.n_master = self.num_effective_ground_set + # self.master_set = self.effective_ground_set + + # self.total_similarity_with_subset = [0] * n + # self.total_similarity_with_master = [0] * n + + # for i in range(n): + # self.total_similarity_with_subset[i] = 0 + # self.total_similarity_with_master[i] = 0 + + # for j in range(n): + # self.total_similarity_with_master[i] += 
self.sparse_kernel.get_val(j, i) + + # else: + # raise ValueError("Invalid mode") + + def evaluate(self, X: Set[int]) -> float: + effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + if not effective_x: + return 0 + + result = 0 + + if self.mode == "dense": + for elem in effective_x: + index = self.original_to_partial_index_map[elem] if self.partial else elem + result += self.total_similarity_with_master[index] + + for elem2 in effective_x: + result -= self.lambda_ * self.ground_ground_kernel[elem][elem2] + + elif self.mode == "sparse": + for elem in effective_x: + index = self.original_to_partial_index_map[elem] if self.partial else elem + result += self.total_similarity_with_master[index] + + for elem2 in effective_x: + result -= self.lambda_ * self.sparse_kernel.get_val(elem, elem2) + + return result + + def evaluate_with_memoization(self, X: Set[int]) -> float: + effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + if not effective_x: + return 0 + + result = 0 + + if self.mode == "dense" or self.mode == "sparse": + for elem in effective_x: + index = self.original_to_partial_index_map[elem] if self.partial else elem + result += self.total_similarity_with_master[index] - self.lambda_ * self.total_similarity_with_subset[index] + + return result + + def marginal_gain(self, X: Set[int], item: int) -> float: + effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + if item in effective_x or item not in self.effective_ground_set: + return 0 + + gain = self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item] + + if self.mode == "dense": + for elem in effective_x: + gain -= 2 * self.lambda_ * self.ground_ground_kernel[item][elem] + gain -= self.lambda_ * self.ground_ground_kernel[item][item] + + elif self.mode == "sparse": + for elem in effective_x: + gain -= 2 * self.lambda_ * self.sparse_kernel.get_val(item, elem) + gain -= self.lambda_ * self.sparse_kernel.get_val(item, item) + return gain + + # def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + # effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + # if enable_checks and item in effective_x: + # return 0 + + # if self.partial and item not in self.effective_ground_set: + # return 0 + + # gain = 0 + + # if self.mode == "dense": + # index = self.original_to_partial_index_map[item] if self.partial else item + # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] + # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.ground_ground_kernel[item][item] + + # elif self.mode == "sparse": + # index = self.original_to_partial_index_map[item] if self.partial else item + # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.sparse_kernel.get_val(item, item) + + # return gain + + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool) -> float: + effective_X = set() + gain = 0 + if self.partial: + effective_X = X.intersection(self.effective_ground_set) + else: + effective_X = X + + if enable_checks and item in effective_X: + return 0 + + if self.partial and item not in self.effective_ground_set: + return 0 + + if self.mode == 'dense': + gain = 
self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item] \
+                   - 2 * self.lambda_ * self.total_similarity_with_subset[self.original_to_partial_index_map[item] if self.partial else item] \
+                   - self.lambda_ * self.ground_ground_kernel[item][item]
+        elif self.mode == 'sparse':
+            gain = self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item] \
+                   - 2 * self.lambda_ * self.total_similarity_with_subset[self.original_to_partial_index_map[item] if self.partial else item] \
+                   - self.lambda_ * self.sparse_kernel.get_val(item, item)
+        else:
+            raise ValueError("Error: Only dense and sparse mode supported")
+        return gain
+
+
+    def update_memoization(self, X: Set[int], item: int):
+        effective_x = X.intersection(self.effective_ground_set) if self.partial else X
+
+        if item in effective_x or item not in self.effective_ground_set:
+            return
+
+        if self.mode == "dense":
+            for elem in self.effective_ground_set:
+                index = self.original_to_partial_index_map[elem] if self.partial else elem
+                self.total_similarity_with_subset[index] += self.ground_ground_kernel[elem][item]
+
+        elif self.mode == "sparse":
+            for elem in self.effective_ground_set:
+                index = self.original_to_partial_index_map[elem] if self.partial else elem
+                self.total_similarity_with_subset[index] += self.sparse_kernel.get_val(elem, item)
+
+    def get_effective_ground_set(self) -> Set[int]:
+        return self.effective_ground_set
+
+    def clear_memoization(self):
+        if self.mode == "dense" or self.mode == "sparse":
+            self.total_similarity_with_subset = [0] * self.num_effective_ground_set
+
+    def set_memoization(self, X: Set[int]):
+        temp = set()
+        for elem in X:
+            self.update_memoization(temp, elem)
+            temp.add(elem)
diff --git a/pytorch/submod/LogDeterminant.py b/pytorch/submod/LogDeterminant.py
new file mode 100644
index 0000000..96b0e50
--- /dev/null
+++ b/pytorch/submod/LogDeterminant.py
@@ -0,0 +1,248 @@
+import math
+from collections import defaultdict
+import scipy
+from .helper import *
+from ..SetFunction import SetFunction
+
+class LogDeterminantFunction(SetFunction):
+
+    def dot_product(self, x, y):
+        return sum(xi * yi for xi, yi in zip(x, y))
+
+    def __init__(self, n, mode, lambdaVal, arr_val=None, arr_count=None, arr_col=None, dense_kernel=None, partial=None,
+                 sijs=None, data=None, metric="cosine", num_neighbors=None, memoizedC=None, memoizedD=None, data_master=None):
+        self.n = n
+        self.mode = mode
+        self.metric = metric
+        self.sijs = sijs
+        self.data = data
+        self.num_neighbors = num_neighbors
+        self.lambdaVal = lambdaVal
+        self.content = None
+        self.effective_ground = None
+        self.partial = partial
+        self.effective_ground_set = set(range(n))
+        self.memoizedC = memoizedC
+        self.memoizedD = memoizedD
+        self.data_master = data_master
+        self.dense_kernel = dense_kernel
+
+        if self.n <= 0:
+            raise Exception("ERROR: Number of elements in ground set must be positive")
+
+        if self.mode not in ['dense', 'sparse', 'clustered']:
+            raise Exception("ERROR: Incorrect mode. Must be one of 'dense', 'sparse' or 'clustered'")
+
+        if self.metric not in ['euclidean', 'cosine']:
+            raise Exception("ERROR: Unsupported metric. 
Must be 'euclidean' or 'cosine'") + if type(self.sijs) != type(None): # User has provided similarity kernel + if type(self.sijs) == scipy.sparse.csr.csr_matrix: + if num_neighbors is None or num_neighbors <= 0: + raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel") + if mode != "sparse": + raise Exception("ERROR: Sparse kernel provided, but mode is not sparse") + elif type(self.sijs) == np.ndarray: + if mode != "dense": + raise Exception("ERROR: Dense kernel provided, but mode is not dense") + else: + raise Exception("Invalid kernel provided") + #TODO: is the below dimensionality check valid for both dense and sparse kernels? + if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel") + if type(self.data) != type(None): + print("WARNING: similarity kernel found. Provided data matrix will be ignored.") + else: #similarity kernel has not been provided + if type(self.data) != type(None): + if np.shape(self.data)[0]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix") + + if self.mode == "dense": + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.content = np.array(create_kernel( X = self.data.tolist(), metric = self.metric, mode = self.mode, num_neigh = self.num_neighbors)) + val = self.content[0] + row = list(self.content[1].astype(int)) + col = list(self.content[2].astype(int)) + if self.mode=="dense": + self.sijs = np.zeros((n,n)) + self.sijs[row,col] = val + if self.mode=="sparse": + self.sijs = sparse.csr_matrix((val, (row, col)), [n,n]) + else: + raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided") + + + #Breaking similarity matrix to simpler native data structures for implicit pybind11 binding + if self.mode=="dense": + self.sijs = self.sijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.sijs[0])==int or type(self.sijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.sijs) + self.sijs=l + + self.effective_ground = self.get_effective_ground_set() + if self.mode == 'dense': + if self.dense_kernel == None: + self.dense_kernel = create_kernel_NS(X_ground = self.data, X_master = self.data, metric = self.metric) + if self.partial: + self.effectiveGroundSet = self.data + else: + self.effectiveGroundSet = set(range(n)) + self.numEffectiveGroundset = len(self.effectiveGroundSet) + self.memoizedC = [[] for _ in range(self.numEffectiveGroundset)] + self.prevDetVal = 0 + self.memoizedD = [] + self.prevItem = -1 + + if self.partial: + ind = 0 + for it in self.effectiveGroundSet: + self.originalToPartialIndexMap[it] = ind + ind += 1 + self.memoizedD.append(np.sqrt(self.dense_kernel[it][it] + self.lambdaVal)) + else: + for i in range(self.n): + self.memoizedD.append(np.sqrt(self.dense_kernel[i][i] + self.lambdaVal)) + + elif arr_val is not None and arr_count is not None and arr_col is not None: + self.n = n + self.mode = 'sparse' + self.lambdaVal = lambdaVal + self.sparseKernel = SparseSim(arr_val, arr_count, arr_col) + self.effectiveGroundSet = set(range(n_)) + self.numEffectiveGroundset = len(self.effectiveGroundSet) + self.memoizedC = [[] for _ in 
range(n_)] + self.memoizedD = [] + self.prevDetVal = 0 + self.prevItem = -1 + + for i in range(self.n): + self.memoizedD.append(np.sqrt(self.sparseKernel.get_val(i, i) + self.lambdaVal)) + + else: + raise ValueError("Invalid constructor arguments. Please provide either denseKernel or sparse kernel data.") + + def evaluate(self, X): + currMemoizedC = self.memoizedC.copy() + currMemoizedD = self.memoizedD.copy() + currprevItem = self.prevItem + currprevDetVal = self.prevDetVal + self.setMemoization(X) + result = self.evaluate_with_memoization(X) + self.memoizedC = currMemoizedC + self.memoizedD = currMemoizedD + self.prevItem = currprevItem + self.prevDetVal = currprevDetVal + return result + + def evaluate_with_memoization(self, X): + return self.prevDetVal + + def marginal_gain(self, X, item): + currMemoizedC = self.memoizedC.copy() + currMemoizedD = self.memoizedD.copy() + currprevItem = self.prevItem + currprevDetVal = self.prevDetVal + self.set_memoization(X) + result = self.marginal_gain_with_memoization(X, item) + self.memoizedC = currMemoizedC + self.memoizedD = currMemoizedD + self.prevItem = currprevItem + self.prevDetVal = currprevDetVal + return result + + def marginal_gain_with_memoization(self, X, item, enableChecks=True): + effectiveX = X.intersection(self.effective_ground_set) if self.partial else X + gain = 0 + + if enableChecks and item in effectiveX: + return 0 + + if self.partial and item not in self.effective_ground_set: + return 0 + + itemIndex = self.originalToPartialIndexMap[item] if self.partial else item + + if self.mode == "dense": + if len(effectiveX) == 0: + gain = math.log(self.memoizedD[itemIndex] * self.memoizedD[itemIndex]) + elif len(effectiveX) == 1: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + e = self.dense_kernel[self.prevItem][item] / self.memoizedD[prevItemIndex] + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e)) + else: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + e = (self.dense_kernel[self.prevItem][item] - + self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[itemIndex])) / self.memoizedD[prevItemIndex] + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e)) + elif self.mode == "sparse": + if len(effectiveX) == 0: + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex])) + elif len(effectiveX) == 1: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + e = self.sparseKernel.get_val(self.prevItem, item) / self.memoizedD[prevItemIndex] + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e)) + else: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + e = (self.sparseKernel.get_val(self.prevItem, item) - + self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[itemIndex])) / self.memoizedD[prevItemIndex] + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e)) + else: + raise ValueError("Only dense and sparse mode supported") + + return gain + + def update_memoization(self, X, item): + effectiveX = X.intersection(self.effective_ground_set) if self.partial else X + + if item in effectiveX: + return + + if item not in self.effective_ground_set: + return + + self.prevDetVal += self.marginal_gain_with_memoization(X, item) + + if len(effectiveX) == 0: + pass + 
else:
+            prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem
+            prevDValue = self.memoizedD[prevItemIndex]
+
+            for i in self.effectiveGroundSet:
+                iIndex = self.originalToPartialIndexMap[i] if self.partial else i
+
+                if i in effectiveX:
+                    continue
+
+                e = 0
+                if len(effectiveX) == 1:
+                    e = self.dense_kernel[self.prevItem][i] / prevDValue
+                    self.memoizedC[iIndex].append(e)
+                else:
+                    e = (self.dense_kernel[self.prevItem][i] -
+                         self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[iIndex])) / prevDValue
+                    self.memoizedC[iIndex].append(e)
+
+                self.memoizedD[iIndex] = math.sqrt(math.fabs(self.memoizedD[iIndex] * self.memoizedD[iIndex] - e * e))
+
+        self.prevItem = item
+
+    def get_effective_ground_set(self):
+        return self.effective_ground_set
+
+    def clear_memoization(self):
+        self.memoizedC.clear()
+        self.memoizedC = defaultdict(list)
+        self.prevDetVal = 0
+        self.prevItem = -1
+
+        if self.mode == "dense":
+            if self.partial:
+                for it in self.effective_ground_set:
+                    index = self.originalTo
diff --git a/pytorch/submod/ProbabilisticSetCover.py b/pytorch/submod/ProbabilisticSetCover.py
new file mode 100644
index 0000000..5eaaaa1
--- /dev/null
+++ b/pytorch/submod/ProbabilisticSetCover.py
@@ -0,0 +1,81 @@
+import torch
+from typing import List, Set, Tuple
+from ..SetFunction import SetFunction
+
+class ProbabilisticSetCover(SetFunction):
+    def __init__(self, n: int, ground_set_concept_probabilities: List[List[float]], num_concepts: int, concept_weights: List[float] = None):
+        super(SetFunction, self).__init__()
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.n = n
+        self.ground_set_concept_probabilities = ground_set_concept_probabilities
+        self.num_concepts = num_concepts
+        self.concept_weights = concept_weights
+
+        if self.concept_weights is None:
+            self.concept_weights = [1.0] * num_concepts
+        else:
+            self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device)
+        self.prob_of_concepts_covered_by_X = torch.ones(num_concepts, dtype=torch.double)
+
+    def evaluate(self, X: Set[int]) -> float:
+        result = 0
+        if not X:
+            return result
+
+        for i in range(self.num_concepts):
+            product = 1
+            for elem in X:
+                product *= (1 - self.ground_set_concept_probabilities[elem][i])
+            result += self.concept_weights[i] * (1 - product)
+
+        return result
+
+    def evaluate_with_memoization(self, X: Set[int]) -> float:
+        result = 0
+        if not X:
+            return result
+
+        for i in range(self.num_concepts):
+            result += self.concept_weights[i] * (1 - self.prob_of_concepts_covered_by_X[i])
+
+        return result
+
+    def marginal_gain(self, X: Set[int], item: int) -> float:
+        gain = 0
+        if item in X:
+            return gain
+
+        for i in range(self.num_concepts):
+            old_concept_prod = 1
+            for elem in X:
+                old_concept_prod *= (1 - self.ground_set_concept_probabilities[elem][i])
+            gain += self.concept_weights[i] * old_concept_prod * self.ground_set_concept_probabilities[item][i]
+        return gain
+
+    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+        gain = 0
+        if enable_checks and item in X:
+            return gain
+        for i in range(self.num_concepts):
+            gain += self.concept_weights[i] * self.prob_of_concepts_covered_by_X[i] * self.ground_set_concept_probabilities[item][i]
+        return gain
+
+    def update_memoization(self, X: Set[int], item: int):
+        if item in X:
+            return
+
+        for i in range(self.num_concepts):
+            self.prob_of_concepts_covered_by_X[i] *= (1 - self.ground_set_concept_probabilities[item][i])
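+    # Note: a worked sketch with hypothetical numbers. Despite its name,
+    # prob_of_concepts_covered_by_X stores, for each concept i, the probability that i is
+    # still NOT covered by the memoized set X, i.e. prod_{x in X} (1 - p[x][i]).
+    # For one concept with weight w:
+    #   p_not_covered = 1.0
+    #   for p in (0.5, 0.25):          # add two items whose cover probabilities are 0.5 and 0.25
+    #       p_not_covered *= (1 - p)   # 0.5 after the first item, 0.375 after the second
+    #   # memoized objective contribution: w * (1 - p_not_covered) = w * 0.625
+    #   # memoized marginal gain of a new item with cover probability q: w * p_not_covered * q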
+    def get_effective_ground_set(self) -> Set[int]:
+        return set(range(self.n))
+
+    def clear_memoization(self):
+        self.prob_of_concepts_covered_by_X = torch.ones(self.num_concepts, dtype=torch.double)
+
+    def set_memoization(self, X: Set[int]):
+        self.clear_memoization()
+        temp = set()
+        for elem in X:
+            self.update_memoization(temp, elem)
+            temp.add(elem)
diff --git a/pytorch/submod/SetCover.py b/pytorch/submod/SetCover.py
new file mode 100644
index 0000000..3163400
--- /dev/null
+++ b/pytorch/submod/SetCover.py
@@ -0,0 +1,97 @@
+import torch
+import torch.nn as nn
+import numpy as np
+import random
+from ..SetFunction import SetFunction
+
+class SetCoverFunction(SetFunction):
+    def __init__(self, n, cover_set, num_concepts, concept_weights=None):
+        super(SetFunction, self).__init__()
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.n = n
+        self.cover_set = cover_set
+        self.num_concepts = num_concepts
+        self.concept_weights = concept_weights
+        if self.concept_weights is None:
+            self.concept_weights = [1.0] * num_concepts
+        else:
+            self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device)
+
+        self.concepts_covered_by_x = set()
+
+    def evaluate(self, X):
+        result = 0.0
+
+        if X.numel() == 0:
+            return 0.0
+
+        concepts_covered = set()
+        for elem in X:
+            concepts_covered.update(self.cover_set[elem.item()])
+
+        for con in concepts_covered:
+            result += self.concept_weights[con]
+
+        return result
+
+    def evaluate_with_memoization(self, X):
+        result = 0.0
+
+        if X.numel() == 0:
+            return 0.0
+
+        for con in self.concepts_covered_by_x:
+            result += self.concept_weights[con]
+
+        return result
+
+    def marginal_gain(self, X, item):
+        gain = 0.0
+
+        if item in X:
+            return 0.0
+
+        concepts_covered = set()
+        for elem in X:
+            concepts_covered.update(self.cover_set[elem])
+
+        for con in self.cover_set[item]:
+            if con not in concepts_covered:
+                gain += self.concept_weights[con]
+
+        return gain
+
+    def marginal_gain_with_memoization(self, X, item, enable_checks=True):
+        gain = 0.0
+
+        if enable_checks and item in X:
+            return 0.0
+        for con in self.cover_set[item]:
+            if con not in self.concepts_covered_by_x:
+                gain += self.concept_weights[con]
+
+        return gain
+
+    def update_memoization(self, X, item):
+        if item in X:
+            return
+
+        self.concepts_covered_by_x.update(self.cover_set[item])
+
+    def get_effective_ground_set(self):
+        return set(range(self.n))
+
+    def clear_memoization(self):
+        self.concepts_covered_by_x.clear()
+
+    def set_memoization(self, X):
+        self.clear_memoization()
+        temp = set()
+        for elem in X:
+            self.update_memoization(temp, elem)
+            temp.add(elem)
diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py
new file mode 100644
index 0000000..99c05d2
--- /dev/null
+++ b/pytorch/submod/__init__.py
@@ -0,0 +1,8 @@
+# /pytorch/submod/__init__.py
+from .SetCover import SetCoverFunction
+from .ProbabilisticSetCover import ProbabilisticSetCover
+from .GraphCut import GraphCutFunction
+from .DisparityMin import DisparityMinFunction
+from .DisparitySum import DisparitySumFunction
+from .FacilityLocation import FacilityLocationFunction
+from .LogDeterminant import LogDeterminantFunction
diff --git a/pytorch/submod/helper.py b/pytorch/submod/helper.py
new file mode 100644
index 0000000..1797e4f
--- /dev/null
+++ b/pytorch/submod/helper.py
@@ -0,0 +1,317 @@
+import torch
+import torch.nn.functional as F
+from sklearn.cluster import Birch
+from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity, 
pairwise_distances +from sklearn.neighbors import NearestNeighbors +from scipy import sparse +import pickle +import time +import os +import numpy as np +from typing import List, Dict, Union +from math import sqrt + +# Define type aliases for clarity +Vector = List[float] +Matrix = List[Vector] +Set = List[int] # Considering integer elements for simplicity + +def cos_sim_square(A): + similarity = torch.matmul(A, A.t()) + + square_mag = torch.diag(similarity) + + inv_square_mag = 1 / square_mag + inv_square_mag[torch.isinf(inv_square_mag)] = 0 + + inv_mag = torch.sqrt(inv_square_mag) + + cosine = similarity * inv_mag + cosine = cosine.t() * inv_mag + return cosine + +def cos_sim_rectangle(A, B): + num = torch.matmul(A, B.t()) + p1 = torch.sqrt(torch.sum(A**2, dim=1)).unsqueeze(1) + p2 = torch.sqrt(torch.sum(B**2, dim=1)).unsqueeze(0) + return num / (p1 * p2) + +def create_sparse_kernel(X, metric, num_neigh, n_jobs=1, method="sklearn"): + if num_neigh > X.shape[0]: + raise Exception("ERROR: num of neighbors can't be more than the number of datapoints") + dense = None + dense = create_kernel_dense_sklearn(X, metric) + dense_ = None + if num_neigh == -1: + num_neigh = X.shape[0] # default is the total number of datapoints + + # Assuming X is a PyTorch tensor + X_np = X.numpy() + + # Use PyTorch functions for the nearest neighbors search + if metric == 'euclidean': + distances = torch.cdist(X, X, p=2) # Euclidean distance + elif metric == 'cosine': + distances = 1 - torch.nn.functional.cosine_similarity(X, X, dim=1) # Cosine similarity as distance + + # Exclude the distance to oneself (diagonal elements) + distances.fill_diagonal_(float('inf')) + + # Find the indices of the k-nearest neighbors using torch.topk + _, ind = torch.topk(distances, k=num_neigh, largest=False) + + # ind_l = [(index[0], x.item()) for index, x in torch.ndenumerate(ind)] + # Convert indices to row and col lists + row = [] + col = [] + for i, indices_row in enumerate(ind): + for j in indices_row: + row.append(i) + col.append(j.item()) + + mat = torch.zeros_like(distances) + mat[row, col] = 1 + dense_ = dense * mat # Only retain similarity of nearest neighbors + sparse_coo = torch.sparse_coo_tensor(torch.tensor([row, col]), mat[row, col], dense.size()) + # Convert the COO tensor to CSR format + sparse_csr = sparse_coo.coalesce() + return sparse_csr + # pass + + +def create_kernel_dense(X, metric, method="sklearn"): + dense = None + if method == "sklearn": + dense = create_kernel_dense_sklearn(X, metric) + else: + raise Exception("For creating dense kernel, only 'sklearn' method is supported") + return dense + +def create_kernel_dense_sklearn(X, metric, X_rep=None, batch=0): + dense = None + D = None + batch_size = batch + if metric == "euclidean": + if X_rep is None: + # print(X.shape) + # Process data in batches for torch.cdist + for i in range(0, len(X), batch_size): + X_batch = X[i:i+batch_size].to(device="cuda") + # print(X_batch.shape) + D_batch = torch.cdist(X_batch, X, p=2).to(device="cuda") + gamma = 1 / X.shape[1] + dense_batch = torch.exp(-D_batch * gamma).to(device="cuda") + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) + else: + # Process data in batches for torch.cdist + for i in range(0, len(X_rep), batch_size): + X_rep_batch = X_rep[i:i+batch_size].to(device="cuda") + D_batch = torch.cdist(X_rep_batch, X).to(device="cuda") + gamma = 1 / X.shape[1] + dense_batch = torch.exp(-D_batch * gamma).to(device="cuda") + # Accumulate results 
from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) + + elif metric == "cosine": + if X_rep is None: + # Process data in batches for torch.nn.functional.cosine_similarity + for i in range(0, len(X), batch_size): + X_batch = X[i:i+batch_size].to(device="cuda") + dense_batch = torch.nn.functional.cosine_similarity(X_batch.unsqueeze(1), X.unsqueeze(0), dim=2) + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) + else: + # Process data in batches for torch.nn.functional.cosine_similarity + for i in range(0, len(X_rep), batch_size): + X_rep_batch = X_rep[i:i+batch_size].to(device="cuda") + dense_batch = torch.nn.functional.cosine_similarity(X_rep_batch, X, dim=1) + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) + + elif metric == "dot": + if X_rep is None: + # Process data in batches for torch.matmul + for i in range(0, len(X), batch_size): + X_batch = X[i:i+batch_size].to(device="cuda") + dense_batch = torch.matmul(X_batch, X.t()) + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) + else: + # Process data in batches for torch.matmul + for i in range(0, len(X_rep), batch_size): + X_rep_batch = X_rep[i:i+batch_size].to(device="cuda") + dense_batch = torch.matmul(X_rep_batch, X.t()) + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) + + else: + raise Exception("ERROR: unsupported metric for this method of kernel creation") + + if X_rep is not None: + assert dense.shape == (X_rep.shape[0], X.shape[0]) + else: + assert dense.shape == (X.shape[0], X.shape[0]) + + torch.cuda.empty_cache() + return dense + +def create_cluster_kernels(X, metric, cluster_lab=None, num_cluster=None, onlyClusters=False): + lab = [] + if cluster_lab is None: + obj = Birch(n_clusters=num_cluster) + obj.fit(X) + lab = obj.predict(X).tolist() + if num_cluster is None: + num_cluster = len(obj.subcluster_labels_) + else: + if num_cluster is None: + raise Exception("ERROR: num_cluster needs to be specified if cluster_lab is provided") + lab = cluster_lab + + l_cluster = [set() for _ in range(num_cluster)] + l_ind = [0] * X.shape[0] + l_count = [0] * num_cluster + + for i, el in enumerate(lab): + l_cluster[el].add(i) + l_ind[i] = l_count[el] + l_count[el] = l_count[el] + 1 + + if onlyClusters: + return l_cluster, None, None + + l_kernel = [] + for el in l_cluster: + k = len(el) + l_kernel.append(torch.zeros((k, k))) # placeholder matrices of suitable size + + M = None + if metric == "euclidean": + D = torch.cdist(X, X) + gamma = 1 / X.shape[1] + M = torch.exp(-D * gamma) # similarity from distance + elif metric == "cosine": + M = F.cosine_similarity(X, X, dim=1) + M = M.unsqueeze(0) # converting to 2D for compatibility + else: + raise Exception("ERROR: unsupported metric") + + # Create kernel for each cluster using the bigger kernel + for i in range(X.shape[0]): + for j in range(X.shape[0]): + if lab[i] == lab[j]: + c_ID = lab[i] + ii = l_ind[i] + jj = l_ind[j] + l_kernel[c_ID][ii, jj] = M[i, j] + + return l_cluster, l_kernel, l_ind + +def create_kernel(X, metric, mode="dense", num_neigh=-1, n_jobs=1, X_rep=None, method="sklearn"): + + if X_rep is not None: + assert X_rep.shape[1] == X.shape[1] + + if mode == "dense": + dense = None + dense = 
globals()['create_kernel_dense_'+method](X, metric, X_rep) + return torch.tensor(dense) + + elif mode == "sparse": + if X_rep is not None: + raise Exception("Sparse mode is not supported for separate X_rep") + return create_sparse_kernel(X, metric, num_neigh, n_jobs, method) + + else: + raise Exception("ERROR: unsupported mode") + + + +# Euclidean similarity function +def euclidean_similarity(a: Vector, b: Vector) -> float: + return np.linalg.norm(np.array(a) - np.array(b)) + +# Cosine similarity function +def cosine_similarity(a: Vector, b: Vector) -> float: + dot_product = np.dot(a, b) + norm_a = np.linalg.norm(a) + norm_b = np.linalg.norm(b) + return dot_product / (norm_a * norm_b) if norm_a * norm_b > 0 else 0 + +# Dot product function +def dot_prod(a: Vector, b: Vector) -> float: + return np.dot(a, b) + +# Create kernel function for non-square kernel +def create_kernel_NS(X_ground: Matrix, X_master: Matrix, metric: str = "euclidean") -> Matrix: + n_ground = len(X_ground) + n_master = len(X_master) + k_dense = [[0] * n_ground for _ in range(n_master)] + + for r in range(n_master): + for c in range(n_ground): + if metric == "euclidean": + k_dense[r][c] = euclidean_similarity(X_master[r], X_ground[c]) + elif metric == "cosine": + k_dense[r][c] = cosine_similarity(X_master[r], X_ground[c]) + elif metric == "dot": + k_dense[r][c] = dot_prod(X_master[r], X_ground[c]) + else: + raise ValueError("Unsupported metric for kernel computation in Python") + return k_dense + +# Create square kernel function +def create_square_kernel_dense(X_ground: Matrix, metric: str = "euclidean") -> Matrix: + n_ground = len(X_ground) + k_dense = [[0] * n_ground for _ in range(n_ground)] + + if metric == "euclidean": + for r in range(n_ground): + k_dense[r][r] = 1.0 + for c in range(r + 1, n_ground): + sim = euclidean_similarity(X_ground[r], X_ground[c]) + k_dense[r][c] = sim + k_dense[c][r] = sim + elif metric == "cosine": + for r in range(n_ground): + a_norm = sqrt(dot_prod(X_ground[r], X_ground[r])) + k_dense[r][r] = 1.0 + for c in range(r + 1, n_ground): + sim = dot_prod(X_ground[r], X_ground[c]) + b_norm = sqrt(dot_prod(X_ground[c], X_ground[c])) + sim = sim / (a_norm * b_norm) if a_norm * b_norm > 0 else 0 + k_dense[r][c] = sim + k_dense[c][r] = sim + elif metric == "dot": + for r in range(n_ground): + for c in range(r, n_ground): + sim = dot_prod(X_ground[r], X_ground[c]) + k_dense[r][c] = sim + k_dense[c][r] = sim + else: + raise ValueError("Unsupported metric for kernel computation in Python") + return k_dense + +# Set intersection function +def set_intersection(a: Set, b: Set) -> Set: + return list(set(a) & set(b)) # Converting set intersection to list for better compatibility + diff --git a/setup.py b/setup.py index 10ded66..74a62c5 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name='submodlib', #packages=find_packages(include=['submodlib']), - packages=['submodlib', 'submodlib/functions'], + packages=['submodlib', 'submodlib/functions','pytorch'], #packages=find_packages('submodlib'), #package_dir={'':'submodlib'}, #version='0.0.2', diff --git a/submodlib/functions/facilityLocation.py b/submodlib/functions/facilityLocation.py index 276d8b0..198db06 100644 --- a/submodlib/functions/facilityLocation.py +++ b/submodlib/functions/facilityLocation.py @@ -8,8 +8,15 @@ from submodlib_cpp import FacilityLocation from submodlib_cpp import FacilityLocation2 from submodlib.helper import create_kernel, create_cluster_kernels +import torch #from memory_profiler import profile +if 
torch.cuda.is_available() : + from pytorch.submod import FacilityLocation +else: + from submodlib_cpp import FacilityLocation + + class FacilityLocationFunction(SetFunction): """Implementation of the Facility Location submodular function (FL). @@ -224,11 +231,20 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, l.append(self.cpp_sijs) self.cpp_sijs=l - self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + else: + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + + # elif pybind_mode == "memoryview": # self.cpp_obj = FacilityLocation(self.n, memoryview(self.sijs), False, self.cpp_ground_sub, self.separate_rep) elif pybind_mode == "numpyarray": - self.cpp_obj = FacilityLocation(self.n, self.sijs, False, self.cpp_ground_sub, self.separate_rep) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + else: + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + elif pybind_mode == "array32": # print("Kernel's type = ", self.sijs.dtype) self.sijs.astype('float32', copy=False) @@ -250,16 +266,26 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, elif self.mode=="dense" and create_dense_cpp_kernel_in_python == False: if self.separate_rep == True: - self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric) + else: + self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric) else: - self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), [[0.]], False, self.metric) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), [[0.]], False, self.metric) + else: + self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), [[0.]], False, self.metric) + elif self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation) self.cpp_sijs = {} self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal) self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val - self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col']) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col']) + else: + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col']) elif self.mode=="clustered": l_temp = [] @@ -273,8 +299,11 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, l_temp.append(temp) self.cluster_sijs = l_temp - self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, self.cluster_map) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, 
self.cluster_map) + else: + self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, self.cluster_map) #self.cpp_ground_sub=self.cpp_obj.getEffectiveGroundSet() #self.ground_sub=self.cpp_ground_sub - self.effective_ground = self.cpp_obj.getEffectiveGroundSet() \ No newline at end of file + self.effective_ground = self.cpp_obj.getEffectiveGroundSet() diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py index 41d0baa..d67d5b3 100644 --- a/submodlib/functions/setCover.py +++ b/submodlib/functions/setCover.py @@ -1,42 +1,14 @@ # setCover.py # Author: Vishal Kaushal from .setFunction import SetFunction -from submodlib_cpp import SetCover +import torch +if torch.cuda.is_available() : + from pytorch.submod import SetCover +else: + from submodlib_cpp import SetCover class SetCoverFunction(SetFunction): - """Implementation of the Set-Cover (SC) submodular function. - For a subset :math:`A`, its Set Cover evaluation is defined as: - - .. math:: - f(A) = w(\\cup_{a \\in A} \\gamma(a)) = w(\\gamma(A)) - - where :math:`\\gamma(A)` refers to the set of concepts covered by :math:`A`. Thus the set of all concepts :math:`\\mathcal{U} = \\gamma(\\mathcal{V})`. :math:`w` is a weight vector in :math:`\\Re^{|\\mathcal{U}|}`. Intuitively, each element in :math:`\\mathcal{V}` *covers* a set of elements from the concept set :math:`U` and hence :math:`w(\\gamma(A))` is total weight of concepts covered by elements in :math:`A`. Note that :math:`\\gamma(A \\cup B) = \\gamma(A) \\cup \\gamma(B)` and hence :math:`f(A \\cup B) = w(\\gamma(A \\cup B)) = w(\\gamma(A) \\cup \\gamma(B))`. - - Alternatively we can also view the function as follows. With :math:`U` being the set of all concepts (namely :math:`U = \\gamma(\\mathcal{V})`) and :math:`c_u(i)` denoting whether the concept :math:`u \\in U` is covered by the element :math:`i \\in \\mathcal{V}` i.e :math:`c_u(i) = 1` if :math:`u \\in \\gamma(\\{i\\})` and is zero otherwise. We then define :math:`c_u(A) = \\sum_{a\\in A} c_u(a)` as the count of concept :math:`u` in set :math:`A`, and the weighted set cover can then be written as: - - .. math:: - f(A) = \\sum_{u \\in U} w_u \\min(c_u(A), 1) - - .. note:: - Set Cover functions models coverage of concepts and is monotone submodular. - - Parameters - ---------- - n : int - Number of elements in the ground set, must be > 0. - - cover_set : list - List of sets. Each set is the set of concepts covered by the corresponding data point / image. Hence cover_set is of size n. - - num_concepts : int - Number of concepts. - - concept_weights : list - Weight :math:`w_i` of each concept. Size must be same as num_concepts. - - """ - def __init__(self, n, cover_set, num_concepts, concept_weights=None): self.n = n self.cover_set = cover_set @@ -55,9 +27,11 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None): raise Exception("ERROR: Mismtach between num_conepts and len(concept_weights)") else: self.concept_weights = [1] * self.num_concepts - - self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) - + print("starting setCover.py self.cpp_obj = SetCover line 40 (at 60)") + + if torch.cuda.is_available() : + self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) + else: + self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) self.effective_ground = set(range(n)) - - \ No newline at end of file +
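A minimal usage sketch of the new PyTorch set functions, assuming the layout above: SetCoverFunction from pytorch/submod/SetCover.py driven through SetFunction.maximize, which maps the "NaiveGreedy" string to NaiveGreedyOptimizer. The toy cover_set, the keyword arguments, and the shape of the returned list are taken or inferred from the code above and may need adjustment once the remaining modules referenced by pytorch/submod/__init__.py are in place.

# usage_sketch.py -- hypothetical example, for illustration only
from pytorch.submod.SetCover import SetCoverFunction

# 4 items covering 3 concepts; cover_set[i] is the set of concepts covered by item i
cover_set = [{0, 1}, {1}, {2}, {0, 2}]
f = SetCoverFunction(n=4, cover_set=cover_set, num_concepts=3)

# greedy selection of 2 items via the dispatcher in SetFunction.maximize
selected = f.maximize(optimizer="NaiveGreedy", budget=2, stopIfZeroGain=False,
                      stopIfNegativeGain=False, verbose=False)
print(selected)  # expected: a list of (element, marginal gain) pairs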