diff --git a/pecos/core/base.py b/pecos/core/base.py index 929a477e..2a348bc8 100644 --- a/pecos/core/base.py +++ b/pecos/core/base.py @@ -527,6 +527,7 @@ def __init__(self, dirname, soname, forced_rebuild=False): self.clib_float32 = corelib.load_dynamic_library( dirname, soname + "_float32", forced_rebuild=forced_rebuild ) + self.link_mlmodel_methods() self.link_xlinear_methods() self.link_sparse_operations() self.link_clustering() @@ -537,6 +538,264 @@ def __init__(self, dirname, soname, forced_rebuild=False): self.link_mmap_valstore_methods() self.link_calibrator_methods() + def link_mlmodel_methods(self): + """ + Specify C-lib's MLModel methods argument and return type. + """ + # compile mmap model + arg_list = [c_char_p, c_char_p] + corelib.fillprototype(self.clib_float32.c_mlmodel_compile_mmap_model, None, arg_list) + # load mmap model + res_list = c_void_p + arg_list = [c_char_p, c_bool] + corelib.fillprototype(self.clib_float32.c_mlmodel_load_mmap_model, res_list, arg_list) + # destruct mmap model + arg_list = [c_void_p] + corelib.fillprototype(self.clib_float32.c_mlmodel_destruct_model, None, arg_list) + # get in attr (nr_labels, nr_codes, nr_features) + res_list = c_uint32 + arg_list = [c_void_p, c_char_p] + corelib.fillprototype(self.clib_float32.c_mlmodel_get_int_attr, res_list, arg_list) + + # Interface of sparse prediction + arg_list = [ + c_void_p, + POINTER(ScipyCsrF32), + POINTER(ScipyCsrF32), + c_char_p, + c_uint32, + c_int, + ScipyCompressedSparseAllocator.CFUNCTYPE, + ] + corelib.fillprototype(self.clib_float32.c_mlmodel_predict_csr_f32, None, arg_list) + # Interface of dense prediction + arg_list = [ + c_void_p, + POINTER(ScipyDrmF32), + POINTER(ScipyCsrF32), + c_char_p, + c_uint32, + c_int, + ScipyCompressedSparseAllocator.CFUNCTYPE, + ] + corelib.fillprototype(self.clib_float32.c_mlmodel_predict_drm_f32, None, arg_list) + + # Interface of sparse prediction for selected outputs + arg_list = [ + c_void_p, + POINTER(ScipyCsrF32), + 
def mlmodel_destruct_model(self, c_model):
    """
    Destruct the C mlmodel behind the given pointer.

    Args:
        c_model (ptr): The pointer to the C mlmodel, as returned by
            `mlmodel_load_mmap`.
    """
    # The C entry point carries the `c_` prefix; this is the symbol whose
    # prototype is registered in `link_mlmodel_methods`. The un-prefixed
    # name does not exist in the library and fails attribute lookup.
    self.clib_float32.c_mlmodel_destruct_model(c_model)
def mlmodel_predict(
    self,
    c_model,
    X,
    csr_codes,
    overriden_post_processor_str,
    overriden_only_topk,
    threads,
    pred_alloc,
):
    """
    Run a full prediction of a C mlmodel on a batch of queries.

    Args:
        c_model (c_pointer): C pointer to the model used for prediction, as
            returned by c_mlmodel_load_mmap_model in corelib.clib_float32.
        X: The query matrix. Accepted types are smat.csr_matrix, np.ndarray,
            ScipyCsrF32 and ScipyDrmF32. A smat.csr_matrix must have sorted
            indices (call sort_indices() beforehand).
        csr_codes (smat.csr_matrix or ScipyCsrF32): Prediction of the previous
            layer, or None when there is none.
        overriden_post_processor_str (string): Name of the post processor to
            use instead of the model default. A falsy value is forwarded as
            None.
        overriden_only_topk (uint): Number of results to return per query
            instead of the model default. A falsy value is forwarded as 0.
        threads (int): Number of threads to use in computation. Use -1 for
            the maximum amount of available threads.
        pred_alloc (ScipyCompressedSparseAllocator): Allocator that receives
            the result.

    Raises:
        ValueError: If a smat.csr_matrix query has unsorted indices, or if
            csr_codes does not match the query rows / the model's nr_codes.
        NotImplementedError: If X is of an unsupported type.
    """
    lib = self.clib_float32

    # Wrap raw scipy / numpy queries into their C-compatible counterparts.
    if isinstance(X, smat.csr_matrix):
        if not X.has_sorted_indices:
            raise ValueError("Query matrix does not have sorted indices!")
        X = ScipyCsrF32.init_from(X)
    elif isinstance(X, np.ndarray):
        X = ScipyDrmF32.init_from(X)

    # Select the C entry point that matches the query layout.
    if isinstance(X, ScipyCsrF32):
        run_predict = lib.c_mlmodel_predict_csr_f32
    elif isinstance(X, ScipyDrmF32):
        run_predict = lib.c_mlmodel_predict_drm_f32
    else:
        raise NotImplementedError("type(X) = {} not implemented".format(type(X)))

    if csr_codes is not None:
        # The previous-layer prediction must line up with both the queries
        # and the number of codes known to the model.
        expected_codes = lib.c_mlmodel_get_int_attr(
            c_model, c_char_p("nr_codes".encode("utf-8"))
        )
        if csr_codes.shape[0] != X.shape[0]:
            raise ValueError("Instance dimension of query and csr_codes matrix do not match")
        if csr_codes.shape[1] != expected_codes:
            raise ValueError("Label dimension of csr_codes and C matrix do not match")
        csr_codes = ScipyCsrF32.init_from(csr_codes)

    query_ref = byref(X)
    codes_ref = None if csr_codes is None else byref(csr_codes)
    if overriden_post_processor_str:
        post_processor = overriden_post_processor_str.encode("utf-8")
    else:
        post_processor = None
    only_topk = overriden_only_topk or 0

    run_predict(
        c_model,
        query_ref,
        codes_ref,
        post_processor,
        only_topk,
        threads,
        pred_alloc.cfunc,
    )
+ selected_outputs_csr (csr_matrix): the selected outputs to predict + csr_codes (smat.csr_matrix or ScipyCsrF32): The prediction for the previous layer. + None if this is the first layer. + overriden_post_processor_str (string): Overrides the post processor to use by name. Use + None for model defaults. + threads (int): Sets the number of threads to use in computation. Use + -1 to use the maximum amount of available threads. + pred_alloc (ScipyCompressedSparseAllocator): The allocator to store the result in. + """ + clib = self.clib_float32 + + if isinstance(X, smat.csr_matrix): + if not X.has_sorted_indices: + raise ValueError("Query matrix does not have sorted indices!") + X = ScipyCsrF32.init_from(X) + elif isinstance(X, np.ndarray): + X = ScipyDrmF32.init_from(X) + + if not isinstance(selected_outputs_csr, smat.csr_matrix): + raise ValueError( + "type(selected_outputs_csr) = {} not implemented".format(type(selected_outputs_csr)) + ) + selected_outputs_csr = ScipyCsrF32.init_from(selected_outputs_csr) + + if isinstance(X, ScipyCsrF32): + c_predict = clib.c_mlmodel_predict_on_selected_outputs_csr_f32 + elif isinstance(X, ScipyDrmF32): + c_predict = clib.c_mlmodel_predict_on_selected_outputs_drm_f32 + else: + raise NotImplementedError("type(X) = {} not implemented".format(type(X))) + + if csr_codes is not None: + # Check that the csr_code is of valid shape + nr_codes = clib.c_mlmodel_get_int_attr(c_model, c_char_p("nr_codes".encode("utf-8"))) + if csr_codes.shape[0] != X.shape[0]: + raise ValueError("Instance dimension of query and csr_codes matrix do not match") + if csr_codes.shape[1] != nr_codes: + raise ValueError("Label dimension of csr_codes and C matrix do not match") + csr_codes = ScipyCsrF32.init_from(csr_codes) + + c_predict( + c_model, + byref(X), + byref(selected_outputs_csr), + byref(csr_codes) if csr_codes is not None else None, + overriden_post_processor_str.encode("utf-8") if overriden_post_processor_str else None, + threads, + pred_alloc.cfunc, + 
) + def link_xlinear_methods(self): """ Specify C-lib's Xlinear methods argument and return type. diff --git a/pecos/core/libpecos.cpp b/pecos/core/libpecos.cpp index d7360124..7f10d6ee 100644 --- a/pecos/core/libpecos.cpp +++ b/pecos/core/libpecos.cpp @@ -27,7 +27,92 @@ // C Interface of Types/Structures can be found in utils/matrix.hpp extern "C" { - // ==== C Interface of XMC Models ==== + // ==== C Interface of MLModels ==== + // Only implemented for w_matrix_t = pecos::csc_t + typedef pecos::csc_t MLMODEL_MAT_T; + void c_mlmodel_compile_mmap_model(const char* model_path, const char* mmap_model_path) { + auto model = new pecos::MLModel(model_path, 0); + model->save_mmap(mmap_model_path); + delete model; + } + void* c_mlmodel_load_mmap_model(const char* model_path, const bool lazy_load) { + auto mlm = new pecos::MLModel(model_path, 0, lazy_load); + return static_cast(mlm); + } + void c_mlmodel_destruct_model(void* ptr) { + pecos::MLModel* mlm = static_cast*>(ptr); + delete mlm; + } + // Allowed attr: nr_labels, nr_codes, nr_features + uint32_t c_mlmodel_get_int_attr(void* ptr, const char* attr) { + pecos::MLModel* mlm = static_cast*>(ptr); + return mlm->get_int_attr(attr); + } + + #define C_MLMODEL_PREDICT(SUFFIX, PY_MAT, C_MAT) \ + void c_mlmodel_predict ## SUFFIX( \ + void* ptr, \ + const PY_MAT* input_x, \ + const ScipyCsrF32* csr_codes, \ + const char* overridden_post_processor, \ + const uint32_t overridden_only_topk, \ + const int num_threads, \ + py_sparse_allocator_t pred_alloc) { \ + pecos::MLModel* mlm = static_cast*>(ptr); \ + C_MAT X(input_x); \ + pecos::csr_t prev_layer_pred; \ + bool no_prev_pred; \ + if (csr_codes) { \ + prev_layer_pred = pecos::csr_t(csr_codes).deep_copy(); \ + no_prev_pred = false; \ + } else { \ + prev_layer_pred.fill_ones(X.rows, mlm->code_count()); \ + no_prev_pred = true; \ + } \ + pecos::csr_t cur_layer_pred; \ + mlm->predict(X, prev_layer_pred, no_prev_pred, \ + overridden_only_topk, overridden_post_processor, \ + 
cur_layer_pred, num_threads); \ + cur_layer_pred.create_pycsr(pred_alloc); \ + cur_layer_pred.free_underlying_memory(); \ + prev_layer_pred.free_underlying_memory(); \ + } + C_MLMODEL_PREDICT(_csr_f32, ScipyCsrF32, pecos::csr_t) + C_MLMODEL_PREDICT(_drm_f32, ScipyDrmF32, pecos::drm_t) + + #define C_MLMODEL_PREDICT_ON_SELECTED_OUTPUTS(SUFFIX, PY_MAT, C_MAT) \ + void c_mlmodel_predict_on_selected_outputs ## SUFFIX( \ + void* ptr, \ + const PY_MAT* input_x, \ + const ScipyCsrF32* selected_outputs_csr, \ + const ScipyCsrF32* csr_codes, \ + const char* overridden_post_processor, \ + const int num_threads, \ + py_sparse_allocator_t pred_alloc) { \ + pecos::MLModel* mlm = static_cast*>(ptr); \ + C_MAT X(input_x); \ + pecos::csr_t curr_outputs_csr = pecos::csr_t(selected_outputs_csr).deep_copy(); \ + pecos::csr_t prev_layer_pred; \ + bool no_prev_pred; \ + if (csr_codes) { \ + prev_layer_pred = pecos::csr_t(csr_codes).deep_copy(); \ + no_prev_pred = false; \ + } else { \ + prev_layer_pred.fill_ones(X.rows, mlm->code_count()); \ + no_prev_pred = true; \ + } \ + pecos::csr_t cur_layer_pred; \ + mlm->predict_on_selected_outputs(X, curr_outputs_csr, prev_layer_pred, no_prev_pred, \ + overridden_post_processor, cur_layer_pred, num_threads); \ + cur_layer_pred.create_pycsr(pred_alloc); \ + cur_layer_pred.free_underlying_memory(); \ + curr_outputs_csr.free_underlying_memory(); \ + prev_layer_pred.free_underlying_memory(); \ + } + C_MLMODEL_PREDICT_ON_SELECTED_OUTPUTS(_csr_f32, ScipyCsrF32, pecos::csr_t) + C_MLMODEL_PREDICT_ON_SELECTED_OUTPUTS(_drm_f32, ScipyDrmF32, pecos::drm_t) + + // ==== C Interface of XLinearModels ==== void* c_xlinear_load_model_from_disk(const char* model_path) { auto model = new pecos::HierarchicalMLModel(model_path); return static_cast(model); @@ -49,6 +134,7 @@ extern "C" { // Only implemented for bin_search_chunked auto model = new pecos::HierarchicalMLModel(model_path, pecos::layer_type_t::LAYER_TYPE_BINARY_SEARCH_CHUNKED); 
model->save_mmap(mmap_model_path); + delete model; } void c_xlinear_destruct_model(void* ptr) { diff --git a/pecos/core/xmc/inference.hpp b/pecos/core/xmc/inference.hpp index 89be60d0..0efa7e69 100644 --- a/pecos/core/xmc/inference.hpp +++ b/pecos/core/xmc/inference.hpp @@ -42,7 +42,6 @@ #define DEFAULT_LAYER_TYPE LAYER_TYPE_BINARY_SEARCH_CHUNKED - namespace pecos { using robin_hood::unordered_set; @@ -104,15 +103,18 @@ namespace pecos { float bias; int only_topk; std::string post_processor; + bool is_mmap=false; MLModelMetadata( float bias=1.0, int only_topk=10, - std::string post_processor="l3-hinge" + std::string post_processor="l3-hinge", + bool is_mmap=false ) { this->bias = bias; this->only_topk = only_topk; this->post_processor = post_processor; + this->is_mmap = is_mmap; } MLModelMetadata(const std::string& params_filepath) { @@ -149,9 +151,9 @@ namespace pecos { throw std::runtime_error("model corrupted, does not contain post_processor in pred_kwargs"); } - only_topk = pred_kwargs["only_topk"]; post_processor = pred_kwargs["post_processor"]; + is_mmap = j.value("is_mmap", false); } void dump_json(const std::string& params_filepath) const { @@ -166,7 +168,8 @@ namespace pecos { ofs << "\"pred_kwargs\": {\n"; ofs << "\t\"only_topk\": " << only_topk << ",\n"; ofs << "\t\"post_processor\": \"" << post_processor << "\"\n"; - ofs << "\t}\n"; + ofs << "\t},\n"; + ofs << "\"is_mmap\": " << (is_mmap?"true":"false") << "\n"; ofs << "}\n"; ofs.close(); @@ -375,11 +378,11 @@ namespace pecos { } void save_mmap(const std::string& file_name) const { - throw std::runtime_error("Not implemented yet."); + throw std::runtime_error("hash_chunked_matrix_t::save_mmap is Not implemented yet."); } void load_mmap(const std::string& file_name, const bool lazy_load) { - throw std::runtime_error("Not implemented yet."); + throw std::runtime_error("hash_chunked_matrix_t::load_mmap is Not implemented yet."); } }; @@ -1664,12 +1667,16 @@ namespace pecos { // Initialize mmap data void 
init_mmap(const std::string& foldername, bool lazy_load, value_type bias) { - throw std::runtime_error("Not implemented yet."); + this->bias = bias; + this->b_assumes_ownership = true; // Always true for mmap + this->W.load_mmap(mmap_W_fn_(foldername), lazy_load); + this->C.load_mmap(mmap_C_fn_(foldername), lazy_load); } // Save layer data to mmap format void save_mmap(const std::string& foldername) const { - throw std::runtime_error("Not implemented yet."); + W.save_mmap(mmap_W_fn_(foldername)); + C.save_mmap(mmap_C_fn_(foldername)); } // Not necessary for unchuncked layer data @@ -1683,6 +1690,10 @@ namespace pecos { C.free_underlying_memory(); } } + private: + // mmap file names + inline std::string mmap_W_fn_(const std::string& foldername) const {return foldername + "/W.mmap_store";} + inline std::string mmap_C_fn_(const std::string& foldername) const {return foldername + "/C.mmap_store";} }; // Chunked layer data @@ -2008,14 +2019,6 @@ namespace pecos { init(csc_t(&W), csc_t(&C), cur_depth, b_assumes_ownership, metadata); } - // Save mmap - void save_mmap(const std::string& folderpath) const { - const std::string metadata_path = folderpath + "/param.json"; - metadata.dump_json(metadata_path); - - layer_data.save_mmap(folderpath); - } - // The internal prediction function for a layer, this method is templated to take any // supported query matrix type. 
It is called by both versions of the ModelLayer::predict method // X should have the same number of rows as prev_layer_pred @@ -2239,6 +2242,10 @@ namespace pecos { return layer_data.W.cols; } + index_type code_count() const override { + return layer_data.C.cols; + } + index_type feature_count() const override { if (layer_data.bias > 0.0) { return layer_data.W.rows - 1; @@ -2247,15 +2254,53 @@ namespace pecos { } } - index_type code_count() const override { - return layer_data.C.cols; - } - value_type bias() const override { return layer_data.bias; } + inline index_type get_int_attr(const char* attr) { + if (std::strcmp(attr, "nr_labels") == 0) { + return this->label_count(); + } else if (std::strcmp(attr, "nr_codes") == 0) { + return this->code_count(); + } else if (std::strcmp(attr, "nr_features") == 0) { + return this->feature_count(); + } else { + throw std::runtime_error(std::string(attr) + " is not implemented in get_int_attr."); + } + } + + // Save mmap + void save_mmap(const std::string& folderpath) const { + // Create folder + if (system(("mkdir -p " + folderpath).c_str()) == -1) { + throw std::runtime_error("Cannot create folder: " + folderpath); + } + + const std::string metadata_path = folderpath + "/param.json"; + MLModelMetadata metadata( + this->metadata.bias, + this->metadata.only_topk, + this->metadata.post_processor, + true); + metadata.dump_json(metadata_path); + + layer_data.save_mmap(folderpath); + } + + MLModel(const std::string& folderpath, const uint32_t cur_depth, const bool lazy_load) { + MLModelMetadata metadata(folderpath + "/param.json"); + if (!metadata.is_mmap) { + throw std::runtime_error("This folder contains npz model. 
def __init__(self, model_ptr=None, W=None, C=None, bias=-1.0, pred_params=None, **kwargs):
    """Initialization

    Args:
        model_ptr (c_void_ptr): The pointer to C/C++ of MLModel. Default None.
        W(ScipyCscF32 or np.ndarray, optional): Weight matrix. Default None.
        C(ScipyCscF32 or np.ndarray, optional): Clustering matrix. Default None.
            When omitted for a non-mmap model, a single all-ones column is used.
        bias(float, optional): The bias of the model. Default to -1.0.
        pred_params(dict): Parameters to initialize `PredParams`.
        **kwargs: Other arguments to override `PredParams`.

    Raises:
        ValueError: If none of `model_ptr`, `W` and `C` is given, or if the
            model is not in mmap format and `W` is missing.
    """
    if model_ptr is None and W is None and C is None:
        raise ValueError("Initialization Error! Provide either model_ptr or (W/C)!")

    self.model_ptr = model_ptr
    if not self.is_mmap:
        # Every statement below dereferences W, so fail with a clear message
        # instead of an AttributeError on None.
        if W is None:
            raise ValueError(
                "Initialization Error! W is required when the model is not in mmap format!"
            )
        if C is None:
            C = smat.csc_matrix(np.ones((W.shape[1], 1), dtype=W.dtype))
        else:
            # ScipyCscF32 keeps the scipy matrix in `.buf`.
            c_shape = C.buf.shape if isinstance(C, ScipyCscF32) else C.shape
            assert c_shape[0] == W.shape[1], "C:{} W:{}".format(c_shape, W.shape)
        self.pC = ScipyCscF32.init_from(C)
        self.pW = ScipyCscF32.init_from(W)
        self.bias = bias

    pred_params = self.PredParams.from_dict(pred_params)
    pred_params.override_with_kwargs(kwargs.get("pred_kwargs", None))
    self.pred_params = pred_params
@classmethod
def load(cls, folder, lazy_load=False):
    """Load MLModel from file

    The format is chosen from the `is_mmap` entry of `param.json`: when it is
    true the model is loaded through the C library in mmap mode, otherwise
    the `W.npz` / `C.npz` matrices are loaded.

    Args:
        folder (str): dir from which the model is loaded.
        lazy_load (bool): lazy load for mmap format of model. Default False.

    Returns:
        MLModel
    """
    # Context manager so the file handle is closed deterministically.
    with open("{}/param.json".format(folder), "r", encoding="utf-8") as fin:
        param = json.load(fin)
    assert param["model"] == cls.__name__
    pred_params = cls.PredParams.from_dict(param["pred_kwargs"])

    if bool(param.get("is_mmap", False)):
        model_ptr = clib.mlmodel_load_mmap(folder, lazy_load=lazy_load)
        return cls(model_ptr=model_ptr, pred_params=pred_params)

    W = smat_util.load_matrix("{}/W.npz".format(folder)).tocsc().sorted_indices()
    C = smat_util.load_matrix("{}/C.npz".format(folder)).tocsc().sorted_indices()
    return cls(W=W, C=C, bias=param["bias"], pred_params=pred_params)
+ """ + param = json.loads(open(f"{npz_folder}/param.json", "r", encoding="utf-8").read()) + assert param["model"] == cls.__name__ + clib.mlmodel_compile_mmap_model(npz_folder, mmap_folder) + @classmethod def train(cls, prob, train_params=None, pred_params=None, **kwargs): """Training method for MLModel @@ -818,7 +869,7 @@ def train(cls, prob, train_params=None, pred_params=None, **kwargs): if train_params.solver_type == "L2R_L2LOSS_SVC_PRIMAL": train_params.eps = train_params.newton_eps - model = clib.xlinear_single_layer_train( + W = clib.xlinear_single_layer_train( prob.pX, prob.pY, prob.pC, @@ -826,7 +877,7 @@ def train(cls, prob, train_params=None, pred_params=None, **kwargs): prob.pR, **train_params.to_dict(), ) - return cls(model, prob.pC, train_params.bias, pred_params) + return cls(W=W, C=prob.pC, bias=train_params.bias, pred_params=pred_params) def get_pred_params(self): """Return a deep copy of prediction parameters @@ -872,17 +923,28 @@ def predict( pred_alloc = ScipyCompressedSparseAllocator() - clib.xlinear_single_layer_predict( - X, - csr_codes, - self.W, - self.C, - pred_params.post_processor, - pred_params.only_topk if pred_params.only_topk else 0, - kwargs.get("threads", -1), - self.bias, - pred_alloc, - ) + if self.is_mmap: + clib.mlmodel_predict( + self.model_ptr, + X, + csr_codes, + pred_params.post_processor, + pred_params.only_topk if pred_params.only_topk else 0, + kwargs.get("threads", -1), + pred_alloc, + ) + else: + clib.xlinear_single_layer_predict( + X, + csr_codes, + self.W, + self.C, + pred_params.post_processor, + pred_params.only_topk if pred_params.only_topk else 0, + kwargs.get("threads", -1), + self.bias, + pred_alloc, + ) return pred_alloc.get() @@ -927,17 +989,28 @@ def predict_on_selected_outputs( pred_alloc = ScipyCompressedSparseAllocator() - clib.xlinear_single_layer_predict_on_selected_outputs( - X, - selected_outputs_csr, - csr_codes, - self.W, - self.C, - pred_params.post_processor, - kwargs.get("threads", -1), - 
def test_mmap_mlmodel(tmpdir):
    """Check that an mmap-compiled MLModel agrees with its npz original.

    Trains a model, saves it in npz format, compiles it to mmap format,
    reloads it, and compares the dimension attributes and the top-2
    predictions of both models.
    """
    from pathlib import Path
    from pecos.utils import smat_util
    from pecos.xmc import MLProblem, MLModel

    X, Y, Xt = (
        smat_util.load_matrix(data_file)
        for data_file in (
            "test/tst-data/xmc/xlinear/X.npz",
            "test/tst-data/xmc/xlinear/Y.npz",
            "test/tst-data/xmc/xlinear/Xt.npz",
        )
    )

    npz_model_folder = str(tmpdir.join("save_model_npz"))
    mmap_model_folder = str(tmpdir.join("save_model_mmap"))
    Path(mmap_model_folder).mkdir(parents=True, exist_ok=True)

    # Train in Python, persist as npz, then compile and reload as mmap.
    problem = MLProblem(X, Y, C=None, M=None, R=None)
    py_model = MLModel.train(problem, train_params=MLModel.TrainParams())
    py_model.save(npz_model_folder)
    MLModel.compile_mmap_model(npz_model_folder, mmap_model_folder)
    mmap_model = MLModel.load(mmap_model_folder, lazy_load=False)

    for attr_name in ("nr_features", "nr_labels", "nr_codes"):
        assert getattr(py_model, attr_name) == getattr(mmap_model, attr_name)

    py_pred = py_model.predict(Xt, only_topk=2).todense()
    mmap_pred = mmap_model.predict(Xt, only_topk=2).todense()
    assert mmap_pred == approx(py_pred, abs=1e-6)