From cfeb1d801f902da2be9f5b45df99b3c0beb09540 Mon Sep 17 00:00:00 2001 From: Wei-Cheng Chang Date: Tue, 15 Oct 2024 17:12:28 -0700 Subject: [PATCH] Update RankingTrainer/Python Version, Remove BLAS (#300) Co-authored-by: Wei-Cheng Chang --- .github/build_pypi_wheel.sh | 19 +------- .github/workflows/build_pypi.yml | 2 +- .github/workflows/pytest.yml | 4 +- .github/workflows/pytest_aarch64.yml | 6 +-- .github/workflows/style_type_check.yml | 4 +- Makefile | 4 +- README.md | 13 +----- pecos/core/utils/matrix.hpp | 63 +++++++++++++++----------- pecos/xmr/reranker/model.py | 4 +- pecos/xmr/reranker/trainer.py | 51 +++++++++++++++------ setup.cfg | 4 -- setup.py | 38 +++------------- 12 files changed, 93 insertions(+), 119 deletions(-) diff --git a/.github/build_pypi_wheel.sh b/.github/build_pypi_wheel.sh index 3f1b13f6..7402ec72 100755 --- a/.github/build_pypi_wheel.sh +++ b/.github/build_pypi_wheel.sh @@ -19,25 +19,8 @@ echo "pip: $($PIP --version)" # Install dependencies -# TODO: remove pin on setuptools after removing numpy.distutils echo "Install dependencies..." -$PIP install 'setuptools<=73.0.1' wheel twine auditwheel - -# Install OpenBLAS -# Using pre-build OpenBLAS lib v0.3.27 hosted on Anaconda -# Refer to: https://github.com/MacPython/openblas-libs -# OpenBLAS64 is for ILP64, which is not our case -if [ "$PLAT" = "manylinux2014_x86_64" ] || [ "$PLAT" = "manylinux2014_aarch64" ]; then - OPENBLAS_VER="v0.3.27" - OPENBLAS_LIB="openblas-${OPENBLAS_VER}-${PLAT}.tar.gz" - OPENBLAS_LIB_URL="https://anaconda.org/multibuild-wheels-staging/openblas-libs/$OPENBLAS_VER/download/$OPENBLAS_LIB" - yum install wget -y - wget $OPENBLAS_LIB_URL - tar -xvf $OPENBLAS_LIB -else - echo "$PLAT not supported." - exit 1 -fi +$PIP install setuptools wheel twine auditwheel # Build wheel diff --git a/.github/workflows/build_pypi.yml b/.github/workflows/build_pypi.yml index 930fd70a..693f7581 100644 --- a/.github/workflows/build_pypi.yml +++ b/.github/workflows/build_pypi.yml @@ -9,7 +9,7 @@ jobs: max-parallel: 4 fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] build_platform: ["manylinux2014_x86_64", "manylinux2014_aarch64"] runs-on: ubuntu-20.04 diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index babaff58..5b9bb13a 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -9,7 +9,7 @@ jobs: max-parallel: 4 fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] platform: [ubuntu-20.04, ubuntu-22.04] runs-on: ${{ matrix.platform }} @@ -38,7 +38,7 @@ jobs: max-parallel: 4 fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] runs-on: ubuntu-latest container: amazonlinux:2 diff --git a/.github/workflows/pytest_aarch64.yml b/.github/workflows/pytest_aarch64.yml index 1d43d8e5..812b4852 100644 --- a/.github/workflows/pytest_aarch64.yml +++ b/.github/workflows/pytest_aarch64.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: Ubuntu-Python-Unit-Test: - name: Ubuntu 20.04 Python3.8 Unit Tests + name: Ubuntu 22.04 Python3.10 Unit Tests runs-on: ubuntu-latest @@ -17,12 +17,12 @@ jobs: platforms: all - name: Check QEMU Simulator - uses: docker://arm64v8/ubuntu:20.04 + uses: docker://arm64v8/ubuntu:22.04 with: args: 'uname -a' - name: Install dependencies and Pytest - uses: docker://arm64v8/ubuntu:20.04 + uses: docker://arm64v8/ubuntu:22.04 with: args: > bash -c diff --git a/.github/workflows/style_type_check.yml b/.github/workflows/style_type_check.yml index c417d020..e9414ff5 100644 --- a/.github/workflows/style_type_check.yml +++ b/.github/workflows/style_type_check.yml @@ -9,10 +9,10 @@ jobs: steps: - uses: actions/checkout@v1 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v1 with: - python-version: "3.8" + python-version: "3.9" - name: Install dependencies id: install-dep run: | diff --git a/Makefile b/Makefile index ff45f1a1..ace2f88d 100644 --- a/Makefile +++ b/Makefile @@ -49,10 +49,8 @@ mypy: # Install and unit test -# TODO: remove pin on pip and setuptools after removing numpy.distutils libpecos: - python3 -m pip install pip==23.0.1 - python3 -m pip install "setuptools<=73.0.1" + python3 -m pip install --upgrade pip ${WARN_AS_ERROR_CMD} python3 -m pip install ${VFLAG} --editable . .PHONY: test diff --git a/README.md b/README.md index 582f30c8..31d8b4da 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Given an input, PECOS identifies a small set (10-100) of relevant outputs from a ## Requirements and Installation -* Python (3.8, 3.9, 3.10, 3.11) +* Python (3.9, 3.10, 3.11, 3.12) * Pip (>=19.3) See other dependencies in [`setup.py`](https://github.com/amzn/pecos/blob/mainline/setup.py#L135) @@ -42,7 +42,6 @@ If you're unfamiliar with Python virtual environments, check out the [user guide ### Installation from Wheel - PECOS can be installed using pip as follows: ```bash python3 -m pip install libpecos @@ -59,16 +58,6 @@ sudo apt-get update && sudo apt-get install -y build-essential git python3 pytho ``` bash sudo yum -y install python3 python3-devel python3-distutils python3-venv && sudo yum -y groupinstall 'Development Tools' ``` -One needs to install at least one BLAS library to compile PECOS, e.g. `OpenBLAS`: -* For Ubuntu (20.04, 22.04): -``` bash -sudo apt-get install -y libopenblas-dev -``` -* For Amazon Linux 2: -``` bash -sudo amazon-linux-extras install epel -y -sudo yum install openblas-devel -y -``` #### Install and develop locally ```bash diff --git a/pecos/core/utils/matrix.hpp b/pecos/core/utils/matrix.hpp index 6106a488..383a1dda 100644 --- a/pecos/core/utils/matrix.hpp +++ b/pecos/core/utils/matrix.hpp @@ -764,37 +764,46 @@ namespace pecos { } }; - // ===== BLAS C++ Wrapper ===== - - extern "C" { - double ddot_(ptrdiff_t *, double *, ptrdiff_t *, double *, ptrdiff_t *); - float sdot_(ptrdiff_t *, float *, ptrdiff_t *, float *, ptrdiff_t *); - - ptrdiff_t dscal_(ptrdiff_t *, double *, double *, ptrdiff_t *); - ptrdiff_t sscal_(ptrdiff_t *, float *, float *, ptrdiff_t *); - - ptrdiff_t daxpy_(ptrdiff_t *, double *, double *, ptrdiff_t *, double *, ptrdiff_t *); - ptrdiff_t saxpy_(ptrdiff_t *, float *, float *, ptrdiff_t *, float *, ptrdiff_t *); - - double dcopy_(ptrdiff_t *, double *, ptrdiff_t *, double *, ptrdiff_t *); - float scopy_(ptrdiff_t *, float *, ptrdiff_t *, float *, ptrdiff_t *); + // ===== self-implemented C++ Wrapper for BLAS interface ===== + // Since removing the dependency on BLAS, we manually realize + // the dot/scal/axpy/copy BLAS-compatible API via our naive implementation, + // which is for backward-compatibility (e.g., in Newton solver) + + template val_type dot(ptrdiff_t *len, val_type *x, ptrdiff_t *xinc, val_type *y, ptrdiff_t *yinc) { + val_type res = 0.0; + for (ptrdiff_t idx = 0; idx < *len; idx++) { + res += (*x) * (*y); + x += *xinc; + y += *yinc; + } + return res; } - template val_type dot(ptrdiff_t *, val_type *, ptrdiff_t *, val_type *, ptrdiff_t *); - template<> inline double dot(ptrdiff_t *len, double *x, ptrdiff_t *xinc, double *y, ptrdiff_t *yinc) { return ddot_(len, x, xinc, y, yinc); } - template<> inline float dot(ptrdiff_t *len, float *x, ptrdiff_t *xinc, float *y, ptrdiff_t *yinc) { return sdot_(len, x, xinc, y, yinc); } - - template val_type scal(ptrdiff_t *, val_type *, val_type *, ptrdiff_t *); - template<> inline double scal(ptrdiff_t *len, double *a, double *x, ptrdiff_t *xinc) { return dscal_(len, a, x, xinc); } - template<> inline float scal(ptrdiff_t *len, float *a, float *x, ptrdiff_t *xinc) { return sscal_(len, a, x, xinc); } + template val_type scal(ptrdiff_t *len, val_type *a, val_type *x, ptrdiff_t *xinc) { + for (ptrdiff_t idx = 0; idx < *len; idx++) { + *x = (*x) * (*a); + x += *xinc; + } + return (val_type) 0; + } - template ptrdiff_t axpy(ptrdiff_t *, val_type *, val_type *, ptrdiff_t *, val_type *, ptrdiff_t *); - template<> inline ptrdiff_t axpy(ptrdiff_t *len, double *alpha, double *x, ptrdiff_t *xinc, double *y, ptrdiff_t *yinc) { return daxpy_(len, alpha, x, xinc, y, yinc); }; - template<> inline ptrdiff_t axpy(ptrdiff_t *len, float *alpha, float *x, ptrdiff_t *xinc, float *y, ptrdiff_t *yinc) { return saxpy_(len, alpha, x, xinc, y, yinc); }; + template ptrdiff_t axpy(ptrdiff_t *len, val_type *alpha, val_type *x, ptrdiff_t *xinc, val_type *y, ptrdiff_t *yinc) { + for (ptrdiff_t idx = 0; idx < *len; idx++) { + *y = (*y) + (*x) * (*alpha); + x += *xinc; + y += *yinc; + } + return (ptrdiff_t) 0; + } - template val_type copy(ptrdiff_t *, val_type *, ptrdiff_t *, val_type *, ptrdiff_t *); - template<> inline double copy(ptrdiff_t *len, double *x, ptrdiff_t *xinc, double *y, ptrdiff_t *yinc) { return dcopy_(len,x,xinc,y,yinc); } - template<> inline float copy(ptrdiff_t *len, float *x, ptrdiff_t *xinc, float *y, ptrdiff_t *yinc) { return scopy_(len,x,xinc,y,yinc); } + template val_type copy(ptrdiff_t *len, val_type *x, ptrdiff_t *xinc, val_type *y, ptrdiff_t *yinc) { + for (ptrdiff_t idx = 0; idx < *len; idx++) { + *y = *x; + x += *xinc; + y += *yinc; + } + return (val_type) 0; + } // ===== do_dot_product ===== template diff --git a/pecos/xmr/reranker/model.py b/pecos/xmr/reranker/model.py index 79a692f5..9a4721f3 100644 --- a/pecos/xmr/reranker/model.py +++ b/pecos/xmr/reranker/model.py @@ -65,7 +65,7 @@ def __init__( inp_feat_dim: int = 1, inp_dropout_prob: float = 0.1, hid_dropout_prob: float = 0.1, - hid_actv_type: str = "gelu", + hid_actv_type: str = "relu6", hid_size_list: list = [64, 128, 256], **kwargs, ): @@ -93,7 +93,7 @@ def __init__( text_config=None, numr_config=None, text_pooling_type="cls", - head_actv_type="gelu", + head_actv_type="relu6", head_dropout_prob=0.1, head_size_list=[128, 64], **kwargs, diff --git a/pecos/xmr/reranker/trainer.py b/pecos/xmr/reranker/trainer.py index b386a03a..f429eeb8 100644 --- a/pecos/xmr/reranker/trainer.py +++ b/pecos/xmr/reranker/trainer.py @@ -73,18 +73,6 @@ def forward(self, preds, target, alpha=0.5): return loss1 -LOSS_FN_DICT = { - "pairwise": PairwisePointwiseHybridLoss( - nn.MarginRankingLoss(reduction="mean", margin=0.1), - nn.MSELoss(reduction="mean"), - ), - "listwise": ListwisePointwiseHybridLoss( - nn.CrossEntropyLoss(reduction="mean"), - nn.BCEWithLogitsLoss(reduction="mean"), - ), -} - - class LoggerCallback(TrainerCallback): def on_epoch_begin( self, @@ -115,6 +103,8 @@ def on_log( logs["loss"] = round(logs["loss"], 6) if "grad_norm" in logs: logs["grad_norm"] = round(logs["grad_norm"], 6) + if "learning_rate" in logs: + logs["learning_rate"] = round(logs["learning_rate"], 8) if "epoch" in logs: logs["epoch"] = round(logs["epoch"], 2) if state.is_world_process_zero: @@ -126,6 +116,17 @@ class RankingTrainer(Trainer, pecos.BaseClass): Trainer class for the pecos.xmr.reranker.RankingModel. """ + LOSS_FN_DICT = { + "pairwise": PairwisePointwiseHybridLoss( + nn.MarginRankingLoss(reduction="mean", margin=0.1), + nn.MSELoss(reduction="mean"), + ), + "listwise": ListwisePointwiseHybridLoss( + nn.CrossEntropyLoss(reduction="mean"), + nn.BCEWithLogitsLoss(reduction="mean"), + ), + } + @dataclass class TrainingArgs(TrainingArguments, pecos.BaseParams): loss_fn: str = "listwise" @@ -148,10 +149,12 @@ def to_dict(self, with_meta=True): return self.append_meta(d) if with_meta else d def __init__(self, *args, **kwargs): - param_to_save = kwargs.pop("param_to_save") + param_to_save = kwargs.pop("param_to_save", None) + if not param_to_save: + raise ValueError("param_to_save can not be None!") super(RankingTrainer, self).__init__(*args, **kwargs) - self.loss_fn = LOSS_FN_DICT[self.args.loss_fn] + self.loss_fn = self.LOSS_FN_DICT[self.args.loss_fn] self.loss_alpha = self.args.loss_alpha self.param_to_save = param_to_save @@ -223,3 +226,23 @@ def compute_loss( loss = self.loss_fn(preds_2d, target, alpha=self.loss_alpha) return (loss, preds_1d) if return_outputs else loss + + def log(self, logs: Dict[str, float]) -> None: + """ + Log `logs` on the various objects watching training. + + Subclass and override this method to inject custom behavior. + + Args: + logs (`Dict[str, float]`): + The values to log. + """ + if self.state.epoch is not None: + logs["epoch"] = self.state.epoch + if self.args.include_num_input_tokens_seen: + logs["num_input_tokens_seen"] = self.state.num_input_tokens_seen + logs["global_step"] = self.state.global_step + + output = {**logs, **{"step": self.state.global_step}} + self.state.log_history.append(output) + self.control = self.callback_handler.on_log(self.args, self.state, self.control, logs) # type: ignore diff --git a/setup.cfg b/setup.cfg index 65899090..e32fc679 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,10 +1,6 @@ [aliases] test=pytest -# TODO: remove pin on setuptools version after removing numpy.distutils -[build-system] -requires = ["setuptools<=73.0.1"] - # Configuration for pytest; enable coverage for pecos, emit # XML, HTML, and terminal reports. [tool:pytest] diff --git a/setup.py b/setup.py index c6890449..b79b7b2f 100644 --- a/setup.py +++ b/setup.py @@ -81,33 +81,12 @@ def get_version(cls): raise RuntimeError("Unable to find version string.") -class BlasHelper(object): - """Helper class to figure out user's BLAS library path by Numpy's system-info tool.""" - - @classmethod - def get_blas_lib_dir(cls): - """Return user's BLAS library found by Numpy's system-info tool. If not found, will raise error.""" - import numpy.distutils.system_info as nps - - blas_info = nps.get_info('lapack_opt') - assert blas_info, "No BLAS/LAPACK library is found, need to install BLAS." - - blas_lib = blas_info['libraries'] - blas_dir = blas_info['library_dirs'] - - assert blas_lib, "No BLAS/LAPACK library is found, need to install BLAS." - assert blas_dir, "No BLAS/LAPACK library directory is found, need to install BLAS." - - return blas_lib, blas_dir - - with open("README.md", "r", encoding="utf-8") as f: long_description = f.read() # Requirements numpy_requires = [ - 'setuptools<=73.0.1', # TODO: remove pin on setuptools version after removing numpy.distutils - 'numpy>=1.19.5,<2.0.0; python_version>="3.8"' + 'numpy>=1.19.5,<2.0.0; python_version>="3.9"' ] setup_requires = numpy_requires + [ 'pytest-runner' @@ -115,16 +94,15 @@ def get_blas_lib_dir(cls): install_requires = numpy_requires + [ 'scipy>=1.4.1,<1.14.0', 'scikit-learn>=0.24.1', - 'torch>=2.0; python_version>="3.8"', + 'torch>=2.0; python_version>="3.9"', 'sentencepiece>=0.1.86,!=0.1.92', # 0.1.92 results in error for transformers - 'transformers>=4.31.0; python_version>="3.8"', # the minimal version supporting py3.8 - 'peft>=0.11.0; python_version>="3.8"', - 'datasets>=2.19.1; python_version>="3.8"', + 'transformers>=4.31.0; python_version>="3.9"', # the minimal version supporting py3.9 + 'peft>=0.11.0; python_version>="3.9"', + 'datasets>=2.19.1; python_version>="3.9"', ] # Fetch Numpy before building Numpy-dependent extension, if Numpy required version was not installed setuptools.distutils.core.Distribution().fetch_build_eggs(numpy_requires) -blas_lib, blas_dir = BlasHelper.get_blas_lib_dir() # Get extra manual compile args if any # Example usage: @@ -140,11 +118,9 @@ def get_blas_lib_dir(cls): "pecos.core.libpecos_float32", sources=["pecos/core/libpecos.cpp"], include_dirs=["pecos/core", "/usr/include/", "/usr/local/include"], - libraries=["gomp", "gcc"] + blas_lib, - library_dirs=blas_dir, + libraries=["gomp", "gcc", "stdc++"], extra_compile_args=["-fopenmp", "-O3", "-std=c++17"] + manual_compile_args, - extra_link_args=['-Wl,--no-as-needed', f"-Wl,-rpath,{':'.join(blas_dir)}"] - ) +) setuptools.setup( name="libpecos",