diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml
new file mode 100644
index 0000000000..b4c728400c
--- /dev/null
+++ b/.github/workflows/pypi.yaml
@@ -0,0 +1,29 @@
+# Builds the package and uploads it to PyPI whenever a tag starting with "v" is pushed.
+name: Build and publish rxn-opennmt-py on PyPI
+
+on:
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  build-and-publish:
+    name: Build and publish rxn-opennmt-py on PyPI
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Python setup 3.8
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.8"
+      - name: Install build package (for packaging)
+        run: pip install --upgrade build
+      - name: Build dist
+        run: python -m build
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_TOKEN }}
+          skip-existing: true
diff --git a/README.md b/README.md
index 27b2258e72..79820165f7 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,17 @@
 # OpenNMT-py: Open-Source Neural Machine Translation and (Large) Language Models
 
-[![Build Status](https://github.com/OpenNMT/OpenNMT-py/workflows/Lint%20&%20Tests/badge.svg)](https://github.com/OpenNMT/OpenNMT-py/actions)
-[![Documentation](https://img.shields.io/badge/docs-latest-blue.svg)](https://opennmt.net/OpenNMT-py/)
-[![Gitter](https://badges.gitter.im/OpenNMT/OpenNMT-py.svg)](https://gitter.im/OpenNMT/OpenNMT-py?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
-[![Forum](https://img.shields.io/discourse/status?server=https%3A%2F%2Fforum.opennmt.net%2F)](https://forum.opennmt.net/)
+This is the RXN fork of [OpenNMT-py](https://github.com/OpenNMT/OpenNMT-py).
+
+It can be installed from PyPI with:
+```
+pip install rxn-opennmt-py
+```
+
+Links:
+* [GitHub repository](https://github.com/rxn4chemistry/OpenNMT-py)
+* [PyPI package](https://pypi.org/project/rxn-opennmt-py/)
+
+## OpenNMT-py
 
 OpenNMT-py is the [PyTorch](https://github.com/pytorch/pytorch) version of the [OpenNMT](https://opennmt.net) project, an open-source (MIT) neural machine translation (and beyond!) framework. It is designed to be research friendly to try out new ideas in translation, language modeling, summarization, and many other NLP tasks. Some companies have proven the code to be production ready.
 
diff --git a/onmt/__init__.py b/onmt/__init__.py
index 45bdc13f53..4fc2e76b49 100644
--- a/onmt/__init__.py
+++ b/onmt/__init__.py
@@ -21,4 +21,4 @@
     onmt.modules,
 ]
 
-__version__ = "3.4.3"
+__version__ = "2.0.0"
diff --git a/onmt/bin/train_profile.py b/onmt/bin/train_profile.py
new file mode 100644
index 0000000000..c455f9c3dd
--- /dev/null
+++ b/onmt/bin/train_profile.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+"""Train models with pyprof2 profiling enabled."""
+import os
+import signal
+import torch
+
+import onmt.opts as opts
+import onmt.utils.distributed
+
+from onmt.utils.misc import set_random_seed
+from onmt.utils.logging import init_logger, logger
+from onmt.train_single import main as single_main
+from onmt.utils.parse import ArgumentParser
+from onmt.inputters.inputter import build_dataset_iter, \
+    load_old_vocab, old_style_vocab, build_dataset_iter_multiple
+
+from itertools import cycle
+import torch.cuda.profiler as profiler
+import pyprof2
+pyprof2.init()
+
+
+def train(opt):
+    """Validate ``opt``, build the training iterator and start training."""
+    ArgumentParser.validate_train_opts(opt)
+    ArgumentParser.update_model_opts(opt)
+    ArgumentParser.validate_model_opts(opt)
+
+    # Load checkpoint if we resume from a previous training.
+    if opt.train_from:
+        logger.info('Loading checkpoint from %s' % opt.train_from)
+        checkpoint = torch.load(opt.train_from,
+                                map_location=lambda storage, loc: storage)
+        logger.info('Loading vocab from checkpoint at %s.' % opt.train_from)
+        vocab = checkpoint['vocab']
+    else:
+        vocab = torch.load(opt.data + '.vocab.pt')
+
+    # check for code where vocab is saved instead of fields
+    # (in the future this will be done in a smarter way)
+    if old_style_vocab(vocab):
+        fields = load_old_vocab(
+            vocab, opt.model_type, dynamic_dict=opt.copy_attn)
+    else:
+        fields = vocab
+
+    if len(opt.data_ids) > 1:
+        train_shards = []
+        for train_id in opt.data_ids:
+            shard_base = "train_" + train_id
+            train_shards.append(shard_base)
+        train_iter = build_dataset_iter_multiple(train_shards, fields, opt)
+    else:
+        if opt.data_ids[0] is not None and opt.data_ids[0] != 'None':
+            shard_base = "train_" + opt.data_ids[0]
+        else:
+            shard_base = "train"
+        train_iter = build_dataset_iter(shard_base, fields, opt)
+
+    nb_gpu = len(opt.gpu_ranks)
+
+    if opt.world_size > 1:
+        queues = []
+        mp = torch.multiprocessing.get_context('spawn')
+        semaphore = mp.Semaphore(opt.world_size * opt.queue_size)
+        # Create a thread to listen for errors in the child processes.
+        error_queue = mp.SimpleQueue()
+        error_handler = ErrorHandler(error_queue)
+        # Train with multiprocessing.
+        procs = []
+        for device_id in range(nb_gpu):
+            q = mp.Queue(opt.queue_size)
+            queues.append(q)
+            procs.append(mp.Process(target=run, args=(
+                opt, device_id, error_queue, q, semaphore), daemon=True))
+            procs[device_id].start()
+            logger.info(" Starting process pid: %d  " % procs[device_id].pid)
+            error_handler.add_child(procs[device_id].pid)
+        producer = mp.Process(target=batch_producer,
+                              args=(train_iter, queues, semaphore, opt,),
+                              daemon=True)
+        producer.start()
+        error_handler.add_child(producer.pid)
+
+        for p in procs:
+            p.join()
+        producer.terminate()
+
+    elif nb_gpu == 1:  # case 1 GPU only
+        single_main(opt, 0)
+    else:   # case only CPU
+        single_main(opt, -1)
+
+
+def batch_producer(generator_to_serve, queues, semaphore, opt):
+    """Move batches from ``generator_to_serve`` onto the queues, round-robin."""
+    init_logger(opt.log_file)
+    set_random_seed(opt.seed, False)
+    # generator_to_serve = iter(generator_to_serve)
+
+    def pred(x):
+        """
+        Filters batches that belong only
+        to gpu_ranks of current node
+        """
+        return any(
+            x[0] % opt.world_size == rank
+            for rank in opt.gpu_ranks)
+
+    generator_to_serve = filter(
+        pred, enumerate(generator_to_serve))
+
+    def next_batch(device_id):
+        new_batch = next(generator_to_serve)
+        semaphore.acquire()
+        return new_batch[1]
+
+    b = next_batch(0)
+
+    for device_id, q in cycle(enumerate(queues)):
+        b.dataset = None
+        if isinstance(b.src, tuple):
+            b.src = tuple([_.to(torch.device(device_id))
+                           for _ in b.src])
+        else:
+            b.src = b.src.to(torch.device(device_id))
+        b.tgt = b.tgt.to(torch.device(device_id))
+        b.indices = b.indices.to(torch.device(device_id))
+        b.alignment = b.alignment.to(torch.device(device_id)) \
+            if hasattr(b, 'alignment') else None
+        b.src_map = b.src_map.to(torch.device(device_id)) \
+            if hasattr(b, 'src_map') else None
+
+        # hack to dodge unpicklable `dict_keys`
+        b.fields = list(b.fields)
+        q.put(b)
+        b = next_batch(device_id)
+
+
+def run(opt, device_id, error_queue, batch_queue, semaphore):
+    """Child-process entry point: init distributed mode, train on ``device_id``."""
+    try:
+        gpu_rank = onmt.utils.distributed.multi_init(opt, device_id)
+        if gpu_rank != opt.gpu_ranks[device_id]:
+            raise AssertionError("An error occurred in "
+                                 "Distributed initialization")
+        single_main(opt, device_id, batch_queue, semaphore)
+    except KeyboardInterrupt:
+        pass  # killed by parent, do nothing
+    except Exception:
+        # propagate exception to parent process, keeping original traceback
+        import traceback
+        error_queue.put((opt.gpu_ranks[device_id], traceback.format_exc()))
+
+
+class ErrorHandler(object):
+    """A class that listens for exceptions in children processes and propagates
+    the tracebacks to the parent process."""
+
+    def __init__(self, error_queue):
+        """Start the listener thread and install the SIGUSR1 handler."""
+        import threading
+        self.error_queue = error_queue
+        self.children_pids = []
+        self.error_thread = threading.Thread(
+            target=self.error_listener, daemon=True)
+        self.error_thread.start()
+        signal.signal(signal.SIGUSR1, self.signal_handler)
+
+    def add_child(self, pid):
+        """Remember ``pid`` so signal_handler can interrupt that child."""
+        self.children_pids.append(pid)
+
+    def error_listener(self):
+        """Wait for a queued error, put it back and send SIGUSR1 to this process."""
+        (rank, original_trace) = self.error_queue.get()
+        self.error_queue.put((rank, original_trace))
+        os.kill(os.getpid(), signal.SIGUSR1)
+
+    def signal_handler(self, signalnum, stackframe):
+        """Send SIGINT to the children and raise the queued traceback."""
+        for pid in self.children_pids:
+            os.kill(pid, signal.SIGINT)  # kill children processes
+        (rank, original_trace) = self.error_queue.get()
+        msg = """\n\n-- Tracebacks above this line can probably
+                 be ignored --\n\n"""
+        msg += original_trace
+        raise Exception(msg)
+
+
+def _get_parser():
+    """Build the argument parser with the config, model and train options."""
+    parser = ArgumentParser(description='train.py')
+
+    opts.config_opts(parser)
+    opts.model_opts(parser)
+    opts.train_opts(parser)
+    return parser
+
+
+def main():
+    """Parse the command line and run train()."""
+    parser = _get_parser()
+
+    opt = parser.parse_args()
+    train(opt)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/onmt/bin/translate.py b/onmt/bin/translate.py
index 6b632cc669..3dfa7e55ab 100644
--- a/onmt/bin/translate.py
+++ b/onmt/bin/translate.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import torch
+
 from onmt.inference_engine import InferenceEnginePY
 from onmt.opts import translate_opts
 from onmt.utils.parse import ArgumentParser
@@ -14,6 +16,9 @@ def translate(opt):
     ArgumentParser._validate_transforms_opts(opt)
     ArgumentParser.validate_translate_opts_dynamic(opt)
 
+    if opt.num_threads is not None:
+        torch.set_num_threads(opt.num_threads)
+
     set_random_seed(opt.seed, use_gpu(opt))
 
     engine = InferenceEnginePY(opt)
diff --git a/onmt/opts.py b/onmt/opts.py
index ff039b906c..4fc9e7f93f 100644
--- a/onmt/opts.py
+++ b/onmt/opts.py
@@ -111,6 +111,28 @@ def _add_logging_opts(parser, is_train=True):
             default="runs/onmt",
             help="Log directory for Tensorboard. " "This is also the name of the run.",
         )
+        # Use MLflow for logging training runs and config parameters
+        group.add('--mlflow', '-mlflow', action="store_true",
+                  help="Use mlflow to log training runs and parameters. "
+                  "Must have the library mlflow >= 1.3.0")
+        group.add("--mlflow_experiment_name", "-mlflow_experiment_name",
+                  type=str, default=None,
+                  help="MLflow experiment name")
+        group.add("--mlflow_run_name", "-mlflow_run_name",
+                  type=str, default=None,
+                  help="MLflow run name")
+
+        # Use wandb for logging training runs and config parameters
+        # https://docs.wandb.ai/guides/track/advanced/environment-variables
+        # should be set: WANDB_API_KEY / WANDB_BASE_URL
+        group.add('--wandb', '-wandb', action="store_true",
+                  help="Use wandb to log training runs and parameters. ")
+        group.add("--wandb_project_name", "-wandb_project_name",
+                  type=str, default=None,
+                  help="wandb project name")
+        group.add("--wandb_run_name", "-wandb_run_name",
+                  type=str, default=None,
+                  help="wandb run name")
         group.add(
             "--override_opts",
             "-override-opts",
@@ -1882,6 +1904,8 @@ def translate_opts(parser):
         help="Path to output the predictions (each line will "
         "be the decoded sequence",
     )
+    group.add('--log_probs', '-log_probs', action='store_true',
+              help="Output file with log_probs and gold_score ")
     group.add(
         "--report_align",
         "-report_align",
@@ -1930,6 +1954,9 @@ def translate_opts(parser):
     )
     group.add("--gpu", "-gpu", type=int, default=-1, help="Device to run on")
 
+    group.add('--num_threads', '-num_threads', type=int,
+              help="Number of CPUs to use for translation")
+
     group.add(
         "-transforms",
         "--transforms",
@@ -1939,6 +1966,10 @@ def translate_opts(parser):
         help="Default transform pipeline to apply to data.",
     )
 
+    group = parser.add_argument_group('ibmrxn')
+    group.add_argument('--is_ibmrxn', action='store_true',
+                       help='Translate returns in a format that is compatible with the api')
+
     # Adding options related to Transforms
     _add_transform_opts(parser)
 
diff --git a/onmt/train_single.py b/onmt/train_single.py
index efd76a2752..6b1a99b882 100644
--- a/onmt/train_single.py
+++ b/onmt/train_single.py
@@ -152,7 +152,7 @@ def _get_model_opts(opt, checkpoint=None):
     return model_opt
 
 
-def main(opt, device_id):
+def main(opt, device_id, batch_queue=None, semaphore=None):
     """Start training on `device_id`."""
     # NOTE: It's important that ``opt`` has been validated and updated
     # at this point.
@@ -203,6 +203,42 @@ def main(opt, device_id):
         opt, device_id, model, vocabs, optim, model_saver=model_saver
     )
 
+    if batch_queue is None:  # NOTE(review): confirm `fields` is defined in main()
+        if len(opt.data_ids) > 1:
+            train_shards = []
+            for train_id in opt.data_ids:
+                shard_base = "train_" + train_id
+                train_shards.append(shard_base)
+            train_iter = build_dataset_iter_multiple(train_shards, fields, opt)
+        else:
+            # A missing id shows up either as None or as the literal string 'None'.
+            if opt.data_ids[0] is not None and opt.data_ids[0] != 'None':
+                shard_base = "train_" + opt.data_ids[0]
+            else:
+                shard_base = "train"
+            train_iter = build_dataset_iter(shard_base, fields, opt)
+    else:
+        assert semaphore is not None, \
+            "Using batch_queue requires semaphore as well"
+
+        def _train_iter():
+            while True:
+                batch = batch_queue.get()
+                semaphore.release()
+                yield batch
+
+        train_iter = _train_iter()
+    valid_iter = build_dataset_iter(
+        "valid", fields, opt, is_train=False)
+    if len(opt.gpu_ranks):
+        logger.info('Starting training on GPU: %s' % opt.gpu_ranks)
+    else:
+        logger.info('Starting training on CPU, could be very slow')
+    train_steps = opt.train_steps  # NOTE(review): single_pass handling recurs below
+    if opt.single_pass and train_steps > 0:
+        logger.warning("Option single_pass is enabled, ignoring train_steps.")
+        train_steps = 0
+
     offset = max(0, device_id) if opt.parallel_mode == "data_parallel" else 0
     stride = max(1, len(opt.gpu_ranks)) if opt.parallel_mode == "data_parallel" else 1
 
@@ -235,6 +271,39 @@ def main(opt, device_id):
         logger.warning("Option single_pass is enabled, ignoring train_steps.")
         train_steps = 0
 
+    # Optional experiment tracking; --mlflow takes precedence over --wandb.
+    if opt.mlflow:
+        import mlflow
+        if opt.mlflow_experiment_name is not None:
+            mlflow.set_experiment(opt.mlflow_experiment_name)
+        if opt.mlflow_run_name is not None:
+            mlflow.start_run(run_name=opt.mlflow_run_name)
+        else:
+            mlflow.start_run()
+        for k, v in vars(opt).items():
+            mlflow.log_param(k, v)
+        mlflow.log_param('n_enc_parameters', enc)
+        mlflow.log_param('n_dec_parameters', dec)
+        mlflow.log_param('n_total_parameters', n_params)
+        from onmt import __version__ as onmt_version  # keeps `onmt` from becoming local to main()
+        mlflow.log_param('onmt_version', onmt_version)
+    elif opt.wandb:
+        import wandb
+        init_dict = {}
+        if opt.wandb_project_name is not None:
+            init_dict['project'] = opt.wandb_project_name
+        if opt.wandb_run_name is not None:
+            init_dict['name'] = opt.wandb_run_name
+        wandb.init(**init_dict)
+
+        wandb.config.update(dict(vars(opt)))
+        from onmt import __version__ as onmt_version  # keeps `onmt` from becoming local to main()
+        wandb.config.update({'n_enc_parameters': enc,
+                             'n_dec_parameters': dec,
+                             'n_total_parameters': n_params,
+                             'onmt_version': onmt_version
+                             })
+
     trainer.train(
         train_iter,
         train_steps,
@@ -244,4 +313,9 @@ def main(opt, device_id):
     )
 
     if trainer.report_manager.tensorboard_writer is not None:
-        trainer.report_manager.tensorboard_writer.close()
+        if opt.mlflow:
+            mlflow.end_run()
+        elif opt.wandb:
+            wandb.finish()
+        if hasattr(trainer.report_manager.tensorboard_writer, "close"):
+            trainer.report_manager.tensorboard_writer.close()
diff --git a/onmt/translate/translator.py b/onmt/translate/translator.py
index 98caf3d0fd..7ab961efb2 100644
--- a/onmt/translate/translator.py
+++ b/onmt/translate/translator.py
@@ -21,9 +21,20 @@
 
 
 def build_translator(opt, device_id=0, report_score=True, logger=None, out_file=None):
+    # phs: extra output files, only opened below when opt.log_probs is set
+    log_probs_out_file = None
+    target_score_out_file = None
+    #
+
     if out_file is None:
         out_file = codecs.open(opt.output, "w+", "utf-8")
 
+    # phs: create files to log log probabilities and gold score.
+    if opt.log_probs:
+        log_probs_out_file = codecs.open(opt.output + '_log_probs', 'w+', 'utf-8')
+        target_score_out_file = codecs.open(opt.output + '_gold_score', 'w+', 'utf-8')
+    #
+
     load_test_model = (
         onmt.decoders.ensemble.load_test_model
         if len(opt.models) > 1
@@ -45,6 +56,8 @@ def build_translator(opt, device_id=0, report_score=True, logger=None, out_file=
             report_align=opt.report_align,
             report_score=report_score,
             logger=logger,
+            log_probs_out_file=log_probs_out_file,
+            target_score_out_file=target_score_out_file
         )
     else:
         translator = Translator.from_opt(
@@ -57,6 +70,8 @@ def build_translator(opt, device_id=0, report_score=True, logger=None, out_file=
             report_align=opt.report_align,
             report_score=report_score,
             logger=logger,
+            log_probs_out_file=log_probs_out_file,
+            target_score_out_file=target_score_out_file
         )
     return translator
 
@@ -134,6 +149,9 @@ def __init__(
         seed=-1,
         with_score=False,
         return_gold_log_probs=False,
+        log_probs_out_file=None,  # added phs
+        target_score_out_file=None,  # added phs
+        is_ibmrxn=False  # added phs
     ):
         self.model = model
         self.vocabs = vocabs
@@ -203,6 +221,10 @@ def __init__(
                 "log_probs": [],
             }
 
+        self.log_probs_out_file = log_probs_out_file  # added phs
+        self.target_score_out_file = target_score_out_file  # added phs
+        self.is_ibmrxn = is_ibmrxn  # added phs
+
         set_random_seed(seed, self._use_cuda)
         self.with_score = with_score
 
@@ -220,6 +242,8 @@ def from_opt(
         report_align=False,
         report_score=True,
         logger=None,
+        log_probs_out_file=None,  # added phs
+        target_score_out_file=None  # added phs
     ):
         """Alternate constructor.
 
@@ -274,6 +298,9 @@ def from_opt(
             logger=logger,
             seed=opt.seed,
             with_score=opt.with_score,
+            log_probs_out_file=log_probs_out_file,  # added phs
+            target_score_out_file=target_score_out_file,  # added phs
+            is_ibmrxn=opt.is_ibmrxn,  # added phs
         )
 
     def _log(self, msg):
@@ -295,6 +322,80 @@ def _gold_score(
             glp = None
         return gs, glp
 
+
+    def likelihood(
+            self,
+            src,
+            tgt=None,
+            src_dir=None,
+            batch_size=None,
+            batch_type="sents",
+            attn_debug=False,
+            phrase_table=""):
+        """Encode ``src`` and return the gold scores of ``tgt`` under the model.
+        Args:
+            src: See :func:`self.src_reader.read()`.
+            tgt: See :func:`self.tgt_reader.read()`.
+            src_dir: See :func:`self.src_reader.read()` (only relevant
+                for certain types of data).
+            batch_size (int): size of examples per mini-batch
+            attn_debug (bool): not used by this method
+        Returns:
+            list: the gold scores gathered over all batches; inside
+            each batch they are ordered by ``batch.indices``.
+        Raises:
+            ValueError: if ``batch_size`` is None.
+        """
+
+        if batch_size is None:
+            raise ValueError("batch_size must be set")
+
+        data = inputters.Dataset(
+            self.fields,
+            readers=([self.src_reader, self.tgt_reader]
+                     if tgt else [self.src_reader]),
+            data=[("src", src), ("tgt", tgt)] if tgt else [("src", src)],
+            dirs=[src_dir, None] if tgt else [src_dir],
+            sort_key=inputters.str2sortkey[self.data_type],
+            filter_pred=self._filter_pred
+        )
+
+        data_iter = inputters.OrderedIterator(
+            dataset=data,
+            device=self._dev,
+            batch_size=batch_size,
+            batch_size_fn=max_tok_len if batch_type == "tokens" else None,
+            train=False,
+            sort=False,
+            sort_within_batch=True,
+            shuffle=False
+        )
+
+        all_gold_scores = []
+
+        # Forwarded unchanged to _gold_score below.
+        use_src_map = self.copy_attn
+
+        for batch in data_iter:
+            # No decoding happens here: each batch is encoded once and
+            # its reference target is scored under the model, so no
+            # beam-search state has to be prepared.
+
+            # (1) Run the encoder on the src.
+            src, enc_states, memory_bank, src_lengths = self._run_encoder(batch)
+            self.model.decoder.init_state(src, memory_bank, enc_states)
+
+            # (2) Score the gold target; _gold_score returns (scores, log_probs).
+            gold_scores, _ = self._gold_score(
+                batch, memory_bank, src_lengths, data.src_vocabs, use_src_map,
+                enc_states, batch_size, src)
+            gold_scores = gold_scores.detach().cpu().numpy().tolist()
+
+            all_gold_scores += [score for _, score in sorted(zip(batch.indices.tolist(), gold_scores))]
+
+        return all_gold_scores
+
+
     def _translate(
         self,
         infer_iter,
@@ -336,6 +437,9 @@ def _translate(
 
         all_scores = []
         all_predictions = []
+        if self.is_ibmrxn:
+            all_attentions = []  # added phs
+            attn_debug = True
 
         start_time = time()
 
@@ -445,6 +549,13 @@ def _process_bucket(bucket_translations):
                     self.out_file.write("\n".join(n_best_preds) + "\n")
                 self.out_file.flush()
 
+                # phs: added to log log probs to file
+                if self.log_probs_out_file is not None:
+                    self.log_probs_out_file.write(
+                        '\n'.join(str(t.item()) for t in trans.pred_scores[:self.n_best]) + '\n')
+                    self.log_probs_out_file.flush()
+                #
+
                 if self.verbose:
                     srcs = [voc_src[tok] for tok in trans.src[: trans.srclen]]
                     sent_number = next(counter)
@@ -530,6 +641,9 @@ def _process_bucket(bucket_translations):
                 gold_words_total += bucket_gold_words
                 bucket_translations = []
 
+            if self.is_ibmrxn:
+                all_attentions.append(trans.attns[0])  # NOTE(review): confirm `trans` is bound at this scope
+
         if len(bucket_translations) > 0:
             (
                 bucket_scores,
@@ -546,6 +660,20 @@ def _process_bucket(bucket_translations):
             gold_score_total += bucket_gold_score
             gold_words_total += bucket_gold_words
 
+        # phs: added to log gold scores to file
+        if self.target_score_out_file is not None:
+            self.target_score_out_file.write(
+                str(trans.gold_score.item()) + '\n')
+            self.target_score_out_file.flush()
+        # NOTE(review): confirm `trans` is bound at this scope; it is written only once here.
+
+        if self.is_ibmrxn:  # added phs; NOTE(review): confirm `batch_size` is in scope
+            return {
+                'score': all_scores if batch_size > 1 else all_scores[0],  # return more scores when batch_size > 1
+                'prediction': all_predictions,
+                'context_attns': all_attentions
+            }
+
         end_time = time()
 
         if self.report_score:
diff --git a/onmt/utils/report_manager.py b/onmt/utils/report_manager.py
index 60a88e6443..20ed8a262d 100644
--- a/onmt/utils/report_manager.py
+++ b/onmt/utils/report_manager.py
@@ -16,6 +16,10 @@ def build_report_manager(opt, gpu_rank):
             opt.tensorboard_log_dir + datetime.now().strftime("/%b-%d_%H-%M-%S")
         )
         writer = SummaryWriter(opt.tensorboard_log_dir_dated, comment="Unmt")
+    elif opt.mlflow and gpu_rank == 0:
+        writer = MLflowSummaryWriter()
+    elif opt.wandb and gpu_rank == 0:
+        writer = WandbSummaryWriter()
     else:
         writer = None
 
@@ -67,6 +71,21 @@ def report_training(
                 (set 'start_time' or use 'start()'"""
             )
 
+        # Runs on every call, before the report_every check below.
+        # For wandb use step (1000, 2000, .. ) instead of progress_step
+        # (1,2,3, ...) for training, as otherwise it raises an error
+        # after logging the validation stats with the progress_step
+        # because of the too "old" logs.
+        # NOTE(review): _report_step passes five arguments (incl. patience)
+        # to maybe_log_tensorboard; confirm this four-argument call matches.
+        if isinstance(self.tensorboard_writer, WandbSummaryWriter):
+            log_step = step
+        else:  # default onmt behaviour
+            log_step = self.progress_step
+        self.maybe_log_tensorboard(
+            report_stats, "progress", learning_rate, log_step
+        )
+
         if step % self.report_every == 0:
             if multigpu:
                 report_stats = onmt.utils.Statistics.all_gather_stats(report_stats)
@@ -155,3 +174,31 @@ def _report_step(self, lr, patience, step, valid_stats=None, train_stats=None):
             self.log("Validation accuracy: %g" % valid_stats.accuracy())
 
             self.maybe_log_tensorboard(valid_stats, "valid", lr, patience, step)
+
+
+class MLflowSummaryWriter(object):
+    """
+    Map SummaryWriter add_scalar function to mlflow log_metric
+    """
+    def __init__(self):
+        pass
+
+    def add_scalar(self, tag, scalar_value, global_step=None):
+        import mlflow
+        # mlflow cannot display metric that include '/' char
+        tag = tag.replace('/', '_')
+        mlflow.log_metric(tag, scalar_value, step=global_step)
+
+
+class WandbSummaryWriter(object):
+    """
+    Map SummaryWriter add_scalar function to wandb log
+    """
+    def __init__(self):
+        pass
+
+    def add_scalar(self, tag, scalar_value, global_step=None):
+        import wandb
+        # same tag naming as MLflowSummaryWriter: '/' is replaced by '_'
+        tag = tag.replace('/', '_')
+        wandb.log({tag: scalar_value}, step=global_step)
diff --git a/setup.py b/setup.py
index 9814bb1d2f..2db10c96eb 100644
--- a/setup.py
+++ b/setup.py
@@ -7,17 +7,15 @@
     long_description = f.read()
 
 setup(
-    name="OpenNMT-py",
-    description="A python implementation of OpenNMT",
+    name="rxn-opennmt-py",
+    description="Fork of OpenNMT-py for use in RXN projects",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    version="3.4.3",
+    version="2.0.0",
     packages=find_packages(),
     project_urls={
         "Documentation": "http://opennmt.net/OpenNMT-py/",
-        "Forum": "http://forum.opennmt.net/",
-        "Gitter": "https://gitter.im/OpenNMT/OpenNMT-py",
-        "Source": "https://github.com/OpenNMT/OpenNMT-py/",
+        "Source": "https://github.com/rxn4chemistry/OpenNMT-py/",
     },
     python_requires=">=3.8",
     install_requires=[