From ad4ad2ca588a04d7729c812fb0f9848f5d25796b Mon Sep 17 00:00:00 2001 From: T2T Team Date: Mon, 25 Dec 2017 10:02:27 -0800 Subject: [PATCH 01/42] Fix some issues with the VQ-VAE discretization bottleneck. PiperOrigin-RevId: 180097448 --- docs/new_problem.md | 31 +-- tensor2tensor/bin/t2t-avg-all | 106 --------- tensor2tensor/bin/t2t-bleu | 137 ----------- tensor2tensor/bin/t2t-datagen | 212 ------------------ tensor2tensor/bin/t2t-decoder | 110 --------- tensor2tensor/bin/t2t-make-tf-configs | 87 ------- tensor2tensor/bin/t2t-trainer | 191 ---------------- tensor2tensor/bin/t2t-translate-all | 91 -------- tensor2tensor/bin/t2t_trainer.py | 165 +------------- .../data_generators/generator_utils.py | 5 +- .../data_generators/translate_enzh.py | 189 ++-------------- tensor2tensor/layers/common_layers.py | 2 +- tensor2tensor/models/transformer_vae.py | 26 ++- tensor2tensor/utils/bleu_hook.py | 77 +------ tensor2tensor/utils/bleu_hook_test.py | 4 - 15 files changed, 50 insertions(+), 1383 deletions(-) delete mode 100755 tensor2tensor/bin/t2t-avg-all delete mode 100755 tensor2tensor/bin/t2t-bleu delete mode 100644 tensor2tensor/bin/t2t-datagen delete mode 100644 tensor2tensor/bin/t2t-decoder delete mode 100644 tensor2tensor/bin/t2t-make-tf-configs delete mode 100644 tensor2tensor/bin/t2t-trainer delete mode 100755 tensor2tensor/bin/t2t-translate-all diff --git a/docs/new_problem.md b/docs/new_problem.md index fd5f9d625..48976a61b 100644 --- a/docs/new_problem.md +++ b/docs/new_problem.md @@ -184,7 +184,7 @@ import os from tensor2tensor.data_generators import problem from tensor2tensor.data_generators import text_encoder -from tensor2tensor.data_generators.translate import character_generator +from tensor2tensor.data_generators.wmt import character_generator from tensor2tensor.utils import registry @@ -240,40 +240,16 @@ All hyperparamters inherit from `_default_hparams()` in `problem.py.` If you wou from tensor2tensor.models import transformer @registry.register_hparams -def word2def_hparams(): +def word2def_hparams(self): hparams = transformer.transformer_base_single_gpu() # Or whatever you'd like to build off. hparams.batch_size = 1024 return hparams ``` -# Test the data generation - -You can test data generation of your a problem in your own project with: - -```bash -PROBLEM=word2def -DATA_DIR=$HOME/t2t_data -TMP_DIR=/tmp/t2t_datagen -mkdir -p $DATA_DIR $TMP_DIR - -t2t-datagen \ - --t2t_usr_dir=$PATH_TO_YOUR_PROBLEM_DIR \ - --data_dir=$DATA_DIR \ - --tmp_dir=$TMP_DIR \ - --problem=$PROBLEM -``` - -Where: -* `PROBLEM` is the name of the class that was registered with `@registry.register_problem()`, but converted from `CamelCase` to `snake_case`. -* `PATH_TO_YOUR_PROBLEM_DIR` is a path to the directory of your python problem file. - -If you plan to contribute to the tensor2tensor repository, you can install the local cloned version in developer mode with `pip install -e .` from the tensor2tensor directory. You can also add your new problem file to [`all_problems.py`](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/all_problems.py). - # Run the problem Now that we've gotten our problem set up, let's train a model and generate definitions. -To train, specify the problem name, the model, and hparams: - +We specify our problem name, the model, and hparams. ```bash PROBLEM=word2def MODEL=transformer @@ -282,6 +258,7 @@ HPARAMS=word2def_hparams The rest of the steps are as given in the [walkthrough](walkthrough.md). 
+ What if we wanted to train a model to generate words given definitions? In T2T, we can change the problem name to be `PROBLEM=word2def_rev`. All done. Let us know what definitions your model generated. diff --git a/tensor2tensor/bin/t2t-avg-all b/tensor2tensor/bin/t2t-avg-all deleted file mode 100755 index 3b4d6211d..000000000 --- a/tensor2tensor/bin/t2t-avg-all +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Script to continously average last N checkpoints in a given directory.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import logging - -# Dependency imports - -import numpy as np -import six -from six.moves import zip # pylint: disable=redefined-builtin -from collections import deque -import shutil -import tensorflow as tf -from tensor2tensor.utils import bleu_hook - -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.") -flags.DEFINE_string("output_dir", "avg/", "Directory to output the averaged checkpoints to.") -flags.DEFINE_integer("n", 8, "How many checkpoints should be averaged?") -flags.DEFINE_integer("min_steps", 0, "Ignore checkpoints with less steps.") -flags.DEFINE_integer("wait_minutes", 0, "Wait upto N minutes for a new checkpoint.") - - -def main(_): - tf.logging._handler.setFormatter(logging.Formatter("%(asctime)s:" + logging.BASIC_FORMAT, None)) - tf.logging.set_verbosity(tf.logging.INFO) - - model_dir = os.path.expanduser(FLAGS.model_dir) - output_dir = os.path.expanduser(FLAGS.output_dir) - out_base_file = os.path.join(output_dir, 'model.ckpt') - - # Copy flags.txt with the original time, so t2t-bleu can report correct relative time. 
- os.makedirs(FLAGS.output_dir, exist_ok=True) - if not os.path.exists(os.path.join(output_dir, 'flags.txt')): - shutil.copy2(os.path.join(model_dir, 'flags.txt'), os.path.join(output_dir, 'flags.txt')) - - models_processed = 0 - queue = deque() - for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): - if models_processed == 0: - var_list = tf.contrib.framework.list_variables(model.filename) - avg_values = {} - for (name, shape) in var_list: - if not name.startswith("global_step"): - avg_values[name] = np.zeros(shape) - models_processed += 1 - - tf.logging.info("Loading [%d]: %s" % (models_processed, model.filename)) - reader = tf.contrib.framework.load_checkpoint(model.filename) - for name in avg_values: - avg_values[name] += reader.get_tensor(name) / FLAGS.n - queue.append(model) - if len(queue) < FLAGS.n: - continue - - out_file = "%s-%d" % (out_base_file, model.steps) - tf_vars = [] - tf.logging.info("Averaging %s" % (out_file)) - for (name, value) in six.iteritems(avg_values): - tf_vars.append(tf.get_variable(name, shape=value.shape)) # TODO , dtype=var_dtypes[name] - placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars] - assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)] - - global_step = tf.Variable(model.steps, name="global_step", trainable=False, dtype=tf.int64) - saver = tf.train.Saver(tf.global_variables()) - - tf.logging.info("Running session for %s" % (out_file)) - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - for p, assign_op, (name, value) in zip(placeholders, assign_ops, six.iteritems(avg_values)): - sess.run(assign_op, {p: value}) - tf.logging.info("Storing to %s" % out_file) - saver.save(sess, out_base_file, global_step=global_step) - os.utime(out_file + '.index', (model.mtime, model.mtime)) - - tf.reset_default_graph() - first_model = queue.popleft() - - reader = tf.contrib.framework.load_checkpoint(first_model.filename) - for name in avg_values: - avg_values[name] -= reader.get_tensor(name) / FLAGS.n - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-bleu b/tensor2tensor/bin/t2t-bleu deleted file mode 100755 index cac2b9fc3..000000000 --- a/tensor2tensor/bin/t2t-bleu +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Evaluate BLEU score for all checkpoints/translations in a given directory. - -This script can be used in two ways. - -To evaluate one already translated file: -`t2t-bleu --translation=my-wmt13.de --reference=wmt13_deen.de` - -To evaluate all translations in a given directory (translated by t2t-translate-all): -`t2t-bleu - --translations_dir=my-translations - --reference=wmt13_deen.de - --event_dir=events` - -In addition to the above-mentioned compulsory parameters, -there are optional parameters: - - * bleu_variant: cased (case-sensitive), uncased, both (default). 
- * tag_suffix: Default="", so the tags will be BLEU_cased and BLEU_uncased. tag_suffix - can be used e.g. for different beam sizes if these should be plotted in different graphs. - * min_steps: Don't evaluate checkpoints with less steps. - Default=-1 means check the `last_evaluated_step.txt` file, which contains the number of steps - of the last successfully evaluated checkpoint. - * report_zero: Store BLEU=0 and guess its time based on the oldest file in the translations_dir. - Default=True. This is useful, so TensorBoard reports correct relative time for the remaining - checkpoints. This flag is set to False if min_steps is > 0. - * wait_minutes: Wait upto N minutes for a new translated file. Default=0. - This is useful for continuous evaluation of a running training, - in which case this should be equal to save_checkpoints_secs/60 plus time needed for translation - plus some reserve. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -from tensor2tensor.utils import bleu_hook -import tensorflow as tf - - -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string("source", None, "Path to the source-language file to be translated") -flags.DEFINE_string("reference", None, "Path to the reference translation file") -flags.DEFINE_string("translation", None, "Path to the MT system translation file") -flags.DEFINE_string("translations_dir", None, "Directory with translated files to be evaulated.") -flags.DEFINE_string("event_dir", None, "Where to store the event file.") - -flags.DEFINE_string("bleu_variant", "both", - "Possible values: cased(case-sensitive), uncased, both(default).") -flags.DEFINE_string("tag_suffix", "", - "What to add to BLEU_cased and BLEU_uncased tags. Default=''.") -flags.DEFINE_integer("min_steps", -1, "Don't evaluate checkpoints with less steps.") -flags.DEFINE_integer("wait_minutes", 0, - "Wait upto N minutes for a new checkpoint, cf. save_checkpoints_secs.") -flags.DEFINE_bool("report_zero", None, "Store BLEU=0 and guess its time based on the oldest file.") - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - if FLAGS.translation: - if FLAGS.translations_dir: - raise ValueError('Cannot specify both --translation and --translations_dir.') - if FLAGS.bleu_variant in ('uncased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=False) - print("BLEU_uncased = %6.2f" % bleu) - if FLAGS.bleu_variant in ('cased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, FLAGS.translation, case_sensitive=True) - print("BLEU_cased = %6.2f" % bleu) - return - - if not FLAGS.translations_dir: - raise ValueError('Either --translation or --translations_dir must be specified.') - transl_dir = os.path.expanduser(FLAGS.translations_dir) - - last_step_file = os.path.join(FLAGS.event_dir, 'last_evaluated_step.txt') - if FLAGS.min_steps == -1: - try: - with open(last_step_file) as ls_file: - FLAGS.min_steps = int(ls_file.read()) - except FileNotFoundError: - FLAGS.min_steps = 0 - if FLAGS.report_zero is None: - FLAGS.report_zero = FLAGS.min_steps == 0 - - writer = tf.summary.FileWriter(FLAGS.event_dir) - for transl_file in bleu_hook.stepfiles_iterator(transl_dir, FLAGS.wait_minutes, - FLAGS.min_steps, path_suffix=''): - # report_zero handling must be inside the for-loop, - # so we are sure the transl_dir is already created. 
- if FLAGS.report_zero: - all_files = (os.path.join(transl_dir, f) for f in os.listdir(transl_dir)) - start_time = min(os.path.getmtime(f) for f in all_files if os.path.isfile(f)) - values = [] - if FLAGS.bleu_variant in ('uncased', 'both'): - values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=0)) - if FLAGS.bleu_variant in ('cased', 'both'): - values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=0)) - writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), - wall_time=start_time, step=0)) - FLAGS.report_zero = False - - filename = transl_file.filename - tf.logging.info("Evaluating " + filename) - values = [] - if FLAGS.bleu_variant in ('uncased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename, case_sensitive=False) - values.append(tf.Summary.Value(tag='BLEU_uncased' + FLAGS.tag_suffix, simple_value=bleu)) - tf.logging.info("%s: BLEU_uncased = %6.2f" % (filename, bleu)) - if FLAGS.bleu_variant in ('cased', 'both'): - bleu = 100 * bleu_hook.bleu_wrapper(FLAGS.reference, filename, case_sensitive=True) - values.append(tf.Summary.Value(tag='BLEU_cased' + FLAGS.tag_suffix, simple_value=bleu)) - tf.logging.info("%s: BLEU_cased = %6.2f" % (transl_file.filename, bleu)) - writer.add_event(tf.summary.Event(summary=tf.Summary(value=values), - wall_time=transl_file.mtime, step=transl_file.steps)) - writer.flush() - with open(last_step_file, 'w') as ls_file: - ls_file.write(str(transl_file.steps) + '\n') - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen deleted file mode 100644 index 2ac0f0db2..000000000 --- a/tensor2tensor/bin/t2t-datagen +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Produces the training and dev data for --problem into --data_dir. - -Produces sharded and shuffled TFRecord files of tensorflow.Example protocol -buffers for a variety of registered datasets. - -All Problems are registered with @registry.register_problem or are in -_SUPPORTED_PROBLEM_GENERATORS in this file. Each entry maps a string name -(selectable on the command-line with --problem) to a function that takes 2 -arguments - input_directory and mode (one of "train" or "dev") - and yields for -each training example a dictionary mapping string feature names to lists of -{string, int, float}. The generator will be run once for each mode. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import random -import tempfile - -# Dependency imports - -import numpy as np - -from tensor2tensor.data_generators import algorithmic_math -from tensor2tensor.data_generators import all_problems # pylint: disable=unused-import -from tensor2tensor.data_generators import audio -from tensor2tensor.data_generators import generator_utils -from tensor2tensor.data_generators import snli -from tensor2tensor.data_generators import wsj_parsing -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string("data_dir", "", "Data directory.") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory.") -flags.DEFINE_string("problem", "", - "The name of the problem to generate data for.") -flags.DEFINE_string("exclude_problems", "", - "Comma-separates list of problems to exclude.") -flags.DEFINE_integer("num_shards", 0, "How many shards to use. Ignored for " - "registered Problems.") -flags.DEFINE_integer("max_cases", 0, - "Maximum number of cases to generate (unbounded if 0).") -flags.DEFINE_bool("only_list", False, - "If true, we only list the problems that will be generated.") -flags.DEFINE_integer("random_seed", 429459, "Random seed to use.") -flags.DEFINE_integer("task_id", -1, "For distributed data generation.") -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_problem calls, that will then be " - "available to t2t-datagen.") - -# Mapping from problems that we can generate data for to their generators. 
-# pylint: disable=g-long-lambda -_SUPPORTED_PROBLEM_GENERATORS = { - "algorithmic_algebra_inverse": ( - lambda: algorithmic_math.algebra_inverse(26, 0, 2, 100000), - lambda: algorithmic_math.algebra_inverse(26, 3, 3, 10000)), - "parsing_english_ptb8k": ( - lambda: wsj_parsing.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 2**13, 2**9), - lambda: wsj_parsing.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 2**13, 2**9)), - "parsing_english_ptb16k": ( - lambda: wsj_parsing.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 2**14, 2**9), - lambda: wsj_parsing.parsing_token_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 2**14, 2**9)), - "inference_snli32k": ( - lambda: snli.snli_token_generator(FLAGS.tmp_dir, True, 2**15), - lambda: snli.snli_token_generator(FLAGS.tmp_dir, False, 2**15), - ), - "audio_timit_characters_test": ( - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 1718), - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 626)), - "audio_timit_tokens_8k_test": ( - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 1718, - vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13), - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 626, - vocab_filename="vocab.endefr.%d" % 2**13, vocab_size=2**13)), - "audio_timit_tokens_32k_test": ( - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, True, 1718, - vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15), - lambda: audio.timit_generator( - FLAGS.data_dir, FLAGS.tmp_dir, False, 626, - vocab_filename="vocab.endefr.%d" % 2**15, vocab_size=2**15)), -} - -# pylint: enable=g-long-lambda - - -def set_random_seed(): - """Set the random seed from flag everywhere.""" - tf.set_random_seed(FLAGS.random_seed) - random.seed(FLAGS.random_seed) - np.random.seed(FLAGS.random_seed) - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - - # Calculate the list of problems to generate. - problems = sorted( - list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems()) - for exclude in FLAGS.exclude_problems.split(","): - if exclude: - problems = [p for p in problems if exclude not in p] - if FLAGS.problem and FLAGS.problem[-1] == "*": - problems = [p for p in problems if p.startswith(FLAGS.problem[:-1])] - elif FLAGS.problem: - problems = [p for p in problems if p == FLAGS.problem] - else: - problems = [] - - # Remove TIMIT if paths are not given. - if not FLAGS.timit_paths: - problems = [p for p in problems if "timit" not in p] - # Remove parsing if paths are not given. - if not FLAGS.parsing_path: - problems = [p for p in problems if "parsing" not in p] - - if not problems: - problems_str = "\n * ".join( - sorted(list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())) - error_msg = ("You must specify one of the supported problems to " - "generate data for:\n * " + problems_str + "\n") - error_msg += ("TIMIT and parsing need data_sets specified with " - "--timit_paths and --parsing_path.") - raise ValueError(error_msg) - - if not FLAGS.data_dir: - FLAGS.data_dir = tempfile.gettempdir() - tf.logging.warning("It is strongly recommended to specify --data_dir. 
" - "Data will be written to default data_dir=%s.", - FLAGS.data_dir) - - tf.logging.info("Generating problems:\n%s" - % registry.display_list_by_prefix(problems, - starting_spaces=4)) - if FLAGS.only_list: - return - for problem in problems: - set_random_seed() - - if problem in _SUPPORTED_PROBLEM_GENERATORS: - generate_data_for_problem(problem) - else: - generate_data_for_registered_problem(problem) - - -def generate_data_for_problem(problem): - """Generate data for a problem in _SUPPORTED_PROBLEM_GENERATORS.""" - training_gen, dev_gen = _SUPPORTED_PROBLEM_GENERATORS[problem] - - num_shards = FLAGS.num_shards or 10 - tf.logging.info("Generating training data for %s.", problem) - train_output_files = generator_utils.train_data_filenames( - problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, num_shards) - generator_utils.generate_files(training_gen(), train_output_files, - FLAGS.max_cases) - tf.logging.info("Generating development data for %s.", problem) - dev_output_files = generator_utils.dev_data_filenames( - problem + generator_utils.UNSHUFFLED_SUFFIX, FLAGS.data_dir, 1) - generator_utils.generate_files(dev_gen(), dev_output_files) - all_output_files = train_output_files + dev_output_files - generator_utils.shuffle_dataset(all_output_files) - - -def generate_data_for_registered_problem(problem_name): - tf.logging.info("Generating data for %s.", problem_name) - if FLAGS.num_shards: - raise ValueError("--num_shards should not be set for registered Problem.") - problem = registry.problem(problem_name) - task_id = None if FLAGS.task_id < 0 else FLAGS.task_id - problem.generate_data( - os.path.expanduser(FLAGS.data_dir), - os.path.expanduser(FLAGS.tmp_dir), - task_id=task_id) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder deleted file mode 100644 index f453b01fd..000000000 --- a/tensor2tensor/bin/t2t-decoder +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Decode from trained T2T models. - -This binary performs inference using the Estimator API. - -Example usage to decode from dataset: - - t2t-decoder \ - --data_dir ~/data \ - --problems=algorithmic_identity_binary40 \ - --model=transformer - --hparams_set=transformer_base - -Set FLAGS.decode_interactive or FLAGS.decode_from_file for alternative decode -sources. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -# Dependency imports - -from tensor2tensor.tpu import tpu_trainer -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -# Additional flags in tpu/tpu_trainer.py and utils/flags.py -flags.DEFINE_string("decode_from_file", None, - "Path to the source file for decoding") -flags.DEFINE_string("decode_to_file", None, - "Path to the decoded (output) file") -flags.DEFINE_bool("decode_interactive", False, - "Interactive local inference mode.") -flags.DEFINE_integer("decode_shards", 1, "Number of decoding replicas.") - - -def create_hparams(): - return tpu_trainer_lib.create_hparams( - FLAGS.hparams_set, - FLAGS.hparams, - data_dir=os.path.expanduser(FLAGS.data_dir), - problem_name=FLAGS.problems) - - -def create_decode_hparams(): - decode_hp = decoding.decode_hparams(FLAGS.decode_hparams) - decode_hp.add_hparam("shards", FLAGS.decode_shards) - decode_hp.add_hparam("shard_id", FLAGS.worker_id) - return decode_hp - - -def decode(estimator, hparams, decode_hp): - if FLAGS.decode_interactive: - decoding.decode_interactively(estimator, hparams, decode_hp) - elif FLAGS.decode_from_file: - decoding.decode_from_file(estimator, FLAGS.decode_from_file, hparams, - decode_hp, FLAGS.decode_to_file) - else: - decoding.decode_from_dataset( - estimator, - FLAGS.problems.split("-"), - hparams, - decode_hp, - decode_to_file=FLAGS.decode_to_file, - dataset_split="test" if FLAGS.eval_use_test_set else None) - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - FLAGS.use_tpu = False # decoding not supported on TPU - - hp = create_hparams() - decode_hp = create_decode_hparams() - - estimator = tpu_trainer_lib.create_estimator( - FLAGS.model, - hp, - tpu_trainer.create_run_config(hp), - decode_hparams=decode_hp, - use_tpu=False) - - decode(estimator, hp, decode_hp) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs deleted file mode 100644 index 0b656aba6..000000000 --- a/tensor2tensor/bin/t2t-make-tf-configs +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Output command line arguments and json-encoded TF_CONFIGs. - -Usage: - -`t2t-make-tf-configs --masters="server1:1234" --ps="server3:2134,server4:2334"` - -Outputs 1 line per job to stdout, first the masters, then the parameter servers. -Each line has the TF_CONFIG, then a tab, then the command line flags for that -job. - -If there is a single master, it will have the `--sync` flag. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json - -# Dependency imports - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string("masters", "", "Comma-separated list of master addresses") -flags.DEFINE_string("ps", "", "Comma-separated list of ps addresses") - - -def main(_): - if not (FLAGS.masters and FLAGS.ps): - raise ValueError("Must provide --masters and --ps") - - masters = FLAGS.masters.split(",") - ps = FLAGS.ps.split(",") - - cluster = {"ps": ps, "master": masters} - - for task_type, jobs in (("master", masters), ("ps", ps)): - for idx, job in enumerate(jobs): - if task_type == "master": - cmd_line_flags = " ".join([ - "--master=grpc://%s" % job, - "--ps_replicas=%d" % len(ps), - "--worker_replicas=%d" % len(masters), - "--worker_gpu=1", - "--worker_id=%d" % idx, - "--worker_job='/job:master'", - "--ps_gpu=1", - "--schedule=train", - "--sync" if len(masters) == 1 else "", - ]) - else: - cmd_line_flags = " ".join([ - "--master=grpc://%s" % job, - "--schedule=run_std_server", - ]) - - tf_config = json.dumps({ - "cluster": cluster, - "task": { - "type": task_type, - "index": idx - }, - "environment": "cloud", - }) - print("'%s'\t%s" % (tf_config, cmd_line_flags)) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer deleted file mode 100644 index 70435094a..000000000 --- a/tensor2tensor/bin/t2t-trainer +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Train on TPU.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import os -import sys - -# Dependency imports - -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -# See flags.py for additional command-line flags. -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. 
@registry.register_model calls, that will then be " - "available to the t2t-trainer.") -flags.DEFINE_integer("random_seed", 1234, "Random seed.") -flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") -flags.DEFINE_integer("iterations_per_loop", 1000, - "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") -flags.DEFINE_bool("generate_data", False, "Generate data before training?") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory, used if --generate_data.") -flags.DEFINE_bool("profile", False, "Profile performance?") - -# To maintain compatibility with some internal libs, we guard against these flag -# definitions possibly erroring. Apologies for the ugliness. -try: - flags.DEFINE_string("master", "", "Address of TensorFlow master.") - flags.DEFINE_string("output_dir", "", "Base output directory for run.") - flags.DEFINE_string("schedule", "continuous_train_and_eval", - "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 10000, - "Number of steps in evaluation. By default, eval will " - "stop after eval_steps or when it runs through the eval " - "dataset once in full, whichever comes first, so this " - "can be a very large number.") -except: # pylint: disable=bare-except - pass - - -def get_problem_name(): - problems = FLAGS.problems.split("-") - assert len(problems) == 1 - return problems[0] - - -def create_hparams(): - return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) - - -def create_experiment_fn(): - return tpu_trainer_lib.create_experiment_fn( - model_name=FLAGS.model, - problem_name=get_problem_name(), - data_dir=os.path.expanduser(FLAGS.data_dir), - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - min_eval_frequency=FLAGS.local_eval_frequency, - schedule=FLAGS.schedule, - export=FLAGS.export_saved_model, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), - use_tfdbg=FLAGS.tfdbg, - use_dbgprofile=FLAGS.dbgprofile, - eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, - eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, - eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, - eval_early_stopping_metric_minimize=FLAGS. - eval_early_stopping_metric_minimize, - use_tpu=FLAGS.use_tpu) - - -def create_run_config(hp): - return tpu_trainer_lib.create_run_config( - model_dir=os.path.expanduser(FLAGS.output_dir), - master=FLAGS.master, - iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement, - save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - num_gpus=FLAGS.worker_gpu, - gpu_order=FLAGS.gpu_order, - shard_to_cpu=FLAGS.locally_shard_to_cpu, - num_async_replicas=FLAGS.worker_replicas, - gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, - enable_graph_rewriter=FLAGS.experimental_optimize_placement, - use_tpu=FLAGS.use_tpu, - schedule=FLAGS.schedule, - no_data_parallelism=hp.no_data_parallelism, - daisy_chain_variables=hp.daisy_chain_variables, - ps_replicas=FLAGS.ps_replicas, - ps_job=FLAGS.ps_job, - ps_gpu=FLAGS.ps_gpu, - sync=FLAGS.sync, - worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job) - - -def generate_data(): - # Generate data if requested. 
- data_dir = os.path.expanduser(FLAGS.data_dir) - tmp_dir = os.path.expanduser(FLAGS.tmp_dir) - tf.gfile.MakeDirs(data_dir) - tf.gfile.MakeDirs(tmp_dir) - - problem_name = get_problem_name() - tf.logging.info("Generating data for %s" % problem_name) - registry.problem(problem_name).generate_data(data_dir, tmp_dir) - - -@contextlib.contextmanager -def profile_context(): - if FLAGS.profile: - with tf.contrib.tfprof.ProfileContext("t2tprof", - trace_steps=range(100), - dump_steps=range(100)) as pctx: - opts = tf.profiler.ProfileOptionBuilder.time_and_memory() - pctx.add_auto_profiling("op", opts, range(100)) - yield - else: - yield - - -def log_registry(): - if FLAGS.registry_help: - tf.logging.info(registry.help_string()) - sys.exit(0) - - -def execute_schedule(exp): - if not hasattr(exp, FLAGS.schedule): - raise ValueError( - "Experiment has no method %s, from --schedule" % FLAGS.schedule) - with profile_context(): - getattr(exp, FLAGS.schedule)() - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - tpu_trainer_lib.set_random_seed(FLAGS.random_seed) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - log_registry() - - if FLAGS.generate_data: - generate_data() - - hparams = create_hparams() - run_config = create_run_config(hparams) - - exp_fn = create_experiment_fn() - exp = exp_fn(run_config, hparams) - execute_schedule(exp) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t-translate-all b/tensor2tensor/bin/t2t-translate-all deleted file mode 100755 index 1ee7e535f..000000000 --- a/tensor2tensor/bin/t2t-translate-all +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Translate a file with all checkpoints in a given directory. - -t2t-decoder will be executed with these parameters: ---problems ---data_dir ---output_dir with the value of --model_dir ---decode_from_file with the value of --source ---decode_hparams with properly formated --beam_size and --alpha ---checkpoint_path automatically filled ---decode_to_file automatically filled -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import shutil -import tensorflow as tf -from tensor2tensor.utils import bleu_hook - - -flags = tf.flags - -# t2t-translate-all specific options -flags.DEFINE_string("decoder_command", "t2t-decoder {params}", - "Which command to execute instead t2t-decoder." - "{params} is replaced by the parameters. Useful e.g. 
for qsub wrapper.") -flags.DEFINE_string("model_dir", "", "Directory to load model checkpoints from.") -flags.DEFINE_string("source", None, "Path to the source-language file to be translated") -flags.DEFINE_string("translations_dir", "translations", "Where to store the translated files.") -flags.DEFINE_integer("min_steps", 0, "Ignore checkpoints with less steps.") -flags.DEFINE_integer("wait_minutes", 0, "Wait upto N minutes for a new checkpoint") - -# options derived from t2t-decoder -flags.DEFINE_integer("beam_size", 4, "Beam-search width.") -flags.DEFINE_float("alpha", 0.6, "Beam-search alpha.") -flags.DEFINE_string("model", "transformer", "see t2t-decoder") -flags.DEFINE_string("t2t_usr_dir", None, "see t2t-decoder") -flags.DEFINE_string("data_dir", None, "see t2t-decoder") -flags.DEFINE_string("problems", None, "see t2t-decoder") -flags.DEFINE_string("hparams_set", "transformer_big_single_gpu", "see t2t-decoder") - - -def main(_): - FLAGS = flags.FLAGS - tf.logging.set_verbosity(tf.logging.INFO) - model_dir = os.path.expanduser(FLAGS.model_dir) - translations_dir = os.path.expanduser(FLAGS.translations_dir) - source = os.path.expanduser(FLAGS.source) - os.makedirs(translations_dir, exist_ok=True) - translated_base_file = os.path.join(translations_dir, FLAGS.problems) - - # Copy flags.txt with the original time, so t2t-bleu can report correct relative time. - flags_path = os.path.join(translations_dir, FLAGS.problems + '-flags.txt') - if not os.path.exists(flags_path): - shutil.copy2(os.path.join(model_dir, 'flags.txt'), flags_path) - - for model in bleu_hook.stepfiles_iterator(model_dir, FLAGS.wait_minutes, FLAGS.min_steps): - tf.logging.info("Translating " + model.filename) - out_file = translated_base_file + '-' + str(model.steps) - if os.path.exists(out_file): - tf.logging.info(out_file + " already exists, so skipping it.") - else: - tf.logging.info("Translating " + out_file) - params = ("--t2t_usr_dir={FLAGS.t2t_usr_dir} --output_dir={model_dir} " - "--data_dir={FLAGS.data_dir} --problems={FLAGS.problems} " - "--decode_hparams=beam_size={FLAGS.beam_size},alpha={FLAGS.alpha} " - "--model={FLAGS.model} --hparams_set={FLAGS.hparams_set} " - "--checkpoint_path={model.filename} --decode_from_file={source} " - "--decode_to_file={out_file}".format(**locals())) - command = FLAGS.decoder_command.format(**locals()) - tf.logging.info("Running:\n" + command) - os.system(command) - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 571a21839..99ec99b20 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -13,177 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Train on TPU.""" +"""Trainer for T2T models. 
See tpu_trainer.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import contextlib -import os -import sys - # Dependency imports -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir +from tensor2tensor.tpu import tpu_trainer import tensorflow as tf -flags = tf.flags -FLAGS = flags.FLAGS - -# See flags.py for additional command-line flags. -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. @registry.register_model calls, that will then be " - "available to the t2t-trainer.") -flags.DEFINE_integer("random_seed", 1234, "Random seed.") -flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") -flags.DEFINE_integer("iterations_per_loop", 1000, - "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") -flags.DEFINE_bool("generate_data", False, "Generate data before training?") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory, used if --generate_data.") -flags.DEFINE_bool("profile", False, "Profile performance?") - -# To maintain compatibility with some internal libs, we guard against these flag -# definitions possibly erroring. Apologies for the ugliness. -try: - flags.DEFINE_string("master", "", "Address of TensorFlow master.") - flags.DEFINE_string("output_dir", "", "Base output directory for run.") - flags.DEFINE_string("schedule", "continuous_train_and_eval", - "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 10000, - "Number of steps in evaluation. By default, eval will " - "stop after eval_steps or when it runs through the eval " - "dataset once in full, whichever comes first, so this " - "can be a very large number.") -except: # pylint: disable=bare-except - pass - - -def get_problem_name(): - problems = FLAGS.problems.split("-") - assert len(problems) == 1 - return problems[0] - - -def create_hparams(): - return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) - - -def create_experiment_fn(): - return tpu_trainer_lib.create_experiment_fn( - model_name=FLAGS.model, - problem_name=get_problem_name(), - data_dir=os.path.expanduser(FLAGS.data_dir), - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - min_eval_frequency=FLAGS.local_eval_frequency, - schedule=FLAGS.schedule, - export=FLAGS.export_saved_model, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), - use_tfdbg=FLAGS.tfdbg, - use_dbgprofile=FLAGS.dbgprofile, - eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, - eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, - eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, - eval_early_stopping_metric_minimize=FLAGS. 
- eval_early_stopping_metric_minimize, - use_tpu=FLAGS.use_tpu) - - -def create_run_config(hp): - return tpu_trainer_lib.create_run_config( - model_dir=os.path.expanduser(FLAGS.output_dir), - master=FLAGS.master, - iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement, - save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - num_gpus=FLAGS.worker_gpu, - gpu_order=FLAGS.gpu_order, - shard_to_cpu=FLAGS.locally_shard_to_cpu, - num_async_replicas=FLAGS.worker_replicas, - gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, - enable_graph_rewriter=FLAGS.experimental_optimize_placement, - use_tpu=FLAGS.use_tpu, - schedule=FLAGS.schedule, - no_data_parallelism=hp.no_data_parallelism, - daisy_chain_variables=hp.daisy_chain_variables, - ps_replicas=FLAGS.ps_replicas, - ps_job=FLAGS.ps_job, - ps_gpu=FLAGS.ps_gpu, - sync=FLAGS.sync, - worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job) - - -def generate_data(): - # Generate data if requested. - data_dir = os.path.expanduser(FLAGS.data_dir) - tmp_dir = os.path.expanduser(FLAGS.tmp_dir) - tf.gfile.MakeDirs(data_dir) - tf.gfile.MakeDirs(tmp_dir) - - problem_name = get_problem_name() - tf.logging.info("Generating data for %s" % problem_name) - registry.problem(problem_name).generate_data(data_dir, tmp_dir) - - -@contextlib.contextmanager -def profile_context(): - if FLAGS.profile: - with tf.contrib.tfprof.ProfileContext("t2tprof", - trace_steps=range(100), - dump_steps=range(100)) as pctx: - opts = tf.profiler.ProfileOptionBuilder.time_and_memory() - pctx.add_auto_profiling("op", opts, range(100)) - yield - else: - yield - - -def log_registry(): - if FLAGS.registry_help: - tf.logging.info(registry.help_string()) - sys.exit(0) - - -def execute_schedule(exp): - if not hasattr(exp, FLAGS.schedule): - raise ValueError( - "Experiment has no method %s, from --schedule" % FLAGS.schedule) - with profile_context(): - getattr(exp, FLAGS.schedule)() - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - tpu_trainer_lib.set_random_seed(FLAGS.random_seed) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - log_registry() - - if FLAGS.generate_data: - generate_data() - - hparams = create_hparams() - run_config = create_run_config(hparams) - exp_fn = create_experiment_fn() - exp = exp_fn(run_config, hparams) - execute_schedule(exp) +def main(unused_argv): + tpu_trainer.main(unused_argv) if __name__ == "__main__": diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py index c657a503f..236d43772 100644 --- a/tensor2tensor/data_generators/generator_utils.py +++ b/tensor2tensor/data_generators/generator_utils.py @@ -316,8 +316,7 @@ def get_or_generate_vocab_inner(data_dir, vocab_filename, vocab_size, def get_or_generate_vocab(data_dir, tmp_dir, vocab_filename, vocab_size, - sources, - _file_byte_budget=1e6): + sources): """Generate a vocabulary from the datasets in sources.""" def generate(): @@ -350,7 +349,7 @@ def generate(): # Use Tokenizer to count the word occurrences. 
with tf.gfile.GFile(filepath, mode="r") as source_file: - file_byte_budget = _file_byte_budget + file_byte_budget = 1e6 counter = 0 countermax = int(source_file.size() / file_byte_budget / 2) for line in source_file: diff --git a/tensor2tensor/data_generators/translate_enzh.py b/tensor2tensor/data_generators/translate_enzh.py index d3ddd8d98..52b364137 100644 --- a/tensor2tensor/data_generators/translate_enzh.py +++ b/tensor2tensor/data_generators/translate_enzh.py @@ -42,145 +42,28 @@ # This is far from being the real WMT17 task - only toyset here # you need to register to get UN data and CWT data. Also, by convention, # this is EN to ZH - use translate_enzh_wmt8k_rev for ZH to EN task -# -# News Commentary, around 220k lines -# This dataset is only a small fraction of full WMT17 task -_NC_TRAIN_DATASETS = [[ - "http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz", - ["training/news-commentary-v12.zh-en.en", - "training/news-commentary-v12.zh-en.zh"]]] +_ENZH_TRAIN_DATASETS = [[("http://data.statmt.org/wmt17/translation-task/" + "training-parallel-nc-v12.tgz"), + ("training/news-commentary-v12.zh-en.en", + "training/news-commentary-v12.zh-en.zh")]] -# Test set from News Commentary. 2000 lines -_NC_TEST_DATASETS = [[ +_ENZH_TEST_DATASETS = [[ "http://data.statmt.org/wmt17/translation-task/dev.tgz", ("dev/newsdev2017-enzh-src.en.sgm", "dev/newsdev2017-enzh-ref.zh.sgm") ]] -# UN parallel corpus. 15,886,041 lines -# Visit source website to download manually: -# https://conferences.unite.un.org/UNCorpus -# -# NOTE: You need to register to download dataset from official source -# place into tmp directory e.g. /tmp/t2t_datagen/dataset.tgz -_UN_TRAIN_DATASETS = [[ - "https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/UNv1.0.en-zh.tar.gz", - ["en-zh/UNv1.0.en-zh.en", - "en-zh/UNv1.0.en-zh.zh"]]] - -# CWMT corpus -# Visit source website to download manually: -# http://nlp.nju.edu.cn/cwmt-wmt/ -# -# casia2015: 1,050,000 lines -# casict2015: 2,036,833 lines -# datum2015: 1,000,003 lines -# datum2017: 1,999,968 lines -# NEU2017: 2,000,000 lines -# -# NOTE: You need to register to download dataset from official source -# place into tmp directory e.g. 
/tmp/t2t_datagen/dataset.tgz - -_CWMT_TRAIN_DATASETS = [ - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/casia2015/casia2015_en.txt", - "cwmt/casia2015/casia2015_ch.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/casict2015/casict2015_en.txt", - "cwmt/casict2015/casict2015_ch.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/neu2017/NEU_en.txt", - "cwmt/neu2017/NEU_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2015/datum_en.txt", - "cwmt/datum2015/datum_ch.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book1_en.txt", - "cwmt/datum2017/Book1_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book2_en.txt", - "cwmt/datum2017/Book2_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book3_en.txt", - "cwmt/datum2017/Book3_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book4_en.txt", - "cwmt/datum2017/Book4_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book5_en.txt", - "cwmt/datum2017/Book5_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book6_en.txt", - "cwmt/datum2017/Book6_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book7_en.txt", - "cwmt/datum2017/Book7_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book8_en.txt", - "cwmt/datum2017/Book8_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book9_en.txt", - "cwmt/datum2017/Book9_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book10_en.txt", - "cwmt/datum2017/Book10_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book11_en.txt", - "cwmt/datum2017/Book11_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book12_en.txt", - "cwmt/datum2017/Book12_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book13_en.txt", - "cwmt/datum2017/Book13_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book14_en.txt", - "cwmt/datum2017/Book14_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book15_en.txt", - "cwmt/datum2017/Book15_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book16_en.txt", - "cwmt/datum2017/Book16_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book17_en.txt", - "cwmt/datum2017/Book17_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book18_en.txt", - "cwmt/datum2017/Book18_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book19_en.txt", - "cwmt/datum2017/Book19_cn.txt"]], - ["https://s3-us-west-2.amazonaws.com/twairball.wmt17.zh-en/cwmt.tgz", - ["cwmt/datum2017/Book20_en.txt", - "cwmt/datum2017/Book20_cn.txt"]] -] - - -def get_filename(dataset): - return dataset[0][0].split('/')[-1] 
@registry.register_problem -class TranslateEnzhWmt32k(translate.TranslateProblem): - """Problem spec for WMT En-Zh translation. - Attempts to use full training dataset, which needs website - registration and downloaded manually from official sources: - - CWMT: - - http://nlp.nju.edu.cn/cwmt-wmt/ - - Website contrains instructions for FTP server access. - - You'll need to download CASIA, CASICT, DATUM2015, DATUM2017, - NEU datasets - - UN Parallel Corpus: - - https://conferences.unite.un.org/UNCorpus - - You'll need to register your to download the dataset. - - NOTE: place into tmp directory e.g. /tmp/t2t_datagen/dataset.tgz - """ +class TranslateEnzhWmt8k(translate.TranslateProblem): + """Problem spec for WMT En-Zh translation.""" @property def targeted_vocab_size(self): - return 2**15 # 32k + return 2**13 # 8192 + + @property + def num_shards(self): + return 10 # This is a small dataset. @property def source_vocab_name(self): @@ -189,35 +72,20 @@ def source_vocab_name(self): @property def target_vocab_name(self): return "vocab.enzh-zh.%d" % self.targeted_vocab_size - - def get_training_dataset(self, tmp_dir): - """UN Parallel Corpus and CWMT Corpus need to be downloaded manually. - Append to training dataset if available - """ - full_dataset = _NC_TRAIN_DATASETS - for dataset in [_CWMT_TRAIN_DATASETS, _UN_TRAIN_DATASETS]: - filename = get_filename(dataset) - tmp_filepath = os.path.join(tmp_dir, filename) - if tf.gfile.Exists(tmp_filepath): - full_dataset = full_dataset + dataset - else: - tf.logging.info("[TranslateEzhWmt] dataset incomplete, you need to manually download %s" % filename) - return full_dataset def generator(self, data_dir, tmp_dir, train): - TRAIN_DATASET = self.get_training_dataset(tmp_dir) - datasets = TRAIN_DATASET if train else _NC_TEST_DATASETS - source_datasets = [[item[0], [item[1][0]]] for item in TRAIN_DATASET] - target_datasets = [[item[0], [item[1][1]]] for item in TRAIN_DATASET] + datasets = _ENZH_TRAIN_DATASETS if train else _ENZH_TEST_DATASETS + source_datasets = [[item[0], [item[1][0]]] for item in _ENZH_TRAIN_DATASETS] + target_datasets = [[item[0], [item[1][1]]] for item in _ENZH_TRAIN_DATASETS] source_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.source_vocab_name, self.targeted_vocab_size, - source_datasets, _file_byte_budget=1e8) + source_datasets) target_vocab = generator_utils.get_or_generate_vocab( data_dir, tmp_dir, self.target_vocab_name, self.targeted_vocab_size, - target_datasets, _file_byte_budget=1e8) + target_datasets) tag = "train" if train else "dev" - filename_base = "wmt_enzh_%sk_tok_%s" % (self.targeted_vocab_size, tag) - data_path = translate.compile_data(tmp_dir, datasets, filename_base) + data_path = translate.compile_data(tmp_dir, datasets, + "wmt_enzh_tok_%s" % tag) return translate.bi_vocabs_token_generator(data_path + ".lang1", data_path + ".lang2", source_vocab, target_vocab, EOS) @@ -239,22 +107,3 @@ def feature_encoders(self, data_dir): "inputs": source_token, "targets": target_token, } - - -@registry.register_problem -class TranslateEnzhWmt8k(TranslateEnzhWmt32k): - """Problem spec for WMT En-Zh translation. - This is far from being the real WMT17 task - only toyset here - """ - - @property - def targeted_vocab_size(self): - return 2**13 # 8192 - - @property - def num_shards(self): - return 10 # This is a small dataset. 
- - def get_training_dataset(self, tmp_dir): - """Uses only News Commentary Dataset for training""" - return _NC_TRAIN_DATASETS diff --git a/tensor2tensor/layers/common_layers.py b/tensor2tensor/layers/common_layers.py index 0e305ef54..640730864 100644 --- a/tensor2tensor/layers/common_layers.py +++ b/tensor2tensor/layers/common_layers.py @@ -76,7 +76,7 @@ def shakeshake2_py(x, y, equal=False, individual=False): """The shake-shake sum of 2 tensors, python version.""" if equal: alpha = 0.5 - elif individual: + if individual: alpha = tf.random_uniform(tf.get_shape(x)[:1]) else: alpha = tf.random_uniform([]) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index c43342afd..22d842c73 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -142,10 +142,11 @@ def nearest(x, means, hparams): """Find the nearest means to elements in x.""" x, means = tf.stop_gradient(x), tf.stop_gradient(means) x_flat = tf.reshape(x, [-1, hparams.hidden_size]) - x_norm = tf.norm(x_flat, axis=-1, keep_dims=True) - means_norm = tf.norm(means, axis=-1, keep_dims=True) - dist = x_norm + tf.transpose(means_norm) - 2 * tf.matmul(x_flat, means, - transpose_b=True) + x_norm_sq = tf.reduce_sum(x_flat ** 2, axis=-1, keep_dims=True) + means_norm_sq = tf.reduce_sum(means ** 2, axis=-1, keep_dims=True) + dist = ( + x_norm_sq + tf.transpose(means_norm_sq) - + 2 * tf.matmul(x_flat, means, transpose_b=True)) _, nearest_idx = tf.nn.top_k(- dist, k=1) nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size) shape = common_layers.shape_list(x) @@ -158,8 +159,9 @@ def kmeans(x, means, hparams, name): with tf.variable_scope(name): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) - reg_loss1 = tf.nn.l2_loss((tf.stop_gradient(x) - x_means)) - reg_loss2 = hparams.beta * tf.nn.l2_loss((x - tf.stop_gradient(x_means))) + reg_loss1 = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) + reg_loss2 = hparams.beta * tf.reduce_mean( + (x - tf.stop_gradient(x_means))**2) l = reg_loss1 + reg_loss2 return x_means_hot, x_means, l @@ -198,8 +200,10 @@ def embed(x): hot = tf.one_hot(x, hparams.v_size) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") elif hparams.bottleneck_kind == "vq-vae": - means = tf.get_variable(name="means", - shape=[hparams.v_size, hparams.hidden_size]) + means = tf.get_variable( + name="means", + shape=[hparams.v_size, hparams.hidden_size], + initializer=tf.random_normal_initializer()) h1 = tf.gather(means, x) elif hparams.bottleneck_kind == "rounding": h1 = x @@ -245,8 +249,10 @@ def embed(x): c = tf.argmax(hot, axis=-1) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") if hparams.bottleneck_kind == "vq-vae": - means = tf.get_variable(name="means", shape=[hparams.v_size, - hparams.hidden_size]) + means = tf.get_variable( + name="means", + shape=[hparams.v_size, hparams.hidden_size], + initializer=tf.random_normal_initializer()) x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans") h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) c = tf.argmax(x_means_hot, axis=-1) diff --git a/tensor2tensor/utils/bleu_hook.py b/tensor2tensor/utils/bleu_hook.py index 50caf09bf..49b31c1bb 100644 --- a/tensor2tensor/utils/bleu_hook.py +++ b/tensor2tensor/utils/bleu_hook.py @@ -20,12 +20,9 @@ import collections import math -import os import re import sys -import time import unicodedata -from collections import namedtuple # Dependency 
imports @@ -153,7 +150,7 @@ def __init__(self): def _property_chars(prefix): return ''.join(six.unichr(x) for x in range(sys.maxunicode) if unicodedata.category(six.unichr(x)).startswith(prefix)) - punctuation = _property_chars('P') + punctuation = self._property_chars('P') self.nondigit_punct_re = re.compile(r'([^\d])([' + punctuation + r'])') self.punct_nondigit_re = re.compile(r'([' + punctuation + r'])([^\d])') self.symbol_re = re.compile('([' + _property_chars('S') + '])') @@ -183,10 +180,9 @@ def bleu_tokenize(string): Returns: a list of tokens """ - uregex = UnicodeRegex() - string = uregex.nondigit_punct_re.sub(r'\1 \2 ', string) - string = uregex.punct_nondigit_re.sub(r' \1 \2', string) - string = uregex.symbol_re.sub(r' \1 ', string) + string = UnicodeRegex.nondigit_punct_re.sub(r'\1 \2 ', string) + string = UnicodeRegex.punct_nondigit_re.sub(r' \1 \2', string) + string = UnicodeRegex.symbol_re.sub(r' \1 ', string) return string.split() @@ -201,68 +197,3 @@ def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): ref_tokens = [bleu_tokenize(x) for x in ref_lines] hyp_tokens = [bleu_tokenize(x) for x in hyp_lines] return compute_bleu(ref_tokens, hyp_tokens) - - -StepFile = namedtuple('StepFile', 'filename mtime ctime steps') - - -def _read_stepfiles_list(path_prefix, path_suffix='.index', min_steps=0): - stepfiles = [] - for filename in tf.gfile.Glob(path_prefix + '*-[0-9]*' + path_suffix): - basename = filename[:-len(path_suffix)] if len(path_suffix) else filename - try: - steps = int(basename.rsplit('-')[-1]) - except ValueError: # The -[0-9]* part is not an integer. - continue - if steps < min_steps: - continue - if not os.path.exists(filename): - tf.logging.info(filename + " was deleted, so skipping it") - continue - stepfiles.append(StepFile(basename, os.path.getmtime(filename), - os.path.getctime(filename), steps)) - return sorted(stepfiles, key=lambda x: -x.steps) - - -def stepfiles_iterator(path_prefix, wait_minutes=0, min_steps=0, - path_suffix='.index', sleep_sec=10): - """Continuously yield new files with steps in filename as they appear. - - This is useful for checkpoint files or other files whose names differ just in an interger - marking the number of steps and match the wildcard path_prefix + '*-[0-9]*' + path_suffix. - Unlike `tf.contrib.training.checkpoints_iterator`, this - implementation always starts from the oldest files - (and it cannot miss any file). Note that the oldest checkpoint - may be deleted anytime by Tensorflow (if set up so). It is up to the user - to check that the files returned by this generator actually exist. - Args: - path_prefix: The directory + possible common filename prefix to the files. - path_suffix: Common filename suffix (after steps), including possible extension dot. - wait_minutes: The maximum amount of minutes to wait between files. - min_steps: Skip files with lower global step. - sleep_sec: How often to check for new files. - Yields: - named tuples (filename, mtime, ctime, steps) of the files as they arrive. - """ - # Wildcard D*-[0-9]* does not match D/x-1, so if D is a directory let path_prefix='D/'. 
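For context on the transformer_vae.py hunk above: the rewritten `nearest()` builds the full matrix of squared Euclidean distances between encoder outputs and codebook entries from the expansion ||x - m||^2 = ||x||^2 + ||m||^2 - 2 x.m, which needs only two row-wise squared norms and one matmul. Below is a minimal NumPy sketch of that computation; shapes and names are illustrative and not taken from the patch.

```python
import numpy as np

x_flat = np.random.randn(6, 4)   # [num_vectors, hidden_size] encoder outputs
means = np.random.randn(8, 4)    # [v_size, hidden_size] codebook entries

# ||x - m||^2 = ||x||^2 + ||m||^2 - 2 * x.m for every (x, m) pair.
x_norm_sq = np.sum(x_flat ** 2, axis=-1, keepdims=True)        # [6, 1]
means_norm_sq = np.sum(means ** 2, axis=-1, keepdims=True)     # [8, 1]
dist = x_norm_sq + means_norm_sq.T - 2 * x_flat.dot(means.T)   # [6, 8]

nearest_idx = np.argmin(dist, axis=-1)   # closest codebook entry per vector
quantized = means[nearest_idx]           # what the bottleneck feeds forward

# The model then passes gradients straight through the quantization step,
# i.e. decoder input = stop_gradient(quantized) + x - stop_gradient(x).
```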
- if not path_prefix.endswith(os.sep) and os.path.isdir(path_prefix): - path_prefix += os.sep - stepfiles = _read_stepfiles_list(path_prefix, path_suffix, min_steps) - tf.logging.info("Found %d files with steps: %s" - % (len(stepfiles), ", ".join(str(x.steps) for x in reversed(stepfiles)))) - exit_time = time.time() + wait_minutes * 60 - while True: - if not stepfiles and wait_minutes: - tf.logging.info('Waiting till %s if a new file matching %s*-[0-9]*%s appears' - % (time.asctime(time.localtime(exit_time)), path_prefix, path_suffix)) - while True: - stepfiles = _read_stepfiles_list(path_prefix, path_suffix, min_steps) - if stepfiles or time.time() > exit_time: - break - time.sleep(sleep_sec) - if not stepfiles: - return - - stepfile = stepfiles.pop() - exit_time, min_steps = stepfile.ctime + wait_minutes * 60, stepfile.steps + 1 - yield stepfile diff --git a/tensor2tensor/utils/bleu_hook_test.py b/tensor2tensor/utils/bleu_hook_test.py index b616aaf7c..e4f3a18a9 100644 --- a/tensor2tensor/utils/bleu_hook_test.py +++ b/tensor2tensor/utils/bleu_hook_test.py @@ -57,9 +57,5 @@ def testComputeMultipleNgrams(self): actual_bleu = 0.3436 self.assertAllClose(bleu, actual_bleu, atol=1e-03) - def testBleuTokenize(self): - self.assertEqual(bleu_hook.bleu_tokenize(u'hi, “there”'), [u'hi', u',', u'“', u'there', u'”']) - - if __name__ == '__main__': tf.test.main() From 872ce75692eb09f41067d4a314f63e02b037ec9d Mon Sep 17 00:00:00 2001 From: T2T Team Date: Thu, 28 Dec 2017 16:13:10 -0800 Subject: [PATCH 02/42] Use exponential moving average for the VQ-VAE embeddings. PiperOrigin-RevId: 180302324 --- tensor2tensor/models/transformer_vae.py | 84 +++++++++++++++++++------ 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index 22d842c73..f187e2d71 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -25,6 +25,7 @@ from tensor2tensor.utils import registry from tensor2tensor.utils import t2t_model import tensorflow as tf +from tensorflow.python.training import moving_averages _DO_SUMMARIES = True @@ -140,15 +141,14 @@ def vae(x, z_size, name): def nearest(x, means, hparams): """Find the nearest means to elements in x.""" - x, means = tf.stop_gradient(x), tf.stop_gradient(means) x_flat = tf.reshape(x, [-1, hparams.hidden_size]) x_norm_sq = tf.reduce_sum(x_flat ** 2, axis=-1, keep_dims=True) means_norm_sq = tf.reduce_sum(means ** 2, axis=-1, keep_dims=True) dist = ( x_norm_sq + tf.transpose(means_norm_sq) - 2 * tf.matmul(x_flat, means, transpose_b=True)) - _, nearest_idx = tf.nn.top_k(- dist, k=1) - nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size) + nearest_idx = tf.argmax(-dist, axis=-1) + nearest_hot = tf.one_hot(nearest_idx, hparams.v_size) shape = common_layers.shape_list(x) shape[-1] = hparams.v_size nearest_hot = tf.reshape(nearest_hot, shape=shape) @@ -156,14 +156,12 @@ def nearest(x, means, hparams): def kmeans(x, means, hparams, name): - with tf.variable_scope(name): + with tf.variable_scope(name, reuse=tf.AUTO_REUSE): x_means_hot = nearest(x, means, hparams) x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) - reg_loss1 = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) - reg_loss2 = hparams.beta * tf.reduce_mean( - (x - tf.stop_gradient(x_means))**2) - l = reg_loss1 + reg_loss2 - return x_means_hot, x_means, l + q_loss = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) + e_loss = tf.reduce_mean((x - 
tf.stop_gradient(x_means))**2) + return x_means_hot, x_means, q_loss, e_loss def bit_to_int(x_bit, nbits): @@ -200,10 +198,17 @@ def embed(x): hot = tf.one_hot(x, hparams.v_size) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") elif hparams.bottleneck_kind == "vq-vae": - means = tf.get_variable( - name="means", - shape=[hparams.v_size, hparams.hidden_size], - initializer=tf.random_normal_initializer()) + if hparams.ema: + ema_means = tf.get_variable( + name="ema_means", + shape=[hparams.v_size, hparams.hidden_size], + initializer=tf.random_normal_initializer()) + means = ema_means + else: + means = tf.get_variable( + name="means", + shape=[hparams.v_size, hparams.hidden_size], + initializer=tf.random_normal_initializer()) h1 = tf.gather(means, x) elif hparams.bottleneck_kind == "rounding": h1 = x @@ -249,13 +254,49 @@ def embed(x): c = tf.argmax(hot, axis=-1) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") if hparams.bottleneck_kind == "vq-vae": - means = tf.get_variable( - name="means", - shape=[hparams.v_size, hparams.hidden_size], - initializer=tf.random_normal_initializer()) - x_means_hot, x_means, l = kmeans(x, means, hparams, name="vq-vae-kmeans") - h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) + means = tf.Variable( + tf.random_normal([hparams.v_size, hparams.hidden_size]), name="means") + + # Use EMA if ema flag is set + if hparams.ema: + ema_count = tf.get_variable( + "ema_count", [hparams.v_size], + initializer=tf.constant_initializer(0)) + with tf.colocate_with(means): + ema_means = tf.get_variable( + "ema_means", initializer=means.initialized_value()) + + x_means_hot, x_means, q_loss, e_loss = kmeans( + x, means, hparams, name="vq-vae-kmeans") c = tf.argmax(x_means_hot, axis=-1) + + # Update the ema variables + if hparams.ema: + tf.logging.info("Using EMA with beta = {}".format(hparams.beta)) + x_means_hot_flat = tf.reshape(x_means_hot, shape=[-1, hparams.v_size]) + updated_ema_count = moving_averages.assign_moving_average( + ema_count, + tf.reduce_sum(x_means_hot_flat, axis=0), + hparams.decay, + zero_debias=False) + x_flat = tf.reshape(x, [-1, hparams.hidden_size]) + dw = tf.matmul(x_means_hot_flat, x_flat, transpose_a=True) + updated_ema_means = moving_averages.assign_moving_average( + ema_means, dw, hparams.decay, zero_debias=False) + n = tf.reduce_sum(updated_ema_count) + updated_ema_count = ((updated_ema_count + hparams.epsilon) / + (n + hparams.v_size * hparams.epsilon) * n) + updated_ema_means /= tf.expand_dims(updated_ema_count, axis=-1) + + with tf.control_dependencies([e_loss]): + update_w = tf.assign(means, updated_ema_means) + with tf.control_dependencies([update_w]): + l = hparams.beta * e_loss + else: + l = q_loss + e_loss + + h1 = tf.stop_gradient(x_means) + x - tf.stop_gradient(x) + if hparams.bottleneck_kind == "rounding": h = tf.layers.dense(x, 1, name="vcc") @@ -594,6 +635,9 @@ def transformer_ae_small(): hparams.add_hparam("do_vae", True) hparams.add_hparam("bit_vae", True) hparams.add_hparam("beta", 0.25) + hparams.add_hparam("epsilon", 1e-5) + hparams.add_hparam("decay", 0.999) + hparams.add_hparam("ema", True) hparams.kl_warmup_steps = 150000 hparams.force_full_predict = True return hparams @@ -609,7 +653,7 @@ def transformer_ae_cifar(): hparams.num_compress_steps = 2 hparams.v_size = 1024 * 64 hparams.kl_warmup_steps = 150000 - hparams.startup_steps = 20000 + hparams.startup_steps = 10000 hparams.kmeans_lr_factor = 0.0 hparams.is_2d = 1 hparams.learning_rate_warmup_steps = 8000 From 
a84f42507a2d588891e355f8bac74276d0baed54 Mon Sep 17 00:00:00 2001 From: T2T Team Date: Mon, 31 Dec 2018 01:56:33 -0800 Subject: [PATCH 03/42] Introduces fixes to get VQ-VAE working. PiperOrigin-RevId: 180425931 --- tensor2tensor/models/transformer_vae.py | 118 +++++++++++++++--------- 1 file changed, 73 insertions(+), 45 deletions(-) diff --git a/tensor2tensor/models/transformer_vae.py b/tensor2tensor/models/transformer_vae.py index f187e2d71..2d0e14990 100644 --- a/tensor2tensor/models/transformer_vae.py +++ b/tensor2tensor/models/transformer_vae.py @@ -155,13 +155,12 @@ def nearest(x, means, hparams): return tf.stop_gradient(nearest_hot) -def kmeans(x, means, hparams, name): - with tf.variable_scope(name, reuse=tf.AUTO_REUSE): - x_means_hot = nearest(x, means, hparams) - x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) - q_loss = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) - e_loss = tf.reduce_mean((x - tf.stop_gradient(x_means))**2) - return x_means_hot, x_means, q_loss, e_loss +def kmeans(x, means, hparams): + x_means_hot = nearest(x, means, hparams) + x_means = tf.gather(means, tf.argmax(x_means_hot, axis=-1)) + q_loss = tf.reduce_mean((tf.stop_gradient(x) - x_means)**2) + e_loss = tf.reduce_mean((x - tf.stop_gradient(x_means))**2) + return x_means_hot, x_means, q_loss, e_loss def bit_to_int(x_bit, nbits): @@ -184,11 +183,23 @@ def int_to_bit(x_int, nbits): return tf.to_float(res) -def bottleneck(x, hparams, filter_size, name): +def bottleneck(x, + hparams, + filter_size, + name, + means=None, + ema_count=None, + ema_means=None): """Bottleneck.""" + if hparams.bottleneck_kind == "vq-vae": + assert means is not None + if hparams.ema: + assert ema_count is not None + assert ema_means is not None + def embed(x): """Embedding function; must be compatible with the code later.""" - with tf.variable_scope(name, reuse=True): + with tf.variable_scope(name, reuse=tf.AUTO_REUSE): if hparams.bottleneck_kind == "semhash": c = int_to_bit(x, z_size) h1a = tf.layers.dense(c, filter_size, name="vch1a") @@ -199,16 +210,11 @@ def embed(x): h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") elif hparams.bottleneck_kind == "vq-vae": if hparams.ema: - ema_means = tf.get_variable( - name="ema_means", - shape=[hparams.v_size, hparams.hidden_size], - initializer=tf.random_normal_initializer()) + ema_means = tf.get_variable(name="ema_means") means = ema_means else: - means = tf.get_variable( - name="means", - shape=[hparams.v_size, hparams.hidden_size], - initializer=tf.random_normal_initializer()) + tf.logging.info("means = {}".format(means)) + h1 = tf.gather(means, x) elif hparams.bottleneck_kind == "rounding": h1 = x @@ -216,7 +222,7 @@ def embed(x): h2 = tf.layers.dense(tf.nn.relu(h1), filter_size, name="vch2") return tf.layers.dense(tf.nn.relu(h2), hparams.hidden_size, name="vcfin") - with tf.variable_scope(name): + with tf.variable_scope(name, reuse=tf.AUTO_REUSE): z_size = hparams.z_size l = tf.constant(0.0) if hparams.bottleneck_kind == "dense": @@ -254,20 +260,7 @@ def embed(x): c = tf.argmax(hot, axis=-1) h1 = tf.layers.dense(hot, hparams.hidden_size, name="dae_dense") if hparams.bottleneck_kind == "vq-vae": - means = tf.Variable( - tf.random_normal([hparams.v_size, hparams.hidden_size]), name="means") - - # Use EMA if ema flag is set - if hparams.ema: - ema_count = tf.get_variable( - "ema_count", [hparams.v_size], - initializer=tf.constant_initializer(0)) - with tf.colocate_with(means): - ema_means = tf.get_variable( - "ema_means", 
initializer=means.initialized_value()) - - x_means_hot, x_means, q_loss, e_loss = kmeans( - x, means, hparams, name="vq-vae-kmeans") + x_means_hot, x_means, q_loss, e_loss = kmeans(x, means, hparams) c = tf.argmax(x_means_hot, axis=-1) # Update the ema variables @@ -289,8 +282,8 @@ def embed(x): updated_ema_means /= tf.expand_dims(updated_ema_count, axis=-1) with tf.control_dependencies([e_loss]): - update_w = tf.assign(means, updated_ema_means) - with tf.control_dependencies([update_w]): + update_means = tf.assign(means, updated_ema_means) + with tf.control_dependencies([update_means]): l = hparams.beta * e_loss else: l = q_loss + e_loss @@ -400,8 +393,15 @@ def next_bit(latents_discrete, i): return latents_discrete -def ae_transformer_internal(inputs, targets, target_space, hparams, - cache=None, predict_mask=1.0): +def ae_transformer_internal(inputs, + targets, + target_space, + hparams, + cache=None, + predict_mask=1.0, + means=None, + ema_count=None, + ema_means=None): """AE Transformer, main step used for training.""" # Summaries break with the do_refine cond, turn them off in that case. global _DO_SUMMARIES @@ -430,7 +430,7 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, if hparams.mode != tf.estimator.ModeKeys.PREDICT: # Compress and bottleneck. latents_dense, latents_discrete, extra_loss, _ = bottleneck( - targets_c, hparams, 2*2048, "vc") + targets_c, hparams, 2 * 2048, "vc", means, ema_count, ema_means) if _DO_SUMMARIES: tf.summary.histogram("b0", tf.reshape(latents_discrete[:, 0, :], [-1])) pc = common_layers.inverse_exp_decay(hparams.startup_steps) * 0.95 @@ -454,7 +454,8 @@ def ae_transformer_internal(inputs, targets, target_space, hparams, losses["latent_pred"] = tf.reduce_mean((inputs_c - targets_c)**2) * 20 def bn_inputs(): with tf.variable_scope(tf.get_variable_scope(), reuse=True): - bn, _, _, _ = bottleneck(inputs_c, hparams, 2*2048, "vc") + bn, _, _, _ = bottleneck(inputs_c, hparams, 2 * 2048, "vc", means, + ema_count, ema_means) return bn pbn = 0.8 if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0 inputs_c = tf.cond(tf.less(tf.random_uniform([]), pbn), @@ -466,10 +467,11 @@ def bn_inputs(): else: if hparams.bottleneck_kind in ["dense", "vae"]: inputs_c = decode_transformer(inputs, ed, targets_c, hparams, "dec_c") - latents_dense, _, _, _ = bottleneck(inputs_c, hparams, 2*2048, "vc") + latents_dense, _, _, _ = bottleneck(inputs_c, hparams, 2 * 2048, "vc", + means, ema_count, ema_means) else: latent_len = common_layers.shape_list(targets_c)[1] - _, _, _, embed = bottleneck(targets_c, hparams, 2*2048, "vc") + _, _, _, embed = bottleneck(targets_c, hparams, 2 * 2048, "vc", means) latents_dense = tf.zeros_like(targets_c[:, :latent_len, :, :]) if cache is None: cache = ae_latent_sample(latents_dense, inputs, ed, embed, 8, hparams) @@ -529,6 +531,25 @@ def __init__(self, *args, **kwargs): super(TransformerAE, self).__init__(*args, **kwargs) self.predict_mask = 1.0 + # Define the embeddings if we are using vq-vae + self.means = None + self.ema_count = None + self.ema_means = None + if self._hparams.bottleneck_kind == "vq-vae": + self.means = tf.get_variable( + name="means", + shape=[self._hparams.v_size, self._hparams.hidden_size], + initializer=tf.random_normal_initializer()) + + # Create the shadow variables if we are using EMA + if self._hparams.ema: + self.ema_count = tf.get_variable( + "ema_count", [self._hparams.v_size], + initializer=tf.constant_initializer(0)) + with tf.colocate_with(self.means): + self.ema_means = tf.get_variable( + 
"ema_means", initializer=self.means.initialized_value()) + @property def has_input(self): return self._problem_hparams.input_modality @@ -540,9 +561,15 @@ def body(self, features): reuse = "cache_raw" in features with tf.variable_scope(tf.get_variable_scope(), reuse=reuse): res, loss, _ = ae_transformer_internal( - inputs, features["targets"], features["target_space_id"], - self._hparams, features.get("cache_raw", None), - predict_mask=self.predict_mask) + inputs, + features["targets"], + features["target_space_id"], + self._hparams, + features.get("cache_raw", None), + predict_mask=self.predict_mask, + means=self.means, + ema_count=self.ema_count, + ema_means=self.ema_means) return res, loss def prepare_features_for_infer(self, features): @@ -557,7 +584,8 @@ def prepare_features_for_infer(self, features): targets = tf.zeros([beam_batch_size, 1, 1, self._hparams.hidden_size]) with tf.variable_scope("body"): _, _, cache = ae_transformer_internal( - inputs, targets, features["target_space_id"], self._hparams) + inputs, targets, features["target_space_id"], self._hparams, + self.means, self.ema_count, self.ema_means) features["cache_raw"] = cache def infer(self, features=None, decode_length=50, beam_size=1, top_beams=1, @@ -635,7 +663,7 @@ def transformer_ae_small(): hparams.add_hparam("do_vae", True) hparams.add_hparam("bit_vae", True) hparams.add_hparam("beta", 0.25) - hparams.add_hparam("epsilon", 1e-5) + hparams.add_hparam("epsilon", 1e-1) hparams.add_hparam("decay", 0.999) hparams.add_hparam("ema", True) hparams.kl_warmup_steps = 150000 From 84ee146fc3849f1e913187fcb7548d3f7895dbe5 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Tue, 2 Jan 2018 10:02:57 -0800 Subject: [PATCH 04/42] Add random seed to RunConfig PiperOrigin-RevId: 180558518 --- tensor2tensor/tpu/tpu_trainer.py | 3 ++- tensor2tensor/tpu/tpu_trainer_lib.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py index 571a21839..47e92da98 100644 --- a/tensor2tensor/tpu/tpu_trainer.py +++ b/tensor2tensor/tpu/tpu_trainer.py @@ -127,7 +127,8 @@ def create_run_config(hp): ps_gpu=FLAGS.ps_gpu, sync=FLAGS.sync, worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job) + worker_job=FLAGS.worker_job, + random_seed=FLAGS.random_seed) def generate_data(): diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py index bde85e4db..ff2045302 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/tpu/tpu_trainer_lib.py @@ -104,6 +104,7 @@ def create_run_config(master="", ps_replicas=0, ps_job="/job:ps", ps_gpu=0, + random_seed=None, sync=False, use_tpu=False): """Create RunConfig, TPUConfig, and Parallelism object.""" @@ -122,6 +123,7 @@ def create_run_config(master="", "save_checkpoints_steps": save_checkpoints_steps, "keep_checkpoint_max": keep_checkpoint_max, "keep_checkpoint_every_n_hours": keep_checkpoint_every_n_hours, + "tf_random_seed": random_seed, } run_config_cls = tf.contrib.learn.RunConfig From 6407b2d35301e19c14c70921d8f4fdfb4da4e09e Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Tue, 2 Jan 2018 11:52:04 -0800 Subject: [PATCH 05/42] Code moves and renames PiperOrigin-RevId: 180573810 --- .travis.yml | 4 +- docs/overview.md | 6 +- tensor2tensor/bin/t2t_decoder.py | 12 +- tensor2tensor/bin/t2t_trainer.py | 166 ++++++++++++++- tensor2tensor/insights/transformer_model.py | 10 +- tensor2tensor/layers/common_hparams.py | 4 +- tensor2tensor/notebooks/hello_t2t.ipynb | 2 +- tensor2tensor/tpu/__init__.py | 
15 -- tensor2tensor/tpu/tpu_trainer.py | 191 ------------------ tensor2tensor/utils/registry.py | 6 +- tensor2tensor/utils/t2t_model.py | 5 +- .../trainer_lib.py} | 2 +- .../trainer_lib_test.py} | 16 +- .../TransformerVisualization.ipynb | 4 +- 14 files changed, 196 insertions(+), 247 deletions(-) delete mode 100644 tensor2tensor/tpu/__init__.py delete mode 100644 tensor2tensor/tpu/tpu_trainer.py rename tensor2tensor/{tpu/tpu_trainer_lib.py => utils/trainer_lib.py} (99%) rename tensor2tensor/{tpu/tpu_trainer_lib_test.py => utils/trainer_lib_test.py} (88%) diff --git a/.travis.yml b/.travis.yml index 7841b0b7e..f424014b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,9 +14,9 @@ env: - T2T_DATA_DIR=/tmp/t2t-data - T2T_TRAIN_DIR=/tmp/t2t-train script: - - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/tpu/tpu_trainer_lib_test.py --ignore=tensor2tensor/data_generators/algorithmic_math_test.py + - pytest --ignore=tensor2tensor/utils/registry_test.py --ignore=tensor2tensor/problems_test.py --ignore=tensor2tensor/utils/trainer_lib_test.py --ignore=tensor2tensor/data_generators/algorithmic_math_test.py - pytest tensor2tensor/utils/registry_test.py - - pytest tensor2tensor/tpu/tpu_trainer_lib_test.py + - pytest tensor2tensor/utils/trainer_lib_test.py - t2t-datagen 2>&1 | grep translate && echo passed - python -c "from tensor2tensor.models import transformer; print(transformer.Transformer.__name__)" - t2t-trainer --registry_help diff --git a/docs/overview.md b/docs/overview.md index fcc0aba5a..9ea87bc50 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -14,7 +14,7 @@ to training, evaluation, and decoding. Some key files and their functions: -* [`tpu_trainer.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/tpu/tpu_trainer.py) and [`tpu_trainer_lib.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/tpu/tpu_trainer_lib.py): +* [`t2t_trainer.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/bin/t2t_trainer.py) and [`trainer_lib.py`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/utils/trainer_lib.py): Main entrypoint for training and evaluation. Constructs and runs all the main components of the system (the `Problem`, the `HParams`, the `Estimator`, the `Experiment`, the `input_fn`s and `model_fn`). @@ -134,7 +134,7 @@ The default implementations of `bottom`, `top`, and `loss` depend on the The actual training loop and related services (checkpointing, summaries, continuous evaluation, etc.) are all handled by `Estimator` and `Experiment` -objects. `tpu_trainer.py` is the main entrypoint and uses `tpu_trainer_lib.py` +objects. `t2t_trainer.py` is the main entrypoint and uses `trainer_lib.py` to construct the various components. ## Decoding @@ -144,7 +144,7 @@ to construct the various components. ## System Overview for Train/Eval -See `tpu_trainer.py`. +See `t2t_trainer.py` and `trainer_lib.py`. * Create HParams * Create `RunConfig`, including `Parallelism` object (i.e. 
`data_parallelism`) diff --git a/tensor2tensor/bin/t2t_decoder.py b/tensor2tensor/bin/t2t_decoder.py index 25358739a..132dac0e4 100644 --- a/tensor2tensor/bin/t2t_decoder.py +++ b/tensor2tensor/bin/t2t_decoder.py @@ -36,9 +36,9 @@ # Dependency imports -from tensor2tensor.tpu import tpu_trainer -from tensor2tensor.tpu import tpu_trainer_lib +from tensor2tensor.bin import t2t_trainer from tensor2tensor.utils import decoding +from tensor2tensor.utils import trainer_lib from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -46,7 +46,7 @@ flags = tf.flags FLAGS = flags.FLAGS -# Additional flags in tpu/tpu_trainer.py and utils/flags.py +# Additional flags in bin/t2t_trainer.py and utils/flags.py flags.DEFINE_string("decode_from_file", None, "Path to the source file for decoding") flags.DEFINE_string("decode_to_file", None, @@ -57,7 +57,7 @@ def create_hparams(): - return tpu_trainer_lib.create_hparams( + return trainer_lib.create_hparams( FLAGS.hparams_set, FLAGS.hparams, data_dir=os.path.expanduser(FLAGS.data_dir), @@ -95,10 +95,10 @@ def main(_): hp = create_hparams() decode_hp = create_decode_hparams() - estimator = tpu_trainer_lib.create_estimator( + estimator = trainer_lib.create_estimator( FLAGS.model, hp, - tpu_trainer.create_run_config(hp), + t2t_trainer.create_run_config(hp), decode_hparams=decode_hp, use_tpu=False) diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 99ec99b20..9e77de384 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -13,20 +13,178 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Trainer for T2T models. See tpu_trainer.py.""" +"""Train and evaluate.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib +import os +import sys + # Dependency imports -from tensor2tensor.tpu import tpu_trainer +from tensor2tensor import models # pylint: disable=unused-import +from tensor2tensor import problems as problems_lib # pylint: disable=unused-import +from tensor2tensor.utils import decoding +from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import +from tensor2tensor.utils import registry +from tensor2tensor.utils import trainer_lib +from tensor2tensor.utils import usr_dir import tensorflow as tf +flags = tf.flags +FLAGS = flags.FLAGS + +# See flags.py for additional command-line flags. +flags.DEFINE_string("t2t_usr_dir", "", + "Path to a Python module that will be imported. The " + "__init__.py file should include the necessary imports. " + "The imported files should contain registrations, " + "e.g. @registry.register_model calls, that will then be " + "available to the t2t-trainer.") +flags.DEFINE_integer("random_seed", 1234, "Random seed.") +flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") +flags.DEFINE_integer("iterations_per_loop", 1000, + "Number of iterations in a TPU training loop.") +flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") +flags.DEFINE_bool("generate_data", False, "Generate data before training?") +flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", + "Temporary storage directory, used if --generate_data.") +flags.DEFINE_bool("profile", False, "Profile performance?") + +# To maintain compatibility with some internal libs, we guard against these flag +# definitions possibly erroring. Apologies for the ugliness. 
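Stepping back to the transformer_vae.py changes in the VQ-VAE patches above: with `hparams.ema` enabled, the codebook is no longer trained by gradient descent. Only the commitment term `beta * e_loss` reaches the optimizer, while the codebook entries are re-estimated from exponential moving averages of cluster counts and cluster sums, with Laplace smoothing so rarely used entries do not divide by zero. The following is a small NumPy sketch of that update rule; variable names and shapes are illustrative and not taken from the patch.

```python
import numpy as np

decay, epsilon, v_size, hidden = 0.999, 1e-5, 8, 4

x_flat = np.random.randn(16, hidden)                               # encoder outputs, flattened
assignments = np.eye(v_size)[np.random.randint(v_size, size=16)]  # one-hot nearest entries

ema_count = np.ones(v_size)                 # running cluster sizes
ema_sum = np.random.randn(v_size, hidden)   # running sums of assigned vectors

# Moving averages of how many vectors each entry received and of their sum.
ema_count = decay * ema_count + (1 - decay) * assignments.sum(axis=0)
dw = assignments.T.dot(x_flat)
ema_sum = decay * ema_sum + (1 - decay) * dw

# Laplace-smooth the counts, then re-estimate each codebook entry as the
# (smoothed) mean of the vectors assigned to it.
n = ema_count.sum()
smoothed_count = (ema_count + epsilon) / (n + v_size * epsilon) * n
means = ema_sum / smoothed_count[:, None]
```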
+try: + flags.DEFINE_string("master", "", "Address of TensorFlow master.") + flags.DEFINE_string("output_dir", "", "Base output directory for run.") + flags.DEFINE_string("schedule", "continuous_train_and_eval", + "Method of Experiment to run.") + flags.DEFINE_integer("eval_steps", 10000, + "Number of steps in evaluation. By default, eval will " + "stop after eval_steps or when it runs through the eval " + "dataset once in full, whichever comes first, so this " + "can be a very large number.") +except: # pylint: disable=bare-except + pass + + +def get_problem_name(): + problems = FLAGS.problems.split("-") + assert len(problems) == 1 + return problems[0] + + +def create_hparams(): + return trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) + + +def create_experiment_fn(): + return trainer_lib.create_experiment_fn( + model_name=FLAGS.model, + problem_name=get_problem_name(), + data_dir=os.path.expanduser(FLAGS.data_dir), + train_steps=FLAGS.train_steps, + eval_steps=FLAGS.eval_steps, + min_eval_frequency=FLAGS.local_eval_frequency, + schedule=FLAGS.schedule, + export=FLAGS.export_saved_model, + decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), + use_tfdbg=FLAGS.tfdbg, + use_dbgprofile=FLAGS.dbgprofile, + eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, + eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, + eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, + eval_early_stopping_metric_minimize=FLAGS. + eval_early_stopping_metric_minimize, + use_tpu=FLAGS.use_tpu) + + +def create_run_config(hp): + return trainer_lib.create_run_config( + model_dir=os.path.expanduser(FLAGS.output_dir), + master=FLAGS.master, + iterations_per_loop=FLAGS.iterations_per_loop, + num_shards=FLAGS.tpu_num_shards, + log_device_placement=FLAGS.log_device_placement, + save_checkpoints_steps=max(FLAGS.iterations_per_loop, + FLAGS.local_eval_frequency), + keep_checkpoint_max=FLAGS.keep_checkpoint_max, + keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, + num_gpus=FLAGS.worker_gpu, + gpu_order=FLAGS.gpu_order, + shard_to_cpu=FLAGS.locally_shard_to_cpu, + num_async_replicas=FLAGS.worker_replicas, + gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, + enable_graph_rewriter=FLAGS.experimental_optimize_placement, + use_tpu=FLAGS.use_tpu, + schedule=FLAGS.schedule, + no_data_parallelism=hp.no_data_parallelism, + daisy_chain_variables=hp.daisy_chain_variables, + ps_replicas=FLAGS.ps_replicas, + ps_job=FLAGS.ps_job, + ps_gpu=FLAGS.ps_gpu, + sync=FLAGS.sync, + worker_id=FLAGS.worker_id, + worker_job=FLAGS.worker_job, + random_seed=FLAGS.random_seed) + + +def generate_data(): + # Generate data if requested. 
+ data_dir = os.path.expanduser(FLAGS.data_dir) + tmp_dir = os.path.expanduser(FLAGS.tmp_dir) + tf.gfile.MakeDirs(data_dir) + tf.gfile.MakeDirs(tmp_dir) + + problem_name = get_problem_name() + tf.logging.info("Generating data for %s" % problem_name) + registry.problem(problem_name).generate_data(data_dir, tmp_dir) + + +@contextlib.contextmanager +def profile_context(): + if FLAGS.profile: + with tf.contrib.tfprof.ProfileContext("t2tprof", + trace_steps=range(100), + dump_steps=range(100)) as pctx: + opts = tf.profiler.ProfileOptionBuilder.time_and_memory() + pctx.add_auto_profiling("op", opts, range(100)) + yield + else: + yield + + +def log_registry(): + if FLAGS.registry_help: + tf.logging.info(registry.help_string()) + sys.exit(0) + + +def execute_schedule(exp): + if not hasattr(exp, FLAGS.schedule): + raise ValueError( + "Experiment has no method %s, from --schedule" % FLAGS.schedule) + with profile_context(): + getattr(exp, FLAGS.schedule)() + + +def main(_): + tf.logging.set_verbosity(tf.logging.INFO) + trainer_lib.set_random_seed(FLAGS.random_seed) + usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) + log_registry() + + if FLAGS.generate_data: + generate_data() + + hparams = create_hparams() + run_config = create_run_config(hparams) -def main(unused_argv): - tpu_trainer.main(unused_argv) + exp_fn = create_experiment_fn() + exp = exp_fn(run_config, hparams) + execute_schedule(exp) if __name__ == "__main__": diff --git a/tensor2tensor/insights/transformer_model.py b/tensor2tensor/insights/transformer_model.py index 94bc7c0e1..0a2ff8c46 100644 --- a/tensor2tensor/insights/transformer_model.py +++ b/tensor2tensor/insights/transformer_model.py @@ -24,12 +24,12 @@ import numpy as np +from tensor2tensor.bin import t2t_trainer from tensor2tensor.data_generators import text_encoder from tensor2tensor.insights import graph from tensor2tensor.insights import query_processor -from tensor2tensor.tpu import tpu_trainer -from tensor2tensor.tpu import tpu_trainer_lib from tensor2tensor.utils import decoding +from tensor2tensor.utils import trainer_lib from tensor2tensor.utils import usr_dir import tensorflow as tf @@ -111,7 +111,7 @@ def __init__(self, data_dir, model_dir): data_dir = os.path.expanduser(data_dir) # Create the basic hyper parameters. - self.hparams = tpu_trainer_lib.create_hparams( + self.hparams = trainer_lib.create_hparams( FLAGS.hparams_set, FLAGS.hparams, data_dir=data_dir, @@ -122,10 +122,10 @@ def __init__(self, data_dir, model_dir): decode_hp.add_hparam("shard_id", 0) # Create the estimator and final hyper parameters. - self.estimator = tpu_trainer_lib.create_estimator( + self.estimator = trainer_lib.create_estimator( FLAGS.model, self.hparams, - tpu_trainer.create_run_config(), + t2t_trainer.create_run_config(), decode_hp, use_tpu=False) # Fetch the vocabulary and other helpful variables for decoding. diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 5b4e39058..35bac33b0 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -190,8 +190,8 @@ def basic_params1(): # This is the actual batch size, *not* tokens per batch (i.e. for # language models this is the number of sentences in the batch) tpu_batch_size_per_shard=24, - # Set by tpu_trainer to let the model know whether we are on TPU. - # Switching on/off tpu should not invalidate checkpoints. + # Set by t2t_trainer if --use_tpu to let the model know whether we are on + # TPU. Switching on/off tpu should not invalidate checkpoints. 
use_tpu=False, # If True in PREDICT mode, then last-position-only optimizations are not # used. diff --git a/tensor2tensor/notebooks/hello_t2t.ipynb b/tensor2tensor/notebooks/hello_t2t.ipynb index 5b58b042b..bc39b7337 100644 --- a/tensor2tensor/notebooks/hello_t2t.ipynb +++ b/tensor2tensor/notebooks/hello_t2t.ipynb @@ -61,7 +61,7 @@ "source": [ "# Install deps\n", "# We're using some new features from tensorflow so we install tf-nightly\n", - "!pip install -q tensor2tensor tf-nightly" + "!pip install -q 'tensor2tensor==1.4.1' tf-nightly" ], "cell_type": "code", "execution_count": 0, diff --git a/tensor2tensor/tpu/__init__.py b/tensor2tensor/tpu/__init__.py deleted file mode 100644 index 3f714ce1f..000000000 --- a/tensor2tensor/tpu/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py deleted file mode 100644 index 47e92da98..000000000 --- a/tensor2tensor/tpu/tpu_trainer.py +++ /dev/null @@ -1,191 +0,0 @@ -# coding=utf-8 -# Copyright 2017 The Tensor2Tensor Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Train on TPU.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import os -import sys - -# Dependency imports - -from tensor2tensor import models # pylint: disable=unused-import -from tensor2tensor import problems as problems_lib # pylint: disable=unused-import -from tensor2tensor.tpu import tpu_trainer_lib -from tensor2tensor.utils import decoding -from tensor2tensor.utils import flags as t2t_flags # pylint: disable=unused-import -from tensor2tensor.utils import registry -from tensor2tensor.utils import usr_dir - -import tensorflow as tf - -flags = tf.flags -FLAGS = flags.FLAGS - -# See flags.py for additional command-line flags. -flags.DEFINE_string("t2t_usr_dir", "", - "Path to a Python module that will be imported. The " - "__init__.py file should include the necessary imports. " - "The imported files should contain registrations, " - "e.g. 
@registry.register_model calls, that will then be " - "available to the t2t-trainer.") -flags.DEFINE_integer("random_seed", 1234, "Random seed.") -flags.DEFINE_integer("tpu_num_shards", 8, "Number of tpu shards.") -flags.DEFINE_integer("iterations_per_loop", 1000, - "Number of iterations in a TPU training loop.") -flags.DEFINE_bool("use_tpu", False, "Whether to use TPU.") -flags.DEFINE_bool("generate_data", False, "Generate data before training?") -flags.DEFINE_string("tmp_dir", "/tmp/t2t_datagen", - "Temporary storage directory, used if --generate_data.") -flags.DEFINE_bool("profile", False, "Profile performance?") - -# To maintain compatibility with some internal libs, we guard against these flag -# definitions possibly erroring. Apologies for the ugliness. -try: - flags.DEFINE_string("master", "", "Address of TensorFlow master.") - flags.DEFINE_string("output_dir", "", "Base output directory for run.") - flags.DEFINE_string("schedule", "continuous_train_and_eval", - "Method of Experiment to run.") - flags.DEFINE_integer("eval_steps", 10000, - "Number of steps in evaluation. By default, eval will " - "stop after eval_steps or when it runs through the eval " - "dataset once in full, whichever comes first, so this " - "can be a very large number.") -except: # pylint: disable=bare-except - pass - - -def get_problem_name(): - problems = FLAGS.problems.split("-") - assert len(problems) == 1 - return problems[0] - - -def create_hparams(): - return tpu_trainer_lib.create_hparams(FLAGS.hparams_set, FLAGS.hparams) - - -def create_experiment_fn(): - return tpu_trainer_lib.create_experiment_fn( - model_name=FLAGS.model, - problem_name=get_problem_name(), - data_dir=os.path.expanduser(FLAGS.data_dir), - train_steps=FLAGS.train_steps, - eval_steps=FLAGS.eval_steps, - min_eval_frequency=FLAGS.local_eval_frequency, - schedule=FLAGS.schedule, - export=FLAGS.export_saved_model, - decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams), - use_tfdbg=FLAGS.tfdbg, - use_dbgprofile=FLAGS.dbgprofile, - eval_early_stopping_steps=FLAGS.eval_early_stopping_steps, - eval_early_stopping_metric=FLAGS.eval_early_stopping_metric, - eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta, - eval_early_stopping_metric_minimize=FLAGS. - eval_early_stopping_metric_minimize, - use_tpu=FLAGS.use_tpu) - - -def create_run_config(hp): - return tpu_trainer_lib.create_run_config( - model_dir=os.path.expanduser(FLAGS.output_dir), - master=FLAGS.master, - iterations_per_loop=FLAGS.iterations_per_loop, - num_shards=FLAGS.tpu_num_shards, - log_device_placement=FLAGS.log_device_placement, - save_checkpoints_steps=max(FLAGS.iterations_per_loop, - FLAGS.local_eval_frequency), - keep_checkpoint_max=FLAGS.keep_checkpoint_max, - keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, - num_gpus=FLAGS.worker_gpu, - gpu_order=FLAGS.gpu_order, - shard_to_cpu=FLAGS.locally_shard_to_cpu, - num_async_replicas=FLAGS.worker_replicas, - gpu_mem_fraction=FLAGS.worker_gpu_memory_fraction, - enable_graph_rewriter=FLAGS.experimental_optimize_placement, - use_tpu=FLAGS.use_tpu, - schedule=FLAGS.schedule, - no_data_parallelism=hp.no_data_parallelism, - daisy_chain_variables=hp.daisy_chain_variables, - ps_replicas=FLAGS.ps_replicas, - ps_job=FLAGS.ps_job, - ps_gpu=FLAGS.ps_gpu, - sync=FLAGS.sync, - worker_id=FLAGS.worker_id, - worker_job=FLAGS.worker_job, - random_seed=FLAGS.random_seed) - - -def generate_data(): - # Generate data if requested. 
- data_dir = os.path.expanduser(FLAGS.data_dir) - tmp_dir = os.path.expanduser(FLAGS.tmp_dir) - tf.gfile.MakeDirs(data_dir) - tf.gfile.MakeDirs(tmp_dir) - - problem_name = get_problem_name() - tf.logging.info("Generating data for %s" % problem_name) - registry.problem(problem_name).generate_data(data_dir, tmp_dir) - - -@contextlib.contextmanager -def profile_context(): - if FLAGS.profile: - with tf.contrib.tfprof.ProfileContext("t2tprof", - trace_steps=range(100), - dump_steps=range(100)) as pctx: - opts = tf.profiler.ProfileOptionBuilder.time_and_memory() - pctx.add_auto_profiling("op", opts, range(100)) - yield - else: - yield - - -def log_registry(): - if FLAGS.registry_help: - tf.logging.info(registry.help_string()) - sys.exit(0) - - -def execute_schedule(exp): - if not hasattr(exp, FLAGS.schedule): - raise ValueError( - "Experiment has no method %s, from --schedule" % FLAGS.schedule) - with profile_context(): - getattr(exp, FLAGS.schedule)() - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - tpu_trainer_lib.set_random_seed(FLAGS.random_seed) - usr_dir.import_usr_dir(FLAGS.t2t_usr_dir) - log_registry() - - if FLAGS.generate_data: - generate_data() - - hparams = create_hparams() - run_config = create_run_config(hparams) - - exp_fn = create_experiment_fn() - exp = exp_fn(run_config, hparams) - execute_schedule(exp) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py index fe2790194..1125a6ed3 100644 --- a/tensor2tensor/utils/registry.py +++ b/tensor2tensor/utils/registry.py @@ -24,7 +24,7 @@ class MyModel(T2TModel): ``` Access by snake-cased name: `registry.model("my_model")`. If you're using -`tpu_trainer.py`, you can pass on the command-line: `--model=my_model`. +`t2t_trainer.py`, you can pass on the command-line: `--model=my_model`. See all the models registered: `registry.list_models()`. @@ -32,13 +32,13 @@ class MyModel(T2TModel): * Register: `registry.register_hparams` * List: `registry.list_hparams` * Retrieve by name: `registry.hparams` - * Command-line flag in `tpu_trainer.py`: `--hparams_set=name` + * Command-line flag in `t2t_trainer.py`: `--hparams_set=name` For hyperparameter ranges: * Register: `registry.register_ranged_hparams` * List: `registry.list_ranged_hparams` * Retrieve by name: `registry.ranged_hparams` - * Command-line flag in `tpu_trainer.py`: `--hparams_range=name` + * Command-line flag in `t2t_trainer.py`: `--hparams_range=name` """ from __future__ import absolute_import from __future__ import division diff --git a/tensor2tensor/utils/t2t_model.py b/tensor2tensor/utils/t2t_model.py index 630011541..d2af84c0f 100644 --- a/tensor2tensor/utils/t2t_model.py +++ b/tensor2tensor/utils/t2t_model.py @@ -739,7 +739,7 @@ def estimator_model_fn(cls, config=None, params=None, decode_hparams=None, - use_tpu=True): + use_tpu=False): """Model fn for Estimator. 
Args: @@ -755,9 +755,6 @@ def estimator_model_fn(cls, Returns: TPUEstimatorSpec if use tpu else EstimatorSpec """ - tf.logging.warning("T2TModel.estimator_model_fn implements a subset of " - "model_builder.model_fn and is currently only used " - "in tpu_trainer.") _create_dummy_vars() hparams = copy.deepcopy(hparams) hparams.use_tpu = use_tpu diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/utils/trainer_lib.py similarity index 99% rename from tensor2tensor/tpu/tpu_trainer_lib.py rename to tensor2tensor/utils/trainer_lib.py index ff2045302..6442d9781 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib.py +++ b/tensor2tensor/utils/trainer_lib.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Library for training on TPU. See tpu_trainer.py.""" +"""Library for training. See t2t_trainer.py.""" from __future__ import absolute_import from __future__ import division diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/utils/trainer_lib_test.py similarity index 88% rename from tensor2tensor/tpu/tpu_trainer_lib_test.py rename to tensor2tensor/utils/trainer_lib_test.py index 2a2148afd..5df62d2cb 100644 --- a/tensor2tensor/tpu/tpu_trainer_lib_test.py +++ b/tensor2tensor/utils/trainer_lib_test.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for tpu_trainer_lib.""" +"""Tests for trainer_lib.""" from __future__ import absolute_import from __future__ import division @@ -28,8 +28,8 @@ from tensor2tensor.data_generators import algorithmic from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import problem as problem_lib -from tensor2tensor.tpu import tpu_trainer_lib from tensor2tensor.utils import registry +from tensor2tensor.utils import trainer_lib import tensorflow as tf @@ -47,7 +47,7 @@ def generate_data(self, data_dir, _): self.dev_filepaths(data_dir, 1, shuffled=True), 100) -class TpuTrainerTest(tf.test.TestCase): +class TrainerLibTest(tf.test.TestCase): @classmethod def setUpClass(cls): @@ -60,7 +60,7 @@ def setUpClass(cls): registry.problem("tiny_algo").generate_data(cls.data_dir, None) def testExperiment(self): - exp_fn = tpu_trainer_lib.create_experiment_fn( + exp_fn = trainer_lib.create_experiment_fn( "transformer", "tiny_algo", self.data_dir, @@ -68,7 +68,7 @@ def testExperiment(self): eval_steps=1, min_eval_frequency=1, use_tpu=False) - run_config = tpu_trainer_lib.create_run_config( + run_config = trainer_lib.create_run_config( model_dir=self.data_dir, num_gpus=0, use_tpu=False) hparams = registry.hparams("transformer_tiny_tpu")() exp = exp_fn(run_config, hparams) @@ -76,9 +76,9 @@ def testExperiment(self): def testModel(self): # HParams - hparams = tpu_trainer_lib.create_hparams("transformer_tiny", - data_dir=self.data_dir, - problem_name="tiny_algo") + hparams = trainer_lib.create_hparams("transformer_tiny", + data_dir=self.data_dir, + problem_name="tiny_algo") # Dataset problem = hparams.problem_instances[0] diff --git a/tensor2tensor/visualization/TransformerVisualization.ipynb b/tensor2tensor/visualization/TransformerVisualization.ipynb index f2c4f1559..bec758327 100644 --- a/tensor2tensor/visualization/TransformerVisualization.ipynb +++ b/tensor2tensor/visualization/TransformerVisualization.ipynb @@ -29,10 +29,10 @@ "import tensorflow as tf\n", "import numpy as np\n", "\n", - "from tensor2tensor.tpu import tpu_trainer_lib\n", "from tensor2tensor.utils 
import t2t_model\n", "from tensor2tensor.utils import decoding\n", "from tensor2tensor.utils import devices\n", + "from tensor2tensor.utils import trainer_lib\n", "from tensor2tensor.visualization import attention\n" ] }, @@ -133,7 +133,7 @@ } ], "source": [ - "hparams = tpu_trainer_lib.create_hparams(FLAGS.hparams_set, data_dir=FLAGS.data_dir, problem_name=PROBLEM)\n", + "hparams = trainer_lib.create_hparams(FLAGS.hparams_set, data_dir=FLAGS.data_dir, problem_name=PROBLEM)\n", "hparams.use_fixed_batch_size = True\n", "hparams.batch_size = 1\n", "\n", From b20795cbb6d32bda92e1d5e89305bb736634b692 Mon Sep 17 00:00:00 2001 From: Ryan Sepassi Date: Tue, 2 Jan 2018 12:11:53 -0800 Subject: [PATCH 06/42] Make scripts thin and executable; add t2t_usr_dir example and test; log metadata; allow Eager-mode re-registration PiperOrigin-RevId: 180576491 --- .travis.yml | 1 + README.md | 32 +-------------- docs/walkthrough.md | 32 +-------------- setup.py | 4 +- tensor2tensor/bin/t2t-datagen | 15 +++++++ tensor2tensor/bin/t2t-decoder | 15 +++++++ tensor2tensor/bin/t2t-make-tf-configs | 15 +++++++ tensor2tensor/bin/t2t-trainer | 15 +++++++ tensor2tensor/bin/t2t_trainer.py | 40 +++++++++++++++++++ tensor2tensor/layers/common_hparams.py | 2 +- tensor2tensor/models/transformer.py | 1 + .../test_data/example_usr_dir/__init__.py | 17 ++++++++ .../test_data/example_usr_dir/my_submodule.py | 32 +++++++++++++++ tensor2tensor/utils/registry.py | 10 +++-- 14 files changed, 164 insertions(+), 67 deletions(-) create mode 100755 tensor2tensor/bin/t2t-datagen create mode 100755 tensor2tensor/bin/t2t-decoder create mode 100755 tensor2tensor/bin/t2t-make-tf-configs create mode 100755 tensor2tensor/bin/t2t-trainer create mode 100644 tensor2tensor/test_data/example_usr_dir/__init__.py create mode 100644 tensor2tensor/test_data/example_usr_dir/my_submodule.py diff --git a/.travis.yml b/.travis.yml index f424014b5..00fe35951 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,7 @@ script: - pytest tensor2tensor/utils/registry_test.py - pytest tensor2tensor/utils/trainer_lib_test.py - t2t-datagen 2>&1 | grep translate && echo passed + - t2t-trainer --registry_help --t2t_usr_dir=./tensor2tensor/test_data/example_usr_dir 2>&1 | grep my_very_own_hparams && echo passed - python -c "from tensor2tensor.models import transformer; print(transformer.Transformer.__name__)" - t2t-trainer --registry_help - mkdir $T2T_DATA_DIR diff --git a/README.md b/README.md index de2951c53..06a15d1c8 100644 --- a/README.md +++ b/README.md @@ -296,36 +296,8 @@ specifying the `--t2t_usr_dir` flag in `t2t-trainer`. You can do so for models, hyperparameter sets, modalities, and problems. Please do submit a pull request if your component might be useful to others. -Here's an example with a new hyperparameter set: - -```python -# In ~/usr/t2t_usr/my_registrations.py - -from tensor2tensor.models import transformer -from tensor2tensor.utils import registry - -@registry.register_hparams -def transformer_my_very_own_hparams_set(): - hparams = transformer.transformer_base() - hparams.hidden_size = 1024 - ... -``` - -```python -# In ~/usr/t2t_usr/__init__.py -from . import my_registrations -``` - -``` -t2t-trainer --t2t_usr_dir=~/usr/t2t_usr --registry_help -``` - -You'll see under the registered HParams your -`transformer_my_very_own_hparams_set`, which you can directly use on the command -line with the `--hparams_set` flag. - -`t2t-datagen` also supports the `--t2t_usr_dir` flag for `Problem` -registrations. 
+See the [`example_usr_dir`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/test_data/example_usr_dir) +for an example user directory. ## Adding a dataset diff --git a/docs/walkthrough.md b/docs/walkthrough.md index de2951c53..06a15d1c8 100644 --- a/docs/walkthrough.md +++ b/docs/walkthrough.md @@ -296,36 +296,8 @@ specifying the `--t2t_usr_dir` flag in `t2t-trainer`. You can do so for models, hyperparameter sets, modalities, and problems. Please do submit a pull request if your component might be useful to others. -Here's an example with a new hyperparameter set: - -```python -# In ~/usr/t2t_usr/my_registrations.py - -from tensor2tensor.models import transformer -from tensor2tensor.utils import registry - -@registry.register_hparams -def transformer_my_very_own_hparams_set(): - hparams = transformer.transformer_base() - hparams.hidden_size = 1024 - ... -``` - -```python -# In ~/usr/t2t_usr/__init__.py -from . import my_registrations -``` - -``` -t2t-trainer --t2t_usr_dir=~/usr/t2t_usr --registry_help -``` - -You'll see under the registered HParams your -`transformer_my_very_own_hparams_set`, which you can directly use on the command -line with the `--hparams_set` flag. - -`t2t-datagen` also supports the `--t2t_usr_dir` flag for `Problem` -registrations. +See the [`example_usr_dir`](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/test_data/example_usr_dir) +for an example user directory. ## Adding a dataset diff --git a/setup.py b/setup.py index fb2b6492d..aae7f6288 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,8 @@ 'six', ], extras_require={ - 'tensorflow': ['tensorflow>=1.4.0'], - 'tensorflow_gpu': ['tensorflow-gpu>=1.4.0'], + 'tensorflow': ['tensorflow>=1.4.1'], + 'tensorflow_gpu': ['tensorflow-gpu>=1.4.1'], 'tests': ['pytest', 'h5py', 'mock'], }, classifiers=[ diff --git a/tensor2tensor/bin/t2t-datagen b/tensor2tensor/bin/t2t-datagen new file mode 100755 index 000000000..ef8933e90 --- /dev/null +++ b/tensor2tensor/bin/t2t-datagen @@ -0,0 +1,15 @@ +"""t2t-datagen.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.bin import t2t_datagen + +import tensorflow as tf + +def main(argv): + t2t_datagen.main(argv) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-decoder b/tensor2tensor/bin/t2t-decoder new file mode 100755 index 000000000..a878c0e9b --- /dev/null +++ b/tensor2tensor/bin/t2t-decoder @@ -0,0 +1,15 @@ +"""t2t-decoder.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.bin import t2t_decoder + +import tensorflow as tf + +def main(argv): + t2t_decoder.main(argv) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-make-tf-configs b/tensor2tensor/bin/t2t-make-tf-configs new file mode 100755 index 000000000..9e656239e --- /dev/null +++ b/tensor2tensor/bin/t2t-make-tf-configs @@ -0,0 +1,15 @@ +"""t2t-make-tf-configs.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.bin import make_tf_configs + +import tensorflow as tf + +def main(argv): + make_tf_configs.main(argv) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer new file mode 100755 index 000000000..5cbc8cf77 --- /dev/null +++ b/tensor2tensor/bin/t2t-trainer @@ -0,0 +1,15 @@ +"""t2t-trainer.""" +from 
__future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensor2tensor.bin import t2t_trainer + +import tensorflow as tf + +def main(argv): + t2t_trainer.main(argv) + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py index 9e77de384..6ad0fd438 100644 --- a/tensor2tensor/bin/t2t_trainer.py +++ b/tensor2tensor/bin/t2t_trainer.py @@ -162,6 +162,43 @@ def log_registry(): sys.exit(0) +def is_chief(): + schedules = ["train", "train_and_evaluate", "continuous_train_and_eval"] + return FLAGS.worker_id == 0 and FLAGS.schedule in schedules + + +def save_metadata(hparams): + """Saves FLAGS and hparams to output_dir.""" + output_dir = os.path.expanduser(FLAGS.output_dir) + # Save FLAGS in txt file + if hasattr(FLAGS, "flags_into_string"): + flags_str = FLAGS.flags_into_string() + t2t_flags_str = "\n".join([ + "--%s=%s" % (f.name, f.value) + for f in FLAGS.flags_by_module_dict()[ + "tensor2tensor.utils.flags"] + ]) + else: + flags_dict = FLAGS.__dict__["__flags"] + flags_str = "\n".join( + ["--%s=%s" % (name, str(f)) for (name, f) in flags_dict.items()]) + t2t_flags_str = None + + flags_txt = os.path.join(output_dir, "flags.txt") + with tf.gfile.Open(flags_txt, "w") as f: + f.write(flags_str) + + if t2t_flags_str: + t2t_flags_txt = os.path.join(output_dir, "flags_t2t.txt") + with tf.gfile.Open(t2t_flags_txt, "w") as f: + f.write(t2t_flags_str) + + # Save hparams as hparams.json + hparams_fname = os.path.join(output_dir, "hparams.json") + with tf.gfile.Open(hparams_fname, "w") as f: + f.write(hparams.to_json()) + + def execute_schedule(exp): if not hasattr(exp, FLAGS.schedule): raise ValueError( @@ -182,6 +219,9 @@ def main(_): hparams = create_hparams() run_config = create_run_config(hparams) + if is_chief(): + save_metadata(hparams) + exp_fn = create_experiment_fn() exp = exp_fn(run_config, hparams) execute_schedule(exp) diff --git a/tensor2tensor/layers/common_hparams.py b/tensor2tensor/layers/common_hparams.py index 35bac33b0..b9593b00e 100644 --- a/tensor2tensor/layers/common_hparams.py +++ b/tensor2tensor/layers/common_hparams.py @@ -96,7 +96,7 @@ def basic_params1(): norm_type="layer", # "batch", layer", "noam", "none". # epsilon parameter to normalization function norm_epsilon=1e-6, - symbol_modality_num_shards=16, + symbol_modality_num_shards=1, # During training, we drop sequences whose inputs and targets are shorter # than min_length min_length=0, diff --git a/tensor2tensor/models/transformer.py b/tensor2tensor/models/transformer.py index de812b64b..f43ace037 100644 --- a/tensor2tensor/models/transformer.py +++ b/tensor2tensor/models/transformer.py @@ -750,6 +750,7 @@ def transformer_base_v1(): hparams.num_sampled_classes = 0 hparams.label_smoothing = 0.1 hparams.shared_embedding_and_softmax_weights = True + hparams.symbol_modality_num_shards = 16 # Add new ones like this. hparams.add_hparam("filter_size", 2048) # Layer-related flags. If zero, these fall back on hparams.num_hidden_layers. diff --git a/tensor2tensor/test_data/example_usr_dir/__init__.py b/tensor2tensor/test_data/example_usr_dir/__init__.py new file mode 100644 index 000000000..9bab20593 --- /dev/null +++ b/tensor2tensor/test_data/example_usr_dir/__init__.py @@ -0,0 +1,17 @@ +# coding=utf-8 +# Copyright 2017 The Tensor2Tensor Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
diff --git a/tensor2tensor/test_data/example_usr_dir/__init__.py b/tensor2tensor/test_data/example_usr_dir/__init__.py
new file mode 100644
index 000000000..9bab20593
--- /dev/null
+++ b/tensor2tensor/test_data/example_usr_dir/__init__.py
@@ -0,0 +1,17 @@
+# coding=utf-8
+# Copyright 2017 The Tensor2Tensor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Example T2T user directory."""
+from . import my_submodule
diff --git a/tensor2tensor/test_data/example_usr_dir/my_submodule.py b/tensor2tensor/test_data/example_usr_dir/my_submodule.py
new file mode 100644
index 000000000..b6c3579ac
--- /dev/null
+++ b/tensor2tensor/test_data/example_usr_dir/my_submodule.py
@@ -0,0 +1,32 @@
+# coding=utf-8
+# Copyright 2017 The Tensor2Tensor Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Example registrations for T2T."""
+from tensor2tensor.layers import common_hparams
+from tensor2tensor.utils import registry
+
+
+@registry.register_hparams
+def my_very_own_hparams():
+  # Start with the base set
+  hp = common_hparams.basic_params1()
+  # Modify existing hparams
+  hp.num_hidden_layers = 2
+  # Add new hparams
+  hp.add_hparam("filter_size", 2048)
+  return hp
+
+# Use register_model for a new T2TModel
+# Use register_problem for a new Problem
diff --git a/tensor2tensor/utils/registry.py b/tensor2tensor/utils/registry.py
index 1125a6ed3..4f84752d1 100644
--- a/tensor2tensor/utils/registry.py
+++ b/tensor2tensor/utils/registry.py
@@ -51,6 +51,8 @@ class MyModel(T2TModel):
 
 import six
 
+from tensorflow.python.eager import context
+
 _MODELS = {}
 _HPARAMS = {}
 _RANGED_HPARAMS = {}
@@ -120,7 +122,7 @@ def register_model(name=None):
   def decorator(model_cls, registration_name=None):
     """Registers & returns model_cls with registration_name or default name."""
     model_name = registration_name or default_name(model_cls)
-    if model_name in _MODELS:
+    if model_name in _MODELS and not context.in_eager_mode():
       raise LookupError("Model %s already registered." % model_name)
     model_cls.REGISTERED_NAME = model_name
     _MODELS[model_name] = model_cls
@@ -150,7 +152,7 @@ def register_hparams(name=None):
   def decorator(hp_fn, registration_name=None):
     """Registers & returns hp_fn with registration_name or default name."""
     hp_name = registration_name or default_name(hp_fn)
-    if hp_name in _HPARAMS:
+    if hp_name in _HPARAMS and not context.in_eager_mode():
       raise LookupError("HParams set %s already registered." % hp_name)
     _HPARAMS[hp_name] = hp_fn
     return hp_fn
@@ -217,7 +219,7 @@ def register_problem(name=None):
   def decorator(p_cls, registration_name=None):
     """Registers & returns p_cls with registration_name or default name."""
     p_name = registration_name or default_name(p_cls)
-    if p_name in _PROBLEMS:
+    if p_name in _PROBLEMS and not context.in_eager_mode():
       raise LookupError("Problem %s already registered." % p_name)
     _PROBLEMS[p_name] = p_cls
@@ -317,7 +319,7 @@ def _internal_register_modality(name, mod_collection, collection_str):
   def decorator(mod_cls, registration_name=None):
     """Registers & returns mod_cls with registration_name or default name."""
     mod_name = registration_name or default_name(mod_cls)
-    if mod_name in mod_collection:
+    if mod_name in mod_collection and not context.in_eager_mode():
       raise LookupError("%s modality %s already registered." %
                         (collection_str, mod_name))
     mod_collection[mod_name] = mod_cls
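The registry changes above add `context.in_eager_mode()` guards so that re-registering an existing name only raises `LookupError` outside eager execution. A minimal sketch of the behavior is below; it assumes tensor2tensor is installed, uses only the `registry` and `common_hparams` APIs that appear in this patch, and the hparams-set name is invented for the example.

```python
# Sketch of the duplicate-registration behavior the guard above relaxes.
# The hparams-set name is hypothetical.
from tensor2tensor.layers import common_hparams
from tensor2tensor.utils import registry


@registry.register_hparams
def my_scratch_hparams():
  hp = common_hparams.basic_params1()
  hp.num_hidden_layers = 2
  return hp


# Defining (and thereby registering) the same name a second time raises
# LookupError("HParams set my_scratch_hparams already registered.") in graph
# mode; with the change above the check is skipped when eager execution is
# active, so re-running such a definition interactively no longer fails.
```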
From 4361c19242056b19d3455e7d9d7d17bd9f67aa98 Mon Sep 17 00:00:00 2001
From: T2T Team
Date: Tue, 2 Jan 2018 15:30:55 -0800
Subject: [PATCH 07/42] Adding bower dependencies and changes to the index html to properly

PiperOrigin-RevId: 180601814
---
 setup.py                                           |  4 +
 .../attention-visualization.js                     | 13 ++-
 tensor2tensor/insights/polymer/bower.json          | 80 +++++++++++++++++++
 .../polymer/explore_view/explore-view.html         |  4 +-
 .../polymer/explore_view/explore-view.js           | 12 ++-
 .../graph-visualization.js                         | 12 ++-
 tensor2tensor/insights/polymer/index.html          | 58 +++++++++++++-
 .../polymer/insights_app/insights-app.html         |  6 +-
 .../polymer/insights_app/insights-app.js           | 12 ++-
 .../language-selector-content.js                   | 12 ++-
 .../language_selector/language-selector.js         |  9 ++-
 .../processing-visualization.js                    |  9 ++-
 .../polymer/query_card/query-card.html             |  2 +-
 .../insights/polymer/query_card/query-card.js      | 12 ++-
 .../tensor2tensor.html}                            |  4 +-
 .../translation-result.html                        |  6 +-
 .../translation_result/translation-result.js       |  9 ++-
 tensor2tensor/insights/server.py                   | 19 ++++-
 tensor2tensor/insights/transformer_model.py        |  4 +-
 19 files changed, 239 insertions(+), 48 deletions(-)
 create mode 100644 tensor2tensor/insights/polymer/bower.json
 rename tensor2tensor/insights/{index.html => polymer/tensor2tensor.html} (91%)

diff --git a/setup.py b/setup.py
index aae7f6288..18f97d089 100644
--- a/setup.py
+++ b/setup.py
@@ -23,10 +23,14 @@
         'tensor2tensor/bin/t2t-datagen',
         'tensor2tensor/bin/t2t-decoder',
         'tensor2tensor/bin/t2t-make-tf-configs',
+        'tensor2tensor/insights/server',
     ],
     install_requires=[
         'bz2file',
+        'flask',
         'future',
+        'gevent',
+        'gunicorn',
         'gym',
         'numpy',
         'requests',
diff --git a/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js b/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js
index b58d90905..e738c2629 100644
--- a/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js
+++ b/tensor2tensor/insights/polymer/attention_visualization/attention-visualization.js
@@ -15,8 +15,6 @@
  * limitations under the License.
  */
 
-goog.module('t2t.AttentionVisualization');
-
 /**
  * `` presents a heatmap of input-output associations.
  *
@@ -62,10 +60,16 @@ class AttentionVisualization extends Polymer.Element {
     this.zoom_ = undefined;
   }
 
+  /**
+   * @return {string} The component name.
+   */
   static get is() {
     return 'attention-visualization';
   }
 
+  /**
+   * @return {!Object} The component properties.
+   */
   static get properties() {
     return {
       /**
       *
@@ -84,6 +88,9 @@
     };
   }
 
+  /**
+   * @return {!Array} The component observers.
+   */
   static get observers() {
     return [
       'zoomDepthChanged_(zoomDepth_)',
@@ -308,5 +315,3 @@
 }
 
 customElements.define(AttentionVisualization.is, AttentionVisualization);
-
-exports = {AttentionVisualization};
diff --git a/tensor2tensor/insights/polymer/bower.json b/tensor2tensor/insights/polymer/bower.json
new file mode 100644
index 000000000..da1f4aaed
--- /dev/null
+++ b/tensor2tensor/insights/polymer/bower.json
@@ -0,0 +1,80 @@
+{
+  "name": "tensor2tensor-insights",
+  "homepage": "https://github.com/tensorflow/tensor2tensor",
+  "description": "Components for analyzing tensor2tensor neural machine translation models.",
+  "main": "index.html",
+  "keywords": [
+    "neural",
+    "machine",
+    "translation"
+  ],
+  "authors": [
+    "kstevens@google.com"
+  ],
+  "license": "Apache 2.0",
+  "private": true,
+  "ignore": [
+    "**/.*",
+    "node_modules",
+    "bower_components",
+    "test",
+    "tests"
+  ],
+  "dependencies": {
+    "app-layout": "PolymerElements/app-layout#2.0.4",
+    "app-route": "PolymerElements/app-route#2.0.3",
+    "d3": "d3#4.12.2",
+    "iron-a11y-keys": "PolymerElements/iron-a11y-keys#2.0.0",
+    "iron-ajax": "PolymerElements/iron-ajax#2.0.0",
+    "iron-flex-layout": "PolymerElements/iron-flex-layout#2.0.0",
+    "iron-icon": "PolymerElements/iron-icon#2.0.0",
+    "iron-icons": "PolymerElements/iron-icons#2.0.0",
+    "iron-list": "PolymerElements/iron-list#2.0.0",
+    "iron-pages": "PolymerElements/iron-pages#2.0.0",
+    "iron-selector": "PolymerElements/iron-selector#2.0.0",
+    "neon-animation": "PolymerElements/neon-animation#2.0.0",
+    "paper-button": "PolymerElements/paper-button#2.0.0",
+    "paper-card": "PolymerElements/paper-card#2.0.0",
+    "paper-dialog": "PolymerElements/paper-dialog#2.0.0",
+    "paper-dropdown-menu": "PolymerElements/paper-dropdown-menu#2.0.0",
+    "paper-icon-button": "PolymerElements/paper-icon-button#2.0.0",
+    "paper-input": "PolymerElements/paper-input#2.0.0",
+    "paper-item": "PolymerElements/paper-item#2.0.0",
+    "paper-listbox": "PolymerElements/paper-listbox#2.0.0",
+    "paper-slider": "PolymerElements/paper-slider#2.0.0",
+    "paper-tabs": "PolymerElements/paper-tabs#2.0.0",
+    "paper-toggle-button": "PolymerElements/paper-toggle-button#2.0.0",
+    "paper-tooltip": "PolymerElements/paper-tooltip#2.0.0",
+    "paper-progress": "PolymerElements/paper-progress#2.0.0",
+    "polymer": "polymer/polymer#v2.3.1"
+  },
+  "resolutions": {
+    "webcomponentsjs": "^v1.0.19",
+    "polymer": "^v2.3.1",
+    "app-route": "^2.0.3",
+    "app-layout": "^2.0.4",
+    "iron-location": "1 - 2",
+    "iron-selector": "^2.0.0",
+    "neon-animation": "^2.0.0",
+    "iron-icon": "^2.0.0",
+    "iron-pages": "^2.0.0",
+    "iron-icons": "^2.0.0",
+    "paper-icon-button": "^2.0.0",
+    "paper-item": "^2.0.0",
+    "iron-flex-layout": "^2.0.0",
+    "paper-listbox": "^2.0.0",
+    "iron-a11y-keys": "^2.0.0",
+    "paper-dialog": "^2.0.0",
+    "iron-ajax": "^2.0.0",
+    "paper-progress": "^2.0.0",
+    "paper-dropdown-menu": "^2.0.0",
+    "paper-tabs": "^2.0.0",
+    "paper-input": "^2.0.0",
+    "paper-toggle-button": "^2.0.0",
+    "paper-slider": "^2.0.0",
+    "iron-list": "^2.0.0",
+    "paper-card": "^2.0.0",
+    "paper-tooltip": "^2.0.0",
+    "iron-overlay-behavior": "^2.2.0"
+  }
+}
diff --git a/tensor2tensor/insights/polymer/explore_view/explore-view.html b/tensor2tensor/insights/polymer/explore_view/explore-view.html
index d0456211f..97fce423c 100644
--- a/tensor2tensor/insights/polymer/explore_view/explore-view.html
+++ b/tensor2tensor/insights/polymer/explore_view/explore-view.html
@@ -31,8 +31,8 @@
-
-
+
+