From 7c24956f45a725fe0eaeb97414b53a5a3d0ff2c0 Mon Sep 17 00:00:00 2001 From: Harikaraja Date: Mon, 23 Sep 2024 15:11:04 +0530 Subject: [PATCH] Added Optuna Search and Configuration changes --- .../OpenKE/generate_embedding_ray.py | 134 ++++++++---------- 1 file changed, 62 insertions(+), 72 deletions(-) diff --git a/seed_embeddings/OpenKE/generate_embedding_ray.py b/seed_embeddings/OpenKE/generate_embedding_ray.py index 008e1abfd..3a2e6bac6 100644 --- a/seed_embeddings/OpenKE/generate_embedding_ray.py +++ b/seed_embeddings/OpenKE/generate_embedding_ray.py @@ -23,7 +23,7 @@ from ray.tune.tune_config import TuneConfig from ray.train import RunConfig, CheckpointConfig from ray.tune.schedulers import ASHAScheduler - +from ray.tune.search.optuna import OptunaSearch os.environ["CUDA_VISIBLE_DEVICES"] = "0" @@ -33,7 +33,6 @@ def test_files(index_dir): train = os.path.join(index_dir, "train2id.txt") print(entities, relations, train) - if not os.path.exists(entities): raise Exception("entity2id.txt not found") if not os.path.exists(relations): @@ -65,7 +64,6 @@ def train(arg_conf): neg_ent=arg_conf["neg_ent"], neg_rel=arg_conf["neg_rel"], ) - # dataloader for test (link prediction) if arg_conf["link_pred"]: test_dataloader = TestDataLoader(arg_conf["index_dir"], "link") @@ -79,14 +77,12 @@ def train(arg_conf): p_norm=1, norm_flag=True, ) - # define the loss function model = NegativeSampling( model=transe, loss=MarginLoss(margin=arg_conf["margin"]), batch_size=train_dataloader.get_batch_size(), ) - # train the model trainer = Trainer( model=model, @@ -94,9 +90,8 @@ def train(arg_conf): train_times=arg_conf["epoch"], alpha=arg_conf["alpha"], index_dir=arg_conf["index_dir"], - use_gpu=False, + use_gpu=arg_conf["use_gpu"], ) - trainer.run( link_prediction=arg_conf["link_pred"], test_dataloader=test_dataloader, @@ -137,7 +132,6 @@ def findRep(src, dest, index_dir, src_type="json"): if __name__ == "__main__": ray.init() - parser = argparse.ArgumentParser() parser.add_argument(
"--index_dir", @@ -192,7 +186,14 @@ def findRep(src, dest, index_dir, src_type="json"): type=float, default=1.0, ) - + parser.add_argument( + "--use_gpu", + dest="use_gpu", + help="To use GPU for computation", + required=False, + type=lambda v: str(v).strip().lower() in ("1", "true", "yes"), # type=bool would parse "False" as True + default=False, + ) arg_conf = parser.parse_args() search_space = { @@ -205,9 +206,11 @@ def findRep(src, dest, index_dir, src_type="json"): "neg_ent": tune.randint(1, 30), "neg_rel": tune.randint(1, 30), "bern": tune.randint(0, 2), - "opt_method": tune.choice(["SGD", "Adagrad", "Adam", "Adadelta"]), + "opt_method": tune.choice(["SGD", "Adam"]), + #"opt_method": tune.choice(["SGD", "Adagrad", "Adam", "Adadelta"]), "is_analogy": arg_conf.is_analogy, "link_pred": arg_conf.link_pred, + "use_gpu": arg_conf.use_gpu, } try: @@ -221,83 +224,58 @@ def findRep(src, dest, index_dir, src_type="json"): scheduler = ASHAScheduler( time_attr="training_iteration", max_t=arg_conf.epoch, - grace_period=min(arg_conf.epoch, 4000), - reduction_factor=2, + grace_period=min(arg_conf.epoch, 15), # ASHA rejects a grace_period larger than max_t + reduction_factor=3, metric="AnalogiesScore", mode="max", ) - tuner = tune.Tuner( - train, - param_space=search_space, - tune_config=TuneConfig( - max_concurrent_trials=4, - scheduler=scheduler, - num_samples=1, - ), - run_config=RunConfig( - checkpoint_config=CheckpointConfig( - num_to_keep=2, - # *Best* checkpoints are determined by these params: - # checkpoint_score_attribute="AnalogiesScore", - # checkpoint_score_order="max", - ) - ), - ) elif arg_conf.link_pred: scheduler = ASHAScheduler( time_attr="training_iteration", max_t=arg_conf.epoch, - grace_period=min(arg_conf.epoch, 4000), - reduction_factor=2, + grace_period=min(arg_conf.epoch, 15), + reduction_factor=3, metric="hit1", mode="max", ) - tuner = tune.Tuner( - train, - param_space=search_space, - tune_config=TuneConfig( - max_concurrent_trials=4, - scheduler=scheduler, - num_samples=1, - ), - run_config=RunConfig( - checkpoint_config=CheckpointConfig( - num_to_keep=2, - # *Best* checkpoints are determined by these params: - #
checkpoint_score_attribute="hit1", - # checkpoint_score_order="max", - ) - ), - ) else: scheduler = ASHAScheduler( time_attr="training_iteration", max_t=arg_conf.epoch, - grace_period=min(arg_conf.epoch, 4000), - reduction_factor=2, + grace_period=min(arg_conf.epoch, 15), + reduction_factor=3, metric="loss", mode="min", ) - tuner = tune.Tuner( - train, - param_space=search_space, - tune_config=TuneConfig( - max_concurrent_trials=4, - scheduler=scheduler, - num_samples=1, - ), - run_config=RunConfig( - checkpoint_config=CheckpointConfig( - num_to_keep=2, - # *Best* checkpoints are determined by these params: - # checkpoint_score_attribute="loss", - # checkpoint_score_order="min", - ) - ), + if arg_conf.use_gpu: + train_with_resources = tune.with_resources( + train, resources={"cpu": 0, "gpu": 0.0625} ) - + else: + train_with_resources = tune.with_resources( + train, resources={"cpu": 10, "gpu": 0} + ) + tune_metric, tune_mode = ("AnalogiesScore", "max") if arg_conf.is_analogy else ("hit1", "max") if arg_conf.link_pred else ("loss", "min") # search and checkpoint scoring use the same metric/mode as the scheduler branch + tuner = tune.Tuner( + train_with_resources, + param_space=search_space, + tune_config=TuneConfig( + search_alg=OptunaSearch(metric=tune_metric, mode=tune_mode), + max_concurrent_trials=16, + scheduler=scheduler, + num_samples=512, + ), + run_config=RunConfig( + checkpoint_config=CheckpointConfig( + num_to_keep=1, + # *Best* checkpoints are determined by these params: + checkpoint_score_attribute=tune_metric, + checkpoint_score_order=tune_mode, + ) + ), + ) results = tuner.fit() - + # Write the best result to a file, best_result.txt best_result = None if arg_conf.is_analogy: @@ -315,12 +293,24 @@ def findRep(src, dest, index_dir, src_type="json"): best_result = results.get_best_result(metric="AnalogiesScore", mode="max") elif arg_conf.link_pred: + + with open(os.path.join(search_space["index_dir"], "best_result.txt"), "a") as f: + f.write( + "\n" + str(results.get_best_result(metric="hit1", mode="max")) + ) + print( "Best Config Based on Hit1 : ", results.get_best_result(metric="hit1", mode="max"), ) best_result = results.get_best_result(metric="hit1", mode="max") else: + + with
open(os.path.join(search_space["index_dir"], "best_result.txt"), "a") as f: + f.write( + "\n" + str(results.get_best_result(metric="loss", mode="min")) + ) + print( "Best Config Based on Loss : ", results.get_best_result(metric="loss", mode="min"), @@ -347,11 +337,10 @@ def findRep(src, dest, index_dir, src_type="json"): margin, ), ) - best_checkpoint_path = best_result.checkpoint.path - + print("best_checkpoint_path is: ",best_checkpoint_path) file_name = os.listdir(best_checkpoint_path)[0] - + print("file_name is: ",file_name) if file_name.endswith(".ckpt"): # Construct full file path source_file = os.path.join(best_checkpoint_path, file_name) @@ -368,6 +357,7 @@ def findRep(src, dest, index_dir, src_type="json"): margin, ), ) + print("embeddings_path: ",embeddings_path) findRep(outfile, embeddings_path, index_dir, src_type="ckpt") else: print("No .ckpt file found in the source directory.") @@ -376,4 +366,4 @@ def findRep(src, dest, index_dir, src_type="json"): print(result) del results - print("Training finished...") + print("Training finished...") \ No newline at end of file