-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #98 from IITH-Compilers/inMemoryVocabulary
Created a file to generate the vocabulary as a map
- Loading branch information
Showing
18 changed files
with
124 additions
and
129 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Copyright (c) 2024, The Contributors of IR2Vec. | ||
# | ||
# Part of the IR2Vec project. This software is available under the BSD 4-Clause | ||
# License. Please see LICENSE file in the top-level directory for more details. | ||
# | ||
"""Generate a C++ header that embeds the IR2Vec seed-embedding vocabulary.

Each non-blank line of the vocabulary file is expected to look like
``key:[v1, v2, ...]``; anything after the first ``]`` is ignored.  For
every entry the generated header contains one ``IR2Vec::Vector`` constant
named ``<key>_vector`` and one entry in ``Vocabulary::vocabulary``.
"""
import argparse
import sys

# Default location of the vocabulary, resolved against the current working
# directory (not against this script's location).
DEFAULT_VOCAB_FILE = "../vocabulary/seedEmbeddingVocab.txt"

# Define headers and opening/closing of map
HEADER = """\
// Generated by IR2Vec. DO NOT EDIT!
// This file contains the learned vocabulary used by IR2Vec.
//
// clang-format off
#ifndef __VOCABULARY__
#define __VOCABULARY__
#include <map>
#include <string>
#include <vector>
#include "IR2Vec.h"
namespace IR2Vec {
class Vocabulary {
public:
static const std::map<std::string, IR2Vec::Vector>& getVocabulary() {
return vocabulary;
}
private:
static const std::map<std::string, IR2Vec::Vector> vocabulary;
};
"""

OPENING = "\nconst std::map<std::string, IR2Vec::Vector> Vocabulary::vocabulary = {\n"
CLOSING = """\
};
} // namespace IR2Vec
#endif // __VOCABULARY__
"""


def _parse_vocabulary(lines):
    """Return ``(key, values)`` pairs parsed from vocabulary *lines*.

    ``values`` is the raw text found between ``[`` and the first ``]``.
    Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line is not of the form ``key:[...]``.
    """
    entries = []
    for lineno, raw in enumerate(lines, start=1):
        line = raw.strip()
        if not line:
            # Tolerate blank lines (typically a trailing newline at EOF).
            continue
        key, sep, val = line.partition(":")
        key = key.strip()
        val = val.strip()
        end = val.find("]")
        if not sep or not key or not val.startswith("[") or end == -1:
            raise ValueError(
                f"Malformed vocabulary entry on line {lineno}: {line!r}"
            )
        entries.append((key, val[1:end]))
    return entries


def _render_vocabulary(entries):
    """Return the full text of the C++ header for the parsed *entries*."""
    parts = [HEADER]
    # Vector declarations must precede the map that refers to them.
    parts.extend(
        f"const IR2Vec::Vector {key}_vector = {{ {values} }};\n"
        for key, values in entries
    )
    parts.append(OPENING)
    parts.extend(f' {{ "{key}", {key}_vector }},\n' for key, _ in entries)
    parts.append(CLOSING)
    return "".join(parts)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Exits with a non-zero status (message on stderr) when the output path
    is missing, the vocabulary file cannot be found or parsed, or the
    output file cannot be written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output", type=str, help="Output file name")
    parser.add_argument(
        "--vocab",
        type=str,
        default=DEFAULT_VOCAB_FILE,
        help="Vocabulary file to read (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    if args.output is None:
        sys.exit("Error: Output file path not provided.")

    # Read and validate the vocabulary completely before touching the output
    # file, so a failure never leaves a truncated header behind.
    try:
        with open(args.vocab, "r", encoding="utf-8") as fr:
            entries = _parse_vocabulary(fr)
    except FileNotFoundError:
        sys.exit(f"Error: Vocabulary file '{args.vocab}' not found.")
    except ValueError as err:
        sys.exit(f"Error: {err}")

    try:
        with open(args.output, "w", encoding="utf-8") as fw:
            fw.write(_render_vocabulary(entries))
    except OSError as err:
        sys.exit(f"Error: Could not write output file '{args.output}': {err}")

    print(f"Generated {args.output}")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.