Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] GloVe trial #12

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
AC_PREREQ(2.59)
AC_INIT([groonga-word2vec], 0.0.1, [[email protected]])
AC_INIT([groonga-word2vec], 0.0.2, [[email protected]])

AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([word2vec/word2vec.cpp])
Expand Down Expand Up @@ -109,6 +109,17 @@ AC_ARG_ENABLE(word2vec,
)
AM_CONDITIONAL(NOINST_WORD2VEC, test "x$enable_word2vec" = "xno")

# GloVe

# Feature switch for the GloVe binary.  Only the values "yes" and "no" are
# accepted; anything else aborts configure.  The result is kept in the
# enable_glove shell variable and defaults to "yes" when the option is not
# given on the command line.
AC_ARG_ENABLE([glove],
  [AS_HELP_STRING([--disable-glove], [Don't install GloVe binary])],
  [case "${enableval}" in
     yes | no ) enable_glove="${enableval}" ;;
     *) AC_MSG_ERROR([bad value ${enableval} for --disable-glove]) ;;
   esac],
  [enable_glove=yes]
)
# NOINST_GLOVE is true only when enable_glove is exactly "no".
AM_CONDITIONAL([NOINST_GLOVE], [test "x$enable_glove" = "xno"])

GROONGA_REQUIRED_VERSION=4.0.3
PKG_CHECK_MODULES([GROONGA], [groonga >= ${GROONGA_REQUIRED_VERSION}])

Expand Down Expand Up @@ -162,3 +173,9 @@ if test "x$enable_word2vec" = "xno"; then
else
echo " Word2Vec: yes"
fi
# GloVe line of the configure summary: prints the value of enable_glove
# when it is "no", and "yes" in every other case (including unset).
echo
if test "x$enable_glove" != "xno"; then
echo " GloVe: yes"
else
echo " GloVe: $enable_glove"
fi
32 changes: 32 additions & 0 deletions test/suite/glove_train/min_count.actual
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
plugin_register word2vec/word2vec
[[0,0.0,0.0],true]
table_create Tags TABLE_PAT_KEY ShortText
[[0,0.0,0.0],true]
table_create Entries TABLE_NO_KEY
[[0,0.0,0.0],true]
column_create Entries title COLUMN_SCALAR ShortText
[[0,0.0,0.0],true]
column_create Entries tag COLUMN_SCALAR Tags
[[0,0.0,0.0],true]
column_create Entries tags COLUMN_VECTOR Tags
[[0,0.0,0.0],true]
load --table Entries
[
{"title": "FulltextSearch", "tag": "Library", "tags": ["Groonga", "Rroonga"]},
{"title": "Database", "tag": "Server", "tags": ["MySQL", "PostgreSQL"]}
]
[[0,0.0,0.0],2]
dump_to_train_file Entries title,tag,tags
[[0,0.0,0.0],true]
glove_vocab_count --min_count 1
[[0,0.0,0.0],true]
#|w| [glove_vocab_count] vocab_count -input-file db/db_w2v.txt -output-file db/db_glv.vocab -min-count 1
glove_cooccur
[[0,0.0,0.0],true]
#|w| [glove_cooccur] cooccur -input-file db/db_w2v.txt -output-file db/db_glv.cooccur -vocab-file db/db_glv.vocab -overflow-file db/db_glv.oveflow
glove_shuffle
[[0,0.0,0.0],true]
#|w| [glove_shuffle] shuffle -input-file db/db_glv.cooccur -output-file db/db_glv.shuffle -temp-file db/db_glv.shuffle_temp
glove_train
[[0,0.0,0.0],true]
#|w| [glove_shuffle] glove -input-file db/db_glv.shuffle -save-file db/db_glv -vocab-file db/db_glv.vocab -gradsq-file db/db_glv.gradsq
20 changes: 20 additions & 0 deletions test/suite/glove_train/min_count.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
plugin_register word2vec/word2vec

table_create Tags TABLE_PAT_KEY ShortText

table_create Entries TABLE_NO_KEY
column_create Entries title COLUMN_SCALAR ShortText
column_create Entries tag COLUMN_SCALAR Tags
column_create Entries tags COLUMN_VECTOR Tags

load --table Entries
[
{"title": "FulltextSearch", "tag": "Library", "tags": ["Groonga", "Rroonga"]},
{"title": "Database", "tag": "Server", "tags": ["MySQL", "PostgreSQL"]}
]

dump_to_train_file Entries title,tag,tags
glove_vocab_count --min_count 1
glove_cooccur
glove_shuffle
glove_train
4 changes: 4 additions & 0 deletions vendor/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@ if NOINST_WORD2VEC
else
bin_PROGRAMS += word2vec/word2vec
endif
## Add the GloVe programs to bin_PROGRAMS unless NOINST_GLOVE is true.
if !NOINST_GLOVE
bin_PROGRAMS += \
	glove/cooccur \
	glove/glove \
	glove/shuffle \
	glove/vocab_count
endif
Binary file added vendor/glove/cooccur
Binary file not shown.
Loading