Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

init #3

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
69 changes: 69 additions & 0 deletions Thread-1/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,89 @@ cc_binary(
srcs = [
"q1.cc"
],
deps = [
"//lib:embedding_lib",
"//lib:instruction_lib",
"//lib:utils_lib",
"//lib:model_lib"
],
copts = [
"-std=c++11",
],
data = glob(["data/q1*"]),
linkopts = ["-lpthread"],
)

cc_binary(
name = "q2",
srcs = [
"q2.cc"
],
deps = [
"//lib:embedding_lib",
"//lib:instruction_lib",
"//lib:utils_lib",
"//lib:model_lib"
],
copts = [
"-std=c++11",
],
data = glob(["data/q2*"]),
linkopts = ["-lpthread"],
)

cc_binary(
name = "q3",
srcs = [
"q3.cc"
],
deps = [
"//lib:embedding_lib",
"//lib:instruction_lib",
"//lib:utils_lib",
"//lib:model_lib"
],
copts = [
"-std=c++11",
],
data = glob(["data/q3*"]),
linkopts = ["-lpthread"],
)

cc_binary(
name = "q4",
srcs = [
"q4.cc"
],
deps = [
"//lib:embedding_lib",
"//lib:instruction_lib",
"//lib:utils_lib",
"//lib:model_lib"
],
copts = [
"-std=c++11",
],
data = glob(["data/q4*"]),
linkopts = ["-lpthread"],
)

cc_binary(
name = "q5",
srcs = [
"q5.cc"
],
deps = [
"//lib:embedding_lib",
"//lib:instruction_lib",
"//lib:utils_lib",
"//lib:model_lib"
],
copts = [
"-std=c++11",
],
data = glob(["data/q4*"]),
linkopts = ["-lpthread"],
)

cc_test(
Expand Down
7 changes: 6 additions & 1 deletion Thread-1/data/q4_instruction.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,9 @@
2 3 2 1 7 8 9
1 1 3 1 2
1 2 4 0 2
2 2 -1 4 5 6 3 7 9
2 2 -1 4 5 6 3 7 9
1 8 2 1 6
1 9 2 0 6
2 6 7 0 2 3 5
2 3 3 2 3 4 5
1 6 8 0 3
20 changes: 16 additions & 4 deletions Thread-1/lib/embedding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,20 @@
#include <iostream>
#include <sstream>
#include <cmath>
#include <atomic>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>

#include "utils.h"
#include "embedding.h"

namespace proj1 {
namespace proj1 {
RWLock write_to_stdout_lock = RWLock();

Embedding::Embedding(int length) {
embbedingAssert(length > 0, "Non-positive length encountered!", NON_POSITIVE_LEN);
this->data = new double[length];
for (int i = 0; i < length; ++i) {
this->data[i] = (double) i / 10.0;
Expand Down Expand Up @@ -67,8 +74,10 @@ std::string Embedding::to_string() {
}

void Embedding::write_to_stdout() {
write_to_stdout_lock.write_lock();
std::string prefix("[OUTPUT]");
std::cout << prefix << this->to_string() << '\n';
write_to_stdout_lock.write_unlock();
}

Embedding Embedding::operator+(const Embedding &another) {
Expand Down Expand Up @@ -137,7 +146,9 @@ Embedding Embedding::operator/(const double value) {

bool Embedding::operator==(const Embedding &another) {
for (int i = 0; i < this->length; ++i) {
if(fabs(this->data[i]-another.data[i])>1.0e-6)return false;
if(fabs(this->data[i]-another.data[i])>1.0e-6) {
return false;
}
}
return true;
}
Expand Down Expand Up @@ -214,8 +225,9 @@ void EmbeddingHolder::update_embedding(
}

bool EmbeddingHolder::operator==(const EmbeddingHolder &another) {
if (this->get_n_embeddings() != another.emb_matx.size())
if (this->get_n_embeddings() != another.emb_matx.size()) {
return false;
}
for (int i = 0; i < (int)this->emb_matx.size(); ++i) {
if(!(*(this->emb_matx[i]) == *(another.get_embedding(i)))){
return false;
Expand All @@ -224,4 +236,4 @@ bool EmbeddingHolder::operator==(const EmbeddingHolder &another) {
return true;
}

} // namespace proj1
} // namespace proj1
85 changes: 82 additions & 3 deletions Thread-1/lib/embedding.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@

#include <string>
#include <vector>
#include <atomic>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <typeinfo>

namespace proj1 {

Expand All @@ -11,9 +17,81 @@ enum EMBEDDING_ERROR {
NON_POSITIVE_LEN
};

class RWLock {
public:
int AR=0, WW=0, AW=0, WR=0;
std::mutex *mtx;
std::condition_variable *okread;
std::condition_variable *okwrite;
RWLock(){
mtx = new std::mutex;
okread = new std::condition_variable;
okwrite = new std::condition_variable;
}

~RWLock() {
delete mtx;
delete okread;
delete okwrite;
}

void read_lock() {
std::unique_lock<std::mutex> lck(*mtx);
//printf("Start read lock\n");
while ((this->AW + this->WW) > 0) {
this->WR ++;
this->okread->wait(lck);
this->WR --;
}
this->AR ++;
lck.unlock();
//printf("Finish read lock\n");
}

void read_unlock() {
std::unique_lock<std::mutex> lck(*mtx);
//printf("Start read unlock\n");
this->AR --;
if(this->AR == 0 && this->WW > 0)
this->okwrite->notify_all();
lck.unlock();
//printf("Finish read unlock\n");
}

void write_lock() {
std::unique_lock<std::mutex> lck(*mtx);
//printf("Start write lock\n");
while ((this->AW + this->AR) > 0) {
this->WW ++;
this->okwrite->wait(lck);
this->WW --;
}
this->AW ++;
lck.unlock();
//printf("Finish write lock\n");
}

void write_unlock() {
std::unique_lock<std::mutex> lck(*mtx);
//printf("Start write unlock\n");
this->AW --;
if (this->WW > 0)
this->okwrite->notify_all();
else if (this->WR > 0)
this->okread->notify_all();
lck.unlock();
//printf("Finish write unlock\n");
}

private:

//volatile std::atomic<int> AR=0, WW=0, AW=0, WR=0; is a possible choice
};

class Embedding{
public:
Embedding() {}
RWLock lock = RWLock();
Embedding(){}
Embedding(int); // Random init an embedding
Embedding(int, double*);
Embedding(int, std::string);
Expand All @@ -35,15 +113,16 @@ class Embedding{
Embedding operator/(const double);
bool operator==(const Embedding&);
private:
int length;
double* data;
int length;
};

using EmbeddingMatrix = std::vector<Embedding*>;
using EmbeddingGradient = Embedding;

class EmbeddingHolder{
public:
RWLock lock = RWLock();
EmbeddingHolder(std::string filename);
EmbeddingHolder(EmbeddingMatrix &data);
~EmbeddingHolder();
Expand All @@ -63,4 +142,4 @@ class EmbeddingHolder{
};

} // namespace proj1
#endif // THREAD_LIB_EMBEDDING_H_
#endif // THREAD_LIB_EMBEDDING_H_
15 changes: 13 additions & 2 deletions Thread-1/lib/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,25 @@ double similarity(Embedding* embA, Embedding* embB) {
return similarity;
}

// NOTE: do not rely on this exact implementation -- it may get modified.
EmbeddingGradient* calc_gradient(Embedding* embA, Embedding* embB, int label) {
/* For simplicity, here we just simulate the gradient backprop for:
1. a dot product between embeddings
2. a sigmoid activation function
3. a binary cross entropy loss
*/
embA->lock.read_lock();
embB->lock.read_lock();
if(label == -1) {
label = embB->get_data()[0] > 1e-8? 0: 1;
}
double distance = similarity(embA, embB);
embA->lock.read_unlock();
double pred = sigmoid(distance);
double loss = binary_cross_entropy_backward((double) label, pred);
loss *= sigmoid_backward(distance);
EmbeddingGradient *gradA = new Embedding((*embB) * loss);
embB->lock.read_unlock();

// Here we simulate a slow calculation
a_slow_function(10);
Expand All @@ -37,20 +45,23 @@ EmbeddingGradient* cold_start(Embedding* user, Embedding* item) {
// Do some downstream work, e.g. let the user watch this video
a_slow_function(10);
// Then we collect a label, e.g. whether the user finished watching the video
int label = item->get_data()[0] > 1e-8? 0: 1;
return calc_gradient(user, item, label);
return calc_gradient(user, item, -1);
}

Embedding* recommend(Embedding* user, std::vector<Embedding*> items) {
Embedding* maxItem;
double sim, maxSim = -inf;
user->lock.read_lock();
for (auto item: items) {
item->lock.read_lock();
sim = similarity(user, item);
item->lock.read_unlock();
if (sim > maxSim) {
maxItem = item;
maxSim = sim;
}
}
user->lock.read_unlock();
return maxItem;
}

Expand Down
4 changes: 2 additions & 2 deletions Thread-1/lib/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
namespace proj1 {

void a_slow_function(int seconds) {
//std::this_thread::sleep_for(std::chrono::seconds(seconds));
std::this_thread::sleep_for(std::chrono::seconds(seconds));
}

double sigmoid(double x) {
Expand Down Expand Up @@ -42,4 +42,4 @@ AutoTimer::~AutoTimer() {
std::cout << m_name << " : " << dur.count() << " usec\n";
}

} // namespace proj1
} // namespace proj1
Loading