Merge pull request #4 from BrianSong/develop

Adds thread runner, benchmark and stats components of mlmd_bench.
google · Jul 22, 2020 · c7d771f · c7d771f
2 parents bb5d116 + e58999b
commit c7d771f
Show file tree

Hide file tree

Showing 10 changed files with 768 additions and 5 deletions.
diff --git a/ml_metadata/tools/mlmd_bench/BUILD b/ml_metadata/tools/mlmd_bench/BUILD
@@ -31,7 +31,7 @@ cc_library(
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/time",
         "@com_google_absl//absl/types:variant",
-        "//ml_metadata/metadata_store",
+        "//ml_metadata/metadata_store:metadata_store",
         "//ml_metadata/metadata_store:types",
         "//ml_metadata/proto:metadata_store_proto",
         "//ml_metadata/proto:metadata_store_service_proto",
@@ -62,10 +62,87 @@ ml_metadata_cc_test(
 
+# Statistics library for mlmd_bench; stats.cc implements ThreadStats.
 cc_library(
     name = "stats",
+    srcs = ["stats.cc"],
     hdrs = ["stats.h"],
     deps = [
+        "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/time",
         "//ml_metadata/metadata_store:types",
+        "@org_tensorflow//tensorflow/core:lib",
+    ],
+)
+
+# Unit test for the :stats library (sources in stats_test.cc).
+ml_metadata_cc_test(
+    name = "stats_test",
+    size = "small",
+    srcs = ["stats_test.cc"],
+    deps = [
+        ":stats",
+        "@com_google_absl//absl/time",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+# Library for the Benchmark class, which holds the executable workloads that
+# are created from an MLMDBenchConfig.
+cc_library(
+    name = "benchmark",
+    srcs = ["benchmark.cc"],
+    hdrs = ["benchmark.h"],
+    deps = [
+        ":fill_types_workload",
+        ":workload",
+        "@com_google_absl//absl/memory",
+        "//ml_metadata/metadata_store:types",
+        "//ml_metadata/tools/mlmd_bench/proto:mlmd_bench_proto",
+        "@org_tensorflow//tensorflow/core:lib",
+    ],
+)
+
+# Unit test for the :benchmark library (sources in benchmark_test.cc).
+ml_metadata_cc_test(
+    name = "benchmark_test",
+    size = "small",
+    srcs = ["benchmark_test.cc"],
+    deps = [
+        # Same-package labels are written with a leading ":" like every other
+        # target in this file.
+        ":benchmark",
+        "@com_google_googletest//:gtest_main",
+        "//ml_metadata/metadata_store:test_util",
+        "//ml_metadata/tools/mlmd_bench/proto:mlmd_bench_proto",
+    ],
+)
+
+# Library built from thread_runner.cc / thread_runner.h. It depends on the
+# :benchmark, :stats and :workload targets of this package.
+cc_library(
+    name = "thread_runner",
+    srcs = ["thread_runner.cc"],
+    hdrs = ["thread_runner.h"],
+    deps = [
+        ":benchmark",
+        ":stats",
+        ":workload",
+        "//ml_metadata/metadata_store:metadata_store",
+        "//ml_metadata/metadata_store:metadata_store_factory",
+        "//ml_metadata/metadata_store:types",
+        "//ml_metadata/proto:metadata_store_proto",
+        "//ml_metadata/tools/mlmd_bench/proto:mlmd_bench_proto",
+        "@org_tensorflow//tensorflow/core:lib",
+    ],
+)
+
+# Unit test for the :thread_runner library (sources in thread_runner_test.cc).
+ml_metadata_cc_test(
+    name = "thread_runner_test",
+    size = "small",
+    srcs = ["thread_runner_test.cc"],
+    deps = [
+        ":benchmark",
+        ":stats",
+        ":thread_runner",
+        ":workload",
+        "@com_google_googletest//:gtest_main",
+        "//ml_metadata/metadata_store:metadata_store",
+        "//ml_metadata/metadata_store:metadata_store_factory",
+        "//ml_metadata/metadata_store:test_util",
+        "//ml_metadata/proto:metadata_store_service_proto",
+        "//ml_metadata/tools/mlmd_bench/proto:mlmd_bench_proto",
+        "@org_tensorflow//tensorflow/core:lib",
+        "@org_tensorflow//tensorflow/core:test",
     ],
 )
 
@@ -75,7 +152,7 @@ cc_library(
     hdrs = ["util.h"],
     deps = [
         "@com_google_absl//absl/types:variant",
-        "//ml_metadata/metadata_store",
+        "//ml_metadata/metadata_store:metadata_store",
         "//ml_metadata/proto:metadata_store_proto",
         "//ml_metadata/proto:metadata_store_service_proto",
         "//ml_metadata/tools/mlmd_bench/proto:mlmd_bench_proto",
@@ -89,7 +166,7 @@ cc_library(
     deps = [
         ":stats",
         "@com_google_absl//absl/time",
-        "//ml_metadata/metadata_store",
+        "//ml_metadata/metadata_store:metadata_store",
         "//ml_metadata/metadata_store:types",
         "@org_tensorflow//tensorflow/core:lib",
     ],
@@ -103,7 +180,7 @@ ml_metadata_cc_test(
         ":workload",
         #"@com_google_googletest//:gtest",
         "@com_google_googletest//:gtest_main",
-        "//ml_metadata/metadata_store",
+        "//ml_metadata/metadata_store:metadata_store",
         "//ml_metadata/metadata_store:metadata_store_factory",
         "//ml_metadata/metadata_store:types",
         "//ml_metadata/proto:metadata_store_proto",

diff --git a/ml_metadata/tools/mlmd_bench/benchmark.cc b/ml_metadata/tools/mlmd_bench/benchmark.cc
@@ -0,0 +1,55 @@
+/* Copyright 2020 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "ml_metadata/tools/mlmd_bench/benchmark.h"
+
+#include <vector>
+
+#include "absl/memory/memory.h"
+#include "ml_metadata/metadata_store/types.h"
+#include "ml_metadata/tools/mlmd_bench/fill_types_workload.h"
+#include "ml_metadata/tools/mlmd_bench/proto/mlmd_bench.pb.h"
+#include "ml_metadata/tools/mlmd_bench/workload.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace ml_metadata {
+namespace {
+
+// Creates the executable workload described by `workload_config` and stores
+// it in `workload`, replacing whatever `workload` owned before the call.
+//
+// Only FillTypes workloads are handled here: when `workload_config` carries no
+// `fill_types_config`, the failure is reported through LOG(FATAL).
+void CreateWorkload(const WorkloadConfig& workload_config,
+                    std::unique_ptr<WorkloadBase>& workload) {
+  if (!workload_config.has_fill_types_config()) {
+    LOG(FATAL) << "Cannot find corresponding workload!";
+  }
+  // Forward the constructor arguments straight to make_unique so FillTypes is
+  // built directly on the heap. Wrapping them in a temporary FillTypes first
+  // would add a needless copy/move construction.
+  workload = absl::make_unique<FillTypes>(
+      workload_config.fill_types_config(), workload_config.num_operations());
+}
+
+}  // namespace
+
+// Builds one executable workload for every `workload_configs` entry of
+// `mlmd_bench_config`, keeping them in the same order as the configuration.
+Benchmark::Benchmark(const MLMDBenchConfig& mlmd_bench_config) {
+  // Pre-size the list so that every configuration has a slot which
+  // CreateWorkload() can fill in.
+  const int num_configs = mlmd_bench_config.workload_configs_size();
+  workloads_.resize(num_configs);
+
+  int slot = 0;
+  for (const auto& config : mlmd_bench_config.workload_configs()) {
+    CreateWorkload(config, workloads_[slot]);
+    ++slot;
+  }
+}
+
+// Returns a non-owning pointer to the workload stored at `workload_index`.
+// The index is used as-is; no range check is performed here.
+WorkloadBase* Benchmark::workload(const int64 workload_index) {
+  const std::unique_ptr<WorkloadBase>& slot = workloads_[workload_index];
+  return slot.get();
+}
+
+}  // namespace ml_metadata
diff --git a/ml_metadata/tools/mlmd_bench/benchmark.h b/ml_metadata/tools/mlmd_bench/benchmark.h
@@ -0,0 +1,46 @@
+/* Copyright 2020 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef ML_METADATA_TOOLS_MLMD_BENCH_BENCHMARK_H
+#define ML_METADATA_TOOLS_MLMD_BENCH_BENCHMARK_H
+
+#include <vector>
+
+#include "ml_metadata/metadata_store/types.h"
+#include "ml_metadata/tools/mlmd_bench/proto/mlmd_bench.pb.h"
+#include "ml_metadata/tools/mlmd_bench/workload.h"
+
+namespace ml_metadata {
+
+// Contains a list of workloads to be executed by ThreadRunner.
+// The executable workloads are generated according to `mlmd_bench_config`.
+class Benchmark {
+ public:
+  // Builds the executable workloads from the workload configurations of
+  // `mlmd_bench_config`. Declared explicit so that an MLMDBenchConfig is never
+  // implicitly converted into a Benchmark.
+  explicit Benchmark(const MLMDBenchConfig& mlmd_bench_config);
+  ~Benchmark() = default;
+
+  // Returns a particular executable workload given `workload_index`. The
+  // pointer is non-owning: the workload stays owned by this Benchmark.
+  WorkloadBase* workload(int64 workload_index);
+
+  // Returns the number of executable workloads held by this benchmark.
+  int64 num_workloads() const {
+    return static_cast<int64>(workloads_.size());
+  }
+
+ private:
+  // A list of executable workloads.
+  std::vector<std::unique_ptr<WorkloadBase>> workloads_;
+};
+
+}  // namespace ml_metadata
+
+#endif  // ML_METADATA_TOOLS_MLMD_BENCH_BENCHMARK_H
diff --git a/ml_metadata/tools/mlmd_bench/benchmark_test.cc b/ml_metadata/tools/mlmd_bench/benchmark_test.cc
@@ -0,0 +1,62 @@
+/* Copyright 2020 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "ml_metadata/tools/mlmd_bench/benchmark.h"
+
+#include <gtest/gtest.h>
+#include "ml_metadata/metadata_store/test_util.h"
+#include "ml_metadata/tools/mlmd_bench/proto/mlmd_bench.pb.h"
+
+namespace ml_metadata {
+namespace {
+
+// Tests that constructing a Benchmark turns every workload configuration into
+// an executable workload.
+TEST(BenchmarkTest, CreatWorkloadTest) {
+  MLMDBenchConfig mlmd_bench_config =
+      testing::ParseTextProtoOrDie<MLMDBenchConfig>(
+          R"(
+            workload_configs: {
+              fill_types_config: {
+                update: false
+                specification: ARTIFACT_TYPE
+                num_properties: { minimum: 1 maximum: 10 }
+              }
+              num_operations: 100
+            }
+            workload_configs: {
+              fill_types_config: {
+                update: true
+                specification: EXECUTION_TYPE
+                num_properties: { minimum: 1 maximum: 10 }
+              }
+              num_operations: 500
+            }
+            workload_configs: {
+              fill_types_config: {
+                update: false
+                specification: CONTEXT_TYPE
+                num_properties: { minimum: 1 maximum: 10 }
+              }
+              num_operations: 300
+            }
+          )");
+  Benchmark benchmark(mlmd_bench_config);
+  // Checks that all workload configurations have transformed into executable
+  // workloads inside benchmark.
+  EXPECT_EQ(benchmark.num_workloads(),
+            mlmd_bench_config.workload_configs_size());
+  // A matching count alone does not show that the workloads were created, so
+  // also check that every slot holds an actual workload.
+  for (int64 i = 0; i < benchmark.num_workloads(); ++i) {
+    EXPECT_TRUE(benchmark.workload(i) != nullptr) << "workload index: " << i;
+  }
+}
+
+}  // namespace
+}  // namespace ml_metadata
diff --git a/ml_metadata/tools/mlmd_bench/stats.cc b/ml_metadata/tools/mlmd_bench/stats.cc
@@ -0,0 +1,86 @@
+/* Copyright 2020 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "ml_metadata/tools/mlmd_bench/stats.h"
+
+#include <vector>
+
+#include "absl/strings/str_format.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "ml_metadata/metadata_store/types.h"
+
+namespace ml_metadata {
+
+ThreadStats::ThreadStats()
+    : accumulated_elapsed_time_(absl::Nanoseconds(0)),
+      done_(0),
+      bytes_(0),
+      next_report_(100) {}
+
+// Records the current time as the start time of these stats.
+void ThreadStats::Start() {
+  const absl::Time now = absl::Now();
+  start_ = now;
+}
+
+// Accounts for one finished operation described by `op_stats` and, once
+// `approx_total_done` has reached the next report point, prints the progress
+// to stderr.
+//
+// The gap between two reports widens as the run advances: while `next_report_`
+// is below a threshold in the table, the next report is scheduled one tenth of
+// that threshold later (100 ops below 1000, 500 ops below 5000, ...). Beyond
+// the last threshold the step stays at one tenth of the last threshold.
+void ThreadStats::Update(const OpStats& op_stats,
+                         const int64 approx_total_done) {
+  bytes_ += op_stats.transferred_bytes;
+  accumulated_elapsed_time_ += op_stats.elapsed_time;
+  done_++;
+  if (approx_total_done < next_report_) {
+    return;
+  }
+  static const int report_thresholds[]{1000,   5000,   10000,  50000,
+                                       100000, 500000, 1000000};
+  // The step is derived from `next_report_` itself, which persists across
+  // calls. A per-call local index would restart at the first threshold on
+  // every call and the step would never grow.
+  int step = 1000000 / 10;
+  for (const int threshold : report_thresholds) {
+    if (next_report_ < threshold) {
+      step = threshold / 10;
+      break;
+    }
+  }
+  next_report_ += step;
+  // Reports the current progress with `approx_total_done`. The cast keeps the
+  // argument in line with the %lld conversion regardless of how int64 is
+  // defined.
+  std::fprintf(stderr, "... finished %lld ops%30s\r",
+               static_cast<long long>(approx_total_done), "");
+  std::fflush(stderr);
+}
+
+// Records the current time as the finish time of these stats.
+void ThreadStats::Stop() {
+  const absl::Time now = absl::Now();
+  finish_ = now;
+}
+
+// Folds `other` into these stats: the counters are summed and the time window
+// is widened so that it covers both stats.
+void ThreadStats::Merge(const ThreadStats& other) {
+  // Widen the window first: keep the earlier start and the later finish.
+  if (other.start() < start_) {
+    start_ = other.start();
+  }
+  if (finish_ < other.finish()) {
+    finish_ = other.finish();
+  }
+  // Then add up the operation count, the byte count and the elapsed time.
+  accumulated_elapsed_time_ += other.accumulated_elapsed_time();
+  bytes_ += other.bytes();
+  done_ += other.done();
+}
+
+// Prints a one-line summary for `specification` to stdout: the average time
+// per operation in microseconds and, when any bytes were transferred and a
+// positive time window was recorded, the transfer rate in KB/s.
+void ThreadStats::Report(const std::string& specification) {
+  std::string extra;
+  if (bytes_ > 0) {
+    // Rate is computed on actual elapsed time (latest end time minus
+    // earliest start time of each thread) instead of the sum of per-thread
+    // elapsed times.
+    const double elapsed_seconds = absl::ToDoubleSeconds(finish_ - start_);
+    // The seconds are kept as a floating-point value: truncating them to an
+    // integer would turn every run shorter than one second into a division
+    // by zero. Without a positive window the rate is left out.
+    if (elapsed_seconds > 0) {
+      extra =
+          absl::StrFormat("%6.1f KB/s", (bytes_ / 1024.0) / elapsed_seconds);
+    }
+  }
+  // Keep the fractional microseconds that the %11.3f conversion prints, and
+  // report 0 rather than dividing by zero when no operation was recorded.
+  const double total_micros =
+      absl::ToDoubleMicroseconds(accumulated_elapsed_time_);
+  const double micros_per_op = done_ > 0 ? total_micros / done_ : 0.0;
+  std::fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
+               specification.c_str(), micros_per_op,
+               (extra.empty() ? "" : " "), extra.c_str());
+  std::fflush(stdout);
+}
+
+}  // namespace ml_metadata