Skip to content

Commit

Permalink
update table
Browse files Browse the repository at this point in the history
  • Loading branch information
mjbench committed Jul 7, 2024
1 parent 8925fe4 commit d265ee6
Show file tree
Hide file tree
Showing 2 changed files with 293 additions and 355 deletions.
230 changes: 42 additions & 188 deletions javascript/benmark_table.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,194 +102,48 @@ var barColorFn = function (value, formatterParams) {

document.addEventListener('DOMContentLoaded', function () {
Promise.all([
fetch('website/data/benchmark.json').then(response => response.json()),
fetch('website/data/feedback_comparison.json').then(response => response.json()),
fetch('website/data/eurus_code_sr_vs_k_series.json').then(response => response.json()),
fetch('website/data/eurus_math_sr_vs_k_series.json').then(response => response.json())
fetch('website/data/my_benchmark.json').then(response => response.json()),
// Add other fetch calls if necessary
])
.then(([
benchmark_tabledata,
benchmark_feedback_efficancy_tabledata,
eurus_code_sr_vs_k_series,
eurus_math_sr_vs_k_series]) => {

// 1. Benchmark Table
benchmark_tabledata.forEach(row => {
row.line = [row['1'], row['2'], row['3'], row['4'], row['5']]
})

var table = new Tabulator("#benchmark-table", {
data: benchmark_tabledata,
layout: "fitColumns",
responsiveLayout: "collapse",
movableColumns: false,
initialSort: [
{ column: "5", dir: "desc" },
],
columnDefaults: {
tooltip: true,
},
columns: [
{ title: "Model Family", field: "model", widthGrow: 1.5, minWidth: 180 },
{ title: "Size", field: "size", widthGrow: 0.9, minWidth: 60 },
{ title: "Type", field: "type", widthGrow: 0.9, minWidth: 60 },
{//create column group
title: "Tool-augmented Task-Solving Success Rate (within k turns)",
columns: [
{ title: "k = 1", field: "1", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 2", field: "2", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 3", field: "3", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 4", field: "4", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 5", field: "5", sorter: "number", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Slope", field: "Slope", sorter: "number", minWidth: 90 },
],
},
{
title: "Ability to Leverage Language Feedback",
columns: [
{
title: "k = 5 (+Feedback)", field: "Success Rate (5 turn) w\/ GPT-4 Feedback",
hozAlign: "center", formatter: colorFormatter,
widthGrow: 1.7,
minWidth: 180,
},
{
title: "ΔFeedback", field: "Delta Feedback",
widthGrow: 1.5,
minWidth: 80
},
],
},
],
});


var eurus_code_table = new Tabulator("#eurus-code-table", {
data: eurus_code_sr_vs_k_series,
layout: "fitColumns",
responsiveLayout: "collapse",
movableColumns: false,
initialSort: [
{ column: "5", dir: "desc" },
],
columnDefaults: {
tooltip: true,
},
columns: [
{ title: "Model Family", field: "model", widthGrow: 2, minWidth: 180 },
{ title: "Size", field: "size", widthGrow: 0.9, minWidth: 60 },
{ title: "Type", field: "type", widthGrow: 0.9, minWidth: 60 },
{//create column group
title: "Tool-augmented Task-Solving Success Rate (within k turns, code subset)",
columns: [
{ title: "k = 1", field: "1", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 2", field: "2", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 3", field: "3", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 4", field: "4", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 5", field: "5", sorter: "number", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Slope", field: "Slope", sorter: "number", minWidth: 90 },
],
},
],
});

var eurus_math_table = new Tabulator("#eurus-math-table", {
data: eurus_math_sr_vs_k_series,
layout: "fitColumns",
responsiveLayout: "collapse",
movableColumns: false,
initialSort: [
{ column: "5", dir: "desc" },
],
columnDefaults: {
tooltip: true,
},
columns: [
{ title: "Model Family", field: "model", widthGrow: 2, minWidth: 180 },
{ title: "Size", field: "size", widthGrow: 0.9, minWidth: 60 },
{ title: "Type", field: "type", widthGrow: 0.9, minWidth: 60 },
{//create column group
title: "Tool-augmented Task-Solving Success Rate (within k turns, math subset)",
columns: [
{ title: "k = 1", field: "1", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 2", field: "2", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 3", field: "3", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 4", field: "4", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "k = 5", field: "5", sorter: "number", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Slope", field: "Slope", sorter: "number", minWidth: 90 },
],
},
],
});

// 2. Benchmark Feedback Efficacy Table
benchmark_feedback_efficancy_tabledata.forEach(row => {
row.model = row.feedback_provider_info.model;
row.size = row.feedback_provider_info.size;
row.type = row.feedback_provider_info.type;
})

var feedback_efficacy_table = new Tabulator("#benchmark-feedback-efficancy-table", {
data: benchmark_feedback_efficancy_tabledata,
layout: "fitColumns",
// responsiveLayout: "collapse",
responsiveLayoutCollapseFormatter: function (data) {
//data - an array of objects containing the column title and value for each cell
var list = document.createElement("ul");

data.forEach(function (col) {
console.log(col);
let item = document.createElement("li");
item.innerHTML = "<strong>" + col.title + "</strong> - " + col.value;
list.appendChild(item);
});

return Object.keys(data).length ? list : "";
},
movableColumns: false,
initialSort: [
{ column: "evaluated_LLM_feedback", dir: "desc" },
],
columnDefaults: {
tooltip: true,
},
columns: [
{
title: "Feedback Provider",
columns: [
{ title: "Model Family", field: "model", widthGrow: 1, minWidth: 180 },
{ title: "Size", field: "size", minWidth: 90 },
{ title: "Type", field: "type", minWidth: 90 },
]
},
{
title: "&Delta; Task Success Rate compared to GPT-3.5",
field: "SR5_difference",
formatter: "progress",
sorter: "number",
minWidth: 400,
widthGrow: 3,
formatterParams: {
min: -50, max: 50,
legend: true,
color: barColorFn,
},
},
{
title: "&Delta; GPT-3.5 Success Rate with Provided Feedback",
field: "evaluated_LLM_feedback",
sorter: "number",
formatter: "progress",
minWidth: 400,
widthGrow: 3,
formatterParams: {
min: -30, max: 30,
legend: true,
color: barColorFn
},
},
]
});
.then(([my_benchmark_data]) => {
// Process your benchmark data as needed
my_benchmark_data.forEach(row => {
row.line = [
row['alignment_avg_with_tie'],
row['alignment_avg_without_tie'],
row['safety_avg_with_tie'],
row['safety_avg_without_tie'],
row['artifact_avg_with_tie'],
row['artifact_avg_without_tie'],
row['bias_acc'],
row['bias_nds'],
row['bias_ges']
];
});

})
var table = new Tabulator("#benchmark-table", {
data: my_benchmark_data,
layout: "fitColumns",
responsiveLayout: "collapse",
movableColumns: false,
initialSort: [
{ column: "alignment_avg_without_tie", dir: "desc" },
],
columnDefaults: {
tooltip: true,
},
columns: [
{ title: "Model", field: "model", widthGrow: 1.5, minWidth: 180 },
{ title: "Alignment Avg w/ Tie", field: "alignment_avg_with_tie", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Alignment Avg w/o Tie", field: "alignment_avg_without_tie", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Safety Avg w/ Tie", field: "safety_avg_with_tie", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Safety Avg w/o Tie", field: "safety_avg_without_tie", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Artifact Avg w/ Tie", field: "artifact_avg_with_tie", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Artifact Avg w/o Tie", field: "artifact_avg_without_tie", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Bias ACC", field: "bias_acc", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Bias NDS", field: "bias_nds", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
{ title: "Bias GES", field: "bias_ges", hozAlign: "center", formatter: colorFormatter, minWidth: 90 },
],
});
});
});
Loading

0 comments on commit d265ee6

Please sign in to comment.