Skip to content

Latest commit

 

History

History
313 lines (291 loc) · 8.04 KB

1.getdata.md

File metadata and controls

313 lines (291 loc) · 8.04 KB

1. First, get the data from the HTML page.

// Select the 'sample' missing-value mode by storing 'sample' in the shared
// key_replace variable. getinforvalue() then passes the parsed rows through
// dealwithna_delete_by_sample(), and getinforlabel() keeps only the labels
// whose indices are listed in sample_keep.
function key_na_by_sample(){
  key_replace='sample';
}
// Select the 'feature' missing-value mode by storing 'feature' in the shared
// key_replace variable. getinforvalue() then passes the parsed rows through
// dealwithna_delete_by_feature() and trims feature_name_array to the indices
// listed in feature_keep.
function key_na_by_feature(){
  key_replace='feature';
}
// Select the 'mean' missing-value mode by storing 'mean' in the shared
// key_replace variable. getinforvalue() then returns the result of
// dealwithna_replace_with_mean() instead of the raw parsed rows.
function key_replace_with_mean(){
  key_replace='mean';
}
// Select the 'median' missing-value mode by storing 'median' in the shared
// key_replace variable. getinforvalue() then returns the result of
// dealwithna_replace_with_median() instead of the raw parsed rows.
function key_replace_with_median(){
  key_replace='median';  
}
// Read the feature rows and the labels (uploaded contents or the page's input boxes).
var data= getinforvalue('training_decision');
var label=getinforlabel('label_decision');
// sort_unique is defined elsewhere; presumably it yields the distinct label values — TODO confirm.
var label_unique=sort_unique(label);
//console.log(label_unique);
//merge_matrix()


// Append each label to the end of its data row, then replace the result with a shuffled copy.
merge_array=merge_matrix(data,label);
//console.log(merge_array);
merge_array=shufflearray(merge_array);
//console.log(merge_array);
//
// Both ratios are [0,1] here, so the train range and the validate range each span every row.
var train_range_ratio=[0,1];
var validate_range_ratio=[0,1];
var train_range=[];
var validate_range=[];
// NOTE(review): `number` is not read anywhere in this snippet — confirm whether other code uses it.
var number=parseInt(train_range_ratio[1]*(data.length)) ;

// Turn each ratio into a row index; each range ends up as [start, end],
// and generate_train_validate_data() iterates start <= i < end.
// NOTE(review): parseInt is applied to a number here, so it truncates via string conversion.
train_range.push(parseInt(train_range_ratio[0]*(data.length)));
train_range.push(parseInt(train_range_ratio[1]*(data.length)));
//
validate_range.push(parseInt(validate_range_ratio[0]*(data.length)));
validate_range.push(parseInt(validate_range_ratio[1]*(data.length)));

// Build the per-label train and validate sets from the same shuffled rows.
// The four result variables are assigned without `var`, i.e. as globals.
var train=generate_train_validate_data(label_unique,train_range,merge_array);
trainingData =train[0];
trainingLabel=train[1];
var valide=generate_train_validate_data(label_unique,validate_range,merge_array);
validateData=valide[0];
validateLabel=valide[1];

getinforvalue(trainid)

Use the id of the HTML element to get the data.

/*
Parse the training data into an array of numeric rows.

Source of the text:
  1. If the global datacontents (uploaded file contents) is defined and
     non-empty, it is used.
  2. Otherwise the value of the element with id `trainid` is used.

Each line is split on ',' and every cell is converted with parseFloat, so a
cell that is not numeric becomes NaN.

Side effect: the global feature_name_array is overwritten. When the
'feature_name' checkbox is checked, the first line is taken as the header and
is not part of the returned rows; otherwise the names are the column indices
0..n-1, where n is the number of comma-separated cells in the first line.

The global key_replace (if defined) selects the missing-value handling:
'mean', 'median', 'sample' and 'feature' delegate to the matching
dealwithna_* helper; any other value returns the parsed rows unchanged.

@param {string} trainid id of the element whose .value holds the data text.
@returns {Array} the parsed (and possibly NA-processed) rows.
*/
function getinforvalue(trainid){
  var decision_data;
  if(typeof datacontents!=="undefined" && datacontents.length>0){
    decision_data=datacontents;
  }else{
    decision_data=document.getElementById(trainid).value;
  }
  var test_feature_name=document.getElementById('feature_name').checked;

  // Accept both LF and CRLF line endings so header names do not keep a trailing '\r'.
  var lines=decision_data.split(/\r?\n/);
  var dataarr=[];
  var first_data_line=0;
  var i;
  feature_name_array=[];

  if(test_feature_name){
    // The first line is the header: keep the names as strings and skip it below.
    feature_name_array=lines[0].split(',');
    first_data_line=1;
  }else{
    // No header: name the features by column index. Count the columns of the
    // first line, not its characters.
    var feature_count=lines[0].split(',').length;
    for(i=0;i<feature_count;i++){
      feature_name_array.push(i);
    }
  }

  for(i=first_data_line;i<lines.length;i++){
    dataarr.push(lines[i].split(',').map(function(item){
      return parseFloat(item);
    }));
  }

  if(typeof key_replace==="undefined"){
    return dataarr;
  }
  if(key_replace==="mean"){
    return dealwithna_replace_with_mean(dataarr);
  }
  if(key_replace==="median"){
    return dealwithna_replace_with_median(dataarr);
  }
  if(key_replace==="sample"){
    return dealwithna_delete_by_sample(dataarr);
  }
  if(key_replace==="feature"){
    var array_feature=dealwithna_delete_by_feature(dataarr);
    // feature_keep is a global defined outside this function; presumably it is
    // filled by dealwithna_delete_by_feature with the surviving column indices — TODO confirm.
    var feature_name_array_new=[];
    for(i=0;i<feature_keep.length;i++){
      feature_name_array_new.push(feature_name_array[feature_keep[i]]);
    }
    feature_name_array=feature_name_array_new;
    return array_feature;
  }
  // Unrecognised mode: return the parsed rows instead of undefined.
  return dataarr;
}
// getinforvalue('training_decision')
// a like
// [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1]
// b (3) [Array(800), Array(800), Array(800)]
// [[1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1],...]

getinforlabel(lableid)

Use the id of the HTML element to get the label data.

/*
Parse the labels into an array of numbers, one label per line.

Source of the text:
  1. If the global labelcontents (uploaded file contents) is defined and
     non-empty, it is used.
  2. Otherwise the value of the element with id `lableid` is used.

When the global key_replace is 'sample', only the labels at the indices listed
in the global sample_keep are returned; in every other case all labels are
returned.

@param {string} [lableid] id of the element whose .value holds the label text;
                          defaults to 'label_decision'.
@returns {Array} the parsed labels (NaN for lines that are not numeric).
*/
function getinforlabel(lableid){
  // Earlier versions took no argument and always read 'label_decision';
  // keep that as the default so argument-less calls behave the same.
  var label_element_id=(typeof lableid==="undefined")?'label_decision':lableid;

  var decision_label;
  if(typeof labelcontents!=="undefined" && labelcontents.length>0){
    decision_label=labelcontents;
  }else{
    decision_label=document.getElementById(label_element_id).value;
  }

  var decision_label_a=decision_label.split('\n').map(function(item){
    return parseFloat(item);
  });

  if(typeof key_replace!=="undefined" && key_replace==="sample"){
    // sample_keep is a global defined outside this function; presumably it holds the
    // row indices that survived dealwithna_delete_by_sample — TODO confirm.
    var decision_label_new=[];
    for(var i=0;i<sample_keep.length;i++){
      decision_label_new.push(decision_label_a[sample_keep[i]]);
    }
    return decision_label_new;
  }
  return decision_label_a;
}

3. Merge the data and labels into one matrix.
merge_matrix(data,label)

Merge two arrays into one array: each label is appended to the end of its data row.

/*
Append each label to the end of its data row and return the merged rows.

Both inputs are deep-copied through a JSON round trip first, so the caller's
arrays are not modified. Note that the JSON round trip turns NaN into null.

@param {Array} a2 array of data rows (each row is an array).
@param {Array} b2 array of labels; b2[i] is appended to row i.
@returns {Array} new array of rows, each ending with its label.
*/
function merge_matrix(a2,b2){
  // Declared locally: assigning to undeclared names would leak globals
  // and throws a ReferenceError in strict mode.
  var rows=JSON.parse(JSON.stringify(a2));
  var labels=JSON.parse(JSON.stringify(b2));
  for(var i=0;i<rows.length;i++){
    rows[i].push(labels[i]);
  }
  return rows;
}

4. Shuffle the array to generate a randomly ordered array.
shufflearray(merge_array)

Shuffle the array and return the shuffled copy.

// Replace merge_array with a shuffled copy of itself (shufflearray does not modify its argument).
merge_array=shufflearray(merge_array);
/*
Return a shuffled deep copy of a2; the argument itself is not modified.

The copy is made through a JSON round trip, then shuffled in place by walking
from the last position down to the second and swapping each position with a
randomly chosen position at or before it (Math.random is the randomness source).
*/
function shufflearray(a2) {
  var shuffled = JSON.parse(JSON.stringify(a2));
  var remaining = shuffled.length;
  while (remaining > 1) {
    // Pick among the first `remaining` slots, then move the pick to the last of them.
    var pick = Math.floor(Math.random() * remaining);
    remaining -= 1;
    var held = shuffled[remaining];
    shuffled[remaining] = shuffled[pick];
    shuffled[pick] = held;
  }
  return shuffled;
}

5. Set the train and validate ranges, e.g. train: 0-0.8, validate: 0.8-1.


Set the train data range, e.g. train_range_ratio=[0,0.8]; then calculate the start and end indices of the range.

// Fractions of the rows used for training ([0,0.8]) and validation ([0.8,1]).
var train_range_ratio=[0,0.8];
var validate_range_ratio=[0.8,1];
var train_range=[];
var validate_range=[];
// Math.floor truncates the product directly; parseInt would first convert the
// number to a string and misread values printed in exponent notation.
// NOTE(review): `number` is not read anywhere in this snippet — confirm whether other code uses it.
var number=Math.floor(train_range_ratio[1]*(data.length));

// Convert each ratio into a row index: each range becomes [start, end].
train_range.push(Math.floor(train_range_ratio[0]*(data.length)));
train_range.push(Math.floor(train_range_ratio[1]*(data.length)));
//
validate_range.push(Math.floor(validate_range_ratio[0]*(data.length)));
validate_range.push(Math.floor(validate_range_ratio[1]*(data.length)));

6. Use the partition of the set to generate the train and validate data.
generate_train_validate_data(label_unique,train_range,merge_array)

Given the unique label array, the train data range, and the merged array, it returns the corresponding data and label arrays.

// Build the train and validate sets from the same merged rows, each with its own index range.
// Element 0 of each result is the per-label data, element 1 the per-label 0/1 labels.
var train=generate_train_validate_data(label_unique,train_range,merge_array);
var trainingData =train[0];
var trainingLabel=train[1];
var valide=generate_train_validate_data(label_unique,validate_range,merge_array);
var validateData=valide[0];
var validateLabel=valide[1];

/*
Build one-vs-rest data sets for every distinct label.

For each entry of label_unique, the rows merge_array[train_range[0]] up to
(but not including) merge_array[train_range[1]] are copied, and the last cell
of each copy (the original label) is replaced by 1 when it equals that entry
(loose ==) and by 0 otherwise.

Returns [dataByLabel, flagsByLabel] where
  dataByLabel[j]  = [label_unique[j], rows with the 0/1 flag as last cell]
  flagsByLabel[j] = [label_unique[j], the 0/1 flags alone]
When label_unique has fewer than two entries both arrays are empty.
*/
function generate_train_validate_data(label_unique,train_range,merge_array)
{
  var dataByLabel=[];
  var flagsByLabel=[];

  // Nothing to separate unless there are at least two distinct labels.
  if(!(label_unique.length>1))
  {
    return [dataByLabel,flagsByLabel];
  }

  for(var labelIndex=0;labelIndex<label_unique.length;labelIndex++)
  {
    var target=label_unique[labelIndex];
    var rowsForTarget=[];
    var flagsForTarget=[];
    dataByLabel.push([target,rowsForTarget]);
    flagsByLabel.push([target,flagsForTarget]);

    for(var rowIndex=train_range[0];rowIndex<train_range[1];rowIndex++)
    {
      var source=merge_array[rowIndex];
      var lastCell=source.length-1;
      // Copy every cell except the trailing label, then append the 0/1 flag.
      var features=source.slice(0,lastCell);
      var flag=(source[lastCell]==target)?1:0;
      features.push(flag);
      rowsForTarget.push(features);
      flagsForTarget.push(flag);
    }
  }
  return [dataByLabel,flagsByLabel];
}

merge_matrix(a,b)

Merge the data and labels into one array.

/*
Append each label to the end of its data row and return the merged rows.

Both inputs are deep-copied through a JSON round trip first, so the caller's
arrays are not modified. Note that the JSON round trip turns NaN into null.

@param {Array} a2 array of data rows (each row is an array).
@param {Array} b2 array of labels; b2[i] is appended to row i.
@returns {Array} new array of rows, each ending with its label.
*/
function merge_matrix(a2,b2){
  // Declared locally: assigning to undeclared names would leak globals
  // and throws a ReferenceError in strict mode.
  var rows=JSON.parse(JSON.stringify(a2));
  var labels=JSON.parse(JSON.stringify(b2));
  for(var i=0;i<rows.length;i++){
    rows[i].push(labels[i]);
  }
  return rows;
}