Skip to content

Commit

Permalink
Constraint Programming Approach
Browse files Browse the repository at this point in the history
  • Loading branch information
tirrolo committed Jun 5, 2015
1 parent f927383 commit 65ed7a6
Show file tree
Hide file tree
Showing 73 changed files with 2,261 additions and 572 deletions.
5 changes: 5 additions & 0 deletions dataPumper/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@
<version>1.14.0</version>
</dependency>

<dependency>
<groupId>it.unibz.inf.vig</groupId>
<artifactId>vig-cpsolver</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>it.unibz.inf.vig</groupId>
<artifactId>vig-mappings-analyzer</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import it.unibz.inf.data_pumper.column_types.DateTimeColumn;
import it.unibz.inf.data_pumper.column_types.IntColumn;
import it.unibz.inf.data_pumper.column_types.StringColumn;
import it.unibz.inf.data_pumper.column_types.exceptions.ValueUnsetException;

import java.util.ArrayList;
import java.util.Collections;
Expand All @@ -36,188 +35,182 @@
import org.apache.log4j.Logger;

public class Schema{
private List<ColumnPumper<? extends Object>> columns;
private final String tableName; // Final in order to avoid the well-known "mutability" problem with the <i>equals</i> method.
private List<ColumnPumper<? extends Object>> primaryKey;

// Fields related to the pumping
private boolean filledFlag; // It keeps the information whether this schema has been already pumped once
private int maxDupsRepetition;

private int originalSize;
private int numRowsOriginal;

private static Logger logger = Logger.getLogger(Schema.class.getCanonicalName());

public Schema(String tableName){
this.tableName = tableName;
columns = new ArrayList<ColumnPumper<? extends Object>>();
primaryKey = new ArrayList<ColumnPumper<? extends Object>>();
filledFlag = false;
maxDupsRepetition = 0;
originalSize = 0;
}

public int getOriginalSize(){
return originalSize;
}

public void setOriginalSize(int size){
this.originalSize = size;
}

public int getMaxDupsRepetition(){
return maxDupsRepetition;
}

public void setMaxDupsRepetition(int maxDupsRepetition){
this.maxDupsRepetition = maxDupsRepetition;
}

public void setFilled(){
filledFlag = true;
}

public boolean isFilled(){
return filledFlag;
}

public void addColumn(String colName, String typeString, int index){

if( typeString.startsWith("int") ) columns.add(new IntColumn(colName, MySqlDatatypes.INT, index, this));
else if( typeString.startsWith("smallint") ) columns.add(new IntColumn(colName, MySqlDatatypes.INT, index, this));
else if( typeString.startsWith("decimal") ) columns.add(new IntColumn(colName, MySqlDatatypes.INT, index, TypeStringParser.getFirstBinaryDatatypeSize(typeString), TypeStringParser.getSecondBinaryDatatypeSize(typeString), this));
else if( typeString.startsWith("double") )
columns.add(new BigDecimalColumn(colName, MySqlDatatypes.DOUBLE, index, this));
else if( typeString.startsWith("bigint") ) columns.add(new BigDecimalColumn(colName, MySqlDatatypes.BIGINT, index, this));
else if( typeString.startsWith("char") ) columns.add(new StringColumn(colName, MySqlDatatypes.VARCHAR, index, TypeStringParser.getUnaryDatatypeSize(typeString), this));
else if( typeString.startsWith("varchar") ) columns.add(new StringColumn(colName, MySqlDatatypes.VARCHAR, index, TypeStringParser.getUnaryDatatypeSize(typeString), this));
else if( typeString.startsWith("text") ) columns.add(new StringColumn(colName, MySqlDatatypes.VARCHAR, index, this));
else if( typeString.startsWith("longtext") ) columns.add(new StringColumn(colName, MySqlDatatypes.VARCHAR, index, this));
else if( typeString.startsWith("datetime") ) columns.add(new DateTimeColumn(colName, MySqlDatatypes.DATETIME, index, this));
else if( typeString.startsWith("date") ) columns.add(new DateTimeColumn(colName, MySqlDatatypes.DATETIME, index, this));
else if( typeString.startsWith("timestamp") )
columns.add(new DateTimeColumn(colName, MySqlDatatypes.DATETIME, index, this));
else{
logger.error("SUPPORT FOR TYPE: "+ typeString +" IS MISSING.");
}
}

public ColumnPumper<? extends Object> getColumn(String colName){
for( ColumnPumper<? extends Object> col : columns ){
if( col.getName().equals(colName) )
return col;
}
return null;
}
/**
* Returns a list of all columns. Side-effects if the list is changed
* @return
*/
public List<ColumnPumper<? extends Object>> getColumns(){
return Collections.unmodifiableList(columns);
}
public String getTableName(){
return tableName;
}

public int getNumColumns(){
return columns.size();
}

public String toString(){
return tableName;
}
private List<ColumnPumper<? extends Object>> columns;
private final String tableName; // Final in order to avoid the well-known "mutability" problem with the <i>equals</i> method.
private List<ColumnPumper<? extends Object>> primaryKey;

public boolean isIndependent() {
// TODO Auto-generated method stub
return false;
}
@Override
public boolean equals(Object s){
if(! (s instanceof Schema) ) return false;

return this.getTableName().equals(((Schema)s).getTableName());
}

@Override
public int hashCode(){
return this.getTableName().hashCode();
// Fields related to the pumping
private boolean filledFlag; // It keeps the information whether this schema has been already pumped once
private int maxDupsRepetition;

private int originalSize;
private int numRowsOriginal;

private static Logger logger = Logger.getLogger(Schema.class.getCanonicalName());

public Schema(String tableName){
this.tableName = tableName;
columns = new ArrayList<ColumnPumper<? extends Object>>();
primaryKey = new ArrayList<ColumnPumper<? extends Object>>();
filledFlag = false;
maxDupsRepetition = 0;
originalSize = 0;
}

public int getOriginalSize(){
return originalSize;
}

public void setOriginalSize(int size){
this.originalSize = size;
}

public int getMaxDupsRepetition(){
return maxDupsRepetition;
}

public void setMaxDupsRepetition(int maxDupsRepetition){
this.maxDupsRepetition = maxDupsRepetition;
}

public void setFilled(){
filledFlag = true;
}

public boolean isFilled(){
return filledFlag;
}

public void addColumn(String colName, String typeString, int index){

if( typeString.startsWith("int") ) columns.add(new IntColumn(colName, MySqlDatatypes.INT, index, this));
else if( typeString.startsWith("smallint") ) columns.add(new IntColumn(colName, MySqlDatatypes.INT, index, this));
else if( typeString.startsWith("decimal") ) columns.add(new IntColumn(colName, MySqlDatatypes.INT, index, TypeStringParser.getFirstBinaryDatatypeSize(typeString), TypeStringParser.getSecondBinaryDatatypeSize(typeString), this));
else if( typeString.startsWith("double") )
columns.add(new BigDecimalColumn(colName, MySqlDatatypes.DOUBLE, index, this));
else if( typeString.startsWith("bigint") ) columns.add(new BigDecimalColumn(colName, MySqlDatatypes.BIGINT, index, this));
else if( typeString.startsWith("char") ) columns.add(new StringColumn(colName, MySqlDatatypes.VARCHAR, index, TypeStringParser.getUnaryDatatypeSize(typeString), this));
else if( typeString.startsWith("varchar") ) columns.add(new StringColumn(colName, MySqlDatatypes.VARCHAR, index, TypeStringParser.getUnaryDatatypeSize(typeString), this));
else if( typeString.startsWith("text") ) columns.add(new StringColumn(colName, MySqlDatatypes.VARCHAR, index, this));
else if( typeString.startsWith("longtext") ) columns.add(new StringColumn(colName, MySqlDatatypes.VARCHAR, index, this));
else if( typeString.startsWith("datetime") ) columns.add(new DateTimeColumn(colName, MySqlDatatypes.DATETIME, index, this));
else if( typeString.startsWith("date") ) columns.add(new DateTimeColumn(colName, MySqlDatatypes.DATETIME, index, this));
else if( typeString.startsWith("timestamp") )
columns.add(new DateTimeColumn(colName, MySqlDatatypes.DATETIME, index, this));
else{
logger.error("SUPPORT FOR TYPE: "+ typeString +" IS MISSING.");
}
}

public ColumnPumper<? extends Object> getColumn(String colName){
for( ColumnPumper<? extends Object> col : columns ){
if( col.getName().equals(colName) )
return col;
}
return null;
}
/**
* Returns a list of all columns. Side-effects if the list is changed
* @return
*/
public List<ColumnPumper<? extends Object>> getColumns(){
return Collections.unmodifiableList(columns);
}
public String getTableName(){
return tableName;
}

public int getNumColumns(){
return columns.size();
}

public String toString(){
return tableName;
}

public boolean isIndependent() {
// TODO Auto-generated method stub
return false;
}
@Override
public boolean equals(Object s){
if(! (s instanceof Schema) ) return false;

return this.getTableName().equals(((Schema)s).getTableName());
}

@Override
public int hashCode(){
return this.getTableName().hashCode();

}
public List<ColumnPumper<? extends Object>> getPk(){
if( primaryKey.size() == 0 ){

// INIT
for( ColumnPumper<? extends Object> c : columns ){
if( c.isPrimary() )
primaryKey.add(c);
}
}
return primaryKey;
}

public void sortColumnsAccordingToDupRatios() {
Collections.sort(columns, new Comparator<ColumnPumper<? extends Object>>() {
@Override
public int compare(ColumnPumper<? extends Object> c1, ColumnPumper<? extends Object> c2) { // Descending order

int result = 0;

result = c1.getDuplicateRatio() > c2.getDuplicateRatio() ? -1 :
c1.getDuplicateRatio() == c2.getDuplicateRatio() ? 0 : 1;

}
public List<ColumnPumper<? extends Object>> getPk(){
if( primaryKey.size() == 0 ){

// INIT
for( ColumnPumper<? extends Object> c : columns ){
if( c.isPrimary() )
primaryKey.add(c);
}
}
return primaryKey;
}

public void sortColumnsAccordingToDupRatios() {
Collections.sort(columns, new Comparator<ColumnPumper<? extends Object>>() {
@Override
public int compare(ColumnPumper<? extends Object> c1, ColumnPumper<? extends Object> c2) { // Descending order

int result = 0;

try{
result = c1.getDuplicateRatio() > c2.getDuplicateRatio() ? -1 :
c1.getDuplicateRatio() == c2.getDuplicateRatio() ? 0 : 1;
}catch(ValueUnsetException e){
e.printStackTrace();
// TODO CLOSE CONNECTIONS!
System.exit(1);
}

return result;
}
});
// Update the indexes
int index = 0;
for( ColumnPumper<? extends Object> cP : columns ){
cP.setIndex(++index);
}
}
return result;
}
});
// Update the indexes
int index = 0;
for( ColumnPumper<? extends Object> cP : columns ){
cP.setIndex(++index);
}
}

public void reset() {
for( ColumnPumper<? extends Object> cP : columns ){
cP.reset();
}
public void reset() {
for( ColumnPumper<? extends Object> cP : columns ){
cP.reset();
}
}

public void setNumRowsOriginal(int nRows) {
this.numRowsOriginal = nRows;
}
public int getNumRowsOriginal(){
return this.numRowsOriginal;
}
public void setNumRowsOriginal(int nRows) {
this.numRowsOriginal = nRows;
}

public int getNumRowsOriginal(){
return this.numRowsOriginal;
}

}
class TypeStringParser{

static int getFirstBinaryDatatypeSize(String toParse){
int indexStart = toParse.indexOf("(") + 1;
int indexEnd = toParse.indexOf(",");

return Integer.parseInt(toParse.substring(indexStart, indexEnd));
}

static int getSecondBinaryDatatypeSize(String toParse){
int indexStart = toParse.indexOf(",") + 1;
int indexEnd = toParse.indexOf(")");

return Integer.parseInt(toParse.substring(indexStart, indexEnd));
}

static int getUnaryDatatypeSize(String toParse){
int indexStart = toParse.indexOf("(") + 1;
int indexEnd = toParse.indexOf(")");

return Integer.parseInt(toParse.substring(indexStart, indexEnd));
}
static int getFirstBinaryDatatypeSize(String toParse){
int indexStart = toParse.indexOf("(") + 1;
int indexEnd = toParse.indexOf(",");

return Integer.parseInt(toParse.substring(indexStart, indexEnd));
}

static int getSecondBinaryDatatypeSize(String toParse){
int indexStart = toParse.indexOf(",") + 1;
int indexEnd = toParse.indexOf(")");

return Integer.parseInt(toParse.substring(indexStart, indexEnd));
}

static int getUnaryDatatypeSize(String toParse){
int indexStart = toParse.indexOf("(") + 1;
int indexEnd = toParse.indexOf(")");

return Integer.parseInt(toParse.substring(indexStart, indexEnd));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public BigDecimalColumn(String name, MySqlDatatypes type, int index, Schema sche
}

@Override
public void generateValues(Schema schema, DBMSConnection db) throws BoundariesUnsetException, ValueUnsetException, DebugException {
public void generateValues(Schema schema, DBMSConnection db) {

if( !this.firstIntervalSet ) throw new BoundariesUnsetException("fillFirstIntervalBoundaries() hasn't been called yet");

Expand All @@ -69,7 +69,7 @@ public void generateValues(Schema schema, DBMSConnection db) throws BoundariesUn
}

@Override
public void fillFirstIntervalBoundaries(Schema schema, DBMSConnection db) throws ValueUnsetException, SQLException, DebugException {
public void fillFirstIntervalBoundaries(Schema schema, DBMSConnection db) {

this.initNumDupsNullsFreshs();

Expand Down
Loading

0 comments on commit 65ed7a6

Please sign in to comment.