-
Notifications
You must be signed in to change notification settings - Fork 0
/
ReadingData.R
executable file
·88 lines (77 loc) · 2.48 KB
/
ReadingData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
library("openxlsx")
#dftest <- "C:/Users/Konstantin/Desktop/Uni/6Semester/BachelorArbeit/BeispielDaten/metabExampleMaleFemale.xlsx"
#df <- read.xlsx("C:/Users/Konstantin/Desktop/Uni/6Semester/BachelorArbeit/BeispielDaten/QMDiab_metabolomics_Preprocessed.xlsx", sheet = 1)
#' Read a data file and locate its ID column and metabolite column block
#'
#' The file is loaded according to its extension (matched case-insensitively):
#' `xlsx` via `read.xlsx()`, `tsv` as tab-separated text, anything else as
#' comma-separated text. The columns are then scanned to find
#' * the ID column: the first column whose values contain no duplicates, and
#' * the metabolite block: the longest run of adjacent columns whose
#'   `typeof()` is `"double"` (the first such run wins on ties).
#'
#' @param filePath Path to the input file.
#' @param fileSheet Sheet passed to `read.xlsx()`; only used for `xlsx` files.
#' @return A list with elements `values` (the data frame as read), `id`
#'   (index of the ID column), `metab_start` and `metab_end` (first and last
#'   column index of the metabolite block). When the file has no double
#'   column at all, `metab_start` is 0 and `metab_end` is -1.
#' @details Stops with "NO ID COLUMN FOUND" when no column is free of
#'   duplicates, and with "ID COLUMN FOUND IN METABOLITES" when the detected
#'   ID column lies inside the metabolite block.
readFile <- function(filePath, fileSheet = 1) {
  # Lower-case the extension so that e.g. "data.TSV" or "data.XLSX" is
  # dispatched to the right reader instead of falling through to read.csv().
  ext <- tolower(tools::file_ext(filePath))
  if (ext == "xlsx") {
    df <- read.xlsx(filePath, sheet = fileSheet)
  } else if (ext == "tsv") {
    df <- read.csv(filePath, sep = "\t")
  } else {
    df <- read.csv(filePath)
  }

  # ID column: first column without duplicated values (0 when none exists).
  # Position() stops at the first hit, so later columns are not scanned.
  id_col <- Position(
    function(column) anyDuplicated(column) == 0L,
    df,
    nomatch = 0L
  )

  # Metabolite block: run-length encode the "is double" flags of the columns
  # and keep the longest TRUE run.
  is_double <- vapply(
    df,
    function(column) typeof(column) == "double",
    logical(1),
    USE.NAMES = FALSE
  )
  runs <- rle(is_double)
  run_end <- cumsum(runs$lengths)
  run_start <- run_end - runs$lengths + 1L
  double_runs <- which(runs$values)

  if (length(double_runs) > 0L) {
    # which.max() returns the first maximum, so the earliest of several
    # equally long runs is selected.
    longest <- double_runs[which.max(runs$lengths[double_runs])]
    metab_start_final <- run_start[longest]
    metab_end_final <- run_end[longest]
  } else {
    # No double column: report an empty interval.
    metab_start_final <- 0L
    metab_end_final <- -1L
  }

  if (id_col == 0) {
    stop("NO ID COLUMN FOUND")
  }
  if (id_col >= metab_start_final && id_col <= metab_end_final) {
    stop("ID COLUMN FOUND IN METABOLITES")
  }
  print("Done reading the file")

  list(
    values = df,
    id = id_col,
    metab_start = metab_start_final,
    metab_end = metab_end_final
  )
}
#' Read a phenotype file into a data frame
#'
#' The reader is chosen from the file extension (matched case-insensitively):
#' `xlsx` via `read.xlsx()`, `tsv` as tab-separated text, anything else as
#' comma-separated text. The data frame is returned without further
#' processing.
#'
#' @param filePath Path to the input file.
#' @param fileSheet Sheet passed to `read.xlsx()`; only used for `xlsx` files.
#' @return The data frame produced by the selected reader.
readPhenoFile <- function(filePath, fileSheet = 1) {
  # Lower-case the extension so that e.g. "pheno.TSV" is not misread as a
  # comma-separated file.
  ext <- tolower(tools::file_ext(filePath))
  if (ext == "xlsx") {
    df <- read.xlsx(filePath, sheet = fileSheet)
  } else if (ext == "tsv") {
    df <- read.csv(filePath, sep = "\t")
  } else {
    df <- read.csv(filePath)
  }
  df
}