-
Notifications
You must be signed in to change notification settings - Fork 0
/
getFile.R
106 lines (86 loc) · 3.56 KB
/
getFile.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#This work is co-funded by the EOSC-hub project (Horizon 2020) under Grant number 777536.
# Doron Goldfarb (doron DOT goldfarb AT umweltbundesamt DOT at)
# Johannes Kobler (johannes DOT kobler AT umweltbundesamt DOT at)
# Environment Agency Austria, 2020
#' Download a zip archive of semicolon-separated tables and merge them into a
#' single time-series data frame.
#'
#' Every non-empty member of the archive is read with `sep = ";"`; its first
#' row is treated as the header line. All members must share the same header.
#' Alternative column names (SITE_CODE, SUBST, VAL, SHOUR, SDAY, SMONTH, SYEAR,
#' SMINUTE, SSECOND) are mapped onto the canonical ones, and a DATE column is
#' split into YEAR/MONTH/DAY (and time parts) when those are not present.
#'
#' @param username User name set on the curl handle used for the download.
#' @param password Password set on the curl handle used for the download.
#' @param dav URL of the zip archive to download.
#' @return A data.frame with the columns VALUE (numeric, decimal commas
#'   converted), FIELDNAME (SITECODE and field name joined by "_"), DAY, MONTH,
#'   YEAR, HOUR, MINUTE and SECOND. Returns `0` when the archive members do not
#'   share the same header line, and `NULL` when the archive holds no non-empty
#'   member or YEAR/MONTH/DAY cannot be determined. Signals an error when
#'   SITECODE, FIELDNAME or VALUE (or their aliases) are missing.
getFile <- function(username, password, dav) {
  pkgTest("curl")
  pkgTest("stringdist")

  h <- new_handle()
  handle_setopt(h, username = username)
  handle_setopt(h, password = password)

  tmpzip <- tempfile(fileext = ".zip")
  # Remove the temporary archive even when reading or parsing fails.
  on.exit(unlink(tmpzip), add = TRUE)
  # The credentials live on the handle, so the download has to go through it.
  curl_download(dav, destfile = tmpzip, mode = "wb", handle = h)

  files <- unzip(tmpzip, list = TRUE)
  members <- files$Name[files$Length > 0]
  print(paste("Extract Files from", tmpzip))
  if (length(members) == 0) {
    return(NULL)
  }
  tabs <- lapply(members, function(member) {
    print(member)
    read.table(unz(tmpzip, member), stringsAsFactors = FALSE, sep = ";")
  })
  print("Done extracting")

  # The first row of every table is its header line.
  cnames <- vapply(
    tabs,
    function(tab) paste(tab[1, ], collapse = ";"),
    character(1)
  )
  if (sum(stringdistmatrix(cnames)) > 0) {
    # different colnames encountered, bail out
    return(0)
  }

  # Bind once, after the header check, instead of growing inside the loop.
  resultat <- do.call(
    rbind,
    lapply(tabs, function(tab) tab[-1, , drop = FALSE])
  )
  colnames(resultat) <- unlist(strsplit(cnames[1], ";"))
  print(paste("Set colnames to", colnames(resultat)))

  tsdata <- resultat
  # All presence checks look at tsdata so that aliased / derived columns count.
  has <- function(name) name %in% colnames(tsdata)

  # swap alternative aliases
  aliases <- c(
    SITECODE = "SITE_CODE", FIELDNAME = "SUBST", VALUE = "VAL",
    HOUR = "SHOUR", DAY = "SDAY", MONTH = "SMONTH", YEAR = "SYEAR",
    MINUTE = "SMINUTE", SECOND = "SSECOND"
  )
  for (target in names(aliases)) {
    alias <- aliases[[target]]
    if (!has(target) && has(alias)) {
      tsdata[[target]] <- tsdata[[alias]]
    }
  }

  missing_cols <- setdiff(c("SITECODE", "FIELDNAME", "VALUE"), colnames(tsdata))
  if (length(missing_cols) > 0) {
    stop(
      "getFile: required column(s) missing: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Values may use a decimal comma; convert after the VAL alias was applied.
  tsdata[["VALUE"]] <- as.numeric(gsub(",", ".", tsdata[["VALUE"]]))

  if (has("DATE") && !has("DAY") && !has("MONTH") && !has("YEAR")) {
    dt1 <- as.POSIXlt(tsdata[["DATE"]], format = "%Y%m%d%H%M%S", tz = "UTC")
    if (any(is.na(dt1))) {
      dt1 <- as.POSIXlt(tsdata[["DATE"]], format = "%Y%m%d", tz = "UTC")
    }
    if (any(is.na(dt1))) {
      dt1 <- as.POSIXlt(
        tsdata[["DATE"]],
        format = "%Y-%m-%d %H:%M:%S",
        tz = "UTC"
      )
    }
    if (!any(is.na(dt1))) {
      tsdata$YEAR <- dt1$year + 1900
      tsdata$MONTH <- dt1$mon + 1
      tsdata$DAY <- dt1$mday
      # Keep explicit time columns if the file already provides them.
      if (!has("HOUR")) tsdata$HOUR <- dt1$hour
      if (!has("MINUTE")) tsdata$MINUTE <- dt1$min
      if (!has("SECOND")) tsdata$SECOND <- dt1$sec
    }
  }

  if (!has("YEAR") || !has("MONTH") || !has("DAY")) {
    return(NULL)
  }

  # Missing time-of-day parts default to zero.
  for (field in c("HOUR", "MINUTE", "SECOND")) {
    if (!has(field)) {
      tsdata[[field]] <- rep(0, nrow(tsdata))
    }
  }

  tsdata <- tsdata[, c(
    "SITECODE", "VALUE", "FIELDNAME",
    "DAY", "MONTH", "YEAR", "HOUR", "MINUTE", "SECOND"
  )]
  rownames(tsdata) <- seq_len(nrow(tsdata))
  tsdata$FIELDNAME <- paste(tsdata$SITECODE, tsdata$FIELDNAME, sep = "_")
  tsdata$SITECODE <- NULL
  tsdata
}