-
Notifications
You must be signed in to change notification settings - Fork 11
/
AC_ValueSet_fromFileColumns.groovy
117 lines (98 loc) · 3.03 KB
/
AC_ValueSet_fromFileColumns.groovy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/*** BEGIN META {
"name" : "AC_ValueSet_fromFileColumns",
"comment" : "Returns the set of unique values in user-defined columns of tabular artifacts",
"parameters" : [ 'vSearchSpace','vXtension','vValueColumns','vValueSetPath'],
"core": "1.596",
"authors" : [
{ name : "Ioannis K. Moutsatsos" }
]
} END META**/
import groovy.io.FileType
/*
 * Script body.
 * Inputs are read from the script binding:
 *   vSearchSpace  - folder whose top-level files are scanned (maxDepth 0)
 *   vXtension     - file-name ending; interpolated as-is into the file-name regex
 *   vValueColumns - comma-separated header names of the columns to harvest
 *   vValueSetPath - optional; when bound, the value set is also written to this path
 * Returns the list of unique values found in the requested columns.
 */
def artifactEndFilter = vXtension
def valueColumns = vValueColumns
def inputFileUrls = []
def unoOptions = []

def searchSpace = new File(vSearchSpace)
searchSpace.traverse(
        type: FileType.FILES,
        nameFilter: ~/.*\.$artifactEndFilter/,
        maxDepth: 0
) { f -> inputFileUrls.add(f) }
inputFileUrls.each {
    println it.name
}

/* The separator is a regular expression so that rogue commas inside quotation marks are not treated as delimiters
 * http://stackoverflow.com/questions/1757065/splitting-a-comma-separated-string-but-ignoring-commas-in-quotes
 * NOTE(review): String.split drops trailing empty fields, so a row that ends in empty cells
 * is reported below as incomplete - confirm that this is the intended behavior.
 */
def separator = ",(?=([^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*\$)"
def valueList = valueColumns.split(separator)
def listOnUniq = [] // one entry per complete data row: the values of the columns of interest

/*
 Read each CSV file once: the first line is the header and is used to index the
 positions of the columns of interest; every following line is a data row.
 */
inputFileUrls.each { inputFile ->
    inputFile.withReader { reader ->
        def headerLine = reader.readLine()
        if (headerLine == null) {
            // An empty file has no header to index; skip it instead of failing on null
            println "Skipping empty file: ${inputFile.name}"
            return
        }
        def columnSet = headerLine.split(separator)
        def valueIndex = createIndexIntoList(valueList.toList(), columnSet.toList())
        // The reader is already positioned after the header, so only data rows are visited here
        reader.eachLine { l ->
            def line = l.split(separator)
            if (line.size() == columnSet.size()) { // check for complete record
                listOnUniq.add(valueIndex.collect { vi -> line[vi] })
            } else {
                println "Skipping Incomplete record: $line"
            }
        }
    }
}

/* Collapse the collected rows into unique values and assign them to the uno-choice list returned to the UI */
unoOptions.addAll(listOnUniq.flatten().unique())
/*
unoOptions.sort().each{
println it
}
*/
if (binding.variables.containsKey("vValueSetPath")) {
    writeValueSet(vValueSetPath, vValueColumns.replace(',', '_'), unoOptions)
    println 'Saved File'
}
return unoOptions//.sort()
/**
 * Looks up the position of every member of one list inside another list.
 * For example source [A,D,F] and target [A,B,C,D,E,F,G] give [0,3,5].
 * Members of source that do not occur in target contribute nothing to the result.
 *
 * @param source the members to look up
 * @param target the list that is searched
 * @return the positions found in target, in the order of source
 */
def createIndexIntoList(List source, List target) {
    def positions = source.collect { member -> target.indexOf(member) }
    // indexOf reports a missing member as -1; keep only the real positions
    return positions.findAll { pos -> pos != -1 }
}
/**
 * Writes the value set to a CSV file, replacing any existing content of that file.
 *
 * The first line is "<header>,NAME"; every following line is "<n>,<value>", where n is a
 * 1-based running number. Entries equal to the string 'null' are removed before numbering.
 *
 * @param path    destination file path (defaults to 'D:/TEMP/valueSet.csv')
 * @param header  text placed in the first cell of the header line
 * @param options the values to write; a null list is treated as empty
 */
def writeValueSet(path='D:/TEMP/valueSet.csv', header, options){
    def output = new File("$path")
    def values = (options ?: []) - ['null']
    // withWriter flushes and closes the file even when a write fails part-way
    output.withWriter { outputWriter ->
        outputWriter << header + ',NAME' + '\n'
        values.eachWithIndex { v, ind ->
            outputWriter << "${ind + 1},$v\n"
        }
    }
}