-
Notifications
You must be signed in to change notification settings - Fork 0
/
dummytable.m
107 lines (92 loc) · 3.31 KB
/
dummytable.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
function Tdummy = dummytable(T)
% Tdummy = dummytable(T) - convert categorical variables in table to dummy
% variables
%
% This function takes the categorical variables in a table and converts
% them to separate dummy variables with intelligent names. This way they
% can be used in the Classification Learner App and the variable names make
% sense for feature selection, etc.
%
% Usage:
%
%   Tdummy = dummytable(T)
%
% Inputs:
%
%   T: Table with categoricals or categorical variable
%
% Outputs:
%
%   Tdummy: T with categorical variables turned into dummy variables with
%       intelligent names. Each categorical variable is replaced by one
%       column per category, named <VariableName>_<Category>. The names
%       are passed through matlab.lang.makeValidName and then made
%       unique, so two generated names that collapse to the same
%       identifier get a numeric suffix instead of causing an error.
%       Non-categorical variables are copied through unchanged, and the
%       row names of T (if any) are carried over.
%
% Example:
%
%   % Simple Table
%   T = table(rand(10,1),categorical(cellstr('rbbgbgbbgr'.')),...
%       'VariableNames',{'Percent','Color'});
%   disp(T)
%
%   % Turn it into a dummy table
%   Tdummy = dummytable(T);
%   disp(Tdummy)
%
% See Also: dummyvar, table, categorical, classificationLearner

% Copyright 2017 The MathWorks, Inc.
% Sean de Wolski Apr 13, 2014

    % Error checking
    narginchk(1,1)
    validateattributes(T,{'categorical', 'table'},{},mfilename,'T',1);

    % If it's a categorical, do our best to convert it to a table with an
    % intelligent variable name
    if iscategorical(T)
        % Try to use existing variable name
        cname = inputname(1);
        if isempty(cname)
            % It's a MATLAB Expression, default to Var1
            cname = 'Var1';
        end
        T = table(T,'VariableNames',{cname});
    end

    % Identify which variables are categorical
    cats = varfun(@iscategorical,T,'OutputFormat','uniform');

    % Short circuit if there are no categoricals
    if ~any(cats)
        Tdummy = T;
        return
    end

    % Store everything in a cell. w will be the total width of the table
    % with each variable dummyvar'd: one slot per non-categorical variable
    % plus one slot per category of every categorical variable
    w = nnz(~cats)+sum(varfun(@(x)numel(categories(x)),T(:,cats),'OutputFormat','uniform'));

    % Preallocate storage
    datastorage = cell(1,w);
    namestorage = cell(1,w);

    % Engine
    idx = 0; % Start nowhere in cell
    for ii = 1:width(T)
        idx = idx+1;
        % Loop over table deciding what to do with each variable
        if cats(ii)
            % It's a categorical:
            % extract it, keep its categories and build the dummyvar
            Tii = T{:,ii};
            categoriesii = categories(Tii)';
            ncatii = numel(categoriesii); % How many?

            % Build dummy var as a row cell with one column in each cell
            dvii = num2cell(dummyvar(Tii), 1); % Dummy var then cell

            % Build names as <VariableName>_<Category>
            namesii = strcat(T.Properties.VariableNames{ii}, '_', categoriesii);

            % Insert
            datastorage(idx:(idx+ncatii-1)) = dvii;
            namestorage(idx:(idx+ncatii-1)) = namesii;

            % Increment; idx was already advanced by one at the top of
            % the loop, so only ncatii-1 further slots were consumed
            idx = idx+ncatii-1;
        else
            % Extract non categorical into current storage location
            datastorage{idx} = T{:,ii};
            namestorage(idx) = T.Properties.VariableNames(ii);
        end
    end

    % Sanitize the names. makeValidName can map different raw names onto
    % the same identifier (e.g. categories 'a-b' and 'a_b', or an existing
    % variable 'A_b' next to category 'b' of variable 'A'), which would
    % make table() error on duplicate variable names. makeUniqueStrings
    % resolves such collisions while respecting the maximum name length.
    validNames = matlab.lang.makeValidName(namestorage);
    uniqueNames = matlab.lang.makeUniqueStrings(validNames, {}, namelengthmax);

    % Build Tdummy with comma separated list expansion
    Tdummy = table(datastorage{:},'VariableNames',uniqueNames);

    % Keep the row names so this path agrees with the short-circuit path
    % above, which returns T (including its row names) unchanged
    Tdummy.Properties.RowNames = T.Properties.RowNames;
end