Skip to content

Commit

Permalink
Add more arguments to the regression tools
Browse files Browse the repository at this point in the history
  • Loading branch information
briling committed Feb 21, 2024
1 parent 58b7c4d commit 99e9e5f
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 25 deletions.
29 changes: 21 additions & 8 deletions qstack/regression/final_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,34 @@
from qstack.regression.kernel_utils import get_kernel, defaults
from qstack.tools import correct_num_threads

def final_error(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta, akernel=defaults.kernel,
                test_size=defaults.test_size,
                random_state=defaults.random_state,
                return_pred=False, save_alpha=None):
    """Fit a kernel ridge regression on a train split and return the test-set absolute errors.

    The data are split once with ``train_test_split``; the regression
    coefficients are obtained by solving ``(K + eta*I) alpha = y_train``.

    Args:
        X: Representations of the samples, or, when ``read_kernel`` is not
            ``False``, a precomputed kernel matrix that is indexed as
            ``X[i, j]`` with sample indices ``i`` and ``j``.
        y: Target properties, one entry per sample (``len(y)`` defines the
            number of samples when ``read_kernel`` is used).
        read_kernel: If ``False`` (the default), the kernel is computed from
            ``X``; otherwise ``X`` is used as the kernel itself.
        sigma: Kernel width; the kernel function is called with ``1.0/sigma``.
            Ignored when ``read_kernel`` is used.
        eta: Regularization term added to the diagonal of the training kernel.
        akernel: Kernel type passed to ``get_kernel``.
            Ignored when ``read_kernel`` is used.
        test_size: Test set size, forwarded to ``train_test_split``.
        random_state: Random state of the split, forwarded to ``train_test_split``.
        return_pred: If true, the predictions for the test set are returned
            together with the errors.
        save_alpha: If set, file to write the regression coefficients to
            (passed to ``np.save``).

    Returns:
        The absolute errors ``|y_test - y_predicted|`` for the test set, or a
        tuple ``(errors, predictions)`` if ``return_pred`` is true.
    """
    # NOTE: the identity check is deliberate — only the literal ``False``
    # selects the "compute the kernel" branch.
    if read_kernel is False:
        kernel = get_kernel(akernel)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
        K_all = kernel(X_train, X_train, 1.0/sigma)
        Ks_all = kernel(X_test, X_train, 1.0/sigma)
    else:
        # X is already a kernel: split the sample indices and cut out the
        # train/train and test/train sub-blocks.
        idx_train, idx_test, y_train, y_test = train_test_split(np.arange(len(y)), y, test_size=test_size, random_state=random_state)
        K_all = X[np.ix_(idx_train, idx_train)]
        Ks_all = X[np.ix_(idx_test, idx_train)]

    # Regularize the training kernel in place before solving.
    K_all[np.diag_indices_from(K_all)] += eta
    # assume_a='pos' tells scipy to treat the matrix as positive definite.
    alpha = scipy.linalg.solve(K_all, y_train, assume_a='pos')
    y_kf_predict = np.dot(Ks_all, alpha)
    aes = np.abs(y_test-y_kf_predict)
    if save_alpha:
        np.save(save_alpha, alpha)
    if return_pred:
        return aes, y_kf_predict
    else:
        return aes

def main():
import sys
Expand All @@ -34,12 +46,13 @@ def main():
parser.add_argument('--kernel', type=str, dest='kernel', default=defaults.kernel, help='kernel type (G for Gaussian, L for Laplacian, myL for Laplacian for open-shell systems) (default '+defaults.kernel+')')
parser.add_argument('--save-alpha', type=str, dest='save_alpha', default=None, help='file to write the regression coefficients to (default None)')
parser.add_argument('--ll', action='store_true', dest='ll', default=False, help='if correct for the numper of threads')
parser.add_argument('--random_state', type=int, dest='random_state', default=defaults.random_state, help='random state for test / train splitting')
args = parser.parse_args()
print(vars(args))
if(args.ll): correct_num_threads()
X = np.load(args.repr)
y = np.loadtxt(args.prop)
aes = final_error(X, y, sigma=args.sigma, eta=args.eta, akernel=args.kernel, test_size=args.test_size, save_alpha=args.save_alpha)
aes = final_error(X, y, sigma=args.sigma, eta=args.eta, akernel=args.kernel, test_size=args.test_size, save_alpha=args.save_alpha, random_state=args.random_state)
np.savetxt(sys.stdout, aes, fmt='%e')

if __name__ == "__main__":
Expand Down
3 changes: 2 additions & 1 deletion qstack/regression/kernel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def __call__(self, parser, namespace, values, option_string=None):
train_size=[0.125, 0.25, 0.5, 0.75, 1.0],
etaarr=list(numpy.logspace(-10, 0, 5)),
sigmaarr=list(numpy.logspace(0,6, 13)),
sigmaarr_mult=list(numpy.logspace(0,2, 5))
sigmaarr_mult=list(numpy.logspace(0,2, 5)),
random_state=0,
)


Expand Down
34 changes: 18 additions & 16 deletions qstack/regression/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta,
akernel=defaults.kernel, gkernel=defaults.gkernel, gdict=defaults.gdict,
test_size=defaults.test_size, train_size=defaults.train_size, n_rep=defaults.n_rep,
random_state=defaults.random_state,
sparse=None, debug=False):
"""
Expand All @@ -19,11 +20,11 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta,
"""
if read_kernel is False:
kernel = get_kernel(akernel, [gkernel, gdict])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
K_all = kernel(X_train, X_train, 1.0/sigma)
Ks_all = kernel(X_test, X_train, 1.0/sigma)
else:
idx_train, idx_test, y_train, y_test = train_test_split(np.arange(len(y)), y, test_size=test_size, random_state=0)
idx_train, idx_test, y_train, y_test = train_test_split(np.arange(len(y)), y, test_size=test_size, random_state=random_state)
K_all = X[np.ix_(idx_train,idx_train)]
Ks_all = X[np.ix_(idx_test, idx_train)]

Expand Down Expand Up @@ -68,20 +69,21 @@ def regression(X, y, read_kernel=False, sigma=defaults.sigma, eta=defaults.eta,
def main():
import argparse
parser = argparse.ArgumentParser(description='This program computes the learning curve.')
parser.add_argument('--x', type=str, dest='repr', required=True, help='path to the representations file')
parser.add_argument('--y', type=str, dest='prop', required=True, help='path to the properties file')
parser.add_argument('--test', type=float, dest='test_size', default=defaults.test_size, help='test set fraction (default='+str(defaults.test_size)+')')
parser.add_argument('--eta', type=float, dest='eta', default=defaults.eta, help='eta hyperparameter (default='+str(defaults.eta)+')')
parser.add_argument('--sigma', type=float, dest='sigma', default=defaults.sigma, help='sigma hyperparameter (default='+str(defaults.sigma)+')')
parser.add_argument('--akernel', type=str, dest='akernel', default=defaults.kernel, help='local kernel type (G for Gaussian, L for Laplacian, myL for Laplacian for open-shell systems) (default '+defaults.kernel+')')
parser.add_argument('--gkernel', type=str, dest='gkernel', default=defaults.gkernel, help='global kernel type (avg for average kernel, rem for REMatch kernel) (default '+str(defaults.gkernel)+')')
parser.add_argument('--gdict', nargs='*', action=ParseKwargs, dest='gdict', default=defaults.gdict, help='dictionary like input string to initialize global kernel parameters')
parser.add_argument('--splits', type=int, dest='splits', default=defaults.n_rep, help='number of splits (default='+str(defaults.n_rep)+')')
parser.add_argument('--train', type=float, dest='train_size', default=defaults.train_size, nargs='+', help='training set fractions')
parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='enable debug')
parser.add_argument('--ll', action='store_true', dest='ll', default=False, help='if correct for the numper of threads')
parser.add_argument('--readkernel', action='store_true', dest='readk', default=False, help='if X is kernel')
parser.add_argument('--sparse', type=int, dest='sparse', default=None, help='regression basis size for sparse learning')
parser.add_argument('--x', type=str, dest='repr', required=True, help='path to the representations file')
parser.add_argument('--y', type=str, dest='prop', required=True, help='path to the properties file')
parser.add_argument('--test', type=float, dest='test_size', default=defaults.test_size, help='test set fraction (default='+str(defaults.test_size)+')')
parser.add_argument('--eta', type=float, dest='eta', default=defaults.eta, help='eta hyperparameter (default='+str(defaults.eta)+')')
parser.add_argument('--sigma', type=float, dest='sigma', default=defaults.sigma, help='sigma hyperparameter (default='+str(defaults.sigma)+')')
parser.add_argument('--akernel', type=str, dest='akernel', default=defaults.kernel, help='local kernel type (G for Gaussian, L for Laplacian, myL for Laplacian for open-shell systems) (default '+defaults.kernel+')')
parser.add_argument('--gkernel', type=str, dest='gkernel', default=defaults.gkernel, help='global kernel type (avg for average kernel, rem for REMatch kernel) (default '+str(defaults.gkernel)+')')
parser.add_argument('--gdict', nargs='*', action=ParseKwargs, dest='gdict', default=defaults.gdict, help='dictionary like input string to initialize global kernel parameters')
parser.add_argument('--splits', type=int, dest='splits', default=defaults.n_rep, help='number of splits (default='+str(defaults.n_rep)+')')
parser.add_argument('--train', type=float, dest='train_size', default=defaults.train_size, nargs='+', help='training set fractions')
parser.add_argument('--debug', action='store_true', dest='debug', default=False, help='enable debug')
parser.add_argument('--ll', action='store_true', dest='ll', default=False, help='if correct for the numper of threads')
parser.add_argument('--readkernel', action='store_true', dest='readk', default=False, help='if X is kernel')
parser.add_argument('--sparse', type=int, dest='sparse', default=None, help='regression basis size for sparse learning')
parser.add_argument('--random_state', type=int, dest='random_state', default=defaults.random_state, help='random state for test / train splitting')
args = parser.parse_args()
print(vars(args))
if(args.ll): correct_num_threads()
Expand Down

0 comments on commit 99e9e5f

Please sign in to comment.