# Source code for posydon.active_learning.psy_cris.run_params.run_psycris_sequence

# --------------------------------------------
# run a psy-cris sequence on synthetic data
# --------------------------------------------
import argparse
import numpy as np
import pickle
import copy
import time
import sys
import os
from posydon.active_learning.psy_cris.utils import parse_inifile
from posydon.active_learning.psy_cris.utils import do_dynamic_sampling
from posydon.active_learning.psy_cris.utils import calc_performance

def main():
    """Run a psy-cris sequence on synthetic data from the command line.

    Parses CLI arguments, validates the inifile and output-directory paths,
    then runs ``n_sequences`` independent dynamic-sampling sequences.  Each
    sequence pickles its per-iteration DataFrames and saves performance
    metrics (accuracy, confusion matrices, regression fractional differences)
    as ``.npy`` files in ``save_data_dir``.

    Parameters
    ----------
    N/A

    Raises
    ------
    ValueError
        If the inifile path or the save-data directory does not exist.

    Notes
    -----
    This code is used in the work of:
    Rocha et al. 2022, ApJ, 938, 64. doi:10.3847/1538-4357/ac8b05
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-ini", dest='inifile_path', type=str,
        help="Path to psy-cris inifile.")
    parser.add_argument(
        "-dir", dest='save_data_dir', type=str,
        help="Path to directory to save data.")
    # str(...) so the default matches type=str used for CLI-provided values;
    # run_id is only ever interpolated into names, so this is backward
    # compatible.
    parser.add_argument(
        "-id", dest='run_id', type=str,
        help="Integer or str used to identify runs.",
        default=str(np.random.randint(low=10000, high=99999)))
    parser.add_argument(
        "-n", dest='n_sequences', type=int,
        help="N sequences to run.", default=1)
    parser.add_argument(
        "-num_start", dest='n_starting_points', type=int,
        help="N points to start with in even grid.", default=3**3)
    parser.add_argument(
        "-num_fin", dest='n_final_points', type=int,
        help="Total num points to converge to.", default=6**3)
    parser.add_argument(
        "-ppi", dest='points_per_iter', type=int,
        help="Num points between each iteration.", default=70)
    parser.add_argument(
        "-cls", dest='performance_cls_name', type=str,
        help="Classification algorithm used in performance calc.",
        default="linear")
    parser.add_argument(
        "-regr", dest='performance_regr_name', type=str,
        help="Regression algorithm used in performance calc.",
        default="rbf")
    parser.add_argument(
        "-res", dest='performance_res', type=int,
        help="Resolution to compute performance.", default=40)
    parser.add_argument(
        "-length_scale_mult", dest='length_scale_multiplier', type=float,
        help="Small class proposal length scale mult of normal.",
        default=0.3333)
    parser.add_argument(
        "-percent_increase", dest='percent_increase', type=float,
        help="Increase points per iter by a percent of the training set.",
        default=-1)
    args = parser.parse_args()

    inifile_path = args.inifile_path
    save_data_dir = args.save_data_dir
    if not isinstance(inifile_path, str) or not os.path.isfile(inifile_path):
        raise ValueError("Inifile not found. Please check that the path is "
                         "correct.\nGiven: {}".format(inifile_path))
    if not isinstance(save_data_dir, str) or not os.path.isdir(save_data_dir):
        raise ValueError("Directory not found. Please check that "
                         "save_data_dir exists.\nGiven: {}"
                         .format(save_data_dir))

    run_ID = args.run_id
    n_psycris_sequences = args.n_sequences
    n_starting_points = args.n_starting_points
    n_final_points = args.n_final_points
    new_points_per_iter = args.points_per_iter
    length_scale_mult = args.length_scale_multiplier
    # A negative CLI value is the sentinel for "disabled" -> pass None on.
    if args.percent_increase < 0:
        percent_increase = None
    else:
        percent_increase = args.percent_increase

    psy_cris_kwargs_dict = parse_inifile(inifile_path, verbose=False)
    dimensionality = len(
        psy_cris_kwargs_dict["TableData_kwargs"]["input_cols"])

    print("\n\n" + ">"*18
          + " RUNNING PSYCRIS SEQUENCE ID: {} ".format(run_ID) + "<"*18)
    print("FILE: {}".format(inifile_path))
    print("SAVE DIR: {}\n".format(save_data_dir))
    print("n_starting_points: {}".format(n_starting_points))
    print("n_final_points: {}".format(n_final_points))
    print("new_points_per_iter: {}".format(new_points_per_iter))
    print("dimensionality: {}".format(dimensionality))
    print("length_scale_multiplier: {}".format(length_scale_mult))
    print("percent_increase: {}\n".format(percent_increase))

    start_iters_time = time.time()
    for i in range(n_psycris_sequences):
        print("START - do_dynamic_sampling - {0}i{1}".format(run_ID, i))
        # Fresh deep copy per sequence in case do_dynamic_sampling mutates
        # the kwargs dict.
        kwargs_per_iter = copy.deepcopy(psy_cris_kwargs_dict)
        dfs_per_iter, preds_per_iter = do_dynamic_sampling(
            N_starting_points=n_starting_points,
            N_final_points=n_final_points,
            new_points_per_iter=new_points_per_iter,
            verbose=True, threshold=1e-6, jitter=True,
            dim=dimensionality,
            length_scale_mult=length_scale_mult,
            percent_increase=percent_increase,
            **kwargs_per_iter)
        try:
            print("\n\tSaving dfs...")
            # os.path.join instead of string concatenation for path building.
            f_name_backup = os.path.join(
                save_data_dir, "{0}i{1}_dfs_per_iter".format(run_ID, i))
            with open(f_name_backup, "wb") as f:
                pickle.dump(dfs_per_iter, f)
            print("\tSTART - calc_performance - {0}i{1}".format(run_ID, i))
            print("\tcls: {}, regr: {}, res: {}".format(
                args.performance_cls_name, args.performance_regr_name,
                args.performance_res))
            acc_per_iter, conf_matrix_per_iter, \
                abs_regr_frac_diffs_per_iter = calc_performance(
                    dfs_per_iter,
                    cls_name=args.performance_cls_name,
                    regr_name=args.performance_regr_name,
                    resolution=args.performance_res,
                    verbose=False)
            f1_path = os.path.join(
                save_data_dir, "{0}i{1}_acc_per_iter".format(run_ID, i))
            f2_path = os.path.join(
                save_data_dir,
                "{0}i{1}_conf_matrix_per_iter".format(run_ID, i))
            f3_path = os.path.join(
                save_data_dir,
                "{0}i{1}_abs_regr_frac_diffs_per_iter".format(run_ID, i))
            print("\tSaving files:")
            # Deduplicated: the original repeated this try/np.save/except
            # pattern three times verbatim.
            for path, payload in (
                    (f1_path, acc_per_iter),
                    (f2_path, conf_matrix_per_iter),
                    (f3_path, np.array(abs_regr_frac_diffs_per_iter,
                                       dtype=object))):
                try:
                    np.save(path, payload, allow_pickle=True)
                    print("\t\t{}".format(path))
                except Exception as err:
                    print("\t\tFAILED: {0}, err: {1}".format(path, err))
        except Exception as exc:
            # Best-effort: a failed performance calc should not abort the
            # remaining sequences.
            print("\tPerformance calculation Failed!\n\tErr:{}\n\n"
                  .format(exc))
    print("END {0}\nTotal Time : {1:.3f} min".format(
        run_ID, (time.time() - start_iters_time)/60))
# Script entry point: run the sequence only when executed directly.
if __name__ == "__main__":
    main()