# Source code for posydon.active_learning.psy_cris.run_params.run_psycris_sequence

# --------------------------------------------
# run a psy-cris sequence on synthetic data
# --------------------------------------------
import argparse
import numpy as np
import pickle
import copy
import time
import sys
import os
from posydon.active_learning.psy_cris.utils import parse_inifile
from posydon.active_learning.psy_cris.utils import do_dynamic_sampling
from posydon.active_learning.psy_cris.utils import calc_performance

def main():
    """Run a psy-cris sequence on synthetic data from the command line.

    Parses CLI arguments, validates the inifile and output-directory paths,
    then runs ``n_sequences`` independent dynamic-sampling sequences.  Each
    sequence pickles its per-iteration DataFrames and saves performance
    metrics (accuracy, confusion matrices, regression fractional differences)
    as ``.npy`` files in ``save_data_dir``.

    Parameters
    ----------
    N/A

    Raises
    ------
    ValueError
        If the inifile path or the save-data directory does not exist.

    Notes
    -----
    This code is used in the work of:
    Rocha et al. 2022, ApJ, 938, 64. doi:10.3847/1538-4357/ac8b05
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-ini", dest='inifile_path', type=str,
        help="Path to psy-cris inifile.")
    parser.add_argument(
        "-dir", dest='save_data_dir', type=str,
        help="Path to directory to save data.")
    # str(...) so the default matches type=str used for CLI-provided values;
    # run_id is only ever interpolated into names, so this is backward
    # compatible.
    parser.add_argument(
        "-id", dest='run_id', type=str,
        help="Integer or str used to identify runs.",
        default=str(np.random.randint(low=10000, high=99999)))
    parser.add_argument(
        "-n", dest='n_sequences', type=int,
        help="N sequences to run.", default=1)
    parser.add_argument(
        "-num_start", dest='n_starting_points', type=int,
        help="N points to start with in even grid.", default=3**3)
    parser.add_argument(
        "-num_fin", dest='n_final_points', type=int,
        help="Total num points to converge to.", default=6**3)
    parser.add_argument(
        "-ppi", dest='points_per_iter', type=int,
        help="Num points between each iteration.", default=70)
    parser.add_argument(
        "-cls", dest='performance_cls_name', type=str,
        help="Classification algorithm used in performance calc.",
        default="linear")
    parser.add_argument(
        "-regr", dest='performance_regr_name', type=str,
        help="Regression algorithm used in performance calc.",
        default="rbf")
    parser.add_argument(
        "-res", dest='performance_res', type=int,
        help="Resolution to compute performance.", default=40)
    parser.add_argument(
        "-length_scale_mult", dest='length_scale_multiplier', type=float,
        help="Small class proposal length scale mult of normal.",
        default=0.3333)
    parser.add_argument(
        "-percent_increase", dest='percent_increase', type=float,
        help="Increase points per iter by a percent of the training set.",
        default=-1)
    args = parser.parse_args()

    inifile_path = args.inifile_path
    save_data_dir = args.save_data_dir
    if not isinstance(inifile_path, str) or not os.path.isfile(inifile_path):
        raise ValueError("Inifile not found. Please check that the path is "
                         "correct.\nGiven: {}".format(inifile_path))
    if not isinstance(save_data_dir, str) or not os.path.isdir(save_data_dir):
        raise ValueError("Directory not found. Please check that "
                         "save_data_dir exists.\nGiven: {}"
                         .format(save_data_dir))

    run_ID = args.run_id
    n_psycris_sequences = args.n_sequences
    n_starting_points = args.n_starting_points
    n_final_points = args.n_final_points
    new_points_per_iter = args.points_per_iter
    length_scale_mult = args.length_scale_multiplier
    # A negative CLI value is the sentinel for "disabled" -> pass None on.
    if args.percent_increase < 0:
        percent_increase = None
    else:
        percent_increase = args.percent_increase

    psy_cris_kwargs_dict = parse_inifile(inifile_path, verbose=False)
    dimensionality = len(
        psy_cris_kwargs_dict["TableData_kwargs"]["input_cols"])

    print("\n\n" + ">"*18
          + " RUNNING PSYCRIS SEQUENCE ID: {} ".format(run_ID) + "<"*18)
    print("FILE: {}".format(inifile_path))
    print("SAVE DIR: {}\n".format(save_data_dir))
    print("n_starting_points: {}".format(n_starting_points))
    print("n_final_points: {}".format(n_final_points))
    print("new_points_per_iter: {}".format(new_points_per_iter))
    print("dimensionality: {}".format(dimensionality))
    print("length_scale_multiplier: {}".format(length_scale_mult))
    print("percent_increase: {}\n".format(percent_increase))

    start_iters_time = time.time()
    for i in range(n_psycris_sequences):
        print("START - do_dynamic_sampling - {0}i{1}".format(run_ID, i))
        # Fresh deep copy per sequence in case do_dynamic_sampling mutates
        # the kwargs dict.
        kwargs_per_iter = copy.deepcopy(psy_cris_kwargs_dict)
        dfs_per_iter, preds_per_iter = do_dynamic_sampling(
            N_starting_points=n_starting_points,
            N_final_points=n_final_points,
            new_points_per_iter=new_points_per_iter,
            verbose=True, threshold=1e-6, jitter=True,
            dim=dimensionality,
            length_scale_mult=length_scale_mult,
            percent_increase=percent_increase,
            **kwargs_per_iter)
        try:
            print("\n\tSaving dfs...")
            # os.path.join instead of string concatenation for path building.
            f_name_backup = os.path.join(
                save_data_dir, "{0}i{1}_dfs_per_iter".format(run_ID, i))
            with open(f_name_backup, "wb") as f:
                pickle.dump(dfs_per_iter, f)
            print("\tSTART - calc_performance - {0}i{1}".format(run_ID, i))
            print("\tcls: {}, regr: {}, res: {}".format(
                args.performance_cls_name, args.performance_regr_name,
                args.performance_res))
            acc_per_iter, conf_matrix_per_iter, \
                abs_regr_frac_diffs_per_iter = calc_performance(
                    dfs_per_iter,
                    cls_name=args.performance_cls_name,
                    regr_name=args.performance_regr_name,
                    resolution=args.performance_res,
                    verbose=False)
            f1_path = os.path.join(
                save_data_dir, "{0}i{1}_acc_per_iter".format(run_ID, i))
            f2_path = os.path.join(
                save_data_dir,
                "{0}i{1}_conf_matrix_per_iter".format(run_ID, i))
            f3_path = os.path.join(
                save_data_dir,
                "{0}i{1}_abs_regr_frac_diffs_per_iter".format(run_ID, i))
            print("\tSaving files:")
            # Deduplicated: the original repeated this try/np.save/except
            # pattern three times verbatim.
            for path, payload in (
                    (f1_path, acc_per_iter),
                    (f2_path, conf_matrix_per_iter),
                    (f3_path, np.array(abs_regr_frac_diffs_per_iter,
                                       dtype=object))):
                try:
                    np.save(path, payload, allow_pickle=True)
                    print("\t\t{}".format(path))
                except Exception as err:
                    print("\t\tFAILED: {0}, err: {1}".format(path, err))
        except Exception as exc:
            # Best-effort: a failed performance calc should not abort the
            # remaining sequences.
            print("\tPerformance calculation Failed!\n\tErr:{}\n\n"
                  .format(exc))
    print("END {0}\nTotal Time : {1:.3f} min".format(
        run_ID, (time.time() - start_iters_time)/60))
# Script entry point: run the sequence only when executed directly.
if __name__ == "__main__":
    main()