Source code for posydon.active_learning.psy_cris.synthetic_data.synth_data_2D

__authors__ = [
    "Kyle Akira Rocha <kylerocha2024@u.northwestern.edu>",
]


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

if (__name__ == "main"):
    save_figs = True   # plots will be saved
    save_data = True   # saving sampled data to csv
else:
    save_figs = False
    save_data = False

############# Sample Params #############
Nx = 10; Ny = 10
file_name = "synth_data.dat"


############# Cls / Regr Geometry #############
[docs] def cls_curve(x,y): vals = regr_func(x,y) curve_bool = ( y - (-0.1)*(x**3) - 1 ) > -1 # x^3 curve line = np.where( curve_bool, 2, vals ) bound_1 = 0.4 ; bound_2 = -0.28 cls1 = np.where( vals > bound_1, 1, line ) # banana cls2 = np.where( (cls1 < bound_1) == (cls1 > bound_2), 0, cls1 ) # background cls3 = np.where( vals < bound_2, -1, cls2 ) # triangle return cls3
[docs] def regr_func(x,y): eps = .9 # ~ 1/r padding a = 1. ; b = 1. # ~ ellipse return ( np.tanh((x)**3+(y)**2) )/( np.sqrt( (x/a)**2+(y/b)**2) + eps)
################################################ x = np.linspace(-3,3, 140) y = np.linspace(-3,3, 140) X,Y = np.meshgrid(x,y) # %matplotlib notebook # %matplotlib inline if (__name__ == "main"): print("Making analytic plot...") fig, subs = plt.subplots( nrows=1, ncols=2, dpi=120, figsize=(10,4), gridspec_kw={'width_ratios': [0.85, 1]}) subs[0].set_title("Classification") subs[0].pcolormesh( X, Y, cls_curve(X,Y) ) subs[1].set_title("Regression") pcm = subs[1].pcolormesh(X,Y, regr_func(X,Y), cmap="PiYG") #PiYG fig.colorbar( pcm ) my_cont = subs[1].contour(X,Y, regr_func(X,Y), colors='k', levels=[-0.4, -0.3, -0.15, 0, 0.15, 0.3, 0.4], ) my_cont.clabel( fmt='%1.2f', ) for i in range(2): subs[i].set_xlabel(r"$x$", fontsize=12); subs[i].set_ylabel(r"$y$", fontsize=12) if save_figs: plt.savefig("cls_regr_original.png") plt.show() ############# Sampling ############# # x,y vals to query with classifcationa & regression functions x_vals = np.linspace(-3,3, int(Nx)) y_vals = np.linspace(-3,3, int(Ny)) if (__name__=="main") and save_data: print("Sampling {0} points...".format(len(x_vals)*len(y_vals)) ) points = [] for i in x_vals: for j in y_vals: points.append( np.array([i,j]) ) points = np.array(points) results = cls_curve( points.T[0], points.T[1] ) unique_classes = np.unique( results ) mapping = {-1:"A", 0:"B", 1:"C", 2:"D"} str_classification = [ mapping[val] for val in results.astype(int)] df = pd.DataFrame() df["input_1"] = points.T[0] df["input_2"] = points.T[1] df["class"] = str_classification df["output_1"] = regr_func( points.T[0], points.T[1] ) if save_data: df.to_csv( file_name, index = False ); print("Saved to '{0}'.".format(file_name)) if (__name__ == "main"): print("Making sampled points plot...") fig, subs = plt.subplots( nrows=1, ncols=2, figsize=(8,4), dpi=120 ) subs[0].set_title("Sampled Points") subs[0].scatter( df["input_1"], df["input_2"], c = results, s=40, ) subs[1].set_title("Overlay") subs[1].contourf( X, Y, cls_curve(X,Y), levels = 20, vmin=-1, vmax=2, alpha=1. ) #subs[1].pcolormesh( X, Y, cls_curve(X,Y), levels = 10 ) subs[1].scatter( df["input_1"], df["input_2"], c = results, s=40, edgecolors='w' ) for i in range(2): subs[i].set_xlim(-3.1,3.1); subs[i].set_ylim(-3.1,3.1) subs[i].set_xlabel(r"$x$", fontsize=12); subs[i].set_ylabel(r"$y$", fontsize=12) if save_figs: plt.savefig("cls_regr_sampled.png") plt.show()
[docs] def get_raw_output_2D(x,y): """Get the raw output for classification and regression functions for the 2D synthetic data set. Original class data to strings given by the following relation {-1:"A", 0:"B", 1:"C", 2:"D"}. """ if isinstance( x, (float,int) ): x = np.array([x]); y=np.array([y]) elif isinstance(x, list): x= np.array(x); y=np.array(y) classification_output = cls_curve(x,y) regression_output = regr_func(x,y) return classification_output, regression_output
# For queries
[docs] def get_output_2D(x,y): """For a set of query points (x,y) in the range (-3,3) return a DataFrame with the inputs and outputs for classificaiton and regression. """ if isinstance( x, (float,int) ): x = np.array([x]); y=np.array([y]) elif isinstance(x, list): x= np.array(x); y=np.array(y) cls_results, regr_out = get_raw_output_2D(x,y) if x.ndim > 1 or y.ndim > 1: cls_results = cls_results.flatten() regr_out = regr_out.flatten() x = x.flatten(); y = y.flatten() str_results = np.array( [mapping[val] for val in cls_results.astype(int)] ) data_frame = pd.DataFrame() data_frame["input_1"] = x data_frame["input_2"] = y data_frame["class"] = str_results data_frame["output_1"] = regr_out return data_frame