Skip to content

Instantly share code, notes, and snippets.

View jgoodie's full-sized avatar

John Goodman jgoodie

  • San Diego
View GitHub Profile
@jgoodie
jgoodie / simple_linear_dataset.py
Last active April 18, 2024 03:12
Make a simple linear dataset, then create train, validation, and test sets
# Synthetic single-feature regression problem with a known intercept.
bias = 3.0
X, y, coef = make_regression(
    n_samples=1200,
    n_features=1,
    bias=bias,
    noise=2,
    random_state=42,
    shuffle=True,
    coef=True,
)

# Only the first 1000 samples are split here (80% train / 20% validation);
# samples 1000-1199 are not touched by this split.
X_train, X_val, y_train, y_val = train_test_split(
    X[:1000],
    y[:1000],
    test_size=0.20,
    random_state=42,
    shuffle=True,
)

# Convert every split to a float tensor; targets additionally gain a
# trailing axis via unsqueeze(dim=1).
X_train, X_val = (
    torch.tensor(part, dtype=torch.float) for part in (X_train, X_val)
)
y_train, y_val = (
    torch.tensor(part, dtype=torch.float).unsqueeze(dim=1)
    for part in (y_train, y_val)
)
@jgoodie
jgoodie / simple_pytorch_workflow.py
Last active April 4, 2024 04:46
Simple PyTorch workflow
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Score the test rows with the fitted logistic-regression model, then
# report accuracy and the confusion matrix.
features = test[test.columns[:31]]
scored = tdml.valib.LogRegPredict(
    data=features,
    model=model.model,
    index_columns="index",
)
pred = scored.result.to_pandas()
y_pred = pred['estimate']
y_test = test.to_pandas()['diagnosis']
# NOTE(review): y_test and y_pred come from two separate to_pandas() calls;
# confirm both are in the same row order before trusting these metrics.
acc_score = accuracy_score(y_test, y_pred)
print(f"Accuracy: {np.round(acc_score*100, 2)}%")
print(confusion_matrix(y_test, y_pred))
# Fit a logistic regression on the training rows: columns 1..30 of `train`
# are the predictors and "diagnosis" is the response.
model = tdml.valib.LogReg(
    data=train,
    columns=train.columns[1:31],
    response_column="diagnosis",
)

# Show the fitted model and its statistical measures.
print(model.model)
print(model.statistical_measures)
# Load the combined table and split it on its sampleid column:
# sampleid "1" becomes the training rows, sampleid "2" the test rows.
wiscTT = tdml.DataFrame("wisc_train_test")
train, test = (wiscTT[wiscTT.sampleid == sample_id] for sample_id in ("1", "2"))
# Query DBC.TablesV for the wisc_train_test table and print every row returned.
sql = """ SELECT * FROM DBC.TablesV WHERE TableName = 'wisc_train_test'; """
con = tdsql.connect(host=host, user=user, password=pw)
try:
    cur = con.cursor()
    cur.execute(sql)
    res = cur.fetchall()
    for r in res:
        # Print the current row; the original printed res[0] on every
        # iteration, repeating the first row instead of listing them all.
        print(r)
finally:
    # Close the connection even if the query or fetch raises.
    con.close()
# Draw a 70% / 30% sample of df and store the result as the
# SYSDBA.wisc_train_test table, replacing any existing table of that name.
# NOTE(review): presumably sample() tags each row with a sampleid that
# downstream code filters on -- confirm against the teradataml docs.
split_fractions = [0.70, 0.30]
df_train_test = df.sample(frac=split_fractions)
tdml.copy_to_sql(
    df_train_test,
    table_name="wisc_train_test",
    if_exists="replace",
    schema_name='SYSDBA',
)
# Connection settings (placeholder host; credentials are hard-coded here).
host, user, pw = '192.xxx.yyy.zzz', 'sysdba', 'sysdba'

# Open the teradataml context used by the DataFrame calls below.
td_context = tdml.create_context(host=host, username=user, password=pw)

# Reference the 'wiscbc' table and order it by its index.
df = tdml.DataFrame('wiscbc').sort_index()
# Bare expression: displays the frame when run as a notebook cell.
df
# Connection settings (placeholder host; credentials are hard-coded here).
host, user, pw = '192.xxx.yyy.zzz', 'sysdba', 'sysdba'

# Catalog query: tables in SYSDBA whose names start with "wisc", with their
# creation and last-alter timestamps, ordered by table name.
sql = """SELECT DatabaseName, TableName, CreateTimeStamp,LastAlterTimeStamp
FROM DBC.TablesV
WHERE TableKind = 'T' and DatabaseName = 'SYSDBA' and TableName LIKE 'wisc%'
ORDER BY TableName;"""

# Open the connection; nothing in this snippet executes `sql` yet.
connect_args = dict(host=host, user=user, password=pw)
con = tdsql.connect(**connect_args)
df = pd.read_csv("./datasets/Wisconsin_breast_prognostic.csv")
df.columns = ['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
'smoothness_mean', 'compactness_mean', 'concavity_mean',
'concave_points_mean', 'symmetry_mean', 'fractal_dimension_mean',
'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
'compactness_se', 'concavity_se', 'concave_points_se', 'symmetry_se',
'fractal_dimension_se', 'radius_worst', 'texture_worst',
'perimeter_worst', 'area_worst', 'smoothness_worst',
'compactness_worst', 'concavity_worst', 'concave_points_worst',