path: root/code/sunlab
author Christian C <cc@localhost> 2024-11-11 12:29:32 -0800
committer Christian C <cc@localhost> 2024-11-11 12:29:32 -0800
commit b85ee9d64a536937912544c7bbd5b98b635b7e8d (patch)
tree cef7bc17d7b29f40fc6b1867d0ce0a742d5583d0 /code/sunlab
Initial commit
Diffstat (limited to 'code/sunlab')
-rw-r--r-- code/sunlab/__init__.py 17
-rw-r--r-- code/sunlab/common/__init__.py 5
-rw-r--r-- code/sunlab/common/data/__init__.py 6
-rw-r--r-- code/sunlab/common/data/basic.py 6
-rw-r--r-- code/sunlab/common/data/dataset.py 255
-rw-r--r-- code/sunlab/common/data/dataset_iterator.py 34
-rw-r--r-- code/sunlab/common/data/image_dataset.py 75
-rw-r--r-- code/sunlab/common/data/shape_dataset.py 57
-rw-r--r-- code/sunlab/common/data/utilities.py 119
-rw-r--r-- code/sunlab/common/distribution/__init__.py 7
-rw-r--r-- code/sunlab/common/distribution/adversarial_distribution.py 35
-rw-r--r-- code/sunlab/common/distribution/gaussian_distribution.py 23
-rw-r--r-- code/sunlab/common/distribution/o_gaussian_distribution.py 38
-rw-r--r-- code/sunlab/common/distribution/s_gaussian_distribution.py 40
-rw-r--r-- code/sunlab/common/distribution/swiss_roll_distribution.py 42
-rw-r--r-- code/sunlab/common/distribution/symmetric_uniform_distribution.py 21
-rw-r--r-- code/sunlab/common/distribution/uniform_distribution.py 21
-rw-r--r-- code/sunlab/common/distribution/x_gaussian_distribution.py 38
-rw-r--r-- code/sunlab/common/mathlib/__init__.py 1
-rw-r--r-- code/sunlab/common/mathlib/base.py 57
-rw-r--r-- code/sunlab/common/mathlib/lyapunov.py 54
-rw-r--r-- code/sunlab/common/mathlib/random_walks.py 83
-rw-r--r-- code/sunlab/common/plotting/__init__.py 2
-rw-r--r-- code/sunlab/common/plotting/base.py 270
-rw-r--r-- code/sunlab/common/plotting/colors.py 38
-rw-r--r-- code/sunlab/common/scaler/__init__.py 2
-rw-r--r-- code/sunlab/common/scaler/adversarial_scaler.py 44
-rw-r--r-- code/sunlab/common/scaler/max_abs_scaler.py 48
-rw-r--r-- code/sunlab/common/scaler/quantile_scaler.py 52
-rw-r--r-- code/sunlab/environment/base/__init__.py 8
-rw-r--r-- code/sunlab/environment/base/cpu.py 4
-rw-r--r-- code/sunlab/environment/base/cuda.py 4
-rw-r--r-- code/sunlab/environment/base/extras.py 7
-rw-r--r-- code/sunlab/environment/base/fortran.py 8
-rw-r--r-- code/sunlab/fortran_src/__init__.py 1
-rw-r--r-- code/sunlab/fortran_src/fortran_library.f90 96
-rw-r--r-- code/sunlab/globals.py 265
-rw-r--r-- code/sunlab/sunflow/__init__.py 6
-rw-r--r-- code/sunlab/sunflow/data/__init__.py 1
-rw-r--r-- code/sunlab/sunflow/data/utilities.py 53
-rw-r--r-- code/sunlab/sunflow/models/__init__.py 7
-rw-r--r-- code/sunlab/sunflow/models/adversarial_autoencoder.py 344
-rw-r--r-- code/sunlab/sunflow/models/autoencoder.py 85
-rw-r--r-- code/sunlab/sunflow/models/decoder.py 127
-rw-r--r-- code/sunlab/sunflow/models/discriminator.py 132
-rw-r--r-- code/sunlab/sunflow/models/encoder.py 140
-rw-r--r-- code/sunlab/sunflow/models/encoder_discriminator.py 96
-rw-r--r-- code/sunlab/sunflow/models/utilities.py 93
-rw-r--r-- code/sunlab/sunflow/plotting/__init__.py 1
-rw-r--r-- code/sunlab/sunflow/plotting/model_extensions.py 289
-rw-r--r-- code/sunlab/suntorch/__init__.py 3
-rw-r--r-- code/sunlab/suntorch/data/__init__.py 1
-rw-r--r-- code/sunlab/suntorch/data/utilities.py 55
-rw-r--r-- code/sunlab/suntorch/models/__init__.py 3
-rw-r--r-- code/sunlab/suntorch/models/adversarial_autoencoder.py 165
-rw-r--r-- code/sunlab/suntorch/models/autoencoder.py 114
-rw-r--r-- code/sunlab/suntorch/models/common.py 12
-rw-r--r-- code/sunlab/suntorch/models/convolutional/variational/autoencoder.py 190
-rw-r--r-- code/sunlab/suntorch/models/decoder.py 33
-rw-r--r-- code/sunlab/suntorch/models/discriminator.py 32
-rw-r--r-- code/sunlab/suntorch/models/encoder.py 32
-rw-r--r-- code/sunlab/suntorch/models/variational/autoencoder.py 128
-rw-r--r-- code/sunlab/suntorch/models/variational/common.py 12
-rw-r--r-- code/sunlab/suntorch/models/variational/decoder.py 33
-rw-r--r-- code/sunlab/suntorch/models/variational/encoder.py 34
-rw-r--r-- code/sunlab/suntorch/plotting/__init__.py 1
-rw-r--r-- code/sunlab/suntorch/plotting/model_extensions.py 34
-rw-r--r-- code/sunlab/transform_data.py 799
68 files changed, 4938 insertions, 0 deletions
diff --git a/code/sunlab/__init__.py b/code/sunlab/__init__.py
new file mode 100644
index 0000000..0ccac86
--- /dev/null
+++ b/code/sunlab/__init__.py
@@ -0,0 +1,17 @@
+from .common import *
+
+font = {"family": "DejaVu Sans", "weight": "regular", "size": 20}
+import matplotlib
+
+matplotlib.rc("font", **font)
+
+
+def set_font(ptsize=10):
+ font = {"family": "DejaVu Sans", "weight": "regular", "size": ptsize}
+ import matplotlib
+
+ matplotlib.rc("font", **font)
+
+
+def set_font_l(ptsize=20):
+ set_font(ptsize)
diff --git a/code/sunlab/common/__init__.py b/code/sunlab/common/__init__.py
new file mode 100644
index 0000000..cb6716c
--- /dev/null
+++ b/code/sunlab/common/__init__.py
@@ -0,0 +1,5 @@
+from .data import *
+from .distribution import *
+from .scaler import *
+from .mathlib import *
+from .plotting import *
diff --git a/code/sunlab/common/data/__init__.py b/code/sunlab/common/data/__init__.py
new file mode 100644
index 0000000..3e26874
--- /dev/null
+++ b/code/sunlab/common/data/__init__.py
@@ -0,0 +1,6 @@
+from .basic import *
+from .dataset import *
+from .dataset_iterator import *
+from .shape_dataset import *
+from .image_dataset import *
+from .utilities import *
diff --git a/code/sunlab/common/data/basic.py b/code/sunlab/common/data/basic.py
new file mode 100644
index 0000000..bb2e912
--- /dev/null
+++ b/code/sunlab/common/data/basic.py
@@ -0,0 +1,6 @@
+import numpy
+
+
+numpy.load_dat = lambda *args, **kwargs: numpy.load(
+ *args, **kwargs, allow_pickle=True
+).item()
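
A minimal usage sketch of the load_dat helper above (the file name is hypothetical): numpy.load returns a 0-d object array for pickled dicts, and .item() unwraps it back into a plain dict.

import numpy
import sunlab.common.data.basic  # importing the module attaches numpy.load_dat as a side effect

numpy.save("example_dict.npy", {"a": 1, "b": [2, 3]})  # hypothetical file
data = numpy.load_dat("example_dict.npy")               # plain dict, not a 0-d object array
assert data["a"] == 1
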
diff --git a/code/sunlab/common/data/dataset.py b/code/sunlab/common/data/dataset.py
new file mode 100644
index 0000000..8589abf
--- /dev/null
+++ b/code/sunlab/common/data/dataset.py
@@ -0,0 +1,255 @@
+from .dataset_iterator import DatasetIterator
+
+
+class Dataset:
+ """# Dataset Superclass"""
+
+ base_scale = 10.0
+
+ def __init__(
+ self,
+ dataset_filename,
+ data_columns=[],
+ label_columns=[],
+ batch_size=None,
+ shuffle=False,
+ val_split=0.0,
+ scaler=None,
+ sort_columns=None,
+ random_seed=4332,
+ pre_scale=10.0,
+ **kwargs
+ ):
+ """# Initialize Dataset
+ self.dataset = dataset (N, ...)
+ self.labels = labels (N, ...)
+
+ Optional Arguments:
+ - prescale_function: The function that takes the ratio and transforms
+ the dataset by multiplying the prescale_function output
+ - sort_columns: The columns to sort the data by initially
+ - equal_split: If the classifications should be equally split in
+ training"""
+ from pandas import read_csv
+ from numpy import array, all
+ from numpy.random import seed
+
+        if random_seed is not None:
+ seed(random_seed)
+
+ # Basic Dataset Information
+ self.data_columns = data_columns
+ self.label_columns = label_columns
+ self.source = dataset_filename
+ self.dataframe = read_csv(self.source)
+
+ # Pre-scaling Transformation
+ prescale_ratio = self.base_scale / pre_scale
+ ratio = prescale_ratio
+ prescale_powers = array([2, 1, 1, 0, 2, 1, 0, 0, 1, 1, 1, 1, 1])
+ if "prescale_function" in kwargs.keys():
+ prescale_function = kwargs["prescale_function"]
+ else:
+
+ def prescale_function(x):
+ return x**prescale_powers
+
+ self.prescale_function = prescale_function
+ self.prescale_factor = self.prescale_function(ratio)
+ assert (
+ len(data_columns) == self.prescale_factor.shape[0]
+ ), "Column Mismatch on Prescale"
+ self.original_scale = pre_scale
+
+ # Scaling Transformation
+ self.scaled = scaler is not None
+ self.scaler = scaler
+
+ # Training Dataset Information
+ self.do_split = False if val_split == 0.0 else True
+ self.validation_split = val_split
+ self.batch_size = batch_size
+ self.do_shuffle = shuffle
+ self.equal_split = False
+ if "equal_split" in kwargs.keys():
+ self.equal_split = kwargs["equal_split"]
+
+ # Classification Labels if they exist
+ self.dataset = self.dataframe[self.data_columns].to_numpy()
+ if len(self.label_columns) == 0:
+ self.labels = None
+ elif not all([column in self.dataframe.columns for column in label_columns]):
+ import warnings
+
+ warnings.warn(
+ "No classification labels found for the dataset", RuntimeWarning
+ )
+ self.labels = None
+ else:
+ self.labels = self.dataframe[self.label_columns].squeeze()
+
+ # Initialize the dataset
+ if "sort_columns" in kwargs.keys():
+ self.sort(kwargs["sort_columns"])
+ if self.do_shuffle:
+ self.shuffle()
+ if self.do_split:
+ self.split()
+ self.refresh_dataset()
+
+ def __len__(self):
+ """# Get how many cases are in the dataset"""
+ return self.dataset.shape[0]
+
+ def __getitem__(self, idx):
+ """# Make Dataset Sliceable"""
+ idx_slice = None
+ slice_stride = 1 if self.batch_size is None else self.batch_size
+ # If we pass a slice, return the slice
+ if type(idx) == slice:
+ idx_slice = idx
+ # If we pass an int, return a batch-size slice
+ else:
+ idx_slice = slice(
+ idx * slice_stride, min([len(self), (idx + 1) * slice_stride])
+ )
+ if self.labels is None:
+ return self.dataset[idx_slice, ...]
+ return self.dataset[idx_slice, ...], self.labels[idx_slice, ...]
+
+ def scale_data(self, data):
+ """# Scale dataset from scaling function"""
+ data = data * self.prescale_factor
+ if not (self.scaler is None):
+ data = self.scaler(data)
+ return data
+
+ def scale(self):
+ """# Scale Dataset"""
+ self.dataset = self.scale_data(self.dataset)
+
+ def refresh_dataset(self, dataframe=None):
+ """# Refresh Dataset
+
+ Regenerate the dataset from a dataframe.
+ Primarily used after a sort or filter."""
+ if dataframe is None:
+ dataframe = self.dataframe
+ self.dataset = dataframe[self.data_columns].to_numpy()
+ if self.labels is not None:
+ self.labels = dataframe[self.label_columns].to_numpy().squeeze()
+ self.scale()
+
+ def sort_on(self, columns):
+ """# Sort Dataset on Column(s)"""
+ from numpy import all
+
+ if type(columns) == str:
+ columns = [columns]
+ if columns is not None:
+ assert all(
+ [column in self.dataframe.columns for column in columns]
+ ), "Dataframe does not contain some provided columns!"
+ self.dataframe = self.dataframe.sort_values(by=columns)
+ self.refresh_dataset()
+
+ def filter_on(self, column, value):
+ """# Filter Dataset on Column Value(s)"""
+ assert column in self.dataframe.columns, "Column DNE"
+ self.working_dataset = self.dataframe[self.dataframe[column].isin(value)]
+ self.refresh_dataset(self.working_dataset)
+
+ def filter_off(self):
+ """# Remove any filter on the dataset"""
+ self.refresh_dataset()
+
+ def unique(self, column):
+ """# Get unique values in a column(s)"""
+ assert column in self.dataframe.columns, "Column DNE"
+ from numpy import unique
+
+ return unique(self.dataframe[column])
+
+ def shuffle_data(self, data, labels=None):
+ """# Shuffle a dataset"""
+ from numpy.random import permutation
+
+ shuffled = permutation(data.shape[0])
+ if labels is not None:
+ assert (
+ self.labels.shape[0] == self.dataset.shape[0]
+ ), "Dataset and Label Shape Mismatch"
+ shuf_data = data[shuffled, ...]
+ shuf_labels = labels[shuffled]
+ if len(labels.shape) > 1:
+ shuf_labels = labels[shuffled,...]
+ return shuf_data, shuf_labels
+ return data[shuffled, ...]
+
+ def shuffle(self):
+ """# Shuffle the dataset"""
+ if self.do_shuffle:
+ if self.labels is None:
+ self.dataset = self.shuffle_data(self.dataset)
+            else:
+                self.dataset, self.labels = self.shuffle_data(
+                    self.dataset, self.labels
+                )
+
+ def split(self):
+ """# Training/ Validation Splitting"""
+ from numpy import floor, unique, where, hstack, delete
+ from numpy.random import permutation
+
+ equal_classes = self.equal_split
+ if not self.do_split:
+ return
+ assert self.validation_split <= 1.0, "Too High"
+ assert self.validation_split > 0.0, "Too Low"
+ train_count = int(floor(self.dataset.shape[0] * (1 - self.validation_split)))
+ training_data = self.dataset[:train_count, ...]
+ training_labels = None
+ validation_data = self.dataset[train_count:, ...]
+ validation_labels = None
+ if self.labels is not None:
+ if equal_classes:
+ # Ensure the split balances the prevalence of each class
+ assert len(self.labels.shape) == 1, "1D Classification Only Currently"
+ classification_breakdown = unique(self.labels, return_counts=True)
+ train_count = min(
+ [
+ train_count,
+                        classification_breakdown[0].shape[0]
+ * min(classification_breakdown[1]),
+ ]
+ )
+                class_size = train_count // classification_breakdown[0].shape[0]
+ class_indicies = [
+ permutation(where(self.labels == _class)[0])
+ for _class in classification_breakdown[0]
+ ]
+ class_indicies = [indexes[:class_size] for indexes in class_indicies]
+ train_class_indicies = hstack(class_indicies).squeeze()
+ train_class_indicies = permutation(train_class_indicies)
+ training_data = self.dataset[train_class_indicies, ...]
+ training_labels = self.labels[train_class_indicies]
+ if len(self.labels.shape) > 1:
+ training_labels = self.labels[train_class_indicies,...]
+ validation_data = delete(self.dataset, train_class_indicies, axis=0)
+ validation_labels = delete(
+ self.labels, train_class_indicies, axis=0
+ ).squeeze()
+ else:
+ training_labels = self.labels[:train_count]
+ if len(training_labels.shape) > 1:
+ training_labels = self.labels[:train_count, ...]
+ validation_labels = self.labels[train_count:]
+ if len(validation_labels.shape) > 1:
+ validation_labels = self.labels[train_count:, ...]
+ self.training_data = training_data
+ self.validation_data = validation_data
+ self.training = DatasetIterator(training_data, training_labels, self.batch_size)
+ self.validation = DatasetIterator(
+ validation_data, validation_labels, self.batch_size
+ )
+
+ def reset_iterators(self):
+ """# Reset Train/ Validation Iterators"""
+ self.split()
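
A minimal usage sketch of this class (the CSV name and its two feature columns are hypothetical, and the file is assumed to also contain a 'Class' column); a custom prescale_function is passed because the built-in default expects the 13 morphology columns that ShapeDataset supplies:

import numpy as np
from sunlab.common.data.dataset import Dataset

ds = Dataset(
    "cells.csv",                                          # hypothetical CSV file
    data_columns=["Area", "Perimeter"],
    label_columns=["Class"],
    batch_size=32,
    prescale_function=lambda ratio: np.ones(2) * ratio,   # one pre-scale factor per data column
)
features, labels = ds[0]                                  # first batch of up to 32 rows
print(len(ds), features.shape)                            # total rows, batch shape
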
diff --git a/code/sunlab/common/data/dataset_iterator.py b/code/sunlab/common/data/dataset_iterator.py
new file mode 100644
index 0000000..7c91caa
--- /dev/null
+++ b/code/sunlab/common/data/dataset_iterator.py
@@ -0,0 +1,34 @@
+class DatasetIterator:
+ """# Dataset Iterator
+
+ Creates an iterator object on a dataset and labels"""
+
+ def __init__(self, dataset, labels=None, batch_size=None):
+ """# Initialize the iterator with the dataset and labels
+
+ - batch_size: How many to include in the iteration"""
+ self.dataset = dataset
+ self.labels = labels
+ self.current = 0
+ self.batch_size = (
+ batch_size if batch_size is not None else self.dataset.shape[0]
+ )
+
+ def __iter__(self):
+ """# Iterator Function"""
+ return self
+
+ def __next__(self):
+ """# Next Iteration
+
+ Slice the dataset and labels to provide"""
+ self.cur = self.current
+ self.current += 1
+ if self.cur * self.batch_size < self.dataset.shape[0]:
+ iterator_slice = slice(
+ self.cur * self.batch_size, (self.cur + 1) * self.batch_size
+ )
+ if self.labels is None:
+ return self.dataset[iterator_slice, ...]
+ return self.dataset[iterator_slice, ...], self.labels[iterator_slice, ...]
+ raise StopIteration
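
For reference, a small sketch of the iteration behaviour when the batch size does not divide the dataset evenly (the final batch is simply shorter):

from numpy import arange
from sunlab.common.data.dataset_iterator import DatasetIterator

data = arange(10).reshape(5, 2)      # 5 cases, 2 features
labels = arange(5)
for batch, batch_labels in DatasetIterator(data, labels, batch_size=2):
    print(batch.shape, batch_labels.shape)   # three batches of 2, 2, and 1 cases
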
diff --git a/code/sunlab/common/data/image_dataset.py b/code/sunlab/common/data/image_dataset.py
new file mode 100644
index 0000000..46e77b6
--- /dev/null
+++ b/code/sunlab/common/data/image_dataset.py
@@ -0,0 +1,75 @@
+class ImageDataset:
+ def __init__(
+ self,
+ base_directory,
+ ext="png",
+ channels=[0],
+ batch_size=None,
+ shuffle=False,
+ rotate=False,
+ rotate_p=1.,
+ ):
+ """# Image Dataset
+
+ Load a directory of images"""
+ from glob import glob
+ from matplotlib.pyplot import imread
+ from numpy import newaxis, vstack
+ from numpy.random import permutation, rand
+
+ self.base_directory = base_directory
+ files = glob(self.base_directory + "*." + ext)
+ self.dataset = []
+ for file in files:
+ im = imread(file)[newaxis, :, :, channels].transpose(0, 3, 1, 2)
+ self.dataset.append(im)
+            # Also add flipped and transposed copies of the image (rotations/reflections)
+ if rotate:
+ if rand() < rotate_p:
+ self.dataset.append(im[:, :, ::-1, :])
+ if rand() < rotate_p:
+ self.dataset.append(im[:, :, :, ::-1])
+ if rand() < rotate_p:
+ self.dataset.append(im[:, :, ::-1, ::-1])
+ if rand() < rotate_p:
+ self.dataset.append(im.transpose(0, 1, 3, 2))
+ if rand() < rotate_p:
+ self.dataset.append(im.transpose(0, 1, 3, 2)[:, :, ::-1, :])
+ if rand() < rotate_p:
+ self.dataset.append(im.transpose(0, 1, 3, 2)[:, :, :, ::-1])
+ if rand() < rotate_p:
+ self.dataset.append(im.transpose(0, 1, 3, 2)[:, :, ::-1, ::-1])
+ self.dataset = vstack(self.dataset)
+ if shuffle:
+ self.dataset = self.dataset[permutation(self.dataset.shape[0]), ...]
+ self.batch_size = (
+ batch_size if batch_size is not None else self.dataset.shape[0]
+ )
+
+ def torch(self, device=None):
+ """# Cast to Torch Tensor"""
+ import torch
+
+ if device is None:
+ device = torch.device("cpu")
+ return torch.tensor(self.dataset).to(device)
+
+ def numpy(self):
+ """# Cast to Numpy Array"""
+ return self.dataset
+
+ def __len__(self):
+ """# Return Number of Cases
+
+ (or Number in each Batch)"""
+ return self.dataset.shape[0] // self.batch_size
+
+ def __getitem__(self, index):
+ """# Slice By Batch"""
+ if type(index) == tuple:
+ return self.dataset[index]
+ elif type(index) == int:
+ return self.dataset[
+ index * self.batch_size : (index + 1) * self.batch_size, ...
+ ]
+ return
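
A usage sketch, assuming a directory of same-sized RGB(A) PNG images (the directory name is hypothetical); each image becomes a (1, C, H, W) array and integer indexing returns one batch:

from sunlab.common.data.image_dataset import ImageDataset

images = ImageDataset("./masks/", ext="png", channels=[0], batch_size=8, shuffle=True)
print(len(images), "batches; first batch shape:", images[0].shape)   # e.g. (8, 1, H, W)
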
diff --git a/code/sunlab/common/data/shape_dataset.py b/code/sunlab/common/data/shape_dataset.py
new file mode 100644
index 0000000..5a68736
--- /dev/null
+++ b/code/sunlab/common/data/shape_dataset.py
@@ -0,0 +1,57 @@
+from .dataset import Dataset
+
+
+class ShapeDataset(Dataset):
+ """# Shape Dataset"""
+
+ def __init__(
+ self,
+ dataset_filename,
+ data_columns=[
+ "Area",
+ "MjrAxisLength",
+ "MnrAxisLength",
+ "Eccentricity",
+ "ConvexArea",
+ "EquivDiameter",
+ "Solidity",
+ "Extent",
+ "Perimeter",
+ "ConvexPerim",
+ "FibLen",
+ "InscribeR",
+ "BlebLen",
+ ],
+ label_columns=["Class"],
+ batch_size=None,
+ shuffle=False,
+ val_split=0.0,
+ scaler=None,
+ sort_columns=None,
+ random_seed=4332,
+ pre_scale=10,
+ **kwargs
+ ):
+ """# Initialize Dataset
+ self.dataset = dataset (N, ...)
+ self.labels = labels (N, ...)
+
+ Optional Arguments:
+ - prescale_function: The function that takes the ratio and transforms
+ the dataset by multiplying the prescale_function output
+ - sort_columns: The columns to sort the data by initially
+ - equal_split: If the classifications should be equally split in
+ training"""
+ super().__init__(
+ dataset_filename,
+ data_columns=data_columns,
+ label_columns=label_columns,
+ batch_size=batch_size,
+ shuffle=shuffle,
+ val_split=val_split,
+ scaler=scaler,
+ sort_columns=sort_columns,
+ random_seed=random_seed,
+ pre_scale=pre_scale,
+ **kwargs
+ )
diff --git a/code/sunlab/common/data/utilities.py b/code/sunlab/common/data/utilities.py
new file mode 100644
index 0000000..6b4e6f3
--- /dev/null
+++ b/code/sunlab/common/data/utilities.py
@@ -0,0 +1,119 @@
+from .shape_dataset import ShapeDataset
+from ..scaler.max_abs_scaler import MaxAbsScaler
+
+
+def import_10x(
+ filename,
+ magnification=10,
+ batch_size=None,
+ shuffle=False,
+ val_split=0.0,
+ scaler=None,
+ sort_columns=None,
+):
+ """# Import a 10x Image Dataset
+
+ Pixel-to-micron: ???"""
+ magnification = 10
+ dataset = ShapeDataset(
+ filename,
+ batch_size=batch_size,
+ shuffle=shuffle,
+ pre_scale=magnification,
+ val_split=val_split,
+ scaler=scaler,
+ sort_columns=sort_columns,
+ )
+ return dataset
+
+
+def import_20x(
+ filename,
+    magnification=20,
+ batch_size=None,
+ shuffle=False,
+ val_split=0.0,
+ scaler=None,
+ sort_columns=None,
+):
+ """# Import a 20x Image Dataset
+
+ Pixel-to-micron: ???"""
+ magnification = 20
+ dataset = ShapeDataset(
+ filename,
+ batch_size=batch_size,
+ shuffle=shuffle,
+ pre_scale=magnification,
+ val_split=val_split,
+ scaler=scaler,
+ sort_columns=sort_columns,
+ )
+ return dataset
+
+
+def import_dataset(
+ filename,
+ magnification,
+ batch_size=None,
+ shuffle=False,
+ val_split=0.0,
+ scaler=None,
+ sort_columns=None,
+):
+ """# Import a dataset
+
+    Requires a magnification to be specified"""
+ dataset = ShapeDataset(
+ filename,
+ pre_scale=magnification,
+ batch_size=batch_size,
+ shuffle=shuffle,
+ val_split=val_split,
+ scaler=scaler,
+ sort_columns=sort_columns,
+ )
+ return dataset
+
+
+def import_full_dataset(fname, magnification=20, scaler=None):
+ """# Import a Full Dataset
+
+    If a classification file exists (a .txt with 'frame' and 'cellnumber' headers plus per-class columns), also import and merge it"""
+ from os.path import isfile
+ import pandas as pd
+ import numpy as np
+
+ cfname = fname
+ tfname = cfname[:-3] + "txt"
+ columns = [
+ "frame",
+ "cellnumber",
+ "x-cent",
+ "y-cent",
+ "actinedge",
+ "filopodia",
+ "bleb",
+ "lamellipodia",
+ ]
+ if isfile(tfname):
+ dataset = import_dataset(cfname, magnification=magnification, scaler=scaler)
+ class_df = np.loadtxt(tfname, skiprows=1)
+ class_df = pd.DataFrame(class_df, columns=columns)
+ full_df = pd.merge(
+ dataset.dataframe,
+ class_df,
+ left_on=["Frames", "CellNum"],
+ right_on=["frame", "cellnumber"],
+ )
+ full_df["Class"] = np.argmax(
+ class_df[["actinedge", "filopodia", "bleb", "lamellipodia"]].to_numpy(),
+ axis=-1,
+ )
+ dataset.labels = full_df["Class"].to_numpy()
+ else:
+ dataset = import_dataset(cfname, magnification=magnification, scaler=scaler)
+ full_df = dataset.dataframe
+ dataset.dataframe = full_df
+ dataset.filter_off()
+ return dataset
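
A usage sketch of the importers above; the file names are hypothetical, and the CSVs are expected to carry the 13 ShapeDataset columns (plus 'Frames' and 'CellNum' when a companion .txt classification file exists alongside):

from sunlab.common.data.utilities import import_dataset, import_full_dataset

shapes = import_dataset("cells.csv", magnification=20)              # no labels merged
labeled = import_full_dataset("experiment.csv", magnification=20)   # merges experiment.txt if present
print(labeled.dataframe.columns)
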
diff --git a/code/sunlab/common/distribution/__init__.py b/code/sunlab/common/distribution/__init__.py
new file mode 100644
index 0000000..a23cb0c
--- /dev/null
+++ b/code/sunlab/common/distribution/__init__.py
@@ -0,0 +1,7 @@
+from .gaussian_distribution import *
+from .x_gaussian_distribution import *
+from .o_gaussian_distribution import *
+from .s_gaussian_distribution import *
+from .uniform_distribution import *
+from .symmetric_uniform_distribution import *
+from .swiss_roll_distribution import *
diff --git a/code/sunlab/common/distribution/adversarial_distribution.py b/code/sunlab/common/distribution/adversarial_distribution.py
new file mode 100644
index 0000000..675c00e
--- /dev/null
+++ b/code/sunlab/common/distribution/adversarial_distribution.py
@@ -0,0 +1,35 @@
+class AdversarialDistribution:
+ """# Distribution Class to use in Adversarial Autoencoder
+
+ For any distribution to be implemented, make sure to ensure each of the
+ methods are implemented"""
+
+ def __init__(self, N):
+ """# Initialize the distribution for N-dimensions"""
+ self.dims = N
+ return
+
+ def get_full_name(self):
+ """# Return a human-readable name of the distribution"""
+ return self.full_name
+
+ def get_name(self):
+ """# Return a shortened name of the distribution
+
+    Preferably, the name should include characters that can be included in
+ a file name"""
+ return self.name
+
+ def __str__(self):
+ """# Returns the short name"""
+ return self.get_name()
+
+ def __repr__(self):
+ """# Returns the short name"""
+ return self.get_name()
+
+ def __call__(self, *args):
+ """# Magic method when calling the distribution
+
+ This method is going to be called when you use `dist(...)`"""
+ raise NotImplementedError("This distribution has not been implemented yet")
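
A sketch of a conforming subclass, following the contract described in the docstring above (this Laplace prior is illustrative only and not part of the package):

import numpy as np
from sunlab.common.distribution.adversarial_distribution import AdversarialDistribution

class LaplaceDistribution(AdversarialDistribution):
    """N-dimensional Laplace prior (illustrative example)"""

    def __init__(self, N):
        super().__init__(N)
        self.full_name = f"{N}-Dimensional Laplace Distribution"
        self.name = "L"

    def __call__(self, *args):
        # dist(1000) -> samples with shape (1000, self.dims)
        return np.random.laplace(size=[*args, self.dims])

samples = LaplaceDistribution(2)(1000)
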
diff --git a/code/sunlab/common/distribution/gaussian_distribution.py b/code/sunlab/common/distribution/gaussian_distribution.py
new file mode 100644
index 0000000..e478ab6
--- /dev/null
+++ b/code/sunlab/common/distribution/gaussian_distribution.py
@@ -0,0 +1,23 @@
+from .adversarial_distribution import *
+
+
+class GaussianDistribution(AdversarialDistribution):
+ """# Gaussian Distribution"""
+
+ def __init__(self, N):
+ """# Gaussian Distribution Initialization
+
+ Initializes the name and dimensions"""
+ super().__init__(N)
+ self.full_name = f"{N}-Dimensional Gaussian Distribution"
+ self.name = "G"
+
+ def __call__(self, *args):
+ """# Magic method when calling the distribution
+
+ This method is going to be called when you use gauss(N1,...,Nm)"""
+ import numpy as np
+
+ return np.random.multivariate_normal(
+ mean=np.zeros(self.dims), cov=np.eye(self.dims), size=[*args]
+ )
diff --git a/code/sunlab/common/distribution/o_gaussian_distribution.py b/code/sunlab/common/distribution/o_gaussian_distribution.py
new file mode 100644
index 0000000..1222ca1
--- /dev/null
+++ b/code/sunlab/common/distribution/o_gaussian_distribution.py
@@ -0,0 +1,38 @@
+from .adversarial_distribution import *
+
+
+class OGaussianDistribution(AdversarialDistribution):
+ """# O Gaussian Distribution"""
+
+ def __init__(self, N):
+ """# O Gaussian Distribution Initialization
+
+ Initializes the name and dimensions"""
+ super().__init__(N)
+ assert self.dims == 2, "This Distribution only Supports 2-Dimensions"
+ self.full_name = "2-Dimensional O-Gaussian Distribution"
+ self.name = "OG"
+
+ def __call__(self, *args):
+ """# Magic method when calling the distribution
+
+ This method is going to be called when you use xgauss(case_count)"""
+ import numpy as np
+
+ assert len(args) == 1, "Only 1 argument supported"
+ N = args[0]
+ sample_base = np.zeros((4 * N, 2))
+ sample_base[0 * N : (0 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[1, 1], cov=[[1, 0], [0, 1]], size=[N]
+ )
+ sample_base[1 * N : (1 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[-1, -1], cov=[[1, 0], [0, 1]], size=[N]
+ )
+ sample_base[2 * N : (2 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[-1, 1], cov=[[1, 0], [0, 1]], size=[N]
+ )
+ sample_base[3 * N : (3 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[1, -1], cov=[[1, 0], [0, 1]], size=[N]
+ )
+ np.random.shuffle(sample_base)
+ return sample_base[:N, :]
diff --git a/code/sunlab/common/distribution/s_gaussian_distribution.py b/code/sunlab/common/distribution/s_gaussian_distribution.py
new file mode 100644
index 0000000..cace57f
--- /dev/null
+++ b/code/sunlab/common/distribution/s_gaussian_distribution.py
@@ -0,0 +1,40 @@
+from .adversarial_distribution import *
+
+
+class SGaussianDistribution(AdversarialDistribution):
+ """# S Gaussian Distribution"""
+
+ def __init__(self, N, scale=0):
+ """# S Gaussian Distribution Initialization
+
+ Initializes the name and dimensions"""
+ super().__init__(N)
+ assert self.dims == 2, "This Distribution only Supports 2-Dimensions"
+ self.full_name = "2-Dimensional S-Gaussian Distribution"
+ self.name = "SG"
+ self.scale = scale
+
+ def __call__(self, *args):
+ """# Magic method when calling the distribution
+
+ This method is going to be called when you use xgauss(case_count)"""
+ import numpy as np
+
+ assert len(args) == 1, "Only 1 argument supported"
+ N = args[0]
+ sample_base = np.zeros((4 * N, 2))
+ scale = self.scale
+ sample_base[0 * N : (0 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[1, 1], cov=[[1, scale], [scale, 1]], size=[N]
+ )
+ sample_base[1 * N : (1 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[-1, -1], cov=[[1, scale], [scale, 1]], size=[N]
+ )
+ sample_base[2 * N : (2 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[-1, 1], cov=[[1, -scale], [-scale, 1]], size=[N]
+ )
+ sample_base[3 * N : (3 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[1, -1], cov=[[1, -scale], [-scale, 1]], size=[N]
+ )
+ np.random.shuffle(sample_base)
+ return sample_base[:N, :]
diff --git a/code/sunlab/common/distribution/swiss_roll_distribution.py b/code/sunlab/common/distribution/swiss_roll_distribution.py
new file mode 100644
index 0000000..613bfc5
--- /dev/null
+++ b/code/sunlab/common/distribution/swiss_roll_distribution.py
@@ -0,0 +1,42 @@
+from .adversarial_distribution import *
+
+
+class SwissRollDistribution(AdversarialDistribution):
+ """# Swiss Roll Distribution"""
+
+ def __init__(self, N, scaling_factor=0.25, noise_level=0.15):
+ """# Swiss Roll Distribution Initialization
+
+ Initializes the name and dimensions"""
+ super().__init__(N)
+ assert (self.dims == 2) or (
+ self.dims == 3
+ ), "This Distribution only Supports 2,3-Dimensions"
+        self.full_name = f"{self.dims}-Dimensional Swiss Roll Distribution"
+ self.name = f"SR{self.dims}"
+ self.noise_level = noise_level
+ self.scale = scaling_factor
+
+ def __call__(self, *args):
+ """# Magic method when calling the distribution
+
+ This method is going to be called when you use xgauss(case_count)"""
+ import numpy as np
+
+ assert len(args) == 1, "Only 1 argument supported"
+ N = args[0]
+ noise = self.noise_level
+ scaling_factor = self.scale
+
+ t = 3 * np.pi / 2 * (1 + 2 * np.random.rand(1, N))
+ h = 21 * np.random.rand(1, N)
+ RANDOM = np.random.randn(3, N) * noise
+ data = (
+ np.concatenate(
+ (scaling_factor * t * np.cos(t), h, scaling_factor * t * np.sin(t))
+ )
+ + RANDOM
+ )
+ if self.dims == 2:
+ return data.T[:, [0, 2]]
+ return data.T[:, [0, 2, 1]]
diff --git a/code/sunlab/common/distribution/symmetric_uniform_distribution.py b/code/sunlab/common/distribution/symmetric_uniform_distribution.py
new file mode 100644
index 0000000..c3a4db0
--- /dev/null
+++ b/code/sunlab/common/distribution/symmetric_uniform_distribution.py
@@ -0,0 +1,21 @@
+from .adversarial_distribution import *
+
+
+class SymmetricUniformDistribution(AdversarialDistribution):
+ """# Symmetric Uniform Distribution on [-1, 1)"""
+
+ def __init__(self, N):
+ """# Symmetric Uniform Distribution Initialization
+
+ Initializes the name and dimensions"""
+ super().__init__(N)
+ self.full_name = f"{N}-Dimensional Symmetric Uniform Distribution"
+ self.name = "SU"
+
+ def __call__(self, *args):
+ """# Magic method when calling the distribution
+
+ This method is going to be called when you use suniform(N1,...,Nm)"""
+ import numpy as np
+
+ return np.random.rand(*args, self.dims) * 2.0 - 1.0
diff --git a/code/sunlab/common/distribution/uniform_distribution.py b/code/sunlab/common/distribution/uniform_distribution.py
new file mode 100644
index 0000000..3e23e67
--- /dev/null
+++ b/code/sunlab/common/distribution/uniform_distribution.py
@@ -0,0 +1,21 @@
+from .adversarial_distribution import *
+
+
+class UniformDistribution(AdversarialDistribution):
+ """# Uniform Distribution on [0, 1)"""
+
+ def __init__(self, N):
+ """# Uniform Distribution Initialization
+
+ Initializes the name and dimensions"""
+ super().__init__(N)
+ self.full_name = f"{N}-Dimensional Uniform Distribution"
+ self.name = "U"
+
+ def __call__(self, *args):
+ """# Magic method when calling the distribution
+
+ This method is going to be called when you use uniform(N1,...,Nm)"""
+ import numpy as np
+
+ return np.random.rand(*args, self.dims)
diff --git a/code/sunlab/common/distribution/x_gaussian_distribution.py b/code/sunlab/common/distribution/x_gaussian_distribution.py
new file mode 100644
index 0000000..b4330aa
--- /dev/null
+++ b/code/sunlab/common/distribution/x_gaussian_distribution.py
@@ -0,0 +1,38 @@
+from .adversarial_distribution import *
+
+
+class XGaussianDistribution(AdversarialDistribution):
+ """# X Gaussian Distribution"""
+
+ def __init__(self, N):
+ """# X Gaussian Distribution Initialization
+
+ Initializes the name and dimensions"""
+ super().__init__(N)
+ assert self.dims == 2, "This Distribution only Supports 2-Dimensions"
+ self.full_name = "2-Dimensional X-Gaussian Distribution"
+ self.name = "XG"
+
+ def __call__(self, *args):
+ """# Magic method when calling the distribution
+
+ This method is going to be called when you use xgauss(case_count)"""
+ import numpy as np
+
+ assert len(args) == 1, "Only 1 argument supported"
+ N = args[0]
+ sample_base = np.zeros((4 * N, 2))
+ sample_base[0 * N : (0 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[1, 1], cov=[[1, 0.7], [0.7, 1]], size=[N]
+ )
+ sample_base[1 * N : (1 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[-1, -1], cov=[[1, 0.7], [0.7, 1]], size=[N]
+ )
+ sample_base[2 * N : (2 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[-1, 1], cov=[[1, -0.7], [-0.7, 1]], size=[N]
+ )
+ sample_base[3 * N : (3 + 1) * N, :] = np.random.multivariate_normal(
+ mean=[1, -1], cov=[[1, -0.7], [-0.7, 1]], size=[N]
+ )
+ np.random.shuffle(sample_base)
+ return sample_base[:N, :]
diff --git a/code/sunlab/common/mathlib/__init__.py b/code/sunlab/common/mathlib/__init__.py
new file mode 100644
index 0000000..9b5ed21
--- /dev/null
+++ b/code/sunlab/common/mathlib/__init__.py
@@ -0,0 +1 @@
+from .base import *
diff --git a/code/sunlab/common/mathlib/base.py b/code/sunlab/common/mathlib/base.py
new file mode 100644
index 0000000..38ab14c
--- /dev/null
+++ b/code/sunlab/common/mathlib/base.py
@@ -0,0 +1,57 @@
+import numpy as np
+
+
+def angle(a, b):
+ """# Get Angle Between Row Vectors"""
+ from numpy import arctan2, pi
+
+ theta_a = arctan2(a[:, 1], a[:, 0])
+ theta_b = arctan2(b[:, 1], b[:, 0])
+ d_theta = theta_b - theta_a
+    assert ((-pi <= d_theta) & (d_theta <= pi)).all(), "Theta difference outside of [-π,π]"
+ return d_theta
+
+
+def normalize(column):
+ """# Normalize Column Vector"""
+ from numpy.linalg import norm
+
+ return column / norm(column, axis=0)
+
+
+def winding(xy_grid, trajectory_start, trajectory_end):
+ """# Get Winding Number on Grid"""
+ from numpy import zeros, cross, clip, arcsin
+
+ trajectories = trajectory_end - trajectory_start
+ winding = zeros((xy_grid.shape[0]))
+ for idx, trajectory in enumerate(trajectories):
+ r = xy_grid - trajectory_start[idx]
+        cross_product = cross(normalize(trajectory), normalize(r))
+        cross_product = clip(cross_product, -1, 1)
+        theta = arcsin(cross_product)
+ winding += theta
+ return winding
+
+
+def vorticity(xy_grid, trajectory_start, trajectory_end):
+ """# Get Vorticity Number on Grid"""
+ from numpy import zeros, cross
+
+ trajectories = trajectory_end - trajectory_start
+ vorticity = zeros((xy_grid.shape[0]))
+ for idx, trajectory in enumerate(trajectories):
+ r = xy_grid - trajectory_start[idx]
+ vorticity += cross(normalize(trajectory), normalize(r))
+ return vorticity
+
+
+def data_range(data):
+ """# Get the range of values for each row"""
+ from numpy import min, max
+
+ return min(data, axis=0), max(data, axis=0)
+
+
+np.normalize = normalize
+np.range = data_range
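
A small sketch exercising the grid helpers above on toy segments (purely illustrative data):

import numpy as np
from sunlab.common.mathlib.base import winding, vorticity

starts = np.random.randn(50, 2)                # 50 trajectory segments in 2D
ends = starts + 0.1 * np.random.randn(50, 2)
grid = np.random.randn(200, 2)                 # 200 probe points
print(winding(grid, starts, ends).shape)       # (200,) accumulated signed angles
print(vorticity(grid, starts, ends).shape)     # (200,)
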
diff --git a/code/sunlab/common/mathlib/lyapunov.py b/code/sunlab/common/mathlib/lyapunov.py
new file mode 100644
index 0000000..3c747f1
--- /dev/null
+++ b/code/sunlab/common/mathlib/lyapunov.py
@@ -0,0 +1,54 @@
+def trajectory_to_distances(x):
+ """X: [N,N_t,N_d]
+ ret [N,N_t]"""
+ from numpy import zeros
+ from numpy.linalg import norm
+ from itertools import product, combinations
+
+ x = [x[idx, ...] for idx in range(x.shape[0])]
+ pairwise_trajectories = combinations(x, 2)
+ _N_COMB = len(list(pairwise_trajectories))
+ N_max = x[0].shape[0]
+ distances = zeros((_N_COMB, N_max))
+ pairwise_trajectories = combinations(x, 2)
+ for idx, (a_t, b_t) in enumerate(pairwise_trajectories):
+ distances[idx, :] = norm(a_t[:N_max, :] - b_t[:N_max, :], axis=-1)
+ return distances
+
+
+def Lyapunov_d(X):
+ """X: [N,N_t]
+ λ_n = ln(|dX_n|/|dX_0|)/n; n = [1,2,...]"""
+ from numpy import zeros, log, repeat
+
+ Y = zeros((X.shape[0], X.shape[1] - 1))
+ Y = log(X[:, 1:] / repeat([X[:, 0]], Y.shape[1], axis=0).T) / (
+ repeat([range(Y.shape[1])], Y.shape[0], axis=0) + 1
+ )
+ return Y
+
+
+def Lyapunov_t(X):
+ """X: [N,N_t,N_d]"""
+ return Lyapunov_d(trajectory_to_distances(X))
+
+
+Lyapunov = Lyapunov_d
+
+
+def RelativeDistance_d(X):
+ """X: [N,N_t]
+ λ_n = ln(|dX_n|/|dX_0|)/n; n = [1,2,...]"""
+ from numpy import zeros, log, repeat
+
+ Y = zeros((X.shape[0], X.shape[1] - 1))
+ Y = log(X[:, 1:] / repeat([X[:, 0]], Y.shape[1], axis=0).T)
+ return Y
+
+
+def RelativeDistance_t(X):
+ """X: [N,N_t,N_d]"""
+ return RelativeDistance_d(trajectory_to_distances(X))
+
+
+RelativeDistance = RelativeDistance_d
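
A sketch of the expected shapes for the helpers above, using random walks as stand-in trajectories:

import numpy as np
from sunlab.common.mathlib.lyapunov import trajectory_to_distances, Lyapunov_t

trajectories = np.cumsum(np.random.randn(5, 100, 2), axis=1)   # [N, N_t, N_d]
distances = trajectory_to_distances(trajectories)               # pairwise separations, [N*(N-1)/2, N_t]
exponents = Lyapunov_t(trajectories)                            # [N*(N-1)/2, N_t - 1]
print(distances.shape, exponents.shape)                         # (10, 100) (10, 99)
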
diff --git a/code/sunlab/common/mathlib/random_walks.py b/code/sunlab/common/mathlib/random_walks.py
new file mode 100644
index 0000000..3aa3bcb
--- /dev/null
+++ b/code/sunlab/common/mathlib/random_walks.py
@@ -0,0 +1,83 @@
+def get_levy_flight(T=50, D=2, t0=0.1, alpha=3, periodic=False):
+ from numpy import vstack
+ from mistree import get_levy_flight as get_flight
+
+ if D == 2:
+ x, y = get_flight(T, mode="2D", periodic=periodic, t_0=t0, alpha=alpha)
+ xy = vstack([x, y]).T
+ elif D == 3:
+ x, y, z = get_flight(T, mode="3D", periodic=periodic, t_0=t0, alpha=alpha)
+ xy = vstack([x, y, z]).T
+ else:
+ raise ValueError(f"Dimension {D} not supported!")
+ return xy
+
+
+def get_levy_flights(N=10, T=50, D=2, t0=0.1, alpha=3, periodic=False):
+ from numpy import moveaxis, array
+
+ trajectories = []
+ for _ in range(N):
+ xy = get_levy_flight(T=T, D=D, t0=t0, alpha=alpha, periodic=periodic)
+ trajectories.append(xy)
+ return moveaxis(array(trajectories), 0, 1)
+
+
+def get_jitter_levy_flights(
+ N=10, T=50, D=2, t0=0.1, alpha=3, periodic=False, noise=5e-2
+):
+ from numpy.random import randn
+
+ trajectories = get_levy_flights(
+ N=N, T=T, D=D, t0=t0, alpha=alpha, periodic=periodic
+ )
+ return trajectories + randn(*trajectories.shape) * noise
+
+
+def get_gaussian_random_walk(T=50, D=2, R=5, step_size=0.5, soft=None):
+ from numpy import array, sin, cos, exp, zeros, pi
+ from numpy.random import randn, uniform, rand
+ from numpy.linalg import norm
+
+ def is_in(x, R=1):
+ from numpy.linalg import norm
+
+ return norm(x) < R
+
+ X = zeros((T, D))
+ for t in range(1, T):
+ while True:
+ if D == 2:
+ angle = uniform(0, pi * 2)
+ step = randn(1) * step_size
+ X[t, :] = X[t - 1, :] + array([cos(angle), sin(angle)]) * step
+ else:
+ X[t, :] = X[t - 1, :] + randn(D) / D * step_size
+ if soft is None:
+ if is_in(X[t, :], R):
+ break
+ elif rand() < exp(-(norm(X[t, :]) - R) * soft):
+ break
+ return X
+
+
+def get_gaussian_random_walks(N=10, T=50, D=2, R=5, step_size=0.5, soft=None):
+ from numpy import moveaxis, array
+
+ trajectories = []
+ for _ in range(N):
+ xy = get_gaussian_random_walk(T=T, D=D, R=R, step_size=step_size, soft=soft)
+ trajectories.append(xy)
+ return moveaxis(array(trajectories), 0, 1)
+
+
+def get_gaussian_sample(T=50, D=2):
+ from numpy.random import randn
+
+ return randn(T, D)
+
+
+def get_gaussian_samples(N=10, T=50, D=2, R=5, step_size=0.5):
+ from numpy.random import randn
+
+ return randn(T, N, D)
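
A quick sketch of the Gaussian walk generators above (the Lévy-flight helpers additionally require the mistree package):

from sunlab.common.mathlib.random_walks import get_gaussian_random_walks, get_gaussian_samples

walks = get_gaussian_random_walks(N=10, T=50, D=2, R=5, step_size=0.5)
noise = get_gaussian_samples(N=10, T=50, D=2)
print(walks.shape, noise.shape)   # both (T, N, D) = (50, 10, 2)
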
diff --git a/code/sunlab/common/plotting/__init__.py b/code/sunlab/common/plotting/__init__.py
new file mode 100644
index 0000000..d6873aa
--- /dev/null
+++ b/code/sunlab/common/plotting/__init__.py
@@ -0,0 +1,2 @@
+from .colors import *
+from .base import *
diff --git a/code/sunlab/common/plotting/base.py b/code/sunlab/common/plotting/base.py
new file mode 100644
index 0000000..aaf4a94
--- /dev/null
+++ b/code/sunlab/common/plotting/base.py
@@ -0,0 +1,270 @@
+from matplotlib import pyplot as plt
+
+
+def blank_plot(_plt=None, _xticks=False, _yticks=False):
+ """# Remove Plot Labels"""
+ if _plt is None:
+ _plt = plt
+ _plt.xlabel("")
+ _plt.ylabel("")
+ _plt.title("")
+ tick_params = {
+ "which": "both",
+ "bottom": _xticks,
+ "left": _yticks,
+ "right": False,
+ "labelleft": False,
+ "labelbottom": False,
+ }
+ _plt.tick_params(**tick_params)
+    for child in _plt.gcf().get_children():
+ if child._label == "<colorbar>":
+ child.set_yticks([])
+ axs = _plt.gcf().get_axes()
+ try:
+ axs = axs.flatten()
+ except:
+ ...
+ for ax in axs:
+ ax.set_xlabel("")
+ ax.set_ylabel("")
+ ax.set_title("")
+ ax.tick_params(**tick_params)
+
+
+def save_plot(name, _plt=None, _xticks=False, _yticks=False, tighten=True):
+ """# Save Plot in Multiple Formats"""
+ assert type(name) == str, "Name must be string"
+ from os.path import dirname
+ from os import makedirs
+
+ makedirs(dirname(name), exist_ok=True)
+ if _plt is None:
+ from matplotlib import pyplot as plt
+ _plt = plt
+ _plt.savefig(name + ".png", dpi=1000)
+ blank_plot(_plt, _xticks=_xticks, _yticks=_yticks)
+ if tighten:
+ from matplotlib import pyplot as plt
+ plt.tight_layout()
+ _plt.savefig(name + ".pdf")
+ _plt.savefig(name + ".svg")
+
+
+def scatter_2d(data_2d, labels=None, _plt=None, **matplotlib_kwargs):
+ """# Scatter 2d Data
+
+ - data_2d: 2d-dataset to plot
+ - labels: labels for each case
+ - _plt: Optional specific plot to transform"""
+ from .colors import Pcolor
+
+ if _plt is None:
+ _plt = plt
+
+ def _filter(data, labels=None, _filter_on=None):
+ if labels is None:
+ return data, False
+ else:
+ return data[labels == _filter_on], True
+
+ for _class in [2, 3, 1, 0]:
+ local_data, has_color = _filter(data_2d, labels, _class)
+ if has_color:
+ _plt.scatter(
+ local_data[:, 0],
+ local_data[:, 1],
+ color=Pcolor[_class],
+ **matplotlib_kwargs
+ )
+ else:
+ _plt.scatter(local_data[:, 0], local_data[:, 1], **matplotlib_kwargs)
+ break
+ return _plt
+
+
+def plot_contour(two_d_mask, color="black", color_map=None, raise_error=False):
+ """# Plot Contour of Mask"""
+ from matplotlib.pyplot import contour
+ from numpy import mgrid
+
+ z = two_d_mask
+ x, y = mgrid[: z.shape[1], : z.shape[0]]
+ if color_map is not None:
+ try:
+ color = color_map(color)
+ except Exception as e:
+ if raise_error:
+ raise e
+ try:
+        contour(x, y, z.T, colors=color, linewidths=0.5)
+ except Exception as e:
+ if raise_error:
+ raise e
+
+
+def gaussian_smooth_plot(
+ xy,
+ sigma=0.1,
+ extent=[-1, 1, -1, 1],
+ grid_n=100,
+ grid=None,
+ do_normalize=True,
+):
+ """# Plot Data with Gaussian Smoothening around point"""
+ from numpy import array, ndarray, linspace, meshgrid, zeros_like
+ from numpy import pi, sqrt, exp
+ from numpy.linalg import norm
+
+ if not type(xy) == ndarray:
+ xy = array(xy)
+ if grid is not None:
+ XY = grid
+ else:
+ X = linspace(extent[0], extent[1], grid_n)
+ Y = linspace(extent[2], extent[3], grid_n)
+ XY = array(meshgrid(X, Y)).T
+ smoothed = zeros_like(XY[:, :, 0])
+ factor = 1
+ if do_normalize:
+ factor = (sqrt(2 * pi) * sigma) ** 2.
+ if len(xy.shape) == 1:
+ smoothed = exp(-((norm(xy - XY, axis=-1) / (sqrt(2) * sigma)) ** 2.0)) / factor
+ else:
+ try:
+ from tqdm.notebook import tqdm
+ except Exception:
+
+ def tqdm(x):
+ return x
+
+ for i in tqdm(range(xy.shape[0])):
+ if xy.shape[-1] == 2:
+ smoothed += (
+ exp(-((norm((xy[i, :] - XY), axis=-1) / (sqrt(2) * sigma)) ** 2.0))
+ / factor
+ )
+ elif xy.shape[-1] == 3:
+ smoothed += (
+ exp(-((norm((xy[i, :2] - XY), axis=-1) / (sqrt(2) * sigma)) ** 2.0))
+ / factor
+ * xy[i, 2]
+ )
+ return smoothed, XY
+
+
+def plot_grid_data(xy_grid, data_grid, cbar=False, _plt=None, _cmap="RdBu", grid_min=1):
+ """# Plot Gridded Data"""
+ from numpy import nanmin, nanmax
+ from matplotlib.colors import TwoSlopeNorm
+
+ if _plt is None:
+ _plt = plt
+ norm = TwoSlopeNorm(
+ vmin=nanmin([-grid_min, nanmin(data_grid)]),
+ vcenter=0,
+ vmax=nanmax([grid_min, nanmax(data_grid)]),
+ )
+    _plt.pcolor(xy_grid[:, :, 0], xy_grid[:, :, 1], data_grid, cmap=_cmap, norm=norm)
+ if cbar:
+ _plt.colorbar()
+
+
+def plot_winding(xy_grid, winding_grid, cbar=False, _plt=None):
+ plot_grid_data(xy_grid, winding_grid, cbar=cbar, _plt=_plt, grid_min=1e-5)
+
+
+def plot_vorticity(xy_grid, vorticity_grid, cbar=False, save=False, _plt=None):
+ plot_grid_data(xy_grid, vorticity_grid, cbar=cbar, _plt=_plt, grid_min=1e-1)
+
+
+plt.blank = lambda: blank_plot(plt)
+plt.scatter2d = lambda data, labels=None, **matplotlib_kwargs: scatter_2d(
+ data, labels, plt, **matplotlib_kwargs
+)
+plt.save = save_plot
+
+
+def interpolate_points(df, columns=["Latent-0", "Latent-1"], kind="quadratic", N=500):
+ """# Interpolate points"""
+ from scipy.interpolate import interp1d
+ import numpy as np
+
+ points = df[columns].to_numpy()
+ distance = np.cumsum(np.sqrt(np.sum(np.diff(points, axis=0) ** 2, axis=1)))
+ distance = np.insert(distance, 0, 0) / distance[-1]
+ interpolator = interp1d(distance, points, kind=kind, axis=0)
+ alpha = np.linspace(0, 1, N)
+ interpolated_points = interpolator(alpha)
+ return interpolated_points.reshape(-1, 1, 2)
+
+
+def plot_trajectory(
+ df,
+ Fm=24,
+ FM=96,
+ interpolate=False,
+ interpolation_kind="quadratic",
+ interpolation_N=500,
+ columns=["Latent-0", "Latent-1"],
+ frame_column="Frames",
+ alpha=0.8,
+ lw=4,
+ _plt=None,
+ _z=None,
+):
+ """# Plot Trajectories
+
+ Interpolation possible"""
+ import numpy as np
+ from matplotlib.collections import LineCollection
+ import matplotlib as mpl
+
+ if _plt is None:
+ _plt = plt
+ if type(_plt) == mpl.axes._axes.Axes:
+ _ca = _plt
+ else:
+ try:
+ _ca = _plt.gca()
+ except:
+ _ca = _plt
+ X = df[columns[0]]
+ Y = df[columns[1]]
+ fm, fM = np.min(df[frame_column]), np.max(df[frame_column])
+
+ if interpolate:
+ if interpolation_kind == "cubic":
+ if len(df) <= 3:
+ return
+ elif interpolation_kind == "quadratic":
+ if len(df) <= 2:
+ return
+ else:
+ if len(df) <= 1:
+ return
+ points = interpolate_points(
+ df, kind=interpolation_kind, columns=columns, N=interpolation_N
+ )
+ else:
+ points = np.array([X, Y]).T.reshape(-1, 1, 2)
+
+ segments = np.concatenate([points[:-1], points[1:]], axis=1)
+ lc = LineCollection(
+ segments,
+ cmap=plt.get_cmap("plasma"),
+ norm=mpl.colors.Normalize(Fm, FM),
+ )
+ if _z is not None:
+ from mpl_toolkits.mplot3d.art3d import line_collection_2d_to_3d
+
+ if interpolate:
+ _z = _z # TODO: Interpolate
+ line_collection_2d_to_3d(lc, _z)
+ lc.set_array(np.linspace(fm, fM, points.shape[0]))
+ lc.set_linewidth(lw)
+ lc.set_alpha(alpha)
+ _ca.add_collection(lc)
+ _ca.autoscale()
+ _ca.margins(0.04)
+ return lc
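
A sketch tying together the density helpers above: smooth a toy point cloud onto a grid and render it (requires a working matplotlib backend to actually display):

import numpy as np
from matplotlib import pyplot as plt
from sunlab.common.plotting.base import gaussian_smooth_plot, plot_grid_data

points = 0.3 * np.random.randn(500, 2)
density, grid = gaussian_smooth_plot(points, sigma=0.1, extent=[-1, 1, -1, 1], grid_n=100)
plot_grid_data(grid, density, cbar=True)
plt.show()
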
diff --git a/code/sunlab/common/plotting/colors.py b/code/sunlab/common/plotting/colors.py
new file mode 100644
index 0000000..c4fc727
--- /dev/null
+++ b/code/sunlab/common/plotting/colors.py
@@ -0,0 +1,38 @@
+class PhenotypeColors:
+ """# Phenotype Colorings
+
+ Standardization for the different phenotype colors"""
+
+ def __init__(self):
+ """# Empty Construtor"""
+ pass
+
+ def get_basic_colors(self, transition=False):
+ """# Return the Color Names
+
+ - transition: Returns the color for the transition class too"""
+ if transition:
+ return ["yellow", "purple", "green", "blue", "cyan"]
+ return ["yellow", "purple", "green", "blue"]
+
+ def get_colors(self, transition=False):
+ """# Return the Color Names
+
+ - transition: Returns the color for the transition class too"""
+ if transition:
+ return ["#ffff00", "#ff3cfa", "#11f309", "#213ff0", "cyan"]
+ return ["#ffff00", "#ff3cfa", "#11fe09", "#213ff0"]
+
+ def get_colormap(self, transition=False):
+ """# Return the Matplotlib Colormap
+
+ - transition: Returns the color for the transition class too"""
+ from matplotlib.colors import ListedColormap as LC
+
+ return LC(self.get_colors(transition))
+
+
+# Basic Exports
+Pcolor = PhenotypeColors().get_colors()
+Pmap = PhenotypeColors().get_colormap()
+Pmapx = PhenotypeColors().get_colormap(True)
diff --git a/code/sunlab/common/scaler/__init__.py b/code/sunlab/common/scaler/__init__.py
new file mode 100644
index 0000000..2a2281a
--- /dev/null
+++ b/code/sunlab/common/scaler/__init__.py
@@ -0,0 +1,2 @@
+from .max_abs_scaler import *
+from .quantile_scaler import *
diff --git a/code/sunlab/common/scaler/adversarial_scaler.py b/code/sunlab/common/scaler/adversarial_scaler.py
new file mode 100644
index 0000000..7f61725
--- /dev/null
+++ b/code/sunlab/common/scaler/adversarial_scaler.py
@@ -0,0 +1,44 @@
+class AdversarialScaler:
+ """# Scaler Class to use in Adversarial Autoencoder
+
+ For any scaler to be implemented, make sure to ensure each of the methods
+ are implemented:
+ - __init__ (optional)
+ - init
+ - load
+ - save
+ - __call__"""
+
+ def __init__(self, base_directory):
+ """# Scaler initialization
+
+ - Initialize the base directory of the model where it will live"""
+ self.base_directory = base_directory
+
+ def init(self, data):
+ """# Scaler initialization
+
+ Initialize the scaler transformation with the data
+ Should always return self in subclasses"""
+ raise NotImplementedError("Scaler initialization has not been implemented yet")
+
+ def load(self):
+ """# Scaler loading
+
+ Load the data scaler model from a file
+ Should always return self in subclasses"""
+ raise NotImplementedError("Scaler loading has not been implemented yet")
+
+ def save(self):
+ """# Scaler saving
+
+ Save the data scaler model"""
+ raise NotImplementedError("Scaler saving has not been implemented yet")
+
+ def transform(self, *args, **kwargs):
+ """# Scale the given data"""
+ return self.__call__(*args, **kwargs)
+
+ def __call__(self, *args, **kwargs):
+ """# Scale the given data"""
+ raise NotImplementedError("Scaler has not been implemented yet")
diff --git a/code/sunlab/common/scaler/max_abs_scaler.py b/code/sunlab/common/scaler/max_abs_scaler.py
new file mode 100644
index 0000000..56ea589
--- /dev/null
+++ b/code/sunlab/common/scaler/max_abs_scaler.py
@@ -0,0 +1,48 @@
+from .adversarial_scaler import AdversarialScaler
+
+
+class MaxAbsScaler(AdversarialScaler):
+ """# MaxAbsScaler
+
+ Scale the data based on the maximum-absolute value in each column"""
+
+ def __init__(self, base_directory):
+ """# MaxAbsScaler initialization
+
+ - Initialize the base directory of the model where it will live
+ - Initialize the scaler model"""
+ super().__init__(base_directory)
+ from sklearn.preprocessing import MaxAbsScaler as MAS
+
+ self.scaler_base = MAS()
+ self.scaler = None
+
+ def init(self, data):
+ """# Scaler initialization
+
+ Initialize the scaler transformation with the data"""
+ self.scaler = self.scaler_base.fit(data)
+ return self
+
+ def load(self):
+ """# Scaler loading
+
+ Load the data scaler model from a file"""
+ from pickle import load
+
+ with open(f"{self.base_directory}/portable/maxabs_scaler.pkl", "rb") as fhandle:
+ self.scaler = load(fhandle)
+ return self
+
+ def save(self):
+ """# Scaler saving
+
+ Save the data scaler model"""
+ from pickle import dump
+
+ with open(f"{self.base_directory}/portable/maxabs_scaler.pkl", "wb") as fhandle:
+ dump(self.scaler, fhandle)
+
+ def __call__(self, *args, **kwargs):
+ """# Scale the given data"""
+ return self.scaler.transform(*args, **kwargs)
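
A usage sketch of the scaler workflow above ("model_dir" is a hypothetical directory that must already contain a portable/ subfolder for save() and load()):

import numpy as np
from sunlab.common.scaler.max_abs_scaler import MaxAbsScaler

scaler = MaxAbsScaler("model_dir").init(np.random.rand(100, 13) * 50.0)
scaled = scaler.transform(np.random.rand(5, 13) * 50.0)   # columns now lie within [-1, 1]
scaler.save()                                             # writes model_dir/portable/maxabs_scaler.pkl
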
diff --git a/code/sunlab/common/scaler/quantile_scaler.py b/code/sunlab/common/scaler/quantile_scaler.py
new file mode 100644
index 0000000..a0f53fd
--- /dev/null
+++ b/code/sunlab/common/scaler/quantile_scaler.py
@@ -0,0 +1,52 @@
+from .adversarial_scaler import AdversarialScaler
+
+
+class QuantileScaler(AdversarialScaler):
+ """# QuantileScaler
+
+ Scale the data based on the quantile distributions of each column"""
+
+ def __init__(self, base_directory):
+ """# QuantileScaler initialization
+
+ - Initialize the base directory of the model where it will live
+ - Initialize the scaler model"""
+ super().__init__(base_directory)
+ from sklearn.preprocessing import QuantileTransformer as QS
+
+ self.scaler_base = QS()
+ self.scaler = None
+
+ def init(self, data):
+ """# Scaler initialization
+
+ Initialize the scaler transformation with the data"""
+ self.scaler = self.scaler_base.fit(data)
+ return self
+
+ def load(self):
+ """# Scaler loading
+
+ Load the data scaler model from a file"""
+ from pickle import load
+
+ with open(
+ f"{self.base_directory}/portable/quantile_scaler.pkl", "rb"
+ ) as fhandle:
+ self.scaler = load(fhandle)
+ return self
+
+ def save(self):
+ """# Scaler saving
+
+ Save the data scaler model"""
+ from pickle import dump
+
+ with open(
+ f"{self.base_directory}/portable/quantile_scaler.pkl", "wb"
+ ) as fhandle:
+ dump(self.scaler, fhandle)
+
+ def __call__(self, *args, **kwargs):
+ """# Scale the given data"""
+ return self.scaler.transform(*args, **kwargs)
diff --git a/code/sunlab/environment/base/__init__.py b/code/sunlab/environment/base/__init__.py
new file mode 100644
index 0000000..5fc27c1
--- /dev/null
+++ b/code/sunlab/environment/base/__init__.py
@@ -0,0 +1,8 @@
+import numpy as np
+import pandas as pd
+from matplotlib import pyplot as plt
+from copy import deepcopy as dc
+import glob
+from tqdm.notebook import tqdm
+from sunlab.common.mathlib.base import *
+from .fortran import *
diff --git a/code/sunlab/environment/base/cpu.py b/code/sunlab/environment/base/cpu.py
new file mode 100644
index 0000000..d969bd1
--- /dev/null
+++ b/code/sunlab/environment/base/cpu.py
@@ -0,0 +1,4 @@
+import os
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+from . import *
diff --git a/code/sunlab/environment/base/cuda.py b/code/sunlab/environment/base/cuda.py
new file mode 100644
index 0000000..a5c813e
--- /dev/null
+++ b/code/sunlab/environment/base/cuda.py
@@ -0,0 +1,4 @@
+import os
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+from . import *
diff --git a/code/sunlab/environment/base/extras.py b/code/sunlab/environment/base/extras.py
new file mode 100644
index 0000000..b6ddc88
--- /dev/null
+++ b/code/sunlab/environment/base/extras.py
@@ -0,0 +1,7 @@
+from scipy.spatial import KDTree
+from scipy.spatial import ConvexHull
+from scipy.stats import linregress
+from sklearn.manifold import TSNE
+from sklearn.decomposition import PCA, KernelPCA
+from sklearn.cluster import KMeans
+from scipy.optimize import curve_fit
diff --git a/code/sunlab/environment/base/fortran.py b/code/sunlab/environment/base/fortran.py
new file mode 100644
index 0000000..973d09a
--- /dev/null
+++ b/code/sunlab/environment/base/fortran.py
@@ -0,0 +1,8 @@
+try:
+ from ...fortran_src.aae_flib_mamba import *
+except Exception:
+ ...
+try:
+ from ...fortran_src.aae_flib_tfnb import *
+except Exception:
+ ...
diff --git a/code/sunlab/fortran_src/__init__.py b/code/sunlab/fortran_src/__init__.py
new file mode 100644
index 0000000..81bb83a
--- /dev/null
+++ b/code/sunlab/fortran_src/__init__.py
@@ -0,0 +1 @@
+# f2py -c -m aae_flib_tfnb fortran_library.f90 --f90flags='-g -fimplicit-none -O3'
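
The comment above records how the Fortran extension is built. A hedged sketch of how the resulting module might be called; the module name follows that command and the function names follow fortran_library.f90, but the exact wrapper signatures f2py generates (especially for array-valued functions) are an assumption and should be checked against the actual build:

import numpy as np
# assumes the f2py command above has been run in this directory
from sunlab.fortran_src.aae_flib_tfnb import msd, mmsds

trajectory = np.cumsum(np.random.randn(200, 2), axis=0)   # one 2D trajectory
lag10 = msd(trajectory, 10)     # displacement magnitude at lag t=10 for each window (assumed signature)
curve = mmsds(trajectory, 50)   # mean displacement for lags 0..50 (assumed signature)
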
diff --git a/code/sunlab/fortran_src/fortran_library.f90 b/code/sunlab/fortran_src/fortran_library.f90
new file mode 100644
index 0000000..d477995
--- /dev/null
+++ b/code/sunlab/fortran_src/fortran_library.f90
@@ -0,0 +1,96 @@
+! fortran_library
+
+! Mean-Squared Displacement for a time lag `t`
+function msd(a, t) result(b)
+ implicit none
+
+ real(kind=8), intent(in) :: a(:,:)
+ integer(kind=8), intent(in) :: t
+ integer(kind=8) :: i, j
+ real(kind=8) :: s(size(a,1)-t, size(a,2))
+ real(kind=8) :: b(size(a,1)-t)
+
+ s(:, :) = a((t + 1):(size(a,1)), :) - a(1:(size(a,1)-t),:)
+ do i = 1, size(a, 1)-t
+ b(i) = 0.0
+ do j = 1, size(a, 2)
+ b(i) = b(i) + s(i, j) ** 2
+ end do
+ b(i) = sqrt(b(i))
+ end do
+end function msd
+
+! Mean-Squared Displacement for a time lag `t`
+function mmsd(a, t) result(z)
+ implicit none
+
+ real(kind=8), intent(in) :: a(:,:)
+ integer(kind=8), intent(in) :: t
+ integer(kind=8) :: i, j
+ real(kind=8) :: s(size(a,1)-t, size(a,2))
+ real(kind=8) :: b(size(a,1)-t)
+ real(kind=8) :: z
+
+ s(:, :) = a((t + 1):(size(a,1)), :) - a(1:(size(a,1)-t),:)
+ do i = 1, size(a, 1)-t
+ b(i) = 0.0
+ do j = 1, size(a, 2)
+ b(i) = b(i) + s(i, j) ** 2
+ end do
+ b(i) = sqrt(b(i))
+ end do
+ z = sum(b(:)) / (size(b, 1))
+end function mmsd
+
+! Mean-Squared Displacement for time lags up to `T`
+function msds(a, Ts) result(b)
+ implicit none
+
+ real(kind=8), intent(in) :: a(:,:)
+ integer(kind=8), intent(in) :: Ts
+ integer(kind=8) :: i, j
+ integer(kind=8) :: t
+ real(kind=8) :: s(size(a,1), size(a,2))
+ real(kind=8) :: b(size(a,1),Ts+1)
+
+ do t = 0, Ts
+    s(1:(size(a,1)-t), :) = a((t + 1):(size(a,1)), :) - a(1:(size(a,1)-t),:)
+ do i = 1, size(a, 1)
+ b(i,t+1) = 0.0
+ if (i <= size(a, 1) - t) then
+ do j = 1, size(a, 2)
+ b(i,t+1) = b(i,t+1) + s(i, j) ** 2
+ end do
+ b(i,t+1) = sqrt(b(i,t+1))
+ end if
+ end do
+ end do
+end function msds
+
+! Mean-Squared Displacement for time lags up to `T`
+! Return the mean for each time lag
+function mmsds(a, Ts) result(z)
+ implicit none
+
+ real(kind=8), intent(in) :: a(:,:)
+ integer(kind=8), intent(in) :: Ts
+ integer(kind=8) :: i, j
+ integer(kind=8) :: t
+ real(kind=8) :: s(size(a,1), size(a,2))
+ real(kind=8) :: b(size(a,1),Ts+1)
+ real(kind=8) :: z(Ts+1)
+
+ do t = 0, Ts
+    s(1:(size(a,1)-t), :) = a((t + 1):(size(a,1)), :) - a(1:(size(a,1)-t),:)
+ do i = 1, size(a, 1)
+ b(i,t+1) = 0.0
+ if (i <= size(a, 1) - t) then
+ do j = 1, size(a, 2)
+ b(i,t+1) = b(i,t+1) + s(i, j) ** 2
+ end do
+ b(i,t+1) = sqrt(b(i,t+1))
+ end if
+ end do
+ z(t+1) = sum(b(1:(size(a, 1) - t),t+1)) / (size(a, 1) - t)
+ end do
+end function mmsds
diff --git a/code/sunlab/globals.py b/code/sunlab/globals.py
new file mode 100644
index 0000000..eb5800b
--- /dev/null
+++ b/code/sunlab/globals.py
@@ -0,0 +1,265 @@
+DIR_ROOT = "../"
+FILES = {
+ "TRAINING_DATASET": DIR_ROOT + "data/spheroid26_011523_filtered.csv",
+ "TRAINING_DATASET_WIDE_BERTH": DIR_ROOT + "data/spheroid26_011523_exc.csv",
+ "PRETRAINED_MODEL_DIR": DIR_ROOT + "models/current_model/",
+ "PEN_TRACKED": {
+ "AUTO": DIR_ROOT + "data/PEN_automatically_tracked.csv",
+ "MANUAL": DIR_ROOT + "data/PEN_manually_tracked.csv",
+ },
+ "RHO": {
+ "3": DIR_ROOT + "data/Rho_act_Cell3.csv",
+ "4": DIR_ROOT + "data/Rho_act_Cell4.csv",
+ "6": DIR_ROOT + "data/Rho_act_Cell6.csv",
+ "7": DIR_ROOT + "data/Rho_act_Cell7.csv",
+ "8": DIR_ROOT + "data/Rho_act_Cell8.csv",
+ "9": DIR_ROOT + "data/Rho_act_Cell9.csv",
+ "10": DIR_ROOT + "data/Rho_act_Cell10.csv",
+ "11": DIR_ROOT + "data/Rho_act_Cell11.csv",
+ },
+ "CN03": {
+ "3": DIR_ROOT + "data/Rho_act_Cell3.csv",
+ "4": DIR_ROOT + "data/Rho_act_Cell4.csv",
+ "6": DIR_ROOT + "data/Rho_act_Cell6.csv",
+ "7": DIR_ROOT + "data/Rho_act_Cell7.csv",
+ "8": DIR_ROOT + "data/Rho_act_Cell8.csv",
+ "9": DIR_ROOT + "data/Rho_act_Cell9.csv",
+ "10": DIR_ROOT + "data/Rho_act_Cell10.csv",
+ "11": DIR_ROOT + "data/Rho_act_Cell11.csv",
+ },
+ "Y27632": {
+ "1": DIR_ROOT + "data/Y27632_Cell1.csv",
+ "2": DIR_ROOT + "data/Y27632_Cell2.csv",
+ "3": DIR_ROOT + "data/Y27632_Cell3.csv",
+ "4": DIR_ROOT + "data/Y27632_Cell4.csv",
+ "5": DIR_ROOT + "data/Y27632_Cell5.csv",
+ },
+ "HISTOLOGIES": {
+ "J1": DIR_ROOT + "../HistologyProject/J1/svm/svm.csv",
+ "image001": DIR_ROOT + "../HistologyProject/image001/svm/svm.csv",
+ "H4": DIR_ROOT + "../HistologyProject/H4/svm/svm.csv",
+ "H9": DIR_ROOT + "../HistologyProject/H9/svm/svm.csv",
+ },
+ "SPHEROID": {
+ "1p5mgml": DIR_ROOT + "data/spheroid26_011523_filtered.csv",
+ "3mgml": DIR_ROOT + "data/spheroid20_011923_filtered.csv",
+ "4mgml": DIR_ROOT + "data/spheroid22_012323_filtered.csv",
+ "6mgml": DIR_ROOT + "data/spheroid26_012423_filtered.csv",
+ },
+ "SPHEROID_RAW": {
+ "REG": {
+ "1p5mgml": [
+ DIR_ROOT + "data/" + "spheroid15_030322_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid16_030322_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid17_041022_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid18_041022_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid26_011523_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid27_090323_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid28_090523_RegularCollagen_1p5mgml.csv",
+ ],
+ "3mgml": [
+ DIR_ROOT + "data/" + "spheroid15_031022_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid16_031522_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid17_041022_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid18_083022_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid19_083022_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid20_011923_RegularCollagen_3mgml.csv",
+ ],
+ "4mgml": [
+ DIR_ROOT + "data/" + "spheroid17_031022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid18_031022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid19_031022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid20_083022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid21_083022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid22_012323_RegularCollagen_4mgml.csv",
+ ],
+ "6mgml": [
+ DIR_ROOT + "data/" + "spheroid22_031022_RegularCollagen_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid23_031022_RegularCollagen_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid24_031022_RegularCollagen_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid25_031022_RegularCollagen_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid26_012423_RegularCollagen_6mgml.csv",
+ ],
+ },
+ "PC": {
+ "1p5mgml": [
+ DIR_ROOT + "data/" + "spheroid1_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid2_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid3_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid4_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid5_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid7_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid8_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid9_012723_PhotoCol_1p5mgml.csv",
+ ],
+ "3mgml": [
+ DIR_ROOT + "data/" + "spheroid5_022022_PhotoCol_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid6_022022_PhotoCol_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid7_030322_PhotoCol_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid8_030322_PhotoCol_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid12_060923_PhotoCol_3mgml.csv",
+ ],
+ "4mgml": [
+ DIR_ROOT + "data/" + "spheroid2_022022_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid3_053122_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid4_053122_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid5_053122_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid6_053122_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid7_091222_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid8_091822_PhotoCol_4mgml.csv",
+ ],
+ "6mgml": [
+ DIR_ROOT + "data/" + "spheroid1_021922_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid2_021922_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid3_021922_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid4_021922_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid6_091222_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid8_022323_PhotoCol_6mgml.csv",
+ ],
+ },
+ },
+ "SPHEROID_RAW_RIC": {
+ "NAME": "(R)esolution (I)mage type (C)oncentration",
+ "QUARTER_HOUR": {
+ "REG": {
+ "1p5mgml": [
+ DIR_ROOT
+ + "data/"
+ + "spheroid28_090523_RegularCollagen_1p5mgml.csv",
+ ],
+ "3mgml": [
+ DIR_ROOT + "data/" + "spheroid20_011923_RegularCollagen_3mgml.csv",
+ ],
+ "4mgml": [
+ DIR_ROOT + "data/" + "spheroid22_012323_RegularCollagen_4mgml.csv",
+ ],
+ "6mgml": [
+ DIR_ROOT + "data/" + "spheroid26_012423_RegularCollagen_6mgml.csv",
+ ],
+ },
+ "PC": {
+ "1p5mgml": [
+ DIR_ROOT + "data/" + "spheroid9_012723_PhotoCol_1p5mgml.csv",
+ ],
+ "3mgml": [
+ DIR_ROOT + "data/" + "spheroid12_060923_PhotoCol_3mgml.csv",
+ ],
+ "4mgml": [],
+ "6mgml": [
+ DIR_ROOT + "data/" + "spheroid8_022323_PhotoCol_6mgml.csv",
+ ],
+ },
+ },
+ "DAILY": {
+ "REG": {
+ "1p5mgml": [
+ DIR_ROOT
+ + "data/"
+ + "spheroid15_030322_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT
+ + "data/"
+ + "spheroid16_030322_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT
+ + "data/"
+ + "spheroid17_041022_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT
+ + "data/"
+ + "spheroid18_041022_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT
+ + "data/"
+ + "spheroid26_011523_RegularCollagen_1p5mgml.csv",
+ DIR_ROOT
+ + "data/"
+ + "spheroid27_090323_RegularCollagen_1p5mgml.csv",
+ ],
+ "3mgml": [
+ DIR_ROOT + "data/" + "spheroid15_031022_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid16_031522_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid17_041022_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid18_083022_RegularCollagen_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid19_083022_RegularCollagen_3mgml.csv",
+ ],
+ "4mgml": [
+ DIR_ROOT + "data/" + "spheroid17_031022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid18_031022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid19_031022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid20_083022_RegularCollagen_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid21_083022_RegularCollagen_4mgml.csv",
+ ],
+ "6mgml": [
+ DIR_ROOT + "data/" + "spheroid22_031022_RegularCollagen_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid23_031022_RegularCollagen_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid24_031022_RegularCollagen_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid25_031022_RegularCollagen_6mgml.csv",
+ ],
+ },
+ "PC": {
+ "1p5mgml": [
+ DIR_ROOT + "data/" + "spheroid1_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid2_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid3_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid4_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid5_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid7_021922_PhotoCol_1p5mgml.csv",
+ DIR_ROOT + "data/" + "spheroid8_021922_PhotoCol_1p5mgml.csv",
+ ],
+ "3mgml": [
+ DIR_ROOT + "data/" + "spheroid5_022022_PhotoCol_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid6_022022_PhotoCol_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid7_030322_PhotoCol_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid8_030322_PhotoCol_3mgml.csv",
+ DIR_ROOT + "data/" + "spheroid12_060923_PhotoCol_3mgml.csv",
+ ],
+ "4mgml": [
+ DIR_ROOT + "data/" + "spheroid2_022022_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid3_053122_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid4_053122_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid5_053122_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid6_053122_PhotoCol_4mgml.csv",
+ DIR_ROOT + "data/" + "spheroid7_091222_PhotoCol_4mgml.csv",
+ ],
+ "6mgml": [
+ DIR_ROOT + "data/" + "spheroid1_021922_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid2_021922_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid3_021922_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid4_021922_PhotoCol_6mgml.csv",
+ DIR_ROOT + "data/" + "spheroid6_091222_PhotoCol_6mgml.csv",
+ ],
+ },
+ },
+ },
+ "SPHEROID_PC": {
+ "1p5mgml": DIR_ROOT + "data/spheroid9_012723_pc_1p5.csv",
+ "3mgml": DIR_ROOT + "data/spheroid12_060923_pc_3.csv",
+ },
+ "SPHEROID_MASKS": {
+ "1p5mgml": DIR_ROOT + "data/spheroid_1p5mgml_spheroid26_011523/images/",
+ "3mgml": DIR_ROOT + "data/spheroid_3mgml_spheroid20_011923/images/",
+ "4mgml": DIR_ROOT + "data/spheroid_4mgml_spheroid22_012323/images/",
+ "6mgml": DIR_ROOT + "data/spheroid_6mgml_spheroid26_012423/images/",
+ "3mgml_pc": DIR_ROOT + "data/spheroid_photocol_3mgml_spheroid12_060923/images/",
+ },
+ "FIGURES": {
+ "2": {
+ "PHENOTYPES_SMOOTHED": DIR_ROOT
+ + "extra_data/PhenotypeGaussianSmoothed.npy",
+ },
+ "3": {
+ "SAMPLED_DATASET": DIR_ROOT + "extra_data/Figure3.SampledDataset.npy",
+ "PAIRWISE_DISTANCES": DIR_ROOT + "extra_data/Figure3.PairwiseDistances.npy",
+ "PAIRWISE_DOT_PRODUCTS": DIR_ROOT
+ + "extra_data/Figure3.PairwiseDotProducts.npy",
+ "TRAJECTORIES": DIR_ROOT + "extra_data/Figure3.Trajectories.npy",
+ },
+ },
+ "PHENOTYPE_GRID": {
+ "IN": DIR_ROOT + "extra_data/PhenotypeGrid.npy",
+ "OUT": DIR_ROOT + "extra_data/PhenotypeGrid_out.npy",
+ },
+ "PHENOTYPE_RGB": DIR_ROOT + "extra_data/PhenotypeColors.npy",
+ "SVM": {
+ "MODEL": DIR_ROOT + "other/svm/SVC_rbf_010820_16942_new.pkl",
+ "SCALER": DIR_ROOT + "other/svm/SVC_rbf_scaler_010820_16942_new.pkl",
+ },
+ "NONPHYSICAL_MASK": DIR_ROOT + "extra_data/NonPhysicalMask.npy",
+}
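+
+# Usage sketch: the paths above are relative to DIR_ROOT and assume the
+# repository's data layout; pandas here is only an illustrative consumer.
+#
+#   from sunlab.globals import FILES
+#   import pandas as pd
+#
+#   training_df = pd.read_csv(FILES["TRAINING_DATASET"])
+#   rho_cell3 = pd.read_csv(FILES["RHO"]["3"])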
diff --git a/code/sunlab/sunflow/__init__.py b/code/sunlab/sunflow/__init__.py
new file mode 100644
index 0000000..6e0c959
--- /dev/null
+++ b/code/sunlab/sunflow/__init__.py
@@ -0,0 +1,6 @@
+from ..common import *
+
+from .models import *
+
+from .data import *
+from .plotting import *
diff --git a/code/sunlab/sunflow/data/__init__.py b/code/sunlab/sunflow/data/__init__.py
new file mode 100644
index 0000000..b9a32c0
--- /dev/null
+++ b/code/sunlab/sunflow/data/__init__.py
@@ -0,0 +1 @@
+from .utilities import *
diff --git a/code/sunlab/sunflow/data/utilities.py b/code/sunlab/sunflow/data/utilities.py
new file mode 100644
index 0000000..dcdc36e
--- /dev/null
+++ b/code/sunlab/sunflow/data/utilities.py
@@ -0,0 +1,53 @@
+from sunlab.common import ShapeDataset
+from sunlab.common import MaxAbsScaler
+
+
+def process_and_load_dataset(
+ dataset_file, model_folder, magnification=10, scaler=MaxAbsScaler
+):
+    """# Load a dataset and process a model's Latent Space on the Dataset"""
+ from ..models import load_aae
+ from sunlab.common import import_full_dataset
+
+ model = load_aae(model_folder, normalization_scaler=scaler)
+ dataset = import_full_dataset(
+ dataset_file, magnification=magnification, scaler=model.scaler
+ )
+ latent = model.encoder(dataset.dataset).numpy()
+ assert len(latent.shape) == 2, "Only 1D Latent Vectors Supported"
+ for dim in range(latent.shape[1]):
+ dataset.dataframe[f"Latent-{dim}"] = latent[:, dim]
+ return dataset
+
+
+def process_and_load_datasets(
+ dataset_file_list, model_folder, magnification=10, scaler=MaxAbsScaler
+):
+    """# Load Multiple Datasets and Concatenate their Latent-Space DataFrames
+
+    Loads each dataset with the model's latent space appended and returns the
+    list of datasets along with a single concatenated dataframe"""
+ from pandas import concat
+ from ..models import load_aae
+
+ dataframes = []
+ datasets = []
+ for dataset_file in dataset_file_list:
+ dataset = process_and_load_dataset(
+ dataset_file, model_folder, magnification, scaler
+ )
+ model = load_aae(model_folder, normalization_scaler=scaler)
+ dataframe = dataset.dataframe
+ for label in ["ActinEdge", "Filopodia", "Bleb", "Lamellipodia"]:
+ if label in dataframe.columns:
+ dataframe[label.lower()] = dataframe[label]
+ if label.lower() not in dataframe.columns:
+ dataframe[label.lower()] = 0
+ latent_columns = [f"Latent-{dim}" for dim in range(model.latent_size)]
+ datasets.append(dataset)
+ dataframes.append(
+ dataframe[
+ dataset.data_columns
+ + dataset.label_columns
+ + latent_columns
+ + ["Frames", "CellNum"]
+ + ["actinedge", "filopodia", "bleb", "lamellipodia"]
+ ]
+ )
+ return datasets, concat(dataframes)
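+
+
+# Usage sketch (the dataset paths and model folder come from sunlab.globals and
+# are assumptions about the local data layout):
+#
+#   from sunlab.globals import FILES
+#
+#   datasets, combined_df = process_and_load_datasets(
+#       [FILES["SPHEROID"]["1p5mgml"], FILES["SPHEROID"]["3mgml"]],
+#       FILES["PRETRAINED_MODEL_DIR"],
+#       magnification=10,
+#   )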
diff --git a/code/sunlab/sunflow/models/__init__.py b/code/sunlab/sunflow/models/__init__.py
new file mode 100644
index 0000000..f111c0a
--- /dev/null
+++ b/code/sunlab/sunflow/models/__init__.py
@@ -0,0 +1,7 @@
+from .autoencoder import Autoencoder
+from .adversarial_autoencoder import AdversarialAutoencoder
+from sunlab.common.data.dataset import Dataset
+from sunlab.common.distribution.adversarial_distribution import AdversarialDistribution
+from sunlab.common.scaler.adversarial_scaler import AdversarialScaler
+from .utilities import create_aae, create_aae_and_dataset
+from .utilities import load_aae, load_aae_and_dataset
diff --git a/code/sunlab/sunflow/models/adversarial_autoencoder.py b/code/sunlab/sunflow/models/adversarial_autoencoder.py
new file mode 100644
index 0000000..4cbb2f8
--- /dev/null
+++ b/code/sunlab/sunflow/models/adversarial_autoencoder.py
@@ -0,0 +1,344 @@
+from sunlab.common.data.dataset import Dataset
+from sunlab.common.scaler.adversarial_scaler import AdversarialScaler
+from sunlab.common.distribution.adversarial_distribution import AdversarialDistribution
+from .encoder import Encoder
+from .decoder import Decoder
+from .discriminator import Discriminator
+from .encoder_discriminator import EncoderDiscriminator
+from .autoencoder import Autoencoder
+from tensorflow.keras import optimizers, metrics, losses
+import tensorflow as tf
+from numpy import ones, zeros, float32, NaN
+
+
+class AdversarialAutoencoder:
+ """# Adversarial Autoencoder
+ - distribution: The distribution used by the adversary to learn on"""
+
+ def __init__(
+ self,
+ model_base_directory,
+ distribution: AdversarialDistribution or None = None,
+ scaler: AdversarialScaler or None = None,
+ ):
+ """# Adversarial Autoencoder Model Initialization
+
+ - model_base_directory: The base folder directory where the model will
+ be saved/ loaded
+ - distribution: The distribution the adversary will use
+ - scaler: The scaling function the model will assume on the data"""
+ self.model_base_directory = model_base_directory
+ if distribution is not None:
+ self.distribution = distribution
+ else:
+ self.distribution = None
+ if scaler is not None:
+ self.scaler = scaler(self.model_base_directory)
+ else:
+ self.scaler = None
+
+ def init(
+ self,
+ data=None,
+ data_size=13,
+ autoencoder_layer_size=16,
+ adversary_layer_size=8,
+ latent_size=2,
+ autoencoder_depth=2,
+ dropout=0.0,
+ use_leaky_relu=False,
+ **kwargs,
+ ):
+ """# Initialize AAE model parameters
+ - data_size: int
+ - autoencoder_layer_size: int
+ - adversary_layer_size: int
+ - latent_size: int
+ - autoencoder_depth: int
+ - dropout: float
+ - use_leaky_relu: boolean"""
+ self.data_size = data_size
+ self.autoencoder_layer_size = autoencoder_layer_size
+ self.adversary_layer_size = adversary_layer_size
+ self.latent_size = latent_size
+ self.autoencoder_depth = autoencoder_depth
+ self.dropout = dropout
+ self.use_leaky_relu = use_leaky_relu
+ self.save_parameters()
+ self.encoder = Encoder(self.model_base_directory).init()
+ self.decoder = Decoder(self.model_base_directory).init()
+ self.autoencoder = Autoencoder(self.model_base_directory).init(
+ self.encoder, self.decoder
+ )
+ self.discriminator = Discriminator(self.model_base_directory).init()
+ self.encoder_discriminator = EncoderDiscriminator(
+ self.model_base_directory
+ ).init(self.encoder, self.discriminator)
+ if self.distribution is not None:
+ self.distribution = self.distribution(self.latent_size)
+ if (data is not None) and (self.scaler is not None):
+ self.scaler = self.scaler.init(data)
+ self.init_optimizers_and_metrics(**kwargs)
+ return self
+
+ def init_optimizers_and_metrics(
+ self,
+ optimizer=optimizers.Adam,
+ ae_metric=metrics.MeanAbsoluteError,
+ adv_metric=metrics.BinaryCrossentropy,
+ ae_lr=7e-4,
+ adv_lr=3e-4,
+ loss_fn=losses.BinaryCrossentropy,
+ **kwargs,
+ ):
+ """# Set the optimizer, loss function, and metrics"""
+ self.ae_optimizer = optimizer(learning_rate=ae_lr)
+ self.adv_optimizer = optimizer(learning_rate=adv_lr)
+ self.gan_optimizer = optimizer(learning_rate=adv_lr)
+ self.train_ae_metric = ae_metric()
+ self.val_ae_metric = ae_metric()
+ self.train_adv_metric = adv_metric()
+ self.val_adv_metric = adv_metric()
+ self.train_gan_metric = adv_metric()
+ self.val_gan_metric = adv_metric()
+ self.loss_fn = loss_fn()
+
+ def load(self):
+ """# Load the models from their respective files"""
+ self.load_parameters()
+ self.encoder = Encoder(self.model_base_directory).load()
+ self.decoder = Decoder(self.model_base_directory).load()
+ self.autoencoder = Autoencoder(self.model_base_directory).load()
+ self.discriminator = Discriminator(self.model_base_directory).load()
+ self.encoder_discriminator = EncoderDiscriminator(
+ self.model_base_directory
+ ).load()
+ if self.scaler is not None:
+ self.scaler = self.scaler.load()
+ return self
+
+ def save(self, overwrite=False):
+ """# Save each model in the AAE"""
+ self.encoder.save(overwrite=overwrite)
+ self.decoder.save(overwrite=overwrite)
+ self.autoencoder.save(overwrite=overwrite)
+ self.discriminator.save(overwrite=overwrite)
+ self.encoder_discriminator.save(overwrite=overwrite)
+ if self.scaler is not None:
+ self.scaler.save()
+
+ def save_parameters(self):
+ """# Save the AAE parameters in a file"""
+ from pickle import dump
+ from os import makedirs
+
+ makedirs(self.model_base_directory + "/portable/", exist_ok=True)
+ parameters = {
+ "data_size": self.data_size,
+ "autoencoder_layer_size": self.autoencoder_layer_size,
+ "adversary_layer_size": self.adversary_layer_size,
+ "latent_size": self.latent_size,
+ "autoencoder_depth": self.autoencoder_depth,
+ "dropout": self.dropout,
+ "use_leaky_relu": self.use_leaky_relu,
+ }
+ with open(
+ f"{self.model_base_directory}/portable/model_parameters.pkl", "wb"
+ ) as phandle:
+ dump(parameters, phandle)
+
+ def load_parameters(self):
+ """# Load the AAE parameters from a file"""
+ from pickle import load
+
+ with open(
+ f"{self.model_base_directory}/portable/model_parameters.pkl", "rb"
+ ) as phandle:
+ parameters = load(phandle)
+ self.data_size = parameters["data_size"]
+ self.autoencoder_layer_size = parameters["autoencoder_layer_size"]
+ self.adversary_layer_size = parameters["adversary_layer_size"]
+ self.latent_size = parameters["latent_size"]
+ self.autoencoder_depth = parameters["autoencoder_depth"]
+ self.dropout = parameters["dropout"]
+ self.use_leaky_relu = parameters["use_leaky_relu"]
+ return parameters
+
+ def summary(self):
+ """# Summarize each model in the AAE"""
+ self.encoder.summary()
+ self.decoder.summary()
+ self.autoencoder.summary()
+ self.discriminator.summary()
+ self.encoder_discriminator.summary()
+
+ @tf.function
+ def train_step(self, x, y):
+ """# Training Step
+
+ 1. Train the Autoencoder
+ 2. (If distribution is given) Train the discriminator
+ 3. (If the distribution is given) Train the encoder_discriminator"""
+ # Autoencoder Training
+ with tf.GradientTape() as tape:
+ decoded_vector = self.autoencoder(x, training=True)
+ ae_loss_value = self.loss_fn(y, decoded_vector)
+ grads = tape.gradient(ae_loss_value, self.autoencoder.model.trainable_weights)
+ self.ae_optimizer.apply_gradients(
+ zip(grads, self.autoencoder.model.trainable_weights)
+ )
+ self.train_ae_metric.update_state(y, decoded_vector)
+ if self.distribution is not None:
+            # Adversary Training
+ with tf.GradientTape() as tape:
+ latent_vector = self.encoder(x)
+ fakepred = self.distribution(x.shape[0])
+ discbatch_x = tf.concat([latent_vector, fakepred], axis=0)
+ discbatch_y = tf.concat([zeros(x.shape[0]), ones(x.shape[0])], axis=0)
+ adversary_vector = self.discriminator(discbatch_x, training=True)
+ adv_loss_value = self.loss_fn(discbatch_y, adversary_vector)
+ grads = tape.gradient(
+ adv_loss_value, self.discriminator.model.trainable_weights
+ )
+ self.adv_optimizer.apply_gradients(
+ zip(grads, self.discriminator.model.trainable_weights)
+ )
+ self.train_adv_metric.update_state(discbatch_y, adversary_vector)
+            # GAN Training
+ with tf.GradientTape() as tape:
+ gan_vector = self.encoder_discriminator(x, training=True)
+ adv_vector = tf.convert_to_tensor(ones((x.shape[0], 1), dtype=float32))
+                gan_loss_value = self.loss_fn(adv_vector, gan_vector)
+ grads = tape.gradient(gan_loss_value, self.encoder.model.trainable_weights)
+ self.gan_optimizer.apply_gradients(
+ zip(grads, self.encoder.model.trainable_weights)
+ )
+ self.train_gan_metric.update_state(adv_vector, gan_vector)
+ return (ae_loss_value, adv_loss_value, gan_loss_value)
+ return (ae_loss_value, None, None)
+
+ @tf.function
+ def test_step(self, x, y):
+ """# Test Step - On validation data
+
+ 1. Evaluate the Autoencoder
+ 2. (If distribution is given) Evaluate the discriminator
+ 3. (If the distribution is given) Evaluate the encoder_discriminator"""
+ val_decoded_vector = self.autoencoder(x, training=False)
+ self.val_ae_metric.update_state(y, val_decoded_vector)
+
+ if self.distribution is not None:
+ latent_vector = self.encoder(x)
+ fakepred = self.distribution(x.shape[0])
+ discbatch_x = tf.concat([latent_vector, fakepred], axis=0)
+ discbatch_y = tf.concat([zeros(x.shape[0]), ones(x.shape[0])], axis=0)
+ adversary_vector = self.discriminator(discbatch_x, training=False)
+ self.val_adv_metric.update_state(discbatch_y, adversary_vector)
+
+ gan_vector = self.encoder_discriminator(x, training=False)
+ self.val_gan_metric.update_state(ones(x.shape[0]), gan_vector)
+
+ # Garbage Collect at the end of each epoch
+ def on_epoch_end(self, _epoch, logs=None):
+ """# Cleanup environment to prevent memory leaks each epoch"""
+ import gc
+ from tensorflow.keras import backend as k
+
+ gc.collect()
+ k.clear_session()
+
+ def train(
+ self,
+ dataset: Dataset,
+ epoch_count: int = 1,
+ output=False,
+ output_freq=1,
+ fmt="%i[%.3f]: %.2e %.2e %.2e %.2e %.2e %.2e",
+ ):
+ """# Train the model on a dataset
+
+        - dataset: Dataset = Dataset to train the model on, which has the
+        training and validation iterators set up
+ - epoch_count: int = The number of epochs to train
+ - output: boolean = Whether or not to output training information
+ - output_freq: int = The number of epochs between each output"""
+ from time import time
+ from numpy import array as narray
+
+ def fmtter(x):
+ return x if x is not None else -1
+
+ epoch_data = []
+ dataset.reset_iterators()
+
+ self.test_step(dataset.dataset, dataset.dataset)
+ val_ae = self.val_ae_metric.result()
+ val_adv = self.val_adv_metric.result()
+ val_gan = self.val_gan_metric.result()
+ self.val_ae_metric.reset_states()
+ self.val_adv_metric.reset_states()
+ self.val_gan_metric.reset_states()
+ print(
+ fmt
+ % (
+ 0,
+ NaN,
+ val_ae,
+ fmtter(val_adv),
+ fmtter(val_gan),
+ NaN,
+ NaN,
+ NaN,
+ )
+ )
+ for epoch in range(epoch_count):
+ start_time = time()
+
+ for step, (x_batch_train, y_batch_train) in enumerate(dataset.training):
+ ae_lv, adv_lv, gan_lv = self.train_step(x_batch_train, x_batch_train)
+
+ train_ae = self.train_ae_metric.result()
+ train_adv = self.train_adv_metric.result()
+ train_gan = self.train_gan_metric.result()
+ self.train_ae_metric.reset_states()
+ self.train_adv_metric.reset_states()
+ self.train_gan_metric.reset_states()
+
+ for step, (x_batch_val, y_batch_val) in enumerate(dataset.validation):
+ self.test_step(x_batch_val, x_batch_val)
+
+ val_ae = self.val_ae_metric.result()
+ val_adv = self.val_adv_metric.result()
+ val_gan = self.val_gan_metric.result()
+ self.val_ae_metric.reset_states()
+ self.val_adv_metric.reset_states()
+ self.val_gan_metric.reset_states()
+
+ epoch_data.append(
+ (
+ epoch,
+ train_ae,
+ val_ae,
+ fmtter(train_adv),
+ fmtter(val_adv),
+ fmtter(train_gan),
+ fmtter(val_gan),
+ )
+ )
+ if output and (epoch + 1) % output_freq == 0:
+ print(
+ fmt
+ % (
+ epoch + 1,
+ time() - start_time,
+ train_ae,
+ fmtter(train_adv),
+ fmtter(train_gan),
+ val_ae,
+ fmtter(val_adv),
+ fmtter(val_gan),
+ )
+ )
+ self.on_epoch_end(epoch)
+ dataset.reset_iterators()
+ return narray(epoch_data)
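+
+
+# Usage sketch: load a previously trained model and encode already-scaled data.
+# The model directory is a placeholder and `scaled_data` is assumed to be an
+# (N, 13) array scaled with the model's own scaler.
+#
+#   from sunlab.common import MaxAbsScaler
+#
+#   aae = AdversarialAutoencoder("models/current_model/", scaler=MaxAbsScaler).load()
+#   latent = aae.encoder(scaled_data)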
diff --git a/code/sunlab/sunflow/models/autoencoder.py b/code/sunlab/sunflow/models/autoencoder.py
new file mode 100644
index 0000000..473d00d
--- /dev/null
+++ b/code/sunlab/sunflow/models/autoencoder.py
@@ -0,0 +1,85 @@
+class Autoencoder:
+ """# Autoencoder Model
+
+ Constructs an encoder-decoder model"""
+
+ def __init__(self, model_base_directory):
+ """# Autoencoder Model Initialization
+
+ - model_base_directory: The base folder directory where the model will
+ be saved/ loaded"""
+ self.model_base_directory = model_base_directory
+
+ def init(self, encoder, decoder):
+ """# Initialize an Autoencoder
+
+ - encoder: The encoder to use
+ - decoder: The decoder to use"""
+ from tensorflow import keras
+
+ self.load_parameters()
+ self.model = keras.models.Sequential()
+ self.model.add(encoder.model)
+ self.model.add(decoder.model)
+ self.model._name = "Autoencoder"
+ return self
+
+ def load(self):
+ """# Load an existing Autoencoder"""
+ from os import listdir
+
+ if "autoencoder.keras" not in listdir(f"{self.model_base_directory}/portable/"):
+ return None
+ import tensorflow as tf
+
+ self.model = tf.keras.models.load_model(
+ f"{self.model_base_directory}/portable/autoencoder.keras", compile=False
+ )
+ self.model._name = "Autoencoder"
+ return self
+
+ def save(self, overwrite=False):
+ """# Save the current Autoencoder
+
+ - Overwrite: overwrite any existing autoencoder that has been saved"""
+ from os import listdir
+
+ if overwrite:
+ self.model.save(f"{self.model_base_directory}/portable/autoencoder.keras")
+ return True
+ if "autoencoder.keras" in listdir(f"{self.model_base_directory}/portable/"):
+ return False
+ self.model.save(f"{self.model_base_directory}/portable/autoencoder.keras")
+ return True
+
+ def load_parameters(self):
+ """# Load Autoencoder Model Parameters from File
+ The file needs to have the following parameters defined:
+ - data_size: int
+ - autoencoder_layer_size: int
+ - latent_size: int
+ - autoencoder_depth: int
+ - dropout: float (set to 0. if you don't want a dropout layer)
+ - use_leaky_relu: boolean"""
+ from pickle import load
+
+ with open(
+ f"{self.model_base_directory}/portable/model_parameters.pkl", "rb"
+ ) as phandle:
+ parameters = load(phandle)
+ self.data_size = parameters["data_size"]
+ self.layer_size = parameters["autoencoder_layer_size"]
+ self.latent_size = parameters["latent_size"]
+ self.depth = parameters["autoencoder_depth"]
+ self.dropout = parameters["dropout"]
+ self.use_leaky_relu = parameters["use_leaky_relu"]
+
+ def summary(self):
+ """# Returns the summary of the Autoencoder model"""
+ return self.model.summary()
+
+ def __call__(self, *args, **kwargs):
+ """# Callable
+
+ When calling the autoencoder class, return the model's output"""
+ return self.model(*args, **kwargs)
diff --git a/code/sunlab/sunflow/models/decoder.py b/code/sunlab/sunflow/models/decoder.py
new file mode 100644
index 0000000..40ea190
--- /dev/null
+++ b/code/sunlab/sunflow/models/decoder.py
@@ -0,0 +1,127 @@
+class Decoder:
+ """# Decoder Model
+
+ Constructs a decoder model with a certain depth of intermediate layers of
+ fixed size"""
+
+ def __init__(self, model_base_directory):
+ """# Decoder Model Initialization
+
+ - model_base_directory: The base folder directory where the model will
+ be saved/ loaded"""
+ self.model_base_directory = model_base_directory
+
+ def init(self):
+ """# Initialize a new Decoder
+
+ Expects a model parameters file to already exist in the initialization
+ base directory when initializing the model"""
+ from tensorflow import keras
+ from tensorflow.keras import layers
+
+ self.load_parameters()
+ assert self.depth >= 0, "Depth must be non-negative"
+ self.model = keras.models.Sequential()
+ if self.depth == 0:
+ self.model.add(
+ layers.Dense(
+ self.data_size,
+ input_shape=(self.latent_size,),
+ activation=None,
+ name="decoder_latent_vector",
+ )
+ )
+ else:
+ self.model.add(
+ layers.Dense(
+ self.layer_size,
+ input_shape=(self.latent_size,),
+ activation=None,
+ name="decoder_dense_1",
+ )
+ )
+ if self.use_leaky_relu:
+ self.model.add(layers.LeakyReLU())
+ else:
+ self.model.add(layers.ReLU())
+ if self.dropout > 0.0:
+ self.model.add(layers.Dropout(self.dropout))
+ for _d in range(1, self.depth):
+ self.model.add(
+ layers.Dense(
+ self.layer_size, activation=None, name=f"decoder_dense_{_d+1}"
+ )
+ )
+ if self.use_leaky_relu:
+ self.model.add(layers.LeakyReLU())
+ else:
+ self.model.add(layers.ReLU())
+ if self.dropout > 0.0:
+ self.model.add(layers.Dropout(self.dropout))
+ self.model.add(
+ layers.Dense(
+ self.data_size, activation=None, name="decoder_output_vector"
+ )
+ )
+ self.model._name = "Decoder"
+ return self
+
+ def load(self):
+ """# Load an existing Decoder"""
+ from os import listdir
+
+ if "decoder.keras" not in listdir(f"{self.model_base_directory}/portable/"):
+ return None
+ import tensorflow as tf
+
+ self.model = tf.keras.models.load_model(
+ f"{self.model_base_directory}/portable/decoder.keras", compile=False
+ )
+ self.model._name = "Decoder"
+ return self
+
+ def save(self, overwrite=False):
+ """# Save the current Decoder
+
+ - Overwrite: overwrite any existing decoder that has been saved"""
+ from os import listdir
+
+ if overwrite:
+ self.model.save(f"{self.model_base_directory}/portable/decoder.keras")
+ return True
+ if "decoder.keras" in listdir(f"{self.model_base_directory}/portable/"):
+ return False
+ self.model.save(f"{self.model_base_directory}/portable/decoder.keras")
+ return True
+
+ def load_parameters(self):
+ """# Load Decoder Model Parameters from File
+ The file needs to have the following parameters defined:
+ - data_size: int
+ - autoencoder_layer_size: int
+ - latent_size: int
+ - autoencoder_depth: int
+ - dropout: float (set to 0. if you don't want a dropout layer)
+ - use_leaky_relu: boolean"""
+ from pickle import load
+
+ with open(
+ f"{self.model_base_directory}/portable/model_parameters.pkl", "rb"
+ ) as phandle:
+ parameters = load(phandle)
+ self.data_size = parameters["data_size"]
+ self.layer_size = parameters["autoencoder_layer_size"]
+ self.latent_size = parameters["latent_size"]
+ self.depth = parameters["autoencoder_depth"]
+ self.dropout = parameters["dropout"]
+ self.use_leaky_relu = parameters["use_leaky_relu"]
+
+ def summary(self):
+ """# Returns the summary of the Decoder model"""
+ return self.model.summary()
+
+ def __call__(self, *args, **kwargs):
+ """# Callable
+
+ When calling the decoder class, return the model's output"""
+ return self.model(*args, **kwargs)
diff --git a/code/sunlab/sunflow/models/discriminator.py b/code/sunlab/sunflow/models/discriminator.py
new file mode 100644
index 0000000..38bed56
--- /dev/null
+++ b/code/sunlab/sunflow/models/discriminator.py
@@ -0,0 +1,132 @@
+class Discriminator:
+ """# Discriminator Model
+
+ Constructs a discriminator model with a certain depth of intermediate
+ layers of fixed size"""
+
+ def __init__(self, model_base_directory):
+ """# Discriminator Model Initialization
+
+ - model_base_directory: The base folder directory where the model will
+ be saved/ loaded"""
+ self.model_base_directory = model_base_directory
+
+ def init(self):
+ """# Initialize a new Discriminator
+
+ Expects a model parameters file to already exist in the initialization
+ base directory when initializing the model"""
+ from tensorflow import keras
+ from tensorflow.keras import layers
+
+ self.load_parameters()
+ assert self.depth >= 0, "Depth must be non-negative"
+ self.model = keras.models.Sequential()
+ if self.depth == 0:
+ self.model.add(
+ layers.Dense(
+ 1,
+ input_shape=(self.latent_size,),
+ activation=None,
+ name="discriminator_output_vector",
+ )
+ )
+ else:
+ self.model.add(
+ layers.Dense(
+ self.layer_size,
+ input_shape=(self.latent_size,),
+ activation=None,
+ name="discriminator_dense_1",
+ )
+ )
+ if self.use_leaky_relu:
+ self.model.add(layers.LeakyReLU())
+ else:
+ self.model.add(layers.ReLU())
+ if self.dropout > 0.0:
+ self.model.add(layers.Dropout(self.dropout))
+ for _d in range(1, self.depth):
+ self.model.add(
+ layers.Dense(
+ self.layer_size,
+ activation=None,
+ name=f"discriminator_dense_{_d+1}",
+ )
+ )
+ if self.use_leaky_relu:
+ self.model.add(layers.LeakyReLU())
+ else:
+ self.model.add(layers.ReLU())
+ if self.dropout > 0.0:
+ self.model.add(layers.Dropout(self.dropout))
+ self.model.add(
+ layers.Dense(
+ 1, activation="sigmoid", name="discriminator_output_vector"
+ )
+ )
+ self.model._name = "Discriminator"
+ return self
+
+ def load(self):
+ """# Load an existing Discriminator"""
+ from os import listdir
+
+ if "discriminator.keras" not in listdir(
+ f"{self.model_base_directory}/portable/"
+ ):
+ return None
+ import tensorflow as tf
+
+ self.model = tf.keras.models.load_model(
+ f"{self.model_base_directory}/portable/discriminator.keras", compile=False
+ )
+ self.model._name = "Discriminator"
+ return self
+
+ def save(self, overwrite=False):
+ """# Save the current Discriminator
+
+ - Overwrite: overwrite any existing discriminator that has been
+ saved"""
+ from os import listdir
+
+ if overwrite:
+ self.model.save(f"{self.model_base_directory}/portable/discriminator.keras")
+ return True
+ if "discriminator.keras" in listdir(f"{self.model_base_directory}/portable/"):
+ return False
+ self.model.save(f"{self.model_base_directory}/portable/discriminator.keras")
+ return True
+
+ def load_parameters(self):
+ """# Load Discriminator Model Parameters from File
+ The file needs to have the following parameters defined:
+ - data_size: int
+ - adversary_layer_size: int
+ - latent_size: int
+ - autoencoder_depth: int
+ - dropout: float (set to 0. if you don't want a dropout layer)
+ - use_leaky_relu: boolean"""
+ from pickle import load
+
+ with open(
+ f"{self.model_base_directory}/portable/model_parameters.pkl", "rb"
+ ) as phandle:
+ parameters = load(phandle)
+ self.data_size = parameters["data_size"]
+ self.layer_size = parameters["adversary_layer_size"]
+ self.latent_size = parameters["latent_size"]
+ self.depth = parameters["autoencoder_depth"]
+ self.dropout = parameters["dropout"]
+ self.use_leaky_relu = parameters["use_leaky_relu"]
+
+ def summary(self):
+ """# Returns the summary of the Discriminator model"""
+ return self.model.summary()
+
+ def __call__(self, *args, **kwargs):
+ """# Callable
+
+ When calling the discriminator class, return the model's output"""
+ return self.model(*args, **kwargs)
diff --git a/code/sunlab/sunflow/models/encoder.py b/code/sunlab/sunflow/models/encoder.py
new file mode 100644
index 0000000..22d1a9a
--- /dev/null
+++ b/code/sunlab/sunflow/models/encoder.py
@@ -0,0 +1,140 @@
+class Encoder:
+ """# Encoder Model
+
+ Constructs an encoder model with a certain depth of intermediate layers of
+ fixed size"""
+
+ def __init__(self, model_base_directory):
+ """# Encoder Model Initialization
+
+ - model_base_directory: The base folder directory where the model will
+ be saved/ loaded"""
+ self.model_base_directory = model_base_directory
+
+ def init(self):
+ """# Initialize a new Encoder
+
+ Expects a model parameters file to already exist in the initialization
+ base directory when initializing the model"""
+ from tensorflow import keras
+ from tensorflow.keras import layers
+
+ # Load in the model parameters
+ self.load_parameters()
+ assert self.depth >= 0, "Depth must be non-negative"
+
+ # Create the model
+ self.model = keras.models.Sequential()
+ # At zero depth, connect input and output layer directly
+ if self.depth == 0:
+ self.model.add(
+ layers.Dense(
+ self.latent_size,
+ input_shape=(self.data_size,),
+ activation=None,
+ name="encoder_latent_vector",
+ )
+ )
+ # Otherwise, add fixed-sized layers between them
+ else:
+ self.model.add(
+ layers.Dense(
+ self.layer_size,
+ input_shape=(self.data_size,),
+ activation=None,
+ name="encoder_dense_1",
+ )
+ )
+ # Use LeakyReLU if specified
+ if self.use_leaky_relu:
+ self.model.add(layers.LeakyReLU())
+ else:
+ self.model.add(layers.ReLU())
+            # Include a dropout layer if specified
+ if self.dropout > 0.0:
+ self.model.add(layers.Dropout(self.dropout))
+ for _d in range(1, self.depth):
+ self.model.add(
+ layers.Dense(
+ self.layer_size, activation=None, name=f"encoder_dense_{_d+1}"
+ )
+ )
+ # Use LeakyReLU if specified
+ if self.use_leaky_relu:
+ self.model.add(layers.LeakyReLU())
+ else:
+ self.model.add(layers.ReLU())
+                # Include a dropout layer if specified
+ if self.dropout > 0.0:
+ self.model.add(layers.Dropout(self.dropout))
+ self.model.add(
+ layers.Dense(
+ self.latent_size, activation=None, name="encoder_latent_vector"
+ )
+ )
+ self.model._name = "Encoder"
+ return self
+
+ def load(self):
+ """# Load an existing Encoder"""
+ from os import listdir
+
+ # If the encoder is not found, return None
+ if "encoder.keras" not in listdir(f"{self.model_base_directory}/portable/"):
+ return None
+ # Otherwise, load the encoder
+ # compile=False suppresses warnings about training
+ # If you want to train it, you will need to recompile it
+ import tensorflow as tf
+
+ self.model = tf.keras.models.load_model(
+ f"{self.model_base_directory}/portable/encoder.keras", compile=False
+ )
+ self.model._name = "Encoder"
+ return self
+
+ def save(self, overwrite=False):
+ """# Save the current Encoder
+
+ - Overwrite: overwrite any existing encoder that has been saved"""
+ from os import listdir
+
+ if overwrite:
+ self.model.save(f"{self.model_base_directory}/portable/encoder.keras")
+ return True
+ if "encoder.keras" in listdir(f"{self.model_base_directory}/portable/"):
+ return False
+ self.model.save(f"{self.model_base_directory}/portable/encoder.keras")
+ return True
+
+ def load_parameters(self):
+ """# Load Encoder Model Parameters from File
+ The file needs to have the following parameters defined:
+ - data_size: int
+ - autoencoder_layer_size: int
+ - latent_size: int
+ - autoencoder_depth: int
+ - dropout: float (set to 0. if you don't want a dropout layer)
+ - use_leaky_relu: boolean"""
+ from pickle import load
+
+ with open(
+ f"{self.model_base_directory}/portable/model_parameters.pkl", "rb"
+ ) as phandle:
+ parameters = load(phandle)
+ self.data_size = parameters["data_size"]
+ self.layer_size = parameters["autoencoder_layer_size"]
+ self.latent_size = parameters["latent_size"]
+ self.depth = parameters["autoencoder_depth"]
+ self.dropout = parameters["dropout"]
+ self.use_leaky_relu = parameters["use_leaky_relu"]
+
+ def summary(self):
+ """# Returns the summary of the Encoder model"""
+ return self.model.summary()
+
+ def __call__(self, *args, **kwargs):
+ """# Callable
+
+ When calling the encoder class, return the model's output"""
+ return self.model(*args, **kwargs)
diff --git a/code/sunlab/sunflow/models/encoder_discriminator.py b/code/sunlab/sunflow/models/encoder_discriminator.py
new file mode 100644
index 0000000..5efb6af
--- /dev/null
+++ b/code/sunlab/sunflow/models/encoder_discriminator.py
@@ -0,0 +1,96 @@
+class EncoderDiscriminator:
+ """# EncoderDiscriminator Model
+
+ Constructs an encoder-discriminator model"""
+
+ def __init__(self, model_base_directory):
+ """# EncoderDiscriminator Model Initialization
+
+ - model_base_directory: The base folder directory where the model will
+ be saved/ loaded"""
+ self.model_base_directory = model_base_directory
+
+ def init(self, encoder, discriminator):
+ """# Initialize a EncoderDiscriminator
+
+ - encoder: The encoder to use
+ - discriminator: The discriminator to use"""
+ from tensorflow import keras
+
+ self.load_parameters()
+ self.model = keras.models.Sequential()
+ self.model.add(encoder.model)
+ self.model.add(discriminator.model)
+ self.model._name = "EncoderDiscriminator"
+ return self
+
+ def load(self):
+ """# Load an existing EncoderDiscriminator"""
+ from os import listdir
+
+ if "encoder_discriminator.keras" not in listdir(
+ f"{self.model_base_directory}/portable/"
+ ):
+ return None
+ import tensorflow as tf
+
+ self.model = tf.keras.models.load_model(
+ f"{self.model_base_directory}/portable/encoder_discriminator" + ".keras",
+ compile=False,
+ )
+ self.model._name = "EncoderDiscriminator"
+ return self
+
+ def save(self, overwrite=False):
+ """# Save the current EncoderDiscriminator
+
+ - Overwrite: overwrite any existing encoder_discriminator that has been
+ saved"""
+ from os import listdir
+
+ if overwrite:
+ self.model.save(
+ f"{self.model_base_directory}/portable/encoder_discriminator" + ".keras"
+ )
+ return True
+ if "encoder_discriminator.keras" in listdir(
+ f"{self.model_base_directory}/portable/"
+ ):
+ return False
+ self.model.save(
+ f"{self.model_base_directory}/portable/encoder_discriminator" + ".keras"
+ )
+ return True
+
+ def load_parameters(self):
+ """# Load EncoderDiscriminator Model Parameters from File
+ The file needs to have the following parameters defined:
+ - data_size: int
+ - autoencoder_layer_size: int
+ - latent_size: int
+ - autoencoder_depth: int
+ - dropout: float (set to 0. if you don't want a dropout layer)
+ - use_leaky_relu: boolean"""
+ from pickle import load
+
+ with open(
+ f"{self.model_base_directory}/portable/model_parameters.pkl", "rb"
+ ) as phandle:
+ parameters = load(phandle)
+ self.data_size = parameters["data_size"]
+ self.layer_size = parameters["autoencoder_layer_size"]
+ self.latent_size = parameters["latent_size"]
+ self.depth = parameters["autoencoder_depth"]
+ self.dropout = parameters["dropout"]
+ self.use_leaky_relu = parameters["use_leaky_relu"]
+
+ def summary(self):
+ """# Returns the summary of the EncoderDiscriminator model"""
+ return self.model.summary()
+
+ def __call__(self, *args, **kwargs):
+ """# Callable
+
+ When calling the encoder_discriminator class, return the model's
+ output"""
+ return self.model(*args, **kwargs)
diff --git a/code/sunlab/sunflow/models/utilities.py b/code/sunlab/sunflow/models/utilities.py
new file mode 100644
index 0000000..ab0c2a6
--- /dev/null
+++ b/code/sunlab/sunflow/models/utilities.py
@@ -0,0 +1,93 @@
+# Higher-level functions
+
+from sunlab.common.distribution.adversarial_distribution import AdversarialDistribution
+from sunlab.common.scaler.adversarial_scaler import AdversarialScaler
+from sunlab.common.data.utilities import import_dataset
+from .adversarial_autoencoder import AdversarialAutoencoder
+
+
+def create_aae(
+ dataset_file_name,
+ model_directory,
+ normalization_scaler: AdversarialScaler,
+ distribution: AdversarialDistribution or None,
+ magnification=10,
+ latent_size=2,
+):
+ """# Create Adversarial Autoencoder
+
+ - dataset_file_name: str = Path to the dataset file
+ - model_directory: str = Path to save the model in
+ - normalization_scaler: AdversarialScaler = Data normalization Scaler Model
+ - distribution: AdversarialDistribution = Distribution for the Adversary
+    - magnification: int = The Magnification of the Dataset
+    - latent_size: int = The dimensionality of the latent space"""
+ dataset = import_dataset(dataset_file_name, magnification)
+ model = AdversarialAutoencoder(
+ model_directory, distribution, normalization_scaler
+ ).init(dataset.dataset, latent_size=latent_size)
+ return model
+
+
+def create_aae_and_dataset(
+ dataset_file_name,
+ model_directory,
+ normalization_scaler: AdversarialScaler,
+ distribution: AdversarialDistribution or None,
+ magnification=10,
+ batch_size=1024,
+ shuffle=True,
+ val_split=0.1,
+ latent_size=2,
+):
+ """# Create Adversarial Autoencoder and Load the Dataset
+
+ - dataset_file_name: str = Path to the dataset file
+ - model_directory: str = Path to save the model in
+ - normalization_scaler: AdversarialScaler = Data normalization Scaler Model
+ - distribution: AdversarialDistribution = Distribution for the Adversary
+    - magnification: int = The Magnification of the Dataset
+    - batch_size: int = The training batch size
+    - shuffle: boolean = Whether to shuffle the dataset
+    - val_split: float = The fraction of the data held out for validation
+    - latent_size: int = The dimensionality of the latent space"""
+ model = create_aae(
+ dataset_file_name,
+ model_directory,
+ normalization_scaler,
+ distribution,
+ magnification=magnification,
+ latent_size=latent_size,
+ )
+ dataset = import_dataset(
+ dataset_file_name,
+ magnification,
+ batch_size=batch_size,
+ shuffle=shuffle,
+ val_split=val_split,
+ scaler=model.scaler,
+ )
+ return model, dataset
+
+
+def load_aae(model_directory, normalization_scaler: AdversarialScaler):
+ """# Load Adversarial Autoencoder
+
+ - model_directory: str = Path to save the model in
+ - normalization_scaler: AdversarialScaler = Data normalization Scaler Model
+ """
+ return AdversarialAutoencoder(model_directory, None, normalization_scaler).load()
+
+
+def load_aae_and_dataset(
+ dataset_file_name,
+ model_directory,
+ normalization_scaler: AdversarialScaler,
+ magnification=10,
+):
+ """# Load Adversarial Autoencoder
+
+ - dataset_file_name: str = Path to the dataset file
+ - model_directory: str = Path to save the model in
+ - normalization_scaler: AdversarialScaler = Data normalization Scaler Model
+ - magnification: int = The Magnification of the Dataset"""
+ model = load_aae(model_directory, normalization_scaler)
+ dataset = import_dataset(
+ dataset_file_name, magnification=magnification, scaler=model.scaler
+ )
+ return model, dataset
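+
+
+# Usage sketch (the CSV path and model directory are placeholders; the class
+# name GaussianDistribution is assumed from sunlab.common.distribution):
+#
+#   from sunlab.common import MaxAbsScaler
+#   from sunlab.common.distribution.gaussian_distribution import GaussianDistribution
+#
+#   model, dataset = create_aae_and_dataset(
+#       "data/spheroid26_011523_filtered.csv",
+#       "models/new_model/",
+#       MaxAbsScaler,
+#       GaussianDistribution,
+#       latent_size=2,
+#   )
+#   model.train(dataset, epoch_count=100, output=True)
+#   model.save(overwrite=True)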
diff --git a/code/sunlab/sunflow/plotting/__init__.py b/code/sunlab/sunflow/plotting/__init__.py
new file mode 100644
index 0000000..36e00e6
--- /dev/null
+++ b/code/sunlab/sunflow/plotting/__init__.py
@@ -0,0 +1 @@
+from .model_extensions import *
diff --git a/code/sunlab/sunflow/plotting/model_extensions.py b/code/sunlab/sunflow/plotting/model_extensions.py
new file mode 100644
index 0000000..087f8d3
--- /dev/null
+++ b/code/sunlab/sunflow/plotting/model_extensions.py
@@ -0,0 +1,289 @@
+from matplotlib import pyplot as plt
+from sunlab.common.data.shape_dataset import ShapeDataset
+from sunlab.globals import DIR_ROOT
+
+
+def get_nonphysical_masks(
+ model,
+ xrange=[-1, 1],
+ yrange=[-1, 1],
+ bins=[500, 500],
+ equivdiameter_threshold=10,
+ solidity_threshold=0.1,
+ area_threshold=100,
+ perimeter_threshold=10,
+ area_max_threshold=7000,
+ perimeter_max_threshold=350,
+ area_min_threshold=100,
+ perimeter_min_threshold=5,
+ consistency_check=False,
+):
+ """# Generate the Nonphysical Masks in Grid for Model
+
+ Hard Constraints:
+ - Non-negative values
+ - Ratios no greater than 1
+
+ Soft Constraints:
+ - Area/ Perimeter Thresholds"""
+ import numpy as np
+
+ x = np.linspace(xrange[0], xrange[1], bins[0])
+ y = np.linspace(yrange[0], yrange[1], bins[1])
+ X, Y = np.meshgrid(x, y)
+ X, Y = X.reshape((bins[0], bins[1], 1)), Y.reshape((bins[0], bins[1], 1))
+ XY = np.concatenate([X.reshape((-1, 1)), Y.reshape((-1, 1))], axis=-1)
+ dec_v = model.decoder(XY).numpy().reshape((bins[0] * bins[1], 13))
+ lXY = model.scaler.scaler.inverse_transform(dec_v).reshape((bins[0], bins[1], 13))
+ # Hard Limits
+ non_negative_mask = np.all(lXY > 0, axis=-1)
+ solidity_mask = np.abs(lXY[:, :, 6]) <= 1
+ extent_upper_bound_mask = lXY[:, :, 7] <= 1
+ # Soft Extremas
+ area_max_mask = lXY[:, :, 4] < area_max_threshold
+ perimeter_max_mask = lXY[:, :, 9] < perimeter_max_threshold
+ area_min_mask = lXY[:, :, 4] > area_min_threshold
+ perimeter_min_mask = lXY[:, :, 9] > perimeter_min_threshold
+ # Self-Consistency
+ man_solidity_mask = np.abs(lXY[:, :, 0] / lXY[:, :, 4]) <= 1
+ equivalent_diameter_mask = (
+ np.abs(lXY[:, :, 5] - np.sqrt(4 * np.abs(lXY[:, :, 0]) / np.pi))
+ < equivdiameter_threshold
+ )
+ convex_area_mask = lXY[:, :, 0] < lXY[:, :, 4] + area_threshold
+ convex_perimeter_mask = lXY[:, :, 9] < lXY[:, :, 8] + perimeter_threshold
+ mask_info = {
+ "non-negative": non_negative_mask,
+ "solidity": solidity_mask,
+ "extent-max": extent_upper_bound_mask,
+ #
+ "area-max": area_max_mask,
+ "perimeter-max": perimeter_max_mask,
+ "area-min": area_min_mask,
+ "perimeter-min": perimeter_min_mask,
+ #
+ "computed-solidity": man_solidity_mask,
+ "equivalent-diameter": equivalent_diameter_mask,
+ "convex-area": convex_area_mask,
+ "convex-perimeter": convex_perimeter_mask,
+ }
+ if not consistency_check:
+ mask_info = {
+ "non-negative": non_negative_mask,
+ "solidity": solidity_mask,
+ "extent-max": extent_upper_bound_mask,
+ #
+ "area-max": area_max_mask,
+ "perimeter-max": perimeter_max_mask,
+ "area-min": area_min_mask,
+ "perimeter-min": perimeter_min_mask,
+ }
+ mask_list = [mask_info[key] for key in mask_info.keys()]
+ return np.all(mask_list, axis=0), X, Y, mask_info
+
+
+def excavate(input_2d_array):
+ """# Return Boundaries for Masked Array
+
+ Use X, Y directions only"""
+ from copy import deepcopy as dc
+ from numpy import nan_to_num, zeros_like, abs
+
+ data_2d_array = dc(input_2d_array)
+ data_2d_array = nan_to_num(data_2d_array, nan=20)
+ # X-Gradient
+ x_grad = zeros_like(data_2d_array)
+ x_grad[:-1, :] = data_2d_array[1:, :] - data_2d_array[:-1, :]
+ x_grad[(abs(x_grad) > 10)] = 10
+ x_grad[(abs(x_grad) < 10) & (abs(x_grad) > 0)] = 1
+ x_grad[x_grad == 1] = 0.5
+ x_grad[x_grad > 1] = 1
+ # Y-Gradient
+ y_grad = zeros_like(data_2d_array)
+ y_grad[:, :-1] = data_2d_array[:, 1:] - data_2d_array[:, :-1]
+ y_grad[(abs(y_grad) > 10)] = 10
+ y_grad[(abs(y_grad) < 10) & (abs(y_grad) > 0)] = 1
+ y_grad[y_grad == 1] = 0.5
+ y_grad[y_grad > 1] = 1
+ return x_grad, y_grad
+
+
+def excavate_extra(input_2d_array, N=1):
+ """# Return Boundaries for Masked Array
+
+ Use all 8 directions"""
+ from copy import deepcopy as dc
+ from numpy import nan_to_num, zeros_like, abs
+
+ data_2d_array = dc(input_2d_array)
+ data_2d_array = nan_to_num(data_2d_array, nan=20)
+ # X-Gradient
+ x_grad = zeros_like(data_2d_array)
+ x_grad[:-N, :] = data_2d_array[N:, :] - data_2d_array[:-N, :]
+ x_grad[(abs(x_grad) > 10)] = 10
+ x_grad[(abs(x_grad) < 10) & (abs(x_grad) > 0)] = 1
+ x_grad[x_grad == 1] = 0.5
+ x_grad[x_grad > 1] = 1
+ # Y-Gradient
+ y_grad = zeros_like(data_2d_array)
+ y_grad[:, :-N] = data_2d_array[:, N:] - data_2d_array[:, :-N]
+ y_grad[(abs(y_grad) > 10)] = 10
+ y_grad[(abs(y_grad) < 10) & (abs(y_grad) > 0)] = 1
+ y_grad[y_grad == 1] = 0.5
+ y_grad[y_grad > 1] = 1
+ # XY-Gradient
+ xy_grad = zeros_like(data_2d_array)
+ xy_grad[:-N, :-N] = data_2d_array[N:, N:] - data_2d_array[:-N, :-N]
+ xy_grad[(abs(xy_grad) > 10)] = 10
+ xy_grad[(abs(xy_grad) < 10) & (abs(xy_grad) > 0)] = 1
+ xy_grad[xy_grad == 1] = 0.5
+ xy_grad[xy_grad > 1] = 1
+ # X(-Y)-Gradient
+ yx_grad = zeros_like(data_2d_array)
+ yx_grad[:-N, :-N] = data_2d_array[N:, :-N] - data_2d_array[:-N, N:]
+ yx_grad[(abs(yx_grad) > 10)] = 10
+ yx_grad[(abs(yx_grad) < 10) & (abs(yx_grad) > 0)] = 1
+ yx_grad[yx_grad == 1] = 0.5
+ yx_grad[yx_grad > 1] = 1
+ xyn_grad = dc(yx_grad)
+ # (-X)Y-Gradient
+ xny_grad = zeros_like(data_2d_array)
+ xny_grad[:-N, :-N] = data_2d_array[:-N, N:] - data_2d_array[N:, :-N]
+    xny_grad[(abs(xny_grad) > 10)] = 10
+    xny_grad[(abs(xny_grad) < 10) & (abs(xny_grad) > 0)] = 1
+    xny_grad[xny_grad == 1] = 0.5
+    xny_grad[xny_grad > 1] = 1
+ # (-X)(-Y)-Gradient
+ xnyn_grad = zeros_like(data_2d_array)
+ xnyn_grad[:-N, :-N] = data_2d_array[:-N, :-N] - data_2d_array[N:, N:]
+    xnyn_grad[(abs(xnyn_grad) > 10)] = 10
+    xnyn_grad[(abs(xnyn_grad) < 10) & (abs(xnyn_grad) > 0)] = 1
+    xnyn_grad[xnyn_grad == 1] = 0.5
+    xnyn_grad[xnyn_grad > 1] = 1
+ return x_grad, y_grad, xy_grad, xyn_grad, xny_grad, xnyn_grad
+
+
+def excavate_outline(arr, thickness=1):
+ """# Generate Transparency Mask with NaNs"""
+ from numpy import sum, abs, NaN
+
+ outline = sum(abs(excavate_extra(arr, thickness)), axis=0)
+ outline[outline == 0] = NaN
+ outline[outline > 0] = 0
+ return outline
+
+
+def get_boundary_outline(
+ aae_model_object,
+ pixel_classification_file=None,
+ include_transition_regions=False,
+ border_thickness=3,
+ bin_count=800,
+ xrange=[-6.5, 6.5],
+ yrange=[-4.5, 4.5],
+ threshold=0.75,
+):
+ """# Get Boundary Outlines"""
+ from copy import deepcopy
+ import numpy as np
+
+ if pixel_classification_file is None:
+ pixel_classification_file = "../../extra_data/PhenotypePixels_65x45_800.npy"
+ base_classification = np.loadtxt(pixel_classification_file)
+ base_classification = base_classification.reshape((bin_count, bin_count, 4))
+ max_classification_probability = np.zeros((bin_count, bin_count, 1))
+ max_classification_probability[:, :, 0] = (
+ np.max(base_classification, axis=-1) < threshold
+ )
+ classes_with_include_transition_regions = np.concatenate(
+ [base_classification, max_classification_probability], axis=-1
+ )
+ if include_transition_regions:
+ phenotype_probabilities = deepcopy(
+ np.argsort(classes_with_include_transition_regions[:, :, :], axis=-1)[
+ :, :, -1
+ ]
+ ).astype(np.float32)
+ else:
+ phenotype_probabilities = deepcopy(
+ np.argsort(classes_with_include_transition_regions[:, :, :-1], axis=-1)[
+ :, :, -1
+ ]
+ ).astype(np.float32)
+ nonphysical_mask, _, _, _ = get_nonphysical_masks(
+ aae_model_object, xrange=xrange, yrange=yrange, bins=[bin_count, bin_count]
+ )
+ nonphysical_mask = nonphysical_mask.astype(np.float32)
+ nonphysical_mask[nonphysical_mask == 0] = np.NaN
+ nonphysical_mask[nonphysical_mask == 1] = 0
+ nonphysical_mask = nonphysical_mask.T
+ phenotype_regions = deepcopy(phenotype_probabilities.T + nonphysical_mask.T)
+ outline = excavate_outline(phenotype_regions, border_thickness)
+ return outline
+
+
+def apply_boundary(
+ model_loc=DIR_ROOT + "models/current_model/",
+ border_thickness=3,
+ include_transition_regions=False,
+ threshold=0.7,
+ alpha=1,
+ _plt=None,
+):
+ """# Apply Boundary to Plot
+
+ Use Pregenerated Boundary by Default for Speed"""
+ from ..models import load_aae
+ from sunlab.common.scaler import MaxAbsScaler
+ import numpy as np
+
+ if _plt is None:
+ _plt = plt
+    # Use the pregenerated outline only for the default model and settings
+    if (
+        (model_loc == DIR_ROOT + "models/current_model/")
+        and (border_thickness == 3)
+        and (threshold == 0.7)
+    ):
+ XYM = np.load(DIR_ROOT + "extra_data/OutlineXYM.npy")
+ XY = XYM[:2, :, :]
+ if include_transition_regions:
+ outline = XYM[3, :, :]
+ else:
+ outline = XYM[2, :, :]
+ _plt.pcolor(XY[0, :, :], XY[1, :, :], outline, cmap="gray", alpha=alpha)
+ return
+ model = load_aae(model_loc, MaxAbsScaler)
+ bin_count = 800
+ xrange = [-6.5, 6.5]
+ yrange = [-4.5, 4.5]
+ rng = [xrange, yrange]
+ X = np.linspace(rng[0][0], rng[0][1], bin_count)
+ Y = np.linspace(rng[1][0], rng[1][1], bin_count)
+ XY = np.array(np.meshgrid(X, Y))
+ kwparams = {
+ "bin_count": bin_count,
+ "xrange": xrange,
+ "yrange": yrange,
+ }
+
+ include_tregions = include_transition_regions
+ outline = get_boundary_outline(
+ model,
+ border_thickness=border_thickness,
+ include_transition_regions=include_tregions,
+ threshold=threshold,
+ **kwparams
+ )
+ _plt.pcolor(XY[0, :, :], XY[1, :, :], outline, cmap="gray", alpha=alpha)
+
+
+plt.apply_boundary = apply_boundary
+
+
+def plot_shape_dataset(self, model, *args, **kwargs):
+ """# Plot Shape Dataset"""
+ if self.labels is None:
+ plt.scatter2d(model.encoder(self.dataset), *args, **kwargs)
+ else:
+ plt.scatter2d(model.encoder(self.dataset), self.labels, *args, **kwargs)
+
+
+ShapeDataset.plot = plot_shape_dataset
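+
+
+# Usage sketch: overlay the pregenerated phenotype-boundary outline on the
+# current matplotlib figure (uses the defaults above):
+#
+#   from matplotlib import pyplot as plt
+#
+#   plt.apply_boundary(alpha=0.5)
+#   plt.show()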
diff --git a/code/sunlab/suntorch/__init__.py b/code/sunlab/suntorch/__init__.py
new file mode 100644
index 0000000..d394e27
--- /dev/null
+++ b/code/sunlab/suntorch/__init__.py
@@ -0,0 +1,3 @@
+from ..common import *
+from .models import *
+from .plotting import *
diff --git a/code/sunlab/suntorch/data/__init__.py b/code/sunlab/suntorch/data/__init__.py
new file mode 100644
index 0000000..b9a32c0
--- /dev/null
+++ b/code/sunlab/suntorch/data/__init__.py
@@ -0,0 +1 @@
+from .utilities import *
diff --git a/code/sunlab/suntorch/data/utilities.py b/code/sunlab/suntorch/data/utilities.py
new file mode 100644
index 0000000..05318f5
--- /dev/null
+++ b/code/sunlab/suntorch/data/utilities.py
@@ -0,0 +1,55 @@
+from sunlab.common import ShapeDataset
+from sunlab.common import MaxAbsScaler
+
+
+def process_and_load_dataset(
+ dataset_file, model_folder, magnification=10, scaler=MaxAbsScaler
+):
+    """# Load a dataset and process a model's Latent Space on the Dataset"""
+    raise NotImplementedError("Still Implementing for PyTorch")
+ from ..models import load_aae
+ from sunlab.common import import_full_dataset
+
+ model = load_aae(model_folder, normalization_scaler=scaler)
+ dataset = import_full_dataset(
+ dataset_file, magnification=magnification, scaler=model.scaler
+ )
+ latent = model.encoder(dataset.dataset).numpy()
+ assert len(latent.shape) == 2, "Only 1D Latent Vectors Supported"
+ for dim in range(latent.shape[1]):
+ dataset.dataframe[f"Latent-{dim}"] = latent[:, dim]
+ return dataset
+
+
+def process_and_load_datasets(
+ dataset_file_list, model_folder, magnification=10, scaler=MaxAbsScaler
+):
+ from pandas import concat
+ from ..models import load_aae
+
+    raise NotImplementedError("Still Implementing for PyTorch")
+ dataframes = []
+ datasets = []
+ for dataset_file in dataset_file_list:
+ dataset = process_and_load_dataset(
+ dataset_file, model_folder, magnification, scaler
+ )
+ model = load_aae(model_folder, normalization_scaler=scaler)
+ dataframe = dataset.dataframe
+ for label in ["ActinEdge", "Filopodia", "Bleb", "Lamellipodia"]:
+ if label in dataframe.columns:
+ dataframe[label.lower()] = dataframe[label]
+ if label.lower() not in dataframe.columns:
+ dataframe[label.lower()] = 0
+ latent_columns = [f"Latent-{dim}" for dim in range(model.latent_size)]
+ datasets.append(dataset)
+ dataframes.append(
+ dataframe[
+ dataset.data_columns
+ + dataset.label_columns
+ + latent_columns
+ + ["Frames", "CellNum"]
+ + ["actinedge", "filopodia", "bleb", "lamellipodia"]
+ ]
+ )
+ return datasets, concat(dataframes)
diff --git a/code/sunlab/suntorch/models/__init__.py b/code/sunlab/suntorch/models/__init__.py
new file mode 100644
index 0000000..03d6a45
--- /dev/null
+++ b/code/sunlab/suntorch/models/__init__.py
@@ -0,0 +1,3 @@
+from .adversarial_autoencoder import AdversarialAutoencoder
+from .autoencoder import Autoencoder
+from .variational.autoencoder import VariationalAutoencoder
diff --git a/code/sunlab/suntorch/models/adversarial_autoencoder.py b/code/sunlab/suntorch/models/adversarial_autoencoder.py
new file mode 100644
index 0000000..e3e8a06
--- /dev/null
+++ b/code/sunlab/suntorch/models/adversarial_autoencoder.py
@@ -0,0 +1,165 @@
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+
+from .encoder import Encoder
+from .decoder import Decoder
+from .discriminator import Discriminator
+from .common import *
+
+
+def dummy_distribution(*args):
+ raise NotImplementedError("Give a distribution")
+
+
+class AdversarialAutoencoder:
+ """# Adversarial Autoencoder Model"""
+
+ def __init__(
+ self,
+ data_dim,
+ latent_dim,
+ enc_dec_size,
+ disc_size,
+ negative_slope=0.3,
+ dropout=0.0,
+ distribution=dummy_distribution,
+ ):
+ self.encoder = Encoder(
+ data_dim,
+ enc_dec_size,
+ latent_dim,
+ negative_slope=negative_slope,
+ dropout=dropout,
+ )
+ self.decoder = Decoder(
+ data_dim,
+ enc_dec_size,
+ latent_dim,
+ negative_slope=negative_slope,
+ dropout=dropout,
+ )
+ self.discriminator = Discriminator(
+ disc_size, latent_dim, negative_slope=negative_slope, dropout=dropout
+ )
+ self.data_dim = data_dim
+ self.latent_dim = latent_dim
+ self.p = dropout
+ self.negative_slope = negative_slope
+ self.distribution = distribution
+ return
+
+ def parameters(self):
+ return (
+ *self.encoder.parameters(),
+ *self.decoder.parameters(),
+ *self.discriminator.parameters(),
+ )
+
+ def train(self):
+ self.encoder.train(True)
+ self.decoder.train(True)
+ self.discriminator.train(True)
+ return self
+
+ def eval(self):
+ self.encoder.train(False)
+ self.decoder.train(False)
+ self.discriminator.train(False)
+ return self
+
+ def encode(self, data):
+ return self.encoder(data)
+
+ def decode(self, data):
+ return self.decoder(data)
+
+ def __call__(self, data):
+ return self.decode(self.encode(data))
+
+ def save(self, base="./"):
+ torch.save(self.encoder.state_dict(), base + "weights_encoder.pt")
+ torch.save(self.decoder.state_dict(), base + "weights_decoder.pt")
+ torch.save(self.discriminator.state_dict(), base + "weights_discriminator.pt")
+ return self
+
+ def load(self, base="./"):
+ self.encoder.load_state_dict(torch.load(base + "weights_encoder.pt"))
+ self.encoder.eval()
+ self.decoder.load_state_dict(torch.load(base + "weights_decoder.pt"))
+ self.decoder.eval()
+ self.discriminator.load_state_dict(
+ torch.load(base + "weights_discriminator.pt")
+ )
+ self.discriminator.eval()
+ return self
+
+ def to(self, device):
+ self.encoder.to(device)
+ self.decoder.to(device)
+ self.discriminator.to(device)
+ return self
+
+ def cuda(self):
+ if torch.cuda.is_available():
+ self.encoder = self.encoder.cuda()
+ self.decoder = self.decoder.cuda()
+ self.discriminator = self.discriminator.cuda()
+ return self
+
+ def cpu(self):
+ self.encoder = self.encoder.cpu()
+ self.decoder = self.decoder.cpu()
+ self.discriminator = self.discriminator.cpu()
+ return self
+
+ def init_optimizers(self, recon_lr=1e-4, adv_lr=5e-5):
+ self.optim_E_gen = torch.optim.Adam(self.encoder.parameters(), lr=adv_lr)
+ self.optim_E_enc = torch.optim.Adam(self.encoder.parameters(), lr=recon_lr)
+ self.optim_D_dec = torch.optim.Adam(self.decoder.parameters(), lr=recon_lr)
+ self.optim_D_dis = torch.optim.Adam(self.discriminator.parameters(), lr=adv_lr)
+ return self
+
+ def init_losses(self, recon_loss_fn=F.binary_cross_entropy):
+ self.recon_loss_fn = recon_loss_fn
+ return self
+
+ def train_step(self, raw_data, scale=1.0):
+ data = to_var(raw_data.view(raw_data.size(0), -1))
+
+ self.encoder.zero_grad()
+ self.decoder.zero_grad()
+ self.discriminator.zero_grad()
+
+ z = self.encoder(data)
+ X = self.decoder(z)
+ self.recon_loss = self.recon_loss_fn(X + EPS, data + EPS)
+ self.recon_loss.backward()
+ self.optim_E_enc.step()
+ self.optim_D_dec.step()
+
+ self.encoder.eval()
+ z_gaussian = to_var(self.distribution(data.size(0), self.latent_dim) * scale)
+ z_gaussian_fake = self.encoder(data)
+ y_gaussian = self.discriminator(z_gaussian)
+ y_gaussian_fake = self.discriminator(z_gaussian_fake)
+ self.D_loss = -torch.mean(
+ torch.log(y_gaussian + EPS) + torch.log(1 - y_gaussian_fake + EPS)
+ )
+ self.D_loss.backward()
+ self.optim_D_dis.step()
+
+ self.encoder.train()
+ z_gaussian = self.encoder(data)
+ y_gaussian = self.discriminator(z_gaussian)
+ self.G_loss = -torch.mean(torch.log(y_gaussian + EPS))
+ self.G_loss.backward()
+ self.optim_E_gen.step()
+ return
+
+ def losses(self):
+ try:
+ return self.recon_loss, self.D_loss, self.G_loss
+ except AttributeError:
+ # Losses are only defined after the first call to train_step
+ return None
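A minimal training sketch for the AdversarialAutoencoder above, assuming a standard-normal latent prior and a hypothetical DataLoader `loader` that yields float tensors scaled to [0, 1] (the default reconstruction loss is binary cross-entropy); the dimensions and learning rates are placeholders:

    import torch
    from sunlab.suntorch.models import AdversarialAutoencoder

    def gaussian_prior(n, dim):
        # Target distribution the discriminator pushes the latent codes toward
        return torch.randn(n, dim)

    model = AdversarialAutoencoder(
        data_dim=13, latent_dim=2, enc_dec_size=64, disc_size=64,
        distribution=gaussian_prior,
    )
    model.cuda().init_optimizers(recon_lr=1e-4, adv_lr=5e-5).init_losses()

    for epoch in range(10):
        model.train()
        for batch in loader:  # `loader` is a hypothetical torch DataLoader
            model.train_step(batch)
        print(epoch, model.losses())  # (reconstruction, discriminator, generator)

Each call to train_step runs the three phases shown above: the reconstruction update, the discriminator update, and the generator update on the encoder.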
diff --git a/code/sunlab/suntorch/models/autoencoder.py b/code/sunlab/suntorch/models/autoencoder.py
new file mode 100644
index 0000000..232f180
--- /dev/null
+++ b/code/sunlab/suntorch/models/autoencoder.py
@@ -0,0 +1,114 @@
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+
+from .encoder import Encoder
+from .decoder import Decoder
+from .common import *
+
+
+class Autoencoder:
+ """# Autoencoder Model"""
+
+ def __init__(
+ self, data_dim, latent_dim, enc_dec_size, negative_slope=0.3, dropout=0.0
+ ):
+ self.encoder = Encoder(
+ data_dim,
+ enc_dec_size,
+ latent_dim,
+ negative_slope=negative_slope,
+ dropout=dropout,
+ )
+ self.decoder = Decoder(
+ data_dim,
+ enc_dec_size,
+ latent_dim,
+ negative_slope=negative_slope,
+ dropout=dropout,
+ )
+ self.data_dim = data_dim
+ self.latent_dim = latent_dim
+ self.p = dropout
+ self.negative_slope = negative_slope
+ return
+
+ def parameters(self):
+ return (*self.encoder.parameters(), *self.decoder.parameters())
+
+ def train(self):
+ self.encoder.train(True)
+ self.decoder.train(True)
+ return self
+
+ def eval(self):
+ self.encoder.train(False)
+ self.decoder.train(False)
+ return self
+
+ def encode(self, data):
+ return self.encoder(data)
+
+ def decode(self, data):
+ return self.decoder(data)
+
+ def __call__(self, data):
+ return self.decode(self.encode(data))
+
+ def save(self, base="./"):
+ torch.save(self.encoder.state_dict(), base + "weights_encoder.pt")
+ torch.save(self.decoder.state_dict(), base + "weights_decoder.pt")
+ return self
+
+ def load(self, base="./"):
+ self.encoder.load_state_dict(torch.load(base + "weights_encoder.pt"))
+ self.encoder.eval()
+ self.decoder.load_state_dict(torch.load(base + "weights_decoder.pt"))
+ self.decoder.eval()
+ return self
+
+ def to(self, device):
+ self.encoder.to(device)
+ self.decoder.to(device)
+ return self
+
+ def cuda(self):
+ if torch.cuda.is_available():
+ self.encoder = self.encoder.cuda()
+ self.decoder = self.decoder.cuda()
+ return self
+
+ def cpu(self):
+ self.encoder = self.encoder.cpu()
+ self.decoder = self.decoder.cpu()
+ return self
+
+ def init_optimizers(self, recon_lr=1e-4):
+ self.optim_E_enc = torch.optim.Adam(self.encoder.parameters(), lr=recon_lr)
+ self.optim_D = torch.optim.Adam(self.decoder.parameters(), lr=recon_lr)
+ return self
+
+ def init_losses(self, recon_loss_fn=F.binary_cross_entropy):
+ self.recon_loss_fn = recon_loss_fn
+ return self
+
+ def train_step(self, raw_data):
+ data = to_var(raw_data.view(raw_data.size(0), -1))
+
+ self.encoder.zero_grad()
+ self.decoder.zero_grad()
+ z = self.encoder(data)
+ X = self.decoder(z)
+ self.recon_loss = self.recon_loss_fn(X + EPS, data + EPS)
+ self.recon_loss.backward()
+ self.optim_E_enc.step()
+ self.optim_D.step()
+ return
+
+ def losses(self):
+ try:
+ return self.recon_loss
+ except AttributeError:
+ # The loss is only defined after the first call to train_step
+ return None
diff --git a/code/sunlab/suntorch/models/common.py b/code/sunlab/suntorch/models/common.py
new file mode 100644
index 0000000..f10930e
--- /dev/null
+++ b/code/sunlab/suntorch/models/common.py
@@ -0,0 +1,12 @@
+from torch.autograd import Variable
+
+EPS = 1e-15
+
+
+def to_var(x):
+ """# Convert to variable"""
+ import torch
+
+ if torch.cuda.is_available():
+ x = x.cuda()
+ return Variable(x)
diff --git a/code/sunlab/suntorch/models/convolutional/variational/autoencoder.py b/code/sunlab/suntorch/models/convolutional/variational/autoencoder.py
new file mode 100644
index 0000000..970f717
--- /dev/null
+++ b/code/sunlab/suntorch/models/convolutional/variational/autoencoder.py
@@ -0,0 +1,190 @@
+import torch
+from torch import nn
+
+
+class ConvolutionalVariationalAutoencoder(nn.Module):
+ def __init__(self, latent_dims, hidden_dims, image_shape, dropout=0.0):
+ super(ConvolutionalVariationalAutoencoder, self).__init__()
+
+ self.latent_dims = latent_dims # Size of the latent space layer
+ self.hidden_dims = (
+ hidden_dims # List of hidden layers number of filters/channels
+ )
+ self.image_shape = image_shape # Input image shape
+
+ self.last_channels = self.hidden_dims[-1]
+ self.in_channels = self.image_shape[0]
+ # Simple formula to get the number of neurons after the last convolution layer is flattened
+ self.flattened_channels = int(
+ self.last_channels
+ * (self.image_shape[1] / (2 ** len(self.hidden_dims))) ** 2
+ )
+
+ # For each hidden layer we will create a Convolution Block
+ modules = []
+ for h_dim in self.hidden_dims:
+ modules.append(
+ nn.Sequential(
+ nn.Conv2d(
+ in_channels=self.in_channels,
+ out_channels=h_dim,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ ),
+ nn.BatchNorm2d(h_dim),
+ nn.LeakyReLU(),
+ nn.Dropout(p=dropout),
+ )
+ )
+
+ self.in_channels = h_dim
+
+ self.encoder = nn.Sequential(*modules)
+
+ # Here are our layers for our latent space distribution
+ self.fc_mu = nn.Linear(self.flattened_channels, latent_dims)
+ self.fc_var = nn.Linear(self.flattened_channels, latent_dims)
+
+ # Decoder input layer
+ self.decoder_input = nn.Linear(latent_dims, self.flattened_channels)
+
+ # For each Convolution Block created on the Encoder we will do a symmetric Decoder with the same Blocks, but using ConvTranspose
+ self.hidden_dims.reverse()
+ modules = []
+ for h_dim in self.hidden_dims:
+ modules.append(
+ nn.Sequential(
+ nn.ConvTranspose2d(
+ in_channels=self.in_channels,
+ out_channels=h_dim,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1,
+ ),
+ nn.BatchNorm2d(h_dim),
+ nn.LeakyReLU(),
+ nn.Dropout(p=dropout),
+ )
+ )
+
+ self.in_channels = h_dim
+
+ self.decoder = nn.Sequential(*modules)
+
+ # The final layer ensures the reconstructed image has the same dimensions as the input image
+ self.final_layer = nn.Sequential(
+ nn.Conv2d(
+ in_channels=self.in_channels,
+ out_channels=self.image_shape[0],
+ kernel_size=3,
+ padding=1,
+ ),
+ nn.Sigmoid(),
+ )
+
+ def get_latent_dims(self):
+ return self.latent_dims
+
+ def encode(self, input):
+ """
+ Encodes the input by passing through the encoder network
+ and returns the latent codes.
+ """
+ result = self.encoder(input)
+ result = torch.flatten(result, start_dim=1)
+ # Split the result into mu and var components of the latent Gaussian distribution
+ mu = self.fc_mu(result)
+ log_var = self.fc_var(result)
+
+ return [mu, log_var]
+
+ def decode(self, z):
+ """
+ Maps the given latent codes onto the image space.
+ """
+ result = self.decoder_input(z)
+ result = result.view(
+ -1,
+ self.last_channels,
+ int(self.image_shape[1] / (2 ** len(self.hidden_dims))),
+ int(self.image_shape[1] / (2 ** len(self.hidden_dims))),
+ )
+ result = self.decoder(result)
+ result = self.final_layer(result)
+
+ return result
+
+ def reparameterize(self, mu, log_var):
+ """
+ Reparameterization trick to sample from N(mu, var) using N(0, 1).
+ """
+ std = torch.exp(0.5 * log_var)
+ eps = torch.randn_like(std)
+
+ return mu + eps * std
+
+ def forward(self, input):
+ """
+ Forward method which will encode and decode our image.
+ """
+ mu, log_var = self.encode(input)
+ z = self.reparameterize(mu, log_var)
+
+ return [self.decode(z), input, mu, log_var, z]
+
+ def loss_function(self, recons, input, mu, log_var):
+ """
+ Computes VAE loss function
+ """
+ recons_loss = nn.functional.binary_cross_entropy(
+ recons.reshape(recons.shape[0], -1),
+ input.reshape(input.shape[0], -1),
+ reduction="none",
+ ).sum(dim=-1)
+
+ kld_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp(), dim=-1)
+
+ loss = (recons_loss + kld_loss).mean(dim=0)
+
+ return loss
+
+ def sample(self, num_samples, device):
+ """
+ Samples from the latent space and return the corresponding
+ image space map.
+ """
+ z = torch.randn(num_samples, self.latent_dims)
+ z = z.to(device)
+ samples = self.decode(z)
+
+ return samples
+
+ def generate(self, x):
+ """
+ Given an input image x, returns the reconstructed image
+ """
+ return self.forward(x)[0]
+
+ def interpolate(self, starting_inputs, ending_inputs, device, granularity=10):
+ """This function performs a linear interpolation in the latent space of the autoencoder
+ from starting inputs to ending inputs. It returns the interpolation trajectories.
+ """
+ mu, log_var = self.encode(starting_inputs.to(device))
+ starting_z = self.reparameterize(mu, log_var)
+
+ mu, log_var = self.encode(ending_inputs.to(device))
+ ending_z = self.reparameterize(mu, log_var)
+
+ t = torch.linspace(0, 1, granularity).to(device)
+
+ interp_line = torch.kron(
+ starting_z.reshape(starting_z.shape[0], -1), (1 - t).unsqueeze(-1)
+ ) + torch.kron(ending_z.reshape(ending_z.shape[0], -1), t.unsqueeze(-1))
+
+ decoded_line = self.decode(interp_line).reshape(
+ (starting_inputs.shape[0], t.shape[0]) + (starting_inputs.shape[1:])
+ )
+ return decoded_line
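A short usage sketch of the convolutional VAE above, with placeholder shapes (one channel, 64x64 images, three downsampling blocks) and a random batch just to exercise the forward pass and loss:

    import torch
    from sunlab.suntorch.models.convolutional.variational.autoencoder import (
        ConvolutionalVariationalAutoencoder,
    )

    vae = ConvolutionalVariationalAutoencoder(
        latent_dims=16, hidden_dims=[32, 64, 128], image_shape=(1, 64, 64)
    )
    x = torch.rand(8, 1, 64, 64)  # values in [0, 1] since the loss is binary cross-entropy
    recon, inp, mu, log_var, z = vae(x)  # forward() returns [decode(z), input, mu, log_var, z]
    loss = vae.loss_function(recon, inp, mu, log_var)
    loss.backward()

    samples = vae.sample(num_samples=4, device="cpu")  # decode random latent draws

Note that __init__ reverses the hidden_dims list in place while building the decoder, so the caller's list is mutated.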
diff --git a/code/sunlab/suntorch/models/decoder.py b/code/sunlab/suntorch/models/decoder.py
new file mode 100644
index 0000000..2eeb7a4
--- /dev/null
+++ b/code/sunlab/suntorch/models/decoder.py
@@ -0,0 +1,33 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import sigmoid
+
+
+class Decoder(nn.Module):
+ """# Decoder Neural Network
+ X_dim: Output dimension shape
+ N: Inner neuronal layer size
+ z_dim: Input dimension shape
+ """
+
+ def __init__(self, X_dim, N, z_dim, dropout=0.0, negative_slope=0.3):
+ super(Decoder, self).__init__()
+ self.lin1 = nn.Linear(z_dim, N)
+ self.lin2 = nn.Linear(N, N)
+ self.lin3 = nn.Linear(N, X_dim)
+ self.p = dropout
+ self.negative_slope = negative_slope
+
+ def forward(self, x):
+ x = self.lin1(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ x = self.lin2(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ x = self.lin3(x)
+ return sigmoid(x)
diff --git a/code/sunlab/suntorch/models/discriminator.py b/code/sunlab/suntorch/models/discriminator.py
new file mode 100644
index 0000000..9249095
--- /dev/null
+++ b/code/sunlab/suntorch/models/discriminator.py
@@ -0,0 +1,32 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import sigmoid
+
+
+class Discriminator(nn.Module):
+ """# Discriminator Neural Network
+ N: Inner neuronal layer size
+ z_dim: Input dimension shape
+ """
+
+ def __init__(self, N, z_dim, dropout=0.0, negative_slope=0.3):
+ super(Discriminator, self).__init__()
+ self.lin1 = nn.Linear(z_dim, N)
+ self.lin2 = nn.Linear(N, N)
+ self.lin3 = nn.Linear(N, 1)
+ self.p = dropout
+ self.negative_slope = negative_slope
+
+ def forward(self, x):
+ x = self.lin1(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ x = self.lin2(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ x = self.lin3(x)
+ return sigmoid(x)
diff --git a/code/sunlab/suntorch/models/encoder.py b/code/sunlab/suntorch/models/encoder.py
new file mode 100644
index 0000000..e6f88c7
--- /dev/null
+++ b/code/sunlab/suntorch/models/encoder.py
@@ -0,0 +1,32 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Encoder(nn.Module):
+ """# Encoder Neural Network
+ X_dim: Input dimension shape
+ N: Inner neuronal layer size
+ z_dim: Output dimension shape
+ """
+
+ def __init__(self, X_dim, N, z_dim, dropout=0.0, negative_slope=0.3):
+ super(Encoder, self).__init__()
+ self.lin1 = nn.Linear(X_dim, N)
+ self.lin2 = nn.Linear(N, N)
+ self.lin3gauss = nn.Linear(N, z_dim)
+ self.p = dropout
+ self.negative_slope = negative_slope
+
+ def forward(self, x):
+ x = self.lin1(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ x = self.lin2(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ xgauss = self.lin3gauss(x)
+ return xgauss
diff --git a/code/sunlab/suntorch/models/variational/autoencoder.py b/code/sunlab/suntorch/models/variational/autoencoder.py
new file mode 100644
index 0000000..e335704
--- /dev/null
+++ b/code/sunlab/suntorch/models/variational/autoencoder.py
@@ -0,0 +1,128 @@
+import torch
+import torch.nn.functional as F
+from torch.autograd import Variable
+
+from .encoder import Encoder
+from .decoder import Decoder
+from .common import *
+
+
+class VariationalAutoencoder:
+ """# Variational Autoencoder Model"""
+
+ def __init__(
+ self, data_dim, latent_dim, enc_dec_size, negative_slope=0.3, dropout=0.0
+ ):
+ self.encoder = Encoder(
+ data_dim,
+ enc_dec_size,
+ latent_dim,
+ negative_slope=negative_slope,
+ dropout=dropout,
+ )
+ self.decoder = Decoder(
+ data_dim,
+ enc_dec_size,
+ latent_dim,
+ negative_slope=negative_slope,
+ dropout=dropout,
+ )
+ self.data_dim = data_dim
+ self.latent_dim = latent_dim
+ self.p = dropout
+ self.negative_slope = negative_slope
+ return
+
+ def parameters(self):
+ return (*self.encoder.parameters(), *self.decoder.parameters())
+
+ def train(self):
+ self.encoder.train(True)
+ self.decoder.train(True)
+ return self
+
+ def eval(self):
+ self.encoder.train(False)
+ self.decoder.train(False)
+ return self
+
+ def encode(self, data):
+ return self.encoder(data)
+
+ def decode(self, data):
+ return self.decoder(data)
+
+ def reparameterization(self, mean, var):
+ epsilon = torch.randn_like(var)
+ if torch.cuda.is_available():
+ epsilon = epsilon.cuda()
+ z = mean + var * epsilon
+ return z
+
+ def forward(self, x):
+ mean, log_var = self.encoder(x)
+ z = self.reparameterization(mean, torch.exp(0.5 * log_var))
+ X = self.decoder(z)
+ return X, mean, log_var
+
+ def __call__(self, data):
+ return self.forward(data)
+
+ def save(self, base="./"):
+ torch.save(self.encoder.state_dict(), base + "weights_encoder.pt")
+ torch.save(self.decoder.state_dict(), base + "weights_decoder.pt")
+ return self
+
+ def load(self, base="./"):
+ self.encoder.load_state_dict(torch.load(base + "weights_encoder.pt"))
+ self.encoder.eval()
+ self.decoder.load_state_dict(torch.load(base + "weights_decoder.pt"))
+ self.decoder.eval()
+ return self
+
+ def to(self, device):
+ self.encoder.to(device)
+ self.decoder.to(device)
+ return self
+
+ def cuda(self):
+ if torch.cuda.is_available():
+ self.encoder = self.encoder.cuda()
+ self.decoder = self.decoder.cuda()
+ return self
+
+ def cpu(self):
+ self.encoder = self.encoder.cpu()
+ self.decoder = self.decoder.cpu()
+ return self
+
+ def init_optimizers(self, recon_lr=1e-4):
+ self.optim_E_enc = torch.optim.Adam(self.encoder.parameters(), lr=recon_lr)
+ self.optim_D = torch.optim.Adam(self.decoder.parameters(), lr=recon_lr)
+ return self
+
+ def init_losses(self, recon_loss_fn=F.binary_cross_entropy):
+ self.recon_loss_fn = recon_loss_fn
+ return self
+
+ def train_step(self, raw_data):
+ data = to_var(raw_data.view(raw_data.size(0), -1))
+
+ self.encoder.zero_grad()
+ self.decoder.zero_grad()
+ X, _, _ = self.forward(data)
+ # mean, log_var = self.encoder(data)
+ # z = self.reparameterization(mean, torch.exp(0.5 * log_var))
+ # X = self.decoder(z)
+ self.recon_loss = self.recon_loss_fn(X + EPS, data + EPS)
+ self.recon_loss.backward()
+ self.optim_E_enc.step()
+ self.optim_D.step()
+ return
+
+ def losses(self):
+ try:
+ return self.recon_loss
+ except AttributeError:
+ # The loss is only defined after the first call to train_step
+ return None
diff --git a/code/sunlab/suntorch/models/variational/common.py b/code/sunlab/suntorch/models/variational/common.py
new file mode 100644
index 0000000..f10930e
--- /dev/null
+++ b/code/sunlab/suntorch/models/variational/common.py
@@ -0,0 +1,12 @@
+from torch.autograd import Variable
+
+EPS = 1e-15
+
+
+def to_var(x):
+ """# Convert to variable"""
+ import torch
+
+ if torch.cuda.is_available():
+ x = x.cuda()
+ return Variable(x)
diff --git a/code/sunlab/suntorch/models/variational/decoder.py b/code/sunlab/suntorch/models/variational/decoder.py
new file mode 100644
index 0000000..2eeb7a4
--- /dev/null
+++ b/code/sunlab/suntorch/models/variational/decoder.py
@@ -0,0 +1,33 @@
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import sigmoid
+
+
+class Decoder(nn.Module):
+ """# Decoder Neural Network
+ X_dim: Output dimension shape
+ N: Inner neuronal layer size
+ z_dim: Input dimension shape
+ """
+
+ def __init__(self, X_dim, N, z_dim, dropout=0.0, negative_slope=0.3):
+ super(Decoder, self).__init__()
+ self.lin1 = nn.Linear(z_dim, N)
+ self.lin2 = nn.Linear(N, N)
+ self.lin3 = nn.Linear(N, X_dim)
+ self.p = dropout
+ self.negative_slope = negative_slope
+
+ def forward(self, x):
+ x = self.lin1(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ x = self.lin2(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ x = self.lin3(x)
+ return sigmoid(x)
diff --git a/code/sunlab/suntorch/models/variational/encoder.py b/code/sunlab/suntorch/models/variational/encoder.py
new file mode 100644
index 0000000..b08202f
--- /dev/null
+++ b/code/sunlab/suntorch/models/variational/encoder.py
@@ -0,0 +1,34 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Encoder(nn.Module):
+ """# Encoder Neural Network
+ X_dim: Input dimension shape
+ N: Inner neuronal layer size
+ z_dim: Output dimension shape
+ """
+
+ def __init__(self, X_dim, N, z_dim, dropout=0.0, negative_slope=0.3):
+ super(Encoder, self).__init__()
+ self.lin1 = nn.Linear(X_dim, N)
+ self.lin2 = nn.Linear(N, N)
+ self.lin3mu = nn.Linear(N, z_dim)
+ self.lin3sigma = nn.Linear(N, z_dim)
+ self.p = dropout
+ self.negative_slope = negative_slope
+
+ def forward(self, x):
+ x = self.lin1(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ x = self.lin2(x)
+ if self.p > 0.0:
+ x = F.dropout(x, p=self.p, training=self.training)
+ x = F.leaky_relu(x, negative_slope=self.negative_slope)
+
+ mu = self.lin3mu(x)
+ sigma = self.lin3sigma(x)
+ return mu, sigma
diff --git a/code/sunlab/suntorch/plotting/__init__.py b/code/sunlab/suntorch/plotting/__init__.py
new file mode 100644
index 0000000..36e00e6
--- /dev/null
+++ b/code/sunlab/suntorch/plotting/__init__.py
@@ -0,0 +1 @@
+from .model_extensions import *
diff --git a/code/sunlab/suntorch/plotting/model_extensions.py b/code/sunlab/suntorch/plotting/model_extensions.py
new file mode 100644
index 0000000..33f0191
--- /dev/null
+++ b/code/sunlab/suntorch/plotting/model_extensions.py
@@ -0,0 +1,34 @@
+from matplotlib import pyplot as plt
+from sunlab.common.data.shape_dataset import ShapeDataset
+from sunlab.globals import DIR_ROOT
+
+
+def apply_boundary(
+ model_loc=DIR_ROOT + "models/current_model/",
+ border_thickness=3,
+ include_transition_regions=False,
+ threshold=0.7,
+ alpha=1,
+ _plt=None,
+):
+ """# Apply Boundary to Plot
+
+ Use Pregenerated Boundary by Default for Speed"""
+ from sunlab.common.scaler import MaxAbsScaler
+ import numpy as np
+
+ if _plt is None:
+ _plt = plt
+ if (model_loc == DIR_ROOT + "models/current_model/") and (border_thickness == 3) and (threshold == 0.7):
+ XYM = np.load(DIR_ROOT + "extra_data/OutlineXYM.npy")
+ XY = XYM[:2, :, :]
+ if include_transition_regions:
+ outline = XYM[3, :, :]
+ else:
+ outline = XYM[2, :, :]
+ _plt.pcolor(XY[0, :, :], XY[1, :, :], outline, cmap="gray", alpha=alpha)
+ return
+ raise NotImplementedError("Not Yet Implemented for PyTorch!")
+
+
+plt.apply_boundary = apply_boundary
diff --git a/code/sunlab/transform_data.py b/code/sunlab/transform_data.py
new file mode 100644
index 0000000..d6e3813
--- /dev/null
+++ b/code/sunlab/transform_data.py
@@ -0,0 +1,799 @@
+from sklearn import preprocessing
+import numpy as np
+
+# import features
+
+
+def import_train_set(train_file_name="AllResults.txt"):
+ featurelist = []
+
+ with open(train_file_name, "r") as infile:
+ for line in infile:
+ featurelist.append(line.strip())
+
+ # so now, featurelist[1] has names of things in form
+ # 'Area, MajorAxisLength, ... Class'
+ FeatureNames = [x.strip() for x in featurelist[0].split(",")]
+ # FeatureNames has form ['Area','MajorAxisLength',....'Class']
+ # which is what I wanted
+
+ AllData = [
+ [float(x.strip()) for x in featurelist[i].split(",")]
+ for i in range(1, len(featurelist))
+ ]
+
+ # Data is in form [[1,2,3,....0.0],[3,3,1,...0.0],...[5,3,1,...0.0]],
+ # the last input is the class.
+
+ classes = [int(i[-1]) for i in AllData]
+
+ # classes contains the class number from which the data is from
+
+ # want to delete target from AllData.
+
+ X = [i[0:-1] for i in AllData]
+
+ # X has form similar to Data. So when we reshape, we want the output to be
+ # X = array([[0,1,2,...]
+ # [1,2,3,...]])
+
+ Data = np.asarray(X, order="F")
+
+ # this has the right form; it uses Fortran column-major memory layout vs. row-major C-style
+ # the notation is scientific, where iris data set looks like a float. CHECKED: Both are type numpy.float64
+ # both have same indexing calls, so I think we're in business.
+
+ # looks exactly correct, or at least like iris data set target.
+ Target = np.asarray(classes)
+ return (Data, Target)
+
+
+########################################################################
+# for training purposes, the number of samples in data must be divisible by 256
+
+
+def Trim_Train_Data(Data, Target, max_length=None, balance=False):
+ ####
+ # Inputs: Data is numpy array with N samples (rows) and M measures (cols)
+ # Target is 1xN samples with ground truth
+ # max_length defines maximum length of training data. Should be divisible by 256, might want to code that...
+ # balance is boolean if you wish to have same number of samples in each class.
+ print("Class lengths are = ", [sum(Target == i) for i in set(Target)])
+ if not balance:
+ if (
+ np.shape(Data)[0] / 256 != np.round(np.shape(Data)[0] / 256)
+ or (max_length is not None and max_length < np.shape(Data)[0])
+ ):
+ print("Trimming data for training purposes...")
+ if not max_length:
+ max_length = 256 * (np.floor(np.shape(Data)[0] / 256))
+ else:
+ if max_length / 256 != np.round(max_length / 256):
+ # must make it divisible by 256
+ max_length = int(np.floor(max_length / 256) * 256)
+ print(
+ "Your given max_length was not divisible by 256. New max length is = %d"
+ % max_length
+ )
+ # determine percentages of each class.
+ cs = np.unique(Target)
+ ps = np.zeros(shape=(1, len(cs)))
+ ps = ps[0]
+ rows_to_take = np.array([])
+ for i in range(len(cs)):
+ ps[i] = np.sum(Target == cs[i]) / len(Target)
+ goodrows = np.where(Target == cs[i])[0]
+ rows_to_take = np.append(
+ rows_to_take, goodrows[0 : int(np.floor(ps[i] * max_length))]
+ )
+
+ ad_row = 0
+ class_ind = 0
+ while len(rows_to_take) != max_length:
+ # need to supplement.
+ goodrows = np.where(Target == cs[class_ind])[0]
+ rows_to_take = np.append(
+ rows_to_take,
+ goodrows[int(np.floor(ps[class_ind] * max_length)) + 1 + ad_row],
+ )
+ class_ind = class_ind + 1
+ if class_ind >= len(cs):
+ class_ind = 0
+ ad_row = ad_row + 1
+ rows_to_take = rows_to_take.astype(int)
+ X_train_scaled = Data[rows_to_take, :]
+ Y_train = Target[rows_to_take]
+ print("Complete")
+ else:
+ X_train_scaled = Data
+ Y_train = Target
+ print("Final training length = %d" % X_train_scaled.shape[0])
+ print(
+ "Class lengths after trimming are = ",
+ [sum(Y_train == i) for i in set(Y_train)],
+ )
+ return (X_train_scaled, Y_train)
+ else:
+ # determine which has the minimum number of cases.
+ cs = np.unique(Target)
+ lens = np.zeros((len(cs)))
+ for i in range(len(cs)):
+ lens[i] = sum(Target == cs[i])
+
+ # randomly sample from each class now that number of samples.
+ min_len = int(min(lens))
+ rows_to_take = np.array([])
+ for i in range(len(cs)):
+ possiblerows = np.where(Target == cs[i])[0]
+ # now sample without replacement.
+ rows_to_take = np.append(
+ rows_to_take, np.random.choice(possiblerows, min_len, replace=False)
+ )
+ if len(rows_to_take) / 256 != np.round(
+ len(rows_to_take) / 256
+ ) or (max_length is not None and max_length < len(rows_to_take)):
+ # trim until correct size.
+ if not max_length:
+ max_length = 256 * (np.floor(np.shape(Data)[0] / 256))
+ else:
+ if max_length / 256 != np.round(max_length / 256):
+ # must make it divisible by 256
+ max_length = int(np.floor(max_length / 256) * 256)
+ print(
+ "Your given max_length was not divisible by 256. New max length is = %d"
+ % max_length
+ )
+ # use min_len now to delete entries.
+ timearound = 0
+ pheno = len(cs) # start at the end
+ while len(rows_to_take) > max_length:
+ # entry to delete is
+ # first (min_len-round)*range(1,len(np.unique(Target))+1) -1
+ # print("%d entry delete" % (((min_len-timearound)*pheno) - 1))
+ rows_to_take = np.delete(
+ rows_to_take, ((min_len - timearound) * pheno) - 1
+ )
+ pheno = pheno - 1
+ if pheno < 1:
+ pheno = len(cs)
+ timearound = timearound + 1
+ rows_to_take = rows_to_take.astype(int)
+ X_train_scaled = Data[rows_to_take, :]
+ Y_train = Target[rows_to_take]
+ print("Final training length = %d" % X_train_scaled.shape[0])
+ print(
+ "Class lengths after trimming are = ",
+ [sum(Y_train == i) for i in set(Y_train)],
+ )
+ return (X_train_scaled, Y_train)
+
+
+#############################REMOVE OUTLIER DATA########################
+# How? Do this after scaling the data, then compute a z-score. We'll check the data after that.
+
+
+def Remove_Outliers(Data, Target):
+ # for each class, detect outliers.
+ # we'll begin by using z-scoring. This assumes the data is described by a Gaussian,
+ # which is why it is vital to do this AFTER scaling the data.
+ # I plotted the data; it is absolutely not Gaussian.
+ # I tried the DBSCAN machine learning algorithm, but it is really not helpful.
+ # However, the data IS perhaps Gaussian after embedding. We can clean the signal AFTER by sending in
+ # the embedded data in 1, 2, or 3 dimensions and removing points that are beyond a standard deviation.
+ # Data is TSNE embedded.
+ zscores = np.zeros(np.shape(Data))
+ for pheno in np.unique(Target):
+ # find rows where phenotype is correct.
+ prows = np.where(Target == pheno)[0]
+ for dim in range(np.shape(Data)[1]):
+ # calculate the mean.
+ m = np.mean(Data[prows, dim])
+ # calculate std.
+ s = np.std(Data[prows, dim])
+ for example in range(len(prows)):
+ zscores[prows[example], dim] = (Data[prows[example], dim] - m) / s
+
+ # now you calculated the zscores for each element. Apply a threshold
+ # good "thumb-rule" thresholds can be: 2.5, 3, 3.5, or more.
+ zthresh = 2.5
+
+ zscores = zscores > zthresh
+
+ badrows = [i for i in range(np.shape(zscores)[0]) if zscores[i].any()]
+
+ Data = np.delete(Data, badrows, axis=0)
+ Target = np.delete(Target, badrows, axis=0)
+
+ return (Data, Target)
+
+
+##############################POST AUGMENTATION#########################
+def Augment_Size(Data, Target, max_copies=0, s=0.2, balance=False, augment_class=None):
+ max_copies = int(max_copies)
+ # augment only the copies made by scaling the unit based measures.
+ # Measures should go: Area, MjrAxis, MnrAxis, Ecc,ConA,EqD,Sol,Ext,Per,conPer,fiber_length,InscribeR,bleb_len
+
+ # first, determine if we desire class balance.
+ if balance:
+ # determine which class has maximum number of samples.
+ cs = np.unique(Target)
+ vals = [sum(Target == c) for c in cs]
+ print(
+ "Class %d has max number of samples, increasing other classes via size augmentation"
+ % np.argmax(vals)
+ )
+ for i in range(len(cs)):
+ if i != np.argmax(vals):
+ # determine how many samples need to be made.
+ to_make = int(vals[np.argmax(vals)] - vals[i])
+ # randomly sample rows from Data with the correct phenotype cs[i]
+ possible_rows = np.where(Target == cs[i])[0]
+ # sample to_make numbers from possible_rows.
+ sampled_rows = np.random.choice(possible_rows, to_make, replace=True)
+ newrows = Data[sampled_rows, :]
+ size_vary = s * np.random.rand(1, to_make)[0]
+ # vary size.
+ for v in range(to_make):
+ if np.random.rand() < 0.5:
+ newrows[v, 0] = (
+ newrows[v, 0] + newrows[v, 0] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 1] = newrows[v, 1] + newrows[v, 1] * size_vary[v]
+ newrows[v, 2] = newrows[v, 2] + newrows[v, 2] * size_vary[v]
+ newrows[v, 4] = (
+ newrows[v, 4] + newrows[v, 4] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 5] = newrows[v, 5] + newrows[v, 5] * size_vary[v]
+ newrows[v, 7] = newrows[v, 7] + newrows[v, 7] * size_vary[v]
+ newrows[v, 8] = newrows[v, 8] + newrows[v, 8] * size_vary[v]
+ newrows[v, 9] = newrows[v, 9] + newrows[v, 9] * size_vary[v]
+ newrows[v, 10] = newrows[v, 10] + newrows[v, 10] * size_vary[v]
+ newrows[v, 11] = newrows[v, 11] + newrows[v, 11] * size_vary[v]
+ else:
+ newrows[v, 0] = (
+ newrows[v, 0] - newrows[v, 0] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 1] = newrows[v, 1] - newrows[v, 1] * size_vary[v]
+ newrows[v, 2] = newrows[v, 2] - newrows[v, 2] * size_vary[v]
+ newrows[v, 4] = (
+ newrows[v, 4] - newrows[v, 4] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 5] = newrows[v, 5] - newrows[v, 5] * size_vary[v]
+ newrows[v, 7] = newrows[v, 7] - newrows[v, 7] * size_vary[v]
+ newrows[v, 8] = newrows[v, 8] - newrows[v, 8] * size_vary[v]
+ newrows[v, 9] = newrows[v, 9] - newrows[v, 9] * size_vary[v]
+ newrows[v, 10] = newrows[v, 10] - newrows[v, 10] * size_vary[v]
+ newrows[v, 11] = newrows[v, 11] - newrows[v, 11] * size_vary[v]
+ Data = np.concatenate((Data, newrows), axis=0)
+ yadd = np.ones(to_make) * cs[i]
+ Target = np.concatenate((Target, yadd.astype(int)), axis=0)
+
+ Data = Data[np.argsort(Target), :]
+ Target = Target[np.argsort(Target)]
+
+ if augment_class is None:
+ if max_copies > 0:
+ print(
+ "Augmenting each class with additional %d samples via size augmentation"
+ % max_copies
+ )
+ cs = np.unique(Target)
+ for i in range(len(cs)):
+ # generate n = max_copies of Data.
+ possible_rows = np.where(Target == cs[i])[0]
+ # sample to_make numbers from possible_rows.
+ sampled_rows = np.random.choice(possible_rows, max_copies, replace=True)
+ newrows = Data[sampled_rows, :]
+ size_vary = s * np.random.rand(1, max_copies)[0]
+ # vary size.
+ for v in range(max_copies):
+ if np.random.rand() < 0.5:
+ newrows[v, 0] = (
+ newrows[v, 0] + newrows[v, 0] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 1] = newrows[v, 1] + newrows[v, 1] * size_vary[v]
+ newrows[v, 2] = newrows[v, 2] + newrows[v, 2] * size_vary[v]
+ newrows[v, 4] = (
+ newrows[v, 4] + newrows[v, 4] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 5] = newrows[v, 5] + newrows[v, 5] * size_vary[v]
+ newrows[v, 7] = newrows[v, 7] + newrows[v, 7] * size_vary[v]
+ newrows[v, 8] = newrows[v, 8] + newrows[v, 8] * size_vary[v]
+ newrows[v, 9] = newrows[v, 9] + newrows[v, 9] * size_vary[v]
+ newrows[v, 10] = newrows[v, 10] + newrows[v, 10] * size_vary[v]
+ newrows[v, 11] = newrows[v, 11] + newrows[v, 11] * size_vary[v]
+ else:
+ newrows[v, 0] = (
+ newrows[v, 0] - newrows[v, 0] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 1] = newrows[v, 1] - newrows[v, 1] * size_vary[v]
+ newrows[v, 2] = newrows[v, 2] - newrows[v, 2] * size_vary[v]
+ newrows[v, 4] = (
+ newrows[v, 4] - newrows[v, 4] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 5] = newrows[v, 5] - newrows[v, 5] * size_vary[v]
+ newrows[v, 7] = newrows[v, 7] - newrows[v, 7] * size_vary[v]
+ newrows[v, 8] = newrows[v, 8] - newrows[v, 8] * size_vary[v]
+ newrows[v, 9] = newrows[v, 9] - newrows[v, 9] * size_vary[v]
+ newrows[v, 10] = newrows[v, 10] - newrows[v, 10] * size_vary[v]
+ newrows[v, 11] = newrows[v, 11] - newrows[v, 11] * size_vary[v]
+ Data = np.concatenate((Data, newrows), axis=0)
+ yadd = np.ones(max_copies) * cs[i]
+ Target = np.concatenate((Target, yadd.astype(int)), axis=0)
+
+ Data = Data[np.argsort(Target), :]
+ Target = Target[np.argsort(Target)]
+
+ else:
+ augment_class = int(augment_class)
+ if max_copies > 0:
+ print(
+ "Augmenting Class = %d with additional %d samples via size augmentation"
+ % (augment_class, max_copies)
+ )
+ # generate n = max_copies of Data.
+ possible_rows = np.where(Target == augment_class)[0]
+ # sample to_make numbers from possible_rows.
+ sampled_rows = np.random.choice(possible_rows, max_copies, replace=True)
+ newrows = Data[sampled_rows, :]
+ size_vary = s * np.random.rand(1, max_copies)[0]
+ # vary size.
+ for v in range(max_copies):
+ if np.random.rand() < 0.5:
+ newrows[v, 0] = (
+ newrows[v, 0] + newrows[v, 0] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 1] = newrows[v, 1] + newrows[v, 1] * size_vary[v]
+ newrows[v, 2] = newrows[v, 2] + newrows[v, 2] * size_vary[v]
+ newrows[v, 4] = (
+ newrows[v, 4] + newrows[v, 4] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 5] = newrows[v, 5] + newrows[v, 5] * size_vary[v]
+ newrows[v, 7] = newrows[v, 7] + newrows[v, 7] * size_vary[v]
+ newrows[v, 8] = newrows[v, 8] + newrows[v, 8] * size_vary[v]
+ newrows[v, 9] = newrows[v, 9] + newrows[v, 9] * size_vary[v]
+ newrows[v, 10] = newrows[v, 10] + newrows[v, 10] * size_vary[v]
+ newrows[v, 11] = newrows[v, 11] + newrows[v, 11] * size_vary[v]
+ else:
+ newrows[v, 0] = (
+ newrows[v, 0] - newrows[v, 0] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 1] = newrows[v, 1] - newrows[v, 1] * size_vary[v]
+ newrows[v, 2] = newrows[v, 2] - newrows[v, 2] * size_vary[v]
+ newrows[v, 4] = (
+ newrows[v, 4] - newrows[v, 4] * size_vary[v] * size_vary[v]
+ )
+ newrows[v, 5] = newrows[v, 5] - newrows[v, 5] * size_vary[v]
+ newrows[v, 7] = newrows[v, 7] - newrows[v, 7] * size_vary[v]
+ newrows[v, 8] = newrows[v, 8] - newrows[v, 8] * size_vary[v]
+ newrows[v, 9] = newrows[v, 9] - newrows[v, 9] * size_vary[v]
+ newrows[v, 10] = newrows[v, 10] - newrows[v, 10] * size_vary[v]
+ newrows[v, 11] = newrows[v, 11] - newrows[v, 11] * size_vary[v]
+ Data = np.concatenate((Data, newrows), axis=0)
+ yadd = np.ones(max_copies) * augment_class
+ Target = np.concatenate((Target, yadd.astype(int)), axis=0)
+
+ Data = Data[np.argsort(Target), :]
+ Target = Target[np.argsort(Target)]
+
+ return (Data, Target)
+
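An illustrative call sequence for Augment_Size, assuming `Data`/`Target` follow the 13-column measure layout noted above; the copy count and class index are placeholders:

    # Balance every class up to the size of the largest class, then add 512
    # extra size-jittered copies of class 2 only.
    Data, Target = Augment_Size(Data, Target, balance=True)
    Data, Target = Augment_Size(Data, Target, max_copies=512, s=0.2, augment_class=2)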
+
+########################################################################
+########################################################################
+####### IMPORT THE DEV SET #####
+########################################################################
+########################################################################
+def import_dev_set(dev_file_name="DevResults.txt"):
+ print("Importing the dev set...")
+
+ # import features
+ featurelist = []
+
+ with open(dev_file_name, "r") as infile:
+ for line in infile:
+ featurelist.append(line.strip())
+
+ # so now, featurelist[1] has names of things in form 'Area, MajorAxisLength, ... Class'
+ FeatureNames = [x.strip() for x in featurelist[0].split(",")]
+ # FeatureNames has form ['Area','MajorAxisLength',....'Class'] which is what I wanted
+
+ DevData = [
+ [float(x.strip()) for x in featurelist[i].split(",")]
+ for i in range(1, len(featurelist))
+ ]
+
+ # Data is in form [[1,2,3,....0.0],[3,3,1,...0.0],...[5,3,1,...0.0]], the last input is the class.
+
+ Devclasses = [int(i[-1]) for i in DevData]
+
+ # classes contains the class number from which the data is from
+
+ # want to delete target from AllData.
+
+ DevX = [i[0:-1] for i in DevData]
+
+ # X has form similar to Data. So when we reshape, we want the output to be
+ # X = array([[0,1,2,...]
+ # [1,2,3,...]])
+
+ X_dev = np.asarray(DevX, order="F")
+
+ # add aspect ratio as last column of data
+ AR = []
+ for i in range(len(X_dev)):
+ AR.append(X_dev[i, 1] / X_dev[i, 2])
+
+ AR = np.asarray(AR)
+
+ AR = AR.reshape((len(AR), 1))
+
+ X_dev = np.append(X_dev, AR, 1) # concatenates arrays appropriately.
+
+ # add form factor as last column of data
+ # P^2/Area
+ FF = []
+ for i in range(len(X_dev)):
+ FF.append(X_dev[i, 8] * X_dev[i, 8] / X_dev[i, 0])
+ FF = np.asarray(FF)
+ FF = FF.reshape((len(FF), 1))
+ X_dev = np.append(X_dev, FF, 1)
+
+ # this has the right form; it uses Fortran column-major memory layout vs. row-major C-style
+ # the notation is scientific, where iris data set looks like a float. CHECKED: Both are type numpy.float64
+ # both have same indexing calls, so I think we're in business.
+
+ # looks exactly correct, or at least like iris data set target.
+ y_dev = np.asarray(Devclasses)
+
+ return (X_dev, y_dev, FeatureNames)
+
+
+########################################################################
+#########DATA IS IN THE SAME FORM AS IS FOUND IN IRIS DATASET###########
+########################################################################
+# Target = Target classes (0-4) for training and validation (type, numpy.int64, array)
+# Data = Data for training and validation to be split. (type, numpy.float64, array)
+# FeatureNames = Feature names for each column of data. (type, 'str', python list)
+########################################################################
+# print "Data is now in the same form as that found in Iris Dataset"
+# print "Splitting the training dataset into train/val"
+
+
+def apply_normalization(X_train, max_norm=False, l1_norm=False, l2_norm=False):
+ ########################################################
+ if max_norm:
+ print("Normalizing data using l1_norm")
+ X_train = X_train / np.max(np.abs(X_train), 0)[None, :]
+ if l1_norm:
+ print("Normalizing data using l1_norm")
+ X_train = X_train / np.sum(X_train, 0)[None, :]
+ if l2_norm:
+ print("Normalizing data using l1_norm")
+ X_train = X_train / np.sqrt(np.sum(X_train * X_train, 0))[None, :]
+
+ return X_train
+
+
+########################################################################
+
+
+def preprocess_train_data(X_train, d=2):
+
+ ############### SPLITTING THE DATASET ##################
+ # First split the dataset so it is as if we only had a training set then an eval set.
+ # X_train, X_test, y_train, y_test = train_test_split(Data, Target, test_size = .3)#.25)#, random_state =
+ # default has shuffle = True. test_size sets the proportion of the data set to include in the test, here 25%.
+ ########################################################
+ if d > 1:
+ print("Increasing dimensionality of dataset using cross terms")
+ #################INCREASING FEATURES####################
+ poly = preprocessing.PolynomialFeatures(degree=d, interaction_only=True)
+ # IN SOME MODELS with 2 polynomial features, we are getting 90% exactly. In some polynomial 3 models,
+ # we are getting 90.83%, which is exactly even with deep learning models.
+
+ X_train = poly.fit_transform(X_train)
+ # target_feature_names = ['x'.join(['{}^{}'.format(pair[0],pair[1]) for pair in tuple if pair[1]!=0]) for tuple in [zip(FeatureNames,p) for p in poly.powers_]]
+ # poly=preprocessing.PolynomialFeatures(degree = 2, interaction_only = True)
+ # X_test = poly.fit_transform(X_test)
+ # poly=preprocessing.PolynomialFeatures(degree = 2, interaction_only = True)
+ # X_dev = poly.fit_transform(X_dev)
+
+ ########################################################
+
+ print("Scaling the data")
+ ################# SCALE THE DATA #######################
+ # Scale the data. Each attribute in the dataset must be independently scaled, that is
+ # 0 mean, and unit variance. Doing this returns the z-scores of the data
+ # Z = (x - mu) / sigma
+
+ # , QuantileTransformer(output_distribution='normal')
+ scaler = preprocessing.RobustScaler().fit(X_train)
+ # preprocessing.StandardScaler().fit(X_train) #IMPORTANT NOTE: We are scaling based only on training data!!!!
+
+ X_train_scaled = scaler.fit_transform(X_train)
+
+ # X_test_scaled = scaler.transform(X_test) # will be used later to evaluate the performance.
+
+ # X_dev_scaled = scaler.transform(X_dev)
+
+ ##########################################################
+
+ return (X_train_scaled, scaler) # , target_feature_names)
+
+
+def preprocess_test_data(X_dev, scaler, d=2):
+ ############### SPLITTING THE DATASET ##################
+ # First split the dataset so it is as if we only had a training set then an eval set.
+ # X_train, X_test, y_train, y_test = train_test_split(Data, Target, test_size = .3)#.25)#, random_state =
+ # default has shuffle = True. test_size sets the proportion of the data set to include in the test, here 25%.
+ ########################################################
+
+ print("Increasing dimensionality of dataset using cross terms")
+ #################INCREASING FEATURES####################
+ poly = preprocessing.PolynomialFeatures(degree=d, interaction_only=True)
+ # IN SOME MODELS with 2 polynomial features, we are getting 90% exactly. In some polynomial 3 models,
+ # we are getting 90.83%, which is exactly even with deep learning models.
+
+ # X_train = poly.fit_transform(X_train)
+ # target_feature_names = ['x'.join(['{}^{}'.format(pair[0],pair[1]) for pair in tuple if pair[1]!=0]) for tuple in [zip(FeatureNames,p) for p in poly.powers_]]
+ # poly=preprocessing.PolynomialFeatures(degree = 2, interaction_only = True)
+ # X_test = poly.fit_transform(X_test)
+ # poly=preprocessing.PolynomialFeatures(degree = 2, interaction_only = True)
+ X_dev = poly.fit_transform(X_dev)
+
+ ########################################################
+
+ print("Scaling the data")
+ ################# SCALE THE DATA #######################
+ # Scale the data. Each attribute in the dataset must be independently scaled, that is
+ # 0 mean, and unit variance. Doing this returns the z-scores of the data
+ # Z = (x - mu) / sigma
+
+ # scaler = preprocessing.StandardScaler().fit(X_train) #IMPORTANT NOTE: We are scaling based only on training data!!!!
+
+ # X_train_scaled = scaler.transform(X_train)
+
+ # X_test_scaled = scaler.transform(X_test) # will be used later to evaluate the performance.
+
+ X_dev_scaled = scaler.transform(X_dev)
+
+ ##########################################################
+
+ return X_dev_scaled
+
+
+def Add_Measures(
+ Data,
+ FeatureNames=None,
+ add_AR=True,
+ add_FF=True,
+ add_convexity=True,
+ add_curl_old=True,
+ add_curl=True,
+ add_sphericity=True,
+ add_InscribedArea=True,
+ add_BlebRel=True,
+):
+ ############### EXPANDING THE DATASET ##################
+ # Add measures of Aspect Ratio, Form Factor, Convexity, Curl, and Sphericity
+ # Input: Data must be an np array with N (row) examples x M (cols) measures.
+ # Measures should go: Area, MjrAxis, MnrAxis, Ecc,ConA,EqD,Sol,Ext,Per,conPer,fiber_length,InscribeR,bleb_len
+ ########################################################
+ if add_AR:
+ AR = []
+ for i in range(len(Data)):
+ AR.append(Data[i, 1] / Data[i, 2])
+
+ AR = np.asarray(AR)
+
+ AR = AR.reshape((len(AR), 1))
+
+ Data = np.append(Data, AR, 1) # concatenates arrays appropriately.
+ if FeatureNames is not None:
+ FeatureNames.extend(["AR"])
+
+ if add_FF:
+ # this measure is really compactness, if you multiply each by 4 pi
+ # note this is different from roundness, which would use convex perimeter
+ FF = []
+ for i in range(len(Data)):
+ FF.append(Data[i, 0] / (Data[i, 8] * Data[i, 8]))
+ # FF.append(Data[i,8]*Data[i,8] / Data[i,0])
+
+ FF = np.asarray(FF)
+ FF = FF.reshape((len(FF), 1))
+ Data = np.append(Data, FF, 1)
+ if FeatureNames is not None:
+ FeatureNames.extend(["FF"])
+
+ if add_convexity:
+ CC = []
+ for i in range(len(Data)):
+ CC.append(Data[i, 8] / Data[i, 9])
+
+ CC = np.asarray(CC)
+ CC = CC.reshape((len(CC), 1))
+ Data = np.append(Data, CC, 1)
+ if FeatureNames is not None:
+ FeatureNames.extend(["Convexity"])
+
+ if add_curl_old:
+ # tells how curled the object is. might help for lamellipodia.
+ # curl is length / fiber length. (I assume length here can be major axis length)
+ # fiber length definition is (perimeter - sqrt(perimeter^2 - 16*Area)) / 4
+
+ # this definition does not work for a circle. Note that the result will be imaginary.
+ # I changed the 16 to a 4Pi. This should be fine.
+ cc = []
+ for i in range(len(Data)):
+ if (4 * np.pi * Data[i, 0]) <= (Data[i, 8] * Data[i, 8]):
+ fiber_length = (
+ Data[i, 8]
+ - np.sqrt((Data[i, 8] * Data[i, 8]) - (4 * np.pi * Data[i, 0]))
+ ) / np.pi # 4
+ cc.append(Data[i, 1] / fiber_length)
+ else:
+ fiber_length = Data[i, 8] / np.pi # 4
+ cc.append(Data[i, 1] / fiber_length)
+
+ cc = np.asarray(cc)
+ cc = cc.reshape((len(cc), 1))
+ Data = np.append(Data, cc, 1)
+ if FeatureNames is not None:
+ FeatureNames.extend(["Curl_old"])
+
+ if add_curl:
+ cc = []
+ for i in range(len(Data)):
+ cc.append(Data[i, 1] / Data[i, 10])
+
+ cc = np.asarray(cc)
+ cc = cc.reshape((len(cc), 1))
+ Data = np.append(Data, cc, 1)
+ # bound between 0 and 1 if major axis length could be replaced by feret diameter.
+ if FeatureNames is not None:
+ FeatureNames.extend(["Curl"])
+
+ if add_sphericity:
+ ss = []
+ for i in range(len(Data)):
+ ss.append(Data[i, 11] * 2 / Data[i, 1])
+
+ ss = np.asarray(ss)
+ ss = ss.reshape((len(ss), 1))
+ Data = np.append(Data, ss, 1)
+ # bound between 0 and 1 where 1 is a circle, perfectly spherical, and 0 is not at all.
+ # would be better if we had feret diameter instead of major axis.
+ if FeatureNames is not None:
+ FeatureNames.extend(["Sphericity"])
+
+ if add_InscribedArea:
+ aa = []
+ for i in range(len(Data)):
+ aa.append(Data[i, 1] * Data[i, 1] * np.pi / Data[i, 11])
+
+ aa = np.asarray(aa)
+ aa = aa.reshape((len(aa), 1))
+ Data = np.append(Data, aa, 1)
+ if FeatureNames is not None:
+ FeatureNames.extend(["InArea"])
+
+ if add_BlebRel:
+ bb = []
+ for i in range(len(Data)):
+ bb.append(Data[i, 12] / Data[i, 11])
+
+ bb = np.asarray(bb)
+ bb = bb.reshape((len(bb), 1))
+ Data = np.append(Data, bb, 1)
+ if FeatureNames is not None:
+ FeatureNames.extend(["Bleb_Rel"])
+
+ if FeatureNames is not None:
+ return (Data, FeatureNames)
+ else:
+ return Data
+
+
+def Exclude_Measures(
+ Data,
+ FeatureNames=None,
+ ex_Area=False,
+ ex_MjrAxis=False,
+ ex_MnrAxis=False,
+ ex_Ecc=False,
+ ex_ConA=False,
+ ex_EqD=False,
+ ex_Sol=False,
+ ex_Ext=False,
+ ex_Per=False,
+ ex_conPer=False,
+ ex_FL=False,
+ ex_InR=False,
+ ex_bleb=False,
+):
+ # Area,MjrAxis,MnrAxis,Ecc,ConA,EqD,Sol,Ext,Per,conPer,FL,InR
+
+ del_cols = []
+ if ex_Area:
+ del_cols.append(0)
+ if ex_MjrAxis:
+ del_cols.append(1)
+ if ex_MnrAxis:
+ del_cols.append(2)
+ if ex_Ecc:
+ del_cols.append(3)
+ if ex_ConA:
+ del_cols.append(4)
+ if ex_EqD:
+ del_cols.append(5)
+ if ex_Sol:
+ del_cols.append(6)
+ if ex_Ext:
+ del_cols.append(7)
+ if ex_Per:
+ del_cols.append(8)
+ if ex_conPer:
+ del_cols.append(9)
+ if ex_FL:
+ del_cols.append(10)
+ if ex_InR:
+ del_cols.append(11)
+ if ex_bleb:
+ del_cols.append(12)
+
+ Data = np.delete(Data, del_cols, 1)
+ if FeatureNames is not None:
+ FeatureNames = [i for j, i in enumerate(FeatureNames) if j not in del_cols]
+ return (Data, FeatureNames)
+ else:
+ return Data
+
+
+def open_and_save_test_data(fpath, csvfilename, txtfilename, ratio):
+ # fpath = '/volumes/chris stuff/chemsensing/chemsensing/Y27632_120518/Results/'
+ # /Rho_Act_120118/Results_after/'
+ # filename = 'FinalResults_after'
+ # option to delete certain measures if done so in training.
+ # order should go like
+ # %frame number%correctedNum%area%centroidx%centroidy%major%minor%eccentricity
+ # %orientation%convex area%filledarea%equivDiameter%solidity%extent%perimeter
+ # %perimeter old%convex perimeter%fiber length%%max in radii%bleb length%centersx%centersy
+
+ data = np.genfromtxt(
+ fpath + csvfilename + ".csv",
+ delimiter=",",
+ usecols=[2, 5, 6, 7, 9, 11, 12, 13, 14, 16, 17, 18, 19],
+ skip_header=1,
+ )
+ # was cols 3,6,7,8,10,12,13,14,15
+ frames_cell = np.genfromtxt(
+ fpath + csvfilename + ".csv", delimiter=",", usecols=[0, 1], skip_header=1
+ )
+ # add aspect ratio as last column of data
+
+ data[:, 0] = data[:, 0] * ratio * ratio # area
+ data[:, 1] = data[:, 1] * ratio # mjr
+ data[:, 2] = data[:, 2] * ratio # MnrAxis
+ # ecc unitless
+ data[:, 4] = data[:, 4] * ratio * ratio # ConvexArea
+ data[:, 5] = data[:, 5] * ratio # EquivDiameter
+ # Solidity
+ # Extent
+ data[:, 8] = data[:, 8] * ratio # Perimeter
+ data[:, 9] = data[:, 9] * ratio # conPerim
+ data[:, 10] = data[:, 10] * ratio # FibLen
+ data[:, 11] = data[:, 11] * ratio # max inscribed r
+ data[:, 12] = data[:, 12] * ratio # bleblen
+
+ preds = np.genfromtxt(
+ fpath + "/" + txtfilename + ".txt",
+ delimiter=" ",
+ usecols=[4, 5, 6, 7],
+ skip_header=1,
+ )
+ y_target = np.where(np.max(preds, 1) > 0.7, np.argmax(preds, 1), 4)
+ # y_target = np.reshape(y_target,(len(y_target),1))
+
+ return (data, y_target, frames_cell)
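Taken together, a hedged end-to-end sketch of how these helpers compose (file names, max_length, and the feature-name list are placeholders; held-out data must be expanded to the same column layout before reusing the fitted scaler):

    from sunlab.transform_data import (
        Add_Measures,
        Trim_Train_Data,
        import_train_set,
        preprocess_test_data,
        preprocess_train_data,
    )

    # Import raw measures, derive the extra shape features, balance and trim to a
    # multiple of 256, then expand with cross terms and scale.
    Data, Target = import_train_set("AllResults.txt")
    names = ["Area", "MjrAxis", "MnrAxis", "Ecc", "ConA", "EqD", "Sol",
             "Ext", "Per", "conPer", "fiber_length", "InscribeR", "bleb_len"]
    Data, names = Add_Measures(Data, FeatureNames=names)
    Data, Target = Trim_Train_Data(Data, Target, max_length=4096, balance=True)
    X_train_scaled, scaler = preprocess_train_data(Data, d=2)

    # Held-out data gets the same polynomial expansion but reuses the scaler that
    # was fitted on the training set; preprocess_test_data never refits it.
    X_dev_scaled = preprocess_test_data(X_dev, scaler, d=2)  # X_dev is hypothetical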