Diffstat (limited to 'code/sunlab')
68 files changed, 4938 insertions, 0 deletions
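As a quick orientation to the loaders and scalers this commit introduces, here is a minimal usage sketch (not part of the commit): the CSV path, model directory, magnification, and split fraction are placeholders, and the input file is assumed to contain the thirteen shape columns expected by ShapeDataset.

# Minimal sketch, assuming placeholder paths and a ShapeDataset-compatible CSV.
from sunlab.common.data.utilities import import_dataset
from sunlab.common.scaler.max_abs_scaler import MaxAbsScaler

# First pass: load the shape table unscaled so the scaler can be fit on it.
raw = import_dataset("cells.csv", magnification=10)
scaler = MaxAbsScaler("models/example_model").init(raw.dataset)

# Second pass: the fitted scaler is applied to the columns on load.
data = import_dataset("cells.csv", magnification=10, scaler=scaler, val_split=0.2)
for batch in data.training:  # batches include labels when a "Class" column exists
    pass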
diff --git a/code/sunlab/__init__.py b/code/sunlab/__init__.py new file mode 100644 index 0000000..0ccac86 --- /dev/null +++ b/code/sunlab/__init__.py @@ -0,0 +1,17 @@ +from .common import * + +font = {"family": "DejaVu Sans", "weight": "regular", "size": 20} +import matplotlib + +matplotlib.rc("font", **font) + + +def set_font(ptsize=10): + font = {"family": "DejaVu Sans", "weight": "regular", "size": ptsize} + import matplotlib + + matplotlib.rc("font", **font) + + +def set_font_l(ptsize=20): + set_font(ptsize) diff --git a/code/sunlab/common/__init__.py b/code/sunlab/common/__init__.py new file mode 100644 index 0000000..cb6716c --- /dev/null +++ b/code/sunlab/common/__init__.py @@ -0,0 +1,5 @@ +from .data import * +from .distribution import * +from .scaler import * +from .mathlib import * +from .plotting import * diff --git a/code/sunlab/common/data/__init__.py b/code/sunlab/common/data/__init__.py new file mode 100644 index 0000000..3e26874 --- /dev/null +++ b/code/sunlab/common/data/__init__.py @@ -0,0 +1,6 @@ +from .basic import * +from .dataset import * +from .dataset_iterator import * +from .shape_dataset import * +from .image_dataset import * +from .utilities import * diff --git a/code/sunlab/common/data/basic.py b/code/sunlab/common/data/basic.py new file mode 100644 index 0000000..bb2e912 --- /dev/null +++ b/code/sunlab/common/data/basic.py @@ -0,0 +1,6 @@ +import numpy + + +numpy.load_dat = lambda *args, **kwargs: numpy.load( + *args, **kwargs, allow_pickle=True +).item() diff --git a/code/sunlab/common/data/dataset.py b/code/sunlab/common/data/dataset.py new file mode 100644 index 0000000..8589abf --- /dev/null +++ b/code/sunlab/common/data/dataset.py @@ -0,0 +1,255 @@ +from .dataset_iterator import DatasetIterator + + +class Dataset: + """# Dataset Superclass""" + + base_scale = 10.0 + + def __init__( + self, + dataset_filename, + data_columns=[], + label_columns=[], + batch_size=None, + shuffle=False, + val_split=0.0, + scaler=None, + sort_columns=None, + random_seed=4332, + pre_scale=10.0, + **kwargs + ): + """# Initialize Dataset + self.dataset = dataset (N, ...) + self.labels = labels (N, ...) 
+ + Optional Arguments: + - prescale_function: The function that takes the ratio and transforms + the dataset by multiplying the prescale_function output + - sort_columns: The columns to sort the data by initially + - equal_split: If the classifications should be equally split in + training""" + from pandas import read_csv + from numpy import array, all + from numpy.random import seed + + if seed is not None: + seed(random_seed) + + # Basic Dataset Information + self.data_columns = data_columns + self.label_columns = label_columns + self.source = dataset_filename + self.dataframe = read_csv(self.source) + + # Pre-scaling Transformation + prescale_ratio = self.base_scale / pre_scale + ratio = prescale_ratio + prescale_powers = array([2, 1, 1, 0, 2, 1, 0, 0, 1, 1, 1, 1, 1]) + if "prescale_function" in kwargs.keys(): + prescale_function = kwargs["prescale_function"] + else: + + def prescale_function(x): + return x**prescale_powers + + self.prescale_function = prescale_function + self.prescale_factor = self.prescale_function(ratio) + assert ( + len(data_columns) == self.prescale_factor.shape[0] + ), "Column Mismatch on Prescale" + self.original_scale = pre_scale + + # Scaling Transformation + self.scaled = scaler is not None + self.scaler = scaler + + # Training Dataset Information + self.do_split = False if val_split == 0.0 else True + self.validation_split = val_split + self.batch_size = batch_size + self.do_shuffle = shuffle + self.equal_split = False + if "equal_split" in kwargs.keys(): + self.equal_split = kwargs["equal_split"] + + # Classification Labels if they exist + self.dataset = self.dataframe[self.data_columns].to_numpy() + if len(self.label_columns) == 0: + self.labels = None + elif not all([column in self.dataframe.columns for column in label_columns]): + import warnings + + warnings.warn( + "No classification labels found for the dataset", RuntimeWarning + ) + self.labels = None + else: + self.labels = self.dataframe[self.label_columns].squeeze() + + # Initialize the dataset + if "sort_columns" in kwargs.keys(): + self.sort(kwargs["sort_columns"]) + if self.do_shuffle: + self.shuffle() + if self.do_split: + self.split() + self.refresh_dataset() + + def __len__(self): + """# Get how many cases are in the dataset""" + return self.dataset.shape[0] + + def __getitem__(self, idx): + """# Make Dataset Sliceable""" + idx_slice = None + slice_stride = 1 if self.batch_size is None else self.batch_size + # If we pass a slice, return the slice + if type(idx) == slice: + idx_slice = idx + # If we pass an int, return a batch-size slice + else: + idx_slice = slice( + idx * slice_stride, min([len(self), (idx + 1) * slice_stride]) + ) + if self.labels is None: + return self.dataset[idx_slice, ...] + return self.dataset[idx_slice, ...], self.labels[idx_slice, ...] + + def scale_data(self, data): + """# Scale dataset from scaling function""" + data = data * self.prescale_factor + if not (self.scaler is None): + data = self.scaler(data) + return data + + def scale(self): + """# Scale Dataset""" + self.dataset = self.scale_data(self.dataset) + + def refresh_dataset(self, dataframe=None): + """# Refresh Dataset + + Regenerate the dataset from a dataframe. 
+ Primarily used after a sort or filter.""" + if dataframe is None: + dataframe = self.dataframe + self.dataset = dataframe[self.data_columns].to_numpy() + if self.labels is not None: + self.labels = dataframe[self.label_columns].to_numpy().squeeze() + self.scale() + + def sort_on(self, columns): + """# Sort Dataset on Column(s)""" + from numpy import all + + if type(columns) == str: + columns = [columns] + if columns is not None: + assert all( + [column in self.dataframe.columns for column in columns] + ), "Dataframe does not contain some provided columns!" + self.dataframe = self.dataframe.sort_values(by=columns) + self.refresh_dataset() + + def filter_on(self, column, value): + """# Filter Dataset on Column Value(s)""" + assert column in self.dataframe.columns, "Column DNE" + self.working_dataset = self.dataframe[self.dataframe[column].isin(value)] + self.refresh_dataset(self.working_dataset) + + def filter_off(self): + """# Remove any filter on the dataset""" + self.refresh_dataset() + + def unique(self, column): + """# Get unique values in a column(s)""" + assert column in self.dataframe.columns, "Column DNE" + from numpy import unique + + return unique(self.dataframe[column]) + + def shuffle_data(self, data, labels=None): + """# Shuffle a dataset""" + from numpy.random import permutation + + shuffled = permutation(data.shape[0]) + if labels is not None: + assert ( + self.labels.shape[0] == self.dataset.shape[0] + ), "Dataset and Label Shape Mismatch" + shuf_data = data[shuffled, ...] + shuf_labels = labels[shuffled] + if len(labels.shape) > 1: + shuf_labels = labels[shuffled,...] + return shuf_data, shuf_labels + return data[shuffled, ...] + + def shuffle(self): + """# Shuffle the dataset""" + if self.do_shuffle: + if self.labels is None: + self.dataset = self.shuffle_data(self.dataset) + self.dataset, self.labels = self.shuffle_data(self.dataset, self.labels) + + def split(self): + """# Training/ Validation Splitting""" + from numpy import floor, unique, where, hstack, delete + from numpy.random import permutation + + equal_classes = self.equal_split + if not self.do_split: + return + assert self.validation_split <= 1.0, "Too High" + assert self.validation_split > 0.0, "Too Low" + train_count = int(floor(self.dataset.shape[0] * (1 - self.validation_split))) + training_data = self.dataset[:train_count, ...] + training_labels = None + validation_data = self.dataset[train_count:, ...] + validation_labels = None + if self.labels is not None: + if equal_classes: + # Ensure the split balances the prevalence of each class + assert len(self.labels.shape) == 1, "1D Classification Only Currently" + classification_breakdown = unique(self.labels, return_counts=True) + train_count = min( + [ + train_count, + classification_breakdown.shape[0] + * min(classification_breakdown[1]), + ] + ) + class_size = train_count / classification_breakdown.shape[0] + class_indicies = [ + permutation(where(self.labels == _class)[0]) + for _class in classification_breakdown[0] + ] + class_indicies = [indexes[:class_size] for indexes in class_indicies] + train_class_indicies = hstack(class_indicies).squeeze() + train_class_indicies = permutation(train_class_indicies) + training_data = self.dataset[train_class_indicies, ...] + training_labels = self.labels[train_class_indicies] + if len(self.labels.shape) > 1: + training_labels = self.labels[train_class_indicies,...] 
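+ # Rows not drawn into the balanced training subset form the validation split below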
+ validation_data = delete(self.dataset, train_class_indicies, axis=0) + validation_labels = delete( + self.labels, train_class_indicies, axis=0 + ).squeeze() + else: + training_labels = self.labels[:train_count] + if len(training_labels.shape) > 1: + training_labels = self.labels[:train_count, ...] + validation_labels = self.labels[train_count:] + if len(validation_labels.shape) > 1: + validation_labels = self.labels[train_count:, ...] + self.training_data = training_data + self.validation_data = validation_data + self.training = DatasetIterator(training_data, training_labels, self.batch_size) + self.validation = DatasetIterator( + validation_data, validation_labels, self.batch_size + ) + + def reset_iterators(self): + """# Reset Train/ Validation Iterators""" + self.split() diff --git a/code/sunlab/common/data/dataset_iterator.py b/code/sunlab/common/data/dataset_iterator.py new file mode 100644 index 0000000..7c91caa --- /dev/null +++ b/code/sunlab/common/data/dataset_iterator.py @@ -0,0 +1,34 @@ +class DatasetIterator: + """# Dataset Iterator + + Creates an iterator object on a dataset and labels""" + + def __init__(self, dataset, labels=None, batch_size=None): + """# Initialize the iterator with the dataset and labels + + - batch_size: How many to include in the iteration""" + self.dataset = dataset + self.labels = labels + self.current = 0 + self.batch_size = ( + batch_size if batch_size is not None else self.dataset.shape[0] + ) + + def __iter__(self): + """# Iterator Function""" + return self + + def __next__(self): + """# Next Iteration + + Slice the dataset and labels to provide""" + self.cur = self.current + self.current += 1 + if self.cur * self.batch_size < self.dataset.shape[0]: + iterator_slice = slice( + self.cur * self.batch_size, (self.cur + 1) * self.batch_size + ) + if self.labels is None: + return self.dataset[iterator_slice, ...] + return self.dataset[iterator_slice, ...], self.labels[iterator_slice, ...] + raise StopIteration diff --git a/code/sunlab/common/data/image_dataset.py b/code/sunlab/common/data/image_dataset.py new file mode 100644 index 0000000..46e77b6 --- /dev/null +++ b/code/sunlab/common/data/image_dataset.py @@ -0,0 +1,75 @@ +class ImageDataset: + def __init__( + self, + base_directory, + ext="png", + channels=[0], + batch_size=None, + shuffle=False, + rotate=False, + rotate_p=1., + ): + """# Image Dataset + + Load a directory of images""" + from glob import glob + from matplotlib.pyplot import imread + from numpy import newaxis, vstack + from numpy.random import permutation, rand + + self.base_directory = base_directory + files = glob(self.base_directory + "*." + ext) + self.dataset = [] + for file in files: + im = imread(file)[newaxis, :, :, channels].transpose(0, 3, 1, 2) + self.dataset.append(im) + # Also add rotations of the image to the dataset + if rotate: + if rand() < rotate_p: + self.dataset.append(im[:, :, ::-1, :]) + if rand() < rotate_p: + self.dataset.append(im[:, :, :, ::-1]) + if rand() < rotate_p: + self.dataset.append(im[:, :, ::-1, ::-1]) + if rand() < rotate_p: + self.dataset.append(im.transpose(0, 1, 3, 2)) + if rand() < rotate_p: + self.dataset.append(im.transpose(0, 1, 3, 2)[:, :, ::-1, :]) + if rand() < rotate_p: + self.dataset.append(im.transpose(0, 1, 3, 2)[:, :, :, ::-1]) + if rand() < rotate_p: + self.dataset.append(im.transpose(0, 1, 3, 2)[:, :, ::-1, ::-1]) + self.dataset = vstack(self.dataset) + if shuffle: + self.dataset = self.dataset[permutation(self.dataset.shape[0]), ...] 
+ self.batch_size = ( + batch_size if batch_size is not None else self.dataset.shape[0] + ) + + def torch(self, device=None): + """# Cast to Torch Tensor""" + import torch + + if device is None: + device = torch.device("cpu") + return torch.tensor(self.dataset).to(device) + + def numpy(self): + """# Cast to Numpy Array""" + return self.dataset + + def __len__(self): + """# Return Number of Cases + + (or Number in each Batch)""" + return self.dataset.shape[0] // self.batch_size + + def __getitem__(self, index): + """# Slice By Batch""" + if type(index) == tuple: + return self.dataset[index] + elif type(index) == int: + return self.dataset[ + index * self.batch_size : (index + 1) * self.batch_size, ... + ] + return diff --git a/code/sunlab/common/data/shape_dataset.py b/code/sunlab/common/data/shape_dataset.py new file mode 100644 index 0000000..5a68736 --- /dev/null +++ b/code/sunlab/common/data/shape_dataset.py @@ -0,0 +1,57 @@ +from .dataset import Dataset + + +class ShapeDataset(Dataset): + """# Shape Dataset""" + + def __init__( + self, + dataset_filename, + data_columns=[ + "Area", + "MjrAxisLength", + "MnrAxisLength", + "Eccentricity", + "ConvexArea", + "EquivDiameter", + "Solidity", + "Extent", + "Perimeter", + "ConvexPerim", + "FibLen", + "InscribeR", + "BlebLen", + ], + label_columns=["Class"], + batch_size=None, + shuffle=False, + val_split=0.0, + scaler=None, + sort_columns=None, + random_seed=4332, + pre_scale=10, + **kwargs + ): + """# Initialize Dataset + self.dataset = dataset (N, ...) + self.labels = labels (N, ...) + + Optional Arguments: + - prescale_function: The function that takes the ratio and transforms + the dataset by multiplying the prescale_function output + - sort_columns: The columns to sort the data by initially + - equal_split: If the classifications should be equally split in + training""" + super().__init__( + dataset_filename, + data_columns=data_columns, + label_columns=label_columns, + batch_size=batch_size, + shuffle=shuffle, + val_split=val_split, + scaler=scaler, + sort_columns=sort_columns, + random_seed=random_seed, + pre_scale=pre_scale, + **kwargs + ) diff --git a/code/sunlab/common/data/utilities.py b/code/sunlab/common/data/utilities.py new file mode 100644 index 0000000..6b4e6f3 --- /dev/null +++ b/code/sunlab/common/data/utilities.py @@ -0,0 +1,119 @@ +from .shape_dataset import ShapeDataset +from ..scaler.max_abs_scaler import MaxAbsScaler + + +def import_10x( + filename, + magnification=10, + batch_size=None, + shuffle=False, + val_split=0.0, + scaler=None, + sort_columns=None, +): + """# Import a 10x Image Dataset + + Pixel-to-micron: ???""" + magnification = 10 + dataset = ShapeDataset( + filename, + batch_size=batch_size, + shuffle=shuffle, + pre_scale=magnification, + val_split=val_split, + scaler=scaler, + sort_columns=sort_columns, + ) + return dataset + + +def import_20x( + filename, + magnification=10, + batch_size=None, + shuffle=False, + val_split=0.0, + scaler=None, + sort_columns=None, +): + """# Import a 20x Image Dataset + + Pixel-to-micron: ???""" + magnification = 20 + dataset = ShapeDataset( + filename, + batch_size=batch_size, + shuffle=shuffle, + pre_scale=magnification, + val_split=val_split, + scaler=scaler, + sort_columns=sort_columns, + ) + return dataset + + +def import_dataset( + filename, + magnification, + batch_size=None, + shuffle=False, + val_split=0.0, + scaler=None, + sort_columns=None, +): + """# Import a dataset + + Requires a magnificaiton to be specified""" + dataset = ShapeDataset( + filename, + 
pre_scale=magnification, + batch_size=batch_size, + shuffle=shuffle, + val_split=val_split, + scaler=scaler, + sort_columns=sort_columns, + ) + return dataset + + +def import_full_dataset(fname, magnification=20, scaler=None): + """# Import a Full Dataset + + If a classification file exists(.txt with a 'Class' header and 'frame','cellnumber' headers), also import it""" + from os.path import isfile + import pandas as pd + import numpy as np + + cfname = fname + tfname = cfname[:-3] + "txt" + columns = [ + "frame", + "cellnumber", + "x-cent", + "y-cent", + "actinedge", + "filopodia", + "bleb", + "lamellipodia", + ] + if isfile(tfname): + dataset = import_dataset(cfname, magnification=magnification, scaler=scaler) + class_df = np.loadtxt(tfname, skiprows=1) + class_df = pd.DataFrame(class_df, columns=columns) + full_df = pd.merge( + dataset.dataframe, + class_df, + left_on=["Frames", "CellNum"], + right_on=["frame", "cellnumber"], + ) + full_df["Class"] = np.argmax( + class_df[["actinedge", "filopodia", "bleb", "lamellipodia"]].to_numpy(), + axis=-1, + ) + dataset.labels = full_df["Class"].to_numpy() + else: + dataset = import_dataset(cfname, magnification=magnification, scaler=scaler) + full_df = dataset.dataframe + dataset.dataframe = full_df + dataset.filter_off() + return dataset diff --git a/code/sunlab/common/distribution/__init__.py b/code/sunlab/common/distribution/__init__.py new file mode 100644 index 0000000..a23cb0c --- /dev/null +++ b/code/sunlab/common/distribution/__init__.py @@ -0,0 +1,7 @@ +from .gaussian_distribution import * +from .x_gaussian_distribution import * +from .o_gaussian_distribution import * +from .s_gaussian_distribution import * +from .uniform_distribution import * +from .symmetric_uniform_distribution import * +from .swiss_roll_distribution import * diff --git a/code/sunlab/common/distribution/adversarial_distribution.py b/code/sunlab/common/distribution/adversarial_distribution.py new file mode 100644 index 0000000..675c00e --- /dev/null +++ b/code/sunlab/common/distribution/adversarial_distribution.py @@ -0,0 +1,35 @@ +class AdversarialDistribution: + """# Distribution Class to use in Adversarial Autoencoder + + For any distribution to be implemented, make sure to ensure each of the + methods are implemented""" + + def __init__(self, N): + """# Initialize the distribution for N-dimensions""" + self.dims = N + return + + def get_full_name(self): + """# Return a human-readable name of the distribution""" + return self.full_name + + def get_name(self): + """# Return a shortened name of the distribution + + Preferrably, the name should include characters that can be included in + a file name""" + return self.name + + def __str__(self): + """# Returns the short name""" + return self.get_name() + + def __repr__(self): + """# Returns the short name""" + return self.get_name() + + def __call__(self, *args): + """# Magic method when calling the distribution + + This method is going to be called when you use `dist(...)`""" + raise NotImplementedError("This distribution has not been implemented yet") diff --git a/code/sunlab/common/distribution/gaussian_distribution.py b/code/sunlab/common/distribution/gaussian_distribution.py new file mode 100644 index 0000000..e478ab6 --- /dev/null +++ b/code/sunlab/common/distribution/gaussian_distribution.py @@ -0,0 +1,23 @@ +from .adversarial_distribution import * + + +class GaussianDistribution(AdversarialDistribution): + """# Gaussian Distribution""" + + def __init__(self, N): + """# Gaussian Distribution Initialization + + 
Initializes the name and dimensions""" + super().__init__(N) + self.full_name = f"{N}-Dimensional Gaussian Distribution" + self.name = "G" + + def __call__(self, *args): + """# Magic method when calling the distribution + + This method is going to be called when you use gauss(N1,...,Nm)""" + import numpy as np + + return np.random.multivariate_normal( + mean=np.zeros(self.dims), cov=np.eye(self.dims), size=[*args] + ) diff --git a/code/sunlab/common/distribution/o_gaussian_distribution.py b/code/sunlab/common/distribution/o_gaussian_distribution.py new file mode 100644 index 0000000..1222ca1 --- /dev/null +++ b/code/sunlab/common/distribution/o_gaussian_distribution.py @@ -0,0 +1,38 @@ +from .adversarial_distribution import * + + +class OGaussianDistribution(AdversarialDistribution): + """# O Gaussian Distribution""" + + def __init__(self, N): + """# O Gaussian Distribution Initialization + + Initializes the name and dimensions""" + super().__init__(N) + assert self.dims == 2, "This Distribution only Supports 2-Dimensions" + self.full_name = "2-Dimensional O-Gaussian Distribution" + self.name = "OG" + + def __call__(self, *args): + """# Magic method when calling the distribution + + This method is going to be called when you use xgauss(case_count)""" + import numpy as np + + assert len(args) == 1, "Only 1 argument supported" + N = args[0] + sample_base = np.zeros((4 * N, 2)) + sample_base[0 * N : (0 + 1) * N, :] = np.random.multivariate_normal( + mean=[1, 1], cov=[[1, 0], [0, 1]], size=[N] + ) + sample_base[1 * N : (1 + 1) * N, :] = np.random.multivariate_normal( + mean=[-1, -1], cov=[[1, 0], [0, 1]], size=[N] + ) + sample_base[2 * N : (2 + 1) * N, :] = np.random.multivariate_normal( + mean=[-1, 1], cov=[[1, 0], [0, 1]], size=[N] + ) + sample_base[3 * N : (3 + 1) * N, :] = np.random.multivariate_normal( + mean=[1, -1], cov=[[1, 0], [0, 1]], size=[N] + ) + np.random.shuffle(sample_base) + return sample_base[:N, :] diff --git a/code/sunlab/common/distribution/s_gaussian_distribution.py b/code/sunlab/common/distribution/s_gaussian_distribution.py new file mode 100644 index 0000000..cace57f --- /dev/null +++ b/code/sunlab/common/distribution/s_gaussian_distribution.py @@ -0,0 +1,40 @@ +from .adversarial_distribution import * + + +class SGaussianDistribution(AdversarialDistribution): + """# S Gaussian Distribution""" + + def __init__(self, N, scale=0): + """# S Gaussian Distribution Initialization + + Initializes the name and dimensions""" + super().__init__(N) + assert self.dims == 2, "This Distribution only Supports 2-Dimensions" + self.full_name = "2-Dimensional S-Gaussian Distribution" + self.name = "SG" + self.scale = scale + + def __call__(self, *args): + """# Magic method when calling the distribution + + This method is going to be called when you use xgauss(case_count)""" + import numpy as np + + assert len(args) == 1, "Only 1 argument supported" + N = args[0] + sample_base = np.zeros((4 * N, 2)) + scale = self.scale + sample_base[0 * N : (0 + 1) * N, :] = np.random.multivariate_normal( + mean=[1, 1], cov=[[1, scale], [scale, 1]], size=[N] + ) + sample_base[1 * N : (1 + 1) * N, :] = np.random.multivariate_normal( + mean=[-1, -1], cov=[[1, scale], [scale, 1]], size=[N] + ) + sample_base[2 * N : (2 + 1) * N, :] = np.random.multivariate_normal( + mean=[-1, 1], cov=[[1, -scale], [-scale, 1]], size=[N] + ) + sample_base[3 * N : (3 + 1) * N, :] = np.random.multivariate_normal( + mean=[1, -1], cov=[[1, -scale], [-scale, 1]], size=[N] + ) + np.random.shuffle(sample_base) + return 
sample_base[:N, :] diff --git a/code/sunlab/common/distribution/swiss_roll_distribution.py b/code/sunlab/common/distribution/swiss_roll_distribution.py new file mode 100644 index 0000000..613bfc5 --- /dev/null +++ b/code/sunlab/common/distribution/swiss_roll_distribution.py @@ -0,0 +1,42 @@ +from .adversarial_distribution import * + + +class SwissRollDistribution(AdversarialDistribution): + """# Swiss Roll Distribution""" + + def __init__(self, N, scaling_factor=0.25, noise_level=0.15): + """# Swiss Roll Distribution Initialization + + Initializes the name and dimensions""" + super().__init__(N) + assert (self.dims == 2) or ( + self.dims == 3 + ), "This Distribution only Supports 2,3-Dimensions" + self.full_name = f"{self.dims}-Dimensional Swiss Roll Distribution Distribution" + self.name = f"SR{self.dims}" + self.noise_level = noise_level + self.scale = scaling_factor + + def __call__(self, *args): + """# Magic method when calling the distribution + + This method is going to be called when you use xgauss(case_count)""" + import numpy as np + + assert len(args) == 1, "Only 1 argument supported" + N = args[0] + noise = self.noise_level + scaling_factor = self.scale + + t = 3 * np.pi / 2 * (1 + 2 * np.random.rand(1, N)) + h = 21 * np.random.rand(1, N) + RANDOM = np.random.randn(3, N) * noise + data = ( + np.concatenate( + (scaling_factor * t * np.cos(t), h, scaling_factor * t * np.sin(t)) + ) + + RANDOM + ) + if self.dims == 2: + return data.T[:, [0, 2]] + return data.T[:, [0, 2, 1]] diff --git a/code/sunlab/common/distribution/symmetric_uniform_distribution.py b/code/sunlab/common/distribution/symmetric_uniform_distribution.py new file mode 100644 index 0000000..c3a4db0 --- /dev/null +++ b/code/sunlab/common/distribution/symmetric_uniform_distribution.py @@ -0,0 +1,21 @@ +from .adversarial_distribution import * + + +class SymmetricUniformDistribution(AdversarialDistribution): + """# Symmetric Uniform Distribution on [-1, 1)""" + + def __init__(self, N): + """# Symmetric Uniform Distribution Initialization + + Initializes the name and dimensions""" + super().__init__(N) + self.full_name = f"{N}-Dimensional Symmetric Uniform Distribution" + self.name = "SU" + + def __call__(self, *args): + """# Magic method when calling the distribution + + This method is going to be called when you use suniform(N1,...,Nm)""" + import numpy as np + + return np.random.rand(*args, self.dims) * 2.0 - 1.0 diff --git a/code/sunlab/common/distribution/uniform_distribution.py b/code/sunlab/common/distribution/uniform_distribution.py new file mode 100644 index 0000000..3e23e67 --- /dev/null +++ b/code/sunlab/common/distribution/uniform_distribution.py @@ -0,0 +1,21 @@ +from .adversarial_distribution import * + + +class UniformDistribution(AdversarialDistribution): + """# Uniform Distribution on [0, 1)""" + + def __init__(self, N): + """# Uniform Distribution Initialization + + Initializes the name and dimensions""" + super().__init__(N) + self.full_name = f"{N}-Dimensional Uniform Distribution" + self.name = "U" + + def __call__(self, *args): + """# Magic method when calling the distribution + + This method is going to be called when you use uniform(N1,...,Nm)""" + import numpy as np + + return np.random.rand(*args, self.dims) diff --git a/code/sunlab/common/distribution/x_gaussian_distribution.py b/code/sunlab/common/distribution/x_gaussian_distribution.py new file mode 100644 index 0000000..b4330aa --- /dev/null +++ b/code/sunlab/common/distribution/x_gaussian_distribution.py @@ -0,0 +1,38 @@ +from 
.adversarial_distribution import * + + +class XGaussianDistribution(AdversarialDistribution): + """# X Gaussian Distribution""" + + def __init__(self, N): + """# X Gaussian Distribution Initialization + + Initializes the name and dimensions""" + super().__init__(N) + assert self.dims == 2, "This Distribution only Supports 2-Dimensions" + self.full_name = "2-Dimensional X-Gaussian Distribution" + self.name = "XG" + + def __call__(self, *args): + """# Magic method when calling the distribution + + This method is going to be called when you use xgauss(case_count)""" + import numpy as np + + assert len(args) == 1, "Only 1 argument supported" + N = args[0] + sample_base = np.zeros((4 * N, 2)) + sample_base[0 * N : (0 + 1) * N, :] = np.random.multivariate_normal( + mean=[1, 1], cov=[[1, 0.7], [0.7, 1]], size=[N] + ) + sample_base[1 * N : (1 + 1) * N, :] = np.random.multivariate_normal( + mean=[-1, -1], cov=[[1, 0.7], [0.7, 1]], size=[N] + ) + sample_base[2 * N : (2 + 1) * N, :] = np.random.multivariate_normal( + mean=[-1, 1], cov=[[1, -0.7], [-0.7, 1]], size=[N] + ) + sample_base[3 * N : (3 + 1) * N, :] = np.random.multivariate_normal( + mean=[1, -1], cov=[[1, -0.7], [-0.7, 1]], size=[N] + ) + np.random.shuffle(sample_base) + return sample_base[:N, :] diff --git a/code/sunlab/common/mathlib/__init__.py b/code/sunlab/common/mathlib/__init__.py new file mode 100644 index 0000000..9b5ed21 --- /dev/null +++ b/code/sunlab/common/mathlib/__init__.py @@ -0,0 +1 @@ +from .base import * diff --git a/code/sunlab/common/mathlib/base.py b/code/sunlab/common/mathlib/base.py new file mode 100644 index 0000000..38ab14c --- /dev/null +++ b/code/sunlab/common/mathlib/base.py @@ -0,0 +1,57 @@ +import numpy as np + + +def angle(a, b): + """# Get Angle Between Row Vectors""" + from numpy import arctan2, pi + + theta_a = arctan2(a[:, 1], a[:, 0]) + theta_b = arctan2(b[:, 1], b[:, 0]) + d_theta = theta_b - theta_a + assert (-pi <= d_theta) and (d_theta <= pi), "Theta difference outside of [-π,π]" + return d_theta + + +def normalize(column): + """# Normalize Column Vector""" + from numpy.linalg import norm + + return column / norm(column, axis=0) + + +def winding(xy_grid, trajectory_start, trajectory_end): + """# Get Winding Number on Grid""" + from numpy import zeros, cross, clip, arcsin + + trajectories = trajectory_end - trajectory_start + winding = zeros((xy_grid.shape[0])) + for idx, trajectory in enumerate(trajectories): + r = xy_grid - trajectory_start[idx] + cross = cross(normalize(trajectory), normalize(r)) + cross = clip(cross, -1, 1) + theta = arcsin(cross) + winding += theta + return winding + + +def vorticity(xy_grid, trajectory_start, trajectory_end): + """# Get Vorticity Number on Grid""" + from numpy import zeros, cross + + trajectories = trajectory_end - trajectory_start + vorticity = zeros((xy_grid.shape[0])) + for idx, trajectory in enumerate(trajectories): + r = xy_grid - trajectory_start[idx] + vorticity += cross(normalize(trajectory), normalize(r)) + return vorticity + + +def data_range(data): + """# Get the range of values for each row""" + from numpy import min, max + + return min(data, axis=0), max(data, axis=0) + + +np.normalize = normalize +np.range = data_range diff --git a/code/sunlab/common/mathlib/lyapunov.py b/code/sunlab/common/mathlib/lyapunov.py new file mode 100644 index 0000000..3c747f1 --- /dev/null +++ b/code/sunlab/common/mathlib/lyapunov.py @@ -0,0 +1,54 @@ +def trajectory_to_distances(x): + """X: [N,N_t,N_d] + ret [N,N_t]""" + from numpy import zeros + from numpy.linalg import 
norm + from itertools import product, combinations + + x = [x[idx, ...] for idx in range(x.shape[0])] + pairwise_trajectories = combinations(x, 2) + _N_COMB = len(list(pairwise_trajectories)) + N_max = x[0].shape[0] + distances = zeros((_N_COMB, N_max)) + pairwise_trajectories = combinations(x, 2) + for idx, (a_t, b_t) in enumerate(pairwise_trajectories): + distances[idx, :] = norm(a_t[:N_max, :] - b_t[:N_max, :], axis=-1) + return distances + + +def Lyapunov_d(X): + """X: [N,N_t] + λ_n = ln(|dX_n|/|dX_0|)/n; n = [1,2,...]""" + from numpy import zeros, log, repeat + + Y = zeros((X.shape[0], X.shape[1] - 1)) + Y = log(X[:, 1:] / repeat([X[:, 0]], Y.shape[1], axis=0).T) / ( + repeat([range(Y.shape[1])], Y.shape[0], axis=0) + 1 + ) + return Y + + +def Lyapunov_t(X): + """X: [N,N_t,N_d]""" + return Lyapunov_d(trajectory_to_distances(X)) + + +Lyapunov = Lyapunov_d + + +def RelativeDistance_d(X): + """X: [N,N_t] + λ_n = ln(|dX_n|/|dX_0|)/n; n = [1,2,...]""" + from numpy import zeros, log, repeat + + Y = zeros((X.shape[0], X.shape[1] - 1)) + Y = log(X[:, 1:] / repeat([X[:, 0]], Y.shape[1], axis=0).T) + return Y + + +def RelativeDistance_t(X): + """X: [N,N_t,N_d]""" + return RelativeDistance_d(trajectory_to_distances(X)) + + +RelativeDistance = RelativeDistance_d diff --git a/code/sunlab/common/mathlib/random_walks.py b/code/sunlab/common/mathlib/random_walks.py new file mode 100644 index 0000000..3aa3bcb --- /dev/null +++ b/code/sunlab/common/mathlib/random_walks.py @@ -0,0 +1,83 @@ +def get_levy_flight(T=50, D=2, t0=0.1, alpha=3, periodic=False): + from numpy import vstack + from mistree import get_levy_flight as get_flight + + if D == 2: + x, y = get_flight(T, mode="2D", periodic=periodic, t_0=t0, alpha=alpha) + xy = vstack([x, y]).T + elif D == 3: + x, y, z = get_flight(T, mode="3D", periodic=periodic, t_0=t0, alpha=alpha) + xy = vstack([x, y, z]).T + else: + raise ValueError(f"Dimension {D} not supported!") + return xy + + +def get_levy_flights(N=10, T=50, D=2, t0=0.1, alpha=3, periodic=False): + from numpy import moveaxis, array + + trajectories = [] + for _ in range(N): + xy = get_levy_flight(T=T, D=D, t0=t0, alpha=alpha, periodic=periodic) + trajectories.append(xy) + return moveaxis(array(trajectories), 0, 1) + + +def get_jitter_levy_flights( + N=10, T=50, D=2, t0=0.1, alpha=3, periodic=False, noise=5e-2 +): + from numpy.random import randn + + trajectories = get_levy_flights( + N=N, T=T, D=D, t0=t0, alpha=alpha, periodic=periodic + ) + return trajectories + randn(*trajectories.shape) * noise + + +def get_gaussian_random_walk(T=50, D=2, R=5, step_size=0.5, soft=None): + from numpy import array, sin, cos, exp, zeros, pi + from numpy.random import randn, uniform, rand + from numpy.linalg import norm + + def is_in(x, R=1): + from numpy.linalg import norm + + return norm(x) < R + + X = zeros((T, D)) + for t in range(1, T): + while True: + if D == 2: + angle = uniform(0, pi * 2) + step = randn(1) * step_size + X[t, :] = X[t - 1, :] + array([cos(angle), sin(angle)]) * step + else: + X[t, :] = X[t - 1, :] + randn(D) / D * step_size + if soft is None: + if is_in(X[t, :], R): + break + elif rand() < exp(-(norm(X[t, :]) - R) * soft): + break + return X + + +def get_gaussian_random_walks(N=10, T=50, D=2, R=5, step_size=0.5, soft=None): + from numpy import moveaxis, array + + trajectories = [] + for _ in range(N): + xy = get_gaussian_random_walk(T=T, D=D, R=R, step_size=step_size, soft=soft) + trajectories.append(xy) + return moveaxis(array(trajectories), 0, 1) + + +def get_gaussian_sample(T=50, D=2): 
+ from numpy.random import randn + + return randn(T, D) + + +def get_gaussian_samples(N=10, T=50, D=2, R=5, step_size=0.5): + from numpy.random import randn + + return randn(T, N, D) diff --git a/code/sunlab/common/plotting/__init__.py b/code/sunlab/common/plotting/__init__.py new file mode 100644 index 0000000..d6873aa --- /dev/null +++ b/code/sunlab/common/plotting/__init__.py @@ -0,0 +1,2 @@ +from .colors import * +from .base import * diff --git a/code/sunlab/common/plotting/base.py b/code/sunlab/common/plotting/base.py new file mode 100644 index 0000000..aaf4a94 --- /dev/null +++ b/code/sunlab/common/plotting/base.py @@ -0,0 +1,270 @@ +from matplotlib import pyplot as plt + + +def blank_plot(_plt=None, _xticks=False, _yticks=False): + """# Remove Plot Labels""" + if _plt is None: + _plt = plt + _plt.xlabel("") + _plt.ylabel("") + _plt.title("") + tick_params = { + "which": "both", + "bottom": _xticks, + "left": _yticks, + "right": False, + "labelleft": False, + "labelbottom": False, + } + _plt.tick_params(**tick_params) + for child in plt.gcf().get_children(): + if child._label == "<colorbar>": + child.set_yticks([]) + axs = _plt.gcf().get_axes() + try: + axs = axs.flatten() + except: + ... + for ax in axs: + ax.set_xlabel("") + ax.set_ylabel("") + ax.set_title("") + ax.tick_params(**tick_params) + + +def save_plot(name, _plt=None, _xticks=False, _yticks=False, tighten=True): + """# Save Plot in Multiple Formats""" + assert type(name) == str, "Name must be string" + from os.path import dirname + from os import makedirs + + makedirs(dirname(name), exist_ok=True) + if _plt is None: + from matplotlib import pyplot as plt + _plt = plt + _plt.savefig(name + ".png", dpi=1000) + blank_plot(_plt, _xticks=_xticks, _yticks=_yticks) + if tighten: + from matplotlib import pyplot as plt + plt.tight_layout() + _plt.savefig(name + ".pdf") + _plt.savefig(name + ".svg") + + +def scatter_2d(data_2d, labels=None, _plt=None, **matplotlib_kwargs): + """# Scatter 2d Data + + - data_2d: 2d-dataset to plot + - labels: labels for each case + - _plt: Optional specific plot to transform""" + from .colors import Pcolor + + if _plt is None: + _plt = plt + + def _filter(data, labels=None, _filter_on=None): + if labels is None: + return data, False + else: + return data[labels == _filter_on], True + + for _class in [2, 3, 1, 0]: + local_data, has_color = _filter(data_2d, labels, _class) + if has_color: + _plt.scatter( + local_data[:, 0], + local_data[:, 1], + color=Pcolor[_class], + **matplotlib_kwargs + ) + else: + _plt.scatter(local_data[:, 0], local_data[:, 1], **matplotlib_kwargs) + break + return _plt + + +def plot_contour(two_d_mask, color="black", color_map=None, raise_error=False): + """# Plot Contour of Mask""" + from matplotlib.pyplot import contour + from numpy import mgrid + + z = two_d_mask + x, y = mgrid[: z.shape[1], : z.shape[0]] + if color_map is not None: + try: + color = color_map(color) + except Exception as e: + if raise_error: + raise e + try: + contour(x, y, z.T, colors=color, linewidth=0.5) + except Exception as e: + if raise_error: + raise e + + +def gaussian_smooth_plot( + xy, + sigma=0.1, + extent=[-1, 1, -1, 1], + grid_n=100, + grid=None, + do_normalize=True, +): + """# Plot Data with Gaussian Smoothening around point""" + from numpy import array, ndarray, linspace, meshgrid, zeros_like + from numpy import pi, sqrt, exp + from numpy.linalg import norm + + if not type(xy) == ndarray: + xy = array(xy) + if grid is not None: + XY = grid + else: + X = linspace(extent[0], extent[1], grid_n) + 
Y = linspace(extent[2], extent[3], grid_n) + XY = array(meshgrid(X, Y)).T + smoothed = zeros_like(XY[:, :, 0]) + factor = 1 + if do_normalize: + factor = (sqrt(2 * pi) * sigma) ** 2. + if len(xy.shape) == 1: + smoothed = exp(-((norm(xy - XY, axis=-1) / (sqrt(2) * sigma)) ** 2.0)) / factor + else: + try: + from tqdm.notebook import tqdm + except Exception: + + def tqdm(x): + return x + + for i in tqdm(range(xy.shape[0])): + if xy.shape[-1] == 2: + smoothed += ( + exp(-((norm((xy[i, :] - XY), axis=-1) / (sqrt(2) * sigma)) ** 2.0)) + / factor + ) + elif xy.shape[-1] == 3: + smoothed += ( + exp(-((norm((xy[i, :2] - XY), axis=-1) / (sqrt(2) * sigma)) ** 2.0)) + / factor + * xy[i, 2] + ) + return smoothed, XY + + +def plot_grid_data(xy_grid, data_grid, cbar=False, _plt=None, _cmap="RdBu", grid_min=1): + """# Plot Gridded Data""" + from numpy import nanmin, nanmax + from matplotlib.colors import TwoSlopeNorm + + if _plt is None: + _plt = plt + norm = TwoSlopeNorm( + vmin=nanmin([-grid_min, nanmin(data_grid)]), + vcenter=0, + vmax=nanmax([grid_min, nanmax(data_grid)]), + ) + _plt.pcolor(xy_grid[:, :, 0], xy_grid[:, :, 1], data_grid, cmap="RdBu", norm=norm) + if cbar: + _plt.colorbar() + + +def plot_winding(xy_grid, winding_grid, cbar=False, _plt=None): + plot_grid_data(xy_grid, winding_grid, cbar=cbar, _plt=_plt, grid_min=1e-5) + + +def plot_vorticity(xy_grid, vorticity_grid, cbar=False, save=False, _plt=None): + plot_grid_data(xy_grid, vorticity_grid, cbar=cbar, _plt=_plt, grid_min=1e-1) + + +plt.blank = lambda: blank_plot(plt) +plt.scatter2d = lambda data, labels=None, **matplotlib_kwargs: scatter_2d( + data, labels, plt, **matplotlib_kwargs +) +plt.save = save_plot + + +def interpolate_points(df, columns=["Latent-0", "Latent-1"], kind="quadratic", N=500): + """# Interpolate points""" + from scipy.interpolate import interp1d + import numpy as np + + points = df[columns].to_numpy() + distance = np.cumsum(np.sqrt(np.sum(np.diff(points, axis=0) ** 2, axis=1))) + distance = np.insert(distance, 0, 0) / distance[-1] + interpolator = interp1d(distance, points, kind=kind, axis=0) + alpha = np.linspace(0, 1, N) + interpolated_points = interpolator(alpha) + return interpolated_points.reshape(-1, 1, 2) + + +def plot_trajectory( + df, + Fm=24, + FM=96, + interpolate=False, + interpolation_kind="quadratic", + interpolation_N=500, + columns=["Latent-0", "Latent-1"], + frame_column="Frames", + alpha=0.8, + lw=4, + _plt=None, + _z=None, +): + """# Plot Trajectories + + Interpolation possible""" + import numpy as np + from matplotlib.collections import LineCollection + import matplotlib as mpl + + if _plt is None: + _plt = plt + if type(_plt) == mpl.axes._axes.Axes: + _ca = _plt + else: + try: + _ca = _plt.gca() + except: + _ca = _plt + X = df[columns[0]] + Y = df[columns[1]] + fm, fM = np.min(df[frame_column]), np.max(df[frame_column]) + + if interpolate: + if interpolation_kind == "cubic": + if len(df) <= 3: + return + elif interpolation_kind == "quadratic": + if len(df) <= 2: + return + else: + if len(df) <= 1: + return + points = interpolate_points( + df, kind=interpolation_kind, columns=columns, N=interpolation_N + ) + else: + points = np.array([X, Y]).T.reshape(-1, 1, 2) + + segments = np.concatenate([points[:-1], points[1:]], axis=1) + lc = LineCollection( + segments, + cmap=plt.get_cmap("plasma"), + norm=mpl.colors.Normalize(Fm, FM), + ) + if _z is not None: + from mpl_toolkits.mplot3d.art3d import line_collection_2d_to_3d + + if interpolate: + _z = _z # TODO: Interpolate + line_collection_2d_to_3d(lc, 
_z) + lc.set_array(np.linspace(fm, fM, points.shape[0])) + lc.set_linewidth(lw) + lc.set_alpha(alpha) + _ca.add_collection(lc) + _ca.autoscale() + _ca.margins(0.04) + return lc diff --git a/code/sunlab/common/plotting/colors.py b/code/sunlab/common/plotting/colors.py new file mode 100644 index 0000000..c4fc727 --- /dev/null +++ b/code/sunlab/common/plotting/colors.py @@ -0,0 +1,38 @@ +class PhenotypeColors: + """# Phenotype Colorings + + Standardization for the different phenotype colors""" + + def __init__(self): + """# Empty Construtor""" + pass + + def get_basic_colors(self, transition=False): + """# Return the Color Names + + - transition: Returns the color for the transition class too""" + if transition: + return ["yellow", "purple", "green", "blue", "cyan"] + return ["yellow", "purple", "green", "blue"] + + def get_colors(self, transition=False): + """# Return the Color Names + + - transition: Returns the color for the transition class too""" + if transition: + return ["#ffff00", "#ff3cfa", "#11f309", "#213ff0", "cyan"] + return ["#ffff00", "#ff3cfa", "#11fe09", "#213ff0"] + + def get_colormap(self, transition=False): + """# Return the Matplotlib Colormap + + - transition: Returns the color for the transition class too""" + from matplotlib.colors import ListedColormap as LC + + return LC(self.get_colors(transition)) + + +# Basic Exports +Pcolor = PhenotypeColors().get_colors() +Pmap = PhenotypeColors().get_colormap() +Pmapx = PhenotypeColors().get_colormap(True) diff --git a/code/sunlab/common/scaler/__init__.py b/code/sunlab/common/scaler/__init__.py new file mode 100644 index 0000000..2a2281a --- /dev/null +++ b/code/sunlab/common/scaler/__init__.py @@ -0,0 +1,2 @@ +from .max_abs_scaler import * +from .quantile_scaler import * diff --git a/code/sunlab/common/scaler/adversarial_scaler.py b/code/sunlab/common/scaler/adversarial_scaler.py new file mode 100644 index 0000000..7f61725 --- /dev/null +++ b/code/sunlab/common/scaler/adversarial_scaler.py @@ -0,0 +1,44 @@ +class AdversarialScaler: + """# Scaler Class to use in Adversarial Autoencoder + + For any scaler to be implemented, make sure to ensure each of the methods + are implemented: + - __init__ (optional) + - init + - load + - save + - __call__""" + + def __init__(self, base_directory): + """# Scaler initialization + + - Initialize the base directory of the model where it will live""" + self.base_directory = base_directory + + def init(self, data): + """# Scaler initialization + + Initialize the scaler transformation with the data + Should always return self in subclasses""" + raise NotImplementedError("Scaler initialization has not been implemented yet") + + def load(self): + """# Scaler loading + + Load the data scaler model from a file + Should always return self in subclasses""" + raise NotImplementedError("Scaler loading has not been implemented yet") + + def save(self): + """# Scaler saving + + Save the data scaler model""" + raise NotImplementedError("Scaler saving has not been implemented yet") + + def transform(self, *args, **kwargs): + """# Scale the given data""" + return self.__call__(*args, **kwargs) + + def __call__(self, *args, **kwargs): + """# Scale the given data""" + raise NotImplementedError("Scaler has not been implemented yet") diff --git a/code/sunlab/common/scaler/max_abs_scaler.py b/code/sunlab/common/scaler/max_abs_scaler.py new file mode 100644 index 0000000..56ea589 --- /dev/null +++ b/code/sunlab/common/scaler/max_abs_scaler.py @@ -0,0 +1,48 @@ +from .adversarial_scaler import AdversarialScaler + + 
+class MaxAbsScaler(AdversarialScaler): + """# MaxAbsScaler + + Scale the data based on the maximum-absolute value in each column""" + + def __init__(self, base_directory): + """# MaxAbsScaler initialization + + - Initialize the base directory of the model where it will live + - Initialize the scaler model""" + super().__init__(base_directory) + from sklearn.preprocessing import MaxAbsScaler as MAS + + self.scaler_base = MAS() + self.scaler = None + + def init(self, data): + """# Scaler initialization + + Initialize the scaler transformation with the data""" + self.scaler = self.scaler_base.fit(data) + return self + + def load(self): + """# Scaler loading + + Load the data scaler model from a file""" + from pickle import load + + with open(f"{self.base_directory}/portable/maxabs_scaler.pkl", "rb") as fhandle: + self.scaler = load(fhandle) + return self + + def save(self): + """# Scaler saving + + Save the data scaler model""" + from pickle import dump + + with open(f"{self.base_directory}/portable/maxabs_scaler.pkl", "wb") as fhandle: + dump(self.scaler, fhandle) + + def __call__(self, *args, **kwargs): + """# Scale the given data""" + return self.scaler.transform(*args, **kwargs) diff --git a/code/sunlab/common/scaler/quantile_scaler.py b/code/sunlab/common/scaler/quantile_scaler.py new file mode 100644 index 0000000..a0f53fd --- /dev/null +++ b/code/sunlab/common/scaler/quantile_scaler.py @@ -0,0 +1,52 @@ +from .adversarial_scaler import AdversarialScaler + + +class QuantileScaler(AdversarialScaler): + """# QuantileScaler + + Scale the data based on the quantile distributions of each column""" + + def __init__(self, base_directory): + """# QuantileScaler initialization + + - Initialize the base directory of the model where it will live + - Initialize the scaler model""" + super().__init__(base_directory) + from sklearn.preprocessing import QuantileTransformer as QS + + self.scaler_base = QS() + self.scaler = None + + def init(self, data): + """# Scaler initialization + + Initialize the scaler transformation with the data""" + self.scaler = self.scaler_base.fit(data) + return self + + def load(self): + """# Scaler loading + + Load the data scaler model from a file""" + from pickle import load + + with open( + f"{self.base_directory}/portable/quantile_scaler.pkl", "rb" + ) as fhandle: + self.scaler = load(fhandle) + return self + + def save(self): + """# Scaler saving + + Save the data scaler model""" + from pickle import dump + + with open( + f"{self.base_directory}/portable/quantile_scaler.pkl", "wb" + ) as fhandle: + dump(self.scaler, fhandle) + + def __call__(self, *args, **kwargs): + """# Scale the given data""" + return self.scaler.transform(*args, **kwargs) diff --git a/code/sunlab/environment/base/__init__.py b/code/sunlab/environment/base/__init__.py new file mode 100644 index 0000000..5fc27c1 --- /dev/null +++ b/code/sunlab/environment/base/__init__.py @@ -0,0 +1,8 @@ +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from copy import deepcopy as dc +import glob +from tqdm.notebook import tqdm +from sunlab.common.mathlib.base import * +from .fortran import * diff --git a/code/sunlab/environment/base/cpu.py b/code/sunlab/environment/base/cpu.py new file mode 100644 index 0000000..d969bd1 --- /dev/null +++ b/code/sunlab/environment/base/cpu.py @@ -0,0 +1,4 @@ +import os + +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" +from . 
import * diff --git a/code/sunlab/environment/base/cuda.py b/code/sunlab/environment/base/cuda.py new file mode 100644 index 0000000..a5c813e --- /dev/null +++ b/code/sunlab/environment/base/cuda.py @@ -0,0 +1,4 @@ +import os + +os.environ["CUDA_VISIBLE_DEVICES"] = "1" +from . import * diff --git a/code/sunlab/environment/base/extras.py b/code/sunlab/environment/base/extras.py new file mode 100644 index 0000000..b6ddc88 --- /dev/null +++ b/code/sunlab/environment/base/extras.py @@ -0,0 +1,7 @@ +from scipy.spatial import KDTree +from scipy.spatial import ConvexHull +from scipy.stats import linregress +from sklearn.manifold import TSNE +from sklearn.decomposition import PCA, KernelPCA +from sklearn.cluster import KMeans +from scipy.optimize import curve_fit diff --git a/code/sunlab/environment/base/fortran.py b/code/sunlab/environment/base/fortran.py new file mode 100644 index 0000000..973d09a --- /dev/null +++ b/code/sunlab/environment/base/fortran.py @@ -0,0 +1,8 @@ +try: + from ...fortran_src.aae_flib_mamba import * +except Exception: + ... +try: + from ...fortran_src.aae_flib_tfnb import * +except Exception: + ... diff --git a/code/sunlab/fortran_src/__init__.py b/code/sunlab/fortran_src/__init__.py new file mode 100644 index 0000000..81bb83a --- /dev/null +++ b/code/sunlab/fortran_src/__init__.py @@ -0,0 +1 @@ +# f2py -c -m aae_flib_tfnb fortran_library.f90 --f90flags='-g -fimplicit-none -O3' diff --git a/code/sunlab/fortran_src/fortran_library.f90 b/code/sunlab/fortran_src/fortran_library.f90 new file mode 100644 index 0000000..d477995 --- /dev/null +++ b/code/sunlab/fortran_src/fortran_library.f90 @@ -0,0 +1,96 @@ +! fortran_library + +! Mean-Squared Displacement for a time lag `t` +function msd(a, t) result(b) + implicit none + + real(kind=8), intent(in) :: a(:,:) + integer(kind=8), intent(in) :: t + integer(kind=8) :: i, j + real(kind=8) :: s(size(a,1)-t, size(a,2)) + real(kind=8) :: b(size(a,1)-t) + + s(:, :) = a((t + 1):(size(a,1)), :) - a(1:(size(a,1)-t),:) + do i = 1, size(a, 1)-t + b(i) = 0.0 + do j = 1, size(a, 2) + b(i) = b(i) + s(i, j) ** 2 + end do + b(i) = sqrt(b(i)) + end do +end function msd + +! Mean-Squared Displacement for a time lag `t` +function mmsd(a, t) result(z) + implicit none + + real(kind=8), intent(in) :: a(:,:) + integer(kind=8), intent(in) :: t + integer(kind=8) :: i, j + real(kind=8) :: s(size(a,1)-t, size(a,2)) + real(kind=8) :: b(size(a,1)-t) + real(kind=8) :: z + + s(:, :) = a((t + 1):(size(a,1)), :) - a(1:(size(a,1)-t),:) + do i = 1, size(a, 1)-t + b(i) = 0.0 + do j = 1, size(a, 2) + b(i) = b(i) + s(i, j) ** 2 + end do + b(i) = sqrt(b(i)) + end do + z = sum(b(:)) / (size(b, 1)) +end function mmsd + +! Mean-Squared Displacement for time lags up to `T` +function msds(a, Ts) result(b) + implicit none + + real(kind=8), intent(in) :: a(:,:) + integer(kind=8), intent(in) :: Ts + integer(kind=8) :: i, j + integer(kind=8) :: t + real(kind=8) :: s(size(a,1), size(a,2)) + real(kind=8) :: b(size(a,1),Ts+1) + + do t = 0, Ts + s(:, :) = a((t + 1):(size(a,1)), :) - a(1:(size(a,1)-t),:) + do i = 1, size(a, 1) + b(i,t+1) = 0.0 + if (i <= size(a, 1) - t) then + do j = 1, size(a, 2) + b(i,t+1) = b(i,t+1) + s(i, j) ** 2 + end do + b(i,t+1) = sqrt(b(i,t+1)) + end if + end do + end do +end function msds + +! Mean-Squared Displacement for time lags up to `T` +! 
Return the mean for each time lag +function mmsds(a, Ts) result(z) + implicit none + + real(kind=8), intent(in) :: a(:,:) + integer(kind=8), intent(in) :: Ts + integer(kind=8) :: i, j + integer(kind=8) :: t + real(kind=8) :: s(size(a,1), size(a,2)) + real(kind=8) :: b(size(a,1),Ts+1) + real(kind=8) :: z(Ts+1) + + do t = 0, Ts + s(:, :) = a((t + 1):(size(a,1)), :) - a(1:(size(a,1)-t),:) + do i = 1, size(a, 1) + b(i,t+1) = 0.0 + if (i <= size(a, 1) - t) then + do j = 1, size(a, 2) + b(i,t+1) = b(i,t+1) + s(i, j) ** 2 + end do + b(i,t+1) = sqrt(b(i,t+1)) + end if + end do + z(t+1) = sum(b(1:(size(a, 1) - t),t+1)) / (size(a, 1) - t) + end do +end function mmsds diff --git a/code/sunlab/globals.py b/code/sunlab/globals.py new file mode 100644 index 0000000..eb5800b --- /dev/null +++ b/code/sunlab/globals.py @@ -0,0 +1,265 @@ +DIR_ROOT = "../" +FILES = { + "TRAINING_DATASET": DIR_ROOT + "data/spheroid26_011523_filtered.csv", + "TRAINING_DATASET_WIDE_BERTH": DIR_ROOT + "data/spheroid26_011523_exc.csv", + "PRETRAINED_MODEL_DIR": DIR_ROOT + "models/current_model/", + "PEN_TRACKED": { + "AUTO": DIR_ROOT + "data/PEN_automatically_tracked.csv", + "MANUAL": DIR_ROOT + "data/PEN_manually_tracked.csv", + }, + "RHO": { + "3": DIR_ROOT + "data/Rho_act_Cell3.csv", + "4": DIR_ROOT + "data/Rho_act_Cell4.csv", + "6": DIR_ROOT + "data/Rho_act_Cell6.csv", + "7": DIR_ROOT + "data/Rho_act_Cell7.csv", + "8": DIR_ROOT + "data/Rho_act_Cell8.csv", + "9": DIR_ROOT + "data/Rho_act_Cell9.csv", + "10": DIR_ROOT + "data/Rho_act_Cell10.csv", + "11": DIR_ROOT + "data/Rho_act_Cell11.csv", + }, + "CN03": { + "3": DIR_ROOT + "data/Rho_act_Cell3.csv", + "4": DIR_ROOT + "data/Rho_act_Cell4.csv", + "6": DIR_ROOT + "data/Rho_act_Cell6.csv", + "7": DIR_ROOT + "data/Rho_act_Cell7.csv", + "8": DIR_ROOT + "data/Rho_act_Cell8.csv", + "9": DIR_ROOT + "data/Rho_act_Cell9.csv", + "10": DIR_ROOT + "data/Rho_act_Cell10.csv", + "11": DIR_ROOT + "data/Rho_act_Cell11.csv", + }, + "Y27632": { + "1": DIR_ROOT + "data/Y27632_Cell1.csv", + "2": DIR_ROOT + "data/Y27632_Cell2.csv", + "3": DIR_ROOT + "data/Y27632_Cell3.csv", + "4": DIR_ROOT + "data/Y27632_Cell4.csv", + "5": DIR_ROOT + "data/Y27632_Cell5.csv", + }, + "HISTOLOGIES": { + "J1": DIR_ROOT + "../HistologyProject/J1/svm/svm.csv", + "image001": DIR_ROOT + "../HistologyProject/image001/svm/svm.csv", + "H4": DIR_ROOT + "../HistologyProject/H4/svm/svm.csv", + "H9": DIR_ROOT + "../HistologyProject/H9/svm/svm.csv", + }, + "SPHEROID": { + "1p5mgml": DIR_ROOT + "data/spheroid26_011523_filtered.csv", + "3mgml": DIR_ROOT + "data/spheroid20_011923_filtered.csv", + "4mgml": DIR_ROOT + "data/spheroid22_012323_filtered.csv", + "6mgml": DIR_ROOT + "data/spheroid26_012423_filtered.csv", + }, + "SPHEROID_RAW": { + "REG": { + "1p5mgml": [ + DIR_ROOT + "data/" + "spheroid15_030322_RegularCollagen_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid16_030322_RegularCollagen_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid17_041022_RegularCollagen_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid18_041022_RegularCollagen_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid26_011523_RegularCollagen_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid27_090323_RegularCollagen_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid28_090523_RegularCollagen_1p5mgml.csv", + ], + "3mgml": [ + DIR_ROOT + "data/" + "spheroid15_031022_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + "spheroid16_031522_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + "spheroid17_041022_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + 
"spheroid18_083022_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + "spheroid19_083022_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + "spheroid20_011923_RegularCollagen_3mgml.csv", + ], + "4mgml": [ + DIR_ROOT + "data/" + "spheroid17_031022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid18_031022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid19_031022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid20_083022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid21_083022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid22_012323_RegularCollagen_4mgml.csv", + ], + "6mgml": [ + DIR_ROOT + "data/" + "spheroid22_031022_RegularCollagen_6mgml.csv", + DIR_ROOT + "data/" + "spheroid23_031022_RegularCollagen_6mgml.csv", + DIR_ROOT + "data/" + "spheroid24_031022_RegularCollagen_6mgml.csv", + DIR_ROOT + "data/" + "spheroid25_031022_RegularCollagen_6mgml.csv", + DIR_ROOT + "data/" + "spheroid26_012423_RegularCollagen_6mgml.csv", + ], + }, + "PC": { + "1p5mgml": [ + DIR_ROOT + "data/" + "spheroid1_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid2_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid3_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid4_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid5_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid7_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid8_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid9_012723_PhotoCol_1p5mgml.csv", + ], + "3mgml": [ + DIR_ROOT + "data/" + "spheroid5_022022_PhotoCol_3mgml.csv", + DIR_ROOT + "data/" + "spheroid6_022022_PhotoCol_3mgml.csv", + DIR_ROOT + "data/" + "spheroid7_030322_PhotoCol_3mgml.csv", + DIR_ROOT + "data/" + "spheroid8_030322_PhotoCol_3mgml.csv", + DIR_ROOT + "data/" + "spheroid12_060923_PhotoCol_3mgml.csv", + ], + "4mgml": [ + DIR_ROOT + "data/" + "spheroid2_022022_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid3_053122_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid4_053122_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid5_053122_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid6_053122_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid7_091222_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid8_091822_PhotoCol_4mgml.csv", + ], + "6mgml": [ + DIR_ROOT + "data/" + "spheroid1_021922_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid2_021922_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid3_021922_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid4_021922_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid6_091222_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid8_022323_PhotoCol_6mgml.csv", + ], + }, + }, + "SPHEROID_RAW_RIC": { + "NAME": "(R)esolution (I)mage type (C)oncentration", + "QUARTER_HOUR": { + "REG": { + "1p5mgml": [ + DIR_ROOT + + "data/" + + "spheroid28_090523_RegularCollagen_1p5mgml.csv", + ], + "3mgml": [ + DIR_ROOT + "data/" + "spheroid20_011923_RegularCollagen_3mgml.csv", + ], + "4mgml": [ + DIR_ROOT + "data/" + "spheroid22_012323_RegularCollagen_4mgml.csv", + ], + "6mgml": [ + DIR_ROOT + "data/" + "spheroid26_012423_RegularCollagen_6mgml.csv", + ], + }, + "PC": { + "1p5mgml": [ + DIR_ROOT + "data/" + "spheroid9_012723_PhotoCol_1p5mgml.csv", + ], + "3mgml": [ + DIR_ROOT + "data/" + "spheroid12_060923_PhotoCol_3mgml.csv", + ], + "4mgml": [], + "6mgml": [ + DIR_ROOT + "data/" + "spheroid8_022323_PhotoCol_6mgml.csv", + ], + }, + }, + "DAILY": { + "REG": { + "1p5mgml": [ + DIR_ROOT + + "data/" + + 
"spheroid15_030322_RegularCollagen_1p5mgml.csv", + DIR_ROOT + + "data/" + + "spheroid16_030322_RegularCollagen_1p5mgml.csv", + DIR_ROOT + + "data/" + + "spheroid17_041022_RegularCollagen_1p5mgml.csv", + DIR_ROOT + + "data/" + + "spheroid18_041022_RegularCollagen_1p5mgml.csv", + DIR_ROOT + + "data/" + + "spheroid26_011523_RegularCollagen_1p5mgml.csv", + DIR_ROOT + + "data/" + + "spheroid27_090323_RegularCollagen_1p5mgml.csv", + ], + "3mgml": [ + DIR_ROOT + "data/" + "spheroid15_031022_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + "spheroid16_031522_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + "spheroid17_041022_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + "spheroid18_083022_RegularCollagen_3mgml.csv", + DIR_ROOT + "data/" + "spheroid19_083022_RegularCollagen_3mgml.csv", + ], + "4mgml": [ + DIR_ROOT + "data/" + "spheroid17_031022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid18_031022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid19_031022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid20_083022_RegularCollagen_4mgml.csv", + DIR_ROOT + "data/" + "spheroid21_083022_RegularCollagen_4mgml.csv", + ], + "6mgml": [ + DIR_ROOT + "data/" + "spheroid22_031022_RegularCollagen_6mgml.csv", + DIR_ROOT + "data/" + "spheroid23_031022_RegularCollagen_6mgml.csv", + DIR_ROOT + "data/" + "spheroid24_031022_RegularCollagen_6mgml.csv", + DIR_ROOT + "data/" + "spheroid25_031022_RegularCollagen_6mgml.csv", + ], + }, + "PC": { + "1p5mgml": [ + DIR_ROOT + "data/" + "spheroid1_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid2_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid3_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid4_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid5_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid7_021922_PhotoCol_1p5mgml.csv", + DIR_ROOT + "data/" + "spheroid8_021922_PhotoCol_1p5mgml.csv", + ], + "3mgml": [ + DIR_ROOT + "data/" + "spheroid5_022022_PhotoCol_3mgml.csv", + DIR_ROOT + "data/" + "spheroid6_022022_PhotoCol_3mgml.csv", + DIR_ROOT + "data/" + "spheroid7_030322_PhotoCol_3mgml.csv", + DIR_ROOT + "data/" + "spheroid8_030322_PhotoCol_3mgml.csv", + DIR_ROOT + "data/" + "spheroid12_060923_PhotoCol_3mgml.csv", + ], + "4mgml": [ + DIR_ROOT + "data/" + "spheroid2_022022_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid3_053122_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid4_053122_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid5_053122_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid6_053122_PhotoCol_4mgml.csv", + DIR_ROOT + "data/" + "spheroid7_091222_PhotoCol_4mgml.csv", + ], + "6mgml": [ + DIR_ROOT + "data/" + "spheroid1_021922_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid2_021922_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid3_021922_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid4_021922_PhotoCol_6mgml.csv", + DIR_ROOT + "data/" + "spheroid6_091222_PhotoCol_6mgml.csv", + ], + }, + }, + }, + "SPHEROID_PC": { + "1p5mgml": DIR_ROOT + "data/spheroid9_012723_pc_1p5.csv", + "3mgml": DIR_ROOT + "data/spheroid12_060923_pc_3.csv", + }, + "SPHEROID_MASKS": { + "1p5mgml": DIR_ROOT + "data/spheroid_1p5mgml_spheroid26_011523/images/", + "3mgml": DIR_ROOT + "data/spheroid_3mgml_spheroid20_011923/images/", + "4mgml": DIR_ROOT + "data/spheroid_4mgml_spheroid22_012323/images/", + "6mgml": DIR_ROOT + "data/spheroid_6mgml_spheroid26_012423/images/", + "3mgml_pc": DIR_ROOT + "data/spheroid_photocol_3mgml_spheroid12_060923/images/", + }, + 
"FIGURES": { + "2": { + "PHENOTYPES_SMOOTHED": DIR_ROOT + + "extra_data/PhenotypeGaussianSmoothed.npy", + }, + "3": { + "SAMPLED_DATASET": DIR_ROOT + "extra_data/Figure3.SampledDataset.npy", + "PAIRWISE_DISTANCES": DIR_ROOT + "extra_data/Figure3.PairwiseDistances.npy", + "PAIRWISE_DOT_PRODUCTS": DIR_ROOT + + "extra_data/Figure3.PairwiseDotProducts.npy", + "TRAJECTORIES": DIR_ROOT + "extra_data/Figure3.Trajectories.npy", + }, + }, + "PHENOTYPE_GRID": { + "IN": DIR_ROOT + "extra_data/PhenotypeGrid.npy", + "OUT": DIR_ROOT + "extra_data/PhenotypeGrid_out.npy", + }, + "PHENOTYPE_RGB": DIR_ROOT + "extra_data/PhenotypeColors.npy", + "SVM": { + "MODEL": DIR_ROOT + "other/svm/SVC_rbf_010820_16942_new.pkl", + "SCALER": DIR_ROOT + "other/svm/SVC_rbf_scaler_010820_16942_new.pkl", + }, + "NONPHYSICAL_MASK": DIR_ROOT + "extra_data/NonPhysicalMask.npy", +} diff --git a/code/sunlab/sunflow/__init__.py b/code/sunlab/sunflow/__init__.py new file mode 100644 index 0000000..6e0c959 --- /dev/null +++ b/code/sunlab/sunflow/__init__.py @@ -0,0 +1,6 @@ +from ..common import * + +from .models import * + +from .data import * +from .plotting import * diff --git a/code/sunlab/sunflow/data/__init__.py b/code/sunlab/sunflow/data/__init__.py new file mode 100644 index 0000000..b9a32c0 --- /dev/null +++ b/code/sunlab/sunflow/data/__init__.py @@ -0,0 +1 @@ +from .utilities import * diff --git a/code/sunlab/sunflow/data/utilities.py b/code/sunlab/sunflow/data/utilities.py new file mode 100644 index 0000000..dcdc36e --- /dev/null +++ b/code/sunlab/sunflow/data/utilities.py @@ -0,0 +1,53 @@ +from sunlab.common import ShapeDataset +from sunlab.common import MaxAbsScaler + + +def process_and_load_dataset( + dataset_file, model_folder, magnification=10, scaler=MaxAbsScaler +): + """# Load a dataset and process a models' Latent Space on the Dataset""" + from ..models import load_aae + from sunlab.common import import_full_dataset + + model = load_aae(model_folder, normalization_scaler=scaler) + dataset = import_full_dataset( + dataset_file, magnification=magnification, scaler=model.scaler + ) + latent = model.encoder(dataset.dataset).numpy() + assert len(latent.shape) == 2, "Only 1D Latent Vectors Supported" + for dim in range(latent.shape[1]): + dataset.dataframe[f"Latent-{dim}"] = latent[:, dim] + return dataset + + +def process_and_load_datasets( + dataset_file_list, model_folder, magnification=10, scaler=MaxAbsScaler +): + from pandas import concat + from ..models import load_aae + + dataframes = [] + datasets = [] + for dataset_file in dataset_file_list: + dataset = process_and_load_dataset( + dataset_file, model_folder, magnification, scaler + ) + model = load_aae(model_folder, normalization_scaler=scaler) + dataframe = dataset.dataframe + for label in ["ActinEdge", "Filopodia", "Bleb", "Lamellipodia"]: + if label in dataframe.columns: + dataframe[label.lower()] = dataframe[label] + if label.lower() not in dataframe.columns: + dataframe[label.lower()] = 0 + latent_columns = [f"Latent-{dim}" for dim in range(model.latent_size)] + datasets.append(dataset) + dataframes.append( + dataframe[ + dataset.data_columns + + dataset.label_columns + + latent_columns + + ["Frames", "CellNum"] + + ["actinedge", "filopodia", "bleb", "lamellipodia"] + ] + ) + return datasets, concat(dataframes) diff --git a/code/sunlab/sunflow/models/__init__.py b/code/sunlab/sunflow/models/__init__.py new file mode 100644 index 0000000..f111c0a --- /dev/null +++ b/code/sunlab/sunflow/models/__init__.py @@ -0,0 +1,7 @@ +from .autoencoder import Autoencoder 
+from .adversarial_autoencoder import AdversarialAutoencoder +from sunlab.common.data.dataset import Dataset +from sunlab.common.distribution.adversarial_distribution import AdversarialDistribution +from sunlab.common.scaler.adversarial_scaler import AdversarialScaler +from .utilities import create_aae, create_aae_and_dataset +from .utilities import load_aae, load_aae_and_dataset diff --git a/code/sunlab/sunflow/models/adversarial_autoencoder.py b/code/sunlab/sunflow/models/adversarial_autoencoder.py new file mode 100644 index 0000000..4cbb2f8 --- /dev/null +++ b/code/sunlab/sunflow/models/adversarial_autoencoder.py @@ -0,0 +1,344 @@ +from sunlab.common.data.dataset import Dataset +from sunlab.common.scaler.adversarial_scaler import AdversarialScaler +from sunlab.common.distribution.adversarial_distribution import AdversarialDistribution +from .encoder import Encoder +from .decoder import Decoder +from .discriminator import Discriminator +from .encoder_discriminator import EncoderDiscriminator +from .autoencoder import Autoencoder +from tensorflow.keras import optimizers, metrics, losses +import tensorflow as tf +from numpy import ones, zeros, float32, NaN + + +class AdversarialAutoencoder: + """# Adversarial Autoencoder + - distribution: The distribution used by the adversary to learn on""" + + def __init__( + self, + model_base_directory, + distribution: AdversarialDistribution or None = None, + scaler: AdversarialScaler or None = None, + ): + """# Adversarial Autoencoder Model Initialization + + - model_base_directory: The base folder directory where the model will + be saved/ loaded + - distribution: The distribution the adversary will use + - scaler: The scaling function the model will assume on the data""" + self.model_base_directory = model_base_directory + if distribution is not None: + self.distribution = distribution + else: + self.distribution = None + if scaler is not None: + self.scaler = scaler(self.model_base_directory) + else: + self.scaler = None + + def init( + self, + data=None, + data_size=13, + autoencoder_layer_size=16, + adversary_layer_size=8, + latent_size=2, + autoencoder_depth=2, + dropout=0.0, + use_leaky_relu=False, + **kwargs, + ): + """# Initialize AAE model parameters + - data_size: int + - autoencoder_layer_size: int + - adversary_layer_size: int + - latent_size: int + - autoencoder_depth: int + - dropout: float + - use_leaky_relu: boolean""" + self.data_size = data_size + self.autoencoder_layer_size = autoencoder_layer_size + self.adversary_layer_size = adversary_layer_size + self.latent_size = latent_size + self.autoencoder_depth = autoencoder_depth + self.dropout = dropout + self.use_leaky_relu = use_leaky_relu + self.save_parameters() + self.encoder = Encoder(self.model_base_directory).init() + self.decoder = Decoder(self.model_base_directory).init() + self.autoencoder = Autoencoder(self.model_base_directory).init( + self.encoder, self.decoder + ) + self.discriminator = Discriminator(self.model_base_directory).init() + self.encoder_discriminator = EncoderDiscriminator( + self.model_base_directory + ).init(self.encoder, self.discriminator) + if self.distribution is not None: + self.distribution = self.distribution(self.latent_size) + if (data is not None) and (self.scaler is not None): + self.scaler = self.scaler.init(data) + self.init_optimizers_and_metrics(**kwargs) + return self + + def init_optimizers_and_metrics( + self, + optimizer=optimizers.Adam, + ae_metric=metrics.MeanAbsoluteError, + adv_metric=metrics.BinaryCrossentropy, + ae_lr=7e-4, + 
adv_lr=3e-4, + loss_fn=losses.BinaryCrossentropy, + **kwargs, + ): + """# Set the optimizer, loss function, and metrics""" + self.ae_optimizer = optimizer(learning_rate=ae_lr) + self.adv_optimizer = optimizer(learning_rate=adv_lr) + self.gan_optimizer = optimizer(learning_rate=adv_lr) + self.train_ae_metric = ae_metric() + self.val_ae_metric = ae_metric() + self.train_adv_metric = adv_metric() + self.val_adv_metric = adv_metric() + self.train_gan_metric = adv_metric() + self.val_gan_metric = adv_metric() + self.loss_fn = loss_fn() + + def load(self): + """# Load the models from their respective files""" + self.load_parameters() + self.encoder = Encoder(self.model_base_directory).load() + self.decoder = Decoder(self.model_base_directory).load() + self.autoencoder = Autoencoder(self.model_base_directory).load() + self.discriminator = Discriminator(self.model_base_directory).load() + self.encoder_discriminator = EncoderDiscriminator( + self.model_base_directory + ).load() + if self.scaler is not None: + self.scaler = self.scaler.load() + return self + + def save(self, overwrite=False): + """# Save each model in the AAE""" + self.encoder.save(overwrite=overwrite) + self.decoder.save(overwrite=overwrite) + self.autoencoder.save(overwrite=overwrite) + self.discriminator.save(overwrite=overwrite) + self.encoder_discriminator.save(overwrite=overwrite) + if self.scaler is not None: + self.scaler.save() + + def save_parameters(self): + """# Save the AAE parameters in a file""" + from pickle import dump + from os import makedirs + + makedirs(self.model_base_directory + "/portable/", exist_ok=True) + parameters = { + "data_size": self.data_size, + "autoencoder_layer_size": self.autoencoder_layer_size, + "adversary_layer_size": self.adversary_layer_size, + "latent_size": self.latent_size, + "autoencoder_depth": self.autoencoder_depth, + "dropout": self.dropout, + "use_leaky_relu": self.use_leaky_relu, + } + with open( + f"{self.model_base_directory}/portable/model_parameters.pkl", "wb" + ) as phandle: + dump(parameters, phandle) + + def load_parameters(self): + """# Load the AAE parameters from a file""" + from pickle import load + + with open( + f"{self.model_base_directory}/portable/model_parameters.pkl", "rb" + ) as phandle: + parameters = load(phandle) + self.data_size = parameters["data_size"] + self.autoencoder_layer_size = parameters["autoencoder_layer_size"] + self.adversary_layer_size = parameters["adversary_layer_size"] + self.latent_size = parameters["latent_size"] + self.autoencoder_depth = parameters["autoencoder_depth"] + self.dropout = parameters["dropout"] + self.use_leaky_relu = parameters["use_leaky_relu"] + return parameters + + def summary(self): + """# Summarize each model in the AAE""" + self.encoder.summary() + self.decoder.summary() + self.autoencoder.summary() + self.discriminator.summary() + self.encoder_discriminator.summary() + + @tf.function + def train_step(self, x, y): + """# Training Step + + 1. Train the Autoencoder + 2. (If distribution is given) Train the discriminator + 3. 
(If the distribution is given) Train the encoder_discriminator""" + # Autoencoder Training + with tf.GradientTape() as tape: + decoded_vector = self.autoencoder(x, training=True) + ae_loss_value = self.loss_fn(y, decoded_vector) + grads = tape.gradient(ae_loss_value, self.autoencoder.model.trainable_weights) + self.ae_optimizer.apply_gradients( + zip(grads, self.autoencoder.model.trainable_weights) + ) + self.train_ae_metric.update_state(y, decoded_vector) + if self.distribution is not None: + # Adversary Trainig + with tf.GradientTape() as tape: + latent_vector = self.encoder(x) + fakepred = self.distribution(x.shape[0]) + discbatch_x = tf.concat([latent_vector, fakepred], axis=0) + discbatch_y = tf.concat([zeros(x.shape[0]), ones(x.shape[0])], axis=0) + adversary_vector = self.discriminator(discbatch_x, training=True) + adv_loss_value = self.loss_fn(discbatch_y, adversary_vector) + grads = tape.gradient( + adv_loss_value, self.discriminator.model.trainable_weights + ) + self.adv_optimizer.apply_gradients( + zip(grads, self.discriminator.model.trainable_weights) + ) + self.train_adv_metric.update_state(discbatch_y, adversary_vector) + # Gan Training + with tf.GradientTape() as tape: + gan_vector = self.encoder_discriminator(x, training=True) + adv_vector = tf.convert_to_tensor(ones((x.shape[0], 1), dtype=float32)) + gan_loss_value = self.loss_fn(gan_vector, adv_vector) + grads = tape.gradient(gan_loss_value, self.encoder.model.trainable_weights) + self.gan_optimizer.apply_gradients( + zip(grads, self.encoder.model.trainable_weights) + ) + self.train_gan_metric.update_state(adv_vector, gan_vector) + return (ae_loss_value, adv_loss_value, gan_loss_value) + return (ae_loss_value, None, None) + + @tf.function + def test_step(self, x, y): + """# Test Step - On validation data + + 1. Evaluate the Autoencoder + 2. (If distribution is given) Evaluate the discriminator + 3. 
(If the distribution is given) Evaluate the encoder_discriminator""" + val_decoded_vector = self.autoencoder(x, training=False) + self.val_ae_metric.update_state(y, val_decoded_vector) + + if self.distribution is not None: + latent_vector = self.encoder(x) + fakepred = self.distribution(x.shape[0]) + discbatch_x = tf.concat([latent_vector, fakepred], axis=0) + discbatch_y = tf.concat([zeros(x.shape[0]), ones(x.shape[0])], axis=0) + adversary_vector = self.discriminator(discbatch_x, training=False) + self.val_adv_metric.update_state(discbatch_y, adversary_vector) + + gan_vector = self.encoder_discriminator(x, training=False) + self.val_gan_metric.update_state(ones(x.shape[0]), gan_vector) + + # Garbage Collect at the end of each epoch + def on_epoch_end(self, _epoch, logs=None): + """# Cleanup environment to prevent memory leaks each epoch""" + import gc + from tensorflow.keras import backend as k + + gc.collect() + k.clear_session() + + def train( + self, + dataset: Dataset, + epoch_count: int = 1, + output=False, + output_freq=1, + fmt="%i[%.3f]: %.2e %.2e %.2e %.2e %.2e %.2e", + ): + """# Train the model on a dataset + + - dataset: ataset = Dataset to train the model on, which as the + training and validation iterators set up + - epoch_count: int = The number of epochs to train + - output: boolean = Whether or not to output training information + - output_freq: int = The number of epochs between each output""" + from time import time + from numpy import array as narray + + def fmtter(x): + return x if x is not None else -1 + + epoch_data = [] + dataset.reset_iterators() + + self.test_step(dataset.dataset, dataset.dataset) + val_ae = self.val_ae_metric.result() + val_adv = self.val_adv_metric.result() + val_gan = self.val_gan_metric.result() + self.val_ae_metric.reset_states() + self.val_adv_metric.reset_states() + self.val_gan_metric.reset_states() + print( + fmt + % ( + 0, + NaN, + val_ae, + fmtter(val_adv), + fmtter(val_gan), + NaN, + NaN, + NaN, + ) + ) + for epoch in range(epoch_count): + start_time = time() + + for step, (x_batch_train, y_batch_train) in enumerate(dataset.training): + ae_lv, adv_lv, gan_lv = self.train_step(x_batch_train, x_batch_train) + + train_ae = self.train_ae_metric.result() + train_adv = self.train_adv_metric.result() + train_gan = self.train_gan_metric.result() + self.train_ae_metric.reset_states() + self.train_adv_metric.reset_states() + self.train_gan_metric.reset_states() + + for step, (x_batch_val, y_batch_val) in enumerate(dataset.validation): + self.test_step(x_batch_val, x_batch_val) + + val_ae = self.val_ae_metric.result() + val_adv = self.val_adv_metric.result() + val_gan = self.val_gan_metric.result() + self.val_ae_metric.reset_states() + self.val_adv_metric.reset_states() + self.val_gan_metric.reset_states() + + epoch_data.append( + ( + epoch, + train_ae, + val_ae, + fmtter(train_adv), + fmtter(val_adv), + fmtter(train_gan), + fmtter(val_gan), + ) + ) + if output and (epoch + 1) % output_freq == 0: + print( + fmt + % ( + epoch + 1, + time() - start_time, + train_ae, + fmtter(train_adv), + fmtter(train_gan), + val_ae, + fmtter(val_adv), + fmtter(val_gan), + ) + ) + self.on_epoch_end(epoch) + dataset.reset_iterators() + return narray(epoch_data) diff --git a/code/sunlab/sunflow/models/autoencoder.py b/code/sunlab/sunflow/models/autoencoder.py new file mode 100644 index 0000000..473d00d --- /dev/null +++ b/code/sunlab/sunflow/models/autoencoder.py @@ -0,0 +1,85 @@ +class Autoencoder: + """# Autoencoder Model + + Constructs an encoder-decoder model""" 
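For context, the AdversarialAutoencoder training loop above is normally driven through the create_aae_and_dataset helper defined further down in sunflow/models/utilities.py. A minimal sketch of that end-to-end flow, assuming a CSV with the expected 13 shape features; the file path, model directory, and epoch count are placeholders, and passing distribution=None restricts training to the reconstruction objective.

    from sunlab.common import MaxAbsScaler
    from sunlab.sunflow.models import create_aae_and_dataset

    model, dataset = create_aae_and_dataset(
        "data/spheroid26_011523_filtered.csv",   # placeholder training CSV
        "models/my_aae/",                        # placeholder model directory
        normalization_scaler=MaxAbsScaler,
        distribution=None,                       # None => autoencoder-only training
        magnification=10,
        batch_size=1024,
        val_split=0.1,
    )
    history = model.train(dataset, epoch_count=100, output=True, output_freq=10)
    model.save(overwrite=True)                   # writes each sub-model under portable/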
+ + def __init__(self, model_base_directory): + """# Autoencoder Model Initialization + + - model_base_directory: The base folder directory where the model will + be saved/ loaded""" + self.model_base_directory = model_base_directory + + def init(self, encoder, decoder): + """# Initialize an Autoencoder + + - encoder: The encoder to use + - decoder: The decoder to use""" + from tensorflow import keras + + self.load_parameters() + self.model = keras.models.Sequential() + self.model.add(encoder.model) + self.model.add(decoder.model) + self.model._name = "Autoencoder" + return self + + def load(self): + """# Load an existing Autoencoder""" + from os import listdir + + if "autoencoder.keras" not in listdir(f"{self.model_base_directory}/portable/"): + return None + import tensorflow as tf + + self.model = tf.keras.models.load_model( + f"{self.model_base_directory}/portable/autoencoder.keras", compile=False + ) + self.model._name = "Autoencoder" + return self + + def save(self, overwrite=False): + """# Save the current Autoencoder + + - Overwrite: overwrite any existing autoencoder that has been saved""" + from os import listdir + + if overwrite: + self.model.save(f"{self.model_base_directory}/portable/autoencoder.keras") + return True + if "autoencoder.keras" in listdir(f"{self.model_base_directory}/portable/"): + return False + self.model.save(f"{self.model_base_directory}/portable/autoencoder.keras") + return True + + def load_parameters(self): + """# Load Autoencoder Model Parameters from File + The file needs to have the following parameters defined: + - data_size: int + - autoencoder_layer_size: int + - latent_size: int + - autoencoder_depth: int + - dropout: float (set to 0. if you don't want a dropout layer) + - use_leaky_relu: boolean""" + from pickle import load + + with open( + f"{self.model_base_directory}/portable/model_parameters.pkl", "rb" + ) as phandle: + parameters = load(phandle) + self.data_size = parameters["data_size"] + self.layer_size = parameters["autoencoder_layer_size"] + self.latent_size = parameters["latent_size"] + self.depth = parameters["autoencoder_depth"] + self.dropout = parameters["dropout"] + self.use_leaky_relu = parameters["use_leaky_relu"] + + def summary(self): + """# Returns the summary of the Autoencoder model""" + return self.model.summary() + + def __call__(self, *args, **kwargs): + """# Callable + + When calling the autoencoder class, return the model's output""" + return self.model(*args, **kwargs) diff --git a/code/sunlab/sunflow/models/decoder.py b/code/sunlab/sunflow/models/decoder.py new file mode 100644 index 0000000..40ea190 --- /dev/null +++ b/code/sunlab/sunflow/models/decoder.py @@ -0,0 +1,127 @@ +class Decoder: + """# Decoder Model + + Constructs a decoder model with a certain depth of intermediate layers of + fixed size""" + + def __init__(self, model_base_directory): + """# Decoder Model Initialization + + - model_base_directory: The base folder directory where the model will + be saved/ loaded""" + self.model_base_directory = model_base_directory + + def init(self): + """# Initialize a new Decoder + + Expects a model parameters file to already exist in the initialization + base directory when initializing the model""" + from tensorflow import keras + from tensorflow.keras import layers + + self.load_parameters() + assert self.depth >= 0, "Depth must be non-negative" + self.model = keras.models.Sequential() + if self.depth == 0: + self.model.add( + layers.Dense( + self.data_size, + input_shape=(self.latent_size,), + activation=None, + 
name="decoder_latent_vector", + ) + ) + else: + self.model.add( + layers.Dense( + self.layer_size, + input_shape=(self.latent_size,), + activation=None, + name="decoder_dense_1", + ) + ) + if self.use_leaky_relu: + self.model.add(layers.LeakyReLU()) + else: + self.model.add(layers.ReLU()) + if self.dropout > 0.0: + self.model.add(layers.Dropout(self.dropout)) + for _d in range(1, self.depth): + self.model.add( + layers.Dense( + self.layer_size, activation=None, name=f"decoder_dense_{_d+1}" + ) + ) + if self.use_leaky_relu: + self.model.add(layers.LeakyReLU()) + else: + self.model.add(layers.ReLU()) + if self.dropout > 0.0: + self.model.add(layers.Dropout(self.dropout)) + self.model.add( + layers.Dense( + self.data_size, activation=None, name="decoder_output_vector" + ) + ) + self.model._name = "Decoder" + return self + + def load(self): + """# Load an existing Decoder""" + from os import listdir + + if "decoder.keras" not in listdir(f"{self.model_base_directory}/portable/"): + return None + import tensorflow as tf + + self.model = tf.keras.models.load_model( + f"{self.model_base_directory}/portable/decoder.keras", compile=False + ) + self.model._name = "Decoder" + return self + + def save(self, overwrite=False): + """# Save the current Decoder + + - Overwrite: overwrite any existing decoder that has been saved""" + from os import listdir + + if overwrite: + self.model.save(f"{self.model_base_directory}/portable/decoder.keras") + return True + if "decoder.keras" in listdir(f"{self.model_base_directory}/portable/"): + return False + self.model.save(f"{self.model_base_directory}/portable/decoder.keras") + return True + + def load_parameters(self): + """# Load Decoder Model Parameters from File + The file needs to have the following parameters defined: + - data_size: int + - autoencoder_layer_size: int + - latent_size: int + - autoencoder_depth: int + - dropout: float (set to 0. 
if you don't want a dropout layer) + - use_leaky_relu: boolean""" + from pickle import load + + with open( + f"{self.model_base_directory}/portable/model_parameters.pkl", "rb" + ) as phandle: + parameters = load(phandle) + self.data_size = parameters["data_size"] + self.layer_size = parameters["autoencoder_layer_size"] + self.latent_size = parameters["latent_size"] + self.depth = parameters["autoencoder_depth"] + self.dropout = parameters["dropout"] + self.use_leaky_relu = parameters["use_leaky_relu"] + + def summary(self): + """# Returns the summary of the Decoder model""" + return self.model.summary() + + def __call__(self, *args, **kwargs): + """# Callable + + When calling the decoder class, return the model's output""" + return self.model(*args, **kwargs) diff --git a/code/sunlab/sunflow/models/discriminator.py b/code/sunlab/sunflow/models/discriminator.py new file mode 100644 index 0000000..38bed56 --- /dev/null +++ b/code/sunlab/sunflow/models/discriminator.py @@ -0,0 +1,132 @@ +class Discriminator: + """# Discriminator Model + + Constructs a discriminator model with a certain depth of intermediate + layers of fixed size""" + + def __init__(self, model_base_directory): + """# Discriminator Model Initialization + + - model_base_directory: The base folder directory where the model will + be saved/ loaded""" + self.model_base_directory = model_base_directory + + def init(self): + """# Initialize a new Discriminator + + Expects a model parameters file to already exist in the initialization + base directory when initializing the model""" + from tensorflow import keras + from tensorflow.keras import layers + + self.load_parameters() + assert self.depth >= 0, "Depth must be non-negative" + self.model = keras.models.Sequential() + if self.depth == 0: + self.model.add( + layers.Dense( + 1, + input_shape=(self.latent_size,), + activation=None, + name="discriminator_output_vector", + ) + ) + else: + self.model.add( + layers.Dense( + self.layer_size, + input_shape=(self.latent_size,), + activation=None, + name="discriminator_dense_1", + ) + ) + if self.use_leaky_relu: + self.model.add(layers.LeakyReLU()) + else: + self.model.add(layers.ReLU()) + if self.dropout > 0.0: + self.model.add(layers.Dropout(self.dropout)) + for _d in range(1, self.depth): + self.model.add( + layers.Dense( + self.layer_size, + activation=None, + name=f"discriminator_dense_{_d+1}", + ) + ) + if self.use_leaky_relu: + self.model.add(layers.LeakyReLU()) + else: + self.model.add(layers.ReLU()) + if self.dropout > 0.0: + self.model.add(layers.Dropout(self.dropout)) + self.model.add( + layers.Dense( + 1, activation="sigmoid", name="discriminator_output_vector" + ) + ) + self.model._name = "Discriminator" + return self + + def load(self): + """# Load an existing Discriminator""" + from os import listdir + + if "discriminator.keras" not in listdir( + f"{self.model_base_directory}/portable/" + ): + return None + import tensorflow as tf + + self.model = tf.keras.models.load_model( + f"{self.model_base_directory}/portable/discriminator.keras", compile=False + ) + self.model._name = "Discriminator" + return self + + def save(self, overwrite=False): + """# Save the current Discriminator + + - Overwrite: overwrite any existing discriminator that has been + saved""" + from os import listdir + + if overwrite: + self.model.save(f"{self.model_base_directory}/portable/discriminator.keras") + return True + if "discriminator.keras" in listdir(f"{self.model_base_directory}/portable/"): + return False + 
self.model.save(f"{self.model_base_directory}/portable/discriminator.keras") + return True + + def load_parameters(self): + """# Load Discriminator Model Parameters from File + The file needs to have the following parameters defined: + - data_size: int + - adversary_layer_size: int + - latent_size: int + - autoencoder_depth: int + - dropout: float (set to 0. if you don't want a dropout layer) + - use_leaky_relu: boolean""" + from pickle import load + + with open( + f"{self.model_base_directory}/portable/model_parameters.pkl", "rb" + ) as phandle: + parameters = load(phandle) + self.data_size = parameters["data_size"] + self.layer_size = parameters["adversary_layer_size"] + self.latent_size = parameters["latent_size"] + self.depth = parameters["autoencoder_depth"] + self.dropout = parameters["dropout"] + self.use_leaky_relu = parameters["use_leaky_relu"] + + def summary(self): + """# Returns the summary of the Discriminator model""" + return self.model.summary() + + def __call__(self, *args, **kwargs): + """# Callable + + When calling the discriminator class, return the model's output""" + return self.model(*args, **kwargs) diff --git a/code/sunlab/sunflow/models/encoder.py b/code/sunlab/sunflow/models/encoder.py new file mode 100644 index 0000000..22d1a9a --- /dev/null +++ b/code/sunlab/sunflow/models/encoder.py @@ -0,0 +1,140 @@ +class Encoder: + """# Encoder Model + + Constructs an encoder model with a certain depth of intermediate layers of + fixed size""" + + def __init__(self, model_base_directory): + """# Encoder Model Initialization + + - model_base_directory: The base folder directory where the model will + be saved/ loaded""" + self.model_base_directory = model_base_directory + + def init(self): + """# Initialize a new Encoder + + Expects a model parameters file to already exist in the initialization + base directory when initializing the model""" + from tensorflow import keras + from tensorflow.keras import layers + + # Load in the model parameters + self.load_parameters() + assert self.depth >= 0, "Depth must be non-negative" + + # Create the model + self.model = keras.models.Sequential() + # At zero depth, connect input and output layer directly + if self.depth == 0: + self.model.add( + layers.Dense( + self.latent_size, + input_shape=(self.data_size,), + activation=None, + name="encoder_latent_vector", + ) + ) + # Otherwise, add fixed-sized layers between them + else: + self.model.add( + layers.Dense( + self.layer_size, + input_shape=(self.data_size,), + activation=None, + name="encoder_dense_1", + ) + ) + # Use LeakyReLU if specified + if self.use_leaky_relu: + self.model.add(layers.LeakyReLU()) + else: + self.model.add(layers.ReLU()) + # Include a droput layer if specified + if self.dropout > 0.0: + self.model.add(layers.Dropout(self.dropout)) + for _d in range(1, self.depth): + self.model.add( + layers.Dense( + self.layer_size, activation=None, name=f"encoder_dense_{_d+1}" + ) + ) + # Use LeakyReLU if specified + if self.use_leaky_relu: + self.model.add(layers.LeakyReLU()) + else: + self.model.add(layers.ReLU()) + # Include a droput layer if specified + if self.dropout > 0.0: + self.model.add(layers.Dropout(self.dropout)) + self.model.add( + layers.Dense( + self.latent_size, activation=None, name="encoder_latent_vector" + ) + ) + self.model._name = "Encoder" + return self + + def load(self): + """# Load an existing Encoder""" + from os import listdir + + # If the encoder is not found, return None + if "encoder.keras" not in listdir(f"{self.model_base_directory}/portable/"): 
+ return None + # Otherwise, load the encoder + # compile=False suppresses warnings about training + # If you want to train it, you will need to recompile it + import tensorflow as tf + + self.model = tf.keras.models.load_model( + f"{self.model_base_directory}/portable/encoder.keras", compile=False + ) + self.model._name = "Encoder" + return self + + def save(self, overwrite=False): + """# Save the current Encoder + + - Overwrite: overwrite any existing encoder that has been saved""" + from os import listdir + + if overwrite: + self.model.save(f"{self.model_base_directory}/portable/encoder.keras") + return True + if "encoder.keras" in listdir(f"{self.model_base_directory}/portable/"): + return False + self.model.save(f"{self.model_base_directory}/portable/encoder.keras") + return True + + def load_parameters(self): + """# Load Encoder Model Parameters from File + The file needs to have the following parameters defined: + - data_size: int + - autoencoder_layer_size: int + - latent_size: int + - autoencoder_depth: int + - dropout: float (set to 0. if you don't want a dropout layer) + - use_leaky_relu: boolean""" + from pickle import load + + with open( + f"{self.model_base_directory}/portable/model_parameters.pkl", "rb" + ) as phandle: + parameters = load(phandle) + self.data_size = parameters["data_size"] + self.layer_size = parameters["autoencoder_layer_size"] + self.latent_size = parameters["latent_size"] + self.depth = parameters["autoencoder_depth"] + self.dropout = parameters["dropout"] + self.use_leaky_relu = parameters["use_leaky_relu"] + + def summary(self): + """# Returns the summary of the Encoder model""" + return self.model.summary() + + def __call__(self, *args, **kwargs): + """# Callable + + When calling the encoder class, return the model's output""" + return self.model(*args, **kwargs) diff --git a/code/sunlab/sunflow/models/encoder_discriminator.py b/code/sunlab/sunflow/models/encoder_discriminator.py new file mode 100644 index 0000000..5efb6af --- /dev/null +++ b/code/sunlab/sunflow/models/encoder_discriminator.py @@ -0,0 +1,96 @@ +class EncoderDiscriminator: + """# EncoderDiscriminator Model + + Constructs an encoder-discriminator model""" + + def __init__(self, model_base_directory): + """# EncoderDiscriminator Model Initialization + + - model_base_directory: The base folder directory where the model will + be saved/ loaded""" + self.model_base_directory = model_base_directory + + def init(self, encoder, discriminator): + """# Initialize a EncoderDiscriminator + + - encoder: The encoder to use + - discriminator: The discriminator to use""" + from tensorflow import keras + + self.load_parameters() + self.model = keras.models.Sequential() + self.model.add(encoder.model) + self.model.add(discriminator.model) + self.model._name = "EncoderDiscriminator" + return self + + def load(self): + """# Load an existing EncoderDiscriminator""" + from os import listdir + + if "encoder_discriminator.keras" not in listdir( + f"{self.model_base_directory}/portable/" + ): + return None + import tensorflow as tf + + self.model = tf.keras.models.load_model( + f"{self.model_base_directory}/portable/encoder_discriminator" + ".keras", + compile=False, + ) + self.model._name = "EncoderDiscriminator" + return self + + def save(self, overwrite=False): + """# Save the current EncoderDiscriminator + + - Overwrite: overwrite any existing encoder_discriminator that has been + saved""" + from os import listdir + + if overwrite: + self.model.save( + 
f"{self.model_base_directory}/portable/encoder_discriminator" + ".keras" + ) + return True + if "encoder_discriminator.keras" in listdir( + f"{self.model_base_directory}/portable/" + ): + return False + self.model.save( + f"{self.model_base_directory}/portable/encoder_discriminator" + ".keras" + ) + return True + + def load_parameters(self): + """# Load EncoderDiscriminator Model Parameters from File + The file needs to have the following parameters defined: + - data_size: int + - autoencoder_layer_size: int + - latent_size: int + - autoencoder_depth: int + - dropout: float (set to 0. if you don't want a dropout layer) + - use_leaky_relu: boolean""" + from pickle import load + + with open( + f"{self.model_base_directory}/portable/model_parameters.pkl", "rb" + ) as phandle: + parameters = load(phandle) + self.data_size = parameters["data_size"] + self.layer_size = parameters["autoencoder_layer_size"] + self.latent_size = parameters["latent_size"] + self.depth = parameters["autoencoder_depth"] + self.dropout = parameters["dropout"] + self.use_leaky_relu = parameters["use_leaky_relu"] + + def summary(self): + """# Returns the summary of the EncoderDiscriminator model""" + return self.model.summary() + + def __call__(self, *args, **kwargs): + """# Callable + + When calling the encoder_discriminator class, return the model's + output""" + return self.model(*args, **kwargs) diff --git a/code/sunlab/sunflow/models/utilities.py b/code/sunlab/sunflow/models/utilities.py new file mode 100644 index 0000000..ab0c2a6 --- /dev/null +++ b/code/sunlab/sunflow/models/utilities.py @@ -0,0 +1,93 @@ +# Higher-level functions + +from sunlab.common.distribution.adversarial_distribution import AdversarialDistribution +from sunlab.common.scaler.adversarial_scaler import AdversarialScaler +from sunlab.common.data.utilities import import_dataset +from .adversarial_autoencoder import AdversarialAutoencoder + + +def create_aae( + dataset_file_name, + model_directory, + normalization_scaler: AdversarialScaler, + distribution: AdversarialDistribution or None, + magnification=10, + latent_size=2, +): + """# Create Adversarial Autoencoder + + - dataset_file_name: str = Path to the dataset file + - model_directory: str = Path to save the model in + - normalization_scaler: AdversarialScaler = Data normalization Scaler Model + - distribution: AdversarialDistribution = Distribution for the Adversary + - magnification: int = The Magnification of the Dataset""" + dataset = import_dataset(dataset_file_name, magnification) + model = AdversarialAutoencoder( + model_directory, distribution, normalization_scaler + ).init(dataset.dataset, latent_size=latent_size) + return model + + +def create_aae_and_dataset( + dataset_file_name, + model_directory, + normalization_scaler: AdversarialScaler, + distribution: AdversarialDistribution or None, + magnification=10, + batch_size=1024, + shuffle=True, + val_split=0.1, + latent_size=2, +): + """# Create Adversarial Autoencoder and Load the Dataset + + - dataset_file_name: str = Path to the dataset file + - model_directory: str = Path to save the model in + - normalization_scaler: AdversarialScaler = Data normalization Scaler Model + - distribution: AdversarialDistribution = Distribution for the Adversary + - magnification: int = The Magnification of the Dataset""" + model = create_aae( + dataset_file_name, + model_directory, + normalization_scaler, + distribution, + magnification=magnification, + latent_size=latent_size, + ) + dataset = import_dataset( + dataset_file_name, + magnification, + 
batch_size=batch_size, + shuffle=shuffle, + val_split=val_split, + scaler=model.scaler, + ) + return model, dataset + + +def load_aae(model_directory, normalization_scaler: AdversarialScaler): + """# Load Adversarial Autoencoder + + - model_directory: str = Path to save the model in + - normalization_scaler: AdversarialScaler = Data normalization Scaler Model + """ + return AdversarialAutoencoder(model_directory, None, normalization_scaler).load() + + +def load_aae_and_dataset( + dataset_file_name, + model_directory, + normalization_scaler: AdversarialScaler, + magnification=10, +): + """# Load Adversarial Autoencoder + + - dataset_file_name: str = Path to the dataset file + - model_directory: str = Path to save the model in + - normalization_scaler: AdversarialScaler = Data normalization Scaler Model + - magnification: int = The Magnification of the Dataset""" + model = load_aae(model_directory, normalization_scaler) + dataset = import_dataset( + dataset_file_name, magnification=magnification, scaler=model.scaler + ) + return model, dataset diff --git a/code/sunlab/sunflow/plotting/__init__.py b/code/sunlab/sunflow/plotting/__init__.py new file mode 100644 index 0000000..36e00e6 --- /dev/null +++ b/code/sunlab/sunflow/plotting/__init__.py @@ -0,0 +1 @@ +from .model_extensions import * diff --git a/code/sunlab/sunflow/plotting/model_extensions.py b/code/sunlab/sunflow/plotting/model_extensions.py new file mode 100644 index 0000000..087f8d3 --- /dev/null +++ b/code/sunlab/sunflow/plotting/model_extensions.py @@ -0,0 +1,289 @@ +from matplotlib import pyplot as plt +from sunlab.common.data.shape_dataset import ShapeDataset +from sunlab.globals import DIR_ROOT + + +def get_nonphysical_masks( + model, + xrange=[-1, 1], + yrange=[-1, 1], + bins=[500, 500], + equivdiameter_threshold=10, + solidity_threshold=0.1, + area_threshold=100, + perimeter_threshold=10, + area_max_threshold=7000, + perimeter_max_threshold=350, + area_min_threshold=100, + perimeter_min_threshold=5, + consistency_check=False, +): + """# Generate the Nonphysical Masks in Grid for Model + + Hard Constraints: + - Non-negative values + - Ratios no greater than 1 + + Soft Constraints: + - Area/ Perimeter Thresholds""" + import numpy as np + + x = np.linspace(xrange[0], xrange[1], bins[0]) + y = np.linspace(yrange[0], yrange[1], bins[1]) + X, Y = np.meshgrid(x, y) + X, Y = X.reshape((bins[0], bins[1], 1)), Y.reshape((bins[0], bins[1], 1)) + XY = np.concatenate([X.reshape((-1, 1)), Y.reshape((-1, 1))], axis=-1) + dec_v = model.decoder(XY).numpy().reshape((bins[0] * bins[1], 13)) + lXY = model.scaler.scaler.inverse_transform(dec_v).reshape((bins[0], bins[1], 13)) + # Hard Limits + non_negative_mask = np.all(lXY > 0, axis=-1) + solidity_mask = np.abs(lXY[:, :, 6]) <= 1 + extent_upper_bound_mask = lXY[:, :, 7] <= 1 + # Soft Extremas + area_max_mask = lXY[:, :, 4] < area_max_threshold + perimeter_max_mask = lXY[:, :, 9] < perimeter_max_threshold + area_min_mask = lXY[:, :, 4] > area_min_threshold + perimeter_min_mask = lXY[:, :, 9] > perimeter_min_threshold + # Self-Consistency + man_solidity_mask = np.abs(lXY[:, :, 0] / lXY[:, :, 4]) <= 1 + equivalent_diameter_mask = ( + np.abs(lXY[:, :, 5] - np.sqrt(4 * np.abs(lXY[:, :, 0]) / np.pi)) + < equivdiameter_threshold + ) + convex_area_mask = lXY[:, :, 0] < lXY[:, :, 4] + area_threshold + convex_perimeter_mask = lXY[:, :, 9] < lXY[:, :, 8] + perimeter_threshold + mask_info = { + "non-negative": non_negative_mask, + "solidity": solidity_mask, + "extent-max": extent_upper_bound_mask, + # + 
"area-max": area_max_mask, + "perimeter-max": perimeter_max_mask, + "area-min": area_min_mask, + "perimeter-min": perimeter_min_mask, + # + "computed-solidity": man_solidity_mask, + "equivalent-diameter": equivalent_diameter_mask, + "convex-area": convex_area_mask, + "convex-perimeter": convex_perimeter_mask, + } + if not consistency_check: + mask_info = { + "non-negative": non_negative_mask, + "solidity": solidity_mask, + "extent-max": extent_upper_bound_mask, + # + "area-max": area_max_mask, + "perimeter-max": perimeter_max_mask, + "area-min": area_min_mask, + "perimeter-min": perimeter_min_mask, + } + mask_list = [mask_info[key] for key in mask_info.keys()] + return np.all(mask_list, axis=0), X, Y, mask_info + + +def excavate(input_2d_array): + """# Return Boundaries for Masked Array + + Use X, Y directions only""" + from copy import deepcopy as dc + from numpy import nan_to_num, zeros_like, abs + + data_2d_array = dc(input_2d_array) + data_2d_array = nan_to_num(data_2d_array, nan=20) + # X-Gradient + x_grad = zeros_like(data_2d_array) + x_grad[:-1, :] = data_2d_array[1:, :] - data_2d_array[:-1, :] + x_grad[(abs(x_grad) > 10)] = 10 + x_grad[(abs(x_grad) < 10) & (abs(x_grad) > 0)] = 1 + x_grad[x_grad == 1] = 0.5 + x_grad[x_grad > 1] = 1 + # Y-Gradient + y_grad = zeros_like(data_2d_array) + y_grad[:, :-1] = data_2d_array[:, 1:] - data_2d_array[:, :-1] + y_grad[(abs(y_grad) > 10)] = 10 + y_grad[(abs(y_grad) < 10) & (abs(y_grad) > 0)] = 1 + y_grad[y_grad == 1] = 0.5 + y_grad[y_grad > 1] = 1 + return x_grad, y_grad + + +def excavate_extra(input_2d_array, N=1): + """# Return Boundaries for Masked Array + + Use all 8 directions""" + from copy import deepcopy as dc + from numpy import nan_to_num, zeros_like, abs + + data_2d_array = dc(input_2d_array) + data_2d_array = nan_to_num(data_2d_array, nan=20) + # X-Gradient + x_grad = zeros_like(data_2d_array) + x_grad[:-N, :] = data_2d_array[N:, :] - data_2d_array[:-N, :] + x_grad[(abs(x_grad) > 10)] = 10 + x_grad[(abs(x_grad) < 10) & (abs(x_grad) > 0)] = 1 + x_grad[x_grad == 1] = 0.5 + x_grad[x_grad > 1] = 1 + # Y-Gradient + y_grad = zeros_like(data_2d_array) + y_grad[:, :-N] = data_2d_array[:, N:] - data_2d_array[:, :-N] + y_grad[(abs(y_grad) > 10)] = 10 + y_grad[(abs(y_grad) < 10) & (abs(y_grad) > 0)] = 1 + y_grad[y_grad == 1] = 0.5 + y_grad[y_grad > 1] = 1 + # XY-Gradient + xy_grad = zeros_like(data_2d_array) + xy_grad[:-N, :-N] = data_2d_array[N:, N:] - data_2d_array[:-N, :-N] + xy_grad[(abs(xy_grad) > 10)] = 10 + xy_grad[(abs(xy_grad) < 10) & (abs(xy_grad) > 0)] = 1 + xy_grad[xy_grad == 1] = 0.5 + xy_grad[xy_grad > 1] = 1 + # X(-Y)-Gradient + yx_grad = zeros_like(data_2d_array) + yx_grad[:-N, :-N] = data_2d_array[N:, :-N] - data_2d_array[:-N, N:] + yx_grad[(abs(yx_grad) > 10)] = 10 + yx_grad[(abs(yx_grad) < 10) & (abs(yx_grad) > 0)] = 1 + yx_grad[yx_grad == 1] = 0.5 + yx_grad[yx_grad > 1] = 1 + xyn_grad = dc(yx_grad) + # (-X)Y-Gradient + xny_grad = zeros_like(data_2d_array) + xny_grad[:-N, :-N] = data_2d_array[:-N, N:] - data_2d_array[N:, :-N] + xny_grad[(abs(xy_grad) > 10)] = 10 + xny_grad[(abs(xy_grad) < 10) & (abs(xy_grad) > 0)] = 1 + xny_grad[xy_grad == 1] = 0.5 + xny_grad[xy_grad > 1] = 1 + # (-X)(-Y)-Gradient + xnyn_grad = zeros_like(data_2d_array) + xnyn_grad[:-N, :-N] = data_2d_array[:-N, :-N] - data_2d_array[N:, N:] + xnyn_grad[(abs(yx_grad) > 10)] = 10 + xnyn_grad[(abs(yx_grad) < 10) & (abs(yx_grad) > 0)] = 1 + xnyn_grad[yx_grad == 1] = 0.5 + xnyn_grad[yx_grad > 1] = 1 + return x_grad, y_grad, xy_grad, xyn_grad, xny_grad, xnyn_grad + + 
+def excavate_outline(arr, thickness=1): + """# Generate Transparency Mask with NaNs""" + from numpy import sum, abs, NaN + + outline = sum(abs(excavate_extra(arr, thickness)), axis=0) + outline[outline == 0] = NaN + outline[outline > 0] = 0 + return outline + + +def get_boundary_outline( + aae_model_object, + pixel_classification_file=None, + include_transition_regions=False, + border_thickness=3, + bin_count=800, + xrange=[-6.5, 6.5], + yrange=[-4.5, 4.5], + threshold=0.75, +): + """# Get Boundary Outlines""" + from copy import deepcopy + import numpy as np + + if pixel_classification_file is None: + pixel_classification_file = "../../extra_data/PhenotypePixels_65x45_800.npy" + base_classification = np.loadtxt(pixel_classification_file) + base_classification = base_classification.reshape((bin_count, bin_count, 4)) + max_classification_probability = np.zeros((bin_count, bin_count, 1)) + max_classification_probability[:, :, 0] = ( + np.max(base_classification, axis=-1) < threshold + ) + classes_with_include_transition_regions = np.concatenate( + [base_classification, max_classification_probability], axis=-1 + ) + if include_transition_regions: + phenotype_probabilities = deepcopy( + np.argsort(classes_with_include_transition_regions[:, :, :], axis=-1)[ + :, :, -1 + ] + ).astype(np.float32) + else: + phenotype_probabilities = deepcopy( + np.argsort(classes_with_include_transition_regions[:, :, :-1], axis=-1)[ + :, :, -1 + ] + ).astype(np.float32) + nonphysical_mask, _, _, _ = get_nonphysical_masks( + aae_model_object, xrange=xrange, yrange=yrange, bins=[bin_count, bin_count] + ) + nonphysical_mask = nonphysical_mask.astype(np.float32) + nonphysical_mask[nonphysical_mask == 0] = np.NaN + nonphysical_mask[nonphysical_mask == 1] = 0 + nonphysical_mask = nonphysical_mask.T + phenotype_regions = deepcopy(phenotype_probabilities.T + nonphysical_mask.T) + outline = excavate_outline(phenotype_regions, border_thickness) + return outline + + +def apply_boundary( + model_loc=DIR_ROOT + "models/current_model/", + border_thickness=3, + include_transition_regions=False, + threshold=0.7, + alpha=1, + _plt=None, +): + """# Apply Boundary to Plot + + Use Pregenerated Boundary by Default for Speed""" + from ..models import load_aae + from sunlab.common.scaler import MaxAbsScaler + import numpy as np + + if _plt is None: + _plt = plt + if (model_loc == model_loc) and (border_thickness == 3) and (threshold == 0.7): + XYM = np.load(DIR_ROOT + "extra_data/OutlineXYM.npy") + XY = XYM[:2, :, :] + if include_transition_regions: + outline = XYM[3, :, :] + else: + outline = XYM[2, :, :] + _plt.pcolor(XY[0, :, :], XY[1, :, :], outline, cmap="gray", alpha=alpha) + return + model = load_aae(model_loc, MaxAbsScaler) + bin_count = 800 + xrange = [-6.5, 6.5] + yrange = [-4.5, 4.5] + rng = [xrange, yrange] + X = np.linspace(rng[0][0], rng[0][1], bin_count) + Y = np.linspace(rng[1][0], rng[1][1], bin_count) + XY = np.array(np.meshgrid(X, Y)) + kwparams = { + "bin_count": bin_count, + "xrange": xrange, + "yrange": yrange, + } + + include_tregions = include_transition_regions + outline = get_boundary_outline( + model, + border_thickness=border_thickness, + include_transition_regions=include_tregions, + threshold=threshold, + **kwparams + ) + _plt.pcolor(XY[0, :, :], XY[1, :, :], outline, cmap="gray", alpha=alpha) + + +plt.apply_boundary = apply_boundary + + +def plot_shape_dataset(self, model, *args, **kwargs): + """# Plot Shape Dataset""" + if self.labels is None: + plt.scatter2d(model.encoder(self.dataset), *args, **kwargs) 
+ else: + plt.scatter2d(model.encoder(self.dataset), self.labels, *args, **kwargs) + + +ShapeDataset.plot = lambda model, *args, **kwargs: plot_shape_dataset( + model, *args, **kwargs +) diff --git a/code/sunlab/suntorch/__init__.py b/code/sunlab/suntorch/__init__.py new file mode 100644 index 0000000..d394e27 --- /dev/null +++ b/code/sunlab/suntorch/__init__.py @@ -0,0 +1,3 @@ +from ..common import * +from .models import * +from .plotting import * diff --git a/code/sunlab/suntorch/data/__init__.py b/code/sunlab/suntorch/data/__init__.py new file mode 100644 index 0000000..b9a32c0 --- /dev/null +++ b/code/sunlab/suntorch/data/__init__.py @@ -0,0 +1 @@ +from .utilities import * diff --git a/code/sunlab/suntorch/data/utilities.py b/code/sunlab/suntorch/data/utilities.py new file mode 100644 index 0000000..05318f5 --- /dev/null +++ b/code/sunlab/suntorch/data/utilities.py @@ -0,0 +1,55 @@ +from sunlab.common import ShapeDataset +from sunlab.common import MaxAbsScaler + + +def process_and_load_dataset( + dataset_file, model_folder, magnification=10, scaler=MaxAbsScaler +): + """# Load a dataset and process a models' Latent Space on the Dataset""" + raise NotImplemented("Still Implementing for PyTorch") + from ..models import load_aae + from sunlab.common import import_full_dataset + + model = load_aae(model_folder, normalization_scaler=scaler) + dataset = import_full_dataset( + dataset_file, magnification=magnification, scaler=model.scaler + ) + latent = model.encoder(dataset.dataset).numpy() + assert len(latent.shape) == 2, "Only 1D Latent Vectors Supported" + for dim in range(latent.shape[1]): + dataset.dataframe[f"Latent-{dim}"] = latent[:, dim] + return dataset + + +def process_and_load_datasets( + dataset_file_list, model_folder, magnification=10, scaler=MaxAbsScaler +): + from pandas import concat + from ..models import load_aae + + raise NotImplemented("Still Implementing for PyTorch") + dataframes = [] + datasets = [] + for dataset_file in dataset_file_list: + dataset = process_and_load_dataset( + dataset_file, model_folder, magnification, scaler + ) + model = load_aae(model_folder, normalization_scaler=scaler) + dataframe = dataset.dataframe + for label in ["ActinEdge", "Filopodia", "Bleb", "Lamellipodia"]: + if label in dataframe.columns: + dataframe[label.lower()] = dataframe[label] + if label.lower() not in dataframe.columns: + dataframe[label.lower()] = 0 + latent_columns = [f"Latent-{dim}" for dim in range(model.latent_size)] + datasets.append(dataset) + dataframes.append( + dataframe[ + dataset.data_columns + + dataset.label_columns + + latent_columns + + ["Frames", "CellNum"] + + ["actinedge", "filopodia", "bleb", "lamellipodia"] + ] + ) + return datasets, concat(dataframes) diff --git a/code/sunlab/suntorch/models/__init__.py b/code/sunlab/suntorch/models/__init__.py new file mode 100644 index 0000000..03d6a45 --- /dev/null +++ b/code/sunlab/suntorch/models/__init__.py @@ -0,0 +1,3 @@ +from .adversarial_autoencoder import AdversarialAutoencoder +from .autoencoder import Autoencoder +from .variational.autoencoder import VariationalAutoencoder diff --git a/code/sunlab/suntorch/models/adversarial_autoencoder.py b/code/sunlab/suntorch/models/adversarial_autoencoder.py new file mode 100644 index 0000000..e3e8a06 --- /dev/null +++ b/code/sunlab/suntorch/models/adversarial_autoencoder.py @@ -0,0 +1,165 @@ +import torch +import torch.nn.functional as F +from torch.autograd import Variable + +from .encoder import Encoder +from .decoder import Decoder +from .discriminator import 
Discriminator +from .common import * + + +def dummy_distribution(*args): + raise NotImplementedError("Give a distribution") + + +class AdversarialAutoencoder: + """# Adversarial Autoencoder Model""" + + def __init__( + self, + data_dim, + latent_dim, + enc_dec_size, + disc_size, + negative_slope=0.3, + dropout=0.0, + distribution=dummy_distribution, + ): + self.encoder = Encoder( + data_dim, + enc_dec_size, + latent_dim, + negative_slope=negative_slope, + dropout=dropout, + ) + self.decoder = Decoder( + data_dim, + enc_dec_size, + latent_dim, + negative_slope=negative_slope, + dropout=dropout, + ) + self.discriminator = Discriminator( + disc_size, latent_dim, negative_slope=negative_slope, dropout=dropout + ) + self.data_dim = data_dim + self.latent_dim = latent_dim + self.p = dropout + self.negative_slope = negative_slope + self.distribution = distribution + return + + def parameters(self): + return ( + *self.encoder.parameters(), + *self.decoder.parameters(), + *self.discriminator.parameters(), + ) + + def train(self): + self.encoder.train(True) + self.decoder.train(True) + self.discriminator.train(True) + return self + + def eval(self): + self.encoder.train(False) + self.decoder.train(False) + self.discriminator.train(False) + return self + + def encode(self, data): + return self.encoder(data) + + def decode(self, data): + return self.decoder(data) + + def __call__(self, data): + return self.decode(self.encode(data)) + + def save(self, base="./"): + torch.save(self.encoder.state_dict(), base + "weights_encoder.pt") + torch.save(self.decoder.state_dict(), base + "weights_decoder.pt") + torch.save(self.discriminator.state_dict(), base + "weights_discriminator.pt") + return self + + def load(self, base="./"): + self.encoder.load_state_dict(torch.load(base + "weights_encoder.pt")) + self.encoder.eval() + self.decoder.load_state_dict(torch.load(base + "weights_decoder.pt")) + self.decoder.eval() + self.discriminator.load_state_dict( + torch.load(base + "weights_discriminator.pt") + ) + self.discriminator.eval() + return self + + def to(self, device): + self.encoder.to(device) + self.decoder.to(device) + self.discriminator.to(device) + return self + + def cuda(self): + if torch.cuda.is_available(): + self.encoder = self.encoder.cuda() + self.decoder = self.decoder.cuda() + self.discriminator = self.discriminator.cuda() + return self + + def cpu(self): + self.encoder = self.encoder.cpu() + self.decoder = self.decoder.cpu() + self.discriminator = self.discriminator.cpu() + return self + + def init_optimizers(self, recon_lr=1e-4, adv_lr=5e-5): + self.optim_E_gen = torch.optim.Adam(self.encoder.parameters(), lr=adv_lr) + self.optim_E_enc = torch.optim.Adam(self.encoder.parameters(), lr=recon_lr) + self.optim_D_dec = torch.optim.Adam(self.decoder.parameters(), lr=recon_lr) + self.optim_D_dis = torch.optim.Adam(self.discriminator.parameters(), lr=adv_lr) + return self + + def init_losses(self, recon_loss_fn=F.binary_cross_entropy): + self.recon_loss_fn = recon_loss_fn + return self + + def train_step(self, raw_data, scale=1.0): + data = to_var(raw_data.view(raw_data.size(0), -1)) + + self.encoder.zero_grad() + self.decoder.zero_grad() + self.discriminator.zero_grad() + + z = self.encoder(data) + X = self.decoder(z) + self.recon_loss = self.recon_loss_fn(X + EPS, data + EPS) + self.recon_loss.backward() + self.optim_E_enc.step() + self.optim_D_dec.step() + + self.encoder.eval() + z_gaussian = to_var(self.distribution(data.size(0), self.latent_dim) * scale) + z_gaussian_fake = self.encoder(data) + 
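        # Adversarial step: z_gaussian holds samples from the supplied prior and
        # z_gaussian_fake is the encoder output for this batch; the D_loss below
        # trains the discriminator to score prior samples near 1 and encoded
        # samples near 0, and the G_loss that follows updates the encoder to fool it.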
y_gaussian = self.discriminator(z_gaussian) + y_gaussian_fake = self.discriminator(z_gaussian_fake) + self.D_loss = -torch.mean( + torch.log(y_gaussian + EPS) + torch.log(1 - y_gaussian_fake + EPS) + ) + self.D_loss.backward() + self.optim_D_dis.step() + + self.encoder.train() + z_gaussian = self.encoder(data) + y_gaussian = self.discriminator(z_gaussian) + self.G_loss = -torch.mean(torch.log(y_gaussian + EPS)) + self.G_loss.backward() + self.optim_E_gen.step() + return + + def losses(self): + try: + return self.recon_loss, self.D_loss, self.G_loss + except: + ... + return diff --git a/code/sunlab/suntorch/models/autoencoder.py b/code/sunlab/suntorch/models/autoencoder.py new file mode 100644 index 0000000..232f180 --- /dev/null +++ b/code/sunlab/suntorch/models/autoencoder.py @@ -0,0 +1,114 @@ +import torch +import torch.nn.functional as F +from torch.autograd import Variable + +from .encoder import Encoder +from .decoder import Decoder +from .common import * + + +class Autoencoder: + """# Autoencoder Model""" + + def __init__( + self, data_dim, latent_dim, enc_dec_size, negative_slope=0.3, dropout=0.0 + ): + self.encoder = Encoder( + data_dim, + enc_dec_size, + latent_dim, + negative_slope=negative_slope, + dropout=dropout, + ) + self.decoder = Decoder( + data_dim, + enc_dec_size, + latent_dim, + negative_slope=negative_slope, + dropout=dropout, + ) + self.data_dim = data_dim + self.latent_dim = latent_dim + self.p = dropout + self.negative_slope = negative_slope + return + + def parameters(self): + return (*self.encoder.parameters(), *self.decoder.parameters()) + + def train(self): + self.encoder.train(True) + self.decoder.train(True) + return self + + def eval(self): + self.encoder.train(False) + self.decoder.train(False) + return self + + def encode(self, data): + return self.encoder(data) + + def decode(self, data): + return self.decoder(data) + + def __call__(self, data): + return self.decode(self.encode(data)) + + def save(self, base="./"): + torch.save(self.encoder.state_dict(), base + "weights_encoder.pt") + torch.save(self.decoder.state_dict(), base + "weights_decoder.pt") + return self + + def load(self, base="./"): + self.encoder.load_state_dict(torch.load(base + "weights_encoder.pt")) + self.encoder.eval() + self.decoder.load_state_dict(torch.load(base + "weights_decoder.pt")) + self.decoder.eval() + return self + + def to(self, device): + self.encoder.to(device) + self.decoder.to(device) + self.discriminator.to(device) + return self + + def cuda(self): + if torch.cuda.is_available(): + self.encoder = self.encoder.cuda() + self.decoder = self.decoder.cuda() + return self + + def cpu(self): + self.encoder = self.encoder.cpu() + self.decoder = self.decoder.cpu() + return self + + def init_optimizers(self, recon_lr=1e-4): + self.optim_E_enc = torch.optim.Adam(self.encoder.parameters(), lr=recon_lr) + self.optim_D = torch.optim.Adam(self.decoder.parameters(), lr=recon_lr) + return self + + def init_losses(self, recon_loss_fn=F.binary_cross_entropy): + self.recon_loss_fn = recon_loss_fn + return self + + def train_step(self, raw_data): + data = to_var(raw_data.view(raw_data.size(0), -1)) + + self.encoder.zero_grad() + self.decoder.zero_grad() + z = self.encoder(data) + X = self.decoder(z) + self.recon_loss = self.recon_loss_fn(X + EPS, data + EPS) + self.recon_loss.backward() + self.optim_E_enc.step() + self.optim_D.step() + return + + def losses(self): + try: + return self.recon_loss + except: + ... 
+ return diff --git a/code/sunlab/suntorch/models/common.py b/code/sunlab/suntorch/models/common.py new file mode 100644 index 0000000..f10930e --- /dev/null +++ b/code/sunlab/suntorch/models/common.py @@ -0,0 +1,12 @@ +from torch.autograd import Variable + +EPS = 1e-15 + + +def to_var(x): + """# Convert to variable""" + import torch + + if torch.cuda.is_available(): + x = x.cuda() + return Variable(x) diff --git a/code/sunlab/suntorch/models/convolutional/variational/autoencoder.py b/code/sunlab/suntorch/models/convolutional/variational/autoencoder.py new file mode 100644 index 0000000..970f717 --- /dev/null +++ b/code/sunlab/suntorch/models/convolutional/variational/autoencoder.py @@ -0,0 +1,190 @@ +import torch +from torch import nn + + +class ConvolutionalVariationalAutoencoder(nn.Module): + def __init__(self, latent_dims, hidden_dims, image_shape, dropout=0.0): + super(ConvolutionalVariationalAutoencoder, self).__init__() + + self.latent_dims = latent_dims # Size of the latent space layer + self.hidden_dims = ( + hidden_dims # List of hidden layers number of filters/channels + ) + self.image_shape = image_shape # Input image shape + + self.last_channels = self.hidden_dims[-1] + self.in_channels = self.image_shape[0] + # Simple formula to get the number of neurons after the last convolution layer is flattened + self.flattened_channels = int( + self.last_channels + * (self.image_shape[1] / (2 ** len(self.hidden_dims))) ** 2 + ) + + # For each hidden layer we will create a Convolution Block + modules = [] + for h_dim in self.hidden_dims: + modules.append( + nn.Sequential( + nn.Conv2d( + in_channels=self.in_channels, + out_channels=h_dim, + kernel_size=3, + stride=2, + padding=1, + ), + nn.BatchNorm2d(h_dim), + nn.LeakyReLU(), + nn.Dropout(p=dropout), + ) + ) + + self.in_channels = h_dim + + self.encoder = nn.Sequential(*modules) + + # Here are our layers for our latent space distribution + self.fc_mu = nn.Linear(self.flattened_channels, latent_dims) + self.fc_var = nn.Linear(self.flattened_channels, latent_dims) + + # Decoder input layer + self.decoder_input = nn.Linear(latent_dims, self.flattened_channels) + + # For each Convolution Block created on the Encoder we will do a symmetric Decoder with the same Blocks, but using ConvTranspose + self.hidden_dims.reverse() + modules = [] + for h_dim in self.hidden_dims: + modules.append( + nn.Sequential( + nn.ConvTranspose2d( + in_channels=self.in_channels, + out_channels=h_dim, + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + ), + nn.BatchNorm2d(h_dim), + nn.LeakyReLU(), + nn.Dropout(p=dropout), + ) + ) + + self.in_channels = h_dim + + self.decoder = nn.Sequential(*modules) + + # The final layer the reconstructed image have the same dimensions as the input image + self.final_layer = nn.Sequential( + nn.Conv2d( + in_channels=self.in_channels, + out_channels=self.image_shape[0], + kernel_size=3, + padding=1, + ), + nn.Sigmoid(), + ) + + def get_latent_dims(self): + + return self.latent_dims + + def encode(self, input): + """ + Encodes the input by passing through the encoder network + and returns the latent codes. + """ + result = self.encoder(input) + result = torch.flatten(result, start_dim=1) + # Split the result into mu and var componentsbof the latent Gaussian distribution + mu = self.fc_mu(result) + log_var = self.fc_var(result) + + return [mu, log_var] + + def decode(self, z): + """ + Maps the given latent codes onto the image space. 
+ """ + result = self.decoder_input(z) + result = result.view( + -1, + self.last_channels, + int(self.image_shape[1] / (2 ** len(self.hidden_dims))), + int(self.image_shape[1] / (2 ** len(self.hidden_dims))), + ) + result = self.decoder(result) + result = self.final_layer(result) + + return result + + def reparameterize(self, mu, log_var): + """ + Reparameterization trick to sample from N(mu, var) from N(0,1). + """ + std = torch.exp(0.5 * log_var) + eps = torch.randn_like(std) + + return mu + eps * std + + def forward(self, input): + """ + Forward method which will encode and decode our image. + """ + mu, log_var = self.encode(input) + z = self.reparameterize(mu, log_var) + + return [self.decode(z), input, mu, log_var, z] + + def loss_function(self, recons, input, mu, log_var): + """ + Computes VAE loss function + """ + recons_loss = nn.functional.binary_cross_entropy( + recons.reshape(recons.shape[0], -1), + input.reshape(input.shape[0], -1), + reduction="none", + ).sum(dim=-1) + + kld_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp(), dim=-1) + + loss = (recons_loss + kld_loss).mean(dim=0) + + return loss + + def sample(self, num_samples, device): + """ + Samples from the latent space and return the corresponding + image space map. + """ + z = torch.randn(num_samples, self.latent_dims) + z = z.to(device) + samples = self.decode(z) + + return samples + + def generate(self, x): + """ + Given an input image x, returns the reconstructed image + """ + return self.forward(x)[0] + + def interpolate(self, starting_inputs, ending_inputs, device, granularity=10): + """This function performs a linear interpolation in the latent space of the autoencoder + from starting inputs to ending inputs. It returns the interpolation trajectories. + """ + mu, log_var = self.encode(starting_inputs.to(device)) + starting_z = self.reparameterize(mu, log_var) + + mu, log_var = self.encode(ending_inputs.to(device)) + ending_z = self.reparameterize(mu, log_var) + + t = torch.linspace(0, 1, granularity).to(device) + + intep_line = torch.kron( + starting_z.reshape(starting_z.shape[0], -1), (1 - t).unsqueeze(-1) + ) + torch.kron(ending_z.reshape(ending_z.shape[0], -1), t.unsqueeze(-1)) + + decoded_line = self.decode(intep_line).reshape( + (starting_inputs.shape[0], t.shape[0]) + (starting_inputs.shape[1:]) + ) + return decoded_line diff --git a/code/sunlab/suntorch/models/decoder.py b/code/sunlab/suntorch/models/decoder.py new file mode 100644 index 0000000..2eeb7a4 --- /dev/null +++ b/code/sunlab/suntorch/models/decoder.py @@ -0,0 +1,33 @@ +import torch.nn as nn +import torch.nn.functional as F +from torch import sigmoid + + +class Decoder(nn.Module): + """# Decoder Neural Network + X_dim: Output dimension shape + N: Inner neuronal layer size + z_dim: Input dimension shape + """ + + def __init__(self, X_dim, N, z_dim, dropout=0.0, negative_slope=0.3): + super(Decoder, self).__init__() + self.lin1 = nn.Linear(z_dim, N) + self.lin2 = nn.Linear(N, N) + self.lin3 = nn.Linear(N, X_dim) + self.p = dropout + self.negative_slope = negative_slope + + def forward(self, x): + x = self.lin1(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + x = self.lin2(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + x = self.lin3(x) + return sigmoid(x) diff --git a/code/sunlab/suntorch/models/discriminator.py b/code/sunlab/suntorch/models/discriminator.py new 
file mode 100644 index 0000000..9249095 --- /dev/null +++ b/code/sunlab/suntorch/models/discriminator.py @@ -0,0 +1,32 @@ +import torch.nn as nn +import torch.nn.functional as F +from torch import sigmoid + + +class Discriminator(nn.Module): + """# Discriminator Neural Network + N: Inner neuronal layer size + z_dim: Input dimension shape + """ + + def __init__(self, N, z_dim, dropout=0.0, negative_slope=0.3): + super(Discriminator, self).__init__() + self.lin1 = nn.Linear(z_dim, N) + self.lin2 = nn.Linear(N, N) + self.lin3 = nn.Linear(N, 1) + self.p = dropout + self.negative_slope = negative_slope + + def forward(self, x): + x = self.lin1(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + x = self.lin2(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + x = self.lin3(x) + return sigmoid(x) diff --git a/code/sunlab/suntorch/models/encoder.py b/code/sunlab/suntorch/models/encoder.py new file mode 100644 index 0000000..e6f88c7 --- /dev/null +++ b/code/sunlab/suntorch/models/encoder.py @@ -0,0 +1,32 @@ +import torch.nn as nn +import torch.nn.functional as F + + +class Encoder(nn.Module): + """# Encoder Neural Network + X_dim: Input dimension shape + N: Inner neuronal layer size + z_dim: Output dimension shape + """ + + def __init__(self, X_dim, N, z_dim, dropout=0.0, negative_slope=0.3): + super(Encoder, self).__init__() + self.lin1 = nn.Linear(X_dim, N) + self.lin2 = nn.Linear(N, N) + self.lin3gauss = nn.Linear(N, z_dim) + self.p = dropout + self.negative_slope = negative_slope + + def forward(self, x): + x = self.lin1(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + x = self.lin2(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + xgauss = self.lin3gauss(x) + return xgauss diff --git a/code/sunlab/suntorch/models/variational/autoencoder.py b/code/sunlab/suntorch/models/variational/autoencoder.py new file mode 100644 index 0000000..e335704 --- /dev/null +++ b/code/sunlab/suntorch/models/variational/autoencoder.py @@ -0,0 +1,128 @@ +import torch +import torch.nn.functional as F +from torch.autograd import Variable + +from .encoder import Encoder +from .decoder import Decoder +from .common import * + + +class VariationalAutoencoder: + """# Variational Autoencoder Model""" + + def __init__( + self, data_dim, latent_dim, enc_dec_size, negative_slope=0.3, dropout=0.0 + ): + self.encoder = Encoder( + data_dim, + enc_dec_size, + latent_dim, + negative_slope=negative_slope, + dropout=dropout, + ) + self.decoder = Decoder( + data_dim, + enc_dec_size, + latent_dim, + negative_slope=negative_slope, + dropout=dropout, + ) + self.data_dim = data_dim + self.latent_dim = latent_dim + self.p = dropout + self.negative_slope = negative_slope + return + + def parameters(self): + return (*self.encoder.parameters(), *self.decoder.parameters()) + + def train(self): + self.encoder.train(True) + self.decoder.train(True) + return self + + def eval(self): + self.encoder.train(False) + self.decoder.train(False) + return self + + def encode(self, data): + return self.encoder(data) + + def decode(self, data): + return self.decoder(data) + + def reparameterization(self, mean, var): + epsilon = torch.randn_like(var) + if torch.cuda.is_available(): + epsilon = epsilon.cuda() + z = mean 
+ var * epsilon + return z + + def forward(self, x): + mean, log_var = self.encoder(x) + z = self.reparameterization(mean, torch.exp(0.5 * log_var)) + X = self.decoder(z) + return X, mean, log_var + + def __call__(self, data): + return self.forward(data) + + def save(self, base="./"): + torch.save(self.encoder.state_dict(), base + "weights_encoder.pt") + torch.save(self.decoder.state_dict(), base + "weights_decoder.pt") + return self + + def load(self, base="./"): + self.encoder.load_state_dict(torch.load(base + "weights_encoder.pt")) + self.encoder.eval() + self.decoder.load_state_dict(torch.load(base + "weights_decoder.pt")) + self.decoder.eval() + return self + + def to(self, device): + self.encoder.to(device) + self.decoder.to(device) + return self + + def cuda(self): + if torch.cuda.is_available(): + self.encoder = self.encoder.cuda() + self.decoder = self.decoder.cuda() + return self + + def cpu(self): + self.encoder = self.encoder.cpu() + self.decoder = self.decoder.cpu() + return self + + def init_optimizers(self, recon_lr=1e-4): + self.optim_E_enc = torch.optim.Adam(self.encoder.parameters(), lr=recon_lr) + self.optim_D = torch.optim.Adam(self.decoder.parameters(), lr=recon_lr) + return self + + def init_losses(self, recon_loss_fn=F.binary_cross_entropy): + self.recon_loss_fn = recon_loss_fn + return self + + def train_step(self, raw_data): + data = to_var(raw_data.view(raw_data.size(0), -1)) + + self.encoder.zero_grad() + self.decoder.zero_grad() + X, _, _ = self.forward(data) + # mean, log_var = self.encoder(data) + # z = self.reparameterization(mean, torch.exp(0.5 * log_var)) + # X = self.decoder(z) + self.recon_loss = self.recon_loss_fn(X + EPS, data + EPS) + self.recon_loss.backward() + self.optim_E_enc.step() + self.optim_D.step() + return + + def losses(self): + try: + return self.recon_loss + except: + ... 
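# As written, train_step above optimizes only the reconstruction loss; the
# KL-divergence term that the convolutional variant's loss_function adds is
# not included here. A sketch of a step that adds it (beta and the helper
# name are assumptions; to_var, EPS, and torch come from this module's scope,
# and init_optimizers/init_losses must have been called first):
def vae_train_step_with_kld(vae, raw_data, beta=1.0):
    """One update on reconstruction + beta * KL(q(z|x) || N(0, I))."""
    data = to_var(raw_data.view(raw_data.size(0), -1))
    vae.encoder.zero_grad()
    vae.decoder.zero_grad()
    X, mean, log_var = vae.forward(data)
    recon = vae.recon_loss_fn(X + EPS, data + EPS)
    kld = -0.5 * torch.mean(
        torch.sum(1 + log_var - mean.pow(2) - log_var.exp(), dim=-1)
    )
    (recon + beta * kld).backward()
    vae.optim_E_enc.step()
    vae.optim_D.step()
    return recon, kld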
+ return diff --git a/code/sunlab/suntorch/models/variational/common.py b/code/sunlab/suntorch/models/variational/common.py new file mode 100644 index 0000000..f10930e --- /dev/null +++ b/code/sunlab/suntorch/models/variational/common.py @@ -0,0 +1,12 @@ +from torch.autograd import Variable + +EPS = 1e-15 + + +def to_var(x): + """# Convert to variable""" + import torch + + if torch.cuda.is_available(): + x = x.cuda() + return Variable(x) diff --git a/code/sunlab/suntorch/models/variational/decoder.py b/code/sunlab/suntorch/models/variational/decoder.py new file mode 100644 index 0000000..2eeb7a4 --- /dev/null +++ b/code/sunlab/suntorch/models/variational/decoder.py @@ -0,0 +1,33 @@ +import torch.nn as nn +import torch.nn.functional as F +from torch import sigmoid + + +class Decoder(nn.Module): + """# Decoder Neural Network + X_dim: Output dimension shape + N: Inner neuronal layer size + z_dim: Input dimension shape + """ + + def __init__(self, X_dim, N, z_dim, dropout=0.0, negative_slope=0.3): + super(Decoder, self).__init__() + self.lin1 = nn.Linear(z_dim, N) + self.lin2 = nn.Linear(N, N) + self.lin3 = nn.Linear(N, X_dim) + self.p = dropout + self.negative_slope = negative_slope + + def forward(self, x): + x = self.lin1(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + x = self.lin2(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + x = self.lin3(x) + return sigmoid(x) diff --git a/code/sunlab/suntorch/models/variational/encoder.py b/code/sunlab/suntorch/models/variational/encoder.py new file mode 100644 index 0000000..b08202f --- /dev/null +++ b/code/sunlab/suntorch/models/variational/encoder.py @@ -0,0 +1,34 @@ +import torch.nn as nn +import torch.nn.functional as F + + +class Encoder(nn.Module): + """# Encoder Neural Network + X_dim: Input dimension shape + N: Inner neuronal layer size + z_dim: Output dimension shape + """ + + def __init__(self, X_dim, N, z_dim, dropout=0.0, negative_slope=0.3): + super(Encoder, self).__init__() + self.lin1 = nn.Linear(X_dim, N) + self.lin2 = nn.Linear(N, N) + self.lin3mu = nn.Linear(N, z_dim) + self.lin3sigma = nn.Linear(N, z_dim) + self.p = dropout + self.negative_slope = negative_slope + + def forward(self, x): + x = self.lin1(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + x = self.lin2(x) + if self.p > 0.0: + x = F.dropout(x, p=self.p, training=self.training) + x = F.leaky_relu(x, negative_slope=self.negative_slope) + + mu = self.lin3mu(x) + sigma = self.lin3sigma(x) + return mu, sigma diff --git a/code/sunlab/suntorch/plotting/__init__.py b/code/sunlab/suntorch/plotting/__init__.py new file mode 100644 index 0000000..36e00e6 --- /dev/null +++ b/code/sunlab/suntorch/plotting/__init__.py @@ -0,0 +1 @@ +from .model_extensions import * diff --git a/code/sunlab/suntorch/plotting/model_extensions.py b/code/sunlab/suntorch/plotting/model_extensions.py new file mode 100644 index 0000000..33f0191 --- /dev/null +++ b/code/sunlab/suntorch/plotting/model_extensions.py @@ -0,0 +1,34 @@ +from matplotlib import pyplot as plt +from sunlab.common.data.shape_dataset import ShapeDataset +from sunlab.globals import DIR_ROOT + + +def apply_boundary( + model_loc=DIR_ROOT + "models/current_model/", + border_thickness=3, + include_transition_regions=False, + threshold=0.7, + alpha=1, + _plt=None, +): + """# Apply 
Boundary to Plot + + Use Pregenerated Boundary by Default for Speed""" + from sunlab.common.scaler import MaxAbsScaler + import numpy as np + + if _plt is None: + _plt = plt + if (model_loc == model_loc) and (border_thickness == 3) and (threshold == 0.7): + XYM = np.load(DIR_ROOT + "extra_data/OutlineXYM.npy") + XY = XYM[:2, :, :] + if include_transition_regions: + outline = XYM[3, :, :] + else: + outline = XYM[2, :, :] + _plt.pcolor(XY[0, :, :], XY[1, :, :], outline, cmap="gray", alpha=alpha) + return + raise NotImplemented("Not Yet Implemented for PyTorch!") + + +plt.apply_boundary = apply_boundary diff --git a/code/sunlab/transform_data.py b/code/sunlab/transform_data.py new file mode 100644 index 0000000..d6e3813 --- /dev/null +++ b/code/sunlab/transform_data.py @@ -0,0 +1,799 @@ +from sklearn import preprocessing +import numpy as np + +# import features + + +def import_train_set(train_file_name="AllResults.txt"): + featurelist = [] + + with open(train_file_name, "r") as infile: + for line in infile: + featurelist.append(line.strip()) + + # so now, featurelist[1] has names of things in form + # 'Area, MajorAxisLength, ... Class' + FeatureNames = [x.strip() for x in featurelist[0].split(",")] + # FeatureNames has form ['Area','MajorAxisLength',....'Class'] + # which is what I wanted + + AllData = [ + [float(x.strip()) for x in featurelist[i].split(",")] + for i in range(1, len(featurelist)) + ] + + # Data is in form [[1,2,3,....0.0],[3,3,1,...0.0],...[5,3,1,...0.0]], + # the last input is the class. + + classes = [int(i[-1]) for i in AllData] + + # classes contains the class number from which the data is from + + # want to delete target from AllData. + + X = [i[0:-1] for i in AllData] + + # X has form similar to Data. So when we reshape, we want the output to be + # X = array([[0,1,2,...] + # [1,2,3,...]]) + + Data = np.asarray(X, order="F") + + # this has the right form, is uses fortran column-major style memory representation vs row major C-style + # the notation is scientific, where iris data set looks like a float. CHECKED: Both are type numpy.float64 + # both have same indexing calls, so I think we're in business. + + # looks exactly correct, or at least like iris data set target. + Target = np.asarray(classes) + return (Data, Target) + + +######################################################################## +# for training purposes, the number of samples in data must be divisible by 256 + + +def Trim_Train_Data(Data, Target, max_length=None, balance=False): + #### + # Inputs: Data is numpy array with N samples (rows) and M measures (cols) + # Target is 1xN samples with ground truth + # max_length defines maximum length of training data. Should be divisible by 256, might want to code that... + # balance is boolean if you wish to have same number of samples in each class. + print("Class lengths are = ", [sum(Target == i) for i in set(Target)]) + if not balance: + if ( + np.shape(Data)[0] / 256 != np.round(np.shape(Data)[0] / 256) + or max_length < np.shape(Data)[0] + ): + print("Trimming data for training purposes...") + if not max_length: + max_length = 256 * (np.floor(np.shape(Data)[0] / 256)) + else: + if max_length / 256 != np.round(max_length / 256): + # must make it divisible by 256 + max_length = int(np.floor(max_length / 256) * 256) + print( + "Your given max_length was not divisible by 256. New max length is = %d" + % max_length + ) + # determine percentages of each class. 
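# The block below takes each class's share of Target, keeps
# floor(share * max_length) rows per class, and then tops the selection up
# one row at a time (cycling through the classes) until exactly max_length
# rows remain. For example, 1000 samples split 600/300/100 trimmed to
# max_length = 512 keep 307 + 153 + 51 = 511 rows, and one extra row from
# the first class is appended to reach 512.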
+ cs = np.unique(Target) + ps = np.zeros(shape=(1, len(cs))) + ps = ps[0] + rows_to_take = np.array([]) + for i in range(len(cs)): + ps[i] = np.sum(Target == cs[i]) / len(Target) + goodrows = np.where(Target == cs[i])[0] + rows_to_take = np.append( + rows_to_take, goodrows[0 : int(np.floor(ps[i] * max_length))] + ) + + ad_row = 0 + class_ind = 0 + while len(rows_to_take) != max_length: + # need to supplament. + goodrows = np.where(Target == cs[class_ind])[0] + rows_to_take = np.append( + rows_to_take, + goodrows[int(np.floor(ps[class_ind] * max_length)) + 1 + ad_row], + ) + class_ind = class_ind + 1 + if class_ind > len(cs): + class_ind = 0 + ad_row = ad_row + 1 + rows_to_take = rows_to_take.astype(int) + X_train_scaled = Data[rows_to_take, :] + Y_train = Target[rows_to_take] + print("Complete") + else: + X_train_scaled = Data + Y_train = Target + print("Final training length = %d" % X_train_scaled.shape[0]) + print( + "Class lengths after trimming are = ", + [sum(Y_train == i) for i in set(Y_train)], + ) + return (X_train_scaled, Y_train) + else: + # determine which has the minimum number of cases. + cs = np.unique(Target) + lens = np.zeros((len(cs))) + for i in range(len(cs)): + lens[i] = sum(Target == cs[i]) + + # randomly sample from each class now that number of samples. + min_len = int(min(lens)) + rows_to_take = np.array([]) + for i in range(len(cs)): + possiblerows = np.where(Target == cs[i])[0] + # now sample without replacement. + rows_to_take = np.append( + rows_to_take, np.random.choice(possiblerows, min_len, replace=False) + ) + if len(rows_to_take) / 256 != np.round( + len(rows_to_take) / 256 + ) or max_length < len(rows_to_take): + # trim until correct size. + if not max_length: + max_length = 256 * (np.floor(np.shape(Data)[0] / 256)) + else: + if max_length / 256 != np.round(max_length / 256): + # must make it divisible by 256 + max_length = int(np.floor(max_length / 256) * 256) + print( + "Your given max_length was not divisible by 256. New max length is = %d" + % max_length + ) + # use min_len now to delete entries. + timearound = 0 + pheno = len(cs) # start at the end + while len(rows_to_take) > max_length: + # entry to delete is + # first (min_len-round)*range(1,len(np.unique(Target))+1) -1 + # print("%d entry delete" % (((min_len-timearound)*pheno) - 1)) + rows_to_take = np.delete( + rows_to_take, ((min_len - timearound) * pheno) - 1 + ) + pheno = pheno - 1 + if pheno < 1: + pheno = len(cs) + timearound = timearound + 1 + rows_to_take = rows_to_take.astype(int) + X_train_scaled = Data[rows_to_take, :] + Y_train = Target[rows_to_take] + print("Final training length = %d" % X_train_scaled.shape[0]) + print( + "Class lengths after trimming are = ", + [sum(Y_train == i) for i in set(Y_train)], + ) + return (X_train_scaled, Y_train) + + +#############################REMOVE OUTLIER DATA######################## +# How? Do this after scaling the data, then compute a z-score. We'll check the data after that. + + +def Remove_Outliers(Data, Target): + # for each class, detect outliers. + # we'll begin by using z-scoring. This assumes data is described by a Guassian + # which is why it is vital to do this AFTER scaling the data. + # I plotted the data, it is absolutely not Gaussian. + # I tried DBSCAN machine learning algorithm but it is really not helpful. + # However, the data IS perhaps Gaussian after embedding. We can clean the signal AFTER by sending in + # the emebedded data in 1, 2, or 3 dimensions and removing points that are beyond a standard deviation. 
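# Concretely, the loop below computes z = (x - mu_class) / sigma_class per
# class and per column, and drops any row with z > 2.5 in some column. An
# equivalent vectorized sketch (helper name is illustrative; the threshold
# and one-sided comparison mirror the code below, and the module-level
# numpy import is assumed):
def remove_outliers_vectorized(data, target, zthresh=2.5):
    """Drop rows whose within-class z-score exceeds zthresh in any column."""
    keep = np.ones(len(target), dtype=bool)
    for pheno in np.unique(target):
        rows = np.where(target == pheno)[0]
        z = (data[rows] - data[rows].mean(axis=0)) / data[rows].std(axis=0)
        keep[rows] = ~np.any(z > zthresh, axis=1)
    return data[keep], target[keep]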
+ # Data is TSNE embedded. + zscores = np.zeros(np.shape(Data)) + for pheno in np.unique(Target): + # find rows where phenotype is correct. + prows = np.where(Target == pheno)[0] + for dim in range(np.shape(Data)[1]): + # calculate the mean. + m = np.mean(Data[prows, dim]) + # calculate std. + s = np.std(Data[prows, dim]) + for example in range(len(prows)): + zscores[prows[example], dim] = (Data[prows[example], dim] - m) / s + + # now you calculated the zscores for each element. Apply a threshold + # good "thumb-rule" thresholds can be: 2.5, 3, 3.5, or more. + zthresh = 2.5 + + zscores = zscores > 2.5 + + badrows = [i for i in range(np.shape(zscores)[0]) if zscores[i].any()] + + Data = np.delete(Data, badrows, axis=0) + Target = np.delete(Target, badrows, axis=0) + + return (Data, Target) + + +##############################POST AUGMENTATION######################### +def Augment_Size(Data, Target, max_copies=0, s=0.2, balance=False, augment_class=None): + max_copies = int(max_copies) + # augment only the copies made by scaling the unit based measures. + # Measures should go: Area, MjrAxis, MnrAxis, Ecc,ConA,EqD,Sol,Ext,Per,conPer,fiber_length,InscribeR,bleb_len + + # first, determine if we desire class balance. + if balance: + # determine which class has maximum number of samples. + cs = np.unique(Target) + vals = [sum(Target == cs[i]) for i in cs] + print( + "Class %d has max number of samples, increasing other classes via size augmentation" + % np.argmax(vals) + ) + for i in range(len(cs)): + if i != np.argmax(vals): + # determine how many samples need to be made. + to_make = int(vals[np.argmax(vals)] - vals[i]) + # randomly sample rows from Data with the correct phenotype cs[i] + possible_rows = np.where(Target == cs[i])[0] + # sample to_make numbers from possible_rows. + sampled_rows = np.random.choice(possible_rows, to_make, replace=True) + newrows = Data[sampled_rows, :] + size_vary = s * np.random.rand(1, to_make)[0] + # vary size. 
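# The loop below perturbs each sampled row by a random factor r drawn from
# [0, s): with probability 1/2 it grows the row, otherwise it shrinks it.
# Area-like columns (0: Area, 4: ConvexArea) are scaled by (1 +/- r*r) and
# the remaining size-bearing columns (1, 2, 5, 7, 8, 9, 10, 11) by (1 +/- r);
# Eccentricity (3), Solidity (6), and bleb length (12) are left unchanged.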
+ for v in range(to_make): + if np.random.rand() < 0.5: + newrows[v, 0] = ( + newrows[v, 0] + newrows[v, 0] * size_vary[v] * size_vary[v] + ) + newrows[v, 1] = newrows[v, 1] + newrows[v, 1] * size_vary[v] + newrows[v, 2] = newrows[v, 2] + newrows[v, 2] * size_vary[v] + newrows[v, 4] = ( + newrows[v, 4] + newrows[v, 4] * size_vary[v] * size_vary[v] + ) + newrows[v, 5] = newrows[v, 5] + newrows[v, 5] * size_vary[v] + newrows[v, 7] = newrows[v, 7] + newrows[v, 7] * size_vary[v] + newrows[v, 8] = newrows[v, 8] + newrows[v, 8] * size_vary[v] + newrows[v, 9] = newrows[v, 9] + newrows[v, 9] * size_vary[v] + newrows[v, 10] = newrows[v, 10] + newrows[v, 10] * size_vary[v] + newrows[v, 11] = newrows[v, 11] + newrows[v, 11] * size_vary[v] + else: + newrows[v, 0] = ( + newrows[v, 0] - newrows[v, 0] * size_vary[v] * size_vary[v] + ) + newrows[v, 1] = newrows[v, 1] - newrows[v, 1] * size_vary[v] + newrows[v, 2] = newrows[v, 2] - newrows[v, 2] * size_vary[v] + newrows[v, 4] = ( + newrows[v, 4] - newrows[v, 4] * size_vary[v] * size_vary[v] + ) + newrows[v, 5] = newrows[v, 5] - newrows[v, 5] * size_vary[v] + newrows[v, 7] = newrows[v, 7] - newrows[v, 7] * size_vary[v] + newrows[v, 8] = newrows[v, 8] - newrows[v, 8] * size_vary[v] + newrows[v, 9] = newrows[v, 9] - newrows[v, 9] * size_vary[v] + newrows[v, 10] = newrows[v, 10] - newrows[v, 10] * size_vary[v] + newrows[v, 11] = newrows[v, 11] - newrows[v, 11] * size_vary[v] + Data = np.concatenate((Data, newrows), axis=0) + yadd = np.ones(to_make) * cs[i] + Target = np.concatenate((Target, yadd.astype(int)), axis=0) + + Data = Data[np.argsort(Target), :] + Target = Target[np.argsort(Target)] + + if augment_class is None: + if max_copies > 0: + print( + "Augmenting each class with additional %d samples via size augmentation" + % max_copies + ) + cs = np.unique(Target) + for i in range(len(cs)): + # generate n = max_copies of Data. + possible_rows = np.where(Target == cs[i])[0] + # sample to_make numbers from possible_rows. + sampled_rows = np.random.choice(possible_rows, max_copies, replace=True) + newrows = Data[sampled_rows, :] + size_vary = s * np.random.rand(1, max_copies)[0] + # vary size. 
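# The same perturbation loop appears three times in this function; a helper
# of the following shape (name and signature are illustrative, module-level
# numpy assumed) could be applied to `newrows` in each branch instead:
def vary_size(newrows, size_vary):
    """Randomly grow or shrink the size-bearing columns of each row in place."""
    for v in range(newrows.shape[0]):
        sign = 1.0 if np.random.rand() < 0.5 else -1.0
        r = size_vary[v]
        for col in (0, 4):                      # area-like measures: factor (1 +/- r*r)
            newrows[v, col] += sign * newrows[v, col] * r * r
        for col in (1, 2, 5, 7, 8, 9, 10, 11):  # length-like measures: factor (1 +/- r)
            newrows[v, col] += sign * newrows[v, col] * r
    return newrows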
+ for v in range(max_copies): + if np.random.rand() < 0.5: + newrows[v, 0] = ( + newrows[v, 0] + newrows[v, 0] * size_vary[v] * size_vary[v] + ) + newrows[v, 1] = newrows[v, 1] + newrows[v, 1] * size_vary[v] + newrows[v, 2] = newrows[v, 2] + newrows[v, 2] * size_vary[v] + newrows[v, 4] = ( + newrows[v, 4] + newrows[v, 4] * size_vary[v] * size_vary[v] + ) + newrows[v, 5] = newrows[v, 5] + newrows[v, 5] * size_vary[v] + newrows[v, 7] = newrows[v, 7] + newrows[v, 7] * size_vary[v] + newrows[v, 8] = newrows[v, 8] + newrows[v, 8] * size_vary[v] + newrows[v, 9] = newrows[v, 9] + newrows[v, 9] * size_vary[v] + newrows[v, 10] = newrows[v, 10] + newrows[v, 10] * size_vary[v] + newrows[v, 11] = newrows[v, 11] + newrows[v, 11] * size_vary[v] + else: + newrows[v, 0] = ( + newrows[v, 0] - newrows[v, 0] * size_vary[v] * size_vary[v] + ) + newrows[v, 1] = newrows[v, 1] - newrows[v, 1] * size_vary[v] + newrows[v, 2] = newrows[v, 2] - newrows[v, 2] * size_vary[v] + newrows[v, 4] = ( + newrows[v, 4] - newrows[v, 4] * size_vary[v] * size_vary[v] + ) + newrows[v, 5] = newrows[v, 5] - newrows[v, 5] * size_vary[v] + newrows[v, 7] = newrows[v, 7] - newrows[v, 7] * size_vary[v] + newrows[v, 8] = newrows[v, 8] - newrows[v, 8] * size_vary[v] + newrows[v, 9] = newrows[v, 9] - newrows[v, 9] * size_vary[v] + newrows[v, 10] = newrows[v, 10] - newrows[v, 10] * size_vary[v] + newrows[v, 11] = newrows[v, 11] - newrows[v, 11] * size_vary[v] + Data = np.concatenate((Data, newrows), axis=0) + yadd = np.ones(max_copies) * cs[i] + Target = np.concatenate((Target, yadd.astype(int)), axis=0) + + Data = Data[np.argsort(Target), :] + Target = Target[np.argsort(Target)] + + else: + augment_class = int(augment_class) + if max_copies > 0: + print( + "Augmenting Class = %d with additional %d samples via size augmentation" + % (augment_class, max_copies) + ) + # generate n = max_copies of Data. + possible_rows = np.where(Target == augment_class)[0] + # sample to_make numbers from possible_rows. + sampled_rows = np.random.choice(possible_rows, max_copies, replace=True) + newrows = Data[sampled_rows, :] + size_vary = s * np.random.rand(1, max_copies)[0] + # vary size. 
+ for v in range(max_copies): + if np.random.rand() < 0.5: + newrows[v, 0] = ( + newrows[v, 0] + newrows[v, 0] * size_vary[v] * size_vary[v] + ) + newrows[v, 1] = newrows[v, 1] + newrows[v, 1] * size_vary[v] + newrows[v, 2] = newrows[v, 2] + newrows[v, 2] * size_vary[v] + newrows[v, 4] = ( + newrows[v, 4] + newrows[v, 4] * size_vary[v] * size_vary[v] + ) + newrows[v, 5] = newrows[v, 5] + newrows[v, 5] * size_vary[v] + newrows[v, 7] = newrows[v, 7] + newrows[v, 7] * size_vary[v] + newrows[v, 8] = newrows[v, 8] + newrows[v, 8] * size_vary[v] + newrows[v, 9] = newrows[v, 9] + newrows[v, 9] * size_vary[v] + newrows[v, 10] = newrows[v, 10] + newrows[v, 10] * size_vary[v] + newrows[v, 11] = newrows[v, 11] + newrows[v, 11] * size_vary[v] + else: + newrows[v, 0] = ( + newrows[v, 0] - newrows[v, 0] * size_vary[v] * size_vary[v] + ) + newrows[v, 1] = newrows[v, 1] - newrows[v, 1] * size_vary[v] + newrows[v, 2] = newrows[v, 2] - newrows[v, 2] * size_vary[v] + newrows[v, 4] = ( + newrows[v, 4] - newrows[v, 4] * size_vary[v] * size_vary[v] + ) + newrows[v, 5] = newrows[v, 5] - newrows[v, 5] * size_vary[v] + newrows[v, 7] = newrows[v, 7] - newrows[v, 7] * size_vary[v] + newrows[v, 8] = newrows[v, 8] - newrows[v, 8] * size_vary[v] + newrows[v, 9] = newrows[v, 9] - newrows[v, 9] * size_vary[v] + newrows[v, 10] = newrows[v, 10] - newrows[v, 10] * size_vary[v] + newrows[v, 11] = newrows[v, 11] - newrows[v, 11] * size_vary[v] + Data = np.concatenate((Data, newrows), axis=0) + yadd = np.ones(max_copies) * augment_class + Target = np.concatenate((Target, yadd.astype(int)), axis=0) + + Data = Data[np.argsort(Target), :] + Target = Target[np.argsort(Target)] + + return (Data, Target) + + +######################################################################## +######################################################################## +####### IMPORT THE DEV SET ##### +######################################################################## +######################################################################## +def import_dev_set(dev_file_name="DevResults.txt"): + print("Importing the dev set...") + + # import features + featurelist = [] + + with open(dev_file_name, "r") as infile: + for line in infile: + featurelist.append(line.strip()) + + # so now, featurelist[1] has names of things in form 'Area, MajorAxisLength, ... Class' + FeatureNames = [x.strip() for x in featurelist[0].split(",")] + # FeatureNames has form ['Area','MajorAxisLength',....'Class'] which is what I wanted + + DevData = [ + [float(x.strip()) for x in featurelist[i].split(",")] + for i in range(1, len(featurelist)) + ] + + # Data is in form [[1,2,3,....0.0],[3,3,1,...0.0],...[5,3,1,...0.0]], the last input is the class. + + Devclasses = [int(i[-1]) for i in DevData] + + # classes contains the class number from which the data is from + + # want to delete target from AllData. + + DevX = [i[0:-1] for i in DevData] + + # X has form similar to Data. So when we reshape, we want the output to be + # X = array([[0,1,2,...] + # [1,2,3,...]]) + + X_dev = np.asarray(DevX, order="F") + + # add aspect ratio as last column of data + AR = [] + for i in range(len(X_dev)): + AR.append(X_dev[i, 1] / X_dev[i, 2]) + + AR = np.asarray(AR) + + AR = AR.reshape((len(AR), 1)) + + X_dev = np.append(X_dev, AR, 1) # concatenates arrays appropriately. 
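# Two derived features are appended to the dev set here: aspect ratio
# (MajorAxisLength / MinorAxisLength, computed just above) and, next, form
# factor as Perimeter^2 / Area. Note that Add_Measures further down computes
# its FF column as Area / Perimeter^2, i.e. the reciprocal convention.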
+ + # add form factor as last column of data + # P^2/Area + FF = [] + for i in range(len(X_dev)): + FF.append(X_dev[i, 8] * X_dev[i, 8] / X_dev[i, 0]) + FF = np.asarray(FF) + FF = FF.reshape((len(FF), 1)) + X_dev = np.append(X_dev, FF, 1) + + # this has the right form, is uses fortran column-major style memory representation vs row major C-style + # the notation is scientific, where iris data set looks like a float. CHECKED: Both are type numpy.float64 + # both have same indexing calls, so I think we're in business. + + # looks exactly correct, or at least like iris data set target. + y_dev = np.asarray(Devclasses) + + return (X_dev, y_dev, FeatureNames) + + +######################################################################## +#########DATA IS IN THE SAME FORM AS IS FOUND IN IRIS DATASET########### +######################################################################## +# Target = Target classes (0-4) for training and validation (type, numpy.int64, array) +# Data = Data for training and validation to be split. (type, numpy.float64, array) +# FeatureNames = Feature names for each column of data. (type, 'str', python list) +######################################################################## +# print "Data is now in the same form as that found in Iris Dataset" +# print "Splitting the training dataset into train/val" + + +def apply_normalization(X_train, max_norm=False, l1_norm=False, l2_norm=False): + ######################################################## + if max_norm: + print("Normalizing data using l1_norm") + X_train = X_train / np.max(np.abs(X_train), 0)[None, :] + if l1_norm: + print("Normalizing data using l1_norm") + X_train = X_train / np.sum(X_train, 0)[None, :] + if l2_norm: + print("Normalizing data using l1_norm") + X_train = X_train / np.sqrt(np.sum(X_train * X_train, 0))[None, :] + + return X_train + + +######################################################################## + + +def preprocess_train_data(X_train, d=2): + + ############### SPLITTING THE DATASET ################## + # First split the dataset so it is as if we only had a training set then a eval set. + # X_train, X_test, y_train, y_test = train_test_split(Data, Target, test_size = .3)#.25)#, random_state = + # default has shuffle = True. test_size sets the proportion of the data set to include in the test, here 25%. + ######################################################## + if d > 1: + print("Increasing dimensionality of dataset using cross terms") + #################INCREASING FEATURES#################### + poly = preprocessing.PolynomialFeatures(degree=d, interaction_only=True) + # IN SOME MODELS with 2 polynomial features, we are getting 90% exactly. In some polynomial 3 models, + # we are getting 90.83%, which is exactly even with deep learning models. + + X_train = poly.fit_transform(X_train) + # target_feature_names = ['x'.join(['{}^{}'.format(pair[0],pair[1]) for pair in tuple if pair[1]!=0]) for tuple in [zip(FeatureNames,p) for p in poly.powers_]] + # poly=preprocessing.PolynomialFeatures(degree = 2, interaction_only = True) + # X_test = poly.fit_transform(X_test) + # poly=preprocessing.PolynomialFeatures(degree = 2, interaction_only = True) + # X_dev = poly.fit_transform(X_dev) + + ######################################################## + + print("Scaling the data") + ################# SCALE THE DATA ####################### + # Scale the data. Each attribute in the dataset must be independently scaled, that is + # 0 mean, and unit variance. 
Doing this returns the z-scores of the data + # Z = (x - mu) / sigma + + # , QuantileTransformer(output_distribution='normal') + scaler = preprocessing.RobustScaler().fit(X_train) + # preprocessing.StandardScaler().fit(X_train) #IMPORTANT NOTE: We are scaling based only on training data!!!! + + X_train_scaled = scaler.fit_transform(X_train) + + # X_test_scaled = scaler.transform(X_test) # will be used later to evaluate the performance. + + # X_dev_scaled = scaler.transform(X_dev) + + ########################################################## + + return (X_train_scaled, scaler) # , target_feature_names) + + +def preprocess_test_data(X_dev, scaler, d=2): + ############### SPLITTING THE DATASET ################## + # First split the dataset so it is as if we only had a training set then a eval set. + # X_train, X_test, y_train, y_test = train_test_split(Data, Target, test_size = .3)#.25)#, random_state = + # default has shuffle = True. test_size sets the proportion of the data set to include in the test, here 25%. + ######################################################## + + print("Increasing dimensionality of dataset using cross terms") + #################INCREASING FEATURES#################### + poly = preprocessing.PolynomialFeatures(degree=d, interaction_only=True) + # IN SOME MODELS with 2 polynomial features, we are getting 90% exactly. In some polynomial 3 models, + # we are getting 90.83%, which is exactly even with deep learning models. + + # X_train = poly.fit_transform(X_train) + # target_feature_names = ['x'.join(['{}^{}'.format(pair[0],pair[1]) for pair in tuple if pair[1]!=0]) for tuple in [zip(FeatureNames,p) for p in poly.powers_]] + # poly=preprocessing.PolynomialFeatures(degree = 2, interaction_only = True) + # X_test = poly.fit_transform(X_test) + # poly=preprocessing.PolynomialFeatures(degree = 2, interaction_only = True) + X_dev = poly.fit_transform(X_dev) + + ######################################################## + + print("Scaling the data") + ################# SCALE THE DATA ####################### + # Scale the data. Each attribute in the dataset must be independently scaled, that is + # 0 mean, and unit variance. Doing this returns the z-scores of the data + # Z = (x - mu) / sigma + + # scaler = preprocessing.StandardScaler().fit(X_train) #IMPORTANT NOTE: We are scaling based only on training data!!!! + + # X_train_scaled = scaler.transform(X_train) + + # X_test_scaled = scaler.transform(X_test) # will be used later to evaluate the performance. + + X_dev_scaled = scaler.transform(X_dev) + + ########################################################## + + return X_dev_scaled + + +def Add_Measures( + Data, + FeatureNames=None, + add_AR=True, + add_FF=True, + add_convexity=True, + add_curl_old=True, + add_curl=True, + add_sphericity=True, + add_InscribedArea=True, + add_BlebRel=True, +): + ############### EXPANDING THE DATASET ################## + # Add measures of Aspect Ratio, Form Factor, Convexity, Curl, and Sphericity + # Input: Data must be an np array with N (row) examples x M (cols) measures. + # Measures should go: Area, MjrAxis, MnrAxis, Ecc,ConA,EqD,Sol,Ext,Per,conPer,fiber_length,InscribeR,bleb_len + ######################################################## + if add_AR: + AR = [] + for i in range(len(Data)): + AR.append(Data[i, 1] / Data[i, 2]) + + AR = np.asarray(AR) + + AR = AR.reshape((len(AR), 1)) + + Data = np.append(Data, AR, 1) # concatenates arrays appropriately. 
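# The per-row loops in this function can each be expressed as a single
# vectorized operation; a sketch for the aspect-ratio column added just
# above (illustrative helper, same column layout and module-level numpy
# import assumed):
def append_aspect_ratio(data):
    """Append MajorAxisLength / MinorAxisLength as a new last column."""
    ar = (data[:, 1] / data[:, 2]).reshape(-1, 1)
    return np.append(data, ar, 1)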
+ if FeatureNames is not None: + FeatureNames.extend(["AR"]) + + if add_FF: + # this measure is really compactness, if you multiply each by 4 pi + # note this is different from roundness, which would use convex perimeter + FF = [] + for i in range(len(Data)): + FF.append(Data[i, 0] / (Data[i, 8] * Data[i, 8])) + # FF.append(Data[i,8]*Data[i,8] / Data[i,0]) + + FF = np.asarray(FF) + FF = FF.reshape((len(FF), 1)) + Data = np.append(Data, FF, 1) + if FeatureNames is not None: + FeatureNames.extend(["FF"]) + + if add_convexity: + CC = [] + for i in range(len(Data)): + CC.append(Data[i, 8] / Data[i, 9]) + + CC = np.asarray(CC) + CC = CC.reshape((len(CC), 1)) + Data = np.append(Data, CC, 1) + if FeatureNames is not None: + FeatureNames.extend(["Convexity"]) + + if add_curl_old: + # tells how curled the object is. might help for lamellipodia. + # curl is length / fiber length. (I assume length here can be major axis length) + # fiber length definition is (perimeter - sqrt(perimeter^2 - 16*Area)) / 4 + + # this definition does not work for a circle. Note that the result will be imaginary. + # I changed the 16 to a 4Pi. This should be fine. + cc = [] + for i in range(len(Data)): + if (4 * np.pi * Data[i, 0]) <= (Data[i, 8] * Data[i, 8]): + fiber_length = ( + Data[i, 8] + - np.sqrt((Data[i, 8] * Data[i, 8]) - (4 * np.pi * Data[i, 0])) + ) / np.pi # 4 + cc.append(Data[i, 1] / fiber_length) + else: + fiber_length = Data[i, 8] / np.pi # 4 + cc.append(Data[i, 1] / fiber_length) + + cc = np.asarray(cc) + cc = cc.reshape((len(cc), 1)) + Data = np.append(Data, cc, 1) + if FeatureNames is not None: + FeatureNames.extend(["Curl_old"]) + + if add_curl: + cc = [] + for i in range(len(Data)): + cc.append(Data[i, 1] / Data[i, 10]) + + cc = np.asarray(cc) + cc = cc.reshape((len(cc), 1)) + Data = np.append(Data, cc, 1) + # bound between 0 and 1 if major axis length could be replaced by feret diameter. + if FeatureNames is not None: + FeatureNames.extend(["Curl"]) + + if add_sphericity: + ss = [] + for i in range(len(Data)): + ss.append(Data[i, 11] * 2 / Data[i, 1]) + + ss = np.asarray(ss) + ss = ss.reshape((len(ss), 1)) + Data = np.append(Data, ss, 1) + # bound between 0 and 1 where 1 is a circle, perfectly spherical, and 0 is not at all. + # would be better if we had feret diameter instead of major axis. 
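# The two remaining optional columns follow the same per-row pattern:
# "InArea" is computed as pi * MajorAxisLength^2 / InscribedRadius and
# "Bleb_Rel" as bleb length divided by the inscribed radius.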
+ if FeatureNames is not None: + FeatureNames.extend(["Sphericity"]) + + if add_InscribedArea: + aa = [] + for i in range(len(Data)): + aa.append(Data[i, 1] * Data[i, 1] * np.pi / Data[i, 11]) + + aa = np.asarray(aa) + aa = aa.reshape((len(aa), 1)) + Data = np.append(Data, aa, 1) + if FeatureNames is not None: + FeatureNames.extend(["InArea"]) + + if add_BlebRel: + bb = [] + for i in range(len(Data)): + bb.append(Data[i, 12] / Data[i, 11]) + + bb = np.asarray(bb) + bb = bb.reshape((len(bb), 1)) + Data = np.append(Data, bb, 1) + if FeatureNames is not None: + FeatureNames.extend(["Bleb_Rel"]) + + if FeatureNames is not None: + return (Data, FeatureNames) + else: + return Data + + +def Exclude_Measures( + Data, + FeatureNames=None, + ex_Area=False, + ex_MjrAxis=False, + ex_MnrAxis=False, + ex_Ecc=False, + ex_ConA=False, + ex_EqD=False, + ex_Sol=False, + ex_Ext=False, + ex_Per=False, + ex_conPer=False, + ex_FL=False, + ex_InR=False, + ex_bleb=False, +): + # Area,MjrAxis,MnrAxis,Ecc,ConA,EqD,Sol,Ext,Per,conPer,FL,InR + + del_cols = [] + if ex_Area: + del_cols.append(0) + if ex_MjrAxis: + del_cols.append(1) + if ex_MnrAxis: + del_cols.append(2) + if ex_Ecc: + del_cols.append(3) + if ex_ConA: + del_cols.append(4) + if ex_EqD: + del_cols.append(5) + if ex_Sol: + del_cols.append(6) + if ex_Ext: + del_cols.append(7) + if ex_Per: + del_cols.append(8) + if ex_conPer: + del_cols.append(9) + if ex_FL: + del_cols.append(10) + if ex_InR: + del_cols.append(11) + if ex_bleb: + del_cols.append(12) + + Data = np.delete(Data, del_cols, 1) + if FeatureNames is not None: + FeatureNames = [i for j, i in enumerate(FeatureNames) if j not in del_cols] + return (Data, FeatureNames) + else: + return Data + + +def open_and_save_test_data(fpath, csvfilename, txtfilename, ratio): + # fpath = '/volumes/chris stuff/chemsensing/chemsensing/Y27632_120518/Results/' + # /Rho_Act_120118/Results_after/' + # filename = 'FinalResults_after' + # option to delete certain measures if done so in training. + # order should go like + # %frame number%correctedNum%area%centroidx%centroidy%major%minor%eccentricity + # %orientation%convex area%filledarea%equivDiameter%solidity%extent%perimeter + # %perimeter old%convex perimeter%fiber length%%max in radii%bleb length%centersx%centersy + + data = np.genfromtxt( + fpath + csvfilename + ".csv", + delimiter=",", + usecols=[2, 5, 6, 7, 9, 11, 12, 13, 14, 16, 17, 18, 19], + skip_header=1, + ) + # was cols 3,6,7,8,10,12,13,14,15 + frames_cell = np.genfromtxt( + fpath + csvfilename + ".csv", delimiter=",", usecols=[0, 1], skip_header=1 + ) + # add aspect ratio as last column of data + + data[:, 0] = data[:, 0] * ratio * ratio # area + data[:, 1] = data[:, 1] * ratio # mjr + data[:, 2] = data[:, 2] * ratio # MnrAxis + # ecc unitless + data[:, 4] = data[:, 4] * ratio * ratio # ConvexArea + data[:, 5] = data[:, 5] * ratio # EquivDiameter + # Solidity + # Extent + data[:, 8] = data[:, 8] * ratio # Perimeter + data[:, 9] = data[:, 9] * ratio # conPerim + data[:, 10] = data[:, 10] * ratio # FibLen + data[:, 11] = data[:, 11] * ratio # max inscribed r + data[:, 12] = data[:, 12] * ratio # bleblen + + preds = np.genfromtxt( + fpath + "/" + txtfilename + ".txt", + delimiter=" ", + usecols=[4, 5, 6, 7], + skip_header=1, + ) + y_target = np.where(np.max(preds, 1) > 0.7, np.argmax(preds, 1), 4) + # y_target = np.reshape(y_target,(len(y_target),1)) + + return (data, y_target, frames_cell) |
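A sketch of how these pieces fit together end to end (the trim length and
polynomial degree below are illustrative choices, not values fixed by this
code):

from sunlab.transform_data import (
    import_train_set,
    Add_Measures,
    Trim_Train_Data,
    preprocess_train_data,
)

# 1. Load the labelled morphology table and append the derived measures.
data, target = import_train_set("AllResults.txt")
data = Add_Measures(data)

# 2. Trim to a multiple of 256 samples with balanced classes, then expand
#    the features with cross terms and scale them (RobustScaler is fit on
#    the training data only).
X, y = Trim_Train_Data(data, target, max_length=4096, balance=True)
X_scaled, scaler = preprocess_train_data(X, d=2)

# 3. X_scaled and y can now be used to train a classifier, or rescaled to
#    [0, 1] and fed to one of the suntorch models above (their default
#    reconstruction loss is binary cross-entropy).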