# Source code for sciSOM.Plotting.SOM_plots

import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from typing import Union
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import colors
from ..SOM_recall.recall import SOM_location_recall


def plot_SOM_gird_neurons(weight_cube: np.ndarray) -> None:
    """
    Plot the prototype (weight vector) of every SOM neuron on a grid of panels.

    One small panel is drawn per SOM cell: the panel in row ``r`` and column
    ``c`` shows ``weight_cube[r, c, :]``.  The figure is created but not
    shown, so the caller can still modify it, save it, or call ``plt.show()``.

    Parameters
    ----------
    weight_cube : np.ndarray
        Weight cube after an SOM has been trained, of shape
        ``(n_rows, n_cols, data_dim)``.

    Returns
    -------
    None
    """
    weight_cube = np.asarray(weight_cube)
    n_rows, n_cols, data_dim = weight_cube.shape

    # squeeze=False keeps ``axes`` two-dimensional even for single-row or
    # single-column maps, so ``axes[r, c]`` is always a valid index.
    _, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(5, 5),
                           squeeze=False)

    for r in range(n_rows):
        for c in range(n_cols):
            panel = axes[r, c]
            panel.plot(weight_cube[r, c, :])
            panel.axis('off')
            panel.set_xlim(0, data_dim)
            # NOTE(review): the y-range is fixed to [0, 1], which presumably
            # assumes prototypes of normalized data -- confirm.
            panel.set_ylim(0, 1)
def _mU_hit_counts(weight_cube: np.ndarray, data: np.ndarray) -> np.ndarray:
    """Count, per SOM cell, the samples whose best matching unit is that cell."""
    grid_shape = weight_cube.shape[:2]
    counts = np.zeros(grid_shape)
    for sample in data:
        cell_distances = np.linalg.norm(weight_cube - sample, axis=-1)
        # argmin works on the flattened grid; unravel it back to (row, col).
        counts[np.unravel_index(np.argmin(cell_distances), grid_shape)] += 1
    return counts


def _mU_fence_distances(weight_cube: np.ndarray):
    """Return Euclidean distances between vertically / horizontally adjacent prototypes."""
    # below[i, j] is the distance between cells (i, j) and (i + 1, j).
    below = np.linalg.norm(weight_cube[1:, :, :] - weight_cube[:-1, :, :], axis=-1)
    # right[i, j] is the distance between cells (i, j) and (i, j + 1).
    right = np.linalg.norm(weight_cube[:, 1:, :] - weight_cube[:, :-1, :], axis=-1)
    return below, right


def plot_mU_matrix(weight_cube: np.ndarray,
                   data: np.ndarray,
                   set_costum_min_max: bool = False,
                   fence_vmin: float = None,
                   fence_vmax: float = None,
                   density_vmin: float = None,
                   density_vmax: float = None,
                   log_density: bool = False,
                   fence_on: bool = True) -> None:
    """
    Plot the mU-matrix of a trained SOM.

    The mU-matrix is defined here as the data density per cell (drawn as the
    cell color, black to red) together with lines ("fences") between
    neighbouring cells whose gray level represents the distance between the
    adjacent prototypes.

    This implementation mimics the major aspects of the mU-matrix in
    NeuroScope (TODO: cite the relevant figure of the mU-matrix paper).
    Complete information on the NeuroScope implementation of the mU-matrix is
    given in the respective NeuroScope documentation, available upon request
    from Erzsébet Merényi (erzsebet@rice.edu).

    Prof. Merényi was not consulted on the implementation of sciSOM functions
    that intend to mimic NeuroScope functionalities of the same name, nor did
    she have opportunity to inspect proof of faithfulness to the same-name
    module in NeuroScope or correctness of the corresponding sciSOM code.
    Therefore, Dr. Merényi and the NeuroScope group take no responsibility
    for the likeness and the correctness of the functions implemented to
    mimic (partial) NeuroScope capabilities in sciSOM.

    Parameters
    ----------
    weight_cube : np.ndarray
        Weight cube after an SOM has been trained, of shape
        ``(height, width, som_dim)``.
    data : np.ndarray
        Data used to train the SOM or data to be mapped to the SOM, of shape
        ``(n_samples, som_dim)``.
    set_costum_min_max : bool
        If True, the ``fence_*`` and ``density_*`` bounds below are used where
        they are given.  If False they are ignored.
    fence_vmin : float
        Fence distance mapped to black.  Defaults to 0.
    fence_vmax : float
        Fence distance mapped to white.  Defaults to the largest distance
        between adjacent prototypes.
    density_vmin : float
        Density mapped to black.  Defaults to 0.  When ``log_density`` is
        True the bound is compared against the log-scaled density.
    density_vmax : float
        Density mapped to red.  Defaults to the largest cell density.  When
        ``log_density`` is True the bound is compared against the log-scaled
        density.
    log_density : bool
        If True, ``log10(count + 1)`` is displayed instead of the raw count.
    fence_on : bool
        If False, no fences are drawn.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional or its feature dimension does not
        match the last axis of ``weight_cube``.
    """
    weight_cube = np.asarray(weight_cube, dtype=float)
    data = np.asarray(data, dtype=float)
    height, width, som_dim = weight_cube.shape
    if data.ndim != 2 or data.shape[1] != som_dim:
        raise ValueError(
            f"data must have shape (n_samples, {som_dim}) to match the "
            f"weight cube, got {data.shape}"
        )

    cmap = LinearSegmentedColormap.from_list('black_to_red', ['black', 'red'])

    # --- Cell colors: data density normalized to [0, 1] ---
    density = _mU_hit_counts(weight_cube, data)
    if log_density:
        density = np.log10(density + 1)

    density_lo = 0.0
    density_hi = float(np.max(density)) if density.size else 0.0
    if set_costum_min_max:
        if density_vmin is not None:
            density_lo = density_vmin
        if density_vmax is not None:
            density_hi = density_vmax
    density_span = density_hi - density_lo
    if density_span > 0:
        norm_counts = np.clip((density - density_lo) / density_span, 0.0, 1.0)
    else:
        # No data mapped (or degenerate bounds): draw every cell black rather
        # than dividing by zero.
        norm_counts = np.zeros_like(density)

    # --- Fence colors: distances between adjacent prototypes ---
    draw_fences = False
    if fence_on:
        below, right = _mU_fence_distances(weight_cube)
        all_fences = np.concatenate((below.ravel(), right.ravel()))
        # A 1x1 map has no neighbouring cells, hence nothing to draw.
        draw_fences = all_fences.size > 0

    if draw_fences:
        fence_lo = 0.0
        fence_hi = float(np.max(all_fences))
        if set_costum_min_max:
            # Each bound is honoured on its own, so supplying only one of
            # them (or a bound of 0.0) behaves as expected.
            if fence_vmin is not None:
                fence_lo = fence_vmin
            if fence_vmax is not None:
                fence_hi = fence_vmax
        fence_span = fence_hi - fence_lo

        def fence_color(distance):
            if fence_span <= 0:
                return plt.cm.gray(0.0)
            shade = (distance - fence_lo) / fence_span
            return plt.cm.gray(float(np.clip(shade, 0.0, 1.0)))

    fig, ax = plt.subplots()

    # Row 0 of the SOM is drawn at the top of the figure, hence the flipped y.
    for i in range(height):
        for j in range(width):
            ax.add_patch(plt.Rectangle((j, height - i - 1), 1, 1,
                                       facecolor=cmap(norm_counts[i, j]),
                                       edgecolor='black'))

    if draw_fences:
        for i in range(height):
            for j in range(width):
                if i < height - 1:
                    # Bottom edge of cell (i, j): fence towards cell (i + 1, j).
                    ax.plot([j, j + 1],
                            [height - i - 1, height - i - 1],
                            color=fence_color(below[i, j]))
                if j < width - 1:
                    # Right edge of cell (i, j): fence towards cell (i, j + 1).
                    ax.plot([j + 1, j + 1],
                            [height - i - 1, height - i],
                            color=fence_color(right[i, j]))

    ax.set_xlim(0, width)
    ax.set_ylim(0, height)
    ax.set_aspect('equal')
    ax.axis('off')  # Turn off the axis
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
    plt.show()
def calculate_distance_btw_adjacent_prototypes(weight_cube: np.ndarray):
    """
    Calculate, for each neuron, the mean distance to its adjacent neurons.

    For every cell of the SOM grid the Euclidean distances between its
    prototype and the prototypes of its up/down/left/right neighbours (those
    that exist; the grid does not wrap around) are averaged.

    Parameters
    ----------
    weight_cube : np.ndarray
        The weight cube for the SOM, of shape ``(x, y, data_dim)``.

    Returns
    -------
    distance_btw_proto : np.ndarray
        Array of shape ``(x, y)`` holding the mean neighbour distance of each
        neuron.  A neuron without any neighbour (1x1 map) gets 0.0.
    """
    cube = np.asarray(weight_cube, dtype=float)
    n_rows, n_cols, _ = cube.shape

    # Distances between vertically / horizontally adjacent prototypes.
    row_gaps = np.linalg.norm(cube[1:] - cube[:-1], axis=-1)
    col_gaps = np.linalg.norm(cube[:, 1:] - cube[:, :-1], axis=-1)

    total = np.zeros((n_rows, n_cols))
    n_neighbours = np.zeros((n_rows, n_cols))

    # Every gap is shared by the two cells on either side of it.
    total[1:] += row_gaps          # neighbour above
    n_neighbours[1:] += 1
    total[:-1] += row_gaps         # neighbour below
    n_neighbours[:-1] += 1
    total[:, 1:] += col_gaps       # neighbour to the left
    n_neighbours[:, 1:] += 1
    total[:, :-1] += col_gaps      # neighbour to the right
    n_neighbours[:, :-1] += 1

    # Cells without neighbours keep the 0.0 of ``out`` instead of becoming NaN.
    distance_btw_proto = np.divide(total, n_neighbours,
                                   out=np.zeros_like(total),
                                   where=n_neighbours > 0)
    return distance_btw_proto
def calculate_density_matrix(weight_cube: np.ndarray, dataset: np.ndarray) -> np.ndarray:
    """
    Calculate the density matrix for a given SOM weight cube and dataset.

    Each sample is assigned to its best matching unit (the prototype with the
    smallest Euclidean distance) and the number of samples per cell is
    counted.  Only these hit counts are computed; no U-matrix (prototype
    distance) information enters the result.

    Parameters
    ----------
    weight_cube : np.ndarray
        SOM weight cube of shape ``(x, y, data_dim)``.
    dataset : np.ndarray
        Data in the same form given to the SOM as input for training, of
        shape ``(n_samples, data_dim)``.

    Returns
    -------
    density_matrix : np.ndarray
        Float array of shape ``(x, y)`` with the number of samples mapped to
        each cell.  An empty dataset yields all zeros.
    """
    weight_cube = np.asarray(weight_cube)
    n_rows, n_cols, n_features = weight_cube.shape
    prototypes = weight_cube.reshape(-1, n_features)
    samples = np.asarray(dataset)

    hits = np.zeros(n_rows * n_cols)

    # Work through the samples in chunks so the distance table stays small
    # even for very large datasets.
    chunk_size = 4096
    for start in range(0, len(samples), chunk_size):
        distances = cdist(samples[start:start + chunk_size], prototypes,
                          metric='euclidean')
        winners = np.argmin(distances, axis=1)
        hits += np.bincount(winners, minlength=hits.size)

    density_matrix = hits.reshape(n_rows, n_cols)
    return density_matrix
def display_density_matrix(density_matrix: np.ndarray):
    """
    Show a density matrix as a color-mapped image with a colorbar.

    The image is drawn with the ``viridis`` colormap on the current
    matplotlib axes and displayed with ``plt.show()``.

    Parameters
    ----------
    density_matrix : np.ndarray
        The density matrix to display
    """
    heatmap = plt.imshow(density_matrix, cmap='viridis')
    plt.colorbar(heatmap)
    plt.title('Density Matrix')
    plt.show()
def rise_time_vs_area_SOM_clusters(data: np.ndarray,
                                   colors: Union[list, np.ndarray],
                                   n_rows: int,
                                   n_cols: int):
    """
    Plot the rise time vs area for each cluster in the SOM.

    Takes in peak(let) level data carrying the SOM classification and draws a
    grid of log-log scatter plots, one panel per cluster.  Panels are filled
    row by row; panel number ``k`` shows the entries with ``data['type'] == k``.

    Parameters
    ----------
    data : np.ndarray
        Structured array with XENONnT data of data type peaks or peaklets.
        The fields ``'area'``, ``'type'`` and ``'area_decile_from_midpoint'``
        are read.
    colors : list or np.ndarray
        RGB colors (0-255) used by the SOM, one row per cluster.  A black
        entry is appended internally for one additional cluster.
    n_rows : int
        Number of rows in the grid of plots.
    n_cols : int
        Number of columns in the grid of plots.

    Raises
    ------
    ValueError
        If the grid has more panels than there are colors (including the
        appended black one).
    """
    # Generalize this later: one extra (black) color for the cluster index
    # right after the supplied palette.
    palette = np.vstack((colors, np.array([0, 0, 0]).reshape((1, 3))))

    n_panels = n_rows * n_cols
    if n_panels > len(palette):
        raise ValueError(
            f"a {n_rows}x{n_cols} grid needs {n_panels} colors, but only "
            f"{len(palette)} are available (including the appended black)"
        )

    # squeeze=False keeps ``axes`` two-dimensional for single-row/column grids.
    _, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(24, 18),
                           squeeze=False)

    # ``axes.flat`` walks the grid row by row, which is the cluster order.
    for cluster, panel in enumerate(axes.flat):
        members = data['type'] == cluster
        # NOTE(review): the negated second column of
        # 'area_decile_from_midpoint' is presumably the rise time -- confirm.
        panel.scatter(data['area'][members],
                      -data['area_decile_from_midpoint'][members][:, 1],
                      s=0.5,
                      color=palette[cluster] / 255,
                      alpha=1)
        panel.set_xscale('log')
        panel.set_yscale('log')
        panel.set_xlim(1, 10000000)
        panel.set_ylim(10, 100000)
def SOM_gird_avg_wavefrom_per_cell(input_data: np.ndarray,
                                   weight_cube: np.ndarray,
                                   output_img_name: str = 'avg_waveform.png',
                                   save_fig: bool = False,
                                   is_struct_array: bool = True):
    """
    Generate an image of the average waveform for each cell in the SOM grid.

    Every waveform is mapped to its best matching SOM cell with
    ``SOM_location_recall``; the panel in row ``r`` and column ``c`` then
    shows, in black, the mean of all waveforms mapped to
    ``weight_cube[r, c]``.  Cells that no waveform maps to (dead neurons) are
    drawn as a flat red line at zero.

    Parameters
    ----------
    input_data : np.ndarray
        Waveforms (peaks, peaklets).  Either a structured array whose
        ``'data'`` field holds the waveforms (``is_struct_array=True``) or a
        plain array of shape ``(n_samples, data_dim)``.
    weight_cube : np.ndarray
        SOM weight cube of shape ``(n_rows, n_cols, data_dim)``.
    output_img_name : str
        Name of file to save the image to + path.
    save_fig : bool
        If True the figure is written to ``output_img_name``.
    is_struct_array : bool
        Does the data need to be accessed as ``input_data['data']``?

    Raises
    ------
    ValueError
        If the waveforms are not two-dimensional or their length does not
        match the last axis of ``weight_cube``.
    """
    # Pull the plain waveform matrix out first so that the shape check, the
    # recall and the averaging all operate on the same 2-D array.
    waveforms = np.asarray(input_data['data'] if is_struct_array else input_data)

    n_rows, n_cols, som_dim = np.shape(weight_cube)
    if waveforms.ndim != 2 or waveforms.shape[1] != som_dim:
        raise ValueError(
            f"waveforms must have shape (n_samples, {som_dim}) to match the "
            f"weight cube, got {waveforms.shape}"
        )
    data_dim = waveforms.shape[1]

    # Need to assign a location for each sample.  SOM_location_recall returns
    # two rows: first the index along the second weight-cube axis, then the
    # index along the first axis (as defined by SOM_location_recall in this
    # module -- TODO confirm the imported variant uses the same convention).
    col_of, row_of = SOM_location_recall(weight_cube, waveforms)

    # squeeze=False keeps ``axes`` two-dimensional for single-row/column maps.
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(5, 5),
                             squeeze=False)

    for r in range(n_rows):
        for c in range(n_cols):
            panel = axes[r, c]
            members = waveforms[(row_of == r) & (col_of == c)]
            if members.size > 0:
                panel.plot(np.mean(members, axis=0), alpha=1, color='black')
            else:
                # Dead neuron.  Maybe replace this with red X's?
                panel.plot(np.zeros(data_dim), alpha=1, color='red')
            panel.set_xlim(0, data_dim)
            panel.set_ylim(0, 1)
            panel.axis('off')

    if save_fig:
        fig.savefig(output_img_name, bbox_inches='tight')
def SOM_location_recall(weight_cube: np.ndarray,
                        normalized_data: np.ndarray,) -> np.ndarray:
    """
    Find the best matching SOM cell for every data sample.

    The best matching unit of a sample is the prototype with the smallest
    Euclidean distance to it (the first one in row-major order on ties).

    Parameters
    ----------
    weight_cube : np.ndarray
        SOM weight cube of shape ``(SOM_xdim, SOM_ydim, data_dim)``.
    normalized_data : np.ndarray
        Data to map onto the SOM, of shape ``(n_samples, data_dim)``.  A
        single sample may also be given as a 1-D vector of length
        ``data_dim``.

    Returns
    -------
    array_to_fill : np.ndarray
        Integer array of shape ``(2, n_samples)``.  Row 0 holds the index of
        the winning cell along the second axis of ``weight_cube`` and row 1
        the index along its first axis.
    """
    # Want to make it so it works with different metrics in the future
    weight_cube = np.asarray(weight_cube)
    SOM_xdim, SOM_ydim, n_features = weight_cube.shape

    # atleast_2d lets a single 1-D sample be treated as a one-row dataset.
    samples = np.atleast_2d(np.asarray(normalized_data))

    distances = cdist(weight_cube.reshape(-1, n_features), samples,
                      metric="euclidean")
    # One column per sample: pick the closest prototype in the flattened grid.
    w_neuron = np.argmin(distances, axis=0)
    x_idx, y_idx = np.unravel_index(w_neuron, (SOM_xdim, SOM_ydim))

    # Note the order: second-axis index first, first-axis index second.
    array_to_fill = np.vstack((y_idx, x_idx))
    return array_to_fill