import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from typing import Union
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import colors
from ..SOM_recall.recall import SOM_location_recall
[docs]
def plot_SOM_gird_neurons(weight_cube: np.ndarray) -> None:
    """
    Plot the weight vector (prototype) of every neuron of an SOM grid.

    One subplot is created per neuron: the subplot at row ``r``, column ``c``
    shows ``weight_cube[r, c, :]``.  The figure is neither shown nor saved
    here; call ``plt.show()`` / ``plt.savefig`` afterwards if needed.

    Parameters:
    -------------------
    weight_cube : np.ndarray
        Weight cube after an SOM has been trained.  Must be three-dimensional;
        the first two axes index the grid and the last axis holds the
        prototype values.

    Returns:
    ----------------
    None
    """
    n_rows, n_cols, data_dim = np.shape(weight_cube)

    # The subplot grid follows the first two axes of the weight cube so that
    # non-square SOMs can be indexed safely.  squeeze=False keeps ``axes``
    # two-dimensional even for a 1 x N or N x 1 grid.
    _, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(5, 5),
                           squeeze=False)

    for r in range(n_rows):
        for c in range(n_cols):
            cell = axes[r, c]
            cell.plot(weight_cube[r, c, :])
            cell.axis('off')
            cell.set_xlim(0, data_dim)
            # Fixed y-range; presumably the weights are normalised to
            # [0, 1] -- TODO confirm against the training code.
            cell.set_ylim(0, 1)
[docs]
def plot_mU_matrix(weight_cube: np.ndarray,
                   data: np.ndarray,
                   set_costum_min_max: bool = False,
                   fence_vmin: float = None,
                   fence_vmax: float = None,
                   density_vmin: float = None,
                   density_vmax: float = None,
                   log_density: bool = False,
                   fence_on: bool = True):
    """
    Plots the mU-matrix; defined here as the data density per cell
    and the lines ("fences") between cells representing the distance
    between the prototypes of adjacent cells.

    This implementation mimics the major aspects of the mU-matrix in
    NeuroScope (a citation of the corresponding figure in the mU-matrix
    paper still has to be added here).
    Complete information on the NeuroScope implementation of the mU-matrix
    is given in the respective NeuroScope documentation, available upon
    request from Erzsébet Merényi (erzsebet@rice.edu).

    Prof. Merényi was not consulted on the implementation of sciSOM functions that
    intend to mimic NeuroScope functionalities of the same name, nor did she have
    opportunity to inspect proof of faithfulness to the same-name module in NeuroScope
    or correctness of the corresponding sciSOM code. Therefore, Dr. Merényi and the
    NeuroScope group take no responsibility for the likeness and the correctness of
    the functions implemented to mimic (partial) NeuroScope capabilities in sciSOM.

    Parameters:
    -------------------
    weight_cube : np.ndarray
        Weight cube after an SOM has been trained, indexed as
        ``(grid row, grid column, feature)``.
    data : np.ndarray
        Data used to train the SOM or data to be mapped to the SOM; a 2-D
        array whose second axis has the same length as the last axis of
        ``weight_cube``.
    set_costum_min_max : bool
        If True, the ``fence_*`` / ``density_vmax`` limits given by the user
        are applied; limits left as None fall back to the automatic values.
    fence_vmin : float
        Fence distance mapped to the darkest grey (only used when
        ``set_costum_min_max`` is True).  Defaults to 0.
    fence_vmax : float
        Fence distance mapped to the brightest grey (only used when
        ``set_costum_min_max`` is True).  Defaults to the largest distance
        between adjacent prototypes.
    density_vmin : float
        Minimum value for the density matrix (accepted but not used yet).
    density_vmax : float
        Density value mapped to the brightest cell colour (only used when
        ``set_costum_min_max`` is True).  Defaults to the largest density.
        If ``log_density`` is True it is compared against the log-scaled
        density.
    log_density : bool
        If True applies ``log10(count + 1)`` to the density matrix
    fence_on : bool
        If False removes fences from mU matrix image

    Returns:
    ----------------
    None
    """
    height, width, som_dim = np.shape(weight_cube)
    _, data_dim = np.shape(data)
    assert som_dim == data_dim, (
        "data and weight_cube must have the same number of features"
    )

    cmap = LinearSegmentedColormap.from_list('black_to_red', ['black', 'red'])
    som_shape = (height, width)

    # ---- Density: number of data points whose best matching unit (BMU)
    # ---- is each cell.
    count_grid = np.zeros(som_shape)
    for point in data:
        # Euclidean distance from this sample to every prototype
        distances = np.linalg.norm(weight_cube - point, axis=-1)
        bmu_index = np.unravel_index(np.argmin(distances), som_shape)
        count_grid[bmu_index] += 1

    if log_density:
        count_grid = np.log10(count_grid + 1)

    if set_costum_min_max and density_vmax is not None:
        density_scale = np.max(density_vmax)
    else:
        density_scale = np.max(count_grid)
    # Guard against an all-zero density (e.g. empty ``data``), which would
    # otherwise turn every cell colour into NaN.
    if density_scale > 0:
        norm_counts = count_grid / density_scale
    else:
        norm_counts = np.zeros_like(count_grid)

    # ---- Fences: distance between prototypes of adjacent cells.
    if fence_on:
        # vertical_fences[i, j]   : cell (i, j) <-> cell (i + 1, j)
        # horizontal_fences[i, j] : cell (i, j) <-> cell (i, j + 1)
        # Only interior neighbours are compared, so no wrap-around between
        # opposite edges of the grid is ever computed.
        vertical_fences = np.linalg.norm(
            weight_cube[1:, :, :] - weight_cube[:-1, :, :], axis=-1)
        horizontal_fences = np.linalg.norm(
            weight_cube[:, 1:, :] - weight_cube[:, :-1, :], axis=-1)

        all_fences = np.concatenate((vertical_fences.ravel(),
                                     horizontal_fences.ravel()))
        fence_low = 0.0
        # A 1 x 1 grid has no fences at all.
        fence_high = float(all_fences.max()) if all_fences.size else 0.0
        if set_costum_min_max:
            # Each limit is honoured independently; a missing one keeps its
            # automatic value.
            if fence_vmin is not None:
                fence_low = fence_vmin
            if fence_vmax is not None:
                fence_high = fence_vmax
        fence_span = fence_high - fence_low

        def fence_color(distance):
            # Degenerate range (all prototypes identical, or vmin >= vmax):
            # draw every fence in the darkest grey instead of dividing by 0.
            if fence_span <= 0:
                return plt.cm.gray(0.0)
            return plt.cm.gray((distance - fence_low) / fence_span)

    fig, ax = plt.subplots()

    # Row 0 of the SOM is drawn at the top of the image.
    for i in range(height):
        for j in range(width):
            ax.add_patch(plt.Rectangle((j, height - i - 1), 1, 1,
                                       color=cmap(norm_counts[i, j]),
                                       ec='black'))

    if fence_on:
        for i in range(height):
            for j in range(width):
                if i < height - 1:
                    # Edge shared with the cell below
                    ax.plot([j, j + 1], [height - i - 1, height - i - 1],
                            color=fence_color(vertical_fences[i, j]))
                if j < width - 1:
                    # Edge shared with the cell to the right
                    ax.plot([j + 1, j + 1], [height - i - 1, height - i],
                            color=fence_color(horizontal_fences[i, j]))

    ax.set_xlim(0, width)
    ax.set_ylim(0, height)
    ax.set_aspect('equal')
    ax.axis('off')  # Turn off the axis
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
    plt.show()
[docs]
def calculate_distance_btw_adjacent_prototypes(weight_cube: np.ndarray):
    """
    Calculate the mean distance between each neuron and its grid neighbours.

    For every cell of the SOM the Euclidean distance between its prototype
    and the prototypes of its directly adjacent cells (up, down, left,
    right; no diagonals, no wrap-around) is computed, and the mean of those
    distances is stored for that cell.  Cells on the border simply have
    fewer neighbours.

    Parameters
    ----------
    weight_cube : np.ndarray
        The weight cube for the SOM, indexed as
        ``(grid axis 0, grid axis 1, feature)``.

    Returns
    -------
    distance_btw_proto : np.ndarray
        Array with the shape of the SOM grid holding, for each neuron, the
        mean distance to its adjacent neurons.  A neuron without any
        neighbour (1 x 1 grid) gets 0.
    """
    n_rows, n_cols, _ = weight_cube.shape
    distance_sum = np.zeros((n_rows, n_cols))
    neighbor_count = np.zeros((n_rows, n_cols))

    if n_rows > 1:
        # along_rows[i, j] = distance between cell (i, j) and cell (i + 1, j)
        along_rows = np.linalg.norm(weight_cube[1:] - weight_cube[:-1], axis=-1)
        distance_sum[1:, :] += along_rows    # neighbour above
        distance_sum[:-1, :] += along_rows   # neighbour below
        neighbor_count[1:, :] += 1
        neighbor_count[:-1, :] += 1

    if n_cols > 1:
        # along_cols[i, j] = distance between cell (i, j) and cell (i, j + 1)
        along_cols = np.linalg.norm(weight_cube[:, 1:] - weight_cube[:, :-1],
                                    axis=-1)
        distance_sum[:, 1:] += along_cols    # neighbour to the left
        distance_sum[:, :-1] += along_cols   # neighbour to the right
        neighbor_count[:, 1:] += 1
        neighbor_count[:, :-1] += 1

    # Divide only where a neighbour exists so an isolated cell stays 0
    # instead of becoming NaN.
    distance_btw_proto = np.zeros((n_rows, n_cols))
    np.divide(distance_sum, neighbor_count, out=distance_btw_proto,
              where=neighbor_count > 0)
    return distance_btw_proto
[docs]
def calculate_density_matrix(weight_cube: np.ndarray,
                             dataset: np.ndarray) -> np.ndarray:
    """
    Calculate density matrix for a given som weight cube and dataset.

    Every data point is assigned to its best matching unit (the neuron whose
    prototype has the smallest Euclidean distance to the point) and the
    number of points assigned to each neuron is counted.

    Note: only BMU hits are counted; no U-matrix (distance between
    prototypes) information is used.

    Parameters
    ----------
    weight_cube : np.ndarray
        SOM weight cube, indexed as ``(grid axis 0, grid axis 1, feature)``
    dataset:
        Data in the same form given to the SOM as input for training
        (2-D, one sample per row)

    Returns
    -------
    density_matrix : np.ndarray
        Float array with the shape of the SOM grid holding the number of
        data points mapped to each neuron
    """
    n_rows, n_cols, n_features = weight_cube.shape
    # Flattening the grid is loop-invariant, so it is done once up front.
    prototypes = weight_cube.reshape(-1, n_features)
    samples = np.asarray(dataset)

    hits = np.zeros(n_rows * n_cols)
    # Work in chunks: vectorised BMU search without ever holding the full
    # (samples x neurons) distance table in memory.
    chunk_size = 4096
    for start in range(0, len(samples), chunk_size):
        chunk = samples[start:start + chunk_size]
        winners = np.argmin(cdist(chunk, prototypes, metric='euclidean'),
                            axis=1)
        hits += np.bincount(winners, minlength=hits.size)

    # Flat neuron index k corresponds to grid cell divmod(k, n_cols), which
    # is exactly what a C-order reshape restores.
    return hits.reshape(n_rows, n_cols)
[docs]
def display_density_matrix(density_matrix: np.ndarray):
    """
    Display the density matrix as an image.

    The matrix is drawn with the ``viridis`` colour map on the current
    pyplot axes, together with a colour bar and the title
    ``'Density Matrix'``, and the figure is shown with ``plt.show()``.

    Parameters
    ----------
    density_matrix : np.ndarray
        The density matrix to display
    """
    # Uses the module-level ``plt``; re-importing pyplot here was redundant.
    heatmap = plt.imshow(density_matrix, cmap='viridis')
    plt.colorbar(heatmap)
    plt.title('Density Matrix')
    plt.show()
[docs]
def rise_time_vs_area_SOM_clusters(data: np.ndarray, colors: Union[list, np.ndarray],
                                   n_rows: int, n_cols: int):
    """
    Plots the rise time vs area for each cluster in the SOM.

    Takes in the data from peaklet level data using the SOM classification
    and outputs a grid of log-log scatter plots, one panel per cluster.
    Panels are filled row by row; the panel with running index ``k`` shows
    the entries with ``data['type'] == k`` drawn in ``colors[k]``.

    Parameters
    ----------
    data : np.ndarray
        structured array with XENONnT data of data type peaks or peaklet;
        the fields ``'area'``, ``'type'`` and ``'area_decile_from_midpoint'``
        are read
    colors : list or np.ndarray
        list of RGB colors (values on a 0-255 scale) used by the SOM, one
        row per cluster.  A black entry is appended internally, so at most
        ``len(colors) + 1`` panels can be drawn.
    n_rows:
        number of rows in grid with the plots
    n_cols:
        number of columns in grid with the plots
    """
    # squeeze=False keeps ``axes`` two-dimensional even when the grid has a
    # single row or column.
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(24, 18),
                             squeeze=False)

    # Generalize this later
    # Extra black colour for the cluster index one past the given palette.
    palette = np.vstack((colors, np.array([0, 0, 0]).reshape((1, 3))))

    cluster = 0
    for i in range(n_rows):
        for j in range(n_cols):
            in_cluster = data['type'] == cluster
            panel = axes[i, j]
            # Column 1 of 'area_decile_from_midpoint' is negated to obtain
            # the rise time -- presumably that decile lies before the
            # midpoint and is stored as a negative time; TODO confirm.
            panel.scatter(data['area'][in_cluster],
                          -data['area_decile_from_midpoint'][in_cluster][:, 1],
                          s=0.5, color=palette[cluster] / 255, alpha=1)
            panel.set_xscale('log')
            panel.set_yscale('log')
            panel.set_xlim(1, 10000000)
            panel.set_ylim(10, 100000)
            cluster += 1
[docs]
def SOM_gird_avg_wavefrom_per_cell(input_data: np.ndarray,
                                   weight_cube: np.ndarray,
                                   output_img_name: str = 'avg_waveform.png',
                                   save_fig: bool = False,
                                   is_struct_array: bool = True):
    """
    Generates image of the average waveform for each cell in the SOM grid.

    Every waveform is mapped to its best matching neuron with
    ``SOM_location_recall``; the subplot at row ``r``, column ``c`` then
    shows, in black, the mean of all waveforms mapped to
    ``weight_cube[r, c]``.  Neurons that no waveform maps to ("dead"
    neurons) are drawn as a flat red line at zero.

    Parameters
    ----------
    input_data : np.ndarray
        waveforms (peaks, peaklets).  Either a structured array whose
        ``'data'`` field holds the waveforms (``is_struct_array=True``) or a
        plain 2-D array with one waveform per row
        (``is_struct_array=False``).
    weight_cube : np.ndarray
        SOM weight cube, indexed as ``(grid row, grid column, sample)``;
        its last axis must have the same length as a waveform
    output_img_name : str
        name of file to save the image to + path
    save_fig : bool
        if True the figure is written to ``output_img_name``
    is_struct_array : bool
        does the data need to be accessed as peaks['data']?
    """
    # Pull the plain 2-D waveform matrix out first, so that the shape check
    # and the recall both work for structured input as well.
    waveforms = input_data['data'] if is_struct_array else input_data

    n_rows, n_cols, dim = np.shape(weight_cube)
    _, data_dim = np.shape(waveforms)
    assert dim == data_dim

    # SOM_location_recall returns a (2, n_samples) array: row 0 holds the
    # index along the second grid axis, row 1 the index along the first.
    location_info = SOM_location_recall(weight_cube, waveforms)
    bmu_col, bmu_row = location_info[0], location_info[1]

    # squeeze=False keeps ``axes`` two-dimensional for 1 x N / N x 1 grids.
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(5, 5),
                             squeeze=False)

    for r in range(n_rows):
        for c in range(n_cols):
            panel = axes[r, c]
            members = waveforms[(bmu_row == r) & (bmu_col == c)]
            if members.size > 0:
                panel.plot(np.mean(members, axis=0), alpha=1, color='black')
            else:
                # Dead neuron: nothing mapped here.
                # Maybe replace this with red X's?
                panel.plot(np.zeros(data_dim), alpha=1, color='red')
            panel.set_xlim(0, data_dim)
            panel.set_ylim(0, 1)
            panel.axis('off')

    if save_fig:
        fig.savefig(output_img_name, bbox_inches='tight')
[docs]
def SOM_location_recall(weight_cube: np.ndarray,
                        normalized_data: np.ndarray,) -> np.ndarray:
    """
    Find the grid location of the best matching neuron for every data point.

    The best matching neuron of a data point is the one whose prototype has
    the smallest Euclidean distance to it.

    Parameters
    ----------
    weight_cube : np.ndarray
        SOM weight cube with shape ``(SOM_xdim, SOM_ydim, n_features)``
    normalized_data : np.ndarray
        data to map onto the SOM, shape ``(n_samples, n_features)``

    Returns
    -------
    array_to_fill : np.ndarray
        integer array of shape ``(2, n_samples)``; row 0 is the index of the
        winning neuron along the second grid axis (``SOM_ydim``) and row 1
        its index along the first grid axis (``SOM_xdim``)
    """
    # Want to make it so it works with different metrics in the future
    n_first, n_second, n_features = weight_cube.shape
    prototypes = weight_cube.reshape(-1, n_features)

    # distance_table[k, s]: distance between flattened neuron k and sample s
    distance_table = cdist(prototypes, normalized_data, metric="euclidean")
    winners = distance_table.argmin(axis=0)

    # Undo the row-major flattening: k = first_idx * n_second + second_idx
    first_idx, second_idx = divmod(winners, n_second)
    return np.stack((second_idx, first_idx))