import matplotlib import numpy as np from matplotlib import pyplot as plt from sklearn.mixture import GaussianMixture from astroML.clustering import HierarchicalClustering, get_graph_segments import os import sys import warnings import multiprocess as mp warnings.simplefilter(action='ignore', category=FutureWarning) from tqdm import tqdm def get_clusters(filename,edge_cutoff,savefolder): data = np.genfromtxt(filename,skip_header=1,delimiter=",") X = data[:,1:3] xmin, xmax = (0, 83000) ymin, ymax = (0, 83000) #------------------------------------------------------------ # Compute the MST clustering model print("\tFitting clustering model") n_neighbors = 10 cluster_cutoff = 400 model = HierarchicalClustering(n_neighbors=n_neighbors, edge_cutoff=edge_cutoff, min_cluster_size=cluster_cutoff) model.fit(X) n_components = model.n_components_ labels = model.labels_ #------------------------------------------------------------ # Get the x, y coordinates of the beginning and end of each line segment T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_) T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_, model.cluster_graph_) #------------------------------------------------------------ # Fit a GMM to each individual cluster Nx = 500 Ny = 500 Xgrid = np.vstack(map(np.ravel, np.meshgrid(np.linspace(xmin, xmax, Nx), np.linspace(ymin, ymax, Ny)))).T density = np.zeros(Xgrid.shape[0]) print("\tsaving clusters") for i in range(n_components): ind = (labels == i) Npts = ind.sum() Nclusters = min(12, Npts / 5) gmm = GaussianMixture(n_components=int(Nclusters)).fit(X[ind]) np.savetxt(os.path.join(savefolder,f"Group {i}.csv"),data[ind,:],delimiter=",") dens = np.exp(gmm.score_samples(Xgrid)) density += dens / dens.max() density = density.reshape((Ny, Nx)) #---------------------------------------------------------------------- # Plot the results fig,axs = plt.subplots(1,3,sharey=True,figsize=(12, 4)) print("\tplotting points") norm = matplotlib.colors.Normalize(vmin=np.min(labels),vmax=np.max(labels)) cm = matplotlib.cm cm.norm=norm cm = cm.get_cmap('tab20') axs[0].scatter(X[:, 1], X[:, 0], s=1, lw=0, c=labels,cmap=cm) axs[0].set_xlim(ymin, ymax) axs[0].set_ylim(xmin, xmax) axs[0].xaxis.set_major_formatter(plt.NullFormatter()) axs[0].set_xlabel('(nm)') axs[0].set_ylabel('(nm)') print("\tplotting Tree1") axs[1].plot(T_y, T_x, c='k', lw=0.5) axs[1].set_xlim(ymin, ymax) axs[1].set_ylim(xmin, xmax) axs[1].xaxis.set_major_formatter(plt.NullFormatter()) axs[1].set_xlabel('(nm)') print("\tplotting Tree2") axs[2].plot(T_trunc_y, T_trunc_x, c='k', lw=0.5) axs[2].imshow(density.T, origin='lower', cmap=plt.cm.hot_r, extent=[ymin, ymax, xmin, xmax]) axs[2].set_xlim(ymin, ymax) axs[2].set_ylim(xmin, xmax) axs[2].set_xlabel('(nm)') plt.savefig(os.path.join(savefolder,'plot overview'),dpi=500) def Group_analysis(main_datafolder): subfolders_paths = [os.path.join(main_datafolder, i) for i in os.listdir(main_datafolder) if i[0] != "." and i[:5] == "Group"] def run_on_cluster(gn): filename = gn.split("/")[-1].split(".")[0] print(f"\tMeasuring aggregate time-seires for {filename}") data = np.genfromtxt(gn, skip_header=1, delimiter=",") frames = [] xmin, xmax = (np.min(data[:, 1]), np.max(data[:, 1])) ymin, ymax = (np.min(data[:, 2]), np.max(data[:, 2])) for t in tqdm(range(0, int(np.max(data[:, 0])))): # if t % 50 == 0: #print(t) X = data[:, 1:3][data[:, 0] < t] if len(X) > 2: # ------------------------------------------------------------ # Compute the MST clustering model edge_cutoff = 0.99 cluster_cutoff = 10 distcut = 400 model = HierarchicalClustering(n_neighbors=np.min([10, len(X) - 1]), edge_cutoff=edge_cutoff, min_cluster_size=cluster_cutoff) try: model.fit(X) n_components, labels, clustergraph = model.compute_clusters(min_cluster_size=cluster_cutoff, distcutoff=distcut) # ------------------------------------------------------------ # Get the x, y coordinates of the beginning and end of each line segment T_x, T_y = get_graph_segments(model.X_train_, model.full_tree_) T_trunc_x, T_trunc_y = get_graph_segments(model.X_train_, clustergraph) if np.all(np.bincount(labels[labels > -1]) == 0): frames.append((None, None, None, None, 0, t)) else: mostocc = np.argmax(np.bincount(labels[labels > -1])) # Fit a GMM to biggest cluster Nx = 500 Ny = 500 Xgrid = np.vstack(map(np.ravel, np.meshgrid(np.linspace(xmin, xmax, Nx), np.linspace(ymin, ymax, Ny)))).T density = np.zeros(Xgrid.shape[0]) ind = (labels == mostocc) Npts = ind.sum() Nclusters = min(25, Npts / 5) gmm = GaussianMixture(n_components=int(Nclusters)).fit(X[ind]) dens = np.exp(gmm.score_samples(Xgrid)) density += dens / dens.max() density = density.reshape((Ny, Nx)) gridarea = ((xmax - xmin) / Nx) * ((ymax - ymin) / Ny) size = np.sum(density > np.mean(density)) * gridarea frames.append((X[labels == mostocc], T_trunc_x, T_trunc_y, density, size, t)) except: frames.append((None, None, None, None, -1, t)) import matplotlib.animation as animation fig = plt.figure(figsize=(16, 8)) ax1 = fig.add_subplot(231, aspect='equal') ax2 = fig.add_subplot(232, aspect='equal') ax3 = fig.add_subplot(212, aspect="auto") ax4 = fig.add_subplot(233, aspect="equal") ts = np.array([frames[i][5] for i in range(len(frames))]) sizes = np.array([frames[i][4] for i in range(len(frames))]) def animator(i): if i % 50 == 0: print(f"\tGroup {filename} reached frame {i} out of {len(frames)}") ax1.cla() ax2.cla() ax3.cla() ax4.cla() if frames[i][0] is None: ax3.set_xlim((0, frames[-1][5])) ax3.set_ylim(0, frames[-1][4]) alin = ax3.plot([ts[:i]], [sizes[:i]], "ko") ax3.set(xlabel="Frame", ylabel=f"Size in $nm^2$") else: ax1.scatter(frames[i][0][:, 0], frames[i][0][:, 1], c='k', s=0.5) ax2.plot(frames[i][1], frames[i][2], c='k', lw=0.5) ax1.set_xlim((xmin, xmax)) ax1.set_ylim((ymin, ymax)) ax1.imshow(frames[i][3], origin='lower', cmap=plt.cm.hot_r, extent=[xmin, xmax, ymin, ymax]) ax2.set_xlim((xmin, xmax)) ax2.set_ylim((ymin, ymax)) ax4.imshow(frames[i][3] > np.mean(frames[i][3]), origin='lower', extent=[xmin, xmax, ymin, ymax]) ax4.set_xlim((xmin, xmax)) ax4.set_ylim((ymin, ymax)) # fit nonzero growth def Logistic(x, vmax, k, kD, v0): return vmax / (1 + np.exp(-k * (x - kD))) + v0 def line(x, x0, Rate, Offset): return (x - x0) * Rate + Offset def twoline(x, x01, r1, Offset, r2, switch): x02 = switch - (switch - x01) * r1 / r2 if x < switch: return (x - x01) * r1 + Offset else: return (x - x02) * r2 + Offset mask = sizes > 0 x, y = ts[mask][:i], sizes[mask][:i] if i == len(frames) - 1: np.savetxt(os.path.join(main_datafolder, f"{filename} Growth curve"), np.array([ts, sizes]).T) ax3.set_xlim((0, frames[-1][5])) ax3.set_ylim(0, frames[-1][4]) alin = ax3.plot([ts[:i]], [sizes[:i]], "ko") ax3.set(xlabel="Frame", ylabel="Size in pixels") else: ax3.set_xlim((0, frames[-1][5])) ax3.set_ylim(0, frames[-1][4]) alin = ax3.plot([ts[:i]], [sizes[:i]], "ko") ax3.set(xlabel="Frame", ylabel="Size in pixels") print(f"\tMaking movie for {filename}") ani = animation.FuncAnimation(fig, animator, interval=400, blit=False, frames=len(frames)) Writer = animation.writers['ffmpeg'] writer = Writer(fps=10, metadata=dict(artist='Me'), bitrate=1800) print(os.path.join(main_datafolder, f'Clustered {filename}.mp4')) ani.save(os.path.join(main_datafolder, f'Clustered {filename}.mp4'), writer=writer) return None with mp.Pool(6) as f: output = f.map(run_on_cluster,subfolders_paths) #initialize variable clusterdone = True moviefile = sys.argv[1] cutoff = np.float(sys.argv[2]) directory_name = moviefile[:-4] if not os.path.isdir(directory_name): os.mkdir(directory_name) #run clustering if not os.path.isfile(os.path.join(directory_name,'plot overview.png')): while clusterdone: print(f"Making clusters with cutoff {cutoff}") get_clusters(moviefile,cutoff,directory_name) textin = input("y if cluster ok, newedgecutoff if not ") if not textin == "y": clusterdone = True cutoff = np.float(textin) else: clusterdone = False #run group analysis print("Running single-aggregate analysis") Group_analysis(directory_name)