Overlapping binding positions for Data_Vero_Kymographs

p853_250706_p502_averaged_output_events
p968_250702_p502_averaged_output_events
p853_250706_p511_averaged_output_events

(plot-numpy1) jhuang@WS-2290C:~/DATA/Data_Vero_Kymographs/Binding_positions$ python overlap_v16_calculate_average_matix.py

  1. Averaging: Compute the averaged values for each (position, time_bin) point.

(plot-numpy1) jhuang@WS-2290C:~/DATA/Data_Vero_Kymographs/Binding_positions$ python overlap_v17_draw_plot.py

  1. Threshold-based detection: Apply a signal threshold (e.g., 0.16) to identify bound states at each position and time bin.

  2. Merging neighboring events: Combine adjacent bound regions in both position and time.

  3. Event filtering: Only merged events that satisfy the following conditions are reported as binding events and annotated in the plot:

    • min_duration = 1 # minimum number of time bins
    • min_range = 0.08 μm # minimum position spread for an event
  4. Event statistics: Calculate time bin ranges, durations, peak sizes, and average positions for each event.

Source code of overlap_v16_calculate_average_matix.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
from scipy.ndimage import label

# List of input CSV files
file_list = glob.glob("tracks_p853_250706_p511/*.csv")
print(f"Found {len(file_list)} CSV files: {file_list}")

# Ensure output directory exists
output_dir = os.path.dirname(file_list[0]) if file_list else "."
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Collect all data
all_data = []
all_positions = []
for file in file_list:
    try:
        df = pd.read_csv(file, sep=';').rename(columns=lambda x: x.replace('# ', ''))
        positions = df['position (μm)'].values
        time_bins = df.columns[1:].values
        time_bin_data = df[df.columns[1:]].values
        all_positions.append(positions)
        all_data.append(time_bin_data)
    except Exception as e:
        print(f"Error processing {file} for data collection: {e}")
        continue

# Determine common position range and interpolate
min_pos = np.min([pos.min() for pos in all_positions if len(pos) > 0])
max_pos = np.max([pos.max() for pos in all_positions if len(pos) > 0])
max_len = max(len(pos) for pos in all_positions if len(pos) > 0)
common_positions = np.linspace(min_pos, max_pos, max_len)

# Interpolate each file's data
interpolated_data = []
for positions, time_bin_data in zip(all_positions, all_data):
    if len(positions) != time_bin_data.shape[0]:
        print("Warning: Mismatch in dimensions, skipping interpolation.")
        continue
    interpolated = np.zeros((time_bin_data.shape[1], len(common_positions)))
    for i in range(time_bin_data.shape[1]):
        interpolated[i] = np.interp(common_positions, positions, time_bin_data[:, i])
    interpolated_data.append(interpolated)

# Compute averaged data
if interpolated_data:
    averaged_data = np.mean(interpolated_data, axis=0)
else:
    print("No data available for averaging.")
    exit()

# Save averaged CSV
output_df = pd.DataFrame(averaged_data.T, columns=[f"time bin {i}" for i in range(averaged_data.shape[0])])
output_df.insert(0, 'position (μm)', common_positions)
output_csv_path = os.path.join(output_dir, 'averaged_output.csv')
output_df.to_csv(output_csv_path, sep=';', index=False)
print(f"Saved averaged output to {output_csv_path}")

# Plot combined kymograph
plt.figure(figsize=(10, 6))
max_ptp = np.max([np.ptp(line) for line in averaged_data]) if averaged_data.size > 0 else 1
padding = 0.1 * np.max(averaged_data) if np.max(averaged_data) > 0 else 1
offset_step = max_ptp + padding
num_bins = averaged_data.shape[0]

for i in range(num_bins):
    line = averaged_data[i]
    offset = i * offset_step
    plt.plot(common_positions, line + offset, 'b-', linewidth=0.5)

y_ticks = [i * offset_step for i in range(num_bins)]
y_labels = [f"time bin {i}" for i in range(num_bins)]
plt.yticks(y_ticks, y_labels)
plt.xlabel('Position (μm)')
plt.ylabel('Binding Density')
plt.title('Combined Kymograph Across All Files')
combined_output_path = os.path.join(output_dir, 'combined_binding_density.png')
plt.savefig(combined_output_path, facecolor='white', edgecolor='none')
plt.close()
print(f"Saved combined kymograph plot to {combined_output_path}")

# ======================================================
# Binding Event Detection + Plotting
# ======================================================

#signal_threshold = 0.2   # min photon density
#min_duration = 1         # min time bins
#min_range = 0.1         # μm, min position spread for an event
signal_threshold = 0.16   # min photon density
min_duration = 1         # min time bins
min_range = 0.08         # μm, min position spread for an event

for file in file_list:
    try:
        df = pd.read_csv(file, sep=';').rename(columns=lambda x: x.replace('# ', ''))
        positions = df['position (μm)'].values
        time_bins = df.columns[1:]
        photons_matrix = df[time_bins].values

        # Step 1: Binary mask
        mask = photons_matrix > signal_threshold

        # Step 2: Connected components
        labeled, num_features = label(mask, structure=np.ones((3, 3)))

        events = []
        for i in range(1, num_features + 1):
            coords = np.argwhere(labeled == i)
            pos_idx = coords[:, 0]
            time_idx = coords[:, 1]

            start_bin = time_idx.min()
            end_bin = time_idx.max()
            duration = end_bin - start_bin + 1

            pos_range = positions[pos_idx].max() - positions[pos_idx].min()

            # Apply filters
            if duration < min_duration or pos_range < min_range:
                continue

            avg_pos = positions[pos_idx].mean()
            peak_size = photons_matrix[pos_idx, time_idx].max()

            events.append({
                "start_bin": int(start_bin),
                "end_bin": int(end_bin),
                "duration_bins": int(duration),
                "pos_range": float(pos_range),
                "avg_position": float(avg_pos),
                "peak_size": float(peak_size)
            })

        # Print results
        print(f"\nBinding events for {os.path.basename(file)}:")
        if not events:
            print("No binding events detected.")
        else:
            for ev in events:
                print(
                    f" - Time bins {ev['start_bin']}–{ev['end_bin']} "
                    f"(duration {ev['duration_bins']}), "
                    f"Pos range: {ev['pos_range']:.2f} μm, "
                    f"Peak: {ev['peak_size']:.2f}, "
                    f"Avg pos: {ev['avg_position']:.2f} μm"
                )

        # Plot heatmap with detected events
        plt.figure(figsize=(10, 6))
        plt.imshow(
            photons_matrix.T,
            aspect='auto',
            origin='lower',
            extent=[positions.min(), positions.max(), 0, len(time_bins)],
            cmap='viridis'
        )
        plt.colorbar(label="Photon counts")
        plt.xlabel("Position (μm)")
        plt.ylabel("Time bin")
        plt.title(f"Kymograph with Binding Events: {os.path.basename(file)}")

        # Overlay events
        for ev in events:
            mid_bin = (ev["start_bin"] + ev["end_bin"]) / 2
            plt.scatter(ev["avg_position"], mid_bin, color="red", marker="o", s=40)
            plt.text(
                ev["avg_position"], mid_bin + 0.5,
                f"{ev['duration_bins']} bins, {ev['pos_range']:.2f} μm",
                color="white", fontsize=7, ha="center"
            )

        out_path = os.path.join(output_dir, os.path.basename(file).replace(".csv", "_events.png"))
        plt.savefig(out_path, dpi=150, bbox_inches="tight")
        plt.close()
        print(f"Saved event plot to {out_path}")

    except Exception as e:
        print(f"Error processing {file}: {e}")
        continue

Source code of overlap_v17_draw_plot.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
from scipy.ndimage import label

# List of input CSV files
file_list = glob.glob("tracks_p853_250706_p511/*.csv")
print(f"Found {len(file_list)} CSV files: {file_list}")

# Ensure output directory exists
output_dir = os.path.dirname(file_list[0]) if file_list else "."
debug_dir = os.path.join(output_dir, "debug_outputs")
os.makedirs(debug_dir, exist_ok=True)

# Parameters
signal_threshold = 0.16   # min photon density
min_duration = 1         # min time bins
min_range = 0.08         # μm, min position spread for an event

# Process each file
for file in file_list:
    try:
        # Read data
        df = pd.read_csv(file, sep=';').rename(columns=lambda x: x.replace('# ', ''))
        positions = df['position (μm)'].values
        time_bins = df.columns[1:]
        photons_matrix = df[time_bins].values

        # =========================
        # Step 1: Threshold mask
        # =========================
        mask = (photons_matrix > signal_threshold).astype(int)

        mask_df = pd.DataFrame(mask, columns=[f"time {i}" for i in range(mask.shape[1])])
        mask_df.insert(0, "position (μm)", positions)
        mask_path = os.path.join(debug_dir, os.path.basename(file).replace(".csv", "_mask.csv"))
        mask_df.to_csv(mask_path, sep=";", index=False)
        print(f"Saved mask matrix to {mask_path}")

        # =========================
        # Step 2: Connected components
        # =========================
        labeled, num_features = label(mask, structure=np.ones((3, 3)))

        labeled_df = pd.DataFrame(labeled, columns=[f"time {i}" for i in range(labeled.shape[1])])
        labeled_df.insert(0, "position (μm)", positions)
        labeled_path = os.path.join(debug_dir, os.path.basename(file).replace(".csv", "_labeled.csv"))
        labeled_df.to_csv(labeled_path, sep=";", index=False)
        print(f"Saved labeled matrix to {labeled_path}")

        # =========================
        # Step 3: Extract events
        # =========================
        events = []
        for i in range(1, num_features + 1):
            coords = np.argwhere(labeled == i)
            pos_idx = coords[:, 0]
            time_idx = coords[:, 1]

            start_bin = time_idx.min()
            end_bin = time_idx.max()
            duration = end_bin - start_bin + 1
            pos_range = positions[pos_idx].max() - positions[pos_idx].min()

            # Apply filters
            if duration < min_duration or pos_range < min_range:
                continue

            avg_pos = positions[pos_idx].mean()
            peak_size = photons_matrix[pos_idx, time_idx].max()

            events.append({
                "event_id": i,
                "start_bin": int(start_bin),
                "end_bin": int(end_bin),
                "duration_bins": int(duration),
                "pos_range": float(pos_range),
                "avg_position": float(avg_pos),
                "peak_size": float(peak_size)
            })

        # Save event table
        events_df = pd.DataFrame(events)
        event_table_path = os.path.join(debug_dir, os.path.basename(file).replace(".csv", "_events.csv"))
        events_df.to_csv(event_table_path, sep=";", index=False)
        print(f"Saved event table to {event_table_path}")

        # =========================
        # Step 4: Plot with debug overlays
        # =========================
        plt.figure(figsize=(10, 6))
        plt.imshow(
            photons_matrix.T,
            aspect='auto',
            origin='lower',
            extent=[positions.min(), positions.max(), 0, len(time_bins)],
            cmap='viridis'
        )
        plt.colorbar(label="Binding density (tracks)")    #Photon counts
        plt.xlabel("Position (μm)")
        plt.ylabel("Time bin")
        plt.title(f"Kymograph with Binding Events: {os.path.basename(file)}")

        for ev in events:
            mid_bin = (ev["start_bin"] + ev["end_bin"]) / 2
            plt.scatter(ev["avg_position"], mid_bin, color="red", marker="o", s=40)
            plt.text(
                ev["avg_position"], mid_bin + 0.5,
                f"ID {ev['event_id']}, {ev['duration_bins']} bins, {ev['pos_range']:.2f} μm",
                color="white", fontsize=7, ha="center"
            )

        out_path = os.path.join(debug_dir, os.path.basename(file).replace(".csv", "_events.png"))
        plt.savefig(out_path, dpi=150, bbox_inches="tight")
        plt.close()
        print(f"Saved debug plot to {out_path}")

    except Exception as e:
        print(f"Error processing {file}: {e}")
        continue

Leave a Reply

Your email address will not be published. Required fields are marked *