10_ctx_cc_connection_and_correlation

[1]:

import pandas as pd
import scanpy as sc
from scipy.spatial import cKDTree
from tqdm import tqdm
import matplotlib.pyplot as plt
import anndata as ad
import numpy as np
import os
import warnings
from scipy.stats import pearsonr
import seaborn as sns
import re
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache
import umap
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42

warnings.filterwarnings('ignore')

/mnt/Data16Tc/home/haichao/anaconda3/envs/SpaCon_clone/lib/python3.8/site-packages/umap/distances.py:1063: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/mnt/Data16Tc/home/haichao/anaconda3/envs/SpaCon_clone/lib/python3.8/site-packages/umap/distances.py:1071: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/mnt/Data16Tc/home/haichao/anaconda3/envs/SpaCon_clone/lib/python3.8/site-packages/umap/distances.py:1086: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/mnt/Data16Tc/home/haichao/anaconda3/envs/SpaCon_clone/lib/python3.8/site-packages/umap/umap_.py:660: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()

[2]:

ctx_43_regions = ['FRP', 'MOp', 'MOs', 'SSp-n', 'SSp-bfd', 'SSp-ll', 'SSp-m', 'SSp-ul', 'SSp-tr', 'SSp-un', 'SSs', 'GU', 'VISC', 'AUDd', 'AUDp', 'AUDpo', 'AUDv', 'VISal', 'VISam', 'VISl', 'VISp', 'VISpl', 'VISpm', 'VISli', 'VISpor', 'ACAd', 'ACAv', 'PL', 'ILA', 'ORBl', 'ORBm', 'ORBvl', 'AId', 'AIp', 'AIv', 'RSPagl', 'RSPd', 'RSPv', 'VISa', 'VISrl', 'TEa', 'PERI', 'ECT']

conn data

[3]:

mcc = MouseConnectivityCache(resolution=100)
structure_tree = mcc.get_structure_tree()
annotation = mcc.get_annotation_volume()[0]
cc_coor = []
for x in tqdm(range(132)):
    for y in range(80):
        for z in range(59):
            point = tuple((x,y,z))
            point_id = annotation[point]
            # print(point_id)
            if point_id == 0:
                continue
            parent = structure_tree.ancestor_ids([point_id])[0][0]
            parent_name = structure_tree.get_structures_by_id([parent])[0]['acronym']
            if parent_name.startswith('cc'):
                cc_coor.append([x,y,z])
len(cc_coor)

 11%|█         | 14/132 [00:00<00:01, 59.79it/s]100%|██████████| 132/132 [00:04<00:00, 29.30it/s]

[3]:

[4]:

# cc_coor = pd.read_csv('./cc_xyz_coor_100um.csv')
file_path = '/mnt/Data16Ta/feiyao/tracing_for_haichao/results/'
files = os.listdir(file_path)
files = [i for i in files if 'npy' in i]
ctx2cc_conn_list = []
idx = []
for f in files:
    ctx_region = f.split('-')[1]
    if f.split('-')[2] != 'mean_mean':
        ctx_region = f"{f.split('-')[1]}-{f.split('-')[2]}"
    tmp = np.load(file_path + f)
    conn_tmp = [tmp[x, y, z] for x, y, z in cc_coor]
    ctx2cc_conn_list.append(conn_tmp)
    idx.append(ctx_region)
ctx2cc_conn = pd.DataFrame(data=ctx2cc_conn_list, index=idx, dtype=float)
ctx2cc_conn = ctx2cc_conn.loc[:, (ctx2cc_conn != 0).any(axis=0)]
ctx2cc_conn_raw = ctx2cc_conn.copy()
ctx2cc_conn.shape

[4]:

(43, 98)

[5]:

ctx2cc_conn = ctx2cc_conn.transpose()

[42]:

coor = [cc_coor[i] for i in ctx2cc_conn_raw.columns]
coor = np.array(coor)
plt.figure(figsize=(8,4))
plt.scatter(coor[:, 0], coor[:, 1], c=np.arange(coor.shape[0]), cmap='coolwarm')
plt.gca().invert_yaxis()
plt.colorbar()
plt.savefig('./cc/cc_id_map.pdf', format='pdf')

_images/10_ctx_cc_connection_and_correlation_7_0.png

[5]:

ctx2cc_conn.columns = range(ctx2cc_conn.shape[1])

[13]:

sns.clustermap(ctx2cc_conn, figsize=(10,10))
# sns.heatmap(ctx2cc_conn)
plt.savefig('./cc/cc_raw_heatmap.pdf', format='pdf')

_images/10_ctx_cc_connection_and_correlation_9_0.png

[6]:

columns_to_drop = [col for col in ctx2cc_conn.columns if (ctx2cc_conn[col] < 0.001).all()]
columns_to_drop

[6]:

['ECT',
 'SSp-un',
 'SSp-tr',
 'SSp-n',
 'AIv',
 'VISrl',
 'AIp',
 'PERI',
 'SSp-ul',
 'GU',
 'VISp',
 'MOs',
 'RSPagl',
 'MOp',
 'VISC',
 'SSs',
 'SSp-ll',
 'AId']

[7]:

rows_to_drop = ctx2cc_conn[(ctx2cc_conn < 0.001).all(axis=1)].index
len(rows_to_drop)

[7]:

[8]:

ctx2cc_conn.drop(columns=columns_to_drop, inplace=True)
ctx2cc_conn.drop(index=rows_to_drop, inplace=True)
# ctx2cc_conn

[19]:

import scipy.cluster.hierarchy as sch
import numpy as np

# Customized rows and columns level clustering
# Here, use the Linkage function of Scipy to generate hierarchical clustering
row_linkage = sch.linkage(ctx2cc_conn, method='single')
col_linkage = sch.linkage(ctx2cc_conn.T, method='single')

# Draw clustermap with custom cluster
g = sns.clustermap(ctx2cc_conn, row_linkage=row_linkage, col_linkage=col_linkage)
plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
plt.savefig('./cc/cc_sel_heatmap.pdf', format='pdf')

_images/10_ctx_cc_connection_and_correlation_13_0.png

cluster cc

[10]:

from scipy.cluster import hierarchy
col_linkage = g.dendrogram_col.linkage
col_clusters = hierarchy.fcluster(col_linkage, t=0.5, criterion='distance')
reordered_cols = g.dendrogram_col.reordered_ind
col_labels = np.array(ctx2cc_conn.columns)[reordered_cols]
col_labels

[10]:

array(['RSPv', 'RSPd', 'VISpm', 'VISa', 'VISam', 'SSp-bfd', 'VISpor',
       'VISli', 'TEa', 'VISpl', 'VISl', 'AUDv', 'VISal', 'AUDpo', 'AUDp',
       'AUDd', 'FRP', 'ORBl', 'ORBvl', 'ORBm', 'PL', 'ILA', 'SSp-m',
       'ACAd', 'ACAv'], dtype=object)

[11]:

# Use UMAP for 2D dimension reduction
reducer = umap.UMAP(n_components=2, random_state=0)
embedding = reducer.fit_transform(ctx2cc_conn)

# Classification of UMAP results
n_clusters=3

from sklearn.mixture import GaussianMixture
# Use GMM for clustering
gmm = GaussianMixture(n_components=n_clusters, random_state=0)
gmm.fit(embedding)
clusters = gmm.predict(embedding)

# Convert Embedding to DataFrame for easy operation
embedding_df = pd.DataFrame(embedding, index=ctx2cc_conn.index, columns=['UMAP1', 'UMAP2'])
embedding_df['Cluster'] = clusters

# Get discrete color board
# color_list = plt.cm.tab10(np.linspace(0, 1, n_clusters))
color_list = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd','#8c564b', '#17becf', '#e377c2']

fig, ax = plt.subplots(figsize=(6, 5))
from scipy.spatial import ConvexHull
from shapely.geometry import Polygon
# Draw a 2D UMAP diagram with clustering results
for cluster in range(n_clusters):
    subset = embedding_df[embedding_df['Cluster'] == cluster]
    points = subset[['UMAP1', 'UMAP2']].values

    # Get color
    color = color_list[cluster]

    # Draw a scattered point map
    ax.scatter(subset['UMAP1'], subset['UMAP2'], color=color, label=f'cc{cluster+1}', s=50, alpha=1,
                )

    # Calculate and draw the expanded and smooth convex bag
    if len(points) > 2:  # Ne convex bags require at least 3 points
        hull = ConvexHull(points)
        hull_points = points[hull.vertices]

        # Create polygon and expand
        poly = Polygon(hull_points).buffer(0.2)
        # Make the polygon smoother
        smooth_poly = poly.buffer(0.2, resolution=16, join_style=1).buffer(-0.2, resolution=16, join_style=1)
        # Get the coordinates of polygonal shape
        x, y = smooth_poly.exterior.xy
        # Draw a smooth polygon
        ax.fill(x, y, alpha=0.3, fc=color, ec=color)

plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xticks([])
plt.yticks([])
plt.xlabel('UMAP1')
plt.ylabel('UMAP2')
plt.tight_layout()
# plt.savefig('./cc/cc_conn_umap.pdf', format='pdf')

_images/10_ctx_cc_connection_and_correlation_16_0.png

[12]:

embedding_df['Cluster'] = embedding_df['Cluster'].astype(str)

[13]:

cc_df_sel = pd.DataFrame(columns=['x', 'y', 'c'])
coor = [cc_coor[i] for i in embedding_df.index]
coor = np.array(coor)
cc_df_sel[['x', 'y']] = coor[:, :2]
cc_df_sel['c'] = embedding_df['Cluster'].values
cc_df_sel

[13]:

	x	y	c
0	43	35	2
1	43	36	2
2	43	37	2
3	43	38	2
4	44	36	2
5	44	37	2
6	50	30	0
7	51	29	0
8	52	29	0
9	53	26	0
10	53	28	0
11	54	26	0
12	55	25	0
13	68	18	1
14	69	18	1
15	70	17	1
16	70	18	1
17	71	17	1
18	71	18	1
19	72	17	1
20	72	18	1
21	73	16	1
22	73	17	1
23	73	18	1
24	74	16	1
25	74	17	1
26	74	18	1
27	75	16	1
28	75	17	1
29	75	18	1
30	76	17	1

[14]:

cc_df_all = pd.DataFrame(columns=['x', 'y'])
coor = [cc_coor[i] for i in ctx2cc_conn_raw.columns]
coor = np.array(coor)
cc_df_all[['x', 'y']] = coor[:, :2]
# cc_df_tmp['c'] = embedding_df['Cluster'].values
cc_df_all

[14]:

	x	y
0	43	33
1	43	34
2	43	35
3	43	36
4	43	37
...	...	...
93	74	18
94	75	16
95	75	17
96	75	18
97	76	17

98 rows × 2 columns

[15]:

cc_df_tmp = pd.merge(cc_df_all, cc_df_sel, on=['x', 'y'], how='left')
cc_df_tmp

[15]:

	x	y	c
0	43	33	NaN
1	43	34	NaN
2	43	35	2
3	43	36	2
4	43	37	2
...	...	...	...
93	74	18	1
94	75	16	1
95	75	17	1
96	75	18	1
97	76	17	1

98 rows × 3 columns

[16]:

categories = cc_df_sel['c'].unique()
# Draw the scattered point for each category
plt.scatter(cc_df_all['x'], cc_df_all['y'], color = '#D3D3D3')
cm = ['#2ca02c', '#2377b4', '#ff7f0e']
i=0
for category in categories:
    subset = cc_df_sel[cc_df_sel['c'] == category]
    plt.scatter(subset['x'], subset['y'], label=f'cc{int(category)+1}', color=cm[i])
    i=i+1
plt.legend()
# plt.savefig('./cc/cc.pdf', format='pdf')

[16]:

<matplotlib.legend.Legend at 0x7fe944493fa0>

_images/10_ctx_cc_connection_and_correlation_21_1.png

[17]:

th_cluster_order = []
num = []
for i in range(3):
    tmp = embedding_df[embedding_df['Cluster'] == str(i)]
    th_cluster_order = th_cluster_order + tmp.index.tolist()
    num.append(len(tmp))
    # print(tmp.index.tolist())
th_cluster_order

[17]:

[19]:

ctx2cc_conn = ctx2cc_conn.loc[th_cluster_order, col_labels]
cluster_labels = ['cc1']*num[0] + ['cc2']*num[1] + ['cc3']*num[2]

ctx2cc_conn['cluster'] = cluster_labels
ctx2cc_conn_cluster_mean = ctx2cc_conn.groupby('cluster').mean()
sns.clustermap(ctx2cc_conn_cluster_mean, figsize=(10, 3))
# plt.savefig('./cc/ctx_cc_conn.pdf', format='pdf')

[19]:

<seaborn.matrix.ClusterGrid at 0x7fe944454220>

_images/10_ctx_cc_connection_and_correlation_23_1.png

[35]:

ctx2cc_conn_cluster_mean.to_csv('./data/ctx_cc_conn_filtered.csv')

gene exp data

[20]:

adata_in = sc.read_h5ad('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/adata_processed.h5ad')
allen_region = pd.read_csv('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/allen_region.csv')
adata_in.obs['region'] = allen_region['region'].values
meta = pd.read_csv('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/cell_metadata_with_cluster_annotation.csv')
meta = meta.set_index('cell_label')
meta = meta.loc[adata_in.obs.index.to_list()]
adata_in.obs['cell_type'] = meta['class'].to_list()

adata_out = sc.read_h5ad('/mnt/Data18Td/Data/haichao/merfish_raw_data_zxw3/out_cell_adata/adata_out_cell_distance_q0.3/after_qc/Zhuang-ABCA-3.001.h5ad')
# adata_out = sc.read_h5ad('/mnt/Data18Td/Data/haichao/merfish_raw_data_zxw3/out_cell_adata/after_qc/Zhuang-ABCA-3.001.h5ad')
adata_out.obs['region'] = adata_in.obs.loc[adata_out.obs_names]['region'].values

adata_in = adata_in[adata_in.obs['cell_type'].str.contains('Glut')]
adata_out.obs

[20]:

	totalRNA	brain_section_label	x	y	z	n_genes_by_counts	total_counts	region
100008170567769574864159172860058606533	175	Zhuang-ABCA-3.001	19.063883	33.423346	54.106801	100	175	MOB
100019036144713180485707288452329906643	373	Zhuang-ABCA-3.001	44.290637	63.872748	53.902462	189	373	NDB
10002377904544842423531242460024745973	277	Zhuang-ABCA-3.001	63.860525	7.998013	54.043841	126	277	RSPd2/3
100027648052649525810014127621472143070	435	Zhuang-ABCA-3.001	113.480480	41.056674	54.349928	165	435	arb
100029875144524072265954931895494067096	60	Zhuang-ABCA-3.001	11.572676	37.921554	54.370336	39	60	MOB
...	...	...	...	...	...	...	...	...
99961718914838042706216314325649765172	627	Zhuang-ABCA-3.001	113.199100	23.661374	54.096322	157	627	CENT3
9996242280180655885872867452807576494	157	Zhuang-ABCA-3.001	86.896566	18.326938	54.027272	100	157	SCop
999921392501518309214993564089572563	97	Zhuang-ABCA-3.001	21.082878	31.635078	54.090905	67	97	ORBm1
99995909784199304193294645747252265238	154	Zhuang-ABCA-3.001	111.605478	22.417974	54.070819	93	154	CENT3
99996568769251334694329666788053033728	200	Zhuang-ABCA-3.001	112.945756	31.433985	54.093405	95	200	CENT3

83602 rows × 8 columns

[21]:

gene_com = list(set(adata_in.var_names) & set(adata_out.var_names))
adata_in = adata_in[:, gene_com]
adata_out = adata_out[:, gene_com]

[22]:

adata_ctx = adata_in[adata_in.obs['region'].str.startswith(tuple(ctx2cc_conn_cluster_mean.columns))]
adata_ctx

[22]:

View of AnnData object with n_obs × n_vars = 67823 × 1111
    obs: 'brain_section_label', 'x', 'y', 'z', 'x_ccf', 'y_ccf', 'z_ccf', 'region', 'cell_type'

[24]:

ctx1 = ['SSp-m', 'ACAd', 'ACAv']
ctx2=['RSPv','RSPd', 'VISpm', 'VISa', 'VISam', 'SSp-bfd', 'VISpor', 'VISli', 'TEa', 'VISpl', 'VISl', 'AUDv', 'VISal', 'AUDpo', 'AUDp', 'AUDd']
ctx3 = ['FRP', 'ORBl', 'ORBvl', 'ORBm', 'PL', 'ILA']
adata_ctx.obs['cc_ctx'] = None
adata_ctx.obs.loc[adata_ctx.obs['region'].str.startswith(tuple(ctx1)), 'cc_ctx'] = 'ctx1'
adata_ctx.obs.loc[adata_ctx.obs['region'].str.startswith(tuple(ctx2)), 'cc_ctx'] = 'ctx2'
adata_ctx.obs.loc[adata_ctx.obs['region'].str.startswith(tuple(ctx3)), 'cc_ctx'] = 'ctx3'
# adata_ctx.obs.loc[adata_ctx.obs['region'].str.startswith(tuple(ctx4['feature'])), 'cc_ctx'] = 'ctx4'
adata_ctx = adata_ctx[adata_ctx.obs['cc_ctx'].notna()]

def split_string(s):
    parts = re.split(r'(\d+)', s)
    return parts[0] if parts else ''
adata_ctx.obs['region_corr'] = adata_ctx.obs['region'].apply(split_string)
adata_ctx.obs

[24]:

	brain_section_label	x	y	z	x_ccf	y_ccf	z_ccf	region	cell_type	cc_ctx	region_corr
cell_label
26411547786725657279968630020392489253	Zhuang-ABCA-3.023	70.979368	44.436003	12.009219	7.097937	4.443600	1.200922	TEa5	02 NP-CT-L6b Glut	ctx2	TEa
51881204226374601340923791448380690325	Zhuang-ABCA-3.023	71.048105	43.963880	11.994586	7.104811	4.396388	1.199459	TEa5	02 NP-CT-L6b Glut	ctx2	TEa
9451154036366067416684391406873704139	Zhuang-ABCA-3.023	70.906348	44.496406	12.010524	7.090635	4.449641	1.201052	TEa5	01 IT-ET Glut	ctx2	TEa
171871543800699640490603639355149752394	Zhuang-ABCA-3.023	71.407038	43.722307	11.991027	7.140704	4.372231	1.199103	TEa5	01 IT-ET Glut	ctx2	TEa
103032537075799430231760598323601697566	Zhuang-ABCA-3.023	71.756868	42.845081	11.969093	7.175687	4.284508	1.196909	TEa5	01 IT-ET Glut	ctx2	TEa
...	...	...	...	...	...	...	...	...	...	...	...
76782871153522898745220481389975181375	Zhuang-ABCA-3.009	99.188067	15.861129	38.190379	9.918807	1.586113	3.819038	RSPd2/3	01 IT-ET Glut	ctx2	RSPd
787969273575687407995158747342318857	Zhuang-ABCA-3.009	99.457911	15.644515	38.208613	9.945791	1.564451	3.820861	RSPd2/3	01 IT-ET Glut	ctx2	RSPd
90320078306590901695895422976195202341	Zhuang-ABCA-3.009	99.669320	15.954429	38.226878	9.966932	1.595443	3.822688	RSPd1	01 IT-ET Glut	ctx2	RSPd
94251932557658106169391763576538731088	Zhuang-ABCA-3.009	99.532633	16.481350	38.226079	9.953263	1.648135	3.822608	RSPd1	01 IT-ET Glut	ctx2	RSPd
94737806914081356525582745432967518089	Zhuang-ABCA-3.009	99.422280	16.628568	38.220187	9.942228	1.662857	3.822019	RSPd2/3	01 IT-ET Glut	ctx2	RSPd

67823 rows × 11 columns

[25]:

regions = adata_ctx.obs['region_corr'].values
gene_expression = adata_ctx.X.A
# Create a DataFrame, integrate regional information and gene expression matrix
df = pd.DataFrame(gene_expression, columns=adata_ctx.var_names)
df['region_corr'] = regions
#Colonally group and calculate the average gene expression
ctx_region_mean_expression = df.groupby('region_corr').mean()
# Convert the result to the matrix format of the area*gene
ctx_region_gene_matrix = ctx_region_mean_expression.values
ctx_region_gene_matrix.shape

[25]:

(25, 1111)

[26]:

adata_cc = adata_out[adata_out.obs['region'].str.startswith('cc')]
tmp = adata_cc.copy()
idx = []
coordinate = cc_df_tmp[['x', 'y']].values
kdtree = cKDTree(coordinate)
for coord in tqdm(adata_cc.obs[['x', 'y']].values):
    # Query recent neighbor indexes
    _, nearest_index = kdtree.query(coord)
    idx.append(nearest_index)
plt.figure(figsize=(9,5))
### match the cc cluster
adata_cc.obs['NT_index'] = idx
adata_cc.obs['cc'] = cc_df_tmp.loc[adata_cc.obs['NT_index']]['c'].values
adata_cc = adata_cc[adata_cc.obs['cc'].notna()]
adata_cc.obs['cc'] = 'cc'+((adata_cc.obs['cc']).astype(int)+1).astype(str)
#### plot
categories = adata_cc.obs['cc'].unique()
plt.scatter(adata_out.obs['x'], adata_out.obs['y'], s=1, c='#d3d3d3')
plt.scatter(tmp.obs['x'], tmp.obs['y'], s=1, c='#8f8f8f')

# Draw the scattered point for each category
cm = ['#ff7f0e', '#2ca02c', '#2377b4']
# cm = ['#6597b9', '#6bad6b', '#e9a161']
i=0
for category in categories:
    subset = adata_cc[adata_cc.obs['cc'] == category]
    plt.scatter(subset.obs['x'], subset.obs['y'], label=category, s=3, c=cm[i])
    # print(f'cc_{int(category)}')
    i=i+1
plt.legend()
plt.gca().invert_yaxis()
# plt.savefig('cc_area.png', dpi=600)

100%|██████████| 1187/1187 [00:00<00:00, 32725.57it/s]

_images/10_ctx_cc_connection_and_correlation_31_1.png

[27]:

adata_cc.obs['region_corr'] = adata_cc.obs['cc']
adata_cc.obs

[27]:

	totalRNA	brain_section_label	x	y	z	n_genes_by_counts	total_counts	region	NT_index	cc	region_corr
100699349310225087355794400029769253599	289	Zhuang-ABCA-3.001	67.394394	17.726380	54.104598	138	289	ccb	74	cc2	cc2
100902451446327482578212394659210886364	65	Zhuang-ABCA-3.001	43.321564	36.220293	54.177884	47	65	ccg	3	cc3	cc3
101091759418520737635298400963209655403	282	Zhuang-ABCA-3.001	70.727172	17.820763	54.078071	104	282	ccs	83	cc2	cc2
101177908791681271480760545204792047403	357	Zhuang-ABCA-3.001	53.695160	26.428293	54.015113	124	357	ccb	43	cc1	cc1
101227086293358235805510357201736468429	239	Zhuang-ABCA-3.001	41.673989	36.956672	54.114538	115	239	ccg	4	cc3	cc3
...	...	...	...	...	...	...	...	...	...	...	...
96560296481515876710645847659207426556	58	Zhuang-ABCA-3.001	43.282826	35.133529	54.189423	39	58	ccg	2	cc3	cc3
96578052950946015015587401781979033053	101	Zhuang-ABCA-3.001	72.815062	15.920409	54.055571	59	101	ccs	88	cc2	cc2
96728852215317096694357846973382843058	100	Zhuang-ABCA-3.001	41.845540	34.861770	54.158130	69	100	ccg	2	cc3	cc3
97838821125536355149234993233022684525	133	Zhuang-ABCA-3.001	70.380641	18.437770	54.082850	83	133	ccb	80	cc2	cc2
98228718176791721432106544380776578140	115	Zhuang-ABCA-3.001	42.396503	36.154184	54.151896	68	115	ccg	3	cc3	cc3

498 rows × 11 columns

[28]:

adata_cc_ctx = ad.concat([adata_ctx, adata_cc])
adata_cc_ctx

[28]:

AnnData object with n_obs × n_vars = 68321 × 1111
    obs: 'brain_section_label', 'x', 'y', 'z', 'region', 'region_corr'

[29]:

sc.pp.normalize_total(adata_cc_ctx, target_sum=1e4)
sc.pp.log1p(adata_cc_ctx)
adata_cc_ctx

[29]:

AnnData object with n_obs × n_vars = 68321 × 1111
    obs: 'brain_section_label', 'x', 'y', 'z', 'region', 'region_corr'
    uns: 'log1p'

[30]:

regions = adata_cc_ctx.obs['region_corr'].values
gene_expression = adata_cc_ctx.X.A

# Create a DataFrame to integrate regional information and gene expression matrix together
df = pd.DataFrame(gene_expression, columns=adata_cc_ctx.var_names)
df['region_corr'] = regions

# Calculate the average gene expression by regional grouping and calculate the average gene
region_mean_expression = df.groupby('region_corr').mean()
# Convert the result to the matrix format of the regional*gene
region_gene_matrix = region_mean_expression.values
region_gene_matrix.shape

[30]:

(28, 1111)

[31]:

corr_all = np.corrcoef(region_gene_matrix)
corr_all.shape

[31]:

(28, 28)

[32]:

cc = [f'cc{i+1}' for i in range(n_clusters)]
cc

[32]:

['cc1', 'cc2', 'cc3']

[33]:

cc_subregions = adata_cc_ctx[adata_cc_ctx.obs['region'].str.startswith('cc')].obs['region_corr'].unique()
ctx_subregions = adata_cc_ctx[adata_cc_ctx.obs['region'].str.startswith(tuple(ctx_43_regions))].obs['region_corr'].unique()

region_labels = region_mean_expression.index
ctx_indices = [i for i, label in enumerate(region_labels) if label in ctx_subregions]
cc_indices = [i for i, label in enumerate(region_labels) if label in cc_subregions]
corr = corr_all[np.ix_(ctx_indices, cc_indices)]

ctx_regions_df = [region_labels[i] for i in ctx_indices]
cc_regions_df = [region_labels[i] for i in cc_indices]
corr_df = pd.DataFrame(corr, index=ctx_regions_df, columns=cc_regions_df)
corr_df.columns = cc
corr_df

[33]:

	cc1	cc2	cc3
ACAd	0.278914	0.262625	0.301169
ACAv	0.281494	0.265369	0.304893
AUDd	0.288976	0.261093	0.312110
AUDp	0.298039	0.275037	0.320481
AUDpo	0.289891	0.262205	0.313013
AUDv	0.298164	0.277583	0.321607
FRP	0.277721	0.246925	0.296658
ILA	0.247419	0.234853	0.275192
ORBl	0.289135	0.269547	0.309606
ORBm	0.269378	0.250819	0.295708
ORBvl	0.281068	0.254882	0.303450
PL	0.261306	0.244187	0.287497
RSPd	0.292823	0.274363	0.312567
RSPv	0.283483	0.282744	0.301130
SSp-bfd	0.298980	0.278068	0.317007
SSp-m	0.300344	0.277375	0.317821
TEa	0.286117	0.262562	0.312671
VISa	0.290089	0.264966	0.312611
VISal	0.288796	0.260408	0.309858
VISam	0.297986	0.274052	0.324184
VISl	0.291718	0.268541	0.318078
VISli	0.274898	0.244883	0.294951
VISpl	0.289472	0.265645	0.313973
VISpm	0.299108	0.274570	0.323666
VISpor	0.274229	0.245647	0.298697

[34]:

ctx2cc_conn_cluster_mean = pd.read_csv('./data/ctx_cc_conn_filtered.csv')
ctx2cc_conn_cluster_mean = ctx2cc_conn_cluster_mean.transpose()
ctx2cc_conn_cluster_mean = ctx2cc_conn_cluster_mean.loc[corr_df.index]
ctx2cc_conn_cluster_mean

[34]:

cluster	cc1	cc2	cc3
ACAd	0.000454	3.466430e-05	9.106244e-05
ACAv	0.000533	2.746207e-05	9.169748e-05
AUDd	0.000002	6.115146e-04	1.393698e-06
AUDp	0.000003	5.699260e-04	4.200837e-06
AUDpo	0.000003	5.697359e-04	4.588037e-06
AUDv	0.000007	4.365286e-04	8.039079e-06
FRP	0.000018	3.140131e-07	1.314226e-03
ILA	0.000013	1.089208e-06	7.981067e-04
ORBl	0.000029	6.969948e-07	1.154751e-03
ORBm	0.000014	8.764364e-07	1.038650e-03
ORBvl	0.000018	8.880653e-07	1.177964e-03
PL	0.000038	1.144148e-06	7.018622e-04
RSPd	0.000019	7.062989e-04	1.888608e-06
RSPv	0.000049	8.105875e-04	3.025978e-06
SSp-bfd	0.000019	4.317370e-04	3.731170e-07
SSp-m	0.000799	2.815791e-06	1.012444e-05
TEa	0.000010	3.333817e-04	1.138775e-05
VISa	0.000005	7.333210e-04	5.921385e-07
VISal	0.000003	4.756454e-04	1.858051e-06
VISam	0.000006	8.601038e-04	5.310773e-07
VISl	0.000002	3.263364e-04	6.685498e-06
VISli	0.000002	4.238643e-04	9.831281e-06
VISpl	0.000002	2.921813e-04	1.066889e-05
VISpm	0.000006	5.447638e-04	4.145406e-07
VISpor	0.000003	4.092621e-04	1.611255e-05

[38]:

df1_long = corr_df.reset_index().melt(id_vars='index', var_name='cc', value_name='Correlation')
df2_long = ctx2cc_conn_cluster_mean.reset_index().melt(id_vars='index', var_name='cc', value_name='Connection_strength')
# Merge two long format dataframe
df_merged = pd.merge(df1_long, df2_long, on=['index', 'cc'])
df_merged = df_merged[df_merged['Connection_strength'] > 0.0001]
# Calculate the number of phase relationships and P value
correlation, p_value = pearsonr(df_merged['Correlation'], df_merged['Connection_strength'])

# Draw chart
plt.figure(figsize=(4.5, 4))
sns.regplot(data=df_merged, x='Connection_strength', y='Correlation',
            scatter_kws={'s': 30, 'alpha': 1},
            line_kws={'linewidth': 0.5, 'color': '#D8383A', 'linestyle': '-'})

# Set different colors according to CC1, CC2, CC3
colors = ['#2377b4', '#ff7f0e', '#2ca02c']

for i, c in enumerate(cc):
    subset = df_merged[df_merged['cc'] == c]
    plt.scatter(subset['Connection_strength'], subset['Correlation'], s=30, color=colors[i], label=c)

plt.title(f'r = {correlation:.3f}, p = {p_value:.2e}', fontweight='bold')
plt.legend()

plt.tight_layout()
# plt.show()
# plt.savefig('cc_ctx_corr.pdf', format='pdf')

_images/10_ctx_cc_connection_and_correlation_42_0.png