5_ctx_th_gene_analysis

[62]:

import pandas as pd
import anndata
import scanpy as sc
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import warnings
from scipy.stats import pearsonr
import seaborn as sns
import os
from matplotlib.patches import Patch
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
warnings.filterwarnings('ignore')

[2]:

ctx_regions = ['ACAd', 'ACAv', 'PL', 'ILA', 'ORBl', 'ORBvl', 'AId', 'SSs', 'SSp-bfd', 'SSp-ll', 'SSp-ul', 'SSp-n', 'SSp-m', 'MOp',
               'MOs', 'VISal', 'VISl', 'VISp', 'VISpor', 'VISrl', 'VISam', 'VISpm', 'RSPd', 'RSPv', 'AUDp']
th_regions = ['VPL', 'VPM', 'PO', 'PoT', 'VPMpc', 'VPLpc', 'SPFp',
              'MG', 'PIL', 'PP', 'SGN', 'AD', 'AV', 'LD', 'LP', 'VAL', 'PF', 'CL',
              'SubG', 'LGv', 'IGL', 'POL', 'MD', 'IMD', 'CM', 'SMT',
              'SPA', 'VM', 'PCN', 'PVT', 'PT', 'RE', 'SPFm', 'Xi', 'RH', 'IAM', 'PR',
              'LH', 'MH', 'IAD', 'RT']

[3]:

adata_in = sc.read_h5ad('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/adata_processed.h5ad')
allen_region = pd.read_csv('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/allen_region.csv')
adata_in.obs['region'] = allen_region['region'].values
# add cell type
meta = pd.read_csv('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/cell_metadata_with_cluster_annotation.csv')
meta = meta.set_index('cell_label')
meta = meta.loc[adata_in.obs.index.to_list()]
adata_in.obs['cell_type'] = meta['class'].to_list()
adata_in.obs

[3]:

	brain_section_label	x	y	z	x_ccf	y_ccf	z_ccf	region	cell_type
cell_label
198904341065180396762707397604803217407	Zhuang-ABCA-3.023	49.206853	44.877634	12.168155	4.920685	4.487763	1.216815	SSs1	33 Vascular
252199681526991424029643077826220097990	Zhuang-ABCA-3.023	48.973992	44.813761	12.179006	4.897399	4.481376	1.217901	SSs1	33 Vascular
277720971126854564514249564750701518375	Zhuang-ABCA-3.023	48.791066	44.577722	12.192707	4.879107	4.457772	1.219271	SSs1	33 Vascular
31551867344111790264292067056219852271	Zhuang-ABCA-3.023	48.830489	44.426120	12.195078	4.883049	4.442612	1.219508	SSs1	33 Vascular
131102494428104399865219008178262036485	Zhuang-ABCA-3.023	48.308843	43.028156	12.267879	4.830884	4.302816	1.226788	SSs1	34 Immune
...	...	...	...	...	...	...	...	...	...
318102106429791409781741726367984532777	Zhuang-ABCA-3.009	131.090716	69.334275	41.436743	13.109072	6.933427	4.143674	MDRNd	30 Astro-Epen
35262847161560382172299767067854387528	Zhuang-ABCA-3.009	131.216032	69.494070	41.351034	13.121603	6.949407	4.135103	MDRNd	33 Vascular
75415866509570969932943497000463821106	Zhuang-ABCA-3.009	131.415152	70.764504	40.800403	13.141515	7.076450	4.080040	sctd	24 MY Glut
12350978322417280063239916106423065862	Zhuang-ABCA-3.009	131.646167	71.182557	40.595995	13.164617	7.118256	4.059599	sctd	24 MY Glut
327554758863546024460748891922509519354	Zhuang-ABCA-3.009	131.658892	71.414675	40.501356	13.165889	7.141468	4.050136	sctd	24 MY Glut

1566842 rows × 9 columns

[4]:

adata_th = adata_in[adata_in.obs['region'].isin(th_regions)]
adata_th = adata_th[adata_th.obs['cell_type'].str.contains('Glut')]

adata_ctx = adata_in[adata_in.obs['region'].str.startswith(tuple(ctx_regions))]
adata_ctx = adata_ctx[adata_ctx.obs['cell_type'].str.contains('Glut')]
adata_ctx.obs

[4]:

	brain_section_label	x	y	z	x_ccf	y_ccf	z_ccf	region	cell_type
cell_label
207252950882079766503645227815929952400	Zhuang-ABCA-3.023	50.597984	41.393473	12.239274	5.059798	4.139347	1.223927	SSs2/3	01 IT-ET Glut
311894855078226645952213910865897976013	Zhuang-ABCA-3.023	50.420950	41.271525	12.251970	5.042095	4.127152	1.225197	SSs1	01 IT-ET Glut
125208524519663791324346814779771999476	Zhuang-ABCA-3.023	50.959183	43.276307	12.158869	5.095918	4.327631	1.215887	SSs2/3	01 IT-ET Glut
12594778395225515056477813574460470379	Zhuang-ABCA-3.023	49.836112	42.209685	12.238386	4.983611	4.220968	1.223839	SSs1	01 IT-ET Glut
148621603142722639702356861951538418099	Zhuang-ABCA-3.023	51.023440	42.722536	12.174236	5.102344	4.272254	1.217424	SSs2/3	01 IT-ET Glut
...	...	...	...	...	...	...	...	...	...
109907444386227227105910475953076858179	Zhuang-ABCA-3.009	99.982600	10.826473	38.394998	9.998260	1.082647	3.839500	VISp2/3	01 IT-ET Glut
125697108736839342189105218784221779435	Zhuang-ABCA-3.009	99.842186	10.515567	38.416597	9.984219	1.051557	3.841660	VISp2/3	01 IT-ET Glut
174685434506973661349248592409864249537	Zhuang-ABCA-3.009	100.204030	10.343042	38.424450	10.020403	1.034304	3.842445	VISp1	01 IT-ET Glut
213147423751373953623065876460723241943	Zhuang-ABCA-3.009	99.804367	10.276117	38.433453	9.980437	1.027612	3.843345	VISp1	01 IT-ET Glut
254117510291590640559120967821550650493	Zhuang-ABCA-3.009	99.842682	10.830816	38.395274	9.984268	1.083082	3.839527	VISp2/3	01 IT-ET Glut

122802 rows × 9 columns

[5]:

adata = anndata.concat([adata_ctx, adata_th])
adata.obs

[5]:

	brain_section_label	x	y	z	x_ccf	y_ccf	z_ccf	region	cell_type
cell_label
207252950882079766503645227815929952400	Zhuang-ABCA-3.023	50.597984	41.393473	12.239274	5.059798	4.139347	1.223927	SSs2/3	01 IT-ET Glut
311894855078226645952213910865897976013	Zhuang-ABCA-3.023	50.420950	41.271525	12.251970	5.042095	4.127152	1.225197	SSs1	01 IT-ET Glut
125208524519663791324346814779771999476	Zhuang-ABCA-3.023	50.959183	43.276307	12.158869	5.095918	4.327631	1.215887	SSs2/3	01 IT-ET Glut
12594778395225515056477813574460470379	Zhuang-ABCA-3.023	49.836112	42.209685	12.238386	4.983611	4.220968	1.223839	SSs1	01 IT-ET Glut
148621603142722639702356861951538418099	Zhuang-ABCA-3.023	51.023440	42.722536	12.174236	5.102344	4.272254	1.217424	SSs2/3	01 IT-ET Glut
...	...	...	...	...	...	...	...	...	...
137125155786416424728071422508382942054	Zhuang-ABCA-3.009	86.480430	34.094203	37.248728	8.648043	3.409420	3.724873	SGN	19 MB Glut
186321231466624970722021094909324401885	Zhuang-ABCA-3.009	86.443977	35.015822	37.291043	8.644398	3.501582	3.729104	POL	19 MB Glut
262284519603134366801326445274337827961	Zhuang-ABCA-3.009	86.388989	32.866518	37.212756	8.638899	3.286652	3.721276	SGN	19 MB Glut
33608739852097367198466784523454261485	Zhuang-ABCA-3.009	86.904204	34.752896	37.268567	8.690420	3.475290	3.726857	POL	19 MB Glut
303385550649730012456703013017610179856	Zhuang-ABCA-3.009	88.389510	37.024369	37.400811	8.838951	3.702437	3.740081	POL	19 MB Glut

133403 rows × 9 columns

[6]:

sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
adata

[6]:

AnnData object with n_obs × n_vars = 133403 × 1122
    obs: 'brain_section_label', 'x', 'y', 'z', 'x_ccf', 'y_ccf', 'z_ccf', 'region', 'cell_type'
    uns: 'log1p'

ctx layer56 th correlation

L5/L6 corr

[8]:

layer = '56'
th_ctx_coorelation = pd.DataFrame(index=ctx_regions, columns=th_regions, dtype=float)
for ctx_region in tqdm(ctx_regions):
    adata_ctx_area = adata[adata.obs['region'].str.startswith((ctx_region+layer[0], ctx_region+layer[-1]))]
    for th_region in th_regions:
        adata_th_area = adata[adata.obs['region']==th_region]
        if adata_th_area.shape[0] == 0:
            continue
        ctx_gene = np.mean(adata_ctx_area.X.A, axis=0)
        th_gene = np.mean(adata_th_area.X.A, axis=0)
        corr, p_value = pearsonr(ctx_gene, th_gene)
        th_ctx_coorelation.loc[ctx_region, th_region] = corr
# th_ctx_coorelation

100%|██████████| 25/25 [00:13<00:00,  1.84it/s]

[9]:

col_list = th_ctx_coorelation.columns[~th_ctx_coorelation.isna().any()].tolist()
th_ctx_coorelation = th_ctx_coorelation[col_list]
# th_ctx_coorelation

[10]:

tempdf = pd.read_excel('./data/layer56_to_th_connection_strength.xlsx', index_col=0, sheet_name=None)
Rbp4_L5 = tempdf['Sheet1']
Ntsr1_Syt6_L6 = tempdf['Sheet2']

Rbp4_L5 = Rbp4_L5.replace('TN', -10)
Rbp4_L5.loc['SSs'] = Rbp4_L5.loc[['SSs-1', 'SSs-2']].mean(axis=0)
Rbp4_L5.loc['MOs'] = Rbp4_L5.loc[['MOs-1', 'MOs-2']].mean(axis=0)
Rbp4_L5 = Rbp4_L5.drop(['SSs-1', 'SSs-2', 'MOs-1', 'MOs-2'], axis=0)
Rbp4_L5 = Rbp4_L5.loc[:, col_list]
# ctx_regions.remove("AId")
Rbp4_L5 = Rbp4_L5.loc[ctx_regions]

Ntsr1_Syt6_L6 = Ntsr1_Syt6_L6.replace('TN', -10)
Ntsr1_Syt6_L6.loc['SSs'] = Ntsr1_Syt6_L6.loc[['SSs-1', 'SSs-2']].mean(axis=0)
Ntsr1_Syt6_L6.loc['MOs'] = Ntsr1_Syt6_L6.loc[['MOs-1', 'MOs-2']].mean(axis=0)
Ntsr1_Syt6_L6 = Ntsr1_Syt6_L6.drop(['SSs-1', 'SSs-2', 'MOs-1', 'MOs-2'], axis=0)
Ntsr1_Syt6_L6 = Ntsr1_Syt6_L6.loc[:, col_list]
Ntsr1_Syt6_L6 = Ntsr1_Syt6_L6.loc[ctx_regions]

# Ntsr1_Syt6_L6

[266]:

# sns.heatmap((Rbp4_L5+Ntsr1_Syt6_L6)/2)
# Define color mapping
cluster_labels = ['Prefrontal']*6 + ['Lateral']*1 + ['Somatomotor']*8 + ['Visual']*5 + ['Medial']*4 + ['Aud']

cluster_colors = {'Prefrontal': '#ff0000', 'Lateral': '#bea149', 'Somatomotor': '#f9922b',
                  'Visual': '#90bff9', 'Medial': '#5252a9', 'Aud': '#7c429b'}
# Create Label Color
row_colors = pd.Series(cluster_labels).map(cluster_colors)

fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap((Rbp4_L5+Ntsr1_Syt6_L6)/2, cmap='coolwarm', ax=ax, yticklabels=False)
# Add cluster segmentation line
prev_cluster = cluster_labels[0]
for idx, label in enumerate(cluster_labels):
    if label != prev_cluster:
        ax.axhline(idx, color='black', linewidth=3)
        prev_cluster = label
# Add row label color
idx=0
for id, label in zip(Ntsr1_Syt6_L6.index, cluster_labels):
    ax.text(-0.5, idx + 0.5, id, color=cluster_colors[label], va='center', ha='right')
    idx = idx+1
ax.set_ylabel('')
# Add legends
legend_handles = [Patch(color=color, label=cluster) for cluster, color in cluster_colors.items()]
ax.legend(handles=legend_handles, bbox_to_anchor=(-0.3, 0.6), loc='upper left', borderaxespad=0.)
ax.set_title('L5(Rbp4-Cre_KL100) / L6(Ntsr1+Syt6) Average Projection Volume', fontweight='bold')
plt.tight_layout()
# plt.savefig('./L56_mean_conn.pdf', format='pdf')

[184]:

connect = (Rbp4_L5+Ntsr1_Syt6_L6)/2
connect

[184]:

	VPL	VPM	PO	PoT	VPMpc	VPLpc	SPFp	PIL	PP	SGN	...	PT	RE	SPFm	RH	IAM	PR	LH	MH	IAD	RT
anchor source
ACAd	-1.090171	-6.342319	-5.426504	-5.605730	-6.008217	-10.000000	-5.566647	-5.895726	-2.527515	-10.000000	...	-2.727347	-1.183582	-1.670217	-1.755893	-1.809540	-1.685012	-1.448781	-6.879172	-1.144436	-0.769605
ACAv	-1.423084	-10.000000	-10.000000	-5.608990	-1.929727	-10.000000	-1.132599	-5.927264	-2.399007	-10.000000	...	-1.873386	-1.182032	-1.403915	-1.655801	-1.468856	-1.057059	-5.430204	-10.000000	-0.900422	-0.413475
PL	-10.000000	-10.000000	-10.000000	-10.000000	-5.992111	-10.000000	-5.882870	-5.961731	-10.000000	-10.000000	...	-1.389215	-0.970923	-5.878069	-1.704030	-1.890465	-1.035770	-1.639700	-10.000000	-1.569406	-0.807789
ILA	-6.325188	-10.000000	-10.000000	-10.000000	-6.017124	-10.000000	-5.751314	-2.520655	-6.144408	-6.509481	...	-0.546665	-0.545810	-1.654686	-1.603711	-1.813752	-0.737798	-1.087582	-1.804623	-1.274479	-0.642914
ORBl	-6.224208	-6.003722	-5.437890	-10.000000	-1.905725	-10.000000	-5.918426	-10.000000	-10.000000	-10.000000	...	-2.467256	-1.554422	-6.000569	-1.589246	-2.697192	-5.901769	-1.834357	-10.000000	-2.160936	-0.693127
ORBvl	-5.882161	-10.000000	-10.000000	-6.375555	-2.185265	-10.000000	-5.950850	-6.211784	-10.000000	-6.643945	...	-0.828830	-0.776871	-5.913809	-1.420968	-1.698088	-0.956903	-1.414943	-6.478384	-5.357311	-0.545583
AId	-10.000000	-5.641845	-5.325552	-6.050576	-1.452111	-6.043647	-6.252712	-10.000000	-10.000000	-10.000000	...	-2.700659	-5.744953	-5.990823	-2.209329	-6.914940	-5.937606	-3.096440	-10.000000	-10.000000	-5.233617
SSs	-0.533973	-0.245893	-0.284354	-1.007490	-1.754370	-5.729620	-3.632334	-5.785201	-7.888268	-5.437407	...	-10.000000	-4.436421	-4.193741	-4.288500	-8.377699	-8.356392	-6.792026	-10.000000	-10.000000	-0.683757
SSp-bfd	-5.483834	-0.286921	-0.440568	-1.150783	-6.616602	-10.000000	-2.125942	-5.626288	-5.918936	-1.722711	...	-10.000000	-6.576067	-3.461558	-3.051209	-10.000000	-6.952895	-10.000000	-10.000000	-3.256035	-0.630895
SSp-ll	-0.424806	-0.984444	-0.714249	-1.830208	-6.880859	-10.000000	-5.680297	-5.937618	-10.000000	-2.661697	...	-10.000000	-2.811056	-6.272099	-3.059279	-10.000000	-10.000000	-2.820684	-10.000000	-3.891500	-0.704974
SSp-ul	-0.512134	-4.997219	-0.126221	-1.606216	-1.955906	-2.141609	-5.729040	-10.000000	-10.000000	-10.000000	...	-10.000000	-6.455041	-10.000000	-3.065086	-10.000000	-10.000000	-10.000000	-10.000000	-10.000000	-0.765330
SSp-n	-0.563616	-0.029009	-0.229639	-1.545879	-2.328005	-2.089110	-5.543640	-10.000000	-10.000000	-10.000000	...	-10.754420	-6.051322	-6.353382	-2.713103	-10.000000	-6.398222	-10.000000	-10.000000	-10.000000	-0.544709
SSp-m	-0.538166	0.065971	-0.230394	-5.600657	-1.676116	-6.019576	-10.000000	-6.087167	-10.000000	-10.000000	...	-10.000000	-6.391604	-10.754420	-6.380348	-10.000000	-10.000000	-10.000000	-10.000000	-10.000000	-0.589138
MOp	-0.129938	-5.183974	-0.205113	-5.752578	-6.204141	-6.578336	-10.000000	-10.000000	-10.000000	-2.334184	...	-10.000000	-6.272292	-6.135587	-6.590445	-10.000000	-10.000000	-10.000000	-10.000000	-10.000000	-0.459391
MOs	-0.818288	-0.664884	-0.153652	-3.602665	-3.845986	-4.315142	-3.629701	-6.234426	-8.224540	-6.119077	...	-10.000000	-1.882719	-8.014224	-1.730154	-4.054899	-7.984730	-6.475172	-10.000000	-4.038434	-2.843340
VISal	-1.034583	-5.354679	-0.599560	-5.774497	-6.619113	-10.000000	-2.364640	-1.993791	-5.664126	-1.134140	...	-10.000000	-6.173552	-6.443739	-2.782023	-6.598952	-2.466052	-10.000000	-10.000000	-10.000000	-0.740894
VISl	-5.777306	-5.321867	-1.004276	-5.966014	-10.000000	-10.000000	-10.000000	-5.907370	-5.862698	-5.799969	...	-10.000000	-6.257293	-10.000000	-10.000000	-10.000000	-10.000000	-6.888282	-10.000000	-6.836267	-0.577579
VISp	-1.422962	-5.804932	-5.833879	-6.346772	-10.754420	-10.000000	-10.000000	-6.621287	-10.000000	-1.955335	...	-10.000000	-6.214321	-10.000000	-6.558477	-6.892084	-6.579267	-10.000000	-10.000000	-10.000000	-0.617637
VISpor	-2.192462	-2.376547	-5.726122	-2.671269	-7.021998	-7.002497	-10.000000	-2.298210	-6.092443	-1.422456	...	-10.000000	-10.000000	-10.000000	-10.000000	-10.754420	-6.612503	-6.340301	-10.000000	-10.000000	-1.387643
VISrl	-0.791890	-0.535124	-0.339014	-1.277799	-10.000000	-10.000000	-5.761978	-2.823176	-6.031412	-1.422637	...	-10.000000	-2.532333	-6.264313	-6.090899	-6.429685	-6.377060	-10.000000	-10.000000	-6.126003	-0.644216
VISam	-0.527118	-5.517881	-0.478789	-1.373619	-10.000000	-10.000000	-2.186025	-5.816067	-5.942121	-1.356229	...	-6.514253	-1.600301	-6.087990	-2.147693	-3.042415	-1.905533	-2.012084	-6.859115	-1.975844	-0.398044
VISpm	-0.916817	-5.587206	-1.151516	-10.000000	-10.000000	-10.000000	-6.169564	-5.918708	-10.000000	-6.051864	...	-6.427577	-1.872311	-6.168127	-2.308679	-4.021444	-2.168330	-10.000000	-10.000000	-5.965749	-0.397564
RSPd	-5.670737	-10.000000	-5.986022	-10.000000	-10.000000	-10.000000	-5.812420	-5.987416	-6.104530	-10.000000	...	-10.000000	-6.027620	-6.161968	-2.701966	-5.965488	-6.153988	-5.932982	-10.000000	-6.228290	-0.731420
RSPv	-1.311758	-10.000000	-10.000000	-10.000000	-7.055145	-10.000000	-10.000000	-2.714168	-3.050242	-10.000000	...	-10.000000	-2.677701	-2.675464	-6.199900	-5.892359	-10.000000	-6.172558	-10.000000	-5.758051	-0.739684
AUDp	-5.334003	-5.230048	-5.778810	-1.208616	-10.000000	-10.000000	-2.475106	-1.604605	-5.675994	-0.896259	...	-10.000000	-2.779426	-6.144770	-10.000000	-10.000000	-6.260222	-6.894197	-10.000000	-10.000000	-0.885349

25 rows × 37 columns

[206]:

data = connect
# Sort each line and get the name after sorting
sorted_columns = data.apply(lambda x: x.sort_values(ascending=False).index.tolist(), axis=1)

# Create a new DataFrame, which contains sorted data
sorted_data = pd.DataFrame(index=data.index, columns=data.columns)
for i, row in enumerate(sorted_columns):
    sorted_data.iloc[i] = data.loc[data.index[i], row].values

# Make sure all the values ​​in sorted_data are numerical types
sorted_data = sorted_data.astype(float)

# Create a DataFrame with the same shape as sorted data, which is used to store the name
annotation_data = pd.DataFrame(sorted_columns.tolist(), index=data.index, columns=data.columns)

# Draw a hot picture
plt.figure(figsize=(20, 10))
ax = sns.heatmap(sorted_data, annot=annotation_data, fmt='', cmap='coolwarm', cbar_kws={'label': 'Value'}, annot_kws={'color': 'black'})

# Add white border
from matplotlib.patches import Rectangle
for i, row in enumerate(sorted_data.values):
    for j, value in enumerate(row):
        if value > -2 or value <= -10:
            color = '#6bad6b' if value > -2 else '#5d5d5d'
            rect = Rectangle((j, i), 1, 1, fill=False, edgecolor=color, lw=0.6)
            ax.add_patch(rect)

# Adjust the layout
plt.tight_layout()

# plt.savefig('./L56_tp_tn_conn.pdf', format='pdf')

gene anasys

25 ctx area

[62]:

ctx_region_order = ['ACAd', 'ACAv', 'PL', 'ILA', 'ORBl', 'ORBvl', 'AId', 'SSs', 'SSp-bfd', 'SSp-ll', 'SSp-ul', 'SSp-n', 'SSp-m', 'MOp', 'MOs', 'VISal', 'VISl', 'VISp', 'VISpor', 'VISrl', 'VISam', 'VISpm', 'RSPd', 'RSPv', 'AUDp']

[ ]:

th_tp = {}
th_tn = {}
for c in ctx_region_order:
    tmp = connect.loc[c]
    th_tp[c] = tmp[tmp>-2].index.tolist()
    th_tn[c] = tmp[tmp==-10].index.tolist()

[230]:

ctx_gene_exp_list = []
tp_gene_exp_list = []
tn_gene_exp_list = []
gene_dict = {}

for area in tqdm(ctx_region_order):
    adata.obs['deg'] = 'nan'
    adata.obs.loc[adata.obs['region'].str.startswith((area+'5', area+'6')), 'deg'] = area
    adata.obs.loc[adata.obs['region'].isin(th_tp[area]), 'deg'] = 'connected'
    adata.obs.loc[adata.obs['region'].isin(th_tn[area]), 'deg'] = 'unconnected'
    adata_sel = adata[adata.obs['deg'] != 'nan']

    # ctx vs unconnected
    sc.tl.rank_genes_groups(adata_sel, 'deg', groups=[area], reference='unconnected', method='wilcoxon')
    a_vs_c = sc.get.rank_genes_groups_df(adata_sel, group=area)

    # connected vs unconnected
    sc.tl.rank_genes_groups(adata_sel, 'deg', groups=['connected'], reference='unconnected', method='wilcoxon')
    b_vs_c = sc.get.rank_genes_groups_df(adata_sel, group='connected')

    # Find out the gene of the expression closer to ctx in connected
    similar_genes = []
    for gene in b_vs_c['names']:
        if gene in a_vs_c['names'].values:
            a_logfc = a_vs_c[a_vs_c['names'] == gene]['logfoldchanges'].values[0]
            b_logfc = b_vs_c[b_vs_c['names'] == gene]['logfoldchanges'].values[0]
            if np.sign(a_logfc) == np.sign(b_logfc) and a_logfc > 2 and b_logfc > 2:
                similar_genes.append(gene)
    gene_dict[area] = similar_genes
    if len(similar_genes) == 0:
        print(area)
    adata_sel_ctx = adata_sel[adata_sel.obs['deg'] == area]
    adata_sel_tp = adata_sel[adata_sel.obs['deg'] == 'connected']
    adata_sel_tn = adata_sel[adata_sel.obs['deg'] == 'unconnected']

    # Calculate the average expression
    ctx_mean = adata_sel_ctx[:, similar_genes].X.A.mean(axis=0)
    tp_mean = adata_sel_tp[:, similar_genes].X.A.mean(axis=0)
    tn_mean = adata_sel_tn[:, similar_genes].X.A.mean(axis=0)

    # Sum of calculations
    total_mean = ctx_mean + tp_mean + tn_mean

    # Calculation ratio
    ctx_proportion = ctx_mean / total_mean
    tp_proportion = tp_mean / total_mean
    tn_proportion = tn_mean / total_mean

    ctx_gene_exp_list.append(ctx_proportion)
    tp_gene_exp_list.append(tp_proportion)
    tn_gene_exp_list.append(tn_proportion)

100%|██████████| 25/25 [00:35<00:00,  1.42s/it]

[ ]:

df_tp_vs_tn = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in gene_dict.items()]))
df_tp_vs_tn

[212]:

def merge_lists(original_list, group_sizes):
    merged_list = []
    start = 0
    for size in group_sizes:
        end = start + size
        group = original_list[start:end]
        if group:
            merged_group = np.concatenate(group, axis=0)
            merged_list.append(merged_group)
        start = end
    return merged_list

# Definition group size
group_sizes = [6, 1, 8, 5, 4, 1]

# Merge list
merged_tn = merge_lists(tn_gene_exp_list, group_sizes)
merged_tp = merge_lists(tp_gene_exp_list, group_sizes)
merged_ctx = merge_lists(ctx_gene_exp_list, group_sizes)
# len(merged_tn)
categories = ['Prefrontal', 'Lateral', 'Somatomotor', 'Visual', 'Medial', 'Aud']
experiments = ['connected', 'unconnected', 'cortex']
experiments_data = [merged_tp, merged_tn, merged_ctx]

data = []
for exp_name, experiment in zip(experiments, experiments_data):
    for category, values in zip(categories, experiment):
        for value in values:
            data.append({
                'Experiment': exp_name,
                'Category': category,
                'Value': value
            })

df = pd.DataFrame(data)

custom_palette = {
    'cortex': '#478ccf',
    'connected': '#6bad6b',
    'unconnected': '#8d8d8d'
}
custom_markers = {
    # 'cortex': 'D',
    'connected': 'o',
    'unconnected': 's'
}
plt.figure(figsize=(10,5))
sns.lineplot(data=df, x='Category', y='Value', hue='Experiment', marker='o', palette=custom_palette, dashes=False, linewidth=0.5, markersize=6)

plt.xlabel('ctx_module')
plt.ylabel('percentage')
plt.legend(bbox_to_anchor=(0.3, 1))
plt.tight_layout()

# plt.savefig('./stereo_L56_Moudel_tp_tn_compare_line_gene.pdf', format='pdf')

[ ]:

ctx_gene_exp_percent = [i.mean() for i in ctx_gene_exp_list]
tp_gene_exp_percent = [i.mean() for i in tp_gene_exp_list]
tn_gene_exp_percent = [i.mean() for i in tn_gene_exp_list]

[232]:

feature = ctx_region_order
value1 = ctx_gene_exp_percent
value2 = tp_gene_exp_percent
value3 = tn_gene_exp_percent

N = len(ctx_gene_exp_list)
# Set the angle of the radar diagram to cut a round surface in a flat separation
angles=np.linspace(0, 2*np.pi, N, endpoint=False)
# In order to close the radar map, the following steps need
value1=np.concatenate((value1,[value1[0]]))
value2=np.concatenate((value2,[value2[0]]))
value3=np.concatenate((value3,[value3[0]]))
angles=np.concatenate((angles,[angles[0]]))
feature = np.concatenate((feature, [feature[0]]))
fig=plt.figure(figsize=(6,6))
ax = fig.add_subplot(111, polar=True)
# Draw a line map
ax.plot(angles, value1, 'o-', markersize=2, linewidth=0.5, label = 'cortex', color='#478ccf')
# Fill color
ax.fill(angles, value1, alpha=0.2, color='#478ccf')
# Draw the second folding drawing
ax.plot(angles, value2, 'o-', markersize=2, linewidth=0.5, label = 'connected_th', color='#6bad6b')
ax.fill(angles, value2, alpha=0.2, color='#6bad6b')
# Draw a line map
ax.plot(angles, value3, 'o-', markersize=2, linewidth=0.5, label = 'unconnected_th', color='#8d8d8d')
# Fill color
ax.fill(angles, value3, alpha=0.2, color='#8d8d8d')


# Add the tags of each feature
# ax.set_thetagrids(angles * 180/np.pi, feature)
ax.set_thetagrids(angles * 180/np.pi, feature)  # FRAC parameter setting label distance MAX ( *TP, *TN)+0.05 to depart the center
ax.tick_params(axis='x', pad=6)  # Set the distance between the label and the axis
# Set the range of radar chart
# ax.set_ylim(0.25, 0.45)
# Add title
# plt.title(f'Layer5/6_CTX_in & TH_in', fontweight='bold')
# Add grid line
# ax.grid(True)
ax.grid(True, linewidth=0.25, alpha=0.7)

# ax.set_rgrids([0, 0.2, 0.4, 0.6, 0.8], labels=['', '0.2', '0.4', '0.6', ''])
# Set diagram
plt.legend(loc='center left', bbox_to_anchor=(1, 1))
plt.tight_layout()
# plt.savefig('./stereo_l56_module_ave_exp_radar_plot_gene.pdf', format='pdf')

6 module

[63]:

ctx_module = {'Prefrontal': ['ACAd', 'ACAv', 'PL', 'ILA', 'ORBl', 'ORBvl'], 'Lateral': ['AId'], 'Somatomotor' :['SSs', 'SSp-bfd', 'SSp-ll', 'SSp-ul', 'SSp-n', 'SSp-m', 'MOp', 'MOs'],
              'Visual': ['VISal', 'VISl', 'VISp', 'VISpor', 'VISrl'], 'Medial': ['VISam', 'VISpm', 'RSPd', 'RSPv'], 'Aud': ['AUDp']}
th_tp_module = {}
th_tn_module = {}
# Iterate through each context region
for i, c in enumerate(ctx_module.keys()):
    # Create a temporary DataFrame for each region
    if layer == '5':
        tmp = Rbp4_L5.loc[ctx_module[c]].values.flatten()
    elif layer == '6':
        tmp = Ntsr1_Syt6_L6.loc[ctx_module[c]].values.flatten()
    elif layer == '56':
        connect = (Rbp4_L5+Ntsr1_Syt6_L6)/2
        tmp = connect.loc[ctx_module[c]]

    column_means = tmp.mean()
    # Find the index with the largest five columns on average
    top_5_columns = column_means.nlargest(5).index.tolist()
    th_tp_module[c] = top_5_columns

    bottom_5_columns = column_means.nsmallest(5).index.tolist()
    th_tn_module[c] = bottom_5_columns
th_tp_module

[63]:

{'Prefrontal': ['MD', 'RT', 'VM', 'VAL', 'RE'],
 'Lateral': ['MD', 'PF', 'VPMpc', 'VAL', 'SPA'],
 'Somatomotor': ['PO', 'RT', 'VAL', 'VPL', 'PF'],
 'Visual': ['LP', 'LD', 'RT', 'IGL', 'LGv'],
 'Medial': ['LD', 'LP', 'RT', 'VAL', 'LGv'],
 'Aud': ['RT', 'SGN', 'LP', 'POL', 'PoT']}

[64]:

module_color={'Prefrontal': '#ff0000',
'Lateral': '#ffff66',
 'Somatomotor': '#f9922b',
 'Visual': '#90bff9',
 'Medial': '#5252a9',
 'Aud': '#7c429b'}

[71]:

adata.obs['deg'] = 'nan'
area = 'Visual'
area5 = [i+'5' for i in ctx_module[area]]
area6 = [i+'6' for i in ctx_module[area]]
area56 = area5 + area6
adata.obs.loc[adata.obs['region'].str.startswith(tuple(area56)), 'deg'] = area
# adata.obs.loc[adata.obs['region'].str.startswith(tuple(ctx_module[area])), 'deg'] = area
adata.obs.loc[adata.obs['region'].isin(th_tp_module[area]), 'deg'] = 'connected'
adata.obs.loc[adata.obs['region'].isin(th_tn_module[area]), 'deg'] = 'unconnected'
adata.obs

[71]:

	brain_section_label	x	y	z	x_ccf	y_ccf	z_ccf	region	cell_type	deg
cell_label
207252950882079766503645227815929952400	Zhuang-ABCA-3.023	50.597984	41.393473	12.239274	5.059798	4.139347	1.223927	SSs2/3	01 IT-ET Glut	nan
311894855078226645952213910865897976013	Zhuang-ABCA-3.023	50.420950	41.271525	12.251970	5.042095	4.127152	1.225197	SSs1	01 IT-ET Glut	nan
125208524519663791324346814779771999476	Zhuang-ABCA-3.023	50.959183	43.276307	12.158869	5.095918	4.327631	1.215887	SSs2/3	01 IT-ET Glut	nan
12594778395225515056477813574460470379	Zhuang-ABCA-3.023	49.836112	42.209685	12.238386	4.983611	4.220968	1.223839	SSs1	01 IT-ET Glut	nan
148621603142722639702356861951538418099	Zhuang-ABCA-3.023	51.023440	42.722536	12.174236	5.102344	4.272254	1.217424	SSs2/3	01 IT-ET Glut	nan
...	...	...	...	...	...	...	...	...	...	...
137125155786416424728071422508382942054	Zhuang-ABCA-3.009	86.480430	34.094203	37.248728	8.648043	3.409420	3.724873	SGN	19 MB Glut	nan
186321231466624970722021094909324401885	Zhuang-ABCA-3.009	86.443977	35.015822	37.291043	8.644398	3.501582	3.729104	POL	19 MB Glut	nan
262284519603134366801326445274337827961	Zhuang-ABCA-3.009	86.388989	32.866518	37.212756	8.638899	3.286652	3.721276	SGN	19 MB Glut	nan
33608739852097367198466784523454261485	Zhuang-ABCA-3.009	86.904204	34.752896	37.268567	8.690420	3.475290	3.726857	POL	19 MB Glut	nan
303385550649730012456703013017610179856	Zhuang-ABCA-3.009	88.389510	37.024369	37.400811	8.838951	3.702437	3.740081	POL	19 MB Glut	nan

133403 rows × 10 columns

[72]:

adata_sel = adata[adata.obs['deg'] != 'nan']
adata_sel.obs

[72]:

	brain_section_label	x	y	z	x_ccf	y_ccf	z_ccf	region	cell_type	deg
cell_label
188052700459908704250468015984246340174	Zhuang-ABCA-3.017	78.511319	17.025034	23.450480	7.851132	1.702503	2.345048	VISrl5	01 IT-ET Glut	Visual
244536282695664703731093924351585311804	Zhuang-ABCA-3.017	78.839495	17.453570	23.457475	7.883949	1.745357	2.345747	VISrl5	01 IT-ET Glut	Visual
207737713088111645489974231015219723398	Zhuang-ABCA-3.017	78.672275	18.071984	23.473123	7.867227	1.807198	2.347312	VISrl6a	01 IT-ET Glut	Visual
282030992965481098907637606735824510623	Zhuang-ABCA-3.017	78.575583	19.064710	23.497188	7.857558	1.906471	2.349719	VISal6a	02 NP-CT-L6b Glut	Visual
97661006042925800380741778764498794751	Zhuang-ABCA-3.017	78.796093	19.073735	23.494524	7.879609	1.907373	2.349452	VISal6a	02 NP-CT-L6b Glut	Visual
...	...	...	...	...	...	...	...	...	...	...
156947303650960194776993747274328011056	Zhuang-ABCA-3.009	84.812512	30.863025	37.253313	8.481251	3.086302	3.725331	LP	18 TH Glut	connected
172268668275251998778345610664715788667	Zhuang-ABCA-3.009	84.008563	33.037907	37.280443	8.400856	3.303791	3.728044	LP	18 TH Glut	connected
248145458296088509137955235524055794146	Zhuang-ABCA-3.009	84.917639	32.177712	37.243865	8.491764	3.217771	3.724386	LP	18 TH Glut	connected
27966221641202106948948402442419133181	Zhuang-ABCA-3.009	84.392370	32.908652	37.267710	8.439237	3.290865	3.726771	LP	18 TH Glut	connected
7071071614544513006737101022316271447	Zhuang-ABCA-3.009	83.936955	31.485775	37.276005	8.393695	3.148577	3.727601	LP	18 TH Glut	connected

9473 rows × 10 columns

[75]:

sc.tl.rank_genes_groups(adata_sel, 'deg', groups=[area], reference='unconnected', method='wilcoxon')
a_vs_c = sc.get.rank_genes_groups_df(adata_sel, group=area)

sc.tl.rank_genes_groups(adata_sel, 'deg', groups=['connected'], reference='unconnected', method='wilcoxon')
b_vs_c = sc.get.rank_genes_groups_df(adata_sel, group='connected')

[ ]:

similar_genes = []
for gene in b_vs_c['names']:
    if gene in a_vs_c['names'].values:
        a_logfc = a_vs_c[a_vs_c['names'] == gene]['logfoldchanges'].values[0]
        b_logfc = b_vs_c[b_vs_c['names'] == gene]['logfoldchanges'].values[0]
        # if np.sign(a_logfc) == np.sign(b_logfc) and abs(a_logfc - b_logfc) < 0.4 and a_logfc>2 and b_logfc>2:
        if np.sign(a_logfc) == np.sign(b_logfc) and a_logfc > 2 and b_logfc > 2:
            similar_genes.append(gene)

print(f"similar_genes: {len(similar_genes)}")
print(similar_genes)

[80]:

# Set data
df = adata_sel[:, similar_genes].to_df()
df['cluster'] = adata_sel.obs['deg'].values
df_mean = df.groupby('cluster').mean()
# For every column
column_sums = df_mean.sum()
# The value of each column is removed to the corresponding sum
df_percentage = df_mean.div(column_sums)
df_percentage = df_percentage.loc[[area, 'connected', 'unconnected']]

categories = similar_genes
colors = [module_color[area],'#89bd89', '#bdbdbd']
labels = [area, 'connected', 'unconnected']  # Tags corresponding to color
data = df_percentage.values

# Create a chart
fig, ax = plt.subplots(figsize=(9, 6))

# Draw a stack of stacks
x = np.arange(len(categories))
bottom = np.zeros(len(categories))
width=0.6
for i, row in enumerate(data):
    ax.bar(x, row, bottom=bottom, color=colors[i], edgecolor='white',width=width, label=labels[i])
    bottom += row

# Add full -coverage ribbon
for j in range(len(categories) - 1):
    left = x[j]+width/2
    right = x[j+1]-width/2
    left_data = data[:, j]
    right_data = data[:, j+1]

    left_cumsum = np.cumsum(left_data)
    right_cumsum = np.cumsum(right_data)

    # Add all ribbons from the bottom to the top
    for i in range(len(colors)):
        left_bottom = left_cumsum[i-1] if i > 0 else 0
        left_top = left_cumsum[i]
        right_bottom = right_cumsum[i-1] if i > 0 else 0
        right_top = right_cumsum[i]

        ax.fill([left, right, right, left],
                [left_bottom, right_bottom, right_top, left_top],
                color=colors[i], alpha=0.3)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))  # Figure placed on the outside of the upper right corner

# Set chart style
# ax.set_title('Full Coverage Ribbon Stacked Bar Chart')
ax.set_xlabel('gene')
ax.set_ylabel('Expression percentage')
plt.xticks(x, rotation=45)
ax.set_xticklabels(categories)

plt.tight_layout()
# plt.savefig(f'./Supp_fig/tp_tn_gene_deg_{area}.pdf', format='pdf')

[37]:

gene_path = f'./gene/{area}/'
os.makedirs(gene_path, exist_ok=True)

[82]:

a_vs_c['comparison'] = f'{area}_vs_TN'
b_vs_c['comparison'] = 'TP_vs_TN'
df = pd.concat([a_vs_c, b_vs_c])
df['regulation'] = df['logfoldchanges'].apply(lambda x: 'Up' if x > 0 else 'Down')
df = df[df['logfoldchanges'] > 2]

genes_to_label = similar_genes
comparisons = [f'{area}_vs_TN', 'TP_vs_TN']

# Create graphics and coordinate shafts
fig, ax = plt.subplots(figsize=(6, 6))

# Use Seaborn's Stripplot function
ax = sns.stripplot(x='comparison', y='logfoldchanges', hue='regulation', data=df,
                   jitter=0.3, size=4, palette={'Up': '#fa7f6f', 'Down': '#82b0d2'}, alpha=0.7, orient='v')
sns.stripplot(x='comparison', y='logfoldchanges', data=df[df['names'].isin(genes_to_label)],
              jitter=0.2, size=6, color='black', alpha=1, ax=ax, orient='v')

# Add notes and tags
label_x = 0.5  #The X coordinate of the label (in the middle of the two groups of scattered dots)
label_offset = 0.1  # Vertical spacing between labels
max_y = df['logfoldchanges'].max()
min_y = df['logfoldchanges'].min()
label_y_range = max_y - min_y
label_y_start = min_y + label_y_range * 0.1  # Start the label from the position of 10%of the Y axis

for i, gene in enumerate(genes_to_label):
    gene_data = df[df['names'] == gene]
    label_y = label_y_start + i * label_offset * label_y_range

    for j, comp in enumerate(comparisons):
        if not gene_data[gene_data['comparison'] == comp].empty:
            x = j
            y = gene_data[gene_data['comparison'] == comp]['logfoldchanges'].values[0]
            ax.plot([x, label_x], [y, label_y], color='gray', linestyle='--', linewidth=0.5)

    ax.text(label_x, label_y, gene, fontsize=8, ha='center', va='center')

# Set diagram
from matplotlib.lines import Line2D
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Up', markersize=8, markerfacecolor='#fa7f6f', alpha=0.7),
    # Line2D([0], [0], marker='o', color='w', label='Down', markersize=8, markerfacecolor='#82b0d2', alpha=0.7),
    Line2D([0], [0], marker='o', color='w', label='Common', markersize=8, markerfacecolor='black', alpha=1)
]
ax.legend(handles=legend_elements, title='', bbox_to_anchor=(1.05, 1), loc='upper left')

# plt.axhline(y=2, color='black', linestyle='--', linewidth=2, alpha=0.7, zorder=11)
# Set the title and label
plt.xlabel('Comparison', fontsize=12)
plt.ylabel('logfoldchanges', fontsize=12)

# Add a box with a comparative name
width = 0.7  # width
height = 1.9  # high
x_offset = -width/2  # Adjust the X offset to maintain the center
y_offset = 0  # Adjust Y offset to keep in the middle
cm=[module_color[area], '#89bd89']
for i, comp in enumerate(comparisons):
    rect = plt.Rectangle((i+x_offset, y_offset), width, height, fill=True, facecolor=cm[i], alpha=0.9, zorder=10)
    ax.add_patch(rect)
    ax.text(i, 1, comp, ha='center', va='center', fontweight='bold', color='black', zorder=11)

# Remove the X -axis tag
ax.set_xticklabels([])

# Adjust the range of the X -axis and leave space for the middle label
plt.xlim(-0.5, len(comparisons) - 0.5)

# 调整图的布局
plt.tight_layout()

# plt.savefig(gene_path + 'tp_tn_gene_deg.pdf', format='pdf')