5_ctx_th_gene_analysis

[62]:
import pandas as pd
import anndata
import scanpy as sc
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import warnings
from scipy.stats import pearsonr
import seaborn as sns
import os
from matplotlib.patches import Patch
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
warnings.filterwarnings('ignore')
[2]:
ctx_regions = ['ACAd', 'ACAv', 'PL', 'ILA', 'ORBl', 'ORBvl', 'AId', 'SSs', 'SSp-bfd', 'SSp-ll', 'SSp-ul', 'SSp-n', 'SSp-m', 'MOp',
               'MOs', 'VISal', 'VISl', 'VISp', 'VISpor', 'VISrl', 'VISam', 'VISpm', 'RSPd', 'RSPv', 'AUDp']
th_regions = ['VPL', 'VPM', 'PO', 'PoT', 'VPMpc', 'VPLpc', 'SPFp',
              'MG', 'PIL', 'PP', 'SGN', 'AD', 'AV', 'LD', 'LP', 'VAL', 'PF', 'CL',
              'SubG', 'LGv', 'IGL', 'POL', 'MD', 'IMD', 'CM', 'SMT',
              'SPA', 'VM', 'PCN', 'PVT', 'PT', 'RE', 'SPFm', 'Xi', 'RH', 'IAM', 'PR',
              'LH', 'MH', 'IAD', 'RT']
[3]:
adata_in = sc.read_h5ad('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/adata_processed.h5ad')
allen_region = pd.read_csv('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/allen_region.csv')
adata_in.obs['region'] = allen_region['region'].values
# add cell type
meta = pd.read_csv('/mnt/Data16Tc/home/haichao/code/SpaCon/Data/N_20231213_zxw/mouse_3/cell_metadata_with_cluster_annotation.csv')
meta = meta.set_index('cell_label')
meta = meta.loc[adata_in.obs.index.to_list()]
adata_in.obs['cell_type'] = meta['class'].to_list()
adata_in.obs
[3]:
brain_section_label x y z x_ccf y_ccf z_ccf region cell_type
cell_label
198904341065180396762707397604803217407 Zhuang-ABCA-3.023 49.206853 44.877634 12.168155 4.920685 4.487763 1.216815 SSs1 33 Vascular
252199681526991424029643077826220097990 Zhuang-ABCA-3.023 48.973992 44.813761 12.179006 4.897399 4.481376 1.217901 SSs1 33 Vascular
277720971126854564514249564750701518375 Zhuang-ABCA-3.023 48.791066 44.577722 12.192707 4.879107 4.457772 1.219271 SSs1 33 Vascular
31551867344111790264292067056219852271 Zhuang-ABCA-3.023 48.830489 44.426120 12.195078 4.883049 4.442612 1.219508 SSs1 33 Vascular
131102494428104399865219008178262036485 Zhuang-ABCA-3.023 48.308843 43.028156 12.267879 4.830884 4.302816 1.226788 SSs1 34 Immune
... ... ... ... ... ... ... ... ... ...
318102106429791409781741726367984532777 Zhuang-ABCA-3.009 131.090716 69.334275 41.436743 13.109072 6.933427 4.143674 MDRNd 30 Astro-Epen
35262847161560382172299767067854387528 Zhuang-ABCA-3.009 131.216032 69.494070 41.351034 13.121603 6.949407 4.135103 MDRNd 33 Vascular
75415866509570969932943497000463821106 Zhuang-ABCA-3.009 131.415152 70.764504 40.800403 13.141515 7.076450 4.080040 sctd 24 MY Glut
12350978322417280063239916106423065862 Zhuang-ABCA-3.009 131.646167 71.182557 40.595995 13.164617 7.118256 4.059599 sctd 24 MY Glut
327554758863546024460748891922509519354 Zhuang-ABCA-3.009 131.658892 71.414675 40.501356 13.165889 7.141468 4.050136 sctd 24 MY Glut

1566842 rows × 9 columns

[4]:
adata_th = adata_in[adata_in.obs['region'].isin(th_regions)]
adata_th = adata_th[adata_th.obs['cell_type'].str.contains('Glut')]

adata_ctx = adata_in[adata_in.obs['region'].str.startswith(tuple(ctx_regions))]
adata_ctx = adata_ctx[adata_ctx.obs['cell_type'].str.contains('Glut')]
adata_ctx.obs
[4]:
brain_section_label x y z x_ccf y_ccf z_ccf region cell_type
cell_label
207252950882079766503645227815929952400 Zhuang-ABCA-3.023 50.597984 41.393473 12.239274 5.059798 4.139347 1.223927 SSs2/3 01 IT-ET Glut
311894855078226645952213910865897976013 Zhuang-ABCA-3.023 50.420950 41.271525 12.251970 5.042095 4.127152 1.225197 SSs1 01 IT-ET Glut
125208524519663791324346814779771999476 Zhuang-ABCA-3.023 50.959183 43.276307 12.158869 5.095918 4.327631 1.215887 SSs2/3 01 IT-ET Glut
12594778395225515056477813574460470379 Zhuang-ABCA-3.023 49.836112 42.209685 12.238386 4.983611 4.220968 1.223839 SSs1 01 IT-ET Glut
148621603142722639702356861951538418099 Zhuang-ABCA-3.023 51.023440 42.722536 12.174236 5.102344 4.272254 1.217424 SSs2/3 01 IT-ET Glut
... ... ... ... ... ... ... ... ... ...
109907444386227227105910475953076858179 Zhuang-ABCA-3.009 99.982600 10.826473 38.394998 9.998260 1.082647 3.839500 VISp2/3 01 IT-ET Glut
125697108736839342189105218784221779435 Zhuang-ABCA-3.009 99.842186 10.515567 38.416597 9.984219 1.051557 3.841660 VISp2/3 01 IT-ET Glut
174685434506973661349248592409864249537 Zhuang-ABCA-3.009 100.204030 10.343042 38.424450 10.020403 1.034304 3.842445 VISp1 01 IT-ET Glut
213147423751373953623065876460723241943 Zhuang-ABCA-3.009 99.804367 10.276117 38.433453 9.980437 1.027612 3.843345 VISp1 01 IT-ET Glut
254117510291590640559120967821550650493 Zhuang-ABCA-3.009 99.842682 10.830816 38.395274 9.984268 1.083082 3.839527 VISp2/3 01 IT-ET Glut

122802 rows × 9 columns

[5]:
adata = anndata.concat([adata_ctx, adata_th])
adata.obs
[5]:
brain_section_label x y z x_ccf y_ccf z_ccf region cell_type
cell_label
207252950882079766503645227815929952400 Zhuang-ABCA-3.023 50.597984 41.393473 12.239274 5.059798 4.139347 1.223927 SSs2/3 01 IT-ET Glut
311894855078226645952213910865897976013 Zhuang-ABCA-3.023 50.420950 41.271525 12.251970 5.042095 4.127152 1.225197 SSs1 01 IT-ET Glut
125208524519663791324346814779771999476 Zhuang-ABCA-3.023 50.959183 43.276307 12.158869 5.095918 4.327631 1.215887 SSs2/3 01 IT-ET Glut
12594778395225515056477813574460470379 Zhuang-ABCA-3.023 49.836112 42.209685 12.238386 4.983611 4.220968 1.223839 SSs1 01 IT-ET Glut
148621603142722639702356861951538418099 Zhuang-ABCA-3.023 51.023440 42.722536 12.174236 5.102344 4.272254 1.217424 SSs2/3 01 IT-ET Glut
... ... ... ... ... ... ... ... ... ...
137125155786416424728071422508382942054 Zhuang-ABCA-3.009 86.480430 34.094203 37.248728 8.648043 3.409420 3.724873 SGN 19 MB Glut
186321231466624970722021094909324401885 Zhuang-ABCA-3.009 86.443977 35.015822 37.291043 8.644398 3.501582 3.729104 POL 19 MB Glut
262284519603134366801326445274337827961 Zhuang-ABCA-3.009 86.388989 32.866518 37.212756 8.638899 3.286652 3.721276 SGN 19 MB Glut
33608739852097367198466784523454261485 Zhuang-ABCA-3.009 86.904204 34.752896 37.268567 8.690420 3.475290 3.726857 POL 19 MB Glut
303385550649730012456703013017610179856 Zhuang-ABCA-3.009 88.389510 37.024369 37.400811 8.838951 3.702437 3.740081 POL 19 MB Glut

133403 rows × 9 columns

[6]:
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
adata
[6]:
AnnData object with n_obs × n_vars = 133403 × 1122
    obs: 'brain_section_label', 'x', 'y', 'z', 'x_ccf', 'y_ccf', 'z_ccf', 'region', 'cell_type'
    uns: 'log1p'

ctx layer56 th correlation

L5/L6 corr

[8]:
layer = '56'
th_ctx_coorelation = pd.DataFrame(index=ctx_regions, columns=th_regions, dtype=float)
for ctx_region in tqdm(ctx_regions):
    adata_ctx_area = adata[adata.obs['region'].str.startswith((ctx_region+layer[0], ctx_region+layer[-1]))]
    for th_region in th_regions:
        adata_th_area = adata[adata.obs['region']==th_region]
        if adata_th_area.shape[0] == 0:
            continue
        ctx_gene = np.mean(adata_ctx_area.X.A, axis=0)
        th_gene = np.mean(adata_th_area.X.A, axis=0)
        corr, p_value = pearsonr(ctx_gene, th_gene)
        th_ctx_coorelation.loc[ctx_region, th_region] = corr
# th_ctx_coorelation
100%|██████████| 25/25 [00:13<00:00,  1.84it/s]
[9]:
col_list = th_ctx_coorelation.columns[~th_ctx_coorelation.isna().any()].tolist()
th_ctx_coorelation = th_ctx_coorelation[col_list]
# th_ctx_coorelation
[10]:
tempdf = pd.read_excel('./data/layer56_to_th_connection_strength.xlsx', index_col=0, sheet_name=None)
Rbp4_L5 = tempdf['Sheet1']
Ntsr1_Syt6_L6 = tempdf['Sheet2']

Rbp4_L5 = Rbp4_L5.replace('TN', -10)
Rbp4_L5.loc['SSs'] = Rbp4_L5.loc[['SSs-1', 'SSs-2']].mean(axis=0)
Rbp4_L5.loc['MOs'] = Rbp4_L5.loc[['MOs-1', 'MOs-2']].mean(axis=0)
Rbp4_L5 = Rbp4_L5.drop(['SSs-1', 'SSs-2', 'MOs-1', 'MOs-2'], axis=0)
Rbp4_L5 = Rbp4_L5.loc[:, col_list]
# ctx_regions.remove("AId")
Rbp4_L5 = Rbp4_L5.loc[ctx_regions]

Ntsr1_Syt6_L6 = Ntsr1_Syt6_L6.replace('TN', -10)
Ntsr1_Syt6_L6.loc['SSs'] = Ntsr1_Syt6_L6.loc[['SSs-1', 'SSs-2']].mean(axis=0)
Ntsr1_Syt6_L6.loc['MOs'] = Ntsr1_Syt6_L6.loc[['MOs-1', 'MOs-2']].mean(axis=0)
Ntsr1_Syt6_L6 = Ntsr1_Syt6_L6.drop(['SSs-1', 'SSs-2', 'MOs-1', 'MOs-2'], axis=0)
Ntsr1_Syt6_L6 = Ntsr1_Syt6_L6.loc[:, col_list]
Ntsr1_Syt6_L6 = Ntsr1_Syt6_L6.loc[ctx_regions]

# Ntsr1_Syt6_L6
[266]:
# sns.heatmap((Rbp4_L5+Ntsr1_Syt6_L6)/2)
# Define color mapping
cluster_labels = ['Prefrontal']*6 + ['Lateral']*1 + ['Somatomotor']*8 + ['Visual']*5 + ['Medial']*4 + ['Aud']

cluster_colors = {'Prefrontal': '#ff0000', 'Lateral': '#bea149', 'Somatomotor': '#f9922b',
                  'Visual': '#90bff9', 'Medial': '#5252a9', 'Aud': '#7c429b'}
# Create Label Color
row_colors = pd.Series(cluster_labels).map(cluster_colors)

fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap((Rbp4_L5+Ntsr1_Syt6_L6)/2, cmap='coolwarm', ax=ax, yticklabels=False)
# Add cluster segmentation line
prev_cluster = cluster_labels[0]
for idx, label in enumerate(cluster_labels):
    if label != prev_cluster:
        ax.axhline(idx, color='black', linewidth=3)
        prev_cluster = label
# Add row label color
idx=0
for id, label in zip(Ntsr1_Syt6_L6.index, cluster_labels):
    ax.text(-0.5, idx + 0.5, id, color=cluster_colors[label], va='center', ha='right')
    idx = idx+1
ax.set_ylabel('')
# Add legends
legend_handles = [Patch(color=color, label=cluster) for cluster, color in cluster_colors.items()]
ax.legend(handles=legend_handles, bbox_to_anchor=(-0.3, 0.6), loc='upper left', borderaxespad=0.)
ax.set_title('L5(Rbp4-Cre_KL100) / L6(Ntsr1+Syt6) Average Projection Volume', fontweight='bold')
plt.tight_layout()
# plt.savefig('./L56_mean_conn.pdf', format='pdf')
_images/5_ctx_th_gene_analysis_12_0.png
[184]:
connect = (Rbp4_L5+Ntsr1_Syt6_L6)/2
connect
[184]:
VPL VPM PO PoT VPMpc VPLpc SPFp PIL PP SGN ... PT RE SPFm RH IAM PR LH MH IAD RT
anchor source
ACAd -1.090171 -6.342319 -5.426504 -5.605730 -6.008217 -10.000000 -5.566647 -5.895726 -2.527515 -10.000000 ... -2.727347 -1.183582 -1.670217 -1.755893 -1.809540 -1.685012 -1.448781 -6.879172 -1.144436 -0.769605
ACAv -1.423084 -10.000000 -10.000000 -5.608990 -1.929727 -10.000000 -1.132599 -5.927264 -2.399007 -10.000000 ... -1.873386 -1.182032 -1.403915 -1.655801 -1.468856 -1.057059 -5.430204 -10.000000 -0.900422 -0.413475
PL -10.000000 -10.000000 -10.000000 -10.000000 -5.992111 -10.000000 -5.882870 -5.961731 -10.000000 -10.000000 ... -1.389215 -0.970923 -5.878069 -1.704030 -1.890465 -1.035770 -1.639700 -10.000000 -1.569406 -0.807789
ILA -6.325188 -10.000000 -10.000000 -10.000000 -6.017124 -10.000000 -5.751314 -2.520655 -6.144408 -6.509481 ... -0.546665 -0.545810 -1.654686 -1.603711 -1.813752 -0.737798 -1.087582 -1.804623 -1.274479 -0.642914
ORBl -6.224208 -6.003722 -5.437890 -10.000000 -1.905725 -10.000000 -5.918426 -10.000000 -10.000000 -10.000000 ... -2.467256 -1.554422 -6.000569 -1.589246 -2.697192 -5.901769 -1.834357 -10.000000 -2.160936 -0.693127
ORBvl -5.882161 -10.000000 -10.000000 -6.375555 -2.185265 -10.000000 -5.950850 -6.211784 -10.000000 -6.643945 ... -0.828830 -0.776871 -5.913809 -1.420968 -1.698088 -0.956903 -1.414943 -6.478384 -5.357311 -0.545583
AId -10.000000 -5.641845 -5.325552 -6.050576 -1.452111 -6.043647 -6.252712 -10.000000 -10.000000 -10.000000 ... -2.700659 -5.744953 -5.990823 -2.209329 -6.914940 -5.937606 -3.096440 -10.000000 -10.000000 -5.233617
SSs -0.533973 -0.245893 -0.284354 -1.007490 -1.754370 -5.729620 -3.632334 -5.785201 -7.888268 -5.437407 ... -10.000000 -4.436421 -4.193741 -4.288500 -8.377699 -8.356392 -6.792026 -10.000000 -10.000000 -0.683757
SSp-bfd -5.483834 -0.286921 -0.440568 -1.150783 -6.616602 -10.000000 -2.125942 -5.626288 -5.918936 -1.722711 ... -10.000000 -6.576067 -3.461558 -3.051209 -10.000000 -6.952895 -10.000000 -10.000000 -3.256035 -0.630895
SSp-ll -0.424806 -0.984444 -0.714249 -1.830208 -6.880859 -10.000000 -5.680297 -5.937618 -10.000000 -2.661697 ... -10.000000 -2.811056 -6.272099 -3.059279 -10.000000 -10.000000 -2.820684 -10.000000 -3.891500 -0.704974
SSp-ul -0.512134 -4.997219 -0.126221 -1.606216 -1.955906 -2.141609 -5.729040 -10.000000 -10.000000 -10.000000 ... -10.000000 -6.455041 -10.000000 -3.065086 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -0.765330
SSp-n -0.563616 -0.029009 -0.229639 -1.545879 -2.328005 -2.089110 -5.543640 -10.000000 -10.000000 -10.000000 ... -10.754420 -6.051322 -6.353382 -2.713103 -10.000000 -6.398222 -10.000000 -10.000000 -10.000000 -0.544709
SSp-m -0.538166 0.065971 -0.230394 -5.600657 -1.676116 -6.019576 -10.000000 -6.087167 -10.000000 -10.000000 ... -10.000000 -6.391604 -10.754420 -6.380348 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -0.589138
MOp -0.129938 -5.183974 -0.205113 -5.752578 -6.204141 -6.578336 -10.000000 -10.000000 -10.000000 -2.334184 ... -10.000000 -6.272292 -6.135587 -6.590445 -10.000000 -10.000000 -10.000000 -10.000000 -10.000000 -0.459391
MOs -0.818288 -0.664884 -0.153652 -3.602665 -3.845986 -4.315142 -3.629701 -6.234426 -8.224540 -6.119077 ... -10.000000 -1.882719 -8.014224 -1.730154 -4.054899 -7.984730 -6.475172 -10.000000 -4.038434 -2.843340
VISal -1.034583 -5.354679 -0.599560 -5.774497 -6.619113 -10.000000 -2.364640 -1.993791 -5.664126 -1.134140 ... -10.000000 -6.173552 -6.443739 -2.782023 -6.598952 -2.466052 -10.000000 -10.000000 -10.000000 -0.740894
VISl -5.777306 -5.321867 -1.004276 -5.966014 -10.000000 -10.000000 -10.000000 -5.907370 -5.862698 -5.799969 ... -10.000000 -6.257293 -10.000000 -10.000000 -10.000000 -10.000000 -6.888282 -10.000000 -6.836267 -0.577579
VISp -1.422962 -5.804932 -5.833879 -6.346772 -10.754420 -10.000000 -10.000000 -6.621287 -10.000000 -1.955335 ... -10.000000 -6.214321 -10.000000 -6.558477 -6.892084 -6.579267 -10.000000 -10.000000 -10.000000 -0.617637
VISpor -2.192462 -2.376547 -5.726122 -2.671269 -7.021998 -7.002497 -10.000000 -2.298210 -6.092443 -1.422456 ... -10.000000 -10.000000 -10.000000 -10.000000 -10.754420 -6.612503 -6.340301 -10.000000 -10.000000 -1.387643
VISrl -0.791890 -0.535124 -0.339014 -1.277799 -10.000000 -10.000000 -5.761978 -2.823176 -6.031412 -1.422637 ... -10.000000 -2.532333 -6.264313 -6.090899 -6.429685 -6.377060 -10.000000 -10.000000 -6.126003 -0.644216
VISam -0.527118 -5.517881 -0.478789 -1.373619 -10.000000 -10.000000 -2.186025 -5.816067 -5.942121 -1.356229 ... -6.514253 -1.600301 -6.087990 -2.147693 -3.042415 -1.905533 -2.012084 -6.859115 -1.975844 -0.398044
VISpm -0.916817 -5.587206 -1.151516 -10.000000 -10.000000 -10.000000 -6.169564 -5.918708 -10.000000 -6.051864 ... -6.427577 -1.872311 -6.168127 -2.308679 -4.021444 -2.168330 -10.000000 -10.000000 -5.965749 -0.397564
RSPd -5.670737 -10.000000 -5.986022 -10.000000 -10.000000 -10.000000 -5.812420 -5.987416 -6.104530 -10.000000 ... -10.000000 -6.027620 -6.161968 -2.701966 -5.965488 -6.153988 -5.932982 -10.000000 -6.228290 -0.731420
RSPv -1.311758 -10.000000 -10.000000 -10.000000 -7.055145 -10.000000 -10.000000 -2.714168 -3.050242 -10.000000 ... -10.000000 -2.677701 -2.675464 -6.199900 -5.892359 -10.000000 -6.172558 -10.000000 -5.758051 -0.739684
AUDp -5.334003 -5.230048 -5.778810 -1.208616 -10.000000 -10.000000 -2.475106 -1.604605 -5.675994 -0.896259 ... -10.000000 -2.779426 -6.144770 -10.000000 -10.000000 -6.260222 -6.894197 -10.000000 -10.000000 -0.885349

25 rows × 37 columns

[206]:
data = connect
# Sort each line and get the name after sorting
sorted_columns = data.apply(lambda x: x.sort_values(ascending=False).index.tolist(), axis=1)

# Create a new DataFrame, which contains sorted data
sorted_data = pd.DataFrame(index=data.index, columns=data.columns)
for i, row in enumerate(sorted_columns):
    sorted_data.iloc[i] = data.loc[data.index[i], row].values

# Make sure all the values ​​in sorted_data are numerical types
sorted_data = sorted_data.astype(float)

# Create a DataFrame with the same shape as sorted data, which is used to store the name
annotation_data = pd.DataFrame(sorted_columns.tolist(), index=data.index, columns=data.columns)

# Draw a hot picture
plt.figure(figsize=(20, 10))
ax = sns.heatmap(sorted_data, annot=annotation_data, fmt='', cmap='coolwarm', cbar_kws={'label': 'Value'}, annot_kws={'color': 'black'})

# Add white border
from matplotlib.patches import Rectangle
for i, row in enumerate(sorted_data.values):
    for j, value in enumerate(row):
        if value > -2 or value <= -10:
            color = '#6bad6b' if value > -2 else '#5d5d5d'
            rect = Rectangle((j, i), 1, 1, fill=False, edgecolor=color, lw=0.6)
            ax.add_patch(rect)

# Adjust the layout
plt.tight_layout()

# plt.savefig('./L56_tp_tn_conn.pdf', format='pdf')
_images/5_ctx_th_gene_analysis_14_0.png

gene anasys

25 ctx area

[62]:
ctx_region_order = ['ACAd', 'ACAv', 'PL', 'ILA', 'ORBl', 'ORBvl', 'AId', 'SSs', 'SSp-bfd', 'SSp-ll', 'SSp-ul', 'SSp-n', 'SSp-m', 'MOp', 'MOs', 'VISal', 'VISl', 'VISp', 'VISpor', 'VISrl', 'VISam', 'VISpm', 'RSPd', 'RSPv', 'AUDp']
[ ]:
th_tp = {}
th_tn = {}
for c in ctx_region_order:
    tmp = connect.loc[c]
    th_tp[c] = tmp[tmp>-2].index.tolist()
    th_tn[c] = tmp[tmp==-10].index.tolist()
[230]:
ctx_gene_exp_list = []
tp_gene_exp_list = []
tn_gene_exp_list = []
gene_dict = {}

for area in tqdm(ctx_region_order):
    adata.obs['deg'] = 'nan'
    adata.obs.loc[adata.obs['region'].str.startswith((area+'5', area+'6')), 'deg'] = area
    adata.obs.loc[adata.obs['region'].isin(th_tp[area]), 'deg'] = 'connected'
    adata.obs.loc[adata.obs['region'].isin(th_tn[area]), 'deg'] = 'unconnected'
    adata_sel = adata[adata.obs['deg'] != 'nan']

    # ctx vs unconnected
    sc.tl.rank_genes_groups(adata_sel, 'deg', groups=[area], reference='unconnected', method='wilcoxon')
    a_vs_c = sc.get.rank_genes_groups_df(adata_sel, group=area)

    # connected vs unconnected
    sc.tl.rank_genes_groups(adata_sel, 'deg', groups=['connected'], reference='unconnected', method='wilcoxon')
    b_vs_c = sc.get.rank_genes_groups_df(adata_sel, group='connected')

    # Find out the gene of the expression closer to ctx in connected
    similar_genes = []
    for gene in b_vs_c['names']:
        if gene in a_vs_c['names'].values:
            a_logfc = a_vs_c[a_vs_c['names'] == gene]['logfoldchanges'].values[0]
            b_logfc = b_vs_c[b_vs_c['names'] == gene]['logfoldchanges'].values[0]
            if np.sign(a_logfc) == np.sign(b_logfc) and a_logfc > 2 and b_logfc > 2:
                similar_genes.append(gene)
    gene_dict[area] = similar_genes
    if len(similar_genes) == 0:
        print(area)
    adata_sel_ctx = adata_sel[adata_sel.obs['deg'] == area]
    adata_sel_tp = adata_sel[adata_sel.obs['deg'] == 'connected']
    adata_sel_tn = adata_sel[adata_sel.obs['deg'] == 'unconnected']

    # Calculate the average expression
    ctx_mean = adata_sel_ctx[:, similar_genes].X.A.mean(axis=0)
    tp_mean = adata_sel_tp[:, similar_genes].X.A.mean(axis=0)
    tn_mean = adata_sel_tn[:, similar_genes].X.A.mean(axis=0)

    # Sum of calculations
    total_mean = ctx_mean + tp_mean + tn_mean

    # Calculation ratio
    ctx_proportion = ctx_mean / total_mean
    tp_proportion = tp_mean / total_mean
    tn_proportion = tn_mean / total_mean

    ctx_gene_exp_list.append(ctx_proportion)
    tp_gene_exp_list.append(tp_proportion)
    tn_gene_exp_list.append(tn_proportion)
100%|██████████| 25/25 [00:35<00:00,  1.42s/it]
[ ]:
df_tp_vs_tn = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in gene_dict.items()]))
df_tp_vs_tn
[212]:
def merge_lists(original_list, group_sizes):
    merged_list = []
    start = 0
    for size in group_sizes:
        end = start + size
        group = original_list[start:end]
        if group:
            merged_group = np.concatenate(group, axis=0)
            merged_list.append(merged_group)
        start = end
    return merged_list

# Definition group size
group_sizes = [6, 1, 8, 5, 4, 1]

# Merge list
merged_tn = merge_lists(tn_gene_exp_list, group_sizes)
merged_tp = merge_lists(tp_gene_exp_list, group_sizes)
merged_ctx = merge_lists(ctx_gene_exp_list, group_sizes)
# len(merged_tn)
categories = ['Prefrontal', 'Lateral', 'Somatomotor', 'Visual', 'Medial', 'Aud']
experiments = ['connected', 'unconnected', 'cortex']
experiments_data = [merged_tp, merged_tn, merged_ctx]

data = []
for exp_name, experiment in zip(experiments, experiments_data):
    for category, values in zip(categories, experiment):
        for value in values:
            data.append({
                'Experiment': exp_name,
                'Category': category,
                'Value': value
            })

df = pd.DataFrame(data)

custom_palette = {
    'cortex': '#478ccf',
    'connected': '#6bad6b',
    'unconnected': '#8d8d8d'
}
custom_markers = {
    # 'cortex': 'D',
    'connected': 'o',
    'unconnected': 's'
}
plt.figure(figsize=(10,5))
sns.lineplot(data=df, x='Category', y='Value', hue='Experiment', marker='o', palette=custom_palette, dashes=False, linewidth=0.5, markersize=6)

plt.xlabel('ctx_module')
plt.ylabel('percentage')
plt.legend(bbox_to_anchor=(0.3, 1))
plt.tight_layout()

# plt.savefig('./stereo_L56_Moudel_tp_tn_compare_line_gene.pdf', format='pdf')
[ ]:
ctx_gene_exp_percent = [i.mean() for i in ctx_gene_exp_list]
tp_gene_exp_percent = [i.mean() for i in tp_gene_exp_list]
tn_gene_exp_percent = [i.mean() for i in tn_gene_exp_list]
[232]:
feature = ctx_region_order
value1 = ctx_gene_exp_percent
value2 = tp_gene_exp_percent
value3 = tn_gene_exp_percent

N = len(ctx_gene_exp_list)
# Set the angle of the radar diagram to cut a round surface in a flat separation
angles=np.linspace(0, 2*np.pi, N, endpoint=False)
# In order to close the radar map, the following steps need
value1=np.concatenate((value1,[value1[0]]))
value2=np.concatenate((value2,[value2[0]]))
value3=np.concatenate((value3,[value3[0]]))
angles=np.concatenate((angles,[angles[0]]))
feature = np.concatenate((feature, [feature[0]]))
fig=plt.figure(figsize=(6,6))
ax = fig.add_subplot(111, polar=True)
# Draw a line map
ax.plot(angles, value1, 'o-', markersize=2, linewidth=0.5, label = 'cortex', color='#478ccf')
# Fill color
ax.fill(angles, value1, alpha=0.2, color='#478ccf')
# Draw the second folding drawing
ax.plot(angles, value2, 'o-', markersize=2, linewidth=0.5, label = 'connected_th', color='#6bad6b')
ax.fill(angles, value2, alpha=0.2, color='#6bad6b')
# Draw a line map
ax.plot(angles, value3, 'o-', markersize=2, linewidth=0.5, label = 'unconnected_th', color='#8d8d8d')
# Fill color
ax.fill(angles, value3, alpha=0.2, color='#8d8d8d')


# Add the tags of each feature
# ax.set_thetagrids(angles * 180/np.pi, feature)
ax.set_thetagrids(angles * 180/np.pi, feature)  # FRAC parameter setting label distance MAX ( *TP, *TN)+0.05 to depart the center
ax.tick_params(axis='x', pad=6)  # Set the distance between the label and the axis
# Set the range of radar chart
# ax.set_ylim(0.25, 0.45)
# Add title
# plt.title(f'Layer5/6_CTX_in & TH_in', fontweight='bold')
# Add grid line
# ax.grid(True)
ax.grid(True, linewidth=0.25, alpha=0.7)

# ax.set_rgrids([0, 0.2, 0.4, 0.6, 0.8], labels=['', '0.2', '0.4', '0.6', ''])
# Set diagram
plt.legend(loc='center left', bbox_to_anchor=(1, 1))
plt.tight_layout()
# plt.savefig('./stereo_l56_module_ave_exp_radar_plot_gene.pdf', format='pdf')
_images/5_ctx_th_gene_analysis_23_0.png

6 module

[63]:
ctx_module = {'Prefrontal': ['ACAd', 'ACAv', 'PL', 'ILA', 'ORBl', 'ORBvl'], 'Lateral': ['AId'], 'Somatomotor' :['SSs', 'SSp-bfd', 'SSp-ll', 'SSp-ul', 'SSp-n', 'SSp-m', 'MOp', 'MOs'],
              'Visual': ['VISal', 'VISl', 'VISp', 'VISpor', 'VISrl'], 'Medial': ['VISam', 'VISpm', 'RSPd', 'RSPv'], 'Aud': ['AUDp']}
th_tp_module = {}
th_tn_module = {}
# Iterate through each context region
for i, c in enumerate(ctx_module.keys()):
    # Create a temporary DataFrame for each region
    if layer == '5':
        tmp = Rbp4_L5.loc[ctx_module[c]].values.flatten()
    elif layer == '6':
        tmp = Ntsr1_Syt6_L6.loc[ctx_module[c]].values.flatten()
    elif layer == '56':
        connect = (Rbp4_L5+Ntsr1_Syt6_L6)/2
        tmp = connect.loc[ctx_module[c]]

    column_means = tmp.mean()
    # Find the index with the largest five columns on average
    top_5_columns = column_means.nlargest(5).index.tolist()
    th_tp_module[c] = top_5_columns

    bottom_5_columns = column_means.nsmallest(5).index.tolist()
    th_tn_module[c] = bottom_5_columns
th_tp_module
[63]:
{'Prefrontal': ['MD', 'RT', 'VM', 'VAL', 'RE'],
 'Lateral': ['MD', 'PF', 'VPMpc', 'VAL', 'SPA'],
 'Somatomotor': ['PO', 'RT', 'VAL', 'VPL', 'PF'],
 'Visual': ['LP', 'LD', 'RT', 'IGL', 'LGv'],
 'Medial': ['LD', 'LP', 'RT', 'VAL', 'LGv'],
 'Aud': ['RT', 'SGN', 'LP', 'POL', 'PoT']}
[64]:
module_color={'Prefrontal': '#ff0000',
'Lateral': '#ffff66',
 'Somatomotor': '#f9922b',
 'Visual': '#90bff9',
 'Medial': '#5252a9',
 'Aud': '#7c429b'}
[71]:
adata.obs['deg'] = 'nan'
area = 'Visual'
area5 = [i+'5' for i in ctx_module[area]]
area6 = [i+'6' for i in ctx_module[area]]
area56 = area5 + area6
adata.obs.loc[adata.obs['region'].str.startswith(tuple(area56)), 'deg'] = area
# adata.obs.loc[adata.obs['region'].str.startswith(tuple(ctx_module[area])), 'deg'] = area
adata.obs.loc[adata.obs['region'].isin(th_tp_module[area]), 'deg'] = 'connected'
adata.obs.loc[adata.obs['region'].isin(th_tn_module[area]), 'deg'] = 'unconnected'
adata.obs
[71]:
brain_section_label x y z x_ccf y_ccf z_ccf region cell_type deg
cell_label
207252950882079766503645227815929952400 Zhuang-ABCA-3.023 50.597984 41.393473 12.239274 5.059798 4.139347 1.223927 SSs2/3 01 IT-ET Glut nan
311894855078226645952213910865897976013 Zhuang-ABCA-3.023 50.420950 41.271525 12.251970 5.042095 4.127152 1.225197 SSs1 01 IT-ET Glut nan
125208524519663791324346814779771999476 Zhuang-ABCA-3.023 50.959183 43.276307 12.158869 5.095918 4.327631 1.215887 SSs2/3 01 IT-ET Glut nan
12594778395225515056477813574460470379 Zhuang-ABCA-3.023 49.836112 42.209685 12.238386 4.983611 4.220968 1.223839 SSs1 01 IT-ET Glut nan
148621603142722639702356861951538418099 Zhuang-ABCA-3.023 51.023440 42.722536 12.174236 5.102344 4.272254 1.217424 SSs2/3 01 IT-ET Glut nan
... ... ... ... ... ... ... ... ... ... ...
137125155786416424728071422508382942054 Zhuang-ABCA-3.009 86.480430 34.094203 37.248728 8.648043 3.409420 3.724873 SGN 19 MB Glut nan
186321231466624970722021094909324401885 Zhuang-ABCA-3.009 86.443977 35.015822 37.291043 8.644398 3.501582 3.729104 POL 19 MB Glut nan
262284519603134366801326445274337827961 Zhuang-ABCA-3.009 86.388989 32.866518 37.212756 8.638899 3.286652 3.721276 SGN 19 MB Glut nan
33608739852097367198466784523454261485 Zhuang-ABCA-3.009 86.904204 34.752896 37.268567 8.690420 3.475290 3.726857 POL 19 MB Glut nan
303385550649730012456703013017610179856 Zhuang-ABCA-3.009 88.389510 37.024369 37.400811 8.838951 3.702437 3.740081 POL 19 MB Glut nan

133403 rows × 10 columns

[72]:
adata_sel = adata[adata.obs['deg'] != 'nan']
adata_sel.obs
[72]:
brain_section_label x y z x_ccf y_ccf z_ccf region cell_type deg
cell_label
188052700459908704250468015984246340174 Zhuang-ABCA-3.017 78.511319 17.025034 23.450480 7.851132 1.702503 2.345048 VISrl5 01 IT-ET Glut Visual
244536282695664703731093924351585311804 Zhuang-ABCA-3.017 78.839495 17.453570 23.457475 7.883949 1.745357 2.345747 VISrl5 01 IT-ET Glut Visual
207737713088111645489974231015219723398 Zhuang-ABCA-3.017 78.672275 18.071984 23.473123 7.867227 1.807198 2.347312 VISrl6a 01 IT-ET Glut Visual
282030992965481098907637606735824510623 Zhuang-ABCA-3.017 78.575583 19.064710 23.497188 7.857558 1.906471 2.349719 VISal6a 02 NP-CT-L6b Glut Visual
97661006042925800380741778764498794751 Zhuang-ABCA-3.017 78.796093 19.073735 23.494524 7.879609 1.907373 2.349452 VISal6a 02 NP-CT-L6b Glut Visual
... ... ... ... ... ... ... ... ... ... ...
156947303650960194776993747274328011056 Zhuang-ABCA-3.009 84.812512 30.863025 37.253313 8.481251 3.086302 3.725331 LP 18 TH Glut connected
172268668275251998778345610664715788667 Zhuang-ABCA-3.009 84.008563 33.037907 37.280443 8.400856 3.303791 3.728044 LP 18 TH Glut connected
248145458296088509137955235524055794146 Zhuang-ABCA-3.009 84.917639 32.177712 37.243865 8.491764 3.217771 3.724386 LP 18 TH Glut connected
27966221641202106948948402442419133181 Zhuang-ABCA-3.009 84.392370 32.908652 37.267710 8.439237 3.290865 3.726771 LP 18 TH Glut connected
7071071614544513006737101022316271447 Zhuang-ABCA-3.009 83.936955 31.485775 37.276005 8.393695 3.148577 3.727601 LP 18 TH Glut connected

9473 rows × 10 columns

[75]:
sc.tl.rank_genes_groups(adata_sel, 'deg', groups=[area], reference='unconnected', method='wilcoxon')
a_vs_c = sc.get.rank_genes_groups_df(adata_sel, group=area)

sc.tl.rank_genes_groups(adata_sel, 'deg', groups=['connected'], reference='unconnected', method='wilcoxon')
b_vs_c = sc.get.rank_genes_groups_df(adata_sel, group='connected')
[ ]:
similar_genes = []
for gene in b_vs_c['names']:
    if gene in a_vs_c['names'].values:
        a_logfc = a_vs_c[a_vs_c['names'] == gene]['logfoldchanges'].values[0]
        b_logfc = b_vs_c[b_vs_c['names'] == gene]['logfoldchanges'].values[0]
        # if np.sign(a_logfc) == np.sign(b_logfc) and abs(a_logfc - b_logfc) < 0.4 and a_logfc>2 and b_logfc>2:
        if np.sign(a_logfc) == np.sign(b_logfc) and a_logfc > 2 and b_logfc > 2:
            similar_genes.append(gene)

print(f"similar_genes: {len(similar_genes)}")
print(similar_genes)
[80]:
# Set data
df = adata_sel[:, similar_genes].to_df()
df['cluster'] = adata_sel.obs['deg'].values
df_mean = df.groupby('cluster').mean()
# For every column
column_sums = df_mean.sum()
# The value of each column is removed to the corresponding sum
df_percentage = df_mean.div(column_sums)
df_percentage = df_percentage.loc[[area, 'connected', 'unconnected']]

categories = similar_genes
colors = [module_color[area],'#89bd89', '#bdbdbd']
labels = [area, 'connected', 'unconnected']  # Tags corresponding to color
data = df_percentage.values

# Create a chart
fig, ax = plt.subplots(figsize=(9, 6))

# Draw a stack of stacks
x = np.arange(len(categories))
bottom = np.zeros(len(categories))
width=0.6
for i, row in enumerate(data):
    ax.bar(x, row, bottom=bottom, color=colors[i], edgecolor='white',width=width, label=labels[i])
    bottom += row

# Add full -coverage ribbon
for j in range(len(categories) - 1):
    left = x[j]+width/2
    right = x[j+1]-width/2
    left_data = data[:, j]
    right_data = data[:, j+1]

    left_cumsum = np.cumsum(left_data)
    right_cumsum = np.cumsum(right_data)

    # Add all ribbons from the bottom to the top
    for i in range(len(colors)):
        left_bottom = left_cumsum[i-1] if i > 0 else 0
        left_top = left_cumsum[i]
        right_bottom = right_cumsum[i-1] if i > 0 else 0
        right_top = right_cumsum[i]

        ax.fill([left, right, right, left],
                [left_bottom, right_bottom, right_top, left_top],
                color=colors[i], alpha=0.3)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))  # Figure placed on the outside of the upper right corner

# Set chart style
# ax.set_title('Full Coverage Ribbon Stacked Bar Chart')
ax.set_xlabel('gene')
ax.set_ylabel('Expression percentage')
plt.xticks(x, rotation=45)
ax.set_xticklabels(categories)

plt.tight_layout()
# plt.savefig(f'./Supp_fig/tp_tn_gene_deg_{area}.pdf', format='pdf')
_images/5_ctx_th_gene_analysis_31_0.png
[37]:
gene_path = f'./gene/{area}/'
os.makedirs(gene_path, exist_ok=True)
[82]:
a_vs_c['comparison'] = f'{area}_vs_TN'
b_vs_c['comparison'] = 'TP_vs_TN'
df = pd.concat([a_vs_c, b_vs_c])
df['regulation'] = df['logfoldchanges'].apply(lambda x: 'Up' if x > 0 else 'Down')
df = df[df['logfoldchanges'] > 2]

genes_to_label = similar_genes
comparisons = [f'{area}_vs_TN', 'TP_vs_TN']

# Create graphics and coordinate shafts
fig, ax = plt.subplots(figsize=(6, 6))

# Use Seaborn's Stripplot function
ax = sns.stripplot(x='comparison', y='logfoldchanges', hue='regulation', data=df,
                   jitter=0.3, size=4, palette={'Up': '#fa7f6f', 'Down': '#82b0d2'}, alpha=0.7, orient='v')
sns.stripplot(x='comparison', y='logfoldchanges', data=df[df['names'].isin(genes_to_label)],
              jitter=0.2, size=6, color='black', alpha=1, ax=ax, orient='v')

# Add notes and tags
label_x = 0.5  #The X coordinate of the label (in the middle of the two groups of scattered dots)
label_offset = 0.1  # Vertical spacing between labels
max_y = df['logfoldchanges'].max()
min_y = df['logfoldchanges'].min()
label_y_range = max_y - min_y
label_y_start = min_y + label_y_range * 0.1  # Start the label from the position of 10%of the Y axis

for i, gene in enumerate(genes_to_label):
    gene_data = df[df['names'] == gene]
    label_y = label_y_start + i * label_offset * label_y_range

    for j, comp in enumerate(comparisons):
        if not gene_data[gene_data['comparison'] == comp].empty:
            x = j
            y = gene_data[gene_data['comparison'] == comp]['logfoldchanges'].values[0]
            ax.plot([x, label_x], [y, label_y], color='gray', linestyle='--', linewidth=0.5)

    ax.text(label_x, label_y, gene, fontsize=8, ha='center', va='center')

# Set diagram
from matplotlib.lines import Line2D
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Up', markersize=8, markerfacecolor='#fa7f6f', alpha=0.7),
    # Line2D([0], [0], marker='o', color='w', label='Down', markersize=8, markerfacecolor='#82b0d2', alpha=0.7),
    Line2D([0], [0], marker='o', color='w', label='Common', markersize=8, markerfacecolor='black', alpha=1)
]
ax.legend(handles=legend_elements, title='', bbox_to_anchor=(1.05, 1), loc='upper left')

# plt.axhline(y=2, color='black', linestyle='--', linewidth=2, alpha=0.7, zorder=11)
# Set the title and label
plt.xlabel('Comparison', fontsize=12)
plt.ylabel('logfoldchanges', fontsize=12)

# Add a box with a comparative name
width = 0.7  # width
height = 1.9  # high
x_offset = -width/2  # Adjust the X offset to maintain the center
y_offset = 0  # Adjust Y offset to keep in the middle
cm=[module_color[area], '#89bd89']
for i, comp in enumerate(comparisons):
    rect = plt.Rectangle((i+x_offset, y_offset), width, height, fill=True, facecolor=cm[i], alpha=0.9, zorder=10)
    ax.add_patch(rect)
    ax.text(i, 1, comp, ha='center', va='center', fontweight='bold', color='black', zorder=11)

# Remove the X -axis tag
ax.set_xticklabels([])

# Adjust the range of the X -axis and leave space for the middle label
plt.xlim(-0.5, len(comparisons) - 0.5)

# 调整图的布局
plt.tight_layout()

# plt.savefig(gene_path + 'tp_tn_gene_deg.pdf', format='pdf')
_images/5_ctx_th_gene_analysis_33_0.png