update metabolism pipline
This commit is contained in:
@@ -468,7 +468,7 @@ open (CMD,">","$commands_dir/kegg_Annotation.commands") or die $!;
|
||||
. " -i $outdir/Function_Annotation_dir/KEGG_Analysis "
|
||||
. " -o $outdir/Meta_mid_KEGG_analysis_dir ";
|
||||
my $cmd9 = " perl /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/daixie_kegg_pathway_plot.pl -i $outdir/ -o $outdir/ ";
|
||||
my $cmd10 = " perl $Bin/scripts/plot_meta_KEGG_histogram_v2.pl -i $outdir/DEM_Analysis_dir -o $outdir/Function_Annotation_dir/KEGG_Analysis -map $mapfile ";#kegg分类柱状图和文件
|
||||
my $cmd10 = " perl /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_meta_KEGG_histogram_v2.pl -i $outdir/DEM_Analysis_dir -o $outdir/Function_Annotation_dir/KEGG_Analysis -map $mapfile ";#kegg分类柱状图和文件
|
||||
my $cmd11 = " perl /share/nas2/fangbj/pipline/lianhefenxi/rna_pro/V1.2_nuohe/scripts/convertall.pl -i $outdir/Function_Annotation_dir/KEGG_Analysis ";#kegg分类柱状图和文件
|
||||
print CMD $cmd1,"\n";
|
||||
print CMD $cmd2,"\n";
|
||||
@@ -600,6 +600,49 @@ if ($vs_num >1 && $vs_num <=5) {
|
||||
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 13.1 : Common metabolite list -----------------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
|
||||
my $common_id_file = "$outdir/venn_dir/common_id";
|
||||
my $common_status_file = "$outdir/venn_dir/common_id.status.txt";
|
||||
my $upset_file = "$outdir/venn_dir/upset_list.xls";
|
||||
my $venn_file = "$outdir/venn_dir/venn_list.xls";
|
||||
|
||||
if ($vs_num < 2) {
|
||||
`: > $common_id_file`;
|
||||
`echo SINGLE_COMPARISON > $common_status_file`;
|
||||
print STDERR "Skip Step 13.1 : less than 2 comparisons, common metabolite list is not applicable.\n";
|
||||
} else {
|
||||
|
||||
open (CMD,">","$commands_dir/common_id.commands") or die $!;
|
||||
|
||||
my $cmd1 = " if [ -s $upset_file ]; then "
|
||||
. " awk -F'\\t' 'NR==1{n=NF;next}{ok=1;for(i=2;i<=n;i++){if(\$i!=1){ok=0;break}} if(ok) print \$1}' $upset_file > $common_id_file ; "
|
||||
. " elif [ -s $venn_file ]; then "
|
||||
. " awk -F'\\t' 'NR==1{n=NF;next}{ok=1;for(i=2;i<=n;i++){if(\$i!=1){ok=0;break}} if(ok) print \$1}' $venn_file > $common_id_file ; "
|
||||
. " else "
|
||||
. " : > $common_id_file ; "
|
||||
. " fi ; "
|
||||
. " if [ -s $common_id_file ]; then "
|
||||
. " echo OK > $common_status_file ; "
|
||||
. " else "
|
||||
. " echo NO_COMMON_METABOLITES > $common_status_file ; "
|
||||
. " fi ";
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
|
||||
close (CMD);
|
||||
|
||||
Pipline_sh_commands("common_id.commands","common_id.ok");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14 : All Summary ------------------------------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
@@ -607,7 +650,7 @@ print STDERR "\n\n\n------------------------------------------------------------
|
||||
|
||||
mkdir "$outdir/summary" unless (-d "$outdir/summary") ;
|
||||
open (CMD,">","$commands_dir/summary.commands") or die $!;
|
||||
my $cmd1 = " perl /share/nas2/fangbj/pipline/daixie/v1.2_auto/scripts/summaryDEM_all.pl -i $outdir/DEM_Analysis_dir -n $outdir/raw_analysis_dir/mean_all.txt "
|
||||
my $cmd1 = " perl /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/summaryDEM_all.pl -i $outdir/DEM_Analysis_dir -n $outdir/raw_analysis_dir/mean_all.txt "
|
||||
. " -anno $outdir/all_samples/all_metabolite_mapping.txt "
|
||||
. " -o $outdir/summary/All.samples.summary_info.xls ";
|
||||
|
||||
@@ -617,6 +660,181 @@ print STDERR "\n\n\n------------------------------------------------------------
|
||||
Pipline_sh_commands("summary.commands","summary.ok");
|
||||
}
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14.1 : Key metabolite trend analysis ----------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
mkdir "$outdir/Trend_Analysis" unless (-d "$outdir/Trend_Analysis");
|
||||
open (CMD,">","$commands_dir/trend_analysis.commands") or die $!;
|
||||
my $cmd1 = " /share/nas2/fangbj/biosoft/miniconda3/envs/R4/bin/Rscript /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_key_metabolite_trend_allgroups.R "
|
||||
. " $outdir/summary/All.samples.summary_info.xls "
|
||||
. " $groupfile "
|
||||
. " $outdir/Trend_Analysis "
|
||||
. " 5 0.05 12 1 "
|
||||
. " $outdir/Meta_mid_statistical_analysis_dir ";
|
||||
|
||||
# my $cmd2 = " mkdir -p $outdir/Trend_Analysis/detail_data ";
|
||||
#
|
||||
# my $cmd3 = " mv $outdir/Trend_Analysis/key_metabolite_trend_lineplot.pdf "
|
||||
# . " $outdir/Trend_Analysis/key_metabolite_trend_lineplot.png "
|
||||
# . " $outdir/Trend_Analysis/key_metabolite_trend_boxplot.pdf "
|
||||
# . " $outdir/Trend_Analysis/key_metabolite_trend_boxplot.png "
|
||||
# . " $outdir/Trend_Analysis/selected_metabolites_unique.xls "
|
||||
# . " $outdir/Trend_Analysis/selected_metabolites_group_mean_se.xls "
|
||||
# . " $outdir/Trend_Analysis/selected_metabolites_long_format.xls "
|
||||
# . " $outdir/Trend_Analysis/detail_data/ 2>/dev/null || true ";
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
# print CMD $cmd2,"\n";
|
||||
# print CMD $cmd3,"\n";
|
||||
|
||||
close (CMD);
|
||||
Pipline_sh_commands("trend_analysis.commands","trend_analysis.ok");
|
||||
}
|
||||
|
||||
|
||||
# - 5:每个对比组取前 5 个
|
||||
# - 0.05:P 值阈值
|
||||
# - 12:报告展示前 12 个
|
||||
# - 1:VIP 阈值,表示 VIP >= 1
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14.2 : Key pathway metabolite heatmap ---------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
|
||||
mkdir "$outdir/Key_Pathway_Heatmap" unless (-d "$outdir/Key_Pathway_Heatmap");
|
||||
|
||||
open (CMD,">","$commands_dir/key_pathway_heatmap.commands") or die $!;
|
||||
|
||||
my $cmd1 = " /share/nas2/fangbj/biosoft/miniconda3/envs/R4/bin/Rscript /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_key_pathway_metabolite_heatmap.R "
|
||||
. " $outdir/summary/All.samples.summary_info.xls "
|
||||
. " $groupfile "
|
||||
. " $outdir/DEM_Analysis_dir "
|
||||
. " $outdir/Meta_mid_statistical_analysis_dir "
|
||||
. " $outdir/Key_Pathway_Heatmap "
|
||||
. " 5 30 1 "; #这里最后的 1 就是“排除泛通路”;如果你以后想保留这些大通路,改成 0 就行
|
||||
|
||||
|
||||
# my $cmd2 = " mkdir -p $outdir/Key_Pathway_Heatmap/detail_data ";
|
||||
#
|
||||
# my $cmd3 = " mv $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_heatmap.pdf "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_heatmap.png "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolites.xls "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_long_format.xls "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_group_mean_se.xls "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_matrix.xls "
|
||||
# . " $outdir/Key_Pathway_Heatmap/detail_data/ 2>/dev/null || true ";
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
# print CMD $cmd2,"\n";
|
||||
# print CMD $cmd3,"\n";
|
||||
|
||||
close (CMD);
|
||||
|
||||
Pipline_sh_commands("key_pathway_heatmap.commands","key_pathway_heatmap.ok");
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14.3 : Common metabolite KEGG enrichment ------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
|
||||
my $common_id_file = "$outdir/venn_dir/common_id";
|
||||
my $common_num = 0;
|
||||
|
||||
if (-e $common_id_file && -s $common_id_file) {
|
||||
open (IN,$common_id_file) or die $!;
|
||||
while (<IN>) {
|
||||
chomp;
|
||||
next if (/^\s*$/);
|
||||
$common_num++;
|
||||
}
|
||||
close IN;
|
||||
}
|
||||
|
||||
if ($vs_num < 2) {
|
||||
print STDERR "Skip Step 14.3 : less than 2 comparisons, common metabolite enrichment is not applicable.\n";
|
||||
} elsif (!-e $common_id_file || -z $common_id_file) {
|
||||
print STDERR "Skip Step 14.3 : common_id file not found or empty.\n";
|
||||
} elsif ($common_num < 2) {
|
||||
print STDERR "Skip Step 14.3 : fewer than 2 common metabolites, enrichment is not applicable.\n";
|
||||
} else {
|
||||
|
||||
mkdir "$outdir/Common_KEGG_Enrichment" unless (-d "$outdir/Common_KEGG_Enrichment");
|
||||
|
||||
open (CMD,">","$commands_dir/common_kegg_enrichment.commands") or die $!;
|
||||
|
||||
my $cmd1 = " python3 /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/common_kegg_enrichment.py "
|
||||
. " -i $common_id_file "
|
||||
. " -m $outdir/all_samples/all_metabolite_mapping.txt "
|
||||
. " -b $outdir/raw_analysis_dir/mean_all.txt ";
|
||||
|
||||
if (defined $organism_ko && $organism_ko ne "") {
|
||||
$cmd1 .= " -g $organism_ko ";
|
||||
}
|
||||
|
||||
$cmd1 .= " -o $outdir/Common_KEGG_Enrichment ";
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
|
||||
close (CMD);
|
||||
|
||||
Pipline_sh_commands("common_kegg_enrichment.commands","common_kegg_enrichment.ok");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14.4 : Common KEGG network --------------------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
|
||||
mkdir "$outdir/Common_KEGG_Enrichment/network" unless (-d "$outdir/Common_KEGG_Enrichment/network");
|
||||
|
||||
my $common_kegg_network_dir = "$outdir/Common_KEGG_Enrichment/network";
|
||||
my $common_kegg_enrich_file = "$outdir/Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls";
|
||||
|
||||
if ($vs_num < 2) {
|
||||
print STDERR "Skip Step 14.4 : less than 2 comparisons, common KEGG network is not applicable.\n";
|
||||
}else{
|
||||
open (CMD,">","$commands_dir/common_kegg_network.commands") or die $!;
|
||||
|
||||
my $awk_check = q{awk -F'\t' 'NR==1{for(i=1;i<=NF;i++){if(tolower($i)=="pvalue") p=i} next} p>0 && $p+0<0.05{found=1} END{exit found?0:1}'};
|
||||
|
||||
my $cmd1 = " if [ -s $common_kegg_enrich_file ] && "
|
||||
. " $awk_check $common_kegg_enrich_file ; then "
|
||||
. " python3 /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_common_kegg_pathway_metabolite_network.py "
|
||||
. " -i $common_kegg_enrich_file "
|
||||
. " -o $common_kegg_network_dir "
|
||||
. " --sig-cutoff 0.05 "
|
||||
. " --max-top 10 "
|
||||
. " --fallback-top 0 "
|
||||
. " --max-metabolites 20 "
|
||||
. " --min-metabolite-label-degree 2 "
|
||||
. " --max-metabolite-labels 20 "
|
||||
. " ; "
|
||||
. " else "
|
||||
. " echo 'Skip common KEGG pathway-metabolite network: no significant pathway or enrichment table is empty.' "
|
||||
. " ; fi ";
|
||||
|
||||
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
close (CMD);
|
||||
|
||||
Pipline_sh_commands("common_kegg_network.commands","common_kegg_network.ok");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 15 : gsea analysis-----------------------------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
|
||||
@@ -554,7 +554,7 @@ print STDERR "\n\n\n------------------------------------------------------------
|
||||
mkdir("$outdir/summary") unless (-d "$outdir/summary") ;
|
||||
|
||||
open (CMD,">","$commands_dir/summary.commands") or die $!;
|
||||
my $cmd1 = " perl /share/nas2/fangbj/pipline/daixie/v1.2_auto/scripts/summaryDEM_all.pl -i $outdir/DEM_Analysis_dir -n $outdir/raw_analysis_dir/mean_all.txt "
|
||||
my $cmd1 = " perl /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/summaryDEM_all.pl -i $outdir/DEM_Analysis_dir -n $outdir/raw_analysis_dir/mean_all.txt "
|
||||
. " -anno $outdir/all_samples/all_metabolite_mapping.txt "
|
||||
. " -o $outdir/summary/All.samples.summary_info.xls ";
|
||||
|
||||
@@ -566,6 +566,52 @@ print STDERR "\n\n\n------------------------------------------------------------
|
||||
}
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 13.1 : Common metabolite list -----------------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
|
||||
my $common_id_file = "$outdir/venn_dir/common_id";
|
||||
my $common_status_file = "$outdir/venn_dir/common_id.status.txt";
|
||||
my $upset_file = "$outdir/venn_dir/upset_list.xls";
|
||||
my $venn_file = "$outdir/venn_dir/venn_list.xls";
|
||||
|
||||
if ($vs_num < 2) {
|
||||
`: > $common_id_file`;
|
||||
`echo SINGLE_COMPARISON > $common_status_file`;
|
||||
print STDERR "Skip Step 13.1 : less than 2 comparisons, common metabolite list is not applicable.\n";
|
||||
} else {
|
||||
|
||||
open (CMD,">","$commands_dir/common_id.commands") or die $!;
|
||||
|
||||
my $cmd1 = " if [ -s $upset_file ]; then "
|
||||
. " awk -F'\\t' 'NR==1{n=NF;next}{ok=1;for(i=2;i<=n;i++){if(\$i!=1){ok=0;break}} if(ok) print \$1}' $upset_file > $common_id_file ; "
|
||||
. " elif [ -s $venn_file ]; then "
|
||||
. " awk -F'\\t' 'NR==1{n=NF;next}{ok=1;for(i=2;i<=n;i++){if(\$i!=1){ok=0;break}} if(ok) print \$1}' $venn_file > $common_id_file ; "
|
||||
. " else "
|
||||
. " : > $common_id_file ; "
|
||||
. " fi ; "
|
||||
. " if [ -s $common_id_file ]; then "
|
||||
. " echo OK > $common_status_file ; "
|
||||
. " else "
|
||||
. " echo NO_COMMON_METABOLITES > $common_status_file ; "
|
||||
. " fi ";
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
|
||||
close (CMD);
|
||||
|
||||
Pipline_sh_commands("common_id.commands","common_id.ok");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
@@ -605,6 +651,180 @@ print STDERR "\n\n\n------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14.1 : Key metabolite trend analysis ----------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
mkdir "$outdir/Trend_Analysis" unless (-d "$outdir/Trend_Analysis");
|
||||
open (CMD,">","$commands_dir/trend_analysis.commands") or die $!;
|
||||
my $cmd1 = " /share/nas2/fangbj/biosoft/miniconda3/envs/R4/bin/Rscript /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_key_metabolite_trend_allgroups.R "
|
||||
. " $outdir/summary/All.samples.summary_info.xls "
|
||||
. " $groupfile "
|
||||
. " $outdir/Trend_Analysis "
|
||||
. " 5 0.05 12 1 "
|
||||
. " $outdir/Meta_mid_statistical_analysis_dir ";
|
||||
|
||||
# my $cmd2 = " mkdir -p $outdir/Trend_Analysis/detail_data ";
|
||||
#
|
||||
# my $cmd3 = " mv $outdir/Trend_Analysis/key_metabolite_trend_lineplot.pdf "
|
||||
# . " $outdir/Trend_Analysis/key_metabolite_trend_lineplot.png "
|
||||
# . " $outdir/Trend_Analysis/key_metabolite_trend_boxplot.pdf "
|
||||
# . " $outdir/Trend_Analysis/key_metabolite_trend_boxplot.png "
|
||||
# . " $outdir/Trend_Analysis/selected_metabolites_unique.xls "
|
||||
# . " $outdir/Trend_Analysis/selected_metabolites_group_mean_se.xls "
|
||||
# . " $outdir/Trend_Analysis/selected_metabolites_long_format.xls "
|
||||
# . " $outdir/Trend_Analysis/detail_data/ 2>/dev/null || true ";
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
# print CMD $cmd2,"\n";
|
||||
# print CMD $cmd3,"\n";
|
||||
|
||||
close (CMD);
|
||||
Pipline_sh_commands("trend_analysis.commands","trend_analysis.ok");
|
||||
}
|
||||
|
||||
|
||||
# - 5:每个对比组取前 5 个
|
||||
# - 0.05:P 值阈值
|
||||
# - 12:报告展示前 12 个
|
||||
# - 1:VIP 阈值,表示 VIP >= 1
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14.2 : Key pathway metabolite heatmap ---------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
|
||||
mkdir "$outdir/Key_Pathway_Heatmap" unless (-d "$outdir/Key_Pathway_Heatmap");
|
||||
|
||||
open (CMD,">","$commands_dir/key_pathway_heatmap.commands") or die $!;
|
||||
|
||||
my $cmd1 = " /share/nas2/fangbj/biosoft/miniconda3/envs/R4/bin/Rscript /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_key_pathway_metabolite_heatmap.R "
|
||||
. " $outdir/summary/All.samples.summary_info.xls "
|
||||
. " $groupfile "
|
||||
. " $outdir/DEM_Analysis_dir "
|
||||
. " $outdir/Meta_mid_statistical_analysis_dir "
|
||||
. " $outdir/Key_Pathway_Heatmap "
|
||||
. " 5 30 1 "; #这里最后的 1 就是“排除泛通路”;如果你以后想保留这些大通路,改成 0 就行
|
||||
|
||||
|
||||
# my $cmd2 = " mkdir -p $outdir/Key_Pathway_Heatmap/detail_data ";
|
||||
#
|
||||
# my $cmd3 = " mv $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_heatmap.pdf "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_heatmap.png "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolites.xls "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_long_format.xls "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_group_mean_se.xls "
|
||||
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_matrix.xls "
|
||||
# . " $outdir/Key_Pathway_Heatmap/detail_data/ 2>/dev/null || true ";
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
# print CMD $cmd2,"\n";
|
||||
# print CMD $cmd3,"\n";
|
||||
|
||||
close (CMD);
|
||||
|
||||
Pipline_sh_commands("key_pathway_heatmap.commands","key_pathway_heatmap.ok");
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14.3 : Common metabolite KEGG enrichment ------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
|
||||
my $common_id_file = "$outdir/venn_dir/common_id";
|
||||
my $common_num = 0;
|
||||
|
||||
if (-e $common_id_file && -s $common_id_file) {
|
||||
open (IN,$common_id_file) or die $!;
|
||||
while (<IN>) {
|
||||
chomp;
|
||||
next if (/^\s*$/);
|
||||
$common_num++;
|
||||
}
|
||||
close IN;
|
||||
}
|
||||
|
||||
if ($vs_num < 2) {
|
||||
print STDERR "Skip Step 14.3 : less than 2 comparisons, common metabolite enrichment is not applicable.\n";
|
||||
} elsif (!-e $common_id_file || -z $common_id_file) {
|
||||
print STDERR "Skip Step 14.3 : common_id file not found or empty.\n";
|
||||
} elsif ($common_num < 2) {
|
||||
print STDERR "Skip Step 14.3 : fewer than 2 common metabolites, enrichment is not applicable.\n";
|
||||
} else {
|
||||
|
||||
mkdir "$outdir/Common_KEGG_Enrichment" unless (-d "$outdir/Common_KEGG_Enrichment");
|
||||
|
||||
open (CMD,">","$commands_dir/common_kegg_enrichment.commands") or die $!;
|
||||
|
||||
my $cmd1 = " python3 /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/common_kegg_enrichment.py "
|
||||
. " -i $common_id_file "
|
||||
. " -m $outdir/all_samples/all_metabolite_mapping.txt "
|
||||
. " -b $outdir/raw_analysis_dir/mean_all.txt ";
|
||||
|
||||
if (defined $organism_ko && $organism_ko ne "") {
|
||||
$cmd1 .= " -g $organism_ko ";
|
||||
}
|
||||
|
||||
$cmd1 .= " -o $outdir/Common_KEGG_Enrichment ";
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
|
||||
close (CMD);
|
||||
|
||||
Pipline_sh_commands("common_kegg_enrichment.commands","common_kegg_enrichment.ok");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
|
||||
."--------------------- Step 14.4 : Common KEGG network --------------------------\n"
|
||||
."--------------------------------------------------------------------------------\n\n\n";
|
||||
{
|
||||
|
||||
mkdir "$outdir/Common_KEGG_Enrichment/network" unless (-d "$outdir/Common_KEGG_Enrichment/network");
|
||||
|
||||
my $common_kegg_network_dir = "$outdir/Common_KEGG_Enrichment/network";
|
||||
my $common_kegg_enrich_file = "$outdir/Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls";
|
||||
|
||||
if ($vs_num < 2) {
|
||||
print STDERR "Skip Step 14.4 : less than 2 comparisons, common KEGG network is not applicable.\n";
|
||||
}else{
|
||||
open (CMD,">","$commands_dir/common_kegg_network.commands") or die $!;
|
||||
|
||||
my $awk_check = q{awk -F'\t' 'NR==1{for(i=1;i<=NF;i++){if(tolower($i)=="pvalue") p=i} next} p>0 && $p+0<0.05{found=1} END{exit found?0:1}'};
|
||||
|
||||
my $cmd1 = " if [ -s $common_kegg_enrich_file ] && "
|
||||
. " $awk_check $common_kegg_enrich_file ; then "
|
||||
. " python3 /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_common_kegg_pathway_metabolite_network.py "
|
||||
. " -i $common_kegg_enrich_file "
|
||||
. " -o $common_kegg_network_dir "
|
||||
. " --sig-cutoff 0.05 "
|
||||
. " --max-top 10 "
|
||||
. " --fallback-top 0 "
|
||||
. " --max-metabolites 20 "
|
||||
. " --min-metabolite-label-degree 2 "
|
||||
. " --max-metabolite-labels 20 "
|
||||
. " ; "
|
||||
. " else "
|
||||
. " echo 'Skip common KEGG pathway-metabolite network: no significant pathway or enrichment table is empty.' "
|
||||
. " ; fi ";
|
||||
|
||||
|
||||
|
||||
print CMD $cmd1,"\n";
|
||||
close (CMD);
|
||||
|
||||
Pipline_sh_commands("common_kegg_network.commands","common_kegg_network.ok");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#######################################################################################
|
||||
|
||||
@@ -350,7 +350,8 @@ $writer->emptyTag('file','desc'=>"",'name'=>"处理后的峰面积数据表见
|
||||
|
||||
#3.3 代谢物全注释
|
||||
#sunxk 20200515
|
||||
$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||||
$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1');
|
||||
#$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||||
$writer->emptyTag('p','desc'=>"我们将本项目所有代谢物在本地数据库进行物质信息搜索整理,包括代谢物在常见数据库的编号索引、分类信息和通路信息等。部分代谢物数据库映射表展示如下,所有代谢物数据库映射表请详见结果文件all_metabolite_mapping.txt表。",'type'=>"type1");
|
||||
$writer->emptyTag('table','desc'=>"",'type'=>"0",'name'=>"表$tid. 代谢物数据库映射表(部分)",'path'=>"$indir/html_report/template/all_metabolite_mapping3L.txt");
|
||||
#$writer->emptyTag('p','desc'=>"我们将本项目所有代谢物在本地数据库进行物质信息搜索整理,包括代谢物在常见数据库的编号索引、分类信息和通路信息等。所有代谢物数据库映射表详见结果文件all_metabolite_mapping.txt。",'type'=>"type1");
|
||||
@@ -437,7 +438,8 @@ $writer->emptyTag('file','desc'=>"",'name'=>"前10代谢物占比结果文件,
|
||||
###############################
|
||||
###############################
|
||||
|
||||
$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||||
$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1');
|
||||
#$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||||
$writer->emptyTag('p','desc'=>"由于代谢组数据具有高维度、高噪声及变量间多重共线性的特点,单一变量的传统统计方法难以全面揭示数据内在规律,因此采用多元统计方法进行降维与分类建模。",'type'=>"type1");
|
||||
$writer->emptyTag('p','desc'=>"本分析使用R语言ropls包<a href=\"#ref5\">[5]</a>,包括:PCA、PLS-DA和OPLS-DA。",'type'=>"type1");
|
||||
|
||||
@@ -643,7 +645,8 @@ if (-e "$indir/$vs1/Statistical_Analysis/OPLS-DA_Permutation.png") {
|
||||
#3.8 单变量统计分析 (UVA) ,差异代谢物的筛选和火山图
|
||||
###############################
|
||||
###############################
|
||||
$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||||
$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1');
|
||||
#$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||||
|
||||
|
||||
$writer->emptyTag('p','desc'=>"代谢组学数据具有高维、多变量的特点,通常需结合单变量统计分析(univariate analysis, UVA)与多元统计分析进行综合挖掘,以提高差异代谢物筛选的准确性<a href=\"#ref9\">[9]</a>。本研究中,对具有生物学重复的样本采用基于线性模型和经验贝叶斯方法的 limma 流程进行单变量差异分析。考虑到代谢组数据通常具有变量数多而样本重复数有限的特征,传统逐个变量的 t 检验或非参数检验在小样本条件下往往存在方差估计不稳定、统计功效受限等问题。limma 通过经验贝叶斯方法对各变量的方差进行收缩估计,可在保留线性模型分析框架的同时提高方差估计稳定性和差异检验的稳健性,这一思想已在高维组学数据分析的方法学研究中得到系统阐述<a href=\"#ref10\">[10]</a><a href=\"#ref11\">[11]</a>。进一步的比较研究表明,在高维、小样本的连续型组学数据中,基于经验贝叶斯方差建模的 moderated t 检验通常较传统 t 检验表现出更稳定的统计表现,并已在蛋白质组等相邻高通量数据分析中显示出良好的应用实用性<a href=\"#ref12\">[12]</a><a href=\"#ref13\">[13]</a>。此外,代谢组学统计分析流程研究及近年来的实际研究也已将 limma 纳入差异特征筛选框架中,进一步支持了其在差异代谢物筛选中的可行性与适用性<a href=\"#ref14\">[14]</a><a href=\"#ref15\">[15]</a>。",'type'=>"type1");
|
||||
@@ -806,7 +809,7 @@ $writer->emptyTag('p','desc'=>"柱状图以纵向条纹的高度表示代谢物
|
||||
my @barplot =glob ("$indir/$vs1/Statistical_Analysis/barplot/*barplot.png");
|
||||
if (scalar(@barplot) != 0) {
|
||||
&piclist("$vs1 组间柱状图","注:纵坐标为物质相对含量,横坐标为组别;图中代谢物以编号标识,具体对应关系见Means.xls文件。","$indir/$vs1/Statistical_Analysis/barplot/*barplot.png","../../$vs1","_barplot.png","");
|
||||
$pid++;
|
||||
# $pid++;
|
||||
}else{
|
||||
$writer->emptyTag('p','desc'=>"无差异代谢物,未生成柱状图。",'type'=>"type1");
|
||||
}
|
||||
@@ -816,7 +819,7 @@ $writer->emptyTag('p','desc'=>"小提琴-柱状复合图结合了数据分布形
|
||||
my @violin =glob ("$indir/$vs1/Statistical_Analysis/violin/*violin.png");
|
||||
if (scalar(@violin) != 0) {
|
||||
&piclist("$vs1 组间小提琴箱线复合图","注:纵坐标为物质相对含量,横坐标为组别;图中代谢物以编号标识,具体对应关系见Means.xls文件。","$indir/$vs1/Statistical_Analysis/violin/*violin.png","$indir/$vs1","_violin.png","");
|
||||
$pid++;
|
||||
# $pid++;
|
||||
}else{
|
||||
$writer->emptyTag('p','desc'=>"无差异代谢物,未生成小提琴图。",'type'=>"type1");
|
||||
}
|
||||
@@ -825,6 +828,229 @@ $writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物组间柱状图路
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物组间小提琴-柱状复合图路径:complete/*_vs_*/Statistical_Analysis/violin/Metabolite_*_violin.{pdf,png}",'path'=>"../$vs1/Statistical_Analysis/violin",'type'=>"xls");
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"代谢物编号对应表路径:complete/*_vs_*/Statistical_Analysis/Means.xls",'path'=>"../$vs1/Statistical_Analysis",'type'=>"xls");
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
$writer->emptyTag('h3','name'=>"3.10.1 关键差异代谢物跨组趋势分析",'type'=>'type1','desc'=>"");
|
||||
|
||||
my $trend_dir = "$indir/07_Trend_Analysis";
|
||||
my $trend_line_png = "$trend_dir/key_metabolite_trend_lineplot_report_top12.png";
|
||||
my $trend_box_png = "$trend_dir/key_metabolite_trend_boxplot_report_top12.png";
|
||||
my $trend_report_table = "$trend_dir/selected_metabolites_report_top12.xls";
|
||||
my $trend_label_table = "$trend_dir/selected_metabolites_figure_label.xls";
|
||||
my $trend_bycomp_table = "$trend_dir/selected_metabolites_by_comparison.xls";
|
||||
|
||||
if (-e $trend_line_png) {
|
||||
$writer->emptyTag('p','desc'=>"为展示各对比组中具有代表性的关键差异代谢物在全部实验分组中的整体变化趋势,本分析以各对比组差异代谢物结果为基础,按绝对值log2FoldChange从大到小排序;若绝对值log2FoldChange相同,则按VIP值从大到小排序。最终每个对比组选取前5个代表性差异代谢物作为候选集合。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('p','desc'=>"将所有对比组候选代谢物合并后进行去重,并按以下规则综合排序:1)最大绝对值log2FoldChange值(Max_abs_log2FC,降序);2)最大VIP值(Max_VIP,降序);3)最小P值(Best_Pvalue,升序)。排序后选取前12个代表性代谢物用于报告正文展示,完整结果请见对应结果文件。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('pic','desc'=>"注:横坐标为实验分组,纵坐标为标准化丰度值。每个分面代表一个关键差异代谢物,图中以M编号标识,其与具体代谢物名称的对应关系见selected_metabolites_figure_label.xls。折线连接各分组均值,误差线表示标准误(SE),散点表示各样本实际观测值。",'name'=>"图$pid. 关键差异代谢物跨组趋势折线图",'type'=>"img-width-max",'path'=>$trend_line_png);
|
||||
$pid++;
|
||||
|
||||
if (-e $trend_box_png) {
|
||||
$writer->emptyTag('pic','desc'=>"注:横坐标为实验分组,纵坐标为标准化丰度值。每个分面代表一个关键差异代谢物,箱体表示四分位分布范围,中线表示中位数,散点表示各样本实际观测值。",'name'=>"图$pid. 关键差异代谢物跨组箱线图",'type'=>"img-width-max",'path'=>$trend_box_png);
|
||||
$pid++;
|
||||
}
|
||||
|
||||
if (-e $trend_report_table) {
|
||||
$writer->emptyTag('table','desc'=>"注:FigureLabel为图中代谢物编号;Selected_in_comparisons为该代谢物来源对比组;Best_Pvalue为所有来源对比组中的最小P值;Max_abs_log2FC为所有来源对比组中的最大绝对值log2FoldChange;Max_VIP为所有来源对比组中的最大VIP值;Direction_summary为变化方向汇总。",'type'=>"0",'name'=>"表$tid. 报告展示关键差异代谢物列表",'path'=>$trend_report_table);
|
||||
$tid++;
|
||||
}
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"各对比组候选代谢物表路径:complete/07_Trend_Analysis/selected_metabolites_by_comparison.xls",'path'=>"../07_Trend_Analysis/selected_metabolites_by_comparison.xls",'type'=>"xls");
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"关键差异代谢物编号对应表路径:complete/07_Trend_Analysis/selected_metabolites_figure_label.xls",'path'=>"../07_Trend_Analysis/selected_metabolites_figure_label.xls",'type'=>"xls");
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"关键差异代谢物趋势分析结果路径:complete/07_Trend_Analysis/",'path'=>"../07_Trend_Analysis",'type'=>"xls");
|
||||
}else{
|
||||
$writer->emptyTag('p','desc'=>"未生成关键差异代谢物跨组趋势分析结果。",'type'=>"type1");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
$writer->emptyTag('h3','name'=>"3.10.2 共有差异代谢物KEGG通路富集分析",'type'=>'type1','desc'=>"");
|
||||
|
||||
my $common_kegg_dir = "$indir/08_Common_KEGG_Enrichment";
|
||||
my $common_bubble_top20_png = "$common_kegg_dir/common_kegg_bubbleplot_top20.png";
|
||||
my $common_bubble_top20_pdf = "$common_kegg_dir/common_kegg_bubbleplot_top20.pdf";
|
||||
my $common_bubble_top5_png = "$common_kegg_dir/common_kegg_bubbleplot_top5.png";
|
||||
my $common_bubble_top5_pdf = "$common_kegg_dir/common_kegg_bubbleplot_top5.pdf";
|
||||
my $common_enrich_table = "$common_kegg_dir/common_kegg_pathway_enrichment.xls";
|
||||
my $common_enrich_top5_table = "$common_kegg_dir/common_kegg_pathway_enrichment_top5.xls";
|
||||
my $common_matched_table = "$common_kegg_dir/common_matched_metabolites.xls";
|
||||
my $common_hit_detail_table = "$common_kegg_dir/common_kegg_pathway_hit_detail.xls";
|
||||
my $common_no_pathway_table = "$common_kegg_dir/common_no_pathway_metabolites.xls";
|
||||
my $common_summary_table = "$common_kegg_dir/common_kegg_summary.xls";
|
||||
|
||||
my $common_id_file = "$indir/04_venn_dir/common_id";
|
||||
my $upset_file = "$indir/04_venn_dir/upset_list.xls";
|
||||
my $venn_file = "$indir/04_venn_dir/venn_list.xls";
|
||||
|
||||
my $compare_num = 0;
|
||||
my $common_num = 0;
|
||||
my $set_file = "";
|
||||
|
||||
if (-e $upset_file) {
|
||||
$set_file = $upset_file;
|
||||
} elsif (-e $venn_file) {
|
||||
$set_file = $venn_file;
|
||||
}
|
||||
|
||||
if ($set_file ne "") {
|
||||
open (IN,$set_file) or die $!;
|
||||
my $head = <IN>;
|
||||
chomp($head);
|
||||
my @head = split /\t/,$head;
|
||||
$compare_num = scalar(@head) - 1;
|
||||
close IN;
|
||||
}
|
||||
|
||||
if (-e $common_id_file && -s $common_id_file) {
|
||||
open (IN,$common_id_file) or die $!;
|
||||
while (<IN>) {
|
||||
chomp;
|
||||
next if (/^\s*$/);
|
||||
$common_num++;
|
||||
}
|
||||
close IN;
|
||||
}
|
||||
|
||||
if ($set_file eq "") {
|
||||
|
||||
$writer->emptyTag('p','desc'=>"未检测到差异代谢物交集结果文件,因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
|
||||
|
||||
} elsif ($compare_num < 2) {
|
||||
|
||||
$writer->emptyTag('p','desc'=>"本项目仅包含1个对比组,无法进行跨比较组共有差异代谢物筛选,因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
|
||||
|
||||
} elsif ($common_num < 2) {
|
||||
|
||||
$writer->emptyTag('p','desc'=>"本项目包含多个对比组,但按“所有比较组共有”标准未筛选到足够数量的共有差异代谢物,因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
|
||||
|
||||
} elsif (-e $common_enrich_table) {
|
||||
|
||||
$writer->emptyTag('p','desc'=>"为进一步解析多个比较组共有差异代谢物的潜在生物学功能,对共有差异代谢物进行了KEGG通路富集分析。共有差异代谢物来源于差异代谢物交集结果。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('p','desc'=>"分析时,以共有差异代谢物作为前景集,以本项目进入正式分析流程的全部代谢物中具有KEGG通路注释的代谢物作为背景集。随后将共有差异代谢物与总代谢物注释表进行匹配,提取其中具有KEGG pathway注释的代谢物用于富集分析,未匹配到注释表或虽匹配成功但无KEGG pathway注释的代谢物不参与本次通路富集统计。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('p','desc'=>"富集分析采用超几何检验评估共有差异代谢物在各KEGG通路中的富集显著性,并进一步进行Benjamini-Hochberg多重检验校正,同时提供Holm校正结果。相关结果可从整体通路富集结果表及显著通路补充结果中进行解读。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('p','desc'=>"需要说明的是,共有差异代谢物并不一定全部具有可用的KEGG pathway注释,因此最终参与富集分析的代谢物数通常会少于共有差异代谢物总数。相关匹配结果及无KEGG通路注释情况见补充附件。",'type'=>"type1");
|
||||
|
||||
if (-e $common_bubble_top20_png) {
|
||||
|
||||
$writer->emptyTag('pic','desc'=>"注:横坐标为富集因子(Enrichment factor),纵坐标为KEGG通路名称,气泡大小表示命中该通路的共有差异代谢物数量,气泡颜色表示富集分析P值。",'name'=>"图$pid. 共有差异代谢物KEGG通路富集气泡图(Top20)",'type'=>"img-width-max",'path'=>$common_bubble_top20_png);
|
||||
$pid++;
|
||||
|
||||
}
|
||||
|
||||
if (-e $common_bubble_top5_png) {
|
||||
|
||||
$writer->emptyTag('pic','desc'=>"注:该图基于富集结果中P值小于0.05的通路绘制,按显著性排序最多展示前5条;若显著通路不足5条,则按实际条数展示。横坐标为MetaRatio,纵坐标为KEGG通路名称,气泡大小表示命中该通路的共有差异代谢物数量,气泡颜色表示-log10(Pvalue)。",'name'=>"图$pid. 共有差异代谢物显著KEGG通路富集气泡图(Top5)",'type'=>"img-width-max",'path'=>$common_bubble_top5_png);
|
||||
$pid++;
|
||||
|
||||
}
|
||||
|
||||
$writer->emptyTag('table','desc'=>"注:ID表示KEGG通路编号;Description表示KEGG通路名称;Total表示背景集中注释到该通路的代谢物数;Hits表示共有差异代谢物中命中该通路的代谢物数;MetaboliteRatio表示命中该通路的共有差异代谢物数占实际参与富集分析总代谢物数的比例;BgRatio表示背景集中命中该通路的代谢物数占背景集中可参与通路富集总代谢物数的比例;pvalue表示超几何检验得到的富集显著性P值;p.adjust表示Benjamini-Hochberg方法校正后的FDR;Holm_adjust表示Holm方法校正后的P值;Enrichment_factor表示富集因子;Diffmeta表示命中该通路的共有差异代谢物;Allmeta表示背景集中注释到该通路的全部代谢物。若行数超过30行,只展示30行。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物KEGG通路富集结果表(部分)",'path'=>"$indir/html_report/template/common_kegg_pathway_enrichment.xls");
|
||||
$tid++;
|
||||
|
||||
|
||||
if (-e $common_summary_table) {
|
||||
|
||||
$writer->emptyTag('table','desc'=>"注:Item表示统计项目名称,Value表示对应统计值。InputMetabolites表示输入共有差异代谢物总数;UsedForEnrichment表示最终参与KEGG富集分析的代谢物数;NoPathwayAnnotation表示匹配成功但无可用KEGGpathway注释的代谢物数;NotFoundOrNotInBackground表示未匹配到注释表或未进入背景集的代谢物数;BackgroundMetabolites表示背景集总代谢物数;BackgroundWithPathway表示背景集中具有KEGG通路注释的代谢物数;EnrichedPathwaysTested表示参与富集检验的通路数;AllowedPathwayCount表示允许保留的通路数。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物KEGG富集分析统计汇总表",'path'=>$common_summary_table);
|
||||
$tid++;
|
||||
|
||||
}
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物KEGG通路富集结果表路径:complete/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls",'type'=>"xls");
|
||||
|
||||
if (-e $common_enrich_top5_table) {
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"显著通路Top5结果表路径(补充):complete/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment_top5.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment_top5.xls",'type'=>"xls");
|
||||
}
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物注释匹配信息表路径:complete/08_Common_KEGG_Enrichment/common_matched_metabolites.xls",'path'=>"../08_Common_KEGG_Enrichment/common_matched_metabolites.xls",'type'=>"xls");
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物通路命中明细表路径(补充):complete/08_Common_KEGG_Enrichment/common_kegg_pathway_hit_detail.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_hit_detail.xls",'type'=>"xls");
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"无KEGG通路注释的共有差异代谢物列表路径(补充):complete/08_Common_KEGG_Enrichment/common_no_pathway_metabolites.xls",'path'=>"../08_Common_KEGG_Enrichment/common_no_pathway_metabolites.xls",'type'=>"xls");
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物KEGG富集分析结果目录路径:complete/08_Common_KEGG_Enrichment/",'path'=>"../08_Common_KEGG_Enrichment",'type'=>"xls");
|
||||
|
||||
} else {
|
||||
|
||||
$writer->emptyTag('p','desc'=>"已筛选到共有差异代谢物,但在KEGG注释及通路过滤后未生成共有差异代谢物KEGG通路富集分析结果。",'type'=>"type1");
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
$writer->emptyTag('h3','name'=>"3.10.3 共有差异代谢物KEGG通路-代谢物网络分析",'type'=>'type1','desc'=>"");
|
||||
|
||||
my $common_network_dir = "$indir/08_Common_KEGG_Enrichment/network";
|
||||
my $common_network_png = "$common_network_dir/common_kegg_pathway_metabolite_network.png";
|
||||
my $common_network_summary = "$common_network_dir/common_kegg_pathway_metabolite_network_summary.xls";
|
||||
my $common_network_pathways = "$common_network_dir/common_kegg_pathway_metabolite_network_selected_pathways.xls";
|
||||
my $common_network_metas = "$common_network_dir/common_kegg_pathway_metabolite_network_selected_metabolites.xls";
|
||||
my $common_network_nodes = "$common_network_dir/common_kegg_pathway_metabolite_network_nodes.xls";
|
||||
my $common_network_edges = "$common_network_dir/common_kegg_pathway_metabolite_network_edges.xls";
|
||||
|
||||
|
||||
if (! -e $common_enrich_table) {
|
||||
|
||||
$writer->emptyTag('p','desc'=>"未生成共有差异代谢物KEGG富集分析结果,因此未开展通路-代谢物网络分析。",'type'=>"type1");
|
||||
|
||||
} elsif (-e $common_network_png) {
|
||||
|
||||
$writer->emptyTag('p','desc'=>"为直观展示共有差异代谢物与显著富集KEGG通路之间的对应关系,本分析基于共有差异代谢物KEGG富集结果构建通路-代谢物网络图。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('p','desc'=>"筛选规则为:优先保留富集分析P值小于0.05的显著通路,并按P值升序、富集因子降序、命中代谢物数降序排序,最多展示前10条通路;随后从入选通路命中的共有差异代谢物中去重筛选,最多保留20个代谢物。代谢物排序规则为degree降序、关联通路最小P值升序、关联通路最大Hits降序、代谢物名称升序。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('pic','desc'=>"注:外圈节点为KEGG通路,图中以通路编号显示;内圈节点为共有差异代谢物。连线表示该代谢物命中对应KEGG通路。代谢物节点大小表示其连接的通路数量,完整节点和边信息见结果表。",'name'=>"图$pid. 共有差异代谢物KEGG通路-代谢物网络图",'type'=>"img-width-max",'path'=>$common_network_png);
|
||||
$pid++;
|
||||
|
||||
if (-e $common_network_pathways) {
|
||||
$writer->emptyTag('table','desc'=>"注:SelectionOrder为通路筛选排序;ID为KEGG通路编号;Description为通路名称;Hits为共有差异代谢物命中该通路的数量;Total为背景集中注释到该通路的代谢物数量;pvalue为富集分析P值;p.adjust为BH校正后的P值;Holm_adjust为Holm校正后的P值;Enrichment_factor为富集因子;Diffmeta为命中该通路的共有差异代谢物;Allmeta为背景集中注释到该通路的全部代谢物。",'type'=>"0",'name'=>"表$tid. 网络图入选KEGG通路列表",'path'=>$common_network_pathways);
|
||||
$tid++;
|
||||
}
|
||||
|
||||
if (-e $common_network_metas) {
|
||||
$writer->emptyTag('table','desc'=>"注:SelectionOrder为代谢物筛选排序;Metabolite为共有差异代谢物名称;Degree为该代谢物连接的入选通路数量;Best_pathway_pvalue为该代谢物关联通路中的最小富集P值;Best_pathway_hits为该代谢物关联通路中的最大Hits值;PathwayIDs为该代谢物关联的入选KEGG通路编号。",'type'=>"0",'name'=>"表$tid. 网络图入选共有差异代谢物列表",'path'=>$common_network_metas);
|
||||
$tid++;
|
||||
}
|
||||
|
||||
if (-e $common_network_summary) {
|
||||
$writer->emptyTag('table','desc'=>"注:Item表示统计项目名称,Value表示对应统计值。InputPathways为输入富集通路数量;SelectedPathways为最终入选网络图的通路数量;TotalSignificantPathways为P值小于0.05的显著通路数量;MaxMetabolites为网络图允许展示的最大代谢物数量;UniqueMetabolitesBeforeFilter为筛选前去重代谢物数量;Metabolites为最终入选网络图的代谢物数量;Edges为通路-代谢物连线数量。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物通路网络统计汇总表",'path'=>$common_network_summary);
|
||||
$tid++;
|
||||
}
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"网络图节点信息表路径:complete/08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_nodes.xls",'path'=>"../08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_nodes.xls",'type'=>"xls");
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"网络图连线信息表路径:complete/08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_edges.xls",'path'=>"../08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_edges.xls",'type'=>"xls");
|
||||
|
||||
}else{
|
||||
$writer->emptyTag('p','desc'=>"共有差异代谢物KEGG富集结果中未筛选到P值小于0.05的显著通路,因此未生成共有差异代谢物KEGG通路-代谢物网络图。",'type'=>"type1");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
###############################
|
||||
###############################
|
||||
#3.11 差异代谢物的层次聚类分析
|
||||
@@ -875,7 +1101,8 @@ if (-e "$indir/$vs1/Hierarchical_Clustering_Analysis/cor_heatmap.png") {
|
||||
#3.13 差异代谢物的KEGG注释
|
||||
###############################
|
||||
###############################
|
||||
$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||||
$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1');
|
||||
#$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||||
$writer->emptyTag('p','desc'=>"生物体中的复杂代谢反应及调控并非独立进行,而是由基因和蛋白质构成的通路网络协同作用,最终导致代谢组的系统性变化。对这些代谢与调控通路的分析,可更全面系统地揭示实验条件改变引发的生物学过程变化、性状或疾病发生机理及药物作用机制等核心问题。",'type'=>"type1");
|
||||
$writer->emptyTag('p','desc'=>"京都基因与基因组百科全书(Kyoto Encyclopedia of Genes and Genomes, KEGG)Pathway数据库<a href=\"#ref19\">[19]</a><a href=\"#ref20\">[20]</a>(www.kegg.jp/kegg/pathway.html)以基因和基因组功能信息为基础,以代谢反应为线索,串联潜在代谢途径及对应调控蛋白,以图解形式展示细胞生理生化过程(如能量代谢、物质运输、信号传递、细胞周期调控等)及保守子通路信息,是代谢网络研究最常用的通路数据库。",'type'=>"type1");
|
||||
$writer->emptyTag('p','desc'=>"我们整理了对应物种中差异代谢物映射的所有通路,部分通路结果展示如下(不足20通路则展示全部),所有通路结果请详见结果文件KEGG_Pathway.xls表。",'type'=>"type1");
|
||||
@@ -892,7 +1119,7 @@ $writer->emptyTag('p','desc'=>"基于上述结果,我们在KEGG通路图上标
|
||||
my @kegg =glob ("$indir/$vs1/KEGG_Analysis/pathway/*.png");
|
||||
if (scalar(@kegg) != 0) {
|
||||
&piclist("$vs1组KEGG通路图","","$indir/$vs1/KEGG_Analysis/pathway/*.png","$indir/$vs1",".png","");
|
||||
$pid++;
|
||||
# $pid++;
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"KEGG通路图路径:complete/*_vs_*/KEGG_Analysis/pathway",'path'=>"../$vs1/KEGG_Analysis/pathway",'type'=>"xls");
|
||||
|
||||
@@ -914,6 +1141,11 @@ if (-e "$indir/$vs1/KEGG_Analysis/kegg_compound_barplot.png") {
|
||||
$writer->emptyTag('p','desc'=>"无差异代谢物匹配到KEGG分类层级,无法生成分类注释图。",'type'=>"type1");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
###############################
|
||||
###############################
|
||||
#3.14 差异代谢物的通路类型分析
|
||||
@@ -1011,6 +1243,52 @@ if (-e "$indir/$vs1/Pathway_Analysis/Bubble_plot.png") {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
$writer->emptyTag('h3','name'=>"3.15.1 关键通路相关差异代谢物热图分析",'type'=>'type1','desc'=>"");
|
||||
|
||||
my $pathway_top_n = 5;
|
||||
my $pathway_report_top_n = 30;
|
||||
my $pathway_heatmap_dir = "$indir/09_Key_Pathway_Heatmap";
|
||||
my $pathway_heatmap_png = "$pathway_heatmap_dir/key_pathway_metabolite_heatmap_report_top30.png";
|
||||
my $pathway_heatmap_table = "$pathway_heatmap_dir/key_pathway_metabolite_report_top30.xls";
|
||||
my $pathway_heatmap_label = "$pathway_heatmap_dir/key_pathway_figure_label.xls";
|
||||
my $pathway_heatmap_summary = "$pathway_heatmap_dir/key_pathway_summary.xls";
|
||||
|
||||
if (-e $pathway_heatmap_png) {
|
||||
$writer->emptyTag('p','desc'=>"为从通路层面综合展示不同对比组中具有代表性的差异代谢变化模式,本分析基于各对比组的代谢通路富集结果,对每个对比组先筛选富集分析P值小于0.05的显著通路,再按P值从小到大排序;若P值相同,则按富集因子(Enrichment factor)从高到低排序;若仍相同,则按命中差异代谢物数(Hits)从高到低排序。最终每个对比组选取前".$pathway_top_n."条关键通路用于后续分析。对于通路范围过大、解释较泛的背景通路(如Metabolic pathways等),本分析选择不纳入关键通路候选范围,以提高结果的聚焦性与可解释性。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('p','desc'=>"在获得各对比组关键通路后,进一步提取这些通路中命中的差异代谢物(Diffmeta),汇总后去重,并按以下规则综合排序:1)该代谢物出现在多少个对比组的入选关键通路中(ComparisonCount,降序);2)该代谢物出现在多少条入选关键通路中(PathwayCount,降序);3)该代谢物在所有来源对比组中的最小P值(BestPvalue,升序);4)该代谢物在所有来源对比组中的最大绝对log2FoldChange值(Max_abs_log2FC,降序)。排序后选取前".$pathway_report_top_n."个代表性代谢物用于报告正文展示,完整结果请见对应结果文件。",'type'=>"type1");
|
||||
|
||||
$writer->emptyTag('pic','desc'=>"注:横坐标为实验分组,纵坐标为关键通路相关差异代谢物编号。颜色表示同一代谢物在不同分组中的行标准化相对变化水平(Row z-score):红色表示该代谢物在对应分组中相对较高,蓝色表示相对较低,白色表示接近该代谢物在各组中的平均水平。图中代谢物以M编号显示,其与具体代谢物名称的对应关系见key_pathway_figure_label.xls。该图用于比较同一代谢物在不同组间的变化趋势,不用于不同代谢物之间绝对丰度大小的直接比较。",'name'=>"图$pid. 关键通路相关差异代谢物热图",'type'=>"img-width-max",'path'=>$pathway_heatmap_png);
|
||||
$pid++;
|
||||
|
||||
if (-e $pathway_heatmap_table) {
|
||||
$writer->emptyTag('table','desc'=>"注:该表为报告热图中展示的关键通路相关差异代谢物列表。MetaboliteRank为代谢物综合排序名次;FigureLabel为图中代谢物编号;ComparisonCount为该代谢物出现在多少个对比组的入选关键通路中;PathwayCount为该代谢物出现在多少条入选关键通路中;BestPvalue为该代谢物在所有来源对比组中的最小P值;Max_abs_log2FC为该代谢物在所有来源对比组中的最大绝对log2FoldChange值;Comparison为来源对比组;SourcePathwayID和SourcePathway分别为来源关键通路编号和名称;PathwayPvalue、Enrichment_factor和Hits分别为对应关键通路的富集P值、富集因子和命中差异代谢物数;Pvalue、log2FC和abs_log2FC为该代谢物在对应来源对比组中的差异统计信息。若表格超过30行只展示30行。",'type'=>"0",'name'=>"表$tid. 报告展示关键通路相关差异代谢物列表(部分)",'path'=>"$indir/html_report/template/key_pathway_metabolite_report_top30.xls");
|
||||
$tid++;
|
||||
}
|
||||
|
||||
if (-e $pathway_heatmap_summary) {
|
||||
$writer->emptyTag('table','desc'=>"注:该表汇总各对比组选出的关键通路。Comparison为对比组名称;PathwayID为KEGG通路编号;Pathway为KEGG通路名称;HitCount为该通路命中的差异代谢物数量;pvalue为通路富集分析P值;Enrichment_factor为富集因子;Metabolites为该通路命中的差异代谢物名称列表。若表格超过30行只展示30行。",'type'=>"0",'name'=>"表$tid. 各对比组关键通路汇总表(部分)",'path'=>"$indir/html_report/template/key_pathway_summary.xls");
|
||||
$tid++;
|
||||
}
|
||||
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"关键通路汇总表路径:complete/09_Key_Pathway_Heatmap/key_pathway_summary.xls",'path'=>"../09_Key_Pathway_Heatmap/key_pathway_summary.xls",'type'=>"xls");
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"关键通路代谢物编号对应表路径:complete/09_Key_Pathway_Heatmap/key_pathway_figure_label.xls",'path'=>"../09_Key_Pathway_Heatmap/key_pathway_figure_label.xls",'type'=>"xls");
|
||||
$writer->emptyTag('file','desc'=>"",'name'=>"关键通路相关差异代谢物热图结果路径:complete/09_Key_Pathway_Heatmap/",'path'=>"../09_Key_Pathway_Heatmap",'type'=>"xls");
|
||||
}else{
|
||||
$writer->emptyTag('p','desc'=>"未生成关键通路相关差异代谢物热图分析结果。",'type'=>"type1");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
$writer->emptyTag('h2','name'=>"3.16 差异代谢物GSEA分析",'type'=>'type1','desc'=>"");
|
||||
$writer->emptyTag('p','desc'=>"传统代谢物功能富集方法基于超几何检验,仅针对差异表达代谢物进行分析。但差异倍数(FC)阈值的设置可能过滤掉部分重要代谢物,导致遗漏关键生物通路或功能模块。GSEA分析(Gene Set Enrichment Analysis)可弥补这一不足,通过整合所有代谢物的表达趋势,更全面揭示代谢物集合在生物系统中的调控作用。",'type'=>"type1");
|
||||
|
||||
@@ -1342,6 +1620,18 @@ sub extractInfo{
|
||||
$pathway_pvalue_min = "未检出显著通路" if !defined $pathway_pvalue_min || $pathway_pvalue_min eq "";
|
||||
|
||||
|
||||
if (-e "$indir/09_Key_Pathway_Heatmap/key_pathway_metabolite_report_top30.xls") {
|
||||
&HeadN30("$indir/09_Key_Pathway_Heatmap/key_pathway_metabolite_report_top30.xls","$indir/html_report/template/key_pathway_metabolite_report_top30.xls");
|
||||
}
|
||||
|
||||
if (-e "$indir/09_Key_Pathway_Heatmap/key_pathway_summary.xls") {
|
||||
&HeadN30("$indir/09_Key_Pathway_Heatmap/key_pathway_summary.xls","$indir/html_report/template/key_pathway_summary.xls");
|
||||
}
|
||||
|
||||
if (-e "complete_dir/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls") {
|
||||
&HeadN30("complete_dir/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls","$indir/html_report/template/common_kegg_pathway_enrichment.xls");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
sub Cnv {
|
||||
|
||||
Reference in New Issue
Block a user