update metabolism pipline

This commit is contained in:
2026-06-01 09:53:12 +08:00
parent 16d9dee69a
commit 18e391ba2f
3 changed files with 738 additions and 10 deletions

View File

@@ -468,7 +468,7 @@ open (CMD,">","$commands_dir/kegg_Annotation.commands") or die $!;
. " -i $outdir/Function_Annotation_dir/KEGG_Analysis "
. " -o $outdir/Meta_mid_KEGG_analysis_dir ";
my $cmd9 = " perl /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/daixie_kegg_pathway_plot.pl -i $outdir/ -o $outdir/ ";
my $cmd10 = " perl $Bin/scripts/plot_meta_KEGG_histogram_v2.pl -i $outdir/DEM_Analysis_dir -o $outdir/Function_Annotation_dir/KEGG_Analysis -map $mapfile ";#kegg分类柱状图和文件
my $cmd10 = " perl /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_meta_KEGG_histogram_v2.pl -i $outdir/DEM_Analysis_dir -o $outdir/Function_Annotation_dir/KEGG_Analysis -map $mapfile ";#kegg分类柱状图和文件
my $cmd11 = " perl /share/nas2/fangbj/pipline/lianhefenxi/rna_pro/V1.2_nuohe/scripts/convertall.pl -i $outdir/Function_Annotation_dir/KEGG_Analysis ";#kegg分类柱状图和文件
print CMD $cmd1,"\n";
print CMD $cmd2,"\n";
@@ -600,6 +600,49 @@ if ($vs_num >1 && $vs_num <=5) {
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 13.1 : Common metabolite list -----------------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
my $common_id_file = "$outdir/venn_dir/common_id";
my $common_status_file = "$outdir/venn_dir/common_id.status.txt";
my $upset_file = "$outdir/venn_dir/upset_list.xls";
my $venn_file = "$outdir/venn_dir/venn_list.xls";
if ($vs_num < 2) {
`: > $common_id_file`;
`echo SINGLE_COMPARISON > $common_status_file`;
print STDERR "Skip Step 13.1 : less than 2 comparisons, common metabolite list is not applicable.\n";
} else {
open (CMD,">","$commands_dir/common_id.commands") or die $!;
my $cmd1 = " if [ -s $upset_file ]; then "
. " awk -F'\\t' 'NR==1{n=NF;next}{ok=1;for(i=2;i<=n;i++){if(\$i!=1){ok=0;break}} if(ok) print \$1}' $upset_file > $common_id_file ; "
. " elif [ -s $venn_file ]; then "
. " awk -F'\\t' 'NR==1{n=NF;next}{ok=1;for(i=2;i<=n;i++){if(\$i!=1){ok=0;break}} if(ok) print \$1}' $venn_file > $common_id_file ; "
. " else "
. " : > $common_id_file ; "
. " fi ; "
. " if [ -s $common_id_file ]; then "
. " echo OK > $common_status_file ; "
. " else "
. " echo NO_COMMON_METABOLITES > $common_status_file ; "
. " fi ";
print CMD $cmd1,"\n";
close (CMD);
Pipline_sh_commands("common_id.commands","common_id.ok");
}
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14 : All Summary ------------------------------------\n"
."--------------------------------------------------------------------------------\n\n\n";
@@ -607,7 +650,7 @@ print STDERR "\n\n\n------------------------------------------------------------
mkdir "$outdir/summary" unless (-d "$outdir/summary") ;
open (CMD,">","$commands_dir/summary.commands") or die $!;
my $cmd1 = " perl /share/nas2/fangbj/pipline/daixie/v1.2_auto/scripts/summaryDEM_all.pl -i $outdir/DEM_Analysis_dir -n $outdir/raw_analysis_dir/mean_all.txt "
my $cmd1 = " perl /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/summaryDEM_all.pl -i $outdir/DEM_Analysis_dir -n $outdir/raw_analysis_dir/mean_all.txt "
. " -anno $outdir/all_samples/all_metabolite_mapping.txt "
. " -o $outdir/summary/All.samples.summary_info.xls ";
@@ -617,6 +660,181 @@ print STDERR "\n\n\n------------------------------------------------------------
Pipline_sh_commands("summary.commands","summary.ok");
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14.1 : Key metabolite trend analysis ----------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
mkdir "$outdir/Trend_Analysis" unless (-d "$outdir/Trend_Analysis");
open (CMD,">","$commands_dir/trend_analysis.commands") or die $!;
my $cmd1 = " /share/nas2/fangbj/biosoft/miniconda3/envs/R4/bin/Rscript /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_key_metabolite_trend_allgroups.R "
. " $outdir/summary/All.samples.summary_info.xls "
. " $groupfile "
. " $outdir/Trend_Analysis "
. " 5 0.05 12 1 "
. " $outdir/Meta_mid_statistical_analysis_dir ";
# my $cmd2 = " mkdir -p $outdir/Trend_Analysis/detail_data ";
#
# my $cmd3 = " mv $outdir/Trend_Analysis/key_metabolite_trend_lineplot.pdf "
# . " $outdir/Trend_Analysis/key_metabolite_trend_lineplot.png "
# . " $outdir/Trend_Analysis/key_metabolite_trend_boxplot.pdf "
# . " $outdir/Trend_Analysis/key_metabolite_trend_boxplot.png "
# . " $outdir/Trend_Analysis/selected_metabolites_unique.xls "
# . " $outdir/Trend_Analysis/selected_metabolites_group_mean_se.xls "
# . " $outdir/Trend_Analysis/selected_metabolites_long_format.xls "
# . " $outdir/Trend_Analysis/detail_data/ 2>/dev/null || true ";
print CMD $cmd1,"\n";
# print CMD $cmd2,"\n";
# print CMD $cmd3,"\n";
close (CMD);
Pipline_sh_commands("trend_analysis.commands","trend_analysis.ok");
}
# - 5每个对比组取前 5 个
# - 0.05P 值阈值
# - 12报告展示前 12 个
# - 1VIP 阈值,表示 VIP >= 1
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14.2 : Key pathway metabolite heatmap ---------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
mkdir "$outdir/Key_Pathway_Heatmap" unless (-d "$outdir/Key_Pathway_Heatmap");
open (CMD,">","$commands_dir/key_pathway_heatmap.commands") or die $!;
my $cmd1 = " /share/nas2/fangbj/biosoft/miniconda3/envs/R4/bin/Rscript /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_key_pathway_metabolite_heatmap.R "
. " $outdir/summary/All.samples.summary_info.xls "
. " $groupfile "
. " $outdir/DEM_Analysis_dir "
. " $outdir/Meta_mid_statistical_analysis_dir "
. " $outdir/Key_Pathway_Heatmap "
. " 5 30 1 "; #这里最后的 1 就是“排除泛通路”;如果你以后想保留这些大通路,改成 0 就行
# my $cmd2 = " mkdir -p $outdir/Key_Pathway_Heatmap/detail_data ";
#
# my $cmd3 = " mv $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_heatmap.pdf "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_heatmap.png "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolites.xls "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_long_format.xls "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_group_mean_se.xls "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_matrix.xls "
# . " $outdir/Key_Pathway_Heatmap/detail_data/ 2>/dev/null || true ";
print CMD $cmd1,"\n";
# print CMD $cmd2,"\n";
# print CMD $cmd3,"\n";
close (CMD);
Pipline_sh_commands("key_pathway_heatmap.commands","key_pathway_heatmap.ok");
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14.3 : Common metabolite KEGG enrichment ------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
my $common_id_file = "$outdir/venn_dir/common_id";
my $common_num = 0;
if (-e $common_id_file && -s $common_id_file) {
open (IN,$common_id_file) or die $!;
while (<IN>) {
chomp;
next if (/^\s*$/);
$common_num++;
}
close IN;
}
if ($vs_num < 2) {
print STDERR "Skip Step 14.3 : less than 2 comparisons, common metabolite enrichment is not applicable.\n";
} elsif (!-e $common_id_file || -z $common_id_file) {
print STDERR "Skip Step 14.3 : common_id file not found or empty.\n";
} elsif ($common_num < 2) {
print STDERR "Skip Step 14.3 : fewer than 2 common metabolites, enrichment is not applicable.\n";
} else {
mkdir "$outdir/Common_KEGG_Enrichment" unless (-d "$outdir/Common_KEGG_Enrichment");
open (CMD,">","$commands_dir/common_kegg_enrichment.commands") or die $!;
my $cmd1 = " python3 /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/common_kegg_enrichment.py "
. " -i $common_id_file "
. " -m $outdir/all_samples/all_metabolite_mapping.txt "
. " -b $outdir/raw_analysis_dir/mean_all.txt ";
if (defined $organism_ko && $organism_ko ne "") {
$cmd1 .= " -g $organism_ko ";
}
$cmd1 .= " -o $outdir/Common_KEGG_Enrichment ";
print CMD $cmd1,"\n";
close (CMD);
Pipline_sh_commands("common_kegg_enrichment.commands","common_kegg_enrichment.ok");
}
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14.4 : Common KEGG network --------------------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
mkdir "$outdir/Common_KEGG_Enrichment/network" unless (-d "$outdir/Common_KEGG_Enrichment/network");
my $common_kegg_network_dir = "$outdir/Common_KEGG_Enrichment/network";
my $common_kegg_enrich_file = "$outdir/Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls";
if ($vs_num < 2) {
print STDERR "Skip Step 14.4 : less than 2 comparisons, common KEGG network is not applicable.\n";
}else{
open (CMD,">","$commands_dir/common_kegg_network.commands") or die $!;
my $awk_check = q{awk -F'\t' 'NR==1{for(i=1;i<=NF;i++){if(tolower($i)=="pvalue") p=i} next} p>0 && $p+0<0.05{found=1} END{exit found?0:1}'};
my $cmd1 = " if [ -s $common_kegg_enrich_file ] && "
. " $awk_check $common_kegg_enrich_file ; then "
. " python3 /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_common_kegg_pathway_metabolite_network.py "
. " -i $common_kegg_enrich_file "
. " -o $common_kegg_network_dir "
. " --sig-cutoff 0.05 "
. " --max-top 10 "
. " --fallback-top 0 "
. " --max-metabolites 20 "
. " --min-metabolite-label-degree 2 "
. " --max-metabolite-labels 20 "
. " ; "
. " else "
. " echo 'Skip common KEGG pathway-metabolite network: no significant pathway or enrichment table is empty.' "
. " ; fi ";
print CMD $cmd1,"\n";
close (CMD);
Pipline_sh_commands("common_kegg_network.commands","common_kegg_network.ok");
}
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 15 : gsea analysis-----------------------------------\n"
."--------------------------------------------------------------------------------\n\n\n";

View File

@@ -554,7 +554,7 @@ print STDERR "\n\n\n------------------------------------------------------------
mkdir("$outdir/summary") unless (-d "$outdir/summary") ;
open (CMD,">","$commands_dir/summary.commands") or die $!;
my $cmd1 = " perl /share/nas2/fangbj/pipline/daixie/v1.2_auto/scripts/summaryDEM_all.pl -i $outdir/DEM_Analysis_dir -n $outdir/raw_analysis_dir/mean_all.txt "
my $cmd1 = " perl /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/summaryDEM_all.pl -i $outdir/DEM_Analysis_dir -n $outdir/raw_analysis_dir/mean_all.txt "
. " -anno $outdir/all_samples/all_metabolite_mapping.txt "
. " -o $outdir/summary/All.samples.summary_info.xls ";
@@ -566,6 +566,52 @@ print STDERR "\n\n\n------------------------------------------------------------
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 13.1 : Common metabolite list -----------------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
my $common_id_file = "$outdir/venn_dir/common_id";
my $common_status_file = "$outdir/venn_dir/common_id.status.txt";
my $upset_file = "$outdir/venn_dir/upset_list.xls";
my $venn_file = "$outdir/venn_dir/venn_list.xls";
if ($vs_num < 2) {
`: > $common_id_file`;
`echo SINGLE_COMPARISON > $common_status_file`;
print STDERR "Skip Step 13.1 : less than 2 comparisons, common metabolite list is not applicable.\n";
} else {
open (CMD,">","$commands_dir/common_id.commands") or die $!;
my $cmd1 = " if [ -s $upset_file ]; then "
. " awk -F'\\t' 'NR==1{n=NF;next}{ok=1;for(i=2;i<=n;i++){if(\$i!=1){ok=0;break}} if(ok) print \$1}' $upset_file > $common_id_file ; "
. " elif [ -s $venn_file ]; then "
. " awk -F'\\t' 'NR==1{n=NF;next}{ok=1;for(i=2;i<=n;i++){if(\$i!=1){ok=0;break}} if(ok) print \$1}' $venn_file > $common_id_file ; "
. " else "
. " : > $common_id_file ; "
. " fi ; "
. " if [ -s $common_id_file ]; then "
. " echo OK > $common_status_file ; "
. " else "
. " echo NO_COMMON_METABOLITES > $common_status_file ; "
. " fi ";
print CMD $cmd1,"\n";
close (CMD);
Pipline_sh_commands("common_id.commands","common_id.ok");
}
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
@@ -605,6 +651,180 @@ print STDERR "\n\n\n------------------------------------------------------------
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14.1 : Key metabolite trend analysis ----------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
mkdir "$outdir/Trend_Analysis" unless (-d "$outdir/Trend_Analysis");
open (CMD,">","$commands_dir/trend_analysis.commands") or die $!;
my $cmd1 = " /share/nas2/fangbj/biosoft/miniconda3/envs/R4/bin/Rscript /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_key_metabolite_trend_allgroups.R "
. " $outdir/summary/All.samples.summary_info.xls "
. " $groupfile "
. " $outdir/Trend_Analysis "
. " 5 0.05 12 1 "
. " $outdir/Meta_mid_statistical_analysis_dir ";
# my $cmd2 = " mkdir -p $outdir/Trend_Analysis/detail_data ";
#
# my $cmd3 = " mv $outdir/Trend_Analysis/key_metabolite_trend_lineplot.pdf "
# . " $outdir/Trend_Analysis/key_metabolite_trend_lineplot.png "
# . " $outdir/Trend_Analysis/key_metabolite_trend_boxplot.pdf "
# . " $outdir/Trend_Analysis/key_metabolite_trend_boxplot.png "
# . " $outdir/Trend_Analysis/selected_metabolites_unique.xls "
# . " $outdir/Trend_Analysis/selected_metabolites_group_mean_se.xls "
# . " $outdir/Trend_Analysis/selected_metabolites_long_format.xls "
# . " $outdir/Trend_Analysis/detail_data/ 2>/dev/null || true ";
print CMD $cmd1,"\n";
# print CMD $cmd2,"\n";
# print CMD $cmd3,"\n";
close (CMD);
Pipline_sh_commands("trend_analysis.commands","trend_analysis.ok");
}
# - 5每个对比组取前 5 个
# - 0.05P 值阈值
# - 12报告展示前 12 个
# - 1VIP 阈值,表示 VIP >= 1
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14.2 : Key pathway metabolite heatmap ---------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
mkdir "$outdir/Key_Pathway_Heatmap" unless (-d "$outdir/Key_Pathway_Heatmap");
open (CMD,">","$commands_dir/key_pathway_heatmap.commands") or die $!;
my $cmd1 = " /share/nas2/fangbj/biosoft/miniconda3/envs/R4/bin/Rscript /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_key_pathway_metabolite_heatmap.R "
. " $outdir/summary/All.samples.summary_info.xls "
. " $groupfile "
. " $outdir/DEM_Analysis_dir "
. " $outdir/Meta_mid_statistical_analysis_dir "
. " $outdir/Key_Pathway_Heatmap "
. " 5 30 1 "; #这里最后的 1 就是“排除泛通路”;如果你以后想保留这些大通路,改成 0 就行
# my $cmd2 = " mkdir -p $outdir/Key_Pathway_Heatmap/detail_data ";
#
# my $cmd3 = " mv $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_heatmap.pdf "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_heatmap.png "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolites.xls "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_long_format.xls "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_group_mean_se.xls "
# . " $outdir/Key_Pathway_Heatmap/key_pathway_metabolite_matrix.xls "
# . " $outdir/Key_Pathway_Heatmap/detail_data/ 2>/dev/null || true ";
print CMD $cmd1,"\n";
# print CMD $cmd2,"\n";
# print CMD $cmd3,"\n";
close (CMD);
Pipline_sh_commands("key_pathway_heatmap.commands","key_pathway_heatmap.ok");
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14.3 : Common metabolite KEGG enrichment ------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
my $common_id_file = "$outdir/venn_dir/common_id";
my $common_num = 0;
if (-e $common_id_file && -s $common_id_file) {
open (IN,$common_id_file) or die $!;
while (<IN>) {
chomp;
next if (/^\s*$/);
$common_num++;
}
close IN;
}
if ($vs_num < 2) {
print STDERR "Skip Step 14.3 : less than 2 comparisons, common metabolite enrichment is not applicable.\n";
} elsif (!-e $common_id_file || -z $common_id_file) {
print STDERR "Skip Step 14.3 : common_id file not found or empty.\n";
} elsif ($common_num < 2) {
print STDERR "Skip Step 14.3 : fewer than 2 common metabolites, enrichment is not applicable.\n";
} else {
mkdir "$outdir/Common_KEGG_Enrichment" unless (-d "$outdir/Common_KEGG_Enrichment");
open (CMD,">","$commands_dir/common_kegg_enrichment.commands") or die $!;
my $cmd1 = " python3 /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/common_kegg_enrichment.py "
. " -i $common_id_file "
. " -m $outdir/all_samples/all_metabolite_mapping.txt "
. " -b $outdir/raw_analysis_dir/mean_all.txt ";
if (defined $organism_ko && $organism_ko ne "") {
$cmd1 .= " -g $organism_ko ";
}
$cmd1 .= " -o $outdir/Common_KEGG_Enrichment ";
print CMD $cmd1,"\n";
close (CMD);
Pipline_sh_commands("common_kegg_enrichment.commands","common_kegg_enrichment.ok");
}
}
print STDERR "\n\n\n--------------------------------------------------------------------------------\n"
."--------------------- Step 14.4 : Common KEGG network --------------------------\n"
."--------------------------------------------------------------------------------\n\n\n";
{
mkdir "$outdir/Common_KEGG_Enrichment/network" unless (-d "$outdir/Common_KEGG_Enrichment/network");
my $common_kegg_network_dir = "$outdir/Common_KEGG_Enrichment/network";
my $common_kegg_enrich_file = "$outdir/Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls";
if ($vs_num < 2) {
print STDERR "Skip Step 14.4 : less than 2 comparisons, common KEGG network is not applicable.\n";
}else{
open (CMD,">","$commands_dir/common_kegg_network.commands") or die $!;
my $awk_check = q{awk -F'\t' 'NR==1{for(i=1;i<=NF;i++){if(tolower($i)=="pvalue") p=i} next} p>0 && $p+0<0.05{found=1} END{exit found?0:1}'};
my $cmd1 = " if [ -s $common_kegg_enrich_file ] && "
. " $awk_check $common_kegg_enrich_file ; then "
. " python3 /share/nas2/fangbj/pipline/daixie/v1.4_auto/scripts/plot_common_kegg_pathway_metabolite_network.py "
. " -i $common_kegg_enrich_file "
. " -o $common_kegg_network_dir "
. " --sig-cutoff 0.05 "
. " --max-top 10 "
. " --fallback-top 0 "
. " --max-metabolites 20 "
. " --min-metabolite-label-degree 2 "
. " --max-metabolite-labels 20 "
. " ; "
. " else "
. " echo 'Skip common KEGG pathway-metabolite network: no significant pathway or enrichment table is empty.' "
. " ; fi ";
print CMD $cmd1,"\n";
close (CMD);
Pipline_sh_commands("common_kegg_network.commands","common_kegg_network.ok");
}
}
#######################################################################################

View File

@@ -350,7 +350,8 @@ $writer->emptyTag('file','desc'=>"",'name'=>"处理后的峰面积数据表见
#3.3 代谢物全注释
#sunxk 20200515
$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1');
#$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('p','desc'=>"我们将本项目所有代谢物在本地数据库进行物质信息搜索整理包括代谢物在常见数据库的编号索引、分类信息和通路信息等。部分代谢物数据库映射表展示如下所有代谢物数据库映射表请详见结果文件all_metabolite_mapping.txt表。",'type'=>"type1");
$writer->emptyTag('table','desc'=>"",'type'=>"0",'name'=>"表$tid. 代谢物数据库映射表(部分)",'path'=>"$indir/html_report/template/all_metabolite_mapping3L.txt");
#$writer->emptyTag('p','desc'=>"我们将本项目所有代谢物在本地数据库进行物质信息搜索整理包括代谢物在常见数据库的编号索引、分类信息和通路信息等。所有代谢物数据库映射表详见结果文件all_metabolite_mapping.txt。",'type'=>"type1");
@@ -437,7 +438,8 @@ $writer->emptyTag('file','desc'=>"",'name'=>"前10代谢物占比结果文件
###############################
###############################
$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1');
#$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('p','desc'=>"由于代谢组数据具有高维度、高噪声及变量间多重共线性的特点,单一变量的传统统计方法难以全面揭示数据内在规律,因此采用多元统计方法进行降维与分类建模。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"本分析使用R语言ropls包<a href=\"#ref5\">[5]</a>包括PCA、PLS-DA和OPLS-DA。",'type'=>"type1");
@@ -643,7 +645,8 @@ if (-e "$indir/$vs1/Statistical_Analysis/OPLS-DA_Permutation.png") {
#3.8 单变量统计分析 (UVA) ,差异代谢物的筛选和火山图
###############################
###############################
$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1');
#$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('p','desc'=>"代谢组学数据具有高维、多变量的特点通常需结合单变量统计分析univariate analysis, UVA与多元统计分析进行综合挖掘以提高差异代谢物筛选的准确性<a href=\"#ref9\">[9]</a>。本研究中,对具有生物学重复的样本采用基于线性模型和经验贝叶斯方法的 limma 流程进行单变量差异分析。考虑到代谢组数据通常具有变量数多而样本重复数有限的特征,传统逐个变量的 t 检验或非参数检验在小样本条件下往往存在方差估计不稳定、统计功效受限等问题。limma 通过经验贝叶斯方法对各变量的方差进行收缩估计,可在保留线性模型分析框架的同时提高方差估计稳定性和差异检验的稳健性,这一思想已在高维组学数据分析的方法学研究中得到系统阐述<a href=\"#ref10\">[10]</a><a href=\"#ref11\">[11]</a>。进一步的比较研究表明,在高维、小样本的连续型组学数据中,基于经验贝叶斯方差建模的 moderated t 检验通常较传统 t 检验表现出更稳定的统计表现,并已在蛋白质组等相邻高通量数据分析中显示出良好的应用实用性<a href=\"#ref12\">[12]</a><a href=\"#ref13\">[13]</a>。此外,代谢组学统计分析流程研究及近年来的实际研究也已将 limma 纳入差异特征筛选框架中,进一步支持了其在差异代谢物筛选中的可行性与适用性<a href=\"#ref14\">[14]</a><a href=\"#ref15\">[15]</a>。",'type'=>"type1");
@@ -806,7 +809,7 @@ $writer->emptyTag('p','desc'=>"柱状图以纵向条纹的高度表示代谢物
my @barplot =glob ("$indir/$vs1/Statistical_Analysis/barplot/*barplot.png");
if (scalar(@barplot) != 0) {
&piclist("$vs1 组间柱状图","注纵坐标为物质相对含量横坐标为组别图中代谢物以编号标识具体对应关系见Means.xls文件。","$indir/$vs1/Statistical_Analysis/barplot/*barplot.png","../../$vs1","_barplot.png","");
$pid++;
# $pid++;
}else{
$writer->emptyTag('p','desc'=>"无差异代谢物,未生成柱状图。",'type'=>"type1");
}
@@ -816,7 +819,7 @@ $writer->emptyTag('p','desc'=>"小提琴-柱状复合图结合了数据分布形
my @violin =glob ("$indir/$vs1/Statistical_Analysis/violin/*violin.png");
if (scalar(@violin) != 0) {
&piclist("$vs1 组间小提琴箱线复合图","注纵坐标为物质相对含量横坐标为组别图中代谢物以编号标识具体对应关系见Means.xls文件。","$indir/$vs1/Statistical_Analysis/violin/*violin.png","$indir/$vs1","_violin.png","");
$pid++;
# $pid++;
}else{
$writer->emptyTag('p','desc'=>"无差异代谢物,未生成小提琴图。",'type'=>"type1");
}
@@ -825,6 +828,229 @@ $writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物组间柱状图路
$writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物组间小提琴-柱状复合图路径complete/*_vs_*/Statistical_Analysis/violin/Metabolite_*_violin.{pdf,png}",'path'=>"../$vs1/Statistical_Analysis/violin",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"代谢物编号对应表路径complete/*_vs_*/Statistical_Analysis/Means.xls",'path'=>"../$vs1/Statistical_Analysis",'type'=>"xls");
$writer->emptyTag('h3','name'=>"3.10.1 关键差异代谢物跨组趋势分析",'type'=>'type1','desc'=>"");
my $trend_dir = "$indir/07_Trend_Analysis";
my $trend_line_png = "$trend_dir/key_metabolite_trend_lineplot_report_top12.png";
my $trend_box_png = "$trend_dir/key_metabolite_trend_boxplot_report_top12.png";
my $trend_report_table = "$trend_dir/selected_metabolites_report_top12.xls";
my $trend_label_table = "$trend_dir/selected_metabolites_figure_label.xls";
my $trend_bycomp_table = "$trend_dir/selected_metabolites_by_comparison.xls";
if (-e $trend_line_png) {
$writer->emptyTag('p','desc'=>"为展示各对比组中具有代表性的关键差异代谢物在全部实验分组中的整体变化趋势本分析以各对比组差异代谢物结果为基础按绝对值log2FoldChange从大到小排序若绝对值log2FoldChange相同则按VIP值从大到小排序。最终每个对比组选取前5个代表性差异代谢物作为候选集合。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"将所有对比组候选代谢物合并后进行去重并按以下规则综合排序1最大绝对值log2FoldChange值Max_abs_log2FC降序2最大VIP值Max_VIP降序3最小P值Best_Pvalue升序。排序后选取前12个代表性代谢物用于报告正文展示完整结果请见对应结果文件。",'type'=>"type1");
$writer->emptyTag('pic','desc'=>"注横坐标为实验分组纵坐标为标准化丰度值。每个分面代表一个关键差异代谢物图中以M编号标识其与具体代谢物名称的对应关系见selected_metabolites_figure_label.xls。折线连接各分组均值误差线表示标准误SE散点表示各样本实际观测值。",'name'=>"图$pid. 关键差异代谢物跨组趋势折线图",'type'=>"img-width-max",'path'=>$trend_line_png);
$pid++;
if (-e $trend_box_png) {
$writer->emptyTag('pic','desc'=>"注:横坐标为实验分组,纵坐标为标准化丰度值。每个分面代表一个关键差异代谢物,箱体表示四分位分布范围,中线表示中位数,散点表示各样本实际观测值。",'name'=>"图$pid. 关键差异代谢物跨组箱线图",'type'=>"img-width-max",'path'=>$trend_box_png);
$pid++;
}
if (-e $trend_report_table) {
$writer->emptyTag('table','desc'=>"注FigureLabel为图中代谢物编号Selected_in_comparisons为该代谢物来源对比组Best_Pvalue为所有来源对比组中的最小P值Max_abs_log2FC为所有来源对比组中的最大绝对值log2FoldChangeMax_VIP为所有来源对比组中的最大VIP值Direction_summary为变化方向汇总。",'type'=>"0",'name'=>"表$tid. 报告展示关键差异代谢物列表",'path'=>$trend_report_table);
$tid++;
}
$writer->emptyTag('file','desc'=>"",'name'=>"各对比组候选代谢物表路径complete/07_Trend_Analysis/selected_metabolites_by_comparison.xls",'path'=>"../07_Trend_Analysis/selected_metabolites_by_comparison.xls",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"关键差异代谢物编号对应表路径complete/07_Trend_Analysis/selected_metabolites_figure_label.xls",'path'=>"../07_Trend_Analysis/selected_metabolites_figure_label.xls",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"关键差异代谢物趋势分析结果路径complete/07_Trend_Analysis/",'path'=>"../07_Trend_Analysis",'type'=>"xls");
}else{
$writer->emptyTag('p','desc'=>"未生成关键差异代谢物跨组趋势分析结果。",'type'=>"type1");
}
$writer->emptyTag('h3','name'=>"3.10.2 共有差异代谢物KEGG通路富集分析",'type'=>'type1','desc'=>"");
my $common_kegg_dir = "$indir/08_Common_KEGG_Enrichment";
my $common_bubble_top20_png = "$common_kegg_dir/common_kegg_bubbleplot_top20.png";
my $common_bubble_top20_pdf = "$common_kegg_dir/common_kegg_bubbleplot_top20.pdf";
my $common_bubble_top5_png = "$common_kegg_dir/common_kegg_bubbleplot_top5.png";
my $common_bubble_top5_pdf = "$common_kegg_dir/common_kegg_bubbleplot_top5.pdf";
my $common_enrich_table = "$common_kegg_dir/common_kegg_pathway_enrichment.xls";
my $common_enrich_top5_table = "$common_kegg_dir/common_kegg_pathway_enrichment_top5.xls";
my $common_matched_table = "$common_kegg_dir/common_matched_metabolites.xls";
my $common_hit_detail_table = "$common_kegg_dir/common_kegg_pathway_hit_detail.xls";
my $common_no_pathway_table = "$common_kegg_dir/common_no_pathway_metabolites.xls";
my $common_summary_table = "$common_kegg_dir/common_kegg_summary.xls";
my $common_id_file = "$indir/04_venn_dir/common_id";
my $upset_file = "$indir/04_venn_dir/upset_list.xls";
my $venn_file = "$indir/04_venn_dir/venn_list.xls";
my $compare_num = 0;
my $common_num = 0;
my $set_file = "";
if (-e $upset_file) {
$set_file = $upset_file;
} elsif (-e $venn_file) {
$set_file = $venn_file;
}
if ($set_file ne "") {
open (IN,$set_file) or die $!;
my $head = <IN>;
chomp($head);
my @head = split /\t/,$head;
$compare_num = scalar(@head) - 1;
close IN;
}
if (-e $common_id_file && -s $common_id_file) {
open (IN,$common_id_file) or die $!;
while (<IN>) {
chomp;
next if (/^\s*$/);
$common_num++;
}
close IN;
}
if ($set_file eq "") {
$writer->emptyTag('p','desc'=>"未检测到差异代谢物交集结果文件因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
} elsif ($compare_num < 2) {
$writer->emptyTag('p','desc'=>"本项目仅包含1个对比组无法进行跨比较组共有差异代谢物筛选因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
} elsif ($common_num < 2) {
$writer->emptyTag('p','desc'=>"本项目包含多个对比组但按“所有比较组共有”标准未筛选到足够数量的共有差异代谢物因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
} elsif (-e $common_enrich_table) {
$writer->emptyTag('p','desc'=>"为进一步解析多个比较组共有差异代谢物的潜在生物学功能对共有差异代谢物进行了KEGG通路富集分析。共有差异代谢物来源于差异代谢物交集结果。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"分析时以共有差异代谢物作为前景集以本项目进入正式分析流程的全部代谢物中具有KEGG通路注释的代谢物作为背景集。随后将共有差异代谢物与总代谢物注释表进行匹配提取其中具有KEGG pathway注释的代谢物用于富集分析未匹配到注释表或虽匹配成功但无KEGG pathway注释的代谢物不参与本次通路富集统计。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"富集分析采用超几何检验评估共有差异代谢物在各KEGG通路中的富集显著性并进一步进行Benjamini-Hochberg多重检验校正同时提供Holm校正结果。相关结果可从整体通路富集结果表及显著通路补充结果中进行解读。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"需要说明的是共有差异代谢物并不一定全部具有可用的KEGG pathway注释因此最终参与富集分析的代谢物数通常会少于共有差异代谢物总数。相关匹配结果及无KEGG通路注释情况见补充附件。",'type'=>"type1");
if (-e $common_bubble_top20_png) {
$writer->emptyTag('pic','desc'=>"注横坐标为富集因子Enrichment factor纵坐标为KEGG通路名称气泡大小表示命中该通路的共有差异代谢物数量气泡颜色表示富集分析P值。",'name'=>"图$pid. 共有差异代谢物KEGG通路富集气泡图Top20",'type'=>"img-width-max",'path'=>$common_bubble_top20_png);
$pid++;
}
if (-e $common_bubble_top5_png) {
$writer->emptyTag('pic','desc'=>"注该图基于富集结果中P值小于0.05的通路绘制按显著性排序最多展示前5条若显著通路不足5条则按实际条数展示。横坐标为MetaRatio纵坐标为KEGG通路名称气泡大小表示命中该通路的共有差异代谢物数量气泡颜色表示-log10(Pvalue)。",'name'=>"图$pid. 共有差异代谢物显著KEGG通路富集气泡图Top5",'type'=>"img-width-max",'path'=>$common_bubble_top5_png);
$pid++;
}
$writer->emptyTag('table','desc'=>"注ID表示KEGG通路编号Description表示KEGG通路名称Total表示背景集中注释到该通路的代谢物数Hits表示共有差异代谢物中命中该通路的代谢物数MetaboliteRatio表示命中该通路的共有差异代谢物数占实际参与富集分析总代谢物数的比例BgRatio表示背景集中命中该通路的代谢物数占背景集中可参与通路富集总代谢物数的比例pvalue表示超几何检验得到的富集显著性P值p.adjust表示Benjamini-Hochberg方法校正后的FDRHolm_adjust表示Holm方法校正后的P值Enrichment_factor表示富集因子Diffmeta表示命中该通路的共有差异代谢物Allmeta表示背景集中注释到该通路的全部代谢物。若行数超过30行只展示30行。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物KEGG通路富集结果表(部分)",'path'=>"$indir/html_report/template/common_kegg_pathway_enrichment.xls");
$tid++;
if (-e $common_summary_table) {
$writer->emptyTag('table','desc'=>"注Item表示统计项目名称Value表示对应统计值。InputMetabolites表示输入共有差异代谢物总数UsedForEnrichment表示最终参与KEGG富集分析的代谢物数NoPathwayAnnotation表示匹配成功但无可用KEGGpathway注释的代谢物数NotFoundOrNotInBackground表示未匹配到注释表或未进入背景集的代谢物数BackgroundMetabolites表示背景集总代谢物数BackgroundWithPathway表示背景集中具有KEGG通路注释的代谢物数EnrichedPathwaysTested表示参与富集检验的通路数AllowedPathwayCount表示允许保留的通路数。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物KEGG富集分析统计汇总表",'path'=>$common_summary_table);
$tid++;
}
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物KEGG通路富集结果表路径complete/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls",'type'=>"xls");
if (-e $common_enrich_top5_table) {
$writer->emptyTag('file','desc'=>"",'name'=>"显著通路Top5结果表路径补充complete/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment_top5.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment_top5.xls",'type'=>"xls");
}
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物注释匹配信息表路径complete/08_Common_KEGG_Enrichment/common_matched_metabolites.xls",'path'=>"../08_Common_KEGG_Enrichment/common_matched_metabolites.xls",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物通路命中明细表路径补充complete/08_Common_KEGG_Enrichment/common_kegg_pathway_hit_detail.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_hit_detail.xls",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"无KEGG通路注释的共有差异代谢物列表路径补充complete/08_Common_KEGG_Enrichment/common_no_pathway_metabolites.xls",'path'=>"../08_Common_KEGG_Enrichment/common_no_pathway_metabolites.xls",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物KEGG富集分析结果目录路径complete/08_Common_KEGG_Enrichment/",'path'=>"../08_Common_KEGG_Enrichment",'type'=>"xls");
} else {
$writer->emptyTag('p','desc'=>"已筛选到共有差异代谢物但在KEGG注释及通路过滤后未生成共有差异代谢物KEGG通路富集分析结果。",'type'=>"type1");
}
$writer->emptyTag('h3','name'=>"3.10.3 共有差异代谢物KEGG通路-代谢物网络分析",'type'=>'type1','desc'=>"");
my $common_network_dir = "$indir/08_Common_KEGG_Enrichment/network";
my $common_network_png = "$common_network_dir/common_kegg_pathway_metabolite_network.png";
my $common_network_summary = "$common_network_dir/common_kegg_pathway_metabolite_network_summary.xls";
my $common_network_pathways = "$common_network_dir/common_kegg_pathway_metabolite_network_selected_pathways.xls";
my $common_network_metas = "$common_network_dir/common_kegg_pathway_metabolite_network_selected_metabolites.xls";
my $common_network_nodes = "$common_network_dir/common_kegg_pathway_metabolite_network_nodes.xls";
my $common_network_edges = "$common_network_dir/common_kegg_pathway_metabolite_network_edges.xls";
if (! -e $common_enrich_table) {
$writer->emptyTag('p','desc'=>"未生成共有差异代谢物KEGG富集分析结果因此未开展通路-代谢物网络分析。",'type'=>"type1");
} elsif (-e $common_network_png) {
$writer->emptyTag('p','desc'=>"为直观展示共有差异代谢物与显著富集KEGG通路之间的对应关系本分析基于共有差异代谢物KEGG富集结果构建通路-代谢物网络图。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"筛选规则为优先保留富集分析P值小于0.05的显著通路并按P值升序、富集因子降序、命中代谢物数降序排序最多展示前10条通路随后从入选通路命中的共有差异代谢物中去重筛选最多保留20个代谢物。代谢物排序规则为degree降序、关联通路最小P值升序、关联通路最大Hits降序、代谢物名称升序。",'type'=>"type1");
$writer->emptyTag('pic','desc'=>"注外圈节点为KEGG通路图中以通路编号显示内圈节点为共有差异代谢物。连线表示该代谢物命中对应KEGG通路。代谢物节点大小表示其连接的通路数量完整节点和边信息见结果表。",'name'=>"图$pid. 共有差异代谢物KEGG通路-代谢物网络图",'type'=>"img-width-max",'path'=>$common_network_png);
$pid++;
if (-e $common_network_pathways) {
$writer->emptyTag('table','desc'=>"注SelectionOrder为通路筛选排序ID为KEGG通路编号Description为通路名称Hits为共有差异代谢物命中该通路的数量Total为背景集中注释到该通路的代谢物数量pvalue为富集分析P值p.adjust为BH校正后的P值Holm_adjust为Holm校正后的P值Enrichment_factor为富集因子Diffmeta为命中该通路的共有差异代谢物Allmeta为背景集中注释到该通路的全部代谢物。",'type'=>"0",'name'=>"表$tid. 网络图入选KEGG通路列表",'path'=>$common_network_pathways);
$tid++;
}
if (-e $common_network_metas) {
$writer->emptyTag('table','desc'=>"注SelectionOrder为代谢物筛选排序Metabolite为共有差异代谢物名称Degree为该代谢物连接的入选通路数量Best_pathway_pvalue为该代谢物关联通路中的最小富集P值Best_pathway_hits为该代谢物关联通路中的最大Hits值PathwayIDs为该代谢物关联的入选KEGG通路编号。",'type'=>"0",'name'=>"表$tid. 网络图入选共有差异代谢物列表",'path'=>$common_network_metas);
$tid++;
}
if (-e $common_network_summary) {
$writer->emptyTag('table','desc'=>"注Item表示统计项目名称Value表示对应统计值。InputPathways为输入富集通路数量SelectedPathways为最终入选网络图的通路数量TotalSignificantPathways为P值小于0.05的显著通路数量MaxMetabolites为网络图允许展示的最大代谢物数量UniqueMetabolitesBeforeFilter为筛选前去重代谢物数量Metabolites为最终入选网络图的代谢物数量Edges为通路-代谢物连线数量。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物通路网络统计汇总表",'path'=>$common_network_summary);
$tid++;
}
$writer->emptyTag('file','desc'=>"",'name'=>"网络图节点信息表路径complete/08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_nodes.xls",'path'=>"../08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_nodes.xls",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"网络图连线信息表路径complete/08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_edges.xls",'path'=>"../08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_edges.xls",'type'=>"xls");
}else{
$writer->emptyTag('p','desc'=>"共有差异代谢物KEGG富集结果中未筛选到P值小于0.05的显著通路因此未生成共有差异代谢物KEGG通路-代谢物网络图。",'type'=>"type1");
}
###############################
###############################
#3.11 差异代谢物的层次聚类分析
@@ -875,7 +1101,8 @@ if (-e "$indir/$vs1/Hierarchical_Clustering_Analysis/cor_heatmap.png") {
#3.13 差异代谢物的KEGG注释
###############################
###############################
$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1');
#$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('p','desc'=>"生物体中的复杂代谢反应及调控并非独立进行,而是由基因和蛋白质构成的通路网络协同作用,最终导致代谢组的系统性变化。对这些代谢与调控通路的分析,可更全面系统地揭示实验条件改变引发的生物学过程变化、性状或疾病发生机理及药物作用机制等核心问题。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"京都基因与基因组百科全书Kyoto Encyclopedia of Genes and Genomes, KEGGPathway数据库<a href=\"#ref19\">[19]</a><a href=\"#ref20\">[20]</a>www.kegg.jp/kegg/pathway.html以基因和基因组功能信息为基础以代谢反应为线索串联潜在代谢途径及对应调控蛋白以图解形式展示细胞生理生化过程如能量代谢、物质运输、信号传递、细胞周期调控等及保守子通路信息是代谢网络研究最常用的通路数据库。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"我们整理了对应物种中差异代谢物映射的所有通路部分通路结果展示如下不足20通路则展示全部所有通路结果请详见结果文件KEGG_Pathway.xls表。",'type'=>"type1");
@@ -892,7 +1119,7 @@ $writer->emptyTag('p','desc'=>"基于上述结果我们在KEGG通路图上标
my @kegg =glob ("$indir/$vs1/KEGG_Analysis/pathway/*.png");
if (scalar(@kegg) != 0) {
&piclist("$vs1组KEGG通路图","","$indir/$vs1/KEGG_Analysis/pathway/*.png","$indir/$vs1",".png","");
$pid++;
# $pid++;
$writer->emptyTag('file','desc'=>"",'name'=>"KEGG通路图路径complete/*_vs_*/KEGG_Analysis/pathway",'path'=>"../$vs1/KEGG_Analysis/pathway",'type'=>"xls");
@@ -914,6 +1141,11 @@ if (-e "$indir/$vs1/KEGG_Analysis/kegg_compound_barplot.png") {
$writer->emptyTag('p','desc'=>"无差异代谢物匹配到KEGG分类层级无法生成分类注释图。",'type'=>"type1");
}
###############################
###############################
#3.14 差异代谢物的通路类型分析
@@ -1011,6 +1243,52 @@ if (-e "$indir/$vs1/Pathway_Analysis/Bubble_plot.png") {
$writer->emptyTag('h3','name'=>"3.15.1 关键通路相关差异代谢物热图分析",'type'=>'type1','desc'=>"");
my $pathway_top_n = 5;
my $pathway_report_top_n = 30;
my $pathway_heatmap_dir = "$indir/09_Key_Pathway_Heatmap";
my $pathway_heatmap_png = "$pathway_heatmap_dir/key_pathway_metabolite_heatmap_report_top30.png";
my $pathway_heatmap_table = "$pathway_heatmap_dir/key_pathway_metabolite_report_top30.xls";
my $pathway_heatmap_label = "$pathway_heatmap_dir/key_pathway_figure_label.xls";
my $pathway_heatmap_summary = "$pathway_heatmap_dir/key_pathway_summary.xls";
if (-e $pathway_heatmap_png) {
$writer->emptyTag('p','desc'=>"为从通路层面综合展示不同对比组中具有代表性的差异代谢变化模式本分析基于各对比组的代谢通路富集结果对每个对比组先筛选富集分析P值小于0.05的显著通路再按P值从小到大排序若P值相同则按富集因子Enrichment factor从高到低排序若仍相同则按命中差异代谢物数Hits从高到低排序。最终每个对比组选取前".$pathway_top_n."条关键通路用于后续分析。对于通路范围过大、解释较泛的背景通路如Metabolic pathways等本分析选择不纳入关键通路候选范围以提高结果的聚焦性与可解释性。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"在获得各对比组关键通路后进一步提取这些通路中命中的差异代谢物Diffmeta汇总后去重并按以下规则综合排序1该代谢物出现在多少个对比组的入选关键通路中ComparisonCount降序2该代谢物出现在多少条入选关键通路中PathwayCount降序3该代谢物在所有来源对比组中的最小P值BestPvalue升序4该代谢物在所有来源对比组中的最大绝对log2FoldChange值Max_abs_log2FC降序。排序后选取前".$pathway_report_top_n."个代表性代谢物用于报告正文展示,完整结果请见对应结果文件。",'type'=>"type1");
$writer->emptyTag('pic','desc'=>"注横坐标为实验分组纵坐标为关键通路相关差异代谢物编号。颜色表示同一代谢物在不同分组中的行标准化相对变化水平Row z-score红色表示该代谢物在对应分组中相对较高蓝色表示相对较低白色表示接近该代谢物在各组中的平均水平。图中代谢物以M编号显示其与具体代谢物名称的对应关系见key_pathway_figure_label.xls。该图用于比较同一代谢物在不同组间的变化趋势不用于不同代谢物之间绝对丰度大小的直接比较。",'name'=>"图$pid. 关键通路相关差异代谢物热图",'type'=>"img-width-max",'path'=>$pathway_heatmap_png);
$pid++;
if (-e $pathway_heatmap_table) {
$writer->emptyTag('table','desc'=>"注该表为报告热图中展示的关键通路相关差异代谢物列表。MetaboliteRank为代谢物综合排序名次FigureLabel为图中代谢物编号ComparisonCount为该代谢物出现在多少个对比组的入选关键通路中PathwayCount为该代谢物出现在多少条入选关键通路中BestPvalue为该代谢物在所有来源对比组中的最小P值Max_abs_log2FC为该代谢物在所有来源对比组中的最大绝对log2FoldChange值Comparison为来源对比组SourcePathwayID和SourcePathway分别为来源关键通路编号和名称PathwayPvalue、Enrichment_factor和Hits分别为对应关键通路的富集P值、富集因子和命中差异代谢物数Pvalue、log2FC和abs_log2FC为该代谢物在对应来源对比组中的差异统计信息。若表格超过30行只展示30行。",'type'=>"0",'name'=>"表$tid. 报告展示关键通路相关差异代谢物列表(部分)",'path'=>"$indir/html_report/template/key_pathway_metabolite_report_top30.xls");
$tid++;
}
if (-e $pathway_heatmap_summary) {
$writer->emptyTag('table','desc'=>"注该表汇总各对比组选出的关键通路。Comparison为对比组名称PathwayID为KEGG通路编号Pathway为KEGG通路名称HitCount为该通路命中的差异代谢物数量pvalue为通路富集分析P值Enrichment_factor为富集因子Metabolites为该通路命中的差异代谢物名称列表。若表格超过30行只展示30行。",'type'=>"0",'name'=>"表$tid. 各对比组关键通路汇总表(部分)",'path'=>"$indir/html_report/template/key_pathway_summary.xls");
$tid++;
}
$writer->emptyTag('file','desc'=>"",'name'=>"关键通路汇总表路径complete/09_Key_Pathway_Heatmap/key_pathway_summary.xls",'path'=>"../09_Key_Pathway_Heatmap/key_pathway_summary.xls",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"关键通路代谢物编号对应表路径complete/09_Key_Pathway_Heatmap/key_pathway_figure_label.xls",'path'=>"../09_Key_Pathway_Heatmap/key_pathway_figure_label.xls",'type'=>"xls");
$writer->emptyTag('file','desc'=>"",'name'=>"关键通路相关差异代谢物热图结果路径complete/09_Key_Pathway_Heatmap/",'path'=>"../09_Key_Pathway_Heatmap",'type'=>"xls");
}else{
$writer->emptyTag('p','desc'=>"未生成关键通路相关差异代谢物热图分析结果。",'type'=>"type1");
}
$writer->emptyTag('h2','name'=>"3.16 差异代谢物GSEA分析",'type'=>'type1','desc'=>"");
$writer->emptyTag('p','desc'=>"传统代谢物功能富集方法基于超几何检验仅针对差异表达代谢物进行分析。但差异倍数FC阈值的设置可能过滤掉部分重要代谢物导致遗漏关键生物通路或功能模块。GSEA分析Gene Set Enrichment Analysis可弥补这一不足通过整合所有代谢物的表达趋势更全面揭示代谢物集合在生物系统中的调控作用。",'type'=>"type1");
@@ -1342,6 +1620,18 @@ sub extractInfo{
$pathway_pvalue_min = "未检出显著通路" if !defined $pathway_pvalue_min || $pathway_pvalue_min eq "";
if (-e "$indir/09_Key_Pathway_Heatmap/key_pathway_metabolite_report_top30.xls") {
&HeadN30("$indir/09_Key_Pathway_Heatmap/key_pathway_metabolite_report_top30.xls","$indir/html_report/template/key_pathway_metabolite_report_top30.xls");
}
if (-e "$indir/09_Key_Pathway_Heatmap/key_pathway_summary.xls") {
&HeadN30("$indir/09_Key_Pathway_Heatmap/key_pathway_summary.xls","$indir/html_report/template/key_pathway_summary.xls");
}
if (-e "complete_dir/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls") {
&HeadN30("complete_dir/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls","$indir/html_report/template/common_kegg_pathway_enrichment.xls");
}
}
sub Cnv {