1979 lines
144 KiB
Perl
1979 lines
144 KiB
Perl
#!/usr/bin/perl -w
|
||
use strict;
|
||
use warnings;
|
||
#!/usr/bin/perl
|
||
#use warnings;
|
||
use Data::Dumper;
|
||
use Getopt::Long;
|
||
use Getopt::Std;
|
||
use Config::General;
|
||
use FindBin qw($Bin $Script);
|
||
use Cwd qw(abs_path getcwd);
|
||
use File::Basename qw(basename dirname);
|
||
use Spreadsheet::Read;
|
||
use List::Util qw/max min/;
|
||
use XML::Writer;
|
||
use IO::File;
|
||
use Encode;
|
||
use YAML::Tiny ;
|
||
use Pipeliner ;
|
||
my $BEGIN_TIME=time();
|
||
my $version="1.0.0";
|
||
|
||
#######################################################################################
|
||
#######################################################################################
|
||
|
||
my ($detail_cfg, $indir ,$qctype,$plat,$huaqingsu,$dingkong,$repeat,$logo,$IS1,$IS2);
|
||
GetOptions(
|
||
"help|?"=>\&USAGE,
|
||
"id:s"=>\$indir,
|
||
"cfg:s"=>\$detail_cfg,
|
||
"r:s"=>\$repeat,
|
||
"qc:s"=>\$qctype,
|
||
"logo:s"=>\$logo,
|
||
"plat:s"=>\$plat,
|
||
"huaqingsu:s"=>\$huaqingsu,
|
||
"dingkong:s"=>\$dingkong,
|
||
"is1:s"=>\$IS1,
|
||
"is2:s"=>\$IS2,
|
||
)or &USAGE;
|
||
&USAGE unless ($indir and $detail_cfg);
|
||
#--------------------------------------------------------------------------------------;
|
||
# Main Body
|
||
# -------------------------------------------------------------------------------------;
|
||
# Prepare the report workspace:
#   1. resolve the input directory to an absolute path,
#   2. make sure <indir>/html_report/template exists,
#   3. copy the bundled assets from <script dir>/template into it.
#
# External commands are run through list-form system(), so spaces or shell
# metacharacters in $indir / $Bin are never interpreted by a shell.
{
    # abs_path() yields undef for an unresolvable path; continuing with an
    # undefined $indir would build "/html_report/template" at the filesystem
    # root, so stop here instead.
    my $resolved_indir = abs_path($indir);
    die "Input directory '$indir' cannot be resolved to an absolute path\n"
        unless defined $resolved_indir;
    $indir = $resolved_indir;

    my $report_template_dir = "$indir/html_report/template";
    unless (-d $report_template_dir) {
        system('mkdir', '-p', $report_template_dir) == 0
            or die "Cannot create '$report_template_dir' (mkdir exit status $?)\n";
    }

    # Same selection the old `cp $Bin/template/*` made: visible entries only,
    # and directories are left out because cp was invoked without -r.
    my $bundled_template_dir = "$Bin/template";
    if (opendir(my $template_dh, $bundled_template_dir)) {
        my @assets = grep { !-d $_ }
                     map  { "$bundled_template_dir/$_" }
                     grep { !/^\./ } readdir($template_dh);
        closedir($template_dh);

        if (@assets) {
            # A failed copy is reported but not fatal, as before.
            system('cp', @assets, $report_template_dir) == 0
                or warn "Copying templates from '$bundled_template_dir' to '$report_template_dir' failed (cp exit status $?)\n";
        }
    }
    else {
        warn "Cannot read template directory '$bundled_template_dir': $!\n";
    }
}
|
||
|
||
my $outfile = "$indir/html_report/config_local.xml";
|
||
my %config=&readConfig($detail_cfg);
|
||
|
||
# Fill in defaults for the optional command-line switches.
#
# $IS1 / $IS2 fall back to the two-character string ^$ only when they were
# not supplied at all (a defined-but-empty value is kept).
for my $is_option (\$IS1, \$IS2) {
    ${$is_option} = '^$' unless defined ${$is_option};
}

# The remaining switches use ||=, so an empty string or "0" given on the
# command line is replaced by the default as well.
$plat      ||= "LC";
$qctype    ||= "Y";
$logo      ||= "Y";
$repeat    ||= "Y";
$huaqingsu ||= "N";
$dingkong  ||= "N";
|
||
my %sample=();
|
||
my %info=();
|
||
my $group_nums=0;
|
||
my $sample_nums=0;
|
||
my $group_name;
|
||
my $vs_name;
|
||
my $vs_nums=0;
|
||
my $Left_peak_nums=-1;
|
||
my $Right_peak_nums=-1;
|
||
my $vs1;
|
||
&extractInfo;
|
||
my ($pid,$tid)=(1,1);
|
||
my $gseakegg_vs;
|
||
|
||
my $diff_meta1;
|
||
my $diff_meta2;
|
||
my $diff_meta3;
|
||
my $diff_meta4;
|
||
my $diff_meta5;
|
||
my ($max_log2fc, $min_log2fc, $max_meta, $min_meta);
|
||
my $max_meta_numid;
|
||
my $min_meta_numid;
|
||
my $pathway_pvalue_min;
|
||
#--------------------------------------------------------------------------------------;
|
||
#------------------------------- 报告标题 ---------------------------------------------;
|
||
#--------------------------------------------------------------------------------------;
|
||
my $report_time=&gaintime();
|
||
my $writer = XML::Writer->new(OUTPUT => 'self');
|
||
$writer->xmlDecl('UTF-8');
|
||
$writer->startTag('report');
|
||
$writer->startTag('report1');
|
||
|
||
#$writer->emptyTag('report_version','value'=>$config{report_version});
|
||
|
||
#-----项目名称
|
||
my $report_name;
|
||
# The report title is mandatory: it is taken from the Project_name entry of
# the configuration and written as the <report_name> element.
# Abort with a descriptive message on STDERR when the entry is missing
# (previously the hint went to STDOUT and die reported only "Died at ...").
$report_name = $config{Project_name};
die "The title must give ... (no Project_name entry found in config '$detail_cfg')\n"
    unless defined $report_name;
$writer->emptyTag('report_name','value'=>$report_name);
|
||
|
||
#--------------------------------------------------------------------------------------;
|
||
#------------------------------ 项目基本信息 ------------------------------------------;
|
||
#--------------------------------------------------------------------------------------;
|
||
|
||
$writer->emptyTag('h1','name'=>"项目基本信息",'type'=>'type1','desc'=>"");
|
||
|
||
# ----- Customer organisation: emitted only when configured
my $Customer_unit = $config{Customer_unit};
$writer->emptyTag('p','desc'=>"客户单位:$Customer_unit",'type'=>'type1')
    if defined $Customer_unit;

# ----- Customer name: emitted only when configured
my $Customer_name = $config{Customer_name};
$writer->emptyTag('p','desc'=>"客户名称:$Customer_name",'type'=>'type1')
    if defined $Customer_name;

# ----- Reporting organisation: always emitted, with a built-in default
my $Reporting_unit = "南京集思慧远生物科技有限公司";
$Reporting_unit = $config{Reporting_unit} if defined $config{Reporting_unit};
$writer->emptyTag('p','desc'=>"报告单位:$Reporting_unit",'type'=>'type1');

# ----- Report date: always emitted; GetDate is consulted only when the
#       configuration does not provide Reporting_time
my $Reporting_time;
if (defined $config{Reporting_time}) {
    $Reporting_time = $config{Reporting_time};
}
else {
    $Reporting_time = &GetDate;
}
$writer->emptyTag('p','desc'=>"报告时间:$Reporting_time",'type'=>'type1');

# ----- After-sales phone number: emitted only when configured
my $shouhou_phone = $config{shouhou_phone};
$writer->emptyTag('p','desc'=>"售后电话:$shouhou_phone",'type'=>'type1')
    if defined $shouhou_phone;
|
||
|
||
|
||
|
||
|
||
|
||
#--------------------------------------------------------------------------------------;
|
||
#----------------------------- 1研究背景 -----------------------------------------------;
|
||
#--------------------------------------------------------------------------------------;
|
||
|
||
$writer->emptyTag('h1','name'=>"1 研究背景",'type'=>'type1','desc'=>"");
|
||
|
||
$writer->emptyTag('p','desc'=>"代谢组学(Metabolomics)概念最早由英国学者Nicholson于1999年提出,是继基因组学和蛋白质组学之后发展起来的学科,也是系统生物学的重要组成部分。基因组学和蛋白质组学分别从基因和蛋白质层面探究生命活动,而细胞内许多生命活动(如细胞信号、能量传递等)均受代谢物调控。代谢组学旨在对生物体内所有代谢物进行定量分析,寻找其与生理病理变化的关联,研究对象多为相对分子质量小于1000的小分子代谢物——它们作为各类代谢路径的底物,参与生物体新陈代谢和生长发育全过程,是生物现象的最终产物。",'type'=>'type1');
|
||
$writer->emptyTag('p','desc'=>"相比基因组和蛋白质组研究,代谢组学具有独特优势:1、基因和蛋白表达的微小变化可通过代谢酶催化反应在代谢物上放大,更易检测分析;2、代谢物变化不仅反映基因组变化,还受环境因素、肠道菌群影响,动态性更强,对生物体变化的反映更灵敏;3、代谢反应及终产物在不同物种生物体系中具有相似性,方法学通用性更高;4、无需建立全基因组测序及大量表达序列数据库,几乎可直接检测所有样本类型,包括全血、血浆/血清、组织、细胞、细胞培养上清、尿液、粪便、食物、唾液、脑脊液、脂肪等。",'type'=>'type1');
|
||
|
||
if ($plat eq "LC") {
|
||
$writer->emptyTag('p','desc'=>"非靶向代谢组学全面检测生物体整个代谢组(metabolome),重点筛选实验组与对照组间有显著变化的代谢特征(metabolic features),鉴定其化学结构,进而阐释这些代谢物及相关代谢通路与生命过程或生命状态的关联。该技术一次实验可检测超过10,000个代谢特征,利于发现新代谢物和新代谢通路,对疾病诊断生物标志物研发及疾病病理研究具有重要意义。",'type'=>'type1');
|
||
|
||
}elsif ($plat eq "GC" && $dingkong eq "N") {
|
||
$writer->emptyTag('p','desc'=>"非靶向代谢组学全面检测生物体整个代谢组(metabolome),重点筛选实验组与对照组间有显著变化的代谢特征(metabolic features),鉴定其化学结构,进而阐释这些代谢物及相关代谢通路与生命过程或生命状态的关联。该技术一次实验可检测超过10,000个代谢特征,利于发现新代谢物和新代谢通路,对疾病诊断生物标志物研发及疾病病理研究具有重要意义。",'type'=>'type1');
|
||
}elsif ($plat eq "GC" && $dingkong eq "Y") {
|
||
$writer->emptyTag('p','desc'=>"顶空(固相微萃取)技术主要用于检测气体、液体或固体样本中的挥发性、半挥发性组分及气味、香味物质。其原理为:将待测样本置于恒温密闭容器中,加热使挥发性组分从样本中挥发,待顶空瓶内气液(或气固)两相达到热力学平衡后,直接抽取顶部气体注入气相色谱-质谱仪进行分离分析,从而实现对目标物质的检测。",'type'=>'type1');
|
||
}elsif ($plat eq "Target") {
|
||
$writer->emptyTag('p','desc'=>"靶向代谢物检测(Targeted Metabolites)针对目标明确的代谢产物(尤其单一或多种代谢途径的产物)进行分析。通过大量天然和生物变异样本,验证预先确定的代谢物或已鉴定的潜在生物标志物的定量信息,需借助分析标准品实现准确定性定量。目前结合液相、气相、三重四极杆、高分辨质谱等技术,提供单糖组成、激素、神经递质、胆汁酸、脂肪酸、酚酸等几十种靶向代谢检测包,通过标准品绘制标准曲线,实现样品中靶向代谢物的绝对定性定量。",'type'=>'type1');
|
||
|
||
}elsif ($plat eq "guangba") {
|
||
$writer->emptyTag('p','desc'=>"植物广泛靶标代谢组数据库涵盖9600余种代谢物,每种代谢物均配有多张标准高分辨二级质谱图。运用高分辨质谱SWATH技术对样品进行超高覆盖率二级质谱扫描,实现代谢物高数量、高准确度鉴定;针对样本特异性构建MRM离子对数据库后,再通过三重四极杆质谱MRM技术进行精确定量。该综合技术具有鉴定物质多、定性定量准、动态范围宽、灵敏度高、重复性好等优势,为植物复杂代谢物检测提供了高效方法。",'type'=>'type1');
|
||
}
|
||
|
||
if ($huaqingsu eq "Y") {
|
||
$writer->emptyTag('p','desc'=>"花青素(anthocyanidin)是一类广泛存在于植物中的水溶性色素,隶属于类黄酮化合物家族,也是植物花、果实等器官的主要呈色物质。自然条件下,游离态的花青素极为罕见,其主要以糖苷化形式(即花色苷,anthocyanin)存在——通常与1个或多个葡萄糖、鼠李糖、半乳糖、木糖、阿拉伯糖等单糖通过糖苷键结合而成。本研究基于UPLC-MS/MS技术平台,建立了靶向花青素类小分子的专属数据库及定量分析方法,可同时检测花青素生物合成通路中的多种相关物质,为生命科学领域的基础研究(如植物色素合成调控、花青素生理功能解析等)提供技术支撑。",'type'=>'type1');
|
||
}
|
||
|
||
#--------------------------------------------------------------------------------------;
|
||
#---------------------------- 2项目概述 -----------------------------------------------;
|
||
#--------------------------------------------------------------------------------------;
|
||
|
||
#2.1数据分析流程
|
||
|
||
$writer->emptyTag('h1','name'=>"2 项目概述",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('h2','name'=>"2.1 数据分析流程",'type'=>'type1','desc'=>"");
|
||
|
||
if ($plat eq "LC") {
|
||
$writer->emptyTag('p','desc'=>"基于LC-MS/MS系统 (Orbitrap Exploris 120, Thermo Fisher Scientific, USA) <a href=\"#ref1\">[1]</a><a href=\"#ref2\">[2]</a>的代谢组学项目数据分析主要分为基础数据分析、高级数据分析和个性化数据分析三个部分。基础数据分析是对代谢组的定性定量结果进行单变量统计分析和多元变量统计分析 (Multivariate Analysis, MVA) <a href=\"#ref3\">[3]</a>,筛选出现显著差异的代谢物;个性化数据分析是在基础数据分析的基础上,对显著差异的代谢物进行的一系列生物信息学分析。除此之外,我们还可以针对客户需求,定制高级数据分析,如多组学关联分析,发表文章的图表设计等。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"数据分析流程如下图所示,具体的数据分析内容请参见本报告的分析内容章节。",'type'=>"type1");
|
||
$writer->emptyTag('pic','desc'=>"",'name'=>"图$pid. LC-MS/MS数据分析流程图",'type'=>"type1",'path'=>"$indir/html_report/template/GC_flow.png");
|
||
#$writer->emptyTag('p','desc'=>"Fig.$pid. Workflow chart of LC-MS/MS data analysis procedure",'type'=>"type1");
|
||
$pid++;
|
||
|
||
}elsif ($plat eq "GC" && $dingkong eq "N") {
|
||
$writer->emptyTag('p','desc'=>"基于气相色谱-质谱联用技术(Gas Chromatography Tandem Mass Spectrometry, GC-MS)<a href=\"#ref1\">[1]</a><a href=\"#ref2\">[2]</a>的代谢组学项目数据分析部分主要分为基础数据分析、高级数据分析和个性化数据分析三个部分。基础数据分析是对代谢组的定性定量结果进行单变量统计分析和多元变量统计分析(Multivariate Analysis, MVA)<a href=\"#ref3\">[3]</a>,筛选出现显著差异的代谢物;个性化数据分析是在基础数据分析的基础上,对显著差异的代谢物进行的一系列生物信息学分析。除此之外,我们还可以针对客户需求,定制高级数据分析,如多组学关联分析,发表文章的图表设计等。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"数据分析流程如下图所示,具体的数据分析内容请参见本报告的分析内容章节。",'type'=>"type1");
|
||
$writer->emptyTag('pic','desc'=>"",'name'=>"图$pid. GC-MS数据分析流程图",'type'=>"type1",'path'=>"$indir/html_report/template/GC_flow.png");
|
||
#$writer->emptyTag('p','desc'=>"Fig.$pid. Workflow chart of GC-MS data analysis procedure",'type'=>"type1");
|
||
$pid++;
|
||
|
||
}elsif ($plat eq "GC" && $dingkong eq "Y") {
|
||
$writer->emptyTag('p','desc'=>"基于顶空固相微萃取气相色谱-质谱联用技术(HS-Solidphase Micro-Extraction Gas Chromatography -Mass Spectrometry, HS-SPME GC-MS)<a href=\"#ref1\">[1]</a><a href=\"#ref2\">[2]</a>的代谢组学项目数据分析部分主要分为基础数据分析、高级数据分析和个性化数据分析三个部分。基础数据分析是对代谢组的定性定量结果进行单变量统计分析和多元变量统计分析(Multivariate Analysis, MVA)<a href=\"#ref3\">[3]</a>,筛选出现显著差异的代谢物;个性化数据分析是在基础数据分析的基础上,对显著差异的代谢物进行的一系列生物信息学分析。除此之外,我们还可以针对客户需求,定制高级数据分析,如多组学关联分析,发表文章的图表设计等。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"数据分析流程如下图所示,具体的数据分析内容请参见本报告的分析内容章节。",'type'=>"type1");
|
||
$writer->emptyTag('pic','desc'=>"",'name'=>"图$pid. GC-MS数据分析流程图",'type'=>"type1",'path'=>"$indir/html_report/template/GC_flow.png");
|
||
#$writer->emptyTag('p','desc'=>"Fig.$pid. Workflow chart of GC-MS data analysis procedure",'type'=>"type1");
|
||
$pid++;
|
||
|
||
}elsif ($plat eq "Target") {
|
||
$writer->emptyTag('p','desc'=>"基于超高效液相色谱-质谱技术 (Ultra-high performance liquid chromatography mass spectrometry, UHPLC-MS/MS) <a href=\"#ref1\">[1]</a><a href=\"#ref2\">[2]</a>的代谢组学项目数据分析主要分为基础数据分析、高级数据分析和个性化数据分析三个部分。基础数据分析是对代谢组的定性定量结果进行单变量统计分析和多元变量统计分析 (Multivariate Analysis, MVA) <a href=\"#ref3\">[3]</a>,筛选出现显著差异的代谢物;个性化数据分析是在基础数据分析的基础上,对显著差异的代谢物进行的一系列生物信息学分析。除此之外,我们还可以针对客户需求,定制高级数据分析,如多组学关联分析,发表文章的图表设计等。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"数据分析流程如下图所示,具体的数据分析内容请参见本报告的分析内容章节。",'type'=>"type1");
|
||
$writer->emptyTag('pic','desc'=>"",'name'=>"图$pid. 靶标数据分析流程图",'type'=>"type1",'path'=>"$indir/html_report/template/GC_flow.png");
|
||
#$writer->emptyTag('p','desc'=>"Fig.$pid. Workflow chart of analysis procedure",'type'=>"type1");
|
||
$pid++;
|
||
|
||
}elsif ($plat eq "guangba") {
|
||
$writer->emptyTag('p','desc'=>"基于LC-MS/MS系统 (Orbitrap Exploris 120, Thermo Fisher Scientific, USA) <a href=\"#ref1\">[1]</a><a href=\"#ref2\">[2]</a>的代谢组学项目数据分析主要分为基础数据分析、高级数据分析和个性化数据分析三个部分。基础数据分析是对代谢组的定性定量结果进行单变量统计分析和多元变量统计分析 (Multivariate Analysis, MVA) <a href=\"#ref3\">[3]</a>,筛选出现显著差异的代谢物;个性化数据分析是在基础数据分析的基础上,对显著差异的代谢物进行的一系列生物信息学分析。除此之外,我们还可以针对客户需求,定制高级数据分析,如多组学关联分析,发表文章的图表设计等。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"数据分析流程如下图所示,具体的数据分析内容请参见本报告的分析内容章节。",'type'=>"type1");
|
||
$writer->emptyTag('pic','desc'=>"",'name'=>"图$pid. 植物广泛靶标检测分析技术流程图",'type'=>"type1",'path'=>"$indir/html_report/template/GC_flow.png");
|
||
#$writer->emptyTag('p','desc'=>"Fig.$pid. Workflow chart of plant broad target detection and analysis procedure",'type'=>"type1");
|
||
$pid++;
|
||
|
||
}
|
||
|
||
|
||
|
||
#2.2样本信息
|
||
$writer->emptyTag('h2','name'=>"2.2 样本信息",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"$config{Sample_name}样品,共 $group_nums 组,分别为 $group_name ,每组对应的生物学重复分别为 $config{repeat} 例,进行基于LC-MS/MS 的代谢组学分析,共计 $sample_nums 例实验样本。",'type'=>"type1");
|
||
$writer->emptyTag('table','desc'=>"注:Group_name:组名,Sample_num:每组对应的样本数量。",'type'=>"0",'name'=>"表$tid. 样品详细信息表",'path'=>"$indir/html_report/template/Sample_detail_table.txt");
|
||
$tid++;
|
||
|
||
#2.3数据分析信息
|
||
$writer->emptyTag('h2','name'=>"2.3 数据分析信息",'type'=>'type1','desc'=>"");
|
||
#$writer->emptyTag('h3','name'=>"2.3.1 统计分析的对比信息",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"本次分析包括$vs_name 共计$vs_nums 次对比分析。",'type'=>"type1");
|
||
|
||
#2.3.2分析内容
|
||
#$writer->emptyTag('h3','name'=>"2.3.2 统计分析的对比信息",'type'=>'type1','desc'=>"");
|
||
#$writer->emptyTag('p','desc'=>"基础数据分析:",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"原始数据质控;数据预处理;基于线性模型和经验贝叶斯方法的单变量统计分析;多元变量统计分析包括主成分分析 (principal component analysis, PCA) 、偏最小二乘法判别分析 (partial least squares-discriminant analysis, PLS-DA) 及正交偏最小二乘法判别分析 (orthogonal projections to latent structures-discriminant analysis, OPLS-DA) 等;差异代谢物筛选和鉴定、差异代谢物的统计分析、差异代谢物的层次聚类分析、差异代谢物的KEGG注释及代谢通路分析 (Pathway Analysis)等。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"个性化数据分析:",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"差异代谢物的KEGG注释,差异代谢物的代谢通路分析 (Pathway Analysis) ,差异代谢物的层次聚类分析等等。",'type'=>"type1");
|
||
|
||
|
||
#--------------------------------------------------------------------------------------;
|
||
#---------------------------- 3数据分析结果 -------------------------------------------;
|
||
#--------------------------------------------------------------------------------------;
|
||
$writer->emptyTag('h1','name'=>"3数据分析结果",'type'=>'type1','desc'=>"");
|
||
#$writer->emptyTag('p','desc'=>"LC-MS/MS 平台的电离源为电喷雾电离,有正离子模式 (positive ion mode, POS) 和负离子模式 (negative ion mode, NEG) 两种电离方式,在检测代谢组时结合使用两种方式可以使代谢物覆盖率更高,检测效果也更好,后续的数据分析过程中两组数据分别分析,正负离子模式的结果分别在results/POS 目录和results/NEG 目录中。本报告中以正离子模式为例进行阐述,负离子模式的结果中分析原理和分析步骤部分完全相同,仅数值上略有差异。",'type'=>"type1");
|
||
|
||
|
||
#3.1 原始数据质控结果
|
||
$writer->emptyTag('h2','name'=>"3.1 原始数据质控",'type'=>'type1','desc'=>"");
|
||
# Body of section 3.1 (raw-data quality control).
#
#   $qctype eq "Y" : QC-sample PCA (3.1.1), QC-sample correlation (3.1.2)
#                    and all-sample correlation (3.1.3).
#   otherwise      : a notice that no QC samples exist, then all-sample PCA
#                    (3.1.1) and all-sample correlation (3.1.2).
#
# $pid is the running figure number: it must advance exactly once per <pic>
# element that is actually written. A stray increment that belonged to a
# disabled figure used to follow the QC correlation heat map and made the
# numbering skip one; it is removed here. The commented-out legacy
# sub-sections (QC RSD statistics, abundance/correlation pair plots) that
# lived inside this block are not reproduced.
if ($qctype eq "Y") {

    $writer->emptyTag('p','desc'=>"在进行基于质谱技术的代谢组学研究时,为了获得可靠且高质量的代谢组学数据,通常需进行质量控制 (quality control, QC)<a href=\"#ref4\">[4]</a>,以下对原始数据的质量控制均基于QC 样本进行。",'type'=>"type1");

    # 3.1.1 PCA of the QC samples
    $writer->emptyTag('h3','name'=>"3.1.1 QC 样本PCA 分析",'type'=>'type1','desc'=>"");
    $writer->emptyTag('p','desc'=>"理论上,QC 样本都是相同的,但是在样品提取、检测分析过程中会有系统误差,导致QC 样本间会有差异,差异越小说明方法稳定性越高数据质量越好,体现在PCA 分析图上就是QC 样本分布越密集。",'type'=>"type1");
    if (-e "$indir/html_report/template/pca_analysis/all_samples_pca.png") {
        $writer->emptyTag('pic','desc'=>"",'name'=>"图$pid. 全部样本 (包括QC 样本) 的PCA 得分散点图",'type'=>"type1",'path'=>"$indir/html_report/template/pca_analysis/all_samples_pca.png");
        $pid++;
    }else{
        # No overall PCA image: point the reader at the per-comparison PCA instead
        $writer->emptyTag('p','desc'=>"无质控样本,且只有一个比对组,详见以下$vs1 的PCA 分析。",'type'=>"type1");
    }
    $writer->emptyTag('p','desc'=>"图中 QC 样本若是聚集并且重复性良好,说明系统稳定。当组内样本量≥4时,椭圆可近似解释为组分布的置信椭圆;当组内样本量≤3时,椭圆仅作为几何包络展示,不作统计推断。",'type'=>"type1");

    $writer->emptyTag('file','desc'=>"",'name'=>"QC样本PCA结果见:complete/02_all_samples/",'path'=>"../02_all_samples/",'type'=>"xls");

    # 3.1.2 Correlation between QC samples
    $writer->emptyTag('h3','name'=>"3.1.2 QC 样本相关性分析",'type'=>'type1','desc'=>"");
    $writer->emptyTag('pic','desc'=>"注:计算的为spearman相关性,横坐标和纵坐标表示QC样品名称,不同颜色表示相关性大小。",'name'=>"图$pid. QC 样本相关性热图",'type'=>"type1",'path'=>"$indir/01_Corr_analysis/qc_corelation.png");
    $pid++;

    $writer->emptyTag('file','desc'=>"",'name'=>"QC样本相关性结果见:complete/01_Corr_analysis/qc*",'path'=>"../01_Corr_analysis/",'type'=>"xls");

    # 3.1.3 Correlation between all samples
    $writer->emptyTag('h3','name'=>"3.1.3 全部样本相关性分析",'type'=>'type1','desc'=>"");
    $writer->emptyTag('pic','desc'=>"注:计算的为spearman相关性,横坐标和纵坐标表示样本名称,不同颜色表示相关性大小。",'name'=>"图$pid. 全部样本相关性热图",'type'=>"type1",'path'=>"$indir/01_Corr_analysis/sample_corelation.png");
    $pid++;

    $writer->emptyTag('file','desc'=>"",'name'=>"全部样本相关性结果见:complete/01_Corr_analysis/sample*",'path'=>"../01_Corr_analysis/",'type'=>"xls");

}else{
    $writer->emptyTag('p','desc'=>"本项目未设置QC 样本,无法对原始数据进行质量控制。",'type'=>"type1");

    # 3.1.1 PCA of all samples
    $writer->emptyTag('h3','name'=>"3.1.1 所有样本PCA 分析",'type'=>'type1','desc'=>"");
    if (-e "$indir/html_report/template/pca_analysis/all_samples_pca.png") {
        $writer->emptyTag('pic','desc'=>"注:PCA 得分图中每个点代表一个样本,不同颜色表示不同分组。横坐标和纵坐标分别表示前两个主成分,括号中的百分比表示相应主成分对样本总体变异的解释率。图形根据分组数量自动调整展示方式:组数较少时显示椭圆和样本标签,组数中等时仅显示椭圆边界,组数较多时仅显示样本点。当组内样本量大于等于 4 时,椭圆用于展示组内离散趋势;当组内样本量小于等于 3 时,椭圆仅作为几何包络辅助展示,不作统计推断。",'name'=>"图$pid. 全部样本的PCA 得分散点图",'type'=>"type1",'path'=>"$indir/html_report/template/pca_analysis/all_samples_pca.png");
        $pid++;
    }
    $writer->emptyTag('p','desc'=>"图中PCA样本若是聚集,重复性良好,说明系统稳定。",'type'=>"type1");
    $writer->emptyTag('file','desc'=>"",'name'=>"全部样本PCA结果见:complete/02_all_samples/",'path'=>"../02_all_samples/",'type'=>"xls");

    # 3.1.2 Correlation between all samples
    $writer->emptyTag('h3','name'=>"3.1.2 全部样本相关性分析",'type'=>'type1','desc'=>"");
    $writer->emptyTag('pic','desc'=>"注:计算的为spearman相关性,横坐标和纵坐标表示样本名称,不同颜色表示相关性大小。",'name'=>"图$pid. 全部样本相关性热图",'type'=>"type1",'path'=>"$indir/01_Corr_analysis/sample_corelation.png");
    $pid++;

    $writer->emptyTag('file','desc'=>"",'name'=>"全部样本相关性结果见:complete/01_Corr_analysis/sample*",'path'=>"../01_Corr_analysis/",'type'=>"xls");

}
|
||
|
||
|
||
#3.2 原始数据预处理
|
||
|
||
$writer->emptyTag('h2','name'=>"3.2 原始数据预处理",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"原始数据包含$sample_nums 个实验样本,原始数据详见结果文件“RawData”中的下机数据表。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"为更好地进行数据分析,我们对原始数据进行了一系列预处理(data management),主要包括以下步骤:",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"1) 数据过滤:去除无确定物质名称及无谱图比对相似性的数据。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"2) 缺失值处理:组内样品缺失数量大于50%的物质直接过滤;缺失数量小于50%的,采用K最近邻(K-Nearest Neighbor, KNN)算法进行缺失值插补(missing value imputation)。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"3) 归一化处理(normalization):样品含内标物质(internal standard, IS)时,采用内标进行归一化;无内标时,采用总和归一化(sum normalization)。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"原始数据经预处理后,共保留$Left_peak_nums 个代谢物。",'type'=>"type1");
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"处理后的峰面积数据表见:complete_dir/02_all_samples/mean_all.txt",'path'=>"../02_all_samples/mean_all.txt",'type'=>"xls");
|
||
|
||
#3.3 代谢物全注释
|
||
#sunxk 20200515
|
||
$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1');
|
||
#$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||
$writer->emptyTag('p','desc'=>"我们将本项目所有代谢物在本地数据库进行物质信息搜索整理,包括代谢物在常见数据库的编号索引、分类信息和通路信息等。部分代谢物数据库映射表展示如下,所有代谢物数据库映射表请详见结果文件all_metabolite_mapping.txt表。",'type'=>"type1");
|
||
$writer->emptyTag('table','desc'=>"",'type'=>"0",'name'=>"表$tid. 代谢物数据库映射表(部分)",'path'=>"$indir/html_report/template/all_metabolite_mapping3L.txt");
|
||
#$writer->emptyTag('p','desc'=>"我们将本项目所有代谢物在本地数据库进行物质信息搜索整理,包括代谢物在常见数据库的编号索引、分类信息和通路信息等。所有代谢物数据库映射表详见结果文件all_metabolite_mapping.txt。",'type'=>"type1");
|
||
#$writer->emptyTag('table','desc'=>"",'type'=>"0",'name'=>"表$tid. 代谢物数据库映射表",'path'=>"$indir/02_all_samples/all_metabolite_mapping.txt");
|
||
$tid++;
|
||
|
||
$writer->emptyTag('p','desc'=>"1) Metabolite :代谢物名称;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"2) Synonyms:该代谢物在HMDB数据库中的同义词;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"3) Formula :该物质的化学组成;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"4) Exact.Mass :该物质的精确分子量;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"5) NIKKAJI :该物质在NIKKAJI 数据库中的编号;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"6) ChEBI :该物质在ChEBI 数据库中的编号;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"7) PubChem :该物质在PubChem 数据库中的编号;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"8) CAS :该物质的CAS 号;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"9) KEGG id :该物质在KEGG COMPOUND 数据库中的编号;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"10) HMDB :该物质在HMDB 数据库中的编号;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"11) KingDom :该物质在HMDB 数据库中的一级分类;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"12) Super Class :该物质在HMDB 数据库中的二级分类;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"13) Class :该物质在HMDB 数据库中的三级分类;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"14) Sub Class :该物质在HMDB 数据库中的四级分类;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"15) KEGG.LINK :该物质的KEGG COMPOUND 数据库链接;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"16) Pathway :该物质映射的KEGGPATHWAY。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"表中各列分别为代谢物映射不同数据库的索引,此处我们取KEGG compound ID 进行后续分析,用户可根据自己对其他数据库信息的需要,查找对应的数据库索引。",'type'=>"type1");
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"代谢物的数据库映射表见:complete/02_all_samples/all_metabolite_mapping.txt",'path'=>"../02_all_samples/all_metabolite_mapping.txt",'type'=>"xls");
|
||
|
||
#3.4 全部代谢物聚类分析
|
||
$writer->emptyTag('h2','name'=>"3.4 所有代谢物聚类热图",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"根据样本数量和特征数目采用不同的标准化策略:",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"- 当样本数≤2时,进行列方向的无中心化缩放。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"- 当样本数>2时,进行行方向的Z-score标准化。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"随后使用欧几里得距离对特征进行层次聚类,绘制包含分组注释的聚类热图。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"结果显示样本间的表达模式相似性,便于识别共表达的特征群。",'type'=>"type1");
|
||
if (-e "$indir/02_all_samples/all_samples.heatmap.png") {
|
||
$writer->emptyTag('pic','desc'=>"注:横向为样本信息,纵向为代谢物信息。颜色图例为标准化处理后得到的表达量(颜色越红代表表达量越高)。",'name'=>"图$pid. 全部代谢物的聚类热图",'type'=>"img-width-max",'path'=>"$indir/02_all_samples/all_samples.heatmap.png");
|
||
$pid++;
|
||
# $writer->emptyTag('p','desc'=>"Fig.$pid. Heatmap of hierarchical clustering analysis for group $vs1",'type'=>"type1");
|
||
}
|
||
#$writer->emptyTag('file','desc'=>"注:横坐标代表样品名称及样品的聚类结果,纵坐标代表的差异代谢物 及物质 的聚类结果。图中不同的列代表不同的样品,不同的行代表不同的代谢物。颜色代表了代谢物在样品中的相对含量水平");
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"全部代谢物聚类热图文件见:complete/02_all_samples/all_samples.heatmap.png",'path'=>"../02_all_samples/all_samples.heatmap.png",'type'=>"xls");
|
||
|
||
|
||
|
||
#3.5.1 样本树状图分析
|
||
$writer->emptyTag('h2','name'=>"3.5 聚类分析",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('h3','name'=>"3.5.1 样本聚类树分析",'type'=>'type1','desc'=>"");
|
||
|
||
$writer->emptyTag('p','desc'=>"通过计算样本间的欧几里得距离矩阵,采用完全连接法(complete linkage)的层次聚类对所有样本进行聚类分析,构建反映样本相似性的聚类树。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"该方法是一种自底向上的聚合策略:依次寻找距离最近的两个簇进行合并,直至所有样本汇聚为一个簇,从而形成树状结构。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"以$vs1 为例,样本聚类树如下图所示:",'type'=>"type1");
|
||
$writer->emptyTag('pic','desc'=>"",'name'=>"图$pid. $vs1 样本聚类树",'type'=>"type1",'path'=>"$indir/$vs1/Hierarchical_Clustering_Analysis/sample_tree.png");
|
||
$pid++;
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"样本聚类树分析结果见:complete/*_vs_*/Hierarchical_Clustering_Analysis/",'path'=>"../$vs1/Hierarchical_Clustering_Analysis/",'type'=>"xls");
|
||
|
||
|
||
#3.5.2 kmeans聚类分析
|
||
$writer->emptyTag('h3','name'=>"3.5.2 kmeans聚类分析",'type'=>'type1','desc'=>"");
|
||
# k-means paragraph and figure for section 3.5.2.
# The wording differs in a single term depending on $repeat: with "Y" the
# trends are described across treatment groups, otherwise across samples.
# Everything else (figure name, path, numbering) is the same for both cases.
{
    my $trend_unit = ($repeat eq "Y") ? "处理组" : "样本";

    $writer->emptyTag('p','desc'=>"为进一步分析不同代谢物在各${trend_unit}间的变化模式,基于标准化后的代谢物丰度数据进行k-means聚类分析。该方法可将具有相似变化趋势的代谢物划分到同一类别中,从而帮助识别不同代谢物集合的整体响应模式。图中每个子图代表一个聚类类别,浅色线表示单个代谢物的变化趋势,黑色线表示该类别内所有代谢物的平均变化趋势。",'type'=>"type1");
    $writer->emptyTag('pic','desc'=>"k-means聚类趋势图展示了不同聚类类别中代谢物在${trend_unit}间的标准化丰度变化模式。每个分面对应一个聚类类别,标题中的total表示该类别包含的代谢物数量。",'name'=>"图$pid. kmeans聚类图",'type'=>"type1",'path'=>"$indir/02_all_samples/kmeans/kmeans_metabolites.png");

    # One figure was written, so advance the running figure number once
    $pid++;
}
|
||
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"k-means聚类分析结果文件包含各代谢物的标准化丰度值及其对应的聚类类别:complete/02_all_samples/kmeans/kmeans_metabolites.xls",'path'=>"../02_all_samples/kmeans/kmeans_metabolites.xls",'type'=>"xls");
|
||
|
||
|
||
#3.6 top10 代谢物占比柱状图
|
||
$writer->emptyTag('h2','name'=>"3.6 Top 10 代谢物相对百分含量",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"为展示样本中主要代谢物的组成特征,选取相对含量排名前10的代谢物进行可视化分析。基于总和标准化后的相对丰度数据,计算各代谢物在不同样本中的百分比占比,用于反映其在整体代谢物组成中的相对贡献。",'type'=>"type1");
|
||
$writer->emptyTag('pic','desc'=>"柱状图展示了前10代谢物在不同样本中的相对百分含量分布。不同颜色代表不同代谢物,纵坐标表示各代谢物的占比(%),可用于比较主要代谢物组成在各组间的差异。",'name'=>"图$pid. Top 10 代谢物相对百分含量柱状图",'type'=>"type1",'path'=>"$indir/02_all_samples/barplot/sum_normalized_top10_barplot.png");
|
||
$pid++;
|
||
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"前10代谢物占比结果文件,包含各代谢物在不同样本中的总和标准化相对含量:complete/02_all_samples/barplot/sum_normalized_top10.txt",'path'=>"../02_all_samples/barplot/sum_normalized_top10.txt",'type'=>"xls");
|
||
|
||
|
||
###############################
|
||
###############################
|
||
#3.7 多元统计分析
|
||
###############################
|
||
###############################
|
||
|
||
$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1');
|
||
#$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||
$writer->emptyTag('p','desc'=>"由于代谢组数据具有高维度、高噪声及变量间多重共线性的特点,单一变量的传统统计方法难以全面揭示数据内在规律,因此采用多元统计方法进行降维与分类建模。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"本分析使用R语言ropls包<a href=\"#ref5\">[5]</a>,包括:PCA、PLS-DA和OPLS-DA。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('h3','name'=>"3.7.1 标准化处理",'type'=>'type1','desc'=>"");
|
||
if ($plat eq "Target") {
|
||
$writer->emptyTag('p','desc'=>"在进行多元统计分析前,需要对代谢组学数据进行适当的数据变换和标准化处理,以减少高丰度代谢物的主导影响并改善数据分布特征。针对代谢组数据常见的偏态分布及较大数值跨度,本分析对原始定量数据加伪计数后进行log2转换,其中伪计数设为全数据中最小正值的一半,以避免0值取对数带来的影响。",'type'=>"type1");
|
||
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"在进行多元统计分析前,首先在内标标准化的基础上对代谢组学数据进行进一步的数据变换与预处理,以减少高丰度代谢物的主导影响并改善数据分布特征。针对代谢组数据常见的偏态分布及较大数值跨度,本分析对原始定量数据加伪计数后进行log2转换,其中伪计数设为全数据中最小正值的一半,以避免0值取对数带来的影响。",'type'=>"type1");
|
||
|
||
}
|
||
|
||
$writer->emptyTag('p','desc'=>"在此基础上,ropls建模过程中采用自动标准化(UV scaling)进行预处理,以降低不同代谢物量纲差异对模型的影响。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"此外,在建模前对低方差变量进行过滤,并记录过滤结果(VariableVariance_Filter.txt),以避免近零方差变量对模型稳定性造成影响。",'type'=>"type1");
|
||
|
||
|
||
|
||
|
||
$writer->emptyTag('h3','name'=>"3.7.2 建模参数与小样本解释策略",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"本流程根据样本量自动选择交叉验证与建模参数,实际参数记录见(ModelParam_Log.txt)。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"当对比组总样本数≤6或最小组样本数≤3时,监督模型过拟合风险较高,结论优先参考PLS-DA及其置换检验结果;OPLS-DA用于辅助解释。",'type'=>"type1");
|
||
|
||
if (-e "$indir/$vs1/Statistical_Analysis/ModelParam_Log.txt") {
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"$vs1 对比组模型参数日志参见:complete/$vs1/Statistical_Analysis/ModelParam_Log.txt",'path'=>"../$vs1/Statistical_Analysis",'type'=>"txt");
|
||
}
|
||
if (-e "$indir/$vs1/Statistical_Analysis/VariableVariance_Filter.txt") {
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"$vs1 对比组变量方差过滤记录参见:complete/$vs1/Statistical_Analysis/VariableVariance_Filter.txt",'path'=>"../$vs1/Statistical_Analysis",'type'=>"txt");
|
||
}
|
||
|
||
#################################
|
||
#################################
|
||
#3.7.3主成分分析 (PCA)
|
||
$writer->emptyTag('h3','name'=>"3.7.3 主成分分析(PCA)",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"主成分分析 (principal component analysis, PCA) <a href=\"#ref6\">[6]</a>是一种通过正交变换将可能存在相关性的变量转换为一组线性不相关变量(即主成分)的统计方法。该方法能够在保留最多信息的前提下,将高维数据降维至二维或三维空间进行可视化。换句话说,主成分分析是通过特征值分解或奇异值分解,找到数据变异最大的方向。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"PCA是一种无监督降维方法,用于观察样本整体分布趋势、组内离散程度及潜在离群样本。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"PCA模型中,R2X表示各主成分所能解释的原数据变异比例。",'type'=>"type1");
|
||
|
||
#模型参数表
|
||
$writer->emptyTag('p','desc'=>"PCA模型的评估参数如下表所示:",'type'=>"type1");
|
||
if (-e "$indir/03_DEM_stat/Comparison_PCA_summary.txt") {
|
||
$writer->emptyTag('table',
|
||
'desc'=>"注:pre-主成分数量;R2X(cum)-累计可解释方差。",
|
||
'type'=>"0",
|
||
'name'=>"表$tid. PCA模型验证参数汇总",
|
||
'path'=>"$indir/03_DEM_stat/Comparison_PCA_summary.txt"
|
||
);
|
||
$tid++;
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"PCA模型参数详情参见:complete/03_DEM_stat/Comparison_PCA_summary.txt",'path'=>"../03_DEM_stat",'type'=>"xls");
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"未生成有效的PCA模型参数。",'type'=>"type1");
|
||
}
|
||
|
||
|
||
#PCA得分图
|
||
$writer->emptyTag('p','desc'=>"PCA得分图如下:",'type'=>"type1");
|
||
if (-e "$indir/html_report/template/pca_analysis/$vs1\_pca.png") {
|
||
$writer->emptyTag('pic',
|
||
'desc'=>"注:PCA 得分图中每个点代表一个样本,不同颜色表示不同分组。横坐标和纵坐标分别表示前两个主成分,括号中的百分比表示相应主成分对样本总体变异的解释率。图形根据分组数量自动调整展示方式:组数较少时显示椭圆和样本标签,组数中等时仅显示椭圆边界,组数较多时仅显示样本点。当组内样本量大于等于 4 时,椭圆用于展示组内离散趋势;当组内样本量小于等于 3 时,椭圆仅作为几何包络辅助展示,不作统计推断。",
|
||
'name'=>"图$pid. $vs1 的PCA得分图",
|
||
'type'=>"type1",
|
||
'path'=>"$indir/html_report/template/pca_analysis/$vs1\_pca.png"
|
||
);
|
||
$pid++;
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"未生成PCA得分图。",'type'=>"type1");
|
||
}
|
||
|
||
|
||
####################################
|
||
#3.7.4 偏最小二乘法判别分析(PLS-DA)
|
||
$writer->emptyTag('h3','name'=>"3.7.4 偏最小二乘法判别分析(PLS-DA)",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"为弥补无监督方法(如PCA)在组间判别方面的局限性,本研究采用偏最小二乘判别分析(Partial Least Squares-Discriminant Analysis, PLS-DA)<a href=\"#ref7\">[7]</a>。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"PLS-DA为有监督模式识别方法,在降维时同时考虑自变量(代谢物表达矩阵)与因变量(分组信息)之间的关系,以增强组间差异信号。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"与仅分解X矩阵的PCA不同,PLS-DA联合分解X与Y矩阵,并通过潜变量提取实现分类相关信息的压缩表示。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('p','desc'=>"模型评估指标主要包括:",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"- R2X:X变量的累计解释方差,反映模型对代谢物数据结构的解释能力;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"- R2Y:Y变量(分组信息)的累计解释方差,反映模型对分组结构的拟合能力;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"- Q2:模型经交叉验证得到的累计预测能力,反映模型泛化性能。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"模型解释应综合R2Y与Q2进行判断:前者侧重拟合能力,后者侧重预测能力;二者需结合置换检验结果与样本规模共同评估。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"为评估过拟合风险,本流程对PLS-DA进行置换检验(Permutation test)。该检验通过随机打乱分组标签构建随机模型,并与原始模型比较其判别性能。",'type'=>"type1");
|
||
|
||
# 模型参数表
|
||
$writer->emptyTag('p','desc'=>"PLS-DA模型评估参数如下表所示:",'type'=>"type1");
|
||
# PLS-DA model parameter table: show the summary table plus a link to it when
# the summary file exists, otherwise emit a short "not generated" notice.
if (! -e "$indir/03_DEM_stat/Comparison_PLSDA_summary.txt") {
    $writer->emptyTag('p', 'desc' => "未生成有效的PLS-DA模型参数。", 'type' => "type1");
}
else {
    $writer->emptyTag(
        'table',
        'desc' => "注:pre-主成分数量;R2X-X变量累计解释方差;R2Y-Y变量累计解释方差;Q2-交叉验证累计预测能力。",
        'type' => "0",
        'name' => "表$tid. PLS-DA模型验证参数",
        'path' => "$indir/03_DEM_stat/Comparison_PLSDA_summary.txt",
    );
    $tid++;    # table numbering advances only when a table was written
    $writer->emptyTag(
        'file',
        'desc' => "",
        'name' => "PLS-DA模型参数详见:complete/03_DEM_stat/Comparison_PLSDA_summary.txt",
        'path' => "../03_DEM_stat",
        'type' => "xls",
    );
}
|
||
|
||
# PLS-DA得分图
|
||
$writer->emptyTag('p','desc'=>"PLS-DA得分图如下:",'type'=>"type1");
|
||
if (-e "$indir/html_report/template/pca_analysis/$vs1\_pls.png") {
|
||
$writer->emptyTag('pic',
|
||
'desc'=>"注:PLS-DA 得分图中每个点代表一个样本,不同颜色表示不同分组。坐标轴表示模型提取的得分成分,括号中的百分比表示对应成分对自变量矩阵变异的解释比例。图形根据分组数量自动调整展示方式:分组较少时显示椭圆和样本标签,分组较多时仅显示椭圆边界或样本点。当组内样本量大于等于 4 时,椭圆用于展示组内离散趋势;当组内样本量小于等于 3 时,椭圆仅作为几何包络辅助展示,不作统计推断。若模型仅提取到 1 个有效预测成分,则自动以一维得分图展示。",
|
||
'name'=>"图$pid. $vs1 的PLS-DA得分图",
|
||
'type'=>"type1",
|
||
'path'=>"$indir/html_report/template/pca_analysis/$vs1\_pls.png"
|
||
);
|
||
$pid++;
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"未生成PLS-DA得分图。",'type'=>"type1");
|
||
}
|
||
|
||
|
||
# PLS-DA置换检验图
|
||
$writer->emptyTag('p','desc'=>"PLS-DA置换检验图如下:",'type'=>"type1");
|
||
if (-e "$indir/$vs1/Statistical_Analysis/PLS-DA_Permutation.png") {
|
||
$writer->emptyTag('pic',
|
||
'desc'=>"注:横坐标为Similarity(y, yperm),纵坐标为R2Y与Q2。原始模型通常以参考线表示,置换模型以散点表示;若原始模型整体优于置换模型,提示模型具有较好的稳定性与非随机性。",
|
||
'name'=>"图$pid. $vs1 的PLS-DA模型置换检验图",
|
||
'type'=>"type1",
|
||
'path'=>"$indir/$vs1/Statistical_Analysis/PLS-DA_Permutation.png"
|
||
);
|
||
$pid++;
|
||
|
||
$writer->emptyTag('p','desc'=>"判读原则:置换检验中,若原始模型的R2Y/Q2参考线整体高于置换散点分布,且高相似度区域(接近原始标签)与低相似度区域(随机标签)呈现清晰梯度,提示模型具有较好的非随机性与稳定性。若置换散点大量接近或超过原始模型,则提示模型可能存在过拟合风险。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"统计解释:p值用于衡量原始模型优于随机模型的证据强度。通常p值越小,支持模型非随机性的证据越强;但模型有效性仍需结合样本量、交叉验证结果及生物学一致性综合判断,不建议仅依据单一阈值下结论。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('p','desc'=>"在小样本条件下,监督模型对数据扰动更敏感,建议优先结合PLS-DA置换检验结果进行稳健性判断,并将OPLS-DA结果作为辅助解释。",'type'=>"type1");
|
||
|
||
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"未生成PLS-DA置换检验图。",'type'=>"type1");
|
||
}
|
||
|
||
|
||
|
||
#########################################
|
||
# 3.7.5 正交偏最小二乘判别分析(OPLS-DA)
|
||
$writer->emptyTag('h3','name'=>"3.7.5 正交-偏最小二乘判别分析(OPLS-DA)",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"正交偏最小二乘判别分析(Orthogonal Projections to Latent Structures Discriminant Analysis, OPLS-DA)是PLS-DA的扩展方法<a href=\"#ref8\">[8]</a>。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"该方法将X矩阵分解为与分组相关的预测成分和与分组无关的正交成分,从而在一定程度上降低无关系统变异对分类结果的影响,提升模型可解释性。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"OPLS-DA更适用于两组比较场景;在小样本条件下,其结果应结合PLS-DA与置换检验进行谨慎解释。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"模型评估指标包括R2X、R2Y和Q2:R2Y反映对分组信息的拟合能力,Q2反映交叉验证预测能力。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"变量重要性投影(VIP)可用于辅助筛选差异贡献较大的代谢物,常以VIP>1作为经验参考阈值。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"为评估OPLS-DA模型的过拟合风险,采用置换检验(Permutation test)对模型稳定性进行验证。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"置换检验通过随机打乱分组标签并重复建模,将原始模型与置换模型的R2Y、Q2分布进行比较,以判断模型区分能力是否明显高于随机水平。",'type'=>"type1");
|
||
|
||
# 模型参数表
|
||
$writer->emptyTag('p','desc'=>"OPLS-DA模型评估参数如下表所示:",'type'=>"type1");
|
||
# OPLS-DA model parameter table: show the summary table plus a link to it when
# the summary file exists, otherwise emit a short "not generated" notice.
if (! -e "$indir/03_DEM_stat/Comparison_OPLSDA_summary.txt") {
    $writer->emptyTag('p', 'desc' => "未生成有效的OPLS-DA模型参数。", 'type' => "type1");
}
else {
    $writer->emptyTag(
        'table',
        'desc' => "注:pre-主成分数;R2X-X变量累计解释方差;R2Y-Y变量累计解释方差;Q2-交叉验证累计预测能力。",
        'type' => "0",
        'name' => "表$tid. OPLS-DA模型验证参数",
        'path' => "$indir/03_DEM_stat/Comparison_OPLSDA_summary.txt",
    );
    $tid++;    # table numbering advances only when a table was written
    $writer->emptyTag(
        'file',
        'desc' => "",
        'name' => "OPLS-DA模型参数详情参见:complete/03_DEM_stat/Comparison_OPLSDA_summary.txt",
        'path' => "../03_DEM_stat",
        'type' => "xls",
    );
}
|
||
|
||
# OPLS-DA得分图
|
||
$writer->emptyTag('p','desc'=>"OPLS-DA得分图如下:",'type'=>"type1");
|
||
if (-e "$indir/html_report/template/pca_analysis/$vs1\_opls.png") {
|
||
$writer->emptyTag('pic',
|
||
'desc'=>"注:OPLS-DA 得分图中每个点代表一个样本,不同颜色表示不同分组。横坐标表示预测得分成分 Score(p1),纵坐标表示正交得分成分 OrthoScore(o1),括号中的百分比表示对应成分对自变量矩阵变异的解释比例。图形根据分组数量自动调整展示方式:组数较少时显示椭圆和样本标签,组数中等时仅显示椭圆边界,组数较多时仅显示样本点。当组内样本量大于等于 4 时,椭圆用于展示组内离散趋势;当组内样本量小于等于 3 时,椭圆仅作为几何包络辅助展示,不作统计推断。",
|
||
'name'=>"图$pid. $vs1 的OPLS-DA得分图",
|
||
'type'=>"type1",
|
||
'path'=>"$indir/html_report/template/pca_analysis/$vs1\_opls.png"
|
||
);
|
||
$pid++;
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"未生成OPLS-DA得分图。",'type'=>"type1");
|
||
}
|
||
|
||
# OPLS-DA置换检验
|
||
$writer->emptyTag('p','desc'=>"置换检验结果如图所示:",'type'=>"type1");
|
||
|
||
if (-e "$indir/$vs1/Statistical_Analysis/OPLS-DA_Permutation.png") {
|
||
$writer->emptyTag('pic',
|
||
'desc'=>"注:横坐标为Similarity(y, yperm),纵坐标为R2Y与Q2。若原始模型参考线整体高于置换散点分布,且p值较小,提示模型具有较好的非随机性与稳定性。",
|
||
'name'=>"图$pid. $vs1 的OPLS-DA模型置换检验图",
|
||
'type'=>"type1",
|
||
'path'=>"$indir/$vs1/Statistical_Analysis/OPLS-DA_Permutation.png"
|
||
);
|
||
$pid++;
|
||
|
||
$writer->emptyTag('p','desc'=>"判读原则:置换检验中,若原始模型的R2Y/Q2参考线整体高于置换散点分布,且高相似度区域(接近原始标签)与低相似度区域(随机标签)呈现清晰梯度,提示模型具有较好的非随机性与稳定性。若置换散点大量接近或超过原始模型,则提示模型可能存在过拟合风险。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"统计解释:p值用于衡量原始模型优于随机模型的证据强度。通常p值越小,支持模型非随机性的证据越强;但模型有效性仍需结合样本量、交叉验证结果及生物学一致性综合判断,不建议仅依据单一阈值下结论。",'type'=>"type1");
|
||
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"未生成OPLS-DA置换检验图。",'type'=>"type1");
|
||
}
|
||
|
||
|
||
###############################
|
||
###############################
|
||
#3.8 单变量统计分析 (UVA) ,差异代谢物的筛选和火山图
|
||
###############################
|
||
###############################
|
||
$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1');
|
||
#$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||
|
||
|
||
$writer->emptyTag('p','desc'=>"代谢组学数据具有高维、多变量的特点,通常需结合单变量统计分析(univariate analysis, UVA)与多元统计分析进行综合挖掘,以提高差异代谢物筛选的准确性<a href=\"#ref9\">[9]</a>。本研究中,对具有生物学重复的样本采用基于线性模型和经验贝叶斯方法的 limma 流程进行单变量差异分析。考虑到代谢组数据通常具有变量数多而样本重复数有限的特征,传统逐个变量的 t 检验或非参数检验在小样本条件下往往存在方差估计不稳定、统计功效受限等问题。limma 通过经验贝叶斯方法对各变量的方差进行收缩估计,可在保留线性模型分析框架的同时提高方差估计稳定性和差异检验的稳健性,这一思想已在高维组学数据分析的方法学研究中得到系统阐述<a href=\"#ref10\">[10]</a><a href=\"#ref11\">[11]</a>。进一步的比较研究表明,在高维、小样本的连续型组学数据中,基于经验贝叶斯方差建模的 moderated t 检验通常较传统 t 检验表现出更稳定的统计表现,并已在蛋白质组等相邻高通量数据分析中显示出良好的应用实用性<a href=\"#ref12\">[12]</a><a href=\"#ref13\">[13]</a>。此外,代谢组学统计分析流程研究及近年来的实际研究也已将 limma 纳入差异特征筛选框架中,进一步支持了其在差异代谢物筛选中的可行性与适用性<a href=\"#ref14\">[14]</a><a href=\"#ref15\">[15]</a>。",'type'=>"type1");
|
||
|
||
|
||
if ($repeat eq "Y") {
|
||
# if ($group_nums == 2) {
|
||
$writer->emptyTag('p','desc'=>"若样本具有生物学重复且为两组比较,则采用 limma 方法进行差异分析。首先对代谢物定量数据加伪计数后进行 log2 转换,其中伪计数设为全数据中最小正值的一半;随后基于线性模型并结合经验贝叶斯方法计算各代谢物的差异检验 P 值,并进一步采用 Benjamini-Hochberg 方法进行 FDR 校正,得到 Q 值。结合 OPLS-DA 模型第一主成分的变量重要性投影(Variable Importance in Projection, VIP)进行综合筛选,差异代谢物筛选标准为 P 值 <= 0.05 且 VIP 值 >= 1。差异代谢物上下调方向根据实验组相对于对照组的 Fold Change(FC)判定:FC >= 1.2 为上调,FC <= 1/1.2 为下调。所有统计分析结果可参见统计分析结果表。",'type'=>"type1");
|
||
# } else {
|
||
# $writer->emptyTag('p','desc'=>"若样本具有生物学重复且为多组比较,则采用 limma 方法进行总体差异分析。首先对代谢物定量数据加伪计数后进行 log2 转换,其中伪计数设为全数据中最小正值的一半;随后基于线性模型并结合经验贝叶斯方法进行多组总体检验,计算各代谢物在不同组间的差异检验 P 值,并进一步采用 Benjamini-Hochberg 方法进行 FDR 校正,得到 Q 值。结合 PLS-DA 模型的变量重要性投影(Variable Importance in Projection, VIP)进行综合筛选,差异代谢物筛选标准为 P 值 <= 0.05 且 VIP 值 >= 1。由于多组总体检验仅用于判断代谢物在不同组间是否存在显著差异,故不进一步定义统一的上下调方向。所有统计分析结果可参见统计分析结果表。",'type'=>"type1");
|
||
# }
|
||
} else {
|
||
$writer->emptyTag('p','desc'=>"若样本无生物学重复,则不进行统计学显著性检验,仅结合 Fold Change(FC)进行差异代谢物的探索性筛选。相关方法学研究指出,FC 可作为代谢组学单变量差异特征的筛选指标,具体阈值可根据研究目的和数据特征设定<a href=\"#ref16\">[16]</a>;同时,已有代谢组学应用研究采用 FC >= 1.2 作为差异代谢物筛选阈值之一<a href=\"#ref17\">[17]</a>。因此,本研究以上调标准 FC >= 1.2、下调标准 FC <= 1/1.2 进行筛选,上下调代谢物合并即为最终差异代谢物集合。所有统计分析结果可参见统计分析结果表。",'type'=>"type1");
|
||
}
|
||
|
||
|
||
$writer->emptyTag('p','desc'=>"差异代谢物筛选的结果以$vs1 为例进行说明,部分差异代谢物结果展示如下(不足20代谢物则展示全部),所有差异代谢物结果请详见结果文件Differentially_Expressed_Metabolites.xls表。",'type'=>"type1");
|
||
$writer->emptyTag('table','desc'=>"",'type'=>"0",'name'=>"表$tid. 差异代谢物筛选表(部分)",'path'=>"$indir/html_report/template/diff_analysis/Diff.xls");
|
||
$tid++;
|
||
$writer->emptyTag('p','desc'=>"差异代谢物筛选表的主要内容包括:",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"1) id :该物质在本次定性分析中的唯一数据编号;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"2) Peak :物质在数据库中的名称;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"3) rt :该物质的色谱保留时间;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"4) mz :该物质的质荷比;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"5) Mean OO :该物质在该组对比内的一个实验组的相对定量均值;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"6) Mean XX :该物质在该组对比内的另一个实验组的相对定量均值;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"7) VIP :来自OPLS-DA 模型的VIP 值;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"8) Pvalue :来自t-test 的P-value ;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"9) Qvalue :P值经BH法调整后的假阳性率 ;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"10) FoldChange :两组实验物质定量的比值;",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"11) LOG_FoldChange :FoldChange 取以2 为底的对数。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"12) significant :上下调。",'type'=>"type1");
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"统计分析结果表见:complete/*_vs_*/Statistical_Analysis/Statistical_Analysis_results.xls",'path'=>"../$vs1/Statistical_Analysis/Statistical_Analysis_results.xls",'type'=>"xls");
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物筛选表见:complete/*_vs_*/Statistical_Analysis/Differentially_Expressed_Metabolites.xls",'path'=>"../$vs1/Statistical_Analysis/Differentially_Expressed_Metabolites.xls",'type'=>"xls");
|
||
|
||
|
||
#火山图
|
||
$writer->emptyTag('h3','name'=>"3.8.1 差异代谢物火山图",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"我们将差异代谢物筛选结果以火山图(volcano plot)的形式进行可视化,结果如图所示:",'type'=>"type1");
|
||
# Volcano plots for the example comparison ($vs1).
# The labelled plot decides whether the section has any figure at all; the
# unlabelled variant is a separate output file, so it is checked on its own.
if (-e "$indir/$vs1/Statistical_Analysis/volcano_plot.png") {
    $writer->emptyTag('pic','desc'=>"注:火山图中每个点代表一种代谢物。横坐标为两组对比中各代谢物的差异倍数(log₂FC,以2为底的对数);纵坐标为学生t检验的P值(-log₁₀P,以10为底的对数取负)。横坐标绝对值越大,代表代谢物在两组样本间的表达量倍数差异越显著;纵坐标值越大,表明差异的统计学可靠性越强。点的大小对应VIP值大小(值越大点越大);红色点代表差异上调代谢物,蓝色点代表差异下调代谢物,灰色点代表未达差异筛选标准的代谢物。图中标注了P值最小的5个代谢物名称。",'name'=>"图$pid. $vs1 差异代谢物筛选火山图",'type'=>"type1",'path'=>"$indir/$vs1/Statistical_Analysis/volcano_plot.png");
    $pid++;

    # Previously this figure was referenced unconditionally. If the unlabelled
    # plot was not produced, that left a broken image in the report and still
    # consumed a figure number, so it is now guarded like every other picture.
    if (-e "$indir/$vs1/Statistical_Analysis/volcano_plot_nolabel.png") {
        $writer->emptyTag('p','desc'=>"不带代谢物标签的火山图如下所示:",'type'=>"type1");
        $writer->emptyTag('pic','desc'=>"注:火山图中每个点代表一种代谢物。横坐标为两组对比中各代谢物的差异倍数(log₂FC,以2为底的对数);纵坐标为学生t检验的P值(-log₁₀P,以10为底的对数取负)。横坐标绝对值越大,代表代谢物在两组样本间的表达量倍数差异越显著;纵坐标值越大,表明差异的统计学可靠性越强。点的大小对应VIP值大小(值越大点越大);红色点代表差异上调代谢物,蓝色点代表差异下调代谢物,灰色点代表未达差异筛选标准的代谢物。",'name'=>"图$pid. $vs1 差异代谢物筛选火山图",'type'=>"type1",'path'=>"$indir/$vs1/Statistical_Analysis/volcano_plot_nolabel.png");
        $pid++;
    }

    # Link to the directory holding all volcano plot outputs.
    $writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物火山图见:complete/*_vs_*/Statistical_Analysis/volcano_plot*",'path'=>"../$vs1/Statistical_Analysis",'type'=>"xls");
}else{
    $writer->emptyTag('p','desc'=>"样本无生物学重复,无法绘制火山图",'type'=>"type1");
}
|
||
|
||
|
||
|
||
#venn图分析
|
||
$writer->emptyTag('h3','name'=>"3.8.2 差异代谢物韦恩图",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"利用韦恩图(Venn图)可直观展示各对比组差异代谢物的共有与特有情况,清晰呈现组间重叠数目(若对比组数>5组,则采用UpSet图替代展示)。",'type'=>"type1");
|
||
if (-e "$indir/04_venn_dir/venn.png") {
|
||
$writer->emptyTag('pic','desc'=>"注:不同颜色图形代表不同对比组;图形交叠部分的数字,代表对应对比组共有的差异代谢物个数(两组合交为两组共有,多组合交为多组共有)。",'name'=>"图$pid. 差异代谢物韦恩图",'type'=>"type1",'path'=>"$indir/04_venn_dir/venn.png");
|
||
$pid++;
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"韦恩图分析结果目录见:complete_dir/04_venn_dir/*",'path'=>"../04_venn_dir/",'type'=>"xls");
|
||
|
||
}elsif (-e "$indir/04_venn_dir/upset.png") {
|
||
$writer->emptyTag('pic','desc'=>"注:左侧柱状图展示各对比组名称及对应差异代谢物总数;右下方点矩阵中,横向点代表对应对比组,点的连接表示这些组存在交集(对应传统韦恩图的图形重叠);上方柱状图表示对应交集情况下的差异代谢物数量。",'name'=>"图$pid. 差异代谢物UpSet图",'type'=>"type1",'path'=>"$indir/04_venn_dir/upset.png");
|
||
$pid++;
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"UpSet图分析结果目录见:complete_dir/04_venn_dir/*",'path'=>"../04_venn_dir/",'type'=>"xls");
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"仅1个对比组,无法绘制韦恩图/UpSet图。",'type'=>"type1");
|
||
}
|
||
|
||
#top20 vip 气泡图
|
||
|
||
$writer->emptyTag('h3','name'=>"3.8.3 Top20 VIP差异代谢物气泡图",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"我们将筛选得到的差异代谢物按VIP值排序,选取Top20(不足20个则取全部差异代谢物),以气泡图形式可视化,结果如图所示:",'type'=>"type1");
|
||
# Top20 VIP bubble plot for the example comparison ($vs1).
# When the plot exists: list the available top-5 metabolite descriptions,
# show the figure and link the output directory; otherwise explain why the
# figure is missing.
if (-e "$indir/$vs1/Statistical_Analysis/Bubble_VIP_plot.png") {
    $writer->emptyTag('p','desc'=>"$vs1 组VIP值排名前5的差异代谢物分别为:",'type'=>"type1");

    # One paragraph per top-ranked metabolite; slots that were never filled
    # (undef) are skipped, and the original 1..5 order is kept.
    for my $top_meta ($diff_meta1, $diff_meta2, $diff_meta3, $diff_meta4, $diff_meta5) {
        next unless defined($top_meta);
        $writer->emptyTag('p','desc'=>"$top_meta",'type'=>"type1");
    }

    $writer->emptyTag(
        'pic',
        'desc' => "注:图中每个气泡代表一种差异代谢物;横坐标为Top20差异代谢物的变量投影重要度分数(VIP scores),纵坐标为代谢物名称;气泡大小与VIP值正相关(气泡越大,VIP值越高);颜色代表P值大小(颜色越深P值越小,差异越显著)。",
        'name' => "图$pid. $vs1 Top20 VIP差异代谢物气泡图",
        'type' => "type1",
        'path' => "$indir/$vs1/Statistical_Analysis/Bubble_VIP_plot.png",
    );
    $pid++;

    $writer->emptyTag(
        'file',
        'desc' => "",
        'name' => "气泡图文件路径:complete/*_vs_*/Statistical_Analysis/Bubble_VIP_plot*",
        'path' => "../$vs1/Statistical_Analysis",
        'type' => "xls",
    );
}
else {
    $writer->emptyTag('p','desc'=>"无差异代谢物或样本无生物学重复时,无法绘制该气泡图",'type'=>"type1");
}
|
||
|
||
# Section 3.8.4: fold-change bar chart of the differential metabolites.
# Variables embedded directly before multi-byte text are written as ${name}
# so the variable name is explicitly delimited; the emitted text is unchanged.
$writer->emptyTag('h3', 'name' => "3.8.4 差异代谢物表达差异倍数分析图", 'type' => 'type1', 'desc' => "");
$writer->emptyTag(
    'p',
    'desc' => "我们将筛选得到的差异代谢物按Log₂FoldChange绝对值排序,选取Top50(不足50个则取全部差异代谢物),以柱状图形式可视化。以${vs1}组为例,结果如图所示:",
    'type' => "type1",
);

if (! -e "$indir/$vs1/Statistical_Analysis/stat.diff_foldchange.png") {
    $writer->emptyTag('p', 'desc' => "无差异代谢物,无法绘制该分析图。", 'type' => "type1");
}
else {
    # Name the most up- and down-regulated metabolites before the figure.
    $writer->emptyTag(
        'p',
        'desc' => "${vs1}组差异代谢物中,上调最显著的是${max_meta},下调最显著的是${min_meta}。",
        'type' => "type1",
    );
    $writer->emptyTag(
        'pic',
        'desc' => "注:横坐标为差异代谢物的Log₂FoldChange(差异倍数以2为底的对数值),纵坐标为差异代谢物名称。红色柱代表代谢物含量上调,绿色柱代表代谢物含量下调。",
        'name' => "图$pid. ${vs1}代谢物表达差异倍数分析图",
        'type' => "type1",
        'path' => "$indir/$vs1/Statistical_Analysis/stat.diff_foldchange.png",
    );
    $pid++;
    $writer->emptyTag(
        'file',
        'desc' => "",
        'name' => "代谢物表达差异倍数分析图路径:complete/*_vs_*/Statistical_Analysis/stat.diff_foldchange.p*",
        'path' => "../$vs1/Statistical_Analysis",
        'type' => "xls",
    );
}
|
||
|
||
|
||
|
||
###############################
|
||
###############################
|
||
#3.9 差异代谢物的数目统计
|
||
###############################
|
||
###############################
|
||
#差异代谢物数目统计
|
||
$writer->emptyTag('h2','name'=>"3.9 差异代谢物数目统计",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"差异代谢物数目统计结果如下表所示:",'type'=>"type1");
|
||
$writer->emptyTag('table','desc'=>"注:Group_name:差异代谢物分组信息;All_sig_diff:显著差异代谢物数目;up_regulated:上调代谢物数目;down_regulated:下调代谢物数目。",
|
||
'type'=>"0",'name'=>"表$tid. 差异代谢物数目统计表",'path'=>"$indir/03_DEM_stat/dem.stat.xls");
|
||
$tid++;
|
||
# Bar chart of differential-metabolite counts; shown only if it was rendered.
if (-e "$indir/03_DEM_stat/dem.stat.png") {
    $writer->emptyTag('pic','desc'=>"注:横坐标为各分组信息,纵坐标为差异代谢物数量;不同颜色分别代表上调(up)和下调(down)的代谢物。",'name'=>"图$pid. 差异代谢物统计柱形图",'type'=>"img-width-max",'path'=>"$indir/03_DEM_stat/dem.stat.png");
    $pid++;
}else{
    $writer->emptyTag('p','desc'=>"无差异代谢物,无法绘制该统计柱形图。",'type'=>"type1");
}

# The link points at the statistics table (dem.stat.xls), which is displayed
# unconditionally just above this block. It therefore must not depend on
# whether the PNG exists: previously the link vanished whenever the bar chart
# was missing, even though the table itself was still shown.
$writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物数目统计表路径:complete/03_DEM_stat/dem.stat.xls",'path'=>"../03_DEM_stat/dem.stat.xls",'type'=>"xls");
|
||
|
||
|
||
|
||
###############################
|
||
###############################
|
||
#3.10 差异代谢物的统计分析
|
||
###############################
|
||
###############################
|
||
#柱状图
|
||
$writer->emptyTag('h2','name'=>"3.10 差异代谢物统计分析",'type'=>'type1','desc'=>"");
|
||
$writer->emptyTag('p','desc'=>"我们对每种差异代谢物的相对含量进行可视化展示(包括柱状图和小提琴图),直观呈现其在组间的数据分布(若差异代谢物超过20个,仅展示前20张图片,其余可参见结果文件对应目录)。",'type'=>"type1");
|
||
$writer->emptyTag('p','desc'=>"以下图片统一以差异代谢物编号标识,具体编号与代谢物的对应关系可参考Means.xls文件。根据差异代谢物表达差异倍数分析结果:上调最显著的$max_meta 对应编号为Metabolite $max_meta_numid;下调最显著的$min_meta 对应编号为Metabolite $min_meta_numid。",'type'=>"type1");
|
||
|
||
# 柱状图说明
|
||
$writer->emptyTag('p','desc'=>"柱状图以纵向条纹的高度表示代谢物相对含量,误差线代表标准误差,直观反映组间含量差异。",'type'=>"type1");
|
||
# Per-metabolite bar plots for the example comparison: hand the glob pattern
# to piclist() when at least one plot exists, otherwise note their absence.
# NOTE(review): the 4th piclist() argument is "../../$vs1" here, while the
# violin call further down passes "$indir/$vs1" -- confirm which one piclist()
# actually expects.
my @barplot = glob("$indir/$vs1/Statistical_Analysis/barplot/*barplot.png");
if (@barplot) {
    &piclist(
        "$vs1 组间柱状图",
        "注:纵坐标为物质相对含量,横坐标为组别;图中代谢物以编号标识,具体对应关系见Means.xls文件。",
        "$indir/$vs1/Statistical_Analysis/barplot/*barplot.png",
        "../../$vs1",
        "_barplot.png",
        "",
    );
}
else {
    $writer->emptyTag('p', 'desc' => "无差异代谢物,未生成柱状图。", 'type' => "type1");
}
|
||
|
||
# 小提琴图说明
|
||
$writer->emptyTag('p','desc'=>"小提琴-柱状复合图结合了数据分布形态(小提琴图)与集中趋势(柱状图):小提琴轮廓通过核密度估计展示数据概率分布(宽度反映密度,越宽表示该区间数据越集中),可识别多峰性或偏态;内部箱线图标记中位数等统计量,柱高量化组间差异,适用于多组对比与分布细节解析。",'type'=>"type1");
|
||
# Per-metabolite violin plots for the example comparison: hand the glob
# pattern to piclist() when at least one plot exists, otherwise note their
# absence.
# NOTE(review): the 4th piclist() argument is "$indir/$vs1" here, while the
# bar-plot call above passes "../../$vs1" -- confirm which one piclist()
# actually expects.
my @violin = glob("$indir/$vs1/Statistical_Analysis/violin/*violin.png");
if (@violin) {
    &piclist(
        "$vs1 组间小提琴箱线复合图",
        "注:纵坐标为物质相对含量,横坐标为组别;图中代谢物以编号标识,具体对应关系见Means.xls文件。",
        "$indir/$vs1/Statistical_Analysis/violin/*violin.png",
        "$indir/$vs1",
        "_violin.png",
        "",
    );
}
else {
    $writer->emptyTag('p', 'desc' => "无差异代谢物,未生成小提琴图。", 'type' => "type1");
}
|
||
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物组间柱状图路径:complete/*_vs_*/Statistical_Analysis/barplot/Metabolite_*_barplot.{pdf,png}",'path'=>"../$vs1/Statistical_Analysis/barplot",'type'=>"xls");
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物组间小提琴-柱状复合图路径:complete/*_vs_*/Statistical_Analysis/violin/Metabolite_*_violin.{pdf,png}",'path'=>"../$vs1/Statistical_Analysis/violin",'type'=>"xls");
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"代谢物编号对应表路径:complete/*_vs_*/Statistical_Analysis/Means.xls",'path'=>"../$vs1/Statistical_Analysis",'type'=>"xls");
|
||
|
||
|
||
|
||
|
||
|
||
|
||
$writer->emptyTag('h3','name'=>"3.10.1 关键差异代谢物跨组趋势分析",'type'=>'type1','desc'=>"");
|
||
|
||
my $trend_dir = "$indir/07_Trend_Analysis";
|
||
my $trend_line_png = "$trend_dir/key_metabolite_trend_lineplot_report_top12.png";
|
||
my $trend_box_png = "$trend_dir/key_metabolite_trend_boxplot_report_top12.png";
|
||
my $trend_report_table = "$trend_dir/selected_metabolites_report_top12.xls";
|
||
my $trend_label_table = "$trend_dir/selected_metabolites_figure_label.xls";
|
||
my $trend_bycomp_table = "$trend_dir/selected_metabolites_by_comparison.xls";
|
||
|
||
if (-e $trend_line_png) {
|
||
$writer->emptyTag('p','desc'=>"为展示各对比组中具有代表性的关键差异代谢物在全部实验分组中的整体变化趋势,本分析以各对比组差异代谢物结果为基础,按绝对值log2FoldChange从大到小排序;若绝对值log2FoldChange相同,则按VIP值从大到小排序。最终每个对比组选取前5个代表性差异代谢物作为候选集合。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('p','desc'=>"将所有对比组候选代谢物合并后进行去重,并按以下规则综合排序:1)最大绝对值log2FoldChange值(Max_abs_log2FC,降序);2)最大VIP值(Max_VIP,降序);3)最小P值(Best_Pvalue,升序)。排序后选取前12个代表性代谢物用于报告正文展示,完整结果请见对应结果文件。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('pic','desc'=>"注:横坐标为实验分组,纵坐标为标准化丰度值。每个分面代表一个关键差异代谢物,图中以M编号标识,其与具体代谢物名称的对应关系见selected_metabolites_figure_label.xls。折线连接各分组均值,误差线表示标准误(SE),散点表示各样本实际观测值。",'name'=>"图$pid. 关键差异代谢物跨组趋势折线图",'type'=>"img-width-max",'path'=>$trend_line_png);
|
||
$pid++;
|
||
|
||
if (-e $trend_box_png) {
|
||
$writer->emptyTag('pic','desc'=>"注:横坐标为实验分组,纵坐标为标准化丰度值。每个分面代表一个关键差异代谢物,箱体表示四分位分布范围,中线表示中位数,散点表示各样本实际观测值。",'name'=>"图$pid. 关键差异代谢物跨组箱线图",'type'=>"img-width-max",'path'=>$trend_box_png);
|
||
$pid++;
|
||
}
|
||
|
||
if (-e $trend_report_table) {
|
||
$writer->emptyTag('table','desc'=>"注:FigureLabel为图中代谢物编号;Selected_in_comparisons为该代谢物来源对比组;Best_Pvalue为所有来源对比组中的最小P值;Max_abs_log2FC为所有来源对比组中的最大绝对值log2FoldChange;Max_VIP为所有来源对比组中的最大VIP值;Direction_summary为变化方向汇总。",'type'=>"0",'name'=>"表$tid. 报告展示关键差异代谢物列表",'path'=>$trend_report_table);
|
||
$tid++;
|
||
}
|
||
|
||
# Links to the trend-analysis result files. Each table link is written only
# when the table exists, so the report never points at a missing file. This
# matches the existence checks used for the figures and table of this section
# and puts the already-declared path variables to use.
if (-e $trend_bycomp_table) {
    $writer->emptyTag('file','desc'=>"",'name'=>"各对比组候选代谢物表路径:complete/07_Trend_Analysis/selected_metabolites_by_comparison.xls",'path'=>"../07_Trend_Analysis/selected_metabolites_by_comparison.xls",'type'=>"xls");
}
if (-e $trend_label_table) {
    $writer->emptyTag('file','desc'=>"",'name'=>"关键差异代谢物编号对应表路径:complete/07_Trend_Analysis/selected_metabolites_figure_label.xls",'path'=>"../07_Trend_Analysis/selected_metabolites_figure_label.xls",'type'=>"xls");
}
# The directory itself is known to exist here: the enclosing branch is only
# entered when the trend line plot inside it was found.
$writer->emptyTag('file','desc'=>"",'name'=>"关键差异代谢物趋势分析结果路径:complete/07_Trend_Analysis/",'path'=>"../07_Trend_Analysis",'type'=>"xls");
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"未生成关键差异代谢物跨组趋势分析结果。",'type'=>"type1");
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
$writer->emptyTag('h3','name'=>"3.10.2 共有差异代谢物KEGG通路富集分析",'type'=>'type1','desc'=>"");
|
||
|
||
my $common_kegg_dir = "$indir/08_Common_KEGG_Enrichment";
|
||
my $common_bubble_top20_png = "$common_kegg_dir/common_kegg_bubbleplot_top20.png";
|
||
my $common_bubble_top20_pdf = "$common_kegg_dir/common_kegg_bubbleplot_top20.pdf";
|
||
my $common_bubble_top5_png = "$common_kegg_dir/common_kegg_bubbleplot_top5.png";
|
||
my $common_bubble_top5_pdf = "$common_kegg_dir/common_kegg_bubbleplot_top5.pdf";
|
||
my $common_enrich_table = "$common_kegg_dir/common_kegg_pathway_enrichment.xls";
|
||
my $common_enrich_top5_table = "$common_kegg_dir/common_kegg_pathway_enrichment_top5.xls";
|
||
my $common_matched_table = "$common_kegg_dir/common_matched_metabolites.xls";
|
||
my $common_hit_detail_table = "$common_kegg_dir/common_kegg_pathway_hit_detail.xls";
|
||
my $common_no_pathway_table = "$common_kegg_dir/common_no_pathway_metabolites.xls";
|
||
my $common_summary_table = "$common_kegg_dir/common_kegg_summary.xls";
|
||
|
||
# Inputs written by the Venn/UpSet step.
my $common_id_file = "$indir/04_venn_dir/common_id";
my $upset_file     = "$indir/04_venn_dir/upset_list.xls";
my $venn_file      = "$indir/04_venn_dir/venn_list.xls";

my $compare_num = 0;     # number of comparison groups (header columns minus the first)
my $common_num  = 0;     # number of non-blank lines in the common-ID list
my $set_file    = "";    # whichever membership table exists; UpSet is preferred

if (-e $upset_file) {
    $set_file = $upset_file;
}
elsif (-e $venn_file) {
    $set_file = $venn_file;
}

# Count comparison groups from the tab-separated header line.
# Three-argument open with a lexical handle: the path is never parsed for a
# mode prefix and the handle cannot collide with another bareword IN.
if ($set_file ne "") {
    open(my $set_fh, '<', $set_file) or die "Cannot open $set_file: $!";
    my $head = <$set_fh>;
    close $set_fh;

    # An empty file has no header line; keep the count at 0 instead of
    # chomping/splitting undef (warnings) and ending up with -1.
    if (defined $head) {
        chomp($head);
        my @head = split /\t/, $head;
        $compare_num = scalar(@head) - 1;
        $compare_num = 0 if $compare_num < 0;
    }
}

# Count the shared differential metabolites: one ID per non-blank line.
# -s is false for both a missing and a zero-length file.
if (-s $common_id_file) {
    open(my $id_fh, '<', $common_id_file) or die "Cannot open $common_id_file: $!";
    while (my $id_line = <$id_fh>) {
        next if $id_line =~ /^\s*$/;
        $common_num++;
    }
    close $id_fh;
}
|
||
|
||
if ($set_file eq "") {
|
||
|
||
$writer->emptyTag('p','desc'=>"未检测到差异代谢物交集结果文件,因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
|
||
|
||
} elsif ($compare_num < 2) {
|
||
|
||
$writer->emptyTag('p','desc'=>"本项目仅包含1个对比组,无法进行跨比较组共有差异代谢物筛选,因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
|
||
|
||
} elsif ($common_num < 2) {
|
||
|
||
$writer->emptyTag('p','desc'=>"本项目包含多个对比组,但按“所有比较组共有”标准未筛选到足够数量的共有差异代谢物,因此未开展共有差异代谢物KEGG通路富集分析。",'type'=>"type1");
|
||
|
||
} elsif (-e $common_enrich_table) {
|
||
|
||
$writer->emptyTag('p','desc'=>"为进一步解析多个比较组共有差异代谢物的潜在生物学功能,对共有差异代谢物进行了KEGG通路富集分析。共有差异代谢物来源于差异代谢物交集结果。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('p','desc'=>"分析时,以共有差异代谢物作为前景集,以本项目进入正式分析流程的全部代谢物中具有KEGG通路注释的代谢物作为背景集。随后将共有差异代谢物与总代谢物注释表进行匹配,提取其中具有KEGG pathway注释的代谢物用于富集分析,未匹配到注释表或虽匹配成功但无KEGG pathway注释的代谢物不参与本次通路富集统计。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('p','desc'=>"富集分析采用超几何检验评估共有差异代谢物在各KEGG通路中的富集显著性,并进一步进行Benjamini-Hochberg多重检验校正,同时提供Holm校正结果。相关结果可从整体通路富集结果表及显著通路补充结果中进行解读。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('p','desc'=>"需要说明的是,共有差异代谢物并不一定全部具有可用的KEGG pathway注释,因此最终参与富集分析的代谢物数通常会少于共有差异代谢物总数。相关匹配结果及无KEGG通路注释情况见补充附件。",'type'=>"type1");
|
||
|
||
if (-e $common_bubble_top20_png) {
|
||
|
||
$writer->emptyTag('pic','desc'=>"注:横坐标为富集因子(Enrichment factor),纵坐标为KEGG通路名称,气泡大小表示命中该通路的共有差异代谢物数量,气泡颜色表示富集分析P值。",'name'=>"图$pid. 共有差异代谢物KEGG通路富集气泡图(Top20)",'type'=>"img-width-max",'path'=>$common_bubble_top20_png);
|
||
$pid++;
|
||
|
||
}
|
||
|
||
if (-e $common_bubble_top5_png) {
|
||
|
||
$writer->emptyTag('pic','desc'=>"注:该图基于富集结果中P值小于0.05的通路绘制,按显著性排序最多展示前5条;若显著通路不足5条,则按实际条数展示。横坐标为MetaRatio,纵坐标为KEGG通路名称,气泡大小表示命中该通路的共有差异代谢物数量,气泡颜色表示-log10(Pvalue)。",'name'=>"图$pid. 共有差异代谢物显著KEGG通路富集气泡图(Top5)",'type'=>"img-width-max",'path'=>$common_bubble_top5_png);
|
||
$pid++;
|
||
|
||
}
|
||
|
||
$writer->emptyTag('table','desc'=>"注:ID表示KEGG通路编号;Description表示KEGG通路名称;Total表示背景集中注释到该通路的代谢物数;Hits表示共有差异代谢物中命中该通路的代谢物数;MetaboliteRatio表示命中该通路的共有差异代谢物数占实际参与富集分析总代谢物数的比例;BgRatio表示背景集中命中该通路的代谢物数占背景集中可参与通路富集总代谢物数的比例;pvalue表示超几何检验得到的富集显著性P值;p.adjust表示Benjamini-Hochberg方法校正后的FDR;Holm_adjust表示Holm方法校正后的P值;Enrichment_factor表示富集因子;Diffmeta表示命中该通路的共有差异代谢物;Allmeta表示背景集中注释到该通路的全部代谢物。若行数超过30行,只展示30行。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物KEGG通路富集结果表(部分)",'path'=>"$indir/html_report/template/common_kegg_pathway_enrichment.xls");
|
||
$tid++;
|
||
|
||
|
||
if (-e $common_summary_table) {
|
||
|
||
$writer->emptyTag('table','desc'=>"注:Item表示统计项目名称,Value表示对应统计值。InputMetabolites表示输入共有差异代谢物总数;UsedForEnrichment表示最终参与KEGG富集分析的代谢物数;NoPathwayAnnotation表示匹配成功但无可用KEGGpathway注释的代谢物数;NotFoundOrNotInBackground表示未匹配到注释表或未进入背景集的代谢物数;BackgroundMetabolites表示背景集总代谢物数;BackgroundWithPathway表示背景集中具有KEGG通路注释的代谢物数;EnrichedPathwaysTested表示参与富集检验的通路数;AllowedPathwayCount表示允许保留的通路数。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物KEGG富集分析统计汇总表",'path'=>$common_summary_table);
|
||
$tid++;
|
||
|
||
}
|
||
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物KEGG通路富集结果表路径:complete/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls",'type'=>"xls");
|
||
|
||
if (-e $common_enrich_top5_table) {
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"显著通路Top5结果表路径(补充):complete/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment_top5.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment_top5.xls",'type'=>"xls");
|
||
}
|
||
|
||
# Links to the supplementary tables of the common-metabolite KEGG enrichment.
# Each link is written only when its file exists, as is already done for the
# Top5 table link, so the report never points at a supplementary table that
# this run did not produce.
if (-e $common_matched_table) {
    $writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物注释匹配信息表路径:complete/08_Common_KEGG_Enrichment/common_matched_metabolites.xls",'path'=>"../08_Common_KEGG_Enrichment/common_matched_metabolites.xls",'type'=>"xls");
}

if (-e $common_hit_detail_table) {
    $writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物通路命中明细表路径(补充):complete/08_Common_KEGG_Enrichment/common_kegg_pathway_hit_detail.xls",'path'=>"../08_Common_KEGG_Enrichment/common_kegg_pathway_hit_detail.xls",'type'=>"xls");
}

if (-e $common_no_pathway_table) {
    $writer->emptyTag('file','desc'=>"",'name'=>"无KEGG通路注释的共有差异代谢物列表路径(补充):complete/08_Common_KEGG_Enrichment/common_no_pathway_metabolites.xls",'path'=>"../08_Common_KEGG_Enrichment/common_no_pathway_metabolites.xls",'type'=>"xls");
}
|
||
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"共有差异代谢物KEGG富集分析结果目录路径:complete/08_Common_KEGG_Enrichment/",'path'=>"../08_Common_KEGG_Enrichment",'type'=>"xls");
|
||
|
||
} else {
|
||
|
||
$writer->emptyTag('p','desc'=>"已筛选到共有差异代谢物,但在KEGG注释及通路过滤后未生成共有差异代谢物KEGG通路富集分析结果。",'type'=>"type1");
|
||
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
$writer->emptyTag('h3','name'=>"3.10.3 共有差异代谢物KEGG通路-代谢物网络分析",'type'=>'type1','desc'=>"");
|
||
|
||
my $common_network_dir = "$indir/08_Common_KEGG_Enrichment/network";
|
||
my $common_network_png = "$common_network_dir/common_kegg_pathway_metabolite_network.png";
|
||
my $common_network_summary = "$common_network_dir/common_kegg_pathway_metabolite_network_summary.xls";
|
||
my $common_network_pathways = "$common_network_dir/common_kegg_pathway_metabolite_network_selected_pathways.xls";
|
||
my $common_network_metas = "$common_network_dir/common_kegg_pathway_metabolite_network_selected_metabolites.xls";
|
||
my $common_network_nodes = "$common_network_dir/common_kegg_pathway_metabolite_network_nodes.xls";
|
||
my $common_network_edges = "$common_network_dir/common_kegg_pathway_metabolite_network_edges.xls";
|
||
|
||
|
||
if (! -e $common_enrich_table) {
|
||
|
||
$writer->emptyTag('p','desc'=>"未生成共有差异代谢物KEGG富集分析结果,因此未开展通路-代谢物网络分析。",'type'=>"type1");
|
||
|
||
} elsif (-e $common_network_png) {
|
||
|
||
$writer->emptyTag('p','desc'=>"为直观展示共有差异代谢物与显著富集KEGG通路之间的对应关系,本分析基于共有差异代谢物KEGG富集结果构建通路-代谢物网络图。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('p','desc'=>"筛选规则为:优先保留富集分析P值小于0.05的显著通路,并按P值升序、富集因子降序、命中代谢物数降序排序,最多展示前10条通路;随后从入选通路命中的共有差异代谢物中去重筛选,最多保留20个代谢物。代谢物排序规则为degree降序、关联通路最小P值升序、关联通路最大Hits降序、代谢物名称升序。",'type'=>"type1");
|
||
|
||
$writer->emptyTag('pic','desc'=>"注:外圈节点为KEGG通路,图中以通路编号显示;内圈节点为共有差异代谢物。连线表示该代谢物命中对应KEGG通路。代谢物节点大小表示其连接的通路数量,完整节点和边信息见结果表。",'name'=>"图$pid. 共有差异代谢物KEGG通路-代谢物网络图",'type'=>"img-width-max",'path'=>$common_network_png);
|
||
$pid++;
|
||
|
||
if (-e $common_network_pathways) {
|
||
$writer->emptyTag('table','desc'=>"注:SelectionOrder为通路筛选排序;ID为KEGG通路编号;Description为通路名称;Hits为共有差异代谢物命中该通路的数量;Total为背景集中注释到该通路的代谢物数量;pvalue为富集分析P值;p.adjust为BH校正后的P值;Holm_adjust为Holm校正后的P值;Enrichment_factor为富集因子;Diffmeta为命中该通路的共有差异代谢物;Allmeta为背景集中注释到该通路的全部代谢物。",'type'=>"0",'name'=>"表$tid. 网络图入选KEGG通路列表",'path'=>$common_network_pathways);
|
||
$tid++;
|
||
}
|
||
|
||
if (-e $common_network_metas) {
|
||
$writer->emptyTag('table','desc'=>"注:SelectionOrder为代谢物筛选排序;Metabolite为共有差异代谢物名称;Degree为该代谢物连接的入选通路数量;Best_pathway_pvalue为该代谢物关联通路中的最小富集P值;Best_pathway_hits为该代谢物关联通路中的最大Hits值;PathwayIDs为该代谢物关联的入选KEGG通路编号。",'type'=>"0",'name'=>"表$tid. 网络图入选共有差异代谢物列表",'path'=>$common_network_metas);
|
||
$tid++;
|
||
}
|
||
|
||
if (-e $common_network_summary) {
|
||
$writer->emptyTag('table','desc'=>"注:Item表示统计项目名称,Value表示对应统计值。InputPathways为输入富集通路数量;SelectedPathways为最终入选网络图的通路数量;TotalSignificantPathways为P值小于0.05的显著通路数量;MaxMetabolites为网络图允许展示的最大代谢物数量;UniqueMetabolitesBeforeFilter为筛选前去重代谢物数量;Metabolites为最终入选网络图的代谢物数量;Edges为通路-代谢物连线数量。",'type'=>"0",'name'=>"表$tid. 共有差异代谢物通路网络统计汇总表",'path'=>$common_network_summary);
|
||
$tid++;
|
||
}
|
||
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"网络图节点信息表路径:complete/08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_nodes.xls",'path'=>"../08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_nodes.xls",'type'=>"xls");
|
||
$writer->emptyTag('file','desc'=>"",'name'=>"网络图连线信息表路径:complete/08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_edges.xls",'path'=>"../08_Common_KEGG_Enrichment/network/common_kegg_pathway_metabolite_network_edges.xls",'type'=>"xls");
|
||
|
||
}else{
|
||
$writer->emptyTag('p','desc'=>"共有差异代谢物KEGG富集结果中未筛选到P值小于0.05的显著通路,因此未生成共有差异代谢物KEGG通路-代谢物网络图。",'type'=>"type1");
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
###############################
###############################
# 3.11 Hierarchical clustering analysis of differential metabolites
###############################
###############################
# Writes the section heading and two introductory paragraphs, then either the
# clustering heatmap of the first comparison ($vs1) together with a link to its
# data matrix, or a fallback paragraph when the heatmap image does not exist.
$writer->emptyTag(
    'h2',
    'name' => "3.11 差异代谢物聚类热图",
    'type' => 'type1',
    'desc' => "",
);
$writer->emptyTag(
    'p',
    'desc' => "差异代谢物在生物学上常具有功能相似性或互补性,或受同一代谢通路调控,因此在不同实验组间表现出相似或相反的表达特征。对其进行层次聚类分析,可将表达特征一致的代谢物归为一类,清晰揭示代谢物在组间的表达变化规律。",
    'type' => "type1",
);
$writer->emptyTag(
    'p',
    'desc' => "针对每组对比,我们基于差异代谢物定量值计算欧氏距离矩阵,采用完全连锁法仅对代谢物进行聚类(样本不聚类),并以热图形式展示<a href=\"#ref18\">[18]</a>。此处以$vs1组为例,结果如下:",
    'type' => "type1",
);
{
    # Scoped helper so the image path is spelled out only once.
    my $heatmap_png = "$indir/$vs1/Hierarchical_Clustering_Analysis/heatmap.png";

    if (-e $heatmap_png) {
        $writer->emptyTag(
            'pic',
            'desc' => "注:横坐标为样品名称(样品不进行聚类,按分组顺序排列),上方色块为样品分组注释(不同颜色代表不同组别);纵坐标为差异代谢物及代谢物聚类结果(行代表代谢物,按聚类结果排序);颜色深浅反映代谢物在样品中的相对含量水平(红色代表高表达,蓝色代表低表达)。",
            'name' => "图$pid. $vs1组差异代谢物层次聚类热图",
            'type' => "img-width-max",
            'path' => $heatmap_png,
        );
        # The figure counter only advances when a picture was actually emitted.
        $pid++;

        $writer->emptyTag(
            'file',
            'desc' => "",
            'name' => "聚类热图数据文件路径:complete/*_vs_*/Hierarchical_Clustering_Analysis/hierarchical_clustering_data_matrix.xls",
            'path' => "../$vs1/Hierarchical_Clustering_Analysis/hierarchical_clustering_data_matrix.xls",
            'type' => "xls",
        );
    }
    else {
        $writer->emptyTag(
            'p',
            'desc' => "无差异代谢物,无法绘制聚类热图。",
            'type' => "type1",
        );
    }
}
|
||
|
||
###############################
###############################
# 3.12 Correlation analysis of differential metabolites
###############################
###############################
# Writes the section heading, a general introduction, and a second paragraph
# whose wording depends on $repeat ("Y" adds the significance-test sentence).
# The correlation heatmap of the first comparison ($vs1) is shown when its
# image exists; otherwise a fallback paragraph is written.
$writer->emptyTag(
    'h2',
    'name' => "3.12 差异代谢物关联分析",
    'type' => 'type1',
    'desc' => "",
);

$writer->emptyTag(
    'p',
    'desc' => "差异代谢物关联分析旨在研究代谢物间变化趋势的一致性,通过计算所有差异代谢物两两之间的皮尔森相关系数,分析其相关性。相关性可揭示代谢物变化的协同性:变化趋势相同为正相关,相反为负相关。",
    'type' => "type1",
);

$writer->emptyTag(
    'p',
    'desc' => (
        $repeat eq "Y"
        ? "两个代谢物的线性关系越强,相关系数越趋近于1(正相关)或-1(负相关)。同时进行统计检验,P-value ≤ 0.05 表示相关性显著(以符号标注);热图中红色代表正相关,蓝色代表负相关,白色代表无相关性。"
        : "两个代谢物的线性关系越强,相关系数越趋近于1(正相关)或-1(负相关);热图中红色代表正相关,蓝色代表负相关,白色代表无相关性。"
    ),
    'type' => "type1",
);

{
    # Scoped helper so the image path is spelled out only once.
    my $cor_heatmap_png = "$indir/$vs1/Hierarchical_Clustering_Analysis/cor_heatmap.png";

    if (-e $cor_heatmap_png) {
        $writer->emptyTag(
            'pic',
            'desc' => "注:图中横轴和纵轴均代表差异代谢物,仅展示下三角矩阵(避免重复);代谢物按AOE聚类方法排序,使相关性相似的代谢物聚集在一起。颜色代表代谢物间的相关性强度:深红色趋近于1(强正相关),深蓝色趋近于-1(强负相关),白色为0(无相关性);有生物学重复时,P-value ≤ 0.05的显著相关关系会标注符号,无重复时不展示显著性标注。",
            'name' => "图$pid. $vs1差异代谢物关联分析热图",
            'type' => "type1",
            'path' => $cor_heatmap_png,
        );
        $pid++;

        $writer->emptyTag(
            'file',
            'desc' => "",
            'name' => "关联分析结果文件路径:complete/*_vs_*/Hierarchical_Clustering_Analysis/cor_heatmap.xls",
            'path' => "../$vs1/Hierarchical_Clustering_Analysis/cor_heatmap.xls",
            'type' => "xls",
        );
    }
    else {
        $writer->emptyTag(
            'p',
            'desc' => "无差异代谢物,无法绘制关联分析热图。",
            'type' => "type1",
        );
    }
}
|
||
|
||
|
||
|
||
###############################
###############################
# 3.13 KEGG annotation of differential metabolites
###############################
###############################
# Writes the KEGG annotation section: heading, background paragraphs, the
# (partial) pathway annotation table, the pathway maps of the first comparison
# ($vs1) and the KEGG compound classification bar plot.
#
# The heading now carries an explicit empty 'desc' attribute, like every other
# heading emitted by this script, so downstream consumers see a uniform
# attribute set.
$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1','desc'=>"");
#$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
$writer->emptyTag('p','desc'=>"生物体中的复杂代谢反应及调控并非独立进行,而是由基因和蛋白质构成的通路网络协同作用,最终导致代谢组的系统性变化。对这些代谢与调控通路的分析,可更全面系统地揭示实验条件改变引发的生物学过程变化、性状或疾病发生机理及药物作用机制等核心问题。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"京都基因与基因组百科全书(Kyoto Encyclopedia of Genes and Genomes, KEGG)Pathway数据库<a href=\"#ref19\">[19]</a><a href=\"#ref20\">[20]</a>(www.kegg.jp/kegg/pathway.html)以基因和基因组功能信息为基础,以代谢反应为线索,串联潜在代谢途径及对应调控蛋白,以图解形式展示细胞生理生化过程(如能量代谢、物质运输、信号传递、细胞周期调控等)及保守子通路信息,是代谢网络研究最常用的通路数据库。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"我们整理了对应物种中差异代谢物映射的所有通路,部分通路结果展示如下(不足20通路则展示全部),所有通路结果请详见结果文件KEGG_Pathway.xls表。",'type'=>"type1");

# The table shown in the report is the copy placed under html_report/template.
$writer->emptyTag('table','desc'=>"",'type'=>"0",'name'=>"表$tid. KEGG通路注释信息表(部分)",'path'=>"$indir/html_report/template/KEGG_Analysis/KEGG_Pathway.xls");
$tid++;
$writer->emptyTag('file','desc'=>"",'name'=>"KEGG通路注释信息路径:complete/*_vs_*/KEGG_Analysis/KEGG_Pathway.xls",'path'=>"../$vs1/KEGG_Analysis/KEGG_Pathway.xls",'type'=>"xls");

$writer->emptyTag('p','desc'=>"KEGG通路注释信息表包含以下核心信息:",'type'=>"type1");
$writer->emptyTag('p','desc'=>"1) KEGG pathway:代谢物富集通路的KEGG PATHWAY数据库ID、通路名称及该通路中的差异代谢物个数;",'type'=>"type1");
$writer->emptyTag('p','desc'=>"2) Compounds:该通路中差异代谢物的KEGG COMPOUND ID及代谢物名称。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"基于上述结果,我们在KEGG通路图上标记差异代谢物:红色代表显著上调代谢物,绿色代表显著下调代谢物。以下以$vs1组为例展示KEGG通路图(若通路图超过20张,仅展示前20张,其余可参见结果文件对应目录):",'type'=>"type1");

# Pathway maps: only emitted when at least one PNG exists for $vs1.
my @kegg =glob ("$indir/$vs1/KEGG_Analysis/pathway/*.png");
if (scalar(@kegg) != 0) {
    # piclist is defined outside this section; it receives the caption, a note,
    # the glob pattern, a base directory, a suffix and an empty last argument.
    &piclist("$vs1组KEGG通路图","","$indir/$vs1/KEGG_Analysis/pathway/*.png","$indir/$vs1",".png","");
    # $pid++;

    $writer->emptyTag('file','desc'=>"",'name'=>"KEGG通路图路径:complete/*_vs_*/KEGG_Analysis/pathway",'path'=>"../$vs1/KEGG_Analysis/pathway",'type'=>"xls");
}else{
    $writer->emptyTag('p','desc'=>"差异代谢物未注释到任何KEGG通路,无通路图生成。",'type'=>"type1");
}

# KEGG compound classification
$writer->emptyTag('p','desc'=>"KEGG分类注释按层级划分,官网共收录659种化合物(https://www.kegg.jp/brite/br08001)。其中层级A包含9大类:(1)Organic acids 有机酸;(2)Lipids 脂质;(3)Carbohydrates 碳水化合物;(4)Nucleic acids 核酸;(5)Peptides 肽;(6)Vitamins and cofactors 维生素和辅因子;(7)Steroids 类固醇;(8)Hormones and transmitters 激素和递质;(9)Antibiotics 抗生素。",'type'=>"type1");
$writer->emptyTag('p','desc'=>"将每组对比的差异代谢物与官网659种化合物进行匹配,匹配成功的差异代谢物KEGG分类注释图如下所示:",'type'=>"type1");

if (-e "$indir/$vs1/KEGG_Analysis/kegg_compound_barplot.png") {
    $writer->emptyTag('pic','desc'=>"注:图例代表KEGG分类层级A,Y轴代表层级B,不同颜色对应不同层级A类别。",'name'=>"图$pid. $vs1组KEGG分类注释图",'type'=>"type1",'path'=>"$indir/$vs1/KEGG_Analysis/kegg_compound_barplot.png");
    $pid++;

    $writer->emptyTag('file','desc'=>"",'name'=>"KEGG分类注释信息路径:complete/*_vs_*/KEGG_Analysis/kegg_compound_classification.xls",'path'=>"../$vs1/KEGG_Analysis/kegg_compound_classification.xls",'type'=>"xls");
}else{
    $writer->emptyTag('p','desc'=>"无差异代谢物匹配到KEGG分类层级,无法生成分类注释图。",'type'=>"type1");
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
###############################
###############################
# 3.14 Pathway type analysis of differential metabolites
###############################
###############################
# Writes the section heading and an introductory paragraph, then the pathway
# type histogram of the first comparison ($vs1) with a link to its directory,
# or a fallback paragraph when the histogram image does not exist.
$writer->emptyTag(
    'h2',
    'name' => "3.14 差异代谢物通路类型分析",
    'type' => 'type1',
    'desc' => "",
);
$writer->emptyTag(
    'p',
    'desc' => "对差异代谢物的KEGG注释结果按KEGG通路类型进行分类,分类结果如图所示:",
    'type' => "type1",
);
{
    # Scoped helper so the image path is spelled out only once.
    my $histogram_png = "$indir/$vs1/KEGG_Analysis/meta_Kegg_histogram.png";

    if (-e $histogram_png) {
        $writer->emptyTag(
            'pic',
            'desc' => "注:图中左侧为KEGG通路二级分类,右侧为一级分类,不同颜色代表不同一级分类;横坐标表示注释到该二级通路的代谢物占总注释代谢物的比例,占比超过20%的以百分比标注。",
            'name' => "图$pid. $vs1通路类型分析图",
            'type' => "type1",
            'path' => $histogram_png,
        );
        $pid++;

        $writer->emptyTag(
            'file',
            'desc' => "",
            'name' => "通路类型分析图路径:complete/*_vs_*/KEGG_Analysis/meta_Kegg_histogram.p*",
            'path' => "../$vs1/KEGG_Analysis/",
            'type' => "xls",
        );
    }
    else {
        $writer->emptyTag(
            'p',
            'desc' => "差异代谢物未富集到任何KEGG通路,无法生成通路类型分析图。",
            'type' => "type1",
        );
    }
}
|
||
|
||
|
||
# 3.15 Pathway enrichment analysis of differential metabolites
# Writes the section heading followed by the paragraphs that introduce the
# enrichment factor, its formula, the four variables of the formula and the
# interpretation of the result. The paragraphs are emitted in list order.
$writer->emptyTag(
    'h2',
    'name' => "3.15 差异代谢物通路富集分析",
    'type' => 'type1',
    'desc' => "",
);
for my $enrich_text (
    "KEGG注释分析仅能识别差异代谢物参与的所有通路,而要明确这些通路与实验条件的关联性,需进一步进行通路富集分析。通过富集分析可筛选出与代谢物差异关联最显著的关键通路<a href=\"#ref21\">[21]</a>。富集分析中,富集因子(Enrichment Factor)是衡量通路富集程度的核心指标,其计算公式及逻辑如下:",
    "富集因子 = (该通路中差异代谢物数量 ÷ 该通路总代谢物数量) ÷ (所有差异代谢物总数 ÷ 背景库中代谢物总数)",
    "公式中变量定义:",
    "(1)该通路中差异代谢物数量:该通路中被鉴定为差异代谢物的数量;",
    "(2)该通路总代谢物数量:背景数据库中该通路包含的所有代谢物总数;",
    "(3)所有差异代谢物总数:实验中鉴定到的全部差异代谢物数量;",
    "(4)背景库中代谢物总数:分析所参考的数据库中包含的所有代谢物总数。",
    "计算逻辑:富集因子通过比较“通路内差异代谢物的比例”与“整体差异代谢物的比例”,反映该通路的差异代谢物是否高于随机期望水平。若富集因子>1,表明该通路中差异代谢物的比例高于整体背景,即存在富集;数值越大,说明通路与样本差异的关联性越强,富集程度越显著。",
) {
    $writer->emptyTag('p', 'desc' => $enrich_text, 'type' => "type1");
}
|
||
|
||
# Metabolite-to-database mapping: intro paragraph, the (partial) mapping table,
# one paragraph per table column, and a link to the full result file of $vs1.
# The intro text previously contained the duplicated word "映射映射结果" twice;
# it now reads "映射结果".
$writer->emptyTag('p','desc'=>"分析首先通过差异代谢物映射至KEGG、PubChem等权威数据库,部分差异代谢物映射结果展示如下(不足20代谢物则展示全部),所有差异代谢物映射结果请详见结果文件Metabolite_Mapping.xls表。",'type'=>"type1");

# The table shown in the report is the copy placed under html_report/template.
$writer->emptyTag('table','desc'=>"",'type'=>"0",'name'=>"表$tid. 差异代谢物映射表(部分)",'path'=>"$indir/html_report/template/KEGG_Analysis/Mapping.xls");
$tid++;

# Column legend of the mapping table, emitted in order.
for my $column_note (
    "1) Metabolite :代谢物名称;",
    "2) Synonyms :该代谢物在HMDB 数据库中的同义词;",
    "3) Formula :该物质的化学组成;",
    "4) Exact.Mass :该物质的精确分子量;",
    "5) NIKKAJI :该物质在NIKKAJI 数据库中的编号;",
    "6) ChEBI :该物质在ChEBI 数据库中的编号;",
    "7) PubChem :该物质在PubChem 数据库中的编号;",
    "8) CAS :该物质的CAS 号;",
    "9) KEGG id :该物质在KEGG COMPOUND 数据库中的编号;",
    "10) HMDB :该物质在HMDB 数据库中的编号;",
    "11) KingDom :该物质在HMDB 数据库中的一级分类;",
    "12) Super Class :该物质在HMDB 数据库中的二级分类;",
    "13) Class :该物质在HMDB 数据库中的三级分类;",
    "14) Sub Class :该物质在HMDB 数据库中的四级分类;",
    "15) KEGG.LINK :该物质的KEGG COMPOUND 数据库链接;",
    "16) Pathway :该物质映射的KEGGPATHWAY。",
) {
    $writer->emptyTag('p','desc'=>$column_note,'type'=>"type1");
}

$writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物数据库映射表见:complete/*_vs_*/KEGG_Analysis/Metabolite_Mapping.xls",'path'=>"../$vs1/KEGG_Analysis/Metabolite_Mapping.xls",'type'=>"xls");
|
||
|
||
# Pathway enrichment table: intro paragraph, then (when the template copy of the
# table exists) the partial table, one paragraph per column and a link to the
# full result file of $vs1; otherwise a fallback paragraph.
# The intro text now uses the correct term "over-representation" (it previously
# read "over-presentation").
$writer->emptyTag('p','desc'=>"获得差异代谢物的匹配信息后,我们基于对应通路数据库进行代谢通路富集分析。该分析通过检验差异代谢物在特定通路中是否存在过度富集(over-representation),筛选出具有统计学意义的关键通路。部分差异代谢物通路富集分析结果展示如下(不足20通路则展示全部),所有差异代谢物通路富集分析结果请详见结果文件Pathway_Analysis.xls表。",'type'=>"type1");

# Metabolite pathways
if (-e "$indir/html_report/template/Pathway_Analysis/Pathway.xls") {

    $writer->emptyTag('table','desc'=>"",'type'=>"0",'name'=>"表$tid. 代谢通路富集分析表(部分)",'path'=>"$indir/html_report/template/Pathway_Analysis/Pathway.xls");
    $tid++;

    # Lead-in sentence followed by the column legend, emitted in order.
    for my $column_note (
        "代谢通路富集分析表包括以下内容:",
        "1) ID :KEGG通路ID;",
        "2) Description :代谢通路名称;",
        "3) Total :该通路注释到的背景代谢物总数;",
        "4) Hits :该通路命中的差异代谢物数;",
        "5) MetaboliteRatio :该通路命中的差异代谢物数 / 全部可做通路映射的差异代谢物总数(m/diffall);",
        "6) BgRatio :该通路注释到的背景代谢物总数 / 全部可做通路映射的背景代谢物总数(M/all);",
        "7) pvalue :代谢通路富集分析的P值;",
        "8) p.adjust :经错误发现率(false discovery rate, FDR)方法进行多重假设检验校正后的P值;",
        "9) Holm_adjust :经Holm-Bonferroni方法进行多重假设检验校正后的P值;",
        "10) Enrichment_factor :富集因子 Enrichment factor = (m/M) / (diffall/all);",
        "11) Diffmeta :差异代谢物列表(;分隔);",
        "12) Allmeta :该通路背景代谢物列表(;分隔);",
    ) {
        $writer->emptyTag('p','desc'=>$column_note,'type'=>"type1");
    }

    $writer->emptyTag('file','desc'=>"",'name'=>"差异代谢物通路富集分析表路径:complete/*_vs_*/Pathway_Analysis/Pathway_Analysis.xls",'path'=>"../$vs1/Pathway_Analysis/Pathway_Analysis.xls",'type'=>"xls");

}else{
    $writer->emptyTag('p','desc'=>"差异代谢物未富集到任何KEGG通路,无法生成通路富集表。",'type'=>"type1");
}
|
||
|
||
|
||
# Bubble plot
# Writes the general explanation of the enrichment bubble plot, then, when the
# plot of the first comparison ($vs1) exists, two further paragraphs (the second
# names the pathway held in $pathway_pvalue_min), the picture and a link to the
# enrichment table; otherwise a fallback paragraph.
$writer->emptyTag(
    'p',
    'desc' => "气泡图是KEGG富集分析结果的可视化展示方式,通过富集因子(Enrichment factor)、P值(P-value)及富集到该通路的差异代谢物个数,综合衡量KEGG通路的富集程度。其中,富集因子越大表示富集程度越高;P值取值范围为[0,1],越接近0表示富集越显著。",
    'type' => "type1",
);
{
    # Scoped helper so the image path is spelled out only once.
    my $bubble_png = "$indir/$vs1/Pathway_Analysis/Bubble_plot.png";

    if (-e $bubble_png) {
        for my $bubble_text (
            "图中P值越小、富集因子越大,通路的参考价值越高;反之则参考价值越低。本次分析选取富集显著性最可靠(即P值最小)的前20条通路进行展示。",
            "如下图所示,$vs1组通路富集分析中,富集最显著的代谢通路为$pathway_pvalue_min。",
        ) {
            $writer->emptyTag('p', 'desc' => $bubble_text, 'type' => "type1");
        }

        $writer->emptyTag(
            'pic',
            'desc' => "注:图中每一行代表一条KEGG通路;横坐标为富集因子,数值越大表示差异代谢物在该通路中的富集水平越高;气泡颜色对应P值大小(颜色越深通常P值越小,富集越显著);气泡大小代表该通路中注释到的差异代谢物个数(气泡越大个数越多)。",
            'name' => "图$pid. $vs1通路富集分析气泡图",
            'type' => "type1",
            'path' => $bubble_png,
        );
        $pid++;

        $writer->emptyTag(
            'file',
            'desc' => "",
            'name' => "差异代谢物通路富集分析表路径:complete/*_vs_*/Pathway_Analysis/Pathway_Analysis.xls",
            'path' => "../$vs1/Pathway_Analysis/Pathway_Analysis.xls",
            'type' => "xls",
        );
    }
    else {
        $writer->emptyTag(
            'p',
            'desc' => "差异代谢物未富集到任何KEGG通路,无法生成富集分析气泡图。",
            'type' => "type1",
        );
    }
}
|
||
|
||
|
||
|
||
|
||
|
||
# 3.15.1 Heatmap of differential metabolites related to key pathways
# Writes the sub-section heading. When the report heatmap PNG exists, two
# method paragraphs, the picture, up to two optional tables and three file
# links are emitted; otherwise a single fallback paragraph is written.
$writer->emptyTag(
    'h3',
    'name' => "3.15.1 关键通路相关差异代谢物热图分析",
    'type' => 'type1',
    'desc' => "",
);

# Numbers quoted in the explanatory text below.
my $pathway_top_n        = 5;
my $pathway_report_top_n = 30;

# Result files of the key-pathway heatmap step.
my $pathway_heatmap_dir     = "$indir/09_Key_Pathway_Heatmap";
my $pathway_heatmap_png     = "$pathway_heatmap_dir/key_pathway_metabolite_heatmap_report_top30.png";
my $pathway_heatmap_table   = "$pathway_heatmap_dir/key_pathway_metabolite_report_top30.xls";
my $pathway_heatmap_label   = "$pathway_heatmap_dir/key_pathway_figure_label.xls";
my $pathway_heatmap_summary = "$pathway_heatmap_dir/key_pathway_summary.xls";

if (-e $pathway_heatmap_png) {
    $writer->emptyTag(
        'p',
        'desc' => "为从通路层面综合展示不同对比组中具有代表性的差异代谢变化模式,本分析基于各对比组的代谢通路富集结果,对每个对比组先筛选富集分析P值小于0.05的显著通路,再按P值从小到大排序;若P值相同,则按富集因子(Enrichment factor)从高到低排序;若仍相同,则按命中差异代谢物数(Hits)从高到低排序。最终每个对比组选取前"
            . $pathway_top_n
            . "条关键通路用于后续分析。对于通路范围过大、解释较泛的背景通路(如Metabolic pathways等),本分析选择不纳入关键通路候选范围,以提高结果的聚焦性与可解释性。",
        'type' => "type1",
    );

    $writer->emptyTag(
        'p',
        'desc' => "在获得各对比组关键通路后,进一步提取这些通路中命中的差异代谢物(Diffmeta),汇总后去重,并按以下规则综合排序:1)该代谢物出现在多少个对比组的入选关键通路中(ComparisonCount,降序);2)该代谢物出现在多少条入选关键通路中(PathwayCount,降序);3)该代谢物在所有来源对比组中的最小P值(BestPvalue,升序);4)该代谢物在所有来源对比组中的最大绝对log2FoldChange值(Max_abs_log2FC,降序)。排序后选取前"
            . $pathway_report_top_n
            . "个代表性代谢物用于报告正文展示,完整结果请见对应结果文件。",
        'type' => "type1",
    );

    $writer->emptyTag(
        'pic',
        'desc' => "注:横坐标为实验分组,纵坐标为关键通路相关差异代谢物编号。颜色表示同一代谢物在不同分组中的行标准化相对变化水平(Row z-score):红色表示该代谢物在对应分组中相对较高,蓝色表示相对较低,白色表示接近该代谢物在各组中的平均水平。图中代谢物以M编号显示,其与具体代谢物名称的对应关系见key_pathway_figure_label.xls。该图用于比较同一代谢物在不同组间的变化趋势,不用于不同代谢物之间绝对丰度大小的直接比较。",
        'name' => "图$pid. 关键通路相关差异代谢物热图",
        'type' => "img-width-max",
        'path' => $pathway_heatmap_png,
    );
    $pid++;

    # Each table is gated on the result file, but the report points at the
    # copy under html_report/template.
    if (-e $pathway_heatmap_table) {
        $writer->emptyTag(
            'table',
            'desc' => "注:该表为报告热图中展示的关键通路相关差异代谢物列表。MetaboliteRank为代谢物综合排序名次;FigureLabel为图中代谢物编号;ComparisonCount为该代谢物出现在多少个对比组的入选关键通路中;PathwayCount为该代谢物出现在多少条入选关键通路中;BestPvalue为该代谢物在所有来源对比组中的最小P值;Max_abs_log2FC为该代谢物在所有来源对比组中的最大绝对log2FoldChange值;Comparison为来源对比组;SourcePathwayID和SourcePathway分别为来源关键通路编号和名称;PathwayPvalue、Enrichment_factor和Hits分别为对应关键通路的富集P值、富集因子和命中差异代谢物数;Pvalue、log2FC和abs_log2FC为该代谢物在对应来源对比组中的差异统计信息。若表格超过30行只展示30行。",
            'type' => "0",
            'name' => "表$tid. 报告展示关键通路相关差异代谢物列表(部分)",
            'path' => "$indir/html_report/template/key_pathway_metabolite_report_top30.xls",
        );
        $tid++;
    }

    if (-e $pathway_heatmap_summary) {
        $writer->emptyTag(
            'table',
            'desc' => "注:该表汇总各对比组选出的关键通路。Comparison为对比组名称;PathwayID为KEGG通路编号;Pathway为KEGG通路名称;HitCount为该通路命中的差异代谢物数量;pvalue为通路富集分析P值;Enrichment_factor为富集因子;Metabolites为该通路命中的差异代谢物名称列表。若表格超过30行只展示30行。",
            'type' => "0",
            'name' => "表$tid. 各对比组关键通路汇总表(部分)",
            'path' => "$indir/html_report/template/key_pathway_summary.xls",
        );
        $tid++;
    }

    # Links to the result files, as [ label, relative path ] pairs.
    for my $link (
        [ "关键通路汇总表路径:complete/09_Key_Pathway_Heatmap/key_pathway_summary.xls", "../09_Key_Pathway_Heatmap/key_pathway_summary.xls" ],
        [ "关键通路代谢物编号对应表路径:complete/09_Key_Pathway_Heatmap/key_pathway_figure_label.xls", "../09_Key_Pathway_Heatmap/key_pathway_figure_label.xls" ],
        [ "关键通路相关差异代谢物热图结果路径:complete/09_Key_Pathway_Heatmap/", "../09_Key_Pathway_Heatmap" ],
    ) {
        my ($link_name, $link_path) = @{$link};
        $writer->emptyTag(
            'file',
            'desc' => "",
            'name' => $link_name,
            'path' => $link_path,
            'type' => "xls",
        );
    }
}
else {
    $writer->emptyTag(
        'p',
        'desc' => "未生成关键通路相关差异代谢物热图分析结果。",
        'type' => "type1",
    );
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
# 3.16 GSEA analysis of differential metabolites
# Writes the section heading, three introductory paragraphs and the (partial)
# KEGG-GSEA table. When the NES bar plot of the first comparison ($vs1) exists,
# the bar plot, the per-comparison enrichment plots (via piclist) and a link to
# the GSEA result directory follow; otherwise a fallback paragraph is written.
$writer->emptyTag(
    'h2',
    'name' => "3.16 差异代谢物GSEA分析",
    'type' => 'type1',
    'desc' => "",
);
for my $gsea_text (
    "传统代谢物功能富集方法基于超几何检验,仅针对差异表达代谢物进行分析。但差异倍数(FC)阈值的设置可能过滤掉部分重要代谢物,导致遗漏关键生物通路或功能模块。GSEA分析(Gene Set Enrichment Analysis)可弥补这一不足,通过整合所有代谢物的表达趋势,更全面揭示代谢物集合在生物系统中的调控作用。",
    "GSEA首先将所有代谢物按丰度差异排序,通过计算富集分数(Enrichment Score, ES)评估代谢物集合在排序列表中的分布趋势(富集于顶部或底部),再通过置换检验确定富集的显著性;分析过程中仅纳入包含≥5个代谢物的通路(避免极小集合的随机富集噪音)。",
    "结合差异代谢物的表达量变化,对KEGG通路进行GSEA分析,以下以 $vs1 为例说明(筛选pvalue值最小的20个通路展示),完整表格请查看结果文件KEGG_GSEA.xls。",
) {
    $writer->emptyTag('p', 'desc' => $gsea_text, 'type' => "type1");
}

# The table shown in the report is the copy placed under html_report/template.
$writer->emptyTag(
    'table',
    'desc' => "注:(1)ID:KEGG通路编号;(2)Description:KEGG通路功能描述;(3)setSize:该通路注释的代谢物数量(分析时仅纳入setSize≥5的通路);(4)enrichmentScore:富集分数;(5)Normalized ES:标准化富集分数(NES);(6)pvalue:富集显著性P值;(7)p.adjust:经BH校正后的P值;(8)rank:富集分数达峰值时的位置;(9)core_enrichment:对富集分数起主要贡献的代谢物(Leading edge subset)",
    'type' => "0",
    'name' => "表$tid. GSEA-KEGG富集结果(部分)",
    'path' => "$indir/html_report/template/KEGG_GSEA.xls",
);
$tid++;

# Core change 3: the picture note states that a smaller P value means a more
# significant enrichment.
{
    my $gsea_root   = "$indir/06_GSEA_dir";
    my $nes_barplot = "$gsea_root/$vs1/KEGG/KEGG_GSEA_NES.barplot.png";

    if (-e $nes_barplot) {
        $writer->emptyTag(
            'p',
            'desc' => "筛选P值最小的30条通路用于展示GSEA分析结果,以下图为例。",
            'type' => "type1",
        );

        $writer->emptyTag(
            'pic',
            'desc' => "注:图中每一行代表一条KEGG通路;横坐标为标准化富集分数(NES),绝对值越大表示富集强度越高;柱状颜色对应P值大小,颜色越深(红色越红)表示P值越小,富集越显著。",
            'name' => "图$pid. $vs1 的KEGG通路GSEA分析柱状图",
            'type' => "type1",
            'path' => $nes_barplot,
        );
        $pid++;

        $writer->emptyTag(
            'p',
            'desc' => "不同对比组差异代谢物的KEGG-GSEA富集图展示如下(若超过20张,仅展示前20张,其余可参见结果文件对应目录):",
            'type' => "type1",
        );

        # Note text shown with the enrichment plots, assembled from its parts.
        my $gsea_plot_note = join(
            "",
            "注:富集图分为三部分:",
            "1. 富集分数(ES)折线图:展示沿排序代谢物集计算时ES值的动态变化,最高峰对应的ES值为该代谢物集的富集分数;",
            "2. 代谢物分布(hits)图:以线条标记通路代谢物在排序列表中的位置(按log₂FoldChange从大到小排序);",
            "3. 代谢物表达趋势热图:红色表示在B组高表达,蓝色表示在A组高表达,颜色深度对应信噪比(Signal2noise,基于log₂FoldChange计算)。",
            "ES反映代谢物集合在排序列表中的累积富集程度,受集合在列表中的位置影响;NES为标准化后的ES,可跨通路比较:",
            "NES>0表示代谢物集合富集于列表上部(倾向于上调);NES<0表示富集于列表下部(倾向于下调);NES≈0表示无显著富集。",
        );
        &piclist(
            "不同对比组差异代谢物KEGG-GSEA富集图",
            $gsea_plot_note,
            "$gsea_root/*/KEGG/plot/*.png",
            "$gsea_root",
            ".GseaVis.png",
            "",
        );

        $writer->emptyTag(
            'file',
            'desc' => "",
            'name' => "KEGG-GSEA分析结果文件夹路径:complete_dir/06_GSEA_dir/*/KEGG",
            'path' => "../06_GSEA_dir",
            'type' => "xls",
        );
    }
    else {
        $writer->emptyTag(
            'p',
            'desc' => "差异代谢物 GSEA 分析未富集到显著的代谢通路。",
            'type' => "type1",
        );
    }
}
|
||
|
||
|
||
|
||
|
||
#--------------------------------------------------------------------------------------;
#---------------------------- Appendix --------------------------------------------;
#--------------------------------------------------------------------------------------;
# Writes chapter 4: the result directory tree (which list file is used depends
# on $repeat), the software and database summary tables (both numbered via
# $tid), the abbreviation table (unnumbered) and the heading that precedes the
# reference list.
$writer->emptyTag('h1', 'name' => "4 附录", 'type' => 'type1', 'desc' => "");

$writer->emptyTag('h2', 'name' => "4.1 目录结构", 'type' => 'type1', 'desc' => "4.1 目录结构");
{
    my $tree_list = ($repeat eq "Y") ? "result_tree.list" : "result_tree_norep.list";
    $writer->emptyTag('pre', 'name' => "", 'desc' => "$Bin/$tree_list", 'type' => "type1");
}

# Numbered summary tables as [ heading, caption, template file ] triples.
for my $summary (
    [ "4.2 软件使用列表", "软件汇总表",   "daixie-software.txt" ],
    [ "4.3 数据库汇总表", "数据库汇总表", "daixie-database.txt" ],
) {
    my ($heading, $caption, $template_file) = @{$summary};
    $writer->emptyTag('h2', 'name' => $heading, 'type' => 'type1', 'desc' => $heading);
    $writer->emptyTag(
        'table',
        'desc' => "",
        'type' => "0",
        'name' => "表$tid. $caption",
        'path' => "$indir/html_report/template/$template_file",
    );
    $tid++;
}

$writer->emptyTag('h2', 'name' => "4.4 缩略词表", 'type' => 'type1', 'desc' => "4.4 缩略词表");
# This table has an empty caption and does not advance $tid.
$writer->emptyTag(
    'table',
    'desc' => "注:缩略词表按照英文首字母排序。",
    'type' => "0",
    'name' => "",
    'path' => "$indir/html_report/template/LC_abbreviated_table.txt",
);

$writer->emptyTag('h2', 'name' => "4.5 参考文献", 'type' => 'type1', 'desc' => "4.5 参考文献");
|
||
|
||
|
||
|
||
#--------------------------------------------------------------------------------------;
#----------------------------- 5 : References -----------------------------------------;
#--------------------------------------------------------------------------------------;
# Emits the <ref_list> element and closes <report1>. Every entry passed to
# reference() is a [ citation text, URL, number ] triple; the numbers match the
# "#refN" anchors used in the report text.
#
# Corrections relative to the previous list:
#   [7]  had no URL element (so its number landed in the URL slot); DOI added
#        and the missing space in "high-dimensional genomic" restored.
#   [8]  garbled volume/issue "162020/10/203)" restored to "16(3)".
#   [19] typo "enCKclopedia" fixed.
#   [21] DOI pointed at an unrelated article; replaced with the DOI of the
#        MetaboAnalyst 3.0 paper.
$writer->startTag('ref_list',desc=>"参考文献",type=>"type1",name=>"参考文献");
&reference ($writer,[
    ["THEODORIDIS G, GIKA H G, WILSON I D. LC-MS-based methodology for global metabolite profiling in metabonomics/metabolomics[J]. TrAC Trends in Analytical Chemistry, 2008, 27(3): 251-60.","https://www.sciencedirect.com/science/article/abs/pii/S0165993608000095","1"],
    ["ZHOU B, XIAO J F, TULI L, et al. LC-MS-based metabolomics[J]. Molecular BioSystems, 2012, 8(2): 470-81.","https://pubs.rsc.org/en/content/articlelanding/2012/MB/C1MB05350G#!divAbstract","2"],
    ["HAIR J F. Multivariate data analysis 7th Edition. 2010.","http://ishare.iask.sina.com.cn/f/ovKhWidcOa.html","3"],
    ["DUNN W B, BROADHURST D, BEGLEY P, et al. Procedures for large-scale metabolic profiling of serum and plasma using gas chromatography and liquid chromatography coupled to mass spectrometry[J]. Nature protocols, 2011, 6(7): 1060-83.","https://www.nature.com/articles/nprot.2011.335","4"],
    ["Kieffer D A, Piccolo B D, Vaziri N D, et al. Resistant starch alters gut microbiome and metabolomic profiles concurrent with amelioration of chronic kidney disease in rats[J]. Am J Physiol Renal Physiol, 2016, 310(9):F857-71.","https://journals.physiology.org/doi/full/10.1152/ajprenal.00513.2015","5"],
    ["JOLLIFFE I. Principal component analysis[M]. Wiley Online Library, 2002.","https://baike.baidu.com/item/%E4%B8%BB%E6%88%90%E5%88%86%E5%88%86%E6%9E%90/829840?fromtitle=principal%20component%20analysis&fromid=11190368&fr=aladdin","6"],
    ["Boulesteix, AL & Strimmer, K Partial least squares: a versatile tool for the analysis of high-dimensional genomic data. Briefings in bioinformatics. 8,32-44 (2007)","https://doi.org/10.1093/bib/bbl016","7"],
    ["TRYGG J, WOLD S. Orthogonal projections to latent structures(OPLS)[J]. Journal of chemometrics, 2002, 16(3): 119-28.","https://doi.org/10.1002/cem.1071","8"],
    ["SACCENTI E, HOEFSLOOT H C, SMILDE A K, et al. Reflections on univariate and multivariate analysis of metabolomics data[J]. Metabolomics, 2014, 10(3): 361-374.","https://doi.org/10.1007/s11306-013-0598-6","9"],
    ["JEANMOUGIN M, DE REYNIES A, MARISA L, et al. Should We Abandon the t-Test in the Analysis of Gene Expression Microarray Data: A Comparison of Variance Modeling Strategies[J]. PLoS ONE, 2010, 5(9): e12336.","https://doi.org/10.1371/journal.pone.0012336","10"],
    ["SMYTH G K. Linear Models and Empirical Bayes Methods for Assessing Differential Expression in Microarray Experiments[J]. Statistical Applications in Genetics and Molecular Biology, 2004, 3(1): Article 3.","https://doi.org/10.2202/1544-6115.1027","11"],
    ["KAMMERS K, COLE R N, TIENGWE C, et al. Detecting Significant Changes in Protein Abundance[J]. EuPA Open Proteomics, 2015, 7: 11-19.","https://doi.org/10.1016/j.euprot.2015.02.002","12"],
    ["TING L, COWLEY M J, HOON S L, et al. Normalization and Statistical Analysis of Quantitative Proteomics Data Generated by Metabolic Labeling[J]. Molecular & Cellular Proteomics, 2009, 8(10): 2227-2242.","https://doi.org/10.1074/mcp.M800462-MCP200","13"],
    ["VERDOODT F, BHATTI S F M, MOLINA J, et al. Plasma metabolome reveals altered oxidative stress, inflammation, and amino acid metabolism in dogs with idiopathic epilepsy[J]. Epilepsia, 2025, 66: 1315-1328.","https://doi.org/10.1111/epi.18256","14"],
    ["ANTONELLI J, CLAGGETT B L, HENGLIN M, et al. Statistical Workflow for Feature Selection in Human Metabolomics Data[J]. Metabolites, 2019, 9(7): 143.","https://doi.org/10.3390/metabo9070143","15"],
    ["VINAIXA M, SAMINO S, SAEZ I, et al. A Guideline to Univariate Statistical Analysis for LC/MS-Based Untargeted Metabolomics-Derived Data[J]. Metabolites, 2012, 2(4):775-795.","https://doi.org/10.3390/metabo2040775","16"],
    ["WANG J, FENG Y, XU S, et al. Non-targeted LC-MS metabolomics reveals serum metabolites for high-altitude adaptation in Tibetan donkeys[J]. Scientific Reports, 2025,15(1): 46.","https://doi.org/10.1038/s41598-024-83544-8","17"],
    # NOTE(review): the DOI of entry 18 is an "annotation" DOI and may not point
    # at the pheatmap package - confirm before release.
    ["KOLDE R. Pheatmap: pretty heatmaps [J]. R package version, 2012, 61","https://doi.org/10.1371/annotation/622db7c2-5ca8-402a-b4f2-666dc31172ab","18"],
    ["KANEHISA M, GOTO S. KEGG: kyoto encyclopedia of genes and genomes[J]. Nucleic Acids Res, 2000, 28(1): 27-30.","https://www.genome.jp/kegg/","19"],
    ["KANEHISA M, SATO Y, KAWASHIMA M, et al. KEGG as a reference resource for gene and protein annotation[J]. Nucleic acids research, 2015, gkv1070.","https://doi.org/10.1093/nar/gkv1070","20"],
    ["XIA J, SINELNIKOV I V, HAN B, et al. MetaboAnalyst 3.0—making metabolomics more meaningful[J]. Nucleic acids research, 2015, 43(W1): W251-W7.","https://doi.org/10.1093/nar/gkv380","21"]
]);
$writer->endTag('ref_list');

$writer->endTag('report1');
|
||
|
||
|
||
##------------ 帮助文档----------------
|
||
#$writer->startTag('report2');
|
||
#
|
||
#$writer->emptyTag('h2','name'=>"3.3 代谢物全注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||
#$writer->emptyTag('p','desc'=>"使用自行开发的Perl程序,基于HMDB数据库(V5.0)和KEGG COMPOUND数据库,对预处理后的代谢物数据进行全面注释。",'type'=>"type1");
|
||
#
|
||
#$writer->emptyTag('h2','name'=>"3.7 多元统计分析",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||
#$writer->emptyTag('p','desc'=>"使用R(v3.6.2)的ropls程序包对预处理的代谢组数据进行多元统计分析。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(1) 对数据进行单位方差标准化(UV scaling)处理。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(2) 主成分分析(PCA),绘制得分散点图。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(3) 偏最小二乘判别分析(PLS-DA),绘制得分散点图。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(4) 正交偏最小二乘判别分析(OPLS-DA),绘制得分散点图,进行置换检验,并计算变量重要性投影(VIP)值。",'type'=>"type1");
|
||
#
|
||
#$writer->emptyTag('h2','name'=>"3.8 差异代谢物筛选",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||
#$writer->emptyTag('p','desc'=>"使用自行开发的Perl程序对各比较组数据进行学生t检验,并计算差异倍数(FC)。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"差异代谢物筛选标准如下:",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"- 有重复样本:P-value<0.05且VIP>1;",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"- 无重复样本:P-value<0.05且FC>1.2或FC<0.83;",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"- 上调标准:有重复时FC>1,无重复时FC>1.2;",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"- 下调标准:有重复时FC<1,无重复时FC<0.83。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(1) 使用R(v3.6.2)的ggplot2程序包绘制火山图。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(2) 使用R(v3.6.2)的ggpubr程序包绘制差异代谢物的柱状图与小提琴复合图。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(3) 使用R(v3.6.2)的pheatmap程序包,采用层次聚类法绘制差异代谢物聚类热图。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(4) 使用R(v3.6.2)的corrplot程序包绘制差异代谢物相关性热图。",'type'=>"type1");
|
||
#
|
||
#$writer->emptyTag('h2','name'=>"3.13 差异代谢物KEGG注释",'type'=>'type1','desc'=>"","quest"=>"1","id"=>"asplic");
|
||
#$writer->emptyTag('p','desc'=>"使用KEGG PATHWAY数据库对差异代谢物进行通路功能分析。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(1) 使用自行开发的Perl程序,基于KEGG PATHWAY数据库对差异代谢物进行通路注释,并绘制相应通路图。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(2) 根据注释结果进行通路类型分析并绘制通路类型分析图。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(3) 结合所有物质与差异物质的注释结果进行通路富集分析,并绘制富集气泡图。",'type'=>"type1");
|
||
#$writer->emptyTag('p','desc'=>"(4) 富集因子计算方法:(该通路上的差异代谢物数目/该通路上的总代谢物数目)/(差异代谢物总数/总代谢物数目)。",'type'=>"type1");
|
||
#
|
||
#
|
||
#
|
||
#$writer->endTag('report2');
|
||
|
||
|
||
# Close the root element, post-process the generated XML with decorate() and
# write it to $outfile. The string is decoded from UTF-8 bytes to characters
# first because the output handle carries the :utf8 layer and would otherwise
# encode it a second time.
# Open, print and close are now checked: a failed write aborts the run instead
# of silently leaving the HTML converter with a missing or truncated XML file.
$writer->endTag('report');
open(my $xml_out_fh, ">:utf8", $outfile)
    or die "Cannot open '$outfile' for writing: $!\n";
my $xmlstr=&decorate($writer->to_string);
$xmlstr = Encode::decode("utf-8", $xmlstr);
print {$xml_out_fh} $xmlstr
    or die "Cannot write to '$outfile': $!\n";
# Buffered write errors only surface when the handle is closed.
close($xml_out_fh)
    or die "Cannot close '$outfile': $!\n";
$writer->end();
|
||
|
||
#if(-d "$indir" && $logo eq "Y" ){
|
||
# print "python $Bin/htmlConvert/xml2HtmlConverter_v1.py -i $outfile -o $indir/html_report\n";
|
||
# system("python $Bin/htmlConvert/xml2HtmlConverter_v1.py -i $outfile -o $indir/html_report -n report");
|
||
#}elsif (-d "$indir" && $logo eq "N" ) {
|
||
# print "python $Bin/htmlConvert/xml2HtmlConverter_v2_nologo.py -i $outfile -o $indir/html_report\n";
|
||
# system("python $Bin/htmlConvert/xml2HtmlConverter_v2_nologo.py -i $outfile -o $indir/html_report -n report");
|
||
#
|
||
#}
|
||
|
||
# Convert the XML into the HTML report. $logo selects the converter script:
# "Y" uses the default one, "N" the no-logo variant; any other value runs
# nothing, as before.
# The command is passed to system() as a list, so no shell is involved and
# paths containing spaces or shell metacharacters reach the converter intact.
# A failing converter is reported on STDERR but does not abort the script, so
# the cleanup that follows still runs.
if (-d "$indir") {
    my %converter_for = (
        "Y" => "xml2HtmlConverter.py",
        "N" => "xml2HtmlConverter_nologo.py",
    );
    my $converter = $converter_for{$logo};
    if (defined $converter) {
        my @convert_cmd = (
            "/share/nas2/soft/biosoft/Miniconda3/v23.5.2/miniconda3/bin/python",
            "$Bin/htmlConvert/$converter",
            "-i", $outfile,
            "-o", "$indir/html_report",
            "-n", "report",
        );
        system(@convert_cmd) == 0
            or warn "HTML conversion failed (wait status $?): @convert_cmd\n";
    }
}
|
||
|
||
|
||
|
||
|
||
# Remove the intermediate XML file and the temporary template/src directories
# under html_report, then print the total run time.
# The former void-context backticks went through the shell with interpolated
# paths and ignored failures; unlink and list-form system avoid the shell, and
# failures are now reported as warnings (best-effort cleanup, never fatal).
unlink($outfile)
    or warn "Cannot remove '$outfile': $!\n";
for my $tmp_dir ("$indir/html_report/template", "$indir/html_report/src") {
    # rm -rf succeeds quietly when the directory is already absent.
    system("rm", "-rf", $tmp_dir) == 0
        or warn "Cannot remove '$tmp_dir' (wait status $?)\n";
}

#######################################################################################
print STDOUT "\nDone. Total elapsed time : ",time()-$BEGIN_TIME,"s\n";
|
||
#######################################################################################
|
||
|
||
# -------------------------------------------------------------------------------------
|
||
sub extractInfo{
|
||
open (IN,"$config{group_file}") or die $!;
|
||
my @group;
|
||
my %gnum;
|
||
while (<IN>) {
|
||
chomp;
|
||
next if (/^$/);
|
||
my %seen;
|
||
my ($sam,$gro)=split/\t/;
|
||
$sample_nums++;
|
||
push (@group,$gro);
|
||
my @unique = grep { !$seen{$_}++ } @group;
|
||
$group_nums=scalar(@unique);
|
||
$group_name=join (" ,",@unique);
|
||
$gnum{$gro}++;
|
||
}
|
||
close (IN);
|
||
|
||
open (OUT,">","$indir/html_report/template/Sample_detail_table.txt") or die $!;
|
||
print OUT "Group_name\tSample_num\n";
|
||
foreach my $group (sort {$a cmp $b} keys %gnum) {
|
||
print OUT "$group\t$gnum{$group}\n";
|
||
}
|
||
close (OUT);
|
||
|
||
open (IN,"$config{vs_file}") or die $!;
|
||
my @vs;
|
||
while (<IN>) {
|
||
chomp;
|
||
next if (/^$/);
|
||
$vs_nums++;
|
||
my ($v1,$v2)=split/\t/;
|
||
my $vs="$v1\_vs\_$v2";
|
||
push (@vs,$vs);
|
||
$vs_name=join (" ,",@vs);
|
||
$vs1=$vs[0];
|
||
}
|
||
|
||
|
||
open (IN,"$config{mean_file}") or die $!;
|
||
while (<IN>) {
|
||
chomp;
|
||
next if (/\b\Q$IS1\E\b/) ;
|
||
next if (/^$/);
|
||
$Left_peak_nums++;
|
||
}
|
||
close(IN);
|
||
|
||
# open (IN,"$config{mean_file2}") or die $!;
|
||
# while (<IN>) {
|
||
# chomp;
|
||
# next if (/$IS2/) ;
|
||
# next if (/^$/);
|
||
# $Right_peak_nums++;
|
||
# }
|
||
# close(IN);
|
||
|
||
if (-e "$indir/$vs1/Statistical_Analysis/CV-ANOVA.txt") {
|
||
open (IN,"$indir/$vs1/Statistical_Analysis/CV-ANOVA.txt") or die $!;
|
||
open (OUT,">","$indir/html_report/template/CV-ANOVA.txt") or die $!;
|
||
while (<IN>) {
|
||
chomp;
|
||
my @infos=split/\t/;
|
||
my @info;
|
||
for (my $i=0;$i<@infos ;$i++) {
|
||
if ($infos[$i] eq "" || $infos[$i] eq " ") {
|
||
$infos[$i]="--";
|
||
push (@info,"$infos[$i]");
|
||
}else{
|
||
push (@info,$infos[$i]);
|
||
}
|
||
}
|
||
print OUT join ("\t",@infos),"\n";
|
||
}
|
||
close(OUT);
|
||
close(IN);
|
||
}
|
||
|
||
my $all_metabolite_File="$indir/02_all_samples/all_metabolite_mapping.txt";
|
||
my $template_all_metabolite_File="$indir/html_report/template/all_metabolite_mapping3L.txt";
|
||
open (IN,"<","$all_metabolite_File") or die $!;
|
||
open (OUT,">","$template_all_metabolite_File") or die $!;
|
||
my $n=0;
|
||
while (<IN>) {
|
||
chomp;
|
||
next if (/^$/);
|
||
my @info=split/\t/;
|
||
next if (scalar(@info) eq 1);
|
||
print OUT "$_\n";
|
||
last if ($n > 19);
|
||
$n++;
|
||
}
|
||
close(IN);
|
||
close(OUT);
|
||
|
||
mkdir "$indir/html_report/template/pca_analysis" unless (-d "$indir/html_report/template/pca_analysis") ;
|
||
my $pca_File="$indir/02_all_samples/All_Sample_PCA.png";
|
||
my $template_pca_File="$indir/html_report/template/pca_analysis";
|
||
`cp $pca_File $template_pca_File/all_samples_pca.png`;
|
||
# `cp $pca_File $indir/QC_analysis/qc_pca.png`;
|
||
|
||
my $vspca_File="$indir/$vs1/Statistical_Analysis/PCA_Score.png";
|
||
my $vspls_File="$indir/$vs1/Statistical_Analysis/PLS-DA_Score.png";
|
||
my $vsopls_File="$indir/$vs1/Statistical_Analysis/OPLS-DA_Score.png";
|
||
#Loading Scatter Plot [M1].png
|
||
my $vsloading_File="$indir/$vs1/Statistical_Analysis/OPLS-DA_Loading.png";
|
||
|
||
my @picsa = glob "$indir/$vs1/Statistical_Analysis/*.png";
|
||
if (scalar(@picsa) != 0) {
|
||
`cp $vspca_File $template_pca_File/$vs1\_pca.png`;
|
||
`cp $vspls_File $template_pca_File/$vs1\_pls.png`;
|
||
`cp $vsopls_File $template_pca_File/$vs1\_opls.png`;
|
||
`cp $vsloading_File $template_pca_File/$vs1\_loading.png`;
|
||
}
|
||
|
||
#KEGG-GSEA分析
|
||
# my @gseakegg_table =glob ("$indir/06_GSEA_dir/*/KEGG/KEGG_GSEA.xls");
|
||
# if (scalar(@gseakegg_table) != 0) {
|
||
# # 获取第一个文件的完整路径
|
||
# my $file_path = $gseakegg_table[-4];
|
||
# # 拆分路径
|
||
# my @dirs = split('/', $file_path);
|
||
# # 假设路径结构:... /06_GSEA_dir/对比组名/KEGG/KEGG_GSEA.xls
|
||
# # 对比组名位于倒数第三位
|
||
# $gseakegg_vs = $dirs[-3]; # 索引-3表示倒数第三个
|
||
my $gseakegg_table_file="$indir/06_GSEA_dir/$vs1/KEGG/KEGG_GSEA.xls";
|
||
my $template_gseakegg_File="$indir/html_report/template/KEGG_GSEA.xls";
|
||
&HeadN20($gseakegg_table_file,$template_gseakegg_File);
|
||
# }
|
||
|
||
mkdir "$indir/html_report/template/diff_analysis" unless (-d "$indir/html_report/template/diff_analysis") ;
|
||
&Cnv("$indir/$vs1/Statistical_Analysis/Differentially_Expressed_Metabolites.xls","$indir/html_report/template/diff_analysis/Diff.xls");
|
||
|
||
mkdir "$indir/html_report/template/KEGG_Analysis" unless (-d "$indir/html_report/template/KEGG_Analysis") ;
|
||
&Cnv("$indir/$vs1/KEGG_Analysis/KEGG_Pathway.xls","$indir/html_report/template/KEGG_Analysis/KEGG_Pathway.xls");
|
||
&Cnv("$indir/$vs1/KEGG_Analysis/Metabolite_Mapping.xls","$indir/html_report/template/KEGG_Analysis/Mapping.xls");
|
||
|
||
mkdir "$indir/html_report/template/Pathway_Analysis" unless (-d "$indir/html_report/template/Pathway_Analysis") ;
|
||
&Cnv("$indir/$vs1/Pathway_Analysis/Pathway_Analysis.xls","$indir/html_report/template/Pathway_Analysis/Pathway.xls");
|
||
|
||
if ($repeat eq "Y") {
|
||
#呈现最显著的5个代谢物
|
||
($diff_meta1, $diff_meta2, $diff_meta3, $diff_meta4, $diff_meta5) = extract_top5_metabolites("$indir/$vs1/Statistical_Analysis/stat.diff.txt");
|
||
}
|
||
#上升下降最显著的差异代谢物
|
||
($max_meta, $min_meta) = find_extreme_metabolites("$indir/$vs1/Statistical_Analysis/stat.diff.txt");
|
||
|
||
# 调用子程序获取对应的上升下降最显著的差异代谢物id
|
||
$max_meta_numid = get_meta_numid("$indir/$vs1/Statistical_Analysis/mean.txt", $max_meta);
|
||
$min_meta_numid = get_meta_numid("$indir/$vs1/Statistical_Analysis/mean.txt", $min_meta);
|
||
|
||
# 调用子程序,最显著的通路
|
||
$pathway_pvalue_min = get_first_pathway("$indir/$vs1/Statistical_Analysis/pathway.txt");
|
||
|
||
$pathway_pvalue_min = "未检出显著通路" if !defined $pathway_pvalue_min || $pathway_pvalue_min eq "";
|
||
|
||
|
||
if (-e "$indir/09_Key_Pathway_Heatmap/key_pathway_metabolite_report_top30.xls") {
|
||
&HeadN30("$indir/09_Key_Pathway_Heatmap/key_pathway_metabolite_report_top30.xls","$indir/html_report/template/key_pathway_metabolite_report_top30.xls");
|
||
}
|
||
|
||
if (-e "$indir/09_Key_Pathway_Heatmap/key_pathway_summary.xls") {
|
||
&HeadN30("$indir/09_Key_Pathway_Heatmap/key_pathway_summary.xls","$indir/html_report/template/key_pathway_summary.xls");
|
||
}
|
||
|
||
if (-e "complete_dir/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls") {
|
||
&HeadN30("complete_dir/08_Common_KEGG_Enrichment/common_kegg_pathway_enrichment.xls","$indir/html_report/template/common_kegg_pathway_enrichment.xls");
|
||
}
|
||
|
||
}
|
||
|
||
# Cnv($infile, $outfile)
#
# Write a tab-separated preview of a spreadsheet to $outfile.
#   $infile  - spreadsheet path handed to ReadData().
#   $outfile - text file to create (truncated if it already exists).
#
# Only the sheet whose index equals the workbook's sheet count is exported.
# Rows whose second column is empty or undefined are skipped, and at most
# 21 rows are written. Undefined cells are written as empty strings.
# A one-line summary of the exported sheet is printed to STDOUT.
# Dies when the workbook cannot be read, reports no sheets, or the output
# file cannot be opened or closed.
sub Cnv {
    my ($infile, $outfile) = @_;

    # Parse the workbook before touching the output so that an unreadable
    # input does not leave a freshly truncated, empty output file behind.
    my $spreadsheet = ReadData($infile)
        or die "Cnv: cannot read spreadsheet '$infile'";
    my $sheet_count = $spreadsheet->[0]{sheets}
        or die "Cnv: no sheets found in '$infile'";

    # NOTE(review): the original looped over the single value $sheet_count,
    # so only the last sheet is exported - confirm that exporting every sheet
    # (1 .. $sheet_count) was not the intent.
    my $sheet   = $spreadsheet->[$sheet_count];
    my $label   = defined $sheet->{label} ? $sheet->{label} : "";
    my $max_col = $sheet->{maxcol} || 0;
    my $max_row = $sheet->{maxrow} || 0;
    printf("%s - %2d: [%-s] %3d Cols, %5d Rows\n", $infile, $sheet_count, $label, $max_col, $max_row);

    open(my $out, ">", $outfile) or die "Cnv: cannot write '$outfile': $!";

    my $printed = 0;
    ROW: for my $row (1 .. $max_row) {
        # Cells are addressed as {cell}[column][row], both 1-based here.
        my @cells = map {
            my $value = $sheet->{cell}[$_][$row];
            defined $value ? $value : "";
        } 1 .. $max_col;

        # Skip rows with nothing in the second column.
        next ROW if !defined $cells[1] || $cells[1] eq "";

        print {$out} join("\t", @cells), "\n";

        # The counter is checked after printing, so up to 21 rows are written.
        last ROW if $printed > 19;
        $printed++;
    }

    close($out) or die "Cnv: cannot close '$outfile': $!";
}
|
||
|
||
# CP($inFile, $outDir)
#
# Copy $inFile to $outDir by running "cp" through the shell.
# Returns the status value reported by system().
sub CP {
    my ($source, $destination) = @_;
    my $command = "cp $source $destination";
    return system($command);
}
|
||
|
||
# HeadN3($infile, $outfile)
#
# When $infile exists, write its first 3 lines to $outfile using the shell
# "head" command. Nothing is run when $infile does not exist.
sub HeadN3 {
    my ($source, $target) = @_;
    my $status = (-e $source) && system("head -3 $source > $target");
    return $status;
}
|
||
# HeadN20($infile, $outfile)
#
# When $infile exists, write its first 20 lines to $outfile using the shell
# "head" command. Nothing is run when $infile does not exist.
sub HeadN20 {
    my ($source, $target) = @_;
    my $status = (-e $source) && system("head -20 $source > $target");
    return $status;
}
|
||
# HeadN30($infile, $outfile)
#
# When $infile exists, write its first 30 lines to $outfile using the shell
# "head" command. Nothing is run when $infile does not exist.
sub HeadN30 {
    my ($source, $target) = @_;
    my $status = (-e $source) && system("head -30 $source > $target");
    return $status;
}
|
||
# extract_top5_metabolites($file)
#
# Return the first-column value of the first five data rows of a
# tab-separated file. The first line of the file is treated as a header and
# skipped; blank lines are ignored so they do not occupy one of the five
# slots. A trailing carriage return is removed from each line.
#
# Always returns a list of exactly five elements; missing entries are undef
# when the file holds fewer than five data rows.
# Dies when $file cannot be opened.
sub extract_top5_metabolites {
    my ($file) = @_;
    my @metabolites;

    open(my $fh, '<', $file) or die "无法打开文件 '$file': $!";

    my $header = <$fh>;    # header line, intentionally discarded
    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/\r\z//;                 # tolerate Windows line endings
        next if $line =~ /^\s*$/;          # blank lines carry no metabolite
        my ($metabolite) = split /\t/, $line;
        push @metabolites, $metabolite;
        last if @metabolites >= 5;         # stop as soon as five were collected
    }
    close $fh;

    # The slice pads the result with undef so callers can always unpack five values.
    return @metabolites[0 .. 4];
}
|
||
|
||
# find_extreme_metabolites($file)
#
# Scan a tab-separated file and return the first-column names of the rows
# holding the largest and the smallest value in the third-from-last column
# (the original author's note identifies that column as LOG_FoldChange).
# Lines starting with "Peak" are treated as header lines and skipped, as are
# rows with fewer than three columns or a non-numeric value. On ties the
# first row encountered wins.
#
# Returns ($max_meta, $min_meta); both are undef when no usable row exists.
# Side effect: the file-level variables $max_log2fc, $min_log2fc, $max_meta
# and $min_meta are overwritten with the result of THIS call.
# Dies when $file cannot be opened.
sub find_extreme_metabolites {
    my ($file) = @_;

    # Track the extremes in per-call lexicals. The original compared directly
    # against the file-level variables, which were never reset, so a second
    # call started from the previous file's extremes and could return them.
    my ($highest_fc, $highest_name, $lowest_fc, $lowest_name);

    open(my $fh, '<', $file) or die "无法打开文件 '$file': $!";
    while (my $line = <$fh>) {
        next if $line =~ /^Peak/;    # header line
        chomp $line;
        my @cols = split /\t/, $line;
        next if @cols < 3;           # no third-from-last column to read

        my $metabolite = $cols[0];
        my $log2fc     = $cols[-3];

        # Skip values that are not plain decimal numbers (e.g. NA).
        next unless defined $log2fc && $log2fc =~ /^-?\d+\.?\d*$/;

        if (!defined $highest_fc || $log2fc > $highest_fc) {
            ($highest_fc, $highest_name) = ($log2fc, $metabolite);
        }
        if (!defined $lowest_fc || $log2fc < $lowest_fc) {
            ($lowest_fc, $lowest_name) = ($log2fc, $metabolite);
        }
    }
    close $fh;

    # Publish the result through the file-level variables the original wrote to.
    ($max_log2fc, $max_meta) = ($highest_fc, $highest_name);
    ($min_log2fc, $min_meta) = ($lowest_fc,  $lowest_name);

    return ($max_meta, $min_meta);
}
|
||
|
||
# get_meta_numid($file, $target_meta)
#
# Look up a metabolite name in a tab-separated file and return the value of
# the first column of the first row whose second column equals $target_meta.
# The comparison is an exact, case-sensitive string match.
# A first line of the form "id<TAB>Peak..." is treated as a header and skipped.
#
# Returns undef when no row matches or when $target_meta is undefined.
# Dies when $file cannot be opened.
sub get_meta_numid {
    my ($file, $target_meta) = @_;
    my $numid;

    open(my $fh, '<', $file) or die "无法打开文件 '$file': $!";

    # An undefined name can never match; skip the scan instead of comparing
    # against undef on every row (which only produces warnings).
    if (defined $target_meta) {
        while (my $line = <$fh>) {
            chomp $line;
            my @cols = split /\t/, $line;

            # Rows without a second column cannot be a header or a match.
            next unless defined $cols[1];

            # Skip the header row.
            next if $. == 1 && $cols[0] eq 'id' && $cols[1] eq 'Peak';

            if ($cols[1] eq $target_meta) {
                $numid = $cols[0];
                last;    # first match wins
            }
        }
    }
    close $fh;

    return $numid;
}
|
||
|
||
|
||
# 定义子程序:提取第一行数据的第一列(跳过标题行)
|
||
#sub get_first_pathway {
|
||
# my ($file) = @_;
|
||
# open(my $fh, '<', $file) or die "无法打开文件 '$file': $!";
|
||
#
|
||
# while (my $line = <$fh>) {
|
||
# next if $. == 1; # 跳过标题行
|
||
# chomp $line;
|
||
# my ($pathway) = split /\t/, $line; # 分割第一列
|
||
# close $fh;
|
||
# return $pathway; # 返回后立即退出
|
||
# }
|
||
#
|
||
# close $fh;
|
||
#}
|
||
|
||
# get_first_pathway($file)
#
# Return the second tab-separated column of the first non-blank line that
# follows the first line of $file (the first line is skipped as a header).
# Returns "" when there is no such line or when that line has no second column.
# Dies when $file cannot be opened.
sub get_first_pathway {
    my ($file) = @_;
    open(my $fh, '<', $file) or die "无法打开文件 '$file': $!";

    my $first_pathway = "";
    my $header = <$fh>;    # header line, discarded

    while (defined(my $row = <$fh>)) {
        chomp $row;
        next unless $row =~ /\S/;    # ignore whitespace-only lines

        my $name = (split /\t/, $row)[1];
        $first_pathway = $name if defined $name;
        last;                        # only the first data line is of interest
    }

    close $fh;
    return $first_pathway;
}
|
||
|
||
|
||
|
||
|
||
# HeadN8($inFile, $outFile)
#
# When $inFile exists, write its first 8 lines to $outFile using the shell
# "head" command. Nothing is run when $inFile does not exist.
sub HeadN8 {
    my ($source, $target) = @_;
    my $status = (-e $source) && system("head -8 $source > $target");
    return $status;
}
|
||
# HeadN10($inFile, $outFile)
#
# When $inFile exists, write its first 10 lines to $outFile using the shell
# "head" command. Nothing is run when $inFile does not exist.
sub HeadN10 {
    my ($source, $target) = @_;
    my $status = (-e $source) && system("head -10 $source > $target");
    return $status;
}
|
||
# decorate($xmlstr)
#
# Return a copy of the given markup string with a newline inserted after
# every "<...>" tag.
sub decorate {
    my ($markup) = @_;
    $markup =~ s/(<[^>]*>)/$1\n/g;
    return $markup;
}
|
||
|
||
# reference($writer, $list)
#
# Emit one empty <ref> tag per entry of $list through $writer->emptyTag().
#   $writer - object providing an emptyTag() method.
#   $list   - array reference of array references; elements 0, 1 and 2 of
#             each entry become the 'name', 'link' and 'id' attributes.
sub reference {
    my ($writer, $list) = @_;
    for my $entry (@{$list}) {
        my ($name, $link, $id) = @{$entry}[0 .. 2];
        $writer->emptyTag('ref', 'name' => $name, 'link' => $link, 'id' => $id);
    }
}
|
||
|
||
|
||
# piclist($name, $desc, $pics, $path, $suffix, $sdesc)
#
# Emit a <pic_list> element containing one <pic> per image through the
# file-level $writer, numbering the list with the file-level counter $pid
# (which is incremented).
#   $name   - figure title.
#   $desc   - note shown with the figure.
#   $pics   - glob pattern selecting the image files.
#   $path   - accepted but not used by this sub.
#   $suffix - pattern removed from the end of each file name to build the
#             picture description (interpolated into a regex).
#   $sdesc  - optional short text appended to each description; defaults to "".
# At most the first 20 matching images are listed.
# Example call (from the original author's note):
#   &piclist("title", "note", "$indir/01.dataQC/*/*.quality.png", "$indir/01.dataQC", "quality.png");
sub piclist {
    my ($name, $desc, $pics, $path, $suffix, $sdesc) = @_;
    $sdesc ||= "";

    $writer->startTag('pic_list', 'name' => "图$pid. $name", 'desc' => "$desc", 'type' => "type1");
    $pid++;

    # Cap the listing at 20 pictures.
    my @images = glob("$pics");
    my @shown  = @images <= 20 ? @images : @images[0 .. 19];

    for my $image (@shown) {
        my $base = basename($image);
        my $dir  = dirname($image);

        # Description = file name without the given suffix, plus the short text.
        (my $sampleID = $base) =~ s/$suffix$//g;

        $writer->emptyTag('pic', 'desc' => "$sampleID$sdesc", 'name' => "$base", 'type' => "type1", 'path' => "$dir/$base");
    }

    $writer->endTag('pic_list');
}
|
||
|
||
|
||
|
||
# filelist($desc, $tabs, $path)
#
# Emit a <file_list> element through the file-level $writer with one <file>
# entry per file matching the glob pattern $tabs.
#   $desc - description attribute of the list.
#   $tabs - glob pattern selecting the files.
#   $path - directory prefix used for each entry's 'path' attribute
#           (joined with the file's base name).
sub filelist {
    my ($desc, $tabs, $path) = @_;

    $writer->startTag('file_list', 'name' => "", 'desc' => "$desc", 'type' => "xls");

    for my $table (glob("$tabs")) {
        my $base = basename($table);
        $writer->emptyTag('file', 'desc' => "", 'name' => "$base", 'action' => "xls", 'path' => "$path/$base", 'type' => "xls");
    }

    $writer->endTag('file_list');
}
|
||
|
||
# gaintime()
#
# Return the current local date formatted as "YYYY-MM-DD".
sub gaintime {
    my ($mday, $mon, $year) = (localtime(time()))[3, 4, 5];
    return sprintf("%4d-%02d-%02d", $year + 1900, $mon + 1, $mday);
}
|
||
|
||
# readConfig($configFile)
#
# Parse $configFile with Config::General and return its contents as a hash
# (flattened to a key/value list on return).
sub readConfig {
    my ($configFile) = @_;
    my $parser   = Config::General->new(-ConfigFile => "$configFile");
    my %settings = $parser->getall;
    return %settings;
}
|
||
|
||
# run_or_die($cmd)
#
# Log $cmd, run it with system(), and log "done." on success.
# When system() reports a non-zero status, an error line is logged and the
# whole program exits with status 1.
sub run_or_die {
    my ($cmd) = @_;

    show_log($cmd);

    if (system($cmd) != 0) {
        show_log("Error: command fail: $cmd");
        exit(1);
    }

    show_log("done.");
    return;
}
|
||
|
||
# show_log($txt)
#
# Print $txt to the currently selected output handle, prefixed with the
# local time as "YYYY-MM-DD HH:MM:SS:" and a tab, followed by a newline.
sub show_log {
    my ($txt) = @_;
    my ($sec, $min, $hour, $day, $mon, $year) = localtime(time());
    my $stamp = sprintf("%4d-%02d-%02d %02d:%02d:%02d", $year + 1900, $mon + 1, $day, $hour, $min, $sec);
    print "$stamp:\t$txt\n";
}
|
||
|
||
# Integer_Three_Digit($integer)
#
# Return the argument with a comma inserted between every group of three
# digits counted from the end of the string (e.g. 1234567 -> "1,234,567").
sub Integer_Three_Digit {
    my ($number) = @_;
    $number =~ s{
        (?<= \d )               # a digit stands before the insertion point
        (?= (?: \d{3} )+ $ )    # and whole groups of three digits follow to the end
    }{,}gx;
    return $number;
}
|
||
# format_figure($figure)
#
# Format a number for display:
#   - a value containing a decimal point is rounded to two decimals, except
#     that a value numerically equal to 100 is returned as 100;
#   - any other value is passed to Integer_Three_Digit() for comma grouping.
# Dies when $figure is undefined.
sub format_figure {
    my ($figure) = @_;

    die unless defined $figure;

    # No decimal point: treat as an integer and group the digits.
    return Integer_Three_Digit($figure) unless $figure =~ /\./;

    return $figure == 100 ? 100 : sprintf("%.2f", $figure);
}
|
||
|
||
# GetDate()
#
# Return the current local date as "YYYY年MM月DD日".
sub GetDate {
    my ($day, $mon, $year) = (localtime(time()))[3, 4, 5];
    return sprintf("%4d年%02d月%02d日", $year + 1900, $mon + 1, $day);
}
|
||
|
||
# GetTime()
#
# Return the current local date and time as "YYYY-MM-DD HH:MM:SS".
sub GetTime {
    my ($sec, $min, $hour, $day, $mon, $year) = localtime(time());
    my @fields = ($year + 1900, $mon + 1, $day, $hour, $min, $sec);
    return sprintf("%4d-%02d-%02d %02d:%02d:%02d", @fields);
}
|
||
|
||
# USAGE()
#
# Print the command-line help text to STDOUT and terminate the program
# (plain "exit", i.e. exit status 0). Arguments are ignored.
# The option list covers every switch registered with GetOptions, with the
# defaults that the main body applies when a switch is not given.
sub USAGE {
    my $usage = <<"USAGE";
Program : $Script
Version : $version
Contact : fangbj <fangbj\@genepioneer.com>
Description:web html report

Usage:
  -id         <path>       input images path, forced
  -cfg        <file>       detail_cfg, forced
  -r          <repeat>     Y or N ,default Y
  -logo       <logo>       Y or N ,default Y
  -qc         <qctype>     default Y
  -plat       <plat>       default LC
  -huaqingsu  <huaqingsu>  default N
  -dingkong   <dingkong>   default N
  -is1        <ISname>     POS example:IS2 (optional)
  -is2        <ISname>     NEG example:IS1 (optional)
  -h                       Help

Example:
  perl $Script -id indir -cfg detail_cfg -r Y -logo Y -is1 IS1 -is2 IS2

USAGE
    print $usage;
    exit;
}