R语言:肿瘤突变负荷分析

发布于:2024-05-14 ⋅ 阅读:(142) ⋅ 点赞:(0)

> merge_maf <- function(metadata, path){
  #通过合并path,还有sample sheet前两列得到每一个文件的完整路径
  filenames <- file.path(path, metadata$file_id, metadata$file_name,
                         fsep = .Platform$file.sep)

  message ('############### Merging maf data ################\n',
           '### This step may take a few minutes ###\n')
    #通过lapply循环去读每一个样本的maf,然后通过rbind合并成矩阵,按行来合并
    #colClasses指定所有列为字符串
    mafMatrix <- do.call("rbind", lapply(filenames, function(fl)
      read.table(gzfile(fl),header=T,sep="\t",quote="",fill=T,colClasses="character")))
    return (mafMatrix)
}
#定义去除重复样本的函数FilterDuplicate
> FilterDuplicate <- function(metadata) {
  filter <- which(duplicated(metadata[,'sample']))
  if (length(filter) != 0) {
    metadata <- metadata[-filter,]
  }
  message (paste('Removed', length(filter), 'samples', sep=' '))
  return (metadata)
}
 

#读入maf的sample sheet文件
> metaMatrix.maf=read.table("maf_sample_sheet.tsv",sep="\t",header=T)
#替换.为下划线,转换成小写,sample_id替换成sample
>names(metaMatrix.maf)=gsub("sample_id","sample",gsub("\\.","_",tolower(names(metaMatrix.maf))))
#删掉最后一列sample_type中的空格
> metaMatrix.maf$sample_type=gsub(" ","",metaMatrix.maf$sample_type)

#删掉重复的样本
> metaMatrix.maf <- FilterDuplicate(metaMatrix.maf)
#调用merge_maf函数合并maf的矩阵
> maf_value=merge_maf(metadata=metaMatrix.maf, 
                     path="maf_data"
                     )

> #查看前三行前十列
> maf_value[1:3,1:10]
  Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type
1       PLOD1           5351  WUGSC     GRCh38       chr1       11957858     11957858      +      Missense_Mutation          SNP
2         IVL           3713  WUGSC     GRCh38       chr1      152910652    152910652      +      Nonsense_Mutation          SNP
3       OBSCN          84033  WUGSC     GRCh38       chr1      228256740    228256740      +      Missense_Mutation          SNP

#保存合并后的maf文件
> write.table(file="combined_maf_value.txt",maf_value,row.names=F,quote=F,sep="\t")

#TMB打分

> BiocManager::install("maftools")
> library(maftools)
> laml <- read.maf(maf = "combined_maf_value.txt")
> tmb_table_wt_log = tmb(maf = laml)

#tmb_table_wt_log = tmb(maf = laml): 这行代码调用了 tmb() 函数,计算了基于 MAF 数据的肿瘤突变负荷(TMB)。参数 maf 接受了之前读取的 laml 数据框作为输入,然后将结果赋值给了 tmb_table_wt_log 变量。

write.table(tmb_table_wt_log,file="TMB_log.txt",sep="\t",row.names=F)

#突变负荷分析

> library(limma)
> library(ggplot2)
#install.packages("ggpubr")
> library(ggpubr)
#install.packages("ggExtra")
> library(ggExtra)

> expFile="geneExp.txt"    
> tmbFile="TMB.txt"     

> rt=read.table(expFile, header=T, sep="\t", check.names=F, row.names=1)
> gene=colnames(rt)[1]


> tumorData=rt[rt$Type=="Tumor",1,drop=F]
> tumorData=as.matrix(tumorData)
> rownames(tumorData)=gsub("(.*?)\\-(.*?)\\-(.*?)\\-.*", "\\1\\-\\2\\-\\3\\", rownames(tumorData))
> data=avereps(tumorData)


> tmb=read.table(tmbFile, header=T, sep="\t", check.names=F, row.names=1)
> rownames(tmb)=gsub("(.*?)\\-(.*?)\\-(.*?)\\-.*", "\\1\\-\\2\\-\\3\\", rownames(tmb))
> tmb=avereps(tmb)

> sameSample=intersect(row.names(data), row.names(tmb))
> data=data[sameSample,,drop=F]
> tmb=tmb[sameSample,,drop=F]
> rt=cbind(data, tmb)


> x=as.numeric(rt[,gene])
> y=log2(as.numeric(rt[,"total_perMB_log"])+1)
> df1=as.data.frame(cbind(x,y))
> corT=cor.test(x, y, method="spearman")

> p1=ggplot(df1, aes(x, y)) + 
            xlab(paste0(gene, " expression"))+ylab("Tumor mutation burden")+
            geom_point()+ geom_smooth(method="lm",formula = y ~ x) + theme_bw()+
            stat_cor(method = 'spearman', aes(x =x, y =y))
p1

> p2=ggMarginal(p1, type = "density", xparams = list(fill = "orange"),yparams = list(fill = "blue"))

> p2

学习交流


网站公告

今日签到

点亮在社区的每一天
去签到