Specify signatures against specific tissues or cell lines by removing genes with high expression in the background.

remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for matrix,matrix,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for DGEList,matrix,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for ANY,DGEList,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for ANY,ExpressionSet,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for ANY,SummarizedExperiment,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for ANY,Seurat,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for ANY,character,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for ANY,missing,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

# S4 method for ANY,ANY,vector
remove_bg_exp(
  sig_data,
  bg_data = "CCLE",
  markers,
  s_group_col = NULL,
  s_target_group = NULL,
  b_group_col = NULL,
  b_target_group = NULL,
  snr = 1,
  ...,
  filter = NULL,
  gene_id = "SYMBOL",
  s_slot = "counts",
  b_slot = "counts",
  ccle_tpm = NULL,
  ccle_meta = NULL
)

Arguments

sig_data

log-transformed expression object, can be matrix or DGEList, as signal data

bg_data

'CCLE' or log-transformed expression object as background data

markers

vector, a vector of gene names listing the gene symbols to be filtered. Must be gene SYMBOLs

s_group_col

vector or character, specifying the groups of the signal data, or the name of the column containing the groups, default NULL

s_target_group

pattern, specify the target group of interest in sig_data, default NULL

b_group_col

vector or character, specifying the groups of the background data, or the name of a column of depmap::depmap_metadata(), e.g. 'primary_disease', default NULL

b_target_group

pattern, specify the target_group of interest in bg_data, e.g. 'colorectal', default NULL

snr

num, the SNR cutoff used to screen for markers that are not expressed or only lowly expressed in bg_data

...

params for grep() to find matched cell lines in bg_data

filter

NULL or a vector of 2 num, filter condition to remove lowly expressed genes in bg_data: the 1st is for logcounts, the 2nd is for sample size

gene_id

character, specify the gene ID type of rownames of expression data, could be one of 'ENSEMBL', 'SYMBOL', 'ENTREZ'..., default 'SYMBOL'

s_slot

character, specify which slot of the DGEList, sce or seurat object to use for sig_data, optional, default 'counts'

b_slot

character, specify which slot of the DGEList, sce or seurat object to use for bg_data, optional, default 'counts'

ccle_tpm

ccle_tpm data from depmap::depmap_TPM(), only used when bg_data = 'CCLE', default NULL

ccle_meta

ccle_meta data from depmap::depmap_metadata(), only used when bg_data = 'CCLE', default NULL

Value

a vector of genes after filtration

Examples

data("im_data_6", "nk_markers", "ccle_crc_5")
remove_bg_exp(
  sig_data = Biobase::exprs(im_data_6),
  bg_data = ccle_crc_5,
  im_data_6$`celltype:ch1`, "NK", ## for sig_data
  "cancer", "CRC", ## for bg_data
  markers = nk_markers$HGNC_Symbol[40:50],
  filter = c(1, 2),
  gene_id = c("ENSEMBL", "SYMBOL")
)
#> 'select()' returned 1:many mapping between keys and columns
#>  [1] "GZMB"    "IL15"    "GZMH"    "GZMK"    "GZMM"    "IFNG"    "IL12RB2"
#>  [8] "IL18R1"  "IL18RAP" "IL2RB"   "KIR2DL1"