Filter low-expression genes, normalize the data by 'TMM', and apply limma::voom(), limma::lmFit() and limma::treat() to the normalized data

process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  slot = "counts",
  ...
)

# S4 method for DGEList,character,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  slot = "counts",
  ...
)

# S4 method for matrix,vector,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  batch = NULL,
  ...
)

# S4 method for Matrix,vector,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  batch = NULL,
  ...
)

# S4 method for ExpressionSet,character,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  batch = NULL,
  ...
)

# S4 method for SummarizedExperiment,character,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  slot = "counts",
  batch = NULL,
  ...
)

# S4 method for Seurat,character,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  slot = "counts",
  batch = NULL,
  ...
)

Arguments

data

expression object

group_col

character, column name of coldata to specify the DE comparisons

target_group

pattern, specify the group of interest, e.g. NK

normalize

logical, whether the expression in data is raw counts that need to be normalized

filter

a vector of 2 numbers, filter condition to remove low expression genes, the 1st for min.counts (if normalize = TRUE) or CPM/TPM (if normalize = FALSE), the 2nd for sample size 'large.n'

lfc

num, cutoff of logFC for DE analysis

p

num, cutoff of p value for DE analysis and permutation test if feature_selection = "rankproduct"

markers

vector, a vector of gene names listing the gene symbols to be kept regardless of filtration. Default 'NULL' means no special genes need to be kept.

gene_id

character, specify the gene ID type of rownames of expression data when markers is not NULL, could be one of 'ENSEMBL', 'SYMBOL', 'ENTREZ'..., default 'SYMBOL'

slot

character, specify which slot to use, only for DGEList, SummarizedExperiment or Seurat objects, optional, default 'counts'

...

params for voom_fit_treat()

batch

vector of character, column name(s) of coldata to be treated as batch effect factor, default NULL

Value

A DGEList containing vfit by limma::voom() (if normalize = TRUE) and tfit by limma::treat()

Examples

data("im_data_6")
proc_data <- process_data(
  im_data_6,
  group_col = "celltype:ch1",
  target_group = "NK"
)
#>        NK-Neutrophils NK-Monocytes NK-B.cells NK-CD4 NK-CD8
#> Down             4009         3944       3146   2694   2153
#> NotSig           1476         2678       4405   4985   6183
#> Up               4926         3789       2860   2732   2075