process data — process

Filter out low-expression genes, normalize the data by 'TMM', and then apply limma::voom(), limma::lmFit() and limma::treat() to the normalized data.

process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  slot = "counts",
  ...
)

# S4 method for DGEList,character,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  slot = "counts",
  ...
)

# S4 method for matrix,vector,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  batch = NULL,
  ...
)

# S4 method for Matrix,vector,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  batch = NULL,
  ...
)

# S4 method for ExpressionSet,character,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  batch = NULL,
  ...
)

# S4 method for SummarizedExperiment,character,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  slot = "counts",
  batch = NULL,
  ...
)

# S4 method for Seurat,character,character
process_data(
  data,
  group_col,
  target_group,
  normalize = TRUE,
  filter = c(10, 10),
  lfc = 0,
  p = 0.05,
  markers = NULL,
  gene_id = "SYMBOL",
  slot = "counts",
  batch = NULL,
  ...
)

Arguments

data: expression object
group_col: character, column name of coldata to specify the DE comparisons
target_group: pattern, specify the group of interest, e.g. NK
normalize: logical, whether the expression in data is raw counts that need to be normalized
filter: a vector of 2 numbers, the filter condition used to remove low expression genes; the 1st is for min.counts (if normalize = TRUE) or CPM/TPM (if normalize = FALSE), the 2nd is for sample size 'large.n'
lfc: num, cutoff of logFC for DE analysis
p: num, cutoff of p value for DE analysis and permutation test if feature_selection = "rankproduct"
markers: vector, a vector of gene names listing the gene symbols to be kept regardless of filtration. Default 'NULL' means no special genes need to be kept.
gene_id: character, specify the gene ID type of the rownames of the expression data when markers is not NULL, could be one of 'ENSEMBL', 'SYMBOL', 'ENTREZ'..., default 'SYMBOL'
slot: character, specify which slot to use; only applies to DGEList, sce or seurat objects, optional, default 'counts'
...: params for voom_fit_treat()
batch: vector of character, column name(s) of coldata to be treated as batch effect factor, default NULL

Value

A DGEList containing vfit by limma::voom() (if normalize = TRUE) and tfit by limma::treat()

Examples

data("im_data_6")
proc_data <- process_data(
  im_data_6,
  group_col = "celltype:ch1",
  target_group = "NK"
)
#>        NK-Neutrophils NK-Monocytes NK-B.cells NK-CD4 NK-CD8
#> Down             4009         3944       3146   2694   2153
#> NotSig           1476         2678       4405   4985   6183
#> Up               4926         3789       2860   2732   2075