Annotate FST/DMR slide windows with genomic features.
Usage
anno_fst_dmr(
gff_file,
format = "auto",
genomic_ranges,
chrom_col = "CHROM",
start_col = "BIN_START",
end_col = "BIN_END",
upstream = 2000,
downstream = 200,
ignore_strand = TRUE,
features = c("promoter", "UTR5", "gene", "exon", "intron", "CDS", "UTR3", "intergenic")
)Arguments
- gff_file
Genomic structural annotation
GFF3/GTFfile path.- format
Format of GFF3/GTF file. ("auto", "gff3", "gtf").
- genomic_ranges
Genomic ranges file (e.g., FST, DMR). (chromosome (prefix: chr), start, end).
- chrom_col
Chromosome column name. (FST: "CHROM", DMR: "chr").
- start_col
Start column name. (FST: "BIN_START", DMR: "start").
- end_col
End column name. (FST: "BIN_END", DMR: "end").
- upstream
Promoter upstream (bp). (2000).
- downstream
Promoter downstream (bp). (200).
- ignore_strand
Whether to ignore strand when computing overlaps. (TRUE).
- features
Which feature types to annotate. c("promoter", "UTR5", "gene", "exon", "intron", "CDS", "UTR3", "intergenic").
Examples
# Example GFF3 file in GAnnoViz
gff_file <- system.file(
"extdata",
"example.gff3.gz",
package = "GAnnoViz")
# Annotate FST
fst_file <- system.file(
"extdata",
"example.fst",
package = "GAnnoViz")
fst <- read.table(
file = fst_file,
header = TRUE,
sep = "\t",
na.strings = NA,
stringsAsFactors = FALSE
)
head(fst)
#> CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST MEAN_FST
#> 1 chr1 1 10000 119 0.034869746 0.027472144
#> 2 chr1 20001 30000 150 -0.009735326 -0.019103882
#> 3 chr1 40001 50000 587 -0.013269573 -0.004886184
#> 4 chr1 60001 70000 256 0.036700847 0.027643440
#> 5 chr1 80001 90000 842 0.020847846 0.011547497
#> 6 chr1 100001 110000 892 0.001750007 0.002121159
res <- anno_fst_dmr(
gff_file = gff_file,
format = "auto",
genomic_ranges = fst_file,
chrom_col = "CHROM",
start_col = "BIN_START",
end_col = "BIN_END",
upstream = 2000,
downstream = 200,
ignore_strand = TRUE,
features = c("promoter", "UTR5", "gene", "exon", "intron", "CDS", "UTR3", "intergenic")
)
#> Import genomic features from the file as a GRanges object ...
#> OK
#> Prepare the 'metadata' data frame ...
#> OK
#> Make the TxDb object ...
#> OK
head(res)
#> CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST MEAN_FST anno_type
#> 1 chr1 1 10000 119 0.034869746 0.027472144 intergenic
#> 2 chr1 20001 30000 150 -0.009735326 -0.019103882 intergenic
#> 3 chr1 40001 50000 587 -0.013269573 -0.004886184 intergenic
#> 4 chr1 60001 70000 256 0.036700847 0.027643440 intergenic
#> 5 chr1 80001 90000 842 0.020847846 0.011547497 intergenic
#> 6 chr1 100001 110000 892 0.001750007 0.002121159 intergenic
#> gene_id
#> 1
#> 2
#> 3
#> 4
#> 5
#> 6
# Annotate DMR
dmr_file <- system.file(
"extdata",
"example.dmr",
package = "GAnnoViz")
dmr <- read.table(
file = dmr_file,
header = TRUE,
sep = "\t",
na.strings = NA,
stringsAsFactors = FALSE
)
head(dmr)
#> chr start end strand pvalue qvalue meth.diff
#> 1 chr1 4930001 4932000 * 1e-07 0.009117 89.850
#> 2 chr1 4936001 4938000 * 1e-08 0.005656 -82.314
#> 3 chr1 5670001 5672000 * 1e-09 0.011313 -69.908
#> 4 chr1 5914001 5916000 * 1e-09 0.001782 -93.310
#> 5 chr1 8576001 8578000 * 1e-08 0.009556 -68.589
#> 6 chr1 9098001 9100000 * 1e-07 0.009733 -68.756
res <- anno_fst_dmr(
gff_file = gff_file,
format = "auto",
genomic_ranges = dmr_file,
chrom_col = "chr",
start_col = "start",
end_col = "end",
upstream = 2000,
downstream = 200,
ignore_strand = TRUE,
features = c("promoter", "UTR5", "gene", "exon", "intron", "CDS", "UTR3", "intergenic")
)
#> Import genomic features from the file as a GRanges object ...
#> OK
#> Prepare the 'metadata' data frame ...
#> OK
#> Make the TxDb object ...
#> OK
head(res)
#> chr start end strand pvalue qvalue meth.diff anno_type
#> 1 chr1 4930001 4932000 * 1e-07 0.009117 89.850 gene,intron
#> 2 chr1 4936001 4938000 * 1e-08 0.005656 -82.314 gene,exon,intron
#> 3 chr1 5670001 5672000 * 1e-09 0.011313 -69.908 gene,intron
#> 4 chr1 5914001 5916000 * 1e-09 0.001782 -93.310 intergenic
#> 5 chr1 8576001 8578000 * 1e-08 0.009556 -68.589 gene,intron
#> 6 chr1 9098001 9100000 * 1e-07 0.009733 -68.756 gene,intron
#> gene_id
#> 1 ENSMUSG00000033813(g),ENSMUSG00000104217(g)
#> 2 ENSMUSG00000033813(g),ENSMUSG00000104217(g)
#> 3 ENSMUSG00000025905(g)
#> 4
#> 5 ENSMUSG00000025909(g)
#> 6 ENSMUSG00000025909(g)