library(vroom)
library(tidyverse)

# Geisinger DBD Genes Database: extract the high-confidence genes.
# Reads the full LoF table from `work_dir`, keeps the rows whose Tier is
# "1" or "AR", sorts them by Tier, drops duplicated rows and writes the
# result next to the input file.
work_dir <- "/home/yangrui/data"
file <- "Geisinger_DBD_Genes_Database.Full_LoF_Table_Data.csv"

# Input and output paths below are relative, so they resolve against work_dir.
setwd(work_dir)

# keep high confidence candidate genes:
# 1: genes with three or more de novo pathogenic loss-of-function variants
# AR: genes with autosomal inheritance
df <- vroom(file) %>%
  filter(Tier %in% c("1", "AR")) %>%
  arrange(Tier) %>%
  distinct()

vroom_write(
  df,
  file = "Geisinger_DBD_Genes_Database.Full_LoF_Table_Data.high_confidence.tsv"
)
1 Introduction
Retrieve high-confidence neurodevelopmental disorder (NDD) genes from various databases.
2 Geisinger DBD Genes Database
Go to Geisinger DBD Genes Database.
3 SysNDD
Go to SysNDD.
library(vroom)
library(tidyverse)

# SysNDD: extract the high-confidence genes.
# Reads the SysNDD gene table from `work_dir`, keeps the rows whose
# entities_category is "Definitive", drops duplicated rows and writes the
# result next to the input file.
work_dir <- "/home/yangrui/data"
file <- "SysNDD.sysndd_gene_table.txt"

# Input and output paths below are relative, so they resolve against work_dir.
setwd(work_dir)

# keep high confidence candidate genes: Definitive
df <- vroom(file) %>%
  filter(entities_category %in% c("Definitive")) %>%
  arrange(entities_category) %>%
  distinct()

vroom_write(df, file = "SysNDD.sysndd_gene_table.high_confidence.tsv")
4 SFARI GENE
Go to SFARI GENE.
library(vroom)
library(tidyverse)

# SFARI Gene: extract the high-confidence genes.
# Reads the SFARI Gene export from `work_dir`, keeps the rows whose
# `gene-score` is 1, drops duplicated rows and writes the result next to
# the input file.
work_dir <- "/home/yangrui/data"
file <- "ASD.SFARI_Gene.genes_04-03-2025release_04-15-2025export.csv"

# Input and output paths below are relative, so they resolve against work_dir.
setwd(work_dir)

# keep high confidence candidate genes: 1
# (`gene-score` needs backticks because the column name contains a hyphen)
df <- vroom(file) %>%
  filter(`gene-score` %in% c(1)) %>%
  arrange(`gene-score`) %>%
  distinct()

vroom_write(
  df,
  file = "ASD.SFARI_Gene.genes_04-03-2025release_04-15-2025export.high_confidence.tsv"
)
5 Gene2Phenotype
Go to Gene2Phenotype.
library(vroom)
library(tidyverse)

# Gene2Phenotype (DD panel): extract the high-confidence genes, then keep
# only the entries whose disease is flagged as an NDD.
work_dir <- "/home/yangrui/data"
file <- "DD.Gene2Phenotype.G2P_DD_2025-04-15.csv"

# Input and output paths below are relative, so they resolve against work_dir.
setwd(work_dir)

# Step 1: keep high confidence candidate genes: definitive
df <- vroom(file) %>%
  filter(confidence %in% c("definitive")) %>%
  arrange(confidence) %>%
  distinct()

vroom_write(
  df,
  file = "DD.Gene2Phenotype.G2P_DD_2025-04-15.high_confidence.tsv"
)

# Step 2: whether a disease is an NDD was determined by AI; join that label
# table onto the step-1 output and keep only the rows with is_ndd == "Yes".
high_confidence_file <- "DD.Gene2Phenotype.G2P_DD_2025-04-15.high_confidence.tsv"
ndd_label_file <- "DD.Gene2Phenotype.G2P_DD_2025-04-15.high_confidence.with_NDD_flag.tsv"

high_confidence_df <- vroom(high_confidence_file)
ndd_label_df <- vroom(ndd_label_file)

# The join key is spelled differently in the two tables:
# "disease name" in the high-confidence table, "disease_name" in the labels.
df <- inner_join(
  high_confidence_df,
  ndd_label_df,
  by = c("disease name" = "disease_name")
) %>%
  filter(is_ndd == "Yes") %>%
  distinct()

vroom_write(
  df,
  file = "NDD.Gene2Phenotype.G2P_DD_2025-04-15.high_confidence.tsv"
)
6 GeneTrek
Go to GeneTrek.
library(vroom)
library(tidyverse)

# GeneTrek: extract the high-confidence genes.
# Reads the GeneTrek table from `work_dir`, keeps protein-coding genes that
# are flagged in at least one of the two high-confidence columns, drops
# duplicated rows and writes the result next to the input file.
work_dir <- "/home/yangrui/data"
file <- "NDD.GeneTrek.genetrek_data_2024-04-26.tsv"

# Input and output paths below are relative, so they resolve against work_dir.
setwd(work_dir)

# keep high confidence candidate genes:
# High Confidence Epilepsy Genes
# or
# High Confidence NDD genes v3
# NOTE(review): the two flag columns are used directly as filter conditions,
# so they are assumed to be parsed as logical values - confirm on the input.
df <- vroom(file) %>%
  filter(
    (`High Confidence Epilepsy Genes` | `High Confidence NDD genes v3`) &
      (`Gene type` == "protein-coding")
  ) %>%
  distinct()

vroom_write(
  df,
  file = "NDD.GeneTrek.genetrek_data_2024-04-26.high_confidence.tsv"
)
7 SPARK
Go to SPARK.