#########################################################################:#################################### ############################################################################################################# ############################################################################################################# ### ### Simulate RNA-Seq data ### 1/2/2016 Qike Li ### revised on 2/9/2017 Samir Rachid Zaim ### ### the following code is modified based on the ### ~/Dropbox/Qike/Adaptive cutoff/DEseq related/suppl_ii.Rnw line 1094 ### ############################################################################################################# ############################################################################################################# ############################################################################################################# start <- Sys.time() source('SimulateRNASeqV4_SRZ.R') source('kMEn.R') #load('../data/GO_list.RData') library(data.table) library(stringr) library(parallel) library(doParallel) library(snow) library(snowfall) #library(foreach) #no_cores = detectCores() #registerDoParallel(no_cores) ################################################## ### ### Parameters of Interest ### - Geneset: 40|200 ### - % responsive: 5%|10%|25% ### - FC of Responsive Transcripts: ~N(mu=4, sigma^2) ### - % up v. down regulated: 100%|50%|25% ### - Num Patients: 10|20|30 ### ################################################## ### Load config file & set params go.bp <- read.delim2("/rsgrps/yves/Lab-Tools/GeneSets/GO/2015/go_bp_all.txt", stringsAsFactors=F) neg.bin.param <- fread('../data/heterogenic_negative_binomial_parameters.csv',header=T); names(neg.bin.param)[1] <- 'Gene' setkey(neg.bin.param, Gene) ## change go.bp into GO_list format unique_pathid <- unique(go.bp$path_id) GO_list <- list() transform_to_list <- function(path_id){ GO_list[[path_id]] <- go.bp$symbol[go.bp$path_id %in% path_id] } GO_list <- mclapply(unique_pathid, function(x) transform_to_list(x), mc.cores=detectCores()) GO_list <- setNames(GO_list, unique_pathid) ### #### restrict gene ontologies for which we have all parameter values estimated #### from brain data GO_list_lengths = unlist(lapply(GO_list,length)) go_list_size40_reduced <- Match_FullList(GO_list = GO_list, pathways_of_interest = names(which(GO_list_lengths==40)), tissue_specific_params = neg.bin.param) go_list_size200_reduced <- Match_FullList(GO_list = GO_list, pathways_of_interest = names(which(GO_list_lengths>150 & GO_list_lengths < 250)), tissue_specific_params = neg.bin.param) go_list_40_200 <- list(go_list_size40 = go_list_size40_reduced, go_list_size200=go_list_size200_reduced) rm(go_list_size40_reduced, go_list_size200_reduced) ## test all conditions # simulate N patients with same condition, ############################## ### need to create a function ### that will take a set of ### parameters, and run the ### simulation m=2000 times ### and save the get_t.test_kmeans <- function(i, isogenic=F){ current_seed = i+1000 set.seed(current_seed) cohort <- generate_cohort(neg.bin.param= neg.bin.param , DEG_pct = deg, Up_REG = up, N=N, geneset_size=size, go_list_40_200 = go_list_40_200, verbose = F, isogenic=isogenic) geneset_name = cohort$geneset_name pvals <- get_t.test_pvalues(simulated_matrix = cohort$simulated_matrix ) preds <- get_kmeans_results(simulated_matrix = cohort$simulated_matrix, geneset_name=geneset_name ) return(list(T_test =pvals, KMeans = preds, Cohort=cohort)) } args = commandArgs(trailingOnly=TRUE) print(args) #set.seed(123456) reps=1000 size = as.numeric(args[1]) N = as.numeric(args[2]) deg = as.numeric(args[3]) up = as.numeric(args[4]) file_name = as.character(args[5]) folder_name = 'full_results' isogenic=F print(c(size,N,deg,up,reps)) sfInit(parallel=T, cpus=detectCores(), type='SOCK') sfLibrary(snowfall) sfSource('kMEn.R') sfSource('SimulateRNASeqV4_SRZ.R') sfExport('go_list_40_200','neg.bin.param','size','deg','up','N','GO_list','reps','get_t.test_kmeans', 'isogenic') #cl <- makeCluster(no_cores, type='FORK') #sfClusterSetupRNG() #set.seed(1234, kind="L'Ecuyer-CMRG") #clusterSetRNGStream() full_list = sfLapply( 1:reps, function(x) get_t.test_kmeans(x, isogenic=isogenic)) sfStop() if(isogenic){ folder_name='isogenic' } setwd(paste('/rsgrps/yves/samir/simulation_study2017/results/',folder_name,sep='')) save(full_list, file=paste(file_name,'.R',sep='')) print(paste('saved ', file_name,'.R in output', sep='')) print(start - Sys.time()) setwd('/rsgrps/yves/samir/simulation_study2017/code')