1.	library(sim1000G)  
2.	library(FamEvent)  
3.	library(frailtypack)  
4.	  
5.	set.seed(122)  
6.	# Initialize simulator of family data using 1000 genome haplotypes for each cluster  
7.	vcf <- readVCF("region.vcf.gz", maxNumberOfVariants = 100, min_maf = 0.02, max_maf = 0.1)  
8.	readGeneticMap(chromosome = 4)  
9.	startSimulation(vcf, totalNumberOfIndividuals = 2000)  
10.	  
11.	  
12.	# Number of families  
13.	nfam <- 200  
14.	# Define parameters of the model  
15.	Beta <- c(0.5,1.0,0.5,0.3)  
16.	ver <- length(Beta)  
17.	sig2 <- 0.7  
18.	Weibull <- c(0.007,3)  
19.	variation <- "IBD"  
20.	  
21.	  
22.	  # Simulate families  
23.	  SIM$reset()  
24.	  ## For simulations, we set the cM that the regions spans to 4000 to diversify the IBD matrix  
25.	  ## Remove for normal use  
26.	  SIM$cm = seq( 0,4000, length = length(SIM$cm) )  
27.	    
28.	  # Generate family pedigries and their genotypes  
29.	  time_x_families <- function(){  
30.	    fam <- lapply(paste(1:nfam), function(x){ noffspring2 <- sample(c(1:2), 1, replace=TRUE,prob=c(0.5, 0.5))#, 0.1482  
31.	    noffspring3 = sample(c(1,2), noffspring2, replace=TRUE,prob=c(0.8518, 0.1482))#rep(1, noffspring2)#  
32.	    newFamily3generations(x,  noffspring2 = noffspring2, noffspring3)})  
33.	    fam <- do.call(rbind, fam)  
34.	      
35.	    fam  
36.	  }  
37.	    
38.	  fam <- time_x_families()  
39.	    
40.	  # Transform the data to have ids from 1 to n, n - number of individuals  
41.	  nmemb <-as.vector(table(fam$fid))  
42.	  cumsum_nmemb <- c(0,cumsum(nmemb))  
43.	  fam$ID <- 1:dim(fam)[1]  
44.	  fam$fatherID <- 0  
45.	  fam$fatherID <- sapply(1:dim(fam)[1], function(x) fam$ID[as.numeric(fam$father[which(fam$ID==x)])+cumsum_nmemb[fam$fid[x]]])  
46.	  fam$fatherID <- ifelse(fam$father==0, 0, fam$fatherID)  
47.	  fam$fatherID <- as.numeric(fam$fatherID)  
48.	  fam$motherID <- 0  
49.	  fam$motherID <- sapply(1:dim(fam)[1], function(x) fam$ID[as.numeric(fam$mother[which(fam$ID==x)])+cumsum_nmemb[fam$fid[x]]])  
50.	  fam$motherID <- ifelse(fam$mother==0, 0, fam$motherID)  
51.	  fam$motherID <- as.numeric(fam$motherID)  
52.	  fam <- fam[,c(1,8:10,5:7)]  
53.	  colnames(fam) <- c("famID", "ID", "fatherID", "motherID", "gender", "generation", "gtindex")  
54.	    
55.	    
56.	    
57.	  # Create data of genotypes  
58.	  genotype <- rep(0,length(SIM$gt1[1,]))  
59.	  for(i in  1:dim(fam)[1]){  
60.	    gen <- paste(SIM$gt1[as.numeric(fam$gtindex[i]),],SIM$gt2[as.numeric(fam$gtindex[i]),], sep = "")  
61.	    gen <- ifelse(gen == "00", 3, gen)  
62.	    gen <- ifelse(gen%in%c("10","01"), 2, gen)  
63.	    gen <- ifelse(gen == "11", 1, gen)  
64.	    genotype <- rbind(genotype, as.numeric(gen))  
65.	  }  
66.	    
67.	  genotype <- genotype[-1,]  
68.	  colnames(genotype) <- paste("Gen", 1:dim(genotype)[2], sep="")  
69.	    
70.	  # Create mean IBD matrix  
71.	  n = SIM$individuals_generated  
72.	  vec <- rep(0, length(nmemb))  
73.	  vec2 <- vec  
74.	  vec2[2:length(vec)] <- vec[2:length(vec)] + nmemb[-length(nmemb)]   
75.	  vec3 <- cumsum(vec2)+1  
76.	  nmemb2 <- cumsum(nmemb)  
77.	  IBD2matrix <- matrix(rep(0,n^2), nrow = n)   
78.	    
79.	  for(i in 1:length(nmemb)){   
80.	    IBD2matrix[vec3[i]:nmemb2[i],vec3[i]:nmemb2[i]] <- sapply(vec3[i]:nmemb2[i], function(y) {  
81.	      z = sapply(vec3[i]:nmemb2[i], function(x) computePairIBD12(x,y))  
82.	      # names(z) = 1:n  
83.	      z  
84.	    })  
85.	  }  
86.	  colnames(IBD2matrix) = 1:nrow(IBD2matrix);rownames(IBD2matrix) = 1:nrow(IBD2matrix)  
87.	    
88.	  # Generate time-to-event data using simfam function from the package FamEvent  
89.	  fam_FamEvent <- simfam(N.fam = length(unique(fam$famID)), design = "pop+", variation = variation,   
90.	                           base.dist = "Weibull", frailty.dist = "lognormal", depend = sig2,  
91.	                           base.parms = Weibull, vbeta = Beta,  
92.	                           agemin = 0, data.fam = fam, genotype = NULL, IBD = IBD2matrix,  
93.	                           age1 = c(95, 2.5), age2 = c(75, 2))  
94.	    
95.	  fam_FamEvent$gender2 <- ifelse(fam_FamEvent$gender == 0, 2, fam_FamEvent$gender)  
96.	  family <- fam_FamEvent[order(fam_FamEvent$indID),]   
97.	    
98.	  family$t0 <- 0 # for frailtypack, we create dummy variable 0 for starting time of observation  
99.	    
100.	  # Fit the model using function frailtyPenal from frailtypack package  
101.	  fit <- frailtyPenal(Surv(t0, time, status) ~ gender +cluster(famID), data = family, hazard="Weibull",  
102.	                      RandDist = "LogN", print.times = FALSE, init.B = Beta[1], covMatrix1 = IBD2matrix,   
103.	                      recurrentAG = TRUE, maxit = 35,  
104.	                      proband = family$proband, currentage = family$currentage)   
105.	    
106.	    
107.	  #Genotypes included in generating time-to-event data  
108.	    fam_FamEvent <- simfam(N.fam = length(unique(fam$famID)), design = "pop+", variation = variation,   
109.	                           base.dist = "Weibull", frailty.dist = "lognormal", depend = sig2,  
110.	                           base.parms = Weibull, vbeta = Beta,  
111.	                           agemin = 0, data.fam = fam, genotype = genotype[,1:3], IBD = IBD2matrix,  
112.	                           age1 = c(95, 2.5), age2 = c(75, 2))  
113.	  
114.	    
115.	  fam_FamEvent$gender2 <- ifelse(fam_FamEvent$gender == 0, 2, fam_FamEvent$gender)  
116.	  family <- fam_FamEvent[order(fam_FamEvent$indID),]   
117.	    
118.	  family$t0 <- 0 # for frailtypack, we create dummy variable 0 for starting time of observation  
119.	    
120.	  # Fit the model using function frailtyPenal from frailtypack package  
121.	  fit2 <- frailtyPenal(Surv(t0, time, status) ~ gender +cluster(famID), data = family, hazard="Weibull",  
122.	                      RandDist = "LogN", print.times = FALSE, init.B = Beta[1], covMatrix1 = IBD2matrix,   
123.	                      recurrentAG = TRUE, maxit = 35,  
124.	                      proband = family$proband, currentage = family$currentage)      
125.	    

