-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathPartiallyFD_simulation.r
More file actions
194 lines (160 loc) · 10.2 KB
/
PartiallyFD_simulation.r
File metadata and controls
194 lines (160 loc) · 10.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
##########################################################################################
##########################################################################################
## Supplement for "Partially Observed Functional Data: The Case of Systematically Missing"
## by Dominik Liebl & Stefan Rameseder
## Generate partially observed functional data from different DGPs and apply estimators
rm(list = ls())
library("devtools")
install_github("stefanrameseder/PartiallyFD")
library("PartiallyFD")
##########################################################################################
## Generate Functional Data:
p <- 501 # Number of evaluation points per function
comp_dom <- seq(from = 0, to = 1,len = p) # Seq. of equidistant evaluation points for complete domain
small_dom <- comp_dom[ comp_dom <= 0.5 ] # Seq. of equidistant evaluation points for small domain
nbasis_dgp <- 5 # Number of basis functions
nbasis_est <- 5 # Number of basis functions for estimation process
seq.ev <- seq(from = 10, to = 2, len = nbasis_dgp) # Seq. of eigenvalues or variances
sd <- sqrt(seq.ev[1]) # standard deviation of normal marginal; in our case the same as the first score
xi_means <- c(5,2,0,0,0)
mean_dom <- 1 # mean of right domain border
prob <- 0.9 # binomial probability for normal marginal
basis <- c(BSpline = "bspline", Fourier = "fourier", Monomial = "monomial", Power = "power")
base <- basis[2] # Just for Fourier
x_var <- sum(seq.ev)
alpha <- 0.05 # significance level for bootstraps
B <- 1000 #number of bootstraps
printout <- 1
der <- 1
base <- basis[which(basis == "fourier")]
name <- names(base)
f_stat <- PartiallyFD:::f_stat2 # Choose the test statistic function for Romano Wolf
b_choice <- "Med" # or Max # The choice method for the overall basis selection
maxAllowedBasis <- 51 # The number of maximal allowed overall basis length
basis_seq <- seq(3,maxAllowedBasis,2) # possible basis lengths
derFds <- FALSE # Should the estimator be used with the derivatives of the functional data sample or with the basis projection
percentageCompDom <- 0.02 # how many functions should be forced to be observed over the whole domain
sd_part <- 1 # Which part of the d s should be trnaformed to 0.5 or 1 (here 100%)
xi_1_d_cor <- 0.995 # the correlation in the truncated normal case
d_var <- 1 # The variance of d in the truncated normal cases
sigma_sc <- 3 # the variance of the sigma in the transformation1 case
positions <- c(0.625, 0.75, 0.875, 1)
DGP_names <- c("SD","SC", "ID", "IC")
ver <- "FirstSimulation" # Name of dataset to save
# The Supplied Setting list for all DGPs
suppliedSettings <- list(sd_part = sd_part, xi_1_d_cor = xi_1_d_cor, d_var = d_var, sigma_sc = sigma_sc)
replications <- 500
ns <- c(50, 150, 250, 500) # Different n's
require(parallel) # for parallel spline smoothing
for(n in ns){ # n <- 50
# Version for Saving the .RData
print(version <- paste0(ver,"_p", p, "_n", n, "_R", replications, "_SD", 100*suppliedSettings$sd_part, "_SD", 1000*suppliedSettings$xi_1_d_cor,"_BC_", b_choice,maxAllowedBasis ))
# make the cluster
cl <- makeCluster(8)
# load required libraries
clusterEvalQ(cl, {
library("xtable")
library("systemfit") # install.packages("systemfit") # for SUR estimation
library("fda") # convenient basis functions
library("scales") # colors
library(copula) # install.packages("copula")
library(psych)
library(vcdExtra) # install.packages("vcdExtra")
library(skewt) # install.packages("skewt") Skew T Distribution
library(e1071) # for skewness function
library(MASS) # mvrnorm
library(tmvtnorm) # install.packages("tmvtnorm")
library(xtable) # latex tables
library(MBESS) # install.packages("MBESS")
library(RColorBrewer) # for color definitions
library(forecast) # for arima estimation
require(MASS) # for df
require(coneproj) # cone projections for smoothing parameter lambda
require(orthogonalsplinebasis) # cone projections for smoothing parameter lambda
library(PartiallyFD) # nice corrplots
})
## export required objects
clusterExport(cl , varlist = c("p", "n", "comp_dom", "small_dom", "nbasis_dgp", "nbasis_est", "sigma_sc",
"basis_seq", "seq.ev", "sd", "xi_means", "mean_dom", "prob", "b_choice",
"cor", "basis", "x_var", "alpha", "B", "printout", "der", "f_stat", "derFds", "percentageCompDom",
"DGP_names", "replications", "base", "suppliedSettings", "maxAllowedBasis") )
# For each base in Basis
# for each dgp ind DGPs
# for all replications
sys.time <- system.time(resDGP <- lapply(DGP_names, function(dgp){ # dgp <- "SC"
#system.time(resDGP <- lapply(DGP_names, function(dgp){ # dgp <- "ID"
# Define basis once
basis_fd <- get(paste0("create.",base,".basis"))(rangeval = range(comp_dom), nbasis = 5)
trueMean <- PartiallyFD:::calcTrueMean(basis_fd, xi_means, comp_dom)
# Calculate true covariance via cov(s,t) = sum_i=1^b lambda_i * psi_i(t)*psi_i(s)
trueCov <- PartiallyFD:::calcTrueCovariance(basis_fd, seq.ev, comp_dom, Lfdobj=0)
# Calculate true derivative of covariance via cov''(s,t) = sum_i=1^b lambda_i * psi_i'(t)*psi'_i(s)
trueCovDer <- PartiallyFD:::calcTrueCovariance(basis_fd, seq.ev, comp_dom, Lfdobj=1)
# For all replications
set.seed(10)
resRep <- parLapply(cl, 1:replications, function(rep){ # rep = 1
### Simulate for n cruves xi_i1 and d_i data, where xi_1_mean is the first entry of xi_means
if(dgp == "IC"){
sim_data <- PartiallyFD:::simDataIC(n, xi_1_mean = xi_means[1], ev = seq.ev[1], suppliedSettings)
} else if (dgp == "ID"){
sim_data <- PartiallyFD:::simDataID(n, xi_1_mean = xi_means[1], ev = seq.ev[1], suppliedSettings)
} else if (dgp == "SC"){
sim_data <- PartiallyFD:::simDataSC(n, xi_1_mean = xi_means[1], ev = seq.ev[1], suppliedSettings)
} else if (dgp == "SD"){
sim_data <- PartiallyFD:::simDataSD(n, xi_1_mean = xi_means[1], ev = seq.ev[1], suppliedSettings)
} else {
print("Could not find simulation function")
}
### If the maximum value of d != 1, define percentageCompDom% of the sample as observed on the full domain
sim_data[rev(order(sim_data[ ,"d"]))[1:round( percentageCompDom * n ,0)] ,"d"] <- max(comp_dom)
### Construct corresponding functional data set of n curves observed at p points
simFD <- PartiallyFD:::simMeanBias( sim_data = sim_data, nbasis_dgp = nbasis_dgp, p =p, n = n, seq.ev = seq.ev,
comp_dom = comp_dom, small_dom = small_dom, basis = basis_fd, xi_means = xi_means)
### Calculate the Krauss Mean
krausMean <- rowMeans(simFD$X_ad_m, na.rm = TRUE)
### Choose a number of basis elements b by Minimizing the BIC over all projections onto basis_seq
bicMat <- PartiallyFD:::selBasisLength(X = simFD$X_ad_m, comp_dom, base = "fourier", basis_seq)
### Select an overall basis length b via Median or Max Basis Length
selBasis <- PartiallyFD:::calcMedBasisDim(bicMat, basis_seq)
### Take evaluated psi_i and "der" derivative of psi_i of chosen fourier basis of length selBasis
basis_fd_chosen <- get(paste0("create.",base,".basis"))(rangeval = range(comp_dom), nbasis = selBasis)
Psi_i <- eval.basis( evalarg = comp_dom, basisobj = basis_fd_chosen)
Psi_i_der <- eval.basis( evalarg = comp_dom, basisobj = basis_fd_chosen, Lfdobj = der)
### Estimate coefficients/scores in that chosen basis psi_i, i = 1, ..., selBasis
coefs <- apply(simFD$X_ad_m, 2, function(X_i){ # X_i <- simFD$X_ad_m[, 1]
dom_ind <- which(!is.na(X_i))
y <- as.numeric(na.omit(X_i)) # length(y)
X_mat <- Psi_i[dom_ind, ]
return(coefs <- lm(y ~ -1 + X_mat)$coefficients)
})
### Estimate the FTC Estimator like defined in the paper
ftcMean <- PartiallyFD:::calcFTCEstimator(fds = simFD$X_ad_m, evalBase = Psi_i, evalDer = Psi_i_der, coefs, small_dom, comp_dom, derFds = derFds)
ftcMean_prime <- ftcMean$firstDer
ftcMean <- ftcMean$mean
### Estimate the FTC Covariance like defined in the paper small_dom = comp_dom
ftcCov <- PartiallyFD:::calcFTCCovariance(fds = simFD$X_ad_m, ftcMean = ftcMean, ftcMean_prime = ftcMean_prime, evalBase = Psi_i, evalDer = Psi_i_der, coefs, small_dom, comp_dom)
krausCov <- PartiallyFD:::calcKrausCovariance(fds = simFD$X_ad_m, fmean = krausMean)
if(any(dgp == c("SD", "SC"))){ # dgp = "IC"
krausCov_ftc <- PartiallyFD:::calcKrausCovariance(fds = simFD$X_ad_m, fmean = krausMean)
} else {
krausCov_ftc <- krausCov
}
### Apply RomanoWolf with F-Statistic
romWolf <- PartiallyFD:::RomWolf(X = t(coefs), alpha, B, printout = FALSE, d = sim_data[, "d"], f_stat = f_stat) # plot(X[,1], d)
return(list(krausMean = krausMean,
ftcMean = ftcMean,
ftcCov = ftcCov,
krausCov = krausCov,
krausCov_ftc = krausCov_ftc,
selBasis = selBasis,
romWolf = romWolf,
cor = cor(sim_data)))
})
return(resRep)
}))
names(resDGP) <- DGP_names
stopCluster(cl)
print(file <- paste0("PartiallyFD_sim_", version,".RData"))
print(sys.time)
save(resDGP, file = paste0("PartiallyFD_sim_", version,".RData"))
}