-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathminimumTime_functions.R
More file actions
233 lines (187 loc) · 7.35 KB
/
minimumTime_functions.R
File metadata and controls
233 lines (187 loc) · 7.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# Functions to calculate the minimum time required to detect trends
# Clean column names (lowercase, snake case, etc.)
clean_vec <- function(x, refactor = FALSE) {
require(magrittr, quietly = TRUE)
if (!(is.character(x) || is.factor(x))) {
return(x)
} else {
old_names <- as.character(x)
new_names <- old_names %>%
gsub("'", "", .) %>%
gsub("\"", "", .) %>%
gsub("%", "percent", .) %>%
gsub("^[ ]+", "", .) %>%
make.names(.) %>%
gsub("[.]+", "_", .) %>%
gsub("[_]+", "_", .) %>%
tolower(.) %>%
gsub("_$", "", .)
return(new_names)
}
}
# Standardize response values
standardize <- function(data) {
# Ensure year is numeric
data$year <- as.numeric(as.character(data$year))
# Standardize the response column
data$stand.response <- (data$response - mean(data$response, na.rm = TRUE)) /
sd(data$response, na.rm = TRUE)
# Reorganize the columns based on what exists
if ("site" %in% names(data)) {
data <- data[, c("site", "year", "response", "stand.response")]
} else {
data <- data[, c("year", "response", "stand.response")]
}
return(data)
}
# Fit linear regression without site data
linefit_no_site <- function(data) {
# Fit a simple linear regression model to standardized response over time
model <- lm(stand.response ~ year, data = na.omit(data))
# Extract model summary
t.model <- coef(summary(model))
slope_pval <- summary(model)$coefficients["year", "Pr(>|t|)"]
# Package key statistics into a clean data frame
output <- data.frame(
start_year = min(data$year, na.rm = TRUE), # First year in the window
N_data = nrow(data), # Number of observations
N_years = length(unique(data$year)), # Number of unique years
slope = t.model["year", "Estimate"], # Slope estimate
slope_se = t.model["year", "Std. Error"], # Slope standard error
p_value = slope_pval, # p-value for slope
intercept = t.model["(Intercept)", "Estimate"], # Intercept estimate
intercept_se = t.model["(Intercept)", "Std. Error"], # Intercept standard error
intercept_p_value = t.model["(Intercept)", "Pr(>|t|)"] # p-value for intercept
)
return(output)
}
# Fit linear regression with site data
linefit_with_site <- function(data) {
# Load required libraries
library(lme4)
library(lmerTest)
data <- na.omit(data) # Clean data
n_sites <- length(unique(data$site)) # Count number of unique sites
if (n_sites < 3) {
# Fallback to simple linear model if not enough sites for random effects
return(linefit_no_site(data))
}
# Fit a linear mixed-effects model with random intercept for site
model <- lmer(stand.response ~ year + (1 | site), data = data)
t.model <- coef(summary(model))
slope_pval <- t.model["year", "Pr(>|t|)"]
# Package fixed-effect statistics into a clean data frame
output <- data.frame(
start_year = min(data$year, na.rm = TRUE),
N_data = nrow(data),
N_years = length(unique(data$year)),
slope = t.model["year", "Estimate"],
slope_se = t.model["year", "Std. Error"],
p_value = slope_pval,
intercept = t.model["(Intercept)", "Estimate"],
intercept_se = t.model["(Intercept)", "Std. Error"],
intercept_p_value = t.model["(Intercept)", "Pr(>|t|)"]
)
return(output)
}
# Function to determine which linefit model to use
linefit_wrapper <- function(data) {
# Decide which model to use based on presence of 'site' column
if ("site" %in% names(data)) {
result <- linefit_with_site(data)
attr(result, "model_type") <- "lmer" # Attach model type info
return(result)
} else {
result <- linefit_no_site(data)
attr(result, "model_type") <- "lm"
return(result)
}
}
# Subset data according to the window size
breakup <- function(data, window, verbose = FALSE) {
remaining <- data # Working copy of data
# Initialize output data frame to store stats for each window
output <- data.frame(
start_year = numeric(0),
N_data = numeric(0),
N_years = numeric(0),
slope = numeric(0),
slope_se = numeric(0),
p_value = numeric(0),
intercept = numeric(0),
intercept_se = numeric(0),
intercept_p_value = numeric(0),
model_type = character(0) # Track which model was used (lm or lmer)
)
# Count total unique years in the dataset
numyears <- length(unique(data$year))
# Loop through the dataset, shifting the window one year at a time
while (numyears > (window - 1)) {
chunk <- subset(remaining, year < (min(year) + window)) # Subset a chunk
# Only process if full window of unique years exists
if (window == length(unique(chunk$year))) {
out <- linefit_wrapper(chunk) # Fit appropriate model
out$model_type <- attr(out, "model_type") # Add model type info
# Optional logging to console
if (verbose) {
message("Fitting model from ", min(chunk$year), " to ", max(chunk$year),
" | Model used: ", out$model_type)
}
output <- rbind(output, out)
}
# Move window forward by removing the earliest year
remaining <- subset(remaining, year > min(year))
numyears <- length(unique(remaining$year))
}
return(output)
}
# Subset data across all window sizes
multiple_breakups <- function(data, verbose = FALSE) {
data1 <- standardize(data) # Standardize response across all data
count <- length(data1$year) # Total number of data points
output <- data.frame() # Empty frame to collect all breakups
# Loop through all possible window sizes (min 3 years)
for (i in 3:count) {
outeach <- breakup(data1, i, verbose = verbose)
output <- rbind(output, outeach)
}
return(output)
}
# Calculate minimum time using trend magnitude metric
min_time_magnitude <- function(data, min_percent = 95, error_multiplier = 1) {
# Step 1: Run all breakups for various window sizes
test <- multiple_breakups(data)
# Step 2: Use the slope of the longest window as the "true" slope
count <- nrow(test)
true_slope <- test[count, "slope"]
true_error <- test[count, "slope_se"] * sqrt(test[count, "N_data"]) * error_multiplier
# Step 3: Define the accepted slope range
max_true <- true_slope + true_error
min_true <- true_slope - true_error
# Step 4: Get list of all window sizes
windows <- unique(test$N_years)
stability <- max(windows)
prop.vec <- c()
# Step 5: Loop through each window size and calculate % within true range
for (i in 1:length(windows)) {
window_length <- windows[i]
test_subset <- test[test$N_years == window_length, ]
number_of_windows <- nrow(test_subset)
# Check if slope falls within the accepted range
correct_subset <- test_subset[
(test_subset$slope < max_true) & (test_subset$slope > min_true), ]
number_of_correct <- nrow(correct_subset)
percentage_correct <- 100 * number_of_correct / number_of_windows
prop.vec <- c(prop.vec, percentage_correct)
if (percentage_correct >= min_percent && window_length < stability) {
stability <- window_length
}
}
# Step 6: Return results as dataframe
df <- data.frame(
window_length = windows,
proportion_correct = prop.vec,
stability_time = stability
)
return(df)
}