library(segmented)
library(shiny)
library(lattice)
library(latticeExtra)
da <- read.csv("df02.txt",
sep = "\t")
str(da)
'data.frame': 21 obs. of 3 variables:
$ x : num 0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 ...
$ Y1: num 5.5 7.2 10.4 7.4 7.2 9.1 15.9 12.7 11.3 14.4 ...
$ Y2: num 0.8 1.5 2.6 1.5 1.4 2.1 3.9 3.3 3 3.9 ...
print(xyplot(Y1 ~ x, data = da,
type = c("p", "g", "smooth"), pch = 16,
strip = strip.custom(bg = "white"),
col = "tomato",
xlab = "", ylab = ""),
position = c(0,0, 0.5, 1), more = TRUE)
print(xyplot(Y2 ~ x, data = da,
type = c("p", "g", "smooth"), pch = 16,
strip = strip.custom(bg = "white"),
col = "aquamarine",
xlab = "", ylab = ""),
position = c(0.5,0, 1, 1), more = TRUE)
_______________________________________________________________________
#------------------
g <- numeric(length = 20)
ll <- for (i in 1:20) {
m <- i/10
da$v <- with(da, ifelse(x <= m, 0, x - m))
m1 <- lm(Y1 ~ x + v, data = da)
g[i] <- AIC(m1)
}
which.min(g) # 1.9
[1] 19
#-----------------
# Using the segmented package
s <-lm(Y1 ~ x, data = da)
o <- segmented(s, seg.Z=~x)
summary(o)
***Regression Model with Segmented Relationship(s)***
Call:
segmented.lm(obj = s, seg.Z = ~x)
Estimated Break-Point(s):
Est. St.Err
1.895 0.037
Meaningful coefficients of the linear terms:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.537 1.914 2.370 0.0299 *
x 13.144 1.817 7.234 1.4e-06 ***
U1.x 135.403 61.372 2.206 NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.338 on 17 degrees of freedom
Multiple R-Squared: 0.857, Adjusted R-squared: 0.8317
Convergence attained in 2 iterations with relative change 0
#----------------
# Second Dataset
#---------------
g <- numeric(length = 20)
ll <- for (i in 1:20) {
m <- i/10
da$v <- with(da, ifelse(x <= m, 0, x - m))
m1 <- lm(Y2 ~ x + v, data = da)
g[i] <- AIC(m1)
}
which.min(g) # 1.5
[1] 15
#-----------------
# Using the segmented package
s <-lm(Y2 ~ x, data = da)
o <- segmented(s, seg.Z=~x)
o$psi[2]
[1] 1.542088
\[ \begin{align} y = \beta_{0} + \beta_{1} x , & \quad x \leq c \\ y = \beta_{0} + \beta_{2} x + \theta (\beta_{1}-\beta_{2}), & \quad x > c \end{align} \]
\[ \begin{align} y = \beta_{0} + \beta_{1} x , & \quad x \leq c \\ y = \beta_{0} + \beta_{2} x , & \quad x > c \end{align} \]
m <- lm(Y1 ~ x, data = da)
da$pred <- predict(m)
#----------------------
plot(Y1 ~ x, data = da)
da <- da[order(da$Y1),]
with(subset(da, x < 1), lines(x, pred, col = "2"))
da$sec <- m$coefficients[2] + 8 * da$x
with(subset(da, x >= 1), lines(x, sec, col = "2"))
#-----------------------
\[ \begin{align} y = \beta_{0} + \beta_{1} x , & \quad x \leq c \\ y = \beta_{2} + \beta_{1} x , & \quad x > c \end{align} \]
plot(Y1 ~ x, data = da)
da <- da[order(da$x),]
with(subset(da, x < 1), lines(x, pred, col = "2"))
da$sec <- 1 + m$coefficients[2] * da$x
with(subset(da, x >= 1), lines(x, sec, col = "2"))
\[ \begin{align} y = \beta_{0}, & \quad x \leq c \\ y = \beta_{1}, & \quad x > c \end{align} \]
plot(Y1 ~ x, data = da)
da <- da[order(da$x),]
da$c <- rep(15, nrow(da))
da$b <- rep(25, nrow(da))
with(subset(da, x < 1), lines(x, c, col = "2"))
with(subset(da, x >= 1), lines(x, b, col = "2"))
m1 <- lm(Y1 ~ x, data = da)
m2 <- lm(log(Y1) ~ x, data = da)
da$v <- with(da, ifelse(x <= 1.9, 0, x - 1.9))
m3 <- lm(Y1 ~ x + v, data = da)
m4 <- lm(Y1 ~ poly(x, 2), data = da)
m6 <- glm(Y1 ~ x, family = "Gamma"(link = "inverse"),
data = da)
#------------------------
pred <- data.frame(m1 = predict(m1), m2 = exp(predict(m2)),
m3 = predict(m3), m4 = predict(m4),
m6 = predict(m6, type = "response"))
pred$x <- da$x
pred <- pred[order(pred$x),]
xyplot(Y1 ~ x,
pch = 16,
jitter.x = TRUE,
type = c("p", "g"),
xlab = expression(x[1]),
ylab = list(label = "y",
rot = 0),
scales = list(y = list(tick.number = 8)),
da, col = 1,
key = list(space = "bottom",
cex.title = 1.15,
text = list(c(
paste0("Linear Regression - aic = ",
round(AIC(m1), 3)),
paste0("Transformed Response Variable - aic = ",
round(AIC(m2), 3)),
paste0("Piecewise Regression - aic = ",
round(AIC(m3), 3)),
paste0("Polinomial Regression - aic = ",
round(AIC(m4), 3)),
paste0("Generalized Linear Regression - aic = ",
round(AIC(m6), 3)))),
lines =
list(col = c("yellow",
"springgreen", "steelblue",
"violetred", "tomato"),
lwd = 3))
)+
as.layer(xyplot(m1 + m2 + m3 + m4 + m6 ~ x ,
data = pred, type = "l",
auto.key = TRUE,
col = c("yellow", "springgreen", "steelblue",
"violetred", "tomato"),
lwd = 2))
#-------------------------------------------------------------
# Outlier Exclusion - for convergence of the Non-Linear Model
#------------------------------------------------------------
m1 <- lm(Y1 ~ x, data = da[-21,])
m2 <- lm(log(Y1) ~ x, data = da[-21,])
da$v <- with(da, ifelse(x <= 0.7, 0, x - 0.7))
m3 <- lm(Y1 ~ x + v, data = da[-21,])
m4 <- lm(Y1 ~ poly(x, 2), data = da[-21,])
# Incomplete Gamma Model
m5 <- nls(Y1 ~ (a*(x^b))*exp(-c*x),
start = c(a = 10, b = 0.01, c = -0.9),
data = da[-21,])
m6 <- glm(Y1 ~ x, family = "Gamma"(link = "inverse"),
data = da[-21,])
#------------------------
pred <- data.frame(m1 = predict(m1), m2 = exp(predict(m2)),
m3 = predict(m3), m4 = predict(m4),
m5 = predict(m5),
m6 = predict(m6, type = "response"))
pred$x <- da[-21,]$x
pred <- pred[order(pred$x),]
xyplot(Y1 ~ x,
pch = 16,
jitter.x = TRUE,
type = c("p", "g"),
xlab = expression(x[1]),
ylab = list(label = "y",
rot = 0),
scales = list(y = list(tick.number = 8)),
da, col = 1,
key = list(space = "bottom",
cex.title = 1.15,
text = list(c(
paste0("Linear Regression - aic = ",
round(AIC(m1), 3)),
paste0("Transformed Response Variable - aic = ",
round(AIC(m2), 3)),
paste0("Piecewise Regression - aic = ",
round(AIC(m3), 3)),
paste0("Polinomial Regression - aic = ",
round(AIC(m4), 3)),
paste0("Non-Linear Regression - aic = ",
round(AIC(m5), 3)),
paste0("Generalized Linear Regression - aic = ",
round(AIC(m6), 3)))),
lines =
list(col = c("yellow", "turquoise",
"springgreen", "steelblue",
"violetred", "tomato"),
lwd = 3))
)+
as.layer(xyplot(m1 + m2 + m3 + m4 + m5 + m6 ~ x ,
data = pred, type = "l",
auto.key = TRUE,
col = c("yellow", "turquoise",
"springgreen", "steelblue",
"violetred", "tomato"),
lwd = 2))
The AIC of the transformed response variable model is not comparable to others as it is.
\[ \begin{align} y = \beta_{0} + \beta_{1} x_{1} + \beta_{2} x_{2} + \beta_{3} x_{3}, \\ \\ x_{2}=\left\{ \begin{array}{ll} x_{1}-c_{1}, \quad c_{1} \leq x\\ 0, cc. \\ \end{array} \right. \\ x_{3}=\left\{ \begin{array}{ll} x_{1}-c_{2}, \quad c_{2} \leq x\\ 0, cc. \\ \end{array} \right. \end{align} \]
#----------------------------------------------------------------
da$x2 <- with(da, ifelse(x < 0.5, 0, x-0.5))
da$x3 <- with(da, ifelse(x < 0.5 | x >= 1.3, 0, x-1.3))
da$x4 <- with(da, ifelse(x >= 1.3, 0, x-1.3))
m1 <- lm(Y1 ~ x + x2 + x3, data = da) # With interval
m2 <- lm(Y1 ~ x + x2 + x4, data = da) # Without interval
logLik(m1); logLik(m2)
'log Lik.' -61.6259 (df=5)
'log Lik.' -61.38696 (df=5)
plot(Y1 ~ x, data = da,
main = paste0("Model with the interval - loglik = ",
round(logLik(m1), 4)))
da$pred1 <- predict(m1)
with(subset(da, x < 0.5), lines(x, pred1, col = "2", lwd = 2))
with(subset(da, 0.5 > x | x <= 1.3),
lines(x, pred1, col = "2", lwd = 2))
with(subset(da, x >= 1.3), lines(x, pred1, col = "2", lwd = 2))
plot(Y1 ~ x, data = da, main =
paste0("Model without the interval - loglik = ",
round(logLik(m2), 3)))
da$pred2 <- predict(m2)
with(subset(da, x < 0.5), lines(x, pred2, col = "2", lwd = 2))
with(subset(da, 0.5 > x | x <= 1.3),
lines(x, pred2, col = "2", lwd = 2))
with(subset(da, x >= 1.3), lines(x, pred2, col = "2", lwd = 2))