## ----a1, message=FALSE-------------------------------------------------------- library(agrodata) d <- data_rice("yield") head(d, n=3) ## ----y1----------------------------------------------------------------------- length(unique(d$fid)) ## ----y10---------------------------------------------------------------------- table(d$year) ## ----y20---------------------------------------------------------------------- unique(d$zone) ## ----y30---------------------------------------------------------------------- boxplot(d$y) ## ----y35---------------------------------------------------------------------- sort(d$y)[1:40] ## ----y50---------------------------------------------------------------------- yzt <- aggregate(d$y, d[, c("zone", "year")], median, na.rm=TRUE) colnames(yzt)[3] = "y_zt" head(yzt, 3) ## ----y60---------------------------------------------------------------------- yz <- aggregate(d$y, d[, "zone", drop=FALSE], median, na.rm=TRUE) colnames(yz)[2] = "y_z" head(yz, 3) ## ----y70---------------------------------------------------------------------- n <- aggregate(d$fid, d[,c("zone", "year")], length) colnames(n)[3] = "n" head(n, 3) ## ----y77---------------------------------------------------------------------- z <- merge(n, yzt) z <- merge(z, yz) head(z, 3) ## ----y90---------------------------------------------------------------------- z$y_dz <- z$y_zt / z$y_z head(z, 3) ## ----y95, echo=FALSE---------------------------------------------------------- x <- c("region", "Region", "zone", "zone name", "year", "year", "fid", "farmer ID", "y", "reported yield for a farmer (i) in a year (t)", "y_zt", "mean zone yield in a year t", "y_z", "long term average zone yield") m <- matrix(x, ncol=2, byrow=TRUE) colnames(m) <- c("variable", "description") knitr::kable(m) ## ----y75---------------------------------------------------------------------- saveRDS(z, "zonal_rice_yield.rds") ## ----y79---------------------------------------------------------------------- dz <- merge(d, z) saveRDS(dz, "hh_rice_yield.rds") ## ----a10, message=FALSE------------------------------------------------------- idx <- data_rice("indices") head(idx, n=3) ## ----rice88, echo=FALSE------------------------------------------------------- x = c("region", "Region", "zone", "zone name", "year", "year", "ndvi", "ndvi index", "evi index", "evi", "gpp", "Gross Primary Productivity index", "et", "evapo-transpiration index", "lai", "leaf area index") m <- matrix(x, ncol=2, byrow=TRUE) colnames(m) <- c("variable", "description") knitr::kable(m) ## ----a20---------------------------------------------------------------------- z <- merge(z, idx[,-1]) head(z, 3) ## ----a15---------------------------------------------------------------------- cr <- cor(z[,-c(1:3, 5)]) cr[,1:2] ## ----a25, fig.width=10-------------------------------------------------------- par(mfrow=c(1,2)) plot(z$gpp, z$y_zt, xlab="GPP index", ylab="Yield (kg/ha)") plot(z$evi, z$y_zt, xlab="EVI index", ylab="Yield (kg/ha)") ## ----reg10-------------------------------------------------------------------- m1 <- lm (y_zt ~ gpp, data=z) cf <- coefficients(m1) cf plot(y_zt ~ gpp, data=z) abline(m1, col="red") summary(m1) ## ----reg20-------------------------------------------------------------------- m2 <- lm (y_zt ~ evi, data=z) summary(m2) ## ----reg30-------------------------------------------------------------------- m3 <- lm (y_zt ~ gpp + evi, data=z) summary(m3) ## ----reg40-------------------------------------------------------------------- m4 <- lm (y_zt ~ gpp + evi +I(gpp^2) + zone, data=z) summary(m4) ## ----rice99, message=FALSE---------------------------------------------------- library(randomForest) rf <- randomForest(y_zt ~ gpp+evi+ndvi+gpp+et, data=z) rf ## ----reg50-------------------------------------------------------------------- models <- list(m1, m2, m3, m4) r2 <- sapply(models, function(i) round(summary(i)$r.squared, 2)) ar2 <- sapply(models, function(i) round(summary(i)$adj.r.squared, 2)) aic <- sapply(models, AIC) r <- rbind(r2=r2, ar2=ar2, aic=round(aic, 2)) colnames(r) <- c("m1", "m2", "m3", "m4") r ## ----rmse--------------------------------------------------------------------- rmse <- function(observed, predicted){ i <- observed < 1500 error <- observed[i] - predicted[i] sqrt(mean(error^2)) } ## ----reg60-------------------------------------------------------------------- library(agro) k <- 5 set.seed(123) f <- agro::make_groups(z, k) result <- matrix(nrow=k, ncol=5) colnames(result) <- c("m1", "m2", "m3", "m4", "rf") ## ----reg61-------------------------------------------------------------------- for (i in 1:k) { train <- z[f!=i, ] test <- z[f==i, ] cm1 <- lm (y_zt ~ gpp, data=train) cm2 <- lm (y_zt ~ evi, data=train) cm3 <- lm (y_zt ~ gpp + evi, data=train) cm4 <- lm (y_zt ~ gpp + evi +I(gpp^2) + zone, data=train) p <- predict(cm1, test) result[i,"m1"] <- rmse(test$y_zt, p) result[i,"m2"] <- rmse(test$y_zt, predict(cm2, test)) result[i,"m3"] <- rmse(test$y_zt, predict(cm3, test)) result[i,"m4"] <- rmse(test$y_zt, predict(cm4, test)) crf <- randomForest(y_zt ~ gpp+evi+ndvi+gpp+et, data=train) result[i,"rf"] <- rmse(test$y_zt, predict(crf, test)) } colMeans(result) ## ----rice100------------------------------------------------------------------ plot(z$y_zt, predict(m1), xlim=c(750, 3000), ylim=c(750, 3000), xlab="observed yield (kg/ha)", ylab="predicted yield (kg/ha)") abline(a=0,b=1, col="red") ## ----rice110------------------------------------------------------------------ mods <- list(m1=m1, m2=m2, m3=m3, m4=m4) saveRDS(mods, "rice_models.rds")