Waterbase is an online subset of DONAR, the water-related database of Rijkswaterstaat. Data can be downloaded through URL requests. This is an R script that compiles a list of substances and stations into download URLs, checks whether there is any data for each combination, and combines the data into one table.
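Two semicolon-separated CSV input files are needed (the script reads them as `YOURLOCATIONS.csv` and `YOURPARAMETERS.csv` from the working directory):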
WATERBASE_locations
WATERBASE_parameters
```
#########################################
#Update Waterbase files
#Author: M.P. Weeber
#Company: Deltares
#########################################
# NOTE(review): this removes every (non-hidden) object from the current
# environment before the script runs; anything already in the session is lost.
rm(list = objects())
# RCurl supplies getURI(), used below to fetch the Waterbase request page.
library("RCurl")
# stringr supplies str_split(), used below to extract the real download URL.
library("stringr")
# NOTE(review): the packages below are not referenced anywhere in the visible
# script -- confirm whether they are still needed before relying on them.
library("reshape2")
library("tcltk")
library("downloader")
library("ncdf")
library("chron")
# ---- Configuration ----
# All paths are derived from the directory the script is started in.
mainDir <- getwd()
subDir <- "DATA"
subDir2 <- "dump"
destination_data <- file.path(mainDir, subDir)
destination_dump <- file.path(mainDir, subDir2)

# Create Cache folder + dump folder.
# These calls used to be commented out, which made the download step and the
# later setwd(destination_data) fail on a first run. showWarnings = FALSE
# keeps re-runs quiet when the folders already exist.
dir.create(destination_data, showWarnings = FALSE)
dir.create(destination_dump, showWarnings = FALSE)

# Open required locations (semicolon separated). The download loop reads
# column 4 as the location code that is sent to the server.
# NOTE(review): read.csv() defaults to header = TRUE, so the first row of both
# files is taken as column names -- confirm the input files carry a header row.
WATERBASE_locations <- read.csv("YOURLOCATIONS.csv", sep = ";",
                                stringsAsFactors = FALSE)
# Open required parameters (semicolon separated). The download loop reads
# column 3 as the substance name and column 4 as the "&wbwns=..." URL fragment.
WATERBASE_parameters <- read.csv("YOURPARAMETERS.csv", sep = ";",
                                 stringsAsFactors = FALSE)

# Parts of the request URL; location and substance are pasted in between.
WATERBASE_1 <- "http://live.waterbase.nl/wboutput.cfm?loc="
WATERBASE_2 <- "&byear=1700&bmonth=01&bday=01&eyear=2014&emonth=12&eday=31&output=Tekst&whichform=1"

# Messages collected during the run; starts with one empty placeholder entry.
# (The variable shadows the name of base::log, but calls to log() still
# resolve to the function.)
log <- ""
# Loop over every location x parameter combination: ask Waterbase for the
# download link and, when one exists, store the file in destination_data.
# Combinations without a link, and requests that fail, are reported in `log`
# instead of aborting the whole run.
for (i in seq_len(nrow(WATERBASE_locations))) {
  for (j in seq_len(nrow(WATERBASE_parameters))) {
    # Pieces used for the file name, the log and the request URL
    locatie <- WATERBASE_locations[i, 4]
    substantie_code <- WATERBASE_parameters[j, 4]
    substantie_char <- gsub("/", "_",
                            gsub(" ", "_", WATERBASE_parameters[j, 3]))
    get_id <- gsub("%7C", "", gsub("&wbwns=", "", substantie_code))

    # Exactly one target file per combination. The name is built from the
    # single location code that is also sent to the server; pasting the whole
    # data-frame row (as before) yields one path per column, which is not a
    # valid `destfile` for download.file().
    file_location <- file.path(
      destination_data,
      paste0("id", get_id, "-", locatie, "-170001010000-201406140000.txt")
    )

    # Substance name as it appears in the URL.
    # NOTE(review): "/" is replaced before "%", so a slash ends up as "%252F"
    # (encoded twice). Left unchanged because the format the server expects is
    # not visible here -- confirm against a working request.
    substantie <- gsub("%", "%25",
                       gsub("/", "%2F",
                            gsub(" ", "+", WATERBASE_parameters[j, 3])))

    # Request page that contains the link to the actual data file
    WATERBASE_data <- paste0(WATERBASE_1, locatie, substantie_code,
                             substantie, WATERBASE_2)
    page <- tryCatch(getURI(WATERBASE_data), error = function(e) e)
    if (inherits(page, "error")) {
      log <- c(log, paste0("The request for ", locatie, " : ",
                           substantie_char, " failed: ",
                           conditionMessage(page)))
      next
    }

    # The real download URL is the quoted string after the third piece of the
    # page when split on "window.location ="; NA when that piece is missing.
    page_parts <- unlist(str_split(page, "window.location ="))
    REAL_WATERBASE_URL <- unlist(str_split(page_parts[3], "'"))[2]

    # Check if data exists
    if (is.na(REAL_WATERBASE_URL)) {
      # Report to log
      log <- c(log, paste("The combination ", locatie, " : ", substantie_char,
                          " does not exist!", sep = ""))
    } else {
      # Manual download alternative (disabled): open the request in a browser
      # with shell.exec(WATERBASE_data) and wait for the user via readline().
      status <- tryCatch(
        download.file(REAL_WATERBASE_URL, destfile = file_location,
                      mode = "w"),
        error = function(e) e
      )
      if (inherits(status, "error")) {
        log <- c(log, paste0("The download for ", locatie, " : ",
                             substantie_char, " failed: ",
                             conditionMessage(status)))
      }
    }
  }
}
# Check whether each downloaded file contains measurements; remove it otherwise.
# Files are addressed by their full path, so the working directory does not
# have to be changed, and iterating over the names directly is safe when the
# folder is empty (1:length(x) would run with indices 1 and 0).
files_to_check <- list.files(destination_data)
for (file_name in files_to_check) {
  file_path <- file.path(destination_data, file_name)
  # A file of exactly 5 lines is treated as "no measurements"
  if (length(readLines(file_path)) == 5) {
    # save files without data in log
    log <- c(log, paste(file_name, " does not contain data!", sep = ""))
    # remove files without data
    file.remove(file_path)
  }
}
# Combine every remaining data file into one table (`collected`) and write it
# to "collected-data.csv" inside destination_data.
output_name <- "collected-data.csv"
# The result is written into the same folder as the inputs, so a result left
# by a previous run is excluded; otherwise a re-run would merge it back in.
files_to_bind <- setdiff(list.files(destination_data), output_name)
if (length(files_to_bind) == 0) {
  log <- c(log, "No data files found to combine!")
} else {
  # Read each file once: semicolon separated, first 3 lines skipped
  tables <- lapply(
    file.path(destination_data, files_to_bind),
    read.csv, sep = ";", na.strings = "NA", skip = 3
  )
  # Bind everything in one call. The previous loop seeded `collected` with the
  # first file and then bound that file again, duplicating its rows. rev()
  # keeps the row order that loop produced (last file first).
  collected <- do.call(rbind, rev(tables))
  write.csv2(collected, file.path(destination_data, output_name),
             row.names = FALSE)
}
#Evaluate script
# Show the messages collected in `log` (combinations that do not exist, files
# without data). This line and warnings() below rely on R's top-level
# auto-printing.
# NOTE(review): when the script is run through source() without echo or
# print.eval, these two lines produce no output -- confirm how the script is run.
log
# Show the warnings raised during the run
warnings()
# Explicit print, so this marker is shown however the script is executed
print("Done.")
```