Waterbase is an online subset of DONAR, the water-related database of Rijkswaterstaat. Data can be downloaded using URL requests. This is an R script that compiles a list of substances and stations into request URLs, checks whether there is any data for each combination, and combines the data into one table.
Two files are needed:
WATERBASE_locations
WATERBASE_parameters
#########################################
# Update Waterbase files
# Author: M.P. Weeber
# Company: Deltares
#########################################
#
# Purpose:
#   For every location / parameter combination listed in two CSV files,
#   build a Waterbase request URL, follow the redirect embedded in the
#   returned page, download the data file into the "DATA" folder, delete
#   downloads that hold no measurements and bind the remaining files into
#   "collected-data.csv" (written inside the "DATA" folder).
#
# Inputs (read from the working directory, ";"-separated):
#   YOURLOCATIONS.csv  - column 4 is used as the location code in the URL.
#   YOURPARAMETERS.csv - column 3 is used as the substance name, column 4 as
#                        the URL fragment (it is stripped of "&wbwns=" and
#                        "%7C" to obtain the id used in the file name).
#
# Notes:
#   * The workspace is no longer wiped and the working directory is no
#     longer changed; all paths are built explicitly instead.
#   * Messages about missing combinations, failed requests and empty files
#     are collected in `download_log` and printed at the end.

library("RCurl")
library("stringr")
# NOTE(review): the packages below are not referenced anywhere in this
# script; confirm nothing else relies on them before removing the calls.
library("reshape2")
library("tcltk")
library("downloader")
library("ncdf")
library("chron")

mainDir <- getwd()
subDir <- "DATA"
subDir2 <- "dump"
destination_data <- file.path(mainDir, subDir)
# Kept for compatibility; the dump folder is not used further below.
destination_dump <- file.path(mainDir, subDir2)

# Downloads fail when the target folder is missing, so make sure it exists.
if (!dir.exists(destination_data)) {
  dir.create(destination_data, recursive = TRUE)
}

# Open required locations.
# NOTE(review): read.csv() treats the first row as a header by default;
# the original author was unsure whether the files have one - confirm.
WATERBASE_locations <- read.csv(
  "YOURLOCATIONS.csv",
  sep = ";",
  stringsAsFactors = FALSE
)

# Open required parameters.
WATERBASE_parameters <- read.csv(
  "YOURPARAMETERS.csv",
  sep = ";",
  stringsAsFactors = FALSE
)

# Parts of the request URL (location, parameter code and substance name are
# inserted between the two parts).
WATERBASE_1 <- "http://live.waterbase.nl/wboutput.cfm?loc="
WATERBASE_2 <- paste0(
  "&byear=1700&bmonth=01&bday=01",
  "&eyear=2014&emonth=12&eday=31",
  "&output=Tekst&whichform=1"
)

output_name <- "collected-data.csv"
download_log <- character(0)

# Loop over every location / parameter combination.
# seq_len() keeps the loops from running when a table has no rows.
for (i in seq_len(nrow(WATERBASE_locations))) {
  for (j in seq_len(nrow(WATERBASE_parameters))) {
    locatie <- WATERBASE_locations[i, 4]
    substantie_naam <- WATERBASE_parameters[j, 3]
    substantie_code <- WATERBASE_parameters[j, 4]

    # Naming for the file name and the log messages.
    substantie_char <- gsub("/", "_", gsub(" ", "_", substantie_naam))
    get_id <- gsub("%7C", "", gsub("&wbwns=", "", substantie_code))
    # Use the single location code rather than the whole row, so that exactly
    # one destination path is produced per download.
    file_location <- file.path(
      destination_data,
      paste0("id", get_id, "-", locatie, "-170001010000-201406140000.txt")
    )

    # Naming for the URL. The "/" -> "%2F" replacement runs before
    # "%" -> "%25", so slashes end up as "%252F".
    # NOTE(review): confirm this double encoding is what the site expects.
    substantie <- gsub(
      "%", "%25",
      gsub("/", "%2F", gsub(" ", "+", substantie_naam))
    )

    # Link to the intermediate page that holds the real download address.
    WATERBASE_data <- paste0(
      WATERBASE_1, locatie, substantie_code, substantie, WATERBASE_2
    )

    # A failing request is logged instead of aborting the whole run.
    page <- tryCatch(getURI(WATERBASE_data), error = function(e) e)
    if (inherits(page, "error")) {
      download_log <- c(
        download_log,
        paste0(
          "Request for ", locatie, " : ", substantie_char,
          " failed: ", conditionMessage(page)
        )
      )
      next
    }

    # The real address is the first quoted string after the second
    # "window.location =" in the page; it is NA when the page has none.
    page_parts <- unlist(str_split(page, "window.location ="))
    REAL_WATERBASE_URL <- unlist(str_split(page_parts[3], "'"))[2]

    # Check if data exists.
    if (is.na(REAL_WATERBASE_URL)) {
      download_log <- c(
        download_log,
        paste0(
          "The combination ", locatie, " : ", substantie_char,
          " does not exist!"
        )
      )
      next
    }

    # Download the data.
    # Manual alternative (kept from the original, disabled):
    # shell.exec(WATERBASE_data)
    # n <- readline(prompt = "Enter anything to continue or Q to quit: ")
    # if (n == "Q") {
    #   stop("Quited execution")
    # }
    result <- tryCatch(
      download.file(REAL_WATERBASE_URL, destfile = file_location, mode = "w"),
      error = function(e) e
    )
    if (inherits(result, "error")) {
      download_log <- c(
        download_log,
        paste0(
          "Download for ", locatie, " : ", substantie_char,
          " failed: ", conditionMessage(result)
        )
      )
    }
  }
}

# Check whether each file contains measurements, else remove it.
# The combined output of an earlier run lives in the same folder and must not
# be treated as a download.
files_to_check <- setdiff(list.files(destination_data), output_name)
for (file_name in files_to_check) {
  file_path <- file.path(destination_data, file_name)
  # The script treats a file of exactly 5 lines as holding no measurements.
  if (length(readLines(file_path)) == 5) {
    # Save files without data in the log.
    download_log <- c(
      download_log,
      paste0(file_name, " does not contain data!")
    )
    # Remove files without data.
    file.remove(file_path)
  }
}

# Bind the remaining files into one table.
files_to_bind <- setdiff(list.files(destination_data), output_name)
# Debugging aid (disabled): read a single file, e.g.
# data <- read.csv(file.path(destination_data, files_to_bind[10]),
#                  sep = ";", na.strings = "NA", skip = 3)
if (length(files_to_bind) == 0) {
  download_log <- c(download_log, "No data files available to combine.")
} else {
  tables <- lapply(
    file.path(destination_data, files_to_bind),
    read.csv,
    sep = ";",
    na.strings = "NA",
    skip = 3
  )
  # Each file is bound exactly once. rev() keeps the row order of the original
  # loop, which placed later files above earlier ones.
  collected <- do.call(rbind, rev(tables))
  write.csv2(
    collected,
    file.path(destination_data, output_name),
    row.names = FALSE
  )
}

# Evaluate script log.
if (length(download_log) > 0) {
  writeLines(download_log)
}
warnings()
print("Done.")