In these example we will show how to benefit from OPeNDAP subsetting.
We will request subsets from worldwide digital elevevation maps (DEMs) that are too big to request as a whole: resulting in approx. 500 MB (over 10,000 x 20,000 points) or 2 GB (over 20,000 x 40,000) for respectively a 1 minute or 30 second grids. The code below was made by Maarten Plieger from KNMI for an NMDC sprint session.
# OPeNDAP_subsetting_with_R_tutorial: Author: Maarten Plieger (KNMI), date: 2011-Oct-10 # $Date: 2011-10-14 15:47:25 +0200 (vr, 14 okt 2011) $ # $Revision: 5340 $ # $HeadURL: https://repos.deltares.nl/repos/OpenEarthTools/trunk/r/io/opendap/OPeNDAP_subsetting_with_R_tutorial.R $ # This document is also posted on a wiki: http://public.deltares.nl/display/OET/OPeNDAP+subsetting+with+R # R script to obtain data from opendap and convert it to a SpatialGridDataFrame # Install the required packages: # To install sp, use the command: install.packages("sp") # To install ncdf for linux, use the command: install.packages("ncdf") # To install ncdf for Windows, you need to install a precompiled version by: # Menu->Packages->Install Package(s) from local zip files and select ncdf.zip # Pre compiled versions for windows with opendap are currently experimental # Load the packages: library("sp") library("ncdf") getOpenDapURLAsSpatialGrid = function(opendapURL,variableName,bboxInDegrees){ print(paste("Loading opendapURL",opendapURL)); # Open the dataset dataset = open.ncdf(opendapURL) bbox=bboxInDegrees; # Get lon and lat variables, which are the dimensions of depth. For this specific dataset they have the names lon and lat G.x=get.var.ncdf(dataset,"lon") G.y=get.var.ncdf(dataset,"lat") # Make a selection of indices which fall in our subsetting window # E.g. translate degrees to indices of arrays. xindicesinwindow=which(G.x>bbox[1]&G.x<bbox[3]); xmin=min(xindicesinwindow) xmax=max(xindicesinwindow) xcount=(xmax-xmin)+1; # needs to be at least 1 yindicesinwindow=which(G.y>bbox[2]&G.y<bbox[4]); ymin=min(yindicesinwindow) ymax=max(yindicesinwindow) ycount=(ymax-ymin)+1;# needs to be at least 1 print(paste("Indices:",xmin,ymin,xmax,ymax));# <== print bbox in indices # Get the variable depth G.z=get.var.ncdf(dataset, variableName,start=c(xmin,ymin), count=c(xcount,ycount)); # Transpose this dataset, sometimes X and Y are swapped #G.z=t(G.z) # At the beginning we loaded the complete lat and lon variables # in order to find which indices belong in our subset window # In order to create a spatialdatagrid frame, we need to make the lat and lon variables # the same size as the requested matrix. E.g. The lat and lon (or y and x) needs to be subsetted: G.sx = G.x[xmin:xmax] G.sy = G.y[ymin:ymax] # Optionally create dims with equal cellsizes # This is sometimes needed because there can be small errors in the values of the x and y variables. makeCellsizesEqual=TRUE if(makeCellsizesEqual){ # Make cellsizes equal for X dimension cellsizex=(G.sx[length(G.sx)]-G.sx[1])/(length(G.sx)-1) tempX=(((1:length(G.sx))-1))*cellsizex+G.sx[1] G.sx=tempX # Make cellsizes equal for Y dimension cellsizey=(G.sy[length(G.sy)]-G.sy[1])/(length(G.sy)-1) tempY=(((1:length(G.sy))-1))*cellsizey+G.sy[1] G.sy=tempY } # We have now x, y, and z complete. In order to create a SpatialGridDataFrame # We need to make the shape of all variables the same # This means that the x and y variables also need to become a matrix. # Create a matrix of X values G.mx=rep(G.sx,dim(G.z)[2]) # Create a matrix field of Y values G.my=(as.vector(t(matrix(rep(G.sy,dim(G.z)[1]),nrow=dim(G.z)[2],ncol=dim(G.z)[1])))) # Make a dataframe of the X, Y and Z values myspatialgrid=data.frame(topo=as.vector(G.z),lon=G.mx,lat=G.my) # We have now gathered all information required to create a SpatialGridDataFrame object # Assign X and Y coordinates coordinates(myspatialgrid)=~lon+lat # Make a gridded dataset, previousely the object was just a bunch of points with XY coodinates gridded(myspatialgrid) = TRUE fullgrid(myspatialgrid) = TRUE # This can be converted to a SpatialGridDataFrame myspatialgrid = as(myspatialgrid, "SpatialGridDataFrame") # Optionally assign a projection string to this object attributes(myspatialgrid)$proj4string=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs <>") myspatialgrid; } # Set the bounding box window we want to subset for, in degrees # order: min-x, min-y, max-x, max-y bboxInDegrees=c(0,50,10,55) url_grid = array(); url_grid[1] = 'http://geoport.whoi.edu/thredds/dodsC/bathy/etopo1_bed_g2'; url_grid[2] = 'http://geoport.whoi.edu/thredds/dodsC/bathy/etopo2_v2c.nc'; url_grid[3] = 'http://geoport.whoi.edu/thredds/dodsC/bathy/srtm30plus_v1.nc'; url_grid[4] = 'http://geoport.whoi.edu/thredds/dodsC/bathy/srtm30plus_v6'; url_grid[5] = 'http://geoport.whoi.edu/thredds/dodsC/bathy/smith_sandwell_v9.1.nc'; url_grid[6] = 'http://geoport.whoi.edu/thredds/dodsC/bathy/smith_sandwell_v11'; # Get the data, choose i=1 till 6 i=6; topogrid=getOpenDapURLAsSpatialGrid(url_grid[i] ,"topo",bboxInDegrees); print(paste("mean:",mean(topogrid$topo))); spplot(topogrid,at=c(-60:40,200),col.regions=bpy.colors,main=url_grid[i],xlab=paste("Mean: ",mean(topogrid$topo))) |
Download the code of this R example (repos,manual download)
Example for transect data, thanx to student Afra Asjes (VU).
##Constructing dataframe from netcdf file in R #Open ncdf4(only works on 32-bit) library(ncdf4) #Download netcdf file url_grid<-"http://opendap.deltares.nl/thredds/fileServer/opendap/rijkswaterstaat/jarkus/grids/jarkusKB134_1110.nc" download.file(url_grid, "jarkusKB134_1110.nc", method = "auto",quiet = FALSE, mode="wb", cacheOK = TRUE) #Open file and check variables ncin<-nc_open("jarkusKB134_1110.nc") print(ncin) #Get variables from netcdf file lon <- ncvar_get(ncin, "lon") nlon <- dim(lon) head(lon) lon.a<-as.vector(lon) lat <- ncvar_get(ncin, "lat", verbose = F) nlat <- dim(lat) head(lat) lat.a<-as.vector(lat) print(c(nlon,nlat)) t<-ncvar_get(ncin,"time") time<-as.vector(t) altitude<-ncvar_get(ncin,"z") dim(altitude) #Select part of the variable of interest, here altitude m <- 1 altitude.slice <- altitude[, , m] altitude.vec<-as.vector(altitude.slice) #Combine into data frame altitude.data<-data.frame(cbind(time,lon.a,lat.a,altitude)) names(altitude.data)<-c("time","lon","lat","altitude") head(altitude.data) #Altitude data contains missing values; delete those rows altitude.data<-altitude.data[!is.na(altitude.data$altitude),] head(altitude.data) #Save as CSV write.csv(altitude.data,file="jarkusKB134_1110.csv",row.names=FALSE) |
See also: OPeNDAP access with R, OPeNDAP subsetting with Matlab,OPeNDAP subsetting with python, PostgreSQL access with R