/* ================================================================
* Delft FEWS
* ================================================================
*
* Project Info: http://www.wldelft.nl/soft/fews/index.html
* Project Lead: Karel Heynert (karel.heynert@wldelft.nl)
*
* (C) Copyright 2003, by WL | Delft Hydraulics
* P.O. Box 177
* 2600 MH Delft
* The Netherlands
* http://www.wldelft.nl
*
* DELFT-FEWS is a sophisticated collection of modules designed
* for building a FEWS customised to the specific requirements
* of individual agencies. An open modelling approach allows users
* to add their own modules in an efficient way.
*
* ----------------------------------------------------------------
* ShefTimeSeriesParser
* ----------------------------------------------------------------
* (C) Copyright 2003, by WL | Delft Hydraulics
*
* Original Author: Arjen Markus
*/
package nl.wldelft.fews.system.plugin.dataImport;
import nl.wldelft.util.ExceptionUtils;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.io.FileParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.RelativeEquidistantTimeStep;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import org.apache.log4j.Logger;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Time series parser for SHEF version 2.0 files.
 * Note: only the .E and .A messages (including their revision and continuation lines)
 * are implemented; all other lines are ignored. See:
 * http://www.nws.noaa.gov/os/hod/SHManual/SHMan051_shef.htm
 * http://www.nws.noaa.gov/om/water/resources/SHEF_CodeManual_5July2012.pdf
 * <p/>
 * The parser keeps per-file state in instance fields and is therefore not safe for
 * concurrent use of a single instance.
 */
public class ShefTimeSeriesParser implements FileParser<TimeSeriesContentHandler> {
    private static final Logger log = Logger.getLogger(ShefTimeSeriesParser.class);

    public static final String readerType = "SHEF";

    private static final String SINGLE_LOCATION_MULTIPLE_PARAMETERS_TYPE = ".A";
    private static final String SINGLE_LOCATION_MULTIPLE_PARAMETERS_CONTINUATION_TYPE = ".AR";

    // Header record: .E, .ER, .A or .AR. The dot is escaped in BOTH alternatives so that
    // tokens such as "XA" are no longer mistaken for an .A header.
    private static final Pattern COMPILE_PATTERN_ER_AR = Pattern.compile("\\.ER?$|\\.AR?$");
    private static final Pattern COMPILE_PATTERN_ER = Pattern.compile("\\.ER?$");
    // Continuation record: header type followed by a sequence number, e.g. .E1 or .AR2
    private static final Pattern COMPILE_PATTERN_ER_D_AR_D = Pattern.compile("\\.ER?\\d+|\\.AR?\\d+");
    // Date/time related D-codes that may carry the observation time
    private static final Pattern COMPILE_PATTERN_D_STAR = Pattern.compile("DS.*|DN.*|DH.*|DD.*|DM.*|DY.*|DJ.*|DR.*");
    // Any D-code, or an empty field (the empty alternative makes empty fields being skipped as well)
    private static final Pattern COMPILE_PATTERN_D = Pattern.compile("D.*|");
    private static final Pattern COMPILE_PATTERN_DI = Pattern.compile("DI.*");
    private static final Pattern COMPILE_PATTERN_DH_DD = Pattern.compile("DH\\d\\d.*");
    // Matches every NON-digit character; used to strip a time step code down to its number
    private static final Pattern COMPILE_PATTERN_START_DIGIT = Pattern.compile("[^0-9]");
    private static final Pattern COMPILE_PATTERN_DIH = Pattern.compile(".*DIH.*");
    private static final Pattern COMPILE_PATTERN_DIN = Pattern.compile(".*DIN.*");
    private static final Pattern COMPILE_PATTERN_DID = Pattern.compile(".*DID.*");
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    private static final int E_TYPE = 0;
    private static final int A_TYPE = 1;

    private final char quoteChar = '\"';
    // Scratch calendar used to convert parsed date fields to epoch millis. After a call to
    // parseDate it holds the LAST PARSED timestamp, so it must never be read as "now".
    private final Calendar calendar = new GregorianCalendar();

    private TimeSeriesContentHandler contentHandler = null;

    // Variables for parsing the file
    private long time = 0;
    private long dtime = 0;
    private boolean separatorOnLastLine = false;
    private int messageType = E_TYPE;
    // number of space separated fields in front of the first '/' separator of the current line
    private int firstSlashSeparatorIdx = 0;

    /**
     * Parses the given SHEF file and passes all headers, times and values to the content handler.
     *
     * @param file           file to read
     * @param contentHandler receiver of the parsed time series; also supplies the default time zone
     * @throws IOException when the file can not be opened, read or has an unknown format
     */
    @Override
    public void parse(File file, TimeSeriesContentHandler contentHandler) throws IOException {
        this.contentHandler = contentHandler;
        calendar.setTimeZone(this.contentHandler.getDefaultTimeZone());
        // Start every file with a clean state so a re-used parser is not influenced by a previous file
        time = 0;
        dtime = 0;
        separatorOnLastLine = false;
        messageType = E_TYPE;
        if (!readFile(file)) {
            throw new IOException("Error parsing: " + file.getName());
        }
    }

    /**
     * Opens the file, parses it and closes it again. All problems are logged.
     *
     * @return true when the file was opened and parsed successfully
     */
    private boolean readFile(File file) {
        BufferedReader reader;
        try {
            reader = new BufferedReader(new FileReader(file));
        } catch (FileNotFoundException e) {
            log.error(file + " could not be opened.", e);
            return false;
        }
        try {
            if (!parseFile(reader)) {
                log.error("The file " + file + " has unknown format.");
                return false;
            }
            return true;
        } catch (IOException e) {
            log.error("Error while reading the file " +
                    file + " : " + ExceptionUtils.getMessage(e), e);
            return false;
        } finally {
            // in a finally block, so the file handle is also released when parsing fails with a runtime exception
            closeReader(reader);
        }
    }

    /**
     * Closes the reader; a failure to close is logged and otherwise ignored.
     */
    private static void closeReader(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            log.error("Cannot close file " + reader +
                    " : " + ExceptionUtils.getMessage(e), e);
        }
    }

    /**
     * Reads the file line by line and passes the content to the content handler.
     * Comments on the SHEF file format:
     * - The method recognises .E, .ER, .A and .AR header lines and their numbered continuation lines (e.g. .E1)
     * - All other lines are ignored at the moment
     * <p/>
     * Note:
     * We assume that the fields in front of the first '/' are separated by spaces and that all
     * following fields are separated by '/'.
     *
     * @return false as soon as a header line could not be interpreted, true otherwise
     * @throws IOException when reading from the reader fails
     */
    private boolean parseFile(BufferedReader reader) throws IOException {
        boolean okay = true;
        String line;
        while ((line = reader.readLine()) != null && okay) {
            String commentFreeLine = removeCommentsFromLine(line).toString();
            String[] pieces = TextUtils.split(commentFreeLine, ':', '\0', quoteChar, false);
            if (pieces == null || pieces.length == 0) continue;

            // first split with separator '/' will split up line in first part containing spaces (position part)
            // followed by the datastring fields (separated by '/')
            String[] tmpPieces = pieces[0].split("/");
            // String.split drops trailing empty strings, so a line that only contains slashes gives no pieces
            if (tmpPieces.length == 0) continue;

            String[] positionFields = TextUtils.split(tmpPieces[0], ' ', '\"');
            firstSlashSeparatorIdx = positionFields.length;
            String[] fields = new String[tmpPieces.length + positionFields.length - 1];
            System.arraycopy(positionFields, 0, fields, 0, positionFields.length);
            System.arraycopy(tmpPieces, 1, fields, positionFields.length, tmpPieces.length - 1);
            if (fields.length == 0) continue;

            //Header lines start with .E or .ER (but may contain data)
            //Data-only lines start with .Ed or .ERd - d a digit
            if (COMPILE_PATTERN_ER_AR.matcher(fields[0]).matches()) {
                okay = parseHeaderLine(fields);
            } else if (COMPILE_PATTERN_ER_D_AR_D.matcher(fields[0]).matches()) {
                okay = parseContinuationLine(fields, positionFields.length);
            }
            separatorOnLastLine = line.endsWith("/");
        }
        return okay;
    }

    /**
     * Handles a header line (.E, .ER, .A or .AR) including the values that are on the same line.
     *
     * @return false when the header could not be interpreted
     */
    private boolean parseHeaderLine(String[] fields) {
        messageType = COMPILE_PATTERN_ER.matcher(fields[0]).matches() ? E_TYPE : A_TYPE;
        if (messageType == A_TYPE) {
            fields = splitTrailingParameterAndValue(fields);
        }
        if (SINGLE_LOCATION_MULTIPLE_PARAMETERS_TYPE.equals(fields[0])) {
            // no headers all data is on one line with multiple parameters.
            parseMultiParameterSeriesData(fields);
            return true;
        }

        // .E, .ER or .AR. For .AR (revision on earlier measurement) the assumption is that
        // only one parameter at a time is passed.
        if (!getSeriesParameters(fields)) return false;

        // because parameter and timestep are mandatory, values can start two fields after the first slash
        int firstPossibleValueIdx = firstSlashSeparatorIdx + 2;
        if (fields.length == firstPossibleValueIdx
                && SINGLE_LOCATION_MULTIPLE_PARAMETERS_CONTINUATION_TYPE.equals(fields[0])) {
            return getContinuationSeriesData(fields[fields.length - 1]);
        }
        // NOTE(review): the series starts at the first numeric field, so missing value markers (e.g. M)
        // in front of the first number are skipped without advancing the time - confirm this is intended
        for (int i = firstPossibleValueIdx; i < fields.length; i++) {
            if (!isFloat(fields[i])) continue;
            String[] values = new String[fields.length - i];
            System.arraycopy(fields, i, values, 0, values.length);
            return getSeriesData(values);
        }
        return true;
    }

    /**
     * Work around for a bug in OHD output for .A messages: missing '/' slash between parameter id and
     * value. When the last field contains more than one space separated token the first two tokens
     * replace the last field. Remove this when fixed.
     *
     * @return the original array, or a new array that is one element longer
     */
    private static String[] splitTrailingParameterAndValue(String[] fields) {
        int lastIdx = fields.length - 1;
        String[] splitFields = TextUtils.split(fields[lastIdx], ' ', '\"');
        if (splitFields.length <= 1) return fields;
        String[] res = new String[fields.length + 1];
        System.arraycopy(fields, 0, res, 0, lastIdx);
        System.arraycopy(splitFields, 0, res, lastIdx, 2);
        return res;
    }

    /**
     * Handles a numbered continuation line (e.g. .E1) that only contains values.
     *
     * @param fields             all fields of the line, the first one is the record type
     * @param positionFieldCount number of space separated fields in front of the first slash
     */
    private boolean parseContinuationLine(String[] fields, int positionFieldCount) {
        String[] dataFields;
        if (positionFieldCount == 1 && separatorOnLastLine) {
            // slash between rowcontinuation and first value. If previous line ended with a slash
            // a null value is assumed
            fields[0] = null;
            dataFields = fields;
        } else {
            // skip first column with normal linecontinuation
            dataFields = new String[fields.length - 1];
            System.arraycopy(fields, 1, dataFields, 0, dataFields.length);
        }
        return getSeriesData(dataFields);
    }

    /**
     * Removes the comments from a line. A ':' toggles the comment mode on or off,
     * see: http://www.nws.noaa.gov/om/water/resources/SHEF_CodeManual_5July2012.pdf
     *
     * @return the parts of the line that are not inside a comment
     */
    private StringBuilder removeCommentsFromLine(String line) {
        StringBuilder commentFreeLine = new StringBuilder(line.length());
        if (!line.contains(":")) {
            commentFreeLine.append(line);
            return commentFreeLine;
        }
        String[] piecesWithComments = TextUtils.split(line, ':', ':', quoteChar, true);
        // if the line starts with a : , the first entry is a comment and should be skipped.
        int togglePosition = line.startsWith(":") ? 1 : 0;
        for (int i = 0; i < piecesWithComments.length; i++) {
            // only append the pieces for which the comment toggle is off.
            if (i % 2 == togglePosition) commentFreeLine.append(piecesWithComments[i]);
        }
        return commentFreeLine;
    }

    /**
     * @return true when TextUtils.parseFloat accepts the text without a NumberFormatException
     */
    private static boolean isFloat(String value) {
        try {
            TextUtils.parseFloat(value);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * Returns the first field, counting from index 3, that holds a date/time related D-code,
     * or an empty string when there is none.
     */
    private static String findObservationTime(String[] fields) {
        for (int i = 3; i < fields.length; i++) {
            if (COMPILE_PATTERN_D_STAR.matcher(fields[i]).matches()) return fields[i];
        }
        return "";
    }

    /**
     * Handles an .A line: single location, single time, multiple parameters. Example:
     * .A ANAW1 20170215 P DH2400 /DH08 /HGIRX 8.37 /QRIRX 41.69
     * Counting fields from 0:
     * Field 1 is the name of the location
     * Field 2 is the date (possibly without a year)
     * All D fields directly after the first slash are skipped.
     * The remainder is a list of parameter code / parameter value pairs.
     * Lines that are incomplete or have an odd number of code/value tokens are logged and skipped.
     */
    private void parseMultiParameterSeriesData(String[] fields) {
        if (fields.length < 3) {
            log.warn("SHEF import line of type .A (single station, multiple parameters) doesn't contain a location and date. Skipping line");
            return;
        }
        String locationId = fields[1];
        // get time, as a combination of date, [observation time]
        time = parseDate(fields[2], findObservationTime(fields));

        DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader();
        timeSeriesHeader.setLocationId(locationId);

        // the first parameter is the first field after the slash without a D in prefix
        int firstParameterIdx = firstSlashSeparatorIdx;
        while (firstParameterIdx < fields.length && COMPILE_PATTERN_D.matcher(fields[firstParameterIdx]).matches()) {
            firstParameterIdx++;
        }

        List<String> parameterValueList = new ArrayList<>();
        for (int i = firstParameterIdx; i < fields.length; i++) {
            if (fields[i] == null) continue;
            for (String token : WHITESPACE.split(fields[i].trim())) {
                // leading or repeated blanks must not produce empty tokens, these would shift the code/value pairs
                if (!token.isEmpty()) parameterValueList.add(token);
            }
        }
        if (parameterValueList.size() % 2 != 0) {
            log.warn("SHEF import line of type .A (single station, multiple parameters) doesn't contain a consistent number of parameters and values. Skipping line");
            return;
        }
        for (int i = 0; i < parameterValueList.size(); i += 2) {
            String paramId = parameterValueList.get(i);
            float value = parseValue(parameterValueList.get(i + 1));
            timeSeriesHeader.setParameterId(paramId);
            contentHandler.setTimeSeriesHeader(timeSeriesHeader);
            contentHandler.setTime(time);
            contentHandler.setValue(value);
            contentHandler.applyCurrentFields();
        }
    }

    /**
     * Interprets the header fields of an .E, .ER or .AR line, updates the current time and
     * time step and passes a new header to the content handler.
     * Counting fields from 0:
     * Field 1 is the name of the location
     * Field 2 is the date (possibly without a year)
     * Field 3 is the timezone (optional!)
     * observation time (optional)
     * creation date (optional)
     * units code (optional)
     * Data string qualifier (optional)
     * Duration code (optional)
     * parameter code
     * the time interval (only for .E messages)
     *
     * @return false when the line is incomplete, the time is zero or (for .E messages) the time step is zero
     */
    private boolean getSeriesParameters(String[] fields) {
        if (fields.length < 3) {
            log.warn("SHEF import line of type " + fields[0] + " doesn't contain a location and date.");
            return false;
        }
        String locationId = fields[1];
        // get time, as a combination of date, [observation time]
        time = parseDate(fields[2], findObservationTime(fields));

        // get parameter id, i.e. the first field after the slash without a D in prefix
        int ifield = firstSlashSeparatorIdx;
        String parameterId = null;
        for (; ifield < fields.length; ifield++) {
            if (!COMPILE_PATTERN_D.matcher(fields[ifield]).matches()) {
                parameterId = fields[ifield];
                break;
            }
        }
        // get timestep field, mandatory only for .E messages. The search continues at the parameter field
        for (; ifield < fields.length; ifield++) {
            if (COMPILE_PATTERN_DI.matcher(fields[ifield]).matches()) {
                dtime = parseTimeStep(fields[ifield]);
                break;
            }
        }

        boolean equidistant = messageType == E_TYPE;
        DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader();
        timeSeriesHeader.setLocationId(locationId);
        timeSeriesHeader.setParameterId(parameterId);
        if (equidistant) {
            timeSeriesHeader.setTimeStep(RelativeEquidistantTimeStep.getInstance(dtime, time));
            timeSeriesHeader.setForecastTime(time);
        }
        contentHandler.setTimeSeriesHeader(timeSeriesHeader);

        // a zero time (and for .E messages a zero time step) is used as error condition
        if (time == 0) return false;
        return !equidistant || dtime != 0;
    }

    /**
     * Passes a sequence of values to the content handler. After every value, missing values
     * included, the current time is advanced with the current time step.
     *
     * @return always true
     */
    private boolean getSeriesData(String[] fields) {
        for (String field : fields) {
            contentHandler.setTime(time);
            contentHandler.setValue(parseValue(field));
            contentHandler.applyCurrentFields();
            time += dtime;
        }
        return true;
    }

    /**
     * Passes a single value at the current time to the content handler, the time is not advanced.
     *
     * @return always true
     */
    private boolean getContinuationSeriesData(String valueString) {
        contentHandler.setTime(time);
        contentHandler.setValue(parseValue(valueString));
        contentHandler.applyCurrentFields();
        return true;
    }

    /**
     * Converts a SHEF date and observation time to epoch milliseconds in the time zone of the calendar.
     *
     * @param str     date as ccyymmdd (8 characters), yymmdd (6 characters) or mmdd
     * @param timestr observation time code; only DHhh[mm[ss]] is interpreted, everything else results in 00:00:00
     * @return time in milliseconds
     * @throws NumberFormatException           when the date or hour is not numeric
     * @throws StringIndexOutOfBoundsException when the date is shorter than 4 characters
     */
    private long parseDate(String str, String timestr) {
        int year;
        int month;
        int day;
        if (str.length() == 8) {
            year = Integer.parseInt(str.substring(0, 4));
            month = Integer.parseInt(str.substring(4, 6));
            day = Integer.parseInt(str.substring(6, 8));
        } else {
            // The missing year/century has to be derived from the CURRENT date. The shared calendar can not be
            // used for this, after the first parsed date it contains the previously parsed time instead of now.
            Calendar now = new GregorianCalendar(calendar.getTimeZone());
            int currentYear = now.get(Calendar.YEAR);
            int currentMonth = now.get(Calendar.MONTH); // zero based
            if (str.length() == 6) {
                year = 100 * (currentYear / 100) + Integer.parseInt(str.substring(0, 2));
                month = Integer.parseInt(str.substring(2, 4));
                day = Integer.parseInt(str.substring(4, 6));
            } else {
                // only month and day are given. According to shef 2.0 spec take the last 12 months
                // before current and take the year that matches the month day. i.e. check if date is in future
                // with current year, if so take last year
                year = currentYear;
                month = Integer.parseInt(str.substring(0, 2));
                day = Integer.parseInt(str.substring(2, 4));
            }
            // NOTE(review): this window is also applied when a two digit year is given, and it compares a
            // one based month with the zero based Calendar.MONTH. Kept as is - confirm against the SHEF manual.
            if (month > currentMonth + 5) {
                year--;
            }
            if (month < currentMonth - 6) {
                year++;
            }
        }

        // The time string (actually a composite thing)
        // We expect something like: DH12 or DH1230 or DH123059. If not, ignore this field
        int hour = 0;
        int minute = 0;
        int second = 0;
        if (COMPILE_PATTERN_DH_DD.matcher(timestr).matches()) {
            hour = Integer.parseInt(timestr.substring(2, 4));
            // ">=" instead of "==": minutes must also be read when seconds or trailing blanks follow
            if (timestr.length() >= 6) {
                minute = TextUtils.tryParseInt(timestr.substring(4, 6), 0);
            }
            if (timestr.length() >= 8) {
                second = TextUtils.tryParseInt(timestr.substring(6, 8), 0);
            }
        }
        calendar.clear();
        //noinspection MagicConstant
        calendar.set(year, month - 1, day, hour, minute, second); /* Correct for the offset: month numbers start at 0*/
        return calendar.getTimeInMillis();
    }

    /**
     * Converts a time interval code (DIHnn hours, DINnn minutes, DIDnn days) to milliseconds.
     * All non digit characters are removed before the number is parsed, so a sign is ignored.
     *
     * @return the interval in milliseconds, 0 when the unit is not supported or the code contains no number
     */
    private static long parseTimeStep(String str) {
        String digits = COMPILE_PATTERN_START_DIGIT.matcher(str).replaceAll("");
        // no number at all: report zero, the caller handles a zero time step as an error
        if (digits.isEmpty()) return 0L;
        long value = Long.parseLong(digits);
        long scale;
        if (COMPILE_PATTERN_DIH.matcher(str).matches()) {
            scale = 3600L * 1000L;
        } else if (COMPILE_PATTERN_DIN.matcher(str).matches()) {
            scale = 60L * 1000L;
        } else if (COMPILE_PATTERN_DID.matcher(str).matches()) {
            scale = 86400L * 1000L;
        } else {
            scale = 0L;
        }
        return scale * value;
    }

    /**
     * Converts a value field to a float.
     *
     * @return the parsed value, or NaN when the text is null, blank, a missing value marker
     * (+, -, m, mm, M, MM, -9999) or not a number
     */
    private static float parseValue(String valueText) {
        if (valueText == null) return Float.NaN;
        String text = valueText.trim();
        if (text.isEmpty()) return Float.NaN;
        boolean missing = text.equalsIgnoreCase("M") || text.equalsIgnoreCase("MM")
                || text.equals("-") || text.equals("+") || text.equals("-9999");
        if (missing) return Float.NaN;
        try {
            return TextUtils.parseFloat(text);
        } catch (NumberFormatException e) {
            // According to the specs of SHEF:
            // If no legitimate value is found, the value is treated as a null field or no report.
            return Float.NaN;
        }
    }
}
|