package nl.wldelft.fews.system.plugin.dataImport; import nl.wldelft.util.ExceptionUtils; import nl.wldelft.util.TextUtils; import nl.wldelft.util.io.FileParser; import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader; import nl.wldelft.util.timeseries.RelativeEquidistantTimeStep; import nl.wldelft.util.timeseries.TimeSeriesContentHandler; import org.apache.log4j.Logger; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Calendar; import java.util.GregorianCalendar; import java.util.List; import java.util.regex.Pattern; /** * TimeSeries parser for SHEF version 2.0. Note: only the .E and .A messages are implemented and * http://www.nws.noaa.gov/os/hod/SHManual/SHMan051_shef.htm * http://www.nws.noaa.gov/om/water/resources/SHEF_CodeManual_5July2012.pdf */ public class ShefTimeSeriesParser implements FileParser<TimeSeriesContentHandler> { private static final Logger log = Logger.getLogger(ShefTimeSeriesParser.class); public static final String readerType = "SHEF"; private static final String SINGLE_LOCATION_MULTIPLE_PARAMETERS_TYPE = ".A"; private static final Pattern COMPILE_PATTERN_A_CONTINUATION = Pattern.compile(".A\\d+"); // Start with a .A followed by one or more digits. private static final String SINGLE_LOCATION_MULTIPLE_PARAMETERS_CONTINUATION_TYPE = ".AR"; private static final Pattern COMPILE_PATTERN_ER_AR = Pattern.compile("\\.ER?$|.AR?$"); private static final Pattern COMPILE_PATTERN_ER = Pattern.compile("\\.ER?$"); private static final Pattern COMPILE_PATTERN_ER_D_AR_D = Pattern.compile("\\.ER?\\d+|\\.AR?\\d+"); private static final Pattern COMPILE_PATTERN_D_STAR = Pattern.compile("DS.*|DN.*|DH.*|DD.*|DM.*|DY.*|DJ.*|DR.*"); private static final Pattern COMPILE_PATTERN_D = Pattern.compile("D.*|"); private static final Pattern COMPILE_PATTERN_DI = Pattern.compile("DI.*"); private static final Pattern COMPILE_PATTERN_DH_DD = Pattern.compile("DH\\d\\d.*"); private static final Pattern COMPILE_PATTERN_START_DIGIT = Pattern.compile("[^0-9]"); private static final Pattern COMPILE_PATTERN_DIH = Pattern.compile(".*DIH.*"); private static final Pattern COMPILE_PATTERN_DIN = Pattern.compile(".*DIN.*"); private static final Pattern COMPILE_PATTERN_DID = Pattern.compile(".*DID.*"); private char quoteChar = '\"'; private Calendar calendar = new GregorianCalendar(); private TimeSeriesContentHandler contentHandler = null; // Variables for parsing the file private long time = 0; private long dtime = 0; private boolean separatorOnLastLine = false; private static final int E_TYPE = 0; private static final int A_TYPE = 1; private int messageType = 0; private String aContinuationLocationId = null; // string array contains all fields before the first / separator private int firstSlashSeparatorIdx = 0; @Override public void parse(File file, TimeSeriesContentHandler contentHandler) throws IOException { this.contentHandler = contentHandler; calendar.setTimeZone(this.contentHandler.getDefaultTimeZone()); boolean isValid = readFile(file); if (!isValid) { throw new IOException("Error parsing: " + file.getName()); } } private boolean readFile(File file) { BufferedReader reader = null; boolean done = false; //Open file try { //noinspection resource reader = new BufferedReader(new FileReader(file)); done = true; } catch (FileNotFoundException e) { log.error(file + " could not be opened.", e); } //Read/parse the file if (done) { try { if (!parseFile(reader)) { done = false; log.error("The file " + file + " has unknown format."); } } catch (IOException e) { done = false; log.error("Error while reading the file " + file + " : " + ExceptionUtils.getMessage(e), e); } closeReader(reader); } return done; } private static void closeReader(BufferedReader reader) { try { reader.close(); } catch (IOException e) { log.error("Cannot close file " + reader + " : " + ExceptionUtils.getMessage(e), e); } } /** * Read file content and store it into the memory * Comments on the SHEF file format: * - The method recognises .ER and .Ed lines only (d=digit) * - All other lines are ignored at the moment * <p/> * Note: * We assume that the fields are separated by spaces and that the * .ER and .E records do not contain timeseries data! * * @return * @throws IOException */ private boolean parseFile(BufferedReader reader) throws IOException { boolean okay = true; String line; while ((line = reader.readLine()) != null && okay) { StringBuilder commentFreeLine = removeCommentsFromLine(line); String[] pieces = TextUtils.split(commentFreeLine.toString(), ':', '\0', quoteChar, false); //noinspection UnusedAssignment String[] fields = TextUtils.split(pieces[0], ' ', '\0', quoteChar, true); // get fields, first part is split by space next part by '/' // first split with separator '/' will split up line in first part containing spaces (position part) // followed by the datastring fields (separated by '/') String[] tmpPieces = pieces[0].split("/"); String[] positionFields = TextUtils.split(tmpPieces[0], ' ', '\"'); firstSlashSeparatorIdx = positionFields.length; fields = new String[tmpPieces.length + positionFields.length - 1]; System.arraycopy(positionFields, 0, fields, 0, positionFields.length); System.arraycopy(tmpPieces, 1, fields, positionFields.length, tmpPieces.length - 1); //Header lines start with .E or .ER (but may comtain data) //Data-only lines start with .Ed or .ERd - d a digit if (fields.length > 0) { if (COMPILE_PATTERN_A_CONTINUATION.matcher(fields[0]).matches()) { parseMultiParameterContinuationSeriesData(fields); } else if (COMPILE_PATTERN_ER_AR.matcher(fields[0]).matches()) { this.messageType = COMPILE_PATTERN_ER.matcher(fields[0]).matches() ? E_TYPE : A_TYPE; // bug in OHD output for .A messages. missing '/' slash between parameter id and // value. Remove this when fixed if (this.messageType == A_TYPE) { String[] splitfields = TextUtils.split(fields[fields.length - 1], ' ', '\"'); if (splitfields.length > 1) { String[] tmpFields = new String[fields.length + 1]; System.arraycopy(fields, 0, tmpFields, 0, fields.length - 1); System.arraycopy(splitfields, 0, tmpFields, fields.length - 1, 2); fields = tmpFields; } } if (SINGLE_LOCATION_MULTIPLE_PARAMETERS_TYPE.equals(fields[0])) { // no headers all data is on one line with multiple parameters. parseMultiParameterSeriesData(fields); } else { // .AR type. Revision on earlier measurement. Assumption is that only one parameter at a time is passed. okay = getSeriesParameters(fields); if (okay) { // see if there are any values on this row, if so start fill series data // because parameter and timestep are mandatory values can be started // from firstSlashSeparator untill end of fields if (fields.length == (firstSlashSeparatorIdx + 2) && SINGLE_LOCATION_MULTIPLE_PARAMETERS_CONTINUATION_TYPE.equals(fields[0])) { String value = fields[fields.length - 1]; okay = getContinuationSeriesData(value); } if (fields.length > firstSlashSeparatorIdx + 2) { for (int i = firstSlashSeparatorIdx + 2; i < fields.length; i++) { if (isFloat(fields[i])) { String[] values = new String[fields.length - i]; System.arraycopy(fields, i, values, 0, fields.length - i); okay = getSeriesData(values); break; } } } } } } else if (COMPILE_PATTERN_ER_D_AR_D.matcher(fields[0]).matches()) { // continued line String[] datafields; if (positionFields.length == 1 && separatorOnLastLine) { // slash between rowcontinuation and first value. If previous line ended with a slash // a null value is assumed fields[0] = null; datafields = fields; } else { // skip first column with normal linecontinuation datafields = new String[fields.length - 1]; System.arraycopy(fields, 1, datafields, 0, fields.length - 1); } okay = getSeriesData(datafields); } separatorOnLastLine = line.endsWith("/"); } } return okay; } private StringBuilder removeCommentsFromLine(String line) { //Split the line into separate fields: //Remove any comment first String[] piecesWithComments = TextUtils.split(line, ':', ':', quoteChar, true); StringBuilder commentFreeLine = new StringBuilder(line.length()); if (!line.contains(":")) { commentFreeLine.append(line); } else { int togglePosition = 0; if (line.startsWith(":")) { togglePosition = 1; // if the line starts with a : , the first entry should be skipped. } if (piecesWithComments.length > 0) { // we found some comments. for (int i = 0; i < piecesWithComments.length; i++) { if (i % 2 == togglePosition) { // the comment toggle is off. // see: http://www.nws.noaa.gov/om/water/resources/SHEF_CodeManual_5July2012.pdf commentFreeLine.append(piecesWithComments[i]); } } } } return commentFreeLine; } private static boolean isFloat(String value) { try { //noinspection UnusedDeclaration Float f = TextUtils.parseFloat(value); } catch (NumberFormatException e) { return false; } return true; } // Continuation of .A field with multiple parameters per line. private void parseMultiParameterContinuationSeriesData(String[] fields) { DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader(); timeSeriesHeader.setLocationId(aContinuationLocationId); List<String> parameterValueList = new ArrayList<>(); for (int i = 1; i < fields.length; i++) { if (fields[i] == null) continue; String[] result = fields[i].split(" "); for (int j = 0; j < result.length; j++) { parameterValueList.add(result[j]); } } writeMultipleParametersSeries(timeSeriesHeader, parameterValueList); } private void writeMultipleParametersSeries(DefaultTimeSeriesHeader timeSeriesHeader, List<String> parameterValueList) { if (parameterValueList.size() % 2 != 0) { if (!parameterValueList.isEmpty()) { // Check on special symbols like DC (date creation). String code = parameterValueList.get(0); if (code.startsWith("DC")) return; // Creation date. Can be ignored. } log.warn("SHEF import line of type .A[0-9]* (single station, multiple parameters) doesn't contain a consistent number of parameters and values. Skipping line"); return; } for (int i = 0; i < parameterValueList.size() / 2; i++) { String paramId = parameterValueList.get(i * 2); float value = parseValue(parameterValueList.get(i * 2 + 1)); timeSeriesHeader.setParameterId(paramId); contentHandler.setTimeSeriesHeader(timeSeriesHeader); contentHandler.setTime(time); contentHandler.setValue(value); contentHandler.applyCurrentFields(); } } private void parseMultiParameterSeriesData(String[] fields) { // .A ANAW1 20170215 P DH2400 /DH08 /HGIRX 8.37 /QRIRX 41.69 // Couting fields from 0: // Field 1 is the name of the location // Field 2 is the date (possibly without a year) // Skip all /D parts. // parameter code 1 // parameter value 1 // .. // parameter code N // parameter value N // get location id String locationId = fields[1]; String date = fields[2]; String observationTime = ""; // get time, as a combination of date, [observation time] // get observation time if exist int ifield = 3; for (; ifield < fields.length; ifield++) { if (COMPILE_PATTERN_D_STAR.matcher(fields[ifield]).matches()) { observationTime = fields[ifield]; break; } } // The start date/time, and the time step: time = parseDate(date, observationTime); aContinuationLocationId =locationId; // Keep location id in case .A continutations are found. DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader(); timeSeriesHeader.setLocationId(locationId); ifield = firstSlashSeparatorIdx; // get parameter id, i.e. the first field without a D in prefix for (; ifield < fields.length; ifield++) { if (!COMPILE_PATTERN_D.matcher(fields[ifield]).matches()) { break; // Index of first parameter was found. } } List<String> parameterValueList = new ArrayList<>(); for (int i = ifield; i < fields.length; i++) { if (fields[i] == null) continue; String[] result = fields[i].split(" "); for (int j = 0; j < result.length; j++) { parameterValueList.add(result[j]); } } writeMultipleParametersSeries(timeSeriesHeader, parameterValueList); } private boolean getSeriesParameters(String[] fields) { // Couting fields from 0: // Field 1 is the name of the location // Field 2 is the date (possibly without a year) // Field 3 is the timezone (optional!) // observation time (optional) // creation date (optional) // units code (optional) // Data string qualifier (optional) // Duration code (optional) // parameter code // the time interval (only for .E messages) // get location id String locationId = fields[1]; String date = fields[2]; String observationTime = ""; // get time, as a combination of date, [observation time] // get observation time if exist int ifield = 3; for (; ifield < fields.length; ifield++) { if (COMPILE_PATTERN_D_STAR.matcher(fields[ifield]).matches()) { observationTime = fields[ifield]; break; } } // The start date/time, and the time step: time = parseDate(date, observationTime); ifield = firstSlashSeparatorIdx; // get parameter id, i.e. the first field without a D in prefix String parameterId = null; for (; ifield < fields.length; ifield++) { if (!COMPILE_PATTERN_D.matcher(fields[ifield]).matches()) { parameterId = fields[ifield]; break; } } // get timestep field, mandatory only for .E messages for (; ifield < fields.length; ifield++) { if (COMPILE_PATTERN_DI.matcher(fields[ifield]).matches()) { String timestep = fields[ifield]; dtime = parseTimeStep(timestep); break; } } DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader(); timeSeriesHeader.setLocationId(locationId); timeSeriesHeader.setParameterId(parameterId); if (this.messageType == E_TYPE) { timeSeriesHeader.setTimeStep(RelativeEquidistantTimeStep.getInstance(dtime, time)); timeSeriesHeader.setForecastTime(time); } contentHandler.setTimeSeriesHeader(timeSeriesHeader); if (this.messageType == E_TYPE) { return time != 0 && dtime != 0; //AM: error conditions? } else { return time != 0; } } private boolean getSeriesData(String[] fields) { for (int i = 0; i < fields.length; i++) { float value = parseValue(fields[i]); contentHandler.setTime(time); contentHandler.setValue(value); contentHandler.applyCurrentFields(); time += dtime; } return true; } private boolean getContinuationSeriesData(String valueString) { float value = parseValue(valueString); contentHandler.setTime(time); contentHandler.setValue(value); contentHandler.applyCurrentFields(); return true; } private long parseDate(String str, String timestr) { int year; int month; int day; if (str.length() == 8) { year = Integer.parseInt(str.substring(0, 4)); month = Integer.parseInt(str.substring(4, 6)); day = Integer.parseInt(str.substring(6, 8)); } else { if (str.length() == 6) { year = 100 * (calendar.get(Calendar.YEAR) / 100) + Integer.parseInt(str.substring(0, 2)); month = Integer.parseInt(str.substring(2, 4)); day = Integer.parseInt(str.substring(4, 6)); } else { // only only month and day are given according to shef 2.0 spec take the last 12 months // before current and take the year that matches the month day. i.e. check if date is in future // with current year, if so take last year year = calendar.get(Calendar.YEAR); month = Integer.parseInt(str.substring(0, 2)); day = Integer.parseInt(str.substring(2, 4)); } if (month > calendar.get(Calendar.MONTH) + 5) { year--; } if (month < calendar.get(Calendar.MONTH) - 6) { year++; } } // The time string (actually a composite thing) // We expect something like: DH12/... If not, ignore this field // -- AM: TODO! int hour = 0; int minute = 0; int second = 0; if (COMPILE_PATTERN_DH_DD.matcher(timestr).matches()) { hour = Integer.parseInt(timestr.substring(2, 4)); if (timestr.length() == 6) { minute = TextUtils.tryParseInt(timestr.substring(4, 6), 0); } if (timestr.length() == 8) { second = TextUtils.tryParseInt(timestr.substring(6, 8), 0); } } calendar.clear(); //noinspection MagicConstant calendar.set(year, month - 1, day, hour, minute, second); /* Correct for the offset: month numbers start at 0*/ return calendar.getTimeInMillis(); } private static long parseTimeStep(String str) { long scale; long value = Long.parseLong(COMPILE_PATTERN_START_DIGIT.matcher(str).replaceAll("")); if (COMPILE_PATTERN_DIH.matcher(str).matches()) { scale = 3600 * 1000; } else if (COMPILE_PATTERN_DIN.matcher(str).matches()) { scale = 60 * 1000; } else if (COMPILE_PATTERN_DID.matcher(str).matches()) { scale = 86400 * 1000; } else { scale = 0; } return scale * value; } private static float parseValue(String valueText) { float value = Float.NaN; if (valueText != null) { valueText = valueText.trim(); if (!valueText.isEmpty()) { // Missing value can be marked +, -, m, mm, M, MM, -9999 if (!(valueText.equalsIgnoreCase("M") || valueText.equalsIgnoreCase("MM") || valueText.equalsIgnoreCase("-") || valueText.equalsIgnoreCase("+") || valueText.equalsIgnoreCase("-9999"))) { try { value = TextUtils.parseFloat(valueText); } catch (NumberFormatException e) { // TODO : According to the specs of SHEF: // If no legitimate value is found, the value is treated as a null field or no report. // So we should return missingValue value = Float.NaN; } } } } return value; } }