package nl.wldelft.fews.system.plugin.dataImport;

import nl.wldelft.util.ExceptionUtils;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.io.FileParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.RelativeEquidistantTimeStep;
import nl.wldelft.util.timeseries.SimpleEquidistantTimeStep;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import nl.wldelft.util.timeseries.TimeStep;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.regex.Pattern;

/**
 * TimeSeries parser for SHEF version 2.0.
 * http://www.nws.noaa.gov/os/hod/SHManual/SHMan051_shef.htm
 * http://www.nws.noaa.gov/om/water/resources/SHEF_CodeManual_5July2012.pdf
 * <p>
 * Record types handled by this parser:
 * <ul>
 * <li>{@code .E} / {@code .ER} headers and their numbered continuation lines (equidistant series);</li>
 * <li>{@code .A} / {@code .AR} headers and numbered {@code .A} continuation lines
 * (one location, several parameter/value pairs);</li>
 * <li>{@code .B} / {@code .BR} blocks up to the {@code .END} line (several locations, several parameters).</li>
 * </ul>
 * Lines whose first field matches none of these are ignored.
 * <p>
 * The parser keeps per-record state (current time, time step, last {@code .A} location) in instance
 * fields, so one instance must not be used by several threads at once.
 */
public class ShefTimeSeriesParser implements FileParser<TimeSeriesContentHandler> {
    private static final Logger log = LogManager.getLogger();

    public static final String readerType = "SHEF";

    private static final String SINGLE_LOCATION_MULTIPLE_PARAMETERS_TYPE = ".A";
    private static final String SINGLE_LOCATION_MULTIPLE_PARAMETERS_CONTINUATION_TYPE = ".AR";
    private static final String END_TOKEN_TYPE = ".END";

    // Record type patterns. The leading dot is escaped so that only a literal '.' is accepted.
    private static final Pattern COMPILE_PATTERN_B_HEADER = Pattern.compile("\\.BR?"); // B type record
    private static final Pattern COMPILE_PATTERN_A_CONTINUATION = Pattern.compile("\\.A\\d+"); // .A followed by one or more digits
    private static final Pattern COMPILE_PATTERN_B_CONTINUATION = Pattern.compile("\\.BR?\\d+"); // .B or .BR followed by one or more digits
    private static final Pattern COMPILE_PATTERN_ER_AR = Pattern.compile("\\.[EA]R?");
    private static final Pattern COMPILE_PATTERN_ER = Pattern.compile("\\.ER?");
    private static final Pattern COMPILE_PATTERN_ER_D_AR_D = Pattern.compile("\\.[EA]R?\\d+");

    // Date/time code patterns
    private static final Pattern COMPILE_PATTERN_D_STAR = Pattern.compile("D[SNHDMYJR].*");
    private static final Pattern COMPILE_PATTERN_D = Pattern.compile("D.*|"); // any D code, or an empty field
    private static final Pattern COMPILE_PATTERN_DI = Pattern.compile("DI.*");
    private static final Pattern COMPILE_PATTERN_DH_DD = Pattern.compile("DH\\d\\d.*");
    private static final Pattern COMPILE_PATTERN_START_DIGIT = Pattern.compile("[^0-9]"); // every non-digit character
    private static final Pattern COMPILE_PATTERN_DIH = Pattern.compile(".*DIH.*");
    private static final Pattern COMPILE_PATTERN_DIN = Pattern.compile(".*DIN.*");
    private static final Pattern COMPILE_PATTERN_DID = Pattern.compile(".*DID.*");
    // Date relative codes: exactly one unit letter followed by a signed increment
    private static final Pattern COMPILE_PATTERN_DR_SHIFT = Pattern.compile("DR[SNHDMY][+-]\\d+");

    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    private static final String[] NO_FIELDS = new String[0];

    // Calendar fields addressed by the DY, DM, DD, DH, DN and DS override codes, most significant first
    private static final int[] OVERRIDE_FIELDS = {
            Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
            Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND};

    private static final int E_TYPE = 0;
    private static final int A_TYPE = 1;

    private final char quoteChar = '\"';
    private final Calendar calendar = new GregorianCalendar();
    private TimeSeriesContentHandler contentHandler = null;

    // Variables for parsing the file
    private long time = 0;
    private long dtime = 0;
    private boolean separatorOnLastLine = false;
    private int messageType = 0;
    private String aContinuationLocationId = null;
    // number of fields before the first '/' separator of the current line
    private int firstSlashSeparatorIdx = 0;

    /**
     * Parses the SHEF file and passes every value found to the content handler.
     *
     * @param file           the file to read
     * @param contentHandler receives headers, times and values; also supplies the default time zone
     * @throws IOException when the file cannot be opened or read, or a record is rejected
     * @throws Exception   any other failure raised while parsing is passed on unchanged
     */
    @Override
    public void parse(File file, TimeSeriesContentHandler contentHandler) throws Exception {
        this.contentHandler = contentHandler;
        calendar.setTimeZone(this.contentHandler.getDefaultTimeZone());
        boolean isValid = readFile(file);
        if (!isValid) {
            throw new IOException("Error parsing: " + file.getName());
        }
    }

    /**
     * Opens the file, parses it and closes it again.
     *
     * @return {@code true} when the file could be opened and every record was accepted
     */
    private boolean readFile(File file) throws Exception {
        BufferedReader reader;
        try {
            //noinspection resource
            reader = new BufferedReader(new FileReader(file));
        } catch (FileNotFoundException e) {
            log.error(file + " could not be opened.", e);
            return false;
        }

        try {
            if (parseFile(reader)) return true;
            log.error("The file " + file + " has unknown format.");
            return false;
        } catch (IOException e) {
            log.error("Error while reading the file " + file + " : " + ExceptionUtils.getMessage(e), e);
            return false;
        } finally {
            // Close on every path, including exceptions other than IOException
            closeReader(reader);
        }
    }

    private static void closeReader(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            log.error("Cannot close file " + reader + " : " + ExceptionUtils.getMessage(e), e);
        }
    }

    /**
     * Reads the file line by line and dispatches every line on its first field (the record type).
     * Parsing stops at the first record that is rejected.
     *
     * @param reader source of the SHEF text
     * @return {@code false} when a record was rejected, {@code true} otherwise
     * @throws IOException when reading fails
     * @throws Exception   when a record cannot be interpreted
     */
    private boolean parseFile(BufferedReader reader) throws Exception {
        boolean okay = true;
        String line;
        while ((line = reader.readLine()) != null && okay) {
            String[] fields = splitRecord(line);
            if (fields.length == 0) continue;

            // Header lines start with .E or .ER (but may contain data)
            // Data-only lines start with .Ed or .ERd - d a digit
            String recordType = fields[0];
            if (COMPILE_PATTERN_A_CONTINUATION.matcher(recordType).matches()) {
                parseMultiParameterContinuationSeriesData(fields);
            } else if (COMPILE_PATTERN_ER_AR.matcher(recordType).matches()) {
                okay = parseHeaderRecord(fields);
            } else if (COMPILE_PATTERN_B_HEADER.matcher(recordType).matches()) {
                // parse complete .B type record, this consumes lines up to .END
                parseMultipleLocationMultipleParametersSeries(fields, reader);
            } else if (COMPILE_PATTERN_ER_D_AR_D.matcher(recordType).matches()) {
                okay = parseContinuationRecord(fields);
            }
            separatorOnLastLine = line.endsWith("/");
        }
        return okay;
    }

    /**
     * Splits one line into fields. The part before the first '/' (the positional part) is split
     * on blanks, the remainder is split on '/'. Comments are removed first.
     * Updates {@link #firstSlashSeparatorIdx} to the number of positional fields.
     *
     * @return the fields, or an empty array when the line holds nothing to split
     */
    private String[] splitRecord(String line) {
        String commentFreeLine = removeCommentsFromLine(line).toString();
        String[] pieces = TextUtils.split(commentFreeLine, ':', '\0', quoteChar, false);
        if (pieces == null || pieces.length == 0) return NO_FIELDS;

        // String.split drops trailing empty strings, so a line of only slashes yields no parts
        String[] slashParts = pieces[0].split("/");
        if (slashParts.length == 0) return NO_FIELDS;

        String[] positionFields = TextUtils.split(slashParts[0], ' ', '\"');
        firstSlashSeparatorIdx = positionFields.length;
        String[] fields = new String[slashParts.length + positionFields.length - 1];
        System.arraycopy(positionFields, 0, fields, 0, positionFields.length);
        System.arraycopy(slashParts, 1, fields, positionFields.length, slashParts.length - 1);
        return fields;
    }

    /**
     * Handles a .E, .ER, .A or .AR header line, including any values on the same line.
     *
     * @return {@code false} when the header was rejected
     */
    private boolean parseHeaderRecord(String[] fields) {
        this.messageType = COMPILE_PATTERN_ER.matcher(fields[0]).matches() ? E_TYPE : A_TYPE;

        // bug in OHD output for .A messages. missing '/' slash between parameter id and
        // value. Remove this when fixed
        if (this.messageType == A_TYPE) {
            String[] splitfields = TextUtils.split(fields[fields.length - 1], ' ', '\"');
            if (splitfields.length > 1) {
                String[] tmpFields = new String[fields.length + 1];
                System.arraycopy(fields, 0, tmpFields, 0, fields.length - 1);
                System.arraycopy(splitfields, 0, tmpFields, fields.length - 1, 2);
                fields = tmpFields;
            }
        }

        if (SINGLE_LOCATION_MULTIPLE_PARAMETERS_TYPE.equals(fields[0])) {
            // .A type no headers all data is on one line with multiple parameters.
            parseMultiParameterSeriesData(fields);
            return true;
        }

        // .E/.ER, or .AR type (revision on earlier measurement).
        // Assumption is that only one parameter at a time is passed.
        boolean okay = getSeriesParameters(fields);
        if (!okay) return false;

        // because parameter and timestep are mandatory, values can only start
        // two fields after the first slash separator
        int firstValueIdx = firstSlashSeparatorIdx + 2;
        if (fields.length == firstValueIdx && SINGLE_LOCATION_MULTIPLE_PARAMETERS_CONTINUATION_TYPE.equals(fields[0])) {
            okay = getContinuationSeriesData(fields[fields.length - 1]);
        }
        for (int i = firstValueIdx; i < fields.length; i++) {
            if (isFloat(fields[i])) {
                // everything from the first numeric field onwards is data
                okay = getSeriesData(Arrays.copyOfRange(fields, i, fields.length));
                break;
            }
        }
        return okay;
    }

    /**
     * Handles a numbered .E/.ER/.AR continuation line that only carries values.
     */
    private boolean parseContinuationRecord(String[] fields) {
        String[] datafields;
        if (firstSlashSeparatorIdx == 1 && separatorOnLastLine) {
            // slash between row continuation and first value. If previous line ended with a slash
            // a null value is assumed
            fields[0] = null;
            datafields = fields;
        } else {
            // skip first column with normal line continuation
            datafields = Arrays.copyOfRange(fields, 1, fields.length);
        }
        return getSeriesData(datafields);
    }

    /**
     * Returns the line without its comments. The line is split on ':' and every second piece is
     * kept; when the line starts with ':' the kept pieces are the odd ones instead of the even ones.
     * see: http://www.nws.noaa.gov/om/water/resources/SHEF_CodeManual_5July2012.pdf
     */
    private StringBuilder removeCommentsFromLine(String line) {
        String[] piecesWithComments = TextUtils.split(line, ':', ':', quoteChar, true);
        StringBuilder commentFreeLine = new StringBuilder(line.length());
        if (!line.contains(":")) {
            commentFreeLine.append(line);
            return commentFreeLine;
        }

        // if the line starts with a : , the first entry should be skipped.
        int togglePosition = line.startsWith(":") ? 1 : 0;
        for (int i = 0; i < piecesWithComments.length; i++) {
            if (i % 2 == togglePosition) {
                // the comment toggle is off.
                commentFreeLine.append(piecesWithComments[i]);
            }
        }
        return commentFreeLine;
    }

    /**
     * @return {@code true} when {@code TextUtils.parseFloat} accepts the text
     */
    private static boolean isFloat(String value) {
        try {
            TextUtils.parseFloat(value);
        } catch (NumberFormatException e) {
            return false;
        }
        return true;
    }

    /**
     * Returns the first field at index 3 or later that holds a date/time code, or "" when there is none.
     * Fields are trimmed before matching because slash separated fields may carry surrounding blanks.
     */
    private static String findObservationTime(String[] fields) {
        for (int i = 3; i < fields.length; i++) {
            if (COMPILE_PATTERN_D_STAR.matcher(fields[i].trim()).matches()) return fields[i];
        }
        return "";
    }

    /**
     * Adds the blank separated tokens of one field to the list. A null field adds nothing,
     * an empty field adds one empty token (a null value) and a blank-only field adds nothing.
     * Leading blanks and runs of blanks do not produce empty tokens.
     */
    private static void appendTokens(String field, List<String> tokens) {
        if (field == null) return;
        if (field.isEmpty()) {
            tokens.add("");
            return;
        }
        String trimmed = field.trim();
        if (!trimmed.isEmpty()) tokens.addAll(Arrays.asList(WHITESPACE.split(trimmed)));
    }

    /**
     * Reads a complete set of .B type records spanning multiple lines: the header line, optional
     * numbered header continuation lines and the data lines up to the line containing .END
     * (or the end of the file).
     *
     * @param fields fields of the .B header line
     * @param reader positioned directly after the header line
     */
    private void parseMultipleLocationMultipleParametersSeries(String[] fields, BufferedReader reader) throws Exception {
        if (fields.length < 3) {
            log.warn("SHEF import line of type .B doesn't contain a message source and date. Skipping header");
            return;
        }

        // get time, as a combination of date, [observation time]
        String date = fields[2];
        int ifield = 3;
        while (ifield < fields.length && !COMPILE_PATTERN_D_STAR.matcher(fields[ifield]).matches()) {
            ifield++;
        }
        String observationTime = "";
        if (ifield < fields.length) {
            observationTime = fields[ifield];
        } else {
            // no observation time on the header, parameters start after the positional fields
            ifield = Math.min(firstSlashSeparatorIdx, fields.length);
        }

        // The start date/time
        time = parseDate(date, observationTime);

        // skip optional D* fields
        while (ifield < fields.length && COMPILE_PATTERN_D.matcher(fields[ifield]).matches()) {
            ifield++;
        }

        // The list of parameter/time shift headers, including .Bp continuation lines, if any
        String line;
        String header;
        List<String> headerList = new ArrayList<>();
        do {
            for (int i = ifield; i < fields.length; i++) {
                // continuation lines are stripped of blanks below, do the same for the first line
                headerList.add(fields[i].trim());
            }

            // continue on next line ?
            line = reader.readLine();
            if (line == null) return; // file ends inside the header, there is no data to write
            line = removeCommentsFromLine(line).toString();
            // NOTE(review): as written this replaces a blank by a blank; possibly a tab or
            // double blank literal was intended - confirm
            line = line.replace(" ", " ");
            int ix = line.indexOf(" ");
            header = ix > 0 ? line.substring(0, ix) : "";
            String newline = line.substring(ix + 1).replace(" ", "");
            fields = TextUtils.split(newline, '/');
            // some inconsistency in '/' separators on continuation lines !
            ifield = fields.length > 0 && TextUtils.equals(fields[0].trim(), "") ? 1 : 0;
        } while (COMPILE_PATTERN_B_CONTINUATION.matcher(header).matches());

        // The data lines; the first one was already read by the header loop
        while (line != null && !line.contains(END_TOKEN_TYPE)) {
            String dataLine = removeCommentsFromLine(line).toString();
            dataLine = dataLine.replace(" ", " ");
            if (!dataLine.isEmpty()) {
                int ix = dataLine.indexOf(" ");
                if (ix < 0) {
                    log.warn("SHEF import line of type .B doesn't contain values after the location. Skipping line");
                } else {
                    String location = dataLine.substring(0, ix);
                    String data = dataLine.substring(ix + 1).replace(" ", "");
                    writeBFormatLineContent(time, location, headerList, TextUtils.split(data, '/'));
                }
            }
            line = reader.readLine();
        }
    }

    /**
     * Writes the values of one .B data line. Fields are matched with the headers in order;
     * date relative codes in the header and date/time override codes in the data change the
     * time of the values that follow. Both are applied to the start time of the record.
     *
     * @param time       start time of the .B record
     * @param location   location id of this data line
     * @param headerList parameter ids and date relative codes from the .B header
     * @param fields     '/' separated values of this data line
     */
    private void writeBFormatLineContent(long time, String location, List<String> headerList, String[] fields) throws Exception {
        DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader();
        timeSeriesHeader.setLocationId(location);
        long startTime = time;
        int iheader = 0;
        for (int ifield = 0; ifield < fields.length; ifield++) {
            // in addition to parameter names, header may contain "DRx" date/time shift codes
            while (iheader < headerList.size() && COMPILE_PATTERN_DR_SHIFT.matcher(headerList.get(iheader)).matches()) {
                time = applyDateTimeShift(startTime, headerList.get(iheader));
                iheader++;
            }
            // in addition to location code, line may contain date-time override code
            while (ifield < fields.length && COMPILE_PATTERN_D_STAR.matcher(fields[ifield]).matches()) {
                time = applyDateTimeOverride(startTime, fields[ifield]);
                ifield++;
            }
            // more values than parameters, or the line ended with a date/time code
            if (iheader >= headerList.size() || ifield >= fields.length) break;

            timeSeriesHeader.setParameterId(headerList.get(iheader));
            // same missing value rules as for the .A and .E records
            float value = parseValue(fields[ifield]);
            contentHandler.setTimeSeriesHeader(timeSeriesHeader);
            contentHandler.setTime(time);
            contentHandler.setValue(value);
            contentHandler.applyCurrentFields();
            iheader++;
        }
    }

    /**
     * Applies a date/time override code (DY, DM, DD, DH, DN or DS) to the given time.
     * The digits after the code are read as two digit components, starting at the component named
     * by the code and continuing with the less significant ones. Components that are not given
     * keep the value they have in {@code time}. A two digit year is placed in the century of
     * {@code time}.
     *
     * @param time  time to start from, in epoch milliseconds
     * @param token the code, for instance {@code DH0830}
     * @return the adjusted time, or {@code time} when the token is not a date/time code
     * @throws IllegalArgumentException when the code is not supported (DJ, DR) or carries no digits
     */
    private long applyDateTimeOverride(long time, String token) throws Exception {
        if (!COMPILE_PATTERN_D_STAR.matcher(token).matches()) return time;

        int firstField;
        switch (token.substring(0, 2)) {
            case "DY":
                firstField = 0;
                break;
            case "DM":
                firstField = 1;
                break;
            case "DD":
                firstField = 2;
                break;
            case "DH":
                firstField = 3;
                break;
            case "DN":
                firstField = 4;
                break;
            case "DS":
                firstField = 5;
                break;
            default:
                throw new IllegalArgumentException("ShefTimeSeriesParser: invalid 'Date/Time override code' : " + token);
        }

        String digits = token.substring(2).trim();
        if (digits.length() < 2) {
            throw new IllegalArgumentException("ShefTimeSeriesParser: invalid 'Date/Time override code' : " + token);
        }

        calendar.setTimeInMillis(time);
        int century = 100 * (calendar.get(Calendar.YEAR) / 100);
        int pos = 0;
        for (int i = firstField; i < OVERRIDE_FIELDS.length && pos + 2 <= digits.length(); i++, pos += 2) {
            int component = Integer.parseInt(digits.substring(pos, pos + 2));
            int calendarField = OVERRIDE_FIELDS[i];
            if (calendarField == Calendar.YEAR) {
                component += century;
            } else if (calendarField == Calendar.MONTH) {
                component--; // Calendar months start at 0
            }
            calendar.set(calendarField, component);
        }
        return calendar.getTimeInMillis();
    }

    /**
     * Applies a date relative code (DRS, DRN, DRH, DRD, DRM or DRY with a signed increment)
     * to the given time. see SHEF_CodeManual_5July2012.pdf table 13a
     *
     * @return the shifted time, or {@code time} when the token does not start with DR
     * @throws IllegalArgumentException when the unit letter is not supported
     */
    private long applyDateTimeShift(long time, String token) {
        if (token.indexOf("DR") != 0) return time;
        calendar.setTimeInMillis(time);
        String unit = token.substring(2, 3).toUpperCase();
        int increment = Integer.parseInt(token.substring(3));
        switch (unit) {
            case "S":
                calendar.add(Calendar.SECOND, increment);
                break;
            case "N":
                calendar.add(Calendar.MINUTE, increment);
                break;
            case "H":
                calendar.add(Calendar.HOUR_OF_DAY, increment);
                break;
            case "D":
                calendar.add(Calendar.DATE, increment);
                break;
            case "M":
                calendar.add(Calendar.MONTH, increment);
                break;
            case "Y":
                calendar.add(Calendar.YEAR, increment);
                break;
            default:
                throw new IllegalArgumentException("ShefTimeSeriesParser: invalid 'Date Relative code' : " + token);
        }
        return calendar.getTimeInMillis();
    }

    /**
     * Continuation of .A field with multiple parameters per line. The values are written for the
     * location and time of the preceding .A line.
     */
    private void parseMultiParameterContinuationSeriesData(String[] fields) {
        DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader();
        timeSeriesHeader.setLocationId(aContinuationLocationId);
        List<String> parameterValueList = new ArrayList<>();
        for (int i = 1; i < fields.length; i++) {
            appendTokens(fields[i], parameterValueList);
        }
        writeMultipleParametersSeries(timeSeriesHeader, parameterValueList);
    }

    /**
     * Writes alternating parameter id / value tokens at the current time. A list with an odd number
     * of tokens is skipped; this is only reported when it does not start with a DC (creation date) code.
     */
    private void writeMultipleParametersSeries(DefaultTimeSeriesHeader timeSeriesHeader, List<String> parameterValueList) {
        if (parameterValueList.size() % 2 != 0) {
            // Check on special symbols like DC (date creation), these can be ignored.
            if (!parameterValueList.get(0).startsWith("DC")) {
                log.warn("SHEF import line of type .A[0-9]* (single station, multiple parameters) doesn't contain a consistent number of parameters and values. Skipping line");
            }
            return;
        }
        for (int i = 0; i < parameterValueList.size(); i += 2) {
            String paramId = parameterValueList.get(i);
            float value = parseValue(parameterValueList.get(i + 1));
            timeSeriesHeader.setParameterId(paramId);
            contentHandler.setTimeSeriesHeader(timeSeriesHeader);
            contentHandler.setTime(time);
            contentHandler.setValue(value);
            contentHandler.applyCurrentFields();
        }
    }

    /**
     * Parses a .A line, for instance
     * {@code .A ANAW1 20170215 P DH2400 /DH08 /HGIRX 8.37 /QRIRX 41.69}.
     * Counting fields from 0: field 1 is the location, field 2 the date (possibly without a year).
     * After the first slash all leading D codes are skipped; what remains are parameter code /
     * value pairs. The location is remembered for numbered .A continuation lines.
     */
    private void parseMultiParameterSeriesData(String[] fields) {
        if (fields.length < 3) {
            log.warn("SHEF import line of type .A doesn't contain a location and date. Skipping line");
            return;
        }
        String locationId = fields[1];
        String date = fields[2];

        // The start date/time, as a combination of date, [observation time]
        time = parseDate(date, findObservationTime(fields));
        aContinuationLocationId = locationId; // Keep location id in case .A continuations are found.

        DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader();
        timeSeriesHeader.setLocationId(locationId);

        // get index of first parameter id, i.e. the first field without a D in prefix
        int ifield = firstSlashSeparatorIdx;
        while (ifield < fields.length && fields[ifield] != null
                && COMPILE_PATTERN_D.matcher(fields[ifield].trim()).matches()) {
            ifield++;
        }

        List<String> parameterValueList = new ArrayList<>();
        for (int i = ifield; i < fields.length; i++) {
            appendTokens(fields[i], parameterValueList);
        }
        writeMultipleParametersSeries(timeSeriesHeader, parameterValueList);
    }

    /**
     * Reads location, date, parameter and time step from a .E/.ER/.AR header and passes the
     * resulting header to the content handler.
     * Counting fields from 0:
     * field 1 is the name of the location, field 2 is the date (possibly without a year),
     * then optionally time zone, observation time, creation date, units code, data string
     * qualifier and duration code, followed by the parameter code and the time interval
     * (only for .E messages).
     *
     * @return {@code false} when the header is too short, the time is 0 or, for .E messages,
     * the time step is 0
     */
    private boolean getSeriesParameters(String[] fields) {
        if (fields.length < 3) {
            log.error("SHEF import line of type " + fields[0] + " doesn't contain a location and date");
            return false;
        }
        String locationId = fields[1];
        String date = fields[2];

        // The start date/time, as a combination of date, [observation time]
        time = parseDate(date, findObservationTime(fields));

        // get parameter id, i.e. the first field without a D in prefix
        int ifield = firstSlashSeparatorIdx;
        String parameterId = null;
        for (; ifield < fields.length; ifield++) {
            if (!COMPILE_PATTERN_D.matcher(fields[ifield]).matches()) {
                parameterId = fields[ifield];
                break;
            }
        }

        // get timestep field, mandatory only for .E messages
        for (; ifield < fields.length; ifield++) {
            if (COMPILE_PATTERN_DI.matcher(fields[ifield]).matches()) {
                dtime = parseTimeStep(fields[ifield]);
                break;
            }
        }

        boolean equidistant = this.messageType == E_TYPE;
        DefaultTimeSeriesHeader timeSeriesHeader = new DefaultTimeSeriesHeader();
        timeSeriesHeader.setLocationId(locationId);
        timeSeriesHeader.setParameterId(parameterId);
        if (equidistant) {
            TimeStep relativeEqTimeStep = RelativeEquidistantTimeStep.getInstance(dtime, time);
            //supports only equidistant time steps, see parseTimeStep
            timeSeriesHeader.setTimeStep(SimpleEquidistantTimeStep.getInstance(relativeEqTimeStep.getMaximumStepMillis()));
            timeSeriesHeader.setForecastTime(time);
        }
        contentHandler.setTimeSeriesHeader(timeSeriesHeader);

        //AM: error conditions?
        return time != 0 && (!equidistant || dtime != 0);
    }

    /**
     * Writes the values one time step apart, starting at the current time. The current time is
     * advanced so that values on a following continuation line carry on where this line stopped.
     */
    private boolean getSeriesData(String[] fields) {
        for (String field : fields) {
            contentHandler.setTime(time);
            contentHandler.setValue(parseValue(field));
            contentHandler.applyCurrentFields();
            time += dtime;
        }
        return true;
    }

    /**
     * Writes a single value at the current time without advancing it.
     */
    private boolean getContinuationSeriesData(String valueString) {
        contentHandler.setTime(time);
        contentHandler.setValue(parseValue(valueString));
        contentHandler.applyCurrentFields();
        return true;
    }

    /**
     * Combines a SHEF date and an optional DH observation time code into epoch milliseconds in
     * the time zone of the parser calendar.
     *
     * @param str     date as ccyymmdd, yymmdd or mmdd
     * @param timestr observation time code; only DHhh, DHhhnn and DHhhnnss are interpreted,
     *                anything else gives 00:00:00
     */
    private long parseDate(String str, String timestr) {
        // Missing century or year is derived from the current date, not from the previously
        // parsed record that is still held by the shared calendar
        Calendar now = new GregorianCalendar(calendar.getTimeZone());
        int year;
        int month;
        int day;
        if (str.length() == 8) {
            year = Integer.parseInt(str.substring(0, 4));
            month = Integer.parseInt(str.substring(4, 6));
            day = Integer.parseInt(str.substring(6, 8));
        } else if (str.length() == 6) {
            year = 100 * (now.get(Calendar.YEAR) / 100) + Integer.parseInt(str.substring(0, 2));
            month = Integer.parseInt(str.substring(2, 4));
            day = Integer.parseInt(str.substring(4, 6));
        } else {
            // only month and day are given. according to shef 2.0 spec take the last 12 months
            // before current and take the year that matches the month day. i.e. check if date is in future
            // with current year, if so take last year.
            year = now.get(Calendar.YEAR);
            month = Integer.parseInt(str.substring(0, 2));
            day = Integer.parseInt(str.substring(2, 4));
            int currentMonth = now.get(Calendar.MONTH);
            if (month > currentMonth + 5) {
                year--;
            }
            if (month < currentMonth - 6) {
                year++;
            }
        }

        // The time string (actually a composite thing)
        // We expect something like: DH12/... If not, ignore this field
        // -- AM: TODO!
        int hour = 0;
        int minute = 0;
        int second = 0;
        String timeCode = timestr.trim(); // slash separated fields may carry surrounding blanks
        if (COMPILE_PATTERN_DH_DD.matcher(timeCode).matches()) {
            hour = Integer.parseInt(timeCode.substring(2, 4));
            if (timeCode.length() >= 6) {
                minute = TextUtils.tryParseInt(timeCode.substring(4, 6), 0);
            }
            if (timeCode.length() >= 8) {
                second = TextUtils.tryParseInt(timeCode.substring(6, 8), 0);
            }
        }
        calendar.clear();
        /* Correct for the offset: month numbers start at 0*/
        //noinspection MagicConstant
        calendar.set(year, month - 1, day, hour, minute, second);
        return calendar.getTimeInMillis();
    }

    /**
     * Converts a DI time interval code to milliseconds. The digits in the code give the count;
     * DIH, DIN and DID give the unit (hours, minutes, days). Any other unit gives 0.
     * The sign of the interval is not taken into account.
     *
     * @throws NumberFormatException when the code holds no digits
     */
    private static long parseTimeStep(String str) {
        long value = Long.parseLong(COMPILE_PATTERN_START_DIGIT.matcher(str).replaceAll(""));
        long scale;
        if (COMPILE_PATTERN_DIH.matcher(str).matches()) {
            scale = 3600L * 1000L;
        } else if (COMPILE_PATTERN_DIN.matcher(str).matches()) {
            scale = 60L * 1000L;
        } else if (COMPILE_PATTERN_DID.matcher(str).matches()) {
            scale = 86400L * 1000L;
        } else {
            scale = 0L;
        }
        return scale * value;
    }

    /**
     * Converts a value field to a float.
     *
     * @return NaN for null, blank, a missing value marker (+, -, m, mm, M, MM, -9999) or text
     * that is not a number; the parsed value otherwise
     */
    private static float parseValue(String valueText) {
        if (valueText == null) return Float.NaN;
        String text = valueText.trim();
        if (text.isEmpty()) return Float.NaN;

        // Missing value can be marked +, -, m, mm, M, MM, -9999
        boolean missing = text.equalsIgnoreCase("M") || text.equalsIgnoreCase("MM")
                || text.equals("-") || text.equals("+") || text.equals("-9999");
        if (missing) return Float.NaN;

        try {
            return TextUtils.parseFloat(text);
        } catch (NumberFormatException e) {
            // According to the specs of SHEF:
            // If no legitimate value is found, the value is treated as a null field or no report.
            return Float.NaN;
        }
    }
}