package nl.wldelft.timeseriesparsers;

import nl.wldelft.util.TextUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.TimeZone;

/**
 * TimeSeries reader for TVA EDStransmission
 *
 * <p>
 * A detailed description can be found in JIRA issue FEWS-11705
 * <pre>
 *
 * File format contains unicode control characters.
 *
 * There are SOH (start of heading) and ETX (end of text) markers.
 *
 * Dates can be recognized as follows:
 * Every [SOH][ETX] that is followed by a * is a date:
 *
 * [SOH][ETX]*LOCATIONID,YY,DAYS,TIME,TZ[ETX]
 *
 * LOCATIONID = 2 character location id.
 * YY         = 2 digits years. 14 == 2014
 * DAYS       = number of days in a year.
 * TIME       = either two digit (minute). If that is the case you can assume 00hr. Otherwise 4 digit (HHmm)
 * TZ         = D: daylight saving time. Can also be a C (central standard time)
 *
 * // timezone should be set to CDT (for daylight savings) or CST (for central standard time)
 *
 * </pre>
 *
 * <p>
 * Every line that starts with [SOH][ETX] but is not a date starts a new block of comma separated values.
 * The first value of a block is used as location id; every following value consists of a 2 character
 * parameter id directly followed by the value itself. A block may continue on following lines and ends
 * with [ETX][SOH]. A block that is still open when a new block, a new date or the end of the file is
 * reached is written with the date that was active while it was read.
 */
public class EDStransmissionTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
    private static final Logger log = Logger.getLogger(EDStransmissionTimeSeriesParser.class);

    private TimeSeriesContentHandler contentHandler = null;

    private final static char ETX_CHAR = 3; // Unicode ETX (end of text) character.
    private final static char SOH_CHAR = 1; // Unicode SOH (start of heading) character.

    private final static String SOH = Character.toString(SOH_CHAR);
    private final static String ETX = Character.toString(ETX_CHAR);

    @SuppressWarnings("StringConcatenationMissingWhitespace")
    private final static String SOHETX = SOH + ETX;

    @SuppressWarnings("StringConcatenationMissingWhitespace")
    private final static String ETXSOH = ETX + SOH;

    @SuppressWarnings("StringConcatenationMissingWhitespace")
    private final static String DATE_START = SOHETX + '*';

    private final static char DECIMAL_SEPARATOR = '.';

    // CST - Central Standard Time is 6 hours behind GMT
    private final static TimeZone DEFAULT_TIMEZONE = TimeZone.getTimeZone("GMT-6");

    // CDT - Central Daylight Time is 5 hours behind GMT
    private final static TimeZone DAYLIGHT_SAVINGS_TIMEZONE = TimeZone.getTimeZone("GMT-5");

    private LineReader reader = null;

    // calendar with the default time zone
    private Calendar defaultCalendar = null;

    // calendar with the daylight savings time zone
    private Calendar daylightSavingsCalendar = null;

    // calendar that holds the date of the most recently parsed header (one of the two calendars above).
    private Calendar currentCalendar = null;

    // years can be specified with 2 digits. The base year contains the first 2 digits of the current year.
    private String calYearBase = "";

    // values of the block that is currently being read; the first entry is the location id.
    List<String> valuesRead = new ArrayList<String>();

    /**
     * Reads the complete file line by line and passes every value found to the content handler.
     *
     * @param reader          source of the lines to parse.
     * @param virtualFileName not used by this parser.
     * @param contentHandler  receives the times, headers and values that are read.
     * @throws IOException when a date specification or a value is malformed, or when reading fails.
     */
    @Override
    public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws IOException {
        this.contentHandler = contentHandler;
        this.contentHandler.addMissingValueRange(-999.9999f, -999f);
        this.reader = reader;

        // values left behind by a previous, aborted parse must not end up in this import.
        this.valuesRead.clear();

        this.daylightSavingsCalendar = new GregorianCalendar(DAYLIGHT_SAVINGS_TIMEZONE);
        this.defaultCalendar = new GregorianCalendar(DEFAULT_TIMEZONE);
        this.defaultCalendar.setTime(new Date()); // set the current time.
        this.currentCalendar = this.defaultCalendar;

        String calYear = String.valueOf(currentCalendar.get(Calendar.YEAR));
        this.calYearBase = calYear.substring(0, 2); // needed for years specified in 2 digits.

        for (String buffer; (buffer = reader.readLine()) != null; ) {
            if (buffer.isEmpty()) {
                continue; // skip empty lines.
            }

            if (!buffer.startsWith(SOHETX)) {
                // line doesn't start with any encodings. Next line of values.
                parseValuesContinued(buffer);
            } else if (buffer.startsWith(DATE_START)) {
                parseNewDate(buffer);
            } else {
                parseNewValue(buffer);
            }
        }

        // the last block may not be terminated with ETX SOH; write what was read instead of dropping it.
        flushPendingValues();
    }

    /**
     * Parse values that are continued on a next line.
     *
     * @param parsedLine line that does not start with SOH ETX.
     * @throws IOException when one of the collected values is malformed.
     */
    private void parseValuesContinued(String parsedLine) throws IOException {
        if (!parsedLine.endsWith(ETXSOH)) {
            // block is not finished yet, add to the existing list of values.
            addToValues(parsedLine);
            return;
        }

        // end of values: cut off the two control characters and write the complete block.
        addToValues(parsedLine.substring(0, parsedLine.length() - 2));
        flushPendingValues();
    }

    /**
     * A new value has been found.
     *
     * @param parsedLine line that starts with SOH ETX and is not a date.
     * @throws IOException when one of the collected values is malformed.
     */
    private void parseNewValue(String parsedLine) throws IOException {
        // write any values not written yet.
        flushPendingValues();

        String line = parsedLine.substring(2); // skip the leading SOH ETX.
        boolean endOfValues = line.endsWith(ETXSOH);
        if (endOfValues) {
            // trim() removes every leading/trailing character up to and including the space,
            // which covers the ETX and SOH control characters.
            line = line.trim();
        }

        addToValues(line);
        if (endOfValues) {
            flushPendingValues();
        }
    }

    /**
     * A new date and location has been found.
     *
     * @param parsedLine line that starts with SOH ETX followed by a '*'.
     * @throws IOException when the date specification or a value that is still pending is malformed.
     */
    private void parseNewDate(String parsedLine) throws IOException {
        // values that are still pending belong to the previous date; write them before the
        // calendar is changed, otherwise they would be stored with the new date.
        flushPendingValues();

        String line = parsedLine.substring(3).trim(); // skip SOH ETX '*', trim space and ETX
        // line now contains all date and location fields.
        String[] value = TextUtils.split(line, ',');
        parseHeader(value);
    }

    /**
     * Writes the values collected so far (if any) to the content handler and empties the buffer.
     *
     * @throws IOException when one of the collected values is malformed.
     */
    private void flushPendingValues() throws IOException {
        if (valuesRead.isEmpty()) {
            return;
        }
        parseValues(valuesRead);
        valuesRead.clear();
    }

    /**
     * Splits the line on commas and appends every token that is not the empty string to the pending values.
     *
     * @param line comma separated values without control characters.
     */
    private void addToValues(String line) {
        String[] value = TextUtils.split(line, ',');
        for (String token : value) {
            if (!TextUtils.equals("", token)) {
                valuesRead.add(token.trim()); // remove any spaces from values..
            }
        }
    }

    /**
     * The header determines the date for which the current timeseries values are imported.
     *
     * LOCATIONID,YY,DAYS,TIME,TZ
     *
     * LOCATIONID = 2 character location id.
     * YY         = 2 digits years. 14 == 2014
     * DAYS       = number of days in a year.
     * TIME       = either two digit (minute). If that is the case you can assume 00hr. Otherwise 4 digit (HHmm)
     * TZ         = D: daylight saving time. Can also be a C (central standard time)
     *
     * @param header EDS header information.
     * @throws IOException when the header does not have 5 columns, the year is not specified with 2 digits,
     *                     the time is not specified with 2 or 4 digits or one of the fields is not a number.
     */
    private void parseHeader(String[] header) throws IOException {
        if (header.length != 5) {
            throw new IOException("Location and date should be 5 columsn\n" + reader.getFileAndLineNumber());
        }

        String year = header[1];
        if (year.length() != 2) {
            throw new IOException("Year should be specified with 2 digits. Year found was: '" + year + "' \n" + reader.getFileAndLineNumber());
        }
        //noinspection StringConcatenationMissingWhitespace
        year = this.calYearBase + year;

        String time = header[3];
        if (time.length() == 2) {
            //noinspection StringConcatenationMissingWhitespace
            time = "00" + time; // 00 hours..
        } else if (time.length() != 4) {
            throw new IOException("Time should be specified with 2 (mm) or 4 (HHmm) digits. Time found was: '" + time + "' \n" + reader.getFileAndLineNumber());
        }

        // convert all fields before the calendar is touched, a malformed header leaves the current date as it was.
        int yearNumber = parseDateField("year", year);
        int dayOfYear = parseDateField("day of year", header[2]); // number of days in a year.
        int hourOfTheDay = parseDateField("hour", time.substring(0, 2));
        int minute = parseDateField("minute", time.substring(2, 4));

        String timeZone = header[4];
        if (TextUtils.equals("C", timeZone)) {
            currentCalendar = defaultCalendar;
        } else if (TextUtils.equals("D", timeZone)) {
            currentCalendar = daylightSavingsCalendar;
        } else {
            // keep using the calendar (and so the time zone) that was active before this header.
            log.warn("No timezone information found in date specification.");
        }

        currentCalendar.set(Calendar.YEAR, yearNumber);
        currentCalendar.set(Calendar.DAY_OF_YEAR, dayOfYear);
        currentCalendar.set(Calendar.HOUR_OF_DAY, hourOfTheDay);
        currentCalendar.set(Calendar.MINUTE, minute);
        currentCalendar.set(Calendar.SECOND, 0);
        currentCalendar.set(Calendar.MILLISECOND, 0);
    }

    /**
     * Converts a field of the date specification to an int.
     *
     * @param fieldName name of the field, only used in the error message.
     * @param text      text to convert.
     * @return the parsed number.
     * @throws IOException when the text is not a valid integer.
     */
    private int parseDateField(String fieldName, String text) throws IOException {
        try {
            return Integer.parseInt(text);
        } catch (NumberFormatException e) {
            throw new IOException("Invalid " + fieldName + " in date specification: '" + text + "' \n" + reader.getFileAndLineNumber(), e);
        }
    }

    /**
     * Writes one block of values to the content handler, using the date of the current calendar.
     *
     * @param values first entry is the location id, every other entry is a 2 character parameter id
     *               directly followed by the value. Nothing is written when the list is empty.
     * @throws IOException when an entry is too short to contain a parameter id.
     */
    private void parseValues(List<String> values) throws IOException {
        if (values.isEmpty()) {
            return; // e.g. a line that only contains the end of values marker.
        }

        DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
        header.setLocationId(values.get(0));
        this.contentHandler.setTime(currentCalendar.getTimeInMillis());

        for (int i = 1; i < values.size(); i++) {
            String token = values.get(i);
            if (token.length() < 2) {
                throw new IOException("Value should start with a 2 character parameter id. Value found was: '" + token + "' \n" + reader.getFileAndLineNumber());
            }

            String param = token.substring(0, 2);
            String value = token.substring(2);
            if (value.startsWith("#")) {
                // -999 lies inside the missing value range registered in parse.
                value = "-999";
            }

            header.setParameterId(param);
            this.contentHandler.setTimeSeriesHeader(header);
            if (contentHandler.isCurrentTimeSeriesHeaderForCurrentTimeRejected()) {
                continue;
            }
            this.contentHandler.setValue(DECIMAL_SEPARATOR, value);
            this.contentHandler.applyCurrentFields();
        }
    }
}