package nl.wldelft.timeseriesparsers;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.TimeZone;
/**
* TimeSeries reader for TVA EDStransmission
*
* <p>
* A detailed description can be found in JIRA issue FEWS-11705
*<pre>
*
* File format contains unicode control characters.
*
* There are SOH (start of heading) and ETX (end of text) markers.
*
* Dates can be recognized as follows:
* Every [SOH][ETX] that is followed by a * is a date:
*
* [SOH][ETX]*LOCATIONID,YY,DAYS,TIME,TZ[ETX]
*
* LOCATIONID = 2 character location id.
* YY = 2 digits years. 14 == 2014
* DAYS = number of days in a year.
* TIME = either two digit (minute). If that is the case you can assume 00hr. Otherwise 4 digit (HHmm)
* TZ = D: daylight saving time. Can also be a C (central standard time)
*
* // timezone should be set to CDT (for daylight savings) or CST (for central standard time)
*
*
*</pre>
*/
public class EDStransmissionTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
private static final Logger log = Logger.getLogger(EDStransmissionTimeSeriesParser.class);
private TimeSeriesContentHandler contentHandler = null;
private final static char ETX_CHAR = 3; // Unicode ETX character.
private final static char SOH_CHAR = 1; // Unicode ETX character.
private final static String SOH = Character.toString(SOH_CHAR);
private final static String ETX = Character.toString(ETX_CHAR);
@SuppressWarnings("StringConcatenationMissingWhitespace")
private final static String SOHETX = SOH + ETX;
@SuppressWarnings("StringConcatenationMissingWhitespace")
private final static String ETXSOH = ETX + SOH;
@SuppressWarnings("StringConcatenationMissingWhitespace")
private final static String DATE_START = SOHETX + '*';
private final static char DECIMAL_SEPARATOR = '.';
// CST - Central Standard Time is 6 hours behind GMT
private final static TimeZone DEFAULT_TIMEZONE = TimeZone.getTimeZone("GMT-6");
// CDT - Central Daylight Time is 5 hours behind GMT
private final static TimeZone DAYLIGHT_SAVINGS_TIMEZONE = TimeZone.getTimeZone("GMT-5");
private LineReader reader = null;
// calender with the default time zone
private Calendar defaultCalendar = null;
// calender with the daylight savings time zone
private Calendar daylightSavingsCalendar = null;
private Calendar currentCalendar = null;
// years can be specified with 2 digits. The base year contains the first 2 digits of the current millennium.
private String calYearBase = "";
List<String> valuesRead = new ArrayList();
@Override
public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws IOException {
TODO this.contentHandler = contentHandler;
this.contentHandler.addMissingValueRange(-999.9999f, -999f);
this.reader = reader;
this.daylightSavingsCalendar = new GregorianCalendar(DAYLIGHT_SAVINGS_TIMEZONE);
this.defaultCalendar = new GregorianCalendar(DEFAULT_TIMEZONE);
this.defaultCalendar.setTime(new Date()); // set the current time.
this.currentCalendar = this.defaultCalendar;
String calYear = String.valueOf(currentCalendar.get(Calendar.YEAR));
this.calYearBase = calYear.substring(0, 2); // needed for years specified in 2 digits.
for (String buffer; (buffer = reader.readLine()) != null; ) {
if (buffer.isEmpty()) continue; // skip empty lines.
if (buffer.startsWith(SOHETX)) {
// either a new date or a new value
if (buffer.startsWith(DATE_START)) {
parseNewDate(buffer);
} else {
parseNewValue(buffer);
}
} else {
// line doesn't start with any encodings. Next line of values.
parseValuesContinued(buffer);
}
}
}
/**
* Parse values that are continued on a next line.
*
* @throws IOException
*/
private void parseValuesContinued(String parsedLine) {
String line = parsedLine;
// check if this is the last line.
if (line.endsWith(ETXSOH)) {
// end of values.
line = line.substring(0, line.length() - 2);
addToValues(line);
parseValues(valuesRead);
valuesRead.clear();
} else {
// add to the existing list of values.
addToValues(line);
}
}
/**
* A new value has been found.
*
* @throws IOException
*/
private void parseNewValue(String parsedLine) {
if (!valuesRead.isEmpty()) {
// write any values not written yet.
parseValues(valuesRead);
valuesRead.clear();
}
String line = parsedLine.substring(2);
if (line.endsWith(ETXSOH)) {
// end of values.
line = line.trim(); // trim the control characters.
addToValues(line);
parseValues(valuesRead);
valuesRead.clear();
} else {
addToValues(line);
}
}
private void parseNewDate(String parsedLine) throws IOException {
// a new date and location.
String line = parsedLine.substring(3).trim(); // trim space and ETX
// line now contains all date and location fields.
String[] value = TextUtils.split(line, ',');
parseHeader(value);
}
private void addToValues(String line) {
String[] value = TextUtils.split(line, ',');
for (int i = 0; i < value.length; i++) {
if (!TextUtils.equals("", value[i])) {
valuesRead.add(value[i].trim()); // remove any spaces from values..
}
}
}
/**
* The header determines the date for which the current timeseries values are imported.
*
* LOCATIONID,YY,DAYS,TIME,TZ
*
* LOCATIONID = 2 character location id.
* YY = 2 digits years. 14 == 2014
* DAYS = number of days in a year.
* TIME = either two digit (minute). If that is the case you can assume 00hr. Otherwise 4 digit (HHmm)
* TZ = D: daylight saving time. Can also be a C (central standard time)
*
* @param header EDS header information.
* @throws IOException
*/
private void parseHeader(String[] header) throws IOException {
if (header.length != 5) {
throw new IOException("Location and date should be 5 columsn\n" + reader.getFileAndLineNumber());
}
String year = header[1];
if (year.length() == 2) {
//noinspection StringConcatenationMissingWhitespace
year = this.calYearBase + year;
} else {
throw new IOException("Year should be specified with 2 digits. Year found was: '" + year + "' \n" + reader.getFileAndLineNumber());
}
String timeZone = header[4];
if (TextUtils.equals("C", timeZone)) {
currentCalendar = defaultCalendar;
} else if (TextUtils.equals("D", timeZone)) {
currentCalendar = daylightSavingsCalendar;
} else {
log.warn("No timezone information found in date specification.");
}
String days = header[2]; // number of days in a year.
currentCalendar.set(Calendar.YEAR, Integer.parseInt(year));
currentCalendar.set(Calendar.DAY_OF_YEAR, Integer.parseInt(days));
String time = header[3];
if (time.length() == 2) {
//noinspection StringConcatenationMissingWhitespace
time = "00" + time; // 00 hours..
}
int hourOfTheDay = Integer.parseInt(time.substring(0, 2));
currentCalendar.set(Calendar.HOUR_OF_DAY, hourOfTheDay);
currentCalendar.set(Calendar.MINUTE, Integer.parseInt(time.substring(2, 4)));
currentCalendar.set(Calendar.SECOND, 0);
currentCalendar.set(Calendar.MILLISECOND, 0);
}
private void parseValues(List<String> values) {
DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
header.setLocationId(values.get(0));
this.contentHandler.setTime(currentCalendar.getTimeInMillis());
for (int i = 1; i < values.size(); i++) {
String param = values.get(i).substring(0, 2);
String value = values.get(i).substring(2);
if (value.startsWith("#")) {
value = "-999";
}
header.setParameterId(param);
this.contentHandler.setTimeSeriesHeader(header);
if (contentHandler.isCurrentTimeSeriesHeaderForCurrentTimeRejected()) continue;
this.contentHandler.setValue(DECIMAL_SEPARATOR, value);
this.contentHandler.applyCurrentFields();
}
}
}
|