package nl.wldelft.timeseriesparsers; import nl.wldelft.util.TextUtils; import nl.wldelft.util.io.LineReader; import nl.wldelft.util.io.TextParser; import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader; import nl.wldelft.util.timeseries.TimeSeriesContentHandler; import java.io.IOException; /** * TimeSeries reader for CSV files with a simple structure (quite like the CSV files FEWS can export) * <p> * A detailed description can be found in JIRA issue FEWS-1995 *<pre> *Example *Location Names,Bewdley,Saxons Lode *Location Ids,EA_H-2001,EA_H-2032 *Time,Rainfall,Rainfall *2003-03-01 01:00:00,-999,-999 *2003-03-01 01:15:00,1.000,1.000 *2003-03-01 01:30:00,2.000,2.000 *2003-03-01 01:45:00,3.000,3.000 *2003-03-01 02:00:00,4.000,4.000 *2003-03-01 02:15:00,-999,5.000 *2003-03-01 02:30:00,6.000,6.000 *2003-03-01 02:45:00,7.000,7.000 *2003-03-01 03:00:00,8.000,8.000 *2003-03-01 03:15:00,9.000,9.000 *2003-03-01 03:30:00,10.000,10.000 *2003-03-01 03:45:00,11.000,11.000 *2003-03-01 04:00:00,12.000,12.000 *2003-03-01 04:15:00,13.000,13.000 *2003-03-01 04:30:00,14.000,14.986 *</pre> */ public class CsvTimeSeriesParser implements TextParser<TimeSeriesContentHandler> { private static final char[] QUOTE_CHARACTERS = new char[]{'\'', '\"'}; private char decimalSeparator = '\0'; private int columnCount = 0; private LineReader reader = null; private TimeSeriesContentHandler contentHandler = null; private char columnSeparatorChar = '\0'; @Override public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws IOException { this.contentHandler = contentHandler; this.contentHandler.addMissingValueRange(-999.9999f, -999f); this.reader = reader; parseHeader(); for (String[] buffer = new String[columnCount]; this.reader.readLine(columnSeparatorChar, buffer) != -1;) { this.contentHandler.setTime(this.contentHandler.getDefaultTimeZone(), "yyyy-MM-dd HH:mm:ss", buffer[0]); for (int i = 1; i < columnCount; i++) { this.contentHandler.setTimeSeriesHeader(i); this.contentHandler.setValue(decimalSeparator, buffer[i]); this.contentHandler.applyCurrentFields(); } } } // The first few lines contain vital information about the file: // - Whether the separator character is a , or a ; // - The names of the parameters and locations private void parseHeader() throws IOException { String locationNamesLine = reader.readLine(); if (locationNamesLine.contains(";") && !locationNamesLine.contains(",")) { columnSeparatorChar = ';'; decimalSeparator = ','; } else { columnSeparatorChar = ','; decimalSeparator = '.'; } String[] locationIdsLine = reader.readLine(columnSeparatorChar); String[] parameterIdsAndUnitsLine = reader.readLine(columnSeparatorChar); if (locationIdsLine.length != parameterIdsAndUnitsLine.length) throw new IOException("Number of locations not the same as the number of parameters\n" + reader.getFileAndLineNumber()); columnCount = locationIdsLine.length; DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader(); for (int i = 1; i < columnCount; i++) { header.setLocationId(locationIdsLine[i]); String parAndUnit = parameterIdsAndUnitsLine[i]; header.setParameterId(getName(parAndUnit)); header.setUnit(getUnit(parAndUnit)); contentHandler.createTimeSeriesHeaderAlias(i, header); } } private static String getName(String string) { String res = TextUtils.leftFrom(string, '['); if (res == null) res = string; if (res == null) return null; return TextUtils.removeCharacters(res, QUOTE_CHARACTERS); } private static String getUnit(String string) { String res = TextUtils.getTextBetween(string, "[", "]"); if (res == null) return null; return TextUtils.removeCharacters(res, QUOTE_CHARACTERS); } }