package nl.wldelft.timeseriesparsers;
import nl.wldelft.util.TextUtils;
import nl.wldelft.util.io.LineReader;
import nl.wldelft.util.io.TextParser;
import nl.wldelft.util.timeseries.DefaultTimeSeriesHeader;
import nl.wldelft.util.timeseries.TimeSeriesContentHandler;
import java.io.IOException;
/**
* TimeSeries reader for CSV files with a simple structure (quite like the CSV files FEWS can export)
* <p>
* A detailed description can be found in JIRA issue FEWS-1995
*<pre>
*Example
*Location Names,Bewdley,Saxons Lode
*Location Ids,EA_H-2001,EA_H-2032
*Time,Rainfall,Rainfall
*2003-03-01 01:00:00,-999,-999
*2003-03-01 01:15:00,1.000,1.000
*2003-03-01 01:30:00,2.000,2.000
*2003-03-01 01:45:00,3.000,3.000
*2003-03-01 02:00:00,4.000,4.000
*2003-03-01 02:15:00,-999,5.000
*2003-03-01 02:30:00,6.000,6.000
*2003-03-01 02:45:00,7.000,7.000
*2003-03-01 03:00:00,8.000,8.000
*2003-03-01 03:15:00,9.000,9.000
*2003-03-01 03:30:00,10.000,10.000
*2003-03-01 03:45:00,11.000,11.000
*2003-03-01 04:00:00,12.000,12.000
*2003-03-01 04:15:00,13.000,13.000
*2003-03-01 04:30:00,14.000,14.986
*</pre>
*/
public class CsvTimeSeriesParser implements TextParser<TimeSeriesContentHandler> {
    // Quote characters that are stripped from the parameter names and units found in the header.
    private static final char[] QUOTE_CHARACTERS = new char[]{'\'', '\"'};

    // Decimal separator passed to the content handler for every value; chosen in parseHeader().
    private char decimalSeparator = '\0';
    // Number of columns, including the leading time column; taken from the location ids line.
    private int columnCount = 0;
    private LineReader reader = null;
    private TimeSeriesContentHandler contentHandler = null;
    // Column separator (',' or ';'); chosen in parseHeader() from the first line of the file.
    private char columnSeparatorChar = '\0';

    /**
     * Parses the three header lines and then every remaining line as a data row.
     * <p>
     * The first column of each data row is handed to the content handler as a time in the format
     * {@code yyyy-MM-dd HH:mm:ss}, using the content handler's default time zone. Every other
     * column is handed over as a value for the header alias that was registered for that column
     * index while parsing the header.
     *
     * @param reader          source of the lines to parse
     * @param virtualFileName not used by this parser
     * @param contentHandler  receives the headers, times and values that are read
     * @throws IOException if a header line is missing, if the location ids line and the
     *                     parameter line have a different number of columns, or if reading fails
     */
    @Override
    public void parse(LineReader reader, String virtualFileName, TimeSeriesContentHandler contentHandler) throws IOException {
        this.contentHandler = contentHandler;
        // Registers the range [-999.9999, -999] with the handler as missing values.
        this.contentHandler.addMissingValueRange(-999.9999f, -999f);
        this.reader = reader;

        parseHeader();

        // The same buffer is reused for every row; reading stops as soon as readLine reports -1.
        for (String[] buffer = new String[columnCount]; this.reader.readLine(columnSeparatorChar, buffer) != -1;) {
            this.contentHandler.setTime(this.contentHandler.getDefaultTimeZone(), "yyyy-MM-dd HH:mm:ss", buffer[0]);
            // Column 0 is the time column, so the value columns start at index 1.
            for (int i = 1; i < columnCount; i++) {
                this.contentHandler.setTimeSeriesHeader(i);
                this.contentHandler.setValue(decimalSeparator, buffer[i]);
                this.contentHandler.applyCurrentFields();
            }
        }
    }

    /**
     * Reads the first three lines of the file, which contain vital information about the file:
     * <ul>
     * <li>line 1 (location names) is only used to detect the separators: when it contains a
     * {@code ;} and no {@code ,} the column separator is {@code ;} and the decimal separator is
     * {@code ,}; otherwise the column separator is {@code ,} and the decimal separator is
     * {@code .}</li>
     * <li>line 2 holds the location ids</li>
     * <li>line 3 holds the parameter ids, optionally followed by a unit between square brackets</li>
     * </ul>
     * For every column except the first (time) column a header alias is registered with the
     * content handler under the column index.
     *
     * @throws IOException if one of the three header lines is missing, if the location ids line
     *                     and the parameter line have a different number of columns, or if
     *                     reading fails
     */
    private void parseHeader() throws IOException {
        // NOTE(review): the missing-line checks below assume LineReader returns null at end of
        // input, like java.io.BufferedReader does - confirm against the LineReader API.
        String locationNamesLine = reader.readLine();
        if (locationNamesLine == null) {
            throw new IOException("Location names line is missing, file is empty\n" + reader.getFileAndLineNumber());
        }

        if (locationNamesLine.contains(";") && !locationNamesLine.contains(",")) {
            columnSeparatorChar = ';';
            decimalSeparator = ',';
        } else {
            columnSeparatorChar = ',';
            decimalSeparator = '.';
        }

        String[] locationIdsLine = reader.readLine(columnSeparatorChar);
        if (locationIdsLine == null) {
            throw new IOException("Location ids line is missing\n" + reader.getFileAndLineNumber());
        }

        String[] parameterIdsAndUnitsLine = reader.readLine(columnSeparatorChar);
        if (parameterIdsAndUnitsLine == null) {
            throw new IOException("Parameter ids line is missing\n" + reader.getFileAndLineNumber());
        }

        if (locationIdsLine.length != parameterIdsAndUnitsLine.length) {
            throw new IOException("Number of locations not the same as the number of parameters\n" + reader.getFileAndLineNumber());
        }

        columnCount = locationIdsLine.length;

        // A single header object is refilled for every column before it is registered.
        DefaultTimeSeriesHeader header = new DefaultTimeSeriesHeader();
        // Column 0 is the time column, so the series columns start at index 1.
        for (int i = 1; i < columnCount; i++) {
            header.setLocationId(locationIdsLine[i]);
            String parAndUnit = parameterIdsAndUnitsLine[i];
            header.setParameterId(getName(parAndUnit));
            header.setUnit(getUnit(parAndUnit));
            contentHandler.createTimeSeriesHeaderAlias(i, header);
        }
    }

    /**
     * Extracts the parameter name from a header cell such as {@code Rainfall[mm]}.
     *
     * @param string header cell holding a parameter name, optionally followed by a unit in brackets
     * @return the text to the left of {@code [} as returned by {@code TextUtils.leftFrom}, or the
     *         whole cell when that call returns {@code null}, with quote characters removed;
     *         {@code null} when both are {@code null}
     */
    private static String getName(String string) {
        String res = TextUtils.leftFrom(string, '[');
        if (res == null) {
            res = string;
        }
        if (res == null) {
            return null;
        }
        return TextUtils.removeCharacters(res, QUOTE_CHARACTERS);
    }

    /**
     * Extracts the unit from a header cell such as {@code Rainfall[mm]}.
     *
     * @param string header cell holding a parameter name, optionally followed by a unit in brackets
     * @return the text between {@code [} and {@code ]} with quote characters removed, or
     *         {@code null} when {@code TextUtils.getTextBetween} returns {@code null}
     */
    private static String getUnit(String string) {
        String res = TextUtils.getTextBetween(string, "[", "]");
        if (res == null) {
            return null;
        }
        return TextUtils.removeCharacters(res, QUOTE_CHARACTERS);
    }
}
|