voorbeeld resultaat Dinter O2

Code

library(tidyverse)
library(leaflet)
library(sf)
library(plotly)
# library(anomalize)
library(slider)
library(anomaly)

# read custom functions
source('../scripts/test_get_tsfeatures.R')

Data inlezen

Code

locs <- read_csv2("../data/raw/locatie_mapping.csv")
sensor_lims <- read_csv2("../data/raw/meet_range_sensoren.csv") %>% 
  filter(path_length %in% c(1, NA))
par_ranges <- readxl::read_excel('../data/raw/validation_master_v1.xlsx', sheet = 'realistic_range')
df_dt <-
  arrow::read_parquet("../data/modified/sensor_data_tot.parquet", as_data_frame = FALSE) %>%
  filter(project == "Project RWZI Dinther") %>%
  collect() %>%
  mutate(
    sensor = case_when(
      sensor == 'C4E' ~ 'Ponsel C4E',
      sensor == 'OPTOD' ~ 'Ponsel OPTOD',
      sensor == 'PHEHT' ~ 'Ponsel PHEHT',
      TRUE ~ sensor
    )
  )
# df_extra <- read_csv("../data/intermediate/data_voorbereidingworkshop_extra.csv") %>%
#    mutate(
#     sensor = case_when(
#       sensor == 'C4E' ~ 'Ponsel C4E',
#       sensor == 'PHEHT' ~ 'Ponsel PHEHT',
#       TRUE ~ sensor
#     ),
#     datetime = ymd_hms(datetime, tz = 'CET')
#   )
df <- filter(df_dt, parameter_combi == 'aEFFLDI003 - 2620015709 WA1111-EU-V2 / Dissolved Oxygen (OPTOD)(mg/L)')  

s_lims <- semi_join(sensor_lims, df)
lims <- c(s_lims$ondergrens, s_lims$bovengrens)

# oh <- read_csv('../data/raw/data_oh_voor_workshop.csv') %>% 
#   mutate(datum = ymd_hms(datum, tz = 'CET'))
# 
# oh_saqc <- oh %>% select(datum_start = datum) %>% 
#   mutate(maint = datum_start + hours(3))
# 
# df_lab_cor <- read_csv('../data/raw/lab_cor_data_workshop.csv') %>% 
#   mutate(datetime_compare = ymd_hms(datetime_compare, tz = 'CET'))

criteria tabellen

Code

sensor_lims %>%
  knitr::kable(caption = glue::glue("Sensor ranges, dit geval: {s_lims$sensor} {s_lims$parameter_code} {s_lims$hoedanigheid_code}"))

Sensor ranges, dit geval: Ponsel OPTOD O2 NVT
sensor	parameter_code	hoedanigheid_code	eenheid_code	ondergrens	bovengrens	path_length
Ponsel PHEHT	pH	NVT	DIMSLS	0	14	NA
Ponsel PHEHT	T	NVT	oC	-5	50	NA
Ponsel C4E	T	NVT	oC	-5	50	NA
Ponsel C4E	GELDHD	NVT	uS/cm	0	2000	NA
Ponsel OPTOD	O2	NVT	mg/l	0	20	NA
Ponsel OPTOD	O2	NVT	%	0	200	NA
Ponsel OPTOD	T	NVT	oC	-5	50	NA
Eureka Manta +35	pH	NVT	DIMSLS	0	14	NA
Eureka Manta +35	GELDHD	NVT	uS/cm	0	5000	NA
Eureka Manta +35	O2	NVT	mg/l	0	50	NA
Eureka Manta +35	O2	NVT	%	0	500	NA
Eureka Manta +35	T	NVT	oC	-5	50	NA
YSI EXO 3	pH	NVT	DIMSLS	0	14	NA
YSI EXO 3	GELDHD	NVT	mS/cm	0	100	NA
YSI EXO 3	GELDHD	NVT	uS/cm	0	100000	NA
YSI EXO 3	O2	NVT	mg/l	0	50	NA
YSI EXO 3	O2	NVT	%	0	500	NA
YSI EXO 3	T	NVT	oC	-5	50	NA
Trios NICO	NO3	nf	mg/l	0	266	1
Trios NICO	NO3	Nnf	mg/l	0	60	1
OTT EcoN	NO3	nf	mg/l	0	266	1
OTT EcoN	NO3	Nnf	mg/l	0	60	1
YSI EXO 2	pH	NVT	DIMSLS	0	14	NA
YSI EXO 2	GELDHD	NVT	uS/cm	0	100000	NA
YSI EXO 2	O2	NVT	mg/l	0	50	NA
YSI EXO 2	O2	NVT	%	0	500	NA
YSI EXO 2	T	NVT	oC	-5	50	NA

Code

par_ranges %>% select(parameter_id, jaar_min, jaar_max) %>% knitr::kable(caption = "Realistische ranges per parameter, dit geval Zuurstof_opgelost_mg/l")

Realistische ranges per parameter, dit geval Zuurstof_opgelost_mg/l
parameter_id	jaar_min	jaar_max
Ammonium_mg/l	0.00	1000.0
Ammonium_N_mg/l_AW	0.00	1000.0
Ammonium_N_mg/l_OW	0.00	1000.0
Chlorofyl-a_blauwalg_ug/l	0.00	5000.0
Chlorofyl-a_groenalg_ug/l	0.00	5000.0
Doorzicht_dm	0.00	617.0
Geleidendheid_25oC_uS/cm	100.00	1000.0
Geleidendheid_25oC_mS/m	10.00	100.0
Lichtintensiteit_1/m	0.00	28000.0
Lichtintensiteit_ref_A_212_nm_SNR	0.00	28000.0
Lichtintensiteit_ref_B_254_nm_SNR	0.00	28000.0
Lichtintensiteit_ref_C_360_nm_SNR	0.00	28000.0
Lichtintensiteit_ref_D_reference_diode_SNR	0.00	28000.0
Nitraat_mg/l	0.00	50.0
Nitraat_N_mg/l	0.00	50.0
pH	2.00	13.0
Ptot_P_mg/l	0.00	4.0
Redoxpotentiaal_mV	-400.00	800.0
Relative_fluoresence_units_blauwalg	0.00	100.0
Relative_fluoresence_units_groenalg	0.00	100.0
SAC_254_nm	0.00	1500.0
Saliniteit_PSU	0.05	0.5
SQI	0.00	1.0
Temperatuur_oC_OW	NA	NA
Totaal_opgeloste_bestanddelen_mg/l	0.00	500.0
Troebelheid_NTU	0.00	10000.0
TRP_mg/l	0.00	1.0
Zuurstof_verzadiging_%	0.00	300.0
Zuurstof_opgelost_mg/l	NA	NA
Chlorofyl-a_blauwalg_ug/l	0.00	400.0
Chlorofyl-a_blauwalg_ug/l	0.00	400.0
Chlorofyl-a_groenalg_ug/l	0.00	400.0
Chlorofyl-a_groenalg_ug/l	0.00	400.0
Geleidendheid_25oC_mS/m	25.00	100.0
Geleidendheid_25oC_mS/m	25.00	250.0
Geleidendheid_25oC_mS/m	30.00	260.0
Geleidendheid_25oC_mS/m	43.00	54.0
Geleidendheid_25oC_mS/m	20.00	83.0
Temperatuur_oC_OW	1.00	28.0
Temperatuur_oC_OW	6.00	28.0
Temperatuur_oC_OW	9.00	30.0
Temperatuur_oC_OW	4.00	28.0
Temperatuur_oC_OW	4.00	28.0
pH	6.00	11.0
pH	6.00	11.0
pH	6.00	9.0
pH	7.00	11.0
pH	7.00	11.0
Zuurstof_verzadiging_%	0.00	180.0
Zuurstof_verzadiging_%	0.00	130.0
Zuurstof_verzadiging_%	0.00	100.0
Zuurstof_verzadiging_%	0.00	250.0
Zuurstof_verzadiging_%	0.00	250.0
Zuurstof_opgelost_mg/l	NA	NA
Zuurstof_opgelost_mg/l	NA	NA
Zuurstof_opgelost_mg/l	NA	NA
Zuurstof_opgelost_mg/l	NA	NA
Zuurstof_opgelost_mg/l	NA	NA

Visualiseer originele data serie

Code

p <- ggplot(df, aes(x = datetime, y = waarde)) +
  geom_point(size = 0.1, shape = 1) +
  geom_line() +
  labs(x = NULL, y = glue::glue("{s_lims$parameter_code} [{s_lims$eenheid_code}]"))
 
#ggplotly(p)
p

Check op onrealistische waarden

Code

lims_real <-  c(0, 12)
df <- df %>% mutate(waarde_realistisch = between(waarde, left = lims_real[1], right = lims_real[2]))

ggplot(df, aes(x = datetime, y = waarde)) +
  geom_line() +
      geom_point(aes(col = waarde_realistisch, shape = waarde_realistisch)) +
      labs(x = NULL, y = "NO3-N [mg/l]") +
      scale_color_manual(values = c("red", "black")) +
      scale_shape_manual(values = c(1, NA))

Check op flatlines

Code

df <- get_flatline(dataset = df, value_col = "waarde", threshold = 0, n_measurements = 10)


ggplot(filter(df, !is.na(flatline)), aes(x = datetime, y = waarde)) +
  geom_line() +
      geom_point(aes(col = flatline, shape = flatline)) +
      labs(x = NULL, y = "NO3-N [mg/l]") +
      scale_color_manual(values = c("black", "red")) +
      scale_shape_manual(values = c(NA, 1))

Check overige outliers

Code

df_trans <- filter(df, flatline == FALSE, waarde_realistisch == TRUE) %>% 
  distinct(datetime, waarde) %>% 
    group_by(datetime) %>%
    summarise(waarde = mean(waarde, na.rm = TRUE)) %>%
    ungroup() %>%
    transform_data_uv(time_col = 'datetime')

  df_trans$features <- slide(df_trans, ~get_tsfeatures(.x$waarde), .before = 9, .complete = FALSE)
  df_trans <- df_trans %>% unnest_wider(features)
  
col_keep <- is.na(df_trans) %>% colMeans()  
col_keep <- names(col_keep[col_keep < 1])
col_keep <- col_keep[!col_keep %in%  c('datetime', 'waarde', 'time')] 
df_testout <- df_trans[, col_keep]

out_forest <- isotree::isolation.forest(data = df_testout, ntrees = 500)

df_testout$datetime <- df_trans$datetime
df_testout$waarde <- df_trans$waarde
df_testout$iforest_score <- predict(out_forest, df_testout)
iforest_th <- quantile(df_testout$iforest_score, probs = 0.99, na.rm = TRUE, names = FALSE)
df_testout$iforest_type <- df_testout$iforest_score >= iforest_th

#plot
ggplot(df_testout, aes(x = datetime, y = waarde)) +
  geom_line() +
      geom_point(aes(col = iforest_type, shape = iforest_type)) +
      # labs(title = par) +
      scale_color_manual(values = c("black", "red")) +
      scale_shape_manual(values = c(NA, 1))

Disclaimer

Space shortcuts

Page tree

voorbeeld resultaat Dinter O2

voorbeeld resultaat Dinter O2

Data inlezen

criteria tabellen

Visualiseer originele data serie

Check op onrealistische waarden

Check op flatlines

Check overige outliers

Space shortcuts

Page tree

Voorbeeld resultaat Dinter O2

voorbeeld resultaat Dinter O2

Data inlezen

criteria tabellen

Visualiseer originele data serie

Check op onrealistische waarden

Check op flatlines

Check overige outliers