diff --git a/.gitignore b/.gitignore index 583b596..af350e8 100644 --- a/.gitignore +++ b/.gitignore @@ -136,6 +136,9 @@ uv.lock # Quarto docs/_site/ +docs/.quarto/ +docs/**/*.quarto_ipynb* +docs/api/*.qmd +!docs/api/_metadata.yml -# created by quartodoc -docs/api \ No newline at end of file +# created by quartodoc \ No newline at end of file diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index ad29309..0000000 --- a/docs/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.quarto/ -**/*.quarto_ipynb diff --git a/docs/Makefile b/docs/Makefile index 7d837ac..f1c20a6 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -20,4 +20,5 @@ preview: api uv run quarto preview clean: - rm -rf _site api objects.json + rm -rf _site objects.json + rm -f api/*.qmd diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 34bab29..007db88 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -2,13 +2,15 @@ project: type: website website: - title: "tsod" + title: "" page-footer: "© 2025 DHI Group" repo-url: https://github.com/DHI/tsod repo-actions: [edit] repo-subdir: docs + page-navigation: true navbar: + logo: https://raw.githubusercontent.com/DHI/tsod/main/images/logo/tsod.png tools: - icon: github menu: @@ -17,13 +19,27 @@ website: - text: Report a Bug url: https://github.com/DHI/tsod/issues left: - - href: index.qmd - text: Home - - href: getting-started.qmd - text: Getting Started - - href: design.qmd - - href: api/index.qmd - text: API Reference + - text: Home + href: index.qmd + - text: User Guide + href: user-guide/getting-started.qmd + - text: Examples + href: examples/index.qmd + - text: API Reference + href: api/index.qmd + + sidebar: + - title: "User Guide" + style: docked + contents: + - user-guide/getting-started.qmd + - user-guide/design.qmd + - title: "Examples" + style: docked + contents: + - examples/index.qmd + - examples/quick-start.qmd + - examples/combining-detectors.qmd filters: - interlinks diff --git a/docs/examples/combining-detectors.qmd 
b/docs/examples/combining-detectors.qmd new file mode 100644 index 0000000..f80da2d --- /dev/null +++ b/docs/examples/combining-detectors.qmd @@ -0,0 +1,68 @@ +--- +title: Combining detectors +description: Combine multiple detectors for realistic water-domain anomaly detection +jupyter: tsod +--- + +This example demonstrates how to combine detectors for a flow time series with both spikes and flatline behavior. + +## Imports + +```{python} +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +from tsod import CombinedDetector, RangeDetector, ConstantValueDetector +``` + +## Create sample data + +```{python} +rng = np.random.default_rng(7) +time = pd.date_range("2025-02-01", periods=300, freq="15min") + +flow = 45 + 6 * np.sin(np.linspace(0, 8 * np.pi, len(time))) + rng.normal(0, 0.8, len(time)) + +# Out-of-range spikes +flow[[80, 210]] = [72, 10] + +# Sensor flatline period +flow[130:145] = flow[129] + +series = pd.Series(flow, index=time, name="flow_m3s") +series.head() +``` + +## Fit if relevant + +```{python} +normal_window = series.iloc[:100] + +range_detector = RangeDetector(quantiles=(0.01, 0.99)) +range_detector.fit(normal_window) + +constant_detector = ConstantValueDetector(window_size=6) + +detector = CombinedDetector([range_detector, constant_detector]) +``` + +## Detect anomalies + +```{python} +anomalies = detector.detect(series) +anomalies.sum() +``` + +## Visualize results + +```{python} +fig, ax = plt.subplots(figsize=(11, 4)) +series.plot(ax=ax, label="Flow") +series[anomalies].plot(ax=ax, linestyle="", marker="o", color="crimson", label="Anomaly") +ax.set_ylabel("m³/s") +ax.set_title("Combined detector output") +ax.legend() +``` + +This draft can later be replaced with a domain-specific case based on observed station data. 
diff --git a/docs/examples/index.qmd b/docs/examples/index.qmd new file mode 100644 index 0000000..84f9854 --- /dev/null +++ b/docs/examples/index.qmd @@ -0,0 +1,34 @@ +--- +title: Examples +toc: false +--- + +# Examples + +This section contains water-domain examples for using **tsod**. + +Each example follows the same practical structure: + +- show imports +- create sample data +- demonstrate `detect()` and `fit()` where relevant +- show visualization of results + +## Available examples + +### [Quick start](quick-start.qmd) + +A minimal end-to-end example to get running quickly with a simple detector workflow. + +### [Combining detectors](combining-detectors.qmd) + +Demonstrates how to combine multiple detectors and interpret the merged anomaly signal. + +### [Water level example notebook](https://github.com/DHI/tsod/blob/main/notebooks/Example%20Water%20Level.ipynb) + +A realistic notebook example from the water domain based on observed water-level time series. + +## Planned additions + +Additional examples from different water-related domains will be added over time. + diff --git a/docs/examples/quick-start.qmd b/docs/examples/quick-start.qmd new file mode 100644 index 0000000..1eddc76 --- /dev/null +++ b/docs/examples/quick-start.qmd @@ -0,0 +1,58 @@ +--- +title: Quick start +description: Detect basic anomalies in a synthetic water-level time series +jupyter: tsod +--- + +This example shows a minimal end-to-end workflow on water-level data. 
+ +## Imports + +```{python} +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +from tsod import RangeDetector +``` + +## Create sample data + +```{python} +rng = np.random.default_rng(42) +time = pd.date_range("2025-01-01", periods=240, freq="h") + +baseline = 1.5 + 0.2 * np.sin(np.linspace(0, 6 * np.pi, len(time))) +noise = rng.normal(0, 0.03, len(time)) +water_level = baseline + noise + +# Inject a few unrealistic spikes +water_level[[40, 120, 180]] = [2.4, 0.2, 2.6] + +series = pd.Series(water_level, index=time, name="water_level_m") +series.head() +``` + +## Detect anomalies + +```{python} +detector = RangeDetector(min_value=0.8, max_value=2.1) +anomalies = detector.detect(series) + +anomalies.sum() +``` + +## Visualize results + +```{python} +fig, ax = plt.subplots(figsize=(10, 4)) +series.plot(ax=ax, label="Water level") +series[anomalies].plot(ax=ax, linestyle="", marker="o", color="red", label="Anomaly") +ax.set_ylabel("m") +ax.set_title("Quick anomaly screening") +ax.legend() +``` + +## Next step + +Try combining multiple detectors for a more robust signal in [Combining detectors](combining-detectors.qmd). 
diff --git a/docs/index.qmd b/docs/index.qmd index 5577cf2..9806e30 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -20,12 +20,13 @@ format-links: false Install **tsod** with [`pip`](https://pypi.org/project/tsod/) and get up and running in minutes - +[**Getting started**](user-guide/getting-started.qmd) ## {{< fa brands python >}} **It's just Python** Use familiar Python workflows to integrate anomaly detection into your models and pipelines +[**API Reference**](api/index.qmd) ::: @@ -40,6 +41,7 @@ Choose from detectors like `RangeDetector` and `ConstantValueDetector` to identi **tsod** is licensed under MIT and the source code is available on [GitHub](https://github.com/DHI/tsod) +[**Design philosophy**](user-guide/design.qmd) ::: diff --git a/docs/design.qmd b/docs/user-guide/design.qmd similarity index 97% rename from docs/design.qmd rename to docs/user-guide/design.qmd index ce25eb4..411108c 100644 --- a/docs/design.qmd +++ b/docs/user-guide/design.qmd @@ -1,41 +1,41 @@ -# Design philosophy - - -## {{< fa brands python >}} Familiar - -tsod aims to use a syntax familiar to users of scientific computing libraries such as Pandas & sckit-learn. - -## {{< fa download >}} Easy to install - -```bash -$ pip install tsod -``` - - -## {{< fa brands osi >}} Open Source​ -tsod is an open source project licensed under the MIT license. -The software is provided free of charge with the source code available for inspection and modification. - -Contributions are welcome! - -## {{< fa comments >}} Easy to collaborate -By developing tsod on GitHub along with a completely open discussion, we believe that the collaboration between developers and end-users results in a useful library. - -## {{< fa list-ol >}} Reproducible -By providing the historical versions of tsod on PyPI it is possible to reproduce the behaviour of an older existing system, based on an older version. 
- -**Install specific version** - -```bash -pip install tsod==0.2.0 -``` - -## {{< fa brands github >}} Easy access to new features -Features are being added all the time, by developers at DHI in offices all around the globe as well as external contributors using tsod in their work. -These new features are always available from the [main branch on GitHub](https://github.com/DHI/tsod) and thanks to automated testing, it is always possible to verify that the tests passes before downloading a new development version. - -**Install development version** - -```bash -$ pip install https://github.com/DHI/tsod/archive/main.zip +# Design philosophy + + +## {{< fa brands python >}} Familiar + +tsod aims to use a syntax familiar to users of scientific computing libraries such as Pandas & scikit-learn. + +## {{< fa download >}} Easy to install + +```bash +$ pip install tsod +``` + + +## {{< fa brands osi >}} Open Source +tsod is an open source project licensed under the MIT license. +The software is provided free of charge with the source code available for inspection and modification. + +Contributions are welcome! + +## {{< fa comments >}} Easy to collaborate +By developing tsod on GitHub along with a completely open discussion, we believe that the collaboration between developers and end-users results in a useful library. + +## {{< fa list-ol >}} Reproducible +By providing the historical versions of tsod on PyPI it is possible to reproduce the behaviour of an older existing system, based on an older version. + +**Install specific version** + +```bash +pip install tsod==0.2.0 +``` + +## {{< fa brands github >}} Easy access to new features +Features are being added all the time, by developers at DHI in offices all around the globe as well as external contributors using tsod in their work. 
+These new features are always available from the [main branch on GitHub](https://github.com/DHI/tsod) and thanks to automated testing, it is always possible to verify that the tests pass before downloading a new development version. + +**Install development version** + +```bash +$ pip install https://github.com/DHI/tsod/archive/main.zip ``` \ No newline at end of file diff --git a/docs/getting-started.qmd b/docs/user-guide/getting-started.qmd similarity index 95% rename from docs/getting-started.qmd rename to docs/user-guide/getting-started.qmd index 5c2a9c0..5273396 100644 --- a/docs/getting-started.qmd +++ b/docs/user-guide/getting-started.qmd @@ -1,53 +1,57 @@ -Getting started -=============== - -![](https://raw.githubusercontent.com/DHI/tsod/main/images/anomaly.png) - -Sensors often provide faulty or missing observations. These anomalies must be detected automatically and replaced with more feasible values before feeding the data to numerical simulation engines as boundary conditions or real time decision systems. - -This package aims to provide examples and algorithms for detecting anomalies in time series data specifically tailored to DHI users and the water domain. It is simple to install and deploy operationally and is accessible to everyone (open-source). - -`tsod` is library for timeseries data. The format of a timeseries is always a [](`pandas.Series`) and in some cases with a [](`pandas.DatetimeIndex`) - -1. Get data in the form of a a [](`pandas.Series`) (see Data formats below) -2. Select one or more detectors e.g. [](`~tsod.RangeDetector`) or [](`~tsod.ConstantValueDetector`) -3. Define parameters (e.g. min/max, max rate of change) or... -4. Fit parameters based on normal data, i.e. without outliers -5. Detect outliers in any dataset - -Example -------- - -```{python} -import pandas as pd -from tsod import RangeDetector -rd = RangeDetector(max_value=2.0) -data = pd.Series([0.0, 1.0, 3.0]) # 3.0 is out of range i.e. 
an anomaly -anom = rd.detect(data) -anom -``` - -```{python} -data[anom] # get anomalous data -``` - -```{python} -data[~anom] # get normal data -``` - - -Saving and loading ------------------- -Save a configured detector -```python -cd = CombinedDetector([ConstantValueDetector(), RangeDetector()]) -cd.fit(normal_data) -cd.save("detector.joblib") -``` - -... and then later load it from disk -```python -my_detector = tsod.load("detector.joblib") -my_detector.detect(some_data) -``` +--- +title: Getting started +execute: + enabled: false +--- + + +![](https://raw.githubusercontent.com/DHI/tsod/main/images/anomaly.png) + +Sensors often provide faulty or missing observations. These anomalies must be detected automatically and replaced with more feasible values before feeding the data to numerical simulation engines as boundary conditions or real time decision systems. + +This package aims to provide examples and algorithms for detecting anomalies in time series data specifically tailored to DHI users and the water domain. It is simple to install and deploy operationally and is accessible to everyone (open-source). + +`tsod` is a library for timeseries data. The format of a timeseries is always a [](`pandas.Series`) and in some cases with a [](`pandas.DatetimeIndex`) + +1. Get data in the form of a [](`pandas.Series`) (see Data formats below) +2. Select one or more detectors e.g. [](`~tsod.RangeDetector`) or [](`~tsod.ConstantValueDetector`) +3. Define parameters (e.g. min/max, max rate of change) or... +4. Fit parameters based on normal data, i.e. without outliers +5. Detect outliers in any dataset + +Example +------- + +```{python} +import pandas as pd +from tsod import RangeDetector +rd = RangeDetector(max_value=2.0) +data = pd.Series([0.0, 1.0, 3.0]) # 3.0 is out of range i.e. 
an anomaly +anom = rd.detect(data) +anom +``` + +```{python} +data[anom] # get anomalous data +``` + +```{python} +data[~anom] # get normal data +``` + + +Saving and loading +------------------ +Save a configured detector +```python +cd = CombinedDetector([ConstantValueDetector(), RangeDetector()]) +cd.fit(normal_data) +cd.save("detector.joblib") +``` + +... and then later load it from disk +```python +my_detector = tsod.load("detector.joblib") +my_detector.detect(some_data) +``` \ No newline at end of file