diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 5fa60135..00000000
--- a/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-ms2pip/_models_c/**/*.c filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml
index 68f1bb3a..a400f556 100644
--- a/.github/workflows/build_and_publish.yml
+++ b/.github/workflows/build_and_publish.yml
@@ -5,49 +5,20 @@ on:
     types: [created]
 
 jobs:
-  build-sdist:
-    name: Build source distribution
+  build:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-        with:
-          lfs: "true"
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install build
-      - name: Build sdist
-        run: python -m build --sdist --outdir dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-source
-          path: dist
-
-  build-wheels:
-    name: Build wheels on ${{ matrix.os }}
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, windows-latest, macos-15, macos-15-intel]
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          lfs: "true"
-      - name: Build wheels
-        uses: pypa/cibuildwheel@v3.4.0
-        with:
-          output-dir: dist
+      - uses: astral-sh/setup-uv@v6
+      - name: Build sdist and wheel
+        run: uv build --out-dir dist
       - uses: actions/upload-artifact@v4
         with:
-          name: dist-${{ matrix.os }}
+          name: dist
           path: dist
 
   publish-to-pypi:
-    needs: [build-sdist, build-wheels]
+    needs: [build]
     runs-on: ubuntu-latest
     environment:
       name: pypi
@@ -56,10 +27,8 @@ jobs:
       id-token: write
     steps:
       - uses: actions/download-artifact@v4
-      - name: Move wheels to dist
-        run: |
-          mkdir dist/
-          mv dist-*/*.whl dist/
-          mv dist-*/*.tar.gz dist/
+        with:
+          name: dist
+          path: dist
       - name: Publish to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 023fd57c..bc5ed0c2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -8,40 +8,36 @@ on:
   workflow_dispatch:
 
 jobs:
-  build:
+  lint:
     runs-on: ubuntu-latest
-    strategy:
-      max-parallel: 4
-      fail-fast: false
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v6
+      - run: uvx ruff check --output-format=github ./ms2pip
 
+  typecheck:
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v6
         with:
-          lfs: "true"
+          enable-cache: true
+      - run: uv run --all-extras --group dev --python 3.13 ty check ms2pip/
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.11", "3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v6
         with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install dependencies
+          enable-cache: true
+      - name: Install and test
         run: |
-          python -m pip install --upgrade pip
-          pip install ruff
-
-      - name: Check with Ruff
-        run: ruff check --output-format=github ./ms2pip
-
-      - name: Build and install ms2pip
-        run: |
-          pip install .[dev]
-
-      - name: Test with pytest
-        run: |
-          pytest
-
-      - name: Test installation
+          uv run --python ${{ matrix.python-version }} pytest
+      - name: Test CLI
         run: |
-          ms2pip --help
+          uv run --python ${{ matrix.python-version }} ms2pip --help
diff --git a/.gitignore b/.gitignore
index 4e76cd43..8a137c9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,5 @@
-.pytest_cache/
-
-# Custom MS2PIP stuff
+# Project-specific data directory
 data/
-*_pyx.c
-*_pyx_*.c
-ms2pip/models_xgboost/*.xgboost
 
 # Pytest
 .pytest_cache/
@@ -98,6 +93,7 @@ celerybeat-schedule
 .venv/
 venv/
 ENV/
+.venv-*/
 
 # Spyder project settings
 .spyderproject
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 248bf4ca..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,3 +0,0 @@
-include ms2pip/_cython_modules/*.c
-include ms2pip/_models_c/*/*.c
-include ms2pip/_models_c/*.h
diff --git a/README.rst b/README.rst
index 58a3f933..6557943e 100644
--- a/README.rst
+++ b/README.rst
@@ -36,7 +36,8 @@ peptide fragmentation spectrum that accurately resembles its observed equivalent
 can be used to validate peptide identifications, generate proteome-wide spectral libraries, or to
 select discriminative transitions for targeted proteomics. MS²PIP employs the
 `XGBoost <https://xgboost.readthedocs.io/en/stable/>`_ machine learning algorithm and is written in
-Python and C.
+Python, with helper functions in Rust
+(`ms2rescore-rs <https://github.com/compomics/ms2rescore-rs>`_).
 
 .. figure:: https://raw.githubusercontent.com/compomics/ms2pip/v4.0.0/img/mirror-DVAQIFNNILR-2.png
 
diff --git a/docs/source/api/ms2pip.result.rst b/docs/source/api/ms2pip.result.rst
index aee97a08..5946933f 100644
--- a/docs/source/api/ms2pip.result.rst
+++ b/docs/source/api/ms2pip.result.rst
@@ -3,3 +3,4 @@ ms2pip.result
 *************
 
 .. automodule:: ms2pip.result
+   :members:
diff --git a/docs/source/api/ms2pip.spectrum.rst b/docs/source/api/ms2pip.spectrum.rst
index 0bcea63b..0374cab5 100644
--- a/docs/source/api/ms2pip.spectrum.rst
+++ b/docs/source/api/ms2pip.spectrum.rst
@@ -3,3 +3,4 @@ ms2pip.spectrum
 ***************
 
 .. automodule:: ms2pip.spectrum
+   :members:
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 5ca4efbd..3604ef14 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -6,17 +6,12 @@ Pip package
 .. image:: https://flat.badgen.net/badge/install%20with/pip/green
    :target: https://pypi.org/project/ms2pip/
 
-With Python 3.9 or higher, run:
+With Python 3.11 or higher, run:
 
 .. code-block:: bash
 
    pip install ms2pip
 
-Compiled wheels are available for various Python versions on 64bit Linux,
-Windows, and macOS. This should install MS²PIP in a few seconds. For other
-platforms, MS²PIP can be built from source, although it can take a while
-to compile the large prediction models.
-
 We recommend using a `venv <https://docs.python.org/3/library/venv.html>`__ or
 `conda <https://docs.conda.io/en/latest/>`__ virtual environment.
 
diff --git a/ms2pip/__init__.py b/ms2pip/__init__.py
index 2eaef20c..a0fbe937 100644
--- a/ms2pip/__init__.py
+++ b/ms2pip/__init__.py
@@ -1,13 +1,14 @@
 # isort: skip_file
 """MS2PIP: Accurate and versatile peptide fragmentation spectrum prediction."""
 
-__version__ = "4.2.0-alpha.2"
+__version__ = "4.2.0-alpha.3"
 __all__ = [
     "predict_single",
     "predict_batch",
     "predict_library",
     "correlate",
     "correlate_preloaded",
+    "correlate_single",
     "get_training_data",
     "annotate_spectra",
     "download_models",
@@ -26,6 +27,7 @@
     predict_library,
     correlate,
     correlate_preloaded,
+    correlate_single,
     get_training_data,
     annotate_spectra,
     download_models,
diff --git a/ms2pip/__main__.py b/ms2pip/__main__.py
index b01ec7e1..1dc6beaf 100644
--- a/ms2pip/__main__.py
+++ b/ms2pip/__main__.py
@@ -1,7 +1,6 @@
 import logging
 import sys
 from pathlib import Path
-from typing import Optional
 
 import click
 import rich
@@ -32,7 +31,7 @@
 
 def _infer_output_name(
     input_filename: str,
-    output_name: Optional[str] = None,
+    output_name: str | None = None,
 ) -> Path:
     """Infer output filename from input filename if output_filename was not defined."""
     if output_name:
@@ -50,9 +49,7 @@ def cli(*args, **kwargs):
         format="%(message)s",
         datefmt="%Y-%m-%d %H:%M:%S",
         level=LOGGING_LEVELS[kwargs["logging_level"]],
-        handlers=[
-            RichHandler(rich_tracebacks=True, show_level=True, show_path=False)
-        ],
+        handlers=[RichHandler(rich_tracebacks=True, show_level=True, show_path=False)],
     )
     rich.print(build_credits())
 
@@ -79,7 +76,7 @@ def predict_single(*args, **kwargs):
     # Write output
     rich.print(build_prediction_table(predicted_spectrum))
     write_spectra(output_name, [result], output_format)
-    if plot:
+    if plot and predicted_spectrum is not None:  # nothing to plot without a prediction
         spectrum_to_png(predicted_spectrum, output_name)
 
 
@@ -142,6 +139,9 @@ def predict_library(*args, **kwargs):
 @click.option("--model", type=click.Choice(MODELS), default="HCD")
 @click.option("--model-dir")
 @click.option("--ms2-tolerance", type=float, default=0.02)
+@click.option(
+    "--ms2-tolerance-mode", type=click.Choice(["Da", "ppm"], case_sensitive=False), default="Da"
+)
 @click.option("--processes", "-n", type=int)
 def correlate(*args, **kwargs):
     # Parse arguments
@@ -171,6 +171,9 @@ def correlate(*args, **kwargs):
 @click.option("--spectrum-id-pattern", "-p")
 @click.option("--model", type=click.Choice(MODELS), default="HCD")
 @click.option("--ms2-tolerance", type=float, default=0.02)
+@click.option(
+    "--ms2-tolerance-mode", type=click.Choice(["Da", "ppm"], case_sensitive=False), default="Da"
+)
 @click.option("--processes", "-n", type=int)
 def get_training_data(*args, **kwargs):
     # Parse arguments
@@ -193,6 +196,9 @@ def get_training_data(*args, **kwargs):
 @click.option("--spectrum-id-pattern", "-p")
 @click.option("--model", type=click.Choice(MODELS), default="HCD")
 @click.option("--ms2-tolerance", type=float, default=0.02)
+@click.option(
+    "--ms2-tolerance-mode", type=click.Choice(["Da", "ppm"], case_sensitive=False), default="Da"
+)
 @click.option("--processes", "-n", type=int)
 def annotate_spectra(*args, **kwargs):
     # Parse arguments
@@ -203,8 +209,8 @@ def annotate_spectra(*args, **kwargs):
     results = ms2pip.core.annotate_spectra(*args, **kwargs)
 
     # Write intensities
-    output_name_int = output_name.with_name(output_name.stem + "_observations").with_suffix()
-    logger.info(f"Writing intensities to {output_name_int.with_suffix('.tsv')}")
+    output_name = output_name.with_name(output_name.stem + "_observations")
+    logger.info(f"Writing intensities to {output_name.with_suffix('.tsv')}")
     write_spectra(output_name, results, "tsv")
 
 
diff --git a/ms2pip/_cython_modules/__init__.py b/ms2pip/_cython_modules/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/ms2pip/_cython_modules/ms2pip_features_c.c b/ms2pip/_cython_modules/ms2pip_features_c.c
deleted file mode 100644
index 19749192..00000000
--- a/ms2pip/_cython_modules/ms2pip_features_c.c
+++ /dev/null
@@ -1,128 +0,0 @@
-// Compute feature vectors from peptide
-unsigned int* get_v_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge)
-	{
-	int i,j,k;
-
-	int fnum = 1; //first value in v is its length
-
-	for (i=0; i < 19; i++) {
-		count_n[i] = 0;
-		count_c[i] = 0;
-	}
-
-	//I need this for Omega
-	//important for sptms!!
-	peptide_buf[0] = peptide[0];
-	for (i=0; i < peplen; i++) {
-		if (peptide[i+1] > 18) {
-			peptide_buf[i+1] = sptm_mapper[peptide[i+1]];
-		}
-		else {
-			peptide_buf[i+1] = peptide[i+1];
-		}
-		count_c[peptide_buf[i+1]]++;
-	}
-
-	int num_shared = 0;
-
-	shared_features[num_shared++] = peplen;
-	shared_features[num_shared++] = charge;
-
-	shared_features[num_shared] = 0;
-	if (charge == 1) {
-		shared_features[num_shared] = 1;
-		}
-	num_shared++;
-	shared_features[num_shared] = 0;
-	if (charge == 2) {
-		shared_features[num_shared] =1;
-		}
-	num_shared++;
-	shared_features[num_shared] = 0;
-	if (charge == 3) {
-		shared_features[num_shared] =1;
-		}
-	num_shared++;
-	shared_features[num_shared] = 0;
-	if (charge == 4) {
-		shared_features[num_shared] =1;
-		}
-	num_shared++;
-	shared_features[num_shared] = 0;
-	if (charge >= 5) {
-		shared_features[num_shared] =1;
-		}
-	num_shared++;
-
-	for (j=0; j < num_props; j++) {
-		for (i=0; i < peplen; i++) {
-			props_buffer[i] = props[j][peptide_buf[i+1]];
-		}
-		qsort(props_buffer,peplen,sizeof(unsigned int),cmpfunc);
-		shared_features[num_shared++] = props_buffer[0];
-		shared_features[num_shared++] = props_buffer[(int)(0.25*(peplen-1))];
-		shared_features[num_shared++] = props_buffer[(int)(0.5*(peplen-1))];
-		shared_features[num_shared++] = props_buffer[(int)(0.75*(peplen-1))];
-		shared_features[num_shared++] = props_buffer[peplen-1];
-	}
-
-	for (i=0; i < peplen-1; i++) {
-		for (j=0; j<num_shared; j++) {
-			v[fnum++] = shared_features[j];
-		}
-		v[fnum++] = i+1; // length of N-term ion
-		v[fnum++] = peplen-i;  // length of C-term ion
-		count_n[peptide_buf[i+1]]++;
-		count_c[peptide_buf[peplen-i]]--;
-
-		for (j=0; j < 19; j++) {
-			v[fnum++] = count_n[j];  // amino acid count in N-term ion
-			v[fnum++] = count_c[j];  // amino acid count in C-term ion
-		}
-
-		for (j=0; j < num_props; j++) {
-			v[fnum++] = props[j][peptide_buf[1]];
-			v[fnum++] = props[j][peptide_buf[peplen]];
-			if (i==0) {
-				v[fnum++] = 0;
-			}
-			else {
-				v[fnum++] = props[j][peptide_buf[i-1]];
-			}
-			v[fnum++] = props[j][peptide_buf[i]];
-			v[fnum++] = props[j][peptide_buf[i+1]];
-			if (i==(peplen-1)) {
-				v[fnum++] = 0;
-			}
-			else {
-				v[fnum++] = props[j][peptide_buf[i+2]];
-			}
-			unsigned int s = 0;
-			for (k=0; k <= i; k++) {
-				props_buffer[k] = props[j][peptide_buf[k+1]];
-				s+= props_buffer[k];
-			}
-			v[fnum++] = s;
-			qsort(props_buffer,i+1,sizeof(unsigned int),cmpfunc);
-			v[fnum++] = props_buffer[0];
-			v[fnum++] = props_buffer[(int)(0.25*i)];
-			v[fnum++] = props_buffer[(int)(0.5*i)];
-			v[fnum++] = props_buffer[(int)(0.75*i)];
-			v[fnum++] = props_buffer[i];
-			s = 0;
-			for (k=i+1; k < peplen; k++) {
-				props_buffer[k-i-1] = props[j][peptide_buf[k+1]];
-				s+= props_buffer[k-i-1];
-			}
-			v[fnum++] = s;
-			qsort(props_buffer,peplen-i-1,sizeof(unsigned int),cmpfunc);
-			v[fnum++] = props_buffer[0];
-			v[fnum++] = props_buffer[(int)(0.25*(peplen-i-1))];
-			v[fnum++] = props_buffer[(int)(0.5*(peplen-i-1))];
-			v[fnum++] = props_buffer[(int)(0.75*(peplen-i-1))];
-			v[fnum++] = props_buffer[peplen-i-2];
-		}
-	}
-	v[0] = fnum-1;
-	return v;
-}
diff --git a/ms2pip/_cython_modules/ms2pip_init_c.c b/ms2pip/_cython_modules/ms2pip_init_c.c
deleted file mode 100644
index 66aa33fc..00000000
--- a/ms2pip/_cython_modules/ms2pip_init_c.c
+++ /dev/null
@@ -1,107 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-
-float* amino_masses;
-unsigned short* amino_F;
-unsigned short* sptm_mapper;
-float ntermmod;
-
-
-// For ms2pip_features_c.c
-unsigned int v[300000];
-
-int num_props = 4;
-unsigned int props[5][19] = {
-	{37,35,59,129,94,0,210,81,191,106,101,117,115,343,49,90,60,134,104}, //basicity
-	{68,23,33,29,70,58,41,73,32,66,38,0,40,39,44,53,71,51,55}, //helicity
-	{51,75,25,35,100,16,3,94,0,82,12,0,22,22,21,39,80,98,70}, //hydrophobicity
-	{32,23,0,4,27,32,48,32,69,29,26,35,28,79,29,28,31,31,28}, //pI
-	//{71,103,115,129,147,57,137,113,128,131,114,97,128,156,87,101,99,186,163} //mass
-};
-
-unsigned int props_buffer[100]; //100 is max pep length
-unsigned int shared_features[100]; //100 is max num shared features
-unsigned int count_n[19];
-unsigned int count_c[19];
-unsigned short peptide_buf[200]; //IONBOT
-
-
-// Function required in ms2pip_features_c_general.c and ms2pip_features_c_catboost.c
-int cmpfunc (const void * a, const void * b) {
-   return ( *(int*)a - *(int*)b );
-}
-
-
-// This function initializes amino acid masses and PTMs from a configuration file generated by Omega
-void init_ms2pip(char* amino_masses_fname, char* modifications_fname, char* modifications_fname_sptm) {
-	int i;
-	int nummods;
-	int nummods_sptm;
-	float mz;
-	int numptm;
-	int before;
-	int after;
-
-	FILE* f = fopen(modifications_fname,"rt");
-	fscanf(f,"%i\n",&nummods);
-	fclose(f);
-
-	f = fopen(modifications_fname_sptm,"rt");
-	fscanf(f,"%i\n",&nummods_sptm);
-	fclose(f);
-
-	//malloc
-	amino_masses = (float*) malloc((38+nummods+nummods_sptm)*sizeof(float));
-	amino_F = (unsigned short*) malloc((38+nummods+nummods_sptm)*sizeof(unsigned short));
-	sptm_mapper = (unsigned short*) malloc((38+nummods+nummods_sptm)*sizeof(unsigned short));
-
-	f = fopen(amino_masses_fname,"rt");
-	for (i=0; i< 19; i++) {
-		fscanf(f,"%f\n",&amino_masses[i]);
-		amino_F[i] = (unsigned short) (amino_masses[i]-57.021464);
-		}
-	fscanf(f,"%f\n",&ntermmod);
-	fclose(f);
-
-	for (i=0; i< 19; i++) {
-		amino_masses[19+i]=amino_masses[i];
-		amino_F[19+i] = amino_F[i];
-		}
-
-	f = fopen(modifications_fname_sptm,"rt");
-	fscanf(f,"%i\n",&nummods_sptm);
-	for (i=0; i< nummods_sptm; i++) {
-		fscanf(f,"%f,%i,%i,%i\n",&mz,&numptm,&before,&after);
-		sptm_mapper[after] = before;
-		sptm_mapper[after] = before;
-		if (after > 18) {
-			if (before<0) {
-				amino_masses[after] = mz;
-			}
-			else
-			{
-				amino_masses[after] = amino_masses[before]+mz;
-				amino_F[after] = (unsigned short) (amino_masses[before]+mz - 57.021464);
-			}
-			}
-		}
-	fclose(f);
-	f = fopen(modifications_fname,"rt");
-	fscanf(f,"%i\n",&nummods);
-	for (i=0; i< nummods; i++) {
-		fscanf(f,"%f,%i,%i,%i\n",&mz,&numptm,&before,&after);
-		if (after > 18) {
-			if (before<0) {
-				amino_masses[after] = mz;
-			}
-			else
-			{
-				amino_masses[after] = amino_masses[before]+mz;
-				amino_F[after] = (unsigned short) (amino_masses[before]+mz - 57.021464);
-			}
-			}
-		}
-	fclose(f);
-}
diff --git a/ms2pip/_cython_modules/ms2pip_peaks_c.c b/ms2pip/_cython_modules/ms2pip_peaks_c.c
deleted file mode 100644
index 607e4250..00000000
--- a/ms2pip/_cython_modules/ms2pip_peaks_c.c
+++ /dev/null
@@ -1,982 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "ms2pip_init_c.c"
-#include "ms2pip_features_c.c"
-
-#include "../_models_c/HCD-2019.h"
-#include "../_models_c/TMT.h"
-
-float membuffer[10000];
-float ions[2000];
-float mzs[2000];
-float predictions[1000];
-
-struct annotations{
-	float* peaks;
-	float* msms;
-};
-typedef struct annotations annotations;
-
-//compute feature vector from peptide + predict intensities
-float* get_p_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge, int model_id, int ce)
-	{
-	unsigned int* v = get_v_ms2pip(peplen, peptide, modpeptide, charge);
-	int fnum = v[0]/(peplen-1);
-	int i;
-
-	// HCD
-	if (model_id == 1) {
-		for (i=0; i < peplen-1; i++) {
-			predictions[0*(peplen-1)+i] = score_HCD_B(v+1+(i*fnum))+0.5;
-			predictions[2*(peplen-1)-i-1] = score_HCD_Y(v+1+(i*fnum))+0.5;
-		}
-	}
-
-	// TMT
-	else if (model_id == 3) {
-		for (i=0; i < peplen-1; i++) {
-		    predictions[0*(peplen-1)+i] = score_TMT_B(v+1+(i*fnum))+0.5;
-		    predictions[2*(peplen-1)-i-1] = score_TMT_Y(v+1+(i*fnum))+0.5;
-		}
-	}
-	// EThcD
-	// else if (model_id == 6) {
-	// 	for (i=0; i < peplen-1; i++) {
-	// 		predictions[0*(peplen-1)+i] = score_EThcD_B(v+1+(i*fnum))+0.5;
-	// 		predictions[2*(peplen-1)-i-1] = score_EThcD_Y(v+1+(i*fnum))+0.5;
-	// 		predictions[2*(peplen-1)+i] = score_EThcD_C(v+1+(i*fnum))+0.5;
-	// 		predictions[4*(peplen-1)-i-1] = score_EThcD_Z(v+1+(i*fnum))+0.5;
-	// 	}
-	// }
-
-	// HCDch2
-	else if (model_id == 7) {
-		for (i=0; i < peplen-1; i++) {
-			predictions[0*(peplen-1)+i] = score_HCD_B(v+1+(i*fnum))+0.5;
-			predictions[2*(peplen-1)-i-1] = score_HCD_Y(v+1+(i*fnum))+0.5;
-			predictions[2*(peplen-1)+i] = score_HCD_B2(v+1+(i*fnum))+0.5;
-			predictions[4*(peplen-1)-i-1] = score_HCD_Y2(v+1+(i*fnum))+0.5;
-		}
-	}
-
-	else {
-		return NULL;
-	}
-	return predictions;
-}
-
-
-//get fragment ion mz values (b, y)
-float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide)
-	{
-	int i,j;
-	float mz;
-	j=0;
-
-	mz = 0;
-	if (modpeptide[0] != 0) {
-		mz = amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = mz+1.007236;  //b-ion
-	}
-
-	mz = 0;
-	if (modpeptide[peplen+1] != 0) {
-		mz = amino_masses[modpeptide[peplen+1]];
-	}
-	for (i=peplen; i > 1; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = 18.0105647 + mz + 1.007236;  //y-ion
-	}
-
-	mz = 0;
-	if (modpeptide[0] != 0) {
-		mz = amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = (mz + 1.007236 + 1.007236)/2;  //b2-ion: (b-ion + H)/2
-	}
-
-	mz = 0;
-	if (modpeptide[peplen+1] != 0) {
-		mz = amino_masses[modpeptide[peplen+1]];
-	}
-	for (i=peplen; i > 1; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = (18.0105647 + mz + 1.007236 + 1.007236)/2;  //y2-ion: (y-ion + H)/2
-	}
-
-	return membuffer;
-}
-
-
-//get fragment ion mz values (b, y, c, z)
-float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide)
-	{
-	int i,j;
-	float mz;
-	j=0;
-
-	mz = 0;
-	if (modpeptide[0] != 0) {
-		mz = amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = mz + 1.007236;  //b-ion
-	}
-
-	mz = 0;
-	if (modpeptide[peplen+1] != 0) {
-		mz = amino_masses[modpeptide[peplen+1]];
-	}
-	for (i=peplen; i > 1; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = 18.0105647 + mz + 1.007236;  //y-ion
-	}
-
-	mz = 0;
-	if (modpeptide[0] != 0) {
-		mz = amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = mz + 1.007825032 + 17.0265491;  //c-ion: peptide + H + NH3
-	}
-
-	mz = 0;
-	if (modpeptide[peplen+1] != 0) {
-		mz = amino_masses[modpeptide[peplen+1]];
-	}
-	for (i=peplen; i > 1; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = mz + 17.00273965 - 15.01089904 + 1.007825032;  //z-ion: peptide + OH - NH
-	}
-
-	return membuffer;
-}
-
-
-//get fragment ion mz values (b, y, b++, y++)
-float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide)
-	{
-	int i,j;
-	float mz;
-	j=0;
-
-	mz = 0;
-	if (modpeptide[0] != 0) {
-		mz = amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = mz+1.007236;  //b-ion
-	}
-
-	mz = 0;
-	if (modpeptide[peplen+1] != 0) {
-		mz = amino_masses[modpeptide[peplen+1]];
-	}
-	for (i=peplen; i > 1; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = 18.0105647 + mz + 1.007236;  //y-ion
-	}
-
-	mz = 0;
-	if (modpeptide[0] != 0) {
-		mz = amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = (mz + 1.007236 + 1.007236)/2;  //b2-ion: (b-ion + H)/2
-	}
-
-	mz = 0;
-	if (modpeptide[peplen+1] != 0) {
-		mz = amino_masses[modpeptide[peplen+1]];
-	}
-	for (i=peplen; i > 1; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j++] = (18.0105647 + mz + 1.007236 + 1.007236)/2;  //y2-ion: (y-ion + H)/2
-	}
-
-	return membuffer;
-}
-
-//get all fragment ion peaks from spectrum
-annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz)
-	{
-	int i,tmp;
-	float mz;
-	int msms_pos;
-	int mem_pos;
-	float max, maxmz, tmp2;
-
-	//for (i=0; i < numpeaks; i++) {
-	//  fprintf(stderr,"m %f\n",msms[i]);
-	//}
-
-	for (i=0; i < 18*(peplen-1); i++) { // 2*9 iontypes: b: a -H2O -NH3 b c y: -H2O z y x
-		ions[i] = -9.96578428466; //HARD CODED!!
-		mzs[i] = 0; //HARD CODED!!
-	}
-
-	//b-ions
-	mz = ntermmod;
-	if (modpeptide[0] != 0) {
-		mz += amino_masses[modpeptide[0]];
-	}
-	int pos = 0;
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[pos++] = mz+1.007236-27.99492; // a
-		membuffer[pos++] = mz+1.007236-18.010565; // -H2O
-		membuffer[pos++] = mz+1.007236-17.026001; // -NH3
-		membuffer[pos++] = mz+1.007236; // b
-		membuffer[pos++] = mz+1.007236+17.02654; // c
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= 5*(peplen-1)) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			maxmz = msms[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-						maxmz = msms[tmp];
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[mem_pos] = max;
-			mzs[mem_pos] = maxmz;
-			mem_pos += 1;
-		}
-	}
-
-	//b2-ions
-	mz = ntermmod;
-	if (modpeptide[0] != 0) {
-		mz += amino_masses[modpeptide[0]];
-	}
-	pos = 0;
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[pos++] = (mz+1.007236+1.007236-27.99492)/2.; // a
-		membuffer[pos++] = (mz+1.007236+1.007236-18.010565)/2.; // -H2O
-		membuffer[pos++] = (mz+1.007236+1.007236-17.026001)/2.; // -NH3
-		membuffer[pos++] = (mz+1.007236+1.007236)/2.; // b
-		membuffer[pos++] = (mz+1.007236+1.007236+17.02654)/2.; // c
-	}
-
-	msms_pos = 0;
-	mem_pos=0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= 5*(peplen-1)) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			maxmz = msms[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-						maxmz = msms[tmp];
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[5*(peplen-1)+mem_pos] = max;
-			mzs[5*(peplen-1)+mem_pos] = maxmz;
-			mem_pos += 1;
-		}
-	}
-
-
-	// y-ions
-	mz = 0.;
-	if (modpeptide[peplen+1] != 0) {
-		mz += modpeptide[peplen+1];
-	}
-	pos = 0;
-	for (i=peplen; i >= 2; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[pos++] = 18.0105647+mz+1.007236-18.010565; //-H2O
-		membuffer[pos++] = 18.0105647+mz+1.007236-17.02545; // z
-		membuffer[pos++] = 18.0105647+mz+1.007236; // y
-		membuffer[pos++] = 18.0105647+mz+1.007236+25.97926; // x
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= 4*(peplen-1)) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			maxmz = msms[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-						maxmz = msms[tmp];
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[10*(peplen-1)+mem_pos] = max;
-			mzs[10*(peplen-1)+mem_pos] = maxmz;
-			mem_pos += 1;
-		}
-	}
-
-	// y2-ions
-	mz = 0.;
-	if (modpeptide[peplen+1] != 0) {
-		mz += modpeptide[peplen+1];
-	}
-	pos = 0;
-	for (i=peplen; i >= 2; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[pos++] = (18.0105647+mz+1.007236+1.007236-18.010565)/2.; //-H2O
-		membuffer[pos++] = (18.0105647+mz+1.007236+1.007236-17.02545)/2.; // z
-		membuffer[pos++] = (18.0105647+mz+1.007236+1.007236)/2.; // y
-		membuffer[pos++] = (18.0105647+mz+1.007236+1.007236+25.97926)/2.; // x
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= 4*(peplen-1)) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			maxmz = msms[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-						maxmz = msms[tmp];
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[14*(peplen-1)+mem_pos] = max;
-			mzs[14*(peplen-1)+mem_pos] = maxmz;
-			mem_pos += 1;
-		}
-	}
-
-	//for (i=0; i < 18*(peplen-1); i++) { // 2*9 iontypes: b: a -H2O -NH3 b c y: -H2O z y x
-	//    fprintf(stderr,"%f ",ions[i]); //HARD CODED!!
-	//}
-	//fprintf(stderr,"\n");
-
-	struct annotations r = {ions,mzs};
-
-	return r;
-}
-
-//get fragment ion peaks from spectrum
-float* get_t_ms2pip_general(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz)
-	{
-	int i,j,tmp;
-	float mz;
-	int msms_pos;
-	int mem_pos;
-	float max, tmp2;
-
-	//for (i=0; i < numpeaks; i++) {
-	//  fprintf(stderr,"m %f\n",msms[i]);
-	//}
-
-	for (i=0; i < 2*(peplen-1); i++) {
-		ions[i] = -9.96578428466; //HARD CODED!!
-	}
-
-	//b-ions
-	mz = ntermmod;
-	if (modpeptide[0] != 0) {
-		mz += amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[i-1] = mz+1.007236;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	// y-ions
-	mz = 0.;
-	if (modpeptide[peplen+1] != 0) {
-		mz += modpeptide[peplen+1];
-	}
-	j=0;
-	for (i=peplen; i >= 2; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j] = 18.0105647+mz+1.007236;
-		j++;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[(peplen-1)+mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-
-	return ions;
-}
-
-
-
-//get fragment ion peaks from spectrum (b, y, c, z)
-float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz)
-	{
-	int i,j,tmp;
-	float mz;
-	int msms_pos;
-	int mem_pos;
-	float max, tmp2;
-
-	for (i=0; i < 4*(peplen-1); i++) {
-		ions[i] = -9.96578428466; //HARD CODED!!
-	}
-
-	//b-ions
-	mz = ntermmod;
-	if (modpeptide[0] != 0) {
-		mz += amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[i-1] = mz+1.007236;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	// y-ions
-	mz = 0.;
-	if (modpeptide[peplen+1] != 0) {
-		mz += modpeptide[peplen+1];
-	}
-	j=0;
-	for (i=peplen; i >= 2; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j] = 18.0105647+mz+1.007236;
-		j++;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[(peplen-1)+mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	//c-ions
-	mz = ntermmod;
-	if (modpeptide[0] != 0) {
-		mz += amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[i-1] = mz + 1.007825032 + 17.026549;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[2*(peplen-1)+mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	// z-ions
-	mz = 0.;
-	if (modpeptide[peplen+1] != 0) {
-		mz += modpeptide[peplen+1];
-	}
-	j=0;
-	for (i=peplen; i >= 2; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j] = mz + 17.00274 - 15.010899 + 1.007825032;
-		j++;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[3*(peplen-1)+mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	return ions;
-}
-
-
-//get fragment ion peaks from spectrum (b, y, b++, y++)
-float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz)
-	{
-	int i,j,tmp;
-	float mz;
-	int msms_pos;
-	int mem_pos;
-	float max, tmp2;
-
-	//for (i=0; i < numpeaks; i++) {
-	//  fprintf(stderr,"m %f\n",msms[i]);
-	//}
-
-	for (i=0; i < 4*(peplen-1); i++) {
-		ions[i] = -9.96578428466; //HARD CODED!!
-	}
-
-	//b-ions
-	mz = ntermmod;
-	if (modpeptide[0] != 0) {
-		mz += amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[i-1] = mz+1.007236;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	// y-ions
-	mz = 0.;
-	if (modpeptide[peplen+1] != 0) {
-		mz += modpeptide[peplen+1];
-	}
-	j=0;
-	for (i=peplen; i >= 2; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j] = 18.0105647+mz+1.007236;
-		j++;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[(peplen-1)+mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	//b2-ions
-	mz = ntermmod;
-	if (modpeptide[0] != 0) {
-		mz += amino_masses[modpeptide[0]];
-	}
-	for (i=1; i < peplen; i++) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[i-1] = (mz + 1.007236 + 1.007236)/2;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[2*(peplen-1)+mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	// y2-ions
-	mz = 0.;
-	if (modpeptide[peplen+1] != 0) {
-		mz += modpeptide[peplen+1];
-	}
-	j=0;
-	for (i=peplen; i >= 2; i--) {
-		mz += amino_masses[modpeptide[i]];
-		membuffer[j] = (18.0105647 + mz + 1.007236 + 1.007236)/2;
-		j++;
-	}
-
-	msms_pos = 0;
-	mem_pos = 0;
-	while (1) {
-		if (msms_pos >= numpeaks) {
-			break;
-		}
-		if (mem_pos >= peplen-1) {
-			break;
-		}
-		mz = membuffer[mem_pos];
-		if (msms[msms_pos] > (mz+tolmz)) {
-			mem_pos += 1;
-		}
-		else if (msms[msms_pos] < (mz-tolmz)) {
-			msms_pos += 1;
-		}
-		else {
-			max = peaks[msms_pos];
-			tmp = msms_pos + 1;
-			if (tmp < numpeaks) {
-				while (msms[tmp] <= (mz+tolmz)) {
-					tmp2 = peaks[tmp];
-					if (max < tmp2) {
-						max = tmp2;
-					}
-					tmp += 1;
-					if (tmp == numpeaks) {
-						break;
-					}
-				}
-			}
-			ions[3*(peplen-1)+mem_pos] = max;
-			mem_pos += 1;
-		}
-	}
-
-	return ions;
-}
diff --git a/ms2pip/_cython_modules/ms2pip_pyx.pyx b/ms2pip/_cython_modules/ms2pip_pyx.pyx
deleted file mode 100644
index 0e980ba6..00000000
--- a/ms2pip/_cython_modules/ms2pip_pyx.pyx
+++ /dev/null
@@ -1,187 +0,0 @@
-#cython: language_level=3
-import sys
-import numpy as np
-cimport numpy as np
-
-
-NUM_ION_TYPES_MAPPING = {'general': 2, 'etd': 4, 'ch2': 4, 'all': 18}
-
-
-cdef extern from "ms2pip_peaks_c.c":
-
-	ctypedef struct annotations:
-		float* peaks
-		float* msms
-
-	void init_ms2pip(char* amino_masses_fname, char* modifications_fname, char* modifications_fname_sptm)
-
-	unsigned int* get_v_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge)
-
-	float* get_p_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge, int model_id, int ce)
-
-	float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide)
-	float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide)
-	float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide)
-
-	annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz)
-	float* get_t_ms2pip_general(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz)
-	float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz)
-	float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz)
-
-
-def ms2pip_init(amino_masses_fname, modifications_fname, modifications_fname_sptm):
-	if not isinstance(amino_masses_fname, bytearray):
-		amino_masses_fname = bytearray(amino_masses_fname.encode())
-	if not isinstance(modifications_fname, bytearray):
-		modifications_fname = bytearray(modifications_fname.encode())
-	if not isinstance(modifications_fname_sptm, bytearray):
-		modifications_fname_sptm = bytearray(modifications_fname_sptm.encode())
-	init_ms2pip(amino_masses_fname, modifications_fname, modifications_fname_sptm)
-
-
-def get_vector(np.ndarray[unsigned short, ndim=1, mode="c"] peptide,
-			   np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-			   charge):
-
-	cdef unsigned int* results = get_v_ms2pip(len(peptide)-2, &peptide[0], &modpeptide[0], charge)
-
-	r = []
-	offset = 0
-	fnum = int(results[0] / (len(peptide) - 3))
-	for i in range(len(peptide) - 3):
-		v = []
-		for j in range(fnum):
-			v.append(results[j + 1 + offset])
-		offset += fnum
-		r.append(np.array(v, dtype=np.uint16))
-
-	return r
-
-
-def get_predictions(np.ndarray[unsigned short, ndim=1, mode="c"] peptide,
-					np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-					charge, model_id, peaks_version, ce):
-	cdef float* results = get_p_ms2pip(len(peptide)-2, &peptide[0], &modpeptide[0], charge, model_id, ce)
-	if results is NULL:
-		raise NotImplementedError(model_id)
-	result_parsed = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]):
-		tmp = []
-		for j in range(len(modpeptide)-3):
-			tmp.append(results[(len(modpeptide)-3) * i + j])
-		result_parsed.append(tmp)
-	return result_parsed
-
-
-def get_targets(*args):
-	if args[4] == 'general':
-		result = get_targets_general(*args)
-	if args[4] == 'etd':
-		result = get_targets_etd(*args)
-	if args[4] == 'ch2':
-		result = get_targets_ch2(*args)
-	if args[4] == 'all':
-		result = get_targets_all(*args)
-	return result
-
-
-def get_targets_all(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-						np.ndarray[float, ndim=1, mode="c"] msms,
-						np.ndarray[float, ndim=1, mode="c"] peaks,
-						fragerror, peaks_version):
-	cdef annotations results
-	results = get_t_ms2pip_all(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror)
-	result_peaks = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]*(len(modpeptide)-3)):
-		result_peaks.append(results.peaks[i])
-	result_mzs = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]*(len(modpeptide)-3)):
-		result_mzs.append(results.msms[i])
-	return (result_mzs,result_peaks)
-
-
-def get_targets_general(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-						np.ndarray[float, ndim=1, mode="c"] msms,
-						np.ndarray[float, ndim=1, mode="c"] peaks,
-						fragerror, peaks_version):
-	cdef float* results = get_t_ms2pip_general(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror)
-	result_parsed = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): #SD: HAd to change this
-		tmp = []
-		for j in range(len(modpeptide)-3):
-			tmp.append(results[(len(modpeptide)-3) * i + j])
-		result_parsed.append(tmp)
-	return result_parsed
-
-def get_targets_etd(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-						np.ndarray[float, ndim=1, mode="c"] msms,
-						np.ndarray[float, ndim=1, mode="c"] peaks,
-						fragerror, peaks_version):
-	cdef float* results = get_t_ms2pip_etd(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror)
-	result_parsed = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]):
-		tmp = []
-		for j in range(len(modpeptide)-3):
-			tmp.append(results[(len(modpeptide)-3) * i + j])
-		result_parsed.append(tmp)
-	return result_parsed
-
-
-def get_targets_ch2(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-						np.ndarray[float, ndim=1, mode="c"] msms,
-						np.ndarray[float, ndim=1, mode="c"] peaks,
-						fragerror, peaks_version):
-	cdef float* results = get_t_ms2pip_ch2(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror)
-	result_parsed = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]):
-		tmp = []
-		for j in range(len(modpeptide)-3):
-			tmp.append(results[(len(modpeptide)-3) * i + j])
-		result_parsed.append(tmp)
-	return result_parsed
-
-
-def get_mzs(*args):
-	if args[1] == 'general':
-		result = get_mzs_general(*args)
-	if args[1] == 'etd':
-		result = get_mzs_etd(*args)
-	if args[1] == 'ch2':
-		result = get_mzs_ch2(*args)
-	return result
-
-
-def get_mzs_general(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-					peaks_version):
-	cdef float* results = get_mz_ms2pip_general(len(modpeptide)-2, &modpeptide[0])
-	result_parsed = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]):
-		tmp = []
-		for j in range(len(modpeptide)-3):
-			tmp.append(results[(len(modpeptide)-3) * i + j])
-		result_parsed.append(tmp)
-	return result_parsed
-
-
-def get_mzs_etd(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-				peaks_version):
-	cdef float* results = get_mz_ms2pip_etd(len(modpeptide)-2, &modpeptide[0])
-	result_parsed = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]):
-		tmp = []
-		for j in range(len(modpeptide)-3):
-			tmp.append(results[(len(modpeptide)-3) * i + j])
-		result_parsed.append(tmp)
-	return result_parsed
-
-
-def get_mzs_ch2(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide,
-				peaks_version):
-	cdef float* results = get_mz_ms2pip_ch2(len(modpeptide)-2, &modpeptide[0])
-	result_parsed = []
-	for i in range(NUM_ION_TYPES_MAPPING[peaks_version]):
-		tmp = []
-		for j in range(len(modpeptide)-3):
-			tmp.append(results[(len(modpeptide)-3) * i + j])
-		result_parsed.append(tmp)
-	return result_parsed
diff --git a/ms2pip/_models_c/HCD-2019.h b/ms2pip/_models_c/HCD-2019.h
deleted file mode 100644
index 4887791b..00000000
--- a/ms2pip/_models_c/HCD-2019.h
+++ /dev/null
@@ -1,4 +0,0 @@
-float score_HCD_B(unsigned int* v);
-float score_HCD_B2(unsigned int* v);
-float score_HCD_Y(unsigned int* v);
-float score_HCD_Y2(unsigned int* v);
diff --git a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B.c b/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B.c
deleted file mode 100644
index ae8fcf62..00000000
--- a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B.c
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d0ab6fdc7c3b2324228912757d1c55bd56fdebbe066a0b5fc0305f0c7bbb9969
-size 8726156
diff --git a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B2.c b/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B2.c
deleted file mode 100644
index 19a3d6a4..00000000
--- a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B2.c
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b2e27aab327de1a883500f7bb019ff25c9412141fb19ffe1a189eb092bdebd5b
-size 1664299
diff --git a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y.c b/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y.c
deleted file mode 100644
index 755926fc..00000000
--- a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y.c
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:105a9323043df6a192d300adaf3867391ef9e6e03599f6bc393289c3f62b982d
-size 9116794
diff --git a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y2.c b/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y2.c
deleted file mode 100644
index 4dd0a7b4..00000000
--- a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y2.c
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7c1be44093ce4b3085b49142ed5a263c01a79c90b3bcaea4c0e829e3b587f2a3
-size 9063001
diff --git a/ms2pip/_models_c/TMT.h b/ms2pip/_models_c/TMT.h
deleted file mode 100644
index 6ec67231..00000000
--- a/ms2pip/_models_c/TMT.h
+++ /dev/null
@@ -1,2 +0,0 @@
-float score_TMT_B(unsigned int* v);
-float score_TMT_Y(unsigned int* v);
diff --git a/ms2pip/_models_c/TMT/model_20190107_TMT_train_B.c b/ms2pip/_models_c/TMT/model_20190107_TMT_train_B.c
deleted file mode 100644
index c23a7f9a..00000000
--- a/ms2pip/_models_c/TMT/model_20190107_TMT_train_B.c
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:49252e3d1b42df924b422f118870f237ec541d58db0037b4800444999277d055
-size 2348087
diff --git a/ms2pip/_models_c/TMT/model_20190107_TMT_train_Y.c b/ms2pip/_models_c/TMT/model_20190107_TMT_train_Y.c
deleted file mode 100644
index f3635860..00000000
--- a/ms2pip/_models_c/TMT/model_20190107_TMT_train_Y.c
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5429cc869363e3383ce41a78329870898ce6e6c3b5fcf9ed1ca5eaac9417df42
-size 4323734
diff --git a/ms2pip/_spectrum_processing.py b/ms2pip/_spectrum_processing.py
new file mode 100644
index 00000000..8aed4cb6
--- /dev/null
+++ b/ms2pip/_spectrum_processing.py
@@ -0,0 +1,271 @@
+"""Internal spectrum annotation, target extraction, and PSM-spectrum matching."""
+
+from __future__ import annotations
+
+import re
+from collections import defaultdict
+from collections.abc import Generator
+from pathlib import Path
+
+import numpy as np
+from psm_utils import PSM, PSMList
+from psm_utils import Peptidoform
+from ms2rescore_rs import (
+    MS2Spectrum,  # type: ignore[ty:unresolved-import]
+    Precursor,  # type: ignore[ty:unresolved-import]
+    annotate_ms2_spectra,  # type: ignore[ty:unresolved-import]
+    get_ms2_spectra,  # type: ignore[ty:unresolved-import]
+)
+
+import ms2pip.exceptions as exceptions
+from ms2pip.constants import MODELS
+from ms2pip.spectrum import ObservedSpectrum
+
+
+def proforma_to_mass_shift(peptidoform: Peptidoform) -> str:
+    """
+    Convert a Peptidoform to a mass-shift ProForma string.
+
+    Replaces all modification labels with signed numeric mass shifts so that
+    ms2rescore-rs can parse them. Handles residue and N/C-terminal modifications;
+    several modifications on one terminus share one hyphen (``[+a][+b]-SEQ``).
+
+    Note: This does not handle ProForma features like labile modifications,
+    unlocalized modifications, tagged intervals, or isotope labels. These are
+    not used by ms2pip.
+    """
+    parts = []
+    n_term = peptidoform.properties.get("n_term")
+    if n_term:
+        # One trailing hyphen for the whole terminus, not one per modification.
+        parts.append("".join(f"[{mod.mass:+.4f}]" for mod in n_term) + "-")
+    for aa, mods in peptidoform.parsed_sequence:
+        parts.append(aa)
+        if mods:
+            for mod in mods:
+                parts.append(f"[{mod.mass:+.4f}]")  # type: ignore[ty:unresolved-attribute]
+    c_term = peptidoform.properties.get("c_term")
+    if c_term:
+        # One leading hyphen for the whole terminus, not one per modification.
+        parts.append("-" + "".join(f"[{mod.mass:+.4f}]" for mod in c_term))
+    if peptidoform.precursor_charge:
+        parts.append(f"/{peptidoform.precursor_charge}")
+    return "".join(parts)
+
+
+def read_raw_spectra(spectrum_file: str) -> Generator[MS2Spectrum, None, None]:
+    """Yield raw ms2rescore-rs MS2 spectra, skipping those without identifier or peaks."""
+    try:
+        raw_spectra = get_ms2_spectra(str(spectrum_file))
+    except ValueError as err:
+        raise exceptions.UnsupportedSpectrumFiletypeError(Path(spectrum_file).suffixes) from err
+
+    for raw in raw_spectra:
+        # Keep only spectra that carry an identifier and at least one peak.
+        if str(raw.identifier) != "" and len(raw.mz) != 0 and len(raw.intensity) != 0:
+            yield raw
+
+
+def to_observed_spectrum(spectrum: MS2Spectrum) -> ObservedSpectrum:
+    """Build an ObservedSpectrum (float32 peak arrays) from a raw MS2Spectrum."""
+    precursor = spectrum.precursor
+    return ObservedSpectrum(
+        mz=np.array(spectrum.mz, dtype=np.float32),
+        intensity=np.array(spectrum.intensity, dtype=np.float32),
+        identifier=str(spectrum.identifier),
+        precursor_mz=float(precursor.mz), precursor_charge=int(precursor.charge),
+        retention_time=float(precursor.rt),
+    )
+
+
+def annotate_spectrum(
+    spectrum: ObservedSpectrum,
+    psm: PSM,
+    model: str,
+    ms2_tolerance: float,
+    ms2_tolerance_mode: str,
+) -> list[list[tuple]]:
+    """
+    Annotate the peaks of one ObservedSpectrum for one PSM using ms2rescore-rs.
+
+    Returns one list per peak holding ``(series, position, charge)`` tuples.
+    """
+    # Rebuild the raw spectrum; falsy precursor fields fall back to 0 / "".
+    identifier = spectrum.identifier or ""
+    mz, intensity = list(spectrum.mz), list(spectrum.intensity)
+    precursor = Precursor(
+        mz=float(spectrum.precursor_mz) if spectrum.precursor_mz else 0.0,
+        charge=int(spectrum.precursor_charge) if spectrum.precursor_charge else 0,
+        rt=float(spectrum.retention_time) if spectrum.retention_time else 0.0,
+    )
+    raw = MS2Spectrum(identifier=identifier, mz=mz, intensity=intensity, precursor=precursor)
+    fragmentation = MODELS[model]["fragmentation"]
+    peptidoform = psm.peptidoform
+
+    result = annotate_ms2_spectra(
+        spectra=[raw],
+        proformas=[proforma_to_mass_shift(peptidoform)],
+        seq_lens=[len(peptidoform.parsed_sequence)],
+        fragmentation_model=fragmentation,
+        mass_mode="monoisotopic",
+        tolerance_value=float(ms2_tolerance),
+        tolerance_mode=ms2_tolerance_mode.lower(),
+    )
+
+    # Flatten the annotation objects of the single result into plain tuples.
+    per_peak = []
+    for peak_anns in result[0].peak_annotations:
+        per_peak.append([(ann.series, ann.position, ann.charge) for ann in peak_anns])
+    return per_peak
+
+
+def targets_from_annotations(
+    peak_annotations: list,
+    intensity: np.ndarray,
+    ion_types: list[str],
+    seq_len: int,
+) -> dict[str, np.ndarray]:
+    """
+    Collect the observed intensity of every requested fragment ion.
+
+    Turns per-peak fragment annotations into one intensity array per ion type.
+
+    Parameters
+    ----------
+    peak_annotations
+        One list of annotations per peak. An annotation is either a
+        ``(series, position, charge)`` tuple or an object exposing ``series``,
+        ``position`` and ``charge`` attributes (e.g. ms2rescore-rs annotations).
+    intensity
+        Peak intensities, indexed like ``peak_annotations``.
+    ion_types
+        Ion types to extract, e.g. ``["b", "y"]`` or ``["b", "y", "b2", "y2"]``.
+    seq_len
+        Number of amino acids in the peptide.
+
+    Returns
+    -------
+    targets
+        Dict mapping ion type to a float32 array of length ``seq_len - 1``;
+        ions without a matching peak keep the floor value ``log2(0.001)``.
+    """
+    n_ions = seq_len - 1
+    floor = np.float32(np.log2(0.001))
+    targets = {ion: np.full(n_ions, floor, dtype=np.float32) for ion in ion_types}
+
+    for peak_idx, anns in enumerate(peak_annotations):
+        for ann in anns:
+            # Accept both plain tuples and annotation objects with attributes.
+            if not isinstance(ann, tuple):
+                ann = (ann.series, ann.position, ann.charge)
+            series, position, charge = ann
+
+            # Singly charged ions use the bare series name; others get a charge suffix.
+            key = f"{series}{charge}" if charge != 1 else series
+            ion_targets = targets.get(key)
+            if ion_targets is None or not 0 <= position - 1 < n_ions:
+                continue
+
+            # Keep the most intense peak seen for this fragment ion.
+            observed = intensity[peak_idx]
+            if observed > ion_targets[position - 1]:
+                ion_targets[position - 1] = observed
+    return targets
+
+
+def load_and_match_spectra(
+    psm_list: PSMList,
+    spectrum_file: str | Path,
+    spectrum_id_pattern: str,
+    model: str,
+    ms2_tolerance: float,
+    ms2_tolerance_mode: str,
+) -> list[tuple[int, PSM, ObservedSpectrum, list]]:
+    """
+    Read spectra from file, annotate, preprocess, and match to PSMs.
+
+    Spectrum IDs are the first capture group of ``spectrum_id_pattern`` (a
+    ``TitlePatternError`` is raised if that fails). All spectrum-PSM pairs are
+    annotated in one batch call; each spectrum is preprocessed once and shared.
+
+    Returns list of (psm_index, psm, preprocessed_spectrum, peak_annotations) tuples.
+    """
+    try:
+        spectrum_id_regex = re.compile(spectrum_id_pattern)
+    except TypeError:  # pattern is not a string (e.g. None): capture the whole identifier
+        spectrum_id_regex = re.compile(r"(.*)")
+
+    psms_by_specid = defaultdict(list)  # spectrum ID -> [(index in psm_list, PSM), ...]
+    for i, psm in enumerate(psm_list):
+        psms_by_specid[str(psm.spectrum_id)].append((i, psm))
+
+    # Step 1: Read raw spectra and match to PSMs (no conversion yet)
+    matched_raw: list[tuple[str, MS2Spectrum, list[tuple[int, PSM]]]] = []
+    for spectrum in read_raw_spectra(str(spectrum_file)):
+        match = spectrum_id_regex.search(str(spectrum.identifier))
+        try:
+            spectrum_id = match[1]  # type: ignore[ty:not-subscriptable]
+        except (TypeError, IndexError):  # no match (None) or no capturing group
+            raise exceptions.TitlePatternError(
+                f"Spectrum title pattern `{spectrum_id_pattern}` could not be matched to "
+                f"spectrum ID `{spectrum.identifier}`. "
+                " Are you sure that the regex contains a capturing group?"
+            )
+
+        if spectrum_id not in psms_by_specid:  # spectrum has no PSM: skip it
+            continue
+
+        matched_raw.append((spectrum_id, spectrum, psms_by_specid[spectrum_id]))
+
+    if not matched_raw:
+        return []
+
+    # Step 2: Batch annotate all matched spectra (single Rust call, Rayon-parallelized)
+    batch_spectra = []
+    batch_proformas = []
+    batch_seq_lens = []
+    batch_indices = []  # (matched_raw_idx, psm_within_spectrum_idx)
+
+    for raw_idx, (_, spectrum, psm_pairs) in enumerate(matched_raw):
+        for psm_idx, (_, psm) in enumerate(psm_pairs):
+            batch_spectra.append(spectrum)  # one batch entry per spectrum-PSM pair
+            batch_proformas.append(proforma_to_mass_shift(psm.peptidoform))
+            batch_seq_lens.append(len(psm.peptidoform.parsed_sequence))
+            batch_indices.append((raw_idx, psm_idx))
+
+    frag_model = MODELS[model]["fragmentation"]
+    annotated_spectra = annotate_ms2_spectra(
+        spectra=batch_spectra,
+        proformas=batch_proformas,
+        seq_lens=batch_seq_lens,
+        fragmentation_model=frag_model,
+        mass_mode="monoisotopic",
+        tolerance_value=float(ms2_tolerance),
+        tolerance_mode=ms2_tolerance_mode.lower(),
+    )
+
+    # Step 3: Convert to ObservedSpectrum, preprocess, and assemble results
+    preprocessed_cache: dict[str, ObservedSpectrum] = {}  # spectrum ID -> preprocessed spectrum
+    results = []
+
+    for batch_idx, (raw_idx, psm_idx) in enumerate(batch_indices):
+        spec_id, raw_spectrum, psm_pairs = matched_raw[raw_idx]
+        psm_index, psm = psm_pairs[psm_idx]
+
+        if spec_id not in preprocessed_cache:
+            obs = to_observed_spectrum(raw_spectrum)
+            for label_type in ["iTRAQ", "TMT"]:
+                if label_type in model:  # substring test on the model name
+                    obs.remove_reporter_ions(label_type)
+            obs.tic_norm()
+            obs.log2_transform()
+            preprocessed_cache[spec_id] = obs
+
+        peak_annotations = [
+            [(a.series, a.position, a.charge) for a in peak_anns]
+            for peak_anns in annotated_spectra[batch_idx].peak_annotations
+        ]
+
+        results.append((psm_index, psm, preprocessed_cache[spec_id], peak_annotations))
+
+    return results
diff --git a/ms2pip/_utils/dlib.py b/ms2pip/_utils/dlib.py
index 50714dad..004ef3b7 100644
--- a/ms2pip/_utils/dlib.py
+++ b/ms2pip/_utils/dlib.py
@@ -2,7 +2,6 @@
 
 import zlib
 from pathlib import Path
-from typing import Union
 
 import numpy
 import sqlalchemy
@@ -44,9 +43,9 @@ def process_result_value(self, value, dialect):
         decompressed = zlib.decompress(value)
         return numpy.frombuffer(decompressed, dtype=self.dtype).tolist()
 
-    def copy(self):
+    def copy(self):  # type: ignore[ty:invalid-method-override]
         # NOTE: length will be passed through to BLOB
-        return CompressedArray(self.dtype, self.impl.length)
+        return CompressedArray(self.dtype, self.impl.length)  # type: ignore[ty:unresolved-attribute]
 
 
 metadata = MetaData()
@@ -100,6 +99,6 @@ def copy(self):
 )
 
 
-def open_sqlite(filename: Union[str, Path]) -> Connection:
+def open_sqlite(filename: str | Path) -> Connection:
     engine = sqlalchemy.create_engine(f"sqlite:///{filename}")
     return engine.connect()
diff --git a/ms2pip/_utils/encoder.py b/ms2pip/_utils/encoder.py
deleted file mode 100644
index da1dc0be..00000000
--- a/ms2pip/_utils/encoder.py
+++ /dev/null
@@ -1,322 +0,0 @@
-"""Peptide and modification handling for MS2PIP."""
-from __future__ import annotations
-
-import logging
-import os
-import tempfile
-from typing import Generator
-
-import numpy as np
-from psm_utils import Peptidoform, PSMList
-from pyteomics import proforma
-
-import ms2pip.exceptions as exceptions
-
-logger = logging.getLogger(__name__)
-
-AMINO_ACIDS = [
-    "A",
-    "C",
-    "D",
-    "E",
-    "F",
-    "G",
-    "H",
-    "I",
-    "K",
-    "M",
-    "N",
-    "P",
-    "Q",
-    "R",
-    "S",
-    "T",
-    "V",
-    "W",
-    "Y",
-]
-
-AMINO_ACID_MASSES = [
-    71.037114,
-    103.00919,
-    115.026943,
-    129.042593,
-    147.068414,
-    57.021464,
-    137.058912,
-    113.084064,
-    128.094963,
-    131.040485,
-    114.042927,
-    97.052764,
-    128.058578,
-    156.101111,
-    87.032028,
-    101.047679,
-    99.068414,
-    186.079313,
-    163.063329,
-]
-
-AMINO_ACID_IDS = {a: i for i, a in enumerate(AMINO_ACIDS)}
-AMINO_ACID_IDS["L"] = AMINO_ACID_IDS["I"]
-
-
-class Encoder:
-    """Modification-aware encoding of peptidoforms."""
-
-    def __init__(self) -> None:
-        """
-        Modification-aware encoding of peptidoforms.
-
-        MS²PIP requires all modification mass shifts to be written to a file for use in C code
-        before running. This class handles the encoding of peptides and peptidoforms for
-        modifications that have been defined.
-
-        Encoder files are to be passed on to the ``ms2pip_pyx.ms2pip_init`` function. E.g.,
-        ``ms2pip_pyx.ms2pip_init(*encoder.encoder_files)``.
-
-        Notes
-        -----
-        - Either used as a context manager or manually call :py:meth:`write_encoder_files` before
-          use and :py:meth:`remove_encoder_files` after use.
-        - Fixed, labile, and unlocalized modifications are ignored. Fixed modifications
-          should therefore already have been applied (see
-          :py:meth:`psm_utils.psm_list.PSMList.apply_fixed_modifications`).
-
-        """
-        self.modifications = {}
-        self.encoder_files = None
-
-        self._next_mod_id = 38  # Avoid clash with amino acids and mutations (ionbot compatibility)
-
-    def __enter__(self):
-        if not self.encoder_files:
-            self.write_encoder_files()
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.remove_encoder_files()
-
-    def __repr__(self) -> str:
-        return "{}.{}(modifications={})".format(
-            self.__class__.__module__,
-            self.__class__.__qualname__,
-            self.modifications,
-        )
-
-    @classmethod
-    def from_peptidoform(cls, peptidoform: Peptidoform) -> Encoder:
-        """
-        Create Encoder instance from peptidoform.
-
-        Parameters
-        ----------
-        peptidoform : Peptidoform
-            Peptidoform to use for modification configuration.
-
-        Returns
-        -------
-        Encoder
-            Encoder instance with modifications configured.
-
-        """
-        encoder = cls()
-        encoder._configure_from_peptidoform(peptidoform)
-        encoder.write_encoder_files()
-        return encoder
-
-    @classmethod
-    def from_psm_list(cls, psm_list: PSMList) -> Encoder:
-        """
-        Create Encoder instance from PSMList.
-
-        Parameters
-        ----------
-        psm_list : PSMList
-            PSMList to use for modification configuration.
-
-        Returns
-        -------
-        Encoder
-            Encoder instance with modifications configured.
-
-        """
-        encoder = cls()
-        encoder._configure_from_psm_list(psm_list)
-        encoder.write_encoder_files()
-        return encoder
-
-    def _configure_modification(self, target: str, modification: proforma.TagBase):
-        """
-        Add single pyteomics.proforma modification to configuration.
-
-        Parameters
-        ----------
-        target : str
-            Target amino acid one-letter code or terminus (``n_term`` or ``c_term``).
-        modification : pyteomics.proforma.TagBase
-            Modification to add.
-
-        """
-        if target == "n_term":
-            amino_acid_id = -1
-        elif target == "c_term":
-            amino_acid_id = -2
-        elif target in AMINO_ACID_IDS:
-            amino_acid_id = AMINO_ACID_IDS[target]
-        else:
-            logger.warning(f"Skipping modification for invalid amino acid: {target}")
-            return None
-
-        self.modifications[(target, str(modification))] = {
-            "mod_id": self._next_mod_id,
-            "mass_shift": modification.mass,
-            "amino_acid": target,
-            "amino_acid_id": amino_acid_id,
-            "modification": modification,
-        }
-        self._next_mod_id += 1
-
-    def _configure_from_peptidoform(self, peptidoform: Peptidoform):
-        """Configure encoder with modifications from single Peptidoform."""
-        # Get unique modifications from psm
-        unique_modifications = dict()
-        try:
-            for aa, mods in peptidoform.parsed_sequence:
-                if mods:
-                    unique_modifications.update({(aa, str(mod)): mod for mod in mods})
-            for term in ["n_term", "c_term"]:
-                if peptidoform.properties[term]:
-                    unique_modifications.update(
-                        {(term, str(mod)): mod for mod in peptidoform.properties[term]}
-                    )
-        except KeyError as e:
-            raise exceptions.UnresolvableModificationError(e.args[0]) from e
-
-        # Add modification entries
-        for (target, _), mod in unique_modifications.items():
-            self._configure_modification(target, mod)
-
-    def _configure_from_psm_list(self, psm_list: PSMList):
-        """Configure encoder with modifications from PSMList."""
-        # Get unique modifications from psm_list
-        unique_modifications = dict()
-        try:
-            for psm in psm_list:
-                for aa, mods in psm.peptidoform.parsed_sequence:
-                    if mods:
-                        unique_modifications.update({(aa, str(mod)): mod for mod in mods})
-                for term in ["n_term", "c_term"]:
-                    if psm.peptidoform.properties[term]:
-                        unique_modifications.update(
-                            {(term, str(mod)): mod for mod in psm.peptidoform.properties[term]}
-                        )
-        except KeyError as e:
-            raise exceptions.UnresolvableModificationError(e.args[0]) from e
-
-        # Add modification entries
-        for (target, _), mod in unique_modifications.items():
-            self._configure_modification(target, mod)
-
-    def write_encoder_files(self) -> str:
-        """Write configured masses to temporary files for use in C code."""
-        # AA file
-        amino_file = tempfile.NamedTemporaryFile(delete=False, mode="w", newline="\n")
-        for m in AMINO_ACID_MASSES:
-            amino_file.write("{}\n".format(m))
-        amino_file.write("0\n")
-        amino_file.close()
-
-        # PTM file
-        mod_file = tempfile.NamedTemporaryFile(delete=False, mode="w", newline="\n")
-        mod_file.write("{}\n".format(len(self.modifications)))
-        for mod in self.modifications.values():
-            mod_file.write(
-                "{},1,{},{}\n".format(mod["mass_shift"], mod["amino_acid_id"], mod["mod_id"])
-            )
-        mod_file.close()
-
-        # SPTM file (ionbot compatibility)
-        mod_file2 = tempfile.NamedTemporaryFile(delete=False, mode="w", newline="\n")
-        mod_file2.write("0\n")
-        mod_file2.close()
-
-        # Store temporary file names
-        self.encoder_files = (amino_file.name, mod_file.name, mod_file2.name)
-
-    def remove_encoder_files(self):
-        """Remove temporary encoder files."""
-        if self.encoder_files:
-            for f in self.encoder_files:
-                os.remove(f)
-            self.encoder_files = None
-
-    @staticmethod
-    def validate_peptidoform(peptidoform: Peptidoform):
-        """Validate whether a peptidoform can be encoded for MS²PIP."""
-        # Charge required
-        if peptidoform.precursor_charge is None:
-            raise exceptions.InvalidPeptidoformError("Peptidoform charge is required.")
-
-        # Peptides longer then 101 lead to "Segmentation fault (core dumped)"
-        if len(peptidoform.parsed_sequence) > 100:
-            raise exceptions.InvalidPeptidoformError(
-                "Peptidoform sequence cannot be longer than 100 amino acids."
-            )
-        elif len(peptidoform.parsed_sequence) < 4:
-            raise exceptions.InvalidPeptidoformError(
-                "Peptidoform sequence cannot be shorter than 4 amino acids."
-            )
-
-    def encode_peptide(self, peptidoform: Peptidoform) -> np.ndarray:
-        """Encode a peptide (without modifications) for MS²PIP."""
-        self.validate_peptidoform(peptidoform)
-
-        try:
-            encoded = [0] + [AMINO_ACID_IDS[aa] for aa, _ in peptidoform.parsed_sequence] + [0]
-        except KeyError as e:
-            raise exceptions.InvalidAminoAcidError(
-                f"Unsupported amino acid found in peptide `{peptidoform.proforma}`"
-            ) from e
-        return np.array(encoded, dtype=np.uint16)
-
-    def encode_peptidoform(self, peptidoform: Peptidoform) -> np.ndarray:
-        """
-        Encode a peptidoform for MS²PIP.
-
-        Notes
-        -----
-        - Multiple modifications per site is not supported. The first modification is used.
-        - Fixed, labile, and unlocalized modifications are ignored. Fixed modifications
-        should therefore already have been applied (see
-        :py:meth:`psm_utils.PSMList.apply_fixed_modifications`).
-
-        """
-
-        def _generate_encoding(peptidoform) -> Generator[int, None, None]:
-            if peptidoform.properties["n_term"]:
-                mod_str = str(peptidoform.properties["n_term"][0])
-                yield self.modifications["n_term", mod_str]["mod_id"]
-            else:
-                yield 0
-
-            for aa, mods in peptidoform.parsed_sequence:
-                try:
-                    if not mods:
-                        yield AMINO_ACID_IDS[aa]
-                    else:
-                        yield self.modifications[aa, str(mods[0])]["mod_id"]
-                except KeyError as e:
-                    raise exceptions.InvalidAminoAcidError(
-                        f"Unsupported amino acid found in peptide `{peptidoform.proforma}`"
-                    ) from e
-
-            if peptidoform.properties["c_term"]:
-                mod_str = str(peptidoform.properties["c_term"][0])
-                yield self.modifications["c_term", mod_str]["mod_id"]
-            else:
-                yield 0
-
-        self.validate_peptidoform(peptidoform)
-        return np.array(list(_generate_encoding(peptidoform)), dtype=np.uint16)
diff --git a/ms2pip/_utils/feature_names.py b/ms2pip/_utils/feature_names.py
index 35632ab8..ea0c8fa7 100644
--- a/ms2pip/_utils/feature_names.py
+++ b/ms2pip/_utils/feature_names.py
@@ -1,4 +1,7 @@
-from ms2pip._utils.encoder import AMINO_ACIDS
+AMINO_ACIDS = [
+    "A", "C", "D", "E", "F", "G", "H", "I", "K", "M",
+    "N", "P", "Q", "R", "S", "T", "V", "W", "Y",
+]
 
 
 def get_feature_names():
diff --git a/ms2pip/_utils/ion_mobility.py b/ms2pip/_utils/ion_mobility.py
index 7dceb9f6..530703cd 100644
--- a/ms2pip/_utils/ion_mobility.py
+++ b/ms2pip/_utils/ion_mobility.py
@@ -14,7 +14,7 @@ class IonMobility:
     def __init__(self, processes=1) -> None:
-        # Lazy import to avoid loading loading heavy dependencies when not needed
+        # Lazy import to avoid loading heavy dependencies when not needed
         try:
-            from im2deep.im2deep import predict_ccs  # noqa: F401
+            from im2deep.im2deep import predict_ccs  # noqa: F401  # type: ignore[ty:unresolved-import]
 
             self.predict_fn = predict_ccs
             self.processes = processes
diff --git a/ms2pip/_utils/psm_input.py b/ms2pip/_utils/psm_input.py
index 7a0cacc6..ee57e461 100644
--- a/ms2pip/_utils/psm_input.py
+++ b/ms2pip/_utils/psm_input.py
@@ -1,8 +1,5 @@
-from __future__ import annotations
-
 import logging
 from pathlib import Path
-from typing import Union
 
 import psm_utils.io.peptide_record
 from psm_utils import PSMList
@@ -10,7 +7,7 @@
 logger = logging.getLogger(__name__)
 
 
-def read_psms(psms: Union[str, Path, PSMList], filetype: Union[str, None]) -> PSMList:
+def read_psms(psms: str | Path | PSMList, filetype: str | None) -> PSMList:
     """Read PSMList or PSM file."""
     # Read PSMs
     if isinstance(psms, (str, Path)):
diff --git a/ms2pip/_utils/retention_time.py b/ms2pip/_utils/retention_time.py
index 043f24a2..675688cb 100644
--- a/ms2pip/_utils/retention_time.py
+++ b/ms2pip/_utils/retention_time.py
@@ -71,7 +71,7 @@ def _init_deeplc(self):
         """
         # Only import if DeepLC will be used, otherwise lots of extra heavy
         # dependencies (e.g. Tensorflow) are imported as well
-        import deeplc
+        import deeplc  # type: ignore[ty:unresolved-import]
 
         deeplc_params = self.config["deeplc"]
         if "calibration_file" in deeplc_params and deeplc_params["calibration_file"]:
@@ -109,9 +109,7 @@ def _prepare_deeplc_peptide_df(self):
     def _run_deeplc(self):
         """Run DeepLC."""
         logger.info("Predicting retention times with DeepLC...")
-        self.deeplc_preds = self.deeplc_predictor.make_preds(
-            seq_df=self.deeplc_pep_df.fillna("")
-        )
+        self.deeplc_preds = self.deeplc_predictor.make_preds(seq_df=self.deeplc_pep_df.fillna(""))  # type: ignore[ty:unresolved-attribute]
 
     def _parse_deeplc_preds(self):
         """Add DeepLC predictions to peprec DataFrame."""
diff --git a/ms2pip/_utils/xgb_models.py b/ms2pip/_utils/xgb_models.py
index 6e933a91..f20a5733 100644
--- a/ms2pip/_utils/xgb_models.py
+++ b/ms2pip/_utils/xgb_models.py
@@ -5,58 +5,130 @@
 import os
 import urllib.request
 from itertools import islice
+from pathlib import Path
 
 import numpy as np
 import xgboost as xgb
 
-from ms2pip.exceptions import InvalidXGBoostModelError
+import ms2pip.exceptions as exceptions
+from ms2pip.constants import MODELS
 
 logger = logging.getLogger(__name__)
 
+_MAX_PREDICTION_THREADS = 16
 
-def validate_requested_xgb_model(xgboost_model_files, xgboost_model_hashes, model_dir):
-    """Validate requested XGBoost models, and download if necessary"""
-    for _, model_file in xgboost_model_files.items():
+
+def validate_model(model: str, model_dir: str | Path | None = None) -> Path:
+    """
+    Validate the model name and download any of its XGBoost files that are missing.
+
+    Parameters
+    ----------
+    model
+        Name of the model; has to be a key of :data:`ms2pip.constants.MODELS`.
+    model_dir
+        Folder holding the XGBoost model files. Falls back to ``~/.ms2pip``.
+
+    Returns
+    -------
+    model_dir
+        The model directory as a :class:`pathlib.Path`.
+
+    Raises
+    ------
+    ms2pip.exceptions.UnknownModelError
+        If ``model`` is not a key of :data:`ms2pip.constants.MODELS`.
+
+    """
+    model_dir = Path(model_dir or Path.home() / ".ms2pip")
+    if model not in MODELS:
+        raise exceptions.UnknownModelError(model)
+    logger.debug("Using %s model", model)
+    cfg = MODELS[model]
+    _validate_requested_xgb_model(cfg["xgboost_model_files"], cfg["model_hash"], model_dir)
+    return model_dir
+
+
+def _validate_requested_xgb_model(xgboost_model_files, xgboost_model_hashes, model_dir):
+    """Ensure each requested XGBoost model file is present, downloading it when it is not."""
+    for filename in xgboost_model_files.values():
-        if not _check_model_presence(model_file, xgboost_model_hashes[model_file], model_dir):
-            _download_model(model_file, xgboost_model_hashes[model_file], model_dir)
+        if not _check_model_presence(filename, xgboost_model_hashes[filename], model_dir):
+            _download_model(filename, xgboost_model_hashes[filename], model_dir)
 
 
-def get_predictions_xgb(features, num_ions, model_params, model_dir, processes=1):
+def load_xgb_models(
+    model_params: dict,
+    model_dir,
+    processes: int | None = None,
+) -> dict:
     """
-    Get predictions starting from feature vectors in DMatrix object.
+    Read the XGBoost models of one model configuration from disk.
+
+    The resulting mapping of ion type to loaded model can be handed to
+    :func:`predict_intensities`, which then skips loading the models itself.
 
     Parameters
     ----------
-    features: xgb.DMatrix, np.ndarray
-        Feature vectors in DMatrix object or as Numpy array.
-    num_ions: list[int]
-        List with number of ions (per series) for each peptide, i.e. peptide length - 1
-    model_params: dict
-        Model configuration as defined in ms2pip.ms2pipC.MODELS.
-    model_dir: str
+    model_params
+        Model configuration dict; its ``xgboost_model_files`` entry is read.
+    model_dir
         Directory where model files are stored.
-    processes: int
-        Number of CPUs to use in multiprocessing
+    processes
+        Requested thread count (default: CPU count), capped at ``_MAX_PREDICTION_THREADS``.
 
     """
-    # Init models
-    xgboost_models = _initialize_xgb_models(
-        model_params["xgboost_model_files"],
-        model_dir,
-        processes,
+    requested = processes if processes is not None else (os.cpu_count() or 1)
+    nthread = min(requested, _MAX_PREDICTION_THREADS)
+    return _initialize_xgb_models(
+        model_params["xgboost_model_files"], model_dir, nthread
     )
-    if isinstance(features, np.ndarray):
-        features = xgb.DMatrix(features)
+
+
+def predict_intensities(
+    features: np.ndarray,
+    num_ions: list[int],
+    model_params: dict,
+    model_dir,
+    processes: int | None = None,
+    xgb_models: dict | None = None,
+) -> list[dict[str, np.ndarray]]:
+    """
+    Predict intensities from feature vectors using XGBoost models.
+
+    Parameters
+    ----------
+    features
+        Feature vectors as numpy array. Will be converted to DMatrix internally.
+    num_ions
+        Number of ions (per series) for each peptide, i.e. peptide length - 1.
+    model_params
+        Model configuration dict (must contain ``xgboost_model_files``).
+    model_dir
+        Directory where model files are stored.
+    processes
+        Number of threads for XGBoost prediction. Capped internally to avoid
+        diminishing returns. By default, uses all available cores (up to cap).
+    xgb_models
+        Pre-loaded XGBoost models (from :func:`load_xgb_models`). When provided,
+        ``model_params``, ``model_dir``, and ``processes`` are ignored for model
+        loading.
+
+    Returns
+    -------
+    predictions
+        List of dicts mapping ion type to predicted intensity array, one per peptide.
+
+    """
+    if xgb_models is None:
+        xgb_models = load_xgb_models(model_params, model_dir, processes)
+    dmatrix = xgb.DMatrix(features)
 
     logger.debug("Predicting intensities from XGBoost model files...")
     prediction_dict = {}
-    for ion_type, xgb_model in xgboost_models.items():
-        # Get predictions from XGBoost model
-        preds = xgb_model.predict(features)
-        preds = preds.clip(min=np.log2(0.001))  # Clip negative intensities
-        xgb_model.__del__()
+    for ion_type, xgb_model in xgb_models.items():
+        preds = xgb_model.predict(dmatrix)
+        preds = preds.clip(min=np.log2(0.001))
 
-        # Reshape into arrays for each peptide
         if ion_type.lower() in ["x", "y", "y2", "z"]:
             preds = _split_list_by_lengths(preds, num_ions, reverse=True)
         elif ion_type.lower() in ["a", "b", "b2", "c"]:
@@ -65,10 +137,8 @@ def get_predictions_xgb(features, num_ions, model_params, model_dir, processes=1
             raise ValueError(f"Unsupported ion_type: {ion_type}")
         prediction_dict[ion_type] = preds
 
-    # Convert to list per peptide with dicts per ion type
     num_peptides = len(list(prediction_dict.values())[0])
-    predictions = [{k: v[i] for k, v in prediction_dict.items()} for i in range(num_peptides)]
-    return predictions
+    return [{k: v[i] for k, v in prediction_dict.items()} for i in range(num_peptides)]
 
 
 def _split_list_by_lengths(list_in, lengths, reverse=False):
@@ -91,18 +161,19 @@ def _check_model_presence(model, model_hash, model_dir):
 
 def _download_model(model, model_hash, model_dir):
     """Download the xgboost model from the Genesis server."""
-    if not os.path.isdir(model_dir):
-        os.mkdir(model_dir)
+    os.makedirs(model_dir, exist_ok=True)
     filename = os.path.join(model_dir, model)
 
     logger.info(f"Downloading {model} to {filename}...")
     try:
-        urllib.request.urlretrieve(f"https://genesis.ugent.be/uvpublicdata/ms2pip/{model}", filename)
+        urllib.request.urlretrieve(
+            f"https://genesis.ugent.be/uvpublicdata/ms2pip/{model}", filename
+        )
     except Exception:
         logger.warning("Falling back to Zenodo for model downloads.")
         urllib.request.urlretrieve(f"https://zenodo.org/records/13270668/files/{model}", filename)
     if not _check_model_integrity(filename, model_hash):
-        raise InvalidXGBoostModelError()
+        raise exceptions.InvalidXGBoostModelError()
 
 
 def _check_model_integrity(filename, model_hash):
@@ -117,7 +188,7 @@ def _check_model_integrity(filename, model_hash):
     if sha1_hash.hexdigest() == model_hash:
         return True
     else:
-        logger.warn("Model hash not recognized.")
+        logger.warning("Model hash not recognized.")
         return False
 
 
diff --git a/ms2pip/constants.py b/ms2pip/constants.py
index c72affe2..b55a2c31 100644
--- a/ms2pip/constants.py
+++ b/ms2pip/constants.py
@@ -1,15 +1,9 @@
 """Constants and fixed configurations for MS²PIP."""
 
-# Models and their properties
-# id is passed to get_predictions to select model
-# ion_types is required to write the ion types in the headers of the result files
-# features_version is required to select the features version
 MODELS = {
     "CID": {
-        "id": 0,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20190107_CID_train_B.xgboost",
             "y": "model_20190107_CID_train_Y.xgboost",
@@ -20,16 +14,20 @@
         },
     },
     "HCD2019": {
-        "id": 1,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
+        "xgboost_model_files": {
+            "b": "model_20190107_HCD_train_B.xgboost",
+            "y": "model_20190107_HCD_train_Y.xgboost",
+        },
+        "model_hash": {
+            "model_20190107_HCD_train_B.xgboost": "2503856c382806672e4b85f6b0ccc1f3093acc1b",
+            "model_20190107_HCD_train_Y.xgboost": "867bbc9940f75845b3f4f845d429b3780c997a02",
+        },
     },
     "TTOF5600": {
-        "id": 2,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20190107_TTOF5600_train_B.xgboost",
             "y": "model_20190107_TTOF5600_train_Y.xgboost",
@@ -40,16 +38,20 @@
         },
     },
     "TMT": {
-        "id": 3,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
+        "xgboost_model_files": {
+            "b": "model_20190107_TMT_train_B.xgboost",
+            "y": "model_20190107_TMT_train_Y.xgboost",
+        },
+        "model_hash": {
+            "model_20190107_TMT_train_B.xgboost": "352073a591d45a2e3181818f5feef99c22755af7",
+            "model_20190107_TMT_train_Y.xgboost": "d9a73bff21ab504bb91eb386f20cd8a86d60c95d",
+        },
     },
     "iTRAQ": {
-        "id": 4,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20190107_iTRAQ_train_B.xgboost",
             "y": "model_20190107_iTRAQ_train_Y.xgboost",
@@ -60,10 +62,8 @@
         },
     },
     "iTRAQphospho": {
-        "id": 5,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20190107_iTRAQphospho_train_B.xgboost",
             "y": "model_20190107_iTRAQphospho_train_Y.xgboost",
@@ -73,18 +73,25 @@
             "model_20190107_iTRAQphospho_train_Y.xgboost": "261b2e1810a299ed7ebf193ce1fb81a608c07d3b",
         },
     },
-    # ETD': {'id': 6, 'ion_types': ['B', 'Y', 'C', 'Z'], 'peaks_version': 'etd', 'features_version': 'normal'},
     "HCDch2": {
-        "id": 7,
         "ion_types": ["B", "Y", "B2", "Y2"],
-        "peaks_version": "ch2",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
+        "xgboost_model_files": {
+            "b": "model_20190107_HCD_train_B.xgboost",
+            "y": "model_20190107_HCD_train_Y.xgboost",
+            "b2": "model_20190107_HCD_train_B2.xgboost",
+            "y2": "model_20190107_HCD_train_Y2.xgboost",
+        },
+        "model_hash": {
+            "model_20190107_HCD_train_B.xgboost": "2503856c382806672e4b85f6b0ccc1f3093acc1b",
+            "model_20190107_HCD_train_Y.xgboost": "867bbc9940f75845b3f4f845d429b3780c997a02",
+            "model_20190107_HCD_train_B2.xgboost": "2df86d3576e85bfd25cc149d723f1613baf854d0",
+            "model_20190107_HCD_train_Y2.xgboost": "0a116ad9f14925fc70e3eceed9484b16ca8edddb",
+        },
     },
     "CIDch2": {
-        "id": 8,
         "ion_types": ["B", "Y", "B2", "Y2"],
-        "peaks_version": "ch2",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20190107_CID_train_B.xgboost",
             "y": "model_20190107_CID_train_Y.xgboost",
@@ -99,10 +106,8 @@
         },
     },
     "HCD2021": {
-        "id": 9,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20210416_HCD2021_B.xgboost",
             "y": "model_20210416_HCD2021_Y.xgboost",
@@ -113,10 +118,8 @@
         },
     },
     "Immuno-HCD": {
-        "id": 10,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20210316_Immuno_HCD_B.xgboost",
             "y": "model_20210316_Immuno_HCD_Y.xgboost",
@@ -127,10 +130,8 @@
         },
     },
     "CID-TMT": {
-        "id": 11,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20220104_CID_TMT_B.xgboost",
             "y": "model_20220104_CID_TMT_Y.xgboost",
@@ -141,10 +142,8 @@
         },
     },
     "timsTOF2023": {
-        "id": 12,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20230912_timsTOF_B.xgboost",
             "y": "model_20230912_timsTOF_Y.xgboost",
@@ -155,10 +154,8 @@
         },
     },
     "timsTOF2024": {
-        "id": 13,
         "ion_types": ["B", "Y"],
-        "peaks_version": "general",
-        "features_version": "normal",
+        "fragmentation": "cidhcd",
         "xgboost_model_files": {
             "b": "model_20240105_timsTOF_B.xgboost",
             "y": "model_20240105_timsTOF_Y.xgboost",
diff --git a/ms2pip/core.py b/ms2pip/core.py
index 5bf93c6b..70445f0e 100644
--- a/ms2pip/core.py
+++ b/ms2pip/core.py
@@ -1,87 +1,325 @@
 #!/usr/bin/env python
 from __future__ import annotations
 
-import itertools
 import logging
-import multiprocessing
-import multiprocessing.dummy
-import re
-from collections import defaultdict
+import os
+from collections.abc import Generator
 from math import ceil
 from pathlib import Path
-from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
+from ms2rescore_rs import (
+    AnnotatedMS2Spectrum,  # type: ignore[ty:unresolved-import]
+    MS2Spectrum,  # type: ignore[ty:unresolved-import]
+    Precursor,  # type: ignore[ty:unresolved-import]
+    annotate_ms2_spectra,  # type: ignore[ty:unresolved-import]
+    ms2pip_compute_features,  # type: ignore[ty:unresolved-import]
+    ms2pip_compute_theoretical_mz,  # type: ignore[ty:unresolved-import]
+)
 from psm_utils import PSM, Peptidoform, PSMList
 from rich.progress import track
-from ms2rescore_rs import MS2Spectrum
 
 import ms2pip.exceptions as exceptions
-from ms2pip._cython_modules import ms2pip_pyx
-from ms2pip._utils.encoder import Encoder
-from ms2pip._utils.feature_names import get_feature_names
 from ms2pip._utils.ion_mobility import IonMobility
 from ms2pip._utils.psm_input import read_psms
 from ms2pip._utils.retention_time import RetentionTime
-from ms2pip._utils.xgb_models import get_predictions_xgb, validate_requested_xgb_model
+from ms2pip._utils.xgb_models import load_xgb_models, predict_intensities, validate_model
 from ms2pip.constants import MODELS
 from ms2pip.result import ProcessingResult, calculate_correlations
 from ms2pip.search_space import ProteomeSearchSpace
 from ms2pip.spectrum import ObservedSpectrum
-from ms2pip.spectrum_input import read_spectrum_file
-from ms2pip.spectrum_output import SUPPORTED_FORMATS
+from ms2pip._spectrum_processing import (
+    annotate_spectrum,
+    load_and_match_spectra,
+    proforma_to_mass_shift,
+    targets_from_annotations,
+)
 
 logger = logging.getLogger(__name__)
 
+NUM_FEATURES = 139  # Width of one feature row; used to reshape ms2pip_compute_features output
 
-def predict_single(
-    peptidoform: Union[Peptidoform, str],
-    model: Optional[str] = "HCD",
-    model_dir: Optional[Union[str, Path]] = None,
-) -> ProcessingResult:
+
+def _set_rayon_threads(processes: int | None) -> None:
+    """
+    Export ``processes`` as RAYON_NUM_THREADS unless it is None or the variable exists.
+    """
+    if processes is None:
+        return
+    existing = os.environ.get("RAYON_NUM_THREADS")
+    if existing is None:
+        os.environ["RAYON_NUM_THREADS"] = str(processes)
+    else:
+        message = "RAYON_NUM_THREADS already set to %s; not overriding with processes=%d"
+        logger.debug(message, existing, processes)
+
+
+def _predict_batch_internal(
+    psm_list: PSMList,
+    model: str,
+    model_dir: str | Path | None = None,
+    processes: int | None = None,
+    xgb_models: dict | None = None,
+) -> list[ProcessingResult]:
     """
-    Predict fragmentation spectrum for a single peptide.\f
+    Predict theoretical m/z and intensities for all PSMs; returns one result per PSM, in order.
+
+    Uses ms2rescore-rs for feature computation and theoretical m/z calculation
+    (both internally parallelized via Rayon), then XGBoost for intensity prediction.
     """
-    if isinstance(peptidoform, str):
-        peptidoform = Peptidoform(peptidoform)
-    psm = PSM(peptidoform=peptidoform, spectrum_id=0)
-    model_dir = model_dir if model_dir else Path.home() / ".ms2pip"
+    model_dir = validate_model(model, model_dir)  # returns the model directory used below
+    if not psm_list:
+        return []
+
+    ion_types = [it.lower() for it in MODELS[model]["ion_types"]]
+    frag_model = MODELS[model]["fragmentation"]
+
+    proformas = [proforma_to_mass_shift(psm.peptidoform) for psm in psm_list]
+    num_ions = [len(psm.peptidoform.parsed_sequence) - 1 for psm in psm_list]
+
+    _set_rayon_threads(processes)
+
+    # Batch compute theoretical m/z (single Rust call, Rayon-parallelized)
+    logger.debug("Computing theoretical m/z for %d peptides...", len(proformas))
+    all_mz = ms2pip_compute_theoretical_mz(proformas, ion_types, frag_model, "monoisotopic")
+
+    # Batch compute features (single Rust call, Rayon-parallelized)
+    logger.debug("Computing features for %d peptides...", len(proformas))
+    all_features = ms2pip_compute_features(proformas)
+
+    # Stack all feature rows into one matrix and predict intensities with XGBoost
+    logger.debug("Predicting intensities with XGBoost...")
+    predictions = predict_intensities(
+        np.concatenate([f.reshape(-1, NUM_FEATURES) for f in all_features]),
+        num_ions,
+        MODELS[model],
+        model_dir,
+        processes=processes,
+        xgb_models=xgb_models,
+    )
+
+    # Assemble results; psm_index is the PSM's position in psm_list
+    results = []
+    for i, psm in enumerate(psm_list):
+        results.append(
+            ProcessingResult(
+                psm_index=i,
+                psm=psm,
+                theoretical_mz={k: np.array(v, dtype=np.float32) for k, v in all_mz[i].items()},
+                predicted_intensity=predictions[i],
+            )
+        )
+    return results
+
+
+def _correlate_internal(
+    psm_spectrum_annotations: list[tuple[int, PSM, ObservedSpectrum, list]],
+    model: str,
+    model_dir: str | Path | None = None,
+    vector_file: bool = False,
+    annotations_only: bool = False,
+    processes: int | None = None,
+) -> list[ProcessingResult]:
+    """
+    Core correlation logic: extract targets, compute features/predictions, assemble results.
+
+    Parameters
+    ----------
+    psm_spectrum_annotations
+        List of (psm_index, psm, preprocessed_spectrum, peak_annotations) tuples.
+        Annotations are per-peak lists of ``(series, position, charge)`` tuples.
+    model
+        Name of prediction model.
+    model_dir
+        Directory for XGBoost model files.
+    vector_file, annotations_only
+        ``vector_file`` returns feature vectors instead of predictions (for training);
+        ``annotations_only`` returns only m/z and observed intensities. Do not set both.
+    processes
+        Thread count passed to ``_set_rayon_threads`` and ``predict_intensities``.
+    """
+    if not annotations_only and not vector_file:  # model files are only needed to predict
+        model_dir = validate_model(model, model_dir)
     ion_types = [it.lower() for it in MODELS[model]["ion_types"]]
+    frag_model = MODELS[model]["fragmentation"]
+
+    _set_rayon_threads(processes)
+
+    if not psm_spectrum_annotations:
+        return []
+
+    # Step 1: Extract targets from pre-computed annotations
+    all_targets = []
+    for psm_index, psm, spectrum, peak_annotations in psm_spectrum_annotations:
+        if not psm.peptidoform.precursor_charge:  # fall back to spectrum charge (mutates PSM)
+            psm.peptidoform.precursor_charge = spectrum.precursor_charge  # type: ignore[ty:invalid-assignment]
 
-    with Encoder.from_peptidoform(peptidoform) as encoder:
-        ms2pip_pyx.ms2pip_init(*encoder.encoder_files)
-        result = _process_peptidoform(0, psm, model, encoder, ion_types=ion_types)
+        seq_len = len(psm.peptidoform.parsed_sequence)
+        targets = targets_from_annotations(
+            peak_annotations, spectrum.intensity.astype(np.float32), ion_types, seq_len
+        )
+        all_targets.append(targets)
+
+    proformas = [proforma_to_mass_shift(psm.peptidoform) for _, psm, _, _ in psm_spectrum_annotations]
+    num_ions = [
+        len(psm.peptidoform.parsed_sequence) - 1 for _, psm, _, _ in psm_spectrum_annotations
+    ]
+
+    # Step 2: Compute features (needed for training and prediction, not annotation-only)
+    all_features = None
+    if not annotations_only:
+        logger.debug("Computing features for %d peptides...", len(proformas))
+        all_features = ms2pip_compute_features(proformas)
+
+    # Step 3: Compute theoretical m/z (needed for annotation and prediction, not training)
+    all_mz = None
+    if not vector_file:
+        logger.debug("Computing theoretical m/z for %d peptides...", len(proformas))
+        all_mz = ms2pip_compute_theoretical_mz(proformas, ion_types, frag_model, "monoisotopic")
 
-        if "xgboost_model_files" in MODELS[model].keys():
-            validate_requested_xgb_model(
-                MODELS[model]["xgboost_model_files"],
-                MODELS[model]["model_hash"],
-                model_dir,
+    # Step 4: Assemble results based on mode
+    results = []
+
+    if vector_file:
+        # Training mode: return feature vectors + observed targets
+        assert all_features is not None
+        for i, (psm_index, psm, _spectrum, _ann) in enumerate(psm_spectrum_annotations):
+            results.append(
+                ProcessingResult(
+                    psm_index=psm_index,
+                    psm=psm,
+                    theoretical_mz=None,
+                    predicted_intensity=None,
+                    observed_intensity=all_targets[i],
+                    correlation=None,
+                    feature_vectors=all_features[i],
+                )
             )
-            predictions = np.array(
-                get_predictions_xgb(
-                    result.feature_vectors,
-                    [len(peptidoform.parsed_sequence) - 1],
-                    MODELS[model],
-                    model_dir,
+
+    elif annotations_only:
+        # Annotation mode: return m/z + observed targets
+        assert all_mz is not None
+        for i, (psm_index, psm, _spectrum, _ann) in enumerate(psm_spectrum_annotations):
+            mz = {k: np.array(v, dtype=np.float32) for k, v in all_mz[i].items()}
+            results.append(
+                ProcessingResult(
+                    psm_index=psm_index,
+                    psm=psm,
+                    theoretical_mz=mz,
+                    predicted_intensity=None,
+                    observed_intensity=all_targets[i],
+                    correlation=None,
+                    feature_vectors=None,
                 )
             )
-            result.predicted_intensity = predictions[0]  # Only one spectrum in predictions
-            result.feature_vectors = None
 
-    return result
+    else:
+        # Prediction mode: compute XGBoost predictions
+        assert all_features is not None and all_mz is not None
+        logger.debug("Predicting intensities with XGBoost...")
+        predictions = predict_intensities(
+            np.concatenate([f.reshape(-1, NUM_FEATURES) for f in all_features]),
+            num_ions,
+            MODELS[model],
+            model_dir,
+            processes=processes,
+        )
+
+        for i, (psm_index, psm, _spectrum, _ann) in enumerate(psm_spectrum_annotations):
+            mz = {k: np.array(v, dtype=np.float32) for k, v in all_mz[i].items()}
+            results.append(
+                ProcessingResult(
+                    psm_index=psm_index,
+                    psm=psm,
+                    theoretical_mz=mz,
+                    predicted_intensity=predictions[i],
+                    observed_intensity=all_targets[i],
+                )
+            )
+
+    return results
+
+
+def _into_batches(iterable, batch_size: int) -> Generator[list, None, None]:
+    """Yield consecutive items as lists of ``batch_size``; the final list may be shorter."""
+    pending: list = []
+    for element in iterable:
+        pending.append(element)
+        if len(pending) == batch_size:
+            pending, full = [], pending
+            yield full
+    if pending:
+        yield pending
+
+
+def _assemble_training_data(results: list[ProcessingResult], model: str) -> pd.DataFrame:
+    """
+    Assemble training data from results list to single pandas DataFrame.
+
+    Only results with both feature vectors and observed intensities are used, so feature
+    rows, ``psm_index``, and target columns stay aligned. Feature vectors are reshaped to
+    ``(-1, n_features)`` before stacking, so flat per-PSM arrays are accepted too.
+
+    Raises
+    ------
+    ValueError
+        If no result has both feature vectors and observed intensities.
+    """
+    from ms2pip._utils.feature_names import get_feature_names
+
+    feature_names = get_feature_names()
+    ion_types = [it.lower() for it in MODELS[model]["ion_types"]]
+
+    # Filter once so every column below is built from the same set of results
+    usable = [
+        (r, np.asarray(r.feature_vectors).reshape(-1, len(feature_names)))
+        for r in results
+        if r.feature_vectors is not None and r.observed_intensity is not None
+    ]
+    if not usable:
+        raise ValueError("No results with feature vectors and observed intensities found.")
+
+    training_data = pd.DataFrame(np.vstack([fv for _, fv in usable]), columns=feature_names)
+    training_data["psm_index"] = np.concatenate(
+        [np.repeat(r.psm_index, fv.shape[0]) for r, fv in usable]
+    )
+    for ion_type in ion_types:
+        # x/y/y2/z targets are reversed (presumably to match feature row order; confirm)
+        step = -1 if ion_type in ["x", "y", "y2", "z"] else 1
+        if step == -1 or ion_type in ["a", "b", "b2", "c"]:
+            training_data[f"target_{ion_type}"] = np.concatenate(
+                [r.observed_intensity[ion_type][::step] for r, _ in usable]
+            )
+
+    return training_data[["psm_index"] + feature_names + [f"target_{it}" for it in ion_types]]
+
+
+def predict_single(
+    peptidoform: Peptidoform | str,
+    model: str = "HCD",
+    model_dir: str | Path | None = None,
+) -> ProcessingResult:
+    """
+    Predict fragmentation spectrum for a single peptide.\f
+
+    Strings are parsed with ``Peptidoform``; prediction reuses the batch code path.
+    """
+    parsed = Peptidoform(peptidoform) if isinstance(peptidoform, str) else peptidoform
+    # A one-element PSMList lets the batch predictor handle the single peptide
+    single = PSMList(psm_list=[PSM(peptidoform=parsed, spectrum_id=0)])
+    return _predict_batch_internal(single, model, model_dir)[0]
 
 
 def predict_batch(
-    psms: Union[PSMList, str, Path],
+    psms: PSMList | str | Path,
     add_retention_time: bool = False,
     add_ion_mobility: bool = False,
-    psm_filetype: Optional[str] = None,
-    model: Optional[str] = "HCD",
-    model_dir: Optional[Union[str, Path]] = None,
-    processes: Optional[int] = None,
-) -> List[ProcessingResult]:
+    psm_filetype: str | None = None,
+    model: str = "HCD",
+    model_dir: str | Path | None = None,
+    processes: int | None = None,
+) -> list[ProcessingResult]:
     """
     Predict fragmentation spectra for a batch of peptides.\f
 
@@ -101,11 +339,12 @@ def predict_batch(
     model_dir
         Directory where XGBoost model files are stored. Default: `~/.ms2pip`.
     processes
-        Number of parallel processes for multiprocessing steps. By default, all available.
+        Number of threads for Rayon (Rust) and XGBoost parallelism. By default,
+        all available.
 
     Returns
     -------
-    predictions: List[ProcessingResult]
+    predictions: list[ProcessingResult]
         Predicted spectra with theoretical m/z and predicted intensity values.
 
     """
@@ -123,29 +362,20 @@ def predict_batch(
         im_predictor = IonMobility(processes=processes)
         im_predictor.add_im_predictions(psm_list)
 
-    with Encoder.from_psm_list(psm_list) as encoder:
-        ms2pip_parallelized = _Parallelized(
-            encoder=encoder,
-            model=model,
-            model_dir=model_dir,
-            processes=processes,
-        )
-        logger.info("Processing peptides...")
-        results = ms2pip_parallelized.process_peptides(psm_list)
-
-    return results
+    logger.info("Processing peptides...")
+    return _predict_batch_internal(psm_list, model, model_dir, processes=processes)
 
 
 def predict_library(
-    fasta_file: Optional[Union[str, Path]] = None,
-    config: Optional[Union[ProteomeSearchSpace, dict, str, Path]] = None,
+    fasta_file: str | Path | None = None,
+    config: ProteomeSearchSpace | dict | str | Path | None = None,
     add_retention_time: bool = False,
     add_ion_mobility: bool = False,
-    model: Optional[str] = "HCD",
-    model_dir: Optional[Union[str, Path]] = None,
+    model: str = "HCD",
+    model_dir: str | Path | None = None,
     batch_size: int = 100000,
-    processes: Optional[int] = None,
-) -> Generator[ProcessingResult, None, None]:
+    processes: int | None = None,
+) -> Generator[list[ProcessingResult], None, None]:
     """
     Predict spectral library from protein FASTA file.\f
 
@@ -168,58 +398,66 @@ def predict_library(
     batch_size
         Number of peptides to process in each batch.
     processes
-        Number of parallel processes for multiprocessing steps. By default, all available.
+        Number of threads for Rayon (Rust) and XGBoost parallelism. By default,
+        all available.
 
     Yields
     ------
-    predictions: List[ProcessingResult]
+    predictions: list[ProcessingResult]
         Predicted spectra with theoretical m/z and predicted intensity values.
 
     """
     if fasta_file and config:
-        # Use provided proteome, but overwrite fasta_file
-        config = ProteomeSearchSpace.from_any(config)
-        config.fasta_file = fasta_file
+        search_space = ProteomeSearchSpace.from_any(config)
+        search_space.fasta_file = Path(fasta_file)
     elif fasta_file and not config:
-        # Default proteome search space with provided fasta_file
-        config = ProteomeSearchSpace(fasta_file=fasta_file)
+        search_space = ProteomeSearchSpace(fasta_file=fasta_file)
     elif not fasta_file and config:
-        # Use provided proteome
-        config = ProteomeSearchSpace.from_any(config)
+        search_space = ProteomeSearchSpace.from_any(config)
     else:
         raise ValueError("Either `fasta_file` or `config` must be provided.")
 
-    search_space = ProteomeSearchSpace.from_any(config)
     search_space.build()
 
+    # Pre-load XGBoost models once for all batches
+    model_dir = validate_model(model, model_dir)
+    xgb_models = load_xgb_models(MODELS[model], model_dir, processes)
+
     for batch in track(
         _into_batches(search_space, batch_size=batch_size),
         description="Predicting spectra...",
         total=ceil(len(search_space) / batch_size),
     ):
-        yield predict_batch(
-            search_space.filter_psms_by_mz(PSMList(psm_list=list(batch))),
-            add_retention_time=add_retention_time,
-            add_ion_mobility=add_ion_mobility,
-            model=model,
-            model_dir=model_dir,
+        psm_list = search_space.filter_psms_by_mz(PSMList(psm_list=list(batch)))
+
+        if add_retention_time:
+            RetentionTime(processes=processes).add_rt_predictions(psm_list)
+        if add_ion_mobility:
+            IonMobility(processes=processes).add_im_predictions(psm_list)
+
+        yield _predict_batch_internal(
+            psm_list,
+            model,
+            model_dir,
             processes=processes,
+            xgb_models=xgb_models,
         )
 
 
 def correlate(
-    psms: Union[PSMList, str, Path],
-    spectrum_file: Union[str, Path],
-    psm_filetype: Optional[str] = None,
-    spectrum_id_pattern: Optional[str] = None,
+    psms: PSMList | str | Path,
+    spectrum_file: str | Path,
+    psm_filetype: str | None = None,
+    spectrum_id_pattern: str | None = None,
     compute_correlations: bool = False,
     add_retention_time: bool = False,
     add_ion_mobility: bool = False,
-    model: Optional[str] = "HCD",
-    model_dir: Optional[Union[str, Path]] = None,
+    model: str = "HCD",
+    model_dir: str | Path | None = None,
     ms2_tolerance: float = 0.02,
-    processes: Optional[int] = None,
-) -> List[ProcessingResult]:
+    ms2_tolerance_mode: str = "Da",
+    processes: int | None = None,
+) -> list[ProcessingResult]:
     """
     Compare predicted and observed intensities and optionally compute correlations.\f
 
@@ -246,13 +484,16 @@ def correlate(
     model_dir
         Directory where XGBoost model files are stored. Default: `~/.ms2pip`.
     ms2_tolerance
-        MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
+        MS2 tolerance for observed spectrum peak annotation. By default, 0.02.
+    ms2_tolerance_mode
+        Unit of the MS2 tolerance: ``"Da"`` or ``"ppm"``. By default, ``"Da"``.
     processes
-        Number of parallel processes for multiprocessing steps. By default, all available.
+        Number of threads for Rayon (Rust) and XGBoost parallelism. By default,
+        all available.
 
     Returns
     -------
-    results: List[ProcessingResult]
+    results: list[ProcessingResult]
         Predicted spectra with theoretical m/z and predicted intensity values, and optionally,
         correlations.
 
@@ -270,18 +511,22 @@ def correlate(
         im_predictor = IonMobility(processes=processes)
         im_predictor.add_im_predictions(psm_list)
 
-    with Encoder.from_psm_list(psm_list) as encoder:
-        ms2pip_parallelized = _Parallelized(
-            encoder=encoder,
-            model=model,
-            model_dir=model_dir,
-            ms2_tolerance=ms2_tolerance,
-            processes=processes,
+    # Validate runs and collections
+    if len(psm_list.collections) != 1 or len(psm_list.runs) != 1:
+        raise exceptions.InvalidInputError("PSMs should be for a single run and collection.")
+
+    logger.info("Processing spectra and peptides...")
+    matched = load_and_match_spectra(
+        psm_list, spectrum_file, spectrum_id_pattern, model, ms2_tolerance, ms2_tolerance_mode
+    )
+
+    if not matched:
+        raise exceptions.NoMatchingSpectraFound(
+            "No spectra matching spectrum IDs from PSM list could be found in provided file."
         )
-        logger.info("Processing spectra and peptides...")
-        results = ms2pip_parallelized.process_spectra(psm_list, spectrum_file, spectrum_id_pattern)
 
-    # Correlations also requested
+    results = _correlate_internal(matched, model, model_dir, processes=processes)
+
     if compute_correlations:
         logger.info("Computing correlations")
         calculate_correlations(results)
@@ -291,25 +536,28 @@ def correlate(
 
 
 def correlate_preloaded(
-    psms: Union[PSMList, List[PSM]],
+    psms: PSMList | list[PSM],
     compute_correlations: bool = False,
-    model: Optional[str] = "HCD",
-    model_dir: Optional[Union[str, Path]] = None,
+    model: str = "HCD",
+    model_dir: str | Path | None = None,
     ms2_tolerance: float = 0.02,
-    processes: Optional[int] = None,
-) -> List[ProcessingResult]:
+    ms2_tolerance_mode: str = "Da",
+    processes: int | None = None,
+) -> list[ProcessingResult]:
     """
     Compare predicted and observed intensities for PSMs with preloaded spectra.\f
 
-    Processes PSMs that already have :py:class:`ms2rescore_rs.MS2Spectrum`
-    objects in their ``spectrum`` attribute. It extracts the spectra, performs predictions,
-    and optionally computes correlations.
+    Processes PSMs that already have :py:class:`ms2rescore_rs.MS2Spectrum` or
+    :py:class:`ms2rescore_rs.AnnotatedMS2Spectrum` objects in their ``spectrum``
+    attribute.
 
     Parameters
     ----------
     psms
         PSMList or list of PSM objects. Each PSM must have an
-        :py:class:`ms2rescore_rs.MS2Spectrum` object in its ``spectrum`` attribute.
+        :py:class:`ms2rescore_rs.MS2Spectrum` or
+        :py:class:`ms2rescore_rs.AnnotatedMS2Spectrum` object in its ``spectrum``
+        attribute.
     compute_correlations
         Compute correlations between predictions and targets. Default: False.
     model
@@ -317,21 +565,25 @@ def correlate_preloaded(
     model_dir
         Directory where XGBoost model files are stored. Default: `~/.ms2pip`.
     ms2_tolerance
-        MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
+        MS2 tolerance for observed spectrum peak annotation. By default, 0.02.
+        Only used when spectra are not already annotated.
+    ms2_tolerance_mode
+        Unit of the MS2 tolerance: ``"Da"`` or ``"ppm"``. By default, ``"Da"``.
+        Only used when spectra are not already annotated.
     processes
-        Number of parallel processes for multiprocessing steps. By default, all available.
+        Number of threads for Rayon (Rust) and XGBoost parallelism. By default,
+        all available.
 
     Returns
     -------
-    results: List[ProcessingResult]
+    results: list[ProcessingResult]
         ProcessingResult objects with theoretical m/z, predicted intensity, and observed
         intensity values, and optionally, correlations.
 
     Raises
     ------
     ValueError
-        If PSMs do not contain :py:class:`ms2rescore_rs.MS2Spectrum` objects in the
-        ``spectrum`` attribute.
+        If PSMs do not contain spectrum objects in the ``spectrum`` attribute.
 
     """
     if isinstance(psms, list):
@@ -339,16 +591,26 @@ def correlate_preloaded(
     else:
         psm_list = psms
 
-    if not all(psm_list["spectrum"]) or not (isinstance(psm_list["spectrum"][0], MS2Spectrum)):
-        raise ValueError("PSMs must contain MS2Spectrum objects in the 'spectrum' attribute.")
+    first_spectrum = psm_list["spectrum"][0]
+    if not all(psm_list["spectrum"]) or not isinstance(
+        first_spectrum, (MS2Spectrum, AnnotatedMS2Spectrum)
+    ):
+        raise ValueError(
+            "PSMs must contain MS2Spectrum or AnnotatedMS2Spectrum objects "
+            "in the 'spectrum' attribute."
+        )
+
+    spectra_are_annotated = isinstance(first_spectrum, AnnotatedMS2Spectrum)
 
-    # Convert MS2Spectrum -> ObservedSpectrum and preprocess
-    preloaded_spectra: Dict[str, ObservedSpectrum] = {}
+    # Convert to ObservedSpectrum and preprocess; store annotations if present
+    preloaded_spectra: dict[str, ObservedSpectrum] = {}
+    preloaded_annotations: dict[str, list] | None = {} if spectra_are_annotated else None
     for psm in psm_list:
         spec_id = str(psm.spectrum_id)
         if spec_id in preloaded_spectra:
             continue
         spectrum = psm.spectrum
+        assert spectrum is not None
         obs = ObservedSpectrum(
             mz=np.array(spectrum.mz, dtype=np.float32),
             intensity=np.array(spectrum.intensity, dtype=np.float32),
@@ -363,23 +625,86 @@ def correlate_preloaded(
         obs.tic_norm()
         obs.log2_transform()
         preloaded_spectra[spec_id] = obs
+        if spectra_are_annotated:
+            assert isinstance(spectrum, AnnotatedMS2Spectrum)
+            assert preloaded_annotations is not None
+            preloaded_annotations[spec_id] = [
+                [(a.series, a.position, a.charge) for a in peak_anns]
+                for peak_anns in spectrum.peak_annotations
+            ]
+
+    # Build PSM-spectrum-annotation tuples
+    # For unannotated spectra, batch annotate using ms2rescore-rs
+    psm_spectrum_annotations = []
+    needs_annotation = []  # indices into psm_spectrum_annotations that need annotation
+
+    for i, psm in enumerate(psm_list):
+        spec_id = str(psm.spectrum_id)
+        spectrum = preloaded_spectra.get(spec_id)
+        if spectrum is None:
+            continue
+        if preloaded_annotations is not None and spec_id in preloaded_annotations:
+            psm_spectrum_annotations.append((i, psm, spectrum, preloaded_annotations[spec_id]))
+        else:
+            psm_spectrum_annotations.append((i, psm, spectrum, None))
+            needs_annotation.append(len(psm_spectrum_annotations) - 1)
 
-    # Delegate to _Parallelized
-    with Encoder.from_psm_list(psm_list) as encoder:
-        ms2pip_parallelized = _Parallelized(
-            encoder=encoder,
-            model=model,
-            model_dir=model_dir,
-            ms2_tolerance=ms2_tolerance,
-            processes=processes,
+    if not psm_spectrum_annotations:
+        raise exceptions.NoMatchingSpectraFound(
+            "No spectra matching spectrum IDs from PSM list could be found."
+        )
+
+    # Batch annotate any unannotated spectra
+    if needs_annotation:
+        frag_model = MODELS[model]["fragmentation"]
+        batch_spectra = []
+        batch_proformas = []
+        batch_seq_lens = []
+        for idx in needs_annotation:
+            _, psm, spectrum, _ = psm_spectrum_annotations[idx]
+            batch_spectra.append(
+                MS2Spectrum(
+                    identifier=spectrum.identifier or "",
+                    mz=list(spectrum.mz),
+                    intensity=list(spectrum.intensity),
+                    precursor=Precursor(
+                        mz=float(spectrum.precursor_mz) if spectrum.precursor_mz else 0.0,
+                        charge=int(spectrum.precursor_charge) if spectrum.precursor_charge else 0,
+                        rt=float(spectrum.retention_time) if spectrum.retention_time else 0.0,
+                    ),
+                )
+            )
+            batch_proformas.append(proforma_to_mass_shift(psm.peptidoform))
+            batch_seq_lens.append(len(psm.peptidoform.parsed_sequence))
+
+        annotated = annotate_ms2_spectra(
+            spectra=batch_spectra,
+            proformas=batch_proformas,
+            seq_lens=batch_seq_lens,
+            fragmentation_model=frag_model,
+            mass_mode="monoisotopic",
+            tolerance_value=float(ms2_tolerance),
+            tolerance_mode=ms2_tolerance_mode.lower(),
         )
-        logger.info("Processing spectra and peptides...")
-        results = ms2pip_parallelized.process_preloaded_spectra(psm_list, preloaded_spectra)
+
+        for j, idx in enumerate(needs_annotation):
+            psm_index, psm, spectrum, _ = psm_spectrum_annotations[idx]
+            peak_annotations = [
+                [(a.series, a.position, a.charge) for a in peak_anns]
+                for peak_anns in annotated[j].peak_annotations
+            ]
+            psm_spectrum_annotations[idx] = (psm_index, psm, spectrum, peak_annotations)
+
+    logger.info("Processing spectra and peptides...")
+    results = _correlate_internal(psm_spectrum_annotations, model, model_dir, processes=processes)
 
     if compute_correlations:
         logger.info("Computing correlations")
         calculate_correlations(results)
-        logger.info(f"Median correlation: {np.median([r.correlation for r in results if r.correlation is not None])}")
+        logger.info(
+            f"Median correlation: "
+            f"{np.median([r.correlation for r in results if r.correlation is not None])}"
+        )
 
     return results
 
@@ -387,6 +712,7 @@ def correlate_preloaded(
 def correlate_single(
     observed_spectrum: ObservedSpectrum,
     ms2_tolerance: float = 0.02,
+    ms2_tolerance_mode: str = "Da",
     model: str = "HCD",
 ) -> ProcessingResult:
     """
@@ -397,7 +723,9 @@ def correlate_single(
     observed_spectrum
         ObservedSpectrum instance with observed m/z and intensity values and peptidoform.
     ms2_tolerance
-        MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
+        MS2 tolerance for observed spectrum peak annotation. By default, 0.02.
+    ms2_tolerance_mode
+        Unit of the MS2 tolerance: ``"Da"`` or ``"ppm"``. By default, ``"Da"``.
     model
         Model to use for prediction. Default: "HCD".
 
@@ -407,47 +735,41 @@ def correlate_single(
         Result with theoretical m/z, predicted intensity, observed intensity, and correlation.
 
     """
-    # Check peptidoform in observed spectrum
     if not isinstance(observed_spectrum.peptidoform, Peptidoform):
         raise ValueError("Peptidoform must be set in observed spectrum to correlate.")
 
-    # Annotate spectrum and get target intensities
-    with Encoder.from_peptidoform(observed_spectrum.peptidoform) as encoder:
-        ms2pip_pyx.ms2pip_init(*encoder.encoder_files)
-        enc_peptidoform = encoder.encode_peptidoform(observed_spectrum.peptidoform)
-        targets = ms2pip_pyx.get_targets(
-            enc_peptidoform,
-            observed_spectrum.mz.astype(np.float32),
-            observed_spectrum.intensity.astype(np.float32),
-            float(ms2_tolerance),
-            MODELS[model]["peaks_version"],
-        )
+    # Preprocess a copy: drop reporter ions (iTRAQ/TMT models), TIC-normalize, log2-transform
+    preprocessed = observed_spectrum.model_copy(deep=True)
+    for label_type in ["iTRAQ", "TMT"]:
+        if label_type in model:
+            preprocessed.remove_reporter_ions(label_type)
+    preprocessed.tic_norm()
+    preprocessed.log2_transform()
 
-    # Reshape to dict with intensities per ion type
+    psm = PSM(peptidoform=observed_spectrum.peptidoform, spectrum_id=0)
+    annotated = annotate_spectrum(preprocessed, psm, model, ms2_tolerance, ms2_tolerance_mode)
     ion_types = [it.lower() for it in MODELS[model]["ion_types"]]
-    observed_intensity = {
-        i: np.array(p, dtype=np.float32).clip(min=np.log2(0.001))  # Clip negative intensities
-        for i, p in zip(ion_types, targets)
-    }
+    seq_len = len(observed_spectrum.peptidoform.parsed_sequence)
+    observed_intensity = targets_from_annotations(
+        annotated, preprocessed.intensity.astype(np.float32), ion_types, seq_len
+    )
 
-    # Predict spectrum and add target intensities
     result = predict_single(observed_spectrum.peptidoform, model=model)
     result.observed_intensity = observed_intensity
 
-    # Add correlation
     calculate_correlations([result])
-
     return result
 
 
 def get_training_data(
-    psms: Union[PSMList, str, Path],
-    spectrum_file: Union[str, Path],
-    psm_filetype: Optional[str] = None,
-    spectrum_id_pattern: Optional[str] = None,
-    model: Optional[str] = "HCD",
+    psms: PSMList | str | Path,
+    spectrum_file: str | Path,
+    psm_filetype: str | None = None,
+    spectrum_id_pattern: str | None = None,
+    model: str = "HCD",
     ms2_tolerance: float = 0.02,
-    processes: Optional[int] = None,
+    ms2_tolerance_mode: str = "Da",
+    processes: int | None = None,
 ):
     """
     Extract feature vectors and target intensities from observed spectra for training.\f
@@ -468,9 +790,12 @@ def get_training_data(
         Model to use as reference for the ion types that are extracted from the observed spectra.
         Default: "HCD", which results in the extraction of singly charged b- and y-ions.
     ms2_tolerance
-        MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
+        MS2 tolerance for observed spectrum peak annotation. By default, 0.02.
+    ms2_tolerance_mode
+        Unit of the MS2 tolerance: ``"Da"`` or ``"ppm"``. By default, ``"Da"``.
     processes
-        Number of parallel processes for multiprocessing steps. By default, all available.
+        Number of threads for Rayon (Rust) and XGBoost parallelism. By default,
+        all available.
 
     Returns
     -------
@@ -481,32 +806,40 @@ def get_training_data(
     psm_list = read_psms(psms, filetype=psm_filetype)
     spectrum_id_pattern = spectrum_id_pattern if spectrum_id_pattern else "(.*)"
 
-    with Encoder.from_psm_list(psm_list) as encoder:
-        ms2pip_parallelized = _Parallelized(
-            encoder=encoder,
-            model=model,
-            ms2_tolerance=ms2_tolerance,
-            processes=processes,
-        )
-        logger.info("Processing spectra and peptides...")
-        results = ms2pip_parallelized.process_spectra(
-            psm_list, spectrum_file, spectrum_id_pattern, vector_file=True
+    if len(psm_list.collections) != 1 or len(psm_list.runs) != 1:
+        raise exceptions.InvalidInputError("PSMs should be for a single run and collection.")
+
+    logger.info("Processing spectra and peptides...")
+    matched = load_and_match_spectra(
+        psm_list, spectrum_file, spectrum_id_pattern, model, ms2_tolerance, ms2_tolerance_mode
+    )
+
+    if not matched:
+        raise exceptions.NoMatchingSpectraFound(
+            "No spectra matching spectrum IDs from PSM list could be found in provided file."
         )
 
-        logger.info("Assembling training data in DataFrame...")
-        training_data = _assemble_training_data(results, model)
+    results = _correlate_internal(
+        matched,
+        model,
+        model_dir=None,
+        vector_file=True,
+        processes=processes,
+    )
 
-    return training_data
+    logger.info("Assembling training data in DataFrame...")
+    return _assemble_training_data(results, model)
 
 
 def annotate_spectra(
-    psms: Union[PSMList, str, Path],
-    spectrum_file: Union[str, Path],
-    psm_filetype: Optional[str] = None,
-    spectrum_id_pattern: Optional[str] = None,
-    model: Optional[str] = "HCD",
+    psms: PSMList | str | Path,
+    spectrum_file: str | Path,
+    psm_filetype: str | None = None,
+    spectrum_id_pattern: str | None = None,
+    model: str = "HCD",
     ms2_tolerance: float = 0.02,
-    processes: Optional[int] = None,
+    ms2_tolerance_mode: str = "Da",
+    processes: int | None = None,
 ):
     """
     Annotate observed spectra.\f
@@ -527,37 +860,45 @@ def annotate_spectra(
         Model to use as reference for the ion types that are extracted from the observed spectra.
         Default: "HCD", which results in the extraction of singly charged b- and y-ions.
     ms2_tolerance
-        MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
+        MS2 tolerance for observed spectrum peak annotation. By default, 0.02.
+    ms2_tolerance_mode
+        Unit of the MS2 tolerance: ``"Da"`` or ``"ppm"``. By default, ``"Da"``.
     processes
-        Number of parallel processes for multiprocessing steps. By default, all available.
+        Number of threads for Rayon (Rust) and XGBoost parallelism. By default,
+        all available.
 
     Returns
     -------
-    results: List[ProcessingResult]
+    results: list[ProcessingResult]
         List of ProcessingResult objects with theoretical m/z and observed intensity values.
 
     """
     psm_list = read_psms(psms, filetype=psm_filetype)
     spectrum_id_pattern = spectrum_id_pattern if spectrum_id_pattern else "(.*)"
 
-    with Encoder.from_psm_list(psm_list) as encoder:
-        ms2pip_parallelized = _Parallelized(
-            encoder=encoder,
-            model=model,
-            ms2_tolerance=ms2_tolerance,
-            processes=processes,
-        )
-        logger.info("Processing spectra and peptides...")
-        results = ms2pip_parallelized.process_spectra(
-            psm_list, spectrum_file, spectrum_id_pattern, vector_file=False, annotations_only=True
+    if len(psm_list.collections) != 1 or len(psm_list.runs) != 1:
+        raise exceptions.InvalidInputError("PSMs should be for a single run and collection.")
+
+    logger.info("Processing spectra and peptides...")
+    matched = load_and_match_spectra(
+        psm_list, spectrum_file, spectrum_id_pattern, model, ms2_tolerance, ms2_tolerance_mode
+    )
+
+    if not matched:
+        raise exceptions.NoMatchingSpectraFound(
+            "No spectra matching spectrum IDs from PSM list could be found in provided file."
         )
 
-    return results
+    return _correlate_internal(
+        matched,
+        model,
+        model_dir=None,
+        annotations_only=True,
+        processes=processes,
+    )
 
 
-def download_models(
-    models: Optional[List[str]] = None, model_dir: Optional[Union[str, Path]] = None
-):
+def download_models(models: list[str] | None = None, model_dir: str | Path | None = None):
     """
     Download all specified models to the specified directory.
 
@@ -577,619 +918,9 @@ def download_models(
         models = list(MODELS.keys())
 
     for model in models:
-        try:
-            if "xgb_model_files" in MODELS[model].keys():
-                continue
-        except KeyError:
+        if model not in MODELS:
             raise exceptions.UnknownModelError(model)
+        if "xgboost_model_files" not in MODELS[model]:
+            continue
         logger.debug("Downloading %s model files", model)
-        validate_requested_xgb_model(
-            MODELS[model]["xgboost_model_files"],
-            MODELS[model]["model_hash"],
-            model_dir,
-        )
-
-
-class _Parallelized:
-    """Implementations of common multiprocessing functionality across MS²PIP usage modes."""
-
-    def __init__(
-        self,
-        encoder: Encoder = None,
-        model: Optional[str] = None,
-        model_dir: Optional[Union[str, Path]] = None,
-        ms2_tolerance: float = 0.02,
-        processes: Optional[int] = None,
-    ):
-        """
-        Implementations of common multiprocessing functionality across MS²PIP usage modes.
-
-        Parameters
-        ----------
-        encoding
-            Configured encoding class instance. Required if input peptides contain modifications.
-        model
-            Name of the model to use for predictions. Overrides configuration file.
-        model_dir
-            Custom directory for downloaded XGBoost model files. By default, `~/.ms2pip` is used.
-        ms2_tolerance
-            MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
-        processes
-            Number of parallel processes for multiprocessing steps. By default, all available.
-
-        """
-        # Input parameters
-        self.encoder = encoder
-        self.model = model
-        self.model_dir = model_dir if model_dir else Path.home() / ".ms2pip"
-        self.ms2_tolerance = ms2_tolerance
-        self.processes = processes if processes else multiprocessing.cpu_count()
-
-        # Setup encoder if not configured
-        if not self.encoder:
-            self.encoder = Encoder()
-            self.encoder.write_encoder_files()
-
-        # Validate requested model
-        if self.model in MODELS.keys():
-            logger.debug("Using %s model", self.model)
-            if "xgboost_model_files" in MODELS[self.model].keys():
-                validate_requested_xgb_model(
-                    MODELS[self.model]["xgboost_model_files"],
-                    MODELS[self.model]["model_hash"],
-                    self.model_dir,
-                )
-        else:
-            raise exceptions.UnknownModelError(self.model)
-
-    def _get_pool(self):
-        """Get multiprocessing pool."""
-        logger.debug(f"Starting workers (processes={self.processes})...")
-        if multiprocessing.current_process().daemon:
-            logger.warning(
-                "MS²PIP is running in a daemon process. Disabling multiprocessing as daemonic "
-                "processes cannot have children."
-            )
-            return multiprocessing.dummy.Pool(1)
-        elif self.processes == 1:
-            logger.debug("Using dummy multiprocessing pool.")
-            return multiprocessing.dummy.Pool(1)
-        else:
-            return multiprocessing.get_context("spawn").Pool(self.processes)
-
-    def _validate_output_formats(self, output_formats: List[str]) -> List[str]:
-        """Validate requested output formats."""
-        if not output_formats:
-            self.output_formats = ["csv"]
-        else:
-            for output_format in output_formats:
-                if output_format not in SUPPORTED_FORMATS:
-                    raise exceptions.UnknownOutputFormatError(output_format)
-            self.output_formats = output_formats
-
-    def _execute_in_pool(self, psm_list: PSMList, func: Callable, args: tuple):
-        """Execute function in multiprocessing pool."""
-
-        def get_chunk_size(n_items, n_processes):
-            """Get optimal chunk size for multiprocessing."""
-            if n_items < 5000:
-                return n_items
-            else:
-                max_chunk_size = 50000
-                n_chunks = ceil(ceil(n_items / n_processes) / max_chunk_size) * n_processes
-                return ceil(n_items / n_chunks)
-
-        def to_chunks(_list, chunk_size):
-            """Split _list into chunks of size chunk_size."""
-
-            def _generate_chunks():
-                for i in range(0, len(_list), chunk_size):
-                    yield _list[i : i + chunk_size]
-
-            _list = list(_list)
-            return list(_generate_chunks())
-
-        def _enumerated_psm_list_by_spectrum_id(psm_list, spectrum_ids_chunk):
-            selected_indices = np.flatnonzero(np.isin(psm_list["spectrum_id"], spectrum_ids_chunk))
-            return [(i, psm_list.psm_list[i]) for i in selected_indices]
-
-        with self._get_pool() as pool:
-            if not psm_list:
-                logger.warning("No PSMs to process.")
-                return []
-
-            # Split PSMList into chunks
-            if func == _process_spectra:
-                # Split by spectrum_id to keep PSMs for same spectrum together
-                spectrum_ids = set(psm_list["spectrum_id"])
-                chunk_size = get_chunk_size(len(spectrum_ids), pool._processes)
-                chunks = [
-                    _enumerated_psm_list_by_spectrum_id(psm_list, spectrum_ids_chunk)
-                    for spectrum_ids_chunk in to_chunks(spectrum_ids, chunk_size)
-                ]
-            else:
-                # Simple split by PSM
-                chunk_size = get_chunk_size(len(psm_list), pool._processes)
-                chunks = to_chunks(list(enumerate(psm_list)), chunk_size)
-
-            logger.debug(f"Processing {len(chunks)} chunk(s) of ~{chunk_size} entries each.")
-
-            # Add jobs to pool
-            mp_results = []
-            for psm_list_chunk in chunks:
-                mp_results.append(pool.apply_async(func, args=(psm_list_chunk, *args)))
-
-            # Gather results
-            # results = [
-            #     r.get()
-            #     for r in track(
-            #         mp_results,
-            #         disable=len(chunks) == 1,
-            #         description="Processing chunks...",
-            #         transient=True,
-            #         show_speed=False,
-            #     )
-            # ]
-            results = [r.get() for r in mp_results]
-
-        # Sort results by input order
-        results = list(
-            sorted(
-                itertools.chain.from_iterable(results),
-                key=lambda result: result.psm_index,
-            )
-        )
-
-        return results
-
-    def process_peptides(self, psm_list: PSMList) -> List[ProcessingResult]:
-        """Process peptides in parallel."""
-        results = self._execute_in_pool(
-            psm_list,
-            _process_peptides,
-            (self.encoder, self.model),
-        )
-        logger.debug(f"Gathered data for {len(results)} peptides.")
-
-        # Add XGBoost predictions if required
-        if "xgboost_model_files" in MODELS[self.model].keys():
-            results = self._add_xgboost_predictions(results)
-
-        return results
-
-    def process_spectra(
-        self,
-        psm_list: PSMList,
-        spectrum_file: Union[str, Path],
-        spectrum_id_pattern: str,
-        vector_file: bool = False,
-        annotations_only: bool = False,
-    ) -> List[ProcessingResult]:
-        """
-        Process PSMs and observed spectra in parallel
-
-        Parameters
-        ----------
-        psm_list
-            psm_utils.PSMList instance with PSMs to process
-        spectrum_file
-            Filename of spectrum file
-        spectrum_id_pattern
-            Regular expression pattern to apply to spectrum titles before matching to
-            peptide file entries
-        vector_file
-            If feature vectors should be extracted instead of predictions
-        annotations_only
-            If only peak annotations should be extracted from the spectrum file
-
-        """
-        # Validate runs and collections
-        if not len(psm_list.collections) == 1 or not len(psm_list.runs) == 1:
-            raise exceptions.InvalidInputError("PSMs should be for a single run and collection.")
-
-        args = (
-            spectrum_file,
-            vector_file,
-            self.encoder,
-            self.model,
-            self.ms2_tolerance,
-            spectrum_id_pattern,
-            annotations_only,
-            None,  # preloaded_spectra
-        )
-        results = self._execute_in_pool(psm_list, _process_spectra, args)
-
-        # Validate number of results
-        if not results:
-            raise exceptions.NoMatchingSpectraFound(
-                "No spectra matching spectrum IDs from PSM list could be found in provided file."
-            )
-        logger.debug(f"Gathered data for {len(results)} PSMs.")
-
-        # Add XGBoost predictions if required
-        if (
-            not (vector_file or annotations_only)
-            and "xgboost_model_files" in MODELS[self.model].keys()
-        ):
-            results = self._add_xgboost_predictions(results)
-
-        return results
-
-    def process_preloaded_spectra(
-        self,
-        psm_list: PSMList,
-        preloaded_spectra: Dict[str, ObservedSpectrum],
-    ) -> List[ProcessingResult]:
-        """
-        Process PSMs with pre-loaded and preprocessed ObservedSpectrum objects.
-
-        Parameters
-        ----------
-        psm_list
-            psm_utils.PSMList instance with PSMs to process.
-        preloaded_spectra
-            Dictionary mapping spectrum IDs to preprocessed ObservedSpectrum objects.
-
-        """
-        args = (
-            None,  # spec_file
-            False,  # vector_file
-            self.encoder,
-            self.model,
-            self.ms2_tolerance,
-            None,  # spectrum_id_pattern
-            False,  # annotations_only
-            preloaded_spectra,
-        )
-        results = self._execute_in_pool(psm_list, _process_spectra, args)
-
-        if not results:
-            raise exceptions.NoMatchingSpectraFound(
-                "No spectra matching spectrum IDs from PSM list could be found."
-            )
-        logger.debug(f"Gathered data for {len(results)} PSMs.")
-
-        if "xgboost_model_files" in MODELS[self.model].keys():
-            results = self._add_xgboost_predictions(results)
-
-        return results
-
-    def _add_xgboost_predictions(self, results: List[ProcessingResult]) -> List[ProcessingResult]:
-        """
-        Add XGBoost predictions to results.
-
-        Notes
-        -----
-        This functions is applied after the parallel processing, as XGBoost implements its own
-        multiprocessing.
-        """
-
-        if "xgboost_model_files" not in MODELS[self.model].keys():
-            raise ValueError("XGBoost model files not found in MODELS dictionary.")
-
-        logger.debug("Converting feature vectors to XGBoost DMatrix...")
-        import xgboost as xgb
-
-        results_to_predict = [r for r in results if r.feature_vectors is not None]
-
-        if not results_to_predict:
-            return results
-
-        num_ions = [len(r.psm.peptidoform.parsed_sequence) - 1 for r in results_to_predict]
-        xgb_vector = xgb.DMatrix(np.vstack(list(r.feature_vectors for r in results_to_predict)))
-
-        predictions = get_predictions_xgb(
-            xgb_vector,
-            num_ions,
-            MODELS[self.model],
-            self.model_dir,
-            processes=self.processes,
-        )
-
-        logger.debug("Adding XGBoost predictions to results...")
-        for result, preds in zip(results_to_predict, predictions):
-            result.predicted_intensity = preds
-            result.feature_vectors = None
-
-        return results
-
-
-def _process_peptidoform(
-    psm_index: int,
-    psm: PSM,
-    model: str,
-    encoder: Encoder,
-    ion_types: Optional[List[str]] = None,
-) -> ProcessingResult:
-    """
-    Process a single peptidoform from a PSM.
-
-    Get theoretical m/z and predicted intensities (from C model) or feature vectors (for XGBoost
-    model) for a single peptidoform from a PSM.
-
-    Notes
-    -----
-    - ``ms2pip_pyx.init()`` must be called before this function is called.
-    - Optionally, lowercase version of ``ion_types`` from the model configuration can be provided
-    to save computational time.
-
-    """
-    peptidoform = psm.peptidoform
-    if not ion_types:
-        ion_types = [it.lower() for it in MODELS[model]["ion_types"]]
-
-    enc_peptide = encoder.encode_peptide(peptidoform)
-    enc_peptidoform = encoder.encode_peptidoform(peptidoform)
-
-    # Get ion mzs and map to ion types
-    mz = ms2pip_pyx.get_mzs(enc_peptidoform, MODELS[model]["peaks_version"])
-    mz = {i: np.array(mz, dtype=np.float32) for i, mz in zip(ion_types, mz)}
-
-    # Get predictions from XGBoost models.
-    if "xgboost_model_files" in MODELS[model].keys():
-        predictions = None
-        feature_vectors = np.array(
-            ms2pip_pyx.get_vector(enc_peptide, enc_peptidoform, peptidoform.precursor_charge),
-            dtype=np.uint16,
-        )
-    # Or get predictions from C models.
-    else:
-        predictions = ms2pip_pyx.get_predictions(
-            enc_peptide,
-            enc_peptidoform,
-            peptidoform.precursor_charge,
-            MODELS[model]["id"],
-            MODELS[model]["peaks_version"],
-            30.0,  # TODO: Remove CE feature
-        )
-        predictions = {
-            i: np.array(p, dtype=np.float32).clip(min=np.log2(0.001))  # Clip negative intensities
-            for i, p in zip(ion_types, predictions)
-        }
-        feature_vectors = None
-
-    return ProcessingResult(
-        psm_index=psm_index,
-        psm=psm,
-        theoretical_mz=mz,
-        predicted_intensity=predictions,
-        observed_intensity=None,
-        feature_vectors=feature_vectors,
-    )
-
-
-def _process_peptides(
-    enumerated_psm_list: List[Tuple[int, PSM]],
-    encoder: Encoder,
-    model: str,
-) -> List[ProcessingResult]:
-    """
-    Predict spectrum for each entry in PeptideRecord DataFrame.
-
-    Parameters
-    ----------
-    enumerated_psm_list
-        List of tuples of (index, PSM) for each PSM in the input file.
-    encoder
-        Configured encoder to use for peptide and peptidoform encoding
-    model
-        Name of prediction model to be used
-
-    """
-    ms2pip_pyx.ms2pip_init(*encoder.encoder_files)
-    results = []
-    ion_types = [it.lower() for it in MODELS[model]["ion_types"]]
-
-    for psm_index, psm in enumerated_psm_list:
-        try:
-            result = _process_peptidoform(psm_index, psm, model, encoder, ion_types)
-        except (
-            exceptions.InvalidPeptidoformError,
-            exceptions.InvalidAminoAcidError,
-        ):
-            result = ProcessingResult(psm_index=psm_index, psm=psm)
-        results.append(result)
-
-    return results
-
-
-def _process_spectra(
-    enumerated_psm_list: List[Tuple[int, PSM]],
-    spec_file: Optional[str],
-    vector_file: bool,
-    encoder: Encoder,
-    model: str,
-    ms2_tolerance: float,
-    spectrum_id_pattern: Optional[str],
-    annotations_only: bool = False,
-    preloaded_spectra: Optional[Dict[str, ObservedSpectrum]] = None,
-) -> List[ProcessingResult]:
-    """
-    Perform requested tasks for each spectrum in spectrum file or from preloaded spectra.
-
-    Parameters
-    ----------
-    enumerated_psm_list
-        List of tuples of (index, PSM) for each PSM in the input file.
-    spec_file
-        Filename of spectrum file. Not used when ``preloaded_spectra`` is provided.
-    vector_file
-        If feature vectors should be extracted instead of predictions.
-    encoder: Encoder
-        Configured encoder to use for peptide and peptidoform encoding.
-    model
-        Name of prediction model to be used.
-    ms2_tolerance
-        Fragmentation spectrum m/z error tolerance in Dalton.
-    spectrum_id_pattern
-        Regular expression pattern to apply to spectrum titles before matching to
-        peptide file entries. Not used when ``preloaded_spectra`` is provided.
-    annotations_only
-        If only peak annotations should be extracted from the spectrum file.
-    preloaded_spectra
-        Optional dictionary mapping spectrum IDs to preprocessed ObservedSpectrum objects.
-        When provided, spectra are looked up directly instead of reading from ``spec_file``.
-
-    """
-    ms2pip_pyx.ms2pip_init(*encoder.encoder_files)
-    results = []
-    ion_types = [it.lower() for it in MODELS[model]["ion_types"]]
-
-    def _process_psm_with_spectrum(psm_index, psm, spectrum):
-        """Process a single PSM against an observed spectrum."""
-        try:
-            enc_peptidoform = encoder.encode_peptidoform(psm.peptidoform)
-        except exceptions.InvalidAminoAcidError:
-            return ProcessingResult(psm_index=psm_index, psm=psm)
-
-        targets = ms2pip_pyx.get_targets(
-            enc_peptidoform,
-            spectrum.mz.astype(np.float32),
-            spectrum.intensity.astype(np.float32),
-            float(ms2_tolerance),
-            MODELS[model]["peaks_version"],
-        )
-        targets = {i: np.array(t, dtype=np.float32) for i, t in zip(ion_types, targets)}
-
-        if not psm.peptidoform.precursor_charge:
-            psm.peptidoform.precursor_charge = spectrum.precursor_charge
-
-        if vector_file:
-            enc_peptide = encoder.encode_peptide(psm.peptidoform)
-            feature_vectors = np.array(
-                ms2pip_pyx.get_vector(
-                    enc_peptide, enc_peptidoform, psm.peptidoform.precursor_charge
-                ),
-                dtype=np.uint16,
-            )
-            return ProcessingResult(
-                psm_index=psm_index,
-                psm=psm,
-                theoretical_mz=None,
-                predicted_intensity=None,
-                observed_intensity=targets,
-                correlation=None,
-                feature_vectors=feature_vectors,
-            )
-
-        elif annotations_only:
-            mz = ms2pip_pyx.get_mzs(enc_peptidoform, MODELS[model]["peaks_version"])
-            mz = {i: np.array(mz, dtype=np.float32) for i, mz in zip(ion_types, mz)}
-            return ProcessingResult(
-                psm_index=psm_index,
-                psm=psm,
-                theoretical_mz=mz,
-                predicted_intensity=None,
-                observed_intensity=targets,
-                correlation=None,
-                feature_vectors=None,
-            )
-
-        else:
-            try:
-                result = _process_peptidoform(psm_index, psm, model, encoder, ion_types)
-            except (
-                exceptions.InvalidPeptidoformError,
-                exceptions.InvalidAminoAcidError,
-            ):
-                return ProcessingResult(psm_index=psm_index, psm=psm)
-            else:
-                result.observed_intensity = targets
-                return result
-
-    if preloaded_spectra is not None:
-        # Spectra are already preprocessed; look up by spectrum ID
-        for psm_index, psm in enumerated_psm_list:
-            spectrum = preloaded_spectra.get(str(psm.spectrum_id))
-            if spectrum is None:
-                continue
-            results.append(_process_psm_with_spectrum(psm_index, psm, spectrum))
-    else:
-        # Read spectra from file and preprocess
-        assert spec_file is not None, "spec_file is required when preloaded_spectra is not provided"
-        try:
-            spectrum_id_regex = re.compile(spectrum_id_pattern)
-        except TypeError:
-            spectrum_id_regex = re.compile(r"(.*)")
-
-        psms_by_specid = defaultdict(list)
-        for psm_index, psm in enumerated_psm_list:
-            psms_by_specid[str(psm.spectrum_id)].append((psm_index, psm))
-
-        for spectrum in read_spectrum_file(spec_file):
-            match = spectrum_id_regex.search(spectrum.identifier)
-            try:
-                spectrum_id = match[1]
-            except (TypeError, IndexError):
-                raise exceptions.TitlePatternError(
-                    f"Spectrum title pattern `{spectrum_id_pattern}` could not be matched to "
-                    f"spectrum ID `{spectrum.identifier}`. "
-                    " Are you sure that the regex contains a capturing group?"
-                )
-
-            if spectrum_id not in psms_by_specid:
-                continue
-
-            # Spectrum preprocessing
-            for label_type in ["iTRAQ", "TMT"]:
-                if label_type in model:
-                    spectrum.remove_reporter_ions(label_type)
-            # spectrum.remove_precursor()  # TODO: Decide to implement this or not
-            spectrum.tic_norm()
-            spectrum.log2_transform()
-
-            for psm_index, psm in psms_by_specid[spectrum_id]:
-                results.append(_process_psm_with_spectrum(psm_index, psm, spectrum))
-
-    return results
-
-
-def _assemble_training_data(results: List[ProcessingResult], model: str) -> pd.DataFrame:
-    """Assemble training data from results list to single pandas DataFrame."""
-    # Get ion types
-    ion_types = [it.lower() for it in MODELS[model]["ion_types"]]
-
-    # Assemble feature vectors, PSM indices, and targets
-    training_data = pd.DataFrame(
-        np.vstack([r.feature_vectors for r in results if r.feature_vectors is not None]),
-        columns=get_feature_names(),
-    )
-    training_data["psm_index"] = np.concatenate(
-        [
-            np.repeat(r.psm_index, r.feature_vectors.shape[0])
-            for r in results
-            if r.feature_vectors is not None
-        ]
-    )
-    for ion_type in ion_types:
-        if ion_type in ["a", "b", "b2", "c"]:
-            training_data[f"target_{ion_type}"] = np.concatenate(
-                [r.observed_intensity[ion_type] for r in results if r.feature_vectors is not None]
-            )
-        elif ion_type in ["x", "y", "y2", "z"]:
-            training_data[f"target_{ion_type}"] = np.concatenate(
-                [
-                    r.observed_intensity[ion_type][::-1]
-                    for r in results
-                    if r.feature_vectors is not None
-                ]
-            )
-
-    # Reorder columns
-    training_data = training_data[
-        ["psm_index"] + get_feature_names() + [f"target_{it}" for it in ion_types]
-    ]
-
-    return training_data
-
-
-def _into_batches(iterable: Iterable[Any], batch_size: int) -> Generator[List[Any], None, None]:
-    """Accumulate iterator elements into batches of a given size."""
-    batch = []
-    for item in iterable:
-        batch.append(item)
-        if len(batch) == batch_size:
-            yield batch
-            batch = []
-    if batch:
-        yield batch
+        validate_model(model, model_dir)
diff --git a/ms2pip/plot.py b/ms2pip/plot.py
index 56f7bc63..5a9c2b97 100644
--- a/ms2pip/plot.py
+++ b/ms2pip/plot.py
@@ -1,5 +1,4 @@
 from pathlib import Path
-from typing import Union
 
 from ms2pip.spectrum import Spectrum
 
@@ -12,7 +11,7 @@
     _can_plot = False
 
 
-def spectrum_to_png(spectrum: Spectrum, filepath: Union[str, Path]):
+def spectrum_to_png(spectrum: Spectrum, filepath: str | Path):
     """Plot a single spectrum and write to a PNG file."""
     if not _can_plot:
         raise ImportError("Matplotlib and spectrum_utils are required to plot spectra.")
diff --git a/ms2pip/result.py b/ms2pip/result.py
index 3cf7eb73..37ef6beb 100644
--- a/ms2pip/result.py
+++ b/ms2pip/result.py
@@ -3,8 +3,9 @@
 from __future__ import annotations
 
 import csv
-from typing import Any, Dict, List, Optional, Tuple
 from logging import getLogger
+from pathlib import Path
+from typing import Any
 
 import numpy as np
 from psm_utils import PSM
@@ -12,32 +13,31 @@
 
 try:
     import spectrum_utils.plot as sup
-    import spectrum_utils.spectrum as sus
 except ImportError:
-    sus = None
-    sup = None
+    sup = None  # type: ignore[ty:invalid-assignment]
 
 from ms2pip.spectrum import ObservedSpectrum, PredictedSpectrum
 
 logger = getLogger(__name__)
 
+
 class ProcessingResult(BaseModel):
     """Result of processing a single PSM."""
 
     psm_index: int
-    psm: Optional[PSM] = None
-    theoretical_mz: Optional[Dict[str, np.ndarray]] = None
-    predicted_intensity: Optional[Dict[str, np.ndarray]] = None
-    observed_intensity: Optional[Dict[str, np.ndarray]] = None
-    correlation: Optional[float] = None
-    feature_vectors: Optional[np.ndarray] = None
+    psm: PSM
+    theoretical_mz: dict[str, np.ndarray] | None = None
+    predicted_intensity: dict[str, np.ndarray] | None = None
+    observed_intensity: dict[str, np.ndarray] | None = None
+    correlation: float | None = None
+    feature_vectors: np.ndarray | None = None
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     def __init__(__pydantic_self__, **data: Any) -> None:
         """Result of processing a single PSM."""
         super().__init__(**data)
 
-    def as_spectra(self) -> Tuple[Optional[PredictedSpectrum], Optional[ObservedSpectrum]]:
+    def as_spectra(self) -> tuple[PredictedSpectrum | None, ObservedSpectrum | None]:
         """Convert result to predicted and observed spectra."""
         if not self.theoretical_mz:
             raise ValueError("Theoretical m/z values required to convert to spectra.")
@@ -110,15 +110,17 @@ def plot_spectra(self):
         return ax
 
 
-def calculate_correlations(results: List[ProcessingResult]) -> None:
+def calculate_correlations(results: list[ProcessingResult]) -> None:
     """Calculate and add Pearson correlations to list of results."""
     for result in results:
-        pred_int = np.concatenate([i for i in result.predicted_intensity.values()])
-        obs_int = np.concatenate([i for i in result.observed_intensity.values()])
+        if result.predicted_intensity is None or result.observed_intensity is None:
+            continue
+        pred_int = np.concatenate(list(result.predicted_intensity.values()))
+        obs_int = np.concatenate(list(result.observed_intensity.values()))
         result.correlation = np.corrcoef(pred_int, obs_int)[0][1]
 
 
-def write_correlations(results: List["ProcessingResult"], output_file: str) -> None:
+def write_correlations(results: list[ProcessingResult], output_file: str | Path) -> None:
     """Write correlations to CSV file."""
     with open(output_file, "wt") as f:
         fieldnames = ["psm_index", "correlation"]
diff --git a/ms2pip/search_space.py b/ms2pip/search_space.py
index 72eab413..c9c88240 100644
--- a/ms2pip/search_space.py
+++ b/ms2pip/search_space.py
@@ -73,12 +73,14 @@
 
 import multiprocessing
 import multiprocessing.dummy
+import multiprocessing.pool
 from collections import defaultdict
+from collections.abc import Generator
 from functools import partial
 from itertools import chain, combinations, product
 from logging import getLogger
 from pathlib import Path
-from typing import Any, Dict, Generator, List, Optional, Union
+from typing import Any
 
 import numpy as np
 import pyteomics.fasta
@@ -94,12 +96,12 @@ class ModificationConfig(BaseModel):
     """Configuration for a single modification in the search space."""
 
     label: str
-    amino_acid: Optional[str] = None
-    peptide_n_term: Optional[bool] = False
-    protein_n_term: Optional[bool] = False
-    peptide_c_term: Optional[bool] = False
-    protein_c_term: Optional[bool] = False
-    fixed: Optional[bool] = False
+    amino_acid: str | None = None
+    peptide_n_term: bool | None = False
+    protein_n_term: bool | None = False
+    peptide_c_term: bool | None = False
+    protein_c_term: bool | None = False
+    fixed: bool | None = False
 
     def __init__(self, **data: Any):
         """
@@ -159,15 +161,15 @@ class ProteomeSearchSpace(BaseModel):
     fasta_file: Path
     min_length: int = 8
     max_length: int = 30
-    min_precursor_mz: Optional[float] = 0
-    max_precursor_mz: Optional[float] = np.inf
+    min_precursor_mz: float | None = 0
+    max_precursor_mz: float | None = np.inf
     cleavage_rule: str = "trypsin"
     missed_cleavages: int = 2
     semi_specific: bool = False
     add_decoys: bool = False
-    modifications: List[ModificationConfig] = DEFAULT_MODIFICATIONS
+    modifications: list[ModificationConfig] = DEFAULT_MODIFICATIONS
     max_variable_modifications: int = 3
-    charges: List[int] = [2, 3]
+    charges: list[int] = [2, 3]
 
     def __init__(self, **data: Any):
         """
@@ -204,7 +206,7 @@ def __init__(self, **data: Any):
         """
 
         super().__init__(**data)
-        self._peptidoform_spaces: List[_PeptidoformSearchSpace] = []
+        self._peptidoform_spaces: list[_PeptidoformSearchSpace] = []
 
     @field_validator("modifications")
     @classmethod
@@ -234,7 +236,7 @@ def __len__(self):
         return sum(len(pep_space) for pep_space in self._peptidoform_spaces)
 
     @classmethod
-    def from_any(cls, _input: Union[dict, str, Path, ProteomeSearchSpace]) -> ProteomeSearchSpace:
+    def from_any(cls, _input: dict | str | Path | ProteomeSearchSpace) -> ProteomeSearchSpace:
         """
         Create ProteomeSearchSpace from various input types.
 
@@ -271,7 +273,7 @@ def build(self, processes: int = 1):
         self._add_modifications(processes)
         self._add_charges()
 
-    def __iter__(self) -> Generator[PSM, None, None]:
+    def __iter__(self) -> Generator[PSM, None, None]:  # type: ignore[ty:invalid-method-override]
         """
         Generate PSMs from search space.
 
@@ -304,14 +306,14 @@ def filter_psms_by_mz(self, psms: PSMList) -> PSMList:
             psm_list=[
                 psm
                 for psm in psms
-                if self.min_precursor_mz <= psm.peptidoform.theoretical_mz <= self.max_precursor_mz
+                if self.min_precursor_mz <= psm.peptidoform.theoretical_mz <= self.max_precursor_mz  # type: ignore[ty:unsupported-operator]
             ]
         )
 
     def _digest_fasta(self, processes: int = 1):
         """Digest FASTA file to peptides and populate search space."""
         # Convert to string to avoid issues with Path objects
-        self.fasta_file = str(self.fasta_file)
+        self.fasta_file = str(self.fasta_file)  # type: ignore[ty:invalid-assignment]
         n_proteins = _count_fasta_entries(self.fasta_file)
         if self.add_decoys:
             fasta_db = pyteomics.fasta.decoy_db(
@@ -394,11 +396,11 @@ class _PeptidoformSearchSpace(BaseModel):
     """Search space for a given amino acid sequence."""
 
     sequence: str
-    proteins: List[str]
-    is_n_term: Optional[bool] = None
-    is_c_term: Optional[bool] = None
-    modification_options: List[Dict[int, ModificationConfig]] = []
-    charge_options: List[int] = []
+    proteins: list[str]
+    is_n_term: bool | None = None
+    is_c_term: bool | None = None
+    modification_options: list[dict[int, ModificationConfig]] = []
+    charge_options: list[int] = []
 
     def __init__(self, **data: Any):
         """
@@ -425,7 +427,7 @@ def __init__(self, **data: Any):
     def __len__(self):
         return len(self.modification_options) * len(self.charge_options)
 
-    def __iter__(self) -> Generator[str, None, None]:
+    def __iter__(self) -> Generator[str, None, None]:  # type: ignore[ty:invalid-method-override]
         """Yield peptidoform strings with given charges and modifications."""
         if not self.charge_options:
             raise ValueError("Peptide charge options not defined.")
@@ -437,7 +439,7 @@ def __iter__(self) -> Generator[str, None, None]:
 
     @staticmethod
     def _construct_peptidoform_string(
-        sequence: str, modifications: Dict[int, ModificationConfig], charge: int
+        sequence: str, modifications: dict[int, ModificationConfig], charge: int
     ) -> str:
         if not modifications:
             return f"{sequence}/{charge}"
@@ -469,7 +471,7 @@ def _digest_single_protein(
     cleavage_rule: str = "trypsin",
     missed_cleavages: int = 2,
     semi_specific: bool = False,
-) -> List[_PeptidoformSearchSpace]:
+) -> list[_PeptidoformSearchSpace]:
     """Digest protein sequence and return a list of validated peptides."""
 
     def valid_residues(sequence: str) -> bool:
@@ -516,8 +518,8 @@ def _count_fasta_entries(filename: Path) -> int:
 
 
 def _restructure_modifications_by_target(
-    modifications: List[ModificationConfig],
-) -> Dict[str, Dict[str, List[ModificationConfig]]]:
+    modifications: list[ModificationConfig],
+) -> dict[str, dict[str, list[ModificationConfig]]]:
     """Restructure variable modifications to options per side chain or terminus."""
     modifications_by_target = {
         "sidechain": defaultdict(lambda: []),
@@ -552,9 +554,9 @@ def add_mod(mod, target, amino_acid):
 
 def _get_modification_possibilities_by_site(
     peptide: _PeptidoformSearchSpace,
-    modifications_by_target: Dict[str, Dict[str, List[ModificationConfig]]],
-    modifications: List[ModificationConfig],
-) -> Dict[Union[str, int], List[ModificationConfig]]:
+    modifications_by_target: dict[str, dict[str, list[ModificationConfig]]],
+    modifications: list[ModificationConfig],
+) -> dict[str | int, list[ModificationConfig]]:
     """Get all possible modifications for each site in a peptide sequence."""
     possibilities_by_site = defaultdict(list)
 
@@ -609,10 +611,10 @@ def _get_modification_possibilities_by_site(
 
 def _get_peptidoform_modification_versions(
     peptide: _PeptidoformSearchSpace,
-    modifications: List[ModificationConfig],
-    modifications_by_target: Dict[str, Dict[str, List[ModificationConfig]]],
+    modifications: list[ModificationConfig],
+    modifications_by_target: dict[str, dict[str, list[ModificationConfig]]],
     max_variable_modifications: int = 3,
-) -> List[Dict[Union[str, int], List[ModificationConfig]]]:
+) -> list[dict[str | int, list[ModificationConfig]]]:
     """
     Get all potential combinations of modifications for a peptide sequence.
 
@@ -660,7 +662,7 @@ def _get_peptidoform_modification_versions(
     return modification_versions
 
 
-def _get_pool(processes: int) -> Union[multiprocessing.Pool, multiprocessing.dummy.Pool]:
+def _get_pool(processes: int) -> multiprocessing.pool.Pool:
     """Get a multiprocessing pool with the given number of processes."""
     # TODO: fix None default value for processes
     if processes > 1:
diff --git a/ms2pip/spectrum.py b/ms2pip/spectrum.py
index 8e49a389..c4830b0c 100644
--- a/ms2pip/spectrum.py
+++ b/ms2pip/spectrum.py
@@ -3,17 +3,18 @@
 from __future__ import annotations
 
 import warnings
-from typing import Any, Optional, Union
+from typing import Any
 
 import numpy as np
 from psm_utils import Peptidoform
 from pydantic import model_validator, field_validator, ConfigDict, BaseModel
+
 try:
     import spectrum_utils.spectrum as sus
     import spectrum_utils.plot as sup
 except ImportError:
-    sus = None
-    sup = None
+    sus = None  # type: ignore[ty:invalid-assignment]
+    sup = None  # type: ignore[ty:invalid-assignment]
 
 
 class Spectrum(BaseModel):
@@ -21,14 +22,14 @@ class Spectrum(BaseModel):
 
     mz: np.ndarray
     intensity: np.ndarray
-    annotations: Optional[np.ndarray] = None
-    identifier: Optional[str] = None
-    peptidoform: Optional[Union[Peptidoform, str]] = None
-    precursor_mz: Optional[float] = None
-    precursor_charge: Optional[int] = None
-    retention_time: Optional[float] = None
-    mass_tolerance: Optional[float] = None
-    mass_tolerance_unit: Optional[str] = None
+    annotations: np.ndarray | None = None
+    identifier: str | None = None
+    peptidoform: Peptidoform | str | None = None
+    precursor_mz: float | None = None
+    precursor_charge: int | None = None
+    retention_time: float | None = None
+    mass_tolerance: float | None = None
+    mass_tolerance_unit: str | None = None
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
@@ -71,7 +72,7 @@ def __repr__(self) -> str:
 
     @model_validator(mode="after")
     @classmethod
-    def check_array_lengths(cls, data: dict):
+    def check_array_lengths(cls, data):
         if len(data.mz) != len(data.intensity):
             raise ValueError("Array lengths do not match.")
         if data.annotations is not None:
@@ -149,7 +150,7 @@ def to_spectrum_utils(self):
             if not self.peptidoform:
                 raise ValueError("`precursor_charge` or `peptidoform` must be set.")
             else:
-                precursor_charge = self.peptidoform.precursor_charge
+                precursor_charge = self.peptidoform.precursor_charge  # type: ignore[ty:unresolved-attribute]
 
         if self.precursor_mz:
             precursor_mz = self.precursor_mz
@@ -158,19 +159,21 @@ def to_spectrum_utils(self):
                 raise ValueError("`precursor_mz` or `peptidoform` must be set.")
             else:
                 warnings.warn("precursor_mz not set, using theoretical precursor m/z.")
-                precursor_mz = self.peptidoform.theoretical_mz
+                precursor_mz = self.peptidoform.theoretical_mz  # type: ignore[ty:unresolved-attribute]
 
         spectrum = sus.MsmsSpectrum(
             identifier=self.identifier if self.identifier else "spectrum",
-            precursor_mz=precursor_mz,
-            precursor_charge=precursor_charge,
+            precursor_mz=precursor_mz,  # type: ignore[ty:invalid-argument-type]
+            precursor_charge=precursor_charge,  # type: ignore[ty:invalid-argument-type]
             mz=self.mz,
             intensity=self.intensity,
-            retention_time=self.retention_time,
+            retention_time=self.retention_time,  # type: ignore[ty:invalid-argument-type]
         )
         if self.peptidoform:
             spectrum.annotate_proforma(
-                str(self.peptidoform), self.mass_tolerance, self.mass_tolerance_unit
+                str(self.peptidoform),
+                self.mass_tolerance,  # type: ignore[ty:invalid-argument-type]
+                self.mass_tolerance_unit,  # type: ignore[ty:invalid-argument-type]
             )
         return spectrum
 
@@ -184,7 +187,5 @@ class ObservedSpectrum(Spectrum):
 class PredictedSpectrum(Spectrum):
     """Predicted MS2 spectrum."""
 
-    mass_tolerance: Optional[float] = 0.001
-    mass_tolerance_unit: Optional[str] = "Da"
-
-    pass
+    mass_tolerance: float | None = 0.001
+    mass_tolerance_unit: str | None = "Da"
diff --git a/ms2pip/spectrum_input.py b/ms2pip/spectrum_input.py
index 8674c722..1ff8cab3 100644
--- a/ms2pip/spectrum_input.py
+++ b/ms2pip/spectrum_input.py
@@ -1,12 +1,14 @@
 """Read MS2 spectra."""
 
+from __future__ import annotations
+
+from collections.abc import Generator
 from pathlib import Path
-from typing import Generator
 
 import numpy as np
-from ms2rescore_rs import get_ms2_spectra
+from ms2rescore_rs import get_ms2_spectra  # type: ignore[ty:unresolved-import]
 
-from ms2pip.exceptions import UnsupportedSpectrumFiletypeError
+import ms2pip.exceptions as exceptions
 from ms2pip.spectrum import ObservedSpectrum
 
 
@@ -31,8 +33,8 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None,
     """
     try:
         spectra = get_ms2_spectra(str(spectrum_file))
-    except ValueError:
-        raise UnsupportedSpectrumFiletypeError(Path(spectrum_file).suffixes)
+    except ValueError as e:
+        raise exceptions.UnsupportedSpectrumFiletypeError(Path(spectrum_file).suffixes) from e
 
     for spectrum in spectra:
         obs_spectrum = ObservedSpectrum(
@@ -40,7 +42,7 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None,
             intensity=np.array(spectrum.intensity, dtype=np.float32),
             identifier=str(spectrum.identifier),
             precursor_mz=float(spectrum.precursor.mz),
-            precursor_charge=float(spectrum.precursor.charge),
+            precursor_charge=int(spectrum.precursor.charge),
             retention_time=float(spectrum.precursor.rt),
         )
         # Workaround for mobiusklein/mzdata#3
diff --git a/ms2pip/spectrum_output.py b/ms2pip/spectrum_output.py
index c7690246..da3c081e 100644
--- a/ms2pip/spectrum_output.py
+++ b/ms2pip/spectrum_output.py
@@ -45,11 +45,12 @@
 import warnings
 from abc import ABC, abstractmethod
 from collections import defaultdict
+from collections.abc import Generator
 from io import StringIO
-from pathlib import Path
 from os import PathLike
+from pathlib import Path
 from time import localtime, strftime
-from typing import Any, Dict, Generator, List, Optional, Union
+from typing import Any
 
 import numpy as np
 from psm_utils import PSM, Peptidoform
@@ -64,8 +65,8 @@
 
 
 def write_spectra(
-    filename: Union[str, PathLike],
-    processing_results: List[ProcessingResult],
+    filename: str | PathLike,
+    processing_results: list[ProcessingResult],
     file_format: str = "tsv",
     write_mode: str = "w",
 ):
@@ -94,7 +95,7 @@ class _Writer(ABC):
 
     suffix = ""
 
-    def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"):
+    def __init__(self, filename: str | PathLike, write_mode: str = "w"):
         self.filename = Path(filename).with_suffix(self.suffix)
         self.write_mode = write_mode
 
@@ -136,7 +137,7 @@ def _file_object(self):
             self.open()
             return self._open_file
 
-    def write(self, processing_results: List[ProcessingResult]):
+    def write(self, processing_results: list[ProcessingResult]):
         """Write multiple processing results to file."""
         for result in processing_results:
             self._write_result(result)
@@ -162,7 +163,7 @@ class TSV(_Writer):
         "im",
     ]
 
-    def write(self, processing_results: List[ProcessingResult]):
+    def write(self, processing_results: list[ProcessingResult]):
         """Write multiple processing results to file."""
         writer = csv.DictWriter(
             self._file_object, fieldnames=self.field_names, delimiter="\t", lineterminator="\n"
@@ -172,7 +173,7 @@ def write(self, processing_results: List[ProcessingResult]):
         for result in processing_results:
             self._write_result(result, writer)
 
-    def _write_result(self, result: ProcessingResult, writer: csv.DictWriter):
+    def _write_result(self, result: ProcessingResult, writer: csv.DictWriter):  # type: ignore[ty:invalid-method-override]
         """Write single processing result to file."""
         # Only write results with predictions or observations
         if not result.theoretical_mz:
@@ -189,7 +190,7 @@ def _write_row(result: ProcessingResult, ion_type: str, ion_index: int):
             "psm_index": result.psm_index,
             "ion_type": ion_type,
             "ion_number": ion_index + 1,
-            "mz": "{:.8f}".format(result.theoretical_mz[ion_type][ion_index]),
+            "mz": "{:.8f}".format(result.theoretical_mz[ion_type][ion_index]),  # type: ignore[ty:not-subscriptable]
             "predicted": "{:.8f}".format(result.predicted_intensity[ion_type][ion_index])
             if result.predicted_intensity
             else None,
@@ -206,7 +207,7 @@ class MSP(_Writer):
 
     suffix = ".msp"
 
-    def write(self, results: List[ProcessingResult]):
+    def write(self, results: list[ProcessingResult]):  # type: ignore[ty:invalid-method-override]
         """Write multiple processing results to file."""
         for result in results:
             self._write_result(result)
@@ -214,20 +215,20 @@ def write(self, results: List[ProcessingResult]):
     def _write_result(self, result: ProcessingResult):
         """Write single processing result to file."""
         predicted_spectrum = result.as_spectra()[0]
-        intensity_normalized = _basepeak_normalize(predicted_spectrum.intensity) * 1e4
-        peaks = zip(predicted_spectrum.mz, intensity_normalized, predicted_spectrum.annotations)
+        intensity_normalized = _basepeak_normalize(predicted_spectrum.intensity) * 1e4  # type: ignore[ty:unresolved-attribute]
+        peaks = zip(predicted_spectrum.mz, intensity_normalized, predicted_spectrum.annotations)  # type: ignore[ty:invalid-argument-type, ty:unresolved-attribute]
 
         # Header
         lines = [
             f"Name: {result.psm.peptidoform.sequence}/{result.psm.get_precursor_charge()}",
             f"MW: {result.psm.peptidoform.theoretical_mass}",
             self._format_comment_line(result.psm),
-            f"Num peaks: {len(predicted_spectrum.mz)}",
+            f"Num peaks: {len(predicted_spectrum.mz)}",  # type: ignore[ty:unresolved-attribute]
         ]
 
         # Peaks
         lines.extend(
-            f"{mz:.8f}\t{intensity:.8f}\t{annotation}/0.0" for mz, intensity, annotation in peaks
+            f"{mz:.8f}\t{intensity:.8f}\t{annotation}/0.0" for mz, intensity, annotation in peaks  # type: ignore[ty:not-iterable]
         )
 
         # Write to file
@@ -241,8 +242,8 @@ def _format_modifications(peptidoform: Peptidoform):
         def _format_single_modification(
             amino_acid: str,
             position: int,
-            modifications: Optional[List[proforma.ModificationBase]],
-        ) -> Union[str, None]:
+            modifications: list[proforma.ModificationBase] | None,
+        ) -> str | None:
             """Get modification label from :py:class:`proforma.ModificationBase` list."""
             if not modifications:
                 return None
@@ -255,7 +256,7 @@ def _format_single_modification(
                 return f"{position},{amino_acid},{modification.value}"
 
         sequence_mods = [
-            _format_single_modification(aa, pos + 1, mods)
+            _format_single_modification(aa, pos + 1, mods)  # type: ignore[ty:invalid-argument-type]
             for pos, (aa, mods) in enumerate(peptidoform.parsed_sequence)
         ]
         n_term = _format_single_modification(
@@ -278,7 +279,7 @@ def _format_parent_mass(peptidoform: Peptidoform) -> str:
         return f"Parent={peptidoform.theoretical_mz}"
 
     @staticmethod
-    def _format_protein_string(psm: PSM) -> Union[str, None]:
+    def _format_protein_string(psm: PSM) -> str | None:
         """Format protein list as string."""
         if psm.protein_list:
             return f"Protein={','.join(psm.protein_list)}"
@@ -286,7 +287,7 @@ def _format_protein_string(psm: PSM) -> Union[str, None]:
             return None
 
     @staticmethod
-    def _format_retention_time(psm: PSM) -> Union[str, None]:
+    def _format_retention_time(psm: PSM) -> str | None:
         """Format retention time as string."""
         if psm.retention_time:
             return f"RetentionTime={psm.retention_time}"
@@ -294,7 +295,7 @@ def _format_retention_time(psm: PSM) -> Union[str, None]:
             return None
 
     @staticmethod
-    def _format_ion_mobility(psm: PSM) -> Union[str, None]:
+    def _format_ion_mobility(psm: PSM) -> str | None:
         """Format ion mobility as string."""
         if psm.ion_mobility:
             return f"IonMobility={psm.ion_mobility}"
@@ -334,7 +335,7 @@ class MGF(_Writer):
 
     suffix = ".mgf"
 
-    def write(self, results: List[ProcessingResult]):
+    def write(self, results: list[ProcessingResult]):  # type: ignore[ty:invalid-method-override]
         """Write multiple processing results to file."""
         for result in results:
             self._write_result(result)
@@ -342,8 +343,8 @@ def write(self, results: List[ProcessingResult]):
     def _write_result(self, result: ProcessingResult):
         """Write single processing result to file."""
         predicted_spectrum = result.as_spectra()[0]
-        intensity_normalized = _basepeak_normalize(predicted_spectrum.intensity) * 1e4
-        peaks = zip(predicted_spectrum.mz, intensity_normalized)
+        intensity_normalized = _basepeak_normalize(predicted_spectrum.intensity) * 1e4  # type: ignore[ty:unresolved-attribute]
+        peaks = zip(predicted_spectrum.mz, intensity_normalized)  # type: ignore[ty:unresolved-attribute]
 
         # Header
         lines = [
@@ -384,7 +385,7 @@ class Spectronaut(_Writer):
         "FragmentLossType",
     ]
 
-    def write(self, processing_results: List[ProcessingResult]):
+    def write(self, processing_results: list[ProcessingResult]):
         """Write multiple processing results to file."""
         writer = csv.DictWriter(
             self._file_object, fieldnames=self.field_names, delimiter="\t", lineterminator="\n"
@@ -394,7 +395,7 @@ def write(self, processing_results: List[ProcessingResult]):
         for result in processing_results:
             self._write_result(result, writer)
 
-    def _write_result(self, result: ProcessingResult, writer: csv.DictWriter):
+    def _write_result(self, result: ProcessingResult, writer: csv.DictWriter):  # type: ignore[ty:invalid-method-override]
         """Write single processing result to file."""
         # Only write results with predictions
         if result.predicted_intensity is None:
@@ -404,7 +405,7 @@ def _write_result(self, result: ProcessingResult, writer: csv.DictWriter):
             writer.writerow({**psm_info, **fragment_info})
 
     @staticmethod
-    def _process_psm(psm: PSM) -> Dict[str, Any]:
+    def _process_psm(psm: PSM) -> dict[str, Any]:
         """Process PSM to Spectronaut format."""
         return {
             "ModifiedPeptide": _peptidoform_str_without_charge(psm.peptidoform),
@@ -417,23 +418,23 @@ def _process_psm(psm: PSM) -> Dict[str, Any]:
         }
 
     @staticmethod
-    def _yield_fragment_info(result: ProcessingResult) -> Generator[Dict[str, Any], None, None]:
+    def _yield_fragment_info(result: ProcessingResult) -> Generator[dict[str, Any], None, None]:
         """Yield fragment information for a processing result."""
         # Normalize intensities
         intensities = {
             ion_type: _unlogarithmize(intensities)
-            for ion_type, intensities in result.predicted_intensity.items()
+            for ion_type, intensities in result.predicted_intensity.items()  # type: ignore[ty:unresolved-attribute]
         }
         max_intensity = max(itertools.chain(*intensities.values()))
         intensities = {
             ion_type: _basepeak_normalize(intensities[ion_type], basepeak=max_intensity)
             for ion_type in intensities
         }
-        for ion_type in result.predicted_intensity:
+        for ion_type in result.predicted_intensity:  # type: ignore[ty:not-iterable]
             fragment_type = ion_type[0].lower()
             fragment_charge = ion_type[1:] if len(ion_type) > 1 else "1"
             for ion_index, (intensity, mz) in enumerate(
-                zip(intensities[ion_type], result.theoretical_mz[ion_type])
+                zip(intensities[ion_type], result.theoretical_mz[ion_type])  # type: ignore[ty:invalid-argument-type, ty:not-subscriptable]
             ):
                 yield {
                     "RelativeFragmentIntensity": f"{intensity:.8f}",
@@ -468,7 +469,7 @@ class Bibliospec(_Writer):
         "ion-mobility",
     ]
 
-    def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"):
+    def __init__(self, filename: str | PathLike, write_mode: str = "w"):
         super().__init__(filename, write_mode)
         self.ssl_file = self.filename.with_suffix(self.ssl_suffix)
         self.ms2_file = self.filename.with_suffix(self.ms2_suffix)
@@ -514,7 +515,7 @@ def _ms2_file_object(self):
             self.open()
             return self._open_ms2_file
 
-    def write(self, processing_results: List[ProcessingResult]):
+    def write(self, processing_results: list[ProcessingResult]):
         """Write multiple processing results to file."""
         # Create CSV writer
         ssl_dict_writer = csv.DictWriter(
@@ -553,7 +554,7 @@ def _write_result(
         modified_sequence: str,
         scan_number: int,
         writer: csv.DictWriter,
-    ):
+    ):  # type: ignore[ty:invalid-method-override]
         """Write single processing result to files."""
         self._write_result_to_ssl(result, modified_sequence, scan_number, writer)
         self._write_result_to_ms2(result, modified_sequence, scan_number)
@@ -584,8 +585,8 @@ def _write_result_to_ms2(
     ):
         """Write single processing result to the MS2 file."""
         predicted_spectrum = result.as_spectra()[0]
-        intensity_normalized = _basepeak_normalize(predicted_spectrum.intensity) * 1e4
-        peaks = zip(predicted_spectrum.mz, intensity_normalized)
+        intensity_normalized = _basepeak_normalize(predicted_spectrum.intensity) * 1e4  # type: ignore[ty:unresolved-attribute]
+        peaks = zip(predicted_spectrum.mz, intensity_normalized)  # type: ignore[ty:unresolved-attribute]
 
         # Header
         lines = [
@@ -607,8 +608,8 @@ def _format_modified_sequence(peptidoform: Peptidoform) -> str:
         """Format modified sequence as string for Spectronaut."""
         modification_dict = defaultdict(list)
         for term, position in [("n_term", 0), ("c_term", len(peptidoform) - 1)]:
-            if peptidoform.properties[term]:
-                modification_dict[position].extend(peptidoform.properties[term])
+            if peptidoform.properties[term]:  # type: ignore[ty:invalid-key]
+                modification_dict[position].extend(peptidoform.properties[term])  # type: ignore[ty:invalid-key]
         for position, (_, mods) in enumerate(peptidoform.parsed_sequence):
             if mods:
                 modification_dict[position].extend(mods)
@@ -620,7 +621,7 @@ def _format_modified_sequence(peptidoform: Peptidoform) -> str:
         )
 
     @staticmethod
-    def _get_last_ssl_scan_number(ssl_file: Union[str, PathLike, StringIO]):
+    def _get_last_ssl_scan_number(ssl_file: str | PathLike | StringIO):
         """Read scan number of last line in a Bibliospec SSL file."""
         if isinstance(ssl_file, StringIO):
             ssl_file.seek(0)
@@ -652,7 +653,7 @@ def open(self):
             self._open_file = self.filename.unlink(missing_ok=True)
         self._open_file = dlib.open_sqlite(self.filename)
 
-    def write(self, processing_results: List[ProcessingResult]):
+    def write(self, processing_results: list[ProcessingResult]):
         """Write MS2PIP predictions to a DLIB SQLite file."""
         connection = self._file_object
         dlib.metadata.create_all(connection.engine)
@@ -667,13 +668,13 @@ def _format_modified_sequence(peptidoform: Peptidoform) -> str:
         """Format modified sequence as string for DLIB."""
         # Sum all sequential mass shifts for each position
         masses = [
-            sum(mod.mass for mod in mods) if mods else 0 for _, mods in peptidoform.parsed_sequence
+            sum(mod.mass for mod in mods) if mods else 0 for _, mods in peptidoform.parsed_sequence  # type: ignore[ty:unresolved-attribute]
         ]
 
         # Add N- and C-terminal modifications
         for term, position in [("n_term", 0), ("c_term", len(peptidoform) - 1)]:
-            if peptidoform.properties[term]:
-                masses[position] += sum(mod.mass for mod in peptidoform.properties[term])
+            if peptidoform.properties[term]:  # type: ignore[ty:invalid-key]
+                masses[position] += sum(mod.mass for mod in peptidoform.properties[term])  # type: ignore[ty:invalid-key]
 
         # Format modified sequence
         return "".join(
@@ -700,9 +701,9 @@ def _write_metadata(connection: Connection):
 
     @staticmethod
     def _write_entries(
-        processing_results: List[ProcessingResult],
+        processing_results: list[ProcessingResult],
         connection: Connection,
-        output_filename: Union[str, PathLike],
+        output_filename: str | PathLike,
     ):
         """Write spectra to DLIB SQLite file."""
         with connection.begin():
@@ -711,8 +712,8 @@ def _write_entries(
                     raise ValueError("Retention time required to write DLIB file.")
 
                 spectrum = result.as_spectra()[0]
-                intensity_normalized = _basepeak_normalize(spectrum.intensity) * 1e4
-                n_peaks = len(spectrum.mz)
+                intensity_normalized = _basepeak_normalize(spectrum.intensity) * 1e4  # type: ignore[ty:unresolved-attribute]
+                n_peaks = len(spectrum.mz)  # type: ignore[ty:unresolved-attribute]
 
                 connection.execute(
                     dlib.Entry.insert().values(
@@ -724,7 +725,7 @@ def _write_entries(
                         RTInSeconds=result.psm.retention_time,
                         Score=0,
                         MassEncodedLength=n_peaks,
-                        MassArray=spectrum.mz.tolist(),
+                        MassArray=spectrum.mz.tolist(),  # type: ignore[ty:unresolved-attribute]
                         IntensityEncodedLength=n_peaks,
                         IntensityArray=intensity_normalized.tolist(),
                         SourceFile=str(output_filename),
@@ -732,7 +733,7 @@ def _write_entries(
                 )
 
     @staticmethod
-    def _write_peptide_to_protein(results: List[ProcessingResult], connection: Connection):
+    def _write_peptide_to_protein(results: list[ProcessingResult], connection: Connection):
         """Write peptide-to-protein mappings to DLIB SQLite file."""
         peptide_to_proteins = {
             (result.psm.peptidoform.sequence, protein)
@@ -780,12 +781,12 @@ def _peptidoform_str_without_charge(peptidoform: Peptidoform) -> str:
     return re.sub(r"\/\d+$", "", str(peptidoform))
 
 
-def _unlogarithmize(intensities: np.array) -> np.array:
+def _unlogarithmize(intensities: np.ndarray) -> np.ndarray:
     """Undo logarithmic transformation of intensities."""
     return (2**intensities) - 0.001
 
 
-def _basepeak_normalize(intensities: np.array, basepeak: Optional[float] = None) -> np.array:
+def _basepeak_normalize(intensities: np.array, basepeak: float | None = None) -> np.array:  # type: ignore[ty:invalid-type-form]
     """Normalize intensities to most intense peak."""
     if not basepeak:
         basepeak = intensities.max()
diff --git a/pyproject.toml b/pyproject.toml
index 002ca21e..61b4df8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,14 +24,12 @@ authors = [
 classifiers = [
     "Intended Audience :: Science/Research",
     "License :: OSI Approved :: Apache Software License",
-    "Operating System :: POSIX :: Linux",
-    "Operating System :: MacOS :: MacOS X",
-    "Operating System :: Microsoft :: Windows",
+    "Operating System :: OS Independent",
     "Programming Language :: Python :: 3 :: Only",
     "Topic :: Scientific/Engineering :: Bio-Informatics",
     "Development Status :: 5 - Production/Stable",
 ]
-requires-python = ">=3.9"
+requires-python = ">=3.11"
 dependencies = [
     "numpy>=1.25,<3",
     "pandas>=1,<3",
@@ -45,12 +43,14 @@ dependencies = [
     "pydantic>=2",
     "werkzeug>=2",
     "psm_utils>=1.0",
-    "ms2rescore-rs>=0.5.0a0,<2",
+    "ms2rescore-rs>=0.5.0a1,<2",
 ]
-
 [project.optional-dependencies]
 plotting = ["matplotlib>=3.0", "spectrum-utils>=0.4"]
-dev = ["black", "isort>5", "pytest"]
+
+[dependency-groups]
+notebook = ["ipykernel", "ipywidgets", "pip"]
+dev = ["pytest", "ruff", "ty"]
 docs = [
     "sphinx",
     "numpydoc>=1,<2",
@@ -74,7 +74,7 @@ publication = "https://doi.org/10.1093/nar/gkad335/"
 ms2pip = "ms2pip.__main__:main"
 
 [build-system]
-requires = ["setuptools", "cython", "numpy>=2.0"]
+requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools.packages.find]
@@ -82,19 +82,8 @@ include = ["ms2pip*"]
 
 [tool.black]
 line-length = 99
-target-version = ['py310']
+target-version = ['py311']
 
 [tool.ruff]
 line-length = 99
-target-version = 'py310'
-
-[tool.cibuildwheel]
-# Keep Python targets explicit so CI updates don't start publishing untested wheels.
-build = "cp3{10,11,12,13}-manylinux_x86_64 cp3{10,11,12,13}-win_amd64 cp3{10,11,12,13}-macosx_{x86_64,arm64}"
-test-command = "ms2pip --help"
-
-# Prevent building from source for packages with complex C/C++/Rust dependencies
-environment = { PIP_ONLY_BINARY = "pyarrow,pandas,numpy,lxml,xgboost,scipy,ms2rescore-rs" }
-
-[tool.cibuildwheel.macos]
-before-all = "brew install libomp"
+target-version = 'py311'
diff --git a/setup.py b/setup.py
deleted file mode 100644
index b389ab08..00000000
--- a/setup.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import os
-import platform
-from glob import glob
-
-import numpy
-from Cython.Distutils import build_ext
-from setuptools import setup
-from setuptools.extension import Extension
-
-
-def _get_version():
-    with open("ms2pip/__init__.py") as f:
-        for line in f:
-            if line.startswith("__version__"):
-                return line.split("=")[1].strip().strip('"').strip("'")
-
-
-to_remove = [
-    "ms2pip/_cython_modules/ms2pip_pyx.c*",
-    "ms2pip/_cython_modules/ms2pip_pyx.so",
-]
-_ = [[os.remove(f) for f in glob(pat)] for pat in to_remove]
-
-# Large machine-written C model files require optimization to be disabled
-compile_args = {
-    "Linux": [
-        "-O0",
-        "-fno-var-tracking",
-        "-Wno-unused-result",
-        "-Wno-cpp",
-        "-Wno-unused-function",
-    ],
-    "Darwin": [
-        "-O0",
-    ],
-    "Windows": [
-        "/Od",
-        "/DEBUG",
-        "/GL-",
-        "/bigobj",
-        "/wd4244",
-    ],
-}
-
-extensions = [
-    Extension(
-        "ms2pip._cython_modules.ms2pip_pyx",
-        sources=["ms2pip/_cython_modules/ms2pip_pyx.pyx"] + glob("ms2pip/_models_c/*/*.c"),
-        extra_compile_args=compile_args[platform.system()],
-    )
-]
-
-setup(
-    version=_get_version(),
-    ext_modules=extensions,
-    include_dirs=[numpy.get_include()],
-    cmdclass={"build_ext": build_ext},
-)
diff --git a/tests/test_core.py b/tests/test_core.py
index 771521f9..5f90dea2 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1,22 +1,10 @@
 import numpy as np
-from psm_utils import PSM, Peptidoform
-import pandas as pd
+from psm_utils import PSM, PSMList, Peptidoform
 
-from ms2pip.core import get_training_data, predict_single
+from ms2pip.core import predict_batch, predict_library, predict_single
 from ms2pip.result import ProcessingResult
 
 
-def _test_get_training_data():
-    expected_df = pd.read_feather("tests/test_data/massivekb_selected_500.feather")
-    output_df = get_training_data(
-        "tests/test_data/massivekb_selected_500.peprec",
-        "tests/test_data/massivekb_selected_500.mgf",
-        model="HCD",
-        ms2_tolerance=0.02,
-        processes=1
-    )
-    pd.testing.assert_frame_equal(expected_df, output_df)
-
 def test_predict_single():
     pep = Peptidoform("ACDE/2")
     result = predict_single(pep)
@@ -25,8 +13,8 @@ def test_predict_single():
         psm_index=0,
         psm=PSM(peptidoform=pep, spectrum_id=0),
         theoretical_mz={
-            "b": np.array([72.04435, 175.05354, 290.08047], dtype=np.float32),
-            "y": np.array([148.0604, 263.0873, 366.0965], dtype=np.float32),
+            "b": np.array([72.04439, 175.05357, 290.0805], dtype=np.float32),
+            "y": np.array([148.06044, 263.08737, 366.09656], dtype=np.float32),
         },
         predicted_intensity={
             "b": np.array([-9.14031, -7.6102686, -7.746709], dtype=np.float32),
@@ -41,8 +29,65 @@ def test_predict_single():
     assert result.psm == expected.psm
     np.testing.assert_array_almost_equal(result.theoretical_mz["b"], expected.theoretical_mz["b"])
     np.testing.assert_array_almost_equal(result.theoretical_mz["y"], expected.theoretical_mz["y"])
-    np.testing.assert_array_almost_equal(result.predicted_intensity["b"], expected.predicted_intensity["b"])
-    np.testing.assert_array_almost_equal(result.predicted_intensity["y"], expected.predicted_intensity["y"])
+    np.testing.assert_array_almost_equal(
+        result.predicted_intensity["b"], expected.predicted_intensity["b"]
+    )
+    np.testing.assert_array_almost_equal(
+        result.predicted_intensity["y"], expected.predicted_intensity["y"]
+    )
     assert result.observed_intensity == expected.observed_intensity
     assert result.correlation == expected.correlation
     assert result.feature_vectors == expected.feature_vectors
+
+
+def test_predict_single_modified():
+    """Predict a single modified peptidoform given as a ProForma string."""
+    prediction = predict_single("AC[+57.0215]M[+15.9949]DEK/2")
+    mz = prediction.theoretical_mz
+    assert prediction.predicted_intensity is not None
+    assert mz is not None
+    assert set(mz.keys()) == {"b", "y"}
+    # ACMDEK has 6 residues, so 5 cleavage sites are expected per ion series
+    assert len(mz["b"]) == 5
+    assert len(mz["y"]) == 5
+    assert len(prediction.predicted_intensity["b"]) == 5
+    # b-ion m/z values must be positive and strictly increasing
+    assert all(mz["b"] > 0) and all(np.diff(mz["b"]) > 0)
+
+
+def test_predict_batch():
+    """Batch prediction yields one ordered result per PSM with n - 1 b-ions each."""
+    sequences = ["ACDE/2", "PEPTIDEK/3", "AAAAAAA/2"]
+    psms = PSMList(
+        psm_list=[
+            PSM(peptidoform=Peptidoform(sequence), spectrum_id=index)
+            for index, sequence in enumerate(sequences)
+        ]
+    )
+    predictions = predict_batch(psms)
+
+    assert len(predictions) == 3
+    for expected_index, prediction in enumerate(predictions):
+        assert prediction.psm_index == expected_index
+        assert prediction.predicted_intensity is not None
+        assert prediction.theoretical_mz is not None
+        assert set(prediction.theoretical_mz.keys()) == {"b", "y"}
+        assert prediction.feature_vectors is None
+
+    # A peptide of n residues has n - 1 cleavage sites: 4 -> 3, 8 -> 7, 7 -> 6
+    b_ion_counts = [len(prediction.theoretical_mz["b"]) for prediction in predictions]
+    assert b_ion_counts == [3, 7, 6]
+
+
+def test_predict_library():
+    """Library prediction from FASTA yields at least one list of populated results."""
+    library = predict_library(fasta_file="tests/test_data/test.fasta", batch_size=100)
+    batches = list(library)
+    assert len(batches) >= 1
+
+    for batch in batches:
+        assert isinstance(batch, list)
+        for prediction in batch:
+            assert isinstance(prediction, ProcessingResult)
+            assert prediction.predicted_intensity is not None
+            assert prediction.theoretical_mz is not None
diff --git a/tests/test_correlate.py b/tests/test_correlate.py
new file mode 100644
index 00000000..6056c2ab
--- /dev/null
+++ b/tests/test_correlate.py
@@ -0,0 +1,158 @@
+import numpy as np
+import pytest
+from psm_utils import PSM, Peptidoform
+from ms2rescore_rs import AnnotatedMS2Spectrum, FragmentAnnotation, MS2Spectrum, Precursor
+
+from ms2pip.core import correlate_preloaded, correlate_single
+from ms2pip.spectrum import ObservedSpectrum
+
+
+def _make_observed_spectrum(peptidoform="ACDEFK/2"):
+    """Build an eight-peak ObservedSpectrum fixture.
+
+    ``peptidoform`` is passed through as given; every other field is a fixed value.
+    """
+    # Index-aligned peak lists: m/z values and their intensities
+    peak_mz = [72.044, 147.113, 175.054, 276.167, 290.081, 389.251, 488.319, 566.192]
+    peak_intensity = [50.0, 100.0, 80.0, 200.0, 60.0, 150.0, 300.0, 40.0]
+    return ObservedSpectrum(
+        mz=np.array(peak_mz, dtype=np.float32),
+        intensity=np.array(peak_intensity, dtype=np.float32),
+        identifier="test_spectrum",
+        peptidoform=peptidoform,
+        precursor_mz=341.65,
+        precursor_charge=2,
+        retention_time=100.0,
+    )
+
+
+def test_correlate_single():
+    """Correlating one spectrum yields b/y intensities and a non-NaN correlation."""
+    outcome = correlate_single(_make_observed_spectrum(), ms2_tolerance=0.02, model="HCD")
+    assert outcome.predicted_intensity is not None
+    assert outcome.observed_intensity is not None
+    assert outcome.correlation is not None
+    assert not np.isnan(outcome.correlation)
+    # Both intensity mappings must hold exactly the b and y ion series
+    assert set(outcome.predicted_intensity.keys()) == {"b", "y"}
+    assert set(outcome.observed_intensity.keys()) == {"b", "y"}
+
+
+def test_correlate_single_ppm_tolerance():
+    """Correlation also works with a 20 ppm fragment tolerance."""
+    settings = {"ms2_tolerance": 20.0, "ms2_tolerance_mode": "ppm", "model": "HCD"}
+    outcome = correlate_single(_make_observed_spectrum(), **settings)
+    assert outcome.predicted_intensity is not None
+    assert outcome.observed_intensity is not None
+    assert outcome.correlation is not None
+
+
+def test_correlate_single_requires_peptidoform():
+    """A spectrum without a peptidoform is rejected with a ValueError."""
+    mz = np.array([100.0], dtype=np.float32)
+    intensity = np.array([1.0], dtype=np.float32)
+    bare_spectrum = ObservedSpectrum(mz=mz, intensity=intensity, identifier="test")
+
+    with pytest.raises(ValueError, match="Peptidoform must be set"):
+        correlate_single(bare_spectrum)
+
+
+def test_correlate_preloaded_annotated():
+    """Correlate a PSM carrying an AnnotatedMS2Spectrum and check the annotated b2 target."""
+    # One annotation list per peak: b2, y4, and an unannotated third peak
+    annotations = [
+        [FragmentAnnotation(series="b", position=2, charge=1)],
+        [FragmentAnnotation(series="y", position=4, charge=1)],
+        [],
+    ]
+    annotated = AnnotatedMS2Spectrum(
+        identifier="test_spectrum",
+        mz=[175.054, 276.167, 488.319],
+        intensity=[100.0, 200.0, 300.0],
+        precursor=Precursor(mz=341.65, charge=2, rt=100.0),
+        peak_annotations=annotations,
+    )
+    peptide = Peptidoform("ACDEFK/2")
+    psm = PSM(peptidoform=peptide, spectrum_id="test_spectrum", spectrum=annotated)
+
+    outcomes = correlate_preloaded([psm], model="HCD")
+    assert len(outcomes) == 1
+
+    outcome = outcomes[0]
+    assert outcome.predicted_intensity is not None
+    assert outcome.observed_intensity is not None
+    assert set(outcome.observed_intensity.keys()) == {"b", "y"}
+    # Index 1 of the b series is the annotated b2 ion; it must exceed the log2 floor
+    log_floor = np.log2(0.001)
+    assert outcome.observed_intensity["b"][1] > log_floor
+
+
+def test_correlate_preloaded_raw():
+    """Correlate a PSM carrying a raw MS2Spectrum using a 0.02 Da tolerance."""
+    unannotated = MS2Spectrum(
+        identifier="test_spectrum",
+        mz=[175.054, 276.167, 488.319],
+        intensity=[100.0, 200.0, 300.0],
+        precursor=Precursor(mz=341.65, charge=2, rt=100.0),
+    )
+    peptide = Peptidoform("ACDEFK/2")
+    psm = PSM(peptidoform=peptide, spectrum_id="test_spectrum", spectrum=unannotated)
+
+    # Fragment tolerance is given explicitly in Da units
+    outcomes = correlate_preloaded(
+        [psm], model="HCD", ms2_tolerance=0.02, ms2_tolerance_mode="Da"
+    )
+
+    assert len(outcomes) == 1
+    only = outcomes[0]
+    assert only.predicted_intensity is not None
+    assert only.observed_intensity is not None
+
+
+def test_correlate_preloaded_ppm_tolerance():
+    """Correlate a PSM carrying a raw MS2Spectrum using a 20 ppm tolerance."""
+    two_peaks = MS2Spectrum(
+        identifier="test_spectrum",
+        mz=[175.054, 276.167],
+        intensity=[100.0, 200.0],
+        precursor=Precursor(mz=341.65, charge=2, rt=100.0),
+    )
+    psm = PSM(
+        peptidoform=Peptidoform("ACDEFK/2"),
+        spectrum_id="test_spectrum",
+        spectrum=two_peaks,
+    )
+
+    ppm_settings = {"ms2_tolerance": 20.0, "ms2_tolerance_mode": "ppm"}
+    outcomes = correlate_preloaded([psm], model="HCD", **ppm_settings)
+
+    assert len(outcomes) == 1
+    assert outcomes[0].observed_intensity is not None
+
+
+def test_correlate_preloaded_invalid_spectrum():
+    """A PSM created without a spectrum makes correlate_preloaded raise ValueError."""
+    peptide = Peptidoform("ACDEFK/2")
+    spectrumless = PSM(peptidoform=peptide, spectrum_id="test")
+
+    with pytest.raises(ValueError, match="MS2Spectrum or AnnotatedMS2Spectrum"):
+        correlate_preloaded([spectrumless])
+
+
+def test_correlate_preloaded_multiple_psms():
+    """Two PSMs with their own raw spectra produce two populated results."""
+    def build_psm(idx, seq):
+        raw = MS2Spectrum(
+            identifier=f"spec_{idx}",
+            mz=[175.054, 276.167],
+            intensity=[100.0, 200.0],
+            precursor=Precursor(mz=400.0, charge=2, rt=float(idx * 10)),
+        )
+        return PSM(peptidoform=Peptidoform(seq), spectrum_id=f"spec_{idx}", spectrum=raw)
+
+    psms = [build_psm(idx, seq) for idx, seq in enumerate(["ACDEFK/2", "PEPTIDEK/3"])]
+    outcomes = correlate_preloaded(psms, model="HCD")
+    assert len(outcomes) == 2
+    for outcome in outcomes:
+        assert outcome.predicted_intensity is not None
+        assert outcome.observed_intensity is not None
diff --git a/tests/test_correlation.py b/tests/test_correlation.py
new file mode 100644
index 00000000..62eaaaaa
--- /dev/null
+++ b/tests/test_correlation.py
@@ -0,0 +1,55 @@
+import numpy as np
+
+from ms2pip.correlation import ms2pip_pearson, spectral_angle
+
+
+def test_ms2pip_pearson():
+    """Pearson correlation of two closely matching arrays is a float in (0.9, 1.0]."""
+    observed = np.array([100.0, 200.0, 50.0, 300.0])
+    predicted = np.array([90.0, 210.0, 40.0, 310.0])
+
+    coefficient = ms2pip_pearson(observed, predicted)
+    assert isinstance(coefficient, float)
+    assert 0.9 < coefficient <= 1.0
+
+
+def test_ms2pip_pearson_identical():
+    """An array correlated with itself gives a coefficient of 1 within 1e-6."""
+    values = np.array([100.0, 200.0, 50.0, 300.0])
+    self_correlation = ms2pip_pearson(values, values)
+    assert abs(self_correlation - 1.0) < 1e-6
+
+
+def test_ms2pip_pearson_anticorrelated():
+    """Reversing the order of an increasing array gives a negative coefficient."""
+    ascending = np.array([100.0, 200.0, 300.0])
+    descending = np.array([300.0, 200.0, 100.0])
+
+    coefficient = ms2pip_pearson(ascending, descending)
+    assert coefficient < 0
+
+
+def test_spectral_angle():
+    """Spectral angle of two closely matching arrays is a float in (0.0, 1.0]."""
+    observed = np.array([100.0, 200.0, 50.0, 300.0])
+    predicted = np.array([90.0, 210.0, 40.0, 310.0])
+
+    angle = spectral_angle(observed, predicted)
+    assert isinstance(angle, float)
+    assert 0.0 < angle <= 1.0
+
+
+def test_spectral_angle_identical():
+    """An array compared with itself gives a spectral angle of 1 within 1e-6."""
+    values = np.array([100.0, 200.0, 50.0, 300.0])
+    self_angle = spectral_angle(values, values)
+    assert abs(self_angle - 1.0) < 1e-6
+
+
+def test_spectral_angle_orthogonal():
+    """Two vectors without shared non-zero entries score below 0.5."""
+    first_only = np.array([1.0, 0.0])
+    second_only = np.array([0.0, 1.0])
+
+    angle = spectral_angle(first_only, second_only)
+    assert angle < 0.5
diff --git a/tests/test_encoder.py b/tests/test_encoder.py
deleted file mode 100644
index 5feb5f96..00000000
--- a/tests/test_encoder.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import pytest
-from psm_utils import Peptidoform, PSM, PSMList
-
-from ms2pip._utils.encoder import Encoder
-
-
-class TestEncoder:
-    def test_from_peptidoform(self):
-        test_cases = [
-            # Peptidoform, {(target, label): (amino_acid, amino_acid_id, mass_shift)}
-            ("ACDEK", {}),
-            ("AC[+57.021464]DEK", {("C", "+57.021464"): ("C", 1, 57.021464)}),
-            ("AC[U:4]", {("C", "UNIMOD:4"): ("C", 1, 57.021464)}),
-            ("AC[formula:H3C2NO]", {("C", "Formula:H3C2NO"): ("C", 1, 57.021464)}),
-            ("[Acetyl]-ACDE", {("n_term", "Acetyl"): ("n_term", -1, 42.010565)}),
-            ("ACDE-[Amidated]", {("c_term", "Amidated"): ("c_term", -2, -0.984016)}),
-            (
-                "AC[+57.021464]DE-[Amidated]",
-                {
-                    ("C", "+57.021464"): ("C", 1, 57.021464),
-                    ("c_term", "Amidated"): ("c_term", -2, -0.984016),
-                },
-            ),
-            (
-                "[Acetyl]-AC[+57.021464]DE",
-                {
-                    ("n_term", "Acetyl"): ("n_term", -1, 42.010565),
-                    ("C", "+57.021464"): ("C", 1, 57.021464),
-                },
-            ),
-        ]
-
-        for peptidoform, expected_mods in test_cases:
-            encoder = Encoder.from_peptidoform(Peptidoform(peptidoform))
-            for key, modification in encoder.modifications.items():
-                for item_key, expected_item in zip(
-                    ["amino_acid", "amino_acid_id", "mass_shift"], expected_mods[key]
-                ):
-                    if isinstance(expected_item, float):
-                        assert modification[item_key] == pytest.approx(expected_item)
-                    else:
-                        assert modification[item_key] == expected_item
-
-    def test_from_psm_list(self):
-        psm_list = PSMList(psm_list=[
-            PSM(peptidoform="AC[+57.021464]DEK", spectrum_id=0),
-            PSM(peptidoform="AC[U:4]", spectrum_id=1),
-            PSM(peptidoform="AC[formula:H3C2NO]", spectrum_id=2),
-            PSM(peptidoform="[Acetyl]-ACDE", spectrum_id=3),
-            PSM(peptidoform="ACDE-[Amidated]",spectrum_id= 4)
-        ])
-        expected = {
-            ("C", "+57.021464"): {
-                "mod_id": 38,
-                "mass_shift": 57.021464,
-                "amino_acid": "C",
-                "amino_acid_id": 1,
-            },
-            ("C", "UNIMOD:4"): {
-                "mod_id": 39,
-                "mass_shift": 57.021464,
-                "amino_acid": "C",
-                "amino_acid_id": 1,
-            },
-            ("C", "Formula:H3C2NO"): {
-                "mod_id": 40,
-                "mass_shift": 57.02146372057,
-                "amino_acid": "C",
-                "amino_acid_id": 1,
-            },
-            ("n_term", "Acetyl"): {
-                "mod_id": 41,
-                "mass_shift": 42.010565,
-                "amino_acid": "n_term",
-                "amino_acid_id": -1,
-            },
-            ("c_term", "Amidated"): {
-                "mod_id": 42,
-                "mass_shift": -0.984016,
-                "amino_acid": "c_term",
-                "amino_acid_id": -2,
-            },
-        }
-
-        encoder = Encoder.from_psm_list(psm_list)
-        for modification_key, modification_dict in encoder.modifications.items():
-            for item_key, expected_item in expected[modification_key].items():
-                if isinstance(expected_item, float):
-                    assert modification_dict[item_key] == pytest.approx(expected_item)
-                else:
-                    assert modification_dict[item_key] == expected_item
diff --git a/tests/test_spectrum_processing.py b/tests/test_spectrum_processing.py
new file mode 100644
index 00000000..2a174b48
--- /dev/null
+++ b/tests/test_spectrum_processing.py
@@ -0,0 +1,138 @@
+import numpy as np
+from psm_utils import PSM, Peptidoform
+
+from ms2pip._spectrum_processing import (
+    annotate_spectrum,
+    proforma_to_mass_shift,
+    targets_from_annotations,
+)
+
+
+def test_targets_from_annotations_basic():
+    """Annotated peaks land in their ion slot; every other slot keeps the floor value."""
+    # seq_len=4 gives 3 cleavage sites, so each series is checked at indices 0..2
+    annotations = [
+        [("b", 1, 1)],  # first peak: b1
+        [],  # second peak: no annotation
+        [("y", 2, 1)],  # third peak: y2
+    ]
+    peak_intensity = np.array([5.0, 3.0, 8.0], dtype=np.float32)
+    extracted = targets_from_annotations(annotations, peak_intensity, ["b", "y"], seq_len=4)
+
+    log_floor = np.float32(np.log2(0.001))
+    expected = {
+        "b": [5.0, log_floor, log_floor],
+        "y": [log_floor, 8.0, log_floor],
+    }
+    for series, values in expected.items():
+        for site, value in enumerate(values):
+            assert extracted[series][site] == value
+
+
+def test_targets_from_annotations_charge2():
+    """A charge-2 b1 annotation fills the b2 series and leaves the b series at floor."""
+    ion_types = ["b", "y", "b2", "y2"]
+    doubly_charged_b1 = [("b", 1, 2)]  # (series, position, charge)
+    peak_intensity = np.array([10.0], dtype=np.float32)
+
+    extracted = targets_from_annotations(
+        [doubly_charged_b1], peak_intensity, ion_types, seq_len=4
+    )
+
+    log_floor = np.float32(np.log2(0.001))
+    assert extracted["b2"][0] == 10.0
+    # The singly charged b series must remain at the floor value
+    assert extracted["b"][0] == log_floor
+
+
+def test_targets_from_annotations_highest_intensity():
+    """When two peaks are both annotated as b1, the larger intensity is the target."""
+    b1 = ("b", 1, 1)
+    # Both peaks carry the same b1 annotation; their intensities are 3.0 and 7.0
+    duplicated = [[b1], [b1]]
+    peak_intensity = np.array([3.0, 7.0], dtype=np.float32)
+
+    extracted = targets_from_annotations(duplicated, peak_intensity, ["b", "y"], seq_len=4)
+
+    # 7.0 > 3.0, so 7.0 is expected at the b1 slot
+    assert extracted["b"][0] == 7.0
+
+
+def test_targets_from_annotations_ignores_unknown_ion_types():
+    """A c-ion annotation is dropped when only b and y series are requested."""
+    c_ion_only = [[("c", 1, 1)]]
+    peak_intensity = np.array([10.0], dtype=np.float32)
+
+    extracted = targets_from_annotations(c_ion_only, peak_intensity, ["b", "y"], seq_len=4)
+
+    # Neither requested series may contain anything but the floor value
+    log_floor = np.float32(np.log2(0.001))
+    for series in ("b", "y"):
+        for value in extracted[series]:
+            assert value == log_floor
+
+
+def test_targets_from_annotations_fragment_annotation_objects():
+    """FragmentAnnotation objects are accepted in place of plain tuples."""
+    from ms2rescore_rs import FragmentAnnotation
+
+    b1 = FragmentAnnotation(series="b", position=1, charge=1)
+    peak_intensity = np.array([5.0], dtype=np.float32)
+
+    extracted = targets_from_annotations([[b1]], peak_intensity, ["b", "y"], seq_len=4)
+
+    # The single b1 peak intensity is expected at index 0 of the b series
+    b_series = extracted["b"]
+    assert b_series[0] == 5.0
+
+
+def test_annotate_spectrum():
+    """Check that annotate_spectrum returns one annotation list per peak, some non-empty."""
+    from ms2pip.spectrum import ObservedSpectrum
+
+    spectrum = ObservedSpectrum(
+        mz=np.array([72.044, 175.054, 290.081], dtype=np.float32),
+        intensity=np.array([100.0, 200.0, 300.0], dtype=np.float32),
+        identifier="test",
+        precursor_mz=250.0,
+        precursor_charge=2,
+        retention_time=100.0,
+    )
+    psm = PSM(peptidoform=Peptidoform("ACDE/2"), spectrum_id="test")
+
+    annotations = annotate_spectrum(spectrum, psm, "HCD", 0.02, "Da")
+    # One list of annotations is expected per input peak
+    assert len(annotations) == 3
+    # At least one peak must be annotated (b1 at ~72.044, b2 at ~175.054)
+    assert any(len(peak) > 0 for peak in annotations)
+
+
+def test_proforma_to_mass_shift_unmodified():
+    """An unmodified peptidoform string is returned unchanged."""
+    assert proforma_to_mass_shift(Peptidoform("PEPTIDE/2")) == "PEPTIDE/2"
+
+
+def test_proforma_to_mass_shift_unimod_names():
+    """Named UNIMOD modifications are rewritten as mass-shift labels."""
+    named = Peptidoform("PEPTC[UNIMOD:Carbamidomethyl]M[UNIMOD:Oxidation]IDE/2")
+    shifted = proforma_to_mass_shift(named)
+    assert "[+" in shifted
+    assert "Carbamidomethyl" not in shifted and "Oxidation" not in shifted
+    assert shifted.startswith("PEPTC[+57.0215]M[+15.9949]") and shifted.endswith("/2")
+
+
+def test_proforma_to_mass_shift_nterm():
+    """An N-terminal UNIMOD:Acetyl label becomes a [+42.0106] mass shift."""
+    shifted = proforma_to_mass_shift(Peptidoform("[UNIMOD:Acetyl]-PEPTIDE/2"))
+    assert shifted.startswith("[+42.0106]-PEPTIDE") and shifted.endswith("/2")
+
+
+def test_proforma_to_mass_shift_already_mass_shift():
+    """A peptidoform already written with mass shifts is returned unchanged."""
+    assert proforma_to_mass_shift(Peptidoform("PEPTC[+57.0215]IDE/2")) == "PEPTC[+57.0215]IDE/2"
+
+
+def test_proforma_to_mass_shift_no_charge():
+    """Without a charge state, no "/charge" suffix is appended."""
+    chargeless = proforma_to_mass_shift(Peptidoform("PEPTIDE"))
+    assert chargeless == "PEPTIDE" and "/" not in chargeless