diff --git a/astro.config.mjs b/astro.config.mjs
index d82197eb..0d853b46 100644
--- a/astro.config.mjs
+++ b/astro.config.mjs
@@ -13,6 +13,9 @@ export default defineConfig({
starlight({
title: "Open Podcast API",
favicon: "favicon.ico",
+ customCss: [
+ './src/styles/custom.css',
+ ],
social: [
{
icon: "github",
@@ -47,11 +50,26 @@ export default defineConfig({
},
{
label: "Subscriptions",
+ badge: {
+ text: "Core",
+ variant: "caution",
+ },
collapsed: true,
autogenerate: {
directory: "specs/subscriptions",
},
},
+ {
+ label: "Episodes",
+ badge: {
+ text: "Core",
+ variant: "caution",
+ },
+ collapsed: true,
+ autogenerate: {
+ directory: "specs/episodes",
+ },
+ },
],
},
...openAPISidebarGroups,
@@ -68,7 +86,9 @@ export default defineConfig({
"TabItem",
],
},
- "src/components/SponsorCallout.astro"
+ "src/components/SponsorCallout.astro",
+ "src/components/BadgeOptional.astro",
+ "src/components/BadgeCore.astro"
],
}),
],
diff --git a/schema.yml b/schema.yml
index 13205552..02626936 100644
--- a/schema.yml
+++ b/schema.yml
@@ -7,6 +7,8 @@ info:
tags:
- name: Subscriptions
description: All actions relating to subscription management
+ - name: Episodes
+ description: All actions relating to episode management
paths:
/subscriptions:
get:
@@ -202,6 +204,51 @@ paths:
security:
- podcast_auth:
- read:subscriptions
+ /episodes:
+ get:
+ tags:
+ - Episodes
+ summary: Retrieve all episodes for the authenticated user
+ description: Retrieve all episodes that have changed for the authenticated user since the provided timestamp
+ operationId: getEpisodes
+ parameters:
+ - in: query
+ name: since
+ schema:
+ type: string
+ format: date-time
+ required: false
+ example:
+ '2022-04-23T18:25:43.511Z'
+ - in: query
+ name: page
+ schema:
+ type: number
+ required: false
+ example:
+ 1
+ - in: query
+ name: per_page
+ schema:
+ type: number
+ required: false
+ example:
+ 5
+ responses:
+ '200':
+ description: Successful operation
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Episodes'
+ application/xml:
+ schema:
+ $ref: '#/components/schemas/Episodes'
+ '401':
+ $ref: '#/components/responses/Unauthorized'
+ security:
+ - podcast_auth:
+ - read:subscriptions
components:
responses:
Unauthorized:
@@ -357,6 +404,7 @@ components:
guid:
type: string
format: guid
+ description: The unique identifier (guid) for the subscription, as declared in the feed or generated by the server
is_subscribed:
type: boolean
subscription_changed:
@@ -550,6 +598,167 @@ components:
subscription_changed: 2023-02-23T14:41:00.000Z
guid_changed: 2023-02-23T14:41:00.000Z
new_guid: 965fcecf-ce04-482b-b57c-3119b866cc61
+ Episode:
+ xml:
+ name: episode
+ required:
+ - podcast_guid
+ - sync_id
+ - episode_guid
+ - title
+ - publish_date
+ - enclosure_url
+ - episode_url
+ type: object
+ properties:
+ podcast_guid:
+ type: string
+ format: guid
+ sync_id:
+ type: string
+ format: guid
+ episode_guid:
+ type: string
+ title:
+ type: string
+ publish_date:
+ type: string
+ format: date-time
+ enclosure_url:
+ type: string
+ format: url
+ episode_url:
+ type: string
+ format: url
+ playback_position:
+ value:
+ type: number
+ format: integer
+ timestamp:
+ type: string
+ format: date-time
+ played_status:
+ value:
+ type: boolean
+ timestamp:
+ type: string
+ format: date-time
+ new_status:
+ value:
+ type: boolean
+ timestamp:
+ type: string
+ format: date-time
+ download_status:
+ value:
+ type: boolean
+ timestamp:
+ type: string
+ format: date-time
+ favorite_status:
+ value:
+ type: boolean
+ timestamp:
+ type: string
+ format: date-time
+ example:
+ podcast_guid: 31740ac6-e39d-49cd-9179-634bcecf4143
+ sync_id: cff3ea32-4215-4f98-bc23-5358d1f35b55
+ episode_guid: https://example.com/podcast/episode-5-the-history-of-RSS
+ title: The history of RSS
+ publish_date: 2022-04-24T17:53:21.573Z
+ enclosure_url: https://example.com/podcast/episode-5-the-history-of-RSS.mp3
+ episode_url: https://example.com/podcast/episode-5-the-history-of-RSS
+ playback_position:
+ value: 0
+ timestamp: 2024-11-02T13:19
+ played_status:
+ value: true
+ timestamp: 2024-11-02T13:19
+ new_status:
+ value: false
+ timestamp: 2024-10-30T17:31
+ download_status:
+ value: false
+ timestamp: 2024-11-02T13:19
+ favorite_status:
+ value: false
+ timestamp: 2024-11-02T13:19
+ Episodes:
+ required:
+ - total
+ - page
+ - per_page
+ - episodes
+ xml:
+ name: episodes
+ type: object
+ properties:
+ total:
+ type: number
+ page:
+ type: number
+ per_page:
+ type: number
+ next:
+ type: string
+ format: url
+ previous:
+ type: string
+ format: url
+ episodes:
+ type: array
+ items:
+ $ref: '#/components/schemas/Episode'
+ example:
+ total: 2
+ page: 1
+ per_page: 5
+ episodes:
+ - podcast_guid: 31740ac6-e39d-49cd-9179-634bcecf4143
+ sync_id: cff3ea32-4215-4f98-bc23-5358d1f35b55
+ episode_guid: https://example.com/podcast/episode-5-the-history-of-RSS
+ title: The history of RSS
+ publish_date: 2022-04-24T17:53:21.573Z
+ enclosure_url: https://example.com/podcast/episode-5-the-history-of-RSS.mp3
+ episode_url: https://example.com/podcast/episode-5-the-history-of-RSS
+ playback_position:
+ value: 0
+ timestamp: 2024-11-02T13:19
+ played_status:
+ value: true
+ timestamp: 2024-11-02T13:19
+ new_status:
+ value: false
+ timestamp: 2024-10-30T17:31
+ download_status:
+ value: false
+ timestamp: 2024-11-02T13:19
+ favorite_status:
+ value: false
+ timestamp: 2024-11-02T13:19
+ - podcast_guid: 9d6786c9-ed48-470d-acbe-e593547f4b5b
+ sync_id: 5773f457-e71b-417d-8ea8-f07c38a03a3e
+ episode_guid: 01999e25-08cd-4f29-a61e-6ca459b40d27
+ title: Walk with the weatherman
+ publish_date: 2022-04-27T19:35:20.000Z
+ enclosure_url: https://op3.dev/e/https://podcasts.example2.net/audio/@digitalcitizen/49-walk-with-the-weatherman.mp3
+ episode_url: https://podcasts.example2.net/@digitalcitizen/episodes/49-walk-with-the-weatherman
+ playback_position:
+ value: 2100
+ timestamp: 2024-11-01T17:38
+ played_status:
+ value: false
+ timestamp: 2024-04-28T09:20
+ new_status:
+ value: false
+ timestamp: 2024-11-01T17:02
+ download_status:
+ value: true
+ timestamp: 2024-11-01T17:02
+ favorite_status:
+ value: false
+ timestamp: 2024-04-28T09:20
Deletion:
xml:
name: deletion
@@ -599,8 +808,8 @@ components:
implicit:
authorizationUrl: https://test.openpodcastapi.com/oauth/authorize
scopes:
- write:subscriptions: modify subscription information for your account
- read:subscriptions: read your subscription information
+ write:subscriptions: modify subscription information & related episodes for your account
+ read:subscriptions: read your subscription information & related episodes
api_key:
type: apiKey
name: api_key
diff --git a/src/components/BadgeCore.astro b/src/components/BadgeCore.astro
new file mode 100644
index 00000000..a8ae4311
--- /dev/null
+++ b/src/components/BadgeCore.astro
@@ -0,0 +1,5 @@
+---
+import { Badge } from '@astrojs/starlight/components';
+---
+
+
\ No newline at end of file
diff --git a/src/components/BadgeOptional.astro b/src/components/BadgeOptional.astro
new file mode 100644
index 00000000..2cae475c
--- /dev/null
+++ b/src/components/BadgeOptional.astro
@@ -0,0 +1,5 @@
+---
+import { Badge } from '@astrojs/starlight/components';
+---
+
+
\ No newline at end of file
diff --git a/src/content/docs/specs/episodes/add-update.mdx b/src/content/docs/specs/episodes/add-update.mdx
new file mode 100644
index 00000000..d00a57ad
--- /dev/null
+++ b/src/content/docs/specs/episodes/add-update.mdx
@@ -0,0 +1,60 @@
+---
+title: Add or update episodes
+description: Create new or update existing episodes for a user
+sidebar:
+ order: 5
+ badge:
+ text: Core
+ variant: caution
+---
+
+import CoreAction from "@partials/_core-action.mdx";
+
+
+
+```http title="Endpoint"
+POST /v1/episodes
+```
+
+This endpoint enables clients to register new episodes for or change information of episodes relating to the authenticated user. It returns an array of `success` responses for newly added episodes, and an array of `failure` responses for episodes that couldn't be added.
+
+This endpoint only accepts an array of objects, as it serves both for the creation and updating of a single and a batch of episodes.
+
+## Related fields
+
+See the [Overview](index) page for an overview of the fields used for this endpoint.
+
+## Request parameters
+
+The client MUST provide a list of objects containing a set of parameters, which depend on the presence of a `sync_id`:
+* if the `sync_id` of the episode is known:
+ * `podcast_guid`
+ * `sync_id` (of the episode)
+* if there is no (known) `sync_id` (yet):
+ * all identifier fields except `sync_id` (`podcast_guid`, `episode_guid`, `title`, `publish_date`, `enclosure_url`, `episode_url`),
+ * `temporary_id` (optional),
+* and always: any data fields that were changed
+
+## Client side behavior
+
+:::note[Reminder: pull first, post later]
+As discussed in the [Generic principles], clients SHOULD pull first and post later. In the process, clients are expected to do some [deduplication](identification-deduplication).
+:::
+
+Clients SHOULD adopt a **lazy synchronization** approach. This means:
+* not syncing immediately when a new episode is found (e.g. after refreshing a feed)
+* only syncing after an episode has been interacted with by the user or system (e.g. after initiating an automatic download)
+
+Clients MAY leave the `sync_id` empty when registering new episodes, and leave the generation of the `sync_id` to the server.
+
+Clients MAY provide a `temporary_id` with the request (e.g. the local database index) that the server will reflect in its response, so that the client can match the episodes from the server response more easily.
+
+## Server side behavior
+
+Servers SHOULD NOT rely on clients for episode discovery. If the server also has a 'client' component (a user interface to interact with subscriptions and episodes), it SHOULD independently ensure episode discovery (refresh feeds or rely on third party episode APIs). This is due to the lazy synchronization applied by clients - if servers do not fetch episodes themselves then the user might notice certain episodes are missing.
+
+### `sync_id`
+
+Servers MUST respond with a `sync_id` at all times. If none exists, one MUST be generated. Episode `sync_id`s are of type guid and could, for example, be the internal episode ID.
+
+Servers MUST respond with the `temporary_id` for the episode if it was provided by the client. ??What did we say again about asynchronous processing? How long does this temporary ID need to be preserved by the server??
\ No newline at end of file
diff --git a/src/content/docs/specs/episodes/get-all-from-subscription.mdx b/src/content/docs/specs/episodes/get-all-from-subscription.mdx
new file mode 100644
index 00000000..ef7bd817
--- /dev/null
+++ b/src/content/docs/specs/episodes/get-all-from-subscription.mdx
@@ -0,0 +1,22 @@
+---
+title: Get all episodes of a podcast
+description: Get all episodes for a subscription
+sidebar:
+ order: 4
+ badge:
+ text: Core
+ variant: caution
+---
+
+import CoreAction from "@partials/_core-action.mdx";
+
+
+
+```http title="Endpoint"
+GET /v1/subscriptions/{guid}/episodes
+```
+
+TO DO. See [2023-05-30](https://pad.funkwhale.audio/oCfs5kJ6QTu02d_oVHW7DA#) meeting notes.
+
+
+`GET/PUT /subscriptions/{guid}/episodes/{sync_id}` --> Really needed? `GET/PUT /episodes` would suffice probably.
\ No newline at end of file
diff --git a/src/content/docs/specs/episodes/get-all.mdx b/src/content/docs/specs/episodes/get-all.mdx
new file mode 100644
index 00000000..ebf5b0f2
--- /dev/null
+++ b/src/content/docs/specs/episodes/get-all.mdx
@@ -0,0 +1,262 @@
+---
+title: Get all episodes
+description: Get all episodes for a user
+sidebar:
+ order: 3
+ badge:
+ text: Core
+ variant: caution
+---
+
+import CoreAction from "@partials/_core-action.mdx";
+
+
+
+```http title="Endpoint"
+GET /v1/episodes
+```
+
+This endpoint enables clients to return all episode information relating to the authenticated user. It returns pagination information and an array of `episodes`.
+
+While supported, this endpoint is expected to be used rarely in practice. More often, episodes are retrieved per queue (TBD) or per subscription (TBD).
+
+## Response fields
+
+### Metadata
+
+| Field | Type | Required? | Description |
+| ---------- | ------ | --------- | ------------------------------------------------ |
+| `total` | Number | Yes | The total number of objects returned by the call |
+| `page` | Number | Yes | The number of the page returned in the call |
+| `per_page` | Number | Yes | The number of results returned per page |
+| `next` | String | No | The URL for the next page of results |
+| `previous` | String | No | The URL for the previous page of results |
+
+### Episode fields
+
+| Group | Field | Type | Required? | Description |
+| ---------- | ----------------------------------- | --------------- | --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Identifier | `podcast_guid` | String \ | Yes | The globally unique ID of the parent podcast |
+| Identifier | `sync_id` | String \ | Yes | The synchronisation ID of the episode, globally unique at the server and its clients |
+| Identifier | `episode_guid` | String \ | Yes | The globally unique ID of the episode, as present in the RSS feed ([`guid` tag](https://www.rssboard.org/rss-specification#ltguidgtSubelementOfLtitemgt)) |
+| Identifier | `title` | String | Yes | The title of the episode, as present in the RSS feed (`title` tag) |
+| Identifier | `publish_date` | Datetime | Yes | The date of publishing of the episode, as present in the RSS feed ([`pubDate` tag](https://www.rssboard.org/rss-specification#ltpubdategtSubelementOfLtitemgt)). Presented in [ISO 8601 format] |
+| Identifier | `enclosure_url` | String | Yes | The media file of the episode, as present in the RSS feed ([`enclosure` tag](https://www.rssboard.org/rss-specification#ltenclosuregtSubelementOfLtitemgt)) |
+| Identifier | `episode_url` | String | Yes | The (webpage) URL of the episode, as present in the RSS feed (`link` tag) |
+| Data | `playback_position` | Integer | ??YES/NO??| The most recent playback position in seconds |
+| Data | `played_status` | Boolean | No | Whether the episode has been played (or marked as such) |
+| Data | `new_status` | Boolean | No | Whether the user (manually) interacted with the episode.<br/>_Example:_ In AntennaPod this is used to indicate whether an episode is in the Inbox |
+| Data | `download_status` | Boolean | No | Whether the episode is downloaded on the client. For further details, see below. |
+| Data | `favorite_status` | Boolean | No | Whether the episode has been favorited by the user |
+
+The server can ignore the other identifier fields, if it found an episode based on the `podcast_guid` and the `sync_id`.
+
+:::note[Why all identifiers are required]
+
+Assume client A has refreshed a feed locally, and client B hasn't done so yet. In order for client B to do episode matching with what it receives from the server, it would need to have more data than just the `sync_id`. In this scenario, how does the server know whether client B has already refreshed the feed and is aware of the episode or not (i.e. whether it can rely on `sync_id` alone or needs to send more information)? The server cannot know, thus a 'conversation' between server and client would be needed:
+* `S` "here's a new episode with podcast_guid x and sync_id y"
+* `C` "I don't recognise this one - tell me more!"
+* `S` "Ok, here's all information you can use for episode matching: …"
+
+To avoid such a conversation, and as the sending of all matching data involves only a few bytes, all identifiers are always sent.
+
+:::
+
+Remember that each of the data fields MUST have both a 'value' and a 'timestamp'.
+
+:::note[Discussion details]
+See meeting notes from [2024-06-19](https://pad.funkwhale.audio/s/I3F_C2NbQ#GET-episode-information)
+:::
+
+## Parameters
+
+The client MAY add the following parameters to their call:
+
+| Field | Type | In | Required? | Description |
+| ---------- | -------- | ----- | --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `since` | DateTime | Query | No | The date from which the server should return objects. The server only returns entries whose `timestamp` data for any of the fields are greater than this parameter. Expected in [ISO 8601 format] |
+| `page` | Number | Query | No | The page of results to be returned by the server. Defaults to `1` if not present |
+| `per_page` | Number | Query | No | The number of results to return in each call. Defaults to `50` if not present |
+
+:::note
+If no `since` parameter is provided, the server MUST return all current episode information.
+:::
+
+## Server-side behavior
+
+No particular behavior expected.
+
+## Client behavior
+
+The client SHOULD update its local episode data to match the information returned in the response.
+
+## Example request
+
+
+
+
+ ```console
+ $ curl -X 'GET' \
+ '/v1/episodes?since=2024-04-23T18%3A25%3A34.511Z&page=1&per_page=5' \
+ -H 'accept: application/json'
+ ```
+
+
+
+
+ ```console
+ $ curl -X 'GET' \
+ '/v1/episodes?since=2024-04-23T18%3A25%3A34.511Z&page=1&per_page=5' \
+ -H 'accept: application/xml'
+ ```
+
+
+
+
+## Example 200 response
+
+
+
+
+ ```json
+ {
+ "total": 2,
+ "page": 1,
+ "per_page": 5,
+ "episodes": [
+ {
+ "podcast_guid": "31740ac6-e39d-49cd-9179-634bcecf4143",
+ "sync_id": "cff3ea32-4215-4f98-bc23-5358d1f35b55",
+ "episode_guid": "https://example.com/podcast/episode-5-the-history-of-RSS",
+ "title": "The history of RSS",
+ "publish_date": "2022-04-24T17:53:21.573Z",
+ "enclosure_url": "https://example.com/podcast/episode-5-the-history-of-RSS.mp3",
+ "episode_url": "https://example.com/podcast/episode-5-the-history-of-RSS",
+ "playback_position": {
+ "value": 0,
+ "timestamp": "2024-11-02T13:19"
+ },
+ "played_status": {
+ "value": true,
+ "timestamp": "2024-11-02T13:19"
+ },
+ "new_status": {
+ "value": false,
+ "timestamp": "2024-10-30T17:31"
+ },
+ "download_status": {
+ "value": false,
+ "timestamp": "2024-11-02T13:19"
+ },
+ "favorite_status": {
+ "value": false,
+ "timestamp": "2024-11-02T13:19"
+ }
+ },
+ {
+ "podcast_guid": "9d6786c9-ed48-470d-acbe-e593547f4b5b",
+ "sync_id": "5773f457-e71b-417d-8ea8-f07c38a03a3e",
+ "episode_guid": "01999e25-08cd-4f29-a61e-6ca459b40d27",
+ "title": "Walk with the weatherman",
+ "publish_date": "2022-04-27T19:35:20.000Z",
+ "enclosure_url": "https://op3.dev/e/https://podcasts.example2.net/audio/@digitalcitizen/49-walk-with-the-weatherman.mp3",
+ "episode_url": "https://podcasts.example2.net/@digitalcitizen/episodes/49-walk-with-the-weatherman",
+ "playback_position": {
+ "value": 2100,
+ "timestamp": "2024-11-01T17:38"
+ },
+ "played_status": {
+ "value": false,
+ "timestamp": "2024-04-28T09:20"
+ },
+ "new_status": {
+ "value": false,
+ "timestamp": "2024-11-01T17:02"
+ },
+ "download_status": {
+ "value": true,
+ "timestamp": "2024-11-01T17:02"
+ },
+ "favorite_status": {
+ "value": false,
+ "timestamp": "2024-04-28T09:20"
+ }
+ }
+ ]
+ }
+ ```
+
+
+
+
+ ```xml
+
+
+ 2
+ 1
+ 5
+
+ 31740ac6-e39d-49cd-9179-634bcecf4143
+ cff3ea32-4215-4f98-bc23-5358d1f35b55
+ https://example.com/podcast/episode-5-the-history-of-RSS
+ The history of RSS
+ 2022-04-24T17:53:21.573Z
+ https://example.com/podcast/episode-5-the-history-of-RSS.mp3
+ https://example.com/podcast/episode-5-the-history-of-RSS
+
+ 0
+ 2024-11-02T13:19
+
+
+ true
+ 2024-11-02T13:19
+
+
+ false
+ 2024-10-30T17:31
+
+
+ false
+ 2024-11-02T13:19
+
+
+ false
+ 2024-11-02T13:19
+
+
+
+ 9d6786c9-ed48-470d-acbe-e593547f4b5b
+ 5773f457-e71b-417d-8ea8-f07c38a03a3e
+ 01999e25-08cd-4f29-a61e-6ca459b40d27
+ Walk with the weatherman
+ 2022-04-27T19:35:20.000Z
+ https://op3.dev/e/https://podcasts.example2.net/audio/@digitalcitizen/49-walk-with-the-weatherman.mp3
+ https://podcasts.example2.net/@digitalcitizen/episodes/49-walk-with-the-weatherman
+
+ 2100
+ 2024-11-01T17:38
+
+
+ false
+ 2024-04-28T09:20
+
+
+ false
+ 2024-11-01T17:02
+
+
+ true
+ 2024-11-01T17:02
+
+
+ false
+ 2024-04-28T09:20
+
+
+
+ ```
+
+
+
+
+[ISO 8601 format]: https://www.iso.org/iso-8601-date-and-time-format.html
diff --git a/src/content/docs/specs/episodes/identification-deduplication.mdx b/src/content/docs/specs/episodes/identification-deduplication.mdx
new file mode 100644
index 00000000..b9e276b9
--- /dev/null
+++ b/src/content/docs/specs/episodes/identification-deduplication.mdx
@@ -0,0 +1,113 @@
+---
+title: '[WIP] Episode identification & deduplicating'
+description: TBD
+sidebar:
+ label: Identification & deduplication
+ order: 2
+---
+
+:::note[Episode matching & deduplication]
+TODO: Describe or reference to information about episode matching and deduplication. See:
+* 2023-05-30 https://pad.funkwhale.audio/oCfs5kJ6QTu02d_oVHW7DA#Deduplication
+* 2024-04-16 https://pad.funkwhale.audio/s/6mWuDexgz#Episode-deduplication
+* 2024-04-24 https://pad.funkwhale.audio/kIRwEOYDRNqTA4np6vbBVg#
+* 2024-05-06 https://pad.funkwhale.audio/2iNOQ2N2RF-ICMQcPWyyYQ# ✅ Comments processed
+:::
+
+Decentralisation is a key aspect of the Open Podcast API specification. This means that clients need to be able to independently retrieve data from RSS feeds, which might cause duplication if episodes aren't matched correctly. In a decentralised model, entities might also not necessarily trust other entities, which could be misbehaving and (accidentally) cause all episodes to be merged into one.
+
+To enable a reliable and decentralised approach without duplicate episodes, three principles are adopted:
+* extensive provision of identifiers for episode matching - see the previous page [Identifier fields](../#identifier-fields)
+* relying on clients to do episode matching - described below
+* `sync_id`s are used to exchange episode data after a first exchange about the episode
+
+## Waterfall episode matching
+
+:::note[Guidelines]
+While each server and client should do as it pleases, based on previous experience the Open Podcast API team recommends applying the following.
+:::
+
+The waterfall process to match episodes is run:
+* at the client side when refreshing a feed
+* at the server side when receiving a 'new episode' instruction from a client
+* at the client side when it joins an existing family (the 'pull first, post later' principle)
+
+Clients and servers might also skip deduplication if a new episode is provided by a known and trusted entity (e.g. if server and client are from the same developer and have adopted shared logic).
+
+Matching must happen only within a single feed, not across feeds. This is because the same `episode_guid` could (accidentally) be used between different feeds. There is one **exception**, however: if it concerns a [remote item](https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/tags/remote-item.md) the `episode_guid` is *expected* to be the same as in the RSS feed identified in the `feedUrl` or `feedGuid` attributes of the `podcast:remoteItem` element.
+* If a client or server supports the `remoteItem` tag, it is expected to treat the two episodes as a 'duplicate'. (See also note on Tombstoning in the [Overview](..) page.)
+
+### Waterfall order
+The waterfall checks fields in this order:
+1. `episode_guid`
+2. `enclosure_url`
+3. Matching of at least 2 out of 3 relevant fields:
+ 1. `publish_date`
+ 2. `episode_url`
+ 3. `title`
+
+:::tip[What if a feed has the same episode twice with the same `episode_guid`?]
+Imagine a podcaster goes on holiday and republishes an existing episode. While they usually record a new intro, it's defensible to release a new item with the same `episode_guid`.
+
+In this case, as per the waterfall order above, we would recommend marking the episode as duplicate. While released on a different date, the content is largely the same and if the user has already listened to it, they probably wouldn't want to listen to it again. At the same time, in case the episode wasn't listened to yet, a client might decide to display it again as if it were a new episode.
+:::
+
+:::note[Why 2 out of 3 in the last step?]
+We recognize that not all servers and clients might have all data at all times. Podverse, for example, cleans its database after a while and drops the `title` data. With the current guideline, they would still be able to reliably deduplicate episodes.
+:::
+
+### Waterfall principles
+In the waterfall process, the following principles apply:
+* Field data must be exact matches in order to proceed through the waterfall.
+* For each step:
+ * If there is no match, consider all episodes again in the next step.
+ * If there are one or multiple exact matches, consider (only) those matched episodes in the next step.
+* When the end of the waterfall is reached and
+ * no match is found, then the episode is considered unique
+ * one or more matches are found, then the episode(s) is (are) considered a duplicate and must be deduplicated (see [deduplication endpoint](#deduplication-endpoint))
+
+:::note[Open question: step out of waterfall if guid match?]
+In the [2023-05-30](https://pad.funkwhale.audio/oCfs5kJ6QTu02d_oVHW7DA#:~:text=matching%20proposal%20in%20pseudo-code%20(click%20to%20expand)) meeting notes we have pseudo code that notes that if the GUID is present, we should decide exclusively based on it. Later notes from [2024-05-06](https://pad.funkwhale.audio/2iNOQ2N2RF-ICMQcPWyyYQ#:~:text=episode%E2%80%99%20from%20client.-,guid,-Enclosure%20URL) don't explicitly reject this principle.
+:::
+:::note[Differing (conflicting) matching principles]
+It is possible that clients have different deduplication methods. How would this work?
+* AntennaPod and Kasts have different methods
+* AntennaPod finds a new episode in the RSS feed, determines it a duplicate and merges the two episodes locally. It doesn't inform the server.
+* Kasts finds the same new episode in the RSS feed, but determines it unique. It informs the server via `POST /v1/episodes`.
+* The server does its own deduplication, as recommended by the spec [K-NL: This is a bit of a duplication of work?] The server might agree with AntennaPod or with Kasts, depending on its own standards.
+ * If the server determines the episode unique: it generates a `sync_id` and sends this back to Kasts. It informs AntennaPod of a new episode (the next time AntennaPod asks 'What's new since x'), which trusts the server about this new episode, and creates an additional entry in its database. Both episodes now exist.
+ * If the server determines the episode a duplicate: it replies with a 'fail' message for the episode, noting that it considers a duplicate of episode `podcast_guid`,`sync_id`. [K-NL: podcast guid is normally superfluous as the episode can only be duplicate within a feed, and even with remote items the `sync_id` should still be unique and usable.]
+:::
+
+:::note[A partial waterfall]
+From the [2024-05-06](https://pad.funkwhale.audio/2iNOQ2N2RF-ICMQcPWyyYQ#) meeting notes:
+We could imagine that, for efficiency purposes, a server only does step 1 of the waterfall (`episode_guid`). Then, at a later time the client can check in more detail and use the deduplication mechanism and -endpoint to deduplicate.
+:::
+
+:::note[Open question from meeting notes]
+> [2024-05-06](https://pad.funkwhale.audio/2iNOQ2N2RF-ICMQcPWyyYQ#)
+> Is there a difference between ‘episode matching’ (assigning a sync-ID at client level) and deduplication?
+:::
+
+[K-NL: the Capabilities response/endpoint should probably also inform the client of episode matching capabilities during regular processing, in addition to the declaring of a deduplication endpoint.]
+
+## Deduplication endpoint
+
+TO BE DESCRIBED - documentation should go to dedicated endpoint page.
+
+### Deduplication and data storage
+
+Deduplication MUST be a per-user action. [K-NL: Why does it? [The notes don't explain which scenarios would require this.](https://pad.funkwhale.audio/kIRwEOYDRNqTA4np6vbBVg#Question-about-how-server-stores-episode-data) Probably because of what I noted under `sync_id` on the [Overview](..#identifier-fields) page?]
+
+Database architecture of servers is of course up to the implementers, but an important consideration. Single-user ('small web') servers might keep a simple structure with one row per episode, while big instances would probably leverage multiple tables to store episode data efficiently.
+
+## Sync scenarios
+Given the decentralised nature there are certain sync scenarios which could lead to the need to match and deduplicate episodes. The above processes normally ensure that the following scenarios are handled gracefully.
+
+### 1 episode data gets out of sync and has to be rematched
+A user has two phones, both of which independently pull the RSS feed. Phone A dies right after it refreshed the feed. Phone B pulls the RSS feed as well, informs the server of a new episode, and receives a `sync_id` for the episode. The podcast publisher doesn't follow protocol and gives the episode a new `episode_guid` while fixing a typo in the `title`. Phone B picks up both changes and tells the server to update its data too. Phone A comes back online and syncs with the server, not aware (yet) of the changed `episode_guid` and `title`. It tells the server to create a new episode.
+* If the server supports deduplication, it can identify the new episode and tell Phone A that the episode already exists, providing the updated fields together with the `sync_id`.
+* If the server does not support deduplication, it will create a new episode in the database which is also passed on to Phone B. Phone B recognises that it is a duplicate, and makes a call to the 'deduplication' endpoint. When that's done, the server informs phone A of the new situation.
+
+### 2 new client with existing data joins server
+A user has a phone and a tablet which both refresh feeds locally. The phone is already syncing with a server, but the tablet is not. The user links the tablet with the server for the first time. The server sends all information it has to the tablet, which then does deduplication, and informs the server if it has any more recent information that should be applied.
\ No newline at end of file
diff --git a/src/content/docs/specs/episodes/index.mdx b/src/content/docs/specs/episodes/index.mdx
new file mode 100644
index 00000000..62d8faa7
--- /dev/null
+++ b/src/content/docs/specs/episodes/index.mdx
@@ -0,0 +1,146 @@
+---
+title: Episodes endpoint
+description: An endpoint for syncing episodes between devices.
+prev: "Subscriptions: Deletion status endpoint"
+sidebar:
+ label: Overview
+ order: 1
+---
+
+import CoreEndpoint from "@partials/_core-endpoint.mdx";
+
+
+
+The episodes endpoint allows synchronising user-generated episode metadata. As the RSS feed is the authoritative source of truth, episode metadata such as the title is only synchronised in some cases for episode identification and matching.
+
+Clients can query the endpoint by specifying the datetime from which they want to fetch changes to ensure they only fetch information that is relevant to them since their last sync.
+
+## Important data fields
+
+We distinguish two types of data fields: _identifier fields_ (used to identify and match episodes) and _data fields_ (used to synchronise users' interaction with episodes).
+
+:::note[Tombstoning]
+Servers SHOULD hold all previous `guid` and `feed_url` field data with a link to the succeeding data (such that a path of values can be followed) or with a link to the most recent data. This enables the server to handle situations in which clients submit old data. For example:
+
+- A user finds a podcast, whose URL had changed, and adds the old URL in the app. Because the client doesn't have the old URL in its database, it recognizes the podcast as **new** and POSTs the `feed_url` to the `/subscriptions` endpoint. If the user is already subscribed to the podcast (with the current feed URL) this would lead to a duplicate subscription.
+- A user has a device that they didn't use for a very long time. In that time, a podcaster added a GUID in their feed, leading to updated data in this field. When the client connects to the server again to pull all episode changes since the last connection, it retrieves episodes with their current subscription `guid`. The client won't recognize the subscription and will fail to update the status of episodes.
+ :::
+
+### Identifier fields
+
+| Field | Type | Nullable? | Description |
+| -------------------------------- | --------------- | --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `podcast_guid` | String \ | No | The globally unique ID of the podcast |
+| `sync_id` | String \ | No | The synchronisation ID of the episode, MUST be globally unique across the server and its clients [K-NL: and what about users - do users' episodes share the same `sync_id`s? It would make sense but at the same time user 1 might use a client X which does deduplication in a different way than client Y and so the two users would get different deduplication results, and thus their episodes in the server's database couldn't be merged.] |
+| `temporary_id` | String | Yes | The ID used by the client when sending new episode information to the server, to make episode identification easier when receiving a response |
+| `episode_guid` | String \ | Yes | The globally unique ID of the episode, as present in the RSS feed ([`guid` tag](https://www.rssboard.org/rss-specification#ltguidgtSubelementOfLtitemgt)) |
+| `title` | String | Yes | The title of the episode, as present in the RSS feed (`title` tag) |
+| `publish_date` | Datetime | Yes | The date of publishing of the episode, as present in the RSS feed ([`pubDate` tag](https://www.rssboard.org/rss-specification#ltpubdategtSubelementOfLtitemgt)). Presented in [ISO 8601 format] |
+| `enclosure_url` | String | Yes | The media file of the episode, as present in the RSS feed ([`enclosure` tag](https://www.rssboard.org/rss-specification#ltenclosuregtSubelementOfLtitemgt)) |
+| `episode_url` | String | Yes | The (webpage) URL of the episode, as present in the RSS feed (`link` tag) |
+
+### Data fields
+
+| Field | Type | Nullable? | Description |
+| ----------------------------------- | -------------- | --------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `playback_position` | Integer | Yes/No? | The most recent playback position |
+| `played_status` | Boolean | No | Whether the episode has been (marked as) played |
+| `new_status` | Boolean | Yes | Whether the user (manually) interacted with the episode.<br />_Example:_ In AntennaPod this is used to indicate whether an episode is in the Inbox |
+| `download_status` | Boolean | Yes | Whether the episode is downloaded on the client. For further details, see below. |
+| `favorite_status` | Boolean | Yes | Whether the episode has been favorited by the user |
+
+:::note[Potential future fields]
+- Bookmarks: timed bookmarks of this episode with optionally a description text. Related to AntennaPod feature request [#1946](https://github.com/AntennaPod/AntennaPod/issues/1946).
+- Is ignored: whether the episode has been marked as not interesting. Related to AntennaPod feature request [#5237](https://github.com/AntennaPod/AntennaPod/issues/5237).
+- Tags: labels applied to this episode. Related to Kasts feature request // Can we add a URL as reference?
+- Queues: the queues this episode is in, although it might be covered through a dedicated `queues` endpoint.
+:::
+
+## Implementation details
+### Deduplication
+
+When fetching a feed, several scenarios could lead to duplicated episodes if not matched correctly. In these cases, episodes must be matched and deduplicated to ensure their data is still synced. For details on this topic, please see [Episode matching & deduplication](matching-deduplication).
+
+### Download status
+
+The `download_status` is a _declaration of intent_, not an indication of the current status. If a user downloads an episode on client A, this client passes on this value to the server and thereby to other clients. Client B can then:
+* download immediately
+* download later (e.g. as soon as a WiFi connection is available)
+
+It is up to the implementers whether this applies both to automatic and manual downloads, or only to manual downloads.
+
+While an optional field, if `download_status` is supported both by the server and the client, the client is expected to respect this field value. If the client may not download due to space limitations or won't download at all, then it should not declare support for this field.
+
+:::note[Discussion details]
+See meeting notes from [2024-02-27](https://pad.funkwhale.audio/s/88C5eXrRq)
+:::
+
+## Timestamping changes to resolve field conflicts
+
+To prevent unresolvable conflicts at field level, apps are expected to record timestamps. These MUST be the _time at which a field value was changed in the client_ (by the user or system). More recent values take precedence. The time of sending by the client or processing by the client should _not_ be sent.
+
+:::note[Example]
+Scenario:
+1. User plays first quarter of an episode on client A, which is offline.
+2. User skips first quarter and starts playback until halfway through the episode on client B, which is offline.
+3. Client B comes online and synchronises changes.
+4. Client A comes online and synchronises changes.
+
+In this case, the changes made on client B should take precedence, as they were made later in time than the changes on client A, even though client A most recently synchronised its data.
+:::
+
+To enable this, all _data fields_ are nested objects with a `value` and a `timestamp` field:
+
+
+
+
+ ```json {8, 11} collapse={2-4}
+ {
+ "played_status": {
+ "value": true,
+ "timestamp": "2024-06-19T15:46"
+ }
+ }
+ ```
+
+
+
+
+ ```xml {8, 11} collapse={3-5}
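+ <?xml version="1.0" encoding="UTF-8"?>
+ <played_status>
+   <value>true</value>
+   <timestamp>2024-06-19T15:46</timestamp>
+ </played_status>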
+ ```
+
+
+
+
+Timestamps are recorded in [ISO 8601 format]. When a new episode is created, timestamps are set to current time.
+
+:::note[Discussion details]
+See meeting notes from [2024-02-27](https://pad.funkwhale.audio/s/6mWuDexgz#Data-timestamps)
+
+**Important open discussion point:**
+
+Sending back & forth timestamps is not needed if client always first pulls before push, and assuming that the client stores the timestamps of these changes locally. Maybe we should note this as a requirement, rather than submitting the timestamps.
+Kasts, for example, keeps a log of changes which is wiped on each sync, and always has the timestamp of latest sync.
+
+Does this still work, though, with the scenario laid out above, where clients don't sync in order of changes being applied (but in order coming online)?
+
+:::
+
+
+:::note[Tombstoning]
+[K-NL: This is copied from the subscriptions endpoint, and should probably be moved to a 'general principles' page that applies to all endpoints. 'Pull first, post later' should go there as well.]
+
+[K-NL: We should probably also use Tombstoning for [remote item](https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/tags/remote-item.md) episodes. While they can be merged at server side, if a client doesn't support 'remote items' (and just have the same episode twice in the database - would they, or would this episode tag just be skipped?) then they must still have a way to POST and GET data about that duplicate episode. For this purpose, episode tombstones should probably also keep their own `sync_id`s, so that clients can still use it as it were a normal (and not a tombstoned) episode.]
+
+Servers SHOULD hold all previous `guid` and `feed_url` field data with a link to the succeeding data (such that a path of values can be followed) or with a link to the most recent data. This enables the server to handle situations in which clients submit old data. For example:
+
+- A user finds a podcast, whose URL had changed, and adds the old URL in the app. Because the client doesn't have the old URL in its database, it recognizes the podcast as **new** and POSTs the `feed_url` to the `/subscriptions` endpoint. If the user is already subscribed to the podcast (with the current feed URL) this would lead to a duplicate subscription.
+- A user has a device that they didn't use for a very long time. In that time, a podcaster added a GUID in their feed, leading to updated data in this field. When the client connects to the server again to pull all episode changes since the last connection, it retrieves episodes with their current subscription `guid`. The client won't recognize the subscription and will fail to update the status of episodes.
+ :::
+
+[ISO 8601 format]: https://www.iso.org/iso-8601-date-and-time-format.html
\ No newline at end of file
diff --git a/src/content/docs/specs/index.mdx b/src/content/docs/specs/index.mdx
index 4582eb28..75dac039 100644
--- a/src/content/docs/specs/index.mdx
+++ b/src/content/docs/specs/index.mdx
@@ -34,4 +34,9 @@ Which features a server supports MUST be exposed through a Capabilities endpoint
description="Specs relating to managing subscriptions"
href="/specs/subscriptions"
/>
-
+
+
\ No newline at end of file
diff --git a/src/content/docs/specs/subscriptions/add-new.mdx b/src/content/docs/specs/subscriptions/add-new.mdx
index 422e8920..50765a90 100644
--- a/src/content/docs/specs/subscriptions/add-new.mdx
+++ b/src/content/docs/specs/subscriptions/add-new.mdx
@@ -3,6 +3,9 @@ title: Add a new subscription
description: Add a new subscription
sidebar:
order: 2
+ badge:
+ text: Core
+ variant: caution
---
import CoreAction from "@partials/_core-action.mdx";
diff --git a/src/content/docs/specs/subscriptions/delete.mdx b/src/content/docs/specs/subscriptions/delete.mdx
index 4939f5d4..26325c82 100644
--- a/src/content/docs/specs/subscriptions/delete.mdx
+++ b/src/content/docs/specs/subscriptions/delete.mdx
@@ -3,6 +3,9 @@ title: Delete a subscription
description: Fetch the status of a deletion process
sidebar:
order: 6
+ badge:
+ text: Core
+ variant: caution
---
import CoreAction from "@partials/_core-action.mdx";
diff --git a/src/content/docs/specs/subscriptions/get-all.mdx b/src/content/docs/specs/subscriptions/get-all.mdx
index 69fc7957..eea370e3 100644
--- a/src/content/docs/specs/subscriptions/get-all.mdx
+++ b/src/content/docs/specs/subscriptions/get-all.mdx
@@ -3,6 +3,9 @@ title: Get all subscriptions
description: Get all subscriptions for a user
sidebar:
order: 3
+ badge:
+ text: Core
+ variant: caution
---
import CoreAction from "@partials/_core-action.mdx";
diff --git a/src/content/docs/specs/subscriptions/get-single.mdx b/src/content/docs/specs/subscriptions/get-single.mdx
index 601623ee..6cce7f41 100644
--- a/src/content/docs/specs/subscriptions/get-single.mdx
+++ b/src/content/docs/specs/subscriptions/get-single.mdx
@@ -3,6 +3,9 @@ title: Get a single subscription
description: Get a single subscription for a user
sidebar:
order: 4
+ badge:
+ text: Core
+ variant: caution
---
import CoreAction from "@partials/_core-action.mdx";
diff --git a/src/content/docs/specs/subscriptions/index.mdx b/src/content/docs/specs/subscriptions/index.mdx
index a6975902..09cb9535 100644
--- a/src/content/docs/specs/subscriptions/index.mdx
+++ b/src/content/docs/specs/subscriptions/index.mdx
@@ -20,7 +20,7 @@ Subscriptions represent the feeds a user has subscribed to. A subscription objec
| Field | Type | Nullable? | Description |
| ---------------------- | -------------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `feed_url` | String | No | The URL of the podcast RSS feed |
-| `guid` | String\ | No | The globally unique ID of the podcast |
+| `guid` | String\ | No | The globally unique ID of the podcast ?? Should this be renamed to `podcast_guid` to be in line with the episode specification? |
| `is_subscribed` | Boolean | No | Whether the user is subscribed to the podcast |
| `subscription_changed` | Datetime | No | The date on which the `is_subscribed` field was last updated. Presented in [ISO 8601 format] |
| `guid_changed` | Datetime | No | The date on which the podcast's `guid` or `new_guid` was last updated. Presented in [ISO 8601 format] |
diff --git a/src/content/docs/specs/subscriptions/status.mdx b/src/content/docs/specs/subscriptions/status.mdx
index 91b59944..2a73340e 100644
--- a/src/content/docs/specs/subscriptions/status.mdx
+++ b/src/content/docs/specs/subscriptions/status.mdx
@@ -3,6 +3,9 @@ title: Deletion status endpoint
description: Fetch the status of a deletion process
sidebar:
order: 7
+ badge:
+ text: Core
+ variant: caution
---
import CoreAction from "@partials/_core-action.mdx";
diff --git a/src/content/docs/specs/subscriptions/update.mdx b/src/content/docs/specs/subscriptions/update.mdx
index d7bb440a..370cf176 100644
--- a/src/content/docs/specs/subscriptions/update.mdx
+++ b/src/content/docs/specs/subscriptions/update.mdx
@@ -3,6 +3,9 @@ title: Update a subscription
description: Update details about a subscription
sidebar:
order: 5
+ badge:
+ text: Core
+ variant: caution
---
import CoreAction from "@partials/_core-action.mdx";
diff --git a/src/styles/custom.css b/src/styles/custom.css
new file mode 100644
index 00000000..47e5cc3e
--- /dev/null
+++ b/src/styles/custom.css
@@ -0,0 +1,3 @@
+tbody code {
+ overflow-wrap: normal;
+}
\ No newline at end of file