diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 40293964..c8a8a4f7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,6 @@ jobs: lint: name: lint runs-on: ubuntu-latest - steps: - uses: actions/checkout@v4 @@ -30,6 +29,7 @@ jobs: - name: Run lints run: ./scripts/lint + test: name: test runs-on: ubuntu-latest @@ -50,4 +50,3 @@ jobs: - name: Run tests run: ./scripts/test - diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 3d2ac0bd..10f30916 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0" + ".": "0.2.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index f83abfd1..53f3a7c9 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ -configured_endpoints: 33 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/contextual-ai%2Fsunrise-4ed32c3243ce7a772e55bb1ba204736fc3fb1d712d8ca0eb91bac0c7ac626938.yml +configured_endpoints: 35 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/contextual-ai%2Fsunrise-d79ccb778953ad5c2ae4b99115429c8b3f68b3b23d9b6d90b1b40393f11a4383.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 75759bbd..446ed721 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,39 @@ # Changelog +## 0.2.0 (2025-02-08) + +Full Changelog: [v0.1.0...v0.2.0](https://github.com/ContextualAI/contextual-client-python/compare/v0.1.0...v0.2.0) + +### Features + +* **api:** update via SDK Studio ([#31](https://github.com/ContextualAI/contextual-client-python/issues/31)) ([c9de385](https://github.com/ContextualAI/contextual-client-python/commit/c9de38561c8663d1e00daa381fcb3183501993cf)) +* **api:** update via SDK Studio ([#32](https://github.com/ContextualAI/contextual-client-python/issues/32)) ([c166d77](https://github.com/ContextualAI/contextual-client-python/commit/c166d77d241e104a80ce0cddeaf2b5cfe7c59669)) +* **api:** update via SDK Studio ([#39](https://github.com/ContextualAI/contextual-client-python/issues/39)) ([9f8c0a6](https://github.com/ContextualAI/contextual-client-python/commit/9f8c0a6d4203953f195cfe5d38a69f8870bc0a9e)) +* **client:** send `X-Stainless-Read-Timeout` header ([#35](https://github.com/ContextualAI/contextual-client-python/issues/35)) ([2ddba9d](https://github.com/ContextualAI/contextual-client-python/commit/2ddba9dc9d8cb0b562c6dd7f8a3a21e2c82295bc)) + + +### Bug Fixes + +* **tests:** make test_get_platform less flaky ([#26](https://github.com/ContextualAI/contextual-client-python/issues/26)) ([3bc8a69](https://github.com/ContextualAI/contextual-client-python/commit/3bc8a69c6e9255dc1e3247fd1954e5deb5e1c155)) + + +### Chores + +* **internal:** avoid pytest-asyncio deprecation warning ([#27](https://github.com/ContextualAI/contextual-client-python/issues/27)) ([e6f70cd](https://github.com/ContextualAI/contextual-client-python/commit/e6f70cdff84defcb3b9d77e3aa0c66e9d17774d5)) +* **internal:** bummp ruff dependency ([#34](https://github.com/ContextualAI/contextual-client-python/issues/34)) ([f3a23c2](https://github.com/ContextualAI/contextual-client-python/commit/f3a23c21168a5ef99626e50782ae902c780b4059)) +* **internal:** change default timeout to an int ([#33](https://github.com/ContextualAI/contextual-client-python/issues/33)) ([280fc1f](https://github.com/ContextualAI/contextual-client-python/commit/280fc1fcce2a011bda2b895b39b85db682cc0c8c)) +* **internal:** codegen related update 
([#23](https://github.com/ContextualAI/contextual-client-python/issues/23)) ([d1f86c3](https://github.com/ContextualAI/contextual-client-python/commit/d1f86c3bc54440925725dd9c535082fa7d29d100)) +* **internal:** codegen related update ([#30](https://github.com/ContextualAI/contextual-client-python/issues/30)) ([0cbc82e](https://github.com/ContextualAI/contextual-client-python/commit/0cbc82e361567e9f0c44f9b5519d404fcba91fef)) +* **internal:** fix type traversing dictionary params ([#36](https://github.com/ContextualAI/contextual-client-python/issues/36)) ([04a1eab](https://github.com/ContextualAI/contextual-client-python/commit/04a1eaba9f246089baa2c26dac29b22e9f63f9dc)) +* **internal:** minor formatting changes ([#29](https://github.com/ContextualAI/contextual-client-python/issues/29)) ([9d063fb](https://github.com/ContextualAI/contextual-client-python/commit/9d063fbf86e64803fcc684305a67dae3a31775a0)) +* **internal:** minor style changes ([#28](https://github.com/ContextualAI/contextual-client-python/issues/28)) ([1cbda0a](https://github.com/ContextualAI/contextual-client-python/commit/1cbda0a834e06cbb4afdbc922e4e9f894cb21d40)) +* **internal:** minor type handling changes ([#37](https://github.com/ContextualAI/contextual-client-python/issues/37)) ([dd9a8e8](https://github.com/ContextualAI/contextual-client-python/commit/dd9a8e898c56fc55b9e61de09419a66ad398b7b3)) + + +### Documentation + +* **raw responses:** fix duplicate `the` ([#25](https://github.com/ContextualAI/contextual-client-python/issues/25)) ([5342fdf](https://github.com/ContextualAI/contextual-client-python/commit/5342fdfbecdd99f14d0033736ebf91700bc74f0e)) + ## 0.1.0 (2025-01-15) Full Changelog: [v0.1.0-alpha.2...v0.1.0](https://github.com/ContextualAI/contextual-client-python/compare/v0.1.0-alpha.2...v0.1.0) diff --git a/api.md b/api.md index 6e65bff3..582dfe73 100644 --- a/api.md +++ b/api.md @@ -186,3 +186,27 @@ from contextual.types import LMUnitCreateResponse Methods: - client.lmunit.create(\*\*params) -> LMUnitCreateResponse + +# Rerank + +Types: + +```python +from contextual.types import RerankCreateResponse +``` + +Methods: + +- client.rerank.create(\*\*params) -> RerankCreateResponse + +# Generate + +Types: + +```python +from contextual.types import GenerateCreateResponse +``` + +Methods: + +- client.generate.create(\*\*params) -> GenerateCreateResponse diff --git a/mypy.ini b/mypy.ini index 1a1c394d..c6b994be 100644 --- a/mypy.ini +++ b/mypy.ini @@ -41,7 +41,7 @@ cache_fine_grained = True # ``` # Changing this codegen to make mypy happy would increase complexity # and would not be worth it. 
-disable_error_code = func-returns-value +disable_error_code = func-returns-value,overload-cannot-match # https://github.com/python/mypy/issues/12162 [mypy.overrides] diff --git a/pyproject.toml b/pyproject.toml index 5ee3f351..25e201e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "contextual-client" -version = "0.1.0" +version = "0.2.0" description = "The official Python library for the Contextual AI API" dynamic = ["readme"] license = "Apache-2.0" @@ -129,6 +129,7 @@ testpaths = ["tests"] addopts = "--tb=short" xfail_strict = true asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "session" filterwarnings = [ "error" ] @@ -176,7 +177,7 @@ select = [ "T201", "T203", # misuse of typing.TYPE_CHECKING - "TCH004", + "TC004", # import rules "TID251", ] diff --git a/requirements-dev.lock b/requirements-dev.lock index 5c179819..83d02e00 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -49,7 +49,7 @@ markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -mypy==1.13.0 +mypy==1.14.1 mypy-extensions==1.0.0 # via mypy nest-asyncio==1.6.0 @@ -69,7 +69,7 @@ pydantic-core==2.27.1 # via pydantic pygments==2.18.0 # via rich -pyright==1.1.390 +pyright==1.1.392.post0 pytest==8.3.3 # via pytest-asyncio pytest-asyncio==0.24.0 @@ -79,7 +79,7 @@ pytz==2023.3.post1 # via dirty-equals respx==0.22.0 rich==13.7.1 -ruff==0.6.9 +ruff==0.9.4 setuptools==68.2.2 # via nodeenv six==1.16.0 diff --git a/scripts/bootstrap b/scripts/bootstrap index 8c5c60eb..e84fe62c 100755 --- a/scripts/bootstrap +++ b/scripts/bootstrap @@ -4,7 +4,7 @@ set -e cd "$(dirname "$0")/.." -if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then +if ! command -v rye >/dev/null 2>&1 && [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then brew bundle check >/dev/null 2>&1 || { echo "==> Installing Homebrew dependencies…" brew bundle diff --git a/scripts/lint b/scripts/lint index c22d2f81..7f5dd6b7 100755 --- a/scripts/lint +++ b/scripts/lint @@ -9,4 +9,3 @@ rye run lint echo "==> Making sure it imports" rye run python -c 'import contextual' - diff --git a/scripts/utils/ruffen-docs.py b/scripts/utils/ruffen-docs.py index 37b3d94f..0cf2bd2f 100644 --- a/scripts/utils/ruffen-docs.py +++ b/scripts/utils/ruffen-docs.py @@ -47,7 +47,7 @@ def _md_match(match: Match[str]) -> str: with _collect_error(match): code = format_code_block(code) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" def _pycon_match(match: Match[str]) -> str: code = "" @@ -97,7 +97,7 @@ def finish_fragment() -> None: def _md_pycon_match(match: Match[str]) -> str: code = _pycon_match(match) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" src = MD_RE.sub(_md_match, src) src = MD_PYCON_RE.sub(_md_pycon_match, src) diff --git a/src/contextual/_base_client.py b/src/contextual/_base_client.py index 3e53d5d2..b1d92d6b 100644 --- a/src/contextual/_base_client.py +++ b/src/contextual/_base_client.py @@ -418,10 +418,17 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0 if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: headers[idempotency_header] = options.idempotency_key or self._idempotency_key() - # Don't set the retry count header if it was already set or removed by the caller. 
We check + # Don't set these headers if they were already set or removed by the caller. We check # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. - if "x-stainless-retry-count" not in (header.lower() for header in custom_headers): + lower_custom_headers = [header.lower() for header in custom_headers] + if "x-stainless-retry-count" not in lower_custom_headers: headers["x-stainless-retry-count"] = str(retries_taken) + if "x-stainless-read-timeout" not in lower_custom_headers: + timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout + if isinstance(timeout, Timeout): + timeout = timeout.read + if timeout is not None: + headers["x-stainless-read-timeout"] = str(timeout) return headers diff --git a/src/contextual/_client.py b/src/contextual/_client.py index 0aa4bcb5..d255c5d0 100644 --- a/src/contextual/_client.py +++ b/src/contextual/_client.py @@ -24,7 +24,7 @@ get_async_library, ) from ._version import __version__ -from .resources import lmunit +from .resources import lmunit, rerank, generate from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import APIStatusError, ContextualAIError from ._base_client import ( @@ -51,6 +51,8 @@ class ContextualAI(SyncAPIClient): datastores: datastores.DatastoresResource agents: agents.AgentsResource lmunit: lmunit.LMUnitResource + rerank: rerank.RerankResource + generate: generate.GenerateResource with_raw_response: ContextualAIWithRawResponse with_streaming_response: ContextualAIWithStreamedResponse @@ -117,6 +119,8 @@ def __init__( self.datastores = datastores.DatastoresResource(self) self.agents = agents.AgentsResource(self) self.lmunit = lmunit.LMUnitResource(self) + self.rerank = rerank.RerankResource(self) + self.generate = generate.GenerateResource(self) self.with_raw_response = ContextualAIWithRawResponse(self) self.with_streaming_response = ContextualAIWithStreamedResponse(self) @@ -232,6 +236,8 @@ class AsyncContextualAI(AsyncAPIClient): datastores: datastores.AsyncDatastoresResource agents: agents.AsyncAgentsResource lmunit: lmunit.AsyncLMUnitResource + rerank: rerank.AsyncRerankResource + generate: generate.AsyncGenerateResource with_raw_response: AsyncContextualAIWithRawResponse with_streaming_response: AsyncContextualAIWithStreamedResponse @@ -298,6 +304,8 @@ def __init__( self.datastores = datastores.AsyncDatastoresResource(self) self.agents = agents.AsyncAgentsResource(self) self.lmunit = lmunit.AsyncLMUnitResource(self) + self.rerank = rerank.AsyncRerankResource(self) + self.generate = generate.AsyncGenerateResource(self) self.with_raw_response = AsyncContextualAIWithRawResponse(self) self.with_streaming_response = AsyncContextualAIWithStreamedResponse(self) @@ -414,6 +422,8 @@ def __init__(self, client: ContextualAI) -> None: self.datastores = datastores.DatastoresResourceWithRawResponse(client.datastores) self.agents = agents.AgentsResourceWithRawResponse(client.agents) self.lmunit = lmunit.LMUnitResourceWithRawResponse(client.lmunit) + self.rerank = rerank.RerankResourceWithRawResponse(client.rerank) + self.generate = generate.GenerateResourceWithRawResponse(client.generate) class AsyncContextualAIWithRawResponse: @@ -421,6 +431,8 @@ def __init__(self, client: AsyncContextualAI) -> None: self.datastores = datastores.AsyncDatastoresResourceWithRawResponse(client.datastores) self.agents = agents.AsyncAgentsResourceWithRawResponse(client.agents) self.lmunit = lmunit.AsyncLMUnitResourceWithRawResponse(client.lmunit) + 
self.rerank = rerank.AsyncRerankResourceWithRawResponse(client.rerank) + self.generate = generate.AsyncGenerateResourceWithRawResponse(client.generate) class ContextualAIWithStreamedResponse: @@ -428,6 +440,8 @@ def __init__(self, client: ContextualAI) -> None: self.datastores = datastores.DatastoresResourceWithStreamingResponse(client.datastores) self.agents = agents.AgentsResourceWithStreamingResponse(client.agents) self.lmunit = lmunit.LMUnitResourceWithStreamingResponse(client.lmunit) + self.rerank = rerank.RerankResourceWithStreamingResponse(client.rerank) + self.generate = generate.GenerateResourceWithStreamingResponse(client.generate) class AsyncContextualAIWithStreamedResponse: @@ -435,6 +449,8 @@ def __init__(self, client: AsyncContextualAI) -> None: self.datastores = datastores.AsyncDatastoresResourceWithStreamingResponse(client.datastores) self.agents = agents.AsyncAgentsResourceWithStreamingResponse(client.agents) self.lmunit = lmunit.AsyncLMUnitResourceWithStreamingResponse(client.lmunit) + self.rerank = rerank.AsyncRerankResourceWithStreamingResponse(client.rerank) + self.generate = generate.AsyncGenerateResourceWithStreamingResponse(client.generate) Client = ContextualAI diff --git a/src/contextual/_constants.py b/src/contextual/_constants.py index a2ac3b6f..6ddf2c71 100644 --- a/src/contextual/_constants.py +++ b/src/contextual/_constants.py @@ -6,7 +6,7 @@ OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" # default timeout is 1 minute -DEFAULT_TIMEOUT = httpx.Timeout(timeout=60.0, connect=5.0) +DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0) DEFAULT_MAX_RETRIES = 2 DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20) diff --git a/src/contextual/_models.py b/src/contextual/_models.py index 9a918aab..c4401ff8 100644 --- a/src/contextual/_models.py +++ b/src/contextual/_models.py @@ -172,7 +172,7 @@ def to_json( @override def __str__(self) -> str: # mypy complains about an invalid self arg - return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + return f"{self.__repr_name__()}({self.__repr_str__(', ')})" # type: ignore[misc] # Override the 'construct' method in a way that supports recursive parsing without validation. # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. @@ -426,10 +426,16 @@ def construct_type(*, value: object, type_: object) -> object: If the given value does not match the expected type then it is returned as-is. 
""" + + # store a reference to the original type we were given before we extract any inner + # types so that we can properly resolve forward references in `TypeAliasType` annotations + original_type = None + # we allow `object` as the input type because otherwise, passing things like # `Literal['value']` will be reported as a type error by type checkers type_ = cast("type[object]", type_) if is_type_alias_type(type_): + original_type = type_ # type: ignore[unreachable] type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` @@ -446,7 +452,7 @@ def construct_type(*, value: object, type_: object) -> object: if is_union(origin): try: - return validate_type(type_=cast("type[object]", type_), value=value) + return validate_type(type_=cast("type[object]", original_type or type_), value=value) except Exception: pass diff --git a/src/contextual/_response.py b/src/contextual/_response.py index d91238c9..51fc249d 100644 --- a/src/contextual/_response.py +++ b/src/contextual/_response.py @@ -136,6 +136,8 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to and is_annotated_type(cast_to): cast_to = extract_type_arg(cast_to, 0) + origin = get_origin(cast_to) or cast_to + if self._is_sse_stream: if to: if not is_stream_class_type(to): @@ -195,8 +197,6 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to == bool: return cast(R, response.text.lower() == "true") - origin = get_origin(cast_to) or cast_to - if origin == APIResponse: raise RuntimeError("Unexpected state - cast_to is `APIResponse`") @@ -210,7 +210,13 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") return cast(R, response) - if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + if ( + inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): raise TypeError( "Pydantic models must subclass our base model type, e.g. 
`from contextual import BaseModel`" ) diff --git a/src/contextual/_utils/_transform.py b/src/contextual/_utils/_transform.py index a6b62cad..18afd9d8 100644 --- a/src/contextual/_utils/_transform.py +++ b/src/contextual/_utils/_transform.py @@ -25,7 +25,7 @@ is_annotated_type, strip_annotated_type, ) -from .._compat import model_dump, is_typeddict +from .._compat import get_origin, model_dump, is_typeddict _T = TypeVar("_T") @@ -164,9 +164,14 @@ def _transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return _transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) @@ -307,9 +312,14 @@ async def _async_transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return await _async_transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) diff --git a/src/contextual/_version.py b/src/contextual/_version.py index 780de0f9..6fa6e77d 100644 --- a/src/contextual/_version.py +++ b/src/contextual/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "contextual" -__version__ = "0.1.0" # x-release-please-version +__version__ = "0.2.0" # x-release-please-version diff --git a/src/contextual/resources/__init__.py b/src/contextual/resources/__init__.py index 5d288207..a46db872 100644 --- a/src/contextual/resources/__init__.py +++ b/src/contextual/resources/__init__.py @@ -16,6 +16,22 @@ LMUnitResourceWithStreamingResponse, AsyncLMUnitResourceWithStreamingResponse, ) +from .rerank import ( + RerankResource, + AsyncRerankResource, + RerankResourceWithRawResponse, + AsyncRerankResourceWithRawResponse, + RerankResourceWithStreamingResponse, + AsyncRerankResourceWithStreamingResponse, +) +from .generate import ( + GenerateResource, + AsyncGenerateResource, + GenerateResourceWithRawResponse, + AsyncGenerateResourceWithRawResponse, + GenerateResourceWithStreamingResponse, + AsyncGenerateResourceWithStreamingResponse, +) from .datastores import ( DatastoresResource, AsyncDatastoresResource, @@ -44,4 +60,16 @@ "AsyncLMUnitResourceWithRawResponse", "LMUnitResourceWithStreamingResponse", "AsyncLMUnitResourceWithStreamingResponse", + "RerankResource", + "AsyncRerankResource", + "RerankResourceWithRawResponse", + "AsyncRerankResourceWithRawResponse", + "RerankResourceWithStreamingResponse", + "AsyncRerankResourceWithStreamingResponse", + "GenerateResource", + "AsyncGenerateResource", + "GenerateResourceWithRawResponse", + "AsyncGenerateResourceWithRawResponse", + "GenerateResourceWithStreamingResponse", + "AsyncGenerateResourceWithStreamingResponse", ] diff --git a/src/contextual/resources/agents/agents.py b/src/contextual/resources/agents/agents.py index e662b69d..8696a9e3 100644 --- a/src/contextual/resources/agents/agents.py +++ b/src/contextual/resources/agents/agents.py @@ -81,7 +81,7 @@ def tune(self) 
-> TuneResource: @cached_property def with_raw_response(self) -> AgentsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -136,7 +136,7 @@ def create( suggested_queries: These queries will show up as suggestions in the Contextual UI when users load the agent. We recommend including common queries that users will ask, as well as complex queries so users understand the types of complex queries the system can - handle. + handle. The max length of all the suggested queries is 1000. system_prompt: Instructions that your agent references when generating responses. Note that we do not guarantee that the system will follow these instructions exactly. @@ -199,7 +199,7 @@ def update( suggested_queries: These queries will show up as suggestions in the Contextual UI when users load the agent. We recommend including common queries that users will ask, as well as complex queries so users understand the types of complex queries the system can - handle. + handle. The max length of all the suggested queries is 1000. system_prompt: Instructions that your agent references when generating responses. Note that we do not guarantee that the system will follow these instructions exactly. @@ -375,7 +375,7 @@ def tune(self) -> AsyncTuneResource: @cached_property def with_raw_response(self) -> AsyncAgentsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -430,7 +430,7 @@ async def create( suggested_queries: These queries will show up as suggestions in the Contextual UI when users load the agent. We recommend including common queries that users will ask, as well as complex queries so users understand the types of complex queries the system can - handle. + handle. The max length of all the suggested queries is 1000. system_prompt: Instructions that your agent references when generating responses. Note that we do not guarantee that the system will follow these instructions exactly. @@ -493,7 +493,7 @@ async def update( suggested_queries: These queries will show up as suggestions in the Contextual UI when users load the agent. We recommend including common queries that users will ask, as well as complex queries so users understand the types of complex queries the system can - handle. + handle. The max length of all the suggested queries is 1000. system_prompt: Instructions that your agent references when generating responses. Note that we do not guarantee that the system will follow these instructions exactly. 
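The `suggested_queries` docstring tweaks above pair with the new `x-stainless-read-timeout` header added in `_base_client.py`: a per-request `httpx.Timeout` is reduced to its `read` value before being stringified into the header. A minimal sketch of both in one call, assuming `CONTEXTUAL_API_KEY` is exported and that `name` is the agent's required create parameter (it is not visible in these hunks):

```python
import httpx

from contextual import ContextualAI

client = ContextualAI()  # assumed to read CONTEXTUAL_API_KEY from the environment

agent = client.agents.create(
    name="finance-qa",  # hypothetical parameter; not shown in the hunks above
    # Per the updated docstring, the combined length of all suggested
    # queries must not exceed 1000.
    suggested_queries=["What was last quarter's revenue?", "Summarize the 10-K."],
    # With the _build_headers change, this request carries
    # `x-stainless-read-timeout: 30.0` (the Timeout's read value, stringified).
    timeout=httpx.Timeout(60.0, read=30.0, write=10.0, connect=5.0),
)
```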
diff --git a/src/contextual/resources/agents/datasets/datasets.py b/src/contextual/resources/agents/datasets/datasets.py index ba9a5bc5..c07ea914 100644 --- a/src/contextual/resources/agents/datasets/datasets.py +++ b/src/contextual/resources/agents/datasets/datasets.py @@ -24,7 +24,7 @@ def evaluate(self) -> EvaluateResource: @cached_property def with_raw_response(self) -> DatasetsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -49,7 +49,7 @@ def evaluate(self) -> AsyncEvaluateResource: @cached_property def with_raw_response(self) -> AsyncDatasetsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers diff --git a/src/contextual/resources/agents/datasets/evaluate.py b/src/contextual/resources/agents/datasets/evaluate.py index fdf4c21b..7eda6600 100644 --- a/src/contextual/resources/agents/datasets/evaluate.py +++ b/src/contextual/resources/agents/datasets/evaluate.py @@ -49,7 +49,7 @@ class EvaluateResource(SyncAPIResource): @cached_property def with_raw_response(self) -> EvaluateResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -81,20 +81,20 @@ def create( ) -> CreateDatasetResponse: """ Create a new evaluation `Dataset` for the specified `Agent` using the provided - JSONL file. A `Dataset` is a versioned collection of samples conforming to a - particular schema, and can be used to store `Evaluation` test-sets and retrieve - `Evaluation` results. + JSONL or CSV file. A `Dataset` is a versioned collection of samples conforming + to a particular schema, and can be used to store `Evaluation` test-sets and + retrieve `Evaluation` results. Each `Dataset` is versioned and validated against its schema during creation and subsequent updates. The provided `Dataset` file must conform to the schema defined for the `dataset_type`. - File schema for `dataset_type` `evaluation_set` is a JSONL or CSV file where - each line is one JSON object with the following required keys: + File schema for `dataset_type` `evaluation_set` is a CSV file or a JSONL file + where each line is one JSON object. The following keys are required: - - `prompt` (required, `string`): Prompt or question + - `prompt` (`string`): Prompt or question - - `reference` (required, `string`): Required reference or ground truth response + - `reference` (`string`): Reference or ground truth response Args: agent_id: Agent ID to associate with the evaluation dataset @@ -103,7 +103,7 @@ def create( dataset_type: Type of evaluation dataset which determines its schema and validation rules. 
- file: JSONL file containing the evaluation dataset + file: JSONL or CSV file containing the evaluation dataset extra_headers: Send extra headers @@ -228,12 +228,12 @@ def update( Create a new version of the dataset by appending content to the `Dataset` and validating against its schema. - File schema for `dataset_type` `evaluation_set` is a JSONL file where each line - is one JSON object with the following required keys: + File schema for `dataset_type` `evaluation_set` is a CSV file or a JSONL file + where each line is one JSON object. The following keys are required: - `prompt` (`string`): Prompt or question - - `reference` (`string`): Required reference or ground truth response + - `reference` (`string`): Reference or ground truth response Args: agent_id: Agent ID associated with the evaluation dataset @@ -243,7 +243,7 @@ def update( dataset_type: Type of evaluation dataset which determines its schema and validation rules. Must match the `dataset_type` used at dataset creation time. - file: JSONL file containing the entries to append to the evaluation dataset + file: JSONL or CSV file containing the entries to append to the evaluation dataset extra_headers: Send extra headers @@ -427,7 +427,7 @@ class AsyncEvaluateResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncEvaluateResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -459,20 +459,20 @@ async def create( ) -> CreateDatasetResponse: """ Create a new evaluation `Dataset` for the specified `Agent` using the provided - JSONL file. A `Dataset` is a versioned collection of samples conforming to a - particular schema, and can be used to store `Evaluation` test-sets and retrieve - `Evaluation` results. + JSONL or CSV file. A `Dataset` is a versioned collection of samples conforming + to a particular schema, and can be used to store `Evaluation` test-sets and + retrieve `Evaluation` results. Each `Dataset` is versioned and validated against its schema during creation and subsequent updates. The provided `Dataset` file must conform to the schema defined for the `dataset_type`. - File schema for `dataset_type` `evaluation_set` is a JSONL or CSV file where - each line is one JSON object with the following required keys: + File schema for `dataset_type` `evaluation_set` is a CSV file or a JSONL file + where each line is one JSON object. The following keys are required: - - `prompt` (required, `string`): Prompt or question + - `prompt` (`string`): Prompt or question - - `reference` (required, `string`): Required reference or ground truth response + - `reference` (`string`): Reference or ground truth response Args: agent_id: Agent ID to associate with the evaluation dataset @@ -481,7 +481,7 @@ async def create( dataset_type: Type of evaluation dataset which determines its schema and validation rules. - file: JSONL file containing the evaluation dataset + file: JSONL or CSV file containing the evaluation dataset extra_headers: Send extra headers @@ -606,12 +606,12 @@ async def update( Create a new version of the dataset by appending content to the `Dataset` and validating against its schema. 
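Concretely, the `evaluation_set` schema described in the hunks above is one JSON object per line with `prompt` and `reference` keys. A sketch that writes a conforming JSONL file and uploads it, assuming the `client` from earlier, that the resource is reached as `client.agents.datasets.evaluate` (inferred from the module path), and that `dataset_name` is the remaining required parameter (only `agent_id`, `dataset_type`, and `file` are visible here):

```python
import json

# Each JSONL line is one object with the two required keys.
rows = [
    {"prompt": "What was last quarter's revenue?",
     "reference": "Q3 revenue was $1.2 million, up $0.1 million from Q2."},
    {"prompt": "When was the company founded?", "reference": "It was founded in 2011."},
]
with open("evalset.jsonl", "w") as f:
    for row in rows:
        f.write(json.dumps(row) + "\n")

dataset = client.agents.datasets.evaluate.create(
    "agent_123",                     # agent_id path parameter; placeholder value
    dataset_name="q3-finance-eval",  # assumed parameter, not shown in this hunk
    dataset_type="evaluation_set",
    file=open("evalset.jsonl", "rb"),
)
```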
- File schema for `dataset_type` `evaluation_set` is a JSONL file where each line - is one JSON object with the following required keys: + File schema for `dataset_type` `evaluation_set` is a CSV file or a JSONL file + where each line is one JSON object. The following keys are required: - `prompt` (`string`): Prompt or question - - `reference` (`string`): Required reference or ground truth response + - `reference` (`string`): Reference or ground truth response Args: agent_id: Agent ID associated with the evaluation dataset @@ -621,7 +621,7 @@ async def update( dataset_type: Type of evaluation dataset which determines its schema and validation rules. Must match the `dataset_type` used at dataset creation time. - file: JSONL file containing the entries to append to the evaluation dataset + file: JSONL or CSV file containing the entries to append to the evaluation dataset extra_headers: Send extra headers diff --git a/src/contextual/resources/agents/evaluate/evaluate.py b/src/contextual/resources/agents/evaluate/evaluate.py index 42d22329..11c00402 100644 --- a/src/contextual/resources/agents/evaluate/evaluate.py +++ b/src/contextual/resources/agents/evaluate/evaluate.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import List, Mapping, Optional, cast +from typing import List, Mapping, cast from typing_extensions import Literal import httpx @@ -45,7 +45,7 @@ def jobs(self) -> JobsResource: @cached_property def with_raw_response(self) -> EvaluateResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -68,7 +68,7 @@ def create( metrics: List[Literal["equivalence", "groundedness"]], evalset_file: FileTypes | NotGiven = NOT_GIVEN, evalset_name: str | NotGiven = NOT_GIVEN, - llm_model_id: Optional[str] | NotGiven = NOT_GIVEN, + llm_model_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -152,7 +152,7 @@ def jobs(self) -> AsyncJobsResource: @cached_property def with_raw_response(self) -> AsyncEvaluateResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -175,7 +175,7 @@ async def create( metrics: List[Literal["equivalence", "groundedness"]], evalset_file: FileTypes | NotGiven = NOT_GIVEN, evalset_name: str | NotGiven = NOT_GIVEN, - llm_model_id: Optional[str] | NotGiven = NOT_GIVEN, + llm_model_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, diff --git a/src/contextual/resources/agents/evaluate/jobs.py b/src/contextual/resources/agents/evaluate/jobs.py index f26341ec..80983b2b 100644 --- a/src/contextual/resources/agents/evaluate/jobs.py +++ b/src/contextual/resources/agents/evaluate/jobs.py @@ -24,7 +24,7 @@ class JobsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> JobsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -168,7 +168,7 @@ class AsyncJobsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncJobsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers diff --git a/src/contextual/resources/agents/query.py b/src/contextual/resources/agents/query.py index 1b6e1a41..e7ec483c 100644 --- a/src/contextual/resources/agents/query.py +++ b/src/contextual/resources/agents/query.py @@ -39,7 +39,7 @@ class QueryResource(SyncAPIResource): @cached_property def with_raw_response(self) -> QueryResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -60,6 +60,7 @@ def create( agent_id: str, *, messages: Iterable[query_create_params.Message], + include_retrieval_content_text: bool | NotGiven = NOT_GIVEN, retrievals_only: bool | NotGiven = NOT_GIVEN, conversation_id: str | NotGiven = NOT_GIVEN, llm_model_id: str | NotGiven = NOT_GIVEN, @@ -82,7 +83,14 @@ def create( multiple objects to provide conversation history. Last message in the list must be a `user`-sent message (i.e. `role` equals `"user"`). - retrievals_only: Set to `true` to skip generation of the response. + include_retrieval_content_text: Ignored if `retrievals_only` is True. Set to `true` to include the text of the + retrieved contents in the response. If `false`, only metadata about the + retrieved contents will be included, not content text. Content text and other + metadata can also be fetched separately using the + `/agents/{agent_id}/query/{message_id}/retrieval/info` endpoint. + + retrievals_only: Set to `true` to fetch retrieval content and metadata, and then skip generation + of the response. conversation_id: An optional alternative to providing message history in the `messages` field. 
If provided, all messages in the `messages` list prior to the latest user-sent @@ -119,7 +127,13 @@ def create( extra_query=extra_query, extra_body=extra_body, timeout=timeout, - query=maybe_transform({"retrievals_only": retrievals_only}, query_create_params.QueryCreateParams), + query=maybe_transform( + { + "include_retrieval_content_text": include_retrieval_content_text, + "retrievals_only": retrievals_only, + }, + query_create_params.QueryCreateParams, + ), ), cast_to=QueryResponse, ) @@ -307,7 +321,7 @@ class AsyncQueryResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncQueryResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -328,6 +342,7 @@ async def create( agent_id: str, *, messages: Iterable[query_create_params.Message], + include_retrieval_content_text: bool | NotGiven = NOT_GIVEN, retrievals_only: bool | NotGiven = NOT_GIVEN, conversation_id: str | NotGiven = NOT_GIVEN, llm_model_id: str | NotGiven = NOT_GIVEN, @@ -350,7 +365,14 @@ async def create( multiple objects to provide conversation history. Last message in the list must be a `user`-sent message (i.e. `role` equals `"user"`). - retrievals_only: Set to `true` to skip generation of the response. + include_retrieval_content_text: Ignored if `retrievals_only` is True. Set to `true` to include the text of the + retrieved contents in the response. If `false`, only metadata about the + retrieved contents will be included, not content text. Content text and other + metadata can also be fetched separately using the + `/agents/{agent_id}/query/{message_id}/retrieval/info` endpoint. + + retrievals_only: Set to `true` to fetch retrieval content and metadata, and then skip generation + of the response. conversation_id: An optional alternative to providing message history in the `messages` field. If provided, all messages in the `messages` list prior to the latest user-sent @@ -388,7 +410,11 @@ async def create( extra_body=extra_body, timeout=timeout, query=await async_maybe_transform( - {"retrievals_only": retrievals_only}, query_create_params.QueryCreateParams + { + "include_retrieval_content_text": include_retrieval_content_text, + "retrievals_only": retrievals_only, + }, + query_create_params.QueryCreateParams, ), ), cast_to=QueryResponse, diff --git a/src/contextual/resources/agents/tune/jobs.py b/src/contextual/resources/agents/tune/jobs.py index edf2b718..b8f324fa 100644 --- a/src/contextual/resources/agents/tune/jobs.py +++ b/src/contextual/resources/agents/tune/jobs.py @@ -24,7 +24,7 @@ class JobsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> JobsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -168,7 +168,7 @@ class AsyncJobsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncJobsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers diff --git a/src/contextual/resources/agents/tune/models.py b/src/contextual/resources/agents/tune/models.py index c911ee51..c7ad6284 100644 --- a/src/contextual/resources/agents/tune/models.py +++ b/src/contextual/resources/agents/tune/models.py @@ -23,7 +23,7 @@ class ModelsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> ModelsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -79,7 +79,7 @@ class AsyncModelsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncModelsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers diff --git a/src/contextual/resources/agents/tune/tune.py b/src/contextual/resources/agents/tune/tune.py index a71dd32e..83352b9e 100644 --- a/src/contextual/resources/agents/tune/tune.py +++ b/src/contextual/resources/agents/tune/tune.py @@ -56,7 +56,7 @@ def models(self) -> ModelsResource: @cached_property def with_raw_response(self) -> TuneResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -111,21 +111,21 @@ def create( JSON object represents a single training example. The four required fields are `guideline`, `prompt`, `reference`, and `knowledge`. - - `knowledge` (`list[str]`): Knowledge or retrievals used to generate the - reference response, as a list of string text chunks + - `knowledge` (`list[str]`): Retrieved knowledge used to generate the reference + answer. `knowledge` is a list of retrieved text chunks. - - `reference` field should be the model's response to the prompt. + - `reference` (`str`): The gold-standard answer to the prompt. - - `guideline` (`str): Guidelines or criteria for model output + - `guideline` (`str`): Guidelines for model output. - - `prompt` (required, `string`): Prompt or question model should respond to. + - `prompt` (`str`): Question for the model to respond to. 
Example: ```json [ { - "guideline": "The response should be accurate.", + "guideline": "The answer should be accurate.", "prompt": "What was last quarter's revenue?", "reference": "According to recent reports, the Q3 revenue was $1.2 million, a 0.1 million increase from Q2.", "knowledge": [ @@ -189,7 +189,7 @@ def models(self) -> AsyncModelsResource: @cached_property def with_raw_response(self) -> AsyncTuneResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -244,21 +244,21 @@ async def create( JSON object represents a single training example. The four required fields are `guideline`, `prompt`, `reference`, and `knowledge`. - - `knowledge` (`list[str]`): Knowledge or retrievals used to generate the - reference response, as a list of string text chunks + - `knowledge` (`list[str]`): Retrieved knowledge used to generate the reference + answer. `knowledge` is a list of retrieved text chunks. - - `reference` field should be the model's response to the prompt. + - `reference` (`str`): The gold-standard answer to the prompt. - - `guideline` (`str): Guidelines or criteria for model output + - `guideline` (`str`): Guidelines for model output. - - `prompt` (required, `string`): Prompt or question model should respond to. + - `prompt` (`str`): Question for the model to respond to. Example: ```json [ { - "guideline": "The response should be accurate.", + "guideline": "The answer should be accurate.", "prompt": "What was last quarter's revenue?", "reference": "According to recent reports, the Q3 revenue was $1.2 million, a 0.1 million increase from Q2.", "knowledge": [ diff --git a/src/contextual/resources/datastores/datastores.py b/src/contextual/resources/datastores/datastores.py index 666101b6..a0791411 100644 --- a/src/contextual/resources/datastores/datastores.py +++ b/src/contextual/resources/datastores/datastores.py @@ -43,7 +43,7 @@ def documents(self) -> DocumentsResource: @cached_property def with_raw_response(self) -> DatastoresResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -245,7 +245,7 @@ def documents(self) -> AsyncDocumentsResource: @cached_property def with_raw_response(self) -> AsyncDatastoresResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers diff --git a/src/contextual/resources/datastores/documents.py b/src/contextual/resources/datastores/documents.py index 90750a00..67b7ba6e 100644 --- a/src/contextual/resources/datastores/documents.py +++ b/src/contextual/resources/datastores/documents.py @@ -36,7 +36,7 @@ class DocumentsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> DocumentsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -268,7 +268,7 @@ class AsyncDocumentsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncDocumentsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers diff --git a/src/contextual/resources/generate.py b/src/contextual/resources/generate.py new file mode 100644 index 00000000..9a1beb6e --- /dev/null +++ b/src/contextual/resources/generate.py @@ -0,0 +1,218 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Iterable + +import httpx + +from ..types import generate_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.generate_create_response import GenerateCreateResponse + +__all__ = ["GenerateResource", "AsyncGenerateResource"] + + +class GenerateResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> GenerateResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers + """ + return GenerateResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> GenerateResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/ContextualAI/contextual-client-python#with_streaming_response + """ + return GenerateResourceWithStreamingResponse(self) + + def create( + self, + *, + knowledge: List[str], + messages: Iterable[generate_create_params.Message], + model: str, + system_prompt: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> GenerateCreateResponse: + """ + Generate a response using Contextual's Grounded Language Model (GLM), an LLM + engineered specifically to prioritize faithfulness to in-context retrievals over + parametric knowledge to reduce hallucinations in Retrieval-Augmented Generation. + + The total request cannot exceed 6,100 tokens. + + Args: + knowledge: The knowledge sources the model can use when generating a response. + + messages: List of messages in the conversation so far. The last message must be from the + user. + + model: The version of the Contextual's GLM to use. Currently, we just have "v1". + + system_prompt: Instructions that the model follows when generating responses. Note that we do + not guarantee that the model follows these instructions exactly. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/generate", + body=maybe_transform( + { + "knowledge": knowledge, + "messages": messages, + "model": model, + "system_prompt": system_prompt, + }, + generate_create_params.GenerateCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GenerateCreateResponse, + ) + + +class AsyncGenerateResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncGenerateResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers + """ + return AsyncGenerateResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncGenerateResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/ContextualAI/contextual-client-python#with_streaming_response + """ + return AsyncGenerateResourceWithStreamingResponse(self) + + async def create( + self, + *, + knowledge: List[str], + messages: Iterable[generate_create_params.Message], + model: str, + system_prompt: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> GenerateCreateResponse: + """ + Generate a response using Contextual's Grounded Language Model (GLM), an LLM + engineered specifically to prioritize faithfulness to in-context retrievals over + parametric knowledge to reduce hallucinations in Retrieval-Augmented Generation. + + The total request cannot exceed 6,100 tokens. + + Args: + knowledge: The knowledge sources the model can use when generating a response. 
+ + messages: List of messages in the conversation so far. The last message must be from the + user. + + model: The version of the Contextual's GLM to use. Currently, we just have "v1". + + system_prompt: Instructions that the model follows when generating responses. Note that we do + not guarantee that the model follows these instructions exactly. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/generate", + body=await async_maybe_transform( + { + "knowledge": knowledge, + "messages": messages, + "model": model, + "system_prompt": system_prompt, + }, + generate_create_params.GenerateCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=GenerateCreateResponse, + ) + + +class GenerateResourceWithRawResponse: + def __init__(self, generate: GenerateResource) -> None: + self._generate = generate + + self.create = to_raw_response_wrapper( + generate.create, + ) + + +class AsyncGenerateResourceWithRawResponse: + def __init__(self, generate: AsyncGenerateResource) -> None: + self._generate = generate + + self.create = async_to_raw_response_wrapper( + generate.create, + ) + + +class GenerateResourceWithStreamingResponse: + def __init__(self, generate: GenerateResource) -> None: + self._generate = generate + + self.create = to_streamed_response_wrapper( + generate.create, + ) + + +class AsyncGenerateResourceWithStreamingResponse: + def __init__(self, generate: AsyncGenerateResource) -> None: + self._generate = generate + + self.create = async_to_streamed_response_wrapper( + generate.create, + ) diff --git a/src/contextual/resources/lmunit.py b/src/contextual/resources/lmunit.py index d0b9fb9e..23caecf8 100644 --- a/src/contextual/resources/lmunit.py +++ b/src/contextual/resources/lmunit.py @@ -28,7 +28,7 @@ class LMUnitResource(SyncAPIResource): @cached_property def with_raw_response(self) -> LMUnitResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers @@ -105,7 +105,7 @@ class AsyncLMUnitResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncLMUnitResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers diff --git a/src/contextual/resources/rerank.py b/src/contextual/resources/rerank.py new file mode 100644 index 00000000..79b738c6 --- /dev/null +++ b/src/contextual/resources/rerank.py @@ -0,0 +1,212 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
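Before the `rerank` twin below, here is how the completed `generate` resource reads from the caller's side. A usage sketch, assuming the `client` from earlier and that a `Message` is a `{"role": ..., "content": ...}` mapping (the `generate_create_params.Message` shape is not shown in this diff):

```python
response = client.generate.create(
    model="v1",  # per the docstring, "v1" is currently the only GLM version
    knowledge=[
        "Q3 revenue was $1.2 million, a $0.1 million increase from Q2.",
    ],
    # Assumed message shape; the last message must be from the user.
    messages=[{"role": "user", "content": "What was last quarter's revenue?"}],
    system_prompt="Answer strictly from the provided knowledge.",
)
print(response)  # a GenerateCreateResponse; its fields are defined elsewhere
```

The same call works on `AsyncContextualAI` with `await`, and `client.generate.with_raw_response.create(...)` returns the unparsed HTTP response, as the wrapper classes above wire up.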
+ +from __future__ import annotations + +from typing import List + +import httpx + +from ..types import rerank_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.rerank_create_response import RerankCreateResponse + +__all__ = ["RerankResource", "AsyncRerankResource"] + + +class RerankResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> RerankResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers + """ + return RerankResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RerankResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/ContextualAI/contextual-client-python#with_streaming_response + """ + return RerankResourceWithStreamingResponse(self) + + def create( + self, + *, + documents: List[str], + model: str, + query: str, + top_n: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RerankCreateResponse: + """ + Rank a list of documents according to their relevance to a query. + + The total request cannot exceed 400,000 tokens. The combined length of any + document and the query must not exceed 4,000 tokens. + + Args: + documents: The texts to be reranked according to their relevance to the query + + model: The version of the reranker to use. Currently, we just have "v1". + + query: The string against which documents will be ranked for relevance + + top_n: The number of top-ranked results to return + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/rerank", + body=maybe_transform( + { + "documents": documents, + "model": model, + "query": query, + "top_n": top_n, + }, + rerank_create_params.RerankCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RerankCreateResponse, + ) + + +class AsyncRerankResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncRerankResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/ContextualAI/contextual-client-python#accessing-raw-response-data-eg-headers + """ + return AsyncRerankResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRerankResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/ContextualAI/contextual-client-python#with_streaming_response + """ + return AsyncRerankResourceWithStreamingResponse(self) + + async def create( + self, + *, + documents: List[str], + model: str, + query: str, + top_n: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> RerankCreateResponse: + """ + Rank a list of documents according to their relevance to a query. + + The total request cannot exceed 400,000 tokens. The combined length of any + document and the query must not exceed 4,000 tokens. + + Args: + documents: The texts to be reranked according to their relevance to the query + + model: The version of the reranker to use. Currently, we just have "v1". + + query: The string against which documents will be ranked for relevance + + top_n: The number of top-ranked results to return + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/rerank", + body=await async_maybe_transform( + { + "documents": documents, + "model": model, + "query": query, + "top_n": top_n, + }, + rerank_create_params.RerankCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=RerankCreateResponse, + ) + + +class RerankResourceWithRawResponse: + def __init__(self, rerank: RerankResource) -> None: + self._rerank = rerank + + self.create = to_raw_response_wrapper( + rerank.create, + ) + + +class AsyncRerankResourceWithRawResponse: + def __init__(self, rerank: AsyncRerankResource) -> None: + self._rerank = rerank + + self.create = async_to_raw_response_wrapper( + rerank.create, + ) + + +class RerankResourceWithStreamingResponse: + def __init__(self, rerank: RerankResource) -> None: + self._rerank = rerank + + self.create = to_streamed_response_wrapper( + rerank.create, + ) + + +class AsyncRerankResourceWithStreamingResponse: + def __init__(self, rerank: AsyncRerankResource) -> None: + self._rerank = rerank + + self.create = async_to_streamed_response_wrapper( + rerank.create, + ) diff --git a/src/contextual/types/__init__.py b/src/contextual/types/__init__.py index 85eba32b..e539092d 100644 --- a/src/contextual/types/__init__.py +++ b/src/contextual/types/__init__.py @@ -12,8 +12,12 @@ from .create_agent_output import CreateAgentOutput as CreateAgentOutput from .list_agents_response import ListAgentsResponse as ListAgentsResponse from .lmunit_create_params import LMUnitCreateParams as LMUnitCreateParams +from .rerank_create_params import RerankCreateParams as RerankCreateParams from 
.datastore_list_params import DatastoreListParams as DatastoreListParams +from .generate_create_params import GenerateCreateParams as GenerateCreateParams from .lmunit_create_response import LMUnitCreateResponse as LMUnitCreateResponse +from .rerank_create_response import RerankCreateResponse as RerankCreateResponse from .datastore_create_params import DatastoreCreateParams as DatastoreCreateParams +from .generate_create_response import GenerateCreateResponse as GenerateCreateResponse from .list_datastores_response import ListDatastoresResponse as ListDatastoresResponse from .create_datastore_response import CreateDatastoreResponse as CreateDatastoreResponse diff --git a/src/contextual/types/agent_create_params.py b/src/contextual/types/agent_create_params.py index e77ca0cd..2e26b459 100644 --- a/src/contextual/types/agent_create_params.py +++ b/src/contextual/types/agent_create_params.py @@ -26,7 +26,7 @@ class AgentCreateParams(TypedDict, total=False): These queries will show up as suggestions in the Contextual UI when users load the agent. We recommend including common queries that users will ask, as well as complex queries so users understand the types of complex queries the system can - handle. + handle. The max length of all the suggested queries is 1000. """ system_prompt: str diff --git a/src/contextual/types/agent_metadata.py b/src/contextual/types/agent_metadata.py index 5f25666c..5a5dee93 100644 --- a/src/contextual/types/agent_metadata.py +++ b/src/contextual/types/agent_metadata.py @@ -30,7 +30,7 @@ class AgentMetadata(BaseModel): These queries will show up as suggestions in the Contextual UI when users load the agent. We recommend including common queries that users will ask, as well as complex queries so users understand the types of complex queries the system can - handle. + handle. The max length of all the suggested queries is 1000. """ system_prompt: Optional[str] = None diff --git a/src/contextual/types/agent_update_params.py b/src/contextual/types/agent_update_params.py index a0afd705..58fd9f04 100644 --- a/src/contextual/types/agent_update_params.py +++ b/src/contextual/types/agent_update_params.py @@ -25,7 +25,7 @@ class AgentUpdateParams(TypedDict, total=False): These queries will show up as suggestions in the Contextual UI when users load the agent. We recommend including common queries that users will ask, as well as complex queries so users understand the types of complex queries the system can - handle. + handle. The max length of all the suggested queries is 1000. 
""" system_prompt: str diff --git a/src/contextual/types/agents/datasets/evaluate_create_params.py b/src/contextual/types/agents/datasets/evaluate_create_params.py index 645f8c88..04d8daba 100644 --- a/src/contextual/types/agents/datasets/evaluate_create_params.py +++ b/src/contextual/types/agents/datasets/evaluate_create_params.py @@ -17,4 +17,4 @@ class EvaluateCreateParams(TypedDict, total=False): """Type of evaluation dataset which determines its schema and validation rules.""" file: Required[FileTypes] - """JSONL file containing the evaluation dataset""" + """JSONL or CSV file containing the evaluation dataset""" diff --git a/src/contextual/types/agents/datasets/evaluate_update_params.py b/src/contextual/types/agents/datasets/evaluate_update_params.py index 9bab396f..7dedca3e 100644 --- a/src/contextual/types/agents/datasets/evaluate_update_params.py +++ b/src/contextual/types/agents/datasets/evaluate_update_params.py @@ -20,4 +20,4 @@ class EvaluateUpdateParams(TypedDict, total=False): """ file: Required[FileTypes] - """JSONL file containing the entries to append to the evaluation dataset""" + """JSONL or CSV file containing the entries to append to the evaluation dataset""" diff --git a/src/contextual/types/agents/evaluate/list_evaluation_jobs_response.py b/src/contextual/types/agents/evaluate/list_evaluation_jobs_response.py index 757706e1..5e22201f 100644 --- a/src/contextual/types/agents/evaluate/list_evaluation_jobs_response.py +++ b/src/contextual/types/agents/evaluate/list_evaluation_jobs_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List +from typing import List, Optional from datetime import datetime from typing_extensions import Literal @@ -22,6 +22,24 @@ class EvaluationRound(BaseModel): user_email: str """Email of the user who launched the evaluation round""" + finished_at: Optional[datetime] = None + """Timestamp indicating when the evaluation round finished processing""" + + num_failed_predictions: Optional[int] = None + """Number of predictions that failed during the evaluation round""" + + num_predictions: Optional[int] = None + """Total number of predictions made during the evaluation round""" + + num_successful_predictions: Optional[int] = None + """Number of predictions that were successful during the evaluation round""" + + processing_started_at: Optional[datetime] = None + """Timestamp indicating when the evaluation round started processing""" + + summary_results: Optional[object] = None + """Score of the evaluation round""" + class ListEvaluationJobsResponse(BaseModel): evaluation_rounds: List[EvaluationRound] diff --git a/src/contextual/types/agents/evaluate_create_params.py b/src/contextual/types/agents/evaluate_create_params.py index def12aa3..b84f2b31 100644 --- a/src/contextual/types/agents/evaluate_create_params.py +++ b/src/contextual/types/agents/evaluate_create_params.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import List, Optional +from typing import List from typing_extensions import Literal, Required, TypedDict from ..._types import FileTypes @@ -28,5 +28,5 @@ class EvaluateCreateParams(TypedDict, total=False): provided, but not both. """ - llm_model_id: Optional[str] + llm_model_id: str """ID of the model to evaluate. 
Uses the default model if not specified.""" diff --git a/src/contextual/types/agents/query_create_params.py b/src/contextual/types/agents/query_create_params.py index 7c7e7eb6..3cbe5c8b 100644 --- a/src/contextual/types/agents/query_create_params.py +++ b/src/contextual/types/agents/query_create_params.py @@ -16,8 +16,20 @@ class QueryCreateParams(TypedDict, total=False): must be a `user`-sent message (i.e. `role` equals `"user"`). """ + include_retrieval_content_text: bool + """Ignored if `retrievals_only` is True. + + Set to `true` to include the text of the retrieved contents in the response. If + `false`, only metadata about the retrieved contents will be included, not + content text. Content text and other metadata can also be fetched separately + using the `/agents/{agent_id}/query/{message_id}/retrieval/info` endpoint. + """ + retrievals_only: bool - """Set to `true` to skip generation of the response.""" + """ + Set to `true` to fetch retrieval content and metadata, and then skip generation + of the response. + """ conversation_id: str """An optional alternative to providing message history in the `messages` field. @@ -41,4 +53,4 @@ class Message(TypedDict, total=False): """Content of the message""" role: Required[Literal["user", "system", "assistant"]] - """Role of sender""" + """Role of the sender""" diff --git a/src/contextual/types/agents/query_response.py b/src/contextual/types/agents/query_response.py index 93f41cc9..3444768e 100644 --- a/src/contextual/types/agents/query_response.py +++ b/src/contextual/types/agents/query_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Optional +from typing import List, Optional from typing_extensions import Literal from ..._models import BaseModel @@ -27,11 +27,11 @@ class RetrievalContent(BaseModel): Will be `file` for any docs ingested through ingestion API. """ - content: Optional[str] = None - """Retrieved content""" + content_text: Optional[str] = None + """Text of the retrieved content. - extras: Optional[Dict[str, str]] = None - """Reserved for extra metadata""" + Included in response to a query if `include_retrieval_content_text` is True + """ number: Optional[int] = None """Index of the retrieved item in the retrieval_contents list (starting from 1)""" @@ -59,7 +59,7 @@ class Message(BaseModel): """Content of the message""" role: Literal["user", "system", "assistant"] - """Role of sender""" + """Role of the sender""" class QueryResponse(BaseModel): diff --git a/src/contextual/types/agents/retrieval_info_response.py b/src/contextual/types/agents/retrieval_info_response.py index e923357b..ca9d8f86 100644 --- a/src/contextual/types/agents/retrieval_info_response.py +++ b/src/contextual/types/agents/retrieval_info_response.py @@ -11,6 +11,9 @@ class ContentMetadata(BaseModel): content_id: str """Id of the content.""" + content_text: str + """Text of the content.""" + height: float """Height of the image.""" diff --git a/src/contextual/types/agents/tune_create_params.py b/src/contextual/types/agents/tune_create_params.py index 3124f9b4..7d90470e 100644 --- a/src/contextual/types/agents/tune_create_params.py +++ b/src/contextual/types/agents/tune_create_params.py @@ -17,21 +17,21 @@ class TuneCreateParams(TypedDict, total=False): JSON object represents a single training example. The four required fields are `guideline`, `prompt`, `reference`, and `knowledge`. 
-    - `knowledge` (`list[str]`): Knowledge or retrievals used to generate the
-      reference response, as a list of string text chunks
+    - `knowledge` (`list[str]`): Retrieved knowledge used to generate the reference
+      answer. `knowledge` is a list of retrieved text chunks.
 
-    - `reference` field should be the model's response to the prompt.
+    - `reference` (`str`): The gold-standard answer to the prompt.
 
-    - `guideline` (`str): Guidelines or criteria for model output
+    - `guideline` (`str`): Guidelines for model output.
 
-    - `prompt` (required, `string`): Prompt or question model should respond to.
+    - `prompt` (`str`): Question for the model to respond to.
 
     Example:
 
     ```json
     [
       {
-        "guideline": "The response should be accurate.",
+        "guideline": "The answer should be accurate.",
         "prompt": "What was last quarter's revenue?",
         "reference": "According to recent reports, the Q3 revenue was $1.2 million, a 0.1 million increase from Q2.",
         "knowledge": [
diff --git a/src/contextual/types/generate_create_params.py b/src/contextual/types/generate_create_params.py
new file mode 100644
index 00000000..ccb6e0d6
--- /dev/null
+++ b/src/contextual/types/generate_create_params.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["GenerateCreateParams", "Message"]
+
+
+class GenerateCreateParams(TypedDict, total=False):
+    knowledge: Required[List[str]]
+    """The knowledge sources the model can use when generating a response."""
+
+    messages: Required[Iterable[Message]]
+    """List of messages in the conversation so far.
+
+    The last message must be from the user.
+    """
+
+    model: Required[str]
+    """The version of Contextual's GLM to use. Currently, we just have "v1"."""
+
+    system_prompt: str
+    """Instructions that the model follows when generating responses.
+
+    Note that we do not guarantee that the model follows these instructions exactly.
+    """
+
+
+class Message(TypedDict, total=False):
+    content: Required[str]
+    """Content of the message"""
+
+    role: Required[Literal["user", "system", "assistant"]]
+    """Role of the sender"""
diff --git a/src/contextual/types/generate_create_response.py b/src/contextual/types/generate_create_response.py
new file mode 100644
index 00000000..a786167b
--- /dev/null
+++ b/src/contextual/types/generate_create_response.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from .._models import BaseModel
+
+__all__ = ["GenerateCreateResponse"]
+
+
+class GenerateCreateResponse(BaseModel):
+    response: str
+    """The model's response to the last user message."""
diff --git a/src/contextual/types/rerank_create_params.py b/src/contextual/types/rerank_create_params.py
new file mode 100644
index 00000000..06ace6cf
--- /dev/null
+++ b/src/contextual/types/rerank_create_params.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Required, TypedDict
+
+__all__ = ["RerankCreateParams"]
+
+
+class RerankCreateParams(TypedDict, total=False):
+    documents: Required[List[str]]
+    """The texts to be reranked according to their relevance to the query"""
+
+    model: Required[str]
+    """The version of the reranker to use. Currently, we just have "v1"."""
+
+    query: Required[str]
+    """The string against which documents will be ranked for relevance"""
+
+    top_n: int
+    """The number of top-ranked results to return"""
diff --git a/src/contextual/types/rerank_create_response.py b/src/contextual/types/rerank_create_response.py
new file mode 100644
index 00000000..091d3c7c
--- /dev/null
+++ b/src/contextual/types/rerank_create_response.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+
+from .._models import BaseModel
+
+__all__ = ["RerankCreateResponse", "Result"]
+
+
+class Result(BaseModel):
+    index: int
+    """Index of the document in the input list, starting with 0"""
+
+    relevance_score: float
+    """
+    Relevance scores assess how likely a document is to contain information that
+    helps answer the query. The model outputs scores in a wide range, which we
+    normalize to a 0-1 scale and truncate to 8 decimal places. Our reranker is
+    designed for RAG, so it checks whether a document contains information useful
+    for answering the query; a reranker designed for direct Q&A would behave
+    differently.
+    """
+
+
+class RerankCreateResponse(BaseModel):
+    results: List[Result]
+    """
+    The ranked results, sorted by relevance score. Each result carries the index
+    of the corresponding input document and its relevance score.
+    """
diff --git a/tests/api_resources/agents/test_query.py b/tests/api_resources/agents/test_query.py
index 2f690c03..2e7da653 100644
--- a/tests/api_resources/agents/test_query.py
+++ b/tests/api_resources/agents/test_query.py
@@ -45,6 +45,7 @@ def test_method_create_with_all_params(self, client: ContextualAI) -> None:
                     "role": "user",
                 }
             ],
+            include_retrieval_content_text=True,
             retrievals_only=True,
             conversation_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
             llm_model_id="llm_model_id",
@@ -171,7 +172,7 @@ def test_method_metrics_with_all_params(self, client: ContextualAI) -> None:
             agent_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
             created_after=parse_datetime("2019-12-27T18:11:19.117Z"),
             created_before=parse_datetime("2019-12-27T18:11:19.117Z"),
-            limit=0,
+            limit=1000,
             offset=0,
         )
         assert_matches_type(QueryMetricsResponse, query, path=["response"])
@@ -287,6 +288,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncContextual
                     "role": "user",
                 }
             ],
+            include_retrieval_content_text=True,
             retrievals_only=True,
             conversation_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
             llm_model_id="llm_model_id",
@@ -413,7 +415,7 @@ async def test_method_metrics_with_all_params(self, async_client: AsyncContextua
             agent_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e",
             created_after=parse_datetime("2019-12-27T18:11:19.117Z"),
             created_before=parse_datetime("2019-12-27T18:11:19.117Z"),
-            limit=0,
+            limit=1000,
             offset=0,
         )
         assert_matches_type(QueryMetricsResponse, query, path=["response"])
diff --git a/tests/api_resources/test_generate.py b/tests/api_resources/test_generate.py
new file mode 100644
index 00000000..7cbf83f2
--- /dev/null
+++ b/tests/api_resources/test_generate.py
@@ -0,0 +1,156 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from contextual import ContextualAI, AsyncContextualAI +from tests.utils import assert_matches_type +from contextual.types import GenerateCreateResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestGenerate: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: ContextualAI) -> None: + generate = client.generate.create( + knowledge=["string"], + messages=[ + { + "content": "content", + "role": "user", + } + ], + model="model", + ) + assert_matches_type(GenerateCreateResponse, generate, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: ContextualAI) -> None: + generate = client.generate.create( + knowledge=["string"], + messages=[ + { + "content": "content", + "role": "user", + } + ], + model="model", + system_prompt="system_prompt", + ) + assert_matches_type(GenerateCreateResponse, generate, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: ContextualAI) -> None: + response = client.generate.with_raw_response.create( + knowledge=["string"], + messages=[ + { + "content": "content", + "role": "user", + } + ], + model="model", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + generate = response.parse() + assert_matches_type(GenerateCreateResponse, generate, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: ContextualAI) -> None: + with client.generate.with_streaming_response.create( + knowledge=["string"], + messages=[ + { + "content": "content", + "role": "user", + } + ], + model="model", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + generate = response.parse() + assert_matches_type(GenerateCreateResponse, generate, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncGenerate: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncContextualAI) -> None: + generate = await async_client.generate.create( + knowledge=["string"], + messages=[ + { + "content": "content", + "role": "user", + } + ], + model="model", + ) + assert_matches_type(GenerateCreateResponse, generate, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncContextualAI) -> None: + generate = await async_client.generate.create( + knowledge=["string"], + messages=[ + { + "content": "content", + "role": "user", + } + ], + model="model", + system_prompt="system_prompt", + ) + assert_matches_type(GenerateCreateResponse, generate, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncContextualAI) -> None: + response = await async_client.generate.with_raw_response.create( + knowledge=["string"], + messages=[ + { + "content": "content", + "role": "user", + } + ], + model="model", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + generate = await response.parse() + assert_matches_type(GenerateCreateResponse, generate, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, 
async_client: AsyncContextualAI) -> None: + async with async_client.generate.with_streaming_response.create( + knowledge=["string"], + messages=[ + { + "content": "content", + "role": "user", + } + ], + model="model", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + generate = await response.parse() + assert_matches_type(GenerateCreateResponse, generate, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_rerank.py b/tests/api_resources/test_rerank.py new file mode 100644 index 00000000..cd4e4f76 --- /dev/null +++ b/tests/api_resources/test_rerank.py @@ -0,0 +1,116 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from contextual import ContextualAI, AsyncContextualAI +from tests.utils import assert_matches_type +from contextual.types import RerankCreateResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestRerank: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: ContextualAI) -> None: + rerank = client.rerank.create( + documents=["string"], + model="model", + query="x", + ) + assert_matches_type(RerankCreateResponse, rerank, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: ContextualAI) -> None: + rerank = client.rerank.create( + documents=["string"], + model="model", + query="x", + top_n=0, + ) + assert_matches_type(RerankCreateResponse, rerank, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: ContextualAI) -> None: + response = client.rerank.with_raw_response.create( + documents=["string"], + model="model", + query="x", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + rerank = response.parse() + assert_matches_type(RerankCreateResponse, rerank, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: ContextualAI) -> None: + with client.rerank.with_streaming_response.create( + documents=["string"], + model="model", + query="x", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + rerank = response.parse() + assert_matches_type(RerankCreateResponse, rerank, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncRerank: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncContextualAI) -> None: + rerank = await async_client.rerank.create( + documents=["string"], + model="model", + query="x", + ) + assert_matches_type(RerankCreateResponse, rerank, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncContextualAI) -> None: + rerank = await async_client.rerank.create( + documents=["string"], + model="model", + query="x", + top_n=0, + ) + assert_matches_type(RerankCreateResponse, rerank, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncContextualAI) -> None: + response = await async_client.rerank.with_raw_response.create( + documents=["string"], + model="model", + 
query="x", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + rerank = await response.parse() + assert_matches_type(RerankCreateResponse, rerank, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncContextualAI) -> None: + async with async_client.rerank.with_streaming_response.create( + documents=["string"], + model="model", + query="x", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + rerank = await response.parse() + assert_matches_type(RerankCreateResponse, rerank, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/test_client.py b/tests/test_client.py index b1e2ab37..2c685292 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -6,6 +6,7 @@ import os import sys import json +import time import asyncio import inspect import subprocess @@ -1635,10 +1636,20 @@ async def test_main() -> None: [sys.executable, "-c", test_code], text=True, ) as process: - try: - process.wait(2) - if process.returncode: - raise AssertionError("calling get_platform using asyncify resulted in a non-zero exit code") - except subprocess.TimeoutExpired as e: - process.kill() - raise AssertionError("calling get_platform using asyncify resulted in a hung process") from e + timeout = 10 # seconds + + start_time = time.monotonic() + while True: + return_code = process.poll() + if return_code is not None: + if return_code != 0: + raise AssertionError("calling get_platform using asyncify resulted in a non-zero exit code") + + # success + break + + if time.monotonic() - start_time > timeout: + process.kill() + raise AssertionError("calling get_platform using asyncify resulted in a hung process") + + time.sleep(0.1) diff --git a/tests/test_transform.py b/tests/test_transform.py index 394a4baa..0566b45d 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -2,7 +2,7 @@ import io import pathlib -from typing import Any, List, Union, TypeVar, Iterable, Optional, cast +from typing import Any, Dict, List, Union, TypeVar, Iterable, Optional, cast from datetime import date, datetime from typing_extensions import Required, Annotated, TypedDict @@ -388,6 +388,15 @@ def my_iter() -> Iterable[Baz8]: } +@parametrize +@pytest.mark.asyncio +async def test_dictionary_items(use_async: bool) -> None: + class DictItems(TypedDict): + foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")] + + assert await transform({"foo": {"foo_baz": "bar"}}, Dict[str, DictItems], use_async) == {"foo": {"fooBaz": "bar"}} + + class TypedDictIterableUnionStr(TypedDict): foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")]