pydantic_ai.models.wrapper

CompletedStreamedResponse

Bases: StreamedResponse

A StreamedResponse that wraps an already-completed ModelResponse.

Used by durable execution integrations (Temporal, Prefect, DBOS) where the actual stream is consumed within a task/activity and only the final response is returned.

Source code in pydantic_ai_slim/pydantic_ai/models/wrapper.py

class CompletedStreamedResponse(StreamedResponse):
    """Expose a finished `ModelResponse` through the `StreamedResponse` interface.

    Durable execution integrations (Temporal, Prefect, DBOS) consume the real
    stream inside a task/activity and hand back only the final response. This
    class presents that response as a stream that produces no events.
    """

    def __init__(self, model_request_parameters: ModelRequestParameters, response: ModelResponse):
        super().__init__(model_request_parameters)
        # The already-completed response that every accessor below reads from.
        self.response = response

    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Produce an empty event stream: the response is already complete."""
        # The branch is never taken; the `yield` inside it only exists so that
        # this function is compiled as an async generator.
        # noinspection PyUnreachableCode
        if False:  # pragma: no cover
            yield

    def get(self) -> ModelResponse:
        """Return the wrapped response unchanged."""
        return self.response

    def usage(self) -> RequestUsage:  # pragma: no cover
        """Return the usage recorded on the wrapped response."""
        return self.response.usage

    @property
    def model_name(self) -> str:  # pragma: no cover
        """Model name of the wrapped response, or `''` when it is unset/empty."""
        name = self.response.model_name
        return name or ''

    @property
    def provider_name(self) -> str:  # pragma: no cover
        """Provider name of the wrapped response, or `''` when it is unset/empty."""
        name = self.response.provider_name
        return name or ''

    @property
    def provider_url(self) -> str | None:  # pragma: no cover
        """Provider URL of the wrapped response."""
        return self.response.provider_url

    @property
    def timestamp(self) -> datetime:  # pragma: no cover
        """Timestamp of the wrapped response."""
        return self.response.timestamp

ReplayStreamedResponse

Bases: StreamedResponse

A StreamedResponse that replays a completed ModelResponse as synthetic stream events.

Unlike CompletedStreamedResponse, which yields no events, this class replays each response part as a PartStartEvent, followed by a PartDeltaEvent carrying the part's full content (emitted for non-empty text and thinking parts and for tool calls), and finally a PartEndEvent. This allows streaming consumers (event_stream_handler, run_stream_events, etc.) to work transparently when a capability short-circuits wrap_model_request and returns a complete ModelResponse instead of calling the handler.

Primarily used by durable execution capabilities (Temporal, DBOS, Prefect) where model requests are executed in activities/steps and only the final response is returned, but streaming callers still expect events.

Source code in pydantic_ai_slim/pydantic_ai/models/wrapper.py

class ReplayStreamedResponse(StreamedResponse):
    """Replay a finished `ModelResponse` as a sequence of synthetic stream events.

    Where `CompletedStreamedResponse` yields nothing, this class walks the parts of
    the response and emits a `PartStartEvent` for each, followed by a single
    `PartDeltaEvent` carrying the whole content for non-empty text/thinking parts
    and for tool calls. Streaming consumers (`event_stream_handler`,
    `run_stream_events`, etc.) can therefore keep working when a capability
    short-circuits `wrap_model_request` and returns a complete `ModelResponse`
    instead of calling the handler.

    The main users are durable execution capabilities (Temporal, DBOS, Prefect):
    the model request runs inside an activity/step and only the final response
    comes back, while callers still expect stream events.
    """

    def __init__(self, model_request_parameters: ModelRequestParameters, response: ModelResponse):
        super().__init__(model_request_parameters)
        # The completed response whose parts are replayed.
        self.response = response

    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Yield a start event, and where applicable one full-content delta, per part."""
        for part in self.response.parts:
            # Registering a new part with the parts manager always yields a PartStartEvent.
            started = self._parts_manager.handle_part(vendor_part_id=None, part=part)
            assert isinstance(started, PartStartEvent)
            yield started

            # Choose the delta (if any) that carries the part's entire content.
            # NOTE(review): the part registered above already holds its full content, so a
            # consumer that concatenates start-event content with deltas may see it
            # twice - confirm this is intended.
            if isinstance(part, TextPart) and part.content:
                delta = TextPartDelta(content_delta=part.content)
            elif isinstance(part, ThinkingPart) and part.content:
                delta = ThinkingPartDelta(content_delta=part.content)
            elif isinstance(part, ToolCallPart):
                delta = ToolCallPartDelta(args_delta=part.args)
            else:
                # Empty text/thinking parts and all other part kinds get no delta.
                delta = None

            if delta is not None:
                yield PartDeltaEvent(index=started.index, delta=delta)
            # PartEndEvent is added automatically by StreamedResponse.__aiter__

    def get(self) -> ModelResponse:
        """Return the wrapped response unchanged."""
        return self.response

    def usage(self) -> RequestUsage:
        """Return the usage recorded on the wrapped response."""
        return self.response.usage

    @property
    def model_name(self) -> str:
        """Model name of the wrapped response, or `''` when it is unset/empty."""
        name = self.response.model_name
        return name or ''

    @property
    def provider_name(self) -> str:
        """Provider name of the wrapped response, or `''` when it is unset/empty."""
        name = self.response.provider_name
        return name or ''

    @property
    def provider_url(self) -> str | None:
        """Provider URL of the wrapped response."""
        return self.response.provider_url

    @property
    def timestamp(self) -> datetime:
        """Timestamp of the wrapped response."""
        return self.response.timestamp

WrapperModel `dataclass`

Bases: Model

Model which wraps another model.

Does nothing on its own, used as a base class.

Source code in pydantic_ai_slim/pydantic_ai/models/wrapper.py

@dataclass(init=False)
class WrapperModel(Model):
    """Model which wraps another model.

    Does nothing on its own, used as a base class: every method and property
    defined here forwards to `wrapped`, and attributes not found on the wrapper
    are looked up on `wrapped` via `__getattr__`.
    """

    wrapped: Model
    """The underlying model being wrapped."""

    def __init__(self, wrapped: Model | KnownModelName):
        """Store the model to delegate to, passing `wrapped` through `infer_model` first."""
        super().__init__()
        self.wrapped = infer_model(wrapped)

    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> ModelResponse:
        """Forward the request to the wrapped model and return its response."""
        return await self.wrapped.request(messages, model_settings, model_request_parameters)

    async def count_tokens(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> RequestUsage:
        """Forward token counting to the wrapped model."""
        return await self.wrapped.count_tokens(messages, model_settings, model_request_parameters)

    @asynccontextmanager
    async def request_stream(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
        run_context: RunContext[Any] | None = None,
    ) -> AsyncIterator[StreamedResponse]:
        """Open the wrapped model's stream and yield its `StreamedResponse` as-is."""
        async with self.wrapped.request_stream(
            messages, model_settings, model_request_parameters, run_context
        ) as response_stream:
            yield response_stream

    def customize_request_parameters(self, model_request_parameters: ModelRequestParameters) -> ModelRequestParameters:
        """Forward request-parameter customization to the wrapped model."""
        return self.wrapped.customize_request_parameters(model_request_parameters)

    def prepare_request(
        self,
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> tuple[ModelSettings | None, ModelRequestParameters]:
        """Forward request preparation to the wrapped model."""
        return self.wrapped.prepare_request(model_settings, model_request_parameters)

    @property
    def model_name(self) -> str:
        """Get the model name from the wrapped model."""
        return self.wrapped.model_name

    @property
    def system(self) -> str:
        """Get the system from the wrapped model."""
        return self.wrapped.system

    @property
    def profile(self) -> ModelProfile:  # type: ignore[override]
        """Get the profile from the wrapped model."""
        return self.wrapped.profile

    @property
    def settings(self) -> ModelSettings | None:
        """Get the settings from the wrapped model."""
        return self.wrapped.settings

    def __getattr__(self, item: str):
        """Delegate lookups for attributes not found on the wrapper to the wrapped model.

        Raises:
            AttributeError: If `wrapped` itself has not been set on this instance, or if
                the wrapped model does not have the attribute either.
        """
        # `__getattr__` only runs after normal lookup has failed. If `wrapped` is the
        # missing attribute (e.g. the instance was created without `__init__`, as
        # `copy`/`pickle` do), reading `self.wrapped` below would re-enter this method
        # and end in a RecursionError instead of the AttributeError callers expect.
        if item == 'wrapped':
            raise AttributeError(f'{type(self).__name__!r} object has no attribute {item!r}')
        return getattr(self.wrapped, item)

wrapped `instance-attribute`

wrapped: Model = infer_model(wrapped)

The underlying model being wrapped.

settings `property`

settings: ModelSettings | None

Get the settings from the wrapped model.

pydantic_ai.models.wrapper

CompletedStreamedResponse

ReplayStreamedResponse

WrapperModel dataclass

wrapped instance-attribute

settings property

WrapperModel `dataclass`

wrapped `instance-attribute`

settings `property`