diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock index efd609d..3e22aef 100644 --- a/.speakeasy/gen.lock +++ b/.speakeasy/gen.lock @@ -1,7 +1,7 @@ lockVersion: 2.0.0 id: cfd52247-6a25-4c6d-bbce-fe6fce0cd69d management: - docChecksum: a5b3a567dd4de3ab77a9f0b23d4a9f10 + docChecksum: 7419f3b58a64f08efb375ead9e169446 docVersion: 1.0.0 speakeasyVersion: 1.666.0 generationVersion: 2.768.0 @@ -60,12 +60,18 @@ generatedFiles: - docs/components/chaterrorerror.md - docs/components/chatgenerationparams.md - docs/components/chatgenerationparamsdatacollection.md + - docs/components/chatgenerationparamsimageconfig.md - docs/components/chatgenerationparamsmaxprice.md + - docs/components/chatgenerationparamspluginautorouter.md - docs/components/chatgenerationparamspluginfileparser.md - docs/components/chatgenerationparamspluginmoderation.md - docs/components/chatgenerationparamspluginresponsehealing.md - docs/components/chatgenerationparamspluginunion.md - docs/components/chatgenerationparamspluginweb.md + - docs/components/chatgenerationparamspreferredmaxlatency.md + - docs/components/chatgenerationparamspreferredmaxlatencyunion.md + - docs/components/chatgenerationparamspreferredminthroughput.md + - docs/components/chatgenerationparamspreferredminthroughputunion.md - docs/components/chatgenerationparamsprovider.md - docs/components/chatgenerationparamsresponseformatjsonobject.md - docs/components/chatgenerationparamsresponseformatpython.md @@ -126,6 +132,7 @@ generatedFiles: - docs/components/filepath.md - docs/components/filepathtype.md - docs/components/forbiddenresponseerrordata.md + - docs/components/idautorouter.md - docs/components/idfileparser.md - docs/components/idmoderation.md - docs/components/idresponsehealing.md @@ -141,6 +148,7 @@ generatedFiles: - docs/components/message.md - docs/components/messagecontent.md - docs/components/messagedeveloper.md + - docs/components/modality.md - docs/components/model.md - docs/components/modelarchitecture.md - docs/components/modelarchitectureinstructtype.md @@ -251,9 +259,11 @@ generatedFiles: - docs/components/openresponsesreasoningtype.md - docs/components/openresponsesrequest.md - docs/components/openresponsesrequestignore.md + - docs/components/openresponsesrequestimageconfig.md - docs/components/openresponsesrequestmaxprice.md - docs/components/openresponsesrequestonly.md - docs/components/openresponsesrequestorder.md + - docs/components/openresponsesrequestpluginautorouter.md - docs/components/openresponsesrequestpluginfileparser.md - docs/components/openresponsesrequestpluginmoderation.md - docs/components/openresponsesrequestpluginresponsehealing.md @@ -318,7 +328,12 @@ generatedFiles: - docs/components/pdfengine.md - docs/components/pdfparserengine.md - docs/components/pdfparseroptions.md + - docs/components/percentilelatencycutoffs.md + - docs/components/percentilestats.md + - docs/components/percentilethroughputcutoffs.md - docs/components/perrequestlimits.md + - docs/components/preferredmaxlatency.md + - docs/components/preferredminthroughput.md - docs/components/pricing.md - docs/components/prompt.md - docs/components/prompttokensdetails.md @@ -386,6 +401,7 @@ generatedFiles: - docs/components/responsesoutputitemfunctioncallstatusunion.md - docs/components/responsesoutputitemfunctioncalltype.md - docs/components/responsesoutputitemreasoning.md + - docs/components/responsesoutputitemreasoningformat.md - docs/components/responsesoutputitemreasoningstatuscompleted.md - docs/components/responsesoutputitemreasoningstatusincomplete.md - 
docs/components/responsesoutputitemreasoningstatusinprogress.md @@ -399,6 +415,7 @@ generatedFiles: - docs/components/responsesoutputmessagestatusinprogress.md - docs/components/responsesoutputmessagestatusunion.md - docs/components/responsesoutputmessagetype.md + - docs/components/responsesoutputmodality.md - docs/components/responsessearchcontextsize.md - docs/components/responseswebsearchcalloutput.md - docs/components/responseswebsearchcalloutputtype.md @@ -684,7 +701,12 @@ generatedFiles: - src/openrouter/components/paymentrequiredresponseerrordata.py - src/openrouter/components/pdfparserengine.py - src/openrouter/components/pdfparseroptions.py + - src/openrouter/components/percentilelatencycutoffs.py + - src/openrouter/components/percentilestats.py + - src/openrouter/components/percentilethroughputcutoffs.py - src/openrouter/components/perrequestlimits.py + - src/openrouter/components/preferredmaxlatency.py + - src/openrouter/components/preferredminthroughput.py - src/openrouter/components/providername.py - src/openrouter/components/provideroverloadedresponseerrordata.py - src/openrouter/components/providerpreferences.py @@ -716,6 +738,7 @@ generatedFiles: - src/openrouter/components/responsesoutputitemfunctioncall.py - src/openrouter/components/responsesoutputitemreasoning.py - src/openrouter/components/responsesoutputmessage.py + - src/openrouter/components/responsesoutputmodality.py - src/openrouter/components/responsessearchcontextsize.py - src/openrouter/components/responseswebsearchcalloutput.py - src/openrouter/components/responseswebsearchuserlocation.py @@ -982,7 +1005,7 @@ examples: slug: "" responses: "200": - application/json: {"data": {"id": "openai/gpt-4", "name": "GPT-4", "created": 1692901234, "description": "GPT-4 is a large multimodal model that can solve difficult problems with greater accuracy.", "architecture": {"tokenizer": "GPT", "instruct_type": "chatml", "modality": "text->text", "input_modalities": ["text"], "output_modalities": ["text"]}, "endpoints": [{"name": "OpenAI: GPT-4", "model_name": "GPT-4", "context_length": 8192, "pricing": {"prompt": "0.00003", "completion": "0.00006"}, "provider_name": "OpenAI", "tag": "openai", "quantization": "fp16", "max_completion_tokens": 4096, "max_prompt_tokens": 8192, "supported_parameters": ["temperature", "top_p", "max_tokens", "frequency_penalty", "presence_penalty"], "uptime_last_30m": 99.5, "supports_implicit_caching": true}]}} + application/json: {"data": {"id": "openai/gpt-4", "name": "GPT-4", "created": 1692901234, "description": "GPT-4 is a large multimodal model that can solve difficult problems with greater accuracy.", "architecture": {"tokenizer": "GPT", "instruct_type": "chatml", "modality": "text->text", "input_modalities": ["text"], "output_modalities": ["text"]}, "endpoints": [{"name": "OpenAI: GPT-4", "model_name": "GPT-4", "context_length": 8192, "pricing": {"prompt": "0.00003", "completion": "0.00006"}, "provider_name": "OpenAI", "tag": "openai", "quantization": "fp16", "max_completion_tokens": 4096, "max_prompt_tokens": 8192, "supported_parameters": ["temperature", "top_p", "max_tokens", "frequency_penalty", "presence_penalty"], "uptime_last_30m": 99.5, "supports_implicit_caching": true, "latency_last_30m": {"p50": 0.25, "p75": 0.35, "p90": 0.48, "p99": 0.85}, "throughput_last_30m": {"p50": 45.2, "p75": 38.5, "p90": 28.3, "p99": 15.1}}]}} "404": application/json: {"error": {"code": 404, "message": "Resource not found"}} "500": @@ -991,7 +1014,7 @@ examples: speakeasy-default-list-endpoints-zdr: 
responses: "200": - application/json: {"data": [{"name": "OpenAI: GPT-4", "model_name": "GPT-4", "context_length": 8192, "pricing": {"prompt": "0.00003", "completion": "0.00006"}, "provider_name": "OpenAI", "tag": "openai", "quantization": "fp16", "max_completion_tokens": 4096, "max_prompt_tokens": 8192, "supported_parameters": ["temperature", "top_p", "max_tokens"], "uptime_last_30m": 99.5, "supports_implicit_caching": true}]} + application/json: {"data": [{"name": "OpenAI: GPT-4", "model_name": "GPT-4", "context_length": 8192, "pricing": {"prompt": "0.00003", "completion": "0.00006"}, "provider_name": "OpenAI", "tag": "openai", "quantization": "fp16", "max_completion_tokens": 4096, "max_prompt_tokens": 8192, "supported_parameters": ["temperature", "top_p", "max_tokens"], "uptime_last_30m": 99.5, "supports_implicit_caching": true, "latency_last_30m": {"p50": 25.5, "p75": 35.2, "p90": 48.7, "p99": 85.3}, "throughput_last_30m": {"p50": 25.5, "p75": 35.2, "p90": 48.7, "p99": 85.3}}]} "500": application/json: {"error": {"code": 500, "message": "Internal Server Error"}} getParameters: diff --git a/.speakeasy/in.openapi.yaml b/.speakeasy/in.openapi.yaml index 4047ff2..b978e35 100644 --- a/.speakeasy/in.openapi.yaml +++ b/.speakeasy/in.openapi.yaml @@ -268,7 +268,24 @@ components: allOf: - $ref: '#/components/schemas/OutputItemReasoning' - type: object - properties: {} + properties: + signature: + type: string + nullable: true + description: A signature for the reasoning content, used for verification + example: EvcBCkgIChABGAIqQKkSDbRuVEQUk9qN1odC098l9SEj... + format: + type: string + nullable: true + enum: + - unknown + - openai-responses-v1 + - azure-openai-responses-v1 + - xai-responses-v1 + - anthropic-claude-v1 + - google-gemini-v1 + description: The format of the reasoning content + example: anthropic-claude-v1 example: id: reasoning-123 type: reasoning @@ -279,6 +296,8 @@ components: content: - type: reasoning_text text: First, we analyze the problem... + signature: EvcBCkgIChABGAIqQKkSDbRuVEQUk9qN1odC098l9SEj... + format: anthropic-claude-v1 description: An output item containing reasoning OutputItemFunctionCall: type: object @@ -3224,6 +3243,7 @@ components: enum: - unknown - openai-responses-v1 + - azure-openai-responses-v1 - xai-responses-v1 - anthropic-claude-v1 - google-gemini-v1 @@ -3418,6 +3438,11 @@ components: example: summary: auto enabled: true + ResponsesOutputModality: + type: string + enum: + - text + - image OpenAIResponsesIncludable: type: string enum: @@ -3470,12 +3495,12 @@ components: - Fireworks - Friendli - GMICloud - - GoPomelo - Google - Google AI Studio - Groq - Hyperbolic - Inception + - Inceptron - InferenceNet - Infermatic - Inflection @@ -3500,13 +3525,14 @@ components: - Phala - Relace - SambaNova + - Seed - SiliconFlow - Sourceful - Stealth - StreamLake - Switchpoint - - Targon - Together + - Upstage - Venice - WandB - Xiaomi @@ -3551,6 +3577,74 @@ components: type: string description: A value in string format that is a large number example: 1000 + PercentileThroughputCutoffs: + type: object + properties: + p50: + type: number + nullable: true + description: Minimum p50 throughput (tokens/sec) + p75: + type: number + nullable: true + description: Minimum p75 throughput (tokens/sec) + p90: + type: number + nullable: true + description: Minimum p90 throughput (tokens/sec) + p99: + type: number + nullable: true + description: Minimum p99 throughput (tokens/sec) + description: Percentile-based throughput cutoffs. 
All specified cutoffs must be met for an endpoint to be preferred. + example: + p50: 100 + p90: 50 + PreferredMinThroughput: + anyOf: + - type: number + - $ref: '#/components/schemas/PercentileThroughputCutoffs' + - nullable: true + description: >- + Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with + percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in + routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if + it meets the threshold. + example: 100 + PercentileLatencyCutoffs: + type: object + properties: + p50: + type: number + nullable: true + description: Maximum p50 latency (seconds) + p75: + type: number + nullable: true + description: Maximum p75 latency (seconds) + p90: + type: number + nullable: true + description: Maximum p90 latency (seconds) + p99: + type: number + nullable: true + description: Maximum p99 latency (seconds) + description: Percentile-based latency cutoffs. All specified cutoffs must be met for an endpoint to be preferred. + example: + p50: 5 + p90: 10 + PreferredMaxLatency: + anyOf: + - type: number + - $ref: '#/components/schemas/PercentileLatencyCutoffs' + - nullable: true + description: >- + Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific + cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using + fallback models, this may cause a fallback model to be used instead of the primary model if it meets the + threshold. + example: 5 WebSearchEngine: type: string enum: @@ -3639,6 +3733,25 @@ components: minimum: 0 top_k: type: number + image_config: + type: object + additionalProperties: + anyOf: + - type: string + - type: number + description: >- + Provider-specific image configuration options. Keys and values vary by model/provider. See + https://openrouter.ai/docs/features/multimodal/image-generation for more details. + example: + aspect_ratio: '16:9' + modalities: + type: array + items: + $ref: '#/components/schemas/ResponsesOutputModality' + description: Output modalities for the response. Supported values are "text" and "image". + example: + - text + - image prompt_cache_key: type: string nullable: true @@ -3774,43 +3887,38 @@ components: The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. preferred_min_throughput: - type: number - nullable: true - description: >- - Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, - but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used - instead of the primary model if it meets the threshold. - example: 100 + $ref: '#/components/schemas/PreferredMinThroughput' preferred_max_latency: - type: number - nullable: true - description: >- - Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are - deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead - of the primary model if it meets the threshold. - example: 5 - min_throughput: - type: number - nullable: true - deprecated: true - description: >- - **DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for - preferred_min_throughput. 
- example: 100 - x-speakeasy-deprecation-message: Use preferred_min_throughput instead. - max_latency: - type: number - nullable: true - deprecated: true - description: '**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency.' - example: 5 - x-speakeasy-deprecation-message: Use preferred_max_latency instead. + $ref: '#/components/schemas/PreferredMaxLatency' additionalProperties: false description: When multiple model providers are available, optionally indicate your routing preference. plugins: type: array items: oneOf: + - type: object + properties: + id: + type: string + enum: + - auto-router + enabled: + type: boolean + description: Set to false to disable the auto-router plugin for this request. Defaults to true. + allowed_models: + type: array + items: + type: string + description: >- + List of model patterns to filter which models the auto-router can route between. Supports + wildcards (e.g., "anthropic/*" matches all Anthropic models). When not specified, uses the default + supported models list. + example: + - anthropic/* + - openai/gpt-4o + - google/* + required: + - id - type: object properties: id: @@ -4132,37 +4240,9 @@ components: The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. preferred_min_throughput: - type: number - nullable: true - description: >- - Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, but - are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead - of the primary model if it meets the threshold. - example: 100 + $ref: '#/components/schemas/PreferredMinThroughput' preferred_max_latency: - type: number - nullable: true - description: >- - Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are - deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of - the primary model if it meets the threshold. - example: 5 - min_throughput: - type: number - nullable: true - deprecated: true - description: >- - **DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for - preferred_min_throughput. - example: 100 - x-speakeasy-deprecation-message: Use preferred_min_throughput instead. - max_latency: - type: number - nullable: true - deprecated: true - description: '**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency.' - example: 5 - x-speakeasy-deprecation-message: Use preferred_max_latency instead. + $ref: '#/components/schemas/PreferredMaxLatency' description: Provider routing preferences for the request. PublicPricing: type: object @@ -4595,6 +4675,34 @@ components: - -5 - -10 example: 0 + PercentileStats: + type: object + nullable: true + properties: + p50: + type: number + description: Median (50th percentile) + example: 25.5 + p75: + type: number + description: 75th percentile + example: 35.2 + p90: + type: number + description: 90th percentile + example: 48.7 + p99: + type: number + description: 99th percentile + example: 85.3 + required: + - p50 + - p75 + - p90 + - p99 + description: >- + Latency percentiles in milliseconds over the last 30 minutes. Latency measures time to first token. Only visible + when authenticated with an API key or cookie; returns null for unauthenticated requests. 
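Read together, the new schemas let one request mix a percentile object and a bare number (a bare number applies to p50), alongside the new auto-router plugin and the multimodal output controls. A minimal sketch of a request body exercising these fields — the REST endpoint URL, the use of `requests`, and the model slug are assumptions for illustration, not part of this diff; the field names come directly from the definitions above:

```python
# Sketch only: exercises the new request fields defined in this diff.
# The endpoint URL, `requests` usage, and model choice are assumptions.
import os
import requests

payload = {
    "model": "openrouter/auto",  # hypothetical model choice for illustration
    "messages": [{"role": "user", "content": "Draw a cat and describe it."}],
    "provider": {
        # PreferredMinThroughput as a PercentileThroughputCutoffs object:
        # every specified cutoff must be met for an endpoint to be preferred.
        "preferred_min_throughput": {"p50": 100, "p90": 50},
        # PreferredMaxLatency as a bare number: applies to p50 (seconds).
        "preferred_max_latency": 5,
    },
    # New auto-router plugin with wildcard model filtering.
    "plugins": [
        {
            "id": "auto-router",
            "enabled": True,
            "allowed_models": ["anthropic/*", "openai/gpt-4o"],
        }
    ],
    # New multimodal output controls.
    "modalities": ["text", "image"],
    "image_config": {"aspect_ratio": "16:9"},  # keys vary by model/provider
}

resp = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
    json=payload,
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"])
```

Per the descriptions above, the bare-number form is shorthand for a p50-only cutoff, so `"preferred_max_latency": 5` and `"preferred_max_latency": {"p50": 5}` should behave the same.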
PublicEndpoint: type: object properties: @@ -4662,6 +4770,15 @@ components: nullable: true supports_implicit_caching: type: boolean + latency_last_30m: + $ref: '#/components/schemas/PercentileStats' + throughput_last_30m: + allOf: + - $ref: '#/components/schemas/PercentileStats' + - description: >- + Throughput percentiles in tokens per second over the last 30 minutes. Throughput measures output token + generation speed. Only visible when authenticated with an API key or cookie; returns null for + unauthenticated requests. required: - name - model_name @@ -4675,6 +4792,8 @@ components: - supported_parameters - uptime_last_30m - supports_implicit_caching + - latency_last_30m + - throughput_last_30m description: Information about a specific model endpoint example: name: 'OpenAI: GPT-4' @@ -4697,6 +4816,16 @@ components: status: 0 uptime_last_30m: 99.5 supports_implicit_caching: true + latency_last_30m: + p50: 0.25 + p75: 0.35 + p90: 0.48 + p99: 0.85 + throughput_last_30m: + p50: 45.2 + p75: 38.5 + p90: 28.3 + p99: 15.1 ListEndpointsResponse: type: object properties: @@ -4800,6 +4929,16 @@ components: status: default uptime_last_30m: 99.5 supports_implicit_caching: true + latency_last_30m: + p50: 0.25 + p75: 0.35 + p90: 0.48 + p99: 0.85 + throughput_last_30m: + p50: 45.2 + p75: 38.5 + p90: 28.3 + p99: 15.1 __schema0: type: array items: @@ -4832,12 +4971,12 @@ components: - Fireworks - Friendli - GMICloud - - GoPomelo - Google - Google AI Studio - Groq - Hyperbolic - Inception + - Inceptron - InferenceNet - Infermatic - Inflection @@ -4862,13 +5001,14 @@ components: - Phala - Relace - SambaNova + - Seed - SiliconFlow - Sourceful - Stealth - StreamLake - Switchpoint - - Targon - Together + - Upstage - Venice - WandB - Xiaomi @@ -4951,6 +5091,7 @@ components: enum: - unknown - openai-responses-v1 + - azure-openai-responses-v1 - xai-responses-v1 - anthropic-claude-v1 - google-gemini-v1 @@ -5174,6 +5315,8 @@ components: properties: cached_tokens: type: number + cache_write_tokens: + type: number audio_tokens: type: number video_tokens: @@ -5521,20 +5664,60 @@ components: request: $ref: '#/components/schemas/__schema1' preferred_min_throughput: + description: >- + Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object + with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are + deprioritized in routing. When using fallback models, this may cause a fallback model to be used + instead of the primary model if it meets the threshold. anyOf: - - type: number + - anyOf: + - type: number + - type: object + properties: + p50: + anyOf: + - type: number + - type: 'null' + p75: + anyOf: + - type: number + - type: 'null' + p90: + anyOf: + - type: number + - type: 'null' + p99: + anyOf: + - type: number + - type: 'null' - type: 'null' preferred_max_latency: + description: >- + Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with + percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are + deprioritized in routing. When using fallback models, this may cause a fallback model to be used + instead of the primary model if it meets the threshold. 
anyOf: - - type: number - - type: 'null' - min_throughput: - anyOf: - - type: number - - type: 'null' - max_latency: - anyOf: - - type: number + - anyOf: + - type: number + - type: object + properties: + p50: + anyOf: + - type: number + - type: 'null' + p75: + anyOf: + - type: number + - type: 'null' + p90: + anyOf: + - type: number + - type: 'null' + p99: + anyOf: + - type: number + - type: 'null' - type: 'null' additionalProperties: false - type: 'null' @@ -5543,6 +5726,19 @@ components: type: array items: oneOf: + - type: object + properties: + id: + type: string + const: auto-router + enabled: + type: boolean + allowed_models: + type: array + items: + type: string + required: + - id - type: object properties: id: @@ -5760,6 +5956,21 @@ components: properties: echo_upstream_body: type: boolean + image_config: + type: object + propertyNames: + type: string + additionalProperties: + anyOf: + - type: string + - type: number + modalities: + type: array + items: + type: string + enum: + - text + - image required: - messages ProviderSortUnion: diff --git a/.speakeasy/out.openapi.yaml b/.speakeasy/out.openapi.yaml index 31a1ea0..1796209 100644 --- a/.speakeasy/out.openapi.yaml +++ b/.speakeasy/out.openapi.yaml @@ -269,7 +269,25 @@ components: allOf: - $ref: '#/components/schemas/OutputItemReasoning' - type: object - properties: {} + properties: + signature: + type: string + nullable: true + description: A signature for the reasoning content, used for verification + example: EvcBCkgIChABGAIqQKkSDbRuVEQUk9qN1odC098l9SEj... + format: + type: string + nullable: true + enum: + - unknown + - openai-responses-v1 + - azure-openai-responses-v1 + - xai-responses-v1 + - anthropic-claude-v1 + - google-gemini-v1 + description: The format of the reasoning content + example: anthropic-claude-v1 + x-speakeasy-unknown-values: allow example: id: reasoning-123 type: reasoning @@ -280,6 +298,8 @@ components: content: - type: reasoning_text text: First, we analyze the problem... + signature: EvcBCkgIChABGAIqQKkSDbRuVEQUk9qN1odC098l9SEj... + format: anthropic-claude-v1 description: An output item containing reasoning OutputItemFunctionCall: type: object @@ -3239,6 +3259,7 @@ components: enum: - unknown - openai-responses-v1 + - azure-openai-responses-v1 - xai-responses-v1 - anthropic-claude-v1 - google-gemini-v1 @@ -3434,6 +3455,12 @@ components: example: summary: auto enabled: true + ResponsesOutputModality: + type: string + enum: + - text + - image + x-speakeasy-unknown-values: allow OpenAIResponsesIncludable: type: string enum: @@ -3487,12 +3514,12 @@ components: - Fireworks - Friendli - GMICloud - - GoPomelo - Google - Google AI Studio - Groq - Hyperbolic - Inception + - Inceptron - InferenceNet - Infermatic - Inflection @@ -3517,13 +3544,14 @@ components: - Phala - Relace - SambaNova + - Seed - SiliconFlow - Sourceful - Stealth - StreamLake - Switchpoint - - Targon - Together + - Upstage - Venice - WandB - Xiaomi @@ -3572,6 +3600,68 @@ components: type: string description: A value in string format that is a large number example: 1000 + PercentileThroughputCutoffs: + type: object + properties: + p50: + type: number + nullable: true + description: Minimum p50 throughput (tokens/sec) + p75: + type: number + nullable: true + description: Minimum p75 throughput (tokens/sec) + p90: + type: number + nullable: true + description: Minimum p90 throughput (tokens/sec) + p99: + type: number + nullable: true + description: Minimum p99 throughput (tokens/sec) + description: Percentile-based throughput cutoffs. 
All specified cutoffs must be met for an endpoint to be preferred. + example: + p50: 100 + p90: 50 + PreferredMinThroughput: + anyOf: + - type: number + - $ref: '#/components/schemas/PercentileThroughputCutoffs' + - nullable: true + description: >- + Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. + example: 100 + PercentileLatencyCutoffs: + type: object + properties: + p50: + type: number + nullable: true + description: Maximum p50 latency (seconds) + p75: + type: number + nullable: true + description: Maximum p75 latency (seconds) + p90: + type: number + nullable: true + description: Maximum p90 latency (seconds) + p99: + type: number + nullable: true + description: Maximum p99 latency (seconds) + description: Percentile-based latency cutoffs. All specified cutoffs must be met for an endpoint to be preferred. + example: + p50: 5 + p90: 10 + PreferredMaxLatency: + anyOf: + - type: number + - $ref: '#/components/schemas/PercentileLatencyCutoffs' + - nullable: true + description: >- + Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. + example: 5 WebSearchEngine: type: string enum: @@ -3663,6 +3753,24 @@ components: minimum: 0 top_k: type: number + image_config: + type: object + additionalProperties: + anyOf: + - type: string + - type: number + description: >- + Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details. + example: + aspect_ratio: '16:9' + modalities: + type: array + items: + $ref: '#/components/schemas/ResponsesOutputModality' + description: Output modalities for the response. Supported values are "text" and "image". + example: + - text + - image prompt_cache_key: type: string nullable: true @@ -3788,38 +3896,36 @@ components: description: >- The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. preferred_min_throughput: - type: number - nullable: true - description: >- - Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. - example: 100 + $ref: '#/components/schemas/PreferredMinThroughput' preferred_max_latency: - type: number - nullable: true - description: >- - Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. - example: 5 - min_throughput: - type: number - nullable: true - deprecated: true - description: >- - **DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for preferred_min_throughput. - example: 100 - x-speakeasy-deprecation-message: Use preferred_min_throughput instead. 
- max_latency: - type: number - nullable: true - deprecated: true - description: '**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency.' - example: 5 - x-speakeasy-deprecation-message: Use preferred_max_latency instead. + $ref: '#/components/schemas/PreferredMaxLatency' additionalProperties: false description: When multiple model providers are available, optionally indicate your routing preference. plugins: type: array items: oneOf: + - type: object + properties: + id: + type: string + enum: + - auto-router + enabled: + type: boolean + description: Set to false to disable the auto-router plugin for this request. Defaults to true. + allowed_models: + type: array + items: + type: string + description: >- + List of model patterns to filter which models the auto-router can route between. Supports wildcards (e.g., "anthropic/*" matches all Anthropic models). When not specified, uses the default supported models list. + example: + - anthropic/* + - openai/gpt-4o + - google/* + required: + - id - type: object properties: id: @@ -4129,32 +4235,9 @@ components: description: >- The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. preferred_min_throughput: - type: number - nullable: true - description: >- - Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. - example: 100 + $ref: '#/components/schemas/PreferredMinThroughput' preferred_max_latency: - type: number - nullable: true - description: >- - Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. - example: 5 - min_throughput: - type: number - nullable: true - deprecated: true - description: >- - **DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for preferred_min_throughput. - example: 100 - x-speakeasy-deprecation-message: Use preferred_min_throughput instead. - max_latency: - type: number - nullable: true - deprecated: true - description: '**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency.' - example: 5 - x-speakeasy-deprecation-message: Use preferred_max_latency instead. + $ref: '#/components/schemas/PreferredMaxLatency' description: Provider routing preferences for the request. PublicPricing: type: object @@ -4594,6 +4677,33 @@ components: - -10 example: 0 x-speakeasy-unknown-values: allow + PercentileStats: + type: object + nullable: true + properties: + p50: + type: number + description: Median (50th percentile) + example: 25.5 + p75: + type: number + description: 75th percentile + example: 35.2 + p90: + type: number + description: 90th percentile + example: 48.7 + p99: + type: number + description: 99th percentile + example: 85.3 + required: + - p50 + - p75 + - p90 + - p99 + description: >- + Latency percentiles in milliseconds over the last 30 minutes. Latency measures time to first token. Only visible when authenticated with an API key or cookie; returns null for unauthenticated requests. 
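Because `latency_last_30m` and `throughput_last_30m` are now required on `PublicEndpoint` but the `PercentileStats` schema is nullable, consumers should expect `null` on unauthenticated requests. A short sketch of reading the new percentile stats — the route and `requests` usage are assumptions for illustration, though the response shape matches the `ListEndpointsResponse` example in this diff:

```python
# Sketch only: reads the new percentile stats from the endpoints listing.
# The route and `requests` usage are assumptions; authenticate, since the
# spec notes these fields return null for unauthenticated requests.
import os
import requests

resp = requests.get(
    "https://openrouter.ai/api/v1/models/openai/gpt-4/endpoints",
    headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
    timeout=30,
)
resp.raise_for_status()

for ep in resp.json()["data"]["endpoints"]:
    latency = ep["latency_last_30m"]        # PercentileStats or None
    throughput = ep["throughput_last_30m"]  # PercentileStats or None
    if latency is not None:
        # p50/p75/p90/p99 are all required when the object is present.
        tput = f"{throughput['p50']} tok/s" if throughput else "n/a"
        print(f"{ep['provider_name']}: p50 latency {latency['p50']}, p50 throughput {tput}")
```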
PublicEndpoint: type: object properties: @@ -4661,6 +4771,13 @@ components: nullable: true supports_implicit_caching: type: boolean + latency_last_30m: + $ref: '#/components/schemas/PercentileStats' + throughput_last_30m: + allOf: + - $ref: '#/components/schemas/PercentileStats' + - description: >- + Throughput percentiles in tokens per second over the last 30 minutes. Throughput measures output token generation speed. Only visible when authenticated with an API key or cookie; returns null for unauthenticated requests. required: - name - model_name @@ -4674,6 +4791,8 @@ components: - supported_parameters - uptime_last_30m - supports_implicit_caching + - latency_last_30m + - throughput_last_30m description: Information about a specific model endpoint example: name: 'OpenAI: GPT-4' @@ -4696,6 +4815,16 @@ components: status: 0 uptime_last_30m: 99.5 supports_implicit_caching: true + latency_last_30m: + p50: 0.25 + p75: 0.35 + p90: 0.48 + p99: 0.85 + throughput_last_30m: + p50: 45.2 + p75: 38.5 + p90: 28.3 + p99: 15.1 ListEndpointsResponse: type: object properties: @@ -4799,6 +4928,16 @@ components: status: default uptime_last_30m: 99.5 supports_implicit_caching: true + latency_last_30m: + p50: 0.25 + p75: 0.35 + p90: 0.48 + p99: 0.85 + throughput_last_30m: + p50: 45.2 + p75: 38.5 + p90: 28.3 + p99: 15.1 __schema0: type: array items: @@ -4831,12 +4970,12 @@ components: - Fireworks - Friendli - GMICloud - - GoPomelo - Google - Google AI Studio - Groq - Hyperbolic - Inception + - Inceptron - InferenceNet - Infermatic - Inflection @@ -4861,13 +5000,14 @@ components: - Phala - Relace - SambaNova + - Seed - SiliconFlow - Sourceful - Stealth - StreamLake - Switchpoint - - Targon - Together + - Upstage - Venice - WandB - Xiaomi @@ -4951,6 +5091,7 @@ components: enum: - unknown - openai-responses-v1 + - azure-openai-responses-v1 - xai-responses-v1 - anthropic-claude-v1 - google-gemini-v1 @@ -5176,6 +5317,8 @@ components: properties: cached_tokens: type: number + cache_write_tokens: + type: number audio_tokens: type: number video_tokens: @@ -5518,20 +5661,54 @@ components: request: $ref: '#/components/schemas/__schema1' preferred_min_throughput: + description: >- + Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. anyOf: - - type: number + - anyOf: + - type: number + - type: object + properties: + p50: + anyOf: + - type: number + - type: 'null' + p75: + anyOf: + - type: number + - type: 'null' + p90: + anyOf: + - type: number + - type: 'null' + p99: + anyOf: + - type: number + - type: 'null' - type: 'null' preferred_max_latency: + description: >- + Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. 
anyOf: - - type: number - - type: 'null' - min_throughput: - anyOf: - - type: number - - type: 'null' - max_latency: - anyOf: - - type: number + - anyOf: + - type: number + - type: object + properties: + p50: + anyOf: + - type: number + - type: 'null' + p75: + anyOf: + - type: number + - type: 'null' + p90: + anyOf: + - type: number + - type: 'null' + p99: + anyOf: + - type: number + - type: 'null' - type: 'null' additionalProperties: false - type: 'null' @@ -5540,6 +5717,19 @@ components: type: array items: oneOf: + - type: object + properties: + id: + type: string + const: auto-router + enabled: + type: boolean + allowed_models: + type: array + items: + type: string + required: + - id - type: object properties: id: @@ -5759,6 +5949,22 @@ components: properties: echo_upstream_body: type: boolean + image_config: + type: object + propertyNames: + type: string + additionalProperties: + anyOf: + - type: string + - type: number + modalities: + type: array + items: + type: string + enum: + - text + - image + x-speakeasy-unknown-values: allow required: - messages ProviderSortUnion: diff --git a/.speakeasy/workflow.lock b/.speakeasy/workflow.lock index 83aebe0..0cea211 100644 --- a/.speakeasy/workflow.lock +++ b/.speakeasy/workflow.lock @@ -8,19 +8,20 @@ sources: - latest OpenRouter API: sourceNamespace: open-router-chat-completions-api - sourceRevisionDigest: sha256:92f6f1568ba089ae8e52bd55d859a97e446ae232c4c9ca9302ea64705313c7a0 - sourceBlobDigest: sha256:6bbf6ab7123261f7e0604f1c640e32b5fc8fb6bb503b1bc8b12d0d78ed19fefc + sourceRevisionDigest: sha256:5615c05050362c0944a59e3b5b0437157f9d152faac1509d3c390bbc85e93b1e + sourceBlobDigest: sha256:4f08bbf0b65fd4e617996964ccb1fa33ed7e807f81d8731ba4d00b0a7fd2da98 tags: - latest + - subtree-sync-import-python-sdk - 1.0.0 targets: open-router: source: OpenRouter API sourceNamespace: open-router-chat-completions-api - sourceRevisionDigest: sha256:92f6f1568ba089ae8e52bd55d859a97e446ae232c4c9ca9302ea64705313c7a0 - sourceBlobDigest: sha256:6bbf6ab7123261f7e0604f1c640e32b5fc8fb6bb503b1bc8b12d0d78ed19fefc + sourceRevisionDigest: sha256:5615c05050362c0944a59e3b5b0437157f9d152faac1509d3c390bbc85e93b1e + sourceBlobDigest: sha256:4f08bbf0b65fd4e617996964ccb1fa33ed7e807f81d8731ba4d00b0a7fd2da98 codeSamplesNamespace: open-router-python-code-samples - codeSamplesRevisionDigest: sha256:8340c172a77ca9ffeeea6ca5dce0d69a084a3ba0a4e2e41d098759f546d80da4 + codeSamplesRevisionDigest: sha256:c60fe0121ecdbfb79af3a344b52b8daade826fb9648b925f229e5b1d93b5c7ac workflow: workflowVersion: 1.0.0 speakeasyVersion: 1.666.0 diff --git a/docs/components/chatgenerationparams.md b/docs/components/chatgenerationparams.md index 7bf5109..f01af13 100644 --- a/docs/components/chatgenerationparams.md +++ b/docs/components/chatgenerationparams.md @@ -31,4 +31,6 @@ | `tool_choice` | *Optional[Any]* | :heavy_minus_sign: | N/A | | `tools` | List[[components.ToolDefinitionJSON](../components/tooldefinitionjson.md)] | :heavy_minus_sign: | N/A | | `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | -| `debug` | [Optional[components.Debug]](../components/debug.md) | :heavy_minus_sign: | N/A | \ No newline at end of file +| `debug` | [Optional[components.Debug]](../components/debug.md) | :heavy_minus_sign: | N/A | +| `image_config` | Dict[str, [components.ChatGenerationParamsImageConfig](../components/chatgenerationparamsimageconfig.md)] | :heavy_minus_sign: | N/A | +| `modalities` | List[[components.Modality](../components/modality.md)] | :heavy_minus_sign: | N/A | \ No newline at end of 
file diff --git a/docs/components/chatgenerationparamsimageconfig.md b/docs/components/chatgenerationparamsimageconfig.md new file mode 100644 index 0000000..18403aa --- /dev/null +++ b/docs/components/chatgenerationparamsimageconfig.md @@ -0,0 +1,17 @@ +# ChatGenerationParamsImageConfig + + +## Supported Types + +### `str` + +```python +value: str = /* values here */ +``` + +### `float` + +```python +value: float = /* values here */ +``` + diff --git a/docs/components/chatgenerationparamspluginautorouter.md b/docs/components/chatgenerationparamspluginautorouter.md new file mode 100644 index 0000000..7326614 --- /dev/null +++ b/docs/components/chatgenerationparamspluginautorouter.md @@ -0,0 +1,10 @@ +# ChatGenerationParamsPluginAutoRouter + + +## Fields + +| Field | Type | Required | Description | +| ------------------------ | ------------------------ | ------------------------ | ------------------------ | +| `id` | *Literal["auto-router"]* | :heavy_check_mark: | N/A | +| `enabled` | *Optional[bool]* | :heavy_minus_sign: | N/A | +| `allowed_models` | List[*str*] | :heavy_minus_sign: | N/A | \ No newline at end of file diff --git a/docs/components/chatgenerationparamspluginunion.md b/docs/components/chatgenerationparamspluginunion.md index 97ccfb1..5457d76 100644 --- a/docs/components/chatgenerationparamspluginunion.md +++ b/docs/components/chatgenerationparamspluginunion.md @@ -3,6 +3,12 @@ ## Supported Types +### `components.ChatGenerationParamsPluginAutoRouter` + +```python +value: components.ChatGenerationParamsPluginAutoRouter = /* values here */ +``` + ### `components.ChatGenerationParamsPluginModeration` ```python diff --git a/docs/components/chatgenerationparamspreferredmaxlatency.md b/docs/components/chatgenerationparamspreferredmaxlatency.md new file mode 100644 index 0000000..323b2f9 --- /dev/null +++ b/docs/components/chatgenerationparamspreferredmaxlatency.md @@ -0,0 +1,11 @@ +# ChatGenerationParamsPreferredMaxLatency + + +## Fields + +| Field | Type | Required | Description | +| ------------------------- | ------------------------- | ------------------------- | ------------------------- | +| `p50` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | +| `p75` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | +| `p90` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | +| `p99` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | \ No newline at end of file diff --git a/docs/components/chatgenerationparamspreferredmaxlatencyunion.md b/docs/components/chatgenerationparamspreferredmaxlatencyunion.md new file mode 100644 index 0000000..3b22111 --- /dev/null +++ b/docs/components/chatgenerationparamspreferredmaxlatencyunion.md @@ -0,0 +1,17 @@ +# ChatGenerationParamsPreferredMaxLatencyUnion + + +## Supported Types + +### `float` + +```python +value: float = /* values here */ +``` + +### `components.ChatGenerationParamsPreferredMaxLatency` + +```python +value: components.ChatGenerationParamsPreferredMaxLatency = /* values here */ +``` + diff --git a/docs/components/chatgenerationparamspreferredminthroughput.md b/docs/components/chatgenerationparamspreferredminthroughput.md new file mode 100644 index 0000000..260394d --- /dev/null +++ b/docs/components/chatgenerationparamspreferredminthroughput.md @@ -0,0 +1,11 @@ +# ChatGenerationParamsPreferredMinThroughput + + +## Fields + +| Field | Type | Required | Description | +| ------------------------- | ------------------------- | ------------------------- | ------------------------- | +| `p50` | 
*OptionalNullable[float]* | :heavy_minus_sign: | N/A | +| `p75` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | +| `p90` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | +| `p99` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | \ No newline at end of file diff --git a/docs/components/chatgenerationparamspreferredminthroughputunion.md b/docs/components/chatgenerationparamspreferredminthroughputunion.md new file mode 100644 index 0000000..3803e24 --- /dev/null +++ b/docs/components/chatgenerationparamspreferredminthroughputunion.md @@ -0,0 +1,17 @@ +# ChatGenerationParamsPreferredMinThroughputUnion + + +## Supported Types + +### `float` + +```python +value: float = /* values here */ +``` + +### `components.ChatGenerationParamsPreferredMinThroughput` + +```python +value: components.ChatGenerationParamsPreferredMinThroughput = /* values here */ +``` + diff --git a/docs/components/chatgenerationparamsprovider.md b/docs/components/chatgenerationparamsprovider.md index 7c4a48f..3d486fa 100644 --- a/docs/components/chatgenerationparamsprovider.md +++ b/docs/components/chatgenerationparamsprovider.md @@ -3,20 +3,18 @@ ## Fields -| Field | Type | Required | Description | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `allow_fallbacks` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to allow backup providers to serve requests
- true: (default) when the primary provider (or your custom providers in "order") is unavailable, use the next best provider.<br/>- false: use only the primary/custom provider, and return the upstream error if it's unavailable.<br/>
| -| `require_parameters` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to filter providers to only those that support the parameters you've provided. If this setting is omitted or set to false, then providers will receive only the parameters they support, and ignore the rest. | -| `data_collection` | [OptionalNullable[components.ChatGenerationParamsDataCollection]](../components/chatgenerationparamsdatacollection.md) | :heavy_minus_sign: | Data collection setting. If no available model provider meets the requirement, your request will return an error.
- allow: (default) allow providers which store user data non-transiently and may train on it<br/><br/>
- deny: use only providers which do not collect user data. | -| `zdr` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | -| `enforce_distillable_text` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | -| `order` | List[[components.Schema0](../components/schema0.md)] | :heavy_minus_sign: | An ordered list of provider slugs. The router will attempt to use the first provider in the subset of this list that supports your requested model, and fall back to the next if it is unavailable. If no providers are available, the request will fail with an error message. | -| `only` | List[[components.Schema0](../components/schema0.md)] | :heavy_minus_sign: | List of provider slugs to allow. If provided, this list is merged with your account-wide allowed provider settings for this request. | -| `ignore` | List[[components.Schema0](../components/schema0.md)] | :heavy_minus_sign: | List of provider slugs to ignore. If provided, this list is merged with your account-wide ignored provider settings for this request. | -| `quantizations` | List[[components.Quantizations](../components/quantizations.md)] | :heavy_minus_sign: | A list of quantization levels to filter the provider by. | -| `sort` | [OptionalNullable[components.ProviderSortUnion]](../components/providersortunion.md) | :heavy_minus_sign: | The sorting strategy to use for this request, if "order" is not specified. When set, no load balancing is performed. | -| `max_price` | [Optional[components.ChatGenerationParamsMaxPrice]](../components/chatgenerationparamsmaxprice.md) | :heavy_minus_sign: | The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. | -| `preferred_min_throughput` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | -| `preferred_max_latency` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | -| `min_throughput` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | -| `max_latency` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | \ No newline at end of file +| Field | Type | Required | Description | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `allow_fallbacks` | *OptionalNullable[bool]* | 
:heavy_minus_sign: | Whether to allow backup providers to serve requests
- true: (default) when the primary provider (or your custom providers in "order") is unavailable, use the next best provider.<br/>- false: use only the primary/custom provider, and return the upstream error if it's unavailable.<br/>
| +| `require_parameters` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to filter providers to only those that support the parameters you've provided. If this setting is omitted or set to false, then providers will receive only the parameters they support, and ignore the rest. | +| `data_collection` | [OptionalNullable[components.ChatGenerationParamsDataCollection]](../components/chatgenerationparamsdatacollection.md) | :heavy_minus_sign: | Data collection setting. If no available model provider meets the requirement, your request will return an error.
- allow: (default) allow providers which store user data non-transiently and may train on it<br/><br/>
- deny: use only providers which do not collect user data. | +| `zdr` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | +| `enforce_distillable_text` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | +| `order` | List[[components.Schema0](../components/schema0.md)] | :heavy_minus_sign: | An ordered list of provider slugs. The router will attempt to use the first provider in the subset of this list that supports your requested model, and fall back to the next if it is unavailable. If no providers are available, the request will fail with an error message. | +| `only` | List[[components.Schema0](../components/schema0.md)] | :heavy_minus_sign: | List of provider slugs to allow. If provided, this list is merged with your account-wide allowed provider settings for this request. | +| `ignore` | List[[components.Schema0](../components/schema0.md)] | :heavy_minus_sign: | List of provider slugs to ignore. If provided, this list is merged with your account-wide ignored provider settings for this request. | +| `quantizations` | List[[components.Quantizations](../components/quantizations.md)] | :heavy_minus_sign: | A list of quantization levels to filter the provider by. | +| `sort` | [OptionalNullable[components.ProviderSortUnion]](../components/providersortunion.md) | :heavy_minus_sign: | The sorting strategy to use for this request, if "order" is not specified. When set, no load balancing is performed. | +| `max_price` | [Optional[components.ChatGenerationParamsMaxPrice]](../components/chatgenerationparamsmaxprice.md) | :heavy_minus_sign: | The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. | +| `preferred_min_throughput` | [OptionalNullable[components.ChatGenerationParamsPreferredMinThroughputUnion]](../components/chatgenerationparamspreferredminthroughputunion.md) | :heavy_minus_sign: | Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. | +| `preferred_max_latency` | [OptionalNullable[components.ChatGenerationParamsPreferredMaxLatencyUnion]](../components/chatgenerationparamspreferredmaxlatencyunion.md) | :heavy_minus_sign: | Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. 
| \ No newline at end of file diff --git a/docs/components/idautorouter.md b/docs/components/idautorouter.md new file mode 100644 index 0000000..43f83e5 --- /dev/null +++ b/docs/components/idautorouter.md @@ -0,0 +1,8 @@ +# IDAutoRouter + + +## Values + +| Name | Value | +| ------------- | ------------- | +| `AUTO_ROUTER` | auto-router | \ No newline at end of file diff --git a/docs/components/modality.md b/docs/components/modality.md new file mode 100644 index 0000000..4edb378 --- /dev/null +++ b/docs/components/modality.md @@ -0,0 +1,9 @@ +# Modality + + +## Values + +| Name | Value | +| ------- | ------- | +| `TEXT` | text | +| `IMAGE` | image | \ No newline at end of file diff --git a/docs/components/openresponsesreasoningformat.md b/docs/components/openresponsesreasoningformat.md index 1d3f96a..dbf0b1a 100644 --- a/docs/components/openresponsesreasoningformat.md +++ b/docs/components/openresponsesreasoningformat.md @@ -3,10 +3,11 @@ ## Values -| Name | Value | -| --------------------- | --------------------- | -| `UNKNOWN` | unknown | -| `OPENAI_RESPONSES_V1` | openai-responses-v1 | -| `XAI_RESPONSES_V1` | xai-responses-v1 | -| `ANTHROPIC_CLAUDE_V1` | anthropic-claude-v1 | -| `GOOGLE_GEMINI_V1` | google-gemini-v1 | \ No newline at end of file +| Name | Value | +| --------------------------- | --------------------------- | +| `UNKNOWN` | unknown | +| `OPENAI_RESPONSES_V1` | openai-responses-v1 | +| `AZURE_OPENAI_RESPONSES_V1` | azure-openai-responses-v1 | +| `XAI_RESPONSES_V1` | xai-responses-v1 | +| `ANTHROPIC_CLAUDE_V1` | anthropic-claude-v1 | +| `GOOGLE_GEMINI_V1` | google-gemini-v1 | \ No newline at end of file diff --git a/docs/components/openresponsesrequest.md b/docs/components/openresponsesrequest.md index 247144a..7d7e224 100644 --- a/docs/components/openresponsesrequest.md +++ b/docs/components/openresponsesrequest.md @@ -21,6 +21,8 @@ Request schema for Responses endpoint | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | | | `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | | | `top_k` | *Optional[float]* | :heavy_minus_sign: | N/A | | +| `image_config` | Dict[str, [components.OpenResponsesRequestImageConfig](../components/openresponsesrequestimageconfig.md)] | :heavy_minus_sign: | Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details. | {
"aspect_ratio": "16:9"
} | +| `modalities` | List[[components.ResponsesOutputModality](../components/responsesoutputmodality.md)] | :heavy_minus_sign: | Output modalities for the response. Supported values are "text" and "image". | [
"text",
"image"
] | | `prompt_cache_key` | *OptionalNullable[str]* | :heavy_minus_sign: | N/A | | | `previous_response_id` | *OptionalNullable[str]* | :heavy_minus_sign: | N/A | | | `prompt` | [OptionalNullable[components.OpenAIResponsesPrompt]](../components/openairesponsesprompt.md) | :heavy_minus_sign: | N/A | | diff --git a/docs/components/openresponsesrequestimageconfig.md b/docs/components/openresponsesrequestimageconfig.md new file mode 100644 index 0000000..0bfb1b7 --- /dev/null +++ b/docs/components/openresponsesrequestimageconfig.md @@ -0,0 +1,17 @@ +# OpenResponsesRequestImageConfig + + +## Supported Types + +### `str` + +```python +value: str = /* values here */ +``` + +### `float` + +```python +value: float = /* values here */ +``` + diff --git a/docs/components/openresponsesrequestpluginautorouter.md b/docs/components/openresponsesrequestpluginautorouter.md new file mode 100644 index 0000000..21de153 --- /dev/null +++ b/docs/components/openresponsesrequestpluginautorouter.md @@ -0,0 +1,10 @@ +# OpenResponsesRequestPluginAutoRouter + + +## Fields + +| Field | Type | Required | Description | Example | +| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `id` | [components.IDAutoRouter](../components/idautorouter.md) | :heavy_check_mark: | N/A | | +| `enabled` | *Optional[bool]* | :heavy_minus_sign: | Set to false to disable the auto-router plugin for this request. Defaults to true. | | +| `allowed_models` | List[*str*] | :heavy_minus_sign: | List of model patterns to filter which models the auto-router can route between. Supports wildcards (e.g., "anthropic/*" matches all Anthropic models). When not specified, uses the default supported models list. | [
"anthropic/*",
"openai/gpt-4o",
"google/*"
] | \ No newline at end of file diff --git a/docs/components/openresponsesrequestpluginunion.md b/docs/components/openresponsesrequestpluginunion.md index ead57c3..e164b2e 100644 --- a/docs/components/openresponsesrequestpluginunion.md +++ b/docs/components/openresponsesrequestpluginunion.md @@ -3,6 +3,12 @@ ## Supported Types +### `components.OpenResponsesRequestPluginAutoRouter` + +```python +value: components.OpenResponsesRequestPluginAutoRouter = /* values here */ +``` + ### `components.OpenResponsesRequestPluginModeration` ```python diff --git a/docs/components/openresponsesrequestprovider.md b/docs/components/openresponsesrequestprovider.md index fd4d909..5fde781 100644 --- a/docs/components/openresponsesrequestprovider.md +++ b/docs/components/openresponsesrequestprovider.md @@ -5,20 +5,18 @@ When multiple model providers are available, optionally indicate your routing pr ## Fields -| Field | Type | Required | Description | Example | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `allow_fallbacks` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to allow backup providers to serve requests
- true: (default) when the primary provider (or your custom providers in "order") is unavailable, use the next best provider.
- false: use only the primary/custom provider, and return the upstream error if it's unavailable.
| | -| `require_parameters` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to filter providers to only those that support the parameters you've provided. If this setting is omitted or set to false, then providers will receive only the parameters they support, and ignore the rest. | | -| `data_collection` | [OptionalNullable[components.DataCollection]](../components/datacollection.md) | :heavy_minus_sign: | Data collection setting. If no available model provider meets the requirement, your request will return an error.
- allow: (default) allow providers which store user data non-transiently and may train on it

- deny: use only providers which do not collect user data. | allow | -| `zdr` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. When true, only endpoints that do not retain prompts will be used. | true | -| `enforce_distillable_text` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to restrict routing to only models that allow text distillation. When true, only models where the author has allowed distillation will be used. | true | -| `order` | List[[components.OpenResponsesRequestOrder](../components/openresponsesrequestorder.md)] | :heavy_minus_sign: | An ordered list of provider slugs. The router will attempt to use the first provider in the subset of this list that supports your requested model, and fall back to the next if it is unavailable. If no providers are available, the request will fail with an error message. | | -| `only` | List[[components.OpenResponsesRequestOnly](../components/openresponsesrequestonly.md)] | :heavy_minus_sign: | List of provider slugs to allow. If provided, this list is merged with your account-wide allowed provider settings for this request. | | -| `ignore` | List[[components.OpenResponsesRequestIgnore](../components/openresponsesrequestignore.md)] | :heavy_minus_sign: | List of provider slugs to ignore. If provided, this list is merged with your account-wide ignored provider settings for this request. | | -| `quantizations` | List[[components.Quantization](../components/quantization.md)] | :heavy_minus_sign: | A list of quantization levels to filter the provider by. | | -| `sort` | [OptionalNullable[components.OpenResponsesRequestSort]](../components/openresponsesrequestsort.md) | :heavy_minus_sign: | The sorting strategy to use for this request, if "order" is not specified. When set, no load balancing is performed. | price | -| `max_price` | [Optional[components.OpenResponsesRequestMaxPrice]](../components/openresponsesrequestmaxprice.md) | :heavy_minus_sign: | The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. | | -| `preferred_min_throughput` | *OptionalNullable[float]* | :heavy_minus_sign: | Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. | 100 | -| `preferred_max_latency` | *OptionalNullable[float]* | :heavy_minus_sign: | Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. | 5 | -| ~~`min_throughput`~~ | *OptionalNullable[float]* | :heavy_minus_sign: | : warning: ** DEPRECATED **: Use preferred_min_throughput instead..

**DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for preferred_min_throughput. | 100 | -| ~~`max_latency`~~ | *OptionalNullable[float]* | :heavy_minus_sign: | : warning: ** DEPRECATED **: Use preferred_max_latency instead..

**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency. | 5 | \ No newline at end of file +| Field | Type | Required | Description | Example | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `allow_fallbacks` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to allow backup providers to serve requests
- true: (default) when the primary provider (or your custom providers in "order") is unavailable, use the next best provider.
- false: use only the primary/custom provider, and return the upstream error if it's unavailable.
| | +| `require_parameters` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to filter providers to only those that support the parameters you've provided. If this setting is omitted or set to false, then providers will receive only the parameters they support, and ignore the rest. | | +| `data_collection` | [OptionalNullable[components.DataCollection]](../components/datacollection.md) | :heavy_minus_sign: | Data collection setting. If no available model provider meets the requirement, your request will return an error.
- allow: (default) allow providers which store user data non-transiently and may train on it.

- deny: use only providers which do not collect user data. | allow | +| `zdr` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. When true, only endpoints that do not retain prompts will be used. | true | +| `enforce_distillable_text` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to restrict routing to only models that allow text distillation. When true, only models where the author has allowed distillation will be used. | true | +| `order` | List[[components.OpenResponsesRequestOrder](../components/openresponsesrequestorder.md)] | :heavy_minus_sign: | An ordered list of provider slugs. The router will attempt to use the first provider in the subset of this list that supports your requested model, and fall back to the next if it is unavailable. If no providers are available, the request will fail with an error message. | | +| `only` | List[[components.OpenResponsesRequestOnly](../components/openresponsesrequestonly.md)] | :heavy_minus_sign: | List of provider slugs to allow. If provided, this list is merged with your account-wide allowed provider settings for this request. | | +| `ignore` | List[[components.OpenResponsesRequestIgnore](../components/openresponsesrequestignore.md)] | :heavy_minus_sign: | List of provider slugs to ignore. If provided, this list is merged with your account-wide ignored provider settings for this request. | | +| `quantizations` | List[[components.Quantization](../components/quantization.md)] | :heavy_minus_sign: | A list of quantization levels to filter the provider by. | | +| `sort` | [OptionalNullable[components.OpenResponsesRequestSort]](../components/openresponsesrequestsort.md) | :heavy_minus_sign: | The sorting strategy to use for this request, if "order" is not specified. When set, no load balancing is performed. | price | +| `max_price` | [Optional[components.OpenResponsesRequestMaxPrice]](../components/openresponsesrequestmaxprice.md) | :heavy_minus_sign: | The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. | | +| `preferred_min_throughput` | [OptionalNullable[components.PreferredMinThroughput]](../components/preferredminthroughput.md) | :heavy_minus_sign: | Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. | 100 | +| `preferred_max_latency` | [OptionalNullable[components.PreferredMaxLatency]](../components/preferredmaxlatency.md) | :heavy_minus_sign: | Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. | 5 | \ No newline at end of file diff --git a/docs/components/percentilelatencycutoffs.md b/docs/components/percentilelatencycutoffs.md new file mode 100644 index 0000000..6a2c04f --- /dev/null +++ b/docs/components/percentilelatencycutoffs.md @@ -0,0 +1,13 @@ +# PercentileLatencyCutoffs + +Percentile-based latency cutoffs. All specified cutoffs must be met for an endpoint to be preferred. 
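For illustration, a minimal sketch of constructing this cutoffs object with the generated SDK, assuming the `openrouter.components` import path added in this changeset; any percentile left unset is simply not constrained:

```python
from openrouter import components

# Prefer endpoints whose median time-to-first-token stays under 1s
# and whose p99 stays under 4s; p75 and p90 are left unconstrained.
cutoffs = components.PercentileLatencyCutoffs(p50=1.0, p99=4.0)
```
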
+ + +## Fields + +| Field | Type | Required | Description | +| ----------------------------- | ----------------------------- | ----------------------------- | ----------------------------- | +| `p50` | *OptionalNullable[float]* | :heavy_minus_sign: | Maximum p50 latency (seconds) | +| `p75` | *OptionalNullable[float]* | :heavy_minus_sign: | Maximum p75 latency (seconds) | +| `p90` | *OptionalNullable[float]* | :heavy_minus_sign: | Maximum p90 latency (seconds) | +| `p99` | *OptionalNullable[float]* | :heavy_minus_sign: | Maximum p99 latency (seconds) | \ No newline at end of file diff --git a/docs/components/percentilestats.md b/docs/components/percentilestats.md new file mode 100644 index 0000000..6f47431 --- /dev/null +++ b/docs/components/percentilestats.md @@ -0,0 +1,13 @@ +# PercentileStats + +Latency percentiles in milliseconds over the last 30 minutes. Latency measures time to first token. Only visible when authenticated with an API key or cookie; returns null for unauthenticated requests. + + +## Fields + +| Field | Type | Required | Description | Example | +| ------------------------ | ------------------------ | ------------------------ | ------------------------ | ------------------------ | +| `p50` | *float* | :heavy_check_mark: | Median (50th percentile) | 25.5 | +| `p75` | *float* | :heavy_check_mark: | 75th percentile | 35.2 | +| `p90` | *float* | :heavy_check_mark: | 90th percentile | 48.7 | +| `p99` | *float* | :heavy_check_mark: | 99th percentile | 85.3 | \ No newline at end of file diff --git a/docs/components/percentilethroughputcutoffs.md b/docs/components/percentilethroughputcutoffs.md new file mode 100644 index 0000000..dff209d --- /dev/null +++ b/docs/components/percentilethroughputcutoffs.md @@ -0,0 +1,13 @@ +# PercentileThroughputCutoffs + +Percentile-based throughput cutoffs. All specified cutoffs must be met for an endpoint to be preferred. + + +## Fields + +| Field | Type | Required | Description | +| ----------------------------------- | ----------------------------------- | ----------------------------------- | ----------------------------------- | +| `p50` | *OptionalNullable[float]* | :heavy_minus_sign: | Minimum p50 throughput (tokens/sec) | +| `p75` | *OptionalNullable[float]* | :heavy_minus_sign: | Minimum p75 throughput (tokens/sec) | +| `p90` | *OptionalNullable[float]* | :heavy_minus_sign: | Minimum p90 throughput (tokens/sec) | +| `p99` | *OptionalNullable[float]* | :heavy_minus_sign: | Minimum p99 throughput (tokens/sec) | \ No newline at end of file diff --git a/docs/components/preferredmaxlatency.md b/docs/components/preferredmaxlatency.md new file mode 100644 index 0000000..ce96e64 --- /dev/null +++ b/docs/components/preferredmaxlatency.md @@ -0,0 +1,25 @@ +# PreferredMaxLatency + +Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. 
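Both arms of this union, sketched under the same assumptions (per the description above, a bare number applies to p50 only; the object arm is the `PercentileLatencyCutoffs` model documented earlier in this changeset):

```python
from openrouter import components

# Number form: treated as a p50 cutoff of 5 seconds.
simple: float = 5.0

# Object form: percentile-specific cutoffs in seconds.
detailed = components.PercentileLatencyCutoffs(p50=2.0, p90=6.0)
```
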
+ + +## Supported Types + +### `float` + +```python +value: float = /* values here */ +``` + +### `components.PercentileLatencyCutoffs` + +```python +value: components.PercentileLatencyCutoffs = /* values here */ +``` + +### `Any` + +```python +value: Any = /* values here */ +``` + diff --git a/docs/components/preferredminthroughput.md b/docs/components/preferredminthroughput.md new file mode 100644 index 0000000..fd49bf8 --- /dev/null +++ b/docs/components/preferredminthroughput.md @@ -0,0 +1,25 @@ +# PreferredMinThroughput + +Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. + + +## Supported Types + +### `float` + +```python +value: float = /* values here */ +``` + +### `components.PercentileThroughputCutoffs` + +```python +value: components.PercentileThroughputCutoffs = /* values here */ +``` + +### `Any` + +```python +value: Any = /* values here */ +``` + diff --git a/docs/components/prompttokensdetails.md b/docs/components/prompttokensdetails.md index a7248a0..844a92d 100644 --- a/docs/components/prompttokensdetails.md +++ b/docs/components/prompttokensdetails.md @@ -3,8 +3,9 @@ ## Fields -| Field | Type | Required | Description | -| ------------------ | ------------------ | ------------------ | ------------------ | -| `cached_tokens` | *Optional[float]* | :heavy_minus_sign: | N/A | -| `audio_tokens` | *Optional[float]* | :heavy_minus_sign: | N/A | -| `video_tokens` | *Optional[float]* | :heavy_minus_sign: | N/A | \ No newline at end of file +| Field | Type | Required | Description | +| -------------------- | -------------------- | -------------------- | -------------------- | +| `cached_tokens` | *Optional[float]* | :heavy_minus_sign: | N/A | +| `cache_write_tokens` | *Optional[float]* | :heavy_minus_sign: | N/A | +| `audio_tokens` | *Optional[float]* | :heavy_minus_sign: | N/A | +| `video_tokens` | *Optional[float]* | :heavy_minus_sign: | N/A | \ No newline at end of file diff --git a/docs/components/providername.md b/docs/components/providername.md index 3f2d1f7..5b56801 100644 --- a/docs/components/providername.md +++ b/docs/components/providername.md @@ -31,12 +31,12 @@ | `FIREWORKS` | Fireworks | | `FRIENDLI` | Friendli | | `GMI_CLOUD` | GMICloud | -| `GO_POMELO` | GoPomelo | | `GOOGLE` | Google | | `GOOGLE_AI_STUDIO` | Google AI Studio | | `GROQ` | Groq | | `HYPERBOLIC` | Hyperbolic | | `INCEPTION` | Inception | +| `INCEPTRON` | Inceptron | | `INFERENCE_NET` | InferenceNet | | `INFERMATIC` | Infermatic | | `INFLECTION` | Inflection | @@ -61,13 +61,14 @@ | `PHALA` | Phala | | `RELACE` | Relace | | `SAMBA_NOVA` | SambaNova | +| `SEED` | Seed | | `SILICON_FLOW` | SiliconFlow | | `SOURCEFUL` | Sourceful | | `STEALTH` | Stealth | | `STREAM_LAKE` | StreamLake | | `SWITCHPOINT` | Switchpoint | -| `TARGON` | Targon | | `TOGETHER` | Together | +| `UPSTAGE` | Upstage | | `VENICE` | Venice | | `WAND_B` | WandB | | `XIAOMI` | Xiaomi | diff --git a/docs/components/providerpreferences.md b/docs/components/providerpreferences.md index e823310..bb33997 100644 --- a/docs/components/providerpreferences.md +++ b/docs/components/providerpreferences.md @@ -5,20 +5,18 @@ Provider routing preferences for the request. 
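A hedged sketch of these preferences using the new union-typed fields; field names come from the table below, and the union forms follow `PreferredMinThroughput`/`PreferredMaxLatency` as documented above:

```python
from openrouter import components

prefs = components.ProviderPreferences(
    zdr=True,
    # Bare number: minimum p50 throughput of 100 tokens/sec.
    preferred_min_throughput=100,
    # Object form: percentile-specific latency ceilings in seconds.
    preferred_max_latency=components.PercentileLatencyCutoffs(p50=2.0, p99=8.0),
)
```
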
## Fields -| Field | Type | Required | Description | Example | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `allow_fallbacks` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to allow backup providers to serve requests
- true: (default) when the primary provider (or your custom providers in "order") is unavailable, use the next best provider.
- false: use only the primary/custom provider, and return the upstream error if it's unavailable.
| | -| `require_parameters` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to filter providers to only those that support the parameters you've provided. If this setting is omitted or set to false, then providers will receive only the parameters they support, and ignore the rest. | | -| `data_collection` | [OptionalNullable[components.DataCollection]](../components/datacollection.md) | :heavy_minus_sign: | Data collection setting. If no available model provider meets the requirement, your request will return an error.
- allow: (default) allow providers which store user data non-transiently and may train on it

- deny: use only providers which do not collect user data. | allow | -| `zdr` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. When true, only endpoints that do not retain prompts will be used. | true | -| `enforce_distillable_text` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to restrict routing to only models that allow text distillation. When true, only models where the author has allowed distillation will be used. | true | -| `order` | List[[components.ProviderPreferencesOrder](../components/providerpreferencesorder.md)] | :heavy_minus_sign: | An ordered list of provider slugs. The router will attempt to use the first provider in the subset of this list that supports your requested model, and fall back to the next if it is unavailable. If no providers are available, the request will fail with an error message. | | -| `only` | List[[components.ProviderPreferencesOnly](../components/providerpreferencesonly.md)] | :heavy_minus_sign: | List of provider slugs to allow. If provided, this list is merged with your account-wide allowed provider settings for this request. | | -| `ignore` | List[[components.ProviderPreferencesIgnore](../components/providerpreferencesignore.md)] | :heavy_minus_sign: | List of provider slugs to ignore. If provided, this list is merged with your account-wide ignored provider settings for this request. | | -| `quantizations` | List[[components.Quantization](../components/quantization.md)] | :heavy_minus_sign: | A list of quantization levels to filter the provider by. | | -| `sort` | [OptionalNullable[components.ProviderPreferencesSortUnion]](../components/providerpreferencessortunion.md) | :heavy_minus_sign: | N/A | | -| `max_price` | [Optional[components.ProviderPreferencesMaxPrice]](../components/providerpreferencesmaxprice.md) | :heavy_minus_sign: | The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. | | -| `preferred_min_throughput` | *OptionalNullable[float]* | :heavy_minus_sign: | Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. | 100 | -| `preferred_max_latency` | *OptionalNullable[float]* | :heavy_minus_sign: | Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. | 5 | -| ~~`min_throughput`~~ | *OptionalNullable[float]* | :heavy_minus_sign: | : warning: ** DEPRECATED **: Use preferred_min_throughput instead..

**DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for preferred_min_throughput. | 100 | -| ~~`max_latency`~~ | *OptionalNullable[float]* | :heavy_minus_sign: | : warning: ** DEPRECATED **: Use preferred_max_latency instead..

**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency. | 5 | \ No newline at end of file +| Field | Type | Required | Description | Example | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `allow_fallbacks` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to allow backup providers to serve requests
- true: (default) when the primary provider (or your custom providers in "order") is unavailable, use the next best provider.
- false: use only the primary/custom provider, and return the upstream error if it's unavailable.
| | +| `require_parameters` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to filter providers to only those that support the parameters you've provided. If this setting is omitted or set to false, then providers will receive only the parameters they support, and ignore the rest. | | +| `data_collection` | [OptionalNullable[components.DataCollection]](../components/datacollection.md) | :heavy_minus_sign: | Data collection setting. If no available model provider meets the requirement, your request will return an error.
- allow: (default) allow providers which store user data non-transiently and may train on it.

- deny: use only providers which do not collect user data. | allow | +| `zdr` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. When true, only endpoints that do not retain prompts will be used. | true | +| `enforce_distillable_text` | *OptionalNullable[bool]* | :heavy_minus_sign: | Whether to restrict routing to only models that allow text distillation. When true, only models where the author has allowed distillation will be used. | true | +| `order` | List[[components.ProviderPreferencesOrder](../components/providerpreferencesorder.md)] | :heavy_minus_sign: | An ordered list of provider slugs. The router will attempt to use the first provider in the subset of this list that supports your requested model, and fall back to the next if it is unavailable. If no providers are available, the request will fail with an error message. | | +| `only` | List[[components.ProviderPreferencesOnly](../components/providerpreferencesonly.md)] | :heavy_minus_sign: | List of provider slugs to allow. If provided, this list is merged with your account-wide allowed provider settings for this request. | | +| `ignore` | List[[components.ProviderPreferencesIgnore](../components/providerpreferencesignore.md)] | :heavy_minus_sign: | List of provider slugs to ignore. If provided, this list is merged with your account-wide ignored provider settings for this request. | | +| `quantizations` | List[[components.Quantization](../components/quantization.md)] | :heavy_minus_sign: | A list of quantization levels to filter the provider by. | | +| `sort` | [OptionalNullable[components.ProviderPreferencesSortUnion]](../components/providerpreferencessortunion.md) | :heavy_minus_sign: | N/A | | +| `max_price` | [Optional[components.ProviderPreferencesMaxPrice]](../components/providerpreferencesmaxprice.md) | :heavy_minus_sign: | The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion. | | +| `preferred_min_throughput` | [OptionalNullable[components.PreferredMinThroughput]](../components/preferredminthroughput.md) | :heavy_minus_sign: | Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. | 100 | +| `preferred_max_latency` | [OptionalNullable[components.PreferredMaxLatency]](../components/preferredmaxlatency.md) | :heavy_minus_sign: | Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold. 
| 5 | \ No newline at end of file diff --git a/docs/components/publicendpoint.md b/docs/components/publicendpoint.md index 3f39e39..dac1698 100644 --- a/docs/components/publicendpoint.md +++ b/docs/components/publicendpoint.md @@ -5,18 +5,20 @@ Information about a specific model endpoint ## Fields -| Field | Type | Required | Description | Example | -| ---------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | -| `name` | *str* | :heavy_check_mark: | N/A | | -| `model_name` | *str* | :heavy_check_mark: | N/A | | -| `context_length` | *float* | :heavy_check_mark: | N/A | | -| `pricing` | [components.Pricing](../components/pricing.md) | :heavy_check_mark: | N/A | | -| `provider_name` | [components.ProviderName](../components/providername.md) | :heavy_check_mark: | N/A | OpenAI | -| `tag` | *str* | :heavy_check_mark: | N/A | | -| `quantization` | [Nullable[components.PublicEndpointQuantization]](../components/publicendpointquantization.md) | :heavy_check_mark: | N/A | fp16 | -| `max_completion_tokens` | *Nullable[float]* | :heavy_check_mark: | N/A | | -| `max_prompt_tokens` | *Nullable[float]* | :heavy_check_mark: | N/A | | -| `supported_parameters` | List[[components.Parameter](../components/parameter.md)] | :heavy_check_mark: | N/A | | -| `status` | [Optional[components.EndpointStatus]](../components/endpointstatus.md) | :heavy_minus_sign: | N/A | 0 | -| `uptime_last_30m` | *Nullable[float]* | :heavy_check_mark: | N/A | | -| `supports_implicit_caching` | *bool* | :heavy_check_mark: | N/A | | \ No newline at end of file +| Field | Type | Required | Description | Example | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `name` | *str* | :heavy_check_mark: | N/A | | +| `model_name` | *str* | :heavy_check_mark: | N/A | | +| `context_length` | *float* | :heavy_check_mark: | N/A | | +| `pricing` | [components.Pricing](../components/pricing.md) | :heavy_check_mark: | N/A | | +| `provider_name` | [components.ProviderName](../components/providername.md) | :heavy_check_mark: | N/A | OpenAI | +| `tag` | *str* | :heavy_check_mark: | N/A | | +| `quantization` | 
[Nullable[components.PublicEndpointQuantization]](../components/publicendpointquantization.md) | :heavy_check_mark: | N/A | fp16 | +| `max_completion_tokens` | *Nullable[float]* | :heavy_check_mark: | N/A | | +| `max_prompt_tokens` | *Nullable[float]* | :heavy_check_mark: | N/A | | +| `supported_parameters` | List[[components.Parameter](../components/parameter.md)] | :heavy_check_mark: | N/A | | +| `status` | [Optional[components.EndpointStatus]](../components/endpointstatus.md) | :heavy_minus_sign: | N/A | 0 | +| `uptime_last_30m` | *Nullable[float]* | :heavy_check_mark: | N/A | | +| `supports_implicit_caching` | *bool* | :heavy_check_mark: | N/A | | +| `latency_last_30m` | [Nullable[components.PercentileStats]](../components/percentilestats.md) | :heavy_check_mark: | Latency percentiles in milliseconds over the last 30 minutes. Latency measures time to first token. Only visible when authenticated with an API key or cookie; returns null for unauthenticated requests. | | +| `throughput_last_30m` | [Nullable[components.PercentileStats]](../components/percentilestats.md) | :heavy_check_mark: | N/A | | \ No newline at end of file diff --git a/docs/components/responsesoutputitemreasoning.md b/docs/components/responsesoutputitemreasoning.md index 986ba71..093a7cb 100644 --- a/docs/components/responsesoutputitemreasoning.md +++ b/docs/components/responsesoutputitemreasoning.md @@ -5,11 +5,13 @@ An output item containing reasoning ## Fields -| Field | Type | Required | Description | -| ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | -| `type` | [components.ResponsesOutputItemReasoningType](../components/responsesoutputitemreasoningtype.md) | :heavy_check_mark: | N/A | -| `id` | *str* | :heavy_check_mark: | N/A | -| `content` | List[[components.ReasoningTextContent](../components/reasoningtextcontent.md)] | :heavy_minus_sign: | N/A | -| `summary` | List[[components.ReasoningSummaryText](../components/reasoningsummarytext.md)] | :heavy_check_mark: | N/A | -| `encrypted_content` | *OptionalNullable[str]* | :heavy_minus_sign: | N/A | -| `status` | [Optional[components.ResponsesOutputItemReasoningStatusUnion]](../components/responsesoutputitemreasoningstatusunion.md) | :heavy_minus_sign: | N/A | \ No newline at end of file +| Field | Type | Required | Description | Example | +| ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ | +| `type` | [components.ResponsesOutputItemReasoningType](../components/responsesoutputitemreasoningtype.md) | :heavy_check_mark: | N/A | | +| `id` | *str* | :heavy_check_mark: | N/A | | 
+| `content` | List[[components.ReasoningTextContent](../components/reasoningtextcontent.md)] | :heavy_minus_sign: | N/A | | +| `summary` | List[[components.ReasoningSummaryText](../components/reasoningsummarytext.md)] | :heavy_check_mark: | N/A | | +| `encrypted_content` | *OptionalNullable[str]* | :heavy_minus_sign: | N/A | | +| `status` | [Optional[components.ResponsesOutputItemReasoningStatusUnion]](../components/responsesoutputitemreasoningstatusunion.md) | :heavy_minus_sign: | N/A | | +| `signature` | *OptionalNullable[str]* | :heavy_minus_sign: | A signature for the reasoning content, used for verification | EvcBCkgIChABGAIqQKkSDbRuVEQUk9qN1odC098l9SEj... | +| `format_` | [OptionalNullable[components.ResponsesOutputItemReasoningFormat]](../components/responsesoutputitemreasoningformat.md) | :heavy_minus_sign: | The format of the reasoning content | anthropic-claude-v1 | \ No newline at end of file diff --git a/docs/components/responsesoutputitemreasoningformat.md b/docs/components/responsesoutputitemreasoningformat.md new file mode 100644 index 0000000..d3ea6ee --- /dev/null +++ b/docs/components/responsesoutputitemreasoningformat.md @@ -0,0 +1,15 @@ +# ResponsesOutputItemReasoningFormat + +The format of the reasoning content + + +## Values + +| Name | Value | +| --------------------------- | --------------------------- | +| `UNKNOWN` | unknown | +| `OPENAI_RESPONSES_V1` | openai-responses-v1 | +| `AZURE_OPENAI_RESPONSES_V1` | azure-openai-responses-v1 | +| `XAI_RESPONSES_V1` | xai-responses-v1 | +| `ANTHROPIC_CLAUDE_V1` | anthropic-claude-v1 | +| `GOOGLE_GEMINI_V1` | google-gemini-v1 | \ No newline at end of file diff --git a/docs/components/responsesoutputmodality.md b/docs/components/responsesoutputmodality.md new file mode 100644 index 0000000..f94f6c1 --- /dev/null +++ b/docs/components/responsesoutputmodality.md @@ -0,0 +1,9 @@ +# ResponsesOutputModality + + +## Values + +| Name | Value | +| ------- | ------- | +| `TEXT` | text | +| `IMAGE` | image | \ No newline at end of file diff --git a/docs/components/schema0enum.md b/docs/components/schema0enum.md index 92e5d2f..f1c6708 100644 --- a/docs/components/schema0enum.md +++ b/docs/components/schema0enum.md @@ -31,12 +31,12 @@ | `FIREWORKS` | Fireworks | | `FRIENDLI` | Friendli | | `GMI_CLOUD` | GMICloud | -| `GO_POMELO` | GoPomelo | | `GOOGLE` | Google | | `GOOGLE_AI_STUDIO` | Google AI Studio | | `GROQ` | Groq | | `HYPERBOLIC` | Hyperbolic | | `INCEPTION` | Inception | +| `INCEPTRON` | Inceptron | | `INFERENCE_NET` | InferenceNet | | `INFERMATIC` | Infermatic | | `INFLECTION` | Inflection | @@ -61,13 +61,14 @@ | `PHALA` | Phala | | `RELACE` | Relace | | `SAMBA_NOVA` | SambaNova | +| `SEED` | Seed | | `SILICON_FLOW` | SiliconFlow | | `SOURCEFUL` | Sourceful | | `STEALTH` | Stealth | | `STREAM_LAKE` | StreamLake | | `SWITCHPOINT` | Switchpoint | -| `TARGON` | Targon | | `TOGETHER` | Together | +| `UPSTAGE` | Upstage | | `VENICE` | Venice | | `WAND_B` | WandB | | `XIAOMI` | Xiaomi | diff --git a/docs/components/schema5.md b/docs/components/schema5.md index 471b50f..64eb2b3 100644 --- a/docs/components/schema5.md +++ b/docs/components/schema5.md @@ -3,10 +3,11 @@ ## Values -| Name | Value | -| --------------------- | --------------------- | -| `UNKNOWN` | unknown | -| `OPENAI_RESPONSES_V1` | openai-responses-v1 | -| `XAI_RESPONSES_V1` | xai-responses-v1 | -| `ANTHROPIC_CLAUDE_V1` | anthropic-claude-v1 | -| `GOOGLE_GEMINI_V1` | google-gemini-v1 | \ No newline at end of file +| Name | Value | +| --------------------------- | 
--------------------------- | +| `UNKNOWN` | unknown | +| `OPENAI_RESPONSES_V1` | openai-responses-v1 | +| `AZURE_OPENAI_RESPONSES_V1` | azure-openai-responses-v1 | +| `XAI_RESPONSES_V1` | xai-responses-v1 | +| `ANTHROPIC_CLAUDE_V1` | anthropic-claude-v1 | +| `GOOGLE_GEMINI_V1` | google-gemini-v1 | \ No newline at end of file diff --git a/docs/operations/listendpointsresponse.md b/docs/operations/listendpointsresponse.md index cde8b93..d4a7a58 100644 --- a/docs/operations/listendpointsresponse.md +++ b/docs/operations/listendpointsresponse.md @@ -5,6 +5,6 @@ Returns a list of endpoints ## Fields -| Field | Type | Required | Description | Example | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `data` | [components.ListEndpointsResponse](../components/listendpointsresponse.md) | :heavy_check_mark: | List of available endpoints for a model | {
"id": "openai/gpt-4",
"name": "GPT-4",
"created": 1692901234,
"description": "GPT-4 is a large multimodal model that can solve difficult problems with greater accuracy.",
"architecture": {
"tokenizer": "GPT",
"instruct_type": "chatml",
"modality": "text-\u003etext",
"input_modalities": [
"text"
],
"output_modalities": [
"text"
]
},
"endpoints": [
{
"name": "OpenAI: GPT-4",
"model_name": "GPT-4",
"context_length": 8192,
"pricing": {
"prompt": "0.00003",
"completion": "0.00006",
"request": "0",
"image": "0"
},
"provider_name": "OpenAI",
"tag": "openai",
"quantization": "fp16",
"max_completion_tokens": 4096,
"max_prompt_tokens": 8192,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"frequency_penalty",
"presence_penalty"
],
"status": "default",
"uptime_last_30m": 99.5,
"supports_implicit_caching": true
}
]
} | \ No newline at end of file +| Field | Type | Required | Description | Example | +| ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `data` | [components.ListEndpointsResponse](../components/listendpointsresponse.md) | :heavy_check_mark: | List of available endpoints for a model | {
"id": "openai/gpt-4",
"name": "GPT-4",
"created": 1692901234,
"description": "GPT-4 is a large multimodal model that can solve difficult problems with greater accuracy.",
"architecture": {
"tokenizer": "GPT",
"instruct_type": "chatml",
"modality": "text-\u003etext",
"input_modalities": [
"text"
],
"output_modalities": [
"text"
]
},
"endpoints": [
{
"name": "OpenAI: GPT-4",
"model_name": "GPT-4",
"context_length": 8192,
"pricing": {
"prompt": "0.00003",
"completion": "0.00006",
"request": "0",
"image": "0"
},
"provider_name": "OpenAI",
"tag": "openai",
"quantization": "fp16",
"max_completion_tokens": 4096,
"max_prompt_tokens": 8192,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"frequency_penalty",
"presence_penalty"
],
"status": "default",
"uptime_last_30m": 99.5,
"supports_implicit_caching": true,
"latency_last_30m": {
"p50": 0.25,
"p75": 0.35,
"p90": 0.48,
"p99": 0.85
},
"throughput_last_30m": {
"p50": 45.2,
"p75": 38.5,
"p90": 28.3,
"p99": 15.1
}
}
]
} | \ No newline at end of file diff --git a/docs/sdks/chat/README.md b/docs/sdks/chat/README.md index 9a66640..6b781c3 100644 --- a/docs/sdks/chat/README.md +++ b/docs/sdks/chat/README.md @@ -63,6 +63,8 @@ with OpenRouter( | `tools` | List[[components.ToolDefinitionJSON](../../components/tooldefinitionjson.md)] | :heavy_minus_sign: | N/A | | `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | | `debug` | [Optional[components.Debug]](../../components/debug.md) | :heavy_minus_sign: | N/A | +| `image_config` | Dict[str, [components.ChatGenerationParamsImageConfig](../../components/chatgenerationparamsimageconfig.md)] | :heavy_minus_sign: | N/A | +| `modalities` | List[[components.Modality](../../components/modality.md)] | :heavy_minus_sign: | N/A | | `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | ### Response diff --git a/docs/sdks/responses/README.md b/docs/sdks/responses/README.md index 3db5046..102a9d0 100644 --- a/docs/sdks/responses/README.md +++ b/docs/sdks/responses/README.md @@ -52,6 +52,8 @@ with OpenRouter( | `temperature` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | | | `top_p` | *OptionalNullable[float]* | :heavy_minus_sign: | N/A | | | `top_k` | *Optional[float]* | :heavy_minus_sign: | N/A | | +| `image_config` | Dict[str, [components.OpenResponsesRequestImageConfig](../../components/openresponsesrequestimageconfig.md)] | :heavy_minus_sign: | Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details. | {
"aspect_ratio": "16:9"
} | +| `modalities` | List[[components.ResponsesOutputModality](../../components/responsesoutputmodality.md)] | :heavy_minus_sign: | Output modalities for the response. Supported values are "text" and "image". | [
"text",
"image"
] | | `prompt_cache_key` | *OptionalNullable[str]* | :heavy_minus_sign: | N/A | | | `previous_response_id` | *OptionalNullable[str]* | :heavy_minus_sign: | N/A | | | `prompt` | [OptionalNullable[components.OpenAIResponsesPrompt]](../../components/openairesponsesprompt.md) | :heavy_minus_sign: | N/A | | diff --git a/src/openrouter/chat.py b/src/openrouter/chat.py index 7f034a2..d75dbd2 100644 --- a/src/openrouter/chat.py +++ b/src/openrouter/chat.py @@ -76,6 +76,13 @@ def send( ] = None, top_p: OptionalNullable[float] = UNSET, debug: Optional[Union[components.Debug, components.DebugTypedDict]] = None, + image_config: Optional[ + Union[ + Dict[str, components.ChatGenerationParamsImageConfig], + Dict[str, components.ChatGenerationParamsImageConfigTypedDict], + ] + ] = None, + modalities: Optional[List[components.Modality]] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, @@ -112,6 +119,8 @@ def send( :param tools: :param top_p: :param debug: + :param image_config: + :param modalities: :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method :param timeout_ms: Override the default request timeout configuration for this method in milliseconds @@ -179,6 +188,13 @@ def send( ] = None, top_p: OptionalNullable[float] = UNSET, debug: Optional[Union[components.Debug, components.DebugTypedDict]] = None, + image_config: Optional[ + Union[ + Dict[str, components.ChatGenerationParamsImageConfig], + Dict[str, components.ChatGenerationParamsImageConfigTypedDict], + ] + ] = None, + modalities: Optional[List[components.Modality]] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, @@ -215,6 +231,8 @@ def send( :param tools: :param top_p: :param debug: + :param image_config: + :param modalities: :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method :param timeout_ms: Override the default request timeout configuration for this method in milliseconds @@ -281,6 +299,13 @@ def send( ] = None, top_p: OptionalNullable[float] = UNSET, debug: Optional[Union[components.Debug, components.DebugTypedDict]] = None, + image_config: Optional[ + Union[ + Dict[str, components.ChatGenerationParamsImageConfig], + Dict[str, components.ChatGenerationParamsImageConfigTypedDict], + ] + ] = None, + modalities: Optional[List[components.Modality]] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, @@ -317,6 +342,8 @@ def send( :param tools: :param top_p: :param debug: + :param image_config: + :param modalities: :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method :param timeout_ms: Override the default request timeout configuration for this method in milliseconds @@ -374,6 +401,8 @@ def send( ), top_p=top_p, debug=utils.get_pydantic_model(debug, Optional[components.Debug]), + image_config=image_config, + modalities=modalities, ) req = self._build_request( @@ -523,6 +552,13 @@ async def send_async( ] = None, top_p: OptionalNullable[float] = UNSET, debug: Optional[Union[components.Debug, components.DebugTypedDict]] = None, + image_config: Optional[ + Union[ + Dict[str, components.ChatGenerationParamsImageConfig], + Dict[str, 
components.ChatGenerationParamsImageConfigTypedDict], + ] + ] = None, + modalities: Optional[List[components.Modality]] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, @@ -559,6 +595,8 @@ async def send_async( :param tools: :param top_p: :param debug: + :param image_config: + :param modalities: :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method :param timeout_ms: Override the default request timeout configuration for this method in milliseconds @@ -626,6 +664,13 @@ async def send_async( ] = None, top_p: OptionalNullable[float] = UNSET, debug: Optional[Union[components.Debug, components.DebugTypedDict]] = None, + image_config: Optional[ + Union[ + Dict[str, components.ChatGenerationParamsImageConfig], + Dict[str, components.ChatGenerationParamsImageConfigTypedDict], + ] + ] = None, + modalities: Optional[List[components.Modality]] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, @@ -662,6 +707,8 @@ async def send_async( :param tools: :param top_p: :param debug: + :param image_config: + :param modalities: :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method :param timeout_ms: Override the default request timeout configuration for this method in milliseconds @@ -728,6 +775,13 @@ async def send_async( ] = None, top_p: OptionalNullable[float] = UNSET, debug: Optional[Union[components.Debug, components.DebugTypedDict]] = None, + image_config: Optional[ + Union[ + Dict[str, components.ChatGenerationParamsImageConfig], + Dict[str, components.ChatGenerationParamsImageConfigTypedDict], + ] + ] = None, + modalities: Optional[List[components.Modality]] = None, retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, @@ -764,6 +818,8 @@ async def send_async( :param tools: :param top_p: :param debug: + :param image_config: + :param modalities: :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method :param timeout_ms: Override the default request timeout configuration for this method in milliseconds @@ -821,6 +877,8 @@ async def send_async( ), top_p=top_p, debug=utils.get_pydantic_model(debug, Optional[components.Debug]), + image_config=image_config, + modalities=modalities, ) req = self._build_request_async( diff --git a/src/openrouter/components/__init__.py b/src/openrouter/components/__init__.py index 1106fa8..ec10262 100644 --- a/src/openrouter/components/__init__.py +++ b/src/openrouter/components/__init__.py @@ -38,8 +38,12 @@ from .chatgenerationparams import ( ChatGenerationParams, ChatGenerationParamsDataCollection, + ChatGenerationParamsImageConfig, + ChatGenerationParamsImageConfigTypedDict, ChatGenerationParamsMaxPrice, ChatGenerationParamsMaxPriceTypedDict, + ChatGenerationParamsPluginAutoRouter, + ChatGenerationParamsPluginAutoRouterTypedDict, ChatGenerationParamsPluginFileParser, ChatGenerationParamsPluginFileParserTypedDict, ChatGenerationParamsPluginModeration, @@ -50,6 +54,14 @@ ChatGenerationParamsPluginUnionTypedDict, ChatGenerationParamsPluginWeb, ChatGenerationParamsPluginWebTypedDict, + ChatGenerationParamsPreferredMaxLatency, + ChatGenerationParamsPreferredMaxLatencyTypedDict, + 
ChatGenerationParamsPreferredMaxLatencyUnion, + ChatGenerationParamsPreferredMaxLatencyUnionTypedDict, + ChatGenerationParamsPreferredMinThroughput, + ChatGenerationParamsPreferredMinThroughputTypedDict, + ChatGenerationParamsPreferredMinThroughputUnion, + ChatGenerationParamsPreferredMinThroughputUnionTypedDict, ChatGenerationParamsProvider, ChatGenerationParamsProviderTypedDict, ChatGenerationParamsResponseFormatJSONObject, @@ -67,6 +79,7 @@ DebugTypedDict, Effort, Engine, + Modality, Pdf, PdfEngine, PdfTypedDict, @@ -454,6 +467,7 @@ OpenResponsesReasoningSummaryTextDoneEventTypedDict, ) from .openresponsesrequest import ( + IDAutoRouter, IDFileParser, IDModeration, IDResponseHealing, @@ -461,12 +475,16 @@ OpenResponsesRequest, OpenResponsesRequestIgnore, OpenResponsesRequestIgnoreTypedDict, + OpenResponsesRequestImageConfig, + OpenResponsesRequestImageConfigTypedDict, OpenResponsesRequestMaxPrice, OpenResponsesRequestMaxPriceTypedDict, OpenResponsesRequestOnly, OpenResponsesRequestOnlyTypedDict, OpenResponsesRequestOrder, OpenResponsesRequestOrderTypedDict, + OpenResponsesRequestPluginAutoRouter, + OpenResponsesRequestPluginAutoRouterTypedDict, OpenResponsesRequestPluginFileParser, OpenResponsesRequestPluginFileParserTypedDict, OpenResponsesRequestPluginModeration, @@ -622,7 +640,21 @@ ) from .pdfparserengine import PDFParserEngine from .pdfparseroptions import PDFParserOptions, PDFParserOptionsTypedDict + from .percentilelatencycutoffs import ( + PercentileLatencyCutoffs, + PercentileLatencyCutoffsTypedDict, + ) + from .percentilestats import PercentileStats, PercentileStatsTypedDict + from .percentilethroughputcutoffs import ( + PercentileThroughputCutoffs, + PercentileThroughputCutoffsTypedDict, + ) from .perrequestlimits import PerRequestLimits, PerRequestLimitsTypedDict + from .preferredmaxlatency import PreferredMaxLatency, PreferredMaxLatencyTypedDict + from .preferredminthroughput import ( + PreferredMinThroughput, + PreferredMinThroughputTypedDict, + ) from .providername import ProviderName from .provideroverloadedresponseerrordata import ( ProviderOverloadedResponseErrorData, @@ -765,6 +797,7 @@ ) from .responsesoutputitemreasoning import ( ResponsesOutputItemReasoning, + ResponsesOutputItemReasoningFormat, ResponsesOutputItemReasoningStatusCompleted, ResponsesOutputItemReasoningStatusInProgress, ResponsesOutputItemReasoningStatusIncomplete, @@ -786,6 +819,7 @@ ResponsesOutputMessageType, ResponsesOutputMessageTypedDict, ) + from .responsesoutputmodality import ResponsesOutputModality from .responsessearchcontextsize import ResponsesSearchContextSize from .responseswebsearchcalloutput import ( ResponsesWebSearchCallOutput, @@ -873,8 +907,12 @@ "ChatErrorErrorTypedDict", "ChatGenerationParams", "ChatGenerationParamsDataCollection", + "ChatGenerationParamsImageConfig", + "ChatGenerationParamsImageConfigTypedDict", "ChatGenerationParamsMaxPrice", "ChatGenerationParamsMaxPriceTypedDict", + "ChatGenerationParamsPluginAutoRouter", + "ChatGenerationParamsPluginAutoRouterTypedDict", "ChatGenerationParamsPluginFileParser", "ChatGenerationParamsPluginFileParserTypedDict", "ChatGenerationParamsPluginModeration", @@ -885,6 +923,14 @@ "ChatGenerationParamsPluginUnionTypedDict", "ChatGenerationParamsPluginWeb", "ChatGenerationParamsPluginWebTypedDict", + "ChatGenerationParamsPreferredMaxLatency", + "ChatGenerationParamsPreferredMaxLatencyTypedDict", + "ChatGenerationParamsPreferredMaxLatencyUnion", + "ChatGenerationParamsPreferredMaxLatencyUnionTypedDict", + 
"ChatGenerationParamsPreferredMinThroughput", + "ChatGenerationParamsPreferredMinThroughputTypedDict", + "ChatGenerationParamsPreferredMinThroughputUnion", + "ChatGenerationParamsPreferredMinThroughputUnionTypedDict", "ChatGenerationParamsProvider", "ChatGenerationParamsProviderTypedDict", "ChatGenerationParamsResponseFormatJSONObject", @@ -996,6 +1042,7 @@ "FilePathTypedDict", "ForbiddenResponseErrorData", "ForbiddenResponseErrorDataTypedDict", + "IDAutoRouter", "IDFileParser", "IDModeration", "IDResponseHealing", @@ -1019,6 +1066,7 @@ "MessageDeveloper", "MessageDeveloperTypedDict", "MessageTypedDict", + "Modality", "Model", "ModelArchitecture", "ModelArchitectureInstructType", @@ -1185,12 +1233,16 @@ "OpenResponsesRequest", "OpenResponsesRequestIgnore", "OpenResponsesRequestIgnoreTypedDict", + "OpenResponsesRequestImageConfig", + "OpenResponsesRequestImageConfigTypedDict", "OpenResponsesRequestMaxPrice", "OpenResponsesRequestMaxPriceTypedDict", "OpenResponsesRequestOnly", "OpenResponsesRequestOnlyTypedDict", "OpenResponsesRequestOrder", "OpenResponsesRequestOrderTypedDict", + "OpenResponsesRequestPluginAutoRouter", + "OpenResponsesRequestPluginAutoRouterTypedDict", "OpenResponsesRequestPluginFileParser", "OpenResponsesRequestPluginFileParserTypedDict", "OpenResponsesRequestPluginModeration", @@ -1305,6 +1357,16 @@ "PdfTypedDict", "PerRequestLimits", "PerRequestLimitsTypedDict", + "PercentileLatencyCutoffs", + "PercentileLatencyCutoffsTypedDict", + "PercentileStats", + "PercentileStatsTypedDict", + "PercentileThroughputCutoffs", + "PercentileThroughputCutoffsTypedDict", + "PreferredMaxLatency", + "PreferredMaxLatencyTypedDict", + "PreferredMinThroughput", + "PreferredMinThroughputTypedDict", "Pricing", "PricingTypedDict", "Prompt", @@ -1412,6 +1474,7 @@ "ResponsesOutputItemFunctionCallType", "ResponsesOutputItemFunctionCallTypedDict", "ResponsesOutputItemReasoning", + "ResponsesOutputItemReasoningFormat", "ResponsesOutputItemReasoningStatusCompleted", "ResponsesOutputItemReasoningStatusInProgress", "ResponsesOutputItemReasoningStatusIncomplete", @@ -1431,6 +1494,7 @@ "ResponsesOutputMessageStatusUnionTypedDict", "ResponsesOutputMessageType", "ResponsesOutputMessageTypedDict", + "ResponsesOutputModality", "ResponsesSearchContextSize", "ResponsesWebSearchCallOutput", "ResponsesWebSearchCallOutputType", @@ -1554,8 +1618,12 @@ "CodeTypedDict": ".chaterror", "ChatGenerationParams": ".chatgenerationparams", "ChatGenerationParamsDataCollection": ".chatgenerationparams", + "ChatGenerationParamsImageConfig": ".chatgenerationparams", + "ChatGenerationParamsImageConfigTypedDict": ".chatgenerationparams", "ChatGenerationParamsMaxPrice": ".chatgenerationparams", "ChatGenerationParamsMaxPriceTypedDict": ".chatgenerationparams", + "ChatGenerationParamsPluginAutoRouter": ".chatgenerationparams", + "ChatGenerationParamsPluginAutoRouterTypedDict": ".chatgenerationparams", "ChatGenerationParamsPluginFileParser": ".chatgenerationparams", "ChatGenerationParamsPluginFileParserTypedDict": ".chatgenerationparams", "ChatGenerationParamsPluginModeration": ".chatgenerationparams", @@ -1566,6 +1634,14 @@ "ChatGenerationParamsPluginUnionTypedDict": ".chatgenerationparams", "ChatGenerationParamsPluginWeb": ".chatgenerationparams", "ChatGenerationParamsPluginWebTypedDict": ".chatgenerationparams", + "ChatGenerationParamsPreferredMaxLatency": ".chatgenerationparams", + "ChatGenerationParamsPreferredMaxLatencyTypedDict": ".chatgenerationparams", + "ChatGenerationParamsPreferredMaxLatencyUnion": ".chatgenerationparams", 
+ "ChatGenerationParamsPreferredMaxLatencyUnionTypedDict": ".chatgenerationparams", + "ChatGenerationParamsPreferredMinThroughput": ".chatgenerationparams", + "ChatGenerationParamsPreferredMinThroughputTypedDict": ".chatgenerationparams", + "ChatGenerationParamsPreferredMinThroughputUnion": ".chatgenerationparams", + "ChatGenerationParamsPreferredMinThroughputUnionTypedDict": ".chatgenerationparams", "ChatGenerationParamsProvider": ".chatgenerationparams", "ChatGenerationParamsProviderTypedDict": ".chatgenerationparams", "ChatGenerationParamsResponseFormatJSONObject": ".chatgenerationparams", @@ -1583,6 +1659,7 @@ "DebugTypedDict": ".chatgenerationparams", "Effort": ".chatgenerationparams", "Engine": ".chatgenerationparams", + "Modality": ".chatgenerationparams", "Pdf": ".chatgenerationparams", "PdfEngine": ".chatgenerationparams", "PdfTypedDict": ".chatgenerationparams", @@ -1881,6 +1958,7 @@ "OpenResponsesReasoningSummaryTextDoneEvent": ".openresponsesreasoningsummarytextdoneevent", "OpenResponsesReasoningSummaryTextDoneEventType": ".openresponsesreasoningsummarytextdoneevent", "OpenResponsesReasoningSummaryTextDoneEventTypedDict": ".openresponsesreasoningsummarytextdoneevent", + "IDAutoRouter": ".openresponsesrequest", "IDFileParser": ".openresponsesrequest", "IDModeration": ".openresponsesrequest", "IDResponseHealing": ".openresponsesrequest", @@ -1888,12 +1966,16 @@ "OpenResponsesRequest": ".openresponsesrequest", "OpenResponsesRequestIgnore": ".openresponsesrequest", "OpenResponsesRequestIgnoreTypedDict": ".openresponsesrequest", + "OpenResponsesRequestImageConfig": ".openresponsesrequest", + "OpenResponsesRequestImageConfigTypedDict": ".openresponsesrequest", "OpenResponsesRequestMaxPrice": ".openresponsesrequest", "OpenResponsesRequestMaxPriceTypedDict": ".openresponsesrequest", "OpenResponsesRequestOnly": ".openresponsesrequest", "OpenResponsesRequestOnlyTypedDict": ".openresponsesrequest", "OpenResponsesRequestOrder": ".openresponsesrequest", "OpenResponsesRequestOrderTypedDict": ".openresponsesrequest", + "OpenResponsesRequestPluginAutoRouter": ".openresponsesrequest", + "OpenResponsesRequestPluginAutoRouterTypedDict": ".openresponsesrequest", "OpenResponsesRequestPluginFileParser": ".openresponsesrequest", "OpenResponsesRequestPluginFileParserTypedDict": ".openresponsesrequest", "OpenResponsesRequestPluginModeration": ".openresponsesrequest", @@ -2025,8 +2107,18 @@ "PDFParserEngine": ".pdfparserengine", "PDFParserOptions": ".pdfparseroptions", "PDFParserOptionsTypedDict": ".pdfparseroptions", + "PercentileLatencyCutoffs": ".percentilelatencycutoffs", + "PercentileLatencyCutoffsTypedDict": ".percentilelatencycutoffs", + "PercentileStats": ".percentilestats", + "PercentileStatsTypedDict": ".percentilestats", + "PercentileThroughputCutoffs": ".percentilethroughputcutoffs", + "PercentileThroughputCutoffsTypedDict": ".percentilethroughputcutoffs", "PerRequestLimits": ".perrequestlimits", "PerRequestLimitsTypedDict": ".perrequestlimits", + "PreferredMaxLatency": ".preferredmaxlatency", + "PreferredMaxLatencyTypedDict": ".preferredmaxlatency", + "PreferredMinThroughput": ".preferredminthroughput", + "PreferredMinThroughputTypedDict": ".preferredminthroughput", "ProviderName": ".providername", "ProviderOverloadedResponseErrorData": ".provideroverloadedresponseerrordata", "ProviderOverloadedResponseErrorDataTypedDict": ".provideroverloadedresponseerrordata", @@ -2127,6 +2219,7 @@ "ResponsesOutputItemFunctionCallType": ".responsesoutputitemfunctioncall", 
"ResponsesOutputItemFunctionCallTypedDict": ".responsesoutputitemfunctioncall", "ResponsesOutputItemReasoning": ".responsesoutputitemreasoning", + "ResponsesOutputItemReasoningFormat": ".responsesoutputitemreasoning", "ResponsesOutputItemReasoningStatusCompleted": ".responsesoutputitemreasoning", "ResponsesOutputItemReasoningStatusInProgress": ".responsesoutputitemreasoning", "ResponsesOutputItemReasoningStatusIncomplete": ".responsesoutputitemreasoning", @@ -2145,6 +2238,7 @@ "ResponsesOutputMessageStatusUnionTypedDict": ".responsesoutputmessage", "ResponsesOutputMessageType": ".responsesoutputmessage", "ResponsesOutputMessageTypedDict": ".responsesoutputmessage", + "ResponsesOutputModality": ".responsesoutputmodality", "ResponsesSearchContextSize": ".responsessearchcontextsize", "ResponsesWebSearchCallOutput": ".responseswebsearchcalloutput", "ResponsesWebSearchCallOutputType": ".responseswebsearchcalloutput", diff --git a/src/openrouter/components/_schema0.py b/src/openrouter/components/_schema0.py index 0cf0caf..4da27c8 100644 --- a/src/openrouter/components/_schema0.py +++ b/src/openrouter/components/_schema0.py @@ -36,12 +36,12 @@ "Fireworks", "Friendli", "GMICloud", - "GoPomelo", "Google", "Google AI Studio", "Groq", "Hyperbolic", "Inception", + "Inceptron", "InferenceNet", "Infermatic", "Inflection", @@ -66,13 +66,14 @@ "Phala", "Relace", "SambaNova", + "Seed", "SiliconFlow", "Sourceful", "Stealth", "StreamLake", "Switchpoint", - "Targon", "Together", + "Upstage", "Venice", "WandB", "Xiaomi", diff --git a/src/openrouter/components/_schema3.py b/src/openrouter/components/_schema3.py index c9b2e40..84a7df8 100644 --- a/src/openrouter/components/_schema3.py +++ b/src/openrouter/components/_schema3.py @@ -21,6 +21,7 @@ Literal[ "unknown", "openai-responses-v1", + "azure-openai-responses-v1", "xai-responses-v1", "anthropic-claude-v1", "google-gemini-v1", diff --git a/src/openrouter/components/chatgenerationparams.py b/src/openrouter/components/chatgenerationparams.py index 5cae41e..226d298 100644 --- a/src/openrouter/components/chatgenerationparams.py +++ b/src/openrouter/components/chatgenerationparams.py @@ -80,6 +80,124 @@ class ChatGenerationParamsMaxPrice(BaseModel): request: Optional[Any] = None +class ChatGenerationParamsPreferredMinThroughputTypedDict(TypedDict): + p50: NotRequired[Nullable[float]] + p75: NotRequired[Nullable[float]] + p90: NotRequired[Nullable[float]] + p99: NotRequired[Nullable[float]] + + +class ChatGenerationParamsPreferredMinThroughput(BaseModel): + p50: OptionalNullable[float] = UNSET + + p75: OptionalNullable[float] = UNSET + + p90: OptionalNullable[float] = UNSET + + p99: OptionalNullable[float] = UNSET + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = ["p50", "p75", "p90", "p99"] + nullable_fields = ["p50", "p75", "p90", "p99"] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in type(self).model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m + + +ChatGenerationParamsPreferredMinThroughputUnionTypedDict = TypeAliasType( + 
"ChatGenerationParamsPreferredMinThroughputUnionTypedDict", + Union[ChatGenerationParamsPreferredMinThroughputTypedDict, float], +) + + +ChatGenerationParamsPreferredMinThroughputUnion = TypeAliasType( + "ChatGenerationParamsPreferredMinThroughputUnion", + Union[ChatGenerationParamsPreferredMinThroughput, float], +) + + +class ChatGenerationParamsPreferredMaxLatencyTypedDict(TypedDict): + p50: NotRequired[Nullable[float]] + p75: NotRequired[Nullable[float]] + p90: NotRequired[Nullable[float]] + p99: NotRequired[Nullable[float]] + + +class ChatGenerationParamsPreferredMaxLatency(BaseModel): + p50: OptionalNullable[float] = UNSET + + p75: OptionalNullable[float] = UNSET + + p90: OptionalNullable[float] = UNSET + + p99: OptionalNullable[float] = UNSET + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = ["p50", "p75", "p90", "p99"] + nullable_fields = ["p50", "p75", "p90", "p99"] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in type(self).model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m + + +ChatGenerationParamsPreferredMaxLatencyUnionTypedDict = TypeAliasType( + "ChatGenerationParamsPreferredMaxLatencyUnionTypedDict", + Union[ChatGenerationParamsPreferredMaxLatencyTypedDict, float], +) + + +ChatGenerationParamsPreferredMaxLatencyUnion = TypeAliasType( + "ChatGenerationParamsPreferredMaxLatencyUnion", + Union[ChatGenerationParamsPreferredMaxLatency, float], +) + + class ChatGenerationParamsProviderTypedDict(TypedDict): allow_fallbacks: NotRequired[Nullable[bool]] r"""Whether to allow backup providers to serve requests @@ -109,10 +227,14 @@ class ChatGenerationParamsProviderTypedDict(TypedDict): r"""The sorting strategy to use for this request, if \"order\" is not specified. When set, no load balancing is performed.""" max_price: NotRequired[ChatGenerationParamsMaxPriceTypedDict] r"""The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion.""" - preferred_min_throughput: NotRequired[Nullable[float]] - preferred_max_latency: NotRequired[Nullable[float]] - min_throughput: NotRequired[Nullable[float]] - max_latency: NotRequired[Nullable[float]] + preferred_min_throughput: NotRequired[ + Nullable[ChatGenerationParamsPreferredMinThroughputUnionTypedDict] + ] + r"""Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" + preferred_max_latency: NotRequired[ + Nullable[ChatGenerationParamsPreferredMaxLatencyUnionTypedDict] + ] + r"""Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. 
When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" class ChatGenerationParamsProvider(BaseModel): @@ -160,13 +282,15 @@ class ChatGenerationParamsProvider(BaseModel): max_price: Optional[ChatGenerationParamsMaxPrice] = None r"""The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion.""" - preferred_min_throughput: OptionalNullable[float] = UNSET - - preferred_max_latency: OptionalNullable[float] = UNSET - - min_throughput: OptionalNullable[float] = UNSET + preferred_min_throughput: OptionalNullable[ + ChatGenerationParamsPreferredMinThroughputUnion + ] = UNSET + r"""Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - max_latency: OptionalNullable[float] = UNSET + preferred_max_latency: OptionalNullable[ + ChatGenerationParamsPreferredMaxLatencyUnion + ] = UNSET + r"""Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" @model_serializer(mode="wrap") def serialize_model(self, handler): @@ -184,8 +308,6 @@ def serialize_model(self, handler): "max_price", "preferred_min_throughput", "preferred_max_latency", - "min_throughput", - "max_latency", ] nullable_fields = [ "allow_fallbacks", @@ -200,8 +322,6 @@ def serialize_model(self, handler): "sort", "preferred_min_throughput", "preferred_max_latency", - "min_throughput", - "max_latency", ] null_default_fields = [] @@ -331,11 +451,31 @@ class ChatGenerationParamsPluginModeration(BaseModel): ] = "moderation" +class ChatGenerationParamsPluginAutoRouterTypedDict(TypedDict): + id: Literal["auto-router"] + enabled: NotRequired[bool] + allowed_models: NotRequired[List[str]] + + +class ChatGenerationParamsPluginAutoRouter(BaseModel): + ID: Annotated[ + Annotated[ + Literal["auto-router"], AfterValidator(validate_const("auto-router")) + ], + pydantic.Field(alias="id"), + ] = "auto-router" + + enabled: Optional[bool] = None + + allowed_models: Optional[List[str]] = None + + ChatGenerationParamsPluginUnionTypedDict = TypeAliasType( "ChatGenerationParamsPluginUnionTypedDict", Union[ ChatGenerationParamsPluginModerationTypedDict, ChatGenerationParamsPluginResponseHealingTypedDict, + ChatGenerationParamsPluginAutoRouterTypedDict, ChatGenerationParamsPluginFileParserTypedDict, ChatGenerationParamsPluginWebTypedDict, ], @@ -344,6 +484,7 @@ class ChatGenerationParamsPluginModeration(BaseModel): ChatGenerationParamsPluginUnion = Annotated[ Union[ + Annotated[ChatGenerationParamsPluginAutoRouter, Tag("auto-router")], Annotated[ChatGenerationParamsPluginModeration, Tag("moderation")], Annotated[ChatGenerationParamsPluginWeb, Tag("web")], Annotated[ChatGenerationParamsPluginFileParser, Tag("file-parser")], @@ -498,6 +639,25 @@ class Debug(BaseModel): echo_upstream_body: Optional[bool] = None +ChatGenerationParamsImageConfigTypedDict = TypeAliasType( + "ChatGenerationParamsImageConfigTypedDict", Union[str, float] +) + + 
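
[Reviewer note, illustrative only] A minimal sketch of how the additions in this hunk might be exercised from the generated SDK. The request fields shown (`image_config`, `modalities`, `preferred_min_throughput`, `preferred_max_latency`, the `auto-router` plugin) all come from this diff; the client entry point, the auth keyword, and the `model`/`plugins` field names are assumptions about the surrounding SDK surface, not confirmed by it.

import os
from openrouter import Openrouter  # assumed top-level client name

client = Openrouter(api_key=os.environ["OPENROUTER_API_KEY"])  # assumed auth kwarg

result = client.chat.send(
    model="openrouter/auto",  # hypothetical model slug
    messages=[{"role": "user", "content": "Sketch a red square, then describe it."}],
    # New in this diff: request image output alongside text, with
    # provider-specific image options (keys and values vary by provider).
    modalities=["text", "image"],
    image_config={"size": "1024x1024"},
    provider={
        # A bare number applies to p50; an object sets per-percentile cutoffs.
        # Endpoints missing a cutoff are deprioritized in routing, not excluded.
        "preferred_min_throughput": {"p50": 30.0, "p90": 15.0},
        "preferred_max_latency": 1.5,
    },
    # The auto-router plugin added here supports wildcard filters over models.
    plugins=[{"id": "auto-router", "enabled": True, "allowed_models": ["anthropic/*"]}],
)

Plain dicts lean on the TypedDict unions used throughout the SDK; the typed component classes (e.g. `ChatGenerationParamsPluginAutoRouter`) are interchangeable.
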
+ChatGenerationParamsImageConfig = TypeAliasType( + "ChatGenerationParamsImageConfig", Union[str, float] +) + + +Modality = Union[ + Literal[ + "text", + "image", + ], + UnrecognizedStr, +] + + class ChatGenerationParamsTypedDict(TypedDict): messages: List[MessageTypedDict] provider: NotRequired[Nullable[ChatGenerationParamsProviderTypedDict]] @@ -529,6 +689,8 @@ class ChatGenerationParamsTypedDict(TypedDict): tools: NotRequired[List[ToolDefinitionJSONTypedDict]] top_p: NotRequired[Nullable[float]] debug: NotRequired[DebugTypedDict] + image_config: NotRequired[Dict[str, ChatGenerationParamsImageConfigTypedDict]] + modalities: NotRequired[List[Modality]] class ChatGenerationParams(BaseModel): @@ -591,6 +753,12 @@ class ChatGenerationParams(BaseModel): debug: Optional[Debug] = None + image_config: Optional[Dict[str, ChatGenerationParamsImageConfig]] = None + + modalities: Optional[ + List[Annotated[Modality, PlainValidator(validate_open_enum(False))]] + ] = None + @model_serializer(mode="wrap") def serialize_model(self, handler): optional_fields = [ @@ -620,6 +788,8 @@ def serialize_model(self, handler): "tools", "top_p", "debug", + "image_config", + "modalities", ] nullable_fields = [ "provider", diff --git a/src/openrouter/components/chatgenerationtokenusage.py b/src/openrouter/components/chatgenerationtokenusage.py index e8aa115..6079682 100644 --- a/src/openrouter/components/chatgenerationtokenusage.py +++ b/src/openrouter/components/chatgenerationtokenusage.py @@ -72,6 +72,7 @@ def serialize_model(self, handler): class PromptTokensDetailsTypedDict(TypedDict): cached_tokens: NotRequired[float] + cache_write_tokens: NotRequired[float] audio_tokens: NotRequired[float] video_tokens: NotRequired[float] @@ -79,6 +80,8 @@ class PromptTokensDetailsTypedDict(TypedDict): class PromptTokensDetails(BaseModel): cached_tokens: Optional[float] = None + cache_write_tokens: Optional[float] = None + audio_tokens: Optional[float] = None video_tokens: Optional[float] = None diff --git a/src/openrouter/components/openresponsesinput.py b/src/openrouter/components/openresponsesinput.py index af37abe..e40fe56 100644 --- a/src/openrouter/components/openresponsesinput.py +++ b/src/openrouter/components/openresponsesinput.py @@ -60,9 +60,9 @@ OpenResponsesFunctionCallOutputTypedDict, ResponsesOutputMessageTypedDict, OpenResponsesFunctionToolCallTypedDict, - ResponsesOutputItemReasoningTypedDict, ResponsesOutputItemFunctionCallTypedDict, OpenResponsesReasoningTypedDict, + ResponsesOutputItemReasoningTypedDict, ], ) @@ -78,9 +78,9 @@ OpenResponsesFunctionCallOutput, ResponsesOutputMessage, OpenResponsesFunctionToolCall, - ResponsesOutputItemReasoning, ResponsesOutputItemFunctionCall, OpenResponsesReasoning, + ResponsesOutputItemReasoning, ], ) diff --git a/src/openrouter/components/openresponsesreasoning.py b/src/openrouter/components/openresponsesreasoning.py index 1237605..af1dda6 100644 --- a/src/openrouter/components/openresponsesreasoning.py +++ b/src/openrouter/components/openresponsesreasoning.py @@ -55,6 +55,7 @@ Literal[ "unknown", "openai-responses-v1", + "azure-openai-responses-v1", "xai-responses-v1", "anthropic-claude-v1", "google-gemini-v1", diff --git a/src/openrouter/components/openresponsesrequest.py b/src/openrouter/components/openresponsesrequest.py index 4dafb1c..c422085 100644 --- a/src/openrouter/components/openresponsesrequest.py +++ b/src/openrouter/components/openresponsesrequest.py @@ -34,10 +34,16 @@ OpenResponsesWebSearchToolTypedDict, ) from .pdfparseroptions import PDFParserOptions, 
PDFParserOptionsTypedDict +from .preferredmaxlatency import PreferredMaxLatency, PreferredMaxLatencyTypedDict +from .preferredminthroughput import ( + PreferredMinThroughput, + PreferredMinThroughputTypedDict, +) from .providername import ProviderName from .providersort import ProviderSort from .providersortconfig import ProviderSortConfig, ProviderSortConfigTypedDict from .quantization import Quantization +from .responsesoutputmodality import ResponsesOutputModality from .websearchengine import WebSearchEngine from openrouter.types import ( BaseModel, @@ -139,6 +145,16 @@ def serialize_model(self, handler): ] +OpenResponsesRequestImageConfigTypedDict = TypeAliasType( + "OpenResponsesRequestImageConfigTypedDict", Union[str, float] +) + + +OpenResponsesRequestImageConfig = TypeAliasType( + "OpenResponsesRequestImageConfig", Union[str, float] +) + + ServiceTier = Literal["auto",] @@ -269,14 +285,10 @@ class OpenResponsesRequestProviderTypedDict(TypedDict): r"""The sorting strategy to use for this request, if \"order\" is not specified. When set, no load balancing is performed.""" max_price: NotRequired[OpenResponsesRequestMaxPriceTypedDict] r"""The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion.""" - preferred_min_throughput: NotRequired[Nullable[float]] - r"""Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - preferred_max_latency: NotRequired[Nullable[float]] - r"""Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - min_throughput: NotRequired[Nullable[float]] - r"""**DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for preferred_min_throughput.""" - max_latency: NotRequired[Nullable[float]] - r"""**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency.""" + preferred_min_throughput: NotRequired[Nullable[PreferredMinThroughputTypedDict]] + r"""Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" + preferred_max_latency: NotRequired[Nullable[PreferredMaxLatencyTypedDict]] + r"""Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" class OpenResponsesRequestProvider(BaseModel): @@ -327,27 +339,11 @@ class OpenResponsesRequestProvider(BaseModel): max_price: Optional[OpenResponsesRequestMaxPrice] = None r"""The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion.""" - preferred_min_throughput: OptionalNullable[float] = UNSET - r"""Preferred minimum throughput (in tokens per second). 
Endpoints below this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" + preferred_min_throughput: OptionalNullable[PreferredMinThroughput] = UNSET + r"""Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - preferred_max_latency: OptionalNullable[float] = UNSET - r"""Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - - min_throughput: Annotated[ - OptionalNullable[float], - pydantic.Field( - deprecated="warning: ** DEPRECATED ** - Use preferred_min_throughput instead.." - ), - ] = UNSET - r"""**DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for preferred_min_throughput.""" - - max_latency: Annotated[ - OptionalNullable[float], - pydantic.Field( - deprecated="warning: ** DEPRECATED ** - Use preferred_max_latency instead.." - ), - ] = UNSET - r"""**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency.""" + preferred_max_latency: OptionalNullable[PreferredMaxLatency] = UNSET + r"""Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" @model_serializer(mode="wrap") def serialize_model(self, handler): @@ -365,8 +361,6 @@ def serialize_model(self, handler): "max_price", "preferred_min_throughput", "preferred_max_latency", - "min_throughput", - "max_latency", ] nullable_fields = [ "allow_fallbacks", @@ -381,8 +375,6 @@ def serialize_model(self, handler): "sort", "preferred_min_throughput", "preferred_max_latency", - "min_throughput", - "max_latency", ] null_default_fields = [] @@ -488,11 +480,33 @@ class OpenResponsesRequestPluginModeration(BaseModel): id: IDModeration +IDAutoRouter = Literal["auto-router",] + + +class OpenResponsesRequestPluginAutoRouterTypedDict(TypedDict): + id: IDAutoRouter + enabled: NotRequired[bool] + r"""Set to false to disable the auto-router plugin for this request. Defaults to true.""" + allowed_models: NotRequired[List[str]] + r"""List of model patterns to filter which models the auto-router can route between. Supports wildcards (e.g., \"anthropic/*\" matches all Anthropic models). When not specified, uses the default supported models list.""" + + +class OpenResponsesRequestPluginAutoRouter(BaseModel): + id: IDAutoRouter + + enabled: Optional[bool] = None + r"""Set to false to disable the auto-router plugin for this request. Defaults to true.""" + + allowed_models: Optional[List[str]] = None + r"""List of model patterns to filter which models the auto-router can route between. Supports wildcards (e.g., \"anthropic/*\" matches all Anthropic models). 
When not specified, uses the default supported models list.""" + + OpenResponsesRequestPluginUnionTypedDict = TypeAliasType( "OpenResponsesRequestPluginUnionTypedDict", Union[ OpenResponsesRequestPluginModerationTypedDict, OpenResponsesRequestPluginResponseHealingTypedDict, + OpenResponsesRequestPluginAutoRouterTypedDict, OpenResponsesRequestPluginFileParserTypedDict, OpenResponsesRequestPluginWebTypedDict, ], @@ -501,6 +515,7 @@ class OpenResponsesRequestPluginModeration(BaseModel): OpenResponsesRequestPluginUnion = Annotated[ Union[ + Annotated[OpenResponsesRequestPluginAutoRouter, Tag("auto-router")], Annotated[OpenResponsesRequestPluginModeration, Tag("moderation")], Annotated[OpenResponsesRequestPluginWeb, Tag("web")], Annotated[OpenResponsesRequestPluginFileParser, Tag("file-parser")], @@ -531,6 +546,10 @@ class OpenResponsesRequestTypedDict(TypedDict): temperature: NotRequired[Nullable[float]] top_p: NotRequired[Nullable[float]] top_k: NotRequired[float] + image_config: NotRequired[Dict[str, OpenResponsesRequestImageConfigTypedDict]] + r"""Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details.""" + modalities: NotRequired[List[ResponsesOutputModality]] + r"""Output modalities for the response. Supported values are \"text\" and \"image\".""" prompt_cache_key: NotRequired[Nullable[str]] previous_response_id: NotRequired[Nullable[str]] prompt: NotRequired[Nullable[OpenAIResponsesPromptTypedDict]] @@ -586,6 +605,18 @@ class OpenResponsesRequest(BaseModel): top_k: Optional[float] = None + image_config: Optional[Dict[str, OpenResponsesRequestImageConfig]] = None + r"""Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details.""" + + modalities: Optional[ + List[ + Annotated[ + ResponsesOutputModality, PlainValidator(validate_open_enum(False)) + ] + ] + ] = None + r"""Output modalities for the response. Supported values are \"text\" and \"image\".""" + prompt_cache_key: OptionalNullable[str] = UNSET previous_response_id: OptionalNullable[str] = UNSET @@ -646,6 +677,8 @@ def serialize_model(self, handler): "temperature", "top_p", "top_k", + "image_config", + "modalities", "prompt_cache_key", "previous_response_id", "prompt", diff --git a/src/openrouter/components/percentilelatencycutoffs.py b/src/openrouter/components/percentilelatencycutoffs.py new file mode 100644 index 0000000..fa8175f --- /dev/null +++ b/src/openrouter/components/percentilelatencycutoffs.py @@ -0,0 +1,71 @@ +"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" + +from __future__ import annotations +from openrouter.types import ( + BaseModel, + Nullable, + OptionalNullable, + UNSET, + UNSET_SENTINEL, +) +from pydantic import model_serializer +from typing_extensions import NotRequired, TypedDict + + +class PercentileLatencyCutoffsTypedDict(TypedDict): + r"""Percentile-based latency cutoffs. All specified cutoffs must be met for an endpoint to be preferred.""" + + p50: NotRequired[Nullable[float]] + r"""Maximum p50 latency (seconds)""" + p75: NotRequired[Nullable[float]] + r"""Maximum p75 latency (seconds)""" + p90: NotRequired[Nullable[float]] + r"""Maximum p90 latency (seconds)""" + p99: NotRequired[Nullable[float]] + r"""Maximum p99 latency (seconds)""" + + +class PercentileLatencyCutoffs(BaseModel): + r"""Percentile-based latency cutoffs. 
All specified cutoffs must be met for an endpoint to be preferred.""" + + p50: OptionalNullable[float] = UNSET + r"""Maximum p50 latency (seconds)""" + + p75: OptionalNullable[float] = UNSET + r"""Maximum p75 latency (seconds)""" + + p90: OptionalNullable[float] = UNSET + r"""Maximum p90 latency (seconds)""" + + p99: OptionalNullable[float] = UNSET + r"""Maximum p99 latency (seconds)""" + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = ["p50", "p75", "p90", "p99"] + nullable_fields = ["p50", "p75", "p90", "p99"] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in type(self).model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m diff --git a/src/openrouter/components/percentilestats.py b/src/openrouter/components/percentilestats.py new file mode 100644 index 0000000..e979e11 --- /dev/null +++ b/src/openrouter/components/percentilestats.py @@ -0,0 +1,34 @@ +"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" + +from __future__ import annotations +from openrouter.types import BaseModel +from typing_extensions import TypedDict + + +class PercentileStatsTypedDict(TypedDict): + r"""Latency percentiles in milliseconds over the last 30 minutes. Latency measures time to first token. Only visible when authenticated with an API key or cookie; returns null for unauthenticated requests.""" + + p50: float + r"""Median (50th percentile)""" + p75: float + r"""75th percentile""" + p90: float + r"""90th percentile""" + p99: float + r"""99th percentile""" + + +class PercentileStats(BaseModel): + r"""Latency percentiles in milliseconds over the last 30 minutes. Latency measures time to first token. Only visible when authenticated with an API key or cookie; returns null for unauthenticated requests.""" + + p50: float + r"""Median (50th percentile)""" + + p75: float + r"""75th percentile""" + + p90: float + r"""90th percentile""" + + p99: float + r"""99th percentile""" diff --git a/src/openrouter/components/percentilethroughputcutoffs.py b/src/openrouter/components/percentilethroughputcutoffs.py new file mode 100644 index 0000000..17b28ce --- /dev/null +++ b/src/openrouter/components/percentilethroughputcutoffs.py @@ -0,0 +1,71 @@ +"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" + +from __future__ import annotations +from openrouter.types import ( + BaseModel, + Nullable, + OptionalNullable, + UNSET, + UNSET_SENTINEL, +) +from pydantic import model_serializer +from typing_extensions import NotRequired, TypedDict + + +class PercentileThroughputCutoffsTypedDict(TypedDict): + r"""Percentile-based throughput cutoffs. 
All specified cutoffs must be met for an endpoint to be preferred.""" + + p50: NotRequired[Nullable[float]] + r"""Minimum p50 throughput (tokens/sec)""" + p75: NotRequired[Nullable[float]] + r"""Minimum p75 throughput (tokens/sec)""" + p90: NotRequired[Nullable[float]] + r"""Minimum p90 throughput (tokens/sec)""" + p99: NotRequired[Nullable[float]] + r"""Minimum p99 throughput (tokens/sec)""" + + +class PercentileThroughputCutoffs(BaseModel): + r"""Percentile-based throughput cutoffs. All specified cutoffs must be met for an endpoint to be preferred.""" + + p50: OptionalNullable[float] = UNSET + r"""Minimum p50 throughput (tokens/sec)""" + + p75: OptionalNullable[float] = UNSET + r"""Minimum p75 throughput (tokens/sec)""" + + p90: OptionalNullable[float] = UNSET + r"""Minimum p90 throughput (tokens/sec)""" + + p99: OptionalNullable[float] = UNSET + r"""Minimum p99 throughput (tokens/sec)""" + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = ["p50", "p75", "p90", "p99"] + nullable_fields = ["p50", "p75", "p90", "p99"] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in type(self).model_fields.items(): + k = f.alias or n + val = serialized.get(k) + serialized.pop(k, None) + + optional_nullable = k in optional_fields and k in nullable_fields + is_set = ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields or (optional_nullable and is_set) + ): + m[k] = val + + return m diff --git a/src/openrouter/components/preferredmaxlatency.py b/src/openrouter/components/preferredmaxlatency.py new file mode 100644 index 0000000..523f7c9 --- /dev/null +++ b/src/openrouter/components/preferredmaxlatency.py @@ -0,0 +1,21 @@ +"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" + +from __future__ import annotations +from .percentilelatencycutoffs import ( + PercentileLatencyCutoffs, + PercentileLatencyCutoffsTypedDict, +) +from typing import Any, Union +from typing_extensions import TypeAliasType + + +PreferredMaxLatencyTypedDict = TypeAliasType( + "PreferredMaxLatencyTypedDict", Union[PercentileLatencyCutoffsTypedDict, float, Any] +) +r"""Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" + + +PreferredMaxLatency = TypeAliasType( + "PreferredMaxLatency", Union[PercentileLatencyCutoffs, float, Any] +) +r"""Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" diff --git a/src/openrouter/components/preferredminthroughput.py b/src/openrouter/components/preferredminthroughput.py new file mode 100644 index 0000000..ceb4be7 --- /dev/null +++ b/src/openrouter/components/preferredminthroughput.py @@ -0,0 +1,22 @@ +"""Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" + +from __future__ import annotations +from .percentilethroughputcutoffs import ( + PercentileThroughputCutoffs, + PercentileThroughputCutoffsTypedDict, +) +from typing import Any, Union +from typing_extensions import TypeAliasType + + +PreferredMinThroughputTypedDict = TypeAliasType( + "PreferredMinThroughputTypedDict", + Union[PercentileThroughputCutoffsTypedDict, float, Any], +) +r"""Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" + + +PreferredMinThroughput = TypeAliasType( + "PreferredMinThroughput", Union[PercentileThroughputCutoffs, float, Any] +) +r"""Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" diff --git a/src/openrouter/components/providername.py b/src/openrouter/components/providername.py index ed50966..e9c1402 100644 --- a/src/openrouter/components/providername.py +++ b/src/openrouter/components/providername.py @@ -33,12 +33,12 @@ "Fireworks", "Friendli", "GMICloud", - "GoPomelo", "Google", "Google AI Studio", "Groq", "Hyperbolic", "Inception", + "Inceptron", "InferenceNet", "Infermatic", "Inflection", @@ -63,13 +63,14 @@ "Phala", "Relace", "SambaNova", + "Seed", "SiliconFlow", "Sourceful", "Stealth", "StreamLake", "Switchpoint", - "Targon", "Together", + "Upstage", "Venice", "WandB", "Xiaomi", diff --git a/src/openrouter/components/providerpreferences.py b/src/openrouter/components/providerpreferences.py index 85dfa40..b0d41b0 100644 --- a/src/openrouter/components/providerpreferences.py +++ b/src/openrouter/components/providerpreferences.py @@ -2,6 +2,11 @@ from __future__ import annotations from .datacollection import DataCollection +from .preferredmaxlatency import PreferredMaxLatency, PreferredMaxLatencyTypedDict +from .preferredminthroughput import ( + PreferredMinThroughput, + PreferredMinThroughputTypedDict, +) from .providername import ProviderName from .providersort import ProviderSort from .quantization import Quantization @@ -14,7 +19,6 @@ UnrecognizedStr, ) from openrouter.utils import validate_open_enum -import pydantic from pydantic import model_serializer from pydantic.functional_validators import PlainValidator from typing import List, Literal, Optional, Union @@ -234,14 +238,10 @@ class ProviderPreferencesTypedDict(TypedDict): sort: NotRequired[Nullable[ProviderPreferencesSortUnionTypedDict]] max_price: NotRequired[ProviderPreferencesMaxPriceTypedDict] r"""The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion.""" - preferred_min_throughput: NotRequired[Nullable[float]] - r"""Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - preferred_max_latency: NotRequired[Nullable[float]] - r"""Preferred maximum latency (in seconds). 
Endpoints above this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - min_throughput: NotRequired[Nullable[float]] - r"""**DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for preferred_min_throughput.""" - max_latency: NotRequired[Nullable[float]] - r"""**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency.""" + preferred_min_throughput: NotRequired[Nullable[PreferredMinThroughputTypedDict]] + r"""Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" + preferred_max_latency: NotRequired[Nullable[PreferredMaxLatencyTypedDict]] + r"""Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" class ProviderPreferences(BaseModel): @@ -291,27 +291,11 @@ class ProviderPreferences(BaseModel): max_price: Optional[ProviderPreferencesMaxPrice] = None r"""The object specifying the maximum price you want to pay for this request. USD price per million tokens, for prompt and completion.""" - preferred_min_throughput: OptionalNullable[float] = UNSET - r"""Preferred minimum throughput (in tokens per second). Endpoints below this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - - preferred_max_latency: OptionalNullable[float] = UNSET - r"""Preferred maximum latency (in seconds). Endpoints above this threshold may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" + preferred_min_throughput: OptionalNullable[PreferredMinThroughput] = UNSET + r"""Preferred minimum throughput (in tokens per second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. Endpoints below the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" - min_throughput: Annotated[ - OptionalNullable[float], - pydantic.Field( - deprecated="warning: ** DEPRECATED ** - Use preferred_min_throughput instead.." - ), - ] = UNSET - r"""**DEPRECATED** Use preferred_min_throughput instead. Backwards-compatible alias for preferred_min_throughput.""" - - max_latency: Annotated[ - OptionalNullable[float], - pydantic.Field( - deprecated="warning: ** DEPRECATED ** - Use preferred_max_latency instead.." - ), - ] = UNSET - r"""**DEPRECATED** Use preferred_max_latency instead. Backwards-compatible alias for preferred_max_latency.""" + preferred_max_latency: OptionalNullable[PreferredMaxLatency] = UNSET + r"""Preferred maximum latency (in seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. 
Endpoints above the threshold(s) may still be used, but are deprioritized in routing. When using fallback models, this may cause a fallback model to be used instead of the primary model if it meets the threshold.""" @model_serializer(mode="wrap") def serialize_model(self, handler): @@ -329,8 +313,6 @@ def serialize_model(self, handler): "max_price", "preferred_min_throughput", "preferred_max_latency", - "min_throughput", - "max_latency", ] nullable_fields = [ "allow_fallbacks", @@ -345,8 +327,6 @@ def serialize_model(self, handler): "sort", "preferred_min_throughput", "preferred_max_latency", - "min_throughput", - "max_latency", ] null_default_fields = [] diff --git a/src/openrouter/components/publicendpoint.py b/src/openrouter/components/publicendpoint.py index c106c8b..22a4645 100644 --- a/src/openrouter/components/publicendpoint.py +++ b/src/openrouter/components/publicendpoint.py @@ -3,6 +3,7 @@ from __future__ import annotations from .endpointstatus import EndpointStatus from .parameter import Parameter +from .percentilestats import PercentileStats, PercentileStatsTypedDict from .providername import ProviderName from openrouter.types import BaseModel, Nullable, UNSET_SENTINEL, UnrecognizedStr from openrouter.utils import validate_open_enum @@ -111,6 +112,9 @@ class PublicEndpointTypedDict(TypedDict): supported_parameters: List[Parameter] uptime_last_30m: Nullable[float] supports_implicit_caching: bool + latency_last_30m: Nullable[PercentileStatsTypedDict] + r"""Latency percentiles in milliseconds over the last 30 minutes. Latency measures time to first token. Only visible when authenticated with an API key or cookie; returns null for unauthenticated requests.""" + throughput_last_30m: Nullable[PercentileStatsTypedDict] status: NotRequired[EndpointStatus] @@ -145,6 +149,11 @@ class PublicEndpoint(BaseModel): supports_implicit_caching: bool + latency_last_30m: Nullable[PercentileStats] + r"""Latency percentiles in milliseconds over the last 30 minutes. Latency measures time to first token. 
Only visible when authenticated with an API key or cookie; returns null for unauthenticated requests.""" + + throughput_last_30m: Nullable[PercentileStats] + status: Annotated[ Optional[EndpointStatus], PlainValidator(validate_open_enum(True)) ] = None @@ -157,6 +166,8 @@ def serialize_model(self, handler): "max_completion_tokens", "max_prompt_tokens", "uptime_last_30m", + "latency_last_30m", + "throughput_last_30m", ] null_default_fields = [] diff --git a/src/openrouter/components/responsesoutputitem.py b/src/openrouter/components/responsesoutputitem.py index 28966bd..5aff987 100644 --- a/src/openrouter/components/responsesoutputitem.py +++ b/src/openrouter/components/responsesoutputitem.py @@ -38,8 +38,8 @@ ResponsesOutputItemFileSearchCallTypedDict, ResponsesImageGenerationCallTypedDict, ResponsesOutputMessageTypedDict, - ResponsesOutputItemReasoningTypedDict, ResponsesOutputItemFunctionCallTypedDict, + ResponsesOutputItemReasoningTypedDict, ], ) r"""An output item from the response""" diff --git a/src/openrouter/components/responsesoutputitemreasoning.py b/src/openrouter/components/responsesoutputitemreasoning.py index 99fe60e..a94fb36 100644 --- a/src/openrouter/components/responsesoutputitemreasoning.py +++ b/src/openrouter/components/responsesoutputitemreasoning.py @@ -9,10 +9,14 @@ OptionalNullable, UNSET, UNSET_SENTINEL, + UnrecognizedStr, ) +from openrouter.utils import validate_open_enum +import pydantic from pydantic import model_serializer +from pydantic.functional_validators import PlainValidator from typing import List, Literal, Optional, Union -from typing_extensions import NotRequired, TypeAliasType, TypedDict +from typing_extensions import Annotated, NotRequired, TypeAliasType, TypedDict ResponsesOutputItemReasoningType = Literal["reasoning",] @@ -47,6 +51,20 @@ ) +ResponsesOutputItemReasoningFormat = Union[ + Literal[ + "unknown", + "openai-responses-v1", + "azure-openai-responses-v1", + "xai-responses-v1", + "anthropic-claude-v1", + "google-gemini-v1", + ], + UnrecognizedStr, +] +r"""The format of the reasoning content""" + + class ResponsesOutputItemReasoningTypedDict(TypedDict): r"""An output item containing reasoning""" @@ -56,6 +74,10 @@ class ResponsesOutputItemReasoningTypedDict(TypedDict): content: NotRequired[List[ReasoningTextContentTypedDict]] encrypted_content: NotRequired[Nullable[str]] status: NotRequired[ResponsesOutputItemReasoningStatusUnionTypedDict] + signature: NotRequired[Nullable[str]] + r"""A signature for the reasoning content, used for verification""" + format_: NotRequired[Nullable[ResponsesOutputItemReasoningFormat]] + r"""The format of the reasoning content""" class ResponsesOutputItemReasoning(BaseModel): @@ -73,10 +95,28 @@ class ResponsesOutputItemReasoning(BaseModel): status: Optional[ResponsesOutputItemReasoningStatusUnion] = None + signature: OptionalNullable[str] = UNSET + r"""A signature for the reasoning content, used for verification""" + + format_: Annotated[ + Annotated[ + OptionalNullable[ResponsesOutputItemReasoningFormat], + PlainValidator(validate_open_enum(False)), + ], + pydantic.Field(alias="format"), + ] = UNSET + r"""The format of the reasoning content""" + @model_serializer(mode="wrap") def serialize_model(self, handler): - optional_fields = ["content", "encrypted_content", "status"] - nullable_fields = ["encrypted_content"] + optional_fields = [ + "content", + "encrypted_content", + "status", + "signature", + "format", + ] + nullable_fields = ["encrypted_content", "signature", "format"] null_default_fields = [] 
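# Reviewer note (comment-only, since this point sits inside serialize_model):
# the new `signature` and `format` fields are both optional and nullable, and
# `format_` is emitted under its wire name via the pydantic alias ("format"
# appears in optional_fields/nullable_fields above). Like the other
# optional-nullable fields, they are serialized only when explicitly set to a
# value or explicitly set to null; UNSET fields are dropped from the payload.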
diff --git a/src/openrouter/components/responsesoutputmodality.py b/src/openrouter/components/responsesoutputmodality.py
new file mode 100644
index 0000000..a259061
--- /dev/null
+++ b/src/openrouter/components/responsesoutputmodality.py
@@ -0,0 +1,14 @@
+"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
+
+from __future__ import annotations
+from openrouter.types import UnrecognizedStr
+from typing import Literal, Union
+
+
+ResponsesOutputModality = Union[
+    Literal[
+        "text",
+        "image",
+    ],
+    UnrecognizedStr,
+]
diff --git a/src/openrouter/responses.py b/src/openrouter/responses.py
index bb9603b..b1cec20 100644
--- a/src/openrouter/responses.py
+++ b/src/openrouter/responses.py
@@ -58,6 +58,13 @@ def send(
         temperature: OptionalNullable[float] = UNSET,
         top_p: OptionalNullable[float] = UNSET,
         top_k: Optional[float] = None,
+        image_config: Optional[
+            Union[
+                Dict[str, components.OpenResponsesRequestImageConfig],
+                Dict[str, components.OpenResponsesRequestImageConfigTypedDict],
+            ]
+        ] = None,
+        modalities: Optional[List[components.ResponsesOutputModality]] = None,
         prompt_cache_key: OptionalNullable[str] = UNSET,
         previous_response_id: OptionalNullable[str] = UNSET,
         prompt: OptionalNullable[
@@ -109,6 +116,8 @@ def send(
         :param temperature:
         :param top_p:
         :param top_k:
+        :param image_config: Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details.
+        :param modalities: Output modalities for the response. Supported values are \"text\" and \"image\".
         :param prompt_cache_key:
         :param previous_response_id:
         :param prompt:
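A usage sketch for the two new parameters; the model id is illustrative, and the `model`/`input` parameter names are assumed from the wider Responses API surface rather than shown in this hunk:

```python
from openrouter import OpenRouter

client = OpenRouter(api_key="<OPENROUTER_API_KEY>")

res = client.responses.send(
    model="some/image-capable-model",  # illustrative id, not from this diff
    input="Sketch a small logo",       # parameter name assumed
    # ResponsesOutputModality values; unrecognized strings also pass through.
    modalities=["text", "image"],
    # image_config maps provider-specific keys to
    # OpenResponsesRequestImageConfig values; concrete shapes live behind the
    # docs URL cited in the docstring above, so none are invented here.
)
print(res)
```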
@@ -169,6 +178,13 @@ def send(
         temperature: OptionalNullable[float] = UNSET,
         top_p: OptionalNullable[float] = UNSET,
         top_k: Optional[float] = None,
+        image_config: Optional[
+            Union[
+                Dict[str, components.OpenResponsesRequestImageConfig],
+                Dict[str, components.OpenResponsesRequestImageConfigTypedDict],
+            ]
+        ] = None,
+        modalities: Optional[List[components.ResponsesOutputModality]] = None,
         prompt_cache_key: OptionalNullable[str] = UNSET,
         previous_response_id: OptionalNullable[str] = UNSET,
         prompt: OptionalNullable[
@@ -220,6 +236,8 @@ def send(
         :param temperature:
         :param top_p:
         :param top_k:
+        :param image_config: Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details.
+        :param modalities: Output modalities for the response. Supported values are \"text\" and \"image\".
         :param prompt_cache_key:
         :param previous_response_id:
         :param prompt:
@@ -279,6 +297,13 @@ def send(
         temperature: OptionalNullable[float] = UNSET,
         top_p: OptionalNullable[float] = UNSET,
         top_k: Optional[float] = None,
+        image_config: Optional[
+            Union[
+                Dict[str, components.OpenResponsesRequestImageConfig],
+                Dict[str, components.OpenResponsesRequestImageConfigTypedDict],
+            ]
+        ] = None,
+        modalities: Optional[List[components.ResponsesOutputModality]] = None,
         prompt_cache_key: OptionalNullable[str] = UNSET,
         previous_response_id: OptionalNullable[str] = UNSET,
         prompt: OptionalNullable[
@@ -330,6 +355,8 @@ def send(
         :param temperature:
         :param top_p:
         :param top_k:
+        :param image_config: Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details.
+        :param modalities: Output modalities for the response. Supported values are \"text\" and \"image\".
         :param prompt_cache_key:
         :param previous_response_id:
         :param prompt:
@@ -384,6 +411,8 @@ def send(
             temperature=temperature,
             top_p=top_p,
             top_k=top_k,
+            image_config=image_config,
+            modalities=modalities,
             prompt_cache_key=prompt_cache_key,
             previous_response_id=previous_response_id,
             prompt=utils.get_pydantic_model(
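The async overloads below mirror the sync ones; a sketch under the same assumptions as the synchronous example above:

```python
import asyncio

from openrouter import OpenRouter

async def main() -> None:
    client = OpenRouter(api_key="<OPENROUTER_API_KEY>")
    res = await client.responses.send_async(
        model="some/image-capable-model",  # illustrative id
        input="Sketch a small logo",       # parameter name assumed
        modalities=["text", "image"],
    )
    print(res)

asyncio.run(main())
```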
@@ -634,6 +663,13 @@ async def send_async(
         temperature: OptionalNullable[float] = UNSET,
         top_p: OptionalNullable[float] = UNSET,
         top_k: Optional[float] = None,
+        image_config: Optional[
+            Union[
+                Dict[str, components.OpenResponsesRequestImageConfig],
+                Dict[str, components.OpenResponsesRequestImageConfigTypedDict],
+            ]
+        ] = None,
+        modalities: Optional[List[components.ResponsesOutputModality]] = None,
         prompt_cache_key: OptionalNullable[str] = UNSET,
         previous_response_id: OptionalNullable[str] = UNSET,
         prompt: OptionalNullable[
@@ -685,6 +721,8 @@ async def send_async(
         :param temperature:
         :param top_p:
         :param top_k:
+        :param image_config: Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details.
+        :param modalities: Output modalities for the response. Supported values are \"text\" and \"image\".
         :param prompt_cache_key:
         :param previous_response_id:
         :param prompt:
@@ -745,6 +783,13 @@ async def send_async(
         temperature: OptionalNullable[float] = UNSET,
         top_p: OptionalNullable[float] = UNSET,
         top_k: Optional[float] = None,
+        image_config: Optional[
+            Union[
+                Dict[str, components.OpenResponsesRequestImageConfig],
+                Dict[str, components.OpenResponsesRequestImageConfigTypedDict],
+            ]
+        ] = None,
+        modalities: Optional[List[components.ResponsesOutputModality]] = None,
         prompt_cache_key: OptionalNullable[str] = UNSET,
         previous_response_id: OptionalNullable[str] = UNSET,
         prompt: OptionalNullable[
@@ -796,6 +841,8 @@ async def send_async(
         :param temperature:
         :param top_p:
         :param top_k:
+        :param image_config: Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details.
+        :param modalities: Output modalities for the response. Supported values are \"text\" and \"image\".
         :param prompt_cache_key:
         :param previous_response_id:
         :param prompt:
@@ -855,6 +902,13 @@ async def send_async(
         temperature: OptionalNullable[float] = UNSET,
         top_p: OptionalNullable[float] = UNSET,
         top_k: Optional[float] = None,
+        image_config: Optional[
+            Union[
+                Dict[str, components.OpenResponsesRequestImageConfig],
+                Dict[str, components.OpenResponsesRequestImageConfigTypedDict],
+            ]
+        ] = None,
+        modalities: Optional[List[components.ResponsesOutputModality]] = None,
         prompt_cache_key: OptionalNullable[str] = UNSET,
         previous_response_id: OptionalNullable[str] = UNSET,
         prompt: OptionalNullable[
@@ -906,6 +960,8 @@ async def send_async(
         :param temperature:
         :param top_p:
         :param top_k:
+        :param image_config: Provider-specific image configuration options. Keys and values vary by model/provider. See https://openrouter.ai/docs/features/multimodal/image-generation for more details.
+        :param modalities: Output modalities for the response. Supported values are \"text\" and \"image\".
         :param prompt_cache_key:
         :param previous_response_id:
         :param prompt:
@@ -960,6 +1016,8 @@ async def send_async(
             temperature=temperature,
             top_p=top_p,
             top_k=top_k,
+            image_config=image_config,
+            modalities=modalities,
             prompt_cache_key=prompt_cache_key,
             previous_response_id=previous_response_id,
             prompt=utils.get_pydantic_model(