diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fa4edea..0410f49 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,22 +14,10 @@ Or [install uv manually](https://docs.astral.sh/uv/getting-started/installation/ $ uv sync --all-extras ``` -You can then run scripts using `uv run python script.py` or by manually activating the virtual environment: +You can then run scripts using `uv run python script.py`: ```sh -# manually activate - https://docs.python.org/3/library/venv.html#how-venvs-work -$ source .venv/bin/activate - -# now you can omit the `uv run` prefix -$ python script.py -``` - -### Without `uv` - -Alternatively if you don't want to install `uv`, you can stick with the standard `pip` setup by ensuring you have the Python version specified in `.python-version`, create a virtual environment however you desire and then install dependencies using this command: - -```sh -$ pip install -r requirements-dev.lock +uv run python script.py ``` ## Modifying/Adding code @@ -103,7 +91,7 @@ If you’d like to use the repository from source, you can either install from g To install via git: ```sh -$ pip install git+ssh://git@github.com/browserbase/stagehand-python#stainless.git +uv run pip install git+ssh://git@github.com/browserbase/stagehand-python#stainless.git ``` Alternatively, you can build from source and install the wheel file: @@ -115,13 +103,13 @@ To create a distributable version of the library, all you have to do is run this ```sh $ uv build # or -$ python -m build +$ uv run python -m build ``` Then to install: ```sh -$ pip install ./path-to-wheel-file.whl +uv run pip install ./path-to-wheel-file.whl ``` ## Running tests @@ -134,7 +122,7 @@ $ npx prism mock path/to/your/openapi.yml ``` ```sh -$ ./scripts/test +$ uv run -- ./scripts/test ``` ## Linting and formatting @@ -145,13 +133,13 @@ This repository uses [ruff](https://github.com/astral-sh/ruff) and To lint: ```sh -$ ./scripts/lint +$ uv run -- ./scripts/lint ``` To format and fix all ruff issues 
automatically: ```sh -$ ./scripts/format +$ uv run -- ./scripts/format ``` ## Publishing and releases diff --git a/README.md b/README.md index a7564af..1800f53 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,8 @@ It is generated with [Stainless](https://www.stainless.com/). pip install stagehand-alpha ``` +For local development or when working from this repository, sync the dependency lockfile with `uv` (see the Local development section below) before running project scripts. + ## Requirements Python 3.9 or higher. @@ -27,51 +29,57 @@ export BROWSERBASE_API_KEY="your-bb-api-key" export BROWSERBASE_PROJECT_ID="your-bb-project-uuid" export MODEL_API_KEY="sk-proj-your-llm-api-key" -python examples/full_example.py +uv run python examples/full_example.py +``` + +
+Local development + +This repository relies on `uv` to install the required Python version and dependencies. After cloning, bootstrap the environment with: + +```sh +./scripts/bootstrap ``` +Once the environment is ready, execute repo scripts with `uv run`: + +```sh +uv run python examples/full_example.py +uv run python scripts/download-binary.py +uv run --isolated --all-extras pytest +``` +
## Usage This example demonstrates the full Stagehand workflow: starting a session, navigating to a page, observing possible actions, acting on elements, extracting data, and running an autonomous agent. ```python -from stagehand import Stagehand, __version__ +import asyncio +from stagehand import AsyncStagehand -def main() -> None: - sdk_version = __version__ +async def main() -> None: # Create client using environment variables: # BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, MODEL_API_KEY - client = Stagehand() + client = AsyncStagehand() - # Start a new browser session - start_response = client.sessions.start( - model_name="openai/gpt-5-nano", - x_language="python", - x_sdk_version=sdk_version, - ) + # Start a new browser session (returns a session helper bound to a session_id) + session = await client.sessions.create(model_name="openai/gpt-5-nano") - session_id = start_response.data.session_id - print(f"Session started: {session_id}") + print(f"Session started: {session.id}") try: # Navigate to a webpage - client.sessions.navigate( - id=session_id, + await session.navigate( url="https://news.ycombinator.com", frame_id="", # empty string for the main frame - x_language="python", - x_sdk_version=sdk_version, ) print("Navigated to Hacker News") # Observe to find possible actions on the page - observe_response = client.sessions.observe( - id=session_id, + observe_response = await session.observe( instruction="find the link to view comments for the top post", - x_language="python", - x_sdk_version=sdk_version, ) results = observe_response.data.result @@ -83,17 +91,11 @@ def main() -> None: action = results[0].to_dict(exclude_none=True) print("Acting on:", action.get("description")) - act_response = client.sessions.act( - id=session_id, - input=action, - x_language="python", - x_sdk_version=sdk_version, - ) + act_response = await session.act(input=action) print("Act completed:", act_response.data.result.message) # Extract structured data from the page using a JSON 
schema - extract_response = client.sessions.extract( - id=session_id, + extract_response = await session.extract( instruction="extract the text of the top comment on this page", schema={ "type": "object", @@ -103,8 +105,6 @@ def main() -> None: }, "required": ["commentText"], }, - x_language="python", - x_sdk_version=sdk_version, ) extracted = extract_response.data.result @@ -112,15 +112,12 @@ def main() -> None: print("Extracted author:", author) # Run an autonomous agent to accomplish a complex task - execute_response = client.sessions.execute( - id=session_id, + execute_response = await session.execute( execute_options={ "instruction": f"Find any personal website, GitHub, or LinkedIn profile for the Hacker News user '{author}'.", "max_steps": 10, }, agent_config={"model": "openai/gpt-5-nano"}, - x_language="python", - x_sdk_version=sdk_version, timeout=300.0, ) @@ -128,12 +125,12 @@ def main() -> None: print("Agent success:", execute_response.data.result.success) finally: # End the browser session to clean up resources - client.sessions.end(id=session_id, x_language="python", x_sdk_version=sdk_version) + await session.end() print("Session ended") if __name__ == "__main__": - main() + asyncio.run(main()) ``` ## Client configuration @@ -141,17 +138,17 @@ if __name__ == "__main__": Configure the client using environment variables: ```python -from stagehand import Stagehand +from stagehand import AsyncStagehand -client = Stagehand() +client = AsyncStagehand() ``` Or manually: ```python -from stagehand import Stagehand +from stagehand import AsyncStagehand -client = Stagehand( +client = AsyncStagehand( browserbase_api_key="My Browserbase API Key", browserbase_project_id="My Browserbase Project ID", model_api_key="My Model API Key", @@ -161,9 +158,9 @@ client = Stagehand( Or using a combination of the two approaches: ```python -from stagehand import Stagehand +from stagehand import AsyncStagehand -client = Stagehand( +client = AsyncStagehand( # Configures using 
environment variables browserbase_api_key="My Browserbase API Key", # override just this one ) @@ -208,7 +205,7 @@ Response objects are Pydantic models. If you want to build a modified copy, pref ## Asynchronous execution -The default client is synchronous. To switch to asynchronous execution, use `AsyncStagehand` and `await` each API call: +This SDK recommends using `AsyncStagehand` and `await`ing each API call: ```python import asyncio @@ -217,10 +214,8 @@ from stagehand import AsyncStagehand async def main() -> None: client = AsyncStagehand() - response = await client.sessions.act( - id="00000000-your-session-id-000000000000", - input="click the first link on the page", - ) + session = await client.sessions.create(model_name="openai/gpt-5-nano") + response = await session.act(input="click the first link on the page") print(response.data) @@ -234,7 +229,7 @@ By default, the async client uses `httpx` for HTTP requests. For improved concur Install `aiohttp`: ```sh -pip install stagehand-alpha[aiohttp] +uv run pip install stagehand-alpha[aiohttp] ``` Then instantiate the client with `http_client=DefaultAioHttpClient()`: @@ -246,10 +241,8 @@ from stagehand import AsyncStagehand, DefaultAioHttpClient async def main() -> None: async with AsyncStagehand(http_client=DefaultAioHttpClient()) as client: - response = await client.sessions.act( - id="00000000-your-session-id-000000000000", - input="click the first link on the page", - ) + session = await client.sessions.create(model_name="openai/gpt-5-nano") + response = await session.act(input="click the first link on the page") print(response.data) @@ -266,36 +259,27 @@ To enable SSE streaming, you must: 2. Tell the client to parse an SSE stream by setting `stream_response=True`. 
```python -from stagehand import Stagehand +import asyncio -client = Stagehand() +from stagehand import AsyncStagehand -stream = client.sessions.act( - id="00000000-your-session-id-000000000000", - input="click the first link on the page", - stream_response=True, - x_stream_response="true", -) -for event in stream: - # event is a StreamEvent (type: "system" | "log") - print(event.type, event.data) -``` -The async client uses the exact same interface: +async def main() -> None: + async with AsyncStagehand() as client: + session = await client.sessions.create(model_name="openai/gpt-5-nano") -```python -from stagehand import AsyncStagehand + stream = await client.sessions.act( + id=session.id, + input="click the first link on the page", + stream_response=True, + x_stream_response="true", + ) + async for event in stream: + # event is a StreamEvent (type: "system" | "log") + print(event.type, event.data) -client = AsyncStagehand() -stream = await client.sessions.act( - id="00000000-your-session-id-000000000000", - input="click the first link on the page", - stream_response=True, - x_stream_response="true", -) -async for event in stream: - print(event.type, event.data) +asyncio.run(main()) ``` ## Raw responses @@ -305,14 +289,21 @@ The SDK defines methods that deserialize responses into Pydantic models. 
However To access this data, prefix any HTTP method call on a client or service with `with_raw_response`: ```python -from stagehand import Stagehand +import asyncio + +from stagehand import AsyncStagehand + + +async def main() -> None: + async with AsyncStagehand() as client: + response = await client.sessions.with_raw_response.start(model_name="openai/gpt-5-nano") + print(response.headers.get("X-My-Header")) + + session = await response.parse() # get the object that `sessions.start()` would have returned + print(session.data) + -client = Stagehand() -response = client.sessions.with_raw_response.start(model_name="openai/gpt-5-nano") -print(response.headers.get("X-My-Header")) -session = response.parse() # get the object that `sessions.start()` would have returned -print(session.data.session_id) +asyncio.run(main()) ``` ### `.with_streaming_response` @@ -322,13 +313,20 @@ The `with_raw_response` interface eagerly reads the full response body when you To stream the response body (not SSE), use `with_streaming_response` instead. It requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. ```python -from stagehand import Stagehand +import asyncio + +from stagehand import AsyncStagehand + + +async def main() -> None: + async with AsyncStagehand() as client: + async with client.sessions.with_streaming_response.start(model_name="openai/gpt-5-nano") as response: + print(response.headers.get("X-My-Header")) + async for line in response.iter_lines(): + print(line) + -client = Stagehand() -with client.sessions.with_streaming_response.start(model_name="openai/gpt-5-nano") as response: - print(response.headers.get("X-My-Header")) - for line in response.iter_lines(): - print(line) +asyncio.run(main()) ``` ## Error handling @@ -340,22 +338,28 @@ When the API returns a non-success status code (that is, 4xx or 5xx response), a All errors inherit from `stagehand.APIError`. 
```python +import asyncio + import stagehand -from stagehand import Stagehand +from stagehand import AsyncStagehand -client = Stagehand() -try: - client.sessions.start(model_name="openai/gpt-5-nano") -except stagehand.APIConnectionError as e: - print("The server could not be reached") - print(e.__cause__) # an underlying Exception, likely raised within httpx. -except stagehand.RateLimitError: - print("A 429 status code was received; we should back off a bit.") -except stagehand.APIStatusError as e: - print("A non-200-range status code was received") - print(e.status_code) - print(e.response) +async def main() -> None: + async with AsyncStagehand() as client: + try: + await client.sessions.start(model_name="openai/gpt-5-nano") + except stagehand.APIConnectionError as e: + print("The server could not be reached") + print(e.__cause__) # an underlying Exception, likely raised within httpx. + except stagehand.RateLimitError: + print("A 429 status code was received; we should back off a bit.") + except stagehand.APIStatusError as e: + print("A non-200-range status code was received") + print(e.status_code) + print(e.response) + + +asyncio.run(main()) ``` Error codes are as follows: @@ -378,13 +382,18 @@ Certain errors are automatically retried 2 times by default, with a short expone You can use the `max_retries` option to configure or disable retry settings: ```python -from stagehand import Stagehand +import asyncio + +from stagehand import AsyncStagehand -# Configure the default for all requests: -client = Stagehand(max_retries=0) -# Or, configure per-request: -client.with_options(max_retries=5).sessions.start(model_name="openai/gpt-5-nano") +async def main() -> None: + async with AsyncStagehand(max_retries=0) as client: + # `max_retries=0` above sets the default for all requests; to override it per-request: + await client.with_options(max_retries=5).sessions.start(model_name="openai/gpt-5-nano") + + +asyncio.run(main()) ``` ### Timeouts @@ -419,11 +428,18 @@ To make requests to undocumented endpoints, use `client.get`, 
`client.post`, and ```python import httpx -from stagehand import Stagehand +from stagehand import AsyncStagehand + +import asyncio + + +async def main() -> None: + async with AsyncStagehand() as client: + response = await client.post("/foo", cast_to=httpx.Response, body={"my_param": True}) + print(response.headers.get("x-foo")) + -client = Stagehand() -response = client.post("/foo", cast_to=httpx.Response, body={"my_param": True}) -print(response.headers.get("x-foo")) +asyncio.run(main()) ``` ### Undocumented request params @@ -443,11 +459,16 @@ By default, the SDK is permissive and will only raise an error if you later try If you would prefer to validate responses upfront, instantiate the client with `_strict_response_validation=True`. An `APIResponseValidationError` will be raised if the API responds with invalid data for the expected schema. ```python -from stagehand import Stagehand, APIResponseValidationError +import asyncio + +from stagehand import APIResponseValidationError, AsyncStagehand try: - with Stagehand(_strict_response_validation=True) as client: - client.sessions.start(model_name="openai/gpt-5-nano") + async def main() -> None: + async with AsyncStagehand(_strict_response_validation=True) as client: + await client.sessions.start(model_name="openai/gpt-5-nano") + + asyncio.run(main()) except APIResponseValidationError as e: print("Response failed schema validation:", e) ``` diff --git a/examples/act_example.py b/examples/act_example.py index 8bde2aa..377c1df 100644 --- a/examples/act_example.py +++ b/examples/act_example.py @@ -16,60 +16,57 @@ import os -from stagehand import Stagehand +from stagehand import AsyncStagehand -def main() -> None: +async def main() -> None: # Create client using environment variables # BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, MODEL_API_KEY - client = Stagehand( + async with AsyncStagehand( browserbase_api_key=os.environ.get("BROWSERBASE_API_KEY"), browserbase_project_id=os.environ.get("BROWSERBASE_PROJECT_ID"), 
model_api_key=os.environ.get("MODEL_API_KEY"), - ) - - # Start a new browser session - start_response = client.sessions.start( - model_name="openai/gpt-5-nano", - ) - - session_id = start_response.data.session_id - print(f"Session started: {session_id}") - - try: - # Navigate to example.com - client.sessions.navigate( - id=session_id, - url="https://www.example.com", - frame_id="", # Empty string for main frame - ) - print("Navigated to example.com") - - # Call act() with a string instruction directly - # This is the key test - passing a string instead of an Action object - print("\nAttempting to call act() with string input...") - act_response = client.sessions.act( - id=session_id, - input="click the 'More information' link", # String instruction + ) as client: + # Start a new browser session + session = await client.sessions.create( + model_name="openai/gpt-5-nano", ) - print(f"Act completed successfully!") - print(f"Result: {act_response.data.result.message}") - print(f"Success: {act_response.data.result.success}") + print(f"Session started: {session.id}") - except Exception as e: - print(f"Error: {e}") - print(f"Error type: {type(e).__name__}") - import traceback - traceback.print_exc() + try: + # Navigate to example.com + await session.navigate( + url="https://www.example.com", + frame_id="", # Empty string for main frame + ) + print("Navigated to example.com") - finally: - # End the session to clean up resources - client.sessions.end( - id=session_id, - ) - print("\nSession ended") + # Call act() with a string instruction directly + # This is the key test - passing a string instead of an Action object + print("\nAttempting to call act() with string input...") + act_response = await session.act( + input="click the 'More information' link", # String instruction + ) + + print(f"Act completed successfully!") + print(f"Result: {act_response.data.result.message}") + print(f"Success: {act_response.data.result.success}") + + except Exception as e: + print(f"Error: 
{e}") + print(f"Error type: {type(e).__name__}") + import traceback + + traceback.print_exc() + + finally: + # End the session to clean up resources + await session.end() + print("\nSession ended") if __name__ == "__main__": - main() + import asyncio + + asyncio.run(main()) diff --git a/examples/agent_execute.py b/examples/agent_execute.py index 643fbb0..4a4f4ec 100644 --- a/examples/agent_execute.py +++ b/examples/agent_execute.py @@ -16,18 +16,18 @@ import os import json -from stagehand import Stagehand, APIResponseValidationError +from stagehand import AsyncStagehand, APIResponseValidationError -def main() -> None: +async def main() -> None: model_name = os.environ.get("STAGEHAND_MODEL", "openai/gpt-5-nano") # Enable strict response validation so we fail fast if the API response # doesn't match the expected schema (instead of silently constructing models # with missing fields set to None). - with Stagehand(_strict_response_validation=True) as client: + async with AsyncStagehand(_strict_response_validation=True) as client: try: - session = client.sessions.start(model_name=model_name) + session = await client.sessions.create(model_name=model_name) except APIResponseValidationError as e: print("Session start response failed schema validation.") print(f"Base URL: {client.base_url!r}") @@ -37,19 +37,16 @@ def main() -> None: print("Parsed response body:") print(e.body) raise - session_id = session.data.session_id - if not session_id: - raise RuntimeError(f"Expected a session ID from /sessions/start but received {session.to_dict()!r}") + if not session.id: + raise RuntimeError(f"Expected a session ID from /sessions/start but received {session!r}") try: - client.sessions.navigate( - id=session_id, + await session.navigate( url="https://news.ycombinator.com", options={"wait_until": "domcontentloaded"}, ) - result = client.sessions.execute( - id=session_id, + result = await session.execute( agent_config={"model": model_name}, execute_options={ "instruction": "Go to 
Hacker News and return the titles of the first 3 articles.", @@ -61,10 +58,10 @@ def main() -> None: print("\nFull result:") print(json.dumps(result.data.result.to_dict(), indent=2, default=str)) finally: - # Only attempt cleanup if a valid session ID was created. - if session_id: - client.sessions.end(id=session_id) + await session.end() if __name__ == "__main__": - main() + import asyncio + + asyncio.run(main()) diff --git a/examples/full_example.py b/examples/full_example.py index 5f363d9..e6b1997 100644 --- a/examples/full_example.py +++ b/examples/full_example.py @@ -18,122 +18,111 @@ import os -from stagehand import Stagehand +from stagehand import AsyncStagehand -def main() -> None: +async def main() -> None: # Create client using environment variables # BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, MODEL_API_KEY - client = Stagehand( + async with AsyncStagehand( browserbase_api_key=os.environ.get("BROWSERBASE_API_KEY"), browserbase_project_id=os.environ.get("BROWSERBASE_PROJECT_ID"), model_api_key=os.environ.get("MODEL_API_KEY"), - ) - - # Start a new browser session - start_response = client.sessions.start( - model_name="openai/gpt-5-nano", - ) - - session_id = start_response.data.session_id - print(f"Session started: {session_id}") - - try: - # Navigate to Hacker News - client.sessions.navigate( - id=session_id, - url="https://news.ycombinator.com", - frame_id="", # Empty string for main frame + ) as client: + # Start a new browser session (returns a session helper bound to a session_id) + session = await client.sessions.create( + model_name="openai/gpt-5-nano", ) - print("Navigated to Hacker News") - # Observe to find possible actions - looking for the comments link - observe_response = client.sessions.observe( - id=session_id, - instruction="find the link to view comments for the top post", - ) - - results = observe_response.data.result - print(f"Found {len(results)} possible actions") - - if not results: - print("No actions found") - return - - # Use 
the first result - result = results[0] - print(f"Acting on: {result.description}") - - # Pass the action to Act - act_response = client.sessions.act( - id=session_id, - input=result, # type: ignore[arg-type] - ) - print(f"Act completed: {act_response.data.result.message}") - - # Extract data from the page - # We're now on the comments page, so extract the top comment text - extract_response = client.sessions.extract( - id=session_id, - instruction="extract the text of the top comment on this page", - schema={ - "type": "object", - "properties": { - "commentText": { - "type": "string", - "description": "The text content of the top comment" + print(f"Session started: {session.id}") + + try: + # Navigate to Hacker News + await session.navigate( + url="https://news.ycombinator.com", + frame_id="", # Empty string for main frame + ) + print("Navigated to Hacker News") + + # Observe to find possible actions - looking for the comments link + observe_response = await session.observe( + instruction="find the link to view comments for the top post", + ) + + results = observe_response.data.result + print(f"Found {len(results)} possible actions") + + if not results: + print("No actions found") + return + + # Use the first result + result = results[0] + print(f"Acting on: {result.description}") + + # Pass the action to Act + act_response = await session.act( + input=result, # type: ignore[arg-type] + ) + print(f"Act completed: {act_response.data.result.message}") + + # Extract data from the page + # We're now on the comments page, so extract the top comment text + extract_response = await session.extract( + instruction="extract the text of the top comment on this page", + schema={ + "type": "object", + "properties": { + "commentText": {"type": "string", "description": "The text content of the top comment"}, + "author": {"type": "string", "description": "The username of the comment author"}, }, - "author": { - "type": "string", - "description": "The username of the comment 
author" - } + "required": ["commentText"], }, - "required": ["commentText"] - }, - ) - - # Get the extracted result - extracted_result = extract_response.data.result - print(f"Extracted data: {extracted_result}") - - # Get the author from the extracted data - author: str = extracted_result.get("author", "unknown") if isinstance(extracted_result, dict) else "unknown" # type: ignore[union-attr] - print(f"Looking up profile for author: {author}") - - # Use the Agent to find the author's profile - # Execute runs an autonomous agent that can navigate and interact with pages - # Use a longer timeout (5 minutes) since agent execution can take a while - execute_response = client.sessions.execute( # pyright: ignore[reportArgumentType] - id=session_id, - execute_options={ - "instruction": ( - f"Find any personal website, GitHub, LinkedIn, or other best profile URL for the Hacker News user '{author}'. " - f"Click on their username to go to their profile page and look for any links they have shared. " - f"Use Google Search with their username or other details from their profile if you dont find any direct links." 
- ), - "max_steps": 15, - }, - agent_config={ - "model": { - "model_name": "openai/gpt-5-nano", - "api_key": os.environ.get("MODEL_API_KEY"), + ) + + # Get the extracted result + extracted_result = extract_response.data.result + print(f"Extracted data: {extracted_result}") + + # Get the author from the extracted data + author: str = ( + extracted_result.get("author", "unknown") if isinstance(extracted_result, dict) else "unknown" # type: ignore[union-attr] + ) + print(f"Looking up profile for author: {author}") + + # Use the Agent to find the author's profile + # Execute runs an autonomous agent that can navigate and interact with pages + # Use a longer timeout (5 minutes) since agent execution can take a while + execute_response = await session.execute( # pyright: ignore[reportArgumentType] + execute_options={ + "instruction": ( + f"Find any personal website, GitHub, LinkedIn, or other best profile URL for the Hacker News user '{author}'. " + f"Click on their username to go to their profile page and look for any links they have shared. " + f"Use Google Search with their username or other details from their profile if you dont find any direct links." 
+ ), + "max_steps": 15, }, - "cua": False, - }, - timeout=300.0, # 5 minutes - ) + agent_config={ + "model": { + "model_name": "openai/gpt-5-nano", + "api_key": os.environ.get("MODEL_API_KEY"), + }, + "cua": False, + }, + timeout=300.0, # 5 minutes + ) - print(f"Agent completed: {execute_response.data.result.message}") - print(f"Agent success: {execute_response.data.result.success}") - print(f"Agent actions taken: {len(execute_response.data.result.actions)}") + print(f"Agent completed: {execute_response.data.result.message}") + print(f"Agent success: {execute_response.data.result.success}") + print(f"Agent actions taken: {len(execute_response.data.result.actions)}") - finally: - # End the session to clean up resources - client.sessions.end( - id=session_id, - ) - print("Session ended") + finally: + # End the session to clean up resources + await session.end() + print("Session ended") if __name__ == "__main__": - main() + import asyncio + + asyncio.run(main()) diff --git a/examples/local_example.py b/examples/local_example.py new file mode 100644 index 0000000..d33ecee --- /dev/null +++ b/examples/local_example.py @@ -0,0 +1,80 @@ +""" +Example demonstrating how to run Stagehand in local mode using the SEA binary +that ships with the PyPI wheel. 
+ +Required environment variables: +- BROWSERBASE_API_KEY (can be any value in local mode) +- BROWSERBASE_PROJECT_ID (can be any value in local mode) +- MODEL_API_KEY (used for client configuration even in local mode) +- OPENAI_API_KEY (used by the SEA server for LLM access) + +Install the published wheel before running this script: + `pip install stagehand-alpha` +Then execute this example with the same interpreter: + `python examples/local_example.py` +""" + +import os +import sys +from typing import Optional + +from stagehand import Stagehand + + +def main() -> None: + openai_key = os.environ.get("OPENAI_API_KEY") + if not openai_key: + sys.exit("Set the OPENAI_API_KEY environment variable to run the local server.") + + client = Stagehand( + server="local", + local_openai_api_key=openai_key, + local_ready_timeout_s=30.0, + ) + + session_id: Optional[str] = None + + try: + print("⏳ Starting local session (this will start the embedded SEA binary)...") + session = client.sessions.start( + model_name="openai/gpt-5-nano", + browser={ + "type": "local", + "launchOptions": { + "headless": True, + }, + }, + ) + session_id = session.data.session_id + print(f"✅ Session started: {session_id}") + + print("🌐 Navigating to https://www.example.com...") + client.sessions.navigate( + id=session_id, + url="https://www.example.com", + frame_id="", + ) + print("✅ Navigation complete") + + print("🔍 Extracting the main heading text...") + extract_response = client.sessions.extract( + id=session_id, + instruction="Extract the text of the top-level heading on this page.", + ) + print(f"📄 Extracted data: {extract_response.data.result}") + + except Exception as exc: + print(f"❌ Encountered an error: {exc}") + raise + finally: + if session_id: + print("🛑 Ending session...") + client.sessions.end(id=session_id) + print("✅ Session ended") + print("🔌 Closing client (shuts down the SEA server)...") + client.close() + print("✅ Local server shut down") + + +if __name__ == "__main__": + main() 
diff --git a/src/stagehand/_client.py b/src/stagehand/_client.py index a95724d..ff7def3 100644 --- a/src/stagehand/_client.py +++ b/src/stagehand/_client.py @@ -34,7 +34,7 @@ if TYPE_CHECKING: from .resources import sessions - from .resources.sessions import SessionsResource, AsyncSessionsResource + from .resources.sessions_helpers import SessionsResourceWithHelpers, AsyncSessionsResourceWithHelpers __all__ = [ "Timeout", @@ -179,10 +179,10 @@ def close(self) -> None: self._sea_server.close() @cached_property - def sessions(self) -> SessionsResource: - from .resources.sessions import SessionsResource + def sessions(self) -> SessionsResourceWithHelpers: + from .resources.sessions_helpers import SessionsResourceWithHelpers - return SessionsResource(self) + return SessionsResourceWithHelpers(self) @cached_property def with_raw_response(self) -> StagehandWithRawResponse: @@ -469,10 +469,10 @@ async def close(self) -> None: await self._sea_server.aclose() @cached_property - def sessions(self) -> AsyncSessionsResource: - from .resources.sessions import AsyncSessionsResource + def sessions(self) -> AsyncSessionsResourceWithHelpers: + from .resources.sessions_helpers import AsyncSessionsResourceWithHelpers - return AsyncSessionsResource(self) + return AsyncSessionsResourceWithHelpers(self) @cached_property def with_raw_response(self) -> AsyncStagehandWithRawResponse: diff --git a/src/stagehand/resources/sessions_helpers.py b/src/stagehand/resources/sessions_helpers.py new file mode 100644 index 0000000..aeab399 --- /dev/null +++ b/src/stagehand/resources/sessions_helpers.py @@ -0,0 +1,110 @@ +# Manually maintained helpers (not generated). 
+ +from __future__ import annotations + +from typing import Union +from datetime import datetime +from typing_extensions import Literal + +import httpx + +from ..types import session_start_params +from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..session import Session, AsyncSession +from .sessions import SessionsResource, AsyncSessionsResource + + +class SessionsResourceWithHelpers(SessionsResource): + def create( + self, + *, + model_name: str, + act_timeout_ms: float | Omit = omit, + browser: session_start_params.Browser | Omit = omit, + browserbase_session_create_params: session_start_params.BrowserbaseSessionCreateParams | Omit = omit, + browserbase_session_id: str | Omit = omit, + dom_settle_timeout_ms: float | Omit = omit, + experimental: bool | Omit = omit, + self_heal: bool | Omit = omit, + system_prompt: str | Omit = omit, + verbose: Literal[0, 1, 2] | Omit = omit, + wait_for_captcha_solves: bool | Omit = omit, + x_language: Literal["typescript", "python", "playground"] | Omit = omit, + x_sdk_version: str | Omit = omit, + x_sent_at: Union[str, datetime] | Omit = omit, + x_stream_response: Literal["true", "false"] | Omit = omit, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Session: + start_response = self.start( + model_name=model_name, + act_timeout_ms=act_timeout_ms, + browser=browser, + browserbase_session_create_params=browserbase_session_create_params, + browserbase_session_id=browserbase_session_id, + dom_settle_timeout_ms=dom_settle_timeout_ms, + experimental=experimental, + self_heal=self_heal, + system_prompt=system_prompt, + verbose=verbose, + wait_for_captcha_solves=wait_for_captcha_solves, + x_language=x_language, + x_sdk_version=x_sdk_version, + x_sent_at=x_sent_at, + x_stream_response=x_stream_response, + extra_headers=extra_headers, + extra_query=extra_query, + 
extra_body=extra_body, + timeout=timeout, + ) + return Session(self._client, start_response.data.session_id) + + +class AsyncSessionsResourceWithHelpers(AsyncSessionsResource): + async def create( + self, + *, + model_name: str, + act_timeout_ms: float | Omit = omit, + browser: session_start_params.Browser | Omit = omit, + browserbase_session_create_params: session_start_params.BrowserbaseSessionCreateParams | Omit = omit, + browserbase_session_id: str | Omit = omit, + dom_settle_timeout_ms: float | Omit = omit, + experimental: bool | Omit = omit, + self_heal: bool | Omit = omit, + system_prompt: str | Omit = omit, + verbose: Literal[0, 1, 2] | Omit = omit, + wait_for_captcha_solves: bool | Omit = omit, + x_language: Literal["typescript", "python", "playground"] | Omit = omit, + x_sdk_version: str | Omit = omit, + x_sent_at: Union[str, datetime] | Omit = omit, + x_stream_response: Literal["true", "false"] | Omit = omit, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncSession: + start_response = await self.start( + model_name=model_name, + act_timeout_ms=act_timeout_ms, + browser=browser, + browserbase_session_create_params=browserbase_session_create_params, + browserbase_session_id=browserbase_session_id, + dom_settle_timeout_ms=dom_settle_timeout_ms, + experimental=experimental, + self_heal=self_heal, + system_prompt=system_prompt, + verbose=verbose, + wait_for_captcha_solves=wait_for_captcha_solves, + x_language=x_language, + x_sdk_version=x_sdk_version, + x_sent_at=x_sent_at, + x_stream_response=x_stream_response, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + return AsyncSession(self._client, start_response.data.session_id) diff --git a/src/stagehand/session.py b/src/stagehand/session.py new file mode 100644 index 0000000..eebea84 --- /dev/null +++ 
b/src/stagehand/session.py @@ -0,0 +1,271 @@ +# Manually maintained helpers (not generated). + +from __future__ import annotations + +from typing import TYPE_CHECKING, Union +from datetime import datetime +from typing_extensions import Unpack, Literal + +import httpx + +from .types import ( + session_act_params, + session_execute_params, + session_extract_params, + session_observe_params, + session_navigate_params, +) +from ._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from .types.session_act_response import SessionActResponse +from .types.session_end_response import SessionEndResponse +from .types.session_execute_response import SessionExecuteResponse +from .types.session_extract_response import SessionExtractResponse +from .types.session_observe_response import SessionObserveResponse +from .types.session_navigate_response import SessionNavigateResponse + +if TYPE_CHECKING: + from ._client import Stagehand, AsyncStagehand + + +class Session: + """A Stagehand session bound to a specific `session_id`.""" + + def __init__(self, client: Stagehand, id: str) -> None: + self._client = client + self.id = id + + def navigate( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_navigate_params.SessionNavigateParams], + ) -> SessionNavigateResponse: + return self._client.sessions.navigate( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + def act( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_act_params.SessionActParamsNonStreaming], + ) -> SessionActResponse: + return self._client.sessions.act( + id=self.id, + extra_headers=extra_headers, + 
extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + def observe( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_observe_params.SessionObserveParamsNonStreaming], + ) -> SessionObserveResponse: + return self._client.sessions.observe( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + def extract( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_extract_params.SessionExtractParamsNonStreaming], + ) -> SessionExtractResponse: + return self._client.sessions.extract( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + def execute( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_execute_params.SessionExecuteParamsNonStreaming], + ) -> SessionExecuteResponse: + return self._client.sessions.execute( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + def end( + self, + *, + x_language: Literal["typescript", "python", "playground"] | Omit = omit, + x_sdk_version: str | Omit = omit, + x_sent_at: Union[str, datetime] | Omit = omit, + x_stream_response: Literal["true", "false"] | Omit = omit, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SessionEndResponse: + return 
self._client.sessions.end( + id=self.id, + x_language=x_language, + x_sdk_version=x_sdk_version, + x_sent_at=x_sent_at, + x_stream_response=x_stream_response, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + +class AsyncSession: + """Async variant of `Session`.""" + + def __init__(self, client: AsyncStagehand, id: str) -> None: + self._client = client + self.id = id + + async def navigate( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_navigate_params.SessionNavigateParams], + ) -> SessionNavigateResponse: + return await self._client.sessions.navigate( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + async def act( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_act_params.SessionActParamsNonStreaming], + ) -> SessionActResponse: + return await self._client.sessions.act( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + async def observe( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_observe_params.SessionObserveParamsNonStreaming], + ) -> SessionObserveResponse: + return await self._client.sessions.observe( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + async def extract( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = 
None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_extract_params.SessionExtractParamsNonStreaming], + ) -> SessionExtractResponse: + return await self._client.sessions.extract( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + async def execute( + self, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + **params: Unpack[session_execute_params.SessionExecuteParamsNonStreaming], + ) -> SessionExecuteResponse: + return await self._client.sessions.execute( + id=self.id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + **params, + ) + + async def end( + self, + *, + x_language: Literal["typescript", "python", "playground"] | Omit = omit, + x_sdk_version: str | Omit = omit, + x_sent_at: Union[str, datetime] | Omit = omit, + x_stream_response: Literal["true", "false"] | Omit = omit, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SessionEndResponse: + return await self._client.sessions.end( + id=self.id, + x_language=x_language, + x_sdk_version=x_sdk_version, + x_sent_at=x_sent_at, + x_stream_response=x_stream_response, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) diff --git a/tests/test_sessions_create_helper.py b/tests/test_sessions_create_helper.py new file mode 100644 index 0000000..1236119 --- /dev/null +++ b/tests/test_sessions_create_helper.py @@ -0,0 +1,71 @@ +# Manually maintained tests for non-generated helpers. 
+ +from __future__ import annotations + +import os + +import httpx +import pytest +from respx import MockRouter + +from stagehand import Stagehand, AsyncStagehand + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +@pytest.mark.respx(base_url=base_url) +def test_sessions_create_returns_bound_session(respx_mock: MockRouter, client: Stagehand) -> None: + session_id = "00000000-0000-0000-0000-000000000000" + + respx_mock.post("/v1/sessions/start").mock( + return_value=httpx.Response( + 200, + json={ + "success": True, + "data": {"available": True, "sessionId": session_id}, + }, + ) + ) + + navigate_route = respx_mock.post(f"/v1/sessions/{session_id}/navigate").mock( + return_value=httpx.Response( + 200, + json={"success": True, "data": {"result": None}}, + ) + ) + + session = client.sessions.create(model_name="openai/gpt-5-nano") + assert session.id == session_id + + session.navigate(url="https://example.com", frame_id="") + assert navigate_route.called is True + + +@pytest.mark.respx(base_url=base_url) +async def test_async_sessions_create_returns_bound_session( + respx_mock: MockRouter, async_client: AsyncStagehand +) -> None: + session_id = "00000000-0000-0000-0000-000000000000" + + respx_mock.post("/v1/sessions/start").mock( + return_value=httpx.Response( + 200, + json={ + "success": True, + "data": {"available": True, "sessionId": session_id}, + }, + ) + ) + + navigate_route = respx_mock.post(f"/v1/sessions/{session_id}/navigate").mock( + return_value=httpx.Response( + 200, + json={"success": True, "data": {"result": None}}, + ) + ) + + session = await async_client.sessions.create(model_name="openai/gpt-5-nano") + assert session.id == session_id + + await session.navigate(url="https://example.com", frame_id="") + assert navigate_route.called is True diff --git a/uv.lock b/uv.lock index b960d54..512dee9 100644 --- a/uv.lock +++ b/uv.lock @@ -1339,7 +1339,7 @@ wheels = [ [[package]] name = "stagehand-alpha" -version = "1.0.0" +version = 
"0.2.4" source = { editable = "." } dependencies = [ { name = "anyio" },