fix: merge payload and data fields of Request
Closes: #560
vdusek committed Oct 2, 2024
1 parent 4891b73 commit 62b332e
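
In practice, the change means request bodies are always passed through `payload`; the separate `data` field is gone. A minimal before/after sketch based on the docs changes below (field values are illustrative):

```python
from crawlee import Request

# Before this commit, docs examples passed the form body via `data=`:
# request = Request.from_url('https://httpbin.org/post', method='POST', data={'custname': 'John Doe'})

# After the merge, the body always goes through `payload=`:
request = Request.from_url(
    url='https://httpbin.org/post',
    method='POST',
    payload={'custname': 'John Doe'},
)
```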
Showing 9 changed files with 26 additions and 22 deletions.
docs/examples/code/fill_and_submit_web_form_crawler.py (1 addition, 1 deletion)
@@ -18,7 +18,7 @@ async def request_handler(context: HttpCrawlingContext) -> None:
         request = Request.from_url(
             url='https://httpbin.org/post',
             method='POST',
-            data={
+            payload={
                 'custname': 'John Doe',
                 'custtel': '1234567890',
                 'custemail': '[email protected]',
docs/examples/code/fill_and_submit_web_form_request.py (1 addition, 1 deletion)
@@ -4,7 +4,7 @@
 request = Request.from_url(
     url='https://httpbin.org/post',
     method='POST',
-    data={
+    payload={
         'custname': 'John Doe',
         'custtel': '1234567890',
         'custemail': '[email protected]',
docs/examples/fill_and_submit_web_form.mdx (1 addition, 1 deletion)
@@ -46,7 +46,7 @@ Now, let's create a POST request with the form fields and their values using the
 {RequestExample}
 </CodeBlock>

-Alternatively, you can send form data as URL parameters using the `query_params` argument. It depends on the form and how it is implemented. However, sending the data as a POST request body using the `data` parameter is generally a better approach.
+Alternatively, you can send form data as URL parameters using the `query_params` argument. It depends on the form and how it is implemented. However, sending the data as a POST request body using the `payload` parameter is generally a better approach.

 ## Implementing the crawler

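For comparison, a sketch of the two submission styles the paragraph above mentions, using the httpbin endpoints from the example; whether a GET-style submission works depends on the target form:

```python
from crawlee import Request

# Form fields as URL query parameters (works only for GET-style forms):
get_request = Request.from_url(
    url='https://httpbin.org/get',
    query_params={'custname': 'John Doe'},
)

# Preferred: form fields as a POST body via `payload`:
post_request = Request.from_url(
    url='https://httpbin.org/post',
    method='POST',
    payload={'custname': 'John Doe'},
)
```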
src/crawlee/_request.py (10 additions, 0 deletions)
@@ -161,6 +161,8 @@ def from_url(
         url: str,
         *,
         method: HttpMethod = 'GET',
+        headers: dict[str, str] | None = None,
+        query_params: HttpQueryParams | None = None,
         payload: HttpPayload | None = None,
         label: str | None = None,
         unique_key: str | None = None,
@@ -185,6 +187,8 @@
             unique_key=unique_key,
             id=id,
             method=method,
+            headers=headers,
+            query_params=query_params,
             payload=payload,
             **kwargs,
         )
@@ -235,6 +239,8 @@ def from_url(
         url: str,
         *,
         method: HttpMethod = 'GET',
+        headers: dict[str, str] | None = None,
+        query_params: HttpQueryParams | None = None,
         payload: HttpPayload | None = None,
         label: str | None = None,
         unique_key: str | None = None,
@@ -253,6 +259,8 @@
         Args:
             url: The URL of the request.
             method: The HTTP method of the request.
+            headers: The HTTP headers of the request.
+            query_params: The query parameters of the URL.
             payload: The data to be sent as the request body. Typically used with 'POST' or 'PUT' requests.
             label: A custom label to differentiate between request types. This is stored in `user_data`, and it is
                 used for request routing (different requests go to different handlers).
@@ -281,6 +289,8 @@ def from_url(
             unique_key=unique_key,
             id=id,
             method=method,
+            headers=headers,
+            query_params=query_params,
             payload=payload,
             **kwargs,
         )
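Both `from_url` overloads and the implementation now forward `headers` and `query_params` to the underlying model alongside `payload`, so the full request shape can be built in one call. A hedged sketch of the widened signature in use:

```python
from crawlee import Request

request = Request.from_url(
    url='https://httpbin.org/post',
    method='POST',
    headers={'accept': 'application/json'},  # newly accepted by from_url
    query_params={'source': 'docs'},         # newly accepted by from_url
    payload={'custname': 'John Doe'},
)
```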
src/crawlee/_types.py (1 addition, 1 deletion)
@@ -29,7 +29,7 @@

 HttpQueryParams: TypeAlias = dict[str, str]

-HttpPayload: TypeAlias = Union[str, bytes]
+HttpPayload: TypeAlias = dict[str, Any]


 class EnqueueStrategy(str, Enum):
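Since `HttpPayload` is now a plain `dict` alias, code annotated with it receives structured form fields instead of a pre-encoded `str`/`bytes` body; encoding becomes the HTTP client's job. A small sketch of what a client can do with such a value (the `encode_form` helper is hypothetical, not part of crawlee):

```python
from typing import Any, TypeAlias  # TypeAlias requires Python 3.10+
from urllib.parse import urlencode

HttpPayload: TypeAlias = dict[str, Any]  # as defined in crawlee._types after this commit

def encode_form(payload: HttpPayload) -> str:
    """Hypothetical helper: URL-encode fields the way an HTML form submission would."""
    return urlencode(payload)

assert encode_form({'custname': 'John Doe'}) == 'custname=John+Doe'
```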
src/crawlee/_utils/requests.py (1 addition, 7 deletions)
@@ -119,13 +119,7 @@ def compute_unique_key(

     # Compute and return the extended unique key if required.
     if use_extended_unique_key:
-        if payload is None:
-            payload_in_bytes = b''
-        elif isinstance(payload, str):
-            payload_in_bytes = payload.encode('utf-8')
-        else:
-            payload_in_bytes = payload
-
+        payload_in_bytes = b'' if payload is None else str(payload).encode('utf-8')
         payload_hash = compute_short_hash(payload_in_bytes)
         return f'{normalized_method}({payload_hash}):{normalized_url}'

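The simplification works because `payload` is now always a dict (or `None`), so the `str`/`bytes` branches are dead; the dict is stringified before hashing. A self-contained sketch of the resulting behavior, with `compute_short_hash` stood in by a truncated SHA-256 (the real helper lives in crawlee internals, and the digest length here is an assumption):

```python
import hashlib

def compute_short_hash(data: bytes) -> str:
    # Assumed stand-in for crawlee's internal helper.
    return hashlib.sha256(data).hexdigest()[:8]

payload = {'custname': 'John Doe'}
payload_in_bytes = b'' if payload is None else str(payload).encode('utf-8')
payload_hash = compute_short_hash(payload_in_bytes)
print(f'POST({payload_hash}):https://httpbin.org/post')
```

Note that hashing `str(payload)` ties the extended unique key to the dict's repr, so insertion order matters: `{'a': 1, 'b': 2}` and `{'b': 2, 'a': 1}` hash differently.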
src/crawlee/http_clients/_base.py (4 additions, 4 deletions)
@@ -2,15 +2,15 @@

 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Protocol
+from typing import TYPE_CHECKING, Protocol

 from crawlee._utils.http import is_status_code_error
 from crawlee.errors import HttpStatusCodeError

 if TYPE_CHECKING:
     from collections.abc import Iterable

-    from crawlee._types import HttpHeaders, HttpMethod, HttpQueryParams
+    from crawlee._types import HttpHeaders, HttpMethod, HttpPayload, HttpQueryParams
     from crawlee.base_storage_client._models import Request
     from crawlee.proxy_configuration import ProxyInfo
     from crawlee.sessions import Session
@@ -115,7 +115,7 @@ async def send_request(
         method: HttpMethod = 'GET',
         headers: HttpHeaders | None = None,
         query_params: HttpQueryParams | None = None,
-        data: dict[str, Any] | None = None,
+        payload: HttpPayload | None = None,
         session: Session | None = None,
         proxy_info: ProxyInfo | None = None,
     ) -> HttpResponse:
@@ -128,7 +128,7 @@
             method: The HTTP method to use.
             headers: The headers to include in the request.
             query_params: The query parameters to include in the request.
-            data: The data to be sent as the request body.
+            payload: The data to be sent as the request body.
             session: The session associated with the request.
             proxy_info: The information about the proxy to be used.
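All concrete clients inherit this signature, so callers switch from `data=` to `payload=` uniformly. A usage sketch, assuming the `HttpxHttpClient` shown in the next file and an illustrative URL:

```python
import asyncio

from crawlee.http_clients import HttpxHttpClient

async def main() -> None:
    client = HttpxHttpClient()
    response = await client.send_request(
        'https://httpbin.org/post',
        method='POST',
        payload={'custname': 'John Doe'},  # was `data=` before this commit
    )

asyncio.run(main())
```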
src/crawlee/http_clients/_httpx.py (4 additions, 4 deletions)
@@ -16,7 +16,7 @@

 if TYPE_CHECKING:
     from collections.abc import Iterable

-    from crawlee._types import HttpMethod, HttpQueryParams
+    from crawlee._types import HttpMethod, HttpPayload, HttpQueryParams
     from crawlee.base_storage_client._models import Request
     from crawlee.proxy_configuration import ProxyInfo
     from crawlee.statistics import Statistics
@@ -132,7 +132,7 @@ async def crawl(
             method=request.method,
             headers=headers,
             params=request.query_params,
-            data=request.data,
+            data=request.payload,
             cookies=session.cookies if session else None,
             extensions={'crawlee_session': session if self._persist_cookies_per_session else None},
         )
@@ -167,7 +167,7 @@ async def send_request(
         method: HttpMethod = 'GET',
         headers: HttpHeaders | None = None,
         query_params: HttpQueryParams | None = None,
-        data: dict[str, Any] | None = None,
+        payload: HttpPayload | None = None,
         session: Session | None = None,
         proxy_info: ProxyInfo | None = None,
     ) -> HttpResponse:
@@ -179,7 +179,7 @@
             method=method,
             headers=headers,
             params=query_params,
-            data=data,
+            data=payload,
             extensions={'crawlee_session': session if self._persist_cookies_per_session else None},
         )
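Internally, `request.payload` is handed to httpx's `data=` argument, and httpx form-encodes a dict into an `application/x-www-form-urlencoded` body. A standalone httpx illustration of that encoding behavior:

```python
import asyncio

import httpx

async def main() -> None:
    async with httpx.AsyncClient() as client:
        response = await client.request(
            'POST',
            'https://httpbin.org/post',
            data={'custname': 'John Doe'},  # form-encoded by httpx
        )
        print(response.json()['form'])  # -> {'custname': 'John Doe'}

asyncio.run(main())
```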
src/crawlee/http_clients/curl_impersonate.py (3 additions, 3 deletions)
@@ -25,7 +25,7 @@

     from curl_cffi.requests import Response

-    from crawlee._types import HttpHeaders, HttpMethod, HttpQueryParams
+    from crawlee._types import HttpHeaders, HttpMethod, HttpPayload, HttpQueryParams
     from crawlee.base_storage_client._models import Request
     from crawlee.proxy_configuration import ProxyInfo
     from crawlee.sessions import Session
@@ -152,7 +152,7 @@ async def send_request(
         method: HttpMethod = 'GET',
         headers: HttpHeaders | None = None,
         query_params: HttpQueryParams | None = None,
-        data: dict[str, Any] | None = None,
+        payload: HttpPayload | None = None,
         session: Session | None = None,
         proxy_info: ProxyInfo | None = None,
     ) -> HttpResponse:
@@ -165,7 +165,7 @@
             method=method.upper(),  # type: ignore  # curl-cffi requires uppercase method
             headers=headers,
             params=query_params,
-            data=data,
+            data=payload,
             cookies=session.cookies if session else None,
             allow_redirects=True,
         )
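curl-cffi follows the `requests`-style API, so a dict passed as `data=` is likewise sent as a form-encoded body; a minimal sketch under that assumption:

```python
import asyncio

from curl_cffi.requests import AsyncSession

async def main() -> None:
    async with AsyncSession() as session:
        response = await session.request(
            'POST',
            'https://httpbin.org/post',
            data={'custname': 'John Doe'},  # form-encoded, as in `requests`
        )
        print(response.status_code)

asyncio.run(main())
```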
