From 0bfef64ec694e6695f1c95a5fab343c268b25ec0 Mon Sep 17 00:00:00 2001 From: kba Date: Tue, 1 Oct 2024 16:25:43 +0200 Subject: [PATCH 1/4] post_ps_workflow_request: pagewise configurable --- src/ocrd_network/cli/client.py | 20 +++++++++++++++++--- src/ocrd_network/client_utils.py | 26 +++++++++++++++++--------- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/ocrd_network/cli/client.py b/src/ocrd_network/cli/client.py index 9c7f15c88f..a57cb88b82 100644 --- a/src/ocrd_network/cli/client.py +++ b/src/ocrd_network/cli/client.py @@ -2,6 +2,7 @@ from json import dumps from typing import List, Optional, Tuple from ocrd.decorators.parameter_option import parameter_option, parameter_override_option +from ocrd_network.constants import JobState from ocrd_utils import DEFAULT_METS_BASENAME from ocrd_utils.introspect import set_json_key_value_overrides from ocrd_utils.str import parse_json_string_or_file @@ -176,23 +177,36 @@ def check_workflow_job_status(address: Optional[str], workflow_job_id: str): 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default') @click.option('-m', '--path-to-mets', required=True) @click.option('-w', '--path-to-workflow', required=True) -@click.option('-b', '--block', default=False, +@click.option('-p/-P', '--page-wise/--no-page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs") +@click.option('-b', '--block', is_flag=True, default=False, help='If set, the client will block till job timeout, fail or success.') def send_workflow_job_request( address: Optional[str], path_to_mets: str, path_to_workflow: str, + page_wise : bool, block: Optional[bool] ): """ Submit a workflow job to the processing server. """ client = Client(server_addr_processing=address) - workflow_job_id = client.send_workflow_job_request(path_to_wf=path_to_workflow, path_to_mets=path_to_mets) + workflow_job_id = client.send_workflow_job_request( + path_to_wf=path_to_workflow, + path_to_mets=path_to_mets, + page_wise=page_wise, + ) assert workflow_job_id print(f"Workflow job id: {workflow_job_id}") if block: - client.poll_workflow_status(job_id=workflow_job_id) + print(f"Polling state of workflow job {workflow_job_id}") + state = client.poll_workflow_status(job_id=workflow_job_id) + if state != JobState.success: + print(f"Workflow failed with {state}") + exit(1) + else: + print(f"Workflow succeeded") + exit(0) @client_cli.group('workspace') diff --git a/src/ocrd_network/client_utils.py b/src/ocrd_network/client_utils.py index 9b924c16a4..24f3da105c 100644 --- a/src/ocrd_network/client_utils.py +++ b/src/ocrd_network/client_utils.py @@ -1,9 +1,10 @@ +import json from requests import get as request_get, post as request_post from time import sleep from .constants import JobState, NETWORK_PROTOCOLS -def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int): +def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int) -> JobState: if job_type not in ["workflow", "processor"]: raise ValueError(f"Unknown job type '{job_type}', expected 'workflow' or 'processor'") job_state = JobState.unset @@ -47,22 +48,21 @@ def get_ps_processing_job_log(ps_server_host: str, processing_job_id: str): return response -def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> str: +def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> JobState: request_url = f"{ps_server_host}/processor/job/{processing_job_id}" response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"}) assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}" job_state = response.json()["state"] assert job_state - return job_state - + return getattr(JobState, job_state.lower()) -def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> str: +def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> JobState: request_url = f"{ps_server_host}/workflow/job-simple/{workflow_job_id}" response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"}) assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}" job_state = response.json()["state"] assert job_state - return job_state + return getattr(JobState, job_state.lower()) def post_ps_processing_request(ps_server_host: str, processor: str, job_input: dict) -> str: @@ -79,8 +79,13 @@ def post_ps_processing_request(ps_server_host: str, processor: str, job_input: d # TODO: Can be extended to include other parameters such as page_wise -def post_ps_workflow_request(ps_server_host: str, path_to_wf: str, path_to_mets: str) -> str: - request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise=True" +def post_ps_workflow_request( + ps_server_host: str, + path_to_wf: str, + path_to_mets: str, + page_wise : bool, +) -> str: + request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise={'True' if page_wise else 'False'}" response = request_post( url=request_url, headers={"accept": "application/json; charset=utf-8"}, @@ -88,8 +93,11 @@ def post_ps_workflow_request(ps_server_host: str, path_to_wf: str, path_to_mets: ) # print(response.json()) # print(response.__dict__) + json_resp_raw = response.text + # print(f'post_ps_workflow_request >> {response.status_code}') + # print(f'post_ps_workflow_request >> {json_resp_raw}') assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}" - wf_job_id = response.json()["job_id"] + wf_job_id = json.loads(json_resp_raw)["job_id"] assert wf_job_id return wf_job_id From 854403de6ea880c31b82463bba3850c07565327d Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 2 Oct 2024 10:38:07 +0200 Subject: [PATCH 2/4] remove shortcuts for page-wise --- src/ocrd_network/cli/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ocrd_network/cli/client.py b/src/ocrd_network/cli/client.py index 6733f893aa..450cce43fb 100644 --- a/src/ocrd_network/cli/client.py +++ b/src/ocrd_network/cli/client.py @@ -180,7 +180,7 @@ def check_workflow_job_status(address: Optional[str], workflow_job_id: str): 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default') @click.option('-m', '--path-to-mets', required=True) @click.option('-w', '--path-to-workflow', required=True) -@click.option('-p/-P', '--page-wise/--no-page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs") +@click.option('--page-wise/--no-page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs") @click.option('-b', '--block', default=False, is_flag=True, help='If set, the client will block till job timeout, fail or success.') @click.option('-p', '--print-state', default=False, is_flag=True, From 4d01e66229bcd63872f4fd93699aa0084792c02c Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 2 Oct 2024 10:40:19 +0200 Subject: [PATCH 3/4] fix: pass page-wise argument to relevant methods --- src/ocrd_network/cli/client.py | 2 +- src/ocrd_network/client.py | 5 +++-- src/ocrd_network/client_utils.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ocrd_network/cli/client.py b/src/ocrd_network/cli/client.py index 450cce43fb..350cf64b90 100644 --- a/src/ocrd_network/cli/client.py +++ b/src/ocrd_network/cli/client.py @@ -189,7 +189,7 @@ def send_workflow_job_request( address: Optional[str], path_to_mets: str, path_to_workflow: str, - page_wise : bool, + page_wise: bool, block: bool, print_state: bool ): diff --git a/src/ocrd_network/client.py b/src/ocrd_network/client.py index c4315ded4d..1521997942 100644 --- a/src/ocrd_network/client.py +++ b/src/ocrd_network/client.py @@ -60,6 +60,7 @@ def send_processing_job_request(self, processor_name: str, req_params: dict) -> return post_ps_processing_request( ps_server_host=self.server_addr_processing, processor=processor_name, job_input=req_params) - def send_workflow_job_request(self, path_to_wf: str, path_to_mets: str): + def send_workflow_job_request(self, path_to_wf: str, path_to_mets: str, page_wise: bool): return post_ps_workflow_request( - ps_server_host=self.server_addr_processing, path_to_wf=path_to_wf, path_to_mets=path_to_mets) + ps_server_host=self.server_addr_processing, path_to_wf=path_to_wf, path_to_mets=path_to_mets, + page_wise=page_wise) diff --git a/src/ocrd_network/client_utils.py b/src/ocrd_network/client_utils.py index b23442e502..456398ecf8 100644 --- a/src/ocrd_network/client_utils.py +++ b/src/ocrd_network/client_utils.py @@ -87,7 +87,7 @@ def post_ps_workflow_request( ps_server_host: str, path_to_wf: str, path_to_mets: str, - page_wise : bool, + page_wise: bool, ) -> str: request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise={'True' if page_wise else 'False'}" response = request_post( From 97427e07326bddc0ff83e4d1ed5eba4cb6631829 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 2 Oct 2024 10:42:00 +0200 Subject: [PATCH 4/4] Update src/ocrd_network/client_utils.py Co-authored-by: Konstantin Baierer --- src/ocrd_network/client_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ocrd_network/client_utils.py b/src/ocrd_network/client_utils.py index 456398ecf8..51db2681a6 100644 --- a/src/ocrd_network/client_utils.py +++ b/src/ocrd_network/client_utils.py @@ -82,7 +82,6 @@ def post_ps_processing_request(ps_server_host: str, processor: str, job_input: d return processing_job_id -# TODO: Can be extended to include other parameters such as page_wise def post_ps_workflow_request( ps_server_host: str, path_to_wf: str,