Skip to content

Commit

Permalink
implement --log-filename, wip
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Oct 10, 2023
1 parent 00373c9 commit 6fc6060
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 32 deletions.
1 change: 1 addition & 0 deletions ocrd/ocrd/decorators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def ocrd_cli_wrap_processor(
subcommand=None,
address=None,
queue=None,
log_filename=None,
database=None,
# ocrd_network params end #
**kwargs
Expand Down
1 change: 1 addition & 0 deletions ocrd/ocrd/decorators/ocrd_cli_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def cli(mets_url):
option('-D', '--dump-module-dir', is_flag=True, default=False),
option('-h', '--help', is_flag=True, default=False),
option('-V', '--version', is_flag=True, default=False),
option('--log-filename', default=None),
# Subcommand, only used for 'worker'/'server'. Cannot be handled in
# click because processors use the @command decorator and even if they
# were using `group`, you cannot combine a command with
Expand Down
3 changes: 2 additions & 1 deletion ocrd/ocrd/lib.bash
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ ocrd__parse_argv () {
-I|--input-file-grp) ocrd__argv[input_file_grp]=$2 ; shift ;;
-w|--working-dir) ocrd__argv[working_dir]=$(realpath "$2") ; shift ;;
-m|--mets) ocrd__argv[mets_file]=$(realpath "$2") ; shift ;;
--log-filename) ocrd__argv[log_filename]="$2" ; shift ;;
--mets-server-url) ocrd_argv[mets_server_url]="$2" ; shift ;;
--overwrite) ocrd__argv[overwrite]=true ;;
--profile) ocrd__argv[profile]=true ;;
Expand All @@ -168,7 +169,7 @@ ocrd__parse_argv () {
if ! [ -v ocrd__worker_queue ]; then
ocrd__raise "For the Processing Worker --queue is required"
fi
ocrd network processing-worker $OCRD_TOOL_NAME --queue "${ocrd__worker_queue}" --database "${ocrd__worker_database}"
ocrd network processing-worker $OCRD_TOOL_NAME --queue "${ocrd__worker_queue}" --database "${ocrd__worker_database}" --log-filename "${ocrd__argv[log_filename]}"
elif [ ${ocrd__subcommand} = "server" ]; then
if ! [ -v ocrd__worker_address ]; then
ocrd__raise "For the Processor Server --address is required"
Expand Down
2 changes: 2 additions & 0 deletions ocrd/ocrd/processor/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None)
--database The MongoDB server address in format
"mongodb://{host}:{port}"
[mongodb://localhost:27018]
--log-filename Filename to redirect STDOUT/STDERR to,
if specified.
'''

processing_server_options = '''\
Expand Down
61 changes: 33 additions & 28 deletions ocrd_network/ocrd_network/process_helpers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import json
from typing import List, Optional
import logging
from contextlib import nullcontext

from ocrd.processor.helpers import run_cli, run_processor
from .utils import get_ocrd_workspace_instance
from ocrd_utils import redirect_stderr_and_stdout_to_file


# A wrapper for run_processor() and run_cli()
Expand All @@ -15,44 +17,47 @@ def invoke_processor(
output_file_grps: List[str],
page_id: str,
parameters: dict,
mets_server_url: Optional[str] = None
mets_server_url: Optional[str] = None,
log_filename : str = None,
) -> None:
if not (processor_class or executable):
raise ValueError(f'Missing processor class and executable')
raise ValueError('Missing processor class and executable')
input_file_grps_str = ','.join(input_file_grps)
output_file_grps_str = ','.join(output_file_grps)

workspace = get_ocrd_workspace_instance(
mets_path=abs_path_to_mets,
mets_server_url=mets_server_url
)
ctx_mgr = redirect_stderr_and_stdout_to_file(log_filename) if log_filename else nullcontext()
with ctx_mgr:
workspace = get_ocrd_workspace_instance(
mets_path=abs_path_to_mets,
mets_server_url=mets_server_url
)

if processor_class:
try:
run_processor(
processorClass=processor_class,
if processor_class:
try:
run_processor(
processorClass=processor_class,
workspace=workspace,
input_file_grp=input_file_grps_str,
output_file_grp=output_file_grps_str,
page_id=page_id,
parameter=parameters,
instance_caching=True,
mets_server_url=mets_server_url,
log_level=logging.DEBUG
)
except Exception as e:
raise RuntimeError(f"Python executable '{processor_class.__dict__}' exited with: {e}")
else:
return_code = run_cli(
executable=executable,
workspace=workspace,
mets_url=abs_path_to_mets,
input_file_grp=input_file_grps_str,
output_file_grp=output_file_grps_str,
page_id=page_id,
parameter=parameters,
instance_caching=True,
parameter=json.dumps(parameters),
mets_server_url=mets_server_url,
log_level=logging.DEBUG
)
except Exception as e:
raise RuntimeError(f"Python executable '{processor_class.__dict__}' exited with: {e}")
else:
return_code = run_cli(
executable=executable,
workspace=workspace,
mets_url=abs_path_to_mets,
input_file_grp=input_file_grps_str,
output_file_grp=output_file_grps_str,
page_id=page_id,
parameter=json.dumps(parameters),
mets_server_url=mets_server_url,
log_level=logging.DEBUG
)
if return_code != 0:
raise RuntimeError(f"CLI executable '{executable}' exited with: {return_code}")
if return_code != 0:
raise RuntimeError(f"CLI executable '{executable}' exited with: {return_code}")
9 changes: 6 additions & 3 deletions ocrd_network/ocrd_network/processing_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@


class ProcessingWorker:
def __init__(self, rabbitmq_addr, mongodb_addr, processor_name, ocrd_tool: dict, processor_class=None) -> None:
def __init__(self, rabbitmq_addr, mongodb_addr, processor_name, ocrd_tool: dict, processor_class=None, log_filename:str=None) -> None:
initLogging()
logging_suffix = f'{processor_name}.{getpid()}'
self.log = getLogger(f'ocrd_network.processing_worker')
file_handler = logging.FileHandler(f'/tmp/ocrd_worker_{logging_suffix}.log', mode='a')
if not log_filename:
log_filename = f'/tmp/ocrd_worker_{processor_name}.{getpid()}.log'
self.log_filename = log_filename
file_handler = logging.FileHandler(log_filename, mode='a')
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
self.log.addHandler(file_handler)

Expand Down Expand Up @@ -219,6 +221,7 @@ def process_message(self, processing_message: OcrdProcessingMessage) -> None:
input_file_grps=input_file_grps,
output_file_grps=output_file_grps,
page_id=page_id,
log_filename=self.log_filename,
parameters=processing_message.parameters,
mets_server_url=mets_server_url
)
Expand Down

0 comments on commit 6fc6060

Please sign in to comment.