def setup_logger(logger_name=DEFAULT_COMMAND_LOGGER_NAME, formatter_name="command", handler_name="command"):
    """Return the logger used by management commands, configuring it on first use.

    If ``logger_name`` is not declared in ``settings.LOGGING["loggers"]``, a
    formatter, a stream handler and the logger itself are registered in
    ``settings.LOGGING`` and an equivalent ``StreamHandler`` is attached to the
    live logger, which is set to INFO and detached from its parents.

    If the logger is already declared in the settings it is returned untouched.

    :param logger_name: name of the logger to fetch/configure.
    :param formatter_name: key used in ``settings.LOGGING["formatters"]``.
    :param handler_name: key used in ``settings.LOGGING["handlers"]``.
    :return: the ``logging.Logger`` registered under ``logger_name``.
    """
    # Bound unconditionally and not called ``format``, so the builtin is never
    # shadowed nor accidentally handed to ``logging.Formatter``.
    log_format = "%(levelname)-7s %(asctime)s %(message)s"

    logger = logging.getLogger(logger_name)

    if logger_name in settings.LOGGING["loggers"]:
        # Already declared in the settings: leave its handlers and level alone.
        return logger

    settings.LOGGING["formatters"][formatter_name] = {
        "format": log_format,
    }
    settings.LOGGING["handlers"][handler_name] = {
        "level": "DEBUG",
        "class": "logging.StreamHandler",
        "formatter": formatter_name,
    }
    settings.LOGGING["loggers"][logger_name] = {
        "handlers": [handler_name],
        "level": "INFO",
        "propagate": False,
    }

    # The dict above only updates the settings object; the handler is attached
    # explicitly here so that the live logger actually emits records.
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(fmt=log_format))
    handler.setLevel(logging.DEBUG)

    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    logger.propagate = False

    return logger
class Command(BaseCommand):
    """Regenerate the XML metadata document of the datasets.

    Every ``Dataset`` is re-saved, since ``save()`` is what triggers the
    metadata regeneration. Datasets not listed via ``--layer`` (when that
    option is given) and datasets that keep an uploaded custom XML are skipped.
    """

    help = "Re-create XML metadata documents"

    def add_arguments(self, parser):
        """Declare the command line options accepted by the command."""
        parser.add_argument(
            "-l",
            "--layer",
            dest="layers",
            action="append",
            help="Only process specified layers ",
        )
        parser.add_argument(
            "--skip-logger-setup",
            action="store_false",
            dest="setup_logger",
            help=(
                'Skips setup of the "geonode.commands" logger, "command" handler '
                'and "command" format if not present in settings'
            ),
        )
        parser.add_argument(
            "-d",
            "--dry-run",
            dest="dry-run",
            action="store_true",
            help="Do not actually perform any change",
        )

    def handle(self, **options):
        """Re-save the selected datasets and log a summary of the outcome.

        Options read: ``layers`` (list of typenames to restrict the run to, or
        ``None``), ``dry-run`` (when true nothing is saved) and
        ``setup_logger`` (false when ``--skip-logger-setup`` is passed).

        A failure while saving one dataset is logged and counted; it does not
        stop the processing of the remaining ones.
        """
        requested_layers = options.get("layers")
        dry_run = options.get("dry-run")

        # A distinct local name is used on purpose: a logger must be bound even
        # when --skip-logger-setup is given, otherwise the first log call fails.
        log = logging.getLogger(__name__)
        if options.get("setup_logger"):
            # Keep the default logger should the helper return nothing.
            log = command_utils.setup_logger() or log

        log.info(f"==== Running command {__name__}")
        log.info(f"{self.help}")
        log.info("")

        log.debug(f"DRY-RUN is {dry_run}")
        log.debug(f"LAYERS is {requested_layers}")

        layers = Dataset.objects.all()
        tot = len(layers)
        log.info(f"Total layers in GeoNode: {tot}")

        cnt_ok = 0
        cnt_bad = 0
        cnt_skip = 0

        instance: ResourceBase
        for i, instance in enumerate(layers, start=1):
            log.info(f"- {i}/{tot} Processing layer {instance.id} [{instance.typename}] '{instance.title}'")

            if requested_layers and instance.typename not in requested_layers:
                log.info(" - Layer filtered out by args")
                cnt_skip += 1
                continue

            if instance.metadata_uploaded and instance.metadata_uploaded_preserve:
                log.info(" - Layer filtered out since it uses custom XML")
                cnt_skip += 1
                continue

            if not dry_run:
                try:
                    # the save() method triggers the metadata regeneration
                    instance.save()
                except Exception as e:
                    # Best effort: record the failure (with traceback) and go on
                    # with the next dataset.
                    log.exception(f"Error saving instance '{instance.title}': {e}")
                    log.warning(f"Metadata couldn't be regenerated for instance '{instance.title}' ")
                    cnt_bad += 1
                    continue

            log.info(f" - Done {instance.name}")
            cnt_ok += 1

        log.info("Work completed" + (" [DRYRUN]" if dry_run else ""))
        log.info(f"- Metadata regenerated : {cnt_ok}")
        log.info(f"- Metadata in error : {cnt_bad}")
        log.info(f"- Resources skipped : {cnt_skip}")