Add a to_xlsx output pipe #46

Signed-off-by: Thomas Druez <[email protected]>
aboutcode-org · Dec 11, 2020 · 7e158fa · 7e158fa
1 parent 1f75a82
commit 7e158fa
Show file tree

Hide file tree

Showing 3 changed files with 54 additions and 2 deletions.
diff --git a/etc/requirements/base.txt b/etc/requirements/base.txt
@@ -34,3 +34,6 @@ container_inspector==3.1.2
 
 # ScanCode-toolkit
 scancode-toolkit[full]==3.2.3
+
+# Utilities
+XlsxWriter==1.3.7
diff --git a/scanpipe/management/commands/output.py b/scanpipe/management/commands/output.py
@@ -23,17 +23,18 @@
 from scanpipe.management.commands import ProjectCommand
 from scanpipe.pipes.outputs import to_csv
 from scanpipe.pipes.outputs import to_json
+from scanpipe.pipes.outputs import to_xlsx
 
 
 class Command(ProjectCommand):
- help = "Output project results as JSON or CSV."
+ help = "Output project results as JSON, CSV, or XLSX."
 
  def add_arguments(self, parser):
  super().add_arguments(parser)
  parser.add_argument(
  "--format",
  default="json",
- choices=["json", "csv"],
+ choices=["json", "csv", "xlsx"],
  help="Specifies the output serialization format for the results.",
  )
 
@@ -43,6 +44,7 @@ def handle(self, *args, **options):
  output_function = {
  "json": to_json,
  "csv": to_csv,
+ "xlsx": to_xlsx,
  }.get(options["format"])
 
  output_file = output_function(self.project)

diff --git a/scanpipe/pipes/outputs.py b/scanpipe/pipes/outputs.py
@@ -25,6 +25,8 @@
 
 from django.core.serializers.json import DjangoJSONEncoder
 
+import xlsxwriter
+
 from scancodeio import SCAN_NOTICE
 from scancodeio import __version__ as scancodeio_version
 from scanpipe.api.serializers import CodebaseResourceSerializer
@@ -180,3 +182,48 @@ def to_json(project):
  file.write(chunk)
 
  return output_file
+
+
+def _queryset_to_xlsx_worksheet(queryset, workbook):
+    """
+    Add a worksheet to the `workbook` and fill it with the `queryset` records.
+
+    The worksheet is named after the model name of the `queryset`.
+    The first row holds the field names returned by `get_serializer_fields`
+    for that model, followed by one row per record.
+
+    Cell values are flattened before being written:
+    - list values are joined with newlines, one entry per line. A dict entry
+      contributes only its first value, any other entry its `str()` form.
+    - dict values are dumped as JSON, an empty dict becomes an empty string.
+    - any other value is handed to `worksheet.write()` unchanged.
+    """
+    multivalues_separator = "\n"
+    model_class = queryset.model
+    fieldnames = get_serializer_fields(model_class)
+    model_name = model_class._meta.model_name
+
+    worksheet = workbook.add_worksheet(model_name)
+    worksheet.write_row(row=0, col=0, data=fieldnames)
+
+    # Row 0 is the header, records start at row 1.
+    for row_index, record in enumerate(queryset.iterator(), start=1):
+        row_data = [getattr(record, field) for field in fieldnames]
+
+        for col_index, value in enumerate(row_data):
+            if isinstance(value, list):
+                entries = []
+                for entry in value:
+                    if isinstance(entry, dict):
+                        # Only the first value of a dict entry is exported.
+                        # An empty dict, or a None first value, yields "" in
+                        # place of an IndexError or a "None" literal.
+                        first_value = next(iter(entry.values()), None)
+                        entry = "" if first_value is None else first_value
+                    # str() guards the join below against non-string entries.
+                    entries.append(str(entry))
+                value = multivalues_separator.join(entries)
+            elif isinstance(value, dict):
+                value = json.dumps(value) if value else ""
+
+            worksheet.write(row_index, col_index, value)
+
+
+def to_xlsx(project):
+    """
+    Export the results of the provided `project` as an XLSX workbook.
+
+    The workbook is written to the path returned by
+    `project.get_output_file_path("results", "xlsx")`, in the `project`
+    output/ directory. It holds one worksheet for the discovered packages,
+    followed by one worksheet for the codebase resources (symlinks excluded).
+
+    Return the path of the generated output file.
+    """
+    xlsx_path = project.get_output_file_path("results", "xlsx")
+
+    # Build both querysets before the workbook is opened.
+    packages = project.discoveredpackages.all()
+    resources = project.codebaseresources.without_symlinks()
+
+    # The context manager closes the workbook once all worksheets are filled.
+    with xlsxwriter.Workbook(xlsx_path) as xlsx_workbook:
+        for records in (packages, resources):
+            _queryset_to_xlsx_worksheet(records, xlsx_workbook)
+
+    return xlsx_path