Skip to content

Commit

Permalink
Add a to_xlsx output pipe #46
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Druez <[email protected]>
  • Loading branch information
tdruez committed Dec 11, 2020
1 parent 1f75a82 commit 7e158fa
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 2 deletions.
3 changes: 3 additions & 0 deletions etc/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ container_inspector==3.1.2

# ScanCode-toolkit
scancode-toolkit[full]==3.2.3

# Utilities
XlsxWriter==1.3.7
6 changes: 4 additions & 2 deletions scanpipe/management/commands/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,18 @@
from scanpipe.management.commands import ProjectCommand
from scanpipe.pipes.outputs import to_csv
from scanpipe.pipes.outputs import to_json
from scanpipe.pipes.outputs import to_xlsx


class Command(ProjectCommand):
help = "Output project results as JSON or CSV."
help = "Output project results as JSON, CSV, or XLSX."

def add_arguments(self, parser):
super().add_arguments(parser)
parser.add_argument(
"--format",
default="json",
choices=["json", "csv"],
choices=["json", "csv", "xlsx"],
help="Specifies the output serialization format for the results.",
)

Expand All @@ -43,6 +44,7 @@ def handle(self, *args, **options):
output_function = {
"json": to_json,
"csv": to_csv,
"xlsx": to_xlsx,
}.get(options["format"])

output_file = output_function(self.project)
Expand Down
47 changes: 47 additions & 0 deletions scanpipe/pipes/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

from django.core.serializers.json import DjangoJSONEncoder

import xlsxwriter

from scancodeio import SCAN_NOTICE
from scancodeio import __version__ as scancodeio_version
from scanpipe.api.serializers import CodebaseResourceSerializer
Expand Down Expand Up @@ -180,3 +182,48 @@ def to_json(project):
file.write(chunk)

return output_file


def _xlsx_cell_value(value, multivalues_separator="\n"):
    """
    Return `value` converted to something that fits in a single worksheet cell.

    - A list is flattened to one string, its entries joined with
      `multivalues_separator`. A dict entry contributes its first value only,
      any other entry contributes its `str()` representation.
    - A dict is dumped as a JSON string, or as "" when it is empty.
    - Any other value is returned unchanged.
    """
    if isinstance(value, list):
        entries = []
        for entry in value:
            if isinstance(entry, dict):
                # Only the first value of a dict entry is kept. An empty dict
                # has no first value: use "" instead of raising IndexError.
                entry = next(iter(entry.values()), "")
            # Always cast to str, since str.join() raises a TypeError on
            # non-string items (e.g. a dict entry whose first value is an int).
            entries.append(str(entry))
        return multivalues_separator.join(entries)

    if isinstance(value, dict):
        return json.dumps(value) if value else ""

    return value


def _queryset_to_xlsx_worksheet(queryset, workbook):
    """
    Add a worksheet to the provided `workbook` and fill it with the records
    of the provided `queryset`.

    The worksheet is named after the model name of the `queryset` model.
    The first row holds the names returned by `get_serializer_fields()` for
    that model, followed by one row per record, each cell being the value of
    the matching attribute on the record.
    List and dict values are converted to a string before being written.
    """
    multivalues_separator = "\n"
    model_class = queryset.model
    fieldnames = get_serializer_fields(model_class)
    model_name = model_class._meta.model_name

    worksheet = workbook.add_worksheet(model_name)
    worksheet.write_row(row=0, col=0, data=fieldnames)

    # Row 0 is the header, records start at row 1.
    for row_index, record in enumerate(queryset.iterator(), start=1):
        row_data = [getattr(record, field) for field in fieldnames]

        for col_index, value in enumerate(row_data):
            cell_value = _xlsx_cell_value(value, multivalues_separator)
            worksheet.write(row_index, col_index, cell_value)


def to_xlsx(project):
    """
    Write the results of the provided `project` to an XLSX workbook.

    The workbook is created in the `project` output/ directory and gets one
    worksheet per queryset: the discovered packages first, then the codebase
    resources as returned by `without_symlinks()`.
    Return the path of the generated output file.
    """
    xlsx_path = project.get_output_file_path("results", "xlsx")

    # Both querysets are built before the workbook is opened.
    packages = project.discoveredpackages.all()
    resources = project.codebaseresources.without_symlinks()

    # The workbook is closed when leaving the context manager.
    with xlsxwriter.Workbook(xlsx_path) as xlsx_workbook:
        _queryset_to_xlsx_worksheet(packages, xlsx_workbook)
        _queryset_to_xlsx_worksheet(resources, xlsx_workbook)

    return xlsx_path

0 comments on commit 7e158fa

Please sign in to comment.