-
Notifications
You must be signed in to change notification settings - Fork 85
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Addon pipeline for source string collection (#1160)
* Add addon pipeline for string collection Signed-off-by: Keshav Priyadarshi <[email protected]> * Add test for collect_source_strings pipeline Signed-off-by: Keshav Priyadarshi <[email protected]> * Update dockerfile to install xgettext Signed-off-by: Keshav Priyadarshi <[email protected]> * Update CI to install xgettext Signed-off-by: Keshav Priyadarshi <[email protected]> * Update docs Signed-off-by: Keshav Priyadarshi <[email protected]> * Only supported on Linux Signed-off-by: Keshav Priyadarshi <[email protected]> Co-authored-by: Philippe Ombredanne <[email protected]> * Only supported on Linux Signed-off-by: Keshav Priyadarshi <[email protected]> Co-authored-by: Philippe Ombredanne <[email protected]> * Add CHANGELOG for CollectSourceStrings pipeline Signed-off-by: Keshav Priyadarshi <[email protected]> --------- Signed-off-by: Keshav Priyadarshi <[email protected]> Co-authored-by: Philippe Ombredanne <[email protected]>
- Loading branch information
1 parent
d6389b2
commit 1af8d99
Showing
10 changed files
with
228 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# http://nexb.com and https://github.com/nexB/scancode.io | ||
# The ScanCode.io software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode.io is provided as-is without warranties. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# | ||
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/nexB/scancode.io for support and download. | ||
|
||
from scanpipe.pipelines import Pipeline | ||
from scanpipe.pipes import source_strings | ||
|
||
|
||
class CollectSourceStrings(Pipeline):
    """Gather source strings from codebase files and save them in extra data."""

    is_addon = True
    download_inputs = False

    @classmethod
    def steps(cls):
        """Return the tuple of steps run by this pipeline (a single step)."""
        pipeline_steps = (cls.collect_and_store_resource_strings,)
        return pipeline_steps

    def collect_and_store_resource_strings(self):
        """
        Collect source strings from the project codebase files using xgettext
        and store them in the extra data field of each resource.
        """
        # The actual work lives in the ``source_strings`` pipe; this step only
        # forwards the project and the pipeline logger to it.
        source_strings.collect_and_store_resource_strings(
            project=self.project,
            logger=self.log,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# http://nexb.com and https://github.com/nexB/scancode.io | ||
# The ScanCode.io software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode.io is provided as-is without warranties. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# | ||
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/nexB/scancode.io for support and download. | ||
|
||
from source_inspector import strings_xgettext | ||
|
||
from scanpipe.pipes import LoopProgress | ||
|
||
|
||
class XgettextNotFound(Exception):
    """Signal that the ``xgettext`` tool is not reported as installed."""
|
||
|
||
def collect_and_store_resource_strings(project, logger=None):
    """
    Extract source strings with xgettext from the files of ``project`` and
    save them in the extra data field of each processed resource.

    Only file resources that are not flagged as binary, archive, or media are
    processed. ``logger`` is handed to ``LoopProgress`` for progress reporting.

    Raise ``XgettextNotFound`` when ``xgettext`` is not reported as installed.
    """
    # Fail early: nothing below can work without the xgettext executable.
    xgettext_available = strings_xgettext.is_xgettext_installed()
    if not xgettext_available:
        raise XgettextNotFound(
            "``xgettext`` not found. Install ``gettext`` to use this pipeline."
        )

    text_resources = project.codebaseresources.files().filter(
        is_binary=False,
        is_archive=False,
        is_media=False,
    )

    # The total is computed up front so the progress helper knows the loop size.
    total = text_resources.count()
    progress = LoopProgress(total, logger)

    for text_resource in progress.iter(text_resources.iterator(chunk_size=2000)):
        _collect_and_store_resource_strings(text_resource)
|
||
|
||
def _collect_and_store_resource_strings(resource):
    """
    Run the xgettext string collection on the file at ``resource.location``
    and save the result under the ``source_strings`` key of its extra data.
    """
    entries = strings_xgettext.collect_strings(resource.location)

    # Entries without a "string" key are ignored; only the string values are kept.
    extra_data = {
        "source_strings": [entry["string"] for entry in entries if "string" in entry],
    }
    resource.update_extra_data(extra_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
# | ||
# http://nexb.com and https://github.com/nexB/scancode.io | ||
# The ScanCode.io software is licensed under the Apache License version 2.0. | ||
# Data generated with ScanCode.io is provided as-is without warranties. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# | ||
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/nexB/scancode.io for support and download. | ||
|
||
import sys | ||
from pathlib import Path | ||
from unittest import skipIf | ||
|
||
from django.test import TestCase | ||
|
||
from scanpipe import pipes | ||
from scanpipe.models import Project | ||
from scanpipe.pipes import source_strings | ||
from scanpipe.pipes.input import copy_input | ||
|
||
|
||
class ScanPipeSourceStringsPipesTest(TestCase):
    """Tests for the ``scanpipe.pipes.source_strings`` pipe functions."""

    data_location = Path(__file__).parent.parent / "data"

    def setUp(self):
        """Create the project shared by the tests of this class."""
        self.project1 = Project.objects.create(name="Analysis")

    @skipIf(sys.platform != "linux", "Only supported on Linux")
    def test_scanpipe_pipes_symbols_collect_and_store_resource_strings(self):
        """Check the strings stored in extra data for a JavaScript input file."""
        project = self.project1

        # Place the sample JavaScript file inside the project codebase.
        codefile_dir = project.codebase_path / "codefile"
        codefile_dir.mkdir(parents=True)
        main_js = self.data_location / "d2d-javascript" / "from" / "main.js"
        copy_input(main_js, codefile_dir)

        pipes.collect_and_create_codebase_resources(project)
        source_strings.collect_and_store_resource_strings(project)

        first_file = project.codebaseresources.files()[0]
        collected_strings = first_file.extra_data.get("source_strings")

        expected_strings = [
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_-+=",  # noqa
            "Enter the desired length of your password:",
        ]
        # Order is not significant; only the set of collected strings is compared.
        self.assertCountEqual(expected_strings, collected_strings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters