From f6981cf029de6276e8ec2cc5240ab3642785c2c8 Mon Sep 17 00:00:00 2001 From: Ayan Sinha Mahapatra Date: Fri, 14 Jul 2023 04:56:22 +0530 Subject: [PATCH] Support ignored_resources attribute in ABOUT files #809 (#810) Reference: https://github.com/nexB/scancode.io/issues/809 Signed-off-by: Ayan Sinha Mahapatra --- scanpipe/pipes/d2d.py | 13 + scanpipe/pipes/resolve.py | 4 + .../tests/data/d2d/about_files/expected.json | 381 +++++++++++++++--- .../d2d/about_files/from-with-about-file.zip | Bin 4748 -> 5921 bytes scanpipe/tests/test_pipelines.py | 4 +- setup.cfg | 2 +- 6 files changed, 352 insertions(+), 52 deletions(-) diff --git a/scanpipe/pipes/d2d.py b/scanpipe/pipes/d2d.py index 98107427ac..6d9ab9a610 100644 --- a/scanpipe/pipes/d2d.py +++ b/scanpipe/pipes/d2d.py @@ -24,6 +24,8 @@ from pathlib import Path from timeit import default_timer as timer +from django.db.models import Q + from scanpipe import pipes from scanpipe.models import CodebaseRelation from scanpipe.models import CodebaseResource @@ -614,12 +616,23 @@ def _map_about_file_resource(project, about_file_resource, to_resources): # Cannot map anything without the about_resource value. return + ignored_resources = [] + if extra_data := package_data.get("extra_data"): + ignored_resources = extra_data.get("ignored_resources") + # Fetch all resources that are covered by the .ABOUT file. codebase_resources = to_resources.filter(path__contains=f"/{filename.lstrip('/')}") if not codebase_resources: # If there's nothing to map on the ``to/`` do not create the package. return + # Ignore resources for paths in `ignored_resources` attribute + if ignored_resources: + lookups = Q() + for resource_path in ignored_resources: + lookups |= Q(**{"path__contains": resource_path}) + codebase_resources = codebase_resources.filter(~lookups) + # Create the Package using .ABOUT data and assigned related codebase_resources pipes.update_or_create_package(project, package_data, codebase_resources) diff --git a/scanpipe/pipes/resolve.py b/scanpipe/pipes/resolve.py index 978c3ada69..5197573773 100644 --- a/scanpipe/pipes/resolve.py +++ b/scanpipe/pipes/resolve.py @@ -89,6 +89,10 @@ def resolve_about_package(input_location): if about_resource := about_data.get("about_resource"): package_data["filename"] = list(about_resource.keys())[0] + if ignored_resources := about_data.get("ignored_resources"): + extra_data = {"ignored_resources": list(ignored_resources.keys())} + package_data["extra_data"] = extra_data + if license_expression := about_data.get("license_expression"): package_data["declared_license_expression"] = license_expression diff --git a/scanpipe/tests/data/d2d/about_files/expected.json b/scanpipe/tests/data/d2d/about_files/expected.json index 4a7e4cb0c2..0f1fbf358a 100644 --- a/scanpipe/tests/data/d2d/about_files/expected.json +++ b/scanpipe/tests/data/d2d/about_files/expected.json @@ -58,7 +58,11 @@ "extracted_license_statement": "", "notice_text": "notice", "source_packages": [], - "extra_data": {}, + "extra_data": { + "ignored_resources": [ + "flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/ConfigurationProvider.class" + ] + }, "package_uid": "", "datasource_id": "", "file_references": [], @@ -138,9 +142,9 @@ "status": "scanned", "tag": "from", "extension": ".ABOUT", - "md5": "d7c3f25f4159f2d93a3203190816443d", - "sha1": "4efb1dc6691b759dd5885080f64b5b4fe06bd518", - "sha256": "bdebcdfbf7f53eba3fa115801fbebc01429fcba7f07aa76556c0eb0ca9d16336", + "md5": "5db9e5cfad2986d7f1e8cd7633100b16", + "sha1": "5a8183c7d9d9dcc56c5c7a57ae6a30003917e810", + "sha256": "da45c05ba8f0a7e434759085ac7048a4d51c5cc19b6af1d34f932bcb4ed1ecb1", "sha512": "", "programming_language": "", "is_binary": false, @@ -152,66 +156,66 @@ "detected_license_expression_spdx": "Apache-2.0", "license_detections": [ { + "license_expression": "apache-2.0", "matches": [ { "score": 100.0, - "matcher": "2-aho", - "end_line": 6, - "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_apache-2.0_for_apache-2.0.RULE", - "start_line": 6, - "matched_text": "license_expression: apache-2.0", - "match_coverage": 100.0, + "start_line": 8, + "end_line": 8, "matched_length": 3, - "rule_relevance": 100, + "match_coverage": 100.0, + "matcher": "2-aho", + "license_expression": "apache-2.0", "rule_identifier": "spdx_license_id_apache-2.0_for_apache-2.0.RULE", - "license_expression": "apache-2.0" + "rule_relevance": 100, + "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_apache-2.0_for_apache-2.0.RULE", + "matched_text": "license_expression: apache-2.0" }, { "score": 100.0, - "matcher": "2-aho", - "end_line": 9, - "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_apache-2.0_for_apache-2.0.RULE", - "start_line": 9, - "matched_text": " - key: apache-2.0", - "match_coverage": 100.0, + "start_line": 11, + "end_line": 11, "matched_length": 3, - "rule_relevance": 100, + "match_coverage": 100.0, + "matcher": "2-aho", + "license_expression": "apache-2.0", "rule_identifier": "spdx_license_id_apache-2.0_for_apache-2.0.RULE", - "license_expression": "apache-2.0" + "rule_relevance": 100, + "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_apache-2.0_for_apache-2.0.RULE", + "matched_text": " - key: apache-2.0" }, { "score": 100.0, - "matcher": "2-aho", - "end_line": 10, - "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_1039.RULE", - "start_line": 10, - "matched_text": " name: Apache License 2.0", - "match_coverage": 100.0, + "start_line": 12, + "end_line": 12, "matched_length": 5, - "rule_relevance": 100, + "match_coverage": 100.0, + "matcher": "2-aho", + "license_expression": "apache-2.0", "rule_identifier": "apache-2.0_1039.RULE", - "license_expression": "apache-2.0" + "rule_relevance": 100, + "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_1039.RULE", + "matched_text": " name: Apache License 2.0" }, { "score": 100.0, - "matcher": "2-aho", - "end_line": 11, - "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", - "start_line": 11, - "matched_text": " file: apache-2.0.LICENSE", - "match_coverage": 100.0, + "start_line": 13, + "end_line": 13, "matched_length": 4, - "rule_relevance": 100, + "match_coverage": 100.0, + "matcher": "2-aho", + "license_expression": "apache-2.0", "rule_identifier": "apache-2.0_176.RULE", - "license_expression": "apache-2.0" + "rule_relevance": 100, + "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "matched_text": " file: apache-2.0.LICENSE" } ], - "identifier": "apache_2_0-6ac1a8f1-4540-e467-4f69-5e1984b60129", - "license_expression": "apache-2.0" + "identifier": "apache_2_0-6ac1a8f1-4540-e467-4f69-5e1984b60129" } ], "license_clues": [], - "percentage_of_license_text": 22.39, + "percentage_of_license_text": 18.29, "copyrights": [], "holders": [], "authors": [], @@ -221,12 +225,288 @@ "urls": [ { "url": "https://repo1.maven.org/maven2/log4j/log4j/1.2.13/log4j-1.2.13.jar", - "end_line": 4, - "start_line": 4 + "start_line": 6, + "end_line": 6 } ], "extra_data": {} }, + { + "path": "from/flume-ng-node-1.9.0-sources.jar", + "type": "file", + "name": "flume-ng-node-1.9.0-sources.jar", + "status": "", + "tag": "from", + "extension": ".jar", + "md5": "890f2f18119ee83e5fc63fc438bd2367", + "sha1": "ca559dc7dbbf551f3f8d848c70bc8b2679dc8cf4", + "sha256": "2c41dffc2ae1ea7603f4fb497f8ffecc4c5fec6c52e0033c37764ecdf1866793", + "sha512": "", + "programming_language": "", + "is_binary": true, + "is_text": false, + "is_archive": true, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "for_packages": [], + "emails": [], + "urls": [], + "extra_data": {} + }, + { + "path": "from/flume-ng-node-1.9.0-sources.jar-extract", + "type": "directory", + "name": "flume-ng-node-1.9.0-sources.jar-extract", + "status": "ignored-directory", + "tag": "from", + "extension": ".jar-extract", + "md5": "", + "sha1": "", + "sha256": "", + "sha512": "", + "programming_language": "", + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "for_packages": [], + "emails": [], + "urls": [], + "extra_data": {} + }, + { + "path": "from/flume-ng-node-1.9.0-sources.jar-extract/flume-ng-node-1.9.0-sources.jar-extract", + "type": "directory", + "name": "flume-ng-node-1.9.0-sources.jar-extract", + "status": "mapped", + "tag": "from", + "extension": ".jar-extract", + "md5": "", + "sha1": "", + "sha256": "", + "sha512": "", + "programming_language": "", + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "for_packages": [], + "emails": [], + "urls": [], + "extra_data": {} + }, + { + "path": "from/flume-ng-node-1.9.0-sources.jar-extract/flume-ng-node-1.9.0-sources.jar-extract/org", + "type": "directory", + "name": "org", + "status": "ignored-directory", + "tag": "from", + "extension": "", + "md5": "", + "sha1": "", + "sha256": "", + "sha512": "", + "programming_language": "", + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "for_packages": [], + "emails": [], + "urls": [], + "extra_data": {} + }, + { + "path": "from/flume-ng-node-1.9.0-sources.jar-extract/flume-ng-node-1.9.0-sources.jar-extract/org/apache", + "type": "directory", + "name": "apache", + "status": "ignored-directory", + "tag": "from", + "extension": "", + "md5": "", + "sha1": "", + "sha256": "", + "sha512": "", + "programming_language": "", + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "for_packages": [], + "emails": [], + "urls": [], + "extra_data": {} + }, + { + "path": "from/flume-ng-node-1.9.0-sources.jar-extract/flume-ng-node-1.9.0-sources.jar-extract/org/apache/flume", + "type": "directory", + "name": "flume", + "status": "ignored-directory", + "tag": "from", + "extension": "", + "md5": "", + "sha1": "", + "sha256": "", + "sha512": "", + "programming_language": "", + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "for_packages": [], + "emails": [], + "urls": [], + "extra_data": {} + }, + { + "path": "from/flume-ng-node-1.9.0-sources.jar-extract/flume-ng-node-1.9.0-sources.jar-extract/org/apache/flume/node", + "type": "directory", + "name": "node", + "status": "ignored-directory", + "tag": "from", + "extension": "", + "md5": "", + "sha1": "", + "sha256": "", + "sha512": "", + "programming_language": "", + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "for_packages": [], + "emails": [], + "urls": [], + "extra_data": {} + }, + { + "path": "from/flume-ng-node-1.9.0-sources.jar-extract/flume-ng-node-1.9.0-sources.jar-extract/org/apache/flume/node/ConfigurationProvider.java", + "type": "file", + "name": "ConfigurationProvider.java", + "status": "scanned", + "tag": "from", + "extension": ".java", + "md5": "318484f1071022f375cd0fd99e9fe844", + "sha1": "dde981f3bc981e630b3e8673321c2d152d39761c", + "sha256": "6768adff7b9027b50ba16fa24a8ac900a0a24c7df34ed0027547d049a7b6e4e6", + "sha512": "", + "programming_language": "Java", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_key_file": false, + "detected_license_expression": "apache-2.0", + "detected_license_expression_spdx": "Apache-2.0", + "license_detections": [ + { + "license_expression": "apache-2.0", + "matches": [ + { + "score": 100.0, + "start_line": 2, + "end_line": 17, + "matched_length": 119, + "match_coverage": 100.0, + "matcher": "2-aho", + "license_expression": "apache-2.0", + "rule_identifier": "apache-2.0_2.RULE", + "rule_relevance": 100, + "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_2.RULE", + "matched_text": " * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied. See the License for the\n * specific language governing permissions and limitations\n * under the License." + } + ], + "identifier": "apache_2_0-4bde3f57-78aa-4201-96bf-531cba09e7de" + } + ], + "license_clues": [], + "percentage_of_license_text": 92.25, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "for_packages": [], + "emails": [], + "urls": [ + { + "url": "http://www.apache.org/licenses/LICENSE-2.0", + "start_line": 10, + "end_line": 10 + } + ], + "extra_data": { + "java_package": "org.apache.flume.node" + } + }, { "path": "from/flume-ng-node.NOTICE", "type": "file", @@ -975,9 +1255,7 @@ "holders": [], "authors": [], "package_data": [], - "for_packages": [ - "pkg:maven/log4j/log4j@1.2.13?uuid=fixed-uid-done-for-testing-5642512d1758" - ], + "for_packages": [], "emails": [], "urls": [], "extra_data": {} @@ -1381,11 +1659,6 @@ "to_resource": "to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/Application.class", "map_type": "about_file" }, - { - "from_resource": "from/flume-ng-node-1.9.0-sources.ABOUT", - "to_resource": "to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/ConfigurationProvider.class", - "map_type": "about_file" - }, { "from_resource": "from/flume-ng-node-1.9.0-sources.ABOUT", "to_resource": "to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/EnvVarResolverProperties.class", @@ -1430,6 +1703,16 @@ "from_resource": "from/flume-ng-node-1.9.0-sources.ABOUT", "to_resource": "to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/StaticZooKeeperConfigurationProvider.class", "map_type": "about_file" + }, + { + "from_resource": "from/flume-ng-node-1.9.0-sources.jar-extract/flume-ng-node-1.9.0-sources.jar-extract", + "to_resource": "to/flume-ng-node-1.9.0.jar", + "map_type": "jar_to_source" + }, + { + "from_resource": "from/flume-ng-node-1.9.0-sources.jar-extract/flume-ng-node-1.9.0-sources.jar-extract/org/apache/flume/node/ConfigurationProvider.java", + "to_resource": "to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/ConfigurationProvider.class", + "map_type": "java_to_class" } ] } \ No newline at end of file diff --git a/scanpipe/tests/data/d2d/about_files/from-with-about-file.zip b/scanpipe/tests/data/d2d/about_files/from-with-about-file.zip index e89f889af08392187b6b533e10f58012bb9dd8aa..c4115b00ada858da6e6d87e15c7065a867111fa3 100644 GIT binary patch delta 1699 zcmeBCU8pA#;LXe;!oUH9j6C9@3}7%(L{reCeM3rV1vdjD%S&bk1|YG~FP~qLo#R=O z>*H*G28I|N1_p#)?#`28V6Bt?3FtUs=w)CyeOmvdj~^>g3p>XlS6&YeplUWCMi`N+ z{0?FSQ1rm$OZ>X^VT~J7belG$U@@+A!s)!j3IeX*Ygp|Wj^vrAUe!9aC}>Jp)_1Y3 z$6F+f#S+Z_{uV0uHR%+S;o+U-a<6!w->BfQ_${XL_N@(5@hrO;9}% zy!0H~(uF^Ko2Nzo4zAiiZQC+d@db^ar-TLU(rVr~E5GCLjD}_A-`?~6eE9Nf;qKj1 zYpUI(A3UBuZ&|Qf{Rz)q`WAcZ#GdD~-dDeT`1CtrP%vtXPWJr=3M@wCU{qH@1fwc2 zl;tO1+8^AuxwhA{M1ShOq-Zwt<|j8p#@s}uqiV@Qxn4RRKIc4^9&qyhq{EV} zc<`2X#>X`G&aDbx1$L`8mU%I5RMFdQ*&uN9ba#W1pvdjFbLLNmz=Cb-ylo?XgeZ>?`**jr8}G*d3^NRnN6{(b08xw5;_*v_7r9p|QjJ<+FD2|JT-R z>EVfZGwF5t)6es)C#vtyou&~sr@ZF!*Rw%cw;sNpCLQy;;K0*6%b&_mDJ_%B5!wC6 zD)aSm@w#G}tvhD(&zdf~LocLumxj@!Uu9FLOcz&wzs|g`qUL6*;Ph{OKe*nqu8*6| z>9hOB_j(1@@B34)Z*-b>tsrylZoS#x_rLJ|cDH7_?U}ZZv02|YHYrqp^Dh3xeN!(l zPDEu>+U6IJZhwBHe)M*x_YA%3bN61Y{N~?Q`Y`@T-it5KkIfceB2zVAZ-$*>>+kk} zUC*~|d#v?;b=jQyj12YfH;(N030jul{C3jYd7o37{@pE`mt3FzGiCRwdlAnTNl35z z@oKxBbW@knWH0Z%=9{Nx$G!XfGJLPprjy^)&MO`7(Y_O@|8v@|`*R~^qEiUXZX(5FOYw5_S{D854TM7)I2uvd`L?X zN<0>O=HP*7cLC46PW*}5+miSbv!yt<359b!+ICao=reAWTRtTVJ8m;h)8H#eQ8~?c zL~IgAdV;o^;JF))oyHwHCW?8DqO(uDG-f)f>n-IwL!*@YXKO}-xwiBP(HsZ06|7I} zQyD*ki@)s0V(f9iG8L3N1H2iTM3`aa@8n=XdwVQd6Im6gq(v@3MHm!-WdH;$X*|t8 z*+EE|kQtz&7P<6LL@~ouVDd#l4R<`HFS2_;h($ delta 552 zcmZ3e*P|*D;LXe;!oa}5!QjXs9{McF^>H@;L=jEFj=si}(h6<{MwYLP3=BXe8~yV6 zMPX`Zg`EmhezfD@MWD{`$vy(Q^+#k|Ql6@|q=0A?eGCk-{k!uH8}PW6*Rs|-l)NeH zTqfq)#^7jWvU}mJo<|X4tS9|$?eAAmec{ZkXm?I^{l6!vdCabHPVQWNWivgjlx4rG zWXC+_I&a0XeEF5FIrGjxa#G}bBXZ?N!e5oDeO9M;ToK;~Dp?)I7`C^?biu>NoRWMe(`qeKAX>?W_3|u~@C`CWk{F_v)?3t{b-gjQu)?JK6My4wm0kaD++jcj+Uc5; zVTW9KJs3GA-{aTO)Z=eP_xH} zfWZv`OB#3cGfdtuC_6b{fJ+lk;3Hdmx@PiiQ5mM!0+WA=Dr0kNfHx}}$b4oXGz8k{ IDhT2M0OO{x8~^|S diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 8a6c771faf..e5577cf60b 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -898,8 +898,8 @@ def test_scanpipe_deploy_to_develop_pipeline_with_about_file(self): exitcode, out = pipeline.execute() self.assertEqual(0, exitcode, msg=out) - self.assertEqual(35, project1.codebaseresources.count()) - self.assertEqual(30, project1.codebaserelations.count()) + self.assertEqual(43, project1.codebaseresources.count()) + self.assertEqual(31, project1.codebaserelations.count()) self.assertEqual(1, project1.discoveredpackages.count()) self.assertEqual(0, project1.discovereddependencies.count()) diff --git a/setup.cfg b/setup.cfg index c84c6c658b..a9d91e6666 100644 --- a/setup.cfg +++ b/setup.cfg @@ -78,7 +78,7 @@ install_requires = fetchcode-container==1.2.3.210512; sys_platform == "linux" # Inspectors python-inspector==0.9.7 - aboutcode-toolkit==8.0.0 + aboutcode-toolkit==9.0.0 # Utilities XlsxWriter==3.1.2 openpyxl==3.1.2