diff --git a/.idea/.gitignore b/.idea/.gitignore
index 211cb9a1a..a7c382ed3 100644
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -1,9 +1 @@
-# Default ignored files
-/shelf/
-gradle.xml
-misc.xml
-uidesigner.xml
workspace.xml
-
-# Editor-based HTTP Client requests
-/httpRequests/
diff --git a/.idea/.name b/.idea/.name
deleted file mode 100644
index 9a7dd5a65..000000000
--- a/.idea/.name
+++ /dev/null
@@ -1 +0,0 @@
-com.larsreimann.api_editor
\ No newline at end of file
diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml
deleted file mode 100644
index 68913f846..000000000
--- a/.idea/codeStyles/Project.xml
+++ /dev/null
@@ -1,65 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
deleted file mode 100644
index fb7f4a8a4..000000000
--- a/.idea/compiler.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/gradle.xml b/.idea/gradle.xml
new file mode 100644
index 000000000..2d81666ba
--- /dev/null
+++ b/.idea/gradle.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
index 03d9549ea..cce1d8640 100644
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -2,5 +2,6 @@
+
\ No newline at end of file
diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml
index e824b9229..fdc392fe8 100644
--- a/.idea/jarRepositories.xml
+++ b/.idea/jarRepositories.xml
@@ -16,10 +16,5 @@
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/jsLinters/eslint.xml b/.idea/jsLinters/eslint.xml
deleted file mode 100644
index 541945bb0..000000000
--- a/.idea/jsLinters/eslint.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/kotlinc.xml b/.idea/kotlinc.xml
new file mode 100644
index 000000000..dff65d563
--- /dev/null
+++ b/.idea/kotlinc.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 000000000..09adcc7f2
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 000000000..42a8ebce1
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules/com.larsreimann.api_editor.iml b/.idea/modules/com.larsreimann.api_editor.iml
deleted file mode 100644
index 84be173a4..000000000
--- a/.idea/modules/com.larsreimann.api_editor.iml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/prettier.xml b/.idea/prettier.xml
deleted file mode 100644
index 0cc0b423e..000000000
--- a/.idea/prettier.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/runConfigurations/All_Tests.xml b/.idea/runConfigurations/All_Tests.xml
deleted file mode 100644
index c3c93d70b..000000000
--- a/.idea/runConfigurations/All_Tests.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- true
- false
-
-
-
\ No newline at end of file
diff --git a/.idea/runConfigurations/Backend_server.xml b/.idea/runConfigurations/Backend_server.xml
deleted file mode 100644
index dc7f47109..000000000
--- a/.idea/runConfigurations/Backend_server.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- true
- false
-
-
-
\ No newline at end of file
diff --git a/.idea/runConfigurations/Build.xml b/.idea/runConfigurations/Build.xml
deleted file mode 100644
index 1aaf652fc..000000000
--- a/.idea/runConfigurations/Build.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- true
- false
-
-
-
\ No newline at end of file
diff --git a/.idea/runConfigurations/Clean.xml b/.idea/runConfigurations/Clean.xml
deleted file mode 100644
index 4c57fcfff..000000000
--- a/.idea/runConfigurations/Clean.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
- true
- false
-
-
-
\ No newline at end of file
diff --git a/.idea/runConfigurations/Server_tests.xml b/.idea/runConfigurations/Server_tests.xml
deleted file mode 100644
index 211931538..000000000
--- a/.idea/runConfigurations/Server_tests.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- false
- true
- false
-
-
-
\ No newline at end of file
diff --git a/package-parser/README.md b/package-parser/README.md
index a38600ab3..7275151ff 100644
--- a/package-parser/README.md
+++ b/package-parser/README.md
@@ -1,13 +1,84 @@
-# Execution of Starter Code
+# library-analyzer
+
+A tool to analyze client and API code written in Python.
+
+## Usage
+
+```text
+usage: parse-package [-h] {api,usages,improve} ...
+
+Analyze Python code.
+
+positional arguments:
+ {api,usages,improve}
+ api List the API of a package.
+ usages Find usages of API elements.
+ improve Suggest how to improve an existing API.
+
+optional arguments:
+ -h, --help show this help message and exit
+```
+
+### api command
+
+```text
+usage: parse-package api [-h] -p PACKAGE -o OUT
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PACKAGE, --package PACKAGE
+ The name of the package. It must be installed in the current interpreter.
+ -o OUT, --out OUT Output directory.
+```
+
+### usages command
+
+```text
+usage: parse-package usages [-h] -p PACKAGE -s SRC -t TMP -o OUT
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PACKAGE, --package PACKAGE
+ The name of the package. It must be installed in the current interpreter.
+ -s SRC, --src SRC Directory containing Python code.
+ -t TMP, --tmp TMP Directory where temporary files can be stored (to save progress in case the program crashes).
+ -o OUT, --out OUT Output directory.
+```
+
+### improve command
+
+```text
+usage: parse-package improve [-h] -a API -u USAGES -o OUT [-m MIN]
+
+optional arguments:
+ -h, --help show this help message and exit
+ -a API, --api API File created by the 'api' command.
+ -u USAGES, --usages USAGES
+ File created by the 'usages' command.
+ -o OUT, --out OUT Output directory.
+ -m MIN, --min MIN Minimum number of usages required to keep an API element.
+```
+
+### Example usage
1. Install Python 3.9.
-2. Install [poetry](https://python-poetry.org/docs/master/#installation).
-3. **Only the first time**, install dependencies:
+1. Install [poetry](https://python-poetry.org/docs/master/#installation).
+1. **Only the first time**, install dependencies:
```shell
poetry install
```
-4. Run the tool:
+1. Create a shell with poetry:
```shell
poetry shell
+ ```
+1. Run the commands described above:
+ ```shell
+ # Step 1:
parse-package api -p sklearn -o out
+
+ # Step 2:
+ parse-package usages -p sklearn -s "Kaggle Kernels" -t tmp -o out
+
+ # Step 3:
+ parse-package improve -a "out/scikit-learn__sklearn__1.0__api.json" -u "out/scikit-learn__sklearn__1.0__usages.json" -o out
```
diff --git a/package-parser/package-parser.iml b/package-parser/package-parser.iml
new file mode 100644
index 000000000..904bf2ca5
--- /dev/null
+++ b/package-parser/package-parser.iml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/package-parser/package_parser/cli.py b/package-parser/package_parser/cli.py
index d65a6014f..c82426cf3 100644
--- a/package-parser/package_parser/cli.py
+++ b/package-parser/package_parser/cli.py
@@ -4,11 +4,15 @@
from pathlib import Path
from typing import Any
-from .commands.get_api import get_api
+from .commands.find_usages import find_usages
+from .commands.get_api import distribution, distribution_version, get_api
from .commands.get_dependencies import get_dependencies
+from .commands.suggest_improvements import suggest_improvements
from .utils import ensure_file_exists
__API_COMMAND = "api"
+__USAGES_COMMAND = "usages"
+__IMPROVE_COMMAND = "improve"
class CustomEncoder(json.JSONEncoder):
@@ -20,16 +24,14 @@ def default(self, o: Any) -> Any:
def cli() -> None:
args = __get_args()
-
if args.command == __API_COMMAND:
public_api = get_api(args.package)
public_api_dependencies = get_dependencies(public_api)
- out_dir: Path = args.out
- out_file_api = out_dir.joinpath(
+ out_file_api = args.out.joinpath(
f"{public_api.distribution}__{public_api.package}__{public_api.version}__api.json"
)
- out_file_api_dependencies = out_dir.joinpath(
+ out_file_api_dependencies = args.out.joinpath(
f"{public_api.distribution}__{public_api.package}__{public_api.version}__api_dependencies.json"
)
ensure_file_exists(out_file_api)
@@ -38,6 +40,21 @@ def cli() -> None:
with out_file_api_dependencies.open("w") as f:
json.dump(public_api_dependencies.to_json(), f, indent=2, cls=CustomEncoder)
+ elif args.command == __USAGES_COMMAND:
+ usages = find_usages(args.package, args.src, args.tmp)
+
+ dist = distribution(args.package)
+
+ out_file = args.out.joinpath(
+ f"{dist}__{args.package}__{distribution_version(dist)}__usages.json"
+ )
+ ensure_file_exists(out_file)
+ with out_file.open("w") as f:
+ json.dump(usages.to_json(), f, indent=2)
+
+ elif args.command == __IMPROVE_COMMAND:
+ suggest_improvements(args.api, args.usages, args.out, args.min)
+
def __get_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Analyze Python code.")
@@ -45,6 +62,8 @@ def __get_args() -> argparse.Namespace:
# Commands
subparsers = parser.add_subparsers(dest="command")
__add_api_subparser(subparsers)
+ __add_usages_subparser(subparsers)
+ __add_improve_subparser(subparsers)
return parser.parse_args()
@@ -61,3 +80,64 @@ def __add_api_subparser(subparsers: _SubParsersAction) -> None:
api_parser.add_argument(
"-o", "--out", help="Output directory.", type=Path, required=True
)
+
+
+def __add_usages_subparser(subparsers: _SubParsersAction) -> None:
+    """Registers the 'usages' command together with its four required options."""
+
+    sub = subparsers.add_parser(__USAGES_COMMAND, help="Find usages of API elements.")
+
+    # Package whose usages are searched
+    sub.add_argument(
+        "-p",
+        "--package",
+        help="The name of the package. It must be installed in the current interpreter.",
+        type=str,
+        required=True,
+    )
+
+    # Directories; argparse converts all of them to Path objects
+    sub.add_argument(
+        "-s", "--src", help="Directory containing Python code.", type=Path, required=True
+    )
+    sub.add_argument(
+        "-t",
+        "--tmp",
+        help="Directory where temporary files can be stored (to save progress in case the program crashes).",
+        type=Path,
+        required=True,
+    )
+    sub.add_argument(
+        "-o", "--out", help="Output directory.", type=Path, required=True
+    )
+
+
+def __add_improve_subparser(subparsers: _SubParsersAction) -> None:
+    """Registers the 'improve' command together with its options."""
+    sub = subparsers.add_parser(
+        __IMPROVE_COMMAND, help="Suggest how to improve an existing API."
+    )
+
+    # argparse opens both input files for reading while it parses the command line
+    readable_file = argparse.FileType("r")
+    sub.add_argument(
+        "-a", "--api", help="File created by the 'api' command.", type=readable_file, required=True
+    )
+    sub.add_argument(
+        "-u",
+        "--usages",
+        help="File created by the 'usages' command.",
+        type=readable_file,
+        required=True,
+    )
+    sub.add_argument("-o", "--out", help="Output directory.", type=Path, required=True)
+
+    # The threshold is the only optional argument; it defaults to a single usage
+    sub.add_argument(
+        "-m",
+        "--min",
+        help="Minimum number of usages required to keep an API element.",
+        type=int,
+        required=False,
+        default=1,
+    )
diff --git a/package-parser/package_parser/commands/find_usages/__init__.py b/package-parser/package_parser/commands/find_usages/__init__.py
new file mode 100644
index 000000000..c0f6a6ee4
--- /dev/null
+++ b/package-parser/package_parser/commands/find_usages/__init__.py
@@ -0,0 +1,9 @@
+from ._find_usages import find_usages
+from ._model import (
+ ClassUsage,
+ FunctionUsage,
+ ParameterUsage,
+ Usage,
+ UsageStore,
+ ValueUsage,
+)
diff --git a/package-parser/package_parser/commands/find_usages/_ast_visitor.py b/package-parser/package_parser/commands/find_usages/_ast_visitor.py
new file mode 100644
index 000000000..48370b1a7
--- /dev/null
+++ b/package-parser/package_parser/commands/find_usages/_ast_visitor.py
@@ -0,0 +1,127 @@
+from typing import Optional
+
+import astroid
+from astroid.arguments import CallSite
+from astroid.helpers import safe_infer
+
+from ._model import Location, UsageStore
+
+
+class _UsageFinder:
+    """Collects the usages of a package's API that occur in the calls of one Python file."""
+
+    def __init__(self, package_name: str, python_file: str) -> None:
+        self.package_name: str = package_name
+        self.python_file: str = python_file
+        self.usages: UsageStore = UsageStore()
+
+    def enter_call(self, node: astroid.Call):
+        """
+        Stores the class, function, parameter and value usages of one call, if the called declaration is relevant.
+        """
+        analysis = _analyze_declaration_called_by(node, self.package_name)
+        if analysis is None:
+            return
+        called, function_qname, parameters, n_implicit_parameters = analysis
+
+        call_site = CallSite.from_call(node)
+        bound = _bound_parameters(parameters, call_site, n_implicit_parameters)
+        if bound is None:
+            return
+
+        store = self.usages
+        where = Location(self.python_file, node.lineno, node.col_offset)
+
+        # A called method also counts as a usage of the class it belongs to
+        is_method = isinstance(called, (astroid.BoundMethod, astroid.UnboundMethod)) or (
+            isinstance(called, astroid.FunctionDef) and called.is_method()
+        )
+        if is_method:
+            store.add_class_usage(function_qname.rpartition(".")[0], where)
+
+        # Every resolved call is a usage of the called function
+        store.add_function_usage(function_qname, where)
+
+        # Every bound argument is a usage of its parameter and of the passed value
+        for parameter_name, argument in bound.items():
+            parameter_qname = f"{function_qname}.{parameter_name}"
+            store.add_parameter_usage(parameter_qname, where)
+            store.add_value_usage(parameter_qname, _stringify_value(argument), where)
+
+
+def _analyze_declaration_called_by(
+    node: astroid.Call, package_name: str
+) -> Optional[tuple[astroid.NodeNG, str, astroid.Arguments, int]]:
+    """
+    Resolves the declaration that is invoked by the given call. The result is None when the declaration cannot be
+    inferred or is not relevant for the package. Otherwise, the result is a tuple with the form
+    (called, qualified_name, parameters, n_implicit_parameters).
+    """
+
+    inferred = safe_infer(node.func)
+    if inferred is None:
+        return None
+    if not __is_relevant_qualified_name(package_name, inferred.qname()):
+        return None
+
+    # Computed on the inferred node, i.e. before a class is replaced by its constructor
+    n_implicit_parameters = __n_implicit_parameters(inferred)
+
+    # Calling a class means calling its __init__
+    target = __called_constructor(inferred) if isinstance(inferred, astroid.ClassDef) else inferred
+
+    callable_types = (astroid.BoundMethod, astroid.UnboundMethod, astroid.FunctionDef)
+    if not isinstance(target, callable_types):
+        return None
+    return target, target.qname(), target.args, n_implicit_parameters
+
+
+def __is_relevant_qualified_name(package_name: str, qualified_name: str) -> bool:  # package or dotted child
+    return qualified_name == package_name or qualified_name.startswith(f"{package_name}.")
+
+
+def __n_implicit_parameters(called: astroid.NodeNG) -> int:  # 0 for nodes that lack the method
+    return getattr(called, "implicit_parameters", lambda: 0)()
+
+
+def __called_constructor(class_def: astroid.ClassDef) -> Optional[astroid.FunctionDef]:
+    """
+    Returns the last ``__init__`` that ``local_attr`` finds for the class if it is a function definition, else None.
+    """
+    try:
+        candidates = class_def.local_attr("__init__")
+    except astroid.NotFoundError:
+        return None
+
+    last = candidates[-1]
+    return last if isinstance(last, astroid.FunctionDef) else None
+
+
+def _stringify_value(value: astroid.NodeNG):  # string form of the argument node, as produced by as_string()
+    return value.as_string()
+
+
+def _bound_parameters(
+    parameters: astroid.Arguments, arguments: CallSite, n_implicit_parameters: int
+) -> Optional[dict[str, astroid.NodeNG]]:
+    """
+    Maps parameter names to the argument nodes that are passed for them at the call site.
+
+    :param parameters: The parameters of the called declaration.
+    :param arguments: The arguments of the call.
+    :param n_implicit_parameters: Number of leading parameters that are skipped when positional arguments are bound.
+    :return: The mapping or None for an improper call.
+    """
+
+    # An improper call cannot be bound
+    if parameters.args is None or arguments.has_invalid_arguments() or arguments.has_invalid_keywords():
+        return None
+
+    declared = parameters.posonlyargs + parameters.args
+    names = [parameter.name for parameter in declared[n_implicit_parameters:]]
+
+    # Keyword arguments go first; zip drops positional arguments that have no matching parameter
+    bound: dict[str, astroid.NodeNG] = arguments.keyword_arguments.copy()
+    bound.update(zip(names, arguments.positional_arguments))
+
+    return bound
diff --git a/package-parser/package_parser/commands/find_usages/_find_usages.py b/package-parser/package_parser/commands/find_usages/_find_usages.py
new file mode 100644
index 000000000..9d19a50b6
--- /dev/null
+++ b/package-parser/package_parser/commands/find_usages/_find_usages.py
@@ -0,0 +1,105 @@
+import json
+import multiprocessing
+from multiprocessing import synchronize
+from pathlib import Path
+from typing import Optional
+
+import astroid
+from package_parser.utils import ASTWalker, initialize_and_read_exclude_file, list_files
+
+from ._ast_visitor import _UsageFinder
+from ._model import UsageStore
+
+__N_PROCESSES = 12  # number of worker processes in the pool that find_usages creates
+
+
+def find_usages(package_name: str, src_dir: Path, tmp_dir: Path) -> UsageStore:
+    """
+    Finds the usages of the package in the Python files that list_files returns for src_dir. The result of each
+    file is stored in tmp_dir and files that are listed in the exclude file of tmp_dir are not analyzed again.
+    """
+    # Create the directory first, so the exclude file and the results of the workers have a place to go
+    tmp_dir.mkdir(parents=True, exist_ok=True)
+
+    exclude_file = tmp_dir.joinpath("$$$$$exclude$$$$$.txt")
+    excluded_python_files = set(initialize_and_read_exclude_file(exclude_file))
+    python_files = [it for it in list_files(src_dir, ".py") if it not in excluded_python_files]
+
+    with multiprocessing.Pool(
+        processes=__N_PROCESSES,
+        initializer=__initialize_process_environment,
+        initargs=(multiprocessing.Lock(),),
+    ) as pool:
+        pool.starmap(
+            __find_usages_in_single_file,
+            [(package_name, it, exclude_file, tmp_dir) for it in python_files],
+        )
+        # close() must come before join(): joining a pool that is still running raises a ValueError
+        pool.close()
+        pool.join()
+
+    return _merge_results(tmp_dir)
+
+
+_lock: synchronize.Lock = multiprocessing.Lock()  # rebound by the pool initializer below
+
+
+def __initialize_process_environment(lock: synchronize.Lock):  # passed to the pool as initializer
+    global _lock
+    _lock = lock
+
+
+def __find_usages_in_single_file(
+    package_name: str,
+    python_file: str,
+    exclude_file: Path,
+    tmp_dir: Path,
+):
+    """
+    Analyzes one Python file, stores its usages as JSON in tmp_dir and finally appends the file to the exclude file.
+    Files with a broken encoding, invalid syntax or a RecursionError are reported and skipped.
+    """
+    print(f"Working on {python_file}")
+
+    try:
+        with open(python_file, "r") as source_file:
+            source = source_file.read()
+
+        if not __is_relevant_python_file(package_name, source):
+            print(f"Skipping {python_file} (irrelevant file)")
+        else:
+            usage_finder = _UsageFinder(package_name, python_file)
+            ASTWalker(usage_finder).walk(astroid.parse(source))
+            # Flatten the path into a single file name inside tmp_dir
+            flat_name = python_file.replace("/", "__").replace("\\", "__").replace(".py", ".json")
+            with tmp_dir.joinpath(flat_name).open("w") as result_file:
+                json.dump(usage_finder.usages.to_json(), result_file, indent=2)
+
+    except UnicodeError:
+        print(f"Skipping {python_file} (broken encoding)")
+    except astroid.exceptions.AstroidSyntaxError:
+        print(f"Skipping {python_file} (invalid syntax)")
+    except RecursionError:
+        print(f"Skipping {python_file} (infinite recursion)")
+
+    # The exclude file is only appended to while the shared lock is held
+    with _lock, exclude_file.open("a") as exclude:
+        exclude.write(f"{python_file}\n")
+
+
+def __is_relevant_python_file(package_name: str, source_code: str) -> bool:  # plain substring test on the raw text
+    return package_name in source_code
+
+
+def _merge_results(tmp_dir: Path) -> UsageStore:
+    """Merges the usage stores of all JSON files that list_files returns for tmp_dir into a single store."""
+    json_files = list_files(tmp_dir, ".json")
+    n_files = len(json_files)
+
+    merged = UsageStore()
+    for position, json_file in enumerate(json_files, start=1):
+        print(f"Merging {json_file} ({position}/{n_files})")
+        with open(json_file, "r") as stream:
+            merged.merge_other_into_self(UsageStore.from_json(json.load(stream)))
+
+    return merged
diff --git a/package-parser/package_parser/commands/find_usages/_model.py b/package-parser/package_parser/commands/find_usages/_model.py
new file mode 100644
index 000000000..525093e1b
--- /dev/null
+++ b/package-parser/package_parser/commands/find_usages/_model.py
@@ -0,0 +1,341 @@
+from __future__ import annotations
+
+from typing import Any, Optional
+
+ClassQName = str  # key type of UsageStore.class_usages
+FunctionQName = str  # key type of UsageStore.function_usages
+ParameterQName = str  # key type of UsageStore.parameter_usages and UsageStore.value_usages
+StringifiedValue = str  # inner key type of UsageStore.value_usages
+
+
+class UsageStore:
+    """
+    Stores where classes, functions, parameters and parameter values are used, indexed by qualified name.
+    """
+
+    @staticmethod
+    def from_json(json: Any) -> UsageStore:
+        """
+        Revives a usage store from the structure created by to_json.
+
+        :param json: Dictionary with the keys "class_usages", "function_usages", "parameter_usages" and
+            "value_usages".
+        :return: The revived usage store.
+        """
+
+        result = UsageStore()
+
+        # Classes, functions and parameters share one layout: qualified name -> list of locations
+        flat_sections = (
+            ("class_usages", result.add_class_usage),
+            ("function_usages", result.add_function_usage),
+            ("parameter_usages", result.add_parameter_usage),
+        )
+        for key, add_usage in flat_sections:
+            for qname, locations in json[key].items():
+                for location in locations:
+                    add_usage(qname, Location.from_json(location))
+
+        # Values are nested one level deeper: parameter qualified name -> value -> list of locations
+        for parameter_qname, values in json["value_usages"].items():
+            for value, locations in values.items():
+                for location in locations:
+                    result.add_value_usage(
+                        parameter_qname, value, Location.from_json(location)
+                    )
+
+        return result
+
+    def __init__(self) -> None:
+        """Creates an empty usage store."""
+        # Each mapping goes from a qualified name to the usages of that API element
+        self.class_usages: dict[ClassQName, list[ClassUsage]] = {}
+        self.function_usages: dict[FunctionQName, list[FunctionUsage]] = {}
+        self.parameter_usages: dict[ParameterQName, list[ParameterUsage]] = {}
+
+        # Value usages are additionally grouped by the stringified value
+        self.value_usages: dict[
+            ParameterQName, dict[StringifiedValue, list[ValueUsage]]
+        ] = {}
+
+    # Classes ----------------------------------------------------------------------------------------------------------
+
+    def add_class_usage(self, qname: ClassQName, location: Location) -> None:
+        """Records that the class is used at the location."""
+        self.init_class(qname)
+        self.class_usages[qname].append(ClassUsage(qname, location))
+
+    def init_class(self, qname: ClassQName) -> None:
+        """Creates an empty entry for the class unless it already has one."""
+        self.class_usages.setdefault(qname, [])
+
+    def remove_class(self, class_qname: ClassQName) -> None:
+        """Removes the class together with the functions whose qualified name lies below it."""
+        self.class_usages.pop(class_qname, None)
+
+        # The trailing dot keeps classes that merely share a name prefix (Foo vs. FooBar)
+        member_prefix = f"{class_qname}."
+        for function_qname in list(self.function_usages):
+            if function_qname.startswith(member_prefix):
+                self.remove_function(function_qname)
+
+    # Functions --------------------------------------------------------------------------------------------------------
+
+    def add_function_usage(self, qname: FunctionQName, location: Location) -> None:
+        """Records that the function is used at the location."""
+        self.init_function(qname)
+        self.function_usages[qname].append(FunctionUsage(qname, location))
+
+    def init_function(self, qname: FunctionQName) -> None:
+        """Creates an empty entry for the function unless it already has one."""
+        self.function_usages.setdefault(qname, [])
+
+    def remove_function(self, function_qname: FunctionQName) -> None:
+        """Removes the function together with the parameters whose qualified name lies below it."""
+        self.function_usages.pop(function_qname, None)
+
+        # The trailing dot keeps functions that merely share a name prefix (fit vs. fit_transform)
+        parameter_prefix = f"{function_qname}."
+        for parameter_qname in list(self.parameter_usages):
+            if parameter_qname.startswith(parameter_prefix):
+                self.remove_parameter(parameter_qname)
+
+    # Parameters -------------------------------------------------------------------------------------------------------
+
+    def add_parameter_usage(self, qname: ParameterQName, location: Location) -> None:
+        """Records that the parameter is used at the location."""
+        self.init_parameter(qname)
+        self.parameter_usages[qname].append(ParameterUsage(qname, location))
+
+    def init_parameter(self, qname: ParameterQName) -> None:
+        """Creates an empty entry for the parameter unless it already has one."""
+        self.parameter_usages.setdefault(qname, [])
+
+    def remove_parameter(self, qname: ParameterQName) -> None:
+        """Removes the parameter together with the values recorded for it."""
+        self.parameter_usages.pop(qname, None)
+        self.remove_value(qname)
+
+    # Values -----------------------------------------------------------------------------------------------------------
+
+    def add_value_usage(
+        self,
+        parameter_qname: ParameterQName,
+        value: StringifiedValue,
+        location: Location,
+    ) -> None:
+        """Records that the stringified value is used for the parameter at the location."""
+        self.init_value(parameter_qname)
+
+        usages_of_value = self.value_usages[parameter_qname].setdefault(value, [])
+        usages_of_value.append(ValueUsage(parameter_qname, value, location))
+
+    def init_value(self, parameter_qname: ParameterQName) -> None:
+        """Creates an empty value mapping for the parameter unless it already has one."""
+        self.value_usages.setdefault(parameter_qname, {})
+
+    def remove_value(self, qname: ParameterQName) -> None:
+        """Removes all values recorded for the parameter."""
+        self.value_usages.pop(qname, None)
+
+    # Queries ----------------------------------------------------------------------------------------------------------
+
+    def n_class_usages(self, qname: ClassQName) -> int:
+        """Returns how often the class is used (0 for unknown classes)."""
+        return len(self.class_usages.get(qname, ()))
+
+    def n_function_usages(self, qname: FunctionQName) -> int:
+        """Returns how often the function is used (0 for unknown functions)."""
+        return len(self.function_usages.get(qname, ()))
+
+    def n_parameter_usages(self, qname: ParameterQName) -> int:
+        """Returns how often the parameter is used (0 for unknown parameters)."""
+        return len(self.parameter_usages.get(qname, ()))
+
+    def n_value_usages(self, qname: ParameterQName, value: str) -> int:
+        """Returns how often the value is used for the parameter (0 if either is unknown)."""
+        return len(self.value_usages.get(qname, {}).get(value, ()))
+
+    def most_common_value(self, qname: ParameterQName) -> Optional[str]:
+        """
+        Returns the value that is used most often for the parameter. Ties are resolved in favor of the value that
+        was recorded first. The result is None if no value has at least one usage.
+        """
+
+        values = self.value_usages.get(qname)
+        if not values:
+            return None
+
+        # max() returns the first maximal item, like a scan that only replaces on a strictly larger count
+        value, usages = max(values.items(), key=lambda item: len(item[1]))
+        return value if usages else None
+
+    # Merging ----------------------------------------------------------------------------------------------------------
+
+    def merge_other_into_self(self, other_usage_store: UsageStore) -> UsageStore:
+        """
+        Merges the other usage store into this one **in-place** and returns this store.
+
+        :param other_usage_store: The usage store to merge into this one.
+        :return: This usage store.
+        """
+
+        for class_usages in other_usage_store.class_usages.values():
+            for usage in class_usages:
+                self.add_class_usage(usage.qname, usage.location)
+
+        for function_usages in other_usage_store.function_usages.values():
+            for usage in function_usages:
+                self.add_function_usage(usage.qname, usage.location)
+
+        for parameter_usages in other_usage_store.parameter_usages.values():
+            for usage in parameter_usages:
+                self.add_parameter_usage(usage.qname, usage.location)
+
+        # Value usages are grouped twice: by parameter and then by value
+        for values_of_parameter in other_usage_store.value_usages.values():
+            for usages_of_value in values_of_parameter.values():
+                for usage in usages_of_value:
+                    self.add_value_usage(
+                        usage.parameter_qname,
+                        usage.value,
+                        usage.location,
+                    )
+
+        return self
+
+    # Serialization ----------------------------------------------------------------------------------------------------
+
+    @staticmethod
+    def _locations_to_json(usages_by_key: dict[str, list[Any]]) -> dict[str, list[Any]]:
+        """Replaces every usage in the mapping by the JSON form of its location."""
+        return {
+            key: [usage.location.to_json() for usage in usages]
+            for key, usages in usages_by_key.items()
+        }
+
+    @staticmethod
+    def _counts_by_frequency(usages_by_key: dict[str, list[Any]]) -> dict[str, int]:
+        """Replaces every usage list in the mapping by its length; the most used keys come first."""
+        by_frequency = sorted(
+            usages_by_key.items(), key=lambda item: len(item[1]), reverse=True
+        )
+        return {key: len(usages) for key, usages in by_frequency}
+
+    def to_json(self) -> Any:
+        """
+        Converts the store to a JSON-compatible dictionary. Only the locations of the usages are stored, since the
+        qualified names and values are already the keys of the dictionary.
+        """
+
+        return {
+            "class_usages": self._locations_to_json(self.class_usages),
+            "function_usages": self._locations_to_json(self.function_usages),
+            "parameter_usages": self._locations_to_json(self.parameter_usages),
+            "value_usages": {
+                parameter_qname: self._locations_to_json(values)
+                for parameter_qname, values in self.value_usages.items()
+            },
+        }
+
+    def to_count_json(self) -> Any:
+        """
+        Converts the store to a JSON-compatible dictionary that contains the number of usages instead of their
+        locations. Classes, functions, parameters and the values of each parameter are sorted by descending count.
+        """
+
+        return {
+            "class_counts": self._counts_by_frequency(self.class_usages),
+            "function_counts": self._counts_by_frequency(self.function_usages),
+            "parameter_counts": self._counts_by_frequency(self.parameter_usages),
+            "value_counts": {
+                parameter_qname: self._counts_by_frequency(values)
+                for parameter_qname, values in self.value_usages.items()
+            },
+        }
+
+
+class Usage:
+    """Common base class of the usages that are kept in a UsageStore."""
+
+
+class ClassUsage(Usage):
+    """One usage of a class, i.e. the qualified name of the class and the location of the usage."""
+    def __init__(self, qname: ClassQName, location: Location) -> None:
+        self.qname, self.location = qname, location
+
+    def to_json(self) -> Any:
+        return {"qname": self.qname, "location": self.location.to_json()}
+
+
+class FunctionUsage(Usage):
+    """One usage of a function, i.e. the qualified name of the function and the location of the usage."""
+    def __init__(self, qname: FunctionQName, location: Location) -> None:
+        self.qname, self.location = qname, location
+
+    def to_json(self) -> Any:
+        return {"qname": self.qname, "location": self.location.to_json()}
+
+
+class ParameterUsage(Usage):
+    """One usage of a parameter, i.e. the qualified name of the parameter and the location of the usage."""
+    def __init__(self, qname: ParameterQName, location: Location) -> None:
+        self.qname, self.location = qname, location
+
+    def to_json(self) -> Any:
+        return {"qname": self.qname, "location": self.location.to_json()}
+
+
+class ValueUsage(Usage):
+    """One usage of a value, i.e. the parameter it belongs to, its stringified form and the location."""
+
+    def __init__(
+        self,
+        parameter_qname: ParameterQName,
+        value: StringifiedValue,
+        location: Location,
+    ) -> None:
+        self.parameter_qname: ParameterQName = parameter_qname
+        self.value: StringifiedValue = value
+        self.location: Location = location
+
+    def to_json(self) -> Any:
+        """Includes the parameter and the value in addition to the JSON form of the location."""
+        json_location = self.location.to_json()
+        return {"parameter_qname": self.parameter_qname, "value": self.value, "location": json_location}
+
+
+FileName = str  # type of Location.file
+LineNumber = int  # type of Location.line (optional there)
+ColumnNumber = int  # type of Location.column (optional there)
+
+
+class Location:
+    """A position in a file. The line and the column are optional."""
+
+    @staticmethod
+    def from_json(json: Any) -> Location:
+        return Location(json["file"], json["line"], json["column"])
+
+    def __init__(self, file: FileName, line: Optional[LineNumber], column: Optional[ColumnNumber]) -> None:
+        self.file: FileName = file
+        self.line: Optional[LineNumber] = line
+        self.column: Optional[ColumnNumber] = column
+
+    def _key(self) -> tuple:
+        """The fields that define the equality and the hash of a location."""
+        return self.file, self.line, self.column
+
+    def __repr__(self) -> str:
+        return f"{self.file}@{self.line}:{self.column}"
+
+    def __eq__(self, other: object) -> bool:
+        if isinstance(other, type(self)):
+            return self._key() == other._key()
+        return NotImplemented
+
+    def __hash__(self) -> int:
+        return hash(self._key())
+
+    def to_json(self) -> Any:
+        return {"file": self.file, "line": self.line, "column": self.column}
diff --git a/package-parser/package_parser/commands/suggest_improvements/__init__.py b/package-parser/package_parser/commands/suggest_improvements/__init__.py
new file mode 100644
index 000000000..e1c2d5ae0
--- /dev/null
+++ b/package-parser/package_parser/commands/suggest_improvements/__init__.py
@@ -0,0 +1 @@
+from ._suggest_improvements import suggest_improvements
diff --git a/package-parser/package_parser/commands/suggest_improvements/_suggest_improvements.py b/package-parser/package_parser/commands/suggest_improvements/_suggest_improvements.py
new file mode 100644
index 000000000..90e01315d
--- /dev/null
+++ b/package-parser/package_parser/commands/suggest_improvements/_suggest_improvements.py
@@ -0,0 +1,383 @@
+import json
+from io import TextIOWrapper
+from pathlib import Path
+from typing import Any, Union
+
+from package_parser.commands.find_usages import (
+ ClassUsage,
+ FunctionUsage,
+ UsageStore,
+ ValueUsage,
+)
+from package_parser.commands.get_api import API
+from package_parser.utils import ensure_file_exists, parent_qname
+
+
def suggest_improvements(
    api_file: TextIOWrapper, usages_file: TextIOWrapper, out_dir: Path, min_usages: int
) -> None:
    """
    Analyzes how an API is used and writes the results as JSON files into out_dir.

    Both input files are closed after they have been parsed. The names of all output files
    start with the name of the API file without its "__api.json" suffix.

    :param api_file: Open file containing the JSON that ``API.from_json`` expects.
    :param usages_file: Open file containing the JSON that ``UsageStore.from_json`` expects.
    :param out_dir: Directory for the output files. It is created if it does not exist.
    :param min_usages: Threshold that is passed on to the removal of rarely used API elements.
    """

    with api_file:
        api_json = json.load(api_file)
        api = API.from_json(api_json)

    with usages_file:
        usages_json = json.load(usages_file)
        usages = UsageStore.from_json(usages_json)

    out_dir.mkdir(parents=True, exist_ok=True)

    # Keep only the final path component. `api_file.name` is the path the file was opened
    # with: directories in it would point the outputs into sub-directories of out_dir that
    # may not exist, and an absolute path would make `joinpath` ignore out_dir altogether.
    base_file_name = Path(api_file.name).name.replace("__api.json", "")

    __preprocess_usages(usages, api)
    __print_usage_counts(usages, out_dir, base_file_name)
    __create_usage_distributions(usages, out_dir, base_file_name)
    api_size_after_removal = __remove_rarely_used_api_elements(
        usages, min_usages, out_dir, base_file_name
    )
    __write_api_size(api, api_size_after_removal, out_dir, base_file_name)
    __optional_vs_required_parameters(usages, api, out_dir, base_file_name)
+
+
def __preprocess_usages(usages: UsageStore, api: API) -> None:
    """
    Prepares the usage store for the analysis.

    The steps run in this order: usages of internal API elements are removed, unused API
    elements are added, and implicit usages of default values are added.

    :param usages: Usage store (modified in place).
    :param api: Description of the API.
    """

    preprocessing_steps = (
        __remove_internal_usages,
        __add_unused_api_elements,
        __add_implicit_usages_of_default_value,
    )
    for apply_step in preprocessing_steps:
        apply_step(usages, api)
+
+
def __print_usage_counts(
    usages: UsageStore, out_dir: Path, base_file_name: str
) -> None:
    """
    Writes the result of ``usages.to_count_json()`` to "<base_file_name>__usage_counts.json".

    :param usages: Usage store.
    :param out_dir: Directory the file is written to.
    :param base_file_name: Prefix of the name of the output file.
    """

    out_file = out_dir.joinpath(f"{base_file_name}__usage_counts.json")
    ensure_file_exists(out_file)
    with out_file.open("w") as f:
        json.dump(usages.to_count_json(), f, indent=2)
+
+
def __create_usage_distributions(
    usages: UsageStore, out_dir: Path, base_file_name: str
) -> None:
    """
    Computes the usage distributions of classes, functions and parameters and writes each of
    them to its own JSON file in out_dir.

    Each distribution is computed right before its file is written.

    :param usages: Usage store.
    :param out_dir: Directory the files are written to.
    :param base_file_name: Prefix of the names of the output files.
    """

    # Classes
    class_distribution = __create_class_or_function_usage_distribution(
        usages.class_usages
    )
    class_file = out_dir / f"{base_file_name}__class_usage_distribution.json"
    with class_file.open("w") as f:
        json.dump(class_distribution, f, indent=2)

    # Functions
    function_distribution = __create_class_or_function_usage_distribution(
        usages.function_usages
    )
    function_file = out_dir / f"{base_file_name}__function_usage_distribution.json"
    with function_file.open("w") as f:
        json.dump(function_distribution, f, indent=2)

    # Parameters
    parameter_distribution = __create_parameter_usage_distribution(usages)
    parameter_file = out_dir / f"{base_file_name}__parameter_usage_distribution.json"
    with parameter_file.open("w") as f:
        json.dump(parameter_distribution, f, indent=2)
+
+
def __create_class_or_function_usage_distribution(
    usages: Union[dict[str, list[ClassUsage]], dict[str, list[FunctionUsage]]]
) -> dict[int, int]:
    """
    Creates a dictionary X -> N where N indicates the number of classes/functions that are used at least X times.

    The keys range from 0 to the highest usage count of any class/function. If there are no
    classes/functions at all, the result is {0: 0}.

    :param usages: Usages of classes/functions.
    :return: The usage distribution.
    """

    # Compute every usage count once instead of once per threshold.
    usage_counts = [len(it) for it in usages.values()]

    # `default` keeps an empty mapping from raising a ValueError.
    max_usages = max(usage_counts, default=0)

    return {
        i: sum(1 for count in usage_counts if count >= i)
        for i in range(max_usages + 1)
    }
+
+
def __create_parameter_usage_distribution(usages: UsageStore) -> dict[int, int]:
    """
    Creates a dictionary X -> N where N indicates the number of parameters that are set at least X times to a value
    other than the most commonly used value (which might differ from the default value).

    A parameter is only counted for X if its function is used at least X times as well. If the
    parent of that function has an entry in the class usages, that class must also be used at
    least X times. If there are no parameters at all, the result is {0: 0}.

    :param usages: Usage store.
    :return: The usage distribution.
    """

    function_usages = usages.function_usages
    value_usages = usages.value_usages
    class_usages = usages.class_usages

    # Number of times each parameter is set to a value other than the most common one.
    # The maximum of these numbers determines the keys of the result.
    n_other_values_per_parameter: list[int] = []

    # Largest X for which each parameter is still counted. None of the three conditions
    # depends on X in any other way, so this is computed once per parameter.
    limits: list[int] = []

    for parameter_qname in usages.parameter_usages:
        function_qname = parent_qname(parameter_qname)
        class_qname = parent_qname(function_qname)

        n_other_values = __n_not_set_to_most_common_value(
            parameter_qname, function_usages, value_usages
        )
        n_other_values_per_parameter.append(n_other_values)

        limit = min(usages.n_function_usages(function_qname), n_other_values)
        if class_qname in class_usages:
            limit = min(limit, usages.n_class_usages(class_qname))
        limits.append(limit)

    # `default` keeps an empty usage store from raising a ValueError.
    max_usages = max(n_other_values_per_parameter, default=0)

    return {
        i: sum(1 for limit in limits if limit >= i) for i in range(max_usages + 1)
    }
+
+
def __remove_internal_usages(usages: UsageStore, api: API) -> None:
    """
    Drops all usages of API elements that are not public. A message is printed for each
    removed element.

    Calls to methods that a public class inherits from an internal class might be dropped by
    mistake. This only affects methods like fit/predict/etc., which are meant to stay
    unchanged anyway.

    :param usages: Usage store
    :param api: Description of the API
    """

    # The keys are copied in each loop since entries are removed while iterating.

    # Internal classes
    for class_qname in tuple(usages.class_usages):
        if api.is_public_class(class_qname):
            continue

        print(f"Removing usages of internal class {class_qname}")
        usages.remove_class(class_qname)

    # Internal functions
    for function_qname in tuple(usages.function_usages):
        if api.is_public_function(function_qname):
            continue

        print(f"Removing usages of internal function {function_qname}")
        usages.remove_function(function_qname)

    # Internal parameters: unknown to the API or owned by a function that is not public
    known_parameter_qnames = set(api.parameters().keys())

    for parameter_qname in tuple(usages.parameter_usages):
        is_known = parameter_qname in known_parameter_qnames
        if is_known and api.is_public_function(parent_qname(parameter_qname)):
            continue

        print(f"Removing usages of internal parameter {parameter_qname}")
        usages.remove_parameter(parameter_qname)
+
+
def __add_unused_api_elements(usages: UsageStore, api: API) -> None:
    """
    Calls the ``init_*`` methods of the usage store for all public classes, all public
    functions and the parameters of public functions.

    NOTE(review): presumably this creates empty entries for API elements without recorded
    usages - confirm against UsageStore.

    :param usages: Usage store (modified in place).
    :param api: Description of the API.
    """

    # Public classes
    for class_qname in api.classes:
        if not api.is_public_class(class_qname):
            continue

        usages.init_class(class_qname)

    # Public functions
    for function in api.functions.values():
        if not api.is_public_function(function.qname):
            continue

        usages.init_function(function.qname)

        # "Public" parameters, i.e. the parameters of public functions
        for parameter in function.parameters:
            parameter_qname = f"{function.qname}.{parameter.name}"
            usages.init_parameter(parameter_qname)
            usages.init_value(parameter_qname)
+
+
def __add_implicit_usages_of_default_value(usages: UsageStore, api: API) -> None:
    """
    Adds a value usage of the default value for every location where a function is used but
    no usage of the parameter is recorded.

    Parameters for which ``api.get_default_value`` returns None are skipped.

    :param usages: Usage store (modified in place).
    :param api: Description of the API.
    """

    for parameter_qname, parameter_usage_list in list(usages.parameter_usages.items()):
        default_value = api.get_default_value(parameter_qname)
        if default_value is None:
            continue

        function_usage_list = usages.function_usages[parent_qname(parameter_qname)]

        call_locations = {usage.location for usage in function_usage_list}
        explicit_locations = {usage.location for usage in parameter_usage_list}

        # Function is used at these locations without a recorded usage of the parameter
        for location in call_locations - explicit_locations:
            usages.add_value_usage(parameter_qname, default_value, location)
+
+
def __n_not_set_to_most_common_value(
    parameter_qname: str,
    function_usages: dict[str, list[FunctionUsage]],
    value_usages: dict[str, dict[str, list[ValueUsage]]],
) -> int:
    """
    Counts how often a parameter is set to a value other than the most commonly used value.

    :param parameter_qname: Qualified name of the parameter.
    :param function_usages: Usages of functions, indexed by the qualified name of the function.
    :param value_usages: Usages of values, indexed by parameter and then by value.
    :return: Number of usages of the containing function minus the number of usages of the
        most commonly used value.
    """

    n_function_usages = len(function_usages[parent_qname(parameter_qname)])
    usages_by_value = value_usages[parameter_qname]

    # Parameter is unused
    # One condition implies the other. Both are checked on purpose, so an inconsistent
    # store fails below instead of silently returning 0.
    if n_function_usages == 0 and not usages_by_value:
        return 0

    n_most_common_value = max(map(len, usages_by_value.values()))

    return n_function_usages - n_most_common_value
+
+
def __remove_rarely_used_api_elements(
    usages: UsageStore, min_usages: int, out_dir: Path, base_file_name: str
) -> dict[str, Any]:
    """
    Removes API elements that are used fewer than min_usages times.

    There are four steps: classes, functions, parameters and finally parameters that are
    rarely set to a value other than the most common one. After each step the API size is
    recorded and the names of the removed elements are written to a JSON file in out_dir.

    :param usages: Usage store (modified in place).
    :param min_usages: Minimum number of usages an API element needs to be kept.
    :param out_dir: Directory the files are written to.
    :param base_file_name: Prefix of the names of the output files.
    :return: The API size after the individual steps.
    """

    def current_api_size() -> Any:
        # Size of the API that is still contained in the usage store
        return __api_size_to_json(
            len(usages.class_usages),
            len(usages.function_usages),
            len(usages.parameter_usages),
        )

    def write_removed_elements(file_name: str, removed_elements: list[str]) -> None:
        with out_dir.joinpath(file_name).open("w") as f:
            json.dump(removed_elements, f, indent=2)

    api_sizes: dict[str, Any] = {}

    # Classes
    rarely_used_classes = __remove_rarely_used_classes(usages, min_usages)
    api_sizes["after_unused_class_removal"] = current_api_size()
    write_removed_elements(
        f"{base_file_name}__classes_used_fewer_than_{min_usages}_times.json",
        rarely_used_classes,
    )

    # Functions
    rarely_used_functions = __remove_rarely_used_functions(usages, min_usages)
    api_sizes["after_unused_function_removal"] = current_api_size()
    write_removed_elements(
        f"{base_file_name}__functions_used_fewer_than_{min_usages}_times.json",
        rarely_used_functions,
    )

    # Parameters
    rarely_used_parameters = __remove_rarely_used_parameters(usages, min_usages)
    api_sizes["after_unused_parameter_removal"] = current_api_size()
    write_removed_elements(
        f"{base_file_name}__parameters_used_fewer_than_{min_usages}_times.json",
        rarely_used_parameters,
    )

    # Parameters that are almost always set to the same value
    mostly_useless_parameters = __remove_mostly_useless_parameters(usages, min_usages)
    api_sizes["after_useless_parameter_removal"] = current_api_size()
    write_removed_elements(
        f"{base_file_name}__parameters_set_fewer_than_{min_usages}_times_to_value_other_than_most_common.json",
        mostly_useless_parameters,
    )

    return api_sizes
+
+
def __remove_rarely_used_classes(usages: UsageStore, min_usages: int) -> list[str]:
    """
    Removes all classes that are used fewer than min_usages times from the usage store.

    :param usages: Usage store (modified in place).
    :param min_usages: Minimum number of usages a class needs to be kept.
    :return: The sorted qualified names of the removed classes.
    """

    removed_qnames: list[str] = []

    # Iterate over a copy of the keys since entries are removed in the loop
    for qname in tuple(usages.class_usages):
        if usages.n_class_usages(qname) >= min_usages:
            continue

        removed_qnames.append(qname)
        usages.remove_class(qname)

    removed_qnames.sort()
    return removed_qnames
+
+
def __remove_rarely_used_functions(usages: UsageStore, min_usages: int) -> list[str]:
    """
    Removes all functions that are used fewer than min_usages times from the usage store.

    :param usages: Usage store (modified in place).
    :param min_usages: Minimum number of usages a function needs to be kept.
    :return: The sorted qualified names of the removed functions.
    """

    removed_qnames: list[str] = []

    # Iterate over a copy of the keys since entries are removed in the loop
    for qname in tuple(usages.function_usages):
        if usages.n_function_usages(qname) >= min_usages:
            continue

        removed_qnames.append(qname)
        usages.remove_function(qname)

    removed_qnames.sort()
    return removed_qnames
+
+
def __remove_rarely_used_parameters(usages: UsageStore, min_usages: int) -> list[str]:
    """
    Removes all parameters that are used fewer than min_usages times from the usage store.

    :param usages: Usage store (modified in place).
    :param min_usages: Minimum number of usages a parameter needs to be kept.
    :return: The sorted qualified names of the removed parameters.
    """

    removed_qnames: list[str] = []

    # Iterate over a copy of the keys since entries are removed in the loop
    for qname in tuple(usages.parameter_usages):
        if usages.n_parameter_usages(qname) >= min_usages:
            continue

        removed_qnames.append(qname)
        usages.remove_parameter(qname)

    removed_qnames.sort()
    return removed_qnames
+
+
def __remove_mostly_useless_parameters(
    usages: UsageStore, min_usages: int
) -> list[str]:
    """
    Removes all parameters that are set fewer than min_usages times to a value other than
    the most commonly used value.

    :param usages: Usage store (modified in place).
    :param min_usages: Minimum count a parameter needs to be kept.
    :return: The sorted qualified names of the removed parameters.
    """

    removed_qnames: list[str] = []

    # Iterate over a copy of the keys since entries are removed in the loop
    for qname in tuple(usages.parameter_usages):
        n_other_values = __n_not_set_to_most_common_value(
            qname, usages.function_usages, usages.value_usages
        )
        if n_other_values >= min_usages:
            continue

        removed_qnames.append(qname)
        usages.remove_parameter(qname)

    removed_qnames.sort()
    return removed_qnames
+
+
def __write_api_size(
    api: API, api_size_after_removal: dict[str, Any], out_dir: Path, base_file_name: str
) -> None:
    """
    Writes the size of the API to "<base_file_name>__api_size.json".

    The file contains the size of the full API, the size of the public API, and the sizes
    after the individual removal steps.

    :param api: Description of the API.
    :param api_size_after_removal: API sizes after the individual removal steps.
    :param out_dir: Directory the file is written to.
    :param base_file_name: Prefix of the name of the output file.
    """

    removal_steps = (
        "after_unused_class_removal",
        "after_unused_function_removal",
        "after_unused_parameter_removal",
        "after_useless_parameter_removal",
    )

    with out_dir.joinpath(f"{base_file_name}__api_size.json").open("w") as f:
        api_sizes = {
            "full": __api_size_to_json(
                api.class_count(), api.function_count(), api.parameter_count()
            ),
            "public": __api_size_to_json(
                api.public_class_count(),
                api.public_function_count(),
                api.public_parameter_count(),
            ),
        }

        # Only the known steps are copied, in a fixed order
        for removal_step in removal_steps:
            api_sizes[removal_step] = api_size_after_removal[removal_step]

        json.dump(api_sizes, f, indent=2)
+
+
def __api_size_to_json(n_classes: int, n_functions: int, n_parameters: int) -> Any:
    """
    Bundles the number of classes, functions and parameters into a JSON-serializable dictionary.

    :return: A dictionary with the keys "n_classes", "n_functions" and "n_parameters".
    """

    return dict(
        n_classes=n_classes,
        n_functions=n_functions,
        n_parameters=n_parameters,
    )
+
+
def __optional_vs_required_parameters(
    usages: UsageStore, public_api: API, out_dir: Path, base_file_name: str
) -> None:
    """
    Placeholder that does nothing yet. See the TODOs below for the planned analysis.
    """

    # TODO: Determine whether parameter should be constant (already removed)/required/optional based on entropy
    # TODO: Use most commonly set value as default

    pass