From 965d3d0e38250a0d2f8ece51e6acfd9f2f2c440a Mon Sep 17 00:00:00 2001 From: j3rome Date: Thu, 29 Jul 2021 14:28:37 -0400 Subject: [PATCH] * Fix case sensitivity issue --- py_requirements_guesser/guesser.py | 91 ++++++++++++++++++------------ py_requirements_guesser/utils.py | 14 +++-- 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/py_requirements_guesser/guesser.py b/py_requirements_guesser/guesser.py index f980a74..4bbae4f 100644 --- a/py_requirements_guesser/guesser.py +++ b/py_requirements_guesser/guesser.py @@ -6,7 +6,10 @@ from .utils import get_mapping_files_from_pipreqs, get_local_modules, get_packag class Guesser: def __init__(self, force_guess=None, keep_unused_packages=False): + self.keep_unused_packages = keep_unused_packages + # Retrive mapping files from https://github.com/bndr/pipreqs + # The mapping keys are all lowercase (case insensitive match) self.stdlib_list, self.import_to_package_mapping, self.package_to_import_mapping = get_mapping_files_from_pipreqs() # Get local packages @@ -21,26 +24,44 @@ class Guesser: # Retrieve all imported packages in project all_imported_packages = set(get_all_imports(self.stdlib_list)) - # Retrieve packages in requirements.txt - if os.path.exists('requirements.txt'): - packages_in_requirements_version_map = get_packages_from_requirements('requirements.txt') - self.packages_in_requirements = set(packages_in_requirements_version_map.keys()) - else: - packages_in_requirements_version_map = {} - self.packages_in_requirements = set() + # Retrieve packages in requirements.txt + packages_in_requirements = get_packages_from_requirements('requirements.txt') - # Merge packages in requirements.txt and imports - self.all_packages = packages_in_requirements_version_map - extra_packages = all_imported_packages - self.packages_in_requirements - for extra_package in extra_packages: - self.all_packages[extra_package] = None + # Do mapping between import name and package name + self.all_packages = {} + for package_name, version in packages_in_requirements.items(): + package_name_lowercase = package_name.lower() + import_name = self.package_to_import_mapping.get(package_name_lowercase, package_name) - self.keep_unused_packages = keep_unused_packages + self.all_packages[package_name.lower()] = { + 'import_name': import_name, + 'package_name': package_name, + 'version': version, + 'in_requirements': True + } + + + for import_name in all_imported_packages: + package_name = self.import_to_package_mapping.get(import_name, import_name) + package_name_lowercase = package_name.lower() + + if package_name_lowercase not in self.all_packages: + self.all_packages[package_name_lowercase] = { + 'import_name': import_name, + 'package_name': package_name, + 'version': None, + 'in_requirements': False + } def guess_package_versions(self): packages = [] - for package_name, version in self.all_packages.items(): + for package_name_lowercase, package_info in self.all_packages.items(): + package_name = package_info['package_name'] + version = package_info['version'] + import_name = package_info['import_name'] + package_in_requirements = package_info['in_requirements'] + print("\n" + "-"*40) print(f"PACKAGE : {package_name}") if version is None: @@ -52,12 +73,8 @@ class Guesser: import_version = None req_version = None - # Pypi package to import mapping - import_name = self.package_to_import_mapping.get(package_name, package_name) - pypi_package_name = self.import_to_package_mapping.get(package_name, package_name) - # Get available versions from Pypi - available_versions = get_pypi_history(pypi_package_name, ignore_release_candidat=True) + available_versions = get_pypi_history(package_name, ignore_release_candidat=True) if available_versions is None: print(f"[INFO] Couldn't find Pypi releases for package '{package_name}', ignoring") @@ -65,7 +82,10 @@ class Guesser: # Retrieve candidate version based on the first time the package was imported in *.py date_added_via_import = get_date_when_package_committed(import_name, via_requirements=False) - if date_added_via_import is None: + if date_added_via_import is not None: + date_added_via_import_str = date_added_via_import.strftime("%Y-%m-%d") + import_version = find_version_at_date(available_versions, date_added_via_import) + else: print(f" [INFO] Package '{package_name}' is defined in requirements.txt but not used (Or committed), ") if self.keep_unused_packages: print(" will attempts guessing version anyways since --keep_unused_packages is set set") @@ -73,34 +93,33 @@ class Guesser: else: print(f"[INFO] Ignoring package '{package_name}' (Use --keep_unused_packages if you want to keep it)") continue - else: - date_added_via_import_str = date_added_via_import.strftime("%Y-%m-%d") - import_version = find_version_at_date(available_versions, date_added_via_import) + # Retrieve candidate version based on the first time the package was added to requirements.txt - if pypi_package_name.lower() in self.packages_in_requirements: - date_added_via_req = get_date_when_package_committed(pypi_package_name, via_requirements=True) + if package_in_requirements: + date_added_via_req = get_date_when_package_committed(package_name, via_requirements=True) if date_added_via_req is not None: req_version = find_version_at_date(available_versions, date_added_via_req) date_added_via_req_str = date_added_via_req.strftime("%Y-%m-%d") else: print(f" [INFO] Package '{package_name}' was not in requirements.txt, using date of first import (Version {import_version} / {date_added_via_import_str})") choice = 1 - - if choice is None: - if req_version != import_version: - # Ask user to choose version based on either first import date or first added to requirements.txt date - choice = user_response_multi_choices(f"Choose guessing strategy for package '{package_name}'", [ - f'{"First time the package was imported".ljust(50)} (Version {import_version} / {date_added_via_import_str})', - f'{"When the package was added to requirements.txt".ljust(50)} (Version {req_version} / {date_added_via_req_str})' - ]) - else: - # Both requirements.txt and first import resolve to the same version - choice = 1 else: print(f" [INFO] Package '{package_name}' was not found in requirements.txt, using date of first import (Version {import_version} / {date_added_via_import_str})") choice = 1 + + # Ask user to choose version based on either first import date or first added to requirements.txt date + if choice is None: + if req_version != import_version: + choice = user_response_multi_choices(f"Choose guessing strategy for package '{package_name}'", [ + f'{"First time the package was imported".ljust(50)} (Version {import_version} / {date_added_via_import_str})', + f'{"When the package was added to requirements.txt".ljust(50)} (Version {req_version} / {date_added_via_req_str})' + ]) + else: + # Both requirements.txt and first import resolve to the same version + choice = 1 + if choice == 2: version = req_version date = date_added_via_req_str diff --git a/py_requirements_guesser/utils.py b/py_requirements_guesser/utils.py index 511afac..18c42d2 100644 --- a/py_requirements_guesser/utils.py +++ b/py_requirements_guesser/utils.py @@ -137,6 +137,7 @@ def find_version_at_date(available_versions, date): def get_mapping_files_from_pipreqs(tmp_path="/tmp/.py-reqs-guesser"): """ Retrieve 'import -> package' name mapping and standard lib module list + The mapping key is lowercase so that we can match case insensitive These files come from https://github.com/bndr/pipreqs """ @@ -181,8 +182,8 @@ def get_mapping_files_from_pipreqs(tmp_path="/tmp/.py-reqs-guesser"): for line in f.readlines(): import_name, package_name = line.strip().split(":") - from_import_to_package_mapping[import_name] = package_name - from_package_to_import_mapping[package_name] = import_name + from_import_to_package_mapping[import_name.lower()] = package_name + from_package_to_import_mapping[package_name.lower()] = import_name with open(stdlib_filepath, 'r') as f: stdlib = set([l.strip() for l in f.readlines()]) @@ -196,13 +197,16 @@ def get_packages_from_requirements(filepath): """ # TODO : Handle multiple version conditions # TODO : Handle greater than (>). If version contains >, should take the greatest available version at that date. + packages = {} + + if not os.path.exists(filepath): + return packages + with open(filepath, 'r') as f: lines = f.readlines() split_reg = re.compile(r'==|<=|>=|<|>') - packages = {} - for line in lines: splitted = re.split(split_reg, line.strip()) if len(splitted) > 1: @@ -210,7 +214,7 @@ def get_packages_from_requirements(filepath): else: version = None - packages[splitted[0].lower()] = version + packages[splitted[0]] = version return packages