mirror of
https://github.com/J3rome/py-requirements-guesser.git
synced 2024-12-27 19:05:16 +01:00
* Fully working with interactive choices
This commit is contained in:
parent
d3bc3252f8
commit
45440cd735
140
main.py
140
main.py
@ -7,10 +7,7 @@ from xml.etree.ElementTree import fromstring as parse_xml
|
||||
|
||||
import requests
|
||||
|
||||
from utils import load_packages_from_requirements#, user_response_multi, user_response_yes_no
|
||||
|
||||
# TODO : Get all imports from source files
|
||||
# TODO : Exclude imports where version is specified in requirements.txt
|
||||
from utils import load_packages_from_requirements, get_mapping_files_from_pipreqs, user_response_multi_choices, user_response_yes_no, get_python_filename_at_root
|
||||
|
||||
# TODO : Propose choice between date of first import or Added in requirements
|
||||
# TODO : Other choices : When project was created, last commit (That wasnt on md file), 75 percentile of time between commits
|
||||
@ -20,25 +17,32 @@ from utils import load_packages_from_requirements#, user_response_multi, user_re
|
||||
# TODO : Poetry mode ?
|
||||
# - There might be a version in the history (1st Commit : matplotlib==1.0.1 -> 2nd commit : matplotlib==1.1.2 -> 3rd commit : )
|
||||
|
||||
# TODO : import name doesn't always match the pypi name
|
||||
# A potential solution : https://github.com/thebjorn/pydeps/blob/master/pydeps/package_names.py
|
||||
# from pipreqs.pipreqs import get_all_imports, get_pkg_names
|
||||
|
||||
# TODO : Add a mode where file/folder creation/last update is used if no git repo ?
|
||||
|
||||
# TODO : Add jupyter notebook support
|
||||
# TODO : Hide logging argument
|
||||
# TODO : Add switch to keep unused imports
|
||||
|
||||
# FIXME : Some unused imports might be important (Pillow for example)
|
||||
|
||||
EXTRACT_DATE_REGEX = re.compile(r'Date:\s*(\d+)')
|
||||
LETTER_REGEX = re.compile(r'[a-zA-Z]')
|
||||
|
||||
parser = argparse.ArgumentParser("Python Requirements Version Guesser")
|
||||
parser.add_argument('--git_repo_path', type=str, default=None, required=False)
|
||||
parser.add_argument('--git_repo_path', type=str, default=None, required=False) # TODO : CHDIR in this directory if provided
|
||||
|
||||
|
||||
def get_pypi_history(package_name, ignore_release_candidat=True):
|
||||
"""
|
||||
Retrieve version release dates via Pypi JSON api
|
||||
"""
|
||||
resp = requests.get(f"https://pypi.org/pypi/{package_name}/json")
|
||||
|
||||
resp = requests.get(f"https://pypi.org/pypi/{package_name}/json").json()
|
||||
if resp.status_code != 200:
|
||||
print(f"[INFO] Couldn't find package '{package_name} on Pypi. Ignoring")
|
||||
return None
|
||||
|
||||
resp = resp.json()
|
||||
|
||||
versions = []
|
||||
for version, release_info_per_os in resp['releases'].items():
|
||||
@ -73,16 +77,35 @@ def find_version_at_date(available_versions, date):
|
||||
return last_version
|
||||
|
||||
|
||||
def get_date_when_package_added(package_name, via_requirements=False):
|
||||
def get_all_imports(stdlib_list=None):
|
||||
cmd = f'grep -PRoh --include="*.py" "(?<=^import )\\w*|(?<=^from )\\w*" . | sort | uniq'
|
||||
|
||||
try:
|
||||
grep_out = subprocess.check_output(cmd, shell=True).decode().strip()
|
||||
except:
|
||||
grep_out = ""
|
||||
|
||||
if len(grep_out) == 0:
|
||||
raise Exception(f"[ERROR] couldn't find any import statement")
|
||||
|
||||
imports = [l.strip() for l in grep_out.split("\n")]
|
||||
|
||||
if stdlib_list:
|
||||
return [l for l in imports if l not in stdlib_list]
|
||||
|
||||
return imports
|
||||
|
||||
|
||||
def get_date_when_package_added(package_name, via_requirements=False, latest_addition=False):
|
||||
if not via_requirements:
|
||||
search_pattern = f"^import {package_name}$|^from {package_name}"
|
||||
search_pattern = f"^import {package_name}|^from {package_name}"
|
||||
filename = ""
|
||||
else:
|
||||
search_pattern = f"{package_name}$"
|
||||
filename = "requirements.txt"
|
||||
|
||||
# We grep for 'date' | '+ search pattern' so that we keep only commits that insert lines (+)
|
||||
cmd = f"git log -G '{search_pattern}' --date unix -p {filename} | grep -i 'date\\|\\+.*{package_name}'"
|
||||
cmd = f"git log -G '{search_pattern}' --date unix -p {filename} | grep -i '^date:\\|\\+.*{package_name}'"
|
||||
|
||||
try:
|
||||
blame_out = subprocess.check_output(cmd, shell=True).decode().strip()
|
||||
@ -91,7 +114,13 @@ def get_date_when_package_added(package_name, via_requirements=False):
|
||||
|
||||
if len(blame_out) == 0:
|
||||
#return []
|
||||
raise Exception(f"[ERROR] couldn't find package '{package_name}' via git-log")
|
||||
if not via_requirements:
|
||||
msg = f"'{package_name}' is defined in requirements.txt but not used, ignoring"
|
||||
else:
|
||||
msg = f"'{package_name}' was not found in requirements.txt"
|
||||
|
||||
f"[INFO] {msg}"
|
||||
return None
|
||||
|
||||
# Remove commit that are not directly followed by '+ import' (We grepped for this in cmd)
|
||||
# This is ugly.. TODO: figure out a better way in the grep command
|
||||
@ -110,21 +139,89 @@ def get_date_when_package_added(package_name, via_requirements=False):
|
||||
raise Exception("[ERROR] while parsing git-log")
|
||||
|
||||
# Get first date where the line was added
|
||||
return sorted(dates, reverse=True)[0]
|
||||
return sorted(dates, reverse=not latest_addition)[0]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("="*60)
|
||||
print("Python requirements guesser")
|
||||
print("="*60)
|
||||
|
||||
packages_in_requirements = load_packages_from_requirements('requirements.txt')
|
||||
print("\nFollow the steps to guess package versions based on when they were added to git\n")
|
||||
|
||||
|
||||
stdlib_list, from_import_to_package_mapping, from_package_to_import_mapping = get_mapping_files_from_pipreqs()
|
||||
|
||||
local_packages = get_python_filename_at_root()
|
||||
# Remove local_packages from the list of imports
|
||||
stdlib_list.update(local_packages)
|
||||
|
||||
all_imported_packages = set(get_all_imports(stdlib_list))
|
||||
packages_in_requirements_version_map = load_packages_from_requirements('requirements.txt')
|
||||
packages_in_requirements = set(packages_in_requirements_version_map.keys())
|
||||
|
||||
extra_packages = all_imported_packages - packages_in_requirements
|
||||
|
||||
all_packages = packages_in_requirements_version_map
|
||||
for extra_package in extra_packages:
|
||||
all_packages[extra_package] = None
|
||||
|
||||
packages = []
|
||||
|
||||
for package_name, version in packages_in_requirements.items():
|
||||
for package_name, version in all_packages.items():
|
||||
if version is None:
|
||||
date_added = get_date_when_package_added(package_name, via_requirements=True)
|
||||
# TODO : Add argument to auto select one of the options
|
||||
skip_choice = False
|
||||
|
||||
import_name = package_name
|
||||
|
||||
if import_name in from_package_to_import_mapping:
|
||||
import_name = from_package_to_import_mapping[import_name]
|
||||
|
||||
if package_name in from_import_to_package_mapping:
|
||||
package_name = from_import_to_package_mapping[package_name]
|
||||
|
||||
available_versions = get_pypi_history(package_name, ignore_release_candidat=True)
|
||||
version = find_version_at_date(available_versions, date_added)
|
||||
print(f"{package_name} - {date_added}")
|
||||
|
||||
if available_versions is None:
|
||||
continue
|
||||
|
||||
date_added_via_import = get_date_when_package_added(import_name, via_requirements=False)
|
||||
if date_added_via_import is None:
|
||||
print(f"[INFO] Package '{package_name}' is defined in requirements.txt but not used, ignoring")
|
||||
continue
|
||||
date_added_via_import_str = date_added_via_import.strftime("%Y-%m-%d")
|
||||
import_version = find_version_at_date(available_versions, date_added_via_import)
|
||||
|
||||
if package_name in packages_in_requirements:
|
||||
date_added_via_req = get_date_when_package_added(package_name, via_requirements=True)
|
||||
if date_added_via_req is not None:
|
||||
req_version = find_version_at_date(available_versions, date_added_via_req)
|
||||
date_added_via_req_str = date_added_via_req.strftime("%Y-%m-%d")
|
||||
else:
|
||||
choice = 1
|
||||
skip_choice = True
|
||||
print(f"[INFO] Package '{package_name}' was not in requirements.txt, using date of first import (Version {import_version} / {date_added_via_import_str})")
|
||||
|
||||
if not skip_choice and req_version == import_version:
|
||||
print(f"[INFO] Package '{package_name}' was attributed version {req_version}")
|
||||
skip_choice = True
|
||||
choice = 1
|
||||
|
||||
if not skip_choice:
|
||||
choice = user_response_multi_choices(f"Choose guessing strategy for package '{package_name}'", [
|
||||
f'{"First time the package was imported".ljust(50)} (Version {import_version} / {date_added_via_import_str})',
|
||||
f'{"When the package was added to requirements.txt".ljust(50)} (Version {req_version} / {date_added_via_req_str})'
|
||||
])
|
||||
|
||||
if choice == 2:
|
||||
version = req_version
|
||||
else:
|
||||
version = import_version
|
||||
|
||||
else:
|
||||
#print(f"[INFO] Package '{package_name} version is already specified in requirements.txt ({version})")
|
||||
print(f"[INFO] Package {package_name} was not found in requirements.txt, using date of first import (Version {import_version} / {date_added_via_import_str})")
|
||||
version = import_version
|
||||
|
||||
else:
|
||||
print(f"{package_name} version is specified in requirements.txt ({version})")
|
||||
@ -132,6 +229,7 @@ if __name__ == "__main__":
|
||||
packages.append((package_name, version))
|
||||
|
||||
|
||||
print("")
|
||||
# TODO : Write to requirements.txt
|
||||
for package_name, version in sorted(packages, key=lambda x:x[0]):
|
||||
print(f"{package_name}=={version}")
|
||||
|
73
utils.py
73
utils.py
@ -1,17 +1,21 @@
|
||||
import re
|
||||
import os
|
||||
from urllib.request import urlretrieve
|
||||
|
||||
def user_response_multi(message, choices):
|
||||
def user_response_multi_choices(message, choices):
|
||||
print(message)
|
||||
for i, choice in enumerate(choices):
|
||||
print(f'{i+1}. {choice}')
|
||||
print(f' {i+1}. {choice}')
|
||||
|
||||
|
||||
nb_choices = len(choices)
|
||||
resp = int(input(f'{message} [1-{nb_choices}]\n'))
|
||||
resp = input(f'Choose option [1-{nb_choices}]\n')
|
||||
|
||||
if resp not in range(1,nb_choices+1):
|
||||
return user_response_multi(message, choices)
|
||||
if not resp.isdigit() or int(resp) not in range(1,nb_choices+1):
|
||||
print("")
|
||||
return user_response_multi_choices(message, choices)
|
||||
|
||||
return resp
|
||||
return int(resp)
|
||||
|
||||
|
||||
def user_response_yes_no(message):
|
||||
@ -20,11 +24,64 @@ def user_response_yes_no(message):
|
||||
resp = input(message + ' [Y/n]\n').lower()
|
||||
|
||||
if resp not in ['y', 'n']:
|
||||
print("")
|
||||
return user_response_yes_no(message)
|
||||
|
||||
return resp == 'y'
|
||||
|
||||
|
||||
def get_mapping_files_from_pipreqs(tmp_path="/tmp/.py-req-guesser"):
|
||||
"""
|
||||
Retrieve import to package name mapping file and standard lib module list
|
||||
This list comes from https://github.com/bndr/pipreqs
|
||||
"""
|
||||
|
||||
skip_download = False
|
||||
|
||||
if not os.path.exists(tmp_path):
|
||||
os.mkdir(tmp_path)
|
||||
|
||||
mapping_filepath = f"{tmp_path}/mapping"
|
||||
stdlib_filepath = f"{tmp_path}/stdlib"
|
||||
|
||||
if os.path.exists(mapping_filepath) and os.path.exists(stdlib_filepath):
|
||||
# File have already been downloaded
|
||||
skip_download = True
|
||||
|
||||
if not skip_download:
|
||||
msg = "We will download a mapping file from https://github.com/bndr/pipreqs\n" \
|
||||
"Thanks to the maintainers of Pipreqs for keeping the mapping file "\
|
||||
"and the STDlib module list up to date\n" \
|
||||
f"Do you agree to downloading these files in '{tmp_path}' ?"
|
||||
|
||||
if not user_response_yes_no(msg):
|
||||
print("\n\n[ERROR]Pipreqs mapping files are required, I encourage you to inspect the code to make sure everything is safe and rerun this")
|
||||
exit(0)
|
||||
|
||||
print("")
|
||||
# FIXME : This is not really scalable...
|
||||
mapping_url = "https://raw.githubusercontent.com/bndr/pipreqs/90102acdbb23c09574d27df8bd1f568d34e0cfd3/pipreqs/mapping"
|
||||
stdlib_url = "https://raw.githubusercontent.com/bndr/pipreqs/90102acdbb23c09574d27df8bd1f568d34e0cfd3/pipreqs/stdlib"
|
||||
|
||||
urlretrieve(mapping_url, mapping_filepath)
|
||||
urlretrieve(stdlib_url, stdlib_filepath)
|
||||
|
||||
from_import_to_package_mapping = {}
|
||||
from_package_to_import_mapping = {}
|
||||
with open(mapping_filepath, 'r') as f:
|
||||
for line in f.readlines():
|
||||
import_name, package_name = line.strip().split(":")
|
||||
|
||||
from_import_to_package_mapping[import_name] = package_name
|
||||
from_package_to_import_mapping[package_name] = import_name
|
||||
|
||||
with open(stdlib_filepath, 'r') as f:
|
||||
stdlib = set([l.strip() for l in f.readlines()])
|
||||
|
||||
return stdlib, from_import_to_package_mapping, from_package_to_import_mapping
|
||||
|
||||
|
||||
|
||||
def load_packages_from_requirements(filepath):
|
||||
# TODO : Handle when multiple version conditions
|
||||
# TODO : Handle greater than (>). If version contains >, should take the greatest available version at the date. Should fit with minor versions ?
|
||||
@ -47,6 +104,10 @@ def load_packages_from_requirements(filepath):
|
||||
return packages
|
||||
|
||||
|
||||
def get_python_filename_at_root():
|
||||
return [f[:-3] for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.py')]
|
||||
|
||||
|
||||
def detect_os():
|
||||
pass
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user