2021-06-24 07:36:28 +02:00
import re
import os
import argparse
import subprocess
2021-06-28 17:50:58 +02:00
import json
2021-06-24 07:36:28 +02:00
from datetime import datetime
2021-06-28 17:50:58 +02:00
from urllib . request import urlopen
2021-06-24 07:36:28 +02:00
2021-06-24 11:35:02 +02:00
from utils import load_packages_from_requirements , get_mapping_files_from_pipreqs , user_response_multi_choices
2021-06-28 20:52:09 +02:00
from utils import get_date_last_modified_python_file , get_local_modules , validate_cwd_is_git_repo
2021-06-24 08:33:54 +02:00
2021-06-24 07:36:28 +02:00
# TODO : Propose choice between date of first import or Added in requirements
2021-06-24 11:35:02 +02:00
# TODO : Other choices : when the project was created, or the last commit (that wasn't on an md file), see get_date_last_modified_python_file()
2021-06-24 07:36:28 +02:00
# TODO : Pin also the dependencies tree of the packages Ex : Torch package might install numpy, etc
# TODO : Poetry mode ?
2021-06-24 08:33:54 +02:00
# TODO : Add a mode where file/folder creation/last update is used if no git repo ?
2021-06-24 07:36:28 +02:00
2021-06-24 11:13:01 +02:00
# TODO : Add jupyter notebook support
# TODO : Hide logging argument
# TODO : Add switch to keep unused imports
# FIXME : Some unused imports might be important (Pillow for example)
2021-06-28 20:52:09 +02:00
# LIMITATION : Might get conflicts with local imports
2021-06-24 23:13:06 +02:00
# Captures the unix timestamp from the "date - <timestamp>" header lines that
# the custom `git log --pretty` format produces.
EXTRACT_DATE_REGEX = re.compile(r'date\s-\s(\d+)')

# Any letter inside a version string (rc1, b2, .dev0, ...) marks a release that
# is skipped when release candidates are ignored.
LETTER_REGEX = re.compile(r'[a-zA-Z]')

parser = argparse.ArgumentParser("Python Requirements Version Guesser")
# `--write` without a value yields '' (the script then falls back to
# requirements.txt); omitting the flag yields None (nothing is written).
parser.add_argument('--write', type=str, default=None, required=False, nargs='?', const='')
# Comma separated list; the script splits it on "," before handing it to
# get_local_modules().
parser.add_argument('--force_guess', type=str, default=None, required=False)
2021-06-24 07:36:28 +02:00
def get_pypi_history(package_name, ignore_release_candidat=True):
    """
    Retrieve version release dates via Pypi JSON api

    Args:
        package_name: name of the project as published on Pypi.
        ignore_release_candidat: when True, skip every version whose string
            contains a letter (rc, beta, dev, ... releases).

    Returns:
        A list of ``(version, release_date)`` tuples sorted from the most
        recent to the oldest release, or ``None`` when Pypi answers 404 for
        the package. The list can be empty when every release was skipped.

    Raises:
        SystemExit: when the request fails for any reason other than a 404.
    """
    try:
        resp = urlopen(f"https://pypi.org/pypi/{package_name}/json")
    except Exception as e:
        # HTTP errors expose getcode(); a 404 simply means "unknown package"
        if hasattr(e, 'getcode') and e.getcode() == 404:
            return None
        print("[ERROR] Internet access is required to fetch package history from Pypi")
        raise SystemExit(1)

    # Close the HTTP response as soon as the payload has been read
    with resp:
        payload = json.loads(resp.read())

    versions = []
    for version, release_info_per_os in payload['releases'].items():
        # Just taking the first platform upload date for now..
        # Is it really different for other platforms ? Need to validate
        # TODO : Give appropriate version based on os and python Versions resp['info']['requires_dist'] # ['require_python']
        if not release_info_per_os:
            # Version without any uploaded file
            continue

        if ignore_release_candidat and LETTER_REGEX.search(version):
            continue

        release_info = release_info_per_os[0]
        # upload_time looks like "2021-06-24T07:36:28": keep only the date part
        release_date = datetime.strptime(release_info['upload_time'].split("T")[0], '%Y-%m-%d')
        versions.append((version, release_date))

    # FIXME : Do we really need to sort ? Versions should already be sorted
    return sorted(versions, key=lambda x: x[1], reverse=True)
2021-06-24 07:36:28 +02:00
2021-06-24 08:32:38 +02:00
def find_version_at_date(available_versions, date):
    """
    Pick the version that was the most recent one at a given date

    Args:
        available_versions: list of ``(version, release_date)`` tuples sorted
            from the most recent to the oldest release.
        date: datetime to resolve a version for.

    Returns:
        The first version of the list released on or before ``date``. When
        ``date`` is older than every release, the oldest available version.
        ``None`` when ``available_versions`` is empty.
    """
    if not available_versions:
        return None

    # FIXME : Do binary search
    for candidate_version, candidate_date in available_versions:
        if date >= candidate_date:
            return candidate_version

    # Date is older than available versions... Fallback on the oldest available version
    return available_versions[-1][0]
2021-06-24 07:36:28 +02:00
2021-06-24 11:13:01 +02:00
def get_all_imports(stdlib_list=None):
    """
    Collect the top-level modules imported by the *.py files under the current directory

    Only statements that start at the very beginning of a line with
    ``import <name>`` or ``from <name>`` are considered, and only the first
    module segment is kept (``import os.path`` gives ``os``). Relative
    imports (``from . import x``) and indented imports are ignored.

    The scan is done in pure Python so it does not depend on a shell or on
    GNU grep being available.

    Args:
        stdlib_list: optional collection of module names to leave out of the
            result.

    Returns:
        Sorted list of unique module names.

    Raises:
        Exception: when no import statement is found at all.
    """
    import_pattern = re.compile(r'(?:import|from) (\w+)')

    found = set()
    for folder, _, filenames in os.walk('.'):
        for filename in filenames:
            if not filename.endswith('.py'):
                continue
            try:
                # errors='ignore' : a badly encoded file must not abort the scan
                with open(os.path.join(folder, filename), encoding='utf-8', errors='ignore') as source:
                    for line in source:
                        # match() anchors at the start of the line
                        matches = import_pattern.match(line)
                        if matches:
                            found.add(matches.group(1))
            except OSError:
                # Unreadable file (permissions, broken symlink), skip it
                continue

    if not found:
        raise Exception("[ERROR] couldn't find any import statement")

    imports = sorted(found)
    if stdlib_list:
        return [name for name in imports if name not in stdlib_list]
    return imports
2021-06-24 21:16:33 +02:00
def get_date_when_package_committed(package_name, via_requirements=False, latest_addition=False):
    """
    Find when a package was added to the project according to the git history

    Args:
        package_name: name to look for. An import name when
            ``via_requirements`` is False, a requirements.txt entry otherwise.
        via_requirements: when False, look for commits touching
            ``import <name>`` / ``from <name>`` lines in any file. When True,
            look for commits touching lines ending with the name in
            requirements.txt.
        latest_addition: when False (default) return the date of the first
            commit that added a matching line, when True the date of the
            most recent one.

    Returns:
        A datetime, or ``None`` when no commit adding a matching line was
        found (or when git could not be run).
    """
    if via_requirements:
        search_pattern = f"{package_name}$"
        path_args = ["--", "requirements.txt"]
    else:
        search_pattern = f"^import {package_name}|^from {package_name}"
        path_args = []

    # Arguments are passed as a list (no shell) so that the package name can
    # never be interpreted as shell syntax.
    cmd = ["git", "log", "-i", "-G", search_pattern,
           "--pretty=format:date - %at", "--date", "unix", "-p"] + path_args
    try:
        log_out = subprocess.check_output(cmd).decode(errors="replace")
    except (subprocess.CalledProcessError, OSError):
        log_out = ""

    # Keep only the commits that insert (+) a line mentioning the package.
    # The log is walked backward : the added lines of a commit are seen
    # before the "date - <timestamp>" header they belong to.
    needle = package_name.lower()
    dates = []
    got_plus = False
    for line in reversed(log_out.splitlines()):
        if line.startswith("+"):
            if needle in line.lower():
                got_plus = True
            continue

        matches = EXTRACT_DATE_REGEX.match(line)
        if matches is None:
            # Neither an added line nor a commit header
            continue

        if got_plus:
            dates.append(datetime.fromtimestamp(int(matches.group(1))))
        # A commit header closes the current commit
        got_plus = False

    if not dates:
        return None

    # First addition by default, most recent one on request
    return max(dates) if latest_addition else min(dates)
2021-06-24 07:36:28 +02:00
2021-06-24 23:35:14 +02:00
def guess_package_versions(package_list, from_import_to_package_mapping, from_package_to_import_mapping, packages_in_requirements, keep_unused_packages=False):
    """
    Attribute a version to every package of ``package_list``

    Packages that already carry a version are kept untouched. For the others,
    two candidate versions are resolved from the Pypi release history : the
    version available when the package was first imported in the code and the
    version available when it was added to requirements.txt. When both
    candidates differ the user is asked to choose.

    Args:
        package_list: dict mapping a package name to its pinned version, or
            to ``None`` when the version must be guessed.
        from_import_to_package_mapping: dict import name -> Pypi package name.
        from_package_to_import_mapping: dict Pypi package name -> import name.
        packages_in_requirements: collection of the package names found in
            requirements.txt.
        keep_unused_packages: when True, a package that is never imported is
            kept and resolved from its requirements.txt date instead of
            being ignored.

    Returns:
        List of ``(package_name, version)`` tuples. Packages for which no
        version could be attributed are left out.
    """
    packages = []
    # Iterate over the argument, not over a global defined by the script
    for package_name, version in package_list.items():
        print("\n" + "-" * 40)
        print(f"PACKAGE : {package_name}")

        if version is not None:
            print(f"[INFO] Package '{package_name}' version is specified in requirements.txt (Version {version})")
            packages.append((package_name, version))
            continue

        # Reset variables
        choice = None
        date_added_via_import_str = None
        date_added_via_req_str = None
        import_version = None
        req_version = None

        # Pypi package to import mapping
        import_name = from_package_to_import_mapping.get(package_name, package_name)
        pypi_package_name = from_import_to_package_mapping.get(package_name, package_name)

        # Get available versions from Pypi
        available_versions = get_pypi_history(pypi_package_name, ignore_release_candidat=True)
        if not available_versions:
            # Unknown package (None) or no usable release (empty list)
            print(f"[INFO] Couldn't find Pypi releases for package '{package_name}', ignoring")
            continue

        # Retrieve candidate version based on the first time the package was imported in *.py
        date_added_via_import = get_date_when_package_committed(import_name, via_requirements=False)
        if date_added_via_import is None:
            print(f"[INFO] Package '{package_name}' is defined in requirements.txt but not used (Or committed),")
            if not keep_unused_packages:
                print(f"[INFO] Ignoring package '{package_name}' (Use --keep_unused_packages if you want to keep it)")
                continue
            print("will use the requirements version since --keep_unused_packages set")
            choice = 2
        else:
            date_added_via_import_str = date_added_via_import.strftime("%Y-%m-%d")
            import_version = find_version_at_date(available_versions, date_added_via_import)

        # Retrieve candidate version based on the first time the package was added to requirements.txt
        if pypi_package_name.lower() in packages_in_requirements:
            date_added_via_req = get_date_when_package_committed(pypi_package_name, via_requirements=True)
            if date_added_via_req is not None:
                req_version = find_version_at_date(available_versions, date_added_via_req)
                date_added_via_req_str = date_added_via_req.strftime("%Y-%m-%d")
            else:
                print(f"[INFO] Package '{package_name}' was not in requirements.txt, using date of first import (Version {import_version} / {date_added_via_import_str})")
                choice = 1

            if choice is None:
                if req_version != import_version:
                    # Ask user to choose version based on either first import date or first added to requirements.txt date
                    choice = user_response_multi_choices(f"Choose guessing strategy for package '{package_name}'", [
                        f'{"First time the package was imported".ljust(50)} (Version {import_version} / {date_added_via_import_str})',
                        f'{"When the package was added to requirements.txt".ljust(50)} (Version {req_version} / {date_added_via_req_str})'
                    ])
                else:
                    # Both requirements.txt and first import resolve to the same version
                    choice = 1
        else:
            print(f"[INFO] Package '{package_name}' was not found in requirements.txt, using date of first import (Version {import_version} / {date_added_via_import_str})")
            choice = 1

        # Choice 2 is the requirements.txt date, anything else the first import date
        version = req_version if choice == 2 else import_version

        if version is None:
            print(f"[ERROR] Couldn't attribute version to package '{package_name}'. Are you sure you commited the changes ?")
            continue

        print(f"[INFO] Package '{package_name}' was attributed version {version}")
        packages.append((package_name, version))

    return packages
# Script entry point : guess the versions of every package used by the project
# located in the current directory, print the resulting requirements and
# optionally write them to a file (--write).
if __name__ == "__main__":
    print("=" * 60)
    print("Python requirements guesser")
    print("=" * 60)
    print(f"Guessing package versions for project '{os.getcwd()}'")

    if not validate_cwd_is_git_repo():
        print("[ERROR] py-reqs-guesser must be runned inside a git repository")
        raise SystemExit(1)

    print("Follow the steps to guess package versions based on when they were added to git.")

    args = parser.parse_args()

    # Retrieve mapping files from https://github.com/bndr/pipreqs
    stdlib_list, from_import_to_package_mapping, from_package_to_import_mapping = get_mapping_files_from_pipreqs()

    # Get local packages
    if args.force_guess:
        args.force_guess = set(args.force_guess.strip().split(","))
    local_packages = get_local_modules(print_modules=True, force_guess=args.force_guess)

    # Remove local_packages from the list of imports
    stdlib_list.update(local_packages)

    # Retrieve all imported packages in project
    all_imported_packages = set(get_all_imports(stdlib_list))

    # Retrieve packages in requirements.txt
    packages_in_requirements_version_map = load_packages_from_requirements('requirements.txt')
    packages_in_requirements = set(packages_in_requirements_version_map.keys())

    # Merge packages in requirements.txt and imports
    # (imported packages missing from requirements.txt get no version yet)
    all_packages = packages_in_requirements_version_map
    extra_packages = all_imported_packages - packages_in_requirements
    for extra_package in extra_packages:
        all_packages[extra_package] = None

    # Interactive guessing of packages versions
    packages = guess_package_versions(all_packages, from_import_to_package_mapping, from_package_to_import_mapping, packages_in_requirements)

    new_requirements_txt = "".join(
        f"{package_name}=={version}\n"
        for package_name, version in sorted(packages, key=lambda x: x[0])
    )

    print("\n" + "=" * 60 + "\n")
    print("Requirements.txt :")
    print(new_requirements_txt)

    if args.write is None:
        print("Use the --write {path} parameter to write the new requirements file")
    else:
        # --write given without a path : fall back on requirements.txt
        if len(args.write) == 0:
            args.write = "requirements.txt"

        print(f"Writing requirements to file {args.write}")

        if os.path.exists(args.write):
            # NOTE(review): user_response_yes_no is not among the names imported
            # at the top of this file; presumably it lives in utils next to
            # user_response_multi_choices - confirm.
            from utils import user_response_yes_no
            if not user_response_yes_no(f"File {args.write} already exist, are you sure you want to overwrite it ?"):
                raise SystemExit(0)

        with open(args.write, "w") as requirements_file:
            requirements_file.write(new_requirements_txt)
2021-06-24 07:36:28 +02:00