1
0
mirror of https://github.com/ansible/awx.git synced 2024-10-31 23:51:09 +03:00
awx/tools/license-audit/license-audit.py

257 lines
7.9 KiB
Python
Executable File

#!/usr/bin/python
#
# Parse out as much licensing information as we can from our vendored directories to create a license report.
# You may need to edit this afterwards to replace any 'UNKNOWN' with actual data.
import csv
import fnmatch
import json
import os
import re
import sys
import yolk.pypi
def usage():
    """Print the command-line synopsis to stdout and exit with status 1."""
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("license-audit.py <path to tower source> <infile> [<outfile>]")
    sys.exit(1)
def read_requirements(towerpath):
    """Parse <towerpath>/requirements/requirements.txt.

    Returns a dict mapping package name to {'name': ..., 'version': ...},
    or None (after printing a message) when the file cannot be opened.

    Recognised line shapes:
      * "name==version" pins;
      * "git+https://.../name.git@ref#egg=..." references;
      * any other non-blank line, recorded under its own text with an
        empty version.
    Blank lines and lines starting with '#' are skipped.
    """
    filename = '%s/requirements/requirements.txt' % (towerpath,)
    try:
        with open(filename) as f:
            lines = f.readlines()
    except (IOError, OSError):
        print("failed to open %s" % (filename,))
        return None

    ret = {}
    for raw in lines:
        # strip() rather than [:-1]: the last line may lack a newline.
        line = raw.strip()
        if not line or line.startswith('#'):
            continue

        if '==' in line:
            m = re.match(r"(\S+)==(\S+)", line)
            if m:
                name = m.group(1)
                ret[name] = {'name': name, 'version': m.group(2)}
            continue

        if line.startswith("git+https"):
            # Everything after the last '/', e.g. "pkg.git@ref#egg=pkg-1.0".
            tail = line.rsplit('/', 1)[-1]
            m = re.match(r"(\S+).git@(\S+)#", tail)
            if m:
                name = m.group(1)
                version = m.group(2)
                if version.startswith('tower_'):
                    version = version[6:]
                if name == 'python-ipy':
                    name = 'ipy'
                if len(version) > 20:
                    # It's a sha1sum; read the version off the egg spec
                    # instead. Only the fragment after '#' is examined so
                    # a '-' in the repository name cannot be mistaken for
                    # the version separator.
                    egg = tail.rsplit('#', 1)[-1]
                    for sep in ('-', '_'):
                        parts = egg.rsplit(sep, 1)
                        if len(parts) == 2:
                            version = parts[1]
                            break
                ret[name] = {'name': name, 'version': version}
            continue

        # Unversioned requirement: key it by its own text (not by whatever
        # name an earlier line happened to leave behind).
        ret[line] = {'name': line, 'version': ''}
    return ret
def get_js(towerpath):
    """Collect license info for entries under <towerpath>/awx/ui/static/lib.

    Each directory entry yields a dict with 'name', 'license' and 'url',
    keyed by the entry name. Values come from the entry's bower.json when
    it can be opened; otherwise, and for any field bower.json does not
    provide, the value is 'UNKNOWN'. 'homepage' is preferred over 'url'.

    A bower.json that exists but is not valid JSON still raises, as does a
    missing lib directory.
    """
    excludes = []  # fnmatch patterns of entry names to skip (none today)
    directory = '%s/awx/ui/static/lib' % (towerpath,)
    ret = {}
    for item in os.listdir(directory):
        if any(fnmatch.fnmatch(item, pattern) for pattern in excludes):
            continue

        # Start from the dummy entry and fill in whatever bower.json offers.
        pkg = {'name': item, 'license': 'UNKNOWN', 'url': 'UNKNOWN'}
        try:
            with open('%s/%s/bower.json' % (directory, item)) as bowerfile:
                pkginfo = json.load(bowerfile)
        except (IOError, OSError):
            # No readable bower.json (should read package.json if it exists).
            ret[item] = pkg
            continue

        if 'license' in pkginfo:
            pkg['license'] = normalize_license(pkginfo['license'])
        if 'homepage' in pkginfo:
            pkg['url'] = pkginfo['homepage']
        elif 'url' in pkginfo:
            pkg['url'] = pkginfo['url']
        ret[item] = pkg
    return ret
def search_requirements(requirements_dict, path):
    """Return True if any record in requirements_dict has 'path' equal to path.

    Every record examined must carry a 'path' key; a record without one
    raises KeyError.
    """
    return any(entry['path'] == path for entry in requirements_dict.values())
# Exact-match spellings seen in package metadata, mapped to the short label
# used in the license report.
_LICENSE_ALIASES = {
    'Apache License, Version 2.0': 'Apache 2.0',
    'Apache License (2.0)': 'Apache 2.0',
    'Apache License 2.0': 'Apache 2.0',
    'Apache-2.0': 'Apache 2.0',
    'Apache License, v2.0': 'Apache 2.0',
    'APL2': 'Apache 2.0',
    'ISC license': 'ISC',
    'MIT License': 'MIT',
    'MIT license': 'MIT',
    'BSD License': 'BSD',
    'Simplified BSD': 'BSD',
    'BSD-derived (http://www.repoze.org/LICENSE.txt)': 'BSD',
    'BSD-like': 'BSD',
    'Modified BSD License': 'BSD',
    'LGPL': 'LGPL 2.1',
    'BSD or Apache License, Version 2.0': 'BSD or Apache 2.0',
    'Python Software Foundation License': 'PSF',
}


def normalize_license(license):
    """Map a raw license string from package metadata to a short label.

    Returns 'UNKNOWN' for a missing/empty value or the literal 'None';
    a known spelling is mapped to its short label; anything else is
    returned unchanged apart from having double quotes removed.

    A list or tuple of license strings is normalized element-wise and
    joined with ', ' instead of crashing on the string operations below.
    """
    if not license:
        return 'UNKNOWN'
    if isinstance(license, (list, tuple)):
        return ', '.join(normalize_license(entry) for entry in license)

    # Quotes are removed first, so every comparison below is against the
    # unquoted text (the alias table holds unquoted spellings only).
    license = license.replace('"', '')
    if not license or license == 'None':
        return 'UNKNOWN'
    if license in _LICENSE_ALIASES:
        return _LICENSE_ALIASES[license]

    # Don't embed YOUR ENTIRE LICENSE in your metadata!
    if 'Copyright 2011-2013 Jeffrey Gelens' in license:
        return 'Apache 2.0'
    if 'https://github.com/umutbozkurt/django-rest-framework-mongoengine/blob/master/LICENSE' in license:
        return 'MIT'
    return license
def read_csv(filename):
    """Load the existing license report from a comma-separated file.

    Each non-empty row is read as (name, license, url, source) and stored
    as a dict with those keys, keyed by name. Returns the resulting dict,
    or None (after printing a message) when the file cannot be opened.
    A non-empty row with fewer than four columns raises IndexError.
    """
    # The csv module wants newline='' on Python 3; Python 2's open() has no
    # such parameter.
    open_kwargs = {'newline': ''} if sys.version_info[0] >= 3 else {}
    try:
        # open() raises on failure rather than returning a falsy value.
        f = open(filename, **open_kwargs)
    except (IOError, OSError):
        print("failed to open %s" % (filename,))
        return None

    ret = {}
    with f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line.
                continue
            ret[row[0]] = {
                'name': row[0],
                'license': row[1],
                'url': row[2],
                'source': row[3],
            }
    return ret
def write_csv(filename, data):
    """Write the license report as CSV, one row per entry, sorted by key.

    filename -- a path to create/overwrite, or an already-open writable
                file object (anything with a .write method, e.g.
                sys.stdout), which is written to and left open.
    data     -- dict of records, each with 'name', 'license', 'url' and
                'source' keys.

    Rows are (name, license, url, source), terminated with '\\n'.
    """
    rows = [
        (item['name'], item['license'], item['url'], item['source'])
        for item in (data[key] for key in sorted(data))
    ]

    if hasattr(filename, 'write'):
        # Caller handed us a stream; it owns the stream's lifetime.
        csv.writer(filename, delimiter=',', lineterminator='\n').writerows(rows)
        return

    # The csv module wants binary mode on Python 2 but text mode with
    # newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csvfile = open(filename, 'w', newline='')
    else:
        csvfile = open(filename, 'wb')
    with csvfile:
        csv.writer(csvfile, delimiter=',', lineterminator='\n').writerows(rows)
def main():
    """Refresh the license report.

    Command line: <path to tower source> <infile> [<outfile>].
    Reads the existing report (infile) and the pinned Python requirements,
    reports entries that appear to have been dropped, refreshes license and
    URL for Python packages from PyPI and for JS packages from their
    bower.json, adds entries for anything new, and writes the merged report.
    Exits with status 1 on bad arguments, missing starting data, or a
    requirement that is found neither on PyPI nor in the existing report.

    NOTE: progress messages are printed to stdout, which is also the report
    destination when no outfile is given.
    """
    if len(sys.argv) < 3:
        usage()
    if len(sys.argv) < 4:
        # No outfile given: hand write_csv the stdout stream itself.
        outputfile = sys.stdout
    else:
        outputfile = sys.argv[3]
    tower_path = sys.argv[1]

    # Read old license CSV
    olddata = read_csv(sys.argv[2])

    # Read the pinned Python requirements file
    requirements = read_requirements(tower_path)

    if not olddata or not requirements:
        print("No starting data")
        sys.exit(1)

    # See if there's pip things in our current license list that we don't have now
    for item in olddata.values():
        if item['source'] == 'pip' and item['name'] not in requirements:
            print("Potentially no longer vendoring %s" % (item['name'],))

    # Get directory of vendored JS things from the js dir
    js_packages = get_js(tower_path)

    # See if there's JS things in our current license list that we don't have now
    for item in olddata.values():
        if item['source'] == 'js' and item['name'] not in js_packages:
            print("No longer vendoring %s" % (item['name'],))

    # Take the requirements file, and get license information where necessary
    cs = yolk.pypi.CheeseShop()
    for req in requirements.values():
        # name sanitization: look the release up under the name PyPI reports
        (pname, _versions) = cs.query_versions_pypi(req['name'])
        cs_info = cs.release_data(pname, req['version'])
        if not cs_info:
            print("Couldn't find '%s==%s'" % (req['name'], req['version']))
            if req['name'] not in olddata:
                print("... and it's not in the current data. This needs fixed!")
                sys.exit(1)
            continue

        license_name = normalize_license(cs_info['license'])
        url = cs_info['home_page']

        data = olddata.get(req['name'])
        if data is None:
            print("New item %s" % (req['name'],))
            olddata[req['name']] = {
                'name': req['name'],
                'license': license_name,
                'url': url,
                'source': 'pip',
            }
            continue

        # Only overwrite hand-edited data with something more informative.
        if license_name != 'UNKNOWN' and license_name != data['license']:
            data['license'] = license_name
        if url != 'UNKNOWN' and url != data['url']:
            data['url'] = url

    # Update JS package info
    for pkg, new in js_packages.items():
        data = olddata.get(pkg)
        if data is None:
            olddata[pkg] = {
                'name': pkg,
                'license': new['license'],
                'url': new['url'],
                'source': 'js',
            }
            continue
        if new['license'] != 'UNKNOWN' and new['license'] != data['license']:
            data['license'] = new['license']
        if new['url'] != 'UNKNOWN' and new['url'] != data['url']:
            data['url'] = new['url']

    write_csv(outputfile, olddata)


if __name__ == '__main__':
    main()