2015-07-10 19:16:35 +03:00
#!/usr/bin/python
#
# Parse out as much licensing information as we can from our vendored directories to create a license report.
# You may need to edit this afterwards to replace any 'UNKNOWN' with actual data.
import csv
import fnmatch
import json
import os
import re
import sys
import yolk . pypi
def usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    synopsis = "license-audit.py <path to tower source> <infile> [<outfile>]"
    print(synopsis)
    # sys.exit(1) is exactly "raise SystemExit(1)".
    raise SystemExit(1)
def read_requirements(towerpath):
    """Parse ``<towerpath>/requirements/requirements.txt``.

    Returns a dict mapping package name to ``{'name': ..., 'version': ...}``,
    or None (after printing a message) when the file cannot be opened.

    Recognised line shapes:
      * ``name==version`` pins;
      * ``git+https://.../<name>.git@<ref>#...`` requirements (see
        ``_parse_git_requirement``);
      * anything else is recorded verbatim as a name with an empty version.
    Blank lines and lines starting with ``#`` are skipped.
    """
    filename = '%s/requirements/requirements.txt' % (towerpath,)
    try:
        with open(filename) as f:
            data = f.readlines()
    except IOError:
        print("failed to open %s" % (filename,))
        return None

    ret = {}
    for raw_line in data:
        # Strip instead of chopping the last character: the final line of the
        # file is not guaranteed to end with a newline.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if '==' in line:
            m = re.match(r"(\S+)==(\S+)", line)
            if m:
                name = m.group(1)
                ret[name] = {'name': name, 'version': m.group(2)}
        elif line.startswith("git+https"):
            item = _parse_git_requirement(line)
            if item:
                ret[item['name']] = item
        else:
            # Unversioned requirement: key it by its own name (not by whatever
            # name an earlier line happened to leave behind).
            ret[line] = {'name': line, 'version': ''}
    return ret


def _parse_git_requirement(line):
    """Return the ``{'name', 'version'}`` item for a git+https line, or None."""
    tail = line.rsplit('/', 1)[-1]
    m = re.match(r"(\S+).git@(\S+)#", tail)
    if not m:
        return None
    name = m.group(1)
    version = m.group(2)
    if version.startswith('tower_'):
        version = version[6:]
    if name == 'python-ipy':
        name = 'ipy'
    if len(version) > 20:
        # it's a sha1sum, read it off the egg spec
        parts = tail.rsplit('-', 1)
        if len(parts) < 2:
            parts = tail.rsplit('_', 1)
        # If neither separator is present, keep the sha rather than crash.
        if len(parts) == 2:
            version = parts[1]
    return {'name': name, 'version': version}
def get_js(towerpath):
    """Collect license info for everything under ``<towerpath>/awx/ui/static/lib``.

    Returns a dict mapping each directory entry name to
    ``{'name': ..., 'license': ..., 'url': ...}``.  Values come from the
    entry's ``bower.json`` when it can be opened; otherwise (or when a field
    is absent) they are ``'UNKNOWN'``.  The URL prefers ``homepage`` and
    falls back to ``url``.

    Raises whatever ``os.listdir`` raises if the lib directory is missing,
    and ``ValueError`` if a ``bower.json`` is not valid JSON.
    """
    excludes = []
    directory = '%s/awx/ui/static/lib' % (towerpath,)
    ret = {}
    for item in os.listdir(directory):
        if any(fnmatch.fnmatch(item, exclude) for exclude in excludes):
            continue
        # Start from the dummy entry and fill in whatever bower.json provides.
        pkg = {'name': item, 'license': 'UNKNOWN', 'url': 'UNKNOWN'}
        ret[item] = pkg
        try:
            bowerfile = open('%s/%s/bower.json' % (directory, item))
        except (IOError, OSError):
            # keep dummy entry (should read package.json if it exists)
            continue
        with bowerfile:
            pkginfo = json.load(bowerfile)
        if 'license' in pkginfo:
            pkg['license'] = normalize_license(pkginfo['license'])
        if 'homepage' in pkginfo:
            pkg['url'] = pkginfo['homepage']
        elif 'url' in pkginfo:
            pkg['url'] = pkginfo['url']
    return ret
def search_requirements(requirements_dict, path):
    """Return True if any entry of *requirements_dict* has ``'path'`` equal to *path*."""
    return any(entry['path'] == path for entry in requirements_dict.values())
# Exact license strings (after double quotes are removed) -> canonical name.
_LICENSE_ALIASES = {
    'Apache License, Version 2.0': 'Apache 2.0',
    'Apache License (2.0)': 'Apache 2.0',
    'Apache License 2.0': 'Apache 2.0',
    'Apache-2.0': 'Apache 2.0',
    'Apache License, v2.0': 'Apache 2.0',
    'APL2': 'Apache 2.0',
    'ISC license': 'ISC',
    'MIT License': 'MIT',
    'MIT license': 'MIT',
    'BSD License': 'BSD',
    'Simplified BSD': 'BSD',
    'BSD-derived (http://www.repoze.org/LICENSE.txt)': 'BSD',
    'BSD-like': 'BSD',
    'Modified BSD License': 'BSD',
    'LGPL': 'LGPL 2.1',
    'BSD or Apache License, Version 2.0': 'BSD or Apache 2.0',
    'Python Software Foundation License': 'PSF',
}

# Don't embed YOUR ENTIRE LICENSE in your metadata!
# Substrings that identify a license when the metadata holds a long text.
_LICENSE_SNIPPETS = (
    ('Copyright 2011-2013 Jeffrey Gelens', 'Apache 2.0'),
    ('https://github.com/umutbozkurt/django-rest-framework-mongoengine/blob/master/LICENSE', 'MIT'),
)


def normalize_license(license):
    """Map a free-form license string to a short canonical name.

    Double quotes are removed before matching.  Falsy input, the literal
    string ``'None'``, and input that is empty once quotes are removed all
    yield ``'UNKNOWN'``.  Strings that match no known alias or snippet are
    returned (quote-stripped) unchanged.
    """
    if not license:
        return 'UNKNOWN'
    license = license.replace('"', '')
    if not license or license == 'None':
        return 'UNKNOWN'
    if license in _LICENSE_ALIASES:
        return _LICENSE_ALIASES[license]
    for snippet, canonical in _LICENSE_SNIPPETS:
        if snippet in license:
            return canonical
    return license
def read_csv(filename):
    """Load an existing license CSV into a dict keyed by package name.

    Each row is expected to be ``name,license,url,source``; the result maps
    ``name`` to a dict with those four keys.  Blank rows are skipped.

    Returns None (after printing a message) when the file cannot be opened.
    A non-blank row with fewer than four columns raises IndexError.
    """
    try:
        f = open(filename)
    except IOError:
        # open() raises rather than returning a falsy value, so the failure
        # has to be caught here for callers to see None.
        print("failed to open %s" % (filename,))
        return None

    ret = {}
    with f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                continue
            ret[row[0]] = {
                'name': row[0],
                'license': row[1],
                'url': row[2],
                'source': row[3],
            }
    return ret
def write_csv(filename, data):
    """Write *data* as ``name,license,url,source`` rows, sorted by key.

    *filename* is either a path to create/overwrite, or an already-open
    writable file object (anything with a ``write`` method, e.g.
    ``sys.stdout``).  A stream passed in by the caller is written to but not
    closed.
    """
    if hasattr(filename, 'write'):
        _write_license_rows(filename, data)
        return
    # Binary mode is what the Python 2 csv module expects for output files.
    with open(filename, 'wb') as csvfile:
        _write_license_rows(csvfile, data)


def _write_license_rows(stream, data):
    """Emit one CSV row per entry of *data* to *stream*, in key order."""
    writer = csv.writer(stream, delimiter=',', lineterminator='\n')
    for key in sorted(data):
        item = data[key]
        writer.writerow((item['name'], item['license'], item['url'], item['source']))
# --- Script body -----------------------------------------------------------
# Usage: license-audit.py <path to tower source> <infile> [<outfile>]
# Merges the existing license CSV with what the requirements file, PyPI and
# the vendored JS directory currently say, then writes the merged CSV.

if len(sys.argv) < 3:
    usage()
if len(sys.argv) < 4:
    # NOTE(review): this hands a file object, not a path, to write_csv();
    # write_csv() has to accept an open stream for the no-outfile case to work.
    outputfile = sys.stdout
else:
    outputfile = sys.argv[3]

tower_path = sys.argv[1]

# Read old license CSV
olddata = read_csv(sys.argv[2])

# Read the pinned Python requirements
requirements = read_requirements(tower_path)

if not olddata or not requirements:
    print("No starting data")
    sys.exit(1)

# See if there's pip things in our current license list that we don't have now
for item in olddata.values():
    if item['source'] == 'pip' and item['name'] not in requirements:
        print("Potentially no longer vendoring %s" % (item['name'],))

# Get directory of vendored JS things from the js dir
js_packages = get_js(tower_path)

# See if there's JS things in our current license list that we don't have now
for item in olddata.values():
    if item['source'] == 'js' and item['name'] not in js_packages:
        print("No longer vendoring %s" % (item['name'],))

# Take the requirements file, and get license information where necessary
cs = yolk.pypi.CheeseShop()
for req in requirements.values():
    # name sanitization: ask the index for its own spelling of the name
    (pname, pvers) = cs.query_versions_pypi(req['name'])
    cs_info = cs.release_data(pname, req['version'])
    if not cs_info:
        print("Couldn't find '%s==%s'" % (req['name'], req['version']))
        if req['name'] not in olddata:
            print("... and it's not in the current data. This needs fixed!")
            sys.exit(1)
        # Nothing new to learn; keep the existing row untouched.
        continue
    license = normalize_license(cs_info['license'])
    # An empty/None home page must not clobber a curated URL below.
    url = cs_info['home_page'] or 'UNKNOWN'
    data = olddata.get(req['name'])
    if data is None:
        print("New item %s" % (req['name']))
        olddata[req['name']] = {
            'name': req['name'],
            'license': license,
            'url': url,
            'source': 'pip',
        }
        continue
    # Only overwrite existing values with real (non-UNKNOWN) information.
    if license != 'UNKNOWN' and license != data['license']:
        data['license'] = license
    if url != 'UNKNOWN' and url != data['url']:
        data['url'] = url

# Update JS package info
for pkg in js_packages:
    new = js_packages[pkg]
    if pkg in olddata:
        data = olddata[pkg]
        if new['license'] != 'UNKNOWN' and new['license'] != data['license']:
            data['license'] = new['license']
        if new['url'] != 'UNKNOWN' and new['url'] != data['url']:
            data['url'] = new['url']
    else:
        olddata[pkg] = {
            'name': pkg,
            'license': new['license'],
            'url': new['url'],
            'source': 'js',
        }

write_csv(outputfile, olddata)