From 04693ecb0f853f0115a8ad7ea3c561baef217fcf Mon Sep 17 00:00:00 2001 From: chris meyers Date: Fri, 13 Apr 2018 15:13:20 -0400 Subject: [PATCH 1/2] remove infinite loop regex * Fancy url finding regex can result in infinite loop for malformed ipv6 urls so replace it with a more nieve regex that can overmatch. * regex's that find malformed ipv6 urls will be passed to urlparse. This can result in a parsing/ValueError. For these cases we redact the entire found URI. --- awx/main/redact.py | 65 +++++++++++++----------- awx/main/tests/unit/test_redact.py | 79 ++++++++++++++++++------------ 2 files changed, 83 insertions(+), 61 deletions(-) diff --git a/awx/main/redact.py b/awx/main/redact.py index 9d3b6a595d..16c8fc0513 100644 --- a/awx/main/redact.py +++ b/awx/main/redact.py @@ -6,8 +6,7 @@ REPLACE_STR = '$encrypted$' class UriCleaner(object): REPLACE_STR = REPLACE_STR - # https://regex101.com/r/sV2dO2/2 - SENSITIVE_URI_PATTERN = re.compile(ur'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))', re.MULTILINE) # NOQA + SENSITIVE_URI_PATTERN = re.compile(ur'(\w+:(\/?\/?)[^\s]+)', re.MULTILINE) # NOQA @staticmethod def remove_sensitive(cleartext): @@ -17,38 +16,46 @@ class UriCleaner(object): match = UriCleaner.SENSITIVE_URI_PATTERN.search(redactedtext, text_index) if not match: break - o = urlparse.urlsplit(match.group(1)) - if not o.username and not o.password: - if o.netloc and ":" in o.netloc: - # Handle the special case url http://username:password that can appear in SCM url - # on account of a bug? in ansible redaction - (username, password) = o.netloc.split(':') + try: + uri_str = match.group(1) + # May raise a ValueError if invalid URI for one reason or another + o = urlparse.urlsplit(uri_str) + + if not o.username and not o.password: + if o.netloc and ":" in o.netloc: + # Handle the special case url http://username:password that can appear in SCM url + # on account of a bug? in ansible redaction + (username, password) = o.netloc.split(':') + else: + text_index += len(match.group(1)) + continue else: - text_index += len(match.group(1)) - continue - else: - username = o.username - password = o.password + username = o.username + password = o.password - # Given a python MatchObject, with respect to redactedtext, find and - # replace the first occurance of username and the first and second - # occurance of password + # Given a python MatchObject, with respect to redactedtext, find and + # replace the first occurance of username and the first and second + # occurance of password - uri_str = redactedtext[match.start():match.end()] - if username: - uri_str = uri_str.replace(username, UriCleaner.REPLACE_STR, 1) - # 2, just in case the password is $encrypted$ - if password: - uri_str = uri_str.replace(password, UriCleaner.REPLACE_STR, 2) + uri_str = redactedtext[match.start():match.end()] + if username: + uri_str = uri_str.replace(username, UriCleaner.REPLACE_STR, 1) + # 2, just in case the password is $encrypted$ + if password: + uri_str = uri_str.replace(password, UriCleaner.REPLACE_STR, 2) - t = redactedtext[:match.start()] + uri_str - text_index = len(t) - if (match.end() < len(redactedtext)): - t += redactedtext[match.end():] + t = redactedtext[:match.start()] + uri_str + text_index = len(t) + if (match.end() < len(redactedtext)): + t += redactedtext[match.end():] - redactedtext = t - if text_index >= len(redactedtext): - text_index = len(redactedtext) - 1 + redactedtext = t + if text_index >= len(redactedtext): + text_index = len(redactedtext) - 1 + except ValueError: + # Invalid URI, redact the whole URI to be safe + redactedtext = redactedtext[:match.start()] + UriCleaner.REPLACE_STR + redactedtext[match.end():] + text_index = match.start() + len(UriCleaner.REPLACE_STR) return redactedtext diff --git a/awx/main/tests/unit/test_redact.py b/awx/main/tests/unit/test_redact.py index 931ef72ebc..17d948cd27 100644 --- a/awx/main/tests/unit/test_redact.py +++ b/awx/main/tests/unit/test_redact.py @@ -1,4 +1,5 @@ import textwrap +import pytest # AWX from awx.main.redact import UriCleaner @@ -78,60 +79,74 @@ TEST_CLEARTEXT.append({ }) +@pytest.mark.parametrize('username, password, not_uri', [ + ('', '', 'www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), + ('', '', 'https://www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), + ('root', 'gigity', 'https://root@gigity@www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), + ('root', 'gigity@', 'https://root:gigity@@@www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), +]) # should redact sensitive usernames and passwords -def test_uri_scm_simple_redacted(): - for uri in TEST_URIS: - redacted_str = UriCleaner.remove_sensitive(str(uri)) - if uri.username: - assert uri.username not in redacted_str - if uri.password: - assert uri.username not in redacted_str +def test_non_uri_redact(username, password, not_uri): + redacted_str = UriCleaner.remove_sensitive(not_uri) + if username: + assert username not in redacted_str + if password: + assert password not in redacted_str + + +def test_multiple_non_uri_redact(): + non_uri = 'https://www.famfamfam.com](http://www.famfamfam.com/fijdlfd hi ' + non_uri += 'https://www.famfamfam.com](http://www.famfamfam.com/fijdlfd world ' + non_uri += 'https://www.famfamfam.com](http://www.famfamfam.com/fijdlfd foo ' + non_uri += 'https://foo:bar@giggity.com bar' + redacted_str = UriCleaner.remove_sensitive(non_uri) + assert redacted_str == '$encrypted$ hi $encrypted$ world $encrypted$ foo https://$encrypted$:$encrypted$@giggity.com bar' # should replace secret data with safe string, UriCleaner.REPLACE_STR -def test_uri_scm_simple_replaced(): - for uri in TEST_URIS: - redacted_str = UriCleaner.remove_sensitive(str(uri)) - assert redacted_str.count(UriCleaner.REPLACE_STR) == uri.get_secret_count() +@pytest.mark.parametrize('uri', TEST_URIS) +def test_uri_scm_simple_replaced(uri): + redacted_str = UriCleaner.remove_sensitive(str(uri)) + assert redacted_str.count(UriCleaner.REPLACE_STR) == uri.get_secret_count() # should redact multiple uris in text -def test_uri_scm_multiple(): +@pytest.mark.parametrize('uri', TEST_URIS) +def test_uri_scm_multiple(uri): cleartext = '' - for uri in TEST_URIS: - cleartext += str(uri) + ' ' - for uri in TEST_URIS: - cleartext += str(uri) + '\n' + cleartext += str(uri) + ' ' + cleartext += str(uri) + '\n' redacted_str = UriCleaner.remove_sensitive(str(uri)) if uri.username: assert uri.username not in redacted_str if uri.password: - assert uri.username not in redacted_str + assert uri.password not in redacted_str # should replace multiple secret data with safe string -def test_uri_scm_multiple_replaced(): +@pytest.mark.parametrize('uri', TEST_URIS) +def test_uri_scm_multiple_replaced(uri): cleartext = '' find_count = 0 - for uri in TEST_URIS: - cleartext += str(uri) + ' ' - find_count += uri.get_secret_count() - for uri in TEST_URIS: - cleartext += str(uri) + '\n' - find_count += uri.get_secret_count() + cleartext += str(uri) + ' ' + find_count += uri.get_secret_count() + + cleartext += str(uri) + '\n' + find_count += uri.get_secret_count() redacted_str = UriCleaner.remove_sensitive(cleartext) assert redacted_str.count(UriCleaner.REPLACE_STR) == find_count # should redact and replace multiple secret data within a complex cleartext blob -def test_uri_scm_cleartext_redact_and_replace(): - for test_data in TEST_CLEARTEXT: - uri = test_data['uri'] - redacted_str = UriCleaner.remove_sensitive(test_data['text']) - assert uri.username not in redacted_str - assert uri.password not in redacted_str - # Ensure the host didn't get redacted - assert redacted_str.count(uri.host) == test_data['host_occurrences'] +@pytest.mark.parametrize('test_data', TEST_CLEARTEXT) +def test_uri_scm_cleartext_redact_and_replace(test_data): + uri = test_data['uri'] + redacted_str = UriCleaner.remove_sensitive(test_data['text']) + assert uri.username not in redacted_str + assert uri.password not in redacted_str + # Ensure the host didn't get redacted + assert redacted_str.count(uri.host) == test_data['host_occurrences'] + From 09d5645b90ff96ba67f0b84c0265679e66fec318 Mon Sep 17 00:00:00 2001 From: chris meyers Date: Fri, 13 Apr 2018 15:26:40 -0400 Subject: [PATCH 2/2] redact project update urls when downloading stdout * For ProjectUpdate jobs. Redact potentially sensitive urls from the output. --- awx/api/views.py | 31 +++++++++++++++++++++++------- awx/main/tests/unit/test_redact.py | 14 ++++++++------ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/awx/api/views.py b/awx/api/views.py index 05c15b9243..efb3f80a95 100644 --- a/awx/api/views.py +++ b/awx/api/views.py @@ -77,6 +77,7 @@ from awx.main.utils import ( from awx.main.utils.encryption import encrypt_value from awx.main.utils.filters import SmartFilter from awx.main.utils.insights import filter_insights_api_response +from awx.main.redact import UriCleaner from awx.api.permissions import ( JobTemplateCallbackPermission, TaskPermission, @@ -4645,9 +4646,17 @@ class UnifiedJobList(ListAPIView): serializer_class = UnifiedJobListSerializer -class StdoutANSIFilter(object): +def redact_ansi(line): + # Remove ANSI escape sequences used to embed event data. + line = re.sub(r'\x1b\[K(?:[A-Za-z0-9+/=]+\x1b\[\d+D)+\x1b\[K', '', line) + # Remove ANSI color escape sequences. + return re.sub(r'\x1b[^m]*m', '', line) + + +class StdoutFilter(object): def __init__(self, fileobj): + self._functions = [] self.fileobj = fileobj self.extra_data = '' if hasattr(fileobj, 'close'): @@ -4659,10 +4668,7 @@ class StdoutANSIFilter(object): line = self.fileobj.readline(size) if not line: break - # Remove ANSI escape sequences used to embed event data. - line = re.sub(r'\x1b\[K(?:[A-Za-z0-9+/=]+\x1b\[\d+D)+\x1b\[K', '', line) - # Remove ANSI color escape sequences. - line = re.sub(r'\x1b[^m]*m', '', line) + line = self.process_line(line) data += line if size > 0 and len(data) > size: self.extra_data = data[size:] @@ -4671,6 +4677,14 @@ class StdoutANSIFilter(object): self.extra_data = '' return data + def register(self, func): + self._functions.append(func) + + def process_line(self, line): + for func in self._functions: + line = func(line) + return line + class UnifiedJobStdout(RetrieveAPIView): @@ -4728,9 +4742,12 @@ class UnifiedJobStdout(RetrieveAPIView): suffix='.ansi' if target_format == 'ansi_download' else '' ) content_fd = unified_job.result_stdout_raw_handle(enforce_max_bytes=False) + redactor = StdoutFilter(content_fd) if target_format == 'txt_download': - content_fd = StdoutANSIFilter(content_fd) - response = HttpResponse(FileWrapper(content_fd), content_type='text/plain') + redactor.register(redact_ansi) + if type(unified_job) == ProjectUpdate: + redactor.register(UriCleaner.remove_sensitive) + response = HttpResponse(FileWrapper(redactor), content_type='text/plain') response["Content-Disposition"] = 'attachment; filename="{}"'.format(filename) return response else: diff --git a/awx/main/tests/unit/test_redact.py b/awx/main/tests/unit/test_redact.py index 17d948cd27..fa59ca43d7 100644 --- a/awx/main/tests/unit/test_redact.py +++ b/awx/main/tests/unit/test_redact.py @@ -79,20 +79,22 @@ TEST_CLEARTEXT.append({ }) -@pytest.mark.parametrize('username, password, not_uri', [ - ('', '', 'www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), - ('', '', 'https://www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), - ('root', 'gigity', 'https://root@gigity@www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), - ('root', 'gigity@', 'https://root:gigity@@@www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), +@pytest.mark.parametrize('username, password, not_uri, expected', [ + ('', '', 'www.famfamfam.com](http://www.famfamfam.com/fijdlfd', 'www.famfamfam.com](http://www.famfamfam.com/fijdlfd'), + ('', '', 'https://www.famfamfam.com](http://www.famfamfam.com/fijdlfd', '$encrypted$'), + ('root', 'gigity', 'https://root@gigity@www.famfamfam.com](http://www.famfamfam.com/fijdlfd', '$encrypted$'), + ('root', 'gigity@', 'https://root:gigity@@@www.famfamfam.com](http://www.famfamfam.com/fijdlfd', '$encrypted$'), ]) # should redact sensitive usernames and passwords -def test_non_uri_redact(username, password, not_uri): +def test_non_uri_redact(username, password, not_uri, expected): redacted_str = UriCleaner.remove_sensitive(not_uri) if username: assert username not in redacted_str if password: assert password not in redacted_str + assert redacted_str == expected + def test_multiple_non_uri_redact(): non_uri = 'https://www.famfamfam.com](http://www.famfamfam.com/fijdlfd hi '