2025-08-03 04:22:09 +03:00 · 2021-11-17 20:17:53 +00:00
parent 0f7e58b0e2
commit dab828f63c
2 changed files with 53 additions and 0 deletions
--- a/python/samba/tests/source_chars.py
+++ b/python/samba/tests/source_chars.py
@ -94,6 +94,14 @@ SAFE_FORMAT_CHARS = {
    '\ufeff'
 }

+# These files legitimately mix left-to-right and right-to-left text, so
+# they are exempt from the LTR-plus-RTL failure. Mixing directions is
+# normal in bilingual documents, but it is rare in Samba source code.
+BIDI_FILES = {
+    'source4/heimdal/lib/wind/NormalizationTest.txt',
+    'testdata/source-chars-bidi.py',
+}
+

 def get_git_files():
    try:
@ -196,9 +204,15 @@ class CharacterTests(TestCase):
                else:
                    self.fail(f"could not decode {name}: {e}")

+            dirs = set()
            for c in set(s):
                if is_bad_char(c):
                    self.fail(f"{name} has potentially bad format characters!")
+                dirs.add(u.bidirectional(c))
+
+            if 'L' in dirs and 'R' in dirs:
+                if name not in BIDI_FILES:
+                    self.fail(f"{name} has LTR and RTL text ({dirs})")

    def test_unexpected_format_chars_do_fail(self):
        """Test the test"""
@ -212,6 +226,21 @@ class CharacterTests(TestCase):
            bad_chars = [c for c in chars if is_bad_char(c)]
            self.assertEqual(len(bad_chars), n_bad)

+    def test_unexpected_bidi_fails(self):
+        """Test the test: the fixture must hold both 'L' and 'R' chars."""
+        for name in [
+                'testdata/source-chars-bidi.py'
+        ]:
+            fullname = os.path.join(ROOT, name)
+            # Decode as UTF-8 explicitly: the fixture contains non-ASCII
+            # (Hebrew) text and the locale default may not handle it.
+            with open(fullname, encoding='utf-8') as f:
+                s = f.read()
+
+            dirs = {u.bidirectional(c) for c in set(s)}
+            self.assertIn('L', dirs)
+            self.assertIn('R', dirs)
+

 def check_file_text():
    """If called directly as a script, count the found characters."""
--- a/testdata/source-chars-bidi.py
+++ b/testdata/source-chars-bidi.py
@ -0,0 +1,24 @@
+# Fixture for samba.tests.source_chars: mixes LTR and RTL text so that
+# the bidi check is exercised. (make test TESTS=samba.tests.source_chars)
+
+x = א =2
+ח = n = 3
+
+a = x  # 2 * n * m
+b = א  # 2 * ח * m
+c = "x#"  #  n
+d = "א#"  #  ח
+e = f"x{x}n{n}"
+f = f"א{א}ח{ח}"
+
+print(a)
+print(b)
+print(c)
+print(d)
+print(e)
+print(f)
+
+assert a == b
+assert c == d.replace("א", "x")
+assert e[1] == f[1]
+assert e[3] == f[3]