Merge pull request #31 from huhanGitHub/main

fix: refine ming data quality
bigcode-project · May 5, 2024 · cbdaac3 · cbdaac3
2 parents 828dbab + 28367c6
commit cbdaac3
Show file tree

Hide file tree

Showing 69 changed files with 561 additions and 506 deletions.
diff --git a/data/raw/f_427_ming.py b/data/raw/f_427_ming.py
@@ -54,26 +54,34 @@ def run_tests():
 
 
 class TestCases(unittest.TestCase):
-    def test_case_1(self):
-        result = f_427(['1a2b3c4d', '5e6f7g8h'])
-        self.assertEqual(result, '426614caa490f2c185aebf58f1d4adac')
 
-    def test_case_2(self):
+    def test_normal_functionality(self):
+        """Test the function with default parameters."""
         result = f_427()
-        self.assertEqual(result, 'aa1f8c53e0aee57fccd07b90a902579a')
-
-    def test_case_3(self):
-        result = f_427(['12121212', '34343434'])
-        self.assertEqual(result, 'b523721fccb8fe2e7bf999e74e25056f')
-
-    def test_case_4(self):
-        result = f_427(['1VVVVVVV', '3VVVVVVV', 'F3fF3fF3'])
-        self.assertEqual(result, 'fae7b34f299d23a584fbc19c2fcdf865')
-
-    def test_case_5(self):
-        # test error message
+        self.assertIsInstance(result, str)
+
+    def test_custom_keys_list(self):
+        """Test the function with a custom list of hexadecimal keys."""
+        custom_keys = ['1A2FC614', '1B0FC614', '1C9FC614']
+        result = f_427(hex_keys=custom_keys)
+        self.assertIsInstance(result, str)
+
+    def test_empty_key_list(self):
+        """Test the function with an empty list to check for error handling."""
+        with self.assertRaises(IndexError):
+            f_427(hex_keys=[])
+
+    def test_invalid_hexadecimal(self):
+        """Test the function with an invalid hexadecimal string."""
+        invalid_keys = ['ZZZ', '4A0FC614']
         with self.assertRaises(ValueError):
-            f_427(['1a2b3c4d', '5e6f7g8h', 'invalid_hex'])
+            f_427(hex_keys=invalid_keys)
+
+    def test_consistent_output_with_same_seed(self):
+        """Test that the same seed returns the same result."""
+        result1 = f_427(seed=99)
+        result2 = f_427(seed=99)
+        self.assertEqual(result1, result2)
 
 
 if __name__ == "__main__":

diff --git a/data/raw/f_429_ming.py b/data/raw/f_429_ming.py
@@ -33,41 +33,42 @@ def f_429(hex_string=KEY):
 
 class TestCases(unittest.TestCase):
 
-    def test_case_1(self):
-        # Test with default key
+    def test_default_functionality(self):
+        """Test the function with default parameters."""
         result = f_429()
-        self.assertEqual(result, b'x\x9c\xf3\xeb\x93\xef\x01\x00\x03\xb0\x01\x88')
+        self.assertIsInstance(result, bytes)
 
-    def test_case_2(self):
-        # Test with a different hex string
-        hex_string = "ABCD12"
+    def test_valid_custom_hex_string(self):
+        """Test the function with a valid custom hexadecimal string."""
+        hex_string = '1A2FC614'  # Example hex string
         result = f_429(hex_string)
-        self.assertEqual(result, b'x\x9c\xf3\xd6>+\x04\x00\x03]\x01V')
+        self.assertIsInstance(result, bytes)
 
-    def test_case_3(self):
-        # Test with another different hex string
-        hex_string = "DEADBEEF"
-        result = f_429(hex_string)
-        self.assertEqual(result, b'x\x9c\xf3\x8f[\xbb\x1f\x00\x04s\x02\x1a')
+    def test_invalid_hex_string(self):
+        """Test the function with an invalid hexadecimal string."""
+        with self.assertRaises(ValueError):
+            f_429(hex_string='ZZZZZZZZ')
 
-    def test_case_4(self):
-        # Test with a hex string that has a smaller length
-        hex_string = "00AA"
-        result = f_429(hex_string)
-        self.assertEqual(result, b'x\x9cs\xd6b`\x00\x00\x01\x8e\x00n')
+    def test_boundary_hex_value(self):
+        """Test the function with a large boundary hexadecimal value."""
+        boundary_hex = 'FFFFFFFF'  # Largest value expressible in 8 hex digits (2**32 - 1)
+        result = f_429(boundary_hex)
+        self.assertIsInstance(result, bytes)
+
+    def test_zero_value(self):
+        """Test the function with a hex string representing zero."""
+        zero_hex = '00000000'
+        result = f_429(zero_hex)
+        self.assertIsInstance(result, bytes)
 
-    def test_case_5(self):
-        # Test with a hex string that has a larger length
-        hex_string = "00AABBCCDDEE"
-        result = f_429(hex_string)
-        self.assertEqual(result, b'x\x9c\x0b\xd6\xda}\x16\x00\x04\x11\x02\x06')
 
 def run_tests():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(TestCases))
     runner = unittest.TextTestRunner()
     runner.run(suite)
 
+
 if __name__ == "__main__":
     import doctest
     doctest.testmod()

diff --git a/data/raw/f_430_ming.py b/data/raw/f_430_ming.py
@@ -40,35 +40,40 @@ def run_tests():
     runner = unittest.TextTestRunner()
     runner.run(suite)
 
+
 class TestCases(unittest.TestCase):
-    # Utility function to decode bytes and convert to float
-    def bytes_to_float(self, byte_val):
-        return float(codecs.decode(byte_val, 'utf-8'))
 
-    def test_case_1(self):
-        random.seed(42)
+    def test_default_functionality(self):
+        """Test the function with default parameters."""
         result = f_430()
-        self.assertEqual(result, b'36806.078125')
-
-    def test_case_2(self):
-        result = f_430(['5D7FC614'])
-        self.assertEqual(result, b'1.1519025322058056e+18')
-
-    def test_case_3(self):
-        # Checking consistency over multiple runs
-        random.seed(0)
-        result = f_430(['ABCD1234', 'DEADBEEF', '00AABEEF'])
-        self.assertEqual(result, b'-6.259853398707798e+18')
-
-    def test_case_4(self):
-        result = f_430(['00000000'])
-        self.assertEqual(result, b'0.0')
-
-    def test_case_5(self):
-        # Checking the decoding process
-        result = f_430(['AAAAAAAA'])
-        self.assertEqual(result, b'-3.0316488252093987e-13')
-
+        self.assertIsInstance(result, bytes)  # Check that the output is a bytes object
+
+    def test_custom_hex_keys(self):
+        """Test the function with a custom list of hexadecimal keys."""
+        custom_keys = ['1A2FC614', '1B0FC614', '1C9FC614']
+        result = f_430(hex_keys=custom_keys)
+        self.assertIsInstance(result, bytes)
+
+    def test_empty_list(self):
+        """Test the function with an empty list."""
+        with self.assertRaises(IndexError):  # Assuming random.choice will raise IndexError on empty list
+            f_430(hex_keys=[])
+
+    def test_consistency_of_output(self):
+        """Ensure that the output is consistent with a fixed seed."""
+        random.seed(42)  # Set the seed for predictability
+        first_result = f_430()
+        random.seed(42)  # Reset seed to ensure same choice is made
+        second_result = f_430()
+        self.assertEqual(first_result, second_result)
+
+    def test_invalid_hex_key(self):
+        """Test with an invalid hex key."""
+        invalid_keys = ['ZZZZZZZZ', 'XXXX']
+        with self.assertRaises(ValueError):
+            f_430(hex_keys=invalid_keys)
+
+
 if __name__ == "__main__":
     import doctest
     doctest.testmod()

diff --git a/data/raw/f_433_ming.py b/data/raw/f_433_ming.py
@@ -4,11 +4,11 @@
 
 def f_433(df):
     """
-    Encodes a Pandas DataFrame as a Base64 string. The DataFrame is first converted to CSV format,
-    then encoded to bytes, and finally encoded to a Base64 string.
+    Encodes a dict of lists as a Base64 string. The dict is first converted to a Pandas DataFrame,
+    which is then converted to CSV format, encoded to bytes, and finally encoded to a Base64 string.
 
     Parameters:
-        df (DataFrame): The pandas DataFrame to be encoded.
+        df (dict of list): A dictionary mapping column names to lists of values.
 
     Returns:
         str: The Base64 encoded string of the DataFrame's CSV representation.
@@ -37,6 +37,7 @@ def f_433(df):
 import unittest
 from io import StringIO
 
+
 class TestCases(unittest.TestCase):
     def test_encode_basic_dataframe(self):
         df = {'A': [1, 2, 3], 'B': [4, 5, 6]}

diff --git a/data/raw/f_434_ming.py b/data/raw/f_434_ming.py
@@ -5,7 +5,7 @@
 def f_434(list_of_menuitems):
     """
     Given a nested list of menu items, this function flattens the list and returns a Pandas DataFrame
-    detailing the count of each individual menu item.
+    detailing the count of each individual menu item with index name 'MenuItem'.
 
     Parameters:
         list_of_menuitems (list): A nested list of menu items.
@@ -44,33 +44,42 @@ def run_tests():
     runner = unittest.TextTestRunner()
     runner.run(suite)
 
+
 class TestCases(unittest.TestCase):
-    def test_case_1(self):
-        result = f_434([['Pizza', 'Burger'], ['Pizza', 'Coke'], ['Pasta', 'Coke']])
-        expected_result = pd.DataFrame({'Count': [2, 1, 2, 1]},
-                                       index=pd.Index(['Pizza', 'Burger', 'Coke', 'Pasta'], name='MenuItem'))
-        pd.testing.assert_frame_equal(result, expected_result)
-
-    def test_case_2(self):
-        result = f_434([['Bread', 'Butter'], ['Bread', 'Jam'], ['Bread', 'Jam'], ['Butter', 'Jam']])
-        expected_result = pd.DataFrame({'Count': [3, 2, 3]},
-                                       index=pd.Index(['Bread', 'Butter', 'Jam'], name='MenuItem'))
-        pd.testing.assert_frame_equal(result, expected_result)
-
-    def test_case_3(self):
-        result = f_434([['Tea', 'Coffee'], ['Tea', 'Milk'], ['Coffee', 'Milk']])
-        expected_result = pd.DataFrame({'Count': [2, 2, 2]}, index=pd.Index(['Tea', 'Coffee', 'Milk'], name='MenuItem'))
-        pd.testing.assert_frame_equal(result, expected_result)
-
-    def test_case_4(self):
-        result = f_434([['Sandwich'], ['Sandwich', 'Juice'], ['Coffee']])
-        expected_result = pd.DataFrame({'Count': [2, 1, 1]},
-                                       index=pd.Index(['Sandwich', 'Juice', 'Coffee'], name='MenuItem'))
-        pd.testing.assert_frame_equal(result, expected_result)
-
-    def test_case_5(self):
-        result = f_434([[], [], []])
-        self.assertTrue(result.empty)
+
+    def test_normal_functionality(self):
+        """Test the function with typical nested lists."""
+        input_list = [['apple', 'banana'], ['apple'], ['banana', 'orange']]
+        expected_df = pd.DataFrame({'Count': [2, 2, 1]}, index=['apple', 'banana', 'orange'])
+        expected_df.index.name = 'MenuItem'
+        pd.testing.assert_frame_equal(f_434(input_list), expected_df)
+
+    def test_empty_list(self):
+        """Test the function with an empty list."""
+        expected_df = pd.DataFrame(columns=['Count'])
+        expected_df.index.name = 'MenuItem'
+        pd.testing.assert_frame_equal(f_434([]), expected_df)
+
+    def test_single_level_list(self):
+        """Test with a nested list containing a single sublist."""
+        input_list = [['apple', 'banana', 'apple']]
+        expected_df = pd.DataFrame({'Count': [2, 1]}, index=['apple', 'banana'])
+        expected_df.index.name = 'MenuItem'
+        pd.testing.assert_frame_equal(f_434(input_list), expected_df)
+
+    def test_uniform_list(self):
+        """Test with a list where all sublists contain the same item."""
+        input_list = [['apple'], ['apple'], ['apple']]
+        expected_df = pd.DataFrame({'Count': [3]}, index=['apple'])
+        expected_df.index.name = 'MenuItem'
+        pd.testing.assert_frame_equal(f_434(input_list), expected_df)
+
+    def test_duplicate_items_across_sublists(self):
+        """Ensure items appearing in multiple sublists are counted correctly."""
+        input_list = [['apple', 'banana'], ['banana', 'banana', 'apple']]
+        expected_df = pd.DataFrame({'Count': [2, 3]}, index=['apple', 'banana'])
+        expected_df.index.name = 'MenuItem'
+        pd.testing.assert_frame_equal(f_434(input_list), expected_df)
 
 
 if __name__ == "__main__":