update: zhihan data

bigcode-project · May 8, 2024 · 4f3652c · 4f3652c
2 parents c3b0fc9 + 9bc90f1
commit 4f3652c
Show file tree

Hide file tree

Showing 115 changed files with 8,322 additions and 1,933 deletions.
diff --git a/data/clean/f_933_zhihan.py b/data/clean/f_933_zhihan.py
@@ -0,0 +1,113 @@
+import numpy as np
+from collections import Counter
+
+
+def f_933(list_of_tuples):
+    """
+    Sum the numeric entries and tally the categories of (value, category) tuples.
+
+    Position 0 of every tuple is treated as the number and position 1 as the
+    category label; the list is walked once for each of the two positions.
+
+    Parameters:
+    - list_of_tuples (list of tuple): Tuples holding a numeric value followed by a category.
+
+    Returns:
+    - tuple: ``(total, counts)`` where ``total`` is the numpy sum of the numeric values and
+             ``counts`` is a plain dict mapping each category to its number of occurrences.
+
+    Requirements:
+    - numpy
+    - collections.Counter
+
+    Example:
+    >>> list_of_tuples = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
+    >>> sum_of_values, category_counts = f_933(list_of_tuples)
+    >>> print(sum_of_values)
+    15
+    >>> print(category_counts)
+    {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1}
+    """
+
+    amounts = []
+    for entry in list_of_tuples:
+        amounts.append(entry[0])
+    labels = []
+    for entry in list_of_tuples:
+        labels.append(entry[1])
+    return np.sum(amounts), dict(Counter(labels))
+
+
+import unittest
+
+
+def run_tests():
+    """Run all TestCases tests on a text runner; makeSuite() is gone in Python 3.13."""
+    # loadTestsFromTestCase is the documented replacement for the removed makeSuite().
+    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestCases)
+    unittest.TextTestRunner().run(suite)
+
+
+class TestCases(unittest.TestCase):
+    """Check the (sum, category counts) pair returned by f_933 for assorted inputs."""
+
+    def test_case_1(self):
+        # Five entries, each one in a category of its own
+        pairs = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
+        total, counts = f_933(pairs)
+        self.assertEqual(total, 15)
+        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1})
+
+    def test_case_2(self):
+        # Every entry shares a single category
+        pairs = [(5, 'Fruits'), (9, 'Fruits'), (-1, 'Fruits'), (-2, 'Fruits')]
+        total, counts = f_933(pairs)
+        self.assertEqual(total, 11)
+        self.assertEqual(counts, {'Fruits': 4})
+
+    def test_case_3(self):
+        # Only negative numbers, so the total must be negative as well
+        pairs = [(-5, 'Fruits'), (-9, 'Vegetables'), (-1, 'Dairy')]
+        total, counts = f_933(pairs)
+        self.assertEqual(total, -15)
+        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1})
+
+    def test_case_4(self):
+        # No entries at all: zero total and no categories
+        pairs = []
+        total, counts = f_933(pairs)
+        self.assertEqual(total, 0)
+        self.assertEqual(counts, {})
+
+    def test_case_5(self):
+        # Positive and negative numbers that partly cancel out within one category
+        pairs = [(5, 'Fruits'), (-5, 'Fruits'), (3, 'Fruits')]
+        total, counts = f_933(pairs)
+        self.assertEqual(total, 3)
+        self.assertEqual(counts, {'Fruits': 3})
+
+    def test_empty_list(self):
+        """Test with an empty list."""
+        self.assertEqual(f_933([]), (0, {}))
+
+    def test_all_negative_values(self):
+        """Test with all negative numeric values."""
+        pairs = [(-5, 'Fruits'), (-2, 'Vegetables')]
+        self.assertEqual(f_933(pairs), (-7, {'Fruits': 1, 'Vegetables': 1}))
+
+    def test_duplicate_categories(self):
+        """Test with duplicate categories."""
+        self.assertEqual(f_933([(1, 'Fruits'), (2, 'Fruits'), (3, 'Vegetables')]), (6, {'Fruits': 2, 'Vegetables': 1}))
+
+    def test_single_tuple_in_list(self):
+        """Test with a single tuple in the list."""
+        self.assertEqual(f_933([(10, 'Meat')]), (10, {'Meat': 1}))
+
+    def test_float_numeric_values(self):
+        """Test with non-integer numeric values (floats)."""
+        pairs = [(1.5, 'Fruits'), (2.5, 'Vegetables')]
+        self.assertEqual(f_933(pairs), (4.0, {'Fruits': 1, 'Vegetables': 1}))
+
+
+if __name__ == "__main__":
+    run_tests()  # only when executed as a script; importing the module does not run the tests
diff --git a/data/clean/f_934_zhihan.py b/data/clean/f_934_zhihan.py
@@ -0,0 +1,141 @@
+import ast
+import json
+from collections import Counter
+
+
+def f_934(file_pointer):
+    """
+    Count how often each top-level key occurs across the dictionary entries of a JSON document.
+
+    The JSON data is loaded from the given file pointer and iterated entry by entry. String
+    entries are first parsed with ast.literal_eval; an entry is counted only if it is (or
+    evaluates to) a dictionary. Strings rejected with ValueError and non-dictionary entries
+    are skipped, while any other parsing error (for example SyntaxError) propagates.
+
+    Parameters:
+    file_pointer (file object): An open, readable file object positioned at the JSON data.
+
+    Returns:
+    collections.Counter: A Counter object mapping each key to the number of dictionaries containing it.
+
+    Requirements:
+    - ast
+    - json
+    - collections.Counter
+
+    Note:
+    This function assumes the input JSON data is a list of dictionaries or strings that can be evaluated as dictionaries.
+
+    Example:
+    >>> with open("data.json", "r") as file:
+    ...     key_frequency = f_934(file)
+    ...     print(key_frequency)
+    Counter({'name': 5, 'age': 5, 'city': 3})
+    """
+
+    counts = Counter()
+    for entry in json.load(file_pointer):
+        candidate = entry
+        if isinstance(entry, str):
+            try:
+                candidate = ast.literal_eval(entry)
+            except ValueError:
+                continue
+        if not isinstance(candidate, dict):
+            continue
+        counts.update(candidate.keys())
+    return counts
+
+
+import unittest
+from io import BytesIO
+from collections import Counter
+import json
+
+
+def run_tests():
+    """Run all TestCases tests on a text runner; makeSuite() is gone in Python 3.13."""
+    # loadTestsFromTestCase is the documented replacement for the removed makeSuite().
+    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestCases)
+    unittest.TextTestRunner().run(suite)
+
+
+class TestCases(unittest.TestCase):
+    """Feed f_934 in-memory JSON payloads and compare the resulting key counts."""
+
+    def test_with_dicts(self):
+        # JSON array made of plain objects
+        payload = [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}, {"name": "Jake"}]
+        stream = BytesIO(json.dumps(payload).encode('utf-8'))
+
+        # Every key should be counted once per object that carries it
+        wanted = Counter({'name': 3, 'age': 2})
+        counted = f_934(stream)
+        self.assertEqual(counted, wanted)
+
+    def test_with_string_repr_dicts(self):
+        # JSON array whose items are dictionaries written out as strings
+        payload = ['{"city": "New York"}', '{"city": "Los Angeles", "temp": 75}']
+        stream = BytesIO(json.dumps(payload).encode('utf-8'))
+
+        wanted = Counter({'city': 2, 'temp': 1})
+        counted = f_934(stream)
+        self.assertEqual(counted, wanted)
+
+    def test_with_invalid_json(self):
+        # Bytes that do not form a JSON document
+        stream = BytesIO(b'invalid json')
+
+        # f_934 does not guard its json.load call, so the decoder's own
+        # exception is expected to reach the caller
+        with self.assertRaises(json.JSONDecodeError):
+            f_934(stream)
+
+    def test_empty_json(self):
+        # JSON document holding an empty array
+        stream = BytesIO(json.dumps([]).encode('utf-8'))
+
+        wanted = Counter()
+        counted = f_934(stream)
+        self.assertEqual(counted, wanted)
+
+    def test_mixed_valid_invalid_dicts(self):
+        # Two dictionary strings with a non-dictionary string between them; the odd one is skipped
+        payload = ['{"name": "John"}', 'Invalid', '{"age": 30}']
+        stream = BytesIO(json.dumps(payload).encode('utf-8'))
+
+        wanted = Counter({'name': 1, 'age': 1})
+        counted = f_934(stream)
+        self.assertEqual(counted, wanted)
+
+    def test_nested_dicts(self):
+        # Objects that contain further objects: only the outermost keys are counted
+        payload = [{"person": {"name": "John", "age": 30}}, {"person": {"city": "New York"}}]
+        stream = BytesIO(json.dumps(payload).encode('utf-8'))
+
+        wanted = Counter({'person': 2})
+        counted = f_934(stream)
+        self.assertEqual(counted, wanted)
+
+    def test_with_actual_json_objects_instead_of_strings(self):
+        # Real JSON objects (no string representations) with distinct keys
+        payload = [{"key1": "value1"}, {"key2": "value2", "key3": "value3"}]
+        stream = BytesIO(json.dumps(payload).encode('utf-8'))
+
+        wanted = Counter({'key1': 1, 'key2': 1, 'key3': 1})
+        counted = f_934(stream)
+        self.assertEqual(counted, wanted)
+
+    def test_invalid_json_structure(self):
+        # The top-level JSON value is an object instead of an array
+        payload = {"not": "a list"}
+        stream = BytesIO(json.dumps(payload).encode('utf-8'))
+
+        # Iterating the loaded dict yields its key "not"; f_934 passes that string to
+        # ast.literal_eval, which raises SyntaxError, and only ValueError is caught there
+        with self.assertRaises(SyntaxError):
+            f_934(stream)
+
+
+if __name__ == "__main__":
+    run_tests()  # only when executed as a script; importing the module does not run the tests
diff --git a/data/clean/f_935_zhihan.py b/data/clean/f_935_zhihan.py
@@ -0,0 +1,142 @@
+import ast
+import os
+import glob
+
+# Constants
+DIRECTORY = 'data'  # NOTE(review): not referenced in the visible code; presumably a default input directory - confirm
+
+def f_935(directory):
+    """
+    Convert the dictionary literals stored in a directory's text files to Python dictionaries.
+
+    Each ``*.txt`` file directly inside the directory is read as UTF-8, in sorted path order
+    (raw glob order is arbitrary); every non-blank line is parsed with ast.literal_eval.
+
+    Parameters:
+    directory (str): The path to the directory containing the text files.
+
+    Returns:
+    list: One parsed value per non-blank line (a dict for well-formed input), in file then line order.
+
+    Requirements:
+    - ast
+    - os
+    - glob
+
+    Example:
+    >>> f_935("sample_directory/")
+    [{'key1': 'value1'}, {'key2': 'value2'}]
+
+    Raises:
+    - ValueError: If a non-blank line is not a valid literal, including text that does not parse at all.
+    """
+    results = []
+    for file in sorted(glob.glob(os.path.join(directory, '*.txt'))):
+        with open(file, 'r', encoding='utf-8') as f:
+            for number, line in enumerate(f, start=1):
+                if line.isspace():
+                    continue  # a blank line holds no dictionary; evaluating it would fail
+                try:
+                    results.append(ast.literal_eval(line.strip()))
+                except SyntaxError as error:  # report it as the documented ValueError
+                    raise ValueError('%s:%d: invalid dictionary representation' % (file, number)) from error
+    return results
+
+import unittest
+import os
+import ast
+import shutil
+
+
+def run_tests():
+    """Run all TestCases tests on a text runner; makeSuite() is gone in Python 3.13."""
+    # loadTestsFromTestCase is the documented replacement for the removed makeSuite().
+    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestCases)
+    unittest.TextTestRunner().run(suite)
+
+class TestCases(unittest.TestCase):
+    """Run f_935 against throw-away fixture directories created under ``testdir_f_935``."""
+
+    def _make_dir(self, name, files):
+        """Create ``<test_dir>/<name>`` containing ``files`` (file name -> text) and return its path."""
+        path = os.path.join(self.test_dir, name)
+        os.makedirs(path, exist_ok=True)
+        for file_name, text in files.items():
+            # The with-block closes the handle even when the write fails.
+            with open(os.path.join(path, file_name), "w") as f:
+                f.write(text)
+        return path
+
+    def setUp(self):
+        """Build the fixture tree from scratch before every test."""
+        self.test_dir = 'testdir_f_935'
+        # exist_ok keeps setUp working if an earlier run left the directory behind
+        os.makedirs(self.test_dir, exist_ok=True)
+        # Five files holding one dictionary each
+        self.sample_directory = self._make_dir('sample_directory', {
+            '1.txt': "{'key1': 'value1'}",
+            '2.txt': "{'key2': 'value2', 'key3': 'value3'}",
+            '3.txt': "{'key4': 'value4'}",
+            '4.txt': "{'key5': 'value5', 'key6': 'value6', 'key7': 'value7'}",
+            '5.txt': "{'key8': 'value8'}",
+        })
+        # A directory without any text file
+        self.empty_directory = self._make_dir('empty_directory', {})
+        # One file with two dictionaries on separate lines
+        self.multi_line_directory = self._make_dir('multi_line_directory', {
+            '1.txt': "{'key1': 'value1'}\n{'key2': 'value2'}",
+        })
+        # Nothing but an unparsable file
+        self.invalid_directory = self._make_dir('invalid_directory', {
+            '1.txt': "invalid",
+        })
+        # An unparsable file next to a valid one
+        self.mixed_directory = self._make_dir('mixed_directory', {
+            '1.txt': "invalid",
+            '2.txt': "{'key1': 'value1'}",
+        })
+
+    def tearDown(self):
+        # Clean up the test directory
+        shutil.rmtree(self.test_dir)
+
+    def test_case_1(self):
+        # Test with the sample directory
+        result = f_935(self.sample_directory)
+        expected_result = [
+            {'key1': 'value1'},
+            {'key2': 'value2', 'key3': 'value3'},
+            {'key4': 'value4'},
+            {'key5': 'value5', 'key6': 'value6', 'key7': 'value7'},
+            {'key8': 'value8'}
+        ]
+        # Files may be visited in any order, so compare as multisets; unlike a
+        # per-item membership check this also fails on unexpected extra entries.
+        self.assertCountEqual(result, expected_result)
+
+    def test_case_2(self):
+        # Test with an empty directory
+        result = f_935(self.empty_directory)
+        self.assertEqual(result, [])
+
+    def test_case_3(self):
+        # Test with a directory whose only text file has no valid dictionary representation
+        with self.assertRaises(ValueError):
+            f_935(self.invalid_directory)
+
+    def test_case_4(self):
+        # Test with a directory containing multiple text files, some of which are invalid
+        with self.assertRaises(ValueError):
+            f_935(self.mixed_directory)
+
+    def test_case_5(self):
+        # Test with a directory containing a text file with multiple valid dictionary representations
+        result = f_935(self.multi_line_directory)
+        expected_result = [
+            {'key1': 'value1'},
+            {'key2': 'value2'}
+        ]
+        self.assertEqual(result, expected_result)
+
+if __name__ == "__main__":
+    run_tests()  # only when executed as a script; importing the module does not run the tests