Skip to content

Commit

Permalink
update: zhihan data
Browse files Browse the repository at this point in the history
update: zhihan data
  • Loading branch information
terryyz authored May 8, 2024
2 parents c3b0fc9 + 9bc90f1 commit 4f3652c
Show file tree
Hide file tree
Showing 115 changed files with 8,322 additions and 1,933 deletions.
113 changes: 113 additions & 0 deletions data/clean/f_933_zhihan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import numpy as np
from collections import Counter


def f_933(list_of_tuples):
    """
    Total the numeric entries and tally the categories of a list of (value, category) tuples.

    Index 0 of every tuple is treated as the numeric value and index 1 as the category.

    Parameters:
    - list_of_tuples (list of tuple): A list where each tuple contains a numeric value and a category.

    Returns:
    - tuple: A 2-element tuple ``(total, counts)``. ``total`` is the result of ``numpy.sum`` over
      the numeric values; ``counts`` is a plain dict mapping each category to the number of
      tuples that carry it.

    Requirements:
    - numpy
    - collections.Counter

    Example:
    >>> list_of_tuples = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
    >>> sum_of_values, category_counts = f_933(list_of_tuples)
    >>> print(sum_of_values)
    15
    >>> print(category_counts)
    {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1}
    """
    # Two separate passes: one collects the values, the other the categories.
    amounts = [entry[0] for entry in list_of_tuples]
    labels = [entry[1] for entry in list_of_tuples]

    total = np.sum(amounts)

    # Counter is converted to a plain dict so callers get an ordinary mapping.
    return total, dict(Counter(labels))


import unittest


def run_tests():
    """Collect every test method of ``TestCases`` and execute them with a TextTestRunner."""
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # TestLoader.loadTestsFromTestCase is the supported way to build the suite.
    suite = unittest.TestSuite()
    suite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


class TestCases(unittest.TestCase):
    """Checks the total and the per-category counts returned by f_933."""

    def test_case_1(self):
        # Five tuples, each with its own category.
        pairs = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
        expected_counts = {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1}
        total, counts = f_933(pairs)
        self.assertEqual(total, 15)
        self.assertEqual(counts, expected_counts)

    def test_case_2(self):
        # Every tuple shares one category.
        pairs = [(5, 'Fruits'), (9, 'Fruits'), (-1, 'Fruits'), (-2, 'Fruits')]
        total, counts = f_933(pairs)
        self.assertEqual(total, 11)
        self.assertEqual(counts, {'Fruits': 4})

    def test_case_3(self):
        # Only negative numeric values.
        pairs = [(-5, 'Fruits'), (-9, 'Vegetables'), (-1, 'Dairy')]
        total, counts = f_933(pairs)
        self.assertEqual(total, -15)
        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1})

    def test_case_4(self):
        # No tuples at all.
        total, counts = f_933([])
        self.assertEqual(total, 0)
        self.assertEqual(counts, {})

    def test_case_5(self):
        # Positive and negative values that partly cancel, all in one category.
        pairs = [(5, 'Fruits'), (-5, 'Fruits'), (3, 'Fruits')]
        total, counts = f_933(pairs)
        self.assertEqual(total, 3)
        self.assertEqual(counts, {'Fruits': 3})

    def test_empty_list(self):
        """Test with an empty list."""
        outcome = f_933([])
        self.assertEqual(outcome, (0, {}))

    def test_all_negative_values(self):
        """Test with all negative numeric values."""
        outcome = f_933([(-5, 'Fruits'), (-2, 'Vegetables')])
        self.assertEqual(outcome, (-7, {'Fruits': 1, 'Vegetables': 1}))

    def test_duplicate_categories(self):
        """Test with duplicate categories."""
        outcome = f_933([(1, 'Fruits'), (2, 'Fruits'), (3, 'Vegetables')])
        self.assertEqual(outcome, (6, {'Fruits': 2, 'Vegetables': 1}))

    def test_single_tuple_in_list(self):
        """Test with a single tuple in the list."""
        outcome = f_933([(10, 'Meat')])
        self.assertEqual(outcome, (10, {'Meat': 1}))

    def test_float_numeric_values(self):
        """Test with non-integer numeric values (floats)."""
        outcome = f_933([(1.5, 'Fruits'), (2.5, 'Vegetables')])
        self.assertEqual(outcome, (4.0, {'Fruits': 1, 'Vegetables': 1}))


# Run the test suite only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    run_tests()
141 changes: 141 additions & 0 deletions data/clean/f_934_zhihan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import ast
import json
from collections import Counter


def f_934(file_pointer):
    """
    Count how often each top-level key occurs across the dictionary entries of a JSON document.

    The JSON content is loaded from the given file object and iterated entry by entry. String
    entries are passed through ast.literal_eval so that textual dictionary representations become
    real dictionaries; a string for which literal_eval raises ValueError is skipped. Anything that
    is not a dictionary after that step is ignored. Nested dictionaries are not descended into.

    Parameters:
    file_pointer (file object): An open file object pointing to the JSON file containing the data. This file should
                                already be opened in the correct mode (e.g., 'r' for reading).

    Returns:
    collections.Counter: A Counter mapping each key to the number of dictionary entries that contain it.

    Raises:
    json.JSONDecodeError: If the file content is not valid JSON.
    SyntaxError: If a string entry is not parseable Python source; only ValueError from
                 ast.literal_eval is caught here, other exceptions propagate.

    Requirements:
    - ast
    - json
    - collections.Counter

    Note:
    This function assumes the input JSON data is a list of dictionaries or strings that can be evaluated as dictionaries.

    Example:
    >>> with open("data.json", "r") as file:
    ...     key_frequency = f_934(file)
    >>> print(key_frequency)
    Counter({'name': 5, 'age': 5, 'city': 3})
    """
    tally = Counter()

    for entry in json.load(file_pointer):
        candidate = entry
        if isinstance(candidate, str):
            try:
                candidate = ast.literal_eval(candidate)
            except ValueError:
                # Not a literal at all (e.g. a bare word): skip this entry.
                continue

        if not isinstance(candidate, dict):
            continue

        # Count each key once per dictionary; values play no role.
        for key in candidate:
            tally[key] += 1

    return tally


import unittest
from io import BytesIO
from collections import Counter
import json


def run_tests():
    """Collect every test method of ``TestCases`` and execute them with a TextTestRunner."""
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # TestLoader.loadTestsFromTestCase is the supported way to build the suite.
    suite = unittest.TestSuite()
    suite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)


class TestCases(unittest.TestCase):
    """Feeds f_934 in-memory JSON byte streams and checks the resulting key counts."""

    @staticmethod
    def _stream(payload):
        """Serialize ``payload`` to UTF-8 encoded JSON and wrap the bytes in a BytesIO."""
        return BytesIO(json.dumps(payload).encode('utf-8'))

    def test_with_dicts(self):
        # Plain JSON objects: 'name' appears in all three, 'age' in two of them.
        records = [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}, {"name": "Jake"}]
        outcome = f_934(self._stream(records))
        self.assertEqual(outcome, Counter({'name': 3, 'age': 2}))

    def test_with_string_repr_dicts(self):
        # Entries are strings that spell out dictionaries.
        records = ['{"city": "New York"}', '{"city": "Los Angeles", "temp": 75}']
        outcome = f_934(self._stream(records))
        self.assertEqual(outcome, Counter({'city': 2, 'temp': 1}))

    def test_with_invalid_json(self):
        # The stream does not contain JSON at all, so loading it must fail.
        broken_stream = BytesIO(b'invalid json')
        with self.assertRaises(json.JSONDecodeError):
            f_934(broken_stream)

    def test_empty_json(self):
        # An empty JSON array yields an empty Counter.
        outcome = f_934(self._stream([]))
        self.assertEqual(outcome, Counter())

    def test_mixed_valid_invalid_dicts(self):
        # The bare word 'Invalid' is not a dictionary literal and is skipped.
        records = ['{"name": "John"}', 'Invalid', '{"age": 30}']
        outcome = f_934(self._stream(records))
        self.assertEqual(outcome, Counter({'name': 1, 'age': 1}))

    def test_nested_dicts(self):
        # Only top-level keys are counted; the inner dictionaries are not inspected.
        records = [{"person": {"name": "John", "age": 30}}, {"person": {"city": "New York"}}]
        outcome = f_934(self._stream(records))
        self.assertEqual(outcome, Counter({'person': 2}))

    def test_with_actual_json_objects_instead_of_strings(self):
        # Real JSON objects (not string representations) with disjoint keys.
        records = [{"key1": "value1"}, {"key2": "value2", "key3": "value3"}]
        outcome = f_934(self._stream(records))
        self.assertEqual(outcome, Counter({'key1': 1, 'key2': 1, 'key3': 1}))

    def test_invalid_json_structure(self):
        # The top-level value is an object rather than a list. Iterating it yields
        # the string key "not", and this test pins the SyntaxError that results.
        not_a_list = self._stream({"not": "a list"})
        with self.assertRaises(SyntaxError):
            f_934(not_a_list)


# Run the test suite only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    run_tests()
142 changes: 142 additions & 0 deletions data/clean/f_935_zhihan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import ast
import os
import glob

# Constants
# Presumably a default data directory name. Nothing in the code visible here reads it
# (f_935 receives its directory as an argument) -- NOTE(review): confirm it is still needed.
DIRECTORY = 'data'

def f_935(directory):
    """
    Convert all Unicode string representations of dictionaries in all text files
    in the specified directory to Python dictionaries.

    Every ``*.txt`` file directly inside ``directory`` is read as UTF-8 text, one dictionary
    literal per line. Blank lines are ignored. Files are processed in sorted path order so
    that the order of the returned list is reproducible.

    Parameters:
    directory (str): The path to the directory containing the text files.

    Returns:
    list: A list of dictionaries extracted from the text files, in file order and then line order.
          Empty if the directory holds no ``*.txt`` files.

    Requirements:
    - ast
    - os
    - glob

    Example:
    >>> f_935("sample_directory/")
    [{'key1': 'value1'}, {'key2': 'value2'}]

    Note:
    Ensure that the text files in the directory contain valid Unicode string representations of dictionaries.

    Raises:
    - ValueError: if any non-blank line of a text file is not a valid dictionary representation
      (unparseable text, a malformed literal, or a literal that is not a dictionary).
    """
    pattern = os.path.join(directory, '*.txt')

    results = []
    # glob returns matches in arbitrary order; sort for deterministic output.
    for path in sorted(glob.glob(pattern)):
        with open(path, 'r', encoding='utf-8') as handle:
            for line_number, raw_line in enumerate(handle, start=1):
                text = raw_line.strip()
                if not text:
                    # A blank or trailing empty line carries no dictionary.
                    continue

                try:
                    value = ast.literal_eval(text)
                except (SyntaxError, TypeError) as err:
                    # literal_eval reports some malformed input as SyntaxError or
                    # TypeError; surface them as the documented ValueError.
                    raise ValueError(
                        "{}:{}: invalid dictionary representation".format(path, line_number)
                    ) from err

                if not isinstance(value, dict):
                    raise ValueError(
                        "{}:{}: expected a dictionary, got {}".format(
                            path, line_number, type(value).__name__
                        )
                    )
                results.append(value)

    return results

import unittest
import os
import ast
import shutil


def run_tests():
    """Collect every test method of ``TestCases`` and execute them with a TextTestRunner."""
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # TestLoader.loadTestsFromTestCase is the supported way to build the suite.
    suite = unittest.TestSuite()
    suite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)

class TestCases(unittest.TestCase):
    """Exercises f_935 against fixture directories created under a scratch folder."""

    @staticmethod
    def _write_file(directory, name, content):
        """Write ``content`` to the file ``name`` inside ``directory``, closing it even on error."""
        with open(os.path.join(directory, name), "w", encoding="utf-8") as handle:
            handle.write(content)

    def setUp(self):
        self.test_dir = 'testdir_f_935'
        os.makedirs(self.test_dir, exist_ok=True)

        # Five files, one dictionary each.
        self.sample_directory = 'testdir_f_935/sample_directory'
        os.makedirs(self.sample_directory, exist_ok=True)
        self._write_file(self.sample_directory, "1.txt", "{'key1': 'value1'}")
        self._write_file(self.sample_directory, "2.txt", "{'key2': 'value2', 'key3': 'value3'}")
        self._write_file(self.sample_directory, "3.txt", "{'key4': 'value4'}")
        self._write_file(self.sample_directory, "4.txt",
                         "{'key5': 'value5', 'key6': 'value6', 'key7': 'value7'}")
        self._write_file(self.sample_directory, "5.txt", "{'key8': 'value8'}")

        # No files at all.
        self.empty_directory = "testdir_f_935/empty_directory"
        os.makedirs(self.empty_directory, exist_ok=True)

        # One file holding two dictionaries on separate lines.
        self.multi_line_directory = "testdir_f_935/multi_line_directory"
        os.makedirs(self.multi_line_directory, exist_ok=True)
        self._write_file(self.multi_line_directory, "1.txt", "{'key1': 'value1'}\n{'key2': 'value2'}")

        # One invalid file next to one valid file.
        self.mixed_directory = "testdir_f_935/mixed_directory"
        os.makedirs(self.mixed_directory, exist_ok=True)
        self._write_file(self.mixed_directory, "1.txt", "invalid")
        self._write_file(self.mixed_directory, "2.txt", "{'key1': 'value1'}")

        # Only an invalid file.
        self.invalid_directory = "testdir_f_935/invalid_directory"
        os.makedirs(self.invalid_directory, exist_ok=True)
        self._write_file(self.invalid_directory, "1.txt", "invalid")

    def tearDown(self):
        # Clean up the test directory
        shutil.rmtree(self.test_dir)

    def test_case_1(self):
        # Test with the sample directory
        result = f_935(self.sample_directory)
        expected_result = [
            {'key1': 'value1'},
            {'key2': 'value2', 'key3': 'value3'},
            {'key4': 'value4'},
            {'key5': 'value5', 'key6': 'value6', 'key7': 'value7'},
            {'key8': 'value8'}
        ]
        # File order is not part of the contract being tested, but the result must
        # contain exactly the expected dictionaries -- no extras, none missing.
        self.assertCountEqual(result, expected_result)

    def test_case_2(self):
        # Test with an empty directory
        result = f_935(self.empty_directory)
        self.assertEqual(result, [])

    def test_case_3(self):
        # Test with a directory containing a text file without valid dictionary representation
        with self.assertRaises(ValueError):
            f_935(self.invalid_directory)

    def test_case_4(self):
        # Test with a directory containing multiple text files, some of which are invalid
        with self.assertRaises(ValueError):
            f_935(self.mixed_directory)

    def test_case_5(self):
        # Test with a directory containing a text file with multiple valid dictionary representations
        result = f_935(self.multi_line_directory)
        expected_result = [
            {'key1': 'value1'},
            {'key2': 'value2'}
        ]
        self.assertEqual(result, expected_result)

# Run the test suite only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    run_tests()
Loading

0 comments on commit 4f3652c

Please sign in to comment.