-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update: zhihan data
- Loading branch information
Showing
115 changed files
with
8,322 additions
and
1,933 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import numpy as np | ||
from collections import Counter | ||
|
||
|
||
def f_933(list_of_tuples):
    """
    Total the numeric parts and tally the categories of (value, category) pairs.

    The first item of every tuple is added up with NumPy, and the second item
    of every tuple is counted so that the caller learns how often each
    category occurs.

    Parameters:
    - list_of_tuples (list of tuple): Tuples whose first item is a numeric value
      and whose second item is a category.

    Returns:
    - tuple: A 2-element tuple ``(total, counts)``. ``total`` is the result of
      ``np.sum`` over the numeric values and ``counts`` is a plain dictionary
      mapping each category to its number of occurrences.

    Requirements:
    - numpy
    - collections.Counter

    Example:
    >>> list_of_tuples = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
    >>> sum_of_values, category_counts = f_933(list_of_tuples)
    >>> print(sum_of_values)
    15
    >>> print(category_counts)
    {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1}
    """
    # Split the pairs into their two columns (values first, then labels).
    amounts = [entry[0] for entry in list_of_tuples]
    labels = [entry[1] for entry in list_of_tuples]

    grand_total = np.sum(amounts)

    # Counter keeps first-seen order, so the resulting dict does too.
    tally = Counter()
    tally.update(labels)

    return grand_total, dict(tally)
|
||
|
||
import unittest | ||
|
||
|
||
def run_tests():
    """Collect every test method of ``TestCases`` and run it with a text runner.

    The suite is built with ``TestLoader.loadTestsFromTestCase`` because
    ``unittest.makeSuite`` was deprecated in Python 3.11 and removed in 3.13.
    """
    suite = unittest.TestSuite()
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)
|
||
|
||
class TestCases(unittest.TestCase):
    """Checks for f_933: the numeric total and the per-category counts."""

    def test_case_1(self):
        # Five distinct categories with positive and negative values
        pairs = [(5, 'Fruits'), (9, 'Vegetables'), (-1, 'Dairy'), (-2, 'Bakery'), (4, 'Meat')]
        total, counts = f_933(pairs)
        self.assertEqual(total, 15)
        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1, 'Bakery': 1, 'Meat': 1})

    def test_case_2(self):
        # Every tuple shares one category
        pairs = [(5, 'Fruits'), (9, 'Fruits'), (-1, 'Fruits'), (-2, 'Fruits')]
        total, counts = f_933(pairs)
        self.assertEqual(total, 11)
        self.assertEqual(counts, {'Fruits': 4})

    def test_case_3(self):
        # Only negative numeric values
        pairs = [(-5, 'Fruits'), (-9, 'Vegetables'), (-1, 'Dairy')]
        total, counts = f_933(pairs)
        self.assertEqual(total, -15)
        self.assertEqual(counts, {'Fruits': 1, 'Vegetables': 1, 'Dairy': 1})

    def test_case_4(self):
        # No tuples at all
        total, counts = f_933([])
        self.assertEqual(total, 0)
        self.assertEqual(counts, {})

    def test_case_5(self):
        # Positive and negative values that all belong to one category
        pairs = [(5, 'Fruits'), (-5, 'Fruits'), (3, 'Fruits')]
        total, counts = f_933(pairs)
        self.assertEqual(total, 3)
        self.assertEqual(counts, {'Fruits': 3})

    def test_empty_list(self):
        """An empty input yields a zero total and no categories."""
        outcome = f_933([])
        self.assertEqual(outcome, (0, {}))

    def test_all_negative_values(self):
        """Negative values are summed like any other number."""
        outcome = f_933([(-5, 'Fruits'), (-2, 'Vegetables')])
        self.assertEqual(outcome, (-7, {'Fruits': 1, 'Vegetables': 1}))

    def test_duplicate_categories(self):
        """A repeated category is counted once per occurrence."""
        outcome = f_933([(1, 'Fruits'), (2, 'Fruits'), (3, 'Vegetables')])
        self.assertEqual(outcome, (6, {'Fruits': 2, 'Vegetables': 1}))

    def test_single_tuple_in_list(self):
        """A single tuple produces its own value and a count of one."""
        outcome = f_933([(10, 'Meat')])
        self.assertEqual(outcome, (10, {'Meat': 1}))

    def test_float_numeric_values(self):
        """Float values are accepted and summed."""
        outcome = f_933([(1.5, 'Fruits'), (2.5, 'Vegetables')])
        self.assertEqual(outcome, (4.0, {'Fruits': 1, 'Vegetables': 1}))
|
||
|
||
if __name__ == "__main__": | ||
run_tests() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import ast | ||
import json | ||
from collections import Counter | ||
|
||
|
||
def f_934(file_pointer):
    """
    Count how often each top-level key occurs across the dictionaries stored in a JSON file.

    The JSON document is loaded from the given file object and iterated. String
    entries are passed through ``ast.literal_eval`` so that textual dictionary
    representations become real dictionaries. Every entry that is (or becomes)
    a dictionary contributes its top-level keys to the tally; everything else
    is ignored.

    Parameters:
    file_pointer (file object): An open, readable file object positioned at the JSON data.

    Returns:
    collections.Counter: Mapping of key -> number of dictionaries that contain that key.

    Requirements:
    - ast
    - json
    - collections.Counter

    Note:
    Only ``ValueError`` raised by ``ast.literal_eval`` is swallowed (the entry is
    skipped). Any other exception it raises, such as ``SyntaxError``, propagates to
    the caller, as do the errors raised by ``json.load`` for malformed JSON.

    Example:
    >>> with open("data.json", "r") as file:
    ...     key_frequency = f_934(file)
    >>> print(key_frequency)
    Counter({'name': 5, 'age': 5, 'city': 3})
    """
    entries = json.load(file_pointer)
    tally = Counter()

    for raw in entries:
        candidate = raw
        if isinstance(raw, str):
            try:
                candidate = ast.literal_eval(raw)
            except ValueError:
                # The text is not a Python literal: leave this entry out.
                continue

        if not isinstance(candidate, dict):
            continue

        for key in candidate:
            tally[key] += 1

    return tally
|
||
|
||
import unittest | ||
from io import BytesIO | ||
from collections import Counter | ||
import json | ||
|
||
|
||
def run_tests():
    """Collect every test method of ``TestCases`` and run it with a text runner.

    The suite is built with ``TestLoader.loadTestsFromTestCase`` because
    ``unittest.makeSuite`` was deprecated in Python 3.11 and removed in 3.13.
    """
    suite = unittest.TestSuite()
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)
|
||
|
||
class TestCases(unittest.TestCase):
    """Exercise f_934 against in-memory byte streams that stand in for JSON files."""

    @staticmethod
    def _json_stream(payload):
        # Serialize the payload to UTF-8 JSON bytes and expose them as a file-like object
        return BytesIO(json.dumps(payload).encode('utf-8'))

    def test_with_dicts(self):
        # The JSON list holds real dictionaries
        stream = self._json_stream([{"name": "John", "age": 30}, {"name": "Jane", "age": 25}, {"name": "Jake"}])
        self.assertEqual(f_934(stream), Counter({'name': 3, 'age': 2}))

    def test_with_string_repr_dicts(self):
        # The JSON list holds strings that spell out dictionaries
        stream = self._json_stream(['{"city": "New York"}', '{"city": "Los Angeles", "temp": 75}'])
        self.assertEqual(f_934(stream), Counter({'city': 2, 'temp': 1}))

    def test_with_invalid_json(self):
        # The stream content is not JSON at all, so decoding must fail
        stream = BytesIO(b'invalid json')
        with self.assertRaises(json.JSONDecodeError):
            f_934(stream)

    def test_empty_json(self):
        # An empty JSON list produces an empty Counter
        stream = self._json_stream([])
        self.assertEqual(f_934(stream), Counter())

    def test_mixed_valid_invalid_dicts(self):
        # Strings that are not dictionaries are skipped, the valid ones are counted
        stream = self._json_stream(['{"name": "John"}', 'Invalid', '{"age": 30}'])
        self.assertEqual(f_934(stream), Counter({'name': 1, 'age': 1}))

    def test_nested_dicts(self):
        # Only top-level keys are counted; nested keys are not
        stream = self._json_stream([{"person": {"name": "John", "age": 30}}, {"person": {"city": "New York"}}])
        self.assertEqual(f_934(stream), Counter({'person': 2}))

    def test_with_actual_json_objects_instead_of_strings(self):
        # Plain JSON objects are counted without any string evaluation
        stream = self._json_stream([{"key1": "value1"}, {"key2": "value2", "key3": "value3"}])
        self.assertEqual(f_934(stream), Counter({'key1': 1, 'key2': 1, 'key3': 1}))

    def test_invalid_json_structure(self):
        # The top-level JSON value is an object rather than a list; this suite
        # expects f_934 to raise SyntaxError for it
        stream = self._json_stream({"not": "a list"})
        with self.assertRaises(SyntaxError):
            f_934(stream)
|
||
|
||
if __name__ == "__main__": | ||
run_tests() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
import ast | ||
import os | ||
import glob | ||
|
||
# Constants | ||
DIRECTORY = 'data' | ||
|
||
def f_935(directory):
    """
    Convert all Unicode string representations of dictionaries in all text files
    in the specified directory to Python dictionaries.

    Every ``*.txt`` file directly inside ``directory`` is read line by line and
    each non-blank line is parsed with ``ast.literal_eval``. Files are processed
    in sorted path order so that the result order is deterministic.

    Parameters:
    directory (str): The path to the directory containing the text files.

    Returns:
    list: A list of dictionaries extracted from the text files, in sorted file
    order and, within a file, in line order. Empty if no ``*.txt`` file matches.

    Requirements:
    - ast
    - os
    - glob

    Example:
    >>> f_935("sample_directory/")
    [{'key1': 'value1'}, {'key2': 'value2'}]

    Note:
    Files are decoded as UTF-8. Lines that contain only whitespace are skipped.

    Raises:
    - ValueError: If a non-blank line is not a valid literal (including lines
      that are syntactically broken) or evaluates to something other than a
      dictionary.
    """
    pattern = os.path.join(directory, '*.txt')

    results = []
    # glob returns matches in arbitrary order; sort so callers get stable output.
    for file_path in sorted(glob.glob(pattern)):
        with open(file_path, 'r', encoding='utf-8') as handle:
            for line_number, line in enumerate(handle, start=1):
                text = line.strip()
                if not text:
                    # Blank lines (e.g. a trailing empty line) carry no data.
                    continue

                try:
                    value = ast.literal_eval(text)
                except (SyntaxError, TypeError, ValueError) as err:
                    # literal_eval reports malformed input with several exception
                    # types; callers are promised ValueError only.
                    raise ValueError(
                        "{}:{}: invalid dictionary representation: {!r}".format(
                            file_path, line_number, text
                        )
                    ) from err

                if not isinstance(value, dict):
                    raise ValueError(
                        "{}:{}: expected a dictionary, got {}: {!r}".format(
                            file_path, line_number, type(value).__name__, text
                        )
                    )

                results.append(value)

    return results
|
||
import unittest | ||
import os | ||
import ast | ||
import shutil | ||
|
||
|
||
def run_tests():
    """Collect every test method of ``TestCases`` and run it with a text runner.

    The suite is built with ``TestLoader.loadTestsFromTestCase`` because
    ``unittest.makeSuite`` was deprecated in Python 3.11 and removed in 3.13.
    """
    suite = unittest.TestSuite()
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestCases))
    runner = unittest.TextTestRunner()
    runner.run(suite)
|
||
class TestCases(unittest.TestCase):
    """Tests for f_935 that run against a throwaway directory tree on disk."""

    @staticmethod
    def _write(directory, name, content):
        # Create one fixture file; the context manager guarantees it is closed
        # even if the write fails.
        with open(os.path.join(directory, name), "w", encoding="utf-8") as handle:
            handle.write(content)

    def _make_dir(self, name):
        # Create (if needed) and return a sub-directory of the test root.
        path = os.path.join(self.test_dir, name)
        os.makedirs(path, exist_ok=True)
        return path

    def setUp(self):
        self.test_dir = 'testdir_f_935'
        os.makedirs(self.test_dir, exist_ok=True)

        # Five files with one valid dictionary each
        self.sample_directory = self._make_dir('sample_directory')
        self._write(self.sample_directory, "1.txt", "{'key1': 'value1'}")
        self._write(self.sample_directory, "2.txt", "{'key2': 'value2', 'key3': 'value3'}")
        self._write(self.sample_directory, "3.txt", "{'key4': 'value4'}")
        self._write(self.sample_directory, "4.txt", "{'key5': 'value5', 'key6': 'value6', 'key7': 'value7'}")
        self._write(self.sample_directory, "5.txt", "{'key8': 'value8'}")

        # No files at all
        self.empty_directory = self._make_dir('empty_directory')

        # One file holding two dictionaries on separate lines
        self.multi_line_directory = self._make_dir('multi_line_directory')
        self._write(self.multi_line_directory, "1.txt", "{'key1': 'value1'}\n{'key2': 'value2'}")

        # A single file whose content is not a dictionary
        self.mixed_directory = self._make_dir('mixed_directory')
        self._write(self.mixed_directory, "1.txt", "invalid")

        # One invalid file next to one valid file
        self.invalid_directory = self._make_dir('invalid_directory')
        self._write(self.invalid_directory, "1.txt", "invalid")
        self._write(self.invalid_directory, "2.txt", "{'key1': 'value1'}")

    def tearDown(self):
        # Clean up the test directory
        shutil.rmtree(self.test_dir)

    def test_case_1(self):
        # Test with the sample directory
        result = f_935(self.sample_directory)
        expected_result = [
            {'key1': 'value1'},
            {'key2': 'value2', 'key3': 'value3'},
            {'key4': 'value4'},
            {'key5': 'value5', 'key6': 'value6', 'key7': 'value7'},
            {'key8': 'value8'},
        ]
        # Same elements regardless of file order, and no unexpected extras
        self.assertCountEqual(result, expected_result)

    def test_case_2(self):
        # Test with an empty directory
        result = f_935(self.empty_directory)
        self.assertEqual(result, [])

    def test_case_3(self):
        # Test with a directory that mixes an invalid text file with a valid one
        with self.assertRaises(ValueError):
            f_935(self.invalid_directory)

    def test_case_4(self):
        # Test with a directory whose only text file has no valid dictionary representation
        with self.assertRaises(ValueError):
            f_935(self.mixed_directory)

    def test_case_5(self):
        # Test with a directory containing a text file with multiple valid dictionary representations
        result = f_935(self.multi_line_directory)
        expected_result = [
            {'key1': 'value1'},
            {'key2': 'value2'},
        ]
        self.assertEqual(result, expected_result)
|
||
if __name__ == "__main__": | ||
run_tests() |
Oops, something went wrong.