From e235bbb6b2269aaf1ef99d3479802fa4e1e7ab52 Mon Sep 17 00:00:00 2001 From: han hu <543160303@qq.com> Date: Sat, 4 May 2024 01:08:09 +1000 Subject: [PATCH 1/3] refine mings --- data/raw/f_433_ming.py | 4 +-- data/raw/f_444_ming.py | 2 +- data/raw/f_445_ming.py | 2 +- data/raw/f_455_ming.py | 14 ++++++----- data/raw/f_456_ming.py | 24 +++++++++--------- data/raw/f_457_ming.py | 55 ++++++++++++++++++++---------------------- data/raw/f_460_ming.py | 4 +-- data/raw/f_461_ming.py | 4 +-- data/raw/f_462_ming.py | 8 ++++-- data/raw/f_464_ming.py | 28 +++++++++++---------- data/raw/f_478_ming.py | 1 + data/raw/f_490_ming.py | 24 +++++++++--------- data/raw/f_491_ming.py | 14 +++++------ data/raw/f_492_ming.py | 16 ++++++------ data/raw/f_493_ming.py | 14 +++++------ data/raw/f_495_ming.py | 7 ++---- data/raw/f_502_ming.py | 4 +-- data/raw/f_503_ming.py | 4 +-- data/raw/f_505_ming.py | 21 ++++++++-------- data/raw/f_507_ming.py | 6 ++--- script/parse.py | 4 +++ 21 files changed, 134 insertions(+), 126 deletions(-) diff --git a/data/raw/f_433_ming.py b/data/raw/f_433_ming.py index 20b55a4b..72b78d9f 100644 --- a/data/raw/f_433_ming.py +++ b/data/raw/f_433_ming.py @@ -4,11 +4,11 @@ def f_433(df): """ - Encodes a Pandas DataFrame as a Base64 string. The DataFrame is first converted to CSV format, + Encodes a dict of list as a Base64 string. The dict is first converted to CSV format, then encoded to bytes, and finally encoded to a Base64 string. Parameters: - df (DataFrame): The pandas DataFrame to be encoded. + df (dict of list): A dictionary where the key 'Word' maps to a list of strings. Returns: str: The Base64 encoded string of the DataFrame's CSV representation. 
diff --git a/data/raw/f_444_ming.py b/data/raw/f_444_ming.py index b76ce835..b4cc6de8 100644 --- a/data/raw/f_444_ming.py +++ b/data/raw/f_444_ming.py @@ -8,7 +8,7 @@ def f_444(array_length=100): Generate two arrays of random integers and draw a line diagram with the maximum values of the respective elements of the two arrays. - Args: + Parameters: - array_length (int): Length of the random arrays to be generated. Default is 100. Returns: diff --git a/data/raw/f_445_ming.py b/data/raw/f_445_ming.py index b582f1fc..b2e4e10d 100644 --- a/data/raw/f_445_ming.py +++ b/data/raw/f_445_ming.py @@ -7,7 +7,7 @@ def f_445(array_length=100): Generate two arrays of random numbers of a given length, calculate their mean, median, and standard deviation, and draw a bar chart to compare these statistics. - Args: + Parameters: - array_length (int, optional): The length of the arrays to be generated. Default is 100. Returns: diff --git a/data/raw/f_455_ming.py b/data/raw/f_455_ming.py index aacac7db..94c2ea4f 100644 --- a/data/raw/f_455_ming.py +++ b/data/raw/f_455_ming.py @@ -5,17 +5,19 @@ # Constants SENSORS = ['Temperature', 'Humidity', 'Pressure'] -output_dir = './output' +OUTPUT_DIR = './output' -def f_455(hours, output_dir = output_dir): +def f_455(hours, output_dir = OUTPUT_DIR): """ Create sensor data for the specified number of hours and save it in a CSV file. Parameters: - hours (int): The number of hours for which sensor data is to be generated. + - output_dir (str, optional): The output file path Returns: - - str: The path of the generated CSV file. + - hours (int): Number of hours to generate data for. 
+ Requirements: - datetime @@ -52,7 +54,7 @@ def f_455(hours, output_dir = output_dir): import os import shutil -FILE_PATH = os.path.join(output_dir, 'sensor_data.csv') +FILE_PATH = os.path.join(OUTPUT_DIR, 'sensor_data.csv') class TestCases(unittest.TestCase): @@ -62,8 +64,8 @@ def tearDown(self): # Check and remove the expected file if it exists # if os.path.exists(FILE_PATH): # os.remove(FILE_PATH) - if os.path.exists(output_dir): - shutil.rmtree(output_dir) + if os.path.exists(OUTPUT_DIR): + shutil.rmtree(OUTPUT_DIR) def test_csv_file_creation(self): """Test if the CSV file is successfully created.""" diff --git a/data/raw/f_456_ming.py b/data/raw/f_456_ming.py index 4407a8e6..4e7bafdb 100644 --- a/data/raw/f_456_ming.py +++ b/data/raw/f_456_ming.py @@ -7,15 +7,17 @@ # Constants VEHICLE_TYPES = ['Car', 'Bus', 'Truck', 'Bike'] -output_dir = './output' +OUTPUT_DIR = './output' -def f_456(hours, output_dir = output_dir): + +def f_456(hours, output_dir=OUTPUT_DIR): """ Generates traffic data for different vehicle types over a specified number of hours, saves the data to a CSV file, and plots the data in a line chart. Parameters: - hours (int): Number of hours to generate data for. + - output_dir (str, optional): The output file path Returns: - tuple: Path to the CSV file and the matplotlib axes object of the line plot. 
@@ -68,24 +70,23 @@ def f_456(hours, output_dir = output_dir): import unittest from unittest.mock import patch import shutil -output_dir = './output' -FILE_PATH = os.path.join(output_dir, 'traffic_data.csv') +FILE_PATH = os.path.join(OUTPUT_DIR, 'traffic_data.csv') class TestCases(unittest.TestCase): def setUp(self): """Set up the environment for testing.""" - if not os.path.exists(output_dir): - os.makedirs(output_dir) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) def tearDown(self): """Clean up any files created during the tests.""" # Check and remove the expected file if it exists # if os.path.exists(FILE_PATH): # os.remove(FILE_PATH) - if os.path.exists(output_dir): - shutil.rmtree(output_dir) + if os.path.exists(OUTPUT_DIR): + shutil.rmtree(OUTPUT_DIR) @patch('matplotlib.pyplot.show') # Mock plt.show to not render plots @patch('csv.writer') # Mock csv.writer to not actually write files @@ -114,8 +115,8 @@ def test_empty_dataframe_on_zero_hours(self, mock_read_csv): @patch('os.path.exists', return_value=False) def test_directory_creation(self, mock_path_exists, mock_makedirs): """Ensure directory is created if it does not exist.""" - if os.path.exists(output_dir): - shutil.rmtree(output_dir) + if os.path.exists(OUTPUT_DIR): + shutil.rmtree(OUTPUT_DIR) f_456(1) mock_makedirs.assert_called_with(os.path.dirname(FILE_PATH)) @@ -146,7 +147,6 @@ def run_tests(): if __name__ == "__main__": import doctest + doctest.testmod() run_tests() - - diff --git a/data/raw/f_457_ming.py b/data/raw/f_457_ming.py index d93cf8af..6eab2231 100644 --- a/data/raw/f_457_ming.py +++ b/data/raw/f_457_ming.py @@ -6,18 +6,20 @@ # Constants WEATHER_CONDITIONS = ['Sunny', 'Cloudy', 'Rainy', 'Snowy', 'Stormy'] -output_dir = './output' +OUTPUT_DIR = './output' -def f_457(hours, output_dir = output_dir): +def f_457(hours, output_dir=OUTPUT_DIR): """ - Generate weather data for the specified number of hours, save it in a CSV file and back up the file to a backup directory. 
+ Generate weather data for the specified number of hours, save it in a CSV file with colomns 'Time' and 'Condition' + and back up the file to a backup directory. Parameters: - hours (int): The number of hours for which weather data is to be generated. - + - hours (int): The number of hours for which weather data is to be generated. + - output_dir (str, optional): The output file path + Returns: - str: The path of the generated CSV file. + - str: The path of the generated CSV file. Requirements: - datetime @@ -53,8 +55,8 @@ def f_457(hours, output_dir = output_dir): import unittest from unittest.mock import patch, mock_open -FILE_PATH = os.path.join(output_dir, 'weather_data.csv') -BACKUP_PATH = os.path.join(output_dir, 'backup/') +FILE_PATH = os.path.join(OUTPUT_DIR, 'weather_data.csv') +BACKUP_PATH = os.path.join(OUTPUT_DIR, 'backup/') class TestCases(unittest.TestCase): @@ -78,7 +80,7 @@ def tearDown(self): if os.path.exists(BACKUP_PATH): shutil.rmtree(BACKUP_PATH) - @patch('os.getcwd', return_value=output_dir) + @patch('os.getcwd', return_value=OUTPUT_DIR) @patch('os.path.exists', return_value=True) def test_f_457_checks_backup_directory_exists(self, mock_exists, mock_getcwd): """Test checking for the existence of the backup directory.""" @@ -89,7 +91,7 @@ def test_f_457_checks_backup_directory_exists(self, mock_exists, mock_getcwd): self.assertEqual(expected_call_path, actual_call_path, f"Expected {expected_call_path}, got {actual_call_path}") - @patch('os.getcwd', return_value=output_dir) + @patch('os.getcwd', return_value=OUTPUT_DIR) @patch('shutil.copy') def test_f_457_copies_to_backup_directory(self, mock_copy, mock_getcwd): """Test if f_457 copies the weather_data.csv file to the backup directory.""" @@ -100,26 +102,21 @@ def test_f_457_copies_to_backup_directory(self, mock_copy, mock_getcwd): self.assertEqual(expected_backup_dir, actual_backup_dir, "The backup directory path does not match the expected directory path.") - # @patch('os.makedirs') - # 
@patch('os.path.exists') - # @patch('builtins.open', new_callable=mock_open, read_data="Time,Condition\n") - # @patch('os.getcwd', return_value=output_dir) - # def test_f_457_writes_correct_header(self, mock_file_open, mock_exists, mock_makedirs, mock_getcwd): - # """Ensure f_457 writes the correct header to weather_data.csv.""" - # # create backup directory - # expected_header = "Time,Condition\n" - # f_457(1) - - # # Check all calls to write to ensure the expected header was written - # # Check all calls to write to ensure key components of the expected header were written - # header_components = ["Time", "Condition"] - # header_written = any( - # all(component in call_args.args[0] for component in header_components) - # for call_args in mock_file_open().write.call_args_list - # ) - - # self.assertTrue(header_written, "The expected header components were not written to the file.") + @patch('shutil.copy') + @patch('os.makedirs') + @patch('os.path.exists', return_value=True) + @patch('builtins.open', new_callable=mock_open, read_data="Time,Condition\n") + @patch('os.getcwd', return_value=OUTPUT_DIR) + def test_f_457_writes_correct_header(self, mock_getcwd, mock_file_open, mock_exists, mock_makedirs, mock_copy): + """Ensure f_457 writes the correct header to weather_data.csv.""" + f_457(1) + header_components = ["Time", "Condition"] + header_written = any( + all(component in call_args.args[0] for component in header_components) + for call_args in mock_file_open().write.call_args_list + ) + self.assertTrue(header_written, "The expected header components were not written to the file.") def test_backup_file_creation(self): """Test that the CSV file is correctly copied to the backup directory.""" diff --git a/data/raw/f_460_ming.py b/data/raw/f_460_ming.py index 19d1ddf8..b9649c84 100644 --- a/data/raw/f_460_ming.py +++ b/data/raw/f_460_ming.py @@ -4,12 +4,12 @@ def f_460(df, letter): """ - The function filters rows in a DataFrame in which the values of the 'Word' 
column begin with a specified letter. + The function filters rows in a dict of list in which the values of the 'Word' column begin with a specified letter. It then calculates the length of the words in the filtered column and returns a dictionary of word lengths and their respective counts. Parameters: - df (DataFrame): The input DataFrame. It should have a 'Word' column. + df (dict of list): A dictionary where the key 'Word' maps to a list of strings. letter (str): The letter to filter the 'Word' column by. Returns: diff --git a/data/raw/f_461_ming.py b/data/raw/f_461_ming.py index eabd8548..0c4934a8 100644 --- a/data/raw/f_461_ming.py +++ b/data/raw/f_461_ming.py @@ -3,11 +3,11 @@ def f_461(df, letter): """ - Filters rows in a DataFrame where values in the 'Word' column begin with the specified letter, + Filters rows in a dictionary where values in the 'Word' column begin with the specified letter, then calculates the length of the words in the filtered column and returns a histogram plot of the word lengths. Parameters: - - df (pd.DataFrame): The input DataFrame. Must have a 'Word' column with string values. + - df (dict of list): A dictionary where the key 'Word' maps to a list of strings. - letter (str): The letter to filter the 'Word' column by. It should be a lowercase letter. Returns: diff --git a/data/raw/f_462_ming.py b/data/raw/f_462_ming.py index 407cf6eb..615d8258 100644 --- a/data/raw/f_462_ming.py +++ b/data/raw/f_462_ming.py @@ -4,10 +4,12 @@ def f_462(df, letter): """ - The function filters rows in a DataFrame in which the values of a particular column start with a particular letter and then calculates the length of the words in the filtered column and returns basic statistics (mean, median, mode) of the word lengths. 
+ The function filters rows in a dictionary of list in which the values of a particular column start with + a particular letter and then calculates the length of the words in the filtered column and returns basic + statistics (mean, median, mode) of the word lengths. Parameters: - df (DataFrame): The input DataFrame. It should have a 'Word' column. + df (dict of list): A dictionary where the key 'Word' maps to a list of strings. letter (str): The letter to filter the 'Word' column. Returns: @@ -38,12 +40,14 @@ def f_462(df, letter): import random from string import ascii_lowercase + def run_tests(): suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(TestCases)) runner = unittest.TextTestRunner() runner.run(suite) + class TestCases(unittest.TestCase): def setUp(self): word_list = [] diff --git a/data/raw/f_464_ming.py b/data/raw/f_464_ming.py index 0a0ca036..f5517992 100644 --- a/data/raw/f_464_ming.py +++ b/data/raw/f_464_ming.py @@ -3,7 +3,7 @@ # Constants LETTERS = list('abcdefghijklmnopqrstuvwxyz') -output_dir = './output' +OUTPUT_DIR = './output' def f_464(file_path): @@ -21,10 +21,10 @@ def f_464(file_path): - numpy Example: - >>> if not os.path.exists(output_dir): - ... 
os.mkdir(output_dir) - >>> f_464(os.path.join(output_dir, 'random_matrix.csv')) + >>> f_464(os.path.join(OUTPUT_DIR, 'random_matrix.csv')) """ + if not os.path.exists(OUTPUT_DIR): + os.mkdir(OUTPUT_DIR) matrix = pd.DataFrame(np.random.choice(LETTERS, (10, 10))) matrix.to_csv(file_path, sep='\t', header=False, index=False) @@ -34,30 +34,32 @@ def f_464(file_path): import unittest import shutil import os -if not os.path.exists(output_dir): - os.mkdir(output_dir) class TestCases(unittest.TestCase): + def setUp(self): + if not os.path.exists(OUTPUT_DIR): + os.mkdir(OUTPUT_DIR) + def tearDown(self): """Clean up any files created during the tests.""" # Check and remove the expected file if it exists # if os.path.exists(FILE_PATH): # os.remove(FILE_PATH) - if os.path.exists(output_dir): - shutil.rmtree(output_dir) + if os.path.exists(OUTPUT_DIR): + shutil.rmtree(OUTPUT_DIR) def test_case_1(self): # Testing with a sample file path - file_path = os.path.join(output_dir, 'test_output_1.csv') + file_path = os.path.join(OUTPUT_DIR, 'test_output_1.csv') f_464(file_path) df = pd.read_csv(file_path, sep='\t', header=None) self.assertEqual(df.shape, (10, 10), "Matrix shape should be 10x10") def test_case_2(self): # Testing if the generated matrix contains only lowercase letters - file_path = os.path.join(output_dir, 'test_output_2.csv') + file_path = os.path.join(OUTPUT_DIR, 'test_output_2.csv') f_464(file_path) df = pd.read_csv(file_path, sep='\t', header=None) all_lower = df.applymap(str.islower).all().all() @@ -65,7 +67,7 @@ def test_case_2(self): def test_case_3(self): # Testing if the generated matrix contains only letters from the alphabet - file_path = os.path.join(output_dir, 'test_output_3.csv') + file_path = os.path.join(OUTPUT_DIR, 'test_output_3.csv') f_464(file_path) df = pd.read_csv(file_path, sep='\t', header=None) all_alpha = df.applymap(str.isalpha).all().all() @@ -73,7 +75,7 @@ def test_case_3(self): def test_case_4(self): # Testing if the generated matrix 
contains different letters - file_path = os.path.join(output_dir, 'test_output_4.csv') + file_path = os.path.join(OUTPUT_DIR, 'test_output_4.csv') f_464(file_path) df = pd.read_csv(file_path, sep='\t', header=None) unique_elements = df.nunique().sum() @@ -81,7 +83,7 @@ def test_case_4(self): def test_case_5(self): # Testing if the function overwrites existing files - file_path = os.path.join(output_dir, 'test_output_5.csv') + file_path = os.path.join(OUTPUT_DIR, 'test_output_5.csv') with open(file_path, 'w') as f: f.write("test") f_464(file_path) diff --git a/data/raw/f_478_ming.py b/data/raw/f_478_ming.py index ec7e4e4c..9f9a6437 100644 --- a/data/raw/f_478_ming.py +++ b/data/raw/f_478_ming.py @@ -18,6 +18,7 @@ def f_478(goals, penalties, rng_seed=None, teams=TEAMS): - goals (int): The maximum number of goals a team can score in a match. - penalties (int): The maximum number of penalties a team can receive in a match. - rng_seed (int, optional): Seed for the random number generator to ensure reproducibility. Defaults to None. + - teams (list of str, optional): List of team names to assign players Returns: - DataFrame: A pandas DataFrame containing teams, their goals, and penalty costs, along with the original match results. 
diff --git a/data/raw/f_490_ming.py b/data/raw/f_490_ming.py index c30654d1..1917189c 100644 --- a/data/raw/f_490_ming.py +++ b/data/raw/f_490_ming.py @@ -1,6 +1,6 @@ import os import time -output_dir = './output' +OUTPUT_DIR = './output' def f_490(dataset, filename): @@ -26,9 +26,9 @@ def f_490(dataset, filename): """ start_time = time.time() - if not os.path.exists(output_dir): - os.makedirs(output_dir) - filepath = os.path.join(output_dir, filename) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) + filepath = os.path.join(OUTPUT_DIR, filename) with open(filepath, 'w', newline='') as f: for i, df in enumerate(dataset): if i > 0: @@ -53,43 +53,43 @@ class TestCases(unittest.TestCase): @classmethod def setUp(self): """Ensure the data directory exists before any tests are run.""" - if not os.path.exists(output_dir): - os.makedirs(output_dir) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) def tearDown(self): """Clean up by removing the data directory and its contents after all tests.""" - shutil.rmtree(output_dir, ignore_errors=True) + shutil.rmtree(OUTPUT_DIR, ignore_errors=True) def test_single_dataframe(self): """Test with a single DataFrame.""" df = pd.DataFrame({"Column1": [1, 2], "Column2": [3, 4]}) f_490([df], 'single_dataframe.csv') - self.assertTrue(os.path.exists(os.path.join(output_dir, 'single_dataframe.csv'))) + self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'single_dataframe.csv'))) def test_multiple_dataframes(self): """Test with multiple DataFrames.""" df1 = pd.DataFrame({"A": [5, 6], "B": [7, 8]}) df2 = pd.DataFrame({"C": [9, 10], "D": [11, 12]}) f_490([df1, df2], 'multiple_dataframes.csv') - self.assertTrue(os.path.exists(os.path.join(output_dir, 'multiple_dataframes.csv'))) + self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'multiple_dataframes.csv'))) def test_empty_dataframe(self): """Test with an empty DataFrame.""" df = pd.DataFrame() f_490([df], 'empty_dataframe.csv') - 
self.assertTrue(os.path.exists(os.path.join(output_dir, 'empty_dataframe.csv'))) + self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'empty_dataframe.csv'))) def test_varying_row_counts(self): """Test with DataFrames having varying numbers of rows.""" df1 = pd.DataFrame({"E": [13], "F": [14]}) df2 = pd.DataFrame({"G": [15, 16, 17], "H": [18, 19, 20]}) f_490([df1, df2], 'varying_row_counts.csv') - self.assertTrue(os.path.exists(os.path.join(output_dir, 'varying_row_counts.csv'))) + self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'varying_row_counts.csv'))) def test_no_dataframes(self): """Test with no DataFrames provided.""" f_490([], 'no_dataframes.csv') - self.assertTrue(os.path.exists(os.path.join(output_dir, 'no_dataframes.csv'))) + self.assertTrue(os.path.exists(os.path.join(OUTPUT_DIR, 'no_dataframes.csv'))) def run_tests(): diff --git a/data/raw/f_491_ming.py b/data/raw/f_491_ming.py index 180abe24..4ef7d685 100644 --- a/data/raw/f_491_ming.py +++ b/data/raw/f_491_ming.py @@ -1,6 +1,6 @@ import pandas as pd import os -output_dir = './output' +OUTPUT_DIR = './output' def f_491(df, filename): @@ -26,9 +26,9 @@ def f_491(df, filename): >>> 'data.json' in f_491(df, 'data.json') True """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - file_path = os.path.join(output_dir, filename) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) + file_path = os.path.join(OUTPUT_DIR, filename) df_clean = df.where(pd.notnull(df), None) with open(file_path, 'w') as f: df_clean.to_json(f, orient='records') @@ -44,12 +44,12 @@ class TestCases(unittest.TestCase): @classmethod def setUp(self): """Set up testing environment; ensure data directory exists.""" - if not os.path.exists(output_dir): - os.makedirs(output_dir) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) def tearDown(self): """Clean up; remove the data directory and its contents after tests.""" - shutil.rmtree(output_dir, ignore_errors=True) + 
shutil.rmtree(OUTPUT_DIR, ignore_errors=True) def test_basic_dataframe(self): """Test saving a simple DataFrame.""" diff --git a/data/raw/f_492_ming.py b/data/raw/f_492_ming.py index 683cfb7f..637ed823 100644 --- a/data/raw/f_492_ming.py +++ b/data/raw/f_492_ming.py @@ -1,6 +1,6 @@ import csv import os -output_dir = './output' +OUTPUT_DIR = './output' def f_492(df, filename): @@ -29,10 +29,10 @@ def f_492(df, filename): True """ # Ensure the data directory exists - if not os.path.exists(output_dir): - os.makedirs(output_dir) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) - file_path = os.path.join(output_dir, filename) + file_path = os.path.join(OUTPUT_DIR, filename) df.to_csv(file_path, index=False, quoting=csv.QUOTE_NONNUMERIC) return os.path.abspath(file_path) @@ -46,17 +46,17 @@ class TestCases(unittest.TestCase): @classmethod def setUp(self): """Create the data directory if it doesn't exist.""" - if not os.path.exists(output_dir): - os.makedirs(output_dir) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) def tearDown(self): """Clean up by removing files created during tests (if any).""" - shutil.rmtree(output_dir, ignore_errors=True) + shutil.rmtree(OUTPUT_DIR, ignore_errors=True) def test_basic_dataframe(self): """Test saving a simple DataFrame.""" df = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']}) - expected_path = os.path.join(output_dir, 'basic.csv') + expected_path = os.path.join(OUTPUT_DIR, 'basic.csv') result_path = f_492(df, 'basic.csv') self.assertEqual(expected_path[expected_path.rindex('/') + 1:], result_path[result_path.rindex('/') + 1: ]) self.assertTrue(os.path.exists(result_path)) diff --git a/data/raw/f_493_ming.py b/data/raw/f_493_ming.py index 377eb5c5..aa5e2dcc 100644 --- a/data/raw/f_493_ming.py +++ b/data/raw/f_493_ming.py @@ -1,6 +1,6 @@ import pandas as pd import time -output_dir = './output' +OUTPUT_DIR = './output' def f_493(df: pd.DataFrame, filename: str) -> str: @@ -25,10 +25,10 @@ def f_493(df: 
pd.DataFrame, filename: str) -> str: """ start_time = time.time() # Ensure the data directory exists - if not os.path.exists(output_dir): - os.makedirs(output_dir) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) - file_path = os.path.join(output_dir, filename) + file_path = os.path.join(OUTPUT_DIR, filename) # Save DataFrame as JSON Lines with open(file_path, 'w') as file: @@ -51,12 +51,12 @@ class TestCases(unittest.TestCase): @classmethod def setUp(self): """Create the data directory if it doesn't exist.""" - if not os.path.exists(output_dir): - os.makedirs(output_dir) + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) def tearDown(self): """Clean up by removing the data directory and its contents after tests.""" - shutil.rmtree(output_dir, ignore_errors=True) + shutil.rmtree(OUTPUT_DIR, ignore_errors=True) def test_basic_dataframe(self): """Ensure basic DataFrame is saved correctly.""" diff --git a/data/raw/f_495_ming.py b/data/raw/f_495_ming.py index c488ebe8..bab77b4d 100644 --- a/data/raw/f_495_ming.py +++ b/data/raw/f_495_ming.py @@ -12,13 +12,10 @@ def f_495(input_list: list, repetitions: int) -> Any: - Flattens the list with multiple repetitions. - Calculates the mode of the flattened list. - Input: + Parameters: - input_list (list): A list containing elements (can be of any hashable type). - repetitions (int): The number of times the original list should be repeated. - - Output: - - Returns a ModeResult object from scipy.stats containing the mode(s) and count(s). 
- + Requirements: - typing - itertools diff --git a/data/raw/f_502_ming.py b/data/raw/f_502_ming.py index 4ddaf628..52f02ce8 100644 --- a/data/raw/f_502_ming.py +++ b/data/raw/f_502_ming.py @@ -41,12 +41,12 @@ def f_502(pattern: str, directory: str, output_csv: str) -> pd.DataFrame: import unittest import shutil -output_dir = './output' +OUTPUT_DIR = './output' class TestCases(unittest.TestCase): def setUp(self): - self.test_dir = output_dir + self.test_dir = OUTPUT_DIR if not os.path.exists(self.test_dir): os.makedirs(self.test_dir) diff --git a/data/raw/f_503_ming.py b/data/raw/f_503_ming.py index 1a1b7a33..95140aa8 100644 --- a/data/raw/f_503_ming.py +++ b/data/raw/f_503_ming.py @@ -1,7 +1,7 @@ import binascii import hashlib import re -output_dir = './output' +OUTPUT_DIR = './output' def f_503(directory: str, pattern: str = r"(? dict: @@ -45,7 +45,7 @@ def f_503(directory: str, pattern: str = r"(? dic class TestCases(unittest.TestCase): def setUp(self): - self.test_dir = output_dir + self.test_dir = OUTPUT_DIR if not os.path.exists(self.test_dir): os.makedirs(self.test_dir) diff --git a/data/raw/f_505_ming.py b/data/raw/f_505_ming.py index 7f19f55d..d9d61074 100644 --- a/data/raw/f_505_ming.py +++ b/data/raw/f_505_ming.py @@ -40,13 +40,14 @@ def f_505(filename, data, password): return encrypted + import unittest import os import shutil -output_dir = './output' -if not os.path.exists(output_dir): - os.makedirs(output_dir) +OUTPUT_DIR = './output' +if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) def run_tests(): @@ -63,12 +64,12 @@ def tearDown(self): # Check and remove the expected file if it exists # if os.path.exists(FILE_PATH): # os.remove(FILE_PATH) - if os.path.exists(output_dir): - shutil.rmtree(output_dir) + if os.path.exists(OUTPUT_DIR): + shutil.rmtree(OUTPUT_DIR) def test_case_1(self): # Testing basic encryption and file write - file1 = os.path.join(output_dir, 'test1.txt') + file1 = os.path.join(OUTPUT_DIR, 'test1.txt') encrypted = 
f_505(file1, 'Hello, World!', 'password123') with open(file1, 'r') as f: file_content = f.read() @@ -76,7 +77,7 @@ def test_case_1(self): def test_case_2(self): # Testing with different data and password - file2 = os.path.join(output_dir, 'test2.txt') + file2 = os.path.join(OUTPUT_DIR, 'test2.txt') encrypted = f_505(file2, 'OpenAI', 'secret') with open(file2, 'r') as f: file_content = f.read() @@ -84,7 +85,7 @@ def test_case_2(self): def test_case_3(self): # Testing with special characters in data and password - file3 = os.path.join(output_dir, 'test3.txt') + file3 = os.path.join(OUTPUT_DIR, 'test3.txt') data = '!@#$%^&*()_+' password = 'special_chars' encrypted = f_505(file3, data, password) @@ -94,7 +95,7 @@ def test_case_3(self): def test_case_4(self): # Testing file creation if it doesn't exist - file4 = os.path.join(output_dir, 'nonexistent_file.txt') + file4 = os.path.join(OUTPUT_DIR, 'nonexistent_file.txt') if os.path.exists(file4): os.remove(file4) encrypted = f_505(file4, 'Test Data', 'pwd') @@ -102,7 +103,7 @@ def test_case_4(self): def test_case_5(self): # Testing decryption to ensure encryption is reversible - file5 = os.path.join(output_dir, 'test5.txt') + file5 = os.path.join(OUTPUT_DIR, 'test5.txt') data = 'Decryption Test' password = 'decrypt_pwd' encrypted = f_505(file5, data, password) diff --git a/data/raw/f_507_ming.py b/data/raw/f_507_ming.py index acac7446..4d553b62 100644 --- a/data/raw/f_507_ming.py +++ b/data/raw/f_507_ming.py @@ -1,9 +1,9 @@ import os import pandas as pd from dateutil.parser import parse -output_dir = './output' +OUTPUT_DIR = './output' -def f_507(csv_path=os.path.join(output_dir, 'data.csv'), date_column='date'): +def f_507(csv_path=os.path.join(OUTPUT_DIR, 'data.csv'), date_column='date'): """ Read a CSV file, convert a column of date strings into datetime objects, and draw a histogram of the year distribution of these dates. 
@@ -47,7 +47,7 @@ def f_507(csv_path=os.path.join(output_dir, 'data.csv'), date_column='date'): class TestCases(unittest.TestCase): def setUp(self): - self.output_dir = './output' + self.output_dir = OUTPUT_DIR if not os.path.exists(self.output_dir): os.makedirs(self.output_dir) diff --git a/script/parse.py b/script/parse.py index 7e46d5af..ba81eadd 100644 --- a/script/parse.py +++ b/script/parse.py @@ -9,6 +9,10 @@ from glob import glob from pprint import pprint from tqdm import tqdm +import folium +import geopy +import librosa +import Crypto def extract_apis(code): From 831e02c1fa31343e7b45d926e6ac97047a1cfd1b Mon Sep 17 00:00:00 2001 From: han hu <543160303@qq.com> Date: Sun, 5 May 2024 01:46:41 +1000 Subject: [PATCH 2/3] refine mings --- data/raw/f_427_ming.py | 42 ++++++++++++-------- data/raw/f_429_ming.py | 45 ++++++++++----------- data/raw/f_430_ming.py | 57 ++++++++++++++------------ data/raw/f_433_ming.py | 5 ++- data/raw/f_434_ming.py | 63 ++++++++++++++++------------- data/raw/f_436_ming.py | 90 ++++++++++++++++++++---------------------- data/raw/f_437_ming.py | 4 +- data/raw/f_440_ming.py | 4 +- data/raw/f_441_ming.py | 3 +- data/raw/f_443_ming.py | 5 ++- data/raw/f_444_ming.py | 46 +++++++++++---------- data/raw/f_445_ming.py | 5 ++- data/raw/f_446_ming.py | 25 ++++++------ data/raw/f_447_ming.py | 15 ++++--- data/raw/f_448_ming.py | 14 +++---- data/raw/f_450_ming.py | 48 +++++++++++----------- data/raw/f_451_ming.py | 66 ++++++++++++++++++------------- data/raw/f_455_ming.py | 5 ++- data/raw/f_456_ming.py | 4 +- data/raw/f_459_ming.py | 5 ++- data/raw/f_460_ming.py | 5 ++- data/raw/f_461_ming.py | 4 +- data/raw/f_462_ming.py | 6 +-- data/raw/f_463_ming.py | 1 + data/raw/f_464_ming.py | 9 +++-- data/raw/f_466_ming.py | 3 +- data/raw/f_467_ming.py | 42 ++++++++------------ data/raw/f_473_ming.py | 15 ++++--- data/raw/f_474_ming.py | 5 ++- data/raw/f_475_ming.py | 3 +- data/raw/f_476_ming.py | 3 +- data/raw/f_477_ming.py | 5 ++- data/raw/f_478_ming.py | 
4 +- data/raw/f_479_ming.py | 3 +- data/raw/f_481_ming.py | 3 +- data/raw/f_483_ming.py | 10 ++--- data/raw/f_484_ming.py | 2 +- data/raw/f_488_ming.py | 8 ++-- data/raw/f_489_ming.py | 3 +- data/raw/f_490_ming.py | 9 +++-- data/raw/f_491_ming.py | 13 +++--- data/raw/f_492_ming.py | 13 +++--- data/raw/f_494_ming.py | 2 +- data/raw/f_496_ming.py | 22 +++-------- data/raw/f_497_ming.py | 9 ++--- data/raw/f_498_ming.py | 13 +++--- data/raw/f_499_ming.py | 3 +- data/raw/f_502_ming.py | 4 +- data/raw/f_503_ming.py | 2 +- data/raw/f_504_ming.py | 8 ++-- data/raw/f_505_ming.py | 10 ++--- data/raw/f_506_ming.py | 4 +- data/raw/f_508_ming.py | 3 +- data/raw/f_509_ming.py | 2 +- data/raw/f_510_ming.py | 6 ++- data/raw/f_512_ming.py | 11 +++--- data/raw/f_513_ming.py | 25 ++++++------ data/raw/f_516_ming.py | 1 - data/raw/f_518_ming.py | 2 +- data/raw/f_522_ming.py | 3 +- data/raw/f_523_ming.py | 2 +- data/raw/f_524_ming.py | 3 +- data/raw/f_525_ming.py | 3 +- script/run.sh | 2 +- 64 files changed, 453 insertions(+), 407 deletions(-) diff --git a/data/raw/f_427_ming.py b/data/raw/f_427_ming.py index b9bb6cc1..00585413 100644 --- a/data/raw/f_427_ming.py +++ b/data/raw/f_427_ming.py @@ -54,26 +54,34 @@ def run_tests(): class TestCases(unittest.TestCase): - def test_case_1(self): - result = f_427(['1a2b3c4d', '5e6f7g8h']) - self.assertEqual(result, '426614caa490f2c185aebf58f1d4adac') - def test_case_2(self): + def test_normal_functionality(self): + """Test the function with default parameters.""" result = f_427() - self.assertEqual(result, 'aa1f8c53e0aee57fccd07b90a902579a') - - def test_case_3(self): - result = f_427(['12121212', '34343434']) - self.assertEqual(result, 'b523721fccb8fe2e7bf999e74e25056f') - - def test_case_4(self): - result = f_427(['1VVVVVVV', '3VVVVVVV', 'F3fF3fF3']) - self.assertEqual(result, 'fae7b34f299d23a584fbc19c2fcdf865') - - def test_case_5(self): - # test error message + self.assertIsInstance(result, str) + + def test_custom_keys_list(self): + 
"""Test the function with a custom list of hexadecimal keys.""" + custom_keys = ['1A2FC614', '1B0FC614', '1C9FC614'] + result = f_427(hex_keys=custom_keys) + self.assertIsInstance(result, str) + + def test_empty_key_list(self): + """Test the function with an empty list to check for error handling.""" + with self.assertRaises(IndexError): + f_427(hex_keys=[]) + + def test_invalid_hexadecimal(self): + """Test the function with an invalid hexadecimal string.""" + invalid_keys = ['ZZZ', '4A0FC614'] with self.assertRaises(ValueError): - f_427(['1a2b3c4d', '5e6f7g8h', 'invalid_hex']) + f_427(hex_keys=invalid_keys) + + def test_consistent_output_with_same_seed(self): + """Test that the same seed returns the same result.""" + result1 = f_427(seed=99) + result2 = f_427(seed=99) + self.assertEqual(result1, result2) if __name__ == "__main__": diff --git a/data/raw/f_429_ming.py b/data/raw/f_429_ming.py index 8ff83f99..daf0506c 100644 --- a/data/raw/f_429_ming.py +++ b/data/raw/f_429_ming.py @@ -33,34 +33,34 @@ def f_429(hex_string=KEY): class TestCases(unittest.TestCase): - def test_case_1(self): - # Test with default key + def test_default_functionality(self): + """Test the function with default parameters.""" result = f_429() - self.assertEqual(result, b'x\x9c\xf3\xeb\x93\xef\x01\x00\x03\xb0\x01\x88') + self.assertIsInstance(result, bytes) - def test_case_2(self): - # Test with a different hex string - hex_string = "ABCD12" + def test_valid_custom_hex_string(self): + """Test the function with a valid custom hexadecimal string.""" + hex_string = '1A2FC614' # Example hex string result = f_429(hex_string) - self.assertEqual(result, b'x\x9c\xf3\xd6>+\x04\x00\x03]\x01V') + self.assertIsInstance(result, bytes) - def test_case_3(self): - # Test with another different hex string - hex_string = "DEADBEEF" - result = f_429(hex_string) - self.assertEqual(result, b'x\x9c\xf3\x8f[\xbb\x1f\x00\x04s\x02\x1a') + def test_invalid_hex_string(self): + """Test the function with an invalid 
hexadecimal string.""" + with self.assertRaises(ValueError): + f_429(hex_string='ZZZZZZZZ') - def test_case_4(self): - # Test with a hex string that has a smaller length - hex_string = "00AA" - result = f_429(hex_string) - self.assertEqual(result, b'x\x9cs\xd6b`\x00\x00\x01\x8e\x00n') + def test_boundary_hex_value(self): + """Test the function with a large boundary hexadecimal value.""" + boundary_hex = 'FFFFFFFF' # Maximum float value before overflow in some contexts + result = f_429(boundary_hex) + self.assertIsInstance(result, bytes) + + def test_zero_value(self): + """Test the function with a hex string representing zero.""" + zero_hex = '00000000' + result = f_429(zero_hex) + self.assertIsInstance(result, bytes) - def test_case_5(self): - # Test with a hex string that has a larger length - hex_string = "00AABBCCDDEE" - result = f_429(hex_string) - self.assertEqual(result, b'x\x9c\x0b\xd6\xda}\x16\x00\x04\x11\x02\x06') def run_tests(): suite = unittest.TestSuite() @@ -68,6 +68,7 @@ def run_tests(): runner = unittest.TextTestRunner() runner.run(suite) + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/data/raw/f_430_ming.py b/data/raw/f_430_ming.py index 11fa92a9..24df3fb8 100644 --- a/data/raw/f_430_ming.py +++ b/data/raw/f_430_ming.py @@ -40,35 +40,40 @@ def run_tests(): runner = unittest.TextTestRunner() runner.run(suite) + class TestCases(unittest.TestCase): - # Utility function to decode bytes and convert to float - def bytes_to_float(self, byte_val): - return float(codecs.decode(byte_val, 'utf-8')) - def test_case_1(self): - random.seed(42) + def test_default_functionality(self): + """Test the function with default parameters.""" result = f_430() - self.assertEqual(result, b'36806.078125') - - def test_case_2(self): - result = f_430(['5D7FC614']) - self.assertEqual(result, b'1.1519025322058056e+18') - - def test_case_3(self): - # Checking consistency over multiple runs - random.seed(0) - result = f_430(['ABCD1234', 'DEADBEEF', 
'00AABEEF']) - self.assertEqual(result, b'-6.259853398707798e+18') - - def test_case_4(self): - result = f_430(['00000000']) - self.assertEqual(result, b'0.0') - - def test_case_5(self): - # Checking the decoding process - result = f_430(['AAAAAAAA']) - self.assertEqual(result, b'-3.0316488252093987e-13') - + self.assertIsInstance(result, bytes) # Check if output is correctly encoded in UTF-8 + + def test_custom_hex_keys(self): + """Test the function with a custom list of hexadecimal keys.""" + custom_keys = ['1A2FC614', '1B0FC614', '1C9FC614'] + result = f_430(hex_keys=custom_keys) + self.assertIsInstance(result, bytes) + + def test_empty_list(self): + """Test the function with an empty list.""" + with self.assertRaises(IndexError): # Assuming random.choice will raise IndexError on empty list + f_430(hex_keys=[]) + + def test_consistency_of_output(self): + """Ensure that the output is consistent with a fixed seed.""" + random.seed(42) # Set the seed for predictability + first_result = f_430() + random.seed(42) # Reset seed to ensure same choice is made + second_result = f_430() + self.assertEqual(first_result, second_result) + + def test_invalid_hex_key(self): + """Test with an invalid hex key.""" + invalid_keys = ['ZZZZZZZZ', 'XXXX'] + with self.assertRaises(ValueError): + f_430(hex_keys=invalid_keys) + + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/data/raw/f_433_ming.py b/data/raw/f_433_ming.py index 72b78d9f..a4569d87 100644 --- a/data/raw/f_433_ming.py +++ b/data/raw/f_433_ming.py @@ -4,8 +4,8 @@ def f_433(df): """ - Encodes a dict of list as a Base64 string. The dict is first converted to CSV format, - then encoded to bytes, and finally encoded to a Base64 string. + Encodes a dict of list as a Base64 string. The dict is first converted to a Pandas DataFrame. + Then convert the data franme to CSV format and encoded to bytes, finally encoded it to a Base64 string. 
Parameters: df (dict of list): A dictionary where the key 'Word' maps to a list of strings. @@ -37,6 +37,7 @@ def f_433(df): import unittest from io import StringIO + class TestCases(unittest.TestCase): def test_encode_basic_dataframe(self): df = {'A': [1, 2, 3], 'B': [4, 5, 6]} diff --git a/data/raw/f_434_ming.py b/data/raw/f_434_ming.py index f5c61454..9e3710c6 100644 --- a/data/raw/f_434_ming.py +++ b/data/raw/f_434_ming.py @@ -5,7 +5,7 @@ def f_434(list_of_menuitems): """ Given a nested list of menu items, this function flattens the list and returns a Pandas DataFrame - detailing the count of each individual menu item. + detailing the count of each individual menu item with index name 'MenuItem'. Parameters: list_of_menuitems (list): A nested list of menu items. @@ -44,33 +44,42 @@ def run_tests(): runner = unittest.TextTestRunner() runner.run(suite) + class TestCases(unittest.TestCase): - def test_case_1(self): - result = f_434([['Pizza', 'Burger'], ['Pizza', 'Coke'], ['Pasta', 'Coke']]) - expected_result = pd.DataFrame({'Count': [2, 1, 2, 1]}, - index=pd.Index(['Pizza', 'Burger', 'Coke', 'Pasta'], name='MenuItem')) - pd.testing.assert_frame_equal(result, expected_result) - - def test_case_2(self): - result = f_434([['Bread', 'Butter'], ['Bread', 'Jam'], ['Bread', 'Jam'], ['Butter', 'Jam']]) - expected_result = pd.DataFrame({'Count': [3, 2, 3]}, - index=pd.Index(['Bread', 'Butter', 'Jam'], name='MenuItem')) - pd.testing.assert_frame_equal(result, expected_result) - - def test_case_3(self): - result = f_434([['Tea', 'Coffee'], ['Tea', 'Milk'], ['Coffee', 'Milk']]) - expected_result = pd.DataFrame({'Count': [2, 2, 2]}, index=pd.Index(['Tea', 'Coffee', 'Milk'], name='MenuItem')) - pd.testing.assert_frame_equal(result, expected_result) - - def test_case_4(self): - result = f_434([['Sandwich'], ['Sandwich', 'Juice'], ['Coffee']]) - expected_result = pd.DataFrame({'Count': [2, 1, 1]}, - index=pd.Index(['Sandwich', 'Juice', 'Coffee'], name='MenuItem')) - 
pd.testing.assert_frame_equal(result, expected_result) - - def test_case_5(self): - result = f_434([[], [], []]) - self.assertTrue(result.empty) + + def test_normal_functionality(self): + """Test the function with typical nested lists.""" + input_list = [['apple', 'banana'], ['apple'], ['banana', 'orange']] + expected_df = pd.DataFrame({'Count': [2, 2, 1]}, index=['apple', 'banana', 'orange']) + expected_df.index.name = 'MenuItem' + pd.testing.assert_frame_equal(f_434(input_list), expected_df) + + def test_empty_list(self): + """Test the function with an empty list.""" + expected_df = pd.DataFrame(columns=['Count']) + expected_df.index.name = 'MenuItem' + pd.testing.assert_frame_equal(f_434([]), expected_df) + + def test_single_level_list(self): + """Test with a non-nested, single-level list.""" + input_list = [['apple', 'banana', 'apple']] + expected_df = pd.DataFrame({'Count': [2, 1]}, index=['apple', 'banana']) + expected_df.index.name = 'MenuItem' + pd.testing.assert_frame_equal(f_434(input_list), expected_df) + + def test_uniform_list(self): + """Test with a list where all sublists contain the same item.""" + input_list = [['apple'], ['apple'], ['apple']] + expected_df = pd.DataFrame({'Count': [3]}, index=['apple']) + expected_df.index.name = 'MenuItem' + pd.testing.assert_frame_equal(f_434(input_list), expected_df) + + def test_duplicate_items_across_sublists(self): + """Ensure items appearing in multiple sublists are counted correctly.""" + input_list = [['apple', 'banana'], ['banana', 'banana', 'apple']] + expected_df = pd.DataFrame({'Count': [2, 3]}, index=['apple', 'banana']) + expected_df.index.name = 'MenuItem' + pd.testing.assert_frame_equal(f_434(input_list), expected_df) if __name__ == "__main__": diff --git a/data/raw/f_436_ming.py b/data/raw/f_436_ming.py index 30f0f74f..69fa7bac 100644 --- a/data/raw/f_436_ming.py +++ b/data/raw/f_436_ming.py @@ -3,20 +3,20 @@ import matplotlib.pyplot as plt # Constants -ITEMS = ['apple', 'banana', 'cherry', 
'date', 'elderberry'] +ITEMS = ['apple', 'banana'] -def f_436(a, b): +def f_436(a, b, items=ITEMS): """ Combine two lists and record the frequency of predefined items in the combined list. Parameters: a (list): A list of items. b (list): Another list of items. + items (list, optional): a list of predefined items Returns: - matplotlib.axes.Axes: A bar chart showing the frequency of predefined items - ['apple', 'banana', 'cherry', 'date', 'elderberry'] in the combined list. + matplotlib.axes.Axes: A bar chart showing the frequency of predefined items in the combined list. Requirements: - collections @@ -33,11 +33,11 @@ def f_436(a, b): # Count occurrences of each item counter = collections.Counter(combined) # Get counts for predefined items - item_counts = [counter.get(item, 0) for item in ITEMS] + item_counts = [counter.get(item, 0) for item in items] # Create a bar plot fig, ax = plt.subplots() - ax.bar(ITEMS, item_counts, color='skyblue') + ax.bar(items, item_counts, color='skyblue') ax.set_xlabel('Items') ax.set_ylabel('Frequency') ax.set_title('Item Frequency in Combined List') @@ -50,54 +50,50 @@ def f_436(a, b): import unittest import matplotlib + class TestCases(unittest.TestCase): - def test_case_1(self): + def test_standard_functionality(self): + """Test with typical list inputs.""" a = ['apple', 'banana', 'cherry'] - b = ['date', 'elderberry', 'apple', 'banana', 'cherry'] - result = f_436(a, b) - self.assertIsInstance(result, matplotlib.axes.Axes) - heights = [rect.get_height() for rect in result.patches] - expected_heights = [2, 2, 2, 1, 1] - self.assertEqual(heights, expected_heights) - - def test_case_2(self): - a = [] - b = ['apple', 'apple', 'apple'] - result = f_436(a, b) - heights = [rect.get_height() for rect in result.patches] - expected_heights = [3, 0, 0, 0, 0] - self.assertEqual(heights, expected_heights) - - def test_case_3(self): - """Test the function with a list where some items have the same count.""" - a = ['banana', 'cherry', 'date'] - b 
= ['banana', 'cherry', 'date'] + b = ['banana', 'apple', 'apple', 'dragonfruit'] ax = f_436(a, b) - rects = ax.containers[0] - heights = [rect.get_height() for rect in rects] - expected_heights = [0, 2, 2, 2, 0] - self.assertEqual(heights, expected_heights) - - def test_case_4(self): - """Test the function with a list where one item appears multiple times.""" - a = ['elderberry', 'elderberry'] - b = ['elderberry'] + self.assertIsInstance(ax, plt.Axes) + + def test_empty_lists(self): + """Test with both lists empty.""" + a = [] + b = [] ax = f_436(a, b) - rects = ax.containers[0] - heights = [rect.get_height() for rect in rects] - expected_heights = [0, 0, 0, 0, 3] # Elderberry appears 3 times, others appear 0 times - self.assertEqual(heights, expected_heights) - - def test_case_5(self): - """Test the function with a single non-empty list and an empty list.""" - a = ['apple', 'banana', 'cherry', 'date', 'elderberry'] + self.assertIsInstance(ax, plt.Axes) + + def test_one_empty_list(self): + """Test with one list empty.""" + a = ['apple', 'apple'] b = [] ax = f_436(a, b) - rects = ax.containers[0] - heights = [rect.get_height() for rect in rects] - expected_heights = [1, 1, 1, 1, 1] # Each item appears once - self.assertEqual(heights, expected_heights) + self.assertIsInstance(ax, plt.Axes) + + def test_non_predefined_items_only(self): + """Test with lists containing non-predefined items.""" + a = ['cherry', 'dragonfruit'] + b = ['cherry', 'mango'] + ax = f_436(a, b) + self.assertIsInstance(ax, plt.Axes) + + def test_all_predefined_items(self): + """Test with lists containing only predefined items.""" + a = ['apple', 'apple'] + b = ['banana'] + ax = f_436(a, b) + self.assertIsInstance(ax, plt.Axes) + + def test_duplicate_items(self): + """Test with lists containing duplicate items.""" + a = ['apple', 'apple'] + b = ['apple', 'banana', 'banana'] + ax = f_436(a, b) + self.assertIsInstance(ax, plt.Axes) def run_tests(): diff --git a/data/raw/f_437_ming.py 
b/data/raw/f_437_ming.py index da7ec9b8..cd658ab5 100644 --- a/data/raw/f_437_ming.py +++ b/data/raw/f_437_ming.py @@ -8,7 +8,9 @@ def f_437(a, b): """ - Generate a pandas DataFrame with random values based on two lists and plot the DataFrame as a bar chart. + Generate a pandas DataFrame with random values based on lists 'a' and 'b', and plot it as a bar chart. + List 'a' sets the DataFrame's row indices, while the length of list 'b' determines the number of columns + using predefined names from the 'COLUMNS = ['A', 'B', 'C', 'D', 'E']' list. Parameters: - a (list): A list used to define the number of rows in the DataFrame. diff --git a/data/raw/f_440_ming.py b/data/raw/f_440_ming.py index 95729499..78dafd31 100644 --- a/data/raw/f_440_ming.py +++ b/data/raw/f_440_ming.py @@ -5,8 +5,8 @@ def f_440(a, b): """ - Calculate the Euclidean distance between two lists, create a Pandas DataFrame from these lists, - and then draw the values with a line displaying the Euclidean distance. + Calculate the Euclidean distance between two lists, create a Pandas DataFrame from these lists + with indices 'A' and 'B', and then draw the values with a line displaying the Euclidean distance. Parameters: a (list): A list of numbers. diff --git a/data/raw/f_441_ming.py b/data/raw/f_441_ming.py index d4706521..9497ae9e 100644 --- a/data/raw/f_441_ming.py +++ b/data/raw/f_441_ming.py @@ -5,7 +5,8 @@ def f_441(data): """ - Draw a bar chart with monthly data for a given year. + This function plots a bar chart of monthly data values for a single year, with 'month' on the x-axis and 'value' + on the y-axis. Parameters: data (str): The data string in the format 'yyyy-mm-value'. diff --git a/data/raw/f_443_ming.py b/data/raw/f_443_ming.py index 1fcbe20d..cfedc27e 100644 --- a/data/raw/f_443_ming.py +++ b/data/raw/f_443_ming.py @@ -4,7 +4,9 @@ def f_443(data): """ - Draw a histogram of the data. 
+ This function draws a histogram to visualize the frequency distribution of numeric values provided in a string format, + with 'Value' on the x-axis, 'Frequency' on the y-axis and 'Histogram of Values' as the title. + Parameters: data (str): The data string in the format 'value-value-value-...'. @@ -39,6 +41,7 @@ def f_443(data): return ax + import unittest diff --git a/data/raw/f_444_ming.py b/data/raw/f_444_ming.py index b4cc6de8..a27a78ff 100644 --- a/data/raw/f_444_ming.py +++ b/data/raw/f_444_ming.py @@ -6,7 +6,7 @@ def f_444(array_length=100): """ Generate two arrays of random integers and draw a line diagram with the - maximum values of the respective elements of the two arrays. + maximum values of the respective elements of the two arrays. Set 'Maximum Values' on its y-axis. Parameters: - array_length (int): Length of the random arrays to be generated. Default is 100. @@ -50,26 +50,30 @@ def test_case_1(self): ax = f_444(50) self.assertIsInstance(ax, Axes) self.assertEqual(len(ax.lines[0].get_ydata()), 50) - - def test_case_2(self): - ax = f_444(100) - self.assertIsInstance(ax, Axes) - self.assertEqual(len(ax.lines[0].get_ydata()), 100) - - def test_case_3(self): - ax = f_444(150) - self.assertIsInstance(ax, Axes) - self.assertEqual(len(ax.lines[0].get_ydata()), 150) - - def test_case_4(self): - ax = f_444(200) - self.assertIsInstance(ax, Axes) - self.assertEqual(len(ax.lines[0].get_ydata()), 200) - - def test_case_5(self): - ax = f_444(250) - self.assertIsInstance(ax, Axes) - self.assertEqual(len(ax.lines[0].get_ydata()), 250) + + def test_standard_functionality(self): + """Test the function with default array length.""" + ax = f_444() + self.assertIsInstance(ax, plt.Axes) + + def test_zero_length_array(self): + """Test the function with zero array length.""" + ax = f_444(0) + self.assertIsInstance(ax, plt.Axes) + self.assertEqual(len(ax.lines[0].get_ydata()), 0) # Expect no data points in the plot + + def test_non_default_length_array(self): + """Test 
the function with non-default array lengths."""
+        lengths = [50, 200]
+        for length in lengths:
+            ax = f_444(length)
+            self.assertIsInstance(ax, plt.Axes)
+            self.assertEqual(len(ax.lines[0].get_ydata()), length)
+
+    def test_plot_output(self):
+        """Verify the plot is generated and is of correct type."""
+        ax = f_444()
+        self.assertTrue(hasattr(ax, 'figure'), "Plot does not have associated figure attribute")
 
 
 if __name__ == "__main__":
diff --git a/data/raw/f_445_ming.py b/data/raw/f_445_ming.py
index b2e4e10d..ba691842 100644
--- a/data/raw/f_445_ming.py
+++ b/data/raw/f_445_ming.py
@@ -4,8 +4,9 @@
 
 def f_445(array_length=100):
     '''
-    Generate two arrays of random numbers of a given length, calculate their mean, median, and standard deviation,
-    and draw a bar chart to compare these statistics.
+    Generate two arrays of random numbers of a given length, calculate their mean, median, and standard deviation,
+    then store these results in a Pandas DataFrame 'statistics' with keys 'Array1' and 'Array2'.
+    Draw a bar chart to compare these statistics with indices 'Mean', 'Median', and 'Standard Deviation'.
 
     Parameters:
     - array_length (int, optional): The length of the arrays to be generated. Default is 100.
diff --git a/data/raw/f_446_ming.py b/data/raw/f_446_ming.py index af0b9d03..f1020394 100644 --- a/data/raw/f_446_ming.py +++ b/data/raw/f_446_ming.py @@ -59,20 +59,6 @@ def test_case_1(self): self.assertEqual(ax.get_ylabel(), 'y') self.assertTrue(ax.get_legend() is not None) - def test_case_2(self): - # Test with custom array_length and default noise_level - ax = f_446(array_length=50) - self.assertIsInstance(ax, plt.Axes) - x_data, _ = ax.lines[0].get_data() - self.assertEqual(len(x_data), 50) - - def test_case_3(self): - # Test with default array_length and custom noise_level - ax = f_446(noise_level=0.5) - self.assertIsInstance(ax, plt.Axes) - _, y_data = ax.lines[0].get_data() - self.assertTrue(np.max(np.abs(np.diff(y_data))) <= 0.5 + 1) # considering max amplitude of sine wave - def test_case_4(self): # Test with custom array_length and noise_level ax = f_446(array_length=150, noise_level=0.1) @@ -88,6 +74,17 @@ def test_case_5(self): _, y_data = ax.lines[0].get_data() self.assertTrue(np.max(np.abs(np.diff(y_data))) <= 2.0 + 1) # considering max amplitude of sine wave + def test_varying_noise_levels(self): + """Test the function with different noise levels.""" + for noise in [0, 0.1, 0.5]: + ax = f_446(noise_level=noise) + self.assertIsInstance(ax, plt.Axes) + + def test_plot_outputs(self): + """Check the output to confirm plot was created.""" + ax = f_446() + self.assertTrue(hasattr(ax, 'figure'), "Plot does not have associated figure attribute") + if __name__ == "__main__": import doctest diff --git a/data/raw/f_447_ming.py b/data/raw/f_447_ming.py index 47dffcf8..625d67ff 100644 --- a/data/raw/f_447_ming.py +++ b/data/raw/f_447_ming.py @@ -3,17 +3,17 @@ import numpy as np # Constants -ELEMENTS = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'] -N_GROUPS = 5 -def f_447(l): + +def f_447(l, n_groups = 5): """ Given a list `l`, this function shuffles the list, constructs a dataframe using the shuffled list, - and then for each row in the dataframe, moves the 
first N_GROUPS elements to the end of the same row. + and then for each row in the dataframe, moves the first n_groups elements to the end of the same row. Parameters: - l (list): A list of elements. + - n_groups (int): number of groups. Default value is 5. Returns: - DataFrame: A modified DataFrame constructed from the shuffled list. @@ -34,14 +34,17 @@ def f_447(l): return pd.DataFrame() shuffle(l) - df = pd.DataFrame([l for _ in range(N_GROUPS)]) + df = pd.DataFrame([l for _ in range(n_groups)]) # Ensure rolling does not aggregate rows into lists - df = df.apply(lambda row: np.roll(row, -N_GROUPS), axis=1, result_type='expand') + df = df.apply(lambda row: np.roll(row, -n_groups), axis=1, result_type='expand') return df import unittest +ELEMENTS = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'] +N_GROUPS = 5 + class TestCases(unittest.TestCase): def test_with_predefined_elements(self): diff --git a/data/raw/f_448_ming.py b/data/raw/f_448_ming.py index 36223dcb..a84d0693 100644 --- a/data/raw/f_448_ming.py +++ b/data/raw/f_448_ming.py @@ -1,19 +1,16 @@ from random import shuffle, randint import pandas as pd -# Constants -ELEMENTS = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'] -N_GROUPS = 5 - -def f_448(l): +def f_448(l, n_groups = 5): """ Generate a Series from a list "l". The function shuffles the list, then creates a longer series by cycling through the shuffled list. - For each element in the series, it randomly selects "n" characters + For each element in the series, it randomly selects n_groups characters from the start of the string and moves them to the end. Parameters: - l (list): A list of strings. + - n_groups (int): number of groups. Default value is 5. 
Returns: - pd.Series: A Series where each element is modified by moving "n" @@ -41,7 +38,7 @@ def f_448(l): # Create the full list by applying the precomputed shifts modified_elements = [] - for _ in range(N_GROUPS): + for _ in range(n_groups): for element, (start, end) in zip(l, random_shifts): new_element = element[start:] + element[:end] if len(element) > 1 else element modified_elements.append(new_element) @@ -49,7 +46,10 @@ def f_448(l): # Convert the list to a Series return pd.Series(modified_elements) + import unittest +# Constants +N_GROUPS = 5 class TestCases(unittest.TestCase): def setUp(self): diff --git a/data/raw/f_450_ming.py b/data/raw/f_450_ming.py index 8ea27ade..61be0feb 100644 --- a/data/raw/f_450_ming.py +++ b/data/raw/f_450_ming.py @@ -55,37 +55,35 @@ def run_tests(): class TestCases(unittest.TestCase): - def test_case_1(self): - ax = f_450() - x_data, y_data = ax.lines[0].get_data() - self.assertEqual(len(x_data), SIZE) - self.assertTrue(min(y_data) >= -1 and max(y_data) <= 1) - - def test_case_2(self): - ax = f_450(size=500) - x_data, y_data = ax.lines[0].get_data() - self.assertEqual(len(x_data), 500) - self.assertTrue(min(y_data) >= -1 and max(y_data) <= 1) - - def test_case_3(self): - ax = f_450(frequency=2) - x_data, y_data = ax.lines[0].get_data() - self.assertEqual(len(x_data), SIZE) - self.assertTrue(min(y_data) >= -1 and max(y_data) <= 1) def test_case_4(self): ax = f_450(size=1500, frequency=0.5) x_data, y_data = ax.lines[0].get_data() self.assertEqual(len(x_data), 1500) self.assertTrue(min(y_data) >= -1 and max(y_data) <= 1) - - def test_case_5(self): - size_random = random.randint(500, 1500) - frequency_random = random.uniform(0.1, 3) - ax = f_450(size=size_random, frequency=frequency_random) - x_data, y_data = ax.lines[0].get_data() - self.assertEqual(len(x_data), size_random) - self.assertTrue(min(y_data) >= -1 and max(y_data) <= 1) + + def test_standard_functionality(self): + """Test the function with default parameters.""" + ax 
= f_450() + self.assertIsInstance(ax, plt.Axes) + + def test_varying_sizes(self): + """Test the function with different array sizes.""" + for size in [0, 10, 500, 1500]: + ax = f_450(size=size) + self.assertIsInstance(ax, plt.Axes) + self.assertEqual(len(ax.lines[0].get_xdata()), size) + + def test_different_frequencies(self): + """Test the function with different frequencies.""" + for frequency in [0.5, 1, 2]: + ax = f_450(frequency=frequency) + self.assertIsInstance(ax, plt.Axes) + + def test_plot_output(self): + """Verify the plot is generated and is of correct type.""" + ax = f_450() + self.assertTrue(hasattr(ax, 'figure'), "Plot does not have associated figure attribute") if __name__ == "__main__": diff --git a/data/raw/f_451_ming.py b/data/raw/f_451_ming.py index 3e21d943..fed7d715 100644 --- a/data/raw/f_451_ming.py +++ b/data/raw/f_451_ming.py @@ -3,14 +3,12 @@ import matplotlib.pyplot as plt - -def f_451(size=1000, bin_width=100): +def f_451(size=1000): ''' Create a list of normally distributed random numbers and plot their histogram and probability density function (PDF). Parameters: - size (int): The number of random numbers to generate. Default is 1000. - - bin_width (int): Width of the bins for the histogram. Default is 100. 
Requirements: - numpy @@ -22,7 +20,7 @@ def f_451(size=1000, bin_width=100): Example: >>> import matplotlib - >>> fig = f_451(size=500, bin_width=50) + >>> fig = f_451(size=500) >>> isinstance(fig, matplotlib.figure.Figure) # Check if the output is a matplotlib figure object True >>> len(fig.axes[0].lines) == 1 # Ensure there is one line plot on the axes for the PDF @@ -32,8 +30,7 @@ def f_451(size=1000, bin_width=100): ''' data = np.random.randn(size) mu, std = stats.norm.fit(data) - - # Adjusting bin calculation using numpy's histogram_bin_edges + bin_edges = np.histogram_bin_edges(data, bins='auto') number_of_bins = len(bin_edges) - 1 @@ -65,29 +62,42 @@ def test_case_1(self): self.assertGreaterEqual(len(ax.patches), 5, "Expected at least 5 bars in the histogram") self.assertEqual(len(ax.lines), 1, "Expected 1 line for the PDF plot") - def test_case_2(self): - fig = f_451(size=500, bin_width=50) - ax = fig.axes[0] - self.assertGreaterEqual(len(ax.patches), 5, "Expected at least 5 bars in the histogram") - self.assertEqual(len(ax.lines), 1, "Expected 1 line for the PDF plot") - - def test_case_3(self): - fig = f_451(size=1500, bin_width=150) - ax = fig.axes[0] - self.assertGreaterEqual(len(ax.patches), 5, "Expected at least 5 bars in the histogram") - self.assertEqual(len(ax.lines), 1, "Expected 1 line for the PDF plot") - - def test_case_4(self): - fig = f_451(size=2000, bin_width=200) - ax = fig.axes[0] - self.assertGreaterEqual(len(ax.patches), 5, "Expected at least 5 bars in the histogram") - self.assertEqual(len(ax.lines), 1, "Expected 1 line for the PDF plot") - - def test_case_5(self): - fig = f_451(size=2500, bin_width=250) + def test_standard_functionality(self): + """Test the function with default parameters.""" + fig = f_451() + self.assertIsInstance(fig, plt.Figure) + + def test_varying_sizes(self): + """Test the function with different array sizes.""" + for size in [100, 500, 2000]: + fig = f_451(size=size) + self.assertIsInstance(fig, plt.Figure) + 
+
+    def test_histogram_pdf_overlay(self):
+        """Verify histogram and PDF line are present in the plot and the number of bins is correct."""
+        np.random.seed(42)
+        test_data = np.random.randn(1000)
+
+        fig, ax = plt.subplots()
+        ax.hist(test_data, bins='auto', density=True, alpha=0.6, color='g')
+        plt.close(fig)  # Ensure plot does not display
+
+        expected_bins = len(np.histogram_bin_edges(test_data, bins='auto')) - 1
+
+        np.random.seed(42)
+        fig = f_451(size=1000)
         ax = fig.axes[0]
-        self.assertGreaterEqual(len(ax.patches), 5, "Expected at least 5 bars in the histogram")
-        self.assertEqual(len(ax.lines), 1, "Expected 1 line for the PDF plot")
+
+        self.assertEqual(len(ax.patches), expected_bins, "Number of histogram bins does not match expected")
+        self.assertEqual(len(ax.lines), 1, "PDF line is not present or is incorrect")
+
+        plt.close(fig)
+
+
+    def test_return_type(self):
+        """Ensure the function returns a matplotlib figure."""
+        result = f_451()
+        self.assertIsInstance(result, plt.Figure)
 
 
 if __name__ == "__main__":
diff --git a/data/raw/f_455_ming.py b/data/raw/f_455_ming.py
index 94c2ea4f..1426011e 100644
--- a/data/raw/f_455_ming.py
+++ b/data/raw/f_455_ming.py
@@ -7,9 +7,10 @@
 SENSORS = ['Temperature', 'Humidity', 'Pressure']
 OUTPUT_DIR = './output'
 
-def f_455(hours, output_dir = OUTPUT_DIR):
+def f_455(hours, output_dir=OUTPUT_DIR):
     """
-    Create sensor data for the specified number of hours and save it in a CSV file.
+    Create sensor data for the specified number of hours and save it in a CSV file
+    with columns 'Time', 'Temperature', 'Humidity' and 'Pressure'.
 
     Parameters:
     - hours (int): The number of hours for which sensor data is to be generated.
diff --git a/data/raw/f_456_ming.py b/data/raw/f_456_ming.py index 4e7bafdb..05814ef7 100644 --- a/data/raw/f_456_ming.py +++ b/data/raw/f_456_ming.py @@ -13,7 +13,8 @@ def f_456(hours, output_dir=OUTPUT_DIR): """ Generates traffic data for different vehicle types over a specified number of hours, - saves the data to a CSV file, and plots the data in a line chart. + saves the data to a CSV file with coloumns 'Time', 'Car', 'Bus', 'Truck', and 'Bike', + and plots the data in a line chart with 'Time' on x-axis and 'Vehicle Count' on y-axis. Parameters: - hours (int): Number of hours to generate data for. @@ -147,6 +148,5 @@ def run_tests(): if __name__ == "__main__": import doctest - doctest.testmod() run_tests() diff --git a/data/raw/f_459_ming.py b/data/raw/f_459_ming.py index 072a247d..c9b1ec3c 100644 --- a/data/raw/f_459_ming.py +++ b/data/raw/f_459_ming.py @@ -6,10 +6,11 @@ def f_459(data, letter): """ - Filters rows in a DataFrame where the 'Name' column values start with a specified letter. + Filters rows in a dictionary where the 'Name' column values start with a specified letter. + First, convert the dict to a DataFrame and then filter rows in this DataFrame. Parameters: - - df (dic): The input dict. It should have a 'Name' key. + - df (dic of list): The input dict. It should have a 'Name' key. - letter (str): The letter to filter the 'Name' column by. Returns: diff --git a/data/raw/f_460_ming.py b/data/raw/f_460_ming.py index b9649c84..b0f9aa2b 100644 --- a/data/raw/f_460_ming.py +++ b/data/raw/f_460_ming.py @@ -5,8 +5,8 @@ def f_460(df, letter): """ The function filters rows in a dict of list in which the values of the 'Word' column begin with a specified letter. - It then calculates the length of the words in the filtered column and returns a dictionary of word lengths - and their respective counts. 
+ It first convert the dict to Datafrome, then calculates the length of the words in the filtered column and returns + a dictionary of word lengths and their respective counts. Parameters: df (dict of list): A dictionary where the key 'Word' maps to a list of strings. @@ -35,6 +35,7 @@ def f_460(df, letter): return count_dict + import unittest def run_tests(): diff --git a/data/raw/f_461_ming.py b/data/raw/f_461_ming.py index 0c4934a8..87dac2b9 100644 --- a/data/raw/f_461_ming.py +++ b/data/raw/f_461_ming.py @@ -3,8 +3,8 @@ def f_461(df, letter): """ - Filters rows in a dictionary where values in the 'Word' column begin with the specified letter, - then calculates the length of the words in the filtered column and returns a histogram plot of the word lengths. + This function converts an input dictionary into a DataFrame, filters rows where 'Word' column values start with a + specified letter, calculates the lengths of these words, and returns returns a histogram plot of the word lengths. Parameters: - df (dict of list): A dictionary where the key 'Word' maps to a list of strings. diff --git a/data/raw/f_462_ming.py b/data/raw/f_462_ming.py index 615d8258..974ac5d3 100644 --- a/data/raw/f_462_ming.py +++ b/data/raw/f_462_ming.py @@ -4,9 +4,9 @@ def f_462(df, letter): """ - The function filters rows in a dictionary of list in which the values of a particular column start with - a particular letter and then calculates the length of the words in the filtered column and returns basic - statistics (mean, median, mode) of the word lengths. + This function converts an input dictionary into a DataFrame, filters rows where 'Word' column values start with a + specified letter, calculates the lengths of these words, and returns basic statistics (mean, median, mode) of the + word lengths. Parameters: df (dict of list): A dictionary where the key 'Word' maps to a list of strings. 
diff --git a/data/raw/f_463_ming.py b/data/raw/f_463_ming.py index 06da7efc..cce731b8 100644 --- a/data/raw/f_463_ming.py +++ b/data/raw/f_463_ming.py @@ -24,6 +24,7 @@ def f_463(df, letter): >>> import pandas as pd >>> words = ['apple', 'banana', 'cherry', 'date', 'apricot', 'blueberry', 'avocado'] >>> df = pd.DataFrame({'Word': words}) + >>> _ = f_463(df, 'apple') """ start_time = time.time() # Validate if 'Word' column exists in df diff --git a/data/raw/f_464_ming.py b/data/raw/f_464_ming.py index f5517992..03b33ac0 100644 --- a/data/raw/f_464_ming.py +++ b/data/raw/f_464_ming.py @@ -6,12 +6,13 @@ OUTPUT_DIR = './output' -def f_464(file_path): +def f_464(file_path, output_dir=OUTPUT_DIR): """ - Create a CSV file with a 2D matrix filled with random lowercase letters. + Create a CSV file containing a 2D matrix populated exclusively with random lowercase letters. Parameters: - file_path (str): The path of the CSV file to be created. + - output_dir (str, optional): The dir of the CSV file to be created. Returns: None: Writes a CSV file to the specified path. @@ -23,8 +24,8 @@ def f_464(file_path): Example: >>> f_464(os.path.join(OUTPUT_DIR, 'random_matrix.csv')) """ - if not os.path.exists(OUTPUT_DIR): - os.mkdir(OUTPUT_DIR) + if not os.path.exists(output_dir): + os.mkdir(output_dir) matrix = pd.DataFrame(np.random.choice(LETTERS, (10, 10))) matrix.to_csv(file_path, sep='\t', header=False, index=False) diff --git a/data/raw/f_466_ming.py b/data/raw/f_466_ming.py index 226f92cf..3e3c857d 100644 --- a/data/raw/f_466_ming.py +++ b/data/raw/f_466_ming.py @@ -3,7 +3,8 @@ def f_466(matrix): """ - Visualize a 2D numeric array (matrix) as a heatmap using matplotlib. + Visualize a 2D numeric array (matrix) as a heatmap using matplotlib, specifying a cmap for the color mapping + and interpolation to control the pixel rendering. Parameters: matrix (array): The 2D numpy array. 
diff --git a/data/raw/f_467_ming.py b/data/raw/f_467_ming.py index 217fdc00..85f7ee0a 100644 --- a/data/raw/f_467_ming.py +++ b/data/raw/f_467_ming.py @@ -2,7 +2,6 @@ from scipy import stats - def f_467(matrix): """ Normalizes a 2D numeric array (matrix) using the Z score. @@ -49,15 +48,12 @@ def run_tests(): class TestCases(unittest.TestCase): - def test_case_1(self): - matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - result = f_467(matrix) - expected_result = pd.DataFrame({ - 0: [-1.224745, 0.0, 1.224745], - 1: [-1.224745, 0.0, 1.224745], - 2: [-1.224745, 0.0, 1.224745] - }) - pd.testing.assert_frame_equal(result, expected_result) + def test_extreme_values_shape(self): + """Test the function with extreme values to ensure output shape is correct.""" + matrix = [[1, 2], [10000, 20000]] + result_df = f_467(matrix) + # Verify that the shape of the result is the same as the input + self.assertEqual(result_df.shape, (2, 2)) def test_case_2(self): matrix = np.array([[2, 5], [5, 2]]) @@ -76,24 +72,20 @@ def test_case_3(self): }) pd.testing.assert_frame_equal(result, expected_result) - def test_case_4(self): - matrix = np.array([[1, 3], [2, 4], [3, 5]]) - result = f_467(matrix) + def test_uniform_data(self): + """Test a matrix where all elements are the same.""" + matrix = [[1, 1], [1, 1]] expected_result = pd.DataFrame({ - 0: [-1.224745, 0.0, 1.224745], - 1: [-1.224745, 0.0, 1.224745] + 0: [0.0, 0.0], + 1: [0.0, 0.0] }) - pd.testing.assert_frame_equal(result, expected_result) + pd.testing.assert_frame_equal(f_467(matrix), expected_result) - def test_case_5(self): - matrix = np.array([[10, 20, 30], [40, 50, 60], [70, 80, 90]]) - result = f_467(matrix) - expected_result = pd.DataFrame({ - 0: [-1.224745, 0.0, 1.224745], - 1: [-1.224745, 0.0, 1.224745], - 2: [-1.224745, 0.0, 1.224745] - }) - pd.testing.assert_frame_equal(result, expected_result) + def test_non_numeric_data(self): + """Test the function with non-numeric data.""" + matrix = [['a', 'b'], ['c', 'd']] + 
with self.assertRaises(TypeError): + f_467(matrix) if __name__ == "__main__": diff --git a/data/raw/f_473_ming.py b/data/raw/f_473_ming.py index 481b91fb..96bd2793 100644 --- a/data/raw/f_473_ming.py +++ b/data/raw/f_473_ming.py @@ -5,16 +5,21 @@ # Constants TEAMS = ['Team A', 'Team B', 'Team C', 'Team D', 'Team E'] -PENALTIES_COST = [100, 200, 300, 400, 500] +PENALTIES_COSTS = [100, 200, 300, 400, 500] -def f_473(goals: dict, penalties: dict) -> pd.DataFrame: +def f_473(goals, penalties, teams=TEAMS, penalties_costs=PENALTIES_COSTS): """ - Create a match report for teams with goals scored and penalties conceded. + Generates a performance report DataFrame for teams, detailing goals and penalties. For each team, the function fetches + goal and penalty counts, calculates 'Penalties Cost' using a random multiplier from a predefined list, and computes + a 'Performance Score' as the non-negative difference between goals and penalties. Return a Dataframe with colomns 'Team', + 'Goals', 'Penalties', 'Penalties Cost' and 'Performance Score'. Parameters: - goals (dict): Team names as keys, numbers of goals scored as values. - penalties (dict): Team names as keys, numbers of penalties incurred as values. + - teams (list, optioanl): input teams. Default value is ['Team A', 'Team B', 'Team C', 'Team D', 'Team E'] + - penalties_costs (list, optional): input penalties_costs. Default value is [100, 200, 300, 400, 500]. Returns: - pd.DataFrame: DataFrame with Team, Goals, Penalties, Penalties Cost, Performance Score. 
@@ -30,10 +35,10 @@ def f_473(goals: dict, penalties: dict) -> pd.DataFrame: >>> report = f_473(goals, penalties) """ report_data = [] - for team in TEAMS: + for team in teams: team_goals = goals.get(team, 0) team_penalties = penalties.get(team, 0) - penalties_cost = team_penalties * choice(PENALTIES_COST) + penalties_cost = team_penalties * choice(penalties_costs) performance_score = np.max([0, team_goals - team_penalties]) report_data.append({ 'Team': team, diff --git a/data/raw/f_474_ming.py b/data/raw/f_474_ming.py index 1d6e1e07..e7f13ece 100644 --- a/data/raw/f_474_ming.py +++ b/data/raw/f_474_ming.py @@ -8,7 +8,10 @@ def f_474_and_plot(goals, penalties): """ - Calculates the net score for each team, returns a scores distribution DataFrame, and plots the distribution. + Calculates net scores for teams ('Team A' to 'Team E') by subtracting penalties from goals and clips scores to stay + within -10 to 10. Visualizes results with a bar chart showing each team's adjusted scores 'Team' on the x-axis and + score values 'Score' on the y-axis. + Parameters: - goals (dict): A dictionary where keys are team names and values are the number of goals scored. diff --git a/data/raw/f_475_ming.py b/data/raw/f_475_ming.py index 3275701b..6ea89349 100644 --- a/data/raw/f_475_ming.py +++ b/data/raw/f_475_ming.py @@ -4,7 +4,8 @@ def f_475(goals, penalties): """ - Visualize the distribution of goals and penalties for a number of teams and return the data as a DataFrame. + Visualize the distribution of goals and penalties for a number of teams and return the data as a + DataFrame with colomns 'Team', 'Goals' and 'Penalties'. Parameters: - goals (dict): A dictionary where keys are team names and values are numbers of goals scored. 
diff --git a/data/raw/f_476_ming.py b/data/raw/f_476_ming.py index 3a16afdc..be35a46d 100644 --- a/data/raw/f_476_ming.py +++ b/data/raw/f_476_ming.py @@ -5,7 +5,8 @@ # Method def f_476(goals, penalties, rng_seed=None): """ - Generate a Pandas DataFrame of the results of football matches for multiple teams, incorporating random goals and penalties. Penalties are converted into fines using a predefined cost. + Generate a Pandas DataFrame with colomns 'Team' and 'Match Result' of the results of football matches for multiple + teams, incorporating random goals and penalties. Penalties are converted into fines using a predefined cost. Parameters: - goals (int): The maximum number of goals a team can score in a match. Must be non-negative. diff --git a/data/raw/f_477_ming.py b/data/raw/f_477_ming.py index ff14a48b..1a391d74 100644 --- a/data/raw/f_477_ming.py +++ b/data/raw/f_477_ming.py @@ -10,8 +10,9 @@ def f_477(goals, penalties, teams=TEAMS, penalty_cost=PENALTY_COST, rng_seed=None): """ - Generate and record a Pandas DataFrame of the results of football matches for multiple teams - with random goals and penalties, and create a bar plot of the results. Penalties are converted into fines according to the penalty costs. + Generate a Dataframe to show the football match results of teams 'Team' with random goals 'Goals' and + penalties 'Penalty Cost', and create a bar plot of the results. Penalties are converted into fines according to the + penalty costs. Parameters: - goals (int): The maximum number of goals a team can score in a match. diff --git a/data/raw/f_478_ming.py b/data/raw/f_478_ming.py index 9f9a6437..2d305488 100644 --- a/data/raw/f_478_ming.py +++ b/data/raw/f_478_ming.py @@ -11,8 +11,8 @@ def f_478(goals, penalties, rng_seed=None, teams=TEAMS): """ Generate and analyze a Pandas DataFrame of football match results for multiple teams, - incorporating random goals and penalties, then visualize the analyzed data. 
Penalties are - converted into fines based on a predetermined penalty cost. + incorporating random goals and penalties, then visualize the analyzed data with colomns 'Team', 'Goals', + and 'Penalty Cost'. Penalties are converted into fines based on a predetermined penalty cost. Parameters: - goals (int): The maximum number of goals a team can score in a match. diff --git a/data/raw/f_479_ming.py b/data/raw/f_479_ming.py index 8a6e156b..373794d6 100644 --- a/data/raw/f_479_ming.py +++ b/data/raw/f_479_ming.py @@ -9,7 +9,8 @@ def f_479(goals, penalties): """ - Generate and visualize a Pandas DataFrame of the results of football matches for multiple teams with random goals and penalties. Penalties are converted into fines according to penalty costs. + Generate and visualize a Pandas DataFrame of the results of football matches for multiple teams 'Team' with + random goals 'Goals' and penalties 'Penalty Cost'. Penalties are converted into fines according to penalty costs. Parameters: goals (int): The maximum number of goals a team can score in a match. diff --git a/data/raw/f_481_ming.py b/data/raw/f_481_ming.py index 87dff92a..8797a961 100644 --- a/data/raw/f_481_ming.py +++ b/data/raw/f_481_ming.py @@ -6,7 +6,8 @@ def f_481(L): ''' - Convert a list of lists 'L' into a Pandas DataFrame filled with random integers, with the number of rows and columns corresponding to the integers in the nested lists. + Generates a DataFrame filled with random integers. The dimensions of the DataFrame (number of rows and columns) + are determined by multiplying pairs of integers from nested lists within the input list of lists 'L'. 
Requirements: - numpy diff --git a/data/raw/f_483_ming.py b/data/raw/f_483_ming.py index f647a96d..81ba1719 100644 --- a/data/raw/f_483_ming.py +++ b/data/raw/f_483_ming.py @@ -54,31 +54,31 @@ def test_case_1(self): L = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] ax = f_483(L) self.assertIsInstance(ax, plt.Axes) - self.assertIn("Fit results:", ax.get_title()) + def test_case_2(self): L = [[10, 20, 30], [40, 50, 60], [70, 80, 90]] ax = f_483(L) self.assertIsInstance(ax, plt.Axes) - self.assertIn("Fit results:", ax.get_title()) + # self.assertIn("Fit results:", ax.get_title()) def test_case_3(self): L = [[-1, -2, -3], [-4, -5, -6], [-7, -8, -9]] ax = f_483(L) self.assertIsInstance(ax, plt.Axes) - self.assertIn("Fit results:", ax.get_title()) + # self.assertIn("Fit results:", ax.get_title()) def test_case_4(self): L = [[0, 0, 0], [0, 0, 0], [0, 0, 0]] ax = f_483(L) self.assertIsInstance(ax, plt.Axes) - self.assertIn("Fit results:", ax.get_title()) + # self.assertIn("Fit results:", ax.get_title()) def test_case_5(self): L = [[5, 15, 25], [35, 45, 55], [65, 75, 85]] ax = f_483(L) self.assertIsInstance(ax, plt.Axes) - self.assertIn("Fit results:", ax.get_title()) + # self.assertIn("Fit results:", ax.get_title()) if __name__ == "__main__": diff --git a/data/raw/f_484_ming.py b/data/raw/f_484_ming.py index 4abfe07f..2fb04039 100644 --- a/data/raw/f_484_ming.py +++ b/data/raw/f_484_ming.py @@ -6,7 +6,7 @@ def f_484(L): """ Convert a list of lists into a list of integers, apply the KMeans clustering, - and return a scatter plot with data points color-coded by their cluster. + and return a scatter plot 'matplotlib.axes.Axes' with data points color-coded by their cluster. Requirements: - itertools.chain diff --git a/data/raw/f_488_ming.py b/data/raw/f_488_ming.py index 4b9e3416..54bcc393 100644 --- a/data/raw/f_488_ming.py +++ b/data/raw/f_488_ming.py @@ -5,11 +5,9 @@ def f_488(products_list): """ - Generate a DataFrame of sales data for a list of products. 
- - Functionality: - This function takes in a list of product names and generates random sales data for each product over a period of 12 months. - It then calculates the average sales for each product and returns the results as a pandas DataFrame. + This function takes in a list of product names and generates random sales data for each product over a period of + 12 months. It then calculates the average sales for each product and returns the results as a pandas DataFrame with + columns: 'Product', 'Month 1', 'Month 2', ..., 'Month 12', 'Average Sales'.. Parameters: products_list (list): A list of product names. diff --git a/data/raw/f_489_ming.py b/data/raw/f_489_ming.py index 9b57546e..71cf8e0f 100644 --- a/data/raw/f_489_ming.py +++ b/data/raw/f_489_ming.py @@ -5,7 +5,8 @@ def f_489(): """ - Create and draw a sine wave with random frequency, amplitude and phase shift. + Create and draw a sine wave with random frequency, amplitude and phase shift. The return ax object + has 'Random Sine Wave' title, 'Time' on the x axis and 'Amplitude' on the y axis. Parameters: None diff --git a/data/raw/f_490_ming.py b/data/raw/f_490_ming.py index 1917189c..fd9526c8 100644 --- a/data/raw/f_490_ming.py +++ b/data/raw/f_490_ming.py @@ -3,13 +3,14 @@ OUTPUT_DIR = './output' -def f_490(dataset, filename): +def f_490(dataset, filename, output_dir=OUTPUT_DIR): """ Writes multiple Pandas DataFrames to a single CSV file, separating each DataFrame by a line of hyphens ("------"). Parameters: - dataset (list of pd.DataFrame): A list containing the DataFrames to be written to the file. - filename (str): The name of the file (excluding the path) where the DataFrames will be written. + - output_dir (str, optional): the ouput directory. Returns: None: The function writes the DataFrames to a CSV file but does not return any value. 
@@ -26,9 +27,9 @@ def f_490(dataset, filename): """ start_time = time.time() - if not os.path.exists(OUTPUT_DIR): - os.makedirs(OUTPUT_DIR) - filepath = os.path.join(OUTPUT_DIR, filename) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + filepath = os.path.join(output_dir, filename) with open(filepath, 'w', newline='') as f: for i, df in enumerate(dataset): if i > 0: diff --git a/data/raw/f_491_ming.py b/data/raw/f_491_ming.py index 4ef7d685..172219ce 100644 --- a/data/raw/f_491_ming.py +++ b/data/raw/f_491_ming.py @@ -3,13 +3,14 @@ OUTPUT_DIR = './output' -def f_491(df, filename): +def f_491(df, filename, output_dir=OUTPUT_DIR): """ Save a Pandas DataFrame to a JSON file in a specified directory. Parameters: - df (DataFrame): A Pandas DataFrame to be saved. - filename (str): The filename of the JSON file where the DataFrame will be saved. + - df (DataFrame): A Pandas DataFrame to be saved. + - filename (str): The filename of the JSON file where the DataFrame will be saved. + - output_dir (str, optional): the ouput directory. Returns: str: The full file path where the DataFrame is saved. @@ -26,9 +27,9 @@ def f_491(df, filename): >>> 'data.json' in f_491(df, 'data.json') True """ - if not os.path.exists(OUTPUT_DIR): - os.makedirs(OUTPUT_DIR) - file_path = os.path.join(OUTPUT_DIR, filename) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + file_path = os.path.join(output_dir, filename) df_clean = df.where(pd.notnull(df), None) with open(file_path, 'w') as f: df_clean.to_json(f, orient='records') diff --git a/data/raw/f_492_ming.py b/data/raw/f_492_ming.py index 637ed823..f13e97cb 100644 --- a/data/raw/f_492_ming.py +++ b/data/raw/f_492_ming.py @@ -3,7 +3,7 @@ OUTPUT_DIR = './output' -def f_492(df, filename): +def f_492(df, filename, output_dir=OUTPUT_DIR): """ Save a Pandas DataFrame to a CSV file in a specified directory. 
@@ -11,8 +11,9 @@ def f_492(df, filename): The CSV file will be saved in the 'data' directory relative to the parent directory of this script. Parameters: - df (pandas.DataFrame): A Pandas DataFrame to be saved. - filename (str): The filename of the CSV file where the DataFrame will be saved. + - df (pandas.DataFrame): A Pandas DataFrame to be saved. + - filename (str): The filename of the CSV file where the DataFrame will be saved. + - output_dir (str, optional): the ouput directory. Returns: str: The absolute path of the saved CSV file. @@ -29,10 +30,10 @@ def f_492(df, filename): True """ # Ensure the data directory exists - if not os.path.exists(OUTPUT_DIR): - os.makedirs(OUTPUT_DIR) + if not os.path.exists(output_dir): + os.makedirs(output_dir) - file_path = os.path.join(OUTPUT_DIR, filename) + file_path = os.path.join(output_dir, filename) df.to_csv(file_path, index=False, quoting=csv.QUOTE_NONNUMERIC) return os.path.abspath(file_path) diff --git a/data/raw/f_494_ming.py b/data/raw/f_494_ming.py index ae642ad4..f39844d7 100644 --- a/data/raw/f_494_ming.py +++ b/data/raw/f_494_ming.py @@ -4,7 +4,7 @@ def f_494(text: str) -> dict: """ - Analyzes a given text string by removing duplicate words and stopwords, + Analyzes a given text string by removing duplicate words and stopwords defined by nltk.corpus , and then returns a frequency distribution of the remaining words. 
Parameters: diff --git a/data/raw/f_496_ming.py b/data/raw/f_496_ming.py index 639d893a..50e120c4 100644 --- a/data/raw/f_496_ming.py +++ b/data/raw/f_496_ming.py @@ -4,23 +4,12 @@ import numpy as np import pandas as pd from sklearn.feature_extraction.text import CountVectorizer - -# Hard-coded list of common English stopwords for demonstration purposes -STOPWORDS = set(["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", - "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", - "her", "hers", "herself", "it", "its", "itself", "they", "them", "their", - "theirs", "themselves", "what", "which", "who", "whom", "this", "that", - "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", - "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", - "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", - "at", "by", "for", "with", "about", "against", "between", "into", "through", - "during", "before", "after", "above", "below", "to", "from", "up", "down", - "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "simple", "test"]) +from nltk.corpus import stopwords def f_496(text, n=2): """ - Analyzes a text string, removing duplicate consecutive words and stopwords, + Analyzes a text string, removing duplicate consecutive words and stopwords defined by nltk.corpus, generates a square co-occurrence matrix of words, and plots this matrix. 
Parameters: @@ -38,6 +27,7 @@ def f_496(text, n=2): - matplotlib.pyplot - numpy - sklearn.feature_extraction.text + - nltk.corpus Example: >>> import matplotlib @@ -55,9 +45,9 @@ def f_496(text, n=2): # Pre-processing the text # Remove duplicate consecutive words text = re.sub(r'\b(\w+)( \1\b)+', r'\1', text) - + stop_words = set(stopwords.words('english')) # Remove stopwords - words_filtered = ' '.join([word for word in text.lower().split() if word not in STOPWORDS]) + words_filtered = ' '.join([word for word in text.lower().split() if word not in stop_words]) # If words_filtered is empty after removing stopwords, return an empty DataFrame if not words_filtered.strip(): @@ -97,7 +87,7 @@ def test_simple_text(self): def test_text_with_stopwords(self): """Test text with stopwords removed.""" - text = "this is a simple test" + text = "this is a" matrix, _ = f_496(text) self.assertTrue(matrix.empty, "Matrix should be empty after removing stopwords.") diff --git a/data/raw/f_497_ming.py b/data/raw/f_497_ming.py index f8d8e107..5faa3e32 100644 --- a/data/raw/f_497_ming.py +++ b/data/raw/f_497_ming.py @@ -67,35 +67,34 @@ def test_case_1(self): df, ax = f_497(0) self.assertTrue(df.empty) self.assertEqual(len(ax.patches), 0) - self.assertEqual(ax.title.get_text(), 'Non-Zero Value Counts') + def test_case_2(self): # Test when rows is 1 df, ax = f_497(1) self.assertEqual(len(df), 1) self.assertEqual(len(ax.patches), 5) - self.assertEqual(ax.title.get_text(), 'Non-Zero Value Counts') + def test_case_3(self): # Test when rows is 10 df, ax = f_497(10) self.assertEqual(len(df), 10) self.assertEqual(len(ax.patches), 5) - self.assertEqual(ax.title.get_text(), 'Non-Zero Value Counts') + def test_case_4(self): # Test when rows is negative df, ax = f_497(-5) self.assertTrue(df.empty) self.assertEqual(len(ax.patches), 0) - self.assertEqual(ax.title.get_text(), 'Non-Zero Value Counts') + def test_case_5(self): # Test when rows is large (e.g., 1000) df, ax = f_497(1000) 
self.assertEqual(len(df), 1000) self.assertEqual(len(ax.patches), 5) - self.assertEqual(ax.title.get_text(), 'Non-Zero Value Counts') if __name__ == "__main__": diff --git a/data/raw/f_498_ming.py b/data/raw/f_498_ming.py index b7ac9c7b..7c107ded 100644 --- a/data/raw/f_498_ming.py +++ b/data/raw/f_498_ming.py @@ -5,16 +5,12 @@ import pandas as pd -# Constants -STUDENTS = ['Student' + str(i) for i in range(1, 101)] -COURSES = ['Course' + str(i) for i in range(1, 6)] - -def f_498(num_students: int) -> Tuple[pd.DataFrame, plt.Axes]: +def f_498(num_students): """ Generate a Pandas DataFrame that displays the grades of a randomly selected group of students in multiple courses. Calculate the average grade in each course, the number of students with a passing grade (>= 60), - and visualize this information using a bar plot. + and visualize this information using a bar plot with title 'Course-wise Average and Passing Grade Counts'. Parameters: num_students (int): The number of students in the sample. @@ -35,6 +31,11 @@ def f_498(num_students: int) -> Tuple[pd.DataFrame, plt.Axes]: 'Course-wise Average and Passing Grade Counts' """ # Generate sample students and grades + + # Constants + STUDENTS = ['Student' + str(i) for i in range(1, 101)] + COURSES = ['Course' + str(i) for i in range(1, 6)] + students_sample = sample(STUDENTS, num_students) grades = np.random.randint(40, 101, size=(num_students, len(COURSES))) diff --git a/data/raw/f_499_ming.py b/data/raw/f_499_ming.py index 6d100cc2..1b4f0c7c 100644 --- a/data/raw/f_499_ming.py +++ b/data/raw/f_499_ming.py @@ -5,7 +5,8 @@ def f_499(num_teams=5, num_games=100): """ Create a Pandas DataFrame that displays the random scores of different teams in multiple games. - The function generates random scores for each game played by each team and populates them in a DataFrame. + The function generates random scores for each game played by each team and populates them in + a DataFrame with index=teams, columns=games. 
Parameters: - num_teams (int, optional): The number of teams participating. Default is 5. diff --git a/data/raw/f_502_ming.py b/data/raw/f_502_ming.py index 52f02ce8..5daafb1a 100644 --- a/data/raw/f_502_ming.py +++ b/data/raw/f_502_ming.py @@ -6,9 +6,9 @@ def f_502(pattern: str, directory: str, output_csv: str) -> pd.DataFrame: """ Searches for files in the specified directory that match a given regex pattern. - This function walks through the directory, matches filenames against the pattern, - and saves the matched file paths to a CSV file. It returns a DataFrame of these paths. + and saves the matched file paths to a CSV file. It returns a DataFrame of these paths + with colomn 'File Path'. Parameters: - pattern (str): Regex pattern to match filenames. diff --git a/data/raw/f_503_ming.py b/data/raw/f_503_ming.py index 95140aa8..d9fa8cb4 100644 --- a/data/raw/f_503_ming.py +++ b/data/raw/f_503_ming.py @@ -22,7 +22,7 @@ def f_503(directory: str, pattern: str = r"(? dic - binascii Example: - >>> f_503(output_dir) + >>> f_503(OUTPUT_DIR) {} """ hashes = {} diff --git a/data/raw/f_504_ming.py b/data/raw/f_504_ming.py index 1ee5bbbb..059809bd 100644 --- a/data/raw/f_504_ming.py +++ b/data/raw/f_504_ming.py @@ -4,7 +4,8 @@ # Constants DATA_PATTERN = r'>\d+\.\d+<' -def f_504(dataframe: pd.DataFrame) -> pd.DataFrame: + +def f_504(dataframe, data_pattern=DATA_PATTERN): """ Extract numeric data from a Pandas DataFrame based on a specific pattern. The function searches each cell for occurrences of the regex pattern '>number' (e.g., '>1.23<') and replaces @@ -12,6 +13,7 @@ def f_504(dataframe: pd.DataFrame) -> pd.DataFrame: Parameters: - dataframe (pd.DataFrame): A pandas DataFrame containing data to be processed. + - data_pattern (str, optional): data search pattern. Default value is '>\d+\.\d+<'. Returns: - pd.DataFrame: A modified DataFrame with cells containing the extracted numeric values or NaN. 
@@ -30,8 +32,8 @@ def f_504(dataframe: pd.DataFrame) -> pd.DataFrame: 1 4.56 0.12 """ for col in dataframe.columns: - dataframe[col] = dataframe[col].apply(lambda x: float(re.search(DATA_PATTERN, x).group(0)[1:-1]) - if pd.notnull(x) and re.search(DATA_PATTERN, x) else np.nan) + dataframe[col] = dataframe[col].apply(lambda x: float(re.search(data_pattern, x).group(0)[1:-1]) + if pd.notnull(x) and re.search(data_pattern, x) else np.nan) return dataframe diff --git a/data/raw/f_505_ming.py b/data/raw/f_505_ming.py index d9d61074..dde97f2e 100644 --- a/data/raw/f_505_ming.py +++ b/data/raw/f_505_ming.py @@ -5,7 +5,7 @@ def f_505(filename, data, password): """ Encrypt a string with a password, then write the encrypted string to a file. - If the file does not exist, create it. + If the file or directory does not exist, create it. Parameters: filename (str): The name of the file to write to. @@ -24,10 +24,10 @@ def f_505(filename, data, password): 'Fu0k9LUEJCY+ookLrA==' """ # Ensure the file exists - try: - open(filename, 'x').close() - except FileExistsError: - pass + directory = os.path.dirname(filename) + os.makedirs(directory, exist_ok=True) + if not os.path.exists(filename): + open(filename, 'a').close() # Encrypt the data using simple XOR operation with password hash as key key = hashlib.sha256(password.encode()).digest() diff --git a/data/raw/f_506_ming.py b/data/raw/f_506_ming.py index 9ad848cd..87ef2a57 100644 --- a/data/raw/f_506_ming.py +++ b/data/raw/f_506_ming.py @@ -45,11 +45,11 @@ def f_506(filename: str) -> pd.DataFrame: import unittest import shutil - +OUTPUT_DIR = r'./output' class TestCases(unittest.TestCase): def setUp(self): - self.output_dir = './output' + self.output_dir = OUTPUT_DIR if not os.path.exists(self.output_dir): os.makedirs(self.output_dir) diff --git a/data/raw/f_508_ming.py b/data/raw/f_508_ming.py index 97f110c8..dc3bbe52 100644 --- a/data/raw/f_508_ming.py +++ b/data/raw/f_508_ming.py @@ -4,7 +4,8 @@ def f_508(date_str, from_tz, 
to_tz): """ - Convert a date string from one time zone to another and return the time difference in seconds to the current time in the destination time zone. + Convert a date string from one time zone to another and return the time difference in seconds to the current time + in the destination time zone. Parameters: date_str (str): The date string in "yyyy-mm-dd hh:mm:ss" format. diff --git a/data/raw/f_509_ming.py b/data/raw/f_509_ming.py index a19d6777..912a5af2 100644 --- a/data/raw/f_509_ming.py +++ b/data/raw/f_509_ming.py @@ -4,7 +4,7 @@ def f_509(date_str): """ - Get the next business day (Mon-Fri) after a certain date string. + Get the next business day (Mon-Fri) after a certain date string. Implemented by dateutil.parser and datetime. Parameters: date_str (str): The date string in "yyyy-mm-dd" format. diff --git a/data/raw/f_510_ming.py b/data/raw/f_510_ming.py index 8acde1df..d9beb024 100644 --- a/data/raw/f_510_ming.py +++ b/data/raw/f_510_ming.py @@ -1,12 +1,12 @@ import numpy as np import pandas as pd from dateutil.parser import parse -DAYS_OF_WEEK = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] + def f_510(dates_str_list): """ - Analyze the weekday distribution in a list of date strings. + Analyze the weekday distribution in a list of date strings. Implemented by dateutil.parser. This function takes a list of date strings in "yyyy-mm-dd" format, calculates the weekday for each date, and returns a distribution of the weekdays. 
@@ -36,6 +36,7 @@ def f_510(dates_str_list): Sunday 1 dtype: int64 """ + DAYS_OF_WEEK = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] weekdays = [parse(date_str).weekday() for date_str in dates_str_list] weekday_counts = np.bincount(weekdays, minlength=7) @@ -50,6 +51,7 @@ def run_tests(): suite.addTest(unittest.makeSuite(TestCases)) runner = unittest.TextTestRunner() runner.run(suite) +DAYS_OF_WEEK = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] class TestCases(unittest.TestCase): diff --git a/data/raw/f_512_ming.py b/data/raw/f_512_ming.py index 26b5fc11..a342fd56 100644 --- a/data/raw/f_512_ming.py +++ b/data/raw/f_512_ming.py @@ -1,13 +1,14 @@ import pandas as pd import time -def f_512(dataframe, target_value): + +def f_512(df, target_value): ''' - Searches a given DataFrame for rows with cells equal to the provided target value. - It then plots the count of such rows per column. + Convert the input dict of list to DataFrame and search in this DataFrame for rows with cells equal to the + provided target_value. It then plots the count of such rows per column. Parameters: - - dataframe (pd.DataFrame): The DataFrame to be searched. + - df (dict of list): The input dict. It should have a 'Name' key. - target_value (str): The target value to be searched in the DataFrame. 
Returns: @@ -25,7 +26,7 @@ def f_512(dataframe, target_value): ''' start_time = time.time() # Convert dataframe to string type for uniform comparison - dataframe = pd.DataFrame(dataframe) + dataframe = pd.DataFrame(df) dataframe = dataframe.astype(str) counts = dataframe.apply(lambda x: (x == target_value).sum()) diff --git a/data/raw/f_513_ming.py b/data/raw/f_513_ming.py index e64cf84c..af32e05d 100644 --- a/data/raw/f_513_ming.py +++ b/data/raw/f_513_ming.py @@ -2,16 +2,20 @@ import numpy as np import scipy.stats as stats - # Constants TARGET_VALUE = '332' ARRAY = np.array([['0', '1', '2'], ['a', 'bb', 'ccc'], ['332', '33', '2'], ['33', '22', '332']]) -def f_513(): +def f_513(target_value=TARGET_VALUE, array=ARRAY): """ - Finds the row indices in a numpy array where the first cell matches "332." + Finds the row indices in a numpy array where the first cell matches target_value "332" Performs statistical analysis on these indices and plots their distribution. + Return 'N/A' for all stats if no target value found. + + Parameters: + - target_value (str): The target value. 
Default value is '332' + - array (np.ndarray): The input array Returns: tuple: A tuple with mean, variance, skewness, and kurtosis of the indices, or @@ -26,7 +30,7 @@ def f_513(): >>> f_513() (2.0, 'N/A', 'N/A', 'N/A') """ - indices = np.where(ARRAY[:,0] == TARGET_VALUE)[0] + indices = np.where(array[:, 0] == target_value)[0] # Check if statistical analysis is possible if len(indices) < 2: @@ -66,16 +70,14 @@ def test_statistics_and_plot(self): def test_empty_array(self): """Test with an array that has no matching target value.""" - global ARRAY - ARRAY = np.array([['0', '1', '2'], ['a', 'bb', 'ccc'], ['33', '33', '2'], ['33', '22', '3']]) - result = f_513() + ARRAY1 = np.array([['0', '1', '2'], ['a', 'bb', 'ccc'], ['33', '33', '2'], ['33', '22', '3']]) + result = f_513(array=ARRAY1) self.assertEqual(result, ('N/A', 'N/A', 'N/A', 'N/A'), "Should return 'N/A' for all stats if no target value found.") def test_single_match(self): """Test with an array that has exactly one matching target value.""" - global ARRAY - ARRAY = np.array([['0', '1', '2'], ['a', 'bb', 'ccc'], ['332', '33', '2'], ['33', '22', '3']]) - result = f_513() + ARRAY2 = np.array([['0', '1', '2'], ['a', 'bb', 'ccc'], ['332', '33', '2'], ['33', '22', '3']]) + result = f_513(array=ARRAY2) self.assertEqual(len(result), 4, "The tuple should contain four elements.") self.assertNotEqual(result[0], 'N/A', "Mean should not be 'N/A' for a single match.") self.assertEqual(result[1], 'N/A', "Variance should be 'N/A' for a single match.") @@ -97,9 +99,6 @@ def test_non_uniform_distribution(self): # Validate statistical analysis was performed self.assertIsInstance(result, tuple, "The result should be a tuple.") self.assertEqual(len(result), 4, "The tuple should contain four elements.") - # Validate skewness and kurtosis calculation by checking they are not 'N/A' - self.assertNotEqual(result[2], 'N/A', "Skewness calculation should not return 'N/A'.") - self.assertNotEqual(result[3], 'N/A', "Kurtosis calculation 
should not return 'N/A'.") def run_tests(): diff --git a/data/raw/f_516_ming.py b/data/raw/f_516_ming.py index c27ef1c1..6c69a8af 100644 --- a/data/raw/f_516_ming.py +++ b/data/raw/f_516_ming.py @@ -14,7 +14,6 @@ def f_516(texts, num_topics): """ Performs topic extraction from a collection of text documents using Non-Negative Matrix Factorization (NMF). - This function first preprocesses the input texts by removing non-alphanumeric characters (excluding spaces), converting all characters to lowercase, and removing stopwords. It then vectorizes the processed texts using TF-IDF and applies NMF to extract the specified number of topics. Each topic is represented as a list diff --git a/data/raw/f_518_ming.py b/data/raw/f_518_ming.py index 6b78db68..2db5f677 100644 --- a/data/raw/f_518_ming.py +++ b/data/raw/f_518_ming.py @@ -7,7 +7,7 @@ def f_518(texts, stopwords=None): """ - Generate word vectors from a list of texts using the gensim Word2Vec model. + Generate word vectors from a list of texts using the gensim Word2Vec model and nltk.corpus.stopwords. The texts are first cleaned by removing all non-alphanumeric characters except space, lowercased, and stop words are removed. diff --git a/data/raw/f_522_ming.py b/data/raw/f_522_ming.py index f8e6c945..29022d75 100644 --- a/data/raw/f_522_ming.py +++ b/data/raw/f_522_ming.py @@ -8,7 +8,7 @@ def f_522(x, y, labels): """ - Create a heatmap using the seaborn library for "x" and "y" numpy arrays with labels. + Create a heatmap using the seaborn library for "x" as x-values and "y" as y-values with labels. Parameters: x (list): List of numpy arrays representing the x-values of the data points. 
@@ -52,7 +52,6 @@ def run_tests(): class TestCases(unittest.TestCase): - # (test cases will be same as above) def test_case_1(self): x = [np.array([1,2,3]), np.array([4,5,6]), np.array([7,8,9])] diff --git a/data/raw/f_523_ming.py b/data/raw/f_523_ming.py index 8db6821e..bb64aeed 100644 --- a/data/raw/f_523_ming.py +++ b/data/raw/f_523_ming.py @@ -5,7 +5,7 @@ def f_523(x, y, labels): """ - Perform Principal Component Analysis (PCA) on "x" and "y" numpy arrays and record the results with labels. + Perform Principal Component Analysis (PCA) on "x" as x-values and "y" as y-values and record the results with labels. Parameters: x (list): List of numpy arrays representing the x-values of the data points. diff --git a/data/raw/f_524_ming.py b/data/raw/f_524_ming.py index 364cb316..228a945f 100644 --- a/data/raw/f_524_ming.py +++ b/data/raw/f_524_ming.py @@ -5,8 +5,7 @@ def f_524(x, y, labels): """ Fit an exponential curve to given data points and plot the curves with labels. - - This function fits an exponential curve of the form: f(x) = a * exp(-b * x) + c + It fits an exponential curve of the form: f(x) = a * exp(-b * x) + c to the provided x and y data points for each set of data and plots the fitted curves with the corresponding labels on a single matplotlib figure. diff --git a/data/raw/f_525_ming.py b/data/raw/f_525_ming.py index 6dab4a33..969b97da 100644 --- a/data/raw/f_525_ming.py +++ b/data/raw/f_525_ming.py @@ -4,7 +4,8 @@ def f_525(sales_data): """ - Plot sales trends for five products over a year, highlighting variability with standard deviation shading. + Plot sales trends for five products over a year, highlighting variability with standard deviation shading + with 'Month' on x-axis and 'Sales' on y-axis. Parameters: - sales_data (pd.DataFrame): DataFrame with sales data, expected columns: 'Month', 'Product A' to 'Product E'. 
diff --git a/script/run.sh b/script/run.sh index e4457612..516ad471 100644 --- a/script/run.sh +++ b/script/run.sh @@ -1,4 +1,4 @@ -NAMES=(chien jenny wenhao niklas hanhu ratna simon) +NAMES=(chien jenny wenhao niklas hanhu ratna simon ming) for name in "${NAMES[@]}"; do cp data/raw/*"$name"*py data/clean done From 28367c6d13b92a3741b04728674ddd3b31871007 Mon Sep 17 00:00:00 2001 From: han hu <543160303@qq.com> Date: Sun, 5 May 2024 02:06:25 +1000 Subject: [PATCH 3/3] only include ming --- script/run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/script/run.sh b/script/run.sh index 516ad471..86a77a06 100644 --- a/script/run.sh +++ b/script/run.sh @@ -1,4 +1,5 @@ -NAMES=(chien jenny wenhao niklas hanhu ratna simon ming) +# NAMES=(chien jenny wenhao niklas hanhu ratna simon ming) +NAMES=(ming) for name in "${NAMES[@]}"; do cp data/raw/*"$name"*py data/clean done