Skip to content

Commit

Permalink
fix: fix data quality
Browse files Browse the repository at this point in the history
  • Loading branch information
terryyz committed May 3, 2024
1 parent d168c78 commit b6957ea
Show file tree
Hide file tree
Showing 67 changed files with 1,037 additions and 874 deletions.
5 changes: 3 additions & 2 deletions data/clean/f_1712_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ def f_1713(template_folder):
app = Flask(__name__, template_folder=template_folder)

@app.route('/', methods=['POST'])
def home():
data = request.json
def handle_post():
data = request.get_json()
logging.info(json.dumps(data))
return render_template('index.html', data=data)

return app

import unittest
Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_1715_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def load_user(user_id):
class TestCases(unittest.TestCase):

def setUp(self):
current_file_path = os.path.abspath(__file__)
current_file_path = os.path.abspath("__file__")
current_directory = os.path.dirname(current_file_path)
self.secret_key = 'mysecretkey'
self.template_folder = f'{current_directory}/templates'
Expand Down
20 changes: 14 additions & 6 deletions data/clean/f_1736_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,20 @@ def f_1737():
>>> type(fig).__name__
'Figure'
"""
font = {'family': 'Arial'}
plt.rc('font', **font) # Set the global font to Arial.
DIABETES = load_diabetes()
diabetes_df = pd.DataFrame(data=DIABETES.data, columns=DIABETES.feature_names)
pair_plot = sns.pairplot(diabetes_df)
return pair_plot.fig, diabetes_df
# Set the font to Arial
plt.rcParams['font.family'] = 'Arial'

# Load the diabetes dataset
diabetes = load_diabetes()
df = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)

# Create a pairplot
pairplot = sns.pairplot(df)

# Show the plot
plt.show()

return pairplot.fig, df

import unittest
import matplotlib.pyplot as plt
Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_1751_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def f_1752(numbers):
"""
Creates and returns a dictionary with the mode and entropy of a numpy array constructed from a given list.
The function first converts the list into a numpy array, then calculates the mode and the entropy of this array,
The function first converts the list into a numpy array, then calculates the mode and the entropy (base 2) of this array,
and finally adds them to the initial dictionary with the keys 'mode' and 'entropy'.
Parameters:
Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_1756_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def f_1757():
"""
Creates and displays a diagram of a parabola represented by the equation y = x^2.
The function plots the parabola using matplotlib, sets the title, labels the axes,
The function plots the parabola using matplotlib, sets the title as 'y = x^2', labels the axes as 'x' and 'y',
and enables the grid. It uses a fixed range for x values from -10 to 10 with 400 points.
This function is used for demonstrating basic plotting capabilities and visualizing
quadratic functions. The function does not take any parameters and does not return any value.
Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_2091_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

def f_2092(url_str, file_path):
"""
Fetches JSON data from a given URL, decodes the data, and compresses it into a gzip file.
Fetches JSON data from a given URL, decodes the JSON-formatted data, and compresses it into a gzip file.
Parameters:
url_str (str): The URL string pointing to the JSON data.
Expand Down
20 changes: 20 additions & 0 deletions data/clean/f_2437_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,26 @@ def test_successful_matrix_creation(self, mock_specshow, mock_read, mock_isfile)
# Ensure that the plotting functions are called, validating the function's complete execution path
mock_specshow.assert_called()

@patch('os.path.isfile', return_value=True)
@patch('soundfile.read', return_value=(np.array([0.1, 0.2, 0.3]), 44100))
@patch('matplotlib.pyplot.colorbar', MagicMock())
@patch('librosa.display.specshow', return_value=MagicMock())
def test_docstring_examples(self, mock_specshow, mock_read, mock_isfile):
    """Test the examples provided in the function's docstring.

    File existence, audio reading and the plotting calls (colorbar, specshow)
    are patched, so no real audio file or display backend is touched.
    """
    # Mock arguments arrive bottom-up: the decorator closest to the def comes first;
    # the colorbar patch passes an explicit replacement, so it injects no argument.
    matrix, _ = f_2439(list(range(100)), 10, 10, 'audio.wav')
    self.assertIsInstance(matrix, np.ndarray)
    self.assertEqual(matrix.shape, (10, 10))

@patch('os.path.isfile', return_value=True)
@patch('soundfile.read', return_value=(np.array([0.1, 0.2, 0.3]), 44100))
@patch('matplotlib.pyplot.colorbar', MagicMock())
@patch('librosa.display.specshow', return_value=MagicMock())
def test_spl_calculation(self, mock_specshow, mock_read, mock_isfile):
    """Test the sound pressure level (SPL) calculation.

    File existence, audio reading and the plotting calls are patched; the
    test pins the largest and smallest entries of the returned matrix.
    """
    matrix, _ = f_2439(list(range(100)), 10, 10, 'audio.wav')
    # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
    # which no longer exists on TestCase from Python 3.12 onwards.
    self.assertAlmostEqual(matrix.max(), -0.0)
    self.assertAlmostEqual(matrix.min(), -13.309932190414244)

def run_tests():
"""Run all tests for this function."""
Expand Down
7 changes: 4 additions & 3 deletions data/clean/f_2657_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def f_2658():
"""
Creates an HTTP POST request handler for processing incoming data. The data is expected
to be in JSON format with a key 'data'. The handler responds with a 200 success message
if the data is valid, or an error message otherwise.
if the data is valid, or an error message otherwise.
The type of the response can be retrieved as 'content-type' and the length of the response as 'content-length'.
Returns:
function: A class that handles HTTP POST requests and validates incoming data.
Expand All @@ -27,7 +28,7 @@ def f_2658():
- json
Notes:
If the 'Content-Type' header is not 'application/json', indicating the
If the 'content-type' header is not 'application/json', indicating the
client sent a request with an unsupported format. This condition sends a
400 Bad Request response to the client with the message "Content-Type header
is not application/json".
Expand Down Expand Up @@ -62,7 +63,7 @@ def do_POST(self):
return

self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header('content-type', 'application/json')
self.end_headers()
response = json.dumps(SUCCESS_RESPONSE).encode()
self.wfile.write(response)
Expand Down
1 change: 1 addition & 0 deletions data/clean/f_2659_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def f_2661(smtp_server, smtp_port, smtp_username, smtp_password):
"""
Creates an HTTP POST request handler that processes incoming email data and sends
an email. The email data must be a JSON object with 'subject', 'message', and 'to' keys.
The type of the response can be retrieved as 'content-type' and the length of the response as 'content-length'.
Parameters:
smtp_server (str): SMTP server address.
Expand Down
3 changes: 3 additions & 0 deletions data/clean/f_2840_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ def f_2842(SERVER_NAME, SERVER_PORT, path):
Returns:
str: The response body from the server as a string.
Raises:
ssl.SSLError: If there is an SSL handshake error.
Requirements:
- socket
- ssl
Expand Down
28 changes: 18 additions & 10 deletions data/clean/f_2844_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@ def f_2846(dir, api_key, recipient_email):
"""
try:
file_list = os.listdir(dir)
except FileNotFoundError:
print("Directory not found.")
return False
except:
raise FileNotFoundError(f"Directory '{dir}' does not exist.")

file_list_str = ', '.join(file_list)

Expand Down Expand Up @@ -64,12 +63,15 @@ def f_2846(dir, api_key, recipient_email):
from python_http_client.exceptions import HTTPError

class TestCases(unittest.TestCase):
@patch('os.path.exists')
@patch('sendgrid.SendGridAPIClient.send')
@patch('os.listdir')
def test_successful_email_send(self, mock_listdir, mock_send):
def test_successful_email_send(self, mock_listdir, mock_send, mock_exists):
"""Test successful email sending with a valid directory."""
mock_listdir.return_value = ['file1.gz', 'file2.gz']
mock_exists.return_value = True
mock_send.return_value = MagicMock(status_code=202)

api_key = 'test_api_key'
recipient_email = '[email protected]'
result = f_2846('./valid_directory', api_key, recipient_email)
Expand All @@ -79,38 +81,44 @@ def test_invalid_directory(self):
"""Test the handling of an invalid directory."""
api_key = 'test_api_key'
recipient_email = '[email protected]'
result = f_2846('/nonexistent_directory', api_key, recipient_email)
self.assertFalse(result)

with self.assertRaises(FileNotFoundError):
f_2846('/nonexistent_directory', api_key, recipient_email)

@patch('os.path.exists')
@patch('os.listdir')
@patch('sendgrid.SendGridAPIClient.send')
def test_failed_email_send(self, mock_send, mock_listdir):
def test_failed_email_send(self, mock_send, mock_listdir, mock_exists):
"""Test handling of a failed email send by ensuring HTTPError is raised."""
mock_listdir.return_value = ['file1.gz', 'file2.gz']
mock_response = Mock(status_code=400, body='Bad Request')
mock_exists.return_value = True
mock_send.side_effect = HTTPError(mock_response, 'Failed to send')
api_key = 'test_api_key'
recipient_email = '[email protected]'
with self.assertRaises(HTTPError):
f_2846('./valid_directory', api_key, recipient_email)

@patch('os.path.exists')
@patch('sendgrid.SendGridAPIClient.send')
@patch('os.listdir')
def test_empty_directory(self, mock_listdir, mock_send):
def test_empty_directory(self, mock_listdir, mock_send, mock_exists):
"""Test sending an email with an empty directory."""
mock_listdir.return_value = []
mock_send.return_value = MagicMock(status_code=202)
mock_exists.return_value = True
api_key = 'test_api_key'
recipient_email = '[email protected]'
result = f_2846('./empty_directory', api_key, recipient_email)
self.assertTrue(result)

@patch('os.path.exists')
@patch('sendgrid.SendGridAPIClient.send')
@patch('os.listdir')
def test_generic_exception_handling(self, mock_listdir, mock_send):
def test_generic_exception_handling(self, mock_listdir, mock_send, mock_exists):
"""Test handling of generic exceptions during email sending."""
mock_listdir.return_value = ['file1.gz', 'file2.gz']
mock_send.side_effect = Exception('Generic error')
mock_exists.return_value = True
api_key = 'test_api_key'
recipient_email = '[email protected]'
with self.assertRaises(Exception):
Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_2968_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

def f_2970(req_data, secret_key):
"""
Signs the specified request data with a secret key using HMAC SHA256, then URL encodes the signature.
Signs the specified request data with a secret key using HMAC SHA256, then URL encodes the signature and replaces spaces with '+'.
Parameters:
req_data (dict): The request data to be signed. It should be a dictionary.
Expand Down
22 changes: 17 additions & 5 deletions data/clean/f_3035_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ def f_3037(x):
x (numpy.ndarray): The range of x values over which to plot the distribution.
Returns:
None. Displays a matplotlib plot of the complex distribution.
numpy.ndarray: The complex distribution created from the two Gaussian distributions.
Raises:
TypeError: If `x` is not a numpy.ndarray.
Requirements:
- numpy
- scipy.stats.norm
Expand Down Expand Up @@ -41,6 +44,7 @@ def f_3037(x):
plt.legend()
plt.grid()
plt.show()
return complex_dist

import unittest
import numpy as np
Expand All @@ -49,7 +53,8 @@ class TestCases(unittest.TestCase):
def test_return_type(self):
""" Test that the function returns the expected complex distribution values. """
result = f_3037(np.linspace(-10, 10, 1000))
self.assertIsNone(result)
self.assertAlmostEquals(result[0], 7.69459863e-23+3.03794142e-09j)
self.assertAlmostEquals(result[1], 9.398202102189114e-23+3.2258293600449145e-09j)

def test_input_type(self):
""" Test the function with non-numpy array inputs. """
Expand All @@ -59,13 +64,20 @@ def test_input_type(self):
def test_empty_array(self):
""" Test function with empty numpy array. """
result = f_3037(np.array([]))
self.assertIsNone(result)
self.assertEqual(result.size, 0)

def test_array_length(self):
""" Test function with arrays of different lengths. """
result = f_3037(np.linspace(-5, 5, 500))
self.assertIsNone(result)

self.assertAlmostEquals(result[0], 1.4867195147342979e-06+0.0004363413475228801j)
self.assertAlmostEquals(result[-1], 1.4867195147342979e-06+0.06475879783294587j)

def test_special_values(self):
    """Check the first output element when x spans -inf to +inf."""
    unbounded = np.linspace(-np.inf, np.inf, 1000)
    first = f_3037(unbounded)[0]
    # Expected nan+nanj; NaN never compares equal to itself, so assertEqual
    # cannot be used and each component is checked with np.isnan instead.
    for component in (first.real, first.imag):
        self.assertTrue(np.isnan(component))

def run_tests():
"""Run all tests for this function."""
Expand Down
21 changes: 11 additions & 10 deletions data/clean/f_3113_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ def f_3115():
Generates a table displaying the system's CPU usage, memory usage, and disk usage.
Returns:
A string representation of a table with the following system information:
A string representation of a table with the columns of 'Item' and 'Value',
and the following system information:
- CPU Usage (%)
- Memory Usage (%)
- Disk Usage (%)
Expand Down Expand Up @@ -54,15 +55,15 @@ def test_table_headers(self):
with self.subTest(header=header):
self.assertIn(header, self.result)

# def test_non_empty_values(self):
# """Test that the table's values are not empty or zero."""
# # Extract numeric values using a regular expression
# values = re.findall(r'\|\s*[\d.]+\s*\|', self.result)
# # Convert extracted strings to float and test they are greater than 0
# for value_str in values:
# value = float(value_str.strip('| ').strip())
# with self.subTest(value=value):
# self.assertTrue(value > 0)
def test_non_empty_values(self):
"""Test that every numeric cell found in the rendered table is greater than zero."""
# Match cells of the form "| <digits and dots> |" in self.result
# (presumably the table string built in setUp, which is outside this hunk; verify).
values = re.findall(r'\|\s*[\d.]+\s*\|', self.result)
# Strip the pipe delimiters and padding, then convert each cell to float
for value_str in values:
value = float(value_str.strip('| ').strip())
# subTest reports each offending value separately instead of stopping at the first failure
with self.subTest(value=value):
# NOTE(review): a reading of exactly 0 fails this strict comparison
self.assertTrue(value > 0)

def test_value_ranges(self):
"""Test that CPU and memory usage percentages are within 0-100%."""
Expand Down
1 change: 0 additions & 1 deletion data/clean/f_4276_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def test_package_module_addition(self, mock_iter_modules, mock_import_module):

# Call the function under test
modules_added = f_4278('numpy')
print(modules_added)
# Perform your assertions here
# For example, assert that modules were "added" (imported)
self.assertFalse(len(modules_added) > 0)
Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_4311_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

def f_4313(my_dict):
"""
Sorts a given dictionary by its keys in ascending order and returns a PrettyTable object displaying the sorted items.
Sorts a given dictionary by its keys in ascending order and returns a PrettyTable object displaying the sorted items with the names 'Key' and 'Value'.
Parameters:
my_dict (dict): The dictionary to be sorted and displayed.
Expand Down
1 change: 1 addition & 0 deletions data/clean/f_4430_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def f_4432(filepath):
This function demonstrates the use of various system-related libraries in Python.
The format of the printed message is:
System: <system-name-here>
Node Name: <node-name-here>
Release: <release-here>
Version: <version-here>
Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_4433_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def f_4435(filepath):
filepath (str): The path of the DLL file.
Returns:
str: The name of the loaded DLL file.
str: The actual name of the loaded DLL file.
Requirements:
- ctypes
Expand Down
5 changes: 3 additions & 2 deletions data/clean/f_4667_hanhu.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

def f_4669(filepath):
"""
Attempts to compile a C++ file specified by 'filepath'. The output of the compilation process
Attempts to compile an existing C++ file specified by 'filepath'. The output of the compilation process
is logged, indicating whether the compilation was successful or not. This function is useful
for automating the compilation of C++ code and tracking compilation results.
The log should indicate whether the compilation was successful or if an error occurred.
Parameters:
filepath (str): The path of the C++ file to be compiled.
Expand Down Expand Up @@ -51,7 +52,7 @@ class TestCases(unittest.TestCase):

def setUp(self):
# Setup an empty test file
self.empty_file = 'empty_file.cpp'
self.empty_file = './empty_file.cpp'
with open(self.empty_file, 'w') as f:
f.write("")

Expand Down
2 changes: 1 addition & 1 deletion data/clean/f_644_simon.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def f_644(list_of_pairs):
2 train 0.004193
3 plane 0.000000
4 ship 1.000000
"""
"""

if len(list_of_pairs) == 0:
raise Exception('The input array should not be empty.')
Expand Down
Loading

0 comments on commit b6957ea

Please sign in to comment.