diff --git a/README.rst b/README.rst index 307c403..4390693 100644 --- a/README.rst +++ b/README.rst @@ -249,6 +249,34 @@ If you plan to do a lot of simulatneous query, you might want to speedup query u products = await api.query('059035342X', wait=False) +Buy Box Statistics +~~~~~~~~~~~~~~~~~~ +To load used buy box statistics, you have to enable ``offers``. This example +loads in product offers and converts the buy box data into a +``pandas.DataFrame``. + +.. code:: pycon + + >>> import keepa + >>> key = '' + >>> api = keepa.Keepa(key) + >>> response = api.query('B0088PUEPK', offers=20) + >>> product = response[0] + >>> buybox_info = product['buyBoxUsedHistory'] + >>> df = keepa.process_used_buybox(buybox_info) + datetime user_id condition isFBA + 0 2022-11-02 16:46:00 A1QUAC68EAM09F Used - Like New True + 1 2022-11-13 10:36:00 A18WXU4I7YR6UA Used - Very Good False + 2 2022-11-15 23:50:00 AYUGEV9WZ4X5O Used - Like New False + 3 2022-11-17 06:16:00 A18WXU4I7YR6UA Used - Very Good False + 4 2022-11-17 10:56:00 AYUGEV9WZ4X5O Used - Like New False + .. ... ... ... ... 
+ 115 2023-10-23 10:00:00 AYUGEV9WZ4X5O Used - Like New False + 116 2023-10-25 21:14:00 A1U9HDFCZO1A84 Used - Like New False + 117 2023-10-26 04:08:00 AYUGEV9WZ4X5O Used - Like New False + 118 2023-10-27 08:14:00 A1U9HDFCZO1A84 Used - Like New False + 119 2023-10-27 12:34:00 AYUGEV9WZ4X5O Used - Like New False + Contributing ------------ Contribute to this repository by forking this repository and installing in diff --git a/src/keepa/__init__.py b/src/keepa/__init__.py index f8c6621..0d40377 100644 --- a/src/keepa/__init__.py +++ b/src/keepa/__init__.py @@ -7,6 +7,7 @@ convert_offer_history, format_items, keepa_minutes_to_time, + process_used_buybox, run_and_get, ) from keepa.plotting import plot_product # noqa: F401 diff --git a/src/keepa/interface.py b/src/keepa/interface.py index b1b4db7..78f8c38 100644 --- a/src/keepa/interface.py +++ b/src/keepa/interface.py @@ -4,6 +4,7 @@ import json import logging import time +from typing import List import aiohttp import numpy as np @@ -363,28 +364,28 @@ class Keepa: Create the api object. >>> import keepa - >>> key = "" + >>> key = '' >>> api = keepa.Keepa(key) Request data from two ASINs. - >>> products = api.query(["0439064872", "1426208081"]) + >>> products = api.query(['0439064872', '1426208081']) Print item details. - >>> print("Item 1") - >>> print("\t ASIN: {:s}".format(products[0]["asin"])) - >>> print("\t Title: {:s}".format(products[0]["title"])) + >>> print('Item 1') + >>> print('\t ASIN: {:s}'.format(products[0]['asin'])) + >>> print('\t Title: {:s}'.format(products[0]['title'])) Item 1 ASIN: 0439064872 Title: Harry Potter and the Chamber of Secrets (2) Print item price. 
- >>> usedprice = products[0]["data"]["USED"] - >>> usedtimes = products[0]["data"]["USED_time"] - >>> print("\t Used price: ${:.2f}".format(usedprice[-1])) - >>> print("\t as of: {:s}".format(str(usedtimes[-1]))) + >>> usedprice = products[0]['data']['USED'] + >>> usedtimes = products[0]['data']['USED_time'] + >>> print('\t Used price: ${:.2f}'.format(usedprice[-1])) + >>> print('\t as of: {:s}'.format(str(usedtimes[-1]))) Used price: $0.52 as of: 2023-01-03 04:46:00 @@ -417,7 +418,7 @@ def time_to_refill(self) -> float: should be 0.0 seconds. >>> import keepa - >>> key = "" + >>> key = '' >>> api = keepa.Keepa(key) >>> api.time_to_refill 0.0 @@ -509,8 +510,8 @@ def query( FR, JP, CA, CN, IT, ES, IN, MX Defaults to US. offers : int, optional - Adds available offers to product data. Default 0. Must - be between 20 and 100. + Adds available offers to product data. Default 0. Must be between + 20 and 100. Enabling this also enables the ``"buyBoxUsedHistory"`` field. update : int, optional if data is older than the input integer, keepa will @@ -561,12 +562,11 @@ def query( - buyBoxSellerIdHistory - all buy box fields in the statistics object - The buybox parameter - does not trigger a fresh data collection. If the offers - parameter is used the buybox parameter is ignored, as the - offers parameter also provides access to all buy box - related data. To access the statistics object the stats - parameter is required. + The buybox parameter does not trigger a fresh data collection. If + the offers parameter is used the buybox parameter is ignored, as + the offers parameter also provides access to all buy box related + data. To access the statistics object the stats parameter is + required. wait : bool, optional Wait available token before doing effective query, @@ -738,10 +738,10 @@ def query( keepa interface. 
>>> import keepa - >>> key = "" + >>> key = '' >>> api = keepa.Keepa(key) - >>> response = api.query("B0088PUEPK") - >>> response[0]["title"] + >>> response = api.query('B0088PUEPK') + >>> response[0]['title'] 'Western Digital 1TB WD Blue PC Internal Hard Drive HDD - 7200 RPM, SATA 6 Gb/s, 64 MB Cache, 3.5" - WD10EZEX' @@ -751,15 +751,38 @@ def query( >>> import asyncio >>> import keepa >>> async def main(): - ... key = "" + ... key = '' ... api = await keepa.AsyncKeepa().create(key) - ... return await api.query("B0088PUEPK") + ... return await api.query('B0088PUEPK') ... >>> response = asyncio.run(main()) - >>> response[0]["title"] + >>> response[0]['title'] 'Western Digital 1TB WD Blue PC Internal Hard Drive HDD - 7200 RPM, SATA 6 Gb/s, 64 MB Cache, 3.5" - WD10EZEX' + Load in product offers and convert the buy box data into a + ``pandas.DataFrame``. + + >>> import keepa + >>> key = '' + >>> api = keepa.Keepa(key) + >>> response = api.query('B0088PUEPK', offers=20) + >>> product = response[0] + >>> buybox_info = product['buyBoxUsedHistory'] + >>> df = keepa.process_used_buybox(buybox_info) + datetime user_id condition isFBA + 0 2022-11-02 16:46:00 A1QUAC68EAM09F Used - Like New True + 1 2022-11-13 10:36:00 A18WXU4I7YR6UA Used - Very Good False + 2 2022-11-15 23:50:00 AYUGEV9WZ4X5O Used - Like New False + 3 2022-11-17 06:16:00 A18WXU4I7YR6UA Used - Very Good False + 4 2022-11-17 10:56:00 AYUGEV9WZ4X5O Used - Like New False + .. ... ... ... ... + 115 2023-10-23 10:00:00 AYUGEV9WZ4X5O Used - Like New False + 116 2023-10-25 21:14:00 A1U9HDFCZO1A84 Used - Like New False + 117 2023-10-26 04:08:00 AYUGEV9WZ4X5O Used - Like New False + 118 2023-10-27 08:14:00 A1U9HDFCZO1A84 Used - Like New False + 119 2023-10-27 12:34:00 AYUGEV9WZ4X5O Used - Like New False + """ # Format items into numpy array try: @@ -1017,7 +1040,7 @@ def best_sellers_query(self, category, rank_avg_range=0, domain="US", wait=True) Query for the best sellers among the ``"movies"`` category. 
>>> import keepa - >>> key = "" + >>> key = '' >>> api = keepa.Keepa(key) >>> categories = api.search_for_categories("movies") >>> category = list(categories.items())[0][0] @@ -1037,7 +1060,7 @@ def best_sellers_query(self, category, rank_avg_range=0, domain="US", wait=True) >>> import asyncio >>> import keepa >>> async def main(): - ... key = "" + ... key = '' ... api = await keepa.AsyncKeepa().create(key) ... categories = await api.search_for_categories("movies") ... category = list(categories.items())[0][0] @@ -1097,11 +1120,11 @@ def search_for_categories(self, searchterm, domain="US", wait=True) -> list: Print all categories from science. >>> import keepa - >>> key = "" + >>> key = '' >>> api = keepa.Keepa(key) - >>> categories = api.search_for_categories("science") + >>> categories = api.search_for_categories('science') >>> for cat_id in categories: - ... print(cat_id, categories[cat_id]["name"]) + ... print(cat_id, categories[cat_id]['name']) ... 9091159011 Behavioral Sciences 8407535011 Fantasy, Horror & Science Fiction @@ -1159,7 +1182,7 @@ def category_lookup(self, category_id, domain="US", include_parents=False, wait= Use 0 to return all root categories. >>> import keepa - >>> key = "" + >>> key = '' >>> api = keepa.Keepa(key) >>> categories = api.category_lookup(0) @@ -1277,10 +1300,10 @@ def seller_query( Return the information from seller ``'A2L77EE7U53NWQ'``. >>> import keepa - >>> key = "" + >>> key = '' >>> api = keepa.Keepa(key) - >>> seller_info = api.seller_query("A2L77EE7U53NWQ", "US") - >>> seller_info["A2L77EE7U53NWQ"]["sellerName"] + >>> seller_info = api.seller_query('A2L77EE7U53NWQ', 'US') + >>> seller_info['A2L77EE7U53NWQ']['sellerName'] 'Amazon Warehouse' Notes @@ -2350,10 +2373,10 @@ def product_finder(self, product_parms, domain="US", wait=True) -> list: ``keepa.Keepa`` class. Sort by current sales >>> import keepa - >>> api = keepa.Keepa("") + >>> api = keepa.Keepa('') >>> product_parms = { - ... "author": "jim butcher", - ... 
"sort": ["current_SALES", "asc"], + ... 'author': 'jim butcher', + ... 'sort': ["current_SALES", "asc"], ... } >>> asins = api.product_finder(product_parms) @@ -2371,9 +2394,9 @@ def product_finder(self, product_parms, domain="US", wait=True) -> list: >>> import asyncio >>> import keepa - >>> product_parms = {"author": "jim butcher"} + >>> product_parms = {'author': 'jim butcher'} >>> async def main(): - ... key = "" + ... key = '' ... api = await keepa.AsyncKeepa().create(key) ... return await api.product_finder(product_parms) ... @@ -2471,7 +2494,7 @@ def deals(self, deal_parms, domain="US", wait=True) -> dict: ``keepa.Keepa`` class >>> import keepa - >>> key = "" + >>> key = '' >>> api = keepa.Keepa(key) >>> deal_parms = { ... "page": 0, @@ -2483,7 +2506,7 @@ def deals(self, deal_parms, domain="US", wait=True) -> dict: Get the title of the first deal. - >>> deals["dr"][0]["title"] + >>> deals['dr'][0]['title'] 'Orange Cream Rooibos, Tea Bags - Vanilla, Orange | Caffeine-Free, Antioxidant-rich, Hot & Iced | The Spice Hut, First Sip Of Tea' @@ -2499,7 +2522,7 @@ def deals(self, deal_parms, domain="US", wait=True) -> dict: ... "includeCategories": [16310101], ... } >>> async def main(): - ... key = "" + ... key = '' ... api = await keepa.AsyncKeepa().create(key) ... categories = await api.search_for_categories("movies") ... return await api.deals(deal_parms) @@ -2605,9 +2628,9 @@ class AsyncKeepa: >>> import asyncio >>> import keepa - >>> product_parms = {"author": "jim butcher"} + >>> product_parms = {'author': 'jim butcher'} >>> async def main(): - ... key = "" + ... key = '' ... api = await keepa.AsyncKeepa().create(key) ... return await api.product_finder(product_parms) ... @@ -2627,12 +2650,12 @@ class AsyncKeepa: >>> import asyncio >>> import keepa >>> async def main(): - ... key = "" + ... key = '' ... api = await keepa.AsyncKeepa().create(key) - ... return await api.query("B0088PUEPK") + ... return await api.query('B0088PUEPK') ... 
>>> response = asyncio.run(main()) - >>> response[0]["title"] + >>> response[0]['title'] 'Western Digital 1TB WD Blue PC Internal Hard Drive HDD - 7200 RPM, SATA 6 Gb/s, 64 MB Cache, 3.5" - WD10EZEX' @@ -3054,6 +3077,93 @@ def convert_offer_history(csv, to_datetime=True): return times, prices +def _str_to_bool(string: str): + if string: + return bool(int(string)) + return False + + +def process_used_buybox(buybox_info: List[str]) -> pd.DataFrame: + """ + Process used buybox information to create a Pandas DataFrame. + + Parameters + ---------- + buybox_info : list of str + A list containing information about used buybox in a specific order: + [Keepa time minutes, seller id, condition, isFBA, ...] + + Returns + ------- + pd.DataFrame + A DataFrame containing four columns: + - 'datetime': Datetime objects converted from Keepa time minutes. + - 'user_id': String representing the seller ID. + - 'condition': String representing the condition of the product. + - 'isFBA': Boolean indicating whether the offer is Fulfilled by Amazon. + + Notes + ----- + The `condition` is mapped from its code to a descriptive string. + The `isFBA` field is converted to a boolean. + + Examples + -------- + Load in product offers and convert the buy box data into a + ``pandas.DataFrame``. + + >>> import keepa + >>> key = '' + >>> api = keepa.Keepa(key) + >>> response = api.query('B0088PUEPK', offers=20) + >>> product = response[0] + >>> buybox_info = product['buyBoxUsedHistory'] + >>> df = keepa.process_used_buybox(buybox_info) + datetime user_id condition isFBA + 0 2022-11-02 16:46:00 A1QUAC68EAM09F Used - Like New True + 1 2022-11-13 10:36:00 A18WXU4I7YR6UA Used - Very Good False + 2 2022-11-15 23:50:00 AYUGEV9WZ4X5O Used - Like New False + 3 2022-11-17 06:16:00 A18WXU4I7YR6UA Used - Very Good False + 4 2022-11-17 10:56:00 AYUGEV9WZ4X5O Used - Like New False + .. ... ... ... ... 
+ 115 2023-10-23 10:00:00 AYUGEV9WZ4X5O Used - Like New False + 116 2023-10-25 21:14:00 A1U9HDFCZO1A84 Used - Like New False + 117 2023-10-26 04:08:00 AYUGEV9WZ4X5O Used - Like New False + 118 2023-10-27 08:14:00 A1U9HDFCZO1A84 Used - Like New False + 119 2023-10-27 12:34:00 AYUGEV9WZ4X5O Used - Like New False + + """ + datetime_arr = [] + user_id_arr = [] + condition_map = { + "": "Unknown", + "2": "Used - Like New", + "3": "Used - Very Good", + "4": "Used - Good", + "5": "Used - Acceptable", + } + condition_arr = [] + isFBA_arr = [] + + for i in range(0, len(buybox_info), 4): + keepa_time = int(buybox_info[i]) + datetime_arr.append(keepa_minutes_to_time([keepa_time])[0]) + user_id_arr.append(buybox_info[i + 1]) + condition_arr.append(condition_map[buybox_info[i + 2]]) + isFBA_arr.append(_str_to_bool(buybox_info[i + 3])) + + df = pd.DataFrame( + { + 'datetime': datetime_arr, + 'user_id': user_id_arr, + 'condition': condition_arr, + 'isFBA': isFBA_arr, + } + ) + + return df + + def keepa_minutes_to_time(minutes, to_datetime=True): """Accept an array or list of minutes and converts it to a numpy datetime array. diff --git a/tests/test_async_interface.py b/tests/test_async_interface.py index 2b8397d..efc7244 100644 --- a/tests/test_async_interface.py +++ b/tests/test_async_interface.py @@ -28,6 +28,7 @@ # The Great Gatsby: The Original 1925 Edition (F. 
Scott Fitzgerald Classics) PRODUCT_ASIN = "B09X6JCFF5" +HARD_DRIVE_PRODUCT_ASIN = "B0088PUEPK" # ASINs of a bunch of chairs # categories = API.search_for_categories('chairs') @@ -258,6 +259,13 @@ async def test_bestsellers(api): assert len(asins) == valid_asins.size +@pytest.mark.asyncio +async def test_buybox_used(api): + request = await api.query(HARD_DRIVE_PRODUCT_ASIN, history=False, offers=20) + df = keepa.process_used_buybox(request[0]['buyBoxUsedHistory']) + assert isinstance(df, pd.DataFrame) + + @pytest.mark.asyncio async def test_categories(api): categories = await api.search_for_categories("chairs") diff --git a/tests/test_interface.py b/tests/test_interface.py index 6da1a4c..c9853a8 100644 --- a/tests/test_interface.py +++ b/tests/test_interface.py @@ -31,7 +31,7 @@ # The Great Gatsby: The Original 1925 Edition (F. Scott Fitzgerald Classics) PRODUCT_ASIN = "B09X6JCFF5" - +HARD_DRIVE_PRODUCT_ASIN = "B0088PUEPK" # ASINs of a bunch of chairs generated with # categories = API.search_for_categories('chairs') @@ -329,6 +329,12 @@ def test_bestsellers(api): assert len(asins) == valid_asins.size +def test_buybox_used(api): + request = api.query(HARD_DRIVE_PRODUCT_ASIN, history=False, offers=20) + df = keepa.process_used_buybox(request[0]['buyBoxUsedHistory']) + assert isinstance(df, pd.DataFrame) + + def test_categories(api): categories = api.search_for_categories("chairs") catids = list(categories.keys())