-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactored pdfreader to be more extensible and added a mercurycards importer which uses the pdfreader to read credit card transactions.
- Loading branch information
1 parent
738b0ea
commit ad320e0
Showing
10 changed files
with
454 additions
and
94 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
163 changes: 163 additions & 0 deletions
163
beancount_reds_importers/importers/mercurycards/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
"""Mercury Cards pdf importer for beancount.""" | ||
|
||
import re | ||
from datetime import datetime | ||
|
||
import petl as etl | ||
|
||
from beancount_reds_importers.libreader import pdfreader | ||
from beancount_reds_importers.libtransactionbuilder import banking | ||
|
||
|
||
class Importer(banking.Importer, pdfreader.Importer):
    """Mercury Cards PDF statement importer for beancount.

    Reads the transaction tables out of a Mercury Cards statement PDF via
    ``pdfreader.Importer`` and hands them to the ``banking.Importer``
    transaction builder.
    """

    IMPORTER_NAME = "Mercury Cards"

    def custom_init(self):
        """Configure the PDF reader for Mercury statements (runs only once)."""
        if self.custom_init_run:
            return

        self.max_rounding_error = 0.04
        self.filename_pattern_def = "Mercury Statement *.pdf"
        self.pdf_table_extraction_settings = {
            "vertical_strategy": "text",
            "horizontal_strategy": "text",
        }
        self.pdf_table_extraction_crop = (0, 0, 0, 0)
        self.pdf_table_title_height = 0
        self.pdf_page_break_top = 0
        self.date_format = "%m/%d/%Y"
        self.transaction_table_section = "table_1"
        self.meta_text = ""
        self.skip_transaction_types = {}
        # Maps the statement's column titles (pushed in prepare_tables) to the
        # field names used after the rename in prepare_tables.
        self.header_map = {
            "Post Date": "settleDate",
            "Trans Date": "date",
            "Description": "memo",
            "Reference": "reference",
            "Amount": "amount",
        }

        # payee and narration are swapped
        # We need to swap them back. See banking.py
        self.get_payee = lambda ot: ot.memo
        self.get_narration = lambda ot: None  # setting to none to use smart importer

        # NOTE(review): debug is hard-coded on; this looks like a development
        # leftover -- confirm what pdfreader does when debug is set before
        # shipping.
        self.debug = True
        self.custom_init_run = True

    def file_date(self, file):
        """Return the statement closing date, reading the file first if needed."""
        if not self.file_read_done:
            self.read_file(file)

        return self.get_closing_date()

    def get_closing_date(self):
        """Parse the statement closing date out of ``self.meta_text``.

        Returns:
            A ``datetime`` built from the first "Closing Date mm/dd/yyyy"
            occurrence in ``self.meta_text``.

        Raises:
            ValueError: if ``self.meta_text`` is empty or contains no
                closing date.
        """
        if not self.meta_text:
            raise ValueError("No meta_text has been found")

        # Match "Closing Date" followed by a date in mm/dd/yyyy format; only
        # the first occurrence is used.
        found = re.search(r"Closing Date\s+(\d{2}/\d{2}/\d{4})", self.meta_text)
        if found is None:
            # Previously this surfaced as a bare IndexError on matches[0].
            raise ValueError("No closing date has been found in meta_text")

        return datetime.strptime(found.group(1), "%m/%d/%Y")

    def get_adjusted_crop(self, page_idx, page):
        """Dynamically find the crop position based on the position of text found on the page.

        The crop starts just below the first "TRANSACTIONS" hit and ends just
        above the first "YEAR-TO-DATE" hit (or at the bottom of the page when
        that text is absent).  When "TRANSACTIONS" is not on the page the
        fallback crop ``(0, 0, 1, 1)`` is returned.
        """
        start_hits = page.search("TRANSACTIONS")
        if not start_hits:
            return (0, 0, 1, 1)

        table_start = start_hits[0]
        left = table_start["x0"] - 30
        top = table_start["bottom"] + 50

        end_hits = page.search("YEAR-TO-DATE")
        if end_hits:
            bottom = end_hits[0]["top"] - 10
        else:
            bottom = page.bbox[3]  # if no end text is found use the whole page

        return (left, top, page.bbox[2], bottom)

    def fix_years(self, table):
        """Append the correct year to the MM/DD dates of the date columns.

        The statement prints transaction dates without a year, so the year is
        derived from the closing date.  A transaction whose month is later
        than the closing month cannot belong to the closing year (it would be
        after the statement closed), so it is assigned the previous year; this
        handles statements that span a year boundary (e.g. December
        transactions on a statement closing in January).
        """
        closing = None

        def add_year(d):
            nonlocal closing
            # Resolve the closing date on first use only: conversions are
            # evaluated lazily per row, and the date is the same for all rows.
            if closing is None:
                closing = self.get_closing_date()

            try:
                month = int(str(d).split("/", 1)[0])
            except ValueError:
                # Unparseable value: fall back to the closing year unchanged.
                month = None

            year = closing.year
            if month is not None and month > closing.month:
                year -= 1

            return f"{d}/{year}"

        for column in ("Post Date", "Trans Date"):
            if column in table.header():
                table = table.convert(column, add_year)

        return table

    def prepare_tables(self):
        """Make final adjustments to tables before processing by the transaction builder."""
        # set table headers. table was goofy, so they had to be cropped out
        headers = [
            "Post Date",
            "Trans Date",
            "Description",
            "City",
            "State",
            "Reference",
            "Amount",
        ]

        for section, table in self.alltables.items():
            table = etl.wrap(etl.pushheader(table, headers))

            # add year to mm/dd formatted date
            table = self.fix_years(table)

            table = table.rename(self.header_map)
            table = self.convert_columns(table)

            # the amounts should be negative since they're charges
            table = etl.convert(table, "amount", lambda a: a * -1)

            table = self.fix_column_names(table)
            # override prepare_processed_table to make additional adjustments
            table = self.prepare_processed_table(table)

            # Only the value of an existing key is replaced, which is safe
            # while iterating over the dict.
            self.alltables[section] = table

        # NOTE(review): the combined table returned here is discarded, so this
        # call currently has no visible effect on self.alltables -- confirm
        # whether the result was meant to be stored.
        self.combine_tables()

    def combine_tables(self):
        """Concatenate every table in ``self.alltables`` into one petl table.

        Returns:
            The concatenated table, or ``None`` when there are no tables.
        """
        combined_table = None

        for table in self.alltables.values():
            wrapped = etl.wrap(table)
            if combined_table is None:
                # First table initializes the combined table
                combined_table = wrapped
            else:
                # Concatenate additional tables
                combined_table = etl.cat(combined_table, wrapped)

        return combined_table
Binary file added
BIN
+296 KB
beancount_reds_importers/importers/mercurycards/tests/mercury_statement_20241105.pdf
Binary file not shown.
120 changes: 120 additions & 0 deletions
120
beancount_reds_importers/importers/mercurycards/tests/mercury_statement_20241105.pdf.extract
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
|
||
2024-10-05 * "Whole Foods" | ||
Liabilities:Credit-Cards:Mercury -15.01 USD | ||
|
||
2024-10-06 * "Car Wash" | ||
Liabilities:Credit-Cards:Mercury -35.30 USD | ||
|
||
2024-10-07 * "Taco Bell" | ||
Liabilities:Credit-Cards:Mercury -22.76 USD | ||
|
||
2024-10-07 * "Mcdonald's" | ||
Liabilities:Credit-Cards:Mercury -10.11 USD | ||
|
||
2024-10-08 * "Papa John's" | ||
Liabilities:Credit-Cards:Mercury -30.16 USD | ||
|
||
2024-10-10 * "Paypal" | ||
Liabilities:Credit-Cards:Mercury -33.97 USD | ||
|
||
2024-10-11 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -101.55 USD | ||
|
||
2024-10-11 * "Wm Supercenter" | ||
Liabilities:Credit-Cards:Mercury -53.44 USD | ||
|
||
2024-10-12 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -204.32 USD | ||
|
||
2024-10-12 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -4.90 USD | ||
|
||
2024-10-12 * "Target" | ||
Liabilities:Credit-Cards:Mercury -10.90 USD | ||
|
||
2024-10-13 * "Bp" | ||
Liabilities:Credit-Cards:Mercury -106.95 USD | ||
|
||
2024-10-14 * "Circle K" | ||
Liabilities:Credit-Cards:Mercury -50.69 USD | ||
|
||
2024-10-15 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -10.11 USD | ||
|
||
2024-10-15 * "Doordash" | ||
Liabilities:Credit-Cards:Mercury -73.82 USD | ||
|
||
2024-10-16 * "Mcdonald's" | ||
Liabilities:Credit-Cards:Mercury -10.68 USD | ||
|
||
2024-10-16 * "7-Eleven" | ||
Liabilities:Credit-Cards:Mercury -10.11 USD | ||
|
||
2024-10-17 * "Advance Auto Parts" | ||
Liabilities:Credit-Cards:Mercury -13.86 USD | ||
|
||
2024-10-18 * "Mcdonald's" | ||
Liabilities:Credit-Cards:Mercury -15.11 USD | ||
|
||
2024-10-18 * "Papa John's" | ||
Liabilities:Credit-Cards:Mercury -31.29 USD | ||
|
||
2024-10-18 * "Mcdonald's" | ||
Liabilities:Credit-Cards:Mercury -12.62 USD | ||
|
||
2024-10-19 * "Amazon Prime" | ||
Liabilities:Credit-Cards:Mercury -23.52 USD | ||
|
||
2024-10-19 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -30.46 USD | ||
|
||
2024-10-19 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -44.21 USD | ||
|
||
2024-10-19 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -23.57 USD | ||
|
||
2024-10-19 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -22.24 USD | ||
|
||
2024-10-19 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -69.23 USD | ||
|
||
2024-10-19 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -60.00 USD | ||
|
||
2024-10-19 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -57.07 USD | ||
|
||
2024-10-19 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -224.05 USD | ||
|
||
2024-10-20 * "Whole Foods" | ||
Liabilities:Credit-Cards:Mercury -6.39 USD | ||
|
||
2024-10-21 * "Doordash" | ||
Liabilities:Credit-Cards:Mercury -79.56 USD | ||
|
||
2024-10-21 * "Amazon" | ||
Liabilities:Credit-Cards:Mercury -20.24 USD | ||
|
||
2024-10-22 * "Papa John's" | ||
Liabilities:Credit-Cards:Mercury -37.85 USD | ||
|
||
2024-10-22 * "Mcdonald's" | ||
Liabilities:Credit-Cards:Mercury -10.11 USD | ||
|
||
2024-10-22 * "Racetrac" | ||
Liabilities:Credit-Cards:Mercury -32.90 USD | ||
|
||
2024-10-23 * "Mcdonald's" | ||
Liabilities:Credit-Cards:Mercury -10.11 USD | ||
|
||
2024-10-24 * "Doordash" | ||
Liabilities:Credit-Cards:Mercury -28.41 USD | ||
|
||
2024-10-24 * "Doordash" | ||
Liabilities:Credit-Cards:Mercury -40.83 USD | ||
|
||
2024-10-25 * "Doordash" | ||
Liabilities:Credit-Cards:Mercury -68.35 USD |
1 change: 1 addition & 0 deletions
1
...t_reds_importers/importers/mercurycards/tests/mercury_statement_20241105.pdf.file_account
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Liabilities:Credit-Cards:Mercury |
1 change: 1 addition & 0 deletions
1
...ount_reds_importers/importers/mercurycards/tests/mercury_statement_20241105.pdf.file_date
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
2024-11-05T00:00:00 |
1 change: 1 addition & 0 deletions
1
...ount_reds_importers/importers/mercurycards/tests/mercury_statement_20241105.pdf.file_name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
mercury_statement_20241105.pdf |
21 changes: 21 additions & 0 deletions
21
beancount_reds_importers/importers/mercurycards/tests/mercurycards_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from os import path | ||
|
||
from beancount.ingest import regression_pytest as regtest | ||
|
||
from beancount_reds_importers.importers import mercurycards | ||
|
||
|
||
@regtest.with_importer(
    mercurycards.Importer(
        dict(
            main_account="Liabilities:Credit-Cards:Mercury",
            emit_filing_account_metadata=False,
            filename_pattern="mercury_statement_20241105.pdf",
            skip_transaction_types={},
            currency="USD",
        )
    )
)
@regtest.with_testdir(path.dirname(__file__))
class TestMercuryCards(regtest.ImporterTestBase):
    """Regression test for the Mercury Cards importer.

    The importer is configured above and run against the test directory that
    contains this module; the test methods come from ``ImporterTestBase``.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.