Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an importer for Vanguard 529 CSV data #83

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions beancount_reds_importers/importers/schwab/schwab_csv_balances.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,17 @@ def file_date(self, file):
def get_max_transaction_date(self):
    """Return ``self.date`` reduced to a plain calendar date."""
    as_of = self.date
    return as_of.date()

def prepare_processed_table(self, rdr):
    """Reduce a converted table to position rows stamped with ``self.date``.

    Keeps only the memo/security/units/unit_price columns, drops the rows
    whose memo is ``--`` and appends a constant ``date`` column.
    """
    wanted = ('memo', 'security', 'units', 'unit_price')
    positions = rdr.cut(*wanted).selectne('memo', '--')  # we don't need total rows
    return positions.addfield('date', self.date)

def prepare_tables(self):
# Reads the statement date from the first raw row, then normalises each
# section table listed in config["section_headers"].
# NOTE(review): this span comes from a diff view without +/- markers and its
# hunk reports 9 deletions, so the loop below may be the removed pre-change
# code rather than part of the final method -- confirm against the repository.
# first row has date
# (the date is the text after the last space in the first cell of the first row)
d = self.raw_rdr[0][0].rsplit(" ", 1)[1]
self.date = datetime.datetime.strptime(d, self.date_format)

# Tables whose section name is not configured are left untouched.
for section, table in self.alltables.items():
if section in self.config["section_headers"]:
table = table.rename(self.header_map)
table = self.convert_columns(table)
table = table.cut("memo", "security", "units", "unit_price")
table = table.selectne("memo", "--") # we don't need total rows
table = table.addfield("date", self.date)
self.alltables[section] = table

def get_balance_positions(self):
    """Yield one named tuple per row of every configured section.

    Sections are visited in the order given by
    ``self.config["section_headers"]``.
    """
    for header in self.config["section_headers"]:
        section_rows = self.alltables[header].namedtuples()
        for position in section_rows:
            yield position
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from os import path
from beancount.ingest import regression_pytest as regtest
from beancount_reds_importers.importers.schwab import schwab_csv_balances


# Fund table for the test: each entry is (ticker, <second field>, description).
# NOTE(review): the second field is presumably a fund identifier -- confirm.
fund_data = [
    ("MMM", "123", "3M INC"),
    ("BND", "789", "Vanguard Total Bond Market Index Fund"),
    ("PP", "456", "PIED PIPER INC"),
    ("VMMX", "789", "VANGUARD MONEY MARKET"),
    ("VMMX2", "901", "VANGUARD MONEY MARKET MONEY SHARES"),
    ("HOOLI", "234", "HOOLI MONEY MARKET"),
]

# list of money_market accounts. These will not be held at cost, and instead will use price conversions
money_market = ["VMMX", "VMMX2", "HOOLI"]

# Bundle handed to the importer through the 'fund_info' config key.
fund_info = dict(fund_data=fund_data, money_market=money_market)


def build_config():
    """Build the importer configuration for the Schwab CSV balances test.

    Returns:
        dict: account-name templates (the ``{ticker}`` / ``{currency}``
        placeholders are left unformatted), the module-level ``fund_info``
        table, the base currency and the CSV section names to import.
    """
    acct = "Assets:Investments:Schwab"
    root = 'Investments'
    taxability = 'Taxable'
    leaf = 'Schwab'
    currency = 'USD'
    return {
        'account_number': 9876,
        'main_account': f'{acct}:{{ticker}}',
        'cash_account': f'{acct}:{{currency}}',
        # The account name must not carry leading whitespace.
        'transfer': 'Assets:Zero-Sum-Accounts:Transfers:Bank-Account',
        'dividends': f'Income:{root}:{taxability}:Dividends:{leaf}:{{ticker}}',
        'interest': f'Income:{root}:{taxability}:Interest:{leaf}:{{ticker}}',
        'cg': f'Income:{root}:{taxability}:Capital-Gains:{leaf}:{{ticker}}',
        'capgainsd_lt': f'Income:{root}:{taxability}:Capital-Gains-Distributions:Long:{leaf}:{{ticker}}',
        'capgainsd_st': f'Income:{root}:{taxability}:Capital-Gains-Distributions:Short:{leaf}:{{ticker}}',
        'fees': f'Expenses:Fees-and-Charges:Brokerage-Fees:{taxability}:{leaf}',
        'invexpense': f'Expenses:Expenses:Investment-Expenses:{taxability}:{leaf}',
        'rounding_error': 'Equity:Rounding-Errors:Imports',
        'fund_info': fund_info,
        'currency': currency,
        # Only tables under these section titles are read as positions.
        'section_headers': ['Stocks', 'Bonds', 'Money Market'],
    }


@regtest.with_importer(schwab_csv_balances.Importer(build_config()))
@regtest.with_testdir(path.dirname(__file__))
class TestSchwabCSV(regtest.ImporterTestBase):
    """Regression test: Schwab CSV balances importer over this directory's files."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"Balances for account General Investing ...XXX as of 05/03/2023"
"Stocks"
"Symbol","Description","Quantity","Price"
"MMM","3M INC","656","$16,516.92"

"Bonds"
"Symbol","Description","Quantity","Price"
"BND","VANGUARD TOTAL BOND MARKET ETF","45","$3320.05"

"Ignored"
"Symbol","Description","Quantity","Price"
"PP","PIED PIPER INC","62","$51.95"

"Money Market"
"Symbol","Description","Quantity","Price"
"VMMX","VANGUARD MONEY MARKET","6225","$6225"
"VMMX2","VANGUARD MONEY MARKET MONEY SHARES","129","$5952"
"HOOLI","HOOLI MONEY MARKET","4591","$4591"
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
2023-05-03 price MMM 16516.92 USD
2023-05-03 price BND 3320.05 USD
2023-05-03 price VMMX 6225 USD
2023-05-03 price VMMX2 5952 USD
2023-05-03 price HOOLI 4591 USD

2023-05-04 balance Assets:Investments:Schwab:MMM 656 MMM
2023-05-04 balance Assets:Investments:Schwab:BND 45 BND
2023-05-04 balance Assets:Investments:Schwab:VMMX 6225 VMMX
2023-05-04 balance Assets:Investments:Schwab:VMMX2 129 VMMX2
2023-05-04 balance Assets:Investments:Schwab:HOOLI 4591 HOOLI
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Assets:Investments:Schwab
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2023-05-03
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
schwab_csv_brokerage_Balances_123.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Fund Account Number,Fund Name,Price,Shares,Total Value
535672845-01,Vanguard Target Enrollment 2040/2041 Portfolio,$9.45,348.5265,$3293.58

Account Number,Trade Date,Process Date,Transaction Type,Transaction Description,Investment Name,Share Price,Shares,Gross Amount,Net Amount
535672845-01,06/30/2023,06/30/2023,Contribution AIP,Contribution AIP,Vanguard Target Enrollment 2040/2041 Portfolio,$9.51,26.2881,$250,$250
535672845-01,05/31/2023,05/31/2023,Contribution AIP,Contribution AIP,Vanguard Target Enrollment 2040/2041 Portfolio,$9.01,27.7469,$250,$250
535672845-01,04/28/2023,04/28/2023,Contribution AIP,Contribution AIP,Vanguard Target Enrollment 2040/2041 Portfolio,$9.11,27.4424,$250,$250
535672845-01,03/31/2023,03/31/2023,Contribution AIP,Contribution AIP,Vanguard Target Enrollment 2040/2041 Portfolio,$9,27.7778,$250,$250
535672845-01,02/28/2023,02/28/2023,Contribution AIP,Contribution AIP,Vanguard Target Enrollment 2040/2041 Portfolio,$8.76,28.5388,$250,$250
535672845-01,02/17/2023,02/17/2023,Contribution EBT,Contribution EBT,Vanguard Target Enrollment 2040/2041 Portfolio,$8.99,22.2469,$200,$200
535672845-01,01/31/2023,01/31/2023,Contribution AIP,Contribution AIP,Vanguard Target Enrollment 2040/2041 Portfolio,$9.03,27.6855,$250,$250
535672845-01,01/03/2023,01/03/2023,Contribution AIP,Contribution AIP,Vanguard Target Enrollment 2040/2041 Portfolio,$8.42,29.6912,$250,$250
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

2023-01-03 * "Contribution AIP" "[VTE2040] Vanguard Target Enrollment 2040/2041 Portfolio"
Assets:Vanguard:529:VTE2040 29.6912 VTE2040 {8.42 USD}
Assets:Vanguard:529:Cash -250.00 USD

2023-01-31 * "Contribution AIP" "[VTE2040] Vanguard Target Enrollment 2040/2041 Portfolio"
Assets:Vanguard:529:VTE2040 27.6855 VTE2040 {9.03 USD}
Assets:Vanguard:529:Cash -250.00 USD

2023-02-17 * "Contribution EBT" "[VTE2040] Vanguard Target Enrollment 2040/2041 Portfolio"
Assets:Vanguard:529:VTE2040 22.2469 VTE2040 {8.99 USD}
Assets:Vanguard:529:Cash -200.00 USD

2023-02-28 * "Contribution AIP" "[VTE2040] Vanguard Target Enrollment 2040/2041 Portfolio"
Assets:Vanguard:529:VTE2040 28.5388 VTE2040 {8.76 USD}
Assets:Vanguard:529:Cash -250.00 USD

2023-03-31 * "Contribution AIP" "[VTE2040] Vanguard Target Enrollment 2040/2041 Portfolio"
Assets:Vanguard:529:VTE2040 27.7778 VTE2040 {9.00 USD}
Assets:Vanguard:529:Cash -250.00 USD

2023-04-28 * "Contribution AIP" "[VTE2040] Vanguard Target Enrollment 2040/2041 Portfolio"
Assets:Vanguard:529:VTE2040 27.4424 VTE2040 {9.11 USD}
Assets:Vanguard:529:Cash -250.00 USD

2023-05-31 * "Contribution AIP" "[VTE2040] Vanguard Target Enrollment 2040/2041 Portfolio"
Assets:Vanguard:529:VTE2040 27.7469 VTE2040 {9.01 USD}
Assets:Vanguard:529:Cash -250.00 USD

2023-06-30 * "Contribution AIP" "[VTE2040] Vanguard Target Enrollment 2040/2041 Portfolio"
Assets:Vanguard:529:VTE2040 26.2881 VTE2040 {9.51 USD}
Assets:Vanguard:529:Cash -250.00 USD

2023-06-30 price VTE2040 9.45 USD

2023-07-01 balance Assets:Vanguard:529:VTE2040 348.5265 VTE2040
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Assets:Vanguard:529
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2023-09-10
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ofxdownload_09102023.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from os import path
from beancount.ingest import regression_pytest as regtest
from beancount_reds_importers.importers.vanguard import vanguard_529


# Importer configuration for the Vanguard 529 regression test.
_VANGUARD_529_CONFIG = {
    "account_number": "535672845-01",
    "main_account": "Assets:Vanguard:529:{ticker}",
    "cash_account": "Assets:Vanguard:529:Cash",
    "dividends": "Income:Dividends:Vanguard:529:{ticker}",
    "interest": "Income:Interest:Vanguard:529:{ticker}",
    "cg": "Income:CapitalGains:529:{ticker}",
    "capgainsd_lt": "Income:CapitalGains:Long:Vanguard:529:{ticker}",
    "capgainsd_st": "Income:CapitalGains:Short:Vanguard:529:{ticker}",
    "fees": "Expenses:Fees:Vanguard:529",
    "invexpense": "Expenses:Expenses:Vanguard:529",
    "rounding_error": "Equity:Rounding-Errors:Imports",
    "fund_info": {
        "fund_data": [
            ("VTE2040", "00000000", "Vanguard Target Enrollment 2040/2041 Portfolio"),
        ],
        "money_market": [],
    },
    "currency": "USD",
}


@regtest.with_importer(vanguard_529.Importer(_VANGUARD_529_CONFIG))
@regtest.with_testdir(path.dirname(__file__))
class TestVanguard529(regtest.ImporterTestBase):
    """Regression test: Vanguard 529 importer over this directory's files."""
88 changes: 88 additions & 0 deletions beancount_reds_importers/importers/vanguard/vanguard_529.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
""" Vanguard 529 csv importer."""

import re
from datetime import datetime

from beancount_reds_importers.libreader import csv_multitable_reader
from beancount_reds_importers.libtransactionbuilder import investments


class Importer(investments.Importer, csv_multitable_reader.Importer):
    """Importer for the CSV export Vanguard offers for 529 accounts.

    The export holds two tables: current positions, whose header row starts
    with ``Fund Account Number``, and the transaction history, whose header
    row starts with ``Account Number``.
    """

    IMPORTER_NAME = 'Vanguard 529'

    def custom_init(self):
        """Set the file-matching patterns, column mappings and reader options."""
        self.max_rounding_error = 0.04
        # Vanguard only gives a csv download option for 529 accounts, but they name it "ofxdownload" to tease you
        self.filename_pattern_def = '.*ofxdownload.*'
        self.header_identifier = 'Fund Account Number,Fund Name,Price,Shares,Total Value.*'
        self.get_ticker_info = self.get_ticker_info_from_id
        self.date_format = '%m/%d/%Y'
        self.funds_db_txt = 'funds_by_ticker'
        self.header_map = {
            "Process Date": 'date',
            "Trade Date": 'tradeDate',
            "Transaction Type": 'type',
            "Transaction Description": 'memo',
            "Shares": 'units',
            "Share Price": 'unit_price',
            "Gross Amount": 'amount',
            "Net Amount": 'total',
            "Price": 'unit_price',
        }
        self.transaction_type_map = {
            'Contribution AIP': 'buystock',
            'Contribution EBT': 'buystock',
        }
        self.skip_transaction_types = []
        # There is no separate title row above each table in this export; the
        # column-header row itself marks where a section starts.
        self.section_titles_are_headers = True
        # Default to padding whole-number currency values unless the user
        # configured this explicitly.
        self.config.setdefault('add_currency_precision', True)

    def deep_identify(self, file):
        """Accept the file only if the base check passes and it mentions the account.

        The configured account number is coerced to ``str`` so that a numeric
        value in the config cannot make the substring test raise.
        """
        account_number = str(self.config.get('account_number', ''))
        return super().deep_identify(file) and account_number in file.head()

    def file_date(self, file):
        """Return the file's date as a ``datetime.date``.

        Use the date in the file name (first run of eight digits in the base
        name, read as MMDDYYYY). If that doesn't exist or is not a valid date,
        fall back to the maximum trade date found by ``prepare_tables``.
        Returns ``None`` when neither is available.
        """
        from os import path  # local import: only this method needs it

        date = None
        # Look at the base name only so digits in a directory name are ignored.
        match = re.search(r'\d{8}', path.basename(file.name))
        if match:
            try:
                date = datetime.strptime(match.group(), "%m%d%Y").date()
            except ValueError:
                date = None  # eight digits that are not a date; use the fallback
        if date is None:
            # maxdate is stored as text in self.date_format (see prepare_tables)
            # and is absent until a file has been read.
            maxdate = getattr(self, 'maxdate', None)
            if maxdate is not None:
                date = datetime.strptime(maxdate, self.date_format).date()
        return date

    def prepare_tables(self):
        """Rename the raw sections and add ``security`` and ``date`` columns.

        The section keyed ``Fund Account Number`` becomes ``Balance Positions``
        and the one keyed ``Account Number`` becomes ``Transactions``. Fund
        names are mapped to tickers via ``self.fund_data``; unknown names are
        kept as they are. ``self.maxdate`` is set to the latest trade date
        (formatted with ``self.date_format``) or ``None`` if there is none.
        """
        ticker_by_desc = {desc: ticker for ticker, _, desc in self.fund_data}

        def ticker_for(column):
            # Row function for addfield: look the fund name up, defaulting to the name itself.
            return lambda row: ticker_by_desc.get(row[column], row[column])

        alltables = {}
        self.maxdate = None
        for section, table in self.alltables.items():
            if section == 'Fund Account Number':
                section = 'Balance Positions'
                table = table.addfield('security', ticker_for('Fund Name'))
                # We need to add a date field but we can't do that yet because we need to make sure
                # the transactions section has been processed and set
            elif section == 'Account Number':
                section = 'Transactions'
                table = table.addfield('security', ticker_for('Investment Name'))
                # We have to do our own finding of the max date because the table data hasn't been cleaned up yet
                latest = max(
                    (datetime.strptime(d, self.date_format) for d in table.values('Trade Date')),
                    default=None,  # a transactions table without rows has no latest date
                )
                if latest is not None:
                    self.maxdate = latest.date().strftime(self.date_format)

            alltables[section] = table
        self.alltables = alltables

        self.alltables['Balance Positions'] = self.alltables['Balance Positions'].addfield('date', self.maxdate)

    def is_section_title(self, row):
        """Return True when *row* is one of the two table header rows."""
        return bool(row) and row[0] in ('Fund Account Number', 'Account Number')

    def get_transactions(self):
        """Yield the transaction rows as named tuples (nothing if the section is absent)."""
        table = self.alltables.get('Transactions')
        if table is not None:
            yield from table.namedtuples()

    def get_balance_positions(self):
        """Yield the balance-position rows as named tuples."""
        yield from self.alltables['Balance Positions'].namedtuples()
16 changes: 11 additions & 5 deletions beancount_reds_importers/libreader/csv_multitable_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def file_date(self, file):
raise "Not yet implemented"
pass

def convert_columns(self, rdr):
# No-op stub: ignores rdr and implicitly returns None.
# NOTE(review): this span comes from a diff view without +/- markers and the
# file's hunk summary reports deletions, so this stub may be a removed line --
# confirm against the repository.
pass

def is_section_title(self, row):
    """Tell whether *row* is a section title, i.e. has exactly one cell."""
    # Match against rows that contain section titles. Eg: 'section1', 'section2', ...
    cell_count = len(row)
    return cell_count == 1
Expand All @@ -59,6 +56,10 @@ def read_file(self, file):

self.raw_rdr = rdr = self.read_raw(file)

skip_offset = 1
if getattr(self, 'section_titles_are_headers', False):
skip_offset = 0

rdr = rdr.skip(getattr(self, "skip_head_rows", 0)) # chop unwanted file header rows
rdr = rdr.head(
len(rdr) - getattr(self, "skip_tail_rows", 0) - 1
Expand All @@ -77,8 +78,8 @@ def read_file(self, file):
for s, e in table_indexes:
if s == e:
continue
table = rdr.skip(s + 1) # skip past start index and header row
table = table.head(e - s - 1) # chop lines after table section data
table = rdr.skip(s + skip_offset) # skip past start index and header row
table = table.head(e - s - skip_offset) # chop lines after table section data
self.alltables[rdr[s][0]] = table

for section, table in self.alltables.items():
Expand All @@ -87,6 +88,11 @@ def read_file(self, file):
self.alltables[section] = table

self.prepare_tables() # to be overridden by importer

for section, table in self.alltables.items():
table = self.process_table(table)
self.alltables[section] = table

self.file_read_done = True

def get_transactions(self):
Expand Down
26 changes: 21 additions & 5 deletions beancount_reds_importers/libreader/csvreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,12 @@ def convert_columns(self, rdr):

# fixup currencies
def remove_non_numeric(x):
return re.sub(r"[^0-9\.-]", "", str(x).strip()) # noqa: W605
return re.sub(r'[^0-9\.-]', "", str(x).strip()) # noqa: W605

def add_decimal(x):
if '.' not in x:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the previous comment: the . character varies internationally.

return x+".00"
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For international compatibility, it would be best to use locale to print this, since decimal separator characters vary based on one's region. You may already be familiar with it, but if not, a bit of digging into this might provide a good solution. In particular, the setting of locale needs to be figured out. I'd imagine Beancount does this already, so looking at what it does might provide the right solution here. Thoughts?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may also consider having add_currency_precision be False or an integer, which would specify the number of zeros to add.

return x

currencies = getattr(self, "currency_fields", []) + [
"unit_price",
Expand All @@ -121,9 +126,12 @@ def remove_non_numeric(x):
"amount",
"balance",
]

for i in currencies:
if i in rdr.header():
rdr = rdr.convert(i, remove_non_numeric)
if self.config.get('add_currency_precision', False):
rdr = rdr.convert(i, add_decimal)
rdr = rdr.convert(i, D)

# fixup dates
Expand Down Expand Up @@ -201,14 +209,22 @@ def read_file(self, file):
rdr = self.prepare_table(rdr)

# process table
rdr = rdr.rename(self.header_map)
rdr = self.convert_columns(rdr)
rdr = self.fix_column_names(rdr)
rdr = self.prepare_processed_table(rdr)
rdr = self.process_table(rdr)
self.rdr = rdr
self.ifile = file
self.file_read_done = True

def process_table(self, rdr):
    """Run the standard clean-up steps on one table and return the result.

    Steps, in order: rename columns per ``self.header_map``, then
    ``convert_columns``, ``fix_column_names`` and ``prepare_processed_table``.

    Args:
        rdr: table object offering ``header()`` and ``rename(mapping)``.

    Returns:
        Whatever ``self.prepare_processed_table`` returns for the table.
    """
    # Filter out any header mappings that don't exist in this table, since petl doesn't do this for us
    # and will complain if we try to rename a header that doesn't exist.
    # The header is read once here rather than once per mapping entry.
    present = tuple(rdr.header())
    applicable = {old: new for old, new in self.header_map.items() if old in present}
    rdr = rdr.rename(applicable)

    rdr = self.convert_columns(rdr)
    rdr = self.fix_column_names(rdr)
    return self.prepare_processed_table(rdr)

def get_transactions(self):
for ot in self.rdr.namedtuples():
if self.skip_transaction(ot):
Expand Down
Loading