From af9030e6ec52460420389b0c7ad93268b76437a9 Mon Sep 17 00:00:00 2001
From: Milind Kamble <milindkamble@yahoo.com>
Date: Fri, 24 Aug 2018 10:42:47 -0500
Subject: [PATCH 1/3] Generic ofx2dataframe converter capable of handling
 multiple inputs

Added unit tests for the converter.
Modified the utils/ofx2xlsx.py script to use the converter and to
support output in either csv or xlsx format.
---
 ofxparse/ofxtodataframe.py | 44 +++++++++++++++++++++
 tests/test_parse.py        | 17 +++++++-
 utils/ofx2xlsx.py          | 79 ++++++++++++++++++--------------------
 3 files changed, 97 insertions(+), 43 deletions(-)
 create mode 100644 ofxparse/ofxtodataframe.py

diff --git a/ofxparse/ofxtodataframe.py b/ofxparse/ofxtodataframe.py
new file mode 100644
index 0000000..04b44dd
--- /dev/null
+++ b/ofxparse/ofxtodataframe.py
@@ -0,0 +1,44 @@
+from ofxparse import OfxParser
+import pandas as pd
+import codecs
+import os.path as path
+
+# fields of transactions are auto extracted using dir(transactiontype)-{attributes starting with '_'}
+
+def ofx_to_dataframe(files, id_len=24):
+    collected_df={}
+    if type(files) is str:
+        files = [files]
+    assert(isinstance(files, list))
+    for fname in files:
+        data = {}
+        with codecs.open(fname) as fileobj:
+            ofx = OfxParser.parse(fileobj)
+        # it seems one ofx file contains only one securities list. Create a mapping from ID to ticker
+        security_map = {x.uniqueid : x.ticker for x in ofx.security_list}
+        # different transaction types have different fields. So we create df for each txn_type
+        # and append the contents of each txn into appropriate df
+        for account in ofx.accounts:
+            for transaction in account.statement.transactions:
+                txn_type = type(transaction).__name__
+                if not txn_type in data:
+                    fields = [x for x in dir(transaction) if not x.startswith('_')]
+                    data[txn_type] = pd.DataFrame(columns=fields)
+                df = data[txn_type]
+                fields = set(df.columns)
+                sr = pd.Series([getattr(transaction,f) for f in fields], index=fields)
+                data[txn_type] = df.append(sr, ignore_index=True)
+        # add fname, acctnum common info into each df. Truncate ID if needed
+        for key,df in data.items():
+            df['fname'] = path.basename(fname)
+            df['id'] = df['id'].str[:id_len]  # clip the last part of the ID which changes from download to download
+            df['acctnum']=account.number
+            if 'security' in df.columns:
+                df['security'] = df['security'].apply(lambda x: security_map[x])
+            if 'AGGREGATE_TYPES' in df.columns :
+                del df['AGGREGATE_TYPES']
+            if key in collected_df:
+                collected_df[key] = collected_df[key].append(df, ignore_index=True)
+            else:
+                collected_df[key] = df
+    return collected_df
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 78bd779..dc9dd78 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -12,7 +12,8 @@
 from .support import open_file
 from ofxparse import OfxParser, AccountType, Account, Statement, Transaction
 from ofxparse.ofxparse import OfxFile, OfxPreprocessedFile, OfxParserException, soup_maker
-
+from ofxparse.ofxtodataframe import ofx_to_dataframe
+import glob
 
 class TestOfxFile(TestCase):
     OfxFileCls = OfxFile
@@ -1049,6 +1050,20 @@ def testFailure(self):
         self.assertEqual(ofx.signon.severity, 'ERROR')
         self.assertEqual(ofx.signon.message, 'Your request could not be processed because you supplied an invalid identification code or your password was incorrect')
 
+class TestOfxToDataFrame(TestCase):
+    def testSingleFile(self):
+        dfs = ofx_to_dataframe('tests/fixtures/fidelity.ofx')
+        self.assertEqual(sorted(dfs), ['InvestmentTransaction', 'Transaction'])
+        self.assertEqual(len(dfs['InvestmentTransaction']), 14)
+        self.assertEqual(len(dfs['Transaction']), 3)
+
+    def testMultipleFiles(self):
+        dfs = ofx_to_dataframe(['tests/fixtures/fidelity.ofx', 'tests/fixtures/investment_401k.ofx'])
+        self.assertEqual(sorted(dfs), ['InvestmentTransaction', 'Transaction'])
+        self.assertEqual(len(dfs['InvestmentTransaction']), 17)
+        self.assertEqual(len(dfs['Transaction']), 3)
+
+
 if __name__ == "__main__":
     import unittest
     unittest.main()
diff --git a/utils/ofx2xlsx.py b/utils/ofx2xlsx.py
index 32b19c6..05d7515 100644
--- a/utils/ofx2xlsx.py
+++ b/utils/ofx2xlsx.py
@@ -1,57 +1,51 @@
-from ofxparse import OfxParser
+from ofxparse.ofxtodataframe import ofx_to_dataframe
 import pandas as pd
+from pandas import ExcelWriter
 
 import argparse
 
-# TODO automatically extract from transactions
-fields = ['id','type', 'date', 'memo', 'payee', 'amount', 'checknum', 'mcc']
-
+# ToDo: Remove duplicate transactions from different files
 parser = argparse.ArgumentParser(description='Convert multiple .qfx or .ofx to'
-                                             ' .xlsx.\n'
-                                             'Remove duplicate transactions '
-                                             'from different files.\n'
-                                             'use fixed columns:'
-                                             ' %s'%', '.join(fields))
+                                             ' .xlsx or csv.\n')
 parser.add_argument('files', metavar='*.ofx *.qfx', type=str, nargs='+',
-                   help='.qfx or .ofx file names')
-parser.add_argument('--start', type=str, metavar='2014-01-01',
-                    default='2014-01-01',
-                   help="Don't take transaction before this date")
-parser.add_argument('--end', type=str, metavar='2014-12-31',
-                    default='2014-12-31',
+help='.qfx or .ofx file names')
+parser.add_argument('--start', type=str, metavar='1700-01-01',
+                    default='1700-01-01',
+                    help="Don't take transaction before this date")
+parser.add_argument('--end', type=str, metavar='3000-12-31',
+                    default='3000-12-31',
                     help="Don't take transaction after this date")
-parser.add_argument('--output', metavar='output.xlsx', type=str,
-                    default='output.xlsx', help='Were to store the xlsx')
+parser.add_argument('-o', '--output', metavar='output.csv', type=str,
+                    default='output.csv', help='Were to store the output. Extension determines output format')
 parser.add_argument('--id-length', metavar='24', type=int, default=24,
-                   help='Truncate the number of digits in a transaction ID.'
-                        ' This is important because this program remove'
-                        ' transactions with duplicate IDs (after verifing'
-                        ' that they are identical.'
-                        ' If you feel unsafe then use a large number but'
-                        'usually the last digits of the transaction ID are'
-                        'running numbers which change from download to download'
-                        ' as a result you will have duplicate transactions'
-                        ' unless you truncate the ID.')
+                    help='Truncate the number of digits in a transaction ID.'
+                    ' This is important because this program remove'
+                    ' transactions with duplicate IDs (after verifing'
+                    ' that they are identical.'
+                    ' If you feel unsafe then use a large number but'
+                    'usually the last digits of the transaction ID are'
+                    'running numbers which change from download to download'
+                    ' as a result you will have duplicate transactions'
+                    ' unless you truncate the ID.')
 
 
 args = parser.parse_args()
 
-
-data = {}
-for fname in args.files:
-    ofx = OfxParser.parse(file(fname))
-    for account in ofx.accounts:
-        df = data.get(account.number, pd.DataFrame(columns=fields+['fname']))
-        for transaction in account.statement.transactions:
-            s = pd.Series([getattr(transaction,f) for f in fields], index=fields)
-            s['fname'] = fname.split('/')[-1]
-            df = df.append(s, ignore_index=True)
-        df['id'] = df['id'].str[:args.id_length]  # clip the last part of the ID which changes from download to download
-        data[account.number] = df
-
-print "Writing result to", args.output
-writer = pd.ExcelWriter(args.output)
-
+data = ofx_to_dataframe(args.files)
+
+if 'csv' in args.output:
+    outstring = ""
+    for key,df in data.items():
+        outstring += "##### %s".format(key) + df.to_csv(None, index=False, header=True)
+    with open(args.output, 'w') as fileobj:
+        print(outstring, file=fileobj)
+elif 'xlsx' in args.output:
+    writer = pd.ExcelWriter(args.output)
+    for key,df in data.items():
+        df.to_excel(writer, sheet_name=key)
+    writer.save()
+
+__dev_notes__ = '''
 for account_number, df in data.iteritems():
     # A transaction is identified using all `fields`
     # collapse all repeated transactions from the same file into one row
@@ -88,3 +82,4 @@
     df2.to_excel(writer, account_number, index=False)
 
 writer.save()
+'''

From e94fb136751990fa0d2751343146abed88c531c6 Mon Sep 17 00:00:00 2001
From: Milind Kamble <milindkamble@yahoo.com>
Date: Sun, 16 Sep 2018 23:42:11 -0500
Subject: [PATCH 2/3] Created Cash 'Position' for regular (i.e. banking)
 statement. Thus investment securities and cash balance positions are
 consolidated into 'Positions' dataframe

Fixed bug associated with attaching acctnum to transactions.
---
 ofxparse/ofxtodataframe.py | 47 +++++++++++++++++++++++++++-----------
 utils/ofx2xlsx.py          | 17 ++++++++++----
 2 files changed, 47 insertions(+), 17 deletions(-)
 mode change 100644 => 100755 utils/ofx2xlsx.py

diff --git a/ofxparse/ofxtodataframe.py b/ofxparse/ofxtodataframe.py
index 04b44dd..39682ae 100644
--- a/ofxparse/ofxtodataframe.py
+++ b/ofxparse/ofxtodataframe.py
@@ -2,24 +2,28 @@
 import pandas as pd
 import codecs
 import os.path as path
+import sys, warnings
+import decimal
 
 # fields of transactions are auto extracted using dir(transactiontype)-{attributes starting with '_'}
 
-def ofx_to_dataframe(files, id_len=24):
+def ofx_to_dataframe(fileobjs, id_len=24):
     collected_df={}
-    if type(files) is str:
-        files = [files]
-    assert(isinstance(files, list))
-    for fname in files:
+    assert(isinstance(fileobjs, list))
+    for fileobj in fileobjs:
         data = {}
-        with codecs.open(fname) as fileobj:
-            ofx = OfxParser.parse(fileobj)
+
+        #with codecs.open(fname) as fileobj:
+        #    ofx = OfxParser.parse(fileobj)
+        ofx = OfxParser.parse(fileobj)
         # it seems one ofx file contains only one securities list. Create a mapping from ID to ticker
-        security_map = {x.uniqueid : x.ticker for x in ofx.security_list}
+        if hasattr(ofx, 'security_list'):
+            security_map = {x.uniqueid : x.ticker for x in ofx.security_list}
         # different transaction types have different fields. So we create df for each txn_type
         # and append the contents of each txn into appropriate df
         for account in ofx.accounts:
-            for transaction in account.statement.transactions:
+            for transaction in account.statement.transactions + \
+                (hasattr(account.statement, 'positions') and account.statement.positions or []):
                 txn_type = type(transaction).__name__
                 if not txn_type in data:
                     fields = [x for x in dir(transaction) if not x.startswith('_')]
@@ -27,12 +31,29 @@ def ofx_to_dataframe(files, id_len=24):
                 df = data[txn_type]
                 fields = set(df.columns)
                 sr = pd.Series([getattr(transaction,f) for f in fields], index=fields)
+                sr = pd.Series({f:transaction.f} for f in fields)
+                sr['acctnum'] = account.number
                 data[txn_type] = df.append(sr, ignore_index=True)
-        # add fname, acctnum common info into each df. Truncate ID if needed
+            if hasattr(account, 'balance'):
+                txn_type = 'Positions'
+                if not txn_type in data:
+                    fields = ['date', 'market_value', 'security', 'unit_price', 'units']
+                    data[txn_type] = pd.DataFrame(columns=fields)
+                df = data[txn_type]
+                fields = set(df.columns)
+                sr = pd.Series({
+                    'date'       :statement.end_date,
+                    'security'   : 'Cash',
+                    'units'      :statement.balance,
+                    'unit_price' : decimal.Decimal('1.00')}, index=fields)
+                sr['acctnum'] = account.number
+                data[txn_type] = df.append(sr, ignore_index=True)
+
+        # add fname info into each df. Truncate ID if needed
         for key,df in data.items():
-            df['fname'] = path.basename(fname)
-            df['id'] = df['id'].str[:id_len]  # clip the last part of the ID which changes from download to download
-            df['acctnum']=account.number
+            df['fname'] = hasattr(fileobj, 'name') and fileobj.name or 'stdin'
+            if 'id' in df.columns:
+                df['id'] = df['id'].str[:id_len]  # clip the last part of the ID which changes from download to download
             if 'security' in df.columns:
                 df['security'] = df['security'].apply(lambda x: security_map[x])
             if 'AGGREGATE_TYPES' in df.columns :
diff --git a/utils/ofx2xlsx.py b/utils/ofx2xlsx.py
old mode 100644
new mode 100755
index 05d7515..f8fc09c
--- a/utils/ofx2xlsx.py
+++ b/utils/ofx2xlsx.py
@@ -1,13 +1,18 @@
+#!/usr/bin/env python3
+import warnings
+warnings.filterwarnings("ignore", message="numpy.dtype size changed")
+
 from ofxparse.ofxtodataframe import ofx_to_dataframe
 import pandas as pd
 from pandas import ExcelWriter
-
+import sys
 import argparse
+from io import StringIO
 
 # ToDo: Remove duplicate transactions from different files
 parser = argparse.ArgumentParser(description='Convert multiple .qfx or .ofx to'
                                              ' .xlsx or csv.\n')
-parser.add_argument('files', metavar='*.ofx *.qfx', type=str, nargs='+',
+parser.add_argument('files', type=argparse.FileType('r'), nargs='+',   #;metavar='*.ofx *.qfx', default=[], type=str, nargs='+',
 help='.qfx or .ofx file names')
 parser.add_argument('--start', type=str, metavar='1700-01-01',
                     default='1700-01-01',
@@ -30,13 +35,17 @@
 
 
 args = parser.parse_args()
-
+if 'stdin' in args.files[0].name:
+    fp=args.files[0]
+    args.files=[StringIO(fp.read())]
 data = ofx_to_dataframe(args.files)
 
 if 'csv' in args.output:
     outstring = ""
     for key,df in data.items():
-        outstring += "##### %s".format(key) + df.to_csv(None, index=False, header=True)
+        outstring += "##### {}\n".format(key) + df.to_csv(None, index=False, header=True)
+    if args.output=='output.csv':
+        print(outstring)
     with open(args.output, 'w') as fileobj:
         print(outstring, file=fileobj)
 elif 'xlsx' in args.output:

From 6d377d1857b9a33590695ad69db919107b25113d Mon Sep 17 00:00:00 2001
From: Milind Kamble <milindkamble@yahoo.com>
Date: Sun, 19 Apr 2020 22:39:46 -0500
Subject: [PATCH 3/3] Fix cash Position rows and security lookup in
 ofx_to_dataframe

---
 ofxparse/ofxtodataframe.py | 48 ++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/ofxparse/ofxtodataframe.py b/ofxparse/ofxtodataframe.py
index 39682ae..621cd51 100644
--- a/ofxparse/ofxtodataframe.py
+++ b/ofxparse/ofxtodataframe.py
@@ -17,37 +17,43 @@ def ofx_to_dataframe(fileobjs, id_len=24):
         #    ofx = OfxParser.parse(fileobj)
         ofx = OfxParser.parse(fileobj)
         # it seems one ofx file contains only one securities list. Create a mapping from ID to ticker
+        security_map = {}
         if hasattr(ofx, 'security_list'):
-            security_map = {x.uniqueid : x.ticker for x in ofx.security_list}
+            security_map.update({x.uniqueid : x.ticker for x in ofx.security_list})
         # different transaction types have different fields. So we create df for each txn_type
         # and append the contents of each txn into appropriate df
         for account in ofx.accounts:
             for transaction in account.statement.transactions + \
                 (hasattr(account.statement, 'positions') and account.statement.positions or []):
                 txn_type = type(transaction).__name__
+                transaction.acctnum = account.number
                 if not txn_type in data:
                     fields = [x for x in dir(transaction) if not x.startswith('_')]
                     data[txn_type] = pd.DataFrame(columns=fields)
                 df = data[txn_type]
                 fields = set(df.columns)
-                sr = pd.Series([getattr(transaction,f) for f in fields], index=fields)
-                sr = pd.Series({f:transaction.f} for f in fields)
-                sr['acctnum'] = account.number
+                sr = pd.Series({f: getattr(transaction,f) for f in fields})
                 data[txn_type] = df.append(sr, ignore_index=True)
-            if hasattr(account, 'balance'):
-                txn_type = 'Positions'
-                if not txn_type in data:
-                    fields = ['date', 'market_value', 'security', 'unit_price', 'units']
-                    data[txn_type] = pd.DataFrame(columns=fields)
-                df = data[txn_type]
-                fields = set(df.columns)
+
+            # add cash balance as a "Cash" position
+            cash_amount = None
+            if hasattr(account.statement, 'balance'):
+                cash_amount = account.statement.balance
+                dt = account.statement.balance_date
+            elif hasattr(account.statement, 'available_cash'):
+                cash_amount = account.statement.available_cash
+                dt = account.statement.end_date
+            if cash_amount is not None:
+                df = data.get('Position',
+                              pd.DataFrame(columns=['date', 'market_value', 'security', 'unit_price', 'units', 'acctnum']))
                 sr = pd.Series({
-                    'date'       :statement.end_date,
-                    'security'   : 'Cash',
-                    'units'      :statement.balance,
-                    'unit_price' : decimal.Decimal('1.00')}, index=fields)
-                sr['acctnum'] = account.number
-                data[txn_type] = df.append(sr, ignore_index=True)
+                    'date'        : dt,
+                    'security'    : account.curdef,
+                    'market_value': cash_amount,
+                    'units'       : cash_amount,
+                    'unit_price'  : decimal.Decimal('1.00'),
+                    'acctnum'     : account.number})
+                data['Position'] = df.append(sr, ignore_index=True)
 
         # add fname info into each df. Truncate ID if needed
         for key,df in data.items():
@@ -55,7 +61,7 @@ def ofx_to_dataframe(fileobjs, id_len=24):
             if 'id' in df.columns:
                 df['id'] = df['id'].str[:id_len]  # clip the last part of the ID which changes from download to download
             if 'security' in df.columns:
-                df['security'] = df['security'].apply(lambda x: security_map[x])
+                df['security'] = df['security'].apply(lambda x: security_map.get(x, x))
             if 'AGGREGATE_TYPES' in df.columns :
                 del df['AGGREGATE_TYPES']
             if key in collected_df:
@@ -63,3 +69,9 @@ def ofx_to_dataframe(fileobjs, id_len=24):
             else:
                 collected_df[key] = df
     return collected_df
+
+__dev_notes__='''
+For brokerage, balances are available in account.statement.balance_list... but overall cash is also summarized in account.statement.available_cash corresponding to statement.end_date
+For bank, balance is available in account.statement.balance (and balance_date)
+
+'''