feat(parsing): Parse columns via names

2024-07-20 18:42:19 +02:00 · 2024-07-20 18:42:19 +02:00 · e3505a9d76
commit e3505a9d76
parent eb50a1920d
1 changed files with 31 additions and 20 deletions
--- a/main.py
+++ b/main.py
@ -1,5 +1,7 @@
 import csv
 import json
 import pandas as pd
 import re
 from datetime import datetime
 from argparse import ArgumentParser
@ -11,6 +13,8 @@ parser.add_argument("-o", "--output", dest="output_filename",
 args = parser.parse_args()
 dividend_expression_re = re.compile(r'(\d+\.?\d*?) ([A-Z]{3})')
 NORDNET_ACCOUNT_ID="cae3d45f-53bc-4dee-83a4-4448b409f8b2"
 format = "%Y-%m-%d"
@ -22,42 +26,41 @@ data = {
 splits = {
    "NOVO B": {
        "2023-09-20 00:00:00 UTC": 0.5
    },
    "NVDA": {
        "2024-06-10 00:00:00 UTC": 0.1
    }
 }
 symbol_translations = {
    "NOVO B": "NOVO-B.CO",
-    "XZEC": "XZEC.DE"
+    "NOVO B.OLD": "NOVO-B.CO",
    "Xtrackers MSCI Eur CDisc ESG Scr ETF 1C": "XZEC.DE"
 }
 #print("Date", "Code", "DataSource", "Currency", "Price", "Quantity", "Action", "Fee", "account", sep=',')
 # Date, Code, DataSource, Currency, Price, Quantity, Action, Fee, Note
-with open(args.input_filename, newline='') as csvfile:
+with open(args.input_filename, newline='', encoding="utf16") as csvfile:
-    reader = csv.reader(csvfile, delimiter='\t', quotechar='|')
+    df = pd.read_csv(csvfile, sep='\t', encoding='utf16', index_col=0)
    for row in reader:
        if row[5] in ("INDBETALING", "HÆVNING", "INDSÆTTELSE", "AFKASTSKAT ASK", "MAKULERING AFKASTSKAT ASK", "Transaktionstype", ""):
            continue
    for index, row in df.iterrows():
        date = None
        t_type = None
        price = 0
        try:
-            date = datetime.strptime(row[2], format)
+            date = datetime.strptime(row['Handelsdag'], format)
        except ValueError:
            data = None
            print("Failed parsing date")
-        price = row[10].replace('.', '')
+        price = str(row['Kurs']).replace('.', '')
        price = price.replace(',', '.')
-        quantity = row[9]
+        quantity = row['Antal']
-        fee = row[27]
+        fee = float(str(row['Samlede afgifter']).replace(',', '.'))
-        symbol = row[6]
+        symbol = row['Værdipapirer']
-        currency = row[17]
+        currency = row['Valuta.3']
-        match row[5]:
+        match row['Transaktionstype']:
            case "KØBT":
                t_type = "BUY"
            case "SOLGT":
@ -65,21 +68,29 @@ with open(args.input_filename, newline='') as csvfile:
            case "UDB.":
                t_type = "DIVIDEND"
                fee = 0
-                currency = row[15]
+                div = dividend_expression_re.findall(row['Transaktionstekst'])
                price = div[0][0]
                price = price.replace(',', '.')
                currency = div[0][1]
            case _:
                continue
-        if symbol in splits:
+        if symbol in splits and row['Transaktionstype'] != "UDB.":
            key = list(splits[symbol].keys())[0]
            if date.isoformat() < key:
                ratio = splits[symbol][key]
                price = float(price) * ratio;
                quantity = float(quantity) * (1 / ratio);                
        if row['Valuta'] != row['Valuta.3'] and row['Transaktionstype'] != "UDB.":
            fee = float(fee) / float(str(row['Vekslingskurs']).replace(',', '.'))
        if symbol in symbol_translations:
            symbol = symbol_translations[symbol]
        data['activities'].append({
            'accountId': NORDNET_ACCOUNT_ID,
-            'fee': float(str(fee).replace(',', '.')),
+            'fee': fee,
            'quantity': int(quantity),
            'type': t_type,
            'unitPrice': float(price),