feat(diff): adding file management and diff to extract files

This commit is contained in:
devaine 2026-01-29 15:15:27 -06:00
commit 97c8331866
Signed by: devaine
GPG key ID: 954B1DCAC6FF84EE
4 changed files with 86 additions and 7 deletions

1
.gitignore vendored
View file

@ -6,3 +6,4 @@ notes
# Others # Others
node_modules/ node_modules/
**/__pycache__/

32
webscrape-bank/run.sh Executable file
View file

@ -0,0 +1,32 @@
#!/bin/bash
# Bootstrap and launch the scraper inside a local virtual environment.
#
# Usage:
#   ./run.sh        Create .venv/ on first use, then run src/main.py.
#   ./run.sh del    Delete .venv/ and exit; run the script again to rebuild it.
#
# All paths are relative, so run this from the directory that holds src/.

# Stop at the first failing step so a broken venv creation or package install
# never falls through to running the program against a half-built environment.
set -e

PIP_PACKAGES=("python-dotenv" "pytest-playwright")

if [ "$1" == "del" ]; then
    rm -rf .venv
    echo ".venv/ removed, restart the program again"
    # The message asks for a restart, so do not fall through and rebuild.
    exit 0
fi

if [ -d .venv ]; then
    echo ".venv/ found!"
    source .venv/bin/activate
else
    echo ".venv/ not found!"
    echo "Creating new venv/"
    python -m venv .venv
    source .venv/bin/activate

    # Quote the array expansion so each package name stays a single argument.
    pip install "${PIP_PACKAGES[@]}"

    # Fetch the browser binaries Playwright drives.
    playwright install
fi

python src/main.py

View file

@ -2,6 +2,7 @@ from playwright.sync_api import Playwright, sync_playwright, Page
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
from time import sleep from time import sleep
from datetime import datetime
load_dotenv() load_dotenv()
@ -76,23 +77,18 @@ def download_file(page: Page):
with page.expect_download(timeout=0) as download_info: with page.expect_download(timeout=0) as download_info:
page.mouse.wheel(0, 70) page.mouse.wheel(0, 70)
page.locator( page.locator(
'iframe[title="NextGen account history page"]' 'iframe[title="NextGen account history page"]'
).content_frame.get_by_role("menuitem", name="Export QFX").dblclick() ).content_frame.get_by_role("menuitem", name="Export QFX").dblclick()
download = download_info.value download = download_info.value
download.save_as("./test.qfx") # Download
download.save_as("./qfx/download-" + str(datetime.now().isoformat()) + ".qfx")
print("Downloaded!") print("Downloaded!")
switchingForever(page) switchingForever(page)
# context.close()
# browser.close()
if __name__ == "__main__": if __name__ == "__main__":
with sync_playwright() as playwright: with sync_playwright() as playwright:

View file

@ -0,0 +1,50 @@
from playwright.sync_api import sync_playwright
from threading import Thread
import os
import extract
#from datetime import datetime
import difflib
# Playwright in the background
def playwright():
    """Open a synchronous Playwright session and hand it to ``extract.main``."""
    session = sync_playwright()
    with session as driver:
        extract.main(driver)
def revise(directory="./qfx"):
    """Prune old downloads and print what changed between the last two.

    Regular files in *directory* are ordered by name. Every file except the
    two that sort last is deleted, then the lines that differ between the
    remaining pair are printed after a ``RESULT:`` header. With fewer than
    two files there is nothing to compare and only the header is printed.

    Args:
        directory: Folder holding the downloaded files. Defaults to
            ``"./qfx"``.

    Raises:
        FileNotFoundError: If *directory* does not exist.
    """
    # os.scandir()/os.listdir() yield entries in arbitrary order, so sort
    # explicitly. Assumes file names sort chronologically (e.g. they embed an
    # ISO timestamp) -- TODO confirm against the code that writes them.
    with os.scandir(directory) as entries:
        names = sorted(entry.name for entry in entries if entry.is_file())

    # Delete only after the listing is complete, never while iterating it.
    for stale in names[:-2]:
        print("Removed: " + stale)
        os.remove(os.path.join(directory, stale))

    kept = names[-2:]
    if len(kept) < 2:
        print("RESULT:")
        return

    older_path = os.path.join(directory, kept[0])
    newer_path = os.path.join(directory, kept[1])
    with open(older_path) as older_file, open(newer_path) as newer_file:
        older_lines = older_file.readlines()
        newer_lines = newer_file.readlines()

    # Keep only added/removed lines, dropping ndiff's context ("  ") and
    # hint ("? ") lines.
    # Thanks to: https://stackoverflow.com/a/15864920
    changes = [
        line
        for line in difflib.ndiff(older_lines, newer_lines)
        if line.startswith(("+ ", "- "))
    ]

    print("RESULT:")
    for change in changes:
        # Strip the two-character ndiff marker before printing.
        print(change[2:])
def main():
    """Start the file-pruning and diff step (``revise``) on a worker thread."""
    # Pass the function object itself. ``Thread(target=revise())`` would run
    # ``revise`` synchronously right here and give the thread ``None`` as its
    # target, so the thread would do nothing.
    revise_thread = Thread(target=revise)
    # The Playwright scraper thread is currently disabled.
    #pw_thread = Thread(target=playwright)
    #pw_thread.start()
    revise_thread.start()


if __name__ == "__main__":
    main()