feat(diff): adding file management and diff to extract files
This commit is contained in:
parent
b8552eaefa
commit
97c8331866
4 changed files with 86 additions and 7 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -6,3 +6,4 @@ notes
|
|||
|
||||
# Others
|
||||
node_modules/
|
||||
**/__pycache__/
|
||||
|
|
|
|||
32
webscrape-bank/run.sh
Executable file
32
webscrape-bank/run.sh
Executable file
|
|
@ -0,0 +1,32 @@
|
|||
#!/bin/bash
# Bootstrap and run the scraper inside a local Python virtual environment.
#
# Usage:
#   ./run.sh        Activate .venv/ (creating it and installing the
#                   dependencies first when it is missing), then run
#                   src/main.py.
#   ./run.sh del    Delete .venv/ and exit; the next plain run recreates it.

PIP_PACKAGES=("python-dotenv" "pytest-playwright")

if [ "$1" == "del" ]; then
    rm -rf .venv
    echo ".venv/ removed, restart the program again"
    # Stop here: the message asks for a restart, and falling through would
    # immediately rebuild the environment that was just removed.
    exit 0
fi

if [ -d .venv ]; then
    echo ".venv/ found!"

    source .venv/bin/activate
else
    echo ".venv/ not found!"
    echo "Creating new venv/"

    # Abort on any setup failure instead of running the program against a
    # missing or half-installed environment.
    python -m venv .venv || exit 1
    source .venv/bin/activate || exit 1

    # Quoted array expansion passes each package as its own argument.
    pip install "${PIP_PACKAGES[@]}" || exit 1

    playwright install || exit 1
fi

python src/main.py
|
||||
|
|
@ -2,6 +2,7 @@ from playwright.sync_api import Playwright, sync_playwright, Page
|
|||
from dotenv import load_dotenv
|
||||
import os
|
||||
from time import sleep
|
||||
from datetime import datetime
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
|
@ -76,23 +77,18 @@ def download_file(page: Page):
|
|||
|
||||
with page.expect_download(timeout=0) as download_info:
|
||||
page.mouse.wheel(0, 70)
|
||||
|
||||
page.locator(
|
||||
'iframe[title="NextGen account history page"]'
|
||||
).content_frame.get_by_role("menuitem", name="Export QFX").dblclick()
|
||||
|
||||
|
||||
download = download_info.value
|
||||
|
||||
download.save_as("./test.qfx")
|
||||
|
||||
# Download
|
||||
download.save_as("./qfx/download-" + str(datetime.now().isoformat()) + ".qfx")
|
||||
print("Downloaded!")
|
||||
|
||||
switchingForever(page)
|
||||
|
||||
# context.close()
|
||||
# browser.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
with sync_playwright() as playwright:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
from playwright.sync_api import sync_playwright
|
||||
from threading import Thread
|
||||
import os
|
||||
import extract
|
||||
#from datetime import datetime
|
||||
import difflib
|
||||
|
||||
# Playwright in the background
|
||||
def playwright():
    """Open a synchronous Playwright session and hand it to ``extract.main``."""
    session = sync_playwright()
    with session as pw:
        extract.main(pw)
|
||||
|
||||
|
||||
def revise():
    """Prune ``./qfx`` to its two newest files and print how they differ.

    Regular files in ``./qfx`` are ordered by file name. Every file except
    the last two is deleted (each deletion is reported on stdout), then the
    two survivors are compared line by line and only the added or removed
    lines are printed after a ``RESULT:`` header, without their ``+``/``-``
    markers.

    When fewer than two files are present there is nothing to compare; a
    short notice is printed and the function returns.

    Raises:
        FileNotFoundError: If the ``./qfx`` directory does not exist.
    """
    qfx_dir = "./qfx"

    # Directory listings come back in arbitrary order, so sort explicitly.
    # NOTE(review): name order equals age order only if the files are named
    # with a sortable timestamp (e.g. download-<ISO datetime>.qfx) - confirm
    # against whatever writes into this directory.
    with os.scandir(qfx_dir) as entries:
        names = sorted(entry.name for entry in entries if entry.is_file())

    # Keep the two newest files; everything before them is stale. The list
    # was fully materialised above, so deleting here cannot disturb the scan.
    for stale in names[:-2]:
        print("Removed: " + stale)
        os.remove(os.path.join(qfx_dir, stale))
    names = names[-2:]

    if len(names) < 2:
        print("Need at least two files in ./qfx to diff")
        return

    older_path, newer_path = (os.path.join(qfx_dir, name) for name in names)

    # Diff the two files; the context manager closes both handles.
    with open(older_path) as older_file, open(newer_path) as newer_file:
        diff = difflib.ndiff(older_file.readlines(), newer_file.readlines())
        # Keep only the changed lines.
        # Thanks to: https://stackoverflow.com/a/15864920
        changes = [line for line in diff if line.startswith(("+ ", "- "))]

    print("RESULT:")
    for change in changes:
        print(change[2:])
|
||||
|
||||
def main():
    """Start the QFX revision step on its own thread."""
    # Pass the function object itself. Writing ``target=revise()`` would run
    # revise() right here on the calling thread and give Thread its return
    # value (None) as the target, so the started thread would do nothing.
    revise_thread = Thread(target=revise)
    # Scraper thread is currently disabled; when re-enabling it, pass the
    # callable the same way:
    # pw_thread = Thread(target=playwright)
    # pw_thread.start()
    revise_thread.start()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue