feat(diff): adding file management and diff to extract files
This commit is contained in:
parent
b8552eaefa
commit
97c8331866
4 changed files with 86 additions and 7 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -6,3 +6,4 @@ notes
|
||||||
|
|
||||||
# Others
|
# Others
|
||||||
node_modules/
|
node_modules/
|
||||||
|
**/__pycache__/
|
||||||
|
|
|
||||||
32
webscrape-bank/run.sh
Executable file
32
webscrape-bank/run.sh
Executable file
|
|
@ -0,0 +1,32 @@
|
||||||
|
#!/bin/bash
#
# Bootstraps the Python virtual environment in .venv/ (creating it and
# installing dependencies on first use) and then runs src/main.py.
#
# Usage:
#   ./run.sh        # set up .venv/ if needed, then run the program
#   ./run.sh del    # delete .venv/ and exit; the next run recreates it

# Packages installed into a freshly created virtual environment.
PIP_PACKAGES=("python-dotenv" "pytest-playwright")

if [ "$1" == "del" ]; then
    rm -rf .venv
    echo ".venv/ removed, restart the program again"
    # Stop here: without this the script would fall through and immediately
    # recreate the environment it was just asked to delete.
    exit 0
fi

if [ -d .venv ]; then
    echo ".venv/ found!"

    source .venv/bin/activate
else
    echo ".venv/ not found!"
    echo "Creating new venv/"

    # Abort on setup failures so the program is never launched against a
    # missing or half-installed environment.
    python -m venv .venv || exit 1
    source .venv/bin/activate

    # Quoted array expansion installs every package in one pip invocation
    # and is safe against word splitting.
    pip install "${PIP_PACKAGES[@]}" || exit 1

    playwright install || exit 1
fi

python src/main.py
|
||||||
|
|
@ -2,6 +2,7 @@ from playwright.sync_api import Playwright, sync_playwright, Page
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
|
@ -76,23 +77,18 @@ def download_file(page: Page):
|
||||||
|
|
||||||
with page.expect_download(timeout=0) as download_info:
|
with page.expect_download(timeout=0) as download_info:
|
||||||
page.mouse.wheel(0, 70)
|
page.mouse.wheel(0, 70)
|
||||||
|
|
||||||
page.locator(
|
page.locator(
|
||||||
'iframe[title="NextGen account history page"]'
|
'iframe[title="NextGen account history page"]'
|
||||||
).content_frame.get_by_role("menuitem", name="Export QFX").dblclick()
|
).content_frame.get_by_role("menuitem", name="Export QFX").dblclick()
|
||||||
|
|
||||||
|
|
||||||
download = download_info.value
|
download = download_info.value
|
||||||
|
|
||||||
download.save_as("./test.qfx")
|
# Download
|
||||||
|
download.save_as("./qfx/download-" + str(datetime.now().isoformat()) + ".qfx")
|
||||||
print("Downloaded!")
|
print("Downloaded!")
|
||||||
|
|
||||||
switchingForever(page)
|
switchingForever(page)
|
||||||
|
|
||||||
# context.close()
|
|
||||||
# browser.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
with sync_playwright() as playwright:
|
with sync_playwright() as playwright:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,50 @@
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
from threading import Thread
|
||||||
|
import os
|
||||||
|
import extract
|
||||||
|
#from datetime import datetime
|
||||||
|
import difflib
|
||||||
|
|
||||||
|
# Playwright in the background
|
||||||
|
def playwright():
    """Run the extractor inside a synchronous Playwright session.

    Opens ``sync_playwright()`` as a context manager and passes the resulting
    object to ``extract.main``.
    """
    session = sync_playwright()
    with session as pw:
        extract.main(pw)
|
||||||
|
|
||||||
|
|
||||||
|
def revise():
    """Keep only the two newest files in ``./qfx`` and print how they differ.

    Older files are deleted (each deletion is reported as ``Removed: <name>``).
    The two remaining files are then compared line by line and, after a
    ``RESULT:`` header, every added or removed line is printed without its
    ``+ `` / ``- `` diff marker.

    If the directory is missing or holds fewer than two files there is nothing
    to compare, so only the ``RESULT:`` header is printed.

    Returns:
        None. All output goes to stdout.
    """
    qfx_dir = "./qfx"

    # Sort explicitly: os.listdir()/os.scandir() yield entries in arbitrary
    # order, so index 0 / -1 of the raw listing is not reliably oldest/newest.
    # NOTE(review): this assumes file names sort chronologically (e.g. an
    # ISO-8601 timestamp embedded in the name) - confirm against the writer.
    try:
        with os.scandir(qfx_dir) as entries:
            names = sorted(entry.name for entry in entries if entry.is_file())
    except FileNotFoundError:
        names = []

    # Take the listing snapshot first, then delete, so the directory is never
    # modified while it is being iterated.
    for stale_name in names[:-2]:
        print("Removed: " + stale_name)
        os.remove(os.path.join(qfx_dir, stale_name))
    names = names[-2:]

    print("RESULT:")
    if len(names) < 2:
        return

    older_path = os.path.join(qfx_dir, names[0])
    newer_path = os.path.join(qfx_dir, names[1])

    # Context managers guarantee both handles are closed, even on error.
    with open(older_path) as older_file, open(newer_path) as newer_file:
        diff = difflib.ndiff(older_file.readlines(), newer_file.readlines())
        # Keep only real changes; ndiff also emits "  " (unchanged) and
        # "? " (intraline hint) lines.
        # Thanks to: https://stackoverflow.com/a/15864920
        changes = [line for line in diff if line.startswith(("+ ", "- "))]

    for change in changes:
        # Lines keep their own trailing newline; strip it so print() does not
        # double-space the output.
        print(change[2:].rstrip("\n"))
|
||||||
|
|
||||||
|
def main():
    """Start the background worker thread that runs ``revise``.

    The function object itself is passed as ``target``. Writing
    ``target=revise()`` would run ``revise`` synchronously on the calling
    thread and hand its ``None`` return value to ``Thread``, so the started
    thread would do nothing.
    """
    revise_thread = Thread(target=revise)
    # The Playwright worker is currently disabled. When re-enabling it, pass
    # the callable without calling it:
    #   pw_thread = Thread(target=playwright)
    #   pw_thread.start()
    revise_thread.start()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue