From 3ada3f966dd5e42f15725ac3ee7a02116cd28a8c Mon Sep 17 00:00:00 2001 From: Teddy Middleton-Smith Date: Fri, 13 Mar 2026 09:26:29 +0000 Subject: [PATCH] Feat: Some new source websites not fully integrated. --- .gitignore | 2 +- README.md | 51 ++++++++++++++ docs/excel_formula.md | 156 ++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 30 -------- 4 files changed, 208 insertions(+), 31 deletions(-) create mode 100644 README.md create mode 100644 docs/excel_formula.md diff --git a/.gitignore b/.gitignore index 069a480..bada790 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ pkm_data/ # too big mtg_cards_20251019_095943.csv mtg_cards_20251019_101118.xlsx -mtg-default-cards-20251018212333.json \ No newline at end of file +mtg-default-cards-20251018212333.json diff --git a/README.md b/README.md new file mode 100644 index 0000000..8252b98 --- /dev/null +++ b/README.md @@ -0,0 +1,51 @@ +## Run product scraper manual +1. create virtual environment +python3 -m venv env_api + +2. enter virtual environment +source ./env_api/bin/activate + +3. install necessary modules +pip3 install -r requirements.txt + +4. Change to product scraping main directory +- Same local directory as 'TCG Sole Trader Copy.xlsx' workbook +cd product_scraping + +5. Call controller main method +python3 single_run/product_scraper_controller.py + +6. exit virtual environment +deactivate + + +## Run product scraper continuous +""" Call +EMAIL_SENDER=bot@partsltd.co.uk \ +EMAIL_PASSWORD=CHANGE_ME \ +EMAIL_RECIPIENT=teddy@shuffleandskirmish.co.uk \ +python3 product_scraper.py + + + +One-time setup (edit credentials first): + + +# 1. Fill in your email details in the service file +nano product_scraping/tcg-scraper.service + +# 2. 
Copy to systemd and enable +sudo cp product_scraping/tcg-scraper.service /etc/systemd/system/ +sudo systemctl daemon-reload +sudo systemctl enable tcg-scraper # auto-start on boot +sudo systemctl start tcg-scraper +Day-to-day controls: + + +sudo systemctl stop tcg-scraper # stop immediately +sudo systemctl start tcg-scraper # start again +sudo systemctl restart tcg-scraper # restart (e.g. after editing the .py) +sudo systemctl status tcg-scraper # is it running? last exit code? +journalctl -fu tcg-scraper # live log tail (Ctrl+C to exit) +journalctl -u tcg-scraper --since today # today's log +""" diff --git a/docs/excel_formula.md b/docs/excel_formula.md new file mode 100644 index 0000000..bcb5dae --- /dev/null +++ b/docs/excel_formula.md @@ -0,0 +1,156 @@ +# Paste into a text editor, then find and replace the following with nothing to remove whitespace before pasting the final result into Excel: " ", "\n", "\t" +=IF( + AND( + [@[Source Name]]="Card Market" + , NOT(ISBLANK([@[Source Link]])) + ) + , [@[Index Card Market]] * 37 + , IF( + AND( + [@[Source Name]]="Chaos Cards" + , NOT(ISBLANK([@[Source Link]])) + ) + , [@[Index Chaos Cards]] * 37 - 1 + , IF( + AND([@[Source Name]]="Games Lore", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Games Lore]] * 37 - 2 + , IF( + AND([@[Source Name]]="Magic Madhouse", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Magic Madhouse]] * 37 - 3 + , IF( + AND([@[Source Name]]="New Realities Gaming", NOT(ISBLANK([@[Source Link]]))) + , [@[Index New Realities Gaming]] * 37 - 4 + , IF( + AND([@[Source Name]]="Column1", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column1]] * 37 - 5 + , IF( + AND([@[Source Name]]="Column2", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column2]] * 37 - 6 + , IF( + AND([@[Source Name]]="Column22", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column22]] * 37 - 7 + , IF( + AND([@[Source Name]]="Column3", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column3]] * 37 - 8 + , IF( + AND([@[Source Name]]="Column7", 
NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column7]] * 37 - 9 + , IF( + AND([@[Source Name]]="Column6", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column6]] * 37 - 10 + , IF( + AND([@[Source Name]]="Column5", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column5]] * 37 - 11 + , IF( + AND([@[Source Name]]="Column4", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column4]] * 37 - 12 + , IF( + AND([@[Source Name]]="Column15", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column15]] * 37 - 13 + , IF( + AND([@[Source Name]]="Column14", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column14]] * 37 - 14 + , IF( + AND([@[Source Name]]="Column13", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column13]] * 37 - 15 + , IF( + AND([@[Source Name]]="Column12", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column12]] * 37 - 16 + , IF( + AND([@[Source Name]]="Column11", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column11]] * 37 - 17 + , IF( + AND([@[Source Name]]="Column10", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column10]] * 37 - 18 + , IF( + AND([@[Source Name]]="Column9", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column9]] * 37 - 19 + , IF( + AND([@[Source Name]]="Column8", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column8]] * 37 - 20 + , IF( + AND([@[Source Name]]="Column221", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column221]] * 37 - 21 + , IF( + AND([@[Source Name]]="Column220", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column220]] * 37 - 22 + , IF( + AND([@[Source Name]]="Column219", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column219]] * 37 - 23 + , IF( + AND([@[Source Name]]="Column218", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column218]] * 37 - 24 + , IF( + AND([@[Source Name]]="Column217", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column217]] * 37 - 25 + , IF( + AND([@[Source Name]]="Column216", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column216]] * 37 - 26 + , IF( + AND([@[Source Name]]="Column215", NOT(ISBLANK([@[Source Link]]))) + , 
[@[Index Column215]] * 37 - 27 + , IF( + AND([@[Source Name]]="Column214", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column214]] * 37 - 28 + , IF( + AND([@[Source Name]]="Column213", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column213]] * 37 - 29 + , IF( + AND([@[Source Name]]="Column212", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column212]] * 37 - 30 + , IF( + AND([@[Source Name]]="Column21", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column21]] * 37 - 31 + , IF( + AND([@[Source Name]]="Column20", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column20]] * 37 - 32 + , IF( + AND([@[Source Name]]="Column19", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column19]] * 37 - 33 + , IF( + AND([@[Source Name]]="Column18", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column18]] * 37 - 34 + , IF( + AND([@[Source Name]]="Column17", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column17]] * 37 - 35 + , IF( + AND([@[Source Name]]="Column16", NOT(ISBLANK([@[Source Link]]))) + , [@[Index Column16]] * 37 - 36 + , -1 + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ) +) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 334fd65..7d80ea4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,33 +14,3 @@ xlsxwriter # undetected_chromedriver playwright aioconsole - -""" Call -EMAIL_SENDER=bot@partsltd.co.uk \ -EMAIL_PASSWORD=@sJt1b5Cliu6 \ -EMAIL_RECIPIENT=teddy@shuffleandskirmish.co.uk \ -python3 product_scraper.py - - - -One-time setup (edit credentials first): - - -# 1. Fill in your email details in the service file -nano product_scraping/tcg-scraper.service - -# 2. 
Copy to systemd and enable -sudo cp product_scraping/tcg-scraper.service /etc/systemd/system/ -sudo systemctl daemon-reload -sudo systemctl enable tcg-scraper # auto-start on boot -sudo systemctl start tcg-scraper -Day-to-day controls: - - -sudo systemctl stop tcg-scraper # stop immediately -sudo systemctl start tcg-scraper # start again -sudo systemctl restart tcg-scraper # restart (e.g. after editing the .py) -sudo systemctl status tcg-scraper # is it running? last exit code? -journalctl -fu tcg-scraper # live log tail (Ctrl+C to exit) -journalctl -u tcg-scraper --since today # today's log -""" \ No newline at end of file