Feat: Some new source websites not fully integrated.

This commit is contained in:
2026-03-13 09:26:29 +00:00
parent bc5880ab74
commit 3ada3f966d
4 changed files with 208 additions and 31 deletions

2
.gitignore vendored
View File

@@ -4,4 +4,4 @@ pkm_data/
# too big # too big
mtg_cards_20251019_095943.csv mtg_cards_20251019_095943.csv
mtg_cards_20251019_101118.xlsx mtg_cards_20251019_101118.xlsx
mtg-default-cards-20251018212333.json mtg-default-cards-20251018212333.json

51
README.md Normal file
View File

@@ -0,0 +1,51 @@
## Run product scraper manually
1. create virtual environment
python3 -m venv env_api
2. enter virtual environment
source ./env_api/bin/activate
3. install necessary modules
pip3 install -r requirements.txt
4. Change to product scraping main directory
- Same local directory as 'TCG Sole Trader Copy.xlsx' workbook
cd product_scraping
5. Call controller main method
python3 single_run/product_scraper_controller.py
6. exit virtual environment
deactivate
## Run product scraper continuously
Call:
```bash
EMAIL_SENDER=bot@partsltd.co.uk \
EMAIL_PASSWORD='<your-email-password>' \
EMAIL_RECIPIENT=teddy@shuffleandskirmish.co.uk \
python3 product_scraper.py
```
Do not commit a real password to the repository; set it in your shell or in the service file instead.

One-time setup (edit credentials first):
```bash
# 1. Fill in your email details in the service file
nano product_scraping/tcg-scraper.service
# 2. Copy to systemd and enable
sudo cp product_scraping/tcg-scraper.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable tcg-scraper # auto-start on boot
sudo systemctl start tcg-scraper
```
Day-to-day controls:
```bash
sudo systemctl stop tcg-scraper # stop immediately
sudo systemctl start tcg-scraper # start again
sudo systemctl restart tcg-scraper # restart (e.g. after editing the .py)
sudo systemctl status tcg-scraper # is it running? last exit code?
journalctl -fu tcg-scraper # live log tail (Ctrl+C to exit)
journalctl -u tcg-scraper --since today # today's log
```

156
docs/excel_formula.md Normal file
View File

@@ -0,0 +1,156 @@
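# Paste into a text editor, then find and replace each of the following with nothing to strip the layout whitespace before pasting the final result into Excel: " ", "\n", "\t". Caution: a blanket replace of " " also removes the spaces inside names such as "Card Market" and [@[Source Name]], so restore those afterwards or limit the replace to the indentation.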
=IF(
AND(
[@[Source Name]]="Card Market"
, NOT(ISBLANK([@[Source Link]]))
)
, [@[Index Card Market]] * 37
, IF(
AND(
[@[Source Name]]="Chaos Cards"
, NOT(ISBLANK([@[Source Link]]))
)
, [@[Index Chaos Cards]] * 37 - 1
, IF(
AND([@[Source Name]]="Games Lore", NOT(ISBLANK([@[Source Link]])))
, [@[Index Games Lore]] * 37 - 2
, IF(
AND([@[Source Name]]="Magic Madhouse", NOT(ISBLANK([@[Source Link]])))
, [@[Index Magic Madhouse]] * 37 - 3
, IF(
AND([@[Source Name]]="New Realities Gaming", NOT(ISBLANK([@[Source Link]])))
, [@[Index New Realities Gaming]] * 37 - 4
, IF(
AND([@[Source Name]]="Column1", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column1]] * 37 - 5
, IF(
AND([@[Source Name]]="Column2", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column2]] * 37 - 6
, IF(
AND([@[Source Name]]="Column22", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column22]] * 37 - 7
, IF(
AND([@[Source Name]]="Column3", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column3]] * 37 - 8
, IF(
AND([@[Source Name]]="Column7", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column7]] * 37 - 9
, IF(
AND([@[Source Name]]="Column6", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column6]] * 37 - 10
, IF(
AND([@[Source Name]]="Column5", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column5]] * 37 - 11
, IF(
AND([@[Source Name]]="Column4", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column4]] * 37 - 12
, IF(
AND([@[Source Name]]="Column15", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column15]] * 37 - 13
, IF(
AND([@[Source Name]]="Column14", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column14]] * 37 - 14
, IF(
AND([@[Source Name]]="Column13", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column13]] * 37 - 15
, IF(
AND([@[Source Name]]="Column12", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column12]] * 37 - 16
, IF(
AND([@[Source Name]]="Column11", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column11]] * 37 - 17
, IF(
AND([@[Source Name]]="Column10", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column10]] * 37 - 18
, IF(
AND([@[Source Name]]="Column9", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column9]] * 37 - 19
, IF(
AND([@[Source Name]]="Column8", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column8]] * 37 - 20
, IF(
AND([@[Source Name]]="Column221", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column221]] * 37 - 21
, IF(
AND([@[Source Name]]="Column220", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column220]] * 37 - 22
, IF(
AND([@[Source Name]]="Column219", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column219]] * 37 - 23
, IF(
AND([@[Source Name]]="Column218", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column218]] * 37 - 24
, IF(
AND([@[Source Name]]="Column217", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column217]] * 37 - 25
, IF(
AND([@[Source Name]]="Column216", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column216]] * 37 - 26
, IF(
AND([@[Source Name]]="Column215", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column215]] * 37 - 27
, IF(
AND([@[Source Name]]="Column214", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column214]] * 37 - 28
, IF(
AND([@[Source Name]]="Column213", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column213]] * 37 - 29
, IF(
AND([@[Source Name]]="Column212", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column212]] * 37 - 30
, IF(
AND([@[Source Name]]="Column21", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column21]] * 37 - 31
, IF(
AND([@[Source Name]]="Column20", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column20]] * 37 - 32
, IF(
AND([@[Source Name]]="Column19", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column19]] * 37 - 33
, IF(
AND([@[Source Name]]="Column18", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column18]] * 37 - 34
, IF(
AND([@[Source Name]]="Column17", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column17]] * 37 - 35
, IF(
AND([@[Source Name]]="Column16", NOT(ISBLANK([@[Source Link]])))
, [@[Index Column16]] * 37 - 36
, -1
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)
)

View File

@@ -14,33 +14,3 @@ xlsxwriter
# undetected_chromedriver # undetected_chromedriver
playwright playwright
aioconsole aioconsole
""" Call
EMAIL_SENDER=bot@partsltd.co.uk \
EMAIL_PASSWORD=@sJt1b5Cliu6 \
EMAIL_RECIPIENT=teddy@shuffleandskirmish.co.uk \
python3 product_scraper.py
One-time setup (edit credentials first):
# 1. Fill in your email details in the service file
nano product_scraping/tcg-scraper.service
# 2. Copy to systemd and enable
sudo cp product_scraping/tcg-scraper.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable tcg-scraper # auto-start on boot
sudo systemctl start tcg-scraper
Day-to-day controls:
sudo systemctl stop tcg-scraper # stop immediately
sudo systemctl start tcg-scraper # start again
sudo systemctl restart tcg-scraper # restart (e.g. after editing the .py)
sudo systemctl status tcg-scraper # is it running? last exit code?
journalctl -fu tcg-scraper # live log tail (Ctrl+C to exit)
journalctl -u tcg-scraper --since today # today's log
"""