Feat: Some new source websites not fully integrated.
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,4 +4,4 @@ pkm_data/
|
||||
# too big
|
||||
mtg_cards_20251019_095943.csv
|
||||
mtg_cards_20251019_101118.xlsx
|
||||
mtg-default-cards-20251018212333.json
|
||||
mtg-default-cards-20251018212333.json
|
||||
|
||||
51
README.md
Normal file
51
README.md
Normal file
@@ -0,0 +1,51 @@
|
||||
## Run product scraper manually
|
||||
1. create virtual environment
|
||||
python3 -m venv env_api
|
||||
|
||||
2. enter virtual environment
|
||||
source ./env_api/bin/activate
|
||||
|
||||
3. install necessary modules
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
4. Change to product scraping main directory
|
||||
- Same local directory as 'TCG Sole Trader Copy.xlsx' workbook
|
||||
cd product_scraping
|
||||
|
||||
5. Call controller main method
|
||||
python3 single_run/product_scraper_controller.py
|
||||
|
||||
6. exit virtual environment
|
||||
deactivate
|
||||
|
||||
|
||||
## Run product scraper continuously
|
||||
""" Call
|
||||
EMAIL_SENDER=bot@partsltd.co.uk \
|
||||
EMAIL_PASSWORD='<your-email-password>' \
|
||||
EMAIL_RECIPIENT=teddy@shuffleandskirmish.co.uk \
|
||||
python3 product_scraper.py
|
||||
|
||||
|
||||
|
||||
One-time setup (edit credentials first):
|
||||
|
||||
|
||||
# 1. Fill in your email details in the service file
|
||||
nano product_scraping/tcg-scraper.service
|
||||
|
||||
# 2. Copy to systemd and enable
|
||||
sudo cp product_scraping/tcg-scraper.service /etc/systemd/system/
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable tcg-scraper # auto-start on boot
|
||||
sudo systemctl start tcg-scraper
|
||||
Day-to-day controls:
|
||||
|
||||
|
||||
sudo systemctl stop tcg-scraper # stop immediately
|
||||
sudo systemctl start tcg-scraper # start again
|
||||
sudo systemctl restart tcg-scraper # restart (e.g. after editing the .py)
|
||||
sudo systemctl status tcg-scraper # is it running? last exit code?
|
||||
journalctl -fu tcg-scraper # live log tail (Ctrl+C to exit)
|
||||
journalctl -u tcg-scraper --since today # today's log
|
||||
"""
|
||||
156
docs/excel_formula.md
Normal file
156
docs/excel_formula.md
Normal file
@@ -0,0 +1,156 @@
|
||||
# Paste into a text editor, then find and replace each of the following with nothing to remove spaces before pasting the final result into Excel: " ", "\n", "\t"
|
||||
=IF(
|
||||
AND(
|
||||
[@[Source Name]]="Card Market"
|
||||
, NOT(ISBLANK([@[Source Link]]))
|
||||
)
|
||||
, [@[Index Card Market]] * 37
|
||||
, IF(
|
||||
AND(
|
||||
[@[Source Name]]="Chaos Cards"
|
||||
, NOT(ISBLANK([@[Source Link]]))
|
||||
)
|
||||
, [@[Index Chaos Cards]] * 37 - 1
|
||||
, IF(
|
||||
AND([@[Source Name]]="Games Lore", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Games Lore]] * 37 - 2
|
||||
, IF(
|
||||
AND([@[Source Name]]="Magic Madhouse", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Magic Madhouse]] * 37 - 3
|
||||
, IF(
|
||||
AND([@[Source Name]]="New Realities Gaming", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index New Realities Gaming]] * 37 - 4
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column1", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column1]] * 37 - 5
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column2", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column2]] * 37 - 6
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column22", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column22]] * 37 - 7
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column3", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column3]] * 37 - 8
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column7", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column7]] * 37 - 9
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column6", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column6]] * 37 - 10
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column5", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column5]] * 37 - 11
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column4", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column4]] * 37 - 12
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column15", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column15]] * 37 - 13
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column14", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column14]] * 37 - 14
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column13", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column13]] * 37 - 15
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column12", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column12]] * 37 - 16
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column11", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column11]] * 37 - 17
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column10", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column10]] * 37 - 18
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column9", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column9]] * 37 - 19
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column8", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column8]] * 37 - 20
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column221", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column221]] * 37 - 21
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column220", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column220]] * 37 - 22
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column219", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column219]] * 37 - 23
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column218", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column218]] * 37 - 24
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column217", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column217]] * 37 - 25
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column216", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column216]] * 37 - 26
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column215", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column215]] * 37 - 27
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column214", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column214]] * 37 - 28
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column213", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column213]] * 37 - 29
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column212", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column212]] * 37 - 30
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column21", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column21]] * 37 - 31
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column20", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column20]] * 37 - 32
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column19", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column19]] * 37 - 33
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column18", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column18]] * 37 - 34
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column17", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column17]] * 37 - 35
|
||||
, IF(
|
||||
AND([@[Source Name]]="Column16", NOT(ISBLANK([@[Source Link]])))
|
||||
, [@[Index Column16]] * 37 - 36
|
||||
, -1
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -14,33 +14,3 @@ xlsxwriter
|
||||
# undetected_chromedriver
|
||||
playwright
|
||||
aioconsole
|
||||
|
||||
""" Call
|
||||
EMAIL_SENDER=bot@partsltd.co.uk \
|
||||
EMAIL_PASSWORD='<your-email-password>' \
|
||||
EMAIL_RECIPIENT=teddy@shuffleandskirmish.co.uk \
|
||||
python3 product_scraper.py
|
||||
|
||||
|
||||
|
||||
One-time setup (edit credentials first):
|
||||
|
||||
|
||||
# 1. Fill in your email details in the service file
|
||||
nano product_scraping/tcg-scraper.service
|
||||
|
||||
# 2. Copy to systemd and enable
|
||||
sudo cp product_scraping/tcg-scraper.service /etc/systemd/system/
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable tcg-scraper # auto-start on boot
|
||||
sudo systemctl start tcg-scraper
|
||||
Day-to-day controls:
|
||||
|
||||
|
||||
sudo systemctl stop tcg-scraper # stop immediately
|
||||
sudo systemctl start tcg-scraper # start again
|
||||
sudo systemctl restart tcg-scraper # restart (e.g. after editing the .py)
|
||||
sudo systemctl status tcg-scraper # is it running? last exit code?
|
||||
journalctl -fu tcg-scraper # live log tail (Ctrl+C to exit)
|
||||
journalctl -u tcg-scraper --since today # today's log
|
||||
"""
|
||||
Reference in New Issue
Block a user