Files
zwift-offline/scripts/get_pro_names.py

170 lines
14 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#! /usr/bin/env python
# Use this script to populate bot.txt with names from https://www.procyclingstats.com
# pip install beautifulsoup4 country-converter fuzzywuzzy
# scripts/get_pro_names.py -h
from bs4 import BeautifulSoup
import urllib.request
import json
import country_converter as coco
import argparse
import getpass
import os
import sys
import xml.etree.ElementTree as ET
from fuzzywuzzy import process
from fuzzywuzzy import fuzz
# Ranking page every request starts from; main() appends further query
# parameters (ranking type, nation, men/women category, page offset) to it.
base_url = "https://www.procyclingstats.com/rankings.php?filter=Filter"
# Shared converter; get_pros uses it to turn the country code taken from a
# rider's flag CSS class into an ISO numeric country code.
cc = coco.CountryConverter()
# Maps a team name, spelled exactly as get_pros reads it from the team link on
# the ranking page, to the kit used for that team's riders.
# get_pros reads only the '*_signature' values ('jersey_signature',
# 'bike_signature', 'front_wheel_signature', 'rear_wheel_signature'); the
# '*_name' values are not read anywhere in this file (presumably kept as
# human-readable labels for the signatures -- confirm).
# Entries that define only a jersey contribute no bike/wheel data. Teams that
# are missing from this table get a jersey by fuzzy-matching the team name
# against the jersey names loaded from GameDictionary.xml.
teams = {
'UAE Team Emirates': {'jersey_name': 'UAE', 'jersey_signature': 1751349769, 'bike_name': 'Colnago Colnago V3RS', 'bike_signature': 3628259811, 'front_wheel_name': 'Enve SES 3.4', 'front_wheel_signature': 2223270801, 'rear_wheel_name': 'Enve SES 3.4', 'rear_wheel_signature': 3835575171},
'Soudal - Quick Step': {'jersey_name': 'Deceuninck-Quick-Step', 'jersey_signature': 2906189156, 'bike_name': 'Specialized Tarmac SL7', 'bike_signature': 935373427, 'front_wheel_name': 'Roval Rapide CLX', 'front_wheel_signature': 2181416413, 'rear_wheel_name': 'Roval Rapide CLX', 'rear_wheel_signature': 3548735686},
'Jumbo-Visma': {'jersey_name': 'Team Jumbo-Visma Men 2023', 'jersey_signature': 88214615, 'bike_name': 'Cervelo R5', 'bike_signature': 106535518, 'front_wheel_name': 'Reserve Reserve 25 GR', 'front_wheel_signature': 635220876, 'rear_wheel_name': 'Reserve Reserve 25 GR', 'rear_wheel_signature': 1842698274},
'Alpecin-Deceuninck': {'jersey_name': 'Alpecin-Fenix Pro Team', 'jersey_signature': 930946828, 'bike_name': 'Canyon Aeroad 2015', 'bike_signature': 1520594784, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'Trek - Segafredo': {'jersey_name': 'Trek-Segafredo Men', 'jersey_signature': 2140478849, 'bike_name': 'Trek Madone', 'bike_signature': 4129467727, 'front_wheel_name': 'Bontrager Aeolus5', 'front_wheel_signature': 702195190, 'rear_wheel_name': 'Bontrager Aeolus5', 'rear_wheel_signature': 3594144634},
'Movistar Team': {'jersey_name': 'Movistar Team', 'jersey_signature': 1842355135, 'bike_name': 'Canyon Aeroad 2015', 'bike_signature': 1520594784, 'front_wheel_name': 'Zipp 404', 'front_wheel_signature': 613983807, 'rear_wheel_name': 'Zipp 404', 'rear_wheel_signature': 4183014640},
'Lotto Dstny': {'jersey_name': 'Lotto', 'jersey_signature': 4130579852, 'bike_name': 'Ridley Noah Fast 2019', 'bike_signature': 4288910569, 'front_wheel_name': 'DTSwiss ARC 1100 DICUT 62', 'front_wheel_signature': 346409677, 'rear_wheel_name': 'DTSwiss ARC 1100 DICUT 62', 'rear_wheel_signature': 2049111692},
'EF Education-EasyPost': {'jersey_name': 'EF Education First', 'jersey_signature': 2349035663, 'bike_name': 'Cannondale System Six', 'bike_signature': 2005280203, 'front_wheel_name': 'HED HED Vanquish RC6 Pro', 'front_wheel_signature': 1791179228, 'rear_wheel_name': 'HED HED Vanquish RC6 Pro', 'rear_wheel_signature': 2913819265},
'INEOS Grenadiers': {'jersey_name': 'INEOS Grenadiers 2022 Pro', 'jersey_signature': 542207259, 'bike_name': 'Pinarello Dogma F', 'bike_signature': 4208139356, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'Groupama - FDJ': {'jersey_name': 'Groupama FDJ 2023', 'jersey_signature': 2814449542, 'bike_name': 'Specialized Amira S-Works', 'bike_signature': 2662728556, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'Bahrain - Victorious': {'jersey_name': 'Bahrain McLaren', 'jersey_signature': 2155858980, 'bike_name': 'Merida Scultura', 'bike_signature': 3033010663, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'Team DSM': {'jersey_name': 'Team ODZ', 'jersey_signature': 2695025247, 'bike_name': 'Scott Foil', 'bike_signature': 1315158373, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'Team Jayco AlUla': {'jersey_name': 'Team 3R', 'jersey_signature': 493134166, 'bike_name': 'Giant Propel Advanced SL Disc', 'bike_signature': 103914490, 'front_wheel_name': 'Cadex CADEX 42', 'front_wheel_signature': 1497226614, 'rear_wheel_name': 'Cadex CADEX 42', 'rear_wheel_signature': 1347687916},
'Uno-X Pro Cycling Team': {'jersey_name': 'UnoXPro2022', 'jersey_signature': 1756517729},
'Cofidis': {'jersey_name': 'Cofidis 2018', 'jersey_signature': 927604154, 'bike_name': 'Cervelo R5', 'bike_signature': 106535518, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'Intermarché - Circus - Wanty': {'jersey_name': 'IntermarchéWantyGobert Matériaux', 'jersey_signature': 88121645, 'bike_name': 'Cube Cube Litening', 'bike_signature': 1767548815, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'BORA - hansgrohe': {'jersey_name': 'Bora-Hansgrohe', 'jersey_signature': 321508751, 'bike_name': 'Specialized Tarmac SL7', 'bike_signature': 935373427, 'front_wheel_name': 'Roval Rapide CLX', 'front_wheel_signature': 2181416413, 'rear_wheel_name': 'Roval Rapide CLX', 'rear_wheel_signature': 3548735686},
'Team Arkéa Samsic': {'jersey_name': 'Arkea-Samsic', 'jersey_signature': 598687666, 'bike_name': 'Trek Madone', 'bike_signature': 4129467727, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'AG2R Citroën Team': {'jersey_name': 'AG2R La Mondiale', 'jersey_signature': 1587982785, 'bike_name': 'BMC BmcTeamMachine2022', 'bike_signature': 3868468027, 'front_wheel_name': 'Campagnolo Bora Ultra 35', 'front_wheel_signature': 1053884173, 'rear_wheel_name': 'Campagnolo Bora Ultra 35', 'rear_wheel_signature': 1614586487},
'Astana Qazaqstan Team': {'jersey_name': 'ASTANA PRO TEAM', 'jersey_signature': 1969335676, 'bike_name': 'Giant GiantRevolt2022', 'bike_signature': 2360271970, 'front_wheel_name': 'Shimano C50', 'front_wheel_signature': 1742598126, 'rear_wheel_name': 'Shimano C50', 'rear_wheel_signature': 3725678091},
'Israel - Premier Tech': {'jersey_name': 'Israel Premier-Tech', 'jersey_signature': 552170906},
'TotalEnergies': {'jersey_name': 'Total Direct Energie', 'jersey_signature': 2092402045},
'Team SD Worx': {'jersey_name': 'Team SD Worx', 'jersey_signature': 1494272741},
'UAE Team ADQ': {'jersey_name': 'UAE', 'jersey_signature': 1751349769},
'UAE Development Team': {'jersey_name': 'UAE', 'jersey_signature': 1751349769},
'FDJ - SUEZ': {'jersey_name': 'FDJ Suez 2023', 'jersey_signature': 3360845221},
'Canyon//SRAM Racing': {'jersey_name': 'CANYON//SRAM Racing', 'jersey_signature': 3970245639},
'AG Insurance - Soudal Quick-Step': {'jersey_name': 'Lotto-Soudal', 'jersey_signature': 3103938066},
'Human Powered Health': {'jersey_name': 'Human Powered Health Fan', 'jersey_signature': 854534852},
'Team Jumbo-Visma': {'jersey_name': 'Team Jumbo Visma-Women', 'jersey_signature': 1541349594},
'Liv Racing TeqFind': {'jersey_name': 'Liv Racing 2019', 'jersey_signature': 3932519699},
'Israel Premier Tech Roland': {'jersey_name': 'Israel Premier-Tech', 'jersey_signature': 552170906},
'EF Education-TIBCO-SVB': {'jersey_name': 'Team EF Education-TIBCO-SVB', 'jersey_signature': 2795352821},
'Fenix-Deceuninck': {'jersey_name': 'Deceuninck-Quick-Step', 'jersey_signature': 2906189156},
'Fenix-Deceuninck Continental': {'jersey_name': 'Deceuninck-Quick-Step', 'jersey_signature': 2906189156},
'CERATIZIT-WNT Pro Cycling': {'jersey_name': 'Ceratizit-WNT', 'jersey_signature': 97975537},
'St Michel - Mavic - Auber93 WE': {'jersey_name': 'South Africa Elite', 'jersey_signature': 3305515323},
'Lifeplus Wahoo': {'jersey_name': 'Wahoo', 'jersey_signature': 3553917933},
'Cofidis Women Team': {'jersey_name': 'Cofidis', 'jersey_signature': 4191972189},
'Arkéa Pro Cycling Team': {'jersey_name': 'Arkea', 'jersey_signature': 1128201030},
'MAT Atom Deweloper Wrocław': {'jersey_name': 'Atom Racing Team', 'jersey_signature': 851470392},
'Top Girls Fassa Bortolo': {'jersey_name': 'Clash Of Clubs Blue', 'jersey_signature': 520081294},
'EOLO-Kometa': {'jersey_name': 'Eolo Kometa', 'jersey_signature': 2422819298},
'Green Project-Bardiani CSF-Faizanè': {'jersey_name': 'Bardiani 2019', 'jersey_signature': 3503002798},
'L39ION of Los Angeles': {'jersey_name':'L39ION of LA 2022', 'jersey_signature': 2330819669},
'Lotto Dstny Ladies': {'jersey_name': 'Lotto Soudal Ladies', 'jersey_signature': 1423767803},
'Parkhotel Valkenburg': {'jersey_name': 'Parkhotel Valkenburg', 'jersey_signature': 4102459937}
}
def get_pros(url, male, get_jersey, get_equipment):
    """Scrape one ranking page into a list of rider dictionaries.

    The page is walked cell by cell (every ``<td>``). A cell holding a flag
    ``<span>`` and a rider link starts a new rider record; the following cell
    whose markup contains ``cu600`` and a link is treated as that rider's team
    cell and completes the record.

    Args:
        url: Address of the ranking page to download.
        male: Value stored under ``is_male`` for riders found on this page.
        get_jersey: When true, store a ``jersey`` signature taken from
            ``teams`` or, for unknown teams, from the closest fuzzy match
            among the names in ``jerseys`` (the match is printed).
        get_equipment: When true, store ``bike_frame``, ``bike_wheel_front``
            and ``bike_wheel_rear`` for teams in ``teams`` that define them.

    Returns:
        A list of dicts, one per rider whose team cell was found. Each may
        contain ``country_code``, ``is_male``, ``first_name``, ``last_name``
        and the optional jersey/equipment keys described above.

    Raises:
        urllib.error.URLError: If the page cannot be downloaded.
    """
    # NOTE(review): the nesting of the conditionals below was reconstructed
    # from a copy of this file that had lost its indentation -- confirm it
    # against the repository version.
    data = []
    # Presumably the site rejects urllib's default User-Agent -- confirm.
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(req) as response:
        site = response.read()
    # Name the parser explicitly so the parse result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(site, 'html.parser')

    # Start with an empty record so that a team cell seen before any rider
    # cell is skipped instead of raising UnboundLocalError.
    tmp = {}
    for td in soup.find_all('td'):
        if td.span and td.contents[0]:
            # Any cell that starts with a <span> begins a fresh record.
            tmp = {}
            if "flag" in repr(td.contents[0]):
                # The second CSS class of the flag span is the country code.
                code = td.span.get_attribute_list("class")[1]
                tmp['country_code'] = cc.convert(names=code, to='ISOnumeric')
                tmp['is_male'] = male
            if td.a:
                # Rider link: a <span> with the last name, then the first name.
                tmp['first_name'] = (td.a.contents[1].strip())
                tmp['last_name'] = (td.a.span.contents[0])
        if td.a and td.contents[0]:
            if "cu600" in repr(td) and td.a.contents:
                if 'first_name' in tmp:
                    team_name = td.a.contents[0]
                    known_team = teams.get(team_name)
                    if get_jersey:
                        if known_team is not None:
                            tmp['jersey'] = known_team['jersey_signature']
                        else:
                            best_match = process.extractOne(team_name, jerseys.keys(), scorer=fuzz.token_set_ratio)
                            print ("%s %s : %s - %s" % (tmp['first_name'],tmp['last_name'],team_name, best_match))
                            # extractOne returns None when there is nothing
                            # to match against; leave the jersey unset then.
                            if best_match is not None:
                                tmp['jersey'] = jerseys[best_match[0]]
                    if get_equipment and known_team is not None:
                        for source_key, target_key in (
                            ('bike_signature', 'bike_frame'),
                            ('front_wheel_signature', 'bike_wheel_front'),
                            ('rear_wheel_signature', 'bike_wheel_rear'),
                        ):
                            if source_key in known_team:
                                tmp[target_key] = known_team[source_key]
                    data.append(tmp)
    return data
# Build the jersey-name -> signature lookup that get_pros fuzzy-matches team
# names against when a team is missing from `teams`.
# The historical location is relative to the current working directory. When
# nothing is there (e.g. the script is started as scripts/get_pro_names.py
# from the repository root), fall back to the same relative location resolved
# from this script's own directory.
_dictionary_path = '../cdn/gameassets/GameDictionary.xml'
if not os.path.isfile(_dictionary_path):
    _dictionary_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        '..', 'cdn', 'gameassets', 'GameDictionary.xml')
tree = ET.parse(_dictionary_path)
root = tree.getroot()
jerseys = {}
for x in root.findall("./JERSEYS/JERSEY"):
    jerseys[x.get('name')] = int(x.get('signature'))
def main(argv):
    """Parse the command line, scrape the ranking pages and write bot.txt.

    Args:
        argv: Full argument vector including the program name (as in
            ``sys.argv``). ``None`` makes argparse read ``sys.argv`` itself.

    Side effects:
        Publishes the parsed options as the module-level ``args``, downloads
        every selected ranking page through ``get_pros`` and writes the
        collected riders to ``bot.txt`` in the current working directory.

    Raises:
        SystemExit: Raised by argparse for ``-h`` and for invalid arguments.
    """
    # The parsed namespace has always been exposed at module level; keep it.
    global args

    parser = argparse.ArgumentParser(description='Populate Bot names with professional riders')
    parser.add_argument('-n', '--nation', help='Riders from specified nation only', default=False)
    parser.add_argument('-f', '--female', help='Female riders only', default=False, action='store_true')
    parser.add_argument('-m', '--male', help='Male riders only', default=False, action='store_true')
    parser.add_argument('-a', '--alltime', help='Use all time ranking', default=False, action='store_true')
    # type=int lets argparse reject a non-numeric page count with a usage
    # message instead of a ValueError traceback later on.
    parser.add_argument('-p', '--pages', help='Number of pages to process', default=1, type=int)
    parser.add_argument('-j', '--jersey', help='Get team jerseys', default=False, action='store_true')
    parser.add_argument('-e', '--equipment', help='Get team bike and wheels', default=False, action='store_true')
    # Honour the argv handed in by the caller rather than silently re-reading
    # sys.argv; the first element is the program name and is skipped.
    args = parser.parse_args(argv[1:] if argv is not None else None)

    url_additions = ""
    if args.alltime:
        url_additions += "&s=all-time"
    if args.nation:
        url_additions += "&nation="+args.nation

    men = { "url": base_url + url_additions + "&p=me", "is_male": True }
    women = { "url": base_url + url_additions + "&p=we", "is_male": False }
    # --female takes precedence when both -f and -m are given.
    if args.female:
        first_pages = [women]
    elif args.male:
        first_pages = [men]
    else:
        first_pages = [men, women]

    # Further pages are requested by adding an offset in steps of 100 to each
    # first-page URL (presumably 100 riders per page -- confirm).
    url_list = list(first_pages)
    for page in range(1, args.pages):
        offset = str(page*100)
        for first in first_pages:
            url_list.append({ "url": first['url'] + "&offset=" + offset, "is_male": first['is_male'] })

    riders = []
    for item in url_list:
        riders.extend(get_pros(item['url'], item['is_male'], args.jersey, args.equipment))

    total_data = {}
    total_data['riders'] = riders
    # Fixed identifier lists written alongside the riders (presumably the
    # appearance options bots are drawn from -- confirm against the consumer
    # of bot.txt).
    total_data['body_types'] = [16, 48, 80, 272, 304, 336, 528, 560, 592]
    total_data['hair_types'] = [25953412, 175379869, 398510584, 659452569, 838618949, 924073005, 1022111028, 1262230565, 1305767757, 1569595897, 1626212425, 1985754517, 2234835005, 2507058825, 3092564365, 3200039653, 3296520581, 3351295312, 3536770137, 4021222889, 4179410997, 4294226781]
    total_data['facial_hair_types'] = [248681634, 398510584, 867351826, 1947387842, 2173853954, 3169994930, 4131541011, 4216468066]
    with open('bot.txt', 'w') as outfile:
        json.dump(total_data, outfile, indent=2)
# Script entry point.
if __name__ == '__main__':
    try:
        main(sys.argv)
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the scrape; exit quietly.
        pass
    except SystemExit as se:
        # argparse exits with status 0 after -h/--help: that is not an error,
        # so stay silent. For a real failure, report it and re-raise so the
        # process keeps its non-zero exit status instead of ending with 0.
        if se.code not in (None, 0):
            print("ERROR:", se)
            raise