I'm trying to implement concurrent.futures
or something similar within the following script to make the execution faster. However, When I compare the two scripts, I see no change in speed. What possible change should I bring about to achieve the same?
Original script:
import requests
from bs4 import BeautifulSoup
link = 'https://ldc.lloyds.com/market-directory/results'
params = {
'cobc': '','cob': '','loc': '','ltti': '',
'bro': '0','cov': '1','man': '0','mem': '0',
'omc': '0','run': '0','name': '','mode':' cov',
'c_page': 1 #---------------->unknown number of pages to traverse
}
def get_content(s,link,params):
while True:
r = s.get(link,params=params)
soup = BeautifulSoup(r.text,"lxml")
if not soup.select(".marketing-directories-results .contact-details > h2"):
return
for item in soup.select(".marketing-directories-results .contact-details > h2"):
yield item.text
params['c_page']+=1
if __name__ == '__main__':
with requests.Session() as s:
s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'
for item in get_content(s,link,params):
print(item)
When I implement concurrent.futures
within the script, I don't see any speed related improvement in the execution process:
import requests
from bs4 import BeautifulSoup
import concurrent.futures as futures
link = 'https://ldc.lloyds.com/market-directory/results'
params = {
'cobc': '','cob': '','loc': '','ltti': '',
'bro': '0','cov': '1','man': '0','mem': '0',
'omc': '0','run': '0','name': '','mode':' cov',
'c_page': 1 #---------------->unknown number of pages to traverse
}
def get_content(s,link,params):
while True:
item_list = []
r = s.get(link,params=params)
soup = BeautifulSoup(r.text,"lxml")
if not soup.select(".marketing-directories-results .contact-details > h2"):
return
for item in soup.select(".marketing-directories-results .contact-details > h2"):
print(item.text)
params['c_page']+=1
if __name__ == '__main__':
with requests.Session() as s:
s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'
with futures.ThreadPoolExecutor(max_workers=5) as executor:
future_to_url = {executor.submit(get_content, s, url, params): url for url in [link]}
futures.as_completed(future_to_url)
How can I implement
concurrent.futures
or something similar to speed the execution up?