
I'm quite new to Python and have written a script using Selenium to scrape a website. I've tried everything but can't get the loop to cycle through pages: it currently just scrapes the first page five times and repeats that data. I want to scrape all of the result pages for 'BR1'. Any help would be great.

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

with open('rightmove.csv', 'w') as file:
    file.write('PropertyCardcontent \n')

PATH = ("/usr/local/bin/chromedriver")
driver = webdriver.Chrome(PATH)

driver.get("https://www.rightmove.co.uk/house-prices.html")
print(driver.title)

elem = driver.find_element(By.NAME, 'searchLocation')  # Find the search box
elem.send_keys('BR1' + Keys.RETURN)

try:
    content = WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.ID,'content'))
            )

finally:
    time.sleep(3)

for p in range(5):
    sold = content.find_elements(By.CLASS_NAME, 'sold-prices-content-wrapper ')
    for solds in sold:
        address = solds.find_elements(By.CLASS_NAME, 'sold-prices-content ')
        for addresses in address:
            result = addresses.find_elements(By.CLASS_NAME, 'results ')
            for results in result:
                card = results.find_elements(By.CLASS_NAME,'propertyCard')
                for propertyCard in card:
                    header = propertyCard.find_elements(By.CLASS_NAME,'propertyCard-content')
                    for propertyCardcontent in header:
                        road = propertyCardcontent.find_elements(By.CLASS_NAME,'title')
                    for propertyCardcontent in header:
                        road = propertyCardcontent.find_elements(By.CLASS_NAME,'subTitle')
                        for subtitle in road:
                            bed = subtitle.find_elements(By.CLASS_NAME, 'propertyType')
    with open('rightmove.csv', 'a') as file:
        for i in range(len(result)):
            file.write(header[i].text + '\n')
        
        button = driver.find_element(By.XPATH, '//*[@id="content"]/div[2]/div[2]/div[4]/div[27]/div[3]/div')
        button.click()
    file.close()

time.sleep(3)
driver.quit()

2 Answers


You don't have to walk down the DOM element by element; you can just use an XPath or a class name (if it's unique, otherwise an XPath or CSS selector is better) and grab the item you are looking for directly. Anyway, try something like this:

import time
import selenium.webdriver as webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome("/usr/local/bin/chromedriver")
driver.get("https://www.rightmove.co.uk/house-prices.html")

# send query
query = "BR1"
search_bar = driver.find_element(By.XPATH, '//input[@class="searchBox ac_input"]')
search_bar.send_keys(query)
search_bar.send_keys(Keys.ENTER)

# wait for the results to load
WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'propertyCard'))
    )

# get the number of result pages
pages = driver.find_element(By.XPATH, '//span[@class="pagination-label"][2]').text
pages = int(pages.replace('of ', ''))
data = []
i = 1

while i <= pages:
    # wait for the property cards on the current page to load
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div//div[@class="propertyCard"]'))
    )
    # collect the data from every property card on this page
    propertyCards = driver.find_elements(By.XPATH, '//div//div[@class="propertyCard"]')
    for propertyCard in propertyCards:
        title = propertyCard.find_element(By.CLASS_NAME, 'title').text
        propertyType = propertyCard.find_element(By.CLASS_NAME, 'propertyType').text
        data.append((title, propertyType))
    # move to the next page, unless we are already on the last one
    if i < pages:
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//div[contains(text(), "Next")]'))
        ).click()
    time.sleep(2)
    i += 1
    
print("you reach the last page")

#get number of results
driver.close()

I use a list of tuples because in your example you only want to store two items per property; if you want to store more data, you can use a dict and then write it to CSV directly with csv.DictWriter. Enjoy.
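For instance, a minimal sketch of writing the collected data out that way; the 'rightmove.csv' filename and the column names are just placeholders taken from your script:

import csv

# rough sketch: turn each scraped (title, propertyType) tuple into a dict
# and write it out with DictWriter
fieldnames = ['title', 'propertyType']
with open('rightmove.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    for title, propertyType in data:
        writer.writerow({'title': title, 'propertyType': propertyType})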


Since the website exposes the page number in the URL, I recommend using "https://www.rightmove.co.uk/house-prices/br1.html?page=1" as the base URL and looping through the pages while changing the page index at the end of the URL, for example with an f-string (see the sketch below).

One other thing: you don't need all those nested for loops. You can simply assign each variable its value directly, since everything you need sits inside one HTML block that is easy to navigate.
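A minimal sketch of that approach, reusing the selectors from the other answer; the hard-coded page count of 5 and the 'propertyCard', 'title' and 'propertyType' class names are assumptions you would need to check against the live page:

import time
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome("/usr/local/bin/chromedriver")
data = []

for page in range(1, 6):  # assumed 5 result pages; adjust to the real count
    # build each page URL with an f-string instead of clicking "Next"
    driver.get(f"https://www.rightmove.co.uk/house-prices/br1.html?page={page}")
    time.sleep(2)  # crude wait; a WebDriverWait would be more robust
    for card in driver.find_elements(By.CLASS_NAME, 'propertyCard'):
        # read each value straight off the card, no nested loops needed
        title = card.find_element(By.CLASS_NAME, 'title').text
        propertyType = card.find_element(By.CLASS_NAME, 'propertyType').text
        data.append((title, propertyType))

driver.quit()
print(data)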