I'm automating a website. My goal is to run a search on this site and extract the information it returns.
When I do a search it generates a lot of result links for me. These links are spread across pages that are navigated with the following buttons.
Here is an image that shows the multiple page links in the DOM.
There are roughly 45 pages, each containing 45 links.
My problem is that when I click the 'Next' button, it changes position in the DOM. This is highlighted in the image below.
There are also searches that generate 4 pages, 3 pages, 10 pages, or just 1 page.
Each time the page updates, the position of the 'Next' button changes.
The idea I had was to grab all the page links that sit alongside the 'Next' button in the DOM and extract all the necessary content from them without having to click through to the next page.
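Something like this is what I have in mind, as a rough sketch. I don't know the exact markup of the pagination container yet, so the XPath //div[@class="paginacao"]//a and the assumption that each page link carries a usable href are placeholders:

# Rough sketch of the idea: collect the href of every pagination link once,
# then load each results page directly instead of clicking 'Next'.
# NOTE: the container XPath ('//div[@class="paginacao"]//a') is an assumption;
# it must be adjusted to the element that actually holds the page links.
def scrape_all_pages(browser):
    page_links = browser.find_elements_by_xpath('//div[@class="paginacao"]//a')
    urls = [link.get_attribute('href') for link in page_links if link.get_attribute('href')]
    for url in urls:
        browser.get(url)  # open each results page directly
        for block in browser.find_elements_by_xpath('//div[@class="fundoEscuro"]'):
            print(block.text)
        for block in browser.find_elements_by_xpath('//div[@class="fundoClaro"]'):
            print(block.text)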
from time import sleep
import csv

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.support.ui import Select

FIREFOX = 'C:\\Users\\Daniel\\Desktop\\Scraping\\bin\\geckodriver.exe'
URL = '\x0\x0\x0\x0'
user = '\x0\x0\x0'
password = '\x0\x0\x0'


class Scraping():
    def __init__(self):
        self.browser = webdriver.Firefox(executable_path=FIREFOX)
        self.browser.get(URL)
        sleep(1)
        self.browser.find_element_by_xpath('//*[@id="identificacao"]/strong/a').click()
        sleep(1)
        # here is the login
        self.browser.find_element_by_id('usernameForm').send_keys(user)
        self.browser.find_element_by_id('passwordForm').send_keys(password)
        self.browser.find_element_by_xpath('//*[@id="pbEntrar"]').click()
        # here is where I need to manipulate a SELECT in the DOM
        select = Select(self.browser.find_element_by_id('cbPesquisa'))
        select.select_by_value('DOCPARTE')
        sleep(1)
        # here I loop through a CSV and use its data to search the page
        ficheiro = './sp-ext.csv'
        with open(ficheiro, 'r') as leitor:
            reader = csv.DictReader(leitor, delimiter=';')
            for linha in reader:
                y = linha['Pastas']
                field = self.browser.find_element_by_id('campo_DOCPARTE')  # here is where I type the search term
                field.clear()
                field.send_keys(y)
                self.browser.find_element_by_id('pbEnviar').click()
                # here is where I extract data from the first page of the first search
                fundo_escuro = self.browser.find_elements_by_xpath('//div[@class="fundoEscuro"]')
                for extraction in fundo_escuro:
                    print(extraction.text)
                fundo_claro = self.browser.find_elements_by_xpath('//div[@class="fundoClaro"]')
                for extraction in fundo_claro:
                    print(extraction.text)
                # here would be the logic to extract information from the upcoming pages


if __name__ == '__main__':
    Scraping()
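For the part marked "# here would be the logic to extract information from the upcoming pages", the alternative I'm considering is to locate 'Next' by its link text instead of its position, so it doesn't matter where it moves in the DOM. A rough sketch, assuming the link text is '>' (that text is a guess and would need to match the real button):

from selenium.common.exceptions import NoSuchElementException

# Alternative sketch: extract the current page, then click 'Next' via a
# position-independent locator (partial link text) until it disappears.
# NOTE: the partial link text '>' is an assumption about the real button label.
def scrape_following_pages(browser):
    while True:
        for block in browser.find_elements_by_xpath('//div[@class="fundoEscuro"]'):
            print(block.text)
        for block in browser.find_elements_by_xpath('//div[@class="fundoClaro"]'):
            print(block.text)
        try:
            browser.find_element_by_partial_link_text('>').click()
        except NoSuchElementException:
            break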
I am using Python 3.7.3 and Windows 8.1.


