Trying to scrap a website with around 800 page, The code stops randomly at random page number!( ex. for the first time i get error in 5th page, then next time i run, i get error at 500th page)
Initialized selenium driver, Extracted the table from a page of website, for each row AND for each column within row, I am extracting data and adding it to separate list, so that later i could group them to form dataframe, but the above error keeps apperating and stopping the loop at random page numbers each time i rerun the program.
The error is "StaleElementReferenceException: Message: {"errorMessage":"Element does not exist in cache","request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Content-Length":"117",........"chunks":["elements"]},"urlOriginal":"/session/7cff3fd0-fd4e-11e9-b0e3-c9f679f964da/element/:wdc:1572684982199/elements"}} Screenshot: available via screen"
url = "https://auditoria.cgu.gov.br/"
driver.get(url)
page = driver.find_element_by_id('lista_info').text
last_page=int(re.split(r'\s',page)[-1])
for_heading=driver.find_element_by_id('lista_wrapper').find_elements_by_css_selector('th')
heading_list=[]
for element in for_heading:
heading_list.append(element.text)
df1= pd.DataFrame(columns=heading_list)
listn_0=[]
listn_1=[]
listn_2=[]
listn_3=[]
listn_4=[]
listn_5=[]
listn_6=[]
c=0
page=0
counter=0
page=0
counter=0
while page!=last_page:
for_row = len(driver.find_element_by_id('lista').find_elements_by_tag_name('tr'))
for row in range(1,for_row):
i=0
if(driver.find_element_by_id('lista').find_elements_by_tag_name('tr')[row].find_elements_by_tag_name('td')):
col_list= len(driver.find_element_by_id('lista').find_elements_by_tag_name('tr')[row].find_elements_by_tag_name('td'))
c=0
for cols in range(col_list):
if(c==1):
locals()['listn_%d' %i].append(driver.find_element_by_id('lista').find_elements_by_tag_name('tr')[row].find_elements_by_tag_name('td')[cols].find_element_by_css_selector('a').get_attribute('href'))
else:
locals()['listn_%d' %i].append(driver.find_element_by_id('lista').find_elements_by_tag_name('tr')[row].find_elements_by_tag_name('td')[cols].get_property('text'))
i=i+1
c=c+1
else:
continue;
counter=counter+1
page=page+1
print(counter)
driver.find_element_by_id('lista_next').click()
time.sleep(1)