Quantcast
Channel: Active questions tagged selenium - Stack Overflow
Viewing all articles
Browse latest Browse all 98974

Selenium Python script has different behavior on Windows and Ubuntu environments

$
0
0

I've tried running a script on Windows and on Ubuntu, both using Python 3 and the latest versions of geckodriver, resulting in differing behavior. The full script is given below.

I'm trying to get the data for several different tests from a test prep site. There are different subjects, each of which has a specialization, each of which has a practice-test, each of which has several questions. The scrape function walks through the steps to get data of each type.

subject <--- specialization <---- practice-test *------ question

The get_questions function is where the difference shows up:

  • In Windows, it behaves as expected. After the last question's choice is clicked, it goes on to a results page.
  • In Ubuntu, when a choice is clicked on the last question, it reloads the last question and keeps clicking the same choice and reloading the same question.

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    import pathlib
    import time
    import json
    import os
    
    driver=webdriver.Firefox(executable_path="./geckodriver.exe")
    wait = WebDriverWait(driver, 15)
    data=[]
    
    def setup():
    
       driver.get('https://www.varsitytutors.com/practice-tests')
       try:
          go_away_1= driver.find_element_by_class_name("ub-emb-iframe")
          driver.execute_script("arguments[0].style.visibility='hidden'", go_away_1)
          go_away_2= driver.find_element_by_class_name("ub-emb-iframe-wrapper")
          driver.execute_script("arguments[0].style.visibility='hidden'", go_away_2)
          go_away_3= driver.find_element_by_class_name("ub-emb-visible")
          driver.execute_script("arguments[0].style.visibility='hidden'", go_away_3)
       except:
          pass
    
    def get_subjects(subs=[]):
       subject_clickables_xpath="/html/body/div[3]/div[9]/div/*/div[@data-subject]/div[1]"
       subject_clickables=driver.find_elements_by_xpath(subject_clickables_xpath)
       subject_names=map(lambda x : x.find_element_by_xpath('..').get_attribute('data-subject'), subject_clickables)
       subject_pairs=zip(subject_names, subject_clickables)
       return subject_pairs
    
    def get_specializations(subject):
    
       specialization_clickables_xpath="//div//div[@data-subject='"+subject+"']/following-sibling::div//div[@class='public_problem_set']//a[contains(.,'Practice Tests')]"
       specialization_names_xpath="//div//div[@data-subject='"+subject+"']/following-sibling::div//div[@class='public_problem_set']//a[contains(.,'Practice Tests')]/../.."
       specialization_names=map(lambda x : x.get_attribute('data-subject'), driver.find_elements_by_xpath(specialization_names_xpath))
       specialization_clickables = driver.find_elements_by_xpath(specialization_clickables_xpath)
       specialization_pairs=zip(specialization_names, specialization_clickables)
       return specialization_pairs
    
    def get_practices(subject, specialization):
       practice_clickables_xpath="/html/body/div[3]/div[8]/div[3]/*/div[1]/a[1]"
       practice_names_xpath="//*/h3[@class='subject_header']"
       lengths_xpath="/html/body/div[3]/div[8]/div[3]/*/div[2]"
       lengths=map(lambda x : x.text, driver.find_elements_by_xpath(lengths_xpath))
       print(lengths)
       practice_names=map(lambda x : x.text, driver.find_elements_by_xpath(practice_names_xpath))
       practice_clickables = driver.find_elements_by_xpath(practice_clickables_xpath)
       practice_pairs=zip(practice_names, practice_clickables)
       return practice_pairs
    
    def remove_popup():
       try:
    
          button=wait.until(EC.element_to_be_clickable((By.XPATH,"//button[contains(.,'No Thanks')]")))
          button.location_once_scrolled_into_view
          button.click()
       except:
          print('could not find the popup')
    
    def get_questions(subject, specialization, practice):
       remove_popup()
       questions=[]
       current_question=None
       while True:
          question={}
          try:
             WebDriverWait(driver,5).until(EC.presence_of_element_located((By.XPATH,"/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[1]")))
             question_number=driver.find_element_by_xpath('/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[1]').text.replace('.','')
             question_pre=driver.find_element_by_class_name('question_pre')
             question_body=driver.find_element_by_xpath('/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[2]/p')
             answer_choices=driver.find_elements_by_class_name('question_row')
             answers=map(lambda x : x.text, answer_choices)
             question['id']=question_number
             question['pre']=question_pre.text
             question['body']=question_body.text
             question['answers']=list(answers)
             questions.append(question)
             choice=WebDriverWait(driver,20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,"input.test_button")))
             driver.execute_script("arguments[0].click();", choice[3])
             time.sleep(3)
          except Exception as e:
             if 'results' in driver.current_url:
                driver.get(driver.current_url.replace('http://', 'https://'))
                # last question has been answered; record results
                remove_popup()
                pathlib.Path('data/'+subject+'/'+specialization).mkdir(parents=True, exist_ok=True)
                with open('data/'+subject+'/'+specialization+'/questions.json', 'w') as outfile:
                   json.dump(list(questions), outfile)
                   break
             else:
                driver.get(driver.current_url.replace('http://', 'https://'))
       return questions
    
    
    def scrape():
       setup()
       subjects=get_subjects()
       for subject_name, subject_clickable in subjects:
          subject={}
          subject['name']=subject_name
          subject['specializations']=[]
          subject_clickable.click()
          subject_url=driver.current_url.replace('http://', 'https://')
          specializations=get_specializations(subject_name)
          for specialization_name, specialization_clickable in specializations:
             specialization={}
             specialization['name']=specialization_name
             specialization['practices']=[]
             specialization_clickable.click()
             specialization_url=driver.current_url.replace('http://', 'https://')
             practices=get_practices(subject_name, specialization_name)
             for practice_name, practice_clickable in practices:
                practice={}
                practice['name']=practice_name
                practice_clickable.click()
                questions=get_questions(subject_name, specialization_name, practice_name)
                practice['questions']=questions
                driver.get(specialization_url)
             driver.get(subject_url)
          data.append(subject)
       print(data)
    scrape()
    

Can anyone help me figure out what may be causing this?


Viewing all articles
Browse latest Browse all 98974

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>