Imagine you are on this Twitter search page and you need to collect all of its tweet IDs: https://twitter.com/search?l=fr&q=%23metoo%20since%3A2017-11-06%20until%3A2017-11-09&src=typd
I am using Selenium to scroll down until there are no more tweets left, and then save all the IDs in a list.
I'm afraid my for loop doesn't save them, though — what am I doing wrong?
# Scrape tweet IDs from Twitter search results, one day at a time.
# Requires in scope: driver (selenium webdriver), days, start, delay,
# format_day, increment_day, form_url, and the selenium exceptions
# StaleElementReferenceException / NoSuchElementException.
twitter_ids_filename = 'all_ids.json'
id_selector = '.time a.tweet-timestamp'
tweet_selector = 'li.js-stream-item'
ids = []

for day in range(days):
    # One-day search window [d1, d2).
    d1 = format_day(increment_day(start, 0))
    d2 = format_day(increment_day(start, 1))
    url = form_url(d1, d2)
    print(url)
    print(d1)
    driver.get(url)
    sleep(delay)

    try:
        found_tweets = driver.find_elements_by_css_selector(tweet_selector)
        increment = 10
        # Twitter lazy-loads results in batches of ~10; keep scrolling
        # while each scroll still yields a full new batch.
        while len(found_tweets) >= increment:
            print('scrolling down to load more tweets')
            driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
            sleep(delay)
            found_tweets = driver.find_elements_by_css_selector(tweet_selector)
            increment += 10
        print('{} tweets found, {} total'.format(len(found_tweets), len(ids)))

        for tweet in found_tweets:
            try:
                # The tweet permalink's last path segment is its numeric ID.
                # Renamed from `id` to avoid shadowing the builtin.
                tweet_id = tweet.find_element_by_css_selector(id_selector).get_attribute('href').split('/')[-1]
                ids.append(tweet_id)
            except StaleElementReferenceException as e:
                # DOM re-rendered under us; skip this element, keep the rest.
                print('lost element reference', tweet)
    except NoSuchElementException:
        print('no tweets on this day')

    # BUG FIX: advance the window. The original never moved `start`, so
    # every iteration rebuilt the same URL and re-scraped the same day.
    # (Assumes increment_day returns a new date and does not mutate its
    # argument — confirm against its definition.)
    start = increment_day(start, 1)