Quantcast
Channel: Active questions tagged selenium - Stack Overflow
Viewing all articles
Browse latest Browse all 97813

Selenium/Scrapy - Returns null for some elements

$
0
0

My goal is to scrape tax data from public county websites. While using scrapy and selenium I was able to gather parcel ids, addresses and owner names but my output returned null for elements located in a table that disappears when I disable javascript.

Each search result from the particular county website returns a list of properties with links to their tax details. It is within these links where the data is I am attempting to scrape.

What is the best way to gather the land_value and total_appr? Again, when I disabled javascript from the chrome tools the values were invisible so do I have to use selenium in another for loop? How do I go about doing this? Thanks

from abc import ABC
from time import sleep

import scrapy
from scrapy import Spider
from scrapy.spiders import CrawlSpider
from selenium import webdriver
from scrapy.selector import Selector
from scrapy.http import Request


class PropertiesSpider(Spider, ABC):
    name = 'properties'
    allowed_domains = ['gwinnettassessor.manatron.com/IWantTo/PropertyGISSearch.aspx']

    def start_requests(self):
        self.driver = webdriver.Chrome('/Users/wenzeljoe/CondaProjects/chromedriver')
        self.driver.get('http://gwinnettassessor.manatron.com/IWantTo/PropertyGISSearch.aspx')

        search_bar = self.driver.find_element_by_xpath("//table/tbody/tr/td/div/input")
        search_bar.send_keys("smith")

        button = self.driver.find_element_by_xpath("//button/span")
        button.click()

        sel = Selector(text=self.driver.page_source)
        properties = sel.xpath("//ul[2]/li/a/@href").extract()
        for property in properties:
            url = 'http://gwinnettassessor.manatron.com/IWantTo/PropertyGISSearch.aspx' + property
            yield Request(url, callback=self.parse_property)

    def parse_property(self, response):
        parcel_id = response.xpath(
            '//table[@class="ui-widget-content ui-table generalinfo"]/tbody/tr[2]/td/text()').extract_first()
        address = response.xpath(
            '//table[@class="ui-widget-content ui-table generalinfo"]/tbody/tr[4]/td/text()').extract_first()
        name = response.xpath(
            '//table[@class="ui-widget-content ui-table generalinfo"]/tbody/tr/td/text()').extract_first()
        land_value = response.xpath("//table[@class='ui-table']/tbody/tr[3]/td/text()").extract_first()
        total_appr = response.xpath("//td[@class='ui-widget String ui-state-default']/text()").extract_first()
        yield {
            'parcel_id': parcel_id,
            'address': address,
            'owner_name': name,
            'land_value': land_value,
            'total_appraisal': total_appr

        }

Viewing all articles
Browse latest Browse all 97813

Trending Articles