I am getting 86% accuracy, even though I have the data for all 1000 books.
Here is my code:
import scrapy
class BookSpider(scrapy.Spider):
    """Spider that walks the books.toscrape.com listing pages.

    Every ``article.product_pod`` element on a listing page produces one
    item with the keys ``title``, ``price`` and ``link`` (the cover image
    URL). Pagination is followed through the ``li.next`` link until a page
    has no such link.
    """

    name = "books_spider"

    def start_requests(self):
        """Yield the initial request(s); each response is handled by ``parse``."""
        urls = [
            "http://books.toscrape.com/",
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Yield one item per book on the page, then a request for the next page.

        Args:
            response: The Scrapy response for a listing page.

        Yields:
            dict: ``{'title': ..., 'price': ..., 'link': ...}`` for every
                book; any value is ``None`` when its selector matches nothing.
            scrapy.Request: A request for the next listing page, if the
                current page has a ``li.next`` link.
        """
        for book in response.css("article.product_pod"):
            image_src = book.css("div.image_container img::attr(src)").get()
            title = book.css("h3 a::attr(title)").get()
            price = book.css("p.price_color::text").get()

            # The src attribute may be a page-relative path, in which case
            # the same image would be spelled differently depending on which
            # page it was scraped from. Resolving it against the page URL
            # gives a stable absolute URL (already-absolute values pass
            # through unchanged). A missing src stays None instead of being
            # turned into the page URL.
            link = response.urljoin(image_src) if image_src is not None else None

            yield {
                "title": title,
                "price": price,
                "link": link,
            }

        next_page = response.css("li.next a::attr(href)").get()
        if next_page is not None:
            # The href may be relative to the current page; make it absolute.
            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
Code to convert the JSON output to CSV:
import csv
import json

# Header names of the generated CSV, in output order.
CSV_COLUMNS = ("image_url", "book_title", "product_price")
# Keys read from each scraped item, in the same order as CSV_COLUMNS.
ITEM_KEYS = ("link", "title", "price")


def json_to_csv(json_path, csv_path):
    """Convert a JSON array of scraped book items into a CSV file.

    Each item is expected to be an object with the keys ``link``, ``title``
    and ``price``; they are written under the headers ``image_url``,
    ``book_title`` and ``product_price``. A missing or ``null`` value is
    written as an empty field.

    Args:
        json_path: Path of the JSON file holding a list of item objects.
        csv_path: Path of the CSV file to create (overwritten if present).

    Returns:
        int: The number of data rows written (the header is not counted).
    """
    with open(json_path, encoding="utf-8") as src:
        books = json.load(src)

    # newline="" lets the csv module control line endings itself, and UTF-8
    # keeps non-ASCII characters (e.g. a currency sign in the price) intact.
    with open(csv_path, "w", newline="", encoding="utf-8") as dst:
        # csv.writer quotes any field containing a comma, quote or newline,
        # so a title such as "Eat, Pray, Love" stays in a single column.
        # lineterminator="\n" keeps the "\n" row ending the script always used.
        writer = csv.writer(dst, lineterminator="\n")
        writer.writerow(CSV_COLUMNS)
        # Iterate over whatever was scraped rather than assuming 1000 items.
        writer.writerows([book.get(key) for key in ITEM_KEYS] for book in books)

    return len(books)


if __name__ == "__main__":
    json_to_csv("book_store/book_store/books.json", "book_store.csv")