here’s the code i wrote
import scrapy
from scrapy.http import request
class Bider(scrapy.Spider):
name=“Bider”
def start_requests(self):
urls=[
"https://books.toscrape.com/catalogue/page-1.html"
]
for url in urls:
yield scrapy.Request(url=url, callback=self.parse)
def parse(self, response):
page_no= response.url[-6]
file_name="BooksInfo{}.html".format(page_no)
#with open(file_name,"wb") as f:
# f.write(response.body)
books_data=response.css("article.product_pod")
book_titles=books_data.css("h3 a::text").getall()
product_prices=books_data.css("div.product_price p.price_color::text").getall()
i=1
for data in books_data:
book_title=books_data.css("h3 a::text").get
product_price=books_data.css("div.product_price p.price_color::text").get()
yield {
"image_url":"https://books.toscrape.com/catalogue/page-{}.html".format(i),
"book_title":book_title,
"product_price":product_price
}
i+=1
next_link=response.css("li.next a::attr(href)").get()
if next_link is not None:
next_link=response.urljoin(next_link)
yield scrapy.Request(url=next_link,callback=self.parse)
i am getting a type error object of type is not json serializable on running it in command prompt
DOUBT 2
if i make a csv inside visual studios using the code below where is it going to be saved?? i can’t find it on my system
columns=[“image_url”, “book_title”, “product_price”]
Rows= [
"https://books.toscrape.com/catalogue/page-{}.html".format(i),
book_title,
product_price
]
with open(FName,"w",encoding='utf-8') as f:
columns_string=",".join(columns)
columns_string +="\n"
f.write(columns_string)
rows_string=",".join(Rows)
rows_string +="\n"
f.write(rows_string)