I am using BeautifulSoup to scrape the required data, but I am only getting 98% accuracy. Can anyone tell me why?
Here is my code:
from urllib.request import urlopen
from bs4 import BeautifulSoup as soup
import pandas as pd

# Scrape the image source, title and price of every book listed on catalogue
# pages 1-50 of books.toscrape.com and write them to all_books.csv.

# Parallel output columns: entry k of each list describes the same book.
image = []
book = []
price = []

# Catalogue pages are requested as page-1.html through page-50.html.
for page in range(1, 51):
    url = f"http://books.toscrape.com/catalogue/page-{page}.html"

    # The context manager closes the HTTP response as soon as the body has
    # been read, instead of leaving the connection open until garbage
    # collection.
    with urlopen(url) as site_open:
        site_data = site_open.read()

    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed, emits a warning, and may build a
    # different tree on another machine.
    soup_data = soup(site_data, "html.parser")

    # NOTE(review): `src` is stored exactly as it appears in the markup. If
    # those values are relative paths, resolve them against `url` with
    # urllib.parse.urljoin before saving -- confirm what the consumer of the
    # CSV expects.
    image.extend(
        container.find("img")["src"]
        for container in soup_data.find_all("div", {"class": "image_container"})
    )

    # The full title is read from the link's `title` attribute rather than
    # from the link text.
    book.extend(
        heading.find("a")["title"]
        for heading in soup_data.find_all("h3")
    )

    # Prices are kept as the raw text of the tag (no numeric conversion).
    price.extend(
        tag.text
        for tag in soup_data.find_all("p", {"class": "price_color"})
    )

# pandas raises ValueError here if the three columns differ in length, so a
# page with a missing image, title or price cannot silently misalign rows.
df = pd.DataFrame({"image_url": image, "book_title": book, "product_price": price})
df.to_csv("all_books.csv", index=False)