import scrapy


class QuotesSpider(scrapy.Spider):
    # inheriting from the Spider base class
    name = "quotes_spider"  # the spider's name attribute

    def start_requests(self):
        # generator that issues a GET request for each URL we want to scrape
        urls = [
            "http://quotes.toscrape.com/page/1/",
            "http://quotes.toscrape.com/page/2/",
            "http://quotes.toscrape.com/page/3/",
        ]
        for url in urls:
            # request each URL and register parse() as the callback
            # that handles the response when it comes back
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        # figure out which page the response came from: split the URL on "/"
        # and take the second-to-last element to get the page id
        page_id = response.url.split("/")[-2]
        # save the response body as an HTML file in the working directory
        filename = "quotes-%s.html" % page_id
        with open(filename, "wb") as f:
            f.write(response.body)
        self.log("saved file %s" % filename)

from scrapy.crawler import CrawlerProcess
process = CrawlerProcess()
process.crawl(QuotesSpider)
process.start()
This is throwing a "ReactorNotRestartable" error.
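For context on that error: CrawlerProcess starts Twisted's reactor, and a reactor can only be started once per Python process, so calling process.start() a second time in the same interpreter (for example, re-running a notebook cell or script cell) raises ReactorNotRestartable. Below is a minimal sketch of one common workaround, assuming QuotesSpider is defined in (or importable from) this module: run each crawl in a fresh child process so every run gets its own reactor.

import multiprocessing

from scrapy.crawler import CrawlerProcess


def run_spider():
    # runs in a child process, so the reactor here has never been started
    process = CrawlerProcess()
    process.crawl(QuotesSpider)
    process.start()  # blocks until the crawl finishes


if __name__ == "__main__":
    # each call below gets its own process and therefore its own reactor,
    # so the crawl can be repeated without hitting ReactorNotRestartable
    p = multiprocessing.Process(target=run_spider)
    p.start()
    p.join()

Alternatively, running the spider from inside a Scrapy project with the scrapy crawl quotes_spider command starts a fresh process each time, which sidesteps the issue as well.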