Scraping results
Now that we have complete implementations for each scraper, we can test their relative performance with the following snippet. The imports expect a directory structure similar to the book's repository, so adjust them as necessary:
import time
import re
from chp2.all_scrapers import re_scraper, bs_scraper, lxml_scraper, lxml_xpath_scraper
from chp1.advanced_link_crawler import download

NUM_ITERATIONS = 1000  # number of times to test each scraper
html = download('http://example.webscraping.com/places/view/United-Kingdom-239')

scrapers = [
    ('Regular expressions', re_scraper),
    ('BeautifulSoup', bs_scraper),
    ('Lxml', lxml_scraper),
    ('Xpath', lxml_xpath_scraper)]

for name, scraper in scrapers:
    # record start time of scrape
    start = time.time()
    for i in range(NUM_ITERATIONS):
        if scraper == re_scraper:
            re.purge()
        result = scraper(html)
        # check scraped result is as expected
        assert result['area'] == '244,820 square kilometres'
    # record end time of scrape and output the total
    end = time.time()
    print('%s: %.2f seconds' % (name, end - start))
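One detail worth noting is the re.purge() call before each regular-expression iteration: Python's re module caches compiled patterns, so without clearing that cache the regex scraper would be timed with an unfairly warm cache relative to a fresh crawl. The short sketch below only illustrates this caching effect; the pattern and sample string are hypothetical and are not part of the book's scrapers.

import re
import timeit

PATTERN = r'<td class="w2p_fw">(.*?)</td>'  # hypothetical pattern, for illustration only
SAMPLE = '<td class="w2p_fw">example value</td>'

def cached_run():
    # re.findall compiles PATTERN once, then reuses the cached compiled object
    re.findall(PATTERN, SAMPLE)

def purged_run():
    re.purge()  # clear the compiled-pattern cache, as the benchmark does
    re.findall(PATTERN, SAMPLE)

print('cached: %.4f seconds' % timeit.timeit(cached_run, number=1000))
print('purged: %.4f seconds' % timeit.timeit(purged_run, number=1000))

The purged variant is typically noticeably slower, which is why the benchmark clears the cache on every iteration: it keeps the comparison against BeautifulSoup and lxml fair rather than rewarding the regex scraper for pattern caching.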