18 lines
585 B
Python
18 lines
585 B
Python
from urllib.parse import urlsplit

import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class MySpider(CrawlSpider):
    """Crawl docs.microsoft.com from the Group Policy start page and save pages.

    The single crawl rule extracts links from every crawled page, follows
    them, and hands each response to :meth:`parse_item`, which writes the
    raw response body to an ``.html`` file in the current working directory.
    """

    name = 'microsoft.com'
    allowed_domains = ['docs.microsoft.com']
    start_urls = ['https://docs.microsoft.com/en-us/previous-versions/windows/desktop/policy/group-policy-start-page']

    rules = (
        Rule(LinkExtractor(), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        """Save the body of ``response`` to ``<page-slug>.html``.

        The slug is the last non-empty segment of the URL path, so it is the
        same whether or not the URL ends with a slash, and it ignores any
        query string or fragment. URLs with an empty path (the site root)
        are saved as ``index.html``.

        Args:
            response: The response passed in by the crawl rule; only its
                ``url`` and ``body`` attributes are read.
        """
        # Indexing the raw URL with [-2] only yields the page slug for URLs
        # that end in "/". For URLs without a trailing slash it yields the
        # parent directory, so sibling pages overwrote each other's file.
        segments = [part for part in urlsplit(response.url).path.split('/') if part]
        slug = segments[-1] if segments else 'index'
        filename = slug + '.html'
        # Binary mode: response.body is written as-is, with no re-encoding.
        with open(filename, 'wb') as f:
            f.write(response.body)