JASPAR爬取Class信息


import requests
from bs4 import BeautifulSoup

# Input: one JASPAR matrix ID per line. Output: "<id>\t<class>" per line.
ID_PATH = "D:/群晖NAS/Desktop/MEME网站结果/JASPAR.id.txt"
OUT_PATH = "D:/群晖NAS/Desktop/MEME网站结果/JASPAR.爬虫结果.txt"

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 30


def _fetch_class_text(matrix_id):
    """Return the text scraped from the JASPAR matrix page for *matrix_id*.

    The value is taken from the second cell of the third row of the first
    ``table.table-hover`` element on the page.
    NOTE(review): the script's title says this row holds the "Class" field;
    the position is hard-coded, so confirm it against the live page layout.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the page does not contain the expected table cell.
    """
    # Build the URL of the page to scrape.
    url = 'https://jaspar.elixir.no/matrix/' + matrix_id + '/'
    print(url)

    # Send the GET request; fail loudly on 4xx/5xx instead of parsing an
    # error page.
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    profile = soup.find('table', class_='table table-hover')
    if profile is None:
        raise ValueError("profile table not found at " + url)

    tr_tags = profile.find_all("tr")
    if len(tr_tags) < 3:
        raise ValueError("profile table has fewer than 3 rows at " + url)

    td_tags = tr_tags[2].find_all('td')
    if len(td_tags) < 2:
        raise ValueError("expected row has fewer than 2 cells at " + url)

    # Collapse all runs of whitespace so that stray newlines or tabs inside
    # the cell cannot break the tab-separated output format.
    return " ".join(td_tags[1].get_text().split())


# Both files are managed by one `with` so they are closed (and the output
# flushed) even if a request or parse step raises midway through the list.
# "utf-8-sig" reads plain UTF-8/ASCII and also tolerates a leading BOM.
with open(OUT_PATH, "w", encoding="utf-8") as file_out, \
        open(ID_PATH, "r", encoding="utf-8-sig") as file_id:
    for line in file_id:
        matrix_id = line.strip()
        # Skip blank lines (e.g. a trailing newline at the end of the file),
        # which would otherwise yield a malformed URL.
        if not matrix_id:
            continue

        class_txt = _fetch_class_text(matrix_id)
        file_out.write(matrix_id + "\t" + class_txt + "\n")

Reprint policy: Unless otherwise stated, all articles in this blog are licensed under CC BY 4.0. If you reproduce this article, please credit the source: 李详【Xiang LI】!