python

基于 Selenium，利用聚查（jucha.com）对 GoDaddy 域名历史进行自动化检查

作者1 2022年10月20日

"""
godaddy域名自动化检查
"""
from selenium import webdriver
import time
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import json
import re
from importcsv import importCsv


# Extracts the JSON payload from a JSONP body such as ``callback_123({...});``.
_JSONP_CALLBACK_RE = re.compile(r'callback_\d+\((.+)\);')

# Substring identifying the history-search request whose response is inspected.
_SEARCH_URL_MARKER = 'http://www.jucha.com:8866/item/search?domain'


def _history_qualifies(payload):
    """Return True when the parsed search payload passes all three filters.

    The filters are: ``yy`` equals '中文', ``int(nl) > 5`` and ``int(jls) > 10``.
    NOTE(review): presumably these are site language, domain age and number
    of history records -- confirm against the jucha.com API.

    Raises:
        KeyError: if the payload lacks the expected nested keys.
    """
    record = payload['data']['lishi']['data']['data']
    return (
        record['yy'] == '中文'
        and int(record['nl']) > 5
        and int(record['jls']) > 10
    )


def checkDomain(domain):
    """Look up ``domain`` on the jucha.com history page and filter the result.

    Opens Chrome, loads the cookies stored in ``cookie.txt``, submits the
    domain, drags the slider captcha, then scans Chrome's performance log
    for the search request and reads its response body through the
    DevTools protocol.

    Args:
        domain: Text typed into the search box.

    Returns:
        ``domain`` if a search response passes the filters, otherwise None.

    Raises:
        Any Selenium error raised while driving the page, OSError/ValueError
        if ``cookie.txt`` is missing or not valid JSON, and KeyError if a
        search response lacks the expected fields. The browser is shut down
        in every case.
    """
    # NOTE(review): this mutates the shared DesiredCapabilities.CHROME dict.
    # Selenium 3's Options() appears to copy that dict, so the setting may
    # influence the final capabilities -- kept as-is; confirm before removing.
    DesiredCapabilities.CHROME['loggingPrefs'] = {'performance': 'ALL'}

    chrome_options = Options()
    # Important: without this the logs cannot be retrieved in headless mode.
    chrome_options.add_experimental_option('w3c', False)
    # Key line: hides the "controlled by automation" (webdriver) marker.
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    caps = {
        'browserName': 'chrome',
        'loggingPrefs': {
            'browser': 'ALL',
            'driver': 'ALL',
            'performance': 'ALL',
        },
        'goog:chromeOptions': {
            'perfLoggingPrefs': {
                'enableNetwork': True,
            },
            'w3c': False,
        },
    }
    # No chromedriver path is passed here; supply one if it is not on PATH.
    driver = webdriver.Chrome(desired_capabilities=caps, chrome_options=chrome_options)

    try:
        # Use the full URL, including the http/https scheme.
        driver.get('http://www.jucha.com/lishi/')

        # Drop whatever cookies the freshly opened browser already has.
        driver.delete_all_cookies()

        time.sleep(3)

        # Load the saved cookies; the file is closed as soon as it is parsed.
        with open('cookie.txt') as cookie_file:
            cookie_list = json.load(cookie_file)
        for c in cookie_list:
            driver.add_cookie(c)

        driver.refresh()

        # Type the domain into the search box.
        driver.find_element_by_xpath('/html/body/div[2]/div[1]/div[2]/div[2]/div[2]/div[1]/textarea').send_keys(domain)
        # Click the query button.
        driver.find_element_by_xpath('/html/body/div[2]/div[1]/div[2]/div[2]/div[2]/div[2]/div[1]/div/button').click()

        time.sleep(2)

        # Slider handle.
        ele_button = driver.find_element_by_xpath('//*[@id="nc_1_n1z"]')
        # Slider track; its size gives the drag offset.
        ele = driver.find_element_by_xpath('//*[@id="nc_1__scale_text"]/span')
        # Drag the handle to the end of the track.
        ActionChains(driver).drag_and_drop_by_offset(ele_button, ele.size['width'], ele.size['height']).perform()

        time.sleep(3)

        for entry in driver.get_log('performance'):
            params = json.loads(entry['message'])['message']['params']
            # .get() avoids an error for log events that carry no request.
            request = params.get('request')
            if request is None:
                continue
            if _SEARCH_URL_MARKER not in (request.get('url') or ''):
                continue

            request_id = params['requestId']
            print(request_id)
            # Fetch the response body of that request by its requestId.
            content = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': request_id})
            match = _JSONP_CALLBACK_RE.search(content['body'])
            if match is None:
                # Body is not the expected JSONP wrapper; try the next entry.
                continue
            payload = json.loads(match.group(1))
            print(payload)
            if _history_qualifies(payload):
                return domain
        return None
    finally:
        # quit() ends the session and the chromedriver process on every exit
        # path, including the early return above and any exception.
        driver.quit()


if __name__ == '__main__':
    # Check every entry read from the CSV export and print each one that
    # checkDomain reports back (it returns None for the rest).
    for candidate in importCsv('Export.csv'):
        matched = checkDomain(candidate)
        if matched is None:
            continue
        print(matched)