文件夹下csv数据写入mysql数据库

import os
import pandas as pd
import csv
import pymysql


def save_db(row):
    db = pymysql.connect(host='127.0.0.1', user='shijuan', password='123456', database='shijuan',
                         charset="utf8")
    cursor = db.cursor()
    insert_sql = 'insert into tp_keyword(keyword) values(%s)'
    try:
        # cursor.execute(insert_sql)
        cursor.executemany(insert_sql, row)
        db.commit()
    except:
        # 如果发生错误则回滚
        db.rollback()
        cursor.close()
        # 关闭数据库连接
        db.close()


path = 'E:\网站\关键词\\12'
files = os.listdir(path)
for file in files:
    url=path + '\\' + file
    df = pd.read_csv(url,low_memory=False,quoting=csv.QUOTE_NONE)
    result = df.values.tolist()
    # print(result)
    save_db(result)

 

转换csv编码格式为utf-8

# -*- coding: utf-8 -*-
import os
from chardet.universaldetector import UniversalDetector


def get_filelist(path):
    """
    获取路径下所有csv文件的路径列表
    """
    Filelist = []
    for home, dirs, files in os.walk(path):
        for filename in files:
            if ".csv" in filename:
                Filelist.append(os.path.join(home, filename))
    return Filelist


def read_file(file):
    """
    逐个读取文件的内容
    """
    with open(file, 'rb') as f:
        return f.read()


def get_encode_info(file):
    """
    逐个读取文件的编码方式
    """
    with open(file, 'rb') as f:
        detector = UniversalDetector()
        for line in f.readlines():
            detector.feed(line)
            if detector.done:
                break
        detector.close()
        return detector.result['encoding']


def convert_encode2utf8(file, original_encode, des_encode):
    """
    将文件的编码方式转换为utf-8,并写入原先的文件中。
    """
    file_content = read_file(file)
    file_decode = file_content.decode(original_encode, 'ignore')
    file_encode = file_decode.encode(des_encode)
    with open(file, 'wb') as f:
        f.write(file_encode)


def read_and_convert(path):
    """
    读取文件并转换
    """
    Filelist = get_filelist(path=path)
    fileNum = 0
    for filename in Filelist:
        try:
            file_content = read_file(filename)
            encode_info = get_encode_info(filename)
            if encode_info != 'utf-8':
                fileNum += 1
                convert_encode2utf8(filename, encode_info, 'utf-8')
                print('成功转换 %s 个文件 %s ' % (fileNum, filename))
        except BaseException:
            print(filename, '存在问题,请检查!')


def recheck_again(path):
    """
    再次判断文件是否为utf-8
    """
    print('---------------------以下文件仍存在问题---------------------')
    Filelist = get_filelist(path)
    for filename in Filelist:
        encode_info_ch = get_encode_info(filename)
        if encode_info_ch != 'utf-8':
            print(filename, '的编码方式是:', encode_info_ch)

    print('--------------------------检查结束--------------------------')


if __name__ == "__main__":
    """
    输入文件路径
    """
    path = 'E:\网站\关键词\\123'
    read_and_convert(path)
    recheck_again(path)
    print('转换结束!')

 

基于selenium godaddy域名历史利用聚名自动化检查

"""
godaddy域名自动化检查
"""
from selenium import webdriver
import time
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import json
import re
from importcsv import importCsv


def checkDomain(domain):
    d = DesiredCapabilities.CHROME
    d['loggingPrefs'] = {'performance': 'ALL'}
    chrome_options = Options()
    chrome_options.add_experimental_option('w3c', False)  # 重要参数,不添加在无头模式无法获取日志!!
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')  # 重点代码:去掉了webdriver
    caps = {
        'browserName': 'chrome',
        'loggingPrefs': {
            'browser': 'ALL',
            'driver': 'ALL',
            'performance': 'ALL',
        },
        'goog:chromeOptions': {
            'perfLoggingPrefs': {
                'enableNetwork': True,
            },
            'w3c': False,
        },
    }
    # 填写webdriver的保存目录
    driver = webdriver.Chrome(desired_capabilities=caps, chrome_options=chrome_options)

    # 记得写完整的url 包括http和https
    driver.get('http://www.jucha.com/lishi/')

    # 首先清除由于浏览器打开已有的cookies
    driver.delete_all_cookies()

    time.sleep(3)

    f1 = open('cookie.txt')
    cookie = f1.read()
    cookie_list = json.loads(cookie)  # json读取cookies
    for c in cookie_list:
        driver.add_cookie(c)  # 取出的cookie循环加入driver

    driver.refresh()

    # 定位到搜索框元素,并且填充内容为域名
    driver.find_element_by_xpath('/html/body/div[2]/div[1]/div[2]/div[2]/div[2]/div[1]/textarea').send_keys(domain)
    # 点击查询按钮
    driver.find_element_by_xpath('/html/body/div[2]/div[1]/div[2]/div[2]/div[2]/div[2]/div[1]/div/button').click()

    time.sleep(2)

    # 获取滑块位置
    ele_button = driver.find_element_by_xpath('//*[@id="nc_1_n1z"]')
    # 获取滑条
    ele = driver.find_element_by_xpath('//*[@id="nc_1__scale_text"]/span')
    # print('滑块区域的宽:', ele.size['width'])
    # print('滑块区域的高:', ele.size['height'])
    # 拖动滑块滑条末尾
    ActionChains(driver).drag_and_drop_by_offset(ele_button, ele.size['width'], ele.size['height']).perform()

    time.sleep(3)

    request_log = driver.get_log('performance')
    # for entry in logs:
    #             params = json.loads(entry.get('message')).get('message').get('params')
    #             print(params)  # 请求连接 包含错误连接
    for i in range(len(request_log)):
        message = json.loads(request_log[i]['message'])
        message = message['message']['params']
        # .get() 方式获取是了避免字段不存在时报错
        request = message.get('request')
        if (request is None):
            continue
        url = request.get('url')
        # print(url)
        if 'http://www.jucha.com:8866/item/search?domain' in url:
            # 得到requestId
            print(message['requestId'])
            # 通过requestId获取接口内容
            content = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': message['requestId']})
            pattern = re.compile(r'callback_\d+\((.+)\);')
            newstr = re.search(pattern, content['body'])
            jsonDate = json.loads(newstr.group(1))
            print(jsonDate)
            if jsonDate['data']['lishi']['data']['data']['yy'] == '中文':
                if int(jsonDate['data']['lishi']['data']['data']['nl']) > 5:
                    if int(jsonDate['data']['lishi']['data']['data']['jls']) > 10:
                        return domain
    driver.close()


if __name__ == '__main__':
    rows = importCsv('Export.csv')
    for row in rows:
        newDomain = checkDomain(row)
        if newDomain is not None:
            print(newDomain)

 

复杂sql语句练习(mysql)

下面是练习表数据和结构

SET FOREIGN_KEY_CHECKS=0;

— —————————-
— Table structure for course
— —————————-
DROP TABLE IF EXISTS `course`;
CREATE TABLE `course` (
`id` int(11) NOT NULL,
`name` varchar(22) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

— —————————-
— Records of course
— —————————-
INSERT INTO `course` VALUES (‘30001’, ‘物理’);
INSERT INTO `course` VALUES (‘30002’, ‘政治’);
INSERT INTO `course` VALUES (‘30003’, ‘语文’);
INSERT INTO `course` VALUES (‘30004’, ‘高数’);
INSERT INTO `course` VALUES (‘30005’, ‘英语’);

— —————————-
— Table structure for student
— —————————-
DROP TABLE IF EXISTS `student`;
CREATE TABLE `student` (
`id` int(11) NOT NULL,
`name` varchar(22) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

— —————————-
— Records of student
— —————————-
INSERT INTO `student` VALUES (‘10001’, ‘tom’);
INSERT INTO `student` VALUES (‘10002’, ‘json’);
INSERT INTO `student` VALUES (‘10003’, ‘ak’);
INSERT INTO `student` VALUES (‘10004’, ‘km’);
INSERT INTO `student` VALUES (‘10005’, ‘mk’);

— —————————-
— Table structure for student_course
— —————————-
DROP TABLE IF EXISTS `student_course`;
CREATE TABLE `student_course` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`student_id` int(11) NOT NULL,
`course_id` int(11) NOT NULL,
`teacher_id` int(11) NOT NULL,
`score` int(11) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=51 DEFAULT CHARSET=utf8;

— —————————-
— Records of student_course
— —————————-
INSERT INTO `student_course` VALUES (‘4’, ‘10001’, ‘30002’, ‘20002’, ’82’);
INSERT INTO `student_course` VALUES (‘6’, ‘10001’, ‘30003’, ‘20003’, ’82’);
INSERT INTO `student_course` VALUES (‘8’, ‘10001’, ‘30004’, ‘20005’, ’82’);
INSERT INTO `student_course` VALUES (’10’, ‘10001’, ‘30005’, ‘20005’, ’82’);
INSERT INTO `student_course` VALUES (’12’, ‘10002’, ‘30001’, ‘20001’, ’90’);
INSERT INTO `student_course` VALUES (’14’, ‘10002’, ‘30002’, ‘20002’, ’92’);
INSERT INTO `student_course` VALUES (’16’, ‘10002’, ‘30003’, ‘20003’, ’62’);
INSERT INTO `student_course` VALUES (’18’, ‘10002’, ‘30004’, ‘20004’, ’82’);
INSERT INTO `student_course` VALUES (’20’, ‘10002’, ‘30005’, ‘20005’, ’82’);
INSERT INTO `student_course` VALUES (’22’, ‘10003’, ‘30001’, ‘20001’, ’69’);
INSERT INTO `student_course` VALUES (’24’, ‘10003’, ‘30002’, ‘20002’, ’89’);
INSERT INTO `student_course` VALUES (’26’, ‘10003’, ‘30003’, ‘20003’, ’99’);
INSERT INTO `student_course` VALUES (’28’, ‘10003’, ‘30004’, ‘20004’, ’82’);
INSERT INTO `student_course` VALUES (’30’, ‘10003’, ‘30005’, ‘20005’, ’82’);
INSERT INTO `student_course` VALUES (’32’, ‘10004’, ‘30001’, ‘20001’, ’92’);
INSERT INTO `student_course` VALUES (’34’, ‘10004’, ‘30002’, ‘20002’, ’93’);
INSERT INTO `student_course` VALUES (’36’, ‘10004’, ‘30003’, ‘20003’, ’73’);
INSERT INTO `student_course` VALUES (’38’, ‘10004’, ‘30004’, ‘20004’, ’82’);
INSERT INTO `student_course` VALUES (’40’, ‘10004’, ‘30005’, ‘20005’, ’82’);
INSERT INTO `student_course` VALUES (’42’, ‘10005’, ‘30001’, ‘20001’, ’95’);
INSERT INTO `student_course` VALUES (’44’, ‘10005’, ‘30002’, ‘20002’, ’75’);
INSERT INTO `student_course` VALUES (’46’, ‘10005’, ‘30003’, ‘20003’, ’79’);
INSERT INTO `student_course` VALUES (’48’, ‘10005’, ‘30004’, ‘20004’, ’82’);
INSERT INTO `student_course` VALUES (’50’, ‘10005’, ‘30005’, ‘20005’, ’82’);

— —————————-
— Table structure for teacher
— —————————-
DROP TABLE IF EXISTS `teacher`;
CREATE TABLE `teacher` (
`id` int(11) NOT NULL,
`name` varchar(22) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

— —————————-
— Records of teacher
— —————————-
INSERT INTO `teacher` VALUES (‘20001’, ‘su’);
INSERT INTO `teacher` VALUES (‘20002’, ‘wang’);
INSERT INTO `teacher` VALUES (‘20003’, ‘zhou’);
INSERT INTO `teacher` VALUES (‘20004’, ‘yang’);
INSERT INTO `teacher` VALUES (‘20005’, ‘liu’);

 

4个表,包括student,course,student_course,teacher

详细练习:

— 1.查询物理成绩比英语成绩高的所有学生
select * from
(select * from student_course WHERE course_id=30001) a,
(select * from student_course WHERE course_id=30005) b
where a.score>b.score and a.student_id=b.student_id;

— 2.查询平均成绩大于60分的同学的学号和平均成绩;
select a.student_id,avg(a.score) from
student_course a
GROUP BY a.student_id
having avg(a.score)>60 ;

— 3查询所有同学的学号,姓名、选课数、总成绩;
select b.student_id,a.name,COUNT(b.course_id),SUM(b.score)
from student a ,student_course b
where a.id=b.student_id
GROUP BY b.student_id ;

— 4、查询名字l开头的老师的个数;
select COUNT(DISTINCT(name)) FROM teacher where name like ‘l%’;

— — 5,查询没学过“liu”老师课的同学的学号、姓名;
select * from student where id not in(
select DISTINCT(student_id) from student_course where teacher_id=(
select DISTINCT(id) from teacher where name=”liu”)
);

— 6 学过30001和30002课程的同学的学号、姓名;–
select a.student_id,b.name
from student_course a,student b
where a.student_id=b.id and a.course_id=30001 and exists(
select* from student_course s2 where s2.student_id=a.student_id and s2.course_id=30002
)
group by a.student_id;

— 7 学过30001和30002课程的同学的学号
select a.student_id from
(select * from student_course where course_id=30001) a inner join
(select * from student_course where course_id=30002) b
on a.student_id=b.student_id;

— 8 、查询学过“liu”老师所教的课的所有同学的学号、姓名;

select * from student where id in
(
select distinct(student_id) from student_course where teacher_id=
(
select distinct(id) from teacher where name=”liu”
)
);

— 9查询课程编号“30002”的成绩比课程编号“30001”课程低的所有同学的学号、姓名;
select*from student where id in(
select distinct(a.student_id) from
(select * from student_course where course_id=30002) a inner join (select * from student_course where course_id=30001) b
on a.score<b.score)
;

— 10 查询没有学全所有课的同学的学号、姓名
select a.student_id,b.name
from student_course a,student b
where a.student_id=b.id
group by a.student_id
having count(a.course_id) <> (select count(*) from course);

— 11查询所有课程成绩小于90分的同学的学号、姓名;
select * from student where id not in (
select student_id from student_course where score>90
);

— 12查询至少学过学号为“10001”同学所有一门课的其他同学学号和姓名
select *from student where id in (
SELECT distinct(student_id) FROM student_course where student_id <> 10001 and course_id in
(
SELECT course_id FROM student_course where student_id=10001
)
);

select distinct a.id,a.name from student a, student_course b where a.id=b.student_id and b.student_id <> 10001 and b.course_id in(
SELECT course_id FROM student_course where student_id=10001
);

— 13把“student_course”表中“liu”老师教的课的成绩都更改为此课程的平均成绩
update student_course set score =(
select score1 from (
select avg(b.score) as score1 from teacher a,student_course b where a.name=”liu” and b.teacher_id=a.id
)
as tablename2
)where teacher_id=(select id from teacher where name=”liu”);

select * from student_course;
update student_course set score=82.1667 where teacher_id=20004;
SET SQL_SAFE_UPDATES = 0;
show variables like ‘SQL_SAFE_UPDATES’;

— 14查询和“10002”号的同学学习的课程完全相同的其他同学学号和姓名;

select b.id,b.name from student_course a,student b
where a.course_id in(
select course_id from student_course where student_id=10002
) and a.student_id=b.id and a.student_id <> 10002
group by b.id
having count(*)=(select count(*) from student_course where student_id=10002);

— 15 删除学习“su”老师课的student_course表记录;
delete from student_course
where teacher_id=(
select id from teacher where name=”su”
);

— 16查找要求符合以下条件:没有上过编号“30003”课程的同学学号、
— “30003”号课的平均成绩;
select id,(select avg(score) from student_course where course_id=”30003″) from student where id not in(
select student_id from student_course where course_id=”30003″
);

— 17按平均成绩从高到低显示所有学生的“物理”、“政治”、“英语”三门的课程成绩,
— 按如下形式显示: 学生ID,物理,政治,英语,有效课程数,有效平均分
select t.student_id as 学生ID,
(select score from student_course where t.student_id=student_id and course_id=”30001″ ) as 物理,
(select score from student_course where t.student_id=student_id and course_id=”30002″ ) as 政治,
(select score from student_course where t.student_id=student_id and course_id=”30005″ ) as 英语,
count(*) AS 有效课程数, AVG(t.score) AS 平均成绩
from student_course t
group by t.student_id
order by avg(t.score);

— 18 查询各科成绩最高和最低的分:以如下形式显示:课程ID,最高分,最低分
select course_id as 课程ID,max(score) as 最高分,min(score) as 最低分
from student_course
group by course_id;

— 19 按各科平均成绩从低到高和及格率
select course_id as 课程ID,avg(score) as 平均成绩,
100*sum(case when score>=60 then 1 else 0 end) /count(*) as 及格百分数
from student_course t
group by course_id
order by avg(score) desc;

— 20.查询如下课程平均成绩和及格率的百分数(用”1行”显示):
— 物理(30001),政治(30002),语文 (30003),高数(30004)

select
(select avg(score) from student_course where course_id=”30001″) as 物理,
(select avg(score) from student_course where course_id=”30002″) as 政治,
(select avg(score) from student_course where course_id=”30003″) as 语文,
(select avg(score) from student_course where course_id=”30004″) as 高数,
100*sum(case when score>=60 then 1 else 0 end)/count(*) as 及格率,
avg (score) as 平均成绩
from student_course
group by course_id;

— 21 查询不同老师所教不同课程平均分从高到低显示
select avg(score)
from student_course
group by teacher_id
order by avg(score) desc;

— 22统计列印各科成绩,各分数段人数:课程ID,课程名称,[100-85],[85-70],[70-60],[ <60]
select a.course_id as 课程ID,
b.name as 课程名称,
sum(case when a.score between 85 and 100 then 1 else 0 end) as “[100-85]”,
sum(case when a.score between 70 and 85 then 1 else 0 end) as “[85-70]”,
sum(case when a.score between 65 and 70 then 1 else 0 end) as “[70-60]”,
sum(case when a.score <60 then 1 else 0 end) as “[<60]”
from student_course a,course b
where a.course_id=b.id
group by a.course_id,b.name;

— 23 查询学生平均成绩
select a.student_id as 学生ID,b.name as 姓名,avg(a.score) as 平均成绩
from student_course a,student b
where a.student_id=b.id
group by a.student_id
order by avg(a.score) desc;

— 24 查询各科成绩前三名的记录:(不考虑成绩并列情况)
SELECT t1.student_id as 学生ID,
t1.course_id as 课程ID,
t1.score as 分数
FROM student_course t1
WHERE t1.id IN (
select t.id from(
SELECT id
FROM student_course
ORDER BY score DESC
limit 3) as t
)
ORDER BY t1.score desc;

— 25查询每门课程被选修的学生数
select count(student_id)
from student_course
group by course_id;

— 26 查询出只选修了一门课程的全部学生的学号和姓名
select a.student_id,b.name
from student_course a,student b
where b.id=a.student_id
group by student_id
having count(course_id)=1;

— 27 查询姓“t”的学生名单
select * from student where name like “t%” ;

— 28 查询同名同性学生名单,并统计同名人数
select name ,count(name) from student group by name;

— 29 查询每门课程的平均成绩,结果按平均成绩升序排列,平均成绩相同时,按课程号降序排列
select avg(score) ,course_id from student_course group by course_id order by avg(score),course_id desc ;

— 30 查询平均成绩大于85的所有学生的学号、姓名和平均成绩
select a.student_id,b.name,avg(a.score)
from student_course a,student b
where a.student_id=b.id
group by a.student_id
having avg(a.score)>85;

— 31 查询课程名称为“政治”,且分数低于60的学生姓名和分数
select a.student_id,a.score,b.name
from student_course a,student b
where a.student_id=b.id and a.course_id=(
select id from course where name =”政治”
)
group by a.student_id,a.score
having a.score <60;

— 32查询所有学生的选课情况;
select a.student_id,a.course_id,b.name,c.name
from student_course a,student b,course c
where a.student_id=b.id and a.course_id=c.id
group by a.student_id,a.course_id;

— 33 查询任何一门课程成绩在70分以上的姓名、课程名称和分数;
select a.student_id,a.course_id,a.score,b.name,c.name
from student_course a,student b,course c
where a.student_id=b.id and a.course_id=c.id
group by a.student_id,a.course_id,a.score
having a.score>70;

— 34 查询不及格的课程,并按课程号从大到小排列
select course_id,score
from student_course
group by course_id,score
having score<60
order by course_id;

— 35 查询课程编号为30003且课程成绩在80分以上的学生的学号和姓名;
select a.student_id,a.score,a.course_id,b.name
from student_course a ,student b
where a.student_id=b.id and a.course_id=”30003″
group by a.student_id,a.score,a.course_id
having a.score>80;

— 36求选了课程的学生人数
select count(distinct student_id) from student_course;

— 37 查询选修“liu”老师所授课程的学生中,成绩最高的学生姓名及其成绩
select b.name,a.score
from student_course a,student b,course c,teacher d
where a.course_id=c.id and a.student_id=b.id and a.teacher_id=d.id and d.name=”liu”
and a.score=(select max(score) from student_course where course_id=c.id);

— 38查询各个课程及相应的选修人数
select course_id,count(student_id)
from student_course
group by course_id;

— 39 查询不同课程成绩相同的学生的学号、课程号、学生成绩
select a.student_id,a.course_id,a.score
from student_course a,student_course b
where a.score=b.score and a.course_id <> b.course_id;

— 40 查询每门功成绩最好的前两名
select * from student_course where score in(
select score from(
select score from student_course limit 2
) as a
)
order by course_id desc;

— 41 统计每门课程的学生选修人数(超过4人的课程才统计)。
— 要求输出课程号和选修人数,查询结果按人数降序排列,查询结果按人数降序排列,
— 若人数相同,按课程号升序排列
select course_id, count(*)
from student_course
group by course_id
having count(*)>4
order by count(*),course_id desc;

— 42 检索至少选修两门课程的学生学号
select student_id
from student_course
group by student_id
having count(*)>=2;

— 43 、查询全部学生都选修的课程的课程号和课程名
select *
from course
where id in(
select course_id from student_course group by course_id having count(*)=(select count(*) from student)
);

— 44 查询没学过“yang”老师讲授的课程的学生姓名
select name from student where id not in(
select distinct a.student_id
from student_course a,teacher c
where a.teacher_id=c.id and c.name = “yang”
);

— 45 查询两门以上不及格课程的同学的学号及其平均成绩
select student_id,avg(score)
from student_course
where score <60
group by student_id
having count(*) >=2;

— 46 检索“30004”课程分数小于60,按分数降序排列的同学学号
select student_id
from student_course
where course_id=”30004″ and score<60
group by student_id,score
order by score desc;

— 47 删除“10002”同学的“30001”课程的成绩
delete from student_course where student_id=10002 and course_id=30002;

 

创建表

//创建表
CREATE TABLE IF NOT EXISTS `student`(
'字段名' 列类型 [属性] [索引] [注释],
'字段名' 列类型 [属性] [索引] [注释],
......
'字段名' 列类型 [属性] [索引] [注释]
)[表的类型][字符集设置][注释]
//插入语句
INSERT INTO 表名([字段1,字段2..])VALUES('值1','值2'..),[('值1','值2'..)..];

 

mysql替换字段中指定的部分字符串

问题描述:
mysql varchar型字段中,替换字段中指定的部分字符串

解决方法:

使用replace(obj, search, replace_str)函数;

sql语法:

UPDATE 表名 SET 字段名=replace(字段名, '被替换字符串', '用来替换的字符串') ;

使用样例:

复制代码
 UPDATE `member` SET `phone`=replace(`phone`, '\', '') ;
update item as t set 
t.share_descs=REPLACE(t.share_descs,'罗莱LOVO','LOVO')
 WHERE t.theme = '2c9280856efd0b23016efdacb45c0444'