提交 d2fb3b48 创建 作者: 宋海霞's avatar 宋海霞

Initial commit

上级
-- Drop the TPCH database if it already exists, then create it again,
-- so that every run starts from a freshly created database.
IF EXISTS ( SELECT [name] FROM sys.databases WHERE [name] = 'TPCH' )
DROP DATABASE TPCH
CREATE DATABASE TPCH
差异被折叠。
#!/bin/env python
# -*- coding: utf-8 -*-
import difflib
import sys
import argparse
import webbrowser
import os
import filecmp
import subprocess
from colorama import Fore, init, AnsiToWin32
# Initialise colorama with wrap=False; colored output is written explicitly
# through `stream` below instead of through a wrapped sys.stdout/sys.stderr.
init(wrap=False)
# Output stream for all colored messages: sys.stderr seen through AnsiToWin32.
stream = AnsiToWin32(sys.stderr).stream
# HTML file that collects the diff reports of failed comparisons.
compResultFile = "comp_result.html"
# Notebook file name (not referenced by the code visible in this file).
notebookFile = "notebook.ipynb"
# Read a text file into a list of lines
def read_file(file_name):
    """Return the content of *file_name* as a list of lines.

    The file is decoded as UTF-8 and split with ``str.splitlines``, so the
    returned strings carry no line terminators.

    If the file cannot be opened or read (``IOError``/``OSError``), an error
    message is printed and the process exits with status 1 so that callers
    and CI runners see the failure.
    """
    try:
        # The context manager closes the handle even when read() raises.
        with open(file_name, 'r', encoding='utf-8') as file_desc:
            return file_desc.read().splitlines()
    except IOError as error:
        print('Read input file Error: {0}'.format(error))
        sys.exit(1)
# Compare two files while ignoring trailing whitespace and blank lines
def advanced_file_compare(file1, file2):
    """Return 1 when the two UTF-8 text files are equivalent, otherwise 0.

    Before comparing, trailing whitespace (including the line terminator) is
    stripped from every line and lines that are empty after stripping are
    dropped. Two files are equivalent when the remaining lines are equal,
    in the same order.
    """
    def significant_lines(path):
        # The context manager closes the file even if decoding fails midway.
        with open(path, encoding='utf-8') as handle:
            stripped = (line.rstrip() for line in handle)
            return [line for line in stripped if line]

    return 1 if significant_lines(file1) == significant_lines(file2) else 0
# Compare two result files and append an HTML diff report when they differ
def compare_file(file1, file2, seqNum, type):
    """Compare a reference result file with a user result file.

    Parameters:
        file1:  path of the reference result file.
        file2:  path of the result file produced by the user's SQL.
        seqNum: sequence number of the check, used only in messages.
        type:   1 for a ``query`` check; any other value is reported as a
                ``schema_check`` check. (The name shadows the builtin but is
                kept because it is part of the existing signature.)

    Returns 1 when both files exist and ``advanced_file_compare`` considers
    them equal, otherwise 0. When they differ, an HTML side-by-side diff is
    appended to ``compResultFile``.

    Exits the process with status 1 when either path is an empty string.
    """
    if file1 == "" or file2 == "":
        print('文件路径不能为空:第一个文件的路径:{0}, 第二个文件的路径:{1} .'.format(file1, file2))
        sys.exit(1)
    print("正在将标准查询结果文件 {0} 与用户编写的查询语句查询结果 {1} 进行比较。\n比较结果".format(file1, file2), end=': ')

    label = "query" if type == 1 else "schema_check"

    if os.path.isfile(file1) and os.path.isfile(file2) and advanced_file_compare(file1, file2):
        print("文件相同")
        print(Fore.GREEN + "{0}{1}.sql 验证成功".format(label, seqNum), file=stream, end='')
        print(Fore.WHITE, file=stream)
        return 1

    print("文件不同")
    print(Fore.RED + "{0}{1}.sql 验证失败".format(label, seqNum), file=stream, end='')
    print(Fore.WHITE, file=stream)

    # Build the side-by-side HTML diff of the two files.
    text1_lines = read_file(file1)
    text2_lines = read_file(file2)
    result = difflib.HtmlDiff().make_file(text1_lines, text2_lines)

    if type == 1:
        promptContent = "<p>query{0}.sql 验证失败。</br>正在将标准查询结果文件 query{0}_result.txt(左边)与用户编写的查询语句输出结果 user_query{0}_result.txt(右边)进行比较:</p>".format(seqNum)
    else:
        promptContent = "<p>schema_check{0}.sql 验证失败。</br>正在将标准查询结果文件 schema_check{0}_result.txt(左边)与查询语句输出结果 user_schema_check{0}_result.txt(右边)进行比较:</p>".format(seqNum)

    # Append to the report file. Only I/O errors are handled here; any other
    # exception propagates instead of being swallowed by a `finally: return`.
    try:
        with open(compResultFile, 'a+', encoding="utf-8") as result_file:
            result_file.write(promptContent + result)
    except IOError as error:
        print('写入 html 文件错误:{0}'.format(error))
    return 0
# Run all numbered checks of one kind (query or schema_check)
def _run_checks(prefix, check_type, progress_message):
    """Run testsql.py for every numbered check whose reference file exists.

    Parameters:
        prefix:           "query" or "schema_check"; used to build the file
                          names ``<prefix>N_result.txt`` and
                          ``user_<prefix>N_result.txt`` and passed to
                          testsql.py as the SQL file type.
        check_type:       value forwarded to ``compare_file`` as ``type``.
        progress_message: format string taking the sequence number and the
                          user output file name.

    Returns True when the numbered reference files are exhausted without a
    failed comparison, False as soon as one comparison fails (after opening
    the HTML report in the browser).

    Exits with status 1 when the first reference file is missing or when
    testsql.py returns a non-zero status.
    """
    seqNum = 1
    while True:
        outputFile = "{0}{1}_result.txt".format(prefix, seqNum)
        useroutputFile = "user_{0}{1}_result.txt".format(prefix, seqNum)
        if not os.path.isfile(outputFile):
            if seqNum == 1:
                print(Fore.RED + "该项目未提供自动化验证功能", file=stream)
                sys.exit(1)
            # No more numbered reference files: every check passed.
            return True

        print(progress_message.format(seqNum, useroutputFile))
        # Pass the check number as an argument. An argument list avoids the
        # shell, and sys.executable runs the same interpreter as this script.
        runCommand = [sys.executable or "python", "testsql.py", "local_test", prefix, str(seqNum)]
        execResult = subprocess.call(runCommand)
        if execResult != 0:
            print(Fore.RED + "应用程序异常,返回值:{0}".format(execResult), file=stream)
            sys.exit(1)

        if os.path.isfile(useroutputFile):
            if compare_file(outputFile, useroutputFile, seqNum, check_type) == 0:
                print("正在使用浏览器打开文件比较结果,可帮助用户查找验证失败的原因。")
                # Open the comparison report in the browser.
                webbrowser.open('file://' + os.path.realpath(compResultFile))
                return False
        seqNum += 1


if __name__ == "__main__":
    print()
    print(Fore.YELLOW + "提示:\n1. 启用验证前一定要先保存 sql 文件。\n2. 每次启动验证,均会重新创建数据库、数据表和插入数据。", file=stream)
    print(Fore.WHITE, file=stream)

    # Remove the comparison report left over from a previous run.
    if os.path.isfile(compResultFile):
        os.remove(compResultFile)

    # Run every query check; stop at the first failed validation.
    if not _run_checks("query", 1, "\n正在使用 query{0}.sql 进行查询,结果保存到文件 {1}。"):
        sys.exit(1)

    # Run every schema_check; a failed check also yields a non-zero exit
    # status, consistent with the query checks above.
    if not _run_checks("schema_check", 2, "\n正在使用schema_check{0}.sql进行表结构查询,查询结果保存到文件 {1}。"):
        sys.exit(1)
-- Single-table query: name, address and phone of every supplier
USE TPCH
SELECT name, address, phone
FROM Supplier
-- Customers who have NOT ordered the part named '发动机总成' made by '中国奇瑞'.
-- The correlated subquery looks for such an order line of customer C;
-- NOT EXISTS keeps only the customers for whom none is found.
USE TPCH
SELECT custkey, name
FROM Customer C
WHERE NOT EXISTS(
SELECT O.custkey
FROM Orders O, Lineitem L, PartSupp PS, Part P
WHERE C.custkey = O.custkey AND
O.orderkey = L.orderkey AND
L.partkey = PS.partkey AND
L.suppkey = PS.suppkey AND
PS.partkey = P.partkey AND
P.mfgr = '中国奇瑞' AND P.name = '发动机总成'
)
custkey,name
3,犬养太郎
4,松下幸
5,山姆大叔
6,汤姆
7,汉斯
8,弗雷德
-- Names of customers who bought at least everything customer '张三' bought.
-- Relational division via double NOT EXISTS: keep customer CA when there is
-- no order line LB of '张三' for which CA has no order line LC with the same
-- (suppkey, partkey) pair.
USE TPCH
SELECT CA.name
FROM Customer CA
WHERE NOT EXISTS(
SELECT *
FROM Customer CB, Orders OB, Lineitem LB
WHERE CB.custkey = OB.custkey AND
OB.orderkey = LB.orderkey AND
CB.name = '张三' AND
NOT EXISTS(
SELECT *
FROM Orders OC, Lineitem LC
WHERE CA.custkey = OC.custkey AND
OC.orderkey = LC.orderkey AND
LB.suppkey = LC.suppkey AND
LB.partkey = LC.partkey
)
)
name
张三
弗雷德
-- All columns of customers of nation '中国' whose average order total
-- (AVG(totalprice) per custkey) is greater than 10000.
-- Derived table B holds the custkeys that satisfy the average condition.
USE TPCH
SELECT C.*
FROM Customer C, (SELECT custkey
FROM Orders
GROUP BY custkey
HAVING AVG(totalprice) > 10000
) B, Nation N
WHERE C.custkey = B.custkey AND
C.nationkey = N.nationkey AND N.name = '中国'
custkey,name,address,nationkey,phone,acctbal,mktsegment,comment
1,张三,中国北京,1,010-1122033 ,10000000.0,亚洲大区 ,中国买家1
2,李四,中国北京,1,010-2233233 ,50000000.0,亚洲大区 ,中国买家2
-- Parts ordered by BOTH customer '张三' and customer '李四':
-- INTERSECT of the parts ordered by each of them.
USE TPCH
SELECT P.*
FROM Customer C, Orders O, Lineitem L, PartSupp PS, Part P
WHERE C.custkey = O.custkey AND O.orderkey = L.orderkey AND
L.suppkey = PS.suppkey AND L.partkey = PS.partkey AND
PS.partkey = P.partkey AND C.name = '张三'
INTERSECT
SELECT P.*
FROM Customer C, Orders O, Lineitem L, PartSupp PS, Part P
WHERE C.custkey = O.custkey AND O.orderkey = L.orderkey AND
L.suppkey = PS.suppkey AND L.partkey = PS.partkey AND
PS.partkey = P.partkey AND C.name = '李四'
partkey,name,mfgr,brand,type,size,container,retailprice,comment
1,发动机总成,中国奇瑞 ,奇瑞 ,chery1,86,简装 ,30.0,质量优秀
3,发动机总成,中国比亚迪 ,奇瑞 ,byd-1,86,简装 ,100.0,质量优秀
-- Parts ordered by customer '张三' or by customer '李四':
-- UNION (duplicates removed) of the parts ordered by each of them.
USE TPCH
SELECT P.*
FROM Customer C, Orders O, Lineitem L, PartSupp PS, Part P
WHERE C.custkey = O.custkey AND O.orderkey = L.orderkey AND
L.suppkey = PS.suppkey AND L.partkey = PS.partkey AND
PS.partkey = P.partkey AND C.name = '张三'
UNION
SELECT P.*
FROM Customer C, Orders O, Lineitem L, PartSupp PS, Part P
WHERE C.custkey = O.custkey AND O.orderkey = L.orderkey AND
L.suppkey = PS.suppkey AND L.partkey = PS.partkey AND
PS.partkey = P.partkey AND C.name = '李四'
-- Parts ordered by customer '张三' but not by customer '李四':
-- the parts of '张三' EXCEPT the parts of '李四'.
USE TPCH
SELECT P.*
FROM Customer C, Orders O, Lineitem L, PartSupp PS, Part P
WHERE C.custkey = O.custkey AND O.orderkey = L.orderkey AND
L.suppkey = PS.suppkey AND L.partkey = PS.partkey AND
PS.partkey = P.partkey AND C.name = '张三'
EXCEPT
SELECT P.*
FROM Customer C, Orders O, Lineitem L, PartSupp PS, Part P
WHERE C.custkey = O.custkey AND O.orderkey = L.orderkey AND
L.suppkey = PS.suppkey AND L.partkey = PS.partkey AND
PS.partkey = P.partkey AND C.name = '李四'
partkey,name,mfgr,brand,type,size,container,retailprice,comment
2,发动机总成,中国吉利 ,奇瑞 ,geely1,86,简装 ,100.0,质量优秀
name,address,phone
奇瑞 ,中国芜湖,010-111111111
吉利 ,中国台州,010-222222222
比亚迪 ,中国深圳,010-333333333
爱信 ,日本,444-11223331
爱德克斯 ,日本,444-12112233
克虏伯 ,德国,222-3312112233
博世 ,德国,222-1112112233
博格华纳 ,美国,333-442112233
霍尼韦尔 ,美国,333-222112233
-- Single-table query: all columns of orders whose totalprice is greater than 3000
USE TPCH
SELECT *
FROM Orders
WHERE totalprice > 3000
orderkey,custkey,orderstatus,totalprice,orderdate,orderpriority,clert,shippriority,comment
4,2,1,4000.0,2020-08-31,优先 ,记账员甲 ,1,李四的订单1
7,5,0,10000.0,2020-07-31,优先 ,记账员甲 ,1,山姆大叔的订单
8,6,0,6000.0,2020-07-31,优先 ,记账员丙 ,1,汤姆的订单
9,7,0,6000.0,2020-07-31,优先 ,记账员丙 ,1,汉斯的订单
10,8,0,7000.0,2020-05-31,优先 ,记账员丙 ,1,弗雷德的订单
-- Total order amount (SUM of totalprice) for each customer that has orders.
-- The aggregate column is selected without an alias.
USE TPCH
SELECT C.custkey, C.name, SUM(O.totalprice)
FROM Orders O, Customer C
WHERE C.custkey = O.custkey
GROUP BY C.custkey, C.name
custkey,name,
1,张三,6000.0
2,李四,7000.0
5,山姆大叔,10000.0
6,汤姆,6000.0
7,汉斯,6000.0
8,弗雷德,7000.0
-- Customer key and name of customers whose average order total is greater
-- than 3000 (the HAVING threshold below is 3000, not 1000).
-- MAX(C.name) is used because name is not part of the GROUP BY; the
-- aggregate column is selected without an alias.
USE TPCH
SELECT C.custkey, MAX(C.name)
FROM Customer C, Orders O
WHERE C.custkey = O.custkey
GROUP BY C.custkey
HAVING AVG(O.totalprice) > 3000
custkey,
5,山姆大叔
6,汤姆
7,汉斯
8,弗雷德
-- Key, name and address of the suppliers that share a nationkey with the
-- supplier named '比亚迪' (self-join of Supplier; the join condition does not
-- exclude '比亚迪' itself).
USE TPCH
SELECT F.suppkey, F.name, F.address
FROM Supplier F, Supplier S
WHERE F.nationkey = S.nationkey AND S.name = '比亚迪'
suppkey,name,address
1,奇瑞 ,中国芜湖
2,吉利 ,中国台州
3,比亚迪 ,中国深圳
-- Part name, manufacturer, retail price and supply cost (plus the PartSupp
-- keys) where the supply cost is greater than the retail price.
-- There is no join condition between Part and PartSupp in this version.
USE TPCH
SELECT P.name, P.mfgr, P.retailprice, PS.supplycost, PS.suppkey, PS.partkey
FROM Part P, PartSupp PS
WHERE P.retailprice < PS.supplycost
-- The statement above filters the Cartesian product of the two tables, so a
-- result row may combine values that do not belong to the same part.
name,mfgr,retailprice,supplycost,suppkey,partkey
发动机总成,中国奇瑞 ,30.0,50.0,1,1
涡轮增压器,美国霍尼韦尔 ,40.0,50.0,1,1
发动机总成,中国奇瑞 ,30.0,60.0,2,2
刹车,日本爱德克斯 ,50.0,60.0,2,2
涡轮增压器,美国霍尼韦尔 ,40.0,60.0,2,2
发动机总成,中国奇瑞 ,30.0,70.0,3,3
刹车,日本爱德克斯 ,50.0,70.0,3,3
减震器,德国克虏伯 ,60.0,70.0,3,3
涡轮增压器,美国霍尼韦尔 ,40.0,70.0,3,3
发动机总成,中国奇瑞 ,30.0,50.0,4,4
涡轮增压器,美国霍尼韦尔 ,40.0,50.0,4,4
发动机总成,中国奇瑞 ,30.0,50.0,7,5
涡轮增压器,美国霍尼韦尔 ,40.0,50.0,7,5
发动机总成,中国奇瑞 ,30.0,50.0,6,7
涡轮增压器,美国霍尼韦尔 ,40.0,50.0,6,7
发动机总成,中国奇瑞 ,30.0,50.0,9,10
涡轮增压器,美国霍尼韦尔 ,40.0,50.0,9,10
-- Part name, manufacturer, retail price and supply cost where the supply
-- cost is greater than the retail price; Part and PartSupp are joined on partkey.
USE TPCH
SELECT P.name, P.mfgr, P.retailprice, PS.supplycost
FROM Part P, PartSupp PS
WHERE P.partkey = PS.partkey AND P.retailprice < PS.supplycost /* filter condition */
name,mfgr,retailprice,supplycost
发动机总成,中国奇瑞 ,30.0,50.0
涡轮增压器,美国霍尼韦尔 ,40.0,50.0
-- For customer '张三': order key and total price of each order, together with
-- the part key, quantity and extended price of its line items.
USE TPCH
SELECT O.orderkey, O.totalprice, L.partkey, L.quantity, L.extendedprice
FROM Customer C, Orders O, Lineitem L
WHERE C.custkey = O.custkey AND O.orderkey = L.orderkey AND C.name = '张三'
orderkey,totalprice,partkey,quantity,extendedprice
1,1000.0,1,50.0,15000.0
2,2000.0,2,50.0,15000.0
3,3000.0,3,40.0,15000.0
-- Customers who have ordered the part named '发动机总成' made by '中国奇瑞'.
-- The subquery returns the custkeys of orders containing such a line item.
USE TPCH
SELECT custkey, name
FROM Customer
WHERE custkey IN (
SELECT O.custkey
FROM Orders O, Lineitem L, PartSupp PS, Part P
WHERE O.orderkey = L.orderkey AND
L.partkey = PS.partkey AND
L.suppkey = PS.suppkey AND
PS.partkey = P.partkey AND
P.mfgr = '中国奇瑞' AND P.name = '发动机总成'
)
custkey,name
1,张三
2,李四
#!/bin/env python
# -*- coding: utf-8 -*-
# 忽略一些无关的警告
import warnings
warnings.simplefilter('ignore', DeprecationWarning)
# 测试python访问数据库
import pymssql
# 取得连接时会稍微等待一小段时间
from time import sleep
# 输出结果时用到,将检索的结果保存成csv
import pandas as pd
import sys
# 用正则表达式切割字符串
import re
# ANSI color helpers for terminal output
def red(text):
    """Return *text* wrapped in the ANSI escape codes for red."""
    return '\033[0;31m' + text + '\033[0m'


def green(text):
    """Return *text* wrapped in the ANSI escape codes for green."""
    return '\033[0;32m' + text + '\033[0m'


def yellow(text):
    """Return *text* wrapped in the ANSI escape codes for yellow."""
    return '\033[0;33m' + text + '\033[0m'
# Command line: <test_type> <sql_file_type> <sequence number>
if len(sys.argv) < 4:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: testsql.py <local_test|ci_test> <query|schema_check> <number>")
# Validation mode: local run -- local_test, online run -- ci_test
test_type = sys.argv[1]
# Kind of SQL file to execute: query or schema_check
sql_file_type = sys.argv[2]
# Name of the SQL file to execute, built from the kind and the sequence number
sql_file = sql_file_type + sys.argv[3] + ".sql"
if sql_file_type == "query":
    # Reference result file; the prefix must read "query" (was misspelled
    # "qurey"), matching the queryN_result.txt naming of the user files below.
    query_result_file = "query" + sys.argv[3] + "_result.txt"
    user_output_txt_file = "user_query" + sys.argv[3] + "_result.txt"
    user_output_csv_file = "user_query" + sys.argv[3] + "_result.csv"
else:
    # Any other file type uses the schema_check file names.
    query_result_file = "schema_check" + sys.argv[3] + "_result.txt"
    user_output_txt_file = "user_schema_check" + sys.argv[3] + "_result.txt"
    user_output_csv_file = "user_schema_check" + sys.argv[3] + "_result.csv"
# Connecting may take a while right after the server starts, so retry in a loop
def get_conn():
    """Return an open pymssql connection to the local SQL Server.

    Makes up to three attempts, sleeping 5, 10 and 15 seconds before the
    first, second and third attempt respectively. On Linux it logs in as
    user ``SA`` with a password; on other platforms it connects to the
    ``localhost\\SQLEXPRESS`` instance on port 1433 using database ``master``.

    Prints a failure message and exits the process with status 1 when all
    attempts fail.
    """
    for delay in (5, 10, 15):
        # Wait before each attempt.
        sleep(delay)
        try:
            # The connection method depends on the platform.
            if sys.platform == 'linux':
                # NOTE(review): credentials are hard-coded here; consider
                # reading them from the environment.
                return pymssql.connect(host='localhost', user='SA', password='<MyStrong@Passw0rd>')
            # Raw string: the backslash in the instance name is literal.
            return pymssql.connect(host='localhost', server=r'localhost\SQLEXPRESS', port='1433', database='master')
        except Exception:
            # Any connection error triggers another attempt.
            print(yellow("请稍等,正在连接数据库..."))
    # Every attempt failed: terminate the program.
    print(red("连接数据库失败!"))
    sys.exit(1)
# Split a SQL script file into batches at its GO separator lines
_BATCH_SEPARATOR = re.compile(r'^[ \t]*GO[ \t]*$', re.MULTILINE | re.IGNORECASE)


def parse_sql(filename):
    """Return the batches of the SQL script *filename* as a list of strings.

    The file is read as UTF-8 and split at every line that consists only of
    the word ``GO`` (any letter case, optional surrounding spaces or tabs).
    Occurrences of "go" inside other text, such as identifiers, do not
    split the script. A script without any separator yields a one-element
    list containing the whole text.
    """
    # The context manager closes the file even when reading fails.
    with open(filename, 'r', encoding='utf-8') as sql_handle:
        script = sql_handle.read()
    return _BATCH_SEPARATOR.split(script)
# Echo the messages emitted while the SQL runs
def my_msg_handler(msgstate, severity, srvname, procname, line, msgtext):
    """
    Message callback: decode the message text sent by the server as UTF-8
    and print it to stdout in yellow. The other arguments are accepted but
    not used.
    """
    decoded_text = msgtext.decode('utf-8')
    formatted = "== SQL Server Msg == : %s" % decoded_text
    print(yellow(formatted))
# Main entry point
if __name__ == '__main__':
    # Obtain the database connection; everything else runs inside try/finally
    # so the connection is closed even when a statement fails.
    conn = get_conn()
    try:
        conn._conn.set_msghandler(my_msg_handler)
        cursor = conn.cursor()

        # The database and tables are (re)created only when running query1.sql.
        if sql_file == "query1.sql":
            # Create the test database.
            if test_type == "ci_test":
                print(yellow("正在使用文件 createdb.sql 创建数据库"))
            else:
                print("正在使用文件 createdb.sql 创建数据库")
            # Autocommit is switched on only while createdb.sql runs.
            conn.autocommit(True)
            for line_query in parse_sql('createdb.sql'):
                cursor.execute(line_query)
            conn.autocommit(False)

            # Create the tables and insert the data.
            if test_type == "ci_test":
                print(yellow("正在使用文件 createtable.sql 创建表结构和插入数据"))
            else:
                print("正在使用文件 createtable.sql 创建表结构和插入数据")
            for line_query in parse_sql('createtable.sql'):
                cursor.execute(line_query)
            conn.commit()

        # Run the student's SQL file.
        if test_type == "ci_test":
            print(yellow("正在使用文件 {0} 进行查询操作,结果保存在文件 {1}".format(sql_file, user_output_txt_file)))
        query_sql = parse_sql(sql_file)
        pd.set_option('display.max_columns', None)

        # Every batch except the last is executed directly; the last batch
        # goes through pd.read_sql so that its result set can be saved.
        *leading_batches, result_query = query_sql
        for line_query in leading_batches:
            cursor.execute(line_query)

        df = pd.read_sql(result_query, conn)
        # CSV with header and row index, for the user to inspect.
        df.to_csv(user_output_csv_file, index=True, header=True, encoding='UTF-8')
        # TXT with header but no row index, compared against the reference file.
        df.to_csv(user_output_txt_file, index=False, header=True, encoding='UTF-8')
        if test_type == "ci_test":
            print(yellow("正在使用文本比较工具diff对比标准查询结果文件 {0} 和本次查询的输出结果文件 {1}".format(query_result_file, user_output_txt_file)))
    finally:
        # Always release the connection.
        conn.close()
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论