通过Python实现对SQL Server 数据文件大小的监控告警功能

1.需求背景

系统程序突然报错,报错信息如下:

The transaction log for database '@dbname' is full. To find out why space in the log cannot be reused, see the log_reuse_wait_desc column in sys.databases

此时查看log文件,已达2T。

当时的紧急处理方案是,移除掉镜像,修改数据库恢复模式(由full修改为simple),收缩日志。

为了防止类似问题再次发生,需对log 文件的大小进行监控,当到达阈值后,触发告警。

2.主要基础组件(类)

配置文件qqmssqltest_db_server_conf.ini

同过此配置文件获取DB Server信息、DB信息、UID信息、邮件服务器信息等。

[sqlserver]db_user = XXXXXXdb_pwd = XXXXXXX[sqlserver_qq]db_host = 110.119.120.114db_port = 1433[windows]user = pwd = [mail]host = zheshiceshidemail.qq.comport = 25user = pwd = sender = zhejiushiceshidebuyaodangzhen@qq.com

获取连接串的组件mssql_get_db_connect.py

# -*- coding: utf-8 -*-import sysimport osimport datetimeimport configparserimport pymssql# pip3 install pymssql-2.1.4-cp37-cp37m-win_amd64.whl# pip3 install pymssql -i https://pypi.doubanio.com/simple# 获取连接串信息def mssql_get_db_connect(db_host, db_port):    db_host = db_host    db_port = db_port    db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")    config = configparser.ConfigParser()    config.read(db_ps_file, encoding="utf-8")    db_user = config.get('sqlserver', 'db_user')    db_pwd = config.get('sqlserver', 'db_pwd')    conn = pymssql.connect(host=db_host, port=db_port, user=db_user, password=db_pwd, charset="utf8", login_timeout=5, timeout=600, autocommit=True)    return conn

执行SQL语句的组件mysql_exec_sql.py

# -*- coding: utf-8 -*-import mysql_get_db_connectdef mysql_exec_dml_sql(db_host, db_port, exec_sql):    conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)    with conn.cursor() as cursor_db:        cursor_db.execute(exec_sql)        conn.commit()def mysql_exec_select_sql(db_host, db_port, exec_sql):    conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)    with conn.cursor() as cursor_db:        cursor_db.execute(exec_sql)        sql_rst = cursor_db.fetchall()    return sql_rstdef mysql_exec_select_sql_include_colnames(db_host, db_port, exec_sql):    conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)    with conn.cursor() as cursor_db:        cursor_db.execute(exec_sql)        sql_rst = cursor_db.fetchall()        col_names = cursor_db.description    return sql_rst, col_names

发邮件的功能send_monitor_mail.py

# -*- coding: utf-8 -*-# pip3 install PyEmailimport smtplibfrom email.mime.text import MIMETextimport configparserimport osimport sys# 发送告警邮件def send_monitor_mail(mail_subject, mail_body, mail_receivers="testwukongbaigujing@qq.com"):    db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")    config = configparser.ConfigParser()    config.read(db_ps_file, encoding="utf-8")    mail_host = config.get('mail', 'host')    mail_port = config.get('mail', 'port')    # mail_user = config.get('mail', 'user')    # mail_pwd = config.get('mail', 'pwd')    sender = config.get('mail', 'sender')    # receivers = config.get('mail', 'receivers')    # 发送HTML格式邮件    message = MIMEText(mail_body, 'html', 'utf-8')    # message = MIMEText(mail_body, 'plain', 'utf-8')    message['subject'] = mail_subject    message['From'] = sender    message['To'] = mail_receivers    try:        smtpObj = smtplib.SMTP()        smtpObj.connect(mail_host, mail_port)          # 25 为 SMTP 端口号        # SMTP AUTH extension not supported by server.        # https://github.com/miguelgrinberg/microblog/issues/76        # smtpObj.ehlo()        # smtpObj.starttls()        # smtpObj.login(mail_user, mail_pwd)        smtpObj.sendmail(sender, mail_receivers, message.as_string())        smtpObj.quit()        print("邮件发送成功")    except Exception as e:        print(e)    # except smtplib.SMTPException:        # print("Error: 无法发送邮件")

3.主要功能代码

收集到的DB数据文件的信息保存到表mssql_dblogsize中,其建表的脚本如下:

CREATE TABLE [dbo].[mssql_dblogsize](    [id] [int] IDENTITY(1,1) NOT NULL,    [createtime] [datetime] NULL,    [vip] [nvarchar](100) NULL,    [port] [nvarchar](100) NULL,    [Environment] [nvarchar](200) NULL,    [Dbname] [varchar](200) NULL,    [Logical_Name] [varchar](200) NULL,    [Physical_Name] [varchar](1500) NULL,    [Size] [bigint] NULL,PRIMARY KEY CLUSTERED (    [id] ASC)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]) ON [PRIMARY]GOALTER TABLE [dbo].[mssql_dblogsize] ADD  DEFAULT (getdate()) FOR [createtime]GO

为了方便对表mssql_dblogsize的数据进行管理和展示,在其基础上抽象加工出了一个视图v_mssql_dblogsize,注意Size大小的转换(Size/128/1024 as SizeGB)

创建视图的脚本如下:

CREATE view [dbo].[v_mssql_dblogsize]as SELECT [id]      ,[createtime]      ,[vip]      ,[port]      ,[Environment]      ,[Dbname]      ,[Logical_Name]      ,[Physical_Name]      ,Size/128/1024 as SizeGB  FROM [dbo].[mssql_dblogsize] where size >50*128*1024and Physical_Name like '%ldf%'GO

本测试实例使用的数据库为qqDB,监控的各个DB Server保存在了表QQDBServer中,注意Port 不一定为标准端口1433.

collect_mssql_dblogsize_info.py

# -*- coding: utf-8 -*-import sysimport osimport configparserimport pymssqlimport mssql_get_db_connectimport mssql_exec_sqlfrom datetime import datetimedef collect_mssql_dblogsize_info():    db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")    config = configparser.ConfigParser()    config.read(db_ps_file, encoding="utf-8")    m_db_host = config.get('sqlserver_qq', 'db_host')    m_db_port = config.getint('sqlserver_qq', 'db_port')    # 获取需要遍历的DB列表    exec_sql_1 = """SELECT IP, case Port when '1444,1433' then '1433' else Port end as Port, EnvironmentFROM qqDB.dbo.QQDBServer  where InUse =1 AND ServerType IN ('SQL') and IP=VIP ;    """    sql_rst_1 = mssql_exec_sql.mssql_exec_select_sql(m_db_host, m_db_port, exec_sql_1)    for j in sql_rst_1:        db_host_2 = j[0]        db_port_2 = j[1]        db_Environment = j[2]        exec_sql_2 = """        select '""" + db_host_2 + """' as vip, '""" + db_port_2 + """' as port, '""" + db_Environment + """' as Environment,DB_NAME(database_id) AS DatabaseName,Name AS Logical_Name,Physical_Name, size FROM master.sys.master_files;        """        try:           sql_rst_2 = mssql_exec_sql.mssql_exec_select_sql(db_host_2, db_port_2, exec_sql_2)        except Exception as e:           print(e)        for k in sql_rst_2:           exec_sql_3 = """           insert into qqDB..mssql_dblogsize([vip], [port], [Environment], [Dbname], [Logical_Name], [Physical_Name], [Size])            values('%s', '%s', '%s', '%s', '%s', '%s', '%s');           """           conn = mssql_get_db_connect.mssql_get_db_connect(m_db_host, m_db_port)           with conn.cursor() as cursor_db:               cursor_db.execute(exec_sql_3 % (k[0], k[1], k[2], k[3], k[4], k[5], k[6] ))               conn.commit()collect_mssql_dblogsize_info()

告警邮件的功能实现为mssql_alert_dblogsize.py,此份代码的告警阈值设置的为50G,数据来自于视图v_mssql_dblogsize。

# -*- coding: utf-8 -*-import sysimport osimport configparserimport pymssqlimport mssql_get_db_connectimport mssql_exec_sqlimport datetimeimport send_monitor_mailimport pandas as pddef mssql_alert_dblogsize():    mail_subject = "SQL Server DB Log Size Greater than 50G, please check!!! "    mail_receivers = "testDBAgrp@qtiantianq.com"    db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")    config = configparser.ConfigParser()    config.read(db_ps_file, encoding="utf-8")    m_db_host = config.get('sqlserver_qq', 'db_host')    m_db_port = config.getint('sqlserver_qq', 'db_port')    # 获取需要遍历的DB列表    exec_sql_4 = """        SELECT [vip] as IP,[port],[Environment],[Dbname]      ,[Logical_Name],[Physical_Name],[SizeGB],[createtime]  FROM qqDB.[dbo].[v_mssql_dblogsize]  order by VIP,Dbname;    """    sql_rst_4, col_name = mssql_exec_sql.mssql_exec_select_sql_include_colnames(m_db_host, m_db_port, exec_sql_4)    # print(sql_rst_4)    if len(sql_rst_4):        mail_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')        columns = []        for i in range(len(col_name)):            columns.append(col_name[i][0])        df = pd.DataFrame(columns=columns)        for i in range(len(sql_rst_4)):            df.loc[i] = list(sql_rst_4[i])        mail_body = df.to_html(index=False, justify="left").replace('<th>', '<th style = "color:red; text-align:left; background-color: yellow">')        mail_html = "<html><body><h4>" + "Deal All : " + "<br><h4>" + "以下数据库的db log文件,已大于50G.请及时检查,谢谢! " + "<br><h4>" + mail_body + "</body></html>"        send_monitor_mail.send_monitor_mail(mail_subject=mail_subject, mail_body=mail_html, mail_receivers=mail_receivers)mssql_alert_dblogsize()

4.实现

定时任务是通过windows的计划任务来实现的,在此不做过多的叙述。告警邮件的部分截图如下:

5.附录

1.报错定位,判断是不是log文件过大

https://blog.csdn.net/weixin_30785593/article/details/99912405

2.关于为什么数据库log文件过大,我们可以参考以下分享的文章

https://blog.csdn.net/chinadm123/article/details/44941275

到此这篇关于通过python实现对SQL Server 数据文件大小的监控告警的文章就介绍到这了,更多相关pythonSQL Server 数据监控告警内容请搜索 以前的文章或继续浏览下面的相关文章希望大家以后多多支持 !

相关文章

发表新评论