OwlCyberSecurity - MANAGER

Edit File: pg_stats_analysis.py

##
# Copyright (c) 2011-2017 Apple Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
from __future__ import print_function

import sqlparse
import os
from gzip import GzipFile
import collections
import tables
import textwrap
import sys
import getopt

def safePercent(x, y, multiplier=100):
    return ((multiplier * x) / y) if y else 0

def _is_literal(token):
    if token.ttype in sqlparse.tokens.Literal:
        return True
    if token.ttype == sqlparse.tokens.Keyword and token.value in (u'True', u'False'):
        return True
    return False

def _substitute(expression, replacement):
    try:
        expression.tokens
    except AttributeError:
        return

for i, token in enumerate(expression.tokens):
        if _is_literal(token):
            expression.tokens[i] = replacement
        elif token.is_whitespace():
            expression.tokens[i] = sqlparse.sql.Token('Whitespace', ' ')
        else:
            _substitute(token, replacement)

def sqlnormalize(sql):
    try:
        statements = sqlparse.parse(sql)
    except ValueError, e:
        print(e)
    # Replace any literal values with placeholders
    qmark = sqlparse.sql.Token('Operator', '?')
    _substitute(statements[0], qmark)
    return sqlparse.format(statements[0].to_unicode().encode('ascii'))

COLUMN_userid = 0
COLUMN_dbid = 1
COLUMN_query = 2
COLUMN_calls = 3
COLUMN_total_time = 4
COLUMN_rows = 5
COLUMN_shared_blks_hit = 6
COLUMN_shared_blks_read = 7
COLUMN_shared_blks_written = 8
COLUMN_local_blks_hit = 9
COLUMN_local_blks_read = 10
COLUMN_local_blks_written = 11
COLUMN_temp_blks_read = 12
COLUMN_temp_blks_written = 13

def sqlStatementsReport(entries):

dcount = collections.defaultdict(int)
    dtime = collections.defaultdict(float)
    drows = collections.defaultdict(int)
    for entry in entries:
        dcount[entry[COLUMN_query]] += int(entry[COLUMN_calls])
        dtime[entry[COLUMN_query]] += float(entry[COLUMN_total_time])
        drows[entry[COLUMN_query]] += int(entry[COLUMN_rows])

daverage = {}
    for k in dcount.keys():
        daverage[k] = dtime[k] / dcount[k]

counttotal = sum(dcount.values())
    timetotal = sum(dtime.values())
    averagetotal = sum(daverage.values())

for sorttype, sortedkeys in (
        ("total time", [i[0] for i in sorted(dtime.iteritems(), key=lambda x:x[1], reverse=True)],),
        ("count", [i[0] for i in sorted(dcount.iteritems(), key=lambda x:x[1], reverse=True)],),
        ("average time", [i[0] for i in sorted(daverage.iteritems(), key=lambda x:x[1], reverse=True)],),
    ):
        table = tables.Table()
        table.addHeader(("Statement", "Count", "Count %", "Total Time", "Total Time %", "Av. Time", "Av. Time %", "Av. rows",))
        table.setDefaultColumnFormats((
            tables.Table.ColumnFormat("%s", tables.Table.ColumnFormat.LEFT_JUSTIFY),
            tables.Table.ColumnFormat("%d", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
            tables.Table.ColumnFormat("%.2f%%", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
            tables.Table.ColumnFormat("%f", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
            tables.Table.ColumnFormat("%.2f%%", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
            tables.Table.ColumnFormat("%f", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
            tables.Table.ColumnFormat("%.2f%%", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
            tables.Table.ColumnFormat("%.1f", tables.Table.ColumnFormat.RIGHT_JUSTIFY),
        ))

for key in sortedkeys:

keylines = textwrap.wrap(key, 72, subsequent_indent="  ")
            table.addRow((
                keylines[0],
                dcount[key],
                safePercent(dcount[key], counttotal, 100.0),
                dtime[key],
                safePercent(dtime[key], timetotal, 100.0),
                daverage[key],
                safePercent(daverage[key], averagetotal, 100.0),
                float(drows[key]) / dcount[key],
            ))

for keyline in keylines[1:]:
                table.addRow((
                    keyline,
                    None,
                    None,
                    None,
                    None,
                    None,
                    None,
                    None,
                ))

print("Queries sorted by %s" % (sorttype,))
        table.printTable()
        print("")

def parseStats(logFilePath, donormlize=True, verbose=False):

fpath = os.path.expanduser(logFilePath)
    if fpath.endswith(".gz"):
        f = GzipFile(fpath)
    else:
        f = open(fpath)

# Punt past data
    for line in f:
        if line.startswith("---"):
            break

f.close()

entries = []
    for line in f:
        bits = line.split("|")
        if len(bits) > COLUMN_query:
            while bits[COLUMN_query].endswith("+"):
                line = f.next()
                newbits = line.split("|")
                bits[COLUMN_query] = bits[COLUMN_query][:-1] + newbits[COLUMN_query]

pos = bits[COLUMN_query].find("BEGIN:VCALENDAR")
            if pos != -1:
                bits[COLUMN_query] = bits[COLUMN_query][:pos]

if donormlize:
                bits[COLUMN_query] = sqlnormalize(bits[COLUMN_query].strip())

if bits[COLUMN_query] not in (
                "BEGIN",
                "COMMIT",
                "ROLLBACK",
            ) and bits[COLUMN_query].find("pg_catalog") == -1:
                bits = [bit.strip() for bit in bits]
                entries.append(bits)
                if verbose and divmod(len(entries), 1000)[1] == 0:
                    print("%d entries" % (len(entries),))
                # if float(bits[COLUMN_total_time]) > 1:
                #     print(bits[COLUMN_total_time], bits[COLUMN_query])

if verbose:
        print("Read %d entries" % (len(entries,)))

sqlStatementsReport(entries)

def usage(error_msg=None):
    if error_msg:
        print(error_msg)

print("""Usage: pg_stats_analysis.py [options] FILE
Options:
    -h             Print this help and exit
    -v             Generate progress information
    --no-normalize Do not normalize SQL statements

Arguments:
    FILE      File name for pg_stat_statements output to analyze.

Description:
This utility will analyze the output of s pg_stat_statement table.
""")

if error_msg:
        raise ValueError(error_msg)
    else:
        sys.exit(0)

if __name__ == '__main__':

normalize = True
    verbose = False
    options, args = getopt.getopt(sys.argv[1:], "hv", ["no-normalize", ])

for option, value in options:
        if option == "-h":
            usage()
        elif option == "-v":
            verbose = True
        elif option == "--no-normalize":
            normalize = False
        else:
            usage("Unrecognized option: %s" % (option,))

# Process arguments
    if len(args) != 1:
        usage("Must have a file argument")

parseStats(args[0], normalize, verbose)