odoo_17.0.1/odoo/tools/profiler.py

703 lines
27 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.
from datetime import datetime
import gc
import json
import logging
import sys
import time
import threading
import re
import functools
from psycopg2 import sql
from odoo import tools
_logger = logging.getLogger(__name__)
# ensure we have a non patched time for profiling times when using freezegun
real_datetime_now = datetime.now
real_time = time.time.__call__
def _format_frame(frame):
code = frame.f_code
return (code.co_filename, frame.f_lineno, code.co_name, '')
def _format_stack(stack):
return [list(frame) for frame in stack]
def get_current_frame(thread=None):
if thread:
frame = sys._current_frames()[thread.ident]
else:
frame = sys._getframe()
while frame.f_code.co_filename == __file__:
frame = frame.f_back
return frame
def _get_stack_trace(frame, limit_frame=None):
stack = []
while frame is not None and frame != limit_frame:
stack.append(_format_frame(frame))
frame = frame.f_back
if frame is None and limit_frame:
_logger.error("Limit frame was not found")
return list(reversed(stack))
def stack_size():
frame = get_current_frame()
size = 0
while frame:
size += 1
frame = frame.f_back
return size
def make_session(name=''):
return f'{real_datetime_now():%Y-%m-%d %H:%M:%S} {name}'
def force_hook():
"""
Force periodic profiling collectors to generate some stack trace. This is
useful before long calls that do not release the GIL, so that the time
spent in those calls is attributed to a specific stack trace, instead of
some arbitrary former frame.
"""
thread = threading.current_thread()
for func in getattr(thread, 'profile_hooks', ()):
func()
class Collector:
"""
Base class for objects that collect profiling data.
A collector object is used by a profiler to collect profiling data, most
likely a list of stack traces with time and some context information added
by ExecutionContext decorator on current thread.
This is a generic implementation of a basic collector, to be inherited.
It defines default behaviors for creating an entry in the collector.
"""
name = None # symbolic name of the collector
_registry = {} # map collector names to their class
@classmethod
def __init_subclass__(cls):
if cls.name:
cls._registry[cls.name] = cls
cls._registry[cls.__name__] = cls
@classmethod
def make(cls, name, *args, **kwargs):
""" Instantiate a collector corresponding to the given name. """
return cls._registry[name](*args, **kwargs)
def __init__(self):
self._processed = False
self._entries = []
self.profiler = None
def start(self):
""" Start the collector. """
def stop(self):
""" Stop the collector. """
def add(self, entry=None, frame=None):
""" Add an entry (dict) to this collector. """
# todo add entry count limit
self._entries.append({
'stack': self._get_stack_trace(frame),
'exec_context': getattr(self.profiler.init_thread, 'exec_context', ()),
'start': real_time(),
**(entry or {}),
})
def _get_stack_trace(self, frame=None):
""" Return the stack trace to be included in a given entry. """
frame = frame or get_current_frame(self.profiler.init_thread)
return _get_stack_trace(frame, self.profiler.init_frame)
def post_process(self):
for entry in self._entries:
stack = entry.get('stack', [])
self.profiler._add_file_lines(stack)
@property
def entries(self):
""" Return the entries of the collector after postprocessing. """
if not self._processed:
self.post_process()
self._processed = True
return self._entries
class SQLCollector(Collector):
"""
Saves all executed queries in the current thread with the call stack.
"""
name = 'sql'
def start(self):
init_thread = self.profiler.init_thread
if not hasattr(init_thread, 'query_hooks'):
init_thread.query_hooks = []
init_thread.query_hooks.append(self.hook)
def stop(self):
self.profiler.init_thread.query_hooks.remove(self.hook)
def hook(self, cr, query, params, query_start, query_time):
self.add({
'query': str(query),
'full_query': str(cr._format(query, params)),
'start': query_start,
'time': query_time,
})
class PeriodicCollector(Collector):
"""
Record execution frames asynchronously at most every `interval` seconds.
:param interval (float): time to wait in seconds between two samples.
"""
name = 'traces_async'
def __init__(self, interval=0.01): # check duration. dynamic?
super().__init__()
self.active = False
self.frame_interval = interval
self.thread = threading.Thread(target=self.run)
self.last_frame = None
def run(self):
self.active = True
last_time = real_time()
while self.active: # maybe add a check on parent_thread state?
duration = real_time() - last_time
if duration > self.frame_interval * 10 and self.last_frame:
# The profiler has unexpectedly slept for more than 10 frame intervals. This may
# happen when calling a C library without releasing the GIL. In that case, the
# last frame was taken before the call, and the next frame is after the call, and
# the call itself does not appear in any of those frames: the duration of the call
# is incorrectly attributed to the last frame.
self._entries[-1]['stack'].append(('profiling', 0, '⚠ Profiler freezed for %s s' % duration, ''))
self.last_frame = None # skip duplicate detection for the next frame.
self.add()
last_time = real_time()
time.sleep(self.frame_interval)
self._entries.append({'stack': [], 'start': real_time()}) # add final end frame
def start(self):
interval = self.profiler.params.get('traces_async_interval')
if interval:
self.frame_interval = min(max(float(interval), 0.001), 1)
init_thread = self.profiler.init_thread
if not hasattr(init_thread, 'profile_hooks'):
init_thread.profile_hooks = []
init_thread.profile_hooks.append(self.add)
self.thread.start()
def stop(self):
self.active = False
self.thread.join()
self.profiler.init_thread.profile_hooks.remove(self.add)
def add(self, entry=None, frame=None):
""" Add an entry (dict) to this collector. """
frame = frame or get_current_frame(self.profiler.init_thread)
if frame == self.last_frame:
# don't save if the frame is exactly the same as the previous one.
# maybe modify the last entry to add a last seen?
return
self.last_frame = frame
super().add(entry=entry, frame=frame)
class SyncCollector(Collector):
"""
Record complete execution synchronously.
Note that --limit-memory-hard may need to be increased when launching Odoo.
"""
name = 'traces_sync'
def start(self):
assert not self._processed, "You cannot start SyncCollector after accessing entries."
sys.settrace(self.hook) # todo test setprofile, but maybe not multithread safe
def stop(self):
sys.settrace(None)
def hook(self, _frame, event, _arg=None):
if event == 'line':
return
entry = {'event': event, 'frame': _format_frame(_frame)}
if event == 'call' and _frame.f_back:
# we need the parent frame to determine the line number of the call
entry['parent_frame'] = _format_frame(_frame.f_back)
self.add(entry, frame=_frame)
return self.hook
def _get_stack_trace(self, frame=None):
# Getting the full stack trace is slow, and not useful in this case.
# SyncCollector only saves the top frame and event at each call and
# recomputes the complete stack at the end.
return None
def post_process(self):
# Transform the evented traces to full stack traces. This processing
# could be avoided since speedscope will transform that back to
# evented anyway, but it is actually simpler to integrate into the
# current speedscope logic, especially when mixed with SQLCollector.
# We could improve it by saving as evented and manage it later.
stack = []
for entry in self._entries:
frame = entry.pop('frame')
event = entry.pop('event')
if event == 'call':
if stack:
stack[-1] = entry.pop('parent_frame')
stack.append(frame)
elif event == 'return':
stack.pop()
entry['stack'] = stack[:]
super().post_process()
class QwebTracker():
@classmethod
def wrap_render(cls, method_render):
@functools.wraps(method_render)
def _tracked_method_render(self, template, values=None, **options):
current_thread = threading.current_thread()
execution_context_enabled = getattr(current_thread, 'profiler_params', {}).get('execution_context_qweb')
qweb_hooks = getattr(current_thread, 'qweb_hooks', ())
if execution_context_enabled or qweb_hooks:
# To have the new compilation cached because the generated code will change.
# Therefore 'profile' is a key to the cache.
options['profile'] = True
return method_render(self, template, values, **options)
return _tracked_method_render
@classmethod
def wrap_compile(cls, method_compile):
@functools.wraps(method_compile)
def _tracked_compile(self, template):
if not self.env.context.get('profile'):
return method_compile(self, template)
template_functions, def_name = method_compile(self, template)
render_template = template_functions[def_name]
def profiled_method_compile(self, values):
options = template_functions['options']
ref = options.get('ref')
ref_xml = options.get('ref_xml')
qweb_tracker = QwebTracker(ref, ref_xml, self.env.cr)
self = self.with_context(qweb_tracker=qweb_tracker)
if qweb_tracker.execution_context_enabled:
with ExecutionContext(template=ref):
return render_template(self, values)
return render_template(self, values)
template_functions[def_name] = profiled_method_compile
return (template_functions, def_name)
return _tracked_compile
@classmethod
def wrap_compile_directive(cls, method_compile_directive):
@functools.wraps(method_compile_directive)
def _tracked_compile_directive(self, el, options, directive, level):
if not options.get('profile') or directive in ('inner-content', 'tag-open', 'tag-close'):
return method_compile_directive(self, el, options, directive, level)
enter = f"{' ' * 4 * level}self.env.context['qweb_tracker'].enter_directive({directive!r}, {el.attrib!r}, {options['_qweb_error_path_xml'][0]!r})"
leave = f"{' ' * 4 * level}self.env.context['qweb_tracker'].leave_directive({directive!r}, {el.attrib!r}, {options['_qweb_error_path_xml'][0]!r})"
code_directive = method_compile_directive(self, el, options, directive, level)
return [enter, *code_directive, leave] if code_directive else []
return _tracked_compile_directive
def __init__(self, view_id, arch, cr):
current_thread = threading.current_thread() # don't store current_thread on self
self.execution_context_enabled = getattr(current_thread, 'profiler_params', {}).get('execution_context_qweb')
self.qweb_hooks = getattr(current_thread, 'qweb_hooks', ())
self.context_stack = []
self.cr = cr
self.view_id = view_id
for hook in self.qweb_hooks:
hook('render', self.cr.sql_log_count, view_id=view_id, arch=arch)
def enter_directive(self, directive, attrib, xpath):
execution_context = None
if self.execution_context_enabled:
directive_info = {}
if ('t-' + directive) in attrib:
directive_info['t-' + directive] = repr(attrib['t-' + directive])
if directive == 'set':
if 't-value' in attrib:
directive_info['t-value'] = repr(attrib['t-value'])
if 't-valuef' in attrib:
directive_info['t-valuef'] = repr(attrib['t-valuef'])
for key in attrib:
if key.startswith('t-set-') or key.startswith('t-setf-'):
directive_info[key] = repr(attrib[key])
elif directive == 'foreach':
directive_info['t-as'] = repr(attrib['t-as'])
elif directive == 'groups' and 'groups' in attrib and not directive_info.get('t-groups'):
directive_info['t-groups'] = repr(attrib['groups'])
elif directive == 'att':
for key in attrib:
if key.startswith('t-att-') or key.startswith('t-attf-'):
directive_info[key] = repr(attrib[key])
elif directive == 'options':
for key in attrib:
if key.startswith('t-options-'):
directive_info[key] = repr(attrib[key])
elif ('t-' + directive) not in attrib:
directive_info['t-' + directive] = None
execution_context = tools.profiler.ExecutionContext(**directive_info, xpath=xpath)
execution_context.__enter__()
self.context_stack.append(execution_context)
for hook in self.qweb_hooks:
hook('enter', self.cr.sql_log_count, view_id=self.view_id, xpath=xpath, directive=directive, attrib=attrib)
def leave_directive(self, directive, attrib, xpath):
if self.execution_context_enabled:
self.context_stack.pop().__exit__()
for hook in self.qweb_hooks:
hook('leave', self.cr.sql_log_count, view_id=self.view_id, xpath=xpath, directive=directive, attrib=attrib)
class QwebCollector(Collector):
"""
Record qweb execution with directive trace.
"""
name = 'qweb'
def __init__(self):
super().__init__()
self.events = []
def hook(event, sql_log_count, **kwargs):
self.events.append((event, kwargs, sql_log_count, real_time()))
self.hook = hook
def _get_directive_profiling_name(self, directive, attrib):
expr = ''
if directive == 'set':
if 't-set' in attrib:
expr = f"t-set={repr(attrib['t-set'])}"
if 't-value' in attrib:
expr += f" t-value={repr(attrib['t-value'])}"
if 't-valuef' in attrib:
expr += f" t-valuef={repr(attrib['t-valuef'])}"
for key in attrib:
if key.startswith('t-set-') or key.startswith('t-setf-'):
if expr:
expr += ' '
expr += f"{key}={repr(attrib[key])}"
elif directive == 'foreach':
expr = f"t-foreach={repr(attrib['t-foreach'])} t-as={repr(attrib['t-as'])}"
elif directive == 'options':
if attrib.get('t-options'):
expr = f"t-options={repr(attrib['t-options'])}"
for key in attrib:
if key.startswith('t-options-'):
expr = f"{expr} {key}={repr(attrib[key])}"
elif directive == 'att':
for key in attrib:
if key == 't-att' or key.startswith('t-att-') or key.startswith('t-attf-'):
if expr:
expr += ' '
expr += f"{key}={repr(attrib[key])}"
elif ('t-' + directive) in attrib:
expr = f"t-{directive}={repr(attrib['t-' + directive])}"
else:
expr = f"t-{directive}"
return expr
def start(self):
init_thread = self.profiler.init_thread
if not hasattr(init_thread, 'qweb_hooks'):
init_thread.qweb_hooks = []
init_thread.qweb_hooks.append(self.hook)
def stop(self):
self.profiler.init_thread.qweb_hooks.remove(self.hook)
def post_process(self):
last_event_query = None
last_event_time = None
stack = []
results = []
archs = {}
for event, kwargs, sql_count, time in self.events:
if event == 'render':
archs[kwargs['view_id']] = kwargs['arch']
continue
# update the active directive with the elapsed time and queries
if stack:
top = stack[-1]
top['delay'] += time - last_event_time
top['query'] += sql_count - last_event_query
last_event_time = time
last_event_query = sql_count
directive = self._get_directive_profiling_name(kwargs['directive'], kwargs['attrib'])
if directive:
if event == 'enter':
data = {
'view_id': kwargs['view_id'],
'xpath': kwargs['xpath'],
'directive': directive,
'delay': 0,
'query': 0,
}
results.append(data)
stack.append(data)
else:
assert event == "leave"
data = stack.pop()
self.add({'results': {'archs': archs, 'data': results}})
super().post_process()
class ExecutionContext:
"""
Add some context on thread at current call stack level.
This context stored by collector beside stack and is used by Speedscope
to add a level to the stack with this information.
"""
def __init__(self, **context):
self.context = context
self.previous_context = None
def __enter__(self):
current_thread = threading.current_thread()
self.previous_context = getattr(current_thread, 'exec_context', ())
current_thread.exec_context = self.previous_context + ((stack_size(), self.context),)
def __exit__(self, *_args):
threading.current_thread().exec_context = self.previous_context
class Profiler:
"""
Context manager to use to start the recording of some execution.
Will save sql and async stack trace by default.
"""
def __init__(self, collectors=None, db=..., profile_session=None,
description=None, disable_gc=False, params=None):
"""
:param db: database name to use to save results.
Will try to define database automatically by default.
Use value ``None`` to not save results in a database.
:param collectors: list of string and Collector object Ex: ['sql', PeriodicCollector(interval=0.2)]. Use `None` for default collectors
:param profile_session: session description to use to reproup multiple profile. use make_session(name) for default format.
:param description: description of the current profiler Suggestion: (route name/test method/loading module, ...)
:param disable_gc: flag to disable gc durring profiling (usefull to avoid gc while profiling, especially during sql execution)
:param params: parameters usable by collectors (like frame interval)
"""
self.start_time = 0
self.duration = 0
self.profile_session = profile_session or make_session()
self.description = description
self.init_frame = None
self.init_stack_trace = None
self.init_thread = None
self.disable_gc = disable_gc
self.filecache = {}
self.params = params or {} # custom parameters usable by collectors
self.profile_id = None
if db is ...:
# determine database from current thread
db = getattr(threading.current_thread(), 'dbname', None)
if not db:
# only raise if path is not given and db is not explicitely disabled
raise Exception('Database name cannot be defined automaticaly. \n Please provide a valid/falsy dbname or path parameter')
self.db = db
# collectors
if collectors is None:
collectors = ['sql', 'traces_async']
self.collectors = []
for collector in collectors:
if isinstance(collector, str):
try:
collector = Collector.make(collector)
except Exception:
_logger.error("Could not create collector with name %r", collector)
continue
collector.profiler = self
self.collectors.append(collector)
def __enter__(self):
self.init_thread = threading.current_thread()
try:
self.init_frame = get_current_frame(self.init_thread)
self.init_stack_trace = _get_stack_trace(self.init_frame)
except KeyError:
# when using thread pools (gevent) the thread won't exist in the current_frames
# this case is managed by http.py but will still fail when adding a profiler
# inside a piece of code that may be called by a longpolling route.
# in this case, avoid crashing the caller and disable all collectors
self.init_frame = self.init_stack_trace = self.collectors = []
self.db = self.params = None
message = "Cannot start profiler, thread not found. Is the thread part of a thread pool?"
if not self.description:
self.description = message
_logger.warning(message)
if self.description is None:
frame = self.init_frame
code = frame.f_code
self.description = f"{frame.f_code.co_name} ({code.co_filename}:{frame.f_lineno})"
if self.params:
self.init_thread.profiler_params = self.params
if self.disable_gc and gc.isenabled():
gc.disable()
self.start_time = real_time()
for collector in self.collectors:
collector.start()
return self
def __exit__(self, *args):
try:
for collector in self.collectors:
collector.stop()
self.duration = real_time() - self.start_time
self._add_file_lines(self.init_stack_trace)
if self.db:
# pylint: disable=import-outside-toplevel
from odoo.sql_db import db_connect # only import from odoo if/when needed.
with db_connect(self.db).cursor() as cr:
values = {
"name": self.description,
"session": self.profile_session,
"create_date": real_datetime_now(),
"init_stack_trace": json.dumps(_format_stack(self.init_stack_trace)),
"duration": self.duration,
"entry_count": self.entry_count(),
"sql_count": sum(len(collector.entries) for collector in self.collectors if collector.name == 'sql')
}
for collector in self.collectors:
if collector.entries:
values[collector.name] = json.dumps(collector.entries)
query = sql.SQL("INSERT INTO {}({}) VALUES %s RETURNING id").format(
sql.Identifier("ir_profile"),
sql.SQL(",").join(map(sql.Identifier, values)),
)
cr.execute(query, [tuple(values.values())])
self.profile_id = cr.fetchone()[0]
_logger.info('ir_profile %s (%s) created', self.profile_id, self.profile_session)
finally:
if self.disable_gc:
gc.enable()
if self.params:
del self.init_thread.profiler_params
def _add_file_lines(self, stack):
for index, frame in enumerate(stack):
(filename, lineno, name, line) = frame
if line != '':
continue
# retrieve file lines from the filecache
if not lineno:
continue
try:
filelines = self.filecache[filename]
except KeyError:
try:
with tools.file_open(filename, filter_ext=('.py',)) as f:
filelines = f.readlines()
except (ValueError, FileNotFoundError): # mainly for <decorator> "filename"
filelines = None
self.filecache[filename] = filelines
# fill in the line
if filelines is not None:
line = filelines[lineno - 1]
stack[index] = (filename, lineno, name, line)
def entry_count(self):
""" Return the total number of entries collected in this profiler. """
return sum(len(collector.entries) for collector in self.collectors)
def format_path(self, path):
"""
Utility function to format a path for this profiler.
This is mainly useful to uniquify a path between executions.
"""
return path.format(
time=real_datetime_now().strftime("%Y%m%d-%H%M%S"),
len=self.entry_count(),
desc=re.sub("[^0-9a-zA-Z-]+", "_", self.description)
)
def json(self):
"""
Utility function to generate a json version of this profiler.
This is useful to write profiling entries into a file, such as::
with Profiler(db=None) as profiler:
do_stuff()
filename = p.format_path('/home/foo/{desc}_{len}.json')
with open(filename, 'w') as f:
f.write(profiler.json())
"""
return json.dumps({
"name": self.description,
"session": self.profile_session,
"create_date": real_datetime_now().strftime("%Y%m%d-%H%M%S"),
"init_stack_trace": _format_stack(self.init_stack_trace),
"duration": self.duration,
"collectors": {collector.name: collector.entries for collector in self.collectors},
}, indent=4)
class Nested:
"""
Utility to nest another context manager inside a profiler.
The profiler should only be called directly in the "with" without nesting it
with ExitStack. If not, the retrieval of the 'init_frame' may be incorrect
and lead to an error "Limit frame was not found" when profiling. Since the
stack will ignore all stack frames inside this file, the nested frames will
be ignored, too. This is also why Nested() does not use
contextlib.contextmanager.
"""
def __init__(self, profiler, context_manager):
self.profiler = profiler
self.context_manager = context_manager
def __enter__(self):
self.profiler.__enter__()
return self.context_manager.__enter__()
def __exit__(self, exc_type, exc_value, traceback):
try:
return self.context_manager.__exit__(exc_type, exc_value, traceback)
finally:
self.profiler.__exit__(exc_type, exc_value, traceback)