Added periodic idmapper cache size check along with conditional flush mechanism. Ran a lot of tests and stress tests to get statistics on usage.
This commit is contained in:
parent
a143733ccf
commit
f2c75bd0f6
8 changed files with 145 additions and 33 deletions
|
|
@ -13,7 +13,7 @@ import twisted
|
||||||
from time import time as timemeasure
|
from time import time as timemeasure
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from src.server.caches import get_cache_sizes
|
#from src.server.caches import get_cache_sizes
|
||||||
from src.server.sessionhandler import SESSIONS
|
from src.server.sessionhandler import SESSIONS
|
||||||
from src.scripts.models import ScriptDB
|
from src.scripts.models import ScriptDB
|
||||||
from src.objects.models import ObjectDB
|
from src.objects.models import ObjectDB
|
||||||
|
|
@ -678,8 +678,8 @@ class CmdServerLoad(MuxCommand):
|
||||||
loadavg = os.getloadavg()
|
loadavg = os.getloadavg()
|
||||||
psize = _resource.getpagesize()
|
psize = _resource.getpagesize()
|
||||||
pid = os.getpid()
|
pid = os.getpid()
|
||||||
rmem = float(os.popen('ps -p %d -o %s | tail -1' % (pid, "rss")).read()) / 1024.0 # resident memory
|
rmem = float(os.popen('ps -p %d -o %s | tail -1' % (pid, "rss")).read()) / 1000.0 # resident memory
|
||||||
vmem = float(os.popen('ps -p %d -o %s | tail -1' % (pid, "vsz")).read()) / 1024.0 # virtual memory
|
vmem = float(os.popen('ps -p %d -o %s | tail -1' % (pid, "vsz")).read()) / 1000.0 # virtual memory
|
||||||
pmem = float(os.popen('ps -p %d -o %s | tail -1' % (pid, "%mem")).read()) # percent of resident memory to total
|
pmem = float(os.popen('ps -p %d -o %s | tail -1' % (pid, "%mem")).read()) # percent of resident memory to total
|
||||||
rusage = resource.getrusage(resource.RUSAGE_SELF)
|
rusage = resource.getrusage(resource.RUSAGE_SELF)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,8 +19,6 @@ __all__ = ["Script", "DoNothing", "CheckSessions",
|
||||||
|
|
||||||
_GA = object.__getattribute__
|
_GA = object.__getattribute__
|
||||||
_SESSIONS = None
|
_SESSIONS = None
|
||||||
# attr-cache size in MB
|
|
||||||
_ATTRIBUTE_CACHE_MAXSIZE = settings.ATTRIBUTE_CACHE_MAXSIZE
|
|
||||||
|
|
||||||
class ExtendedLoopingCall(LoopingCall):
|
class ExtendedLoopingCall(LoopingCall):
|
||||||
"""
|
"""
|
||||||
|
|
@ -155,7 +153,7 @@ class ScriptBase(TypeClass):
|
||||||
def _step_errback(self, e):
|
def _step_errback(self, e):
|
||||||
"callback for runner errors"
|
"callback for runner errors"
|
||||||
cname = self.__class__.__name__
|
cname = self.__class__.__name__
|
||||||
estring = _("Script %(key)s(#%(dbid)i) of type '%(cname)s': at_repeat() error '%(err)s'.") % \
|
estring = _("Script %(key)s(#%(dbid)s) of type '%(cname)s': at_repeat() error '%(err)s'.") % \
|
||||||
{"key": self.key, "dbid": self.dbid, "cname": cname,
|
{"key": self.key, "dbid": self.dbid, "cname": cname,
|
||||||
"err": e.getErrorMessage()}
|
"err": e.getErrorMessage()}
|
||||||
try:
|
try:
|
||||||
|
|
@ -538,6 +536,24 @@ class CheckSessions(Script):
|
||||||
#print "ValidateSessions run"
|
#print "ValidateSessions run"
|
||||||
_SESSIONS.validate_sessions()
|
_SESSIONS.validate_sessions()
|
||||||
|
|
||||||
|
_FLUSH_CACHE = None
|
||||||
|
_IDMAPPER_CACHE_MAX_MEMORY = settings.IDMAPPER_CACHE_MAXSIZE
|
||||||
|
class ValidateIdmapperCache(Script):
|
||||||
|
"""
|
||||||
|
Check memory use of idmapper cache
|
||||||
|
"""
|
||||||
|
def at_script_creation(self):
|
||||||
|
self.key = "sys_cache_validate"
|
||||||
|
self.desc = _("Restrains size of idmapper cache.")
|
||||||
|
self.interval = 61 * 5 # staggered compared to session check
|
||||||
|
self.persistent = True
|
||||||
|
|
||||||
|
def at_repeat(self):
|
||||||
|
"Called every ~5 mins"
|
||||||
|
global _FLUSH_CACHE
|
||||||
|
if not _FLUSH_CACHE:
|
||||||
|
from src.utils.idmapper.base import conditional_flush as _FLUSH_CACHE
|
||||||
|
_FLUSH_CACHE(_IDMAPPER_CACHE_MAX_MEMORY)
|
||||||
|
|
||||||
class ValidateScripts(Script):
|
class ValidateScripts(Script):
|
||||||
"Check script validation regularly"
|
"Check script validation regularly"
|
||||||
|
|
|
||||||
|
|
@ -161,7 +161,10 @@ def create_system_scripts():
|
||||||
script2 = create.create_script(scripts.ValidateScripts)
|
script2 = create.create_script(scripts.ValidateScripts)
|
||||||
# update the channel handler to make sure it's in sync
|
# update the channel handler to make sure it's in sync
|
||||||
script3 = create.create_script(scripts.ValidateChannelHandler)
|
script3 = create.create_script(scripts.ValidateChannelHandler)
|
||||||
if not script1 or not script2 or not script3:
|
# flush the idmapper cache
|
||||||
|
script4 = create.create_script(scripts.ValidateIdmapperCache)
|
||||||
|
|
||||||
|
if not script1 or not script2 or not script3 or not script4:
|
||||||
print " Error creating system scripts."
|
print " Error creating system scripts."
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -379,9 +379,6 @@ EVENNIA = Evennia(application)
|
||||||
print '-' * 50
|
print '-' * 50
|
||||||
print ' %(servername)s Server (%(version)s) started.' % {'servername': SERVERNAME, 'version': VERSION}
|
print ' %(servername)s Server (%(version)s) started.' % {'servername': SERVERNAME, 'version': VERSION}
|
||||||
|
|
||||||
if not settings.GAME_CACHE_TYPE:
|
|
||||||
print " caching disabled"
|
|
||||||
|
|
||||||
if AMP_ENABLED:
|
if AMP_ENABLED:
|
||||||
|
|
||||||
# The AMP protocol handles the communication between
|
# The AMP protocol handles the communication between
|
||||||
|
|
|
||||||
|
|
@ -125,18 +125,23 @@ ENCODINGS = ["utf-8", "latin-1", "ISO-8859-1"]
|
||||||
AMP_HOST = 'localhost'
|
AMP_HOST = 'localhost'
|
||||||
AMP_PORT = 5000
|
AMP_PORT = 5000
|
||||||
AMP_INTERFACE = '127.0.0.1'
|
AMP_INTERFACE = '127.0.0.1'
|
||||||
# Caching speeds up all forms of database access, often considerably. There
|
# Database objects are cached in what is known as the idmapper. The idmapper
|
||||||
# are (currently) only two settings, "local" or None, the latter of which turns
|
# caching results in a massive speedup of the server (since it dramatically
|
||||||
# off all caching completely. Local caching stores data in the process. It's
|
# limits the number of database accesses needed) and also allows for
|
||||||
# very fast but will go out of sync if more than one process writes to the
|
# storing temporary data on objects. It is however also the main memory
|
||||||
# database (such as when using procpool or an extensice web precense).
|
# consumer of Evennia. With this setting the cache can be capped and
|
||||||
GAME_CACHE_TYPE = "local"
|
# flushed when it reaches a certain size.
|
||||||
# Attributes on objects are cached aggressively for speed. If the number of
|
# Empirically, N_objects_in_cache ~ (RMEM - 35) / 0.0157.
|
||||||
# objects is large (and their attributes are often accessed) this can use up
|
# mem(MB) | objs in cache || mem(MB) | objs in cache
|
||||||
# a lot of memory. So every now and then Evennia checks the size of this
|
# 50 | ~1000 || 800 | ~49 000
|
||||||
# cache and resets it if it's too big. This variable sets the maximum
|
# 100 | ~4000 || 1200 | ~75 000
|
||||||
# size (in MB).
|
# 200 | ~10 000 || 1600 | ~100 000
|
||||||
ATTRIBUTE_CACHE_MAXSIZE = 100
|
# 500 | ~30 000 || 2000 | ~125 000
|
||||||
|
# Note that the exact memory usage is not exact, so err on the side of
|
||||||
|
# caution if running on a server with limited memory. How many objects
|
||||||
|
# need to be in memory at any given time depends very much on each game
|
||||||
|
# and how many players are active. Using None disables the cache cap.
|
||||||
|
IDMAPPER_CACHE_MAXSIZE = 200
|
||||||
|
|
||||||
######################################################################
|
######################################################################
|
||||||
# Evennia Database config
|
# Evennia Database config
|
||||||
|
|
|
||||||
|
|
@ -210,6 +210,4 @@ def c_moves(client):
|
||||||
## "heavy digger memory tester" definition
|
## "heavy digger memory tester" definition
|
||||||
ACTIONS = (c_login,
|
ACTIONS = (c_login,
|
||||||
c_logout,
|
c_logout,
|
||||||
(0.1, c_looks),
|
(1.0, c_digs))
|
||||||
(0.1, c_creates_obj),
|
|
||||||
(0.8, c_digs))
|
|
||||||
|
|
|
||||||
|
|
@ -49,12 +49,12 @@ if __name__ == "__main__":
|
||||||
vmem = data[:,2]
|
vmem = data[:,2]
|
||||||
nobj = data[:,3]
|
nobj = data[:,3]
|
||||||
|
|
||||||
# count total amount of objects
|
# calculate derivative of obj creation
|
||||||
#ntot = data[:,3].copy()
|
oderiv = (0.5*(nobj[2:] - nobj[:-2]) / (secs[2:] - secs[:-2])).copy()
|
||||||
|
|
||||||
fig = pp.figure()
|
fig = pp.figure()
|
||||||
ax1 = fig.add_subplot(111)
|
ax1 = fig.add_subplot(111)
|
||||||
ax1.set_title("Memory usage")
|
ax1.set_title("Memory usage (200 bots, auto-flush at RMEM ~ 200MB)")
|
||||||
ax1.set_xlabel("Time (mins)")
|
ax1.set_xlabel("Time (mins)")
|
||||||
ax1.set_ylabel("Memory usage (MB)")
|
ax1.set_ylabel("Memory usage (MB)")
|
||||||
ax1.plot(secs, rmem, "r", label="RMEM", lw=2)
|
ax1.plot(secs, rmem, "r", label="RMEM", lw=2)
|
||||||
|
|
@ -63,9 +63,36 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
ax2 = ax1.twinx()
|
ax2 = ax1.twinx()
|
||||||
ax2.plot(secs, nobj, "g--", label="objs in cache", lw=2)
|
ax2.plot(secs, nobj, "g--", label="objs in cache", lw=2)
|
||||||
#ax2.plot(secs, ntot, "r--", label="objs total", lw=2)
|
#ax2.plot(secs[:-2], oderiv/60.0, "g--", label="Objs/second", lw=2)
|
||||||
ax2.set_ylabel("Number of objects")
|
ax2.set_ylabel("Number of objects")
|
||||||
ax2.legend(loc="lower right")
|
ax2.legend(loc="lower right")
|
||||||
ax2.annotate("idmapper\nflush", xy=(70,480))
|
#ax2.annotate("All bots\nfinished\nconnecting", xy=(10, 16900))
|
||||||
ax2.annotate("@reload", xy=(185,600))
|
#ax2.annotate("idmapper\nflush", xy=(70,480))
|
||||||
|
#ax2.annotate("@reload", xy=(185,600))
|
||||||
|
|
||||||
|
# # plot mem vs cachesize
|
||||||
|
# nobj, rmem, vmem = nobj[:262].copy(), rmem[:262].copy(), vmem[:262].copy()
|
||||||
|
#
|
||||||
|
# fig = pp.figure()
|
||||||
|
# ax1 = fig.add_subplot(111)
|
||||||
|
# ax1.set_title("Memory usage per cache size")
|
||||||
|
# ax1.set_xlabel("Cache size (number of objects)")
|
||||||
|
# ax1.set_ylabel("Memory usage (MB)")
|
||||||
|
# ax1.plot(nobj, rmem, "r", label="RMEM", lw=2)
|
||||||
|
# ax1.plot(nobj, vmem, "b", label="VMEM", lw=2)
|
||||||
|
#
|
||||||
|
|
||||||
|
## # empirical estimate of memory usage: rmem = 35.0 + 0.0157 * Ncache
|
||||||
|
## # Ncache = int((rmem - 35.0) / 0.0157) (rmem in MB)
|
||||||
|
#
|
||||||
|
# rderiv_aver = 0.0157
|
||||||
|
# fig = pp.figure()
|
||||||
|
# ax1 = fig.add_subplot(111)
|
||||||
|
# ax1.set_title("Relation between memory and cache size")
|
||||||
|
# ax1.set_xlabel("Memory usage (MB)")
|
||||||
|
# ax1.set_ylabel("Idmapper Cache Size (number of objects)")
|
||||||
|
# rmem = numpy.linspace(35, 2000, 2000)
|
||||||
|
# nobjs = numpy.array([int((mem - 35.0) / 0.0157) for mem in rmem])
|
||||||
|
# ax1.plot(rmem, nobjs, "r", lw=2)
|
||||||
|
|
||||||
pp.show()
|
pp.show()
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ leave caching unexpectedly (no use of WeakRefs).
|
||||||
Also adds cache_size() for monitoring the size of the cache.
|
Also adds cache_size() for monitoring the size of the cache.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os, threading, gc
|
import os, threading, gc, time
|
||||||
#from twisted.internet import reactor
|
#from twisted.internet import reactor
|
||||||
#from twisted.internet.threads import blockingCallFromThread
|
#from twisted.internet.threads import blockingCallFromThread
|
||||||
from weakref import WeakValueDictionary
|
from weakref import WeakValueDictionary
|
||||||
|
|
@ -15,10 +15,13 @@ from twisted.internet.reactor import callFromThread
|
||||||
from django.core.exceptions import ObjectDoesNotExist, FieldError
|
from django.core.exceptions import ObjectDoesNotExist, FieldError
|
||||||
from django.db.models.base import Model, ModelBase
|
from django.db.models.base import Model, ModelBase
|
||||||
from django.db.models.signals import post_save, pre_delete, post_syncdb
|
from django.db.models.signals import post_save, pre_delete, post_syncdb
|
||||||
|
from src.utils import logger
|
||||||
from src.utils.utils import dbref, get_evennia_pids, to_str
|
from src.utils.utils import dbref, get_evennia_pids, to_str
|
||||||
|
|
||||||
from manager import SharedMemoryManager
|
from manager import SharedMemoryManager
|
||||||
|
|
||||||
|
AUTO_FLUSH_MIN_INTERVAL = 60.0 * 5 # at least 5 mins between cache flushes
|
||||||
|
|
||||||
_GA = object.__getattribute__
|
_GA = object.__getattribute__
|
||||||
_SA = object.__setattr__
|
_SA = object.__setattr__
|
||||||
_DA = object.__delattr__
|
_DA = object.__delattr__
|
||||||
|
|
@ -36,7 +39,6 @@ _SERVER_PID, _PORTAL_PID = get_evennia_pids()
|
||||||
_IS_SUBPROCESS = (_SERVER_PID and _PORTAL_PID) and not _SELF_PID in (_SERVER_PID, _PORTAL_PID)
|
_IS_SUBPROCESS = (_SERVER_PID and _PORTAL_PID) and not _SELF_PID in (_SERVER_PID, _PORTAL_PID)
|
||||||
_IS_MAIN_THREAD = threading.currentThread().getName() == "MainThread"
|
_IS_MAIN_THREAD = threading.currentThread().getName() == "MainThread"
|
||||||
|
|
||||||
|
|
||||||
class SharedMemoryModelBase(ModelBase):
|
class SharedMemoryModelBase(ModelBase):
|
||||||
# CL: upstream had a __new__ method that skipped ModelBase's __new__ if
|
# CL: upstream had a __new__ method that skipped ModelBase's __new__ if
|
||||||
# SharedMemoryModelBase was not in the model class's ancestors. It's not
|
# SharedMemoryModelBase was not in the model class's ancestors. It's not
|
||||||
|
|
@ -225,6 +227,7 @@ class SharedMemoryModel(Model):
|
||||||
Method to store an instance in the cache.
|
Method to store an instance in the cache.
|
||||||
"""
|
"""
|
||||||
if instance._get_pk_val() is not None:
|
if instance._get_pk_val() is not None:
|
||||||
|
|
||||||
cls.__instance_cache__[instance._get_pk_val()] = instance
|
cls.__instance_cache__[instance._get_pk_val()] = instance
|
||||||
cache_instance = classmethod(cache_instance)
|
cache_instance = classmethod(cache_instance)
|
||||||
|
|
||||||
|
|
@ -301,6 +304,7 @@ class WeakSharedMemoryModelBase(SharedMemoryModelBase):
|
||||||
cls.__instance_cache__ = WeakValueDictionary()
|
cls.__instance_cache__ = WeakValueDictionary()
|
||||||
cls._idmapper_recache_protection = False
|
cls._idmapper_recache_protection = False
|
||||||
|
|
||||||
|
|
||||||
class WeakSharedMemoryModel(SharedMemoryModel):
|
class WeakSharedMemoryModel(SharedMemoryModel):
|
||||||
"""
|
"""
|
||||||
Uses a WeakValue dictionary for caching instead of a regular one
|
Uses a WeakValue dictionary for caching instead of a regular one
|
||||||
|
|
@ -309,6 +313,7 @@ class WeakSharedMemoryModel(SharedMemoryModel):
|
||||||
class Meta:
|
class Meta:
|
||||||
abstract = True
|
abstract = True
|
||||||
|
|
||||||
|
|
||||||
def flush_cache(**kwargs):
|
def flush_cache(**kwargs):
|
||||||
"""
|
"""
|
||||||
Flush idmapper cache. When doing so the cache will
|
Flush idmapper cache. When doing so the cache will
|
||||||
|
|
@ -346,6 +351,7 @@ def flush_cached_instance(sender, instance, **kwargs):
|
||||||
sender.flush_cached_instance(instance)
|
sender.flush_cached_instance(instance)
|
||||||
pre_delete.connect(flush_cached_instance)
|
pre_delete.connect(flush_cached_instance)
|
||||||
|
|
||||||
|
|
||||||
def update_cached_instance(sender, instance, **kwargs):
|
def update_cached_instance(sender, instance, **kwargs):
|
||||||
"""
|
"""
|
||||||
Re-cache the given instance in the idmapper cache
|
Re-cache the given instance in the idmapper cache
|
||||||
|
|
@ -355,6 +361,66 @@ def update_cached_instance(sender, instance, **kwargs):
|
||||||
sender.cache_instance(instance)
|
sender.cache_instance(instance)
|
||||||
post_save.connect(update_cached_instance)
|
post_save.connect(update_cached_instance)
|
||||||
|
|
||||||
|
|
||||||
|
LAST_FLUSH = None
|
||||||
|
def conditional_flush(max_rmem, force=False):
|
||||||
|
"""
|
||||||
|
Flush the cache if the estimated memory usage exceeds max_rmem.
|
||||||
|
|
||||||
|
The flusher has a timeout to avoid flushing over and over
|
||||||
|
in particular situations (this means that for some setups
|
||||||
|
the memory usage will exceed the requirement and a server with
|
||||||
|
more memory is probably required for the given game)
|
||||||
|
|
||||||
|
force - forces a flush, regardless of timeout.
|
||||||
|
"""
|
||||||
|
global LAST_FLUSH
|
||||||
|
|
||||||
|
def mem2cachesize(desired_rmem):
|
||||||
|
"""
|
||||||
|
Estimate the size of the idmapper cache based on the memory
|
||||||
|
desired. This is used to optionally cap the cache size.
|
||||||
|
|
||||||
|
desired_rmem - memory in MB (minimum 50MB)
|
||||||
|
|
||||||
|
The formula is empirically estimated from usage tests (Linux)
|
||||||
|
and is
|
||||||
|
Ncache = RMEM - 35.0 / 0.0157
|
||||||
|
where RMEM is given in MB and Ncache is the size of the cache
|
||||||
|
for this memory usage. VMEM tends to be about 100MB higher
|
||||||
|
than RMEM for large memory usage.
|
||||||
|
"""
|
||||||
|
vmem = max(desired_rmem, 50.0)
|
||||||
|
Ncache = int(abs(float(vmem) - 35.0) / 0.0157)
|
||||||
|
return Ncache
|
||||||
|
|
||||||
|
if not max_rmem:
|
||||||
|
# auto-flush is disabled
|
||||||
|
return
|
||||||
|
|
||||||
|
now = time.time()
|
||||||
|
if not LAST_FLUSH:
|
||||||
|
# server is just starting
|
||||||
|
LAST_FLUSH = now
|
||||||
|
return
|
||||||
|
|
||||||
|
if ((now - LAST_FLUSH) < AUTO_FLUSH_MIN_INTERVAL) and not force:
|
||||||
|
# too soon after last flush.
|
||||||
|
logger.log_warnmsg("Warning: Idmapper flush called more than "\
|
||||||
|
"once in %s min interval. Check memory usage." % (AUTO_FLUSH_MIN_INTERVAL/60.0))
|
||||||
|
return
|
||||||
|
|
||||||
|
# check actual memory usage
|
||||||
|
Ncache_max = mem2cachesize(max_rmem)
|
||||||
|
Ncache, _ = cache_size()
|
||||||
|
actual_rmem = float(os.popen('ps -p %d -o %s | tail -1' % (os.getpid(), "rss")).read()) / 1000.0 # resident memory
|
||||||
|
|
||||||
|
if Ncache >= Ncache_max and actual_rmem > max_rmem * 0.9:
|
||||||
|
# flush cache when number of objects in cache is big enough and our
|
||||||
|
# actual memory use is within 10% of our set max
|
||||||
|
flush_cache()
|
||||||
|
LAST_FLUSH = now
|
||||||
|
|
||||||
def cache_size(mb=True):
|
def cache_size(mb=True):
|
||||||
"""
|
"""
|
||||||
Calculate statistics about the cache.
|
Calculate statistics about the cache.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue