Datastore Anti-Pattern #1

Don't do query.get()

from google.appengine.ext import ndb

# User entity that is looked up through its `name` property.
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default)
    # so the example measures the datastore itself.
    _use_cache = False
    _use_memcache = False
    name = ndb.StringProperty()
    data = ndb.StringProperty()

# No id is passed here; 'current_user' is stored only as the `name` property.
User(name='current_user', data='foo').put()

# Anti-pattern: fetching one known entity through a query on `name`.
# The query first scans the index for the entity key, then reads the entity.
q = User.query(User.name == 'current_user')
user = q.get()
print user

Datastore Pattern #1

When you can do get_by_id

from google.appengine.ext import ndb

# User entity addressed by a caller-chosen string id instead of a property.
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default)
    # so the example measures the datastore itself.
    _use_cache = False
    _use_memcache = False
    data = ndb.StringProperty()

# The identifier is stored as the key id rather than as a `name` property.
User(id='current_user', data='foo').put()
# in another request
# Lookup by id reads straight from the entity table; no index scan is needed.
user = User.get_by_id('current_user')
print user

Datastore Pattern #1

Why?

  • query has to first scan the index table for the entity key: 2 x READ
  • get_by_id looks the entity up directly in the entity table: 1 x READ
  • Faster and cheaper

Datastore Anti-Pattern #2

Don't use indexed properties

from google.appengine.ext import ndb

# User entity whose `secret` property is left indexed (no indexed=False).
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default).
    _use_cache = False
    _use_memcache = False
    # Anti-pattern: this property is never queried, yet it is still indexed.
    secret = ndb.StringProperty()

# put() returns the entity key, which is kept for the later lookup.
k = User(secret='iamindexed').put()
# in another request
user = k.get()
# Changing an indexed property means the datastore must also replace
# the index entries for it when the entity is written back.
user.secret = 'iamreallyindexed'
user.put()

Datastore Pattern #2

When you never need to query against them

from google.appengine.ext import ndb

# User entity whose `secret` property is explicitly excluded from indexes.
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default).
    _use_cache = False
    _use_memcache = False
    # indexed=False: the property is never queried, so no index is kept.
    secret = ndb.StringProperty(indexed=False)

# put() returns the entity key, which is kept for the later lookup.
k = User(secret='iamunindexed').put()
# in another request
user = k.get()
# With no index on `secret`, the update only rewrites the entity body.
user.secret = 'iamreallyunindexed'
user.put()

Datastore Pattern #2

Why?

  • When updating an entity, the datastore has to delete the old index entries and insert new ones for each indexed property: 4 x WRITE + 1 WRITE
  • indexed=False only updates the entity body: 1 WRITE
  • Faster, less bandwidth, cheaper

Datastore Anti-Pattern #3

Don't query full entities

from google.appengine.ext import ndb

class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()
    email = ndb.StringProperty()
    phone = ndb.StringProperty()

User(id='john1', city='London', email='john@doe.com', phone='1-800-JOHN').put()
User(id='jane1', city='London', email='jane@doe.com', phone='1-800-JANE').put()

for user in User.query(User.city == 'London'):
    print user.email

Datastore Pattern #3

When you can do projection query

from google.appengine.ext import ndb

class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()
    email = ndb.StringProperty()
    phone = ndb.StringProperty()

User(id='john1', city='London', email='john@doe.com', phone='1-800-JOHN').put()
User(id='jane1', city='London', email='jane@doe.com', phone='1-800-JANE').put()

for user in User.query(User.city == 'London').iter(projection=['email']):
    print user.email

Datastore Pattern #3

Why?

  • regular queries fetch all the entity fields: 1 x READ
  • projection queries only fetch projected entity fields from the index: 1 x SMALL READ
  • Faster, less bandwidth, maybe cheaper (because projected properties need to be indexed)

Datastore Anti-Pattern #4

Don't use offset

from google.appengine.ext import ndb

# User entity with a single queryable `city` property.
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default).
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()

# Seed 20 users (heidi0 .. heidi19), all in Zurich, in one batch.
ndb.put_multi([User(id='heidi%s' % i, city='Zurich') for i in xrange(20)])

# First page: results 1-10.
users = User.query(User.city == 'Zurich').iter(limit=10)
# on another request
# Anti-pattern: the second page skips the first 10 results with offset,
# but the skipped results are still fetched before being discarded.
users = User.query(User.city == 'Zurich').iter(limit=10, offset=10)
for user in users:
    print user

Datastore Pattern #4

When you can use cursor

from google.appengine.ext import ndb

# User entity with a single queryable `city` property.
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default).
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()

# Seed 20 users (heidi0 .. heidi19), all in Zurich, in one batch.
ndb.put_multi([User(id='heidi%s' % i, city='Zurich') for i in xrange(20)])

# First page: fetch_page returns the results, a cursor marking where the
# page ended, and a flag that is unpacked here as `more`.
users, cursor, more = User.query(User.city == 'Zurich').fetch_page(10)
# on another request
# Second page: resume from the cursor instead of skipping with an offset.
users, cursor, more = User.query(User.city == 'Zurich').fetch_page(10, start_cursor=cursor)
for user in users:
    print user

Datastore Pattern #4

Why?

  • offset=10,limit=10 discards the first 10 results, but still fetches them: 20 x READ
  • offset=N can trigger DeadlineExceededError for large values of N
  • start_cursor=cursor resumes fetching at the cursor position: 10 x READ
  • Faster, less bandwidth, cheaper

App Engine Anti-Pattern #5

Don't get from the datastore

from google.appengine.ext import ndb

# User entity read back with every NDB cache layer switched off.
class User(ndb.Model):
    # Anti-pattern setup: NDB instance caching and memcache (both on by
    # default) are disabled, so no cache can serve the read below.
    _use_cache = False
    _use_memcache = False
    data = ndb.StringProperty()

User(id='current_user', data='foo').put()
# in another request
# With both caches off, this lookup goes to the datastore.
user = User.get_by_id('current_user')
print user

App Engine Pattern #5

When you can get from memcache

from google.appengine.ext import ndb
from google.appengine.api import memcache
import json
class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    data = ndb.StringProperty()
user = User(id='current_user', data='foo'); user.put()
memcache.set('current_user', json.dumps(user.to_dict()))
# in another request
user = memcache.get('current_user')
if not user: user = User.get_by_id('current_user').to_dict()
print user

App Engine Pattern #5

Or from memory using ndb builtin caching

from google.appengine.ext import ndb

# User entity that relies on NDB's built-in caching (nothing is disabled).
class User(ndb.Model):
    # NDB uses instance caching and memcache by default:
    # _use_cache = True
    # _use_memcache = True
    data = ndb.StringProperty()

User(id='current_user', data='foo').put()

# Same get_by_id call as in the other examples; caching is left to NDB.
user = User.get_by_id('current_user')
print user

App Engine Pattern #5

Why?

  • reading from memcache is faster
  • reading from memcache is cheaper
  • reading from instance memory is free

App Engine Anti-Pattern #6

Don't do individual RPC

from google.appengine.ext import ndb

# User entity with a single `city` property.
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default).
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()

# Anti-pattern: 100 separate synchronous put() calls, one per entity.
for i in xrange(100):
    User(id='heidi%s' % i, city='Zurich').put()

App Engine Pattern #6

When you can batch them

from google.appengine.ext import ndb

# User entity with a single `city` property.
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default).
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()

# Describe the 100 entities lazily, then hand them to put_multi in one call
# so the writes are batched instead of issued one by one.
heidis = (User(id='heidi%s' % i, city='Zurich') for i in xrange(100))
ndb.put_multi(heidis)

App Engine Pattern #6

Or let ndb batch them for you

from google.appengine.ext import ndb

# User entity with a single `city` property.
class User(ndb.Model):
    # Disable NDB instance caching and memcache (both are on by default).
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()

# Kick off every write asynchronously and keep the futures; put_async
# lets NDB batch the writes and run them in parallel.
futures = []
for i in xrange(100):
    futures.append(User(id='heidi%s' % i, city='Zurich').put_async())
# do stuff
# Block until all outstanding writes have completed.
ndb.Future.wait_all(futures)

App Engine Pattern #6

Why?

  • put_multi and put_async batch writes and run them in parallel
  • fewer RPCs, less bandwidth, faster
  • applicable to datastore, memcache, taskqueue and Full Text Search

App Engine Anti-Pattern #7

Don't perform synchronous RPC in sequence

from google.appengine.api import urlfetch

print urlfetch.fetch("http://proppy-appstats.appspot.com")
print urlfetch.fetch("http://proppy-appstats.appspot.com/js/slides.js")
print urlfetch.fetch("http://proppy-appstats.appspot.com/theme/css/default.css")

App Engine Pattern #7

When you can do async RPC in parallel

from google.appengine.ext import ndb

ctx = ndb.get_context()
futures = []
futures.append(ctx.urlfetch("http://proppy-appstats.appspot.com"))
futures.append(ctx.urlfetch("http://proppy-appstats.appspot.com/js/slides.js"))
futures.append(ctx.urlfetch("http://proppy-appstats.appspot.com/theme/css/default.css"))
# do stuff
print ndb.Future.wait_all(futures)


App Engine Pattern #7

Why?

  • latency only depends on the slowest async rpc
  • instances can perform other computation while waiting for I/O

App Engine Anti-Pattern #8

Don't do individual datastore RPC

from google.appengine.ext import ndb

class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()

try:
  kazo_sensei = User(id="kaz", city="TOK").put()
  raise Exception("failure")
  User(id="proppy", city="SF", parent=kazo_sensei).put()
except Exception as e:
  print e
print User.get_by_id("kaz")

App Engine Pattern #8

When you need consistency

from google.appengine.ext import ndb
class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()
@ndb.transactional
def put_kaz_and_proppy():
  kazo_sensei = User(id="kazo", city="TOK").put()
  raise Exception("failure")
  User(id="proppy", city="SF", parent=kazo_sensei).put()
try: put_kaz_and_proppy()
except Exception as e: print e
print User.get_by_id("kazo")

App Engine Pattern #8

Why transaction?

  • Either all or none of the datastore operations are applied
  • Guarantee consistency & isolation
  • Even works with task queue transactional=True

App Engine Anti-Pattern #9

Don't try to update and read data in the same transaction

from google.appengine.ext import ndb

class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()

@ndb.transactional
def put_kaz():
  kaz = User(id="kaz", city="MTV").put()
  print kaz.get()

kaz = User(id="kaz", city="TOK").put()
put_kaz()

App Engine Anti-Pattern #9

Or even delete

from google.appengine.ext import ndb
class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()

@ndb.transactional
def kill_kaz(kaz):
  kaz.delete()
  print kaz.get()

kaz = User(id="kaz", city="TOK").put()
kill_kaz(kaz)
print kaz.get()

App Engine Pattern #9

Why?

In a transaction:
  • All reads reflect a consistent snapshot of the Datastore ...
  • ... at the time the transaction started
  • ⚠ NDB auto-caching ⚠

App Engine Anti-Pattern #10

Don't do a global query

from google.appengine.ext import ndb
class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()
    love = ndb.StringProperty()
kaz = User(id="kaz", city="Tokyo", love="python"); kaz.put()
User(id="proppy", city="Tokyo", love="golang").put()
kaz.love = "golang"; kaz.put()
print User.query(User.city == "Tokyo").fetch() # kaz:python
print User.query(User.city == "Tokyo").fetch() # kaz:golang,proppy:golang
print User.query(User.city == "Tokyo").fetch() # kaz:python,proppy:golang

App Engine Pattern #10

When you need consistency

from google.appengine.ext import ndb
class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()
    love = ndb.StringProperty()
matsuo_sensei = User(id="matsuo", city="Montain View", love="compute").put()
kaz = User(id="kaz", city="Tokyo", love="python", parent=matsuo_sensei); kaz.put()
User(id="proppy", city="Tokyo", love="golang", parent=matsuo_sensei).put()
kaz.love = "golang"; kaz.put() # max: 1 write/s
print User.query(User.city == "Tokyo", ancestor=matsuo_sensei).fetch() # kaz:golang,proppy:golang
print User.query(User.city == "Tokyo", ancestor=matsuo_sensei).fetch() # kaz:golang,proppy:golang
print User.query(User.city == "Tokyo", ancestor=matsuo_sensei).fetch() # kaz:golang,proppy:golang

App Engine Tips #10

keys_only query + get_multi

from google.appengine.ext import ndb
class User(ndb.Model):
    _use_cache = False
    _use_memcache = False
    city = ndb.StringProperty()
    love = ndb.StringProperty()
kaz = User(id="kaz", city="Tokyo", love="python"); kaz.put()
User(id="proppy", city="Tokyo", love="golang").put()
kaz.love = "golang"; kaz.put()
keys = User.query(User.city == "Tokyo").fetch(keys_only=True) # kaz
print ndb.get_multi(keys) # kaz:golang
keys = User.query(User.city == "Tokyo").fetch(keys_only=True) # kaz,proppy
print ndb.get_multi(keys) # kaz:golang,proppy:golang

App Engine Pattern #10

Why?

  • A global query can get stale results.
  • Multiple runs of the same query can get different results.
  • Use ancestor query to get strong consistency.
  • Use a keys_only query & get_multi to trade index consistency for write throughput.

More tricks?

yield Thank("You!")