author     Mike Bayer <mike_mp@zzzcomputing.com>  2007-07-27 04:08:53 +0000
committer  Mike Bayer <mike_mp@zzzcomputing.com>  2007-07-27 04:08:53 +0000
commit     ed4fc64bb0ac61c27bc4af32962fb129e74a36bf (patch)
tree       c1cf2fb7b1cafced82a8898e23d2a0bf5ced8526 /examples/sharding/attribute_shard.py
parent     3a8e235af64e36b3b711df1f069d32359fe6c967 (diff)
download   sqlalchemy-ed4fc64bb0ac61c27bc4af32962fb129e74a36bf.tar.gz
merging 0.4 branch to trunk. see CHANGES for details. 0.3 moves to maintenance branch in branches/rel_0_3.
Diffstat (limited to 'examples/sharding/attribute_shard.py')
-rw-r--r--  examples/sharding/attribute_shard.py  194
1 file changed, 194 insertions, 0 deletions
diff --git a/examples/sharding/attribute_shard.py b/examples/sharding/attribute_shard.py
new file mode 100644
index 000000000..e95b978ae
--- /dev/null
+++ b/examples/sharding/attribute_shard.py
@@ -0,0 +1,194 @@
+"""a basic example of using the SQLAlchemy Sharding API.
+Sharding refers to horizontally scaling data across multiple
+databases.
+
+In this example, four sqlite databases will store information about
+weather data on a database-per-continent basis.
+
+To set up a sharding system, you need:
+  1. multiple databases, each assigned a 'shard id'
+  2. a function which can return a single shard id, given an instance
+     to be saved; this is called "shard_chooser"
+  3. a function which can return a list of shard ids which apply to a particular
+     instance identifier; this is called "id_chooser".  If it returns all shard ids,
+     all shards will be searched.
+  4. a function which can return a list of shard ids to try, given a particular
+     Query ("query_chooser").  If it returns all shard ids, all shards will be
+     queried and the results joined together.
+"""
+
+# step 1. imports
+from sqlalchemy import *
+from sqlalchemy import sql   # referenced explicitly below as sql.ClauseVisitor
+from sqlalchemy.orm import *
+from sqlalchemy.orm.shard import ShardedSession
+from sqlalchemy.sql import ColumnOperators
+import datetime, operator
+
+# step 2. databases
+echo = True
+db1 = create_engine('sqlite:///shard1.db', echo=echo)
+db2 = create_engine('sqlite:///shard2.db', echo=echo)
+db3 = create_engine('sqlite:///shard3.db', echo=echo)
+db4 = create_engine('sqlite:///shard4.db', echo=echo)
+
+
+# step 3. create session function. this binds the shard ids
+# to databases within a ShardedSession and returns it.
+def create_session():
+    s = ShardedSession(shard_chooser, id_chooser, query_chooser)
+    s.bind_shard('north_america', db1)
+    s.bind_shard('asia', db2)
+    s.bind_shard('europe', db3)
+    s.bind_shard('south_america', db4)
+    return s
+
+# step 4. table setup.
+meta = MetaData()
+
+# we need a way to create identifiers which are unique across all
+# databases. one easy way would be to just use a composite primary key, where one
+# value is the shard id. but here, we'll show something more "generic", an
+# id generation function. we'll use a simplistic "id table" stored in database
+# #1. Any other method will do just as well; UUID, hilo, application-specific, etc.
+
+ids = Table('ids', meta,
+ Column('nextid', Integer, nullable=False))
+
+def id_generator(ctx):
+    # in reality, you might want to use a separate transaction for this.
+    c = db1.connect()
+    nextid = c.execute(ids.select(for_update=True)).scalar()
+    c.execute(ids.update(values={ids.c.nextid : ids.c.nextid + 1}))
+    return nextid
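+
+# a minimal sketch (an assumption, not part of the original example) of the
+# same generator wrapped in an explicit transaction, as the comment above
+# suggests; it relies only on the standard Connection.begin() API.  swap it
+# in for id_generator if the SELECT...FOR UPDATE and the UPDATE should
+# commit atomically.
+def transactional_id_generator(ctx):
+    c = db1.connect()
+    trans = c.begin()
+    try:
+        nextid = c.execute(ids.select(for_update=True)).scalar()
+        c.execute(ids.update(values={ids.c.nextid : ids.c.nextid + 1}))
+        trans.commit()
+    except:
+        trans.rollback()
+        raise
+    return nextid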
+
+# table setup. we'll store a lead table of continents/cities,
+# and a secondary table storing weather reports.
+# a particular row will be placed in the database whose shard id corresponds to the
+# 'continent'. in this setup, secondary rows in 'weather_reports' will
+# be placed in the same DB as that of the parent, but this can be changed
+# if you're willing to write more complex sharding functions.
+
+weather_locations = Table("weather_locations", meta,
+ Column('id', Integer, primary_key=True, default=id_generator),
+ Column('continent', String(30), nullable=False),
+ Column('city', String(50), nullable=False)
+ )
+
+weather_reports = Table("weather_reports", meta,
+ Column('id', Integer, primary_key=True),
+ Column('location_id', Integer, ForeignKey('weather_locations.id')),
+ Column('temperature', Float),
+ Column('report_time', DateTime, default=datetime.datetime.now),
+)
+
+# create tables
+for db in (db1, db2, db3, db4):
+ meta.drop_all(db)
+ meta.create_all(db)
+
+# establish initial "id" in db1
+db1.execute(ids.insert(), nextid=1)
+
+
+# step 5. define sharding functions.
+
+# we'll use a straight mapping of a particular set of "continent"
+# attribute values to shard id.
+shard_lookup = {
+ 'North America':'north_america',
+ 'Asia':'asia',
+ 'Europe':'europe',
+ 'South America':'south_america'
+}
+
+# shard_chooser - looks at the given instance and returns a shard id
+# note that we need to define conditions for
+# the WeatherLocation class, as well as our secondary Report class which will
+# point back to its WeatherLocation via its 'location' attribute.
+def shard_chooser(mapper, instance):
+    if isinstance(instance, WeatherLocation):
+        return shard_lookup[instance.continent]
+    else:
+        return shard_chooser(mapper, instance.location)
+
+# id_chooser. given a primary key, returns a list of shards
+# to search. here, we don't have any particular information from a
+# pk so we just return all shard ids. often, you'd want to do some
+# kind of round-robin strategy here so that requests are evenly
+# distributed among DBs.
+def id_chooser(ident):
+    return ['north_america', 'asia', 'europe', 'south_america']
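+
+# a hypothetical variant (an assumption, not the original): since every shard
+# must be searched anyway, randomizing the search order is one simplistic way
+# to spread load across the databases, as the comment above suggests.
+#
+#     import random
+#     def id_chooser(ident):
+#         shards = ['north_america', 'asia', 'europe', 'south_america']
+#         random.shuffle(shards)
+#         return shards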
+
+# query_chooser. this also returns a list of shard ids, which can
+# just be all of them. but here we'll search into the Query in order
+# to try to narrow down the list of shards to query.
+def query_chooser(query):
+    ids = []
+
+    # here we will traverse through the query's criterion, searching
+    # for SQL constructs. we'll grab continent names as we find them
+    # and convert to shard ids
+    class FindContinent(sql.ClauseVisitor):
+        def visit_binary(self, binary):
+            if binary.left is weather_locations.c.continent:
+                if binary.operator == operator.eq:
+                    ids.append(shard_lookup[binary.right.value])
+                elif binary.operator == ColumnOperators.in_op:
+                    for bind in binary.right.clauses:
+                        ids.append(shard_lookup[bind.value])
+
+    FindContinent().traverse(query._criterion)
+    if len(ids) == 0:
+        return ['north_america', 'asia', 'europe', 'south_america']
+    else:
+        return ids
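+
+# for illustration (hypothetical usage, not part of the original script): for
+# a query like
+#     sess.query(WeatherLocation).filter(WeatherLocation.continent == 'Asia')
+# the visitor finds the equality comparison against 'continent' and
+# query_chooser returns ['asia'], so only db2 is queried; a query with no
+# continent criterion falls through to all four shards.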
+
+# step 6. mapped classes.
+class WeatherLocation(object):
+    def __init__(self, continent, city):
+        self.continent = continent
+        self.city = city
+
+class Report(object):
+    def __init__(self, temperature):
+        self.temperature = temperature
+
+# step 7. mappers
+mapper(WeatherLocation, weather_locations, properties={
+ 'reports':relation(Report, backref='location')
+})
+
+mapper(Report, weather_reports)
+
+
+# save and load objects!
+
+tokyo = WeatherLocation('Asia', 'Tokyo')
+newyork = WeatherLocation('North America', 'New York')
+toronto = WeatherLocation('North America', 'Toronto')
+london = WeatherLocation('Europe', 'London')
+dublin = WeatherLocation('Europe', 'Dublin')
+brasilia = WeatherLocation('South America', 'Brasilia')
+quito = WeatherLocation('South America', 'Quito')
+
+tokyo.reports.append(Report(80.0))
+newyork.reports.append(Report(75))
+quito.reports.append(Report(85))
+
+sess = create_session()
+for c in [tokyo, newyork, toronto, london, dublin, brasilia, quito]:
+    sess.save(c)
+sess.flush()
+
+sess.clear()
+
+t = sess.query(WeatherLocation).get(tokyo.id)
+assert t.city == tokyo.city
+assert t.reports[0].temperature == 80.0
+
+north_american_cities = sess.query(WeatherLocation).filter(WeatherLocation.continent == 'North America')
+assert [c.city for c in north_american_cities] == ['New York', 'Toronto']
+
+asia_and_europe = sess.query(WeatherLocation).filter(WeatherLocation.continent.in_('Europe', 'Asia'))
+assert set([c.city for c in asia_and_europe]) == set(['Tokyo', 'London', 'Dublin'])
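+
+# a final hypothetical check, not in the original script: a Report row lives
+# on whichever shard holds its parent WeatherLocation, since shard_chooser
+# delegates Report instances to their location's continent.
+quito_locations = sess.query(WeatherLocation).filter(weather_locations.c.city == 'Quito')
+assert [l.reports[0].temperature for l in quito_locations] == [85]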
+