author     Mike Bayer <mike_mp@zzzcomputing.com>  2007-07-27 04:08:53 +0000
committer  Mike Bayer <mike_mp@zzzcomputing.com>  2007-07-27 04:08:53 +0000
commit     ed4fc64bb0ac61c27bc4af32962fb129e74a36bf (patch)
tree       c1cf2fb7b1cafced82a8898e23d2a0bf5ced8526 /examples/sharding/attribute_shard.py
parent     3a8e235af64e36b3b711df1f069d32359fe6c967 (diff)
download   sqlalchemy-ed4fc64bb0ac61c27bc4af32962fb129e74a36bf.tar.gz
merging 0.4 branch to trunk. see CHANGES for details. 0.3 moves to maintenance branch in branches/rel_0_3.
Diffstat (limited to 'examples/sharding/attribute_shard.py')
-rw-r--r--  examples/sharding/attribute_shard.py  194
1 file changed, 194 insertions, 0 deletions
diff --git a/examples/sharding/attribute_shard.py b/examples/sharding/attribute_shard.py
new file mode 100644
index 000000000..e95b978ae
--- /dev/null
+++ b/examples/sharding/attribute_shard.py
@@ -0,0 +1,194 @@
+"""a basic example of using the SQLAlchemy Sharding API.
+Sharding refers to horizontally scaling data across multiple
+databases.
+
+In this example, four sqlite databases will store information about
+weather data on a database-per-continent basis.
+
+To set up a sharding system, you need:
+  1. multiple databases, each assigned a 'shard id'
+  2. a function which can return a single shard id, given an instance
+     to be saved; this is called "shard_chooser"
+  3. a function which can return a list of shard ids which apply to a particular
+     instance identifier; this is called "id_chooser".  If it returns all shard ids,
+     all shards will be searched.
+  4. a function which can return a list of shard ids to try, given a particular
+     Query ("query_chooser").  If it returns all shard ids, all shards will be
+     queried and the results joined together.
+"""
+
+# step 1. imports
+from sqlalchemy import *
+from sqlalchemy import sql   # referenced explicitly below as sql.ClauseVisitor
+from sqlalchemy.orm import *
+from sqlalchemy.orm.shard import ShardedSession
+from sqlalchemy.sql import ColumnOperators
+import datetime, operator
+
+# step 2. databases
+echo = True
+db1 = create_engine('sqlite:///shard1.db', echo=echo)
+db2 = create_engine('sqlite:///shard2.db', echo=echo)
+db3 = create_engine('sqlite:///shard3.db', echo=echo)
+db4 = create_engine('sqlite:///shard4.db', echo=echo)
+
+
+# step 3. create session function. this binds the shard ids
+# to databases within a ShardedSession and returns it.
+def create_session():
+    s = ShardedSession(shard_chooser, id_chooser, query_chooser)
+    s.bind_shard('north_america', db1)
+    s.bind_shard('asia', db2)
+    s.bind_shard('europe', db3)
+    s.bind_shard('south_america', db4)
+    return s
+
+# step 4. table setup.
+meta = MetaData()
+
+# we need a way to create identifiers which are unique across all
+# databases. one easy way would be to just use a composite primary key, where one
+# value is the shard id. but here, we'll show something more "generic", an
+# id generation function. we'll use a simplistic "id table" stored in database
+# #1. Any other method will do just as well; UUID, hilo, application-specific, etc.
+
+ids = Table('ids', meta,
+ Column('nextid', Integer, nullable=False))
+
+def id_generator(ctx):
+    # in reality, you might want to use a separate transaction for this.
+    c = db1.connect()
+    nextid = c.execute(ids.select(for_update=True)).scalar()
+    c.execute(ids.update(values={ids.c.nextid : ids.c.nextid + 1}))
+    return nextid
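+
+# a minimal sketch (an assumption, not part of the original example) of the
+# same generator wrapped in an explicit transaction, as the comment above
+# suggests; it relies only on the standard Connection.begin() API.  swap it
+# in for id_generator if the SELECT...FOR UPDATE and the UPDATE should
+# commit atomically.
+def transactional_id_generator(ctx):
+    c = db1.connect()
+    trans = c.begin()
+    try:
+        nextid = c.execute(ids.select(for_update=True)).scalar()
+        c.execute(ids.update(values={ids.c.nextid : ids.c.nextid + 1}))
+        trans.commit()
+    except:
+        trans.rollback()
+        raise
+    return nextid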
+
+# table setup. we'll store a lead table of continents/cities,
+# and a secondary table storing weather reports.
+# a particular row will be placed in the database whose shard id corresponds to the
+# 'continent'. in this setup, secondary rows in 'weather_reports' will
+# be placed in the same DB as that of the parent, but this can be changed
+# if you're willing to write more complex sharding functions.
+
+weather_locations = Table("weather_locations", meta,
+ Column('id', Integer, primary_key=True, default=id_generator),
+ Column('continent', String(30), nullable=False),
+ Column('city', String(50), nullable=False)
+ )
+
+weather_reports = Table("weather_reports", meta,
+ Column('id', Integer, primary_key=True),
+ Column('location_id', Integer, ForeignKey('weather_locations.id')),
+ Column('temperature', Float),
+ Column('report_time', DateTime, default=datetime.datetime.now),
+)
+
+# create tables
+for db in (db1, db2, db3, db4):
+ meta.drop_all(db)
+ meta.create_all(db)
+
+# establish initial "id" in db1
+db1.execute(ids.insert(), nextid=1)
+
+
+# step 5. define sharding functions.
+
+# we'll use a straight mapping of a particular set of "continent"
+# attribute values to shard id.
+shard_lookup = {
+ 'North America':'north_america',
+ 'Asia':'asia',
+ 'Europe':'europe',
+ 'South America':'south_america'
+}
+
+# shard_chooser - looks at the given instance and returns a shard id
+# note that we need to define conditions for
+# the WeatherLocation class, as well as our secondary Report class which will
+# point back to its WeatherLocation via its 'location' attribute.
+def shard_chooser(mapper, instance):
+    if isinstance(instance, WeatherLocation):
+        return shard_lookup[instance.continent]
+    else:
+        return shard_chooser(mapper, instance.location)
+
+# id_chooser. given a primary key, returns a list of shards
+# to search. here, we don't have any particular information from a
+# pk so we just return all shard ids. often, you'd want to do some
+# kind of round-robin strategy here so that requests are evenly
+# distributed among DBs.
+def id_chooser(ident):
+    return ['north_america', 'asia', 'europe', 'south_america']
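+
+# a hypothetical variant (an assumption, not the original): since every shard
+# must be searched anyway, randomizing the search order is one simplistic way
+# to spread load across the databases, as the comment above suggests.
+#
+#     import random
+#     def id_chooser(ident):
+#         shards = ['north_america', 'asia', 'europe', 'south_america']
+#         random.shuffle(shards)
+#         return shards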
+
+# query_chooser. this also returns a list of shard ids, which can
+# just be all of them. but here we'll search into the Query in order
+# to try to narrow down the list of shards to query.
+def query_chooser(query):
+    ids = []
+
+    # here we will traverse through the query's criterion, searching
+    # for SQL constructs. we'll grab continent names as we find them
+    # and convert to shard ids
+    class FindContinent(sql.ClauseVisitor):
+        def visit_binary(self, binary):
+            if binary.left is weather_locations.c.continent:
+                if binary.operator == operator.eq:
+                    ids.append(shard_lookup[binary.right.value])
+                elif binary.operator == ColumnOperators.in_op:
+                    for bind in binary.right.clauses:
+                        ids.append(shard_lookup[bind.value])
+
+    FindContinent().traverse(query._criterion)
+    if len(ids) == 0:
+        return ['north_america', 'asia', 'europe', 'south_america']
+    else:
+        return ids
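+
+# for illustration (hypothetical usage, not part of the original script): for
+# a query like
+#     sess.query(WeatherLocation).filter(WeatherLocation.continent == 'Asia')
+# the visitor finds the equality comparison against 'continent' and
+# query_chooser returns ['asia'], so only db2 is queried; a query with no
+# continent criterion falls through to all four shards.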
+
+# step 6. mapped classes.
+class WeatherLocation(object):
+    def __init__(self, continent, city):
+        self.continent = continent
+        self.city = city
+
+class Report(object):
+    def __init__(self, temperature):
+        self.temperature = temperature
+
+# step 7. mappers
+mapper(WeatherLocation, weather_locations, properties={
+ 'reports':relation(Report, backref='location')
+})
+
+mapper(Report, weather_reports)
+
+
+# save and load objects!
+
+tokyo = WeatherLocation('Asia', 'Tokyo')
+newyork = WeatherLocation('North America', 'New York')
+toronto = WeatherLocation('North America', 'Toronto')
+london = WeatherLocation('Europe', 'London')
+dublin = WeatherLocation('Europe', 'Dublin')
+brasilia = WeatherLocation('South America', 'Brasilia')
+quito = WeatherLocation('South America', 'Quito')
+
+tokyo.reports.append(Report(80.0))
+newyork.reports.append(Report(75))
+quito.reports.append(Report(85))
+
+sess = create_session()
+for c in [tokyo, newyork, toronto, london, dublin, brasilia, quito]:
+    sess.save(c)
+sess.flush()
+
+sess.clear()
+
+t = sess.query(WeatherLocation).get(tokyo.id)
+assert t.city == tokyo.city
+assert t.reports[0].temperature == 80.0
+
+north_american_cities = sess.query(WeatherLocation).filter(WeatherLocation.continent == 'North America')
+assert [c.city for c in north_american_cities] == ['New York', 'Toronto']
+
+asia_and_europe = sess.query(WeatherLocation).filter(WeatherLocation.continent.in_('Europe', 'Asia'))
+assert set([c.city for c in asia_and_europe]) == set(['Tokyo', 'London', 'Dublin'])
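+
+# a final hypothetical check, not in the original script: a Report row lives
+# on whichever shard holds its parent WeatherLocation, since shard_chooser
+# delegates Report instances to their location's continent.
+quito_locations = sess.query(WeatherLocation).filter(weather_locations.c.city == 'Quito')
+assert [l.reports[0].temperature for l in quito_locations] == [85]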
+