diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2007-07-27 04:08:53 +0000 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2007-07-27 04:08:53 +0000 |
commit | ed4fc64bb0ac61c27bc4af32962fb129e74a36bf (patch) | |
tree | c1cf2fb7b1cafced82a8898e23d2a0bf5ced8526 /examples/sharding/attribute_shard.py | |
parent | 3a8e235af64e36b3b711df1f069d32359fe6c967 (diff) | |
download | sqlalchemy-ed4fc64bb0ac61c27bc4af32962fb129e74a36bf.tar.gz |
merging 0.4 branch to trunk. see CHANGES for details. 0.3 moves to maintenance branch in branches/rel_0_3.
Diffstat (limited to 'examples/sharding/attribute_shard.py')
-rw-r--r-- | examples/sharding/attribute_shard.py | 194 |
1 files changed, 194 insertions, 0 deletions
diff --git a/examples/sharding/attribute_shard.py b/examples/sharding/attribute_shard.py new file mode 100644 index 000000000..e95b978ae --- /dev/null +++ b/examples/sharding/attribute_shard.py @@ -0,0 +1,194 @@ +"""a basic example of using the SQLAlchemy Sharding API. +Sharding refers to horizontally scaling data across multiple +databases. + +In this example, four sqlite databases will store information about +weather data on a database-per-continent basis. + +To set up a sharding system, you need: + 1. multiple databases, each assined a 'shard id' + 2. a function which can return a single shard id, given an instance + to be saved; this is called "shard_chooser" + 3. a function which can return a list of shard ids which apply to a particular + instance identifier; this is called "id_chooser". If it returns all shard ids, + all shards will be searched. + 4. a function which can return a list of shard ids to try, given a particular + Query ("query_chooser"). If it returns all shard ids, all shards will be + queried and the results joined together. +""" + +# step 1. imports +from sqlalchemy import * +from sqlalchemy.orm import * +from sqlalchemy.orm.shard import ShardedSession +from sqlalchemy.sql import ColumnOperators +import datetime, operator + +# step 2. databases +echo = True +db1 = create_engine('sqlite:///shard1.db', echo=echo) +db2 = create_engine('sqlite:///shard2.db', echo=echo) +db3 = create_engine('sqlite:///shard3.db', echo=echo) +db4 = create_engine('sqlite:///shard4.db', echo=echo) + + +# step 3. create session function. this binds the shard ids +# to databases within a ShardedSession and returns it. +def create_session(): + s = ShardedSession(shard_chooser, id_chooser, query_chooser) + s.bind_shard('north_america', db1) + s.bind_shard('asia', db2) + s.bind_shard('europe', db3) + s.bind_shard('south_america', db4) + return s + +# step 4. table setup. +meta = MetaData() + +# we need a way to create identifiers which are unique across all +# databases. one easy way would be to just use a composite primary key, where one +# value is the shard id. but here, we'll show something more "generic", an +# id generation function. we'll use a simplistic "id table" stored in database +# #1. Any other method will do just as well; UUID, hilo, application-specific, etc. + +ids = Table('ids', meta, + Column('nextid', Integer, nullable=False)) + +def id_generator(ctx): + # in reality, might want to use a separate transaction for this. + c = db1.connect() + nextid = c.execute(ids.select(for_update=True)).scalar() + c.execute(ids.update(values={ids.c.nextid : ids.c.nextid + 1})) + return nextid + +# table setup. we'll store a lead table of continents/cities, +# and a secondary table storing locations. +# a particular row will be placed in the database whose shard id corresponds to the +# 'continent'. in this setup, secondary rows in 'weather_reports' will +# be placed in the same DB as that of the parent, but this can be changed +# if you're willing to write more complex sharding functions. + +weather_locations = Table("weather_locations", meta, + Column('id', Integer, primary_key=True, default=id_generator), + Column('continent', String(30), nullable=False), + Column('city', String(50), nullable=False) + ) + +weather_reports = Table("weather_reports", meta, + Column('id', Integer, primary_key=True), + Column('location_id', Integer, ForeignKey('weather_locations.id')), + Column('temperature', Float), + Column('report_time', DateTime, default=datetime.datetime.now), +) + +# create tables +for db in (db1, db2, db3, db4): + meta.drop_all(db) + meta.create_all(db) + +# establish initial "id" in db1 +db1.execute(ids.insert(), nextid=1) + + +# step 5. define sharding functions. + +# we'll use a straight mapping of a particular set of "country" +# attributes to shard id. +shard_lookup = { + 'North America':'north_america', + 'Asia':'asia', + 'Europe':'europe', + 'South America':'south_america' +} + +# shard_chooser - looks at the given instance and returns a shard id +# note that we need to define conditions for +# the WeatherLocation class, as well as our secondary Report class which will +# point back to its WeatherLocation via its 'location' attribute. +def shard_chooser(mapper, instance): + if isinstance(instance, WeatherLocation): + return shard_lookup[instance.continent] + else: + return shard_chooser(mapper, instance.location) + +# id_chooser. given a primary key, returns a list of shards +# to search. here, we don't have any particular information from a +# pk so we just return all shard ids. often, youd want to do some +# kind of round-robin strategy here so that requests are evenly +# distributed among DBs +def id_chooser(ident): + return ['north_america', 'asia', 'europe', 'south_america'] + +# query_chooser. this also returns a list of shard ids, which can +# just be all of them. but here we'll search into the Query in order +# to try to narrow down the list of shards to query. +def query_chooser(query): + ids = [] + + # here we will traverse through the query's criterion, searching + # for SQL constructs. we'll grab continent names as we find them + # and convert to shard ids + class FindContinent(sql.ClauseVisitor): + def visit_binary(self, binary): + if binary.left is weather_locations.c.continent: + if binary.operator == operator.eq: + ids.append(shard_lookup[binary.right.value]) + elif binary.operator == ColumnOperators.in_op: + for bind in binary.right.clauses: + ids.append(shard_lookup[bind.value]) + + FindContinent().traverse(query._criterion) + if len(ids) == 0: + return ['north_america', 'asia', 'europe', 'south_america'] + else: + return ids + +# step 6. mapped classes. +class WeatherLocation(object): + def __init__(self, continent, city): + self.continent = continent + self.city = city + +class Report(object): + def __init__(self, temperature): + self.temperature = temperature + +# step 7. mappers +mapper(WeatherLocation, weather_locations, properties={ + 'reports':relation(Report, backref='location') +}) + +mapper(Report, weather_reports) + + +# save and load objects! + +tokyo = WeatherLocation('Asia', 'Tokyo') +newyork = WeatherLocation('North America', 'New York') +toronto = WeatherLocation('North America', 'Toronto') +london = WeatherLocation('Europe', 'London') +dublin = WeatherLocation('Europe', 'Dublin') +brasilia = WeatherLocation('South America', 'Brasila') +quito = WeatherLocation('South America', 'Quito') + +tokyo.reports.append(Report(80.0)) +newyork.reports.append(Report(75)) +quito.reports.append(Report(85)) + +sess = create_session() +for c in [tokyo, newyork, toronto, london, dublin, brasilia, quito]: + sess.save(c) +sess.flush() + +sess.clear() + +t = sess.query(WeatherLocation).get(tokyo.id) +assert t.city == tokyo.city +assert t.reports[0].temperature == 80.0 + +north_american_cities = sess.query(WeatherLocation).filter(WeatherLocation.continent == 'North America') +assert [c.city for c in north_american_cities] == ['New York', 'Toronto'] + +asia_and_europe = sess.query(WeatherLocation).filter(WeatherLocation.continent.in_('Europe', 'Asia')) +assert set([c.city for c in asia_and_europe]) == set(['Tokyo', 'London', 'Dublin']) + |