summaryrefslogtreecommitdiff
path: root/db/migrate/20200623185440_add_product_analytics_table.rb
blob: 0a0d438bfb9556f13f7541a8b7f273ce8c946db7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# frozen_string_literal: true

class AddProductAnalyticsTable < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers
  include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers

  # Set this constant to true if this migration requires downtime.
  DOWNTIME = false

  # Table is based on https://github.com/snowplow/snowplow/blob/master/4-storage/postgres-storage/sql/atomic-def.sql 6e07b1c, with the following differences:
  # * app_id varchar -> project_id integer (+ FK)
  # * Add `id bigserial`
  # * Hash partitioning based on `project_id`
  # * Timestamp columns: Change type to timestamp with time zone
  #
  # This table is part of the "product analytics experiment" and as such marked "experimental". The goal here is to
  # explore the product analytics as a MVP feature more. We are explicitly not spending time on relational modeling
  # here.
  #
  # We expect significant changes to the database part of this once the feature has been validated.
  # Therefore, we expect to drop the table when feature validation is complete. All data will be lost.
  def up
    with_lock_retries do
      execute <<~SQL
        CREATE TABLE "product_analytics_events_experimental" (
          id bigserial NOT NULL,
          -- App
          "project_id" integer NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
          "platform" varchar(255),
          -- Date/time
          "etl_tstamp" timestamp with time zone,
          "collector_tstamp" timestamp with time zone NOT NULL,
          "dvce_created_tstamp" timestamp with time zone,
          -- Date/time
          "event" varchar(128),
          "event_id" char(36) NOT NULL,
          "txn_id" integer,
          -- Versioning
          "name_tracker" varchar(128),
          "v_tracker" varchar(100),
          "v_collector" varchar(100) NOT NULL,
          "v_etl" varchar(100) NOT NULL,
          -- User and visit
          "user_id" varchar(255),
          "user_ipaddress" varchar(45),
          "user_fingerprint" varchar(50),
          "domain_userid" varchar(36),
          "domain_sessionidx" smallint,
          "network_userid" varchar(38),
          -- Location
          "geo_country" char(2),
          "geo_region" char(3),
          "geo_city" varchar(75),
          "geo_zipcode" varchar(15),
          "geo_latitude" double precision,
          "geo_longitude" double precision,
          "geo_region_name" varchar(100),
          -- IP lookups
          "ip_isp" varchar(100),
          "ip_organization" varchar(100),
          "ip_domain" varchar(100),
          "ip_netspeed" varchar(100),
          -- Page
          "page_url" text,
          "page_title" varchar(2000),
          "page_referrer" text,
          -- Page URL components
          "page_urlscheme" varchar(16),
          "page_urlhost" varchar(255),
          "page_urlport" integer,
          "page_urlpath" varchar(3000),
          "page_urlquery" varchar(6000),
          "page_urlfragment" varchar(3000),
          -- Referrer URL components
          "refr_urlscheme" varchar(16),
          "refr_urlhost" varchar(255),
          "refr_urlport" integer,
          "refr_urlpath" varchar(6000),
          "refr_urlquery" varchar(6000),
          "refr_urlfragment" varchar(3000),
          -- Referrer details
          "refr_medium" varchar(25),
          "refr_source" varchar(50),
          "refr_term" varchar(255),
          -- Marketing
          "mkt_medium" varchar(255),
          "mkt_source" varchar(255),
          "mkt_term" varchar(255),
          "mkt_content" varchar(500),
          "mkt_campaign" varchar(255),
          -- Custom structured event
          "se_category" varchar(1000),
          "se_action" varchar(1000),
          "se_label" varchar(1000),
          "se_property" varchar(1000),
          "se_value" double precision,
          -- Ecommerce
          "tr_orderid" varchar(255),
          "tr_affiliation" varchar(255),
          "tr_total" decimal(18,2),
          "tr_tax" decimal(18,2),
          "tr_shipping" decimal(18,2),
          "tr_city" varchar(255),
          "tr_state" varchar(255),
          "tr_country" varchar(255),
          "ti_orderid" varchar(255),
          "ti_sku" varchar(255),
          "ti_name" varchar(255),
          "ti_category" varchar(255),
          "ti_price" decimal(18,2),
          "ti_quantity" integer,
          -- Page ping
          "pp_xoffset_min" integer,
          "pp_xoffset_max" integer,
          "pp_yoffset_min" integer,
          "pp_yoffset_max" integer,
          -- User Agent
          "useragent" varchar(1000),
          -- Browser
          "br_name" varchar(50),
          "br_family" varchar(50),
          "br_version" varchar(50),
          "br_type" varchar(50),
          "br_renderengine" varchar(50),
          "br_lang" varchar(255),
          "br_features_pdf" boolean,
          "br_features_flash" boolean,
          "br_features_java" boolean,
          "br_features_director" boolean,
          "br_features_quicktime" boolean,
          "br_features_realplayer" boolean,
          "br_features_windowsmedia" boolean,
          "br_features_gears" boolean,
          "br_features_silverlight" boolean,
          "br_cookies" boolean,
          "br_colordepth" varchar(12),
          "br_viewwidth" integer,
          "br_viewheight" integer,
          -- Operating System
          "os_name" varchar(50),
          "os_family" varchar(50),
          "os_manufacturer" varchar(50),
          "os_timezone" varchar(50),
          -- Device/Hardware
          "dvce_type" varchar(50),
          "dvce_ismobile" boolean,
          "dvce_screenwidth" integer,
          "dvce_screenheight" integer,
          -- Document
          "doc_charset" varchar(128),
          "doc_width" integer,
          "doc_height" integer,
          -- Currency
          "tr_currency" char(3),
          "tr_total_base" decimal(18, 2),
          "tr_tax_base" decimal(18, 2),
          "tr_shipping_base" decimal(18, 2),
          "ti_currency" char(3),
          "ti_price_base" decimal(18, 2),
          "base_currency" char(3),
          -- Geolocation
          "geo_timezone" varchar(64),
          -- Click ID
          "mkt_clickid" varchar(128),
          "mkt_network" varchar(64),
          -- ETL tags
          "etl_tags" varchar(500),
          -- Time event was sent
          "dvce_sent_tstamp" timestamp with time zone,
          -- Referer
          "refr_domain_userid" varchar(36),
          "refr_dvce_tstamp" timestamp with time zone,
          -- Session ID
          "domain_sessionid" char(36),
          -- Derived timestamp
          "derived_tstamp" timestamp with time zone,
          -- Event schema
          "event_vendor" varchar(1000),
          "event_name" varchar(1000),
          "event_format" varchar(128),
          "event_version" varchar(128),
          -- Event fingerprint
          "event_fingerprint" varchar(128),
          -- True timestamp
          "true_tstamp" timestamp with time zone,
          PRIMARY KEY (id, project_id)
        ) PARTITION BY HASH (project_id)
          WITHOUT OIDS;

        CREATE INDEX index_product_analytics_events_experimental_project_and_time ON product_analytics_events_experimental (project_id, collector_tstamp);
      SQL

      create_hash_partitions :product_analytics_events_experimental, 64
    end
  end

  def down
    with_lock_retries do
      execute 'DROP TABLE product_analytics_events_experimental'
    end
  end
end