summaryrefslogtreecommitdiff
path: root/lib/ansible/modules/database/postgresql/postgresql_copy.py
blob: 9c5a3766fbc0a29a783aef237cc03c63f6a19986 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright: (c) 2019, Andrew Klychkov (@Andersson007) <aaklychkov@mail.ru>
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)

from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

ANSIBLE_METADATA = {
    'metadata_version': '1.1',
    'supported_by': 'community',
    'status': ['preview']
}

DOCUMENTATION = r'''
---
module: postgresql_copy
short_description: Copy data between a file/program and a PostgreSQL table
description:
- Copy data between a file/program and a PostgreSQL table.
version_added: '2.9'

options:
  copy_to:
    description:
    - Copy the contents of a table to a file.
    - Can also copy the results of a SELECT query.
    - Mutually exclusive with I(copy_from) and I(dst).
    type: path
    aliases: [ to ]
  copy_from:
    description:
    - Copy data from a file to a table (appending the data to whatever is in the table already).
    - Mutually exclusive with I(copy_to) and I(src).
    type: path
    aliases: [ from ]
  src:
    description:
    - Copy data from I(copy_from) to I(src=tablename).
    - Used with I(copy_to) only.
    type: str
    aliases: [ source ]
  dst:
    description:
    - Copy data to I(dst=tablename) from I(copy_from=/path/to/data.file).
    - Used with I(copy_from) only.
    type: str
    aliases: [ destination ]
  columns:
    description:
    - List of column names for the src/dst table to COPY FROM/TO.
    type: list
    elements: str
    aliases: [ column ]
  program:
    description:
    - Mark I(src)/I(dst) as a program. Data will be copied to/from a program.
    - See block Examples and PROGRAM arg description U(https://www.postgresql.org/docs/current/sql-copy.html).
    type: bool
    default: no
  options:
    description:
    - Options of COPY command.
    - See the full list of available options U(https://www.postgresql.org/docs/current/sql-copy.html).
    type: dict
  db:
    description:
    - Name of database to connect to.
    type: str
    aliases: [ login_db ]
  session_role:
    description:
    - Switch to session_role after connecting.
      The specified session_role must be a role that the current login_user is a member of.
    - Permissions checking for SQL commands is carried out as though
      the session_role were the one that had logged in originally.
    type: str

notes:
- Supports PostgreSQL version 9.4+.
- COPY command is only allowed to database superusers.
- if I(check_mode=yes), we just check the src/dst table availability
  and return the COPY query that actually has not been executed.
- If i(check_mode=yes) and the source has been passed as SQL, the module
  will execute it and rolled the transaction back but pay attention
  it can affect database performance (e.g., if SQL collects a lot of data).

seealso:
- name: COPY command reference
  description: Complete reference of the COPY command documentation.
  link: https://www.postgresql.org/docs/current/sql-copy.html

author:
- Andrew Klychkov (@Andersson007)

extends_documentation_fragment: postgres
'''

EXAMPLES = r'''
- name: Copy text TAB-separated data from file /tmp/data.txt to acme table
  postgresql_copy:
    copy_from: /tmp/data.txt
    dst: acme

- name: Copy CSV (comma-separated) data from file /tmp/data.csv to columns id, name of table acme
  postgresql_copy:
    copy_from: /tmp/data.csv
    dst: acme
    columns: id,name
    options:
      format: csv

- name: >
    Copy text vertical-bar-separated data from file /tmp/data.txt to bar table.
    The NULL values are specified as N
  postgresql_copy:
    copy_from: /tmp/data.csv
    dst: bar
    options:
      delimiter: '|'
      null: 'N'

- name: Copy data from acme table to file /tmp/data.txt in text format, TAB-separated
  postgresql_copy:
    src: acme
    copy_to: /tmp/data.txt

- name: Copy data from SELECT query to/tmp/data.csv in CSV format
  postgresql_copy:
    src: 'SELECT * FROM acme'
    copy_to: /tmp/data.csv
    options:
      format: csv

- name: Copy CSV data from my_table to gzip
  postgresql_copy:
    src: my_table
    copy_to: 'gzip > /tmp/data.csv.gz'
    program: yes
    options:
      format: csv

- name: >
    Copy data from columns id, name of table bar to /tmp/data.txt.
    Output format is text, vertical-bar-separated, NULL as N
  postgresql_copy:
    src: bar
    columns:
    - id
    - name
    copy_to: /tmp/data.csv
    options:
      delimiter: '|'
      null: 'N'
'''

RETURN = r'''
queries:
  description: List of executed queries.
  returned: always
  type: str
  sample: [ "COPY test_table FROM '/tmp/data_file.txt' (FORMAT csv, DELIMITER ',', NULL 'NULL')" ]
src:
  description: Data source.
  returned: always
  type: str
  sample: "mytable"
dst:
  description: Data destination.
  returned: always
  type: str
  sample: "/tmp/data.csv"
'''

try:
    from psycopg2.extras import DictCursor
except ImportError:
    # psycopg2 is checked by connect_to_db()
    # from ansible.module_utils.postgres
    pass

from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.database import pg_quote_identifier
from ansible.module_utils.postgres import (
    connect_to_db,
    exec_sql,
    get_conn_params,
    postgres_common_argument_spec,
)
from ansible.module_utils.six import iteritems


class PgCopyData(object):

    """Implements behavior of COPY FROM, COPY TO PostgreSQL command.

    Arguments:
        module (AnsibleModule) -- object of AnsibleModule class
        cursor (cursor) -- cursor object of psycopg2 library

    Attributes:
        module (AnsibleModule) -- object of AnsibleModule class
        cursor (cursor) -- cursor object of psycopg2 library
        changed (bool) --  something was changed after execution or not
        executed_queries (list) -- executed queries
        dst (str) -- data destination table (when copy_from)
        src (str) -- data source table (when copy_to)
        opt_need_quotes (tuple) -- values of these options must be passed
            to SQL in quotes
    """

    def __init__(self, module, cursor):
        self.module = module
        self.cursor = cursor
        self.executed_queries = []
        self.changed = False
        self.dst = ''
        self.src = ''
        self.opt_need_quotes = (
            'DELIMITER',
            'NULL',
            'QUOTE',
            'ESCAPE',
            'ENCODING',
        )

    def copy_from(self):
        """Implements COPY FROM command behavior."""
        self.src = self.module.params['copy_from']
        self.dst = self.module.params['dst']

        query_fragments = ['COPY %s' % pg_quote_identifier(self.dst, 'table')]

        if self.module.params.get('columns'):
            query_fragments.append('(%s)' % ','.join(self.module.params['columns']))

        query_fragments.append('FROM')

        if self.module.params.get('program'):
            query_fragments.append('PROGRAM')

        query_fragments.append("'%s'" % self.src)

        if self.module.params.get('options'):
            query_fragments.append(self.__transform_options())

        # Note: check mode is implemented here:
        if self.module.check_mode:
            self.changed = self.__check_table(self.dst)

            if self.changed:
                self.executed_queries.append(' '.join(query_fragments))
        else:
            if exec_sql(self, ' '.join(query_fragments), ddl=True):
                self.changed = True

    def copy_to(self):
        """Implements COPY TO command behavior."""
        self.src = self.module.params['src']
        self.dst = self.module.params['copy_to']

        if 'SELECT ' in self.src.upper():
            # If src is SQL SELECT statement:
            query_fragments = ['COPY (%s)' % self.src]
        else:
            # If src is a table:
            query_fragments = ['COPY %s' % pg_quote_identifier(self.src, 'table')]

        if self.module.params.get('columns'):
            query_fragments.append('(%s)' % ','.join(self.module.params['columns']))

        query_fragments.append('TO')

        if self.module.params.get('program'):
            query_fragments.append('PROGRAM')

        query_fragments.append("'%s'" % self.dst)

        if self.module.params.get('options'):
            query_fragments.append(self.__transform_options())

        # Note: check mode is implemented here:
        if self.module.check_mode:
            self.changed = self.__check_table(self.src)

            if self.changed:
                self.executed_queries.append(' '.join(query_fragments))
        else:
            if exec_sql(self, ' '.join(query_fragments), ddl=True):
                self.changed = True

    def __transform_options(self):
        """Transform options dict into a suitable string."""
        for (key, val) in iteritems(self.module.params['options']):
            if key.upper() in self.opt_need_quotes:
                self.module.params['options'][key] = "'%s'" % val

        opt = ['%s %s' % (key, val) for (key, val) in iteritems(self.module.params['options'])]
        return '(%s)' % ', '.join(opt)

    def __check_table(self, table):
        """Check table or SQL in transaction mode for check_mode.

        Return True if it is OK.

        Arguments:
            table (str) - Table name that needs to be checked.
                It can be SQL SELECT statement that was passed
                instead of the table name.
        """
        if 'SELECT ' in table.upper():
            # In this case table is actually SQL SELECT statement.
            # If SQL fails, it's handled by exec_sql():
            exec_sql(self, table, add_to_executed=False)
            # If exec_sql was passed, it means all is OK:
            return True

        exec_sql(self, 'SELECT 1 FROM %s' % pg_quote_identifier(table, 'table'),
                 add_to_executed=False)
        # If SQL was executed successfully:
        return True


# ===========================================
# Module execution.
#


def main():
    argument_spec = postgres_common_argument_spec()
    argument_spec.update(
        copy_to=dict(type='path', aliases=['to']),
        copy_from=dict(type='path', aliases=['from']),
        src=dict(type='str', aliases=['source']),
        dst=dict(type='str', aliases=['destination']),
        columns=dict(type='list', elements='str', aliases=['column']),
        options=dict(type='dict'),
        program=dict(type='bool', default=False),
        db=dict(type='str', aliases=['login_db']),
        session_role=dict(type='str'),
    )
    module = AnsibleModule(
        argument_spec=argument_spec,
        supports_check_mode=True,
        mutually_exclusive=[
            ['copy_from', 'copy_to'],
            ['copy_from', 'src'],
            ['copy_to', 'dst'],
        ]
    )

    # Note: we don't need to check mutually exclusive params here, because they are
    # checked automatically by AnsibleModule (mutually_exclusive=[] list above).
    if module.params.get('copy_from') and not module.params.get('dst'):
        module.fail_json(msg='dst param is necessary with copy_from')

    elif module.params.get('copy_to') and not module.params.get('src'):
        module.fail_json(msg='src param is necessary with copy_to')

    # Connect to DB and make cursor object:
    conn_params = get_conn_params(module, module.params)
    db_connection = connect_to_db(module, conn_params, autocommit=False)
    cursor = db_connection.cursor(cursor_factory=DictCursor)

    ##############
    # Create the object and do main job:
    data = PgCopyData(module, cursor)

    # Note: parameters like dst, src, etc. are got
    # from module object into data object of PgCopyData class.
    # Therefore not need to pass args to the methods below.
    # Note: check mode is implemented inside the methods below
    # by checking passed module.check_mode arg.
    if module.params.get('copy_to'):
        data.copy_to()

    elif module.params.get('copy_from'):
        data.copy_from()

    # Finish:
    if module.check_mode:
        db_connection.rollback()
    else:
        db_connection.commit()

    cursor.close()
    db_connection.close()

    # Return some values:
    module.exit_json(
        changed=data.changed,
        queries=data.executed_queries,
        src=data.src,
        dst=data.dst,
    )


if __name__ == '__main__':
    main()