366 lines
13 KiB
Python
366 lines
13 KiB
Python
#!/usr/bin/python
|
|
|
|
# Copyright: (c) 2018, Rob White (@wimnat)
|
|
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
|
|
|
|
ANSIBLE_METADATA = {'metadata_version': '1.1',
|
|
'status': ['preview'],
|
|
'supported_by': 'community'}
|
|
|
|
DOCUMENTATION = '''
|
|
---
|
|
module: aws_glue_job
|
|
short_description: Manage an AWS Glue job
|
|
description:
|
|
- Manage an AWS Glue job. See U(https://aws.amazon.com/glue/) for details.
|
|
version_added: "2.6"
|
|
requirements: [ boto3 ]
|
|
author: "Rob White (@wimnat)"
|
|
options:
|
|
allocated_capacity:
|
|
description:
|
|
- The number of AWS Glue data processing units (DPUs) to allocate to this Job. From 2 to 100 DPUs
|
|
can be allocated; the default is 10. A DPU is a relative measure of processing power that consists
|
|
of 4 vCPUs of compute capacity and 16 GB of memory.
|
|
required: false
|
|
command_name:
|
|
description:
|
|
- The name of the job command. This must be 'glueetl'.
|
|
required: false
|
|
default: glueetl
|
|
command_script_location:
|
|
description:
|
|
- The S3 path to a script that executes a job.
|
|
required: true
|
|
connections:
|
|
description:
|
|
- A list of Glue connections used for this job.
|
|
required: false
|
|
default_arguments:
|
|
description:
|
|
- A dict of default arguments for this job. You can specify arguments here that your own job-execution
|
|
script consumes, as well as arguments that AWS Glue itself consumes.
|
|
required: false
|
|
description:
|
|
description:
|
|
- Description of the job being defined.
|
|
required: false
|
|
max_concurrent_runs:
|
|
description:
|
|
- The maximum number of concurrent runs allowed for the job. The default is 1. An error is returned when
|
|
this threshold is reached. The maximum value you can specify is controlled by a service limit.
|
|
required: false
|
|
max_retries:
|
|
description:
|
|
- The maximum number of times to retry this job if it fails.
|
|
required: false
|
|
name:
|
|
description:
|
|
- The name you assign to this job definition. It must be unique in your account.
|
|
required: true
|
|
role:
|
|
description:
|
|
- The name or ARN of the IAM role associated with this job.
|
|
required: true
|
|
state:
|
|
description:
|
|
- Create or delete the AWS Glue job.
|
|
required: true
|
|
choices: [ 'present', 'absent' ]
|
|
timeout:
|
|
description:
|
|
- The job timeout in minutes.
|
|
required: false
|
|
extends_documentation_fragment:
|
|
- aws
|
|
- ec2
|
|
'''
|
|
|
|
EXAMPLES = '''
|
|
# Note: These examples do not set authentication details, see the AWS Guide for details.
|
|
|
|
# Create an AWS Glue job
|
|
- aws_glue_job:
|
|
command_script_location: s3bucket/script.py
|
|
name: my-glue-job
|
|
role: my-iam-role
|
|
state: present
|
|
|
|
# Delete an AWS Glue job
|
|
- aws_glue_job:
|
|
name: my-glue-job
|
|
state: absent
|
|
|
|
'''
|
|
|
|
RETURN = '''
|
|
allocated_capacity:
|
|
description: The number of AWS Glue data processing units (DPUs) allocated to runs of this job. From 2 to
|
|
100 DPUs can be allocated; the default is 10. A DPU is a relative measure of processing power
|
|
that consists of 4 vCPUs of compute capacity and 16 GB of memory.
|
|
returned: when state is present
|
|
type: int
|
|
sample: 10
|
|
command:
|
|
description: The JobCommand that executes this job.
|
|
returned: when state is present
|
|
type: complex
|
|
contains:
|
|
name:
|
|
description: The name of the job command.
|
|
returned: when state is present
|
|
type: string
|
|
sample: glueetl
|
|
script_location:
|
|
description: Specifies the S3 path to a script that executes a job.
|
|
returned: when state is present
|
|
type: string
|
|
sample: mybucket/myscript.py
|
|
connections:
|
|
description: The connections used for this job.
|
|
returned: when state is present
|
|
type: dict
|
|
sample: "{ Connections: [ 'list', 'of', 'connections' ] }"
|
|
created_on:
|
|
description: The time and date that this job definition was created.
|
|
returned: when state is present
|
|
type: string
|
|
sample: "2018-04-21T05:19:58.326000+00:00"
|
|
default_arguments:
|
|
description: The default arguments for this job, specified as name-value pairs.
|
|
returned: when state is present
|
|
type: dict
|
|
sample: "{ 'mykey1': 'myvalue1' }"
|
|
description:
|
|
description: Description of the job being defined.
|
|
returned: when state is present
|
|
type: string
|
|
sample: My first Glue job
|
|
job_name:
|
|
description: The name of the AWS Glue job.
|
|
returned: always
|
|
type: string
|
|
sample: my-glue-job
|
|
execution_property:
|
|
description: An ExecutionProperty specifying the maximum number of concurrent runs allowed for this job.
|
|
returned: always
|
|
type: complex
|
|
contains:
|
|
max_concurrent_runs:
|
|
description: The maximum number of concurrent runs allowed for the job. The default is 1. An error is
|
|
returned when this threshold is reached. The maximum value you can specify is controlled by
|
|
a service limit.
|
|
returned: when state is present
|
|
type: int
|
|
sample: 1
|
|
last_modified_on:
|
|
description: The last point in time when this job definition was modified.
|
|
returned: when state is present
|
|
type: string
|
|
sample: "2018-04-21T05:19:58.326000+00:00"
|
|
max_retries:
|
|
description: The maximum number of times to retry this job after a JobRun fails.
|
|
returned: when state is present
|
|
type: int
|
|
sample: 5
|
|
name:
|
|
description: The name assigned to this job definition.
|
|
returned: when state is present
|
|
type: string
|
|
sample: my-glue-job
|
|
role:
|
|
description: The name or ARN of the IAM role associated with this job.
|
|
returned: when state is present
|
|
type: string
|
|
sample: my-iam-role
|
|
timeout:
|
|
description: The job timeout in minutes.
|
|
returned: when state is present
|
|
type: int
|
|
sample: 300
|
|
'''
|
|
|
|
from ansible.module_utils.aws.core import AnsibleAWSModule
|
|
from ansible.module_utils.ec2 import camel_dict_to_snake_dict
|
|
|
|
# Non-ansible imports
|
|
import copy
|
|
try:
|
|
from botocore.exceptions import BotoCoreError, ClientError
|
|
except ImportError:
|
|
pass
|
|
|
|
|
|
def _get_glue_job(connection, module, glue_job_name):
|
|
"""
|
|
Get an AWS Glue job based on name. If not found, return None.
|
|
|
|
:param connection: AWS boto3 glue connection
|
|
:param module: Ansible module
|
|
:param glue_job_name: Name of Glue job to get
|
|
:return: boto3 Glue job dict or None if not found
|
|
"""
|
|
|
|
try:
|
|
return connection.get_job(JobName=glue_job_name)['Job']
|
|
except (BotoCoreError, ClientError) as e:
|
|
if e.response['Error']['Code'] == 'EntityNotFoundException':
|
|
return None
|
|
else:
|
|
module.fail_json_aws(e)
|
|
|
|
|
|
def _compare_glue_job_params(user_params, current_params):
|
|
"""
|
|
Compare Glue job params. If there is a difference, return True immediately else return False
|
|
|
|
:param user_params: the Glue job parameters passed by the user
|
|
:param current_params: the Glue job parameters currently configured
|
|
:return: True if any parameter is mismatched else False
|
|
"""
|
|
|
|
# Weirdly, boto3 doesn't return some keys if the value is empty e.g. Description
|
|
# To counter this, add the key if it's missing with a blank value
|
|
|
|
if 'Description' not in current_params:
|
|
current_params['Description'] = ""
|
|
if 'DefaultArguments' not in current_params:
|
|
current_params['DefaultArguments'] = dict()
|
|
|
|
if 'AllocatedCapacity' in user_params and user_params['AllocatedCapacity'] != current_params['AllocatedCapacity']:
|
|
return True
|
|
if 'Command' in user_params and user_params['Command']['ScriptLocation'] != current_params['Command']['ScriptLocation']:
|
|
return True
|
|
if 'Connections' in user_params and set(user_params['Connections']) != set(current_params['Connections']):
|
|
return True
|
|
if 'DefaultArguments' in user_params and set(user_params['DefaultArguments']) != set(current_params['DefaultArguments']):
|
|
return True
|
|
if 'Description' in user_params and user_params['Description'] != current_params['Description']:
|
|
return True
|
|
if 'ExecutionProperty' in user_params and user_params['ExecutionProperty']['MaxConcurrentRuns'] != current_params['ExecutionProperty']['MaxConcurrentRuns']:
|
|
return True
|
|
if 'MaxRetries' in user_params and user_params['MaxRetries'] != current_params['MaxRetries']:
|
|
return True
|
|
if 'Timeout' in user_params and user_params['Timeout'] != current_params['Timeout']:
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
def create_or_update_glue_job(connection, module, glue_job):
|
|
"""
|
|
Create or update an AWS Glue job
|
|
|
|
:param connection: AWS boto3 glue connection
|
|
:param module: Ansible module
|
|
:param glue_job: a dict of AWS Glue job parameters or None
|
|
:return:
|
|
"""
|
|
|
|
changed = False
|
|
params = dict()
|
|
params['Name'] = module.params.get("name")
|
|
params['Role'] = module.params.get("role")
|
|
if module.params.get("allocated_capacity") is not None:
|
|
params['AllocatedCapacity'] = module.params.get("allocated_capacity")
|
|
if module.params.get("command_script_location") is not None:
|
|
params['Command'] = {'Name': module.params.get("command_name"), 'ScriptLocation': module.params.get("command_script_location")}
|
|
if module.params.get("connections") is not None:
|
|
params['Connections'] = {'Connections': module.params.get("connections")}
|
|
if module.params.get("default_arguments") is not None:
|
|
params['DefaultArguments'] = module.params.get("default_arguments")
|
|
if module.params.get("description") is not None:
|
|
params['Description'] = module.params.get("description")
|
|
if module.params.get("max_concurrent_runs") is not None:
|
|
params['ExecutionProperty'] = {'MaxConcurrentRuns': module.params.get("max_concurrent_runs")}
|
|
if module.params.get("max_retries") is not None:
|
|
params['MaxRetries'] = module.params.get("max_retries")
|
|
if module.params.get("timeout") is not None:
|
|
params['Timeout'] = module.params.get("timeout")
|
|
|
|
# If glue_job is not None then check if it needs to be modified, else create it
|
|
if glue_job:
|
|
if _compare_glue_job_params(params, glue_job):
|
|
try:
|
|
# Update job needs slightly modified params
|
|
update_params = {'JobName': params['Name'], 'JobUpdate': copy.deepcopy(params)}
|
|
del update_params['JobUpdate']['Name']
|
|
connection.update_job(**update_params)
|
|
changed = True
|
|
except (BotoCoreError, ClientError) as e:
|
|
module.fail_json_aws(e)
|
|
else:
|
|
try:
|
|
connection.create_job(**params)
|
|
changed = True
|
|
except (BotoCoreError, ClientError) as e:
|
|
module.fail_json_aws(e)
|
|
|
|
# If changed, get the Glue job again
|
|
if changed:
|
|
glue_job = _get_glue_job(connection, module, params['Name'])
|
|
|
|
module.exit_json(changed=changed, **camel_dict_to_snake_dict(glue_job))
|
|
|
|
|
|
def delete_glue_job(connection, module, glue_job):
|
|
"""
|
|
Delete an AWS Glue job
|
|
|
|
:param connection: AWS boto3 glue connection
|
|
:param module: Ansible module
|
|
:param glue_job: a dict of AWS Glue job parameters or None
|
|
:return:
|
|
"""
|
|
|
|
changed = False
|
|
|
|
if glue_job:
|
|
try:
|
|
connection.delete_job(JobName=glue_job['Name'])
|
|
changed = True
|
|
except (BotoCoreError, ClientError) as e:
|
|
module.fail_json_aws(e)
|
|
|
|
module.exit_json(changed=changed)
|
|
|
|
|
|
def main():
|
|
|
|
argument_spec = (
|
|
dict(
|
|
allocated_capacity=dict(type='int'),
|
|
command_name=dict(type='str', default='glueetl'),
|
|
command_script_location=dict(type='str'),
|
|
connections=dict(type='list'),
|
|
default_arguments=dict(type='dict'),
|
|
description=dict(type='str'),
|
|
max_concurrent_runs=dict(type='int'),
|
|
max_retries=dict(type='int'),
|
|
name=dict(required=True, type='str'),
|
|
role=dict(type='str'),
|
|
state=dict(required=True, choices=['present', 'absent'], type='str'),
|
|
timeout=dict(type='int')
|
|
)
|
|
)
|
|
|
|
module = AnsibleAWSModule(argument_spec=argument_spec,
|
|
required_if=[
|
|
('state', 'present', ['role', 'command_script_location'])
|
|
]
|
|
)
|
|
|
|
connection = module.client('glue')
|
|
|
|
state = module.params.get("state")
|
|
|
|
glue_job = _get_glue_job(connection, module, module.params.get("name"))
|
|
|
|
if state == 'present':
|
|
create_or_update_glue_job(connection, module, glue_job)
|
|
else:
|
|
delete_glue_job(connection, module, glue_job)
|
|
|
|
if __name__ == '__main__':
|
|
main()
|