#!/bin/bash
: '
 @vendor  Chorke Academia, Inc.
 @web     https://cdn.chorke.org/docs/academia
 @version 1.0.00
 @since   1.0.00
'

# Configure the YARN CapacityScheduler queues of a Hadoop installation.
#
# Environment (defaults are applied by void_init when a value is empty):
#   HADOOP_HOME_DIR    Hadoop installation root.
#   HADOOP_SCHEDULERS  bash associative-array literal, queue key -> display
#                      name, e.g. '([dev]="Development" [prod]="Production")'.
#   MINMAX_CAPACITIES  bash associative-array literal, queue key -> "(min max)"
#                      capacity pair, e.g. "([dev]='(50 50)' [prod]='(50 70)')".
#
# NOTE(review): this file was restored from a copy in which line breaks,
# here-document markers and all XML markup had been stripped. The XML tags
# below follow the stock Hadoop yarn-site.xml / capacity-scheduler.xml layout;
# confirm against the files of the targeted Hadoop release.
#
# All here-documents are unquoted on purpose: a trailing "\n\" leaves the two
# characters "\n" in the text (sed -z turns them into a newline) and joins the
# next line, so every pattern/replacement ends up as a single-line string.

# Print the queue keys of HADOOP_SCHEDULERS sorted, one per line.
# Prints nothing when HADOOP_SCHEDULERS is empty or defines no key.
function _sorted_queue_keys(){
    [[ -n "${HADOOP_SCHEDULERS}" ]] || return 0
    declare -A schedulers_map="${HADOOP_SCHEDULERS}"
    (( ${#schedulers_map[@]} > 0 )) || return 0
    printf '%s\n' "${!schedulers_map[@]}" | sort
}

# Replace the first match of a sed regex in a file.
# Arguments: $1 - file to edit in place
#            $2 - sed basic regex (matched with -z, so "\n" matches newlines)
#            $3.. - replacement fragments, joined with "\n"
# Returns:   exit status of sed.
# sudo is used only when the file or its directory is not writable, so that an
# already-writable file is not rewritten (and re-owned) by root.
# NOTE: "|" is the sed delimiter; fragments must not contain "|" or "&".
function _sed_replace(){
    local file="$1" find="$2"
    shift 2
    local fill='' fragment
    for fragment in "$@"; do
        fill+="${fill:+\\n}${fragment}"
    done
    local -a runner=()
    if [[ ! -w "${file}" || ! -w "$(dirname -- "${file}")" ]]; then
        runner=(sudo)
    fi
    "${runner[@]}" sed -z "s|${find}|${fill}|" -i "${file}"
}

# Fill the empty <configuration> element of yarn-site.xml with the scheduler
# class, the root queue list and the per-queue capacity / maximum-capacity.
# Globals: HADOOP_HOME_DIR, HADOOP_SCHEDULERS, MINMAX_CAPACITIES (read)
# Returns: 0 on success, non-zero when the file or the queue list is missing
#          or sed fails.
function conf_yarn_site(){
    # exported variable to indexed & associative array
    declare -A minmax_map="${MINMAX_CAPACITIES}"
    local -a queues=() fills=()
    local site_file find fragment queue queues_csv
    mapfile -t queues < <(_sorted_queue_keys)
    if (( ${#queues[@]} == 0 )); then
        printf 'error: HADOOP_SCHEDULERS defines no queues\n' >&2
        return 1
    fi
    queues_csv=$(IFS=,; printf '%s' "${queues[*]}")

    # NOTE(review): the original assignment of this path was lost; it is
    # assumed to sit next to capacity-scheduler.xml - confirm.
    site_file="${HADOOP_HOME_DIR}/etc/hadoop/yarn-site.xml"
    if [[ ! -f "${site_file}" ]]; then
        printf 'error: %s: no such file\n' "${site_file}" >&2
        return 1
    fi

    # the pristine, empty configuration element
    find=$(cat <<EOF
<configuration>\n\n\
<!-- Site specific YARN configuration properties -->\n\n\
</configuration>
EOF
)

    fragment=$(cat <<EOF
<configuration>\n\
  <property>\n\
    <name>yarn.resourcemanager.scheduler.class</name>\n\
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>\n\
  </property>\n\
  <property>\n\
    <name>yarn.scheduler.capacity.root.queues</name>\n\
    <value>${queues_csv}</value>\n\
  </property>
EOF
)
    fills+=("${fragment}")

    # minimum capacity of every queue
    for queue in "${queues[@]}"; do
        declare -a minmax_pair="${minmax_map[${queue}]}"
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.${queue}.capacity</name>\n\
    <value>${minmax_pair[0]}</value>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done

    # maximum capacity of every queue
    for queue in "${queues[@]}"; do
        declare -a minmax_pair="${minmax_map[${queue}]}"
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.${queue}.maximum-capacity</name>\n\
    <value>${minmax_pair[1]}</value>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done

    fills+=('</configuration>')
    _sed_replace "${site_file}" "${find}" "${fills[@]}"
}

# Replace every "default" queue property of capacity-scheduler.xml with one
# copy per configured queue.
# Globals: HADOOP_HOME_DIR, HADOOP_SCHEDULERS, MINMAX_CAPACITIES (read)
# Returns: 0 on success, non-zero when the file or the queue list is missing
#          or any sed call fails. A pattern that does not match is not an
#          error (sed leaves the file unchanged).
function conf_capacity_scheduler(){
    # exported variable to indexed & associative array
    declare -A schedulers_map="${HADOOP_SCHEDULERS}"
    declare -A minmax_map="${MINMAX_CAPACITIES}"
    local -a queues=() fills=()
    local scheduler_file find fragment queue label queues_csv status=0
    mapfile -t queues < <(_sorted_queue_keys)
    if (( ${#queues[@]} == 0 )); then
        printf 'error: HADOOP_SCHEDULERS defines no queues\n' >&2
        return 1
    fi
    queues_csv=$(IFS=,; printf '%s' "${queues[*]}")

    # add capacity config to capacity-scheduler.xml
    scheduler_file="${HADOOP_HOME_DIR}/etc/hadoop/capacity-scheduler.xml"
    if [[ ! -f "${scheduler_file}" ]]; then
        printf 'error: %s: no such file\n' "${scheduler_file}" >&2
        return 1
    fi

    # replace default root queues
    find=$(cat <<EOF
<name>yarn.scheduler.capacity.root.queues</name>\n\
[ ]*<value>default</value>
EOF
)
    fragment=$(cat <<EOF
<name>yarn.scheduler.capacity.root.queues</name>\n\
    <value>${queues_csv}</value>
EOF
)
    _sed_replace "${scheduler_file}" "${find}" "${fragment}" || status=1

    # replace default root capacity
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.capacity</name>\n\
[ ]*<value>100</value>\n\
[ ]*<description>Default queue target capacity.</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        declare -a minmax_pair="${minmax_map[${queue}]}"
        label=${schedulers_map[${queue}]}
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.capacity</name>\n\
    <value>${minmax_pair[0]}</value>\n\
    <description>${label} queue target capacity.</description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    # replace default root user limit factor
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.user-limit-factor</name>\n\
[ ]*<value>1</value>\n\
[ ]*<description>\n\
[ ]*Default queue user limit a percentage from 0.0 to 1.0.\n\
[ ]*</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        label=${schedulers_map[${queue}]}
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.user-limit-factor</name>\n\
    <value>1</value>\n\
    <description>\n\
      ${label} queue user limit a percentage from 0.0 to 1.0.\n\
    </description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    # replace default root maximum capacity
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>\n\
[ ]*<value>100</value>\n\
[ ]*<description>\n\
[ ]*The maximum capacity of the default queue.\n\
[ ]*</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        declare -a minmax_pair="${minmax_map[${queue}]}"
        label=${schedulers_map[${queue}]}
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.maximum-capacity</name>\n\
    <value>${minmax_pair[1]}</value>\n\
    <description>The maximum capacity of the ${label} queue.</description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    # replace default root state
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.state</name>\n\
[ ]*<value>RUNNING</value>\n\
[ ]*<description>\n\
[ ]*The state of the default queue. State can be one of RUNNING or STOPPED.\n\
[ ]*</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        label=${schedulers_map[${queue}]}
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.state</name>\n\
    <value>RUNNING</value>\n\
    <description>\n\
      The state of the ${label} queue. State can be one of RUNNING or STOPPED.\n\
    </description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    # replace default root acl submit applications
    # ("*" is escaped in the pattern only; it is literal in the replacement)
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>\n\
[ ]*<value>\*</value>\n\
[ ]*<description>\n\
[ ]*The ACL of who can submit jobs to the default queue.\n\
[ ]*</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        label=${schedulers_map[${queue}]}
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.acl_submit_applications</name>\n\
    <value>*</value>\n\
    <description>\n\
      The ACL of who can submit jobs to the ${label} queue.\n\
    </description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    # replace default root acl administer queue
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>\n\
[ ]*<value>\*</value>\n\
[ ]*<description>\n\
[ ]*The ACL of who can administer jobs on the default queue.\n\
[ ]*</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        label=${schedulers_map[${queue}]}
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.acl_administer_queue</name>\n\
    <value>*</value>\n\
    <description>\n\
      The ACL of who can administer jobs on the ${label} queue.\n\
    </description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    # replace default root acl application max priority
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>\n\
[ ]*<value>\*</value>\n\
[ ]*<description>\n\
[ ]*The ACL of who can submit applications with configured priority.\n\
[ ]*For e.g, \[user={name} group={name} max_priority={priority} default_priority={priority}\]\n\
[ ]*</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.acl_application_max_priority</name>\n\
    <value>*</value>\n\
    <description>\n\
      The ACL of who can submit applications with configured priority.\n\
      For e.g, [user={name} group={name} max_priority={priority} default_priority={priority}]\n\
    </description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    # replace default root maximum application lifetime
    # (in this property the closing </name> tag sits on its own line)
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.maximum-application-lifetime\n\
[ ]*</name>\n\
[ ]*<value>-1</value>\n\
[ ]*<description>\n\
[ ]*Maximum lifetime of an application which is submitted to a queue\n\
[ ]*in seconds. Any value less than or equal to zero will be considered as\n\
[ ]*disabled.\n\
[ ]*This will be a hard time limit for all applications in this\n\
[ ]*queue. If positive value is configured then any application submitted\n\
[ ]*to this queue will be killed after exceeds the configured lifetime.\n\
[ ]*User can also specify lifetime per application basis in\n\
[ ]*application submission context. But user lifetime will be\n\
[ ]*overridden if it exceeds queue maximum lifetime. It is point-in-time\n\
[ ]*configuration.\n\
[ ]*Note : Configuring too low value will result in killing application\n\
[ ]*sooner. This feature is applicable only for leaf queue.\n\
[ ]*</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.maximum-application-lifetime\n\
    </name>\n\
    <value>-1</value>\n\
    <description>\n\
      Maximum lifetime of an application which is submitted to a queue\n\
      in seconds. Any value less than or equal to zero will be considered as\n\
      disabled.\n\
      This will be a hard time limit for all applications in this\n\
      queue. If positive value is configured then any application submitted\n\
      to this queue will be killed after exceeds the configured lifetime.\n\
      User can also specify lifetime per application basis in\n\
      application submission context. But user lifetime will be\n\
      overridden if it exceeds queue maximum lifetime. It is point-in-time\n\
      configuration.\n\
      Note : Configuring too low value will result in killing application\n\
      sooner. This feature is applicable only for leaf queue.\n\
    </description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    # replace default root default application lifetime
    find=$(cat <<EOF
[ ]*<property>\n\
[ ]*<name>yarn.scheduler.capacity.root.default.default-application-lifetime\n\
[ ]*</name>\n\
[ ]*<value>-1</value>\n\
[ ]*<description>\n\
[ ]*Default lifetime of an application which is submitted to a queue\n\
[ ]*in seconds. Any value less than or equal to zero will be considered as\n\
[ ]*disabled.\n\
[ ]*If the user has not submitted application with lifetime value then this\n\
[ ]*value will be taken. It is point-in-time configuration.\n\
[ ]*Note : Default lifetime can't exceed maximum lifetime. This feature is\n\
[ ]*applicable only for leaf queue.\n\
[ ]*</description>\n\
[ ]*</property>
EOF
)
    fills=()
    for queue in "${queues[@]}"; do
        fragment=$(cat <<EOF
  <property>\n\
    <name>yarn.scheduler.capacity.root.${queue}.default-application-lifetime\n\
    </name>\n\
    <value>-1</value>\n\
    <description>\n\
      Default lifetime of an application which is submitted to a queue\n\
      in seconds. Any value less than or equal to zero will be considered as\n\
      disabled.\n\
      If the user has not submitted application with lifetime value then this\n\
      value will be taken. It is point-in-time configuration.\n\
      Note : Default lifetime can't exceed maximum lifetime. This feature is\n\
      applicable only for leaf queue.\n\
    </description>\n\
  </property>
EOF
)
        fills+=("${fragment}")
    done
    _sed_replace "${scheduler_file}" "${find}" "${fills[@]}" || status=1

    return "${status}"
}

# Apply both configuration steps; the second runs even if the first fails.
# Returns: 0 only when both steps succeeded.
function conf_init(){
    local status=0
    conf_yarn_site || status=1
    conf_capacity_scheduler || status=1
    return "${status}"
}

# Export a default for every configuration variable that is unset or empty.
function void_init(){
    if [[ -z "${HADOOP_HOME_DIR}" ]]; then
        export HADOOP_HOME_DIR='/var/hadoop/hadoop-3.3.4'
    fi
    if [[ -z "${MINMAX_CAPACITIES}" ]]; then
        export MINMAX_CAPACITIES="([dev]='(50 50)' [prod]='(50 70)')"
    fi
    if [[ -z "${HADOOP_SCHEDULERS}" ]]; then
        export HADOOP_SCHEDULERS='([dev]="Development" [prod]="Production")'
    fi
}

# Entry point: apply defaults, then rewrite both configuration files.
function init(){
    void_init
    conf_init
}

init