Dynamic script to create tables from an input Hive table
This script will:
- Dynamically determine the configuration file name based on the output Hive table name.
- Read the configuration file to get the columns.
- Extract the schema from the Hive table.
- Generate the Oracle and Hive table creation scripts.
- Generate a HiveQL file to insert data into the new Hive table based on the specified columns.
#!/bin/bash
#
# Create a Hive table and a matching Oracle table from an input Hive table's
# schema, generate a HiveQL insert script for the load, run the Oracle DDL on
# a remote host, and emit an AutoSys JIL job definition.
#
# Usage:
#   $0 <input_hive_table> <output_hive_table> <output_oracle_table> \
#      <oracle_server> <oracle_remote_dir> <load_type> <where_value> <mode>
#
# Tables are given as "db.table". Column selection and WHERE-clause settings
# are read from "<output_hive_table_name>.ini" in the current directory.

if [ "$#" -ne 8 ]; then
  echo "Usage: $0 <input_hive_table> <output_hive_table> <output_oracle_table> <oracle_server> <oracle_remote_dir> <load_type> <where_value> <mode>" >&2
  exit 1
fi

INPUT_TABLE="$1"
OUTPUT_HIVE_TABLE="$2"
OUTPUT_ORACLE_TABLE="$3"
ORACLE_SERVER="$4"
ORACLE_REMOTE_DIR="$5"
LOAD_TYPE="$6"      # "full" => INSERT OVERWRITE; anything else => INSERT INTO
WHERE_VALUE="$7"    # optional filter value; empty => no WHERE clause
MODE="$8"           # "F" => AutoSys job runs the generated .hql file

# Split "db.table" names with parameter expansion (no echo|cut subshells).
HIVE_INPUT_DB="${INPUT_TABLE%%.*}"
HIVE_INPUT_TBL="${INPUT_TABLE#*.}"
HIVE_OUTPUT_DB="${OUTPUT_HIVE_TABLE%%.*}"
HIVE_OUTPUT_TBL="${OUTPUT_HIVE_TABLE#*.}"
ORACLE_OUTPUT_DB="${OUTPUT_ORACLE_TABLE%%.*}"
ORACLE_OUTPUT_TBL="${OUTPUT_ORACLE_TABLE#*.}"

# Placeholder connection string -- replace <hive_server>/<port>/<database>.
JDBC_URL="jdbc:hive2://<hive_server>:<port>/<database>"

# Per-table configuration: <output_table>.ini in the current directory.
CONFIG_FILE="${HIVE_OUTPUT_TBL}.ini"
if [ ! -f "$CONFIG_FILE" ]; then
  echo "Configuration file not found: $CONFIG_FILE" >&2
  exit 1
fi

# columns=...     comma-separated columns to copy, or "ALL"
# conditions=...  columns used in the WHERE clause for full loads
# date_column=... column used in the WHERE clause for append loads
COLUMNS=$(grep '^columns=' "$CONFIG_FILE" | cut -d '=' -f 2)
CONDITIONS=$(grep '^conditions=' "$CONFIG_FILE" | cut -d '=' -f 2)
DATE_COLUMN=$(grep '^date_column=' "$CONFIG_FILE" | cut -d '=' -f 2)
IFS=',' read -r -a COLUMNS_ARRAY <<< "$COLUMNS"
IFS=',' read -r -a CONDITION_COLUMNS_ARRAY <<< "$CONDITIONS"

# Describe the input table.  Capture beeline's output *before* filtering so
# the exit status we test is beeline's; the old "beeline | grep; [ $? -ne 0 ]"
# tested grep's status and masked beeline failures.
BEELINE_DESC_CMD="USE $HIVE_INPUT_DB; DESCRIBE $HIVE_INPUT_TBL;"
if ! RAW_SCHEMA=$(beeline -u "$JDBC_URL" -e "$BEELINE_DESC_CMD"); then
  echo "Failed to describe the input table: $INPUT_TABLE" >&2
  exit 1
fi
# Strip beeline's ASCII-art borders and comment lines.
SCHEMA=$(grep -Ev '^[+-]|^#|^[-]+$' <<< "$RAW_SCHEMA")

# Map a Hive column type to its Oracle equivalent (default: VARCHAR2(4000)).
convert_data_type() {
  local hive_type="$1"
  case "$hive_type" in
    string)       echo "VARCHAR2(4000)" ;;
    varchar\(*\)) echo "VARCHAR2${hive_type#varchar}" ;;
    int|bigint)   echo "NUMBER" ;;
    double|float) echo "FLOAT" ;;
    boolean)      echo "NUMBER(1)" ;;
    date)         echo "DATE" ;;
    timestamp)    echo "TIMESTAMP" ;;
    *)            echo "VARCHAR2(4000)" ;;
  esac
}

# Succeed (return 0) when $1 appears in COLUMNS_ARRAY.
column_in_list() {
  local column="$1" col
  for col in "${COLUMNS_ARRAY[@]}"; do
    if [[ "$col" == "$column" ]]; then
      return 0
    fi
  done
  return 1
}

COLUMN_NAMES=""
SELECT_COLUMNS=""
HIVE_CREATE_TABLE_CMD="CREATE TABLE $HIVE_OUTPUT_DB.$HIVE_OUTPUT_TBL ("
ORACLE_CREATE_TABLE_CMD="CREATE TABLE $ORACLE_OUTPUT_DB.$ORACLE_OUTPUT_TBL ("

# Walk the DESCRIBE output ("name | type | comment") and collect columns.
while read -r line; do
  # Skip blank lines and comment lines.
  if [[ -z "$line" || "$line" =~ ^# ]]; then
    continue
  fi
  column_name=$(echo "$line" | cut -d '|' -f 1 | xargs)
  column_type=$(echo "$line" | cut -d '|' -f 2 | xargs)
  oracle_type=$(convert_data_type "$column_type")
  # BUG FIX: column_in_list signals membership via its exit status.  The old
  # code used $(command substitution) inside [[ ]], which tested the
  # function's (empty) stdout and therefore never matched any column.
  if [[ "$COLUMNS" == "ALL" ]] || column_in_list "$column_name"; then
    HIVE_CREATE_TABLE_CMD="$HIVE_CREATE_TABLE_CMD $column_name $column_type,"
    ORACLE_CREATE_TABLE_CMD="$ORACLE_CREATE_TABLE_CMD $column_name $oracle_type,"
    COLUMN_NAMES="$COLUMN_NAMES $column_name,"
    SELECT_COLUMNS="$SELECT_COLUMNS $column_name,"
  fi
done <<< "$SCHEMA"

# Drop the trailing commas and close the DDL statements.
HIVE_CREATE_TABLE_CMD="${HIVE_CREATE_TABLE_CMD%,} );"
ORACLE_CREATE_TABLE_CMD="${ORACLE_CREATE_TABLE_CMD%,} );"
COLUMN_NAMES="${COLUMN_NAMES%,}"
SELECT_COLUMNS="${SELECT_COLUMNS%,}"

# "full" loads overwrite the target; anything else appends.
if [ "$LOAD_TYPE" == "full" ]; then
  LOAD_CMD="INSERT OVERWRITE TABLE $HIVE_OUTPUT_DB.$HIVE_OUTPUT_TBL SELECT $SELECT_COLUMNS FROM $HIVE_INPUT_DB.$HIVE_INPUT_TBL"
else
  LOAD_CMD="INSERT INTO TABLE $HIVE_OUTPUT_DB.$HIVE_OUTPUT_TBL SELECT $SELECT_COLUMNS FROM $HIVE_INPUT_DB.$HIVE_INPUT_TBL"
fi

# Optional WHERE clause: append loads filter on the date column; other loads
# filter on every configured condition column.
if [[ -n "$WHERE_VALUE" ]]; then
  WHERE_CLAUSE=" WHERE "
  if [ "$LOAD_TYPE" == "append" ]; then
    WHERE_CLAUSE="$WHERE_CLAUSE $DATE_COLUMN = '$WHERE_VALUE'"
  else
    for condition_column in "${CONDITION_COLUMNS_ARRAY[@]}"; do
      WHERE_CLAUSE="$WHERE_CLAUSE $condition_column = '$WHERE_VALUE' AND"
    done
    WHERE_CLAUSE="${WHERE_CLAUSE%AND}"  # drop the trailing AND
  fi
  LOAD_CMD="$LOAD_CMD $WHERE_CLAUSE"
fi
LOAD_CMD="$LOAD_CMD;"

# Emit the HiveQL insert script and the Oracle DDL script.
INSERT_SQL_FILE="/tmp/insert_${HIVE_OUTPUT_TBL}.hql"
echo "$LOAD_CMD" > "$INSERT_SQL_FILE"
SQL_FILE="/tmp/create_${ORACLE_OUTPUT_TBL}.sql"
echo "$ORACLE_CREATE_TABLE_CMD" > "$SQL_FILE"

# Create the Hive table.
if ! beeline -u "$JDBC_URL" -e "USE $HIVE_OUTPUT_DB; $HIVE_CREATE_TABLE_CMD"; then
  echo "Failed to create the new Hive table: $OUTPUT_HIVE_TABLE" >&2
  exit 1
fi
echo "Hive table $HIVE_OUTPUT_TBL created successfully in the database $HIVE_OUTPUT_DB."

# Ship the Oracle DDL to the database host and run it there.
if ! scp "$SQL_FILE" "$ORACLE_SERVER:$ORACLE_REMOTE_DIR"; then
  echo "Failed to transfer the Oracle CREATE TABLE SQL file to the remote server" >&2
  exit 1
fi
# SECURITY: sqlplus credentials are hard-coded placeholders; move them to an
# Oracle wallet or a protected credentials file instead of the command line.
if ! ssh "$ORACLE_SERVER" "cd $ORACLE_REMOTE_DIR; sqlplus your_username/your_password @$(basename "$SQL_FILE")"; then
  echo "Failed to create the new Oracle table: $OUTPUT_ORACLE_TABLE" >&2
  exit 1
fi
echo "Oracle table $ORACLE_OUTPUT_TBL created successfully in the Oracle database $ORACLE_OUTPUT_DB."
echo "HiveQL file for loading data into the new Hive table is created: $INSERT_SQL_FILE"

# Build the AutoSys job definition that performs the load.
JOB_NAME="hive_load_${HIVE_OUTPUT_TBL}"
LOG_DIR="/path/to/logs"
AUTOSYS_JIL_FILE="/tmp/${JOB_NAME}.jil"

if [ "$MODE" == "F" ]; then
  # File mode: the job runs the generated .hql script.
  COMMAND="beeline -u '$JDBC_URL' -f '$INSERT_SQL_FILE'"
else
  # Inline mode.  BUG FIX: WHERE_VALUE is now quoted in the HiveQL (double
  # quotes, since the -e argument is single-quoted), matching the .hql path;
  # it was previously unquoted and broke on date/string values.
  COMMAND="beeline -u '$JDBC_URL' -e 'INSERT INTO TABLE $HIVE_OUTPUT_DB.$HIVE_OUTPUT_TBL SELECT $SELECT_COLUMNS FROM $HIVE_INPUT_DB.$HIVE_INPUT_TBL WHERE $DATE_COLUMN = \"$WHERE_VALUE\";'"
fi

# AutoSys JIL -- replace <machine_name> and <owner_name> before loading it.
cat <<EOL > "$AUTOSYS_JIL_FILE"
insert_job: $JOB_NAME
job_type: c
command: $COMMAND
machine: <machine_name>
owner: <owner_name>
permission: gx,ge
date_conditions: 1
days_of_week: all
start_times: "00:00"
description: "AutoSys job to load data into Hive table $HIVE_OUTPUT_TBL"
std_out_file: $LOG_DIR/$JOB_NAME.out
std_err_file: $LOG_DIR/$JOB_NAME.err
alarm_if_fail: 1
EOL

echo "AutoSys job definition file created: $AUTOSYS_JIL_FILE"
# Configuration for Hive and Oracle table creation.

# List of columns to be included. Use "ALL" to include all columns.
columns=column1,column2,column3

# The condition columns to be used in the WHERE clause for a full data load.
conditions=condition_column1,condition_column2

# The date column to be used in the WHERE clause for an append data load.
date_column=date_column_name
#!/bin/bash # Check if necessary arguments are provided if [ "$#" -ne 8 ]; then echo "Usage: $0 <input_hive_table> <output_hive_table> <output_oracle_table> <oracle_server> <oracle_remote_dir> <load_type> <where_value> <mode>" exit 1 fi INPUT_TABLE="$1" OUTPUT_HIVE_TABLE="$2" OUTPUT_ORACLE_TABLE="$3" ORACLE_SERVER="$4" ORACLE_REMOTE_DIR="$5" LOAD_TYPE="$6" WHERE_VALUE="$7" MODE="$8" # Extract database and table names for Hive and Oracle HIVE_INPUT_DB=$(echo "$INPUT_TABLE" | cut -d '.' -f 1) HIVE_INPUT_TBL=$(echo "$INPUT_TABLE" | cut -d '.' -f 2) HIVE_OUTPUT_DB=$(echo "$OUTPUT_HIVE_TABLE" | cut -d '.' -f 1) HIVE_OUTPUT_TBL=$(echo "$OUTPUT_HIVE_TABLE" | cut -d '.' -f 2) ORACLE_OUTPUT_DB=$(echo "$OUTPUT_ORACLE_TABLE" | cut -d '.' -f 1) ORACLE_OUTPUT_TBL=$(echo "$OUTPUT_ORACLE_TABLE" | cut -d '.' -f 2) # Define the configuration files based on the output table name and for the AutoSys job CONFIG_FILE="${HIVE_OUTPUT_TBL}.ini" AUTOSYS_CONFIG_FILE="job_config.ini" # Check if the config files exist if [ ! -f "$CONFIG_FILE" ]; then echo "Configuration file not found: $CONFIG_FILE" exit 1 fi if [ ! 
-f "$AUTOSYS_CONFIG_FILE" ]; then echo "AutoSys configuration file not found: $AUTOSYS_CONFIG_FILE" exit 1 fi # Read the columns, condition properties, date column, and partition column from the config file COLUMNS=$(grep '^columns=' "$CONFIG_FILE" | cut -d '=' -f 2) CONDITIONS=$(grep '^conditions=' "$CONFIG_FILE" | cut -d '=' -f 2) DATE_COLUMN=$(grep '^date_column=' "$CONFIG_FILE" | cut -d '=' -f 2) PARTITION_COLUMN=$(grep '^partition_column=' "$CONFIG_FILE" | cut -d '=' -f 2) IFS=',' read -r -a COLUMNS_ARRAY <<< "$COLUMNS" IFS=',' read -r -a CONDITION_COLUMNS_ARRAY <<< "$CONDITIONS" # Read machine name and owner from AutoSys config file MACHINE_NAME=$(grep '^name=' "$AUTOSYS_CONFIG_FILE" -A1 | grep 'machine' -A1 | tail -n1 | cut -d '=' -f2) OWNER_NAME=$(grep '^name=' "$AUTOSYS_CONFIG_FILE" -A1 | grep 'owner' -A1 | tail -n1 | cut -d '=' -f2) # Beeline command to describe the schema of the input table BEELINE_DESC_CMD="USE $HIVE_INPUT_DB; DESCRIBE $HIVE_INPUT_TBL;" # Execute the Beeline describe command and store the result SCHEMA=$(beeline -u "jdbc:hive2://<hive_server>:<port>/<database>" -e "$BEELINE_DESC_CMD" | grep -Ev '^[+-]|^#|^[-]+$') # Check if the describe command was successful if [ $? 
-ne 0 ]; then echo "Failed to describe the input table: $INPUT_TABLE" exit 1 fi # Function to convert Hive data types to Oracle data types function convert_data_type { local hive_type="$1" case "$hive_type" in string) echo "VARCHAR2(4000)" ;; varchar\(*\)) echo "VARCHAR2${hive_type#varchar}" ;; int) echo "NUMBER" ;; bigint) echo "NUMBER" ;; double) echo "FLOAT" ;; float) echo "FLOAT" ;; boolean) echo "NUMBER(1)" ;; date) echo "DATE" ;; timestamp) echo "TIMESTAMP" ;; *) echo "VARCHAR2(4000)" ;; esac } # Function to check if a column is in the list of specified columns function column_in_list { local column="$1" for col in "${COLUMNS_ARRAY[@]}"; do if [[ "$col" == "$column" ]]; then return 0 fi done return 1 } # Initialize variables for column names COLUMN_NAMES="" SELECT_COLUMNS="" PARTITION_COLUMN_DEF="" # Construct the CREATE TABLE statements HIVE_CREATE_TABLE_CMD="CREATE TABLE $HIVE_OUTPUT_DB.$HIVE_OUTPUT_TBL (" ORACLE_CREATE_TABLE_CMD="CREATE TABLE $ORACLE_OUTPUT_DB.$ORACLE_OUTPUT_TBL (" # Parse the schema and add columns to the CREATE TABLE statements while read -r line; do # Skip empty lines and lines that start with # if [[ -z "$line" || "$line" =~ ^# ]]; then continue fi # Extract column name and type column_name=$(echo "$line" | cut -d '|' -f 1 | xargs) column_type=$(echo "$line" | cut -d '|' -f 2 | xargs) # Convert Hive data type to Oracle data type oracle_type=$(convert_data_type "$column_type") # Check if the column should be included if [[ "$COLUMNS" == "ALL" || $(column_in_list "$column_name") ]]; then # Add column definition to the CREATE TABLE statements if [[ "$PARTITION_COLUMN" == "$column_name" ]]; then PARTITION_COLUMN_DEF="$column_name $column_type" else HIVE_CREATE_TABLE_CMD="$HIVE_CREATE_TABLE_CMD $column_name $column_type," fi ORACLE_CREATE_TABLE_CMD="$ORACLE_CREATE_TABLE_CMD $column_name $oracle_type," # Append column names for insert and select statements COLUMN_NAMES="$COLUMN_NAMES $column_name," SELECT_COLUMNS="$SELECT_COLUMNS 
$column_name," fi done <<< "$SCHEMA" # Remove the trailing comma and close the CREATE TABLE statements HIVE_CREATE_TABLE_CMD="${HIVE_CREATE_TABLE_CMD%,} )" if [[ -n "$PARTITION_COLUMN_DEF" ]]; then HIVE_CREATE_TABLE_CMD="$HIVE_CREATE_TABLE_CMD PARTITIONED BY ($PARTITION_COLUMN_DEF);" else HIVE_CREATE_TABLE_CMD="$HIVE_CREATE_TABLE_CMD;" fi ORACLE_CREATE_TABLE_CMD="${ORACLE_CREATE_TABLE_CMD%,} );" # Remove the trailing comma for column names COLUMN_NAMES="${COLUMN_NAMES%,}" SELECT_COLUMNS="${SELECT_COLUMNS%,}" # Determine the load type (overwrite or append) and construct the insert statement if [ "$LOAD_TYPE" == "F" ]; then LOAD_CMD="INSERT OVERWRITE TABLE $HIVE_OUTPUT_DB.$HIVE_OUTPUT_TBL SELECT $SELECT_COLUMNS FROM $HIVE_INPUT_DB.$HIVE_INPUT_TBL" else LOAD_CMD="INSERT INTO TABLE $HIVE_OUTPUT_DB.$HIVE_OUTPUT_TBL SELECT $SELECT_COLUMNS FROM $HIVE_INPUT_DB.$HIVE_INPUT_TBL" fi # Add the WHERE clause if provided if [[ -n "$WHERE_VALUE" ]]; then WHERE_CLAUSE=" WHERE " if [ "$LOAD_TYPE" == "I" ]; then WHERE_CLAUSE="$WHERE_CLAUSE $DATE_COLUMN = '$WHERE_VALUE'" else for condition_column in "${CONDITION_COLUMNS_ARRAY[@]}"; do WHERE_CLAUSE="$WHERE_CLAUSE $condition_column = '$WHERE_VALUE' AND" done # Remove the trailing "AND" WHERE_CLAUSE="${WHERE_CLAUSE%AND}" fi LOAD_CMD="$LOAD_CMD $WHERE_CLAUSE" fi LOAD_CMD="$LOAD_CMD;" # Generate the SQL file to insert data into the new Hive table INSERT_SQL_FILE="/tmp/insert_${HIVE_OUTPUT_TBL}.hql" echo "$LOAD_CMD" > "$INSERT_SQL_FILE" # Write the Oracle CREATE TABLE statement to a SQL file SQL_FILE="/tmp/create_${ORACLE_OUTPUT_TBL}.sql" echo "$ORACLE_CREATE_TABLE_CMD" > "$SQL_FILE" # Add the GRANT statement to the Oracle SQL file echo "GRANT SELECT ON ${ORACLE_OUTPUT_DB}.${ORACLE_OUTPUT_TBL} TO read;" >> "$SQL_FILE" # Execute the CREATE TABLE statement for Hive using Beeline beeline -u "jdbc:hive2://<hive_server>:<port>/<database>" -e "USE $HIVE_OUTPUT_DB; $HIVE_CREATE_TABLE_CMD" # Check if the create table command was successful if [ $? 
-ne 0 ]; then echo "Failed to create the new Hive table: $OUTPUT_HIVE_TABLE" exit 1 fi echo "Hive table $HIVE_OUTPUT_TBL created successfully in the database $HIVE_OUTPUT_DB." # Transfer the Oracle CREATE TABLE SQL file to the remote server scp "$SQL_FILE" "$ORACLE_SERVER:$ORACLE_REMOTE_DIR" # Check if the scp command was successful if [ $? -ne 0 ]; then echo "Failed to transfer the Oracle CREATE TABLE SQL file to the remote server" exit 1 fi #