37 changes: 37 additions & 0 deletions tests/providers/apache/hdfs/sensors/test_webhdfs.py
@@ -0,0 +1,37 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import unittest

from airflow.providers.apache.hdfs.sensors.web_hdfs import WebHdfsSensor
from tests.providers.apache.hive import DEFAULT_DATE, TestHiveEnvironment


@unittest.skipIf(
'AIRFLOW_RUNALL_TESTS' not in os.environ,
"Skipped because AIRFLOW_RUNALL_TESTS is not set")
class TestWebHdfsSensor(TestHiveEnvironment):

def test_webhdfs_sensor(self):
op = WebHdfsSensor(
task_id='webhdfs_sensor_check',
filepath='hdfs://user/hive/warehouse/airflow.db/static_babynames',
timeout=120,
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)
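
The new integration test is opt-in: the module-level skipIf guard keeps it out of the default suite unless AIRFLOW_RUNALL_TESTS is present in the environment. A minimal sketch of opting in locally (the pytest invocation and the value "1" are illustrative, not part of this change):

# Hypothetical opt-in runner; any value satisfies the
# `'AIRFLOW_RUNALL_TESTS' not in os.environ` check in the guard above.
import os
import pytest

if __name__ == "__main__":
    os.environ["AIRFLOW_RUNALL_TESTS"] = "1"
    pytest.main(["tests/providers/apache/hdfs/sensors/test_webhdfs.py"])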
31 changes: 31 additions & 0 deletions tests/providers/apache/hive/__init__.py
@@ -15,3 +15,34 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime
from unittest import TestCase

from airflow import DAG

DEFAULT_DATE = datetime(2015, 1, 1)
DEFAULT_DATE_ISO = DEFAULT_DATE.isoformat()
DEFAULT_DATE_DS = DEFAULT_DATE_ISO[:10]


class TestHiveEnvironment(TestCase):

def setUp(self):
args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
dag = DAG('test_dag_id', default_args=args)
self.dag = dag
self.hql = """
USE airflow;
DROP TABLE IF EXISTS static_babynames_partitioned;
CREATE TABLE IF NOT EXISTS static_babynames_partitioned (
state string,
year string,
name string,
gender string,
num int)
PARTITIONED BY (ds string);
INSERT OVERWRITE TABLE static_babynames_partitioned
PARTITION(ds='{{ ds }}')
SELECT state, year, name, gender, num FROM static_babynames;
"""
188 changes: 1 addition & 187 deletions tests/providers/apache/hive/operators/test_hive.py
@@ -16,54 +16,15 @@
# specific language governing permissions and limitations
# under the License.

import datetime
import os
import unittest
from unittest import mock

from airflow import DAG
from airflow.configuration import conf
from airflow.exceptions import AirflowSensorTimeout
from airflow.models import TaskInstance
from airflow.providers.apache.hdfs.sensors.hdfs import HdfsSensor
from airflow.providers.apache.hdfs.sensors.web_hdfs import WebHdfsSensor
from airflow.providers.apache.hive.operators.hive import HiveOperator
from airflow.providers.apache.hive.operators.hive_stats import HiveStatsCollectionOperator
from airflow.providers.apache.hive.operators.hive_to_mysql import HiveToMySqlTransfer
from airflow.providers.apache.hive.operators.hive_to_samba import Hive2SambaOperator
from airflow.providers.apache.hive.sensors.hive_partition import HivePartitionSensor
from airflow.providers.apache.hive.sensors.metastore_partition import MetastorePartitionSensor
from airflow.providers.apache.hive.sensors.named_hive_partition import NamedHivePartitionSensor
from airflow.providers.mysql.operators.presto_to_mysql import PrestoToMySqlTransfer
from airflow.providers.presto.operators.presto_check import PrestoCheckOperator
from airflow.sensors.sql_sensor import SqlSensor
from airflow.utils import timezone

DEFAULT_DATE = datetime.datetime(2015, 1, 1)
DEFAULT_DATE_ISO = DEFAULT_DATE.isoformat()
DEFAULT_DATE_DS = DEFAULT_DATE_ISO[:10]


class TestHiveEnvironment(unittest.TestCase):

def setUp(self):
args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
dag = DAG('test_dag_id', default_args=args)
self.dag = dag
self.hql = """
USE airflow;
DROP TABLE IF EXISTS static_babynames_partitioned;
CREATE TABLE IF NOT EXISTS static_babynames_partitioned (
state string,
year string,
name string,
gender string,
num int)
PARTITIONED BY (ds string);
INSERT OVERWRITE TABLE static_babynames_partitioned
PARTITION(ds='{{ ds }}')
SELECT state, year, name, gender, num FROM static_babynames;
"""
from tests.providers.apache.hive import DEFAULT_DATE, TestHiveEnvironment


class HiveOperatorConfigTest(TestHiveEnvironment):
@@ -168,150 +129,3 @@ def test_beeline(self):
hql=self.hql, dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_presto(self):
sql = """
SELECT count(1) FROM airflow.static_babynames_partitioned;
"""
op = PrestoCheckOperator(
task_id='presto_check', sql=sql, dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_presto_to_mysql(self):
op = PrestoToMySqlTransfer(
task_id='presto_to_mysql_check',
sql="""
SELECT name, count(*) as ccount
FROM airflow.static_babynames
GROUP BY name
""",
mysql_table='test_static_babynames',
mysql_preoperator='TRUNCATE TABLE test_static_babynames;',
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_hdfs_sensor(self):
op = HdfsSensor(
task_id='hdfs_sensor_check',
filepath='hdfs://user/hive/warehouse/airflow.db/static_babynames',
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_webhdfs_sensor(self):
op = WebHdfsSensor(
task_id='webhdfs_sensor_check',
filepath='hdfs://user/hive/warehouse/airflow.db/static_babynames',
timeout=120,
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_sql_sensor(self):
op = SqlSensor(
task_id='hdfs_sensor_check',
conn_id='presto_default',
sql="SELECT 'x' FROM airflow.static_babynames LIMIT 1;",
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_hive_stats(self):
op = HiveStatsCollectionOperator(
task_id='hive_stats_check',
table="airflow.static_babynames_partitioned",
partition={'ds': DEFAULT_DATE_DS},
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_named_hive_partition_sensor(self):
op = NamedHivePartitionSensor(
task_id='hive_partition_check',
partition_names=[
"airflow.static_babynames_partitioned/ds={{ds}}"
],
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_named_hive_partition_sensor_succeeds_on_multiple_partitions(self):
op = NamedHivePartitionSensor(
task_id='hive_partition_check',
partition_names=[
"airflow.static_babynames_partitioned/ds={{ds}}",
"airflow.static_babynames_partitioned/ds={{ds}}"
],
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_named_hive_partition_sensor_parses_partitions_with_periods(self):
name = NamedHivePartitionSensor.parse_partition_name(
partition="schema.table/part1=this.can.be.an.issue/part2=ok")
self.assertEqual(name[0], "schema")
self.assertEqual(name[1], "table")
self.assertEqual(name[2], "part1=this.can.be.an.issue/part2=ok")

def test_named_hive_partition_sensor_times_out_on_nonexistent_partition(self):
with self.assertRaises(AirflowSensorTimeout):
op = NamedHivePartitionSensor(
task_id='hive_partition_check',
partition_names=[
"airflow.static_babynames_partitioned/ds={{ds}}",
"airflow.static_babynames_partitioned/ds=nonexistent"
],
poke_interval=0.1,
timeout=1,
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_hive_partition_sensor(self):
op = HivePartitionSensor(
task_id='hive_partition_check',
table='airflow.static_babynames_partitioned',
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_hive_metastore_sql_sensor(self):
op = MetastorePartitionSensor(
task_id='hive_partition_check',
table='airflow.static_babynames_partitioned',
partition_name='ds={}'.format(DEFAULT_DATE_DS),
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_hive2samba(self):
op = Hive2SambaOperator(
task_id='hive2samba_check',
samba_conn_id='tableau_samba',
hql="SELECT * FROM airflow.static_babynames LIMIT 10000",
destination_filepath='test_airflow.csv',
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)

def test_hive_to_mysql(self):
op = HiveToMySqlTransfer(
mysql_conn_id='airflow_db',
task_id='hive_to_mysql_check',
create=True,
sql="""
SELECT name
FROM airflow.static_babynames
LIMIT 100
""",
mysql_table='test_static_babynames',
mysql_preoperator=[
'DROP TABLE IF EXISTS test_static_babynames;',
'CREATE TABLE test_static_babynames (name VARCHAR(500))',
],
dag=self.dag)
op.clear(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)
18 changes: 16 additions & 2 deletions tests/providers/apache/hive/operators/test_hive_stats.py
@@ -16,12 +16,14 @@
# specific language governing permissions and limitations
# under the License.

import os
import unittest
from collections import OrderedDict
from unittest.mock import patch

from airflow import AirflowException
from airflow.providers.apache.hive.operators.hive_stats import HiveStatsCollectionOperator
from tests.providers.apache.hive import DEFAULT_DATE, DEFAULT_DATE_DS, TestHiveEnvironment


class _FakeCol:
@@ -33,7 +35,7 @@ def __init__(self, col_name, col_type):
fake_col = _FakeCol('col', 'string')


class TestHiveStatsCollectionOperator(unittest.TestCase):
class TestHiveStatsCollectionOperator(TestHiveEnvironment):

def setUp(self):
self.kwargs = dict(
@@ -43,8 +45,8 @@ def setUp(self):
presto_conn_id='presto_conn_id',
mysql_conn_id='mysql_conn_id',
task_id='test_hive_stats_collection_operator',
dag=None
)
super().setUp()

def test_get_default_exprs(self):
col = 'col'
@@ -282,3 +284,15 @@ def test_execute_delete_previous_runs_rows(self,
hive_stats_collection_operator.dttm
)
mock_mysql_hook.return_value.run.assert_called_once_with(sql)

@unittest.skipIf(
'AIRFLOW_RUNALL_TESTS' not in os.environ,
"Skipped because AIRFLOW_RUNALL_TESTS is not set")
def test_runs_for_hive_stats(self):
op = HiveStatsCollectionOperator(
task_id='hive_stats_check',
table="airflow.static_babynames_partitioned",
partition={'ds': DEFAULT_DATE_DS},
dag=self.dag)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)
33 changes: 30 additions & 3 deletions tests/providers/apache/hive/operators/test_hive_to_mysql.py
@@ -15,15 +15,19 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import unittest
from unittest.mock import PropertyMock, patch

from airflow.providers.apache.hive.operators.hive_to_mysql import HiveToMySqlTransfer
from airflow.utils import timezone
from airflow.utils.operator_helpers import context_to_airflow_vars
from tests.providers.apache.hive import TestHiveEnvironment

DEFAULT_DATE = timezone.datetime(2015, 1, 1)


class TestHiveToMySqlTransfer(unittest.TestCase):
class TestHiveToMySqlTransfer(TestHiveEnvironment):

def setUp(self):
self.kwargs = dict(
@@ -32,8 +36,8 @@ def setUp(self):
hiveserver2_conn_id='hiveserver2_default',
mysql_conn_id='mysql_default',
task_id='test_hive_to_mysql',
dag=None
)
super().setUp()

@patch('airflow.providers.apache.hive.operators.hive_to_mysql.MySqlHook')
@patch('airflow.providers.apache.hive.operators.hive_to_mysql.HiveServer2Hook')
@@ -105,3 +109,26 @@ def test_execute_with_hive_conf(self, mock_hive_hook, mock_mysql_hook):
self.kwargs['sql'],
hive_conf=hive_conf
)

@unittest.skipIf(
'AIRFLOW_RUNALL_TESTS' not in os.environ,
"Skipped because AIRFLOW_RUNALL_TESTS is not set")
def test_hive_to_mysql(self):
op = HiveToMySqlTransfer(
mysql_conn_id='airflow_db',
task_id='hive_to_mysql_check',
create=True,
sql="""
SELECT name
FROM airflow.static_babynames
LIMIT 100
""",
mysql_table='test_static_babynames',
mysql_preoperator=[
'DROP TABLE IF EXISTS test_static_babynames;',
'CREATE TABLE test_static_babynames (name VARCHAR(500))',
],
dag=self.dag)
op.clear(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
ignore_ti_state=True)