{ "cells": [ { "cell_type": "markdown", "id": "68e4b588", "metadata": {}, "source": [ "### Reference Documents:\n", "SparkMagic
\n", "Spark Doc
" ] }, { "cell_type": "markdown", "id": "923155da", "metadata": {}, "source": [ "# Downloading Files into a Runtime/Remote Cluster from S3 (For Amazon AWS Environments)\n", "### In this notebook we'll be showing you examples on how to download files from S3 to all your Attached Cluster Nodes\n", "### Downloading files from other sources will be done in similar fashion." ] }, { "cell_type": "markdown", "id": "e3b80c64", "metadata": {}, "source": [ "## Helper Methods" ] }, { "cell_type": "code", "execution_count": 21, "id": "984bf3c5", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%%spark\n", "\n", "import urllib3\n", "from urllib3.exceptions import InsecureRequestWarning\n", "from syn_utils.syn_notebook.lib import AmazonClusterPyPackageInstaller\n", "import requests\n", "\n", "requests.packages.urllib3.disable_warnings(InsecureRequestWarning)\n", "urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)\n", "\n", "TYPE_YUM='yum'\n", "TYPE_PIP='pip'\n", "INSTALL='install'\n", "UNINSTALL='uninstall'\n", "\n", "def run_cmd(cmd):\n", "    '''Execute the shell command `cmd` on every node of the attached cluster.\n", "\n", "    A new AmazonClusterPyPackageInstaller is created on each call and its\n", "    execute_command() is invoked once per entry in node_ip_addresses.\n", "    '''\n", "    amazon_installer = AmazonClusterPyPackageInstaller()\n", "    for ip in amazon_installer.node_ip_addresses:\n", "        amazon_installer.execute_command(cmd, ip)\n", "\n", "def custom_pkg_install(packages_arr, run_type, inst_uninst):\n", "    '''Install or uninstall packages on every cluster node using yum or pip.\n", "\n", "    packages_arr -- iterable of package-name strings (joined with spaces)\n", "    run_type     -- TYPE_YUM or TYPE_PIP\n", "    inst_uninst  -- INSTALL or UNINSTALL\n", "\n", "    Raises ValueError for an unsupported run_type, or an unsupported pip\n", "    action, instead of sending an empty command to every node.\n", "    '''\n", "    pkgs = ' '.join(packages_arr)\n", "\n", "    if run_type == TYPE_YUM:\n", "        # yum has no 'uninstall' sub-command; its removal verb is 'remove'.\n", "        yum_action = 'remove' if inst_uninst == UNINSTALL else inst_uninst\n", "        cmd_to_run = f'sudo yum {yum_action} {pkgs} -y'\n", "    elif run_type == TYPE_PIP:\n", "        if inst_uninst == INSTALL:\n", "            cmd_to_run = f'sudo python3 -m pip {inst_uninst} {pkgs}'\n", "        elif inst_uninst == UNINSTALL:\n", "            # -y answers pip's uninstall confirmation prompt.\n", "            cmd_to_run = f'sudo python3 -m pip {inst_uninst} {pkgs} -y'\n", "        else:\n", "            raise ValueError(f'Unsupported pip action {inst_uninst!r}; expected {INSTALL!r} or {UNINSTALL!r}')\n", "    else:\n", "        raise ValueError(f'Unsupported run_type {run_type!r}; expected {TYPE_YUM!r} or {TYPE_PIP!r}')\n", "\n", "    print(f'Running Command [{cmd_to_run}]')\n", "    run_cmd(cmd_to_run)" ] }, { "cell_type": "markdown", "id": "7d670588", "metadata": {}, "source": [ "#### Download files onto the cluster using AWS CLI Utils\n", "#### This will download the file noted in the \"object_key\" variable to the path noted by the \"local_des_path\" to all the nodes in the cluster." ] }, { "cell_type": "code", "execution_count": 6, "id": "035c633b", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "executing command on 10.0.1.181\n", "executing command on 10.0.1.17\n", "executing command on 10.0.1.154\n", "executing command on 10.0.1.181\n", "executing command on 10.0.1.17\n", "executing command on 10.0.1.154\n", "executing command on 10.0.1.181\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/csv_test/20220701.export.CSV to ../../tmp/my_files/20220701.export.CSV\n", "executing command on 10.0.1.17\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/csv_test/20220701.export.CSV to ../../tmp/my_files/20220701.export.CSV\n", "executing command on 10.0.1.154\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/csv_test/20220701.export.CSV to ../../tmp/my_files/20220701.export.CSV\n", "executing command on 10.0.1.181\n", "total 42M\n", "drwxr-xr-x 2 root root 33 Sep 10 03:22 .\n", "drwxrwxrwt 41 root root 4.0K Sep 10 03:22 ..\n", "-rw-r--r-- 1 root root 42M Jul 12 16:51 20220701.export.CSV\n", "executing command on 10.0.1.17\n", "total 42M\n", "drwxr-xr-x 2 root root 33 Sep 10 03:23 .\n", "drwxrwxrwt 18 root root 4.0K Sep 10 03:23 ..\n", "-rw-r--r-- 1 root root 42M Jul 12 16:51 20220701.export.CSV\n", "executing command on 10.0.1.154\n", "total 42M\n",
"drwxr-xr-x 2 root root 33 Sep 10 03:23 .\n", "drwxrwxrwt 18 root root 4.0K Sep 10 03:23 ..\n", "-rw-r--r-- 1 root root 42M Jul 12 16:51 20220701.export.CSV" ] } ], "source": [ "%%spark\n", "\n", "import shutil\n", "import subprocess\n", "\n", "bucket_name = 'syntasa-gov-sandbox-01'\n", "object_prefix = 'other/sample-data/csv_test'\n", "object_key = '20220701.export.CSV'\n", "obj_in_s3_full_path = f's3://{bucket_name}/{object_prefix}/{object_key}'\n", "local_des_path = '/tmp/my_files'\n", "\n", "# Please note ::: Creating a folder or downloading a file to a folder other than /tmp/ requires running the commands as the sudo user,\n", "# which is why each of the cmd, cmd1, cmd2 and cmd3 variables below is prefixed with 'sudo'.\n", "\n", "# Let's delete all the files/folders that we will create (start from scratch)\n", "cmd = f'sudo rm -rf {local_des_path}'\n", "run_cmd(cmd)\n", "\n", "# Let's create a temporary folder on each node to hold our files\n", "cmd1 = f'sudo mkdir -p {local_des_path}'\n", "run_cmd(cmd1)\n", "\n", "# Now download the file from s3\n", "cmd2 = f'sudo aws s3 cp {obj_in_s3_full_path} {local_des_path}/'\n", "run_cmd(cmd2)\n", "\n", "# Now validate the file was downloaded to all the nodes\n", "cmd3 = f'sudo ls -lah {local_des_path}/'\n", "run_cmd(cmd3)" ] }, { "cell_type": "markdown", "id": "f1fe3f7a", "metadata": {}, "source": [ "#### Download entire folder onto the cluster using AWS CLI Utils\n", "#### This will download an entire folder onto your remote cluster (all nodes of the cluster)" ] }, { "cell_type": "code", "execution_count": 22, "id": "c8e7fe12", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "executing
command on 10.0.1.181\n", "executing command on 10.0.1.17\n", "executing command on 10.0.1.154\n", "executing command on 10.0.1.181\n", "executing command on 10.0.1.17\n", "executing command on 10.0.1.154\n", "executing command on 10.0.1.181\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-12.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-02.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-04.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-03.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-05.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-01.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-09.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-11.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-08.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-06.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-06.parquet\n", "executing command on 10.0.1.17\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-09.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-07.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-05.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-03.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-04.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-10.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-06.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-03.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-09.parquet\n", "download: 
s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-06.parquet\n", "executing command 
on 10.0.1.154\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-10.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2014-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2014-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-08.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2015-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2015-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-06.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2016-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2016-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-04.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-06.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-12.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2020-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2020-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-08.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2021-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2021-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2022-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2022-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-10.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-11.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2017-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2017-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-05.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-07.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-07.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-08.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-08.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-10.parquet to 
../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-10.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-09.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-09.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-11.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-11.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2018-12.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2018-12.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-01.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-01.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-02.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-02.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-03.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-03.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-04.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-04.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-06.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-06.parquet\n", "download: s3://syntasa-gov-sandbox-01/other/sample-data/geomesa_dataset/green_nyc_tripdata/green_tripdata_2019-05.parquet to ../../tmp/my_files/green_nyc_tripdata/green_tripdata_2019-05.parquet\n", "executing command on 10.0.1.181\n", "total 1.2G\n", "drwxr-xr-x 2 root root 8.0K Sep 10 03:39 .\n", "drwxr-xr-x 3 root root 59 Sep 10 
03:38 ..\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2014-01.parquet\n", "-rw-r--r-- 1 root root 15M Jun 6 03:42 green_tripdata_2014-02.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-03.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-04.parquet\n", "-rw-r--r-- 1 root root 21M Jun 6 03:42 green_tripdata_2014-05.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-06.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-07.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-08.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-09.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2014-10.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2014-11.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2014-12.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2015-01.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-02.parquet\n", "-rw-r--r-- 1 root root 25M Jun 6 03:42 green_tripdata_2015-03.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-04.parquet\n", "-rw-r--r-- 1 root root 26M Jun 6 03:42 green_tripdata_2015-05.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-06.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-07.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-08.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2015-09.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-10.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-11.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-12.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2016-01.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2016-02.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 
03:42 green_tripdata_2016-03.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2016-04.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2016-05.parquet\n", "-rw-r--r-- 1 root root 21M Jun 6 03:42 green_tripdata_2016-06.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2016-07.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2016-08.parquet\n", "-rw-r--r-- 1 root root 18M Jun 6 03:42 green_tripdata_2016-09.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2016-10.parquet\n", "-rw-r--r-- 1 root root 17M Jun 6 03:42 green_tripdata_2016-11.parquet\n", "-rw-r--r-- 1 root root 18M Jun 6 03:42 green_tripdata_2016-12.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-01.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-02.parquet\n", "-rw-r--r-- 1 root root 17M Jun 6 03:42 green_tripdata_2017-03.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-04.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-05.parquet\n", "-rw-r--r-- 1 root root 15M Jun 6 03:42 green_tripdata_2017-06.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-07.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-08.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-09.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-10.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-11.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-12.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-01.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-02.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-03.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-04.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-05.parquet\n", 
"-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-06.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-07.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-08.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-09.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-10.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-11.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-12.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2019-01.parquet\n", "-rw-r--r-- 1 root root 9.9M Jun 6 03:42 green_tripdata_2019-02.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2019-03.parquet\n", "-rw-r--r-- 1 root root 9.0M Jun 6 03:42 green_tripdata_2019-04.parquet\n", "-rw-r--r-- 1 root root 8.7M Jun 6 03:42 green_tripdata_2019-05.parquet\n", "-rw-r--r-- 1 root root 8.2M Jun 6 03:42 green_tripdata_2019-06.parquet\n", "-rw-r--r-- 1 root root 7.6M Jun 6 03:42 green_tripdata_2019-07.parquet\n", "-rw-r--r-- 1 root root 7.3M Jun 6 03:42 green_tripdata_2019-08.parquet\n", "-rw-r--r-- 1 root root 7.4M Jun 6 03:42 green_tripdata_2019-09.parquet\n", "-rw-r--r-- 1 root root 7.6M Jun 6 03:42 green_tripdata_2019-10.parquet\n", "-rw-r--r-- 1 root root 7.3M Jun 6 03:42 green_tripdata_2019-11.parquet\n", "-rw-r--r-- 1 root root 7.2M Jun 6 03:42 green_tripdata_2019-12.parquet\n", "-rw-r--r-- 1 root root 6.9M Jun 6 03:42 green_tripdata_2020-01.parquet\n", "-rw-r--r-- 1 root root 6.4M Jun 6 03:42 green_tripdata_2020-02.parquet\n", "-rw-r--r-- 1 root root 3.9M Jun 6 03:42 green_tripdata_2020-03.parquet\n", "-rw-r--r-- 1 root root 697K Jun 6 03:42 green_tripdata_2020-04.parquet\n", "-rw-r--r-- 1 root root 1.1M Jun 6 03:42 green_tripdata_2020-05.parquet\n", "-rw-r--r-- 1 root root 1.2M Jun 6 03:42 green_tripdata_2020-06.parquet\n", "-rw-r--r-- 1 root root 1.3M Jun 6 03:42 green_tripdata_2020-07.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 
6 03:42 green_tripdata_2020-08.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2020-09.parquet\n", "-rw-r--r-- 1 root root 1.7M Jun 6 03:42 green_tripdata_2020-10.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2020-11.parquet\n", "-rw-r--r-- 1 root root 1.4M Jun 6 03:42 green_tripdata_2020-12.parquet\n", "-rw-r--r-- 1 root root 1.3M Jun 6 03:42 green_tripdata_2021-01.parquet\n", "-rw-r--r-- 1 root root 1.1M Jun 6 03:42 green_tripdata_2021-02.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 6 03:42 green_tripdata_2021-03.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 6 03:42 green_tripdata_2021-04.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-05.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-06.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-07.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-08.parquet\n", "-rw-r--r-- 1 root root 1.8M Jun 6 03:42 green_tripdata_2021-09.parquet\n", "-rw-r--r-- 1 root root 2.1M Jun 6 03:42 green_tripdata_2021-10.parquet\n", "-rw-r--r-- 1 root root 2.1M Jun 6 03:42 green_tripdata_2021-11.parquet\n", "-rw-r--r-- 1 root root 2.0M Jun 6 03:42 green_tripdata_2021-12.parquet\n", "-rw-r--r-- 1 root root 1.2M Jun 6 03:42 green_tripdata_2022-01.parquet\n", "-rw-r--r-- 1 root root 1.4M Jun 6 03:42 green_tripdata_2022-02.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2022-03.parquet\n", "executing command on 10.0.1.17\n", "total 1.2G\n", "drwxr-xr-x 2 root root 8.0K Sep 10 03:39 .\n", "drwxr-xr-x 3 root root 59 Sep 10 03:39 ..\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2014-01.parquet\n", "-rw-r--r-- 1 root root 15M Jun 6 03:42 green_tripdata_2014-02.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-03.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-04.parquet\n", "-rw-r--r-- 1 root root 21M Jun 6 03:42 
green_tripdata_2014-05.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-06.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-07.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-08.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-09.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2014-10.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2014-11.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2014-12.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2015-01.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-02.parquet\n", "-rw-r--r-- 1 root root 25M Jun 6 03:42 green_tripdata_2015-03.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-04.parquet\n", "-rw-r--r-- 1 root root 26M Jun 6 03:42 green_tripdata_2015-05.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-06.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-07.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-08.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2015-09.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-10.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-11.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-12.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2016-01.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2016-02.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2016-03.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2016-04.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2016-05.parquet\n", "-rw-r--r-- 1 root root 21M Jun 6 03:42 green_tripdata_2016-06.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2016-07.parquet\n", "-rw-r--r-- 1 
root root 19M Jun 6 03:42 green_tripdata_2016-08.parquet\n", "-rw-r--r-- 1 root root 18M Jun 6 03:42 green_tripdata_2016-09.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2016-10.parquet\n", "-rw-r--r-- 1 root root 17M Jun 6 03:42 green_tripdata_2016-11.parquet\n", "-rw-r--r-- 1 root root 18M Jun 6 03:42 green_tripdata_2016-12.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-01.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-02.parquet\n", "-rw-r--r-- 1 root root 17M Jun 6 03:42 green_tripdata_2017-03.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-04.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-05.parquet\n", "-rw-r--r-- 1 root root 15M Jun 6 03:42 green_tripdata_2017-06.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-07.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-08.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-09.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-10.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-11.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-12.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-01.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-02.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-03.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-04.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-05.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-06.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-07.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-08.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-09.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 
green_tripdata_2018-10.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-11.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-12.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2019-01.parquet\n", "-rw-r--r-- 1 root root 9.9M Jun 6 03:42 green_tripdata_2019-02.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2019-03.parquet\n", "-rw-r--r-- 1 root root 9.0M Jun 6 03:42 green_tripdata_2019-04.parquet\n", "-rw-r--r-- 1 root root 8.7M Jun 6 03:42 green_tripdata_2019-05.parquet\n", "-rw-r--r-- 1 root root 8.2M Jun 6 03:42 green_tripdata_2019-06.parquet\n", "-rw-r--r-- 1 root root 7.6M Jun 6 03:42 green_tripdata_2019-07.parquet\n", "-rw-r--r-- 1 root root 7.3M Jun 6 03:42 green_tripdata_2019-08.parquet\n", "-rw-r--r-- 1 root root 7.4M Jun 6 03:42 green_tripdata_2019-09.parquet\n", "-rw-r--r-- 1 root root 7.6M Jun 6 03:42 green_tripdata_2019-10.parquet\n", "-rw-r--r-- 1 root root 7.3M Jun 6 03:42 green_tripdata_2019-11.parquet\n", "-rw-r--r-- 1 root root 7.2M Jun 6 03:42 green_tripdata_2019-12.parquet\n", "-rw-r--r-- 1 root root 6.9M Jun 6 03:42 green_tripdata_2020-01.parquet\n", "-rw-r--r-- 1 root root 6.4M Jun 6 03:42 green_tripdata_2020-02.parquet\n", "-rw-r--r-- 1 root root 3.9M Jun 6 03:42 green_tripdata_2020-03.parquet\n", "-rw-r--r-- 1 root root 697K Jun 6 03:42 green_tripdata_2020-04.parquet\n", "-rw-r--r-- 1 root root 1.1M Jun 6 03:42 green_tripdata_2020-05.parquet\n", "-rw-r--r-- 1 root root 1.2M Jun 6 03:42 green_tripdata_2020-06.parquet\n", "-rw-r--r-- 1 root root 1.3M Jun 6 03:42 green_tripdata_2020-07.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 6 03:42 green_tripdata_2020-08.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2020-09.parquet\n", "-rw-r--r-- 1 root root 1.7M Jun 6 03:42 green_tripdata_2020-10.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2020-11.parquet\n", "-rw-r--r-- 1 root root 1.4M Jun 6 03:42 
green_tripdata_2020-12.parquet\n", "-rw-r--r-- 1 root root 1.3M Jun 6 03:42 green_tripdata_2021-01.parquet\n", "-rw-r--r-- 1 root root 1.1M Jun 6 03:42 green_tripdata_2021-02.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 6 03:42 green_tripdata_2021-03.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 6 03:42 green_tripdata_2021-04.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-05.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-06.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-07.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-08.parquet\n", "-rw-r--r-- 1 root root 1.8M Jun 6 03:42 green_tripdata_2021-09.parquet\n", "-rw-r--r-- 1 root root 2.1M Jun 6 03:42 green_tripdata_2021-10.parquet\n", "-rw-r--r-- 1 root root 2.1M Jun 6 03:42 green_tripdata_2021-11.parquet\n", "-rw-r--r-- 1 root root 2.0M Jun 6 03:42 green_tripdata_2021-12.parquet\n", "-rw-r--r-- 1 root root 1.2M Jun 6 03:42 green_tripdata_2022-01.parquet\n", "-rw-r--r-- 1 root root 1.4M Jun 6 03:42 green_tripdata_2022-02.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2022-03.parquet\n", "executing command on 10.0.1.154\n", "total 1.2G\n", "drwxr-xr-x 2 root root 8.0K Sep 10 03:39 .\n", "drwxr-xr-x 3 root root 59 Sep 10 03:39 ..\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2014-01.parquet\n", "-rw-r--r-- 1 root root 15M Jun 6 03:42 green_tripdata_2014-02.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-03.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-04.parquet\n", "-rw-r--r-- 1 root root 21M Jun 6 03:42 green_tripdata_2014-05.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-06.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2014-07.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-08.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2014-09.parquet\n", 
"-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2014-10.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2014-11.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2014-12.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2015-01.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-02.parquet\n", "-rw-r--r-- 1 root root 25M Jun 6 03:42 green_tripdata_2015-03.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-04.parquet\n", "-rw-r--r-- 1 root root 26M Jun 6 03:42 green_tripdata_2015-05.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-06.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-07.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-08.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2015-09.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-10.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2015-11.parquet\n", "-rw-r--r-- 1 root root 24M Jun 6 03:42 green_tripdata_2015-12.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2016-01.parquet\n", "-rw-r--r-- 1 root root 22M Jun 6 03:42 green_tripdata_2016-02.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2016-03.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2016-04.parquet\n", "-rw-r--r-- 1 root root 23M Jun 6 03:42 green_tripdata_2016-05.parquet\n", "-rw-r--r-- 1 root root 21M Jun 6 03:42 green_tripdata_2016-06.parquet\n", "-rw-r--r-- 1 root root 20M Jun 6 03:42 green_tripdata_2016-07.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2016-08.parquet\n", "-rw-r--r-- 1 root root 18M Jun 6 03:42 green_tripdata_2016-09.parquet\n", "-rw-r--r-- 1 root root 19M Jun 6 03:42 green_tripdata_2016-10.parquet\n", "-rw-r--r-- 1 root root 17M Jun 6 03:42 green_tripdata_2016-11.parquet\n", "-rw-r--r-- 1 root root 18M Jun 6 03:42 
green_tripdata_2016-12.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-01.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-02.parquet\n", "-rw-r--r-- 1 root root 17M Jun 6 03:42 green_tripdata_2017-03.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-04.parquet\n", "-rw-r--r-- 1 root root 16M Jun 6 03:42 green_tripdata_2017-05.parquet\n", "-rw-r--r-- 1 root root 15M Jun 6 03:42 green_tripdata_2017-06.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-07.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-08.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-09.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-10.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-11.parquet\n", "-rw-r--r-- 1 root root 14M Jun 6 03:42 green_tripdata_2017-12.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-01.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-02.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-03.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-04.parquet\n", "-rw-r--r-- 1 root root 13M Jun 6 03:42 green_tripdata_2018-05.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-06.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-07.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-08.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-09.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-10.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2018-11.parquet\n", "-rw-r--r-- 1 root root 12M Jun 6 03:42 green_tripdata_2018-12.parquet\n", "-rw-r--r-- 1 root root 11M Jun 6 03:42 green_tripdata_2019-01.parquet\n", "-rw-r--r-- 1 root root 9.9M Jun 6 03:42 green_tripdata_2019-02.parquet\n", "-rw-r--r-- 1 
root root 11M Jun 6 03:42 green_tripdata_2019-03.parquet\n", "-rw-r--r-- 1 root root 9.0M Jun 6 03:42 green_tripdata_2019-04.parquet\n", "-rw-r--r-- 1 root root 8.7M Jun 6 03:42 green_tripdata_2019-05.parquet\n", "-rw-r--r-- 1 root root 8.2M Jun 6 03:42 green_tripdata_2019-06.parquet\n", "-rw-r--r-- 1 root root 7.6M Jun 6 03:42 green_tripdata_2019-07.parquet\n", "-rw-r--r-- 1 root root 7.3M Jun 6 03:42 green_tripdata_2019-08.parquet\n", "-rw-r--r-- 1 root root 7.4M Jun 6 03:42 green_tripdata_2019-09.parquet\n", "-rw-r--r-- 1 root root 7.6M Jun 6 03:42 green_tripdata_2019-10.parquet\n", "-rw-r--r-- 1 root root 7.3M Jun 6 03:42 green_tripdata_2019-11.parquet\n", "-rw-r--r-- 1 root root 7.2M Jun 6 03:42 green_tripdata_2019-12.parquet\n", "-rw-r--r-- 1 root root 6.9M Jun 6 03:42 green_tripdata_2020-01.parquet\n", "-rw-r--r-- 1 root root 6.4M Jun 6 03:42 green_tripdata_2020-02.parquet\n", "-rw-r--r-- 1 root root 3.9M Jun 6 03:42 green_tripdata_2020-03.parquet\n", "-rw-r--r-- 1 root root 697K Jun 6 03:42 green_tripdata_2020-04.parquet\n", "-rw-r--r-- 1 root root 1.1M Jun 6 03:42 green_tripdata_2020-05.parquet\n", "-rw-r--r-- 1 root root 1.2M Jun 6 03:42 green_tripdata_2020-06.parquet\n", "-rw-r--r-- 1 root root 1.3M Jun 6 03:42 green_tripdata_2020-07.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 6 03:42 green_tripdata_2020-08.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2020-09.parquet\n", "-rw-r--r-- 1 root root 1.7M Jun 6 03:42 green_tripdata_2020-10.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2020-11.parquet\n", "-rw-r--r-- 1 root root 1.4M Jun 6 03:42 green_tripdata_2020-12.parquet\n", "-rw-r--r-- 1 root root 1.3M Jun 6 03:42 green_tripdata_2021-01.parquet\n", "-rw-r--r-- 1 root root 1.1M Jun 6 03:42 green_tripdata_2021-02.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 6 03:42 green_tripdata_2021-03.parquet\n", "-rw-r--r-- 1 root root 1.5M Jun 6 03:42 green_tripdata_2021-04.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 
03:42 green_tripdata_2021-05.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-06.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-07.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2021-08.parquet\n", "-rw-r--r-- 1 root root 1.8M Jun 6 03:42 green_tripdata_2021-09.parquet\n", "-rw-r--r-- 1 root root 2.1M Jun 6 03:42 green_tripdata_2021-10.parquet\n", "-rw-r--r-- 1 root root 2.1M Jun 6 03:42 green_tripdata_2021-11.parquet\n", "-rw-r--r-- 1 root root 2.0M Jun 6 03:42 green_tripdata_2021-12.parquet\n", "-rw-r--r-- 1 root root 1.2M Jun 6 03:42 green_tripdata_2022-01.parquet\n", "-rw-r--r-- 1 root root 1.4M Jun 6 03:42 green_tripdata_2022-02.parquet\n", "-rw-r--r-- 1 root root 1.6M Jun 6 03:42 green_tripdata_2022-03.parquet" ] } ], "source": [ "%%spark\n", "\n", "import shutil\n", "import subprocess\n", "\n", "bucket_name = 'syntasa-gov-sandbox-01'\n", "object_prefix = 'other/sample-data/geomesa_dataset/green_nyc_tripdata'\n", "obj_in_s3_full_path = f's3://{bucket_name}/{object_prefix}/'\n", "local_des_path = '/tmp/my_files/green_nyc_tripdata'\n", "\n", "# Please note ::: If you are trying to create a folder or download a file to a folder other than /tmp/ then you will have to run the below commands as the sudo user\n", "# You can prepend 'sudo' to each one of the cmd, cmd1, cmd2 variables.\n", "\n", "# Lets delete all the files/folders that we will create (start from scratch)\n", "cmd = f'sudo rm -rf {local_des_path}'\n", "run_cmd(cmd)\n", "\n", "# Lets create a temporary folder locally to hold our files\n", "cmd1 = f'sudo mkdir -p {local_des_path}'\n", "run_cmd(cmd)\n", "\n", "# Now download the file from s3\n", "cmd2 = f'sudo aws s3 sync {obj_in_s3_full_path} {local_des_path}/ --quiet' #add the quiet method here if you don't want the entire S3 command logs (useful when trying to avoid printing every file copied)\n", "run_cmd(cmd2)\n", "\n", "# Now validate the file was downloaded to all the nodes\n", 
"cmd3 = f'sudo ls -lah {local_des_path}/'\n", "run_cmd(cmd3)" ] }, { "cell_type": "markdown", "id": "2354e872", "metadata": {}, "source": [ "# Reading Files into a Spark Dataframe" ] }, { "cell_type": "markdown", "id": "18c97798", "metadata": {}, "source": [ "#### Reading a single file into a dataframe (that was downloaded earlier)" ] }, { "cell_type": "code", "execution_count": 37, "id": "dd586817", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Total Number of Recrods :: 109879\n", "+-------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "|_c0 |_c1 |_c2 |_c3 |\n", 
"+-------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "|1052048178\t20210701\t202107\t2021\t2021.4959\t\t\t\t\t\t\t\t\t\t\tMIL\tARMY\t\t\t\t\t\tMIL\t\t\t1\t051\t051\t05\t1\t3.4\t1\t1\t1\t3\t0\t\t\t\t\t\t\t3\tIndiana University| Indiana | United States\tUS\tUSIN\t39.1662\t-86.5264\t449676\t2\tIndiana | United States\tUS\tUSIN\t39.8647\t-86.2604\tIN\t20220701\thttps://www.nwitimes.com/business/local/strack-van-til-stores-doing-checkout-challenge-to-raise-funds-for-salvation-army/article_1f171cd1-e2de-5817-9760-1c237a3afc8d.html|\n", "|1052048179\t20210701\t202107\t2021\t2021.4959\t\t\t\t\t\t\t\t\t\t\tMIL\tARMY\t\t\t\t\t\tMIL\t\t\t1\t051\t051\t05\t1\t3.4\t3\t1\t3\t3\t0\t\t\t\t\t\t\t2\tIndiana | United States\tUS\tUSIN\t39.8647\t-86.2604\tIN\t2\tIndiana| United States\tUS\tUSIN\t39.8647\t-86.2604\tIN\t20220701\thttps://www.nwitimes.com/business/local/strack-van-til-stores-doing-checkout-challenge-to-raise-funds-for-salvation-army/article_1f171cd1-e2de-5817-9760-1c237a3afc8d.html|null |\n", 
"+-------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "only showing top 2 rows" ] } ], "source": [ "%%spark\n", "\n", "spark.conf.set(\"spark.sql.repl.eagerEval.enabled\", True)\n", "\n", "file_name = '20220701.export.CSV'\n", "local_folder_path = '/tmp/my_files'\n", "\n", "spark_df = spark.read.option('header', False).csv(f'file://{local_folder_path}/{file_name}')\n", "record_count = spark_df.count()\n", "\n", "print(f'Total Number of Recrods :: {record_count}')\n", "\n", "spark_df.show(2, truncate=False)" ] }, { "cell_type": "markdown", "id": "26b4d38e", "metadata": {}, "source": [ "#### Read Entire folder of Parquet files (that were downloaded earlier)" ] }, { "cell_type": "code", "execution_count": 38, "id": "987fa0b6", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Total Number of Recrods :: 81407439\n", 
"+--------+--------------------+---------------------+------------------+----------+------------+------------+---------------+-------------+-----------+-----+-------+----------+------------+---------+---------------------+------------+------------+---------+--------------------+\n", "|VendorID|lpep_pickup_datetime|lpep_dropoff_datetime|store_and_fwd_flag|RatecodeID|PULocationID|DOLocationID|passenger_count|trip_distance|fare_amount|extra|mta_tax|tip_amount|tolls_amount|ehail_fee|improvement_surcharge|total_amount|payment_type|trip_type|congestion_surcharge|\n", "+--------+--------------------+---------------------+------------------+----------+------------+------------+---------------+-------------+-----------+-----+-------+----------+------------+---------+---------------------+------------+------------+---------+--------------------+\n", "|2 |2015-05-01 00:24:18 |2015-05-01 00:24:39 |N |1 |146 |146 |1 |0.0 |2.5 |0.5 |0.5 |0.0 |0.0 |null |0.3 |3.8 |2 |1.0 |null |\n", "|2 |2015-05-01 00:28:15 |2015-05-01 00:29:00 |N |1 |146 |146 |1 |0.0 |2.5 |0.5 |0.5 |0.0 |0.0 |null |0.3 |3.8 |2 |1.0 |null |\n", "+--------+--------------------+---------------------+------------------+----------+------------+------------+---------------+-------------+-----------+-----+-------+----------+------------+---------+---------------------+------------+------------+---------+--------------------+\n", "only showing top 2 rows" ] } ], "source": [ "%%spark\n", "\n", "spark.conf.set(\"spark.sql.repl.eagerEval.enabled\", True)\n", "\n", "folder_path = '/tmp/my_files/green_nyc_tripdata'\n", "\n", "spark_df1 = spark.read.parquet(f'file://{folder_path}/')\n", "record_count = spark_df1.count()\n", "\n", "print(f'Total Number of Recrods :: {record_count}')\n", "\n", "spark_df1.show(2, truncate=False)" ] }, { "cell_type": "markdown", "id": "d904e062", "metadata": {}, "source": [ "#### Read files directly from S3 into a Dataframe" ] }, { "cell_type": "code", "execution_count": 39, "id": "b01d8660", 
"metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Total Number of Recrods :: 81407439\n", "+--------+--------------------+---------------------+------------------+----------+------------+------------+---------------+-------------+-----------+-----+-------+----------+------------+---------+---------------------+------------+------------+---------+--------------------+\n", "|VendorID|lpep_pickup_datetime|lpep_dropoff_datetime|store_and_fwd_flag|RatecodeID|PULocationID|DOLocationID|passenger_count|trip_distance|fare_amount|extra|mta_tax|tip_amount|tolls_amount|ehail_fee|improvement_surcharge|total_amount|payment_type|trip_type|congestion_surcharge|\n", "+--------+--------------------+---------------------+------------------+----------+------------+------------+---------------+-------------+-----------+-----+-------+----------+------------+---------+---------------------+------------+------------+---------+--------------------+\n", "|2 |2015-05-01 00:24:18 |2015-05-01 00:24:39 |N |1 |146 |146 |1 |0.0 |2.5 |0.5 |0.5 |0.0 |0.0 |null |0.3 |3.8 |2 |1.0 |null |\n", "|2 |2015-05-01 00:28:15 |2015-05-01 00:29:00 |N |1 |146 |146 |1 |0.0 |2.5 |0.5 |0.5 |0.0 |0.0 |null |0.3 |3.8 |2 |1.0 |null |\n", "+--------+--------------------+---------------------+------------------+----------+------------+------------+---------------+-------------+-----------+-----+-------+----------+------------+---------+---------------------+------------+------------+---------+--------------------+\n", "only showing top 2 rows" ] } ], "source": [ "%%spark\n", "\n", "spark.conf.set(\"spark.sql.repl.eagerEval.enabled\", True)\n", "\n", "bucket_name = 
'syntasa-gov-sandbox-01'\n", "object_prefix = 'other/sample-data/geomesa_dataset/green_nyc_tripdata'\n", "obj_in_s3_full_path = f's3a://{bucket_name}/{object_prefix}/'\n", "\n", "spark_df2 = spark.read.parquet(obj_in_s3_full_path)\n", "record_count = spark_df2.count()\n", "\n", "print(f'Total Number of Recrods :: {record_count}')\n", "\n", "spark_df2.show(2, truncate=False)" ] }, { "cell_type": "markdown", "id": "b06a09b3", "metadata": {}, "source": [ "# Reading Files into a Pandas DF " ] }, { "cell_type": "markdown", "id": "055af8c8", "metadata": {}, "source": [ "#### Before we begin, lets install some libraries on the cluster. We need Pandas, but we'll also install python3-devel and python3-tools from yum so that our cluster has the latest gcc and other binaries (these are not needed for pandas specifically, but they are needed for other libraries such as pycocotools etc..)" ] }, { "cell_type": "code", "execution_count": 56, "id": "e9bef09d", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Running Command [sudo yum install python3-devel python3-tools -y]\n", "executing command on 10.0.1.181\n", "Loaded plugins: extras_suggestions, langpacks, priorities, update-motd\n", "10 packages excluded due to repository priority protections\n", "Package python3-devel-3.7.10-1.amzn2.0.1.x86_64 already installed and latest version\n", "Package python3-tools-3.7.10-1.amzn2.0.1.x86_64 already installed and latest version\n", "Nothing to do\n", "executing command on 10.0.1.17\n", "Loaded plugins: extras_suggestions, langpacks, priorities, update-motd\n", "10 packages excluded due to repository priority protections\n", "Package 
python3-devel-3.7.10-1.amzn2.0.1.x86_64 already installed and latest version\n", "Package python3-tools-3.7.10-1.amzn2.0.1.x86_64 already installed and latest version\n", "Nothing to do\n", "executing command on 10.0.1.154\n", "Loaded plugins: extras_suggestions, langpacks, priorities, update-motd\n", "10 packages excluded due to repository priority protections\n", "Package python3-devel-3.7.10-1.amzn2.0.1.x86_64 already installed and latest version\n", "Package python3-tools-3.7.10-1.amzn2.0.1.x86_64 already installed and latest version\n", "Nothing to do\n", "Running Command [sudo python3 -m pip install pandas]\n", "executing command on 10.0.1.181\n", "Requirement already satisfied: pandas in /usr/local/lib64/python3.7/site-packages (1.3.5)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2020.1)\n", "Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib64/python3.7/site-packages (from pandas) (1.21.6)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas) (1.13.0)\n", "executing command on 10.0.1.17\n", "Requirement already satisfied: pandas in /usr/local/lib64/python3.7/site-packages (1.3.5)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2020.1)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib64/python3.7/site-packages (from pandas) (1.21.6)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas) (1.13.0)\n", "executing command on 10.0.1.154\n", "Requirement already satisfied: pandas in /usr/local/lib64/python3.7/site-packages (1.3.5)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2020.1)\n", "Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib64/python3.7/site-packages (from pandas) (1.21.6)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas) (1.13.0)" ] } ], "source": [ "%%spark\n", "\n", "# Let's install some custom packages: the python3 devel/tools packages via yum, then pandas via pip\n", "custom_pkg_install(packages_arr=['python3-devel', 'python3-tools'], run_type=TYPE_YUM, inst_uninst=INSTALL)\n", "custom_pkg_install(packages_arr=['pandas'], run_type=TYPE_PIP, inst_uninst=INSTALL)" ] }, { "cell_type": "markdown", "id": "bfc7b0d5", "metadata": {}, "source": [ "#### Now let's convert our Spark dataframe from above (`spark_df`) to a pandas dataframe" ] }, { "cell_type": "code", "execution_count": 55, "id": "5f19f675", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " hey ... pandas\n", "0 1052048178\\t20210701\\t202107\\t2021\\t2021.4959\\... ... United States\\tUS\\tUSIN\\t39.8647\\t-86.2604\\tI...\n", "1 1052048179\\t20210701\\t202107\\t2021\\t2021.4959\\... ... None\n", "2 1052048180\\t20210701\\t202107\\t2021\\t2021.4959\\... ... None\n", "3 1052048181\\t20210701\\t202107\\t2021\\t2021.4959\\... ... None\n", "4 1052048182\\t20210701\\t202107\\t2021\\t2021.4959\\... ... None\n", "... ... ... ...\n", "109874 1052215348\\t20220701\\t202207\\t2022\\t2022.4959\\... ... United States\\tUS\\tUSNY\\t42.1497\\t-74.9384\\tN...\n", "109875 1052215349\\t20220701\\t202207\\t2022\\t2022.4959\\... ...
Republic Of\\tVM\\tVM\\t16.166667\\t107.833333\\tV...\n", "109876 1052215350\\t20220701\\t202207\\t2022\\t2022.4959\\... ... None\n", "109877 1052215351\\t20220701\\t202207\\t2022\\t2022.4959\\... ... None\n", "109878 1052215352\\t20220701\\t202207\\t2022\\t2022.4959\\... ... Ontario\n", "\n", "[109879 rows x 4 columns]" ] } ], "source": [ "%%spark\n", "\n", "import pandas as pd\n", "from pyspark.sql.functions import col\n", "\n", "# Set the limit on the total size of results collected to the driver\n", "# (this is not needed for smaller datasets)\n", "driver_conf = spark.sparkContext._conf\n", "driver_conf.set(\"spark.driver.maxResultSize\", \"2g\")\n", "\n", "# toPandas() collects the whole Spark DataFrame onto the driver as a pandas DataFrame\n", "pandasDF = spark_df.toPandas()\n", "\n", "# Do some cool stuff with Pandas here: relabel the four columns, then display the frame\n", "renamed_columns = ['hey', 'this', 'is', 'pandas']\n", "pandasDF.columns = renamed_columns\n", "pandasDF\n" ] }, { "cell_type": "code", "execution_count": null, "id": "21ec5a61", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Syntasa Kernel", "language": "python", "name": "syntasa_kernel" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" }, "syn_metadata": { "spark_lang_type": "python" } }, "nbformat": 4, "nbformat_minor": 5 }