{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "8cd2849d-b5cd-4c4b-bc93-70a603fd75cc", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "id": "9134bbf2-7c77-4cb8-958c-bbc72db15d51", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\ArnoudvanBree\\AppData\\Local\\Temp\\ipykernel_10280\\4158708717.py:1: DtypeWarning: Columns (3,7,17,22,25,26,28,32,34,35,37,39,41,43,44,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,94) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv('usgs_mrds_clean.csv')\n" ] } ], "source": [ "df = pd.read_csv('usgs_mrds_clean.csv')" ] }, { "cell_type": "code", "execution_count": 4, "id": "c5c0a9a5-f577-412d-9915-90b345f61798", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dep idurlmrds idmas idsite namelatitudelongituderegioncountrystate...leadgoldEU CRMall applicationsapplicationsmelter or refineryregion cleandata sourcedata source urldata source year
010009306https://mrdata.usgs.gov/mrds/show-mrds.php?dep...D002208NaNHorse Mountain39.69999-106.20061NaNUnited StatesColorado...FalseFalseFalse[]NaNFalseNaNUSGS Mineral Resources Data Systemhttps://mrdata.usgs.gov/mrds/2011
110021322https://mrdata.usgs.gov/mrds/show-mrds.php?dep...DC15645NaNSquaw Peak40.29026-111.60030NaNUnited StatesUtah...TrueFalseFalse['Nuclear energy', 'Solar PV', 'Electricity gr...Nuclear energyFalseNaNUSGS Mineral Resources Data Systemhttps://mrdata.usgs.gov/mrds/2011
210021561https://mrdata.usgs.gov/mrds/show-mrds.php?dep...DE00236NaNTronoh Mine4.77001101.10923ASMalaysiaPerak...FalseFalseFalse['Nuclear energy', 'Solar PV', 'Aircraft (figh...Nuclear energyFalseNaNUSGS Mineral Resources Data Systemhttps://mrdata.usgs.gov/mrds/2011
310022784https://mrdata.usgs.gov/mrds/show-mrds.php?dep...FS00252NaNHy Grade Placer45.09213-110.78471NaNUnited StatesMontana...FalseTrueFalse['Aircraft (fighter, transport, maritime patro...NaNFalseNaNUSGS Mineral Resources Data Systemhttps://mrdata.usgs.gov/mrds/2011
410028178https://mrdata.usgs.gov/mrds/show-mrds.php?dep...M005024NaNE. A. Culver38.31935-120.72045NaNUnited StatesCalifornia...FalseTrueFalse['Aircraft (fighter, transport, maritime patro...NaNFalseNaNUSGS Mineral Resources Data Systemhttps://mrdata.usgs.gov/mrds/2011
\n", "

5 rows × 98 columns

\n", "
" ], "text/plain": [ " dep id url mrds id \\\n", "0 10009306 https://mrdata.usgs.gov/mrds/show-mrds.php?dep... D002208 \n", "1 10021322 https://mrdata.usgs.gov/mrds/show-mrds.php?dep... DC15645 \n", "2 10021561 https://mrdata.usgs.gov/mrds/show-mrds.php?dep... DE00236 \n", "3 10022784 https://mrdata.usgs.gov/mrds/show-mrds.php?dep... FS00252 \n", "4 10028178 https://mrdata.usgs.gov/mrds/show-mrds.php?dep... M005024 \n", "\n", " mas id site name latitude longitude region country \\\n", "0 NaN Horse Mountain 39.69999 -106.20061 NaN United States \n", "1 NaN Squaw Peak 40.29026 -111.60030 NaN United States \n", "2 NaN Tronoh Mine 4.77001 101.10923 AS Malaysia \n", "3 NaN Hy Grade Placer 45.09213 -110.78471 NaN United States \n", "4 NaN E. A. Culver 38.31935 -120.72045 NaN United States \n", "\n", " state ... lead gold EU CRM \\\n", "0 Colorado ... False False False \n", "1 Utah ... True False False \n", "2 Perak ... False False False \n", "3 Montana ... False True False \n", "4 California ... False True False \n", "\n", " all applications application \\\n", "0 [] NaN \n", "1 ['Nuclear energy', 'Solar PV', 'Electricity gr... Nuclear energy \n", "2 ['Nuclear energy', 'Solar PV', 'Aircraft (figh... Nuclear energy \n", "3 ['Aircraft (fighter, transport, maritime patro... NaN \n", "4 ['Aircraft (fighter, transport, maritime patro... NaN \n", "\n", " smelter or refinery region clean data source \\\n", "0 False NaN USGS Mineral Resources Data System \n", "1 False NaN USGS Mineral Resources Data System \n", "2 False NaN USGS Mineral Resources Data System \n", "3 False NaN USGS Mineral Resources Data System \n", "4 False NaN USGS Mineral Resources Data System \n", "\n", " data source url data source year \n", "0 https://mrdata.usgs.gov/mrds/ 2011 \n", "1 https://mrdata.usgs.gov/mrds/ 2011 \n", "2 https://mrdata.usgs.gov/mrds/ 2011 \n", "3 https://mrdata.usgs.gov/mrds/ 2011 \n", "4 https://mrdata.usgs.gov/mrds/ 2011 \n", "\n", "[5 rows x 98 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 6, "id": "2d519f38-9fdf-445f-865b-6467a5baa943", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "98" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(list(df.columns))" ] }, { "cell_type": "code", "execution_count": 11, "id": "132aeced-0f90-4de4-8d68-d17ee866b181", "metadata": {}, "outputs": [], "source": [ "column_names = [col.lower().replace(' ','_') for col in list(df.columns)]" ] }, { "cell_type": "code", "execution_count": 8, "id": "8e115500-40d0-4976-861d-c8a9099ca83d", "metadata": {}, "outputs": [], "source": [ "def get_CREATE_TABLE_query(schema_name,table_name,column_names,SQL_datatypes):\n", " '''\n", " This function creates a SQL CREATE TABLE query based on the provided list of column names and list of datatypes.\n", " \n", " Arguments:\n", " - schema_name (str): schema of the new table.\n", " - table_name (str): name of the new table.\n", " - column_names (list): list of column names.\n", " - SQL_datatypes (list): list of PostgresSQL data types for the columns. \n", " Returns:\n", " - sql_command (str): the SQL command that generates the table.\n", " '''\n", " \n", " sql_command = 'CREATE TABLE IF NOT EXISTS ' + str(schema_name) + '.' + str(table_name) + '\\n' + '(' + '\\n'\n", " \n", " nr_columns = len(column_names)\n", " \n", " for (idx,column_name),datatype in zip(enumerate(column_names),SQL_datatypes):\n", " \n", " if idx < nr_columns - 1:\n", " sql_command += ' ' + column_name + ' ' + datatype + ',' + '\\n'\n", " else:\n", " #leave out ',' in last line\n", " sql_command += ' ' + column_name + ' ' + datatype + '\\n'\n", " \n", " sql_command += ');'\n", " \n", " return(sql_command)" ] }, { "cell_type": "code", "execution_count": 13, "id": "530664d4-81c5-4f19-aedf-66d8fd91a0b9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CREATE TABLE IF NOT EXISTS minerals.usgs_mrds_clean\n", "(\n", " dep_id text,\n", " url text,\n", " mrds_id text,\n", " mas_id text,\n", " site_name text,\n", " latitude text,\n", " longitude text,\n", " region text,\n", " country text,\n", " state text,\n", " county text,\n", " com_type text,\n", " commod1 text,\n", " commod2 text,\n", " commod3 text,\n", " oper_type text,\n", " dep_type text,\n", " prod_size text,\n", " dev_stat text,\n", " ore text,\n", " gangue text,\n", " other_matl text,\n", " orebody_fm text,\n", " work_type text,\n", " model text,\n", " alteration text,\n", " conc_proc text,\n", " names text,\n", " ore_ctrl text,\n", " reporter text,\n", " hrock_unit text,\n", " hrock_type text,\n", " arock_unit text,\n", " arock_type text,\n", " structure text,\n", " tectonic text,\n", " ref text,\n", " yfp_ba text,\n", " yr_fst_prd text,\n", " ylp_ba text,\n", " yr_lst_prd text,\n", " dy_ba text,\n", " disc_yr text,\n", " prod_yrs text,\n", " discr text,\n", " score text,\n", " geometry text,\n", " all_commodities text,\n", " antimony text,\n", " baryte text,\n", " beryllium text,\n", " bismuth text,\n", " borates text,\n", " cobalt text,\n", " fluorspar text,\n", " gallium text,\n", " germanium text,\n", " hafnium text,\n", " ree text,\n", " indium text,\n", " lithium text,\n", " magnesium text,\n", " natural_graphite text,\n", " niobium text,\n", " pgm text,\n", " phosphorus text,\n", " scandium text,\n", " silicon_metal text,\n", " tantalum text,\n", " titanium text,\n", " vanadium text,\n", " tungsten text,\n", " strontium text,\n", " uranium text,\n", " molybdenum text,\n", " manganese text,\n", " tin text,\n", " chromium text,\n", " zirconium text,\n", " silver text,\n", " aluminum text,\n", " tellurium text,\n", " nickel text,\n", " iron text,\n", " selenium text,\n", " zinc text,\n", " cadmium text,\n", " copper text,\n", " lead text,\n", " gold text,\n", " eu_crm text,\n", " all_applications text,\n", " application text,\n", " smelter_or_refinery text,\n", " region_clean text,\n", " data_source text,\n", " data_source_url text,\n", " data_source_year text\n", ");\n" ] } ], "source": [ "print(get_CREATE_TABLE_query(schema_name='minerals',table_name='usgs_mrds_clean',column_names=column_names,SQL_datatypes=98*['text',]))" ] }, { "cell_type": "code", "execution_count": null, "id": "abbb5793-2a3e-4232-85ba-0c894bdb48eb", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }