By now there's no need for an introduction to the importance of data and working with data.
Today we will explore the use of Python for data and data analysis.
We will follow the schedule below, and by the end you will be exhausted - promise!
The good news is that I've tried to make these Notebooks as easy to follow as possible.
Sometimes you need to do something outside the normal realm of Jupyter Notebooks.
One major example, which we will take advantage of right now, is the ability to install packages from Jupyter.
For instance we can use pip or conda, but for speed please consider always using pip from Jupyter, or else jump back out to the command line.
There's a delimiter to tell Jupyter that you want to invoke command-line commands, and that's a bang/exclamation mark, '!'
Example:
!pip install pandas
If you have not already done so, or are not sure whether you have installed the following packages, and you receive a 'module not found' error, then please uncomment the required lines in the cell below the imports cell and execute/run that cell.
If you have run any install line(s), then make sure you go back and re-execute the imports cell.
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import glob
import os
import pycountry
import requests
from io import BytesIO
from bs4 import BeautifulSoup
from collections import Counter
#!pip install pandas
#!pip install numpy
#!pip install bs4
#!pip install plotly
#!pip install ipywidgets
#!pip install requests
#!pip install fastparquet
#!pip install pycountry
#!pip install nltk
NOTE: There will be a couple of others we need, but I'm intentionally going to wait until 'after' we need them to install them!
We will move on and talk really quickly about Python in general while these are installing.
Actually, let's step back a little bit first and just look at simple variables.
A variable is just a user-defined 'name' which references some sort of data. For example:
foo = 37 # The true answer to life, the universe, and just everything! At least nature seems to think so!
Here we have created a variable named foo and stored the value 37 in it. It is followed by a comment on what the value represents.
The true power, beauty, and, sadly, part of its dark side, is that EVERYTHING in Python is a non-typed object. Here a type can be defined as a bunch of text, an integer value, a float value, a long list or matrix of values, or, well, anything you can dream up. In Python, at least until recently, you never worry about defining the type of data you are creating and/or using like you do in most every other language. Python 'interprets' (spelled: best guess, but almost always correct) the type for you behind the scenes. Hence why Python is known as an interpreted language, as compared to a compiled language.
NOTE: In the past few releases, and even more so in the future, the concept of type hinting has been made available to the hardcore coders that seek performance and data-type safety. However, due to how it works even deeper under the covers (the GIL, if you know a bit about Python), even these type hints are just that, 'hints', and Python could conceivably interpret something different at run time. It won't 'most of the time', but it could! We will not be covering type hints at all today!
Beyond storing a single piece of data in a variable, we have specialized data structures, also referred to as containers, to help us out.
There are many, but the vast majority of the time the base containers you will run into and use are:
Lists []: Lists are what the name claims - a list of values.
Each item is indexed (behind the scenes) starting from zero: the second index is 1, the third 2, etc.
Lists use '[]' to enclose the list of data.
Example: foo = ['Zeroth item', 1, '2', '3rd item']
NOTE: All 'text' items are enclosed in either single quotes or double quotes. It pretty much makes no difference which (well, till you need both at the same time - more later!).
Lists are 'mutable', meaning you can change them at will!
<b>NOTE:</b> Arrays and matrices are specialized lists in numpy; outside of numpy, arrays are indeed lists and matrices are lists of lists.
Tuples (): Tuples are just like lists, but they are 'immutable', meaning once defined they can not be changed.
Tuples use '()' to enclose the list (err, tuple) of data.
Example: months = ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December')
Sets {}: Think of sets just like a tuple, except they can not have multiple occurrences of the same element.
Also, they are unordered and thus have no slicing/indexing available to them. Normally you will not ever have to deal with sets.
However, there are occasions with Pandas where you will, and we will see one of those occasions today.
Lastly, there's the all-powerful dictionary.
Dictionaries use '{}' to enclose their key/value pairs. In addition, you assign each key's value using a colon ':'.
Example: colors = {'Sun': 'is Orange', 'Grapes': 'are purple', 'red': 'rgb(255, 0, 0)'}
Dictionaries are mutable.
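Here is a minimal sketch of all four containers side by side (the values are just illustrative):

```python
# Lists are mutable and zero-indexed
foo = ['Zeroth item', 1, '2', '3rd item']
foo[1] = 'one'                # fine: lists can be changed in place
print(foo[1])                 # one

# Tuples look similar but are immutable
months = ('January', 'February', 'March')
# months[0] = 'Jan'           # would raise a TypeError

# Sets silently drop duplicates and keep no order
colors = {'red', 'green', 'red', 'blue'}
print(len(colors))            # 3 - the second 'red' is gone

# Dictionaries map keys to values
info = {'Sun': 'is orange', 'Grapes': 'are purple'}
print(info['Sun'])            # is orange
```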
Here's a quick comparison of 'some' of their key traits:
Now the real fun comes in when you understand that you can compose each of these containers into super-containers which hold all kinds of other containers. For example:
a list of lists
a list of dictionaries
a list of lists of lists of lists of- (think matrices, as both vectors and matrices are actually lists in Python land!)
a dictionary of lists
a dictionary of dictionaries
a dictionary of functions (I personally love using these in my apps as part of callback functionality!)
The list (sorry, pun not intended) goes on as far as your imagination will take you!
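A few of these compositions in action (the names and values are just illustrative):

```python
# A list of dictionaries - a very common shape for record-style data
records = [
    {'name': 'foo', 'value': 37},
    {'name': 'bar', 'value': 42},
]
print(records[1]['name'])     # bar

# A dictionary of lists - one list per 'column'
columns = {'name': ['foo', 'bar'], 'value': [37, 42]}
print(columns['value'][0])    # 37

# A dictionary of functions - handy for callback-style dispatch
actions = {'double': lambda x: 2 * x, 'square': lambda x: x * x}
print(actions['square'](6))   # 36
```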
For data science, the two you will use almost all the time are lists and dictionaries.
Well, till we dive into Pandas and DataFrames, which are different, yet under the covers are just composed of standard containers!
There are several other main Python features we need to discuss, such as for-loops, functions, magics, and others. But we will explore those when we run into them.
Almost all programming languages today are built on the concept of object-oriented design.
Meaning code should be made modular, or grouped, such that sections that can or need to be used more than once are separated out from the base 'linear' code.
We will mention a few major categorizations used in Python and most languages, but ignore the details, as this would become a semester-long class!
Functions - At the basest level of object orientation (think repeatable and reusable design, for those new to coding) we have functions.
A function is nothing more than a special 'grouping' of code that can be called whenever needed to execute the code therein.
Functions can take any number of parameters, whether constant or variable in length.
Functions in Python can return nothing, or as many values as desired.
Functions have an 'internal' scope, meaning whatever variables are used in them stay in them unless 'returned'.
We will use functions here and there today and we will explore both their structure and importance when we use them.
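A minimal sketch of those traits (the function name and values here are just made up for illustration):

```python
def describe(name, *values):
    """Return how many values were given and their total."""
    total = sum(values)           # 'total' lives only inside the function
    return len(values), total     # a function can return multiple values

count, total = describe('foo', 1, 2, 3)
print(f'{count} values summing to {total}')   # 3 values summing to 6
```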
Classes - Beyond functions, there's another important object-oriented collection of code known as classes.
We will use classes today, as provided by the packages we import.
However, we will not be talking about classes themselves at all today.
Packages - Think of these as groups of classes (OK, this is like defining 'food' as 'something we eat', but---)
In the wonderful world of Python there are 10 million ways to do anything/everything (and that's just scratching the surface!)
Printing output is a prime example, as there are NUMEROUS ways to format print statements!
For today I will use the current standard, the f-string, as follows;
print(f'stuff {variable or code}')
foo = 37
notfoo = 42
print(f'The closer to true answer to Life, the Universe, and just Everything is {37} and not {notfoo}!')
The closer to true answer to Life, the Universe, and just Everything is 37 and not 42!
This is a VAST area to explore, as data is stored in diverse places, ways, formats, and all sorts of other guises, making 'Where can I find, and how do I get, such-and-such data?' one of the most asked questions.
Sadly this is such a dense and complex area that we will look at just one of the many ways to obtain data that you don't already have.
As we all, hopefully, know, there are major differences between how macOS, Windows, Linux, and others handle files.
Even the organization of the directory/folder paths is different between operating systems.
To work safely, so anyone on most any system can reproduce your code, we will use Python's 'os' package, which does most of the nasty work for us.
In particular, for today, we will utilize the 'os.path' sub-module, which allows us to work with file/directory/folder paths and naming.
What we are most interested in is joining file names to their directory/folder paths.
We can load the package using the Python import as;
import os
We can then use the 'join' function to join a directory/folder path to the file name.
For example:
```python
import os

folderpath = 'Data'
filename = 'foo.txt'

fname = os.path.join(folderpath, filename)
print(fname)
```
Note: In the above example the path is relative to our current working directory. We could just as easily create a direct path to data anywhere on any drive you have access to.
This is all fine and dandy, and exactly what we need, but we are interested in reproducible practices.
So let's pretend you are in a workshop and will be placing data into a folder that was not pre-created for you (oh wait, that's exactly what's happening today!)
When working reproducibly we should check to see if the desired directory/folder exists, and if not, then create it.
Yes, you can do this manually but, again, we are interested in reproducible workflows, so don't assume ANYTHING about an end user.
We will once again make use of both os and os.path to create a directory if it does not already exist.
First we will check to see if the desired folder exists, using a function nicely named 'exists()'.
If os can not find such a directory then we will use an os function called 'makedirs()' to create it.
if not os.path.exists('Raw_data'):
    os.makedirs('Raw_data')
Yes, for those that are familiar with this method, you may be thinking 'Uhmmm, why not just use the input parameter setup with exist_ok=True?'
It's nothing more than coding style. Both will work, and it would shorten the above code to a single line;
os.makedirs('Raw_data', exist_ok=True)
But often you may want to add other things into the code, and the first method we covered is more extensible for this behavior.
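For example, the explicit check extends naturally when you want different behavior on each branch; a minimal sketch (the printed messages are just illustrative):

```python
import os

folder = 'Raw_data'   # the folder name used in this session
if not os.path.exists(folder):
    os.makedirs(folder)
    print(f'Created {folder}')                 # extra behavior only on creation
else:
    print(f'{folder} already exists - nothing to do')
```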
Alternatively, for the more advanced developers building larger, more complex Python programs, you may opt for something known as try-except exception handling.
I will not be going into advanced Python coding today.
But to give you an idea how this works, you would do something like this.
import errno

try:
    os.makedirs('Data')
except OSError as e:
    if e.errno != errno.EEXIST:
        raise
Now that we know how to work with folders and file paths, it's time to start thinking about our data!
The data we will be working with today is from Web of Science, which we talked about on the first day.
I generated the data by searching for all topics relating to 'Climate and Art'.
I then saved the complete results out in an Excel file format.
Note that you are limited in how many entries you can save at a time; therefore we have 13 files to cover all results.
I had planned on creating and posting a quick video on how this data was generated, but have yet to actually do that. I am still hoping to do so.
I have intentionally stored the data for this session in a different repo, just to demo one way to access data via coding.
This method works great for data found via URLs, such as GitHub, GitLab, OSF, Kaggle, etc.
NOTE: This will NOT work for data served off of specialized data servers such as THREDDS (TDS), EROS, NOMADS, EOSDIS, etc.
All of these have their own Python or other APIs you will be required to use to access those data.
url = 'https://github.com/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/'
soup = BeautifulSoup(requests.get(url).content, "html.parser")
files = []
for link in soup.select('a[href*=".xls"]'):
file = link['href']
files.append(file)
files
['/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_01.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_02.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_03.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_04.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_05.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_06.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_07.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_08.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_09.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_10.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_11.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_12.xls', '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_13.xls']
When you run the above code as-is, you will end up with a list of mostly complete links to each Excel (.xls) file in the GitHub repository.
From here we have two obvious choices we can make to get the info we need to download our files (yes, there are many others too - welcome to Python);
We will choose the latter, as in the end it's powerful to learn this method for a great many data processing needs.
To do this we will use the Python split() function.
There are many things we can do with split(), but for us we will tell it specifically where we want to split.
For us the place to split at is each '/'.
We will walk through this process below so you can see it in action.
test = files[0]
test
test = test.split('/')
test
test[-1]
'ClimateAndArt_01.xls'
We can accomplish all of that with Python method chaining, like this:
file = file.split('/')[-1]
We can now rewrite our code to get just the file names for each Excel file in that particular GitHub repo.
url = 'https://github.com/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/'
soup = BeautifulSoup(requests.get(url).content, "html.parser")
files = []
for link in soup.select('a[href*=".xls"]'):
file = link['href']
file = file.split('/')[-1]
files.append(file)
files
['ClimateAndArt_01.xls', 'ClimateAndArt_02.xls', 'ClimateAndArt_03.xls', 'ClimateAndArt_04.xls', 'ClimateAndArt_05.xls', 'ClimateAndArt_06.xls', 'ClimateAndArt_07.xls', 'ClimateAndArt_08.xls', 'ClimateAndArt_09.xls', 'ClimateAndArt_10.xls', 'ClimateAndArt_11.xls', 'ClimateAndArt_12.xls', 'ClimateAndArt_13.xls']
Now that we have a list of the files we can now work on downloading and storing them locally.
We can actually merge the two 'file = ' lines and we will discuss how and why this works later.
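As a quick preview of that merge, a plain dict can stand in here for a BeautifulSoup link tag, since both support the ['href'] lookup:

```python
# Hypothetical stand-in for one of the soup.select() results
link = {'href': '/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023/blob/main/ClimateAndArt_01.xls'}

# The two separate assignments...
file = link['href']
file = file.split('/')[-1]

# ...collapse into one chained expression
file_merged = link['href'].split('/')[-1]
print(file_merged)   # ClimateAndArt_01.xls
```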
Next we need to download each of the files and save them to our desired drive and directory.
Again, there are near endless ways to do this; we will look at two:
Why both? Many of you will spend most of your time working with Pandas for your data processing needs.
But Pandas is not the solution for everything. Often you have other needs, or file formats Pandas does not understand, therefore it's important to know how to work without it!
The requests library is a large HTTP package allowing all manner of Python control of HTTP needs.
For us, we will just concentrate on the get() function which, for our needs, will download the file at its URL into local memory.
Then we simply save the content of that file by opening and writing a new file.
url = 'https://github.com/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023'
for file in files:
    url_file = url + '/raw/main/' + file
    req = requests.get(url_file)
    fName = os.path.join('Raw_data', file)
    with open(fName, 'wb') as f:
        f.write(req.content)
You can also download and store a remote file with Pandas.
Some formats, such as .csv files, just need the URL to the file, and you load it as you would a local file.
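For example, read_csv() looks the same whether the source is a local path or an http(s) URL; here we write a tiny throwaway CSV first so the sketch is self-contained (the file name is just illustrative):

```python
import pandas as pd

# Create a tiny throwaway CSV so this example stands on its own; with
# remote data you would pass the file's URL straight to read_csv()
with open('tiny_example.csv', 'w') as f:
    f.write('name,value\nfoo,37\nbar,42\n')

# read_csv() accepts a local path or a URL interchangeably
df_tiny = pd.read_csv('tiny_example.csv')
print(df_tiny.shape)   # (2, 2)
```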
Excel stores its data in a more binary format, and that requires a bit more effort.
We will simplify this process a bit by once again utilizing the requests.get() function to actually download the remote data's binary 'content'.
Then we read that 'content' data into an Excel-derived DataFrame by casting the binary data into a Pandas-understandable format with BytesIO().
Lastly we save it out as an .xls file (which, happily, Pandas will understand automatically from here on out).
NOTE: Using to_excel() will in all probability give you a deprecation warning. Just ignore it until you want to work on updating your software stack to use openpyxl.
Alternatively, and better yet, save the file in a different format. We will talk about this more shortly.
#url = 'https://github.com/auroraTDunn/CRDDS_Data_Bootcamp_Jan_2023'
for file in files:
    url_file = (url + '/raw/main/' + file)
    data = requests.get(url_file).content
    df = pd.read_excel(BytesIO(data))
    fName = os.path.join('Raw_data', file)
    df.to_excel(fName, index=False)
QUESTION: Which of these two methods is best for you to use?
Answer:
If you only want to work with one Excel file, then I would suggest using the latter, as you will already have all your Excel data up and ready in a DataFrame.
If you have multiple files and want to keep them in an Excel format, then use the first, more direct method.
We now have all our files downloaded, but what we really want is a single file to work with.
Luckily our files have the exact same columns in each file. If your data has different column names, layouts, etc. then additional prep work should be done first!
Such methods we will discuss shortly, but for different purposes.
The first thing we will do is create a "path" to our raw data.
# Create a path to the data files from where our Notebook is at
path = os.path.join("Raw_data","*.xls")
With the path to all our Excel files we can now work on merging them all into one file.
In our first attempt we will do it in the most simplistic way, which is perfectly fine for our small number of files and the data inside those files.
# Create an empty DataFrame
df = pd.DataFrame([])
# Loop through each Excel in the Raw_data folder
# read it into a temporary DataFrame
# then concatenate the file to our DataFrame
for file in glob.glob(path):
    print(f'file name: {file}')
    df_temp = pd.read_excel(file)
    df = pd.concat([df, df_temp], ignore_index=True)
file name: Raw_data\ClimateAndArt_01.xls
file name: Raw_data\ClimateAndArt_02.xls
file name: Raw_data\ClimateAndArt_03.xls
file name: Raw_data\ClimateAndArt_04.xls
file name: Raw_data\ClimateAndArt_05.xls
file name: Raw_data\ClimateAndArt_06.xls
file name: Raw_data\ClimateAndArt_07.xls
file name: Raw_data\ClimateAndArt_08.xls
file name: Raw_data\ClimateAndArt_09.xls
file name: Raw_data\ClimateAndArt_10.xls
file name: Raw_data\ClimateAndArt_11.xls
file name: Raw_data\ClimateAndArt_12.xls
file name: Raw_data\ClimateAndArt_13.xls
Note that concat needs a series/list of DataFrames as its input parameter.
Using this to our advantage, we can simplify the code to just three lines (noting that the first line creates our path to the data).
# Create a list of DataFrames for each Excel file in the "path" data directory
df_temp = (pd.read_excel(f) for f in glob.glob(path))
# Now concat the list of DataFrames together.
df = pd.concat(df_temp, ignore_index=True)
Ideally you can chain it all together into just two lines of code.
df_temp = (pd.read_excel(f) for f in glob.glob(os.path.join("Raw_data","*.xls")))
df = pd.concat(df_temp, ignore_index=True)
While we are only playing with an extremely small number of files, with only ~1000 rows in each file, there is no major advantage to any of the above methods, efficiency-wise.
But if you have many more, or vastly larger, files then you will find that these are presented from slowest to fastest methods.
Note: These are not the only methods to merge multiple files together. Other methods include using the map() function and/or lambda functions. If you are sharing your code with others, beware the use of lambda functions. While they are fast, efficient, and extremely powerful, many people see lambda in a line of code and, well, unhappy thoughts stream through their heads.
Also note that if you are playing with truly large data (hundreds or more files, or specialized data formats such as HDF5, netCDF, and many others) then there are vastly superior ways to work with these data. We will discuss those methods this afternoon!
Now that we have all our data in one DataFrame, we should be smart and save that data out.
Again, being just a small DataFrame, we can save it out to pretty much anything we want with no concerns about efficiency.
So we will just save it out to a common .csv file.
Later we will look at better formats for larger data.
First let's create a new directory to store all our processed data into.
One of the great things about DataFrames, regardless of the language you are coding in, is that the DataFrame is just a massive data object.
Meaning it does not matter what format the data started out in; you can store it to any other available file format, or even more than one.
So for our case we will save our merged Excel files into a single csv file.
We save it using the Pandas function to_csv().
We will also make use of the special parameter index=False; this tells to_csv() to NOT save the built-in DataFrame index into the file. If we did not do this, then when we load it later the index would appear as a new column!
# Name of our new directory
proc_dir = 'Processed_data'
# If it does not already exists then create it
if not os.path.exists(proc_dir):
os.makedirs(proc_dir)
df.to_csv(os.path.join(proc_dir, 'Combined_lists.csv'), index=False)
Question: Is csv the best file format for our needs?
Answer
No! It in fact is not, as we shall see in Part 2 and discuss in more detail in Part 3.
But hey, why not - it's the one most people use most of the time, so it's good to see what happens when 'stuff' happens!
NOTE: If you are playing with much larger datasets then other formats would be vastly better, and we will discuss those in Part 3.
There are numerous ways to explore the data, but first you want a semi-decent idea of what's in the data so you know how to explore it.
There are a great many ways to do this, but for now we will look at the quick solutions Pandas has to offer us.
The simplest method is to just have the Notebook display the info, kind of like printing it out would.
Note you can only do it this way in a Notebook of some sort (yes, there are many different types of Notebooks by various vendors!)
df
Publication Type | Authors | Book Authors | Book Editors | Book Group Authors | Author Full Names | Book Author Full Names | Group Authors | Article Title | Source Title | ... | Web of Science Index | Research Areas | IDS Number | Pubmed Id | Open Access Designations | Highly Cited Status | Hot Paper Status | Date of Export | UT (Unique WOS ID) | Web of Science Record | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | J | Miles, M | NaN | NaN | NaN | Miles, Malcolm | NaN | NaN | Representing nature: art and climate change | CULTURAL GEOGRAPHIES | ... | Social Science Citation Index (SSCI); Arts &am... | Environmental Sciences & Ecology; Geography | 549EU | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000274029500002 | 0 |
1 | J | Dal Farra, R; Suarez, P | NaN | NaN | NaN | Dal Farra, Ricardo; Suarez, Pablo | NaN | NaN | RED CROSS/RED CRESCENT CLIMATE CENTRE AND BALA... | LEONARDO | ... | Arts & Humanities Citation Index (A&HCI) | Art | AP6YE | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000342223700017 | 0 |
2 | J | Chen, MH | NaN | NaN | NaN | Chen, Mei-Hsin | NaN | NaN | The Contribution of Art to Climate Change Comm... | REVISTA HUMANIDADES | ... | Emerging Sources Citation Index (ESCI) | Arts & Humanities - Other Topics | 2T9FT | NaN | gold, Green Submitted | NaN | NaN | 2022-12-21 | WOS:000822772700002 | 0 |
3 | J | Guy, S; Henshaw, V; Heidrich, O | NaN | NaN | NaN | Guy, Simon; Henshaw, Victoria; Heidrich, Oliver | NaN | NaN | Climate change, adaptation and Eco-Art in Sing... | JOURNAL OF ENVIRONMENTAL PLANNING AND MANAGEMENT | ... | Social Science Citation Index (SSCI); Arts &am... | Development Studies; Public Administration | AS7TP | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000344457900003 | 0 |
4 | J | Baztan, J; Vanderlinden, JP; Jaffres, L; Jorge... | NaN | NaN | NaN | Baztan, Juan; Vanderlinden, Jean-Paul; Jaffres... | NaN | NaN | Facing climate injustices: Community trust-bui... | CLIMATE RISK MANAGEMENT | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Environmental Sciences & Ecology; Meteorology ... | PK4RQ | 33106769.0 | Green Published, gold | NaN | NaN | 2022-12-21 | WOS:000602434600001 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
12681 | J | Seregin, AP; Bochkov, DA; Shner, JV; Garin, EV... | NaN | NaN | NaN | Seregin, Alexey P.; Bochkov, Dmitriy A.; Shner... | NaN | NaN | Flora of Russia on iNaturalist: a dataset | BIODIVERSITY DATA JOURNAL | ... | Science Citation Index Expanded (SCI-EXPANDED) | Biodiversity & Conservation | OT0QT | 33244292.0 | Green Submitted, gold, Green Published | NaN | NaN | 2022-12-21 | WOS:000590560300001 | 0 |
12682 | J | Keith, P; Mennesson, MI | NaN | NaN | NaN | Keith, Philippe; Mennesson, Marion, I | NaN | NaN | Review of Ophiocara (Teleostei: Butidae) from ... | CYBIUM | ... | Science Citation Index Expanded (SCI-EXPANDED) | Zoology | ST1IC | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000662202900002 | 0 |
12683 | J | Kattge, J; Diaz, S; Lavorel, S; Prentice, C; L... | NaN | NaN | NaN | Kattge, J.; Diaz, S.; Lavorel, S.; Prentice, C... | NaN | NaN | TRY - a global database of plant traits | GLOBAL CHANGE BIOLOGY | ... | Science Citation Index Expanded (SCI-EXPANDED) | Biodiversity & Conservation; Environmental Sci... | 800WS | NaN | Green Published, Green Submitted, Bronze, Gree... | NaN | NaN | 2022-12-21 | WOS:000293399000011 | 0 |
12684 | J | Narasimhan, VM; Patterson, N; Moorjani, P; Roh... | NaN | NaN | NaN | Narasimhan, Vagheesh M.; Patterson, Nick; Moor... | NaN | NaN | The formation of human populations in South an... | SCIENCE | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Science & Technology - Other Topics | IW1MK | 31488661.0 | Green Submitted, Green Accepted | Y | N | 2022-12-21 | WOS:000484732700038 | 0 |
12685 | J | Potzelsberger, E; Gossner, MM; Beenken, L; Gaz... | NaN | NaN | NaN | Poetzelsberger, Elisabeth; Gossner, Martin M.;... | NaN | NaN | Biotic threats for 23 major non-native tree sp... | SCIENTIFIC DATA | ... | Science Citation Index Expanded (SCI-EXPANDED) | Science & Technology - Other Topics | TY6PT | 34362931.0 | Green Published, gold | NaN | NaN | 2022-12-21 | WOS:000683905100002 | 0 |
12686 rows × 72 columns
Notice that it only displays the header and first five rows of data, then the last five rows.
Also notice that after the "Source Title" column there is another ellipsis followed by more data.
Just like rows, if there's too much information Jupyter will fold/hide data.
Usually you only want to look at just the first bit of info. For this you can just use the .head() function.
df.head()
Publication Type | Authors | Book Authors | Book Editors | Book Group Authors | Author Full Names | Book Author Full Names | Group Authors | Article Title | Source Title | ... | Web of Science Index | Research Areas | IDS Number | Pubmed Id | Open Access Designations | Highly Cited Status | Hot Paper Status | Date of Export | UT (Unique WOS ID) | Web of Science Record | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | J | Miles, M | NaN | NaN | NaN | Miles, Malcolm | NaN | NaN | Representing nature: art and climate change | CULTURAL GEOGRAPHIES | ... | Social Science Citation Index (SSCI); Arts &am... | Environmental Sciences & Ecology; Geography | 549EU | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000274029500002 | 0 |
1 | J | Dal Farra, R; Suarez, P | NaN | NaN | NaN | Dal Farra, Ricardo; Suarez, Pablo | NaN | NaN | RED CROSS/RED CRESCENT CLIMATE CENTRE AND BALA... | LEONARDO | ... | Arts & Humanities Citation Index (A&HCI) | Art | AP6YE | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000342223700017 | 0 |
2 | J | Chen, MH | NaN | NaN | NaN | Chen, Mei-Hsin | NaN | NaN | The Contribution of Art to Climate Change Comm... | REVISTA HUMANIDADES | ... | Emerging Sources Citation Index (ESCI) | Arts & Humanities - Other Topics | 2T9FT | NaN | gold, Green Submitted | NaN | NaN | 2022-12-21 | WOS:000822772700002 | 0 |
3 | J | Guy, S; Henshaw, V; Heidrich, O | NaN | NaN | NaN | Guy, Simon; Henshaw, Victoria; Heidrich, Oliver | NaN | NaN | Climate change, adaptation and Eco-Art in Sing... | JOURNAL OF ENVIRONMENTAL PLANNING AND MANAGEMENT | ... | Social Science Citation Index (SSCI); Arts &am... | Development Studies; Public Administration | AS7TP | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000344457900003 | 0 |
4 | J | Baztan, J; Vanderlinden, JP; Jaffres, L; Jorge... | NaN | NaN | NaN | Baztan, Juan; Vanderlinden, Jean-Paul; Jaffres... | NaN | NaN | Facing climate injustices: Community trust-bui... | CLIMATE RISK MANAGEMENT | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Environmental Sciences & Ecology; Meteorology ... | PK4RQ | 33106769.0 | Green Published, gold | NaN | NaN | 2022-12-21 | WOS:000602434600001 | 0 |
5 rows × 72 columns
.head() can actually be told how many lines you wish to look at.
Well, to an extent anyway. You can't tell it an enormous number of lines and expect to see all of them.
df.head(10)
Publication Type | Authors | Book Authors | Book Editors | Book Group Authors | Author Full Names | Book Author Full Names | Group Authors | Article Title | Source Title | ... | Web of Science Index | Research Areas | IDS Number | Pubmed Id | Open Access Designations | Highly Cited Status | Hot Paper Status | Date of Export | UT (Unique WOS ID) | Web of Science Record | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | J | Miles, M | NaN | NaN | NaN | Miles, Malcolm | NaN | NaN | Representing nature: art and climate change | CULTURAL GEOGRAPHIES | ... | Social Science Citation Index (SSCI); Arts &am... | Environmental Sciences & Ecology; Geography | 549EU | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000274029500002 | 0 |
1 | J | Dal Farra, R; Suarez, P | NaN | NaN | NaN | Dal Farra, Ricardo; Suarez, Pablo | NaN | NaN | RED CROSS/RED CRESCENT CLIMATE CENTRE AND BALA... | LEONARDO | ... | Arts & Humanities Citation Index (A&HCI) | Art | AP6YE | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000342223700017 | 0 |
2 | J | Chen, MH | NaN | NaN | NaN | Chen, Mei-Hsin | NaN | NaN | The Contribution of Art to Climate Change Comm... | REVISTA HUMANIDADES | ... | Emerging Sources Citation Index (ESCI) | Arts & Humanities - Other Topics | 2T9FT | NaN | gold, Green Submitted | NaN | NaN | 2022-12-21 | WOS:000822772700002 | 0 |
3 | J | Guy, S; Henshaw, V; Heidrich, O | NaN | NaN | NaN | Guy, Simon; Henshaw, Victoria; Heidrich, Oliver | NaN | NaN | Climate change, adaptation and Eco-Art in Sing... | JOURNAL OF ENVIRONMENTAL PLANNING AND MANAGEMENT | ... | Social Science Citation Index (SSCI); Arts &am... | Development Studies; Public Administration | AS7TP | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000344457900003 | 0 |
4 | J | Baztan, J; Vanderlinden, JP; Jaffres, L; Jorge... | NaN | NaN | NaN | Baztan, Juan; Vanderlinden, Jean-Paul; Jaffres... | NaN | NaN | Facing climate injustices: Community trust-bui... | CLIMATE RISK MANAGEMENT | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Environmental Sciences & Ecology; Meteorology ... | PK4RQ | 33106769.0 | Green Published, gold | NaN | NaN | 2022-12-21 | WOS:000602434600001 | 0 |
5 | J | Burke, M; Tickwell, D; Whitmarsh, L | NaN | NaN | NaN | Burke, Miriam; Tickwell, David; Whitmarsh, Lor... | NaN | NaN | Participatory arts and affective engagement wi... | GLOBAL ENVIRONMENTAL CHANGE-HUMAN AND POLICY D... | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Environmental Sciences & Ecology; Geography | GC1AN | NaN | Green Accepted | NaN | NaN | 2022-12-21 | WOS:000429509100010 | 0 |
6 | J | Rodder, S | NaN | NaN | NaN | Roedder, Simone | NaN | NaN | The Climate of Science-Art and the Art-Science... | MINERVA | ... | Social Science Citation Index (SSCI); Arts &am... | Education & Educational Research; History & Ph... | EL1GB | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000394367400005 | 0 |
7 | J | Bentz, J; O'Brien, K | NaN | NaN | NaN | Bentz, Julia; O'Brien, Karen | NaN | NaN | ART FOR CHANGE: Transformative learning and yo... | ELEMENTA-SCIENCE OF THE ANTHROPOCENE | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Environmental Sciences & Ecology; Meteorology ... | JZ5OV | NaN | gold, Green Published | NaN | NaN | 2022-12-21 | WOS:000505152300002 | 0 |
8 | J | Ture, C | NaN | NaN | NaN | Ture, Cengiz | NaN | NaN | THE ROLE OF VISUAL ARTS ON SOCIAL PERCEPTION A... | ANADOLU UNIVERSITESI SANAT & TASARIM DERGISI-A... | ... | Emerging Sources Citation Index (ESCI) | Art | VC9DG | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000435155100015 | 0 |
9 | J | Kim, S | NaN | NaN | NaN | Kim, Sunhee | NaN | NaN | Art therapy development in Korea: The current ... | ARTS IN PSYCHOTHERAPY | ... | Social Science Citation Index (SSCI); Arts &am... | Psychology; Rehabilitation | 414XY | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000263899700001 | 0 |
10 rows × 72 columns
Likewise you can look at the end of the DataFrame using the .tail() function.
And just like head(), you can specify a number of lines you want to look at.
df.tail()
Publication Type | Authors | Book Authors | Book Editors | Book Group Authors | Author Full Names | Book Author Full Names | Group Authors | Article Title | Source Title | ... | Web of Science Index | Research Areas | IDS Number | Pubmed Id | Open Access Designations | Highly Cited Status | Hot Paper Status | Date of Export | UT (Unique WOS ID) | Web of Science Record | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
12681 | J | Seregin, AP; Bochkov, DA; Shner, JV; Garin, EV... | NaN | NaN | NaN | Seregin, Alexey P.; Bochkov, Dmitriy A.; Shner... | NaN | NaN | Flora of Russia on iNaturalist: a dataset | BIODIVERSITY DATA JOURNAL | ... | Science Citation Index Expanded (SCI-EXPANDED) | Biodiversity & Conservation | OT0QT | 33244292.0 | Green Submitted, gold, Green Published | NaN | NaN | 2022-12-21 | WOS:000590560300001 | 0 |
12682 | J | Keith, P; Mennesson, MI | NaN | NaN | NaN | Keith, Philippe; Mennesson, Marion, I | NaN | NaN | Review of Ophiocara (Teleostei: Butidae) from ... | CYBIUM | ... | Science Citation Index Expanded (SCI-EXPANDED) | Zoology | ST1IC | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000662202900002 | 0 |
12683 | J | Kattge, J; Diaz, S; Lavorel, S; Prentice, C; L... | NaN | NaN | NaN | Kattge, J.; Diaz, S.; Lavorel, S.; Prentice, C... | NaN | NaN | TRY - a global database of plant traits | GLOBAL CHANGE BIOLOGY | ... | Science Citation Index Expanded (SCI-EXPANDED) | Biodiversity & Conservation; Environmental Sci... | 800WS | NaN | Green Published, Green Submitted, Bronze, Gree... | NaN | NaN | 2022-12-21 | WOS:000293399000011 | 0 |
12684 | J | Narasimhan, VM; Patterson, N; Moorjani, P; Roh... | NaN | NaN | NaN | Narasimhan, Vagheesh M.; Patterson, Nick; Moor... | NaN | NaN | The formation of human populations in South an... | SCIENCE | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Science & Technology - Other Topics | IW1MK | 31488661.0 | Green Submitted, Green Accepted | Y | N | 2022-12-21 | WOS:000484732700038 | 0 |
12685 | J | Potzelsberger, E; Gossner, MM; Beenken, L; Gaz... | NaN | NaN | NaN | Poetzelsberger, Elisabeth; Gossner, Martin M.;... | NaN | NaN | Biotic threats for 23 major non-native tree sp... | SCIENTIFIC DATA | ... | Science Citation Index Expanded (SCI-EXPANDED) | Science & Technology - Other Topics | TY6PT | 34362931.0 | Green Published, gold | NaN | NaN | 2022-12-21 | WOS:000683905100002 | 0 |
5 rows × 72 columns
df.tail(10)
Publication Type | Authors | Book Authors | Book Editors | Book Group Authors | Author Full Names | Book Author Full Names | Group Authors | Article Title | Source Title | ... | Web of Science Index | Research Areas | IDS Number | Pubmed Id | Open Access Designations | Highly Cited Status | Hot Paper Status | Date of Export | UT (Unique WOS ID) | Web of Science Record | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
12676 | J | Dorigo, W; Himmelbauer, I; Aberer, D; Schremme... | NaN | NaN | NaN | Dorigo, Wouter; Himmelbauer, Irene; Aberer, Da... | NaN | NaN | The International Soil Moisture Network: servi... | HYDROLOGY AND EARTH SYSTEM SCIENCES | ... | Science Citation Index Expanded (SCI-EXPANDED) | Geology; Water Resources | WU3PD | NaN | Green Submitted, gold, Green Published | Y | N | 2022-12-21 | WOS:000716459500001 | 0 |
12677 | J | Cooper, A; Turney, CSM; Palmer, J; Hogg, A; Mc... | NaN | NaN | NaN | Cooper, Alan; Turney, Chris S. M.; Palmer, Jon... | NaN | NaN | Response to Comment on A global environmental ... | SCIENCE | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Science & Technology - Other Topics | XA6YJ | 34793228.0 | Green Accepted | NaN | NaN | 2022-12-21 | WOS:000720789200002 | 0 |
12678 | J | Cooper, A; Turney, CSM; Palmer, J; Hogg, A; Mc... | NaN | NaN | NaN | Cooper, Alan; Turney, Chris S. M.; Palmer, Jon... | NaN | NaN | Response to Comment on A global environmental ... | SCIENCE | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Science & Technology - Other Topics | XA6YJ | 34793203.0 | Green Accepted | NaN | NaN | 2022-12-21 | WOS:000720789200004 | 0 |
12679 | J | Jennings, B | NaN | NaN | NaN | Jennings, Bruce | NaN | NaN | Solidarity and care as relational practices | BIOETHICS | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Social Sciences - Other Topics; Medical Ethics... | HA5QZ | 30264873.0 | NaN | NaN | NaN | 2022-12-21 | WOS:000450332600003 | 0 |
12680 | J | Malaspinas, AS; Westaway, MC; Muller, C; Sousa... | NaN | NaN | NaN | Malaspinas, Anna-Sapfo; Westaway, Michael C.; ... | NaN | NaN | A genomic history of Aboriginal Australia | NATURE | ... | Science Citation Index Expanded (SCI-EXPANDED) | Science & Technology - Other Topics | EA5OS | 27654914.0 | Green Submitted | NaN | NaN | 2022-12-21 | WOS:000386671000038 | 0 |
12681 | J | Seregin, AP; Bochkov, DA; Shner, JV; Garin, EV... | NaN | NaN | NaN | Seregin, Alexey P.; Bochkov, Dmitriy A.; Shner... | NaN | NaN | Flora of Russia on iNaturalist: a dataset | BIODIVERSITY DATA JOURNAL | ... | Science Citation Index Expanded (SCI-EXPANDED) | Biodiversity & Conservation | OT0QT | 33244292.0 | Green Submitted, gold, Green Published | NaN | NaN | 2022-12-21 | WOS:000590560300001 | 0 |
12682 | J | Keith, P; Mennesson, MI | NaN | NaN | NaN | Keith, Philippe; Mennesson, Marion, I | NaN | NaN | Review of Ophiocara (Teleostei: Butidae) from ... | CYBIUM | ... | Science Citation Index Expanded (SCI-EXPANDED) | Zoology | ST1IC | NaN | NaN | NaN | NaN | 2022-12-21 | WOS:000662202900002 | 0 |
12683 | J | Kattge, J; Diaz, S; Lavorel, S; Prentice, C; L... | NaN | NaN | NaN | Kattge, J.; Diaz, S.; Lavorel, S.; Prentice, C... | NaN | NaN | TRY - a global database of plant traits | GLOBAL CHANGE BIOLOGY | ... | Science Citation Index Expanded (SCI-EXPANDED) | Biodiversity & Conservation; Environmental Sci... | 800WS | NaN | Green Published, Green Submitted, Bronze, Gree... | NaN | NaN | 2022-12-21 | WOS:000293399000011 | 0 |
12684 | J | Narasimhan, VM; Patterson, N; Moorjani, P; Roh... | NaN | NaN | NaN | Narasimhan, Vagheesh M.; Patterson, Nick; Moor... | NaN | NaN | The formation of human populations in South an... | SCIENCE | ... | Science Citation Index Expanded (SCI-EXPANDED)... | Science & Technology - Other Topics | IW1MK | 31488661.0 | Green Submitted, Green Accepted | Y | N | 2022-12-21 | WOS:000484732700038 | 0 |
12685 | J | Potzelsberger, E; Gossner, MM; Beenken, L; Gaz... | NaN | NaN | NaN | Poetzelsberger, Elisabeth; Gossner, Martin M.;... | NaN | NaN | Biotic threats for 23 major non-native tree sp... | SCIENTIFIC DATA | ... | Science Citation Index Expanded (SCI-EXPANDED) | Science & Technology - Other Topics | TY6PT | 34362931.0 | Green Published, gold | NaN | NaN | 2022-12-21 | WOS:000683905100002 | 0 |
10 rows × 72 columns
You can also slice the DataFrame to see a specific set of rows of data.
Say we want to look at just rows 100-104.
To do this we use the following format: [start_row : end_row]
You can even use a step counter if you want. Why? I have no idea, but if you do, follow this format:
[start_row : end_row : step]
df[100:101]
Publication Type | Authors | Book Authors | Book Editors | Book Group Authors | Author Full Names | Book Author Full Names | Group Authors | Article Title | Source Title | ... | Web of Science Index | Research Areas | IDS Number | Pubmed Id | Open Access Designations | Highly Cited Status | Hot Paper Status | Date of Export | UT (Unique WOS ID) | Web of Science Record | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
100 | J | Stassen, C; Dommenget, D; Chadwick, R | NaN | NaN | NaN | Stassen, Christian; Dommenget, Dietmar; Chadwi... | NaN | NaN | Conceptual deconstruction of the simulated pre... | CLIMATE DYNAMICS | ... | Science Citation Index Expanded (SCI-EXPANDED) | Meteorology & Atmospheric Sciences | ML7MZ | NaN | Green Accepted | NaN | NaN | 2022-12-21 | WOS:000535336100001 | 0 |
1 rows × 72 columns
df[100:115:5]
Publication Type | Authors | Book Authors | Book Editors | Book Group Authors | Author Full Names | Book Author Full Names | Group Authors | Article Title | Source Title | ... | Web of Science Index | Research Areas | IDS Number | Pubmed Id | Open Access Designations | Highly Cited Status | Hot Paper Status | Date of Export | UT (Unique WOS ID) | Web of Science Record | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
100 | J | Stassen, C; Dommenget, D; Chadwick, R | NaN | NaN | NaN | Stassen, Christian; Dommenget, Dietmar; Chadwi... | NaN | NaN | Conceptual deconstruction of the simulated pre... | CLIMATE DYNAMICS | ... | Science Citation Index Expanded (SCI-EXPANDED) | Meteorology & Atmospheric Sciences | ML7MZ | NaN | Green Accepted | NaN | NaN | 2022-12-21 | WOS:000535336100001 | 0 |
105 | J | Stucchi, L; Bignami, DF; Bocchiola, D; Del Cur... | NaN | NaN | NaN | Stucchi, Leonardo; Bignami, Daniele Fabrizio; ... | NaN | NaN | Assessment of Climate-Driven Flood Risk and Ad... | CLIMATE | ... | Emerging Sources Citation Index (ESCI) | Meteorology & Atmospheric Sciences | QN4FA | NaN | gold | NaN | NaN | 2022-12-21 | WOS:000622416300001 | 0 |
110 | J | Grose, MR; Narsey, S; Delage, FP; Dowdy, AJ; B... | NaN | NaN | NaN | Grose, M. R.; Narsey, S.; Delage, F. P.; Dowdy... | NaN | NaN | Insights From CMIP6 for Australia's Future Cli... | EARTHS FUTURE | ... | Science Citation Index Expanded (SCI-EXPANDED) | Environmental Sciences & Ecology; Geology; Met... | LU4NR | NaN | gold, Green Accepted, Green Published | Y | N | 2022-12-21 | WOS:000537734300001 | 0 |
3 rows × 72 columns
We can look at just a particular column in a similar manner, by specifying the column by its header name.
df['Article Title']
0 Representing nature: art and climate change 1 RED CROSS/RED CRESCENT CLIMATE CENTRE AND BALA... 2 The Contribution of Art to Climate Change Comm... 3 Climate change, adaptation and Eco-Art in Sing... 4 Facing climate injustices: Community trust-bui... ... 12681 Flora of Russia on iNaturalist: a dataset 12682 Review of Ophiocara (Teleostei: Butidae) from ... 12683 TRY - a global database of plant traits 12684 The formation of human populations in South an... 12685 Biotic threats for 23 major non-native tree sp... Name: Article Title, Length: 12686, dtype: object
df['Article Title'].head(3)
0 Representing nature: art and climate change 1 RED CROSS/RED CRESCENT CLIMATE CENTRE AND BALA... 2 The Contribution of Art to Climate Change Comm... Name: Article Title, dtype: object
We can even mix any of these methods.
As an example say we want to look at just rows 100-104 in the "Article Title" column
df['Article Title'][100:105]
100 Conceptual deconstruction of the simulated pre... 101 Art for a Future Planet Beyond Apocalypse 102 Assessing climate model projections: State of ... 103 Climate consensus: A multilevel study testing ... 104 Art, climate change and (other) eco materials:... Name: Article Title, dtype: object
You Try It: Look at the top 3 lines, using head() of the 'Abstract' column
Solution:
df['Abstract'].head(3)
0 Climate change is now an established scientifi... 1 The art! (sic) climate contest used art as a c... 2 This article examines how and why the climate ... Name: Abstract, dtype: object
We can find the dimensions of our data using the .shape attribute.
Note that unlike most Python functions, we do not use the () to access it. Pandas exposes this as a decorated property rather than a method.
Don't worry about what that means; that's a discussion for an intermediate to advanced class in Python.
df.shape
(12686, 72)
If we want just the number of rows of data we can slice it out.
df.shape[0]
12686
or just the number of columns
df.shape[1]
72
info() is a great function to help you begin to understand your data.
In essence it gives you the name of each column, the number of rows with non-null data, and its data type.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 12686 entries, 0 to 12685 Data columns (total 72 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Publication Type 12686 non-null object 1 Authors 12681 non-null object 2 Book Authors 58 non-null object 3 Book Editors 1244 non-null object 4 Book Group Authors 309 non-null object 5 Author Full Names 12681 non-null object 6 Book Author Full Names 58 non-null object 7 Group Authors 28 non-null object 8 Article Title 12686 non-null object 9 Source Title 12686 non-null object 10 Book Series Title 1182 non-null object 11 Book Series Subtitle 0 non-null float64 12 Language 12686 non-null object 13 Document Type 12686 non-null object 14 Conference Title 1518 non-null object 15 Conference Date 1518 non-null object 16 Conference Location 1518 non-null object 17 Conference Sponsor 1147 non-null object 18 Conference Host 256 non-null object 19 Author Keywords 8945 non-null object 20 Keywords Plus 10357 non-null object 21 Abstract 12356 non-null object 22 Addresses 12402 non-null object 23 Affiliations 11887 non-null object 24 Reprint Addresses 12388 non-null object 25 Email Addresses 11236 non-null object 26 Researcher Ids 7693 non-null object 27 ORCIDs 9244 non-null object 28 Funding Orgs 7732 non-null object 29 Funding Name Preferred 7714 non-null object 30 Funding Text 7607 non-null object 31 Cited References 0 non-null float64 32 Cited Reference Count 12686 non-null int64 33 Times Cited, WoS Core 12686 non-null int64 34 Times Cited, All Databases 12686 non-null int64 35 180 Day Usage Count 12686 non-null int64 36 Since 2013 Usage Count 12686 non-null int64 37 Publisher 12686 non-null object 38 Publisher City 12686 non-null object 39 Publisher Address 12686 non-null object 40 ISSN 10955 non-null object 41 eISSN 8921 non-null object 42 ISBN 1321 non-null object 43 Journal Abbreviation 12253 non-null object 44 Journal ISO Abbreviation 11165 non-null object 45 Publication Date 9661 non-null object 46 
Publication Year 12527 non-null float64 47 Volume 11501 non-null object 48 Issue 8053 non-null object 49 Part Number 181 non-null object 50 Supplement 77 non-null object 51 Special Issue 671 non-null object 52 Meeting Abstract 1 non-null object 53 Start Page 8959 non-null object 54 End Page 8959 non-null object 55 Article Number 3559 non-null object 56 DOI 11473 non-null object 57 DOI Link 11473 non-null float64 58 Book DOI 197 non-null object 59 Early Access Date 1249 non-null object 60 Number of Pages 12686 non-null int64 61 WoS Categories 12683 non-null object 62 Web of Science Index 12686 non-null object 63 Research Areas 12683 non-null object 64 IDS Number 12686 non-null object 65 Pubmed Id 1525 non-null float64 66 Open Access Designations 6455 non-null object 67 Highly Cited Status 317 non-null object 68 Hot Paper Status 317 non-null object 69 Date of Export 12686 non-null object 70 UT (Unique WOS ID) 12686 non-null object 71 Web of Science Record 12686 non-null int64 dtypes: float64(5), int64(7), object(60) memory usage: 7.0+ MB
describe() gives you a quick statistical summary of your data.
This can be really useful for many types of data.
However, for our data it's really not much help at all!
df.describe()
Book Series Subtitle | Cited References | Cited Reference Count | Times Cited, WoS Core | Times Cited, All Databases | 180 Day Usage Count | Since 2013 Usage Count | Publication Year | DOI Link | Number of Pages | Pubmed Id | Web of Science Record | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 0.0 | 0.0 | 12686.000000 | 12686.000000 | 12686.000000 | 12686.000000 | 12686.000000 | 12527.000000 | 11473.0 | 12686.000000 | 1.525000e+03 | 12686.0 |
mean | NaN | NaN | 66.851569 | 33.764071 | 34.873404 | 4.211729 | 32.252168 | 2016.078471 | 0.0 | 15.629040 | 2.955808e+07 | 0.0 |
std | NaN | NaN | 66.088802 | 238.393534 | 253.445908 | 11.660664 | 93.612961 | 5.758761 | 0.0 | 10.831938 | 5.745922e+06 | 0.0 |
min | NaN | NaN | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1956.000000 | 0.0 | 0.000000 | 1.832098e+06 | 0.0 |
25% | NaN | NaN | 31.000000 | 1.000000 | 1.000000 | 0.000000 | 4.000000 | 2013.000000 | 0.0 | 10.000000 | 2.637171e+07 | 0.0 |
50% | NaN | NaN | 53.000000 | 8.000000 | 8.000000 | 1.000000 | 12.000000 | 2018.000000 | 0.0 | 14.000000 | 3.120301e+07 | 0.0 |
75% | NaN | NaN | 81.000000 | 27.000000 | 28.000000 | 4.000000 | 31.000000 | 2020.000000 | 0.0 | 19.000000 | 3.407992e+07 | 0.0 |
max | NaN | NaN | 1295.000000 | 22838.000000 | 24472.000000 | 335.000000 | 4666.000000 | 2023.000000 | 0.0 | 359.000000 | 3.650743e+07 | 0.0 |
You Try It: Look at the stats, using describe, for the 'Number of Pages' column.
Solution:
df['Number of Pages'].describe()
count 12686.000000 mean 15.629040 std 10.831938 min 0.000000 25% 10.000000 50% 14.000000 75% 19.000000 max 359.000000 Name: Number of Pages, dtype: float64
Note: Look at the 'max' number of pages - that sucker will come back to haunt us later!!!
The last thing we will look at is creating a list of column header names.
This is actually a very valuable tip which you may find yourself using frequently, especially when you start plotting your data!
To do this we will make use of a wonderful Python and thus Pandas trick called chaining.
In essence what we will be doing is chaining several different functions together.
This not only saves a lot of coding time and lines of code, it can often execute faster.
What we need to do here is get the columns, get the value (which is a string representing the name) and then send that information to a list.
Furthermore, we will save the resultant list to a new variable which we will wisely, for now, call "header_names".
# List of all column names
header_names = df.columns.values.tolist()
header_names
['Publication Type', 'Authors', 'Book Authors', 'Book Editors', 'Book Group Authors', 'Author Full Names', 'Book Author Full Names', 'Group Authors', 'Article Title', 'Source Title', 'Book Series Title', 'Book Series Subtitle', 'Language', 'Document Type', 'Conference Title', 'Conference Date', 'Conference Location', 'Conference Sponsor', 'Conference Host', 'Author Keywords', 'Keywords Plus', 'Abstract', 'Addresses', 'Affiliations', 'Reprint Addresses', 'Email Addresses', 'Researcher Ids', 'ORCIDs', 'Funding Orgs', 'Funding Name Preferred', 'Funding Text', 'Cited References', 'Cited Reference Count', 'Times Cited, WoS Core', 'Times Cited, All Databases', '180 Day Usage Count', 'Since 2013 Usage Count', 'Publisher', 'Publisher City', 'Publisher Address', 'ISSN', 'eISSN', 'ISBN', 'Journal Abbreviation', 'Journal ISO Abbreviation', 'Publication Date', 'Publication Year', 'Volume', 'Issue', 'Part Number', 'Supplement', 'Special Issue', 'Meeting Abstract', 'Start Page', 'End Page', 'Article Number', 'DOI', 'DOI Link', 'Book DOI', 'Early Access Date', 'Number of Pages', 'WoS Categories', 'Web of Science Index', 'Research Areas', 'IDS Number', 'Pubmed Id', 'Open Access Designations', 'Highly Cited Status', 'Hot Paper Status', 'Date of Export', 'UT (Unique WOS ID)', 'Web of Science Record']
There are numerous other methods we can use to gain meaningful insight into our data, and we shall explore more of those as we proceed today.
For now it's time to stop wasting time and look at how to clean the data and extract new data that we want, for reasons we will explore as we go.
Now that we have our data all in one nice DataFrame, and safely saved, it is now time to start cleaning and munging the data.
The next step is setting just the columns we want to work with, we have 72 and don't need them all.
There are two basic ways of getting a DataFrame with just the columns we want.
We will look at both ways!
First we will just create a new DataFrame which extracts the columns of interest.
How did I create the following lists?
Remember our handy variable we created called header_names?
Well, I just copied, pasted, and edited!
keeper_cols = [
'Publication Type',
'Authors',
'Author Full Names',
'Article Title',
'Source Title',
'Book Series Title',
'Book Series Subtitle',
'Language',
'Document Type',
'Conference Title',
'Conference Date',
'Conference Location',
'Author Keywords',
'Keywords Plus',
'Abstract',
'Addresses',
'Affiliations',
'Email Addresses',
'Funding Orgs',
'Funding Name Preferred',
'Cited Reference Count',
'Times Cited, WoS Core',
'Times Cited, All Databases',
'180 Day Usage Count',
'Since 2013 Usage Count',
'Publisher',
'Publisher City',
'Journal Abbreviation',
'Journal ISO Abbreviation',
'Publication Date',
'Publication Year',
'Volume',
'Issue',
'Start Page',
'DOI',
'Number of Pages',
'WoS Categories',
'Web of Science Index',
'Research Areas']
df_keepers = df[keeper_cols]
df_keepers.head()
Publication Type | Authors | Author Full Names | Article Title | Source Title | Book Series Title | Book Series Subtitle | Language | Document Type | Conference Title | ... | Publication Date | Publication Year | Volume | Issue | Start Page | DOI | Number of Pages | WoS Categories | Web of Science Index | Research Areas | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | J | Miles, M | Miles, Malcolm | Representing nature: art and climate change | CULTURAL GEOGRAPHIES | NaN | NaN | English | Article | NaN | ... | JAN | 2010.0 | 17.0 | 1 | 19 | 10.1177/1474474009349997 | 17 | Environmental Studies; Geography | Social Science Citation Index (SSCI); Arts &am... | Environmental Sciences & Ecology; Geography |
1 | J | Dal Farra, R; Suarez, P | Dal Farra, Ricardo; Suarez, Pablo | RED CROSS/RED CRESCENT CLIMATE CENTRE AND BALA... | LEONARDO | NaN | NaN | English | Article | NaN | ... | OCT | 2014.0 | 47.0 | 5 | 493 | 10.1162/LEON_a_00818 | 1 | Art | Arts & Humanities Citation Index (A&HCI) | Art |
2 | J | Chen, MH | Chen, Mei-Hsin | The Contribution of Art to Climate Change Comm... | REVISTA HUMANIDADES | NaN | NaN | Spanish | Article | NaN | ... | JUL-DEC | 2022.0 | 12.0 | 2 | NaN | 10.15517/h.v12i2.51060 | 18 | Humanities, Multidisciplinary | Emerging Sources Citation Index (ESCI) | Arts & Humanities - Other Topics |
3 | J | Guy, S; Henshaw, V; Heidrich, O | Guy, Simon; Henshaw, Victoria; Heidrich, Oliver | Climate change, adaptation and Eco-Art in Sing... | JOURNAL OF ENVIRONMENTAL PLANNING AND MANAGEMENT | NaN | NaN | English | Article | NaN | ... | JAN 2 | 2015.0 | 58.0 | 1 | 39 | 10.1080/09640568.2013.839446 | 16 | Development Studies; Regional & Urban Planning | Social Science Citation Index (SSCI); Arts &am... | Development Studies; Public Administration |
4 | J | Baztan, J; Vanderlinden, JP; Jaffres, L; Jorge... | Baztan, Juan; Vanderlinden, Jean-Paul; Jaffres... | Facing climate injustices: Community trust-bui... | CLIMATE RISK MANAGEMENT | NaN | NaN | English | Article | NaN | ... | NaN | 2020.0 | 30.0 | NaN | NaN | 10.1016/j.crm.2020.100253 | 15 | Environmental Sciences; Environmental Studies;... | Science Citation Index Expanded (SCI-EXPANDED)... | Environmental Sciences & Ecology; Meteorology ... |
5 rows × 39 columns
df_keepers.shape
(12686, 39)
QUESTION: Why did I bother creating a new DataFrame? (Yes, this is really a trick question.)
Answer
Because I also want to show you the drop() process and don't want to have to rerun numerous cells to get back to this point.
Meaning we could have just reassigned df = df blah blah blah.
Likewise we can simply remove/drop them using the drop() function, like so.
There is an important bit of witchery in Pandas: there are numerous times when you are extremely wise to tell Pandas what you actually want to work against, rows or columns!!!
To do this we use a special parameter called axis, and it has two expected states: a 0 (rows) or a 1 (columns).
For those that have taken linear algebra, you have probably rehearsed row, column, row, column, row, column in your head to remember the order of events for any operation you desire to make (well, unless you're coding in Fortran!). For Pandas just remember 0, 1, 0, 1, 0, 1 :)
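To see the axis parameter in action, here is a tiny sketch using a made-up throwaway DataFrame (the toy data is purely illustrative):

```python
import pandas as pd

# A throwaway DataFrame purely to demonstrate the axis parameter.
toy = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

rows_dropped = toy.drop(0, axis=0)    # axis=0: drop the ROW with index label 0
cols_dropped = toy.drop('b', axis=1)  # axis=1: drop the COLUMN named 'b'

print(rows_dropped.shape)  # (2, 2)
print(cols_dropped.shape)  # (3, 1)
```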
Note this time we will go ahead and just commit this to our active DataFrame (df) using the handy inplace=True parameter.
delete_cols = [
'Book Authors',
'Book Editors',
'Book Group Authors',
'Book Author Full Names',
'Group Authors',
'Conference Sponsor',
'Conference Host',
'Reprint Addresses',
'Researcher Ids',
'ORCIDs',
'Funding Text',
'Cited References',
'Publisher Address',
'ISSN',
'eISSN',
'ISBN',
'Part Number',
'Supplement',
'Special Issue',
'Meeting Abstract',
'End Page',
'Article Number',
'DOI Link',
'Book DOI',
'Early Access Date',
'IDS Number',
'Pubmed Id',
'Open Access Designations',
'Highly Cited Status',
'Hot Paper Status',
'Date of Export',
'UT (Unique WOS ID)',
'Web of Science Record']
df.drop(delete_cols, axis=1, inplace=True)
df.head()
Publication Type | Authors | Author Full Names | Article Title | Source Title | Book Series Title | Book Series Subtitle | Language | Document Type | Conference Title | ... | Publication Date | Publication Year | Volume | Issue | Start Page | DOI | Number of Pages | WoS Categories | Web of Science Index | Research Areas | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | J | Miles, M | Miles, Malcolm | Representing nature: art and climate change | CULTURAL GEOGRAPHIES | NaN | NaN | English | Article | NaN | ... | JAN | 2010.0 | 17.0 | 1 | 19 | 10.1177/1474474009349997 | 17 | Environmental Studies; Geography | Social Science Citation Index (SSCI); Arts &am... | Environmental Sciences & Ecology; Geography |
1 | J | Dal Farra, R; Suarez, P | Dal Farra, Ricardo; Suarez, Pablo | RED CROSS/RED CRESCENT CLIMATE CENTRE AND BALA... | LEONARDO | NaN | NaN | English | Article | NaN | ... | OCT | 2014.0 | 47.0 | 5 | 493 | 10.1162/LEON_a_00818 | 1 | Art | Arts & Humanities Citation Index (A&HCI) | Art |
2 | J | Chen, MH | Chen, Mei-Hsin | The Contribution of Art to Climate Change Comm... | REVISTA HUMANIDADES | NaN | NaN | Spanish | Article | NaN | ... | JUL-DEC | 2022.0 | 12.0 | 2 | NaN | 10.15517/h.v12i2.51060 | 18 | Humanities, Multidisciplinary | Emerging Sources Citation Index (ESCI) | Arts & Humanities - Other Topics |
3 | J | Guy, S; Henshaw, V; Heidrich, O | Guy, Simon; Henshaw, Victoria; Heidrich, Oliver | Climate change, adaptation and Eco-Art in Sing... | JOURNAL OF ENVIRONMENTAL PLANNING AND MANAGEMENT | NaN | NaN | English | Article | NaN | ... | JAN 2 | 2015.0 | 58.0 | 1 | 39 | 10.1080/09640568.2013.839446 | 16 | Development Studies; Regional & Urban Planning | Social Science Citation Index (SSCI); Arts &am... | Development Studies; Public Administration |
4 | J | Baztan, J; Vanderlinden, JP; Jaffres, L; Jorge... | Baztan, Juan; Vanderlinden, Jean-Paul; Jaffres... | Facing climate injustices: Community trust-bui... | CLIMATE RISK MANAGEMENT | NaN | NaN | English | Article | NaN | ... | NaN | 2020.0 | 30.0 | NaN | NaN | 10.1016/j.crm.2020.100253 | 15 | Environmental Sciences; Environmental Studies;... | Science Citation Index Expanded (SCI-EXPANDED)... | Environmental Sciences & Ecology; Meteorology ... |
5 rows × 39 columns
QUESTION: Which is the best method to use?
ANSWER
Easy: the one that is fastest/easiest for you to generate your list of columns!
df.columns.values.tolist()
The first column of data is called "Publication Type" and uses symbols to represent the various types of publication found in the Web of Science.
The definitions for the symbols are as follows.
You Try It: Take a look at the 'Publication Type' column and add on the .unique() function just like you would with .head().
We will come back and discuss .unique() in just a couple of minutes.
df['Publication Type'].unique()
array(['J', 'C', 'S', 'B'], dtype=object)
Solution:
df['Publication Type'].unique()
array(['J', 'C', 'S', 'B'], dtype=object)
Publication Types:
B = Book
J = Journal
P = Patent
S = Book in Series
Source: https://images.webofknowledge.com/images/help/WOS/hs_wos_fieldtags.html
What we would like to do is to replace those symbols with their actual meaning.
In the non-Pandas world we would utilize the insane power of regex.
Insane, you ask?
Yes, because it drives you insane trying to figure out what parameterizations you need to get things done.
I've used it for decades and still find myself googling, or opening a massive book just on regex which I have had since before many of you were born!
But Pandas often allows us to bypass the need for regex. In slightly more complex cases we can use Pandas with its built-in regex support, or even with pure regex itself (just make sure to bring a comfy, wrap-around, strap-on white jacket!)
For our purposes we will create a dictionary to aid us.
For those new to this, a dictionary is one of the most powerful data structures in Python, next to a list itself.
A dictionary is a specialized collection which maps a key to an associated value.
Note: while each key must be unique, a key's value can hold multiple items. Meaning a key can have a value which consists of a list, a tuple, or even another dictionary!!!
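For example, here's a quick sketch of that fancier case, using made-up field and journal names purely for illustration:

```python
# Illustrative only: each key maps to a LIST of values.
journals_by_field = {
    'ecology': ['GLOBAL CHANGE BIOLOGY', 'BIODIVERSITY DATA JOURNAL'],
    'art': ['LEONARDO'],
}

print(journals_by_field['ecology'])     # the whole list for that key
print(journals_by_field['ecology'][0])  # GLOBAL CHANGE BIOLOGY
```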
We will stay simple and create a dictionary as such symbols (the key) and their meaning (the value).
pubtype = {'B':'Book', 'J':'Journal', 'P':'Patent', 'S':'Series'}
You can now find an associated value from a desired key. Likewise you can find a key from an associated value, although that takes a little more work.
We will make use of this by iterating through the keys in our dictionary and using Pandas replace() function to make our desired changes.
To find a value from a key we just slice it from the dictionary by the key of interest, like this;
myvalue = dict_name['desired_key_name']
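To make the lookup concrete, here is a tiny self-contained sketch using our pubtype mapping (toy code, separate from the DataFrame):

```python
pubtype = {'B': 'Book', 'J': 'Journal', 'P': 'Patent', 'S': 'Series'}

# Slice a value out by its key:
myvalue = pubtype['J']
print(myvalue)  # Journal

# .get() is the forgiving cousin: it hands back a default instead of
# raising a KeyError when the key is missing.
print(pubtype.get('Z', 'Unknown'))  # Unknown
```

Asking for a key that isn't there with plain square brackets raises a KeyError, which is why .get() is worth knowing about.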
for key in pubtype:
    print(f'key: {key} value: {pubtype[key]}') # This is here just for show and tell!
    df['Publication Type'].replace(key, pubtype[key], inplace=True)
key: B value: Book key: J value: Journal key: P value: Patent key: S value: Series
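As an aside, replace() also accepts an entire dictionary in one call, which removes the Python loop entirely. A minimal sketch on a toy Series (not our real DataFrame):

```python
import pandas as pd

# Toy Series standing in for our 'Publication Type' column.
s = pd.Series(['J', 'C', 'S', 'B', 'J'])
pubtype = {'B': 'Book', 'J': 'Journal', 'P': 'Patent', 'S': 'Series'}

# replace() happily takes the whole mapping at once.
# Symbols not in the mapping pass through untouched.
result = s.replace(pubtype)
print(result.tolist())  # ['Journal', 'C', 'Series', 'Book', 'Journal']
```

Either style produces the same result here; the loop version just makes each substitution easier to watch.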
Are we good to go? Let's take a look and find out.
To do this we will use a handy Pandas function called unique().
This will catalog all the unique occurrences of items in whatever part of the DataFrame, row, or column we desire to look at.
df['Publication Type'].unique()
array(['Journal', 'C', 'Series', 'Book'], dtype=object)
So now we see that there is some mysterious 'C' symbol which is NOT listed in the Web of Science documentation!
Is this just a typo?
To check this we would like to know the count for each unique occurrence in our column.
To do that we use the Pandas value_counts() function.
df['Publication Type'].value_counts()
Journal 11057 C 1269 Series 196 Book 164 Name: Publication Type, dtype: int64
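If you also want the share each symbol takes of the whole column, value_counts() has a handy normalize parameter. A quick sketch on toy data:

```python
import pandas as pd

# Toy column with the same flavor of imbalance we just saw.
s = pd.Series(['Journal', 'Journal', 'Journal', 'C', 'Series'])

print(s.value_counts())                # raw counts, sorted descending
print(s.value_counts(normalize=True))  # the same thing, as proportions
```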
So not only is it not a typo, it's the second most used symbol.
In the real world we would have to investigate this further, go back and adjust our dictionary accordingly, and rerun that bit of code.
But let's ignore this, at least for now, and move on to a vastly dirtier bit of data munging.
First let us be safe and save our DataFrame to our csv file.
proc_dir = 'Processed_data'
df.to_csv(os.path.join(proc_dir, 'Combined_lists.csv'), index=False)
Extracting Location Data
For purposes we will utilize later, we want to create a new column which contains the working countries of the authors.
The column we will use for this is the "Addresses" column.
proc_dir = 'Processed_data'
df = pd.read_csv(os.path.join(proc_dir, 'Combined_lists.csv'))
#df.head()
df['Addresses'].head(8)
0 Univ Plymouth, Sch Art & Media, Plymouth PL4 8... 1 [Dal Farra, Ricardo] Concordia Univ, Hexagram,... 2 [Chen, Mei-Hsin] Univ Navarra, Pamplona, Spain 3 [Guy, Simon] Univ Manchester, Sch Environm & D... 4 [Baztan, Juan; Vanderlinden, Jean-Paul; Zhu, Z... 5 [Burke, Miriam] Royal Holloway Univ London, De... 6 [Roedder, Simone] Univ Hamburg, Inst Sociol, D... 7 [Bentz, Julia] Univ Lisbon, Fac Sci, Ctr Ecol ... Name: Addresses, dtype: object
Let's take a closer look at the first entry, which will demonstrate a massive problem really quickly!
df['Addresses'][0]
'Univ Plymouth, Sch Art & Media, Plymouth PL4 8AA, Devon, England'
Now let's look at an even more challenging example
df['Addresses'][118]
'[Mauritsen, Thorsten] Max Planck Inst Meteorol, D-20146 Hamburg, Germany; [Graversen, Rune G.] Stockholm Univ, Dept Meteorol, S-10691 Stockholm, Sweden; [Klocke, Daniel] European Ctr Medium Range Weather Forecasts, Reading RG2 9AX, Berks, England; [Langen, Peter L.; Stevens, Bjorn; Tomassini, Lorenzo] DMI, Copenhagen, Denmark'
So our issue is, well one of many, that country names are embedded throughout each cell and somehow we need to extract them.
pycountry is a handy package which provides an ISO-compliant dataset of all countries.
Let me repeat that, it's an ISO-compliant set of country information! Trust me, this will become important shortly!!!
Let's first look at what pycountry has available for us so we know how to work with it.
for c in pycountry.countries:
    print(c)
Country(alpha_2='AW', alpha_3='ABW', name='Aruba', numeric='533') Country(alpha_2='AF', alpha_3='AFG', name='Afghanistan', numeric='004', official_name='Islamic Republic of Afghanistan') Country(alpha_2='AO', alpha_3='AGO', name='Angola', numeric='024', official_name='Republic of Angola') Country(alpha_2='AI', alpha_3='AIA', name='Anguilla', numeric='660') Country(alpha_2='AX', alpha_3='ALA', name='Åland Islands', numeric='248') Country(alpha_2='AL', alpha_3='ALB', name='Albania', numeric='008', official_name='Republic of Albania') Country(alpha_2='AD', alpha_3='AND', name='Andorra', numeric='020', official_name='Principality of Andorra') Country(alpha_2='AE', alpha_3='ARE', name='United Arab Emirates', numeric='784') Country(alpha_2='AR', alpha_3='ARG', name='Argentina', numeric='032', official_name='Argentine Republic') Country(alpha_2='AM', alpha_3='ARM', name='Armenia', numeric='051', official_name='Republic of Armenia') Country(alpha_2='AS', alpha_3='ASM', name='American Samoa', numeric='016') Country(alpha_2='AQ', alpha_3='ATA', name='Antarctica', numeric='010') Country(alpha_2='TF', alpha_3='ATF', name='French Southern Territories', numeric='260') Country(alpha_2='AG', alpha_3='ATG', name='Antigua and Barbuda', numeric='028') Country(alpha_2='AU', alpha_3='AUS', name='Australia', numeric='036') Country(alpha_2='AT', alpha_3='AUT', name='Austria', numeric='040', official_name='Republic of Austria') Country(alpha_2='AZ', alpha_3='AZE', name='Azerbaijan', numeric='031', official_name='Republic of Azerbaijan') Country(alpha_2='BI', alpha_3='BDI', name='Burundi', numeric='108', official_name='Republic of Burundi') Country(alpha_2='BE', alpha_3='BEL', name='Belgium', numeric='056', official_name='Kingdom of Belgium') Country(alpha_2='BJ', alpha_3='BEN', name='Benin', numeric='204', official_name='Republic of Benin') Country(alpha_2='BQ', alpha_3='BES', name='Bonaire, Sint Eustatius and Saba', numeric='535', official_name='Bonaire, Sint Eustatius and Saba') 
Country(alpha_2='BF', alpha_3='BFA', name='Burkina Faso', numeric='854') Country(alpha_2='BD', alpha_3='BGD', name='Bangladesh', numeric='050', official_name="People's Republic of Bangladesh") Country(alpha_2='BG', alpha_3='BGR', name='Bulgaria', numeric='100', official_name='Republic of Bulgaria') Country(alpha_2='BH', alpha_3='BHR', name='Bahrain', numeric='048', official_name='Kingdom of Bahrain') Country(alpha_2='BS', alpha_3='BHS', name='Bahamas', numeric='044', official_name='Commonwealth of the Bahamas') Country(alpha_2='BA', alpha_3='BIH', name='Bosnia and Herzegovina', numeric='070', official_name='Republic of Bosnia and Herzegovina') Country(alpha_2='BL', alpha_3='BLM', name='Saint Barthélemy', numeric='652') Country(alpha_2='BY', alpha_3='BLR', name='Belarus', numeric='112', official_name='Republic of Belarus') Country(alpha_2='BZ', alpha_3='BLZ', name='Belize', numeric='084') Country(alpha_2='BM', alpha_3='BMU', name='Bermuda', numeric='060') Country(alpha_2='BO', alpha_3='BOL', common_name='Bolivia', name='Bolivia, Plurinational State of', numeric='068', official_name='Plurinational State of Bolivia') Country(alpha_2='BR', alpha_3='BRA', name='Brazil', numeric='076', official_name='Federative Republic of Brazil') Country(alpha_2='BB', alpha_3='BRB', name='Barbados', numeric='052') Country(alpha_2='BN', alpha_3='BRN', name='Brunei Darussalam', numeric='096') Country(alpha_2='BT', alpha_3='BTN', name='Bhutan', numeric='064', official_name='Kingdom of Bhutan') Country(alpha_2='BV', alpha_3='BVT', name='Bouvet Island', numeric='074') Country(alpha_2='BW', alpha_3='BWA', name='Botswana', numeric='072', official_name='Republic of Botswana') Country(alpha_2='CF', alpha_3='CAF', name='Central African Republic', numeric='140') Country(alpha_2='CA', alpha_3='CAN', name='Canada', numeric='124') Country(alpha_2='CC', alpha_3='CCK', name='Cocos (Keeling) Islands', numeric='166') Country(alpha_2='CH', alpha_3='CHE', name='Switzerland', numeric='756', 
official_name='Swiss Confederation') Country(alpha_2='CL', alpha_3='CHL', name='Chile', numeric='152', official_name='Republic of Chile') Country(alpha_2='CN', alpha_3='CHN', name='China', numeric='156', official_name="People's Republic of China") Country(alpha_2='CI', alpha_3='CIV', name="Côte d'Ivoire", numeric='384', official_name="Republic of Côte d'Ivoire") Country(alpha_2='CM', alpha_3='CMR', name='Cameroon', numeric='120', official_name='Republic of Cameroon') Country(alpha_2='CD', alpha_3='COD', name='Congo, The Democratic Republic of the', numeric='180') Country(alpha_2='CG', alpha_3='COG', name='Congo', numeric='178', official_name='Republic of the Congo') Country(alpha_2='CK', alpha_3='COK', name='Cook Islands', numeric='184') Country(alpha_2='CO', alpha_3='COL', name='Colombia', numeric='170', official_name='Republic of Colombia') Country(alpha_2='KM', alpha_3='COM', name='Comoros', numeric='174', official_name='Union of the Comoros') Country(alpha_2='CV', alpha_3='CPV', name='Cabo Verde', numeric='132', official_name='Republic of Cabo Verde') Country(alpha_2='CR', alpha_3='CRI', name='Costa Rica', numeric='188', official_name='Republic of Costa Rica') Country(alpha_2='CU', alpha_3='CUB', name='Cuba', numeric='192', official_name='Republic of Cuba') Country(alpha_2='CW', alpha_3='CUW', name='Curaçao', numeric='531', official_name='Curaçao') Country(alpha_2='CX', alpha_3='CXR', name='Christmas Island', numeric='162') Country(alpha_2='KY', alpha_3='CYM', name='Cayman Islands', numeric='136') Country(alpha_2='CY', alpha_3='CYP', name='Cyprus', numeric='196', official_name='Republic of Cyprus') Country(alpha_2='CZ', alpha_3='CZE', name='Czechia', numeric='203', official_name='Czech Republic') Country(alpha_2='DE', alpha_3='DEU', name='Germany', numeric='276', official_name='Federal Republic of Germany') Country(alpha_2='DJ', alpha_3='DJI', name='Djibouti', numeric='262', official_name='Republic of Djibouti') Country(alpha_2='DM', alpha_3='DMA', 
name='Dominica', numeric='212', official_name='Commonwealth of Dominica') Country(alpha_2='DK', alpha_3='DNK', name='Denmark', numeric='208', official_name='Kingdom of Denmark') Country(alpha_2='DO', alpha_3='DOM', name='Dominican Republic', numeric='214') Country(alpha_2='DZ', alpha_3='DZA', name='Algeria', numeric='012', official_name="People's Democratic Republic of Algeria") Country(alpha_2='EC', alpha_3='ECU', name='Ecuador', numeric='218', official_name='Republic of Ecuador') Country(alpha_2='EG', alpha_3='EGY', name='Egypt', numeric='818', official_name='Arab Republic of Egypt') Country(alpha_2='ER', alpha_3='ERI', name='Eritrea', numeric='232', official_name='the State of Eritrea') Country(alpha_2='EH', alpha_3='ESH', name='Western Sahara', numeric='732') Country(alpha_2='ES', alpha_3='ESP', name='Spain', numeric='724', official_name='Kingdom of Spain') Country(alpha_2='EE', alpha_3='EST', name='Estonia', numeric='233', official_name='Republic of Estonia') Country(alpha_2='ET', alpha_3='ETH', name='Ethiopia', numeric='231', official_name='Federal Democratic Republic of Ethiopia') Country(alpha_2='FI', alpha_3='FIN', name='Finland', numeric='246', official_name='Republic of Finland') Country(alpha_2='FJ', alpha_3='FJI', name='Fiji', numeric='242', official_name='Republic of Fiji') Country(alpha_2='FK', alpha_3='FLK', name='Falkland Islands (Malvinas)', numeric='238') Country(alpha_2='FR', alpha_3='FRA', name='France', numeric='250', official_name='French Republic') Country(alpha_2='FO', alpha_3='FRO', name='Faroe Islands', numeric='234') Country(alpha_2='FM', alpha_3='FSM', name='Micronesia, Federated States of', numeric='583', official_name='Federated States of Micronesia') Country(alpha_2='GA', alpha_3='GAB', name='Gabon', numeric='266', official_name='Gabonese Republic') Country(alpha_2='GB', alpha_3='GBR', name='United Kingdom', numeric='826', official_name='United Kingdom of Great Britain and Northern Ireland') Country(alpha_2='GE', alpha_3='GEO', 
name='Georgia', numeric='268') Country(alpha_2='GG', alpha_3='GGY', name='Guernsey', numeric='831') Country(alpha_2='GH', alpha_3='GHA', name='Ghana', numeric='288', official_name='Republic of Ghana') Country(alpha_2='GI', alpha_3='GIB', name='Gibraltar', numeric='292') Country(alpha_2='GN', alpha_3='GIN', name='Guinea', numeric='324', official_name='Republic of Guinea') Country(alpha_2='GP', alpha_3='GLP', name='Guadeloupe', numeric='312') Country(alpha_2='GM', alpha_3='GMB', name='Gambia', numeric='270', official_name='Republic of the Gambia') Country(alpha_2='GW', alpha_3='GNB', name='Guinea-Bissau', numeric='624', official_name='Republic of Guinea-Bissau') Country(alpha_2='GQ', alpha_3='GNQ', name='Equatorial Guinea', numeric='226', official_name='Republic of Equatorial Guinea') Country(alpha_2='GR', alpha_3='GRC', name='Greece', numeric='300', official_name='Hellenic Republic') Country(alpha_2='GD', alpha_3='GRD', name='Grenada', numeric='308') Country(alpha_2='GL', alpha_3='GRL', name='Greenland', numeric='304') Country(alpha_2='GT', alpha_3='GTM', name='Guatemala', numeric='320', official_name='Republic of Guatemala') Country(alpha_2='GF', alpha_3='GUF', name='French Guiana', numeric='254') Country(alpha_2='GU', alpha_3='GUM', name='Guam', numeric='316') Country(alpha_2='GY', alpha_3='GUY', name='Guyana', numeric='328', official_name='Republic of Guyana') Country(alpha_2='HK', alpha_3='HKG', name='Hong Kong', numeric='344', official_name='Hong Kong Special Administrative Region of China') Country(alpha_2='HM', alpha_3='HMD', name='Heard Island and McDonald Islands', numeric='334') Country(alpha_2='HN', alpha_3='HND', name='Honduras', numeric='340', official_name='Republic of Honduras') Country(alpha_2='HR', alpha_3='HRV', name='Croatia', numeric='191', official_name='Republic of Croatia') Country(alpha_2='HT', alpha_3='HTI', name='Haiti', numeric='332', official_name='Republic of Haiti') Country(alpha_2='HU', alpha_3='HUN', name='Hungary', numeric='348', 
official_name='Hungary') Country(alpha_2='ID', alpha_3='IDN', name='Indonesia', numeric='360', official_name='Republic of Indonesia') Country(alpha_2='IM', alpha_3='IMN', name='Isle of Man', numeric='833') Country(alpha_2='IN', alpha_3='IND', name='India', numeric='356', official_name='Republic of India') Country(alpha_2='IO', alpha_3='IOT', name='British Indian Ocean Territory', numeric='086') Country(alpha_2='IE', alpha_3='IRL', name='Ireland', numeric='372') Country(alpha_2='IR', alpha_3='IRN', name='Iran, Islamic Republic of', numeric='364', official_name='Islamic Republic of Iran') Country(alpha_2='IQ', alpha_3='IRQ', name='Iraq', numeric='368', official_name='Republic of Iraq') Country(alpha_2='IS', alpha_3='ISL', name='Iceland', numeric='352', official_name='Republic of Iceland') Country(alpha_2='IL', alpha_3='ISR', name='Israel', numeric='376', official_name='State of Israel') Country(alpha_2='IT', alpha_3='ITA', name='Italy', numeric='380', official_name='Italian Republic') Country(alpha_2='JM', alpha_3='JAM', name='Jamaica', numeric='388') Country(alpha_2='JE', alpha_3='JEY', name='Jersey', numeric='832') Country(alpha_2='JO', alpha_3='JOR', name='Jordan', numeric='400', official_name='Hashemite Kingdom of Jordan') Country(alpha_2='JP', alpha_3='JPN', name='Japan', numeric='392') Country(alpha_2='KZ', alpha_3='KAZ', name='Kazakhstan', numeric='398', official_name='Republic of Kazakhstan') Country(alpha_2='KE', alpha_3='KEN', name='Kenya', numeric='404', official_name='Republic of Kenya') Country(alpha_2='KG', alpha_3='KGZ', name='Kyrgyzstan', numeric='417', official_name='Kyrgyz Republic') Country(alpha_2='KH', alpha_3='KHM', name='Cambodia', numeric='116', official_name='Kingdom of Cambodia') Country(alpha_2='KI', alpha_3='KIR', name='Kiribati', numeric='296', official_name='Republic of Kiribati') Country(alpha_2='KN', alpha_3='KNA', name='Saint Kitts and Nevis', numeric='659') Country(alpha_2='KR', alpha_3='KOR', name='Korea, Republic of', 
numeric='410') Country(alpha_2='KW', alpha_3='KWT', name='Kuwait', numeric='414', official_name='State of Kuwait') Country(alpha_2='LA', alpha_3='LAO', name="Lao People's Democratic Republic", numeric='418') Country(alpha_2='LB', alpha_3='LBN', name='Lebanon', numeric='422', official_name='Lebanese Republic') Country(alpha_2='LR', alpha_3='LBR', name='Liberia', numeric='430', official_name='Republic of Liberia') Country(alpha_2='LY', alpha_3='LBY', name='Libya', numeric='434', official_name='Libya') Country(alpha_2='LC', alpha_3='LCA', name='Saint Lucia', numeric='662') Country(alpha_2='LI', alpha_3='LIE', name='Liechtenstein', numeric='438', official_name='Principality of Liechtenstein') Country(alpha_2='LK', alpha_3='LKA', name='Sri Lanka', numeric='144', official_name='Democratic Socialist Republic of Sri Lanka') Country(alpha_2='LS', alpha_3='LSO', name='Lesotho', numeric='426', official_name='Kingdom of Lesotho') Country(alpha_2='LT', alpha_3='LTU', name='Lithuania', numeric='440', official_name='Republic of Lithuania') Country(alpha_2='LU', alpha_3='LUX', name='Luxembourg', numeric='442', official_name='Grand Duchy of Luxembourg') Country(alpha_2='LV', alpha_3='LVA', name='Latvia', numeric='428', official_name='Republic of Latvia') Country(alpha_2='MO', alpha_3='MAC', name='Macao', numeric='446', official_name='Macao Special Administrative Region of China') Country(alpha_2='MF', alpha_3='MAF', name='Saint Martin (French part)', numeric='663') Country(alpha_2='MA', alpha_3='MAR', name='Morocco', numeric='504', official_name='Kingdom of Morocco') Country(alpha_2='MC', alpha_3='MCO', name='Monaco', numeric='492', official_name='Principality of Monaco') Country(alpha_2='MD', alpha_3='MDA', common_name='Moldova', name='Moldova, Republic of', numeric='498', official_name='Republic of Moldova') Country(alpha_2='MG', alpha_3='MDG', name='Madagascar', numeric='450', official_name='Republic of Madagascar') Country(alpha_2='MV', alpha_3='MDV', name='Maldives', 
numeric='462', official_name='Republic of Maldives') Country(alpha_2='MX', alpha_3='MEX', name='Mexico', numeric='484', official_name='United Mexican States') Country(alpha_2='MH', alpha_3='MHL', name='Marshall Islands', numeric='584', official_name='Republic of the Marshall Islands') Country(alpha_2='MK', alpha_3='MKD', name='North Macedonia', numeric='807', official_name='Republic of North Macedonia') Country(alpha_2='ML', alpha_3='MLI', name='Mali', numeric='466', official_name='Republic of Mali') Country(alpha_2='MT', alpha_3='MLT', name='Malta', numeric='470', official_name='Republic of Malta') Country(alpha_2='MM', alpha_3='MMR', name='Myanmar', numeric='104', official_name='Republic of Myanmar') Country(alpha_2='ME', alpha_3='MNE', name='Montenegro', numeric='499', official_name='Montenegro') Country(alpha_2='MN', alpha_3='MNG', name='Mongolia', numeric='496') Country(alpha_2='MP', alpha_3='MNP', name='Northern Mariana Islands', numeric='580', official_name='Commonwealth of the Northern Mariana Islands') Country(alpha_2='MZ', alpha_3='MOZ', name='Mozambique', numeric='508', official_name='Republic of Mozambique') Country(alpha_2='MR', alpha_3='MRT', name='Mauritania', numeric='478', official_name='Islamic Republic of Mauritania') Country(alpha_2='MS', alpha_3='MSR', name='Montserrat', numeric='500') Country(alpha_2='MQ', alpha_3='MTQ', name='Martinique', numeric='474') Country(alpha_2='MU', alpha_3='MUS', name='Mauritius', numeric='480', official_name='Republic of Mauritius') Country(alpha_2='MW', alpha_3='MWI', name='Malawi', numeric='454', official_name='Republic of Malawi') Country(alpha_2='MY', alpha_3='MYS', name='Malaysia', numeric='458') Country(alpha_2='YT', alpha_3='MYT', name='Mayotte', numeric='175') Country(alpha_2='NA', alpha_3='NAM', name='Namibia', numeric='516', official_name='Republic of Namibia') Country(alpha_2='NC', alpha_3='NCL', name='New Caledonia', numeric='540') Country(alpha_2='NE', alpha_3='NER', name='Niger', numeric='562', 
official_name='Republic of the Niger') Country(alpha_2='NF', alpha_3='NFK', name='Norfolk Island', numeric='574') Country(alpha_2='NG', alpha_3='NGA', name='Nigeria', numeric='566', official_name='Federal Republic of Nigeria') Country(alpha_2='NI', alpha_3='NIC', name='Nicaragua', numeric='558', official_name='Republic of Nicaragua') Country(alpha_2='NU', alpha_3='NIU', name='Niue', numeric='570', official_name='Niue') Country(alpha_2='NL', alpha_3='NLD', name='Netherlands', numeric='528', official_name='Kingdom of the Netherlands') Country(alpha_2='NO', alpha_3='NOR', name='Norway', numeric='578', official_name='Kingdom of Norway') Country(alpha_2='NP', alpha_3='NPL', name='Nepal', numeric='524', official_name='Federal Democratic Republic of Nepal') Country(alpha_2='NR', alpha_3='NRU', name='Nauru', numeric='520', official_name='Republic of Nauru') Country(alpha_2='NZ', alpha_3='NZL', name='New Zealand', numeric='554') Country(alpha_2='OM', alpha_3='OMN', name='Oman', numeric='512', official_name='Sultanate of Oman') Country(alpha_2='PK', alpha_3='PAK', name='Pakistan', numeric='586', official_name='Islamic Republic of Pakistan') Country(alpha_2='PA', alpha_3='PAN', name='Panama', numeric='591', official_name='Republic of Panama') Country(alpha_2='PN', alpha_3='PCN', name='Pitcairn', numeric='612') Country(alpha_2='PE', alpha_3='PER', name='Peru', numeric='604', official_name='Republic of Peru') Country(alpha_2='PH', alpha_3='PHL', name='Philippines', numeric='608', official_name='Republic of the Philippines') Country(alpha_2='PW', alpha_3='PLW', name='Palau', numeric='585', official_name='Republic of Palau') Country(alpha_2='PG', alpha_3='PNG', name='Papua New Guinea', numeric='598', official_name='Independent State of Papua New Guinea') Country(alpha_2='PL', alpha_3='POL', name='Poland', numeric='616', official_name='Republic of Poland') Country(alpha_2='PR', alpha_3='PRI', name='Puerto Rico', numeric='630') Country(alpha_2='KP', alpha_3='PRK', name="Korea, 
Democratic People's Republic of", numeric='408', official_name="Democratic People's Republic of Korea") Country(alpha_2='PT', alpha_3='PRT', name='Portugal', numeric='620', official_name='Portuguese Republic') Country(alpha_2='PY', alpha_3='PRY', name='Paraguay', numeric='600', official_name='Republic of Paraguay') Country(alpha_2='PS', alpha_3='PSE', name='Palestine, State of', numeric='275', official_name='the State of Palestine') Country(alpha_2='PF', alpha_3='PYF', name='French Polynesia', numeric='258') Country(alpha_2='QA', alpha_3='QAT', name='Qatar', numeric='634', official_name='State of Qatar') Country(alpha_2='RE', alpha_3='REU', name='Réunion', numeric='638') Country(alpha_2='RO', alpha_3='ROU', name='Romania', numeric='642') Country(alpha_2='RU', alpha_3='RUS', name='Russian Federation', numeric='643') Country(alpha_2='RW', alpha_3='RWA', name='Rwanda', numeric='646', official_name='Rwandese Republic') Country(alpha_2='SA', alpha_3='SAU', name='Saudi Arabia', numeric='682', official_name='Kingdom of Saudi Arabia') Country(alpha_2='SD', alpha_3='SDN', name='Sudan', numeric='729', official_name='Republic of the Sudan') Country(alpha_2='SN', alpha_3='SEN', name='Senegal', numeric='686', official_name='Republic of Senegal') Country(alpha_2='SG', alpha_3='SGP', name='Singapore', numeric='702', official_name='Republic of Singapore') Country(alpha_2='GS', alpha_3='SGS', name='South Georgia and the South Sandwich Islands', numeric='239') Country(alpha_2='SH', alpha_3='SHN', name='Saint Helena, Ascension and Tristan da Cunha', numeric='654') Country(alpha_2='SJ', alpha_3='SJM', name='Svalbard and Jan Mayen', numeric='744') Country(alpha_2='SB', alpha_3='SLB', name='Solomon Islands', numeric='090') Country(alpha_2='SL', alpha_3='SLE', name='Sierra Leone', numeric='694', official_name='Republic of Sierra Leone') Country(alpha_2='SV', alpha_3='SLV', name='El Salvador', numeric='222', official_name='Republic of El Salvador') Country(alpha_2='SM', alpha_3='SMR', 
name='San Marino', numeric='674', official_name='Republic of San Marino') Country(alpha_2='SO', alpha_3='SOM', name='Somalia', numeric='706', official_name='Federal Republic of Somalia') Country(alpha_2='PM', alpha_3='SPM', name='Saint Pierre and Miquelon', numeric='666') Country(alpha_2='RS', alpha_3='SRB', name='Serbia', numeric='688', official_name='Republic of Serbia') Country(alpha_2='SS', alpha_3='SSD', name='South Sudan', numeric='728', official_name='Republic of South Sudan') Country(alpha_2='ST', alpha_3='STP', name='Sao Tome and Principe', numeric='678', official_name='Democratic Republic of Sao Tome and Principe') Country(alpha_2='SR', alpha_3='SUR', name='Suriname', numeric='740', official_name='Republic of Suriname') Country(alpha_2='SK', alpha_3='SVK', name='Slovakia', numeric='703', official_name='Slovak Republic') Country(alpha_2='SI', alpha_3='SVN', name='Slovenia', numeric='705', official_name='Republic of Slovenia') Country(alpha_2='SE', alpha_3='SWE', name='Sweden', numeric='752', official_name='Kingdom of Sweden') Country(alpha_2='SZ', alpha_3='SWZ', name='Eswatini', numeric='748', official_name='Kingdom of Eswatini') Country(alpha_2='SX', alpha_3='SXM', name='Sint Maarten (Dutch part)', numeric='534', official_name='Sint Maarten (Dutch part)') Country(alpha_2='SC', alpha_3='SYC', name='Seychelles', numeric='690', official_name='Republic of Seychelles') Country(alpha_2='SY', alpha_3='SYR', name='Syrian Arab Republic', numeric='760') Country(alpha_2='TC', alpha_3='TCA', name='Turks and Caicos Islands', numeric='796') Country(alpha_2='TD', alpha_3='TCD', name='Chad', numeric='148', official_name='Republic of Chad') Country(alpha_2='TG', alpha_3='TGO', name='Togo', numeric='768', official_name='Togolese Republic') Country(alpha_2='TH', alpha_3='THA', name='Thailand', numeric='764', official_name='Kingdom of Thailand') Country(alpha_2='TJ', alpha_3='TJK', name='Tajikistan', numeric='762', official_name='Republic of Tajikistan') 
Country(alpha_2='TK', alpha_3='TKL', name='Tokelau', numeric='772') Country(alpha_2='TM', alpha_3='TKM', name='Turkmenistan', numeric='795') Country(alpha_2='TL', alpha_3='TLS', name='Timor-Leste', numeric='626', official_name='Democratic Republic of Timor-Leste') Country(alpha_2='TO', alpha_3='TON', name='Tonga', numeric='776', official_name='Kingdom of Tonga') Country(alpha_2='TT', alpha_3='TTO', name='Trinidad and Tobago', numeric='780', official_name='Republic of Trinidad and Tobago') Country(alpha_2='TN', alpha_3='TUN', name='Tunisia', numeric='788', official_name='Republic of Tunisia') Country(alpha_2='TR', alpha_3='TUR', name='Turkey', numeric='792', official_name='Republic of Turkey') Country(alpha_2='TV', alpha_3='TUV', name='Tuvalu', numeric='798') Country(alpha_2='TW', alpha_3='TWN', common_name='Taiwan', name='Taiwan, Province of China', numeric='158', official_name='Taiwan, Province of China') Country(alpha_2='TZ', alpha_3='TZA', common_name='Tanzania', name='Tanzania, United Republic of', numeric='834', official_name='United Republic of Tanzania') Country(alpha_2='UG', alpha_3='UGA', name='Uganda', numeric='800', official_name='Republic of Uganda') Country(alpha_2='UA', alpha_3='UKR', name='Ukraine', numeric='804') Country(alpha_2='UM', alpha_3='UMI', name='United States Minor Outlying Islands', numeric='581') Country(alpha_2='UY', alpha_3='URY', name='Uruguay', numeric='858', official_name='Eastern Republic of Uruguay') Country(alpha_2='US', alpha_3='USA', name='United States', numeric='840', official_name='United States of America') Country(alpha_2='UZ', alpha_3='UZB', name='Uzbekistan', numeric='860', official_name='Republic of Uzbekistan') Country(alpha_2='VA', alpha_3='VAT', name='Holy See (Vatican City State)', numeric='336') Country(alpha_2='VC', alpha_3='VCT', name='Saint Vincent and the Grenadines', numeric='670') Country(alpha_2='VE', alpha_3='VEN', common_name='Venezuela', name='Venezuela, Bolivarian Republic of', numeric='862', 
official_name='Bolivarian Republic of Venezuela') Country(alpha_2='VG', alpha_3='VGB', name='Virgin Islands, British', numeric='092', official_name='British Virgin Islands') Country(alpha_2='VI', alpha_3='VIR', name='Virgin Islands, U.S.', numeric='850', official_name='Virgin Islands of the United States') Country(alpha_2='VN', alpha_3='VNM', common_name='Vietnam', name='Viet Nam', numeric='704', official_name='Socialist Republic of Viet Nam') Country(alpha_2='VU', alpha_3='VUT', name='Vanuatu', numeric='548', official_name='Republic of Vanuatu') Country(alpha_2='WF', alpha_3='WLF', name='Wallis and Futuna', numeric='876') Country(alpha_2='WS', alpha_3='WSM', name='Samoa', numeric='882', official_name='Independent State of Samoa') Country(alpha_2='YE', alpha_3='YEM', name='Yemen', numeric='887', official_name='Republic of Yemen') Country(alpha_2='ZA', alpha_3='ZAF', name='South Africa', numeric='710', official_name='Republic of South Africa') Country(alpha_2='ZM', alpha_3='ZMB', name='Zambia', numeric='894', official_name='Republic of Zambia') Country(alpha_2='ZW', alpha_3='ZWE', name='Zimbabwe', numeric='716', official_name='Republic of Zimbabwe')
So that is what pycountry makes available for each country.
We will run into several issues as we develop this, and we will work through them to produce an 'acceptable' (for us today) solution.
Basically what we want to do is something similar to what we did for 'Publication Type'.
We will create a new 'Country' column derived from the countries we extract from the 'Addresses' column.
We will approach this with a common-sense methodology and ignore the more 'pythonic' ways of doing things. While faster and more efficient, they are to be reserved for a more advanced discussion.
To kick this off we will iterate through each row of data (I told you we were not going pythonic, right!?!)
To iterate through rows of data we use the iterrows() function.
We will send the cell for each row to a new function we will call find_country().
This new function will be responsible for finding the correct country (or countries) in 'Addresses' and adding them to the 'Country' column we will create.
We will start off by creating the new, empty, 'Country' column like so;
df['Country'] = ''
Next we will start working on our new function find_country()
To it we will pass the text of the current row's 'Addresses' cell.
Since we already demonstrated that some entries have authors from multiple countries, we will store each of them in a list called clist (for country list).
Remember that we replaced all those empty and nan cells with the string 'None'? Well, we will make partial use of that right now. If the input text is 'None' we just return the string 'None'.
Then we will iterate through each country in pycountry and, if its name exists in the text, we will store/append that country's ISO name into the list.
Finally we return clist.
Below is our first iteration of our function.
def find_country(txt):
    clist = []
    if txt == 'None':
        return 'None'
    for c in pycountry.countries:
        #print(f'\nc: {c} txt: {txt}')
        if c.name in txt:
            clist.append(c.name)
    return clist
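To see the matching logic in isolation, here is a self-contained sketch that swaps pycountry out for a tiny hand-made list of country names (an illustration only, not the real ISO dataset):

```python
# Tiny stand-in for pycountry.countries -- just a few names.
COUNTRY_NAMES = ['Germany', 'Sweden', 'Denmark', 'France']

def find_country(txt):
    clist = []
    if txt == 'None':
        return 'None'
    for name in COUNTRY_NAMES:
        # A plain substring check, exactly like `c.name in txt` above.
        if name in txt:
            clist.append(name)
    return clist

addr = ('[Mauritsen, Thorsten] Max Planck Inst Meteorol, Hamburg, Germany; '
        '[Graversen, Rune G.] Stockholm Univ, Stockholm, Sweden')
print(find_country(addr))  # ['Germany', 'Sweden']
```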
Now we need to utilize our function via iterrows(), which will return both the index of the row we are in and its values.
For now we will just print the result out and not worry about storing them in the new column.
for index, row in df.iterrows():
    addr = df['Addresses'][index]
    #print(f'INDEX: {index} ADDRESS: {addr}')
    c = find_country(addr)
    #print(f'index: {index} c: {c}')
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) ~\AppData\Local\Temp\ipykernel_46064\4012802089.py in <cell line: 1>() 2 addr = df['Addresses'][index] 3 #print(f'INDEX: {index} ADDRESS: {addr}') ----> 4 c = find_country(addr) 5 #print(f'index: {index} c: {c}') 6 ~\AppData\Local\Temp\ipykernel_46064\1686651311.py in find_country(txt) 5 for c in pycountry.countries: 6 #print(f'\nc: {c} txt: {txt}') ----> 7 if c.name in txt: 8 clist.append(c.name) 9 return clist TypeError: argument of type 'float' is not iterable
So right away on our very first row we run into problems.
Then later on we run into some sort of float error. Wait, what? What float? We are passing text, are we not?
Let's deal with the error first.
We see that our last successful index value was 21, so let's see what's hiding in cell 22 that caused our error.
df['Addresses'][22]
nan
nan stands for Not a Number and often when you intentionally use/test for it you will want to use the nan supplied with numpy!
But we don't need to worry about that right now.
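To see why our check blew up, a minimal sketch: a missing cell arrives as the float value nan, and the `in` operator needs an iterable on its right-hand side:

```python
cell = float('nan')    # numpy's np.nan is this very same float value

print(type(cell))      # <class 'float'> -- not a string at all!

try:
    'Germany' in cell  # the same membership test find_country() performs
except TypeError as e:
    print(f'TypeError: {e}')
```

This is exactly the "argument of type 'float' is not iterable" we hit above.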
What we need is a string replacement for those nan's (yes, there are actually many).
The first thing we will do is fill in all the missing data with "None" (as in the string 'None'!)
Why "None" instead of something like NaN or other common things?
Something we will do later will fail painfully if we don't have a string to work with.
Can we get around haveing to pipe a string in? Sure its Python theres a million ways to do everything! But we will make it simple.
fillna() is a function which replaces any missing cell(s) with whatever we desire.
We can change them to literal Nulls, NaN's (almost always you will use numpy NaNs), 0's, or some rational default value like -666666 (yes, that's actually used frequently by certain scientists, some of whom are represented here today!).
For our purposes we want to replace all the empty cells in the 'Addresses' column with "None".
Since we want the change applied directly to our DataFrame we will use a handy function parameter called inplace, setting it to True.
What this does is make all changes active in our current DataFrame without having to reassign it.
df['Addresses'].fillna('None', inplace=True)
For those curious or confused, without using inplace=True the above bit of code would have to look like this:
df['Addresses'] = df['Addresses'].fillna('None')
A hint for those with larger datasets: use this feature wherever it's available, as it's faster!
Sadly it does not exist for all functions, and that is a major bummer really!
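To see the two styles side by side, here is a throwaway example (the column name and values are invented for illustration):

```python
import numpy as np
import pandas as pd

# A tiny DataFrame with one missing cell (invented data)
a = pd.DataFrame({'addr': ['somewhere', np.nan, 'elsewhere']})

# Style 1: reassignment - fillna() returns a new, patched object
b = a.fillna('None')

# Style 2: in place - the original DataFrame is modified, nothing returned
a.fillna('None', inplace=True)

print(a.equals(b))      # True: both routes end at the same result
```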
Let's rerun our loop, taking out the print statement for now, and make sure that clears the error for us.
for index, row in df.iterrows():
    addr = df['Addresses'][index]
    #print(f'INDEX: {index} ADDRESS: {addr}')
    c = find_country(addr)
Let's take a look at that very first row's cell and figure out what the heck happened there. We never saved the country to clist, or failed to return it, or what???
df['Addresses'][0]
'Univ Plymouth, Sch Art & Media, Plymouth PL4 8AA, Devon, England'
OK, so 'England' is the country we are interested in. So uhmmm, yeah???
Let's take a look at our pycountry listing again.
Uhm, there's no England in pycountry, but we are all pretty sure it exists, right? Or maybe it ceased to exist with the death of the Queen, or???
Actually, remember all the verbiage I vomited about ISO names?
Yep, 'England' is NOT an ISO-named country!
The correct ISO name is 'United Kingdom'.
Well, we are not going to fix pycountry; trust me, others have pushed for that, hence why some countries have the 'common name' entry. And we surely don't want to create our own package, at least not today, to deal with this. So?
What we will do is create a sort of preprocessor for pycountry.
We will do this by building a dictionary called ccode (for country code) which will replace all occurrences of 'England' with 'United Kingdom' in the 'Addresses' column, like so.
ccode = {'England':'United Kingdom'}

for key in ccode:
    df['Addresses'] = df['Addresses'].str.replace(key, ccode.get(key))
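The same dictionary-driven substitution is easy to see on a plain string; this sketch mirrors what the .str.replace() loop does across the whole column (using the Plymouth address we saw above):

```python
ccode = {'England': 'United Kingdom'}

addr = 'Univ Plymouth, Sch Art & Media, Plymouth PL4 8AA, Devon, England'

# Apply each key -> value substitution in turn, just like the
# df['Addresses'].str.replace(...) loop does for the whole column
for key in ccode:
    addr = addr.replace(key, ccode[key])

print(addr)  # ... Devon, United Kingdom
```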
Cool, we should be good to go!
Let's rerun our loop and find out.
for index, row in df.iterrows():
    addr = df['Addresses'][index]
    #print(f'INDEX: {index} ADDRESS: {addr}')
    c = find_country(addr)
    #print(f'index: {index} c: {c}')
Buggers! OK, now what?!? I thought we just fixed the problem.
OK, let's make life a little easier and get a list of all the bad indices so we can look at all these issues.
c_index = []

for index, row in df.iterrows():
    addr = df['Addresses'][index]
    #print(f'INDEX: {index} ADDRESS: {addr}')
    c = find_country(addr)
    if c == []:
        c_index.append(index)

c_index[0:10]
[9, 15, 18, 23, 35, 36, 38, 39, 44, 45]
Wait, just how bad is it?
Let's find the length (number of indices) of c_index.
len(c_index)
2095
Holy flippin' moo cows, Batman, that's one HUGE list of issues, and I so do not have time to go through each one individually.
What to do, what to do?!?
OK, what if we create a new DataFrame with the index values and the actual addresses?
QUESTION: Why build a new DataFrame instead of just using print statements or something?
Answer
Well simply put, just to teach you new powerful and important tricks!
test_df = pd.DataFrame(columns=['c_index', 'addresses'])
test_df['c_index'] = c_index

for index, value in enumerate(c_index):
    test_df['addresses'][index] = df['Addresses'][value]

test_df
C:\Users\tdunn\AppData\Local\Temp\ipykernel_46064\2048375063.py:6: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['addresses'][index] = df['Addresses'][value]
| | c_index | addresses |
|---|---|---|
| 0 | 9 | [Kim, Sunhee] Seoul Womens Univ, Seoul, South ... |
| 1 | 15 | [Lee, Keunhye] Gachon Univ, Dept Interior Arch... |
| 2 | 18 | [Evans, Eleri] Swansea Univ, Swansea, W Glam, ... |
| 3 | 23 | [Gonzalez, Jorge E.; Ramamurthy, Prathap] CUNY... |
| 4 | 35 | [Tosca, Mika G.; Gilbert, Ilai; Walls, Kelvin ... |
| ... | ... | ... |
| 2090 | 12633 | [Fields, Lindsey; Nixon, Scott W.; Oviatt, Can... |
| 2091 | 12655 | [Farrara, John D.; Chao, Yi; Li, Zhijin; Wang,... |
| 2092 | 12657 | [Spangler, Lee H.; Dobeck, Laura M.; Gullickso... |
| 2093 | 12675 | [Kilb, Debi; Yang, Alan] Univ Calif San Diego,... |
| 2094 | 12679 | [Jennings, Bruce] Vanderbilt Univ, Med Sch, Ct... |

2095 rows × 2 columns
Ignore the warning. If we were serious about this bit of code then we would deal with it, but since we are just exploring our issues and plan to drop this bit from our final code, don't waste time on it!
Looking at all the entries in test_df['addresses'] we can see 3 major issues:
1. More non-ISO country names (e.g. 'Wales', 'South Korea', 'USA').
2. Country names in a different case (e.g. 'ENGLAND').
3. US addresses that carry only a state abbreviation and no country at all.
#for i in test_df['addresses']:
#    print(i)
Good news: we know how to deal with the first one; we just need to expand our ccode dictionary, like so.
ccode = {'England':'United Kingdom',
'Wales':'United Kingdom',
'South Korea':'Korea, Republic of',
'USA':'United States',
'Czech Republic':'Czechia',
'Scotland':'United Kingdom',
'Russia':'Russian Federation',
'Iran':'Iran, Islamic Republic of',
'U Arab Emirates':'United Arab Emirates',
'Taiwan':'Taiwan, Province of China',
'Venezuela':'Venezuela, Bolivarian Republic of',
'Vietnam':'Viet Nam',
'ENGLAND':'United Kingdom',
'VENEZUELA':'Venezuela, Bolivarian Republic of',
'CZECH REPUBLIC':'Czechia',
'SCOTLAND':'United Kingdom'}
For the case issue we need to cast everything to lower() inside our find_country() function, like so:
def find_country(txt):
    clist = []
    if txt == 'None':
        return 'None'
    for c in pycountry.countries:
        if c.name.lower() in txt.lower():
            clist.append(c.name)
    return clist
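A quick check of why lowercasing both sides matters; the sample address is invented:

```python
name = 'Czechia'
addr = 'Charles Univ, Prague, CZECHIA'   # invented address

print(name in addr)                   # False: the case mismatch defeats the match
print(name.lower() in addr.lower())   # True: lowercasing both sides fixes it
```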
The state issue is slightly trickier, but we can deal with it via a new function we will call find_state().
Basically it will do what we did with find_country(), but this time look for the culprit states (stored in a list) and tag them as 'United States', as it were.

def find_state(txt):
    states = ['CA', 'NJ', 'CO', 'WA', 'NM', 'DC', 'OR', 'MA', 'MD', 'AZ', 'NE']
    clist = []
    for c in states:
        if c in txt:
            clist.append('United States')
    return clist
IMPORTANT NOTE: I intentionally skipped an Item 4.
What Item 4, you wisely ask?
Well, if you were to use this new code as is and then thoroughly check the new results, you would find two (2) cells with no indication whatsoever of a country.
So we will make one last modification to find_state(): return the string 'None' if no state is found to apply 'United States' to.
Like so:
def find_state(txt):
    states = ['CA', 'NJ', 'CO', 'WA', 'NM', 'DC', 'OR', 'MA', 'MD', 'AZ', 'NE']
    clist = []
    for c in states:
        if c in txt:
            clist.append('United States')
            return clist
    else:
        # for/else: this branch only runs if the loop finished with no match
        return 'None'
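A quick sanity check (the function is repeated here so the cell stands alone; the sample addresses are invented):

```python
def find_state(txt):
    states = ['CA', 'NJ', 'CO', 'WA', 'NM', 'DC', 'OR', 'MA', 'MD', 'AZ', 'NE']
    clist = []
    for c in states:
        if c in txt:
            clist.append('United States')
            return clist
    else:
        # for/else: only runs when the loop finishes without a match
        return 'None'

print(find_state('Jet Prop Lab, Pasadena, CA 91109'))  # ['United States']
print(find_state('Univ Toulouse, France'))             # 'None'
```

Note it matches raw two-letter substrings, so it is only as good (and as cautious) as the states list.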
We can now rewrite our base loop to deal with both find_country() and find_state().
Here we will also go ahead and add the countries returned from our functions into our df['Country'] column.
for index, row in df.iterrows():
    addr = df['Addresses'][index]
    c = find_country(addr)
    if c:
        df['Country'][index] = c
    else:
        df['Country'][index] = find_state(addr)
IMPORTANT NOTE: Remember that "A value is trying to be set on a copy of a slice from a DataFrame" warning we received in our test_df code?
If you ran the code above you would receive the exact same warning!
To fix that, we assign the desired value to the cell using the .at indexer!
Like so:
for index, row in df.iterrows():
    addr = df['Addresses'][index]
    c = find_country(addr)
    if c:
        df.at[index, 'Country'] = c
    else:
        df.at[index, 'Country'] = find_state(addr)
Putting all of that together now we can actually create the final df['Country'] column!
ccode = {'England':'United Kingdom',
'Wales':'United Kingdom',
'South Korea':'Korea, Republic of',
'USA':'United States',
'Czech Republic':'Czechia',
'Scotland':'United Kingdom',
'Russia':'Russian Federation',
'Iran':'Iran, Islamic Republic of',
'U Arab Emirates':'United Arab Emirates',
'Taiwan':'Taiwan, Province of China',
'Venezuela':'Venezuela, Bolivarian Republic of',
'Vietnam':'Viet Nam',
'ENGLAND':'United Kingdom',
'VENEZUELA':'Venezuela, Bolivarian Republic of',
'CZECH REPUBLIC':'Czechia',
'SCOTLAND':'United Kingdom'}
for key in ccode:
    df['Addresses'] = df['Addresses'].str.replace(key, ccode.get(key))

def find_state(txt):
    states = ['CA', 'NJ', 'CO', 'WA', 'NM', 'DC', 'OR', 'MA', 'MD', 'AZ', 'NE']
    clist = []
    for c in states:
        if c in txt:
            clist.append('United States')
            return clist
    else:
        return 'None'

def find_country(txt):
    clist = []
    if txt == 'None':
        return 'None'
    for c in pycountry.countries:
        if c.name.lower() in txt.lower():
            clist.append(c.name)
    return clist

df['Country'] = ''

for index, row in df.iterrows():
    addr = df['Addresses'][index]
    c = find_country(addr)
    if c:
        df.at[index, 'Country'] = c
    else:
        df.at[index, 'Country'] = find_state(addr)
df['Country']
0                                         [United Kingdom]
1                         [Argentina, Canada, Netherlands]
2                                                  [Spain]
3                                         [United Kingdom]
4                                  [France, United States]
                               ...
12681                         [Canada, Russian Federation]
12682                                             [France]
12683    [Argentina, Australia, Austria, Brazil, Canada...
12684    [Afghanistan, Austria, Canada, China, Czechia,...
12685    [Austria, Belgium, Bulgaria, Switzerland, Czec...
Name: Country, Length: 12686, dtype: object
Let's make sure we save all the changes.
df.to_csv(os.path.join('Processed_data', 'Combined_lists.csv'))
Due to time constraints we will call this section done - for now.
We will continue to explore our data, as well as manipulate it and create new data derived from it, in the next section:
Part_02_Data_Viz.