Pandas Primer¶

Pandas is a popular and useful data analysis library. The name comes from "panel data," with a nod to "Python data analysis."

Pandas is the go-to package for working with panel data, CSVs, data stored in text files, Excel sheets, and even SQL databases. If you're coming from Excel, you'll find the Pandas approach fairly intuitive.

In this primer, we'll cover the basics.
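To give a feel for loading data, here's a minimal sketch of `pd.read_csv()`. To keep it self-contained (no file on disk), it reads a CSV held in a string via `io.StringIO`; in practice you'd pass a file path instead.

```python
import io
import pandas as pd

# A CSV in a string stands in for a file on disk;
# pd.read_csv accepts a path or any file-like object.
csv_text = "Person,Age\nJohn,24\nMyla,26\n"
df = pd.read_csv(io.StringIO(csv_text))
print(df.shape)  # (2, 2)
```

The same pattern works for `pd.read_excel()` and `pd.read_sql()` with the appropriate source.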

Imports:

In [30]:
import pandas as pd
import numpy as np
import os

You'll usually see Pandas abbreviated as pd and NumPy as np.

In Pandas, data is stored as either a Series (one-dimensional) or a DataFrame (two-dimensional).
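A quick sketch of the relationship between the two (standalone example data): a Series is a single labeled column, and each column of a dataframe is itself a Series.

```python
import pandas as pd

# A Series is one-dimensional: a column of values with an index
ages = pd.Series([24, 21, 33], name="Age")
print(ages.max())  # 33

# A dataframe column is itself a Series
df = pd.DataFrame({"Age": [24, 21, 33]})
print(type(df["Age"]))  # <class 'pandas.core.series.Series'>
```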

You'll usually import data into a dataframe, but to begin we'll create this basic dataframe from scratch using pd.DataFrame().

In [4]:
df = pd.DataFrame({"Person":["John", "Myla", "Lewis", "John", "Myla"],
                   "Age": [24., np.nan, 21., 33, 26],
                   "Single": [False, True, True, True, False]})
In [5]:
df
Out[5]:
Person Age Single
0 John 24.0 False
1 Myla NaN True
2 Lewis 21.0 True
3 John 33.0 True
4 Myla 26.0 False

View just the column headers:

In [6]:
df.columns
Out[6]:
Index(['Person', 'Age', 'Single'], dtype='object')

Select individual columns:

In [7]:
df['Age']
Out[7]:
0    24.0
1     NaN
2    21.0
3    33.0
4    26.0
Name: Age, dtype: float64

Select multiple columns by passing a list of column names (note the double brackets):

In [8]:
df[['Person','Age']]
Out[8]:
Person Age
0 John 24.0
1 Myla NaN
2 Lewis 21.0
3 John 33.0
4 Myla 26.0

Select rows by index position using iloc:

In [9]:
df.iloc[1:3]
Out[9]:
Person Age Single
1 Myla NaN True
2 Lewis 21.0 True
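iloc can also take a column position, which pulls out a single value (a standalone sketch with its own small dataframe):

```python
import pandas as pd

df = pd.DataFrame({"Person": ["John", "Myla", "Lewis"],
                   "Age": [24.0, 22.0, 21.0]})

# Row 1, column 0 -> a single value
print(df.iloc[1, 0])  # Myla

# Rows 0-1, all columns -> a smaller dataframe
print(df.iloc[0:2])
```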

Select rows by value using loc and a boolean condition:

In [10]:
df.loc[df['Single'] == True]
Out[10]:
Person Age Single
1 Myla NaN True
2 Lewis 21.0 True
3 John 33.0 True

Select based on multiple conditions:

In [11]:
df.loc[(df['Age'] > 20) & (df['Age'] < 30)]
Out[11]:
Person Age Single
0 John 24.0 False
2 Lewis 21.0 True
4 Myla 26.0 False
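The & above means "and." You can also combine conditions with | for "or" (each condition still needs its own parentheses). A standalone sketch:

```python
import pandas as pd

df = pd.DataFrame({"Person": ["John", "Myla", "Lewis"],
                   "Age": [24.0, 22.0, 33.0]})

# | is "or": keep rows where Age < 23 OR Age > 30
out = df.loc[(df["Age"] < 23) | (df["Age"] > 30)]
print(out["Person"].tolist())  # ['Myla', 'Lewis']
```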

Make a new dataframe based on a selection:

In [12]:
newDF = df.loc[(df['Age'] > 20) & (df['Age'] < 30)]

A note on "copying"... The selection above is really a view into the original dataframe, not an independent copy. Modifying it this way can unintentionally affect the original, which may give you unexpected results down the line if you plan on using the original again. What?? Yeah.

Pandas will also raise warnings if you do it the above way...

The preferred way to split off a selection of a dataframe is with the .copy() method:

In [14]:
newDF2 = df.loc[(df['Age'] > 20) & (df['Age'] < 30)].copy()

And here is the difference. Try this simple modification:

In [13]:
newDF['Age'] = newDF['Age'] + 1
C:\Users\phwh9568\AppData\Local\Temp\ipykernel_34720\1414648428.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newDF['Age'] = newDF['Age'] +1

That's the SettingWithCopyWarning: Pandas can't guarantee whether newDF is a view or a copy, so it warns you that the assignment may not behave as expected.

Okay, try again using newDF2:

In [15]:
newDF2['Age'] = newDF2['Age'] + 1
In [16]:
newDF2
Out[16]:
Person Age Single
0 John 25.0 False
2 Lewis 22.0 True
4 Myla 27.0 False
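No warning this time. A quick sanity check of the difference, as a standalone sketch: modifying a .copy() leaves the original untouched.

```python
import pandas as pd

df = pd.DataFrame({"Age": [24.0, 21.0, 33.0]})

# .copy() gives an independent dataframe; edits don't touch df
sub = df.loc[df["Age"] < 30].copy()
sub["Age"] = sub["Age"] + 1

print(df["Age"].tolist())   # [24.0, 21.0, 33.0] -- unchanged
print(sub["Age"].tolist())  # [25.0, 22.0]
```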

There are a TON of built-in dataframe methods that help you with munging, manipulating, statistics, and more. See the docs: https://pandas.pydata.org/pandas-docs/stable/reference/frame.html.

Some simple descriptive statistics...

.min()
.max()
.mean()
.sum()
.cumsum()
.mode()
.median()
.count()
.std()
.unique()
.describe()
In [28]:
df['Age'].describe()
Out[28]:
count     4.00000
mean     26.00000
std       5.09902
min      21.00000
25%      23.25000
50%      25.00000
75%      27.75000
max      33.00000
Name: Age, dtype: float64
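Notice the count of 4 above: most of these statistics skip NaN values by default. A standalone sketch with a couple of the listed methods:

```python
import pandas as pd
import numpy as np

ages = pd.Series([24.0, np.nan, 21.0, 33.0, 26.0])

print(ages.mean())   # NaN skipped: (24+21+33+26)/4 = 26.0
print(ages.count())  # 4 -- count ignores NaN
print(ages.sum())    # 104.0
```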

Dealing with missing data:

Lots of approaches, but commonly:

.isna() and .fillna()

In [29]:
df.isna()
Out[29]:
Person Age Single
0 False False False
1 False True False
2 False False False
3 False False False
4 False False False

Note! By default, methods like .fillna() return a new dataframe and leave the original unchanged.

Two ways to "save" a change:

df = df.fillna(20.0)

OR

df.fillna(20.0, inplace=True)
In [55]:
df.fillna(20.0, inplace=True)
In [362]:
df
Out[362]:
Person Age Single
0 John 24.0 False
1 Myla 20.0 True
2 Lewis 21.0 True
3 John 33.0 True
4 Myla 26.0 False

Creating new columns:

In [404]:
df['chainsaw'] = 'vrooooooom'

Math problem:

In [405]:
df['result'] = df['Age'] ** 2

String methods:

Concat new string to a column value to form a new column:

In [406]:
df['email'] = df['Person'] + '@cuboulder.rules'

Split a string column using .str.split():

Note the documentation. Use the expand parameter to split into columns.

Split email on @.

In [407]:
df['email'].str.split('@', expand=True)
Out[407]:
0 1
0 John cuboulder.rules
1 Myla cuboulder.rules
2 Lewis cuboulder.rules
3 John cuboulder.rules
4 Myla cuboulder.rules

What did .str.split() return?

How do we select an element by its index position?

Make a domain column:

In [408]:
df['domain'] = df['email'].str.split('@', expand=True)[1]
df
In [409]:
df
Out[409]:
Person Age Single chainsaw result email domain
0 John 24.0 False vrooooooom 576.0 John@cuboulder.rules cuboulder.rules
1 Myla 20.0 True vrooooooom 400.0 Myla@cuboulder.rules cuboulder.rules
2 Lewis 21.0 True vrooooooom 441.0 Lewis@cuboulder.rules cuboulder.rules
3 John 33.0 True vrooooooom 1089.0 John@cuboulder.rules cuboulder.rules
4 Myla 26.0 False vrooooooom 676.0 Myla@cuboulder.rules cuboulder.rules

Delete a column using .drop()

In [410]:
df.drop(columns='chainsaw', inplace=True)
In [411]:
df
Out[411]:
Person Age Single result email domain
0 John 24.0 False 576.0 John@cuboulder.rules cuboulder.rules
1 Myla 20.0 True 400.0 Myla@cuboulder.rules cuboulder.rules
2 Lewis 21.0 True 441.0 Lewis@cuboulder.rules cuboulder.rules
3 John 33.0 True 1089.0 John@cuboulder.rules cuboulder.rules
4 Myla 26.0 False 676.0 Myla@cuboulder.rules cuboulder.rules

Delete multiple columns.
Feed the columns parameter a list.

In [412]:
df.drop(columns=['result','email','domain'], inplace=True)
In [54]:
df
Out[54]:
Person Age Single
0 John 24.0 False
1 Myla NaN True
2 Lewis 21.0 True
3 John 33.0 True
4 Myla 26.0 False

Applying a custom function to a column.¶

Sometimes, you just have to do something your way! Imagine you have a "formula" you want to apply across all the rows in a column, Excel-style. You may be tempted to iterate through the rows (and you can, using .iterrows()), but this is not the most efficient way.

You can use .apply() to apply a custom function to all rows in a column.

Let's start with a quick side trip on functions:

In [31]:
def mingle(age, single):
    if age < 30:
        if single == True:
            result = 'READY TO MINGLE'
        else: 
            result = "Home by 8pm"
    else:
        result = "Home by 8pm"
    
    return result

Does this function work? Let's try!

In [45]:
mingle(30,True)
Out[45]:
'Home by 8pm'

Okay, now we'll use df.apply() with a lambda function to run this on our dataframe, passing the Age and Single columns as input.

(Don't get too hung up on the details right now :-) ).

In [59]:
df['status'] = df.apply(lambda row: mingle(row['Age'], row['Single']), axis=1)
In [60]:
df
Out[60]:
Person Age Single status
0 John 24.0 False Home by 8pm
1 Myla 20.0 True READY TO MINGLE
2 Lewis 21.0 True READY TO MINGLE
3 John 33.0 True Home by 8pm
4 Myla 26.0 False Home by 8pm
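As an aside (not what the notebook does above): simple conditional logic like mingle() can often be vectorized with numpy.where, which is typically much faster than .apply() on large dataframes. A standalone sketch of the same logic:

```python
import pandas as pd
import numpy as np

df = pd.DataFrame({"Age": [24.0, 20.0, 21.0, 33.0, 26.0],
                   "Single": [False, True, True, True, False]})

# Vectorized equivalent of mingle(): under 30 AND single
df["status"] = np.where((df["Age"] < 30) & df["Single"],
                        "READY TO MINGLE", "Home by 8pm")
print(df["status"].tolist())
```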

Reshaping data¶

First, what is the shape?

In [416]:
df.shape
Out[416]:
(5, 3)

Transpose using .T

In [417]:
trans = df.T
trans
Out[417]:
0 1 2 3 4
Person John Myla Lewis John Myla
Age 24.0 20.0 21.0 33.0 26.0
Single False True True True False

Pivoting and Pivot Tables

In [443]:
df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
                        "bar", "bar", "bar", "bar"],
                    "B": ["one", "one", "one", "two", "two",
                        "one", "one", "two", "two"],
                    "C": ["small", "large", "large", "small",
                        "small", "large", "small", "small", "large"],
                    "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
In [435]:
df
Out[435]:
A B C D
0 foo one small 1
1 foo one large 2
2 foo one large 2
3 foo two small 3
4 foo two small 3
5 bar one large 4
6 bar one small 5
7 bar two small 6
8 bar two large 7

You can create Excel-style pivot tables, which are useful for aggregating:

In [436]:
aggDF = pd.pivot_table(df, values='D', index='A', aggfunc='sum')
aggDF
Out[436]:
D
A
bar 22
foo 11
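pivot_table can also spread a second grouping key across the columns via the columns parameter (this goes a step beyond the cell above; standalone sketch):

```python
import pandas as pd

df = pd.DataFrame({"A": ["foo", "foo", "bar", "bar"],
                   "C": ["small", "large", "small", "large"],
                   "D": [1, 2, 5, 4]})

# One row per A value, one column per C value, summing D
table = pd.pivot_table(df, values="D", index="A", columns="C", aggfunc="sum")
print(table)
```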

Rename a column using .rename()

In [437]:
aggDF = aggDF.rename(columns={'D':'sumD'})

Merge the aggregated data back to our original dataframe:

In [438]:
df = pd.merge(df, aggDF, on='A')
In [439]:
df
Out[439]:
A B C D sumD
0 foo one small 1 11
1 foo one large 2 11
2 foo one large 2 11
3 foo two small 3 11
4 foo two small 3 11
5 bar one large 4 22
6 bar one small 5 22
7 bar two small 6 22
8 bar two large 7 22
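By the way, the pivot-then-merge pattern above can also be done in one step with groupby().transform(), which returns one value per row, already aligned to the original dataframe (an alternative, shown here as a standalone sketch):

```python
import pandas as pd

df = pd.DataFrame({"A": ["foo", "foo", "bar", "bar"],
                   "D": [1, 2, 4, 7]})

# transform('sum') broadcasts each group's sum back to its rows
df["sumD"] = df.groupby("A")["D"].transform("sum")
print(df["sumD"].tolist())  # [3, 3, 11, 11]
```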

Now, what's the proportion of each individual D to sumD?

In [440]:
df['Dperc'] = df['D']/df['sumD']*100
In [441]:
df
Out[441]:
A B C D sumD Dperc
0 foo one small 1 11 9.090909
1 foo one large 2 11 18.181818
2 foo one large 2 11 18.181818
3 foo two small 3 11 27.272727
4 foo two small 3 11 27.272727
5 bar one large 4 22 18.181818
6 bar one small 5 22 22.727273
7 bar two small 6 22 27.272727
8 bar two large 7 22 31.818182

Similar: .groupby()

In [444]:
df.groupby(['A','C'])[['D']].sum()
Out[444]:
D
A C
bar large 11
small 11
foo large 4
small 7
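.groupby() can also apply several aggregations at once via .agg() (a standalone sketch):

```python
import pandas as pd

df = pd.DataFrame({"A": ["foo", "foo", "bar", "bar"],
                   "D": [1, 2, 4, 7]})

# One row per group, one column per aggregation
out = df.groupby("A")["D"].agg(["sum", "mean"])
print(out)
```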

That's all for this lesson... questions?