import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


            
              apples = np.array([5.5,6.5,7.9,5.7,6.8,8.1])
apples

array([5.5, 6.5, 7.9, 5.7, 6.8, 8.1])


            
              app = pd.Series(apples)
app

0    5.5
1    6.5
2    7.9
3    5.7
4    6.8
5    8.1
dtype: float64


            
              # add one to all values
app + 1

0    6.5
1    7.5
2    8.9
3    6.7
4    7.8
5    9.1
dtype: float64


            
              # compute the Series mean
app.mean()

6.75


            
              fruits = np.array([[5.5,6.5,7.9,5.7,6.8,8.1],[5.1,3.4,6.5,8.8,6.3,4.9],[4.3,6.7,6.5,4.6,7.2,7.9]])

fruits.shape

(3, 6)


            
              fruits.transpose()

array([[5.5, 5.1, 4.3],
       [6.5, 3.4, 6.7],
       [7.9, 6.5, 6.5],
       [5.7, 8.8, 4.6],
       [6.8, 6.3, 7.2],
       [8.1, 4.9, 7.9]])


            
              # create dataframe from ndarray
f = pd.DataFrame(fruits).transpose()
f


            
              # name the columns
f.columns = (['apples','bananas','oranges'])

f


            
              type(f)

pandas.core.frame.DataFrame


            
              f.describe()


            
              ax1 = plt.subplot(211)
ax2 = plt.subplot(212)
plt.tight_layout()
f.plot(ax=ax1, ylim=(0,9))
f.plot.bar(ax=ax2, rot=0) #By default pandas bar plot will put your x-labels on their sides. Use rot=0 to fix it.

<AxesSubplot: >


            
              # a boxplot 
f.boxplot()

<AxesSubplot: >


            
              apples.size

6


            
              # create Series of same length
pears = pd.Series([6.5,6.1,8.2,7.5,7.3,6.4])


            
              # add new column to dataframe
f['pears'] = pears

f


            
              f['farm'] = 'McDonald'
f


            
              f.plot()

<AxesSubplot: >


            
              fr = {'apples':[3.3,4.5,5.7,4.7,3.7,4.4], 'bananas': [4.5,3.9,6.4,5.1,4.8,4.3], 
      'oranges':[4.0,5.9,6.0,6.5,5.2,5.8], 'pears':[6.8,6.6,6.2,6.1,5.9,6.5]}


            
              fruits2 = pd.DataFrame(fr)
fruits2


            
              fruits2['farm'] = 'FarmerJohn'
fruits2


            
              f = f.append(fruits2)
f

/var/folders/jr/tf9k36ws1956sfj0w8ybnj2m0000gn/T/ipykernel_43618/4084900850.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  f = f.append(fruits2)


            
              apps = f['apples'].groupby(f['farm'])
apps

<pandas.core.groupby.generic.SeriesGroupBy object at 0x12fec0dc0>


            
              apps.apply(np.mean)

farm
FarmerJohn    4.383333
McDonald      6.750000
Name: apples, dtype: float64


            
              # groupby objects have plot capabilities too
apps.plot()

farm
FarmerJohn    AxesSubplot(0.125,0.11;0.775x0.77)
McDonald      AxesSubplot(0.125,0.11;0.775x0.77)
Name: apples, dtype: object


            
              f.boxplot("apples", by="farm")

<AxesSubplot: title={'center': 'apples'}, xlabel='farm'>


            
              flowers = pd.read_csv('../Datasets/iris.csv')
flowers


            
              # first row of data and 3rd through 5th columns
flowers.iloc[0, 2:5]

Petal.Length       1.4
Petal.Width        0.2
Species         setosa
Name: 0, dtype: object


            
              # Ditto but using names instead of numeric index location. Note the colon indicating the range of consecutive columns
flowers.loc[1, 'Petal.Length':'Species']

Petal.Length       1.4
Petal.Width        0.2
Species         setosa
Name: 1, dtype: object


            
              # LOC: return all rows from 'Species' column matching query from Sepal.Length column
flowers.loc[(flowers['Sepal.Length'] >= 4.9), 'Species']

0         setosa
1         setosa
4         setosa
5         setosa
7         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: Species, Length: 134, dtype: object


            
              flowers.loc[(flowers['Sepal.Length'] >= 4.9)]


            
              data = pd.read_csv('../Datasets/TitanicSurvival.csv')

#show first N rows (default = 5)
data.head()


            
              data['survived'].value_counts()

no     809
yes    500
Name: survived, dtype: int64


            
              data['survived'].value_counts(normalize = True)

no     0.618029
yes    0.381971
Name: survived, dtype: float64

	apples	bananas	oranges
count	6.000000	6.000000	6.000000
mean	6.750000	5.833333	6.200000
std	1.083974	1.832667	1.442221
min	5.500000	3.400000	4.300000
25%	5.900000	4.950000	5.075000
50%	6.650000	5.700000	6.600000
75%	7.625000	6.450000	7.075000
max	8.100000	8.800000	7.900000

	Unnamed: 0	survived	sex	age	passengerClass
0	Allen, Miss. Elisabeth Walton	yes	female	29.0000	1st
1	Allison, Master. Hudson Trevor	yes	male	0.9167	1st
2	Allison, Miss. Helen Loraine	no	female	2.0000	1st
3	Allison, Mr. Hudson Joshua Crei	no	male	30.0000	1st
4	Allison, Mrs. Hudson J C (Bessi	no	female	25.0000	1st

Intro to Pandas Library¶

Numpy arrays to dataframe¶

Built-in Plotting Capabilities¶

Creating new columns¶

Dataframes from dictionaries¶

Read in a dataframe¶

Pandas `loc` and `iloc`¶

Categorical data & counting¶

	0	1	2
0	5.5	5.1	4.3
1	6.5	3.4	6.7
2	7.9	6.5	6.5
3	5.7	8.8	4.6
4	6.8	6.3	7.2
5	8.1	4.9	7.9

	apples	bananas	oranges
0	5.5	5.1	4.3
1	6.5	3.4	6.7
2	7.9	6.5	6.5
3	5.7	8.8	4.6
4	6.8	6.3	7.2
5	8.1	4.9	7.9

	apples	bananas	oranges	pears
0	5.5	5.1	4.3	6.5
1	6.5	3.4	6.7	6.1
2	7.9	6.5	6.5	8.2
3	5.7	8.8	4.6	7.5
4	6.8	6.3	7.2	7.3
5	8.1	4.9	7.9	6.4

	apples	bananas	oranges	pears	farm
0	5.5	5.1	4.3	6.5	McDonald
1	6.5	3.4	6.7	6.1	McDonald
2	7.9	6.5	6.5	8.2	McDonald
3	5.7	8.8	4.6	7.5	McDonald
4	6.8	6.3	7.2	7.3	McDonald
5	8.1	4.9	7.9	6.4	McDonald

	apples	bananas	oranges	pears
0	3.3	4.5	4.0	6.8
1	4.5	3.9	5.9	6.6
2	5.7	6.4	6.0	6.2
3	4.7	5.1	6.5	6.1
4	3.7	4.8	5.2	5.9
5	4.4	4.3	5.8	6.5

	apples	bananas	oranges	pears	farm
0	3.3	4.5	4.0	6.8	FarmerJohn
1	4.5	3.9	5.9	6.6	FarmerJohn
2	5.7	6.4	6.0	6.2	FarmerJohn
3	4.7	5.1	6.5	6.1	FarmerJohn
4	3.7	4.8	5.2	5.9	FarmerJohn
5	4.4	4.3	5.8	6.5	FarmerJohn

	Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa
...	...	...	...	...	...
145	6.7	3.0	5.2	2.3	virginica
146	6.3	2.5	5.0	1.9	virginica
147	6.5	3.0	5.2	2.0	virginica
148	6.2	3.4	5.4	2.3	virginica
149	5.9	3.0	5.1	1.8	virginica

	0	1	2
0	5.5	5.1	4.3
1	6.5	3.4	6.7
2	7.9	6.5	6.5
3	5.7	8.8	4.6
4	6.8	6.3	7.2
5	8.1	4.9	7.9

	apples	bananas	oranges
0	5.5	5.1	4.3
1	6.5	3.4	6.7
2	7.9	6.5	6.5
3	5.7	8.8	4.6
4	6.8	6.3	7.2
5	8.1	4.9	7.9

	apples	bananas	oranges	pears
0	5.5	5.1	4.3	6.5
1	6.5	3.4	6.7	6.1
2	7.9	6.5	6.5	8.2
3	5.7	8.8	4.6	7.5
4	6.8	6.3	7.2	7.3
5	8.1	4.9	7.9	6.4

	apples	bananas	oranges	pears
0	3.3	4.5	4.0	6.8
1	4.5	3.9	5.9	6.6
2	5.7	6.4	6.0	6.2
3	4.7	5.1	6.5	6.1
4	3.7	4.8	5.2	5.9
5	4.4	4.3	5.8	6.5

Intro to Pandas Library¶

Numpy arrays to dataframe¶

Built-in Plotting Capabilities¶

Creating new columns¶

Dataframes from dictionaries¶

Read in a dataframe¶

Pandas loc and iloc¶

Categorical data & counting¶

Pandas `loc` and `iloc`¶

	0	1	2
0	5.5	5.1	4.3
1	6.5	3.4	6.7
2	7.9	6.5	6.5
3	5.7	8.8	4.6
4	6.8	6.3	7.2
5	8.1	4.9	7.9

	apples	bananas	oranges
0	5.5	5.1	4.3
1	6.5	3.4	6.7
2	7.9	6.5	6.5
3	5.7	8.8	4.6
4	6.8	6.3	7.2
5	8.1	4.9	7.9

	apples	bananas	oranges	pears
0	5.5	5.1	4.3	6.5
1	6.5	3.4	6.7	6.1
2	7.9	6.5	6.5	8.2
3	5.7	8.8	4.6	7.5
4	6.8	6.3	7.2	7.3
5	8.1	4.9	7.9	6.4

	apples	bananas	oranges	pears
0	3.3	4.5	4.0	6.8
1	4.5	3.9	5.9	6.6
2	5.7	6.4	6.0	6.2
3	4.7	5.1	6.5	6.1
4	3.7	4.8	5.2	5.9
5	4.4	4.3	5.8	6.5