# Class 6
# Source: https://pandas.pydata.org/
# --- pandas Series basics ---
# pandas is imported here because these examples run before any other import.
import pandas as pd

# A Series built from a plain list gets a default integer index.
data = pd.Series([0.25, 0.5, 0.75, 1.0])
print(data)
print(data.values)  # Values are a numpy array
print(data.index)  # The index is an array-like object of type pd.Index

# Associated index
print(data[1])  # item whose index label is 1
print(data[1:3])  # slice: elements at positions 1 and 2

# Note: constructing Series objects with an explicit index
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
print(data)
print(data['a'])  # Access item

# We can even use noncontiguous or nonsequential indices:
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=[2, 5, 3, 7])
print(data[7])  # looked up by the label 7, not by position

# Python dictionaries to Pandas Series (the dict keys become the index labels)
serie = pd.Series({2: 'a', 1: 'b', 3: 'c'})
print(serie)
# Create an empty DataFrame (no rows, no columns)
import pandas as pd

df = pd.DataFrame()
print(df)  # print() call: the Python 2 print statement is a SyntaxError on Python 3
# Create a DataFrame from lists
import pandas as pd

# A flat list becomes a single unnamed column.
data = [1, 2, 3, 4, 5]
df = pd.DataFrame(data)
print(df)

# A list of rows; the column names are supplied explicitly.
dataFrutas = [['Manzana', 100], ['Pera', 105], ['Banano', 130]]
df = pd.DataFrame(dataFrutas, columns=['Nombre', 'Peso(gr)'])
print(df)
# Create a DataFrame from a dict of ndarrays / lists
import pandas as pd

# Each dict key becomes a column; all lists must have the same length.
dataFrutas = {
    'Nombre': ['Manzana', 'Pera', 'Banano', 'Fresa'],
    'Peso(gr)': [100, 105, 130, 42],
}
df = pd.DataFrame(dataFrutas)
print(df)

# Same data, but with explicit row labels instead of the default integer index.
df = pd.DataFrame(dataFrutas, index=['primera', 'segunda', 'tercera', 'cuarta'])
print(df)

# Single cell by (row label, column label); one .loc lookup instead of
# chained df[col][row] indexing.
print(df.loc['primera', 'Nombre'])
# Create a DataFrame from a list of dicts
import pandas as pd

# Each dict is one row; keys missing from a row ('ciudad' in the first one)
# are filled with NaN.
dataFrutas = [
    {'Nombre': 'Manzana', 'Peso(gr)': 100},
    {'Nombre': 'Pera', 'Peso(gr)': 105, 'ciudad': 'Bogota'},
]
df = pd.DataFrame(dataFrutas)
print(df)
import numpy as np
import pandas as pd

# 3x3 frame holding 1..9, with a repeated row label (2) and mixed-type labels,
# so that label-based and position-based lookups give different answers.
df = pd.DataFrame(
    data=np.arange(1, 10).reshape(3, 3),
    index=[2, 'A', 2],
    columns=['primera', 49, 'tercera'],
)

# .loc selects by LABEL
ans = df.loc[2]  # every row labelled 2
ansA = df.loc['A']  # scalar label -> the row labelled 'A' as a Series
ansA = df.loc[['A']]  # list of labels -> the same row as a DataFrame

# .iloc selects by POSITION
ans2 = df.iloc[2]  # scalar position -> third row as a Series
ans2 = df.iloc[[2]]  # list of positions -> third row as a DataFrame
# Read csv
import pandas as pd

movies = pd.read_csv('../Dropbox/movies.csv', sep=',')

# Rename a column
movies.rename(columns={'movieId': 'peliculaId', 'title': 'titulo'}, inplace=True)

# Select a column from the DataFrame
titulo = movies['titulo']

# Select a row (by position)
print(movies.iloc[2])
print(titulo.iloc[2])

# Select rows
primeras_150 = movies.iloc[0:150]  # First 150 movies

# Addition of rows. DataFrame.append was removed in pandas 2.0, so the row is
# taken as a one-row DataFrame (double brackets) and concatenated instead.
movies = pd.concat([movies, primeras_150.iloc[[10]]])

# Reset index: the added row kept its old label, so renumber 0..n-1
movies = movies.reset_index(drop=True)

# Delete rows
# NOTE(review): 9125 is presumably the label of the row added above (i.e. the
# CSV holds 9125 movies) - confirm against the data file.
movies = movies.drop(9125)

# Column deletion
del movies['peliculaId']

# Get column names
print(movies.columns)

# Vectorized string operations
movies['titulo'] = movies['titulo'].str.lower()
movies['genres'] = movies['genres'].str.upper()

# Replacing all occurrences of a value in a DataFrame
# NOTE(review): without regex=True this only replaces cells whose whole value
# is 'ACTION' or 'COMEDY' - confirm whether substring replacement was intended.
movies.replace(['ACTION', 'COMEDY'], ['ACCION', 'COMEDIA'], inplace=True)

# Iterate over the rows; the loop body must be indented
for index, row in movies.iterrows():
    print(row['titulo'], row['genres'])

# Write csv (the index is written as the first column)
movies.to_csv('../Dropbox/moviesFinal.csv', sep=',')
# Load the csv file into a DataFrame named titanic_train.
titanic_train = pd.read_csv('../Dropbox/train.csv', sep=',')

# The 'Name' column becomes 'nombre'.
titanic_train = titanic_train.rename(columns={'Name': 'nombre'})

# Upper-case every value of the 'nombre' column.
nombre_upper = titanic_train['nombre'].str.upper()
titanic_train['nombre'] = nombre_upper

# Rows at positions 300 through 400 (the end of an iloc slice is exclusive,
# hence 401) go into a new DataFrame.
select_rows_df = titanic_train.iloc[300:401]

# Keep only 'PassengerId', 'nombre' and 'Survived' in a new DataFrame.
result_df = select_rows_df.loc[:, ['PassengerId', 'nombre', 'Survived']]

# Save the final DataFrame as Result.csv, without the index column.
result_df.to_csv('../Dropbox/Result.csv', sep=',', index=False)