Python: Pandas - The Series Data Structure

From MyWiki
Jump to: navigation, search
import pandas as pd
pd.Series?
 
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(animals)
 
 
numbers = [1, 2, 3]
pd.Series(numbers)
 
 
animals = ['Tiger', 'Bear', None]
pd.Series(animals)
 
numbers = [1, 2, None]
pd.Series(numbers)
 
 
import numpy as np
np.nan == None
 
 
np.nan == np.nan
 
 
np.isnan(np.nan)
 
 
 
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s
 
s.index
 
 
s = pd.Series(['Tiger', 'Bear', 'Moose'], index=['India', 'America', 'Canada'])
s
 
 
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports, index=['Golf', 'Sumo', 'Hockey'])
s

Querying a series

sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s
 
 
s.iloc[3]
 
 
s.loc['Golf']
 
 
s[3]
 
 
s['Golf']
 
 
sports = {99: 'Bhutan',
          100: 'Scotland',
          101: 'Japan',
          102: 'South Korea'}
s = pd.Series(sports)
 
 
s[0] #This won't call s.iloc[0] as one might expect, it generates an error instead
 
s = pd.Series([100.00, 120.00, 101.00, 3.00])
s
 
 
total = 0
for item in s:
    total+=item
print(total)
 
 
import numpy as np
 
total = np.sum(s)
print(total)
 
 
#this creates a big series of random numbers
s = pd.Series(np.random.randint(0,1000,10000))
s.head()
 
len(s)
 
 
%%timeit -n 100
summary = 0
for item in s:
    summary+=item
 
 
%%timeit -n 100
summary = np.sum(s)
 
 
s+=2 #adds two to each item in s using broadcasting
s.head()
 
 
for label, value in s.iteritems():
    s.set_value(label, value+2)
s.head()
 
 
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
    s.loc[label]= value+2
 
 
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
s+=2
 
s = pd.Series([1, 2, 3])
s.loc['Animal'] = 'Bears'
s
 
 
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'], 
                                   index=['Cricket',
                                          'Cricket',
                                          'Cricket',
                                          'Cricket'])
all_countries = original_sports.append(cricket_loving_countries)
 
 
original_sports
 
cricket_loving_countries
 
all_countries
 
all_countries.loc['Cricket']

The DataFrame Data Structure