Difference between revisions of "Python: Pandas - The Series Data Structure"
From MyWiki
Line 83: | Line 83: | ||
+ | s[0] #This won't call s.iloc[0] as one might expect; the index labels are integers, so s[0] is treated as a label lookup and raises a KeyError | ||
+ | |||
+ | s = pd.Series([100.00, 120.00, 101.00, 3.00]) | ||
+ | s | ||
+ | |||
+ | |||
+ | total = 0 | ||
+ | for item in s: | ||
+ | total+=item | ||
+ | print(total) | ||
+ | |||
+ | |||
+ | import numpy as np | ||
+ | |||
+ | total = np.sum(s) | ||
+ | print(total) | ||
+ | |||
+ | |||
+ | #this creates a big series of random numbers | ||
+ | s = pd.Series(np.random.randint(0,1000,10000)) | ||
+ | s.head() | ||
+ | |||
+ | len(s) | ||
+ | |||
+ | |||
+ | %%timeit -n 100 | ||
+ | summary = 0 | ||
+ | for item in s: | ||
+ | summary+=item | ||
+ | |||
+ | |||
+ | %%timeit -n 100 | ||
+ | summary = np.sum(s) | ||
+ | |||
+ | |||
+ | s+=2 #adds two to each item in s using broadcasting | ||
+ | s.head() | ||
+ | |||
+ | |||
+ | for label, value in s.iteritems(): | ||
+ | s.set_value(label, value+2) | ||
+ | s.head() | ||
+ | |||
+ | |||
+ | %%timeit -n 10 | ||
+ | s = pd.Series(np.random.randint(0,1000,10000)) | ||
+ | for label, value in s.iteritems(): | ||
+ | s.loc[label]= value+2 | ||
+ | |||
+ | |||
+ | %%timeit -n 10 | ||
+ | s = pd.Series(np.random.randint(0,1000,10000)) | ||
+ | s+=2 | ||
+ | |||
+ | s = pd.Series([1, 2, 3]) | ||
+ | s.loc['Animal'] = 'Bears' | ||
+ | s | ||
+ | |||
+ | |||
+ | original_sports = pd.Series({'Archery': 'Bhutan', | ||
+ | 'Golf': 'Scotland', | ||
+ | 'Sumo': 'Japan', | ||
+ | 'Taekwondo': 'South Korea'}) | ||
+ | cricket_loving_countries = pd.Series(['Australia', | ||
+ | 'Barbados', | ||
+ | 'Pakistan', | ||
+ | 'England'], | ||
+ | index=['Cricket', | ||
+ | 'Cricket', | ||
+ | 'Cricket', | ||
+ | 'Cricket']) | ||
+ | all_countries = original_sports.append(cricket_loving_countries) | ||
+ | |||
+ | |||
+ | original_sports | ||
+ | |||
+ | cricket_loving_countries | ||
+ | |||
+ | all_countries | ||
+ | |||
+ | all_countries.loc['Cricket'] | ||
+ | |||
+ | </source> | ||
+ | |||
+ | The DataFrame Data Structure | ||
+ | |||
+ | <source lang="python"> | ||
</source> | </source> |
Latest revision as of 18:15, 28 July 2019
import pandas as pd pd.Series? animals = ['Tiger', 'Bear', 'Moose'] pd.Series(animals) numbers = [1, 2, 3] pd.Series(numbers) animals = ['Tiger', 'Bear', None] pd.Series(animals) numbers = [1, 2, None] pd.Series(numbers) import numpy as np np.nan == None np.nan == np.nan np.isnan(np.nan) sports = {'Archery': 'Bhutan', 'Golf': 'Scotland', 'Sumo': 'Japan', 'Taekwondo': 'South Korea'} s = pd.Series(sports) s s.index s = pd.Series(['Tiger', 'Bear', 'Moose'], index=['India', 'America', 'Canada']) s sports = {'Archery': 'Bhutan', 'Golf': 'Scotland', 'Sumo': 'Japan', 'Taekwondo': 'South Korea'} s = pd.Series(sports, index=['Golf', 'Sumo', 'Hockey']) s
Querying a series
sports = {'Archery': 'Bhutan', 'Golf': 'Scotland', 'Sumo': 'Japan', 'Taekwondo': 'South Korea'} s = pd.Series(sports) s s.iloc[3] s.loc['Golf'] s[3] s['Golf'] sports = {99: 'Bhutan', 100: 'Scotland', 101: 'Japan', 102: 'South Korea'} s = pd.Series(sports) s[0] #This won't call s.iloc[0] as one might expect; the index labels are integers, so s[0] is treated as a label lookup and raises a KeyError s = pd.Series([100.00, 120.00, 101.00, 3.00]) s total = 0 for item in s: total+=item print(total) import numpy as np total = np.sum(s) print(total) #this creates a big series of random numbers s = pd.Series(np.random.randint(0,1000,10000)) s.head() len(s) %%timeit -n 100 summary = 0 for item in s: summary+=item %%timeit -n 100 summary = np.sum(s) s+=2 #adds two to each item in s using broadcasting s.head() for label, value in s.iteritems(): s.set_value(label, value+2) s.head() %%timeit -n 10 s = pd.Series(np.random.randint(0,1000,10000)) for label, value in s.iteritems(): s.loc[label]= value+2 %%timeit -n 10 s = pd.Series(np.random.randint(0,1000,10000)) s+=2 s = pd.Series([1, 2, 3]) s.loc['Animal'] = 'Bears' s original_sports = pd.Series({'Archery': 'Bhutan', 'Golf': 'Scotland', 'Sumo': 'Japan', 'Taekwondo': 'South Korea'}) cricket_loving_countries = pd.Series(['Australia', 'Barbados', 'Pakistan', 'England'], index=['Cricket', 'Cricket', 'Cricket', 'Cricket']) all_countries = original_sports.append(cricket_loving_countries) original_sports cricket_loving_countries all_countries all_countries.loc['Cricket']
The DataFrame Data Structure