Python 3.

Read CSV using pandas

import pandas as pd

# Read the CSV file into a pandas DataFrame. 'xxx.csv' is a relative path, so
# it is resolved against the current working directory.
dataframe1=pd.read_csv('xxx.csv')

 

Example

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load a SAS dataset into a DataFrame. The raw string (r'...') keeps the
# backslashes in the Windows path literal.
wv = pd.read_sas(r'C:\temp\final_all.sas7bdat')

# Count how many rows carry each distinct School_Name value.
schoolname=wv['School_Name'].value_counts()
# Show the first 76 entries of those counts.
schoolname[:76]

# Interactive inspection: each bare expression below is meant to be evaluated
# in a console/notebook so that its value is echoed.
type(wv)
wv.shape
wv.columns
type(wv.columns)
wv.index
type(wv.index)
# First 5 rows, first 4 columns (position-based selection).
wv.iloc[:5,:4]
# NOTE(review): -5 without a colon selects a single row (the fifth from the
# end) and its first 5 columns; use -5: if the last five rows were intended.
wv.iloc[-5,:5]
wv.head(5)
wv.tail(3)
wv.info()
# Selecting one column by label; type() shows what kind of object comes back.
ethnicity=wv['Ethnicity']
type(ethnicity)
# The DataFrame's underlying values as an array.
np_vals = wv.values

# Line plot of the reading scores in row order.
x1 = wv['Reading_Score'].values
plt.plot(x1)
plt.show()

# Reading vs Science
wv.plot(x='Reading_Score', y='Science_Score', kind='scatter')
plt.xlabel('Reading')
plt.ylabel('Science')
# show must be *called*; a bare `plt.show` only names the function and
# displays nothing.
plt.show()

# histogram
wv.plot(y='Science_Score', kind='hist')
plt.xlabel('Science_Score')
plt.show()

# Summary statistics for a single column.
wv['GrandTotal'].count()
wv['GrandTotal'].mean()
wv['GrandTotal'].std()
wv['GrandTotal'].median()

# entire data frame
# numeric_only=True restricts the reduction to numeric columns; without it,
# pandas 2.x raises a TypeError when the frame also holds text columns
# (this frame has School_Name and Ethnicity).
wv.mean(numeric_only=True)
wv.std(numeric_only=True)
wv.quantile(0.5, numeric_only=True)

# box plot -- the earlier attempt plotted the whole data frame and never
# actually called plt.show (the parentheses were missing).
# Selecting the column first limits the plot to the score named in the
# y-label; presumably that is what was intended -- confirm.
wv['Reading_Score'].plot(kind='box')
plt.ylabel('[Reading_Score]')
plt.show()

# Summary of the school-name column. The label is spelled 'School_Name'
# (with an underscore) where the same column is counted earlier in these
# notes; 'SchoolName' would raise a KeyError if that is the real label.
wv['School_Name'].describe()

 

Reading a text file

# Text encoding of the file. Assigning it on its own line has no effect on
# read_csv, so it is passed explicitly below. UTF-8 is also read_csv's
# default, so the result is unchanged for UTF-8 files.
encoding = "utf-8"

df = pd.read_csv('C:/Users/1/Documents/Python Scripts/test.csv',
                 encoding=encoding)

# Echo the DataFrame (useful in an interactive console/notebook).
df

 

 

fruits = ['apple', 'banana', 'orange']

# Get the elements of fruits using a for loop, and print 'I like ___s'
for fruit in fruits:
    # The loop body has to be indented; at column 0 Python raises an
    # IndentationError.
    print('I like ' + fruit + 's')

 

Reference:

Charles Severance's Python for Informatics

 

IN and NOT IN

# `in` on strings is a substring test; the indented body runs when it is True.
if "H" in "Hello":
    print("Yes")

# `not in` is the negation. As a bare expression this evaluates to True
# (visible when typed in an interactive console).
"Z" not in "HEllo"

 

Identify the working directory

import os

# Return the current working directory as a string (echoed when run in an
# interactive console).
os.getcwd()

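Change the working directory (on my Windows computer each backslash in the path has to be written as \\, because a single \ starts an escape sequence inside a Python string).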

# Make c:\temp the working directory; '\\' is one literal backslash.
os.chdir('c:\\temp')

 

Read a file and print a line that includes a search word (p. 129 of Python for Informatics)

import os
import re

# Work from C:\temp so the relative file name below resolves there.
os.chdir('C:\\temp')

# `with` closes the file automatically, even if an error occurs mid-loop.
with open('macro1.txt') as hand:
    for line in hand:
        # Drop the trailing newline so print() does not emit a blank line.
        line = line.rstrip()
        # Print only the lines that contain the search word.
        if re.search('Madison', line):
            print(line)