# Widen the notebook's main container to 90% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style> .container{width:90% !important;}</style>"))

import pandas as pd
import seaborn as sns

# Load the 'iris' example dataset through seaborn.
df = sns.load_dataset('iris')
# Print the (rows, columns) shape, then preview the first rows.
print(df.shape)
df.head()

(150, 5)

데이터프레임에서 특정 컬럼 정보만 불러오기

# Pass a list of column names to select several columns at once.
columns = ['sepal_length', 'sepal_width', 'species']
df[columns].head()

# Attribute-style access only works when the column name is a valid Python
# identifier (no spaces or special characters) and does not clash with an
# existing DataFrame attribute or method; use df['name'] otherwise.
# NOTE(review): the original note also excluded Korean column names, but
# Python 3 identifiers may contain Hangul, so that is presumably outdated.
df.sepal_width.head()

0    3.5
1    3.0
2    3.2
3    3.1
4    3.6
Name: sepal_width, dtype: float64

# Bracket indexing with a single column label returns that column as a Series;
# unlike attribute access, it accepts any column name.
df['sepal_width'].head()

0    3.5
1    3.0
2    3.2
3    3.1
4    3.6
Name: sepal_width, dtype: float64

정규 표현식으로 특정 컬럼 불러오기

# General form: DataFrame.filter(regex=...) keeps the columns whose name
# matches the pattern (re.search semantics). 'regex' is only a placeholder.
df.filter(regex=r'regex')

# Column names containing a literal comma. Written as a raw string so the
# backslash reaches the regex engine without an invalid-escape warning.
df.filter(regex=r'\,').head(3)

# Column names ending with "length".
df.filter(regex=r'length$').head(3)

# Column names containing an underscore.
df.filter(regex=r'_').head(3)

# Column names starting with "sepal".
df.filter(regex=r'^sepal').head(3)

# Every column except those whose name starts with "species"
# (negative lookahead anchored at the start of the name).
df.filter(regex=r'^(?!species).*').head(3)

# Column names that are exactly x1, x2, x3, x4 or x5.
df.filter(regex=r'^x[1-5]$').head(3)

# Label-based selection: with .loc both ends of a slice are included, so this
# takes index labels 2 through 5 and columns 'sepal_width' through 'petal_width'.
df.loc[2:5, 'sepal_width':'petal_width']

loc와는 다르게 iloc에서 a:b로 슬라이싱하면 끝 값 b는 포함되지 않아 b-1번째 위치까지만 선택됨
iloc에는 레이블이 아닌 정수 위치(인덱스 번호)만 입력할 수 있음

# Position-based selection: rows at positions 2-4 and columns at positions 1-2
# (the stop value of each slice is excluded).
df.iloc[2:5, 1:3]

loc를 사용하면 논리(불리언) 조건으로 행을 고르고, 컬럼명으로 열을 선택할 수 있음

# Boolean mask picks the rows where sepal_length > 3; the list picks the
# columns to return for those rows.
df.loc[df['sepal_length']>3 , ['sepal_length','sepal_width']].head()

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

	sepal_length	sepal_width	petal_length	petal_width
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2

	sepal_length	sepal_width	petal_length	petal_width
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2

5. Handling Missing Data(결측치 다루기) (0)	2019.10.16
4. Summarize Data(자료 요약하기) (0)	2019.10.16
2. Subset Observations(Rows) (행 데이터 다루기) (0)	2019.10.09
1. Creating DataFrame(데이터프레임 만들기) (0)	2019.10.09