'python' 카테고리의 글 목록 (2 Page)

'python'에 해당되는 글 32건

2019.12.26 정규표현식 연습 2. 반복
2019.12.26 정규표현식 연습 1.
2019.12.26 자주 쓰는 string 함수
2019.12.26 크롤링 연습 10. session을 이용한 post 예제
2019.12.26 크롤링 연습 9. 공공데이터 ex)미세먼지
2019.12.26 크롤링 연습 8. Open API사용한 report 작성
2019.12.26 크롤링 연습 7. open API test
2019.12.25 크롤링 연습 6. openpyxl 이용 웹 데이터 저장
2019.12.25 크롤링 연습 5. urllib 사용 및 게시판 글 리스트 모두 가져오기
2019.12.24 크롤링 연습 4. 순위 가져오기

정규표현식 연습 2. 반복

 


import re


# Repetition quantifiers
#   ?  the preceding character occurs 0 or 1 time
#   *  the preceding character occurs 0 or more times
#   +  the preceding character occurs 1 or more times

# Pattern definitions
pattern1 = re.compile('D?ABC')  # ABC or DABC
pattern2 = re.compile('D*ABC')  # ABC, DABC, DDABC, DDDABC, ...
pattern3 = re.compile('D+ABC')  # DABC, DDABC, DDDABC, ...

# Run every pattern against each sample in turn; after the loop `string`
# holds the last sample.
for string in (
    'DDDDDDDd ABC',
    'DDDDDDDd DABC',
    'DDDDDDDd DDABC',
    'DDDDDDDd DDDABC',
):
    print('======ex) ', string)
    for pattern in (pattern1, pattern2, pattern3):
        # search() scans the whole string and returns the first match or None.
        print(pattern.search(string))


#결과
======ex)  DDDDDDDd ABC
<re.Match object; span=(9, 12), match='ABC'>
<re.Match object; span=(9, 12), match='ABC'>
None
======ex)  DDDDDDDd DABC
<re.Match object; span=(9, 13), match='DABC'>
<re.Match object; span=(9, 13), match='DABC'>
<re.Match object; span=(9, 13), match='DABC'>
======ex)  DDDDDDDd DDABC
<re.Match object; span=(10, 14), match='DABC'>
<re.Match object; span=(9, 14), match='DDABC'>
<re.Match object; span=(9, 14), match='DDABC'>
======ex)  DDDDDDDd DDDABC
<re.Match object; span=(11, 15), match='DABC'>
<re.Match object; span=(9, 15), match='DDDABC'>
<re.Match object; span=(9, 15), match='DDDABC'>

 

import re


# Counted repetition
#   {n}    the preceding character repeated exactly n times
#   {m,n}  the preceding character repeated at least m and at most n times

# Pattern definitions
pattern1 = re.compile('D{1}ABC')
pattern2 = re.compile('D{2,5}ABC')

samples = [
    'DDDDDDDd ABC',
    'DDDDDDDd DABC',
    'DDDDDDDd DDABC',
    'DDDDDDDd DDDABC',
    'DDDDDDDd DDDDDDABC',
]

# Print the first match (or None) of each pattern for every sample; after
# the loop `string` holds the last sample.
for string in samples:
    print('======ex) ', string)
    for pattern in (pattern1, pattern2):
        print(pattern.search(string))


#결과
======ex)  DDDDDDDd ABC
None
None
======ex)  DDDDDDDd DABC
<re.Match object; span=(9, 13), match='DABC'>
None
======ex)  DDDDDDDd DDABC
<re.Match object; span=(10, 14), match='DABC'>
<re.Match object; span=(9, 14), match='DDABC'>
======ex)  DDDDDDDd DDDABC
<re.Match object; span=(11, 15), match='DABC'>
<re.Match object; span=(9, 15), match='DDDABC'>
======ex)  DDDDDDDd DDDDDDABC
<re.Match object; span=(14, 18), match='DABC'>
<re.Match object; span=(10, 18), match='DDDDDABC'>

 
import re


# Pattern definitions
pattern1 = re.compile('[가-힣]')   # one character inside the range 가-힣 (Hangul)
pattern2 = re.compile('[^가-힣]')  # one character outside that range

string = "안녕hello"

# Show the first Hangul character, then the first non-Hangul character.
for pattern in (pattern1, pattern2):
    print(pattern.search(string))

#결과
<re.Match object; span=(0, 1), match='안'>
<re.Match object; span=(2, 3), match='h'>

 
import re

# Matches the first run of ASCII letters.
pattern1 = re.compile('[a-zA-Z]+')

# search() looks for the pattern anywhere in the string, while match() only
# succeeds when the pattern matches at the very beginning of the string.
for label, string in (
    ('ex1)', "hello world"),
    ('ex2)', "11hello world"),
):
    print(label)
    print(pattern1.search(string))
    print(pattern1.match(string))

#결과
ex1)
<re.Match object; span=(0, 5), match='hello'>
<re.Match object; span=(0, 5), match='hello'>
ex2)
<re.Match object; span=(2, 7), match='hello'>
None

'python' 카테고리의 다른 글

python 가상환경 설정 (0)	2020.01.02
크롤링 연습 11. google sheet에 데이터 저장 (0)	2019.12.27
정규표현식 연습 1. (0)	2019.12.26
자주 쓰는 string 함수 (0)	2019.12.26
크롤링 연습 10. session을 이용한 post 예제 (0)	2019.12.26

Posted by easy16

정규표현식 연습 1.

 

import re

string = '[Dave]!@#!@#'


# re.sub(pattern, replacement, string) replaces every match of `pattern`.
# \W matches any character that is not a word character (letter, digit or
# underscore). The pattern is a raw string so the backslash reaches the regex
# engine untouched; a plain '\W' is an invalid string escape and triggers a
# warning on current Python versions.
# Explicit character-class alternative: re.sub('[^A-Za-z0-9]', '', string)
re.sub(r'\W', '', string)


#결과 
'Dave'



import re
string = 'DnA DoA DaA Dok D.A'

# Pattern definition: '.' matches any single character (other than a
# newline), so 'D.A' also matches the literal text 'D.A'.
pattern = re.compile('D.A')

# search() returns only the first match of the pattern.
pattern.search(string)
# findall() returns a list with every non-overlapping match.
pattern.findall(string)

#결과
['DnA', 'DoA', 'DaA', 'D.A']


import re
# Pattern definition: '\.' matches a literal dot only. A raw string is used
# so the backslash is passed to the regex engine as written; a plain 'D\.A'
# is an invalid string escape and triggers a warning on current Python
# versions.
pattern = re.compile(r'D\.A')

string = 'DnA DoA DaA Dok D.A'


# Only the final 'D.A' contains a real dot, so that is the first match.
pattern.search(string)
#결과
<re.Match object; span=(16, 19), match='D.A'>


import re
# Input text; 'Dok' does not end in 'A', so the pattern leaves it untouched.
string = 'DnA DoA DaA Dok'

# Replace every occurrence of "D, any character, A" with "Dave".
re.compile('D.A').sub("Dave", string)

#결과
'Dave Dave Dave Dok'

'python' 카테고리의 다른 글

크롤링 연습 11. google sheet에 데이터 저장 (0)	2019.12.27
정규표현식 연습 2. 반복 (0)	2019.12.26
자주 쓰는 string 함수 (0)	2019.12.26
크롤링 연습 10. session을 이용한 post 예제 (0)	2019.12.26
크롤링 연습 9. 공공데이터 ex)미세먼지 (0)	2019.12.26

Posted by easy16

자주 쓰는 string 함수

 



test_string = "Dave David"
# str.count returns the number of non-overlapping occurrences of the given
# substring; 'Id' does not occur in test_string, so this evaluates to 0.
test_string.count('Id')

#결과
0

test_string = "Dave David"
# index: position of the first 'D' (0 here).
test_string.index('D')
# rindex: position of the last 'D' (5 here).
test_string.rindex('D')

#결과
5

# Difference between index and find
test_string = "Dave David"
# index raises ValueError when the substring is not present:
#test_string.index('f')
# find returns -1 instead of raising when the substring is not present.
print(test_string.find('f'))

#결과
-1

test_string = "Dave David"
# str.join concatenates the items of an iterable, inserting the separator
# between them. Iterating a string yields its characters, so every character
# of test_string (including the space) ends up separated by a comma.
comma = ','
comma.join(test_string)

#결과
'D,a,v,e, ,D,a,v,i,d'

test_string = "   111Dave111 "

# strip / lstrip / rstrip without arguments remove whitespace from both
# ends / the left end / the right end. strip(' 1') removes any leading and
# trailing run made up of spaces and '1' characters.
for stripped in (
    test_string.strip(),
    test_string.lstrip(),
    test_string.rstrip(),
    test_string.strip(' 1'),
):
    print(stripped)

#결과
111Dave111
111Dave111 
   111Dave111
Dave

test_string = "Dave David"

# Print the lower-cased and upper-cased forms, one per line.
print(test_string.lower(), test_string.upper(), sep='\n')

#결과
dave david
DAVE DAVID


test_string1 = "Dave David Dope"
test_string2 = "Dave/David/Dope"

# split() with no argument splits on whitespace; split('/') splits on the
# given delimiter. Both produce the same list of three names.
for words in (test_string1.split(), test_string2.split('/')):
    print(words)
['Dave', 'David', 'Dope']
['Dave', 'David', 'Dope']


test_string = "(Dave)"
# Swap the round brackets for square ones: the translation table maps
# '(' -> '[' and ')' -> ']' and is applied to every character in one pass.
test_string.translate(str.maketrans('()', '[]'))

#결과
'[Dave]'

'python' 카테고리의 다른 글

정규표현식 연습 2. 반복 (0)	2019.12.26
정규표현식 연습 1. (0)	2019.12.26
크롤링 연습 10. session을 이용한 post 예제 (0)	2019.12.26
크롤링 연습 9. 공공데이터 ex)미세먼지 (0)	2019.12.26
크롤링 연습 8. Open API사용한 report 작성 (0)	2019.12.26

Posted by easy16

크롤링 연습 10. session을 이용한 post 예제

 
#쿠키와 세션

#로그인이 필요한 페이지의 경우, header에 특별한 정보를 정해진 포맷으로 포함해야 한다.
#그 때, client 내에 정보를 저장하는 것이 쿠키 (보안에 취약)
#server에 저장하는 방식은 세션 (client에서는 암호화된 값을 가진다)

#사이트마다 로그인 방식이 다르므로 그때마다 다른 기술을 적용해야한다.

#실습
#1, 로그인 필요한 웹페이지 요청시, 어떤 정보가 필요한가?
# - 쿠키 또는 세션
#2, 해당정보를 어떻게 코드레벨로 전달할 것인지?

import requests
from bs4 import BeautifulSoup

login_url = 'https://www.hanbit.co.kr/member/login_proc.php'
crawl_url = 'http://www.hanbit.co.kr/myhanbit/myhanbit.html'

# Login form fields. The values are placeholders; replace them with a real
# account before running.
params = { 'm_id':'id', 'm_passwd':'passwod'}

# A Session stores the cookies set by the login response and sends them with
# later requests. The with-block closes the session's connections when done.
with requests.Session() as session:
    # Submit the login form; the credentials travel in the POST body.
    res = session.post(login_url, data=params)

    # Raise requests.HTTPError on a 4xx/5xx response instead of carrying on.
    res.raise_for_status()

    # Debug helpers:
    #   print(res.headers)                 # response headers
    #   print(session.cookies.get_dict())  # cookies stored in the session

    # Request the members-only page with the same (now logged-in) session.
    res = session.get(crawl_url)
    res.raise_for_status()

soup = BeautifulSoup(res.content,'html.parser')

# Mileage element. Full selector as copied from the browser:
# #container > div > div.sm_mymileage > dl.mileage_section1 > dd > span
data = soup.select('div.sm_mymileage > dl.mileage_section1 > dd > span')

if data:
    print('마일리지: ', data[0].get_text())
else:
    # select() returned nothing; presumably the login was rejected or the
    # page layout changed. Report it instead of failing with IndexError.
    print('Mileage element not found (login failed or page layout changed?)')

'python' 카테고리의 다른 글

정규표현식 연습 1. (0)	2019.12.26
자주 쓰는 string 함수 (0)	2019.12.26
크롤링 연습 9. 공공데이터 ex)미세먼지 (0)	2019.12.26
크롤링 연습 8. Open API사용한 report 작성 (0)	2019.12.26
크롤링 연습 7. open API test (0)	2019.12.26

Posted by easy16

크롤링 연습 9. 공공데이터 ex)미세먼지

 

#공공 open API 테스트
#xml 포멧 활용
# 역시 bs4를 이용하여 xml 데이터를 가져온다.
import requests
from bs4 import BeautifulSoup


# The service key is a placeholder; it is appended as the first query
# parameter, followed by the remaining parameters.
service_key = '?ServiceKey=abcd'
open_api = 'http://openapi.airkorea.or.kr/openapi/services/rest/ArpltnInforInqireSvc/getCtprvnMesureSidoLIst'
params = '&numOfRows=10&pageNo=1&sidoName=서울&searchCondition=DAILY'

url = open_api + service_key + params
#print(url)
# Send the request.
res = requests.get(url)

# Handle the response: parse the body only after the status check passes.
if res.status_code == 200:
    soup = BeautifulSoup(res.content, 'html.parser')
    data = soup.find_all('item')

    for item in data:
        # NOTE(review): tag names are looked up in lower case; presumably
        # html.parser lower-cases the XML tag names - confirm against the
        # actual response.
        cityname = item.find('cityname')
        pm10value = item.find('pm10value')
        # find() returns None for a missing tag; skip incomplete items
        # rather than failing on .get_text().
        if cityname is None or pm10value is None:
            continue
        print(cityname.get_text(), pm10value.get_text())
else:
    print("Error code : ",res.status_code)

'python' 카테고리의 다른 글

자주 쓰는 string 함수 (0)	2019.12.26
크롤링 연습 10. session을 이용한 post 예제 (0)	2019.12.26
크롤링 연습 8. Open API사용한 report 작성 (0)	2019.12.26
크롤링 연습 7. open API test (0)	2019.12.26
크롤링 연습 6. openpyxl 이용 웹 데이터 저장 (0)	2019.12.25

Posted by easy16

크롤링 연습 8. Open API사용한 report 작성

Shopping에서 iphone 검색 순위 1000개 가져오기

 
import requests
import pprint
import openpyxl

# Placeholder Naver Open API credentials.
client_id ='XiNJxT192mk123'
client_secret = 'dpolse'

# Workbook with a header row; columns B and C are widened for long values.
excel_file = openpyxl.Workbook()
excel_sheet = excel_file.active
excel_sheet.column_dimensions['B'].width = 100
excel_sheet.column_dimensions['C'].width = 100
excel_sheet.append(['rank','title','link'])

# Request pieces that never change between pages are built once.
naver_open_api = 'https://openapi.naver.com/v1/search/shop.json?query=iphone&display=100&sort=sim&start='
header_params = {'X-Naver-Client-Id':client_id ,'X-Naver-Client-Secret':client_secret}

# Fetch 1000 results: display is capped at 100 per request, so the start
# offset is advanced page by page (1, 101, ..., 901).
item_index = 1
for start_num in range(1, 1001, 100):
    res = requests.get(naver_open_api + str(start_num), headers=header_params)

    if res.status_code != 200:
        # Report the failed page and carry on with the next one.
        print("Error code : ",res.status_code)
        continue

    data = res.json()
    for item in data['items']:
        # item_index is the running rank across all pages.
        excel_sheet.append([item_index, item['title'], item['link']])
        item_index += 1


excel_file.save('shpping_1000.xlsx')
excel_file.close()

'python' 카테고리의 다른 글

크롤링 연습 10. session을 이용한 post 예제 (0)	2019.12.26
크롤링 연습 9. 공공데이터 ex)미세먼지 (0)	2019.12.26
크롤링 연습 7. open API test (0)	2019.12.26
크롤링 연습 6. openpyxl 이용 웹 데이터 저장 (0)	2019.12.25
크롤링 연습 5. urllib 사용 및 게시판 글 리스트 모두 가져오기 (0)	2019.12.25

Posted by easy16

크롤링 연습 7. open API test

 
#다양한 크롤링 기법 맛보기

# Open API(REST API)란 Representational State Transfer API로, HTTP를 통해 서버가 제공하는 기능을 사용할 수 있다.
# 일반적으로 XML, JSON(JavaScript Object Notation) 형태로 응답을 전달한다 (데이터 추출이 쉬움).

# as-is : html
# to-be : JSON(RestAPI)


import requests

# Naver Open API credentials (placeholders); real values are issued after
# registering an application on the Naver developers site.
client_id = 'KLJSDFk123LJSFD'
client_secret ='JKSdlfj5415'

# Endpoint to query.
naver_open_api = 'https://openapi.naver.com/v1/search/news.json?query=갤럭시노트10'
# The credentials are sent as request headers.
header_params = {'X-Naver-Client-Id':client_id ,'X-Naver-Client-Secret':client_secret}
# Send the request.
res = requests.get(naver_open_api, headers=header_params)

# Handle the response.
if res.status_code != 200:
    print("Error code : ",res.status_code)
else:
    # Decode the body as JSON.
    data = res.json()

    # pprint.pprint(data) would give a readable dump of the whole payload.

    # enumerate(..., start=1) supplies the 1-based position of each item.
    for rank, item in enumerate(data['items'], start=1):
        print(rank, item['title'],item['link'])

'python' 카테고리의 다른 글

크롤링 연습 9. 공공데이터 ex)미세먼지 (0)	2019.12.26
크롤링 연습 8. Open API사용한 report 작성 (0)	2019.12.26
크롤링 연습 6. openpyxl 이용 웹 데이터 저장 (0)	2019.12.25
크롤링 연습 5. urllib 사용 및 게시판 글 리스트 모두 가져오기 (0)	2019.12.25
크롤링 연습 4. 순위 가져오기 (0)	2019.12.24

Posted by easy16

크롤링 연습 6. openpyxl 이용 웹 데이터 저장

 


#엑셀로 출력 저장하기 -> openpyxl 이용
# read & write


import openpyxl
import requests
from bs4 import BeautifulSoup

# Basic steps for creating an Excel file:
#   1. create the workbook
#   2. take the active sheet
#   3. set the column widths
excel_file = openpyxl.Workbook()
excel_sheet = excel_file.active
for column, width in (('A', 10), ('B', 100)):
    excel_sheet.column_dimensions[column].width = width

excel_sheet.append(['numer','title'])

base_url = 'https://seeko.earlyadopter.co.kr/bbs/board.php?bo_table=buysell&page='
for page in range(1, 21):
    res = requests.get(base_url + str(page))
    soup = BeautifulSoup(res.content, 'html.parser')

    # One marker row per page, then one numbered row per post title.
    excel_sheet.append([ "page : "+ str(page)])
    for num, item in enumerate(soup.find_all('a','item-subject'), start=1):
        # strip() removes leading/trailing CR, tab, newline and the digits
        # 1, 2 and 3 from the link text.
        excel_sheet.append([num, item.get_text().strip('\r\t\n123')])


# Centre the two header cells.
for cell_ref in ('A1', 'B1'):
    excel_sheet[cell_ref].alignment = openpyxl.styles.Alignment(horizontal='center')

excel_file.save('results.xlsx')
excel_file.close()

 

#엑셀 읽기
import openpyxl

excel_file = openpyxl.load_workbook('results.xlsx')
# Read the active sheet.
excel_sheet = excel_file.active

# To read a specific sheet by its title instead (get_sheet_by_name is
# deprecated in openpyxl in favour of indexing the workbook):
#excel_sheet = excel_file['seeko 중고 장터']
# To walk the sheet column by column:
#for column in excel_sheet.columns:
#    print(column[0].value, column[1].value)

# Print every cell value of every row, space-separated. Unpacking the whole
# row avoids assuming a column count: a fixed row[2] raises IndexError when
# the sheet has fewer than three columns.
for row in excel_sheet.rows:
    print(*(cell.value for cell in row))


excel_file.close()

'python' 카테고리의 다른 글

크롤링 연습 8. Open API사용한 report 작성 (0)	2019.12.26
크롤링 연습 7. open API test (0)	2019.12.26
크롤링 연습 5. urllib 사용 및 게시판 글 리스트 모두 가져오기 (0)	2019.12.25
크롤링 연습 4. 순위 가져오기 (0)	2019.12.24
크롤링 연습 3. select (0)	2019.12.22

Posted by easy16

크롤링 연습 5. urllib 사용 및 게시판 글 리스트 모두 가져오기

 
#urllib 사용법
#requests를 사용할 때, encoding 관련 에러가 발생하면 아래의 urllib을 사용해보도록.

from urllib.request import urlopen
from bs4 import BeautifulSoup


# urlopen returns a file-like response that BeautifulSoup can read directly;
# the with-block closes the connection once the page has been parsed.
with urlopen('https://seeko.earlyadopter.co.kr/bbs/board.php?bo_table=mainnews') as res:
    soup = BeautifulSoup(res, 'html.parser')

# Collect every <a class="item-subject"> link and print its text.
data = soup.find_all('a','item-subject')
for item in data:
    # strip() removes leading/trailing tabs, newlines and the digits 3 and 1;
    # the remaining text is split into its lines.
    l1 = item.get_text().strip('\t\n31').split('\n')
    print(l1)

 
#게시판의 여러 페이지 읽어오기

#1 페이지
#https://seeko.earlyadopter.co.kr/bbs/board.php?bo_table=buysell&page=1
#2 페이지
#https://seeko.earlyadopter.co.kr/bbs/board.php?bo_table=buysell&page=2
#3 페이지
#https://seeko.earlyadopter.co.kr/bbs/board.php?bo_table=buysell&page=3

#링크에 대한 규칙을 파악



import requests
from bs4 import BeautifulSoup


# Walk the board from page 20 down to page 1. range() excludes its stop
# value, so the stop must be 0 for page 1 to be included.
for page in range(20, 0, -1):
    # Only the page number changes between the board URLs.
    site = 'https://seeko.earlyadopter.co.kr/bbs/board.php?bo_table=buysell&page='+str(page)
    #print(site)

    res = requests.get(site)
    soup = BeautifulSoup(res.content, 'html.parser')

    item_list = soup.find_all('a','item-subject')
    print('====start page : '+str(page)+'====')
    for item in item_list:
        # strip() removes leading/trailing CR, tab, newline and the digits
        # 1, 2 and 3; the remaining text is split into its lines.
        l1 = item.get_text().strip('\r\t\n123').split('\n')
        print(l1)
    print('====end page '+str(page)+'====')

'python' 카테고리의 다른 글

크롤링 연습 7. open API test (0)	2019.12.26
크롤링 연습 6. openpyxl 이용 웹 데이터 저장 (0)	2019.12.25
크롤링 연습 4. 순위 가져오기 (0)	2019.12.24
크롤링 연습 3. select (0)	2019.12.22
크롤링 연습 2. 실시간 검색어 (0)	2019.12.19

Posted by easy16

크롤링 연습 4. 순위 가져오기

 
import requests
from bs4 import BeautifulSoup

res = requests.get('https://search.shopping.naver.com/best100v2/detail.nhn?catId=50000000&listType=B10002')
soup = BeautifulSoup(res.content,"html.parser")

# Earlier selector experiments, kept for reference (each result was printed
# with the same get_text() loop used below):
#   soup.select('a._popular_srch_lst_li')
#   soup.select('#popular_srch_lst')                         # the whole list
#   soup.select('#popular_srch_lst > li.on > span.txt > a')  # one specific item
#   soup.select('#popular_srch_lst li a')                    # best 10
#   soup.select('#cateProductListArea50000000 ul li p a')

# Selector as copied from the browser for the first product:
#   #productListArea > ul > li:nth-child(1) > p > a
# The :nth-child(1) filter is left out of the selector used here.
best_list = soup.select('#productListArea > ul > li> p > a' )

for link in best_list:
    print(link.get_text())

 


import requests
from bs4 import BeautifulSoup

res = requests.get('https://finance.naver.com/sise/lastsearch2.nhn')
soup = BeautifulSoup(res.content,"html.parser")

# Naver stocks. Selector as copied from the browser for one cell:
# #contentarea > div.box_type_l > table > tbody > tr:nth-child(3) > td:nth-child(2) > a
# Here every table row inside div.box_type_l is selected.
best_list = soup.select('div.box_type_l table  tr' )

for item in best_list:
    title = item.find('a')                 # link holding the stock name
    tier = item.find('td', class_='no')    # rank cell
    change_list = item.select('td.number > span.tah.p11.nv01')

    # Rows without a link, a rank cell or at least two matching spans are
    # skipped (presumably header/spacer rows - confirm against the page).
    # find() returns None when nothing matches, so compare with `is None`.
    if title is None or tier is None or len(change_list) < 2:
        continue

    print(tier.get_text() + "위 ",title.get_text(),change_list[1].get_text().strip())

'python' 카테고리의 다른 글

크롤링 연습 6. openpyxl 이용 웹 데이터 저장 (0)	2019.12.25
크롤링 연습 5. urllib 사용 및 게시판 글 리스트 모두 가져오기 (0)	2019.12.25
크롤링 연습 3. select (0)	2019.12.22
크롤링 연습 2. 실시간 검색어 (0)	2019.12.19
크롤링 연습 1. find, find_all (0)	2019.12.17

Posted by easy16

easy blog

'python'에 해당되는 글 32건

정규표현식 연습 2. 반복

'python' 카테고리의 다른 글

정규표현식 연습 1.

'python' 카테고리의 다른 글

자주 쓰는 string 함수

'python' 카테고리의 다른 글

크롤링 연습 10. session을 이용한 post 예제

'python' 카테고리의 다른 글

크롤링 연습 9. 공공데이터 ex)미세먼지

'python' 카테고리의 다른 글

크롤링 연습 8. Open API사용한 report 작성

'python' 카테고리의 다른 글

크롤링 연습 7. open API test

'python' 카테고리의 다른 글

크롤링 연습 6. openpyxl 이용 웹 데이터 저장

'python' 카테고리의 다른 글

크롤링 연습 5. urllib 사용 및 게시판 글 리스트 모두 가져오기

'python' 카테고리의 다른 글

크롤링 연습 4. 순위 가져오기

'python' 카테고리의 다른 글

카테고리

공지사항

태그목록

최근에 올라온 글

최근에 달린 댓글

글 보관함

달력

링크

티스토리툴바