퍼블릭 xml 리소스들
https://www.data.go.kr/
http://www.weather.go.kr/weather/lifenindustry/sevice_rss.jsp
# XML parsing demo using BeautifulSoup (third-party: pip install beautifulsoup4).
from bs4 import BeautifulSoup

# The file handle is only needed while parsing, so close it promptly.
with open("song.xml") as fp:
    soup = BeautifulSoup(fp, "html.parser")
# print(type(soup), soup)

# `soup` represents the document root.  Descendants can be reached with dotted
# access even when they are not direct children; .find("name") works the same.
print(soup.songlist.song)  # OK
print(soup.song)           # OK
sng = soup.song
print(sng.title)
print(sng.title.text)

# findAll returns a list-like bs4.element.ResultSet of matching elements.
sngs = soup.findAll('song')
print(type(sngs))
print(sngs[1].title.string)
for s in sngs:
    print(s['album'])  # element attributes are read via subscript
print(sngs[1].parent)

# Siblings (same parent, same level): findPrevious / findNext.
print(sngs[1].findPrevious('song'))
print(sngs[1].findNext('song'))
soup.find('song', {'album': 'BB'})  # search by attribute value
class A:
    """Minimal class used to contrast default object printing with soup's output."""

    def __init__(self):
        # Fix: the original assigned a local `a`, which was discarded;
        # store the value on the instance instead.
        self.a = 0

a = A()
print(a)
# How can `soup` (an object) print as readable text?
# Because BeautifulSoup implements the __repr__ method; plain classes
# like A fall back to the default <module.A object at 0x...> form.
RSS 리더
# RSS reader: fetch a news feed and print each item's title and description.
from bs4 import BeautifulSoup
import urllib.request as REQ

jurl = "http://rss.joins.com/joins_news_list.xml"
response = REQ.urlopen(jurl)
soup = BeautifulSoup(response, "html.parser")
# print(soup)

items = soup.findAll("item")
for i in items:
    print("기사제목:", i.title.string)
    print("기사내용:", i.description.string)
    print("-" * 20)
HTML에서 일부 가져오기
# Scrape part of an HTML page: headline text from Naver news front page.
from bs4 import BeautifulSoup
import urllib.request as REQ

nurl = "http://news.naver.com"
response = REQ.urlopen(nurl)
soup = BeautifulSoup(response, "html.parser")
# print(soup)

# Select only the divs carrying the headline CSS class.
items = soup.findAll("div", {'class': 'newsnow_tx_inner'})
for i in items:
    print(i.strong.string)
추가하기
# Append a new <song> element to song.xml and write the file back.
from bs4 import BeautifulSoup

with open("song.xml") as fp:
    soup = BeautifulSoup(fp, "html.parser")
print(soup)

# How to add an element: build <song album=...><title/><singer/></song>
# from new_tag() pieces, then append it under <songlist>.
n_song = soup.new_tag('song', album='Cheap Thrills')
n_title = soup.new_tag('title')
n_title.string = 'Chandlier'
n_singer = soup.new_tag('singer')
n_singer.string = 'Sia'
n_song.append(n_title)
n_song.append(n_singer)
soup.songlist.append(n_song)
print(soup)

# Saving requires converting the tree back to text first.
s = soup.prettify()  # s is a plain string, not a soup object
print(s)
with open("song.xml", "w") as f:
    f.write(s)
삭제하기
# Remove the first <song> element from the parsed tree.
from bs4 import BeautifulSoup

with open("song.xml") as fp:
    soup = BeautifulSoup(fp, "html.parser")
print(soup)

soup.song.decompose()  # delete the element (and its children) in place
print(soup)
기상청 날씨
# Korea Meteorological Administration mid-term forecast RSS
# (stnId=109 appears to be the Seoul/Gyeonggi region — confirm against KMA docs).
from bs4 import BeautifulSoup
import urllib.request as REQ

nurl = "http://www.weather.go.kr/weather/forecast/mid-term-rss3.jsp?stnId=109"
response = REQ.urlopen(nurl)
soup = BeautifulSoup(response, "html.parser")
locations = soup.findAll("location")
def printCity(location):
    """Print one <location>'s city name followed by every <data> forecast entry.

    Each entry prints its date (tmef), weather (wf), min temp (tmn) and
    max temp (tmx), one value per line.
    """
    print(location.city.string)
    print("-" * 10)
    datas = location.findAll('data')
    for d in datas:
        print(d.tmef.string, d.wf.string, d.tmn.string, d.tmx.string, sep="\n")
def printWeather():
    """Print the forecast for every location in the feed (module-level `locations`)."""
    for location in locations:
        printCity(location)
def searchWeather():
    """Prompt for a city name and print that city's forecast, if found."""
    city = input("검색지역을 입력하세요 : ")
    cities = soup.findAll('city')
    for c in cities:
        if c.string == city:
            # The parent <location> holds the full forecast for this city.
            printCity(c.parent)
            break
def showTop5():
    """Print the 5 forecast entries with the highest maximum temperature (tmx)."""
    tmx = soup.findAll("tmx")
    # Fix: sort numerically — comparing the raw strings ranks "9" above "10".
    sml = sorted(tmx, key=lambda n: int(n.string), reverse=True)[:5]
    for t in sml:
        # t.parent is the <data> entry; t.parent.parent is its <location>.
        print(t.parent.parent.city.string, t.parent.tmef.string,
              t.parent.wf.string, t.string)
# Simple menu loop dispatching to the functions above.
MENU = {1: printWeather, 2: searchWeather, 3: showTop5}
while True:
    print("서울 경기 주간 예보")
    m = int(input("1. 서울 경기 지역 날씨 정보\n2. 지역 검색\n3. 최고기온 top5\n>"))
    # Fix: .get(m)() raised TypeError (calling None) for any choice
    # outside 1-3; guard before calling.  (int() still raises ValueError
    # on non-numeric input, as in the original.)
    action = MENU.get(m)
    if action is not None:
        action()
댓글 없음:
댓글 쓰기