pip install js2py pip install loguru pip install pandas pip install plotly pip install pyquery
參考「第11屆iT邦幫忙鐵人賽-參考 Python 程式交易 30 天新手入門系列」系列範例修改,並存成檔名 getTaxies.py:
import csv import datetime import fractions import json import os import random import re import time import urllib.parse import argparse from random import randint from time import sleep import js2py import loguru import pandas import plotly.graph_objects import pyquery import requests import requests.exceptions now = datetime.datetime.now() proxies = [] proxy = None class Taiex: def __init__(self, date, openPrice, highestPrice, lowestPrice, closePrice): # 日期 self.Date = date # 開盤價 self.OpenPrice = openPrice # 最高價 self.HighestPrice = highestPrice # 最低價 self.LowestPrice = lowestPrice # 收盤價 self.ClosePrice = closePrice # 物件表達式 def __repr__(self): return f'class Taiex {{ Date={self.Date}, OpenPrice={self.OpenPrice}, HighestPrice={self.HighestPrice}, LowestPrice={self.LowestPrice}, ClosePrice={self.ClosePrice} }}' def getProxy(): global proxies if len(proxies) == 0: getProxies() proxy = random.choice(proxies) loguru.logger.debug(f'getProxy: {proxy}') proxies.remove(proxy) loguru.logger.debug(f'getProxy: {len(proxies)} proxies is unused.') return proxy def reqProxies(hour): global proxies proxies = proxies + getProxiesFromProxyNova() proxies = proxies + getProxiesFromGatherProxy() proxies = proxies + getProxiesFromFreeProxyList() proxies = list(dict.fromkeys(proxies)) loguru.logger.debug(f'reqProxies: {len(proxies)} proxies is found.') def getProxies(): global proxies hour = f'{now:%Y%m%d%H}' filename = f'proxies-{hour}.csv' filepath = f'{filename}' if os.path.isfile(filepath): loguru.logger.info(f'getProxies: {filename} exists.') loguru.logger.warning(f'getProxies: {filename} is loading...') with open(filepath, 'r', newline='', encoding='utf-8-sig') as f: reader = csv.DictReader(f) for row in reader: proxy = row['Proxy'] proxies.append(proxy) loguru.logger.success(f'getProxies: {filename} is loaded.') else: loguru.logger.info(f'getProxies: {filename} does not exist.') reqProxies(hour) loguru.logger.warning(f'getProxies: {filename} is saving...') with open(filepath, 'w', newline='', encoding='utf-8-sig') as f: writer = csv.writer(f) writer.writerow([ 'Proxy' ]) for proxy in proxies: writer.writerow([ proxy ]) loguru.logger.success(f'getProxies: {filename} is saved.') def getProxiesFromProxyNova(): proxies = [] countries = [ 'tw', 'jp', 'kr', 'id', 'my', 'th', 'vn', 'ph', 'hk', 'uk', 'us' ] for country in countries: url = f'https://www.proxynova.com/proxy-server-list/country-{country}/' loguru.logger.debug(f'getProxiesFromProxyNova: {url}') loguru.logger.warning(f'getProxiesFromProxyNova: downloading...') response = requests.get(url) if response.status_code != 200: loguru.logger.debug(f'getProxiesFromProxyNova: status code is not 200') continue loguru.logger.success(f'getProxiesFromProxyNova: downloaded.') d = pyquery.PyQuery(response.text) table = d('table#tbl_proxy_list') rows = list(table('tbody:first > tr').items()) loguru.logger.warning(f'getProxiesFromProxyNova: scanning...') for row in rows: tds = list(row('td').items()) if len(tds) == 1: continue js = row('td:nth-child(1) > abbr').text() js = 'let x = %s; x' % (js[15:-2]) ip = js2py.eval_js(js).strip() port = row('td:nth-child(2)').text().strip() proxy = f'{ip}:{port}' proxies.append(proxy) loguru.logger.success(f'getProxiesFromProxyNova: scanned.') loguru.logger.debug(f'getProxiesFromProxyNova: {len(proxies)} proxies is found.') time.sleep(1) return proxies def getProxiesFromGatherProxy(): proxies = [] countries = [ 'Taiwan', 'Japan', 'United States', 'Thailand', 'Vietnam', 'Indonesia', 'Singapore', 'Philippines', 'Malaysia', 'Hong Kong' ] for country in countries: url = f'http://www.gatherproxy.com/proxylist/country/?c={urllib.parse.quote(country)}' loguru.logger.debug(f'getProxiesFromGatherProxy: {url}') loguru.logger.warning(f'getProxiesFromGatherProxy: downloading...') response = requests.get(url) if response.status_code != 200: loguru.logger.debug(f'getProxiesFromGatherProxy: status code is not 200') continue loguru.logger.success(f'getProxiesFromGatherProxy: downloaded.') d = pyquery.PyQuery(response.text) scripts = list(d('table#tblproxy > script').items()) loguru.logger.warning(f'getProxiesFromGatherProxy: scanning...') for script in scripts: script = script.text().strip() script = re.sub(r'^gp\.insertPrx\(', '', script) script = re.sub(r'\);$', '', script) script = json.loads(script) ip = script['PROXY_IP'].strip() port = int(script['PROXY_PORT'].strip(), 16) proxy = f'{ip}:{port}' proxies.append(proxy) loguru.logger.success(f'getProxiesFromGatherProxy: scanned.') loguru.logger.debug(f'getProxiesFromGatherProxy: {len(proxies)} proxies is found.') time.sleep(1) return proxies def getProxiesFromFreeProxyList(): proxies = [] url = 'https://free-proxy-list.net/' loguru.logger.debug(f'getProxiesFromFreeProxyList: {url}') loguru.logger.warning(f'getProxiesFromFreeProxyList: downloading...') response = requests.get(url) if response.status_code != 200: loguru.logger.debug(f'getProxiesFromFreeProxyList: status code is not 200') return loguru.logger.success(f'getProxiesFromFreeProxyList: downloaded.') d = pyquery.PyQuery(response.text) trs = list(d('table#proxylisttable > tbody > tr').items()) loguru.logger.warning(f'getProxiesFromFreeProxyList: scanning...') for tr in trs: tds = list(tr('td').items()) ip = tds[0].text().strip() port = tds[1].text().strip() proxy = f'{ip}:{port}' proxies.append(proxy) loguru.logger.success(f'getProxiesFromFreeProxyList: scanned.') loguru.logger.debug(f'getProxiesFromFreeProxyList: {len(proxies)} proxies is found.') return proxies # 取得指定年月內每交易日的盤後資訊 def getTaiexs(year, month): global proxy taiexs = [] while True: if proxy is None: proxy = getProxy() url = f'https://www.twse.com.tw/indicesReport/MI_5MINS_HIST?response=json&date={year}{month:02}01' loguru.logger.info(f'getTaiexs: month {month} url is {url}') loguru.logger.warning(f'getTaiexs: month {month} is downloading...') try: response = requests.get( url, proxies={ 'https': f'https://{proxy}' }, timeout=3 ) if response.status_code != 200: loguru.logger.success(f'getTaiexs: month {month} status code is not 200.') proxy = None break loguru.logger.success(f'getTaiexs: month {month} is downloaded.') body = response.json() stat = body['stat'] if stat != 'OK': loguru.logger.error(f'getTaiexs: month {month} responses with error({stat}).') break records = body['data'] if len(records) == 0: loguru.logger.success(f'getTaiexs: month {month} has no data.') break for record in records: date = record[0].strip() parts = date.split('/') y = int(parts[0]) + 1911 m = int(parts[1]) d = int(parts[2]) date = f'{y}{m:02d}{d:02d}' openPrice = record[1].replace(',', '').strip() highestPrice = record[2].replace(',', '').strip() lowestPrice = record[3].replace(',', '').strip() closePrice = record[4].replace(',', '').strip() taiex = Taiex( date=date, openPrice=openPrice, highestPrice=highestPrice, lowestPrice=lowestPrice, closePrice=closePrice ) taiexs.append(taiex) except requests.exceptions.ConnectionError: loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working (connection error).') proxy = None continue except requests.exceptions.ConnectTimeout: loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working (connect timeout).') proxy = None continue except requests.exceptions.ProxyError: loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working (proxy error).') proxy = None continue except requests.exceptions.SSLError: loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working (ssl error).') proxy = None continue except Exception as e: loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working.') loguru.logger.error(e) proxy = None continue break return taiexs # 儲存傳入的盤後資訊 def saveTaiexs(filepath, taiexs): loguru.logger.info(f'saveTaiexs: {len(taiexs)} taiexs.') loguru.logger.warning(f'saveTaiexs: {filepath} is saving...') with open(filepath, mode='w', newline='', encoding='utf-8-sig') as f: writer = csv.writer(f) writer.writerow([ 'Date', 'OpenPrice', 'HighestPrice', 'LowestPrice', 'ClosePrice' ]) for taiex in taiexs: writer.writerow([ taiex.Date, taiex.OpenPrice, taiex.HighestPrice, taiex.LowestPrice, taiex.ClosePrice ]) loguru.logger.success(f'main: {filepath} is saved.') def main(args): taiexs = [] thisYear = 2019 if args.year: loguru.logger.info(f'[-y|--year] [value:{args.year}]') thisYear = int(args.year) else: loguru.logger.info(f'[-y|--year] is not used, set year in '+str(thisYear)) # 取得從 2019.01 至 2019.12 的盤後資訊 for month in range(1, 13): # 程式暫停 3~15 秒 sleep(randint(3, 15)) taiexs = taiexs + getTaiexs(thisYear, month) filepath = f'taiexs-'+str(thisYear)+'.csv' saveTaiexs(filepath, taiexs) # 使用 Pandas 讀取下載回來的紀錄檔 df = pandas.read_csv(filepath) # 將 Date 欄位按照格式轉換為 datetime 資料 df['Date'] = pandas.to_datetime(df['Date'], format='%Y%m%d') # 建立圖表 figure = plotly.graph_objects.Figure( data=[ # Line Chart # 收盤價 plotly.graph_objects.Scatter( x=df['Date'], y=df['ClosePrice'], name='收盤價', mode='lines', line=plotly.graph_objects.scatter.Line( color='#6B99E5' ) ), # Candlestick Chart # K 棒 plotly.graph_objects.Candlestick( x=df['Date'], open=df['OpenPrice'], high=df['HighestPrice'], low=df['LowestPrice'], close=df['ClosePrice'], name='盤後資訊', ) ], # 設定 XY 顯示格式 layout=plotly.graph_objects.Layout( xaxis=plotly.graph_objects.layout.XAxis( tickformat='%Y-%m' ), yaxis=plotly.graph_objects.layout.YAxis( tickformat='.2f' ) ) ) figure.show() if __name__ == '__main__': parser = argparse.ArgumentParser() # -y [value:yyyy] # -- year [value:yyyy] parser.add_argument( '-y', '--year', help='set year in yyyy format', type=int ) args = parser.parse_args() loguru.logger.add( f'{datetime.date.today():%Y%m%d}.log', rotation='1 day', retention='7 days', level='DEBUG' ) main(args)範例以2007年為例,執行下列:
python getTaxies.py -y 2007執行結果:
python getTaixes.py -y 2007 2019-10-14 03:34:40.292 | INFO | __main__:main:300 - [-y|--year] [value:2007] 2019-10-14 03:34:44.294 | INFO | __main__:getProxies:69 - getProxies: proxies-2019101403.csv exists. 2019-10-14 03:34:44.298 | WARNING | __main__:getProxies:70 - getProxies: proxies-2019101403.csv is loading... 2019-10-14 03:34:44.308 | SUCCESS | __main__:getProxies:76 - getProxies: proxies-2019101403.csv is loaded. 2019-10-14 03:34:44.313 | DEBUG | __main__:getProxy:50 - getProxy: 180.250.216.242:3128 2019-10-14 03:34:44.317 | DEBUG | __main__:getProxy:52 - getProxy: 724 proxies is unused. 2019-10-14 03:34:44.322 | INFO | __main__:getTaiexs:206 - getTaiexs: month 1 url is https://www.twse.com.tw/indicesReport/MI_5MINS_HIST?response=json&date=20070101 ...(略)... 2019-10-14 03:36:58.484 | INFO | __main__:getTaiexs:206 - getTaiexs: month 12 url is https://www.twse.com.tw/indicesReport/MI_5MINS_HIST?response=json&date=20071201 2019-10-14 03:36:58.489 | WARNING | __main__:getTaiexs:207 - getTaiexs: month 12 is downloading... 2019-10-14 03:36:58.846 | SUCCESS | __main__:getTaiexs:220 - getTaiexs: month 12 is downloaded. 2019-10-14 03:36:58.850 | INFO | __main__:saveTaiexs:275 - saveTaiexs: 247 taiexs. 2019-10-14 03:36:58.853 | WARNING | __main__:saveTaiexs:276 - saveTaiexs: taiexs-2007.csv is saving... 2019-10-14 03:36:58.864 | SUCCESS | __main__:saveTaiexs:294 - main: taiexs-2007.csv is saved.
2007年大盤指數 |
#
沒有留言:
張貼留言