2019年10月14日 星期一

第11屆iT邦幫忙鐵人賽-參考 Python 程式交易 30 天新手入門系列範例

可能需要預先安裝下列 Python Packages:
pip install js2py
pip install loguru
pip install pandas
pip install plotly
pip install pyquery

參考「第11屆iT邦幫忙鐵人賽-參考 Python 程式交易 30 天新手入門系列」系列範例修改,並存成檔名 getTaxies.py:


import csv
import datetime
import fractions
import json
import os
import random
import re
import time
import urllib.parse
import argparse

from random import randint
from time import sleep

import js2py
import loguru
import pandas
import plotly.graph_objects
import pyquery
import requests
import requests.exceptions

now = datetime.datetime.now()
proxies = []

proxy = None

class Taiex:
    def __init__(self, date, openPrice, highestPrice, lowestPrice, closePrice):
        # 日期
        self.Date = date
        # 開盤價
        self.OpenPrice = openPrice
        # 最高價
        self.HighestPrice = highestPrice
        # 最低價
        self.LowestPrice = lowestPrice
        # 收盤價
        self.ClosePrice = closePrice
    # 物件表達式
    def __repr__(self):
        return f'class Taiex {{ Date={self.Date}, OpenPrice={self.OpenPrice}, HighestPrice={self.HighestPrice}, LowestPrice={self.LowestPrice}, ClosePrice={self.ClosePrice} }}'

def getProxy():
    global proxies
    if len(proxies) == 0:
        getProxies()
    proxy = random.choice(proxies)
    loguru.logger.debug(f'getProxy: {proxy}')
    proxies.remove(proxy)
    loguru.logger.debug(f'getProxy: {len(proxies)} proxies is unused.')
    return proxy

def reqProxies(hour):
    global proxies
    proxies = proxies + getProxiesFromProxyNova()
    proxies = proxies + getProxiesFromGatherProxy()
    proxies = proxies + getProxiesFromFreeProxyList()
    proxies = list(dict.fromkeys(proxies))
    loguru.logger.debug(f'reqProxies: {len(proxies)} proxies is found.')

def getProxies():
    global proxies
    hour = f'{now:%Y%m%d%H}'
    filename = f'proxies-{hour}.csv'
    filepath = f'{filename}'
    if os.path.isfile(filepath):
        loguru.logger.info(f'getProxies: {filename} exists.')
        loguru.logger.warning(f'getProxies: {filename} is loading...')
        with open(filepath, 'r', newline='', encoding='utf-8-sig') as f:
            reader = csv.DictReader(f)
            for row in reader:
                proxy = row['Proxy']
                proxies.append(proxy)
        loguru.logger.success(f'getProxies: {filename} is loaded.')
    else:
        loguru.logger.info(f'getProxies: {filename} does not exist.')
        reqProxies(hour)
        loguru.logger.warning(f'getProxies: {filename} is saving...')
        with open(filepath, 'w', newline='', encoding='utf-8-sig') as f:
            writer = csv.writer(f)
            writer.writerow([
                'Proxy'
            ])
            for proxy in proxies:
                writer.writerow([
                    proxy
                ])
        loguru.logger.success(f'getProxies: {filename} is saved.')

def getProxiesFromProxyNova():
    proxies = []
    countries = [
        'tw',
        'jp',
        'kr',
        'id',
        'my',
        'th',
        'vn',
        'ph',
        'hk',
        'uk',
        'us'
    ]
    for country in countries:
        url = f'https://www.proxynova.com/proxy-server-list/country-{country}/'
        loguru.logger.debug(f'getProxiesFromProxyNova: {url}')
        loguru.logger.warning(f'getProxiesFromProxyNova: downloading...')
        response = requests.get(url)
        if response.status_code != 200:
            loguru.logger.debug(f'getProxiesFromProxyNova: status code is not 200')
            continue
        loguru.logger.success(f'getProxiesFromProxyNova: downloaded.')
        d = pyquery.PyQuery(response.text)
        table = d('table#tbl_proxy_list')
        rows = list(table('tbody:first > tr').items())
        loguru.logger.warning(f'getProxiesFromProxyNova: scanning...')
        for row in rows:
            tds = list(row('td').items())
            if len(tds) == 1:
                continue
            js = row('td:nth-child(1) > abbr').text()
            js = 'let x = %s; x' % (js[15:-2])
            ip = js2py.eval_js(js).strip()
            port = row('td:nth-child(2)').text().strip()
            proxy = f'{ip}:{port}'
            proxies.append(proxy)
        loguru.logger.success(f'getProxiesFromProxyNova: scanned.')
        loguru.logger.debug(f'getProxiesFromProxyNova: {len(proxies)} proxies is found.')
        time.sleep(1)
    return proxies

def getProxiesFromGatherProxy():
    proxies = []
    countries = [
        'Taiwan',
        'Japan',
        'United States',
        'Thailand',
        'Vietnam',
        'Indonesia',
        'Singapore',
        'Philippines',
        'Malaysia',
        'Hong Kong'
    ]
    for country in countries:
        url = f'http://www.gatherproxy.com/proxylist/country/?c={urllib.parse.quote(country)}'
        loguru.logger.debug(f'getProxiesFromGatherProxy: {url}')
        loguru.logger.warning(f'getProxiesFromGatherProxy: downloading...')
        response = requests.get(url)
        if response.status_code != 200:
            loguru.logger.debug(f'getProxiesFromGatherProxy: status code is not 200')
            continue
        loguru.logger.success(f'getProxiesFromGatherProxy: downloaded.')
        d = pyquery.PyQuery(response.text)
        scripts = list(d('table#tblproxy > script').items())
        loguru.logger.warning(f'getProxiesFromGatherProxy: scanning...')
        for script in scripts:
            script = script.text().strip()
            script = re.sub(r'^gp\.insertPrx\(', '', script)
            script = re.sub(r'\);$', '', script)
            script = json.loads(script)
            ip = script['PROXY_IP'].strip()
            port = int(script['PROXY_PORT'].strip(), 16)
            proxy = f'{ip}:{port}'
            proxies.append(proxy)
        loguru.logger.success(f'getProxiesFromGatherProxy: scanned.')
        loguru.logger.debug(f'getProxiesFromGatherProxy: {len(proxies)} proxies is found.')
        time.sleep(1)
    return proxies

def getProxiesFromFreeProxyList():
    proxies = []
    url = 'https://free-proxy-list.net/'
    loguru.logger.debug(f'getProxiesFromFreeProxyList: {url}')
    loguru.logger.warning(f'getProxiesFromFreeProxyList: downloading...')
    response = requests.get(url)
    if response.status_code != 200:
        loguru.logger.debug(f'getProxiesFromFreeProxyList: status code is not 200')
        return
    loguru.logger.success(f'getProxiesFromFreeProxyList: downloaded.')
    d = pyquery.PyQuery(response.text)
    trs = list(d('table#proxylisttable > tbody > tr').items())
    loguru.logger.warning(f'getProxiesFromFreeProxyList: scanning...')
    for tr in trs:
        tds = list(tr('td').items())
        ip = tds[0].text().strip()
        port = tds[1].text().strip()
        proxy = f'{ip}:{port}'
        proxies.append(proxy)
    loguru.logger.success(f'getProxiesFromFreeProxyList: scanned.')
    loguru.logger.debug(f'getProxiesFromFreeProxyList: {len(proxies)} proxies is found.')
    return proxies

# 取得指定年月內每交易日的盤後資訊
def getTaiexs(year, month):
    global proxy
    taiexs = []
    while True:
        if proxy is None:
            proxy = getProxy()
        url = f'https://www.twse.com.tw/indicesReport/MI_5MINS_HIST?response=json&date={year}{month:02}01'
        loguru.logger.info(f'getTaiexs: month {month} url is {url}')
        loguru.logger.warning(f'getTaiexs: month {month} is downloading...')
        try:
            response = requests.get(
                url,
                proxies={
                    'https': f'https://{proxy}'
                },
                timeout=3
            )
            if response.status_code != 200:
                loguru.logger.success(f'getTaiexs: month {month} status code is not 200.')
                proxy = None
                break
            loguru.logger.success(f'getTaiexs: month {month} is downloaded.')
            body = response.json()
            stat = body['stat']
            if stat != 'OK':
                loguru.logger.error(f'getTaiexs: month {month} responses with error({stat}).')
                break
            records = body['data']
            if len(records) == 0:
                loguru.logger.success(f'getTaiexs: month {month} has no data.')
                break
            for record in records:
                date = record[0].strip()
                parts = date.split('/')
                y = int(parts[0]) + 1911
                m = int(parts[1])
                d = int(parts[2])
                date = f'{y}{m:02d}{d:02d}'
                openPrice = record[1].replace(',', '').strip()
                highestPrice = record[2].replace(',', '').strip()
                lowestPrice = record[3].replace(',', '').strip()
                closePrice = record[4].replace(',', '').strip()
                taiex = Taiex(
                    date=date,
                    openPrice=openPrice,
                    highestPrice=highestPrice,
                    lowestPrice=lowestPrice,
                    closePrice=closePrice
                )
                taiexs.append(taiex)
        except requests.exceptions.ConnectionError:
            loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working (connection error).')
            proxy = None
            continue
        except requests.exceptions.ConnectTimeout:
            loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working (connect timeout).')
            proxy = None
            continue
        except requests.exceptions.ProxyError:
            loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working (proxy error).')
            proxy = None
            continue
        except requests.exceptions.SSLError:
            loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working (ssl error).')
            proxy = None
            continue
        except Exception as e:
            loguru.logger.error(f'getTaiexs: proxy({proxy}) is not working.')
            loguru.logger.error(e)
            proxy = None
            continue
        break
    return taiexs

# 儲存傳入的盤後資訊
def saveTaiexs(filepath, taiexs):
    loguru.logger.info(f'saveTaiexs: {len(taiexs)} taiexs.')
    loguru.logger.warning(f'saveTaiexs: {filepath} is saving...')
    with open(filepath, mode='w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow([
            'Date',
            'OpenPrice',
            'HighestPrice',
            'LowestPrice',
            'ClosePrice'
        ])
        for taiex in taiexs:
            writer.writerow([
                taiex.Date,
                taiex.OpenPrice,
                taiex.HighestPrice,
                taiex.LowestPrice,
                taiex.ClosePrice
            ])
    loguru.logger.success(f'main: {filepath} is saved.')

def main(args):
    taiexs = []
    thisYear = 2019
    if args.year:
        loguru.logger.info(f'[-y|--year] [value:{args.year}]')
        thisYear = int(args.year)
    else:
        loguru.logger.info(f'[-y|--year] is not used, set year in '+str(thisYear))
    # 取得從 2019.01 至 2019.12 的盤後資訊
    for month in range(1, 13):
        # 程式暫停 3~15 秒
        sleep(randint(3, 15))
        taiexs = taiexs + getTaiexs(thisYear, month)
    filepath = f'taiexs-'+str(thisYear)+'.csv'
    saveTaiexs(filepath, taiexs)

    # 使用 Pandas 讀取下載回來的紀錄檔
    df = pandas.read_csv(filepath)
    # 將 Date 欄位按照格式轉換為 datetime 資料
    df['Date'] = pandas.to_datetime(df['Date'], format='%Y%m%d')
    
    # 建立圖表
    figure = plotly.graph_objects.Figure(
        data=[
            # Line Chart
            # 收盤價
            plotly.graph_objects.Scatter(
                x=df['Date'],
                y=df['ClosePrice'],
                name='收盤價',
                mode='lines',
                line=plotly.graph_objects.scatter.Line(
                    color='#6B99E5'
                )
            ),
            # Candlestick Chart
            # K 棒
            plotly.graph_objects.Candlestick(
                x=df['Date'],
                open=df['OpenPrice'],
                high=df['HighestPrice'],
                low=df['LowestPrice'],
                close=df['ClosePrice'],
                name='盤後資訊',
            )
        ],
        # 設定 XY 顯示格式
        layout=plotly.graph_objects.Layout(
            xaxis=plotly.graph_objects.layout.XAxis(
                tickformat='%Y-%m'
            ),
            yaxis=plotly.graph_objects.layout.YAxis(
                tickformat='.2f'
            )
        )
    )
    figure.show()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # -y [value:yyyy]
    # -- year [value:yyyy]
    parser.add_argument(
        '-y',
        '--year',
        help='set year in yyyy format',
        type=int
    )

    args = parser.parse_args()

    loguru.logger.add(
        f'{datetime.date.today():%Y%m%d}.log',
        rotation='1 day',
        retention='7 days',
        level='DEBUG'
    )
    main(args)
範例以2007年為例,執行下列:
python getTaxies.py -y 2007
執行結果:
python getTaixes.py -y 2007
2019-10-14 03:34:40.292 | INFO     | __main__:main:300 - [-y|--year] [value:2007]
2019-10-14 03:34:44.294 | INFO     | __main__:getProxies:69 - getProxies: proxies-2019101403.csv exists.
2019-10-14 03:34:44.298 | WARNING  | __main__:getProxies:70 - getProxies: proxies-2019101403.csv is loading...
2019-10-14 03:34:44.308 | SUCCESS  | __main__:getProxies:76 - getProxies: proxies-2019101403.csv is loaded.
2019-10-14 03:34:44.313 | DEBUG    | __main__:getProxy:50 - getProxy: 180.250.216.242:3128
2019-10-14 03:34:44.317 | DEBUG    | __main__:getProxy:52 - getProxy: 724 proxies is unused.
2019-10-14 03:34:44.322 | INFO     | __main__:getTaiexs:206 - getTaiexs: month 1 url is https://www.twse.com.tw/indicesReport/MI_5MINS_HIST?response=json&date=20070101
...(略)...
2019-10-14 03:36:58.484 | INFO     | __main__:getTaiexs:206 - getTaiexs: month 12 url is https://www.twse.com.tw/indicesReport/MI_5MINS_HIST?response=json&date=20071201
2019-10-14 03:36:58.489 | WARNING  | __main__:getTaiexs:207 - getTaiexs: month 12 is downloading...
2019-10-14 03:36:58.846 | SUCCESS  | __main__:getTaiexs:220 - getTaiexs: month 12 is downloaded.
2019-10-14 03:36:58.850 | INFO     | __main__:saveTaiexs:275 - saveTaiexs: 247 taiexs.
2019-10-14 03:36:58.853 | WARNING  | __main__:saveTaiexs:276 - saveTaiexs: taiexs-2007.csv is saving...
2019-10-14 03:36:58.864 | SUCCESS  | __main__:saveTaiexs:294 - main: taiexs-2007.csv is saved.

2007年大盤指數

#

沒有留言: